{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5191097268185063, "eval_steps": 500, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 23.553096771240234, "learning_rate": 2.1628636314480375e-09, "loss": 10.2888, "step": 1 }, { "epoch": 0.0, "grad_norm": 25.197978973388672, "learning_rate": 4.325727262896075e-09, "loss": 10.3772, "step": 2 }, { "epoch": 0.0, "grad_norm": 21.00067138671875, "learning_rate": 6.488590894344112e-09, "loss": 10.2294, "step": 3 }, { "epoch": 0.0, "grad_norm": 20.86727523803711, "learning_rate": 8.65145452579215e-09, "loss": 10.3001, "step": 4 }, { "epoch": 0.0, "grad_norm": 21.43191146850586, "learning_rate": 1.0814318157240188e-08, "loss": 10.3429, "step": 5 }, { "epoch": 0.0, "grad_norm": 23.445964813232422, "learning_rate": 1.2977181788688223e-08, "loss": 10.3249, "step": 6 }, { "epoch": 0.0, "grad_norm": 24.848257064819336, "learning_rate": 1.5140045420136263e-08, "loss": 10.3401, "step": 7 }, { "epoch": 0.0, "grad_norm": 23.019306182861328, "learning_rate": 1.73029090515843e-08, "loss": 10.3554, "step": 8 }, { "epoch": 0.0, "grad_norm": 22.129606246948242, "learning_rate": 1.9465772683032338e-08, "loss": 10.3217, "step": 9 }, { "epoch": 0.0, "grad_norm": 25.314077377319336, "learning_rate": 2.1628636314480375e-08, "loss": 10.3335, "step": 10 }, { "epoch": 0.0, "grad_norm": 23.840106964111328, "learning_rate": 2.3791499945928413e-08, "loss": 10.3455, "step": 11 }, { "epoch": 0.0, "grad_norm": 21.08882713317871, "learning_rate": 2.5954363577376447e-08, "loss": 10.3332, "step": 12 }, { "epoch": 0.0, "grad_norm": 25.759031295776367, "learning_rate": 2.8117227208824484e-08, "loss": 10.3212, "step": 13 }, { "epoch": 0.0, "grad_norm": 24.198076248168945, "learning_rate": 3.0280090840272525e-08, "loss": 10.3179, "step": 14 }, { "epoch": 0.0, "grad_norm": 20.876924514770508, "learning_rate": 3.244295447172056e-08, "loss": 10.2895, "step": 15 }, { "epoch": 0.0, "grad_norm": 23.755441665649414, "learning_rate": 3.46058181031686e-08, "loss": 10.2583, "step": 16 }, { "epoch": 0.0, "grad_norm": 21.180809020996094, "learning_rate": 3.676868173461664e-08, "loss": 10.2894, "step": 17 }, { "epoch": 0.0, "grad_norm": 21.2594051361084, "learning_rate": 3.8931545366064675e-08, "loss": 10.325, "step": 18 }, { "epoch": 0.0, "grad_norm": 23.950672149658203, "learning_rate": 4.109440899751271e-08, "loss": 10.3029, "step": 19 }, { "epoch": 0.0, "grad_norm": 22.12279510498047, "learning_rate": 4.325727262896075e-08, "loss": 10.2832, "step": 20 }, { "epoch": 0.0, "grad_norm": 23.719799041748047, "learning_rate": 4.542013626040879e-08, "loss": 10.3635, "step": 21 }, { "epoch": 0.0, "grad_norm": 22.70052719116211, "learning_rate": 4.7582999891856825e-08, "loss": 10.301, "step": 22 }, { "epoch": 0.0, "grad_norm": 20.433380126953125, "learning_rate": 4.974586352330486e-08, "loss": 10.316, "step": 23 }, { "epoch": 0.0, "grad_norm": 22.572046279907227, "learning_rate": 5.1908727154752894e-08, "loss": 10.3362, "step": 24 }, { "epoch": 0.0, "grad_norm": 24.220745086669922, "learning_rate": 5.407159078620093e-08, "loss": 10.282, "step": 25 }, { "epoch": 0.0, "grad_norm": 20.527843475341797, "learning_rate": 5.623445441764897e-08, "loss": 10.4309, "step": 26 }, { "epoch": 0.0, "grad_norm": 21.87952423095703, "learning_rate": 5.8397318049097006e-08, "loss": 10.3606, "step": 27 }, { "epoch": 0.0, "grad_norm": 22.898963928222656, "learning_rate": 6.056018168054505e-08, "loss": 10.305, "step": 28 }, { "epoch": 0.0, "grad_norm": 24.55191993713379, "learning_rate": 6.272304531199309e-08, "loss": 10.3088, "step": 29 }, { "epoch": 0.0, "grad_norm": 22.53409767150879, "learning_rate": 6.488590894344113e-08, "loss": 10.3122, "step": 30 }, { "epoch": 0.0, "grad_norm": 21.917856216430664, "learning_rate": 6.704877257488916e-08, "loss": 10.252, "step": 31 }, { "epoch": 0.0, "grad_norm": 22.368698120117188, "learning_rate": 6.92116362063372e-08, "loss": 10.2408, "step": 32 }, { "epoch": 0.0, "grad_norm": 23.809144973754883, "learning_rate": 7.137449983778524e-08, "loss": 10.2692, "step": 33 }, { "epoch": 0.0, "grad_norm": 22.491676330566406, "learning_rate": 7.353736346923328e-08, "loss": 10.2982, "step": 34 }, { "epoch": 0.0, "grad_norm": 21.08791160583496, "learning_rate": 7.570022710068131e-08, "loss": 10.2904, "step": 35 }, { "epoch": 0.0, "grad_norm": 22.148027420043945, "learning_rate": 7.786309073212935e-08, "loss": 10.2609, "step": 36 }, { "epoch": 0.0, "grad_norm": 21.017131805419922, "learning_rate": 8.002595436357739e-08, "loss": 10.3107, "step": 37 }, { "epoch": 0.0, "grad_norm": 20.733613967895508, "learning_rate": 8.218881799502543e-08, "loss": 10.248, "step": 38 }, { "epoch": 0.0, "grad_norm": 23.850990295410156, "learning_rate": 8.435168162647346e-08, "loss": 10.4038, "step": 39 }, { "epoch": 0.0, "grad_norm": 24.76868438720703, "learning_rate": 8.65145452579215e-08, "loss": 10.2393, "step": 40 }, { "epoch": 0.0, "grad_norm": 19.14295768737793, "learning_rate": 8.867740888936954e-08, "loss": 10.2496, "step": 41 }, { "epoch": 0.0, "grad_norm": 21.484033584594727, "learning_rate": 9.084027252081758e-08, "loss": 10.3103, "step": 42 }, { "epoch": 0.0, "grad_norm": 23.926406860351562, "learning_rate": 9.300313615226561e-08, "loss": 10.2448, "step": 43 }, { "epoch": 0.0, "grad_norm": 20.617441177368164, "learning_rate": 9.516599978371365e-08, "loss": 10.1772, "step": 44 }, { "epoch": 0.0, "grad_norm": 24.04540252685547, "learning_rate": 9.732886341516169e-08, "loss": 10.1788, "step": 45 }, { "epoch": 0.0, "grad_norm": 22.945199966430664, "learning_rate": 9.949172704660973e-08, "loss": 10.2547, "step": 46 }, { "epoch": 0.0, "grad_norm": 22.188213348388672, "learning_rate": 1.0165459067805776e-07, "loss": 10.1771, "step": 47 }, { "epoch": 0.0, "grad_norm": 21.242319107055664, "learning_rate": 1.0381745430950579e-07, "loss": 10.1943, "step": 48 }, { "epoch": 0.0, "grad_norm": 24.322362899780273, "learning_rate": 1.0598031794095383e-07, "loss": 10.1855, "step": 49 }, { "epoch": 0.0, "grad_norm": 23.864559173583984, "learning_rate": 1.0814318157240186e-07, "loss": 10.2265, "step": 50 }, { "epoch": 0.0, "grad_norm": 22.243202209472656, "learning_rate": 1.103060452038499e-07, "loss": 10.2436, "step": 51 }, { "epoch": 0.0, "grad_norm": 25.88116455078125, "learning_rate": 1.1246890883529794e-07, "loss": 10.2304, "step": 52 }, { "epoch": 0.0, "grad_norm": 21.843965530395508, "learning_rate": 1.1463177246674598e-07, "loss": 10.2638, "step": 53 }, { "epoch": 0.0, "grad_norm": 23.006492614746094, "learning_rate": 1.1679463609819401e-07, "loss": 10.1831, "step": 54 }, { "epoch": 0.0, "grad_norm": 22.44184112548828, "learning_rate": 1.1895749972964205e-07, "loss": 10.2296, "step": 55 }, { "epoch": 0.0, "grad_norm": 24.16782569885254, "learning_rate": 1.211203633610901e-07, "loss": 10.2052, "step": 56 }, { "epoch": 0.0, "grad_norm": 21.779760360717773, "learning_rate": 1.2328322699253813e-07, "loss": 10.1833, "step": 57 }, { "epoch": 0.0, "grad_norm": 24.798952102661133, "learning_rate": 1.2544609062398618e-07, "loss": 10.1978, "step": 58 }, { "epoch": 0.0, "grad_norm": 22.264480590820312, "learning_rate": 1.276089542554342e-07, "loss": 10.2554, "step": 59 }, { "epoch": 0.0, "grad_norm": 23.560829162597656, "learning_rate": 1.2977181788688225e-07, "loss": 10.2509, "step": 60 }, { "epoch": 0.0, "grad_norm": 24.011770248413086, "learning_rate": 1.3193468151833028e-07, "loss": 10.237, "step": 61 }, { "epoch": 0.0, "grad_norm": 25.16282844543457, "learning_rate": 1.3409754514977833e-07, "loss": 10.1496, "step": 62 }, { "epoch": 0.0, "grad_norm": 22.257789611816406, "learning_rate": 1.3626040878122635e-07, "loss": 10.1515, "step": 63 }, { "epoch": 0.0, "grad_norm": 21.723556518554688, "learning_rate": 1.384232724126744e-07, "loss": 10.1591, "step": 64 }, { "epoch": 0.0, "grad_norm": 22.331525802612305, "learning_rate": 1.4058613604412243e-07, "loss": 10.2135, "step": 65 }, { "epoch": 0.0, "grad_norm": 21.801191329956055, "learning_rate": 1.4274899967557048e-07, "loss": 10.1626, "step": 66 }, { "epoch": 0.0, "grad_norm": 22.74690055847168, "learning_rate": 1.449118633070185e-07, "loss": 10.2034, "step": 67 }, { "epoch": 0.0, "grad_norm": 22.821561813354492, "learning_rate": 1.4707472693846655e-07, "loss": 10.1387, "step": 68 }, { "epoch": 0.0, "grad_norm": 22.9634952545166, "learning_rate": 1.4923759056991458e-07, "loss": 10.1093, "step": 69 }, { "epoch": 0.0, "grad_norm": 20.369136810302734, "learning_rate": 1.5140045420136263e-07, "loss": 10.1064, "step": 70 }, { "epoch": 0.0, "grad_norm": 20.836593627929688, "learning_rate": 1.5356331783281065e-07, "loss": 10.1669, "step": 71 }, { "epoch": 0.0, "grad_norm": 20.46863555908203, "learning_rate": 1.557261814642587e-07, "loss": 10.1172, "step": 72 }, { "epoch": 0.0, "grad_norm": 21.337827682495117, "learning_rate": 1.5788904509570675e-07, "loss": 10.0774, "step": 73 }, { "epoch": 0.0, "grad_norm": 24.05677032470703, "learning_rate": 1.6005190872715478e-07, "loss": 10.0598, "step": 74 }, { "epoch": 0.0, "grad_norm": 20.203996658325195, "learning_rate": 1.6221477235860283e-07, "loss": 10.2098, "step": 75 }, { "epoch": 0.0, "grad_norm": 24.29408073425293, "learning_rate": 1.6437763599005085e-07, "loss": 10.1496, "step": 76 }, { "epoch": 0.0, "grad_norm": 22.885944366455078, "learning_rate": 1.665404996214989e-07, "loss": 10.0588, "step": 77 }, { "epoch": 0.0, "grad_norm": 22.61448860168457, "learning_rate": 1.6870336325294693e-07, "loss": 10.0441, "step": 78 }, { "epoch": 0.0, "grad_norm": 22.70732879638672, "learning_rate": 1.7086622688439498e-07, "loss": 10.0388, "step": 79 }, { "epoch": 0.0, "grad_norm": 22.565797805786133, "learning_rate": 1.73029090515843e-07, "loss": 10.0844, "step": 80 }, { "epoch": 0.0, "grad_norm": 21.13549041748047, "learning_rate": 1.7519195414729105e-07, "loss": 10.047, "step": 81 }, { "epoch": 0.0, "grad_norm": 21.265478134155273, "learning_rate": 1.7735481777873908e-07, "loss": 10.1196, "step": 82 }, { "epoch": 0.0, "grad_norm": 22.20195770263672, "learning_rate": 1.7951768141018713e-07, "loss": 10.128, "step": 83 }, { "epoch": 0.0, "grad_norm": 22.19556999206543, "learning_rate": 1.8168054504163515e-07, "loss": 10.0567, "step": 84 }, { "epoch": 0.0, "grad_norm": 21.732242584228516, "learning_rate": 1.838434086730832e-07, "loss": 10.0163, "step": 85 }, { "epoch": 0.0, "grad_norm": 23.383371353149414, "learning_rate": 1.8600627230453123e-07, "loss": 10.041, "step": 86 }, { "epoch": 0.0, "grad_norm": 22.42377471923828, "learning_rate": 1.8816913593597928e-07, "loss": 10.0721, "step": 87 }, { "epoch": 0.0, "grad_norm": 25.397470474243164, "learning_rate": 1.903319995674273e-07, "loss": 9.935, "step": 88 }, { "epoch": 0.0, "grad_norm": 21.490507125854492, "learning_rate": 1.9249486319887535e-07, "loss": 9.9904, "step": 89 }, { "epoch": 0.0, "grad_norm": 20.521724700927734, "learning_rate": 1.9465772683032338e-07, "loss": 10.0082, "step": 90 }, { "epoch": 0.0, "grad_norm": 24.559785842895508, "learning_rate": 1.9682059046177143e-07, "loss": 10.0069, "step": 91 }, { "epoch": 0.0, "grad_norm": 21.89800262451172, "learning_rate": 1.9898345409321945e-07, "loss": 9.948, "step": 92 }, { "epoch": 0.0, "grad_norm": 22.330047607421875, "learning_rate": 2.011463177246675e-07, "loss": 9.9068, "step": 93 }, { "epoch": 0.0, "grad_norm": 22.413738250732422, "learning_rate": 2.0330918135611553e-07, "loss": 10.01, "step": 94 }, { "epoch": 0.0, "grad_norm": 22.858686447143555, "learning_rate": 2.0547204498756358e-07, "loss": 9.9522, "step": 95 }, { "epoch": 0.0, "grad_norm": 24.853361129760742, "learning_rate": 2.0763490861901158e-07, "loss": 9.8786, "step": 96 }, { "epoch": 0.0, "grad_norm": 22.937639236450195, "learning_rate": 2.0979777225045965e-07, "loss": 9.8495, "step": 97 }, { "epoch": 0.0, "grad_norm": 22.283105850219727, "learning_rate": 2.1196063588190765e-07, "loss": 9.9005, "step": 98 }, { "epoch": 0.0, "grad_norm": 22.61289405822754, "learning_rate": 2.1412349951335573e-07, "loss": 9.8826, "step": 99 }, { "epoch": 0.0, "grad_norm": 21.40757179260254, "learning_rate": 2.1628636314480373e-07, "loss": 10.0006, "step": 100 }, { "epoch": 0.0, "grad_norm": 20.426544189453125, "learning_rate": 2.184492267762518e-07, "loss": 10.0307, "step": 101 }, { "epoch": 0.0, "grad_norm": 22.071224212646484, "learning_rate": 2.206120904076998e-07, "loss": 9.9564, "step": 102 }, { "epoch": 0.0, "grad_norm": 23.58004379272461, "learning_rate": 2.2277495403914788e-07, "loss": 9.8982, "step": 103 }, { "epoch": 0.0, "grad_norm": 21.84090805053711, "learning_rate": 2.2493781767059588e-07, "loss": 9.9122, "step": 104 }, { "epoch": 0.0, "grad_norm": 23.626684188842773, "learning_rate": 2.2710068130204395e-07, "loss": 9.881, "step": 105 }, { "epoch": 0.0, "grad_norm": 22.08587074279785, "learning_rate": 2.2926354493349195e-07, "loss": 9.8684, "step": 106 }, { "epoch": 0.0, "grad_norm": 23.338193893432617, "learning_rate": 2.3142640856494003e-07, "loss": 9.801, "step": 107 }, { "epoch": 0.0, "grad_norm": 21.316844940185547, "learning_rate": 2.3358927219638803e-07, "loss": 9.8854, "step": 108 }, { "epoch": 0.0, "grad_norm": 21.240270614624023, "learning_rate": 2.357521358278361e-07, "loss": 9.8899, "step": 109 }, { "epoch": 0.0, "grad_norm": 22.199283599853516, "learning_rate": 2.379149994592841e-07, "loss": 9.85, "step": 110 }, { "epoch": 0.0, "grad_norm": 23.941295623779297, "learning_rate": 2.4007786309073215e-07, "loss": 9.7582, "step": 111 }, { "epoch": 0.0, "grad_norm": 20.944568634033203, "learning_rate": 2.422407267221802e-07, "loss": 9.874, "step": 112 }, { "epoch": 0.0, "grad_norm": 20.994827270507812, "learning_rate": 2.4440359035362825e-07, "loss": 9.8386, "step": 113 }, { "epoch": 0.0, "grad_norm": 21.710002899169922, "learning_rate": 2.4656645398507625e-07, "loss": 9.8324, "step": 114 }, { "epoch": 0.0, "grad_norm": 22.057842254638672, "learning_rate": 2.487293176165243e-07, "loss": 9.7831, "step": 115 }, { "epoch": 0.0, "grad_norm": 20.51582145690918, "learning_rate": 2.5089218124797235e-07, "loss": 9.9072, "step": 116 }, { "epoch": 0.0, "grad_norm": 21.51612091064453, "learning_rate": 2.530550448794204e-07, "loss": 9.8028, "step": 117 }, { "epoch": 0.0, "grad_norm": 21.31599235534668, "learning_rate": 2.552179085108684e-07, "loss": 9.7846, "step": 118 }, { "epoch": 0.0, "grad_norm": 19.85677719116211, "learning_rate": 2.5738077214231645e-07, "loss": 9.8139, "step": 119 }, { "epoch": 0.0, "grad_norm": 21.694122314453125, "learning_rate": 2.595436357737645e-07, "loss": 9.7479, "step": 120 }, { "epoch": 0.0, "grad_norm": 22.512771606445312, "learning_rate": 2.6170649940521255e-07, "loss": 9.7346, "step": 121 }, { "epoch": 0.0, "grad_norm": 20.306888580322266, "learning_rate": 2.6386936303666055e-07, "loss": 9.8561, "step": 122 }, { "epoch": 0.0, "grad_norm": 22.075054168701172, "learning_rate": 2.660322266681086e-07, "loss": 9.7285, "step": 123 }, { "epoch": 0.0, "grad_norm": 21.737171173095703, "learning_rate": 2.6819509029955665e-07, "loss": 9.7774, "step": 124 }, { "epoch": 0.0, "grad_norm": 19.997831344604492, "learning_rate": 2.703579539310047e-07, "loss": 9.7671, "step": 125 }, { "epoch": 0.0, "grad_norm": 20.88042449951172, "learning_rate": 2.725208175624527e-07, "loss": 9.753, "step": 126 }, { "epoch": 0.0, "grad_norm": 20.499643325805664, "learning_rate": 2.7468368119390075e-07, "loss": 9.7535, "step": 127 }, { "epoch": 0.0, "grad_norm": 22.01158905029297, "learning_rate": 2.768465448253488e-07, "loss": 9.705, "step": 128 }, { "epoch": 0.0, "grad_norm": 20.9617919921875, "learning_rate": 2.7900940845679685e-07, "loss": 9.7569, "step": 129 }, { "epoch": 0.0, "grad_norm": 21.100675582885742, "learning_rate": 2.8117227208824485e-07, "loss": 9.634, "step": 130 }, { "epoch": 0.0, "grad_norm": 19.005966186523438, "learning_rate": 2.833351357196929e-07, "loss": 9.7571, "step": 131 }, { "epoch": 0.0, "grad_norm": 20.455400466918945, "learning_rate": 2.8549799935114095e-07, "loss": 9.6282, "step": 132 }, { "epoch": 0.0, "grad_norm": 25.059383392333984, "learning_rate": 2.87660862982589e-07, "loss": 9.4609, "step": 133 }, { "epoch": 0.0, "grad_norm": 22.465221405029297, "learning_rate": 2.89823726614037e-07, "loss": 9.6287, "step": 134 }, { "epoch": 0.0, "grad_norm": 22.215463638305664, "learning_rate": 2.9198659024548505e-07, "loss": 9.5868, "step": 135 }, { "epoch": 0.0, "grad_norm": 22.238353729248047, "learning_rate": 2.941494538769331e-07, "loss": 9.474, "step": 136 }, { "epoch": 0.0, "grad_norm": 21.732637405395508, "learning_rate": 2.9631231750838115e-07, "loss": 9.5695, "step": 137 }, { "epoch": 0.0, "grad_norm": 21.525062561035156, "learning_rate": 2.9847518113982915e-07, "loss": 9.488, "step": 138 }, { "epoch": 0.0, "grad_norm": 20.780780792236328, "learning_rate": 3.006380447712772e-07, "loss": 9.5372, "step": 139 }, { "epoch": 0.0, "grad_norm": 21.818195343017578, "learning_rate": 3.0280090840272525e-07, "loss": 9.5393, "step": 140 }, { "epoch": 0.0, "grad_norm": 20.39167022705078, "learning_rate": 3.049637720341733e-07, "loss": 9.6094, "step": 141 }, { "epoch": 0.0, "grad_norm": 20.60177230834961, "learning_rate": 3.071266356656213e-07, "loss": 9.5329, "step": 142 }, { "epoch": 0.0, "grad_norm": 22.983524322509766, "learning_rate": 3.0928949929706935e-07, "loss": 9.3688, "step": 143 }, { "epoch": 0.0, "grad_norm": 21.172470092773438, "learning_rate": 3.114523629285174e-07, "loss": 9.5586, "step": 144 }, { "epoch": 0.0, "grad_norm": 20.044313430786133, "learning_rate": 3.136152265599654e-07, "loss": 9.5138, "step": 145 }, { "epoch": 0.0, "grad_norm": 20.184913635253906, "learning_rate": 3.157780901914135e-07, "loss": 9.4865, "step": 146 }, { "epoch": 0.0, "grad_norm": 20.85598373413086, "learning_rate": 3.179409538228615e-07, "loss": 9.5409, "step": 147 }, { "epoch": 0.0, "grad_norm": 20.213144302368164, "learning_rate": 3.2010381745430955e-07, "loss": 9.5787, "step": 148 }, { "epoch": 0.0, "grad_norm": 21.550270080566406, "learning_rate": 3.2226668108575755e-07, "loss": 9.56, "step": 149 }, { "epoch": 0.0, "grad_norm": 21.05380630493164, "learning_rate": 3.2442954471720565e-07, "loss": 9.5087, "step": 150 }, { "epoch": 0.0, "grad_norm": 20.972829818725586, "learning_rate": 3.2659240834865365e-07, "loss": 9.4174, "step": 151 }, { "epoch": 0.0, "grad_norm": 21.933712005615234, "learning_rate": 3.287552719801017e-07, "loss": 9.3986, "step": 152 }, { "epoch": 0.0, "grad_norm": 18.771142959594727, "learning_rate": 3.309181356115497e-07, "loss": 9.5336, "step": 153 }, { "epoch": 0.0, "grad_norm": 20.886577606201172, "learning_rate": 3.330809992429978e-07, "loss": 9.3942, "step": 154 }, { "epoch": 0.0, "grad_norm": 20.518022537231445, "learning_rate": 3.352438628744458e-07, "loss": 9.2985, "step": 155 }, { "epoch": 0.0, "grad_norm": 22.33663558959961, "learning_rate": 3.3740672650589385e-07, "loss": 9.2869, "step": 156 }, { "epoch": 0.0, "grad_norm": 20.49265480041504, "learning_rate": 3.3956959013734185e-07, "loss": 9.3833, "step": 157 }, { "epoch": 0.0, "grad_norm": 21.164505004882812, "learning_rate": 3.4173245376878995e-07, "loss": 9.1998, "step": 158 }, { "epoch": 0.0, "grad_norm": 20.798309326171875, "learning_rate": 3.4389531740023795e-07, "loss": 9.353, "step": 159 }, { "epoch": 0.0, "grad_norm": 20.112377166748047, "learning_rate": 3.46058181031686e-07, "loss": 9.3845, "step": 160 }, { "epoch": 0.0, "grad_norm": 20.79449462890625, "learning_rate": 3.48221044663134e-07, "loss": 9.3402, "step": 161 }, { "epoch": 0.0, "grad_norm": 19.995769500732422, "learning_rate": 3.503839082945821e-07, "loss": 9.2959, "step": 162 }, { "epoch": 0.0, "grad_norm": 20.100221633911133, "learning_rate": 3.525467719260301e-07, "loss": 9.2478, "step": 163 }, { "epoch": 0.0, "grad_norm": 19.777793884277344, "learning_rate": 3.5470963555747815e-07, "loss": 9.2395, "step": 164 }, { "epoch": 0.0, "grad_norm": 18.50743293762207, "learning_rate": 3.5687249918892615e-07, "loss": 9.4214, "step": 165 }, { "epoch": 0.0, "grad_norm": 20.54340362548828, "learning_rate": 3.5903536282037425e-07, "loss": 9.3107, "step": 166 }, { "epoch": 0.0, "grad_norm": 18.89211654663086, "learning_rate": 3.6119822645182225e-07, "loss": 9.3373, "step": 167 }, { "epoch": 0.0, "grad_norm": 19.14583396911621, "learning_rate": 3.633610900832703e-07, "loss": 9.3352, "step": 168 }, { "epoch": 0.0, "grad_norm": 18.429685592651367, "learning_rate": 3.655239537147183e-07, "loss": 9.2633, "step": 169 }, { "epoch": 0.0, "grad_norm": 19.37230682373047, "learning_rate": 3.676868173461664e-07, "loss": 9.149, "step": 170 }, { "epoch": 0.0, "grad_norm": 20.008216857910156, "learning_rate": 3.698496809776144e-07, "loss": 9.1327, "step": 171 }, { "epoch": 0.0, "grad_norm": 19.025781631469727, "learning_rate": 3.7201254460906245e-07, "loss": 9.2263, "step": 172 }, { "epoch": 0.0, "grad_norm": 19.082801818847656, "learning_rate": 3.7417540824051045e-07, "loss": 9.1783, "step": 173 }, { "epoch": 0.0, "grad_norm": 19.174095153808594, "learning_rate": 3.7633827187195855e-07, "loss": 9.1591, "step": 174 }, { "epoch": 0.0, "grad_norm": 17.991397857666016, "learning_rate": 3.7850113550340655e-07, "loss": 9.2117, "step": 175 }, { "epoch": 0.0, "grad_norm": 17.474328994750977, "learning_rate": 3.806639991348546e-07, "loss": 9.2118, "step": 176 }, { "epoch": 0.0, "grad_norm": 18.76927375793457, "learning_rate": 3.828268627663026e-07, "loss": 9.1744, "step": 177 }, { "epoch": 0.0, "grad_norm": 19.721216201782227, "learning_rate": 3.849897263977507e-07, "loss": 8.9782, "step": 178 }, { "epoch": 0.0, "grad_norm": 17.717561721801758, "learning_rate": 3.871525900291987e-07, "loss": 9.1306, "step": 179 }, { "epoch": 0.0, "grad_norm": 19.2369441986084, "learning_rate": 3.8931545366064675e-07, "loss": 9.0526, "step": 180 }, { "epoch": 0.0, "grad_norm": 18.5240478515625, "learning_rate": 3.9147831729209475e-07, "loss": 9.1335, "step": 181 }, { "epoch": 0.0, "grad_norm": 19.23812484741211, "learning_rate": 3.9364118092354285e-07, "loss": 8.9935, "step": 182 }, { "epoch": 0.0, "grad_norm": 19.843547821044922, "learning_rate": 3.9580404455499085e-07, "loss": 8.9755, "step": 183 }, { "epoch": 0.0, "grad_norm": 17.178321838378906, "learning_rate": 3.979669081864389e-07, "loss": 9.2329, "step": 184 }, { "epoch": 0.0, "grad_norm": 18.378787994384766, "learning_rate": 4.001297718178869e-07, "loss": 9.1155, "step": 185 }, { "epoch": 0.0, "grad_norm": 18.019943237304688, "learning_rate": 4.02292635449335e-07, "loss": 8.9902, "step": 186 }, { "epoch": 0.0, "grad_norm": 19.29707908630371, "learning_rate": 4.04455499080783e-07, "loss": 8.9558, "step": 187 }, { "epoch": 0.0, "grad_norm": 18.585241317749023, "learning_rate": 4.0661836271223105e-07, "loss": 8.9622, "step": 188 }, { "epoch": 0.0, "grad_norm": 18.621723175048828, "learning_rate": 4.0878122634367905e-07, "loss": 8.9057, "step": 189 }, { "epoch": 0.0, "grad_norm": 17.660737991333008, "learning_rate": 4.1094408997512715e-07, "loss": 9.0921, "step": 190 }, { "epoch": 0.0, "grad_norm": 18.0108585357666, "learning_rate": 4.1310695360657515e-07, "loss": 8.9613, "step": 191 }, { "epoch": 0.0, "grad_norm": 17.698707580566406, "learning_rate": 4.1526981723802315e-07, "loss": 9.0141, "step": 192 }, { "epoch": 0.0, "grad_norm": 17.216506958007812, "learning_rate": 4.174326808694712e-07, "loss": 9.0464, "step": 193 }, { "epoch": 0.0, "grad_norm": 17.831798553466797, "learning_rate": 4.195955445009193e-07, "loss": 8.9569, "step": 194 }, { "epoch": 0.0, "grad_norm": 17.578813552856445, "learning_rate": 4.217584081323673e-07, "loss": 9.0, "step": 195 }, { "epoch": 0.0, "grad_norm": 17.516889572143555, "learning_rate": 4.239212717638153e-07, "loss": 8.9102, "step": 196 }, { "epoch": 0.0, "grad_norm": 17.43419647216797, "learning_rate": 4.2608413539526335e-07, "loss": 9.0304, "step": 197 }, { "epoch": 0.0, "grad_norm": 17.844459533691406, "learning_rate": 4.2824699902671145e-07, "loss": 8.8186, "step": 198 }, { "epoch": 0.0, "grad_norm": 18.10988426208496, "learning_rate": 4.3040986265815945e-07, "loss": 8.8025, "step": 199 }, { "epoch": 0.0, "grad_norm": 18.25366973876953, "learning_rate": 4.3257272628960745e-07, "loss": 8.7862, "step": 200 }, { "epoch": 0.0, "grad_norm": 18.213245391845703, "learning_rate": 4.347355899210555e-07, "loss": 8.6898, "step": 201 }, { "epoch": 0.0, "grad_norm": 17.2493839263916, "learning_rate": 4.368984535525036e-07, "loss": 8.7093, "step": 202 }, { "epoch": 0.0, "grad_norm": 15.825667381286621, "learning_rate": 4.390613171839516e-07, "loss": 8.9152, "step": 203 }, { "epoch": 0.0, "grad_norm": 16.46337127685547, "learning_rate": 4.412241808153996e-07, "loss": 8.9586, "step": 204 }, { "epoch": 0.0, "grad_norm": 16.125751495361328, "learning_rate": 4.4338704444684765e-07, "loss": 8.8634, "step": 205 }, { "epoch": 0.0, "grad_norm": 17.71835708618164, "learning_rate": 4.4554990807829575e-07, "loss": 8.7144, "step": 206 }, { "epoch": 0.0, "grad_norm": 16.920223236083984, "learning_rate": 4.4771277170974375e-07, "loss": 8.7138, "step": 207 }, { "epoch": 0.0, "grad_norm": 16.775911331176758, "learning_rate": 4.4987563534119175e-07, "loss": 8.8212, "step": 208 }, { "epoch": 0.0, "grad_norm": 17.356964111328125, "learning_rate": 4.520384989726398e-07, "loss": 8.6679, "step": 209 }, { "epoch": 0.0, "grad_norm": 16.754121780395508, "learning_rate": 4.542013626040879e-07, "loss": 8.6426, "step": 210 }, { "epoch": 0.0, "grad_norm": 17.262462615966797, "learning_rate": 4.563642262355359e-07, "loss": 8.5455, "step": 211 }, { "epoch": 0.0, "grad_norm": 16.422042846679688, "learning_rate": 4.585270898669839e-07, "loss": 8.7141, "step": 212 }, { "epoch": 0.0, "grad_norm": 17.98436164855957, "learning_rate": 4.6068995349843195e-07, "loss": 8.4206, "step": 213 }, { "epoch": 0.0, "grad_norm": 16.195453643798828, "learning_rate": 4.6285281712988006e-07, "loss": 8.7182, "step": 214 }, { "epoch": 0.0, "grad_norm": 15.409658432006836, "learning_rate": 4.6501568076132805e-07, "loss": 8.7277, "step": 215 }, { "epoch": 0.0, "grad_norm": 15.76379108428955, "learning_rate": 4.6717854439277605e-07, "loss": 8.6458, "step": 216 }, { "epoch": 0.0, "grad_norm": 16.30803680419922, "learning_rate": 4.693414080242241e-07, "loss": 8.6802, "step": 217 }, { "epoch": 0.0, "grad_norm": 15.894920349121094, "learning_rate": 4.715042716556722e-07, "loss": 8.5435, "step": 218 }, { "epoch": 0.0, "grad_norm": 15.94607162475586, "learning_rate": 4.736671352871202e-07, "loss": 8.525, "step": 219 }, { "epoch": 0.0, "grad_norm": 15.694913864135742, "learning_rate": 4.758299989185682e-07, "loss": 8.4709, "step": 220 }, { "epoch": 0.0, "grad_norm": 15.887449264526367, "learning_rate": 4.779928625500163e-07, "loss": 8.5416, "step": 221 }, { "epoch": 0.0, "grad_norm": 16.090248107910156, "learning_rate": 4.801557261814643e-07, "loss": 8.4845, "step": 222 }, { "epoch": 0.0, "grad_norm": 14.405329704284668, "learning_rate": 4.823185898129124e-07, "loss": 8.7136, "step": 223 }, { "epoch": 0.0, "grad_norm": 16.50909996032715, "learning_rate": 4.844814534443604e-07, "loss": 8.4042, "step": 224 }, { "epoch": 0.0, "grad_norm": 14.649124145507812, "learning_rate": 4.866443170758083e-07, "loss": 8.6573, "step": 225 }, { "epoch": 0.0, "grad_norm": 15.930281639099121, "learning_rate": 4.888071807072565e-07, "loss": 8.5232, "step": 226 }, { "epoch": 0.0, "grad_norm": 14.742424964904785, "learning_rate": 4.909700443387045e-07, "loss": 8.6137, "step": 227 }, { "epoch": 0.0, "grad_norm": 15.779716491699219, "learning_rate": 4.931329079701525e-07, "loss": 8.3969, "step": 228 }, { "epoch": 0.0, "grad_norm": 15.137015342712402, "learning_rate": 4.952957716016006e-07, "loss": 8.3644, "step": 229 }, { "epoch": 0.0, "grad_norm": 15.633292198181152, "learning_rate": 4.974586352330486e-07, "loss": 8.3004, "step": 230 }, { "epoch": 0.0, "grad_norm": 14.073404312133789, "learning_rate": 4.996214988644967e-07, "loss": 8.4799, "step": 231 }, { "epoch": 0.0, "grad_norm": 14.884912490844727, "learning_rate": 5.017843624959447e-07, "loss": 8.5605, "step": 232 }, { "epoch": 0.0, "grad_norm": 14.509788513183594, "learning_rate": 5.039472261273926e-07, "loss": 8.3312, "step": 233 }, { "epoch": 0.0, "grad_norm": 14.348807334899902, "learning_rate": 5.061100897588408e-07, "loss": 8.5087, "step": 234 }, { "epoch": 0.0, "grad_norm": 13.893919944763184, "learning_rate": 5.082729533902888e-07, "loss": 8.4527, "step": 235 }, { "epoch": 0.0, "grad_norm": 15.584893226623535, "learning_rate": 5.104358170217368e-07, "loss": 8.2623, "step": 236 }, { "epoch": 0.0, "grad_norm": 13.869586944580078, "learning_rate": 5.125986806531849e-07, "loss": 8.5237, "step": 237 }, { "epoch": 0.0, "grad_norm": 13.353141784667969, "learning_rate": 5.147615442846329e-07, "loss": 8.576, "step": 238 }, { "epoch": 0.0, "grad_norm": 13.953998565673828, "learning_rate": 5.16924407916081e-07, "loss": 8.3315, "step": 239 }, { "epoch": 0.0, "grad_norm": 13.56255054473877, "learning_rate": 5.19087271547529e-07, "loss": 8.4652, "step": 240 }, { "epoch": 0.0, "grad_norm": 14.039337158203125, "learning_rate": 5.21250135178977e-07, "loss": 8.3957, "step": 241 }, { "epoch": 0.0, "grad_norm": 13.71146011352539, "learning_rate": 5.234129988104251e-07, "loss": 8.3819, "step": 242 }, { "epoch": 0.0, "grad_norm": 13.897501945495605, "learning_rate": 5.25575862441873e-07, "loss": 8.5495, "step": 243 }, { "epoch": 0.0, "grad_norm": 12.7084321975708, "learning_rate": 5.277387260733211e-07, "loss": 8.5414, "step": 244 }, { "epoch": 0.0, "grad_norm": 14.301532745361328, "learning_rate": 5.299015897047692e-07, "loss": 8.2515, "step": 245 }, { "epoch": 0.0, "grad_norm": 13.409151077270508, "learning_rate": 5.320644533362172e-07, "loss": 8.3723, "step": 246 }, { "epoch": 0.0, "grad_norm": 13.518638610839844, "learning_rate": 5.342273169676653e-07, "loss": 8.4365, "step": 247 }, { "epoch": 0.0, "grad_norm": 13.851225852966309, "learning_rate": 5.363901805991133e-07, "loss": 8.1689, "step": 248 }, { "epoch": 0.0, "grad_norm": 14.032099723815918, "learning_rate": 5.385530442305612e-07, "loss": 8.1983, "step": 249 }, { "epoch": 0.0, "grad_norm": 12.771573066711426, "learning_rate": 5.407159078620094e-07, "loss": 8.465, "step": 250 }, { "epoch": 0.0, "grad_norm": 13.015631675720215, "learning_rate": 5.428787714934574e-07, "loss": 8.2738, "step": 251 }, { "epoch": 0.0, "grad_norm": 13.184597969055176, "learning_rate": 5.450416351249054e-07, "loss": 8.2136, "step": 252 }, { "epoch": 0.0, "grad_norm": 13.759464263916016, "learning_rate": 5.472044987563535e-07, "loss": 8.1406, "step": 253 }, { "epoch": 0.0, "grad_norm": 11.923513412475586, "learning_rate": 5.493673623878015e-07, "loss": 8.4492, "step": 254 }, { "epoch": 0.0, "grad_norm": 12.263082504272461, "learning_rate": 5.515302260192496e-07, "loss": 8.3709, "step": 255 }, { "epoch": 0.0, "grad_norm": 12.93655014038086, "learning_rate": 5.536930896506976e-07, "loss": 8.3236, "step": 256 }, { "epoch": 0.0, "grad_norm": 11.760838508605957, "learning_rate": 5.558559532821455e-07, "loss": 8.5827, "step": 257 }, { "epoch": 0.0, "grad_norm": 13.790403366088867, "learning_rate": 5.580188169135937e-07, "loss": 8.1158, "step": 258 }, { "epoch": 0.0, "grad_norm": 14.041502952575684, "learning_rate": 5.601816805450417e-07, "loss": 7.9108, "step": 259 }, { "epoch": 0.0, "grad_norm": 14.310694694519043, "learning_rate": 5.623445441764897e-07, "loss": 7.8587, "step": 260 }, { "epoch": 0.0, "grad_norm": 12.704385757446289, "learning_rate": 5.645074078079378e-07, "loss": 8.2112, "step": 261 }, { "epoch": 0.0, "grad_norm": 12.102681159973145, "learning_rate": 5.666702714393858e-07, "loss": 8.3694, "step": 262 }, { "epoch": 0.0, "grad_norm": 12.198502540588379, "learning_rate": 5.688331350708339e-07, "loss": 8.3641, "step": 263 }, { "epoch": 0.0, "grad_norm": 12.08797836303711, "learning_rate": 5.709959987022819e-07, "loss": 8.4695, "step": 264 }, { "epoch": 0.0, "grad_norm": 12.460247993469238, "learning_rate": 5.731588623337298e-07, "loss": 8.0314, "step": 265 }, { "epoch": 0.0, "grad_norm": 12.111571311950684, "learning_rate": 5.75321725965178e-07, "loss": 8.3143, "step": 266 }, { "epoch": 0.0, "grad_norm": 11.690836906433105, "learning_rate": 5.77484589596626e-07, "loss": 8.4665, "step": 267 }, { "epoch": 0.0, "grad_norm": 12.126970291137695, "learning_rate": 5.79647453228074e-07, "loss": 8.247, "step": 268 }, { "epoch": 0.0, "grad_norm": 11.778799057006836, "learning_rate": 5.81810316859522e-07, "loss": 8.4058, "step": 269 }, { "epoch": 0.0, "grad_norm": 13.055191040039062, "learning_rate": 5.839731804909701e-07, "loss": 7.9021, "step": 270 }, { "epoch": 0.0, "grad_norm": 13.8544282913208, "learning_rate": 5.861360441224182e-07, "loss": 7.7789, "step": 271 }, { "epoch": 0.0, "grad_norm": 12.667625427246094, "learning_rate": 5.882989077538662e-07, "loss": 7.9818, "step": 272 }, { "epoch": 0.0, "grad_norm": 11.690534591674805, "learning_rate": 5.904617713853141e-07, "loss": 8.1623, "step": 273 }, { "epoch": 0.0, "grad_norm": 12.096883773803711, "learning_rate": 5.926246350167623e-07, "loss": 8.0415, "step": 274 }, { "epoch": 0.0, "grad_norm": 11.830472946166992, "learning_rate": 5.947874986482103e-07, "loss": 8.2239, "step": 275 }, { "epoch": 0.0, "grad_norm": 12.189642906188965, "learning_rate": 5.969503622796583e-07, "loss": 8.0966, "step": 276 }, { "epoch": 0.0, "grad_norm": 12.442920684814453, "learning_rate": 5.991132259111064e-07, "loss": 7.8693, "step": 277 }, { "epoch": 0.0, "grad_norm": 13.025102615356445, "learning_rate": 6.012760895425544e-07, "loss": 7.8591, "step": 278 }, { "epoch": 0.0, "grad_norm": 13.674722671508789, "learning_rate": 6.034389531740025e-07, "loss": 7.7477, "step": 279 }, { "epoch": 0.0, "grad_norm": 12.335646629333496, "learning_rate": 6.056018168054505e-07, "loss": 7.9576, "step": 280 }, { "epoch": 0.0, "grad_norm": 12.493202209472656, "learning_rate": 6.077646804368984e-07, "loss": 7.9028, "step": 281 }, { "epoch": 0.0, "grad_norm": 11.555167198181152, "learning_rate": 6.099275440683466e-07, "loss": 8.0901, "step": 282 }, { "epoch": 0.0, "grad_norm": 11.246159553527832, "learning_rate": 6.120904076997946e-07, "loss": 8.0477, "step": 283 }, { "epoch": 0.0, "grad_norm": 11.631522178649902, "learning_rate": 6.142532713312426e-07, "loss": 8.0312, "step": 284 }, { "epoch": 0.0, "grad_norm": 11.416245460510254, "learning_rate": 6.164161349626907e-07, "loss": 8.2771, "step": 285 }, { "epoch": 0.0, "grad_norm": 13.075343132019043, "learning_rate": 6.185789985941387e-07, "loss": 7.6799, "step": 286 }, { "epoch": 0.0, "grad_norm": 10.881826400756836, "learning_rate": 6.207418622255868e-07, "loss": 8.5188, "step": 287 }, { "epoch": 0.0, "grad_norm": 12.972158432006836, "learning_rate": 6.229047258570348e-07, "loss": 7.6503, "step": 288 }, { "epoch": 0.0, "grad_norm": 11.437459945678711, "learning_rate": 6.250675894884827e-07, "loss": 8.0285, "step": 289 }, { "epoch": 0.0, "grad_norm": 10.65837574005127, "learning_rate": 6.272304531199308e-07, "loss": 8.3295, "step": 290 }, { "epoch": 0.0, "grad_norm": 11.358356475830078, "learning_rate": 6.293933167513789e-07, "loss": 8.0153, "step": 291 }, { "epoch": 0.0, "grad_norm": 11.745759963989258, "learning_rate": 6.31556180382827e-07, "loss": 7.8874, "step": 292 }, { "epoch": 0.0, "grad_norm": 10.646491050720215, "learning_rate": 6.33719044014275e-07, "loss": 8.4099, "step": 293 }, { "epoch": 0.0, "grad_norm": 12.138163566589355, "learning_rate": 6.35881907645723e-07, "loss": 7.7753, "step": 294 }, { "epoch": 0.0, "grad_norm": 11.850903511047363, "learning_rate": 6.380447712771711e-07, "loss": 7.7876, "step": 295 }, { "epoch": 0.0, "grad_norm": 10.99194622039795, "learning_rate": 6.402076349086191e-07, "loss": 8.0118, "step": 296 }, { "epoch": 0.0, "grad_norm": 12.427305221557617, "learning_rate": 6.42370498540067e-07, "loss": 7.6687, "step": 297 }, { "epoch": 0.0, "grad_norm": 11.799513816833496, "learning_rate": 6.445333621715151e-07, "loss": 7.9823, "step": 298 }, { "epoch": 0.0, "grad_norm": 11.773194313049316, "learning_rate": 6.466962258029632e-07, "loss": 8.0394, "step": 299 }, { "epoch": 0.0, "grad_norm": 10.654277801513672, "learning_rate": 6.488590894344113e-07, "loss": 7.9843, "step": 300 }, { "epoch": 0.0, "grad_norm": 11.642991065979004, "learning_rate": 6.510219530658593e-07, "loss": 7.7073, "step": 301 }, { "epoch": 0.0, "grad_norm": 10.364863395690918, "learning_rate": 6.531848166973073e-07, "loss": 8.1885, "step": 302 }, { "epoch": 0.0, "grad_norm": 10.888786315917969, "learning_rate": 6.553476803287554e-07, "loss": 7.8123, "step": 303 }, { "epoch": 0.0, "grad_norm": 11.012328147888184, "learning_rate": 6.575105439602034e-07, "loss": 7.8397, "step": 304 }, { "epoch": 0.0, "grad_norm": 11.597561836242676, "learning_rate": 6.596734075916514e-07, "loss": 7.7117, "step": 305 }, { "epoch": 0.0, "grad_norm": 11.123008728027344, "learning_rate": 6.618362712230994e-07, "loss": 7.8241, "step": 306 }, { "epoch": 0.0, "grad_norm": 10.491878509521484, "learning_rate": 6.639991348545475e-07, "loss": 8.0875, "step": 307 }, { "epoch": 0.0, "grad_norm": 11.473376274108887, "learning_rate": 6.661619984859956e-07, "loss": 7.6589, "step": 308 }, { "epoch": 0.0, "grad_norm": 10.99420166015625, "learning_rate": 6.683248621174436e-07, "loss": 8.0695, "step": 309 }, { "epoch": 0.0, "grad_norm": 11.36573600769043, "learning_rate": 6.704877257488916e-07, "loss": 7.7531, "step": 310 }, { "epoch": 0.0, "grad_norm": 10.904829025268555, "learning_rate": 6.726505893803397e-07, "loss": 7.9148, "step": 311 }, { "epoch": 0.0, "grad_norm": 11.962646484375, "learning_rate": 6.748134530117877e-07, "loss": 7.3609, "step": 312 }, { "epoch": 0.0, "grad_norm": 10.643083572387695, "learning_rate": 6.769763166432357e-07, "loss": 7.8667, "step": 313 }, { "epoch": 0.0, "grad_norm": 10.740886688232422, "learning_rate": 6.791391802746837e-07, "loss": 7.6612, "step": 314 }, { "epoch": 0.0, "grad_norm": 9.868824005126953, "learning_rate": 6.813020439061318e-07, "loss": 8.0413, "step": 315 }, { "epoch": 0.0, "grad_norm": 11.34249496459961, "learning_rate": 6.834649075375799e-07, "loss": 7.6437, "step": 316 }, { "epoch": 0.0, "grad_norm": 10.849279403686523, "learning_rate": 6.856277711690279e-07, "loss": 7.7523, "step": 317 }, { "epoch": 0.0, "grad_norm": 10.859596252441406, "learning_rate": 6.877906348004759e-07, "loss": 7.772, "step": 318 }, { "epoch": 0.0, "grad_norm": 11.54712963104248, "learning_rate": 6.89953498431924e-07, "loss": 7.655, "step": 319 }, { "epoch": 0.0, "grad_norm": 10.296342849731445, "learning_rate": 6.92116362063372e-07, "loss": 8.2079, "step": 320 }, { "epoch": 0.0, "grad_norm": 10.39718246459961, "learning_rate": 6.9427922569482e-07, "loss": 7.932, "step": 321 }, { "epoch": 0.0, "grad_norm": 10.831357955932617, "learning_rate": 6.96442089326268e-07, "loss": 7.9506, "step": 322 }, { "epoch": 0.0, "grad_norm": 10.79401969909668, "learning_rate": 6.98604952957716e-07, "loss": 7.9036, "step": 323 }, { "epoch": 0.0, "grad_norm": 9.841075897216797, "learning_rate": 7.007678165891642e-07, "loss": 7.9223, "step": 324 }, { "epoch": 0.0, "grad_norm": 10.132159233093262, "learning_rate": 7.029306802206122e-07, "loss": 8.3029, "step": 325 }, { "epoch": 0.0, "grad_norm": 11.263625144958496, "learning_rate": 7.050935438520602e-07, "loss": 7.5813, "step": 326 }, { "epoch": 0.0, "grad_norm": 11.008390426635742, "learning_rate": 7.072564074835083e-07, "loss": 7.8747, "step": 327 }, { "epoch": 0.0, "grad_norm": 10.98115348815918, "learning_rate": 7.094192711149563e-07, "loss": 7.8333, "step": 328 }, { "epoch": 0.0, "grad_norm": 10.181358337402344, "learning_rate": 7.115821347464043e-07, "loss": 8.0514, "step": 329 }, { "epoch": 0.0, "grad_norm": 11.423297882080078, "learning_rate": 7.137449983778523e-07, "loss": 7.5621, "step": 330 }, { "epoch": 0.0, "grad_norm": 10.719755172729492, "learning_rate": 7.159078620093004e-07, "loss": 7.5506, "step": 331 }, { "epoch": 0.0, "grad_norm": 11.125874519348145, "learning_rate": 7.180707256407485e-07, "loss": 7.8633, "step": 332 }, { "epoch": 0.0, "grad_norm": 10.65368938446045, "learning_rate": 7.202335892721965e-07, "loss": 7.722, "step": 333 }, { "epoch": 0.0, "grad_norm": 11.128124237060547, "learning_rate": 7.223964529036445e-07, "loss": 7.7247, "step": 334 }, { "epoch": 0.0, "grad_norm": 10.631607055664062, "learning_rate": 7.245593165350926e-07, "loss": 7.7929, "step": 335 }, { "epoch": 0.0, "grad_norm": 11.625543594360352, "learning_rate": 7.267221801665406e-07, "loss": 7.1176, "step": 336 }, { "epoch": 0.0, "grad_norm": 10.66912841796875, "learning_rate": 7.288850437979886e-07, "loss": 7.6093, "step": 337 }, { "epoch": 0.0, "grad_norm": 10.634684562683105, "learning_rate": 7.310479074294366e-07, "loss": 7.883, "step": 338 }, { "epoch": 0.0, "grad_norm": 10.297965049743652, "learning_rate": 7.332107710608847e-07, "loss": 7.973, "step": 339 }, { "epoch": 0.0, "grad_norm": 10.935160636901855, "learning_rate": 7.353736346923328e-07, "loss": 7.6289, "step": 340 }, { "epoch": 0.0, "grad_norm": 11.573995590209961, "learning_rate": 7.375364983237808e-07, "loss": 7.6922, "step": 341 }, { "epoch": 0.0, "grad_norm": 10.57896900177002, "learning_rate": 7.396993619552288e-07, "loss": 7.9293, "step": 342 }, { "epoch": 0.0, "grad_norm": 9.882609367370605, "learning_rate": 7.418622255866769e-07, "loss": 7.9495, "step": 343 }, { "epoch": 0.0, "grad_norm": 10.326014518737793, "learning_rate": 7.440250892181249e-07, "loss": 8.0347, "step": 344 }, { "epoch": 0.0, "grad_norm": 10.151006698608398, "learning_rate": 7.461879528495729e-07, "loss": 8.0018, "step": 345 }, { "epoch": 0.0, "grad_norm": 11.150446891784668, "learning_rate": 7.483508164810209e-07, "loss": 7.363, "step": 346 }, { "epoch": 0.0, "grad_norm": 10.627467155456543, "learning_rate": 7.50513680112469e-07, "loss": 7.6559, "step": 347 }, { "epoch": 0.0, "grad_norm": 11.966575622558594, "learning_rate": 7.526765437439171e-07, "loss": 7.1753, "step": 348 }, { "epoch": 0.0, "grad_norm": 10.45740032196045, "learning_rate": 7.548394073753651e-07, "loss": 7.5735, "step": 349 }, { "epoch": 0.0, "grad_norm": 11.003700256347656, "learning_rate": 7.570022710068131e-07, "loss": 7.6342, "step": 350 }, { "epoch": 0.0, "grad_norm": 11.47400188446045, "learning_rate": 7.591651346382612e-07, "loss": 7.1666, "step": 351 }, { "epoch": 0.0, "grad_norm": 9.925835609436035, "learning_rate": 7.613279982697092e-07, "loss": 7.7841, "step": 352 }, { "epoch": 0.0, "grad_norm": 11.226770401000977, "learning_rate": 7.634908619011572e-07, "loss": 7.3963, "step": 353 }, { "epoch": 0.0, "grad_norm": 10.058028221130371, "learning_rate": 7.656537255326052e-07, "loss": 8.119, "step": 354 }, { "epoch": 0.0, "grad_norm": 11.04355525970459, "learning_rate": 7.678165891640533e-07, "loss": 7.2775, "step": 355 }, { "epoch": 0.0, "grad_norm": 12.586406707763672, "learning_rate": 7.699794527955014e-07, "loss": 6.8553, "step": 356 }, { "epoch": 0.0, "grad_norm": 10.484878540039062, "learning_rate": 7.721423164269494e-07, "loss": 7.5092, "step": 357 }, { "epoch": 0.0, "grad_norm": 9.614654541015625, "learning_rate": 7.743051800583974e-07, "loss": 7.938, "step": 358 }, { "epoch": 0.0, "grad_norm": 10.653554916381836, "learning_rate": 7.764680436898455e-07, "loss": 7.6159, "step": 359 }, { "epoch": 0.0, "grad_norm": 10.69241714477539, "learning_rate": 7.786309073212935e-07, "loss": 7.3853, "step": 360 }, { "epoch": 0.0, "grad_norm": 10.507760047912598, "learning_rate": 7.807937709527415e-07, "loss": 7.549, "step": 361 }, { "epoch": 0.0, "grad_norm": 10.599308967590332, "learning_rate": 7.829566345841895e-07, "loss": 7.5176, "step": 362 }, { "epoch": 0.0, "grad_norm": 10.627382278442383, "learning_rate": 7.851194982156376e-07, "loss": 7.4846, "step": 363 }, { "epoch": 0.0, "grad_norm": 11.015400886535645, "learning_rate": 7.872823618470857e-07, "loss": 7.2554, "step": 364 }, { "epoch": 0.0, "grad_norm": 10.782411575317383, "learning_rate": 7.894452254785337e-07, "loss": 7.342, "step": 365 }, { "epoch": 0.0, "grad_norm": 10.187736511230469, "learning_rate": 7.916080891099817e-07, "loss": 8.0056, "step": 366 }, { "epoch": 0.0, "grad_norm": 9.969917297363281, "learning_rate": 7.937709527414298e-07, "loss": 7.8576, "step": 367 }, { "epoch": 0.0, "grad_norm": 10.166338920593262, "learning_rate": 7.959338163728778e-07, "loss": 7.8015, "step": 368 }, { "epoch": 0.0, "grad_norm": 10.43881607055664, "learning_rate": 7.980966800043258e-07, "loss": 7.6224, "step": 369 }, { "epoch": 0.0, "grad_norm": 10.97603702545166, "learning_rate": 8.002595436357738e-07, "loss": 7.3194, "step": 370 }, { "epoch": 0.0, "grad_norm": 10.11705493927002, "learning_rate": 8.024224072672219e-07, "loss": 7.8604, "step": 371 }, { "epoch": 0.0, "grad_norm": 10.917549133300781, "learning_rate": 8.0458527089867e-07, "loss": 7.5768, "step": 372 }, { "epoch": 0.0, "grad_norm": 11.064948081970215, "learning_rate": 8.06748134530118e-07, "loss": 7.4414, "step": 373 }, { "epoch": 0.0, "grad_norm": 11.227031707763672, "learning_rate": 8.08910998161566e-07, "loss": 7.4055, "step": 374 }, { "epoch": 0.0, "grad_norm": 10.64448070526123, "learning_rate": 8.110738617930141e-07, "loss": 7.4982, "step": 375 }, { "epoch": 0.0, "grad_norm": 10.707783699035645, "learning_rate": 8.132367254244621e-07, "loss": 7.4125, "step": 376 }, { "epoch": 0.0, "grad_norm": 11.578241348266602, "learning_rate": 8.1539958905591e-07, "loss": 7.0595, "step": 377 }, { "epoch": 0.0, "grad_norm": 11.386665344238281, "learning_rate": 8.175624526873581e-07, "loss": 7.4771, "step": 378 }, { "epoch": 0.0, "grad_norm": 11.280173301696777, "learning_rate": 8.197253163188062e-07, "loss": 7.1443, "step": 379 }, { "epoch": 0.0, "grad_norm": 11.583728790283203, "learning_rate": 8.218881799502543e-07, "loss": 7.1036, "step": 380 }, { "epoch": 0.0, "grad_norm": 10.849283218383789, "learning_rate": 8.240510435817023e-07, "loss": 7.4728, "step": 381 }, { "epoch": 0.0, "grad_norm": 11.23311996459961, "learning_rate": 8.262139072131503e-07, "loss": 7.0759, "step": 382 }, { "epoch": 0.0, "grad_norm": 10.710949897766113, "learning_rate": 8.283767708445984e-07, "loss": 7.6405, "step": 383 }, { "epoch": 0.0, "grad_norm": 10.911259651184082, "learning_rate": 8.305396344760463e-07, "loss": 7.4391, "step": 384 }, { "epoch": 0.0, "grad_norm": 10.443777084350586, "learning_rate": 8.327024981074944e-07, "loss": 7.2243, "step": 385 }, { "epoch": 0.01, "grad_norm": 12.056902885437012, "learning_rate": 8.348653617389424e-07, "loss": 6.9871, "step": 386 }, { "epoch": 0.01, "grad_norm": 10.853910446166992, "learning_rate": 8.370282253703905e-07, "loss": 7.1519, "step": 387 }, { "epoch": 0.01, "grad_norm": 10.784841537475586, "learning_rate": 8.391910890018386e-07, "loss": 7.2966, "step": 388 }, { "epoch": 0.01, "grad_norm": 9.898833274841309, "learning_rate": 8.413539526332866e-07, "loss": 7.6002, "step": 389 }, { "epoch": 0.01, "grad_norm": 10.563908576965332, "learning_rate": 8.435168162647346e-07, "loss": 7.3098, "step": 390 }, { "epoch": 0.01, "grad_norm": 11.77676773071289, "learning_rate": 8.456796798961827e-07, "loss": 7.0816, "step": 391 }, { "epoch": 0.01, "grad_norm": 10.88199234008789, "learning_rate": 8.478425435276306e-07, "loss": 7.2583, "step": 392 }, { "epoch": 0.01, "grad_norm": 10.56164264678955, "learning_rate": 8.500054071590787e-07, "loss": 7.3959, "step": 393 }, { "epoch": 0.01, "grad_norm": 10.710770606994629, "learning_rate": 8.521682707905267e-07, "loss": 7.3235, "step": 394 }, { "epoch": 0.01, "grad_norm": 10.756287574768066, "learning_rate": 8.543311344219748e-07, "loss": 7.5391, "step": 395 }, { "epoch": 0.01, "grad_norm": 11.113382339477539, "learning_rate": 8.564939980534229e-07, "loss": 7.5518, "step": 396 }, { "epoch": 0.01, "grad_norm": 10.816576957702637, "learning_rate": 8.586568616848709e-07, "loss": 7.4173, "step": 397 }, { "epoch": 0.01, "grad_norm": 11.06883716583252, "learning_rate": 8.608197253163189e-07, "loss": 7.9162, "step": 398 }, { "epoch": 0.01, "grad_norm": 10.735918998718262, "learning_rate": 8.62982588947767e-07, "loss": 7.3101, "step": 399 }, { "epoch": 0.01, "grad_norm": 11.247931480407715, "learning_rate": 8.651454525792149e-07, "loss": 7.2963, "step": 400 }, { "epoch": 0.01, "grad_norm": 11.560468673706055, "learning_rate": 8.67308316210663e-07, "loss": 7.504, "step": 401 }, { "epoch": 0.01, "grad_norm": 10.884642601013184, "learning_rate": 8.69471179842111e-07, "loss": 7.1955, "step": 402 }, { "epoch": 0.01, "grad_norm": 11.105306625366211, "learning_rate": 8.716340434735591e-07, "loss": 7.1519, "step": 403 }, { "epoch": 0.01, "grad_norm": 11.445807456970215, "learning_rate": 8.737969071050072e-07, "loss": 7.5859, "step": 404 }, { "epoch": 0.01, "grad_norm": 10.477038383483887, "learning_rate": 8.759597707364552e-07, "loss": 7.2276, "step": 405 }, { "epoch": 0.01, "grad_norm": 10.447543144226074, "learning_rate": 8.781226343679032e-07, "loss": 7.3241, "step": 406 }, { "epoch": 0.01, "grad_norm": 11.506038665771484, "learning_rate": 8.802854979993513e-07, "loss": 7.6396, "step": 407 }, { "epoch": 0.01, "grad_norm": 11.496040344238281, "learning_rate": 8.824483616307992e-07, "loss": 7.1402, "step": 408 }, { "epoch": 0.01, "grad_norm": 11.172258377075195, "learning_rate": 8.846112252622473e-07, "loss": 7.2891, "step": 409 }, { "epoch": 0.01, "grad_norm": 11.616873741149902, "learning_rate": 8.867740888936953e-07, "loss": 7.2702, "step": 410 }, { "epoch": 0.01, "grad_norm": 10.626029968261719, "learning_rate": 8.889369525251434e-07, "loss": 7.244, "step": 411 }, { "epoch": 0.01, "grad_norm": 10.876832008361816, "learning_rate": 8.910998161565915e-07, "loss": 7.0975, "step": 412 }, { "epoch": 0.01, "grad_norm": 10.622139930725098, "learning_rate": 8.932626797880395e-07, "loss": 7.2348, "step": 413 }, { "epoch": 0.01, "grad_norm": 11.86733627319336, "learning_rate": 8.954255434194875e-07, "loss": 7.0732, "step": 414 }, { "epoch": 0.01, "grad_norm": 10.715790748596191, "learning_rate": 8.975884070509356e-07, "loss": 7.701, "step": 415 }, { "epoch": 0.01, "grad_norm": 10.682657241821289, "learning_rate": 8.997512706823835e-07, "loss": 7.3417, "step": 416 }, { "epoch": 0.01, "grad_norm": 11.444178581237793, "learning_rate": 9.019141343138316e-07, "loss": 6.793, "step": 417 }, { "epoch": 0.01, "grad_norm": 10.69895076751709, "learning_rate": 9.040769979452796e-07, "loss": 7.1105, "step": 418 }, { "epoch": 0.01, "grad_norm": 11.906815528869629, "learning_rate": 9.062398615767277e-07, "loss": 7.2735, "step": 419 }, { "epoch": 0.01, "grad_norm": 11.236828804016113, "learning_rate": 9.084027252081758e-07, "loss": 7.0929, "step": 420 }, { "epoch": 0.01, "grad_norm": 10.785062789916992, "learning_rate": 9.105655888396238e-07, "loss": 7.3012, "step": 421 }, { "epoch": 0.01, "grad_norm": 11.812397003173828, "learning_rate": 9.127284524710718e-07, "loss": 7.1435, "step": 422 }, { "epoch": 0.01, "grad_norm": 10.965103149414062, "learning_rate": 9.148913161025199e-07, "loss": 6.9362, "step": 423 }, { "epoch": 0.01, "grad_norm": 10.985068321228027, "learning_rate": 9.170541797339678e-07, "loss": 7.2556, "step": 424 }, { "epoch": 0.01, "grad_norm": 11.177844047546387, "learning_rate": 9.192170433654159e-07, "loss": 7.113, "step": 425 }, { "epoch": 0.01, "grad_norm": 11.526851654052734, "learning_rate": 9.213799069968639e-07, "loss": 6.9423, "step": 426 }, { "epoch": 0.01, "grad_norm": 11.620122909545898, "learning_rate": 9.23542770628312e-07, "loss": 7.4857, "step": 427 }, { "epoch": 0.01, "grad_norm": 11.42298412322998, "learning_rate": 9.257056342597601e-07, "loss": 7.3159, "step": 428 }, { "epoch": 0.01, "grad_norm": 10.434772491455078, "learning_rate": 9.278684978912081e-07, "loss": 7.4255, "step": 429 }, { "epoch": 0.01, "grad_norm": 11.197028160095215, "learning_rate": 9.300313615226561e-07, "loss": 7.036, "step": 430 }, { "epoch": 0.01, "grad_norm": 11.237401962280273, "learning_rate": 9.321942251541042e-07, "loss": 7.1595, "step": 431 }, { "epoch": 0.01, "grad_norm": 10.678382873535156, "learning_rate": 9.343570887855521e-07, "loss": 7.4994, "step": 432 }, { "epoch": 0.01, "grad_norm": 10.999505996704102, "learning_rate": 9.365199524170002e-07, "loss": 7.2681, "step": 433 }, { "epoch": 0.01, "grad_norm": 11.453808784484863, "learning_rate": 9.386828160484482e-07, "loss": 6.8472, "step": 434 }, { "epoch": 0.01, "grad_norm": 10.893820762634277, "learning_rate": 9.408456796798963e-07, "loss": 7.3155, "step": 435 }, { "epoch": 0.01, "grad_norm": 12.139395713806152, "learning_rate": 9.430085433113444e-07, "loss": 7.0584, "step": 436 }, { "epoch": 0.01, "grad_norm": 10.643254280090332, "learning_rate": 9.451714069427924e-07, "loss": 7.2343, "step": 437 }, { "epoch": 0.01, "grad_norm": 11.424565315246582, "learning_rate": 9.473342705742404e-07, "loss": 7.2194, "step": 438 }, { "epoch": 0.01, "grad_norm": 11.012331008911133, "learning_rate": 9.494971342056885e-07, "loss": 7.3097, "step": 439 }, { "epoch": 0.01, "grad_norm": 11.059545516967773, "learning_rate": 9.516599978371364e-07, "loss": 7.0855, "step": 440 }, { "epoch": 0.01, "grad_norm": 10.386860847473145, "learning_rate": 9.538228614685845e-07, "loss": 7.7221, "step": 441 }, { "epoch": 0.01, "grad_norm": 11.532678604125977, "learning_rate": 9.559857251000325e-07, "loss": 6.8408, "step": 442 }, { "epoch": 0.01, "grad_norm": 11.038549423217773, "learning_rate": 9.581485887314806e-07, "loss": 6.897, "step": 443 }, { "epoch": 0.01, "grad_norm": 11.090032577514648, "learning_rate": 9.603114523629286e-07, "loss": 7.3364, "step": 444 }, { "epoch": 0.01, "grad_norm": 11.08510684967041, "learning_rate": 9.624743159943767e-07, "loss": 7.2732, "step": 445 }, { "epoch": 0.01, "grad_norm": 11.725444793701172, "learning_rate": 9.646371796258247e-07, "loss": 7.2928, "step": 446 }, { "epoch": 0.01, "grad_norm": 11.422737121582031, "learning_rate": 9.668000432572728e-07, "loss": 7.1711, "step": 447 }, { "epoch": 0.01, "grad_norm": 10.87169075012207, "learning_rate": 9.689629068887208e-07, "loss": 7.4959, "step": 448 }, { "epoch": 0.01, "grad_norm": 11.520834922790527, "learning_rate": 9.711257705201689e-07, "loss": 6.9918, "step": 449 }, { "epoch": 0.01, "grad_norm": 11.324690818786621, "learning_rate": 9.732886341516167e-07, "loss": 6.9122, "step": 450 }, { "epoch": 0.01, "grad_norm": 11.36355972290039, "learning_rate": 9.754514977830647e-07, "loss": 6.6217, "step": 451 }, { "epoch": 0.01, "grad_norm": 12.043499946594238, "learning_rate": 9.77614361414513e-07, "loss": 6.5914, "step": 452 }, { "epoch": 0.01, "grad_norm": 11.049127578735352, "learning_rate": 9.79777225045961e-07, "loss": 6.9334, "step": 453 }, { "epoch": 0.01, "grad_norm": 10.952451705932617, "learning_rate": 9.81940088677409e-07, "loss": 7.1125, "step": 454 }, { "epoch": 0.01, "grad_norm": 11.98150634765625, "learning_rate": 9.84102952308857e-07, "loss": 6.8487, "step": 455 }, { "epoch": 0.01, "grad_norm": 11.39083194732666, "learning_rate": 9.86265815940305e-07, "loss": 6.5464, "step": 456 }, { "epoch": 0.01, "grad_norm": 11.653234481811523, "learning_rate": 9.88428679571753e-07, "loss": 6.8553, "step": 457 }, { "epoch": 0.01, "grad_norm": 10.894904136657715, "learning_rate": 9.90591543203201e-07, "loss": 7.3079, "step": 458 }, { "epoch": 0.01, "grad_norm": 11.847002983093262, "learning_rate": 9.927544068346492e-07, "loss": 6.8085, "step": 459 }, { "epoch": 0.01, "grad_norm": 12.108366012573242, "learning_rate": 9.949172704660972e-07, "loss": 7.2705, "step": 460 }, { "epoch": 0.01, "grad_norm": 11.488194465637207, "learning_rate": 9.970801340975453e-07, "loss": 6.8725, "step": 461 }, { "epoch": 0.01, "grad_norm": 12.607979774475098, "learning_rate": 9.992429977289933e-07, "loss": 6.6081, "step": 462 }, { "epoch": 0.01, "grad_norm": 10.649169921875, "learning_rate": 1.0014058613604414e-06, "loss": 7.2185, "step": 463 }, { "epoch": 0.01, "grad_norm": 11.45957088470459, "learning_rate": 1.0035687249918894e-06, "loss": 7.1323, "step": 464 }, { "epoch": 0.01, "grad_norm": 11.521224021911621, "learning_rate": 1.0057315886233375e-06, "loss": 7.0534, "step": 465 }, { "epoch": 0.01, "grad_norm": 11.292110443115234, "learning_rate": 1.0078944522547853e-06, "loss": 7.1102, "step": 466 }, { "epoch": 0.01, "grad_norm": 11.874016761779785, "learning_rate": 1.0100573158862333e-06, "loss": 6.7626, "step": 467 }, { "epoch": 0.01, "grad_norm": 12.343805313110352, "learning_rate": 1.0122201795176816e-06, "loss": 7.0991, "step": 468 }, { "epoch": 0.01, "grad_norm": 11.34910774230957, "learning_rate": 1.0143830431491297e-06, "loss": 7.1213, "step": 469 }, { "epoch": 0.01, "grad_norm": 11.376497268676758, "learning_rate": 1.0165459067805775e-06, "loss": 7.1726, "step": 470 }, { "epoch": 0.01, "grad_norm": 11.23877239227295, "learning_rate": 1.0187087704120256e-06, "loss": 6.7368, "step": 471 }, { "epoch": 0.01, "grad_norm": 11.533340454101562, "learning_rate": 1.0208716340434736e-06, "loss": 7.0685, "step": 472 }, { "epoch": 0.01, "grad_norm": 10.851320266723633, "learning_rate": 1.0230344976749217e-06, "loss": 7.7243, "step": 473 }, { "epoch": 0.01, "grad_norm": 11.319485664367676, "learning_rate": 1.0251973613063697e-06, "loss": 6.8717, "step": 474 }, { "epoch": 0.01, "grad_norm": 12.034823417663574, "learning_rate": 1.0273602249378178e-06, "loss": 7.007, "step": 475 }, { "epoch": 0.01, "grad_norm": 11.183178901672363, "learning_rate": 1.0295230885692658e-06, "loss": 7.0261, "step": 476 }, { "epoch": 0.01, "grad_norm": 11.613727569580078, "learning_rate": 1.0316859522007139e-06, "loss": 6.7045, "step": 477 }, { "epoch": 0.01, "grad_norm": 11.919496536254883, "learning_rate": 1.033848815832162e-06, "loss": 6.9026, "step": 478 }, { "epoch": 0.01, "grad_norm": 10.627421379089355, "learning_rate": 1.03601167946361e-06, "loss": 7.1882, "step": 479 }, { "epoch": 0.01, "grad_norm": 11.75552749633789, "learning_rate": 1.038174543095058e-06, "loss": 7.2288, "step": 480 }, { "epoch": 0.01, "grad_norm": 12.109320640563965, "learning_rate": 1.040337406726506e-06, "loss": 6.8997, "step": 481 }, { "epoch": 0.01, "grad_norm": 11.672327041625977, "learning_rate": 1.042500270357954e-06, "loss": 6.4799, "step": 482 }, { "epoch": 0.01, "grad_norm": 11.668148040771484, "learning_rate": 1.044663133989402e-06, "loss": 6.3492, "step": 483 }, { "epoch": 0.01, "grad_norm": 11.26370620727539, "learning_rate": 1.0468259976208502e-06, "loss": 6.8329, "step": 484 }, { "epoch": 0.01, "grad_norm": 11.831554412841797, "learning_rate": 1.0489888612522983e-06, "loss": 7.0992, "step": 485 }, { "epoch": 0.01, "grad_norm": 10.71716022491455, "learning_rate": 1.051151724883746e-06, "loss": 6.8611, "step": 486 }, { "epoch": 0.01, "grad_norm": 11.579619407653809, "learning_rate": 1.0533145885151942e-06, "loss": 6.9484, "step": 487 }, { "epoch": 0.01, "grad_norm": 11.70088005065918, "learning_rate": 1.0554774521466422e-06, "loss": 7.4263, "step": 488 }, { "epoch": 0.01, "grad_norm": 11.929848670959473, "learning_rate": 1.0576403157780903e-06, "loss": 7.0986, "step": 489 }, { "epoch": 0.01, "grad_norm": 10.792500495910645, "learning_rate": 1.0598031794095383e-06, "loss": 6.9355, "step": 490 }, { "epoch": 0.01, "grad_norm": 12.517864227294922, "learning_rate": 1.0619660430409864e-06, "loss": 6.2076, "step": 491 }, { "epoch": 0.01, "grad_norm": 10.73897933959961, "learning_rate": 1.0641289066724344e-06, "loss": 6.9208, "step": 492 }, { "epoch": 0.01, "grad_norm": 10.975313186645508, "learning_rate": 1.0662917703038825e-06, "loss": 7.462, "step": 493 }, { "epoch": 0.01, "grad_norm": 10.463448524475098, "learning_rate": 1.0684546339353305e-06, "loss": 7.1649, "step": 494 }, { "epoch": 0.01, "grad_norm": 11.297101020812988, "learning_rate": 1.0706174975667786e-06, "loss": 7.2512, "step": 495 }, { "epoch": 0.01, "grad_norm": 11.690046310424805, "learning_rate": 1.0727803611982266e-06, "loss": 6.654, "step": 496 }, { "epoch": 0.01, "grad_norm": 11.364738464355469, "learning_rate": 1.0749432248296747e-06, "loss": 6.7716, "step": 497 }, { "epoch": 0.01, "grad_norm": 11.050647735595703, "learning_rate": 1.0771060884611225e-06, "loss": 6.824, "step": 498 }, { "epoch": 0.01, "grad_norm": 12.245965003967285, "learning_rate": 1.0792689520925705e-06, "loss": 6.8475, "step": 499 }, { "epoch": 0.01, "grad_norm": 11.829086303710938, "learning_rate": 1.0814318157240188e-06, "loss": 7.392, "step": 500 }, { "epoch": 0.01, "grad_norm": 11.665075302124023, "learning_rate": 1.0835946793554669e-06, "loss": 7.2789, "step": 501 }, { "epoch": 0.01, "grad_norm": 11.66236686706543, "learning_rate": 1.0857575429869147e-06, "loss": 6.8035, "step": 502 }, { "epoch": 0.01, "grad_norm": 11.876258850097656, "learning_rate": 1.0879204066183628e-06, "loss": 6.3859, "step": 503 }, { "epoch": 0.01, "grad_norm": 11.237165451049805, "learning_rate": 1.0900832702498108e-06, "loss": 7.0671, "step": 504 }, { "epoch": 0.01, "grad_norm": 10.961540222167969, "learning_rate": 1.0922461338812589e-06, "loss": 6.9768, "step": 505 }, { "epoch": 0.01, "grad_norm": 11.188392639160156, "learning_rate": 1.094408997512707e-06, "loss": 6.4845, "step": 506 }, { "epoch": 0.01, "grad_norm": 11.968476295471191, "learning_rate": 1.096571861144155e-06, "loss": 7.0299, "step": 507 }, { "epoch": 0.01, "grad_norm": 11.329046249389648, "learning_rate": 1.098734724775603e-06, "loss": 6.5394, "step": 508 }, { "epoch": 0.01, "grad_norm": 11.199234962463379, "learning_rate": 1.100897588407051e-06, "loss": 6.5797, "step": 509 }, { "epoch": 0.01, "grad_norm": 10.263569831848145, "learning_rate": 1.1030604520384991e-06, "loss": 7.2205, "step": 510 }, { "epoch": 0.01, "grad_norm": 11.155059814453125, "learning_rate": 1.1052233156699472e-06, "loss": 6.6492, "step": 511 }, { "epoch": 0.01, "grad_norm": 11.267718315124512, "learning_rate": 1.1073861793013952e-06, "loss": 6.5777, "step": 512 }, { "epoch": 0.01, "grad_norm": 10.493149757385254, "learning_rate": 1.1095490429328433e-06, "loss": 7.0325, "step": 513 }, { "epoch": 0.01, "grad_norm": 10.789677619934082, "learning_rate": 1.111711906564291e-06, "loss": 7.2409, "step": 514 }, { "epoch": 0.01, "grad_norm": 11.255308151245117, "learning_rate": 1.1138747701957391e-06, "loss": 6.6485, "step": 515 }, { "epoch": 0.01, "grad_norm": 11.897136688232422, "learning_rate": 1.1160376338271874e-06, "loss": 6.5791, "step": 516 }, { "epoch": 0.01, "grad_norm": 12.194074630737305, "learning_rate": 1.1182004974586355e-06, "loss": 6.7886, "step": 517 }, { "epoch": 0.01, "grad_norm": 11.049796104431152, "learning_rate": 1.1203633610900833e-06, "loss": 6.6, "step": 518 }, { "epoch": 0.01, "grad_norm": 11.086819648742676, "learning_rate": 1.1225262247215314e-06, "loss": 6.8422, "step": 519 }, { "epoch": 0.01, "grad_norm": 10.910130500793457, "learning_rate": 1.1246890883529794e-06, "loss": 6.3071, "step": 520 }, { "epoch": 0.01, "grad_norm": 11.431610107421875, "learning_rate": 1.1268519519844275e-06, "loss": 6.9971, "step": 521 }, { "epoch": 0.01, "grad_norm": 11.560690879821777, "learning_rate": 1.1290148156158755e-06, "loss": 6.4934, "step": 522 }, { "epoch": 0.01, "grad_norm": 11.096409797668457, "learning_rate": 1.1311776792473236e-06, "loss": 6.4575, "step": 523 }, { "epoch": 0.01, "grad_norm": 11.958191871643066, "learning_rate": 1.1333405428787716e-06, "loss": 6.6572, "step": 524 }, { "epoch": 0.01, "grad_norm": 11.583562850952148, "learning_rate": 1.1355034065102197e-06, "loss": 7.0136, "step": 525 }, { "epoch": 0.01, "grad_norm": 11.014350891113281, "learning_rate": 1.1376662701416677e-06, "loss": 7.2276, "step": 526 }, { "epoch": 0.01, "grad_norm": 11.220401763916016, "learning_rate": 1.1398291337731158e-06, "loss": 6.5922, "step": 527 }, { "epoch": 0.01, "grad_norm": 11.97567081451416, "learning_rate": 1.1419919974045638e-06, "loss": 6.156, "step": 528 }, { "epoch": 0.01, "grad_norm": 11.654022216796875, "learning_rate": 1.1441548610360119e-06, "loss": 6.9699, "step": 529 }, { "epoch": 0.01, "grad_norm": 10.624717712402344, "learning_rate": 1.1463177246674597e-06, "loss": 7.2015, "step": 530 }, { "epoch": 0.01, "grad_norm": 11.469489097595215, "learning_rate": 1.1484805882989077e-06, "loss": 6.8899, "step": 531 }, { "epoch": 0.01, "grad_norm": 11.721708297729492, "learning_rate": 1.150643451930356e-06, "loss": 6.5993, "step": 532 }, { "epoch": 0.01, "grad_norm": 11.628155708312988, "learning_rate": 1.152806315561804e-06, "loss": 6.7089, "step": 533 }, { "epoch": 0.01, "grad_norm": 11.254840850830078, "learning_rate": 1.154969179193252e-06, "loss": 6.7467, "step": 534 }, { "epoch": 0.01, "grad_norm": 12.636506080627441, "learning_rate": 1.1571320428247e-06, "loss": 6.6551, "step": 535 }, { "epoch": 0.01, "grad_norm": 11.535199165344238, "learning_rate": 1.159294906456148e-06, "loss": 6.4155, "step": 536 }, { "epoch": 0.01, "grad_norm": 11.200018882751465, "learning_rate": 1.161457770087596e-06, "loss": 6.9801, "step": 537 }, { "epoch": 0.01, "grad_norm": 10.97948169708252, "learning_rate": 1.163620633719044e-06, "loss": 7.0341, "step": 538 }, { "epoch": 0.01, "grad_norm": 11.398344039916992, "learning_rate": 1.1657834973504922e-06, "loss": 7.0528, "step": 539 }, { "epoch": 0.01, "grad_norm": 10.494927406311035, "learning_rate": 1.1679463609819402e-06, "loss": 7.2854, "step": 540 }, { "epoch": 0.01, "grad_norm": 11.315098762512207, "learning_rate": 1.1701092246133883e-06, "loss": 6.3706, "step": 541 }, { "epoch": 0.01, "grad_norm": 11.483386993408203, "learning_rate": 1.1722720882448363e-06, "loss": 6.8923, "step": 542 }, { "epoch": 0.01, "grad_norm": 11.732893943786621, "learning_rate": 1.1744349518762844e-06, "loss": 6.7256, "step": 543 }, { "epoch": 0.01, "grad_norm": 12.080839157104492, "learning_rate": 1.1765978155077324e-06, "loss": 6.7883, "step": 544 }, { "epoch": 0.01, "grad_norm": 10.917027473449707, "learning_rate": 1.1787606791391802e-06, "loss": 7.1399, "step": 545 }, { "epoch": 0.01, "grad_norm": 12.472942352294922, "learning_rate": 1.1809235427706283e-06, "loss": 6.7902, "step": 546 }, { "epoch": 0.01, "grad_norm": 11.083635330200195, "learning_rate": 1.1830864064020763e-06, "loss": 6.6278, "step": 547 }, { "epoch": 0.01, "grad_norm": 11.571915626525879, "learning_rate": 1.1852492700335246e-06, "loss": 6.4272, "step": 548 }, { "epoch": 0.01, "grad_norm": 10.537367820739746, "learning_rate": 1.1874121336649727e-06, "loss": 7.1535, "step": 549 }, { "epoch": 0.01, "grad_norm": 11.113005638122559, "learning_rate": 1.1895749972964205e-06, "loss": 6.6371, "step": 550 }, { "epoch": 0.01, "grad_norm": 10.944640159606934, "learning_rate": 1.1917378609278686e-06, "loss": 7.1208, "step": 551 }, { "epoch": 0.01, "grad_norm": 10.943193435668945, "learning_rate": 1.1939007245593166e-06, "loss": 6.5281, "step": 552 }, { "epoch": 0.01, "grad_norm": 11.32503890991211, "learning_rate": 1.1960635881907647e-06, "loss": 6.7059, "step": 553 }, { "epoch": 0.01, "grad_norm": 12.306096076965332, "learning_rate": 1.1982264518222127e-06, "loss": 6.4383, "step": 554 }, { "epoch": 0.01, "grad_norm": 11.172005653381348, "learning_rate": 1.2003893154536608e-06, "loss": 6.9463, "step": 555 }, { "epoch": 0.01, "grad_norm": 11.260525703430176, "learning_rate": 1.2025521790851088e-06, "loss": 6.825, "step": 556 }, { "epoch": 0.01, "grad_norm": 10.731559753417969, "learning_rate": 1.2047150427165569e-06, "loss": 6.6368, "step": 557 }, { "epoch": 0.01, "grad_norm": 11.37520694732666, "learning_rate": 1.206877906348005e-06, "loss": 6.3131, "step": 558 }, { "epoch": 0.01, "grad_norm": 10.857343673706055, "learning_rate": 1.209040769979453e-06, "loss": 6.3986, "step": 559 }, { "epoch": 0.01, "grad_norm": 11.257913589477539, "learning_rate": 1.211203633610901e-06, "loss": 6.3617, "step": 560 }, { "epoch": 0.01, "grad_norm": 11.910099983215332, "learning_rate": 1.2133664972423488e-06, "loss": 6.9295, "step": 561 }, { "epoch": 0.01, "grad_norm": 10.420963287353516, "learning_rate": 1.215529360873797e-06, "loss": 6.5938, "step": 562 }, { "epoch": 0.01, "grad_norm": 12.209613800048828, "learning_rate": 1.217692224505245e-06, "loss": 6.4372, "step": 563 }, { "epoch": 0.01, "grad_norm": 12.392038345336914, "learning_rate": 1.2198550881366932e-06, "loss": 6.5034, "step": 564 }, { "epoch": 0.01, "grad_norm": 11.09473705291748, "learning_rate": 1.2220179517681413e-06, "loss": 6.5738, "step": 565 }, { "epoch": 0.01, "grad_norm": 11.765522956848145, "learning_rate": 1.224180815399589e-06, "loss": 6.9745, "step": 566 }, { "epoch": 0.01, "grad_norm": 11.290587425231934, "learning_rate": 1.2263436790310372e-06, "loss": 6.873, "step": 567 }, { "epoch": 0.01, "grad_norm": 10.685554504394531, "learning_rate": 1.2285065426624852e-06, "loss": 6.7729, "step": 568 }, { "epoch": 0.01, "grad_norm": 13.841720581054688, "learning_rate": 1.2306694062939333e-06, "loss": 5.9772, "step": 569 }, { "epoch": 0.01, "grad_norm": 10.80512523651123, "learning_rate": 1.2328322699253813e-06, "loss": 6.3636, "step": 570 }, { "epoch": 0.01, "grad_norm": 10.536163330078125, "learning_rate": 1.2349951335568294e-06, "loss": 6.5983, "step": 571 }, { "epoch": 0.01, "grad_norm": 11.898454666137695, "learning_rate": 1.2371579971882774e-06, "loss": 6.5091, "step": 572 }, { "epoch": 0.01, "grad_norm": 11.296006202697754, "learning_rate": 1.2393208608197255e-06, "loss": 6.9454, "step": 573 }, { "epoch": 0.01, "grad_norm": 12.246112823486328, "learning_rate": 1.2414837244511735e-06, "loss": 6.2397, "step": 574 }, { "epoch": 0.01, "grad_norm": 10.619585990905762, "learning_rate": 1.2436465880826216e-06, "loss": 6.4743, "step": 575 }, { "epoch": 0.01, "grad_norm": 10.918953895568848, "learning_rate": 1.2458094517140696e-06, "loss": 6.4233, "step": 576 }, { "epoch": 0.01, "grad_norm": 11.505589485168457, "learning_rate": 1.2479723153455174e-06, "loss": 6.4363, "step": 577 }, { "epoch": 0.01, "grad_norm": 12.305403709411621, "learning_rate": 1.2501351789769655e-06, "loss": 7.1762, "step": 578 }, { "epoch": 0.01, "grad_norm": 14.458165168762207, "learning_rate": 1.2522980426084136e-06, "loss": 6.5031, "step": 579 }, { "epoch": 0.01, "grad_norm": 11.053033828735352, "learning_rate": 1.2544609062398616e-06, "loss": 7.0293, "step": 580 }, { "epoch": 0.01, "grad_norm": 11.390913963317871, "learning_rate": 1.2566237698713097e-06, "loss": 5.9938, "step": 581 }, { "epoch": 0.01, "grad_norm": 10.55255126953125, "learning_rate": 1.2587866335027577e-06, "loss": 6.8692, "step": 582 }, { "epoch": 0.01, "grad_norm": 12.221663475036621, "learning_rate": 1.2609494971342058e-06, "loss": 6.0203, "step": 583 }, { "epoch": 0.01, "grad_norm": 12.089621543884277, "learning_rate": 1.263112360765654e-06, "loss": 6.5123, "step": 584 }, { "epoch": 0.01, "grad_norm": 10.71709156036377, "learning_rate": 1.265275224397102e-06, "loss": 6.2615, "step": 585 }, { "epoch": 0.01, "grad_norm": 11.59022045135498, "learning_rate": 1.26743808802855e-06, "loss": 5.8687, "step": 586 }, { "epoch": 0.01, "grad_norm": 10.123419761657715, "learning_rate": 1.269600951659998e-06, "loss": 6.6205, "step": 587 }, { "epoch": 0.01, "grad_norm": 10.974987983703613, "learning_rate": 1.271763815291446e-06, "loss": 6.6773, "step": 588 }, { "epoch": 0.01, "grad_norm": 11.472400665283203, "learning_rate": 1.273926678922894e-06, "loss": 6.6939, "step": 589 }, { "epoch": 0.01, "grad_norm": 11.014259338378906, "learning_rate": 1.2760895425543421e-06, "loss": 6.6158, "step": 590 }, { "epoch": 0.01, "grad_norm": 11.039405822753906, "learning_rate": 1.2782524061857902e-06, "loss": 5.9987, "step": 591 }, { "epoch": 0.01, "grad_norm": 11.393438339233398, "learning_rate": 1.2804152698172382e-06, "loss": 6.2704, "step": 592 }, { "epoch": 0.01, "grad_norm": 10.449114799499512, "learning_rate": 1.282578133448686e-06, "loss": 6.5713, "step": 593 }, { "epoch": 0.01, "grad_norm": 11.318787574768066, "learning_rate": 1.284740997080134e-06, "loss": 6.685, "step": 594 }, { "epoch": 0.01, "grad_norm": 10.796393394470215, "learning_rate": 1.2869038607115822e-06, "loss": 6.6159, "step": 595 }, { "epoch": 0.01, "grad_norm": 11.234869956970215, "learning_rate": 1.2890667243430302e-06, "loss": 6.1442, "step": 596 }, { "epoch": 0.01, "grad_norm": 11.142796516418457, "learning_rate": 1.2912295879744783e-06, "loss": 6.8673, "step": 597 }, { "epoch": 0.01, "grad_norm": 11.31752872467041, "learning_rate": 1.2933924516059263e-06, "loss": 6.7728, "step": 598 }, { "epoch": 0.01, "grad_norm": 10.859241485595703, "learning_rate": 1.2955553152373744e-06, "loss": 6.6253, "step": 599 }, { "epoch": 0.01, "grad_norm": 12.699633598327637, "learning_rate": 1.2977181788688226e-06, "loss": 6.406, "step": 600 }, { "epoch": 0.01, "grad_norm": 12.199456214904785, "learning_rate": 1.2998810425002707e-06, "loss": 6.6928, "step": 601 }, { "epoch": 0.01, "grad_norm": 11.309900283813477, "learning_rate": 1.3020439061317185e-06, "loss": 6.0828, "step": 602 }, { "epoch": 0.01, "grad_norm": 11.010135650634766, "learning_rate": 1.3042067697631666e-06, "loss": 6.1651, "step": 603 }, { "epoch": 0.01, "grad_norm": 11.077139854431152, "learning_rate": 1.3063696333946146e-06, "loss": 6.2565, "step": 604 }, { "epoch": 0.01, "grad_norm": 11.076945304870605, "learning_rate": 1.3085324970260627e-06, "loss": 6.5491, "step": 605 }, { "epoch": 0.01, "grad_norm": 11.224116325378418, "learning_rate": 1.3106953606575107e-06, "loss": 6.5137, "step": 606 }, { "epoch": 0.01, "grad_norm": 10.520318031311035, "learning_rate": 1.3128582242889588e-06, "loss": 6.329, "step": 607 }, { "epoch": 0.01, "grad_norm": 10.263981819152832, "learning_rate": 1.3150210879204068e-06, "loss": 6.5194, "step": 608 }, { "epoch": 0.01, "grad_norm": 11.181574821472168, "learning_rate": 1.3171839515518546e-06, "loss": 6.8525, "step": 609 }, { "epoch": 0.01, "grad_norm": 10.37027645111084, "learning_rate": 1.3193468151833027e-06, "loss": 7.1693, "step": 610 }, { "epoch": 0.01, "grad_norm": 10.310280799865723, "learning_rate": 1.3215096788147508e-06, "loss": 6.3868, "step": 611 }, { "epoch": 0.01, "grad_norm": 10.167061805725098, "learning_rate": 1.3236725424461988e-06, "loss": 6.8797, "step": 612 }, { "epoch": 0.01, "grad_norm": 12.323694229125977, "learning_rate": 1.3258354060776469e-06, "loss": 6.3557, "step": 613 }, { "epoch": 0.01, "grad_norm": 10.90567398071289, "learning_rate": 1.327998269709095e-06, "loss": 5.9509, "step": 614 }, { "epoch": 0.01, "grad_norm": 11.81119155883789, "learning_rate": 1.330161133340543e-06, "loss": 6.2893, "step": 615 }, { "epoch": 0.01, "grad_norm": 10.799939155578613, "learning_rate": 1.3323239969719912e-06, "loss": 6.2584, "step": 616 }, { "epoch": 0.01, "grad_norm": 12.237105369567871, "learning_rate": 1.3344868606034393e-06, "loss": 6.2534, "step": 617 }, { "epoch": 0.01, "grad_norm": 12.408539772033691, "learning_rate": 1.3366497242348871e-06, "loss": 6.3417, "step": 618 }, { "epoch": 0.01, "grad_norm": 10.339521408081055, "learning_rate": 1.3388125878663352e-06, "loss": 6.5341, "step": 619 }, { "epoch": 0.01, "grad_norm": 11.042280197143555, "learning_rate": 1.3409754514977832e-06, "loss": 5.7829, "step": 620 }, { "epoch": 0.01, "grad_norm": 11.961530685424805, "learning_rate": 1.3431383151292313e-06, "loss": 6.8578, "step": 621 }, { "epoch": 0.01, "grad_norm": 11.298822402954102, "learning_rate": 1.3453011787606793e-06, "loss": 6.0957, "step": 622 }, { "epoch": 0.01, "grad_norm": 12.38333511352539, "learning_rate": 1.3474640423921274e-06, "loss": 6.1894, "step": 623 }, { "epoch": 0.01, "grad_norm": 10.065309524536133, "learning_rate": 1.3496269060235754e-06, "loss": 6.9701, "step": 624 }, { "epoch": 0.01, "grad_norm": 10.96956729888916, "learning_rate": 1.3517897696550232e-06, "loss": 6.4517, "step": 625 }, { "epoch": 0.01, "grad_norm": 12.371439933776855, "learning_rate": 1.3539526332864713e-06, "loss": 6.3441, "step": 626 }, { "epoch": 0.01, "grad_norm": 10.709290504455566, "learning_rate": 1.3561154969179194e-06, "loss": 6.3658, "step": 627 }, { "epoch": 0.01, "grad_norm": 10.530659675598145, "learning_rate": 1.3582783605493674e-06, "loss": 6.6867, "step": 628 }, { "epoch": 0.01, "grad_norm": 11.639716148376465, "learning_rate": 1.3604412241808155e-06, "loss": 6.1108, "step": 629 }, { "epoch": 0.01, "grad_norm": 11.470030784606934, "learning_rate": 1.3626040878122635e-06, "loss": 6.4813, "step": 630 }, { "epoch": 0.01, "grad_norm": 10.969605445861816, "learning_rate": 1.3647669514437116e-06, "loss": 6.331, "step": 631 }, { "epoch": 0.01, "grad_norm": 11.253927230834961, "learning_rate": 1.3669298150751598e-06, "loss": 6.7131, "step": 632 }, { "epoch": 0.01, "grad_norm": 11.119747161865234, "learning_rate": 1.3690926787066079e-06, "loss": 6.037, "step": 633 }, { "epoch": 0.01, "grad_norm": 11.0097074508667, "learning_rate": 1.3712555423380557e-06, "loss": 6.2805, "step": 634 }, { "epoch": 0.01, "grad_norm": 11.86964225769043, "learning_rate": 1.3734184059695038e-06, "loss": 6.5555, "step": 635 }, { "epoch": 0.01, "grad_norm": 10.82007884979248, "learning_rate": 1.3755812696009518e-06, "loss": 6.491, "step": 636 }, { "epoch": 0.01, "grad_norm": 11.673319816589355, "learning_rate": 1.3777441332323999e-06, "loss": 6.655, "step": 637 }, { "epoch": 0.01, "grad_norm": 10.584513664245605, "learning_rate": 1.379906996863848e-06, "loss": 6.5021, "step": 638 }, { "epoch": 0.01, "grad_norm": 11.492204666137695, "learning_rate": 1.382069860495296e-06, "loss": 6.5192, "step": 639 }, { "epoch": 0.01, "grad_norm": 10.348852157592773, "learning_rate": 1.384232724126744e-06, "loss": 6.1385, "step": 640 }, { "epoch": 0.01, "grad_norm": 11.076118469238281, "learning_rate": 1.3863955877581918e-06, "loss": 6.2196, "step": 641 }, { "epoch": 0.01, "grad_norm": 11.421080589294434, "learning_rate": 1.38855845138964e-06, "loss": 5.8525, "step": 642 }, { "epoch": 0.01, "grad_norm": 14.161803245544434, "learning_rate": 1.390721315021088e-06, "loss": 6.1028, "step": 643 }, { "epoch": 0.01, "grad_norm": 38.79696273803711, "learning_rate": 1.392884178652536e-06, "loss": 7.5338, "step": 644 }, { "epoch": 0.01, "grad_norm": 11.578100204467773, "learning_rate": 1.395047042283984e-06, "loss": 6.2571, "step": 645 }, { "epoch": 0.01, "grad_norm": 10.465785026550293, "learning_rate": 1.397209905915432e-06, "loss": 6.1675, "step": 646 }, { "epoch": 0.01, "grad_norm": 10.23678970336914, "learning_rate": 1.3993727695468802e-06, "loss": 6.6851, "step": 647 }, { "epoch": 0.01, "grad_norm": 11.817134857177734, "learning_rate": 1.4015356331783284e-06, "loss": 6.3865, "step": 648 }, { "epoch": 0.01, "grad_norm": 9.926251411437988, "learning_rate": 1.4036984968097765e-06, "loss": 6.2569, "step": 649 }, { "epoch": 0.01, "grad_norm": 12.058063507080078, "learning_rate": 1.4058613604412243e-06, "loss": 6.5711, "step": 650 }, { "epoch": 0.01, "grad_norm": 12.109704971313477, "learning_rate": 1.4080242240726724e-06, "loss": 6.1636, "step": 651 }, { "epoch": 0.01, "grad_norm": 11.351661682128906, "learning_rate": 1.4101870877041204e-06, "loss": 6.6661, "step": 652 }, { "epoch": 0.01, "grad_norm": 10.956212043762207, "learning_rate": 1.4123499513355685e-06, "loss": 6.6644, "step": 653 }, { "epoch": 0.01, "grad_norm": 11.701925277709961, "learning_rate": 1.4145128149670165e-06, "loss": 6.4378, "step": 654 }, { "epoch": 0.01, "grad_norm": 11.3658447265625, "learning_rate": 1.4166756785984646e-06, "loss": 6.2009, "step": 655 }, { "epoch": 0.01, "grad_norm": 10.535888671875, "learning_rate": 1.4188385422299126e-06, "loss": 6.8272, "step": 656 }, { "epoch": 0.01, "grad_norm": 11.468435287475586, "learning_rate": 1.4210014058613605e-06, "loss": 5.7458, "step": 657 }, { "epoch": 0.01, "grad_norm": 10.837594985961914, "learning_rate": 1.4231642694928085e-06, "loss": 5.7456, "step": 658 }, { "epoch": 0.01, "grad_norm": 12.423521995544434, "learning_rate": 1.4253271331242566e-06, "loss": 5.9275, "step": 659 }, { "epoch": 0.01, "grad_norm": 13.103716850280762, "learning_rate": 1.4274899967557046e-06, "loss": 6.2908, "step": 660 }, { "epoch": 0.01, "grad_norm": 10.996908187866211, "learning_rate": 1.4296528603871527e-06, "loss": 6.3095, "step": 661 }, { "epoch": 0.01, "grad_norm": 10.88699722290039, "learning_rate": 1.4318157240186007e-06, "loss": 6.4838, "step": 662 }, { "epoch": 0.01, "grad_norm": 11.484030723571777, "learning_rate": 1.4339785876500488e-06, "loss": 6.3841, "step": 663 }, { "epoch": 0.01, "grad_norm": 10.787925720214844, "learning_rate": 1.436141451281497e-06, "loss": 6.2686, "step": 664 }, { "epoch": 0.01, "grad_norm": 10.87221908569336, "learning_rate": 1.438304314912945e-06, "loss": 6.4454, "step": 665 }, { "epoch": 0.01, "grad_norm": 10.878434181213379, "learning_rate": 1.440467178544393e-06, "loss": 6.2773, "step": 666 }, { "epoch": 0.01, "grad_norm": 11.947867393493652, "learning_rate": 1.442630042175841e-06, "loss": 6.3324, "step": 667 }, { "epoch": 0.01, "grad_norm": 10.538961410522461, "learning_rate": 1.444792905807289e-06, "loss": 6.2335, "step": 668 }, { "epoch": 0.01, "grad_norm": 11.65137767791748, "learning_rate": 1.446955769438737e-06, "loss": 5.9711, "step": 669 }, { "epoch": 0.01, "grad_norm": 10.767414093017578, "learning_rate": 1.4491186330701851e-06, "loss": 6.4033, "step": 670 }, { "epoch": 0.01, "grad_norm": 12.641079902648926, "learning_rate": 1.4512814967016332e-06, "loss": 6.1589, "step": 671 }, { "epoch": 0.01, "grad_norm": 12.42453670501709, "learning_rate": 1.4534443603330812e-06, "loss": 5.6544, "step": 672 }, { "epoch": 0.01, "grad_norm": 10.35632038116455, "learning_rate": 1.455607223964529e-06, "loss": 6.7441, "step": 673 }, { "epoch": 0.01, "grad_norm": 10.555092811584473, "learning_rate": 1.457770087595977e-06, "loss": 6.5912, "step": 674 }, { "epoch": 0.01, "grad_norm": 10.891725540161133, "learning_rate": 1.4599329512274252e-06, "loss": 6.6408, "step": 675 }, { "epoch": 0.01, "grad_norm": 11.282659530639648, "learning_rate": 1.4620958148588732e-06, "loss": 6.7014, "step": 676 }, { "epoch": 0.01, "grad_norm": 11.032565116882324, "learning_rate": 1.4642586784903213e-06, "loss": 6.4656, "step": 677 }, { "epoch": 0.01, "grad_norm": 10.943863868713379, "learning_rate": 1.4664215421217693e-06, "loss": 5.671, "step": 678 }, { "epoch": 0.01, "grad_norm": 11.031856536865234, "learning_rate": 1.4685844057532174e-06, "loss": 6.0327, "step": 679 }, { "epoch": 0.01, "grad_norm": 11.901752471923828, "learning_rate": 1.4707472693846656e-06, "loss": 6.4006, "step": 680 }, { "epoch": 0.01, "grad_norm": 12.3109769821167, "learning_rate": 1.4729101330161137e-06, "loss": 5.7967, "step": 681 }, { "epoch": 0.01, "grad_norm": 11.737595558166504, "learning_rate": 1.4750729966475615e-06, "loss": 5.6647, "step": 682 }, { "epoch": 0.01, "grad_norm": 10.801926612854004, "learning_rate": 1.4772358602790096e-06, "loss": 6.3437, "step": 683 }, { "epoch": 0.01, "grad_norm": 11.344746589660645, "learning_rate": 1.4793987239104576e-06, "loss": 5.6921, "step": 684 }, { "epoch": 0.01, "grad_norm": 11.254656791687012, "learning_rate": 1.4815615875419057e-06, "loss": 6.0876, "step": 685 }, { "epoch": 0.01, "grad_norm": 11.768481254577637, "learning_rate": 1.4837244511733537e-06, "loss": 6.4406, "step": 686 }, { "epoch": 0.01, "grad_norm": 11.175058364868164, "learning_rate": 1.4858873148048018e-06, "loss": 6.0669, "step": 687 }, { "epoch": 0.01, "grad_norm": 11.153687477111816, "learning_rate": 1.4880501784362498e-06, "loss": 5.7354, "step": 688 }, { "epoch": 0.01, "grad_norm": 11.675339698791504, "learning_rate": 1.4902130420676977e-06, "loss": 5.876, "step": 689 }, { "epoch": 0.01, "grad_norm": 12.375574111938477, "learning_rate": 1.4923759056991457e-06, "loss": 5.9658, "step": 690 }, { "epoch": 0.01, "grad_norm": 12.16137409210205, "learning_rate": 1.4945387693305938e-06, "loss": 5.9465, "step": 691 }, { "epoch": 0.01, "grad_norm": 11.669864654541016, "learning_rate": 1.4967016329620418e-06, "loss": 5.7196, "step": 692 }, { "epoch": 0.01, "grad_norm": 12.72304916381836, "learning_rate": 1.4988644965934899e-06, "loss": 6.2698, "step": 693 }, { "epoch": 0.01, "grad_norm": 11.892084121704102, "learning_rate": 1.501027360224938e-06, "loss": 6.2191, "step": 694 }, { "epoch": 0.01, "grad_norm": 11.395092010498047, "learning_rate": 1.503190223856386e-06, "loss": 5.9908, "step": 695 }, { "epoch": 0.01, "grad_norm": 11.890297889709473, "learning_rate": 1.5053530874878342e-06, "loss": 5.9858, "step": 696 }, { "epoch": 0.01, "grad_norm": 11.917362213134766, "learning_rate": 1.5075159511192823e-06, "loss": 6.5749, "step": 697 }, { "epoch": 0.01, "grad_norm": 11.466175079345703, "learning_rate": 1.5096788147507301e-06, "loss": 5.8888, "step": 698 }, { "epoch": 0.01, "grad_norm": 10.165840148925781, "learning_rate": 1.5118416783821782e-06, "loss": 6.0213, "step": 699 }, { "epoch": 0.01, "grad_norm": 12.08061408996582, "learning_rate": 1.5140045420136262e-06, "loss": 6.1357, "step": 700 }, { "epoch": 0.01, "grad_norm": 11.855850219726562, "learning_rate": 1.5161674056450743e-06, "loss": 6.1902, "step": 701 }, { "epoch": 0.01, "grad_norm": 10.651144981384277, "learning_rate": 1.5183302692765223e-06, "loss": 5.9321, "step": 702 }, { "epoch": 0.01, "grad_norm": 10.620654106140137, "learning_rate": 1.5204931329079704e-06, "loss": 5.4892, "step": 703 }, { "epoch": 0.01, "grad_norm": 10.895367622375488, "learning_rate": 1.5226559965394184e-06, "loss": 6.0687, "step": 704 }, { "epoch": 0.01, "grad_norm": 11.55395221710205, "learning_rate": 1.5248188601708663e-06, "loss": 6.2665, "step": 705 }, { "epoch": 0.01, "grad_norm": 10.808296203613281, "learning_rate": 1.5269817238023143e-06, "loss": 6.5552, "step": 706 }, { "epoch": 0.01, "grad_norm": 11.149186134338379, "learning_rate": 1.5291445874337624e-06, "loss": 6.1038, "step": 707 }, { "epoch": 0.01, "grad_norm": 11.498642921447754, "learning_rate": 1.5313074510652104e-06, "loss": 6.3181, "step": 708 }, { "epoch": 0.01, "grad_norm": 10.428350448608398, "learning_rate": 1.5334703146966585e-06, "loss": 6.2461, "step": 709 }, { "epoch": 0.01, "grad_norm": 10.979613304138184, "learning_rate": 1.5356331783281065e-06, "loss": 6.6767, "step": 710 }, { "epoch": 0.01, "grad_norm": 10.734042167663574, "learning_rate": 1.5377960419595546e-06, "loss": 5.9989, "step": 711 }, { "epoch": 0.01, "grad_norm": 12.333511352539062, "learning_rate": 1.5399589055910028e-06, "loss": 6.2448, "step": 712 }, { "epoch": 0.01, "grad_norm": 13.107518196105957, "learning_rate": 1.5421217692224509e-06, "loss": 5.9052, "step": 713 }, { "epoch": 0.01, "grad_norm": 11.318769454956055, "learning_rate": 1.5442846328538987e-06, "loss": 6.4297, "step": 714 }, { "epoch": 0.01, "grad_norm": 11.624274253845215, "learning_rate": 1.5464474964853468e-06, "loss": 6.5567, "step": 715 }, { "epoch": 0.01, "grad_norm": 10.549545288085938, "learning_rate": 1.5486103601167948e-06, "loss": 6.1655, "step": 716 }, { "epoch": 0.01, "grad_norm": 12.788383483886719, "learning_rate": 1.5507732237482429e-06, "loss": 6.0721, "step": 717 }, { "epoch": 0.01, "grad_norm": 11.860374450683594, "learning_rate": 1.552936087379691e-06, "loss": 5.6561, "step": 718 }, { "epoch": 0.01, "grad_norm": 10.882765769958496, "learning_rate": 1.555098951011139e-06, "loss": 6.2381, "step": 719 }, { "epoch": 0.01, "grad_norm": 12.07241153717041, "learning_rate": 1.557261814642587e-06, "loss": 5.9186, "step": 720 }, { "epoch": 0.01, "grad_norm": 10.682535171508789, "learning_rate": 1.5594246782740349e-06, "loss": 6.0776, "step": 721 }, { "epoch": 0.01, "grad_norm": 10.209247589111328, "learning_rate": 1.561587541905483e-06, "loss": 6.4386, "step": 722 }, { "epoch": 0.01, "grad_norm": 11.684428215026855, "learning_rate": 1.563750405536931e-06, "loss": 6.4741, "step": 723 }, { "epoch": 0.01, "grad_norm": 12.999131202697754, "learning_rate": 1.565913269168379e-06, "loss": 6.4333, "step": 724 }, { "epoch": 0.01, "grad_norm": 9.925653457641602, "learning_rate": 1.568076132799827e-06, "loss": 6.1451, "step": 725 }, { "epoch": 0.01, "grad_norm": 10.751015663146973, "learning_rate": 1.570238996431275e-06, "loss": 5.7996, "step": 726 }, { "epoch": 0.01, "grad_norm": 10.500097274780273, "learning_rate": 1.5724018600627232e-06, "loss": 5.5232, "step": 727 }, { "epoch": 0.01, "grad_norm": 10.841303825378418, "learning_rate": 1.5745647236941714e-06, "loss": 5.8466, "step": 728 }, { "epoch": 0.01, "grad_norm": 11.55490779876709, "learning_rate": 1.5767275873256195e-06, "loss": 6.1874, "step": 729 }, { "epoch": 0.01, "grad_norm": 10.976614952087402, "learning_rate": 1.5788904509570673e-06, "loss": 5.6011, "step": 730 }, { "epoch": 0.01, "grad_norm": 12.132737159729004, "learning_rate": 1.5810533145885154e-06, "loss": 5.8294, "step": 731 }, { "epoch": 0.01, "grad_norm": 11.330782890319824, "learning_rate": 1.5832161782199634e-06, "loss": 6.0804, "step": 732 }, { "epoch": 0.01, "grad_norm": 11.023468017578125, "learning_rate": 1.5853790418514115e-06, "loss": 5.9016, "step": 733 }, { "epoch": 0.01, "grad_norm": 11.470672607421875, "learning_rate": 1.5875419054828595e-06, "loss": 5.1784, "step": 734 }, { "epoch": 0.01, "grad_norm": 12.632916450500488, "learning_rate": 1.5897047691143076e-06, "loss": 6.1293, "step": 735 }, { "epoch": 0.01, "grad_norm": 11.52581787109375, "learning_rate": 1.5918676327457556e-06, "loss": 6.0703, "step": 736 }, { "epoch": 0.01, "grad_norm": 12.093666076660156, "learning_rate": 1.5940304963772035e-06, "loss": 6.2988, "step": 737 }, { "epoch": 0.01, "grad_norm": 12.2667236328125, "learning_rate": 1.5961933600086515e-06, "loss": 5.6626, "step": 738 }, { "epoch": 0.01, "grad_norm": 12.220056533813477, "learning_rate": 1.5983562236400996e-06, "loss": 6.1687, "step": 739 }, { "epoch": 0.01, "grad_norm": 11.876791954040527, "learning_rate": 1.6005190872715476e-06, "loss": 5.5838, "step": 740 }, { "epoch": 0.01, "grad_norm": 10.406172752380371, "learning_rate": 1.6026819509029957e-06, "loss": 6.2354, "step": 741 }, { "epoch": 0.01, "grad_norm": 11.294522285461426, "learning_rate": 1.6048448145344437e-06, "loss": 5.8223, "step": 742 }, { "epoch": 0.01, "grad_norm": 11.704903602600098, "learning_rate": 1.6070076781658915e-06, "loss": 6.32, "step": 743 }, { "epoch": 0.01, "grad_norm": 12.419720649719238, "learning_rate": 1.60917054179734e-06, "loss": 6.2162, "step": 744 }, { "epoch": 0.01, "grad_norm": 11.691240310668945, "learning_rate": 1.611333405428788e-06, "loss": 5.7381, "step": 745 }, { "epoch": 0.01, "grad_norm": 12.65031623840332, "learning_rate": 1.613496269060236e-06, "loss": 6.3281, "step": 746 }, { "epoch": 0.01, "grad_norm": 10.966395378112793, "learning_rate": 1.615659132691684e-06, "loss": 5.8926, "step": 747 }, { "epoch": 0.01, "grad_norm": 12.845497131347656, "learning_rate": 1.617821996323132e-06, "loss": 5.7474, "step": 748 }, { "epoch": 0.01, "grad_norm": 11.934906959533691, "learning_rate": 1.61998485995458e-06, "loss": 5.6272, "step": 749 }, { "epoch": 0.01, "grad_norm": 12.549614906311035, "learning_rate": 1.6221477235860281e-06, "loss": 5.4821, "step": 750 }, { "epoch": 0.01, "grad_norm": 11.076972961425781, "learning_rate": 1.6243105872174762e-06, "loss": 5.7283, "step": 751 }, { "epoch": 0.01, "grad_norm": 12.677290916442871, "learning_rate": 1.6264734508489242e-06, "loss": 5.8031, "step": 752 }, { "epoch": 0.01, "grad_norm": 12.537320137023926, "learning_rate": 1.628636314480372e-06, "loss": 6.0509, "step": 753 }, { "epoch": 0.01, "grad_norm": 12.480597496032715, "learning_rate": 1.63079917811182e-06, "loss": 5.3825, "step": 754 }, { "epoch": 0.01, "grad_norm": 11.18263053894043, "learning_rate": 1.6329620417432682e-06, "loss": 6.0648, "step": 755 }, { "epoch": 0.01, "grad_norm": 11.334943771362305, "learning_rate": 1.6351249053747162e-06, "loss": 5.5093, "step": 756 }, { "epoch": 0.01, "grad_norm": 12.554569244384766, "learning_rate": 1.6372877690061643e-06, "loss": 5.5867, "step": 757 }, { "epoch": 0.01, "grad_norm": 11.160502433776855, "learning_rate": 1.6394506326376123e-06, "loss": 5.9178, "step": 758 }, { "epoch": 0.01, "grad_norm": 11.613041877746582, "learning_rate": 1.6416134962690601e-06, "loss": 5.7004, "step": 759 }, { "epoch": 0.01, "grad_norm": 11.148305892944336, "learning_rate": 1.6437763599005086e-06, "loss": 5.9485, "step": 760 }, { "epoch": 0.01, "grad_norm": 10.925002098083496, "learning_rate": 1.6459392235319567e-06, "loss": 5.8584, "step": 761 }, { "epoch": 0.01, "grad_norm": 11.246163368225098, "learning_rate": 1.6481020871634045e-06, "loss": 5.928, "step": 762 }, { "epoch": 0.01, "grad_norm": 11.437241554260254, "learning_rate": 1.6502649507948526e-06, "loss": 5.7942, "step": 763 }, { "epoch": 0.01, "grad_norm": 13.452352523803711, "learning_rate": 1.6524278144263006e-06, "loss": 5.5226, "step": 764 }, { "epoch": 0.01, "grad_norm": 11.985716819763184, "learning_rate": 1.6545906780577487e-06, "loss": 5.628, "step": 765 }, { "epoch": 0.01, "grad_norm": 12.26706314086914, "learning_rate": 1.6567535416891967e-06, "loss": 6.9555, "step": 766 }, { "epoch": 0.01, "grad_norm": 10.916909217834473, "learning_rate": 1.6589164053206448e-06, "loss": 6.0789, "step": 767 }, { "epoch": 0.01, "grad_norm": 11.313389778137207, "learning_rate": 1.6610792689520926e-06, "loss": 6.0985, "step": 768 }, { "epoch": 0.01, "grad_norm": 11.053415298461914, "learning_rate": 1.6632421325835407e-06, "loss": 6.0918, "step": 769 }, { "epoch": 0.01, "grad_norm": 12.515541076660156, "learning_rate": 1.6654049962149887e-06, "loss": 6.0909, "step": 770 }, { "epoch": 0.01, "grad_norm": 11.264525413513184, "learning_rate": 1.6675678598464368e-06, "loss": 6.2222, "step": 771 }, { "epoch": 0.01, "grad_norm": 11.385527610778809, "learning_rate": 1.6697307234778848e-06, "loss": 5.7177, "step": 772 }, { "epoch": 0.01, "grad_norm": 12.061413764953613, "learning_rate": 1.6718935871093329e-06, "loss": 5.6292, "step": 773 }, { "epoch": 0.01, "grad_norm": 12.357651710510254, "learning_rate": 1.674056450740781e-06, "loss": 6.1387, "step": 774 }, { "epoch": 0.01, "grad_norm": 11.301048278808594, "learning_rate": 1.6762193143722287e-06, "loss": 5.5727, "step": 775 }, { "epoch": 0.01, "grad_norm": 11.11856460571289, "learning_rate": 1.6783821780036772e-06, "loss": 5.9182, "step": 776 }, { "epoch": 0.01, "grad_norm": 13.103865623474121, "learning_rate": 1.680545041635125e-06, "loss": 5.41, "step": 777 }, { "epoch": 0.01, "grad_norm": 12.806309700012207, "learning_rate": 1.6827079052665731e-06, "loss": 5.3779, "step": 778 }, { "epoch": 0.01, "grad_norm": 11.457908630371094, "learning_rate": 1.6848707688980212e-06, "loss": 6.0713, "step": 779 }, { "epoch": 0.01, "grad_norm": 11.95404052734375, "learning_rate": 1.6870336325294692e-06, "loss": 6.1672, "step": 780 }, { "epoch": 0.01, "grad_norm": 12.226105690002441, "learning_rate": 1.6891964961609173e-06, "loss": 5.4699, "step": 781 }, { "epoch": 0.01, "grad_norm": 10.715810775756836, "learning_rate": 1.6913593597923653e-06, "loss": 6.3918, "step": 782 }, { "epoch": 0.01, "grad_norm": 10.483301162719727, "learning_rate": 1.6935222234238134e-06, "loss": 5.8649, "step": 783 }, { "epoch": 0.01, "grad_norm": 11.853592872619629, "learning_rate": 1.6956850870552612e-06, "loss": 5.7515, "step": 784 }, { "epoch": 0.01, "grad_norm": 11.013644218444824, "learning_rate": 1.6978479506867093e-06, "loss": 6.5168, "step": 785 }, { "epoch": 0.01, "grad_norm": 13.037696838378906, "learning_rate": 1.7000108143181573e-06, "loss": 5.6958, "step": 786 }, { "epoch": 0.01, "grad_norm": 10.827667236328125, "learning_rate": 1.7021736779496054e-06, "loss": 5.7867, "step": 787 }, { "epoch": 0.01, "grad_norm": 11.28102970123291, "learning_rate": 1.7043365415810534e-06, "loss": 6.1324, "step": 788 }, { "epoch": 0.01, "grad_norm": 12.007631301879883, "learning_rate": 1.7064994052125015e-06, "loss": 5.8959, "step": 789 }, { "epoch": 0.01, "grad_norm": 12.543063163757324, "learning_rate": 1.7086622688439495e-06, "loss": 5.7095, "step": 790 }, { "epoch": 0.01, "grad_norm": 10.890372276306152, "learning_rate": 1.7108251324753973e-06, "loss": 5.7582, "step": 791 }, { "epoch": 0.01, "grad_norm": 10.979680061340332, "learning_rate": 1.7129879961068458e-06, "loss": 5.471, "step": 792 }, { "epoch": 0.01, "grad_norm": 11.538753509521484, "learning_rate": 1.7151508597382937e-06, "loss": 5.5562, "step": 793 }, { "epoch": 0.01, "grad_norm": 10.88123893737793, "learning_rate": 1.7173137233697417e-06, "loss": 5.7553, "step": 794 }, { "epoch": 0.01, "grad_norm": 12.669584274291992, "learning_rate": 1.7194765870011898e-06, "loss": 5.2983, "step": 795 }, { "epoch": 0.01, "grad_norm": 10.625351905822754, "learning_rate": 1.7216394506326378e-06, "loss": 5.9138, "step": 796 }, { "epoch": 0.01, "grad_norm": 11.944992065429688, "learning_rate": 1.7238023142640859e-06, "loss": 6.2175, "step": 797 }, { "epoch": 0.01, "grad_norm": 11.56509780883789, "learning_rate": 1.725965177895534e-06, "loss": 5.2329, "step": 798 }, { "epoch": 0.01, "grad_norm": 11.515691757202148, "learning_rate": 1.728128041526982e-06, "loss": 5.6525, "step": 799 }, { "epoch": 0.01, "grad_norm": 10.834278106689453, "learning_rate": 1.7302909051584298e-06, "loss": 6.0395, "step": 800 }, { "epoch": 0.01, "grad_norm": 11.681975364685059, "learning_rate": 1.7324537687898779e-06, "loss": 6.2524, "step": 801 }, { "epoch": 0.01, "grad_norm": 11.211983680725098, "learning_rate": 1.734616632421326e-06, "loss": 5.8272, "step": 802 }, { "epoch": 0.01, "grad_norm": 13.785158157348633, "learning_rate": 1.736779496052774e-06, "loss": 6.2103, "step": 803 }, { "epoch": 0.01, "grad_norm": 12.80506706237793, "learning_rate": 1.738942359684222e-06, "loss": 5.3516, "step": 804 }, { "epoch": 0.01, "grad_norm": 11.351305961608887, "learning_rate": 1.74110522331567e-06, "loss": 5.263, "step": 805 }, { "epoch": 0.01, "grad_norm": 11.326339721679688, "learning_rate": 1.7432680869471181e-06, "loss": 5.9558, "step": 806 }, { "epoch": 0.01, "grad_norm": 12.098394393920898, "learning_rate": 1.745430950578566e-06, "loss": 5.4528, "step": 807 }, { "epoch": 0.01, "grad_norm": 12.51269245147705, "learning_rate": 1.7475938142100144e-06, "loss": 6.343, "step": 808 }, { "epoch": 0.01, "grad_norm": 10.342690467834473, "learning_rate": 1.7497566778414623e-06, "loss": 5.2887, "step": 809 }, { "epoch": 0.01, "grad_norm": 10.838496208190918, "learning_rate": 1.7519195414729103e-06, "loss": 5.5759, "step": 810 }, { "epoch": 0.01, "grad_norm": 12.923534393310547, "learning_rate": 1.7540824051043584e-06, "loss": 5.7503, "step": 811 }, { "epoch": 0.01, "grad_norm": 10.351228713989258, "learning_rate": 1.7562452687358064e-06, "loss": 6.0598, "step": 812 }, { "epoch": 0.01, "grad_norm": 12.066675186157227, "learning_rate": 1.7584081323672545e-06, "loss": 5.7592, "step": 813 }, { "epoch": 0.01, "grad_norm": 10.55141830444336, "learning_rate": 1.7605709959987025e-06, "loss": 5.6387, "step": 814 }, { "epoch": 0.01, "grad_norm": 11.492643356323242, "learning_rate": 1.7627338596301506e-06, "loss": 5.4606, "step": 815 }, { "epoch": 0.01, "grad_norm": 11.501786231994629, "learning_rate": 1.7648967232615984e-06, "loss": 7.0163, "step": 816 }, { "epoch": 0.01, "grad_norm": 11.631900787353516, "learning_rate": 1.7670595868930465e-06, "loss": 5.9613, "step": 817 }, { "epoch": 0.01, "grad_norm": 10.917844772338867, "learning_rate": 1.7692224505244945e-06, "loss": 5.6804, "step": 818 }, { "epoch": 0.01, "grad_norm": 12.947103500366211, "learning_rate": 1.7713853141559426e-06, "loss": 5.7981, "step": 819 }, { "epoch": 0.01, "grad_norm": 11.95811939239502, "learning_rate": 1.7735481777873906e-06, "loss": 5.5887, "step": 820 }, { "epoch": 0.01, "grad_norm": 11.349714279174805, "learning_rate": 1.7757110414188387e-06, "loss": 6.5044, "step": 821 }, { "epoch": 0.01, "grad_norm": 12.639211654663086, "learning_rate": 1.7778739050502867e-06, "loss": 6.0029, "step": 822 }, { "epoch": 0.01, "grad_norm": 11.324773788452148, "learning_rate": 1.7800367686817345e-06, "loss": 6.0833, "step": 823 }, { "epoch": 0.01, "grad_norm": 13.121479034423828, "learning_rate": 1.782199632313183e-06, "loss": 5.8, "step": 824 }, { "epoch": 0.01, "grad_norm": 11.871471405029297, "learning_rate": 1.7843624959446309e-06, "loss": 5.8962, "step": 825 }, { "epoch": 0.01, "grad_norm": 11.935101509094238, "learning_rate": 1.786525359576079e-06, "loss": 5.8567, "step": 826 }, { "epoch": 0.01, "grad_norm": 10.691187858581543, "learning_rate": 1.788688223207527e-06, "loss": 5.7206, "step": 827 }, { "epoch": 0.01, "grad_norm": 11.688618659973145, "learning_rate": 1.790851086838975e-06, "loss": 6.3729, "step": 828 }, { "epoch": 0.01, "grad_norm": 10.746369361877441, "learning_rate": 1.793013950470423e-06, "loss": 5.9757, "step": 829 }, { "epoch": 0.01, "grad_norm": 12.631640434265137, "learning_rate": 1.7951768141018711e-06, "loss": 5.8517, "step": 830 }, { "epoch": 0.01, "grad_norm": 11.834850311279297, "learning_rate": 1.7973396777333192e-06, "loss": 6.0487, "step": 831 }, { "epoch": 0.01, "grad_norm": 11.496427536010742, "learning_rate": 1.799502541364767e-06, "loss": 5.4151, "step": 832 }, { "epoch": 0.01, "grad_norm": 11.272348403930664, "learning_rate": 1.801665404996215e-06, "loss": 5.3859, "step": 833 }, { "epoch": 0.01, "grad_norm": 13.016539573669434, "learning_rate": 1.803828268627663e-06, "loss": 5.1995, "step": 834 }, { "epoch": 0.01, "grad_norm": 9.603890419006348, "learning_rate": 1.8059911322591112e-06, "loss": 5.9048, "step": 835 }, { "epoch": 0.01, "grad_norm": 11.474241256713867, "learning_rate": 1.8081539958905592e-06, "loss": 5.645, "step": 836 }, { "epoch": 0.01, "grad_norm": 10.987944602966309, "learning_rate": 1.8103168595220073e-06, "loss": 5.8266, "step": 837 }, { "epoch": 0.01, "grad_norm": 13.702225685119629, "learning_rate": 1.8124797231534553e-06, "loss": 4.9918, "step": 838 }, { "epoch": 0.01, "grad_norm": 12.099648475646973, "learning_rate": 1.8146425867849031e-06, "loss": 5.8754, "step": 839 }, { "epoch": 0.01, "grad_norm": 11.326868057250977, "learning_rate": 1.8168054504163516e-06, "loss": 6.0394, "step": 840 }, { "epoch": 0.01, "grad_norm": 11.89172649383545, "learning_rate": 1.8189683140477995e-06, "loss": 6.6949, "step": 841 }, { "epoch": 0.01, "grad_norm": 12.261894226074219, "learning_rate": 1.8211311776792475e-06, "loss": 5.5654, "step": 842 }, { "epoch": 0.01, "grad_norm": 12.044840812683105, "learning_rate": 1.8232940413106956e-06, "loss": 5.5735, "step": 843 }, { "epoch": 0.01, "grad_norm": 11.571501731872559, "learning_rate": 1.8254569049421436e-06, "loss": 5.2803, "step": 844 }, { "epoch": 0.01, "grad_norm": 12.180561065673828, "learning_rate": 1.8276197685735917e-06, "loss": 4.9752, "step": 845 }, { "epoch": 0.01, "grad_norm": 10.877634048461914, "learning_rate": 1.8297826322050397e-06, "loss": 5.8578, "step": 846 }, { "epoch": 0.01, "grad_norm": 12.401741981506348, "learning_rate": 1.8319454958364878e-06, "loss": 5.2457, "step": 847 }, { "epoch": 0.01, "grad_norm": 11.224726676940918, "learning_rate": 1.8341083594679356e-06, "loss": 5.4845, "step": 848 }, { "epoch": 0.01, "grad_norm": 11.876104354858398, "learning_rate": 1.8362712230993837e-06, "loss": 5.5795, "step": 849 }, { "epoch": 0.01, "grad_norm": 12.214438438415527, "learning_rate": 1.8384340867308317e-06, "loss": 5.6975, "step": 850 }, { "epoch": 0.01, "grad_norm": 12.723592758178711, "learning_rate": 1.8405969503622798e-06, "loss": 5.3616, "step": 851 }, { "epoch": 0.01, "grad_norm": 12.800437927246094, "learning_rate": 1.8427598139937278e-06, "loss": 5.5607, "step": 852 }, { "epoch": 0.01, "grad_norm": 12.886734962463379, "learning_rate": 1.8449226776251759e-06, "loss": 6.1665, "step": 853 }, { "epoch": 0.01, "grad_norm": 13.267148971557617, "learning_rate": 1.847085541256624e-06, "loss": 6.4529, "step": 854 }, { "epoch": 0.01, "grad_norm": 12.723325729370117, "learning_rate": 1.8492484048880717e-06, "loss": 5.5363, "step": 855 }, { "epoch": 0.01, "grad_norm": 13.31346607208252, "learning_rate": 1.8514112685195202e-06, "loss": 5.8359, "step": 856 }, { "epoch": 0.01, "grad_norm": 11.727386474609375, "learning_rate": 1.853574132150968e-06, "loss": 5.3616, "step": 857 }, { "epoch": 0.01, "grad_norm": 13.079546928405762, "learning_rate": 1.8557369957824161e-06, "loss": 5.0635, "step": 858 }, { "epoch": 0.01, "grad_norm": 12.421878814697266, "learning_rate": 1.8578998594138642e-06, "loss": 5.5461, "step": 859 }, { "epoch": 0.01, "grad_norm": 11.029240608215332, "learning_rate": 1.8600627230453122e-06, "loss": 5.3883, "step": 860 }, { "epoch": 0.01, "grad_norm": 11.826979637145996, "learning_rate": 1.8622255866767603e-06, "loss": 5.4298, "step": 861 }, { "epoch": 0.01, "grad_norm": 11.657444953918457, "learning_rate": 1.8643884503082083e-06, "loss": 5.2388, "step": 862 }, { "epoch": 0.01, "grad_norm": 12.95515251159668, "learning_rate": 1.8665513139396564e-06, "loss": 5.7792, "step": 863 }, { "epoch": 0.01, "grad_norm": 13.125017166137695, "learning_rate": 1.8687141775711042e-06, "loss": 5.7658, "step": 864 }, { "epoch": 0.01, "grad_norm": 11.428915977478027, "learning_rate": 1.8708770412025523e-06, "loss": 5.9977, "step": 865 }, { "epoch": 0.01, "grad_norm": 12.543364524841309, "learning_rate": 1.8730399048340003e-06, "loss": 6.1396, "step": 866 }, { "epoch": 0.01, "grad_norm": 10.818520545959473, "learning_rate": 1.8752027684654484e-06, "loss": 5.7905, "step": 867 }, { "epoch": 0.01, "grad_norm": 12.943737030029297, "learning_rate": 1.8773656320968964e-06, "loss": 5.6569, "step": 868 }, { "epoch": 0.01, "grad_norm": 12.39784049987793, "learning_rate": 1.8795284957283445e-06, "loss": 5.1791, "step": 869 }, { "epoch": 0.01, "grad_norm": 13.553924560546875, "learning_rate": 1.8816913593597925e-06, "loss": 5.5248, "step": 870 }, { "epoch": 0.01, "grad_norm": 11.60079288482666, "learning_rate": 1.8838542229912403e-06, "loss": 5.8122, "step": 871 }, { "epoch": 0.01, "grad_norm": 11.19101619720459, "learning_rate": 1.8860170866226888e-06, "loss": 5.4318, "step": 872 }, { "epoch": 0.01, "grad_norm": 10.788249969482422, "learning_rate": 1.8881799502541367e-06, "loss": 5.3549, "step": 873 }, { "epoch": 0.01, "grad_norm": 10.898164749145508, "learning_rate": 1.8903428138855847e-06, "loss": 5.1405, "step": 874 }, { "epoch": 0.01, "grad_norm": 13.076639175415039, "learning_rate": 1.8925056775170328e-06, "loss": 6.2273, "step": 875 }, { "epoch": 0.01, "grad_norm": 10.674334526062012, "learning_rate": 1.8946685411484808e-06, "loss": 5.2717, "step": 876 }, { "epoch": 0.01, "grad_norm": 11.812979698181152, "learning_rate": 1.8968314047799289e-06, "loss": 5.7447, "step": 877 }, { "epoch": 0.01, "grad_norm": 13.113821983337402, "learning_rate": 1.898994268411377e-06, "loss": 5.954, "step": 878 }, { "epoch": 0.01, "grad_norm": 12.029549598693848, "learning_rate": 1.901157132042825e-06, "loss": 5.3554, "step": 879 }, { "epoch": 0.01, "grad_norm": 11.310863494873047, "learning_rate": 1.9033199956742728e-06, "loss": 6.0343, "step": 880 }, { "epoch": 0.01, "grad_norm": 13.33658218383789, "learning_rate": 1.9054828593057209e-06, "loss": 6.3281, "step": 881 }, { "epoch": 0.01, "grad_norm": 11.738794326782227, "learning_rate": 1.907645722937169e-06, "loss": 5.636, "step": 882 }, { "epoch": 0.01, "grad_norm": 10.478214263916016, "learning_rate": 1.909808586568617e-06, "loss": 5.3848, "step": 883 }, { "epoch": 0.01, "grad_norm": 12.65900707244873, "learning_rate": 1.911971450200065e-06, "loss": 5.8164, "step": 884 }, { "epoch": 0.01, "grad_norm": 11.898097038269043, "learning_rate": 1.914134313831513e-06, "loss": 5.2946, "step": 885 }, { "epoch": 0.01, "grad_norm": 12.03226375579834, "learning_rate": 1.916297177462961e-06, "loss": 5.3113, "step": 886 }, { "epoch": 0.01, "grad_norm": 12.879765510559082, "learning_rate": 1.918460041094409e-06, "loss": 5.6518, "step": 887 }, { "epoch": 0.01, "grad_norm": 11.3041353225708, "learning_rate": 1.920622904725857e-06, "loss": 6.3219, "step": 888 }, { "epoch": 0.01, "grad_norm": 12.04900074005127, "learning_rate": 1.9227857683573053e-06, "loss": 5.9983, "step": 889 }, { "epoch": 0.01, "grad_norm": 11.280834197998047, "learning_rate": 1.9249486319887533e-06, "loss": 6.2555, "step": 890 }, { "epoch": 0.01, "grad_norm": 11.004423141479492, "learning_rate": 1.9271114956202014e-06, "loss": 5.8489, "step": 891 }, { "epoch": 0.01, "grad_norm": 11.290423393249512, "learning_rate": 1.9292743592516494e-06, "loss": 6.2741, "step": 892 }, { "epoch": 0.01, "grad_norm": 10.30569076538086, "learning_rate": 1.9314372228830975e-06, "loss": 6.2782, "step": 893 }, { "epoch": 0.01, "grad_norm": 11.024198532104492, "learning_rate": 1.9336000865145455e-06, "loss": 5.6679, "step": 894 }, { "epoch": 0.01, "grad_norm": 11.290217399597168, "learning_rate": 1.9357629501459936e-06, "loss": 5.6364, "step": 895 }, { "epoch": 0.01, "grad_norm": 12.391955375671387, "learning_rate": 1.9379258137774416e-06, "loss": 5.7113, "step": 896 }, { "epoch": 0.01, "grad_norm": 12.84719181060791, "learning_rate": 1.9400886774088897e-06, "loss": 5.6484, "step": 897 }, { "epoch": 0.01, "grad_norm": 11.616910934448242, "learning_rate": 1.9422515410403377e-06, "loss": 5.4647, "step": 898 }, { "epoch": 0.01, "grad_norm": 11.791565895080566, "learning_rate": 1.9444144046717853e-06, "loss": 5.4571, "step": 899 }, { "epoch": 0.01, "grad_norm": 10.618576049804688, "learning_rate": 1.9465772683032334e-06, "loss": 5.8379, "step": 900 }, { "epoch": 0.01, "grad_norm": 11.750131607055664, "learning_rate": 1.9487401319346814e-06, "loss": 5.3675, "step": 901 }, { "epoch": 0.01, "grad_norm": 11.153709411621094, "learning_rate": 1.9509029955661295e-06, "loss": 5.6636, "step": 902 }, { "epoch": 0.01, "grad_norm": 10.521871566772461, "learning_rate": 1.9530658591975775e-06, "loss": 5.7674, "step": 903 }, { "epoch": 0.01, "grad_norm": 10.948542594909668, "learning_rate": 1.955228722829026e-06, "loss": 6.076, "step": 904 }, { "epoch": 0.01, "grad_norm": 11.127927780151367, "learning_rate": 1.957391586460474e-06, "loss": 6.1853, "step": 905 }, { "epoch": 0.01, "grad_norm": 11.349613189697266, "learning_rate": 1.959554450091922e-06, "loss": 5.4016, "step": 906 }, { "epoch": 0.01, "grad_norm": 12.268774032592773, "learning_rate": 1.96171731372337e-06, "loss": 5.8032, "step": 907 }, { "epoch": 0.01, "grad_norm": 11.963766098022461, "learning_rate": 1.963880177354818e-06, "loss": 5.939, "step": 908 }, { "epoch": 0.01, "grad_norm": 13.18955135345459, "learning_rate": 1.966043040986266e-06, "loss": 5.8039, "step": 909 }, { "epoch": 0.01, "grad_norm": 15.125717163085938, "learning_rate": 1.968205904617714e-06, "loss": 4.8646, "step": 910 }, { "epoch": 0.01, "grad_norm": 11.508737564086914, "learning_rate": 1.970368768249162e-06, "loss": 5.7047, "step": 911 }, { "epoch": 0.01, "grad_norm": 13.203163146972656, "learning_rate": 1.97253163188061e-06, "loss": 5.5362, "step": 912 }, { "epoch": 0.01, "grad_norm": 11.599491119384766, "learning_rate": 1.974694495512058e-06, "loss": 5.561, "step": 913 }, { "epoch": 0.01, "grad_norm": 11.504705429077148, "learning_rate": 1.976857359143506e-06, "loss": 5.3895, "step": 914 }, { "epoch": 0.01, "grad_norm": 10.687528610229492, "learning_rate": 1.979020222774954e-06, "loss": 5.7137, "step": 915 }, { "epoch": 0.01, "grad_norm": 10.637984275817871, "learning_rate": 1.981183086406402e-06, "loss": 5.7605, "step": 916 }, { "epoch": 0.01, "grad_norm": 13.346915245056152, "learning_rate": 1.9833459500378503e-06, "loss": 5.0232, "step": 917 }, { "epoch": 0.01, "grad_norm": 11.513808250427246, "learning_rate": 1.9855088136692983e-06, "loss": 5.6816, "step": 918 }, { "epoch": 0.01, "grad_norm": 13.347766876220703, "learning_rate": 1.9876716773007464e-06, "loss": 5.2663, "step": 919 }, { "epoch": 0.01, "grad_norm": 13.447403907775879, "learning_rate": 1.9898345409321944e-06, "loss": 5.5246, "step": 920 }, { "epoch": 0.01, "grad_norm": 11.5504789352417, "learning_rate": 1.9919974045636425e-06, "loss": 5.3467, "step": 921 }, { "epoch": 0.01, "grad_norm": 11.684592247009277, "learning_rate": 1.9941602681950905e-06, "loss": 5.8848, "step": 922 }, { "epoch": 0.01, "grad_norm": 12.023804664611816, "learning_rate": 1.9963231318265386e-06, "loss": 5.8232, "step": 923 }, { "epoch": 0.01, "grad_norm": 11.506987571716309, "learning_rate": 1.9984859954579866e-06, "loss": 4.9223, "step": 924 }, { "epoch": 0.01, "grad_norm": 12.558695793151855, "learning_rate": 2.0006488590894347e-06, "loss": 4.6907, "step": 925 }, { "epoch": 0.01, "grad_norm": 12.62324333190918, "learning_rate": 2.0028117227208827e-06, "loss": 5.8102, "step": 926 }, { "epoch": 0.01, "grad_norm": 12.985611915588379, "learning_rate": 2.0049745863523308e-06, "loss": 5.9542, "step": 927 }, { "epoch": 0.01, "grad_norm": 11.859646797180176, "learning_rate": 2.007137449983779e-06, "loss": 5.4809, "step": 928 }, { "epoch": 0.01, "grad_norm": 12.167885780334473, "learning_rate": 2.009300313615227e-06, "loss": 6.2018, "step": 929 }, { "epoch": 0.01, "grad_norm": 11.921554565429688, "learning_rate": 2.011463177246675e-06, "loss": 5.7096, "step": 930 }, { "epoch": 0.01, "grad_norm": 12.191428184509277, "learning_rate": 2.0136260408781225e-06, "loss": 5.1518, "step": 931 }, { "epoch": 0.01, "grad_norm": 11.651116371154785, "learning_rate": 2.0157889045095706e-06, "loss": 5.4612, "step": 932 }, { "epoch": 0.01, "grad_norm": 12.222402572631836, "learning_rate": 2.0179517681410186e-06, "loss": 5.7507, "step": 933 }, { "epoch": 0.01, "grad_norm": 13.165680885314941, "learning_rate": 2.0201146317724667e-06, "loss": 5.545, "step": 934 }, { "epoch": 0.01, "grad_norm": 12.853486061096191, "learning_rate": 2.0222774954039147e-06, "loss": 5.8699, "step": 935 }, { "epoch": 0.01, "grad_norm": 11.353189468383789, "learning_rate": 2.0244403590353632e-06, "loss": 4.84, "step": 936 }, { "epoch": 0.01, "grad_norm": 11.631105422973633, "learning_rate": 2.0266032226668113e-06, "loss": 5.5974, "step": 937 }, { "epoch": 0.01, "grad_norm": 11.650654792785645, "learning_rate": 2.0287660862982593e-06, "loss": 5.8235, "step": 938 }, { "epoch": 0.01, "grad_norm": 12.496689796447754, "learning_rate": 2.0309289499297074e-06, "loss": 5.6648, "step": 939 }, { "epoch": 0.01, "grad_norm": 11.676721572875977, "learning_rate": 2.033091813561155e-06, "loss": 6.0432, "step": 940 }, { "epoch": 0.01, "grad_norm": 12.990039825439453, "learning_rate": 2.035254677192603e-06, "loss": 5.6199, "step": 941 }, { "epoch": 0.01, "grad_norm": 12.528104782104492, "learning_rate": 2.037417540824051e-06, "loss": 5.6023, "step": 942 }, { "epoch": 0.01, "grad_norm": 11.627287864685059, "learning_rate": 2.039580404455499e-06, "loss": 5.6206, "step": 943 }, { "epoch": 0.01, "grad_norm": 12.096786499023438, "learning_rate": 2.041743268086947e-06, "loss": 5.8384, "step": 944 }, { "epoch": 0.01, "grad_norm": 11.361957550048828, "learning_rate": 2.0439061317183953e-06, "loss": 5.527, "step": 945 }, { "epoch": 0.01, "grad_norm": 11.546660423278809, "learning_rate": 2.0460689953498433e-06, "loss": 5.6575, "step": 946 }, { "epoch": 0.01, "grad_norm": 11.877059936523438, "learning_rate": 2.0482318589812914e-06, "loss": 5.7373, "step": 947 }, { "epoch": 0.01, "grad_norm": 12.63611888885498, "learning_rate": 2.0503947226127394e-06, "loss": 5.1965, "step": 948 }, { "epoch": 0.01, "grad_norm": 11.111897468566895, "learning_rate": 2.0525575862441875e-06, "loss": 5.1529, "step": 949 }, { "epoch": 0.01, "grad_norm": 12.353042602539062, "learning_rate": 2.0547204498756355e-06, "loss": 5.4218, "step": 950 }, { "epoch": 0.01, "grad_norm": 11.357918739318848, "learning_rate": 2.0568833135070836e-06, "loss": 5.3823, "step": 951 }, { "epoch": 0.01, "grad_norm": 11.315196990966797, "learning_rate": 2.0590461771385316e-06, "loss": 5.252, "step": 952 }, { "epoch": 0.01, "grad_norm": 12.324690818786621, "learning_rate": 2.0612090407699797e-06, "loss": 5.73, "step": 953 }, { "epoch": 0.01, "grad_norm": 13.05231761932373, "learning_rate": 2.0633719044014277e-06, "loss": 6.0105, "step": 954 }, { "epoch": 0.01, "grad_norm": 14.195113182067871, "learning_rate": 2.0655347680328758e-06, "loss": 5.8481, "step": 955 }, { "epoch": 0.01, "grad_norm": 11.60770034790039, "learning_rate": 2.067697631664324e-06, "loss": 5.1199, "step": 956 }, { "epoch": 0.01, "grad_norm": 11.419693946838379, "learning_rate": 2.069860495295772e-06, "loss": 5.5798, "step": 957 }, { "epoch": 0.01, "grad_norm": 13.562572479248047, "learning_rate": 2.07202335892722e-06, "loss": 5.4289, "step": 958 }, { "epoch": 0.01, "grad_norm": 10.75188159942627, "learning_rate": 2.074186222558668e-06, "loss": 5.7155, "step": 959 }, { "epoch": 0.01, "grad_norm": 10.225061416625977, "learning_rate": 2.076349086190116e-06, "loss": 5.3577, "step": 960 }, { "epoch": 0.01, "grad_norm": 13.179553031921387, "learning_rate": 2.078511949821564e-06, "loss": 5.5018, "step": 961 }, { "epoch": 0.01, "grad_norm": 12.198165893554688, "learning_rate": 2.080674813453012e-06, "loss": 4.9152, "step": 962 }, { "epoch": 0.01, "grad_norm": 12.403935432434082, "learning_rate": 2.0828376770844597e-06, "loss": 4.8543, "step": 963 }, { "epoch": 0.01, "grad_norm": 11.04776668548584, "learning_rate": 2.085000540715908e-06, "loss": 5.3208, "step": 964 }, { "epoch": 0.01, "grad_norm": 11.407807350158691, "learning_rate": 2.087163404347356e-06, "loss": 5.6594, "step": 965 }, { "epoch": 0.01, "grad_norm": 13.494098663330078, "learning_rate": 2.089326267978804e-06, "loss": 4.883, "step": 966 }, { "epoch": 0.01, "grad_norm": 12.799774169921875, "learning_rate": 2.091489131610252e-06, "loss": 5.4266, "step": 967 }, { "epoch": 0.01, "grad_norm": 11.693506240844727, "learning_rate": 2.0936519952417004e-06, "loss": 5.2889, "step": 968 }, { "epoch": 0.01, "grad_norm": 10.90342903137207, "learning_rate": 2.0958148588731485e-06, "loss": 5.2632, "step": 969 }, { "epoch": 0.01, "grad_norm": 10.955254554748535, "learning_rate": 2.0979777225045965e-06, "loss": 5.2007, "step": 970 }, { "epoch": 0.01, "grad_norm": 11.22655200958252, "learning_rate": 2.1001405861360446e-06, "loss": 5.1376, "step": 971 }, { "epoch": 0.01, "grad_norm": 10.174407005310059, "learning_rate": 2.102303449767492e-06, "loss": 5.0535, "step": 972 }, { "epoch": 0.01, "grad_norm": 12.61461353302002, "learning_rate": 2.1044663133989403e-06, "loss": 4.5203, "step": 973 }, { "epoch": 0.01, "grad_norm": 11.504549980163574, "learning_rate": 2.1066291770303883e-06, "loss": 5.3723, "step": 974 }, { "epoch": 0.01, "grad_norm": 12.968668937683105, "learning_rate": 2.1087920406618364e-06, "loss": 5.118, "step": 975 }, { "epoch": 0.01, "grad_norm": 11.610069274902344, "learning_rate": 2.1109549042932844e-06, "loss": 5.3671, "step": 976 }, { "epoch": 0.01, "grad_norm": 12.888916969299316, "learning_rate": 2.1131177679247325e-06, "loss": 5.5823, "step": 977 }, { "epoch": 0.01, "grad_norm": 11.14486312866211, "learning_rate": 2.1152806315561805e-06, "loss": 5.6252, "step": 978 }, { "epoch": 0.01, "grad_norm": 12.61385726928711, "learning_rate": 2.1174434951876286e-06, "loss": 5.2529, "step": 979 }, { "epoch": 0.01, "grad_norm": 12.110929489135742, "learning_rate": 2.1196063588190766e-06, "loss": 5.459, "step": 980 }, { "epoch": 0.01, "grad_norm": 13.29830551147461, "learning_rate": 2.1217692224505247e-06, "loss": 5.9003, "step": 981 }, { "epoch": 0.01, "grad_norm": 11.528132438659668, "learning_rate": 2.1239320860819727e-06, "loss": 5.4935, "step": 982 }, { "epoch": 0.01, "grad_norm": 13.250223159790039, "learning_rate": 2.1260949497134208e-06, "loss": 4.7511, "step": 983 }, { "epoch": 0.01, "grad_norm": 12.520238876342773, "learning_rate": 2.128257813344869e-06, "loss": 5.145, "step": 984 }, { "epoch": 0.01, "grad_norm": 11.850123405456543, "learning_rate": 2.130420676976317e-06, "loss": 5.7089, "step": 985 }, { "epoch": 0.01, "grad_norm": 11.565938949584961, "learning_rate": 2.132583540607765e-06, "loss": 5.3473, "step": 986 }, { "epoch": 0.01, "grad_norm": 12.548650741577148, "learning_rate": 2.134746404239213e-06, "loss": 4.8163, "step": 987 }, { "epoch": 0.01, "grad_norm": 11.049901962280273, "learning_rate": 2.136909267870661e-06, "loss": 5.4234, "step": 988 }, { "epoch": 0.01, "grad_norm": 11.600199699401855, "learning_rate": 2.139072131502109e-06, "loss": 5.2375, "step": 989 }, { "epoch": 0.01, "grad_norm": 12.821365356445312, "learning_rate": 2.141234995133557e-06, "loss": 5.8854, "step": 990 }, { "epoch": 0.01, "grad_norm": 13.426133155822754, "learning_rate": 2.143397858765005e-06, "loss": 4.9691, "step": 991 }, { "epoch": 0.01, "grad_norm": 11.84454345703125, "learning_rate": 2.1455607223964532e-06, "loss": 5.004, "step": 992 }, { "epoch": 0.01, "grad_norm": 11.243974685668945, "learning_rate": 2.1477235860279013e-06, "loss": 5.3741, "step": 993 }, { "epoch": 0.01, "grad_norm": 13.101871490478516, "learning_rate": 2.1498864496593493e-06, "loss": 5.3981, "step": 994 }, { "epoch": 0.01, "grad_norm": 14.45081901550293, "learning_rate": 2.152049313290797e-06, "loss": 6.1235, "step": 995 }, { "epoch": 0.01, "grad_norm": 11.003610610961914, "learning_rate": 2.154212176922245e-06, "loss": 5.3917, "step": 996 }, { "epoch": 0.01, "grad_norm": 13.999098777770996, "learning_rate": 2.156375040553693e-06, "loss": 5.1292, "step": 997 }, { "epoch": 0.01, "grad_norm": 10.631781578063965, "learning_rate": 2.158537904185141e-06, "loss": 5.1202, "step": 998 }, { "epoch": 0.01, "grad_norm": 14.475948333740234, "learning_rate": 2.160700767816589e-06, "loss": 5.6177, "step": 999 }, { "epoch": 0.01, "grad_norm": 13.436748504638672, "learning_rate": 2.1628636314480376e-06, "loss": 5.7325, "step": 1000 }, { "epoch": 0.01, "grad_norm": 11.255372047424316, "learning_rate": 2.1650264950794857e-06, "loss": 5.4095, "step": 1001 }, { "epoch": 0.01, "grad_norm": 12.986687660217285, "learning_rate": 2.1671893587109337e-06, "loss": 5.7996, "step": 1002 }, { "epoch": 0.01, "grad_norm": 12.03474235534668, "learning_rate": 2.1693522223423818e-06, "loss": 5.5835, "step": 1003 }, { "epoch": 0.01, "grad_norm": 11.280332565307617, "learning_rate": 2.1715150859738294e-06, "loss": 6.2227, "step": 1004 }, { "epoch": 0.01, "grad_norm": 11.977200508117676, "learning_rate": 2.1736779496052775e-06, "loss": 5.7456, "step": 1005 }, { "epoch": 0.01, "grad_norm": 11.572710037231445, "learning_rate": 2.1758408132367255e-06, "loss": 5.0514, "step": 1006 }, { "epoch": 0.01, "grad_norm": 12.67811107635498, "learning_rate": 2.1780036768681736e-06, "loss": 4.8002, "step": 1007 }, { "epoch": 0.01, "grad_norm": 11.189492225646973, "learning_rate": 2.1801665404996216e-06, "loss": 5.4929, "step": 1008 }, { "epoch": 0.01, "grad_norm": 12.536866188049316, "learning_rate": 2.1823294041310697e-06, "loss": 4.2263, "step": 1009 }, { "epoch": 0.01, "grad_norm": 11.76315975189209, "learning_rate": 2.1844922677625177e-06, "loss": 5.1145, "step": 1010 }, { "epoch": 0.01, "grad_norm": 12.763660430908203, "learning_rate": 2.1866551313939658e-06, "loss": 5.0848, "step": 1011 }, { "epoch": 0.01, "grad_norm": 11.695320129394531, "learning_rate": 2.188817995025414e-06, "loss": 5.4256, "step": 1012 }, { "epoch": 0.01, "grad_norm": 14.285406112670898, "learning_rate": 2.190980858656862e-06, "loss": 4.5041, "step": 1013 }, { "epoch": 0.01, "grad_norm": 11.513455390930176, "learning_rate": 2.19314372228831e-06, "loss": 4.7518, "step": 1014 }, { "epoch": 0.01, "grad_norm": 13.253535270690918, "learning_rate": 2.195306585919758e-06, "loss": 5.1328, "step": 1015 }, { "epoch": 0.01, "grad_norm": 11.65506649017334, "learning_rate": 2.197469449551206e-06, "loss": 5.3984, "step": 1016 }, { "epoch": 0.01, "grad_norm": 12.349883079528809, "learning_rate": 2.199632313182654e-06, "loss": 5.5505, "step": 1017 }, { "epoch": 0.01, "grad_norm": 11.817212104797363, "learning_rate": 2.201795176814102e-06, "loss": 4.8972, "step": 1018 }, { "epoch": 0.01, "grad_norm": 10.858301162719727, "learning_rate": 2.20395804044555e-06, "loss": 5.0496, "step": 1019 }, { "epoch": 0.01, "grad_norm": 11.500687599182129, "learning_rate": 2.2061209040769982e-06, "loss": 4.9647, "step": 1020 }, { "epoch": 0.01, "grad_norm": 11.7075834274292, "learning_rate": 2.2082837677084463e-06, "loss": 5.1901, "step": 1021 }, { "epoch": 0.01, "grad_norm": 12.455549240112305, "learning_rate": 2.2104466313398943e-06, "loss": 5.3906, "step": 1022 }, { "epoch": 0.01, "grad_norm": 13.536737442016602, "learning_rate": 2.2126094949713424e-06, "loss": 5.9959, "step": 1023 }, { "epoch": 0.01, "grad_norm": 10.950525283813477, "learning_rate": 2.2147723586027904e-06, "loss": 4.7806, "step": 1024 }, { "epoch": 0.01, "grad_norm": 12.370314598083496, "learning_rate": 2.2169352222342385e-06, "loss": 5.1827, "step": 1025 }, { "epoch": 0.01, "grad_norm": 13.247105598449707, "learning_rate": 2.2190980858656865e-06, "loss": 4.7972, "step": 1026 }, { "epoch": 0.01, "grad_norm": 11.771316528320312, "learning_rate": 2.221260949497134e-06, "loss": 4.8941, "step": 1027 }, { "epoch": 0.01, "grad_norm": 14.000138282775879, "learning_rate": 2.223423813128582e-06, "loss": 5.0821, "step": 1028 }, { "epoch": 0.01, "grad_norm": 13.52556324005127, "learning_rate": 2.2255866767600302e-06, "loss": 5.0073, "step": 1029 }, { "epoch": 0.01, "grad_norm": 11.073802947998047, "learning_rate": 2.2277495403914783e-06, "loss": 5.1182, "step": 1030 }, { "epoch": 0.01, "grad_norm": 11.605463981628418, "learning_rate": 2.2299124040229263e-06, "loss": 5.0954, "step": 1031 }, { "epoch": 0.01, "grad_norm": 13.814257621765137, "learning_rate": 2.232075267654375e-06, "loss": 5.5179, "step": 1032 }, { "epoch": 0.01, "grad_norm": 12.976222038269043, "learning_rate": 2.234238131285823e-06, "loss": 4.9734, "step": 1033 }, { "epoch": 0.01, "grad_norm": 13.094535827636719, "learning_rate": 2.236400994917271e-06, "loss": 5.0879, "step": 1034 }, { "epoch": 0.01, "grad_norm": 10.59969711303711, "learning_rate": 2.238563858548719e-06, "loss": 5.8453, "step": 1035 }, { "epoch": 0.01, "grad_norm": 11.665510177612305, "learning_rate": 2.2407267221801666e-06, "loss": 5.3266, "step": 1036 }, { "epoch": 0.01, "grad_norm": 11.864903450012207, "learning_rate": 2.2428895858116147e-06, "loss": 5.8224, "step": 1037 }, { "epoch": 0.01, "grad_norm": 11.328152656555176, "learning_rate": 2.2450524494430627e-06, "loss": 5.5848, "step": 1038 }, { "epoch": 0.01, "grad_norm": 12.135106086730957, "learning_rate": 2.2472153130745108e-06, "loss": 4.6043, "step": 1039 }, { "epoch": 0.01, "grad_norm": 13.002248764038086, "learning_rate": 2.249378176705959e-06, "loss": 5.1752, "step": 1040 }, { "epoch": 0.01, "grad_norm": 11.756299018859863, "learning_rate": 2.251541040337407e-06, "loss": 4.6618, "step": 1041 }, { "epoch": 0.01, "grad_norm": 11.843010902404785, "learning_rate": 2.253703903968855e-06, "loss": 5.6742, "step": 1042 }, { "epoch": 0.01, "grad_norm": 11.563942909240723, "learning_rate": 2.255866767600303e-06, "loss": 4.914, "step": 1043 }, { "epoch": 0.01, "grad_norm": 12.98978328704834, "learning_rate": 2.258029631231751e-06, "loss": 4.4067, "step": 1044 }, { "epoch": 0.01, "grad_norm": 11.973729133605957, "learning_rate": 2.260192494863199e-06, "loss": 3.8741, "step": 1045 }, { "epoch": 0.01, "grad_norm": 12.528961181640625, "learning_rate": 2.262355358494647e-06, "loss": 5.3232, "step": 1046 }, { "epoch": 0.01, "grad_norm": 10.965381622314453, "learning_rate": 2.264518222126095e-06, "loss": 5.0236, "step": 1047 }, { "epoch": 0.01, "grad_norm": 11.94437026977539, "learning_rate": 2.2666810857575432e-06, "loss": 5.3682, "step": 1048 }, { "epoch": 0.01, "grad_norm": 11.588783264160156, "learning_rate": 2.2688439493889913e-06, "loss": 4.8401, "step": 1049 }, { "epoch": 0.01, "grad_norm": 11.558303833007812, "learning_rate": 2.2710068130204393e-06, "loss": 4.5851, "step": 1050 }, { "epoch": 0.01, "grad_norm": 12.252446174621582, "learning_rate": 2.2731696766518874e-06, "loss": 6.1922, "step": 1051 }, { "epoch": 0.01, "grad_norm": 11.562610626220703, "learning_rate": 2.2753325402833354e-06, "loss": 5.2653, "step": 1052 }, { "epoch": 0.01, "grad_norm": 12.164976119995117, "learning_rate": 2.2774954039147835e-06, "loss": 5.4726, "step": 1053 }, { "epoch": 0.01, "grad_norm": 11.057821273803711, "learning_rate": 2.2796582675462315e-06, "loss": 4.9447, "step": 1054 }, { "epoch": 0.01, "grad_norm": 12.466117858886719, "learning_rate": 2.2818211311776796e-06, "loss": 4.3375, "step": 1055 }, { "epoch": 0.01, "grad_norm": 11.87690258026123, "learning_rate": 2.2839839948091276e-06, "loss": 4.6214, "step": 1056 }, { "epoch": 0.01, "grad_norm": 12.740851402282715, "learning_rate": 2.2861468584405757e-06, "loss": 4.9392, "step": 1057 }, { "epoch": 0.01, "grad_norm": 13.31125259399414, "learning_rate": 2.2883097220720237e-06, "loss": 4.6386, "step": 1058 }, { "epoch": 0.01, "grad_norm": 14.42203140258789, "learning_rate": 2.2904725857034713e-06, "loss": 5.5044, "step": 1059 }, { "epoch": 0.01, "grad_norm": 10.787795066833496, "learning_rate": 2.2926354493349194e-06, "loss": 5.1611, "step": 1060 }, { "epoch": 0.01, "grad_norm": 13.229445457458496, "learning_rate": 2.2947983129663674e-06, "loss": 5.3338, "step": 1061 }, { "epoch": 0.01, "grad_norm": 12.535478591918945, "learning_rate": 2.2969611765978155e-06, "loss": 4.4201, "step": 1062 }, { "epoch": 0.01, "grad_norm": 11.959352493286133, "learning_rate": 2.2991240402292635e-06, "loss": 5.1142, "step": 1063 }, { "epoch": 0.01, "grad_norm": 12.966474533081055, "learning_rate": 2.301286903860712e-06, "loss": 5.5291, "step": 1064 }, { "epoch": 0.01, "grad_norm": 11.395965576171875, "learning_rate": 2.30344976749216e-06, "loss": 4.9782, "step": 1065 }, { "epoch": 0.01, "grad_norm": 11.54504680633545, "learning_rate": 2.305612631123608e-06, "loss": 5.0128, "step": 1066 }, { "epoch": 0.01, "grad_norm": 11.86709976196289, "learning_rate": 2.307775494755056e-06, "loss": 4.6208, "step": 1067 }, { "epoch": 0.01, "grad_norm": 12.746209144592285, "learning_rate": 2.309938358386504e-06, "loss": 5.3, "step": 1068 }, { "epoch": 0.01, "grad_norm": 12.233748435974121, "learning_rate": 2.312101222017952e-06, "loss": 5.5508, "step": 1069 }, { "epoch": 0.01, "grad_norm": 13.721236228942871, "learning_rate": 2.3142640856494e-06, "loss": 5.0709, "step": 1070 }, { "epoch": 0.01, "grad_norm": 13.470386505126953, "learning_rate": 2.316426949280848e-06, "loss": 4.845, "step": 1071 }, { "epoch": 0.01, "grad_norm": 13.298507690429688, "learning_rate": 2.318589812912296e-06, "loss": 4.835, "step": 1072 }, { "epoch": 0.01, "grad_norm": 13.264602661132812, "learning_rate": 2.320752676543744e-06, "loss": 4.7104, "step": 1073 }, { "epoch": 0.01, "grad_norm": 12.689982414245605, "learning_rate": 2.322915540175192e-06, "loss": 5.9611, "step": 1074 }, { "epoch": 0.01, "grad_norm": 11.270084381103516, "learning_rate": 2.32507840380664e-06, "loss": 5.3027, "step": 1075 }, { "epoch": 0.01, "grad_norm": 11.252735137939453, "learning_rate": 2.327241267438088e-06, "loss": 4.4538, "step": 1076 }, { "epoch": 0.01, "grad_norm": 13.789633750915527, "learning_rate": 2.3294041310695363e-06, "loss": 5.4919, "step": 1077 }, { "epoch": 0.01, "grad_norm": 11.202215194702148, "learning_rate": 2.3315669947009843e-06, "loss": 5.3791, "step": 1078 }, { "epoch": 0.01, "grad_norm": 13.522788047790527, "learning_rate": 2.3337298583324324e-06, "loss": 5.2461, "step": 1079 }, { "epoch": 0.01, "grad_norm": 12.231073379516602, "learning_rate": 2.3358927219638804e-06, "loss": 4.8046, "step": 1080 }, { "epoch": 0.01, "grad_norm": 11.887709617614746, "learning_rate": 2.3380555855953285e-06, "loss": 5.273, "step": 1081 }, { "epoch": 0.01, "grad_norm": 11.785308837890625, "learning_rate": 2.3402184492267765e-06, "loss": 5.0223, "step": 1082 }, { "epoch": 0.01, "grad_norm": 12.021777153015137, "learning_rate": 2.3423813128582246e-06, "loss": 4.9752, "step": 1083 }, { "epoch": 0.01, "grad_norm": 12.55070686340332, "learning_rate": 2.3445441764896726e-06, "loss": 5.3954, "step": 1084 }, { "epoch": 0.01, "grad_norm": 12.835126876831055, "learning_rate": 2.3467070401211207e-06, "loss": 5.4055, "step": 1085 }, { "epoch": 0.01, "grad_norm": 13.667510032653809, "learning_rate": 2.3488699037525687e-06, "loss": 5.3657, "step": 1086 }, { "epoch": 0.01, "grad_norm": 14.400945663452148, "learning_rate": 2.3510327673840168e-06, "loss": 5.7744, "step": 1087 }, { "epoch": 0.01, "grad_norm": 11.967581748962402, "learning_rate": 2.353195631015465e-06, "loss": 4.915, "step": 1088 }, { "epoch": 0.01, "grad_norm": 11.701173782348633, "learning_rate": 2.355358494646913e-06, "loss": 4.7888, "step": 1089 }, { "epoch": 0.01, "grad_norm": 11.132173538208008, "learning_rate": 2.3575213582783605e-06, "loss": 4.9419, "step": 1090 }, { "epoch": 0.01, "grad_norm": 12.037631034851074, "learning_rate": 2.3596842219098085e-06, "loss": 4.9711, "step": 1091 }, { "epoch": 0.01, "grad_norm": 11.067641258239746, "learning_rate": 2.3618470855412566e-06, "loss": 5.1829, "step": 1092 }, { "epoch": 0.01, "grad_norm": 13.28984260559082, "learning_rate": 2.3640099491727046e-06, "loss": 4.9948, "step": 1093 }, { "epoch": 0.01, "grad_norm": 12.013520240783691, "learning_rate": 2.3661728128041527e-06, "loss": 4.9736, "step": 1094 }, { "epoch": 0.01, "grad_norm": 11.544456481933594, "learning_rate": 2.3683356764356008e-06, "loss": 4.9723, "step": 1095 }, { "epoch": 0.01, "grad_norm": 12.522334098815918, "learning_rate": 2.3704985400670492e-06, "loss": 5.2569, "step": 1096 }, { "epoch": 0.01, "grad_norm": 13.011107444763184, "learning_rate": 2.3726614036984973e-06, "loss": 5.5372, "step": 1097 }, { "epoch": 0.01, "grad_norm": 12.8798189163208, "learning_rate": 2.3748242673299453e-06, "loss": 5.4086, "step": 1098 }, { "epoch": 0.01, "grad_norm": 11.015758514404297, "learning_rate": 2.376987130961393e-06, "loss": 5.1725, "step": 1099 }, { "epoch": 0.01, "grad_norm": 13.952581405639648, "learning_rate": 2.379149994592841e-06, "loss": 4.8661, "step": 1100 }, { "epoch": 0.01, "grad_norm": 11.432270050048828, "learning_rate": 2.381312858224289e-06, "loss": 5.2953, "step": 1101 }, { "epoch": 0.01, "grad_norm": 11.858591079711914, "learning_rate": 2.383475721855737e-06, "loss": 5.1183, "step": 1102 }, { "epoch": 0.01, "grad_norm": 13.678193092346191, "learning_rate": 2.385638585487185e-06, "loss": 4.7655, "step": 1103 }, { "epoch": 0.01, "grad_norm": 13.006749153137207, "learning_rate": 2.387801449118633e-06, "loss": 4.9836, "step": 1104 }, { "epoch": 0.01, "grad_norm": 11.951420783996582, "learning_rate": 2.3899643127500813e-06, "loss": 5.0899, "step": 1105 }, { "epoch": 0.01, "grad_norm": 13.198343276977539, "learning_rate": 2.3921271763815293e-06, "loss": 4.6444, "step": 1106 }, { "epoch": 0.01, "grad_norm": 12.191675186157227, "learning_rate": 2.3942900400129774e-06, "loss": 4.9559, "step": 1107 }, { "epoch": 0.01, "grad_norm": 13.338408470153809, "learning_rate": 2.3964529036444254e-06, "loss": 4.774, "step": 1108 }, { "epoch": 0.01, "grad_norm": 12.00066089630127, "learning_rate": 2.3986157672758735e-06, "loss": 4.6077, "step": 1109 }, { "epoch": 0.01, "grad_norm": 13.13228988647461, "learning_rate": 2.4007786309073215e-06, "loss": 4.1497, "step": 1110 }, { "epoch": 0.01, "grad_norm": 11.833444595336914, "learning_rate": 2.4029414945387696e-06, "loss": 4.5004, "step": 1111 }, { "epoch": 0.01, "grad_norm": 12.78213119506836, "learning_rate": 2.4051043581702176e-06, "loss": 4.8229, "step": 1112 }, { "epoch": 0.01, "grad_norm": 13.374102592468262, "learning_rate": 2.4072672218016657e-06, "loss": 4.6647, "step": 1113 }, { "epoch": 0.01, "grad_norm": 12.59460735321045, "learning_rate": 2.4094300854331137e-06, "loss": 5.3119, "step": 1114 }, { "epoch": 0.01, "grad_norm": 12.760224342346191, "learning_rate": 2.4115929490645618e-06, "loss": 5.7353, "step": 1115 }, { "epoch": 0.01, "grad_norm": 12.550970077514648, "learning_rate": 2.41375581269601e-06, "loss": 4.6829, "step": 1116 }, { "epoch": 0.01, "grad_norm": 12.274862289428711, "learning_rate": 2.415918676327458e-06, "loss": 4.7351, "step": 1117 }, { "epoch": 0.01, "grad_norm": 13.260741233825684, "learning_rate": 2.418081539958906e-06, "loss": 4.4793, "step": 1118 }, { "epoch": 0.01, "grad_norm": 12.258895874023438, "learning_rate": 2.420244403590354e-06, "loss": 4.1054, "step": 1119 }, { "epoch": 0.01, "grad_norm": 11.92104434967041, "learning_rate": 2.422407267221802e-06, "loss": 4.9705, "step": 1120 }, { "epoch": 0.01, "grad_norm": 11.024102210998535, "learning_rate": 2.42457013085325e-06, "loss": 4.7005, "step": 1121 }, { "epoch": 0.01, "grad_norm": 11.826395988464355, "learning_rate": 2.4267329944846977e-06, "loss": 4.6336, "step": 1122 }, { "epoch": 0.01, "grad_norm": 12.481073379516602, "learning_rate": 2.4288958581161457e-06, "loss": 5.1702, "step": 1123 }, { "epoch": 0.01, "grad_norm": 14.557912826538086, "learning_rate": 2.431058721747594e-06, "loss": 5.079, "step": 1124 }, { "epoch": 0.01, "grad_norm": 13.70667552947998, "learning_rate": 2.433221585379042e-06, "loss": 4.9962, "step": 1125 }, { "epoch": 0.01, "grad_norm": 14.224626541137695, "learning_rate": 2.43538444901049e-06, "loss": 4.5993, "step": 1126 }, { "epoch": 0.01, "grad_norm": 13.222163200378418, "learning_rate": 2.437547312641938e-06, "loss": 5.6834, "step": 1127 }, { "epoch": 0.01, "grad_norm": 14.065500259399414, "learning_rate": 2.4397101762733864e-06, "loss": 4.6088, "step": 1128 }, { "epoch": 0.01, "grad_norm": 14.01540470123291, "learning_rate": 2.4418730399048345e-06, "loss": 4.6634, "step": 1129 }, { "epoch": 0.01, "grad_norm": 12.237967491149902, "learning_rate": 2.4440359035362825e-06, "loss": 5.1819, "step": 1130 }, { "epoch": 0.01, "grad_norm": 13.161698341369629, "learning_rate": 2.44619876716773e-06, "loss": 5.177, "step": 1131 }, { "epoch": 0.01, "grad_norm": 11.533987045288086, "learning_rate": 2.448361630799178e-06, "loss": 5.5148, "step": 1132 }, { "epoch": 0.01, "grad_norm": 11.415440559387207, "learning_rate": 2.4505244944306263e-06, "loss": 4.7801, "step": 1133 }, { "epoch": 0.01, "grad_norm": 13.533787727355957, "learning_rate": 2.4526873580620743e-06, "loss": 5.5475, "step": 1134 }, { "epoch": 0.01, "grad_norm": 15.475044250488281, "learning_rate": 2.4548502216935224e-06, "loss": 5.0672, "step": 1135 }, { "epoch": 0.01, "grad_norm": 13.518646240234375, "learning_rate": 2.4570130853249704e-06, "loss": 5.2102, "step": 1136 }, { "epoch": 0.01, "grad_norm": 12.398319244384766, "learning_rate": 2.4591759489564185e-06, "loss": 5.2227, "step": 1137 }, { "epoch": 0.01, "grad_norm": 11.317719459533691, "learning_rate": 2.4613388125878665e-06, "loss": 4.8167, "step": 1138 }, { "epoch": 0.01, "grad_norm": 11.27446174621582, "learning_rate": 2.4635016762193146e-06, "loss": 5.0791, "step": 1139 }, { "epoch": 0.01, "grad_norm": 14.035728454589844, "learning_rate": 2.4656645398507626e-06, "loss": 4.2551, "step": 1140 }, { "epoch": 0.01, "grad_norm": 12.550190925598145, "learning_rate": 2.4678274034822107e-06, "loss": 5.3851, "step": 1141 }, { "epoch": 0.01, "grad_norm": 10.81533432006836, "learning_rate": 2.4699902671136587e-06, "loss": 5.0367, "step": 1142 }, { "epoch": 0.01, "grad_norm": 12.834761619567871, "learning_rate": 2.4721531307451068e-06, "loss": 4.8337, "step": 1143 }, { "epoch": 0.01, "grad_norm": 12.50570297241211, "learning_rate": 2.474315994376555e-06, "loss": 3.9894, "step": 1144 }, { "epoch": 0.01, "grad_norm": 12.655842781066895, "learning_rate": 2.476478858008003e-06, "loss": 5.142, "step": 1145 }, { "epoch": 0.01, "grad_norm": 11.729559898376465, "learning_rate": 2.478641721639451e-06, "loss": 5.0498, "step": 1146 }, { "epoch": 0.01, "grad_norm": 13.156980514526367, "learning_rate": 2.480804585270899e-06, "loss": 5.7266, "step": 1147 }, { "epoch": 0.01, "grad_norm": 11.68835735321045, "learning_rate": 2.482967448902347e-06, "loss": 4.9808, "step": 1148 }, { "epoch": 0.01, "grad_norm": 11.409342765808105, "learning_rate": 2.485130312533795e-06, "loss": 4.6751, "step": 1149 }, { "epoch": 0.01, "grad_norm": 11.014466285705566, "learning_rate": 2.487293176165243e-06, "loss": 4.8788, "step": 1150 }, { "epoch": 0.01, "grad_norm": 12.908368110656738, "learning_rate": 2.489456039796691e-06, "loss": 5.0476, "step": 1151 }, { "epoch": 0.01, "grad_norm": 11.699524879455566, "learning_rate": 2.4916189034281392e-06, "loss": 4.8235, "step": 1152 }, { "epoch": 0.01, "grad_norm": 12.771876335144043, "learning_rate": 2.4937817670595873e-06, "loss": 4.8041, "step": 1153 }, { "epoch": 0.01, "grad_norm": 11.792655944824219, "learning_rate": 2.495944630691035e-06, "loss": 5.7288, "step": 1154 }, { "epoch": 0.01, "grad_norm": 13.658998489379883, "learning_rate": 2.498107494322483e-06, "loss": 5.1546, "step": 1155 }, { "epoch": 0.02, "grad_norm": 12.435925483703613, "learning_rate": 2.500270357953931e-06, "loss": 4.4656, "step": 1156 }, { "epoch": 0.02, "grad_norm": 13.53827953338623, "learning_rate": 2.502433221585379e-06, "loss": 4.964, "step": 1157 }, { "epoch": 0.02, "grad_norm": 12.217103004455566, "learning_rate": 2.504596085216827e-06, "loss": 4.9193, "step": 1158 }, { "epoch": 0.02, "grad_norm": 10.991811752319336, "learning_rate": 2.506758948848275e-06, "loss": 5.4316, "step": 1159 }, { "epoch": 0.02, "grad_norm": 13.009590148925781, "learning_rate": 2.508921812479723e-06, "loss": 4.8435, "step": 1160 }, { "epoch": 0.02, "grad_norm": 12.538680076599121, "learning_rate": 2.5110846761111713e-06, "loss": 4.5695, "step": 1161 }, { "epoch": 0.02, "grad_norm": 12.86175537109375, "learning_rate": 2.5132475397426193e-06, "loss": 5.176, "step": 1162 }, { "epoch": 0.02, "grad_norm": 14.497267723083496, "learning_rate": 2.5154104033740674e-06, "loss": 5.4288, "step": 1163 }, { "epoch": 0.02, "grad_norm": 12.890731811523438, "learning_rate": 2.5175732670055154e-06, "loss": 5.1932, "step": 1164 }, { "epoch": 0.02, "grad_norm": 14.760941505432129, "learning_rate": 2.5197361306369635e-06, "loss": 3.9538, "step": 1165 }, { "epoch": 0.02, "grad_norm": 13.623431205749512, "learning_rate": 2.5218989942684115e-06, "loss": 4.9034, "step": 1166 }, { "epoch": 0.02, "grad_norm": 12.793553352355957, "learning_rate": 2.5240618578998596e-06, "loss": 5.1436, "step": 1167 }, { "epoch": 0.02, "grad_norm": 11.69408893585205, "learning_rate": 2.526224721531308e-06, "loss": 5.332, "step": 1168 }, { "epoch": 0.02, "grad_norm": 11.424040794372559, "learning_rate": 2.528387585162756e-06, "loss": 4.2767, "step": 1169 }, { "epoch": 0.02, "grad_norm": 11.526888847351074, "learning_rate": 2.530550448794204e-06, "loss": 4.1746, "step": 1170 }, { "epoch": 0.02, "grad_norm": 11.605302810668945, "learning_rate": 2.532713312425652e-06, "loss": 4.6818, "step": 1171 }, { "epoch": 0.02, "grad_norm": 11.457826614379883, "learning_rate": 2.5348761760571e-06, "loss": 4.5818, "step": 1172 }, { "epoch": 0.02, "grad_norm": 13.418597221374512, "learning_rate": 2.537039039688548e-06, "loss": 5.0461, "step": 1173 }, { "epoch": 0.02, "grad_norm": 12.549918174743652, "learning_rate": 2.539201903319996e-06, "loss": 5.0988, "step": 1174 }, { "epoch": 0.02, "grad_norm": 11.033331871032715, "learning_rate": 2.541364766951444e-06, "loss": 4.3016, "step": 1175 }, { "epoch": 0.02, "grad_norm": 12.908978462219238, "learning_rate": 2.543527630582892e-06, "loss": 4.6804, "step": 1176 }, { "epoch": 0.02, "grad_norm": 12.020636558532715, "learning_rate": 2.54569049421434e-06, "loss": 4.9617, "step": 1177 }, { "epoch": 0.02, "grad_norm": 13.333161354064941, "learning_rate": 2.547853357845788e-06, "loss": 5.7073, "step": 1178 }, { "epoch": 0.02, "grad_norm": 11.1371488571167, "learning_rate": 2.550016221477236e-06, "loss": 4.4651, "step": 1179 }, { "epoch": 0.02, "grad_norm": 12.218526840209961, "learning_rate": 2.5521790851086842e-06, "loss": 4.7288, "step": 1180 }, { "epoch": 0.02, "grad_norm": 11.808462142944336, "learning_rate": 2.5543419487401323e-06, "loss": 4.9423, "step": 1181 }, { "epoch": 0.02, "grad_norm": 11.96012020111084, "learning_rate": 2.5565048123715803e-06, "loss": 4.4012, "step": 1182 }, { "epoch": 0.02, "grad_norm": 12.392343521118164, "learning_rate": 2.5586676760030284e-06, "loss": 4.355, "step": 1183 }, { "epoch": 0.02, "grad_norm": 13.407183647155762, "learning_rate": 2.5608305396344764e-06, "loss": 4.3271, "step": 1184 }, { "epoch": 0.02, "grad_norm": 14.08337116241455, "learning_rate": 2.5629934032659245e-06, "loss": 4.3148, "step": 1185 }, { "epoch": 0.02, "grad_norm": 13.21928882598877, "learning_rate": 2.565156266897372e-06, "loss": 4.8175, "step": 1186 }, { "epoch": 0.02, "grad_norm": 12.316628456115723, "learning_rate": 2.56731913052882e-06, "loss": 4.404, "step": 1187 }, { "epoch": 0.02, "grad_norm": 12.575953483581543, "learning_rate": 2.569481994160268e-06, "loss": 4.0212, "step": 1188 }, { "epoch": 0.02, "grad_norm": 12.039996147155762, "learning_rate": 2.5716448577917163e-06, "loss": 4.6494, "step": 1189 }, { "epoch": 0.02, "grad_norm": 11.813307762145996, "learning_rate": 2.5738077214231643e-06, "loss": 4.7399, "step": 1190 }, { "epoch": 0.02, "grad_norm": 11.749353408813477, "learning_rate": 2.5759705850546124e-06, "loss": 4.7863, "step": 1191 }, { "epoch": 0.02, "grad_norm": 13.073655128479004, "learning_rate": 2.5781334486860604e-06, "loss": 4.8115, "step": 1192 }, { "epoch": 0.02, "grad_norm": 11.796736717224121, "learning_rate": 2.5802963123175085e-06, "loss": 4.9411, "step": 1193 }, { "epoch": 0.02, "grad_norm": 12.78290843963623, "learning_rate": 2.5824591759489565e-06, "loss": 4.9233, "step": 1194 }, { "epoch": 0.02, "grad_norm": 15.858702659606934, "learning_rate": 2.5846220395804046e-06, "loss": 4.9883, "step": 1195 }, { "epoch": 0.02, "grad_norm": 13.586823463439941, "learning_rate": 2.5867849032118526e-06, "loss": 4.6995, "step": 1196 }, { "epoch": 0.02, "grad_norm": 14.53465747833252, "learning_rate": 2.5889477668433007e-06, "loss": 4.4072, "step": 1197 }, { "epoch": 0.02, "grad_norm": 12.182819366455078, "learning_rate": 2.5911106304747487e-06, "loss": 4.5979, "step": 1198 }, { "epoch": 0.02, "grad_norm": 14.115896224975586, "learning_rate": 2.5932734941061968e-06, "loss": 4.5698, "step": 1199 }, { "epoch": 0.02, "grad_norm": 13.105918884277344, "learning_rate": 2.5954363577376452e-06, "loss": 4.1121, "step": 1200 }, { "epoch": 0.02, "grad_norm": 11.754578590393066, "learning_rate": 2.5975992213690933e-06, "loss": 4.6022, "step": 1201 }, { "epoch": 0.02, "grad_norm": 12.042720794677734, "learning_rate": 2.5997620850005413e-06, "loss": 4.2043, "step": 1202 }, { "epoch": 0.02, "grad_norm": 12.059338569641113, "learning_rate": 2.6019249486319894e-06, "loss": 5.1909, "step": 1203 }, { "epoch": 0.02, "grad_norm": 12.854375839233398, "learning_rate": 2.604087812263437e-06, "loss": 4.7296, "step": 1204 }, { "epoch": 0.02, "grad_norm": 12.91871452331543, "learning_rate": 2.606250675894885e-06, "loss": 4.2606, "step": 1205 }, { "epoch": 0.02, "grad_norm": 12.129950523376465, "learning_rate": 2.608413539526333e-06, "loss": 4.5867, "step": 1206 }, { "epoch": 0.02, "grad_norm": 13.434012413024902, "learning_rate": 2.610576403157781e-06, "loss": 4.5928, "step": 1207 }, { "epoch": 0.02, "grad_norm": 11.88530445098877, "learning_rate": 2.6127392667892292e-06, "loss": 5.1673, "step": 1208 }, { "epoch": 0.02, "grad_norm": 11.969352722167969, "learning_rate": 2.6149021304206773e-06, "loss": 4.8568, "step": 1209 }, { "epoch": 0.02, "grad_norm": 13.287845611572266, "learning_rate": 2.6170649940521253e-06, "loss": 4.7239, "step": 1210 }, { "epoch": 0.02, "grad_norm": 12.966160774230957, "learning_rate": 2.6192278576835734e-06, "loss": 4.2186, "step": 1211 }, { "epoch": 0.02, "grad_norm": 11.748373031616211, "learning_rate": 2.6213907213150214e-06, "loss": 5.3081, "step": 1212 }, { "epoch": 0.02, "grad_norm": 11.377974510192871, "learning_rate": 2.6235535849464695e-06, "loss": 4.2542, "step": 1213 }, { "epoch": 0.02, "grad_norm": 11.59849739074707, "learning_rate": 2.6257164485779175e-06, "loss": 4.5942, "step": 1214 }, { "epoch": 0.02, "grad_norm": 12.279650688171387, "learning_rate": 2.6278793122093656e-06, "loss": 5.1336, "step": 1215 }, { "epoch": 0.02, "grad_norm": 13.814002990722656, "learning_rate": 2.6300421758408136e-06, "loss": 4.5014, "step": 1216 }, { "epoch": 0.02, "grad_norm": 11.779733657836914, "learning_rate": 2.6322050394722617e-06, "loss": 4.4098, "step": 1217 }, { "epoch": 0.02, "grad_norm": 12.871993064880371, "learning_rate": 2.6343679031037093e-06, "loss": 5.3043, "step": 1218 }, { "epoch": 0.02, "grad_norm": 13.500951766967773, "learning_rate": 2.6365307667351573e-06, "loss": 5.2056, "step": 1219 }, { "epoch": 0.02, "grad_norm": 14.131994247436523, "learning_rate": 2.6386936303666054e-06, "loss": 4.9443, "step": 1220 }, { "epoch": 0.02, "grad_norm": 12.552044868469238, "learning_rate": 2.6408564939980535e-06, "loss": 4.7217, "step": 1221 }, { "epoch": 0.02, "grad_norm": 11.963044166564941, "learning_rate": 2.6430193576295015e-06, "loss": 5.2243, "step": 1222 }, { "epoch": 0.02, "grad_norm": 12.340130805969238, "learning_rate": 2.6451822212609496e-06, "loss": 5.4323, "step": 1223 }, { "epoch": 0.02, "grad_norm": 12.09315299987793, "learning_rate": 2.6473450848923976e-06, "loss": 4.6806, "step": 1224 }, { "epoch": 0.02, "grad_norm": 13.116500854492188, "learning_rate": 2.6495079485238457e-06, "loss": 4.8375, "step": 1225 }, { "epoch": 0.02, "grad_norm": 13.097232818603516, "learning_rate": 2.6516708121552937e-06, "loss": 4.9495, "step": 1226 }, { "epoch": 0.02, "grad_norm": 12.7971830368042, "learning_rate": 2.6538336757867418e-06, "loss": 4.7413, "step": 1227 }, { "epoch": 0.02, "grad_norm": 10.702564239501953, "learning_rate": 2.65599653941819e-06, "loss": 5.0153, "step": 1228 }, { "epoch": 0.02, "grad_norm": 13.090778350830078, "learning_rate": 2.658159403049638e-06, "loss": 4.691, "step": 1229 }, { "epoch": 0.02, "grad_norm": 12.212713241577148, "learning_rate": 2.660322266681086e-06, "loss": 4.6596, "step": 1230 }, { "epoch": 0.02, "grad_norm": 12.002750396728516, "learning_rate": 2.662485130312534e-06, "loss": 5.2365, "step": 1231 }, { "epoch": 0.02, "grad_norm": 13.131170272827148, "learning_rate": 2.6646479939439824e-06, "loss": 5.0513, "step": 1232 }, { "epoch": 0.02, "grad_norm": 11.307002067565918, "learning_rate": 2.6668108575754305e-06, "loss": 4.7128, "step": 1233 }, { "epoch": 0.02, "grad_norm": 11.599756240844727, "learning_rate": 2.6689737212068785e-06, "loss": 4.7066, "step": 1234 }, { "epoch": 0.02, "grad_norm": 10.8063383102417, "learning_rate": 2.6711365848383266e-06, "loss": 4.8087, "step": 1235 }, { "epoch": 0.02, "grad_norm": 11.238690376281738, "learning_rate": 2.6732994484697742e-06, "loss": 3.9943, "step": 1236 }, { "epoch": 0.02, "grad_norm": 12.077960968017578, "learning_rate": 2.6754623121012223e-06, "loss": 4.6123, "step": 1237 }, { "epoch": 0.02, "grad_norm": 13.38801097869873, "learning_rate": 2.6776251757326703e-06, "loss": 3.8677, "step": 1238 }, { "epoch": 0.02, "grad_norm": 13.266731262207031, "learning_rate": 2.6797880393641184e-06, "loss": 4.6052, "step": 1239 }, { "epoch": 0.02, "grad_norm": 12.327171325683594, "learning_rate": 2.6819509029955664e-06, "loss": 4.1931, "step": 1240 }, { "epoch": 0.02, "grad_norm": 10.574971199035645, "learning_rate": 2.6841137666270145e-06, "loss": 4.7224, "step": 1241 }, { "epoch": 0.02, "grad_norm": 12.542715072631836, "learning_rate": 2.6862766302584625e-06, "loss": 4.0706, "step": 1242 }, { "epoch": 0.02, "grad_norm": 12.19111442565918, "learning_rate": 2.6884394938899106e-06, "loss": 4.6269, "step": 1243 }, { "epoch": 0.02, "grad_norm": 11.692866325378418, "learning_rate": 2.6906023575213586e-06, "loss": 4.6832, "step": 1244 }, { "epoch": 0.02, "grad_norm": 13.100127220153809, "learning_rate": 2.6927652211528067e-06, "loss": 4.7463, "step": 1245 }, { "epoch": 0.02, "grad_norm": 13.74910831451416, "learning_rate": 2.6949280847842547e-06, "loss": 4.0402, "step": 1246 }, { "epoch": 0.02, "grad_norm": 12.506946563720703, "learning_rate": 2.6970909484157028e-06, "loss": 4.7693, "step": 1247 }, { "epoch": 0.02, "grad_norm": 13.311214447021484, "learning_rate": 2.699253812047151e-06, "loss": 4.4467, "step": 1248 }, { "epoch": 0.02, "grad_norm": 13.416465759277344, "learning_rate": 2.701416675678599e-06, "loss": 4.5637, "step": 1249 }, { "epoch": 0.02, "grad_norm": 13.769986152648926, "learning_rate": 2.7035795393100465e-06, "loss": 5.0988, "step": 1250 }, { "epoch": 0.02, "grad_norm": 11.342982292175293, "learning_rate": 2.7057424029414945e-06, "loss": 3.9829, "step": 1251 }, { "epoch": 0.02, "grad_norm": 11.889252662658691, "learning_rate": 2.7079052665729426e-06, "loss": 4.2732, "step": 1252 }, { "epoch": 0.02, "grad_norm": 12.090982437133789, "learning_rate": 2.7100681302043907e-06, "loss": 4.4481, "step": 1253 }, { "epoch": 0.02, "grad_norm": 11.562392234802246, "learning_rate": 2.7122309938358387e-06, "loss": 3.8274, "step": 1254 }, { "epoch": 0.02, "grad_norm": 12.181352615356445, "learning_rate": 2.7143938574672868e-06, "loss": 4.5773, "step": 1255 }, { "epoch": 0.02, "grad_norm": 12.5331392288208, "learning_rate": 2.716556721098735e-06, "loss": 4.148, "step": 1256 }, { "epoch": 0.02, "grad_norm": 12.05583381652832, "learning_rate": 2.718719584730183e-06, "loss": 3.9292, "step": 1257 }, { "epoch": 0.02, "grad_norm": 12.985942840576172, "learning_rate": 2.720882448361631e-06, "loss": 5.1663, "step": 1258 }, { "epoch": 0.02, "grad_norm": 12.051292419433594, "learning_rate": 2.723045311993079e-06, "loss": 4.6718, "step": 1259 }, { "epoch": 0.02, "grad_norm": 11.910615921020508, "learning_rate": 2.725208175624527e-06, "loss": 4.7134, "step": 1260 }, { "epoch": 0.02, "grad_norm": 11.863975524902344, "learning_rate": 2.727371039255975e-06, "loss": 5.3975, "step": 1261 }, { "epoch": 0.02, "grad_norm": 11.248427391052246, "learning_rate": 2.729533902887423e-06, "loss": 4.4046, "step": 1262 }, { "epoch": 0.02, "grad_norm": 11.721092224121094, "learning_rate": 2.7316967665188707e-06, "loss": 4.8561, "step": 1263 }, { "epoch": 0.02, "grad_norm": 12.269058227539062, "learning_rate": 2.7338596301503196e-06, "loss": 4.5881, "step": 1264 }, { "epoch": 0.02, "grad_norm": 12.831141471862793, "learning_rate": 2.7360224937817677e-06, "loss": 4.4543, "step": 1265 }, { "epoch": 0.02, "grad_norm": 12.55234146118164, "learning_rate": 2.7381853574132157e-06, "loss": 5.2585, "step": 1266 }, { "epoch": 0.02, "grad_norm": 11.765591621398926, "learning_rate": 2.7403482210446638e-06, "loss": 4.2479, "step": 1267 }, { "epoch": 0.02, "grad_norm": 13.199528694152832, "learning_rate": 2.7425110846761114e-06, "loss": 4.5539, "step": 1268 }, { "epoch": 0.02, "grad_norm": 12.737756729125977, "learning_rate": 2.7446739483075595e-06, "loss": 5.131, "step": 1269 }, { "epoch": 0.02, "grad_norm": 10.424773216247559, "learning_rate": 2.7468368119390075e-06, "loss": 4.5676, "step": 1270 }, { "epoch": 0.02, "grad_norm": 12.094200134277344, "learning_rate": 2.7489996755704556e-06, "loss": 4.7026, "step": 1271 }, { "epoch": 0.02, "grad_norm": 12.25269603729248, "learning_rate": 2.7511625392019036e-06, "loss": 5.3247, "step": 1272 }, { "epoch": 0.02, "grad_norm": 11.890076637268066, "learning_rate": 2.7533254028333517e-06, "loss": 4.4896, "step": 1273 }, { "epoch": 0.02, "grad_norm": 11.126177787780762, "learning_rate": 2.7554882664647997e-06, "loss": 4.3776, "step": 1274 }, { "epoch": 0.02, "grad_norm": 10.780210494995117, "learning_rate": 2.7576511300962478e-06, "loss": 4.8951, "step": 1275 }, { "epoch": 0.02, "grad_norm": 12.600990295410156, "learning_rate": 2.759813993727696e-06, "loss": 4.5049, "step": 1276 }, { "epoch": 0.02, "grad_norm": 10.285218238830566, "learning_rate": 2.761976857359144e-06, "loss": 4.8062, "step": 1277 }, { "epoch": 0.02, "grad_norm": 12.858220100402832, "learning_rate": 2.764139720990592e-06, "loss": 4.4097, "step": 1278 }, { "epoch": 0.02, "grad_norm": 11.101723670959473, "learning_rate": 2.76630258462204e-06, "loss": 4.9501, "step": 1279 }, { "epoch": 0.02, "grad_norm": 11.60736083984375, "learning_rate": 2.768465448253488e-06, "loss": 5.065, "step": 1280 }, { "epoch": 0.02, "grad_norm": 12.404093742370605, "learning_rate": 2.770628311884936e-06, "loss": 4.1518, "step": 1281 }, { "epoch": 0.02, "grad_norm": 14.135299682617188, "learning_rate": 2.7727911755163837e-06, "loss": 4.2738, "step": 1282 }, { "epoch": 0.02, "grad_norm": 11.543498039245605, "learning_rate": 2.7749540391478318e-06, "loss": 5.1835, "step": 1283 }, { "epoch": 0.02, "grad_norm": 15.321895599365234, "learning_rate": 2.77711690277928e-06, "loss": 4.3063, "step": 1284 }, { "epoch": 0.02, "grad_norm": 13.224189758300781, "learning_rate": 2.779279766410728e-06, "loss": 5.2972, "step": 1285 }, { "epoch": 0.02, "grad_norm": 13.732572555541992, "learning_rate": 2.781442630042176e-06, "loss": 4.9176, "step": 1286 }, { "epoch": 0.02, "grad_norm": 11.65341854095459, "learning_rate": 2.783605493673624e-06, "loss": 4.0416, "step": 1287 }, { "epoch": 0.02, "grad_norm": 13.695838928222656, "learning_rate": 2.785768357305072e-06, "loss": 4.2525, "step": 1288 }, { "epoch": 0.02, "grad_norm": 11.252992630004883, "learning_rate": 2.78793122093652e-06, "loss": 4.8583, "step": 1289 }, { "epoch": 0.02, "grad_norm": 14.655511856079102, "learning_rate": 2.790094084567968e-06, "loss": 4.7775, "step": 1290 }, { "epoch": 0.02, "grad_norm": 12.520707130432129, "learning_rate": 2.792256948199416e-06, "loss": 4.6435, "step": 1291 }, { "epoch": 0.02, "grad_norm": 13.152337074279785, "learning_rate": 2.794419811830864e-06, "loss": 3.9278, "step": 1292 }, { "epoch": 0.02, "grad_norm": 13.836795806884766, "learning_rate": 2.7965826754623123e-06, "loss": 4.9982, "step": 1293 }, { "epoch": 0.02, "grad_norm": 11.228255271911621, "learning_rate": 2.7987455390937603e-06, "loss": 5.0049, "step": 1294 }, { "epoch": 0.02, "grad_norm": 12.222293853759766, "learning_rate": 2.800908402725208e-06, "loss": 5.0446, "step": 1295 }, { "epoch": 0.02, "grad_norm": 12.67375373840332, "learning_rate": 2.803071266356657e-06, "loss": 4.4098, "step": 1296 }, { "epoch": 0.02, "grad_norm": 15.370465278625488, "learning_rate": 2.805234129988105e-06, "loss": 4.4256, "step": 1297 }, { "epoch": 0.02, "grad_norm": 13.15505599975586, "learning_rate": 2.807396993619553e-06, "loss": 4.1754, "step": 1298 }, { "epoch": 0.02, "grad_norm": 12.411450386047363, "learning_rate": 2.809559857251001e-06, "loss": 4.6395, "step": 1299 }, { "epoch": 0.02, "grad_norm": 13.206985473632812, "learning_rate": 2.8117227208824486e-06, "loss": 5.5287, "step": 1300 }, { "epoch": 0.02, "grad_norm": 11.357564926147461, "learning_rate": 2.8138855845138967e-06, "loss": 4.1555, "step": 1301 }, { "epoch": 0.02, "grad_norm": 13.294703483581543, "learning_rate": 2.8160484481453447e-06, "loss": 4.5143, "step": 1302 }, { "epoch": 0.02, "grad_norm": 13.19143009185791, "learning_rate": 2.8182113117767928e-06, "loss": 4.8012, "step": 1303 }, { "epoch": 0.02, "grad_norm": 12.683719635009766, "learning_rate": 2.820374175408241e-06, "loss": 4.5606, "step": 1304 }, { "epoch": 0.02, "grad_norm": 12.88888168334961, "learning_rate": 2.822537039039689e-06, "loss": 4.6065, "step": 1305 }, { "epoch": 0.02, "grad_norm": 11.375211715698242, "learning_rate": 2.824699902671137e-06, "loss": 4.255, "step": 1306 }, { "epoch": 0.02, "grad_norm": 12.129801750183105, "learning_rate": 2.826862766302585e-06, "loss": 4.2838, "step": 1307 }, { "epoch": 0.02, "grad_norm": 14.104720115661621, "learning_rate": 2.829025629934033e-06, "loss": 4.5056, "step": 1308 }, { "epoch": 0.02, "grad_norm": 12.732198715209961, "learning_rate": 2.831188493565481e-06, "loss": 3.9534, "step": 1309 }, { "epoch": 0.02, "grad_norm": 12.01664924621582, "learning_rate": 2.833351357196929e-06, "loss": 4.5352, "step": 1310 }, { "epoch": 0.02, "grad_norm": 15.0212984085083, "learning_rate": 2.835514220828377e-06, "loss": 4.1383, "step": 1311 }, { "epoch": 0.02, "grad_norm": 12.465142250061035, "learning_rate": 2.8376770844598252e-06, "loss": 4.567, "step": 1312 }, { "epoch": 0.02, "grad_norm": 11.284093856811523, "learning_rate": 2.839839948091273e-06, "loss": 5.0555, "step": 1313 }, { "epoch": 0.02, "grad_norm": 12.748032569885254, "learning_rate": 2.842002811722721e-06, "loss": 3.9787, "step": 1314 }, { "epoch": 0.02, "grad_norm": 12.978048324584961, "learning_rate": 2.844165675354169e-06, "loss": 4.5211, "step": 1315 }, { "epoch": 0.02, "grad_norm": 12.501045227050781, "learning_rate": 2.846328538985617e-06, "loss": 5.1764, "step": 1316 }, { "epoch": 0.02, "grad_norm": 11.360540390014648, "learning_rate": 2.848491402617065e-06, "loss": 4.136, "step": 1317 }, { "epoch": 0.02, "grad_norm": 10.544933319091797, "learning_rate": 2.850654266248513e-06, "loss": 4.7407, "step": 1318 }, { "epoch": 0.02, "grad_norm": 14.014798164367676, "learning_rate": 2.852817129879961e-06, "loss": 4.3512, "step": 1319 }, { "epoch": 0.02, "grad_norm": 12.935508728027344, "learning_rate": 2.854979993511409e-06, "loss": 4.749, "step": 1320 }, { "epoch": 0.02, "grad_norm": 11.896782875061035, "learning_rate": 2.8571428571428573e-06, "loss": 4.5973, "step": 1321 }, { "epoch": 0.02, "grad_norm": 12.37346363067627, "learning_rate": 2.8593057207743053e-06, "loss": 5.0196, "step": 1322 }, { "epoch": 0.02, "grad_norm": 11.052874565124512, "learning_rate": 2.8614685844057534e-06, "loss": 4.8767, "step": 1323 }, { "epoch": 0.02, "grad_norm": 12.663468360900879, "learning_rate": 2.8636314480372014e-06, "loss": 4.6783, "step": 1324 }, { "epoch": 0.02, "grad_norm": 9.97424030303955, "learning_rate": 2.8657943116686495e-06, "loss": 4.9272, "step": 1325 }, { "epoch": 0.02, "grad_norm": 11.043993949890137, "learning_rate": 2.8679571753000975e-06, "loss": 4.0903, "step": 1326 }, { "epoch": 0.02, "grad_norm": 12.008684158325195, "learning_rate": 2.870120038931545e-06, "loss": 5.1333, "step": 1327 }, { "epoch": 0.02, "grad_norm": 12.353242874145508, "learning_rate": 2.872282902562994e-06, "loss": 4.9132, "step": 1328 }, { "epoch": 0.02, "grad_norm": 11.332950592041016, "learning_rate": 2.874445766194442e-06, "loss": 4.8417, "step": 1329 }, { "epoch": 0.02, "grad_norm": 13.244152069091797, "learning_rate": 2.87660862982589e-06, "loss": 4.8496, "step": 1330 }, { "epoch": 0.02, "grad_norm": 11.379654884338379, "learning_rate": 2.8787714934573378e-06, "loss": 4.6247, "step": 1331 }, { "epoch": 0.02, "grad_norm": 11.4285249710083, "learning_rate": 2.880934357088786e-06, "loss": 5.832, "step": 1332 }, { "epoch": 0.02, "grad_norm": 10.314888954162598, "learning_rate": 2.883097220720234e-06, "loss": 4.5968, "step": 1333 }, { "epoch": 0.02, "grad_norm": 12.100001335144043, "learning_rate": 2.885260084351682e-06, "loss": 4.256, "step": 1334 }, { "epoch": 0.02, "grad_norm": 13.231472969055176, "learning_rate": 2.88742294798313e-06, "loss": 4.384, "step": 1335 }, { "epoch": 0.02, "grad_norm": 12.484437942504883, "learning_rate": 2.889585811614578e-06, "loss": 4.2271, "step": 1336 }, { "epoch": 0.02, "grad_norm": 12.308768272399902, "learning_rate": 2.891748675246026e-06, "loss": 4.4582, "step": 1337 }, { "epoch": 0.02, "grad_norm": 12.022395133972168, "learning_rate": 2.893911538877474e-06, "loss": 3.8292, "step": 1338 }, { "epoch": 0.02, "grad_norm": 10.948142051696777, "learning_rate": 2.896074402508922e-06, "loss": 4.452, "step": 1339 }, { "epoch": 0.02, "grad_norm": 12.089179992675781, "learning_rate": 2.8982372661403702e-06, "loss": 4.9506, "step": 1340 }, { "epoch": 0.02, "grad_norm": 12.373233795166016, "learning_rate": 2.9004001297718183e-06, "loss": 4.0121, "step": 1341 }, { "epoch": 0.02, "grad_norm": 11.320234298706055, "learning_rate": 2.9025629934032663e-06, "loss": 4.8976, "step": 1342 }, { "epoch": 0.02, "grad_norm": 12.287766456604004, "learning_rate": 2.9047258570347144e-06, "loss": 4.7474, "step": 1343 }, { "epoch": 0.02, "grad_norm": 12.509185791015625, "learning_rate": 2.9068887206661624e-06, "loss": 4.3534, "step": 1344 }, { "epoch": 0.02, "grad_norm": 14.289008140563965, "learning_rate": 2.90905158429761e-06, "loss": 4.5995, "step": 1345 }, { "epoch": 0.02, "grad_norm": 12.80944538116455, "learning_rate": 2.911214447929058e-06, "loss": 4.6337, "step": 1346 }, { "epoch": 0.02, "grad_norm": 11.413985252380371, "learning_rate": 2.913377311560506e-06, "loss": 4.9268, "step": 1347 }, { "epoch": 0.02, "grad_norm": 12.64456844329834, "learning_rate": 2.915540175191954e-06, "loss": 4.4214, "step": 1348 }, { "epoch": 0.02, "grad_norm": 12.103008270263672, "learning_rate": 2.9177030388234023e-06, "loss": 4.4636, "step": 1349 }, { "epoch": 0.02, "grad_norm": 14.19888687133789, "learning_rate": 2.9198659024548503e-06, "loss": 3.9539, "step": 1350 }, { "epoch": 0.02, "grad_norm": 12.576031684875488, "learning_rate": 2.9220287660862984e-06, "loss": 5.2113, "step": 1351 }, { "epoch": 0.02, "grad_norm": 12.384243965148926, "learning_rate": 2.9241916297177464e-06, "loss": 4.9768, "step": 1352 }, { "epoch": 0.02, "grad_norm": 14.408811569213867, "learning_rate": 2.9263544933491945e-06, "loss": 4.6497, "step": 1353 }, { "epoch": 0.02, "grad_norm": 13.923047065734863, "learning_rate": 2.9285173569806425e-06, "loss": 4.7361, "step": 1354 }, { "epoch": 0.02, "grad_norm": 13.09285831451416, "learning_rate": 2.9306802206120906e-06, "loss": 4.8887, "step": 1355 }, { "epoch": 0.02, "grad_norm": 12.179058074951172, "learning_rate": 2.9328430842435386e-06, "loss": 4.5415, "step": 1356 }, { "epoch": 0.02, "grad_norm": 12.286667823791504, "learning_rate": 2.9350059478749867e-06, "loss": 4.5688, "step": 1357 }, { "epoch": 0.02, "grad_norm": 10.887858390808105, "learning_rate": 2.9371688115064347e-06, "loss": 4.0194, "step": 1358 }, { "epoch": 0.02, "grad_norm": 11.474930763244629, "learning_rate": 2.9393316751378823e-06, "loss": 4.9375, "step": 1359 }, { "epoch": 0.02, "grad_norm": 11.145259857177734, "learning_rate": 2.9414945387693312e-06, "loss": 4.4704, "step": 1360 }, { "epoch": 0.02, "grad_norm": 10.342259407043457, "learning_rate": 2.9436574024007793e-06, "loss": 4.816, "step": 1361 }, { "epoch": 0.02, "grad_norm": 12.71216869354248, "learning_rate": 2.9458202660322273e-06, "loss": 4.6273, "step": 1362 }, { "epoch": 0.02, "grad_norm": 11.635815620422363, "learning_rate": 2.947983129663675e-06, "loss": 4.61, "step": 1363 }, { "epoch": 0.02, "grad_norm": 12.091771125793457, "learning_rate": 2.950145993295123e-06, "loss": 3.4012, "step": 1364 }, { "epoch": 0.02, "grad_norm": 12.202216148376465, "learning_rate": 2.952308856926571e-06, "loss": 3.3861, "step": 1365 }, { "epoch": 0.02, "grad_norm": 10.313556671142578, "learning_rate": 2.954471720558019e-06, "loss": 4.9151, "step": 1366 }, { "epoch": 0.02, "grad_norm": 10.876091957092285, "learning_rate": 2.956634584189467e-06, "loss": 4.5518, "step": 1367 }, { "epoch": 0.02, "grad_norm": 12.276042938232422, "learning_rate": 2.9587974478209152e-06, "loss": 4.3956, "step": 1368 }, { "epoch": 0.02, "grad_norm": 10.825971603393555, "learning_rate": 2.9609603114523633e-06, "loss": 4.2841, "step": 1369 }, { "epoch": 0.02, "grad_norm": 12.278814315795898, "learning_rate": 2.9631231750838113e-06, "loss": 4.2299, "step": 1370 }, { "epoch": 0.02, "grad_norm": 13.745345115661621, "learning_rate": 2.9652860387152594e-06, "loss": 4.3312, "step": 1371 }, { "epoch": 0.02, "grad_norm": 12.49174690246582, "learning_rate": 2.9674489023467074e-06, "loss": 4.3869, "step": 1372 }, { "epoch": 0.02, "grad_norm": 11.8981351852417, "learning_rate": 2.9696117659781555e-06, "loss": 4.5996, "step": 1373 }, { "epoch": 0.02, "grad_norm": 12.532120704650879, "learning_rate": 2.9717746296096035e-06, "loss": 4.1906, "step": 1374 }, { "epoch": 0.02, "grad_norm": 13.009952545166016, "learning_rate": 2.9739374932410516e-06, "loss": 3.8317, "step": 1375 }, { "epoch": 0.02, "grad_norm": 11.009418487548828, "learning_rate": 2.9761003568724996e-06, "loss": 4.1347, "step": 1376 }, { "epoch": 0.02, "grad_norm": 12.809456825256348, "learning_rate": 2.9782632205039473e-06, "loss": 4.501, "step": 1377 }, { "epoch": 0.02, "grad_norm": 12.564423561096191, "learning_rate": 2.9804260841353953e-06, "loss": 4.8043, "step": 1378 }, { "epoch": 0.02, "grad_norm": 11.30156421661377, "learning_rate": 2.9825889477668434e-06, "loss": 4.1341, "step": 1379 }, { "epoch": 0.02, "grad_norm": 12.065356254577637, "learning_rate": 2.9847518113982914e-06, "loss": 4.3322, "step": 1380 }, { "epoch": 0.02, "grad_norm": 11.856217384338379, "learning_rate": 2.9869146750297395e-06, "loss": 4.5405, "step": 1381 }, { "epoch": 0.02, "grad_norm": 13.144573211669922, "learning_rate": 2.9890775386611875e-06, "loss": 4.5541, "step": 1382 }, { "epoch": 0.02, "grad_norm": 12.353246688842773, "learning_rate": 2.9912404022926356e-06, "loss": 5.0357, "step": 1383 }, { "epoch": 0.02, "grad_norm": 12.941407203674316, "learning_rate": 2.9934032659240836e-06, "loss": 4.5438, "step": 1384 }, { "epoch": 0.02, "grad_norm": 12.811026573181152, "learning_rate": 2.9955661295555317e-06, "loss": 4.3818, "step": 1385 }, { "epoch": 0.02, "grad_norm": 12.637811660766602, "learning_rate": 2.9977289931869797e-06, "loss": 4.9769, "step": 1386 }, { "epoch": 0.02, "grad_norm": 10.2847900390625, "learning_rate": 2.9998918568184278e-06, "loss": 4.1664, "step": 1387 }, { "epoch": 0.02, "grad_norm": 12.96866512298584, "learning_rate": 3.002054720449876e-06, "loss": 4.3456, "step": 1388 }, { "epoch": 0.02, "grad_norm": 11.257702827453613, "learning_rate": 3.004217584081324e-06, "loss": 4.4601, "step": 1389 }, { "epoch": 0.02, "grad_norm": 12.355761528015137, "learning_rate": 3.006380447712772e-06, "loss": 4.3276, "step": 1390 }, { "epoch": 0.02, "grad_norm": 12.082730293273926, "learning_rate": 3.0085433113442195e-06, "loss": 4.0144, "step": 1391 }, { "epoch": 0.02, "grad_norm": 13.811758995056152, "learning_rate": 3.0107061749756684e-06, "loss": 4.5177, "step": 1392 }, { "epoch": 0.02, "grad_norm": 11.454133033752441, "learning_rate": 3.0128690386071165e-06, "loss": 4.5992, "step": 1393 }, { "epoch": 0.02, "grad_norm": 14.606463432312012, "learning_rate": 3.0150319022385645e-06, "loss": 4.151, "step": 1394 }, { "epoch": 0.02, "grad_norm": 11.683462142944336, "learning_rate": 3.017194765870012e-06, "loss": 4.9342, "step": 1395 }, { "epoch": 0.02, "grad_norm": 10.956005096435547, "learning_rate": 3.0193576295014602e-06, "loss": 4.2897, "step": 1396 }, { "epoch": 0.02, "grad_norm": 13.682661056518555, "learning_rate": 3.0215204931329083e-06, "loss": 4.9915, "step": 1397 }, { "epoch": 0.02, "grad_norm": 12.713763236999512, "learning_rate": 3.0236833567643563e-06, "loss": 4.7154, "step": 1398 }, { "epoch": 0.02, "grad_norm": 11.947385787963867, "learning_rate": 3.0258462203958044e-06, "loss": 4.2364, "step": 1399 }, { "epoch": 0.02, "grad_norm": 12.903411865234375, "learning_rate": 3.0280090840272524e-06, "loss": 4.6501, "step": 1400 }, { "epoch": 0.02, "grad_norm": 11.280333518981934, "learning_rate": 3.0301719476587005e-06, "loss": 4.5472, "step": 1401 }, { "epoch": 0.02, "grad_norm": 11.402761459350586, "learning_rate": 3.0323348112901485e-06, "loss": 4.8695, "step": 1402 }, { "epoch": 0.02, "grad_norm": 12.161768913269043, "learning_rate": 3.0344976749215966e-06, "loss": 4.3323, "step": 1403 }, { "epoch": 0.02, "grad_norm": 12.538371086120605, "learning_rate": 3.0366605385530446e-06, "loss": 4.9579, "step": 1404 }, { "epoch": 0.02, "grad_norm": 11.798382759094238, "learning_rate": 3.0388234021844927e-06, "loss": 4.4934, "step": 1405 }, { "epoch": 0.02, "grad_norm": 12.451025009155273, "learning_rate": 3.0409862658159407e-06, "loss": 4.7679, "step": 1406 }, { "epoch": 0.02, "grad_norm": 12.57625675201416, "learning_rate": 3.0431491294473888e-06, "loss": 4.8821, "step": 1407 }, { "epoch": 0.02, "grad_norm": 14.385107040405273, "learning_rate": 3.045311993078837e-06, "loss": 4.0119, "step": 1408 }, { "epoch": 0.02, "grad_norm": 11.309020042419434, "learning_rate": 3.0474748567102845e-06, "loss": 4.3456, "step": 1409 }, { "epoch": 0.02, "grad_norm": 13.595576286315918, "learning_rate": 3.0496377203417325e-06, "loss": 4.2584, "step": 1410 }, { "epoch": 0.02, "grad_norm": 10.999670028686523, "learning_rate": 3.0518005839731806e-06, "loss": 4.2083, "step": 1411 }, { "epoch": 0.02, "grad_norm": 11.741446495056152, "learning_rate": 3.0539634476046286e-06, "loss": 4.174, "step": 1412 }, { "epoch": 0.02, "grad_norm": 11.86070442199707, "learning_rate": 3.0561263112360767e-06, "loss": 4.2358, "step": 1413 }, { "epoch": 0.02, "grad_norm": 11.828994750976562, "learning_rate": 3.0582891748675247e-06, "loss": 4.5463, "step": 1414 }, { "epoch": 0.02, "grad_norm": 12.15069580078125, "learning_rate": 3.0604520384989728e-06, "loss": 4.5084, "step": 1415 }, { "epoch": 0.02, "grad_norm": 11.676057815551758, "learning_rate": 3.062614902130421e-06, "loss": 4.6979, "step": 1416 }, { "epoch": 0.02, "grad_norm": 10.617853164672852, "learning_rate": 3.064777765761869e-06, "loss": 4.4745, "step": 1417 }, { "epoch": 0.02, "grad_norm": 12.176138877868652, "learning_rate": 3.066940629393317e-06, "loss": 5.3874, "step": 1418 }, { "epoch": 0.02, "grad_norm": 12.281679153442383, "learning_rate": 3.069103493024765e-06, "loss": 3.7297, "step": 1419 }, { "epoch": 0.02, "grad_norm": 11.862064361572266, "learning_rate": 3.071266356656213e-06, "loss": 3.7141, "step": 1420 }, { "epoch": 0.02, "grad_norm": 10.840989112854004, "learning_rate": 3.073429220287661e-06, "loss": 4.9821, "step": 1421 }, { "epoch": 0.02, "grad_norm": 11.271451950073242, "learning_rate": 3.075592083919109e-06, "loss": 4.3166, "step": 1422 }, { "epoch": 0.02, "grad_norm": 12.696619033813477, "learning_rate": 3.0777549475505567e-06, "loss": 4.5943, "step": 1423 }, { "epoch": 0.02, "grad_norm": 11.75168514251709, "learning_rate": 3.0799178111820056e-06, "loss": 3.2152, "step": 1424 }, { "epoch": 0.02, "grad_norm": 11.260516166687012, "learning_rate": 3.0820806748134537e-06, "loss": 4.3637, "step": 1425 }, { "epoch": 0.02, "grad_norm": 13.721872329711914, "learning_rate": 3.0842435384449017e-06, "loss": 4.4987, "step": 1426 }, { "epoch": 0.02, "grad_norm": 10.642388343811035, "learning_rate": 3.0864064020763494e-06, "loss": 4.0481, "step": 1427 }, { "epoch": 0.02, "grad_norm": 11.420523643493652, "learning_rate": 3.0885692657077974e-06, "loss": 5.0372, "step": 1428 }, { "epoch": 0.02, "grad_norm": 12.255623817443848, "learning_rate": 3.0907321293392455e-06, "loss": 4.5406, "step": 1429 }, { "epoch": 0.02, "grad_norm": 10.714155197143555, "learning_rate": 3.0928949929706935e-06, "loss": 4.179, "step": 1430 }, { "epoch": 0.02, "grad_norm": 13.319537162780762, "learning_rate": 3.0950578566021416e-06, "loss": 3.9622, "step": 1431 }, { "epoch": 0.02, "grad_norm": 11.228487968444824, "learning_rate": 3.0972207202335896e-06, "loss": 4.4808, "step": 1432 }, { "epoch": 0.02, "grad_norm": 11.115050315856934, "learning_rate": 3.0993835838650377e-06, "loss": 4.647, "step": 1433 }, { "epoch": 0.02, "grad_norm": 12.971076965332031, "learning_rate": 3.1015464474964857e-06, "loss": 4.6546, "step": 1434 }, { "epoch": 0.02, "grad_norm": 12.776853561401367, "learning_rate": 3.1037093111279338e-06, "loss": 4.1716, "step": 1435 }, { "epoch": 0.02, "grad_norm": 12.214078903198242, "learning_rate": 3.105872174759382e-06, "loss": 4.8026, "step": 1436 }, { "epoch": 0.02, "grad_norm": 13.085814476013184, "learning_rate": 3.10803503839083e-06, "loss": 4.3757, "step": 1437 }, { "epoch": 0.02, "grad_norm": 12.861754417419434, "learning_rate": 3.110197902022278e-06, "loss": 4.3104, "step": 1438 }, { "epoch": 0.02, "grad_norm": 13.237727165222168, "learning_rate": 3.112360765653726e-06, "loss": 4.358, "step": 1439 }, { "epoch": 0.02, "grad_norm": 12.065521240234375, "learning_rate": 3.114523629285174e-06, "loss": 4.8197, "step": 1440 }, { "epoch": 0.02, "grad_norm": 10.994734764099121, "learning_rate": 3.1166864929166217e-06, "loss": 4.8694, "step": 1441 }, { "epoch": 0.02, "grad_norm": 11.616357803344727, "learning_rate": 3.1188493565480697e-06, "loss": 3.5445, "step": 1442 }, { "epoch": 0.02, "grad_norm": 11.73233699798584, "learning_rate": 3.1210122201795178e-06, "loss": 4.3421, "step": 1443 }, { "epoch": 0.02, "grad_norm": 11.554978370666504, "learning_rate": 3.123175083810966e-06, "loss": 4.9306, "step": 1444 }, { "epoch": 0.02, "grad_norm": 11.965292930603027, "learning_rate": 3.125337947442414e-06, "loss": 4.7409, "step": 1445 }, { "epoch": 0.02, "grad_norm": 11.460570335388184, "learning_rate": 3.127500811073862e-06, "loss": 3.9968, "step": 1446 }, { "epoch": 0.02, "grad_norm": 11.190811157226562, "learning_rate": 3.12966367470531e-06, "loss": 4.1759, "step": 1447 }, { "epoch": 0.02, "grad_norm": 11.760991096496582, "learning_rate": 3.131826538336758e-06, "loss": 4.9485, "step": 1448 }, { "epoch": 0.02, "grad_norm": 12.896478652954102, "learning_rate": 3.133989401968206e-06, "loss": 3.1915, "step": 1449 }, { "epoch": 0.02, "grad_norm": 11.528347969055176, "learning_rate": 3.136152265599654e-06, "loss": 4.322, "step": 1450 }, { "epoch": 0.02, "grad_norm": 11.392394065856934, "learning_rate": 3.138315129231102e-06, "loss": 4.9612, "step": 1451 }, { "epoch": 0.02, "grad_norm": 12.84676742553711, "learning_rate": 3.14047799286255e-06, "loss": 3.9723, "step": 1452 }, { "epoch": 0.02, "grad_norm": 12.284416198730469, "learning_rate": 3.1426408564939983e-06, "loss": 4.2606, "step": 1453 }, { "epoch": 0.02, "grad_norm": 11.606011390686035, "learning_rate": 3.1448037201254463e-06, "loss": 4.58, "step": 1454 }, { "epoch": 0.02, "grad_norm": 11.772802352905273, "learning_rate": 3.146966583756894e-06, "loss": 4.8033, "step": 1455 }, { "epoch": 0.02, "grad_norm": 11.70073127746582, "learning_rate": 3.149129447388343e-06, "loss": 4.006, "step": 1456 }, { "epoch": 0.02, "grad_norm": 12.41598129272461, "learning_rate": 3.151292311019791e-06, "loss": 4.2799, "step": 1457 }, { "epoch": 0.02, "grad_norm": 11.055606842041016, "learning_rate": 3.153455174651239e-06, "loss": 4.2379, "step": 1458 }, { "epoch": 0.02, "grad_norm": 12.187858581542969, "learning_rate": 3.1556180382826866e-06, "loss": 4.5237, "step": 1459 }, { "epoch": 0.02, "grad_norm": 10.96176815032959, "learning_rate": 3.1577809019141346e-06, "loss": 4.9948, "step": 1460 }, { "epoch": 0.02, "grad_norm": 10.832574844360352, "learning_rate": 3.1599437655455827e-06, "loss": 4.6117, "step": 1461 }, { "epoch": 0.02, "grad_norm": 12.244499206542969, "learning_rate": 3.1621066291770307e-06, "loss": 4.5901, "step": 1462 }, { "epoch": 0.02, "grad_norm": 11.921607971191406, "learning_rate": 3.1642694928084788e-06, "loss": 4.7298, "step": 1463 }, { "epoch": 0.02, "grad_norm": 11.40497875213623, "learning_rate": 3.166432356439927e-06, "loss": 4.7916, "step": 1464 }, { "epoch": 0.02, "grad_norm": 12.282393455505371, "learning_rate": 3.168595220071375e-06, "loss": 4.8436, "step": 1465 }, { "epoch": 0.02, "grad_norm": 12.972391128540039, "learning_rate": 3.170758083702823e-06, "loss": 4.0922, "step": 1466 }, { "epoch": 0.02, "grad_norm": 10.850971221923828, "learning_rate": 3.172920947334271e-06, "loss": 4.3703, "step": 1467 }, { "epoch": 0.02, "grad_norm": 11.389147758483887, "learning_rate": 3.175083810965719e-06, "loss": 4.6753, "step": 1468 }, { "epoch": 0.02, "grad_norm": 11.446407318115234, "learning_rate": 3.177246674597167e-06, "loss": 4.4188, "step": 1469 }, { "epoch": 0.02, "grad_norm": 13.9093656539917, "learning_rate": 3.179409538228615e-06, "loss": 4.6395, "step": 1470 }, { "epoch": 0.02, "grad_norm": 11.688645362854004, "learning_rate": 3.181572401860063e-06, "loss": 4.4289, "step": 1471 }, { "epoch": 0.02, "grad_norm": 11.410633087158203, "learning_rate": 3.1837352654915112e-06, "loss": 4.2505, "step": 1472 }, { "epoch": 0.02, "grad_norm": 10.583949089050293, "learning_rate": 3.185898129122959e-06, "loss": 4.235, "step": 1473 }, { "epoch": 0.02, "grad_norm": 10.82181453704834, "learning_rate": 3.188060992754407e-06, "loss": 4.1438, "step": 1474 }, { "epoch": 0.02, "grad_norm": 13.6083402633667, "learning_rate": 3.190223856385855e-06, "loss": 3.7941, "step": 1475 }, { "epoch": 0.02, "grad_norm": 11.899089813232422, "learning_rate": 3.192386720017303e-06, "loss": 4.1922, "step": 1476 }, { "epoch": 0.02, "grad_norm": 11.005127906799316, "learning_rate": 3.194549583648751e-06, "loss": 4.0567, "step": 1477 }, { "epoch": 0.02, "grad_norm": 10.249770164489746, "learning_rate": 3.196712447280199e-06, "loss": 3.9936, "step": 1478 }, { "epoch": 0.02, "grad_norm": 11.60037899017334, "learning_rate": 3.198875310911647e-06, "loss": 4.1012, "step": 1479 }, { "epoch": 0.02, "grad_norm": 10.947831153869629, "learning_rate": 3.201038174543095e-06, "loss": 4.3795, "step": 1480 }, { "epoch": 0.02, "grad_norm": 12.594507217407227, "learning_rate": 3.2032010381745433e-06, "loss": 4.0477, "step": 1481 }, { "epoch": 0.02, "grad_norm": 12.660821914672852, "learning_rate": 3.2053639018059913e-06, "loss": 4.6079, "step": 1482 }, { "epoch": 0.02, "grad_norm": 11.623332023620605, "learning_rate": 3.2075267654374394e-06, "loss": 3.6572, "step": 1483 }, { "epoch": 0.02, "grad_norm": 10.162444114685059, "learning_rate": 3.2096896290688874e-06, "loss": 4.4004, "step": 1484 }, { "epoch": 0.02, "grad_norm": 12.372648239135742, "learning_rate": 3.2118524927003355e-06, "loss": 4.5305, "step": 1485 }, { "epoch": 0.02, "grad_norm": 14.224369049072266, "learning_rate": 3.214015356331783e-06, "loss": 4.2435, "step": 1486 }, { "epoch": 0.02, "grad_norm": 11.623981475830078, "learning_rate": 3.216178219963231e-06, "loss": 4.7688, "step": 1487 }, { "epoch": 0.02, "grad_norm": 12.024943351745605, "learning_rate": 3.21834108359468e-06, "loss": 4.8208, "step": 1488 }, { "epoch": 0.02, "grad_norm": 11.464527130126953, "learning_rate": 3.220503947226128e-06, "loss": 4.2334, "step": 1489 }, { "epoch": 0.02, "grad_norm": 11.455788612365723, "learning_rate": 3.222666810857576e-06, "loss": 3.8472, "step": 1490 }, { "epoch": 0.02, "grad_norm": 12.950093269348145, "learning_rate": 3.2248296744890238e-06, "loss": 4.6151, "step": 1491 }, { "epoch": 0.02, "grad_norm": 11.189417839050293, "learning_rate": 3.226992538120472e-06, "loss": 4.2303, "step": 1492 }, { "epoch": 0.02, "grad_norm": 13.135046005249023, "learning_rate": 3.22915540175192e-06, "loss": 4.7545, "step": 1493 }, { "epoch": 0.02, "grad_norm": 12.836369514465332, "learning_rate": 3.231318265383368e-06, "loss": 4.299, "step": 1494 }, { "epoch": 0.02, "grad_norm": 10.050827980041504, "learning_rate": 3.233481129014816e-06, "loss": 4.2701, "step": 1495 }, { "epoch": 0.02, "grad_norm": 12.540815353393555, "learning_rate": 3.235643992646264e-06, "loss": 4.5672, "step": 1496 }, { "epoch": 0.02, "grad_norm": 14.994036674499512, "learning_rate": 3.237806856277712e-06, "loss": 4.1653, "step": 1497 }, { "epoch": 0.02, "grad_norm": 13.307753562927246, "learning_rate": 3.23996971990916e-06, "loss": 3.9764, "step": 1498 }, { "epoch": 0.02, "grad_norm": 11.203314781188965, "learning_rate": 3.242132583540608e-06, "loss": 4.6945, "step": 1499 }, { "epoch": 0.02, "grad_norm": 12.90096378326416, "learning_rate": 3.2442954471720562e-06, "loss": 4.2529, "step": 1500 }, { "epoch": 0.02, "grad_norm": 11.286490440368652, "learning_rate": 3.2464583108035043e-06, "loss": 4.1902, "step": 1501 }, { "epoch": 0.02, "grad_norm": 12.231797218322754, "learning_rate": 3.2486211744349523e-06, "loss": 4.6081, "step": 1502 }, { "epoch": 0.02, "grad_norm": 11.361567497253418, "learning_rate": 3.2507840380664004e-06, "loss": 3.9586, "step": 1503 }, { "epoch": 0.02, "grad_norm": 11.136499404907227, "learning_rate": 3.2529469016978484e-06, "loss": 4.516, "step": 1504 }, { "epoch": 0.02, "grad_norm": 11.945900917053223, "learning_rate": 3.255109765329296e-06, "loss": 3.5569, "step": 1505 }, { "epoch": 0.02, "grad_norm": 10.899697303771973, "learning_rate": 3.257272628960744e-06, "loss": 4.2773, "step": 1506 }, { "epoch": 0.02, "grad_norm": 10.862403869628906, "learning_rate": 3.259435492592192e-06, "loss": 4.6645, "step": 1507 }, { "epoch": 0.02, "grad_norm": 12.348076820373535, "learning_rate": 3.26159835622364e-06, "loss": 4.0451, "step": 1508 }, { "epoch": 0.02, "grad_norm": 13.284974098205566, "learning_rate": 3.2637612198550883e-06, "loss": 4.8727, "step": 1509 }, { "epoch": 0.02, "grad_norm": 11.709855079650879, "learning_rate": 3.2659240834865363e-06, "loss": 4.166, "step": 1510 }, { "epoch": 0.02, "grad_norm": 11.946309089660645, "learning_rate": 3.2680869471179844e-06, "loss": 4.0828, "step": 1511 }, { "epoch": 0.02, "grad_norm": 12.683746337890625, "learning_rate": 3.2702498107494324e-06, "loss": 4.6429, "step": 1512 }, { "epoch": 0.02, "grad_norm": 12.597497940063477, "learning_rate": 3.2724126743808805e-06, "loss": 3.773, "step": 1513 }, { "epoch": 0.02, "grad_norm": 11.056622505187988, "learning_rate": 3.2745755380123285e-06, "loss": 4.2434, "step": 1514 }, { "epoch": 0.02, "grad_norm": 14.085946083068848, "learning_rate": 3.2767384016437766e-06, "loss": 3.9486, "step": 1515 }, { "epoch": 0.02, "grad_norm": 13.529438972473145, "learning_rate": 3.2789012652752246e-06, "loss": 4.7214, "step": 1516 }, { "epoch": 0.02, "grad_norm": 11.721991539001465, "learning_rate": 3.2810641289066727e-06, "loss": 5.0068, "step": 1517 }, { "epoch": 0.02, "grad_norm": 13.804047584533691, "learning_rate": 3.2832269925381203e-06, "loss": 4.2084, "step": 1518 }, { "epoch": 0.02, "grad_norm": 11.725698471069336, "learning_rate": 3.2853898561695683e-06, "loss": 3.9338, "step": 1519 }, { "epoch": 0.02, "grad_norm": 11.631529808044434, "learning_rate": 3.2875527198010172e-06, "loss": 4.3019, "step": 1520 }, { "epoch": 0.02, "grad_norm": 11.979877471923828, "learning_rate": 3.2897155834324653e-06, "loss": 4.1938, "step": 1521 }, { "epoch": 0.02, "grad_norm": 10.731409072875977, "learning_rate": 3.2918784470639133e-06, "loss": 4.1765, "step": 1522 }, { "epoch": 0.02, "grad_norm": 13.65379810333252, "learning_rate": 3.294041310695361e-06, "loss": 4.6657, "step": 1523 }, { "epoch": 0.02, "grad_norm": 11.88555908203125, "learning_rate": 3.296204174326809e-06, "loss": 4.7202, "step": 1524 }, { "epoch": 0.02, "grad_norm": 10.737022399902344, "learning_rate": 3.298367037958257e-06, "loss": 4.0771, "step": 1525 }, { "epoch": 0.02, "grad_norm": 10.597625732421875, "learning_rate": 3.300529901589705e-06, "loss": 4.4848, "step": 1526 }, { "epoch": 0.02, "grad_norm": 11.393888473510742, "learning_rate": 3.302692765221153e-06, "loss": 3.829, "step": 1527 }, { "epoch": 0.02, "grad_norm": 11.580937385559082, "learning_rate": 3.3048556288526012e-06, "loss": 3.793, "step": 1528 }, { "epoch": 0.02, "grad_norm": 11.702557563781738, "learning_rate": 3.3070184924840493e-06, "loss": 4.5536, "step": 1529 }, { "epoch": 0.02, "grad_norm": 14.122503280639648, "learning_rate": 3.3091813561154973e-06, "loss": 4.89, "step": 1530 }, { "epoch": 0.02, "grad_norm": 12.768058776855469, "learning_rate": 3.3113442197469454e-06, "loss": 3.4509, "step": 1531 }, { "epoch": 0.02, "grad_norm": 10.719277381896973, "learning_rate": 3.3135070833783934e-06, "loss": 4.0826, "step": 1532 }, { "epoch": 0.02, "grad_norm": 10.211833000183105, "learning_rate": 3.3156699470098415e-06, "loss": 3.6199, "step": 1533 }, { "epoch": 0.02, "grad_norm": 11.10282039642334, "learning_rate": 3.3178328106412895e-06, "loss": 4.0486, "step": 1534 }, { "epoch": 0.02, "grad_norm": 12.32284164428711, "learning_rate": 3.3199956742727376e-06, "loss": 3.9136, "step": 1535 }, { "epoch": 0.02, "grad_norm": 11.946562767028809, "learning_rate": 3.322158537904185e-06, "loss": 4.0113, "step": 1536 }, { "epoch": 0.02, "grad_norm": 10.518763542175293, "learning_rate": 3.3243214015356333e-06, "loss": 4.844, "step": 1537 }, { "epoch": 0.02, "grad_norm": 12.91810417175293, "learning_rate": 3.3264842651670813e-06, "loss": 4.5241, "step": 1538 }, { "epoch": 0.02, "grad_norm": 11.700239181518555, "learning_rate": 3.3286471287985294e-06, "loss": 5.1124, "step": 1539 }, { "epoch": 0.02, "grad_norm": 11.854913711547852, "learning_rate": 3.3308099924299774e-06, "loss": 4.2863, "step": 1540 }, { "epoch": 0.02, "grad_norm": 10.771307945251465, "learning_rate": 3.3329728560614255e-06, "loss": 4.5595, "step": 1541 }, { "epoch": 0.02, "grad_norm": 11.586605072021484, "learning_rate": 3.3351357196928735e-06, "loss": 4.2204, "step": 1542 }, { "epoch": 0.02, "grad_norm": 11.926681518554688, "learning_rate": 3.3372985833243216e-06, "loss": 4.2701, "step": 1543 }, { "epoch": 0.02, "grad_norm": 10.975078582763672, "learning_rate": 3.3394614469557696e-06, "loss": 4.2219, "step": 1544 }, { "epoch": 0.02, "grad_norm": 12.403084754943848, "learning_rate": 3.3416243105872177e-06, "loss": 4.0081, "step": 1545 }, { "epoch": 0.02, "grad_norm": 14.25927448272705, "learning_rate": 3.3437871742186657e-06, "loss": 4.077, "step": 1546 }, { "epoch": 0.02, "grad_norm": 11.605374336242676, "learning_rate": 3.3459500378501138e-06, "loss": 4.0592, "step": 1547 }, { "epoch": 0.02, "grad_norm": 12.28062915802002, "learning_rate": 3.348112901481562e-06, "loss": 4.5939, "step": 1548 }, { "epoch": 0.02, "grad_norm": 11.19558334350586, "learning_rate": 3.35027576511301e-06, "loss": 4.4678, "step": 1549 }, { "epoch": 0.02, "grad_norm": 11.551270484924316, "learning_rate": 3.3524386287444575e-06, "loss": 4.7113, "step": 1550 }, { "epoch": 0.02, "grad_norm": 12.146992683410645, "learning_rate": 3.3546014923759055e-06, "loss": 4.4055, "step": 1551 }, { "epoch": 0.02, "grad_norm": 12.07793140411377, "learning_rate": 3.3567643560073544e-06, "loss": 4.0498, "step": 1552 }, { "epoch": 0.02, "grad_norm": 13.116271018981934, "learning_rate": 3.3589272196388025e-06, "loss": 4.0003, "step": 1553 }, { "epoch": 0.02, "grad_norm": 11.585088729858398, "learning_rate": 3.36109008327025e-06, "loss": 4.418, "step": 1554 }, { "epoch": 0.02, "grad_norm": 11.86815071105957, "learning_rate": 3.363252946901698e-06, "loss": 4.2013, "step": 1555 }, { "epoch": 0.02, "grad_norm": 11.305362701416016, "learning_rate": 3.3654158105331462e-06, "loss": 3.7923, "step": 1556 }, { "epoch": 0.02, "grad_norm": 12.45474624633789, "learning_rate": 3.3675786741645943e-06, "loss": 4.6846, "step": 1557 }, { "epoch": 0.02, "grad_norm": 10.49845027923584, "learning_rate": 3.3697415377960423e-06, "loss": 3.7673, "step": 1558 }, { "epoch": 0.02, "grad_norm": 10.664828300476074, "learning_rate": 3.3719044014274904e-06, "loss": 3.9407, "step": 1559 }, { "epoch": 0.02, "grad_norm": 10.93397331237793, "learning_rate": 3.3740672650589384e-06, "loss": 4.4413, "step": 1560 }, { "epoch": 0.02, "grad_norm": 11.513033866882324, "learning_rate": 3.3762301286903865e-06, "loss": 4.9144, "step": 1561 }, { "epoch": 0.02, "grad_norm": 12.875184059143066, "learning_rate": 3.3783929923218345e-06, "loss": 3.8291, "step": 1562 }, { "epoch": 0.02, "grad_norm": 10.754402160644531, "learning_rate": 3.3805558559532826e-06, "loss": 4.2503, "step": 1563 }, { "epoch": 0.02, "grad_norm": 11.826030731201172, "learning_rate": 3.3827187195847306e-06, "loss": 3.9496, "step": 1564 }, { "epoch": 0.02, "grad_norm": 10.904598236083984, "learning_rate": 3.3848815832161787e-06, "loss": 4.4885, "step": 1565 }, { "epoch": 0.02, "grad_norm": 13.243110656738281, "learning_rate": 3.3870444468476267e-06, "loss": 3.6899, "step": 1566 }, { "epoch": 0.02, "grad_norm": 11.902169227600098, "learning_rate": 3.3892073104790748e-06, "loss": 3.7226, "step": 1567 }, { "epoch": 0.02, "grad_norm": 15.633435249328613, "learning_rate": 3.3913701741105224e-06, "loss": 4.0832, "step": 1568 }, { "epoch": 0.02, "grad_norm": 10.803622245788574, "learning_rate": 3.3935330377419705e-06, "loss": 4.41, "step": 1569 }, { "epoch": 0.02, "grad_norm": 11.842852592468262, "learning_rate": 3.3956959013734185e-06, "loss": 4.6434, "step": 1570 }, { "epoch": 0.02, "grad_norm": 12.362452507019043, "learning_rate": 3.3978587650048666e-06, "loss": 4.4824, "step": 1571 }, { "epoch": 0.02, "grad_norm": 12.062047004699707, "learning_rate": 3.4000216286363146e-06, "loss": 4.1307, "step": 1572 }, { "epoch": 0.02, "grad_norm": 11.793044090270996, "learning_rate": 3.4021844922677627e-06, "loss": 4.5231, "step": 1573 }, { "epoch": 0.02, "grad_norm": 12.687938690185547, "learning_rate": 3.4043473558992107e-06, "loss": 4.3187, "step": 1574 }, { "epoch": 0.02, "grad_norm": 11.617393493652344, "learning_rate": 3.4065102195306588e-06, "loss": 4.4814, "step": 1575 }, { "epoch": 0.02, "grad_norm": 11.564459800720215, "learning_rate": 3.408673083162107e-06, "loss": 4.3196, "step": 1576 }, { "epoch": 0.02, "grad_norm": 11.305740356445312, "learning_rate": 3.410835946793555e-06, "loss": 4.6016, "step": 1577 }, { "epoch": 0.02, "grad_norm": 11.288411140441895, "learning_rate": 3.412998810425003e-06, "loss": 4.652, "step": 1578 }, { "epoch": 0.02, "grad_norm": 10.930660247802734, "learning_rate": 3.415161674056451e-06, "loss": 4.0614, "step": 1579 }, { "epoch": 0.02, "grad_norm": 10.734906196594238, "learning_rate": 3.417324537687899e-06, "loss": 4.3193, "step": 1580 }, { "epoch": 0.02, "grad_norm": 11.097322463989258, "learning_rate": 3.419487401319347e-06, "loss": 4.4235, "step": 1581 }, { "epoch": 0.02, "grad_norm": 11.56418514251709, "learning_rate": 3.4216502649507947e-06, "loss": 4.868, "step": 1582 }, { "epoch": 0.02, "grad_norm": 12.281291961669922, "learning_rate": 3.4238131285822427e-06, "loss": 4.8573, "step": 1583 }, { "epoch": 0.02, "grad_norm": 11.603405952453613, "learning_rate": 3.4259759922136916e-06, "loss": 3.5964, "step": 1584 }, { "epoch": 0.02, "grad_norm": 10.395821571350098, "learning_rate": 3.4281388558451397e-06, "loss": 4.1174, "step": 1585 }, { "epoch": 0.02, "grad_norm": 13.141599655151367, "learning_rate": 3.4303017194765873e-06, "loss": 4.4891, "step": 1586 }, { "epoch": 0.02, "grad_norm": 10.767548561096191, "learning_rate": 3.4324645831080354e-06, "loss": 4.2392, "step": 1587 }, { "epoch": 0.02, "grad_norm": 11.61565113067627, "learning_rate": 3.4346274467394834e-06, "loss": 4.2812, "step": 1588 }, { "epoch": 0.02, "grad_norm": 11.390174865722656, "learning_rate": 3.4367903103709315e-06, "loss": 4.8662, "step": 1589 }, { "epoch": 0.02, "grad_norm": 11.921490669250488, "learning_rate": 3.4389531740023795e-06, "loss": 4.402, "step": 1590 }, { "epoch": 0.02, "grad_norm": 12.603959083557129, "learning_rate": 3.4411160376338276e-06, "loss": 3.9603, "step": 1591 }, { "epoch": 0.02, "grad_norm": 10.44564151763916, "learning_rate": 3.4432789012652756e-06, "loss": 4.2107, "step": 1592 }, { "epoch": 0.02, "grad_norm": 10.001738548278809, "learning_rate": 3.4454417648967237e-06, "loss": 4.0604, "step": 1593 }, { "epoch": 0.02, "grad_norm": 10.464827537536621, "learning_rate": 3.4476046285281717e-06, "loss": 3.9239, "step": 1594 }, { "epoch": 0.02, "grad_norm": 10.961395263671875, "learning_rate": 3.4497674921596198e-06, "loss": 4.1638, "step": 1595 }, { "epoch": 0.02, "grad_norm": 12.109036445617676, "learning_rate": 3.451930355791068e-06, "loss": 4.4745, "step": 1596 }, { "epoch": 0.02, "grad_norm": 12.801247596740723, "learning_rate": 3.454093219422516e-06, "loss": 4.6465, "step": 1597 }, { "epoch": 0.02, "grad_norm": 11.181694984436035, "learning_rate": 3.456256083053964e-06, "loss": 4.6586, "step": 1598 }, { "epoch": 0.02, "grad_norm": 11.497315406799316, "learning_rate": 3.458418946685412e-06, "loss": 4.0168, "step": 1599 }, { "epoch": 0.02, "grad_norm": 10.755132675170898, "learning_rate": 3.4605818103168596e-06, "loss": 4.0607, "step": 1600 }, { "epoch": 0.02, "grad_norm": 10.693632125854492, "learning_rate": 3.4627446739483077e-06, "loss": 4.5931, "step": 1601 }, { "epoch": 0.02, "grad_norm": 12.478408813476562, "learning_rate": 3.4649075375797557e-06, "loss": 4.202, "step": 1602 }, { "epoch": 0.02, "grad_norm": 12.14537525177002, "learning_rate": 3.4670704012112038e-06, "loss": 4.1938, "step": 1603 }, { "epoch": 0.02, "grad_norm": 11.679581642150879, "learning_rate": 3.469233264842652e-06, "loss": 3.5432, "step": 1604 }, { "epoch": 0.02, "grad_norm": 11.378005981445312, "learning_rate": 3.4713961284741e-06, "loss": 4.1148, "step": 1605 }, { "epoch": 0.02, "grad_norm": 12.000053405761719, "learning_rate": 3.473558992105548e-06, "loss": 3.6384, "step": 1606 }, { "epoch": 0.02, "grad_norm": 9.568819999694824, "learning_rate": 3.475721855736996e-06, "loss": 4.3397, "step": 1607 }, { "epoch": 0.02, "grad_norm": 9.370512962341309, "learning_rate": 3.477884719368444e-06, "loss": 3.9849, "step": 1608 }, { "epoch": 0.02, "grad_norm": 10.872862815856934, "learning_rate": 3.480047582999892e-06, "loss": 4.6041, "step": 1609 }, { "epoch": 0.02, "grad_norm": 12.23134708404541, "learning_rate": 3.48221044663134e-06, "loss": 4.6222, "step": 1610 }, { "epoch": 0.02, "grad_norm": 11.462309837341309, "learning_rate": 3.484373310262788e-06, "loss": 3.8494, "step": 1611 }, { "epoch": 0.02, "grad_norm": 10.307671546936035, "learning_rate": 3.4865361738942362e-06, "loss": 4.1359, "step": 1612 }, { "epoch": 0.02, "grad_norm": 12.017020225524902, "learning_rate": 3.4886990375256843e-06, "loss": 4.3373, "step": 1613 }, { "epoch": 0.02, "grad_norm": 10.689961433410645, "learning_rate": 3.490861901157132e-06, "loss": 4.0619, "step": 1614 }, { "epoch": 0.02, "grad_norm": 13.378464698791504, "learning_rate": 3.49302476478858e-06, "loss": 4.2383, "step": 1615 }, { "epoch": 0.02, "grad_norm": 10.67946720123291, "learning_rate": 3.495187628420029e-06, "loss": 4.3528, "step": 1616 }, { "epoch": 0.02, "grad_norm": 12.989761352539062, "learning_rate": 3.497350492051477e-06, "loss": 3.8855, "step": 1617 }, { "epoch": 0.02, "grad_norm": 11.924121856689453, "learning_rate": 3.4995133556829245e-06, "loss": 4.0977, "step": 1618 }, { "epoch": 0.02, "grad_norm": 11.341606140136719, "learning_rate": 3.5016762193143726e-06, "loss": 4.5384, "step": 1619 }, { "epoch": 0.02, "grad_norm": 11.472982406616211, "learning_rate": 3.5038390829458206e-06, "loss": 3.8725, "step": 1620 }, { "epoch": 0.02, "grad_norm": 10.031983375549316, "learning_rate": 3.5060019465772687e-06, "loss": 4.435, "step": 1621 }, { "epoch": 0.02, "grad_norm": 10.213419914245605, "learning_rate": 3.5081648102087167e-06, "loss": 4.2292, "step": 1622 }, { "epoch": 0.02, "grad_norm": 11.43311595916748, "learning_rate": 3.5103276738401648e-06, "loss": 4.207, "step": 1623 }, { "epoch": 0.02, "grad_norm": 12.267828941345215, "learning_rate": 3.512490537471613e-06, "loss": 4.5215, "step": 1624 }, { "epoch": 0.02, "grad_norm": 11.700387001037598, "learning_rate": 3.514653401103061e-06, "loss": 3.9875, "step": 1625 }, { "epoch": 0.02, "grad_norm": 10.934072494506836, "learning_rate": 3.516816264734509e-06, "loss": 4.1271, "step": 1626 }, { "epoch": 0.02, "grad_norm": 12.85871696472168, "learning_rate": 3.518979128365957e-06, "loss": 4.1432, "step": 1627 }, { "epoch": 0.02, "grad_norm": 11.072261810302734, "learning_rate": 3.521141991997405e-06, "loss": 4.5738, "step": 1628 }, { "epoch": 0.02, "grad_norm": 10.991637229919434, "learning_rate": 3.523304855628853e-06, "loss": 4.3382, "step": 1629 }, { "epoch": 0.02, "grad_norm": 10.91032886505127, "learning_rate": 3.525467719260301e-06, "loss": 4.4652, "step": 1630 }, { "epoch": 0.02, "grad_norm": 10.712969779968262, "learning_rate": 3.527630582891749e-06, "loss": 3.4139, "step": 1631 }, { "epoch": 0.02, "grad_norm": 11.16795825958252, "learning_rate": 3.529793446523197e-06, "loss": 4.1797, "step": 1632 }, { "epoch": 0.02, "grad_norm": 9.784947395324707, "learning_rate": 3.531956310154645e-06, "loss": 4.1289, "step": 1633 }, { "epoch": 0.02, "grad_norm": 11.491216659545898, "learning_rate": 3.534119173786093e-06, "loss": 4.4322, "step": 1634 }, { "epoch": 0.02, "grad_norm": 11.535569190979004, "learning_rate": 3.536282037417541e-06, "loss": 4.5389, "step": 1635 }, { "epoch": 0.02, "grad_norm": 13.112590789794922, "learning_rate": 3.538444901048989e-06, "loss": 4.3248, "step": 1636 }, { "epoch": 0.02, "grad_norm": 11.65595817565918, "learning_rate": 3.540607764680437e-06, "loss": 4.2244, "step": 1637 }, { "epoch": 0.02, "grad_norm": 11.592198371887207, "learning_rate": 3.542770628311885e-06, "loss": 3.984, "step": 1638 }, { "epoch": 0.02, "grad_norm": 10.489367485046387, "learning_rate": 3.544933491943333e-06, "loss": 3.5606, "step": 1639 }, { "epoch": 0.02, "grad_norm": 11.410898208618164, "learning_rate": 3.547096355574781e-06, "loss": 4.2542, "step": 1640 }, { "epoch": 0.02, "grad_norm": 11.095114707946777, "learning_rate": 3.5492592192062293e-06, "loss": 4.6536, "step": 1641 }, { "epoch": 0.02, "grad_norm": 11.354499816894531, "learning_rate": 3.5514220828376773e-06, "loss": 4.4898, "step": 1642 }, { "epoch": 0.02, "grad_norm": 14.918839454650879, "learning_rate": 3.5535849464691254e-06, "loss": 4.2169, "step": 1643 }, { "epoch": 0.02, "grad_norm": 10.300844192504883, "learning_rate": 3.5557478101005734e-06, "loss": 4.1181, "step": 1644 }, { "epoch": 0.02, "grad_norm": 10.884660720825195, "learning_rate": 3.5579106737320215e-06, "loss": 4.2891, "step": 1645 }, { "epoch": 0.02, "grad_norm": 10.660186767578125, "learning_rate": 3.560073537363469e-06, "loss": 3.8501, "step": 1646 }, { "epoch": 0.02, "grad_norm": 11.675220489501953, "learning_rate": 3.562236400994917e-06, "loss": 4.0189, "step": 1647 }, { "epoch": 0.02, "grad_norm": 10.690179824829102, "learning_rate": 3.564399264626366e-06, "loss": 3.9492, "step": 1648 }, { "epoch": 0.02, "grad_norm": 11.332380294799805, "learning_rate": 3.566562128257814e-06, "loss": 4.7517, "step": 1649 }, { "epoch": 0.02, "grad_norm": 11.192146301269531, "learning_rate": 3.5687249918892617e-06, "loss": 4.2709, "step": 1650 }, { "epoch": 0.02, "grad_norm": 10.68510627746582, "learning_rate": 3.5708878555207098e-06, "loss": 3.7651, "step": 1651 }, { "epoch": 0.02, "grad_norm": 11.243119239807129, "learning_rate": 3.573050719152158e-06, "loss": 4.7716, "step": 1652 }, { "epoch": 0.02, "grad_norm": 11.094183921813965, "learning_rate": 3.575213582783606e-06, "loss": 4.0511, "step": 1653 }, { "epoch": 0.02, "grad_norm": 10.621308326721191, "learning_rate": 3.577376446415054e-06, "loss": 4.1327, "step": 1654 }, { "epoch": 0.02, "grad_norm": 13.011143684387207, "learning_rate": 3.579539310046502e-06, "loss": 4.3913, "step": 1655 }, { "epoch": 0.02, "grad_norm": 10.619384765625, "learning_rate": 3.58170217367795e-06, "loss": 3.6235, "step": 1656 }, { "epoch": 0.02, "grad_norm": 12.2642822265625, "learning_rate": 3.583865037309398e-06, "loss": 3.6667, "step": 1657 }, { "epoch": 0.02, "grad_norm": 11.731559753417969, "learning_rate": 3.586027900940846e-06, "loss": 4.3827, "step": 1658 }, { "epoch": 0.02, "grad_norm": 14.125041961669922, "learning_rate": 3.588190764572294e-06, "loss": 3.8202, "step": 1659 }, { "epoch": 0.02, "grad_norm": 12.612730979919434, "learning_rate": 3.5903536282037422e-06, "loss": 4.0843, "step": 1660 }, { "epoch": 0.02, "grad_norm": 13.747857093811035, "learning_rate": 3.5925164918351903e-06, "loss": 4.0066, "step": 1661 }, { "epoch": 0.02, "grad_norm": 11.663832664489746, "learning_rate": 3.5946793554666383e-06, "loss": 4.0431, "step": 1662 }, { "epoch": 0.02, "grad_norm": 11.71921443939209, "learning_rate": 3.5968422190980864e-06, "loss": 4.0586, "step": 1663 }, { "epoch": 0.02, "grad_norm": 11.634730339050293, "learning_rate": 3.599005082729534e-06, "loss": 3.7922, "step": 1664 }, { "epoch": 0.02, "grad_norm": 11.078670501708984, "learning_rate": 3.601167946360982e-06, "loss": 3.6081, "step": 1665 }, { "epoch": 0.02, "grad_norm": 12.425122261047363, "learning_rate": 3.60333080999243e-06, "loss": 4.0479, "step": 1666 }, { "epoch": 0.02, "grad_norm": 11.095784187316895, "learning_rate": 3.605493673623878e-06, "loss": 3.9841, "step": 1667 }, { "epoch": 0.02, "grad_norm": 11.872443199157715, "learning_rate": 3.607656537255326e-06, "loss": 3.9361, "step": 1668 }, { "epoch": 0.02, "grad_norm": 11.146760940551758, "learning_rate": 3.6098194008867743e-06, "loss": 4.7524, "step": 1669 }, { "epoch": 0.02, "grad_norm": 11.211709976196289, "learning_rate": 3.6119822645182223e-06, "loss": 4.7951, "step": 1670 }, { "epoch": 0.02, "grad_norm": 11.001338958740234, "learning_rate": 3.6141451281496704e-06, "loss": 3.7036, "step": 1671 }, { "epoch": 0.02, "grad_norm": 10.138286590576172, "learning_rate": 3.6163079917811184e-06, "loss": 3.5311, "step": 1672 }, { "epoch": 0.02, "grad_norm": 12.247286796569824, "learning_rate": 3.6184708554125665e-06, "loss": 3.8478, "step": 1673 }, { "epoch": 0.02, "grad_norm": 11.408716201782227, "learning_rate": 3.6206337190440145e-06, "loss": 5.0313, "step": 1674 }, { "epoch": 0.02, "grad_norm": 11.175275802612305, "learning_rate": 3.6227965826754626e-06, "loss": 4.6078, "step": 1675 }, { "epoch": 0.02, "grad_norm": 10.419760704040527, "learning_rate": 3.6249594463069106e-06, "loss": 3.7804, "step": 1676 }, { "epoch": 0.02, "grad_norm": 10.652555465698242, "learning_rate": 3.6271223099383587e-06, "loss": 4.4823, "step": 1677 }, { "epoch": 0.02, "grad_norm": 10.134613990783691, "learning_rate": 3.6292851735698063e-06, "loss": 3.9078, "step": 1678 }, { "epoch": 0.02, "grad_norm": 11.779244422912598, "learning_rate": 3.6314480372012543e-06, "loss": 3.725, "step": 1679 }, { "epoch": 0.02, "grad_norm": 9.968269348144531, "learning_rate": 3.6336109008327032e-06, "loss": 3.69, "step": 1680 }, { "epoch": 0.02, "grad_norm": 12.425793647766113, "learning_rate": 3.6357737644641513e-06, "loss": 4.371, "step": 1681 }, { "epoch": 0.02, "grad_norm": 10.169991493225098, "learning_rate": 3.637936628095599e-06, "loss": 4.6269, "step": 1682 }, { "epoch": 0.02, "grad_norm": 10.906146049499512, "learning_rate": 3.640099491727047e-06, "loss": 4.4785, "step": 1683 }, { "epoch": 0.02, "grad_norm": 12.016074180603027, "learning_rate": 3.642262355358495e-06, "loss": 4.1297, "step": 1684 }, { "epoch": 0.02, "grad_norm": 10.37710189819336, "learning_rate": 3.644425218989943e-06, "loss": 4.4088, "step": 1685 }, { "epoch": 0.02, "grad_norm": 11.261598587036133, "learning_rate": 3.646588082621391e-06, "loss": 4.0072, "step": 1686 }, { "epoch": 0.02, "grad_norm": 11.337957382202148, "learning_rate": 3.648750946252839e-06, "loss": 4.0537, "step": 1687 }, { "epoch": 0.02, "grad_norm": 11.362714767456055, "learning_rate": 3.6509138098842872e-06, "loss": 3.4339, "step": 1688 }, { "epoch": 0.02, "grad_norm": 11.35744857788086, "learning_rate": 3.6530766735157353e-06, "loss": 3.7602, "step": 1689 }, { "epoch": 0.02, "grad_norm": 11.330410957336426, "learning_rate": 3.6552395371471833e-06, "loss": 4.0741, "step": 1690 }, { "epoch": 0.02, "grad_norm": 12.025324821472168, "learning_rate": 3.6574024007786314e-06, "loss": 3.2345, "step": 1691 }, { "epoch": 0.02, "grad_norm": 10.220582962036133, "learning_rate": 3.6595652644100794e-06, "loss": 3.6567, "step": 1692 }, { "epoch": 0.02, "grad_norm": 11.945990562438965, "learning_rate": 3.6617281280415275e-06, "loss": 3.9745, "step": 1693 }, { "epoch": 0.02, "grad_norm": 10.123006820678711, "learning_rate": 3.6638909916729755e-06, "loss": 4.2733, "step": 1694 }, { "epoch": 0.02, "grad_norm": 11.740171432495117, "learning_rate": 3.6660538553044236e-06, "loss": 4.1799, "step": 1695 }, { "epoch": 0.02, "grad_norm": 11.884323120117188, "learning_rate": 3.668216718935871e-06, "loss": 4.3162, "step": 1696 }, { "epoch": 0.02, "grad_norm": 12.496978759765625, "learning_rate": 3.6703795825673193e-06, "loss": 3.9132, "step": 1697 }, { "epoch": 0.02, "grad_norm": 13.158074378967285, "learning_rate": 3.6725424461987673e-06, "loss": 4.816, "step": 1698 }, { "epoch": 0.02, "grad_norm": 10.751579284667969, "learning_rate": 3.6747053098302154e-06, "loss": 5.2136, "step": 1699 }, { "epoch": 0.02, "grad_norm": 10.961874008178711, "learning_rate": 3.6768681734616634e-06, "loss": 3.9979, "step": 1700 }, { "epoch": 0.02, "grad_norm": 11.380071640014648, "learning_rate": 3.6790310370931115e-06, "loss": 3.8369, "step": 1701 }, { "epoch": 0.02, "grad_norm": 11.01839828491211, "learning_rate": 3.6811939007245595e-06, "loss": 3.9438, "step": 1702 }, { "epoch": 0.02, "grad_norm": 12.545709609985352, "learning_rate": 3.6833567643560076e-06, "loss": 4.0671, "step": 1703 }, { "epoch": 0.02, "grad_norm": 10.585204124450684, "learning_rate": 3.6855196279874556e-06, "loss": 4.4383, "step": 1704 }, { "epoch": 0.02, "grad_norm": 10.934146881103516, "learning_rate": 3.6876824916189037e-06, "loss": 4.3615, "step": 1705 }, { "epoch": 0.02, "grad_norm": 11.201759338378906, "learning_rate": 3.6898453552503517e-06, "loss": 3.764, "step": 1706 }, { "epoch": 0.02, "grad_norm": 10.646772384643555, "learning_rate": 3.6920082188817998e-06, "loss": 3.8963, "step": 1707 }, { "epoch": 0.02, "grad_norm": 10.394500732421875, "learning_rate": 3.694171082513248e-06, "loss": 3.7346, "step": 1708 }, { "epoch": 0.02, "grad_norm": 11.19033432006836, "learning_rate": 3.6963339461446954e-06, "loss": 3.6818, "step": 1709 }, { "epoch": 0.02, "grad_norm": 10.27661418914795, "learning_rate": 3.6984968097761435e-06, "loss": 4.6217, "step": 1710 }, { "epoch": 0.02, "grad_norm": 10.900540351867676, "learning_rate": 3.7006596734075915e-06, "loss": 3.7797, "step": 1711 }, { "epoch": 0.02, "grad_norm": 12.105822563171387, "learning_rate": 3.7028225370390404e-06, "loss": 3.6841, "step": 1712 }, { "epoch": 0.02, "grad_norm": 10.64345645904541, "learning_rate": 3.7049854006704885e-06, "loss": 4.0256, "step": 1713 }, { "epoch": 0.02, "grad_norm": 11.102750778198242, "learning_rate": 3.707148264301936e-06, "loss": 4.3787, "step": 1714 }, { "epoch": 0.02, "grad_norm": 10.351451873779297, "learning_rate": 3.709311127933384e-06, "loss": 4.6219, "step": 1715 }, { "epoch": 0.02, "grad_norm": 11.268248558044434, "learning_rate": 3.7114739915648322e-06, "loss": 4.4776, "step": 1716 }, { "epoch": 0.02, "grad_norm": 11.788281440734863, "learning_rate": 3.7136368551962803e-06, "loss": 3.8449, "step": 1717 }, { "epoch": 0.02, "grad_norm": 10.385897636413574, "learning_rate": 3.7157997188277283e-06, "loss": 3.9341, "step": 1718 }, { "epoch": 0.02, "grad_norm": 12.101065635681152, "learning_rate": 3.7179625824591764e-06, "loss": 4.2488, "step": 1719 }, { "epoch": 0.02, "grad_norm": 11.36521053314209, "learning_rate": 3.7201254460906244e-06, "loss": 4.0133, "step": 1720 }, { "epoch": 0.02, "grad_norm": 11.411884307861328, "learning_rate": 3.7222883097220725e-06, "loss": 3.8424, "step": 1721 }, { "epoch": 0.02, "grad_norm": 11.431496620178223, "learning_rate": 3.7244511733535205e-06, "loss": 4.0266, "step": 1722 }, { "epoch": 0.02, "grad_norm": 11.631284713745117, "learning_rate": 3.7266140369849686e-06, "loss": 4.8064, "step": 1723 }, { "epoch": 0.02, "grad_norm": 11.40756607055664, "learning_rate": 3.7287769006164166e-06, "loss": 4.4821, "step": 1724 }, { "epoch": 0.02, "grad_norm": 11.41130542755127, "learning_rate": 3.7309397642478647e-06, "loss": 3.5963, "step": 1725 }, { "epoch": 0.02, "grad_norm": 11.302611351013184, "learning_rate": 3.7331026278793127e-06, "loss": 4.3833, "step": 1726 }, { "epoch": 0.02, "grad_norm": 11.913369178771973, "learning_rate": 3.7352654915107604e-06, "loss": 4.0476, "step": 1727 }, { "epoch": 0.02, "grad_norm": 11.424909591674805, "learning_rate": 3.7374283551422084e-06, "loss": 3.9994, "step": 1728 }, { "epoch": 0.02, "grad_norm": 9.780524253845215, "learning_rate": 3.7395912187736565e-06, "loss": 4.0853, "step": 1729 }, { "epoch": 0.02, "grad_norm": 11.532316207885742, "learning_rate": 3.7417540824051045e-06, "loss": 3.8237, "step": 1730 }, { "epoch": 0.02, "grad_norm": 10.581875801086426, "learning_rate": 3.7439169460365526e-06, "loss": 4.2566, "step": 1731 }, { "epoch": 0.02, "grad_norm": 9.80745792388916, "learning_rate": 3.7460798096680006e-06, "loss": 4.0623, "step": 1732 }, { "epoch": 0.02, "grad_norm": 10.201875686645508, "learning_rate": 3.7482426732994487e-06, "loss": 3.8485, "step": 1733 }, { "epoch": 0.02, "grad_norm": 11.846124649047852, "learning_rate": 3.7504055369308967e-06, "loss": 4.3286, "step": 1734 }, { "epoch": 0.02, "grad_norm": 11.90049934387207, "learning_rate": 3.7525684005623448e-06, "loss": 3.5903, "step": 1735 }, { "epoch": 0.02, "grad_norm": 10.251205444335938, "learning_rate": 3.754731264193793e-06, "loss": 3.4939, "step": 1736 }, { "epoch": 0.02, "grad_norm": 13.11063289642334, "learning_rate": 3.756894127825241e-06, "loss": 4.1745, "step": 1737 }, { "epoch": 0.02, "grad_norm": 11.35306167602539, "learning_rate": 3.759056991456689e-06, "loss": 3.8991, "step": 1738 }, { "epoch": 0.02, "grad_norm": 12.863283157348633, "learning_rate": 3.761219855088137e-06, "loss": 4.2775, "step": 1739 }, { "epoch": 0.02, "grad_norm": 11.50551986694336, "learning_rate": 3.763382718719585e-06, "loss": 3.8173, "step": 1740 }, { "epoch": 0.02, "grad_norm": 9.948160171508789, "learning_rate": 3.7655455823510326e-06, "loss": 3.9145, "step": 1741 }, { "epoch": 0.02, "grad_norm": 11.177101135253906, "learning_rate": 3.7677084459824807e-06, "loss": 3.3941, "step": 1742 }, { "epoch": 0.02, "grad_norm": 10.488950729370117, "learning_rate": 3.7698713096139287e-06, "loss": 4.0034, "step": 1743 }, { "epoch": 0.02, "grad_norm": 10.602157592773438, "learning_rate": 3.7720341732453776e-06, "loss": 3.5116, "step": 1744 }, { "epoch": 0.02, "grad_norm": 13.52938175201416, "learning_rate": 3.7741970368768257e-06, "loss": 4.0011, "step": 1745 }, { "epoch": 0.02, "grad_norm": 10.774849891662598, "learning_rate": 3.7763599005082733e-06, "loss": 4.6568, "step": 1746 }, { "epoch": 0.02, "grad_norm": 11.374687194824219, "learning_rate": 3.7785227641397214e-06, "loss": 4.266, "step": 1747 }, { "epoch": 0.02, "grad_norm": 11.183116912841797, "learning_rate": 3.7806856277711694e-06, "loss": 4.2049, "step": 1748 }, { "epoch": 0.02, "grad_norm": 12.138819694519043, "learning_rate": 3.7828484914026175e-06, "loss": 4.4985, "step": 1749 }, { "epoch": 0.02, "grad_norm": 10.352762222290039, "learning_rate": 3.7850113550340655e-06, "loss": 4.0606, "step": 1750 }, { "epoch": 0.02, "grad_norm": 10.522720336914062, "learning_rate": 3.7871742186655136e-06, "loss": 3.8197, "step": 1751 }, { "epoch": 0.02, "grad_norm": 10.634215354919434, "learning_rate": 3.7893370822969616e-06, "loss": 4.3088, "step": 1752 }, { "epoch": 0.02, "grad_norm": 11.507669448852539, "learning_rate": 3.7914999459284097e-06, "loss": 4.2279, "step": 1753 }, { "epoch": 0.02, "grad_norm": 11.476306915283203, "learning_rate": 3.7936628095598577e-06, "loss": 4.0683, "step": 1754 }, { "epoch": 0.02, "grad_norm": 10.1569242477417, "learning_rate": 3.7958256731913058e-06, "loss": 3.9195, "step": 1755 }, { "epoch": 0.02, "grad_norm": 9.579508781433105, "learning_rate": 3.797988536822754e-06, "loss": 3.9326, "step": 1756 }, { "epoch": 0.02, "grad_norm": 9.709532737731934, "learning_rate": 3.800151400454202e-06, "loss": 4.8686, "step": 1757 }, { "epoch": 0.02, "grad_norm": 11.46628475189209, "learning_rate": 3.80231426408565e-06, "loss": 4.1886, "step": 1758 }, { "epoch": 0.02, "grad_norm": 10.478856086730957, "learning_rate": 3.8044771277170976e-06, "loss": 4.2114, "step": 1759 }, { "epoch": 0.02, "grad_norm": 10.182356834411621, "learning_rate": 3.8066399913485456e-06, "loss": 3.5933, "step": 1760 }, { "epoch": 0.02, "grad_norm": 11.87745189666748, "learning_rate": 3.8088028549799937e-06, "loss": 3.5193, "step": 1761 }, { "epoch": 0.02, "grad_norm": 11.096930503845215, "learning_rate": 3.8109657186114417e-06, "loss": 3.8589, "step": 1762 }, { "epoch": 0.02, "grad_norm": 9.902449607849121, "learning_rate": 3.8131285822428898e-06, "loss": 3.9183, "step": 1763 }, { "epoch": 0.02, "grad_norm": 11.99114990234375, "learning_rate": 3.815291445874338e-06, "loss": 3.5569, "step": 1764 }, { "epoch": 0.02, "grad_norm": 9.586836814880371, "learning_rate": 3.817454309505786e-06, "loss": 3.3718, "step": 1765 }, { "epoch": 0.02, "grad_norm": 10.587827682495117, "learning_rate": 3.819617173137234e-06, "loss": 4.2412, "step": 1766 }, { "epoch": 0.02, "grad_norm": 11.056938171386719, "learning_rate": 3.821780036768682e-06, "loss": 3.9316, "step": 1767 }, { "epoch": 0.02, "grad_norm": 13.682650566101074, "learning_rate": 3.82394290040013e-06, "loss": 4.3539, "step": 1768 }, { "epoch": 0.02, "grad_norm": 10.490835189819336, "learning_rate": 3.826105764031578e-06, "loss": 4.2208, "step": 1769 }, { "epoch": 0.02, "grad_norm": 11.114100456237793, "learning_rate": 3.828268627663026e-06, "loss": 3.9478, "step": 1770 }, { "epoch": 0.02, "grad_norm": 10.320477485656738, "learning_rate": 3.830431491294474e-06, "loss": 4.5779, "step": 1771 }, { "epoch": 0.02, "grad_norm": 10.957231521606445, "learning_rate": 3.832594354925922e-06, "loss": 3.9779, "step": 1772 }, { "epoch": 0.02, "grad_norm": 10.02597427368164, "learning_rate": 3.83475721855737e-06, "loss": 4.2333, "step": 1773 }, { "epoch": 0.02, "grad_norm": 10.566824913024902, "learning_rate": 3.836920082188818e-06, "loss": 3.3052, "step": 1774 }, { "epoch": 0.02, "grad_norm": 11.244296073913574, "learning_rate": 3.839082945820266e-06, "loss": 4.4341, "step": 1775 }, { "epoch": 0.02, "grad_norm": 10.258841514587402, "learning_rate": 3.841245809451714e-06, "loss": 3.4764, "step": 1776 }, { "epoch": 0.02, "grad_norm": 9.923666000366211, "learning_rate": 3.8434086730831625e-06, "loss": 3.4537, "step": 1777 }, { "epoch": 0.02, "grad_norm": 9.885042190551758, "learning_rate": 3.8455715367146105e-06, "loss": 4.3189, "step": 1778 }, { "epoch": 0.02, "grad_norm": 11.308932304382324, "learning_rate": 3.8477344003460586e-06, "loss": 4.1068, "step": 1779 }, { "epoch": 0.02, "grad_norm": 12.217909812927246, "learning_rate": 3.849897263977507e-06, "loss": 3.9898, "step": 1780 }, { "epoch": 0.02, "grad_norm": 11.623674392700195, "learning_rate": 3.852060127608955e-06, "loss": 3.8371, "step": 1781 }, { "epoch": 0.02, "grad_norm": 13.54190731048584, "learning_rate": 3.854222991240403e-06, "loss": 4.3397, "step": 1782 }, { "epoch": 0.02, "grad_norm": 9.944153785705566, "learning_rate": 3.856385854871851e-06, "loss": 3.5661, "step": 1783 }, { "epoch": 0.02, "grad_norm": 11.053953170776367, "learning_rate": 3.858548718503299e-06, "loss": 3.96, "step": 1784 }, { "epoch": 0.02, "grad_norm": 10.463415145874023, "learning_rate": 3.860711582134747e-06, "loss": 4.6361, "step": 1785 }, { "epoch": 0.02, "grad_norm": 11.386308670043945, "learning_rate": 3.862874445766195e-06, "loss": 4.4389, "step": 1786 }, { "epoch": 0.02, "grad_norm": 11.929826736450195, "learning_rate": 3.865037309397643e-06, "loss": 4.3936, "step": 1787 }, { "epoch": 0.02, "grad_norm": 10.742462158203125, "learning_rate": 3.867200173029091e-06, "loss": 4.5844, "step": 1788 }, { "epoch": 0.02, "grad_norm": 10.410635948181152, "learning_rate": 3.869363036660539e-06, "loss": 3.9214, "step": 1789 }, { "epoch": 0.02, "grad_norm": 11.091007232666016, "learning_rate": 3.871525900291987e-06, "loss": 4.0423, "step": 1790 }, { "epoch": 0.02, "grad_norm": 11.615181922912598, "learning_rate": 3.873688763923435e-06, "loss": 4.1506, "step": 1791 }, { "epoch": 0.02, "grad_norm": 12.225931167602539, "learning_rate": 3.875851627554883e-06, "loss": 4.4022, "step": 1792 }, { "epoch": 0.02, "grad_norm": 13.698775291442871, "learning_rate": 3.878014491186331e-06, "loss": 4.0592, "step": 1793 }, { "epoch": 0.02, "grad_norm": 10.179865837097168, "learning_rate": 3.880177354817779e-06, "loss": 4.0532, "step": 1794 }, { "epoch": 0.02, "grad_norm": 11.533095359802246, "learning_rate": 3.882340218449227e-06, "loss": 4.0921, "step": 1795 }, { "epoch": 0.02, "grad_norm": 10.95347785949707, "learning_rate": 3.8845030820806754e-06, "loss": 4.1223, "step": 1796 }, { "epoch": 0.02, "grad_norm": 10.165352821350098, "learning_rate": 3.8866659457121235e-06, "loss": 4.3667, "step": 1797 }, { "epoch": 0.02, "grad_norm": 10.948894500732422, "learning_rate": 3.888828809343571e-06, "loss": 4.069, "step": 1798 }, { "epoch": 0.02, "grad_norm": 11.416118621826172, "learning_rate": 3.890991672975019e-06, "loss": 3.8712, "step": 1799 }, { "epoch": 0.02, "grad_norm": 11.02919864654541, "learning_rate": 3.893154536606467e-06, "loss": 4.451, "step": 1800 }, { "epoch": 0.02, "grad_norm": 10.462191581726074, "learning_rate": 3.895317400237915e-06, "loss": 4.0723, "step": 1801 }, { "epoch": 0.02, "grad_norm": 12.862171173095703, "learning_rate": 3.897480263869363e-06, "loss": 3.494, "step": 1802 }, { "epoch": 0.02, "grad_norm": 9.592459678649902, "learning_rate": 3.899643127500811e-06, "loss": 3.9876, "step": 1803 }, { "epoch": 0.02, "grad_norm": 9.782427787780762, "learning_rate": 3.901805991132259e-06, "loss": 3.945, "step": 1804 }, { "epoch": 0.02, "grad_norm": 10.765815734863281, "learning_rate": 3.903968854763707e-06, "loss": 3.6985, "step": 1805 }, { "epoch": 0.02, "grad_norm": 11.075216293334961, "learning_rate": 3.906131718395155e-06, "loss": 3.6846, "step": 1806 }, { "epoch": 0.02, "grad_norm": 10.770687103271484, "learning_rate": 3.908294582026603e-06, "loss": 3.992, "step": 1807 }, { "epoch": 0.02, "grad_norm": 9.37387752532959, "learning_rate": 3.910457445658052e-06, "loss": 3.8418, "step": 1808 }, { "epoch": 0.02, "grad_norm": 9.533174514770508, "learning_rate": 3.9126203092895e-06, "loss": 4.0522, "step": 1809 }, { "epoch": 0.02, "grad_norm": 10.871432304382324, "learning_rate": 3.914783172920948e-06, "loss": 4.0063, "step": 1810 }, { "epoch": 0.02, "grad_norm": 11.17325496673584, "learning_rate": 3.916946036552396e-06, "loss": 3.9664, "step": 1811 }, { "epoch": 0.02, "grad_norm": 11.27423095703125, "learning_rate": 3.919108900183844e-06, "loss": 3.5363, "step": 1812 }, { "epoch": 0.02, "grad_norm": 9.79206657409668, "learning_rate": 3.921271763815292e-06, "loss": 3.7952, "step": 1813 }, { "epoch": 0.02, "grad_norm": 10.513506889343262, "learning_rate": 3.92343462744674e-06, "loss": 3.8189, "step": 1814 }, { "epoch": 0.02, "grad_norm": 13.157561302185059, "learning_rate": 3.925597491078188e-06, "loss": 3.8942, "step": 1815 }, { "epoch": 0.02, "grad_norm": 11.47772216796875, "learning_rate": 3.927760354709636e-06, "loss": 3.3656, "step": 1816 }, { "epoch": 0.02, "grad_norm": 11.71440315246582, "learning_rate": 3.929923218341084e-06, "loss": 3.821, "step": 1817 }, { "epoch": 0.02, "grad_norm": 10.300374984741211, "learning_rate": 3.932086081972532e-06, "loss": 3.9572, "step": 1818 }, { "epoch": 0.02, "grad_norm": 11.305803298950195, "learning_rate": 3.93424894560398e-06, "loss": 3.7745, "step": 1819 }, { "epoch": 0.02, "grad_norm": 11.782487869262695, "learning_rate": 3.936411809235428e-06, "loss": 3.8248, "step": 1820 }, { "epoch": 0.02, "grad_norm": 10.806421279907227, "learning_rate": 3.938574672866876e-06, "loss": 3.9595, "step": 1821 }, { "epoch": 0.02, "grad_norm": 10.879849433898926, "learning_rate": 3.940737536498324e-06, "loss": 3.7214, "step": 1822 }, { "epoch": 0.02, "grad_norm": 11.259467124938965, "learning_rate": 3.942900400129772e-06, "loss": 3.7947, "step": 1823 }, { "epoch": 0.02, "grad_norm": 11.021549224853516, "learning_rate": 3.94506326376122e-06, "loss": 3.7062, "step": 1824 }, { "epoch": 0.02, "grad_norm": 10.951658248901367, "learning_rate": 3.947226127392668e-06, "loss": 3.497, "step": 1825 }, { "epoch": 0.02, "grad_norm": 9.823147773742676, "learning_rate": 3.949388991024116e-06, "loss": 4.1123, "step": 1826 }, { "epoch": 0.02, "grad_norm": 11.864752769470215, "learning_rate": 3.951551854655564e-06, "loss": 3.7957, "step": 1827 }, { "epoch": 0.02, "grad_norm": 10.881409645080566, "learning_rate": 3.953714718287012e-06, "loss": 3.2559, "step": 1828 }, { "epoch": 0.02, "grad_norm": 10.335442543029785, "learning_rate": 3.95587758191846e-06, "loss": 3.1005, "step": 1829 }, { "epoch": 0.02, "grad_norm": 11.138860702514648, "learning_rate": 3.958040445549908e-06, "loss": 4.1287, "step": 1830 }, { "epoch": 0.02, "grad_norm": 10.852283477783203, "learning_rate": 3.960203309181356e-06, "loss": 3.4056, "step": 1831 }, { "epoch": 0.02, "grad_norm": 9.02734375, "learning_rate": 3.962366172812804e-06, "loss": 3.6231, "step": 1832 }, { "epoch": 0.02, "grad_norm": 12.249828338623047, "learning_rate": 3.9645290364442525e-06, "loss": 3.9346, "step": 1833 }, { "epoch": 0.02, "grad_norm": 10.891469955444336, "learning_rate": 3.9666919000757005e-06, "loss": 3.8778, "step": 1834 }, { "epoch": 0.02, "grad_norm": 10.76071548461914, "learning_rate": 3.9688547637071486e-06, "loss": 4.1163, "step": 1835 }, { "epoch": 0.02, "grad_norm": 10.380936622619629, "learning_rate": 3.971017627338597e-06, "loss": 3.6675, "step": 1836 }, { "epoch": 0.02, "grad_norm": 11.221693992614746, "learning_rate": 3.973180490970045e-06, "loss": 3.4869, "step": 1837 }, { "epoch": 0.02, "grad_norm": 11.404656410217285, "learning_rate": 3.975343354601493e-06, "loss": 4.117, "step": 1838 }, { "epoch": 0.02, "grad_norm": 12.296998023986816, "learning_rate": 3.977506218232941e-06, "loss": 4.3433, "step": 1839 }, { "epoch": 0.02, "grad_norm": 9.768176078796387, "learning_rate": 3.979669081864389e-06, "loss": 3.0535, "step": 1840 }, { "epoch": 0.02, "grad_norm": 10.501717567443848, "learning_rate": 3.981831945495837e-06, "loss": 3.7836, "step": 1841 }, { "epoch": 0.02, "grad_norm": 11.281559944152832, "learning_rate": 3.983994809127285e-06, "loss": 3.9696, "step": 1842 }, { "epoch": 0.02, "grad_norm": 11.71972942352295, "learning_rate": 3.986157672758733e-06, "loss": 4.5933, "step": 1843 }, { "epoch": 0.02, "grad_norm": 11.108013153076172, "learning_rate": 3.988320536390181e-06, "loss": 3.4777, "step": 1844 }, { "epoch": 0.02, "grad_norm": 11.663249969482422, "learning_rate": 3.990483400021629e-06, "loss": 3.4146, "step": 1845 }, { "epoch": 0.02, "grad_norm": 12.294189453125, "learning_rate": 3.992646263653077e-06, "loss": 4.3233, "step": 1846 }, { "epoch": 0.02, "grad_norm": 10.690864562988281, "learning_rate": 3.994809127284525e-06, "loss": 3.543, "step": 1847 }, { "epoch": 0.02, "grad_norm": 10.816951751708984, "learning_rate": 3.996971990915973e-06, "loss": 4.1814, "step": 1848 }, { "epoch": 0.02, "grad_norm": 10.241011619567871, "learning_rate": 3.999134854547421e-06, "loss": 3.7821, "step": 1849 }, { "epoch": 0.02, "grad_norm": 10.831243515014648, "learning_rate": 4.001297718178869e-06, "loss": 4.0765, "step": 1850 }, { "epoch": 0.02, "grad_norm": 11.823287963867188, "learning_rate": 4.003460581810317e-06, "loss": 3.9925, "step": 1851 }, { "epoch": 0.02, "grad_norm": 9.777237892150879, "learning_rate": 4.0056234454417654e-06, "loss": 4.12, "step": 1852 }, { "epoch": 0.02, "grad_norm": 10.478765487670898, "learning_rate": 4.0077863090732135e-06, "loss": 3.6786, "step": 1853 }, { "epoch": 0.02, "grad_norm": 11.134775161743164, "learning_rate": 4.0099491727046615e-06, "loss": 3.8503, "step": 1854 }, { "epoch": 0.02, "grad_norm": 11.126679420471191, "learning_rate": 4.01211203633611e-06, "loss": 4.0192, "step": 1855 }, { "epoch": 0.02, "grad_norm": 11.909435272216797, "learning_rate": 4.014274899967558e-06, "loss": 4.3154, "step": 1856 }, { "epoch": 0.02, "grad_norm": 9.801212310791016, "learning_rate": 4.016437763599006e-06, "loss": 3.7656, "step": 1857 }, { "epoch": 0.02, "grad_norm": 10.356453895568848, "learning_rate": 4.018600627230454e-06, "loss": 4.7669, "step": 1858 }, { "epoch": 0.02, "grad_norm": 10.263741493225098, "learning_rate": 4.020763490861902e-06, "loss": 3.6437, "step": 1859 }, { "epoch": 0.02, "grad_norm": 12.598856925964355, "learning_rate": 4.02292635449335e-06, "loss": 3.2919, "step": 1860 }, { "epoch": 0.02, "grad_norm": 10.939653396606445, "learning_rate": 4.025089218124798e-06, "loss": 3.7177, "step": 1861 }, { "epoch": 0.02, "grad_norm": 11.201809883117676, "learning_rate": 4.027252081756245e-06, "loss": 3.4784, "step": 1862 }, { "epoch": 0.02, "grad_norm": 11.533246040344238, "learning_rate": 4.029414945387693e-06, "loss": 3.3316, "step": 1863 }, { "epoch": 0.02, "grad_norm": 11.238224029541016, "learning_rate": 4.031577809019141e-06, "loss": 4.1827, "step": 1864 }, { "epoch": 0.02, "grad_norm": 10.906312942504883, "learning_rate": 4.033740672650589e-06, "loss": 4.2621, "step": 1865 }, { "epoch": 0.02, "grad_norm": 10.782669067382812, "learning_rate": 4.035903536282037e-06, "loss": 3.8357, "step": 1866 }, { "epoch": 0.02, "grad_norm": 9.968672752380371, "learning_rate": 4.038066399913485e-06, "loss": 4.1239, "step": 1867 }, { "epoch": 0.02, "grad_norm": 9.532379150390625, "learning_rate": 4.040229263544933e-06, "loss": 4.3426, "step": 1868 }, { "epoch": 0.02, "grad_norm": 10.088403701782227, "learning_rate": 4.0423921271763814e-06, "loss": 4.5316, "step": 1869 }, { "epoch": 0.02, "grad_norm": 10.916450500488281, "learning_rate": 4.0445549908078295e-06, "loss": 3.4547, "step": 1870 }, { "epoch": 0.02, "grad_norm": 10.95669174194336, "learning_rate": 4.0467178544392775e-06, "loss": 3.5905, "step": 1871 }, { "epoch": 0.02, "grad_norm": 10.339702606201172, "learning_rate": 4.0488807180707264e-06, "loss": 4.4974, "step": 1872 }, { "epoch": 0.02, "grad_norm": 9.829551696777344, "learning_rate": 4.0510435817021745e-06, "loss": 3.9268, "step": 1873 }, { "epoch": 0.02, "grad_norm": 10.143810272216797, "learning_rate": 4.0532064453336225e-06, "loss": 4.1983, "step": 1874 }, { "epoch": 0.02, "grad_norm": 10.29999828338623, "learning_rate": 4.055369308965071e-06, "loss": 3.5484, "step": 1875 }, { "epoch": 0.02, "grad_norm": 10.876359939575195, "learning_rate": 4.057532172596519e-06, "loss": 4.3504, "step": 1876 }, { "epoch": 0.02, "grad_norm": 11.492952346801758, "learning_rate": 4.059695036227967e-06, "loss": 3.3816, "step": 1877 }, { "epoch": 0.02, "grad_norm": 10.263117790222168, "learning_rate": 4.061857899859415e-06, "loss": 4.2061, "step": 1878 }, { "epoch": 0.02, "grad_norm": 10.738239288330078, "learning_rate": 4.064020763490863e-06, "loss": 3.8829, "step": 1879 }, { "epoch": 0.02, "grad_norm": 10.79662799835205, "learning_rate": 4.06618362712231e-06, "loss": 3.2966, "step": 1880 }, { "epoch": 0.02, "grad_norm": 11.00821590423584, "learning_rate": 4.068346490753758e-06, "loss": 4.0998, "step": 1881 }, { "epoch": 0.02, "grad_norm": 11.843233108520508, "learning_rate": 4.070509354385206e-06, "loss": 3.2619, "step": 1882 }, { "epoch": 0.02, "grad_norm": 12.0050048828125, "learning_rate": 4.072672218016654e-06, "loss": 3.7221, "step": 1883 }, { "epoch": 0.02, "grad_norm": 11.21610164642334, "learning_rate": 4.074835081648102e-06, "loss": 4.3192, "step": 1884 }, { "epoch": 0.02, "grad_norm": 10.055989265441895, "learning_rate": 4.07699794527955e-06, "loss": 3.4235, "step": 1885 }, { "epoch": 0.02, "grad_norm": 10.622507095336914, "learning_rate": 4.079160808910998e-06, "loss": 3.7701, "step": 1886 }, { "epoch": 0.02, "grad_norm": 10.082606315612793, "learning_rate": 4.081323672542446e-06, "loss": 3.6345, "step": 1887 }, { "epoch": 0.02, "grad_norm": 10.703336715698242, "learning_rate": 4.083486536173894e-06, "loss": 4.0936, "step": 1888 }, { "epoch": 0.02, "grad_norm": 11.464759826660156, "learning_rate": 4.0856493998053425e-06, "loss": 2.7416, "step": 1889 }, { "epoch": 0.02, "grad_norm": 12.45957088470459, "learning_rate": 4.0878122634367905e-06, "loss": 4.6166, "step": 1890 }, { "epoch": 0.02, "grad_norm": 11.625118255615234, "learning_rate": 4.0899751270682386e-06, "loss": 2.9908, "step": 1891 }, { "epoch": 0.02, "grad_norm": 10.551483154296875, "learning_rate": 4.092137990699687e-06, "loss": 3.9211, "step": 1892 }, { "epoch": 0.02, "grad_norm": 9.989375114440918, "learning_rate": 4.094300854331135e-06, "loss": 4.095, "step": 1893 }, { "epoch": 0.02, "grad_norm": 9.631278038024902, "learning_rate": 4.096463717962583e-06, "loss": 3.6648, "step": 1894 }, { "epoch": 0.02, "grad_norm": 9.637796401977539, "learning_rate": 4.098626581594031e-06, "loss": 4.0013, "step": 1895 }, { "epoch": 0.02, "grad_norm": 10.885635375976562, "learning_rate": 4.100789445225479e-06, "loss": 3.3707, "step": 1896 }, { "epoch": 0.02, "grad_norm": 10.126481056213379, "learning_rate": 4.102952308856927e-06, "loss": 4.5624, "step": 1897 }, { "epoch": 0.02, "grad_norm": 10.84529972076416, "learning_rate": 4.105115172488375e-06, "loss": 4.2073, "step": 1898 }, { "epoch": 0.02, "grad_norm": 9.431756019592285, "learning_rate": 4.107278036119823e-06, "loss": 3.782, "step": 1899 }, { "epoch": 0.02, "grad_norm": 10.135420799255371, "learning_rate": 4.109440899751271e-06, "loss": 3.6601, "step": 1900 }, { "epoch": 0.02, "grad_norm": 13.317850112915039, "learning_rate": 4.111603763382719e-06, "loss": 3.6293, "step": 1901 }, { "epoch": 0.02, "grad_norm": 10.337077140808105, "learning_rate": 4.113766627014167e-06, "loss": 4.1688, "step": 1902 }, { "epoch": 0.02, "grad_norm": 9.316003799438477, "learning_rate": 4.115929490645615e-06, "loss": 3.5255, "step": 1903 }, { "epoch": 0.02, "grad_norm": 10.127601623535156, "learning_rate": 4.118092354277063e-06, "loss": 3.7913, "step": 1904 }, { "epoch": 0.02, "grad_norm": 9.577346801757812, "learning_rate": 4.120255217908511e-06, "loss": 4.4423, "step": 1905 }, { "epoch": 0.02, "grad_norm": 10.232419967651367, "learning_rate": 4.122418081539959e-06, "loss": 3.9511, "step": 1906 }, { "epoch": 0.02, "grad_norm": 10.82443618774414, "learning_rate": 4.124580945171407e-06, "loss": 3.8214, "step": 1907 }, { "epoch": 0.02, "grad_norm": 9.8251371383667, "learning_rate": 4.1267438088028554e-06, "loss": 3.7423, "step": 1908 }, { "epoch": 0.02, "grad_norm": 11.052825927734375, "learning_rate": 4.1289066724343035e-06, "loss": 3.5097, "step": 1909 }, { "epoch": 0.02, "grad_norm": 10.873159408569336, "learning_rate": 4.1310695360657515e-06, "loss": 4.1757, "step": 1910 }, { "epoch": 0.02, "grad_norm": 10.562129020690918, "learning_rate": 4.1332323996971996e-06, "loss": 3.8493, "step": 1911 }, { "epoch": 0.02, "grad_norm": 11.821441650390625, "learning_rate": 4.135395263328648e-06, "loss": 3.5787, "step": 1912 }, { "epoch": 0.02, "grad_norm": 10.892412185668945, "learning_rate": 4.137558126960096e-06, "loss": 3.9813, "step": 1913 }, { "epoch": 0.02, "grad_norm": 11.145591735839844, "learning_rate": 4.139720990591544e-06, "loss": 3.9849, "step": 1914 }, { "epoch": 0.02, "grad_norm": 11.262533187866211, "learning_rate": 4.141883854222992e-06, "loss": 3.7148, "step": 1915 }, { "epoch": 0.02, "grad_norm": 10.032830238342285, "learning_rate": 4.14404671785444e-06, "loss": 3.7203, "step": 1916 }, { "epoch": 0.02, "grad_norm": 10.190605163574219, "learning_rate": 4.146209581485888e-06, "loss": 3.8484, "step": 1917 }, { "epoch": 0.02, "grad_norm": 10.603842735290527, "learning_rate": 4.148372445117336e-06, "loss": 3.9728, "step": 1918 }, { "epoch": 0.02, "grad_norm": 9.652002334594727, "learning_rate": 4.150535308748784e-06, "loss": 3.6398, "step": 1919 }, { "epoch": 0.02, "grad_norm": 10.490202903747559, "learning_rate": 4.152698172380232e-06, "loss": 3.8675, "step": 1920 }, { "epoch": 0.02, "grad_norm": 11.325124740600586, "learning_rate": 4.15486103601168e-06, "loss": 3.242, "step": 1921 }, { "epoch": 0.02, "grad_norm": 10.043478012084961, "learning_rate": 4.157023899643128e-06, "loss": 3.4797, "step": 1922 }, { "epoch": 0.02, "grad_norm": 11.458099365234375, "learning_rate": 4.159186763274576e-06, "loss": 3.157, "step": 1923 }, { "epoch": 0.02, "grad_norm": 10.138049125671387, "learning_rate": 4.161349626906024e-06, "loss": 3.6345, "step": 1924 }, { "epoch": 0.02, "grad_norm": 11.808174133300781, "learning_rate": 4.1635124905374714e-06, "loss": 4.0027, "step": 1925 }, { "epoch": 0.02, "grad_norm": 9.88144588470459, "learning_rate": 4.1656753541689195e-06, "loss": 3.919, "step": 1926 }, { "epoch": 0.03, "grad_norm": 11.405590057373047, "learning_rate": 4.1678382178003675e-06, "loss": 4.3301, "step": 1927 }, { "epoch": 0.03, "grad_norm": 13.25898265838623, "learning_rate": 4.170001081431816e-06, "loss": 4.2309, "step": 1928 }, { "epoch": 0.03, "grad_norm": 11.200470924377441, "learning_rate": 4.172163945063264e-06, "loss": 3.8877, "step": 1929 }, { "epoch": 0.03, "grad_norm": 10.818652153015137, "learning_rate": 4.174326808694712e-06, "loss": 3.9737, "step": 1930 }, { "epoch": 0.03, "grad_norm": 9.481416702270508, "learning_rate": 4.17648967232616e-06, "loss": 3.3461, "step": 1931 }, { "epoch": 0.03, "grad_norm": 11.64753246307373, "learning_rate": 4.178652535957608e-06, "loss": 4.0695, "step": 1932 }, { "epoch": 0.03, "grad_norm": 11.223856925964355, "learning_rate": 4.180815399589056e-06, "loss": 3.5845, "step": 1933 }, { "epoch": 0.03, "grad_norm": 11.2842435836792, "learning_rate": 4.182978263220504e-06, "loss": 4.0974, "step": 1934 }, { "epoch": 0.03, "grad_norm": 11.382548332214355, "learning_rate": 4.185141126851952e-06, "loss": 3.6894, "step": 1935 }, { "epoch": 0.03, "grad_norm": 11.718132972717285, "learning_rate": 4.187303990483401e-06, "loss": 3.2797, "step": 1936 }, { "epoch": 0.03, "grad_norm": 11.899065017700195, "learning_rate": 4.189466854114849e-06, "loss": 3.6646, "step": 1937 }, { "epoch": 0.03, "grad_norm": 10.745953559875488, "learning_rate": 4.191629717746297e-06, "loss": 3.9755, "step": 1938 }, { "epoch": 0.03, "grad_norm": 10.385612487792969, "learning_rate": 4.193792581377745e-06, "loss": 3.5129, "step": 1939 }, { "epoch": 0.03, "grad_norm": 10.056686401367188, "learning_rate": 4.195955445009193e-06, "loss": 3.4156, "step": 1940 }, { "epoch": 0.03, "grad_norm": 11.394026756286621, "learning_rate": 4.198118308640641e-06, "loss": 3.9037, "step": 1941 }, { "epoch": 0.03, "grad_norm": 12.075868606567383, "learning_rate": 4.200281172272089e-06, "loss": 3.3662, "step": 1942 }, { "epoch": 0.03, "grad_norm": 10.027022361755371, "learning_rate": 4.202444035903537e-06, "loss": 3.7474, "step": 1943 }, { "epoch": 0.03, "grad_norm": 11.232251167297363, "learning_rate": 4.204606899534984e-06, "loss": 3.9947, "step": 1944 }, { "epoch": 0.03, "grad_norm": 10.742056846618652, "learning_rate": 4.2067697631664325e-06, "loss": 3.7823, "step": 1945 }, { "epoch": 0.03, "grad_norm": 10.056029319763184, "learning_rate": 4.2089326267978805e-06, "loss": 3.7712, "step": 1946 }, { "epoch": 0.03, "grad_norm": 11.417257308959961, "learning_rate": 4.2110954904293286e-06, "loss": 3.6782, "step": 1947 }, { "epoch": 0.03, "grad_norm": 10.5070219039917, "learning_rate": 4.213258354060777e-06, "loss": 3.8573, "step": 1948 }, { "epoch": 0.03, "grad_norm": 9.3388671875, "learning_rate": 4.215421217692225e-06, "loss": 4.237, "step": 1949 }, { "epoch": 0.03, "grad_norm": 11.374890327453613, "learning_rate": 4.217584081323673e-06, "loss": 3.6277, "step": 1950 }, { "epoch": 0.03, "grad_norm": 10.002057075500488, "learning_rate": 4.219746944955121e-06, "loss": 3.4031, "step": 1951 }, { "epoch": 0.03, "grad_norm": 10.448760032653809, "learning_rate": 4.221909808586569e-06, "loss": 3.95, "step": 1952 }, { "epoch": 0.03, "grad_norm": 9.469779014587402, "learning_rate": 4.224072672218017e-06, "loss": 3.2251, "step": 1953 }, { "epoch": 0.03, "grad_norm": 11.61632251739502, "learning_rate": 4.226235535849465e-06, "loss": 3.4822, "step": 1954 }, { "epoch": 0.03, "grad_norm": 10.36385726928711, "learning_rate": 4.228398399480913e-06, "loss": 3.2553, "step": 1955 }, { "epoch": 0.03, "grad_norm": 10.725610733032227, "learning_rate": 4.230561263112361e-06, "loss": 3.6882, "step": 1956 }, { "epoch": 0.03, "grad_norm": 9.588940620422363, "learning_rate": 4.232724126743809e-06, "loss": 3.9214, "step": 1957 }, { "epoch": 0.03, "grad_norm": 10.502291679382324, "learning_rate": 4.234886990375257e-06, "loss": 4.2026, "step": 1958 }, { "epoch": 0.03, "grad_norm": 11.189080238342285, "learning_rate": 4.237049854006705e-06, "loss": 3.9943, "step": 1959 }, { "epoch": 0.03, "grad_norm": 9.506224632263184, "learning_rate": 4.239212717638153e-06, "loss": 3.6271, "step": 1960 }, { "epoch": 0.03, "grad_norm": 10.258651733398438, "learning_rate": 4.241375581269601e-06, "loss": 3.5475, "step": 1961 }, { "epoch": 0.03, "grad_norm": 11.010310173034668, "learning_rate": 4.243538444901049e-06, "loss": 3.285, "step": 1962 }, { "epoch": 0.03, "grad_norm": 10.861111640930176, "learning_rate": 4.245701308532497e-06, "loss": 4.0069, "step": 1963 }, { "epoch": 0.03, "grad_norm": 11.986414909362793, "learning_rate": 4.247864172163945e-06, "loss": 4.2265, "step": 1964 }, { "epoch": 0.03, "grad_norm": 12.645758628845215, "learning_rate": 4.2500270357953935e-06, "loss": 4.2928, "step": 1965 }, { "epoch": 0.03, "grad_norm": 9.893933296203613, "learning_rate": 4.2521898994268415e-06, "loss": 3.8274, "step": 1966 }, { "epoch": 0.03, "grad_norm": 10.444514274597168, "learning_rate": 4.2543527630582896e-06, "loss": 3.8637, "step": 1967 }, { "epoch": 0.03, "grad_norm": 12.360673904418945, "learning_rate": 4.256515626689738e-06, "loss": 3.4654, "step": 1968 }, { "epoch": 0.03, "grad_norm": 10.592523574829102, "learning_rate": 4.258678490321186e-06, "loss": 3.4128, "step": 1969 }, { "epoch": 0.03, "grad_norm": 9.204684257507324, "learning_rate": 4.260841353952634e-06, "loss": 3.4878, "step": 1970 }, { "epoch": 0.03, "grad_norm": 11.150152206420898, "learning_rate": 4.263004217584082e-06, "loss": 4.1518, "step": 1971 }, { "epoch": 0.03, "grad_norm": 10.549823760986328, "learning_rate": 4.26516708121553e-06, "loss": 3.4032, "step": 1972 }, { "epoch": 0.03, "grad_norm": 11.225201606750488, "learning_rate": 4.267329944846978e-06, "loss": 4.2956, "step": 1973 }, { "epoch": 0.03, "grad_norm": 10.85391902923584, "learning_rate": 4.269492808478426e-06, "loss": 3.7941, "step": 1974 }, { "epoch": 0.03, "grad_norm": 11.736661911010742, "learning_rate": 4.271655672109874e-06, "loss": 3.7407, "step": 1975 }, { "epoch": 0.03, "grad_norm": 9.651796340942383, "learning_rate": 4.273818535741322e-06, "loss": 3.785, "step": 1976 }, { "epoch": 0.03, "grad_norm": 11.022512435913086, "learning_rate": 4.27598139937277e-06, "loss": 3.7282, "step": 1977 }, { "epoch": 0.03, "grad_norm": 11.582168579101562, "learning_rate": 4.278144263004218e-06, "loss": 3.3654, "step": 1978 }, { "epoch": 0.03, "grad_norm": 12.048079490661621, "learning_rate": 4.280307126635666e-06, "loss": 3.0254, "step": 1979 }, { "epoch": 0.03, "grad_norm": 11.317869186401367, "learning_rate": 4.282469990267114e-06, "loss": 3.3656, "step": 1980 }, { "epoch": 0.03, "grad_norm": 11.239720344543457, "learning_rate": 4.284632853898562e-06, "loss": 3.8116, "step": 1981 }, { "epoch": 0.03, "grad_norm": 12.157413482666016, "learning_rate": 4.28679571753001e-06, "loss": 3.5964, "step": 1982 }, { "epoch": 0.03, "grad_norm": 10.926920890808105, "learning_rate": 4.288958581161458e-06, "loss": 4.0064, "step": 1983 }, { "epoch": 0.03, "grad_norm": 11.288509368896484, "learning_rate": 4.2911214447929064e-06, "loss": 3.5836, "step": 1984 }, { "epoch": 0.03, "grad_norm": 10.664522171020508, "learning_rate": 4.2932843084243545e-06, "loss": 3.9083, "step": 1985 }, { "epoch": 0.03, "grad_norm": 10.510170936584473, "learning_rate": 4.2954471720558025e-06, "loss": 4.0466, "step": 1986 }, { "epoch": 0.03, "grad_norm": 10.55146598815918, "learning_rate": 4.297610035687251e-06, "loss": 3.5502, "step": 1987 }, { "epoch": 0.03, "grad_norm": 11.41369342803955, "learning_rate": 4.299772899318699e-06, "loss": 3.2946, "step": 1988 }, { "epoch": 0.03, "grad_norm": 10.473082542419434, "learning_rate": 4.301935762950146e-06, "loss": 3.8147, "step": 1989 }, { "epoch": 0.03, "grad_norm": 9.892477035522461, "learning_rate": 4.304098626581594e-06, "loss": 4.1711, "step": 1990 }, { "epoch": 0.03, "grad_norm": 10.352715492248535, "learning_rate": 4.306261490213042e-06, "loss": 4.0323, "step": 1991 }, { "epoch": 0.03, "grad_norm": 13.473280906677246, "learning_rate": 4.30842435384449e-06, "loss": 3.6668, "step": 1992 }, { "epoch": 0.03, "grad_norm": 10.721698760986328, "learning_rate": 4.310587217475938e-06, "loss": 3.6774, "step": 1993 }, { "epoch": 0.03, "grad_norm": 9.50367546081543, "learning_rate": 4.312750081107386e-06, "loss": 3.5663, "step": 1994 }, { "epoch": 0.03, "grad_norm": 11.950716972351074, "learning_rate": 4.314912944738834e-06, "loss": 3.5134, "step": 1995 }, { "epoch": 0.03, "grad_norm": 10.943225860595703, "learning_rate": 4.317075808370282e-06, "loss": 3.9617, "step": 1996 }, { "epoch": 0.03, "grad_norm": 11.599406242370605, "learning_rate": 4.31923867200173e-06, "loss": 4.1323, "step": 1997 }, { "epoch": 0.03, "grad_norm": 10.524571418762207, "learning_rate": 4.321401535633178e-06, "loss": 2.8398, "step": 1998 }, { "epoch": 0.03, "grad_norm": 12.05900764465332, "learning_rate": 4.323564399264626e-06, "loss": 4.2179, "step": 1999 }, { "epoch": 0.03, "grad_norm": 10.263514518737793, "learning_rate": 4.325727262896075e-06, "loss": 3.6232, "step": 2000 }, { "epoch": 0.03, "grad_norm": 9.753668785095215, "learning_rate": 4.327890126527523e-06, "loss": 4.2761, "step": 2001 }, { "epoch": 0.03, "grad_norm": 9.967032432556152, "learning_rate": 4.330052990158971e-06, "loss": 3.7512, "step": 2002 }, { "epoch": 0.03, "grad_norm": 13.116646766662598, "learning_rate": 4.332215853790419e-06, "loss": 4.047, "step": 2003 }, { "epoch": 0.03, "grad_norm": 10.684859275817871, "learning_rate": 4.3343787174218674e-06, "loss": 3.3999, "step": 2004 }, { "epoch": 0.03, "grad_norm": 10.992189407348633, "learning_rate": 4.3365415810533155e-06, "loss": 3.8156, "step": 2005 }, { "epoch": 0.03, "grad_norm": 11.249190330505371, "learning_rate": 4.3387044446847636e-06, "loss": 3.9713, "step": 2006 }, { "epoch": 0.03, "grad_norm": 10.9673490524292, "learning_rate": 4.340867308316211e-06, "loss": 3.4375, "step": 2007 }, { "epoch": 0.03, "grad_norm": 10.31985855102539, "learning_rate": 4.343030171947659e-06, "loss": 3.9091, "step": 2008 }, { "epoch": 0.03, "grad_norm": 10.306764602661133, "learning_rate": 4.345193035579107e-06, "loss": 3.2591, "step": 2009 }, { "epoch": 0.03, "grad_norm": 10.963842391967773, "learning_rate": 4.347355899210555e-06, "loss": 3.3128, "step": 2010 }, { "epoch": 0.03, "grad_norm": 10.613484382629395, "learning_rate": 4.349518762842003e-06, "loss": 3.6804, "step": 2011 }, { "epoch": 0.03, "grad_norm": 10.370357513427734, "learning_rate": 4.351681626473451e-06, "loss": 3.9989, "step": 2012 }, { "epoch": 0.03, "grad_norm": 10.66666316986084, "learning_rate": 4.353844490104899e-06, "loss": 3.9765, "step": 2013 }, { "epoch": 0.03, "grad_norm": 11.654784202575684, "learning_rate": 4.356007353736347e-06, "loss": 4.0097, "step": 2014 }, { "epoch": 0.03, "grad_norm": 8.546272277832031, "learning_rate": 4.358170217367795e-06, "loss": 3.283, "step": 2015 }, { "epoch": 0.03, "grad_norm": 8.914889335632324, "learning_rate": 4.360333080999243e-06, "loss": 3.1465, "step": 2016 }, { "epoch": 0.03, "grad_norm": 9.07412338256836, "learning_rate": 4.362495944630691e-06, "loss": 3.796, "step": 2017 }, { "epoch": 0.03, "grad_norm": 10.208137512207031, "learning_rate": 4.364658808262139e-06, "loss": 3.8899, "step": 2018 }, { "epoch": 0.03, "grad_norm": 10.887442588806152, "learning_rate": 4.366821671893587e-06, "loss": 3.7087, "step": 2019 }, { "epoch": 0.03, "grad_norm": 10.158578872680664, "learning_rate": 4.368984535525035e-06, "loss": 4.0806, "step": 2020 }, { "epoch": 0.03, "grad_norm": 12.11756706237793, "learning_rate": 4.3711473991564835e-06, "loss": 3.3668, "step": 2021 }, { "epoch": 0.03, "grad_norm": 9.55527400970459, "learning_rate": 4.3733102627879315e-06, "loss": 3.7164, "step": 2022 }, { "epoch": 0.03, "grad_norm": 11.18742561340332, "learning_rate": 4.3754731264193796e-06, "loss": 3.7251, "step": 2023 }, { "epoch": 0.03, "grad_norm": 10.878097534179688, "learning_rate": 4.377635990050828e-06, "loss": 3.604, "step": 2024 }, { "epoch": 0.03, "grad_norm": 10.708375930786133, "learning_rate": 4.379798853682276e-06, "loss": 4.1313, "step": 2025 }, { "epoch": 0.03, "grad_norm": 11.08364486694336, "learning_rate": 4.381961717313724e-06, "loss": 3.4485, "step": 2026 }, { "epoch": 0.03, "grad_norm": 10.322446823120117, "learning_rate": 4.384124580945172e-06, "loss": 3.6673, "step": 2027 }, { "epoch": 0.03, "grad_norm": 11.090353012084961, "learning_rate": 4.38628744457662e-06, "loss": 3.756, "step": 2028 }, { "epoch": 0.03, "grad_norm": 9.530527114868164, "learning_rate": 4.388450308208068e-06, "loss": 3.3214, "step": 2029 }, { "epoch": 0.03, "grad_norm": 11.11340618133545, "learning_rate": 4.390613171839516e-06, "loss": 3.7749, "step": 2030 }, { "epoch": 0.03, "grad_norm": 10.82475471496582, "learning_rate": 4.392776035470964e-06, "loss": 3.8469, "step": 2031 }, { "epoch": 0.03, "grad_norm": 12.164026260375977, "learning_rate": 4.394938899102412e-06, "loss": 3.9614, "step": 2032 }, { "epoch": 0.03, "grad_norm": 10.830702781677246, "learning_rate": 4.39710176273386e-06, "loss": 3.7096, "step": 2033 }, { "epoch": 0.03, "grad_norm": 12.184755325317383, "learning_rate": 4.399264626365308e-06, "loss": 3.5882, "step": 2034 }, { "epoch": 0.03, "grad_norm": 11.587825775146484, "learning_rate": 4.401427489996756e-06, "loss": 3.27, "step": 2035 }, { "epoch": 0.03, "grad_norm": 10.710234642028809, "learning_rate": 4.403590353628204e-06, "loss": 4.4268, "step": 2036 }, { "epoch": 0.03, "grad_norm": 10.101982116699219, "learning_rate": 4.405753217259652e-06, "loss": 3.8574, "step": 2037 }, { "epoch": 0.03, "grad_norm": 9.63663101196289, "learning_rate": 4.4079160808911e-06, "loss": 3.7447, "step": 2038 }, { "epoch": 0.03, "grad_norm": 9.520685195922852, "learning_rate": 4.410078944522548e-06, "loss": 3.7528, "step": 2039 }, { "epoch": 0.03, "grad_norm": 10.672423362731934, "learning_rate": 4.4122418081539964e-06, "loss": 3.7205, "step": 2040 }, { "epoch": 0.03, "grad_norm": 11.62424087524414, "learning_rate": 4.4144046717854445e-06, "loss": 4.3597, "step": 2041 }, { "epoch": 0.03, "grad_norm": 9.114832878112793, "learning_rate": 4.4165675354168925e-06, "loss": 3.3245, "step": 2042 }, { "epoch": 0.03, "grad_norm": 10.32471752166748, "learning_rate": 4.418730399048341e-06, "loss": 3.2355, "step": 2043 }, { "epoch": 0.03, "grad_norm": 10.390552520751953, "learning_rate": 4.420893262679789e-06, "loss": 3.5964, "step": 2044 }, { "epoch": 0.03, "grad_norm": 9.586200714111328, "learning_rate": 4.423056126311237e-06, "loss": 3.1543, "step": 2045 }, { "epoch": 0.03, "grad_norm": 11.087119102478027, "learning_rate": 4.425218989942685e-06, "loss": 3.2584, "step": 2046 }, { "epoch": 0.03, "grad_norm": 10.054274559020996, "learning_rate": 4.427381853574133e-06, "loss": 3.4436, "step": 2047 }, { "epoch": 0.03, "grad_norm": 11.75163459777832, "learning_rate": 4.429544717205581e-06, "loss": 4.0825, "step": 2048 }, { "epoch": 0.03, "grad_norm": 11.451813697814941, "learning_rate": 4.431707580837029e-06, "loss": 3.9938, "step": 2049 }, { "epoch": 0.03, "grad_norm": 11.385738372802734, "learning_rate": 4.433870444468477e-06, "loss": 3.5246, "step": 2050 }, { "epoch": 0.03, "grad_norm": 10.516851425170898, "learning_rate": 4.436033308099925e-06, "loss": 4.2958, "step": 2051 }, { "epoch": 0.03, "grad_norm": 9.479621887207031, "learning_rate": 4.438196171731373e-06, "loss": 3.4767, "step": 2052 }, { "epoch": 0.03, "grad_norm": 10.15437126159668, "learning_rate": 4.44035903536282e-06, "loss": 3.2404, "step": 2053 }, { "epoch": 0.03, "grad_norm": 10.091535568237305, "learning_rate": 4.442521898994268e-06, "loss": 4.0185, "step": 2054 }, { "epoch": 0.03, "grad_norm": 10.574875831604004, "learning_rate": 4.444684762625716e-06, "loss": 3.4338, "step": 2055 }, { "epoch": 0.03, "grad_norm": 10.52383041381836, "learning_rate": 4.446847626257164e-06, "loss": 3.6154, "step": 2056 }, { "epoch": 0.03, "grad_norm": 10.830334663391113, "learning_rate": 4.4490104898886124e-06, "loss": 3.9278, "step": 2057 }, { "epoch": 0.03, "grad_norm": 12.260619163513184, "learning_rate": 4.4511733535200605e-06, "loss": 3.8112, "step": 2058 }, { "epoch": 0.03, "grad_norm": 12.825925827026367, "learning_rate": 4.4533362171515085e-06, "loss": 3.2318, "step": 2059 }, { "epoch": 0.03, "grad_norm": 11.485288619995117, "learning_rate": 4.455499080782957e-06, "loss": 3.319, "step": 2060 }, { "epoch": 0.03, "grad_norm": 11.147371292114258, "learning_rate": 4.457661944414405e-06, "loss": 4.1509, "step": 2061 }, { "epoch": 0.03, "grad_norm": 10.231050491333008, "learning_rate": 4.459824808045853e-06, "loss": 3.7679, "step": 2062 }, { "epoch": 0.03, "grad_norm": 9.61260986328125, "learning_rate": 4.461987671677301e-06, "loss": 3.8434, "step": 2063 }, { "epoch": 0.03, "grad_norm": 11.738182067871094, "learning_rate": 4.46415053530875e-06, "loss": 4.369, "step": 2064 }, { "epoch": 0.03, "grad_norm": 10.256695747375488, "learning_rate": 4.466313398940198e-06, "loss": 3.5906, "step": 2065 }, { "epoch": 0.03, "grad_norm": 10.9508638381958, "learning_rate": 4.468476262571646e-06, "loss": 3.529, "step": 2066 }, { "epoch": 0.03, "grad_norm": 9.53898811340332, "learning_rate": 4.470639126203094e-06, "loss": 3.9873, "step": 2067 }, { "epoch": 0.03, "grad_norm": 10.303522109985352, "learning_rate": 4.472801989834542e-06, "loss": 3.622, "step": 2068 }, { "epoch": 0.03, "grad_norm": 9.577413558959961, "learning_rate": 4.47496485346599e-06, "loss": 3.5421, "step": 2069 }, { "epoch": 0.03, "grad_norm": 11.220510482788086, "learning_rate": 4.477127717097438e-06, "loss": 3.7107, "step": 2070 }, { "epoch": 0.03, "grad_norm": 10.551562309265137, "learning_rate": 4.479290580728885e-06, "loss": 4.0323, "step": 2071 }, { "epoch": 0.03, "grad_norm": 10.0458402633667, "learning_rate": 4.481453444360333e-06, "loss": 4.2798, "step": 2072 }, { "epoch": 0.03, "grad_norm": 10.055509567260742, "learning_rate": 4.483616307991781e-06, "loss": 3.8915, "step": 2073 }, { "epoch": 0.03, "grad_norm": 10.642716407775879, "learning_rate": 4.485779171623229e-06, "loss": 3.812, "step": 2074 }, { "epoch": 0.03, "grad_norm": 10.638734817504883, "learning_rate": 4.487942035254677e-06, "loss": 3.47, "step": 2075 }, { "epoch": 0.03, "grad_norm": 10.321403503417969, "learning_rate": 4.490104898886125e-06, "loss": 3.844, "step": 2076 }, { "epoch": 0.03, "grad_norm": 10.32175064086914, "learning_rate": 4.4922677625175735e-06, "loss": 4.106, "step": 2077 }, { "epoch": 0.03, "grad_norm": 11.691164016723633, "learning_rate": 4.4944306261490215e-06, "loss": 4.2078, "step": 2078 }, { "epoch": 0.03, "grad_norm": 8.983863830566406, "learning_rate": 4.4965934897804696e-06, "loss": 3.8017, "step": 2079 }, { "epoch": 0.03, "grad_norm": 11.170387268066406, "learning_rate": 4.498756353411918e-06, "loss": 2.9601, "step": 2080 }, { "epoch": 0.03, "grad_norm": 9.887927055358887, "learning_rate": 4.500919217043366e-06, "loss": 3.7246, "step": 2081 }, { "epoch": 0.03, "grad_norm": 11.201828956604004, "learning_rate": 4.503082080674814e-06, "loss": 3.6268, "step": 2082 }, { "epoch": 0.03, "grad_norm": 10.851971626281738, "learning_rate": 4.505244944306262e-06, "loss": 3.4594, "step": 2083 }, { "epoch": 0.03, "grad_norm": 9.30243968963623, "learning_rate": 4.50740780793771e-06, "loss": 3.8199, "step": 2084 }, { "epoch": 0.03, "grad_norm": 11.201401710510254, "learning_rate": 4.509570671569158e-06, "loss": 4.2687, "step": 2085 }, { "epoch": 0.03, "grad_norm": 10.565817832946777, "learning_rate": 4.511733535200606e-06, "loss": 3.7292, "step": 2086 }, { "epoch": 0.03, "grad_norm": 10.25540828704834, "learning_rate": 4.513896398832054e-06, "loss": 3.9167, "step": 2087 }, { "epoch": 0.03, "grad_norm": 11.05627727508545, "learning_rate": 4.516059262463502e-06, "loss": 3.1587, "step": 2088 }, { "epoch": 0.03, "grad_norm": 10.088525772094727, "learning_rate": 4.51822212609495e-06, "loss": 2.8447, "step": 2089 }, { "epoch": 0.03, "grad_norm": 8.704828262329102, "learning_rate": 4.520384989726398e-06, "loss": 3.3038, "step": 2090 }, { "epoch": 0.03, "grad_norm": 10.38918685913086, "learning_rate": 4.522547853357846e-06, "loss": 3.5933, "step": 2091 }, { "epoch": 0.03, "grad_norm": 10.358036041259766, "learning_rate": 4.524710716989294e-06, "loss": 4.0617, "step": 2092 }, { "epoch": 0.03, "grad_norm": 12.485535621643066, "learning_rate": 4.526873580620742e-06, "loss": 3.7035, "step": 2093 }, { "epoch": 0.03, "grad_norm": 10.841439247131348, "learning_rate": 4.52903644425219e-06, "loss": 3.4854, "step": 2094 }, { "epoch": 0.03, "grad_norm": 12.705819129943848, "learning_rate": 4.531199307883638e-06, "loss": 3.3652, "step": 2095 }, { "epoch": 0.03, "grad_norm": 10.48812484741211, "learning_rate": 4.5333621715150864e-06, "loss": 3.9963, "step": 2096 }, { "epoch": 0.03, "grad_norm": 10.259241104125977, "learning_rate": 4.5355250351465345e-06, "loss": 3.9383, "step": 2097 }, { "epoch": 0.03, "grad_norm": 11.570941925048828, "learning_rate": 4.5376878987779825e-06, "loss": 3.4278, "step": 2098 }, { "epoch": 0.03, "grad_norm": 9.531996726989746, "learning_rate": 4.5398507624094306e-06, "loss": 3.8323, "step": 2099 }, { "epoch": 0.03, "grad_norm": 9.743555068969727, "learning_rate": 4.542013626040879e-06, "loss": 4.227, "step": 2100 }, { "epoch": 0.03, "grad_norm": 9.7001953125, "learning_rate": 4.544176489672327e-06, "loss": 4.0341, "step": 2101 }, { "epoch": 0.03, "grad_norm": 12.144099235534668, "learning_rate": 4.546339353303775e-06, "loss": 4.2752, "step": 2102 }, { "epoch": 0.03, "grad_norm": 10.723076820373535, "learning_rate": 4.548502216935223e-06, "loss": 3.411, "step": 2103 }, { "epoch": 0.03, "grad_norm": 11.133970260620117, "learning_rate": 4.550665080566671e-06, "loss": 3.7785, "step": 2104 }, { "epoch": 0.03, "grad_norm": 9.206442832946777, "learning_rate": 4.552827944198119e-06, "loss": 3.5427, "step": 2105 }, { "epoch": 0.03, "grad_norm": 10.900267601013184, "learning_rate": 4.554990807829567e-06, "loss": 3.6273, "step": 2106 }, { "epoch": 0.03, "grad_norm": 11.565537452697754, "learning_rate": 4.557153671461015e-06, "loss": 3.2927, "step": 2107 }, { "epoch": 0.03, "grad_norm": 10.669135093688965, "learning_rate": 4.559316535092463e-06, "loss": 3.4022, "step": 2108 }, { "epoch": 0.03, "grad_norm": 10.471318244934082, "learning_rate": 4.561479398723911e-06, "loss": 3.4768, "step": 2109 }, { "epoch": 0.03, "grad_norm": 9.448494911193848, "learning_rate": 4.563642262355359e-06, "loss": 3.5907, "step": 2110 }, { "epoch": 0.03, "grad_norm": 11.283390045166016, "learning_rate": 4.565805125986807e-06, "loss": 3.9954, "step": 2111 }, { "epoch": 0.03, "grad_norm": 10.235477447509766, "learning_rate": 4.567967989618255e-06, "loss": 3.6816, "step": 2112 }, { "epoch": 0.03, "grad_norm": 10.107405662536621, "learning_rate": 4.570130853249703e-06, "loss": 3.6663, "step": 2113 }, { "epoch": 0.03, "grad_norm": 10.11479377746582, "learning_rate": 4.572293716881151e-06, "loss": 3.7402, "step": 2114 }, { "epoch": 0.03, "grad_norm": 11.789605140686035, "learning_rate": 4.574456580512599e-06, "loss": 3.6447, "step": 2115 }, { "epoch": 0.03, "grad_norm": 10.797015190124512, "learning_rate": 4.5766194441440474e-06, "loss": 4.1463, "step": 2116 }, { "epoch": 0.03, "grad_norm": 10.798637390136719, "learning_rate": 4.578782307775495e-06, "loss": 4.0095, "step": 2117 }, { "epoch": 0.03, "grad_norm": 11.550004005432129, "learning_rate": 4.580945171406943e-06, "loss": 3.7375, "step": 2118 }, { "epoch": 0.03, "grad_norm": 10.066498756408691, "learning_rate": 4.583108035038391e-06, "loss": 3.7758, "step": 2119 }, { "epoch": 0.03, "grad_norm": 8.60429573059082, "learning_rate": 4.585270898669839e-06, "loss": 3.5055, "step": 2120 }, { "epoch": 0.03, "grad_norm": 9.399559020996094, "learning_rate": 4.587433762301287e-06, "loss": 3.4165, "step": 2121 }, { "epoch": 0.03, "grad_norm": 9.370920181274414, "learning_rate": 4.589596625932735e-06, "loss": 3.1025, "step": 2122 }, { "epoch": 0.03, "grad_norm": 10.924389839172363, "learning_rate": 4.591759489564183e-06, "loss": 3.5972, "step": 2123 }, { "epoch": 0.03, "grad_norm": 9.926405906677246, "learning_rate": 4.593922353195631e-06, "loss": 3.3437, "step": 2124 }, { "epoch": 0.03, "grad_norm": 10.593294143676758, "learning_rate": 4.596085216827079e-06, "loss": 3.291, "step": 2125 }, { "epoch": 0.03, "grad_norm": 9.884330749511719, "learning_rate": 4.598248080458527e-06, "loss": 3.7548, "step": 2126 }, { "epoch": 0.03, "grad_norm": 10.371170043945312, "learning_rate": 4.600410944089975e-06, "loss": 3.7259, "step": 2127 }, { "epoch": 0.03, "grad_norm": 11.875692367553711, "learning_rate": 4.602573807721424e-06, "loss": 3.6789, "step": 2128 }, { "epoch": 0.03, "grad_norm": 9.30859661102295, "learning_rate": 4.604736671352872e-06, "loss": 3.8728, "step": 2129 }, { "epoch": 0.03, "grad_norm": 9.801384925842285, "learning_rate": 4.60689953498432e-06, "loss": 3.929, "step": 2130 }, { "epoch": 0.03, "grad_norm": 10.581748962402344, "learning_rate": 4.609062398615768e-06, "loss": 3.4195, "step": 2131 }, { "epoch": 0.03, "grad_norm": 10.439650535583496, "learning_rate": 4.611225262247216e-06, "loss": 4.0268, "step": 2132 }, { "epoch": 0.03, "grad_norm": 11.108623504638672, "learning_rate": 4.613388125878664e-06, "loss": 3.482, "step": 2133 }, { "epoch": 0.03, "grad_norm": 10.341411590576172, "learning_rate": 4.615550989510112e-06, "loss": 3.6478, "step": 2134 }, { "epoch": 0.03, "grad_norm": 11.292657852172852, "learning_rate": 4.6177138531415596e-06, "loss": 3.6735, "step": 2135 }, { "epoch": 0.03, "grad_norm": 11.032092094421387, "learning_rate": 4.619876716773008e-06, "loss": 3.8984, "step": 2136 }, { "epoch": 0.03, "grad_norm": 11.026915550231934, "learning_rate": 4.622039580404456e-06, "loss": 3.65, "step": 2137 }, { "epoch": 0.03, "grad_norm": 9.919489860534668, "learning_rate": 4.624202444035904e-06, "loss": 3.4092, "step": 2138 }, { "epoch": 0.03, "grad_norm": 10.868501663208008, "learning_rate": 4.626365307667352e-06, "loss": 3.8762, "step": 2139 }, { "epoch": 0.03, "grad_norm": 11.378469467163086, "learning_rate": 4.6285281712988e-06, "loss": 3.5078, "step": 2140 }, { "epoch": 0.03, "grad_norm": 11.311683654785156, "learning_rate": 4.630691034930248e-06, "loss": 3.402, "step": 2141 }, { "epoch": 0.03, "grad_norm": 10.064963340759277, "learning_rate": 4.632853898561696e-06, "loss": 3.8909, "step": 2142 }, { "epoch": 0.03, "grad_norm": 12.498734474182129, "learning_rate": 4.635016762193144e-06, "loss": 3.611, "step": 2143 }, { "epoch": 0.03, "grad_norm": 11.539716720581055, "learning_rate": 4.637179625824592e-06, "loss": 3.6982, "step": 2144 }, { "epoch": 0.03, "grad_norm": 11.013237953186035, "learning_rate": 4.63934248945604e-06, "loss": 3.314, "step": 2145 }, { "epoch": 0.03, "grad_norm": 11.680042266845703, "learning_rate": 4.641505353087488e-06, "loss": 3.7674, "step": 2146 }, { "epoch": 0.03, "grad_norm": 12.109305381774902, "learning_rate": 4.643668216718936e-06, "loss": 4.1275, "step": 2147 }, { "epoch": 0.03, "grad_norm": 10.577526092529297, "learning_rate": 4.645831080350384e-06, "loss": 3.8088, "step": 2148 }, { "epoch": 0.03, "grad_norm": 9.351250648498535, "learning_rate": 4.647993943981832e-06, "loss": 3.4536, "step": 2149 }, { "epoch": 0.03, "grad_norm": 9.846325874328613, "learning_rate": 4.65015680761328e-06, "loss": 4.0873, "step": 2150 }, { "epoch": 0.03, "grad_norm": 9.945269584655762, "learning_rate": 4.652319671244728e-06, "loss": 3.6936, "step": 2151 }, { "epoch": 0.03, "grad_norm": 9.080169677734375, "learning_rate": 4.654482534876176e-06, "loss": 3.2495, "step": 2152 }, { "epoch": 0.03, "grad_norm": 9.278690338134766, "learning_rate": 4.6566453985076245e-06, "loss": 3.5815, "step": 2153 }, { "epoch": 0.03, "grad_norm": 11.33627700805664, "learning_rate": 4.6588082621390725e-06, "loss": 3.5711, "step": 2154 }, { "epoch": 0.03, "grad_norm": 10.361258506774902, "learning_rate": 4.6609711257705206e-06, "loss": 3.9912, "step": 2155 }, { "epoch": 0.03, "grad_norm": 9.954649925231934, "learning_rate": 4.663133989401969e-06, "loss": 2.8727, "step": 2156 }, { "epoch": 0.03, "grad_norm": 11.598698616027832, "learning_rate": 4.665296853033417e-06, "loss": 3.2735, "step": 2157 }, { "epoch": 0.03, "grad_norm": 12.26553726196289, "learning_rate": 4.667459716664865e-06, "loss": 3.2718, "step": 2158 }, { "epoch": 0.03, "grad_norm": 11.051987648010254, "learning_rate": 4.669622580296313e-06, "loss": 3.5949, "step": 2159 }, { "epoch": 0.03, "grad_norm": 10.18330192565918, "learning_rate": 4.671785443927761e-06, "loss": 4.0008, "step": 2160 }, { "epoch": 0.03, "grad_norm": 9.605889320373535, "learning_rate": 4.673948307559209e-06, "loss": 3.9717, "step": 2161 }, { "epoch": 0.03, "grad_norm": 9.750895500183105, "learning_rate": 4.676111171190657e-06, "loss": 3.6678, "step": 2162 }, { "epoch": 0.03, "grad_norm": 9.673559188842773, "learning_rate": 4.678274034822105e-06, "loss": 3.5271, "step": 2163 }, { "epoch": 0.03, "grad_norm": 10.596638679504395, "learning_rate": 4.680436898453553e-06, "loss": 3.971, "step": 2164 }, { "epoch": 0.03, "grad_norm": 9.754523277282715, "learning_rate": 4.682599762085001e-06, "loss": 3.655, "step": 2165 }, { "epoch": 0.03, "grad_norm": 9.636960983276367, "learning_rate": 4.684762625716449e-06, "loss": 3.9947, "step": 2166 }, { "epoch": 0.03, "grad_norm": 8.706755638122559, "learning_rate": 4.686925489347897e-06, "loss": 3.8915, "step": 2167 }, { "epoch": 0.03, "grad_norm": 10.577832221984863, "learning_rate": 4.689088352979345e-06, "loss": 3.7459, "step": 2168 }, { "epoch": 0.03, "grad_norm": 9.683562278747559, "learning_rate": 4.691251216610793e-06, "loss": 3.6858, "step": 2169 }, { "epoch": 0.03, "grad_norm": 10.504602432250977, "learning_rate": 4.693414080242241e-06, "loss": 3.519, "step": 2170 }, { "epoch": 0.03, "grad_norm": 9.699719429016113, "learning_rate": 4.695576943873689e-06, "loss": 3.8381, "step": 2171 }, { "epoch": 0.03, "grad_norm": 10.015474319458008, "learning_rate": 4.6977398075051374e-06, "loss": 3.9159, "step": 2172 }, { "epoch": 0.03, "grad_norm": 12.182249069213867, "learning_rate": 4.6999026711365855e-06, "loss": 3.4614, "step": 2173 }, { "epoch": 0.03, "grad_norm": 10.66832447052002, "learning_rate": 4.7020655347680335e-06, "loss": 3.1302, "step": 2174 }, { "epoch": 0.03, "grad_norm": 11.449474334716797, "learning_rate": 4.704228398399482e-06, "loss": 3.6495, "step": 2175 }, { "epoch": 0.03, "grad_norm": 10.513080596923828, "learning_rate": 4.70639126203093e-06, "loss": 3.7168, "step": 2176 }, { "epoch": 0.03, "grad_norm": 10.105405807495117, "learning_rate": 4.708554125662378e-06, "loss": 3.6806, "step": 2177 }, { "epoch": 0.03, "grad_norm": 10.94351577758789, "learning_rate": 4.710716989293826e-06, "loss": 3.767, "step": 2178 }, { "epoch": 0.03, "grad_norm": 9.757711410522461, "learning_rate": 4.712879852925274e-06, "loss": 3.7668, "step": 2179 }, { "epoch": 0.03, "grad_norm": 10.550037384033203, "learning_rate": 4.715042716556721e-06, "loss": 3.5328, "step": 2180 }, { "epoch": 0.03, "grad_norm": 9.438092231750488, "learning_rate": 4.717205580188169e-06, "loss": 3.8898, "step": 2181 }, { "epoch": 0.03, "grad_norm": 9.31119155883789, "learning_rate": 4.719368443819617e-06, "loss": 3.9202, "step": 2182 }, { "epoch": 0.03, "grad_norm": 9.971545219421387, "learning_rate": 4.721531307451065e-06, "loss": 3.3637, "step": 2183 }, { "epoch": 0.03, "grad_norm": 9.07339859008789, "learning_rate": 4.723694171082513e-06, "loss": 3.2591, "step": 2184 }, { "epoch": 0.03, "grad_norm": 9.867440223693848, "learning_rate": 4.725857034713961e-06, "loss": 3.0148, "step": 2185 }, { "epoch": 0.03, "grad_norm": 9.731063842773438, "learning_rate": 4.728019898345409e-06, "loss": 3.6233, "step": 2186 }, { "epoch": 0.03, "grad_norm": 11.492912292480469, "learning_rate": 4.730182761976857e-06, "loss": 3.3503, "step": 2187 }, { "epoch": 0.03, "grad_norm": 9.405911445617676, "learning_rate": 4.732345625608305e-06, "loss": 3.9011, "step": 2188 }, { "epoch": 0.03, "grad_norm": 10.31469440460205, "learning_rate": 4.7345084892397534e-06, "loss": 3.8864, "step": 2189 }, { "epoch": 0.03, "grad_norm": 11.510679244995117, "learning_rate": 4.7366713528712015e-06, "loss": 3.3798, "step": 2190 }, { "epoch": 0.03, "grad_norm": 11.750907897949219, "learning_rate": 4.7388342165026496e-06, "loss": 2.9612, "step": 2191 }, { "epoch": 0.03, "grad_norm": 10.26274585723877, "learning_rate": 4.7409970801340984e-06, "loss": 3.6002, "step": 2192 }, { "epoch": 0.03, "grad_norm": 9.80162239074707, "learning_rate": 4.7431599437655465e-06, "loss": 3.2408, "step": 2193 }, { "epoch": 0.03, "grad_norm": 10.3907470703125, "learning_rate": 4.7453228073969946e-06, "loss": 3.5376, "step": 2194 }, { "epoch": 0.03, "grad_norm": 9.877833366394043, "learning_rate": 4.747485671028443e-06, "loss": 3.9663, "step": 2195 }, { "epoch": 0.03, "grad_norm": 10.670867919921875, "learning_rate": 4.749648534659891e-06, "loss": 3.4636, "step": 2196 }, { "epoch": 0.03, "grad_norm": 12.951618194580078, "learning_rate": 4.751811398291339e-06, "loss": 3.8979, "step": 2197 }, { "epoch": 0.03, "grad_norm": 10.465438842773438, "learning_rate": 4.753974261922786e-06, "loss": 3.1647, "step": 2198 }, { "epoch": 0.03, "grad_norm": 11.018633842468262, "learning_rate": 4.756137125554234e-06, "loss": 3.5708, "step": 2199 }, { "epoch": 0.03, "grad_norm": 10.326204299926758, "learning_rate": 4.758299989185682e-06, "loss": 3.4811, "step": 2200 }, { "epoch": 0.03, "grad_norm": 11.24254035949707, "learning_rate": 4.76046285281713e-06, "loss": 3.7068, "step": 2201 }, { "epoch": 0.03, "grad_norm": 9.890419006347656, "learning_rate": 4.762625716448578e-06, "loss": 3.573, "step": 2202 }, { "epoch": 0.03, "grad_norm": 11.871613502502441, "learning_rate": 4.764788580080026e-06, "loss": 4.1301, "step": 2203 }, { "epoch": 0.03, "grad_norm": 12.86452865600586, "learning_rate": 4.766951443711474e-06, "loss": 3.9053, "step": 2204 }, { "epoch": 0.03, "grad_norm": 8.625645637512207, "learning_rate": 4.769114307342922e-06, "loss": 3.4707, "step": 2205 }, { "epoch": 0.03, "grad_norm": 11.952066421508789, "learning_rate": 4.77127717097437e-06, "loss": 3.4842, "step": 2206 }, { "epoch": 0.03, "grad_norm": 10.43670654296875, "learning_rate": 4.773440034605818e-06, "loss": 3.037, "step": 2207 }, { "epoch": 0.03, "grad_norm": 11.268943786621094, "learning_rate": 4.775602898237266e-06, "loss": 2.9475, "step": 2208 }, { "epoch": 0.03, "grad_norm": 10.027173042297363, "learning_rate": 4.7777657618687145e-06, "loss": 2.8028, "step": 2209 }, { "epoch": 0.03, "grad_norm": 9.586965560913086, "learning_rate": 4.7799286255001625e-06, "loss": 3.286, "step": 2210 }, { "epoch": 0.03, "grad_norm": 10.1577730178833, "learning_rate": 4.7820914891316106e-06, "loss": 3.7988, "step": 2211 }, { "epoch": 0.03, "grad_norm": 11.054891586303711, "learning_rate": 4.784254352763059e-06, "loss": 3.744, "step": 2212 }, { "epoch": 0.03, "grad_norm": 10.609749794006348, "learning_rate": 4.786417216394507e-06, "loss": 3.4574, "step": 2213 }, { "epoch": 0.03, "grad_norm": 11.028209686279297, "learning_rate": 4.788580080025955e-06, "loss": 3.8843, "step": 2214 }, { "epoch": 0.03, "grad_norm": 10.733920097351074, "learning_rate": 4.790742943657403e-06, "loss": 4.1076, "step": 2215 }, { "epoch": 0.03, "grad_norm": 10.124360084533691, "learning_rate": 4.792905807288851e-06, "loss": 3.4496, "step": 2216 }, { "epoch": 0.03, "grad_norm": 10.601357460021973, "learning_rate": 4.795068670920299e-06, "loss": 3.5861, "step": 2217 }, { "epoch": 0.03, "grad_norm": 9.10824966430664, "learning_rate": 4.797231534551747e-06, "loss": 3.3618, "step": 2218 }, { "epoch": 0.03, "grad_norm": 10.567831039428711, "learning_rate": 4.799394398183195e-06, "loss": 2.9043, "step": 2219 }, { "epoch": 0.03, "grad_norm": 10.74822998046875, "learning_rate": 4.801557261814643e-06, "loss": 3.8552, "step": 2220 }, { "epoch": 0.03, "grad_norm": 10.174187660217285, "learning_rate": 4.803720125446091e-06, "loss": 3.7979, "step": 2221 }, { "epoch": 0.03, "grad_norm": 9.706856727600098, "learning_rate": 4.805882989077539e-06, "loss": 3.3586, "step": 2222 }, { "epoch": 0.03, "grad_norm": 9.958740234375, "learning_rate": 4.808045852708987e-06, "loss": 3.7088, "step": 2223 }, { "epoch": 0.03, "grad_norm": 10.16448974609375, "learning_rate": 4.810208716340435e-06, "loss": 4.1486, "step": 2224 }, { "epoch": 0.03, "grad_norm": 11.2947998046875, "learning_rate": 4.812371579971883e-06, "loss": 3.2032, "step": 2225 }, { "epoch": 0.03, "grad_norm": 11.366471290588379, "learning_rate": 4.814534443603331e-06, "loss": 3.5684, "step": 2226 }, { "epoch": 0.03, "grad_norm": 9.2433443069458, "learning_rate": 4.816697307234779e-06, "loss": 3.1921, "step": 2227 }, { "epoch": 0.03, "grad_norm": 9.672091484069824, "learning_rate": 4.8188601708662274e-06, "loss": 3.7856, "step": 2228 }, { "epoch": 0.03, "grad_norm": 9.621084213256836, "learning_rate": 4.8210230344976755e-06, "loss": 3.6863, "step": 2229 }, { "epoch": 0.03, "grad_norm": 12.227804183959961, "learning_rate": 4.8231858981291235e-06, "loss": 3.8817, "step": 2230 }, { "epoch": 0.03, "grad_norm": 9.3853178024292, "learning_rate": 4.825348761760572e-06, "loss": 3.6359, "step": 2231 }, { "epoch": 0.03, "grad_norm": 11.584009170532227, "learning_rate": 4.82751162539202e-06, "loss": 3.8383, "step": 2232 }, { "epoch": 0.03, "grad_norm": 10.254977226257324, "learning_rate": 4.829674489023468e-06, "loss": 3.5761, "step": 2233 }, { "epoch": 0.03, "grad_norm": 10.59468936920166, "learning_rate": 4.831837352654916e-06, "loss": 3.588, "step": 2234 }, { "epoch": 0.03, "grad_norm": 10.359749794006348, "learning_rate": 4.834000216286364e-06, "loss": 3.764, "step": 2235 }, { "epoch": 0.03, "grad_norm": 11.217177391052246, "learning_rate": 4.836163079917812e-06, "loss": 4.0638, "step": 2236 }, { "epoch": 0.03, "grad_norm": 10.482766151428223, "learning_rate": 4.83832594354926e-06, "loss": 3.2564, "step": 2237 }, { "epoch": 0.03, "grad_norm": 8.884332656860352, "learning_rate": 4.840488807180708e-06, "loss": 3.3847, "step": 2238 }, { "epoch": 0.03, "grad_norm": 10.886794090270996, "learning_rate": 4.842651670812156e-06, "loss": 3.7222, "step": 2239 }, { "epoch": 0.03, "grad_norm": 11.70871639251709, "learning_rate": 4.844814534443604e-06, "loss": 3.9095, "step": 2240 }, { "epoch": 0.03, "grad_norm": 10.15091609954834, "learning_rate": 4.846977398075052e-06, "loss": 3.9114, "step": 2241 }, { "epoch": 0.03, "grad_norm": 9.771370887756348, "learning_rate": 4.8491402617065e-06, "loss": 3.6162, "step": 2242 }, { "epoch": 0.03, "grad_norm": 9.78471851348877, "learning_rate": 4.851303125337948e-06, "loss": 3.6031, "step": 2243 }, { "epoch": 0.03, "grad_norm": 9.937443733215332, "learning_rate": 4.853465988969395e-06, "loss": 3.4395, "step": 2244 }, { "epoch": 0.03, "grad_norm": 10.363429069519043, "learning_rate": 4.8556288526008434e-06, "loss": 3.6046, "step": 2245 }, { "epoch": 0.03, "grad_norm": 10.446381568908691, "learning_rate": 4.8577917162322915e-06, "loss": 3.3801, "step": 2246 }, { "epoch": 0.03, "grad_norm": 10.733307838439941, "learning_rate": 4.8599545798637395e-06, "loss": 3.133, "step": 2247 }, { "epoch": 0.03, "grad_norm": 10.469406127929688, "learning_rate": 4.862117443495188e-06, "loss": 3.5201, "step": 2248 }, { "epoch": 0.03, "grad_norm": 10.07247543334961, "learning_rate": 4.864280307126636e-06, "loss": 3.4819, "step": 2249 }, { "epoch": 0.03, "grad_norm": 10.80955696105957, "learning_rate": 4.866443170758084e-06, "loss": 4.0289, "step": 2250 }, { "epoch": 0.03, "grad_norm": 10.474531173706055, "learning_rate": 4.868606034389532e-06, "loss": 3.892, "step": 2251 }, { "epoch": 0.03, "grad_norm": 11.148468017578125, "learning_rate": 4.87076889802098e-06, "loss": 3.7555, "step": 2252 }, { "epoch": 0.03, "grad_norm": 10.846901893615723, "learning_rate": 4.872931761652428e-06, "loss": 3.5601, "step": 2253 }, { "epoch": 0.03, "grad_norm": 11.18775463104248, "learning_rate": 4.875094625283876e-06, "loss": 3.5224, "step": 2254 }, { "epoch": 0.03, "grad_norm": 10.735895156860352, "learning_rate": 4.877257488915324e-06, "loss": 3.1322, "step": 2255 }, { "epoch": 0.03, "grad_norm": 10.768619537353516, "learning_rate": 4.879420352546773e-06, "loss": 3.4113, "step": 2256 }, { "epoch": 0.03, "grad_norm": 10.6668119430542, "learning_rate": 4.881583216178221e-06, "loss": 3.4057, "step": 2257 }, { "epoch": 0.03, "grad_norm": 9.805294036865234, "learning_rate": 4.883746079809669e-06, "loss": 3.7415, "step": 2258 }, { "epoch": 0.03, "grad_norm": 10.108779907226562, "learning_rate": 4.885908943441117e-06, "loss": 3.3483, "step": 2259 }, { "epoch": 0.03, "grad_norm": 10.784353256225586, "learning_rate": 4.888071807072565e-06, "loss": 3.5882, "step": 2260 }, { "epoch": 0.03, "grad_norm": 12.226192474365234, "learning_rate": 4.890234670704013e-06, "loss": 3.2, "step": 2261 }, { "epoch": 0.03, "grad_norm": 10.606486320495605, "learning_rate": 4.89239753433546e-06, "loss": 3.4355, "step": 2262 }, { "epoch": 0.03, "grad_norm": 8.365019798278809, "learning_rate": 4.894560397966908e-06, "loss": 3.6667, "step": 2263 }, { "epoch": 0.03, "grad_norm": 10.150257110595703, "learning_rate": 4.896723261598356e-06, "loss": 3.588, "step": 2264 }, { "epoch": 0.03, "grad_norm": 11.084391593933105, "learning_rate": 4.8988861252298045e-06, "loss": 4.1398, "step": 2265 }, { "epoch": 0.03, "grad_norm": 11.57339859008789, "learning_rate": 4.9010489888612525e-06, "loss": 3.5871, "step": 2266 }, { "epoch": 0.03, "grad_norm": 9.947009086608887, "learning_rate": 4.9032118524927006e-06, "loss": 4.0049, "step": 2267 }, { "epoch": 0.03, "grad_norm": 10.398591041564941, "learning_rate": 4.905374716124149e-06, "loss": 3.5908, "step": 2268 }, { "epoch": 0.03, "grad_norm": 10.44643783569336, "learning_rate": 4.907537579755597e-06, "loss": 3.3827, "step": 2269 }, { "epoch": 0.03, "grad_norm": 10.745467185974121, "learning_rate": 4.909700443387045e-06, "loss": 3.7275, "step": 2270 }, { "epoch": 0.03, "grad_norm": 9.744597434997559, "learning_rate": 4.911863307018493e-06, "loss": 2.9106, "step": 2271 }, { "epoch": 0.03, "grad_norm": 13.630125045776367, "learning_rate": 4.914026170649941e-06, "loss": 3.5111, "step": 2272 }, { "epoch": 0.03, "grad_norm": 10.47886848449707, "learning_rate": 4.916189034281389e-06, "loss": 3.1587, "step": 2273 }, { "epoch": 0.03, "grad_norm": 10.66961669921875, "learning_rate": 4.918351897912837e-06, "loss": 4.1143, "step": 2274 }, { "epoch": 0.03, "grad_norm": 11.097207069396973, "learning_rate": 4.920514761544285e-06, "loss": 3.7752, "step": 2275 }, { "epoch": 0.03, "grad_norm": 9.971612930297852, "learning_rate": 4.922677625175733e-06, "loss": 3.7804, "step": 2276 }, { "epoch": 0.03, "grad_norm": 9.961043357849121, "learning_rate": 4.924840488807181e-06, "loss": 3.4146, "step": 2277 }, { "epoch": 0.03, "grad_norm": 9.925165176391602, "learning_rate": 4.927003352438629e-06, "loss": 3.2958, "step": 2278 }, { "epoch": 0.03, "grad_norm": 10.589534759521484, "learning_rate": 4.929166216070077e-06, "loss": 3.7281, "step": 2279 }, { "epoch": 0.03, "grad_norm": 10.429206848144531, "learning_rate": 4.931329079701525e-06, "loss": 3.6606, "step": 2280 }, { "epoch": 0.03, "grad_norm": 10.678339958190918, "learning_rate": 4.933491943332973e-06, "loss": 3.0599, "step": 2281 }, { "epoch": 0.03, "grad_norm": 9.078506469726562, "learning_rate": 4.935654806964421e-06, "loss": 3.9145, "step": 2282 }, { "epoch": 0.03, "grad_norm": 10.365097999572754, "learning_rate": 4.937817670595869e-06, "loss": 3.3388, "step": 2283 }, { "epoch": 0.03, "grad_norm": 10.960603713989258, "learning_rate": 4.9399805342273174e-06, "loss": 3.8438, "step": 2284 }, { "epoch": 0.03, "grad_norm": 10.32898998260498, "learning_rate": 4.9421433978587655e-06, "loss": 3.237, "step": 2285 }, { "epoch": 0.03, "grad_norm": 10.98889446258545, "learning_rate": 4.9443062614902135e-06, "loss": 3.1332, "step": 2286 }, { "epoch": 0.03, "grad_norm": 10.745086669921875, "learning_rate": 4.9464691251216616e-06, "loss": 3.4416, "step": 2287 }, { "epoch": 0.03, "grad_norm": 10.202439308166504, "learning_rate": 4.94863198875311e-06, "loss": 3.319, "step": 2288 }, { "epoch": 0.03, "grad_norm": 10.093863487243652, "learning_rate": 4.950794852384558e-06, "loss": 3.6295, "step": 2289 }, { "epoch": 0.03, "grad_norm": 10.53038501739502, "learning_rate": 4.952957716016006e-06, "loss": 4.0315, "step": 2290 }, { "epoch": 0.03, "grad_norm": 10.870161056518555, "learning_rate": 4.955120579647454e-06, "loss": 3.0707, "step": 2291 }, { "epoch": 0.03, "grad_norm": 10.089540481567383, "learning_rate": 4.957283443278902e-06, "loss": 3.4785, "step": 2292 }, { "epoch": 0.03, "grad_norm": 9.671164512634277, "learning_rate": 4.95944630691035e-06, "loss": 3.3973, "step": 2293 }, { "epoch": 0.03, "grad_norm": 11.110754013061523, "learning_rate": 4.961609170541798e-06, "loss": 3.6761, "step": 2294 }, { "epoch": 0.03, "grad_norm": 10.276313781738281, "learning_rate": 4.963772034173246e-06, "loss": 3.9836, "step": 2295 }, { "epoch": 0.03, "grad_norm": 9.701604843139648, "learning_rate": 4.965934897804694e-06, "loss": 3.4613, "step": 2296 }, { "epoch": 0.03, "grad_norm": 10.5152587890625, "learning_rate": 4.968097761436142e-06, "loss": 3.5785, "step": 2297 }, { "epoch": 0.03, "grad_norm": 11.874701499938965, "learning_rate": 4.97026062506759e-06, "loss": 3.6249, "step": 2298 }, { "epoch": 0.03, "grad_norm": 10.26459789276123, "learning_rate": 4.972423488699038e-06, "loss": 3.6894, "step": 2299 }, { "epoch": 0.03, "grad_norm": 10.564356803894043, "learning_rate": 4.974586352330486e-06, "loss": 3.8185, "step": 2300 }, { "epoch": 0.03, "grad_norm": 10.474166870117188, "learning_rate": 4.976749215961934e-06, "loss": 3.758, "step": 2301 }, { "epoch": 0.03, "grad_norm": 11.797767639160156, "learning_rate": 4.978912079593382e-06, "loss": 3.497, "step": 2302 }, { "epoch": 0.03, "grad_norm": 10.241287231445312, "learning_rate": 4.98107494322483e-06, "loss": 4.012, "step": 2303 }, { "epoch": 0.03, "grad_norm": 10.567071914672852, "learning_rate": 4.9832378068562784e-06, "loss": 3.3888, "step": 2304 }, { "epoch": 0.03, "grad_norm": 9.9674072265625, "learning_rate": 4.9854006704877265e-06, "loss": 3.4981, "step": 2305 }, { "epoch": 0.03, "grad_norm": 8.604543685913086, "learning_rate": 4.9875635341191745e-06, "loss": 3.6228, "step": 2306 }, { "epoch": 0.03, "grad_norm": 9.956422805786133, "learning_rate": 4.989726397750623e-06, "loss": 3.4812, "step": 2307 }, { "epoch": 0.03, "grad_norm": 10.224922180175781, "learning_rate": 4.99188926138207e-06, "loss": 3.4212, "step": 2308 }, { "epoch": 0.03, "grad_norm": 9.485383987426758, "learning_rate": 4.994052125013518e-06, "loss": 3.8829, "step": 2309 }, { "epoch": 0.03, "grad_norm": 10.44140625, "learning_rate": 4.996214988644966e-06, "loss": 3.5269, "step": 2310 }, { "epoch": 0.03, "grad_norm": 11.018485069274902, "learning_rate": 4.998377852276414e-06, "loss": 4.0219, "step": 2311 }, { "epoch": 0.03, "grad_norm": 11.190449714660645, "learning_rate": 5.000540715907862e-06, "loss": 3.9689, "step": 2312 }, { "epoch": 0.03, "grad_norm": 9.724618911743164, "learning_rate": 5.002703579539311e-06, "loss": 3.112, "step": 2313 }, { "epoch": 0.03, "grad_norm": 11.648843765258789, "learning_rate": 5.004866443170758e-06, "loss": 4.0188, "step": 2314 }, { "epoch": 0.03, "grad_norm": 10.117560386657715, "learning_rate": 5.007029306802207e-06, "loss": 3.1279, "step": 2315 }, { "epoch": 0.03, "grad_norm": 9.989973068237305, "learning_rate": 5.009192170433654e-06, "loss": 2.9585, "step": 2316 }, { "epoch": 0.03, "grad_norm": 9.6884126663208, "learning_rate": 5.011355034065103e-06, "loss": 3.0608, "step": 2317 }, { "epoch": 0.03, "grad_norm": 9.90864086151123, "learning_rate": 5.01351789769655e-06, "loss": 3.617, "step": 2318 }, { "epoch": 0.03, "grad_norm": 10.796957969665527, "learning_rate": 5.015680761327999e-06, "loss": 3.4031, "step": 2319 }, { "epoch": 0.03, "grad_norm": 9.917610168457031, "learning_rate": 5.017843624959446e-06, "loss": 3.5002, "step": 2320 }, { "epoch": 0.03, "grad_norm": 9.384073257446289, "learning_rate": 5.020006488590895e-06, "loss": 3.2415, "step": 2321 }, { "epoch": 0.03, "grad_norm": 10.89614486694336, "learning_rate": 5.0221693522223425e-06, "loss": 3.4807, "step": 2322 }, { "epoch": 0.03, "grad_norm": 10.766473770141602, "learning_rate": 5.024332215853791e-06, "loss": 2.961, "step": 2323 }, { "epoch": 0.03, "grad_norm": 11.659951210021973, "learning_rate": 5.026495079485239e-06, "loss": 3.548, "step": 2324 }, { "epoch": 0.03, "grad_norm": 10.444595336914062, "learning_rate": 5.0286579431166875e-06, "loss": 3.1639, "step": 2325 }, { "epoch": 0.03, "grad_norm": 10.598130226135254, "learning_rate": 5.030820806748135e-06, "loss": 3.5092, "step": 2326 }, { "epoch": 0.03, "grad_norm": 11.436517715454102, "learning_rate": 5.032983670379583e-06, "loss": 3.1882, "step": 2327 }, { "epoch": 0.03, "grad_norm": 11.57144546508789, "learning_rate": 5.035146534011031e-06, "loss": 3.2149, "step": 2328 }, { "epoch": 0.03, "grad_norm": 9.353530883789062, "learning_rate": 5.037309397642479e-06, "loss": 3.0704, "step": 2329 }, { "epoch": 0.03, "grad_norm": 9.927092552185059, "learning_rate": 5.039472261273927e-06, "loss": 3.4749, "step": 2330 }, { "epoch": 0.03, "grad_norm": 10.711310386657715, "learning_rate": 5.041635124905375e-06, "loss": 3.3588, "step": 2331 }, { "epoch": 0.03, "grad_norm": 9.901175498962402, "learning_rate": 5.043797988536823e-06, "loss": 3.7271, "step": 2332 }, { "epoch": 0.03, "grad_norm": 10.519326210021973, "learning_rate": 5.045960852168271e-06, "loss": 3.6831, "step": 2333 }, { "epoch": 0.03, "grad_norm": 10.559355735778809, "learning_rate": 5.048123715799719e-06, "loss": 3.1155, "step": 2334 }, { "epoch": 0.03, "grad_norm": 10.85379409790039, "learning_rate": 5.050286579431167e-06, "loss": 3.0923, "step": 2335 }, { "epoch": 0.03, "grad_norm": 9.602747917175293, "learning_rate": 5.052449443062616e-06, "loss": 3.4248, "step": 2336 }, { "epoch": 0.03, "grad_norm": 9.949142456054688, "learning_rate": 5.054612306694063e-06, "loss": 3.2372, "step": 2337 }, { "epoch": 0.03, "grad_norm": 10.01089859008789, "learning_rate": 5.056775170325512e-06, "loss": 3.5642, "step": 2338 }, { "epoch": 0.03, "grad_norm": 9.77535343170166, "learning_rate": 5.058938033956959e-06, "loss": 3.9191, "step": 2339 }, { "epoch": 0.03, "grad_norm": 9.803423881530762, "learning_rate": 5.061100897588408e-06, "loss": 3.5813, "step": 2340 }, { "epoch": 0.03, "grad_norm": 12.270284652709961, "learning_rate": 5.0632637612198555e-06, "loss": 3.6903, "step": 2341 }, { "epoch": 0.03, "grad_norm": 11.104806900024414, "learning_rate": 5.065426624851304e-06, "loss": 3.8193, "step": 2342 }, { "epoch": 0.03, "grad_norm": 10.499983787536621, "learning_rate": 5.0675894884827516e-06, "loss": 4.1959, "step": 2343 }, { "epoch": 0.03, "grad_norm": 10.176310539245605, "learning_rate": 5.0697523521142e-06, "loss": 3.5659, "step": 2344 }, { "epoch": 0.03, "grad_norm": 9.69515609741211, "learning_rate": 5.071915215745648e-06, "loss": 3.5684, "step": 2345 }, { "epoch": 0.03, "grad_norm": 11.347491264343262, "learning_rate": 5.074078079377096e-06, "loss": 3.5295, "step": 2346 }, { "epoch": 0.03, "grad_norm": 9.563339233398438, "learning_rate": 5.076240943008544e-06, "loss": 3.485, "step": 2347 }, { "epoch": 0.03, "grad_norm": 9.183595657348633, "learning_rate": 5.078403806639992e-06, "loss": 3.5073, "step": 2348 }, { "epoch": 0.03, "grad_norm": 9.62350845336914, "learning_rate": 5.08056667027144e-06, "loss": 3.4075, "step": 2349 }, { "epoch": 0.03, "grad_norm": 11.70915699005127, "learning_rate": 5.082729533902888e-06, "loss": 3.9061, "step": 2350 }, { "epoch": 0.03, "grad_norm": 10.831521034240723, "learning_rate": 5.084892397534336e-06, "loss": 3.8069, "step": 2351 }, { "epoch": 0.03, "grad_norm": 9.567941665649414, "learning_rate": 5.087055261165784e-06, "loss": 3.4305, "step": 2352 }, { "epoch": 0.03, "grad_norm": 10.07416820526123, "learning_rate": 5.089218124797231e-06, "loss": 3.7582, "step": 2353 }, { "epoch": 0.03, "grad_norm": 10.532370567321777, "learning_rate": 5.09138098842868e-06, "loss": 3.2983, "step": 2354 }, { "epoch": 0.03, "grad_norm": 10.573648452758789, "learning_rate": 5.093543852060127e-06, "loss": 3.627, "step": 2355 }, { "epoch": 0.03, "grad_norm": 8.818343162536621, "learning_rate": 5.095706715691576e-06, "loss": 3.5691, "step": 2356 }, { "epoch": 0.03, "grad_norm": 10.489734649658203, "learning_rate": 5.0978695793230234e-06, "loss": 3.1862, "step": 2357 }, { "epoch": 0.03, "grad_norm": 9.986918449401855, "learning_rate": 5.100032442954472e-06, "loss": 3.2043, "step": 2358 }, { "epoch": 0.03, "grad_norm": 9.560302734375, "learning_rate": 5.1021953065859195e-06, "loss": 3.5908, "step": 2359 }, { "epoch": 0.03, "grad_norm": 10.013303756713867, "learning_rate": 5.1043581702173684e-06, "loss": 2.6742, "step": 2360 }, { "epoch": 0.03, "grad_norm": 11.290436744689941, "learning_rate": 5.106521033848816e-06, "loss": 4.0365, "step": 2361 }, { "epoch": 0.03, "grad_norm": 9.365814208984375, "learning_rate": 5.1086838974802645e-06, "loss": 3.1893, "step": 2362 }, { "epoch": 0.03, "grad_norm": 9.429636001586914, "learning_rate": 5.110846761111712e-06, "loss": 3.2619, "step": 2363 }, { "epoch": 0.03, "grad_norm": 9.724535942077637, "learning_rate": 5.113009624743161e-06, "loss": 3.309, "step": 2364 }, { "epoch": 0.03, "grad_norm": 11.278449058532715, "learning_rate": 5.115172488374608e-06, "loss": 4.0158, "step": 2365 }, { "epoch": 0.03, "grad_norm": 10.789223670959473, "learning_rate": 5.117335352006057e-06, "loss": 3.8097, "step": 2366 }, { "epoch": 0.03, "grad_norm": 10.008976936340332, "learning_rate": 5.119498215637504e-06, "loss": 3.5125, "step": 2367 }, { "epoch": 0.03, "grad_norm": 11.58131217956543, "learning_rate": 5.121661079268953e-06, "loss": 3.7533, "step": 2368 }, { "epoch": 0.03, "grad_norm": 11.051271438598633, "learning_rate": 5.123823942900401e-06, "loss": 3.953, "step": 2369 }, { "epoch": 0.03, "grad_norm": 9.998431205749512, "learning_rate": 5.125986806531849e-06, "loss": 3.2584, "step": 2370 }, { "epoch": 0.03, "grad_norm": 10.223349571228027, "learning_rate": 5.128149670163297e-06, "loss": 3.1104, "step": 2371 }, { "epoch": 0.03, "grad_norm": 9.123248100280762, "learning_rate": 5.130312533794744e-06, "loss": 3.278, "step": 2372 }, { "epoch": 0.03, "grad_norm": 11.174159049987793, "learning_rate": 5.132475397426193e-06, "loss": 3.748, "step": 2373 }, { "epoch": 0.03, "grad_norm": 10.922688484191895, "learning_rate": 5.13463826105764e-06, "loss": 3.5309, "step": 2374 }, { "epoch": 0.03, "grad_norm": 9.843914031982422, "learning_rate": 5.136801124689089e-06, "loss": 3.1277, "step": 2375 }, { "epoch": 0.03, "grad_norm": 9.795907020568848, "learning_rate": 5.138963988320536e-06, "loss": 3.4352, "step": 2376 }, { "epoch": 0.03, "grad_norm": 9.182475090026855, "learning_rate": 5.141126851951985e-06, "loss": 3.4915, "step": 2377 }, { "epoch": 0.03, "grad_norm": 10.00989818572998, "learning_rate": 5.1432897155834325e-06, "loss": 3.9135, "step": 2378 }, { "epoch": 0.03, "grad_norm": 10.620205879211426, "learning_rate": 5.145452579214881e-06, "loss": 3.3997, "step": 2379 }, { "epoch": 0.03, "grad_norm": 9.570730209350586, "learning_rate": 5.147615442846329e-06, "loss": 3.3684, "step": 2380 }, { "epoch": 0.03, "grad_norm": 9.500993728637695, "learning_rate": 5.1497783064777775e-06, "loss": 3.8137, "step": 2381 }, { "epoch": 0.03, "grad_norm": 11.142189025878906, "learning_rate": 5.151941170109225e-06, "loss": 3.5075, "step": 2382 }, { "epoch": 0.03, "grad_norm": 10.220403671264648, "learning_rate": 5.154104033740674e-06, "loss": 3.2855, "step": 2383 }, { "epoch": 0.03, "grad_norm": 8.827445983886719, "learning_rate": 5.156266897372121e-06, "loss": 3.1694, "step": 2384 }, { "epoch": 0.03, "grad_norm": 10.326375961303711, "learning_rate": 5.15842976100357e-06, "loss": 3.3067, "step": 2385 }, { "epoch": 0.03, "grad_norm": 10.139094352722168, "learning_rate": 5.160592624635017e-06, "loss": 2.9678, "step": 2386 }, { "epoch": 0.03, "grad_norm": 11.449438095092773, "learning_rate": 5.162755488266466e-06, "loss": 3.1657, "step": 2387 }, { "epoch": 0.03, "grad_norm": 10.066411972045898, "learning_rate": 5.164918351897913e-06, "loss": 3.5435, "step": 2388 }, { "epoch": 0.03, "grad_norm": 10.795153617858887, "learning_rate": 5.167081215529362e-06, "loss": 4.0095, "step": 2389 }, { "epoch": 0.03, "grad_norm": 10.581942558288574, "learning_rate": 5.169244079160809e-06, "loss": 3.709, "step": 2390 }, { "epoch": 0.03, "grad_norm": 8.625871658325195, "learning_rate": 5.171406942792257e-06, "loss": 3.2579, "step": 2391 }, { "epoch": 0.03, "grad_norm": 10.720425605773926, "learning_rate": 5.173569806423705e-06, "loss": 4.0738, "step": 2392 }, { "epoch": 0.03, "grad_norm": 10.152162551879883, "learning_rate": 5.175732670055153e-06, "loss": 3.3652, "step": 2393 }, { "epoch": 0.03, "grad_norm": 9.267952919006348, "learning_rate": 5.177895533686601e-06, "loss": 3.2152, "step": 2394 }, { "epoch": 0.03, "grad_norm": 9.587748527526855, "learning_rate": 5.180058397318049e-06, "loss": 3.3485, "step": 2395 }, { "epoch": 0.03, "grad_norm": 10.049571990966797, "learning_rate": 5.182221260949497e-06, "loss": 3.0831, "step": 2396 }, { "epoch": 0.03, "grad_norm": 11.798489570617676, "learning_rate": 5.1843841245809455e-06, "loss": 3.0076, "step": 2397 }, { "epoch": 0.03, "grad_norm": 9.649774551391602, "learning_rate": 5.1865469882123935e-06, "loss": 3.4488, "step": 2398 }, { "epoch": 0.03, "grad_norm": 10.13018798828125, "learning_rate": 5.1887098518438416e-06, "loss": 3.5977, "step": 2399 }, { "epoch": 0.03, "grad_norm": 10.219520568847656, "learning_rate": 5.1908727154752905e-06, "loss": 3.2312, "step": 2400 }, { "epoch": 0.03, "grad_norm": 9.471500396728516, "learning_rate": 5.193035579106738e-06, "loss": 3.6297, "step": 2401 }, { "epoch": 0.03, "grad_norm": 10.138280868530273, "learning_rate": 5.1951984427381866e-06, "loss": 3.2872, "step": 2402 }, { "epoch": 0.03, "grad_norm": 9.499151229858398, "learning_rate": 5.197361306369634e-06, "loss": 3.3813, "step": 2403 }, { "epoch": 0.03, "grad_norm": 9.142888069152832, "learning_rate": 5.199524170001083e-06, "loss": 3.1019, "step": 2404 }, { "epoch": 0.03, "grad_norm": 10.389525413513184, "learning_rate": 5.20168703363253e-06, "loss": 2.938, "step": 2405 }, { "epoch": 0.03, "grad_norm": 9.542248725891113, "learning_rate": 5.203849897263979e-06, "loss": 3.1392, "step": 2406 }, { "epoch": 0.03, "grad_norm": 9.872297286987305, "learning_rate": 5.206012760895426e-06, "loss": 3.1957, "step": 2407 }, { "epoch": 0.03, "grad_norm": 9.014805793762207, "learning_rate": 5.208175624526874e-06, "loss": 3.1427, "step": 2408 }, { "epoch": 0.03, "grad_norm": 10.22071647644043, "learning_rate": 5.210338488158322e-06, "loss": 3.6128, "step": 2409 }, { "epoch": 0.03, "grad_norm": 9.602249145507812, "learning_rate": 5.21250135178977e-06, "loss": 3.3916, "step": 2410 }, { "epoch": 0.03, "grad_norm": 11.848949432373047, "learning_rate": 5.214664215421218e-06, "loss": 4.4271, "step": 2411 }, { "epoch": 0.03, "grad_norm": 10.114156723022461, "learning_rate": 5.216827079052666e-06, "loss": 3.091, "step": 2412 }, { "epoch": 0.03, "grad_norm": 9.334857940673828, "learning_rate": 5.218989942684114e-06, "loss": 3.6762, "step": 2413 }, { "epoch": 0.03, "grad_norm": 9.71192455291748, "learning_rate": 5.221152806315562e-06, "loss": 3.3967, "step": 2414 }, { "epoch": 0.03, "grad_norm": 8.480841636657715, "learning_rate": 5.22331566994701e-06, "loss": 3.65, "step": 2415 }, { "epoch": 0.03, "grad_norm": 9.30120849609375, "learning_rate": 5.2254785335784584e-06, "loss": 2.8652, "step": 2416 }, { "epoch": 0.03, "grad_norm": 10.086862564086914, "learning_rate": 5.227641397209906e-06, "loss": 3.7285, "step": 2417 }, { "epoch": 0.03, "grad_norm": 10.688691139221191, "learning_rate": 5.2298042608413545e-06, "loss": 3.7008, "step": 2418 }, { "epoch": 0.03, "grad_norm": 8.96363353729248, "learning_rate": 5.231967124472802e-06, "loss": 3.1632, "step": 2419 }, { "epoch": 0.03, "grad_norm": 10.673206329345703, "learning_rate": 5.234129988104251e-06, "loss": 3.4449, "step": 2420 }, { "epoch": 0.03, "grad_norm": 8.941519737243652, "learning_rate": 5.236292851735698e-06, "loss": 3.738, "step": 2421 }, { "epoch": 0.03, "grad_norm": 9.666254997253418, "learning_rate": 5.238455715367147e-06, "loss": 4.13, "step": 2422 }, { "epoch": 0.03, "grad_norm": 10.563885688781738, "learning_rate": 5.240618578998594e-06, "loss": 3.6551, "step": 2423 }, { "epoch": 0.03, "grad_norm": 10.059505462646484, "learning_rate": 5.242781442630043e-06, "loss": 3.3871, "step": 2424 }, { "epoch": 0.03, "grad_norm": 9.342314720153809, "learning_rate": 5.24494430626149e-06, "loss": 3.6464, "step": 2425 }, { "epoch": 0.03, "grad_norm": 9.71767520904541, "learning_rate": 5.247107169892939e-06, "loss": 3.3168, "step": 2426 }, { "epoch": 0.03, "grad_norm": 9.519871711730957, "learning_rate": 5.249270033524386e-06, "loss": 3.2246, "step": 2427 }, { "epoch": 0.03, "grad_norm": 9.105879783630371, "learning_rate": 5.251432897155835e-06, "loss": 3.6923, "step": 2428 }, { "epoch": 0.03, "grad_norm": 11.81429386138916, "learning_rate": 5.253595760787282e-06, "loss": 3.5719, "step": 2429 }, { "epoch": 0.03, "grad_norm": 9.574026107788086, "learning_rate": 5.255758624418731e-06, "loss": 3.5062, "step": 2430 }, { "epoch": 0.03, "grad_norm": 9.882670402526855, "learning_rate": 5.257921488050178e-06, "loss": 3.6835, "step": 2431 }, { "epoch": 0.03, "grad_norm": 8.988462448120117, "learning_rate": 5.260084351681627e-06, "loss": 3.0709, "step": 2432 }, { "epoch": 0.03, "grad_norm": 10.646296501159668, "learning_rate": 5.262247215313075e-06, "loss": 3.3046, "step": 2433 }, { "epoch": 0.03, "grad_norm": 10.06844425201416, "learning_rate": 5.264410078944523e-06, "loss": 3.1239, "step": 2434 }, { "epoch": 0.03, "grad_norm": 10.534599304199219, "learning_rate": 5.266572942575971e-06, "loss": 3.0725, "step": 2435 }, { "epoch": 0.03, "grad_norm": 10.006475448608398, "learning_rate": 5.268735806207419e-06, "loss": 3.1772, "step": 2436 }, { "epoch": 0.03, "grad_norm": 9.720237731933594, "learning_rate": 5.2708986698388675e-06, "loss": 3.5736, "step": 2437 }, { "epoch": 0.03, "grad_norm": 10.577610969543457, "learning_rate": 5.273061533470315e-06, "loss": 3.4712, "step": 2438 }, { "epoch": 0.03, "grad_norm": 9.449801445007324, "learning_rate": 5.275224397101764e-06, "loss": 3.0852, "step": 2439 }, { "epoch": 0.03, "grad_norm": 10.200963973999023, "learning_rate": 5.277387260733211e-06, "loss": 3.3644, "step": 2440 }, { "epoch": 0.03, "grad_norm": 10.003446578979492, "learning_rate": 5.27955012436466e-06, "loss": 3.7641, "step": 2441 }, { "epoch": 0.03, "grad_norm": 10.940777778625488, "learning_rate": 5.281712987996107e-06, "loss": 3.6191, "step": 2442 }, { "epoch": 0.03, "grad_norm": 10.81196117401123, "learning_rate": 5.283875851627556e-06, "loss": 4.017, "step": 2443 }, { "epoch": 0.03, "grad_norm": 10.764452934265137, "learning_rate": 5.286038715259003e-06, "loss": 3.1666, "step": 2444 }, { "epoch": 0.03, "grad_norm": 10.436234474182129, "learning_rate": 5.288201578890452e-06, "loss": 3.0322, "step": 2445 }, { "epoch": 0.03, "grad_norm": 11.170089721679688, "learning_rate": 5.290364442521899e-06, "loss": 3.1112, "step": 2446 }, { "epoch": 0.03, "grad_norm": 10.427729606628418, "learning_rate": 5.292527306153348e-06, "loss": 3.2058, "step": 2447 }, { "epoch": 0.03, "grad_norm": 10.834566116333008, "learning_rate": 5.294690169784795e-06, "loss": 3.2114, "step": 2448 }, { "epoch": 0.03, "grad_norm": 9.840810775756836, "learning_rate": 5.296853033416244e-06, "loss": 3.4523, "step": 2449 }, { "epoch": 0.03, "grad_norm": 9.573266983032227, "learning_rate": 5.299015897047691e-06, "loss": 3.8706, "step": 2450 }, { "epoch": 0.03, "grad_norm": 9.912405014038086, "learning_rate": 5.30117876067914e-06, "loss": 3.6886, "step": 2451 }, { "epoch": 0.03, "grad_norm": 12.646525382995605, "learning_rate": 5.303341624310587e-06, "loss": 3.437, "step": 2452 }, { "epoch": 0.03, "grad_norm": 10.628599166870117, "learning_rate": 5.3055044879420355e-06, "loss": 3.5444, "step": 2453 }, { "epoch": 0.03, "grad_norm": 9.286365509033203, "learning_rate": 5.3076673515734835e-06, "loss": 3.5148, "step": 2454 }, { "epoch": 0.03, "grad_norm": 9.35421371459961, "learning_rate": 5.3098302152049316e-06, "loss": 2.9984, "step": 2455 }, { "epoch": 0.03, "grad_norm": 10.014337539672852, "learning_rate": 5.31199307883638e-06, "loss": 3.3268, "step": 2456 }, { "epoch": 0.03, "grad_norm": 11.653520584106445, "learning_rate": 5.314155942467828e-06, "loss": 3.3604, "step": 2457 }, { "epoch": 0.03, "grad_norm": 10.629752159118652, "learning_rate": 5.316318806099276e-06, "loss": 3.4592, "step": 2458 }, { "epoch": 0.03, "grad_norm": 10.142827987670898, "learning_rate": 5.318481669730724e-06, "loss": 3.5077, "step": 2459 }, { "epoch": 0.03, "grad_norm": 9.773791313171387, "learning_rate": 5.320644533362172e-06, "loss": 3.7131, "step": 2460 }, { "epoch": 0.03, "grad_norm": 11.621971130371094, "learning_rate": 5.32280739699362e-06, "loss": 3.3991, "step": 2461 }, { "epoch": 0.03, "grad_norm": 11.055521965026855, "learning_rate": 5.324970260625068e-06, "loss": 3.1361, "step": 2462 }, { "epoch": 0.03, "grad_norm": 10.826032638549805, "learning_rate": 5.327133124256516e-06, "loss": 3.4997, "step": 2463 }, { "epoch": 0.03, "grad_norm": 8.97091007232666, "learning_rate": 5.329295987887965e-06, "loss": 3.3446, "step": 2464 }, { "epoch": 0.03, "grad_norm": 9.686339378356934, "learning_rate": 5.331458851519412e-06, "loss": 3.1384, "step": 2465 }, { "epoch": 0.03, "grad_norm": 10.253825187683105, "learning_rate": 5.333621715150861e-06, "loss": 3.4054, "step": 2466 }, { "epoch": 0.03, "grad_norm": 9.492862701416016, "learning_rate": 5.335784578782308e-06, "loss": 3.6588, "step": 2467 }, { "epoch": 0.03, "grad_norm": 10.510638236999512, "learning_rate": 5.337947442413757e-06, "loss": 3.2325, "step": 2468 }, { "epoch": 0.03, "grad_norm": 10.036673545837402, "learning_rate": 5.340110306045204e-06, "loss": 3.0118, "step": 2469 }, { "epoch": 0.03, "grad_norm": 9.784078598022461, "learning_rate": 5.342273169676653e-06, "loss": 3.3413, "step": 2470 }, { "epoch": 0.03, "grad_norm": 10.589499473571777, "learning_rate": 5.3444360333081e-06, "loss": 3.9617, "step": 2471 }, { "epoch": 0.03, "grad_norm": 10.300662994384766, "learning_rate": 5.3465988969395484e-06, "loss": 3.423, "step": 2472 }, { "epoch": 0.03, "grad_norm": 9.914190292358398, "learning_rate": 5.3487617605709965e-06, "loss": 3.8017, "step": 2473 }, { "epoch": 0.03, "grad_norm": 10.000568389892578, "learning_rate": 5.3509246242024445e-06, "loss": 3.698, "step": 2474 }, { "epoch": 0.03, "grad_norm": 9.718195915222168, "learning_rate": 5.353087487833893e-06, "loss": 2.8866, "step": 2475 }, { "epoch": 0.03, "grad_norm": 8.971826553344727, "learning_rate": 5.355250351465341e-06, "loss": 3.3525, "step": 2476 }, { "epoch": 0.03, "grad_norm": 9.888550758361816, "learning_rate": 5.357413215096789e-06, "loss": 2.8006, "step": 2477 }, { "epoch": 0.03, "grad_norm": 9.209304809570312, "learning_rate": 5.359576078728237e-06, "loss": 3.2065, "step": 2478 }, { "epoch": 0.03, "grad_norm": 10.309286117553711, "learning_rate": 5.361738942359685e-06, "loss": 3.5471, "step": 2479 }, { "epoch": 0.03, "grad_norm": 9.918009757995605, "learning_rate": 5.363901805991133e-06, "loss": 3.4757, "step": 2480 }, { "epoch": 0.03, "grad_norm": 9.51379680633545, "learning_rate": 5.36606466962258e-06, "loss": 3.8259, "step": 2481 }, { "epoch": 0.03, "grad_norm": 10.658670425415039, "learning_rate": 5.368227533254029e-06, "loss": 3.4967, "step": 2482 }, { "epoch": 0.03, "grad_norm": 9.82674503326416, "learning_rate": 5.370390396885476e-06, "loss": 3.7529, "step": 2483 }, { "epoch": 0.03, "grad_norm": 10.901741981506348, "learning_rate": 5.372553260516925e-06, "loss": 3.9632, "step": 2484 }, { "epoch": 0.03, "grad_norm": 10.418965339660645, "learning_rate": 5.374716124148372e-06, "loss": 2.8677, "step": 2485 }, { "epoch": 0.03, "grad_norm": 9.398584365844727, "learning_rate": 5.376878987779821e-06, "loss": 3.3514, "step": 2486 }, { "epoch": 0.03, "grad_norm": 10.485700607299805, "learning_rate": 5.379041851411268e-06, "loss": 3.9232, "step": 2487 }, { "epoch": 0.03, "grad_norm": 10.535148620605469, "learning_rate": 5.381204715042717e-06, "loss": 3.5465, "step": 2488 }, { "epoch": 0.03, "grad_norm": 10.10575008392334, "learning_rate": 5.3833675786741644e-06, "loss": 2.9499, "step": 2489 }, { "epoch": 0.03, "grad_norm": 9.643555641174316, "learning_rate": 5.385530442305613e-06, "loss": 3.0893, "step": 2490 }, { "epoch": 0.03, "grad_norm": 10.609374046325684, "learning_rate": 5.3876933059370605e-06, "loss": 3.1522, "step": 2491 }, { "epoch": 0.03, "grad_norm": 10.126221656799316, "learning_rate": 5.3898561695685094e-06, "loss": 3.7069, "step": 2492 }, { "epoch": 0.03, "grad_norm": 11.470693588256836, "learning_rate": 5.392019033199957e-06, "loss": 3.6841, "step": 2493 }, { "epoch": 0.03, "grad_norm": 8.868610382080078, "learning_rate": 5.3941818968314055e-06, "loss": 2.3707, "step": 2494 }, { "epoch": 0.03, "grad_norm": 9.82354736328125, "learning_rate": 5.396344760462853e-06, "loss": 3.5267, "step": 2495 }, { "epoch": 0.03, "grad_norm": 9.649497985839844, "learning_rate": 5.398507624094302e-06, "loss": 2.8627, "step": 2496 }, { "epoch": 0.03, "grad_norm": 10.073037147521973, "learning_rate": 5.40067048772575e-06, "loss": 2.9699, "step": 2497 }, { "epoch": 0.03, "grad_norm": 9.634286880493164, "learning_rate": 5.402833351357198e-06, "loss": 3.3938, "step": 2498 }, { "epoch": 0.03, "grad_norm": 10.194628715515137, "learning_rate": 5.404996214988646e-06, "loss": 3.5043, "step": 2499 }, { "epoch": 0.03, "grad_norm": 11.58884048461914, "learning_rate": 5.407159078620093e-06, "loss": 3.2939, "step": 2500 }, { "epoch": 0.03, "grad_norm": 8.513761520385742, "learning_rate": 5.409321942251542e-06, "loss": 3.1997, "step": 2501 }, { "epoch": 0.03, "grad_norm": 10.293149948120117, "learning_rate": 5.411484805882989e-06, "loss": 3.511, "step": 2502 }, { "epoch": 0.03, "grad_norm": 10.377708435058594, "learning_rate": 5.413647669514438e-06, "loss": 3.5727, "step": 2503 }, { "epoch": 0.03, "grad_norm": 11.633315086364746, "learning_rate": 5.415810533145885e-06, "loss": 3.2382, "step": 2504 }, { "epoch": 0.03, "grad_norm": 9.740283966064453, "learning_rate": 5.417973396777334e-06, "loss": 3.0674, "step": 2505 }, { "epoch": 0.03, "grad_norm": 10.188642501831055, "learning_rate": 5.420136260408781e-06, "loss": 3.6699, "step": 2506 }, { "epoch": 0.03, "grad_norm": 9.910500526428223, "learning_rate": 5.42229912404023e-06, "loss": 3.9999, "step": 2507 }, { "epoch": 0.03, "grad_norm": 9.742664337158203, "learning_rate": 5.424461987671677e-06, "loss": 3.4593, "step": 2508 }, { "epoch": 0.03, "grad_norm": 10.358417510986328, "learning_rate": 5.426624851303126e-06, "loss": 3.6257, "step": 2509 }, { "epoch": 0.03, "grad_norm": 10.036686897277832, "learning_rate": 5.4287877149345735e-06, "loss": 3.6517, "step": 2510 }, { "epoch": 0.03, "grad_norm": 9.003740310668945, "learning_rate": 5.430950578566022e-06, "loss": 3.6303, "step": 2511 }, { "epoch": 0.03, "grad_norm": 10.630439758300781, "learning_rate": 5.43311344219747e-06, "loss": 3.3333, "step": 2512 }, { "epoch": 0.03, "grad_norm": 9.933094024658203, "learning_rate": 5.4352763058289185e-06, "loss": 3.691, "step": 2513 }, { "epoch": 0.03, "grad_norm": 10.13499927520752, "learning_rate": 5.437439169460366e-06, "loss": 3.1071, "step": 2514 }, { "epoch": 0.03, "grad_norm": 9.818097114562988, "learning_rate": 5.439602033091815e-06, "loss": 3.13, "step": 2515 }, { "epoch": 0.03, "grad_norm": 9.952773094177246, "learning_rate": 5.441764896723262e-06, "loss": 3.4998, "step": 2516 }, { "epoch": 0.03, "grad_norm": 10.070470809936523, "learning_rate": 5.44392776035471e-06, "loss": 3.2509, "step": 2517 }, { "epoch": 0.03, "grad_norm": 10.227377891540527, "learning_rate": 5.446090623986158e-06, "loss": 3.4463, "step": 2518 }, { "epoch": 0.03, "grad_norm": 9.067975997924805, "learning_rate": 5.448253487617606e-06, "loss": 3.7542, "step": 2519 }, { "epoch": 0.03, "grad_norm": 10.107836723327637, "learning_rate": 5.450416351249054e-06, "loss": 3.4283, "step": 2520 }, { "epoch": 0.03, "grad_norm": 9.97718334197998, "learning_rate": 5.452579214880502e-06, "loss": 3.7511, "step": 2521 }, { "epoch": 0.03, "grad_norm": 9.228495597839355, "learning_rate": 5.45474207851195e-06, "loss": 2.7207, "step": 2522 }, { "epoch": 0.03, "grad_norm": 10.276835441589355, "learning_rate": 5.456904942143398e-06, "loss": 3.5447, "step": 2523 }, { "epoch": 0.03, "grad_norm": 11.364102363586426, "learning_rate": 5.459067805774846e-06, "loss": 3.5703, "step": 2524 }, { "epoch": 0.03, "grad_norm": 10.215046882629395, "learning_rate": 5.461230669406294e-06, "loss": 3.7543, "step": 2525 }, { "epoch": 0.03, "grad_norm": 9.65912914276123, "learning_rate": 5.4633935330377415e-06, "loss": 3.3619, "step": 2526 }, { "epoch": 0.03, "grad_norm": 9.598152160644531, "learning_rate": 5.46555639666919e-06, "loss": 3.2294, "step": 2527 }, { "epoch": 0.03, "grad_norm": 9.226631164550781, "learning_rate": 5.467719260300639e-06, "loss": 3.3484, "step": 2528 }, { "epoch": 0.03, "grad_norm": 9.644960403442383, "learning_rate": 5.4698821239320865e-06, "loss": 3.086, "step": 2529 }, { "epoch": 0.03, "grad_norm": 10.781880378723145, "learning_rate": 5.472044987563535e-06, "loss": 3.451, "step": 2530 }, { "epoch": 0.03, "grad_norm": 9.979312896728516, "learning_rate": 5.4742078511949826e-06, "loss": 2.9295, "step": 2531 }, { "epoch": 0.03, "grad_norm": 8.603351593017578, "learning_rate": 5.4763707148264315e-06, "loss": 2.9016, "step": 2532 }, { "epoch": 0.03, "grad_norm": 11.003409385681152, "learning_rate": 5.478533578457879e-06, "loss": 3.7271, "step": 2533 }, { "epoch": 0.03, "grad_norm": 9.239931106567383, "learning_rate": 5.4806964420893276e-06, "loss": 3.088, "step": 2534 }, { "epoch": 0.03, "grad_norm": 9.82075023651123, "learning_rate": 5.482859305720775e-06, "loss": 3.7215, "step": 2535 }, { "epoch": 0.03, "grad_norm": 11.514026641845703, "learning_rate": 5.485022169352223e-06, "loss": 4.0103, "step": 2536 }, { "epoch": 0.03, "grad_norm": 10.618109703063965, "learning_rate": 5.487185032983671e-06, "loss": 3.2674, "step": 2537 }, { "epoch": 0.03, "grad_norm": 8.729360580444336, "learning_rate": 5.489347896615119e-06, "loss": 3.3728, "step": 2538 }, { "epoch": 0.03, "grad_norm": 9.593841552734375, "learning_rate": 5.491510760246567e-06, "loss": 3.4715, "step": 2539 }, { "epoch": 0.03, "grad_norm": 10.914700508117676, "learning_rate": 5.493673623878015e-06, "loss": 3.3864, "step": 2540 }, { "epoch": 0.03, "grad_norm": 10.543614387512207, "learning_rate": 5.495836487509463e-06, "loss": 3.3006, "step": 2541 }, { "epoch": 0.03, "grad_norm": 10.09192180633545, "learning_rate": 5.497999351140911e-06, "loss": 3.1883, "step": 2542 }, { "epoch": 0.03, "grad_norm": 11.159380912780762, "learning_rate": 5.500162214772359e-06, "loss": 4.0195, "step": 2543 }, { "epoch": 0.03, "grad_norm": 9.917863845825195, "learning_rate": 5.502325078403807e-06, "loss": 3.3634, "step": 2544 }, { "epoch": 0.03, "grad_norm": 10.316882133483887, "learning_rate": 5.5044879420352544e-06, "loss": 3.4466, "step": 2545 }, { "epoch": 0.03, "grad_norm": 9.223382949829102, "learning_rate": 5.506650805666703e-06, "loss": 3.6942, "step": 2546 }, { "epoch": 0.03, "grad_norm": 9.258604049682617, "learning_rate": 5.5088136692981505e-06, "loss": 3.1096, "step": 2547 }, { "epoch": 0.03, "grad_norm": 10.480901718139648, "learning_rate": 5.5109765329295994e-06, "loss": 3.9626, "step": 2548 }, { "epoch": 0.03, "grad_norm": 10.254501342773438, "learning_rate": 5.513139396561047e-06, "loss": 3.1985, "step": 2549 }, { "epoch": 0.03, "grad_norm": 10.253694534301758, "learning_rate": 5.5153022601924955e-06, "loss": 3.5546, "step": 2550 }, { "epoch": 0.03, "grad_norm": 9.654353141784668, "learning_rate": 5.517465123823943e-06, "loss": 3.2175, "step": 2551 }, { "epoch": 0.03, "grad_norm": 9.669878005981445, "learning_rate": 5.519627987455392e-06, "loss": 3.3677, "step": 2552 }, { "epoch": 0.03, "grad_norm": 10.522067070007324, "learning_rate": 5.521790851086839e-06, "loss": 3.7204, "step": 2553 }, { "epoch": 0.03, "grad_norm": 9.969164848327637, "learning_rate": 5.523953714718288e-06, "loss": 3.3949, "step": 2554 }, { "epoch": 0.03, "grad_norm": 8.870448112487793, "learning_rate": 5.526116578349735e-06, "loss": 3.3976, "step": 2555 }, { "epoch": 0.03, "grad_norm": 9.356536865234375, "learning_rate": 5.528279441981184e-06, "loss": 3.751, "step": 2556 }, { "epoch": 0.03, "grad_norm": 9.265230178833008, "learning_rate": 5.530442305612631e-06, "loss": 4.1541, "step": 2557 }, { "epoch": 0.03, "grad_norm": 9.88545036315918, "learning_rate": 5.53260516924408e-06, "loss": 3.1947, "step": 2558 }, { "epoch": 0.03, "grad_norm": 10.289935111999512, "learning_rate": 5.534768032875527e-06, "loss": 3.3506, "step": 2559 }, { "epoch": 0.03, "grad_norm": 9.670865058898926, "learning_rate": 5.536930896506976e-06, "loss": 3.2644, "step": 2560 }, { "epoch": 0.03, "grad_norm": 9.727710723876953, "learning_rate": 5.539093760138424e-06, "loss": 3.2084, "step": 2561 }, { "epoch": 0.03, "grad_norm": 11.4998197555542, "learning_rate": 5.541256623769872e-06, "loss": 3.2307, "step": 2562 }, { "epoch": 0.03, "grad_norm": 10.162278175354004, "learning_rate": 5.54341948740132e-06, "loss": 3.1827, "step": 2563 }, { "epoch": 0.03, "grad_norm": 9.692693710327148, "learning_rate": 5.545582351032767e-06, "loss": 2.9279, "step": 2564 }, { "epoch": 0.03, "grad_norm": 9.18813419342041, "learning_rate": 5.547745214664216e-06, "loss": 3.1335, "step": 2565 }, { "epoch": 0.03, "grad_norm": 11.426499366760254, "learning_rate": 5.5499080782956635e-06, "loss": 3.608, "step": 2566 }, { "epoch": 0.03, "grad_norm": 10.485006332397461, "learning_rate": 5.552070941927112e-06, "loss": 3.3704, "step": 2567 }, { "epoch": 0.03, "grad_norm": 10.173179626464844, "learning_rate": 5.55423380555856e-06, "loss": 3.32, "step": 2568 }, { "epoch": 0.03, "grad_norm": 10.860227584838867, "learning_rate": 5.5563966691900085e-06, "loss": 2.9274, "step": 2569 }, { "epoch": 0.03, "grad_norm": 10.079118728637695, "learning_rate": 5.558559532821456e-06, "loss": 2.9232, "step": 2570 }, { "epoch": 0.03, "grad_norm": 11.075752258300781, "learning_rate": 5.560722396452905e-06, "loss": 3.3353, "step": 2571 }, { "epoch": 0.03, "grad_norm": 10.48011302947998, "learning_rate": 5.562885260084352e-06, "loss": 3.3001, "step": 2572 }, { "epoch": 0.03, "grad_norm": 10.760457038879395, "learning_rate": 5.565048123715801e-06, "loss": 3.1623, "step": 2573 }, { "epoch": 0.03, "grad_norm": 10.48962688446045, "learning_rate": 5.567210987347248e-06, "loss": 3.6631, "step": 2574 }, { "epoch": 0.03, "grad_norm": 9.793403625488281, "learning_rate": 5.569373850978697e-06, "loss": 3.2783, "step": 2575 }, { "epoch": 0.03, "grad_norm": 11.070928573608398, "learning_rate": 5.571536714610144e-06, "loss": 3.8364, "step": 2576 }, { "epoch": 0.03, "grad_norm": 10.266257286071777, "learning_rate": 5.573699578241593e-06, "loss": 3.5396, "step": 2577 }, { "epoch": 0.03, "grad_norm": 8.916889190673828, "learning_rate": 5.57586244187304e-06, "loss": 3.6484, "step": 2578 }, { "epoch": 0.03, "grad_norm": 9.083890914916992, "learning_rate": 5.578025305504489e-06, "loss": 3.0852, "step": 2579 }, { "epoch": 0.03, "grad_norm": 10.562955856323242, "learning_rate": 5.580188169135936e-06, "loss": 3.2325, "step": 2580 }, { "epoch": 0.03, "grad_norm": 9.827502250671387, "learning_rate": 5.582351032767384e-06, "loss": 3.8445, "step": 2581 }, { "epoch": 0.03, "grad_norm": 9.198624610900879, "learning_rate": 5.584513896398832e-06, "loss": 3.1402, "step": 2582 }, { "epoch": 0.03, "grad_norm": 12.138243675231934, "learning_rate": 5.58667676003028e-06, "loss": 3.1021, "step": 2583 }, { "epoch": 0.03, "grad_norm": 9.947352409362793, "learning_rate": 5.588839623661728e-06, "loss": 4.0109, "step": 2584 }, { "epoch": 0.03, "grad_norm": 11.90788745880127, "learning_rate": 5.5910024872931765e-06, "loss": 3.3718, "step": 2585 }, { "epoch": 0.03, "grad_norm": 9.737289428710938, "learning_rate": 5.5931653509246245e-06, "loss": 3.472, "step": 2586 }, { "epoch": 0.03, "grad_norm": 9.988590240478516, "learning_rate": 5.5953282145560726e-06, "loss": 3.5532, "step": 2587 }, { "epoch": 0.03, "grad_norm": 9.909796714782715, "learning_rate": 5.597491078187521e-06, "loss": 3.477, "step": 2588 }, { "epoch": 0.03, "grad_norm": 9.645261764526367, "learning_rate": 5.599653941818969e-06, "loss": 3.4062, "step": 2589 }, { "epoch": 0.03, "grad_norm": 9.758417129516602, "learning_rate": 5.601816805450416e-06, "loss": 3.3977, "step": 2590 }, { "epoch": 0.03, "grad_norm": 10.0647611618042, "learning_rate": 5.603979669081865e-06, "loss": 3.8342, "step": 2591 }, { "epoch": 0.03, "grad_norm": 8.239724159240723, "learning_rate": 5.606142532713314e-06, "loss": 3.591, "step": 2592 }, { "epoch": 0.03, "grad_norm": 9.485595703125, "learning_rate": 5.608305396344761e-06, "loss": 3.1508, "step": 2593 }, { "epoch": 0.03, "grad_norm": 9.669929504394531, "learning_rate": 5.61046825997621e-06, "loss": 3.4957, "step": 2594 }, { "epoch": 0.03, "grad_norm": 11.376385688781738, "learning_rate": 5.612631123607657e-06, "loss": 3.6537, "step": 2595 }, { "epoch": 0.03, "grad_norm": 9.399053573608398, "learning_rate": 5.614793987239106e-06, "loss": 3.0217, "step": 2596 }, { "epoch": 0.03, "grad_norm": 10.800966262817383, "learning_rate": 5.616956850870553e-06, "loss": 3.3823, "step": 2597 }, { "epoch": 0.03, "grad_norm": 10.636627197265625, "learning_rate": 5.619119714502002e-06, "loss": 2.9036, "step": 2598 }, { "epoch": 0.03, "grad_norm": 9.985630989074707, "learning_rate": 5.621282578133449e-06, "loss": 3.8148, "step": 2599 }, { "epoch": 0.03, "grad_norm": 9.424678802490234, "learning_rate": 5.623445441764897e-06, "loss": 3.2378, "step": 2600 }, { "epoch": 0.03, "grad_norm": 9.836882591247559, "learning_rate": 5.625608305396345e-06, "loss": 3.9375, "step": 2601 }, { "epoch": 0.03, "grad_norm": 8.990066528320312, "learning_rate": 5.627771169027793e-06, "loss": 2.8662, "step": 2602 }, { "epoch": 0.03, "grad_norm": 8.899407386779785, "learning_rate": 5.629934032659241e-06, "loss": 3.2379, "step": 2603 }, { "epoch": 0.03, "grad_norm": 9.420934677124023, "learning_rate": 5.6320968962906894e-06, "loss": 3.6089, "step": 2604 }, { "epoch": 0.03, "grad_norm": 10.571949005126953, "learning_rate": 5.6342597599221375e-06, "loss": 3.4573, "step": 2605 }, { "epoch": 0.03, "grad_norm": 10.149064064025879, "learning_rate": 5.6364226235535855e-06, "loss": 3.1183, "step": 2606 }, { "epoch": 0.03, "grad_norm": 10.481761932373047, "learning_rate": 5.638585487185034e-06, "loss": 3.3921, "step": 2607 }, { "epoch": 0.03, "grad_norm": 10.67493724822998, "learning_rate": 5.640748350816482e-06, "loss": 3.9419, "step": 2608 }, { "epoch": 0.03, "grad_norm": 9.328531265258789, "learning_rate": 5.642911214447929e-06, "loss": 3.6908, "step": 2609 }, { "epoch": 0.03, "grad_norm": 10.51127815246582, "learning_rate": 5.645074078079378e-06, "loss": 3.3226, "step": 2610 }, { "epoch": 0.03, "grad_norm": 8.843683242797852, "learning_rate": 5.647236941710825e-06, "loss": 3.0421, "step": 2611 }, { "epoch": 0.03, "grad_norm": 9.397987365722656, "learning_rate": 5.649399805342274e-06, "loss": 3.2832, "step": 2612 }, { "epoch": 0.03, "grad_norm": 9.13432502746582, "learning_rate": 5.651562668973721e-06, "loss": 3.0653, "step": 2613 }, { "epoch": 0.03, "grad_norm": 8.59457015991211, "learning_rate": 5.65372553260517e-06, "loss": 3.1959, "step": 2614 }, { "epoch": 0.03, "grad_norm": 9.334829330444336, "learning_rate": 5.655888396236617e-06, "loss": 3.5242, "step": 2615 }, { "epoch": 0.03, "grad_norm": 11.429152488708496, "learning_rate": 5.658051259868066e-06, "loss": 3.4853, "step": 2616 }, { "epoch": 0.03, "grad_norm": 10.02580738067627, "learning_rate": 5.660214123499513e-06, "loss": 3.3027, "step": 2617 }, { "epoch": 0.03, "grad_norm": 10.993182182312012, "learning_rate": 5.662376987130962e-06, "loss": 2.8695, "step": 2618 }, { "epoch": 0.03, "grad_norm": 8.957721710205078, "learning_rate": 5.664539850762409e-06, "loss": 3.8336, "step": 2619 }, { "epoch": 0.03, "grad_norm": 10.211872100830078, "learning_rate": 5.666702714393858e-06, "loss": 2.7645, "step": 2620 }, { "epoch": 0.03, "grad_norm": 10.63394832611084, "learning_rate": 5.6688655780253054e-06, "loss": 4.0585, "step": 2621 }, { "epoch": 0.03, "grad_norm": 10.415796279907227, "learning_rate": 5.671028441656754e-06, "loss": 3.1386, "step": 2622 }, { "epoch": 0.03, "grad_norm": 10.040535926818848, "learning_rate": 5.6731913052882015e-06, "loss": 3.2659, "step": 2623 }, { "epoch": 0.03, "grad_norm": 10.391660690307617, "learning_rate": 5.6753541689196504e-06, "loss": 3.5527, "step": 2624 }, { "epoch": 0.03, "grad_norm": 9.837368965148926, "learning_rate": 5.6775170325510985e-06, "loss": 3.1543, "step": 2625 }, { "epoch": 0.03, "grad_norm": 9.955828666687012, "learning_rate": 5.679679896182546e-06, "loss": 3.1951, "step": 2626 }, { "epoch": 0.03, "grad_norm": 10.42552661895752, "learning_rate": 5.681842759813995e-06, "loss": 3.3286, "step": 2627 }, { "epoch": 0.03, "grad_norm": 9.677729606628418, "learning_rate": 5.684005623445442e-06, "loss": 2.7508, "step": 2628 }, { "epoch": 0.03, "grad_norm": 9.854655265808105, "learning_rate": 5.686168487076891e-06, "loss": 3.4223, "step": 2629 }, { "epoch": 0.03, "grad_norm": 10.4744234085083, "learning_rate": 5.688331350708338e-06, "loss": 3.2357, "step": 2630 }, { "epoch": 0.03, "grad_norm": 11.892991065979004, "learning_rate": 5.690494214339787e-06, "loss": 3.1652, "step": 2631 }, { "epoch": 0.03, "grad_norm": 11.197606086730957, "learning_rate": 5.692657077971234e-06, "loss": 3.029, "step": 2632 }, { "epoch": 0.03, "grad_norm": 10.532164573669434, "learning_rate": 5.694819941602683e-06, "loss": 3.299, "step": 2633 }, { "epoch": 0.03, "grad_norm": 11.274982452392578, "learning_rate": 5.69698280523413e-06, "loss": 2.9247, "step": 2634 }, { "epoch": 0.03, "grad_norm": 11.677321434020996, "learning_rate": 5.699145668865579e-06, "loss": 3.1481, "step": 2635 }, { "epoch": 0.03, "grad_norm": 9.578672409057617, "learning_rate": 5.701308532497026e-06, "loss": 2.598, "step": 2636 }, { "epoch": 0.03, "grad_norm": 13.075692176818848, "learning_rate": 5.703471396128475e-06, "loss": 3.6206, "step": 2637 }, { "epoch": 0.03, "grad_norm": 11.416406631469727, "learning_rate": 5.705634259759922e-06, "loss": 3.868, "step": 2638 }, { "epoch": 0.03, "grad_norm": 10.485008239746094, "learning_rate": 5.707797123391371e-06, "loss": 3.8322, "step": 2639 }, { "epoch": 0.03, "grad_norm": 9.064361572265625, "learning_rate": 5.709959987022818e-06, "loss": 3.2456, "step": 2640 }, { "epoch": 0.03, "grad_norm": 10.290867805480957, "learning_rate": 5.712122850654267e-06, "loss": 3.1959, "step": 2641 }, { "epoch": 0.03, "grad_norm": 11.082405090332031, "learning_rate": 5.7142857142857145e-06, "loss": 3.6272, "step": 2642 }, { "epoch": 0.03, "grad_norm": 9.802655220031738, "learning_rate": 5.716448577917163e-06, "loss": 3.6178, "step": 2643 }, { "epoch": 0.03, "grad_norm": 9.775290489196777, "learning_rate": 5.718611441548611e-06, "loss": 2.9748, "step": 2644 }, { "epoch": 0.03, "grad_norm": 9.241366386413574, "learning_rate": 5.720774305180059e-06, "loss": 3.8961, "step": 2645 }, { "epoch": 0.03, "grad_norm": 10.437875747680664, "learning_rate": 5.722937168811507e-06, "loss": 2.9781, "step": 2646 }, { "epoch": 0.03, "grad_norm": 9.941280364990234, "learning_rate": 5.725100032442955e-06, "loss": 3.6965, "step": 2647 }, { "epoch": 0.03, "grad_norm": 11.001323699951172, "learning_rate": 5.727262896074403e-06, "loss": 3.7826, "step": 2648 }, { "epoch": 0.03, "grad_norm": 10.411988258361816, "learning_rate": 5.729425759705851e-06, "loss": 3.4559, "step": 2649 }, { "epoch": 0.03, "grad_norm": 9.82299518585205, "learning_rate": 5.731588623337299e-06, "loss": 3.4091, "step": 2650 }, { "epoch": 0.03, "grad_norm": 10.428729057312012, "learning_rate": 5.733751486968747e-06, "loss": 3.1413, "step": 2651 }, { "epoch": 0.03, "grad_norm": 9.344992637634277, "learning_rate": 5.735914350600195e-06, "loss": 2.9178, "step": 2652 }, { "epoch": 0.03, "grad_norm": 10.178677558898926, "learning_rate": 5.738077214231643e-06, "loss": 3.0869, "step": 2653 }, { "epoch": 0.03, "grad_norm": 9.898597717285156, "learning_rate": 5.74024007786309e-06, "loss": 3.4921, "step": 2654 }, { "epoch": 0.03, "grad_norm": 8.889986991882324, "learning_rate": 5.742402941494539e-06, "loss": 2.8573, "step": 2655 }, { "epoch": 0.03, "grad_norm": 9.958300590515137, "learning_rate": 5.744565805125988e-06, "loss": 3.6298, "step": 2656 }, { "epoch": 0.03, "grad_norm": 10.892536163330078, "learning_rate": 5.746728668757435e-06, "loss": 3.0912, "step": 2657 }, { "epoch": 0.03, "grad_norm": 11.20475959777832, "learning_rate": 5.748891532388884e-06, "loss": 3.6684, "step": 2658 }, { "epoch": 0.03, "grad_norm": 11.615019798278809, "learning_rate": 5.751054396020331e-06, "loss": 3.7358, "step": 2659 }, { "epoch": 0.03, "grad_norm": 9.57961368560791, "learning_rate": 5.75321725965178e-06, "loss": 3.6744, "step": 2660 }, { "epoch": 0.03, "grad_norm": 8.9398193359375, "learning_rate": 5.7553801232832275e-06, "loss": 3.3024, "step": 2661 }, { "epoch": 0.03, "grad_norm": 9.633285522460938, "learning_rate": 5.7575429869146755e-06, "loss": 3.2704, "step": 2662 }, { "epoch": 0.03, "grad_norm": 11.272252082824707, "learning_rate": 5.759705850546124e-06, "loss": 3.6812, "step": 2663 }, { "epoch": 0.03, "grad_norm": 10.210065841674805, "learning_rate": 5.761868714177572e-06, "loss": 2.7762, "step": 2664 }, { "epoch": 0.03, "grad_norm": 10.299205780029297, "learning_rate": 5.76403157780902e-06, "loss": 3.0867, "step": 2665 }, { "epoch": 0.03, "grad_norm": 10.990863800048828, "learning_rate": 5.766194441440468e-06, "loss": 3.0506, "step": 2666 }, { "epoch": 0.03, "grad_norm": 9.6787109375, "learning_rate": 5.768357305071916e-06, "loss": 3.0809, "step": 2667 }, { "epoch": 0.03, "grad_norm": 9.41451358795166, "learning_rate": 5.770520168703364e-06, "loss": 3.3745, "step": 2668 }, { "epoch": 0.03, "grad_norm": 8.46116828918457, "learning_rate": 5.772683032334812e-06, "loss": 2.5948, "step": 2669 }, { "epoch": 0.03, "grad_norm": 10.072831153869629, "learning_rate": 5.77484589596626e-06, "loss": 3.3529, "step": 2670 }, { "epoch": 0.03, "grad_norm": 9.708817481994629, "learning_rate": 5.777008759597708e-06, "loss": 3.5764, "step": 2671 }, { "epoch": 0.03, "grad_norm": 9.540369033813477, "learning_rate": 5.779171623229156e-06, "loss": 3.0752, "step": 2672 }, { "epoch": 0.03, "grad_norm": 9.703882217407227, "learning_rate": 5.781334486860603e-06, "loss": 3.1782, "step": 2673 }, { "epoch": 0.03, "grad_norm": 10.52985668182373, "learning_rate": 5.783497350492052e-06, "loss": 3.0029, "step": 2674 }, { "epoch": 0.03, "grad_norm": 11.491076469421387, "learning_rate": 5.785660214123499e-06, "loss": 3.3368, "step": 2675 }, { "epoch": 0.03, "grad_norm": 12.644989013671875, "learning_rate": 5.787823077754948e-06, "loss": 3.7434, "step": 2676 }, { "epoch": 0.03, "grad_norm": 9.635578155517578, "learning_rate": 5.7899859413863954e-06, "loss": 3.475, "step": 2677 }, { "epoch": 0.03, "grad_norm": 9.624716758728027, "learning_rate": 5.792148805017844e-06, "loss": 3.3941, "step": 2678 }, { "epoch": 0.03, "grad_norm": 8.200268745422363, "learning_rate": 5.7943116686492915e-06, "loss": 2.3222, "step": 2679 }, { "epoch": 0.03, "grad_norm": 9.756922721862793, "learning_rate": 5.7964745322807404e-06, "loss": 3.3157, "step": 2680 }, { "epoch": 0.03, "grad_norm": 8.713435173034668, "learning_rate": 5.798637395912188e-06, "loss": 3.5977, "step": 2681 }, { "epoch": 0.03, "grad_norm": 10.288996696472168, "learning_rate": 5.8008002595436365e-06, "loss": 3.8717, "step": 2682 }, { "epoch": 0.03, "grad_norm": 12.082686424255371, "learning_rate": 5.802963123175084e-06, "loss": 3.6288, "step": 2683 }, { "epoch": 0.03, "grad_norm": 10.00313663482666, "learning_rate": 5.805125986806533e-06, "loss": 3.1731, "step": 2684 }, { "epoch": 0.03, "grad_norm": 11.300115585327148, "learning_rate": 5.80728885043798e-06, "loss": 3.5898, "step": 2685 }, { "epoch": 0.03, "grad_norm": 11.317732810974121, "learning_rate": 5.809451714069429e-06, "loss": 2.9858, "step": 2686 }, { "epoch": 0.03, "grad_norm": 12.042132377624512, "learning_rate": 5.811614577700876e-06, "loss": 3.7241, "step": 2687 }, { "epoch": 0.03, "grad_norm": 10.124323844909668, "learning_rate": 5.813777441332325e-06, "loss": 3.7921, "step": 2688 }, { "epoch": 0.03, "grad_norm": 10.258614540100098, "learning_rate": 5.815940304963773e-06, "loss": 3.3446, "step": 2689 }, { "epoch": 0.03, "grad_norm": 10.346957206726074, "learning_rate": 5.81810316859522e-06, "loss": 3.3985, "step": 2690 }, { "epoch": 0.03, "grad_norm": 9.848001480102539, "learning_rate": 5.820266032226669e-06, "loss": 3.7904, "step": 2691 }, { "epoch": 0.03, "grad_norm": 9.6123046875, "learning_rate": 5.822428895858116e-06, "loss": 3.6772, "step": 2692 }, { "epoch": 0.03, "grad_norm": 9.92627239227295, "learning_rate": 5.824591759489565e-06, "loss": 3.0884, "step": 2693 }, { "epoch": 0.03, "grad_norm": 8.45341968536377, "learning_rate": 5.826754623121012e-06, "loss": 3.396, "step": 2694 }, { "epoch": 0.03, "grad_norm": 9.589834213256836, "learning_rate": 5.828917486752461e-06, "loss": 3.3529, "step": 2695 }, { "epoch": 0.03, "grad_norm": 10.550278663635254, "learning_rate": 5.831080350383908e-06, "loss": 3.3944, "step": 2696 }, { "epoch": 0.04, "grad_norm": 9.498570442199707, "learning_rate": 5.833243214015357e-06, "loss": 3.3241, "step": 2697 }, { "epoch": 0.04, "grad_norm": 9.514250755310059, "learning_rate": 5.8354060776468045e-06, "loss": 3.574, "step": 2698 }, { "epoch": 0.04, "grad_norm": 9.543787956237793, "learning_rate": 5.837568941278253e-06, "loss": 3.1553, "step": 2699 }, { "epoch": 0.04, "grad_norm": 9.22578239440918, "learning_rate": 5.839731804909701e-06, "loss": 3.3892, "step": 2700 }, { "epoch": 0.04, "grad_norm": 10.253887176513672, "learning_rate": 5.8418946685411495e-06, "loss": 3.4787, "step": 2701 }, { "epoch": 0.04, "grad_norm": 10.577178955078125, "learning_rate": 5.844057532172597e-06, "loss": 3.557, "step": 2702 }, { "epoch": 0.04, "grad_norm": 10.348998069763184, "learning_rate": 5.846220395804046e-06, "loss": 2.7062, "step": 2703 }, { "epoch": 0.04, "grad_norm": 10.148794174194336, "learning_rate": 5.848383259435493e-06, "loss": 3.7245, "step": 2704 }, { "epoch": 0.04, "grad_norm": 9.702878952026367, "learning_rate": 5.850546123066942e-06, "loss": 3.2064, "step": 2705 }, { "epoch": 0.04, "grad_norm": 9.845440864562988, "learning_rate": 5.852708986698389e-06, "loss": 3.3225, "step": 2706 }, { "epoch": 0.04, "grad_norm": 8.876128196716309, "learning_rate": 5.854871850329838e-06, "loss": 3.2903, "step": 2707 }, { "epoch": 0.04, "grad_norm": 8.90445327758789, "learning_rate": 5.857034713961285e-06, "loss": 3.5533, "step": 2708 }, { "epoch": 0.04, "grad_norm": 9.949856758117676, "learning_rate": 5.859197577592733e-06, "loss": 3.1369, "step": 2709 }, { "epoch": 0.04, "grad_norm": 8.691967010498047, "learning_rate": 5.861360441224181e-06, "loss": 2.8062, "step": 2710 }, { "epoch": 0.04, "grad_norm": 9.852418899536133, "learning_rate": 5.863523304855629e-06, "loss": 3.3874, "step": 2711 }, { "epoch": 0.04, "grad_norm": 9.943065643310547, "learning_rate": 5.865686168487077e-06, "loss": 3.6692, "step": 2712 }, { "epoch": 0.04, "grad_norm": 9.748188972473145, "learning_rate": 5.867849032118525e-06, "loss": 3.537, "step": 2713 }, { "epoch": 0.04, "grad_norm": 9.716562271118164, "learning_rate": 5.870011895749973e-06, "loss": 3.6458, "step": 2714 }, { "epoch": 0.04, "grad_norm": 10.332986831665039, "learning_rate": 5.872174759381421e-06, "loss": 3.8925, "step": 2715 }, { "epoch": 0.04, "grad_norm": 8.946634292602539, "learning_rate": 5.874337623012869e-06, "loss": 3.0998, "step": 2716 }, { "epoch": 0.04, "grad_norm": 11.889676094055176, "learning_rate": 5.8765004866443175e-06, "loss": 3.0317, "step": 2717 }, { "epoch": 0.04, "grad_norm": 10.724743843078613, "learning_rate": 5.878663350275765e-06, "loss": 3.3687, "step": 2718 }, { "epoch": 0.04, "grad_norm": 10.418181419372559, "learning_rate": 5.8808262139072136e-06, "loss": 3.1621, "step": 2719 }, { "epoch": 0.04, "grad_norm": 8.934988021850586, "learning_rate": 5.8829890775386625e-06, "loss": 3.4116, "step": 2720 }, { "epoch": 0.04, "grad_norm": 9.988201141357422, "learning_rate": 5.88515194117011e-06, "loss": 4.0311, "step": 2721 }, { "epoch": 0.04, "grad_norm": 9.702186584472656, "learning_rate": 5.8873148048015586e-06, "loss": 3.603, "step": 2722 }, { "epoch": 0.04, "grad_norm": 9.885795593261719, "learning_rate": 5.889477668433006e-06, "loss": 3.0381, "step": 2723 }, { "epoch": 0.04, "grad_norm": 10.045061111450195, "learning_rate": 5.891640532064455e-06, "loss": 2.9146, "step": 2724 }, { "epoch": 0.04, "grad_norm": 9.976326942443848, "learning_rate": 5.893803395695902e-06, "loss": 3.655, "step": 2725 }, { "epoch": 0.04, "grad_norm": 10.664836883544922, "learning_rate": 5.89596625932735e-06, "loss": 3.1435, "step": 2726 }, { "epoch": 0.04, "grad_norm": 9.096797943115234, "learning_rate": 5.898129122958798e-06, "loss": 3.7417, "step": 2727 }, { "epoch": 0.04, "grad_norm": 9.588946342468262, "learning_rate": 5.900291986590246e-06, "loss": 3.3723, "step": 2728 }, { "epoch": 0.04, "grad_norm": 9.11845874786377, "learning_rate": 5.902454850221694e-06, "loss": 2.8977, "step": 2729 }, { "epoch": 0.04, "grad_norm": 10.544939041137695, "learning_rate": 5.904617713853142e-06, "loss": 3.5231, "step": 2730 }, { "epoch": 0.04, "grad_norm": 8.944092750549316, "learning_rate": 5.90678057748459e-06, "loss": 3.4407, "step": 2731 }, { "epoch": 0.04, "grad_norm": 10.268052101135254, "learning_rate": 5.908943441116038e-06, "loss": 3.3406, "step": 2732 }, { "epoch": 0.04, "grad_norm": 11.690309524536133, "learning_rate": 5.911106304747486e-06, "loss": 3.0868, "step": 2733 }, { "epoch": 0.04, "grad_norm": 10.310033798217773, "learning_rate": 5.913269168378934e-06, "loss": 3.3754, "step": 2734 }, { "epoch": 0.04, "grad_norm": 8.532576560974121, "learning_rate": 5.915432032010382e-06, "loss": 3.3048, "step": 2735 }, { "epoch": 0.04, "grad_norm": 9.390900611877441, "learning_rate": 5.9175948956418304e-06, "loss": 3.3247, "step": 2736 }, { "epoch": 0.04, "grad_norm": 10.702860832214355, "learning_rate": 5.919757759273278e-06, "loss": 3.4599, "step": 2737 }, { "epoch": 0.04, "grad_norm": 9.567963600158691, "learning_rate": 5.9219206229047265e-06, "loss": 2.9271, "step": 2738 }, { "epoch": 0.04, "grad_norm": 9.707140922546387, "learning_rate": 5.924083486536174e-06, "loss": 3.1469, "step": 2739 }, { "epoch": 0.04, "grad_norm": 9.734914779663086, "learning_rate": 5.926246350167623e-06, "loss": 3.7731, "step": 2740 }, { "epoch": 0.04, "grad_norm": 9.952974319458008, "learning_rate": 5.92840921379907e-06, "loss": 2.6892, "step": 2741 }, { "epoch": 0.04, "grad_norm": 9.57526969909668, "learning_rate": 5.930572077430519e-06, "loss": 3.9568, "step": 2742 }, { "epoch": 0.04, "grad_norm": 10.781407356262207, "learning_rate": 5.932734941061966e-06, "loss": 2.9438, "step": 2743 }, { "epoch": 0.04, "grad_norm": 9.637654304504395, "learning_rate": 5.934897804693415e-06, "loss": 2.5816, "step": 2744 }, { "epoch": 0.04, "grad_norm": 9.56622314453125, "learning_rate": 5.937060668324862e-06, "loss": 3.0219, "step": 2745 }, { "epoch": 0.04, "grad_norm": 10.518624305725098, "learning_rate": 5.939223531956311e-06, "loss": 2.9792, "step": 2746 }, { "epoch": 0.04, "grad_norm": 10.392083168029785, "learning_rate": 5.941386395587758e-06, "loss": 3.8907, "step": 2747 }, { "epoch": 0.04, "grad_norm": 10.449841499328613, "learning_rate": 5.943549259219207e-06, "loss": 2.7927, "step": 2748 }, { "epoch": 0.04, "grad_norm": 10.098625183105469, "learning_rate": 5.945712122850654e-06, "loss": 3.0329, "step": 2749 }, { "epoch": 0.04, "grad_norm": 10.270493507385254, "learning_rate": 5.947874986482103e-06, "loss": 3.9481, "step": 2750 }, { "epoch": 0.04, "grad_norm": 11.704026222229004, "learning_rate": 5.95003785011355e-06, "loss": 3.3511, "step": 2751 }, { "epoch": 0.04, "grad_norm": 11.302599906921387, "learning_rate": 5.952200713744999e-06, "loss": 3.3732, "step": 2752 }, { "epoch": 0.04, "grad_norm": 9.379715919494629, "learning_rate": 5.954363577376447e-06, "loss": 3.154, "step": 2753 }, { "epoch": 0.04, "grad_norm": 9.729005813598633, "learning_rate": 5.9565264410078945e-06, "loss": 3.668, "step": 2754 }, { "epoch": 0.04, "grad_norm": 9.0372953414917, "learning_rate": 5.958689304639343e-06, "loss": 3.6893, "step": 2755 }, { "epoch": 0.04, "grad_norm": 8.158496856689453, "learning_rate": 5.960852168270791e-06, "loss": 3.6182, "step": 2756 }, { "epoch": 0.04, "grad_norm": 11.48073673248291, "learning_rate": 5.9630150319022395e-06, "loss": 3.6158, "step": 2757 }, { "epoch": 0.04, "grad_norm": 8.672418594360352, "learning_rate": 5.965177895533687e-06, "loss": 3.175, "step": 2758 }, { "epoch": 0.04, "grad_norm": 9.61459732055664, "learning_rate": 5.967340759165136e-06, "loss": 4.2663, "step": 2759 }, { "epoch": 0.04, "grad_norm": 8.7864408493042, "learning_rate": 5.969503622796583e-06, "loss": 3.8841, "step": 2760 }, { "epoch": 0.04, "grad_norm": 9.418703079223633, "learning_rate": 5.971666486428032e-06, "loss": 3.1146, "step": 2761 }, { "epoch": 0.04, "grad_norm": 9.42919635772705, "learning_rate": 5.973829350059479e-06, "loss": 4.0278, "step": 2762 }, { "epoch": 0.04, "grad_norm": 8.952266693115234, "learning_rate": 5.975992213690928e-06, "loss": 3.1138, "step": 2763 }, { "epoch": 0.04, "grad_norm": 10.190399169921875, "learning_rate": 5.978155077322375e-06, "loss": 3.7268, "step": 2764 }, { "epoch": 0.04, "grad_norm": 9.182475090026855, "learning_rate": 5.980317940953824e-06, "loss": 3.4594, "step": 2765 }, { "epoch": 0.04, "grad_norm": 9.50394058227539, "learning_rate": 5.982480804585271e-06, "loss": 3.4548, "step": 2766 }, { "epoch": 0.04, "grad_norm": 9.646448135375977, "learning_rate": 5.98464366821672e-06, "loss": 3.3115, "step": 2767 }, { "epoch": 0.04, "grad_norm": 10.837217330932617, "learning_rate": 5.986806531848167e-06, "loss": 3.029, "step": 2768 }, { "epoch": 0.04, "grad_norm": 9.460373878479004, "learning_rate": 5.988969395479616e-06, "loss": 2.9067, "step": 2769 }, { "epoch": 0.04, "grad_norm": 10.297805786132812, "learning_rate": 5.991132259111063e-06, "loss": 3.7934, "step": 2770 }, { "epoch": 0.04, "grad_norm": 9.428500175476074, "learning_rate": 5.993295122742512e-06, "loss": 2.978, "step": 2771 }, { "epoch": 0.04, "grad_norm": 8.904409408569336, "learning_rate": 5.995457986373959e-06, "loss": 3.7985, "step": 2772 }, { "epoch": 0.04, "grad_norm": 12.034622192382812, "learning_rate": 5.9976208500054075e-06, "loss": 3.6498, "step": 2773 }, { "epoch": 0.04, "grad_norm": 11.356657981872559, "learning_rate": 5.9997837136368555e-06, "loss": 3.6797, "step": 2774 }, { "epoch": 0.04, "grad_norm": 9.385536193847656, "learning_rate": 6.0019465772683036e-06, "loss": 3.5985, "step": 2775 }, { "epoch": 0.04, "grad_norm": 9.42684268951416, "learning_rate": 6.004109440899752e-06, "loss": 3.1624, "step": 2776 }, { "epoch": 0.04, "grad_norm": 10.761946678161621, "learning_rate": 6.0062723045312e-06, "loss": 3.3988, "step": 2777 }, { "epoch": 0.04, "grad_norm": 10.132442474365234, "learning_rate": 6.008435168162648e-06, "loss": 3.5201, "step": 2778 }, { "epoch": 0.04, "grad_norm": 11.879279136657715, "learning_rate": 6.010598031794096e-06, "loss": 3.6707, "step": 2779 }, { "epoch": 0.04, "grad_norm": 10.516621589660645, "learning_rate": 6.012760895425544e-06, "loss": 3.1468, "step": 2780 }, { "epoch": 0.04, "grad_norm": 9.274333953857422, "learning_rate": 6.014923759056992e-06, "loss": 3.346, "step": 2781 }, { "epoch": 0.04, "grad_norm": 10.018783569335938, "learning_rate": 6.017086622688439e-06, "loss": 3.2684, "step": 2782 }, { "epoch": 0.04, "grad_norm": 9.636360168457031, "learning_rate": 6.019249486319888e-06, "loss": 3.1686, "step": 2783 }, { "epoch": 0.04, "grad_norm": 8.951691627502441, "learning_rate": 6.021412349951337e-06, "loss": 2.9416, "step": 2784 }, { "epoch": 0.04, "grad_norm": 9.723764419555664, "learning_rate": 6.023575213582784e-06, "loss": 3.6652, "step": 2785 }, { "epoch": 0.04, "grad_norm": 10.43543529510498, "learning_rate": 6.025738077214233e-06, "loss": 3.5088, "step": 2786 }, { "epoch": 0.04, "grad_norm": 11.114724159240723, "learning_rate": 6.02790094084568e-06, "loss": 2.9698, "step": 2787 }, { "epoch": 0.04, "grad_norm": 9.377033233642578, "learning_rate": 6.030063804477129e-06, "loss": 3.3602, "step": 2788 }, { "epoch": 0.04, "grad_norm": 10.183764457702637, "learning_rate": 6.032226668108576e-06, "loss": 3.6176, "step": 2789 }, { "epoch": 0.04, "grad_norm": 10.816474914550781, "learning_rate": 6.034389531740024e-06, "loss": 3.0767, "step": 2790 }, { "epoch": 0.04, "grad_norm": 10.509666442871094, "learning_rate": 6.036552395371472e-06, "loss": 3.7471, "step": 2791 }, { "epoch": 0.04, "grad_norm": 9.491730690002441, "learning_rate": 6.0387152590029204e-06, "loss": 3.4655, "step": 2792 }, { "epoch": 0.04, "grad_norm": 9.954103469848633, "learning_rate": 6.0408781226343685e-06, "loss": 3.622, "step": 2793 }, { "epoch": 0.04, "grad_norm": 8.915566444396973, "learning_rate": 6.0430409862658165e-06, "loss": 3.0233, "step": 2794 }, { "epoch": 0.04, "grad_norm": 10.097064018249512, "learning_rate": 6.045203849897265e-06, "loss": 3.2253, "step": 2795 }, { "epoch": 0.04, "grad_norm": 10.096190452575684, "learning_rate": 6.047366713528713e-06, "loss": 3.3409, "step": 2796 }, { "epoch": 0.04, "grad_norm": 9.10123348236084, "learning_rate": 6.049529577160161e-06, "loss": 2.5582, "step": 2797 }, { "epoch": 0.04, "grad_norm": 8.96074104309082, "learning_rate": 6.051692440791609e-06, "loss": 3.1077, "step": 2798 }, { "epoch": 0.04, "grad_norm": 8.81175708770752, "learning_rate": 6.053855304423056e-06, "loss": 2.9121, "step": 2799 }, { "epoch": 0.04, "grad_norm": 9.628657341003418, "learning_rate": 6.056018168054505e-06, "loss": 2.9198, "step": 2800 }, { "epoch": 0.04, "grad_norm": 11.532014846801758, "learning_rate": 6.058181031685952e-06, "loss": 3.347, "step": 2801 }, { "epoch": 0.04, "grad_norm": 9.152416229248047, "learning_rate": 6.060343895317401e-06, "loss": 3.6003, "step": 2802 }, { "epoch": 0.04, "grad_norm": 9.04103946685791, "learning_rate": 6.062506758948848e-06, "loss": 2.8033, "step": 2803 }, { "epoch": 0.04, "grad_norm": 9.006495475769043, "learning_rate": 6.064669622580297e-06, "loss": 3.5917, "step": 2804 }, { "epoch": 0.04, "grad_norm": 8.8681640625, "learning_rate": 6.066832486211744e-06, "loss": 3.2735, "step": 2805 }, { "epoch": 0.04, "grad_norm": 8.102254867553711, "learning_rate": 6.068995349843193e-06, "loss": 3.6482, "step": 2806 }, { "epoch": 0.04, "grad_norm": 11.45749568939209, "learning_rate": 6.07115821347464e-06, "loss": 3.2891, "step": 2807 }, { "epoch": 0.04, "grad_norm": 9.236664772033691, "learning_rate": 6.073321077106089e-06, "loss": 3.3651, "step": 2808 }, { "epoch": 0.04, "grad_norm": 10.27970027923584, "learning_rate": 6.0754839407375364e-06, "loss": 3.1958, "step": 2809 }, { "epoch": 0.04, "grad_norm": 9.021515846252441, "learning_rate": 6.077646804368985e-06, "loss": 3.3168, "step": 2810 }, { "epoch": 0.04, "grad_norm": 8.357951164245605, "learning_rate": 6.0798096680004325e-06, "loss": 2.9067, "step": 2811 }, { "epoch": 0.04, "grad_norm": 9.216217041015625, "learning_rate": 6.0819725316318814e-06, "loss": 3.4605, "step": 2812 }, { "epoch": 0.04, "grad_norm": 11.253997802734375, "learning_rate": 6.084135395263329e-06, "loss": 3.4897, "step": 2813 }, { "epoch": 0.04, "grad_norm": 10.943763732910156, "learning_rate": 6.0862982588947775e-06, "loss": 3.2526, "step": 2814 }, { "epoch": 0.04, "grad_norm": 9.85381031036377, "learning_rate": 6.088461122526225e-06, "loss": 3.2599, "step": 2815 }, { "epoch": 0.04, "grad_norm": 9.089924812316895, "learning_rate": 6.090623986157674e-06, "loss": 3.5741, "step": 2816 }, { "epoch": 0.04, "grad_norm": 9.154027938842773, "learning_rate": 6.092786849789122e-06, "loss": 3.0185, "step": 2817 }, { "epoch": 0.04, "grad_norm": 8.970337867736816, "learning_rate": 6.094949713420569e-06, "loss": 3.4511, "step": 2818 }, { "epoch": 0.04, "grad_norm": 9.648444175720215, "learning_rate": 6.097112577052018e-06, "loss": 3.5177, "step": 2819 }, { "epoch": 0.04, "grad_norm": 9.271373748779297, "learning_rate": 6.099275440683465e-06, "loss": 3.2656, "step": 2820 }, { "epoch": 0.04, "grad_norm": 9.647078514099121, "learning_rate": 6.101438304314914e-06, "loss": 2.8872, "step": 2821 }, { "epoch": 0.04, "grad_norm": 10.950169563293457, "learning_rate": 6.103601167946361e-06, "loss": 3.7661, "step": 2822 }, { "epoch": 0.04, "grad_norm": 10.741518020629883, "learning_rate": 6.10576403157781e-06, "loss": 3.0001, "step": 2823 }, { "epoch": 0.04, "grad_norm": 9.535867691040039, "learning_rate": 6.107926895209257e-06, "loss": 3.1629, "step": 2824 }, { "epoch": 0.04, "grad_norm": 9.757997512817383, "learning_rate": 6.110089758840706e-06, "loss": 3.4174, "step": 2825 }, { "epoch": 0.04, "grad_norm": 9.799342155456543, "learning_rate": 6.112252622472153e-06, "loss": 2.8526, "step": 2826 }, { "epoch": 0.04, "grad_norm": 10.324467658996582, "learning_rate": 6.114415486103602e-06, "loss": 3.2623, "step": 2827 }, { "epoch": 0.04, "grad_norm": 9.569561958312988, "learning_rate": 6.116578349735049e-06, "loss": 3.5131, "step": 2828 }, { "epoch": 0.04, "grad_norm": 9.1421480178833, "learning_rate": 6.118741213366498e-06, "loss": 3.3284, "step": 2829 }, { "epoch": 0.04, "grad_norm": 9.179662704467773, "learning_rate": 6.1209040769979455e-06, "loss": 3.523, "step": 2830 }, { "epoch": 0.04, "grad_norm": 9.384316444396973, "learning_rate": 6.123066940629394e-06, "loss": 3.0895, "step": 2831 }, { "epoch": 0.04, "grad_norm": 11.747407913208008, "learning_rate": 6.125229804260842e-06, "loss": 3.1139, "step": 2832 }, { "epoch": 0.04, "grad_norm": 10.450653076171875, "learning_rate": 6.1273926678922905e-06, "loss": 3.5093, "step": 2833 }, { "epoch": 0.04, "grad_norm": 8.966740608215332, "learning_rate": 6.129555531523738e-06, "loss": 3.0427, "step": 2834 }, { "epoch": 0.04, "grad_norm": 10.098129272460938, "learning_rate": 6.131718395155186e-06, "loss": 3.6123, "step": 2835 }, { "epoch": 0.04, "grad_norm": 8.060074806213379, "learning_rate": 6.133881258786634e-06, "loss": 2.8386, "step": 2836 }, { "epoch": 0.04, "grad_norm": 10.800073623657227, "learning_rate": 6.136044122418082e-06, "loss": 3.312, "step": 2837 }, { "epoch": 0.04, "grad_norm": 8.604036331176758, "learning_rate": 6.13820698604953e-06, "loss": 2.8246, "step": 2838 }, { "epoch": 0.04, "grad_norm": 9.584365844726562, "learning_rate": 6.140369849680978e-06, "loss": 2.7103, "step": 2839 }, { "epoch": 0.04, "grad_norm": 9.516463279724121, "learning_rate": 6.142532713312426e-06, "loss": 2.8968, "step": 2840 }, { "epoch": 0.04, "grad_norm": 8.426006317138672, "learning_rate": 6.144695576943874e-06, "loss": 3.3766, "step": 2841 }, { "epoch": 0.04, "grad_norm": 9.897414207458496, "learning_rate": 6.146858440575322e-06, "loss": 3.5077, "step": 2842 }, { "epoch": 0.04, "grad_norm": 9.206725120544434, "learning_rate": 6.14902130420677e-06, "loss": 2.9999, "step": 2843 }, { "epoch": 0.04, "grad_norm": 9.095744132995605, "learning_rate": 6.151184167838218e-06, "loss": 3.0006, "step": 2844 }, { "epoch": 0.04, "grad_norm": 9.7840576171875, "learning_rate": 6.153347031469666e-06, "loss": 3.2156, "step": 2845 }, { "epoch": 0.04, "grad_norm": 10.285504341125488, "learning_rate": 6.1555098951011135e-06, "loss": 3.9882, "step": 2846 }, { "epoch": 0.04, "grad_norm": 8.96451187133789, "learning_rate": 6.157672758732562e-06, "loss": 3.1036, "step": 2847 }, { "epoch": 0.04, "grad_norm": 9.532123565673828, "learning_rate": 6.159835622364011e-06, "loss": 3.1723, "step": 2848 }, { "epoch": 0.04, "grad_norm": 8.434953689575195, "learning_rate": 6.1619984859954585e-06, "loss": 3.772, "step": 2849 }, { "epoch": 0.04, "grad_norm": 9.126641273498535, "learning_rate": 6.164161349626907e-06, "loss": 3.2575, "step": 2850 }, { "epoch": 0.04, "grad_norm": 8.539485931396484, "learning_rate": 6.166324213258355e-06, "loss": 3.2621, "step": 2851 }, { "epoch": 0.04, "grad_norm": 9.446800231933594, "learning_rate": 6.1684870768898035e-06, "loss": 2.9416, "step": 2852 }, { "epoch": 0.04, "grad_norm": 8.825867652893066, "learning_rate": 6.170649940521251e-06, "loss": 3.5572, "step": 2853 }, { "epoch": 0.04, "grad_norm": 10.195231437683105, "learning_rate": 6.172812804152699e-06, "loss": 3.2105, "step": 2854 }, { "epoch": 0.04, "grad_norm": 8.924317359924316, "learning_rate": 6.174975667784147e-06, "loss": 3.779, "step": 2855 }, { "epoch": 0.04, "grad_norm": 11.238077163696289, "learning_rate": 6.177138531415595e-06, "loss": 3.1027, "step": 2856 }, { "epoch": 0.04, "grad_norm": 10.216484069824219, "learning_rate": 6.179301395047043e-06, "loss": 2.7392, "step": 2857 }, { "epoch": 0.04, "grad_norm": 8.720327377319336, "learning_rate": 6.181464258678491e-06, "loss": 3.4082, "step": 2858 }, { "epoch": 0.04, "grad_norm": 9.050461769104004, "learning_rate": 6.183627122309939e-06, "loss": 3.4202, "step": 2859 }, { "epoch": 0.04, "grad_norm": 9.658427238464355, "learning_rate": 6.185789985941387e-06, "loss": 3.5784, "step": 2860 }, { "epoch": 0.04, "grad_norm": 9.278902053833008, "learning_rate": 6.187952849572835e-06, "loss": 3.6554, "step": 2861 }, { "epoch": 0.04, "grad_norm": 8.795842170715332, "learning_rate": 6.190115713204283e-06, "loss": 3.4947, "step": 2862 }, { "epoch": 0.04, "grad_norm": 9.898347854614258, "learning_rate": 6.19227857683573e-06, "loss": 2.8, "step": 2863 }, { "epoch": 0.04, "grad_norm": 8.817440032958984, "learning_rate": 6.194441440467179e-06, "loss": 2.6789, "step": 2864 }, { "epoch": 0.04, "grad_norm": 8.56773567199707, "learning_rate": 6.1966043040986264e-06, "loss": 2.8282, "step": 2865 }, { "epoch": 0.04, "grad_norm": 9.384488105773926, "learning_rate": 6.198767167730075e-06, "loss": 3.5117, "step": 2866 }, { "epoch": 0.04, "grad_norm": 10.572821617126465, "learning_rate": 6.2009300313615225e-06, "loss": 3.3649, "step": 2867 }, { "epoch": 0.04, "grad_norm": 9.388975143432617, "learning_rate": 6.2030928949929714e-06, "loss": 3.3324, "step": 2868 }, { "epoch": 0.04, "grad_norm": 10.233343124389648, "learning_rate": 6.205255758624419e-06, "loss": 3.5689, "step": 2869 }, { "epoch": 0.04, "grad_norm": 10.182512283325195, "learning_rate": 6.2074186222558675e-06, "loss": 3.9983, "step": 2870 }, { "epoch": 0.04, "grad_norm": 9.782430648803711, "learning_rate": 6.209581485887315e-06, "loss": 2.5133, "step": 2871 }, { "epoch": 0.04, "grad_norm": 9.469603538513184, "learning_rate": 6.211744349518764e-06, "loss": 3.1157, "step": 2872 }, { "epoch": 0.04, "grad_norm": 9.909085273742676, "learning_rate": 6.213907213150211e-06, "loss": 3.7442, "step": 2873 }, { "epoch": 0.04, "grad_norm": 9.525046348571777, "learning_rate": 6.21607007678166e-06, "loss": 3.7532, "step": 2874 }, { "epoch": 0.04, "grad_norm": 9.552806854248047, "learning_rate": 6.218232940413107e-06, "loss": 3.1677, "step": 2875 }, { "epoch": 0.04, "grad_norm": 11.819523811340332, "learning_rate": 6.220395804044556e-06, "loss": 2.5317, "step": 2876 }, { "epoch": 0.04, "grad_norm": 9.628005027770996, "learning_rate": 6.222558667676003e-06, "loss": 3.4571, "step": 2877 }, { "epoch": 0.04, "grad_norm": 9.538675308227539, "learning_rate": 6.224721531307452e-06, "loss": 3.2408, "step": 2878 }, { "epoch": 0.04, "grad_norm": 9.760246276855469, "learning_rate": 6.226884394938899e-06, "loss": 3.0366, "step": 2879 }, { "epoch": 0.04, "grad_norm": 9.330865859985352, "learning_rate": 6.229047258570348e-06, "loss": 2.9775, "step": 2880 }, { "epoch": 0.04, "grad_norm": 9.545700073242188, "learning_rate": 6.231210122201796e-06, "loss": 3.08, "step": 2881 }, { "epoch": 0.04, "grad_norm": 9.602295875549316, "learning_rate": 6.233372985833243e-06, "loss": 3.4027, "step": 2882 }, { "epoch": 0.04, "grad_norm": 8.94329833984375, "learning_rate": 6.235535849464692e-06, "loss": 2.8253, "step": 2883 }, { "epoch": 0.04, "grad_norm": 9.520071029663086, "learning_rate": 6.237698713096139e-06, "loss": 3.1098, "step": 2884 }, { "epoch": 0.04, "grad_norm": 9.105676651000977, "learning_rate": 6.239861576727588e-06, "loss": 3.1315, "step": 2885 }, { "epoch": 0.04, "grad_norm": 8.873122215270996, "learning_rate": 6.2420244403590355e-06, "loss": 3.3444, "step": 2886 }, { "epoch": 0.04, "grad_norm": 9.100238800048828, "learning_rate": 6.244187303990484e-06, "loss": 3.8042, "step": 2887 }, { "epoch": 0.04, "grad_norm": 10.354300498962402, "learning_rate": 6.246350167621932e-06, "loss": 3.3518, "step": 2888 }, { "epoch": 0.04, "grad_norm": 10.738946914672852, "learning_rate": 6.2485130312533805e-06, "loss": 2.3317, "step": 2889 }, { "epoch": 0.04, "grad_norm": 9.929003715515137, "learning_rate": 6.250675894884828e-06, "loss": 3.0866, "step": 2890 }, { "epoch": 0.04, "grad_norm": 10.281163215637207, "learning_rate": 6.252838758516277e-06, "loss": 3.0611, "step": 2891 }, { "epoch": 0.04, "grad_norm": 10.245457649230957, "learning_rate": 6.255001622147724e-06, "loss": 3.1864, "step": 2892 }, { "epoch": 0.04, "grad_norm": 9.252138137817383, "learning_rate": 6.257164485779173e-06, "loss": 3.7261, "step": 2893 }, { "epoch": 0.04, "grad_norm": 8.33864688873291, "learning_rate": 6.25932734941062e-06, "loss": 2.9392, "step": 2894 }, { "epoch": 0.04, "grad_norm": 9.571243286132812, "learning_rate": 6.261490213042069e-06, "loss": 3.3076, "step": 2895 }, { "epoch": 0.04, "grad_norm": 9.159811973571777, "learning_rate": 6.263653076673516e-06, "loss": 3.7119, "step": 2896 }, { "epoch": 0.04, "grad_norm": 10.280311584472656, "learning_rate": 6.265815940304965e-06, "loss": 3.7432, "step": 2897 }, { "epoch": 0.04, "grad_norm": 9.446778297424316, "learning_rate": 6.267978803936412e-06, "loss": 3.5577, "step": 2898 }, { "epoch": 0.04, "grad_norm": 9.926092147827148, "learning_rate": 6.27014166756786e-06, "loss": 3.5648, "step": 2899 }, { "epoch": 0.04, "grad_norm": 9.49438190460205, "learning_rate": 6.272304531199308e-06, "loss": 3.087, "step": 2900 }, { "epoch": 0.04, "grad_norm": 10.05865478515625, "learning_rate": 6.274467394830756e-06, "loss": 2.9972, "step": 2901 }, { "epoch": 0.04, "grad_norm": 10.150750160217285, "learning_rate": 6.276630258462204e-06, "loss": 3.2348, "step": 2902 }, { "epoch": 0.04, "grad_norm": 9.556020736694336, "learning_rate": 6.278793122093652e-06, "loss": 2.8473, "step": 2903 }, { "epoch": 0.04, "grad_norm": 10.402908325195312, "learning_rate": 6.2809559857251e-06, "loss": 3.5802, "step": 2904 }, { "epoch": 0.04, "grad_norm": 9.565571784973145, "learning_rate": 6.2831188493565485e-06, "loss": 3.3424, "step": 2905 }, { "epoch": 0.04, "grad_norm": 9.46043586730957, "learning_rate": 6.2852817129879965e-06, "loss": 3.4302, "step": 2906 }, { "epoch": 0.04, "grad_norm": 10.161023139953613, "learning_rate": 6.2874445766194446e-06, "loss": 2.9945, "step": 2907 }, { "epoch": 0.04, "grad_norm": 8.68289852142334, "learning_rate": 6.289607440250893e-06, "loss": 3.4803, "step": 2908 }, { "epoch": 0.04, "grad_norm": 9.747011184692383, "learning_rate": 6.291770303882341e-06, "loss": 3.4642, "step": 2909 }, { "epoch": 0.04, "grad_norm": 10.275514602661133, "learning_rate": 6.293933167513788e-06, "loss": 2.9973, "step": 2910 }, { "epoch": 0.04, "grad_norm": 9.247647285461426, "learning_rate": 6.296096031145237e-06, "loss": 2.8825, "step": 2911 }, { "epoch": 0.04, "grad_norm": 8.818078994750977, "learning_rate": 6.298258894776686e-06, "loss": 3.161, "step": 2912 }, { "epoch": 0.04, "grad_norm": 9.432622909545898, "learning_rate": 6.300421758408133e-06, "loss": 2.7594, "step": 2913 }, { "epoch": 0.04, "grad_norm": 8.72828483581543, "learning_rate": 6.302584622039582e-06, "loss": 2.5648, "step": 2914 }, { "epoch": 0.04, "grad_norm": 9.449161529541016, "learning_rate": 6.304747485671029e-06, "loss": 2.9987, "step": 2915 }, { "epoch": 0.04, "grad_norm": 8.401135444641113, "learning_rate": 6.306910349302478e-06, "loss": 3.1755, "step": 2916 }, { "epoch": 0.04, "grad_norm": 8.943692207336426, "learning_rate": 6.309073212933925e-06, "loss": 3.2966, "step": 2917 }, { "epoch": 0.04, "grad_norm": 11.447436332702637, "learning_rate": 6.311236076565373e-06, "loss": 3.0223, "step": 2918 }, { "epoch": 0.04, "grad_norm": 11.98641300201416, "learning_rate": 6.313398940196821e-06, "loss": 3.6121, "step": 2919 }, { "epoch": 0.04, "grad_norm": 8.652694702148438, "learning_rate": 6.315561803828269e-06, "loss": 2.9499, "step": 2920 }, { "epoch": 0.04, "grad_norm": 8.773736953735352, "learning_rate": 6.317724667459717e-06, "loss": 2.6444, "step": 2921 }, { "epoch": 0.04, "grad_norm": 10.482061386108398, "learning_rate": 6.319887531091165e-06, "loss": 3.0336, "step": 2922 }, { "epoch": 0.04, "grad_norm": 10.066696166992188, "learning_rate": 6.322050394722613e-06, "loss": 3.3725, "step": 2923 }, { "epoch": 0.04, "grad_norm": 9.821565628051758, "learning_rate": 6.3242132583540614e-06, "loss": 3.0394, "step": 2924 }, { "epoch": 0.04, "grad_norm": 9.339598655700684, "learning_rate": 6.3263761219855095e-06, "loss": 2.8924, "step": 2925 }, { "epoch": 0.04, "grad_norm": 9.832783699035645, "learning_rate": 6.3285389856169575e-06, "loss": 3.4227, "step": 2926 }, { "epoch": 0.04, "grad_norm": 9.811854362487793, "learning_rate": 6.330701849248405e-06, "loss": 2.9803, "step": 2927 }, { "epoch": 0.04, "grad_norm": 9.591200828552246, "learning_rate": 6.332864712879854e-06, "loss": 3.8482, "step": 2928 }, { "epoch": 0.04, "grad_norm": 9.392535209655762, "learning_rate": 6.335027576511301e-06, "loss": 3.5703, "step": 2929 }, { "epoch": 0.04, "grad_norm": 9.613395690917969, "learning_rate": 6.33719044014275e-06, "loss": 2.7325, "step": 2930 }, { "epoch": 0.04, "grad_norm": 11.309535026550293, "learning_rate": 6.339353303774197e-06, "loss": 3.1023, "step": 2931 }, { "epoch": 0.04, "grad_norm": 9.136109352111816, "learning_rate": 6.341516167405646e-06, "loss": 3.3048, "step": 2932 }, { "epoch": 0.04, "grad_norm": 9.96105670928955, "learning_rate": 6.343679031037093e-06, "loss": 3.7684, "step": 2933 }, { "epoch": 0.04, "grad_norm": 10.800346374511719, "learning_rate": 6.345841894668542e-06, "loss": 2.4416, "step": 2934 }, { "epoch": 0.04, "grad_norm": 8.853623390197754, "learning_rate": 6.348004758299989e-06, "loss": 3.1899, "step": 2935 }, { "epoch": 0.04, "grad_norm": 9.724522590637207, "learning_rate": 6.350167621931438e-06, "loss": 3.2826, "step": 2936 }, { "epoch": 0.04, "grad_norm": 8.524524688720703, "learning_rate": 6.352330485562885e-06, "loss": 3.5313, "step": 2937 }, { "epoch": 0.04, "grad_norm": 8.968172073364258, "learning_rate": 6.354493349194334e-06, "loss": 3.4231, "step": 2938 }, { "epoch": 0.04, "grad_norm": 9.02133846282959, "learning_rate": 6.356656212825781e-06, "loss": 2.7841, "step": 2939 }, { "epoch": 0.04, "grad_norm": 8.81792163848877, "learning_rate": 6.35881907645723e-06, "loss": 2.6924, "step": 2940 }, { "epoch": 0.04, "grad_norm": 9.68805980682373, "learning_rate": 6.3609819400886775e-06, "loss": 3.1321, "step": 2941 }, { "epoch": 0.04, "grad_norm": 8.402253150939941, "learning_rate": 6.363144803720126e-06, "loss": 3.0337, "step": 2942 }, { "epoch": 0.04, "grad_norm": 10.632458686828613, "learning_rate": 6.3653076673515736e-06, "loss": 3.0884, "step": 2943 }, { "epoch": 0.04, "grad_norm": 8.57410717010498, "learning_rate": 6.3674705309830225e-06, "loss": 3.3538, "step": 2944 }, { "epoch": 0.04, "grad_norm": 10.333853721618652, "learning_rate": 6.3696333946144705e-06, "loss": 3.4143, "step": 2945 }, { "epoch": 0.04, "grad_norm": 9.04462718963623, "learning_rate": 6.371796258245918e-06, "loss": 3.6084, "step": 2946 }, { "epoch": 0.04, "grad_norm": 8.61690902709961, "learning_rate": 6.373959121877367e-06, "loss": 3.6716, "step": 2947 }, { "epoch": 0.04, "grad_norm": 12.85011100769043, "learning_rate": 6.376121985508814e-06, "loss": 2.8023, "step": 2948 }, { "epoch": 0.04, "grad_norm": 10.631319999694824, "learning_rate": 6.378284849140263e-06, "loss": 2.8002, "step": 2949 }, { "epoch": 0.04, "grad_norm": 9.68669319152832, "learning_rate": 6.38044771277171e-06, "loss": 2.7975, "step": 2950 }, { "epoch": 0.04, "grad_norm": 9.905354499816895, "learning_rate": 6.382610576403159e-06, "loss": 3.4742, "step": 2951 }, { "epoch": 0.04, "grad_norm": 9.161162376403809, "learning_rate": 6.384773440034606e-06, "loss": 3.2821, "step": 2952 }, { "epoch": 0.04, "grad_norm": 9.082674026489258, "learning_rate": 6.386936303666055e-06, "loss": 3.4435, "step": 2953 }, { "epoch": 0.04, "grad_norm": 10.038174629211426, "learning_rate": 6.389099167297502e-06, "loss": 3.1543, "step": 2954 }, { "epoch": 0.04, "grad_norm": 9.478487014770508, "learning_rate": 6.391262030928951e-06, "loss": 3.1047, "step": 2955 }, { "epoch": 0.04, "grad_norm": 8.694952964782715, "learning_rate": 6.393424894560398e-06, "loss": 3.1769, "step": 2956 }, { "epoch": 0.04, "grad_norm": 9.569154739379883, "learning_rate": 6.395587758191847e-06, "loss": 3.296, "step": 2957 }, { "epoch": 0.04, "grad_norm": 9.337955474853516, "learning_rate": 6.397750621823294e-06, "loss": 3.0929, "step": 2958 }, { "epoch": 0.04, "grad_norm": 9.660163879394531, "learning_rate": 6.399913485454743e-06, "loss": 3.5497, "step": 2959 }, { "epoch": 0.04, "grad_norm": 9.23478889465332, "learning_rate": 6.40207634908619e-06, "loss": 2.7929, "step": 2960 }, { "epoch": 0.04, "grad_norm": 10.171573638916016, "learning_rate": 6.404239212717639e-06, "loss": 2.7694, "step": 2961 }, { "epoch": 0.04, "grad_norm": 8.655014991760254, "learning_rate": 6.4064020763490865e-06, "loss": 3.28, "step": 2962 }, { "epoch": 0.04, "grad_norm": 9.251750946044922, "learning_rate": 6.4085649399805346e-06, "loss": 3.3796, "step": 2963 }, { "epoch": 0.04, "grad_norm": 10.305020332336426, "learning_rate": 6.410727803611983e-06, "loss": 3.5199, "step": 2964 }, { "epoch": 0.04, "grad_norm": 9.895330429077148, "learning_rate": 6.412890667243431e-06, "loss": 2.7497, "step": 2965 }, { "epoch": 0.04, "grad_norm": 11.14999771118164, "learning_rate": 6.415053530874879e-06, "loss": 3.3595, "step": 2966 }, { "epoch": 0.04, "grad_norm": 10.39842700958252, "learning_rate": 6.417216394506327e-06, "loss": 3.723, "step": 2967 }, { "epoch": 0.04, "grad_norm": 9.76855182647705, "learning_rate": 6.419379258137775e-06, "loss": 2.3997, "step": 2968 }, { "epoch": 0.04, "grad_norm": 9.839656829833984, "learning_rate": 6.421542121769223e-06, "loss": 3.08, "step": 2969 }, { "epoch": 0.04, "grad_norm": 9.600096702575684, "learning_rate": 6.423704985400671e-06, "loss": 3.5445, "step": 2970 }, { "epoch": 0.04, "grad_norm": 9.193875312805176, "learning_rate": 6.425867849032119e-06, "loss": 3.1058, "step": 2971 }, { "epoch": 0.04, "grad_norm": 9.639307975769043, "learning_rate": 6.428030712663566e-06, "loss": 3.1163, "step": 2972 }, { "epoch": 0.04, "grad_norm": 12.227079391479492, "learning_rate": 6.430193576295015e-06, "loss": 2.9326, "step": 2973 }, { "epoch": 0.04, "grad_norm": 10.38256549835205, "learning_rate": 6.432356439926462e-06, "loss": 3.361, "step": 2974 }, { "epoch": 0.04, "grad_norm": 8.723618507385254, "learning_rate": 6.434519303557911e-06, "loss": 3.3175, "step": 2975 }, { "epoch": 0.04, "grad_norm": 8.571308135986328, "learning_rate": 6.43668216718936e-06, "loss": 2.5798, "step": 2976 }, { "epoch": 0.04, "grad_norm": 10.520018577575684, "learning_rate": 6.438845030820807e-06, "loss": 2.8718, "step": 2977 }, { "epoch": 0.04, "grad_norm": 9.667871475219727, "learning_rate": 6.441007894452256e-06, "loss": 3.4911, "step": 2978 }, { "epoch": 0.04, "grad_norm": 8.432246208190918, "learning_rate": 6.443170758083703e-06, "loss": 2.778, "step": 2979 }, { "epoch": 0.04, "grad_norm": 10.569195747375488, "learning_rate": 6.445333621715152e-06, "loss": 3.4776, "step": 2980 }, { "epoch": 0.04, "grad_norm": 10.658982276916504, "learning_rate": 6.4474964853465995e-06, "loss": 3.1588, "step": 2981 }, { "epoch": 0.04, "grad_norm": 9.304200172424316, "learning_rate": 6.4496593489780475e-06, "loss": 2.9277, "step": 2982 }, { "epoch": 0.04, "grad_norm": 10.093236923217773, "learning_rate": 6.451822212609496e-06, "loss": 2.7435, "step": 2983 }, { "epoch": 0.04, "grad_norm": 9.531379699707031, "learning_rate": 6.453985076240944e-06, "loss": 2.7471, "step": 2984 }, { "epoch": 0.04, "grad_norm": 9.924872398376465, "learning_rate": 6.456147939872392e-06, "loss": 3.4955, "step": 2985 }, { "epoch": 0.04, "grad_norm": 9.102930068969727, "learning_rate": 6.45831080350384e-06, "loss": 3.0882, "step": 2986 }, { "epoch": 0.04, "grad_norm": 10.026315689086914, "learning_rate": 6.460473667135288e-06, "loss": 3.2927, "step": 2987 }, { "epoch": 0.04, "grad_norm": 10.671669960021973, "learning_rate": 6.462636530766736e-06, "loss": 3.1318, "step": 2988 }, { "epoch": 0.04, "grad_norm": 9.040484428405762, "learning_rate": 6.464799394398184e-06, "loss": 3.1495, "step": 2989 }, { "epoch": 0.04, "grad_norm": 9.241764068603516, "learning_rate": 6.466962258029632e-06, "loss": 3.1873, "step": 2990 }, { "epoch": 0.04, "grad_norm": 9.856532096862793, "learning_rate": 6.469125121661079e-06, "loss": 3.3505, "step": 2991 }, { "epoch": 0.04, "grad_norm": 9.32422161102295, "learning_rate": 6.471287985292528e-06, "loss": 3.1648, "step": 2992 }, { "epoch": 0.04, "grad_norm": 9.48603630065918, "learning_rate": 6.473450848923975e-06, "loss": 2.8908, "step": 2993 }, { "epoch": 0.04, "grad_norm": 9.778481483459473, "learning_rate": 6.475613712555424e-06, "loss": 2.8723, "step": 2994 }, { "epoch": 0.04, "grad_norm": 10.406609535217285, "learning_rate": 6.477776576186871e-06, "loss": 3.3201, "step": 2995 }, { "epoch": 0.04, "grad_norm": 10.095396041870117, "learning_rate": 6.47993943981832e-06, "loss": 3.1685, "step": 2996 }, { "epoch": 0.04, "grad_norm": 9.063841819763184, "learning_rate": 6.4821023034497674e-06, "loss": 2.6813, "step": 2997 }, { "epoch": 0.04, "grad_norm": 9.89438247680664, "learning_rate": 6.484265167081216e-06, "loss": 3.3626, "step": 2998 }, { "epoch": 0.04, "grad_norm": 9.735811233520508, "learning_rate": 6.4864280307126635e-06, "loss": 2.9411, "step": 2999 }, { "epoch": 0.04, "grad_norm": 10.371091842651367, "learning_rate": 6.4885908943441124e-06, "loss": 3.7171, "step": 3000 }, { "epoch": 0.04, "grad_norm": 10.2696533203125, "learning_rate": 6.49075375797556e-06, "loss": 3.3614, "step": 3001 }, { "epoch": 0.04, "grad_norm": 10.215907096862793, "learning_rate": 6.4929166216070085e-06, "loss": 3.2032, "step": 3002 }, { "epoch": 0.04, "grad_norm": 9.495893478393555, "learning_rate": 6.495079485238456e-06, "loss": 3.3645, "step": 3003 }, { "epoch": 0.04, "grad_norm": 8.628117561340332, "learning_rate": 6.497242348869905e-06, "loss": 3.8705, "step": 3004 }, { "epoch": 0.04, "grad_norm": 9.982902526855469, "learning_rate": 6.499405212501352e-06, "loss": 3.4015, "step": 3005 }, { "epoch": 0.04, "grad_norm": 9.334571838378906, "learning_rate": 6.501568076132801e-06, "loss": 2.7779, "step": 3006 }, { "epoch": 0.04, "grad_norm": 11.29697322845459, "learning_rate": 6.503730939764248e-06, "loss": 3.4411, "step": 3007 }, { "epoch": 0.04, "grad_norm": 9.172266960144043, "learning_rate": 6.505893803395697e-06, "loss": 2.9736, "step": 3008 }, { "epoch": 0.04, "grad_norm": 10.150574684143066, "learning_rate": 6.508056667027145e-06, "loss": 3.1569, "step": 3009 }, { "epoch": 0.04, "grad_norm": 10.453662872314453, "learning_rate": 6.510219530658592e-06, "loss": 2.9596, "step": 3010 }, { "epoch": 0.04, "grad_norm": 9.831969261169434, "learning_rate": 6.512382394290041e-06, "loss": 3.4822, "step": 3011 }, { "epoch": 0.04, "grad_norm": 11.101577758789062, "learning_rate": 6.514545257921488e-06, "loss": 3.5885, "step": 3012 }, { "epoch": 0.04, "grad_norm": 9.875682830810547, "learning_rate": 6.516708121552937e-06, "loss": 2.7429, "step": 3013 }, { "epoch": 0.04, "grad_norm": 8.587348937988281, "learning_rate": 6.518870985184384e-06, "loss": 3.3872, "step": 3014 }, { "epoch": 0.04, "grad_norm": 9.253704071044922, "learning_rate": 6.521033848815833e-06, "loss": 3.263, "step": 3015 }, { "epoch": 0.04, "grad_norm": 8.618487358093262, "learning_rate": 6.52319671244728e-06, "loss": 3.1105, "step": 3016 }, { "epoch": 0.04, "grad_norm": 10.555450439453125, "learning_rate": 6.525359576078729e-06, "loss": 3.2394, "step": 3017 }, { "epoch": 0.04, "grad_norm": 10.570569038391113, "learning_rate": 6.5275224397101765e-06, "loss": 3.5444, "step": 3018 }, { "epoch": 0.04, "grad_norm": 10.502643585205078, "learning_rate": 6.529685303341625e-06, "loss": 3.0193, "step": 3019 }, { "epoch": 0.04, "grad_norm": 9.69031810760498, "learning_rate": 6.531848166973073e-06, "loss": 3.2825, "step": 3020 }, { "epoch": 0.04, "grad_norm": 8.949307441711426, "learning_rate": 6.5340110306045215e-06, "loss": 3.291, "step": 3021 }, { "epoch": 0.04, "grad_norm": 9.004040718078613, "learning_rate": 6.536173894235969e-06, "loss": 3.308, "step": 3022 }, { "epoch": 0.04, "grad_norm": 10.352140426635742, "learning_rate": 6.538336757867418e-06, "loss": 3.7104, "step": 3023 }, { "epoch": 0.04, "grad_norm": 8.416061401367188, "learning_rate": 6.540499621498865e-06, "loss": 2.864, "step": 3024 }, { "epoch": 0.04, "grad_norm": 8.024951934814453, "learning_rate": 6.542662485130314e-06, "loss": 3.4248, "step": 3025 }, { "epoch": 0.04, "grad_norm": 9.54272747039795, "learning_rate": 6.544825348761761e-06, "loss": 3.242, "step": 3026 }, { "epoch": 0.04, "grad_norm": 10.081037521362305, "learning_rate": 6.546988212393209e-06, "loss": 2.8329, "step": 3027 }, { "epoch": 0.04, "grad_norm": 11.475995063781738, "learning_rate": 6.549151076024657e-06, "loss": 3.1921, "step": 3028 }, { "epoch": 0.04, "grad_norm": 10.37407112121582, "learning_rate": 6.551313939656105e-06, "loss": 3.4114, "step": 3029 }, { "epoch": 0.04, "grad_norm": 10.614075660705566, "learning_rate": 6.553476803287553e-06, "loss": 3.2598, "step": 3030 }, { "epoch": 0.04, "grad_norm": 8.415891647338867, "learning_rate": 6.555639666919001e-06, "loss": 3.4051, "step": 3031 }, { "epoch": 0.04, "grad_norm": 9.230072021484375, "learning_rate": 6.557802530550449e-06, "loss": 3.296, "step": 3032 }, { "epoch": 0.04, "grad_norm": 8.948996543884277, "learning_rate": 6.559965394181897e-06, "loss": 3.556, "step": 3033 }, { "epoch": 0.04, "grad_norm": 9.944190979003906, "learning_rate": 6.562128257813345e-06, "loss": 3.2059, "step": 3034 }, { "epoch": 0.04, "grad_norm": 9.343896865844727, "learning_rate": 6.564291121444793e-06, "loss": 2.8432, "step": 3035 }, { "epoch": 0.04, "grad_norm": 8.672747611999512, "learning_rate": 6.566453985076241e-06, "loss": 2.9522, "step": 3036 }, { "epoch": 0.04, "grad_norm": 8.507999420166016, "learning_rate": 6.5686168487076895e-06, "loss": 3.4214, "step": 3037 }, { "epoch": 0.04, "grad_norm": 10.16149616241455, "learning_rate": 6.570779712339137e-06, "loss": 3.3361, "step": 3038 }, { "epoch": 0.04, "grad_norm": 10.471900939941406, "learning_rate": 6.572942575970586e-06, "loss": 3.2554, "step": 3039 }, { "epoch": 0.04, "grad_norm": 9.237195014953613, "learning_rate": 6.5751054396020345e-06, "loss": 2.935, "step": 3040 }, { "epoch": 0.04, "grad_norm": 9.64228343963623, "learning_rate": 6.577268303233482e-06, "loss": 3.2011, "step": 3041 }, { "epoch": 0.04, "grad_norm": 9.795318603515625, "learning_rate": 6.579431166864931e-06, "loss": 3.4027, "step": 3042 }, { "epoch": 0.04, "grad_norm": 9.582436561584473, "learning_rate": 6.581594030496378e-06, "loss": 2.6202, "step": 3043 }, { "epoch": 0.04, "grad_norm": 9.877812385559082, "learning_rate": 6.583756894127827e-06, "loss": 3.4025, "step": 3044 }, { "epoch": 0.04, "grad_norm": 8.226362228393555, "learning_rate": 6.585919757759274e-06, "loss": 3.1036, "step": 3045 }, { "epoch": 0.04, "grad_norm": 9.921396255493164, "learning_rate": 6.588082621390722e-06, "loss": 3.029, "step": 3046 }, { "epoch": 0.04, "grad_norm": 9.431886672973633, "learning_rate": 6.59024548502217e-06, "loss": 3.1404, "step": 3047 }, { "epoch": 0.04, "grad_norm": 10.49059009552002, "learning_rate": 6.592408348653618e-06, "loss": 2.7428, "step": 3048 }, { "epoch": 0.04, "grad_norm": 9.126662254333496, "learning_rate": 6.594571212285066e-06, "loss": 3.2473, "step": 3049 }, { "epoch": 0.04, "grad_norm": 11.058452606201172, "learning_rate": 6.596734075916514e-06, "loss": 2.9307, "step": 3050 }, { "epoch": 0.04, "grad_norm": 9.182634353637695, "learning_rate": 6.598896939547962e-06, "loss": 3.27, "step": 3051 }, { "epoch": 0.04, "grad_norm": 10.437575340270996, "learning_rate": 6.60105980317941e-06, "loss": 3.3934, "step": 3052 }, { "epoch": 0.04, "grad_norm": 9.391361236572266, "learning_rate": 6.603222666810858e-06, "loss": 3.0674, "step": 3053 }, { "epoch": 0.04, "grad_norm": 8.529389381408691, "learning_rate": 6.605385530442306e-06, "loss": 3.1679, "step": 3054 }, { "epoch": 0.04, "grad_norm": 10.27743911743164, "learning_rate": 6.6075483940737535e-06, "loss": 2.9811, "step": 3055 }, { "epoch": 0.04, "grad_norm": 9.729595184326172, "learning_rate": 6.6097112577052024e-06, "loss": 3.3545, "step": 3056 }, { "epoch": 0.04, "grad_norm": 9.486016273498535, "learning_rate": 6.61187412133665e-06, "loss": 2.7741, "step": 3057 }, { "epoch": 0.04, "grad_norm": 10.50390911102295, "learning_rate": 6.6140369849680985e-06, "loss": 3.5054, "step": 3058 }, { "epoch": 0.04, "grad_norm": 9.335685729980469, "learning_rate": 6.616199848599546e-06, "loss": 3.03, "step": 3059 }, { "epoch": 0.04, "grad_norm": 8.45789909362793, "learning_rate": 6.618362712230995e-06, "loss": 3.2123, "step": 3060 }, { "epoch": 0.04, "grad_norm": 9.287942886352539, "learning_rate": 6.620525575862442e-06, "loss": 2.566, "step": 3061 }, { "epoch": 0.04, "grad_norm": 10.420092582702637, "learning_rate": 6.622688439493891e-06, "loss": 2.8549, "step": 3062 }, { "epoch": 0.04, "grad_norm": 9.073095321655273, "learning_rate": 6.624851303125338e-06, "loss": 2.7203, "step": 3063 }, { "epoch": 0.04, "grad_norm": 10.416447639465332, "learning_rate": 6.627014166756787e-06, "loss": 3.1771, "step": 3064 }, { "epoch": 0.04, "grad_norm": 10.021844863891602, "learning_rate": 6.629177030388234e-06, "loss": 3.9616, "step": 3065 }, { "epoch": 0.04, "grad_norm": 9.462007522583008, "learning_rate": 6.631339894019683e-06, "loss": 3.5568, "step": 3066 }, { "epoch": 0.04, "grad_norm": 9.81446647644043, "learning_rate": 6.63350275765113e-06, "loss": 4.0095, "step": 3067 }, { "epoch": 0.04, "grad_norm": 8.817350387573242, "learning_rate": 6.635665621282579e-06, "loss": 3.3575, "step": 3068 }, { "epoch": 0.04, "grad_norm": 9.772424697875977, "learning_rate": 6.637828484914026e-06, "loss": 2.9745, "step": 3069 }, { "epoch": 0.04, "grad_norm": 9.576699256896973, "learning_rate": 6.639991348545475e-06, "loss": 3.651, "step": 3070 }, { "epoch": 0.04, "grad_norm": 8.704559326171875, "learning_rate": 6.642154212176922e-06, "loss": 3.2305, "step": 3071 }, { "epoch": 0.04, "grad_norm": 9.023696899414062, "learning_rate": 6.64431707580837e-06, "loss": 3.219, "step": 3072 }, { "epoch": 0.04, "grad_norm": 9.59807014465332, "learning_rate": 6.646479939439819e-06, "loss": 3.4113, "step": 3073 }, { "epoch": 0.04, "grad_norm": 9.329471588134766, "learning_rate": 6.6486428030712665e-06, "loss": 2.795, "step": 3074 }, { "epoch": 0.04, "grad_norm": 8.703218460083008, "learning_rate": 6.650805666702715e-06, "loss": 2.8689, "step": 3075 }, { "epoch": 0.04, "grad_norm": 9.817580223083496, "learning_rate": 6.652968530334163e-06, "loss": 3.4405, "step": 3076 }, { "epoch": 0.04, "grad_norm": 9.487953186035156, "learning_rate": 6.6551313939656115e-06, "loss": 3.5292, "step": 3077 }, { "epoch": 0.04, "grad_norm": 9.548261642456055, "learning_rate": 6.657294257597059e-06, "loss": 3.6329, "step": 3078 }, { "epoch": 0.04, "grad_norm": 9.750161170959473, "learning_rate": 6.659457121228508e-06, "loss": 3.4116, "step": 3079 }, { "epoch": 0.04, "grad_norm": 8.626590728759766, "learning_rate": 6.661619984859955e-06, "loss": 3.6388, "step": 3080 }, { "epoch": 0.04, "grad_norm": 8.601703643798828, "learning_rate": 6.663782848491404e-06, "loss": 3.1083, "step": 3081 }, { "epoch": 0.04, "grad_norm": 10.189643859863281, "learning_rate": 6.665945712122851e-06, "loss": 3.3733, "step": 3082 }, { "epoch": 0.04, "grad_norm": 9.400232315063477, "learning_rate": 6.6681085757543e-06, "loss": 2.9442, "step": 3083 }, { "epoch": 0.04, "grad_norm": 9.839839935302734, "learning_rate": 6.670271439385747e-06, "loss": 3.534, "step": 3084 }, { "epoch": 0.04, "grad_norm": 10.153388977050781, "learning_rate": 6.672434303017196e-06, "loss": 3.2086, "step": 3085 }, { "epoch": 0.04, "grad_norm": 8.816866874694824, "learning_rate": 6.674597166648643e-06, "loss": 3.9192, "step": 3086 }, { "epoch": 0.04, "grad_norm": 9.350783348083496, "learning_rate": 6.676760030280092e-06, "loss": 3.2007, "step": 3087 }, { "epoch": 0.04, "grad_norm": 10.221294403076172, "learning_rate": 6.678922893911539e-06, "loss": 3.0182, "step": 3088 }, { "epoch": 0.04, "grad_norm": 9.633790969848633, "learning_rate": 6.681085757542988e-06, "loss": 3.0811, "step": 3089 }, { "epoch": 0.04, "grad_norm": 9.102782249450684, "learning_rate": 6.683248621174435e-06, "loss": 2.9666, "step": 3090 }, { "epoch": 0.04, "grad_norm": 8.31059455871582, "learning_rate": 6.685411484805883e-06, "loss": 2.6106, "step": 3091 }, { "epoch": 0.04, "grad_norm": 10.783230781555176, "learning_rate": 6.687574348437331e-06, "loss": 3.6465, "step": 3092 }, { "epoch": 0.04, "grad_norm": 8.747660636901855, "learning_rate": 6.6897372120687795e-06, "loss": 2.7339, "step": 3093 }, { "epoch": 0.04, "grad_norm": 9.161020278930664, "learning_rate": 6.6919000757002275e-06, "loss": 3.1113, "step": 3094 }, { "epoch": 0.04, "grad_norm": 9.922883033752441, "learning_rate": 6.6940629393316756e-06, "loss": 3.0312, "step": 3095 }, { "epoch": 0.04, "grad_norm": 9.947489738464355, "learning_rate": 6.696225802963124e-06, "loss": 3.672, "step": 3096 }, { "epoch": 0.04, "grad_norm": 9.451847076416016, "learning_rate": 6.698388666594572e-06, "loss": 2.7458, "step": 3097 }, { "epoch": 0.04, "grad_norm": 10.87447452545166, "learning_rate": 6.70055153022602e-06, "loss": 3.8566, "step": 3098 }, { "epoch": 0.04, "grad_norm": 8.926180839538574, "learning_rate": 6.702714393857468e-06, "loss": 3.6088, "step": 3099 }, { "epoch": 0.04, "grad_norm": 10.505539894104004, "learning_rate": 6.704877257488915e-06, "loss": 2.9327, "step": 3100 }, { "epoch": 0.04, "grad_norm": 10.208586692810059, "learning_rate": 6.707040121120364e-06, "loss": 3.1384, "step": 3101 }, { "epoch": 0.04, "grad_norm": 10.538785934448242, "learning_rate": 6.709202984751811e-06, "loss": 3.1125, "step": 3102 }, { "epoch": 0.04, "grad_norm": 11.688004493713379, "learning_rate": 6.71136584838326e-06, "loss": 3.5476, "step": 3103 }, { "epoch": 0.04, "grad_norm": 9.259451866149902, "learning_rate": 6.713528712014709e-06, "loss": 2.8063, "step": 3104 }, { "epoch": 0.04, "grad_norm": 9.039881706237793, "learning_rate": 6.715691575646156e-06, "loss": 2.9969, "step": 3105 }, { "epoch": 0.04, "grad_norm": 9.539548873901367, "learning_rate": 6.717854439277605e-06, "loss": 2.4118, "step": 3106 }, { "epoch": 0.04, "grad_norm": 10.501860618591309, "learning_rate": 6.720017302909052e-06, "loss": 2.664, "step": 3107 }, { "epoch": 0.04, "grad_norm": 9.413154602050781, "learning_rate": 6.7221801665405e-06, "loss": 2.8889, "step": 3108 }, { "epoch": 0.04, "grad_norm": 9.92018985748291, "learning_rate": 6.724343030171948e-06, "loss": 2.883, "step": 3109 }, { "epoch": 0.04, "grad_norm": 10.251944541931152, "learning_rate": 6.726505893803396e-06, "loss": 3.3334, "step": 3110 }, { "epoch": 0.04, "grad_norm": 10.106596946716309, "learning_rate": 6.728668757434844e-06, "loss": 3.492, "step": 3111 }, { "epoch": 0.04, "grad_norm": 10.189643859863281, "learning_rate": 6.7308316210662924e-06, "loss": 3.9521, "step": 3112 }, { "epoch": 0.04, "grad_norm": 8.899967193603516, "learning_rate": 6.7329944846977405e-06, "loss": 3.5202, "step": 3113 }, { "epoch": 0.04, "grad_norm": 9.358647346496582, "learning_rate": 6.7351573483291885e-06, "loss": 3.0432, "step": 3114 }, { "epoch": 0.04, "grad_norm": 10.822772026062012, "learning_rate": 6.737320211960637e-06, "loss": 3.7232, "step": 3115 }, { "epoch": 0.04, "grad_norm": 9.707603454589844, "learning_rate": 6.739483075592085e-06, "loss": 2.7654, "step": 3116 }, { "epoch": 0.04, "grad_norm": 10.356232643127441, "learning_rate": 6.741645939223533e-06, "loss": 3.3281, "step": 3117 }, { "epoch": 0.04, "grad_norm": 10.345932006835938, "learning_rate": 6.743808802854981e-06, "loss": 2.864, "step": 3118 }, { "epoch": 0.04, "grad_norm": 8.95490837097168, "learning_rate": 6.745971666486428e-06, "loss": 3.2854, "step": 3119 }, { "epoch": 0.04, "grad_norm": 9.021267890930176, "learning_rate": 6.748134530117877e-06, "loss": 3.476, "step": 3120 }, { "epoch": 0.04, "grad_norm": 10.763296127319336, "learning_rate": 6.750297393749324e-06, "loss": 3.5596, "step": 3121 }, { "epoch": 0.04, "grad_norm": 10.965174674987793, "learning_rate": 6.752460257380773e-06, "loss": 2.8667, "step": 3122 }, { "epoch": 0.04, "grad_norm": 10.01829719543457, "learning_rate": 6.75462312101222e-06, "loss": 3.2778, "step": 3123 }, { "epoch": 0.04, "grad_norm": 8.807641983032227, "learning_rate": 6.756785984643669e-06, "loss": 3.2502, "step": 3124 }, { "epoch": 0.04, "grad_norm": 9.234121322631836, "learning_rate": 6.758948848275116e-06, "loss": 3.5956, "step": 3125 }, { "epoch": 0.04, "grad_norm": 9.701776504516602, "learning_rate": 6.761111711906565e-06, "loss": 3.59, "step": 3126 }, { "epoch": 0.04, "grad_norm": 10.22590446472168, "learning_rate": 6.763274575538012e-06, "loss": 3.5904, "step": 3127 }, { "epoch": 0.04, "grad_norm": 8.870550155639648, "learning_rate": 6.765437439169461e-06, "loss": 3.3101, "step": 3128 }, { "epoch": 0.04, "grad_norm": 9.450860023498535, "learning_rate": 6.7676003028009085e-06, "loss": 3.5361, "step": 3129 }, { "epoch": 0.04, "grad_norm": 8.686546325683594, "learning_rate": 6.769763166432357e-06, "loss": 3.262, "step": 3130 }, { "epoch": 0.04, "grad_norm": 9.744928359985352, "learning_rate": 6.7719260300638046e-06, "loss": 3.2534, "step": 3131 }, { "epoch": 0.04, "grad_norm": 8.523469924926758, "learning_rate": 6.7740888936952535e-06, "loss": 3.0556, "step": 3132 }, { "epoch": 0.04, "grad_norm": 10.303180694580078, "learning_rate": 6.776251757326701e-06, "loss": 3.3704, "step": 3133 }, { "epoch": 0.04, "grad_norm": 9.6318359375, "learning_rate": 6.7784146209581496e-06, "loss": 3.3395, "step": 3134 }, { "epoch": 0.04, "grad_norm": 9.494503021240234, "learning_rate": 6.780577484589597e-06, "loss": 3.9818, "step": 3135 }, { "epoch": 0.04, "grad_norm": 8.877099990844727, "learning_rate": 6.782740348221045e-06, "loss": 3.4591, "step": 3136 }, { "epoch": 0.04, "grad_norm": 9.298277854919434, "learning_rate": 6.784903211852494e-06, "loss": 3.0385, "step": 3137 }, { "epoch": 0.04, "grad_norm": 10.387367248535156, "learning_rate": 6.787066075483941e-06, "loss": 3.2025, "step": 3138 }, { "epoch": 0.04, "grad_norm": 8.71363639831543, "learning_rate": 6.78922893911539e-06, "loss": 3.269, "step": 3139 }, { "epoch": 0.04, "grad_norm": 8.245146751403809, "learning_rate": 6.791391802746837e-06, "loss": 2.845, "step": 3140 }, { "epoch": 0.04, "grad_norm": 8.937209129333496, "learning_rate": 6.793554666378286e-06, "loss": 3.6229, "step": 3141 }, { "epoch": 0.04, "grad_norm": 11.394831657409668, "learning_rate": 6.795717530009733e-06, "loss": 3.1272, "step": 3142 }, { "epoch": 0.04, "grad_norm": 8.9049654006958, "learning_rate": 6.797880393641182e-06, "loss": 3.351, "step": 3143 }, { "epoch": 0.04, "grad_norm": 9.919986724853516, "learning_rate": 6.800043257272629e-06, "loss": 3.604, "step": 3144 }, { "epoch": 0.04, "grad_norm": 9.173937797546387, "learning_rate": 6.802206120904078e-06, "loss": 3.4075, "step": 3145 }, { "epoch": 0.04, "grad_norm": 9.805663108825684, "learning_rate": 6.804368984535525e-06, "loss": 3.0467, "step": 3146 }, { "epoch": 0.04, "grad_norm": 10.089622497558594, "learning_rate": 6.806531848166974e-06, "loss": 2.6824, "step": 3147 }, { "epoch": 0.04, "grad_norm": 9.448832511901855, "learning_rate": 6.808694711798421e-06, "loss": 2.9972, "step": 3148 }, { "epoch": 0.04, "grad_norm": 10.1032075881958, "learning_rate": 6.81085757542987e-06, "loss": 3.1087, "step": 3149 }, { "epoch": 0.04, "grad_norm": 9.932247161865234, "learning_rate": 6.8130204390613175e-06, "loss": 3.2798, "step": 3150 }, { "epoch": 0.04, "grad_norm": 9.768220901489258, "learning_rate": 6.815183302692766e-06, "loss": 3.3243, "step": 3151 }, { "epoch": 0.04, "grad_norm": 9.859684944152832, "learning_rate": 6.817346166324214e-06, "loss": 2.9327, "step": 3152 }, { "epoch": 0.04, "grad_norm": 10.874445915222168, "learning_rate": 6.8195090299556625e-06, "loss": 3.2884, "step": 3153 }, { "epoch": 0.04, "grad_norm": 9.10660457611084, "learning_rate": 6.82167189358711e-06, "loss": 2.6422, "step": 3154 }, { "epoch": 0.04, "grad_norm": 10.035113334655762, "learning_rate": 6.823834757218558e-06, "loss": 3.0198, "step": 3155 }, { "epoch": 0.04, "grad_norm": 10.567280769348145, "learning_rate": 6.825997620850006e-06, "loss": 3.4543, "step": 3156 }, { "epoch": 0.04, "grad_norm": 11.375029563903809, "learning_rate": 6.828160484481454e-06, "loss": 2.7003, "step": 3157 }, { "epoch": 0.04, "grad_norm": 9.62665843963623, "learning_rate": 6.830323348112902e-06, "loss": 3.6307, "step": 3158 }, { "epoch": 0.04, "grad_norm": 10.44644546508789, "learning_rate": 6.83248621174435e-06, "loss": 3.5486, "step": 3159 }, { "epoch": 0.04, "grad_norm": 9.590155601501465, "learning_rate": 6.834649075375798e-06, "loss": 3.8584, "step": 3160 }, { "epoch": 0.04, "grad_norm": 8.740760803222656, "learning_rate": 6.836811939007246e-06, "loss": 2.8875, "step": 3161 }, { "epoch": 0.04, "grad_norm": 9.557393074035645, "learning_rate": 6.838974802638694e-06, "loss": 2.7364, "step": 3162 }, { "epoch": 0.04, "grad_norm": 10.829825401306152, "learning_rate": 6.841137666270142e-06, "loss": 3.7262, "step": 3163 }, { "epoch": 0.04, "grad_norm": 9.973349571228027, "learning_rate": 6.843300529901589e-06, "loss": 3.0652, "step": 3164 }, { "epoch": 0.04, "grad_norm": 8.398456573486328, "learning_rate": 6.845463393533038e-06, "loss": 3.0679, "step": 3165 }, { "epoch": 0.04, "grad_norm": 10.077306747436523, "learning_rate": 6.8476262571644855e-06, "loss": 3.3052, "step": 3166 }, { "epoch": 0.04, "grad_norm": 10.343587875366211, "learning_rate": 6.849789120795934e-06, "loss": 3.2241, "step": 3167 }, { "epoch": 0.04, "grad_norm": 9.864872932434082, "learning_rate": 6.851951984427383e-06, "loss": 3.2001, "step": 3168 }, { "epoch": 0.04, "grad_norm": 8.650205612182617, "learning_rate": 6.8541148480588305e-06, "loss": 3.0707, "step": 3169 }, { "epoch": 0.04, "grad_norm": 9.448834419250488, "learning_rate": 6.856277711690279e-06, "loss": 2.4728, "step": 3170 }, { "epoch": 0.04, "grad_norm": 10.85499095916748, "learning_rate": 6.858440575321727e-06, "loss": 3.6853, "step": 3171 }, { "epoch": 0.04, "grad_norm": 9.262582778930664, "learning_rate": 6.860603438953175e-06, "loss": 3.7032, "step": 3172 }, { "epoch": 0.04, "grad_norm": 10.032890319824219, "learning_rate": 6.862766302584623e-06, "loss": 3.1943, "step": 3173 }, { "epoch": 0.04, "grad_norm": 9.987993240356445, "learning_rate": 6.864929166216071e-06, "loss": 3.4509, "step": 3174 }, { "epoch": 0.04, "grad_norm": 10.306042671203613, "learning_rate": 6.867092029847519e-06, "loss": 2.8164, "step": 3175 }, { "epoch": 0.04, "grad_norm": 9.462788581848145, "learning_rate": 6.869254893478967e-06, "loss": 2.737, "step": 3176 }, { "epoch": 0.04, "grad_norm": 9.811158180236816, "learning_rate": 6.871417757110415e-06, "loss": 3.1335, "step": 3177 }, { "epoch": 0.04, "grad_norm": 10.859273910522461, "learning_rate": 6.873580620741863e-06, "loss": 3.3336, "step": 3178 }, { "epoch": 0.04, "grad_norm": 9.016945838928223, "learning_rate": 6.875743484373311e-06, "loss": 3.1165, "step": 3179 }, { "epoch": 0.04, "grad_norm": 8.721985816955566, "learning_rate": 6.877906348004759e-06, "loss": 2.3269, "step": 3180 }, { "epoch": 0.04, "grad_norm": 7.42182731628418, "learning_rate": 6.880069211636207e-06, "loss": 2.4158, "step": 3181 }, { "epoch": 0.04, "grad_norm": 9.69013786315918, "learning_rate": 6.882232075267655e-06, "loss": 3.2951, "step": 3182 }, { "epoch": 0.04, "grad_norm": 10.460362434387207, "learning_rate": 6.884394938899102e-06, "loss": 2.6633, "step": 3183 }, { "epoch": 0.04, "grad_norm": 9.152981758117676, "learning_rate": 6.886557802530551e-06, "loss": 3.1738, "step": 3184 }, { "epoch": 0.04, "grad_norm": 8.611472129821777, "learning_rate": 6.8887206661619984e-06, "loss": 2.731, "step": 3185 }, { "epoch": 0.04, "grad_norm": 9.739943504333496, "learning_rate": 6.890883529793447e-06, "loss": 3.3081, "step": 3186 }, { "epoch": 0.04, "grad_norm": 9.461450576782227, "learning_rate": 6.8930463934248945e-06, "loss": 2.8991, "step": 3187 }, { "epoch": 0.04, "grad_norm": 11.109782218933105, "learning_rate": 6.8952092570563434e-06, "loss": 3.2223, "step": 3188 }, { "epoch": 0.04, "grad_norm": 9.740870475769043, "learning_rate": 6.897372120687791e-06, "loss": 3.2771, "step": 3189 }, { "epoch": 0.04, "grad_norm": 9.478130340576172, "learning_rate": 6.8995349843192395e-06, "loss": 2.9795, "step": 3190 }, { "epoch": 0.04, "grad_norm": 9.192784309387207, "learning_rate": 6.901697847950687e-06, "loss": 3.1354, "step": 3191 }, { "epoch": 0.04, "grad_norm": 9.964859008789062, "learning_rate": 6.903860711582136e-06, "loss": 3.2719, "step": 3192 }, { "epoch": 0.04, "grad_norm": 10.324908256530762, "learning_rate": 6.906023575213583e-06, "loss": 3.0491, "step": 3193 }, { "epoch": 0.04, "grad_norm": 10.129724502563477, "learning_rate": 6.908186438845032e-06, "loss": 2.6657, "step": 3194 }, { "epoch": 0.04, "grad_norm": 11.34107780456543, "learning_rate": 6.910349302476479e-06, "loss": 3.2082, "step": 3195 }, { "epoch": 0.04, "grad_norm": 8.846517562866211, "learning_rate": 6.912512166107928e-06, "loss": 3.3056, "step": 3196 }, { "epoch": 0.04, "grad_norm": 9.929022789001465, "learning_rate": 6.914675029739375e-06, "loss": 3.0409, "step": 3197 }, { "epoch": 0.04, "grad_norm": 12.18222713470459, "learning_rate": 6.916837893370824e-06, "loss": 3.4008, "step": 3198 }, { "epoch": 0.04, "grad_norm": 9.5364351272583, "learning_rate": 6.919000757002271e-06, "loss": 3.1576, "step": 3199 }, { "epoch": 0.04, "grad_norm": 9.990692138671875, "learning_rate": 6.921163620633719e-06, "loss": 2.7734, "step": 3200 }, { "epoch": 0.04, "grad_norm": 9.848438262939453, "learning_rate": 6.923326484265168e-06, "loss": 3.0576, "step": 3201 }, { "epoch": 0.04, "grad_norm": 10.914504051208496, "learning_rate": 6.925489347896615e-06, "loss": 3.5279, "step": 3202 }, { "epoch": 0.04, "grad_norm": 10.280686378479004, "learning_rate": 6.927652211528064e-06, "loss": 3.1203, "step": 3203 }, { "epoch": 0.04, "grad_norm": 12.524336814880371, "learning_rate": 6.929815075159511e-06, "loss": 3.5218, "step": 3204 }, { "epoch": 0.04, "grad_norm": 12.375635147094727, "learning_rate": 6.93197793879096e-06, "loss": 4.0935, "step": 3205 }, { "epoch": 0.04, "grad_norm": 10.370962142944336, "learning_rate": 6.9341408024224075e-06, "loss": 3.2685, "step": 3206 }, { "epoch": 0.04, "grad_norm": 10.252838134765625, "learning_rate": 6.936303666053856e-06, "loss": 3.5823, "step": 3207 }, { "epoch": 0.04, "grad_norm": 9.046287536621094, "learning_rate": 6.938466529685304e-06, "loss": 2.9508, "step": 3208 }, { "epoch": 0.04, "grad_norm": 9.154046058654785, "learning_rate": 6.9406293933167525e-06, "loss": 3.3246, "step": 3209 }, { "epoch": 0.04, "grad_norm": 10.18913459777832, "learning_rate": 6.9427922569482e-06, "loss": 2.9744, "step": 3210 }, { "epoch": 0.04, "grad_norm": 9.866923332214355, "learning_rate": 6.944955120579649e-06, "loss": 2.9695, "step": 3211 }, { "epoch": 0.04, "grad_norm": 10.691764831542969, "learning_rate": 6.947117984211096e-06, "loss": 2.7851, "step": 3212 }, { "epoch": 0.04, "grad_norm": 9.951509475708008, "learning_rate": 6.949280847842545e-06, "loss": 3.177, "step": 3213 }, { "epoch": 0.04, "grad_norm": 9.886717796325684, "learning_rate": 6.951443711473992e-06, "loss": 3.281, "step": 3214 }, { "epoch": 0.04, "grad_norm": 8.545745849609375, "learning_rate": 6.953606575105441e-06, "loss": 3.627, "step": 3215 }, { "epoch": 0.04, "grad_norm": 8.97419548034668, "learning_rate": 6.955769438736888e-06, "loss": 2.8225, "step": 3216 }, { "epoch": 0.04, "grad_norm": 8.970714569091797, "learning_rate": 6.957932302368337e-06, "loss": 2.9206, "step": 3217 }, { "epoch": 0.04, "grad_norm": 9.047926902770996, "learning_rate": 6.960095165999784e-06, "loss": 3.2881, "step": 3218 }, { "epoch": 0.04, "grad_norm": 11.447632789611816, "learning_rate": 6.962258029631232e-06, "loss": 2.7919, "step": 3219 }, { "epoch": 0.04, "grad_norm": 9.808945655822754, "learning_rate": 6.96442089326268e-06, "loss": 3.1058, "step": 3220 }, { "epoch": 0.04, "grad_norm": 9.389595985412598, "learning_rate": 6.966583756894128e-06, "loss": 2.8323, "step": 3221 }, { "epoch": 0.04, "grad_norm": 9.482315063476562, "learning_rate": 6.968746620525576e-06, "loss": 2.8849, "step": 3222 }, { "epoch": 0.04, "grad_norm": 9.62067985534668, "learning_rate": 6.970909484157024e-06, "loss": 3.1504, "step": 3223 }, { "epoch": 0.04, "grad_norm": 8.628278732299805, "learning_rate": 6.9730723477884724e-06, "loss": 2.6666, "step": 3224 }, { "epoch": 0.04, "grad_norm": 9.617618560791016, "learning_rate": 6.9752352114199205e-06, "loss": 3.3219, "step": 3225 }, { "epoch": 0.04, "grad_norm": 9.3640775680542, "learning_rate": 6.9773980750513685e-06, "loss": 2.7859, "step": 3226 }, { "epoch": 0.04, "grad_norm": 9.993215560913086, "learning_rate": 6.979560938682817e-06, "loss": 3.0597, "step": 3227 }, { "epoch": 0.04, "grad_norm": 9.991415977478027, "learning_rate": 6.981723802314264e-06, "loss": 3.5524, "step": 3228 }, { "epoch": 0.04, "grad_norm": 9.38497257232666, "learning_rate": 6.983886665945713e-06, "loss": 2.8617, "step": 3229 }, { "epoch": 0.04, "grad_norm": 9.9666166305542, "learning_rate": 6.98604952957716e-06, "loss": 3.7686, "step": 3230 }, { "epoch": 0.04, "grad_norm": 8.959693908691406, "learning_rate": 6.988212393208609e-06, "loss": 3.3103, "step": 3231 }, { "epoch": 0.04, "grad_norm": 10.637868881225586, "learning_rate": 6.990375256840058e-06, "loss": 3.6431, "step": 3232 }, { "epoch": 0.04, "grad_norm": 9.941468238830566, "learning_rate": 6.992538120471505e-06, "loss": 3.3034, "step": 3233 }, { "epoch": 0.04, "grad_norm": 9.607585906982422, "learning_rate": 6.994700984102954e-06, "loss": 3.2175, "step": 3234 }, { "epoch": 0.04, "grad_norm": 10.336478233337402, "learning_rate": 6.996863847734401e-06, "loss": 2.9756, "step": 3235 }, { "epoch": 0.04, "grad_norm": 8.675615310668945, "learning_rate": 6.999026711365849e-06, "loss": 2.7472, "step": 3236 }, { "epoch": 0.04, "grad_norm": 10.964980125427246, "learning_rate": 7.001189574997297e-06, "loss": 3.2534, "step": 3237 }, { "epoch": 0.04, "grad_norm": 11.931654930114746, "learning_rate": 7.003352438628745e-06, "loss": 2.776, "step": 3238 }, { "epoch": 0.04, "grad_norm": 10.591660499572754, "learning_rate": 7.005515302260193e-06, "loss": 3.0898, "step": 3239 }, { "epoch": 0.04, "grad_norm": 11.244524002075195, "learning_rate": 7.007678165891641e-06, "loss": 3.3822, "step": 3240 }, { "epoch": 0.04, "grad_norm": 9.819721221923828, "learning_rate": 7.009841029523089e-06, "loss": 2.8292, "step": 3241 }, { "epoch": 0.04, "grad_norm": 8.018319129943848, "learning_rate": 7.012003893154537e-06, "loss": 2.7962, "step": 3242 }, { "epoch": 0.04, "grad_norm": 10.078692436218262, "learning_rate": 7.014166756785985e-06, "loss": 3.355, "step": 3243 }, { "epoch": 0.04, "grad_norm": 11.702877044677734, "learning_rate": 7.0163296204174334e-06, "loss": 3.6103, "step": 3244 }, { "epoch": 0.04, "grad_norm": 9.160810470581055, "learning_rate": 7.018492484048881e-06, "loss": 2.99, "step": 3245 }, { "epoch": 0.04, "grad_norm": 8.972920417785645, "learning_rate": 7.0206553476803295e-06, "loss": 3.2285, "step": 3246 }, { "epoch": 0.04, "grad_norm": 11.436573028564453, "learning_rate": 7.022818211311777e-06, "loss": 2.771, "step": 3247 }, { "epoch": 0.04, "grad_norm": 8.572511672973633, "learning_rate": 7.024981074943226e-06, "loss": 3.2399, "step": 3248 }, { "epoch": 0.04, "grad_norm": 9.670727729797363, "learning_rate": 7.027143938574673e-06, "loss": 3.448, "step": 3249 }, { "epoch": 0.04, "grad_norm": 10.409570693969727, "learning_rate": 7.029306802206122e-06, "loss": 2.8889, "step": 3250 }, { "epoch": 0.04, "grad_norm": 11.60737133026123, "learning_rate": 7.031469665837569e-06, "loss": 2.7369, "step": 3251 }, { "epoch": 0.04, "grad_norm": 8.241470336914062, "learning_rate": 7.033632529469018e-06, "loss": 3.2531, "step": 3252 }, { "epoch": 0.04, "grad_norm": 9.939481735229492, "learning_rate": 7.035795393100465e-06, "loss": 3.6089, "step": 3253 }, { "epoch": 0.04, "grad_norm": 8.950817108154297, "learning_rate": 7.037958256731914e-06, "loss": 3.2329, "step": 3254 }, { "epoch": 0.04, "grad_norm": 9.739747047424316, "learning_rate": 7.040121120363361e-06, "loss": 3.5699, "step": 3255 }, { "epoch": 0.04, "grad_norm": 9.526371955871582, "learning_rate": 7.04228398399481e-06, "loss": 3.1515, "step": 3256 }, { "epoch": 0.04, "grad_norm": 9.742456436157227, "learning_rate": 7.044446847626257e-06, "loss": 3.2042, "step": 3257 }, { "epoch": 0.04, "grad_norm": 9.243644714355469, "learning_rate": 7.046609711257706e-06, "loss": 2.753, "step": 3258 }, { "epoch": 0.04, "grad_norm": 9.115700721740723, "learning_rate": 7.048772574889153e-06, "loss": 2.7457, "step": 3259 }, { "epoch": 0.04, "grad_norm": 9.899906158447266, "learning_rate": 7.050935438520602e-06, "loss": 3.4808, "step": 3260 }, { "epoch": 0.04, "grad_norm": 9.011866569519043, "learning_rate": 7.0530983021520495e-06, "loss": 2.6187, "step": 3261 }, { "epoch": 0.04, "grad_norm": 11.813987731933594, "learning_rate": 7.055261165783498e-06, "loss": 3.9886, "step": 3262 }, { "epoch": 0.04, "grad_norm": 7.897795677185059, "learning_rate": 7.0574240294149456e-06, "loss": 2.4202, "step": 3263 }, { "epoch": 0.04, "grad_norm": 8.914535522460938, "learning_rate": 7.059586893046394e-06, "loss": 3.2702, "step": 3264 }, { "epoch": 0.04, "grad_norm": 8.83307933807373, "learning_rate": 7.0617497566778425e-06, "loss": 2.7097, "step": 3265 }, { "epoch": 0.04, "grad_norm": 9.922882080078125, "learning_rate": 7.06391262030929e-06, "loss": 2.8676, "step": 3266 }, { "epoch": 0.04, "grad_norm": 10.105433464050293, "learning_rate": 7.066075483940739e-06, "loss": 3.8197, "step": 3267 }, { "epoch": 0.04, "grad_norm": 9.041962623596191, "learning_rate": 7.068238347572186e-06, "loss": 2.9194, "step": 3268 }, { "epoch": 0.04, "grad_norm": 9.135456085205078, "learning_rate": 7.070401211203635e-06, "loss": 3.2305, "step": 3269 }, { "epoch": 0.04, "grad_norm": 9.250502586364746, "learning_rate": 7.072564074835082e-06, "loss": 3.1057, "step": 3270 }, { "epoch": 0.04, "grad_norm": 8.787275314331055, "learning_rate": 7.074726938466531e-06, "loss": 3.0412, "step": 3271 }, { "epoch": 0.04, "grad_norm": 8.444451332092285, "learning_rate": 7.076889802097978e-06, "loss": 2.9841, "step": 3272 }, { "epoch": 0.04, "grad_norm": 9.430521965026855, "learning_rate": 7.079052665729427e-06, "loss": 2.965, "step": 3273 }, { "epoch": 0.04, "grad_norm": 13.14738941192627, "learning_rate": 7.081215529360874e-06, "loss": 2.6839, "step": 3274 }, { "epoch": 0.04, "grad_norm": 9.203310012817383, "learning_rate": 7.083378392992323e-06, "loss": 2.9699, "step": 3275 }, { "epoch": 0.04, "grad_norm": 8.727014541625977, "learning_rate": 7.08554125662377e-06, "loss": 2.6841, "step": 3276 }, { "epoch": 0.04, "grad_norm": 9.231078147888184, "learning_rate": 7.087704120255219e-06, "loss": 3.0103, "step": 3277 }, { "epoch": 0.04, "grad_norm": 9.17186164855957, "learning_rate": 7.089866983886666e-06, "loss": 3.0701, "step": 3278 }, { "epoch": 0.04, "grad_norm": 9.277080535888672, "learning_rate": 7.092029847518115e-06, "loss": 3.2057, "step": 3279 }, { "epoch": 0.04, "grad_norm": 8.296650886535645, "learning_rate": 7.094192711149562e-06, "loss": 2.6135, "step": 3280 }, { "epoch": 0.04, "grad_norm": 8.933881759643555, "learning_rate": 7.0963555747810105e-06, "loss": 2.4648, "step": 3281 }, { "epoch": 0.04, "grad_norm": 9.360095977783203, "learning_rate": 7.0985184384124585e-06, "loss": 2.9949, "step": 3282 }, { "epoch": 0.04, "grad_norm": 10.037421226501465, "learning_rate": 7.1006813020439066e-06, "loss": 2.9971, "step": 3283 }, { "epoch": 0.04, "grad_norm": 9.520894050598145, "learning_rate": 7.102844165675355e-06, "loss": 3.2264, "step": 3284 }, { "epoch": 0.04, "grad_norm": 9.509406089782715, "learning_rate": 7.105007029306803e-06, "loss": 3.0517, "step": 3285 }, { "epoch": 0.04, "grad_norm": 8.56128978729248, "learning_rate": 7.107169892938251e-06, "loss": 2.7926, "step": 3286 }, { "epoch": 0.04, "grad_norm": 9.297423362731934, "learning_rate": 7.109332756569699e-06, "loss": 3.3777, "step": 3287 }, { "epoch": 0.04, "grad_norm": 9.111014366149902, "learning_rate": 7.111495620201147e-06, "loss": 3.2105, "step": 3288 }, { "epoch": 0.04, "grad_norm": 8.523778915405273, "learning_rate": 7.113658483832595e-06, "loss": 3.5368, "step": 3289 }, { "epoch": 0.04, "grad_norm": 11.050047874450684, "learning_rate": 7.115821347464043e-06, "loss": 3.8116, "step": 3290 }, { "epoch": 0.04, "grad_norm": 10.054585456848145, "learning_rate": 7.117984211095491e-06, "loss": 3.4656, "step": 3291 }, { "epoch": 0.04, "grad_norm": 9.76554012298584, "learning_rate": 7.120147074726938e-06, "loss": 3.1379, "step": 3292 }, { "epoch": 0.04, "grad_norm": 8.547240257263184, "learning_rate": 7.122309938358387e-06, "loss": 3.2024, "step": 3293 }, { "epoch": 0.04, "grad_norm": 10.532991409301758, "learning_rate": 7.124472801989834e-06, "loss": 2.8107, "step": 3294 }, { "epoch": 0.04, "grad_norm": 9.873373031616211, "learning_rate": 7.126635665621283e-06, "loss": 2.7969, "step": 3295 }, { "epoch": 0.04, "grad_norm": 9.726908683776855, "learning_rate": 7.128798529252732e-06, "loss": 3.0865, "step": 3296 }, { "epoch": 0.04, "grad_norm": 9.05335521697998, "learning_rate": 7.130961392884179e-06, "loss": 2.4623, "step": 3297 }, { "epoch": 0.04, "grad_norm": 9.429988861083984, "learning_rate": 7.133124256515628e-06, "loss": 2.7379, "step": 3298 }, { "epoch": 0.04, "grad_norm": 9.271842956542969, "learning_rate": 7.135287120147075e-06, "loss": 2.7221, "step": 3299 }, { "epoch": 0.04, "grad_norm": 8.978302001953125, "learning_rate": 7.1374499837785234e-06, "loss": 2.9896, "step": 3300 }, { "epoch": 0.04, "grad_norm": 8.698407173156738, "learning_rate": 7.1396128474099715e-06, "loss": 2.8809, "step": 3301 }, { "epoch": 0.04, "grad_norm": 10.732731819152832, "learning_rate": 7.1417757110414195e-06, "loss": 2.9741, "step": 3302 }, { "epoch": 0.04, "grad_norm": 10.049116134643555, "learning_rate": 7.143938574672868e-06, "loss": 2.9752, "step": 3303 }, { "epoch": 0.04, "grad_norm": 11.504701614379883, "learning_rate": 7.146101438304316e-06, "loss": 3.4273, "step": 3304 }, { "epoch": 0.04, "grad_norm": 10.129850387573242, "learning_rate": 7.148264301935764e-06, "loss": 2.7376, "step": 3305 }, { "epoch": 0.04, "grad_norm": 9.455243110656738, "learning_rate": 7.150427165567212e-06, "loss": 2.9367, "step": 3306 }, { "epoch": 0.04, "grad_norm": 9.8179931640625, "learning_rate": 7.15259002919866e-06, "loss": 3.4732, "step": 3307 }, { "epoch": 0.04, "grad_norm": 10.495094299316406, "learning_rate": 7.154752892830108e-06, "loss": 3.4033, "step": 3308 }, { "epoch": 0.04, "grad_norm": 9.710424423217773, "learning_rate": 7.156915756461555e-06, "loss": 2.7159, "step": 3309 }, { "epoch": 0.04, "grad_norm": 8.984942436218262, "learning_rate": 7.159078620093004e-06, "loss": 3.0702, "step": 3310 }, { "epoch": 0.04, "grad_norm": 11.017552375793457, "learning_rate": 7.161241483724451e-06, "loss": 3.0278, "step": 3311 }, { "epoch": 0.04, "grad_norm": 10.205262184143066, "learning_rate": 7.1634043473559e-06, "loss": 2.7784, "step": 3312 }, { "epoch": 0.04, "grad_norm": 8.789816856384277, "learning_rate": 7.165567210987347e-06, "loss": 2.6611, "step": 3313 }, { "epoch": 0.04, "grad_norm": 10.4907865524292, "learning_rate": 7.167730074618796e-06, "loss": 2.7793, "step": 3314 }, { "epoch": 0.04, "grad_norm": 10.490394592285156, "learning_rate": 7.169892938250243e-06, "loss": 2.4835, "step": 3315 }, { "epoch": 0.04, "grad_norm": 9.220880508422852, "learning_rate": 7.172055801881692e-06, "loss": 3.3861, "step": 3316 }, { "epoch": 0.04, "grad_norm": 9.140288352966309, "learning_rate": 7.1742186655131395e-06, "loss": 3.2994, "step": 3317 }, { "epoch": 0.04, "grad_norm": 9.8154935836792, "learning_rate": 7.176381529144588e-06, "loss": 2.907, "step": 3318 }, { "epoch": 0.04, "grad_norm": 10.46501350402832, "learning_rate": 7.1785443927760356e-06, "loss": 3.2213, "step": 3319 }, { "epoch": 0.04, "grad_norm": 8.627238273620605, "learning_rate": 7.1807072564074845e-06, "loss": 2.6418, "step": 3320 }, { "epoch": 0.04, "grad_norm": 9.248213768005371, "learning_rate": 7.182870120038932e-06, "loss": 2.8211, "step": 3321 }, { "epoch": 0.04, "grad_norm": 9.072576522827148, "learning_rate": 7.1850329836703806e-06, "loss": 3.4903, "step": 3322 }, { "epoch": 0.04, "grad_norm": 10.975960731506348, "learning_rate": 7.187195847301828e-06, "loss": 3.5439, "step": 3323 }, { "epoch": 0.04, "grad_norm": 9.506386756896973, "learning_rate": 7.189358710933277e-06, "loss": 3.4586, "step": 3324 }, { "epoch": 0.04, "grad_norm": 9.963467597961426, "learning_rate": 7.191521574564724e-06, "loss": 3.014, "step": 3325 }, { "epoch": 0.04, "grad_norm": 9.688458442687988, "learning_rate": 7.193684438196173e-06, "loss": 3.4903, "step": 3326 }, { "epoch": 0.04, "grad_norm": 11.641828536987305, "learning_rate": 7.19584730182762e-06, "loss": 3.0801, "step": 3327 }, { "epoch": 0.04, "grad_norm": 9.729598045349121, "learning_rate": 7.198010165459068e-06, "loss": 3.4206, "step": 3328 }, { "epoch": 0.04, "grad_norm": 12.383914947509766, "learning_rate": 7.200173029090517e-06, "loss": 3.0203, "step": 3329 }, { "epoch": 0.04, "grad_norm": 9.26347541809082, "learning_rate": 7.202335892721964e-06, "loss": 2.8468, "step": 3330 }, { "epoch": 0.04, "grad_norm": 9.96784496307373, "learning_rate": 7.204498756353413e-06, "loss": 3.2565, "step": 3331 }, { "epoch": 0.04, "grad_norm": 11.321097373962402, "learning_rate": 7.20666161998486e-06, "loss": 3.3737, "step": 3332 }, { "epoch": 0.04, "grad_norm": 8.366729736328125, "learning_rate": 7.208824483616309e-06, "loss": 2.4854, "step": 3333 }, { "epoch": 0.04, "grad_norm": 8.938302993774414, "learning_rate": 7.210987347247756e-06, "loss": 2.765, "step": 3334 }, { "epoch": 0.04, "grad_norm": 10.073927879333496, "learning_rate": 7.213150210879205e-06, "loss": 3.0789, "step": 3335 }, { "epoch": 0.04, "grad_norm": 10.276432037353516, "learning_rate": 7.215313074510652e-06, "loss": 3.7468, "step": 3336 }, { "epoch": 0.04, "grad_norm": 8.483512878417969, "learning_rate": 7.217475938142101e-06, "loss": 3.2705, "step": 3337 }, { "epoch": 0.04, "grad_norm": 9.10086727142334, "learning_rate": 7.2196388017735485e-06, "loss": 2.8127, "step": 3338 }, { "epoch": 0.04, "grad_norm": 8.995526313781738, "learning_rate": 7.221801665404997e-06, "loss": 3.0985, "step": 3339 }, { "epoch": 0.04, "grad_norm": 9.59423542022705, "learning_rate": 7.223964529036445e-06, "loss": 3.3579, "step": 3340 }, { "epoch": 0.04, "grad_norm": 8.910404205322266, "learning_rate": 7.2261273926678935e-06, "loss": 2.9919, "step": 3341 }, { "epoch": 0.04, "grad_norm": 8.907384872436523, "learning_rate": 7.228290256299341e-06, "loss": 2.9275, "step": 3342 }, { "epoch": 0.04, "grad_norm": 9.578347206115723, "learning_rate": 7.23045311993079e-06, "loss": 2.9529, "step": 3343 }, { "epoch": 0.04, "grad_norm": 9.225608825683594, "learning_rate": 7.232615983562237e-06, "loss": 3.1254, "step": 3344 }, { "epoch": 0.04, "grad_norm": 10.188409805297852, "learning_rate": 7.234778847193685e-06, "loss": 3.0532, "step": 3345 }, { "epoch": 0.04, "grad_norm": 9.26396369934082, "learning_rate": 7.236941710825133e-06, "loss": 3.0267, "step": 3346 }, { "epoch": 0.04, "grad_norm": 9.476402282714844, "learning_rate": 7.239104574456581e-06, "loss": 3.0556, "step": 3347 }, { "epoch": 0.04, "grad_norm": 9.829455375671387, "learning_rate": 7.241267438088029e-06, "loss": 2.399, "step": 3348 }, { "epoch": 0.04, "grad_norm": 9.900028228759766, "learning_rate": 7.243430301719477e-06, "loss": 3.0769, "step": 3349 }, { "epoch": 0.04, "grad_norm": 9.711193084716797, "learning_rate": 7.245593165350925e-06, "loss": 2.9962, "step": 3350 }, { "epoch": 0.04, "grad_norm": 10.090935707092285, "learning_rate": 7.247756028982373e-06, "loss": 3.4984, "step": 3351 }, { "epoch": 0.04, "grad_norm": 10.66889476776123, "learning_rate": 7.249918892613821e-06, "loss": 2.5328, "step": 3352 }, { "epoch": 0.04, "grad_norm": 9.150436401367188, "learning_rate": 7.252081756245269e-06, "loss": 2.5175, "step": 3353 }, { "epoch": 0.04, "grad_norm": 9.713910102844238, "learning_rate": 7.254244619876717e-06, "loss": 2.5596, "step": 3354 }, { "epoch": 0.04, "grad_norm": 10.025434494018555, "learning_rate": 7.256407483508165e-06, "loss": 3.1432, "step": 3355 }, { "epoch": 0.04, "grad_norm": 9.37060832977295, "learning_rate": 7.258570347139613e-06, "loss": 2.8612, "step": 3356 }, { "epoch": 0.04, "grad_norm": 9.592832565307617, "learning_rate": 7.2607332107710615e-06, "loss": 3.0129, "step": 3357 }, { "epoch": 0.04, "grad_norm": 11.874429702758789, "learning_rate": 7.262896074402509e-06, "loss": 3.5686, "step": 3358 }, { "epoch": 0.04, "grad_norm": 8.769852638244629, "learning_rate": 7.265058938033958e-06, "loss": 3.0, "step": 3359 }, { "epoch": 0.04, "grad_norm": 10.064546585083008, "learning_rate": 7.2672218016654065e-06, "loss": 3.235, "step": 3360 }, { "epoch": 0.04, "grad_norm": 9.88106918334961, "learning_rate": 7.269384665296854e-06, "loss": 3.3614, "step": 3361 }, { "epoch": 0.04, "grad_norm": 8.248703002929688, "learning_rate": 7.271547528928303e-06, "loss": 3.039, "step": 3362 }, { "epoch": 0.04, "grad_norm": 9.969165802001953, "learning_rate": 7.27371039255975e-06, "loss": 2.9046, "step": 3363 }, { "epoch": 0.04, "grad_norm": 9.255678176879883, "learning_rate": 7.275873256191198e-06, "loss": 3.1546, "step": 3364 }, { "epoch": 0.04, "grad_norm": 9.662334442138672, "learning_rate": 7.278036119822646e-06, "loss": 3.1679, "step": 3365 }, { "epoch": 0.04, "grad_norm": 8.997762680053711, "learning_rate": 7.280198983454094e-06, "loss": 2.7361, "step": 3366 }, { "epoch": 0.04, "grad_norm": 8.296796798706055, "learning_rate": 7.282361847085542e-06, "loss": 3.2389, "step": 3367 }, { "epoch": 0.04, "grad_norm": 9.534235000610352, "learning_rate": 7.28452471071699e-06, "loss": 3.0761, "step": 3368 }, { "epoch": 0.04, "grad_norm": 8.840198516845703, "learning_rate": 7.286687574348438e-06, "loss": 3.5909, "step": 3369 }, { "epoch": 0.04, "grad_norm": 8.75370979309082, "learning_rate": 7.288850437979886e-06, "loss": 3.0649, "step": 3370 }, { "epoch": 0.04, "grad_norm": 9.237960815429688, "learning_rate": 7.291013301611334e-06, "loss": 2.8466, "step": 3371 }, { "epoch": 0.04, "grad_norm": 9.752581596374512, "learning_rate": 7.293176165242782e-06, "loss": 2.7453, "step": 3372 }, { "epoch": 0.04, "grad_norm": 9.488873481750488, "learning_rate": 7.2953390288742294e-06, "loss": 3.6434, "step": 3373 }, { "epoch": 0.04, "grad_norm": 10.59443473815918, "learning_rate": 7.297501892505678e-06, "loss": 3.0464, "step": 3374 }, { "epoch": 0.04, "grad_norm": 8.526665687561035, "learning_rate": 7.2996647561371255e-06, "loss": 3.0442, "step": 3375 }, { "epoch": 0.04, "grad_norm": 9.653103828430176, "learning_rate": 7.3018276197685744e-06, "loss": 3.1469, "step": 3376 }, { "epoch": 0.04, "grad_norm": 9.373719215393066, "learning_rate": 7.303990483400022e-06, "loss": 2.7099, "step": 3377 }, { "epoch": 0.04, "grad_norm": 9.542125701904297, "learning_rate": 7.3061533470314705e-06, "loss": 3.3365, "step": 3378 }, { "epoch": 0.04, "grad_norm": 10.09946060180664, "learning_rate": 7.308316210662918e-06, "loss": 3.388, "step": 3379 }, { "epoch": 0.04, "grad_norm": 9.431556701660156, "learning_rate": 7.310479074294367e-06, "loss": 3.2239, "step": 3380 }, { "epoch": 0.04, "grad_norm": 9.65713119506836, "learning_rate": 7.312641937925814e-06, "loss": 3.1643, "step": 3381 }, { "epoch": 0.04, "grad_norm": 9.213823318481445, "learning_rate": 7.314804801557263e-06, "loss": 3.3098, "step": 3382 }, { "epoch": 0.04, "grad_norm": 10.235367774963379, "learning_rate": 7.31696766518871e-06, "loss": 3.3976, "step": 3383 }, { "epoch": 0.04, "grad_norm": 9.252506256103516, "learning_rate": 7.319130528820159e-06, "loss": 3.5765, "step": 3384 }, { "epoch": 0.04, "grad_norm": 9.190718650817871, "learning_rate": 7.321293392451606e-06, "loss": 2.8689, "step": 3385 }, { "epoch": 0.04, "grad_norm": 7.70210075378418, "learning_rate": 7.323456256083055e-06, "loss": 3.023, "step": 3386 }, { "epoch": 0.04, "grad_norm": 8.127699851989746, "learning_rate": 7.325619119714502e-06, "loss": 2.882, "step": 3387 }, { "epoch": 0.04, "grad_norm": 9.607694625854492, "learning_rate": 7.327781983345951e-06, "loss": 3.3176, "step": 3388 }, { "epoch": 0.04, "grad_norm": 10.654938697814941, "learning_rate": 7.329944846977398e-06, "loss": 3.2903, "step": 3389 }, { "epoch": 0.04, "grad_norm": 9.811057090759277, "learning_rate": 7.332107710608847e-06, "loss": 2.8714, "step": 3390 }, { "epoch": 0.04, "grad_norm": 8.098677635192871, "learning_rate": 7.334270574240294e-06, "loss": 2.6984, "step": 3391 }, { "epoch": 0.04, "grad_norm": 8.922513008117676, "learning_rate": 7.336433437871742e-06, "loss": 3.2922, "step": 3392 }, { "epoch": 0.04, "grad_norm": 10.176745414733887, "learning_rate": 7.338596301503191e-06, "loss": 3.595, "step": 3393 }, { "epoch": 0.04, "grad_norm": 10.071581840515137, "learning_rate": 7.3407591651346385e-06, "loss": 3.2142, "step": 3394 }, { "epoch": 0.04, "grad_norm": 9.621146202087402, "learning_rate": 7.342922028766087e-06, "loss": 3.484, "step": 3395 }, { "epoch": 0.04, "grad_norm": 10.28903865814209, "learning_rate": 7.345084892397535e-06, "loss": 3.2124, "step": 3396 }, { "epoch": 0.04, "grad_norm": 10.902326583862305, "learning_rate": 7.3472477560289835e-06, "loss": 3.489, "step": 3397 }, { "epoch": 0.04, "grad_norm": 9.81224536895752, "learning_rate": 7.349410619660431e-06, "loss": 2.9079, "step": 3398 }, { "epoch": 0.04, "grad_norm": 9.7915620803833, "learning_rate": 7.35157348329188e-06, "loss": 3.6063, "step": 3399 }, { "epoch": 0.04, "grad_norm": 9.728002548217773, "learning_rate": 7.353736346923327e-06, "loss": 2.7857, "step": 3400 }, { "epoch": 0.04, "grad_norm": 9.237347602844238, "learning_rate": 7.355899210554776e-06, "loss": 3.1707, "step": 3401 }, { "epoch": 0.04, "grad_norm": 9.529563903808594, "learning_rate": 7.358062074186223e-06, "loss": 3.1877, "step": 3402 }, { "epoch": 0.04, "grad_norm": 9.712786674499512, "learning_rate": 7.360224937817672e-06, "loss": 3.796, "step": 3403 }, { "epoch": 0.04, "grad_norm": 9.181025505065918, "learning_rate": 7.362387801449119e-06, "loss": 2.7596, "step": 3404 }, { "epoch": 0.04, "grad_norm": 9.862699508666992, "learning_rate": 7.364550665080568e-06, "loss": 3.3844, "step": 3405 }, { "epoch": 0.04, "grad_norm": 9.110997200012207, "learning_rate": 7.366713528712015e-06, "loss": 2.8277, "step": 3406 }, { "epoch": 0.04, "grad_norm": 9.974220275878906, "learning_rate": 7.368876392343464e-06, "loss": 3.3576, "step": 3407 }, { "epoch": 0.04, "grad_norm": 9.01317310333252, "learning_rate": 7.371039255974911e-06, "loss": 2.8833, "step": 3408 }, { "epoch": 0.04, "grad_norm": 8.733457565307617, "learning_rate": 7.373202119606359e-06, "loss": 2.9306, "step": 3409 }, { "epoch": 0.04, "grad_norm": 10.189860343933105, "learning_rate": 7.375364983237807e-06, "loss": 3.0151, "step": 3410 }, { "epoch": 0.04, "grad_norm": 8.580081939697266, "learning_rate": 7.377527846869255e-06, "loss": 3.3582, "step": 3411 }, { "epoch": 0.04, "grad_norm": 9.667044639587402, "learning_rate": 7.3796907105007034e-06, "loss": 3.0885, "step": 3412 }, { "epoch": 0.04, "grad_norm": 8.636980056762695, "learning_rate": 7.3818535741321515e-06, "loss": 3.2894, "step": 3413 }, { "epoch": 0.04, "grad_norm": 9.802672386169434, "learning_rate": 7.3840164377635995e-06, "loss": 2.9979, "step": 3414 }, { "epoch": 0.04, "grad_norm": 10.640768051147461, "learning_rate": 7.386179301395048e-06, "loss": 3.2449, "step": 3415 }, { "epoch": 0.04, "grad_norm": 10.32616901397705, "learning_rate": 7.388342165026496e-06, "loss": 3.4466, "step": 3416 }, { "epoch": 0.04, "grad_norm": 8.721270561218262, "learning_rate": 7.390505028657944e-06, "loss": 3.2042, "step": 3417 }, { "epoch": 0.04, "grad_norm": 9.467585563659668, "learning_rate": 7.392667892289391e-06, "loss": 2.8253, "step": 3418 }, { "epoch": 0.04, "grad_norm": 12.127074241638184, "learning_rate": 7.39483075592084e-06, "loss": 3.3627, "step": 3419 }, { "epoch": 0.04, "grad_norm": 9.37066650390625, "learning_rate": 7.396993619552287e-06, "loss": 2.8272, "step": 3420 }, { "epoch": 0.04, "grad_norm": 9.845991134643555, "learning_rate": 7.399156483183736e-06, "loss": 2.757, "step": 3421 }, { "epoch": 0.04, "grad_norm": 8.570984840393066, "learning_rate": 7.401319346815183e-06, "loss": 2.7278, "step": 3422 }, { "epoch": 0.04, "grad_norm": 9.364126205444336, "learning_rate": 7.403482210446632e-06, "loss": 3.2788, "step": 3423 }, { "epoch": 0.04, "grad_norm": 8.94675350189209, "learning_rate": 7.405645074078081e-06, "loss": 3.2163, "step": 3424 }, { "epoch": 0.04, "grad_norm": 9.714302062988281, "learning_rate": 7.407807937709528e-06, "loss": 2.2876, "step": 3425 }, { "epoch": 0.04, "grad_norm": 10.02603530883789, "learning_rate": 7.409970801340977e-06, "loss": 3.3268, "step": 3426 }, { "epoch": 0.04, "grad_norm": 9.517732620239258, "learning_rate": 7.412133664972424e-06, "loss": 3.0285, "step": 3427 }, { "epoch": 0.04, "grad_norm": 8.750190734863281, "learning_rate": 7.414296528603872e-06, "loss": 3.0797, "step": 3428 }, { "epoch": 0.04, "grad_norm": 11.12209415435791, "learning_rate": 7.41645939223532e-06, "loss": 3.3579, "step": 3429 }, { "epoch": 0.04, "grad_norm": 9.159603118896484, "learning_rate": 7.418622255866768e-06, "loss": 2.5163, "step": 3430 }, { "epoch": 0.04, "grad_norm": 8.751394271850586, "learning_rate": 7.420785119498216e-06, "loss": 2.616, "step": 3431 }, { "epoch": 0.04, "grad_norm": 9.963018417358398, "learning_rate": 7.4229479831296644e-06, "loss": 3.1747, "step": 3432 }, { "epoch": 0.04, "grad_norm": 10.450389862060547, "learning_rate": 7.4251108467611125e-06, "loss": 3.1996, "step": 3433 }, { "epoch": 0.04, "grad_norm": 8.35274887084961, "learning_rate": 7.4272737103925605e-06, "loss": 3.0182, "step": 3434 }, { "epoch": 0.04, "grad_norm": 9.881623268127441, "learning_rate": 7.429436574024009e-06, "loss": 3.6161, "step": 3435 }, { "epoch": 0.04, "grad_norm": 8.290273666381836, "learning_rate": 7.431599437655457e-06, "loss": 2.6494, "step": 3436 }, { "epoch": 0.04, "grad_norm": 10.09099292755127, "learning_rate": 7.433762301286904e-06, "loss": 2.8254, "step": 3437 }, { "epoch": 0.04, "grad_norm": 10.061968803405762, "learning_rate": 7.435925164918353e-06, "loss": 2.7666, "step": 3438 }, { "epoch": 0.04, "grad_norm": 9.778115272521973, "learning_rate": 7.4380880285498e-06, "loss": 3.4981, "step": 3439 }, { "epoch": 0.04, "grad_norm": 9.67241096496582, "learning_rate": 7.440250892181249e-06, "loss": 2.527, "step": 3440 }, { "epoch": 0.04, "grad_norm": 9.29318904876709, "learning_rate": 7.442413755812696e-06, "loss": 3.2001, "step": 3441 }, { "epoch": 0.04, "grad_norm": 9.988998413085938, "learning_rate": 7.444576619444145e-06, "loss": 3.3843, "step": 3442 }, { "epoch": 0.04, "grad_norm": 9.655725479125977, "learning_rate": 7.446739483075592e-06, "loss": 2.9022, "step": 3443 }, { "epoch": 0.04, "grad_norm": 9.710758209228516, "learning_rate": 7.448902346707041e-06, "loss": 3.3723, "step": 3444 }, { "epoch": 0.04, "grad_norm": 9.25139331817627, "learning_rate": 7.451065210338488e-06, "loss": 3.0913, "step": 3445 }, { "epoch": 0.04, "grad_norm": 8.604493141174316, "learning_rate": 7.453228073969937e-06, "loss": 2.9315, "step": 3446 }, { "epoch": 0.04, "grad_norm": 9.937318801879883, "learning_rate": 7.455390937601384e-06, "loss": 3.2888, "step": 3447 }, { "epoch": 0.04, "grad_norm": 8.996970176696777, "learning_rate": 7.457553801232833e-06, "loss": 3.5752, "step": 3448 }, { "epoch": 0.04, "grad_norm": 9.762635231018066, "learning_rate": 7.4597166648642805e-06, "loss": 3.0362, "step": 3449 }, { "epoch": 0.04, "grad_norm": 8.701045036315918, "learning_rate": 7.461879528495729e-06, "loss": 3.1894, "step": 3450 }, { "epoch": 0.04, "grad_norm": 9.375104904174805, "learning_rate": 7.4640423921271766e-06, "loss": 3.4538, "step": 3451 }, { "epoch": 0.04, "grad_norm": 10.102437973022461, "learning_rate": 7.4662052557586255e-06, "loss": 3.241, "step": 3452 }, { "epoch": 0.04, "grad_norm": 9.431610107421875, "learning_rate": 7.468368119390073e-06, "loss": 3.2863, "step": 3453 }, { "epoch": 0.04, "grad_norm": 11.424681663513184, "learning_rate": 7.470530983021521e-06, "loss": 3.0916, "step": 3454 }, { "epoch": 0.04, "grad_norm": 10.189833641052246, "learning_rate": 7.472693846652969e-06, "loss": 2.9531, "step": 3455 }, { "epoch": 0.04, "grad_norm": 9.082443237304688, "learning_rate": 7.474856710284417e-06, "loss": 3.3034, "step": 3456 }, { "epoch": 0.04, "grad_norm": 8.724385261535645, "learning_rate": 7.477019573915866e-06, "loss": 3.1901, "step": 3457 }, { "epoch": 0.04, "grad_norm": 9.372542381286621, "learning_rate": 7.479182437547313e-06, "loss": 2.2687, "step": 3458 }, { "epoch": 0.04, "grad_norm": 9.794037818908691, "learning_rate": 7.481345301178762e-06, "loss": 3.1158, "step": 3459 }, { "epoch": 0.04, "grad_norm": 10.226569175720215, "learning_rate": 7.483508164810209e-06, "loss": 3.24, "step": 3460 }, { "epoch": 0.04, "grad_norm": 11.169878005981445, "learning_rate": 7.485671028441658e-06, "loss": 2.8175, "step": 3461 }, { "epoch": 0.04, "grad_norm": 9.804739952087402, "learning_rate": 7.487833892073105e-06, "loss": 3.3434, "step": 3462 }, { "epoch": 0.04, "grad_norm": 9.844253540039062, "learning_rate": 7.489996755704554e-06, "loss": 3.3124, "step": 3463 }, { "epoch": 0.04, "grad_norm": 8.71408748626709, "learning_rate": 7.492159619336001e-06, "loss": 3.0664, "step": 3464 }, { "epoch": 0.04, "grad_norm": 9.435684204101562, "learning_rate": 7.49432248296745e-06, "loss": 3.2887, "step": 3465 }, { "epoch": 0.04, "grad_norm": 10.373908996582031, "learning_rate": 7.496485346598897e-06, "loss": 3.2145, "step": 3466 }, { "epoch": 0.04, "grad_norm": 10.887165069580078, "learning_rate": 7.498648210230346e-06, "loss": 3.4005, "step": 3467 }, { "epoch": 0.05, "grad_norm": 9.704985618591309, "learning_rate": 7.500811073861793e-06, "loss": 2.9627, "step": 3468 }, { "epoch": 0.05, "grad_norm": 11.165055274963379, "learning_rate": 7.502973937493242e-06, "loss": 3.082, "step": 3469 }, { "epoch": 0.05, "grad_norm": 9.085379600524902, "learning_rate": 7.5051368011246895e-06, "loss": 3.1785, "step": 3470 }, { "epoch": 0.05, "grad_norm": 9.118226051330566, "learning_rate": 7.507299664756138e-06, "loss": 3.462, "step": 3471 }, { "epoch": 0.05, "grad_norm": 8.945821762084961, "learning_rate": 7.509462528387586e-06, "loss": 2.7878, "step": 3472 }, { "epoch": 0.05, "grad_norm": 9.812024116516113, "learning_rate": 7.511625392019034e-06, "loss": 3.3878, "step": 3473 }, { "epoch": 0.05, "grad_norm": 9.377979278564453, "learning_rate": 7.513788255650482e-06, "loss": 2.6375, "step": 3474 }, { "epoch": 0.05, "grad_norm": 8.544049263000488, "learning_rate": 7.51595111928193e-06, "loss": 3.2215, "step": 3475 }, { "epoch": 0.05, "grad_norm": 10.329663276672363, "learning_rate": 7.518113982913378e-06, "loss": 2.986, "step": 3476 }, { "epoch": 0.05, "grad_norm": 9.122894287109375, "learning_rate": 7.520276846544826e-06, "loss": 2.8277, "step": 3477 }, { "epoch": 0.05, "grad_norm": 11.0836763381958, "learning_rate": 7.522439710176274e-06, "loss": 2.9883, "step": 3478 }, { "epoch": 0.05, "grad_norm": 9.02094841003418, "learning_rate": 7.524602573807722e-06, "loss": 3.0229, "step": 3479 }, { "epoch": 0.05, "grad_norm": 9.833224296569824, "learning_rate": 7.52676543743917e-06, "loss": 3.195, "step": 3480 }, { "epoch": 0.05, "grad_norm": 8.695379257202148, "learning_rate": 7.528928301070618e-06, "loss": 3.5053, "step": 3481 }, { "epoch": 0.05, "grad_norm": 9.384392738342285, "learning_rate": 7.531091164702065e-06, "loss": 2.9782, "step": 3482 }, { "epoch": 0.05, "grad_norm": 11.116827964782715, "learning_rate": 7.533254028333514e-06, "loss": 2.5538, "step": 3483 }, { "epoch": 0.05, "grad_norm": 9.337016105651855, "learning_rate": 7.535416891964961e-06, "loss": 3.0417, "step": 3484 }, { "epoch": 0.05, "grad_norm": 8.613358497619629, "learning_rate": 7.53757975559641e-06, "loss": 2.8036, "step": 3485 }, { "epoch": 0.05, "grad_norm": 9.154022216796875, "learning_rate": 7.5397426192278575e-06, "loss": 2.6525, "step": 3486 }, { "epoch": 0.05, "grad_norm": 10.953741073608398, "learning_rate": 7.541905482859306e-06, "loss": 3.5054, "step": 3487 }, { "epoch": 0.05, "grad_norm": 8.726823806762695, "learning_rate": 7.544068346490755e-06, "loss": 2.9576, "step": 3488 }, { "epoch": 0.05, "grad_norm": 9.693049430847168, "learning_rate": 7.5462312101222025e-06, "loss": 2.5496, "step": 3489 }, { "epoch": 0.05, "grad_norm": 9.909721374511719, "learning_rate": 7.548394073753651e-06, "loss": 3.2543, "step": 3490 }, { "epoch": 0.05, "grad_norm": 9.328554153442383, "learning_rate": 7.550556937385099e-06, "loss": 2.4849, "step": 3491 }, { "epoch": 0.05, "grad_norm": 9.295079231262207, "learning_rate": 7.552719801016547e-06, "loss": 3.0434, "step": 3492 }, { "epoch": 0.05, "grad_norm": 9.48355770111084, "learning_rate": 7.554882664647995e-06, "loss": 3.5602, "step": 3493 }, { "epoch": 0.05, "grad_norm": 8.880165100097656, "learning_rate": 7.557045528279443e-06, "loss": 3.1436, "step": 3494 }, { "epoch": 0.05, "grad_norm": 10.0663480758667, "learning_rate": 7.559208391910891e-06, "loss": 3.2839, "step": 3495 }, { "epoch": 0.05, "grad_norm": 9.748467445373535, "learning_rate": 7.561371255542339e-06, "loss": 2.85, "step": 3496 }, { "epoch": 0.05, "grad_norm": 9.122333526611328, "learning_rate": 7.563534119173787e-06, "loss": 2.9282, "step": 3497 }, { "epoch": 0.05, "grad_norm": 12.271940231323242, "learning_rate": 7.565696982805235e-06, "loss": 3.1072, "step": 3498 }, { "epoch": 0.05, "grad_norm": 10.502351760864258, "learning_rate": 7.567859846436683e-06, "loss": 3.4017, "step": 3499 }, { "epoch": 0.05, "grad_norm": 9.42845344543457, "learning_rate": 7.570022710068131e-06, "loss": 3.0994, "step": 3500 }, { "epoch": 0.05, "grad_norm": 9.098158836364746, "learning_rate": 7.572185573699578e-06, "loss": 3.1618, "step": 3501 }, { "epoch": 0.05, "grad_norm": 9.359487533569336, "learning_rate": 7.574348437331027e-06, "loss": 2.4829, "step": 3502 }, { "epoch": 0.05, "grad_norm": 10.871353149414062, "learning_rate": 7.576511300962474e-06, "loss": 3.536, "step": 3503 }, { "epoch": 0.05, "grad_norm": 9.571660995483398, "learning_rate": 7.578674164593923e-06, "loss": 3.208, "step": 3504 }, { "epoch": 0.05, "grad_norm": 8.655378341674805, "learning_rate": 7.5808370282253705e-06, "loss": 2.9755, "step": 3505 }, { "epoch": 0.05, "grad_norm": 8.916629791259766, "learning_rate": 7.582999891856819e-06, "loss": 3.7526, "step": 3506 }, { "epoch": 0.05, "grad_norm": 10.169382095336914, "learning_rate": 7.5851627554882666e-06, "loss": 3.62, "step": 3507 }, { "epoch": 0.05, "grad_norm": 10.012691497802734, "learning_rate": 7.5873256191197155e-06, "loss": 3.2337, "step": 3508 }, { "epoch": 0.05, "grad_norm": 9.465067863464355, "learning_rate": 7.589488482751163e-06, "loss": 3.4331, "step": 3509 }, { "epoch": 0.05, "grad_norm": 9.712503433227539, "learning_rate": 7.5916513463826116e-06, "loss": 3.4134, "step": 3510 }, { "epoch": 0.05, "grad_norm": 9.994094848632812, "learning_rate": 7.593814210014059e-06, "loss": 3.3765, "step": 3511 }, { "epoch": 0.05, "grad_norm": 9.416851043701172, "learning_rate": 7.595977073645508e-06, "loss": 3.1681, "step": 3512 }, { "epoch": 0.05, "grad_norm": 8.299955368041992, "learning_rate": 7.598139937276955e-06, "loss": 2.7566, "step": 3513 }, { "epoch": 0.05, "grad_norm": 8.966069221496582, "learning_rate": 7.600302800908404e-06, "loss": 3.6048, "step": 3514 }, { "epoch": 0.05, "grad_norm": 9.126060485839844, "learning_rate": 7.602465664539851e-06, "loss": 3.3625, "step": 3515 }, { "epoch": 0.05, "grad_norm": 9.250256538391113, "learning_rate": 7.6046285281713e-06, "loss": 3.1572, "step": 3516 }, { "epoch": 0.05, "grad_norm": 8.945765495300293, "learning_rate": 7.606791391802747e-06, "loss": 3.4166, "step": 3517 }, { "epoch": 0.05, "grad_norm": 8.929057121276855, "learning_rate": 7.608954255434195e-06, "loss": 2.6937, "step": 3518 }, { "epoch": 0.05, "grad_norm": 9.753045082092285, "learning_rate": 7.611117119065643e-06, "loss": 2.97, "step": 3519 }, { "epoch": 0.05, "grad_norm": 10.226762771606445, "learning_rate": 7.613279982697091e-06, "loss": 3.0388, "step": 3520 }, { "epoch": 0.05, "grad_norm": 8.776118278503418, "learning_rate": 7.61544284632854e-06, "loss": 2.7763, "step": 3521 }, { "epoch": 0.05, "grad_norm": 13.97187614440918, "learning_rate": 7.617605709959987e-06, "loss": 2.9291, "step": 3522 }, { "epoch": 0.05, "grad_norm": 8.326821327209473, "learning_rate": 7.619768573591436e-06, "loss": 2.8485, "step": 3523 }, { "epoch": 0.05, "grad_norm": 9.826977729797363, "learning_rate": 7.621931437222883e-06, "loss": 3.1556, "step": 3524 }, { "epoch": 0.05, "grad_norm": 9.43099308013916, "learning_rate": 7.624094300854332e-06, "loss": 3.3163, "step": 3525 }, { "epoch": 0.05, "grad_norm": 9.90449047088623, "learning_rate": 7.6262571644857795e-06, "loss": 2.9664, "step": 3526 }, { "epoch": 0.05, "grad_norm": 9.89074420928955, "learning_rate": 7.628420028117228e-06, "loss": 2.9405, "step": 3527 }, { "epoch": 0.05, "grad_norm": 9.75056266784668, "learning_rate": 7.630582891748676e-06, "loss": 3.0616, "step": 3528 }, { "epoch": 0.05, "grad_norm": 9.110833168029785, "learning_rate": 7.632745755380124e-06, "loss": 3.3987, "step": 3529 }, { "epoch": 0.05, "grad_norm": 9.906536102294922, "learning_rate": 7.634908619011572e-06, "loss": 2.8319, "step": 3530 }, { "epoch": 0.05, "grad_norm": 9.222447395324707, "learning_rate": 7.63707148264302e-06, "loss": 2.9998, "step": 3531 }, { "epoch": 0.05, "grad_norm": 9.135150909423828, "learning_rate": 7.639234346274468e-06, "loss": 2.9798, "step": 3532 }, { "epoch": 0.05, "grad_norm": 9.022695541381836, "learning_rate": 7.641397209905916e-06, "loss": 3.1033, "step": 3533 }, { "epoch": 0.05, "grad_norm": 8.673487663269043, "learning_rate": 7.643560073537364e-06, "loss": 3.415, "step": 3534 }, { "epoch": 0.05, "grad_norm": 9.288707733154297, "learning_rate": 7.645722937168812e-06, "loss": 2.6747, "step": 3535 }, { "epoch": 0.05, "grad_norm": 9.377238273620605, "learning_rate": 7.64788580080026e-06, "loss": 2.9134, "step": 3536 }, { "epoch": 0.05, "grad_norm": 8.938438415527344, "learning_rate": 7.650048664431708e-06, "loss": 3.2747, "step": 3537 }, { "epoch": 0.05, "grad_norm": 8.567188262939453, "learning_rate": 7.652211528063156e-06, "loss": 3.0937, "step": 3538 }, { "epoch": 0.05, "grad_norm": 8.898594856262207, "learning_rate": 7.654374391694604e-06, "loss": 3.0249, "step": 3539 }, { "epoch": 0.05, "grad_norm": 8.973366737365723, "learning_rate": 7.656537255326052e-06, "loss": 3.1056, "step": 3540 }, { "epoch": 0.05, "grad_norm": 7.922499179840088, "learning_rate": 7.6587001189575e-06, "loss": 2.3839, "step": 3541 }, { "epoch": 0.05, "grad_norm": 10.30644702911377, "learning_rate": 7.660862982588948e-06, "loss": 3.6625, "step": 3542 }, { "epoch": 0.05, "grad_norm": 8.678016662597656, "learning_rate": 7.663025846220396e-06, "loss": 2.8762, "step": 3543 }, { "epoch": 0.05, "grad_norm": 10.671758651733398, "learning_rate": 7.665188709851844e-06, "loss": 3.8761, "step": 3544 }, { "epoch": 0.05, "grad_norm": 9.570788383483887, "learning_rate": 7.667351573483292e-06, "loss": 2.9569, "step": 3545 }, { "epoch": 0.05, "grad_norm": 10.095247268676758, "learning_rate": 7.66951443711474e-06, "loss": 3.3352, "step": 3546 }, { "epoch": 0.05, "grad_norm": 9.632064819335938, "learning_rate": 7.671677300746189e-06, "loss": 3.3618, "step": 3547 }, { "epoch": 0.05, "grad_norm": 8.53225040435791, "learning_rate": 7.673840164377637e-06, "loss": 2.7168, "step": 3548 }, { "epoch": 0.05, "grad_norm": 8.898788452148438, "learning_rate": 7.676003028009085e-06, "loss": 2.3795, "step": 3549 }, { "epoch": 0.05, "grad_norm": 9.457036972045898, "learning_rate": 7.678165891640533e-06, "loss": 2.7276, "step": 3550 }, { "epoch": 0.05, "grad_norm": 10.084870338439941, "learning_rate": 7.68032875527198e-06, "loss": 3.265, "step": 3551 }, { "epoch": 0.05, "grad_norm": 9.405094146728516, "learning_rate": 7.682491618903429e-06, "loss": 3.4638, "step": 3552 }, { "epoch": 0.05, "grad_norm": 9.018092155456543, "learning_rate": 7.684654482534877e-06, "loss": 3.2921, "step": 3553 }, { "epoch": 0.05, "grad_norm": 9.162232398986816, "learning_rate": 7.686817346166325e-06, "loss": 3.1318, "step": 3554 }, { "epoch": 0.05, "grad_norm": 8.561746597290039, "learning_rate": 7.688980209797773e-06, "loss": 2.8615, "step": 3555 }, { "epoch": 0.05, "grad_norm": 9.48415756225586, "learning_rate": 7.691143073429221e-06, "loss": 2.7663, "step": 3556 }, { "epoch": 0.05, "grad_norm": 8.354353904724121, "learning_rate": 7.693305937060669e-06, "loss": 2.742, "step": 3557 }, { "epoch": 0.05, "grad_norm": 8.998720169067383, "learning_rate": 7.695468800692117e-06, "loss": 3.3657, "step": 3558 }, { "epoch": 0.05, "grad_norm": 9.502796173095703, "learning_rate": 7.697631664323565e-06, "loss": 2.9531, "step": 3559 }, { "epoch": 0.05, "grad_norm": 9.79371166229248, "learning_rate": 7.699794527955013e-06, "loss": 2.2559, "step": 3560 }, { "epoch": 0.05, "grad_norm": 9.424137115478516, "learning_rate": 7.701957391586461e-06, "loss": 3.427, "step": 3561 }, { "epoch": 0.05, "grad_norm": 10.069361686706543, "learning_rate": 7.70412025521791e-06, "loss": 3.3249, "step": 3562 }, { "epoch": 0.05, "grad_norm": 10.771882057189941, "learning_rate": 7.706283118849357e-06, "loss": 3.0503, "step": 3563 }, { "epoch": 0.05, "grad_norm": 9.255085945129395, "learning_rate": 7.708445982480805e-06, "loss": 2.6137, "step": 3564 }, { "epoch": 0.05, "grad_norm": 8.57629680633545, "learning_rate": 7.710608846112253e-06, "loss": 2.5997, "step": 3565 }, { "epoch": 0.05, "grad_norm": 9.015157699584961, "learning_rate": 7.712771709743702e-06, "loss": 3.2523, "step": 3566 }, { "epoch": 0.05, "grad_norm": 10.408590316772461, "learning_rate": 7.71493457337515e-06, "loss": 3.5006, "step": 3567 }, { "epoch": 0.05, "grad_norm": 10.88022518157959, "learning_rate": 7.717097437006598e-06, "loss": 3.3445, "step": 3568 }, { "epoch": 0.05, "grad_norm": 9.066604614257812, "learning_rate": 7.719260300638046e-06, "loss": 2.8954, "step": 3569 }, { "epoch": 0.05, "grad_norm": 9.53850269317627, "learning_rate": 7.721423164269494e-06, "loss": 3.1993, "step": 3570 }, { "epoch": 0.05, "grad_norm": 9.370757102966309, "learning_rate": 7.723586027900942e-06, "loss": 2.8614, "step": 3571 }, { "epoch": 0.05, "grad_norm": 9.84030818939209, "learning_rate": 7.72574889153239e-06, "loss": 3.1357, "step": 3572 }, { "epoch": 0.05, "grad_norm": 9.240413665771484, "learning_rate": 7.727911755163838e-06, "loss": 2.6429, "step": 3573 }, { "epoch": 0.05, "grad_norm": 9.429028511047363, "learning_rate": 7.730074618795286e-06, "loss": 2.4888, "step": 3574 }, { "epoch": 0.05, "grad_norm": 10.447853088378906, "learning_rate": 7.732237482426734e-06, "loss": 3.5053, "step": 3575 }, { "epoch": 0.05, "grad_norm": 9.293067932128906, "learning_rate": 7.734400346058182e-06, "loss": 3.2117, "step": 3576 }, { "epoch": 0.05, "grad_norm": 10.644001960754395, "learning_rate": 7.73656320968963e-06, "loss": 2.8013, "step": 3577 }, { "epoch": 0.05, "grad_norm": 9.257462501525879, "learning_rate": 7.738726073321078e-06, "loss": 3.4561, "step": 3578 }, { "epoch": 0.05, "grad_norm": 10.865374565124512, "learning_rate": 7.740888936952525e-06, "loss": 3.2274, "step": 3579 }, { "epoch": 0.05, "grad_norm": 9.93625259399414, "learning_rate": 7.743051800583974e-06, "loss": 3.0793, "step": 3580 }, { "epoch": 0.05, "grad_norm": 9.077917098999023, "learning_rate": 7.74521466421542e-06, "loss": 3.0421, "step": 3581 }, { "epoch": 0.05, "grad_norm": 8.939696311950684, "learning_rate": 7.74737752784687e-06, "loss": 3.3854, "step": 3582 }, { "epoch": 0.05, "grad_norm": 10.127161979675293, "learning_rate": 7.749540391478317e-06, "loss": 2.7155, "step": 3583 }, { "epoch": 0.05, "grad_norm": 8.49497127532959, "learning_rate": 7.751703255109766e-06, "loss": 3.1163, "step": 3584 }, { "epoch": 0.05, "grad_norm": 8.891762733459473, "learning_rate": 7.753866118741215e-06, "loss": 3.3812, "step": 3585 }, { "epoch": 0.05, "grad_norm": 8.425958633422852, "learning_rate": 7.756028982372663e-06, "loss": 2.695, "step": 3586 }, { "epoch": 0.05, "grad_norm": 9.091845512390137, "learning_rate": 7.75819184600411e-06, "loss": 2.8746, "step": 3587 }, { "epoch": 0.05, "grad_norm": 9.633172988891602, "learning_rate": 7.760354709635559e-06, "loss": 3.2085, "step": 3588 }, { "epoch": 0.05, "grad_norm": 9.784795761108398, "learning_rate": 7.762517573267007e-06, "loss": 3.1192, "step": 3589 }, { "epoch": 0.05, "grad_norm": 10.431999206542969, "learning_rate": 7.764680436898455e-06, "loss": 3.3375, "step": 3590 }, { "epoch": 0.05, "grad_norm": 9.716659545898438, "learning_rate": 7.766843300529903e-06, "loss": 2.9064, "step": 3591 }, { "epoch": 0.05, "grad_norm": 10.450286865234375, "learning_rate": 7.769006164161351e-06, "loss": 2.8786, "step": 3592 }, { "epoch": 0.05, "grad_norm": 10.065889358520508, "learning_rate": 7.771169027792799e-06, "loss": 3.6144, "step": 3593 }, { "epoch": 0.05, "grad_norm": 8.990639686584473, "learning_rate": 7.773331891424247e-06, "loss": 2.8269, "step": 3594 }, { "epoch": 0.05, "grad_norm": 10.345404624938965, "learning_rate": 7.775494755055695e-06, "loss": 3.7216, "step": 3595 }, { "epoch": 0.05, "grad_norm": 8.881185531616211, "learning_rate": 7.777657618687141e-06, "loss": 2.9623, "step": 3596 }, { "epoch": 0.05, "grad_norm": 9.356191635131836, "learning_rate": 7.779820482318591e-06, "loss": 3.2871, "step": 3597 }, { "epoch": 0.05, "grad_norm": 8.248699188232422, "learning_rate": 7.781983345950037e-06, "loss": 2.8139, "step": 3598 }, { "epoch": 0.05, "grad_norm": 9.605530738830566, "learning_rate": 7.784146209581487e-06, "loss": 3.0552, "step": 3599 }, { "epoch": 0.05, "grad_norm": 9.067686080932617, "learning_rate": 7.786309073212934e-06, "loss": 3.0157, "step": 3600 }, { "epoch": 0.05, "grad_norm": 8.654886245727539, "learning_rate": 7.788471936844383e-06, "loss": 3.3558, "step": 3601 }, { "epoch": 0.05, "grad_norm": 9.559951782226562, "learning_rate": 7.79063480047583e-06, "loss": 3.2414, "step": 3602 }, { "epoch": 0.05, "grad_norm": 7.954746723175049, "learning_rate": 7.79279766410728e-06, "loss": 3.003, "step": 3603 }, { "epoch": 0.05, "grad_norm": 9.891390800476074, "learning_rate": 7.794960527738726e-06, "loss": 3.1026, "step": 3604 }, { "epoch": 0.05, "grad_norm": 9.170205116271973, "learning_rate": 7.797123391370176e-06, "loss": 3.0641, "step": 3605 }, { "epoch": 0.05, "grad_norm": 9.083984375, "learning_rate": 7.799286255001622e-06, "loss": 3.0111, "step": 3606 }, { "epoch": 0.05, "grad_norm": 8.978899955749512, "learning_rate": 7.801449118633072e-06, "loss": 3.0317, "step": 3607 }, { "epoch": 0.05, "grad_norm": 9.31829833984375, "learning_rate": 7.803611982264518e-06, "loss": 2.9051, "step": 3608 }, { "epoch": 0.05, "grad_norm": 9.641338348388672, "learning_rate": 7.805774845895968e-06, "loss": 3.7134, "step": 3609 }, { "epoch": 0.05, "grad_norm": 8.98705005645752, "learning_rate": 7.807937709527414e-06, "loss": 3.0405, "step": 3610 }, { "epoch": 0.05, "grad_norm": 9.220322608947754, "learning_rate": 7.810100573158864e-06, "loss": 3.5704, "step": 3611 }, { "epoch": 0.05, "grad_norm": 8.794471740722656, "learning_rate": 7.81226343679031e-06, "loss": 3.3723, "step": 3612 }, { "epoch": 0.05, "grad_norm": 8.559103012084961, "learning_rate": 7.81442630042176e-06, "loss": 2.5845, "step": 3613 }, { "epoch": 0.05, "grad_norm": 9.808520317077637, "learning_rate": 7.816589164053206e-06, "loss": 2.9775, "step": 3614 }, { "epoch": 0.05, "grad_norm": 8.81256103515625, "learning_rate": 7.818752027684654e-06, "loss": 2.8462, "step": 3615 }, { "epoch": 0.05, "grad_norm": 9.527384757995605, "learning_rate": 7.820914891316104e-06, "loss": 3.4173, "step": 3616 }, { "epoch": 0.05, "grad_norm": 8.777192115783691, "learning_rate": 7.82307775494755e-06, "loss": 3.0234, "step": 3617 }, { "epoch": 0.05, "grad_norm": 9.328750610351562, "learning_rate": 7.825240618579e-06, "loss": 3.0008, "step": 3618 }, { "epoch": 0.05, "grad_norm": 8.837956428527832, "learning_rate": 7.827403482210447e-06, "loss": 3.0566, "step": 3619 }, { "epoch": 0.05, "grad_norm": 8.699200630187988, "learning_rate": 7.829566345841896e-06, "loss": 2.6442, "step": 3620 }, { "epoch": 0.05, "grad_norm": 9.685197830200195, "learning_rate": 7.831729209473343e-06, "loss": 2.7683, "step": 3621 }, { "epoch": 0.05, "grad_norm": 9.577810287475586, "learning_rate": 7.833892073104792e-06, "loss": 2.9121, "step": 3622 }, { "epoch": 0.05, "grad_norm": 7.995781898498535, "learning_rate": 7.836054936736239e-06, "loss": 2.6593, "step": 3623 }, { "epoch": 0.05, "grad_norm": 9.639616966247559, "learning_rate": 7.838217800367688e-06, "loss": 3.0874, "step": 3624 }, { "epoch": 0.05, "grad_norm": 10.37996768951416, "learning_rate": 7.840380663999135e-06, "loss": 2.854, "step": 3625 }, { "epoch": 0.05, "grad_norm": 9.43591022491455, "learning_rate": 7.842543527630585e-06, "loss": 2.7938, "step": 3626 }, { "epoch": 0.05, "grad_norm": 9.447609901428223, "learning_rate": 7.844706391262031e-06, "loss": 3.2674, "step": 3627 }, { "epoch": 0.05, "grad_norm": 8.294432640075684, "learning_rate": 7.84686925489348e-06, "loss": 2.6987, "step": 3628 }, { "epoch": 0.05, "grad_norm": 10.212966918945312, "learning_rate": 7.849032118524927e-06, "loss": 3.1716, "step": 3629 }, { "epoch": 0.05, "grad_norm": 9.207330703735352, "learning_rate": 7.851194982156377e-06, "loss": 3.721, "step": 3630 }, { "epoch": 0.05, "grad_norm": 10.14054012298584, "learning_rate": 7.853357845787823e-06, "loss": 3.5498, "step": 3631 }, { "epoch": 0.05, "grad_norm": 9.747188568115234, "learning_rate": 7.855520709419271e-06, "loss": 3.1855, "step": 3632 }, { "epoch": 0.05, "grad_norm": 10.715917587280273, "learning_rate": 7.85768357305072e-06, "loss": 3.0045, "step": 3633 }, { "epoch": 0.05, "grad_norm": 9.12954330444336, "learning_rate": 7.859846436682167e-06, "loss": 3.192, "step": 3634 }, { "epoch": 0.05, "grad_norm": 9.87212085723877, "learning_rate": 7.862009300313615e-06, "loss": 2.7701, "step": 3635 }, { "epoch": 0.05, "grad_norm": 9.919063568115234, "learning_rate": 7.864172163945063e-06, "loss": 3.0407, "step": 3636 }, { "epoch": 0.05, "grad_norm": 10.366565704345703, "learning_rate": 7.866335027576511e-06, "loss": 3.1704, "step": 3637 }, { "epoch": 0.05, "grad_norm": 9.662670135498047, "learning_rate": 7.86849789120796e-06, "loss": 3.4106, "step": 3638 }, { "epoch": 0.05, "grad_norm": 9.490923881530762, "learning_rate": 7.870660754839408e-06, "loss": 3.2151, "step": 3639 }, { "epoch": 0.05, "grad_norm": 9.172073364257812, "learning_rate": 7.872823618470856e-06, "loss": 2.9475, "step": 3640 }, { "epoch": 0.05, "grad_norm": 9.24255657196045, "learning_rate": 7.874986482102304e-06, "loss": 2.8907, "step": 3641 }, { "epoch": 0.05, "grad_norm": 9.858327865600586, "learning_rate": 7.877149345733752e-06, "loss": 3.1589, "step": 3642 }, { "epoch": 0.05, "grad_norm": 8.916189193725586, "learning_rate": 7.8793122093652e-06, "loss": 2.9192, "step": 3643 }, { "epoch": 0.05, "grad_norm": 9.352286338806152, "learning_rate": 7.881475072996648e-06, "loss": 2.9796, "step": 3644 }, { "epoch": 0.05, "grad_norm": 8.6671781539917, "learning_rate": 7.883637936628096e-06, "loss": 2.6614, "step": 3645 }, { "epoch": 0.05, "grad_norm": 10.428802490234375, "learning_rate": 7.885800800259544e-06, "loss": 2.6211, "step": 3646 }, { "epoch": 0.05, "grad_norm": 8.881280899047852, "learning_rate": 7.887963663890992e-06, "loss": 3.0124, "step": 3647 }, { "epoch": 0.05, "grad_norm": 9.17709732055664, "learning_rate": 7.89012652752244e-06, "loss": 3.1938, "step": 3648 }, { "epoch": 0.05, "grad_norm": 8.892644882202148, "learning_rate": 7.89228939115389e-06, "loss": 3.084, "step": 3649 }, { "epoch": 0.05, "grad_norm": 8.58150863647461, "learning_rate": 7.894452254785336e-06, "loss": 3.21, "step": 3650 }, { "epoch": 0.05, "grad_norm": 8.986454010009766, "learning_rate": 7.896615118416784e-06, "loss": 2.9381, "step": 3651 }, { "epoch": 0.05, "grad_norm": 9.095098495483398, "learning_rate": 7.898777982048232e-06, "loss": 3.0035, "step": 3652 }, { "epoch": 0.05, "grad_norm": 9.040498733520508, "learning_rate": 7.90094084567968e-06, "loss": 3.3887, "step": 3653 }, { "epoch": 0.05, "grad_norm": 8.642841339111328, "learning_rate": 7.903103709311128e-06, "loss": 2.8118, "step": 3654 }, { "epoch": 0.05, "grad_norm": 10.931477546691895, "learning_rate": 7.905266572942576e-06, "loss": 3.6256, "step": 3655 }, { "epoch": 0.05, "grad_norm": 9.864294052124023, "learning_rate": 7.907429436574024e-06, "loss": 3.0522, "step": 3656 }, { "epoch": 0.05, "grad_norm": 7.986606121063232, "learning_rate": 7.909592300205472e-06, "loss": 2.9091, "step": 3657 }, { "epoch": 0.05, "grad_norm": 9.270057678222656, "learning_rate": 7.91175516383692e-06, "loss": 2.877, "step": 3658 }, { "epoch": 0.05, "grad_norm": 8.913891792297363, "learning_rate": 7.913918027468369e-06, "loss": 3.0359, "step": 3659 }, { "epoch": 0.05, "grad_norm": 7.815526008605957, "learning_rate": 7.916080891099817e-06, "loss": 2.9181, "step": 3660 }, { "epoch": 0.05, "grad_norm": 9.685843467712402, "learning_rate": 7.918243754731265e-06, "loss": 2.9621, "step": 3661 }, { "epoch": 0.05, "grad_norm": 7.770301818847656, "learning_rate": 7.920406618362713e-06, "loss": 2.8522, "step": 3662 }, { "epoch": 0.05, "grad_norm": 9.086366653442383, "learning_rate": 7.92256948199416e-06, "loss": 2.9826, "step": 3663 }, { "epoch": 0.05, "grad_norm": 9.127768516540527, "learning_rate": 7.924732345625609e-06, "loss": 3.1756, "step": 3664 }, { "epoch": 0.05, "grad_norm": 9.543534278869629, "learning_rate": 7.926895209257057e-06, "loss": 3.2918, "step": 3665 }, { "epoch": 0.05, "grad_norm": 11.176009178161621, "learning_rate": 7.929058072888505e-06, "loss": 3.3986, "step": 3666 }, { "epoch": 0.05, "grad_norm": 9.292351722717285, "learning_rate": 7.931220936519953e-06, "loss": 3.2928, "step": 3667 }, { "epoch": 0.05, "grad_norm": 9.4065523147583, "learning_rate": 7.933383800151401e-06, "loss": 2.5853, "step": 3668 }, { "epoch": 0.05, "grad_norm": 8.749938011169434, "learning_rate": 7.935546663782849e-06, "loss": 2.4561, "step": 3669 }, { "epoch": 0.05, "grad_norm": 9.191631317138672, "learning_rate": 7.937709527414297e-06, "loss": 3.379, "step": 3670 }, { "epoch": 0.05, "grad_norm": 8.76840877532959, "learning_rate": 7.939872391045745e-06, "loss": 3.2685, "step": 3671 }, { "epoch": 0.05, "grad_norm": 7.938961505889893, "learning_rate": 7.942035254677193e-06, "loss": 2.6052, "step": 3672 }, { "epoch": 0.05, "grad_norm": 8.122818946838379, "learning_rate": 7.944198118308641e-06, "loss": 3.1845, "step": 3673 }, { "epoch": 0.05, "grad_norm": 8.021936416625977, "learning_rate": 7.94636098194009e-06, "loss": 2.4068, "step": 3674 }, { "epoch": 0.05, "grad_norm": 8.191997528076172, "learning_rate": 7.948523845571537e-06, "loss": 3.0116, "step": 3675 }, { "epoch": 0.05, "grad_norm": 10.036698341369629, "learning_rate": 7.950686709202985e-06, "loss": 3.2939, "step": 3676 }, { "epoch": 0.05, "grad_norm": 9.558220863342285, "learning_rate": 7.952849572834433e-06, "loss": 3.0657, "step": 3677 }, { "epoch": 0.05, "grad_norm": 9.599214553833008, "learning_rate": 7.955012436465882e-06, "loss": 2.9508, "step": 3678 }, { "epoch": 0.05, "grad_norm": 9.403082847595215, "learning_rate": 7.95717530009733e-06, "loss": 2.9381, "step": 3679 }, { "epoch": 0.05, "grad_norm": 9.493794441223145, "learning_rate": 7.959338163728778e-06, "loss": 3.1317, "step": 3680 }, { "epoch": 0.05, "grad_norm": 8.9864501953125, "learning_rate": 7.961501027360226e-06, "loss": 3.1367, "step": 3681 }, { "epoch": 0.05, "grad_norm": 9.886305809020996, "learning_rate": 7.963663890991674e-06, "loss": 3.0171, "step": 3682 }, { "epoch": 0.05, "grad_norm": 9.621465682983398, "learning_rate": 7.965826754623122e-06, "loss": 3.0997, "step": 3683 }, { "epoch": 0.05, "grad_norm": 8.759761810302734, "learning_rate": 7.96798961825457e-06, "loss": 2.7367, "step": 3684 }, { "epoch": 0.05, "grad_norm": 9.257218360900879, "learning_rate": 7.970152481886018e-06, "loss": 2.834, "step": 3685 }, { "epoch": 0.05, "grad_norm": 9.191248893737793, "learning_rate": 7.972315345517466e-06, "loss": 3.335, "step": 3686 }, { "epoch": 0.05, "grad_norm": 10.186711311340332, "learning_rate": 7.974478209148914e-06, "loss": 3.4753, "step": 3687 }, { "epoch": 0.05, "grad_norm": 10.890350341796875, "learning_rate": 7.976641072780362e-06, "loss": 3.0509, "step": 3688 }, { "epoch": 0.05, "grad_norm": 9.347933769226074, "learning_rate": 7.97880393641181e-06, "loss": 3.1759, "step": 3689 }, { "epoch": 0.05, "grad_norm": 9.093435287475586, "learning_rate": 7.980966800043258e-06, "loss": 2.9095, "step": 3690 }, { "epoch": 0.05, "grad_norm": 9.221456527709961, "learning_rate": 7.983129663674706e-06, "loss": 3.0124, "step": 3691 }, { "epoch": 0.05, "grad_norm": 8.889001846313477, "learning_rate": 7.985292527306154e-06, "loss": 2.9178, "step": 3692 }, { "epoch": 0.05, "grad_norm": 10.890374183654785, "learning_rate": 7.987455390937602e-06, "loss": 3.4017, "step": 3693 }, { "epoch": 0.05, "grad_norm": 9.03026008605957, "learning_rate": 7.98961825456905e-06, "loss": 2.8577, "step": 3694 }, { "epoch": 0.05, "grad_norm": 8.562224388122559, "learning_rate": 7.991781118200498e-06, "loss": 3.6887, "step": 3695 }, { "epoch": 0.05, "grad_norm": 8.70140552520752, "learning_rate": 7.993943981831946e-06, "loss": 3.4511, "step": 3696 }, { "epoch": 0.05, "grad_norm": 8.555331230163574, "learning_rate": 7.996106845463395e-06, "loss": 2.9891, "step": 3697 }, { "epoch": 0.05, "grad_norm": 9.028999328613281, "learning_rate": 7.998269709094843e-06, "loss": 3.4929, "step": 3698 }, { "epoch": 0.05, "grad_norm": 9.692758560180664, "learning_rate": 8.00043257272629e-06, "loss": 3.0387, "step": 3699 }, { "epoch": 0.05, "grad_norm": 7.690461158752441, "learning_rate": 8.002595436357739e-06, "loss": 2.0592, "step": 3700 }, { "epoch": 0.05, "grad_norm": 8.682292938232422, "learning_rate": 8.004758299989187e-06, "loss": 3.1779, "step": 3701 }, { "epoch": 0.05, "grad_norm": 8.546664237976074, "learning_rate": 8.006921163620635e-06, "loss": 3.2682, "step": 3702 }, { "epoch": 0.05, "grad_norm": 9.43856430053711, "learning_rate": 8.009084027252083e-06, "loss": 3.2078, "step": 3703 }, { "epoch": 0.05, "grad_norm": 9.485729217529297, "learning_rate": 8.011246890883531e-06, "loss": 3.5355, "step": 3704 }, { "epoch": 0.05, "grad_norm": 8.818986892700195, "learning_rate": 8.013409754514977e-06, "loss": 2.691, "step": 3705 }, { "epoch": 0.05, "grad_norm": 9.222651481628418, "learning_rate": 8.015572618146427e-06, "loss": 2.9907, "step": 3706 }, { "epoch": 0.05, "grad_norm": 8.96837329864502, "learning_rate": 8.017735481777873e-06, "loss": 3.4104, "step": 3707 }, { "epoch": 0.05, "grad_norm": 10.16411304473877, "learning_rate": 8.019898345409323e-06, "loss": 3.1605, "step": 3708 }, { "epoch": 0.05, "grad_norm": 9.902579307556152, "learning_rate": 8.02206120904077e-06, "loss": 3.3639, "step": 3709 }, { "epoch": 0.05, "grad_norm": 9.463008880615234, "learning_rate": 8.02422407267222e-06, "loss": 3.4224, "step": 3710 }, { "epoch": 0.05, "grad_norm": 8.367239952087402, "learning_rate": 8.026386936303666e-06, "loss": 2.6979, "step": 3711 }, { "epoch": 0.05, "grad_norm": 9.723749160766602, "learning_rate": 8.028549799935115e-06, "loss": 3.4864, "step": 3712 }, { "epoch": 0.05, "grad_norm": 9.80351448059082, "learning_rate": 8.030712663566563e-06, "loss": 3.0078, "step": 3713 }, { "epoch": 0.05, "grad_norm": 10.44655990600586, "learning_rate": 8.032875527198011e-06, "loss": 3.3181, "step": 3714 }, { "epoch": 0.05, "grad_norm": 8.755904197692871, "learning_rate": 8.03503839082946e-06, "loss": 3.3915, "step": 3715 }, { "epoch": 0.05, "grad_norm": 9.540216445922852, "learning_rate": 8.037201254460907e-06, "loss": 3.2872, "step": 3716 }, { "epoch": 0.05, "grad_norm": 8.721207618713379, "learning_rate": 8.039364118092356e-06, "loss": 3.6256, "step": 3717 }, { "epoch": 0.05, "grad_norm": 9.117819786071777, "learning_rate": 8.041526981723804e-06, "loss": 3.1999, "step": 3718 }, { "epoch": 0.05, "grad_norm": 10.108464241027832, "learning_rate": 8.043689845355252e-06, "loss": 3.4559, "step": 3719 }, { "epoch": 0.05, "grad_norm": 8.819917678833008, "learning_rate": 8.0458527089867e-06, "loss": 3.1743, "step": 3720 }, { "epoch": 0.05, "grad_norm": 8.817347526550293, "learning_rate": 8.048015572618148e-06, "loss": 2.4246, "step": 3721 }, { "epoch": 0.05, "grad_norm": 10.228372573852539, "learning_rate": 8.050178436249596e-06, "loss": 3.3915, "step": 3722 }, { "epoch": 0.05, "grad_norm": 8.860735893249512, "learning_rate": 8.052341299881044e-06, "loss": 3.0866, "step": 3723 }, { "epoch": 0.05, "grad_norm": 9.181924819946289, "learning_rate": 8.05450416351249e-06, "loss": 3.0174, "step": 3724 }, { "epoch": 0.05, "grad_norm": 10.003175735473633, "learning_rate": 8.05666702714394e-06, "loss": 3.3022, "step": 3725 }, { "epoch": 0.05, "grad_norm": 10.285016059875488, "learning_rate": 8.058829890775386e-06, "loss": 2.4932, "step": 3726 }, { "epoch": 0.05, "grad_norm": 8.677552223205566, "learning_rate": 8.060992754406836e-06, "loss": 3.2292, "step": 3727 }, { "epoch": 0.05, "grad_norm": 10.36501693725586, "learning_rate": 8.063155618038282e-06, "loss": 3.7517, "step": 3728 }, { "epoch": 0.05, "grad_norm": 9.747029304504395, "learning_rate": 8.065318481669732e-06, "loss": 3.8264, "step": 3729 }, { "epoch": 0.05, "grad_norm": 9.612131118774414, "learning_rate": 8.067481345301178e-06, "loss": 3.6249, "step": 3730 }, { "epoch": 0.05, "grad_norm": 8.721577644348145, "learning_rate": 8.069644208932628e-06, "loss": 3.3226, "step": 3731 }, { "epoch": 0.05, "grad_norm": 8.613900184631348, "learning_rate": 8.071807072564075e-06, "loss": 3.2963, "step": 3732 }, { "epoch": 0.05, "grad_norm": 9.427957534790039, "learning_rate": 8.073969936195524e-06, "loss": 3.7379, "step": 3733 }, { "epoch": 0.05, "grad_norm": 9.140765190124512, "learning_rate": 8.07613279982697e-06, "loss": 3.2884, "step": 3734 }, { "epoch": 0.05, "grad_norm": 9.265832901000977, "learning_rate": 8.07829566345842e-06, "loss": 3.0382, "step": 3735 }, { "epoch": 0.05, "grad_norm": 9.925383567810059, "learning_rate": 8.080458527089867e-06, "loss": 3.0329, "step": 3736 }, { "epoch": 0.05, "grad_norm": 10.761649131774902, "learning_rate": 8.082621390721317e-06, "loss": 2.5831, "step": 3737 }, { "epoch": 0.05, "grad_norm": 9.325736999511719, "learning_rate": 8.084784254352763e-06, "loss": 3.2739, "step": 3738 }, { "epoch": 0.05, "grad_norm": 9.12748908996582, "learning_rate": 8.086947117984213e-06, "loss": 2.8752, "step": 3739 }, { "epoch": 0.05, "grad_norm": 8.187711715698242, "learning_rate": 8.089109981615659e-06, "loss": 3.5543, "step": 3740 }, { "epoch": 0.05, "grad_norm": 9.251471519470215, "learning_rate": 8.091272845247107e-06, "loss": 3.5605, "step": 3741 }, { "epoch": 0.05, "grad_norm": 8.88170337677002, "learning_rate": 8.093435708878555e-06, "loss": 3.3988, "step": 3742 }, { "epoch": 0.05, "grad_norm": 8.057629585266113, "learning_rate": 8.095598572510003e-06, "loss": 3.1242, "step": 3743 }, { "epoch": 0.05, "grad_norm": 9.231304168701172, "learning_rate": 8.097761436141453e-06, "loss": 2.6531, "step": 3744 }, { "epoch": 0.05, "grad_norm": 9.67265796661377, "learning_rate": 8.0999242997729e-06, "loss": 3.2141, "step": 3745 }, { "epoch": 0.05, "grad_norm": 9.285201072692871, "learning_rate": 8.102087163404349e-06, "loss": 2.9878, "step": 3746 }, { "epoch": 0.05, "grad_norm": 12.007101058959961, "learning_rate": 8.104250027035795e-06, "loss": 3.1598, "step": 3747 }, { "epoch": 0.05, "grad_norm": 10.12939739227295, "learning_rate": 8.106412890667245e-06, "loss": 3.1734, "step": 3748 }, { "epoch": 0.05, "grad_norm": 9.650486946105957, "learning_rate": 8.108575754298691e-06, "loss": 3.1337, "step": 3749 }, { "epoch": 0.05, "grad_norm": 9.413607597351074, "learning_rate": 8.110738617930141e-06, "loss": 3.0452, "step": 3750 }, { "epoch": 0.05, "grad_norm": 10.147652626037598, "learning_rate": 8.112901481561588e-06, "loss": 3.4111, "step": 3751 }, { "epoch": 0.05, "grad_norm": 7.663844108581543, "learning_rate": 8.115064345193037e-06, "loss": 2.7245, "step": 3752 }, { "epoch": 0.05, "grad_norm": 7.90678071975708, "learning_rate": 8.117227208824484e-06, "loss": 2.7699, "step": 3753 }, { "epoch": 0.05, "grad_norm": 9.008427619934082, "learning_rate": 8.119390072455933e-06, "loss": 3.0294, "step": 3754 }, { "epoch": 0.05, "grad_norm": 8.580975532531738, "learning_rate": 8.12155293608738e-06, "loss": 2.6169, "step": 3755 }, { "epoch": 0.05, "grad_norm": 9.927452087402344, "learning_rate": 8.12371579971883e-06, "loss": 3.1266, "step": 3756 }, { "epoch": 0.05, "grad_norm": 9.503373146057129, "learning_rate": 8.125878663350276e-06, "loss": 3.4112, "step": 3757 }, { "epoch": 0.05, "grad_norm": 8.514471054077148, "learning_rate": 8.128041526981726e-06, "loss": 3.5403, "step": 3758 }, { "epoch": 0.05, "grad_norm": 9.530644416809082, "learning_rate": 8.130204390613172e-06, "loss": 3.2884, "step": 3759 }, { "epoch": 0.05, "grad_norm": 9.582755088806152, "learning_rate": 8.13236725424462e-06, "loss": 2.7417, "step": 3760 }, { "epoch": 0.05, "grad_norm": 9.831268310546875, "learning_rate": 8.134530117876068e-06, "loss": 3.0466, "step": 3761 }, { "epoch": 0.05, "grad_norm": 9.620420455932617, "learning_rate": 8.136692981507516e-06, "loss": 2.7303, "step": 3762 }, { "epoch": 0.05, "grad_norm": 9.75715446472168, "learning_rate": 8.138855845138964e-06, "loss": 3.4607, "step": 3763 }, { "epoch": 0.05, "grad_norm": 9.369241714477539, "learning_rate": 8.141018708770412e-06, "loss": 2.6705, "step": 3764 }, { "epoch": 0.05, "grad_norm": 8.726926803588867, "learning_rate": 8.14318157240186e-06, "loss": 2.625, "step": 3765 }, { "epoch": 0.05, "grad_norm": 8.70701789855957, "learning_rate": 8.145344436033308e-06, "loss": 2.9887, "step": 3766 }, { "epoch": 0.05, "grad_norm": 8.8941068649292, "learning_rate": 8.147507299664756e-06, "loss": 2.9837, "step": 3767 }, { "epoch": 0.05, "grad_norm": 9.032315254211426, "learning_rate": 8.149670163296204e-06, "loss": 3.3552, "step": 3768 }, { "epoch": 0.05, "grad_norm": 8.701313018798828, "learning_rate": 8.151833026927652e-06, "loss": 3.0501, "step": 3769 }, { "epoch": 0.05, "grad_norm": 8.836979866027832, "learning_rate": 8.1539958905591e-06, "loss": 3.0126, "step": 3770 }, { "epoch": 0.05, "grad_norm": 9.647256851196289, "learning_rate": 8.156158754190549e-06, "loss": 2.8354, "step": 3771 }, { "epoch": 0.05, "grad_norm": 9.28395938873291, "learning_rate": 8.158321617821997e-06, "loss": 2.9135, "step": 3772 }, { "epoch": 0.05, "grad_norm": 8.9301176071167, "learning_rate": 8.160484481453445e-06, "loss": 2.9296, "step": 3773 }, { "epoch": 0.05, "grad_norm": 9.018721580505371, "learning_rate": 8.162647345084893e-06, "loss": 2.9927, "step": 3774 }, { "epoch": 0.05, "grad_norm": 9.93940258026123, "learning_rate": 8.16481020871634e-06, "loss": 3.3409, "step": 3775 }, { "epoch": 0.05, "grad_norm": 8.913203239440918, "learning_rate": 8.166973072347789e-06, "loss": 2.9504, "step": 3776 }, { "epoch": 0.05, "grad_norm": 10.055932998657227, "learning_rate": 8.169135935979237e-06, "loss": 2.9388, "step": 3777 }, { "epoch": 0.05, "grad_norm": 9.224410057067871, "learning_rate": 8.171298799610685e-06, "loss": 2.5269, "step": 3778 }, { "epoch": 0.05, "grad_norm": 8.804045677185059, "learning_rate": 8.173461663242133e-06, "loss": 2.7961, "step": 3779 }, { "epoch": 0.05, "grad_norm": 9.172745704650879, "learning_rate": 8.175624526873581e-06, "loss": 2.8319, "step": 3780 }, { "epoch": 0.05, "grad_norm": 8.6329927444458, "learning_rate": 8.177787390505029e-06, "loss": 2.8462, "step": 3781 }, { "epoch": 0.05, "grad_norm": 9.333276748657227, "learning_rate": 8.179950254136477e-06, "loss": 2.7928, "step": 3782 }, { "epoch": 0.05, "grad_norm": 9.707723617553711, "learning_rate": 8.182113117767925e-06, "loss": 3.093, "step": 3783 }, { "epoch": 0.05, "grad_norm": 9.686716079711914, "learning_rate": 8.184275981399373e-06, "loss": 2.9288, "step": 3784 }, { "epoch": 0.05, "grad_norm": 9.122269630432129, "learning_rate": 8.186438845030821e-06, "loss": 3.0135, "step": 3785 }, { "epoch": 0.05, "grad_norm": 8.498295783996582, "learning_rate": 8.18860170866227e-06, "loss": 2.7033, "step": 3786 }, { "epoch": 0.05, "grad_norm": 8.628185272216797, "learning_rate": 8.190764572293717e-06, "loss": 3.0428, "step": 3787 }, { "epoch": 0.05, "grad_norm": 8.88815689086914, "learning_rate": 8.192927435925165e-06, "loss": 2.7807, "step": 3788 }, { "epoch": 0.05, "grad_norm": 10.703010559082031, "learning_rate": 8.195090299556613e-06, "loss": 2.9091, "step": 3789 }, { "epoch": 0.05, "grad_norm": 9.488252639770508, "learning_rate": 8.197253163188062e-06, "loss": 2.816, "step": 3790 }, { "epoch": 0.05, "grad_norm": 9.981250762939453, "learning_rate": 8.19941602681951e-06, "loss": 3.1678, "step": 3791 }, { "epoch": 0.05, "grad_norm": 7.648041725158691, "learning_rate": 8.201578890450958e-06, "loss": 2.8477, "step": 3792 }, { "epoch": 0.05, "grad_norm": 9.547133445739746, "learning_rate": 8.203741754082406e-06, "loss": 2.9142, "step": 3793 }, { "epoch": 0.05, "grad_norm": 11.477535247802734, "learning_rate": 8.205904617713854e-06, "loss": 3.2951, "step": 3794 }, { "epoch": 0.05, "grad_norm": 9.007528305053711, "learning_rate": 8.208067481345302e-06, "loss": 3.1345, "step": 3795 }, { "epoch": 0.05, "grad_norm": 9.71711254119873, "learning_rate": 8.21023034497675e-06, "loss": 3.3031, "step": 3796 }, { "epoch": 0.05, "grad_norm": 8.688998222351074, "learning_rate": 8.212393208608198e-06, "loss": 2.2532, "step": 3797 }, { "epoch": 0.05, "grad_norm": 9.071009635925293, "learning_rate": 8.214556072239646e-06, "loss": 2.992, "step": 3798 }, { "epoch": 0.05, "grad_norm": 9.272884368896484, "learning_rate": 8.216718935871094e-06, "loss": 3.0863, "step": 3799 }, { "epoch": 0.05, "grad_norm": 7.421327114105225, "learning_rate": 8.218881799502542e-06, "loss": 2.6799, "step": 3800 }, { "epoch": 0.05, "grad_norm": 8.579792976379395, "learning_rate": 8.22104466313399e-06, "loss": 2.6072, "step": 3801 }, { "epoch": 0.05, "grad_norm": 9.457518577575684, "learning_rate": 8.223207526765438e-06, "loss": 3.1464, "step": 3802 }, { "epoch": 0.05, "grad_norm": 9.39047622680664, "learning_rate": 8.225370390396886e-06, "loss": 2.9085, "step": 3803 }, { "epoch": 0.05, "grad_norm": 7.644776344299316, "learning_rate": 8.227533254028334e-06, "loss": 2.9138, "step": 3804 }, { "epoch": 0.05, "grad_norm": 8.620095252990723, "learning_rate": 8.229696117659782e-06, "loss": 2.7147, "step": 3805 }, { "epoch": 0.05, "grad_norm": 7.858652114868164, "learning_rate": 8.23185898129123e-06, "loss": 2.6507, "step": 3806 }, { "epoch": 0.05, "grad_norm": 9.317201614379883, "learning_rate": 8.234021844922678e-06, "loss": 2.8706, "step": 3807 }, { "epoch": 0.05, "grad_norm": 8.890693664550781, "learning_rate": 8.236184708554126e-06, "loss": 3.2003, "step": 3808 }, { "epoch": 0.05, "grad_norm": 9.269213676452637, "learning_rate": 8.238347572185574e-06, "loss": 3.3818, "step": 3809 }, { "epoch": 0.05, "grad_norm": 9.268465042114258, "learning_rate": 8.240510435817023e-06, "loss": 3.6433, "step": 3810 }, { "epoch": 0.05, "grad_norm": 9.095234870910645, "learning_rate": 8.24267329944847e-06, "loss": 3.1912, "step": 3811 }, { "epoch": 0.05, "grad_norm": 9.912004470825195, "learning_rate": 8.244836163079919e-06, "loss": 3.5758, "step": 3812 }, { "epoch": 0.05, "grad_norm": 8.761968612670898, "learning_rate": 8.246999026711367e-06, "loss": 2.9523, "step": 3813 }, { "epoch": 0.05, "grad_norm": 9.633899688720703, "learning_rate": 8.249161890342815e-06, "loss": 3.2335, "step": 3814 }, { "epoch": 0.05, "grad_norm": 8.727005004882812, "learning_rate": 8.251324753974263e-06, "loss": 2.4843, "step": 3815 }, { "epoch": 0.05, "grad_norm": 9.313236236572266, "learning_rate": 8.253487617605711e-06, "loss": 3.3283, "step": 3816 }, { "epoch": 0.05, "grad_norm": 7.796602725982666, "learning_rate": 8.255650481237159e-06, "loss": 3.1383, "step": 3817 }, { "epoch": 0.05, "grad_norm": 8.542948722839355, "learning_rate": 8.257813344868607e-06, "loss": 3.2397, "step": 3818 }, { "epoch": 0.05, "grad_norm": 10.124512672424316, "learning_rate": 8.259976208500055e-06, "loss": 3.0112, "step": 3819 }, { "epoch": 0.05, "grad_norm": 9.854522705078125, "learning_rate": 8.262139072131503e-06, "loss": 2.948, "step": 3820 }, { "epoch": 0.05, "grad_norm": 9.34694766998291, "learning_rate": 8.264301935762951e-06, "loss": 3.2623, "step": 3821 }, { "epoch": 0.05, "grad_norm": 9.79365062713623, "learning_rate": 8.266464799394399e-06, "loss": 3.015, "step": 3822 }, { "epoch": 0.05, "grad_norm": 9.888575553894043, "learning_rate": 8.268627663025847e-06, "loss": 3.37, "step": 3823 }, { "epoch": 0.05, "grad_norm": 8.446894645690918, "learning_rate": 8.270790526657295e-06, "loss": 3.021, "step": 3824 }, { "epoch": 0.05, "grad_norm": 9.260197639465332, "learning_rate": 8.272953390288743e-06, "loss": 2.5449, "step": 3825 }, { "epoch": 0.05, "grad_norm": 8.77938461303711, "learning_rate": 8.275116253920191e-06, "loss": 2.935, "step": 3826 }, { "epoch": 0.05, "grad_norm": 8.699198722839355, "learning_rate": 8.27727911755164e-06, "loss": 3.0166, "step": 3827 }, { "epoch": 0.05, "grad_norm": 8.711430549621582, "learning_rate": 8.279441981183087e-06, "loss": 2.846, "step": 3828 }, { "epoch": 0.05, "grad_norm": 9.503653526306152, "learning_rate": 8.281604844814536e-06, "loss": 3.0179, "step": 3829 }, { "epoch": 0.05, "grad_norm": 8.075464248657227, "learning_rate": 8.283767708445984e-06, "loss": 2.2707, "step": 3830 }, { "epoch": 0.05, "grad_norm": 9.407936096191406, "learning_rate": 8.285930572077432e-06, "loss": 2.4834, "step": 3831 }, { "epoch": 0.05, "grad_norm": 10.019308090209961, "learning_rate": 8.28809343570888e-06, "loss": 2.5811, "step": 3832 }, { "epoch": 0.05, "grad_norm": 9.440389633178711, "learning_rate": 8.290256299340326e-06, "loss": 2.8922, "step": 3833 }, { "epoch": 0.05, "grad_norm": 9.173407554626465, "learning_rate": 8.292419162971776e-06, "loss": 3.4236, "step": 3834 }, { "epoch": 0.05, "grad_norm": 8.445719718933105, "learning_rate": 8.294582026603222e-06, "loss": 3.2903, "step": 3835 }, { "epoch": 0.05, "grad_norm": 9.008198738098145, "learning_rate": 8.296744890234672e-06, "loss": 3.1288, "step": 3836 }, { "epoch": 0.05, "grad_norm": 9.1677827835083, "learning_rate": 8.298907753866118e-06, "loss": 2.7797, "step": 3837 }, { "epoch": 0.05, "grad_norm": 9.051206588745117, "learning_rate": 8.301070617497568e-06, "loss": 2.7358, "step": 3838 }, { "epoch": 0.05, "grad_norm": 9.521658897399902, "learning_rate": 8.303233481129014e-06, "loss": 3.2092, "step": 3839 }, { "epoch": 0.05, "grad_norm": 8.853841781616211, "learning_rate": 8.305396344760464e-06, "loss": 3.3189, "step": 3840 }, { "epoch": 0.05, "grad_norm": 9.914953231811523, "learning_rate": 8.307559208391912e-06, "loss": 3.1788, "step": 3841 }, { "epoch": 0.05, "grad_norm": 9.248640060424805, "learning_rate": 8.30972207202336e-06, "loss": 2.6086, "step": 3842 }, { "epoch": 0.05, "grad_norm": 9.395408630371094, "learning_rate": 8.311884935654808e-06, "loss": 3.1313, "step": 3843 }, { "epoch": 0.05, "grad_norm": 8.887468338012695, "learning_rate": 8.314047799286256e-06, "loss": 3.1104, "step": 3844 }, { "epoch": 0.05, "grad_norm": 9.484223365783691, "learning_rate": 8.316210662917704e-06, "loss": 3.4171, "step": 3845 }, { "epoch": 0.05, "grad_norm": 8.208846092224121, "learning_rate": 8.318373526549152e-06, "loss": 3.1737, "step": 3846 }, { "epoch": 0.05, "grad_norm": 9.309222221374512, "learning_rate": 8.3205363901806e-06, "loss": 2.836, "step": 3847 }, { "epoch": 0.05, "grad_norm": 9.164985656738281, "learning_rate": 8.322699253812048e-06, "loss": 3.5107, "step": 3848 }, { "epoch": 0.05, "grad_norm": 9.215846061706543, "learning_rate": 8.324862117443497e-06, "loss": 2.7699, "step": 3849 }, { "epoch": 0.05, "grad_norm": 9.049200057983398, "learning_rate": 8.327024981074943e-06, "loss": 3.3098, "step": 3850 }, { "epoch": 0.05, "grad_norm": 9.889158248901367, "learning_rate": 8.329187844706393e-06, "loss": 2.63, "step": 3851 }, { "epoch": 0.05, "grad_norm": 10.2384033203125, "learning_rate": 8.331350708337839e-06, "loss": 3.517, "step": 3852 }, { "epoch": 0.05, "grad_norm": 8.710203170776367, "learning_rate": 8.333513571969289e-06, "loss": 3.2095, "step": 3853 }, { "epoch": 0.05, "grad_norm": 8.661845207214355, "learning_rate": 8.335676435600735e-06, "loss": 2.7244, "step": 3854 }, { "epoch": 0.05, "grad_norm": 8.991435050964355, "learning_rate": 8.337839299232185e-06, "loss": 3.5483, "step": 3855 }, { "epoch": 0.05, "grad_norm": 8.764918327331543, "learning_rate": 8.340002162863631e-06, "loss": 3.0969, "step": 3856 }, { "epoch": 0.05, "grad_norm": 9.32000732421875, "learning_rate": 8.342165026495081e-06, "loss": 2.8746, "step": 3857 }, { "epoch": 0.05, "grad_norm": 9.209311485290527, "learning_rate": 8.344327890126527e-06, "loss": 3.893, "step": 3858 }, { "epoch": 0.05, "grad_norm": 11.207950592041016, "learning_rate": 8.346490753757977e-06, "loss": 3.3443, "step": 3859 }, { "epoch": 0.05, "grad_norm": 8.490030288696289, "learning_rate": 8.348653617389423e-06, "loss": 3.1183, "step": 3860 }, { "epoch": 0.05, "grad_norm": 9.036481857299805, "learning_rate": 8.350816481020873e-06, "loss": 3.0009, "step": 3861 }, { "epoch": 0.05, "grad_norm": 11.375550270080566, "learning_rate": 8.35297934465232e-06, "loss": 3.014, "step": 3862 }, { "epoch": 0.05, "grad_norm": 8.95573902130127, "learning_rate": 8.35514220828377e-06, "loss": 2.56, "step": 3863 }, { "epoch": 0.05, "grad_norm": 8.537416458129883, "learning_rate": 8.357305071915216e-06, "loss": 2.7794, "step": 3864 }, { "epoch": 0.05, "grad_norm": 8.974163055419922, "learning_rate": 8.359467935546665e-06, "loss": 3.2704, "step": 3865 }, { "epoch": 0.05, "grad_norm": 8.946998596191406, "learning_rate": 8.361630799178112e-06, "loss": 2.9563, "step": 3866 }, { "epoch": 0.05, "grad_norm": 8.705121040344238, "learning_rate": 8.363793662809561e-06, "loss": 3.0239, "step": 3867 }, { "epoch": 0.05, "grad_norm": 10.474958419799805, "learning_rate": 8.365956526441008e-06, "loss": 2.8429, "step": 3868 }, { "epoch": 0.05, "grad_norm": 9.830282211303711, "learning_rate": 8.368119390072456e-06, "loss": 2.8737, "step": 3869 }, { "epoch": 0.05, "grad_norm": 8.859349250793457, "learning_rate": 8.370282253703904e-06, "loss": 3.4006, "step": 3870 }, { "epoch": 0.05, "grad_norm": 9.447235107421875, "learning_rate": 8.372445117335352e-06, "loss": 3.3552, "step": 3871 }, { "epoch": 0.05, "grad_norm": 9.903191566467285, "learning_rate": 8.374607980966802e-06, "loss": 2.8417, "step": 3872 }, { "epoch": 0.05, "grad_norm": 9.27476692199707, "learning_rate": 8.376770844598248e-06, "loss": 3.0728, "step": 3873 }, { "epoch": 0.05, "grad_norm": 9.001360893249512, "learning_rate": 8.378933708229698e-06, "loss": 2.8602, "step": 3874 }, { "epoch": 0.05, "grad_norm": 8.705244064331055, "learning_rate": 8.381096571861144e-06, "loss": 3.0818, "step": 3875 }, { "epoch": 0.05, "grad_norm": 9.044392585754395, "learning_rate": 8.383259435492594e-06, "loss": 2.837, "step": 3876 }, { "epoch": 0.05, "grad_norm": 9.001946449279785, "learning_rate": 8.38542229912404e-06, "loss": 3.1454, "step": 3877 }, { "epoch": 0.05, "grad_norm": 8.872841835021973, "learning_rate": 8.38758516275549e-06, "loss": 2.6545, "step": 3878 }, { "epoch": 0.05, "grad_norm": 9.732684135437012, "learning_rate": 8.389748026386936e-06, "loss": 3.5908, "step": 3879 }, { "epoch": 0.05, "grad_norm": 8.970488548278809, "learning_rate": 8.391910890018386e-06, "loss": 3.3792, "step": 3880 }, { "epoch": 0.05, "grad_norm": 9.56313419342041, "learning_rate": 8.394073753649832e-06, "loss": 2.7208, "step": 3881 }, { "epoch": 0.05, "grad_norm": 9.928675651550293, "learning_rate": 8.396236617281282e-06, "loss": 3.7102, "step": 3882 }, { "epoch": 0.05, "grad_norm": 8.281306266784668, "learning_rate": 8.398399480912729e-06, "loss": 2.6298, "step": 3883 }, { "epoch": 0.05, "grad_norm": 9.215465545654297, "learning_rate": 8.400562344544178e-06, "loss": 2.8177, "step": 3884 }, { "epoch": 0.05, "grad_norm": 9.623167037963867, "learning_rate": 8.402725208175625e-06, "loss": 3.0303, "step": 3885 }, { "epoch": 0.05, "grad_norm": 9.162182807922363, "learning_rate": 8.404888071807074e-06, "loss": 2.74, "step": 3886 }, { "epoch": 0.05, "grad_norm": 9.470916748046875, "learning_rate": 8.40705093543852e-06, "loss": 3.1009, "step": 3887 }, { "epoch": 0.05, "grad_norm": 9.42270278930664, "learning_rate": 8.409213799069969e-06, "loss": 3.0552, "step": 3888 }, { "epoch": 0.05, "grad_norm": 8.962117195129395, "learning_rate": 8.411376662701417e-06, "loss": 3.0685, "step": 3889 }, { "epoch": 0.05, "grad_norm": 9.696828842163086, "learning_rate": 8.413539526332865e-06, "loss": 2.9366, "step": 3890 }, { "epoch": 0.05, "grad_norm": 8.425894737243652, "learning_rate": 8.415702389964313e-06, "loss": 2.6061, "step": 3891 }, { "epoch": 0.05, "grad_norm": 9.915226936340332, "learning_rate": 8.417865253595761e-06, "loss": 3.236, "step": 3892 }, { "epoch": 0.05, "grad_norm": 10.893670082092285, "learning_rate": 8.420028117227209e-06, "loss": 3.7377, "step": 3893 }, { "epoch": 0.05, "grad_norm": 9.788016319274902, "learning_rate": 8.422190980858657e-06, "loss": 2.5446, "step": 3894 }, { "epoch": 0.05, "grad_norm": 8.97213363647461, "learning_rate": 8.424353844490105e-06, "loss": 3.0426, "step": 3895 }, { "epoch": 0.05, "grad_norm": 8.925466537475586, "learning_rate": 8.426516708121553e-06, "loss": 2.7329, "step": 3896 }, { "epoch": 0.05, "grad_norm": 10.123395919799805, "learning_rate": 8.428679571753001e-06, "loss": 3.4929, "step": 3897 }, { "epoch": 0.05, "grad_norm": 8.817030906677246, "learning_rate": 8.43084243538445e-06, "loss": 2.8882, "step": 3898 }, { "epoch": 0.05, "grad_norm": 8.467645645141602, "learning_rate": 8.433005299015897e-06, "loss": 3.4044, "step": 3899 }, { "epoch": 0.05, "grad_norm": 10.960697174072266, "learning_rate": 8.435168162647345e-06, "loss": 2.9663, "step": 3900 }, { "epoch": 0.05, "grad_norm": 10.315293312072754, "learning_rate": 8.437331026278793e-06, "loss": 2.8122, "step": 3901 }, { "epoch": 0.05, "grad_norm": 10.825005531311035, "learning_rate": 8.439493889910242e-06, "loss": 2.9483, "step": 3902 }, { "epoch": 0.05, "grad_norm": 8.929671287536621, "learning_rate": 8.44165675354169e-06, "loss": 2.9685, "step": 3903 }, { "epoch": 0.05, "grad_norm": 8.743001937866211, "learning_rate": 8.443819617173138e-06, "loss": 3.1938, "step": 3904 }, { "epoch": 0.05, "grad_norm": 8.694761276245117, "learning_rate": 8.445982480804586e-06, "loss": 2.747, "step": 3905 }, { "epoch": 0.05, "grad_norm": 9.7551851272583, "learning_rate": 8.448145344436034e-06, "loss": 3.5192, "step": 3906 }, { "epoch": 0.05, "grad_norm": 8.953680038452148, "learning_rate": 8.450308208067482e-06, "loss": 2.7787, "step": 3907 }, { "epoch": 0.05, "grad_norm": 10.961297988891602, "learning_rate": 8.45247107169893e-06, "loss": 3.4223, "step": 3908 }, { "epoch": 0.05, "grad_norm": 8.70428466796875, "learning_rate": 8.454633935330378e-06, "loss": 2.7937, "step": 3909 }, { "epoch": 0.05, "grad_norm": 9.807004928588867, "learning_rate": 8.456796798961826e-06, "loss": 3.3656, "step": 3910 }, { "epoch": 0.05, "grad_norm": 9.430096626281738, "learning_rate": 8.458959662593274e-06, "loss": 3.0366, "step": 3911 }, { "epoch": 0.05, "grad_norm": 9.527693748474121, "learning_rate": 8.461122526224722e-06, "loss": 2.9766, "step": 3912 }, { "epoch": 0.05, "grad_norm": 8.186582565307617, "learning_rate": 8.46328538985617e-06, "loss": 2.8251, "step": 3913 }, { "epoch": 0.05, "grad_norm": 8.754476547241211, "learning_rate": 8.465448253487618e-06, "loss": 3.0631, "step": 3914 }, { "epoch": 0.05, "grad_norm": 9.714693069458008, "learning_rate": 8.467611117119066e-06, "loss": 3.2423, "step": 3915 }, { "epoch": 0.05, "grad_norm": 9.583905220031738, "learning_rate": 8.469773980750514e-06, "loss": 3.1015, "step": 3916 }, { "epoch": 0.05, "grad_norm": 10.025774002075195, "learning_rate": 8.471936844381962e-06, "loss": 3.0947, "step": 3917 }, { "epoch": 0.05, "grad_norm": 8.42419147491455, "learning_rate": 8.47409970801341e-06, "loss": 2.6438, "step": 3918 }, { "epoch": 0.05, "grad_norm": 10.2417631149292, "learning_rate": 8.476262571644858e-06, "loss": 2.8454, "step": 3919 }, { "epoch": 0.05, "grad_norm": 10.544669151306152, "learning_rate": 8.478425435276306e-06, "loss": 2.9185, "step": 3920 }, { "epoch": 0.05, "grad_norm": 9.413046836853027, "learning_rate": 8.480588298907754e-06, "loss": 2.8681, "step": 3921 }, { "epoch": 0.05, "grad_norm": 9.785501480102539, "learning_rate": 8.482751162539203e-06, "loss": 2.6363, "step": 3922 }, { "epoch": 0.05, "grad_norm": 7.881240367889404, "learning_rate": 8.48491402617065e-06, "loss": 2.9261, "step": 3923 }, { "epoch": 0.05, "grad_norm": 8.952569961547852, "learning_rate": 8.487076889802099e-06, "loss": 2.8, "step": 3924 }, { "epoch": 0.05, "grad_norm": 8.375889778137207, "learning_rate": 8.489239753433547e-06, "loss": 2.9699, "step": 3925 }, { "epoch": 0.05, "grad_norm": 9.350298881530762, "learning_rate": 8.491402617064995e-06, "loss": 3.3122, "step": 3926 }, { "epoch": 0.05, "grad_norm": 8.955368995666504, "learning_rate": 8.493565480696443e-06, "loss": 3.5523, "step": 3927 }, { "epoch": 0.05, "grad_norm": 10.722844123840332, "learning_rate": 8.49572834432789e-06, "loss": 3.4415, "step": 3928 }, { "epoch": 0.05, "grad_norm": 9.162018775939941, "learning_rate": 8.497891207959339e-06, "loss": 3.5322, "step": 3929 }, { "epoch": 0.05, "grad_norm": 8.690616607666016, "learning_rate": 8.500054071590787e-06, "loss": 2.7451, "step": 3930 }, { "epoch": 0.05, "grad_norm": 8.978919982910156, "learning_rate": 8.502216935222235e-06, "loss": 3.2003, "step": 3931 }, { "epoch": 0.05, "grad_norm": 10.170331954956055, "learning_rate": 8.504379798853683e-06, "loss": 3.3795, "step": 3932 }, { "epoch": 0.05, "grad_norm": 8.869660377502441, "learning_rate": 8.506542662485131e-06, "loss": 2.791, "step": 3933 }, { "epoch": 0.05, "grad_norm": 9.521307945251465, "learning_rate": 8.508705526116579e-06, "loss": 2.8922, "step": 3934 }, { "epoch": 0.05, "grad_norm": 9.41372013092041, "learning_rate": 8.510868389748027e-06, "loss": 2.7798, "step": 3935 }, { "epoch": 0.05, "grad_norm": 10.400505065917969, "learning_rate": 8.513031253379475e-06, "loss": 3.0622, "step": 3936 }, { "epoch": 0.05, "grad_norm": 9.196581840515137, "learning_rate": 8.515194117010923e-06, "loss": 2.864, "step": 3937 }, { "epoch": 0.05, "grad_norm": 9.19625473022461, "learning_rate": 8.517356980642371e-06, "loss": 3.2507, "step": 3938 }, { "epoch": 0.05, "grad_norm": 9.142780303955078, "learning_rate": 8.51951984427382e-06, "loss": 2.9865, "step": 3939 }, { "epoch": 0.05, "grad_norm": 9.297088623046875, "learning_rate": 8.521682707905267e-06, "loss": 3.3356, "step": 3940 }, { "epoch": 0.05, "grad_norm": 8.87759780883789, "learning_rate": 8.523845571536715e-06, "loss": 2.4714, "step": 3941 }, { "epoch": 0.05, "grad_norm": 9.54195499420166, "learning_rate": 8.526008435168164e-06, "loss": 3.1205, "step": 3942 }, { "epoch": 0.05, "grad_norm": 8.876233100891113, "learning_rate": 8.528171298799612e-06, "loss": 2.8117, "step": 3943 }, { "epoch": 0.05, "grad_norm": 8.165128707885742, "learning_rate": 8.53033416243106e-06, "loss": 2.9431, "step": 3944 }, { "epoch": 0.05, "grad_norm": 8.047612190246582, "learning_rate": 8.532497026062508e-06, "loss": 2.699, "step": 3945 }, { "epoch": 0.05, "grad_norm": 9.719016075134277, "learning_rate": 8.534659889693956e-06, "loss": 2.7498, "step": 3946 }, { "epoch": 0.05, "grad_norm": 9.034994125366211, "learning_rate": 8.536822753325404e-06, "loss": 3.3819, "step": 3947 }, { "epoch": 0.05, "grad_norm": 8.819297790527344, "learning_rate": 8.538985616956852e-06, "loss": 2.9372, "step": 3948 }, { "epoch": 0.05, "grad_norm": 9.298900604248047, "learning_rate": 8.5411484805883e-06, "loss": 3.0234, "step": 3949 }, { "epoch": 0.05, "grad_norm": 9.779215812683105, "learning_rate": 8.543311344219748e-06, "loss": 3.3407, "step": 3950 }, { "epoch": 0.05, "grad_norm": 9.722478866577148, "learning_rate": 8.545474207851196e-06, "loss": 2.8721, "step": 3951 }, { "epoch": 0.05, "grad_norm": 8.967328071594238, "learning_rate": 8.547637071482644e-06, "loss": 2.9985, "step": 3952 }, { "epoch": 0.05, "grad_norm": 8.60605525970459, "learning_rate": 8.549799935114092e-06, "loss": 2.8112, "step": 3953 }, { "epoch": 0.05, "grad_norm": 9.875555038452148, "learning_rate": 8.55196279874554e-06, "loss": 3.5487, "step": 3954 }, { "epoch": 0.05, "grad_norm": 8.254315376281738, "learning_rate": 8.554125662376988e-06, "loss": 2.8694, "step": 3955 }, { "epoch": 0.05, "grad_norm": 8.656900405883789, "learning_rate": 8.556288526008436e-06, "loss": 3.1075, "step": 3956 }, { "epoch": 0.05, "grad_norm": 8.672560691833496, "learning_rate": 8.558451389639884e-06, "loss": 3.4731, "step": 3957 }, { "epoch": 0.05, "grad_norm": 9.229491233825684, "learning_rate": 8.560614253271332e-06, "loss": 3.1348, "step": 3958 }, { "epoch": 0.05, "grad_norm": 9.062387466430664, "learning_rate": 8.56277711690278e-06, "loss": 3.2075, "step": 3959 }, { "epoch": 0.05, "grad_norm": 10.773560523986816, "learning_rate": 8.564939980534228e-06, "loss": 2.301, "step": 3960 }, { "epoch": 0.05, "grad_norm": 10.151290893554688, "learning_rate": 8.567102844165675e-06, "loss": 2.7033, "step": 3961 }, { "epoch": 0.05, "grad_norm": 7.987166881561279, "learning_rate": 8.569265707797125e-06, "loss": 2.7618, "step": 3962 }, { "epoch": 0.05, "grad_norm": 9.01098346710205, "learning_rate": 8.571428571428571e-06, "loss": 3.2307, "step": 3963 }, { "epoch": 0.05, "grad_norm": 8.752859115600586, "learning_rate": 8.57359143506002e-06, "loss": 3.0856, "step": 3964 }, { "epoch": 0.05, "grad_norm": 8.651603698730469, "learning_rate": 8.575754298691467e-06, "loss": 2.7434, "step": 3965 }, { "epoch": 0.05, "grad_norm": 9.67849349975586, "learning_rate": 8.577917162322917e-06, "loss": 3.3388, "step": 3966 }, { "epoch": 0.05, "grad_norm": 8.295194625854492, "learning_rate": 8.580080025954363e-06, "loss": 3.004, "step": 3967 }, { "epoch": 0.05, "grad_norm": 9.422528266906738, "learning_rate": 8.582242889585813e-06, "loss": 2.6681, "step": 3968 }, { "epoch": 0.05, "grad_norm": 9.57951545715332, "learning_rate": 8.584405753217261e-06, "loss": 3.2797, "step": 3969 }, { "epoch": 0.05, "grad_norm": 8.309218406677246, "learning_rate": 8.586568616848709e-06, "loss": 2.7965, "step": 3970 }, { "epoch": 0.05, "grad_norm": 8.555044174194336, "learning_rate": 8.588731480480157e-06, "loss": 2.9715, "step": 3971 }, { "epoch": 0.05, "grad_norm": 8.998228073120117, "learning_rate": 8.590894344111605e-06, "loss": 3.148, "step": 3972 }, { "epoch": 0.05, "grad_norm": 7.857123374938965, "learning_rate": 8.593057207743053e-06, "loss": 2.8142, "step": 3973 }, { "epoch": 0.05, "grad_norm": 8.641777992248535, "learning_rate": 8.595220071374501e-06, "loss": 2.6888, "step": 3974 }, { "epoch": 0.05, "grad_norm": 9.63951301574707, "learning_rate": 8.59738293500595e-06, "loss": 3.0949, "step": 3975 }, { "epoch": 0.05, "grad_norm": 8.024959564208984, "learning_rate": 8.599545798637397e-06, "loss": 3.028, "step": 3976 }, { "epoch": 0.05, "grad_norm": 9.439231872558594, "learning_rate": 8.601708662268845e-06, "loss": 2.3949, "step": 3977 }, { "epoch": 0.05, "grad_norm": 9.880919456481934, "learning_rate": 8.603871525900292e-06, "loss": 2.8842, "step": 3978 }, { "epoch": 0.05, "grad_norm": 8.353240013122559, "learning_rate": 8.606034389531741e-06, "loss": 2.6643, "step": 3979 }, { "epoch": 0.05, "grad_norm": 9.516316413879395, "learning_rate": 8.608197253163188e-06, "loss": 3.7935, "step": 3980 }, { "epoch": 0.05, "grad_norm": 10.78878402709961, "learning_rate": 8.610360116794638e-06, "loss": 3.4461, "step": 3981 }, { "epoch": 0.05, "grad_norm": 8.971653938293457, "learning_rate": 8.612522980426084e-06, "loss": 3.2229, "step": 3982 }, { "epoch": 0.05, "grad_norm": 8.754180908203125, "learning_rate": 8.614685844057534e-06, "loss": 3.1612, "step": 3983 }, { "epoch": 0.05, "grad_norm": 9.135924339294434, "learning_rate": 8.61684870768898e-06, "loss": 2.7922, "step": 3984 }, { "epoch": 0.05, "grad_norm": 9.018221855163574, "learning_rate": 8.61901157132043e-06, "loss": 2.8071, "step": 3985 }, { "epoch": 0.05, "grad_norm": 7.5975213050842285, "learning_rate": 8.621174434951876e-06, "loss": 3.0667, "step": 3986 }, { "epoch": 0.05, "grad_norm": 9.341570854187012, "learning_rate": 8.623337298583326e-06, "loss": 2.7789, "step": 3987 }, { "epoch": 0.05, "grad_norm": 8.974851608276367, "learning_rate": 8.625500162214772e-06, "loss": 3.0072, "step": 3988 }, { "epoch": 0.05, "grad_norm": 8.393444061279297, "learning_rate": 8.627663025846222e-06, "loss": 3.3815, "step": 3989 }, { "epoch": 0.05, "grad_norm": 8.810868263244629, "learning_rate": 8.629825889477668e-06, "loss": 2.6049, "step": 3990 }, { "epoch": 0.05, "grad_norm": 8.430648803710938, "learning_rate": 8.631988753109118e-06, "loss": 3.1722, "step": 3991 }, { "epoch": 0.05, "grad_norm": 8.592202186584473, "learning_rate": 8.634151616740564e-06, "loss": 3.2828, "step": 3992 }, { "epoch": 0.05, "grad_norm": 9.628274917602539, "learning_rate": 8.636314480372014e-06, "loss": 3.584, "step": 3993 }, { "epoch": 0.05, "grad_norm": 8.975072860717773, "learning_rate": 8.63847734400346e-06, "loss": 3.3523, "step": 3994 }, { "epoch": 0.05, "grad_norm": 9.507913589477539, "learning_rate": 8.64064020763491e-06, "loss": 3.0328, "step": 3995 }, { "epoch": 0.05, "grad_norm": 7.920813083648682, "learning_rate": 8.642803071266357e-06, "loss": 2.811, "step": 3996 }, { "epoch": 0.05, "grad_norm": 7.9825849533081055, "learning_rate": 8.644965934897805e-06, "loss": 2.64, "step": 3997 }, { "epoch": 0.05, "grad_norm": 9.05380630493164, "learning_rate": 8.647128798529253e-06, "loss": 2.9994, "step": 3998 }, { "epoch": 0.05, "grad_norm": 8.231602668762207, "learning_rate": 8.6492916621607e-06, "loss": 3.0147, "step": 3999 }, { "epoch": 0.05, "grad_norm": 8.870224952697754, "learning_rate": 8.65145452579215e-06, "loss": 2.7619, "step": 4000 }, { "epoch": 0.05, "grad_norm": 8.961003303527832, "learning_rate": 8.653617389423597e-06, "loss": 3.1493, "step": 4001 }, { "epoch": 0.05, "grad_norm": 7.933650970458984, "learning_rate": 8.655780253055047e-06, "loss": 2.2314, "step": 4002 }, { "epoch": 0.05, "grad_norm": 9.626132011413574, "learning_rate": 8.657943116686493e-06, "loss": 2.7822, "step": 4003 }, { "epoch": 0.05, "grad_norm": 8.600468635559082, "learning_rate": 8.660105980317943e-06, "loss": 2.6711, "step": 4004 }, { "epoch": 0.05, "grad_norm": 9.267288208007812, "learning_rate": 8.662268843949389e-06, "loss": 2.7782, "step": 4005 }, { "epoch": 0.05, "grad_norm": 8.817869186401367, "learning_rate": 8.664431707580839e-06, "loss": 2.3285, "step": 4006 }, { "epoch": 0.05, "grad_norm": 9.072765350341797, "learning_rate": 8.666594571212285e-06, "loss": 2.804, "step": 4007 }, { "epoch": 0.05, "grad_norm": 9.484139442443848, "learning_rate": 8.668757434843735e-06, "loss": 3.4694, "step": 4008 }, { "epoch": 0.05, "grad_norm": 8.21414852142334, "learning_rate": 8.670920298475181e-06, "loss": 2.3677, "step": 4009 }, { "epoch": 0.05, "grad_norm": 8.805900573730469, "learning_rate": 8.673083162106631e-06, "loss": 2.6238, "step": 4010 }, { "epoch": 0.05, "grad_norm": 9.702001571655273, "learning_rate": 8.675246025738077e-06, "loss": 3.3307, "step": 4011 }, { "epoch": 0.05, "grad_norm": 9.142672538757324, "learning_rate": 8.677408889369527e-06, "loss": 3.3326, "step": 4012 }, { "epoch": 0.05, "grad_norm": 9.183880805969238, "learning_rate": 8.679571753000973e-06, "loss": 2.6893, "step": 4013 }, { "epoch": 0.05, "grad_norm": 8.482881546020508, "learning_rate": 8.681734616632422e-06, "loss": 2.5497, "step": 4014 }, { "epoch": 0.05, "grad_norm": 9.376914978027344, "learning_rate": 8.68389748026387e-06, "loss": 2.9284, "step": 4015 }, { "epoch": 0.05, "grad_norm": 10.448203086853027, "learning_rate": 8.686060343895318e-06, "loss": 3.4267, "step": 4016 }, { "epoch": 0.05, "grad_norm": 11.503868103027344, "learning_rate": 8.688223207526766e-06, "loss": 2.6651, "step": 4017 }, { "epoch": 0.05, "grad_norm": 7.877717018127441, "learning_rate": 8.690386071158214e-06, "loss": 3.1488, "step": 4018 }, { "epoch": 0.05, "grad_norm": 9.554637908935547, "learning_rate": 8.692548934789662e-06, "loss": 3.2225, "step": 4019 }, { "epoch": 0.05, "grad_norm": 10.443538665771484, "learning_rate": 8.69471179842111e-06, "loss": 2.8193, "step": 4020 }, { "epoch": 0.05, "grad_norm": 9.263248443603516, "learning_rate": 8.696874662052558e-06, "loss": 3.1035, "step": 4021 }, { "epoch": 0.05, "grad_norm": 8.6893892288208, "learning_rate": 8.699037525684006e-06, "loss": 3.0664, "step": 4022 }, { "epoch": 0.05, "grad_norm": 7.933259010314941, "learning_rate": 8.701200389315454e-06, "loss": 3.0403, "step": 4023 }, { "epoch": 0.05, "grad_norm": 10.254573822021484, "learning_rate": 8.703363252946902e-06, "loss": 2.9028, "step": 4024 }, { "epoch": 0.05, "grad_norm": 8.631780624389648, "learning_rate": 8.70552611657835e-06, "loss": 2.9748, "step": 4025 }, { "epoch": 0.05, "grad_norm": 8.498902320861816, "learning_rate": 8.707688980209798e-06, "loss": 3.1098, "step": 4026 }, { "epoch": 0.05, "grad_norm": 8.756924629211426, "learning_rate": 8.709851843841246e-06, "loss": 2.8136, "step": 4027 }, { "epoch": 0.05, "grad_norm": 9.152957916259766, "learning_rate": 8.712014707472694e-06, "loss": 3.0628, "step": 4028 }, { "epoch": 0.05, "grad_norm": 7.745417594909668, "learning_rate": 8.714177571104142e-06, "loss": 2.8688, "step": 4029 }, { "epoch": 0.05, "grad_norm": 8.228611946105957, "learning_rate": 8.71634043473559e-06, "loss": 2.8979, "step": 4030 }, { "epoch": 0.05, "grad_norm": 10.221994400024414, "learning_rate": 8.718503298367038e-06, "loss": 2.7814, "step": 4031 }, { "epoch": 0.05, "grad_norm": 8.387565612792969, "learning_rate": 8.720666161998486e-06, "loss": 2.5127, "step": 4032 }, { "epoch": 0.05, "grad_norm": 8.522855758666992, "learning_rate": 8.722829025629934e-06, "loss": 2.9065, "step": 4033 }, { "epoch": 0.05, "grad_norm": 8.613548278808594, "learning_rate": 8.724991889261383e-06, "loss": 2.9984, "step": 4034 }, { "epoch": 0.05, "grad_norm": 9.423046112060547, "learning_rate": 8.72715475289283e-06, "loss": 3.2082, "step": 4035 }, { "epoch": 0.05, "grad_norm": 8.571392059326172, "learning_rate": 8.729317616524279e-06, "loss": 3.2255, "step": 4036 }, { "epoch": 0.05, "grad_norm": 10.976279258728027, "learning_rate": 8.731480480155727e-06, "loss": 3.4037, "step": 4037 }, { "epoch": 0.05, "grad_norm": 8.546640396118164, "learning_rate": 8.733643343787175e-06, "loss": 3.2798, "step": 4038 }, { "epoch": 0.05, "grad_norm": 9.32177734375, "learning_rate": 8.735806207418623e-06, "loss": 3.0945, "step": 4039 }, { "epoch": 0.05, "grad_norm": 9.079109191894531, "learning_rate": 8.73796907105007e-06, "loss": 3.514, "step": 4040 }, { "epoch": 0.05, "grad_norm": 8.849634170532227, "learning_rate": 8.740131934681519e-06, "loss": 2.7304, "step": 4041 }, { "epoch": 0.05, "grad_norm": 8.716093063354492, "learning_rate": 8.742294798312967e-06, "loss": 2.9349, "step": 4042 }, { "epoch": 0.05, "grad_norm": 9.556936264038086, "learning_rate": 8.744457661944415e-06, "loss": 3.0663, "step": 4043 }, { "epoch": 0.05, "grad_norm": 9.617974281311035, "learning_rate": 8.746620525575863e-06, "loss": 2.8559, "step": 4044 }, { "epoch": 0.05, "grad_norm": 9.013410568237305, "learning_rate": 8.748783389207311e-06, "loss": 3.5354, "step": 4045 }, { "epoch": 0.05, "grad_norm": 9.29973316192627, "learning_rate": 8.750946252838759e-06, "loss": 2.9923, "step": 4046 }, { "epoch": 0.05, "grad_norm": 8.814167022705078, "learning_rate": 8.753109116470207e-06, "loss": 2.8143, "step": 4047 }, { "epoch": 0.05, "grad_norm": 9.746416091918945, "learning_rate": 8.755271980101655e-06, "loss": 2.8107, "step": 4048 }, { "epoch": 0.05, "grad_norm": 8.885831832885742, "learning_rate": 8.757434843733103e-06, "loss": 2.6266, "step": 4049 }, { "epoch": 0.05, "grad_norm": 8.613073348999023, "learning_rate": 8.759597707364551e-06, "loss": 2.717, "step": 4050 }, { "epoch": 0.05, "grad_norm": 9.11291217803955, "learning_rate": 8.761760570996e-06, "loss": 2.7647, "step": 4051 }, { "epoch": 0.05, "grad_norm": 8.020333290100098, "learning_rate": 8.763923434627447e-06, "loss": 3.1434, "step": 4052 }, { "epoch": 0.05, "grad_norm": 8.99970817565918, "learning_rate": 8.766086298258895e-06, "loss": 3.1421, "step": 4053 }, { "epoch": 0.05, "grad_norm": 8.425105094909668, "learning_rate": 8.768249161890344e-06, "loss": 2.6287, "step": 4054 }, { "epoch": 0.05, "grad_norm": 8.623347282409668, "learning_rate": 8.770412025521792e-06, "loss": 2.9397, "step": 4055 }, { "epoch": 0.05, "grad_norm": 8.007685661315918, "learning_rate": 8.77257488915324e-06, "loss": 2.462, "step": 4056 }, { "epoch": 0.05, "grad_norm": 9.97786808013916, "learning_rate": 8.774737752784688e-06, "loss": 3.5766, "step": 4057 }, { "epoch": 0.05, "grad_norm": 9.271943092346191, "learning_rate": 8.776900616416136e-06, "loss": 2.6772, "step": 4058 }, { "epoch": 0.05, "grad_norm": 9.191267013549805, "learning_rate": 8.779063480047584e-06, "loss": 3.2726, "step": 4059 }, { "epoch": 0.05, "grad_norm": 9.353391647338867, "learning_rate": 8.781226343679032e-06, "loss": 3.1433, "step": 4060 }, { "epoch": 0.05, "grad_norm": 9.095514297485352, "learning_rate": 8.78338920731048e-06, "loss": 2.5484, "step": 4061 }, { "epoch": 0.05, "grad_norm": 8.858786582946777, "learning_rate": 8.785552070941928e-06, "loss": 3.2919, "step": 4062 }, { "epoch": 0.05, "grad_norm": 9.540648460388184, "learning_rate": 8.787714934573376e-06, "loss": 3.0328, "step": 4063 }, { "epoch": 0.05, "grad_norm": 9.097526550292969, "learning_rate": 8.789877798204824e-06, "loss": 3.0191, "step": 4064 }, { "epoch": 0.05, "grad_norm": 9.05784797668457, "learning_rate": 8.792040661836272e-06, "loss": 3.2405, "step": 4065 }, { "epoch": 0.05, "grad_norm": 10.335835456848145, "learning_rate": 8.79420352546772e-06, "loss": 3.177, "step": 4066 }, { "epoch": 0.05, "grad_norm": 10.473820686340332, "learning_rate": 8.796366389099168e-06, "loss": 2.7763, "step": 4067 }, { "epoch": 0.05, "grad_norm": 10.670626640319824, "learning_rate": 8.798529252730616e-06, "loss": 2.7546, "step": 4068 }, { "epoch": 0.05, "grad_norm": 9.760433197021484, "learning_rate": 8.800692116362064e-06, "loss": 3.478, "step": 4069 }, { "epoch": 0.05, "grad_norm": 8.300029754638672, "learning_rate": 8.802854979993512e-06, "loss": 2.7017, "step": 4070 }, { "epoch": 0.05, "grad_norm": 8.83773136138916, "learning_rate": 8.80501784362496e-06, "loss": 3.027, "step": 4071 }, { "epoch": 0.05, "grad_norm": 9.097972869873047, "learning_rate": 8.807180707256408e-06, "loss": 3.3154, "step": 4072 }, { "epoch": 0.05, "grad_norm": 8.21893310546875, "learning_rate": 8.809343570887857e-06, "loss": 2.4314, "step": 4073 }, { "epoch": 0.05, "grad_norm": 9.67503547668457, "learning_rate": 8.811506434519305e-06, "loss": 3.142, "step": 4074 }, { "epoch": 0.05, "grad_norm": 9.545961380004883, "learning_rate": 8.813669298150753e-06, "loss": 2.7778, "step": 4075 }, { "epoch": 0.05, "grad_norm": 8.593548774719238, "learning_rate": 8.8158321617822e-06, "loss": 2.3375, "step": 4076 }, { "epoch": 0.05, "grad_norm": 10.001317977905273, "learning_rate": 8.817995025413649e-06, "loss": 3.3034, "step": 4077 }, { "epoch": 0.05, "grad_norm": 9.940396308898926, "learning_rate": 8.820157889045097e-06, "loss": 3.5539, "step": 4078 }, { "epoch": 0.05, "grad_norm": 9.445730209350586, "learning_rate": 8.822320752676545e-06, "loss": 3.5895, "step": 4079 }, { "epoch": 0.05, "grad_norm": 7.565127372741699, "learning_rate": 8.824483616307993e-06, "loss": 2.4216, "step": 4080 }, { "epoch": 0.05, "grad_norm": 8.949783325195312, "learning_rate": 8.826646479939441e-06, "loss": 3.1085, "step": 4081 }, { "epoch": 0.05, "grad_norm": 8.631632804870605, "learning_rate": 8.828809343570889e-06, "loss": 2.9644, "step": 4082 }, { "epoch": 0.05, "grad_norm": 8.800174713134766, "learning_rate": 8.830972207202337e-06, "loss": 3.0579, "step": 4083 }, { "epoch": 0.05, "grad_norm": 8.178058624267578, "learning_rate": 8.833135070833785e-06, "loss": 3.1609, "step": 4084 }, { "epoch": 0.05, "grad_norm": 8.06923770904541, "learning_rate": 8.835297934465233e-06, "loss": 3.0304, "step": 4085 }, { "epoch": 0.05, "grad_norm": 8.535781860351562, "learning_rate": 8.837460798096681e-06, "loss": 2.8949, "step": 4086 }, { "epoch": 0.05, "grad_norm": 9.244607925415039, "learning_rate": 8.839623661728128e-06, "loss": 2.8684, "step": 4087 }, { "epoch": 0.05, "grad_norm": 10.126365661621094, "learning_rate": 8.841786525359577e-06, "loss": 3.1083, "step": 4088 }, { "epoch": 0.05, "grad_norm": 9.324716567993164, "learning_rate": 8.843949388991024e-06, "loss": 3.0147, "step": 4089 }, { "epoch": 0.05, "grad_norm": 9.060443878173828, "learning_rate": 8.846112252622473e-06, "loss": 2.8529, "step": 4090 }, { "epoch": 0.05, "grad_norm": 8.99248218536377, "learning_rate": 8.84827511625392e-06, "loss": 2.922, "step": 4091 }, { "epoch": 0.05, "grad_norm": 9.129546165466309, "learning_rate": 8.85043797988537e-06, "loss": 2.6844, "step": 4092 }, { "epoch": 0.05, "grad_norm": 8.434819221496582, "learning_rate": 8.852600843516816e-06, "loss": 2.6348, "step": 4093 }, { "epoch": 0.05, "grad_norm": 10.71399211883545, "learning_rate": 8.854763707148266e-06, "loss": 3.4961, "step": 4094 }, { "epoch": 0.05, "grad_norm": 9.603537559509277, "learning_rate": 8.856926570779712e-06, "loss": 3.2467, "step": 4095 }, { "epoch": 0.05, "grad_norm": 9.291491508483887, "learning_rate": 8.859089434411162e-06, "loss": 2.8878, "step": 4096 }, { "epoch": 0.05, "grad_norm": 8.789755821228027, "learning_rate": 8.86125229804261e-06, "loss": 3.2632, "step": 4097 }, { "epoch": 0.05, "grad_norm": 8.409399032592773, "learning_rate": 8.863415161674058e-06, "loss": 2.5395, "step": 4098 }, { "epoch": 0.05, "grad_norm": 9.721589088439941, "learning_rate": 8.865578025305506e-06, "loss": 2.6155, "step": 4099 }, { "epoch": 0.05, "grad_norm": 8.986522674560547, "learning_rate": 8.867740888936954e-06, "loss": 3.1816, "step": 4100 }, { "epoch": 0.05, "grad_norm": 8.93679428100586, "learning_rate": 8.869903752568402e-06, "loss": 3.1037, "step": 4101 }, { "epoch": 0.05, "grad_norm": 9.625103950500488, "learning_rate": 8.87206661619985e-06, "loss": 3.5814, "step": 4102 }, { "epoch": 0.05, "grad_norm": 9.227179527282715, "learning_rate": 8.874229479831298e-06, "loss": 3.5878, "step": 4103 }, { "epoch": 0.05, "grad_norm": 9.412853240966797, "learning_rate": 8.876392343462746e-06, "loss": 3.4223, "step": 4104 }, { "epoch": 0.05, "grad_norm": 8.058524131774902, "learning_rate": 8.878555207094194e-06, "loss": 3.0581, "step": 4105 }, { "epoch": 0.05, "grad_norm": 9.724336624145508, "learning_rate": 8.88071807072564e-06, "loss": 3.2513, "step": 4106 }, { "epoch": 0.05, "grad_norm": 8.634654998779297, "learning_rate": 8.88288093435709e-06, "loss": 2.6157, "step": 4107 }, { "epoch": 0.05, "grad_norm": 8.968670845031738, "learning_rate": 8.885043797988537e-06, "loss": 3.0507, "step": 4108 }, { "epoch": 0.05, "grad_norm": 8.406315803527832, "learning_rate": 8.887206661619986e-06, "loss": 3.3722, "step": 4109 }, { "epoch": 0.05, "grad_norm": 8.94412899017334, "learning_rate": 8.889369525251433e-06, "loss": 3.2005, "step": 4110 }, { "epoch": 0.05, "grad_norm": 8.990771293640137, "learning_rate": 8.891532388882882e-06, "loss": 2.8984, "step": 4111 }, { "epoch": 0.05, "grad_norm": 8.904763221740723, "learning_rate": 8.893695252514329e-06, "loss": 2.9412, "step": 4112 }, { "epoch": 0.05, "grad_norm": 10.618508338928223, "learning_rate": 8.895858116145779e-06, "loss": 2.7661, "step": 4113 }, { "epoch": 0.05, "grad_norm": 8.483643531799316, "learning_rate": 8.898020979777225e-06, "loss": 3.2282, "step": 4114 }, { "epoch": 0.05, "grad_norm": 8.25282096862793, "learning_rate": 8.900183843408675e-06, "loss": 2.8745, "step": 4115 }, { "epoch": 0.05, "grad_norm": 7.711697578430176, "learning_rate": 8.902346707040121e-06, "loss": 2.5494, "step": 4116 }, { "epoch": 0.05, "grad_norm": 9.054781913757324, "learning_rate": 8.90450957067157e-06, "loss": 2.987, "step": 4117 }, { "epoch": 0.05, "grad_norm": 8.271172523498535, "learning_rate": 8.906672434303017e-06, "loss": 3.0836, "step": 4118 }, { "epoch": 0.05, "grad_norm": 8.270212173461914, "learning_rate": 8.908835297934467e-06, "loss": 2.7937, "step": 4119 }, { "epoch": 0.05, "grad_norm": 11.45626449584961, "learning_rate": 8.910998161565913e-06, "loss": 3.3765, "step": 4120 }, { "epoch": 0.05, "grad_norm": 8.918848991394043, "learning_rate": 8.913161025197363e-06, "loss": 2.8847, "step": 4121 }, { "epoch": 0.05, "grad_norm": 8.193462371826172, "learning_rate": 8.91532388882881e-06, "loss": 2.9064, "step": 4122 }, { "epoch": 0.05, "grad_norm": 8.56905460357666, "learning_rate": 8.917486752460257e-06, "loss": 3.0256, "step": 4123 }, { "epoch": 0.05, "grad_norm": 9.015096664428711, "learning_rate": 8.919649616091705e-06, "loss": 2.6067, "step": 4124 }, { "epoch": 0.05, "grad_norm": 8.516160011291504, "learning_rate": 8.921812479723153e-06, "loss": 3.0778, "step": 4125 }, { "epoch": 0.05, "grad_norm": 9.641748428344727, "learning_rate": 8.923975343354601e-06, "loss": 2.7562, "step": 4126 }, { "epoch": 0.05, "grad_norm": 9.385973930358887, "learning_rate": 8.92613820698605e-06, "loss": 2.822, "step": 4127 }, { "epoch": 0.05, "grad_norm": 9.887707710266113, "learning_rate": 8.9283010706175e-06, "loss": 3.0247, "step": 4128 }, { "epoch": 0.05, "grad_norm": 9.51135540008545, "learning_rate": 8.930463934248946e-06, "loss": 2.8883, "step": 4129 }, { "epoch": 0.05, "grad_norm": 9.334537506103516, "learning_rate": 8.932626797880395e-06, "loss": 2.8109, "step": 4130 }, { "epoch": 0.05, "grad_norm": 10.100686073303223, "learning_rate": 8.934789661511842e-06, "loss": 2.8846, "step": 4131 }, { "epoch": 0.05, "grad_norm": 9.667948722839355, "learning_rate": 8.936952525143291e-06, "loss": 3.1179, "step": 4132 }, { "epoch": 0.05, "grad_norm": 11.160226821899414, "learning_rate": 8.939115388774738e-06, "loss": 2.9443, "step": 4133 }, { "epoch": 0.05, "grad_norm": 10.294184684753418, "learning_rate": 8.941278252406188e-06, "loss": 2.6748, "step": 4134 }, { "epoch": 0.05, "grad_norm": 8.947834968566895, "learning_rate": 8.943441116037634e-06, "loss": 2.9177, "step": 4135 }, { "epoch": 0.05, "grad_norm": 9.079237937927246, "learning_rate": 8.945603979669084e-06, "loss": 3.448, "step": 4136 }, { "epoch": 0.05, "grad_norm": 9.27872085571289, "learning_rate": 8.94776684330053e-06, "loss": 3.1373, "step": 4137 }, { "epoch": 0.05, "grad_norm": 8.281451225280762, "learning_rate": 8.94992970693198e-06, "loss": 2.4775, "step": 4138 }, { "epoch": 0.05, "grad_norm": 8.908465385437012, "learning_rate": 8.952092570563426e-06, "loss": 3.2236, "step": 4139 }, { "epoch": 0.05, "grad_norm": 9.394489288330078, "learning_rate": 8.954255434194876e-06, "loss": 3.1912, "step": 4140 }, { "epoch": 0.05, "grad_norm": 9.791757583618164, "learning_rate": 8.956418297826322e-06, "loss": 2.9507, "step": 4141 }, { "epoch": 0.05, "grad_norm": 7.915194988250732, "learning_rate": 8.95858116145777e-06, "loss": 3.1493, "step": 4142 }, { "epoch": 0.05, "grad_norm": 10.542861938476562, "learning_rate": 8.960744025089218e-06, "loss": 3.1968, "step": 4143 }, { "epoch": 0.05, "grad_norm": 8.106374740600586, "learning_rate": 8.962906888720666e-06, "loss": 2.5211, "step": 4144 }, { "epoch": 0.05, "grad_norm": 8.339493751525879, "learning_rate": 8.965069752352114e-06, "loss": 2.8836, "step": 4145 }, { "epoch": 0.05, "grad_norm": 9.476851463317871, "learning_rate": 8.967232615983563e-06, "loss": 2.0559, "step": 4146 }, { "epoch": 0.05, "grad_norm": 9.052128791809082, "learning_rate": 8.96939547961501e-06, "loss": 2.9853, "step": 4147 }, { "epoch": 0.05, "grad_norm": 8.699254989624023, "learning_rate": 8.971558343246459e-06, "loss": 3.1896, "step": 4148 }, { "epoch": 0.05, "grad_norm": 9.575736045837402, "learning_rate": 8.973721206877907e-06, "loss": 2.6342, "step": 4149 }, { "epoch": 0.05, "grad_norm": 8.213873863220215, "learning_rate": 8.975884070509355e-06, "loss": 3.2218, "step": 4150 }, { "epoch": 0.05, "grad_norm": 8.93882942199707, "learning_rate": 8.978046934140803e-06, "loss": 2.9977, "step": 4151 }, { "epoch": 0.05, "grad_norm": 8.640710830688477, "learning_rate": 8.98020979777225e-06, "loss": 2.9961, "step": 4152 }, { "epoch": 0.05, "grad_norm": 8.599164962768555, "learning_rate": 8.982372661403699e-06, "loss": 2.9699, "step": 4153 }, { "epoch": 0.05, "grad_norm": 9.928267478942871, "learning_rate": 8.984535525035147e-06, "loss": 3.6017, "step": 4154 }, { "epoch": 0.05, "grad_norm": 8.591573715209961, "learning_rate": 8.986698388666595e-06, "loss": 3.1916, "step": 4155 }, { "epoch": 0.05, "grad_norm": 10.337992668151855, "learning_rate": 8.988861252298043e-06, "loss": 3.257, "step": 4156 }, { "epoch": 0.05, "grad_norm": 8.887700080871582, "learning_rate": 8.991024115929491e-06, "loss": 2.8785, "step": 4157 }, { "epoch": 0.05, "grad_norm": 8.974382400512695, "learning_rate": 8.993186979560939e-06, "loss": 3.5796, "step": 4158 }, { "epoch": 0.05, "grad_norm": 9.731985092163086, "learning_rate": 8.995349843192387e-06, "loss": 2.6209, "step": 4159 }, { "epoch": 0.05, "grad_norm": 9.053242683410645, "learning_rate": 8.997512706823835e-06, "loss": 3.3933, "step": 4160 }, { "epoch": 0.05, "grad_norm": 11.384563446044922, "learning_rate": 8.999675570455283e-06, "loss": 3.1212, "step": 4161 }, { "epoch": 0.05, "grad_norm": 9.374794960021973, "learning_rate": 9.001838434086731e-06, "loss": 3.2692, "step": 4162 }, { "epoch": 0.05, "grad_norm": 8.773526191711426, "learning_rate": 9.00400129771818e-06, "loss": 2.4859, "step": 4163 }, { "epoch": 0.05, "grad_norm": 9.057058334350586, "learning_rate": 9.006164161349627e-06, "loss": 2.8821, "step": 4164 }, { "epoch": 0.05, "grad_norm": 8.502883911132812, "learning_rate": 9.008327024981075e-06, "loss": 2.4812, "step": 4165 }, { "epoch": 0.05, "grad_norm": 8.003511428833008, "learning_rate": 9.010489888612524e-06, "loss": 2.8413, "step": 4166 }, { "epoch": 0.05, "grad_norm": 10.541178703308105, "learning_rate": 9.012652752243972e-06, "loss": 3.2953, "step": 4167 }, { "epoch": 0.05, "grad_norm": 7.824612617492676, "learning_rate": 9.01481561587542e-06, "loss": 2.3672, "step": 4168 }, { "epoch": 0.05, "grad_norm": 8.115198135375977, "learning_rate": 9.016978479506868e-06, "loss": 2.65, "step": 4169 }, { "epoch": 0.05, "grad_norm": 9.001333236694336, "learning_rate": 9.019141343138316e-06, "loss": 3.0496, "step": 4170 }, { "epoch": 0.05, "grad_norm": 7.950142860412598, "learning_rate": 9.021304206769764e-06, "loss": 2.2428, "step": 4171 }, { "epoch": 0.05, "grad_norm": 11.474553108215332, "learning_rate": 9.023467070401212e-06, "loss": 3.1719, "step": 4172 }, { "epoch": 0.05, "grad_norm": 8.341867446899414, "learning_rate": 9.02562993403266e-06, "loss": 3.6148, "step": 4173 }, { "epoch": 0.05, "grad_norm": 9.917770385742188, "learning_rate": 9.027792797664108e-06, "loss": 2.5084, "step": 4174 }, { "epoch": 0.05, "grad_norm": 9.579607963562012, "learning_rate": 9.029955661295556e-06, "loss": 3.164, "step": 4175 }, { "epoch": 0.05, "grad_norm": 8.054265022277832, "learning_rate": 9.032118524927004e-06, "loss": 2.2822, "step": 4176 }, { "epoch": 0.05, "grad_norm": 8.685848236083984, "learning_rate": 9.034281388558452e-06, "loss": 3.526, "step": 4177 }, { "epoch": 0.05, "grad_norm": 9.436866760253906, "learning_rate": 9.0364442521899e-06, "loss": 3.4169, "step": 4178 }, { "epoch": 0.05, "grad_norm": 8.62488079071045, "learning_rate": 9.038607115821348e-06, "loss": 3.2989, "step": 4179 }, { "epoch": 0.05, "grad_norm": 9.420355796813965, "learning_rate": 9.040769979452796e-06, "loss": 3.1122, "step": 4180 }, { "epoch": 0.05, "grad_norm": 9.630301475524902, "learning_rate": 9.042932843084244e-06, "loss": 2.904, "step": 4181 }, { "epoch": 0.05, "grad_norm": 9.405120849609375, "learning_rate": 9.045095706715692e-06, "loss": 2.7352, "step": 4182 }, { "epoch": 0.05, "grad_norm": 8.616004943847656, "learning_rate": 9.04725857034714e-06, "loss": 2.5728, "step": 4183 }, { "epoch": 0.05, "grad_norm": 8.36242961883545, "learning_rate": 9.049421433978588e-06, "loss": 3.1315, "step": 4184 }, { "epoch": 0.05, "grad_norm": 8.535761833190918, "learning_rate": 9.051584297610036e-06, "loss": 2.741, "step": 4185 }, { "epoch": 0.05, "grad_norm": 8.704134941101074, "learning_rate": 9.053747161241485e-06, "loss": 2.6764, "step": 4186 }, { "epoch": 0.05, "grad_norm": 8.198663711547852, "learning_rate": 9.055910024872933e-06, "loss": 2.9292, "step": 4187 }, { "epoch": 0.05, "grad_norm": 9.447097778320312, "learning_rate": 9.05807288850438e-06, "loss": 3.0266, "step": 4188 }, { "epoch": 0.05, "grad_norm": 10.04857063293457, "learning_rate": 9.060235752135829e-06, "loss": 3.075, "step": 4189 }, { "epoch": 0.05, "grad_norm": 8.660273551940918, "learning_rate": 9.062398615767277e-06, "loss": 3.0179, "step": 4190 }, { "epoch": 0.05, "grad_norm": 9.381972312927246, "learning_rate": 9.064561479398725e-06, "loss": 2.4258, "step": 4191 }, { "epoch": 0.05, "grad_norm": 9.53028678894043, "learning_rate": 9.066724343030173e-06, "loss": 2.7167, "step": 4192 }, { "epoch": 0.05, "grad_norm": 10.461405754089355, "learning_rate": 9.068887206661621e-06, "loss": 3.7871, "step": 4193 }, { "epoch": 0.05, "grad_norm": 8.675225257873535, "learning_rate": 9.071050070293069e-06, "loss": 2.8429, "step": 4194 }, { "epoch": 0.05, "grad_norm": 9.872893333435059, "learning_rate": 9.073212933924517e-06, "loss": 3.139, "step": 4195 }, { "epoch": 0.05, "grad_norm": 8.081480026245117, "learning_rate": 9.075375797555965e-06, "loss": 2.756, "step": 4196 }, { "epoch": 0.05, "grad_norm": 8.557599067687988, "learning_rate": 9.077538661187413e-06, "loss": 3.2354, "step": 4197 }, { "epoch": 0.05, "grad_norm": 8.508837699890137, "learning_rate": 9.079701524818861e-06, "loss": 2.4571, "step": 4198 }, { "epoch": 0.05, "grad_norm": 8.440088272094727, "learning_rate": 9.08186438845031e-06, "loss": 2.7829, "step": 4199 }, { "epoch": 0.05, "grad_norm": 10.8557767868042, "learning_rate": 9.084027252081757e-06, "loss": 3.199, "step": 4200 }, { "epoch": 0.05, "grad_norm": 8.116552352905273, "learning_rate": 9.086190115713205e-06, "loss": 2.8496, "step": 4201 }, { "epoch": 0.05, "grad_norm": 8.71894359588623, "learning_rate": 9.088352979344653e-06, "loss": 3.2106, "step": 4202 }, { "epoch": 0.05, "grad_norm": 9.920199394226074, "learning_rate": 9.090515842976101e-06, "loss": 3.9115, "step": 4203 }, { "epoch": 0.05, "grad_norm": 8.81010627746582, "learning_rate": 9.09267870660755e-06, "loss": 2.9686, "step": 4204 }, { "epoch": 0.05, "grad_norm": 9.582247734069824, "learning_rate": 9.094841570238998e-06, "loss": 2.785, "step": 4205 }, { "epoch": 0.05, "grad_norm": 9.490469932556152, "learning_rate": 9.097004433870446e-06, "loss": 3.0341, "step": 4206 }, { "epoch": 0.05, "grad_norm": 9.109302520751953, "learning_rate": 9.099167297501894e-06, "loss": 3.3511, "step": 4207 }, { "epoch": 0.05, "grad_norm": 7.805883407592773, "learning_rate": 9.101330161133342e-06, "loss": 3.1012, "step": 4208 }, { "epoch": 0.05, "grad_norm": 8.84492015838623, "learning_rate": 9.10349302476479e-06, "loss": 3.1558, "step": 4209 }, { "epoch": 0.05, "grad_norm": 8.288015365600586, "learning_rate": 9.105655888396238e-06, "loss": 3.0933, "step": 4210 }, { "epoch": 0.05, "grad_norm": 8.97276496887207, "learning_rate": 9.107818752027686e-06, "loss": 2.6503, "step": 4211 }, { "epoch": 0.05, "grad_norm": 10.960636138916016, "learning_rate": 9.109981615659134e-06, "loss": 3.6234, "step": 4212 }, { "epoch": 0.05, "grad_norm": 9.354393005371094, "learning_rate": 9.112144479290582e-06, "loss": 2.7828, "step": 4213 }, { "epoch": 0.05, "grad_norm": 8.278752326965332, "learning_rate": 9.11430734292203e-06, "loss": 2.9659, "step": 4214 }, { "epoch": 0.05, "grad_norm": 8.473092079162598, "learning_rate": 9.116470206553476e-06, "loss": 3.1977, "step": 4215 }, { "epoch": 0.05, "grad_norm": 8.943066596984863, "learning_rate": 9.118633070184926e-06, "loss": 2.5677, "step": 4216 }, { "epoch": 0.05, "grad_norm": 8.861969947814941, "learning_rate": 9.120795933816372e-06, "loss": 3.0413, "step": 4217 }, { "epoch": 0.05, "grad_norm": 8.892319679260254, "learning_rate": 9.122958797447822e-06, "loss": 2.9587, "step": 4218 }, { "epoch": 0.05, "grad_norm": 7.842750072479248, "learning_rate": 9.125121661079269e-06, "loss": 2.7302, "step": 4219 }, { "epoch": 0.05, "grad_norm": 8.533342361450195, "learning_rate": 9.127284524710718e-06, "loss": 2.9187, "step": 4220 }, { "epoch": 0.05, "grad_norm": 8.890152931213379, "learning_rate": 9.129447388342165e-06, "loss": 2.7979, "step": 4221 }, { "epoch": 0.05, "grad_norm": 9.352042198181152, "learning_rate": 9.131610251973614e-06, "loss": 3.2912, "step": 4222 }, { "epoch": 0.05, "grad_norm": 8.498420715332031, "learning_rate": 9.13377311560506e-06, "loss": 2.4859, "step": 4223 }, { "epoch": 0.05, "grad_norm": 9.081488609313965, "learning_rate": 9.13593597923651e-06, "loss": 3.3324, "step": 4224 }, { "epoch": 0.05, "grad_norm": 8.450865745544434, "learning_rate": 9.138098842867959e-06, "loss": 3.2499, "step": 4225 }, { "epoch": 0.05, "grad_norm": 9.712626457214355, "learning_rate": 9.140261706499407e-06, "loss": 3.4279, "step": 4226 }, { "epoch": 0.05, "grad_norm": 9.524721145629883, "learning_rate": 9.142424570130855e-06, "loss": 3.0384, "step": 4227 }, { "epoch": 0.05, "grad_norm": 8.914627075195312, "learning_rate": 9.144587433762303e-06, "loss": 2.6111, "step": 4228 }, { "epoch": 0.05, "grad_norm": 8.785408973693848, "learning_rate": 9.14675029739375e-06, "loss": 3.4473, "step": 4229 }, { "epoch": 0.05, "grad_norm": 8.787164688110352, "learning_rate": 9.148913161025199e-06, "loss": 3.2499, "step": 4230 }, { "epoch": 0.05, "grad_norm": 9.009819984436035, "learning_rate": 9.151076024656647e-06, "loss": 3.2054, "step": 4231 }, { "epoch": 0.05, "grad_norm": 8.4968900680542, "learning_rate": 9.153238888288095e-06, "loss": 2.4145, "step": 4232 }, { "epoch": 0.05, "grad_norm": 8.787154197692871, "learning_rate": 9.155401751919543e-06, "loss": 3.3479, "step": 4233 }, { "epoch": 0.05, "grad_norm": 8.68990421295166, "learning_rate": 9.15756461555099e-06, "loss": 2.9957, "step": 4234 }, { "epoch": 0.05, "grad_norm": 9.618369102478027, "learning_rate": 9.159727479182439e-06, "loss": 2.7703, "step": 4235 }, { "epoch": 0.05, "grad_norm": 9.056138038635254, "learning_rate": 9.161890342813885e-06, "loss": 3.4505, "step": 4236 }, { "epoch": 0.05, "grad_norm": 9.106744766235352, "learning_rate": 9.164053206445335e-06, "loss": 2.396, "step": 4237 }, { "epoch": 0.05, "grad_norm": 9.335073471069336, "learning_rate": 9.166216070076781e-06, "loss": 3.3813, "step": 4238 }, { "epoch": 0.06, "grad_norm": 9.113000869750977, "learning_rate": 9.168378933708231e-06, "loss": 2.8323, "step": 4239 }, { "epoch": 0.06, "grad_norm": 8.465489387512207, "learning_rate": 9.170541797339678e-06, "loss": 2.5053, "step": 4240 }, { "epoch": 0.06, "grad_norm": 10.223518371582031, "learning_rate": 9.172704660971127e-06, "loss": 2.8855, "step": 4241 }, { "epoch": 0.06, "grad_norm": 8.425514221191406, "learning_rate": 9.174867524602574e-06, "loss": 2.8665, "step": 4242 }, { "epoch": 0.06, "grad_norm": 8.673223495483398, "learning_rate": 9.177030388234023e-06, "loss": 3.0217, "step": 4243 }, { "epoch": 0.06, "grad_norm": 9.969247817993164, "learning_rate": 9.17919325186547e-06, "loss": 2.9291, "step": 4244 }, { "epoch": 0.06, "grad_norm": 10.148835182189941, "learning_rate": 9.18135611549692e-06, "loss": 2.9161, "step": 4245 }, { "epoch": 0.06, "grad_norm": 8.787489891052246, "learning_rate": 9.183518979128366e-06, "loss": 2.5358, "step": 4246 }, { "epoch": 0.06, "grad_norm": 9.289420127868652, "learning_rate": 9.185681842759816e-06, "loss": 3.3383, "step": 4247 }, { "epoch": 0.06, "grad_norm": 8.42058277130127, "learning_rate": 9.187844706391262e-06, "loss": 3.2378, "step": 4248 }, { "epoch": 0.06, "grad_norm": 9.49162769317627, "learning_rate": 9.190007570022712e-06, "loss": 3.0979, "step": 4249 }, { "epoch": 0.06, "grad_norm": 8.198874473571777, "learning_rate": 9.192170433654158e-06, "loss": 2.9678, "step": 4250 }, { "epoch": 0.06, "grad_norm": 8.868795394897461, "learning_rate": 9.194333297285606e-06, "loss": 2.8999, "step": 4251 }, { "epoch": 0.06, "grad_norm": 8.571632385253906, "learning_rate": 9.196496160917054e-06, "loss": 3.1339, "step": 4252 }, { "epoch": 0.06, "grad_norm": 9.523136138916016, "learning_rate": 9.198659024548502e-06, "loss": 2.6222, "step": 4253 }, { "epoch": 0.06, "grad_norm": 7.744039535522461, "learning_rate": 9.20082188817995e-06, "loss": 3.3447, "step": 4254 }, { "epoch": 0.06, "grad_norm": 8.764558792114258, "learning_rate": 9.202984751811398e-06, "loss": 3.0451, "step": 4255 }, { "epoch": 0.06, "grad_norm": 8.304567337036133, "learning_rate": 9.205147615442848e-06, "loss": 2.7388, "step": 4256 }, { "epoch": 0.06, "grad_norm": 9.60951042175293, "learning_rate": 9.207310479074294e-06, "loss": 3.3681, "step": 4257 }, { "epoch": 0.06, "grad_norm": 9.28577995300293, "learning_rate": 9.209473342705744e-06, "loss": 3.1181, "step": 4258 }, { "epoch": 0.06, "grad_norm": 10.490798950195312, "learning_rate": 9.21163620633719e-06, "loss": 3.0066, "step": 4259 }, { "epoch": 0.06, "grad_norm": 7.400363445281982, "learning_rate": 9.21379906996864e-06, "loss": 2.9744, "step": 4260 }, { "epoch": 0.06, "grad_norm": 9.150379180908203, "learning_rate": 9.215961933600087e-06, "loss": 3.2526, "step": 4261 }, { "epoch": 0.06, "grad_norm": 8.114045143127441, "learning_rate": 9.218124797231536e-06, "loss": 2.4789, "step": 4262 }, { "epoch": 0.06, "grad_norm": 7.748763561248779, "learning_rate": 9.220287660862983e-06, "loss": 2.5988, "step": 4263 }, { "epoch": 0.06, "grad_norm": 9.399701118469238, "learning_rate": 9.222450524494433e-06, "loss": 3.0093, "step": 4264 }, { "epoch": 0.06, "grad_norm": 8.457115173339844, "learning_rate": 9.224613388125879e-06, "loss": 2.9929, "step": 4265 }, { "epoch": 0.06, "grad_norm": 8.786404609680176, "learning_rate": 9.226776251757329e-06, "loss": 2.5929, "step": 4266 }, { "epoch": 0.06, "grad_norm": 10.419791221618652, "learning_rate": 9.228939115388775e-06, "loss": 2.8485, "step": 4267 }, { "epoch": 0.06, "grad_norm": 8.059293746948242, "learning_rate": 9.231101979020225e-06, "loss": 2.8438, "step": 4268 }, { "epoch": 0.06, "grad_norm": 8.974872589111328, "learning_rate": 9.233264842651671e-06, "loss": 3.4292, "step": 4269 }, { "epoch": 0.06, "grad_norm": 8.111701965332031, "learning_rate": 9.235427706283119e-06, "loss": 2.7503, "step": 4270 }, { "epoch": 0.06, "grad_norm": 7.675744533538818, "learning_rate": 9.237590569914567e-06, "loss": 2.8294, "step": 4271 }, { "epoch": 0.06, "grad_norm": 9.153820037841797, "learning_rate": 9.239753433546015e-06, "loss": 3.6087, "step": 4272 }, { "epoch": 0.06, "grad_norm": 9.241703987121582, "learning_rate": 9.241916297177463e-06, "loss": 3.4321, "step": 4273 }, { "epoch": 0.06, "grad_norm": 9.240930557250977, "learning_rate": 9.244079160808911e-06, "loss": 3.1945, "step": 4274 }, { "epoch": 0.06, "grad_norm": 10.432280540466309, "learning_rate": 9.24624202444036e-06, "loss": 3.0043, "step": 4275 }, { "epoch": 0.06, "grad_norm": 8.599945068359375, "learning_rate": 9.248404888071807e-06, "loss": 2.7533, "step": 4276 }, { "epoch": 0.06, "grad_norm": 8.868868827819824, "learning_rate": 9.250567751703255e-06, "loss": 2.9118, "step": 4277 }, { "epoch": 0.06, "grad_norm": 8.84360408782959, "learning_rate": 9.252730615334704e-06, "loss": 2.8691, "step": 4278 }, { "epoch": 0.06, "grad_norm": 9.572598457336426, "learning_rate": 9.254893478966152e-06, "loss": 2.4222, "step": 4279 }, { "epoch": 0.06, "grad_norm": 8.09988784790039, "learning_rate": 9.2570563425976e-06, "loss": 2.5693, "step": 4280 }, { "epoch": 0.06, "grad_norm": 7.924548625946045, "learning_rate": 9.259219206229048e-06, "loss": 3.2191, "step": 4281 }, { "epoch": 0.06, "grad_norm": 10.08442211151123, "learning_rate": 9.261382069860496e-06, "loss": 3.0971, "step": 4282 }, { "epoch": 0.06, "grad_norm": 9.146851539611816, "learning_rate": 9.263544933491944e-06, "loss": 2.9379, "step": 4283 }, { "epoch": 0.06, "grad_norm": 8.606752395629883, "learning_rate": 9.265707797123392e-06, "loss": 2.4675, "step": 4284 }, { "epoch": 0.06, "grad_norm": 8.746702194213867, "learning_rate": 9.26787066075484e-06, "loss": 3.2391, "step": 4285 }, { "epoch": 0.06, "grad_norm": 8.496119499206543, "learning_rate": 9.270033524386288e-06, "loss": 3.0919, "step": 4286 }, { "epoch": 0.06, "grad_norm": 9.062003135681152, "learning_rate": 9.272196388017736e-06, "loss": 3.0022, "step": 4287 }, { "epoch": 0.06, "grad_norm": 10.471450805664062, "learning_rate": 9.274359251649184e-06, "loss": 3.034, "step": 4288 }, { "epoch": 0.06, "grad_norm": 9.914485931396484, "learning_rate": 9.276522115280632e-06, "loss": 3.0723, "step": 4289 }, { "epoch": 0.06, "grad_norm": 8.672873497009277, "learning_rate": 9.27868497891208e-06, "loss": 2.2408, "step": 4290 }, { "epoch": 0.06, "grad_norm": 9.70837116241455, "learning_rate": 9.280847842543528e-06, "loss": 3.4465, "step": 4291 }, { "epoch": 0.06, "grad_norm": 8.609472274780273, "learning_rate": 9.283010706174976e-06, "loss": 2.5958, "step": 4292 }, { "epoch": 0.06, "grad_norm": 9.41685676574707, "learning_rate": 9.285173569806424e-06, "loss": 2.939, "step": 4293 }, { "epoch": 0.06, "grad_norm": 8.375540733337402, "learning_rate": 9.287336433437872e-06, "loss": 2.9293, "step": 4294 }, { "epoch": 0.06, "grad_norm": 9.273431777954102, "learning_rate": 9.28949929706932e-06, "loss": 3.0364, "step": 4295 }, { "epoch": 0.06, "grad_norm": 8.722311973571777, "learning_rate": 9.291662160700768e-06, "loss": 3.7818, "step": 4296 }, { "epoch": 0.06, "grad_norm": 10.29769229888916, "learning_rate": 9.293825024332216e-06, "loss": 3.0455, "step": 4297 }, { "epoch": 0.06, "grad_norm": 9.837875366210938, "learning_rate": 9.295987887963665e-06, "loss": 3.1738, "step": 4298 }, { "epoch": 0.06, "grad_norm": 8.540968894958496, "learning_rate": 9.298150751595113e-06, "loss": 2.9358, "step": 4299 }, { "epoch": 0.06, "grad_norm": 10.28818130493164, "learning_rate": 9.30031361522656e-06, "loss": 3.3591, "step": 4300 }, { "epoch": 0.06, "grad_norm": 9.686490058898926, "learning_rate": 9.302476478858009e-06, "loss": 3.415, "step": 4301 }, { "epoch": 0.06, "grad_norm": 9.782082557678223, "learning_rate": 9.304639342489457e-06, "loss": 2.6831, "step": 4302 }, { "epoch": 0.06, "grad_norm": 9.630290031433105, "learning_rate": 9.306802206120905e-06, "loss": 3.5005, "step": 4303 }, { "epoch": 0.06, "grad_norm": 10.184741020202637, "learning_rate": 9.308965069752353e-06, "loss": 2.3952, "step": 4304 }, { "epoch": 0.06, "grad_norm": 10.175152778625488, "learning_rate": 9.311127933383801e-06, "loss": 3.1186, "step": 4305 }, { "epoch": 0.06, "grad_norm": 10.642838478088379, "learning_rate": 9.313290797015249e-06, "loss": 3.4216, "step": 4306 }, { "epoch": 0.06, "grad_norm": 9.093697547912598, "learning_rate": 9.315453660646697e-06, "loss": 2.961, "step": 4307 }, { "epoch": 0.06, "grad_norm": 9.78884220123291, "learning_rate": 9.317616524278145e-06, "loss": 2.9515, "step": 4308 }, { "epoch": 0.06, "grad_norm": 9.081172943115234, "learning_rate": 9.319779387909593e-06, "loss": 3.5612, "step": 4309 }, { "epoch": 0.06, "grad_norm": 8.89690113067627, "learning_rate": 9.321942251541041e-06, "loss": 2.8216, "step": 4310 }, { "epoch": 0.06, "grad_norm": 8.65704345703125, "learning_rate": 9.32410511517249e-06, "loss": 3.1061, "step": 4311 }, { "epoch": 0.06, "grad_norm": 7.753295421600342, "learning_rate": 9.326267978803937e-06, "loss": 3.0566, "step": 4312 }, { "epoch": 0.06, "grad_norm": 8.573772430419922, "learning_rate": 9.328430842435385e-06, "loss": 2.8035, "step": 4313 }, { "epoch": 0.06, "grad_norm": 7.924304962158203, "learning_rate": 9.330593706066833e-06, "loss": 2.8806, "step": 4314 }, { "epoch": 0.06, "grad_norm": 8.902034759521484, "learning_rate": 9.332756569698281e-06, "loss": 2.8504, "step": 4315 }, { "epoch": 0.06, "grad_norm": 8.992663383483887, "learning_rate": 9.33491943332973e-06, "loss": 2.3432, "step": 4316 }, { "epoch": 0.06, "grad_norm": 8.601202011108398, "learning_rate": 9.337082296961177e-06, "loss": 2.9827, "step": 4317 }, { "epoch": 0.06, "grad_norm": 8.002729415893555, "learning_rate": 9.339245160592626e-06, "loss": 2.6367, "step": 4318 }, { "epoch": 0.06, "grad_norm": 8.965533256530762, "learning_rate": 9.341408024224074e-06, "loss": 3.0615, "step": 4319 }, { "epoch": 0.06, "grad_norm": 8.994606971740723, "learning_rate": 9.343570887855522e-06, "loss": 2.3388, "step": 4320 }, { "epoch": 0.06, "grad_norm": 9.783747673034668, "learning_rate": 9.34573375148697e-06, "loss": 3.1116, "step": 4321 }, { "epoch": 0.06, "grad_norm": 8.122971534729004, "learning_rate": 9.347896615118418e-06, "loss": 3.146, "step": 4322 }, { "epoch": 0.06, "grad_norm": 8.689803123474121, "learning_rate": 9.350059478749866e-06, "loss": 2.984, "step": 4323 }, { "epoch": 0.06, "grad_norm": 8.701172828674316, "learning_rate": 9.352222342381314e-06, "loss": 2.9447, "step": 4324 }, { "epoch": 0.06, "grad_norm": 8.573968887329102, "learning_rate": 9.354385206012762e-06, "loss": 2.8986, "step": 4325 }, { "epoch": 0.06, "grad_norm": 8.18625259399414, "learning_rate": 9.35654806964421e-06, "loss": 3.0231, "step": 4326 }, { "epoch": 0.06, "grad_norm": 8.48379135131836, "learning_rate": 9.358710933275658e-06, "loss": 2.6981, "step": 4327 }, { "epoch": 0.06, "grad_norm": 7.960453033447266, "learning_rate": 9.360873796907106e-06, "loss": 2.918, "step": 4328 }, { "epoch": 0.06, "grad_norm": 9.85818099975586, "learning_rate": 9.363036660538554e-06, "loss": 3.1244, "step": 4329 }, { "epoch": 0.06, "grad_norm": 8.224392890930176, "learning_rate": 9.365199524170002e-06, "loss": 2.619, "step": 4330 }, { "epoch": 0.06, "grad_norm": 8.170262336730957, "learning_rate": 9.36736238780145e-06, "loss": 2.9411, "step": 4331 }, { "epoch": 0.06, "grad_norm": 7.97713041305542, "learning_rate": 9.369525251432898e-06, "loss": 2.6314, "step": 4332 }, { "epoch": 0.06, "grad_norm": 9.78217887878418, "learning_rate": 9.371688115064346e-06, "loss": 3.2281, "step": 4333 }, { "epoch": 0.06, "grad_norm": 8.17094898223877, "learning_rate": 9.373850978695794e-06, "loss": 2.4749, "step": 4334 }, { "epoch": 0.06, "grad_norm": 8.014864921569824, "learning_rate": 9.376013842327242e-06, "loss": 3.1509, "step": 4335 }, { "epoch": 0.06, "grad_norm": 9.192405700683594, "learning_rate": 9.37817670595869e-06, "loss": 2.8796, "step": 4336 }, { "epoch": 0.06, "grad_norm": 8.856995582580566, "learning_rate": 9.380339569590139e-06, "loss": 2.842, "step": 4337 }, { "epoch": 0.06, "grad_norm": 9.652146339416504, "learning_rate": 9.382502433221587e-06, "loss": 2.5206, "step": 4338 }, { "epoch": 0.06, "grad_norm": 8.935985565185547, "learning_rate": 9.384665296853035e-06, "loss": 3.1687, "step": 4339 }, { "epoch": 0.06, "grad_norm": 8.584514617919922, "learning_rate": 9.386828160484483e-06, "loss": 2.9507, "step": 4340 }, { "epoch": 0.06, "grad_norm": 8.255080223083496, "learning_rate": 9.38899102411593e-06, "loss": 2.3284, "step": 4341 }, { "epoch": 0.06, "grad_norm": 9.548458099365234, "learning_rate": 9.391153887747379e-06, "loss": 3.125, "step": 4342 }, { "epoch": 0.06, "grad_norm": 9.535073280334473, "learning_rate": 9.393316751378825e-06, "loss": 2.8584, "step": 4343 }, { "epoch": 0.06, "grad_norm": 7.548603057861328, "learning_rate": 9.395479615010275e-06, "loss": 2.947, "step": 4344 }, { "epoch": 0.06, "grad_norm": 9.521198272705078, "learning_rate": 9.397642478641721e-06, "loss": 3.0455, "step": 4345 }, { "epoch": 0.06, "grad_norm": 8.621885299682617, "learning_rate": 9.399805342273171e-06, "loss": 3.2365, "step": 4346 }, { "epoch": 0.06, "grad_norm": 8.50935173034668, "learning_rate": 9.401968205904617e-06, "loss": 2.8242, "step": 4347 }, { "epoch": 0.06, "grad_norm": 7.288082599639893, "learning_rate": 9.404131069536067e-06, "loss": 2.7169, "step": 4348 }, { "epoch": 0.06, "grad_norm": 8.86627197265625, "learning_rate": 9.406293933167513e-06, "loss": 2.5292, "step": 4349 }, { "epoch": 0.06, "grad_norm": 8.197412490844727, "learning_rate": 9.408456796798963e-06, "loss": 2.8282, "step": 4350 }, { "epoch": 0.06, "grad_norm": 9.056628227233887, "learning_rate": 9.41061966043041e-06, "loss": 3.2164, "step": 4351 }, { "epoch": 0.06, "grad_norm": 8.211689949035645, "learning_rate": 9.41278252406186e-06, "loss": 3.0847, "step": 4352 }, { "epoch": 0.06, "grad_norm": 8.71631145477295, "learning_rate": 9.414945387693307e-06, "loss": 2.9273, "step": 4353 }, { "epoch": 0.06, "grad_norm": 7.639793395996094, "learning_rate": 9.417108251324755e-06, "loss": 2.6704, "step": 4354 }, { "epoch": 0.06, "grad_norm": 7.9091901779174805, "learning_rate": 9.419271114956203e-06, "loss": 2.3026, "step": 4355 }, { "epoch": 0.06, "grad_norm": 8.872992515563965, "learning_rate": 9.421433978587651e-06, "loss": 3.865, "step": 4356 }, { "epoch": 0.06, "grad_norm": 8.74413776397705, "learning_rate": 9.4235968422191e-06, "loss": 3.168, "step": 4357 }, { "epoch": 0.06, "grad_norm": 10.792963981628418, "learning_rate": 9.425759705850548e-06, "loss": 3.0446, "step": 4358 }, { "epoch": 0.06, "grad_norm": 9.167683601379395, "learning_rate": 9.427922569481996e-06, "loss": 3.2762, "step": 4359 }, { "epoch": 0.06, "grad_norm": 9.194933891296387, "learning_rate": 9.430085433113442e-06, "loss": 3.3208, "step": 4360 }, { "epoch": 0.06, "grad_norm": 10.890975952148438, "learning_rate": 9.432248296744892e-06, "loss": 2.9492, "step": 4361 }, { "epoch": 0.06, "grad_norm": 9.855032920837402, "learning_rate": 9.434411160376338e-06, "loss": 3.2517, "step": 4362 }, { "epoch": 0.06, "grad_norm": 8.206193923950195, "learning_rate": 9.436574024007788e-06, "loss": 3.1266, "step": 4363 }, { "epoch": 0.06, "grad_norm": 8.499375343322754, "learning_rate": 9.438736887639234e-06, "loss": 2.907, "step": 4364 }, { "epoch": 0.06, "grad_norm": 9.02774715423584, "learning_rate": 9.440899751270684e-06, "loss": 3.1876, "step": 4365 }, { "epoch": 0.06, "grad_norm": 9.548384666442871, "learning_rate": 9.44306261490213e-06, "loss": 2.8708, "step": 4366 }, { "epoch": 0.06, "grad_norm": 9.015778541564941, "learning_rate": 9.44522547853358e-06, "loss": 2.6254, "step": 4367 }, { "epoch": 0.06, "grad_norm": 8.681380271911621, "learning_rate": 9.447388342165026e-06, "loss": 2.651, "step": 4368 }, { "epoch": 0.06, "grad_norm": 8.957889556884766, "learning_rate": 9.449551205796476e-06, "loss": 3.3815, "step": 4369 }, { "epoch": 0.06, "grad_norm": 12.002596855163574, "learning_rate": 9.451714069427922e-06, "loss": 2.7718, "step": 4370 }, { "epoch": 0.06, "grad_norm": 8.447799682617188, "learning_rate": 9.453876933059372e-06, "loss": 3.397, "step": 4371 }, { "epoch": 0.06, "grad_norm": 8.637117385864258, "learning_rate": 9.456039796690819e-06, "loss": 2.7695, "step": 4372 }, { "epoch": 0.06, "grad_norm": 8.393248558044434, "learning_rate": 9.458202660322268e-06, "loss": 3.3461, "step": 4373 }, { "epoch": 0.06, "grad_norm": 8.034770011901855, "learning_rate": 9.460365523953715e-06, "loss": 2.4354, "step": 4374 }, { "epoch": 0.06, "grad_norm": 8.724227905273438, "learning_rate": 9.462528387585164e-06, "loss": 2.7106, "step": 4375 }, { "epoch": 0.06, "grad_norm": 8.288446426391602, "learning_rate": 9.46469125121661e-06, "loss": 2.6952, "step": 4376 }, { "epoch": 0.06, "grad_norm": 9.019205093383789, "learning_rate": 9.46685411484806e-06, "loss": 2.898, "step": 4377 }, { "epoch": 0.06, "grad_norm": 7.889543056488037, "learning_rate": 9.469016978479507e-06, "loss": 3.3065, "step": 4378 }, { "epoch": 0.06, "grad_norm": 8.598140716552734, "learning_rate": 9.471179842110955e-06, "loss": 3.3697, "step": 4379 }, { "epoch": 0.06, "grad_norm": 8.377676010131836, "learning_rate": 9.473342705742403e-06, "loss": 2.947, "step": 4380 }, { "epoch": 0.06, "grad_norm": 9.146026611328125, "learning_rate": 9.475505569373851e-06, "loss": 2.8327, "step": 4381 }, { "epoch": 0.06, "grad_norm": 8.72056770324707, "learning_rate": 9.477668433005299e-06, "loss": 2.6953, "step": 4382 }, { "epoch": 0.06, "grad_norm": 7.944361686706543, "learning_rate": 9.479831296636747e-06, "loss": 3.1119, "step": 4383 }, { "epoch": 0.06, "grad_norm": 8.71541976928711, "learning_rate": 9.481994160268197e-06, "loss": 2.7346, "step": 4384 }, { "epoch": 0.06, "grad_norm": 9.208105087280273, "learning_rate": 9.484157023899643e-06, "loss": 3.1167, "step": 4385 }, { "epoch": 0.06, "grad_norm": 8.499005317687988, "learning_rate": 9.486319887531093e-06, "loss": 2.5349, "step": 4386 }, { "epoch": 0.06, "grad_norm": 7.9583611488342285, "learning_rate": 9.48848275116254e-06, "loss": 3.096, "step": 4387 }, { "epoch": 0.06, "grad_norm": 8.622846603393555, "learning_rate": 9.490645614793989e-06, "loss": 2.9404, "step": 4388 }, { "epoch": 0.06, "grad_norm": 7.668818950653076, "learning_rate": 9.492808478425435e-06, "loss": 2.5849, "step": 4389 }, { "epoch": 0.06, "grad_norm": 9.540761947631836, "learning_rate": 9.494971342056885e-06, "loss": 2.9832, "step": 4390 }, { "epoch": 0.06, "grad_norm": 8.746343612670898, "learning_rate": 9.497134205688332e-06, "loss": 2.7475, "step": 4391 }, { "epoch": 0.06, "grad_norm": 8.322611808776855, "learning_rate": 9.499297069319781e-06, "loss": 3.4656, "step": 4392 }, { "epoch": 0.06, "grad_norm": 8.744117736816406, "learning_rate": 9.501459932951228e-06, "loss": 3.2241, "step": 4393 }, { "epoch": 0.06, "grad_norm": 8.170256614685059, "learning_rate": 9.503622796582677e-06, "loss": 3.3066, "step": 4394 }, { "epoch": 0.06, "grad_norm": 8.456320762634277, "learning_rate": 9.505785660214124e-06, "loss": 3.0214, "step": 4395 }, { "epoch": 0.06, "grad_norm": 8.305610656738281, "learning_rate": 9.507948523845572e-06, "loss": 2.5832, "step": 4396 }, { "epoch": 0.06, "grad_norm": 8.522607803344727, "learning_rate": 9.51011138747702e-06, "loss": 3.1255, "step": 4397 }, { "epoch": 0.06, "grad_norm": 9.49444580078125, "learning_rate": 9.512274251108468e-06, "loss": 2.8916, "step": 4398 }, { "epoch": 0.06, "grad_norm": 9.066652297973633, "learning_rate": 9.514437114739916e-06, "loss": 3.0875, "step": 4399 }, { "epoch": 0.06, "grad_norm": 9.940535545349121, "learning_rate": 9.516599978371364e-06, "loss": 2.9905, "step": 4400 }, { "epoch": 0.06, "grad_norm": 9.629915237426758, "learning_rate": 9.518762842002812e-06, "loss": 3.0094, "step": 4401 }, { "epoch": 0.06, "grad_norm": 8.798479080200195, "learning_rate": 9.52092570563426e-06, "loss": 3.3185, "step": 4402 }, { "epoch": 0.06, "grad_norm": 9.121136665344238, "learning_rate": 9.523088569265708e-06, "loss": 3.129, "step": 4403 }, { "epoch": 0.06, "grad_norm": 8.905903816223145, "learning_rate": 9.525251432897156e-06, "loss": 3.1658, "step": 4404 }, { "epoch": 0.06, "grad_norm": 7.967162132263184, "learning_rate": 9.527414296528604e-06, "loss": 2.6375, "step": 4405 }, { "epoch": 0.06, "grad_norm": 8.255158424377441, "learning_rate": 9.529577160160052e-06, "loss": 3.2112, "step": 4406 }, { "epoch": 0.06, "grad_norm": 9.483573913574219, "learning_rate": 9.5317400237915e-06, "loss": 3.5766, "step": 4407 }, { "epoch": 0.06, "grad_norm": 9.026957511901855, "learning_rate": 9.533902887422948e-06, "loss": 3.2208, "step": 4408 }, { "epoch": 0.06, "grad_norm": 9.645647048950195, "learning_rate": 9.536065751054396e-06, "loss": 3.6221, "step": 4409 }, { "epoch": 0.06, "grad_norm": 9.0358304977417, "learning_rate": 9.538228614685845e-06, "loss": 3.0049, "step": 4410 }, { "epoch": 0.06, "grad_norm": 9.2633695602417, "learning_rate": 9.540391478317293e-06, "loss": 2.861, "step": 4411 }, { "epoch": 0.06, "grad_norm": 7.948358535766602, "learning_rate": 9.54255434194874e-06, "loss": 2.59, "step": 4412 }, { "epoch": 0.06, "grad_norm": 9.254064559936523, "learning_rate": 9.544717205580189e-06, "loss": 2.7955, "step": 4413 }, { "epoch": 0.06, "grad_norm": 8.156112670898438, "learning_rate": 9.546880069211637e-06, "loss": 3.1543, "step": 4414 }, { "epoch": 0.06, "grad_norm": 8.85400104522705, "learning_rate": 9.549042932843085e-06, "loss": 2.7963, "step": 4415 }, { "epoch": 0.06, "grad_norm": 9.090629577636719, "learning_rate": 9.551205796474533e-06, "loss": 2.505, "step": 4416 }, { "epoch": 0.06, "grad_norm": 7.71209192276001, "learning_rate": 9.553368660105981e-06, "loss": 2.436, "step": 4417 }, { "epoch": 0.06, "grad_norm": 8.91716194152832, "learning_rate": 9.555531523737429e-06, "loss": 2.874, "step": 4418 }, { "epoch": 0.06, "grad_norm": 8.48232650756836, "learning_rate": 9.557694387368877e-06, "loss": 2.9591, "step": 4419 }, { "epoch": 0.06, "grad_norm": 8.240547180175781, "learning_rate": 9.559857251000325e-06, "loss": 2.7502, "step": 4420 }, { "epoch": 0.06, "grad_norm": 9.166288375854492, "learning_rate": 9.562020114631773e-06, "loss": 2.8371, "step": 4421 }, { "epoch": 0.06, "grad_norm": 7.961655139923096, "learning_rate": 9.564182978263221e-06, "loss": 2.6492, "step": 4422 }, { "epoch": 0.06, "grad_norm": 7.710526466369629, "learning_rate": 9.56634584189467e-06, "loss": 2.6252, "step": 4423 }, { "epoch": 0.06, "grad_norm": 8.193989753723145, "learning_rate": 9.568508705526117e-06, "loss": 3.0052, "step": 4424 }, { "epoch": 0.06, "grad_norm": 9.958916664123535, "learning_rate": 9.570671569157565e-06, "loss": 3.9802, "step": 4425 }, { "epoch": 0.06, "grad_norm": 9.711872100830078, "learning_rate": 9.572834432789013e-06, "loss": 3.1922, "step": 4426 }, { "epoch": 0.06, "grad_norm": 8.198601722717285, "learning_rate": 9.574997296420461e-06, "loss": 2.7019, "step": 4427 }, { "epoch": 0.06, "grad_norm": 9.091456413269043, "learning_rate": 9.57716016005191e-06, "loss": 3.3399, "step": 4428 }, { "epoch": 0.06, "grad_norm": 9.04180908203125, "learning_rate": 9.579323023683357e-06, "loss": 3.1912, "step": 4429 }, { "epoch": 0.06, "grad_norm": 8.864079475402832, "learning_rate": 9.581485887314806e-06, "loss": 3.1498, "step": 4430 }, { "epoch": 0.06, "grad_norm": 9.874260902404785, "learning_rate": 9.583648750946254e-06, "loss": 2.6859, "step": 4431 }, { "epoch": 0.06, "grad_norm": 8.32802963256836, "learning_rate": 9.585811614577702e-06, "loss": 3.0718, "step": 4432 }, { "epoch": 0.06, "grad_norm": 8.88637638092041, "learning_rate": 9.58797447820915e-06, "loss": 2.9355, "step": 4433 }, { "epoch": 0.06, "grad_norm": 8.897319793701172, "learning_rate": 9.590137341840598e-06, "loss": 2.6353, "step": 4434 }, { "epoch": 0.06, "grad_norm": 8.941696166992188, "learning_rate": 9.592300205472046e-06, "loss": 3.102, "step": 4435 }, { "epoch": 0.06, "grad_norm": 8.720366477966309, "learning_rate": 9.594463069103494e-06, "loss": 2.8187, "step": 4436 }, { "epoch": 0.06, "grad_norm": 9.229879379272461, "learning_rate": 9.596625932734942e-06, "loss": 3.3053, "step": 4437 }, { "epoch": 0.06, "grad_norm": 7.993464469909668, "learning_rate": 9.59878879636639e-06, "loss": 3.0987, "step": 4438 }, { "epoch": 0.06, "grad_norm": 9.56584644317627, "learning_rate": 9.600951659997838e-06, "loss": 3.2988, "step": 4439 }, { "epoch": 0.06, "grad_norm": 8.39967155456543, "learning_rate": 9.603114523629286e-06, "loss": 3.0376, "step": 4440 }, { "epoch": 0.06, "grad_norm": 10.189493179321289, "learning_rate": 9.605277387260734e-06, "loss": 3.1474, "step": 4441 }, { "epoch": 0.06, "grad_norm": 8.570097923278809, "learning_rate": 9.607440250892182e-06, "loss": 3.2108, "step": 4442 }, { "epoch": 0.06, "grad_norm": 9.175278663635254, "learning_rate": 9.60960311452363e-06, "loss": 2.5711, "step": 4443 }, { "epoch": 0.06, "grad_norm": 7.805870532989502, "learning_rate": 9.611765978155078e-06, "loss": 2.5282, "step": 4444 }, { "epoch": 0.06, "grad_norm": 7.992625713348389, "learning_rate": 9.613928841786526e-06, "loss": 2.7502, "step": 4445 }, { "epoch": 0.06, "grad_norm": 8.614931106567383, "learning_rate": 9.616091705417974e-06, "loss": 2.6571, "step": 4446 }, { "epoch": 0.06, "grad_norm": 9.421575546264648, "learning_rate": 9.618254569049422e-06, "loss": 3.0027, "step": 4447 }, { "epoch": 0.06, "grad_norm": 8.225025177001953, "learning_rate": 9.62041743268087e-06, "loss": 2.3394, "step": 4448 }, { "epoch": 0.06, "grad_norm": 8.04159164428711, "learning_rate": 9.622580296312319e-06, "loss": 3.2688, "step": 4449 }, { "epoch": 0.06, "grad_norm": 8.665193557739258, "learning_rate": 9.624743159943767e-06, "loss": 2.7962, "step": 4450 }, { "epoch": 0.06, "grad_norm": 8.660848617553711, "learning_rate": 9.626906023575215e-06, "loss": 2.7387, "step": 4451 }, { "epoch": 0.06, "grad_norm": 7.023551940917969, "learning_rate": 9.629068887206663e-06, "loss": 2.6053, "step": 4452 }, { "epoch": 0.06, "grad_norm": 8.918055534362793, "learning_rate": 9.63123175083811e-06, "loss": 3.1622, "step": 4453 }, { "epoch": 0.06, "grad_norm": 8.937703132629395, "learning_rate": 9.633394614469559e-06, "loss": 2.7432, "step": 4454 }, { "epoch": 0.06, "grad_norm": 8.370284080505371, "learning_rate": 9.635557478101007e-06, "loss": 2.8863, "step": 4455 }, { "epoch": 0.06, "grad_norm": 8.236641883850098, "learning_rate": 9.637720341732455e-06, "loss": 2.5623, "step": 4456 }, { "epoch": 0.06, "grad_norm": 9.09947395324707, "learning_rate": 9.639883205363903e-06, "loss": 2.7999, "step": 4457 }, { "epoch": 0.06, "grad_norm": 8.1565580368042, "learning_rate": 9.642046068995351e-06, "loss": 2.8874, "step": 4458 }, { "epoch": 0.06, "grad_norm": 8.259337425231934, "learning_rate": 9.644208932626799e-06, "loss": 2.6685, "step": 4459 }, { "epoch": 0.06, "grad_norm": 8.39275074005127, "learning_rate": 9.646371796258247e-06, "loss": 2.6405, "step": 4460 }, { "epoch": 0.06, "grad_norm": 8.756250381469727, "learning_rate": 9.648534659889695e-06, "loss": 2.9878, "step": 4461 }, { "epoch": 0.06, "grad_norm": 9.678945541381836, "learning_rate": 9.650697523521143e-06, "loss": 3.2495, "step": 4462 }, { "epoch": 0.06, "grad_norm": 9.613852500915527, "learning_rate": 9.652860387152591e-06, "loss": 2.8691, "step": 4463 }, { "epoch": 0.06, "grad_norm": 8.129125595092773, "learning_rate": 9.65502325078404e-06, "loss": 2.8243, "step": 4464 }, { "epoch": 0.06, "grad_norm": 9.998790740966797, "learning_rate": 9.657186114415487e-06, "loss": 2.9242, "step": 4465 }, { "epoch": 0.06, "grad_norm": 8.973047256469727, "learning_rate": 9.659348978046935e-06, "loss": 3.2831, "step": 4466 }, { "epoch": 0.06, "grad_norm": 9.17416000366211, "learning_rate": 9.661511841678383e-06, "loss": 2.7508, "step": 4467 }, { "epoch": 0.06, "grad_norm": 8.182890892028809, "learning_rate": 9.663674705309831e-06, "loss": 2.8339, "step": 4468 }, { "epoch": 0.06, "grad_norm": 9.494181632995605, "learning_rate": 9.665837568941278e-06, "loss": 3.1017, "step": 4469 }, { "epoch": 0.06, "grad_norm": 11.023253440856934, "learning_rate": 9.668000432572728e-06, "loss": 3.0409, "step": 4470 }, { "epoch": 0.06, "grad_norm": 9.853645324707031, "learning_rate": 9.670163296204174e-06, "loss": 2.6978, "step": 4471 }, { "epoch": 0.06, "grad_norm": 11.838099479675293, "learning_rate": 9.672326159835624e-06, "loss": 3.2067, "step": 4472 }, { "epoch": 0.06, "grad_norm": 9.961958885192871, "learning_rate": 9.67448902346707e-06, "loss": 2.8348, "step": 4473 }, { "epoch": 0.06, "grad_norm": 10.211908340454102, "learning_rate": 9.67665188709852e-06, "loss": 3.4733, "step": 4474 }, { "epoch": 0.06, "grad_norm": 8.575922966003418, "learning_rate": 9.678814750729966e-06, "loss": 2.3076, "step": 4475 }, { "epoch": 0.06, "grad_norm": 9.36667251586914, "learning_rate": 9.680977614361416e-06, "loss": 3.4415, "step": 4476 }, { "epoch": 0.06, "grad_norm": 8.538603782653809, "learning_rate": 9.683140477992862e-06, "loss": 2.7719, "step": 4477 }, { "epoch": 0.06, "grad_norm": 10.972095489501953, "learning_rate": 9.685303341624312e-06, "loss": 2.9735, "step": 4478 }, { "epoch": 0.06, "grad_norm": 8.101367950439453, "learning_rate": 9.687466205255758e-06, "loss": 2.5604, "step": 4479 }, { "epoch": 0.06, "grad_norm": 9.81029224395752, "learning_rate": 9.689629068887208e-06, "loss": 3.203, "step": 4480 }, { "epoch": 0.06, "grad_norm": 9.837089538574219, "learning_rate": 9.691791932518656e-06, "loss": 2.7492, "step": 4481 }, { "epoch": 0.06, "grad_norm": 8.837220191955566, "learning_rate": 9.693954796150104e-06, "loss": 2.9145, "step": 4482 }, { "epoch": 0.06, "grad_norm": 9.017187118530273, "learning_rate": 9.696117659781552e-06, "loss": 2.8772, "step": 4483 }, { "epoch": 0.06, "grad_norm": 8.902175903320312, "learning_rate": 9.698280523413e-06, "loss": 2.9077, "step": 4484 }, { "epoch": 0.06, "grad_norm": 9.666393280029297, "learning_rate": 9.700443387044448e-06, "loss": 3.0506, "step": 4485 }, { "epoch": 0.06, "grad_norm": 8.783772468566895, "learning_rate": 9.702606250675896e-06, "loss": 3.3142, "step": 4486 }, { "epoch": 0.06, "grad_norm": 8.456274032592773, "learning_rate": 9.704769114307344e-06, "loss": 3.2628, "step": 4487 }, { "epoch": 0.06, "grad_norm": 8.418381690979004, "learning_rate": 9.70693197793879e-06, "loss": 3.2216, "step": 4488 }, { "epoch": 0.06, "grad_norm": 8.976049423217773, "learning_rate": 9.70909484157024e-06, "loss": 2.8108, "step": 4489 }, { "epoch": 0.06, "grad_norm": 9.142168998718262, "learning_rate": 9.711257705201687e-06, "loss": 2.38, "step": 4490 }, { "epoch": 0.06, "grad_norm": 8.279064178466797, "learning_rate": 9.713420568833137e-06, "loss": 2.9498, "step": 4491 }, { "epoch": 0.06, "grad_norm": 9.10094928741455, "learning_rate": 9.715583432464583e-06, "loss": 2.7711, "step": 4492 }, { "epoch": 0.06, "grad_norm": 9.111270904541016, "learning_rate": 9.717746296096033e-06, "loss": 3.2539, "step": 4493 }, { "epoch": 0.06, "grad_norm": 9.488042831420898, "learning_rate": 9.719909159727479e-06, "loss": 2.8196, "step": 4494 }, { "epoch": 0.06, "grad_norm": 8.566080093383789, "learning_rate": 9.722072023358929e-06, "loss": 3.115, "step": 4495 }, { "epoch": 0.06, "grad_norm": 9.3357515335083, "learning_rate": 9.724234886990375e-06, "loss": 3.2765, "step": 4496 }, { "epoch": 0.06, "grad_norm": 8.082293510437012, "learning_rate": 9.726397750621825e-06, "loss": 2.7845, "step": 4497 }, { "epoch": 0.06, "grad_norm": 8.967899322509766, "learning_rate": 9.728560614253271e-06, "loss": 3.2684, "step": 4498 }, { "epoch": 0.06, "grad_norm": 9.76201057434082, "learning_rate": 9.730723477884721e-06, "loss": 2.8102, "step": 4499 }, { "epoch": 0.06, "grad_norm": 8.750777244567871, "learning_rate": 9.732886341516167e-06, "loss": 2.9385, "step": 4500 }, { "epoch": 0.06, "grad_norm": 9.051376342773438, "learning_rate": 9.735049205147617e-06, "loss": 3.1316, "step": 4501 }, { "epoch": 0.06, "grad_norm": 9.479924201965332, "learning_rate": 9.737212068779063e-06, "loss": 3.4787, "step": 4502 }, { "epoch": 0.06, "grad_norm": 9.760754585266113, "learning_rate": 9.739374932410513e-06, "loss": 3.3881, "step": 4503 }, { "epoch": 0.06, "grad_norm": 9.038418769836426, "learning_rate": 9.74153779604196e-06, "loss": 3.2608, "step": 4504 }, { "epoch": 0.06, "grad_norm": 8.827065467834473, "learning_rate": 9.74370065967341e-06, "loss": 3.2494, "step": 4505 }, { "epoch": 0.06, "grad_norm": 9.463622093200684, "learning_rate": 9.745863523304856e-06, "loss": 3.2454, "step": 4506 }, { "epoch": 0.06, "grad_norm": 8.500722885131836, "learning_rate": 9.748026386936304e-06, "loss": 2.4543, "step": 4507 }, { "epoch": 0.06, "grad_norm": 10.43653392791748, "learning_rate": 9.750189250567752e-06, "loss": 3.0733, "step": 4508 }, { "epoch": 0.06, "grad_norm": 9.812752723693848, "learning_rate": 9.7523521141992e-06, "loss": 3.2687, "step": 4509 }, { "epoch": 0.06, "grad_norm": 9.75922966003418, "learning_rate": 9.754514977830648e-06, "loss": 3.5995, "step": 4510 }, { "epoch": 0.06, "grad_norm": 8.580353736877441, "learning_rate": 9.756677841462096e-06, "loss": 2.5249, "step": 4511 }, { "epoch": 0.06, "grad_norm": 8.848684310913086, "learning_rate": 9.758840705093546e-06, "loss": 3.1798, "step": 4512 }, { "epoch": 0.06, "grad_norm": 8.187411308288574, "learning_rate": 9.761003568724992e-06, "loss": 2.7038, "step": 4513 }, { "epoch": 0.06, "grad_norm": 9.014395713806152, "learning_rate": 9.763166432356442e-06, "loss": 2.6763, "step": 4514 }, { "epoch": 0.06, "grad_norm": 7.629156112670898, "learning_rate": 9.765329295987888e-06, "loss": 2.8131, "step": 4515 }, { "epoch": 0.06, "grad_norm": 8.780067443847656, "learning_rate": 9.767492159619338e-06, "loss": 3.0283, "step": 4516 }, { "epoch": 0.06, "grad_norm": 9.300342559814453, "learning_rate": 9.769655023250784e-06, "loss": 3.3103, "step": 4517 }, { "epoch": 0.06, "grad_norm": 10.008752822875977, "learning_rate": 9.771817886882234e-06, "loss": 3.2282, "step": 4518 }, { "epoch": 0.06, "grad_norm": 9.080896377563477, "learning_rate": 9.77398075051368e-06, "loss": 3.0206, "step": 4519 }, { "epoch": 0.06, "grad_norm": 8.566323280334473, "learning_rate": 9.77614361414513e-06, "loss": 2.9802, "step": 4520 }, { "epoch": 0.06, "grad_norm": 9.47691822052002, "learning_rate": 9.778306477776576e-06, "loss": 2.9648, "step": 4521 }, { "epoch": 0.06, "grad_norm": 9.074094772338867, "learning_rate": 9.780469341408026e-06, "loss": 3.2218, "step": 4522 }, { "epoch": 0.06, "grad_norm": 8.308806419372559, "learning_rate": 9.782632205039473e-06, "loss": 3.3138, "step": 4523 }, { "epoch": 0.06, "grad_norm": 8.293633460998535, "learning_rate": 9.78479506867092e-06, "loss": 2.809, "step": 4524 }, { "epoch": 0.06, "grad_norm": 8.487576484680176, "learning_rate": 9.786957932302369e-06, "loss": 2.5431, "step": 4525 }, { "epoch": 0.06, "grad_norm": 9.63926887512207, "learning_rate": 9.789120795933817e-06, "loss": 2.7607, "step": 4526 }, { "epoch": 0.06, "grad_norm": 7.9988579750061035, "learning_rate": 9.791283659565265e-06, "loss": 3.1544, "step": 4527 }, { "epoch": 0.06, "grad_norm": 7.832481861114502, "learning_rate": 9.793446523196713e-06, "loss": 2.5215, "step": 4528 }, { "epoch": 0.06, "grad_norm": 8.475835800170898, "learning_rate": 9.795609386828161e-06, "loss": 2.4403, "step": 4529 }, { "epoch": 0.06, "grad_norm": 8.348342895507812, "learning_rate": 9.797772250459609e-06, "loss": 2.786, "step": 4530 }, { "epoch": 0.06, "grad_norm": 9.60575008392334, "learning_rate": 9.799935114091057e-06, "loss": 3.2173, "step": 4531 }, { "epoch": 0.06, "grad_norm": 8.612210273742676, "learning_rate": 9.802097977722505e-06, "loss": 3.1443, "step": 4532 }, { "epoch": 0.06, "grad_norm": 9.650577545166016, "learning_rate": 9.804260841353953e-06, "loss": 2.6875, "step": 4533 }, { "epoch": 0.06, "grad_norm": 8.069666862487793, "learning_rate": 9.806423704985401e-06, "loss": 2.4937, "step": 4534 }, { "epoch": 0.06, "grad_norm": 10.480195045471191, "learning_rate": 9.80858656861685e-06, "loss": 3.5251, "step": 4535 }, { "epoch": 0.06, "grad_norm": 7.803302764892578, "learning_rate": 9.810749432248297e-06, "loss": 2.6119, "step": 4536 }, { "epoch": 0.06, "grad_norm": 8.481724739074707, "learning_rate": 9.812912295879745e-06, "loss": 2.9088, "step": 4537 }, { "epoch": 0.06, "grad_norm": 7.857794761657715, "learning_rate": 9.815075159511193e-06, "loss": 2.7979, "step": 4538 }, { "epoch": 0.06, "grad_norm": 8.49632740020752, "learning_rate": 9.817238023142641e-06, "loss": 2.7369, "step": 4539 }, { "epoch": 0.06, "grad_norm": 9.464743614196777, "learning_rate": 9.81940088677409e-06, "loss": 3.0754, "step": 4540 }, { "epoch": 0.06, "grad_norm": 10.163299560546875, "learning_rate": 9.821563750405537e-06, "loss": 3.1204, "step": 4541 }, { "epoch": 0.06, "grad_norm": 8.465598106384277, "learning_rate": 9.823726614036986e-06, "loss": 2.7371, "step": 4542 }, { "epoch": 0.06, "grad_norm": 9.956294059753418, "learning_rate": 9.825889477668434e-06, "loss": 2.7915, "step": 4543 }, { "epoch": 0.06, "grad_norm": 7.881450653076172, "learning_rate": 9.828052341299882e-06, "loss": 2.9542, "step": 4544 }, { "epoch": 0.06, "grad_norm": 10.503715515136719, "learning_rate": 9.83021520493133e-06, "loss": 3.0027, "step": 4545 }, { "epoch": 0.06, "grad_norm": 8.672390937805176, "learning_rate": 9.832378068562778e-06, "loss": 2.6864, "step": 4546 }, { "epoch": 0.06, "grad_norm": 10.347936630249023, "learning_rate": 9.834540932194226e-06, "loss": 3.132, "step": 4547 }, { "epoch": 0.06, "grad_norm": 8.952723503112793, "learning_rate": 9.836703795825674e-06, "loss": 2.772, "step": 4548 }, { "epoch": 0.06, "grad_norm": 9.15014362335205, "learning_rate": 9.838866659457122e-06, "loss": 2.8806, "step": 4549 }, { "epoch": 0.06, "grad_norm": 9.432613372802734, "learning_rate": 9.84102952308857e-06, "loss": 2.9898, "step": 4550 }, { "epoch": 0.06, "grad_norm": 8.813349723815918, "learning_rate": 9.843192386720018e-06, "loss": 3.0485, "step": 4551 }, { "epoch": 0.06, "grad_norm": 8.105156898498535, "learning_rate": 9.845355250351466e-06, "loss": 2.9774, "step": 4552 }, { "epoch": 0.06, "grad_norm": 9.247737884521484, "learning_rate": 9.847518113982914e-06, "loss": 2.69, "step": 4553 }, { "epoch": 0.06, "grad_norm": 8.780657768249512, "learning_rate": 9.849680977614362e-06, "loss": 3.079, "step": 4554 }, { "epoch": 0.06, "grad_norm": 8.836368560791016, "learning_rate": 9.85184384124581e-06, "loss": 3.0062, "step": 4555 }, { "epoch": 0.06, "grad_norm": 9.357429504394531, "learning_rate": 9.854006704877258e-06, "loss": 3.1424, "step": 4556 }, { "epoch": 0.06, "grad_norm": 8.568764686584473, "learning_rate": 9.856169568508706e-06, "loss": 2.7721, "step": 4557 }, { "epoch": 0.06, "grad_norm": 9.206504821777344, "learning_rate": 9.858332432140154e-06, "loss": 2.6883, "step": 4558 }, { "epoch": 0.06, "grad_norm": 8.76063060760498, "learning_rate": 9.860495295771602e-06, "loss": 3.2406, "step": 4559 }, { "epoch": 0.06, "grad_norm": 10.6829833984375, "learning_rate": 9.86265815940305e-06, "loss": 3.3762, "step": 4560 }, { "epoch": 0.06, "grad_norm": 8.714625358581543, "learning_rate": 9.864821023034498e-06, "loss": 2.8472, "step": 4561 }, { "epoch": 0.06, "grad_norm": 9.167410850524902, "learning_rate": 9.866983886665947e-06, "loss": 3.4649, "step": 4562 }, { "epoch": 0.06, "grad_norm": 8.158023834228516, "learning_rate": 9.869146750297395e-06, "loss": 2.5424, "step": 4563 }, { "epoch": 0.06, "grad_norm": 9.316020965576172, "learning_rate": 9.871309613928843e-06, "loss": 2.9033, "step": 4564 }, { "epoch": 0.06, "grad_norm": 7.448459625244141, "learning_rate": 9.87347247756029e-06, "loss": 2.491, "step": 4565 }, { "epoch": 0.06, "grad_norm": 9.678208351135254, "learning_rate": 9.875635341191739e-06, "loss": 2.7251, "step": 4566 }, { "epoch": 0.06, "grad_norm": 8.180923461914062, "learning_rate": 9.877798204823187e-06, "loss": 2.932, "step": 4567 }, { "epoch": 0.06, "grad_norm": 9.122186660766602, "learning_rate": 9.879961068454635e-06, "loss": 3.0784, "step": 4568 }, { "epoch": 0.06, "grad_norm": 8.823481559753418, "learning_rate": 9.882123932086083e-06, "loss": 3.0791, "step": 4569 }, { "epoch": 0.06, "grad_norm": 9.797303199768066, "learning_rate": 9.884286795717531e-06, "loss": 2.9273, "step": 4570 }, { "epoch": 0.06, "grad_norm": 8.324479103088379, "learning_rate": 9.886449659348979e-06, "loss": 2.571, "step": 4571 }, { "epoch": 0.06, "grad_norm": 8.85826301574707, "learning_rate": 9.888612522980427e-06, "loss": 2.8642, "step": 4572 }, { "epoch": 0.06, "grad_norm": 10.071979522705078, "learning_rate": 9.890775386611875e-06, "loss": 3.2146, "step": 4573 }, { "epoch": 0.06, "grad_norm": 9.404491424560547, "learning_rate": 9.892938250243323e-06, "loss": 2.9389, "step": 4574 }, { "epoch": 0.06, "grad_norm": 9.434896469116211, "learning_rate": 9.895101113874771e-06, "loss": 2.9346, "step": 4575 }, { "epoch": 0.06, "grad_norm": 9.354877471923828, "learning_rate": 9.89726397750622e-06, "loss": 3.2273, "step": 4576 }, { "epoch": 0.06, "grad_norm": 8.624001502990723, "learning_rate": 9.899426841137667e-06, "loss": 3.0298, "step": 4577 }, { "epoch": 0.06, "grad_norm": 8.72639274597168, "learning_rate": 9.901589704769115e-06, "loss": 3.1185, "step": 4578 }, { "epoch": 0.06, "grad_norm": 9.082279205322266, "learning_rate": 9.903752568400563e-06, "loss": 3.2723, "step": 4579 }, { "epoch": 0.06, "grad_norm": 7.967621803283691, "learning_rate": 9.905915432032011e-06, "loss": 2.574, "step": 4580 }, { "epoch": 0.06, "grad_norm": 8.42815113067627, "learning_rate": 9.90807829566346e-06, "loss": 3.0269, "step": 4581 }, { "epoch": 0.06, "grad_norm": 8.264918327331543, "learning_rate": 9.910241159294908e-06, "loss": 3.303, "step": 4582 }, { "epoch": 0.06, "grad_norm": 11.090720176696777, "learning_rate": 9.912404022926356e-06, "loss": 2.9106, "step": 4583 }, { "epoch": 0.06, "grad_norm": 8.521371841430664, "learning_rate": 9.914566886557804e-06, "loss": 2.3722, "step": 4584 }, { "epoch": 0.06, "grad_norm": 9.110689163208008, "learning_rate": 9.916729750189252e-06, "loss": 2.9382, "step": 4585 }, { "epoch": 0.06, "grad_norm": 8.24360466003418, "learning_rate": 9.9188926138207e-06, "loss": 2.9229, "step": 4586 }, { "epoch": 0.06, "grad_norm": 8.473345756530762, "learning_rate": 9.921055477452148e-06, "loss": 3.0947, "step": 4587 }, { "epoch": 0.06, "grad_norm": 8.43437671661377, "learning_rate": 9.923218341083596e-06, "loss": 2.7567, "step": 4588 }, { "epoch": 0.06, "grad_norm": 8.335628509521484, "learning_rate": 9.925381204715044e-06, "loss": 2.2471, "step": 4589 }, { "epoch": 0.06, "grad_norm": 8.508617401123047, "learning_rate": 9.927544068346492e-06, "loss": 2.69, "step": 4590 }, { "epoch": 0.06, "grad_norm": 8.934919357299805, "learning_rate": 9.92970693197794e-06, "loss": 2.6349, "step": 4591 }, { "epoch": 0.06, "grad_norm": 9.495323181152344, "learning_rate": 9.931869795609388e-06, "loss": 3.4422, "step": 4592 }, { "epoch": 0.06, "grad_norm": 9.030056953430176, "learning_rate": 9.934032659240836e-06, "loss": 2.9301, "step": 4593 }, { "epoch": 0.06, "grad_norm": 8.008264541625977, "learning_rate": 9.936195522872284e-06, "loss": 2.23, "step": 4594 }, { "epoch": 0.06, "grad_norm": 8.688209533691406, "learning_rate": 9.938358386503732e-06, "loss": 3.2147, "step": 4595 }, { "epoch": 0.06, "grad_norm": 9.527970314025879, "learning_rate": 9.94052125013518e-06, "loss": 2.6164, "step": 4596 }, { "epoch": 0.06, "grad_norm": 10.204471588134766, "learning_rate": 9.942684113766627e-06, "loss": 2.6728, "step": 4597 }, { "epoch": 0.06, "grad_norm": 9.63272476196289, "learning_rate": 9.944846977398076e-06, "loss": 3.3378, "step": 4598 }, { "epoch": 0.06, "grad_norm": 8.603513717651367, "learning_rate": 9.947009841029523e-06, "loss": 2.744, "step": 4599 }, { "epoch": 0.06, "grad_norm": 9.202951431274414, "learning_rate": 9.949172704660972e-06, "loss": 3.084, "step": 4600 }, { "epoch": 0.06, "grad_norm": 10.918322563171387, "learning_rate": 9.951335568292419e-06, "loss": 3.3275, "step": 4601 }, { "epoch": 0.06, "grad_norm": 10.043758392333984, "learning_rate": 9.953498431923869e-06, "loss": 3.7125, "step": 4602 }, { "epoch": 0.06, "grad_norm": 8.497562408447266, "learning_rate": 9.955661295555315e-06, "loss": 3.2774, "step": 4603 }, { "epoch": 0.06, "grad_norm": 10.921435356140137, "learning_rate": 9.957824159186765e-06, "loss": 2.9701, "step": 4604 }, { "epoch": 0.06, "grad_norm": 8.139759063720703, "learning_rate": 9.959987022818211e-06, "loss": 2.1169, "step": 4605 }, { "epoch": 0.06, "grad_norm": 8.490910530090332, "learning_rate": 9.96214988644966e-06, "loss": 3.0516, "step": 4606 }, { "epoch": 0.06, "grad_norm": 9.48122501373291, "learning_rate": 9.964312750081107e-06, "loss": 2.9353, "step": 4607 }, { "epoch": 0.06, "grad_norm": 7.49453592300415, "learning_rate": 9.966475613712557e-06, "loss": 2.5912, "step": 4608 }, { "epoch": 0.06, "grad_norm": 9.154522895812988, "learning_rate": 9.968638477344005e-06, "loss": 2.8687, "step": 4609 }, { "epoch": 0.06, "grad_norm": 7.705408573150635, "learning_rate": 9.970801340975453e-06, "loss": 3.0768, "step": 4610 }, { "epoch": 0.06, "grad_norm": 8.328909873962402, "learning_rate": 9.972964204606901e-06, "loss": 2.7144, "step": 4611 }, { "epoch": 0.06, "grad_norm": 8.30532455444336, "learning_rate": 9.975127068238349e-06, "loss": 2.6548, "step": 4612 }, { "epoch": 0.06, "grad_norm": 8.747802734375, "learning_rate": 9.977289931869797e-06, "loss": 2.86, "step": 4613 }, { "epoch": 0.06, "grad_norm": 8.16724681854248, "learning_rate": 9.979452795501245e-06, "loss": 2.6186, "step": 4614 }, { "epoch": 0.06, "grad_norm": 10.109244346618652, "learning_rate": 9.981615659132693e-06, "loss": 3.4898, "step": 4615 }, { "epoch": 0.06, "grad_norm": 7.8330488204956055, "learning_rate": 9.98377852276414e-06, "loss": 2.6881, "step": 4616 }, { "epoch": 0.06, "grad_norm": 7.854090690612793, "learning_rate": 9.98594138639559e-06, "loss": 2.4027, "step": 4617 }, { "epoch": 0.06, "grad_norm": 8.389116287231445, "learning_rate": 9.988104250027036e-06, "loss": 2.6865, "step": 4618 }, { "epoch": 0.06, "grad_norm": 8.429569244384766, "learning_rate": 9.990267113658485e-06, "loss": 3.1305, "step": 4619 }, { "epoch": 0.06, "grad_norm": 8.940682411193848, "learning_rate": 9.992429977289932e-06, "loss": 2.8983, "step": 4620 }, { "epoch": 0.06, "grad_norm": 8.488338470458984, "learning_rate": 9.994592840921382e-06, "loss": 2.325, "step": 4621 }, { "epoch": 0.06, "grad_norm": 9.224360466003418, "learning_rate": 9.996755704552828e-06, "loss": 2.8989, "step": 4622 }, { "epoch": 0.06, "grad_norm": 8.542157173156738, "learning_rate": 9.998918568184278e-06, "loss": 3.1889, "step": 4623 }, { "epoch": 0.06, "grad_norm": 8.345837593078613, "learning_rate": 1.0001081431815724e-05, "loss": 3.2333, "step": 4624 }, { "epoch": 0.06, "grad_norm": 8.655518531799316, "learning_rate": 1.0003244295447172e-05, "loss": 2.5477, "step": 4625 }, { "epoch": 0.06, "grad_norm": 8.367290496826172, "learning_rate": 1.0005407159078622e-05, "loss": 2.9558, "step": 4626 }, { "epoch": 0.06, "grad_norm": 8.301728248596191, "learning_rate": 1.000757002271007e-05, "loss": 2.9127, "step": 4627 }, { "epoch": 0.06, "grad_norm": 8.506078720092773, "learning_rate": 1.0009732886341516e-05, "loss": 3.2594, "step": 4628 }, { "epoch": 0.06, "grad_norm": 8.768259048461914, "learning_rate": 1.0011895749972964e-05, "loss": 3.2419, "step": 4629 }, { "epoch": 0.06, "grad_norm": 9.466641426086426, "learning_rate": 1.0014058613604414e-05, "loss": 3.5451, "step": 4630 }, { "epoch": 0.06, "grad_norm": 7.592283725738525, "learning_rate": 1.0016221477235862e-05, "loss": 2.4204, "step": 4631 }, { "epoch": 0.06, "grad_norm": 8.924323081970215, "learning_rate": 1.0018384340867308e-05, "loss": 2.6538, "step": 4632 }, { "epoch": 0.06, "grad_norm": 7.8734331130981445, "learning_rate": 1.0020547204498756e-05, "loss": 2.762, "step": 4633 }, { "epoch": 0.06, "grad_norm": 9.645410537719727, "learning_rate": 1.0022710068130206e-05, "loss": 3.4182, "step": 4634 }, { "epoch": 0.06, "grad_norm": 7.905970573425293, "learning_rate": 1.0024872931761653e-05, "loss": 2.7087, "step": 4635 }, { "epoch": 0.06, "grad_norm": 9.328327178955078, "learning_rate": 1.00270357953931e-05, "loss": 3.4672, "step": 4636 }, { "epoch": 0.06, "grad_norm": 9.674833297729492, "learning_rate": 1.0029198659024549e-05, "loss": 2.8149, "step": 4637 }, { "epoch": 0.06, "grad_norm": 8.11709976196289, "learning_rate": 1.0031361522655998e-05, "loss": 2.6295, "step": 4638 }, { "epoch": 0.06, "grad_norm": 9.052335739135742, "learning_rate": 1.0033524386287445e-05, "loss": 3.2177, "step": 4639 }, { "epoch": 0.06, "grad_norm": 9.073664665222168, "learning_rate": 1.0035687249918893e-05, "loss": 2.8284, "step": 4640 }, { "epoch": 0.06, "grad_norm": 8.073100090026855, "learning_rate": 1.0037850113550343e-05, "loss": 2.7485, "step": 4641 }, { "epoch": 0.06, "grad_norm": 8.31148624420166, "learning_rate": 1.004001297718179e-05, "loss": 2.4489, "step": 4642 }, { "epoch": 0.06, "grad_norm": 8.3496675491333, "learning_rate": 1.0042175840813237e-05, "loss": 2.9038, "step": 4643 }, { "epoch": 0.06, "grad_norm": 8.726555824279785, "learning_rate": 1.0044338704444685e-05, "loss": 2.9403, "step": 4644 }, { "epoch": 0.06, "grad_norm": 7.6431756019592285, "learning_rate": 1.0046501568076135e-05, "loss": 2.7297, "step": 4645 }, { "epoch": 0.06, "grad_norm": 8.163944244384766, "learning_rate": 1.0048664431707583e-05, "loss": 2.7075, "step": 4646 }, { "epoch": 0.06, "grad_norm": 8.355510711669922, "learning_rate": 1.005082729533903e-05, "loss": 2.8352, "step": 4647 }, { "epoch": 0.06, "grad_norm": 9.221184730529785, "learning_rate": 1.0052990158970477e-05, "loss": 3.0869, "step": 4648 }, { "epoch": 0.06, "grad_norm": 9.6148681640625, "learning_rate": 1.0055153022601927e-05, "loss": 2.9247, "step": 4649 }, { "epoch": 0.06, "grad_norm": 9.31057357788086, "learning_rate": 1.0057315886233375e-05, "loss": 2.7531, "step": 4650 }, { "epoch": 0.06, "grad_norm": 8.413174629211426, "learning_rate": 1.0059478749864821e-05, "loss": 3.0136, "step": 4651 }, { "epoch": 0.06, "grad_norm": 9.149697303771973, "learning_rate": 1.006164161349627e-05, "loss": 3.26, "step": 4652 }, { "epoch": 0.06, "grad_norm": 9.452775955200195, "learning_rate": 1.006380447712772e-05, "loss": 3.302, "step": 4653 }, { "epoch": 0.06, "grad_norm": 10.197787284851074, "learning_rate": 1.0065967340759166e-05, "loss": 2.6315, "step": 4654 }, { "epoch": 0.06, "grad_norm": 9.478540420532227, "learning_rate": 1.0068130204390614e-05, "loss": 2.8362, "step": 4655 }, { "epoch": 0.06, "grad_norm": 9.30152702331543, "learning_rate": 1.0070293068022062e-05, "loss": 3.1514, "step": 4656 }, { "epoch": 0.06, "grad_norm": 9.34856128692627, "learning_rate": 1.0072455931653511e-05, "loss": 2.9443, "step": 4657 }, { "epoch": 0.06, "grad_norm": 8.474870681762695, "learning_rate": 1.0074618795284958e-05, "loss": 2.5476, "step": 4658 }, { "epoch": 0.06, "grad_norm": 9.956130981445312, "learning_rate": 1.0076781658916406e-05, "loss": 2.5636, "step": 4659 }, { "epoch": 0.06, "grad_norm": 10.238974571228027, "learning_rate": 1.0078944522547854e-05, "loss": 2.9673, "step": 4660 }, { "epoch": 0.06, "grad_norm": 9.192938804626465, "learning_rate": 1.0081107386179304e-05, "loss": 3.0654, "step": 4661 }, { "epoch": 0.06, "grad_norm": 8.230567932128906, "learning_rate": 1.008327024981075e-05, "loss": 3.1463, "step": 4662 }, { "epoch": 0.06, "grad_norm": 8.35970401763916, "learning_rate": 1.0085433113442198e-05, "loss": 2.646, "step": 4663 }, { "epoch": 0.06, "grad_norm": 8.642367362976074, "learning_rate": 1.0087595977073646e-05, "loss": 2.776, "step": 4664 }, { "epoch": 0.06, "grad_norm": 8.753671646118164, "learning_rate": 1.0089758840705096e-05, "loss": 3.0742, "step": 4665 }, { "epoch": 0.06, "grad_norm": 9.168729782104492, "learning_rate": 1.0091921704336542e-05, "loss": 2.9423, "step": 4666 }, { "epoch": 0.06, "grad_norm": 8.415353775024414, "learning_rate": 1.009408456796799e-05, "loss": 3.3137, "step": 4667 }, { "epoch": 0.06, "grad_norm": 8.416022300720215, "learning_rate": 1.0096247431599438e-05, "loss": 2.761, "step": 4668 }, { "epoch": 0.06, "grad_norm": 8.827641487121582, "learning_rate": 1.0098410295230886e-05, "loss": 3.3284, "step": 4669 }, { "epoch": 0.06, "grad_norm": 9.295550346374512, "learning_rate": 1.0100573158862334e-05, "loss": 2.6608, "step": 4670 }, { "epoch": 0.06, "grad_norm": 9.041533470153809, "learning_rate": 1.0102736022493782e-05, "loss": 3.5846, "step": 4671 }, { "epoch": 0.06, "grad_norm": 7.613980293273926, "learning_rate": 1.0104898886125232e-05, "loss": 2.3687, "step": 4672 }, { "epoch": 0.06, "grad_norm": 8.377936363220215, "learning_rate": 1.0107061749756678e-05, "loss": 3.001, "step": 4673 }, { "epoch": 0.06, "grad_norm": 9.208508491516113, "learning_rate": 1.0109224613388127e-05, "loss": 3.9278, "step": 4674 }, { "epoch": 0.06, "grad_norm": 8.287281036376953, "learning_rate": 1.0111387477019575e-05, "loss": 2.5578, "step": 4675 }, { "epoch": 0.06, "grad_norm": 8.41356086730957, "learning_rate": 1.0113550340651024e-05, "loss": 3.3599, "step": 4676 }, { "epoch": 0.06, "grad_norm": 8.180895805358887, "learning_rate": 1.011571320428247e-05, "loss": 2.5713, "step": 4677 }, { "epoch": 0.06, "grad_norm": 7.725403308868408, "learning_rate": 1.0117876067913919e-05, "loss": 2.0526, "step": 4678 }, { "epoch": 0.06, "grad_norm": 9.120478630065918, "learning_rate": 1.0120038931545367e-05, "loss": 3.3536, "step": 4679 }, { "epoch": 0.06, "grad_norm": 8.881050109863281, "learning_rate": 1.0122201795176817e-05, "loss": 2.7628, "step": 4680 }, { "epoch": 0.06, "grad_norm": 8.128153800964355, "learning_rate": 1.0124364658808263e-05, "loss": 1.9505, "step": 4681 }, { "epoch": 0.06, "grad_norm": 9.117302894592285, "learning_rate": 1.0126527522439711e-05, "loss": 2.7113, "step": 4682 }, { "epoch": 0.06, "grad_norm": 10.207525253295898, "learning_rate": 1.0128690386071159e-05, "loss": 2.5949, "step": 4683 }, { "epoch": 0.06, "grad_norm": 9.92579460144043, "learning_rate": 1.0130853249702609e-05, "loss": 2.525, "step": 4684 }, { "epoch": 0.06, "grad_norm": 9.184998512268066, "learning_rate": 1.0133016113334055e-05, "loss": 2.9002, "step": 4685 }, { "epoch": 0.06, "grad_norm": 9.16249942779541, "learning_rate": 1.0135178976965503e-05, "loss": 3.1787, "step": 4686 }, { "epoch": 0.06, "grad_norm": 10.11021614074707, "learning_rate": 1.0137341840596951e-05, "loss": 3.2608, "step": 4687 }, { "epoch": 0.06, "grad_norm": 8.500072479248047, "learning_rate": 1.01395047042284e-05, "loss": 2.7578, "step": 4688 }, { "epoch": 0.06, "grad_norm": 9.722749710083008, "learning_rate": 1.0141667567859847e-05, "loss": 2.805, "step": 4689 }, { "epoch": 0.06, "grad_norm": 8.448360443115234, "learning_rate": 1.0143830431491295e-05, "loss": 2.5238, "step": 4690 }, { "epoch": 0.06, "grad_norm": 8.690425872802734, "learning_rate": 1.0145993295122742e-05, "loss": 2.6473, "step": 4691 }, { "epoch": 0.06, "grad_norm": 10.777624130249023, "learning_rate": 1.0148156158754191e-05, "loss": 3.0521, "step": 4692 }, { "epoch": 0.06, "grad_norm": 8.693475723266602, "learning_rate": 1.015031902238564e-05, "loss": 2.7531, "step": 4693 }, { "epoch": 0.06, "grad_norm": 8.840143203735352, "learning_rate": 1.0152481886017088e-05, "loss": 2.3787, "step": 4694 }, { "epoch": 0.06, "grad_norm": 9.036517143249512, "learning_rate": 1.0154644749648534e-05, "loss": 3.2255, "step": 4695 }, { "epoch": 0.06, "grad_norm": 9.437275886535645, "learning_rate": 1.0156807613279984e-05, "loss": 3.0809, "step": 4696 }, { "epoch": 0.06, "grad_norm": 8.814644813537598, "learning_rate": 1.0158970476911432e-05, "loss": 2.57, "step": 4697 }, { "epoch": 0.06, "grad_norm": 9.593345642089844, "learning_rate": 1.016113334054288e-05, "loss": 3.4122, "step": 4698 }, { "epoch": 0.06, "grad_norm": 8.546342849731445, "learning_rate": 1.0163296204174326e-05, "loss": 2.9578, "step": 4699 }, { "epoch": 0.06, "grad_norm": 8.621726036071777, "learning_rate": 1.0165459067805776e-05, "loss": 2.78, "step": 4700 }, { "epoch": 0.06, "grad_norm": 7.967128276824951, "learning_rate": 1.0167621931437224e-05, "loss": 2.5739, "step": 4701 }, { "epoch": 0.06, "grad_norm": 7.967175006866455, "learning_rate": 1.0169784795068672e-05, "loss": 2.9091, "step": 4702 }, { "epoch": 0.06, "grad_norm": 8.370965957641602, "learning_rate": 1.0171947658700118e-05, "loss": 2.7163, "step": 4703 }, { "epoch": 0.06, "grad_norm": 8.085847854614258, "learning_rate": 1.0174110522331568e-05, "loss": 3.2118, "step": 4704 }, { "epoch": 0.06, "grad_norm": 9.048709869384766, "learning_rate": 1.0176273385963016e-05, "loss": 3.4049, "step": 4705 }, { "epoch": 0.06, "grad_norm": 8.716304779052734, "learning_rate": 1.0178436249594462e-05, "loss": 2.8427, "step": 4706 }, { "epoch": 0.06, "grad_norm": 8.272045135498047, "learning_rate": 1.0180599113225912e-05, "loss": 2.6222, "step": 4707 }, { "epoch": 0.06, "grad_norm": 9.481399536132812, "learning_rate": 1.018276197685736e-05, "loss": 2.953, "step": 4708 }, { "epoch": 0.06, "grad_norm": 8.497761726379395, "learning_rate": 1.0184924840488808e-05, "loss": 3.0637, "step": 4709 }, { "epoch": 0.06, "grad_norm": 7.853451728820801, "learning_rate": 1.0187087704120255e-05, "loss": 2.6834, "step": 4710 }, { "epoch": 0.06, "grad_norm": 9.249197006225586, "learning_rate": 1.0189250567751704e-05, "loss": 2.904, "step": 4711 }, { "epoch": 0.06, "grad_norm": 9.237567901611328, "learning_rate": 1.0191413431383152e-05, "loss": 2.7343, "step": 4712 }, { "epoch": 0.06, "grad_norm": 8.64533805847168, "learning_rate": 1.01935762950146e-05, "loss": 2.7764, "step": 4713 }, { "epoch": 0.06, "grad_norm": 6.845547676086426, "learning_rate": 1.0195739158646047e-05, "loss": 2.216, "step": 4714 }, { "epoch": 0.06, "grad_norm": 10.13858413696289, "learning_rate": 1.0197902022277497e-05, "loss": 3.1296, "step": 4715 }, { "epoch": 0.06, "grad_norm": 8.108297348022461, "learning_rate": 1.0200064885908945e-05, "loss": 2.7041, "step": 4716 }, { "epoch": 0.06, "grad_norm": 8.671361923217773, "learning_rate": 1.0202227749540393e-05, "loss": 2.9125, "step": 4717 }, { "epoch": 0.06, "grad_norm": 9.323172569274902, "learning_rate": 1.0204390613171839e-05, "loss": 2.9422, "step": 4718 }, { "epoch": 0.06, "grad_norm": 9.23381233215332, "learning_rate": 1.0206553476803289e-05, "loss": 2.8681, "step": 4719 }, { "epoch": 0.06, "grad_norm": 9.364203453063965, "learning_rate": 1.0208716340434737e-05, "loss": 2.8618, "step": 4720 }, { "epoch": 0.06, "grad_norm": 9.006980895996094, "learning_rate": 1.0210879204066185e-05, "loss": 2.8926, "step": 4721 }, { "epoch": 0.06, "grad_norm": 8.613578796386719, "learning_rate": 1.0213042067697631e-05, "loss": 3.0498, "step": 4722 }, { "epoch": 0.06, "grad_norm": 10.512277603149414, "learning_rate": 1.0215204931329081e-05, "loss": 3.2847, "step": 4723 }, { "epoch": 0.06, "grad_norm": 8.381418228149414, "learning_rate": 1.0217367794960529e-05, "loss": 3.1755, "step": 4724 }, { "epoch": 0.06, "grad_norm": 8.794448852539062, "learning_rate": 1.0219530658591975e-05, "loss": 3.7327, "step": 4725 }, { "epoch": 0.06, "grad_norm": 8.716248512268066, "learning_rate": 1.0221693522223423e-05, "loss": 2.7481, "step": 4726 }, { "epoch": 0.06, "grad_norm": 8.293191909790039, "learning_rate": 1.0223856385854873e-05, "loss": 2.9092, "step": 4727 }, { "epoch": 0.06, "grad_norm": 7.944182395935059, "learning_rate": 1.0226019249486321e-05, "loss": 2.7403, "step": 4728 }, { "epoch": 0.06, "grad_norm": 9.104146003723145, "learning_rate": 1.0228182113117768e-05, "loss": 3.2775, "step": 4729 }, { "epoch": 0.06, "grad_norm": 8.408696174621582, "learning_rate": 1.0230344976749216e-05, "loss": 3.2559, "step": 4730 }, { "epoch": 0.06, "grad_norm": 9.027767181396484, "learning_rate": 1.0232507840380665e-05, "loss": 2.6454, "step": 4731 }, { "epoch": 0.06, "grad_norm": 8.953505516052246, "learning_rate": 1.0234670704012113e-05, "loss": 2.6061, "step": 4732 }, { "epoch": 0.06, "grad_norm": 8.437997817993164, "learning_rate": 1.023683356764356e-05, "loss": 2.9831, "step": 4733 }, { "epoch": 0.06, "grad_norm": 7.455644607543945, "learning_rate": 1.0238996431275008e-05, "loss": 2.7187, "step": 4734 }, { "epoch": 0.06, "grad_norm": 9.782562255859375, "learning_rate": 1.0241159294906458e-05, "loss": 3.2715, "step": 4735 }, { "epoch": 0.06, "grad_norm": 8.02346420288086, "learning_rate": 1.0243322158537906e-05, "loss": 2.9218, "step": 4736 }, { "epoch": 0.06, "grad_norm": 8.232267379760742, "learning_rate": 1.0245485022169352e-05, "loss": 3.0991, "step": 4737 }, { "epoch": 0.06, "grad_norm": 8.246648788452148, "learning_rate": 1.0247647885800802e-05, "loss": 2.6057, "step": 4738 }, { "epoch": 0.06, "grad_norm": 8.921915054321289, "learning_rate": 1.024981074943225e-05, "loss": 2.6817, "step": 4739 }, { "epoch": 0.06, "grad_norm": 8.289639472961426, "learning_rate": 1.0251973613063698e-05, "loss": 2.6326, "step": 4740 }, { "epoch": 0.06, "grad_norm": 8.598947525024414, "learning_rate": 1.0254136476695144e-05, "loss": 3.3335, "step": 4741 }, { "epoch": 0.06, "grad_norm": 9.559932708740234, "learning_rate": 1.0256299340326594e-05, "loss": 2.6259, "step": 4742 }, { "epoch": 0.06, "grad_norm": 8.872060775756836, "learning_rate": 1.0258462203958042e-05, "loss": 2.6659, "step": 4743 }, { "epoch": 0.06, "grad_norm": 7.737900733947754, "learning_rate": 1.0260625067589488e-05, "loss": 2.571, "step": 4744 }, { "epoch": 0.06, "grad_norm": 8.972664833068848, "learning_rate": 1.0262787931220936e-05, "loss": 2.6717, "step": 4745 }, { "epoch": 0.06, "grad_norm": 7.842396259307861, "learning_rate": 1.0264950794852386e-05, "loss": 2.3669, "step": 4746 }, { "epoch": 0.06, "grad_norm": 9.071195602416992, "learning_rate": 1.0267113658483834e-05, "loss": 3.1854, "step": 4747 }, { "epoch": 0.06, "grad_norm": 9.981939315795898, "learning_rate": 1.026927652211528e-05, "loss": 3.0382, "step": 4748 }, { "epoch": 0.06, "grad_norm": 8.993427276611328, "learning_rate": 1.0271439385746729e-05, "loss": 2.9319, "step": 4749 }, { "epoch": 0.06, "grad_norm": 10.354537963867188, "learning_rate": 1.0273602249378178e-05, "loss": 2.7919, "step": 4750 }, { "epoch": 0.06, "grad_norm": 8.842655181884766, "learning_rate": 1.0275765113009626e-05, "loss": 2.6039, "step": 4751 }, { "epoch": 0.06, "grad_norm": 9.174922943115234, "learning_rate": 1.0277927976641073e-05, "loss": 2.8489, "step": 4752 }, { "epoch": 0.06, "grad_norm": 7.594542980194092, "learning_rate": 1.0280090840272521e-05, "loss": 2.4991, "step": 4753 }, { "epoch": 0.06, "grad_norm": 9.252776145935059, "learning_rate": 1.028225370390397e-05, "loss": 3.044, "step": 4754 }, { "epoch": 0.06, "grad_norm": 7.407966136932373, "learning_rate": 1.0284416567535419e-05, "loss": 2.6265, "step": 4755 }, { "epoch": 0.06, "grad_norm": 8.435276985168457, "learning_rate": 1.0286579431166865e-05, "loss": 2.8173, "step": 4756 }, { "epoch": 0.06, "grad_norm": 7.624262809753418, "learning_rate": 1.0288742294798313e-05, "loss": 2.7309, "step": 4757 }, { "epoch": 0.06, "grad_norm": 7.8422322273254395, "learning_rate": 1.0290905158429763e-05, "loss": 1.779, "step": 4758 }, { "epoch": 0.06, "grad_norm": 10.082447052001953, "learning_rate": 1.0293068022061211e-05, "loss": 3.5629, "step": 4759 }, { "epoch": 0.06, "grad_norm": 8.34028148651123, "learning_rate": 1.0295230885692657e-05, "loss": 3.0746, "step": 4760 }, { "epoch": 0.06, "grad_norm": 8.311060905456543, "learning_rate": 1.0297393749324105e-05, "loss": 3.2733, "step": 4761 }, { "epoch": 0.06, "grad_norm": 7.874283313751221, "learning_rate": 1.0299556612955555e-05, "loss": 2.5039, "step": 4762 }, { "epoch": 0.06, "grad_norm": 7.586464881896973, "learning_rate": 1.0301719476587001e-05, "loss": 2.9929, "step": 4763 }, { "epoch": 0.06, "grad_norm": 9.346138000488281, "learning_rate": 1.030388234021845e-05, "loss": 2.5999, "step": 4764 }, { "epoch": 0.06, "grad_norm": 8.070076942443848, "learning_rate": 1.0306045203849897e-05, "loss": 2.5358, "step": 4765 }, { "epoch": 0.06, "grad_norm": 8.226676940917969, "learning_rate": 1.0308208067481347e-05, "loss": 3.01, "step": 4766 }, { "epoch": 0.06, "grad_norm": 7.848090171813965, "learning_rate": 1.0310370931112794e-05, "loss": 3.0322, "step": 4767 }, { "epoch": 0.06, "grad_norm": 9.582955360412598, "learning_rate": 1.0312533794744242e-05, "loss": 3.0019, "step": 4768 }, { "epoch": 0.06, "grad_norm": 8.268518447875977, "learning_rate": 1.0314696658375691e-05, "loss": 2.437, "step": 4769 }, { "epoch": 0.06, "grad_norm": 8.196647644042969, "learning_rate": 1.031685952200714e-05, "loss": 2.9299, "step": 4770 }, { "epoch": 0.06, "grad_norm": 8.705584526062012, "learning_rate": 1.0319022385638586e-05, "loss": 2.3393, "step": 4771 }, { "epoch": 0.06, "grad_norm": 10.665288925170898, "learning_rate": 1.0321185249270034e-05, "loss": 3.0187, "step": 4772 }, { "epoch": 0.06, "grad_norm": 8.681262016296387, "learning_rate": 1.0323348112901484e-05, "loss": 2.7544, "step": 4773 }, { "epoch": 0.06, "grad_norm": 7.744112014770508, "learning_rate": 1.0325510976532932e-05, "loss": 2.5724, "step": 4774 }, { "epoch": 0.06, "grad_norm": 7.898754119873047, "learning_rate": 1.0327673840164378e-05, "loss": 2.4389, "step": 4775 }, { "epoch": 0.06, "grad_norm": 8.964181900024414, "learning_rate": 1.0329836703795826e-05, "loss": 2.8841, "step": 4776 }, { "epoch": 0.06, "grad_norm": 7.785616874694824, "learning_rate": 1.0331999567427276e-05, "loss": 2.5548, "step": 4777 }, { "epoch": 0.06, "grad_norm": 9.532938957214355, "learning_rate": 1.0334162431058724e-05, "loss": 3.2013, "step": 4778 }, { "epoch": 0.06, "grad_norm": 9.173894882202148, "learning_rate": 1.033632529469017e-05, "loss": 3.0011, "step": 4779 }, { "epoch": 0.06, "grad_norm": 8.985681533813477, "learning_rate": 1.0338488158321618e-05, "loss": 2.8174, "step": 4780 }, { "epoch": 0.06, "grad_norm": 7.805590629577637, "learning_rate": 1.0340651021953068e-05, "loss": 2.791, "step": 4781 }, { "epoch": 0.06, "grad_norm": 9.476890563964844, "learning_rate": 1.0342813885584514e-05, "loss": 3.3832, "step": 4782 }, { "epoch": 0.06, "grad_norm": 8.32709789276123, "learning_rate": 1.0344976749215962e-05, "loss": 3.3609, "step": 4783 }, { "epoch": 0.06, "grad_norm": 8.16137981414795, "learning_rate": 1.034713961284741e-05, "loss": 2.6319, "step": 4784 }, { "epoch": 0.06, "grad_norm": 7.801786422729492, "learning_rate": 1.034930247647886e-05, "loss": 2.9358, "step": 4785 }, { "epoch": 0.06, "grad_norm": 8.397357940673828, "learning_rate": 1.0351465340110307e-05, "loss": 3.1368, "step": 4786 }, { "epoch": 0.06, "grad_norm": 7.394124984741211, "learning_rate": 1.0353628203741755e-05, "loss": 2.6483, "step": 4787 }, { "epoch": 0.06, "grad_norm": 8.914324760437012, "learning_rate": 1.0355791067373203e-05, "loss": 3.5345, "step": 4788 }, { "epoch": 0.06, "grad_norm": 8.923657417297363, "learning_rate": 1.0357953931004652e-05, "loss": 2.8838, "step": 4789 }, { "epoch": 0.06, "grad_norm": 8.750218391418457, "learning_rate": 1.0360116794636099e-05, "loss": 2.6164, "step": 4790 }, { "epoch": 0.06, "grad_norm": 9.103485107421875, "learning_rate": 1.0362279658267547e-05, "loss": 2.6388, "step": 4791 }, { "epoch": 0.06, "grad_norm": 8.94383716583252, "learning_rate": 1.0364442521898995e-05, "loss": 2.9482, "step": 4792 }, { "epoch": 0.06, "grad_norm": 7.832152366638184, "learning_rate": 1.0366605385530445e-05, "loss": 2.4436, "step": 4793 }, { "epoch": 0.06, "grad_norm": 8.566644668579102, "learning_rate": 1.0368768249161891e-05, "loss": 2.9561, "step": 4794 }, { "epoch": 0.06, "grad_norm": 8.953034400939941, "learning_rate": 1.0370931112793339e-05, "loss": 3.0948, "step": 4795 }, { "epoch": 0.06, "grad_norm": 8.560503005981445, "learning_rate": 1.0373093976424787e-05, "loss": 2.807, "step": 4796 }, { "epoch": 0.06, "grad_norm": 8.568181037902832, "learning_rate": 1.0375256840056235e-05, "loss": 2.9116, "step": 4797 }, { "epoch": 0.06, "grad_norm": 9.064249992370605, "learning_rate": 1.0377419703687683e-05, "loss": 2.3995, "step": 4798 }, { "epoch": 0.06, "grad_norm": 8.502432823181152, "learning_rate": 1.0379582567319131e-05, "loss": 2.8135, "step": 4799 }, { "epoch": 0.06, "grad_norm": 9.013505935668945, "learning_rate": 1.0381745430950581e-05, "loss": 3.3061, "step": 4800 }, { "epoch": 0.06, "grad_norm": 8.576870918273926, "learning_rate": 1.0383908294582027e-05, "loss": 2.8814, "step": 4801 }, { "epoch": 0.06, "grad_norm": 8.015085220336914, "learning_rate": 1.0386071158213475e-05, "loss": 2.998, "step": 4802 }, { "epoch": 0.06, "grad_norm": 8.749170303344727, "learning_rate": 1.0388234021844923e-05, "loss": 3.0286, "step": 4803 }, { "epoch": 0.06, "grad_norm": 9.317179679870605, "learning_rate": 1.0390396885476373e-05, "loss": 2.7421, "step": 4804 }, { "epoch": 0.06, "grad_norm": 9.327652931213379, "learning_rate": 1.039255974910782e-05, "loss": 2.7053, "step": 4805 }, { "epoch": 0.06, "grad_norm": 10.216721534729004, "learning_rate": 1.0394722612739268e-05, "loss": 2.9148, "step": 4806 }, { "epoch": 0.06, "grad_norm": 7.812057018280029, "learning_rate": 1.0396885476370716e-05, "loss": 2.8421, "step": 4807 }, { "epoch": 0.06, "grad_norm": 8.500614166259766, "learning_rate": 1.0399048340002165e-05, "loss": 2.6648, "step": 4808 }, { "epoch": 0.06, "grad_norm": 9.099250793457031, "learning_rate": 1.0401211203633612e-05, "loss": 3.0411, "step": 4809 }, { "epoch": 0.06, "grad_norm": 9.32591438293457, "learning_rate": 1.040337406726506e-05, "loss": 3.0103, "step": 4810 }, { "epoch": 0.06, "grad_norm": 9.462071418762207, "learning_rate": 1.0405536930896508e-05, "loss": 3.1339, "step": 4811 }, { "epoch": 0.06, "grad_norm": 7.801874160766602, "learning_rate": 1.0407699794527958e-05, "loss": 2.6553, "step": 4812 }, { "epoch": 0.06, "grad_norm": 8.958037376403809, "learning_rate": 1.0409862658159404e-05, "loss": 2.965, "step": 4813 }, { "epoch": 0.06, "grad_norm": 9.432111740112305, "learning_rate": 1.0412025521790852e-05, "loss": 2.6862, "step": 4814 }, { "epoch": 0.06, "grad_norm": 8.428923606872559, "learning_rate": 1.04141883854223e-05, "loss": 2.9374, "step": 4815 }, { "epoch": 0.06, "grad_norm": 7.720001697540283, "learning_rate": 1.0416351249053748e-05, "loss": 2.4012, "step": 4816 }, { "epoch": 0.06, "grad_norm": 7.698874473571777, "learning_rate": 1.0418514112685196e-05, "loss": 2.4258, "step": 4817 }, { "epoch": 0.06, "grad_norm": 8.848318099975586, "learning_rate": 1.0420676976316644e-05, "loss": 3.1782, "step": 4818 }, { "epoch": 0.06, "grad_norm": 8.118143081665039, "learning_rate": 1.042283983994809e-05, "loss": 2.8317, "step": 4819 }, { "epoch": 0.06, "grad_norm": 8.612554550170898, "learning_rate": 1.042500270357954e-05, "loss": 3.1954, "step": 4820 }, { "epoch": 0.06, "grad_norm": 8.77627182006836, "learning_rate": 1.0427165567210988e-05, "loss": 3.4636, "step": 4821 }, { "epoch": 0.06, "grad_norm": 8.125999450683594, "learning_rate": 1.0429328430842436e-05, "loss": 3.0172, "step": 4822 }, { "epoch": 0.06, "grad_norm": 8.348736763000488, "learning_rate": 1.0431491294473883e-05, "loss": 2.8789, "step": 4823 }, { "epoch": 0.06, "grad_norm": 8.923157691955566, "learning_rate": 1.0433654158105332e-05, "loss": 2.8138, "step": 4824 }, { "epoch": 0.06, "grad_norm": 8.144081115722656, "learning_rate": 1.043581702173678e-05, "loss": 2.8327, "step": 4825 }, { "epoch": 0.06, "grad_norm": 9.093331336975098, "learning_rate": 1.0437979885368229e-05, "loss": 2.9628, "step": 4826 }, { "epoch": 0.06, "grad_norm": 8.909330368041992, "learning_rate": 1.0440142748999675e-05, "loss": 2.9472, "step": 4827 }, { "epoch": 0.06, "grad_norm": 6.901252269744873, "learning_rate": 1.0442305612631125e-05, "loss": 2.3727, "step": 4828 }, { "epoch": 0.06, "grad_norm": 8.873153686523438, "learning_rate": 1.0444468476262573e-05, "loss": 3.1371, "step": 4829 }, { "epoch": 0.06, "grad_norm": 7.659355640411377, "learning_rate": 1.044663133989402e-05, "loss": 3.0933, "step": 4830 }, { "epoch": 0.06, "grad_norm": 9.028531074523926, "learning_rate": 1.0448794203525467e-05, "loss": 2.6075, "step": 4831 }, { "epoch": 0.06, "grad_norm": 8.122469902038574, "learning_rate": 1.0450957067156917e-05, "loss": 2.9199, "step": 4832 }, { "epoch": 0.06, "grad_norm": 8.648186683654785, "learning_rate": 1.0453119930788365e-05, "loss": 2.6505, "step": 4833 }, { "epoch": 0.06, "grad_norm": 8.77673053741455, "learning_rate": 1.0455282794419811e-05, "loss": 3.0789, "step": 4834 }, { "epoch": 0.06, "grad_norm": 7.548134803771973, "learning_rate": 1.0457445658051261e-05, "loss": 2.746, "step": 4835 }, { "epoch": 0.06, "grad_norm": 8.102361679077148, "learning_rate": 1.0459608521682709e-05, "loss": 2.7679, "step": 4836 }, { "epoch": 0.06, "grad_norm": 9.006985664367676, "learning_rate": 1.0461771385314157e-05, "loss": 2.6812, "step": 4837 }, { "epoch": 0.06, "grad_norm": 9.578168869018555, "learning_rate": 1.0463934248945603e-05, "loss": 3.2796, "step": 4838 }, { "epoch": 0.06, "grad_norm": 8.477155685424805, "learning_rate": 1.0466097112577053e-05, "loss": 2.9948, "step": 4839 }, { "epoch": 0.06, "grad_norm": 7.991451740264893, "learning_rate": 1.0468259976208501e-05, "loss": 2.3272, "step": 4840 }, { "epoch": 0.06, "grad_norm": 9.057435035705566, "learning_rate": 1.047042283983995e-05, "loss": 2.7219, "step": 4841 }, { "epoch": 0.06, "grad_norm": 8.869759559631348, "learning_rate": 1.0472585703471396e-05, "loss": 2.9287, "step": 4842 }, { "epoch": 0.06, "grad_norm": 9.154027938842773, "learning_rate": 1.0474748567102845e-05, "loss": 2.9983, "step": 4843 }, { "epoch": 0.06, "grad_norm": 8.571831703186035, "learning_rate": 1.0476911430734293e-05, "loss": 3.5715, "step": 4844 }, { "epoch": 0.06, "grad_norm": 8.41927433013916, "learning_rate": 1.0479074294365742e-05, "loss": 3.1024, "step": 4845 }, { "epoch": 0.06, "grad_norm": 8.71738052368164, "learning_rate": 1.0481237157997188e-05, "loss": 2.9277, "step": 4846 }, { "epoch": 0.06, "grad_norm": 9.51257610321045, "learning_rate": 1.0483400021628638e-05, "loss": 2.9924, "step": 4847 }, { "epoch": 0.06, "grad_norm": 8.059196472167969, "learning_rate": 1.0485562885260086e-05, "loss": 3.1889, "step": 4848 }, { "epoch": 0.06, "grad_norm": 8.070777893066406, "learning_rate": 1.0487725748891534e-05, "loss": 2.8978, "step": 4849 }, { "epoch": 0.06, "grad_norm": 8.019033432006836, "learning_rate": 1.048988861252298e-05, "loss": 2.7959, "step": 4850 }, { "epoch": 0.06, "grad_norm": 8.223018646240234, "learning_rate": 1.049205147615443e-05, "loss": 2.9234, "step": 4851 }, { "epoch": 0.06, "grad_norm": 8.446981430053711, "learning_rate": 1.0494214339785878e-05, "loss": 3.4508, "step": 4852 }, { "epoch": 0.06, "grad_norm": 7.918734073638916, "learning_rate": 1.0496377203417324e-05, "loss": 3.0915, "step": 4853 }, { "epoch": 0.06, "grad_norm": 7.025221347808838, "learning_rate": 1.0498540067048772e-05, "loss": 2.7797, "step": 4854 }, { "epoch": 0.06, "grad_norm": 8.791302680969238, "learning_rate": 1.0500702930680222e-05, "loss": 2.841, "step": 4855 }, { "epoch": 0.06, "grad_norm": 8.092782020568848, "learning_rate": 1.050286579431167e-05, "loss": 2.7213, "step": 4856 }, { "epoch": 0.06, "grad_norm": 8.826576232910156, "learning_rate": 1.0505028657943116e-05, "loss": 2.6286, "step": 4857 }, { "epoch": 0.06, "grad_norm": 8.832499504089355, "learning_rate": 1.0507191521574564e-05, "loss": 2.9989, "step": 4858 }, { "epoch": 0.06, "grad_norm": 8.827796936035156, "learning_rate": 1.0509354385206014e-05, "loss": 2.8452, "step": 4859 }, { "epoch": 0.06, "grad_norm": 8.018966674804688, "learning_rate": 1.0511517248837462e-05, "loss": 2.657, "step": 4860 }, { "epoch": 0.06, "grad_norm": 7.720984935760498, "learning_rate": 1.0513680112468909e-05, "loss": 2.3641, "step": 4861 }, { "epoch": 0.06, "grad_norm": 10.113224983215332, "learning_rate": 1.0515842976100357e-05, "loss": 3.0986, "step": 4862 }, { "epoch": 0.06, "grad_norm": 8.861505508422852, "learning_rate": 1.0518005839731806e-05, "loss": 2.9113, "step": 4863 }, { "epoch": 0.06, "grad_norm": 8.437047004699707, "learning_rate": 1.0520168703363254e-05, "loss": 2.5137, "step": 4864 }, { "epoch": 0.06, "grad_norm": 10.200252532958984, "learning_rate": 1.05223315669947e-05, "loss": 2.7223, "step": 4865 }, { "epoch": 0.06, "grad_norm": 9.439435958862305, "learning_rate": 1.052449443062615e-05, "loss": 2.9304, "step": 4866 }, { "epoch": 0.06, "grad_norm": 9.338837623596191, "learning_rate": 1.0526657294257599e-05, "loss": 3.1135, "step": 4867 }, { "epoch": 0.06, "grad_norm": 8.797106742858887, "learning_rate": 1.0528820157889047e-05, "loss": 2.519, "step": 4868 }, { "epoch": 0.06, "grad_norm": 9.057483673095703, "learning_rate": 1.0530983021520493e-05, "loss": 3.9254, "step": 4869 }, { "epoch": 0.06, "grad_norm": 8.983114242553711, "learning_rate": 1.0533145885151943e-05, "loss": 2.8354, "step": 4870 }, { "epoch": 0.06, "grad_norm": 7.9854512214660645, "learning_rate": 1.053530874878339e-05, "loss": 2.9521, "step": 4871 }, { "epoch": 0.06, "grad_norm": 8.811424255371094, "learning_rate": 1.0537471612414837e-05, "loss": 2.8084, "step": 4872 }, { "epoch": 0.06, "grad_norm": 9.578523635864258, "learning_rate": 1.0539634476046285e-05, "loss": 2.9995, "step": 4873 }, { "epoch": 0.06, "grad_norm": 7.8586554527282715, "learning_rate": 1.0541797339677735e-05, "loss": 3.1597, "step": 4874 }, { "epoch": 0.06, "grad_norm": 8.537858963012695, "learning_rate": 1.0543960203309183e-05, "loss": 2.6301, "step": 4875 }, { "epoch": 0.06, "grad_norm": 8.913928985595703, "learning_rate": 1.054612306694063e-05, "loss": 3.202, "step": 4876 }, { "epoch": 0.06, "grad_norm": 8.99818229675293, "learning_rate": 1.0548285930572077e-05, "loss": 3.5241, "step": 4877 }, { "epoch": 0.06, "grad_norm": 9.557031631469727, "learning_rate": 1.0550448794203527e-05, "loss": 3.0811, "step": 4878 }, { "epoch": 0.06, "grad_norm": 8.383471488952637, "learning_rate": 1.0552611657834975e-05, "loss": 2.2117, "step": 4879 }, { "epoch": 0.06, "grad_norm": 10.732733726501465, "learning_rate": 1.0554774521466422e-05, "loss": 2.4812, "step": 4880 }, { "epoch": 0.06, "grad_norm": 9.275493621826172, "learning_rate": 1.055693738509787e-05, "loss": 3.1088, "step": 4881 }, { "epoch": 0.06, "grad_norm": 9.841486930847168, "learning_rate": 1.055910024872932e-05, "loss": 3.2986, "step": 4882 }, { "epoch": 0.06, "grad_norm": 8.384366035461426, "learning_rate": 1.0561263112360767e-05, "loss": 2.7704, "step": 4883 }, { "epoch": 0.06, "grad_norm": 9.279807090759277, "learning_rate": 1.0563425975992214e-05, "loss": 3.2634, "step": 4884 }, { "epoch": 0.06, "grad_norm": 7.969528675079346, "learning_rate": 1.0565588839623662e-05, "loss": 2.9791, "step": 4885 }, { "epoch": 0.06, "grad_norm": 9.125029563903809, "learning_rate": 1.0567751703255112e-05, "loss": 3.0426, "step": 4886 }, { "epoch": 0.06, "grad_norm": 10.120312690734863, "learning_rate": 1.056991456688656e-05, "loss": 3.1159, "step": 4887 }, { "epoch": 0.06, "grad_norm": 9.124327659606934, "learning_rate": 1.0572077430518006e-05, "loss": 2.9767, "step": 4888 }, { "epoch": 0.06, "grad_norm": 7.876035213470459, "learning_rate": 1.0574240294149454e-05, "loss": 2.5854, "step": 4889 }, { "epoch": 0.06, "grad_norm": 9.641897201538086, "learning_rate": 1.0576403157780904e-05, "loss": 3.363, "step": 4890 }, { "epoch": 0.06, "grad_norm": 8.284897804260254, "learning_rate": 1.057856602141235e-05, "loss": 2.895, "step": 4891 }, { "epoch": 0.06, "grad_norm": 9.073719024658203, "learning_rate": 1.0580728885043798e-05, "loss": 2.9694, "step": 4892 }, { "epoch": 0.06, "grad_norm": 8.66104793548584, "learning_rate": 1.0582891748675246e-05, "loss": 3.0297, "step": 4893 }, { "epoch": 0.06, "grad_norm": 8.279275894165039, "learning_rate": 1.0585054612306696e-05, "loss": 3.1165, "step": 4894 }, { "epoch": 0.06, "grad_norm": 8.46675968170166, "learning_rate": 1.0587217475938142e-05, "loss": 2.4073, "step": 4895 }, { "epoch": 0.06, "grad_norm": 7.9188151359558105, "learning_rate": 1.058938033956959e-05, "loss": 3.217, "step": 4896 }, { "epoch": 0.06, "grad_norm": 9.537433624267578, "learning_rate": 1.059154320320104e-05, "loss": 3.0029, "step": 4897 }, { "epoch": 0.06, "grad_norm": 8.72118854522705, "learning_rate": 1.0593706066832488e-05, "loss": 2.8534, "step": 4898 }, { "epoch": 0.06, "grad_norm": 10.176518440246582, "learning_rate": 1.0595868930463935e-05, "loss": 2.7601, "step": 4899 }, { "epoch": 0.06, "grad_norm": 8.343289375305176, "learning_rate": 1.0598031794095383e-05, "loss": 3.4875, "step": 4900 }, { "epoch": 0.06, "grad_norm": 9.314218521118164, "learning_rate": 1.0600194657726832e-05, "loss": 3.3596, "step": 4901 }, { "epoch": 0.06, "grad_norm": 9.975765228271484, "learning_rate": 1.060235752135828e-05, "loss": 2.7522, "step": 4902 }, { "epoch": 0.06, "grad_norm": 8.6808500289917, "learning_rate": 1.0604520384989727e-05, "loss": 3.3334, "step": 4903 }, { "epoch": 0.06, "grad_norm": 8.009195327758789, "learning_rate": 1.0606683248621175e-05, "loss": 2.4698, "step": 4904 }, { "epoch": 0.06, "grad_norm": 9.066635131835938, "learning_rate": 1.0608846112252625e-05, "loss": 2.6878, "step": 4905 }, { "epoch": 0.06, "grad_norm": 8.938441276550293, "learning_rate": 1.0611008975884071e-05, "loss": 2.9526, "step": 4906 }, { "epoch": 0.06, "grad_norm": 8.473664283752441, "learning_rate": 1.0613171839515519e-05, "loss": 2.0534, "step": 4907 }, { "epoch": 0.06, "grad_norm": 8.361363410949707, "learning_rate": 1.0615334703146967e-05, "loss": 2.5745, "step": 4908 }, { "epoch": 0.06, "grad_norm": 8.346781730651855, "learning_rate": 1.0617497566778417e-05, "loss": 2.4823, "step": 4909 }, { "epoch": 0.06, "grad_norm": 10.317024230957031, "learning_rate": 1.0619660430409863e-05, "loss": 2.5573, "step": 4910 }, { "epoch": 0.06, "grad_norm": 9.283650398254395, "learning_rate": 1.0621823294041311e-05, "loss": 3.0414, "step": 4911 }, { "epoch": 0.06, "grad_norm": 7.846114158630371, "learning_rate": 1.062398615767276e-05, "loss": 2.7908, "step": 4912 }, { "epoch": 0.06, "grad_norm": 9.591761589050293, "learning_rate": 1.0626149021304209e-05, "loss": 2.9494, "step": 4913 }, { "epoch": 0.06, "grad_norm": 9.43738842010498, "learning_rate": 1.0628311884935655e-05, "loss": 3.2831, "step": 4914 }, { "epoch": 0.06, "grad_norm": 8.784367561340332, "learning_rate": 1.0630474748567103e-05, "loss": 3.2456, "step": 4915 }, { "epoch": 0.06, "grad_norm": 8.514898300170898, "learning_rate": 1.0632637612198551e-05, "loss": 2.702, "step": 4916 }, { "epoch": 0.06, "grad_norm": 8.624271392822266, "learning_rate": 1.0634800475830001e-05, "loss": 2.6621, "step": 4917 }, { "epoch": 0.06, "grad_norm": 8.375489234924316, "learning_rate": 1.0636963339461448e-05, "loss": 2.8, "step": 4918 }, { "epoch": 0.06, "grad_norm": 8.700441360473633, "learning_rate": 1.0639126203092896e-05, "loss": 3.1947, "step": 4919 }, { "epoch": 0.06, "grad_norm": 9.186511039733887, "learning_rate": 1.0641289066724344e-05, "loss": 2.631, "step": 4920 }, { "epoch": 0.06, "grad_norm": 8.21094799041748, "learning_rate": 1.0643451930355793e-05, "loss": 2.6116, "step": 4921 }, { "epoch": 0.06, "grad_norm": 8.819486618041992, "learning_rate": 1.064561479398724e-05, "loss": 2.7907, "step": 4922 }, { "epoch": 0.06, "grad_norm": 8.811517715454102, "learning_rate": 1.0647777657618688e-05, "loss": 2.9857, "step": 4923 }, { "epoch": 0.06, "grad_norm": 9.789514541625977, "learning_rate": 1.0649940521250136e-05, "loss": 3.0405, "step": 4924 }, { "epoch": 0.06, "grad_norm": 9.472724914550781, "learning_rate": 1.0652103384881584e-05, "loss": 2.8024, "step": 4925 }, { "epoch": 0.06, "grad_norm": 8.221104621887207, "learning_rate": 1.0654266248513032e-05, "loss": 2.6134, "step": 4926 }, { "epoch": 0.06, "grad_norm": 9.414814949035645, "learning_rate": 1.065642911214448e-05, "loss": 2.9178, "step": 4927 }, { "epoch": 0.06, "grad_norm": 8.928112983703613, "learning_rate": 1.065859197577593e-05, "loss": 3.1165, "step": 4928 }, { "epoch": 0.06, "grad_norm": 8.13807487487793, "learning_rate": 1.0660754839407376e-05, "loss": 3.0037, "step": 4929 }, { "epoch": 0.06, "grad_norm": 8.25633430480957, "learning_rate": 1.0662917703038824e-05, "loss": 2.7403, "step": 4930 }, { "epoch": 0.06, "grad_norm": 9.349007606506348, "learning_rate": 1.0665080566670272e-05, "loss": 2.6111, "step": 4931 }, { "epoch": 0.06, "grad_norm": 8.008665084838867, "learning_rate": 1.0667243430301722e-05, "loss": 3.0892, "step": 4932 }, { "epoch": 0.06, "grad_norm": 8.696972846984863, "learning_rate": 1.0669406293933168e-05, "loss": 2.5962, "step": 4933 }, { "epoch": 0.06, "grad_norm": 8.41940689086914, "learning_rate": 1.0671569157564616e-05, "loss": 2.3949, "step": 4934 }, { "epoch": 0.06, "grad_norm": 8.815909385681152, "learning_rate": 1.0673732021196064e-05, "loss": 2.9627, "step": 4935 }, { "epoch": 0.06, "grad_norm": 9.331489562988281, "learning_rate": 1.0675894884827514e-05, "loss": 3.5852, "step": 4936 }, { "epoch": 0.06, "grad_norm": 10.661375999450684, "learning_rate": 1.067805774845896e-05, "loss": 3.3586, "step": 4937 }, { "epoch": 0.06, "grad_norm": 8.82174301147461, "learning_rate": 1.0680220612090409e-05, "loss": 2.4765, "step": 4938 }, { "epoch": 0.06, "grad_norm": 8.325983047485352, "learning_rate": 1.0682383475721857e-05, "loss": 3.0266, "step": 4939 }, { "epoch": 0.06, "grad_norm": 8.324126243591309, "learning_rate": 1.0684546339353306e-05, "loss": 2.6588, "step": 4940 }, { "epoch": 0.06, "grad_norm": 9.2115478515625, "learning_rate": 1.0686709202984753e-05, "loss": 3.1655, "step": 4941 }, { "epoch": 0.06, "grad_norm": 7.732304096221924, "learning_rate": 1.06888720666162e-05, "loss": 2.7526, "step": 4942 }, { "epoch": 0.06, "grad_norm": 8.39029312133789, "learning_rate": 1.0691034930247647e-05, "loss": 3.2164, "step": 4943 }, { "epoch": 0.06, "grad_norm": 7.438108921051025, "learning_rate": 1.0693197793879097e-05, "loss": 2.9775, "step": 4944 }, { "epoch": 0.06, "grad_norm": 10.398675918579102, "learning_rate": 1.0695360657510545e-05, "loss": 2.7094, "step": 4945 }, { "epoch": 0.06, "grad_norm": 8.410808563232422, "learning_rate": 1.0697523521141993e-05, "loss": 3.0782, "step": 4946 }, { "epoch": 0.06, "grad_norm": 8.300134658813477, "learning_rate": 1.069968638477344e-05, "loss": 3.1808, "step": 4947 }, { "epoch": 0.06, "grad_norm": 11.097479820251465, "learning_rate": 1.0701849248404889e-05, "loss": 2.9813, "step": 4948 }, { "epoch": 0.06, "grad_norm": 9.45467472076416, "learning_rate": 1.0704012112036337e-05, "loss": 3.3164, "step": 4949 }, { "epoch": 0.06, "grad_norm": 8.26467514038086, "learning_rate": 1.0706174975667785e-05, "loss": 2.7505, "step": 4950 }, { "epoch": 0.06, "grad_norm": 9.529061317443848, "learning_rate": 1.0708337839299232e-05, "loss": 2.902, "step": 4951 }, { "epoch": 0.06, "grad_norm": 8.108372688293457, "learning_rate": 1.0710500702930681e-05, "loss": 2.6572, "step": 4952 }, { "epoch": 0.06, "grad_norm": 9.068395614624023, "learning_rate": 1.071266356656213e-05, "loss": 2.8267, "step": 4953 }, { "epoch": 0.06, "grad_norm": 7.678865909576416, "learning_rate": 1.0714826430193577e-05, "loss": 2.9969, "step": 4954 }, { "epoch": 0.06, "grad_norm": 7.764611721038818, "learning_rate": 1.0716989293825024e-05, "loss": 2.659, "step": 4955 }, { "epoch": 0.06, "grad_norm": 9.131299018859863, "learning_rate": 1.0719152157456473e-05, "loss": 3.2318, "step": 4956 }, { "epoch": 0.06, "grad_norm": 8.797140121459961, "learning_rate": 1.0721315021087922e-05, "loss": 3.0426, "step": 4957 }, { "epoch": 0.06, "grad_norm": 9.248319625854492, "learning_rate": 1.072347788471937e-05, "loss": 2.76, "step": 4958 }, { "epoch": 0.06, "grad_norm": 9.219184875488281, "learning_rate": 1.0725640748350816e-05, "loss": 3.2432, "step": 4959 }, { "epoch": 0.06, "grad_norm": 9.906304359436035, "learning_rate": 1.0727803611982266e-05, "loss": 2.9364, "step": 4960 }, { "epoch": 0.06, "grad_norm": 7.484410285949707, "learning_rate": 1.0729966475613714e-05, "loss": 3.2416, "step": 4961 }, { "epoch": 0.06, "grad_norm": 9.960848808288574, "learning_rate": 1.073212933924516e-05, "loss": 2.7291, "step": 4962 }, { "epoch": 0.06, "grad_norm": 8.6090087890625, "learning_rate": 1.073429220287661e-05, "loss": 3.3488, "step": 4963 }, { "epoch": 0.06, "grad_norm": 8.34453296661377, "learning_rate": 1.0736455066508058e-05, "loss": 2.9267, "step": 4964 }, { "epoch": 0.06, "grad_norm": 8.544259071350098, "learning_rate": 1.0738617930139506e-05, "loss": 2.969, "step": 4965 }, { "epoch": 0.06, "grad_norm": 8.565062522888184, "learning_rate": 1.0740780793770952e-05, "loss": 2.8594, "step": 4966 }, { "epoch": 0.06, "grad_norm": 9.053215026855469, "learning_rate": 1.0742943657402402e-05, "loss": 2.8256, "step": 4967 }, { "epoch": 0.06, "grad_norm": 8.56294059753418, "learning_rate": 1.074510652103385e-05, "loss": 2.7375, "step": 4968 }, { "epoch": 0.06, "grad_norm": 8.782003402709961, "learning_rate": 1.0747269384665298e-05, "loss": 2.6396, "step": 4969 }, { "epoch": 0.06, "grad_norm": 8.821818351745605, "learning_rate": 1.0749432248296744e-05, "loss": 2.9512, "step": 4970 }, { "epoch": 0.06, "grad_norm": 8.619658470153809, "learning_rate": 1.0751595111928194e-05, "loss": 2.8699, "step": 4971 }, { "epoch": 0.06, "grad_norm": 9.381486892700195, "learning_rate": 1.0753757975559642e-05, "loss": 3.4647, "step": 4972 }, { "epoch": 0.06, "grad_norm": 8.858156204223633, "learning_rate": 1.075592083919109e-05, "loss": 3.2989, "step": 4973 }, { "epoch": 0.06, "grad_norm": 8.085548400878906, "learning_rate": 1.0758083702822537e-05, "loss": 3.0711, "step": 4974 }, { "epoch": 0.06, "grad_norm": 8.020039558410645, "learning_rate": 1.0760246566453986e-05, "loss": 2.6076, "step": 4975 }, { "epoch": 0.06, "grad_norm": 8.574390411376953, "learning_rate": 1.0762409430085434e-05, "loss": 3.0462, "step": 4976 }, { "epoch": 0.06, "grad_norm": 7.99995756149292, "learning_rate": 1.0764572293716883e-05, "loss": 2.7124, "step": 4977 }, { "epoch": 0.06, "grad_norm": 7.9540114402771, "learning_rate": 1.0766735157348329e-05, "loss": 3.1814, "step": 4978 }, { "epoch": 0.06, "grad_norm": 7.814937591552734, "learning_rate": 1.0768898020979779e-05, "loss": 3.037, "step": 4979 }, { "epoch": 0.06, "grad_norm": 8.02312183380127, "learning_rate": 1.0771060884611227e-05, "loss": 3.0613, "step": 4980 }, { "epoch": 0.06, "grad_norm": 9.705050468444824, "learning_rate": 1.0773223748242673e-05, "loss": 3.2957, "step": 4981 }, { "epoch": 0.06, "grad_norm": 8.18798542022705, "learning_rate": 1.0775386611874121e-05, "loss": 2.7416, "step": 4982 }, { "epoch": 0.06, "grad_norm": 8.823759078979492, "learning_rate": 1.077754947550557e-05, "loss": 2.6836, "step": 4983 }, { "epoch": 0.06, "grad_norm": 8.47874927520752, "learning_rate": 1.0779712339137019e-05, "loss": 2.772, "step": 4984 }, { "epoch": 0.06, "grad_norm": 8.16463851928711, "learning_rate": 1.0781875202768465e-05, "loss": 2.5378, "step": 4985 }, { "epoch": 0.06, "grad_norm": 9.13650131225586, "learning_rate": 1.0784038066399913e-05, "loss": 3.0267, "step": 4986 }, { "epoch": 0.06, "grad_norm": 8.204193115234375, "learning_rate": 1.0786200930031363e-05, "loss": 3.0456, "step": 4987 }, { "epoch": 0.06, "grad_norm": 8.869190216064453, "learning_rate": 1.0788363793662811e-05, "loss": 2.7847, "step": 4988 }, { "epoch": 0.06, "grad_norm": 9.572741508483887, "learning_rate": 1.0790526657294257e-05, "loss": 2.9544, "step": 4989 }, { "epoch": 0.06, "grad_norm": 8.381946563720703, "learning_rate": 1.0792689520925705e-05, "loss": 2.2285, "step": 4990 }, { "epoch": 0.06, "grad_norm": 8.171168327331543, "learning_rate": 1.0794852384557155e-05, "loss": 2.7035, "step": 4991 }, { "epoch": 0.06, "grad_norm": 9.306469917297363, "learning_rate": 1.0797015248188603e-05, "loss": 2.8087, "step": 4992 }, { "epoch": 0.06, "grad_norm": 9.78198528289795, "learning_rate": 1.079917811182005e-05, "loss": 2.7684, "step": 4993 }, { "epoch": 0.06, "grad_norm": 8.878241539001465, "learning_rate": 1.08013409754515e-05, "loss": 3.3976, "step": 4994 }, { "epoch": 0.06, "grad_norm": 9.328145980834961, "learning_rate": 1.0803503839082947e-05, "loss": 3.4314, "step": 4995 }, { "epoch": 0.06, "grad_norm": 8.26966381072998, "learning_rate": 1.0805666702714395e-05, "loss": 2.3902, "step": 4996 }, { "epoch": 0.06, "grad_norm": 7.6617231369018555, "learning_rate": 1.0807829566345842e-05, "loss": 2.5045, "step": 4997 }, { "epoch": 0.06, "grad_norm": 8.243239402770996, "learning_rate": 1.0809992429977292e-05, "loss": 2.3288, "step": 4998 }, { "epoch": 0.06, "grad_norm": 8.630664825439453, "learning_rate": 1.081215529360874e-05, "loss": 3.4389, "step": 4999 }, { "epoch": 0.06, "grad_norm": 8.211347579956055, "learning_rate": 1.0814318157240186e-05, "loss": 2.7163, "step": 5000 }, { "epoch": 0.06, "grad_norm": 8.196206092834473, "learning_rate": 1.0816481020871634e-05, "loss": 2.6762, "step": 5001 }, { "epoch": 0.06, "grad_norm": 7.986756324768066, "learning_rate": 1.0818643884503084e-05, "loss": 2.624, "step": 5002 }, { "epoch": 0.06, "grad_norm": 8.757168769836426, "learning_rate": 1.0820806748134532e-05, "loss": 2.5638, "step": 5003 }, { "epoch": 0.06, "grad_norm": 7.751895427703857, "learning_rate": 1.0822969611765978e-05, "loss": 2.4044, "step": 5004 }, { "epoch": 0.06, "grad_norm": 7.875043869018555, "learning_rate": 1.0825132475397426e-05, "loss": 2.5204, "step": 5005 }, { "epoch": 0.06, "grad_norm": 10.298776626586914, "learning_rate": 1.0827295339028876e-05, "loss": 2.987, "step": 5006 }, { "epoch": 0.06, "grad_norm": 7.932959079742432, "learning_rate": 1.0829458202660324e-05, "loss": 3.0613, "step": 5007 }, { "epoch": 0.06, "grad_norm": 8.474393844604492, "learning_rate": 1.083162106629177e-05, "loss": 2.3921, "step": 5008 }, { "epoch": 0.07, "grad_norm": 8.475247383117676, "learning_rate": 1.0833783929923218e-05, "loss": 2.3936, "step": 5009 }, { "epoch": 0.07, "grad_norm": 8.122398376464844, "learning_rate": 1.0835946793554668e-05, "loss": 2.964, "step": 5010 }, { "epoch": 0.07, "grad_norm": 8.991348266601562, "learning_rate": 1.0838109657186116e-05, "loss": 3.089, "step": 5011 }, { "epoch": 0.07, "grad_norm": 8.245528221130371, "learning_rate": 1.0840272520817563e-05, "loss": 2.6046, "step": 5012 }, { "epoch": 0.07, "grad_norm": 8.56399917602539, "learning_rate": 1.084243538444901e-05, "loss": 2.6738, "step": 5013 }, { "epoch": 0.07, "grad_norm": 9.357400894165039, "learning_rate": 1.084459824808046e-05, "loss": 3.0285, "step": 5014 }, { "epoch": 0.07, "grad_norm": 8.027701377868652, "learning_rate": 1.0846761111711907e-05, "loss": 2.612, "step": 5015 }, { "epoch": 0.07, "grad_norm": 7.75743293762207, "learning_rate": 1.0848923975343355e-05, "loss": 2.8685, "step": 5016 }, { "epoch": 0.07, "grad_norm": 9.539366722106934, "learning_rate": 1.0851086838974803e-05, "loss": 3.1352, "step": 5017 }, { "epoch": 0.07, "grad_norm": 8.067789077758789, "learning_rate": 1.0853249702606253e-05, "loss": 3.5096, "step": 5018 }, { "epoch": 0.07, "grad_norm": 8.638425827026367, "learning_rate": 1.0855412566237699e-05, "loss": 3.0152, "step": 5019 }, { "epoch": 0.07, "grad_norm": 8.6784029006958, "learning_rate": 1.0857575429869147e-05, "loss": 2.6195, "step": 5020 }, { "epoch": 0.07, "grad_norm": 7.766951084136963, "learning_rate": 1.0859738293500595e-05, "loss": 3.3624, "step": 5021 }, { "epoch": 0.07, "grad_norm": 8.762253761291504, "learning_rate": 1.0861901157132045e-05, "loss": 2.3479, "step": 5022 }, { "epoch": 0.07, "grad_norm": 8.580475807189941, "learning_rate": 1.0864064020763491e-05, "loss": 2.6589, "step": 5023 }, { "epoch": 0.07, "grad_norm": 9.176600456237793, "learning_rate": 1.086622688439494e-05, "loss": 3.2668, "step": 5024 }, { "epoch": 0.07, "grad_norm": 10.033975601196289, "learning_rate": 1.0868389748026389e-05, "loss": 2.5931, "step": 5025 }, { "epoch": 0.07, "grad_norm": 9.314745903015137, "learning_rate": 1.0870552611657837e-05, "loss": 2.6753, "step": 5026 }, { "epoch": 0.07, "grad_norm": 9.565764427185059, "learning_rate": 1.0872715475289283e-05, "loss": 2.6108, "step": 5027 }, { "epoch": 0.07, "grad_norm": 8.171708106994629, "learning_rate": 1.0874878338920731e-05, "loss": 3.1678, "step": 5028 }, { "epoch": 0.07, "grad_norm": 7.916112422943115, "learning_rate": 1.0877041202552181e-05, "loss": 2.6787, "step": 5029 }, { "epoch": 0.07, "grad_norm": 8.785317420959473, "learning_rate": 1.087920406618363e-05, "loss": 2.4541, "step": 5030 }, { "epoch": 0.07, "grad_norm": 8.135114669799805, "learning_rate": 1.0881366929815076e-05, "loss": 2.4959, "step": 5031 }, { "epoch": 0.07, "grad_norm": 8.473061561584473, "learning_rate": 1.0883529793446524e-05, "loss": 3.0693, "step": 5032 }, { "epoch": 0.07, "grad_norm": 9.273305892944336, "learning_rate": 1.0885692657077973e-05, "loss": 3.5106, "step": 5033 }, { "epoch": 0.07, "grad_norm": 7.894862174987793, "learning_rate": 1.088785552070942e-05, "loss": 2.6713, "step": 5034 }, { "epoch": 0.07, "grad_norm": 8.144085884094238, "learning_rate": 1.0890018384340868e-05, "loss": 3.0006, "step": 5035 }, { "epoch": 0.07, "grad_norm": 7.839533805847168, "learning_rate": 1.0892181247972316e-05, "loss": 2.6117, "step": 5036 }, { "epoch": 0.07, "grad_norm": 7.7555766105651855, "learning_rate": 1.0894344111603766e-05, "loss": 3.3329, "step": 5037 }, { "epoch": 0.07, "grad_norm": 8.495861053466797, "learning_rate": 1.0896506975235212e-05, "loss": 3.1247, "step": 5038 }, { "epoch": 0.07, "grad_norm": 9.151762962341309, "learning_rate": 1.089866983886666e-05, "loss": 2.7578, "step": 5039 }, { "epoch": 0.07, "grad_norm": 9.252130508422852, "learning_rate": 1.0900832702498108e-05, "loss": 3.2133, "step": 5040 }, { "epoch": 0.07, "grad_norm": 7.88925313949585, "learning_rate": 1.0902995566129558e-05, "loss": 2.9006, "step": 5041 }, { "epoch": 0.07, "grad_norm": 7.682383060455322, "learning_rate": 1.0905158429761004e-05, "loss": 2.6512, "step": 5042 }, { "epoch": 0.07, "grad_norm": 7.820953369140625, "learning_rate": 1.0907321293392452e-05, "loss": 2.9032, "step": 5043 }, { "epoch": 0.07, "grad_norm": 8.369345664978027, "learning_rate": 1.09094841570239e-05, "loss": 2.4887, "step": 5044 }, { "epoch": 0.07, "grad_norm": 8.38935375213623, "learning_rate": 1.091164702065535e-05, "loss": 3.407, "step": 5045 }, { "epoch": 0.07, "grad_norm": 9.03707504272461, "learning_rate": 1.0913809884286796e-05, "loss": 3.4971, "step": 5046 }, { "epoch": 0.07, "grad_norm": 7.2290520668029785, "learning_rate": 1.0915972747918244e-05, "loss": 2.7341, "step": 5047 }, { "epoch": 0.07, "grad_norm": 8.644821166992188, "learning_rate": 1.0918135611549692e-05, "loss": 2.7315, "step": 5048 }, { "epoch": 0.07, "grad_norm": 9.897494316101074, "learning_rate": 1.0920298475181142e-05, "loss": 3.1051, "step": 5049 }, { "epoch": 0.07, "grad_norm": 8.880919456481934, "learning_rate": 1.0922461338812589e-05, "loss": 3.4497, "step": 5050 }, { "epoch": 0.07, "grad_norm": 8.474165916442871, "learning_rate": 1.0924624202444037e-05, "loss": 2.9941, "step": 5051 }, { "epoch": 0.07, "grad_norm": 7.720078945159912, "learning_rate": 1.0926787066075483e-05, "loss": 3.1196, "step": 5052 }, { "epoch": 0.07, "grad_norm": 7.779390811920166, "learning_rate": 1.0928949929706933e-05, "loss": 2.6849, "step": 5053 }, { "epoch": 0.07, "grad_norm": 8.051778793334961, "learning_rate": 1.093111279333838e-05, "loss": 2.9601, "step": 5054 }, { "epoch": 0.07, "grad_norm": 9.12386417388916, "learning_rate": 1.0933275656969829e-05, "loss": 3.3528, "step": 5055 }, { "epoch": 0.07, "grad_norm": 9.484557151794434, "learning_rate": 1.0935438520601279e-05, "loss": 3.1946, "step": 5056 }, { "epoch": 0.07, "grad_norm": 8.757994651794434, "learning_rate": 1.0937601384232725e-05, "loss": 3.0149, "step": 5057 }, { "epoch": 0.07, "grad_norm": 8.566357612609863, "learning_rate": 1.0939764247864173e-05, "loss": 2.8953, "step": 5058 }, { "epoch": 0.07, "grad_norm": 8.395210266113281, "learning_rate": 1.0941927111495621e-05, "loss": 2.5692, "step": 5059 }, { "epoch": 0.07, "grad_norm": 9.066056251525879, "learning_rate": 1.094408997512707e-05, "loss": 2.6742, "step": 5060 }, { "epoch": 0.07, "grad_norm": 9.2083158493042, "learning_rate": 1.0946252838758517e-05, "loss": 3.0468, "step": 5061 }, { "epoch": 0.07, "grad_norm": 9.286408424377441, "learning_rate": 1.0948415702389965e-05, "loss": 2.9838, "step": 5062 }, { "epoch": 0.07, "grad_norm": 8.311071395874023, "learning_rate": 1.0950578566021413e-05, "loss": 2.7533, "step": 5063 }, { "epoch": 0.07, "grad_norm": 8.590394973754883, "learning_rate": 1.0952741429652863e-05, "loss": 3.0534, "step": 5064 }, { "epoch": 0.07, "grad_norm": 9.081936836242676, "learning_rate": 1.095490429328431e-05, "loss": 2.8087, "step": 5065 }, { "epoch": 0.07, "grad_norm": 8.232085227966309, "learning_rate": 1.0957067156915757e-05, "loss": 2.6265, "step": 5066 }, { "epoch": 0.07, "grad_norm": 9.195466041564941, "learning_rate": 1.0959230020547205e-05, "loss": 2.6795, "step": 5067 }, { "epoch": 0.07, "grad_norm": 9.297883987426758, "learning_rate": 1.0961392884178655e-05, "loss": 2.9183, "step": 5068 }, { "epoch": 0.07, "grad_norm": 10.614381790161133, "learning_rate": 1.0963555747810101e-05, "loss": 3.2849, "step": 5069 }, { "epoch": 0.07, "grad_norm": 8.485906600952148, "learning_rate": 1.096571861144155e-05, "loss": 2.8313, "step": 5070 }, { "epoch": 0.07, "grad_norm": 8.415129661560059, "learning_rate": 1.0967881475072996e-05, "loss": 3.0263, "step": 5071 }, { "epoch": 0.07, "grad_norm": 8.025876998901367, "learning_rate": 1.0970044338704446e-05, "loss": 2.6421, "step": 5072 }, { "epoch": 0.07, "grad_norm": 8.121700286865234, "learning_rate": 1.0972207202335894e-05, "loss": 2.4703, "step": 5073 }, { "epoch": 0.07, "grad_norm": 9.574305534362793, "learning_rate": 1.0974370065967342e-05, "loss": 2.985, "step": 5074 }, { "epoch": 0.07, "grad_norm": 8.407876014709473, "learning_rate": 1.0976532929598788e-05, "loss": 2.5527, "step": 5075 }, { "epoch": 0.07, "grad_norm": 7.53071928024292, "learning_rate": 1.0978695793230238e-05, "loss": 3.107, "step": 5076 }, { "epoch": 0.07, "grad_norm": 8.342799186706543, "learning_rate": 1.0980858656861686e-05, "loss": 2.9442, "step": 5077 }, { "epoch": 0.07, "grad_norm": 7.9560441970825195, "learning_rate": 1.0983021520493134e-05, "loss": 3.0009, "step": 5078 }, { "epoch": 0.07, "grad_norm": 9.021480560302734, "learning_rate": 1.098518438412458e-05, "loss": 2.951, "step": 5079 }, { "epoch": 0.07, "grad_norm": 9.6752347946167, "learning_rate": 1.098734724775603e-05, "loss": 2.9329, "step": 5080 }, { "epoch": 0.07, "grad_norm": 8.826956748962402, "learning_rate": 1.0989510111387478e-05, "loss": 3.0351, "step": 5081 }, { "epoch": 0.07, "grad_norm": 8.579232215881348, "learning_rate": 1.0991672975018926e-05, "loss": 2.3386, "step": 5082 }, { "epoch": 0.07, "grad_norm": 9.107305526733398, "learning_rate": 1.0993835838650373e-05, "loss": 3.1459, "step": 5083 }, { "epoch": 0.07, "grad_norm": 8.346957206726074, "learning_rate": 1.0995998702281822e-05, "loss": 2.941, "step": 5084 }, { "epoch": 0.07, "grad_norm": 8.061469078063965, "learning_rate": 1.099816156591327e-05, "loss": 3.0628, "step": 5085 }, { "epoch": 0.07, "grad_norm": 8.938374519348145, "learning_rate": 1.1000324429544718e-05, "loss": 2.8076, "step": 5086 }, { "epoch": 0.07, "grad_norm": 7.59551477432251, "learning_rate": 1.1002487293176165e-05, "loss": 2.5883, "step": 5087 }, { "epoch": 0.07, "grad_norm": 9.01394271850586, "learning_rate": 1.1004650156807614e-05, "loss": 2.8773, "step": 5088 }, { "epoch": 0.07, "grad_norm": 8.702778816223145, "learning_rate": 1.1006813020439063e-05, "loss": 2.9021, "step": 5089 }, { "epoch": 0.07, "grad_norm": 7.560642719268799, "learning_rate": 1.1008975884070509e-05, "loss": 2.7492, "step": 5090 }, { "epoch": 0.07, "grad_norm": 7.9069414138793945, "learning_rate": 1.1011138747701959e-05, "loss": 2.7333, "step": 5091 }, { "epoch": 0.07, "grad_norm": 8.007206916809082, "learning_rate": 1.1013301611333407e-05, "loss": 2.5288, "step": 5092 }, { "epoch": 0.07, "grad_norm": 7.4429521560668945, "learning_rate": 1.1015464474964855e-05, "loss": 2.185, "step": 5093 }, { "epoch": 0.07, "grad_norm": 9.426591873168945, "learning_rate": 1.1017627338596301e-05, "loss": 2.8901, "step": 5094 }, { "epoch": 0.07, "grad_norm": 8.941813468933105, "learning_rate": 1.101979020222775e-05, "loss": 2.6084, "step": 5095 }, { "epoch": 0.07, "grad_norm": 9.40434455871582, "learning_rate": 1.1021953065859199e-05, "loss": 2.6856, "step": 5096 }, { "epoch": 0.07, "grad_norm": 7.646826267242432, "learning_rate": 1.1024115929490647e-05, "loss": 2.7673, "step": 5097 }, { "epoch": 0.07, "grad_norm": 8.673662185668945, "learning_rate": 1.1026278793122093e-05, "loss": 2.9784, "step": 5098 }, { "epoch": 0.07, "grad_norm": 8.462982177734375, "learning_rate": 1.1028441656753543e-05, "loss": 2.9636, "step": 5099 }, { "epoch": 0.07, "grad_norm": 8.262367248535156, "learning_rate": 1.1030604520384991e-05, "loss": 2.6615, "step": 5100 }, { "epoch": 0.07, "grad_norm": 7.575250148773193, "learning_rate": 1.1032767384016439e-05, "loss": 2.5463, "step": 5101 }, { "epoch": 0.07, "grad_norm": 9.38537311553955, "learning_rate": 1.1034930247647885e-05, "loss": 2.9246, "step": 5102 }, { "epoch": 0.07, "grad_norm": 8.611021041870117, "learning_rate": 1.1037093111279335e-05, "loss": 3.0767, "step": 5103 }, { "epoch": 0.07, "grad_norm": 8.071881294250488, "learning_rate": 1.1039255974910783e-05, "loss": 2.5361, "step": 5104 }, { "epoch": 0.07, "grad_norm": 7.984316349029541, "learning_rate": 1.1041418838542231e-05, "loss": 2.7106, "step": 5105 }, { "epoch": 0.07, "grad_norm": 8.764227867126465, "learning_rate": 1.1043581702173678e-05, "loss": 2.7288, "step": 5106 }, { "epoch": 0.07, "grad_norm": 9.81295108795166, "learning_rate": 1.1045744565805127e-05, "loss": 3.046, "step": 5107 }, { "epoch": 0.07, "grad_norm": 7.552769660949707, "learning_rate": 1.1047907429436575e-05, "loss": 2.9532, "step": 5108 }, { "epoch": 0.07, "grad_norm": 9.032931327819824, "learning_rate": 1.1050070293068022e-05, "loss": 2.959, "step": 5109 }, { "epoch": 0.07, "grad_norm": 7.868806838989258, "learning_rate": 1.105223315669947e-05, "loss": 2.9836, "step": 5110 }, { "epoch": 0.07, "grad_norm": 8.239274024963379, "learning_rate": 1.105439602033092e-05, "loss": 3.0019, "step": 5111 }, { "epoch": 0.07, "grad_norm": 8.061491966247559, "learning_rate": 1.1056558883962368e-05, "loss": 2.6838, "step": 5112 }, { "epoch": 0.07, "grad_norm": 9.400209426879883, "learning_rate": 1.1058721747593814e-05, "loss": 2.8322, "step": 5113 }, { "epoch": 0.07, "grad_norm": 9.549116134643555, "learning_rate": 1.1060884611225262e-05, "loss": 2.7259, "step": 5114 }, { "epoch": 0.07, "grad_norm": 8.724141120910645, "learning_rate": 1.1063047474856712e-05, "loss": 2.731, "step": 5115 }, { "epoch": 0.07, "grad_norm": 9.513484001159668, "learning_rate": 1.106521033848816e-05, "loss": 3.1578, "step": 5116 }, { "epoch": 0.07, "grad_norm": 7.640521049499512, "learning_rate": 1.1067373202119606e-05, "loss": 2.6082, "step": 5117 }, { "epoch": 0.07, "grad_norm": 8.253503799438477, "learning_rate": 1.1069536065751054e-05, "loss": 2.4159, "step": 5118 }, { "epoch": 0.07, "grad_norm": 7.917253017425537, "learning_rate": 1.1071698929382504e-05, "loss": 2.4122, "step": 5119 }, { "epoch": 0.07, "grad_norm": 8.635971069335938, "learning_rate": 1.1073861793013952e-05, "loss": 2.8136, "step": 5120 }, { "epoch": 0.07, "grad_norm": 8.343846321105957, "learning_rate": 1.1076024656645398e-05, "loss": 2.4318, "step": 5121 }, { "epoch": 0.07, "grad_norm": 8.688150405883789, "learning_rate": 1.1078187520276848e-05, "loss": 2.5788, "step": 5122 }, { "epoch": 0.07, "grad_norm": 9.318769454956055, "learning_rate": 1.1080350383908296e-05, "loss": 2.9046, "step": 5123 }, { "epoch": 0.07, "grad_norm": 8.947097778320312, "learning_rate": 1.1082513247539744e-05, "loss": 2.6973, "step": 5124 }, { "epoch": 0.07, "grad_norm": 7.221140384674072, "learning_rate": 1.108467611117119e-05, "loss": 2.6129, "step": 5125 }, { "epoch": 0.07, "grad_norm": 9.153271675109863, "learning_rate": 1.108683897480264e-05, "loss": 2.572, "step": 5126 }, { "epoch": 0.07, "grad_norm": 9.67176342010498, "learning_rate": 1.1089001838434088e-05, "loss": 2.7951, "step": 5127 }, { "epoch": 0.07, "grad_norm": 10.299778938293457, "learning_rate": 1.1091164702065535e-05, "loss": 2.9776, "step": 5128 }, { "epoch": 0.07, "grad_norm": 8.325987815856934, "learning_rate": 1.1093327565696983e-05, "loss": 3.019, "step": 5129 }, { "epoch": 0.07, "grad_norm": 8.237237930297852, "learning_rate": 1.1095490429328433e-05, "loss": 2.8643, "step": 5130 }, { "epoch": 0.07, "grad_norm": 7.772909641265869, "learning_rate": 1.109765329295988e-05, "loss": 2.6307, "step": 5131 }, { "epoch": 0.07, "grad_norm": 8.848073959350586, "learning_rate": 1.1099816156591327e-05, "loss": 3.1359, "step": 5132 }, { "epoch": 0.07, "grad_norm": 9.399356842041016, "learning_rate": 1.1101979020222775e-05, "loss": 3.3128, "step": 5133 }, { "epoch": 0.07, "grad_norm": 7.588265419006348, "learning_rate": 1.1104141883854225e-05, "loss": 2.5247, "step": 5134 }, { "epoch": 0.07, "grad_norm": 8.66357421875, "learning_rate": 1.1106304747485673e-05, "loss": 2.6618, "step": 5135 }, { "epoch": 0.07, "grad_norm": 7.819633960723877, "learning_rate": 1.110846761111712e-05, "loss": 2.7537, "step": 5136 }, { "epoch": 0.07, "grad_norm": 9.047592163085938, "learning_rate": 1.1110630474748567e-05, "loss": 2.1957, "step": 5137 }, { "epoch": 0.07, "grad_norm": 8.294443130493164, "learning_rate": 1.1112793338380017e-05, "loss": 3.0407, "step": 5138 }, { "epoch": 0.07, "grad_norm": 7.907074928283691, "learning_rate": 1.1114956202011465e-05, "loss": 2.5915, "step": 5139 }, { "epoch": 0.07, "grad_norm": 8.044351577758789, "learning_rate": 1.1117119065642911e-05, "loss": 2.8098, "step": 5140 }, { "epoch": 0.07, "grad_norm": 8.23988151550293, "learning_rate": 1.111928192927436e-05, "loss": 2.4895, "step": 5141 }, { "epoch": 0.07, "grad_norm": 8.250933647155762, "learning_rate": 1.112144479290581e-05, "loss": 2.6339, "step": 5142 }, { "epoch": 0.07, "grad_norm": 8.543758392333984, "learning_rate": 1.1123607656537256e-05, "loss": 2.9873, "step": 5143 }, { "epoch": 0.07, "grad_norm": 7.6831488609313965, "learning_rate": 1.1125770520168704e-05, "loss": 2.6744, "step": 5144 }, { "epoch": 0.07, "grad_norm": 9.185595512390137, "learning_rate": 1.1127933383800152e-05, "loss": 2.6956, "step": 5145 }, { "epoch": 0.07, "grad_norm": 8.029101371765137, "learning_rate": 1.1130096247431601e-05, "loss": 2.7102, "step": 5146 }, { "epoch": 0.07, "grad_norm": 10.89765739440918, "learning_rate": 1.1132259111063048e-05, "loss": 3.1481, "step": 5147 }, { "epoch": 0.07, "grad_norm": 9.472555160522461, "learning_rate": 1.1134421974694496e-05, "loss": 3.1567, "step": 5148 }, { "epoch": 0.07, "grad_norm": 7.202618598937988, "learning_rate": 1.1136584838325944e-05, "loss": 2.7251, "step": 5149 }, { "epoch": 0.07, "grad_norm": 8.918140411376953, "learning_rate": 1.1138747701957394e-05, "loss": 2.8897, "step": 5150 }, { "epoch": 0.07, "grad_norm": 9.385377883911133, "learning_rate": 1.114091056558884e-05, "loss": 3.4327, "step": 5151 }, { "epoch": 0.07, "grad_norm": 9.13364315032959, "learning_rate": 1.1143073429220288e-05, "loss": 2.8915, "step": 5152 }, { "epoch": 0.07, "grad_norm": 8.155811309814453, "learning_rate": 1.1145236292851738e-05, "loss": 2.9538, "step": 5153 }, { "epoch": 0.07, "grad_norm": 7.961185932159424, "learning_rate": 1.1147399156483186e-05, "loss": 2.7739, "step": 5154 }, { "epoch": 0.07, "grad_norm": 8.474306106567383, "learning_rate": 1.1149562020114632e-05, "loss": 2.7334, "step": 5155 }, { "epoch": 0.07, "grad_norm": 7.886475563049316, "learning_rate": 1.115172488374608e-05, "loss": 2.9988, "step": 5156 }, { "epoch": 0.07, "grad_norm": 8.703211784362793, "learning_rate": 1.115388774737753e-05, "loss": 2.5554, "step": 5157 }, { "epoch": 0.07, "grad_norm": 8.20088005065918, "learning_rate": 1.1156050611008978e-05, "loss": 2.6031, "step": 5158 }, { "epoch": 0.07, "grad_norm": 7.812401294708252, "learning_rate": 1.1158213474640424e-05, "loss": 2.683, "step": 5159 }, { "epoch": 0.07, "grad_norm": 8.551080703735352, "learning_rate": 1.1160376338271872e-05, "loss": 3.2752, "step": 5160 }, { "epoch": 0.07, "grad_norm": 7.8732733726501465, "learning_rate": 1.1162539201903322e-05, "loss": 2.5863, "step": 5161 }, { "epoch": 0.07, "grad_norm": 8.222298622131348, "learning_rate": 1.1164702065534769e-05, "loss": 3.1907, "step": 5162 }, { "epoch": 0.07, "grad_norm": 9.47791576385498, "learning_rate": 1.1166864929166217e-05, "loss": 2.9481, "step": 5163 }, { "epoch": 0.07, "grad_norm": 7.156335830688477, "learning_rate": 1.1169027792797665e-05, "loss": 2.697, "step": 5164 }, { "epoch": 0.07, "grad_norm": 8.937063217163086, "learning_rate": 1.1171190656429114e-05, "loss": 2.8826, "step": 5165 }, { "epoch": 0.07, "grad_norm": 7.671934127807617, "learning_rate": 1.117335352006056e-05, "loss": 2.8099, "step": 5166 }, { "epoch": 0.07, "grad_norm": 9.017395973205566, "learning_rate": 1.1175516383692009e-05, "loss": 3.5416, "step": 5167 }, { "epoch": 0.07, "grad_norm": 8.398721694946289, "learning_rate": 1.1177679247323457e-05, "loss": 2.3664, "step": 5168 }, { "epoch": 0.07, "grad_norm": 8.017723083496094, "learning_rate": 1.1179842110954907e-05, "loss": 2.2833, "step": 5169 }, { "epoch": 0.07, "grad_norm": 7.738276481628418, "learning_rate": 1.1182004974586353e-05, "loss": 2.3708, "step": 5170 }, { "epoch": 0.07, "grad_norm": 9.492653846740723, "learning_rate": 1.1184167838217801e-05, "loss": 2.9384, "step": 5171 }, { "epoch": 0.07, "grad_norm": 7.949699878692627, "learning_rate": 1.1186330701849249e-05, "loss": 2.8434, "step": 5172 }, { "epoch": 0.07, "grad_norm": 9.40255355834961, "learning_rate": 1.1188493565480699e-05, "loss": 2.7903, "step": 5173 }, { "epoch": 0.07, "grad_norm": 8.322721481323242, "learning_rate": 1.1190656429112145e-05, "loss": 2.9418, "step": 5174 }, { "epoch": 0.07, "grad_norm": 7.4243035316467285, "learning_rate": 1.1192819292743593e-05, "loss": 2.9033, "step": 5175 }, { "epoch": 0.07, "grad_norm": 8.655213356018066, "learning_rate": 1.1194982156375041e-05, "loss": 2.9378, "step": 5176 }, { "epoch": 0.07, "grad_norm": 8.617932319641113, "learning_rate": 1.1197145020006491e-05, "loss": 2.567, "step": 5177 }, { "epoch": 0.07, "grad_norm": 8.720181465148926, "learning_rate": 1.1199307883637937e-05, "loss": 3.4023, "step": 5178 }, { "epoch": 0.07, "grad_norm": 8.820680618286133, "learning_rate": 1.1201470747269385e-05, "loss": 2.763, "step": 5179 }, { "epoch": 0.07, "grad_norm": 8.596236228942871, "learning_rate": 1.1203633610900832e-05, "loss": 2.4599, "step": 5180 }, { "epoch": 0.07, "grad_norm": 8.739371299743652, "learning_rate": 1.1205796474532281e-05, "loss": 2.9493, "step": 5181 }, { "epoch": 0.07, "grad_norm": 8.346415519714355, "learning_rate": 1.120795933816373e-05, "loss": 2.5445, "step": 5182 }, { "epoch": 0.07, "grad_norm": 8.674337387084961, "learning_rate": 1.1210122201795178e-05, "loss": 3.1492, "step": 5183 }, { "epoch": 0.07, "grad_norm": 7.994020462036133, "learning_rate": 1.1212285065426627e-05, "loss": 2.3413, "step": 5184 }, { "epoch": 0.07, "grad_norm": 8.69291877746582, "learning_rate": 1.1214447929058074e-05, "loss": 3.2462, "step": 5185 }, { "epoch": 0.07, "grad_norm": 9.507060050964355, "learning_rate": 1.1216610792689522e-05, "loss": 2.9113, "step": 5186 }, { "epoch": 0.07, "grad_norm": 8.148828506469727, "learning_rate": 1.121877365632097e-05, "loss": 2.68, "step": 5187 }, { "epoch": 0.07, "grad_norm": 9.428618431091309, "learning_rate": 1.122093651995242e-05, "loss": 3.5059, "step": 5188 }, { "epoch": 0.07, "grad_norm": 8.574841499328613, "learning_rate": 1.1223099383583866e-05, "loss": 2.7554, "step": 5189 }, { "epoch": 0.07, "grad_norm": 7.847898483276367, "learning_rate": 1.1225262247215314e-05, "loss": 3.0607, "step": 5190 }, { "epoch": 0.07, "grad_norm": 9.366634368896484, "learning_rate": 1.1227425110846762e-05, "loss": 2.5205, "step": 5191 }, { "epoch": 0.07, "grad_norm": 6.935732841491699, "learning_rate": 1.1229587974478212e-05, "loss": 2.1415, "step": 5192 }, { "epoch": 0.07, "grad_norm": 8.617780685424805, "learning_rate": 1.1231750838109658e-05, "loss": 2.7494, "step": 5193 }, { "epoch": 0.07, "grad_norm": 8.583123207092285, "learning_rate": 1.1233913701741106e-05, "loss": 2.8021, "step": 5194 }, { "epoch": 0.07, "grad_norm": 8.286606788635254, "learning_rate": 1.1236076565372554e-05, "loss": 2.5008, "step": 5195 }, { "epoch": 0.07, "grad_norm": 8.341069221496582, "learning_rate": 1.1238239429004004e-05, "loss": 2.6079, "step": 5196 }, { "epoch": 0.07, "grad_norm": 7.849120140075684, "learning_rate": 1.124040229263545e-05, "loss": 2.6955, "step": 5197 }, { "epoch": 0.07, "grad_norm": 8.949576377868652, "learning_rate": 1.1242565156266898e-05, "loss": 2.8778, "step": 5198 }, { "epoch": 0.07, "grad_norm": 8.769021034240723, "learning_rate": 1.1244728019898345e-05, "loss": 2.5357, "step": 5199 }, { "epoch": 0.07, "grad_norm": 8.353311538696289, "learning_rate": 1.1246890883529794e-05, "loss": 2.3727, "step": 5200 }, { "epoch": 0.07, "grad_norm": 7.870763778686523, "learning_rate": 1.1249053747161243e-05, "loss": 2.5688, "step": 5201 }, { "epoch": 0.07, "grad_norm": 8.50279712677002, "learning_rate": 1.125121661079269e-05, "loss": 2.9829, "step": 5202 }, { "epoch": 0.07, "grad_norm": 9.060444831848145, "learning_rate": 1.1253379474424137e-05, "loss": 3.0492, "step": 5203 }, { "epoch": 0.07, "grad_norm": 8.570405960083008, "learning_rate": 1.1255542338055587e-05, "loss": 3.1415, "step": 5204 }, { "epoch": 0.07, "grad_norm": 8.315950393676758, "learning_rate": 1.1257705201687035e-05, "loss": 2.6216, "step": 5205 }, { "epoch": 0.07, "grad_norm": 9.540178298950195, "learning_rate": 1.1259868065318483e-05, "loss": 2.7841, "step": 5206 }, { "epoch": 0.07, "grad_norm": 8.999202728271484, "learning_rate": 1.1262030928949929e-05, "loss": 2.8852, "step": 5207 }, { "epoch": 0.07, "grad_norm": 8.899861335754395, "learning_rate": 1.1264193792581379e-05, "loss": 2.8166, "step": 5208 }, { "epoch": 0.07, "grad_norm": 7.0589213371276855, "learning_rate": 1.1266356656212827e-05, "loss": 2.9031, "step": 5209 }, { "epoch": 0.07, "grad_norm": 7.999166965484619, "learning_rate": 1.1268519519844275e-05, "loss": 2.7504, "step": 5210 }, { "epoch": 0.07, "grad_norm": 8.738161087036133, "learning_rate": 1.1270682383475721e-05, "loss": 3.0115, "step": 5211 }, { "epoch": 0.07, "grad_norm": 8.519248962402344, "learning_rate": 1.1272845247107171e-05, "loss": 2.8943, "step": 5212 }, { "epoch": 0.07, "grad_norm": 8.682929039001465, "learning_rate": 1.1275008110738619e-05, "loss": 3.0853, "step": 5213 }, { "epoch": 0.07, "grad_norm": 7.823679447174072, "learning_rate": 1.1277170974370067e-05, "loss": 2.7074, "step": 5214 }, { "epoch": 0.07, "grad_norm": 8.519774436950684, "learning_rate": 1.1279333838001514e-05, "loss": 2.7414, "step": 5215 }, { "epoch": 0.07, "grad_norm": 9.091727256774902, "learning_rate": 1.1281496701632963e-05, "loss": 2.2572, "step": 5216 }, { "epoch": 0.07, "grad_norm": 9.774280548095703, "learning_rate": 1.1283659565264411e-05, "loss": 2.735, "step": 5217 }, { "epoch": 0.07, "grad_norm": 8.425749778747559, "learning_rate": 1.1285822428895858e-05, "loss": 2.6445, "step": 5218 }, { "epoch": 0.07, "grad_norm": 9.28667163848877, "learning_rate": 1.1287985292527307e-05, "loss": 2.7219, "step": 5219 }, { "epoch": 0.07, "grad_norm": 8.038785934448242, "learning_rate": 1.1290148156158755e-05, "loss": 2.4371, "step": 5220 }, { "epoch": 0.07, "grad_norm": 10.051400184631348, "learning_rate": 1.1292311019790204e-05, "loss": 2.919, "step": 5221 }, { "epoch": 0.07, "grad_norm": 9.726800918579102, "learning_rate": 1.129447388342165e-05, "loss": 2.9775, "step": 5222 }, { "epoch": 0.07, "grad_norm": 8.425055503845215, "learning_rate": 1.12966367470531e-05, "loss": 2.8733, "step": 5223 }, { "epoch": 0.07, "grad_norm": 9.487946510314941, "learning_rate": 1.1298799610684548e-05, "loss": 3.2089, "step": 5224 }, { "epoch": 0.07, "grad_norm": 9.618997573852539, "learning_rate": 1.1300962474315996e-05, "loss": 3.0387, "step": 5225 }, { "epoch": 0.07, "grad_norm": 9.140915870666504, "learning_rate": 1.1303125337947442e-05, "loss": 2.9669, "step": 5226 }, { "epoch": 0.07, "grad_norm": 8.523894309997559, "learning_rate": 1.1305288201578892e-05, "loss": 3.1228, "step": 5227 }, { "epoch": 0.07, "grad_norm": 8.875285148620605, "learning_rate": 1.130745106521034e-05, "loss": 2.9138, "step": 5228 }, { "epoch": 0.07, "grad_norm": 8.880651473999023, "learning_rate": 1.1309613928841788e-05, "loss": 3.1927, "step": 5229 }, { "epoch": 0.07, "grad_norm": 11.115874290466309, "learning_rate": 1.1311776792473234e-05, "loss": 3.0245, "step": 5230 }, { "epoch": 0.07, "grad_norm": 7.802852630615234, "learning_rate": 1.1313939656104684e-05, "loss": 2.9002, "step": 5231 }, { "epoch": 0.07, "grad_norm": 9.462176322937012, "learning_rate": 1.1316102519736132e-05, "loss": 3.208, "step": 5232 }, { "epoch": 0.07, "grad_norm": 8.804512023925781, "learning_rate": 1.131826538336758e-05, "loss": 2.7122, "step": 5233 }, { "epoch": 0.07, "grad_norm": 10.787921905517578, "learning_rate": 1.1320428246999026e-05, "loss": 3.021, "step": 5234 }, { "epoch": 0.07, "grad_norm": 9.142914772033691, "learning_rate": 1.1322591110630476e-05, "loss": 2.5186, "step": 5235 }, { "epoch": 0.07, "grad_norm": 9.107233047485352, "learning_rate": 1.1324753974261924e-05, "loss": 2.6853, "step": 5236 }, { "epoch": 0.07, "grad_norm": 10.293116569519043, "learning_rate": 1.132691683789337e-05, "loss": 2.9175, "step": 5237 }, { "epoch": 0.07, "grad_norm": 9.030298233032227, "learning_rate": 1.1329079701524819e-05, "loss": 3.1669, "step": 5238 }, { "epoch": 0.07, "grad_norm": 7.742337226867676, "learning_rate": 1.1331242565156268e-05, "loss": 2.6693, "step": 5239 }, { "epoch": 0.07, "grad_norm": 8.421882629394531, "learning_rate": 1.1333405428787716e-05, "loss": 3.1109, "step": 5240 }, { "epoch": 0.07, "grad_norm": 8.597352027893066, "learning_rate": 1.1335568292419163e-05, "loss": 2.7694, "step": 5241 }, { "epoch": 0.07, "grad_norm": 8.793079376220703, "learning_rate": 1.1337731156050611e-05, "loss": 3.0717, "step": 5242 }, { "epoch": 0.07, "grad_norm": 8.813409805297852, "learning_rate": 1.133989401968206e-05, "loss": 3.157, "step": 5243 }, { "epoch": 0.07, "grad_norm": 8.410351753234863, "learning_rate": 1.1342056883313509e-05, "loss": 2.5484, "step": 5244 }, { "epoch": 0.07, "grad_norm": 9.707316398620605, "learning_rate": 1.1344219746944955e-05, "loss": 3.495, "step": 5245 }, { "epoch": 0.07, "grad_norm": 8.79386043548584, "learning_rate": 1.1346382610576403e-05, "loss": 2.998, "step": 5246 }, { "epoch": 0.07, "grad_norm": 7.676094055175781, "learning_rate": 1.1348545474207853e-05, "loss": 3.1775, "step": 5247 }, { "epoch": 0.07, "grad_norm": 8.485467910766602, "learning_rate": 1.1350708337839301e-05, "loss": 2.231, "step": 5248 }, { "epoch": 0.07, "grad_norm": 7.723127841949463, "learning_rate": 1.1352871201470747e-05, "loss": 3.1346, "step": 5249 }, { "epoch": 0.07, "grad_norm": 8.334729194641113, "learning_rate": 1.1355034065102197e-05, "loss": 2.6675, "step": 5250 }, { "epoch": 0.07, "grad_norm": 9.432947158813477, "learning_rate": 1.1357196928733645e-05, "loss": 2.7806, "step": 5251 }, { "epoch": 0.07, "grad_norm": 8.494820594787598, "learning_rate": 1.1359359792365091e-05, "loss": 3.0281, "step": 5252 }, { "epoch": 0.07, "grad_norm": 8.504171371459961, "learning_rate": 1.136152265599654e-05, "loss": 3.2292, "step": 5253 }, { "epoch": 0.07, "grad_norm": 8.542511940002441, "learning_rate": 1.136368551962799e-05, "loss": 3.2678, "step": 5254 }, { "epoch": 0.07, "grad_norm": 8.360730171203613, "learning_rate": 1.1365848383259437e-05, "loss": 2.6564, "step": 5255 }, { "epoch": 0.07, "grad_norm": 7.2101640701293945, "learning_rate": 1.1368011246890884e-05, "loss": 2.5891, "step": 5256 }, { "epoch": 0.07, "grad_norm": 8.248177528381348, "learning_rate": 1.1370174110522332e-05, "loss": 3.2608, "step": 5257 }, { "epoch": 0.07, "grad_norm": 7.758224010467529, "learning_rate": 1.1372336974153781e-05, "loss": 3.0085, "step": 5258 }, { "epoch": 0.07, "grad_norm": 7.831669330596924, "learning_rate": 1.137449983778523e-05, "loss": 2.1487, "step": 5259 }, { "epoch": 0.07, "grad_norm": 8.976600646972656, "learning_rate": 1.1376662701416676e-05, "loss": 3.394, "step": 5260 }, { "epoch": 0.07, "grad_norm": 7.881988525390625, "learning_rate": 1.1378825565048124e-05, "loss": 2.971, "step": 5261 }, { "epoch": 0.07, "grad_norm": 7.596286773681641, "learning_rate": 1.1380988428679574e-05, "loss": 2.4545, "step": 5262 }, { "epoch": 0.07, "grad_norm": 7.344930171966553, "learning_rate": 1.1383151292311022e-05, "loss": 2.2611, "step": 5263 }, { "epoch": 0.07, "grad_norm": 8.600469589233398, "learning_rate": 1.1385314155942468e-05, "loss": 2.7249, "step": 5264 }, { "epoch": 0.07, "grad_norm": 7.951536178588867, "learning_rate": 1.1387477019573916e-05, "loss": 2.7795, "step": 5265 }, { "epoch": 0.07, "grad_norm": 8.819632530212402, "learning_rate": 1.1389639883205366e-05, "loss": 2.9786, "step": 5266 }, { "epoch": 0.07, "grad_norm": 8.322367668151855, "learning_rate": 1.1391802746836814e-05, "loss": 2.8748, "step": 5267 }, { "epoch": 0.07, "grad_norm": 7.9480671882629395, "learning_rate": 1.139396561046826e-05, "loss": 2.1585, "step": 5268 }, { "epoch": 0.07, "grad_norm": 7.643197059631348, "learning_rate": 1.1396128474099708e-05, "loss": 2.6344, "step": 5269 }, { "epoch": 0.07, "grad_norm": 7.926049709320068, "learning_rate": 1.1398291337731158e-05, "loss": 2.7211, "step": 5270 }, { "epoch": 0.07, "grad_norm": 10.39535140991211, "learning_rate": 1.1400454201362604e-05, "loss": 3.7391, "step": 5271 }, { "epoch": 0.07, "grad_norm": 8.0332670211792, "learning_rate": 1.1402617064994052e-05, "loss": 2.7965, "step": 5272 }, { "epoch": 0.07, "grad_norm": 8.213451385498047, "learning_rate": 1.14047799286255e-05, "loss": 2.7271, "step": 5273 }, { "epoch": 0.07, "grad_norm": 8.93028450012207, "learning_rate": 1.140694279225695e-05, "loss": 2.517, "step": 5274 }, { "epoch": 0.07, "grad_norm": 8.15002727508545, "learning_rate": 1.1409105655888397e-05, "loss": 2.8034, "step": 5275 }, { "epoch": 0.07, "grad_norm": 8.449729919433594, "learning_rate": 1.1411268519519845e-05, "loss": 2.9354, "step": 5276 }, { "epoch": 0.07, "grad_norm": 9.606850624084473, "learning_rate": 1.1413431383151293e-05, "loss": 2.7378, "step": 5277 }, { "epoch": 0.07, "grad_norm": 8.656394004821777, "learning_rate": 1.1415594246782742e-05, "loss": 3.0912, "step": 5278 }, { "epoch": 0.07, "grad_norm": 7.647377967834473, "learning_rate": 1.1417757110414189e-05, "loss": 2.4447, "step": 5279 }, { "epoch": 0.07, "grad_norm": 7.603212833404541, "learning_rate": 1.1419919974045637e-05, "loss": 2.9501, "step": 5280 }, { "epoch": 0.07, "grad_norm": 8.010320663452148, "learning_rate": 1.1422082837677087e-05, "loss": 2.2267, "step": 5281 }, { "epoch": 0.07, "grad_norm": 7.806244850158691, "learning_rate": 1.1424245701308535e-05, "loss": 2.8769, "step": 5282 }, { "epoch": 0.07, "grad_norm": 7.872209548950195, "learning_rate": 1.1426408564939981e-05, "loss": 2.5163, "step": 5283 }, { "epoch": 0.07, "grad_norm": 6.731924533843994, "learning_rate": 1.1428571428571429e-05, "loss": 2.5436, "step": 5284 }, { "epoch": 0.07, "grad_norm": 9.11072063446045, "learning_rate": 1.1430734292202879e-05, "loss": 2.2531, "step": 5285 }, { "epoch": 0.07, "grad_norm": 8.931702613830566, "learning_rate": 1.1432897155834327e-05, "loss": 2.3386, "step": 5286 }, { "epoch": 0.07, "grad_norm": 9.041770935058594, "learning_rate": 1.1435060019465773e-05, "loss": 3.1565, "step": 5287 }, { "epoch": 0.07, "grad_norm": 6.624119758605957, "learning_rate": 1.1437222883097221e-05, "loss": 2.9098, "step": 5288 }, { "epoch": 0.07, "grad_norm": 8.459056854248047, "learning_rate": 1.1439385746728671e-05, "loss": 3.4957, "step": 5289 }, { "epoch": 0.07, "grad_norm": 7.80976676940918, "learning_rate": 1.1441548610360117e-05, "loss": 3.032, "step": 5290 }, { "epoch": 0.07, "grad_norm": 6.816638946533203, "learning_rate": 1.1443711473991565e-05, "loss": 2.3152, "step": 5291 }, { "epoch": 0.07, "grad_norm": 8.306827545166016, "learning_rate": 1.1445874337623013e-05, "loss": 2.9049, "step": 5292 }, { "epoch": 0.07, "grad_norm": 8.513545989990234, "learning_rate": 1.1448037201254463e-05, "loss": 2.8859, "step": 5293 }, { "epoch": 0.07, "grad_norm": 8.339425086975098, "learning_rate": 1.145020006488591e-05, "loss": 2.7245, "step": 5294 }, { "epoch": 0.07, "grad_norm": 7.472795009613037, "learning_rate": 1.1452362928517358e-05, "loss": 1.8907, "step": 5295 }, { "epoch": 0.07, "grad_norm": 9.868514060974121, "learning_rate": 1.1454525792148806e-05, "loss": 3.6928, "step": 5296 }, { "epoch": 0.07, "grad_norm": 7.4989542961120605, "learning_rate": 1.1456688655780255e-05, "loss": 2.8345, "step": 5297 }, { "epoch": 0.07, "grad_norm": 8.308159828186035, "learning_rate": 1.1458851519411702e-05, "loss": 2.6518, "step": 5298 }, { "epoch": 0.07, "grad_norm": 8.664321899414062, "learning_rate": 1.146101438304315e-05, "loss": 3.1694, "step": 5299 }, { "epoch": 0.07, "grad_norm": 8.483955383300781, "learning_rate": 1.1463177246674598e-05, "loss": 3.0386, "step": 5300 }, { "epoch": 0.07, "grad_norm": 9.2189302444458, "learning_rate": 1.1465340110306048e-05, "loss": 3.01, "step": 5301 }, { "epoch": 0.07, "grad_norm": 8.459369659423828, "learning_rate": 1.1467502973937494e-05, "loss": 2.9744, "step": 5302 }, { "epoch": 0.07, "grad_norm": 8.075820922851562, "learning_rate": 1.1469665837568942e-05, "loss": 2.9245, "step": 5303 }, { "epoch": 0.07, "grad_norm": 7.958730220794678, "learning_rate": 1.147182870120039e-05, "loss": 3.2339, "step": 5304 }, { "epoch": 0.07, "grad_norm": 6.916569709777832, "learning_rate": 1.147399156483184e-05, "loss": 2.102, "step": 5305 }, { "epoch": 0.07, "grad_norm": 9.08120346069336, "learning_rate": 1.1476154428463286e-05, "loss": 3.2442, "step": 5306 }, { "epoch": 0.07, "grad_norm": 8.176376342773438, "learning_rate": 1.1478317292094734e-05, "loss": 2.3657, "step": 5307 }, { "epoch": 0.07, "grad_norm": 8.378171920776367, "learning_rate": 1.148048015572618e-05, "loss": 3.2562, "step": 5308 }, { "epoch": 0.07, "grad_norm": 8.772039413452148, "learning_rate": 1.148264301935763e-05, "loss": 3.1191, "step": 5309 }, { "epoch": 0.07, "grad_norm": 8.960663795471191, "learning_rate": 1.1484805882989078e-05, "loss": 2.5633, "step": 5310 }, { "epoch": 0.07, "grad_norm": 7.790516376495361, "learning_rate": 1.1486968746620526e-05, "loss": 3.2221, "step": 5311 }, { "epoch": 0.07, "grad_norm": 7.966332912445068, "learning_rate": 1.1489131610251976e-05, "loss": 2.7061, "step": 5312 }, { "epoch": 0.07, "grad_norm": 7.5374345779418945, "learning_rate": 1.1491294473883422e-05, "loss": 2.99, "step": 5313 }, { "epoch": 0.07, "grad_norm": 8.741888999938965, "learning_rate": 1.149345733751487e-05, "loss": 2.8005, "step": 5314 }, { "epoch": 0.07, "grad_norm": 7.109508514404297, "learning_rate": 1.1495620201146319e-05, "loss": 2.3629, "step": 5315 }, { "epoch": 0.07, "grad_norm": 8.089559555053711, "learning_rate": 1.1497783064777768e-05, "loss": 2.7503, "step": 5316 }, { "epoch": 0.07, "grad_norm": 8.776240348815918, "learning_rate": 1.1499945928409215e-05, "loss": 2.4366, "step": 5317 }, { "epoch": 0.07, "grad_norm": 7.495005130767822, "learning_rate": 1.1502108792040663e-05, "loss": 3.3934, "step": 5318 }, { "epoch": 0.07, "grad_norm": 7.450030326843262, "learning_rate": 1.150427165567211e-05, "loss": 2.8688, "step": 5319 }, { "epoch": 0.07, "grad_norm": 8.537267684936523, "learning_rate": 1.150643451930356e-05, "loss": 2.7417, "step": 5320 }, { "epoch": 0.07, "grad_norm": 7.745062828063965, "learning_rate": 1.1508597382935007e-05, "loss": 2.8029, "step": 5321 }, { "epoch": 0.07, "grad_norm": 7.846710205078125, "learning_rate": 1.1510760246566455e-05, "loss": 2.5621, "step": 5322 }, { "epoch": 0.07, "grad_norm": 8.301724433898926, "learning_rate": 1.1512923110197903e-05, "loss": 3.2529, "step": 5323 }, { "epoch": 0.07, "grad_norm": 7.368049144744873, "learning_rate": 1.1515085973829351e-05, "loss": 2.3407, "step": 5324 }, { "epoch": 0.07, "grad_norm": 10.30322551727295, "learning_rate": 1.1517248837460799e-05, "loss": 2.8528, "step": 5325 }, { "epoch": 0.07, "grad_norm": 8.046534538269043, "learning_rate": 1.1519411701092247e-05, "loss": 2.8199, "step": 5326 }, { "epoch": 0.07, "grad_norm": 8.445730209350586, "learning_rate": 1.1521574564723694e-05, "loss": 2.6163, "step": 5327 }, { "epoch": 0.07, "grad_norm": 7.530698299407959, "learning_rate": 1.1523737428355143e-05, "loss": 2.8239, "step": 5328 }, { "epoch": 0.07, "grad_norm": 9.138699531555176, "learning_rate": 1.1525900291986591e-05, "loss": 3.4012, "step": 5329 }, { "epoch": 0.07, "grad_norm": 8.103557586669922, "learning_rate": 1.152806315561804e-05, "loss": 2.4906, "step": 5330 }, { "epoch": 0.07, "grad_norm": 8.370314598083496, "learning_rate": 1.1530226019249486e-05, "loss": 3.2102, "step": 5331 }, { "epoch": 0.07, "grad_norm": 7.488301753997803, "learning_rate": 1.1532388882880935e-05, "loss": 2.6169, "step": 5332 }, { "epoch": 0.07, "grad_norm": 7.223875522613525, "learning_rate": 1.1534551746512384e-05, "loss": 2.4928, "step": 5333 }, { "epoch": 0.07, "grad_norm": 7.998122215270996, "learning_rate": 1.1536714610143832e-05, "loss": 2.9893, "step": 5334 }, { "epoch": 0.07, "grad_norm": 9.12181568145752, "learning_rate": 1.1538877473775278e-05, "loss": 3.0211, "step": 5335 }, { "epoch": 0.07, "grad_norm": 8.523334503173828, "learning_rate": 1.1541040337406728e-05, "loss": 2.679, "step": 5336 }, { "epoch": 0.07, "grad_norm": 8.039056777954102, "learning_rate": 1.1543203201038176e-05, "loss": 2.4909, "step": 5337 }, { "epoch": 0.07, "grad_norm": 7.8022332191467285, "learning_rate": 1.1545366064669624e-05, "loss": 3.1198, "step": 5338 }, { "epoch": 0.07, "grad_norm": 7.967710494995117, "learning_rate": 1.154752892830107e-05, "loss": 2.6972, "step": 5339 }, { "epoch": 0.07, "grad_norm": 9.98913860321045, "learning_rate": 1.154969179193252e-05, "loss": 2.9815, "step": 5340 }, { "epoch": 0.07, "grad_norm": 8.674976348876953, "learning_rate": 1.1551854655563968e-05, "loss": 2.88, "step": 5341 }, { "epoch": 0.07, "grad_norm": 9.36095142364502, "learning_rate": 1.1554017519195416e-05, "loss": 2.9426, "step": 5342 }, { "epoch": 0.07, "grad_norm": 7.790654182434082, "learning_rate": 1.1556180382826862e-05, "loss": 2.8709, "step": 5343 }, { "epoch": 0.07, "grad_norm": 7.015620231628418, "learning_rate": 1.1558343246458312e-05, "loss": 2.3874, "step": 5344 }, { "epoch": 0.07, "grad_norm": 8.144777297973633, "learning_rate": 1.156050611008976e-05, "loss": 3.3437, "step": 5345 }, { "epoch": 0.07, "grad_norm": 7.611141681671143, "learning_rate": 1.1562668973721206e-05, "loss": 2.4185, "step": 5346 }, { "epoch": 0.07, "grad_norm": 7.632063865661621, "learning_rate": 1.1564831837352656e-05, "loss": 2.4076, "step": 5347 }, { "epoch": 0.07, "grad_norm": 7.1161274909973145, "learning_rate": 1.1566994700984104e-05, "loss": 2.3363, "step": 5348 }, { "epoch": 0.07, "grad_norm": 8.05571460723877, "learning_rate": 1.1569157564615552e-05, "loss": 2.3671, "step": 5349 }, { "epoch": 0.07, "grad_norm": 9.945476531982422, "learning_rate": 1.1571320428246999e-05, "loss": 3.3956, "step": 5350 }, { "epoch": 0.07, "grad_norm": 8.7359037399292, "learning_rate": 1.1573483291878448e-05, "loss": 2.4011, "step": 5351 }, { "epoch": 0.07, "grad_norm": 8.414740562438965, "learning_rate": 1.1575646155509896e-05, "loss": 3.0267, "step": 5352 }, { "epoch": 0.07, "grad_norm": 7.6378374099731445, "learning_rate": 1.1577809019141345e-05, "loss": 2.3244, "step": 5353 }, { "epoch": 0.07, "grad_norm": 8.59677505493164, "learning_rate": 1.1579971882772791e-05, "loss": 2.8804, "step": 5354 }, { "epoch": 0.07, "grad_norm": 6.931464672088623, "learning_rate": 1.158213474640424e-05, "loss": 2.2082, "step": 5355 }, { "epoch": 0.07, "grad_norm": 8.22708797454834, "learning_rate": 1.1584297610035689e-05, "loss": 2.9574, "step": 5356 }, { "epoch": 0.07, "grad_norm": 8.596639633178711, "learning_rate": 1.1586460473667137e-05, "loss": 2.9279, "step": 5357 }, { "epoch": 0.07, "grad_norm": 8.381863594055176, "learning_rate": 1.1588623337298583e-05, "loss": 2.7027, "step": 5358 }, { "epoch": 0.07, "grad_norm": 8.047353744506836, "learning_rate": 1.1590786200930033e-05, "loss": 3.0469, "step": 5359 }, { "epoch": 0.07, "grad_norm": 7.496104717254639, "learning_rate": 1.1592949064561481e-05, "loss": 2.8988, "step": 5360 }, { "epoch": 0.07, "grad_norm": 8.81773567199707, "learning_rate": 1.1595111928192927e-05, "loss": 2.865, "step": 5361 }, { "epoch": 0.07, "grad_norm": 7.62082052230835, "learning_rate": 1.1597274791824375e-05, "loss": 2.2744, "step": 5362 }, { "epoch": 0.07, "grad_norm": 9.255558967590332, "learning_rate": 1.1599437655455825e-05, "loss": 2.2272, "step": 5363 }, { "epoch": 0.07, "grad_norm": 9.275634765625, "learning_rate": 1.1601600519087273e-05, "loss": 2.5854, "step": 5364 }, { "epoch": 0.07, "grad_norm": 8.067124366760254, "learning_rate": 1.160376338271872e-05, "loss": 2.8079, "step": 5365 }, { "epoch": 0.07, "grad_norm": 7.265322685241699, "learning_rate": 1.1605926246350167e-05, "loss": 2.457, "step": 5366 }, { "epoch": 0.07, "grad_norm": 8.63458251953125, "learning_rate": 1.1608089109981617e-05, "loss": 2.719, "step": 5367 }, { "epoch": 0.07, "grad_norm": 7.910319805145264, "learning_rate": 1.1610251973613065e-05, "loss": 2.836, "step": 5368 }, { "epoch": 0.07, "grad_norm": 8.14831829071045, "learning_rate": 1.1612414837244512e-05, "loss": 2.8736, "step": 5369 }, { "epoch": 0.07, "grad_norm": 7.386200428009033, "learning_rate": 1.161457770087596e-05, "loss": 2.403, "step": 5370 }, { "epoch": 0.07, "grad_norm": 9.15237045288086, "learning_rate": 1.161674056450741e-05, "loss": 2.7525, "step": 5371 }, { "epoch": 0.07, "grad_norm": 8.849578857421875, "learning_rate": 1.1618903428138857e-05, "loss": 2.9257, "step": 5372 }, { "epoch": 0.07, "grad_norm": 8.187430381774902, "learning_rate": 1.1621066291770304e-05, "loss": 2.8303, "step": 5373 }, { "epoch": 0.07, "grad_norm": 9.105034828186035, "learning_rate": 1.1623229155401752e-05, "loss": 3.0217, "step": 5374 }, { "epoch": 0.07, "grad_norm": 8.349696159362793, "learning_rate": 1.1625392019033202e-05, "loss": 2.8499, "step": 5375 }, { "epoch": 0.07, "grad_norm": 8.64113712310791, "learning_rate": 1.162755488266465e-05, "loss": 2.826, "step": 5376 }, { "epoch": 0.07, "grad_norm": 7.534224510192871, "learning_rate": 1.1629717746296096e-05, "loss": 2.5779, "step": 5377 }, { "epoch": 0.07, "grad_norm": 7.9818434715271, "learning_rate": 1.1631880609927546e-05, "loss": 2.2461, "step": 5378 }, { "epoch": 0.07, "grad_norm": 7.897556781768799, "learning_rate": 1.1634043473558994e-05, "loss": 2.278, "step": 5379 }, { "epoch": 0.07, "grad_norm": 9.263731002807617, "learning_rate": 1.163620633719044e-05, "loss": 3.1152, "step": 5380 }, { "epoch": 0.07, "grad_norm": 8.970857620239258, "learning_rate": 1.1638369200821888e-05, "loss": 2.8172, "step": 5381 }, { "epoch": 0.07, "grad_norm": 8.05016040802002, "learning_rate": 1.1640532064453338e-05, "loss": 2.9476, "step": 5382 }, { "epoch": 0.07, "grad_norm": 8.216246604919434, "learning_rate": 1.1642694928084786e-05, "loss": 2.9406, "step": 5383 }, { "epoch": 0.07, "grad_norm": 8.644370079040527, "learning_rate": 1.1644857791716232e-05, "loss": 3.3702, "step": 5384 }, { "epoch": 0.07, "grad_norm": 8.834339141845703, "learning_rate": 1.164702065534768e-05, "loss": 2.7571, "step": 5385 }, { "epoch": 0.07, "grad_norm": 7.984983921051025, "learning_rate": 1.164918351897913e-05, "loss": 2.491, "step": 5386 }, { "epoch": 0.07, "grad_norm": 8.44670295715332, "learning_rate": 1.1651346382610578e-05, "loss": 2.7838, "step": 5387 }, { "epoch": 0.07, "grad_norm": 7.958768367767334, "learning_rate": 1.1653509246242025e-05, "loss": 2.5557, "step": 5388 }, { "epoch": 0.07, "grad_norm": 7.525256156921387, "learning_rate": 1.1655672109873473e-05, "loss": 3.0217, "step": 5389 }, { "epoch": 0.07, "grad_norm": 7.924159526824951, "learning_rate": 1.1657834973504922e-05, "loss": 3.2999, "step": 5390 }, { "epoch": 0.07, "grad_norm": 8.344447135925293, "learning_rate": 1.165999783713637e-05, "loss": 3.293, "step": 5391 }, { "epoch": 0.07, "grad_norm": 8.690183639526367, "learning_rate": 1.1662160700767817e-05, "loss": 3.0557, "step": 5392 }, { "epoch": 0.07, "grad_norm": 8.362953186035156, "learning_rate": 1.1664323564399265e-05, "loss": 2.513, "step": 5393 }, { "epoch": 0.07, "grad_norm": 10.955168724060059, "learning_rate": 1.1666486428030715e-05, "loss": 2.9814, "step": 5394 }, { "epoch": 0.07, "grad_norm": 8.088818550109863, "learning_rate": 1.1668649291662163e-05, "loss": 2.2238, "step": 5395 }, { "epoch": 0.07, "grad_norm": 8.393827438354492, "learning_rate": 1.1670812155293609e-05, "loss": 3.036, "step": 5396 }, { "epoch": 0.07, "grad_norm": 7.8067474365234375, "learning_rate": 1.1672975018925057e-05, "loss": 2.308, "step": 5397 }, { "epoch": 0.07, "grad_norm": 8.594417572021484, "learning_rate": 1.1675137882556507e-05, "loss": 3.0053, "step": 5398 }, { "epoch": 0.07, "grad_norm": 8.335955619812012, "learning_rate": 1.1677300746187953e-05, "loss": 2.3501, "step": 5399 }, { "epoch": 0.07, "grad_norm": 7.6542558670043945, "learning_rate": 1.1679463609819401e-05, "loss": 2.2444, "step": 5400 }, { "epoch": 0.07, "grad_norm": 7.766315937042236, "learning_rate": 1.168162647345085e-05, "loss": 2.6508, "step": 5401 }, { "epoch": 0.07, "grad_norm": 7.5457539558410645, "learning_rate": 1.1683789337082299e-05, "loss": 2.8728, "step": 5402 }, { "epoch": 0.07, "grad_norm": 8.417806625366211, "learning_rate": 1.1685952200713745e-05, "loss": 2.8925, "step": 5403 }, { "epoch": 0.07, "grad_norm": 8.270647048950195, "learning_rate": 1.1688115064345193e-05, "loss": 2.6807, "step": 5404 }, { "epoch": 0.07, "grad_norm": 9.11268424987793, "learning_rate": 1.1690277927976641e-05, "loss": 3.1192, "step": 5405 }, { "epoch": 0.07, "grad_norm": 8.089879989624023, "learning_rate": 1.1692440791608091e-05, "loss": 2.84, "step": 5406 }, { "epoch": 0.07, "grad_norm": 7.957298278808594, "learning_rate": 1.1694603655239538e-05, "loss": 2.5191, "step": 5407 }, { "epoch": 0.07, "grad_norm": 8.143106460571289, "learning_rate": 1.1696766518870986e-05, "loss": 2.9177, "step": 5408 }, { "epoch": 0.07, "grad_norm": 8.329826354980469, "learning_rate": 1.1698929382502435e-05, "loss": 2.9625, "step": 5409 }, { "epoch": 0.07, "grad_norm": 7.9939866065979, "learning_rate": 1.1701092246133883e-05, "loss": 2.8445, "step": 5410 }, { "epoch": 0.07, "grad_norm": 8.88742446899414, "learning_rate": 1.170325510976533e-05, "loss": 2.9468, "step": 5411 }, { "epoch": 0.07, "grad_norm": 8.749044418334961, "learning_rate": 1.1705417973396778e-05, "loss": 2.6564, "step": 5412 }, { "epoch": 0.07, "grad_norm": 9.490391731262207, "learning_rate": 1.1707580837028228e-05, "loss": 3.1177, "step": 5413 }, { "epoch": 0.07, "grad_norm": 8.219937324523926, "learning_rate": 1.1709743700659676e-05, "loss": 3.1067, "step": 5414 }, { "epoch": 0.07, "grad_norm": 8.264627456665039, "learning_rate": 1.1711906564291122e-05, "loss": 2.6591, "step": 5415 }, { "epoch": 0.07, "grad_norm": 8.201211929321289, "learning_rate": 1.171406942792257e-05, "loss": 2.7643, "step": 5416 }, { "epoch": 0.07, "grad_norm": 9.129758834838867, "learning_rate": 1.171623229155402e-05, "loss": 2.8433, "step": 5417 }, { "epoch": 0.07, "grad_norm": 7.778236389160156, "learning_rate": 1.1718395155185466e-05, "loss": 2.7121, "step": 5418 }, { "epoch": 0.07, "grad_norm": 8.959314346313477, "learning_rate": 1.1720558018816914e-05, "loss": 2.9663, "step": 5419 }, { "epoch": 0.07, "grad_norm": 9.692255973815918, "learning_rate": 1.1722720882448362e-05, "loss": 3.2527, "step": 5420 }, { "epoch": 0.07, "grad_norm": 8.532624244689941, "learning_rate": 1.1724883746079812e-05, "loss": 2.9047, "step": 5421 }, { "epoch": 0.07, "grad_norm": 7.7953290939331055, "learning_rate": 1.1727046609711258e-05, "loss": 2.8691, "step": 5422 }, { "epoch": 0.07, "grad_norm": 7.791032791137695, "learning_rate": 1.1729209473342706e-05, "loss": 3.031, "step": 5423 }, { "epoch": 0.07, "grad_norm": 9.07859992980957, "learning_rate": 1.1731372336974154e-05, "loss": 3.1517, "step": 5424 }, { "epoch": 0.07, "grad_norm": 9.05461311340332, "learning_rate": 1.1733535200605604e-05, "loss": 2.6535, "step": 5425 }, { "epoch": 0.07, "grad_norm": 7.237456798553467, "learning_rate": 1.173569806423705e-05, "loss": 2.6056, "step": 5426 }, { "epoch": 0.07, "grad_norm": 8.291662216186523, "learning_rate": 1.1737860927868499e-05, "loss": 2.9155, "step": 5427 }, { "epoch": 0.07, "grad_norm": 8.886640548706055, "learning_rate": 1.1740023791499947e-05, "loss": 2.5649, "step": 5428 }, { "epoch": 0.07, "grad_norm": 7.189305305480957, "learning_rate": 1.1742186655131396e-05, "loss": 2.9639, "step": 5429 }, { "epoch": 0.07, "grad_norm": 9.348669052124023, "learning_rate": 1.1744349518762843e-05, "loss": 2.714, "step": 5430 }, { "epoch": 0.07, "grad_norm": 8.351139068603516, "learning_rate": 1.174651238239429e-05, "loss": 2.4536, "step": 5431 }, { "epoch": 0.07, "grad_norm": 8.356954574584961, "learning_rate": 1.1748675246025739e-05, "loss": 3.4517, "step": 5432 }, { "epoch": 0.07, "grad_norm": 8.869531631469727, "learning_rate": 1.1750838109657189e-05, "loss": 2.8146, "step": 5433 }, { "epoch": 0.07, "grad_norm": 7.636457443237305, "learning_rate": 1.1753000973288635e-05, "loss": 2.733, "step": 5434 }, { "epoch": 0.07, "grad_norm": 8.632857322692871, "learning_rate": 1.1755163836920083e-05, "loss": 3.0559, "step": 5435 }, { "epoch": 0.07, "grad_norm": 7.549345970153809, "learning_rate": 1.175732670055153e-05, "loss": 2.3085, "step": 5436 }, { "epoch": 0.07, "grad_norm": 8.151795387268066, "learning_rate": 1.1759489564182979e-05, "loss": 2.6914, "step": 5437 }, { "epoch": 0.07, "grad_norm": 7.979367733001709, "learning_rate": 1.1761652427814427e-05, "loss": 2.7862, "step": 5438 }, { "epoch": 0.07, "grad_norm": 7.142310619354248, "learning_rate": 1.1763815291445875e-05, "loss": 2.4242, "step": 5439 }, { "epoch": 0.07, "grad_norm": 8.405745506286621, "learning_rate": 1.1765978155077325e-05, "loss": 2.7266, "step": 5440 }, { "epoch": 0.07, "grad_norm": 8.496750831604004, "learning_rate": 1.1768141018708771e-05, "loss": 2.776, "step": 5441 }, { "epoch": 0.07, "grad_norm": 8.188544273376465, "learning_rate": 1.177030388234022e-05, "loss": 2.8498, "step": 5442 }, { "epoch": 0.07, "grad_norm": 8.345514297485352, "learning_rate": 1.1772466745971667e-05, "loss": 3.1308, "step": 5443 }, { "epoch": 0.07, "grad_norm": 7.786322116851807, "learning_rate": 1.1774629609603117e-05, "loss": 2.6352, "step": 5444 }, { "epoch": 0.07, "grad_norm": 7.758318901062012, "learning_rate": 1.1776792473234563e-05, "loss": 2.3733, "step": 5445 }, { "epoch": 0.07, "grad_norm": 9.078805923461914, "learning_rate": 1.1778955336866012e-05, "loss": 2.83, "step": 5446 }, { "epoch": 0.07, "grad_norm": 8.09577465057373, "learning_rate": 1.178111820049746e-05, "loss": 3.0108, "step": 5447 }, { "epoch": 0.07, "grad_norm": 8.15850830078125, "learning_rate": 1.178328106412891e-05, "loss": 3.0958, "step": 5448 }, { "epoch": 0.07, "grad_norm": 7.9514265060424805, "learning_rate": 1.1785443927760356e-05, "loss": 2.6294, "step": 5449 }, { "epoch": 0.07, "grad_norm": 8.42742919921875, "learning_rate": 1.1787606791391804e-05, "loss": 2.6654, "step": 5450 }, { "epoch": 0.07, "grad_norm": 8.032392501831055, "learning_rate": 1.1789769655023252e-05, "loss": 2.419, "step": 5451 }, { "epoch": 0.07, "grad_norm": 8.60965633392334, "learning_rate": 1.17919325186547e-05, "loss": 3.3514, "step": 5452 }, { "epoch": 0.07, "grad_norm": 9.423176765441895, "learning_rate": 1.1794095382286148e-05, "loss": 3.2234, "step": 5453 }, { "epoch": 0.07, "grad_norm": 8.532468795776367, "learning_rate": 1.1796258245917596e-05, "loss": 3.0485, "step": 5454 }, { "epoch": 0.07, "grad_norm": 9.075608253479004, "learning_rate": 1.1798421109549042e-05, "loss": 2.8578, "step": 5455 }, { "epoch": 0.07, "grad_norm": 9.025737762451172, "learning_rate": 1.1800583973180492e-05, "loss": 3.263, "step": 5456 }, { "epoch": 0.07, "grad_norm": 9.471341133117676, "learning_rate": 1.180274683681194e-05, "loss": 3.1535, "step": 5457 }, { "epoch": 0.07, "grad_norm": 8.382224082946777, "learning_rate": 1.1804909700443388e-05, "loss": 2.3175, "step": 5458 }, { "epoch": 0.07, "grad_norm": 8.329473495483398, "learning_rate": 1.1807072564074835e-05, "loss": 2.7461, "step": 5459 }, { "epoch": 0.07, "grad_norm": 7.49725341796875, "learning_rate": 1.1809235427706284e-05, "loss": 2.9558, "step": 5460 }, { "epoch": 0.07, "grad_norm": 8.68337631225586, "learning_rate": 1.1811398291337732e-05, "loss": 3.1452, "step": 5461 }, { "epoch": 0.07, "grad_norm": 9.574441909790039, "learning_rate": 1.181356115496918e-05, "loss": 3.4179, "step": 5462 }, { "epoch": 0.07, "grad_norm": 9.510294914245605, "learning_rate": 1.1815724018600627e-05, "loss": 3.1689, "step": 5463 }, { "epoch": 0.07, "grad_norm": 8.076147079467773, "learning_rate": 1.1817886882232076e-05, "loss": 2.9349, "step": 5464 }, { "epoch": 0.07, "grad_norm": 8.244941711425781, "learning_rate": 1.1820049745863525e-05, "loss": 3.0171, "step": 5465 }, { "epoch": 0.07, "grad_norm": 8.35450553894043, "learning_rate": 1.1822212609494973e-05, "loss": 2.9496, "step": 5466 }, { "epoch": 0.07, "grad_norm": 9.1138334274292, "learning_rate": 1.1824375473126419e-05, "loss": 3.1524, "step": 5467 }, { "epoch": 0.07, "grad_norm": 9.161320686340332, "learning_rate": 1.1826538336757869e-05, "loss": 3.1034, "step": 5468 }, { "epoch": 0.07, "grad_norm": 8.795257568359375, "learning_rate": 1.1828701200389317e-05, "loss": 2.9084, "step": 5469 }, { "epoch": 0.07, "grad_norm": 7.902888774871826, "learning_rate": 1.1830864064020765e-05, "loss": 3.0329, "step": 5470 }, { "epoch": 0.07, "grad_norm": 7.796362400054932, "learning_rate": 1.1833026927652211e-05, "loss": 2.6777, "step": 5471 }, { "epoch": 0.07, "grad_norm": 7.355490684509277, "learning_rate": 1.1835189791283661e-05, "loss": 2.82, "step": 5472 }, { "epoch": 0.07, "grad_norm": 7.840528964996338, "learning_rate": 1.1837352654915109e-05, "loss": 2.6553, "step": 5473 }, { "epoch": 0.07, "grad_norm": 7.698709011077881, "learning_rate": 1.1839515518546555e-05, "loss": 3.068, "step": 5474 }, { "epoch": 0.07, "grad_norm": 8.535446166992188, "learning_rate": 1.1841678382178005e-05, "loss": 3.0543, "step": 5475 }, { "epoch": 0.07, "grad_norm": 7.1037068367004395, "learning_rate": 1.1843841245809453e-05, "loss": 2.612, "step": 5476 }, { "epoch": 0.07, "grad_norm": 7.398043155670166, "learning_rate": 1.1846004109440901e-05, "loss": 2.6427, "step": 5477 }, { "epoch": 0.07, "grad_norm": 7.005255222320557, "learning_rate": 1.1848166973072347e-05, "loss": 2.6107, "step": 5478 }, { "epoch": 0.07, "grad_norm": 7.578644752502441, "learning_rate": 1.1850329836703797e-05, "loss": 2.7629, "step": 5479 }, { "epoch": 0.07, "grad_norm": 8.22329330444336, "learning_rate": 1.1852492700335245e-05, "loss": 2.4969, "step": 5480 }, { "epoch": 0.07, "grad_norm": 8.654696464538574, "learning_rate": 1.1854655563966693e-05, "loss": 2.6842, "step": 5481 }, { "epoch": 0.07, "grad_norm": 7.858907222747803, "learning_rate": 1.185681842759814e-05, "loss": 2.6611, "step": 5482 }, { "epoch": 0.07, "grad_norm": 9.317474365234375, "learning_rate": 1.185898129122959e-05, "loss": 2.7205, "step": 5483 }, { "epoch": 0.07, "grad_norm": 8.440983772277832, "learning_rate": 1.1861144154861037e-05, "loss": 2.8239, "step": 5484 }, { "epoch": 0.07, "grad_norm": 9.227291107177734, "learning_rate": 1.1863307018492486e-05, "loss": 2.5388, "step": 5485 }, { "epoch": 0.07, "grad_norm": 7.328279972076416, "learning_rate": 1.1865469882123932e-05, "loss": 2.4374, "step": 5486 }, { "epoch": 0.07, "grad_norm": 8.567757606506348, "learning_rate": 1.1867632745755382e-05, "loss": 3.4153, "step": 5487 }, { "epoch": 0.07, "grad_norm": 8.456048011779785, "learning_rate": 1.186979560938683e-05, "loss": 2.6941, "step": 5488 }, { "epoch": 0.07, "grad_norm": 7.9922776222229, "learning_rate": 1.1871958473018276e-05, "loss": 3.0717, "step": 5489 }, { "epoch": 0.07, "grad_norm": 7.860463619232178, "learning_rate": 1.1874121336649724e-05, "loss": 2.32, "step": 5490 }, { "epoch": 0.07, "grad_norm": 7.394965171813965, "learning_rate": 1.1876284200281174e-05, "loss": 2.8739, "step": 5491 }, { "epoch": 0.07, "grad_norm": 7.564680099487305, "learning_rate": 1.1878447063912622e-05, "loss": 2.5756, "step": 5492 }, { "epoch": 0.07, "grad_norm": 7.643363952636719, "learning_rate": 1.1880609927544068e-05, "loss": 2.7958, "step": 5493 }, { "epoch": 0.07, "grad_norm": 8.543609619140625, "learning_rate": 1.1882772791175516e-05, "loss": 2.4736, "step": 5494 }, { "epoch": 0.07, "grad_norm": 7.501171112060547, "learning_rate": 1.1884935654806966e-05, "loss": 2.8481, "step": 5495 }, { "epoch": 0.07, "grad_norm": 8.700876235961914, "learning_rate": 1.1887098518438414e-05, "loss": 3.3206, "step": 5496 }, { "epoch": 0.07, "grad_norm": 9.192660331726074, "learning_rate": 1.188926138206986e-05, "loss": 2.6948, "step": 5497 }, { "epoch": 0.07, "grad_norm": 10.007209777832031, "learning_rate": 1.1891424245701308e-05, "loss": 3.7868, "step": 5498 }, { "epoch": 0.07, "grad_norm": 8.045419692993164, "learning_rate": 1.1893587109332758e-05, "loss": 2.6863, "step": 5499 }, { "epoch": 0.07, "grad_norm": 8.203307151794434, "learning_rate": 1.1895749972964206e-05, "loss": 2.7764, "step": 5500 }, { "epoch": 0.07, "grad_norm": 7.429471492767334, "learning_rate": 1.1897912836595653e-05, "loss": 2.4093, "step": 5501 }, { "epoch": 0.07, "grad_norm": 8.57848072052002, "learning_rate": 1.19000757002271e-05, "loss": 3.3645, "step": 5502 }, { "epoch": 0.07, "grad_norm": 8.955368041992188, "learning_rate": 1.190223856385855e-05, "loss": 3.3151, "step": 5503 }, { "epoch": 0.07, "grad_norm": 9.48219108581543, "learning_rate": 1.1904401427489998e-05, "loss": 2.6351, "step": 5504 }, { "epoch": 0.07, "grad_norm": 7.800426483154297, "learning_rate": 1.1906564291121445e-05, "loss": 2.8176, "step": 5505 }, { "epoch": 0.07, "grad_norm": 8.291927337646484, "learning_rate": 1.1908727154752895e-05, "loss": 2.9023, "step": 5506 }, { "epoch": 0.07, "grad_norm": 7.620169639587402, "learning_rate": 1.1910890018384343e-05, "loss": 2.3484, "step": 5507 }, { "epoch": 0.07, "grad_norm": 8.703131675720215, "learning_rate": 1.1913052882015789e-05, "loss": 3.5492, "step": 5508 }, { "epoch": 0.07, "grad_norm": 8.678372383117676, "learning_rate": 1.1915215745647237e-05, "loss": 3.2933, "step": 5509 }, { "epoch": 0.07, "grad_norm": 8.170695304870605, "learning_rate": 1.1917378609278687e-05, "loss": 2.2947, "step": 5510 }, { "epoch": 0.07, "grad_norm": 8.794312477111816, "learning_rate": 1.1919541472910135e-05, "loss": 3.3358, "step": 5511 }, { "epoch": 0.07, "grad_norm": 8.209476470947266, "learning_rate": 1.1921704336541581e-05, "loss": 2.4205, "step": 5512 }, { "epoch": 0.07, "grad_norm": 8.491292953491211, "learning_rate": 1.192386720017303e-05, "loss": 2.8777, "step": 5513 }, { "epoch": 0.07, "grad_norm": 9.06899642944336, "learning_rate": 1.1926030063804479e-05, "loss": 2.7424, "step": 5514 }, { "epoch": 0.07, "grad_norm": 8.085906028747559, "learning_rate": 1.1928192927435927e-05, "loss": 2.5415, "step": 5515 }, { "epoch": 0.07, "grad_norm": 9.097418785095215, "learning_rate": 1.1930355791067373e-05, "loss": 2.6955, "step": 5516 }, { "epoch": 0.07, "grad_norm": 7.939058303833008, "learning_rate": 1.1932518654698821e-05, "loss": 2.9568, "step": 5517 }, { "epoch": 0.07, "grad_norm": 8.080018043518066, "learning_rate": 1.1934681518330271e-05, "loss": 2.9628, "step": 5518 }, { "epoch": 0.07, "grad_norm": 9.204824447631836, "learning_rate": 1.193684438196172e-05, "loss": 2.9927, "step": 5519 }, { "epoch": 0.07, "grad_norm": 7.513237476348877, "learning_rate": 1.1939007245593166e-05, "loss": 2.9003, "step": 5520 }, { "epoch": 0.07, "grad_norm": 8.146283149719238, "learning_rate": 1.1941170109224614e-05, "loss": 2.9379, "step": 5521 }, { "epoch": 0.07, "grad_norm": 8.736393928527832, "learning_rate": 1.1943332972856063e-05, "loss": 3.2998, "step": 5522 }, { "epoch": 0.07, "grad_norm": 8.046380996704102, "learning_rate": 1.1945495836487511e-05, "loss": 2.769, "step": 5523 }, { "epoch": 0.07, "grad_norm": 8.255708694458008, "learning_rate": 1.1947658700118958e-05, "loss": 2.6578, "step": 5524 }, { "epoch": 0.07, "grad_norm": 7.15173864364624, "learning_rate": 1.1949821563750406e-05, "loss": 2.2421, "step": 5525 }, { "epoch": 0.07, "grad_norm": 8.073967933654785, "learning_rate": 1.1951984427381856e-05, "loss": 2.9672, "step": 5526 }, { "epoch": 0.07, "grad_norm": 7.5592498779296875, "learning_rate": 1.1954147291013302e-05, "loss": 2.6435, "step": 5527 }, { "epoch": 0.07, "grad_norm": 8.279298782348633, "learning_rate": 1.195631015464475e-05, "loss": 2.6006, "step": 5528 }, { "epoch": 0.07, "grad_norm": 8.457830429077148, "learning_rate": 1.1958473018276198e-05, "loss": 2.8522, "step": 5529 }, { "epoch": 0.07, "grad_norm": 8.409460067749023, "learning_rate": 1.1960635881907648e-05, "loss": 2.9086, "step": 5530 }, { "epoch": 0.07, "grad_norm": 8.250473976135254, "learning_rate": 1.1962798745539094e-05, "loss": 2.7791, "step": 5531 }, { "epoch": 0.07, "grad_norm": 8.188429832458496, "learning_rate": 1.1964961609170542e-05, "loss": 2.9356, "step": 5532 }, { "epoch": 0.07, "grad_norm": 8.713122367858887, "learning_rate": 1.196712447280199e-05, "loss": 2.9426, "step": 5533 }, { "epoch": 0.07, "grad_norm": 8.023115158081055, "learning_rate": 1.196928733643344e-05, "loss": 2.8587, "step": 5534 }, { "epoch": 0.07, "grad_norm": 8.259176254272461, "learning_rate": 1.1971450200064886e-05, "loss": 2.926, "step": 5535 }, { "epoch": 0.07, "grad_norm": 8.650033950805664, "learning_rate": 1.1973613063696334e-05, "loss": 3.1067, "step": 5536 }, { "epoch": 0.07, "grad_norm": 8.375105857849121, "learning_rate": 1.1975775927327784e-05, "loss": 3.1396, "step": 5537 }, { "epoch": 0.07, "grad_norm": 8.644001007080078, "learning_rate": 1.1977938790959232e-05, "loss": 2.9938, "step": 5538 }, { "epoch": 0.07, "grad_norm": 8.203137397766113, "learning_rate": 1.1980101654590679e-05, "loss": 2.6299, "step": 5539 }, { "epoch": 0.07, "grad_norm": 7.252408504486084, "learning_rate": 1.1982264518222127e-05, "loss": 2.6911, "step": 5540 }, { "epoch": 0.07, "grad_norm": 7.875787734985352, "learning_rate": 1.1984427381853576e-05, "loss": 2.5665, "step": 5541 }, { "epoch": 0.07, "grad_norm": 8.30858039855957, "learning_rate": 1.1986590245485024e-05, "loss": 2.9304, "step": 5542 }, { "epoch": 0.07, "grad_norm": 7.945651054382324, "learning_rate": 1.198875310911647e-05, "loss": 2.4931, "step": 5543 }, { "epoch": 0.07, "grad_norm": 7.6672186851501465, "learning_rate": 1.1990915972747919e-05, "loss": 2.5462, "step": 5544 }, { "epoch": 0.07, "grad_norm": 8.09742546081543, "learning_rate": 1.1993078836379369e-05, "loss": 2.5737, "step": 5545 }, { "epoch": 0.07, "grad_norm": 7.368836402893066, "learning_rate": 1.1995241700010815e-05, "loss": 2.2612, "step": 5546 }, { "epoch": 0.07, "grad_norm": 7.533986568450928, "learning_rate": 1.1997404563642263e-05, "loss": 2.5554, "step": 5547 }, { "epoch": 0.07, "grad_norm": 8.631503105163574, "learning_rate": 1.1999567427273711e-05, "loss": 2.6255, "step": 5548 }, { "epoch": 0.07, "grad_norm": 9.057275772094727, "learning_rate": 1.200173029090516e-05, "loss": 2.9115, "step": 5549 }, { "epoch": 0.07, "grad_norm": 8.265349388122559, "learning_rate": 1.2003893154536607e-05, "loss": 3.0273, "step": 5550 }, { "epoch": 0.07, "grad_norm": 8.859326362609863, "learning_rate": 1.2006056018168055e-05, "loss": 2.5048, "step": 5551 }, { "epoch": 0.07, "grad_norm": 9.057753562927246, "learning_rate": 1.2008218881799503e-05, "loss": 2.9704, "step": 5552 }, { "epoch": 0.07, "grad_norm": 7.25274658203125, "learning_rate": 1.2010381745430953e-05, "loss": 2.418, "step": 5553 }, { "epoch": 0.07, "grad_norm": 8.626285552978516, "learning_rate": 1.20125446090624e-05, "loss": 3.0896, "step": 5554 }, { "epoch": 0.07, "grad_norm": 7.792593955993652, "learning_rate": 1.2014707472693847e-05, "loss": 2.5814, "step": 5555 }, { "epoch": 0.07, "grad_norm": 9.068994522094727, "learning_rate": 1.2016870336325295e-05, "loss": 2.8656, "step": 5556 }, { "epoch": 0.07, "grad_norm": 7.387673377990723, "learning_rate": 1.2019033199956745e-05, "loss": 2.7236, "step": 5557 }, { "epoch": 0.07, "grad_norm": 8.60170841217041, "learning_rate": 1.2021196063588192e-05, "loss": 3.0702, "step": 5558 }, { "epoch": 0.07, "grad_norm": 8.589122772216797, "learning_rate": 1.202335892721964e-05, "loss": 3.3705, "step": 5559 }, { "epoch": 0.07, "grad_norm": 7.164295196533203, "learning_rate": 1.2025521790851088e-05, "loss": 2.5023, "step": 5560 }, { "epoch": 0.07, "grad_norm": 8.18713665008545, "learning_rate": 1.2027684654482536e-05, "loss": 2.9668, "step": 5561 }, { "epoch": 0.07, "grad_norm": 7.627579689025879, "learning_rate": 1.2029847518113984e-05, "loss": 2.8594, "step": 5562 }, { "epoch": 0.07, "grad_norm": 8.461861610412598, "learning_rate": 1.2032010381745432e-05, "loss": 2.8298, "step": 5563 }, { "epoch": 0.07, "grad_norm": 11.023558616638184, "learning_rate": 1.2034173245376878e-05, "loss": 3.2564, "step": 5564 }, { "epoch": 0.07, "grad_norm": 7.7000298500061035, "learning_rate": 1.2036336109008328e-05, "loss": 2.2498, "step": 5565 }, { "epoch": 0.07, "grad_norm": 8.65816593170166, "learning_rate": 1.2038498972639776e-05, "loss": 2.9568, "step": 5566 }, { "epoch": 0.07, "grad_norm": 7.281044960021973, "learning_rate": 1.2040661836271224e-05, "loss": 2.5899, "step": 5567 }, { "epoch": 0.07, "grad_norm": 8.406920433044434, "learning_rate": 1.2042824699902674e-05, "loss": 2.4808, "step": 5568 }, { "epoch": 0.07, "grad_norm": 7.782124042510986, "learning_rate": 1.204498756353412e-05, "loss": 2.6943, "step": 5569 }, { "epoch": 0.07, "grad_norm": 8.138440132141113, "learning_rate": 1.2047150427165568e-05, "loss": 2.4811, "step": 5570 }, { "epoch": 0.07, "grad_norm": 7.805521488189697, "learning_rate": 1.2049313290797016e-05, "loss": 3.1519, "step": 5571 }, { "epoch": 0.07, "grad_norm": 7.967971324920654, "learning_rate": 1.2051476154428466e-05, "loss": 2.9234, "step": 5572 }, { "epoch": 0.07, "grad_norm": 7.89543342590332, "learning_rate": 1.2053639018059912e-05, "loss": 2.5965, "step": 5573 }, { "epoch": 0.07, "grad_norm": 7.9293999671936035, "learning_rate": 1.205580188169136e-05, "loss": 2.4039, "step": 5574 }, { "epoch": 0.07, "grad_norm": 7.794745922088623, "learning_rate": 1.2057964745322808e-05, "loss": 2.6055, "step": 5575 }, { "epoch": 0.07, "grad_norm": 7.8018927574157715, "learning_rate": 1.2060127608954258e-05, "loss": 2.7331, "step": 5576 }, { "epoch": 0.07, "grad_norm": 8.299333572387695, "learning_rate": 1.2062290472585705e-05, "loss": 2.999, "step": 5577 }, { "epoch": 0.07, "grad_norm": 7.565082550048828, "learning_rate": 1.2064453336217153e-05, "loss": 2.4163, "step": 5578 }, { "epoch": 0.07, "grad_norm": 8.248527526855469, "learning_rate": 1.20666161998486e-05, "loss": 2.8082, "step": 5579 }, { "epoch": 0.07, "grad_norm": 9.367162704467773, "learning_rate": 1.2068779063480049e-05, "loss": 2.8891, "step": 5580 }, { "epoch": 0.07, "grad_norm": 7.6403279304504395, "learning_rate": 1.2070941927111497e-05, "loss": 2.4538, "step": 5581 }, { "epoch": 0.07, "grad_norm": 7.323150157928467, "learning_rate": 1.2073104790742945e-05, "loss": 2.564, "step": 5582 }, { "epoch": 0.07, "grad_norm": 8.619000434875488, "learning_rate": 1.2075267654374391e-05, "loss": 2.8091, "step": 5583 }, { "epoch": 0.07, "grad_norm": 8.444633483886719, "learning_rate": 1.2077430518005841e-05, "loss": 3.3699, "step": 5584 }, { "epoch": 0.07, "grad_norm": 8.291654586791992, "learning_rate": 1.2079593381637289e-05, "loss": 3.0733, "step": 5585 }, { "epoch": 0.07, "grad_norm": 8.101505279541016, "learning_rate": 1.2081756245268737e-05, "loss": 2.2994, "step": 5586 }, { "epoch": 0.07, "grad_norm": 7.716263294219971, "learning_rate": 1.2083919108900183e-05, "loss": 2.674, "step": 5587 }, { "epoch": 0.07, "grad_norm": 7.516982555389404, "learning_rate": 1.2086081972531633e-05, "loss": 2.5797, "step": 5588 }, { "epoch": 0.07, "grad_norm": 7.966201305389404, "learning_rate": 1.2088244836163081e-05, "loss": 3.1909, "step": 5589 }, { "epoch": 0.07, "grad_norm": 7.13399600982666, "learning_rate": 1.209040769979453e-05, "loss": 2.4815, "step": 5590 }, { "epoch": 0.07, "grad_norm": 7.491964817047119, "learning_rate": 1.2092570563425976e-05, "loss": 2.9828, "step": 5591 }, { "epoch": 0.07, "grad_norm": 6.752058982849121, "learning_rate": 1.2094733427057425e-05, "loss": 2.2461, "step": 5592 }, { "epoch": 0.07, "grad_norm": 8.752654075622559, "learning_rate": 1.2096896290688873e-05, "loss": 2.665, "step": 5593 }, { "epoch": 0.07, "grad_norm": 7.639777660369873, "learning_rate": 1.2099059154320321e-05, "loss": 2.506, "step": 5594 }, { "epoch": 0.07, "grad_norm": 8.554098129272461, "learning_rate": 1.2101222017951768e-05, "loss": 3.0052, "step": 5595 }, { "epoch": 0.07, "grad_norm": 7.889682769775391, "learning_rate": 1.2103384881583217e-05, "loss": 2.9748, "step": 5596 }, { "epoch": 0.07, "grad_norm": 8.346917152404785, "learning_rate": 1.2105547745214666e-05, "loss": 2.8664, "step": 5597 }, { "epoch": 0.07, "grad_norm": 7.529214382171631, "learning_rate": 1.2107710608846112e-05, "loss": 2.4694, "step": 5598 }, { "epoch": 0.07, "grad_norm": 8.146590232849121, "learning_rate": 1.210987347247756e-05, "loss": 3.171, "step": 5599 }, { "epoch": 0.07, "grad_norm": 9.162577629089355, "learning_rate": 1.211203633610901e-05, "loss": 2.7228, "step": 5600 }, { "epoch": 0.07, "grad_norm": 9.895574569702148, "learning_rate": 1.2114199199740458e-05, "loss": 3.235, "step": 5601 }, { "epoch": 0.07, "grad_norm": 8.331809997558594, "learning_rate": 1.2116362063371904e-05, "loss": 2.253, "step": 5602 }, { "epoch": 0.07, "grad_norm": 7.872253894805908, "learning_rate": 1.2118524927003354e-05, "loss": 2.1419, "step": 5603 }, { "epoch": 0.07, "grad_norm": 9.234092712402344, "learning_rate": 1.2120687790634802e-05, "loss": 2.3693, "step": 5604 }, { "epoch": 0.07, "grad_norm": 7.639830589294434, "learning_rate": 1.212285065426625e-05, "loss": 2.2391, "step": 5605 }, { "epoch": 0.07, "grad_norm": 8.225454330444336, "learning_rate": 1.2125013517897696e-05, "loss": 2.5996, "step": 5606 }, { "epoch": 0.07, "grad_norm": 8.976215362548828, "learning_rate": 1.2127176381529146e-05, "loss": 3.1392, "step": 5607 }, { "epoch": 0.07, "grad_norm": 9.625588417053223, "learning_rate": 1.2129339245160594e-05, "loss": 3.2928, "step": 5608 }, { "epoch": 0.07, "grad_norm": 8.14757251739502, "learning_rate": 1.2131502108792042e-05, "loss": 3.24, "step": 5609 }, { "epoch": 0.07, "grad_norm": 9.0784330368042, "learning_rate": 1.2133664972423488e-05, "loss": 2.4602, "step": 5610 }, { "epoch": 0.07, "grad_norm": 8.972811698913574, "learning_rate": 1.2135827836054938e-05, "loss": 3.4482, "step": 5611 }, { "epoch": 0.07, "grad_norm": 10.470444679260254, "learning_rate": 1.2137990699686386e-05, "loss": 2.6498, "step": 5612 }, { "epoch": 0.07, "grad_norm": 8.83436393737793, "learning_rate": 1.2140153563317834e-05, "loss": 3.0375, "step": 5613 }, { "epoch": 0.07, "grad_norm": 8.767398834228516, "learning_rate": 1.214231642694928e-05, "loss": 2.945, "step": 5614 }, { "epoch": 0.07, "grad_norm": 8.664689064025879, "learning_rate": 1.214447929058073e-05, "loss": 2.5535, "step": 5615 }, { "epoch": 0.07, "grad_norm": 7.775311470031738, "learning_rate": 1.2146642154212178e-05, "loss": 2.8846, "step": 5616 }, { "epoch": 0.07, "grad_norm": 8.218559265136719, "learning_rate": 1.2148805017843625e-05, "loss": 2.9453, "step": 5617 }, { "epoch": 0.07, "grad_norm": 7.689115524291992, "learning_rate": 1.2150967881475073e-05, "loss": 2.6471, "step": 5618 }, { "epoch": 0.07, "grad_norm": 7.563770771026611, "learning_rate": 1.2153130745106523e-05, "loss": 2.5307, "step": 5619 }, { "epoch": 0.07, "grad_norm": 8.16766357421875, "learning_rate": 1.215529360873797e-05, "loss": 2.3395, "step": 5620 }, { "epoch": 0.07, "grad_norm": 8.444707870483398, "learning_rate": 1.2157456472369417e-05, "loss": 3.0417, "step": 5621 }, { "epoch": 0.07, "grad_norm": 8.554818153381348, "learning_rate": 1.2159619336000865e-05, "loss": 2.8826, "step": 5622 }, { "epoch": 0.07, "grad_norm": 8.748720169067383, "learning_rate": 1.2161782199632315e-05, "loss": 3.1572, "step": 5623 }, { "epoch": 0.07, "grad_norm": 8.322450637817383, "learning_rate": 1.2163945063263763e-05, "loss": 2.1044, "step": 5624 }, { "epoch": 0.07, "grad_norm": 7.653407573699951, "learning_rate": 1.216610792689521e-05, "loss": 2.8865, "step": 5625 }, { "epoch": 0.07, "grad_norm": 8.058882713317871, "learning_rate": 1.2168270790526657e-05, "loss": 2.8956, "step": 5626 }, { "epoch": 0.07, "grad_norm": 8.805272102355957, "learning_rate": 1.2170433654158107e-05, "loss": 3.0795, "step": 5627 }, { "epoch": 0.07, "grad_norm": 8.517499923706055, "learning_rate": 1.2172596517789555e-05, "loss": 2.7327, "step": 5628 }, { "epoch": 0.07, "grad_norm": 8.546218872070312, "learning_rate": 1.2174759381421001e-05, "loss": 2.912, "step": 5629 }, { "epoch": 0.07, "grad_norm": 8.37569808959961, "learning_rate": 1.217692224505245e-05, "loss": 2.4553, "step": 5630 }, { "epoch": 0.07, "grad_norm": 8.785398483276367, "learning_rate": 1.21790851086839e-05, "loss": 3.1303, "step": 5631 }, { "epoch": 0.07, "grad_norm": 7.864438056945801, "learning_rate": 1.2181247972315347e-05, "loss": 3.3239, "step": 5632 }, { "epoch": 0.07, "grad_norm": 6.71495246887207, "learning_rate": 1.2183410835946794e-05, "loss": 2.1475, "step": 5633 }, { "epoch": 0.07, "grad_norm": 7.350488185882568, "learning_rate": 1.2185573699578243e-05, "loss": 2.674, "step": 5634 }, { "epoch": 0.07, "grad_norm": 7.761080741882324, "learning_rate": 1.2187736563209691e-05, "loss": 3.0116, "step": 5635 }, { "epoch": 0.07, "grad_norm": 8.981829643249512, "learning_rate": 1.2189899426841138e-05, "loss": 2.9972, "step": 5636 }, { "epoch": 0.07, "grad_norm": 7.245491027832031, "learning_rate": 1.2192062290472586e-05, "loss": 2.5419, "step": 5637 }, { "epoch": 0.07, "grad_norm": 9.08201789855957, "learning_rate": 1.2194225154104036e-05, "loss": 3.188, "step": 5638 }, { "epoch": 0.07, "grad_norm": 8.036643028259277, "learning_rate": 1.2196388017735484e-05, "loss": 2.7572, "step": 5639 }, { "epoch": 0.07, "grad_norm": 8.390239715576172, "learning_rate": 1.219855088136693e-05, "loss": 2.9535, "step": 5640 }, { "epoch": 0.07, "grad_norm": 8.845603942871094, "learning_rate": 1.2200713744998378e-05, "loss": 3.2522, "step": 5641 }, { "epoch": 0.07, "grad_norm": 8.1898775100708, "learning_rate": 1.2202876608629828e-05, "loss": 2.9104, "step": 5642 }, { "epoch": 0.07, "grad_norm": 7.791342258453369, "learning_rate": 1.2205039472261276e-05, "loss": 2.9184, "step": 5643 }, { "epoch": 0.07, "grad_norm": 7.568426609039307, "learning_rate": 1.2207202335892722e-05, "loss": 2.9514, "step": 5644 }, { "epoch": 0.07, "grad_norm": 8.116168975830078, "learning_rate": 1.220936519952417e-05, "loss": 2.7842, "step": 5645 }, { "epoch": 0.07, "grad_norm": 7.996092796325684, "learning_rate": 1.221152806315562e-05, "loss": 2.366, "step": 5646 }, { "epoch": 0.07, "grad_norm": 7.633693695068359, "learning_rate": 1.2213690926787068e-05, "loss": 2.7709, "step": 5647 }, { "epoch": 0.07, "grad_norm": 7.5274786949157715, "learning_rate": 1.2215853790418514e-05, "loss": 2.9879, "step": 5648 }, { "epoch": 0.07, "grad_norm": 7.851378440856934, "learning_rate": 1.2218016654049962e-05, "loss": 2.8397, "step": 5649 }, { "epoch": 0.07, "grad_norm": 8.222062110900879, "learning_rate": 1.2220179517681412e-05, "loss": 2.7183, "step": 5650 }, { "epoch": 0.07, "grad_norm": 7.693682670593262, "learning_rate": 1.222234238131286e-05, "loss": 2.8733, "step": 5651 }, { "epoch": 0.07, "grad_norm": 7.97530460357666, "learning_rate": 1.2224505244944307e-05, "loss": 3.1277, "step": 5652 }, { "epoch": 0.07, "grad_norm": 8.597956657409668, "learning_rate": 1.2226668108575755e-05, "loss": 2.7972, "step": 5653 }, { "epoch": 0.07, "grad_norm": 9.310458183288574, "learning_rate": 1.2228830972207204e-05, "loss": 2.8695, "step": 5654 }, { "epoch": 0.07, "grad_norm": 7.653414726257324, "learning_rate": 1.223099383583865e-05, "loss": 2.4288, "step": 5655 }, { "epoch": 0.07, "grad_norm": 8.243370056152344, "learning_rate": 1.2233156699470099e-05, "loss": 2.8749, "step": 5656 }, { "epoch": 0.07, "grad_norm": 8.99742317199707, "learning_rate": 1.2235319563101547e-05, "loss": 2.5141, "step": 5657 }, { "epoch": 0.07, "grad_norm": 8.425960540771484, "learning_rate": 1.2237482426732997e-05, "loss": 3.1783, "step": 5658 }, { "epoch": 0.07, "grad_norm": 8.264469146728516, "learning_rate": 1.2239645290364443e-05, "loss": 3.0153, "step": 5659 }, { "epoch": 0.07, "grad_norm": 7.501834869384766, "learning_rate": 1.2241808153995891e-05, "loss": 2.8044, "step": 5660 }, { "epoch": 0.07, "grad_norm": 7.602917671203613, "learning_rate": 1.2243971017627339e-05, "loss": 2.6818, "step": 5661 }, { "epoch": 0.07, "grad_norm": 9.267083168029785, "learning_rate": 1.2246133881258789e-05, "loss": 2.6741, "step": 5662 }, { "epoch": 0.07, "grad_norm": 7.163387298583984, "learning_rate": 1.2248296744890235e-05, "loss": 2.5628, "step": 5663 }, { "epoch": 0.07, "grad_norm": 9.33126449584961, "learning_rate": 1.2250459608521683e-05, "loss": 3.1215, "step": 5664 }, { "epoch": 0.07, "grad_norm": 7.377986907958984, "learning_rate": 1.2252622472153133e-05, "loss": 2.5735, "step": 5665 }, { "epoch": 0.07, "grad_norm": 9.283984184265137, "learning_rate": 1.2254785335784581e-05, "loss": 2.9551, "step": 5666 }, { "epoch": 0.07, "grad_norm": 7.594018936157227, "learning_rate": 1.2256948199416027e-05, "loss": 2.6121, "step": 5667 }, { "epoch": 0.07, "grad_norm": 8.11726188659668, "learning_rate": 1.2259111063047475e-05, "loss": 2.6298, "step": 5668 }, { "epoch": 0.07, "grad_norm": 7.344338417053223, "learning_rate": 1.2261273926678925e-05, "loss": 2.9455, "step": 5669 }, { "epoch": 0.07, "grad_norm": 8.242141723632812, "learning_rate": 1.2263436790310372e-05, "loss": 3.0166, "step": 5670 }, { "epoch": 0.07, "grad_norm": 8.008731842041016, "learning_rate": 1.226559965394182e-05, "loss": 2.5557, "step": 5671 }, { "epoch": 0.07, "grad_norm": 7.676813125610352, "learning_rate": 1.2267762517573268e-05, "loss": 2.8585, "step": 5672 }, { "epoch": 0.07, "grad_norm": 7.748571395874023, "learning_rate": 1.2269925381204717e-05, "loss": 2.2759, "step": 5673 }, { "epoch": 0.07, "grad_norm": 8.453740119934082, "learning_rate": 1.2272088244836164e-05, "loss": 3.0171, "step": 5674 }, { "epoch": 0.07, "grad_norm": 8.158098220825195, "learning_rate": 1.2274251108467612e-05, "loss": 2.3781, "step": 5675 }, { "epoch": 0.07, "grad_norm": 7.401226997375488, "learning_rate": 1.227641397209906e-05, "loss": 2.7364, "step": 5676 }, { "epoch": 0.07, "grad_norm": 8.979769706726074, "learning_rate": 1.227857683573051e-05, "loss": 3.2873, "step": 5677 }, { "epoch": 0.07, "grad_norm": 8.142984390258789, "learning_rate": 1.2280739699361956e-05, "loss": 2.4822, "step": 5678 }, { "epoch": 0.07, "grad_norm": 8.057907104492188, "learning_rate": 1.2282902562993404e-05, "loss": 2.4572, "step": 5679 }, { "epoch": 0.07, "grad_norm": 8.679289817810059, "learning_rate": 1.2285065426624852e-05, "loss": 3.1134, "step": 5680 }, { "epoch": 0.07, "grad_norm": 7.36113166809082, "learning_rate": 1.2287228290256302e-05, "loss": 2.2241, "step": 5681 }, { "epoch": 0.07, "grad_norm": 8.295498847961426, "learning_rate": 1.2289391153887748e-05, "loss": 2.5284, "step": 5682 }, { "epoch": 0.07, "grad_norm": 7.968384265899658, "learning_rate": 1.2291554017519196e-05, "loss": 3.0233, "step": 5683 }, { "epoch": 0.07, "grad_norm": 9.086137771606445, "learning_rate": 1.2293716881150644e-05, "loss": 3.0361, "step": 5684 }, { "epoch": 0.07, "grad_norm": 7.935566425323486, "learning_rate": 1.2295879744782094e-05, "loss": 2.9282, "step": 5685 }, { "epoch": 0.07, "grad_norm": 8.097909927368164, "learning_rate": 1.229804260841354e-05, "loss": 2.461, "step": 5686 }, { "epoch": 0.07, "grad_norm": 7.708350658416748, "learning_rate": 1.2300205472044988e-05, "loss": 2.6448, "step": 5687 }, { "epoch": 0.07, "grad_norm": 7.804795742034912, "learning_rate": 1.2302368335676436e-05, "loss": 3.4496, "step": 5688 }, { "epoch": 0.07, "grad_norm": 8.950271606445312, "learning_rate": 1.2304531199307884e-05, "loss": 2.9318, "step": 5689 }, { "epoch": 0.07, "grad_norm": 8.200996398925781, "learning_rate": 1.2306694062939333e-05, "loss": 2.6998, "step": 5690 }, { "epoch": 0.07, "grad_norm": 7.7128400802612305, "learning_rate": 1.230885692657078e-05, "loss": 2.3712, "step": 5691 }, { "epoch": 0.07, "grad_norm": 7.228262424468994, "learning_rate": 1.2311019790202227e-05, "loss": 2.5657, "step": 5692 }, { "epoch": 0.07, "grad_norm": 7.885824203491211, "learning_rate": 1.2313182653833677e-05, "loss": 3.0469, "step": 5693 }, { "epoch": 0.07, "grad_norm": 7.932607173919678, "learning_rate": 1.2315345517465125e-05, "loss": 2.5252, "step": 5694 }, { "epoch": 0.07, "grad_norm": 7.991005897521973, "learning_rate": 1.2317508381096573e-05, "loss": 2.3864, "step": 5695 }, { "epoch": 0.07, "grad_norm": 7.928852558135986, "learning_rate": 1.2319671244728023e-05, "loss": 2.3906, "step": 5696 }, { "epoch": 0.07, "grad_norm": 8.688775062561035, "learning_rate": 1.2321834108359469e-05, "loss": 2.6842, "step": 5697 }, { "epoch": 0.07, "grad_norm": 8.47543716430664, "learning_rate": 1.2323996971990917e-05, "loss": 2.4625, "step": 5698 }, { "epoch": 0.07, "grad_norm": 7.91830587387085, "learning_rate": 1.2326159835622365e-05, "loss": 3.0865, "step": 5699 }, { "epoch": 0.07, "grad_norm": 7.241849422454834, "learning_rate": 1.2328322699253815e-05, "loss": 2.0467, "step": 5700 }, { "epoch": 0.07, "grad_norm": 8.71555233001709, "learning_rate": 1.2330485562885261e-05, "loss": 3.1664, "step": 5701 }, { "epoch": 0.07, "grad_norm": 8.184821128845215, "learning_rate": 1.233264842651671e-05, "loss": 2.9327, "step": 5702 }, { "epoch": 0.07, "grad_norm": 7.771961688995361, "learning_rate": 1.2334811290148157e-05, "loss": 2.4744, "step": 5703 }, { "epoch": 0.07, "grad_norm": 7.888708591461182, "learning_rate": 1.2336974153779607e-05, "loss": 2.56, "step": 5704 }, { "epoch": 0.07, "grad_norm": 8.45556926727295, "learning_rate": 1.2339137017411053e-05, "loss": 2.4541, "step": 5705 }, { "epoch": 0.07, "grad_norm": 8.135581016540527, "learning_rate": 1.2341299881042501e-05, "loss": 2.9868, "step": 5706 }, { "epoch": 0.07, "grad_norm": 8.78269100189209, "learning_rate": 1.2343462744673948e-05, "loss": 3.1408, "step": 5707 }, { "epoch": 0.07, "grad_norm": 8.350932121276855, "learning_rate": 1.2345625608305397e-05, "loss": 2.7752, "step": 5708 }, { "epoch": 0.07, "grad_norm": 7.886378765106201, "learning_rate": 1.2347788471936846e-05, "loss": 2.594, "step": 5709 }, { "epoch": 0.07, "grad_norm": 8.816340446472168, "learning_rate": 1.2349951335568294e-05, "loss": 3.1872, "step": 5710 }, { "epoch": 0.07, "grad_norm": 8.72311019897461, "learning_rate": 1.235211419919974e-05, "loss": 2.8651, "step": 5711 }, { "epoch": 0.07, "grad_norm": 7.861654281616211, "learning_rate": 1.235427706283119e-05, "loss": 3.3094, "step": 5712 }, { "epoch": 0.07, "grad_norm": 8.303102493286133, "learning_rate": 1.2356439926462638e-05, "loss": 3.3451, "step": 5713 }, { "epoch": 0.07, "grad_norm": 8.918048858642578, "learning_rate": 1.2358602790094086e-05, "loss": 2.585, "step": 5714 }, { "epoch": 0.07, "grad_norm": 8.63357162475586, "learning_rate": 1.2360765653725532e-05, "loss": 2.4994, "step": 5715 }, { "epoch": 0.07, "grad_norm": 8.152125358581543, "learning_rate": 1.2362928517356982e-05, "loss": 2.9287, "step": 5716 }, { "epoch": 0.07, "grad_norm": 8.09343147277832, "learning_rate": 1.236509138098843e-05, "loss": 2.2512, "step": 5717 }, { "epoch": 0.07, "grad_norm": 7.505654335021973, "learning_rate": 1.2367254244619878e-05, "loss": 2.6452, "step": 5718 }, { "epoch": 0.07, "grad_norm": 8.804122924804688, "learning_rate": 1.2369417108251324e-05, "loss": 2.9817, "step": 5719 }, { "epoch": 0.07, "grad_norm": 7.80452823638916, "learning_rate": 1.2371579971882774e-05, "loss": 2.852, "step": 5720 }, { "epoch": 0.07, "grad_norm": 7.683933734893799, "learning_rate": 1.2373742835514222e-05, "loss": 2.7393, "step": 5721 }, { "epoch": 0.07, "grad_norm": 7.022854804992676, "learning_rate": 1.237590569914567e-05, "loss": 2.5049, "step": 5722 }, { "epoch": 0.07, "grad_norm": 8.413122177124023, "learning_rate": 1.2378068562777117e-05, "loss": 3.0116, "step": 5723 }, { "epoch": 0.07, "grad_norm": 9.640890121459961, "learning_rate": 1.2380231426408566e-05, "loss": 3.05, "step": 5724 }, { "epoch": 0.07, "grad_norm": 8.45617961883545, "learning_rate": 1.2382394290040014e-05, "loss": 3.0472, "step": 5725 }, { "epoch": 0.07, "grad_norm": 7.591861724853516, "learning_rate": 1.238455715367146e-05, "loss": 2.4236, "step": 5726 }, { "epoch": 0.07, "grad_norm": 8.044381141662598, "learning_rate": 1.2386720017302909e-05, "loss": 3.1032, "step": 5727 }, { "epoch": 0.07, "grad_norm": 7.355445861816406, "learning_rate": 1.2388882880934358e-05, "loss": 2.355, "step": 5728 }, { "epoch": 0.07, "grad_norm": 8.812215805053711, "learning_rate": 1.2391045744565807e-05, "loss": 3.1271, "step": 5729 }, { "epoch": 0.07, "grad_norm": 7.4747090339660645, "learning_rate": 1.2393208608197253e-05, "loss": 2.8592, "step": 5730 }, { "epoch": 0.07, "grad_norm": 7.790393352508545, "learning_rate": 1.2395371471828703e-05, "loss": 2.5638, "step": 5731 }, { "epoch": 0.07, "grad_norm": 9.424076080322266, "learning_rate": 1.239753433546015e-05, "loss": 2.9118, "step": 5732 }, { "epoch": 0.07, "grad_norm": 8.37073802947998, "learning_rate": 1.2399697199091599e-05, "loss": 2.3844, "step": 5733 }, { "epoch": 0.07, "grad_norm": 7.789203643798828, "learning_rate": 1.2401860062723045e-05, "loss": 2.4763, "step": 5734 }, { "epoch": 0.07, "grad_norm": 9.070699691772461, "learning_rate": 1.2404022926354495e-05, "loss": 3.0472, "step": 5735 }, { "epoch": 0.07, "grad_norm": 7.436978340148926, "learning_rate": 1.2406185789985943e-05, "loss": 2.4445, "step": 5736 }, { "epoch": 0.07, "grad_norm": 8.161273002624512, "learning_rate": 1.2408348653617391e-05, "loss": 2.7058, "step": 5737 }, { "epoch": 0.07, "grad_norm": 8.130443572998047, "learning_rate": 1.2410511517248837e-05, "loss": 3.794, "step": 5738 }, { "epoch": 0.07, "grad_norm": 7.580056190490723, "learning_rate": 1.2412674380880287e-05, "loss": 3.2324, "step": 5739 }, { "epoch": 0.07, "grad_norm": 9.169727325439453, "learning_rate": 1.2414837244511735e-05, "loss": 3.549, "step": 5740 }, { "epoch": 0.07, "grad_norm": 7.572789669036865, "learning_rate": 1.2417000108143183e-05, "loss": 2.362, "step": 5741 }, { "epoch": 0.07, "grad_norm": 7.5583624839782715, "learning_rate": 1.241916297177463e-05, "loss": 2.5504, "step": 5742 }, { "epoch": 0.07, "grad_norm": 8.084802627563477, "learning_rate": 1.242132583540608e-05, "loss": 2.8513, "step": 5743 }, { "epoch": 0.07, "grad_norm": 7.878948211669922, "learning_rate": 1.2423488699037527e-05, "loss": 2.4352, "step": 5744 }, { "epoch": 0.07, "grad_norm": 8.164194107055664, "learning_rate": 1.2425651562668974e-05, "loss": 2.9028, "step": 5745 }, { "epoch": 0.07, "grad_norm": 7.943493366241455, "learning_rate": 1.2427814426300422e-05, "loss": 2.4661, "step": 5746 }, { "epoch": 0.07, "grad_norm": 9.290712356567383, "learning_rate": 1.2429977289931871e-05, "loss": 3.0813, "step": 5747 }, { "epoch": 0.07, "grad_norm": 7.016781330108643, "learning_rate": 1.243214015356332e-05, "loss": 3.0557, "step": 5748 }, { "epoch": 0.07, "grad_norm": 7.996005058288574, "learning_rate": 1.2434303017194766e-05, "loss": 2.243, "step": 5749 }, { "epoch": 0.07, "grad_norm": 7.316032409667969, "learning_rate": 1.2436465880826214e-05, "loss": 2.1044, "step": 5750 }, { "epoch": 0.07, "grad_norm": 7.193103790283203, "learning_rate": 1.2438628744457664e-05, "loss": 3.0247, "step": 5751 }, { "epoch": 0.07, "grad_norm": 7.715349197387695, "learning_rate": 1.2440791608089112e-05, "loss": 2.4584, "step": 5752 }, { "epoch": 0.07, "grad_norm": 7.325648307800293, "learning_rate": 1.2442954471720558e-05, "loss": 2.9263, "step": 5753 }, { "epoch": 0.07, "grad_norm": 7.407113552093506, "learning_rate": 1.2445117335352006e-05, "loss": 2.9026, "step": 5754 }, { "epoch": 0.07, "grad_norm": 7.366705417633057, "learning_rate": 1.2447280198983456e-05, "loss": 2.8222, "step": 5755 }, { "epoch": 0.07, "grad_norm": 7.415061950683594, "learning_rate": 1.2449443062614904e-05, "loss": 2.5808, "step": 5756 }, { "epoch": 0.07, "grad_norm": 7.501469135284424, "learning_rate": 1.245160592624635e-05, "loss": 3.0807, "step": 5757 }, { "epoch": 0.07, "grad_norm": 7.626863956451416, "learning_rate": 1.2453768789877798e-05, "loss": 2.8386, "step": 5758 }, { "epoch": 0.07, "grad_norm": 8.243416786193848, "learning_rate": 1.2455931653509248e-05, "loss": 2.9398, "step": 5759 }, { "epoch": 0.07, "grad_norm": 9.017353057861328, "learning_rate": 1.2458094517140696e-05, "loss": 2.5075, "step": 5760 }, { "epoch": 0.07, "grad_norm": 6.873365879058838, "learning_rate": 1.2460257380772142e-05, "loss": 2.4421, "step": 5761 }, { "epoch": 0.07, "grad_norm": 7.575709342956543, "learning_rate": 1.2462420244403592e-05, "loss": 2.9621, "step": 5762 }, { "epoch": 0.07, "grad_norm": 8.172303199768066, "learning_rate": 1.246458310803504e-05, "loss": 2.4812, "step": 5763 }, { "epoch": 0.07, "grad_norm": 8.697179794311523, "learning_rate": 1.2466745971666487e-05, "loss": 2.7905, "step": 5764 }, { "epoch": 0.07, "grad_norm": 7.839441776275635, "learning_rate": 1.2468908835297935e-05, "loss": 2.8383, "step": 5765 }, { "epoch": 0.07, "grad_norm": 7.567489147186279, "learning_rate": 1.2471071698929384e-05, "loss": 2.3246, "step": 5766 }, { "epoch": 0.07, "grad_norm": 7.96516227722168, "learning_rate": 1.2473234562560832e-05, "loss": 2.4748, "step": 5767 }, { "epoch": 0.07, "grad_norm": 9.290669441223145, "learning_rate": 1.2475397426192279e-05, "loss": 3.3183, "step": 5768 }, { "epoch": 0.07, "grad_norm": 8.127668380737305, "learning_rate": 1.2477560289823727e-05, "loss": 2.6927, "step": 5769 }, { "epoch": 0.07, "grad_norm": 7.208719730377197, "learning_rate": 1.2479723153455177e-05, "loss": 2.3084, "step": 5770 }, { "epoch": 0.07, "grad_norm": 8.470314025878906, "learning_rate": 1.2481886017086625e-05, "loss": 2.722, "step": 5771 }, { "epoch": 0.07, "grad_norm": 7.9759840965271, "learning_rate": 1.2484048880718071e-05, "loss": 2.824, "step": 5772 }, { "epoch": 0.07, "grad_norm": 7.538700103759766, "learning_rate": 1.2486211744349519e-05, "loss": 2.3705, "step": 5773 }, { "epoch": 0.07, "grad_norm": 8.934109687805176, "learning_rate": 1.2488374607980969e-05, "loss": 2.9422, "step": 5774 }, { "epoch": 0.07, "grad_norm": 7.565910339355469, "learning_rate": 1.2490537471612417e-05, "loss": 2.5925, "step": 5775 }, { "epoch": 0.07, "grad_norm": 8.291234970092773, "learning_rate": 1.2492700335243863e-05, "loss": 3.0741, "step": 5776 }, { "epoch": 0.07, "grad_norm": 8.477127075195312, "learning_rate": 1.2494863198875311e-05, "loss": 3.1804, "step": 5777 }, { "epoch": 0.07, "grad_norm": 8.094185829162598, "learning_rate": 1.2497026062506761e-05, "loss": 2.9327, "step": 5778 }, { "epoch": 0.07, "grad_norm": 8.166155815124512, "learning_rate": 1.2499188926138209e-05, "loss": 2.4297, "step": 5779 }, { "epoch": 0.08, "grad_norm": 7.704820156097412, "learning_rate": 1.2501351789769655e-05, "loss": 2.3355, "step": 5780 }, { "epoch": 0.08, "grad_norm": 7.859091758728027, "learning_rate": 1.2503514653401103e-05, "loss": 2.4304, "step": 5781 }, { "epoch": 0.08, "grad_norm": 7.758855819702148, "learning_rate": 1.2505677517032553e-05, "loss": 2.7877, "step": 5782 }, { "epoch": 0.08, "grad_norm": 7.75385046005249, "learning_rate": 1.2507840380664e-05, "loss": 3.0859, "step": 5783 }, { "epoch": 0.08, "grad_norm": 8.057587623596191, "learning_rate": 1.2510003244295448e-05, "loss": 2.5905, "step": 5784 }, { "epoch": 0.08, "grad_norm": 7.947498798370361, "learning_rate": 1.2512166107926896e-05, "loss": 2.898, "step": 5785 }, { "epoch": 0.08, "grad_norm": 7.73035192489624, "learning_rate": 1.2514328971558345e-05, "loss": 2.6281, "step": 5786 }, { "epoch": 0.08, "grad_norm": 9.164466857910156, "learning_rate": 1.2516491835189792e-05, "loss": 3.1767, "step": 5787 }, { "epoch": 0.08, "grad_norm": 7.208308696746826, "learning_rate": 1.251865469882124e-05, "loss": 2.521, "step": 5788 }, { "epoch": 0.08, "grad_norm": 8.878560066223145, "learning_rate": 1.2520817562452688e-05, "loss": 2.947, "step": 5789 }, { "epoch": 0.08, "grad_norm": 8.054259300231934, "learning_rate": 1.2522980426084138e-05, "loss": 2.8334, "step": 5790 }, { "epoch": 0.08, "grad_norm": 9.627079010009766, "learning_rate": 1.2525143289715584e-05, "loss": 3.4355, "step": 5791 }, { "epoch": 0.08, "grad_norm": 8.520729064941406, "learning_rate": 1.2527306153347032e-05, "loss": 2.8858, "step": 5792 }, { "epoch": 0.08, "grad_norm": 7.3629536628723145, "learning_rate": 1.2529469016978482e-05, "loss": 2.708, "step": 5793 }, { "epoch": 0.08, "grad_norm": 8.357357025146484, "learning_rate": 1.253163188060993e-05, "loss": 2.4173, "step": 5794 }, { "epoch": 0.08, "grad_norm": 8.09876537322998, "learning_rate": 1.2533794744241376e-05, "loss": 2.9967, "step": 5795 }, { "epoch": 0.08, "grad_norm": 7.592083930969238, "learning_rate": 1.2535957607872824e-05, "loss": 3.0352, "step": 5796 }, { "epoch": 0.08, "grad_norm": 7.638134479522705, "learning_rate": 1.2538120471504274e-05, "loss": 3.079, "step": 5797 }, { "epoch": 0.08, "grad_norm": 8.161836624145508, "learning_rate": 1.254028333513572e-05, "loss": 3.3199, "step": 5798 }, { "epoch": 0.08, "grad_norm": 8.75121784210205, "learning_rate": 1.2542446198767168e-05, "loss": 3.265, "step": 5799 }, { "epoch": 0.08, "grad_norm": 7.215656757354736, "learning_rate": 1.2544609062398616e-05, "loss": 2.8622, "step": 5800 }, { "epoch": 0.08, "grad_norm": 8.065718650817871, "learning_rate": 1.2546771926030066e-05, "loss": 2.7655, "step": 5801 }, { "epoch": 0.08, "grad_norm": 8.009624481201172, "learning_rate": 1.2548934789661513e-05, "loss": 2.7519, "step": 5802 }, { "epoch": 0.08, "grad_norm": 7.865118503570557, "learning_rate": 1.255109765329296e-05, "loss": 2.7547, "step": 5803 }, { "epoch": 0.08, "grad_norm": 8.676177024841309, "learning_rate": 1.2553260516924409e-05, "loss": 2.9747, "step": 5804 }, { "epoch": 0.08, "grad_norm": 10.273083686828613, "learning_rate": 1.2555423380555858e-05, "loss": 3.1526, "step": 5805 }, { "epoch": 0.08, "grad_norm": 7.296900272369385, "learning_rate": 1.2557586244187305e-05, "loss": 2.5594, "step": 5806 }, { "epoch": 0.08, "grad_norm": 8.330694198608398, "learning_rate": 1.2559749107818753e-05, "loss": 3.3653, "step": 5807 }, { "epoch": 0.08, "grad_norm": 8.105036735534668, "learning_rate": 1.25619119714502e-05, "loss": 3.1667, "step": 5808 }, { "epoch": 0.08, "grad_norm": 8.958114624023438, "learning_rate": 1.256407483508165e-05, "loss": 3.1082, "step": 5809 }, { "epoch": 0.08, "grad_norm": 8.430902481079102, "learning_rate": 1.2566237698713097e-05, "loss": 2.9017, "step": 5810 }, { "epoch": 0.08, "grad_norm": 7.9596757888793945, "learning_rate": 1.2568400562344545e-05, "loss": 2.5945, "step": 5811 }, { "epoch": 0.08, "grad_norm": 8.383243560791016, "learning_rate": 1.2570563425975993e-05, "loss": 2.6787, "step": 5812 }, { "epoch": 0.08, "grad_norm": 7.6850996017456055, "learning_rate": 1.2572726289607443e-05, "loss": 2.8089, "step": 5813 }, { "epoch": 0.08, "grad_norm": 7.881965160369873, "learning_rate": 1.2574889153238889e-05, "loss": 2.8186, "step": 5814 }, { "epoch": 0.08, "grad_norm": 7.494417190551758, "learning_rate": 1.2577052016870337e-05, "loss": 2.5545, "step": 5815 }, { "epoch": 0.08, "grad_norm": 7.4844865798950195, "learning_rate": 1.2579214880501785e-05, "loss": 2.6752, "step": 5816 }, { "epoch": 0.08, "grad_norm": 7.696999549865723, "learning_rate": 1.2581377744133233e-05, "loss": 2.3412, "step": 5817 }, { "epoch": 0.08, "grad_norm": 8.021787643432617, "learning_rate": 1.2583540607764681e-05, "loss": 3.6416, "step": 5818 }, { "epoch": 0.08, "grad_norm": 7.5893449783325195, "learning_rate": 1.258570347139613e-05, "loss": 2.8555, "step": 5819 }, { "epoch": 0.08, "grad_norm": 8.128195762634277, "learning_rate": 1.2587866335027576e-05, "loss": 2.7232, "step": 5820 }, { "epoch": 0.08, "grad_norm": 8.682287216186523, "learning_rate": 1.2590029198659025e-05, "loss": 2.958, "step": 5821 }, { "epoch": 0.08, "grad_norm": 8.039758682250977, "learning_rate": 1.2592192062290474e-05, "loss": 2.8223, "step": 5822 }, { "epoch": 0.08, "grad_norm": 8.277078628540039, "learning_rate": 1.2594354925921922e-05, "loss": 3.1374, "step": 5823 }, { "epoch": 0.08, "grad_norm": 8.100608825683594, "learning_rate": 1.2596517789553371e-05, "loss": 2.6439, "step": 5824 }, { "epoch": 0.08, "grad_norm": 7.936920642852783, "learning_rate": 1.2598680653184818e-05, "loss": 2.821, "step": 5825 }, { "epoch": 0.08, "grad_norm": 7.561108589172363, "learning_rate": 1.2600843516816266e-05, "loss": 2.4231, "step": 5826 }, { "epoch": 0.08, "grad_norm": 8.572461128234863, "learning_rate": 1.2603006380447714e-05, "loss": 2.7189, "step": 5827 }, { "epoch": 0.08, "grad_norm": 8.31884765625, "learning_rate": 1.2605169244079164e-05, "loss": 2.5538, "step": 5828 }, { "epoch": 0.08, "grad_norm": 8.290617942810059, "learning_rate": 1.260733210771061e-05, "loss": 2.9357, "step": 5829 }, { "epoch": 0.08, "grad_norm": 7.440587997436523, "learning_rate": 1.2609494971342058e-05, "loss": 2.8379, "step": 5830 }, { "epoch": 0.08, "grad_norm": 9.261775016784668, "learning_rate": 1.2611657834973506e-05, "loss": 3.3731, "step": 5831 }, { "epoch": 0.08, "grad_norm": 6.856369495391846, "learning_rate": 1.2613820698604956e-05, "loss": 2.3916, "step": 5832 }, { "epoch": 0.08, "grad_norm": 7.439445972442627, "learning_rate": 1.2615983562236402e-05, "loss": 2.5595, "step": 5833 }, { "epoch": 0.08, "grad_norm": 8.506420135498047, "learning_rate": 1.261814642586785e-05, "loss": 2.4602, "step": 5834 }, { "epoch": 0.08, "grad_norm": 7.887448310852051, "learning_rate": 1.2620309289499297e-05, "loss": 2.7524, "step": 5835 }, { "epoch": 0.08, "grad_norm": 7.639018535614014, "learning_rate": 1.2622472153130746e-05, "loss": 2.4776, "step": 5836 }, { "epoch": 0.08, "grad_norm": 8.500625610351562, "learning_rate": 1.2624635016762194e-05, "loss": 3.3988, "step": 5837 }, { "epoch": 0.08, "grad_norm": 7.293302059173584, "learning_rate": 1.2626797880393642e-05, "loss": 2.2459, "step": 5838 }, { "epoch": 0.08, "grad_norm": 7.896634578704834, "learning_rate": 1.2628960744025089e-05, "loss": 2.8593, "step": 5839 }, { "epoch": 0.08, "grad_norm": 7.289880752563477, "learning_rate": 1.2631123607656538e-05, "loss": 2.2203, "step": 5840 }, { "epoch": 0.08, "grad_norm": 7.404395580291748, "learning_rate": 1.2633286471287987e-05, "loss": 2.8406, "step": 5841 }, { "epoch": 0.08, "grad_norm": 7.422527313232422, "learning_rate": 1.2635449334919435e-05, "loss": 2.9466, "step": 5842 }, { "epoch": 0.08, "grad_norm": 9.002620697021484, "learning_rate": 1.2637612198550881e-05, "loss": 3.024, "step": 5843 }, { "epoch": 0.08, "grad_norm": 7.757911682128906, "learning_rate": 1.263977506218233e-05, "loss": 2.5263, "step": 5844 }, { "epoch": 0.08, "grad_norm": 7.6175007820129395, "learning_rate": 1.2641937925813779e-05, "loss": 2.3277, "step": 5845 }, { "epoch": 0.08, "grad_norm": 7.618476867675781, "learning_rate": 1.2644100789445227e-05, "loss": 2.5789, "step": 5846 }, { "epoch": 0.08, "grad_norm": 7.494588851928711, "learning_rate": 1.2646263653076673e-05, "loss": 2.6492, "step": 5847 }, { "epoch": 0.08, "grad_norm": 8.262112617492676, "learning_rate": 1.2648426516708123e-05, "loss": 2.8585, "step": 5848 }, { "epoch": 0.08, "grad_norm": 8.033742904663086, "learning_rate": 1.2650589380339571e-05, "loss": 3.1416, "step": 5849 }, { "epoch": 0.08, "grad_norm": 8.794160842895508, "learning_rate": 1.2652752243971019e-05, "loss": 2.6668, "step": 5850 }, { "epoch": 0.08, "grad_norm": 8.12597942352295, "learning_rate": 1.2654915107602465e-05, "loss": 3.2588, "step": 5851 }, { "epoch": 0.08, "grad_norm": 7.0494208335876465, "learning_rate": 1.2657077971233915e-05, "loss": 2.4527, "step": 5852 }, { "epoch": 0.08, "grad_norm": 7.87582540512085, "learning_rate": 1.2659240834865363e-05, "loss": 3.0096, "step": 5853 }, { "epoch": 0.08, "grad_norm": 8.062149047851562, "learning_rate": 1.266140369849681e-05, "loss": 3.0853, "step": 5854 }, { "epoch": 0.08, "grad_norm": 8.300086975097656, "learning_rate": 1.2663566562128258e-05, "loss": 3.0066, "step": 5855 }, { "epoch": 0.08, "grad_norm": 8.883452415466309, "learning_rate": 1.2665729425759707e-05, "loss": 3.0621, "step": 5856 }, { "epoch": 0.08, "grad_norm": 7.771047592163086, "learning_rate": 1.2667892289391155e-05, "loss": 3.2155, "step": 5857 }, { "epoch": 0.08, "grad_norm": 7.305081844329834, "learning_rate": 1.2670055153022602e-05, "loss": 2.9667, "step": 5858 }, { "epoch": 0.08, "grad_norm": 8.874873161315918, "learning_rate": 1.2672218016654051e-05, "loss": 2.9525, "step": 5859 }, { "epoch": 0.08, "grad_norm": 7.986697196960449, "learning_rate": 1.26743808802855e-05, "loss": 2.332, "step": 5860 }, { "epoch": 0.08, "grad_norm": 6.887545108795166, "learning_rate": 1.2676543743916948e-05, "loss": 2.5057, "step": 5861 }, { "epoch": 0.08, "grad_norm": 7.880643367767334, "learning_rate": 1.2678706607548394e-05, "loss": 2.6, "step": 5862 }, { "epoch": 0.08, "grad_norm": 8.24240493774414, "learning_rate": 1.2680869471179844e-05, "loss": 2.5748, "step": 5863 }, { "epoch": 0.08, "grad_norm": 8.268815994262695, "learning_rate": 1.2683032334811292e-05, "loss": 2.3175, "step": 5864 }, { "epoch": 0.08, "grad_norm": 8.230974197387695, "learning_rate": 1.268519519844274e-05, "loss": 2.3893, "step": 5865 }, { "epoch": 0.08, "grad_norm": 7.785612106323242, "learning_rate": 1.2687358062074186e-05, "loss": 3.081, "step": 5866 }, { "epoch": 0.08, "grad_norm": 7.356688022613525, "learning_rate": 1.2689520925705636e-05, "loss": 2.1471, "step": 5867 }, { "epoch": 0.08, "grad_norm": 8.328228950500488, "learning_rate": 1.2691683789337084e-05, "loss": 2.9642, "step": 5868 }, { "epoch": 0.08, "grad_norm": 7.85810661315918, "learning_rate": 1.2693846652968532e-05, "loss": 2.5736, "step": 5869 }, { "epoch": 0.08, "grad_norm": 8.127785682678223, "learning_rate": 1.2696009516599978e-05, "loss": 3.2681, "step": 5870 }, { "epoch": 0.08, "grad_norm": 7.296697616577148, "learning_rate": 1.2698172380231428e-05, "loss": 2.1261, "step": 5871 }, { "epoch": 0.08, "grad_norm": 7.667090892791748, "learning_rate": 1.2700335243862876e-05, "loss": 2.5314, "step": 5872 }, { "epoch": 0.08, "grad_norm": 8.122037887573242, "learning_rate": 1.2702498107494322e-05, "loss": 3.0929, "step": 5873 }, { "epoch": 0.08, "grad_norm": 7.881214141845703, "learning_rate": 1.270466097112577e-05, "loss": 2.2419, "step": 5874 }, { "epoch": 0.08, "grad_norm": 8.608631134033203, "learning_rate": 1.270682383475722e-05, "loss": 2.6591, "step": 5875 }, { "epoch": 0.08, "grad_norm": 7.524801731109619, "learning_rate": 1.2708986698388668e-05, "loss": 2.6345, "step": 5876 }, { "epoch": 0.08, "grad_norm": 9.221613883972168, "learning_rate": 1.2711149562020115e-05, "loss": 2.8893, "step": 5877 }, { "epoch": 0.08, "grad_norm": 8.1298828125, "learning_rate": 1.2713312425651563e-05, "loss": 2.9273, "step": 5878 }, { "epoch": 0.08, "grad_norm": 7.3892598152160645, "learning_rate": 1.2715475289283012e-05, "loss": 2.9416, "step": 5879 }, { "epoch": 0.08, "grad_norm": 7.510034561157227, "learning_rate": 1.271763815291446e-05, "loss": 2.6896, "step": 5880 }, { "epoch": 0.08, "grad_norm": 8.277815818786621, "learning_rate": 1.2719801016545907e-05, "loss": 2.5487, "step": 5881 }, { "epoch": 0.08, "grad_norm": 8.11474609375, "learning_rate": 1.2721963880177355e-05, "loss": 2.5287, "step": 5882 }, { "epoch": 0.08, "grad_norm": 8.337662696838379, "learning_rate": 1.2724126743808805e-05, "loss": 3.1379, "step": 5883 }, { "epoch": 0.08, "grad_norm": 7.955145359039307, "learning_rate": 1.2726289607440253e-05, "loss": 2.8243, "step": 5884 }, { "epoch": 0.08, "grad_norm": 8.630561828613281, "learning_rate": 1.2728452471071699e-05, "loss": 2.8209, "step": 5885 }, { "epoch": 0.08, "grad_norm": 7.888707637786865, "learning_rate": 1.2730615334703147e-05, "loss": 2.1161, "step": 5886 }, { "epoch": 0.08, "grad_norm": 7.858078479766846, "learning_rate": 1.2732778198334597e-05, "loss": 2.661, "step": 5887 }, { "epoch": 0.08, "grad_norm": 7.232956409454346, "learning_rate": 1.2734941061966045e-05, "loss": 2.5497, "step": 5888 }, { "epoch": 0.08, "grad_norm": 8.81814956665039, "learning_rate": 1.2737103925597491e-05, "loss": 2.4306, "step": 5889 }, { "epoch": 0.08, "grad_norm": 7.311395645141602, "learning_rate": 1.2739266789228941e-05, "loss": 2.739, "step": 5890 }, { "epoch": 0.08, "grad_norm": 8.824817657470703, "learning_rate": 1.2741429652860389e-05, "loss": 3.1494, "step": 5891 }, { "epoch": 0.08, "grad_norm": 7.8223443031311035, "learning_rate": 1.2743592516491835e-05, "loss": 2.7721, "step": 5892 }, { "epoch": 0.08, "grad_norm": 7.63277530670166, "learning_rate": 1.2745755380123283e-05, "loss": 2.9351, "step": 5893 }, { "epoch": 0.08, "grad_norm": 9.050029754638672, "learning_rate": 1.2747918243754733e-05, "loss": 3.0189, "step": 5894 }, { "epoch": 0.08, "grad_norm": 7.532497406005859, "learning_rate": 1.2750081107386181e-05, "loss": 2.7294, "step": 5895 }, { "epoch": 0.08, "grad_norm": 7.317128658294678, "learning_rate": 1.2752243971017628e-05, "loss": 2.9362, "step": 5896 }, { "epoch": 0.08, "grad_norm": 7.963167190551758, "learning_rate": 1.2754406834649076e-05, "loss": 2.3407, "step": 5897 }, { "epoch": 0.08, "grad_norm": 8.138505935668945, "learning_rate": 1.2756569698280525e-05, "loss": 3.1323, "step": 5898 }, { "epoch": 0.08, "grad_norm": 8.639922142028809, "learning_rate": 1.2758732561911973e-05, "loss": 3.3921, "step": 5899 }, { "epoch": 0.08, "grad_norm": 6.597701549530029, "learning_rate": 1.276089542554342e-05, "loss": 2.1852, "step": 5900 }, { "epoch": 0.08, "grad_norm": 7.71024751663208, "learning_rate": 1.2763058289174868e-05, "loss": 2.769, "step": 5901 }, { "epoch": 0.08, "grad_norm": 8.712329864501953, "learning_rate": 1.2765221152806318e-05, "loss": 3.5374, "step": 5902 }, { "epoch": 0.08, "grad_norm": 8.45135498046875, "learning_rate": 1.2767384016437766e-05, "loss": 2.6501, "step": 5903 }, { "epoch": 0.08, "grad_norm": 9.617210388183594, "learning_rate": 1.2769546880069212e-05, "loss": 2.7538, "step": 5904 }, { "epoch": 0.08, "grad_norm": 7.923446178436279, "learning_rate": 1.277170974370066e-05, "loss": 3.0568, "step": 5905 }, { "epoch": 0.08, "grad_norm": 8.48534870147705, "learning_rate": 1.277387260733211e-05, "loss": 2.826, "step": 5906 }, { "epoch": 0.08, "grad_norm": 7.393352508544922, "learning_rate": 1.2776035470963556e-05, "loss": 2.5837, "step": 5907 }, { "epoch": 0.08, "grad_norm": 8.252782821655273, "learning_rate": 1.2778198334595004e-05, "loss": 2.755, "step": 5908 }, { "epoch": 0.08, "grad_norm": 7.852527141571045, "learning_rate": 1.2780361198226452e-05, "loss": 2.4099, "step": 5909 }, { "epoch": 0.08, "grad_norm": 8.193663597106934, "learning_rate": 1.2782524061857902e-05, "loss": 2.8691, "step": 5910 }, { "epoch": 0.08, "grad_norm": 7.252971649169922, "learning_rate": 1.2784686925489348e-05, "loss": 2.6055, "step": 5911 }, { "epoch": 0.08, "grad_norm": 7.5284528732299805, "learning_rate": 1.2786849789120796e-05, "loss": 2.2015, "step": 5912 }, { "epoch": 0.08, "grad_norm": 7.685815811157227, "learning_rate": 1.2789012652752244e-05, "loss": 2.4678, "step": 5913 }, { "epoch": 0.08, "grad_norm": 8.453356742858887, "learning_rate": 1.2791175516383694e-05, "loss": 2.833, "step": 5914 }, { "epoch": 0.08, "grad_norm": 7.677613735198975, "learning_rate": 1.279333838001514e-05, "loss": 2.2694, "step": 5915 }, { "epoch": 0.08, "grad_norm": 8.125685691833496, "learning_rate": 1.2795501243646589e-05, "loss": 3.0089, "step": 5916 }, { "epoch": 0.08, "grad_norm": 7.184153079986572, "learning_rate": 1.2797664107278037e-05, "loss": 2.3885, "step": 5917 }, { "epoch": 0.08, "grad_norm": 9.88181209564209, "learning_rate": 1.2799826970909486e-05, "loss": 2.8697, "step": 5918 }, { "epoch": 0.08, "grad_norm": 7.275360584259033, "learning_rate": 1.2801989834540933e-05, "loss": 2.29, "step": 5919 }, { "epoch": 0.08, "grad_norm": 7.821339130401611, "learning_rate": 1.280415269817238e-05, "loss": 3.0432, "step": 5920 }, { "epoch": 0.08, "grad_norm": 8.659351348876953, "learning_rate": 1.280631556180383e-05, "loss": 2.7217, "step": 5921 }, { "epoch": 0.08, "grad_norm": 7.204690456390381, "learning_rate": 1.2808478425435279e-05, "loss": 2.4659, "step": 5922 }, { "epoch": 0.08, "grad_norm": 7.250236988067627, "learning_rate": 1.2810641289066725e-05, "loss": 2.1067, "step": 5923 }, { "epoch": 0.08, "grad_norm": 8.389137268066406, "learning_rate": 1.2812804152698173e-05, "loss": 3.0187, "step": 5924 }, { "epoch": 0.08, "grad_norm": 8.63952350616455, "learning_rate": 1.2814967016329623e-05, "loss": 2.254, "step": 5925 }, { "epoch": 0.08, "grad_norm": 8.417789459228516, "learning_rate": 1.2817129879961069e-05, "loss": 3.0826, "step": 5926 }, { "epoch": 0.08, "grad_norm": 7.018357753753662, "learning_rate": 1.2819292743592517e-05, "loss": 2.394, "step": 5927 }, { "epoch": 0.08, "grad_norm": 8.00933837890625, "learning_rate": 1.2821455607223965e-05, "loss": 2.5699, "step": 5928 }, { "epoch": 0.08, "grad_norm": 7.952650547027588, "learning_rate": 1.2823618470855415e-05, "loss": 2.7884, "step": 5929 }, { "epoch": 0.08, "grad_norm": 7.549109935760498, "learning_rate": 1.2825781334486861e-05, "loss": 2.7286, "step": 5930 }, { "epoch": 0.08, "grad_norm": 8.843368530273438, "learning_rate": 1.282794419811831e-05, "loss": 2.6814, "step": 5931 }, { "epoch": 0.08, "grad_norm": 8.016392707824707, "learning_rate": 1.2830107061749757e-05, "loss": 2.1246, "step": 5932 }, { "epoch": 0.08, "grad_norm": 7.861075401306152, "learning_rate": 1.2832269925381207e-05, "loss": 3.0546, "step": 5933 }, { "epoch": 0.08, "grad_norm": 8.06208324432373, "learning_rate": 1.2834432789012654e-05, "loss": 2.496, "step": 5934 }, { "epoch": 0.08, "grad_norm": 7.494781017303467, "learning_rate": 1.2836595652644102e-05, "loss": 2.4961, "step": 5935 }, { "epoch": 0.08, "grad_norm": 7.982985973358154, "learning_rate": 1.283875851627555e-05, "loss": 3.2647, "step": 5936 }, { "epoch": 0.08, "grad_norm": 7.336591720581055, "learning_rate": 1.2840921379907e-05, "loss": 2.7296, "step": 5937 }, { "epoch": 0.08, "grad_norm": 7.329984664916992, "learning_rate": 1.2843084243538446e-05, "loss": 2.9821, "step": 5938 }, { "epoch": 0.08, "grad_norm": 8.717230796813965, "learning_rate": 1.2845247107169894e-05, "loss": 3.0118, "step": 5939 }, { "epoch": 0.08, "grad_norm": 7.177222728729248, "learning_rate": 1.2847409970801342e-05, "loss": 2.5939, "step": 5940 }, { "epoch": 0.08, "grad_norm": 7.734133243560791, "learning_rate": 1.2849572834432792e-05, "loss": 2.3475, "step": 5941 }, { "epoch": 0.08, "grad_norm": 8.879440307617188, "learning_rate": 1.2851735698064238e-05, "loss": 2.3241, "step": 5942 }, { "epoch": 0.08, "grad_norm": 7.773837089538574, "learning_rate": 1.2853898561695686e-05, "loss": 2.7999, "step": 5943 }, { "epoch": 0.08, "grad_norm": 9.389459609985352, "learning_rate": 1.2856061425327132e-05, "loss": 3.1443, "step": 5944 }, { "epoch": 0.08, "grad_norm": 8.223336219787598, "learning_rate": 1.2858224288958582e-05, "loss": 2.8453, "step": 5945 }, { "epoch": 0.08, "grad_norm": 7.655696392059326, "learning_rate": 1.286038715259003e-05, "loss": 2.7019, "step": 5946 }, { "epoch": 0.08, "grad_norm": 6.7775444984436035, "learning_rate": 1.2862550016221478e-05, "loss": 2.2549, "step": 5947 }, { "epoch": 0.08, "grad_norm": 7.993571758270264, "learning_rate": 1.2864712879852925e-05, "loss": 2.6284, "step": 5948 }, { "epoch": 0.08, "grad_norm": 8.387422561645508, "learning_rate": 1.2866875743484374e-05, "loss": 2.9958, "step": 5949 }, { "epoch": 0.08, "grad_norm": 7.913154125213623, "learning_rate": 1.2869038607115822e-05, "loss": 3.256, "step": 5950 }, { "epoch": 0.08, "grad_norm": 7.6296868324279785, "learning_rate": 1.287120147074727e-05, "loss": 2.9642, "step": 5951 }, { "epoch": 0.08, "grad_norm": 8.945694923400879, "learning_rate": 1.287336433437872e-05, "loss": 3.3151, "step": 5952 }, { "epoch": 0.08, "grad_norm": 8.570130348205566, "learning_rate": 1.2875527198010167e-05, "loss": 2.7485, "step": 5953 }, { "epoch": 0.08, "grad_norm": 8.918889045715332, "learning_rate": 1.2877690061641615e-05, "loss": 2.8732, "step": 5954 }, { "epoch": 0.08, "grad_norm": 8.54397964477539, "learning_rate": 1.2879852925273063e-05, "loss": 3.19, "step": 5955 }, { "epoch": 0.08, "grad_norm": 7.8490095138549805, "learning_rate": 1.2882015788904512e-05, "loss": 2.5479, "step": 5956 }, { "epoch": 0.08, "grad_norm": 7.349795818328857, "learning_rate": 1.2884178652535959e-05, "loss": 2.4368, "step": 5957 }, { "epoch": 0.08, "grad_norm": 7.565162181854248, "learning_rate": 1.2886341516167407e-05, "loss": 2.4167, "step": 5958 }, { "epoch": 0.08, "grad_norm": 7.643101215362549, "learning_rate": 1.2888504379798855e-05, "loss": 2.802, "step": 5959 }, { "epoch": 0.08, "grad_norm": 8.012918472290039, "learning_rate": 1.2890667243430305e-05, "loss": 2.6557, "step": 5960 }, { "epoch": 0.08, "grad_norm": 7.594620227813721, "learning_rate": 1.2892830107061751e-05, "loss": 2.511, "step": 5961 }, { "epoch": 0.08, "grad_norm": 8.389178276062012, "learning_rate": 1.2894992970693199e-05, "loss": 3.1543, "step": 5962 }, { "epoch": 0.08, "grad_norm": 8.057670593261719, "learning_rate": 1.2897155834324645e-05, "loss": 2.9032, "step": 5963 }, { "epoch": 0.08, "grad_norm": 8.430032730102539, "learning_rate": 1.2899318697956095e-05, "loss": 3.1256, "step": 5964 }, { "epoch": 0.08, "grad_norm": 7.926079273223877, "learning_rate": 1.2901481561587543e-05, "loss": 2.7622, "step": 5965 }, { "epoch": 0.08, "grad_norm": 7.769717693328857, "learning_rate": 1.2903644425218991e-05, "loss": 2.5627, "step": 5966 }, { "epoch": 0.08, "grad_norm": 7.830595016479492, "learning_rate": 1.2905807288850438e-05, "loss": 2.9753, "step": 5967 }, { "epoch": 0.08, "grad_norm": 8.021682739257812, "learning_rate": 1.2907970152481887e-05, "loss": 2.5456, "step": 5968 }, { "epoch": 0.08, "grad_norm": 7.234097003936768, "learning_rate": 1.2910133016113335e-05, "loss": 3.1427, "step": 5969 }, { "epoch": 0.08, "grad_norm": 7.5443644523620605, "learning_rate": 1.2912295879744783e-05, "loss": 2.6073, "step": 5970 }, { "epoch": 0.08, "grad_norm": 7.434332370758057, "learning_rate": 1.291445874337623e-05, "loss": 2.3764, "step": 5971 }, { "epoch": 0.08, "grad_norm": 7.810701370239258, "learning_rate": 1.291662160700768e-05, "loss": 3.0466, "step": 5972 }, { "epoch": 0.08, "grad_norm": 7.1754560470581055, "learning_rate": 1.2918784470639128e-05, "loss": 2.7962, "step": 5973 }, { "epoch": 0.08, "grad_norm": 7.693580150604248, "learning_rate": 1.2920947334270576e-05, "loss": 2.5737, "step": 5974 }, { "epoch": 0.08, "grad_norm": 8.090461730957031, "learning_rate": 1.2923110197902022e-05, "loss": 2.4582, "step": 5975 }, { "epoch": 0.08, "grad_norm": 7.153176784515381, "learning_rate": 1.2925273061533472e-05, "loss": 2.4527, "step": 5976 }, { "epoch": 0.08, "grad_norm": 8.101609230041504, "learning_rate": 1.292743592516492e-05, "loss": 3.3893, "step": 5977 }, { "epoch": 0.08, "grad_norm": 6.479248523712158, "learning_rate": 1.2929598788796368e-05, "loss": 2.3222, "step": 5978 }, { "epoch": 0.08, "grad_norm": 7.343568325042725, "learning_rate": 1.2931761652427814e-05, "loss": 2.7868, "step": 5979 }, { "epoch": 0.08, "grad_norm": 8.690705299377441, "learning_rate": 1.2933924516059264e-05, "loss": 3.0048, "step": 5980 }, { "epoch": 0.08, "grad_norm": 8.061868667602539, "learning_rate": 1.2936087379690712e-05, "loss": 3.0031, "step": 5981 }, { "epoch": 0.08, "grad_norm": 7.083492755889893, "learning_rate": 1.2938250243322158e-05, "loss": 2.3163, "step": 5982 }, { "epoch": 0.08, "grad_norm": 7.8504791259765625, "learning_rate": 1.2940413106953606e-05, "loss": 2.2685, "step": 5983 }, { "epoch": 0.08, "grad_norm": 7.492166996002197, "learning_rate": 1.2942575970585056e-05, "loss": 2.8154, "step": 5984 }, { "epoch": 0.08, "grad_norm": 7.5177435874938965, "learning_rate": 1.2944738834216504e-05, "loss": 2.3205, "step": 5985 }, { "epoch": 0.08, "grad_norm": 8.245100021362305, "learning_rate": 1.294690169784795e-05, "loss": 2.1126, "step": 5986 }, { "epoch": 0.08, "grad_norm": 8.938079833984375, "learning_rate": 1.29490645614794e-05, "loss": 3.0948, "step": 5987 }, { "epoch": 0.08, "grad_norm": 8.063687324523926, "learning_rate": 1.2951227425110848e-05, "loss": 2.4482, "step": 5988 }, { "epoch": 0.08, "grad_norm": 7.322964668273926, "learning_rate": 1.2953390288742296e-05, "loss": 2.929, "step": 5989 }, { "epoch": 0.08, "grad_norm": 8.685861587524414, "learning_rate": 1.2955553152373743e-05, "loss": 2.9829, "step": 5990 }, { "epoch": 0.08, "grad_norm": 8.902119636535645, "learning_rate": 1.2957716016005192e-05, "loss": 3.1269, "step": 5991 }, { "epoch": 0.08, "grad_norm": 7.513336181640625, "learning_rate": 1.295987887963664e-05, "loss": 2.5403, "step": 5992 }, { "epoch": 0.08, "grad_norm": 8.462431907653809, "learning_rate": 1.2962041743268089e-05, "loss": 2.3364, "step": 5993 }, { "epoch": 0.08, "grad_norm": 8.506983757019043, "learning_rate": 1.2964204606899535e-05, "loss": 2.6914, "step": 5994 }, { "epoch": 0.08, "grad_norm": 7.784276485443115, "learning_rate": 1.2966367470530985e-05, "loss": 2.7353, "step": 5995 }, { "epoch": 0.08, "grad_norm": 7.59368371963501, "learning_rate": 1.2968530334162433e-05, "loss": 2.5292, "step": 5996 }, { "epoch": 0.08, "grad_norm": 9.36103343963623, "learning_rate": 1.297069319779388e-05, "loss": 2.8964, "step": 5997 }, { "epoch": 0.08, "grad_norm": 8.885661125183105, "learning_rate": 1.2972856061425327e-05, "loss": 3.7357, "step": 5998 }, { "epoch": 0.08, "grad_norm": 8.400300025939941, "learning_rate": 1.2975018925056777e-05, "loss": 2.8128, "step": 5999 }, { "epoch": 0.08, "grad_norm": 7.673933029174805, "learning_rate": 1.2977181788688225e-05, "loss": 2.6236, "step": 6000 }, { "epoch": 0.08, "grad_norm": 8.451187133789062, "learning_rate": 1.2979344652319671e-05, "loss": 2.7686, "step": 6001 }, { "epoch": 0.08, "grad_norm": 7.0551676750183105, "learning_rate": 1.298150751595112e-05, "loss": 3.0063, "step": 6002 }, { "epoch": 0.08, "grad_norm": 7.73419713973999, "learning_rate": 1.2983670379582569e-05, "loss": 2.7584, "step": 6003 }, { "epoch": 0.08, "grad_norm": 7.597722053527832, "learning_rate": 1.2985833243214017e-05, "loss": 2.4277, "step": 6004 }, { "epoch": 0.08, "grad_norm": 8.808263778686523, "learning_rate": 1.2987996106845463e-05, "loss": 2.4113, "step": 6005 }, { "epoch": 0.08, "grad_norm": 7.875490188598633, "learning_rate": 1.2990158970476912e-05, "loss": 2.9388, "step": 6006 }, { "epoch": 0.08, "grad_norm": 7.655898094177246, "learning_rate": 1.2992321834108361e-05, "loss": 2.5487, "step": 6007 }, { "epoch": 0.08, "grad_norm": 8.362156867980957, "learning_rate": 1.299448469773981e-05, "loss": 2.4897, "step": 6008 }, { "epoch": 0.08, "grad_norm": 7.167930603027344, "learning_rate": 1.2996647561371256e-05, "loss": 2.8985, "step": 6009 }, { "epoch": 0.08, "grad_norm": 6.9352593421936035, "learning_rate": 1.2998810425002704e-05, "loss": 2.6377, "step": 6010 }, { "epoch": 0.08, "grad_norm": 7.902835369110107, "learning_rate": 1.3000973288634153e-05, "loss": 2.9515, "step": 6011 }, { "epoch": 0.08, "grad_norm": 8.21860122680664, "learning_rate": 1.3003136152265601e-05, "loss": 3.3547, "step": 6012 }, { "epoch": 0.08, "grad_norm": 7.50625467300415, "learning_rate": 1.3005299015897048e-05, "loss": 2.5977, "step": 6013 }, { "epoch": 0.08, "grad_norm": 6.428037166595459, "learning_rate": 1.3007461879528496e-05, "loss": 2.3352, "step": 6014 }, { "epoch": 0.08, "grad_norm": 8.490561485290527, "learning_rate": 1.3009624743159946e-05, "loss": 3.1275, "step": 6015 }, { "epoch": 0.08, "grad_norm": 7.458846569061279, "learning_rate": 1.3011787606791394e-05, "loss": 2.6193, "step": 6016 }, { "epoch": 0.08, "grad_norm": 7.037408828735352, "learning_rate": 1.301395047042284e-05, "loss": 2.8829, "step": 6017 }, { "epoch": 0.08, "grad_norm": 8.047975540161133, "learning_rate": 1.301611333405429e-05, "loss": 2.9617, "step": 6018 }, { "epoch": 0.08, "grad_norm": 9.087898254394531, "learning_rate": 1.3018276197685738e-05, "loss": 3.1328, "step": 6019 }, { "epoch": 0.08, "grad_norm": 8.167427062988281, "learning_rate": 1.3020439061317184e-05, "loss": 2.8514, "step": 6020 }, { "epoch": 0.08, "grad_norm": 7.22557258605957, "learning_rate": 1.3022601924948632e-05, "loss": 2.4428, "step": 6021 }, { "epoch": 0.08, "grad_norm": 9.241146087646484, "learning_rate": 1.3024764788580082e-05, "loss": 3.2898, "step": 6022 }, { "epoch": 0.08, "grad_norm": 6.6145710945129395, "learning_rate": 1.302692765221153e-05, "loss": 2.804, "step": 6023 }, { "epoch": 0.08, "grad_norm": 8.427767753601074, "learning_rate": 1.3029090515842976e-05, "loss": 2.7769, "step": 6024 }, { "epoch": 0.08, "grad_norm": 8.627897262573242, "learning_rate": 1.3031253379474424e-05, "loss": 2.506, "step": 6025 }, { "epoch": 0.08, "grad_norm": 8.2550687789917, "learning_rate": 1.3033416243105874e-05, "loss": 2.8372, "step": 6026 }, { "epoch": 0.08, "grad_norm": 8.40571403503418, "learning_rate": 1.3035579106737322e-05, "loss": 2.9468, "step": 6027 }, { "epoch": 0.08, "grad_norm": 8.050755500793457, "learning_rate": 1.3037741970368769e-05, "loss": 2.5402, "step": 6028 }, { "epoch": 0.08, "grad_norm": 6.933781147003174, "learning_rate": 1.3039904834000217e-05, "loss": 2.6327, "step": 6029 }, { "epoch": 0.08, "grad_norm": 10.15324592590332, "learning_rate": 1.3042067697631666e-05, "loss": 3.2447, "step": 6030 }, { "epoch": 0.08, "grad_norm": 7.585676670074463, "learning_rate": 1.3044230561263114e-05, "loss": 2.9089, "step": 6031 }, { "epoch": 0.08, "grad_norm": 7.9737548828125, "learning_rate": 1.304639342489456e-05, "loss": 2.681, "step": 6032 }, { "epoch": 0.08, "grad_norm": 7.858016490936279, "learning_rate": 1.3048556288526009e-05, "loss": 2.7089, "step": 6033 }, { "epoch": 0.08, "grad_norm": 7.617537021636963, "learning_rate": 1.3050719152157459e-05, "loss": 2.7303, "step": 6034 }, { "epoch": 0.08, "grad_norm": 7.30494499206543, "learning_rate": 1.3052882015788905e-05, "loss": 2.5954, "step": 6035 }, { "epoch": 0.08, "grad_norm": 8.138138771057129, "learning_rate": 1.3055044879420353e-05, "loss": 2.7035, "step": 6036 }, { "epoch": 0.08, "grad_norm": 8.695459365844727, "learning_rate": 1.3057207743051801e-05, "loss": 2.8041, "step": 6037 }, { "epoch": 0.08, "grad_norm": 8.83223819732666, "learning_rate": 1.305937060668325e-05, "loss": 3.5047, "step": 6038 }, { "epoch": 0.08, "grad_norm": 7.353570938110352, "learning_rate": 1.3061533470314697e-05, "loss": 2.5859, "step": 6039 }, { "epoch": 0.08, "grad_norm": 8.180116653442383, "learning_rate": 1.3063696333946145e-05, "loss": 2.2889, "step": 6040 }, { "epoch": 0.08, "grad_norm": 8.600323677062988, "learning_rate": 1.3065859197577593e-05, "loss": 2.5895, "step": 6041 }, { "epoch": 0.08, "grad_norm": 10.022671699523926, "learning_rate": 1.3068022061209043e-05, "loss": 3.1651, "step": 6042 }, { "epoch": 0.08, "grad_norm": 7.2856268882751465, "learning_rate": 1.307018492484049e-05, "loss": 2.478, "step": 6043 }, { "epoch": 0.08, "grad_norm": 9.09002685546875, "learning_rate": 1.3072347788471937e-05, "loss": 2.9371, "step": 6044 }, { "epoch": 0.08, "grad_norm": 8.264330863952637, "learning_rate": 1.3074510652103385e-05, "loss": 2.3235, "step": 6045 }, { "epoch": 0.08, "grad_norm": 7.583738327026367, "learning_rate": 1.3076673515734835e-05, "loss": 2.478, "step": 6046 }, { "epoch": 0.08, "grad_norm": 6.702815532684326, "learning_rate": 1.3078836379366282e-05, "loss": 2.8732, "step": 6047 }, { "epoch": 0.08, "grad_norm": 8.597787857055664, "learning_rate": 1.308099924299773e-05, "loss": 3.7218, "step": 6048 }, { "epoch": 0.08, "grad_norm": 7.927159786224365, "learning_rate": 1.308316210662918e-05, "loss": 2.2579, "step": 6049 }, { "epoch": 0.08, "grad_norm": 9.103981018066406, "learning_rate": 1.3085324970260627e-05, "loss": 2.9356, "step": 6050 }, { "epoch": 0.08, "grad_norm": 7.507833480834961, "learning_rate": 1.3087487833892074e-05, "loss": 2.3983, "step": 6051 }, { "epoch": 0.08, "grad_norm": 7.835781574249268, "learning_rate": 1.3089650697523522e-05, "loss": 2.1895, "step": 6052 }, { "epoch": 0.08, "grad_norm": 8.436997413635254, "learning_rate": 1.3091813561154972e-05, "loss": 2.9646, "step": 6053 }, { "epoch": 0.08, "grad_norm": 7.950084209442139, "learning_rate": 1.3093976424786418e-05, "loss": 2.4551, "step": 6054 }, { "epoch": 0.08, "grad_norm": 7.948166847229004, "learning_rate": 1.3096139288417866e-05, "loss": 3.2037, "step": 6055 }, { "epoch": 0.08, "grad_norm": 7.423679828643799, "learning_rate": 1.3098302152049314e-05, "loss": 2.2644, "step": 6056 }, { "epoch": 0.08, "grad_norm": 7.692092418670654, "learning_rate": 1.3100465015680764e-05, "loss": 2.7234, "step": 6057 }, { "epoch": 0.08, "grad_norm": 7.360171794891357, "learning_rate": 1.310262787931221e-05, "loss": 2.8788, "step": 6058 }, { "epoch": 0.08, "grad_norm": 8.617810249328613, "learning_rate": 1.3104790742943658e-05, "loss": 2.0015, "step": 6059 }, { "epoch": 0.08, "grad_norm": 8.061288833618164, "learning_rate": 1.3106953606575106e-05, "loss": 3.0213, "step": 6060 }, { "epoch": 0.08, "grad_norm": 7.548541069030762, "learning_rate": 1.3109116470206556e-05, "loss": 2.3967, "step": 6061 }, { "epoch": 0.08, "grad_norm": 8.077688217163086, "learning_rate": 1.3111279333838002e-05, "loss": 2.7523, "step": 6062 }, { "epoch": 0.08, "grad_norm": 8.156622886657715, "learning_rate": 1.311344219746945e-05, "loss": 2.476, "step": 6063 }, { "epoch": 0.08, "grad_norm": 8.304285049438477, "learning_rate": 1.3115605061100898e-05, "loss": 2.0953, "step": 6064 }, { "epoch": 0.08, "grad_norm": 7.185181140899658, "learning_rate": 1.3117767924732348e-05, "loss": 2.5115, "step": 6065 }, { "epoch": 0.08, "grad_norm": 7.88485860824585, "learning_rate": 1.3119930788363795e-05, "loss": 2.7367, "step": 6066 }, { "epoch": 0.08, "grad_norm": 7.796748161315918, "learning_rate": 1.3122093651995243e-05, "loss": 2.9856, "step": 6067 }, { "epoch": 0.08, "grad_norm": 8.645364761352539, "learning_rate": 1.312425651562669e-05, "loss": 3.0903, "step": 6068 }, { "epoch": 0.08, "grad_norm": 8.014606475830078, "learning_rate": 1.312641937925814e-05, "loss": 3.6539, "step": 6069 }, { "epoch": 0.08, "grad_norm": 8.382647514343262, "learning_rate": 1.3128582242889587e-05, "loss": 3.1032, "step": 6070 }, { "epoch": 0.08, "grad_norm": 9.031920433044434, "learning_rate": 1.3130745106521035e-05, "loss": 2.9782, "step": 6071 }, { "epoch": 0.08, "grad_norm": 9.744400978088379, "learning_rate": 1.3132907970152481e-05, "loss": 2.2774, "step": 6072 }, { "epoch": 0.08, "grad_norm": 8.517415046691895, "learning_rate": 1.3135070833783931e-05, "loss": 3.3861, "step": 6073 }, { "epoch": 0.08, "grad_norm": 6.943664073944092, "learning_rate": 1.3137233697415379e-05, "loss": 2.83, "step": 6074 }, { "epoch": 0.08, "grad_norm": 8.088667869567871, "learning_rate": 1.3139396561046827e-05, "loss": 2.5762, "step": 6075 }, { "epoch": 0.08, "grad_norm": 7.707860469818115, "learning_rate": 1.3141559424678273e-05, "loss": 3.488, "step": 6076 }, { "epoch": 0.08, "grad_norm": 7.792508125305176, "learning_rate": 1.3143722288309723e-05, "loss": 3.0015, "step": 6077 }, { "epoch": 0.08, "grad_norm": 6.880940914154053, "learning_rate": 1.3145885151941171e-05, "loss": 2.5262, "step": 6078 }, { "epoch": 0.08, "grad_norm": 8.640731811523438, "learning_rate": 1.314804801557262e-05, "loss": 2.9355, "step": 6079 }, { "epoch": 0.08, "grad_norm": 8.280251502990723, "learning_rate": 1.3150210879204069e-05, "loss": 2.69, "step": 6080 }, { "epoch": 0.08, "grad_norm": 7.571393013000488, "learning_rate": 1.3152373742835515e-05, "loss": 2.6022, "step": 6081 }, { "epoch": 0.08, "grad_norm": 7.674659252166748, "learning_rate": 1.3154536606466963e-05, "loss": 2.5209, "step": 6082 }, { "epoch": 0.08, "grad_norm": 7.4728193283081055, "learning_rate": 1.3156699470098411e-05, "loss": 2.9215, "step": 6083 }, { "epoch": 0.08, "grad_norm": 6.824562072753906, "learning_rate": 1.3158862333729861e-05, "loss": 2.459, "step": 6084 }, { "epoch": 0.08, "grad_norm": 8.548931121826172, "learning_rate": 1.3161025197361308e-05, "loss": 2.7878, "step": 6085 }, { "epoch": 0.08, "grad_norm": 6.516210556030273, "learning_rate": 1.3163188060992756e-05, "loss": 2.3074, "step": 6086 }, { "epoch": 0.08, "grad_norm": 9.381855964660645, "learning_rate": 1.3165350924624204e-05, "loss": 2.5046, "step": 6087 }, { "epoch": 0.08, "grad_norm": 6.879922866821289, "learning_rate": 1.3167513788255653e-05, "loss": 2.8401, "step": 6088 }, { "epoch": 0.08, "grad_norm": 7.364184856414795, "learning_rate": 1.31696766518871e-05, "loss": 2.8347, "step": 6089 }, { "epoch": 0.08, "grad_norm": 7.134035110473633, "learning_rate": 1.3171839515518548e-05, "loss": 2.8676, "step": 6090 }, { "epoch": 0.08, "grad_norm": 8.457873344421387, "learning_rate": 1.3174002379149994e-05, "loss": 3.1156, "step": 6091 }, { "epoch": 0.08, "grad_norm": 7.275792121887207, "learning_rate": 1.3176165242781444e-05, "loss": 2.1988, "step": 6092 }, { "epoch": 0.08, "grad_norm": 8.164517402648926, "learning_rate": 1.3178328106412892e-05, "loss": 2.451, "step": 6093 }, { "epoch": 0.08, "grad_norm": 7.993359565734863, "learning_rate": 1.318049097004434e-05, "loss": 3.075, "step": 6094 }, { "epoch": 0.08, "grad_norm": 6.575723171234131, "learning_rate": 1.3182653833675786e-05, "loss": 2.4175, "step": 6095 }, { "epoch": 0.08, "grad_norm": 7.393671035766602, "learning_rate": 1.3184816697307236e-05, "loss": 2.5276, "step": 6096 }, { "epoch": 0.08, "grad_norm": 8.005228042602539, "learning_rate": 1.3186979560938684e-05, "loss": 3.1106, "step": 6097 }, { "epoch": 0.08, "grad_norm": 7.445982933044434, "learning_rate": 1.3189142424570132e-05, "loss": 2.357, "step": 6098 }, { "epoch": 0.08, "grad_norm": 8.5097017288208, "learning_rate": 1.3191305288201579e-05, "loss": 2.9492, "step": 6099 }, { "epoch": 0.08, "grad_norm": 7.679865837097168, "learning_rate": 1.3193468151833028e-05, "loss": 2.9905, "step": 6100 }, { "epoch": 0.08, "grad_norm": 8.079879760742188, "learning_rate": 1.3195631015464476e-05, "loss": 2.2512, "step": 6101 }, { "epoch": 0.08, "grad_norm": 7.500462532043457, "learning_rate": 1.3197793879095924e-05, "loss": 2.5752, "step": 6102 }, { "epoch": 0.08, "grad_norm": 8.715887069702148, "learning_rate": 1.319995674272737e-05, "loss": 3.1934, "step": 6103 }, { "epoch": 0.08, "grad_norm": 8.133938789367676, "learning_rate": 1.320211960635882e-05, "loss": 3.1169, "step": 6104 }, { "epoch": 0.08, "grad_norm": 8.277427673339844, "learning_rate": 1.3204282469990269e-05, "loss": 2.8179, "step": 6105 }, { "epoch": 0.08, "grad_norm": 8.669062614440918, "learning_rate": 1.3206445333621717e-05, "loss": 3.6868, "step": 6106 }, { "epoch": 0.08, "grad_norm": 8.164896965026855, "learning_rate": 1.3208608197253163e-05, "loss": 3.1366, "step": 6107 }, { "epoch": 0.08, "grad_norm": 8.220270156860352, "learning_rate": 1.3210771060884613e-05, "loss": 2.7703, "step": 6108 }, { "epoch": 0.08, "grad_norm": 8.866267204284668, "learning_rate": 1.321293392451606e-05, "loss": 2.7718, "step": 6109 }, { "epoch": 0.08, "grad_norm": 8.289398193359375, "learning_rate": 1.3215096788147507e-05, "loss": 2.6482, "step": 6110 }, { "epoch": 0.08, "grad_norm": 6.82542085647583, "learning_rate": 1.3217259651778955e-05, "loss": 2.5214, "step": 6111 }, { "epoch": 0.08, "grad_norm": 8.299582481384277, "learning_rate": 1.3219422515410405e-05, "loss": 2.4817, "step": 6112 }, { "epoch": 0.08, "grad_norm": 9.717264175415039, "learning_rate": 1.3221585379041853e-05, "loss": 3.204, "step": 6113 }, { "epoch": 0.08, "grad_norm": 7.56305456161499, "learning_rate": 1.32237482426733e-05, "loss": 2.2029, "step": 6114 }, { "epoch": 0.08, "grad_norm": 7.2003703117370605, "learning_rate": 1.3225911106304749e-05, "loss": 2.7157, "step": 6115 }, { "epoch": 0.08, "grad_norm": 8.16331672668457, "learning_rate": 1.3228073969936197e-05, "loss": 2.5964, "step": 6116 }, { "epoch": 0.08, "grad_norm": 7.643049716949463, "learning_rate": 1.3230236833567645e-05, "loss": 2.8352, "step": 6117 }, { "epoch": 0.08, "grad_norm": 7.437598705291748, "learning_rate": 1.3232399697199091e-05, "loss": 2.9328, "step": 6118 }, { "epoch": 0.08, "grad_norm": 7.1617865562438965, "learning_rate": 1.3234562560830541e-05, "loss": 2.5566, "step": 6119 }, { "epoch": 0.08, "grad_norm": 7.765688896179199, "learning_rate": 1.323672542446199e-05, "loss": 2.6596, "step": 6120 }, { "epoch": 0.08, "grad_norm": 8.034356117248535, "learning_rate": 1.3238888288093437e-05, "loss": 2.7931, "step": 6121 }, { "epoch": 0.08, "grad_norm": 7.5056915283203125, "learning_rate": 1.3241051151724884e-05, "loss": 2.5617, "step": 6122 }, { "epoch": 0.08, "grad_norm": 8.023870468139648, "learning_rate": 1.3243214015356333e-05, "loss": 2.6058, "step": 6123 }, { "epoch": 0.08, "grad_norm": 7.178934097290039, "learning_rate": 1.3245376878987781e-05, "loss": 2.3977, "step": 6124 }, { "epoch": 0.08, "grad_norm": 7.7874627113342285, "learning_rate": 1.324753974261923e-05, "loss": 3.0643, "step": 6125 }, { "epoch": 0.08, "grad_norm": 6.948584079742432, "learning_rate": 1.3249702606250676e-05, "loss": 2.644, "step": 6126 }, { "epoch": 0.08, "grad_norm": 7.737093925476074, "learning_rate": 1.3251865469882126e-05, "loss": 2.4729, "step": 6127 }, { "epoch": 0.08, "grad_norm": 8.583910942077637, "learning_rate": 1.3254028333513574e-05, "loss": 2.6501, "step": 6128 }, { "epoch": 0.08, "grad_norm": 8.174808502197266, "learning_rate": 1.325619119714502e-05, "loss": 2.8356, "step": 6129 }, { "epoch": 0.08, "grad_norm": 9.136651992797852, "learning_rate": 1.3258354060776468e-05, "loss": 2.687, "step": 6130 }, { "epoch": 0.08, "grad_norm": 7.780276775360107, "learning_rate": 1.3260516924407918e-05, "loss": 2.2634, "step": 6131 }, { "epoch": 0.08, "grad_norm": 7.594075679779053, "learning_rate": 1.3262679788039366e-05, "loss": 2.5077, "step": 6132 }, { "epoch": 0.08, "grad_norm": 8.339832305908203, "learning_rate": 1.3264842651670812e-05, "loss": 3.0849, "step": 6133 }, { "epoch": 0.08, "grad_norm": 7.976232528686523, "learning_rate": 1.326700551530226e-05, "loss": 3.0126, "step": 6134 }, { "epoch": 0.08, "grad_norm": 7.183354377746582, "learning_rate": 1.326916837893371e-05, "loss": 2.4561, "step": 6135 }, { "epoch": 0.08, "grad_norm": 7.931595325469971, "learning_rate": 1.3271331242565158e-05, "loss": 2.6041, "step": 6136 }, { "epoch": 0.08, "grad_norm": 8.471546173095703, "learning_rate": 1.3273494106196604e-05, "loss": 2.6404, "step": 6137 }, { "epoch": 0.08, "grad_norm": 8.007067680358887, "learning_rate": 1.3275656969828053e-05, "loss": 2.2771, "step": 6138 }, { "epoch": 0.08, "grad_norm": 7.0028510093688965, "learning_rate": 1.3277819833459502e-05, "loss": 2.3357, "step": 6139 }, { "epoch": 0.08, "grad_norm": 7.5682806968688965, "learning_rate": 1.327998269709095e-05, "loss": 2.6111, "step": 6140 }, { "epoch": 0.08, "grad_norm": 8.327130317687988, "learning_rate": 1.3282145560722397e-05, "loss": 3.1896, "step": 6141 }, { "epoch": 0.08, "grad_norm": 6.7550368309021, "learning_rate": 1.3284308424353845e-05, "loss": 2.7627, "step": 6142 }, { "epoch": 0.08, "grad_norm": 8.044160842895508, "learning_rate": 1.3286471287985294e-05, "loss": 3.0307, "step": 6143 }, { "epoch": 0.08, "grad_norm": 6.9065961837768555, "learning_rate": 1.328863415161674e-05, "loss": 1.9454, "step": 6144 }, { "epoch": 0.08, "grad_norm": 6.764902591705322, "learning_rate": 1.3290797015248189e-05, "loss": 2.5353, "step": 6145 }, { "epoch": 0.08, "grad_norm": 7.688427448272705, "learning_rate": 1.3292959878879639e-05, "loss": 2.2544, "step": 6146 }, { "epoch": 0.08, "grad_norm": 6.824522972106934, "learning_rate": 1.3295122742511087e-05, "loss": 2.6373, "step": 6147 }, { "epoch": 0.08, "grad_norm": 7.832771301269531, "learning_rate": 1.3297285606142533e-05, "loss": 2.9006, "step": 6148 }, { "epoch": 0.08, "grad_norm": 7.975858688354492, "learning_rate": 1.3299448469773981e-05, "loss": 2.3993, "step": 6149 }, { "epoch": 0.08, "grad_norm": 7.342355251312256, "learning_rate": 1.330161133340543e-05, "loss": 2.1635, "step": 6150 }, { "epoch": 0.08, "grad_norm": 7.388742446899414, "learning_rate": 1.3303774197036879e-05, "loss": 2.9841, "step": 6151 }, { "epoch": 0.08, "grad_norm": 7.353594779968262, "learning_rate": 1.3305937060668325e-05, "loss": 3.219, "step": 6152 }, { "epoch": 0.08, "grad_norm": 8.315523147583008, "learning_rate": 1.3308099924299773e-05, "loss": 2.5421, "step": 6153 }, { "epoch": 0.08, "grad_norm": 8.337078094482422, "learning_rate": 1.3310262787931223e-05, "loss": 3.0996, "step": 6154 }, { "epoch": 0.08, "grad_norm": 8.349480628967285, "learning_rate": 1.3312425651562671e-05, "loss": 2.6988, "step": 6155 }, { "epoch": 0.08, "grad_norm": 7.664766788482666, "learning_rate": 1.3314588515194117e-05, "loss": 2.5679, "step": 6156 }, { "epoch": 0.08, "grad_norm": 7.7087321281433105, "learning_rate": 1.3316751378825565e-05, "loss": 2.4703, "step": 6157 }, { "epoch": 0.08, "grad_norm": 7.554579734802246, "learning_rate": 1.3318914242457015e-05, "loss": 2.8431, "step": 6158 }, { "epoch": 0.08, "grad_norm": 7.8912353515625, "learning_rate": 1.3321077106088463e-05, "loss": 3.2501, "step": 6159 }, { "epoch": 0.08, "grad_norm": 7.391513347625732, "learning_rate": 1.332323996971991e-05, "loss": 2.4113, "step": 6160 }, { "epoch": 0.08, "grad_norm": 8.126601219177246, "learning_rate": 1.3325402833351358e-05, "loss": 2.9341, "step": 6161 }, { "epoch": 0.08, "grad_norm": 8.377008438110352, "learning_rate": 1.3327565696982807e-05, "loss": 2.5472, "step": 6162 }, { "epoch": 0.08, "grad_norm": 7.708652019500732, "learning_rate": 1.3329728560614254e-05, "loss": 2.7731, "step": 6163 }, { "epoch": 0.08, "grad_norm": 7.79362678527832, "learning_rate": 1.3331891424245702e-05, "loss": 2.8263, "step": 6164 }, { "epoch": 0.08, "grad_norm": 8.121243476867676, "learning_rate": 1.333405428787715e-05, "loss": 2.9117, "step": 6165 }, { "epoch": 0.08, "grad_norm": 9.07479476928711, "learning_rate": 1.33362171515086e-05, "loss": 2.2842, "step": 6166 }, { "epoch": 0.08, "grad_norm": 7.223984241485596, "learning_rate": 1.3338380015140046e-05, "loss": 2.7883, "step": 6167 }, { "epoch": 0.08, "grad_norm": 7.242138385772705, "learning_rate": 1.3340542878771494e-05, "loss": 2.7767, "step": 6168 }, { "epoch": 0.08, "grad_norm": 8.464922904968262, "learning_rate": 1.3342705742402942e-05, "loss": 3.23, "step": 6169 }, { "epoch": 0.08, "grad_norm": 6.932378768920898, "learning_rate": 1.3344868606034392e-05, "loss": 2.0587, "step": 6170 }, { "epoch": 0.08, "grad_norm": 7.3577728271484375, "learning_rate": 1.3347031469665838e-05, "loss": 2.121, "step": 6171 }, { "epoch": 0.08, "grad_norm": 7.068454742431641, "learning_rate": 1.3349194333297286e-05, "loss": 2.32, "step": 6172 }, { "epoch": 0.08, "grad_norm": 7.566316604614258, "learning_rate": 1.3351357196928734e-05, "loss": 2.5361, "step": 6173 }, { "epoch": 0.08, "grad_norm": 5.958583831787109, "learning_rate": 1.3353520060560184e-05, "loss": 2.0378, "step": 6174 }, { "epoch": 0.08, "grad_norm": 7.632809162139893, "learning_rate": 1.335568292419163e-05, "loss": 2.923, "step": 6175 }, { "epoch": 0.08, "grad_norm": 7.9089579582214355, "learning_rate": 1.3357845787823078e-05, "loss": 2.8519, "step": 6176 }, { "epoch": 0.08, "grad_norm": 8.740198135375977, "learning_rate": 1.3360008651454528e-05, "loss": 3.3296, "step": 6177 }, { "epoch": 0.08, "grad_norm": 7.5952839851379395, "learning_rate": 1.3362171515085976e-05, "loss": 2.628, "step": 6178 }, { "epoch": 0.08, "grad_norm": 7.416398525238037, "learning_rate": 1.3364334378717423e-05, "loss": 2.8445, "step": 6179 }, { "epoch": 0.08, "grad_norm": 8.028783798217773, "learning_rate": 1.336649724234887e-05, "loss": 2.9613, "step": 6180 }, { "epoch": 0.08, "grad_norm": 8.676020622253418, "learning_rate": 1.336866010598032e-05, "loss": 2.7816, "step": 6181 }, { "epoch": 0.08, "grad_norm": 7.200887680053711, "learning_rate": 1.3370822969611767e-05, "loss": 2.3284, "step": 6182 }, { "epoch": 0.08, "grad_norm": 9.0223388671875, "learning_rate": 1.3372985833243215e-05, "loss": 2.6833, "step": 6183 }, { "epoch": 0.08, "grad_norm": 7.968507289886475, "learning_rate": 1.3375148696874663e-05, "loss": 2.8655, "step": 6184 }, { "epoch": 0.08, "grad_norm": 6.755319118499756, "learning_rate": 1.3377311560506113e-05, "loss": 2.5653, "step": 6185 }, { "epoch": 0.08, "grad_norm": 8.389552116394043, "learning_rate": 1.3379474424137559e-05, "loss": 2.9903, "step": 6186 }, { "epoch": 0.08, "grad_norm": 7.965725421905518, "learning_rate": 1.3381637287769007e-05, "loss": 3.2079, "step": 6187 }, { "epoch": 0.08, "grad_norm": 9.464836120605469, "learning_rate": 1.3383800151400455e-05, "loss": 3.2318, "step": 6188 }, { "epoch": 0.08, "grad_norm": 7.292965888977051, "learning_rate": 1.3385963015031905e-05, "loss": 2.169, "step": 6189 }, { "epoch": 0.08, "grad_norm": 8.299479484558105, "learning_rate": 1.3388125878663351e-05, "loss": 2.7429, "step": 6190 }, { "epoch": 0.08, "grad_norm": 7.696765899658203, "learning_rate": 1.33902887422948e-05, "loss": 2.9718, "step": 6191 }, { "epoch": 0.08, "grad_norm": 7.940935134887695, "learning_rate": 1.3392451605926247e-05, "loss": 2.6417, "step": 6192 }, { "epoch": 0.08, "grad_norm": 8.483247756958008, "learning_rate": 1.3394614469557697e-05, "loss": 3.3164, "step": 6193 }, { "epoch": 0.08, "grad_norm": 7.2622199058532715, "learning_rate": 1.3396777333189143e-05, "loss": 2.513, "step": 6194 }, { "epoch": 0.08, "grad_norm": 8.565590858459473, "learning_rate": 1.3398940196820591e-05, "loss": 3.0906, "step": 6195 }, { "epoch": 0.08, "grad_norm": 9.826539039611816, "learning_rate": 1.340110306045204e-05, "loss": 2.8937, "step": 6196 }, { "epoch": 0.08, "grad_norm": 7.336889743804932, "learning_rate": 1.340326592408349e-05, "loss": 2.6333, "step": 6197 }, { "epoch": 0.08, "grad_norm": 7.324492454528809, "learning_rate": 1.3405428787714936e-05, "loss": 2.5729, "step": 6198 }, { "epoch": 0.08, "grad_norm": 8.041913032531738, "learning_rate": 1.3407591651346384e-05, "loss": 3.2833, "step": 6199 }, { "epoch": 0.08, "grad_norm": 7.7270121574401855, "learning_rate": 1.340975451497783e-05, "loss": 3.0439, "step": 6200 }, { "epoch": 0.08, "grad_norm": 8.699640274047852, "learning_rate": 1.341191737860928e-05, "loss": 2.9275, "step": 6201 }, { "epoch": 0.08, "grad_norm": 7.264160633087158, "learning_rate": 1.3414080242240728e-05, "loss": 2.6013, "step": 6202 }, { "epoch": 0.08, "grad_norm": 7.621147155761719, "learning_rate": 1.3416243105872176e-05, "loss": 2.2988, "step": 6203 }, { "epoch": 0.08, "grad_norm": 7.438604354858398, "learning_rate": 1.3418405969503622e-05, "loss": 2.7721, "step": 6204 }, { "epoch": 0.08, "grad_norm": 8.744997024536133, "learning_rate": 1.3420568833135072e-05, "loss": 3.1658, "step": 6205 }, { "epoch": 0.08, "grad_norm": 7.9693284034729, "learning_rate": 1.342273169676652e-05, "loss": 2.8189, "step": 6206 }, { "epoch": 0.08, "grad_norm": 7.360033988952637, "learning_rate": 1.3424894560397968e-05, "loss": 2.1839, "step": 6207 }, { "epoch": 0.08, "grad_norm": 8.821785926818848, "learning_rate": 1.3427057424029418e-05, "loss": 2.5072, "step": 6208 }, { "epoch": 0.08, "grad_norm": 7.768228530883789, "learning_rate": 1.3429220287660864e-05, "loss": 2.931, "step": 6209 }, { "epoch": 0.08, "grad_norm": 8.602580070495605, "learning_rate": 1.3431383151292312e-05, "loss": 3.5138, "step": 6210 }, { "epoch": 0.08, "grad_norm": 8.128196716308594, "learning_rate": 1.343354601492376e-05, "loss": 2.7815, "step": 6211 }, { "epoch": 0.08, "grad_norm": 8.05331039428711, "learning_rate": 1.343570887855521e-05, "loss": 2.745, "step": 6212 }, { "epoch": 0.08, "grad_norm": 7.89741325378418, "learning_rate": 1.3437871742186656e-05, "loss": 2.6007, "step": 6213 }, { "epoch": 0.08, "grad_norm": 7.808135032653809, "learning_rate": 1.3440034605818104e-05, "loss": 2.1828, "step": 6214 }, { "epoch": 0.08, "grad_norm": 7.574982166290283, "learning_rate": 1.3442197469449552e-05, "loss": 2.9231, "step": 6215 }, { "epoch": 0.08, "grad_norm": 7.209649562835693, "learning_rate": 1.3444360333081e-05, "loss": 2.1778, "step": 6216 }, { "epoch": 0.08, "grad_norm": 7.588475227355957, "learning_rate": 1.3446523196712449e-05, "loss": 2.6163, "step": 6217 }, { "epoch": 0.08, "grad_norm": 7.671034336090088, "learning_rate": 1.3448686060343897e-05, "loss": 2.8211, "step": 6218 }, { "epoch": 0.08, "grad_norm": 7.015474319458008, "learning_rate": 1.3450848923975343e-05, "loss": 2.579, "step": 6219 }, { "epoch": 0.08, "grad_norm": 6.170175075531006, "learning_rate": 1.3453011787606793e-05, "loss": 2.2086, "step": 6220 }, { "epoch": 0.08, "grad_norm": 7.156073093414307, "learning_rate": 1.345517465123824e-05, "loss": 2.7855, "step": 6221 }, { "epoch": 0.08, "grad_norm": 6.772987365722656, "learning_rate": 1.3457337514869689e-05, "loss": 2.6582, "step": 6222 }, { "epoch": 0.08, "grad_norm": 7.224412441253662, "learning_rate": 1.3459500378501135e-05, "loss": 2.4198, "step": 6223 }, { "epoch": 0.08, "grad_norm": 8.870264053344727, "learning_rate": 1.3461663242132585e-05, "loss": 2.9152, "step": 6224 }, { "epoch": 0.08, "grad_norm": 7.850541591644287, "learning_rate": 1.3463826105764033e-05, "loss": 2.4742, "step": 6225 }, { "epoch": 0.08, "grad_norm": 8.979021072387695, "learning_rate": 1.3465988969395481e-05, "loss": 2.9224, "step": 6226 }, { "epoch": 0.08, "grad_norm": 7.516865253448486, "learning_rate": 1.3468151833026927e-05, "loss": 3.228, "step": 6227 }, { "epoch": 0.08, "grad_norm": 6.747982025146484, "learning_rate": 1.3470314696658377e-05, "loss": 2.2969, "step": 6228 }, { "epoch": 0.08, "grad_norm": 9.084113121032715, "learning_rate": 1.3472477560289825e-05, "loss": 2.711, "step": 6229 }, { "epoch": 0.08, "grad_norm": 7.809077739715576, "learning_rate": 1.3474640423921273e-05, "loss": 2.8455, "step": 6230 }, { "epoch": 0.08, "grad_norm": 8.783342361450195, "learning_rate": 1.347680328755272e-05, "loss": 2.8738, "step": 6231 }, { "epoch": 0.08, "grad_norm": 7.60977029800415, "learning_rate": 1.347896615118417e-05, "loss": 2.7303, "step": 6232 }, { "epoch": 0.08, "grad_norm": 7.8692193031311035, "learning_rate": 1.3481129014815617e-05, "loss": 2.79, "step": 6233 }, { "epoch": 0.08, "grad_norm": 7.582770824432373, "learning_rate": 1.3483291878447065e-05, "loss": 2.4465, "step": 6234 }, { "epoch": 0.08, "grad_norm": 6.9419708251953125, "learning_rate": 1.3485454742078512e-05, "loss": 2.775, "step": 6235 }, { "epoch": 0.08, "grad_norm": 7.105273723602295, "learning_rate": 1.3487617605709961e-05, "loss": 2.6798, "step": 6236 }, { "epoch": 0.08, "grad_norm": 6.956069469451904, "learning_rate": 1.348978046934141e-05, "loss": 2.7415, "step": 6237 }, { "epoch": 0.08, "grad_norm": 7.953881740570068, "learning_rate": 1.3491943332972856e-05, "loss": 2.8903, "step": 6238 }, { "epoch": 0.08, "grad_norm": 7.83951997756958, "learning_rate": 1.3494106196604304e-05, "loss": 2.6359, "step": 6239 }, { "epoch": 0.08, "grad_norm": 7.675478458404541, "learning_rate": 1.3496269060235754e-05, "loss": 2.6709, "step": 6240 }, { "epoch": 0.08, "grad_norm": 7.050457000732422, "learning_rate": 1.3498431923867202e-05, "loss": 2.1941, "step": 6241 }, { "epoch": 0.08, "grad_norm": 7.242096900939941, "learning_rate": 1.3500594787498648e-05, "loss": 2.6271, "step": 6242 }, { "epoch": 0.08, "grad_norm": 7.5188188552856445, "learning_rate": 1.3502757651130098e-05, "loss": 3.1714, "step": 6243 }, { "epoch": 0.08, "grad_norm": 7.470188140869141, "learning_rate": 1.3504920514761546e-05, "loss": 3.1067, "step": 6244 }, { "epoch": 0.08, "grad_norm": 6.571797847747803, "learning_rate": 1.3507083378392994e-05, "loss": 2.4309, "step": 6245 }, { "epoch": 0.08, "grad_norm": 8.902989387512207, "learning_rate": 1.350924624202444e-05, "loss": 3.3853, "step": 6246 }, { "epoch": 0.08, "grad_norm": 7.4433770179748535, "learning_rate": 1.351140910565589e-05, "loss": 2.7092, "step": 6247 }, { "epoch": 0.08, "grad_norm": 6.913280487060547, "learning_rate": 1.3513571969287338e-05, "loss": 3.109, "step": 6248 }, { "epoch": 0.08, "grad_norm": 7.500477313995361, "learning_rate": 1.3515734832918786e-05, "loss": 2.7584, "step": 6249 }, { "epoch": 0.08, "grad_norm": 8.0586519241333, "learning_rate": 1.3517897696550232e-05, "loss": 2.6599, "step": 6250 }, { "epoch": 0.08, "grad_norm": 7.913821220397949, "learning_rate": 1.3520060560181682e-05, "loss": 2.975, "step": 6251 }, { "epoch": 0.08, "grad_norm": 7.200356960296631, "learning_rate": 1.352222342381313e-05, "loss": 2.9016, "step": 6252 }, { "epoch": 0.08, "grad_norm": 7.3986992835998535, "learning_rate": 1.3524386287444577e-05, "loss": 2.6164, "step": 6253 }, { "epoch": 0.08, "grad_norm": 10.174155235290527, "learning_rate": 1.3526549151076025e-05, "loss": 3.1598, "step": 6254 }, { "epoch": 0.08, "grad_norm": 6.75588321685791, "learning_rate": 1.3528712014707474e-05, "loss": 2.653, "step": 6255 }, { "epoch": 0.08, "grad_norm": 8.261128425598145, "learning_rate": 1.3530874878338922e-05, "loss": 3.162, "step": 6256 }, { "epoch": 0.08, "grad_norm": 7.415981292724609, "learning_rate": 1.3533037741970369e-05, "loss": 2.5644, "step": 6257 }, { "epoch": 0.08, "grad_norm": 6.9201979637146, "learning_rate": 1.3535200605601817e-05, "loss": 2.7437, "step": 6258 }, { "epoch": 0.08, "grad_norm": 7.6160054206848145, "learning_rate": 1.3537363469233267e-05, "loss": 3.2443, "step": 6259 }, { "epoch": 0.08, "grad_norm": 7.756065368652344, "learning_rate": 1.3539526332864715e-05, "loss": 2.7558, "step": 6260 }, { "epoch": 0.08, "grad_norm": 7.749301910400391, "learning_rate": 1.3541689196496161e-05, "loss": 2.9931, "step": 6261 }, { "epoch": 0.08, "grad_norm": 7.592989444732666, "learning_rate": 1.3543852060127609e-05, "loss": 2.8179, "step": 6262 }, { "epoch": 0.08, "grad_norm": 7.949005126953125, "learning_rate": 1.3546014923759059e-05, "loss": 2.719, "step": 6263 }, { "epoch": 0.08, "grad_norm": 8.10416030883789, "learning_rate": 1.3548177787390507e-05, "loss": 2.9734, "step": 6264 }, { "epoch": 0.08, "grad_norm": 7.246780872344971, "learning_rate": 1.3550340651021953e-05, "loss": 2.7798, "step": 6265 }, { "epoch": 0.08, "grad_norm": 6.800934791564941, "learning_rate": 1.3552503514653401e-05, "loss": 2.4466, "step": 6266 }, { "epoch": 0.08, "grad_norm": 7.8263163566589355, "learning_rate": 1.3554666378284851e-05, "loss": 2.8544, "step": 6267 }, { "epoch": 0.08, "grad_norm": 7.6832427978515625, "learning_rate": 1.3556829241916299e-05, "loss": 2.2521, "step": 6268 }, { "epoch": 0.08, "grad_norm": 8.19549560546875, "learning_rate": 1.3558992105547745e-05, "loss": 2.6584, "step": 6269 }, { "epoch": 0.08, "grad_norm": 7.316251277923584, "learning_rate": 1.3561154969179194e-05, "loss": 2.5143, "step": 6270 }, { "epoch": 0.08, "grad_norm": 7.034874439239502, "learning_rate": 1.3563317832810643e-05, "loss": 2.852, "step": 6271 }, { "epoch": 0.08, "grad_norm": 7.6754231452941895, "learning_rate": 1.356548069644209e-05, "loss": 2.7307, "step": 6272 }, { "epoch": 0.08, "grad_norm": 8.037094116210938, "learning_rate": 1.3567643560073538e-05, "loss": 2.7932, "step": 6273 }, { "epoch": 0.08, "grad_norm": 8.278343200683594, "learning_rate": 1.3569806423704987e-05, "loss": 2.387, "step": 6274 }, { "epoch": 0.08, "grad_norm": 8.633848190307617, "learning_rate": 1.3571969287336435e-05, "loss": 2.8296, "step": 6275 }, { "epoch": 0.08, "grad_norm": 8.295913696289062, "learning_rate": 1.3574132150967882e-05, "loss": 2.978, "step": 6276 }, { "epoch": 0.08, "grad_norm": 7.599588394165039, "learning_rate": 1.357629501459933e-05, "loss": 2.6036, "step": 6277 }, { "epoch": 0.08, "grad_norm": 8.858132362365723, "learning_rate": 1.357845787823078e-05, "loss": 2.9549, "step": 6278 }, { "epoch": 0.08, "grad_norm": 8.24538516998291, "learning_rate": 1.3580620741862228e-05, "loss": 2.5047, "step": 6279 }, { "epoch": 0.08, "grad_norm": 8.280409812927246, "learning_rate": 1.3582783605493674e-05, "loss": 2.5488, "step": 6280 }, { "epoch": 0.08, "grad_norm": 7.864217758178711, "learning_rate": 1.3584946469125122e-05, "loss": 2.3574, "step": 6281 }, { "epoch": 0.08, "grad_norm": 8.61983585357666, "learning_rate": 1.3587109332756572e-05, "loss": 2.7725, "step": 6282 }, { "epoch": 0.08, "grad_norm": 8.792448043823242, "learning_rate": 1.358927219638802e-05, "loss": 2.8921, "step": 6283 }, { "epoch": 0.08, "grad_norm": 7.523017883300781, "learning_rate": 1.3591435060019466e-05, "loss": 2.3824, "step": 6284 }, { "epoch": 0.08, "grad_norm": 8.674992561340332, "learning_rate": 1.3593597923650914e-05, "loss": 2.5679, "step": 6285 }, { "epoch": 0.08, "grad_norm": 9.768879890441895, "learning_rate": 1.3595760787282364e-05, "loss": 2.8407, "step": 6286 }, { "epoch": 0.08, "grad_norm": 6.805103778839111, "learning_rate": 1.3597923650913812e-05, "loss": 2.232, "step": 6287 }, { "epoch": 0.08, "grad_norm": 7.7946882247924805, "learning_rate": 1.3600086514545258e-05, "loss": 3.0524, "step": 6288 }, { "epoch": 0.08, "grad_norm": 8.658424377441406, "learning_rate": 1.3602249378176706e-05, "loss": 3.1065, "step": 6289 }, { "epoch": 0.08, "grad_norm": 8.716094970703125, "learning_rate": 1.3604412241808156e-05, "loss": 3.0568, "step": 6290 }, { "epoch": 0.08, "grad_norm": 7.25674295425415, "learning_rate": 1.3606575105439603e-05, "loss": 2.3522, "step": 6291 }, { "epoch": 0.08, "grad_norm": 7.502932548522949, "learning_rate": 1.360873796907105e-05, "loss": 2.2692, "step": 6292 }, { "epoch": 0.08, "grad_norm": 7.607326507568359, "learning_rate": 1.3610900832702499e-05, "loss": 2.3919, "step": 6293 }, { "epoch": 0.08, "grad_norm": 6.971780776977539, "learning_rate": 1.3613063696333948e-05, "loss": 2.3394, "step": 6294 }, { "epoch": 0.08, "grad_norm": 7.258949279785156, "learning_rate": 1.3615226559965395e-05, "loss": 2.5792, "step": 6295 }, { "epoch": 0.08, "grad_norm": 8.589821815490723, "learning_rate": 1.3617389423596843e-05, "loss": 3.4532, "step": 6296 }, { "epoch": 0.08, "grad_norm": 8.806734085083008, "learning_rate": 1.3619552287228291e-05, "loss": 2.8234, "step": 6297 }, { "epoch": 0.08, "grad_norm": 8.099785804748535, "learning_rate": 1.362171515085974e-05, "loss": 2.5482, "step": 6298 }, { "epoch": 0.08, "grad_norm": 7.704052925109863, "learning_rate": 1.3623878014491187e-05, "loss": 2.5223, "step": 6299 }, { "epoch": 0.08, "grad_norm": 7.60073184967041, "learning_rate": 1.3626040878122635e-05, "loss": 2.4992, "step": 6300 }, { "epoch": 0.08, "grad_norm": 7.971288204193115, "learning_rate": 1.3628203741754083e-05, "loss": 2.7084, "step": 6301 }, { "epoch": 0.08, "grad_norm": 8.628002166748047, "learning_rate": 1.3630366605385533e-05, "loss": 2.4111, "step": 6302 }, { "epoch": 0.08, "grad_norm": 7.726466655731201, "learning_rate": 1.363252946901698e-05, "loss": 2.7071, "step": 6303 }, { "epoch": 0.08, "grad_norm": 7.392821311950684, "learning_rate": 1.3634692332648427e-05, "loss": 2.3673, "step": 6304 }, { "epoch": 0.08, "grad_norm": 7.390059947967529, "learning_rate": 1.3636855196279877e-05, "loss": 2.541, "step": 6305 }, { "epoch": 0.08, "grad_norm": 7.511632919311523, "learning_rate": 1.3639018059911325e-05, "loss": 2.9404, "step": 6306 }, { "epoch": 0.08, "grad_norm": 7.763047218322754, "learning_rate": 1.3641180923542771e-05, "loss": 2.9934, "step": 6307 }, { "epoch": 0.08, "grad_norm": 7.881462097167969, "learning_rate": 1.364334378717422e-05, "loss": 2.6341, "step": 6308 }, { "epoch": 0.08, "grad_norm": 8.381460189819336, "learning_rate": 1.364550665080567e-05, "loss": 2.4276, "step": 6309 }, { "epoch": 0.08, "grad_norm": 6.966101169586182, "learning_rate": 1.3647669514437116e-05, "loss": 2.6791, "step": 6310 }, { "epoch": 0.08, "grad_norm": 7.5292649269104, "learning_rate": 1.3649832378068564e-05, "loss": 2.488, "step": 6311 }, { "epoch": 0.08, "grad_norm": 7.172763347625732, "learning_rate": 1.3651995241700012e-05, "loss": 2.6368, "step": 6312 }, { "epoch": 0.08, "grad_norm": 7.776808738708496, "learning_rate": 1.3654158105331461e-05, "loss": 2.8823, "step": 6313 }, { "epoch": 0.08, "grad_norm": 7.1306281089782715, "learning_rate": 1.3656320968962908e-05, "loss": 2.8263, "step": 6314 }, { "epoch": 0.08, "grad_norm": 8.1637601852417, "learning_rate": 1.3658483832594356e-05, "loss": 2.4787, "step": 6315 }, { "epoch": 0.08, "grad_norm": 8.329858779907227, "learning_rate": 1.3660646696225804e-05, "loss": 2.5543, "step": 6316 }, { "epoch": 0.08, "grad_norm": 6.919152736663818, "learning_rate": 1.3662809559857254e-05, "loss": 2.319, "step": 6317 }, { "epoch": 0.08, "grad_norm": 8.084076881408691, "learning_rate": 1.36649724234887e-05, "loss": 2.4343, "step": 6318 }, { "epoch": 0.08, "grad_norm": 8.305705070495605, "learning_rate": 1.3667135287120148e-05, "loss": 2.7066, "step": 6319 }, { "epoch": 0.08, "grad_norm": 7.91428279876709, "learning_rate": 1.3669298150751596e-05, "loss": 2.5686, "step": 6320 }, { "epoch": 0.08, "grad_norm": 7.402216911315918, "learning_rate": 1.3671461014383046e-05, "loss": 3.0084, "step": 6321 }, { "epoch": 0.08, "grad_norm": 8.630361557006836, "learning_rate": 1.3673623878014492e-05, "loss": 2.4916, "step": 6322 }, { "epoch": 0.08, "grad_norm": 7.023933410644531, "learning_rate": 1.367578674164594e-05, "loss": 2.4365, "step": 6323 }, { "epoch": 0.08, "grad_norm": 8.105660438537598, "learning_rate": 1.3677949605277388e-05, "loss": 2.7633, "step": 6324 }, { "epoch": 0.08, "grad_norm": 6.778051376342773, "learning_rate": 1.3680112468908838e-05, "loss": 2.9259, "step": 6325 }, { "epoch": 0.08, "grad_norm": 8.584957122802734, "learning_rate": 1.3682275332540284e-05, "loss": 3.0055, "step": 6326 }, { "epoch": 0.08, "grad_norm": 7.670901298522949, "learning_rate": 1.3684438196171732e-05, "loss": 2.5009, "step": 6327 }, { "epoch": 0.08, "grad_norm": 7.4318013191223145, "learning_rate": 1.3686601059803179e-05, "loss": 2.2952, "step": 6328 }, { "epoch": 0.08, "grad_norm": 6.726641654968262, "learning_rate": 1.3688763923434629e-05, "loss": 2.3681, "step": 6329 }, { "epoch": 0.08, "grad_norm": 7.891271114349365, "learning_rate": 1.3690926787066077e-05, "loss": 2.714, "step": 6330 }, { "epoch": 0.08, "grad_norm": 6.554643154144287, "learning_rate": 1.3693089650697525e-05, "loss": 2.3614, "step": 6331 }, { "epoch": 0.08, "grad_norm": 8.494122505187988, "learning_rate": 1.3695252514328971e-05, "loss": 3.1104, "step": 6332 }, { "epoch": 0.08, "grad_norm": 9.4583101272583, "learning_rate": 1.369741537796042e-05, "loss": 2.814, "step": 6333 }, { "epoch": 0.08, "grad_norm": 7.693535327911377, "learning_rate": 1.3699578241591869e-05, "loss": 3.1076, "step": 6334 }, { "epoch": 0.08, "grad_norm": 7.9931721687316895, "learning_rate": 1.3701741105223317e-05, "loss": 2.9114, "step": 6335 }, { "epoch": 0.08, "grad_norm": 6.978144645690918, "learning_rate": 1.3703903968854767e-05, "loss": 2.1996, "step": 6336 }, { "epoch": 0.08, "grad_norm": 6.928597927093506, "learning_rate": 1.3706066832486213e-05, "loss": 2.6461, "step": 6337 }, { "epoch": 0.08, "grad_norm": 9.341429710388184, "learning_rate": 1.3708229696117661e-05, "loss": 3.268, "step": 6338 }, { "epoch": 0.08, "grad_norm": 7.473873138427734, "learning_rate": 1.3710392559749109e-05, "loss": 2.4174, "step": 6339 }, { "epoch": 0.08, "grad_norm": 7.573358535766602, "learning_rate": 1.3712555423380559e-05, "loss": 2.6049, "step": 6340 }, { "epoch": 0.08, "grad_norm": 7.998002529144287, "learning_rate": 1.3714718287012005e-05, "loss": 2.5445, "step": 6341 }, { "epoch": 0.08, "grad_norm": 6.818118572235107, "learning_rate": 1.3716881150643453e-05, "loss": 2.9582, "step": 6342 }, { "epoch": 0.08, "grad_norm": 7.132383346557617, "learning_rate": 1.3719044014274901e-05, "loss": 2.6744, "step": 6343 }, { "epoch": 0.08, "grad_norm": 6.800833225250244, "learning_rate": 1.372120687790635e-05, "loss": 2.7168, "step": 6344 }, { "epoch": 0.08, "grad_norm": 7.275590419769287, "learning_rate": 1.3723369741537797e-05, "loss": 2.6186, "step": 6345 }, { "epoch": 0.08, "grad_norm": 7.47357702255249, "learning_rate": 1.3725532605169245e-05, "loss": 2.1483, "step": 6346 }, { "epoch": 0.08, "grad_norm": 8.299281120300293, "learning_rate": 1.3727695468800692e-05, "loss": 2.9946, "step": 6347 }, { "epoch": 0.08, "grad_norm": 6.779168128967285, "learning_rate": 1.3729858332432141e-05, "loss": 2.8552, "step": 6348 }, { "epoch": 0.08, "grad_norm": 7.079296588897705, "learning_rate": 1.373202119606359e-05, "loss": 2.5876, "step": 6349 }, { "epoch": 0.08, "grad_norm": 7.454261779785156, "learning_rate": 1.3734184059695038e-05, "loss": 2.7873, "step": 6350 }, { "epoch": 0.08, "grad_norm": 8.29098892211914, "learning_rate": 1.3736346923326484e-05, "loss": 3.1577, "step": 6351 }, { "epoch": 0.08, "grad_norm": 8.384931564331055, "learning_rate": 1.3738509786957934e-05, "loss": 3.8156, "step": 6352 }, { "epoch": 0.08, "grad_norm": 7.876402378082275, "learning_rate": 1.3740672650589382e-05, "loss": 2.3948, "step": 6353 }, { "epoch": 0.08, "grad_norm": 7.438085556030273, "learning_rate": 1.374283551422083e-05, "loss": 2.9128, "step": 6354 }, { "epoch": 0.08, "grad_norm": 7.321775436401367, "learning_rate": 1.3744998377852276e-05, "loss": 2.7301, "step": 6355 }, { "epoch": 0.08, "grad_norm": 8.05718994140625, "learning_rate": 1.3747161241483726e-05, "loss": 2.9946, "step": 6356 }, { "epoch": 0.08, "grad_norm": 6.355678081512451, "learning_rate": 1.3749324105115174e-05, "loss": 2.3357, "step": 6357 }, { "epoch": 0.08, "grad_norm": 8.091902732849121, "learning_rate": 1.3751486968746622e-05, "loss": 2.8782, "step": 6358 }, { "epoch": 0.08, "grad_norm": 8.86709976196289, "learning_rate": 1.3753649832378068e-05, "loss": 3.2019, "step": 6359 }, { "epoch": 0.08, "grad_norm": 7.551170825958252, "learning_rate": 1.3755812696009518e-05, "loss": 2.9816, "step": 6360 }, { "epoch": 0.08, "grad_norm": 8.323280334472656, "learning_rate": 1.3757975559640966e-05, "loss": 2.9464, "step": 6361 }, { "epoch": 0.08, "grad_norm": 7.363241672515869, "learning_rate": 1.3760138423272414e-05, "loss": 2.7154, "step": 6362 }, { "epoch": 0.08, "grad_norm": 6.957363605499268, "learning_rate": 1.376230128690386e-05, "loss": 2.5435, "step": 6363 }, { "epoch": 0.08, "grad_norm": 7.007710933685303, "learning_rate": 1.376446415053531e-05, "loss": 2.4983, "step": 6364 }, { "epoch": 0.08, "grad_norm": 7.2559919357299805, "learning_rate": 1.3766627014166758e-05, "loss": 2.7175, "step": 6365 }, { "epoch": 0.08, "grad_norm": 6.899861812591553, "learning_rate": 1.3768789877798205e-05, "loss": 2.3379, "step": 6366 }, { "epoch": 0.08, "grad_norm": 7.413211345672607, "learning_rate": 1.3770952741429653e-05, "loss": 2.2633, "step": 6367 }, { "epoch": 0.08, "grad_norm": 8.055787086486816, "learning_rate": 1.3773115605061102e-05, "loss": 2.6625, "step": 6368 }, { "epoch": 0.08, "grad_norm": 8.990213394165039, "learning_rate": 1.377527846869255e-05, "loss": 2.5079, "step": 6369 }, { "epoch": 0.08, "grad_norm": 6.72325325012207, "learning_rate": 1.3777441332323997e-05, "loss": 2.3515, "step": 6370 }, { "epoch": 0.08, "grad_norm": 7.802762031555176, "learning_rate": 1.3779604195955447e-05, "loss": 2.3574, "step": 6371 }, { "epoch": 0.08, "grad_norm": 8.20416259765625, "learning_rate": 1.3781767059586895e-05, "loss": 2.708, "step": 6372 }, { "epoch": 0.08, "grad_norm": 6.281889915466309, "learning_rate": 1.3783929923218343e-05, "loss": 2.2867, "step": 6373 }, { "epoch": 0.08, "grad_norm": 8.459810256958008, "learning_rate": 1.3786092786849789e-05, "loss": 2.7191, "step": 6374 }, { "epoch": 0.08, "grad_norm": 8.234978675842285, "learning_rate": 1.3788255650481239e-05, "loss": 2.5102, "step": 6375 }, { "epoch": 0.08, "grad_norm": 6.648728847503662, "learning_rate": 1.3790418514112687e-05, "loss": 1.7999, "step": 6376 }, { "epoch": 0.08, "grad_norm": 6.600191593170166, "learning_rate": 1.3792581377744135e-05, "loss": 2.5389, "step": 6377 }, { "epoch": 0.08, "grad_norm": 8.152420997619629, "learning_rate": 1.3794744241375581e-05, "loss": 2.5802, "step": 6378 }, { "epoch": 0.08, "grad_norm": 7.5836567878723145, "learning_rate": 1.3796907105007031e-05, "loss": 2.2277, "step": 6379 }, { "epoch": 0.08, "grad_norm": 7.6204657554626465, "learning_rate": 1.3799069968638479e-05, "loss": 3.4505, "step": 6380 }, { "epoch": 0.08, "grad_norm": 7.710048675537109, "learning_rate": 1.3801232832269925e-05, "loss": 3.4606, "step": 6381 }, { "epoch": 0.08, "grad_norm": 7.543856620788574, "learning_rate": 1.3803395695901374e-05, "loss": 2.6873, "step": 6382 }, { "epoch": 0.08, "grad_norm": 7.718710899353027, "learning_rate": 1.3805558559532823e-05, "loss": 2.5633, "step": 6383 }, { "epoch": 0.08, "grad_norm": 6.7050347328186035, "learning_rate": 1.3807721423164271e-05, "loss": 1.9392, "step": 6384 }, { "epoch": 0.08, "grad_norm": 6.6759443283081055, "learning_rate": 1.3809884286795718e-05, "loss": 2.7529, "step": 6385 }, { "epoch": 0.08, "grad_norm": 7.9080963134765625, "learning_rate": 1.3812047150427166e-05, "loss": 2.7524, "step": 6386 }, { "epoch": 0.08, "grad_norm": 7.559596061706543, "learning_rate": 1.3814210014058615e-05, "loss": 2.966, "step": 6387 }, { "epoch": 0.08, "grad_norm": 7.6681318283081055, "learning_rate": 1.3816372877690063e-05, "loss": 2.7606, "step": 6388 }, { "epoch": 0.08, "grad_norm": 7.147913932800293, "learning_rate": 1.381853574132151e-05, "loss": 2.8947, "step": 6389 }, { "epoch": 0.08, "grad_norm": 6.477366924285889, "learning_rate": 1.3820698604952958e-05, "loss": 2.5461, "step": 6390 }, { "epoch": 0.08, "grad_norm": 6.8721771240234375, "learning_rate": 1.3822861468584408e-05, "loss": 2.6432, "step": 6391 }, { "epoch": 0.08, "grad_norm": 7.069347858428955, "learning_rate": 1.3825024332215856e-05, "loss": 2.5486, "step": 6392 }, { "epoch": 0.08, "grad_norm": 7.815762519836426, "learning_rate": 1.3827187195847302e-05, "loss": 2.5216, "step": 6393 }, { "epoch": 0.08, "grad_norm": 7.879483699798584, "learning_rate": 1.382935005947875e-05, "loss": 2.5887, "step": 6394 }, { "epoch": 0.08, "grad_norm": 7.299379825592041, "learning_rate": 1.38315129231102e-05, "loss": 2.8306, "step": 6395 }, { "epoch": 0.08, "grad_norm": 7.4766130447387695, "learning_rate": 1.3833675786741648e-05, "loss": 2.7587, "step": 6396 }, { "epoch": 0.08, "grad_norm": 6.626262187957764, "learning_rate": 1.3835838650373094e-05, "loss": 2.8381, "step": 6397 }, { "epoch": 0.08, "grad_norm": 6.760823726654053, "learning_rate": 1.3838001514004542e-05, "loss": 2.6448, "step": 6398 }, { "epoch": 0.08, "grad_norm": 8.249809265136719, "learning_rate": 1.3840164377635992e-05, "loss": 2.5087, "step": 6399 }, { "epoch": 0.08, "grad_norm": 7.610337734222412, "learning_rate": 1.3842327241267438e-05, "loss": 2.7627, "step": 6400 }, { "epoch": 0.08, "grad_norm": 7.683042049407959, "learning_rate": 1.3844490104898886e-05, "loss": 2.5448, "step": 6401 }, { "epoch": 0.08, "grad_norm": 7.377992630004883, "learning_rate": 1.3846652968530336e-05, "loss": 2.8529, "step": 6402 }, { "epoch": 0.08, "grad_norm": 7.73309850692749, "learning_rate": 1.3848815832161784e-05, "loss": 2.7144, "step": 6403 }, { "epoch": 0.08, "grad_norm": 7.142841815948486, "learning_rate": 1.385097869579323e-05, "loss": 2.2976, "step": 6404 }, { "epoch": 0.08, "grad_norm": 7.183588027954102, "learning_rate": 1.3853141559424679e-05, "loss": 2.8274, "step": 6405 }, { "epoch": 0.08, "grad_norm": 7.025176525115967, "learning_rate": 1.3855304423056128e-05, "loss": 2.7793, "step": 6406 }, { "epoch": 0.08, "grad_norm": 7.153785705566406, "learning_rate": 1.3857467286687576e-05, "loss": 2.1873, "step": 6407 }, { "epoch": 0.08, "grad_norm": 7.419158935546875, "learning_rate": 1.3859630150319023e-05, "loss": 2.5267, "step": 6408 }, { "epoch": 0.08, "grad_norm": 6.918591499328613, "learning_rate": 1.3861793013950471e-05, "loss": 2.6169, "step": 6409 }, { "epoch": 0.08, "grad_norm": 7.8665452003479, "learning_rate": 1.386395587758192e-05, "loss": 2.8109, "step": 6410 }, { "epoch": 0.08, "grad_norm": 7.443798542022705, "learning_rate": 1.3866118741213369e-05, "loss": 2.8911, "step": 6411 }, { "epoch": 0.08, "grad_norm": 7.807815074920654, "learning_rate": 1.3868281604844815e-05, "loss": 2.7779, "step": 6412 }, { "epoch": 0.08, "grad_norm": 9.044708251953125, "learning_rate": 1.3870444468476263e-05, "loss": 3.3636, "step": 6413 }, { "epoch": 0.08, "grad_norm": 8.681258201599121, "learning_rate": 1.3872607332107713e-05, "loss": 2.8482, "step": 6414 }, { "epoch": 0.08, "grad_norm": 7.503198623657227, "learning_rate": 1.3874770195739161e-05, "loss": 2.3545, "step": 6415 }, { "epoch": 0.08, "grad_norm": 7.286610126495361, "learning_rate": 1.3876933059370607e-05, "loss": 2.0832, "step": 6416 }, { "epoch": 0.08, "grad_norm": 7.82317590713501, "learning_rate": 1.3879095923002055e-05, "loss": 2.6386, "step": 6417 }, { "epoch": 0.08, "grad_norm": 7.762314319610596, "learning_rate": 1.3881258786633505e-05, "loss": 2.846, "step": 6418 }, { "epoch": 0.08, "grad_norm": 7.564884662628174, "learning_rate": 1.3883421650264951e-05, "loss": 2.4169, "step": 6419 }, { "epoch": 0.08, "grad_norm": 7.288666725158691, "learning_rate": 1.38855845138964e-05, "loss": 2.5932, "step": 6420 }, { "epoch": 0.08, "grad_norm": 7.338698387145996, "learning_rate": 1.3887747377527847e-05, "loss": 2.733, "step": 6421 }, { "epoch": 0.08, "grad_norm": 8.005484580993652, "learning_rate": 1.3889910241159297e-05, "loss": 2.2754, "step": 6422 }, { "epoch": 0.08, "grad_norm": 8.203570365905762, "learning_rate": 1.3892073104790744e-05, "loss": 2.5306, "step": 6423 }, { "epoch": 0.08, "grad_norm": 8.425583839416504, "learning_rate": 1.3894235968422192e-05, "loss": 2.6976, "step": 6424 }, { "epoch": 0.08, "grad_norm": 7.196644306182861, "learning_rate": 1.389639883205364e-05, "loss": 1.8173, "step": 6425 }, { "epoch": 0.08, "grad_norm": 7.1124091148376465, "learning_rate": 1.389856169568509e-05, "loss": 2.3139, "step": 6426 }, { "epoch": 0.08, "grad_norm": 7.33018684387207, "learning_rate": 1.3900724559316536e-05, "loss": 2.2542, "step": 6427 }, { "epoch": 0.08, "grad_norm": 8.105576515197754, "learning_rate": 1.3902887422947984e-05, "loss": 2.4913, "step": 6428 }, { "epoch": 0.08, "grad_norm": 7.224588871002197, "learning_rate": 1.3905050286579432e-05, "loss": 2.1224, "step": 6429 }, { "epoch": 0.08, "grad_norm": 8.951675415039062, "learning_rate": 1.3907213150210882e-05, "loss": 3.1602, "step": 6430 }, { "epoch": 0.08, "grad_norm": 7.835674285888672, "learning_rate": 1.3909376013842328e-05, "loss": 3.3101, "step": 6431 }, { "epoch": 0.08, "grad_norm": 7.811148643493652, "learning_rate": 1.3911538877473776e-05, "loss": 2.4857, "step": 6432 }, { "epoch": 0.08, "grad_norm": 7.495560169219971, "learning_rate": 1.3913701741105226e-05, "loss": 2.5003, "step": 6433 }, { "epoch": 0.08, "grad_norm": 7.4551262855529785, "learning_rate": 1.3915864604736674e-05, "loss": 2.5209, "step": 6434 }, { "epoch": 0.08, "grad_norm": 7.099211692810059, "learning_rate": 1.391802746836812e-05, "loss": 2.6182, "step": 6435 }, { "epoch": 0.08, "grad_norm": 8.501432418823242, "learning_rate": 1.3920190331999568e-05, "loss": 2.7197, "step": 6436 }, { "epoch": 0.08, "grad_norm": 7.6617302894592285, "learning_rate": 1.3922353195631018e-05, "loss": 2.5522, "step": 6437 }, { "epoch": 0.08, "grad_norm": 8.476426124572754, "learning_rate": 1.3924516059262464e-05, "loss": 2.727, "step": 6438 }, { "epoch": 0.08, "grad_norm": 6.8146209716796875, "learning_rate": 1.3926678922893912e-05, "loss": 2.1843, "step": 6439 }, { "epoch": 0.08, "grad_norm": 7.413368225097656, "learning_rate": 1.392884178652536e-05, "loss": 2.316, "step": 6440 }, { "epoch": 0.08, "grad_norm": 7.67479944229126, "learning_rate": 1.393100465015681e-05, "loss": 3.1896, "step": 6441 }, { "epoch": 0.08, "grad_norm": 7.607053756713867, "learning_rate": 1.3933167513788257e-05, "loss": 2.8705, "step": 6442 }, { "epoch": 0.08, "grad_norm": 7.845946788787842, "learning_rate": 1.3935330377419705e-05, "loss": 2.8531, "step": 6443 }, { "epoch": 0.08, "grad_norm": 7.173470497131348, "learning_rate": 1.3937493241051153e-05, "loss": 2.8181, "step": 6444 }, { "epoch": 0.08, "grad_norm": 7.085753917694092, "learning_rate": 1.3939656104682602e-05, "loss": 2.9249, "step": 6445 }, { "epoch": 0.08, "grad_norm": 6.8485517501831055, "learning_rate": 1.3941818968314049e-05, "loss": 2.7479, "step": 6446 }, { "epoch": 0.08, "grad_norm": 8.308860778808594, "learning_rate": 1.3943981831945497e-05, "loss": 2.7977, "step": 6447 }, { "epoch": 0.08, "grad_norm": 7.962652206420898, "learning_rate": 1.3946144695576945e-05, "loss": 2.7175, "step": 6448 }, { "epoch": 0.08, "grad_norm": 7.607723712921143, "learning_rate": 1.3948307559208395e-05, "loss": 2.2349, "step": 6449 }, { "epoch": 0.08, "grad_norm": 7.34318733215332, "learning_rate": 1.3950470422839841e-05, "loss": 2.4492, "step": 6450 }, { "epoch": 0.08, "grad_norm": 7.099628925323486, "learning_rate": 1.3952633286471289e-05, "loss": 2.5503, "step": 6451 }, { "epoch": 0.08, "grad_norm": 7.876709461212158, "learning_rate": 1.3954796150102737e-05, "loss": 2.7165, "step": 6452 }, { "epoch": 0.08, "grad_norm": 6.6342267990112305, "learning_rate": 1.3956959013734185e-05, "loss": 2.5056, "step": 6453 }, { "epoch": 0.08, "grad_norm": 8.399118423461914, "learning_rate": 1.3959121877365633e-05, "loss": 2.7465, "step": 6454 }, { "epoch": 0.08, "grad_norm": 7.575377464294434, "learning_rate": 1.3961284740997081e-05, "loss": 2.6482, "step": 6455 }, { "epoch": 0.08, "grad_norm": 7.5543212890625, "learning_rate": 1.3963447604628528e-05, "loss": 2.7794, "step": 6456 }, { "epoch": 0.08, "grad_norm": 6.926211357116699, "learning_rate": 1.3965610468259977e-05, "loss": 3.0066, "step": 6457 }, { "epoch": 0.08, "grad_norm": 7.288671970367432, "learning_rate": 1.3967773331891425e-05, "loss": 2.5948, "step": 6458 }, { "epoch": 0.08, "grad_norm": 6.530013561248779, "learning_rate": 1.3969936195522873e-05, "loss": 2.5247, "step": 6459 }, { "epoch": 0.08, "grad_norm": 7.009833335876465, "learning_rate": 1.397209905915432e-05, "loss": 2.0148, "step": 6460 }, { "epoch": 0.08, "grad_norm": 7.007157325744629, "learning_rate": 1.397426192278577e-05, "loss": 2.4016, "step": 6461 }, { "epoch": 0.08, "grad_norm": 7.665502548217773, "learning_rate": 1.3976424786417218e-05, "loss": 2.4299, "step": 6462 }, { "epoch": 0.08, "grad_norm": 8.671428680419922, "learning_rate": 1.3978587650048666e-05, "loss": 2.7763, "step": 6463 }, { "epoch": 0.08, "grad_norm": 6.917350769042969, "learning_rate": 1.3980750513680115e-05, "loss": 2.2891, "step": 6464 }, { "epoch": 0.08, "grad_norm": 6.838197231292725, "learning_rate": 1.3982913377311562e-05, "loss": 2.5532, "step": 6465 }, { "epoch": 0.08, "grad_norm": 5.9064040184021, "learning_rate": 1.398507624094301e-05, "loss": 2.2541, "step": 6466 }, { "epoch": 0.08, "grad_norm": 8.392215728759766, "learning_rate": 1.3987239104574458e-05, "loss": 2.5103, "step": 6467 }, { "epoch": 0.08, "grad_norm": 8.6586332321167, "learning_rate": 1.3989401968205908e-05, "loss": 2.9686, "step": 6468 }, { "epoch": 0.08, "grad_norm": 7.554845809936523, "learning_rate": 1.3991564831837354e-05, "loss": 2.8996, "step": 6469 }, { "epoch": 0.08, "grad_norm": 7.510504722595215, "learning_rate": 1.3993727695468802e-05, "loss": 2.5196, "step": 6470 }, { "epoch": 0.08, "grad_norm": 8.306487083435059, "learning_rate": 1.399589055910025e-05, "loss": 2.8757, "step": 6471 }, { "epoch": 0.08, "grad_norm": 8.439087867736816, "learning_rate": 1.3998053422731698e-05, "loss": 2.8668, "step": 6472 }, { "epoch": 0.08, "grad_norm": 8.475357055664062, "learning_rate": 1.4000216286363146e-05, "loss": 2.5982, "step": 6473 }, { "epoch": 0.08, "grad_norm": 7.133349895477295, "learning_rate": 1.4002379149994594e-05, "loss": 1.9946, "step": 6474 }, { "epoch": 0.08, "grad_norm": 7.965028285980225, "learning_rate": 1.400454201362604e-05, "loss": 2.9882, "step": 6475 }, { "epoch": 0.08, "grad_norm": 8.771932601928711, "learning_rate": 1.400670487725749e-05, "loss": 3.0496, "step": 6476 }, { "epoch": 0.08, "grad_norm": 7.2918829917907715, "learning_rate": 1.4008867740888938e-05, "loss": 2.7113, "step": 6477 }, { "epoch": 0.08, "grad_norm": 7.59263801574707, "learning_rate": 1.4011030604520386e-05, "loss": 2.505, "step": 6478 }, { "epoch": 0.08, "grad_norm": 8.226314544677734, "learning_rate": 1.4013193468151833e-05, "loss": 2.812, "step": 6479 }, { "epoch": 0.08, "grad_norm": 8.510703086853027, "learning_rate": 1.4015356331783282e-05, "loss": 2.9885, "step": 6480 }, { "epoch": 0.08, "grad_norm": 8.103315353393555, "learning_rate": 1.401751919541473e-05, "loss": 2.5171, "step": 6481 }, { "epoch": 0.08, "grad_norm": 8.042390823364258, "learning_rate": 1.4019682059046179e-05, "loss": 2.8018, "step": 6482 }, { "epoch": 0.08, "grad_norm": 8.35622501373291, "learning_rate": 1.4021844922677625e-05, "loss": 2.1855, "step": 6483 }, { "epoch": 0.08, "grad_norm": 8.637993812561035, "learning_rate": 1.4024007786309075e-05, "loss": 2.8157, "step": 6484 }, { "epoch": 0.08, "grad_norm": 8.531379699707031, "learning_rate": 1.4026170649940523e-05, "loss": 2.8007, "step": 6485 }, { "epoch": 0.08, "grad_norm": 7.268194198608398, "learning_rate": 1.402833351357197e-05, "loss": 2.4569, "step": 6486 }, { "epoch": 0.08, "grad_norm": 7.439474105834961, "learning_rate": 1.4030496377203417e-05, "loss": 2.2636, "step": 6487 }, { "epoch": 0.08, "grad_norm": 7.999350070953369, "learning_rate": 1.4032659240834867e-05, "loss": 2.5217, "step": 6488 }, { "epoch": 0.08, "grad_norm": 7.330915451049805, "learning_rate": 1.4034822104466315e-05, "loss": 2.6935, "step": 6489 }, { "epoch": 0.08, "grad_norm": 7.18718147277832, "learning_rate": 1.4036984968097761e-05, "loss": 2.6081, "step": 6490 }, { "epoch": 0.08, "grad_norm": 8.427255630493164, "learning_rate": 1.403914783172921e-05, "loss": 2.8209, "step": 6491 }, { "epoch": 0.08, "grad_norm": 7.1306471824646, "learning_rate": 1.4041310695360659e-05, "loss": 2.5008, "step": 6492 }, { "epoch": 0.08, "grad_norm": 7.681708335876465, "learning_rate": 1.4043473558992107e-05, "loss": 2.9529, "step": 6493 }, { "epoch": 0.08, "grad_norm": 6.951831340789795, "learning_rate": 1.4045636422623553e-05, "loss": 2.3017, "step": 6494 }, { "epoch": 0.08, "grad_norm": 6.353428840637207, "learning_rate": 1.4047799286255002e-05, "loss": 2.5387, "step": 6495 }, { "epoch": 0.08, "grad_norm": 7.19688606262207, "learning_rate": 1.4049962149886451e-05, "loss": 2.4296, "step": 6496 }, { "epoch": 0.08, "grad_norm": 6.436523914337158, "learning_rate": 1.40521250135179e-05, "loss": 2.5966, "step": 6497 }, { "epoch": 0.08, "grad_norm": 7.188930034637451, "learning_rate": 1.4054287877149346e-05, "loss": 2.7615, "step": 6498 }, { "epoch": 0.08, "grad_norm": 9.106196403503418, "learning_rate": 1.4056450740780795e-05, "loss": 3.2435, "step": 6499 }, { "epoch": 0.08, "grad_norm": 6.855627059936523, "learning_rate": 1.4058613604412243e-05, "loss": 2.6583, "step": 6500 }, { "epoch": 0.08, "grad_norm": 7.794727325439453, "learning_rate": 1.4060776468043692e-05, "loss": 3.1682, "step": 6501 }, { "epoch": 0.08, "grad_norm": 7.8717427253723145, "learning_rate": 1.4062939331675138e-05, "loss": 2.9002, "step": 6502 }, { "epoch": 0.08, "grad_norm": 8.328800201416016, "learning_rate": 1.4065102195306588e-05, "loss": 2.953, "step": 6503 }, { "epoch": 0.08, "grad_norm": 7.080404281616211, "learning_rate": 1.4067265058938036e-05, "loss": 2.9435, "step": 6504 }, { "epoch": 0.08, "grad_norm": 8.573179244995117, "learning_rate": 1.4069427922569484e-05, "loss": 2.8473, "step": 6505 }, { "epoch": 0.08, "grad_norm": 7.369444847106934, "learning_rate": 1.407159078620093e-05, "loss": 2.1342, "step": 6506 }, { "epoch": 0.08, "grad_norm": 7.551629543304443, "learning_rate": 1.407375364983238e-05, "loss": 3.0561, "step": 6507 }, { "epoch": 0.08, "grad_norm": 8.113465309143066, "learning_rate": 1.4075916513463828e-05, "loss": 2.6248, "step": 6508 }, { "epoch": 0.08, "grad_norm": 7.023022174835205, "learning_rate": 1.4078079377095274e-05, "loss": 2.6198, "step": 6509 }, { "epoch": 0.08, "grad_norm": 6.815483093261719, "learning_rate": 1.4080242240726722e-05, "loss": 2.3798, "step": 6510 }, { "epoch": 0.08, "grad_norm": 7.264596939086914, "learning_rate": 1.4082405104358172e-05, "loss": 3.0488, "step": 6511 }, { "epoch": 0.08, "grad_norm": 7.136972904205322, "learning_rate": 1.408456796798962e-05, "loss": 2.1724, "step": 6512 }, { "epoch": 0.08, "grad_norm": 7.821401596069336, "learning_rate": 1.4086730831621066e-05, "loss": 2.7811, "step": 6513 }, { "epoch": 0.08, "grad_norm": 7.383670330047607, "learning_rate": 1.4088893695252515e-05, "loss": 2.7693, "step": 6514 }, { "epoch": 0.08, "grad_norm": 7.737942218780518, "learning_rate": 1.4091056558883964e-05, "loss": 2.7028, "step": 6515 }, { "epoch": 0.08, "grad_norm": 7.920215606689453, "learning_rate": 1.4093219422515412e-05, "loss": 2.7602, "step": 6516 }, { "epoch": 0.08, "grad_norm": 7.57323694229126, "learning_rate": 1.4095382286146859e-05, "loss": 2.1725, "step": 6517 }, { "epoch": 0.08, "grad_norm": 7.997124671936035, "learning_rate": 1.4097545149778307e-05, "loss": 2.8471, "step": 6518 }, { "epoch": 0.08, "grad_norm": 8.537848472595215, "learning_rate": 1.4099708013409756e-05, "loss": 2.5096, "step": 6519 }, { "epoch": 0.08, "grad_norm": 7.6567864418029785, "learning_rate": 1.4101870877041205e-05, "loss": 2.5223, "step": 6520 }, { "epoch": 0.08, "grad_norm": 8.391121864318848, "learning_rate": 1.4104033740672651e-05, "loss": 2.5802, "step": 6521 }, { "epoch": 0.08, "grad_norm": 7.2573161125183105, "learning_rate": 1.4106196604304099e-05, "loss": 2.2816, "step": 6522 }, { "epoch": 0.08, "grad_norm": 7.606963157653809, "learning_rate": 1.4108359467935549e-05, "loss": 2.7564, "step": 6523 }, { "epoch": 0.08, "grad_norm": 7.394615173339844, "learning_rate": 1.4110522331566997e-05, "loss": 2.6413, "step": 6524 }, { "epoch": 0.08, "grad_norm": 7.435999870300293, "learning_rate": 1.4112685195198443e-05, "loss": 2.7395, "step": 6525 }, { "epoch": 0.08, "grad_norm": 7.303561687469482, "learning_rate": 1.4114848058829891e-05, "loss": 2.2562, "step": 6526 }, { "epoch": 0.08, "grad_norm": 7.81315279006958, "learning_rate": 1.4117010922461341e-05, "loss": 2.748, "step": 6527 }, { "epoch": 0.08, "grad_norm": 7.337125778198242, "learning_rate": 1.4119173786092787e-05, "loss": 3.2064, "step": 6528 }, { "epoch": 0.08, "grad_norm": 7.328176498413086, "learning_rate": 1.4121336649724235e-05, "loss": 2.4221, "step": 6529 }, { "epoch": 0.08, "grad_norm": 6.836357593536377, "learning_rate": 1.4123499513355685e-05, "loss": 2.4498, "step": 6530 }, { "epoch": 0.08, "grad_norm": 7.451873779296875, "learning_rate": 1.4125662376987133e-05, "loss": 3.1262, "step": 6531 }, { "epoch": 0.08, "grad_norm": 7.05248498916626, "learning_rate": 1.412782524061858e-05, "loss": 2.5392, "step": 6532 }, { "epoch": 0.08, "grad_norm": 7.170812129974365, "learning_rate": 1.4129988104250027e-05, "loss": 2.3318, "step": 6533 }, { "epoch": 0.08, "grad_norm": 7.835909843444824, "learning_rate": 1.4132150967881477e-05, "loss": 2.6259, "step": 6534 }, { "epoch": 0.08, "grad_norm": 7.648425102233887, "learning_rate": 1.4134313831512925e-05, "loss": 3.0145, "step": 6535 }, { "epoch": 0.08, "grad_norm": 7.303614139556885, "learning_rate": 1.4136476695144372e-05, "loss": 1.823, "step": 6536 }, { "epoch": 0.08, "grad_norm": 6.668203830718994, "learning_rate": 1.413863955877582e-05, "loss": 2.8059, "step": 6537 }, { "epoch": 0.08, "grad_norm": 6.8053507804870605, "learning_rate": 1.414080242240727e-05, "loss": 2.3094, "step": 6538 }, { "epoch": 0.08, "grad_norm": 6.817785739898682, "learning_rate": 1.4142965286038717e-05, "loss": 2.3553, "step": 6539 }, { "epoch": 0.08, "grad_norm": 6.718937397003174, "learning_rate": 1.4145128149670164e-05, "loss": 2.4789, "step": 6540 }, { "epoch": 0.08, "grad_norm": 6.704796314239502, "learning_rate": 1.4147291013301612e-05, "loss": 2.202, "step": 6541 }, { "epoch": 0.08, "grad_norm": 7.519357204437256, "learning_rate": 1.4149453876933062e-05, "loss": 3.0067, "step": 6542 }, { "epoch": 0.08, "grad_norm": 7.456989765167236, "learning_rate": 1.415161674056451e-05, "loss": 2.1147, "step": 6543 }, { "epoch": 0.08, "grad_norm": 6.525455474853516, "learning_rate": 1.4153779604195956e-05, "loss": 2.6013, "step": 6544 }, { "epoch": 0.08, "grad_norm": 8.094375610351562, "learning_rate": 1.4155942467827404e-05, "loss": 2.5633, "step": 6545 }, { "epoch": 0.08, "grad_norm": 6.841319561004639, "learning_rate": 1.4158105331458854e-05, "loss": 2.906, "step": 6546 }, { "epoch": 0.08, "grad_norm": 8.639347076416016, "learning_rate": 1.41602681950903e-05, "loss": 2.9037, "step": 6547 }, { "epoch": 0.08, "grad_norm": 7.4599761962890625, "learning_rate": 1.4162431058721748e-05, "loss": 2.1889, "step": 6548 }, { "epoch": 0.08, "grad_norm": 7.014463424682617, "learning_rate": 1.4164593922353196e-05, "loss": 2.6928, "step": 6549 }, { "epoch": 0.09, "grad_norm": 7.814675807952881, "learning_rate": 1.4166756785984646e-05, "loss": 3.1044, "step": 6550 }, { "epoch": 0.09, "grad_norm": 7.415426254272461, "learning_rate": 1.4168919649616092e-05, "loss": 2.032, "step": 6551 }, { "epoch": 0.09, "grad_norm": 8.176473617553711, "learning_rate": 1.417108251324754e-05, "loss": 2.9278, "step": 6552 }, { "epoch": 0.09, "grad_norm": 7.687839031219482, "learning_rate": 1.4173245376878988e-05, "loss": 2.4217, "step": 6553 }, { "epoch": 0.09, "grad_norm": 7.3617353439331055, "learning_rate": 1.4175408240510438e-05, "loss": 2.6211, "step": 6554 }, { "epoch": 0.09, "grad_norm": 7.570276737213135, "learning_rate": 1.4177571104141885e-05, "loss": 2.7373, "step": 6555 }, { "epoch": 0.09, "grad_norm": 7.907065391540527, "learning_rate": 1.4179733967773333e-05, "loss": 2.9401, "step": 6556 }, { "epoch": 0.09, "grad_norm": 7.328980922698975, "learning_rate": 1.418189683140478e-05, "loss": 2.4696, "step": 6557 }, { "epoch": 0.09, "grad_norm": 8.015836715698242, "learning_rate": 1.418405969503623e-05, "loss": 2.7404, "step": 6558 }, { "epoch": 0.09, "grad_norm": 6.676398277282715, "learning_rate": 1.4186222558667677e-05, "loss": 2.2399, "step": 6559 }, { "epoch": 0.09, "grad_norm": 7.597842693328857, "learning_rate": 1.4188385422299125e-05, "loss": 2.3345, "step": 6560 }, { "epoch": 0.09, "grad_norm": 8.078634262084961, "learning_rate": 1.4190548285930575e-05, "loss": 2.8105, "step": 6561 }, { "epoch": 0.09, "grad_norm": 7.631951808929443, "learning_rate": 1.4192711149562021e-05, "loss": 2.7264, "step": 6562 }, { "epoch": 0.09, "grad_norm": 8.414477348327637, "learning_rate": 1.4194874013193469e-05, "loss": 3.4152, "step": 6563 }, { "epoch": 0.09, "grad_norm": 7.487175464630127, "learning_rate": 1.4197036876824917e-05, "loss": 2.7153, "step": 6564 }, { "epoch": 0.09, "grad_norm": 7.609910011291504, "learning_rate": 1.4199199740456367e-05, "loss": 3.0745, "step": 6565 }, { "epoch": 0.09, "grad_norm": 7.656591415405273, "learning_rate": 1.4201362604087813e-05, "loss": 3.1289, "step": 6566 }, { "epoch": 0.09, "grad_norm": 7.741440773010254, "learning_rate": 1.4203525467719261e-05, "loss": 2.655, "step": 6567 }, { "epoch": 0.09, "grad_norm": 7.343564987182617, "learning_rate": 1.420568833135071e-05, "loss": 2.6645, "step": 6568 }, { "epoch": 0.09, "grad_norm": 6.7345428466796875, "learning_rate": 1.4207851194982159e-05, "loss": 2.6545, "step": 6569 }, { "epoch": 0.09, "grad_norm": 8.985333442687988, "learning_rate": 1.4210014058613605e-05, "loss": 2.9385, "step": 6570 }, { "epoch": 0.09, "grad_norm": 7.939216613769531, "learning_rate": 1.4212176922245053e-05, "loss": 2.4167, "step": 6571 }, { "epoch": 0.09, "grad_norm": 8.040274620056152, "learning_rate": 1.4214339785876501e-05, "loss": 3.283, "step": 6572 }, { "epoch": 0.09, "grad_norm": 7.518998146057129, "learning_rate": 1.4216502649507951e-05, "loss": 2.8697, "step": 6573 }, { "epoch": 0.09, "grad_norm": 8.989729881286621, "learning_rate": 1.4218665513139398e-05, "loss": 3.2945, "step": 6574 }, { "epoch": 0.09, "grad_norm": 7.634288787841797, "learning_rate": 1.4220828376770846e-05, "loss": 2.7979, "step": 6575 }, { "epoch": 0.09, "grad_norm": 7.598228454589844, "learning_rate": 1.4222991240402294e-05, "loss": 2.9453, "step": 6576 }, { "epoch": 0.09, "grad_norm": 7.4118218421936035, "learning_rate": 1.4225154104033743e-05, "loss": 2.3827, "step": 6577 }, { "epoch": 0.09, "grad_norm": 8.252092361450195, "learning_rate": 1.422731696766519e-05, "loss": 2.8891, "step": 6578 }, { "epoch": 0.09, "grad_norm": 7.195234775543213, "learning_rate": 1.4229479831296638e-05, "loss": 2.3299, "step": 6579 }, { "epoch": 0.09, "grad_norm": 7.864969730377197, "learning_rate": 1.4231642694928086e-05, "loss": 2.8632, "step": 6580 }, { "epoch": 0.09, "grad_norm": 7.832307815551758, "learning_rate": 1.4233805558559534e-05, "loss": 2.1936, "step": 6581 }, { "epoch": 0.09, "grad_norm": 7.5685014724731445, "learning_rate": 1.4235968422190982e-05, "loss": 2.6998, "step": 6582 }, { "epoch": 0.09, "grad_norm": 7.218174934387207, "learning_rate": 1.423813128582243e-05, "loss": 2.4661, "step": 6583 }, { "epoch": 0.09, "grad_norm": 8.067940711975098, "learning_rate": 1.4240294149453876e-05, "loss": 2.2205, "step": 6584 }, { "epoch": 0.09, "grad_norm": 7.189366817474365, "learning_rate": 1.4242457013085326e-05, "loss": 2.3133, "step": 6585 }, { "epoch": 0.09, "grad_norm": 6.839583873748779, "learning_rate": 1.4244619876716774e-05, "loss": 2.2393, "step": 6586 }, { "epoch": 0.09, "grad_norm": 8.004461288452148, "learning_rate": 1.4246782740348222e-05, "loss": 2.5917, "step": 6587 }, { "epoch": 0.09, "grad_norm": 8.033439636230469, "learning_rate": 1.4248945603979669e-05, "loss": 2.1292, "step": 6588 }, { "epoch": 0.09, "grad_norm": 9.055206298828125, "learning_rate": 1.4251108467611118e-05, "loss": 3.2213, "step": 6589 }, { "epoch": 0.09, "grad_norm": 7.963709831237793, "learning_rate": 1.4253271331242566e-05, "loss": 2.5438, "step": 6590 }, { "epoch": 0.09, "grad_norm": 7.916086673736572, "learning_rate": 1.4255434194874014e-05, "loss": 2.374, "step": 6591 }, { "epoch": 0.09, "grad_norm": 7.3895463943481445, "learning_rate": 1.4257597058505464e-05, "loss": 2.457, "step": 6592 }, { "epoch": 0.09, "grad_norm": 8.14529037475586, "learning_rate": 1.425975992213691e-05, "loss": 2.729, "step": 6593 }, { "epoch": 0.09, "grad_norm": 7.32376766204834, "learning_rate": 1.4261922785768359e-05, "loss": 2.3704, "step": 6594 }, { "epoch": 0.09, "grad_norm": 7.246970176696777, "learning_rate": 1.4264085649399807e-05, "loss": 2.2149, "step": 6595 }, { "epoch": 0.09, "grad_norm": 6.671411037445068, "learning_rate": 1.4266248513031256e-05, "loss": 2.5132, "step": 6596 }, { "epoch": 0.09, "grad_norm": 6.604763984680176, "learning_rate": 1.4268411376662703e-05, "loss": 2.5787, "step": 6597 }, { "epoch": 0.09, "grad_norm": 8.657167434692383, "learning_rate": 1.427057424029415e-05, "loss": 3.102, "step": 6598 }, { "epoch": 0.09, "grad_norm": 7.9145941734313965, "learning_rate": 1.4272737103925597e-05, "loss": 2.8714, "step": 6599 }, { "epoch": 0.09, "grad_norm": 7.952974796295166, "learning_rate": 1.4274899967557047e-05, "loss": 2.6434, "step": 6600 }, { "epoch": 0.09, "grad_norm": 7.1496500968933105, "learning_rate": 1.4277062831188495e-05, "loss": 2.3144, "step": 6601 }, { "epoch": 0.09, "grad_norm": 7.920952320098877, "learning_rate": 1.4279225694819943e-05, "loss": 2.9039, "step": 6602 }, { "epoch": 0.09, "grad_norm": 7.797552108764648, "learning_rate": 1.428138855845139e-05, "loss": 3.0964, "step": 6603 }, { "epoch": 0.09, "grad_norm": 7.625024795532227, "learning_rate": 1.4283551422082839e-05, "loss": 3.3481, "step": 6604 }, { "epoch": 0.09, "grad_norm": 7.369634628295898, "learning_rate": 1.4285714285714287e-05, "loss": 2.5938, "step": 6605 }, { "epoch": 0.09, "grad_norm": 7.291746616363525, "learning_rate": 1.4287877149345735e-05, "loss": 2.0815, "step": 6606 }, { "epoch": 0.09, "grad_norm": 8.668051719665527, "learning_rate": 1.4290040012977182e-05, "loss": 2.8486, "step": 6607 }, { "epoch": 0.09, "grad_norm": 7.283808708190918, "learning_rate": 1.4292202876608631e-05, "loss": 2.5811, "step": 6608 }, { "epoch": 0.09, "grad_norm": 8.194343566894531, "learning_rate": 1.429436574024008e-05, "loss": 2.8404, "step": 6609 }, { "epoch": 0.09, "grad_norm": 7.036371231079102, "learning_rate": 1.4296528603871527e-05, "loss": 2.5817, "step": 6610 }, { "epoch": 0.09, "grad_norm": 8.33089828491211, "learning_rate": 1.4298691467502974e-05, "loss": 2.8446, "step": 6611 }, { "epoch": 0.09, "grad_norm": 6.543475151062012, "learning_rate": 1.4300854331134423e-05, "loss": 2.5478, "step": 6612 }, { "epoch": 0.09, "grad_norm": 8.423389434814453, "learning_rate": 1.4303017194765872e-05, "loss": 2.4916, "step": 6613 }, { "epoch": 0.09, "grad_norm": 7.4436116218566895, "learning_rate": 1.430518005839732e-05, "loss": 2.5374, "step": 6614 }, { "epoch": 0.09, "grad_norm": 7.283185958862305, "learning_rate": 1.4307342922028766e-05, "loss": 2.4375, "step": 6615 }, { "epoch": 0.09, "grad_norm": 6.915569305419922, "learning_rate": 1.4309505785660216e-05, "loss": 2.6818, "step": 6616 }, { "epoch": 0.09, "grad_norm": 6.9029035568237305, "learning_rate": 1.4311668649291664e-05, "loss": 2.6044, "step": 6617 }, { "epoch": 0.09, "grad_norm": 8.323137283325195, "learning_rate": 1.431383151292311e-05, "loss": 2.6865, "step": 6618 }, { "epoch": 0.09, "grad_norm": 7.7245635986328125, "learning_rate": 1.4315994376554558e-05, "loss": 2.7339, "step": 6619 }, { "epoch": 0.09, "grad_norm": 7.513582229614258, "learning_rate": 1.4318157240186008e-05, "loss": 2.5849, "step": 6620 }, { "epoch": 0.09, "grad_norm": 7.27146577835083, "learning_rate": 1.4320320103817456e-05, "loss": 2.4539, "step": 6621 }, { "epoch": 0.09, "grad_norm": 6.53265380859375, "learning_rate": 1.4322482967448902e-05, "loss": 2.9068, "step": 6622 }, { "epoch": 0.09, "grad_norm": 6.788460731506348, "learning_rate": 1.432464583108035e-05, "loss": 2.2522, "step": 6623 }, { "epoch": 0.09, "grad_norm": 7.611406326293945, "learning_rate": 1.43268086947118e-05, "loss": 2.9988, "step": 6624 }, { "epoch": 0.09, "grad_norm": 7.133467197418213, "learning_rate": 1.4328971558343248e-05, "loss": 2.7322, "step": 6625 }, { "epoch": 0.09, "grad_norm": 7.582430839538574, "learning_rate": 1.4331134421974694e-05, "loss": 3.5777, "step": 6626 }, { "epoch": 0.09, "grad_norm": 7.205521106719971, "learning_rate": 1.4333297285606144e-05, "loss": 2.3987, "step": 6627 }, { "epoch": 0.09, "grad_norm": 7.74290657043457, "learning_rate": 1.4335460149237592e-05, "loss": 3.0347, "step": 6628 }, { "epoch": 0.09, "grad_norm": 7.848057270050049, "learning_rate": 1.433762301286904e-05, "loss": 2.6888, "step": 6629 }, { "epoch": 0.09, "grad_norm": 7.852685928344727, "learning_rate": 1.4339785876500487e-05, "loss": 2.7918, "step": 6630 }, { "epoch": 0.09, "grad_norm": 7.46642541885376, "learning_rate": 1.4341948740131936e-05, "loss": 2.7786, "step": 6631 }, { "epoch": 0.09, "grad_norm": 7.264747142791748, "learning_rate": 1.4344111603763384e-05, "loss": 3.21, "step": 6632 }, { "epoch": 0.09, "grad_norm": 6.46412992477417, "learning_rate": 1.4346274467394833e-05, "loss": 2.7988, "step": 6633 }, { "epoch": 0.09, "grad_norm": 7.599213123321533, "learning_rate": 1.4348437331026279e-05, "loss": 2.6791, "step": 6634 }, { "epoch": 0.09, "grad_norm": 7.737996578216553, "learning_rate": 1.4350600194657729e-05, "loss": 2.5494, "step": 6635 }, { "epoch": 0.09, "grad_norm": 8.008065223693848, "learning_rate": 1.4352763058289177e-05, "loss": 2.4622, "step": 6636 }, { "epoch": 0.09, "grad_norm": 6.908658027648926, "learning_rate": 1.4354925921920623e-05, "loss": 2.5911, "step": 6637 }, { "epoch": 0.09, "grad_norm": 7.638428688049316, "learning_rate": 1.4357088785552071e-05, "loss": 2.1303, "step": 6638 }, { "epoch": 0.09, "grad_norm": 6.332587242126465, "learning_rate": 1.4359251649183521e-05, "loss": 2.443, "step": 6639 }, { "epoch": 0.09, "grad_norm": 7.440748691558838, "learning_rate": 1.4361414512814969e-05, "loss": 3.0217, "step": 6640 }, { "epoch": 0.09, "grad_norm": 7.127813816070557, "learning_rate": 1.4363577376446415e-05, "loss": 2.4178, "step": 6641 }, { "epoch": 0.09, "grad_norm": 8.080997467041016, "learning_rate": 1.4365740240077863e-05, "loss": 3.0381, "step": 6642 }, { "epoch": 0.09, "grad_norm": 6.851057052612305, "learning_rate": 1.4367903103709313e-05, "loss": 2.3952, "step": 6643 }, { "epoch": 0.09, "grad_norm": 7.723151683807373, "learning_rate": 1.4370065967340761e-05, "loss": 2.5106, "step": 6644 }, { "epoch": 0.09, "grad_norm": 7.756344795227051, "learning_rate": 1.4372228830972207e-05, "loss": 2.88, "step": 6645 }, { "epoch": 0.09, "grad_norm": 7.808228969573975, "learning_rate": 1.4374391694603656e-05, "loss": 2.7034, "step": 6646 }, { "epoch": 0.09, "grad_norm": 7.227291107177734, "learning_rate": 1.4376554558235105e-05, "loss": 2.8281, "step": 6647 }, { "epoch": 0.09, "grad_norm": 8.172224998474121, "learning_rate": 1.4378717421866553e-05, "loss": 3.0696, "step": 6648 }, { "epoch": 0.09, "grad_norm": 7.264052391052246, "learning_rate": 1.4380880285498e-05, "loss": 2.6169, "step": 6649 }, { "epoch": 0.09, "grad_norm": 7.5440993309021, "learning_rate": 1.4383043149129448e-05, "loss": 2.3091, "step": 6650 }, { "epoch": 0.09, "grad_norm": 7.680998802185059, "learning_rate": 1.4385206012760897e-05, "loss": 2.0562, "step": 6651 }, { "epoch": 0.09, "grad_norm": 7.2975053787231445, "learning_rate": 1.4387368876392346e-05, "loss": 2.5434, "step": 6652 }, { "epoch": 0.09, "grad_norm": 7.83951473236084, "learning_rate": 1.4389531740023792e-05, "loss": 3.0017, "step": 6653 }, { "epoch": 0.09, "grad_norm": 7.250382423400879, "learning_rate": 1.439169460365524e-05, "loss": 2.6344, "step": 6654 }, { "epoch": 0.09, "grad_norm": 6.536529541015625, "learning_rate": 1.439385746728669e-05, "loss": 2.6794, "step": 6655 }, { "epoch": 0.09, "grad_norm": 7.039352893829346, "learning_rate": 1.4396020330918136e-05, "loss": 2.0232, "step": 6656 }, { "epoch": 0.09, "grad_norm": 7.208545207977295, "learning_rate": 1.4398183194549584e-05, "loss": 2.5685, "step": 6657 }, { "epoch": 0.09, "grad_norm": 7.179118633270264, "learning_rate": 1.4400346058181034e-05, "loss": 2.319, "step": 6658 }, { "epoch": 0.09, "grad_norm": 7.458560943603516, "learning_rate": 1.4402508921812482e-05, "loss": 2.4453, "step": 6659 }, { "epoch": 0.09, "grad_norm": 10.24396800994873, "learning_rate": 1.4404671785443928e-05, "loss": 2.8294, "step": 6660 }, { "epoch": 0.09, "grad_norm": 6.868098258972168, "learning_rate": 1.4406834649075376e-05, "loss": 2.7545, "step": 6661 }, { "epoch": 0.09, "grad_norm": 8.430929183959961, "learning_rate": 1.4408997512706826e-05, "loss": 2.366, "step": 6662 }, { "epoch": 0.09, "grad_norm": 8.118058204650879, "learning_rate": 1.4411160376338274e-05, "loss": 2.6529, "step": 6663 }, { "epoch": 0.09, "grad_norm": 7.438070297241211, "learning_rate": 1.441332323996972e-05, "loss": 2.8557, "step": 6664 }, { "epoch": 0.09, "grad_norm": 7.476350784301758, "learning_rate": 1.4415486103601168e-05, "loss": 2.3713, "step": 6665 }, { "epoch": 0.09, "grad_norm": 7.19911003112793, "learning_rate": 1.4417648967232618e-05, "loss": 2.1735, "step": 6666 }, { "epoch": 0.09, "grad_norm": 8.738786697387695, "learning_rate": 1.4419811830864066e-05, "loss": 2.6718, "step": 6667 }, { "epoch": 0.09, "grad_norm": 7.847409248352051, "learning_rate": 1.4421974694495513e-05, "loss": 3.0849, "step": 6668 }, { "epoch": 0.09, "grad_norm": 6.8530378341674805, "learning_rate": 1.442413755812696e-05, "loss": 2.4102, "step": 6669 }, { "epoch": 0.09, "grad_norm": 6.731195449829102, "learning_rate": 1.442630042175841e-05, "loss": 2.0316, "step": 6670 }, { "epoch": 0.09, "grad_norm": 6.77174186706543, "learning_rate": 1.4428463285389858e-05, "loss": 2.5297, "step": 6671 }, { "epoch": 0.09, "grad_norm": 7.37560510635376, "learning_rate": 1.4430626149021305e-05, "loss": 2.3297, "step": 6672 }, { "epoch": 0.09, "grad_norm": 7.560781955718994, "learning_rate": 1.4432789012652753e-05, "loss": 2.6442, "step": 6673 }, { "epoch": 0.09, "grad_norm": 7.627838611602783, "learning_rate": 1.4434951876284203e-05, "loss": 2.6967, "step": 6674 }, { "epoch": 0.09, "grad_norm": 6.710158824920654, "learning_rate": 1.4437114739915649e-05, "loss": 2.0704, "step": 6675 }, { "epoch": 0.09, "grad_norm": 6.736032962799072, "learning_rate": 1.4439277603547097e-05, "loss": 2.4217, "step": 6676 }, { "epoch": 0.09, "grad_norm": 7.146894454956055, "learning_rate": 1.4441440467178545e-05, "loss": 2.6188, "step": 6677 }, { "epoch": 0.09, "grad_norm": 6.996014595031738, "learning_rate": 1.4443603330809995e-05, "loss": 2.2558, "step": 6678 }, { "epoch": 0.09, "grad_norm": 8.473196983337402, "learning_rate": 1.4445766194441441e-05, "loss": 2.925, "step": 6679 }, { "epoch": 0.09, "grad_norm": 6.795579433441162, "learning_rate": 1.444792905807289e-05, "loss": 2.4922, "step": 6680 }, { "epoch": 0.09, "grad_norm": 7.929638385772705, "learning_rate": 1.4450091921704337e-05, "loss": 2.7243, "step": 6681 }, { "epoch": 0.09, "grad_norm": 7.521246910095215, "learning_rate": 1.4452254785335787e-05, "loss": 2.5502, "step": 6682 }, { "epoch": 0.09, "grad_norm": 7.377167701721191, "learning_rate": 1.4454417648967233e-05, "loss": 3.3789, "step": 6683 }, { "epoch": 0.09, "grad_norm": 7.102144241333008, "learning_rate": 1.4456580512598681e-05, "loss": 2.9412, "step": 6684 }, { "epoch": 0.09, "grad_norm": 7.771994590759277, "learning_rate": 1.445874337623013e-05, "loss": 2.7815, "step": 6685 }, { "epoch": 0.09, "grad_norm": 7.3432087898254395, "learning_rate": 1.446090623986158e-05, "loss": 2.3055, "step": 6686 }, { "epoch": 0.09, "grad_norm": 7.570930480957031, "learning_rate": 1.4463069103493026e-05, "loss": 2.9809, "step": 6687 }, { "epoch": 0.09, "grad_norm": 8.118542671203613, "learning_rate": 1.4465231967124474e-05, "loss": 2.9104, "step": 6688 }, { "epoch": 0.09, "grad_norm": 6.8089280128479, "learning_rate": 1.4467394830755923e-05, "loss": 2.7574, "step": 6689 }, { "epoch": 0.09, "grad_norm": 6.541284084320068, "learning_rate": 1.446955769438737e-05, "loss": 2.7187, "step": 6690 }, { "epoch": 0.09, "grad_norm": 9.264083862304688, "learning_rate": 1.4471720558018818e-05, "loss": 2.8054, "step": 6691 }, { "epoch": 0.09, "grad_norm": 7.401288032531738, "learning_rate": 1.4473883421650266e-05, "loss": 3.3345, "step": 6692 }, { "epoch": 0.09, "grad_norm": 8.2512788772583, "learning_rate": 1.4476046285281716e-05, "loss": 2.9085, "step": 6693 }, { "epoch": 0.09, "grad_norm": 7.860528469085693, "learning_rate": 1.4478209148913162e-05, "loss": 2.3134, "step": 6694 }, { "epoch": 0.09, "grad_norm": 9.669673919677734, "learning_rate": 1.448037201254461e-05, "loss": 2.9024, "step": 6695 }, { "epoch": 0.09, "grad_norm": 7.053370475769043, "learning_rate": 1.4482534876176058e-05, "loss": 2.5723, "step": 6696 }, { "epoch": 0.09, "grad_norm": 7.235836982727051, "learning_rate": 1.4484697739807508e-05, "loss": 2.5065, "step": 6697 }, { "epoch": 0.09, "grad_norm": 7.551481246948242, "learning_rate": 1.4486860603438954e-05, "loss": 3.0132, "step": 6698 }, { "epoch": 0.09, "grad_norm": 7.604904651641846, "learning_rate": 1.4489023467070402e-05, "loss": 2.605, "step": 6699 }, { "epoch": 0.09, "grad_norm": 6.042868137359619, "learning_rate": 1.449118633070185e-05, "loss": 2.3351, "step": 6700 }, { "epoch": 0.09, "grad_norm": 6.337707042694092, "learning_rate": 1.44933491943333e-05, "loss": 1.8862, "step": 6701 }, { "epoch": 0.09, "grad_norm": 5.801591873168945, "learning_rate": 1.4495512057964746e-05, "loss": 2.0861, "step": 6702 }, { "epoch": 0.09, "grad_norm": 7.173505783081055, "learning_rate": 1.4497674921596194e-05, "loss": 2.7304, "step": 6703 }, { "epoch": 0.09, "grad_norm": 7.922940254211426, "learning_rate": 1.4499837785227642e-05, "loss": 2.7859, "step": 6704 }, { "epoch": 0.09, "grad_norm": 6.651832580566406, "learning_rate": 1.4502000648859092e-05, "loss": 2.471, "step": 6705 }, { "epoch": 0.09, "grad_norm": 6.894862651824951, "learning_rate": 1.4504163512490539e-05, "loss": 2.8046, "step": 6706 }, { "epoch": 0.09, "grad_norm": 6.687857151031494, "learning_rate": 1.4506326376121987e-05, "loss": 2.551, "step": 6707 }, { "epoch": 0.09, "grad_norm": 7.621541500091553, "learning_rate": 1.4508489239753435e-05, "loss": 3.4026, "step": 6708 }, { "epoch": 0.09, "grad_norm": 7.384186744689941, "learning_rate": 1.4510652103384883e-05, "loss": 2.4019, "step": 6709 }, { "epoch": 0.09, "grad_norm": 7.296523571014404, "learning_rate": 1.451281496701633e-05, "loss": 2.3145, "step": 6710 }, { "epoch": 0.09, "grad_norm": 6.442944526672363, "learning_rate": 1.4514977830647779e-05, "loss": 2.5113, "step": 6711 }, { "epoch": 0.09, "grad_norm": 8.011866569519043, "learning_rate": 1.4517140694279225e-05, "loss": 3.029, "step": 6712 }, { "epoch": 0.09, "grad_norm": 7.636062145233154, "learning_rate": 1.4519303557910675e-05, "loss": 2.5081, "step": 6713 }, { "epoch": 0.09, "grad_norm": 6.535732746124268, "learning_rate": 1.4521466421542123e-05, "loss": 2.8011, "step": 6714 }, { "epoch": 0.09, "grad_norm": 7.095422267913818, "learning_rate": 1.4523629285173571e-05, "loss": 2.733, "step": 6715 }, { "epoch": 0.09, "grad_norm": 10.87409496307373, "learning_rate": 1.4525792148805017e-05, "loss": 3.3802, "step": 6716 }, { "epoch": 0.09, "grad_norm": 6.717520236968994, "learning_rate": 1.4527955012436467e-05, "loss": 2.3276, "step": 6717 }, { "epoch": 0.09, "grad_norm": 6.463918209075928, "learning_rate": 1.4530117876067915e-05, "loss": 2.517, "step": 6718 }, { "epoch": 0.09, "grad_norm": 7.424110412597656, "learning_rate": 1.4532280739699363e-05, "loss": 2.5602, "step": 6719 }, { "epoch": 0.09, "grad_norm": 6.402149677276611, "learning_rate": 1.4534443603330813e-05, "loss": 2.3202, "step": 6720 }, { "epoch": 0.09, "grad_norm": 8.035717010498047, "learning_rate": 1.453660646696226e-05, "loss": 3.0183, "step": 6721 }, { "epoch": 0.09, "grad_norm": 7.874531269073486, "learning_rate": 1.4538769330593707e-05, "loss": 3.0539, "step": 6722 }, { "epoch": 0.09, "grad_norm": 7.843888759613037, "learning_rate": 1.4540932194225155e-05, "loss": 2.2672, "step": 6723 }, { "epoch": 0.09, "grad_norm": 8.371774673461914, "learning_rate": 1.4543095057856605e-05, "loss": 2.7283, "step": 6724 }, { "epoch": 0.09, "grad_norm": 7.842301845550537, "learning_rate": 1.4545257921488052e-05, "loss": 3.0802, "step": 6725 }, { "epoch": 0.09, "grad_norm": 6.524600028991699, "learning_rate": 1.45474207851195e-05, "loss": 2.5339, "step": 6726 }, { "epoch": 0.09, "grad_norm": 7.943717002868652, "learning_rate": 1.4549583648750946e-05, "loss": 2.7111, "step": 6727 }, { "epoch": 0.09, "grad_norm": 8.767319679260254, "learning_rate": 1.4551746512382396e-05, "loss": 3.5296, "step": 6728 }, { "epoch": 0.09, "grad_norm": 7.256189346313477, "learning_rate": 1.4553909376013844e-05, "loss": 2.9012, "step": 6729 }, { "epoch": 0.09, "grad_norm": 9.355949401855469, "learning_rate": 1.4556072239645292e-05, "loss": 2.3652, "step": 6730 }, { "epoch": 0.09, "grad_norm": 7.318315029144287, "learning_rate": 1.4558235103276738e-05, "loss": 2.8147, "step": 6731 }, { "epoch": 0.09, "grad_norm": 7.254757404327393, "learning_rate": 1.4560397966908188e-05, "loss": 2.592, "step": 6732 }, { "epoch": 0.09, "grad_norm": 7.797873497009277, "learning_rate": 1.4562560830539636e-05, "loss": 2.6842, "step": 6733 }, { "epoch": 0.09, "grad_norm": 7.355397701263428, "learning_rate": 1.4564723694171084e-05, "loss": 2.3154, "step": 6734 }, { "epoch": 0.09, "grad_norm": 7.416152000427246, "learning_rate": 1.456688655780253e-05, "loss": 2.7447, "step": 6735 }, { "epoch": 0.09, "grad_norm": 8.59717082977295, "learning_rate": 1.456904942143398e-05, "loss": 2.8446, "step": 6736 }, { "epoch": 0.09, "grad_norm": 6.987912178039551, "learning_rate": 1.4571212285065428e-05, "loss": 2.3337, "step": 6737 }, { "epoch": 0.09, "grad_norm": 7.6514763832092285, "learning_rate": 1.4573375148696876e-05, "loss": 2.4173, "step": 6738 }, { "epoch": 0.09, "grad_norm": 6.419079780578613, "learning_rate": 1.4575538012328323e-05, "loss": 2.2503, "step": 6739 }, { "epoch": 0.09, "grad_norm": 7.063903331756592, "learning_rate": 1.4577700875959772e-05, "loss": 2.6839, "step": 6740 }, { "epoch": 0.09, "grad_norm": 6.60275411605835, "learning_rate": 1.457986373959122e-05, "loss": 2.1341, "step": 6741 }, { "epoch": 0.09, "grad_norm": 7.178219795227051, "learning_rate": 1.4582026603222668e-05, "loss": 2.608, "step": 6742 }, { "epoch": 0.09, "grad_norm": 8.18515396118164, "learning_rate": 1.4584189466854115e-05, "loss": 2.9366, "step": 6743 }, { "epoch": 0.09, "grad_norm": 7.563796043395996, "learning_rate": 1.4586352330485564e-05, "loss": 2.7608, "step": 6744 }, { "epoch": 0.09, "grad_norm": 7.428787708282471, "learning_rate": 1.4588515194117013e-05, "loss": 2.7897, "step": 6745 }, { "epoch": 0.09, "grad_norm": 6.670593738555908, "learning_rate": 1.4590678057748459e-05, "loss": 2.8199, "step": 6746 }, { "epoch": 0.09, "grad_norm": 6.52230978012085, "learning_rate": 1.4592840921379907e-05, "loss": 2.3236, "step": 6747 }, { "epoch": 0.09, "grad_norm": 6.844750881195068, "learning_rate": 1.4595003785011357e-05, "loss": 2.238, "step": 6748 }, { "epoch": 0.09, "grad_norm": 6.638628005981445, "learning_rate": 1.4597166648642805e-05, "loss": 2.4279, "step": 6749 }, { "epoch": 0.09, "grad_norm": 6.808815956115723, "learning_rate": 1.4599329512274251e-05, "loss": 2.396, "step": 6750 }, { "epoch": 0.09, "grad_norm": 8.8967924118042, "learning_rate": 1.4601492375905699e-05, "loss": 2.7876, "step": 6751 }, { "epoch": 0.09, "grad_norm": 7.846121788024902, "learning_rate": 1.4603655239537149e-05, "loss": 3.0786, "step": 6752 }, { "epoch": 0.09, "grad_norm": 7.3613691329956055, "learning_rate": 1.4605818103168597e-05, "loss": 2.5485, "step": 6753 }, { "epoch": 0.09, "grad_norm": 7.193666458129883, "learning_rate": 1.4607980966800043e-05, "loss": 2.8625, "step": 6754 }, { "epoch": 0.09, "grad_norm": 7.750302791595459, "learning_rate": 1.4610143830431493e-05, "loss": 2.2186, "step": 6755 }, { "epoch": 0.09, "grad_norm": 6.542721748352051, "learning_rate": 1.4612306694062941e-05, "loss": 2.5086, "step": 6756 }, { "epoch": 0.09, "grad_norm": 7.737721920013428, "learning_rate": 1.4614469557694389e-05, "loss": 2.6333, "step": 6757 }, { "epoch": 0.09, "grad_norm": 9.47575855255127, "learning_rate": 1.4616632421325836e-05, "loss": 3.4799, "step": 6758 }, { "epoch": 0.09, "grad_norm": 6.660551071166992, "learning_rate": 1.4618795284957285e-05, "loss": 2.422, "step": 6759 }, { "epoch": 0.09, "grad_norm": 7.416109561920166, "learning_rate": 1.4620958148588733e-05, "loss": 2.4494, "step": 6760 }, { "epoch": 0.09, "grad_norm": 6.850212574005127, "learning_rate": 1.4623121012220181e-05, "loss": 2.6447, "step": 6761 }, { "epoch": 0.09, "grad_norm": 7.4530744552612305, "learning_rate": 1.4625283875851628e-05, "loss": 2.1054, "step": 6762 }, { "epoch": 0.09, "grad_norm": 9.440203666687012, "learning_rate": 1.4627446739483077e-05, "loss": 2.7106, "step": 6763 }, { "epoch": 0.09, "grad_norm": 7.414919376373291, "learning_rate": 1.4629609603114526e-05, "loss": 2.5604, "step": 6764 }, { "epoch": 0.09, "grad_norm": 7.529989242553711, "learning_rate": 1.4631772466745972e-05, "loss": 2.7905, "step": 6765 }, { "epoch": 0.09, "grad_norm": 6.69002628326416, "learning_rate": 1.463393533037742e-05, "loss": 2.3408, "step": 6766 }, { "epoch": 0.09, "grad_norm": 6.917322635650635, "learning_rate": 1.463609819400887e-05, "loss": 2.8997, "step": 6767 }, { "epoch": 0.09, "grad_norm": 6.8461480140686035, "learning_rate": 1.4638261057640318e-05, "loss": 2.4999, "step": 6768 }, { "epoch": 0.09, "grad_norm": 7.38838529586792, "learning_rate": 1.4640423921271764e-05, "loss": 2.6064, "step": 6769 }, { "epoch": 0.09, "grad_norm": 7.9414262771606445, "learning_rate": 1.4642586784903212e-05, "loss": 2.8421, "step": 6770 }, { "epoch": 0.09, "grad_norm": 8.062813758850098, "learning_rate": 1.4644749648534662e-05, "loss": 2.7742, "step": 6771 }, { "epoch": 0.09, "grad_norm": 7.7663493156433105, "learning_rate": 1.464691251216611e-05, "loss": 2.6422, "step": 6772 }, { "epoch": 0.09, "grad_norm": 6.874850273132324, "learning_rate": 1.4649075375797556e-05, "loss": 2.3253, "step": 6773 }, { "epoch": 0.09, "grad_norm": 7.130761623382568, "learning_rate": 1.4651238239429004e-05, "loss": 2.743, "step": 6774 }, { "epoch": 0.09, "grad_norm": 8.182390213012695, "learning_rate": 1.4653401103060454e-05, "loss": 2.9302, "step": 6775 }, { "epoch": 0.09, "grad_norm": 7.316171169281006, "learning_rate": 1.4655563966691902e-05, "loss": 3.0718, "step": 6776 }, { "epoch": 0.09, "grad_norm": 7.582973957061768, "learning_rate": 1.4657726830323348e-05, "loss": 2.46, "step": 6777 }, { "epoch": 0.09, "grad_norm": 6.441054821014404, "learning_rate": 1.4659889693954797e-05, "loss": 2.1285, "step": 6778 }, { "epoch": 0.09, "grad_norm": 7.91670560836792, "learning_rate": 1.4662052557586246e-05, "loss": 3.3622, "step": 6779 }, { "epoch": 0.09, "grad_norm": 6.711994647979736, "learning_rate": 1.4664215421217694e-05, "loss": 2.845, "step": 6780 }, { "epoch": 0.09, "grad_norm": 7.419326305389404, "learning_rate": 1.466637828484914e-05, "loss": 2.9099, "step": 6781 }, { "epoch": 0.09, "grad_norm": 6.742397308349609, "learning_rate": 1.4668541148480589e-05, "loss": 2.1963, "step": 6782 }, { "epoch": 0.09, "grad_norm": 6.56395959854126, "learning_rate": 1.4670704012112038e-05, "loss": 2.8061, "step": 6783 }, { "epoch": 0.09, "grad_norm": 7.544330596923828, "learning_rate": 1.4672866875743485e-05, "loss": 2.8389, "step": 6784 }, { "epoch": 0.09, "grad_norm": 7.5461554527282715, "learning_rate": 1.4675029739374933e-05, "loss": 2.8062, "step": 6785 }, { "epoch": 0.09, "grad_norm": 7.011725425720215, "learning_rate": 1.4677192603006383e-05, "loss": 2.6619, "step": 6786 }, { "epoch": 0.09, "grad_norm": 7.00527811050415, "learning_rate": 1.467935546663783e-05, "loss": 2.4207, "step": 6787 }, { "epoch": 0.09, "grad_norm": 6.464247703552246, "learning_rate": 1.4681518330269277e-05, "loss": 2.4382, "step": 6788 }, { "epoch": 0.09, "grad_norm": 7.266007423400879, "learning_rate": 1.4683681193900725e-05, "loss": 2.7522, "step": 6789 }, { "epoch": 0.09, "grad_norm": 7.621665000915527, "learning_rate": 1.4685844057532175e-05, "loss": 2.3029, "step": 6790 }, { "epoch": 0.09, "grad_norm": 7.789848804473877, "learning_rate": 1.4688006921163623e-05, "loss": 2.7061, "step": 6791 }, { "epoch": 0.09, "grad_norm": 7.238114833831787, "learning_rate": 1.469016978479507e-05, "loss": 2.6873, "step": 6792 }, { "epoch": 0.09, "grad_norm": 7.174557685852051, "learning_rate": 1.4692332648426517e-05, "loss": 2.9406, "step": 6793 }, { "epoch": 0.09, "grad_norm": 6.887479782104492, "learning_rate": 1.4694495512057967e-05, "loss": 2.9365, "step": 6794 }, { "epoch": 0.09, "grad_norm": 7.186159133911133, "learning_rate": 1.4696658375689415e-05, "loss": 2.3218, "step": 6795 }, { "epoch": 0.09, "grad_norm": 8.003050804138184, "learning_rate": 1.4698821239320861e-05, "loss": 3.3215, "step": 6796 }, { "epoch": 0.09, "grad_norm": 6.567124366760254, "learning_rate": 1.470098410295231e-05, "loss": 2.5152, "step": 6797 }, { "epoch": 0.09, "grad_norm": 7.776889801025391, "learning_rate": 1.470314696658376e-05, "loss": 2.2032, "step": 6798 }, { "epoch": 0.09, "grad_norm": 7.965562343597412, "learning_rate": 1.4705309830215206e-05, "loss": 2.8769, "step": 6799 }, { "epoch": 0.09, "grad_norm": 8.385598182678223, "learning_rate": 1.4707472693846654e-05, "loss": 2.7363, "step": 6800 }, { "epoch": 0.09, "grad_norm": 8.006150245666504, "learning_rate": 1.4709635557478102e-05, "loss": 3.256, "step": 6801 }, { "epoch": 0.09, "grad_norm": 7.608657360076904, "learning_rate": 1.4711798421109551e-05, "loss": 2.4111, "step": 6802 }, { "epoch": 0.09, "grad_norm": 7.09438943862915, "learning_rate": 1.4713961284740998e-05, "loss": 2.3545, "step": 6803 }, { "epoch": 0.09, "grad_norm": 7.849460124969482, "learning_rate": 1.4716124148372446e-05, "loss": 2.3843, "step": 6804 }, { "epoch": 0.09, "grad_norm": 7.223751544952393, "learning_rate": 1.4718287012003894e-05, "loss": 2.8653, "step": 6805 }, { "epoch": 0.09, "grad_norm": 7.81949520111084, "learning_rate": 1.4720449875635344e-05, "loss": 2.8296, "step": 6806 }, { "epoch": 0.09, "grad_norm": 8.351472854614258, "learning_rate": 1.472261273926679e-05, "loss": 3.1043, "step": 6807 }, { "epoch": 0.09, "grad_norm": 7.068104267120361, "learning_rate": 1.4724775602898238e-05, "loss": 2.1632, "step": 6808 }, { "epoch": 0.09, "grad_norm": 6.350394248962402, "learning_rate": 1.4726938466529686e-05, "loss": 2.1747, "step": 6809 }, { "epoch": 0.09, "grad_norm": 7.593044281005859, "learning_rate": 1.4729101330161136e-05, "loss": 2.6397, "step": 6810 }, { "epoch": 0.09, "grad_norm": 8.215951919555664, "learning_rate": 1.4731264193792582e-05, "loss": 2.4878, "step": 6811 }, { "epoch": 0.09, "grad_norm": 6.918059825897217, "learning_rate": 1.473342705742403e-05, "loss": 2.7798, "step": 6812 }, { "epoch": 0.09, "grad_norm": 7.880249977111816, "learning_rate": 1.4735589921055478e-05, "loss": 2.8056, "step": 6813 }, { "epoch": 0.09, "grad_norm": 7.144869804382324, "learning_rate": 1.4737752784686928e-05, "loss": 2.4595, "step": 6814 }, { "epoch": 0.09, "grad_norm": 7.232232570648193, "learning_rate": 1.4739915648318374e-05, "loss": 3.0692, "step": 6815 }, { "epoch": 0.09, "grad_norm": 6.2826104164123535, "learning_rate": 1.4742078511949822e-05, "loss": 2.4724, "step": 6816 }, { "epoch": 0.09, "grad_norm": 7.34277868270874, "learning_rate": 1.4744241375581272e-05, "loss": 2.6482, "step": 6817 }, { "epoch": 0.09, "grad_norm": 7.259209156036377, "learning_rate": 1.4746404239212719e-05, "loss": 2.7495, "step": 6818 }, { "epoch": 0.09, "grad_norm": 7.248851299285889, "learning_rate": 1.4748567102844167e-05, "loss": 2.8578, "step": 6819 }, { "epoch": 0.09, "grad_norm": 7.412619590759277, "learning_rate": 1.4750729966475615e-05, "loss": 2.7956, "step": 6820 }, { "epoch": 0.09, "grad_norm": 6.776324272155762, "learning_rate": 1.4752892830107064e-05, "loss": 2.5718, "step": 6821 }, { "epoch": 0.09, "grad_norm": 7.90810489654541, "learning_rate": 1.475505569373851e-05, "loss": 2.5172, "step": 6822 }, { "epoch": 0.09, "grad_norm": 6.660107612609863, "learning_rate": 1.4757218557369959e-05, "loss": 2.6838, "step": 6823 }, { "epoch": 0.09, "grad_norm": 7.676065444946289, "learning_rate": 1.4759381421001407e-05, "loss": 2.808, "step": 6824 }, { "epoch": 0.09, "grad_norm": 6.746255397796631, "learning_rate": 1.4761544284632857e-05, "loss": 2.2273, "step": 6825 }, { "epoch": 0.09, "grad_norm": 7.914487361907959, "learning_rate": 1.4763707148264303e-05, "loss": 2.3596, "step": 6826 }, { "epoch": 0.09, "grad_norm": 8.354339599609375, "learning_rate": 1.4765870011895751e-05, "loss": 2.5704, "step": 6827 }, { "epoch": 0.09, "grad_norm": 7.112384796142578, "learning_rate": 1.4768032875527199e-05, "loss": 2.5692, "step": 6828 }, { "epoch": 0.09, "grad_norm": 7.572276592254639, "learning_rate": 1.4770195739158649e-05, "loss": 2.359, "step": 6829 }, { "epoch": 0.09, "grad_norm": 7.155760288238525, "learning_rate": 1.4772358602790095e-05, "loss": 2.7395, "step": 6830 }, { "epoch": 0.09, "grad_norm": 7.874836444854736, "learning_rate": 1.4774521466421543e-05, "loss": 3.0196, "step": 6831 }, { "epoch": 0.09, "grad_norm": 8.007109642028809, "learning_rate": 1.4776684330052991e-05, "loss": 2.6635, "step": 6832 }, { "epoch": 0.09, "grad_norm": 6.190001487731934, "learning_rate": 1.4778847193684441e-05, "loss": 2.2769, "step": 6833 }, { "epoch": 0.09, "grad_norm": 6.693536758422852, "learning_rate": 1.4781010057315887e-05, "loss": 2.5042, "step": 6834 }, { "epoch": 0.09, "grad_norm": 7.230560302734375, "learning_rate": 1.4783172920947335e-05, "loss": 2.8995, "step": 6835 }, { "epoch": 0.09, "grad_norm": 7.630460739135742, "learning_rate": 1.4785335784578782e-05, "loss": 3.0496, "step": 6836 }, { "epoch": 0.09, "grad_norm": 6.9526047706604, "learning_rate": 1.4787498648210232e-05, "loss": 2.7698, "step": 6837 }, { "epoch": 0.09, "grad_norm": 6.659827709197998, "learning_rate": 1.478966151184168e-05, "loss": 2.9739, "step": 6838 }, { "epoch": 0.09, "grad_norm": 6.875422477722168, "learning_rate": 1.4791824375473128e-05, "loss": 2.9898, "step": 6839 }, { "epoch": 0.09, "grad_norm": 8.115548133850098, "learning_rate": 1.4793987239104574e-05, "loss": 2.7117, "step": 6840 }, { "epoch": 0.09, "grad_norm": 6.895277500152588, "learning_rate": 1.4796150102736024e-05, "loss": 3.1147, "step": 6841 }, { "epoch": 0.09, "grad_norm": 7.568947792053223, "learning_rate": 1.4798312966367472e-05, "loss": 2.9271, "step": 6842 }, { "epoch": 0.09, "grad_norm": 7.502223968505859, "learning_rate": 1.480047582999892e-05, "loss": 2.8689, "step": 6843 }, { "epoch": 0.09, "grad_norm": 7.017847061157227, "learning_rate": 1.4802638693630366e-05, "loss": 2.8699, "step": 6844 }, { "epoch": 0.09, "grad_norm": 6.881277084350586, "learning_rate": 1.4804801557261816e-05, "loss": 2.7213, "step": 6845 }, { "epoch": 0.09, "grad_norm": 7.369153022766113, "learning_rate": 1.4806964420893264e-05, "loss": 2.5441, "step": 6846 }, { "epoch": 0.09, "grad_norm": 6.8884053230285645, "learning_rate": 1.4809127284524712e-05, "loss": 2.302, "step": 6847 }, { "epoch": 0.09, "grad_norm": 6.593516826629639, "learning_rate": 1.4811290148156162e-05, "loss": 2.2969, "step": 6848 }, { "epoch": 0.09, "grad_norm": 7.192575454711914, "learning_rate": 1.4813453011787608e-05, "loss": 2.5008, "step": 6849 }, { "epoch": 0.09, "grad_norm": 7.406453609466553, "learning_rate": 1.4815615875419056e-05, "loss": 2.9045, "step": 6850 }, { "epoch": 0.09, "grad_norm": 7.315066337585449, "learning_rate": 1.4817778739050504e-05, "loss": 2.9487, "step": 6851 }, { "epoch": 0.09, "grad_norm": 7.043337821960449, "learning_rate": 1.4819941602681954e-05, "loss": 2.4427, "step": 6852 }, { "epoch": 0.09, "grad_norm": 7.471461772918701, "learning_rate": 1.48221044663134e-05, "loss": 2.9452, "step": 6853 }, { "epoch": 0.09, "grad_norm": 7.1765055656433105, "learning_rate": 1.4824267329944848e-05, "loss": 2.7047, "step": 6854 }, { "epoch": 0.09, "grad_norm": 7.191604137420654, "learning_rate": 1.4826430193576295e-05, "loss": 2.6679, "step": 6855 }, { "epoch": 0.09, "grad_norm": 7.1628031730651855, "learning_rate": 1.4828593057207744e-05, "loss": 2.2742, "step": 6856 }, { "epoch": 0.09, "grad_norm": 7.141040325164795, "learning_rate": 1.4830755920839193e-05, "loss": 3.3674, "step": 6857 }, { "epoch": 0.09, "grad_norm": 6.798908710479736, "learning_rate": 1.483291878447064e-05, "loss": 3.266, "step": 6858 }, { "epoch": 0.09, "grad_norm": 7.791111469268799, "learning_rate": 1.4835081648102087e-05, "loss": 3.0968, "step": 6859 }, { "epoch": 0.09, "grad_norm": 6.284018039703369, "learning_rate": 1.4837244511733537e-05, "loss": 2.1298, "step": 6860 }, { "epoch": 0.09, "grad_norm": 6.3765974044799805, "learning_rate": 1.4839407375364985e-05, "loss": 2.4007, "step": 6861 }, { "epoch": 0.09, "grad_norm": 6.855979919433594, "learning_rate": 1.4841570238996433e-05, "loss": 2.8594, "step": 6862 }, { "epoch": 0.09, "grad_norm": 7.552175045013428, "learning_rate": 1.4843733102627879e-05, "loss": 2.8682, "step": 6863 }, { "epoch": 0.09, "grad_norm": 8.713695526123047, "learning_rate": 1.4845895966259329e-05, "loss": 2.7223, "step": 6864 }, { "epoch": 0.09, "grad_norm": 7.115779399871826, "learning_rate": 1.4848058829890777e-05, "loss": 2.6038, "step": 6865 }, { "epoch": 0.09, "grad_norm": 6.96023416519165, "learning_rate": 1.4850221693522225e-05, "loss": 2.449, "step": 6866 }, { "epoch": 0.09, "grad_norm": 7.4191179275512695, "learning_rate": 1.4852384557153671e-05, "loss": 2.4696, "step": 6867 }, { "epoch": 0.09, "grad_norm": 6.020615577697754, "learning_rate": 1.4854547420785121e-05, "loss": 2.443, "step": 6868 }, { "epoch": 0.09, "grad_norm": 6.609710693359375, "learning_rate": 1.4856710284416569e-05, "loss": 1.9622, "step": 6869 }, { "epoch": 0.09, "grad_norm": 6.802937030792236, "learning_rate": 1.4858873148048017e-05, "loss": 2.7091, "step": 6870 }, { "epoch": 0.09, "grad_norm": 7.532538414001465, "learning_rate": 1.4861036011679464e-05, "loss": 2.6874, "step": 6871 }, { "epoch": 0.09, "grad_norm": 7.031876564025879, "learning_rate": 1.4863198875310913e-05, "loss": 2.6276, "step": 6872 }, { "epoch": 0.09, "grad_norm": 6.969978332519531, "learning_rate": 1.4865361738942361e-05, "loss": 2.348, "step": 6873 }, { "epoch": 0.09, "grad_norm": 7.639764785766602, "learning_rate": 1.4867524602573808e-05, "loss": 2.8561, "step": 6874 }, { "epoch": 0.09, "grad_norm": 6.994815349578857, "learning_rate": 1.4869687466205256e-05, "loss": 2.042, "step": 6875 }, { "epoch": 0.09, "grad_norm": 7.0844292640686035, "learning_rate": 1.4871850329836705e-05, "loss": 2.4768, "step": 6876 }, { "epoch": 0.09, "grad_norm": 7.664445877075195, "learning_rate": 1.4874013193468154e-05, "loss": 2.6314, "step": 6877 }, { "epoch": 0.09, "grad_norm": 6.699201583862305, "learning_rate": 1.48761760570996e-05, "loss": 2.3683, "step": 6878 }, { "epoch": 0.09, "grad_norm": 9.488944053649902, "learning_rate": 1.4878338920731048e-05, "loss": 3.7249, "step": 6879 }, { "epoch": 0.09, "grad_norm": 6.914485454559326, "learning_rate": 1.4880501784362498e-05, "loss": 2.3965, "step": 6880 }, { "epoch": 0.09, "grad_norm": 6.958983421325684, "learning_rate": 1.4882664647993946e-05, "loss": 2.3399, "step": 6881 }, { "epoch": 0.09, "grad_norm": 6.9006805419921875, "learning_rate": 1.4884827511625392e-05, "loss": 2.6268, "step": 6882 }, { "epoch": 0.09, "grad_norm": 8.073979377746582, "learning_rate": 1.4886990375256842e-05, "loss": 2.2649, "step": 6883 }, { "epoch": 0.09, "grad_norm": 7.627657890319824, "learning_rate": 1.488915323888829e-05, "loss": 2.5499, "step": 6884 }, { "epoch": 0.09, "grad_norm": 7.427494049072266, "learning_rate": 1.4891316102519738e-05, "loss": 2.8558, "step": 6885 }, { "epoch": 0.09, "grad_norm": 7.293725967407227, "learning_rate": 1.4893478966151184e-05, "loss": 2.9891, "step": 6886 }, { "epoch": 0.09, "grad_norm": 6.935693264007568, "learning_rate": 1.4895641829782634e-05, "loss": 2.8881, "step": 6887 }, { "epoch": 0.09, "grad_norm": 7.741147041320801, "learning_rate": 1.4897804693414082e-05, "loss": 2.7957, "step": 6888 }, { "epoch": 0.09, "grad_norm": 7.591165065765381, "learning_rate": 1.489996755704553e-05, "loss": 2.5417, "step": 6889 }, { "epoch": 0.09, "grad_norm": 6.23044490814209, "learning_rate": 1.4902130420676977e-05, "loss": 1.8897, "step": 6890 }, { "epoch": 0.09, "grad_norm": 6.906113147735596, "learning_rate": 1.4904293284308426e-05, "loss": 2.5781, "step": 6891 }, { "epoch": 0.09, "grad_norm": 7.078333377838135, "learning_rate": 1.4906456147939874e-05, "loss": 2.0823, "step": 6892 }, { "epoch": 0.09, "grad_norm": 7.157148361206055, "learning_rate": 1.490861901157132e-05, "loss": 2.75, "step": 6893 }, { "epoch": 0.09, "grad_norm": 8.65866756439209, "learning_rate": 1.4910781875202769e-05, "loss": 3.1603, "step": 6894 }, { "epoch": 0.09, "grad_norm": 9.551485061645508, "learning_rate": 1.4912944738834218e-05, "loss": 3.137, "step": 6895 }, { "epoch": 0.09, "grad_norm": 7.352786064147949, "learning_rate": 1.4915107602465667e-05, "loss": 2.6057, "step": 6896 }, { "epoch": 0.09, "grad_norm": 7.780863285064697, "learning_rate": 1.4917270466097113e-05, "loss": 3.2067, "step": 6897 }, { "epoch": 0.09, "grad_norm": 7.203307628631592, "learning_rate": 1.4919433329728561e-05, "loss": 2.8814, "step": 6898 }, { "epoch": 0.09, "grad_norm": 7.233453750610352, "learning_rate": 1.492159619336001e-05, "loss": 2.9617, "step": 6899 }, { "epoch": 0.09, "grad_norm": 6.903039455413818, "learning_rate": 1.4923759056991459e-05, "loss": 2.3954, "step": 6900 }, { "epoch": 0.09, "grad_norm": 6.865484714508057, "learning_rate": 1.4925921920622905e-05, "loss": 2.4303, "step": 6901 }, { "epoch": 0.09, "grad_norm": 7.2021050453186035, "learning_rate": 1.4928084784254353e-05, "loss": 2.4548, "step": 6902 }, { "epoch": 0.09, "grad_norm": 7.06274938583374, "learning_rate": 1.4930247647885803e-05, "loss": 2.4608, "step": 6903 }, { "epoch": 0.09, "grad_norm": 6.933408737182617, "learning_rate": 1.4932410511517251e-05, "loss": 2.473, "step": 6904 }, { "epoch": 0.09, "grad_norm": 7.51555871963501, "learning_rate": 1.4934573375148697e-05, "loss": 2.9702, "step": 6905 }, { "epoch": 0.09, "grad_norm": 7.45973539352417, "learning_rate": 1.4936736238780145e-05, "loss": 2.6921, "step": 6906 }, { "epoch": 0.09, "grad_norm": 7.215299129486084, "learning_rate": 1.4938899102411595e-05, "loss": 3.1064, "step": 6907 }, { "epoch": 0.09, "grad_norm": 6.828419208526611, "learning_rate": 1.4941061966043041e-05, "loss": 2.4601, "step": 6908 }, { "epoch": 0.09, "grad_norm": 6.518313884735107, "learning_rate": 1.494322482967449e-05, "loss": 2.5883, "step": 6909 }, { "epoch": 0.09, "grad_norm": 8.199793815612793, "learning_rate": 1.4945387693305938e-05, "loss": 2.7428, "step": 6910 }, { "epoch": 0.09, "grad_norm": 8.875704765319824, "learning_rate": 1.4947550556937387e-05, "loss": 3.3495, "step": 6911 }, { "epoch": 0.09, "grad_norm": 7.836215019226074, "learning_rate": 1.4949713420568834e-05, "loss": 2.8428, "step": 6912 }, { "epoch": 0.09, "grad_norm": 6.659442901611328, "learning_rate": 1.4951876284200282e-05, "loss": 2.4464, "step": 6913 }, { "epoch": 0.09, "grad_norm": 9.286930084228516, "learning_rate": 1.4954039147831731e-05, "loss": 2.692, "step": 6914 }, { "epoch": 0.09, "grad_norm": 7.593312740325928, "learning_rate": 1.495620201146318e-05, "loss": 1.9667, "step": 6915 }, { "epoch": 0.09, "grad_norm": 6.607312202453613, "learning_rate": 1.4958364875094626e-05, "loss": 2.6119, "step": 6916 }, { "epoch": 0.09, "grad_norm": 7.1198906898498535, "learning_rate": 1.4960527738726074e-05, "loss": 3.3765, "step": 6917 }, { "epoch": 0.09, "grad_norm": 6.972676753997803, "learning_rate": 1.4962690602357524e-05, "loss": 3.3581, "step": 6918 }, { "epoch": 0.09, "grad_norm": 7.016420364379883, "learning_rate": 1.4964853465988972e-05, "loss": 2.2247, "step": 6919 }, { "epoch": 0.09, "grad_norm": 7.068350791931152, "learning_rate": 1.4967016329620418e-05, "loss": 2.5244, "step": 6920 }, { "epoch": 0.09, "grad_norm": 6.661915302276611, "learning_rate": 1.4969179193251866e-05, "loss": 2.2523, "step": 6921 }, { "epoch": 0.09, "grad_norm": 7.412827491760254, "learning_rate": 1.4971342056883316e-05, "loss": 2.7408, "step": 6922 }, { "epoch": 0.09, "grad_norm": 7.056148052215576, "learning_rate": 1.4973504920514764e-05, "loss": 2.8645, "step": 6923 }, { "epoch": 0.09, "grad_norm": 6.319923400878906, "learning_rate": 1.497566778414621e-05, "loss": 2.1792, "step": 6924 }, { "epoch": 0.09, "grad_norm": 6.838034629821777, "learning_rate": 1.4977830647777658e-05, "loss": 2.5692, "step": 6925 }, { "epoch": 0.09, "grad_norm": 7.376431465148926, "learning_rate": 1.4979993511409108e-05, "loss": 2.8086, "step": 6926 }, { "epoch": 0.09, "grad_norm": 7.373073577880859, "learning_rate": 1.4982156375040554e-05, "loss": 3.0548, "step": 6927 }, { "epoch": 0.09, "grad_norm": 7.675738334655762, "learning_rate": 1.4984319238672002e-05, "loss": 2.864, "step": 6928 }, { "epoch": 0.09, "grad_norm": 6.172231674194336, "learning_rate": 1.498648210230345e-05, "loss": 2.1674, "step": 6929 }, { "epoch": 0.09, "grad_norm": 6.454074859619141, "learning_rate": 1.49886449659349e-05, "loss": 2.5734, "step": 6930 }, { "epoch": 0.09, "grad_norm": 7.175392150878906, "learning_rate": 1.4990807829566347e-05, "loss": 2.669, "step": 6931 }, { "epoch": 0.09, "grad_norm": 7.078561782836914, "learning_rate": 1.4992970693197795e-05, "loss": 2.8391, "step": 6932 }, { "epoch": 0.09, "grad_norm": 8.030305862426758, "learning_rate": 1.4995133556829243e-05, "loss": 2.9679, "step": 6933 }, { "epoch": 0.09, "grad_norm": 6.593003749847412, "learning_rate": 1.4997296420460692e-05, "loss": 2.2485, "step": 6934 }, { "epoch": 0.09, "grad_norm": 7.141628265380859, "learning_rate": 1.4999459284092139e-05, "loss": 2.3839, "step": 6935 }, { "epoch": 0.09, "grad_norm": 7.052590847015381, "learning_rate": 1.5001622147723587e-05, "loss": 2.7467, "step": 6936 }, { "epoch": 0.09, "grad_norm": 6.629761219024658, "learning_rate": 1.5003785011355035e-05, "loss": 2.1938, "step": 6937 }, { "epoch": 0.09, "grad_norm": 7.237315654754639, "learning_rate": 1.5005947874986485e-05, "loss": 2.8588, "step": 6938 }, { "epoch": 0.09, "grad_norm": 8.082595825195312, "learning_rate": 1.5008110738617931e-05, "loss": 2.6928, "step": 6939 }, { "epoch": 0.09, "grad_norm": 8.024991035461426, "learning_rate": 1.5010273602249379e-05, "loss": 2.5106, "step": 6940 }, { "epoch": 0.09, "grad_norm": 6.149094581604004, "learning_rate": 1.5012436465880827e-05, "loss": 2.6105, "step": 6941 }, { "epoch": 0.09, "grad_norm": 7.233816146850586, "learning_rate": 1.5014599329512277e-05, "loss": 2.3262, "step": 6942 }, { "epoch": 0.09, "grad_norm": 7.9977593421936035, "learning_rate": 1.5016762193143723e-05, "loss": 2.8505, "step": 6943 }, { "epoch": 0.09, "grad_norm": 7.568057060241699, "learning_rate": 1.5018925056775171e-05, "loss": 2.9998, "step": 6944 }, { "epoch": 0.09, "grad_norm": 6.373913764953613, "learning_rate": 1.5021087920406621e-05, "loss": 2.5825, "step": 6945 }, { "epoch": 0.09, "grad_norm": 6.792748928070068, "learning_rate": 1.5023250784038067e-05, "loss": 2.6571, "step": 6946 }, { "epoch": 0.09, "grad_norm": 7.5105390548706055, "learning_rate": 1.5025413647669515e-05, "loss": 2.4043, "step": 6947 }, { "epoch": 0.09, "grad_norm": 6.6157450675964355, "learning_rate": 1.5027576511300963e-05, "loss": 1.7769, "step": 6948 }, { "epoch": 0.09, "grad_norm": 9.796174049377441, "learning_rate": 1.5029739374932413e-05, "loss": 3.1998, "step": 6949 }, { "epoch": 0.09, "grad_norm": 7.906439781188965, "learning_rate": 1.503190223856386e-05, "loss": 2.4615, "step": 6950 }, { "epoch": 0.09, "grad_norm": 7.0620832443237305, "learning_rate": 1.5034065102195308e-05, "loss": 2.7662, "step": 6951 }, { "epoch": 0.09, "grad_norm": 7.175622463226318, "learning_rate": 1.5036227965826756e-05, "loss": 2.5684, "step": 6952 }, { "epoch": 0.09, "grad_norm": 7.845917701721191, "learning_rate": 1.5038390829458205e-05, "loss": 3.0321, "step": 6953 }, { "epoch": 0.09, "grad_norm": 6.991319179534912, "learning_rate": 1.5040553693089652e-05, "loss": 2.4075, "step": 6954 }, { "epoch": 0.09, "grad_norm": 7.0365681648254395, "learning_rate": 1.50427165567211e-05, "loss": 2.7365, "step": 6955 }, { "epoch": 0.09, "grad_norm": 6.930754661560059, "learning_rate": 1.5044879420352548e-05, "loss": 2.596, "step": 6956 }, { "epoch": 0.09, "grad_norm": 7.75504207611084, "learning_rate": 1.5047042283983998e-05, "loss": 2.7952, "step": 6957 }, { "epoch": 0.09, "grad_norm": 8.090838432312012, "learning_rate": 1.5049205147615444e-05, "loss": 3.0325, "step": 6958 }, { "epoch": 0.09, "grad_norm": 7.252640247344971, "learning_rate": 1.5051368011246892e-05, "loss": 2.7725, "step": 6959 }, { "epoch": 0.09, "grad_norm": 6.846121788024902, "learning_rate": 1.505353087487834e-05, "loss": 2.7113, "step": 6960 }, { "epoch": 0.09, "grad_norm": 7.338146209716797, "learning_rate": 1.505569373850979e-05, "loss": 3.2851, "step": 6961 }, { "epoch": 0.09, "grad_norm": 8.055761337280273, "learning_rate": 1.5057856602141236e-05, "loss": 3.0059, "step": 6962 }, { "epoch": 0.09, "grad_norm": 7.407110214233398, "learning_rate": 1.5060019465772684e-05, "loss": 2.7315, "step": 6963 }, { "epoch": 0.09, "grad_norm": 6.425547122955322, "learning_rate": 1.506218232940413e-05, "loss": 2.2343, "step": 6964 }, { "epoch": 0.09, "grad_norm": 6.803523540496826, "learning_rate": 1.506434519303558e-05, "loss": 2.9447, "step": 6965 }, { "epoch": 0.09, "grad_norm": 6.800400733947754, "learning_rate": 1.5066508056667028e-05, "loss": 2.8548, "step": 6966 }, { "epoch": 0.09, "grad_norm": 6.8294291496276855, "learning_rate": 1.5068670920298476e-05, "loss": 2.6881, "step": 6967 }, { "epoch": 0.09, "grad_norm": 7.454566955566406, "learning_rate": 1.5070833783929923e-05, "loss": 2.3135, "step": 6968 }, { "epoch": 0.09, "grad_norm": 7.077080249786377, "learning_rate": 1.5072996647561373e-05, "loss": 2.6522, "step": 6969 }, { "epoch": 0.09, "grad_norm": 6.63281774520874, "learning_rate": 1.507515951119282e-05, "loss": 2.265, "step": 6970 }, { "epoch": 0.09, "grad_norm": 6.992335796356201, "learning_rate": 1.5077322374824269e-05, "loss": 2.6758, "step": 6971 }, { "epoch": 0.09, "grad_norm": 7.8696980476379395, "learning_rate": 1.5079485238455715e-05, "loss": 2.2325, "step": 6972 }, { "epoch": 0.09, "grad_norm": 6.901350975036621, "learning_rate": 1.5081648102087165e-05, "loss": 2.647, "step": 6973 }, { "epoch": 0.09, "grad_norm": 6.578821659088135, "learning_rate": 1.5083810965718613e-05, "loss": 2.5022, "step": 6974 }, { "epoch": 0.09, "grad_norm": 8.223549842834473, "learning_rate": 1.508597382935006e-05, "loss": 3.1434, "step": 6975 }, { "epoch": 0.09, "grad_norm": 7.008009910583496, "learning_rate": 1.508813669298151e-05, "loss": 2.1961, "step": 6976 }, { "epoch": 0.09, "grad_norm": 6.637478351593018, "learning_rate": 1.5090299556612957e-05, "loss": 2.5887, "step": 6977 }, { "epoch": 0.09, "grad_norm": 6.891568660736084, "learning_rate": 1.5092462420244405e-05, "loss": 2.593, "step": 6978 }, { "epoch": 0.09, "grad_norm": 8.090399742126465, "learning_rate": 1.5094625283875853e-05, "loss": 2.9323, "step": 6979 }, { "epoch": 0.09, "grad_norm": 7.345254421234131, "learning_rate": 1.5096788147507303e-05, "loss": 2.5457, "step": 6980 }, { "epoch": 0.09, "grad_norm": 7.003180980682373, "learning_rate": 1.5098951011138749e-05, "loss": 2.5608, "step": 6981 }, { "epoch": 0.09, "grad_norm": 6.752871513366699, "learning_rate": 1.5101113874770197e-05, "loss": 2.6217, "step": 6982 }, { "epoch": 0.09, "grad_norm": 7.193608283996582, "learning_rate": 1.5103276738401644e-05, "loss": 2.6555, "step": 6983 }, { "epoch": 0.09, "grad_norm": 6.762973785400391, "learning_rate": 1.5105439602033093e-05, "loss": 2.8708, "step": 6984 }, { "epoch": 0.09, "grad_norm": 8.821636199951172, "learning_rate": 1.5107602465664541e-05, "loss": 2.928, "step": 6985 }, { "epoch": 0.09, "grad_norm": 6.209529399871826, "learning_rate": 1.510976532929599e-05, "loss": 2.4009, "step": 6986 }, { "epoch": 0.09, "grad_norm": 6.547903537750244, "learning_rate": 1.5111928192927436e-05, "loss": 2.1255, "step": 6987 }, { "epoch": 0.09, "grad_norm": 7.729179382324219, "learning_rate": 1.5114091056558885e-05, "loss": 2.5867, "step": 6988 }, { "epoch": 0.09, "grad_norm": 8.062962532043457, "learning_rate": 1.5116253920190334e-05, "loss": 3.3507, "step": 6989 }, { "epoch": 0.09, "grad_norm": 6.853487491607666, "learning_rate": 1.5118416783821782e-05, "loss": 2.7187, "step": 6990 }, { "epoch": 0.09, "grad_norm": 7.122677803039551, "learning_rate": 1.5120579647453228e-05, "loss": 2.5605, "step": 6991 }, { "epoch": 0.09, "grad_norm": 7.964079856872559, "learning_rate": 1.5122742511084678e-05, "loss": 2.5351, "step": 6992 }, { "epoch": 0.09, "grad_norm": 8.32564640045166, "learning_rate": 1.5124905374716126e-05, "loss": 3.6669, "step": 6993 }, { "epoch": 0.09, "grad_norm": 7.803848743438721, "learning_rate": 1.5127068238347574e-05, "loss": 2.5329, "step": 6994 }, { "epoch": 0.09, "grad_norm": 7.771511077880859, "learning_rate": 1.512923110197902e-05, "loss": 2.5414, "step": 6995 }, { "epoch": 0.09, "grad_norm": 8.727139472961426, "learning_rate": 1.513139396561047e-05, "loss": 2.7843, "step": 6996 }, { "epoch": 0.09, "grad_norm": 7.159663677215576, "learning_rate": 1.5133556829241918e-05, "loss": 2.3213, "step": 6997 }, { "epoch": 0.09, "grad_norm": 8.823010444641113, "learning_rate": 1.5135719692873366e-05, "loss": 2.7826, "step": 6998 }, { "epoch": 0.09, "grad_norm": 7.30344295501709, "learning_rate": 1.5137882556504812e-05, "loss": 2.6285, "step": 6999 }, { "epoch": 0.09, "grad_norm": 7.897825717926025, "learning_rate": 1.5140045420136262e-05, "loss": 2.9499, "step": 7000 }, { "epoch": 0.09, "grad_norm": 8.846816062927246, "learning_rate": 1.514220828376771e-05, "loss": 2.5558, "step": 7001 }, { "epoch": 0.09, "grad_norm": 7.197854995727539, "learning_rate": 1.5144371147399156e-05, "loss": 3.5943, "step": 7002 }, { "epoch": 0.09, "grad_norm": 9.509980201721191, "learning_rate": 1.5146534011030605e-05, "loss": 2.4607, "step": 7003 }, { "epoch": 0.09, "grad_norm": 7.857126712799072, "learning_rate": 1.5148696874662054e-05, "loss": 2.5415, "step": 7004 }, { "epoch": 0.09, "grad_norm": 7.309666633605957, "learning_rate": 1.5150859738293502e-05, "loss": 2.3818, "step": 7005 }, { "epoch": 0.09, "grad_norm": 6.885824203491211, "learning_rate": 1.5153022601924949e-05, "loss": 2.3762, "step": 7006 }, { "epoch": 0.09, "grad_norm": 7.985500335693359, "learning_rate": 1.5155185465556397e-05, "loss": 2.8847, "step": 7007 }, { "epoch": 0.09, "grad_norm": 7.063333034515381, "learning_rate": 1.5157348329187846e-05, "loss": 2.3517, "step": 7008 }, { "epoch": 0.09, "grad_norm": 6.779211521148682, "learning_rate": 1.5159511192819295e-05, "loss": 3.0854, "step": 7009 }, { "epoch": 0.09, "grad_norm": 9.141420364379883, "learning_rate": 1.5161674056450741e-05, "loss": 2.928, "step": 7010 }, { "epoch": 0.09, "grad_norm": 7.571166515350342, "learning_rate": 1.516383692008219e-05, "loss": 2.731, "step": 7011 }, { "epoch": 0.09, "grad_norm": 8.476015090942383, "learning_rate": 1.5165999783713639e-05, "loss": 2.5086, "step": 7012 }, { "epoch": 0.09, "grad_norm": 6.99238395690918, "learning_rate": 1.5168162647345087e-05, "loss": 2.597, "step": 7013 }, { "epoch": 0.09, "grad_norm": 7.373047828674316, "learning_rate": 1.5170325510976533e-05, "loss": 2.7875, "step": 7014 }, { "epoch": 0.09, "grad_norm": 7.091212749481201, "learning_rate": 1.5172488374607983e-05, "loss": 2.4337, "step": 7015 }, { "epoch": 0.09, "grad_norm": 7.6964945793151855, "learning_rate": 1.5174651238239431e-05, "loss": 2.7732, "step": 7016 }, { "epoch": 0.09, "grad_norm": 7.166074752807617, "learning_rate": 1.5176814101870879e-05, "loss": 2.2237, "step": 7017 }, { "epoch": 0.09, "grad_norm": 7.807547569274902, "learning_rate": 1.5178976965502325e-05, "loss": 2.5203, "step": 7018 }, { "epoch": 0.09, "grad_norm": 6.69577169418335, "learning_rate": 1.5181139829133775e-05, "loss": 2.7213, "step": 7019 }, { "epoch": 0.09, "grad_norm": 6.007137298583984, "learning_rate": 1.5183302692765223e-05, "loss": 2.2726, "step": 7020 }, { "epoch": 0.09, "grad_norm": 6.395033359527588, "learning_rate": 1.518546555639667e-05, "loss": 2.4611, "step": 7021 }, { "epoch": 0.09, "grad_norm": 6.558471202850342, "learning_rate": 1.5187628420028118e-05, "loss": 1.9, "step": 7022 }, { "epoch": 0.09, "grad_norm": 6.1296796798706055, "learning_rate": 1.5189791283659567e-05, "loss": 2.6667, "step": 7023 }, { "epoch": 0.09, "grad_norm": 6.748460292816162, "learning_rate": 1.5191954147291015e-05, "loss": 2.6987, "step": 7024 }, { "epoch": 0.09, "grad_norm": 7.098424434661865, "learning_rate": 1.5194117010922462e-05, "loss": 2.5716, "step": 7025 }, { "epoch": 0.09, "grad_norm": 9.028467178344727, "learning_rate": 1.519627987455391e-05, "loss": 3.4294, "step": 7026 }, { "epoch": 0.09, "grad_norm": 7.27718448638916, "learning_rate": 1.519844273818536e-05, "loss": 2.2995, "step": 7027 }, { "epoch": 0.09, "grad_norm": 6.351648807525635, "learning_rate": 1.5200605601816808e-05, "loss": 2.4299, "step": 7028 }, { "epoch": 0.09, "grad_norm": 6.897815704345703, "learning_rate": 1.5202768465448254e-05, "loss": 2.4035, "step": 7029 }, { "epoch": 0.09, "grad_norm": 7.486092567443848, "learning_rate": 1.5204931329079702e-05, "loss": 2.8025, "step": 7030 }, { "epoch": 0.09, "grad_norm": 6.390407085418701, "learning_rate": 1.5207094192711152e-05, "loss": 2.6707, "step": 7031 }, { "epoch": 0.09, "grad_norm": 7.7787065505981445, "learning_rate": 1.52092570563426e-05, "loss": 2.5225, "step": 7032 }, { "epoch": 0.09, "grad_norm": 6.874297142028809, "learning_rate": 1.5211419919974046e-05, "loss": 2.3947, "step": 7033 }, { "epoch": 0.09, "grad_norm": 6.378233909606934, "learning_rate": 1.5213582783605494e-05, "loss": 2.4875, "step": 7034 }, { "epoch": 0.09, "grad_norm": 7.323629856109619, "learning_rate": 1.5215745647236944e-05, "loss": 2.4576, "step": 7035 }, { "epoch": 0.09, "grad_norm": 6.157605171203613, "learning_rate": 1.521790851086839e-05, "loss": 2.4576, "step": 7036 }, { "epoch": 0.09, "grad_norm": 7.073887825012207, "learning_rate": 1.5220071374499838e-05, "loss": 2.9032, "step": 7037 }, { "epoch": 0.09, "grad_norm": 6.743645668029785, "learning_rate": 1.5222234238131286e-05, "loss": 3.1728, "step": 7038 }, { "epoch": 0.09, "grad_norm": 7.241336822509766, "learning_rate": 1.5224397101762736e-05, "loss": 2.1113, "step": 7039 }, { "epoch": 0.09, "grad_norm": 7.289174556732178, "learning_rate": 1.5226559965394182e-05, "loss": 2.5189, "step": 7040 }, { "epoch": 0.09, "grad_norm": 7.2469096183776855, "learning_rate": 1.522872282902563e-05, "loss": 2.7004, "step": 7041 }, { "epoch": 0.09, "grad_norm": 7.548104286193848, "learning_rate": 1.523088569265708e-05, "loss": 2.6243, "step": 7042 }, { "epoch": 0.09, "grad_norm": 7.23906135559082, "learning_rate": 1.5233048556288528e-05, "loss": 2.9994, "step": 7043 }, { "epoch": 0.09, "grad_norm": 6.45536994934082, "learning_rate": 1.5235211419919975e-05, "loss": 2.4832, "step": 7044 }, { "epoch": 0.09, "grad_norm": 7.60345458984375, "learning_rate": 1.5237374283551423e-05, "loss": 3.1711, "step": 7045 }, { "epoch": 0.09, "grad_norm": 6.528463363647461, "learning_rate": 1.5239537147182872e-05, "loss": 2.3986, "step": 7046 }, { "epoch": 0.09, "grad_norm": 7.004340648651123, "learning_rate": 1.524170001081432e-05, "loss": 2.1559, "step": 7047 }, { "epoch": 0.09, "grad_norm": 6.08878755569458, "learning_rate": 1.5243862874445767e-05, "loss": 2.2372, "step": 7048 }, { "epoch": 0.09, "grad_norm": 7.900495529174805, "learning_rate": 1.5246025738077215e-05, "loss": 2.6795, "step": 7049 }, { "epoch": 0.09, "grad_norm": 7.359866142272949, "learning_rate": 1.5248188601708665e-05, "loss": 2.4451, "step": 7050 }, { "epoch": 0.09, "grad_norm": 7.30413293838501, "learning_rate": 1.5250351465340113e-05, "loss": 2.4265, "step": 7051 }, { "epoch": 0.09, "grad_norm": 6.687589645385742, "learning_rate": 1.5252514328971559e-05, "loss": 2.5387, "step": 7052 }, { "epoch": 0.09, "grad_norm": 6.560893535614014, "learning_rate": 1.5254677192603007e-05, "loss": 2.3314, "step": 7053 }, { "epoch": 0.09, "grad_norm": 7.17020845413208, "learning_rate": 1.5256840056234457e-05, "loss": 2.5833, "step": 7054 }, { "epoch": 0.09, "grad_norm": 6.2891082763671875, "learning_rate": 1.5259002919865905e-05, "loss": 2.1793, "step": 7055 }, { "epoch": 0.09, "grad_norm": 6.765735626220703, "learning_rate": 1.526116578349735e-05, "loss": 2.5119, "step": 7056 }, { "epoch": 0.09, "grad_norm": 7.3630194664001465, "learning_rate": 1.5263328647128798e-05, "loss": 2.79, "step": 7057 }, { "epoch": 0.09, "grad_norm": 6.510472774505615, "learning_rate": 1.5265491510760247e-05, "loss": 2.4952, "step": 7058 }, { "epoch": 0.09, "grad_norm": 6.344297885894775, "learning_rate": 1.5267654374391697e-05, "loss": 2.47, "step": 7059 }, { "epoch": 0.09, "grad_norm": 6.924896717071533, "learning_rate": 1.5269817238023143e-05, "loss": 2.7443, "step": 7060 }, { "epoch": 0.09, "grad_norm": 6.733526229858398, "learning_rate": 1.527198010165459e-05, "loss": 2.7888, "step": 7061 }, { "epoch": 0.09, "grad_norm": 6.62416410446167, "learning_rate": 1.527414296528604e-05, "loss": 2.5042, "step": 7062 }, { "epoch": 0.09, "grad_norm": 7.28491735458374, "learning_rate": 1.527630582891749e-05, "loss": 2.6961, "step": 7063 }, { "epoch": 0.09, "grad_norm": 8.786763191223145, "learning_rate": 1.5278468692548936e-05, "loss": 2.9411, "step": 7064 }, { "epoch": 0.09, "grad_norm": 7.540157794952393, "learning_rate": 1.5280631556180382e-05, "loss": 3.2285, "step": 7065 }, { "epoch": 0.09, "grad_norm": 7.151690483093262, "learning_rate": 1.5282794419811832e-05, "loss": 3.0405, "step": 7066 }, { "epoch": 0.09, "grad_norm": 6.910696506500244, "learning_rate": 1.528495728344328e-05, "loss": 2.356, "step": 7067 }, { "epoch": 0.09, "grad_norm": 7.010414123535156, "learning_rate": 1.5287120147074728e-05, "loss": 2.9373, "step": 7068 }, { "epoch": 0.09, "grad_norm": 6.1997904777526855, "learning_rate": 1.5289283010706174e-05, "loss": 1.8595, "step": 7069 }, { "epoch": 0.09, "grad_norm": 6.242943286895752, "learning_rate": 1.5291445874337624e-05, "loss": 2.6368, "step": 7070 }, { "epoch": 0.09, "grad_norm": 7.100222587585449, "learning_rate": 1.5293608737969074e-05, "loss": 2.5465, "step": 7071 }, { "epoch": 0.09, "grad_norm": 7.827605247497559, "learning_rate": 1.529577160160052e-05, "loss": 2.6564, "step": 7072 }, { "epoch": 0.09, "grad_norm": 9.10788345336914, "learning_rate": 1.529793446523197e-05, "loss": 3.0738, "step": 7073 }, { "epoch": 0.09, "grad_norm": 7.307363033294678, "learning_rate": 1.5300097328863416e-05, "loss": 2.4054, "step": 7074 }, { "epoch": 0.09, "grad_norm": 6.755416393280029, "learning_rate": 1.5302260192494866e-05, "loss": 2.2636, "step": 7075 }, { "epoch": 0.09, "grad_norm": 7.758864879608154, "learning_rate": 1.5304423056126312e-05, "loss": 2.9514, "step": 7076 }, { "epoch": 0.09, "grad_norm": 6.1027116775512695, "learning_rate": 1.5306585919757762e-05, "loss": 2.2416, "step": 7077 }, { "epoch": 0.09, "grad_norm": 5.853905200958252, "learning_rate": 1.530874878338921e-05, "loss": 2.184, "step": 7078 }, { "epoch": 0.09, "grad_norm": 6.641127109527588, "learning_rate": 1.5310911647020658e-05, "loss": 2.3738, "step": 7079 }, { "epoch": 0.09, "grad_norm": 6.7214155197143555, "learning_rate": 1.5313074510652104e-05, "loss": 2.6014, "step": 7080 }, { "epoch": 0.09, "grad_norm": 6.369647979736328, "learning_rate": 1.5315237374283554e-05, "loss": 2.2047, "step": 7081 }, { "epoch": 0.09, "grad_norm": 7.816731929779053, "learning_rate": 1.5317400237915e-05, "loss": 2.6031, "step": 7082 }, { "epoch": 0.09, "grad_norm": 7.210423469543457, "learning_rate": 1.5319563101546447e-05, "loss": 2.7665, "step": 7083 }, { "epoch": 0.09, "grad_norm": 7.135851860046387, "learning_rate": 1.5321725965177897e-05, "loss": 2.303, "step": 7084 }, { "epoch": 0.09, "grad_norm": 6.332527160644531, "learning_rate": 1.5323888828809346e-05, "loss": 2.6119, "step": 7085 }, { "epoch": 0.09, "grad_norm": 9.172738075256348, "learning_rate": 1.5326051692440793e-05, "loss": 3.1607, "step": 7086 }, { "epoch": 0.09, "grad_norm": 6.639833450317383, "learning_rate": 1.532821455607224e-05, "loss": 2.2066, "step": 7087 }, { "epoch": 0.09, "grad_norm": 8.269417762756348, "learning_rate": 1.533037741970369e-05, "loss": 2.6953, "step": 7088 }, { "epoch": 0.09, "grad_norm": 7.983919620513916, "learning_rate": 1.533254028333514e-05, "loss": 2.7818, "step": 7089 }, { "epoch": 0.09, "grad_norm": 7.062153339385986, "learning_rate": 1.5334703146966585e-05, "loss": 2.9511, "step": 7090 }, { "epoch": 0.09, "grad_norm": 6.970877647399902, "learning_rate": 1.533686601059803e-05, "loss": 2.5841, "step": 7091 }, { "epoch": 0.09, "grad_norm": 6.661769390106201, "learning_rate": 1.533902887422948e-05, "loss": 1.9402, "step": 7092 }, { "epoch": 0.09, "grad_norm": 7.70329475402832, "learning_rate": 1.534119173786093e-05, "loss": 2.7535, "step": 7093 }, { "epoch": 0.09, "grad_norm": 6.812368392944336, "learning_rate": 1.5343354601492377e-05, "loss": 2.6416, "step": 7094 }, { "epoch": 0.09, "grad_norm": 7.210526466369629, "learning_rate": 1.5345517465123824e-05, "loss": 3.1611, "step": 7095 }, { "epoch": 0.09, "grad_norm": 7.046359539031982, "learning_rate": 1.5347680328755273e-05, "loss": 3.1724, "step": 7096 }, { "epoch": 0.09, "grad_norm": 7.878990173339844, "learning_rate": 1.5349843192386723e-05, "loss": 3.2625, "step": 7097 }, { "epoch": 0.09, "grad_norm": 6.6905903816223145, "learning_rate": 1.535200605601817e-05, "loss": 2.4288, "step": 7098 }, { "epoch": 0.09, "grad_norm": 7.659359931945801, "learning_rate": 1.5354168919649616e-05, "loss": 2.8995, "step": 7099 }, { "epoch": 0.09, "grad_norm": 6.302304744720459, "learning_rate": 1.5356331783281065e-05, "loss": 2.4487, "step": 7100 }, { "epoch": 0.09, "grad_norm": 7.012979507446289, "learning_rate": 1.5358494646912515e-05, "loss": 3.0039, "step": 7101 }, { "epoch": 0.09, "grad_norm": 6.575178146362305, "learning_rate": 1.536065751054396e-05, "loss": 2.3295, "step": 7102 }, { "epoch": 0.09, "grad_norm": 5.912685394287109, "learning_rate": 1.5362820374175408e-05, "loss": 2.1159, "step": 7103 }, { "epoch": 0.09, "grad_norm": 6.067251682281494, "learning_rate": 1.5364983237806858e-05, "loss": 2.1581, "step": 7104 }, { "epoch": 0.09, "grad_norm": 6.882908821105957, "learning_rate": 1.5367146101438307e-05, "loss": 2.726, "step": 7105 }, { "epoch": 0.09, "grad_norm": 7.036218643188477, "learning_rate": 1.5369308965069754e-05, "loss": 2.3749, "step": 7106 }, { "epoch": 0.09, "grad_norm": 6.682007312774658, "learning_rate": 1.53714718287012e-05, "loss": 2.256, "step": 7107 }, { "epoch": 0.09, "grad_norm": 6.879165172576904, "learning_rate": 1.537363469233265e-05, "loss": 2.7847, "step": 7108 }, { "epoch": 0.09, "grad_norm": 6.638848781585693, "learning_rate": 1.53757975559641e-05, "loss": 2.6394, "step": 7109 }, { "epoch": 0.09, "grad_norm": 7.750101566314697, "learning_rate": 1.5377960419595546e-05, "loss": 2.861, "step": 7110 }, { "epoch": 0.09, "grad_norm": 7.759665012359619, "learning_rate": 1.5380123283226992e-05, "loss": 3.2005, "step": 7111 }, { "epoch": 0.09, "grad_norm": 6.990673065185547, "learning_rate": 1.5382286146858442e-05, "loss": 2.4575, "step": 7112 }, { "epoch": 0.09, "grad_norm": 6.65023136138916, "learning_rate": 1.5384449010489892e-05, "loss": 2.9627, "step": 7113 }, { "epoch": 0.09, "grad_norm": 6.390079498291016, "learning_rate": 1.5386611874121338e-05, "loss": 2.6728, "step": 7114 }, { "epoch": 0.09, "grad_norm": 7.447781085968018, "learning_rate": 1.5388774737752785e-05, "loss": 3.2927, "step": 7115 }, { "epoch": 0.09, "grad_norm": 7.610205173492432, "learning_rate": 1.5390937601384234e-05, "loss": 2.7128, "step": 7116 }, { "epoch": 0.09, "grad_norm": 7.7366790771484375, "learning_rate": 1.5393100465015684e-05, "loss": 3.1253, "step": 7117 }, { "epoch": 0.09, "grad_norm": 7.842392921447754, "learning_rate": 1.539526332864713e-05, "loss": 2.4043, "step": 7118 }, { "epoch": 0.09, "grad_norm": 7.1804890632629395, "learning_rate": 1.5397426192278577e-05, "loss": 2.6704, "step": 7119 }, { "epoch": 0.09, "grad_norm": 6.7874298095703125, "learning_rate": 1.5399589055910026e-05, "loss": 2.7011, "step": 7120 }, { "epoch": 0.09, "grad_norm": 6.583156585693359, "learning_rate": 1.5401751919541473e-05, "loss": 2.3469, "step": 7121 }, { "epoch": 0.09, "grad_norm": 6.160722732543945, "learning_rate": 1.5403914783172923e-05, "loss": 2.1485, "step": 7122 }, { "epoch": 0.09, "grad_norm": 7.073882102966309, "learning_rate": 1.540607764680437e-05, "loss": 2.6763, "step": 7123 }, { "epoch": 0.09, "grad_norm": 8.145766258239746, "learning_rate": 1.540824051043582e-05, "loss": 2.7973, "step": 7124 }, { "epoch": 0.09, "grad_norm": 7.504219055175781, "learning_rate": 1.5410403374067265e-05, "loss": 3.1061, "step": 7125 }, { "epoch": 0.09, "grad_norm": 6.802563190460205, "learning_rate": 1.5412566237698715e-05, "loss": 2.8568, "step": 7126 }, { "epoch": 0.09, "grad_norm": 7.038890838623047, "learning_rate": 1.541472910133016e-05, "loss": 2.444, "step": 7127 }, { "epoch": 0.09, "grad_norm": 8.590932846069336, "learning_rate": 1.541689196496161e-05, "loss": 2.9778, "step": 7128 }, { "epoch": 0.09, "grad_norm": 7.16955041885376, "learning_rate": 1.5419054828593057e-05, "loss": 2.8465, "step": 7129 }, { "epoch": 0.09, "grad_norm": 6.357450008392334, "learning_rate": 1.5421217692224507e-05, "loss": 2.3483, "step": 7130 }, { "epoch": 0.09, "grad_norm": 7.2610015869140625, "learning_rate": 1.5423380555855953e-05, "loss": 2.2613, "step": 7131 }, { "epoch": 0.09, "grad_norm": 6.788719654083252, "learning_rate": 1.5425543419487403e-05, "loss": 2.1543, "step": 7132 }, { "epoch": 0.09, "grad_norm": 6.763981342315674, "learning_rate": 1.542770628311885e-05, "loss": 2.5702, "step": 7133 }, { "epoch": 0.09, "grad_norm": 7.813093185424805, "learning_rate": 1.54298691467503e-05, "loss": 2.6177, "step": 7134 }, { "epoch": 0.09, "grad_norm": 6.045846462249756, "learning_rate": 1.5432032010381746e-05, "loss": 1.9312, "step": 7135 }, { "epoch": 0.09, "grad_norm": 6.774188041687012, "learning_rate": 1.5434194874013195e-05, "loss": 2.1933, "step": 7136 }, { "epoch": 0.09, "grad_norm": 6.72217321395874, "learning_rate": 1.543635773764464e-05, "loss": 2.2726, "step": 7137 }, { "epoch": 0.09, "grad_norm": 6.26102352142334, "learning_rate": 1.543852060127609e-05, "loss": 1.9675, "step": 7138 }, { "epoch": 0.09, "grad_norm": 6.829354286193848, "learning_rate": 1.544068346490754e-05, "loss": 3.0578, "step": 7139 }, { "epoch": 0.09, "grad_norm": 7.565456867218018, "learning_rate": 1.5442846328538988e-05, "loss": 2.9422, "step": 7140 }, { "epoch": 0.09, "grad_norm": 8.310769081115723, "learning_rate": 1.5445009192170434e-05, "loss": 3.0368, "step": 7141 }, { "epoch": 0.09, "grad_norm": 8.750014305114746, "learning_rate": 1.5447172055801884e-05, "loss": 2.6111, "step": 7142 }, { "epoch": 0.09, "grad_norm": 6.208998680114746, "learning_rate": 1.5449334919433333e-05, "loss": 2.1686, "step": 7143 }, { "epoch": 0.09, "grad_norm": 8.697469711303711, "learning_rate": 1.545149778306478e-05, "loss": 2.7418, "step": 7144 }, { "epoch": 0.09, "grad_norm": 6.942201614379883, "learning_rate": 1.5453660646696226e-05, "loss": 2.3927, "step": 7145 }, { "epoch": 0.09, "grad_norm": 7.336414813995361, "learning_rate": 1.5455823510327676e-05, "loss": 2.9623, "step": 7146 }, { "epoch": 0.09, "grad_norm": 7.858953475952148, "learning_rate": 1.5457986373959126e-05, "loss": 2.6771, "step": 7147 }, { "epoch": 0.09, "grad_norm": 6.604364395141602, "learning_rate": 1.5460149237590572e-05, "loss": 2.0891, "step": 7148 }, { "epoch": 0.09, "grad_norm": 7.14103889465332, "learning_rate": 1.5462312101222018e-05, "loss": 2.3733, "step": 7149 }, { "epoch": 0.09, "grad_norm": 6.369926452636719, "learning_rate": 1.5464474964853468e-05, "loss": 2.6906, "step": 7150 }, { "epoch": 0.09, "grad_norm": 7.535852432250977, "learning_rate": 1.5466637828484918e-05, "loss": 2.857, "step": 7151 }, { "epoch": 0.09, "grad_norm": 6.923208713531494, "learning_rate": 1.5468800692116364e-05, "loss": 2.6545, "step": 7152 }, { "epoch": 0.09, "grad_norm": 6.556088924407959, "learning_rate": 1.547096355574781e-05, "loss": 2.7941, "step": 7153 }, { "epoch": 0.09, "grad_norm": 6.722116470336914, "learning_rate": 1.547312641937926e-05, "loss": 2.4051, "step": 7154 }, { "epoch": 0.09, "grad_norm": 7.4811110496521, "learning_rate": 1.5475289283010707e-05, "loss": 2.2671, "step": 7155 }, { "epoch": 0.09, "grad_norm": 6.964934349060059, "learning_rate": 1.5477452146642156e-05, "loss": 2.2036, "step": 7156 }, { "epoch": 0.09, "grad_norm": 6.47849702835083, "learning_rate": 1.5479615010273603e-05, "loss": 2.5377, "step": 7157 }, { "epoch": 0.09, "grad_norm": 6.835880756378174, "learning_rate": 1.548177787390505e-05, "loss": 2.1666, "step": 7158 }, { "epoch": 0.09, "grad_norm": 6.927553176879883, "learning_rate": 1.54839407375365e-05, "loss": 2.6024, "step": 7159 }, { "epoch": 0.09, "grad_norm": 6.986194133758545, "learning_rate": 1.548610360116795e-05, "loss": 3.1116, "step": 7160 }, { "epoch": 0.09, "grad_norm": 8.065901756286621, "learning_rate": 1.5488266464799395e-05, "loss": 2.8732, "step": 7161 }, { "epoch": 0.09, "grad_norm": 7.354072093963623, "learning_rate": 1.549042932843084e-05, "loss": 2.839, "step": 7162 }, { "epoch": 0.09, "grad_norm": 7.356887340545654, "learning_rate": 1.549259219206229e-05, "loss": 2.6817, "step": 7163 }, { "epoch": 0.09, "grad_norm": 6.784029960632324, "learning_rate": 1.549475505569374e-05, "loss": 2.9676, "step": 7164 }, { "epoch": 0.09, "grad_norm": 7.890761852264404, "learning_rate": 1.5496917919325187e-05, "loss": 2.5544, "step": 7165 }, { "epoch": 0.09, "grad_norm": 7.800734519958496, "learning_rate": 1.5499080782956633e-05, "loss": 2.6689, "step": 7166 }, { "epoch": 0.09, "grad_norm": 8.842405319213867, "learning_rate": 1.5501243646588083e-05, "loss": 2.9563, "step": 7167 }, { "epoch": 0.09, "grad_norm": 7.244657516479492, "learning_rate": 1.5503406510219533e-05, "loss": 2.8193, "step": 7168 }, { "epoch": 0.09, "grad_norm": 6.608859062194824, "learning_rate": 1.550556937385098e-05, "loss": 2.1244, "step": 7169 }, { "epoch": 0.09, "grad_norm": 9.77320671081543, "learning_rate": 1.550773223748243e-05, "loss": 2.3172, "step": 7170 }, { "epoch": 0.09, "grad_norm": 7.973962783813477, "learning_rate": 1.5509895101113875e-05, "loss": 2.3965, "step": 7171 }, { "epoch": 0.09, "grad_norm": 9.072723388671875, "learning_rate": 1.5512057964745325e-05, "loss": 2.9807, "step": 7172 }, { "epoch": 0.09, "grad_norm": 7.861578464508057, "learning_rate": 1.551422082837677e-05, "loss": 2.8791, "step": 7173 }, { "epoch": 0.09, "grad_norm": 7.00758695602417, "learning_rate": 1.551638369200822e-05, "loss": 2.7461, "step": 7174 }, { "epoch": 0.09, "grad_norm": 7.747107028961182, "learning_rate": 1.5518546555639668e-05, "loss": 2.9036, "step": 7175 }, { "epoch": 0.09, "grad_norm": 7.740309715270996, "learning_rate": 1.5520709419271117e-05, "loss": 2.4178, "step": 7176 }, { "epoch": 0.09, "grad_norm": 7.233668327331543, "learning_rate": 1.5522872282902564e-05, "loss": 2.4208, "step": 7177 }, { "epoch": 0.09, "grad_norm": 7.221747398376465, "learning_rate": 1.5525035146534013e-05, "loss": 2.329, "step": 7178 }, { "epoch": 0.09, "grad_norm": 7.797089099884033, "learning_rate": 1.552719801016546e-05, "loss": 2.7262, "step": 7179 }, { "epoch": 0.09, "grad_norm": 7.526884078979492, "learning_rate": 1.552936087379691e-05, "loss": 2.818, "step": 7180 }, { "epoch": 0.09, "grad_norm": 8.752204895019531, "learning_rate": 1.5531523737428356e-05, "loss": 2.6865, "step": 7181 }, { "epoch": 0.09, "grad_norm": 7.08292818069458, "learning_rate": 1.5533686601059806e-05, "loss": 2.845, "step": 7182 }, { "epoch": 0.09, "grad_norm": 7.3219499588012695, "learning_rate": 1.5535849464691252e-05, "loss": 2.6327, "step": 7183 }, { "epoch": 0.09, "grad_norm": 7.528319358825684, "learning_rate": 1.5538012328322702e-05, "loss": 2.6219, "step": 7184 }, { "epoch": 0.09, "grad_norm": 6.40444803237915, "learning_rate": 1.5540175191954148e-05, "loss": 2.7754, "step": 7185 }, { "epoch": 0.09, "grad_norm": 7.2635498046875, "learning_rate": 1.5542338055585598e-05, "loss": 2.6346, "step": 7186 }, { "epoch": 0.09, "grad_norm": 7.766899108886719, "learning_rate": 1.5544500919217044e-05, "loss": 2.6437, "step": 7187 }, { "epoch": 0.09, "grad_norm": 6.841360092163086, "learning_rate": 1.5546663782848494e-05, "loss": 2.2083, "step": 7188 }, { "epoch": 0.09, "grad_norm": 6.457056522369385, "learning_rate": 1.554882664647994e-05, "loss": 2.2964, "step": 7189 }, { "epoch": 0.09, "grad_norm": 6.574488639831543, "learning_rate": 1.555098951011139e-05, "loss": 2.5651, "step": 7190 }, { "epoch": 0.09, "grad_norm": 7.582051753997803, "learning_rate": 1.5553152373742836e-05, "loss": 3.1828, "step": 7191 }, { "epoch": 0.09, "grad_norm": 6.689086437225342, "learning_rate": 1.5555315237374283e-05, "loss": 2.1721, "step": 7192 }, { "epoch": 0.09, "grad_norm": 7.88723611831665, "learning_rate": 1.5557478101005732e-05, "loss": 2.5208, "step": 7193 }, { "epoch": 0.09, "grad_norm": 6.928804397583008, "learning_rate": 1.5559640964637182e-05, "loss": 2.5663, "step": 7194 }, { "epoch": 0.09, "grad_norm": 6.9570136070251465, "learning_rate": 1.556180382826863e-05, "loss": 2.8733, "step": 7195 }, { "epoch": 0.09, "grad_norm": 6.960015296936035, "learning_rate": 1.5563966691900075e-05, "loss": 2.2382, "step": 7196 }, { "epoch": 0.09, "grad_norm": 7.21915340423584, "learning_rate": 1.5566129555531525e-05, "loss": 2.5179, "step": 7197 }, { "epoch": 0.09, "grad_norm": 6.702395915985107, "learning_rate": 1.5568292419162974e-05, "loss": 2.6753, "step": 7198 }, { "epoch": 0.09, "grad_norm": 7.1221022605896, "learning_rate": 1.557045528279442e-05, "loss": 2.8609, "step": 7199 }, { "epoch": 0.09, "grad_norm": 7.832160472869873, "learning_rate": 1.5572618146425867e-05, "loss": 2.4978, "step": 7200 }, { "epoch": 0.09, "grad_norm": 8.015305519104004, "learning_rate": 1.5574781010057317e-05, "loss": 2.5063, "step": 7201 }, { "epoch": 0.09, "grad_norm": 7.948428153991699, "learning_rate": 1.5576943873688767e-05, "loss": 2.7376, "step": 7202 }, { "epoch": 0.09, "grad_norm": 7.394571781158447, "learning_rate": 1.5579106737320213e-05, "loss": 2.6551, "step": 7203 }, { "epoch": 0.09, "grad_norm": 7.466315269470215, "learning_rate": 1.558126960095166e-05, "loss": 2.2804, "step": 7204 }, { "epoch": 0.09, "grad_norm": 6.428966999053955, "learning_rate": 1.558343246458311e-05, "loss": 2.3198, "step": 7205 }, { "epoch": 0.09, "grad_norm": 6.921388149261475, "learning_rate": 1.558559532821456e-05, "loss": 2.1832, "step": 7206 }, { "epoch": 0.09, "grad_norm": 6.676187515258789, "learning_rate": 1.5587758191846005e-05, "loss": 2.2868, "step": 7207 }, { "epoch": 0.09, "grad_norm": 6.9858832359313965, "learning_rate": 1.558992105547745e-05, "loss": 2.3892, "step": 7208 }, { "epoch": 0.09, "grad_norm": 7.700855255126953, "learning_rate": 1.55920839191089e-05, "loss": 2.8529, "step": 7209 }, { "epoch": 0.09, "grad_norm": 8.66484546661377, "learning_rate": 1.559424678274035e-05, "loss": 2.7606, "step": 7210 }, { "epoch": 0.09, "grad_norm": 7.236019611358643, "learning_rate": 1.5596409646371797e-05, "loss": 2.6271, "step": 7211 }, { "epoch": 0.09, "grad_norm": 6.235623836517334, "learning_rate": 1.5598572510003244e-05, "loss": 2.5679, "step": 7212 }, { "epoch": 0.09, "grad_norm": 6.958927631378174, "learning_rate": 1.5600735373634694e-05, "loss": 2.3272, "step": 7213 }, { "epoch": 0.09, "grad_norm": 7.951030731201172, "learning_rate": 1.5602898237266143e-05, "loss": 3.0214, "step": 7214 }, { "epoch": 0.09, "grad_norm": 7.250650882720947, "learning_rate": 1.560506110089759e-05, "loss": 2.6245, "step": 7215 }, { "epoch": 0.09, "grad_norm": 6.793599605560303, "learning_rate": 1.5607223964529036e-05, "loss": 2.6164, "step": 7216 }, { "epoch": 0.09, "grad_norm": 7.404141902923584, "learning_rate": 1.5609386828160486e-05, "loss": 2.3687, "step": 7217 }, { "epoch": 0.09, "grad_norm": 6.623730659484863, "learning_rate": 1.5611549691791935e-05, "loss": 2.3665, "step": 7218 }, { "epoch": 0.09, "grad_norm": 7.587255477905273, "learning_rate": 1.5613712555423382e-05, "loss": 2.8759, "step": 7219 }, { "epoch": 0.09, "grad_norm": 6.9916672706604, "learning_rate": 1.5615875419054828e-05, "loss": 2.363, "step": 7220 }, { "epoch": 0.09, "grad_norm": 6.7013726234436035, "learning_rate": 1.5618038282686278e-05, "loss": 2.4809, "step": 7221 }, { "epoch": 0.09, "grad_norm": 7.09263277053833, "learning_rate": 1.5620201146317728e-05, "loss": 2.5636, "step": 7222 }, { "epoch": 0.09, "grad_norm": 6.235314846038818, "learning_rate": 1.5622364009949174e-05, "loss": 2.4155, "step": 7223 }, { "epoch": 0.09, "grad_norm": 6.91861629486084, "learning_rate": 1.562452687358062e-05, "loss": 2.682, "step": 7224 }, { "epoch": 0.09, "grad_norm": 6.799290657043457, "learning_rate": 1.562668973721207e-05, "loss": 2.5457, "step": 7225 }, { "epoch": 0.09, "grad_norm": 7.049903869628906, "learning_rate": 1.562885260084352e-05, "loss": 2.5954, "step": 7226 }, { "epoch": 0.09, "grad_norm": 7.472658634185791, "learning_rate": 1.5631015464474966e-05, "loss": 2.6146, "step": 7227 }, { "epoch": 0.09, "grad_norm": 6.4462504386901855, "learning_rate": 1.5633178328106413e-05, "loss": 2.387, "step": 7228 }, { "epoch": 0.09, "grad_norm": 6.9067254066467285, "learning_rate": 1.5635341191737862e-05, "loss": 2.3474, "step": 7229 }, { "epoch": 0.09, "grad_norm": 6.634068489074707, "learning_rate": 1.563750405536931e-05, "loss": 3.1756, "step": 7230 }, { "epoch": 0.09, "grad_norm": 8.154739379882812, "learning_rate": 1.563966691900076e-05, "loss": 3.1071, "step": 7231 }, { "epoch": 0.09, "grad_norm": 7.5427045822143555, "learning_rate": 1.5641829782632208e-05, "loss": 2.6654, "step": 7232 }, { "epoch": 0.09, "grad_norm": 7.408502578735352, "learning_rate": 1.5643992646263655e-05, "loss": 2.7853, "step": 7233 }, { "epoch": 0.09, "grad_norm": 7.713600158691406, "learning_rate": 1.56461555098951e-05, "loss": 3.1207, "step": 7234 }, { "epoch": 0.09, "grad_norm": 7.086551189422607, "learning_rate": 1.564831837352655e-05, "loss": 2.0846, "step": 7235 }, { "epoch": 0.09, "grad_norm": 6.209238529205322, "learning_rate": 1.5650481237158e-05, "loss": 2.4666, "step": 7236 }, { "epoch": 0.09, "grad_norm": 7.242464065551758, "learning_rate": 1.5652644100789447e-05, "loss": 2.6456, "step": 7237 }, { "epoch": 0.09, "grad_norm": 7.833320617675781, "learning_rate": 1.5654806964420893e-05, "loss": 2.7365, "step": 7238 }, { "epoch": 0.09, "grad_norm": 6.333113193511963, "learning_rate": 1.5656969828052343e-05, "loss": 2.4699, "step": 7239 }, { "epoch": 0.09, "grad_norm": 7.174895763397217, "learning_rate": 1.5659132691683793e-05, "loss": 1.8849, "step": 7240 }, { "epoch": 0.09, "grad_norm": 7.076484680175781, "learning_rate": 1.566129555531524e-05, "loss": 2.9334, "step": 7241 }, { "epoch": 0.09, "grad_norm": 7.164216041564941, "learning_rate": 1.5663458418946685e-05, "loss": 2.8641, "step": 7242 }, { "epoch": 0.09, "grad_norm": 7.235805511474609, "learning_rate": 1.5665621282578135e-05, "loss": 2.7191, "step": 7243 }, { "epoch": 0.09, "grad_norm": 6.823815822601318, "learning_rate": 1.5667784146209585e-05, "loss": 2.6233, "step": 7244 }, { "epoch": 0.09, "grad_norm": 6.958104610443115, "learning_rate": 1.566994700984103e-05, "loss": 2.6077, "step": 7245 }, { "epoch": 0.09, "grad_norm": 7.181694984436035, "learning_rate": 1.5672109873472477e-05, "loss": 2.8361, "step": 7246 }, { "epoch": 0.09, "grad_norm": 6.556369304656982, "learning_rate": 1.5674272737103927e-05, "loss": 2.7435, "step": 7247 }, { "epoch": 0.09, "grad_norm": 6.977806568145752, "learning_rate": 1.5676435600735377e-05, "loss": 2.6058, "step": 7248 }, { "epoch": 0.09, "grad_norm": 7.387730598449707, "learning_rate": 1.5678598464366823e-05, "loss": 2.5722, "step": 7249 }, { "epoch": 0.09, "grad_norm": 6.504360675811768, "learning_rate": 1.568076132799827e-05, "loss": 2.8604, "step": 7250 }, { "epoch": 0.09, "grad_norm": 6.580363750457764, "learning_rate": 1.568292419162972e-05, "loss": 1.8589, "step": 7251 }, { "epoch": 0.09, "grad_norm": 7.378255367279053, "learning_rate": 1.568508705526117e-05, "loss": 2.316, "step": 7252 }, { "epoch": 0.09, "grad_norm": 7.073019504547119, "learning_rate": 1.5687249918892616e-05, "loss": 2.2457, "step": 7253 }, { "epoch": 0.09, "grad_norm": 7.134130477905273, "learning_rate": 1.5689412782524062e-05, "loss": 2.6028, "step": 7254 }, { "epoch": 0.09, "grad_norm": 6.892433166503906, "learning_rate": 1.569157564615551e-05, "loss": 2.5592, "step": 7255 }, { "epoch": 0.09, "grad_norm": 8.211385726928711, "learning_rate": 1.569373850978696e-05, "loss": 2.5498, "step": 7256 }, { "epoch": 0.09, "grad_norm": 7.489207744598389, "learning_rate": 1.5695901373418408e-05, "loss": 2.9442, "step": 7257 }, { "epoch": 0.09, "grad_norm": 6.366893768310547, "learning_rate": 1.5698064237049854e-05, "loss": 2.2212, "step": 7258 }, { "epoch": 0.09, "grad_norm": 7.315288066864014, "learning_rate": 1.5700227100681304e-05, "loss": 2.7944, "step": 7259 }, { "epoch": 0.09, "grad_norm": 7.002262592315674, "learning_rate": 1.5702389964312754e-05, "loss": 2.4612, "step": 7260 }, { "epoch": 0.09, "grad_norm": 6.769563674926758, "learning_rate": 1.57045528279442e-05, "loss": 2.3353, "step": 7261 }, { "epoch": 0.09, "grad_norm": 7.248333930969238, "learning_rate": 1.5706715691575646e-05, "loss": 2.3848, "step": 7262 }, { "epoch": 0.09, "grad_norm": 7.208527565002441, "learning_rate": 1.5708878555207096e-05, "loss": 2.2865, "step": 7263 }, { "epoch": 0.09, "grad_norm": 7.034058570861816, "learning_rate": 1.5711041418838542e-05, "loss": 2.6638, "step": 7264 }, { "epoch": 0.09, "grad_norm": 7.680915832519531, "learning_rate": 1.5713204282469992e-05, "loss": 2.5502, "step": 7265 }, { "epoch": 0.09, "grad_norm": 5.613720417022705, "learning_rate": 1.571536714610144e-05, "loss": 2.2013, "step": 7266 }, { "epoch": 0.09, "grad_norm": 6.877796649932861, "learning_rate": 1.5717530009732888e-05, "loss": 3.1058, "step": 7267 }, { "epoch": 0.09, "grad_norm": 6.549979209899902, "learning_rate": 1.5719692873364335e-05, "loss": 2.3554, "step": 7268 }, { "epoch": 0.09, "grad_norm": 7.124426364898682, "learning_rate": 1.5721855736995784e-05, "loss": 2.5673, "step": 7269 }, { "epoch": 0.09, "grad_norm": 6.362118721008301, "learning_rate": 1.572401860062723e-05, "loss": 2.585, "step": 7270 }, { "epoch": 0.09, "grad_norm": 6.645132064819336, "learning_rate": 1.572618146425868e-05, "loss": 2.6607, "step": 7271 }, { "epoch": 0.09, "grad_norm": 6.093810081481934, "learning_rate": 1.5728344327890127e-05, "loss": 2.2157, "step": 7272 }, { "epoch": 0.09, "grad_norm": 8.07824420928955, "learning_rate": 1.5730507191521577e-05, "loss": 2.8715, "step": 7273 }, { "epoch": 0.09, "grad_norm": 6.882369518280029, "learning_rate": 1.5732670055153023e-05, "loss": 2.6487, "step": 7274 }, { "epoch": 0.09, "grad_norm": 6.672093868255615, "learning_rate": 1.5734832918784473e-05, "loss": 3.0478, "step": 7275 }, { "epoch": 0.09, "grad_norm": 7.629555702209473, "learning_rate": 1.573699578241592e-05, "loss": 2.6569, "step": 7276 }, { "epoch": 0.09, "grad_norm": 7.125749588012695, "learning_rate": 1.573915864604737e-05, "loss": 2.658, "step": 7277 }, { "epoch": 0.09, "grad_norm": 7.872119426727295, "learning_rate": 1.5741321509678815e-05, "loss": 2.9041, "step": 7278 }, { "epoch": 0.09, "grad_norm": 7.0341291427612305, "learning_rate": 1.5743484373310265e-05, "loss": 2.9152, "step": 7279 }, { "epoch": 0.09, "grad_norm": 6.835422039031982, "learning_rate": 1.574564723694171e-05, "loss": 2.4668, "step": 7280 }, { "epoch": 0.09, "grad_norm": 7.286705493927002, "learning_rate": 1.574781010057316e-05, "loss": 2.7814, "step": 7281 }, { "epoch": 0.09, "grad_norm": 7.080769062042236, "learning_rate": 1.5749972964204607e-05, "loss": 2.5031, "step": 7282 }, { "epoch": 0.09, "grad_norm": 7.543501377105713, "learning_rate": 1.5752135827836057e-05, "loss": 2.7694, "step": 7283 }, { "epoch": 0.09, "grad_norm": 6.976400852203369, "learning_rate": 1.5754298691467503e-05, "loss": 2.3661, "step": 7284 }, { "epoch": 0.09, "grad_norm": 7.008222579956055, "learning_rate": 1.5756461555098953e-05, "loss": 2.8834, "step": 7285 }, { "epoch": 0.09, "grad_norm": 6.3387770652771, "learning_rate": 1.57586244187304e-05, "loss": 2.5309, "step": 7286 }, { "epoch": 0.09, "grad_norm": 6.5112762451171875, "learning_rate": 1.576078728236185e-05, "loss": 2.6357, "step": 7287 }, { "epoch": 0.09, "grad_norm": 7.098365306854248, "learning_rate": 1.5762950145993296e-05, "loss": 2.3402, "step": 7288 }, { "epoch": 0.09, "grad_norm": 6.637659549713135, "learning_rate": 1.5765113009624745e-05, "loss": 2.7203, "step": 7289 }, { "epoch": 0.09, "grad_norm": 6.2779669761657715, "learning_rate": 1.5767275873256192e-05, "loss": 2.5015, "step": 7290 }, { "epoch": 0.09, "grad_norm": 6.856103897094727, "learning_rate": 1.576943873688764e-05, "loss": 2.8986, "step": 7291 }, { "epoch": 0.09, "grad_norm": 6.41923713684082, "learning_rate": 1.5771601600519088e-05, "loss": 2.0553, "step": 7292 }, { "epoch": 0.09, "grad_norm": 7.404824733734131, "learning_rate": 1.5773764464150538e-05, "loss": 3.067, "step": 7293 }, { "epoch": 0.09, "grad_norm": 6.583346366882324, "learning_rate": 1.5775927327781984e-05, "loss": 2.3519, "step": 7294 }, { "epoch": 0.09, "grad_norm": 7.425209045410156, "learning_rate": 1.5778090191413434e-05, "loss": 2.7729, "step": 7295 }, { "epoch": 0.09, "grad_norm": 6.84530782699585, "learning_rate": 1.578025305504488e-05, "loss": 2.4782, "step": 7296 }, { "epoch": 0.09, "grad_norm": 7.1716179847717285, "learning_rate": 1.578241591867633e-05, "loss": 2.4132, "step": 7297 }, { "epoch": 0.09, "grad_norm": 6.627607345581055, "learning_rate": 1.578457878230778e-05, "loss": 2.2872, "step": 7298 }, { "epoch": 0.09, "grad_norm": 7.231386184692383, "learning_rate": 1.5786741645939226e-05, "loss": 2.5105, "step": 7299 }, { "epoch": 0.09, "grad_norm": 7.0793657302856445, "learning_rate": 1.5788904509570672e-05, "loss": 2.715, "step": 7300 }, { "epoch": 0.09, "grad_norm": 6.120035648345947, "learning_rate": 1.579106737320212e-05, "loss": 2.5871, "step": 7301 }, { "epoch": 0.09, "grad_norm": 7.916081428527832, "learning_rate": 1.579323023683357e-05, "loss": 2.9719, "step": 7302 }, { "epoch": 0.09, "grad_norm": 6.134711742401123, "learning_rate": 1.5795393100465018e-05, "loss": 2.4683, "step": 7303 }, { "epoch": 0.09, "grad_norm": 6.998986721038818, "learning_rate": 1.5797555964096464e-05, "loss": 2.8656, "step": 7304 }, { "epoch": 0.09, "grad_norm": 7.2828240394592285, "learning_rate": 1.579971882772791e-05, "loss": 2.2274, "step": 7305 }, { "epoch": 0.09, "grad_norm": 6.524473667144775, "learning_rate": 1.580188169135936e-05, "loss": 2.4705, "step": 7306 }, { "epoch": 0.09, "grad_norm": 5.909236907958984, "learning_rate": 1.580404455499081e-05, "loss": 1.8613, "step": 7307 }, { "epoch": 0.09, "grad_norm": 7.22368049621582, "learning_rate": 1.5806207418622257e-05, "loss": 3.2581, "step": 7308 }, { "epoch": 0.09, "grad_norm": 5.764204502105713, "learning_rate": 1.5808370282253703e-05, "loss": 2.1421, "step": 7309 }, { "epoch": 0.09, "grad_norm": 7.78087043762207, "learning_rate": 1.5810533145885153e-05, "loss": 2.5648, "step": 7310 }, { "epoch": 0.09, "grad_norm": 7.08748197555542, "learning_rate": 1.5812696009516602e-05, "loss": 2.6278, "step": 7311 }, { "epoch": 0.09, "grad_norm": 7.083211421966553, "learning_rate": 1.581485887314805e-05, "loss": 2.4409, "step": 7312 }, { "epoch": 0.09, "grad_norm": 7.1735334396362305, "learning_rate": 1.5817021736779495e-05, "loss": 2.2737, "step": 7313 }, { "epoch": 0.09, "grad_norm": 6.537448406219482, "learning_rate": 1.5819184600410945e-05, "loss": 2.4524, "step": 7314 }, { "epoch": 0.09, "grad_norm": 8.430065155029297, "learning_rate": 1.5821347464042395e-05, "loss": 3.1102, "step": 7315 }, { "epoch": 0.09, "grad_norm": 6.908890247344971, "learning_rate": 1.582351032767384e-05, "loss": 2.6111, "step": 7316 }, { "epoch": 0.09, "grad_norm": 7.026583671569824, "learning_rate": 1.5825673191305287e-05, "loss": 2.4293, "step": 7317 }, { "epoch": 0.09, "grad_norm": 6.97703742980957, "learning_rate": 1.5827836054936737e-05, "loss": 2.7649, "step": 7318 }, { "epoch": 0.09, "grad_norm": 7.536894798278809, "learning_rate": 1.5829998918568187e-05, "loss": 2.8571, "step": 7319 }, { "epoch": 0.09, "grad_norm": 6.748569965362549, "learning_rate": 1.5832161782199633e-05, "loss": 2.6356, "step": 7320 }, { "epoch": 0.1, "grad_norm": 6.581500053405762, "learning_rate": 1.583432464583108e-05, "loss": 2.5847, "step": 7321 }, { "epoch": 0.1, "grad_norm": 6.8735480308532715, "learning_rate": 1.583648750946253e-05, "loss": 2.663, "step": 7322 }, { "epoch": 0.1, "grad_norm": 6.820468902587891, "learning_rate": 1.583865037309398e-05, "loss": 2.5846, "step": 7323 }, { "epoch": 0.1, "grad_norm": 7.006976127624512, "learning_rate": 1.5840813236725425e-05, "loss": 3.0297, "step": 7324 }, { "epoch": 0.1, "grad_norm": 7.151459217071533, "learning_rate": 1.5842976100356872e-05, "loss": 2.1999, "step": 7325 }, { "epoch": 0.1, "grad_norm": 6.5770111083984375, "learning_rate": 1.584513896398832e-05, "loss": 2.7493, "step": 7326 }, { "epoch": 0.1, "grad_norm": 6.430453300476074, "learning_rate": 1.584730182761977e-05, "loss": 2.5437, "step": 7327 }, { "epoch": 0.1, "grad_norm": 6.637750625610352, "learning_rate": 1.5849464691251218e-05, "loss": 2.943, "step": 7328 }, { "epoch": 0.1, "grad_norm": 6.846380710601807, "learning_rate": 1.5851627554882667e-05, "loss": 1.9623, "step": 7329 }, { "epoch": 0.1, "grad_norm": 7.00887393951416, "learning_rate": 1.5853790418514114e-05, "loss": 2.6376, "step": 7330 }, { "epoch": 0.1, "grad_norm": 7.082189559936523, "learning_rate": 1.5855953282145564e-05, "loss": 2.8615, "step": 7331 }, { "epoch": 0.1, "grad_norm": 5.9551215171813965, "learning_rate": 1.585811614577701e-05, "loss": 2.423, "step": 7332 }, { "epoch": 0.1, "grad_norm": 6.5980000495910645, "learning_rate": 1.586027900940846e-05, "loss": 2.651, "step": 7333 }, { "epoch": 0.1, "grad_norm": 7.826911449432373, "learning_rate": 1.5862441873039906e-05, "loss": 2.3873, "step": 7334 }, { "epoch": 0.1, "grad_norm": 6.634844779968262, "learning_rate": 1.5864604736671356e-05, "loss": 2.2803, "step": 7335 }, { "epoch": 0.1, "grad_norm": 7.441333293914795, "learning_rate": 1.5866767600302802e-05, "loss": 2.6523, "step": 7336 }, { "epoch": 0.1, "grad_norm": 6.397400856018066, "learning_rate": 1.5868930463934252e-05, "loss": 2.7486, "step": 7337 }, { "epoch": 0.1, "grad_norm": 7.397233486175537, "learning_rate": 1.5871093327565698e-05, "loss": 2.9312, "step": 7338 }, { "epoch": 0.1, "grad_norm": 7.143794059753418, "learning_rate": 1.5873256191197145e-05, "loss": 2.7596, "step": 7339 }, { "epoch": 0.1, "grad_norm": 6.48563814163208, "learning_rate": 1.5875419054828594e-05, "loss": 2.0805, "step": 7340 }, { "epoch": 0.1, "grad_norm": 6.5414581298828125, "learning_rate": 1.5877581918460044e-05, "loss": 2.8013, "step": 7341 }, { "epoch": 0.1, "grad_norm": 6.692915439605713, "learning_rate": 1.587974478209149e-05, "loss": 2.8755, "step": 7342 }, { "epoch": 0.1, "grad_norm": 7.261704921722412, "learning_rate": 1.5881907645722937e-05, "loss": 2.7522, "step": 7343 }, { "epoch": 0.1, "grad_norm": 7.2753190994262695, "learning_rate": 1.5884070509354386e-05, "loss": 2.4943, "step": 7344 }, { "epoch": 0.1, "grad_norm": 7.174779891967773, "learning_rate": 1.5886233372985836e-05, "loss": 2.4325, "step": 7345 }, { "epoch": 0.1, "grad_norm": 7.814793109893799, "learning_rate": 1.5888396236617283e-05, "loss": 3.3997, "step": 7346 }, { "epoch": 0.1, "grad_norm": 7.835060119628906, "learning_rate": 1.589055910024873e-05, "loss": 2.6037, "step": 7347 }, { "epoch": 0.1, "grad_norm": 7.118619441986084, "learning_rate": 1.589272196388018e-05, "loss": 2.8995, "step": 7348 }, { "epoch": 0.1, "grad_norm": 7.019162654876709, "learning_rate": 1.589488482751163e-05, "loss": 2.9698, "step": 7349 }, { "epoch": 0.1, "grad_norm": 7.135332107543945, "learning_rate": 1.5897047691143075e-05, "loss": 2.9605, "step": 7350 }, { "epoch": 0.1, "grad_norm": 7.37434196472168, "learning_rate": 1.589921055477452e-05, "loss": 2.7866, "step": 7351 }, { "epoch": 0.1, "grad_norm": 6.050037860870361, "learning_rate": 1.590137341840597e-05, "loss": 2.2429, "step": 7352 }, { "epoch": 0.1, "grad_norm": 6.867334842681885, "learning_rate": 1.590353628203742e-05, "loss": 2.4388, "step": 7353 }, { "epoch": 0.1, "grad_norm": 6.139524936676025, "learning_rate": 1.5905699145668867e-05, "loss": 2.2492, "step": 7354 }, { "epoch": 0.1, "grad_norm": 7.880849361419678, "learning_rate": 1.5907862009300313e-05, "loss": 2.5815, "step": 7355 }, { "epoch": 0.1, "grad_norm": 6.208156108856201, "learning_rate": 1.5910024872931763e-05, "loss": 2.9074, "step": 7356 }, { "epoch": 0.1, "grad_norm": 7.291664123535156, "learning_rate": 1.5912187736563213e-05, "loss": 3.1524, "step": 7357 }, { "epoch": 0.1, "grad_norm": 7.92522668838501, "learning_rate": 1.591435060019466e-05, "loss": 2.3466, "step": 7358 }, { "epoch": 0.1, "grad_norm": 5.9631218910217285, "learning_rate": 1.5916513463826106e-05, "loss": 1.9438, "step": 7359 }, { "epoch": 0.1, "grad_norm": 7.034526348114014, "learning_rate": 1.5918676327457555e-05, "loss": 2.355, "step": 7360 }, { "epoch": 0.1, "grad_norm": 7.152739524841309, "learning_rate": 1.5920839191089005e-05, "loss": 2.5907, "step": 7361 }, { "epoch": 0.1, "grad_norm": 7.1768622398376465, "learning_rate": 1.592300205472045e-05, "loss": 2.4236, "step": 7362 }, { "epoch": 0.1, "grad_norm": 6.4369025230407715, "learning_rate": 1.5925164918351898e-05, "loss": 2.5945, "step": 7363 }, { "epoch": 0.1, "grad_norm": 7.279486179351807, "learning_rate": 1.5927327781983347e-05, "loss": 2.4671, "step": 7364 }, { "epoch": 0.1, "grad_norm": 7.361173152923584, "learning_rate": 1.5929490645614797e-05, "loss": 2.744, "step": 7365 }, { "epoch": 0.1, "grad_norm": 6.846809387207031, "learning_rate": 1.5931653509246244e-05, "loss": 2.5169, "step": 7366 }, { "epoch": 0.1, "grad_norm": 6.512603282928467, "learning_rate": 1.593381637287769e-05, "loss": 2.0294, "step": 7367 }, { "epoch": 0.1, "grad_norm": 7.232084274291992, "learning_rate": 1.593597923650914e-05, "loss": 2.9058, "step": 7368 }, { "epoch": 0.1, "grad_norm": 7.3235554695129395, "learning_rate": 1.593814210014059e-05, "loss": 3.2327, "step": 7369 }, { "epoch": 0.1, "grad_norm": 6.078493595123291, "learning_rate": 1.5940304963772036e-05, "loss": 2.4349, "step": 7370 }, { "epoch": 0.1, "grad_norm": 6.978241920471191, "learning_rate": 1.5942467827403482e-05, "loss": 1.9557, "step": 7371 }, { "epoch": 0.1, "grad_norm": 7.046560764312744, "learning_rate": 1.5944630691034932e-05, "loss": 2.5055, "step": 7372 }, { "epoch": 0.1, "grad_norm": 6.190722942352295, "learning_rate": 1.5946793554666378e-05, "loss": 2.3082, "step": 7373 }, { "epoch": 0.1, "grad_norm": 7.677418231964111, "learning_rate": 1.5948956418297828e-05, "loss": 2.7949, "step": 7374 }, { "epoch": 0.1, "grad_norm": 7.058958053588867, "learning_rate": 1.5951119281929274e-05, "loss": 2.4407, "step": 7375 }, { "epoch": 0.1, "grad_norm": 5.795088291168213, "learning_rate": 1.5953282145560724e-05, "loss": 2.1268, "step": 7376 }, { "epoch": 0.1, "grad_norm": 7.928457736968994, "learning_rate": 1.595544500919217e-05, "loss": 2.8225, "step": 7377 }, { "epoch": 0.1, "grad_norm": 6.710714340209961, "learning_rate": 1.595760787282362e-05, "loss": 2.4373, "step": 7378 }, { "epoch": 0.1, "grad_norm": 6.3760175704956055, "learning_rate": 1.5959770736455067e-05, "loss": 2.5979, "step": 7379 }, { "epoch": 0.1, "grad_norm": 7.048341274261475, "learning_rate": 1.5961933600086516e-05, "loss": 2.7371, "step": 7380 }, { "epoch": 0.1, "grad_norm": 6.4261884689331055, "learning_rate": 1.5964096463717963e-05, "loss": 2.412, "step": 7381 }, { "epoch": 0.1, "grad_norm": 7.5657734870910645, "learning_rate": 1.5966259327349412e-05, "loss": 2.9809, "step": 7382 }, { "epoch": 0.1, "grad_norm": 6.889157295227051, "learning_rate": 1.596842219098086e-05, "loss": 2.2765, "step": 7383 }, { "epoch": 0.1, "grad_norm": 7.172541618347168, "learning_rate": 1.597058505461231e-05, "loss": 2.7393, "step": 7384 }, { "epoch": 0.1, "grad_norm": 6.406197547912598, "learning_rate": 1.5972747918243755e-05, "loss": 2.0566, "step": 7385 }, { "epoch": 0.1, "grad_norm": 7.272675514221191, "learning_rate": 1.5974910781875205e-05, "loss": 2.2247, "step": 7386 }, { "epoch": 0.1, "grad_norm": 7.020864963531494, "learning_rate": 1.597707364550665e-05, "loss": 2.7791, "step": 7387 }, { "epoch": 0.1, "grad_norm": 6.498499870300293, "learning_rate": 1.59792365091381e-05, "loss": 2.5244, "step": 7388 }, { "epoch": 0.1, "grad_norm": 6.536552429199219, "learning_rate": 1.5981399372769547e-05, "loss": 1.8474, "step": 7389 }, { "epoch": 0.1, "grad_norm": 6.027113437652588, "learning_rate": 1.5983562236400997e-05, "loss": 2.4656, "step": 7390 }, { "epoch": 0.1, "grad_norm": 7.825985431671143, "learning_rate": 1.5985725100032443e-05, "loss": 2.5301, "step": 7391 }, { "epoch": 0.1, "grad_norm": 6.099410057067871, "learning_rate": 1.5987887963663893e-05, "loss": 2.7506, "step": 7392 }, { "epoch": 0.1, "grad_norm": 6.383248805999756, "learning_rate": 1.599005082729534e-05, "loss": 2.7361, "step": 7393 }, { "epoch": 0.1, "grad_norm": 7.447007179260254, "learning_rate": 1.599221369092679e-05, "loss": 2.8497, "step": 7394 }, { "epoch": 0.1, "grad_norm": 7.459449291229248, "learning_rate": 1.599437655455824e-05, "loss": 2.7974, "step": 7395 }, { "epoch": 0.1, "grad_norm": 6.654777526855469, "learning_rate": 1.5996539418189685e-05, "loss": 2.748, "step": 7396 }, { "epoch": 0.1, "grad_norm": 6.6292724609375, "learning_rate": 1.599870228182113e-05, "loss": 2.5648, "step": 7397 }, { "epoch": 0.1, "grad_norm": 6.700908660888672, "learning_rate": 1.600086514545258e-05, "loss": 2.0838, "step": 7398 }, { "epoch": 0.1, "grad_norm": 7.110265731811523, "learning_rate": 1.600302800908403e-05, "loss": 2.6854, "step": 7399 }, { "epoch": 0.1, "grad_norm": 6.064968109130859, "learning_rate": 1.6005190872715477e-05, "loss": 2.0863, "step": 7400 }, { "epoch": 0.1, "grad_norm": 6.708709239959717, "learning_rate": 1.6007353736346924e-05, "loss": 2.6058, "step": 7401 }, { "epoch": 0.1, "grad_norm": 7.0682053565979, "learning_rate": 1.6009516599978373e-05, "loss": 2.4751, "step": 7402 }, { "epoch": 0.1, "grad_norm": 6.771344184875488, "learning_rate": 1.6011679463609823e-05, "loss": 2.0606, "step": 7403 }, { "epoch": 0.1, "grad_norm": 6.990740776062012, "learning_rate": 1.601384232724127e-05, "loss": 2.3746, "step": 7404 }, { "epoch": 0.1, "grad_norm": 6.599619388580322, "learning_rate": 1.6016005190872716e-05, "loss": 2.5015, "step": 7405 }, { "epoch": 0.1, "grad_norm": 6.395177364349365, "learning_rate": 1.6018168054504166e-05, "loss": 2.79, "step": 7406 }, { "epoch": 0.1, "grad_norm": 6.839558124542236, "learning_rate": 1.6020330918135615e-05, "loss": 2.6434, "step": 7407 }, { "epoch": 0.1, "grad_norm": 6.480340480804443, "learning_rate": 1.6022493781767062e-05, "loss": 2.4269, "step": 7408 }, { "epoch": 0.1, "grad_norm": 6.919928073883057, "learning_rate": 1.6024656645398508e-05, "loss": 2.7991, "step": 7409 }, { "epoch": 0.1, "grad_norm": 7.774156093597412, "learning_rate": 1.6026819509029954e-05, "loss": 2.558, "step": 7410 }, { "epoch": 0.1, "grad_norm": 7.736296653747559, "learning_rate": 1.6028982372661404e-05, "loss": 2.7993, "step": 7411 }, { "epoch": 0.1, "grad_norm": 7.616169452667236, "learning_rate": 1.6031145236292854e-05, "loss": 2.633, "step": 7412 }, { "epoch": 0.1, "grad_norm": 7.906099796295166, "learning_rate": 1.60333080999243e-05, "loss": 3.1725, "step": 7413 }, { "epoch": 0.1, "grad_norm": 6.653243064880371, "learning_rate": 1.6035470963555747e-05, "loss": 2.635, "step": 7414 }, { "epoch": 0.1, "grad_norm": 7.540909767150879, "learning_rate": 1.6037633827187196e-05, "loss": 3.1491, "step": 7415 }, { "epoch": 0.1, "grad_norm": 6.685023307800293, "learning_rate": 1.6039796690818646e-05, "loss": 2.3692, "step": 7416 }, { "epoch": 0.1, "grad_norm": 7.352789402008057, "learning_rate": 1.6041959554450092e-05, "loss": 3.0991, "step": 7417 }, { "epoch": 0.1, "grad_norm": 7.3301167488098145, "learning_rate": 1.604412241808154e-05, "loss": 2.091, "step": 7418 }, { "epoch": 0.1, "grad_norm": 7.182517051696777, "learning_rate": 1.604628528171299e-05, "loss": 2.9451, "step": 7419 }, { "epoch": 0.1, "grad_norm": 7.709661960601807, "learning_rate": 1.604844814534444e-05, "loss": 2.3441, "step": 7420 }, { "epoch": 0.1, "grad_norm": 8.01426887512207, "learning_rate": 1.6050611008975885e-05, "loss": 2.7145, "step": 7421 }, { "epoch": 0.1, "grad_norm": 9.626663208007812, "learning_rate": 1.605277387260733e-05, "loss": 3.3633, "step": 7422 }, { "epoch": 0.1, "grad_norm": 7.919289588928223, "learning_rate": 1.605493673623878e-05, "loss": 2.7676, "step": 7423 }, { "epoch": 0.1, "grad_norm": 6.929496765136719, "learning_rate": 1.605709959987023e-05, "loss": 2.3487, "step": 7424 }, { "epoch": 0.1, "grad_norm": 6.666438579559326, "learning_rate": 1.6059262463501677e-05, "loss": 2.4465, "step": 7425 }, { "epoch": 0.1, "grad_norm": 8.4421968460083, "learning_rate": 1.6061425327133127e-05, "loss": 2.6452, "step": 7426 }, { "epoch": 0.1, "grad_norm": 6.963001728057861, "learning_rate": 1.6063588190764573e-05, "loss": 3.1637, "step": 7427 }, { "epoch": 0.1, "grad_norm": 7.506794452667236, "learning_rate": 1.6065751054396023e-05, "loss": 2.0681, "step": 7428 }, { "epoch": 0.1, "grad_norm": 6.972014427185059, "learning_rate": 1.606791391802747e-05, "loss": 2.576, "step": 7429 }, { "epoch": 0.1, "grad_norm": 6.223151683807373, "learning_rate": 1.607007678165892e-05, "loss": 2.0019, "step": 7430 }, { "epoch": 0.1, "grad_norm": 6.619872570037842, "learning_rate": 1.6072239645290365e-05, "loss": 2.4529, "step": 7431 }, { "epoch": 0.1, "grad_norm": 6.4034576416015625, "learning_rate": 1.6074402508921815e-05, "loss": 2.4333, "step": 7432 }, { "epoch": 0.1, "grad_norm": 7.142964839935303, "learning_rate": 1.607656537255326e-05, "loss": 2.6786, "step": 7433 }, { "epoch": 0.1, "grad_norm": 6.291795253753662, "learning_rate": 1.607872823618471e-05, "loss": 2.7648, "step": 7434 }, { "epoch": 0.1, "grad_norm": 6.798057556152344, "learning_rate": 1.6080891099816157e-05, "loss": 2.5479, "step": 7435 }, { "epoch": 0.1, "grad_norm": 6.18123197555542, "learning_rate": 1.6083053963447607e-05, "loss": 2.406, "step": 7436 }, { "epoch": 0.1, "grad_norm": 7.2729597091674805, "learning_rate": 1.6085216827079053e-05, "loss": 2.7169, "step": 7437 }, { "epoch": 0.1, "grad_norm": 7.061403274536133, "learning_rate": 1.6087379690710503e-05, "loss": 2.4385, "step": 7438 }, { "epoch": 0.1, "grad_norm": 7.212247848510742, "learning_rate": 1.608954255434195e-05, "loss": 2.4118, "step": 7439 }, { "epoch": 0.1, "grad_norm": 6.821460247039795, "learning_rate": 1.60917054179734e-05, "loss": 2.6052, "step": 7440 }, { "epoch": 0.1, "grad_norm": 7.243228912353516, "learning_rate": 1.6093868281604846e-05, "loss": 2.2493, "step": 7441 }, { "epoch": 0.1, "grad_norm": 6.580670356750488, "learning_rate": 1.6096031145236295e-05, "loss": 2.714, "step": 7442 }, { "epoch": 0.1, "grad_norm": 7.6357951164245605, "learning_rate": 1.6098194008867742e-05, "loss": 2.3973, "step": 7443 }, { "epoch": 0.1, "grad_norm": 7.254674434661865, "learning_rate": 1.610035687249919e-05, "loss": 2.4423, "step": 7444 }, { "epoch": 0.1, "grad_norm": 6.518571376800537, "learning_rate": 1.6102519736130638e-05, "loss": 2.8409, "step": 7445 }, { "epoch": 0.1, "grad_norm": 6.537463188171387, "learning_rate": 1.6104682599762088e-05, "loss": 2.6015, "step": 7446 }, { "epoch": 0.1, "grad_norm": 7.124011039733887, "learning_rate": 1.6106845463393534e-05, "loss": 2.4744, "step": 7447 }, { "epoch": 0.1, "grad_norm": 6.458573341369629, "learning_rate": 1.610900832702498e-05, "loss": 2.7756, "step": 7448 }, { "epoch": 0.1, "grad_norm": 6.541853427886963, "learning_rate": 1.611117119065643e-05, "loss": 2.3747, "step": 7449 }, { "epoch": 0.1, "grad_norm": 6.123569488525391, "learning_rate": 1.611333405428788e-05, "loss": 2.7874, "step": 7450 }, { "epoch": 0.1, "grad_norm": 6.404360771179199, "learning_rate": 1.6115496917919326e-05, "loss": 2.4285, "step": 7451 }, { "epoch": 0.1, "grad_norm": 7.376855850219727, "learning_rate": 1.6117659781550773e-05, "loss": 2.8669, "step": 7452 }, { "epoch": 0.1, "grad_norm": 6.917396068572998, "learning_rate": 1.6119822645182222e-05, "loss": 2.4155, "step": 7453 }, { "epoch": 0.1, "grad_norm": 6.340023517608643, "learning_rate": 1.6121985508813672e-05, "loss": 2.8455, "step": 7454 }, { "epoch": 0.1, "grad_norm": 6.134924411773682, "learning_rate": 1.612414837244512e-05, "loss": 2.1533, "step": 7455 }, { "epoch": 0.1, "grad_norm": 7.2203593254089355, "learning_rate": 1.6126311236076565e-05, "loss": 2.7759, "step": 7456 }, { "epoch": 0.1, "grad_norm": 7.042374610900879, "learning_rate": 1.6128474099708015e-05, "loss": 2.7823, "step": 7457 }, { "epoch": 0.1, "grad_norm": 6.547272682189941, "learning_rate": 1.6130636963339464e-05, "loss": 2.5058, "step": 7458 }, { "epoch": 0.1, "grad_norm": 6.487916469573975, "learning_rate": 1.613279982697091e-05, "loss": 2.852, "step": 7459 }, { "epoch": 0.1, "grad_norm": 7.006087303161621, "learning_rate": 1.6134962690602357e-05, "loss": 2.8566, "step": 7460 }, { "epoch": 0.1, "grad_norm": 6.449941158294678, "learning_rate": 1.6137125554233807e-05, "loss": 2.3179, "step": 7461 }, { "epoch": 0.1, "grad_norm": 5.5611724853515625, "learning_rate": 1.6139288417865256e-05, "loss": 2.0613, "step": 7462 }, { "epoch": 0.1, "grad_norm": 6.682674884796143, "learning_rate": 1.6141451281496703e-05, "loss": 2.5572, "step": 7463 }, { "epoch": 0.1, "grad_norm": 7.552018165588379, "learning_rate": 1.614361414512815e-05, "loss": 2.6577, "step": 7464 }, { "epoch": 0.1, "grad_norm": 6.36165714263916, "learning_rate": 1.61457770087596e-05, "loss": 2.2285, "step": 7465 }, { "epoch": 0.1, "grad_norm": 7.163812637329102, "learning_rate": 1.614793987239105e-05, "loss": 2.4542, "step": 7466 }, { "epoch": 0.1, "grad_norm": 5.9993133544921875, "learning_rate": 1.6150102736022495e-05, "loss": 2.4123, "step": 7467 }, { "epoch": 0.1, "grad_norm": 5.7968363761901855, "learning_rate": 1.615226559965394e-05, "loss": 2.3292, "step": 7468 }, { "epoch": 0.1, "grad_norm": 7.479952335357666, "learning_rate": 1.615442846328539e-05, "loss": 2.9437, "step": 7469 }, { "epoch": 0.1, "grad_norm": 6.352912425994873, "learning_rate": 1.615659132691684e-05, "loss": 2.3877, "step": 7470 }, { "epoch": 0.1, "grad_norm": 6.946023941040039, "learning_rate": 1.6158754190548287e-05, "loss": 2.8818, "step": 7471 }, { "epoch": 0.1, "grad_norm": 6.073676586151123, "learning_rate": 1.6160917054179734e-05, "loss": 2.4845, "step": 7472 }, { "epoch": 0.1, "grad_norm": 5.9542365074157715, "learning_rate": 1.6163079917811183e-05, "loss": 1.8923, "step": 7473 }, { "epoch": 0.1, "grad_norm": 6.940215587615967, "learning_rate": 1.6165242781442633e-05, "loss": 2.2432, "step": 7474 }, { "epoch": 0.1, "grad_norm": 7.151760578155518, "learning_rate": 1.616740564507408e-05, "loss": 2.6847, "step": 7475 }, { "epoch": 0.1, "grad_norm": 6.982513427734375, "learning_rate": 1.6169568508705526e-05, "loss": 2.5242, "step": 7476 }, { "epoch": 0.1, "grad_norm": 7.0809006690979, "learning_rate": 1.6171731372336976e-05, "loss": 2.7451, "step": 7477 }, { "epoch": 0.1, "grad_norm": 7.0142130851745605, "learning_rate": 1.6173894235968425e-05, "loss": 2.416, "step": 7478 }, { "epoch": 0.1, "grad_norm": 6.799868106842041, "learning_rate": 1.617605709959987e-05, "loss": 2.7017, "step": 7479 }, { "epoch": 0.1, "grad_norm": 5.734954833984375, "learning_rate": 1.6178219963231318e-05, "loss": 2.2285, "step": 7480 }, { "epoch": 0.1, "grad_norm": 6.381087779998779, "learning_rate": 1.6180382826862768e-05, "loss": 2.4065, "step": 7481 }, { "epoch": 0.1, "grad_norm": 7.3966498374938965, "learning_rate": 1.6182545690494214e-05, "loss": 2.9114, "step": 7482 }, { "epoch": 0.1, "grad_norm": 6.63240385055542, "learning_rate": 1.6184708554125664e-05, "loss": 2.0873, "step": 7483 }, { "epoch": 0.1, "grad_norm": 6.917654991149902, "learning_rate": 1.618687141775711e-05, "loss": 2.5084, "step": 7484 }, { "epoch": 0.1, "grad_norm": 6.797478199005127, "learning_rate": 1.618903428138856e-05, "loss": 2.3338, "step": 7485 }, { "epoch": 0.1, "grad_norm": 7.263805389404297, "learning_rate": 1.6191197145020006e-05, "loss": 3.2341, "step": 7486 }, { "epoch": 0.1, "grad_norm": 6.277373313903809, "learning_rate": 1.6193360008651456e-05, "loss": 2.5284, "step": 7487 }, { "epoch": 0.1, "grad_norm": 6.192673206329346, "learning_rate": 1.6195522872282906e-05, "loss": 2.232, "step": 7488 }, { "epoch": 0.1, "grad_norm": 7.884406566619873, "learning_rate": 1.6197685735914352e-05, "loss": 2.9404, "step": 7489 }, { "epoch": 0.1, "grad_norm": 7.559455871582031, "learning_rate": 1.61998485995458e-05, "loss": 2.7462, "step": 7490 }, { "epoch": 0.1, "grad_norm": 6.89241886138916, "learning_rate": 1.6202011463177248e-05, "loss": 3.1158, "step": 7491 }, { "epoch": 0.1, "grad_norm": 6.6853790283203125, "learning_rate": 1.6204174326808698e-05, "loss": 2.781, "step": 7492 }, { "epoch": 0.1, "grad_norm": 6.668673038482666, "learning_rate": 1.6206337190440144e-05, "loss": 2.7765, "step": 7493 }, { "epoch": 0.1, "grad_norm": 6.437235355377197, "learning_rate": 1.620850005407159e-05, "loss": 2.2507, "step": 7494 }, { "epoch": 0.1, "grad_norm": 7.696500301361084, "learning_rate": 1.621066291770304e-05, "loss": 2.6605, "step": 7495 }, { "epoch": 0.1, "grad_norm": 7.450958251953125, "learning_rate": 1.621282578133449e-05, "loss": 3.1532, "step": 7496 }, { "epoch": 0.1, "grad_norm": 6.8906965255737305, "learning_rate": 1.6214988644965937e-05, "loss": 3.0955, "step": 7497 }, { "epoch": 0.1, "grad_norm": 8.17557144165039, "learning_rate": 1.6217151508597383e-05, "loss": 2.4166, "step": 7498 }, { "epoch": 0.1, "grad_norm": 7.343116283416748, "learning_rate": 1.6219314372228833e-05, "loss": 2.6912, "step": 7499 }, { "epoch": 0.1, "grad_norm": 6.965702533721924, "learning_rate": 1.6221477235860282e-05, "loss": 2.6, "step": 7500 }, { "epoch": 0.1, "grad_norm": 6.1146440505981445, "learning_rate": 1.622364009949173e-05, "loss": 2.1684, "step": 7501 }, { "epoch": 0.1, "grad_norm": 7.411294937133789, "learning_rate": 1.6225802963123175e-05, "loss": 2.6541, "step": 7502 }, { "epoch": 0.1, "grad_norm": 6.5669755935668945, "learning_rate": 1.6227965826754625e-05, "loss": 2.9518, "step": 7503 }, { "epoch": 0.1, "grad_norm": 7.573241233825684, "learning_rate": 1.6230128690386075e-05, "loss": 3.0548, "step": 7504 }, { "epoch": 0.1, "grad_norm": 7.403743743896484, "learning_rate": 1.623229155401752e-05, "loss": 2.6275, "step": 7505 }, { "epoch": 0.1, "grad_norm": 6.470734596252441, "learning_rate": 1.6234454417648967e-05, "loss": 2.3725, "step": 7506 }, { "epoch": 0.1, "grad_norm": 7.220939636230469, "learning_rate": 1.6236617281280417e-05, "loss": 2.613, "step": 7507 }, { "epoch": 0.1, "grad_norm": 7.364802360534668, "learning_rate": 1.6238780144911867e-05, "loss": 2.4546, "step": 7508 }, { "epoch": 0.1, "grad_norm": 6.232236862182617, "learning_rate": 1.6240943008543313e-05, "loss": 2.6073, "step": 7509 }, { "epoch": 0.1, "grad_norm": 7.029788017272949, "learning_rate": 1.624310587217476e-05, "loss": 2.4249, "step": 7510 }, { "epoch": 0.1, "grad_norm": 6.201052665710449, "learning_rate": 1.624526873580621e-05, "loss": 2.6111, "step": 7511 }, { "epoch": 0.1, "grad_norm": 7.814630508422852, "learning_rate": 1.624743159943766e-05, "loss": 2.9166, "step": 7512 }, { "epoch": 0.1, "grad_norm": 7.786922454833984, "learning_rate": 1.6249594463069105e-05, "loss": 2.7066, "step": 7513 }, { "epoch": 0.1, "grad_norm": 6.486783027648926, "learning_rate": 1.6251757326700552e-05, "loss": 2.4176, "step": 7514 }, { "epoch": 0.1, "grad_norm": 7.0619215965271, "learning_rate": 1.6253920190332e-05, "loss": 2.5759, "step": 7515 }, { "epoch": 0.1, "grad_norm": 6.378966808319092, "learning_rate": 1.625608305396345e-05, "loss": 2.7502, "step": 7516 }, { "epoch": 0.1, "grad_norm": 7.156522750854492, "learning_rate": 1.6258245917594898e-05, "loss": 2.9141, "step": 7517 }, { "epoch": 0.1, "grad_norm": 6.832627773284912, "learning_rate": 1.6260408781226344e-05, "loss": 3.1699, "step": 7518 }, { "epoch": 0.1, "grad_norm": 6.6066107749938965, "learning_rate": 1.626257164485779e-05, "loss": 2.8282, "step": 7519 }, { "epoch": 0.1, "grad_norm": 7.446967601776123, "learning_rate": 1.626473450848924e-05, "loss": 2.7522, "step": 7520 }, { "epoch": 0.1, "grad_norm": 7.7911272048950195, "learning_rate": 1.626689737212069e-05, "loss": 2.9055, "step": 7521 }, { "epoch": 0.1, "grad_norm": 6.680560111999512, "learning_rate": 1.6269060235752136e-05, "loss": 2.4887, "step": 7522 }, { "epoch": 0.1, "grad_norm": 7.255692481994629, "learning_rate": 1.6271223099383586e-05, "loss": 2.6522, "step": 7523 }, { "epoch": 0.1, "grad_norm": 6.823592185974121, "learning_rate": 1.6273385963015032e-05, "loss": 2.943, "step": 7524 }, { "epoch": 0.1, "grad_norm": 6.951469421386719, "learning_rate": 1.6275548826646482e-05, "loss": 2.319, "step": 7525 }, { "epoch": 0.1, "grad_norm": 6.887563228607178, "learning_rate": 1.627771169027793e-05, "loss": 2.5226, "step": 7526 }, { "epoch": 0.1, "grad_norm": 6.424746513366699, "learning_rate": 1.6279874553909378e-05, "loss": 2.2544, "step": 7527 }, { "epoch": 0.1, "grad_norm": 7.149116039276123, "learning_rate": 1.6282037417540824e-05, "loss": 2.8444, "step": 7528 }, { "epoch": 0.1, "grad_norm": 7.594472885131836, "learning_rate": 1.6284200281172274e-05, "loss": 2.7621, "step": 7529 }, { "epoch": 0.1, "grad_norm": 7.486030578613281, "learning_rate": 1.628636314480372e-05, "loss": 2.7197, "step": 7530 }, { "epoch": 0.1, "grad_norm": 6.4425740242004395, "learning_rate": 1.628852600843517e-05, "loss": 2.4921, "step": 7531 }, { "epoch": 0.1, "grad_norm": 7.366512298583984, "learning_rate": 1.6290688872066617e-05, "loss": 2.7731, "step": 7532 }, { "epoch": 0.1, "grad_norm": 6.176359176635742, "learning_rate": 1.6292851735698066e-05, "loss": 2.2716, "step": 7533 }, { "epoch": 0.1, "grad_norm": 7.106621742248535, "learning_rate": 1.6295014599329513e-05, "loss": 2.8989, "step": 7534 }, { "epoch": 0.1, "grad_norm": 6.683829307556152, "learning_rate": 1.6297177462960962e-05, "loss": 2.6752, "step": 7535 }, { "epoch": 0.1, "grad_norm": 6.258397579193115, "learning_rate": 1.629934032659241e-05, "loss": 2.6927, "step": 7536 }, { "epoch": 0.1, "grad_norm": 6.908985614776611, "learning_rate": 1.630150319022386e-05, "loss": 2.8188, "step": 7537 }, { "epoch": 0.1, "grad_norm": 7.474554538726807, "learning_rate": 1.6303666053855305e-05, "loss": 2.6987, "step": 7538 }, { "epoch": 0.1, "grad_norm": 7.271151542663574, "learning_rate": 1.6305828917486755e-05, "loss": 2.6987, "step": 7539 }, { "epoch": 0.1, "grad_norm": 6.201009273529053, "learning_rate": 1.63079917811182e-05, "loss": 2.7752, "step": 7540 }, { "epoch": 0.1, "grad_norm": 6.1972336769104, "learning_rate": 1.631015464474965e-05, "loss": 2.0942, "step": 7541 }, { "epoch": 0.1, "grad_norm": 7.017030239105225, "learning_rate": 1.6312317508381097e-05, "loss": 2.3158, "step": 7542 }, { "epoch": 0.1, "grad_norm": 6.994544982910156, "learning_rate": 1.6314480372012547e-05, "loss": 2.7555, "step": 7543 }, { "epoch": 0.1, "grad_norm": 7.0656843185424805, "learning_rate": 1.6316643235643993e-05, "loss": 2.6838, "step": 7544 }, { "epoch": 0.1, "grad_norm": 7.6903839111328125, "learning_rate": 1.6318806099275443e-05, "loss": 2.9734, "step": 7545 }, { "epoch": 0.1, "grad_norm": 5.912347793579102, "learning_rate": 1.632096896290689e-05, "loss": 2.4025, "step": 7546 }, { "epoch": 0.1, "grad_norm": 6.022477626800537, "learning_rate": 1.632313182653834e-05, "loss": 2.3232, "step": 7547 }, { "epoch": 0.1, "grad_norm": 6.540194034576416, "learning_rate": 1.6325294690169785e-05, "loss": 2.4387, "step": 7548 }, { "epoch": 0.1, "grad_norm": 6.964670658111572, "learning_rate": 1.6327457553801235e-05, "loss": 2.5826, "step": 7549 }, { "epoch": 0.1, "grad_norm": 6.770599365234375, "learning_rate": 1.632962041743268e-05, "loss": 2.6587, "step": 7550 }, { "epoch": 0.1, "grad_norm": 6.6349897384643555, "learning_rate": 1.633178328106413e-05, "loss": 2.6693, "step": 7551 }, { "epoch": 0.1, "grad_norm": 6.275814533233643, "learning_rate": 1.6333946144695578e-05, "loss": 2.8157, "step": 7552 }, { "epoch": 0.1, "grad_norm": 6.635829448699951, "learning_rate": 1.6336109008327027e-05, "loss": 2.4377, "step": 7553 }, { "epoch": 0.1, "grad_norm": 7.579905986785889, "learning_rate": 1.6338271871958474e-05, "loss": 3.0682, "step": 7554 }, { "epoch": 0.1, "grad_norm": 6.793774127960205, "learning_rate": 1.6340434735589923e-05, "loss": 2.0308, "step": 7555 }, { "epoch": 0.1, "grad_norm": 6.0337090492248535, "learning_rate": 1.634259759922137e-05, "loss": 2.2936, "step": 7556 }, { "epoch": 0.1, "grad_norm": 6.868194103240967, "learning_rate": 1.6344760462852816e-05, "loss": 2.9441, "step": 7557 }, { "epoch": 0.1, "grad_norm": 6.466938495635986, "learning_rate": 1.6346923326484266e-05, "loss": 2.0616, "step": 7558 }, { "epoch": 0.1, "grad_norm": 7.031929016113281, "learning_rate": 1.6349086190115716e-05, "loss": 2.9426, "step": 7559 }, { "epoch": 0.1, "grad_norm": 7.447885036468506, "learning_rate": 1.6351249053747162e-05, "loss": 2.8057, "step": 7560 }, { "epoch": 0.1, "grad_norm": 6.65403938293457, "learning_rate": 1.635341191737861e-05, "loss": 2.6479, "step": 7561 }, { "epoch": 0.1, "grad_norm": 6.830049514770508, "learning_rate": 1.6355574781010058e-05, "loss": 2.5468, "step": 7562 }, { "epoch": 0.1, "grad_norm": 5.4205756187438965, "learning_rate": 1.6357737644641508e-05, "loss": 2.541, "step": 7563 }, { "epoch": 0.1, "grad_norm": 6.637087345123291, "learning_rate": 1.6359900508272954e-05, "loss": 2.7547, "step": 7564 }, { "epoch": 0.1, "grad_norm": 6.829404354095459, "learning_rate": 1.63620633719044e-05, "loss": 2.9375, "step": 7565 }, { "epoch": 0.1, "grad_norm": 6.4713053703308105, "learning_rate": 1.636422623553585e-05, "loss": 2.7463, "step": 7566 }, { "epoch": 0.1, "grad_norm": 7.252649784088135, "learning_rate": 1.63663890991673e-05, "loss": 3.0638, "step": 7567 }, { "epoch": 0.1, "grad_norm": 7.250773906707764, "learning_rate": 1.6368551962798746e-05, "loss": 3.2068, "step": 7568 }, { "epoch": 0.1, "grad_norm": 6.324241638183594, "learning_rate": 1.6370714826430193e-05, "loss": 2.9325, "step": 7569 }, { "epoch": 0.1, "grad_norm": 6.939096450805664, "learning_rate": 1.6372877690061643e-05, "loss": 2.7683, "step": 7570 }, { "epoch": 0.1, "grad_norm": 8.03137493133545, "learning_rate": 1.6375040553693092e-05, "loss": 2.7997, "step": 7571 }, { "epoch": 0.1, "grad_norm": 7.2206854820251465, "learning_rate": 1.637720341732454e-05, "loss": 3.3365, "step": 7572 }, { "epoch": 0.1, "grad_norm": 5.830014228820801, "learning_rate": 1.6379366280955985e-05, "loss": 2.3363, "step": 7573 }, { "epoch": 0.1, "grad_norm": 6.809881210327148, "learning_rate": 1.6381529144587435e-05, "loss": 2.7035, "step": 7574 }, { "epoch": 0.1, "grad_norm": 6.802520275115967, "learning_rate": 1.6383692008218884e-05, "loss": 2.4338, "step": 7575 }, { "epoch": 0.1, "grad_norm": 6.434607982635498, "learning_rate": 1.638585487185033e-05, "loss": 2.5927, "step": 7576 }, { "epoch": 0.1, "grad_norm": 5.8428215980529785, "learning_rate": 1.6388017735481777e-05, "loss": 2.5497, "step": 7577 }, { "epoch": 0.1, "grad_norm": 6.910436153411865, "learning_rate": 1.6390180599113227e-05, "loss": 2.6141, "step": 7578 }, { "epoch": 0.1, "grad_norm": 7.361451625823975, "learning_rate": 1.6392343462744677e-05, "loss": 2.7336, "step": 7579 }, { "epoch": 0.1, "grad_norm": 6.770045757293701, "learning_rate": 1.6394506326376123e-05, "loss": 2.2964, "step": 7580 }, { "epoch": 0.1, "grad_norm": 7.402977466583252, "learning_rate": 1.639666919000757e-05, "loss": 2.8443, "step": 7581 }, { "epoch": 0.1, "grad_norm": 7.227853298187256, "learning_rate": 1.639883205363902e-05, "loss": 2.6594, "step": 7582 }, { "epoch": 0.1, "grad_norm": 7.628917694091797, "learning_rate": 1.640099491727047e-05, "loss": 2.726, "step": 7583 }, { "epoch": 0.1, "grad_norm": 6.5528082847595215, "learning_rate": 1.6403157780901915e-05, "loss": 2.5808, "step": 7584 }, { "epoch": 0.1, "grad_norm": 6.931270122528076, "learning_rate": 1.6405320644533365e-05, "loss": 2.7618, "step": 7585 }, { "epoch": 0.1, "grad_norm": 6.990274429321289, "learning_rate": 1.640748350816481e-05, "loss": 2.2577, "step": 7586 }, { "epoch": 0.1, "grad_norm": 6.018221855163574, "learning_rate": 1.640964637179626e-05, "loss": 2.4415, "step": 7587 }, { "epoch": 0.1, "grad_norm": 6.150130271911621, "learning_rate": 1.6411809235427707e-05, "loss": 2.2301, "step": 7588 }, { "epoch": 0.1, "grad_norm": 6.596251010894775, "learning_rate": 1.6413972099059157e-05, "loss": 2.6838, "step": 7589 }, { "epoch": 0.1, "grad_norm": 6.5573859214782715, "learning_rate": 1.6416134962690604e-05, "loss": 2.795, "step": 7590 }, { "epoch": 0.1, "grad_norm": 8.006831169128418, "learning_rate": 1.641829782632205e-05, "loss": 3.4828, "step": 7591 }, { "epoch": 0.1, "grad_norm": 6.611670970916748, "learning_rate": 1.64204606899535e-05, "loss": 2.6421, "step": 7592 }, { "epoch": 0.1, "grad_norm": 7.111455917358398, "learning_rate": 1.642262355358495e-05, "loss": 3.1876, "step": 7593 }, { "epoch": 0.1, "grad_norm": 5.906028747558594, "learning_rate": 1.6424786417216396e-05, "loss": 2.314, "step": 7594 }, { "epoch": 0.1, "grad_norm": 6.289337635040283, "learning_rate": 1.6426949280847842e-05, "loss": 2.4703, "step": 7595 }, { "epoch": 0.1, "grad_norm": 7.153402805328369, "learning_rate": 1.6429112144479292e-05, "loss": 2.7234, "step": 7596 }, { "epoch": 0.1, "grad_norm": 6.441693305969238, "learning_rate": 1.643127500811074e-05, "loss": 2.4302, "step": 7597 }, { "epoch": 0.1, "grad_norm": 6.685727596282959, "learning_rate": 1.6433437871742188e-05, "loss": 2.5758, "step": 7598 }, { "epoch": 0.1, "grad_norm": 6.41784143447876, "learning_rate": 1.6435600735373634e-05, "loss": 2.1008, "step": 7599 }, { "epoch": 0.1, "grad_norm": 7.472846508026123, "learning_rate": 1.6437763599005084e-05, "loss": 2.2652, "step": 7600 }, { "epoch": 0.1, "grad_norm": 7.2833170890808105, "learning_rate": 1.6439926462636534e-05, "loss": 2.566, "step": 7601 }, { "epoch": 0.1, "grad_norm": 6.554608345031738, "learning_rate": 1.644208932626798e-05, "loss": 2.5265, "step": 7602 }, { "epoch": 0.1, "grad_norm": 6.053999900817871, "learning_rate": 1.6444252189899427e-05, "loss": 2.6508, "step": 7603 }, { "epoch": 0.1, "grad_norm": 6.903637886047363, "learning_rate": 1.6446415053530876e-05, "loss": 2.6893, "step": 7604 }, { "epoch": 0.1, "grad_norm": 6.124246120452881, "learning_rate": 1.6448577917162326e-05, "loss": 2.4773, "step": 7605 }, { "epoch": 0.1, "grad_norm": 7.841961860656738, "learning_rate": 1.6450740780793772e-05, "loss": 2.6863, "step": 7606 }, { "epoch": 0.1, "grad_norm": 6.3791046142578125, "learning_rate": 1.645290364442522e-05, "loss": 1.7789, "step": 7607 }, { "epoch": 0.1, "grad_norm": 6.677443504333496, "learning_rate": 1.645506650805667e-05, "loss": 2.6349, "step": 7608 }, { "epoch": 0.1, "grad_norm": 7.019489765167236, "learning_rate": 1.6457229371688118e-05, "loss": 2.4013, "step": 7609 }, { "epoch": 0.1, "grad_norm": 7.045022010803223, "learning_rate": 1.6459392235319565e-05, "loss": 2.2414, "step": 7610 }, { "epoch": 0.1, "grad_norm": 7.827584266662598, "learning_rate": 1.646155509895101e-05, "loss": 2.8318, "step": 7611 }, { "epoch": 0.1, "grad_norm": 7.466294765472412, "learning_rate": 1.646371796258246e-05, "loss": 2.875, "step": 7612 }, { "epoch": 0.1, "grad_norm": 5.915184497833252, "learning_rate": 1.646588082621391e-05, "loss": 2.1397, "step": 7613 }, { "epoch": 0.1, "grad_norm": 7.81110143661499, "learning_rate": 1.6468043689845357e-05, "loss": 3.746, "step": 7614 }, { "epoch": 0.1, "grad_norm": 7.160592079162598, "learning_rate": 1.6470206553476803e-05, "loss": 2.5063, "step": 7615 }, { "epoch": 0.1, "grad_norm": 6.97330379486084, "learning_rate": 1.6472369417108253e-05, "loss": 3.0227, "step": 7616 }, { "epoch": 0.1, "grad_norm": 7.104043483734131, "learning_rate": 1.6474532280739703e-05, "loss": 3.0169, "step": 7617 }, { "epoch": 0.1, "grad_norm": 7.297719955444336, "learning_rate": 1.647669514437115e-05, "loss": 2.6997, "step": 7618 }, { "epoch": 0.1, "grad_norm": 7.489274978637695, "learning_rate": 1.6478858008002595e-05, "loss": 3.5752, "step": 7619 }, { "epoch": 0.1, "grad_norm": 6.387792110443115, "learning_rate": 1.6481020871634045e-05, "loss": 2.6537, "step": 7620 }, { "epoch": 0.1, "grad_norm": 6.860201835632324, "learning_rate": 1.6483183735265495e-05, "loss": 2.5345, "step": 7621 }, { "epoch": 0.1, "grad_norm": 7.593395233154297, "learning_rate": 1.648534659889694e-05, "loss": 3.0722, "step": 7622 }, { "epoch": 0.1, "grad_norm": 6.627287864685059, "learning_rate": 1.6487509462528388e-05, "loss": 2.6168, "step": 7623 }, { "epoch": 0.1, "grad_norm": 6.500984191894531, "learning_rate": 1.6489672326159837e-05, "loss": 2.9074, "step": 7624 }, { "epoch": 0.1, "grad_norm": 6.668919086456299, "learning_rate": 1.6491835189791287e-05, "loss": 2.5508, "step": 7625 }, { "epoch": 0.1, "grad_norm": 6.325042247772217, "learning_rate": 1.6493998053422733e-05, "loss": 2.1919, "step": 7626 }, { "epoch": 0.1, "grad_norm": 7.412988662719727, "learning_rate": 1.649616091705418e-05, "loss": 2.525, "step": 7627 }, { "epoch": 0.1, "grad_norm": 6.471708297729492, "learning_rate": 1.649832378068563e-05, "loss": 2.7932, "step": 7628 }, { "epoch": 0.1, "grad_norm": 6.789440155029297, "learning_rate": 1.6500486644317076e-05, "loss": 2.8611, "step": 7629 }, { "epoch": 0.1, "grad_norm": 6.574685096740723, "learning_rate": 1.6502649507948526e-05, "loss": 2.665, "step": 7630 }, { "epoch": 0.1, "grad_norm": 7.120038986206055, "learning_rate": 1.6504812371579972e-05, "loss": 2.6865, "step": 7631 }, { "epoch": 0.1, "grad_norm": 6.17361307144165, "learning_rate": 1.6506975235211422e-05, "loss": 2.2693, "step": 7632 }, { "epoch": 0.1, "grad_norm": 6.88934850692749, "learning_rate": 1.6509138098842868e-05, "loss": 2.9665, "step": 7633 }, { "epoch": 0.1, "grad_norm": 6.400626182556152, "learning_rate": 1.6511300962474318e-05, "loss": 2.7022, "step": 7634 }, { "epoch": 0.1, "grad_norm": 7.325601100921631, "learning_rate": 1.6513463826105764e-05, "loss": 3.1452, "step": 7635 }, { "epoch": 0.1, "grad_norm": 6.815648555755615, "learning_rate": 1.6515626689737214e-05, "loss": 2.3779, "step": 7636 }, { "epoch": 0.1, "grad_norm": 5.390587329864502, "learning_rate": 1.651778955336866e-05, "loss": 2.4151, "step": 7637 }, { "epoch": 0.1, "grad_norm": 5.446542263031006, "learning_rate": 1.651995241700011e-05, "loss": 2.3962, "step": 7638 }, { "epoch": 0.1, "grad_norm": 6.609167575836182, "learning_rate": 1.6522115280631556e-05, "loss": 2.8736, "step": 7639 }, { "epoch": 0.1, "grad_norm": 7.472021579742432, "learning_rate": 1.6524278144263006e-05, "loss": 2.5966, "step": 7640 }, { "epoch": 0.1, "grad_norm": 6.590490818023682, "learning_rate": 1.6526441007894452e-05, "loss": 2.5954, "step": 7641 }, { "epoch": 0.1, "grad_norm": 6.6799092292785645, "learning_rate": 1.6528603871525902e-05, "loss": 2.3162, "step": 7642 }, { "epoch": 0.1, "grad_norm": 6.623093605041504, "learning_rate": 1.653076673515735e-05, "loss": 2.4302, "step": 7643 }, { "epoch": 0.1, "grad_norm": 7.664527893066406, "learning_rate": 1.6532929598788798e-05, "loss": 3.174, "step": 7644 }, { "epoch": 0.1, "grad_norm": 5.8083624839782715, "learning_rate": 1.6535092462420245e-05, "loss": 2.6361, "step": 7645 }, { "epoch": 0.1, "grad_norm": 5.90501594543457, "learning_rate": 1.6537255326051694e-05, "loss": 2.3919, "step": 7646 }, { "epoch": 0.1, "grad_norm": 6.2748236656188965, "learning_rate": 1.653941818968314e-05, "loss": 2.3363, "step": 7647 }, { "epoch": 0.1, "grad_norm": 6.549990177154541, "learning_rate": 1.654158105331459e-05, "loss": 2.6844, "step": 7648 }, { "epoch": 0.1, "grad_norm": 7.859543323516846, "learning_rate": 1.6543743916946037e-05, "loss": 2.7623, "step": 7649 }, { "epoch": 0.1, "grad_norm": 6.227321624755859, "learning_rate": 1.6545906780577487e-05, "loss": 2.2603, "step": 7650 }, { "epoch": 0.1, "grad_norm": 7.253427982330322, "learning_rate": 1.6548069644208936e-05, "loss": 2.7874, "step": 7651 }, { "epoch": 0.1, "grad_norm": 7.159069061279297, "learning_rate": 1.6550232507840383e-05, "loss": 2.2969, "step": 7652 }, { "epoch": 0.1, "grad_norm": 6.839751243591309, "learning_rate": 1.655239537147183e-05, "loss": 2.4692, "step": 7653 }, { "epoch": 0.1, "grad_norm": 6.972675800323486, "learning_rate": 1.655455823510328e-05, "loss": 2.2662, "step": 7654 }, { "epoch": 0.1, "grad_norm": 7.939656734466553, "learning_rate": 1.655672109873473e-05, "loss": 2.6536, "step": 7655 }, { "epoch": 0.1, "grad_norm": 6.611564636230469, "learning_rate": 1.6558883962366175e-05, "loss": 3.1037, "step": 7656 }, { "epoch": 0.1, "grad_norm": 5.9216179847717285, "learning_rate": 1.656104682599762e-05, "loss": 2.4044, "step": 7657 }, { "epoch": 0.1, "grad_norm": 5.6507439613342285, "learning_rate": 1.656320968962907e-05, "loss": 2.2927, "step": 7658 }, { "epoch": 0.1, "grad_norm": 6.966874122619629, "learning_rate": 1.656537255326052e-05, "loss": 2.2986, "step": 7659 }, { "epoch": 0.1, "grad_norm": 6.5986528396606445, "learning_rate": 1.6567535416891967e-05, "loss": 2.402, "step": 7660 }, { "epoch": 0.1, "grad_norm": 7.167881965637207, "learning_rate": 1.6569698280523413e-05, "loss": 2.3041, "step": 7661 }, { "epoch": 0.1, "grad_norm": 6.62114953994751, "learning_rate": 1.6571861144154863e-05, "loss": 2.3294, "step": 7662 }, { "epoch": 0.1, "grad_norm": 7.466611385345459, "learning_rate": 1.657402400778631e-05, "loss": 2.5276, "step": 7663 }, { "epoch": 0.1, "grad_norm": 6.799073696136475, "learning_rate": 1.657618687141776e-05, "loss": 2.6235, "step": 7664 }, { "epoch": 0.1, "grad_norm": 6.630728721618652, "learning_rate": 1.6578349735049206e-05, "loss": 2.4988, "step": 7665 }, { "epoch": 0.1, "grad_norm": 6.409905910491943, "learning_rate": 1.6580512598680652e-05, "loss": 2.2922, "step": 7666 }, { "epoch": 0.1, "grad_norm": 6.325800895690918, "learning_rate": 1.6582675462312102e-05, "loss": 2.6201, "step": 7667 }, { "epoch": 0.1, "grad_norm": 8.102012634277344, "learning_rate": 1.658483832594355e-05, "loss": 2.9644, "step": 7668 }, { "epoch": 0.1, "grad_norm": 6.436072826385498, "learning_rate": 1.6587001189574998e-05, "loss": 2.301, "step": 7669 }, { "epoch": 0.1, "grad_norm": 6.831756114959717, "learning_rate": 1.6589164053206444e-05, "loss": 2.706, "step": 7670 }, { "epoch": 0.1, "grad_norm": 6.93845272064209, "learning_rate": 1.6591326916837894e-05, "loss": 2.3706, "step": 7671 }, { "epoch": 0.1, "grad_norm": 7.233452796936035, "learning_rate": 1.6593489780469344e-05, "loss": 2.8198, "step": 7672 }, { "epoch": 0.1, "grad_norm": 6.120971202850342, "learning_rate": 1.659565264410079e-05, "loss": 2.3706, "step": 7673 }, { "epoch": 0.1, "grad_norm": 6.17632532119751, "learning_rate": 1.6597815507732236e-05, "loss": 2.1732, "step": 7674 }, { "epoch": 0.1, "grad_norm": 6.617023944854736, "learning_rate": 1.6599978371363686e-05, "loss": 2.2627, "step": 7675 }, { "epoch": 0.1, "grad_norm": 8.091292381286621, "learning_rate": 1.6602141234995136e-05, "loss": 2.3945, "step": 7676 }, { "epoch": 0.1, "grad_norm": 7.095589637756348, "learning_rate": 1.6604304098626582e-05, "loss": 2.5779, "step": 7677 }, { "epoch": 0.1, "grad_norm": 8.25102710723877, "learning_rate": 1.660646696225803e-05, "loss": 2.6839, "step": 7678 }, { "epoch": 0.1, "grad_norm": 6.940555095672607, "learning_rate": 1.660862982588948e-05, "loss": 2.8053, "step": 7679 }, { "epoch": 0.1, "grad_norm": 6.313292026519775, "learning_rate": 1.6610792689520928e-05, "loss": 2.0775, "step": 7680 }, { "epoch": 0.1, "grad_norm": 6.009666442871094, "learning_rate": 1.6612955553152374e-05, "loss": 2.3345, "step": 7681 }, { "epoch": 0.1, "grad_norm": 6.101136207580566, "learning_rate": 1.6615118416783824e-05, "loss": 2.0646, "step": 7682 }, { "epoch": 0.1, "grad_norm": 8.02049446105957, "learning_rate": 1.661728128041527e-05, "loss": 2.704, "step": 7683 }, { "epoch": 0.1, "grad_norm": 6.835960388183594, "learning_rate": 1.661944414404672e-05, "loss": 2.6401, "step": 7684 }, { "epoch": 0.1, "grad_norm": 5.868671417236328, "learning_rate": 1.6621607007678167e-05, "loss": 2.2842, "step": 7685 }, { "epoch": 0.1, "grad_norm": 6.283241271972656, "learning_rate": 1.6623769871309616e-05, "loss": 2.2346, "step": 7686 }, { "epoch": 0.1, "grad_norm": 7.09295129776001, "learning_rate": 1.6625932734941063e-05, "loss": 2.4006, "step": 7687 }, { "epoch": 0.1, "grad_norm": 6.910192012786865, "learning_rate": 1.6628095598572513e-05, "loss": 2.5873, "step": 7688 }, { "epoch": 0.1, "grad_norm": 7.791581630706787, "learning_rate": 1.663025846220396e-05, "loss": 2.6763, "step": 7689 }, { "epoch": 0.1, "grad_norm": 7.435920715332031, "learning_rate": 1.663242132583541e-05, "loss": 3.4249, "step": 7690 }, { "epoch": 0.1, "grad_norm": 6.663557529449463, "learning_rate": 1.6634584189466855e-05, "loss": 2.6092, "step": 7691 }, { "epoch": 0.1, "grad_norm": 7.436704158782959, "learning_rate": 1.6636747053098305e-05, "loss": 2.6332, "step": 7692 }, { "epoch": 0.1, "grad_norm": 6.547738075256348, "learning_rate": 1.663890991672975e-05, "loss": 2.3231, "step": 7693 }, { "epoch": 0.1, "grad_norm": 7.107034683227539, "learning_rate": 1.66410727803612e-05, "loss": 2.7902, "step": 7694 }, { "epoch": 0.1, "grad_norm": 8.444076538085938, "learning_rate": 1.6643235643992647e-05, "loss": 2.8412, "step": 7695 }, { "epoch": 0.1, "grad_norm": 6.025613307952881, "learning_rate": 1.6645398507624097e-05, "loss": 2.4875, "step": 7696 }, { "epoch": 0.1, "grad_norm": 6.293638229370117, "learning_rate": 1.6647561371255543e-05, "loss": 2.762, "step": 7697 }, { "epoch": 0.1, "grad_norm": 7.275578498840332, "learning_rate": 1.6649724234886993e-05, "loss": 3.0289, "step": 7698 }, { "epoch": 0.1, "grad_norm": 6.623427391052246, "learning_rate": 1.665188709851844e-05, "loss": 2.606, "step": 7699 }, { "epoch": 0.1, "grad_norm": 7.178698539733887, "learning_rate": 1.6654049962149886e-05, "loss": 2.6132, "step": 7700 }, { "epoch": 0.1, "grad_norm": 6.33991003036499, "learning_rate": 1.6656212825781336e-05, "loss": 2.49, "step": 7701 }, { "epoch": 0.1, "grad_norm": 6.2511749267578125, "learning_rate": 1.6658375689412785e-05, "loss": 2.5467, "step": 7702 }, { "epoch": 0.1, "grad_norm": 6.416893482208252, "learning_rate": 1.666053855304423e-05, "loss": 2.629, "step": 7703 }, { "epoch": 0.1, "grad_norm": 6.66668701171875, "learning_rate": 1.6662701416675678e-05, "loss": 2.9288, "step": 7704 }, { "epoch": 0.1, "grad_norm": 7.41827392578125, "learning_rate": 1.6664864280307128e-05, "loss": 2.6699, "step": 7705 }, { "epoch": 0.1, "grad_norm": 7.086390018463135, "learning_rate": 1.6667027143938577e-05, "loss": 3.0841, "step": 7706 }, { "epoch": 0.1, "grad_norm": 6.759942054748535, "learning_rate": 1.6669190007570024e-05, "loss": 2.3752, "step": 7707 }, { "epoch": 0.1, "grad_norm": 6.594743728637695, "learning_rate": 1.667135287120147e-05, "loss": 2.2495, "step": 7708 }, { "epoch": 0.1, "grad_norm": 6.790812969207764, "learning_rate": 1.667351573483292e-05, "loss": 2.422, "step": 7709 }, { "epoch": 0.1, "grad_norm": 6.3675689697265625, "learning_rate": 1.667567859846437e-05, "loss": 2.4297, "step": 7710 }, { "epoch": 0.1, "grad_norm": 6.924678325653076, "learning_rate": 1.6677841462095816e-05, "loss": 2.5099, "step": 7711 }, { "epoch": 0.1, "grad_norm": 6.652116775512695, "learning_rate": 1.6680004325727262e-05, "loss": 2.7869, "step": 7712 }, { "epoch": 0.1, "grad_norm": 7.111969470977783, "learning_rate": 1.6682167189358712e-05, "loss": 2.6277, "step": 7713 }, { "epoch": 0.1, "grad_norm": 6.360953330993652, "learning_rate": 1.6684330052990162e-05, "loss": 2.6628, "step": 7714 }, { "epoch": 0.1, "grad_norm": 7.284707069396973, "learning_rate": 1.6686492916621608e-05, "loss": 2.4897, "step": 7715 }, { "epoch": 0.1, "grad_norm": 7.0274529457092285, "learning_rate": 1.6688655780253055e-05, "loss": 2.5913, "step": 7716 }, { "epoch": 0.1, "grad_norm": 7.012273788452148, "learning_rate": 1.6690818643884504e-05, "loss": 2.2301, "step": 7717 }, { "epoch": 0.1, "grad_norm": 6.54619026184082, "learning_rate": 1.6692981507515954e-05, "loss": 1.8992, "step": 7718 }, { "epoch": 0.1, "grad_norm": 7.398469924926758, "learning_rate": 1.66951443711474e-05, "loss": 2.6202, "step": 7719 }, { "epoch": 0.1, "grad_norm": 7.057574272155762, "learning_rate": 1.6697307234778847e-05, "loss": 2.8325, "step": 7720 }, { "epoch": 0.1, "grad_norm": 7.594939231872559, "learning_rate": 1.6699470098410297e-05, "loss": 2.4322, "step": 7721 }, { "epoch": 0.1, "grad_norm": 7.373960971832275, "learning_rate": 1.6701632962041746e-05, "loss": 2.9263, "step": 7722 }, { "epoch": 0.1, "grad_norm": 6.598387718200684, "learning_rate": 1.6703795825673193e-05, "loss": 2.4259, "step": 7723 }, { "epoch": 0.1, "grad_norm": 6.558129787445068, "learning_rate": 1.670595868930464e-05, "loss": 2.9703, "step": 7724 }, { "epoch": 0.1, "grad_norm": 6.923157691955566, "learning_rate": 1.670812155293609e-05, "loss": 2.9291, "step": 7725 }, { "epoch": 0.1, "grad_norm": 7.501131534576416, "learning_rate": 1.671028441656754e-05, "loss": 1.991, "step": 7726 }, { "epoch": 0.1, "grad_norm": 7.873439788818359, "learning_rate": 1.6712447280198985e-05, "loss": 3.1336, "step": 7727 }, { "epoch": 0.1, "grad_norm": 6.715689659118652, "learning_rate": 1.671461014383043e-05, "loss": 2.4908, "step": 7728 }, { "epoch": 0.1, "grad_norm": 6.720224380493164, "learning_rate": 1.671677300746188e-05, "loss": 2.7439, "step": 7729 }, { "epoch": 0.1, "grad_norm": 6.5102410316467285, "learning_rate": 1.671893587109333e-05, "loss": 2.6032, "step": 7730 }, { "epoch": 0.1, "grad_norm": 7.4147467613220215, "learning_rate": 1.6721098734724777e-05, "loss": 3.4157, "step": 7731 }, { "epoch": 0.1, "grad_norm": 6.571446418762207, "learning_rate": 1.6723261598356223e-05, "loss": 2.7375, "step": 7732 }, { "epoch": 0.1, "grad_norm": 6.4338836669921875, "learning_rate": 1.6725424461987673e-05, "loss": 2.2274, "step": 7733 }, { "epoch": 0.1, "grad_norm": 5.977947235107422, "learning_rate": 1.6727587325619123e-05, "loss": 2.3142, "step": 7734 }, { "epoch": 0.1, "grad_norm": 7.001518249511719, "learning_rate": 1.672975018925057e-05, "loss": 2.3934, "step": 7735 }, { "epoch": 0.1, "grad_norm": 6.122052192687988, "learning_rate": 1.6731913052882016e-05, "loss": 2.4962, "step": 7736 }, { "epoch": 0.1, "grad_norm": 6.487542629241943, "learning_rate": 1.6734075916513465e-05, "loss": 2.5877, "step": 7737 }, { "epoch": 0.1, "grad_norm": 6.6191911697387695, "learning_rate": 1.673623878014491e-05, "loss": 2.8259, "step": 7738 }, { "epoch": 0.1, "grad_norm": 6.790073871612549, "learning_rate": 1.673840164377636e-05, "loss": 2.4281, "step": 7739 }, { "epoch": 0.1, "grad_norm": 5.9398698806762695, "learning_rate": 1.6740564507407808e-05, "loss": 2.0106, "step": 7740 }, { "epoch": 0.1, "grad_norm": 7.486714839935303, "learning_rate": 1.6742727371039258e-05, "loss": 3.3837, "step": 7741 }, { "epoch": 0.1, "grad_norm": 8.23420238494873, "learning_rate": 1.6744890234670704e-05, "loss": 3.1698, "step": 7742 }, { "epoch": 0.1, "grad_norm": 6.293842315673828, "learning_rate": 1.6747053098302154e-05, "loss": 2.9241, "step": 7743 }, { "epoch": 0.1, "grad_norm": 6.84470796585083, "learning_rate": 1.6749215961933603e-05, "loss": 2.5478, "step": 7744 }, { "epoch": 0.1, "grad_norm": 7.378925323486328, "learning_rate": 1.675137882556505e-05, "loss": 2.7361, "step": 7745 }, { "epoch": 0.1, "grad_norm": 6.748950958251953, "learning_rate": 1.6753541689196496e-05, "loss": 2.9484, "step": 7746 }, { "epoch": 0.1, "grad_norm": 7.493124008178711, "learning_rate": 1.6755704552827946e-05, "loss": 2.7408, "step": 7747 }, { "epoch": 0.1, "grad_norm": 6.759483337402344, "learning_rate": 1.6757867416459396e-05, "loss": 2.6929, "step": 7748 }, { "epoch": 0.1, "grad_norm": 6.810353755950928, "learning_rate": 1.6760030280090842e-05, "loss": 2.6686, "step": 7749 }, { "epoch": 0.1, "grad_norm": 6.10661506652832, "learning_rate": 1.6762193143722288e-05, "loss": 2.3979, "step": 7750 }, { "epoch": 0.1, "grad_norm": 6.449834823608398, "learning_rate": 1.6764356007353738e-05, "loss": 1.7899, "step": 7751 }, { "epoch": 0.1, "grad_norm": 7.365626335144043, "learning_rate": 1.6766518870985188e-05, "loss": 3.2963, "step": 7752 }, { "epoch": 0.1, "grad_norm": 7.178081512451172, "learning_rate": 1.6768681734616634e-05, "loss": 2.3918, "step": 7753 }, { "epoch": 0.1, "grad_norm": 6.276215553283691, "learning_rate": 1.677084459824808e-05, "loss": 2.4868, "step": 7754 }, { "epoch": 0.1, "grad_norm": 5.508419990539551, "learning_rate": 1.677300746187953e-05, "loss": 1.9935, "step": 7755 }, { "epoch": 0.1, "grad_norm": 6.415957450866699, "learning_rate": 1.677517032551098e-05, "loss": 3.3396, "step": 7756 }, { "epoch": 0.1, "grad_norm": 7.222970485687256, "learning_rate": 1.6777333189142426e-05, "loss": 2.9969, "step": 7757 }, { "epoch": 0.1, "grad_norm": 6.286863327026367, "learning_rate": 1.6779496052773873e-05, "loss": 2.639, "step": 7758 }, { "epoch": 0.1, "grad_norm": 6.47021484375, "learning_rate": 1.6781658916405322e-05, "loss": 2.7861, "step": 7759 }, { "epoch": 0.1, "grad_norm": 6.589328289031982, "learning_rate": 1.6783821780036772e-05, "loss": 2.784, "step": 7760 }, { "epoch": 0.1, "grad_norm": 6.017117977142334, "learning_rate": 1.678598464366822e-05, "loss": 2.3187, "step": 7761 }, { "epoch": 0.1, "grad_norm": 6.333709239959717, "learning_rate": 1.6788147507299665e-05, "loss": 2.5118, "step": 7762 }, { "epoch": 0.1, "grad_norm": 7.033042907714844, "learning_rate": 1.6790310370931115e-05, "loss": 2.9233, "step": 7763 }, { "epoch": 0.1, "grad_norm": 6.356366157531738, "learning_rate": 1.6792473234562564e-05, "loss": 2.8002, "step": 7764 }, { "epoch": 0.1, "grad_norm": 6.625370979309082, "learning_rate": 1.679463609819401e-05, "loss": 2.2057, "step": 7765 }, { "epoch": 0.1, "grad_norm": 6.327931880950928, "learning_rate": 1.6796798961825457e-05, "loss": 2.424, "step": 7766 }, { "epoch": 0.1, "grad_norm": 8.156826972961426, "learning_rate": 1.6798961825456907e-05, "loss": 2.849, "step": 7767 }, { "epoch": 0.1, "grad_norm": 6.9981303215026855, "learning_rate": 1.6801124689088357e-05, "loss": 2.7134, "step": 7768 }, { "epoch": 0.1, "grad_norm": 7.336590766906738, "learning_rate": 1.6803287552719803e-05, "loss": 2.5545, "step": 7769 }, { "epoch": 0.1, "grad_norm": 7.3383355140686035, "learning_rate": 1.680545041635125e-05, "loss": 2.4563, "step": 7770 }, { "epoch": 0.1, "grad_norm": 8.082615852355957, "learning_rate": 1.68076132799827e-05, "loss": 2.6591, "step": 7771 }, { "epoch": 0.1, "grad_norm": 6.0982842445373535, "learning_rate": 1.680977614361415e-05, "loss": 1.9344, "step": 7772 }, { "epoch": 0.1, "grad_norm": 6.373157024383545, "learning_rate": 1.6811939007245595e-05, "loss": 2.5712, "step": 7773 }, { "epoch": 0.1, "grad_norm": 6.21049165725708, "learning_rate": 1.681410187087704e-05, "loss": 2.0364, "step": 7774 }, { "epoch": 0.1, "grad_norm": 6.429840087890625, "learning_rate": 1.6816264734508488e-05, "loss": 2.7954, "step": 7775 }, { "epoch": 0.1, "grad_norm": 6.6352033615112305, "learning_rate": 1.6818427598139938e-05, "loss": 2.7342, "step": 7776 }, { "epoch": 0.1, "grad_norm": 6.884702682495117, "learning_rate": 1.6820590461771387e-05, "loss": 2.7821, "step": 7777 }, { "epoch": 0.1, "grad_norm": 6.347643852233887, "learning_rate": 1.6822753325402834e-05, "loss": 2.336, "step": 7778 }, { "epoch": 0.1, "grad_norm": 6.964463233947754, "learning_rate": 1.6824916189034283e-05, "loss": 3.1683, "step": 7779 }, { "epoch": 0.1, "grad_norm": 7.872574329376221, "learning_rate": 1.682707905266573e-05, "loss": 2.8497, "step": 7780 }, { "epoch": 0.1, "grad_norm": 7.071662902832031, "learning_rate": 1.682924191629718e-05, "loss": 3.1284, "step": 7781 }, { "epoch": 0.1, "grad_norm": 6.383466720581055, "learning_rate": 1.6831404779928626e-05, "loss": 2.4067, "step": 7782 }, { "epoch": 0.1, "grad_norm": 6.309503078460693, "learning_rate": 1.6833567643560076e-05, "loss": 2.4917, "step": 7783 }, { "epoch": 0.1, "grad_norm": 6.338937759399414, "learning_rate": 1.6835730507191522e-05, "loss": 2.1722, "step": 7784 }, { "epoch": 0.1, "grad_norm": 6.823468208312988, "learning_rate": 1.6837893370822972e-05, "loss": 2.6247, "step": 7785 }, { "epoch": 0.1, "grad_norm": 6.765349388122559, "learning_rate": 1.6840056234454418e-05, "loss": 2.6301, "step": 7786 }, { "epoch": 0.1, "grad_norm": 7.624615669250488, "learning_rate": 1.6842219098085868e-05, "loss": 2.8335, "step": 7787 }, { "epoch": 0.1, "grad_norm": 7.153671741485596, "learning_rate": 1.6844381961717314e-05, "loss": 2.643, "step": 7788 }, { "epoch": 0.1, "grad_norm": 6.299101829528809, "learning_rate": 1.6846544825348764e-05, "loss": 2.592, "step": 7789 }, { "epoch": 0.1, "grad_norm": 6.590519428253174, "learning_rate": 1.684870768898021e-05, "loss": 2.7417, "step": 7790 }, { "epoch": 0.1, "grad_norm": 7.150984764099121, "learning_rate": 1.685087055261166e-05, "loss": 2.5708, "step": 7791 }, { "epoch": 0.1, "grad_norm": 7.902465343475342, "learning_rate": 1.6853033416243106e-05, "loss": 2.6277, "step": 7792 }, { "epoch": 0.1, "grad_norm": 6.689565181732178, "learning_rate": 1.6855196279874556e-05, "loss": 2.8356, "step": 7793 }, { "epoch": 0.1, "grad_norm": 7.0520453453063965, "learning_rate": 1.6857359143506003e-05, "loss": 2.5476, "step": 7794 }, { "epoch": 0.1, "grad_norm": 6.267398357391357, "learning_rate": 1.6859522007137452e-05, "loss": 2.6741, "step": 7795 }, { "epoch": 0.1, "grad_norm": 7.0335798263549805, "learning_rate": 1.68616848707689e-05, "loss": 2.5743, "step": 7796 }, { "epoch": 0.1, "grad_norm": 7.472509384155273, "learning_rate": 1.686384773440035e-05, "loss": 2.8024, "step": 7797 }, { "epoch": 0.1, "grad_norm": 6.398059368133545, "learning_rate": 1.6866010598031795e-05, "loss": 2.8701, "step": 7798 }, { "epoch": 0.1, "grad_norm": 7.564128398895264, "learning_rate": 1.6868173461663244e-05, "loss": 2.6539, "step": 7799 }, { "epoch": 0.1, "grad_norm": 6.585284233093262, "learning_rate": 1.687033632529469e-05, "loss": 2.7718, "step": 7800 }, { "epoch": 0.1, "grad_norm": 5.995213031768799, "learning_rate": 1.687249918892614e-05, "loss": 2.3563, "step": 7801 }, { "epoch": 0.1, "grad_norm": 7.175391674041748, "learning_rate": 1.6874662052557587e-05, "loss": 2.9992, "step": 7802 }, { "epoch": 0.1, "grad_norm": 6.46983003616333, "learning_rate": 1.6876824916189037e-05, "loss": 2.6356, "step": 7803 }, { "epoch": 0.1, "grad_norm": 7.571682929992676, "learning_rate": 1.6878987779820483e-05, "loss": 2.3433, "step": 7804 }, { "epoch": 0.1, "grad_norm": 6.555115699768066, "learning_rate": 1.6881150643451933e-05, "loss": 2.5712, "step": 7805 }, { "epoch": 0.1, "grad_norm": 7.0838541984558105, "learning_rate": 1.688331350708338e-05, "loss": 3.0831, "step": 7806 }, { "epoch": 0.1, "grad_norm": 6.564239978790283, "learning_rate": 1.688547637071483e-05, "loss": 2.221, "step": 7807 }, { "epoch": 0.1, "grad_norm": 6.174508094787598, "learning_rate": 1.6887639234346275e-05, "loss": 2.0821, "step": 7808 }, { "epoch": 0.1, "grad_norm": 6.330270767211914, "learning_rate": 1.6889802097977725e-05, "loss": 2.632, "step": 7809 }, { "epoch": 0.1, "grad_norm": 6.971510410308838, "learning_rate": 1.689196496160917e-05, "loss": 3.1961, "step": 7810 }, { "epoch": 0.1, "grad_norm": 7.325080871582031, "learning_rate": 1.689412782524062e-05, "loss": 2.9889, "step": 7811 }, { "epoch": 0.1, "grad_norm": 6.192660808563232, "learning_rate": 1.6896290688872067e-05, "loss": 2.5459, "step": 7812 }, { "epoch": 0.1, "grad_norm": 6.888269901275635, "learning_rate": 1.6898453552503514e-05, "loss": 2.4471, "step": 7813 }, { "epoch": 0.1, "grad_norm": 6.549870014190674, "learning_rate": 1.6900616416134964e-05, "loss": 2.5013, "step": 7814 }, { "epoch": 0.1, "grad_norm": 6.413891315460205, "learning_rate": 1.6902779279766413e-05, "loss": 2.3041, "step": 7815 }, { "epoch": 0.1, "grad_norm": 7.706423759460449, "learning_rate": 1.690494214339786e-05, "loss": 2.7466, "step": 7816 }, { "epoch": 0.1, "grad_norm": 8.121048927307129, "learning_rate": 1.6907105007029306e-05, "loss": 2.9761, "step": 7817 }, { "epoch": 0.1, "grad_norm": 7.855103015899658, "learning_rate": 1.6909267870660756e-05, "loss": 2.7491, "step": 7818 }, { "epoch": 0.1, "grad_norm": 7.225062370300293, "learning_rate": 1.6911430734292205e-05, "loss": 2.5484, "step": 7819 }, { "epoch": 0.1, "grad_norm": 7.4447431564331055, "learning_rate": 1.6913593597923652e-05, "loss": 2.6779, "step": 7820 }, { "epoch": 0.1, "grad_norm": 7.20839262008667, "learning_rate": 1.6915756461555098e-05, "loss": 2.6865, "step": 7821 }, { "epoch": 0.1, "grad_norm": 7.643245697021484, "learning_rate": 1.6917919325186548e-05, "loss": 2.3112, "step": 7822 }, { "epoch": 0.1, "grad_norm": 6.253111362457275, "learning_rate": 1.6920082188817998e-05, "loss": 2.8413, "step": 7823 }, { "epoch": 0.1, "grad_norm": 6.186039447784424, "learning_rate": 1.6922245052449444e-05, "loss": 2.1094, "step": 7824 }, { "epoch": 0.1, "grad_norm": 6.885324478149414, "learning_rate": 1.692440791608089e-05, "loss": 2.1595, "step": 7825 }, { "epoch": 0.1, "grad_norm": 7.881892204284668, "learning_rate": 1.692657077971234e-05, "loss": 2.7879, "step": 7826 }, { "epoch": 0.1, "grad_norm": 6.795853137969971, "learning_rate": 1.692873364334379e-05, "loss": 2.6104, "step": 7827 }, { "epoch": 0.1, "grad_norm": 6.276658058166504, "learning_rate": 1.6930896506975236e-05, "loss": 2.8715, "step": 7828 }, { "epoch": 0.1, "grad_norm": 6.791856288909912, "learning_rate": 1.6933059370606683e-05, "loss": 2.5827, "step": 7829 }, { "epoch": 0.1, "grad_norm": 6.519853115081787, "learning_rate": 1.6935222234238132e-05, "loss": 3.1957, "step": 7830 }, { "epoch": 0.1, "grad_norm": 5.896716594696045, "learning_rate": 1.6937385097869582e-05, "loss": 2.2118, "step": 7831 }, { "epoch": 0.1, "grad_norm": 7.341492176055908, "learning_rate": 1.693954796150103e-05, "loss": 3.1394, "step": 7832 }, { "epoch": 0.1, "grad_norm": 6.724745750427246, "learning_rate": 1.6941710825132475e-05, "loss": 2.6956, "step": 7833 }, { "epoch": 0.1, "grad_norm": 6.330222129821777, "learning_rate": 1.6943873688763925e-05, "loss": 1.9049, "step": 7834 }, { "epoch": 0.1, "grad_norm": 5.741779804229736, "learning_rate": 1.6946036552395374e-05, "loss": 1.8465, "step": 7835 }, { "epoch": 0.1, "grad_norm": 6.914285182952881, "learning_rate": 1.694819941602682e-05, "loss": 2.8956, "step": 7836 }, { "epoch": 0.1, "grad_norm": 6.2061896324157715, "learning_rate": 1.6950362279658267e-05, "loss": 2.5641, "step": 7837 }, { "epoch": 0.1, "grad_norm": 7.113834857940674, "learning_rate": 1.6952525143289717e-05, "loss": 2.8866, "step": 7838 }, { "epoch": 0.1, "grad_norm": 6.434120178222656, "learning_rate": 1.6954688006921167e-05, "loss": 2.6412, "step": 7839 }, { "epoch": 0.1, "grad_norm": 6.783775329589844, "learning_rate": 1.6956850870552613e-05, "loss": 2.8974, "step": 7840 }, { "epoch": 0.1, "grad_norm": 8.442959785461426, "learning_rate": 1.6959013734184063e-05, "loss": 3.5262, "step": 7841 }, { "epoch": 0.1, "grad_norm": 6.975114822387695, "learning_rate": 1.696117659781551e-05, "loss": 3.5608, "step": 7842 }, { "epoch": 0.1, "grad_norm": 7.282547950744629, "learning_rate": 1.696333946144696e-05, "loss": 2.3859, "step": 7843 }, { "epoch": 0.1, "grad_norm": 6.653679370880127, "learning_rate": 1.6965502325078405e-05, "loss": 2.459, "step": 7844 }, { "epoch": 0.1, "grad_norm": 7.232574939727783, "learning_rate": 1.6967665188709855e-05, "loss": 2.9335, "step": 7845 }, { "epoch": 0.1, "grad_norm": 6.01820707321167, "learning_rate": 1.69698280523413e-05, "loss": 2.1894, "step": 7846 }, { "epoch": 0.1, "grad_norm": 6.397394180297852, "learning_rate": 1.6971990915972748e-05, "loss": 2.225, "step": 7847 }, { "epoch": 0.1, "grad_norm": 6.9065752029418945, "learning_rate": 1.6974153779604197e-05, "loss": 2.8054, "step": 7848 }, { "epoch": 0.1, "grad_norm": 6.582679271697998, "learning_rate": 1.6976316643235647e-05, "loss": 2.7337, "step": 7849 }, { "epoch": 0.1, "grad_norm": 6.15623140335083, "learning_rate": 1.6978479506867093e-05, "loss": 2.1036, "step": 7850 }, { "epoch": 0.1, "grad_norm": 6.481020450592041, "learning_rate": 1.698064237049854e-05, "loss": 3.1378, "step": 7851 }, { "epoch": 0.1, "grad_norm": 6.346936225891113, "learning_rate": 1.698280523412999e-05, "loss": 2.3092, "step": 7852 }, { "epoch": 0.1, "grad_norm": 6.348178863525391, "learning_rate": 1.698496809776144e-05, "loss": 2.3823, "step": 7853 }, { "epoch": 0.1, "grad_norm": 6.740133762359619, "learning_rate": 1.6987130961392886e-05, "loss": 2.9153, "step": 7854 }, { "epoch": 0.1, "grad_norm": 6.866818428039551, "learning_rate": 1.6989293825024332e-05, "loss": 2.6256, "step": 7855 }, { "epoch": 0.1, "grad_norm": 6.83066987991333, "learning_rate": 1.699145668865578e-05, "loss": 2.5176, "step": 7856 }, { "epoch": 0.1, "grad_norm": 7.9312262535095215, "learning_rate": 1.699361955228723e-05, "loss": 2.4673, "step": 7857 }, { "epoch": 0.1, "grad_norm": 6.651773929595947, "learning_rate": 1.6995782415918678e-05, "loss": 3.0578, "step": 7858 }, { "epoch": 0.1, "grad_norm": 7.652642250061035, "learning_rate": 1.6997945279550124e-05, "loss": 2.6247, "step": 7859 }, { "epoch": 0.1, "grad_norm": 5.48961877822876, "learning_rate": 1.7000108143181574e-05, "loss": 2.2305, "step": 7860 }, { "epoch": 0.1, "grad_norm": 6.197190761566162, "learning_rate": 1.7002271006813024e-05, "loss": 2.4185, "step": 7861 }, { "epoch": 0.1, "grad_norm": 7.064995765686035, "learning_rate": 1.700443387044447e-05, "loss": 2.67, "step": 7862 }, { "epoch": 0.1, "grad_norm": 7.6574387550354, "learning_rate": 1.7006596734075916e-05, "loss": 2.4784, "step": 7863 }, { "epoch": 0.1, "grad_norm": 5.440279006958008, "learning_rate": 1.7008759597707366e-05, "loss": 1.9464, "step": 7864 }, { "epoch": 0.1, "grad_norm": 5.470777988433838, "learning_rate": 1.7010922461338816e-05, "loss": 2.3427, "step": 7865 }, { "epoch": 0.1, "grad_norm": 8.367748260498047, "learning_rate": 1.7013085324970262e-05, "loss": 3.0249, "step": 7866 }, { "epoch": 0.1, "grad_norm": 6.312922954559326, "learning_rate": 1.701524818860171e-05, "loss": 2.6036, "step": 7867 }, { "epoch": 0.1, "grad_norm": 6.873441696166992, "learning_rate": 1.7017411052233158e-05, "loss": 2.6477, "step": 7868 }, { "epoch": 0.1, "grad_norm": 5.566758155822754, "learning_rate": 1.7019573915864608e-05, "loss": 2.6651, "step": 7869 }, { "epoch": 0.1, "grad_norm": 6.580766201019287, "learning_rate": 1.7021736779496054e-05, "loss": 2.503, "step": 7870 }, { "epoch": 0.1, "grad_norm": 5.507435321807861, "learning_rate": 1.70238996431275e-05, "loss": 2.1183, "step": 7871 }, { "epoch": 0.1, "grad_norm": 6.155289173126221, "learning_rate": 1.702606250675895e-05, "loss": 2.4435, "step": 7872 }, { "epoch": 0.1, "grad_norm": 7.175492763519287, "learning_rate": 1.70282253703904e-05, "loss": 2.3982, "step": 7873 }, { "epoch": 0.1, "grad_norm": 6.408743381500244, "learning_rate": 1.7030388234021847e-05, "loss": 2.3251, "step": 7874 }, { "epoch": 0.1, "grad_norm": 6.523871898651123, "learning_rate": 1.7032551097653293e-05, "loss": 2.7259, "step": 7875 }, { "epoch": 0.1, "grad_norm": 6.13540506362915, "learning_rate": 1.7034713961284743e-05, "loss": 2.1911, "step": 7876 }, { "epoch": 0.1, "grad_norm": 7.508023262023926, "learning_rate": 1.7036876824916192e-05, "loss": 2.8005, "step": 7877 }, { "epoch": 0.1, "grad_norm": 6.273929595947266, "learning_rate": 1.703903968854764e-05, "loss": 2.3968, "step": 7878 }, { "epoch": 0.1, "grad_norm": 6.720609188079834, "learning_rate": 1.7041202552179085e-05, "loss": 2.536, "step": 7879 }, { "epoch": 0.1, "grad_norm": 6.536533832550049, "learning_rate": 1.7043365415810535e-05, "loss": 2.8457, "step": 7880 }, { "epoch": 0.1, "grad_norm": 6.669569969177246, "learning_rate": 1.7045528279441985e-05, "loss": 2.8746, "step": 7881 }, { "epoch": 0.1, "grad_norm": 6.57803201675415, "learning_rate": 1.704769114307343e-05, "loss": 2.4326, "step": 7882 }, { "epoch": 0.1, "grad_norm": 6.822221279144287, "learning_rate": 1.7049854006704877e-05, "loss": 2.5293, "step": 7883 }, { "epoch": 0.1, "grad_norm": 6.853877544403076, "learning_rate": 1.7052016870336327e-05, "loss": 3.2448, "step": 7884 }, { "epoch": 0.1, "grad_norm": 6.835905075073242, "learning_rate": 1.7054179733967773e-05, "loss": 2.7484, "step": 7885 }, { "epoch": 0.1, "grad_norm": 6.715580940246582, "learning_rate": 1.7056342597599223e-05, "loss": 2.6944, "step": 7886 }, { "epoch": 0.1, "grad_norm": 8.574151039123535, "learning_rate": 1.705850546123067e-05, "loss": 2.6656, "step": 7887 }, { "epoch": 0.1, "grad_norm": 6.402035713195801, "learning_rate": 1.706066832486212e-05, "loss": 2.0323, "step": 7888 }, { "epoch": 0.1, "grad_norm": 6.272106170654297, "learning_rate": 1.7062831188493566e-05, "loss": 2.5859, "step": 7889 }, { "epoch": 0.1, "grad_norm": 6.690384387969971, "learning_rate": 1.7064994052125015e-05, "loss": 2.9598, "step": 7890 }, { "epoch": 0.1, "grad_norm": 6.629519462585449, "learning_rate": 1.7067156915756462e-05, "loss": 2.6184, "step": 7891 }, { "epoch": 0.1, "grad_norm": 6.504821300506592, "learning_rate": 1.706931977938791e-05, "loss": 2.6032, "step": 7892 }, { "epoch": 0.1, "grad_norm": 6.2595953941345215, "learning_rate": 1.7071482643019358e-05, "loss": 2.5636, "step": 7893 }, { "epoch": 0.1, "grad_norm": 7.271487236022949, "learning_rate": 1.7073645506650808e-05, "loss": 2.5316, "step": 7894 }, { "epoch": 0.1, "grad_norm": 6.991202354431152, "learning_rate": 1.7075808370282254e-05, "loss": 2.5523, "step": 7895 }, { "epoch": 0.1, "grad_norm": 6.74257755279541, "learning_rate": 1.7077971233913704e-05, "loss": 2.5303, "step": 7896 }, { "epoch": 0.1, "grad_norm": 6.114313125610352, "learning_rate": 1.708013409754515e-05, "loss": 2.9417, "step": 7897 }, { "epoch": 0.1, "grad_norm": 6.265094757080078, "learning_rate": 1.70822969611766e-05, "loss": 2.1678, "step": 7898 }, { "epoch": 0.1, "grad_norm": 6.637321472167969, "learning_rate": 1.7084459824808046e-05, "loss": 2.9094, "step": 7899 }, { "epoch": 0.1, "grad_norm": 7.43419075012207, "learning_rate": 1.7086622688439496e-05, "loss": 3.1249, "step": 7900 }, { "epoch": 0.1, "grad_norm": 5.732173919677734, "learning_rate": 1.7088785552070942e-05, "loss": 2.1389, "step": 7901 }, { "epoch": 0.1, "grad_norm": 7.246559143066406, "learning_rate": 1.7090948415702392e-05, "loss": 2.5242, "step": 7902 }, { "epoch": 0.1, "grad_norm": 5.921160697937012, "learning_rate": 1.709311127933384e-05, "loss": 2.1323, "step": 7903 }, { "epoch": 0.1, "grad_norm": 5.700899600982666, "learning_rate": 1.7095274142965288e-05, "loss": 2.399, "step": 7904 }, { "epoch": 0.1, "grad_norm": 5.891419887542725, "learning_rate": 1.7097437006596734e-05, "loss": 2.2028, "step": 7905 }, { "epoch": 0.1, "grad_norm": 5.952733993530273, "learning_rate": 1.7099599870228184e-05, "loss": 1.9456, "step": 7906 }, { "epoch": 0.1, "grad_norm": 6.635630130767822, "learning_rate": 1.7101762733859634e-05, "loss": 2.5504, "step": 7907 }, { "epoch": 0.1, "grad_norm": 6.210168361663818, "learning_rate": 1.710392559749108e-05, "loss": 2.4599, "step": 7908 }, { "epoch": 0.1, "grad_norm": 6.917220592498779, "learning_rate": 1.7106088461122527e-05, "loss": 2.5407, "step": 7909 }, { "epoch": 0.1, "grad_norm": 6.771565914154053, "learning_rate": 1.7108251324753976e-05, "loss": 2.6122, "step": 7910 }, { "epoch": 0.1, "grad_norm": 6.809847354888916, "learning_rate": 1.7110414188385426e-05, "loss": 2.5074, "step": 7911 }, { "epoch": 0.1, "grad_norm": 7.965205192565918, "learning_rate": 1.7112577052016873e-05, "loss": 3.1513, "step": 7912 }, { "epoch": 0.1, "grad_norm": 7.165337085723877, "learning_rate": 1.711473991564832e-05, "loss": 2.1607, "step": 7913 }, { "epoch": 0.1, "grad_norm": 5.960692405700684, "learning_rate": 1.711690277927977e-05, "loss": 2.0024, "step": 7914 }, { "epoch": 0.1, "grad_norm": 6.467067718505859, "learning_rate": 1.711906564291122e-05, "loss": 3.2853, "step": 7915 }, { "epoch": 0.1, "grad_norm": 6.6494598388671875, "learning_rate": 1.7121228506542665e-05, "loss": 3.5685, "step": 7916 }, { "epoch": 0.1, "grad_norm": 7.355510234832764, "learning_rate": 1.712339137017411e-05, "loss": 2.8258, "step": 7917 }, { "epoch": 0.1, "grad_norm": 7.020669460296631, "learning_rate": 1.712555423380556e-05, "loss": 2.5895, "step": 7918 }, { "epoch": 0.1, "grad_norm": 6.882543087005615, "learning_rate": 1.7127717097437007e-05, "loss": 2.9177, "step": 7919 }, { "epoch": 0.1, "grad_norm": 7.004019260406494, "learning_rate": 1.7129879961068457e-05, "loss": 2.8908, "step": 7920 }, { "epoch": 0.1, "grad_norm": 6.507181644439697, "learning_rate": 1.7132042824699903e-05, "loss": 2.6331, "step": 7921 }, { "epoch": 0.1, "grad_norm": 5.708009243011475, "learning_rate": 1.713420568833135e-05, "loss": 2.6633, "step": 7922 }, { "epoch": 0.1, "grad_norm": 6.414422988891602, "learning_rate": 1.71363685519628e-05, "loss": 2.6147, "step": 7923 }, { "epoch": 0.1, "grad_norm": 6.469635486602783, "learning_rate": 1.713853141559425e-05, "loss": 2.7763, "step": 7924 }, { "epoch": 0.1, "grad_norm": 6.336550235748291, "learning_rate": 1.7140694279225695e-05, "loss": 2.4387, "step": 7925 }, { "epoch": 0.1, "grad_norm": 7.218503952026367, "learning_rate": 1.7142857142857142e-05, "loss": 2.5883, "step": 7926 }, { "epoch": 0.1, "grad_norm": 6.116767406463623, "learning_rate": 1.714502000648859e-05, "loss": 2.5866, "step": 7927 }, { "epoch": 0.1, "grad_norm": 7.833624362945557, "learning_rate": 1.714718287012004e-05, "loss": 2.9355, "step": 7928 }, { "epoch": 0.1, "grad_norm": 7.098257541656494, "learning_rate": 1.7149345733751488e-05, "loss": 2.6241, "step": 7929 }, { "epoch": 0.1, "grad_norm": 6.366880416870117, "learning_rate": 1.7151508597382934e-05, "loss": 2.6396, "step": 7930 }, { "epoch": 0.1, "grad_norm": 6.246614456176758, "learning_rate": 1.7153671461014384e-05, "loss": 2.652, "step": 7931 }, { "epoch": 0.1, "grad_norm": 6.595032691955566, "learning_rate": 1.7155834324645834e-05, "loss": 2.4779, "step": 7932 }, { "epoch": 0.1, "grad_norm": 7.426185131072998, "learning_rate": 1.715799718827728e-05, "loss": 2.7592, "step": 7933 }, { "epoch": 0.1, "grad_norm": 5.612440586090088, "learning_rate": 1.7160160051908726e-05, "loss": 2.0136, "step": 7934 }, { "epoch": 0.1, "grad_norm": 6.863811016082764, "learning_rate": 1.7162322915540176e-05, "loss": 2.9925, "step": 7935 }, { "epoch": 0.1, "grad_norm": 6.391209125518799, "learning_rate": 1.7164485779171626e-05, "loss": 2.2822, "step": 7936 }, { "epoch": 0.1, "grad_norm": 6.710513114929199, "learning_rate": 1.7166648642803072e-05, "loss": 2.3073, "step": 7937 }, { "epoch": 0.1, "grad_norm": 7.262368679046631, "learning_rate": 1.7168811506434522e-05, "loss": 2.6801, "step": 7938 }, { "epoch": 0.1, "grad_norm": 7.345788955688477, "learning_rate": 1.7170974370065968e-05, "loss": 2.5458, "step": 7939 }, { "epoch": 0.1, "grad_norm": 6.7856268882751465, "learning_rate": 1.7173137233697418e-05, "loss": 2.239, "step": 7940 }, { "epoch": 0.1, "grad_norm": 6.641408443450928, "learning_rate": 1.7175300097328864e-05, "loss": 2.2073, "step": 7941 }, { "epoch": 0.1, "grad_norm": 6.653030872344971, "learning_rate": 1.7177462960960314e-05, "loss": 2.3747, "step": 7942 }, { "epoch": 0.1, "grad_norm": 7.87674617767334, "learning_rate": 1.717962582459176e-05, "loss": 3.0317, "step": 7943 }, { "epoch": 0.1, "grad_norm": 7.059998035430908, "learning_rate": 1.718178868822321e-05, "loss": 2.5899, "step": 7944 }, { "epoch": 0.1, "grad_norm": 6.497280597686768, "learning_rate": 1.7183951551854656e-05, "loss": 2.3468, "step": 7945 }, { "epoch": 0.1, "grad_norm": 6.841734886169434, "learning_rate": 1.7186114415486106e-05, "loss": 2.4928, "step": 7946 }, { "epoch": 0.1, "grad_norm": 6.262010097503662, "learning_rate": 1.7188277279117553e-05, "loss": 2.0544, "step": 7947 }, { "epoch": 0.1, "grad_norm": 6.240572929382324, "learning_rate": 1.7190440142749002e-05, "loss": 2.2472, "step": 7948 }, { "epoch": 0.1, "grad_norm": 6.692298889160156, "learning_rate": 1.719260300638045e-05, "loss": 2.5549, "step": 7949 }, { "epoch": 0.1, "grad_norm": 6.030318260192871, "learning_rate": 1.71947658700119e-05, "loss": 2.1652, "step": 7950 }, { "epoch": 0.1, "grad_norm": 7.560760498046875, "learning_rate": 1.7196928733643345e-05, "loss": 3.2916, "step": 7951 }, { "epoch": 0.1, "grad_norm": 7.879971504211426, "learning_rate": 1.7199091597274795e-05, "loss": 2.5911, "step": 7952 }, { "epoch": 0.1, "grad_norm": 5.424527168273926, "learning_rate": 1.720125446090624e-05, "loss": 2.1052, "step": 7953 }, { "epoch": 0.1, "grad_norm": 7.065324783325195, "learning_rate": 1.720341732453769e-05, "loss": 3.0887, "step": 7954 }, { "epoch": 0.1, "grad_norm": 6.164995193481445, "learning_rate": 1.7205580188169137e-05, "loss": 2.0667, "step": 7955 }, { "epoch": 0.1, "grad_norm": 6.493024826049805, "learning_rate": 1.7207743051800583e-05, "loss": 2.2845, "step": 7956 }, { "epoch": 0.1, "grad_norm": 5.935035705566406, "learning_rate": 1.7209905915432033e-05, "loss": 1.9803, "step": 7957 }, { "epoch": 0.1, "grad_norm": 6.394929885864258, "learning_rate": 1.7212068779063483e-05, "loss": 2.1463, "step": 7958 }, { "epoch": 0.1, "grad_norm": 6.533236980438232, "learning_rate": 1.721423164269493e-05, "loss": 2.6942, "step": 7959 }, { "epoch": 0.1, "grad_norm": 7.395760536193848, "learning_rate": 1.7216394506326376e-05, "loss": 2.6257, "step": 7960 }, { "epoch": 0.1, "grad_norm": 6.724713325500488, "learning_rate": 1.7218557369957825e-05, "loss": 2.3982, "step": 7961 }, { "epoch": 0.1, "grad_norm": 6.762115001678467, "learning_rate": 1.7220720233589275e-05, "loss": 3.096, "step": 7962 }, { "epoch": 0.1, "grad_norm": 6.666813373565674, "learning_rate": 1.722288309722072e-05, "loss": 2.7039, "step": 7963 }, { "epoch": 0.1, "grad_norm": 8.492928504943848, "learning_rate": 1.7225045960852168e-05, "loss": 2.6151, "step": 7964 }, { "epoch": 0.1, "grad_norm": 6.481022834777832, "learning_rate": 1.7227208824483618e-05, "loss": 2.0752, "step": 7965 }, { "epoch": 0.1, "grad_norm": 5.891686916351318, "learning_rate": 1.7229371688115067e-05, "loss": 2.141, "step": 7966 }, { "epoch": 0.1, "grad_norm": 6.223675727844238, "learning_rate": 1.7231534551746514e-05, "loss": 2.0281, "step": 7967 }, { "epoch": 0.1, "grad_norm": 7.334432601928711, "learning_rate": 1.723369741537796e-05, "loss": 2.5065, "step": 7968 }, { "epoch": 0.1, "grad_norm": 6.675146102905273, "learning_rate": 1.723586027900941e-05, "loss": 2.5901, "step": 7969 }, { "epoch": 0.1, "grad_norm": 6.495640277862549, "learning_rate": 1.723802314264086e-05, "loss": 2.468, "step": 7970 }, { "epoch": 0.1, "grad_norm": 7.520829677581787, "learning_rate": 1.7240186006272306e-05, "loss": 2.2789, "step": 7971 }, { "epoch": 0.1, "grad_norm": 6.581655979156494, "learning_rate": 1.7242348869903752e-05, "loss": 2.2245, "step": 7972 }, { "epoch": 0.1, "grad_norm": 8.453178405761719, "learning_rate": 1.7244511733535202e-05, "loss": 3.2842, "step": 7973 }, { "epoch": 0.1, "grad_norm": 6.690640449523926, "learning_rate": 1.724667459716665e-05, "loss": 3.1612, "step": 7974 }, { "epoch": 0.1, "grad_norm": 6.899205684661865, "learning_rate": 1.7248837460798098e-05, "loss": 3.0632, "step": 7975 }, { "epoch": 0.1, "grad_norm": 6.063957214355469, "learning_rate": 1.7251000324429544e-05, "loss": 2.4155, "step": 7976 }, { "epoch": 0.1, "grad_norm": 6.9272074699401855, "learning_rate": 1.7253163188060994e-05, "loss": 2.5574, "step": 7977 }, { "epoch": 0.1, "grad_norm": 6.007561206817627, "learning_rate": 1.7255326051692444e-05, "loss": 2.33, "step": 7978 }, { "epoch": 0.1, "grad_norm": 7.127908706665039, "learning_rate": 1.725748891532389e-05, "loss": 2.8094, "step": 7979 }, { "epoch": 0.1, "grad_norm": 6.808645248413086, "learning_rate": 1.7259651778955337e-05, "loss": 2.8164, "step": 7980 }, { "epoch": 0.1, "grad_norm": 6.701059341430664, "learning_rate": 1.7261814642586786e-05, "loss": 2.7221, "step": 7981 }, { "epoch": 0.1, "grad_norm": 6.748501300811768, "learning_rate": 1.7263977506218236e-05, "loss": 2.3562, "step": 7982 }, { "epoch": 0.1, "grad_norm": 6.107625484466553, "learning_rate": 1.7266140369849682e-05, "loss": 2.652, "step": 7983 }, { "epoch": 0.1, "grad_norm": 6.6552629470825195, "learning_rate": 1.726830323348113e-05, "loss": 2.9029, "step": 7984 }, { "epoch": 0.1, "grad_norm": 6.491065979003906, "learning_rate": 1.727046609711258e-05, "loss": 2.1654, "step": 7985 }, { "epoch": 0.1, "grad_norm": 6.500729084014893, "learning_rate": 1.7272628960744028e-05, "loss": 2.8124, "step": 7986 }, { "epoch": 0.1, "grad_norm": 6.866399765014648, "learning_rate": 1.7274791824375475e-05, "loss": 2.8193, "step": 7987 }, { "epoch": 0.1, "grad_norm": 6.306986331939697, "learning_rate": 1.727695468800692e-05, "loss": 2.473, "step": 7988 }, { "epoch": 0.1, "grad_norm": 7.4077043533325195, "learning_rate": 1.727911755163837e-05, "loss": 2.9519, "step": 7989 }, { "epoch": 0.1, "grad_norm": 6.186593055725098, "learning_rate": 1.728128041526982e-05, "loss": 2.678, "step": 7990 }, { "epoch": 0.1, "grad_norm": 6.557027816772461, "learning_rate": 1.7283443278901267e-05, "loss": 2.5856, "step": 7991 }, { "epoch": 0.1, "grad_norm": 5.830067157745361, "learning_rate": 1.7285606142532713e-05, "loss": 2.3538, "step": 7992 }, { "epoch": 0.1, "grad_norm": 6.5639424324035645, "learning_rate": 1.7287769006164163e-05, "loss": 2.811, "step": 7993 }, { "epoch": 0.1, "grad_norm": 6.742532730102539, "learning_rate": 1.728993186979561e-05, "loss": 1.8184, "step": 7994 }, { "epoch": 0.1, "grad_norm": 6.276541709899902, "learning_rate": 1.729209473342706e-05, "loss": 2.4791, "step": 7995 }, { "epoch": 0.1, "grad_norm": 6.496082782745361, "learning_rate": 1.7294257597058505e-05, "loss": 2.5609, "step": 7996 }, { "epoch": 0.1, "grad_norm": 6.461541175842285, "learning_rate": 1.7296420460689955e-05, "loss": 2.7034, "step": 7997 }, { "epoch": 0.1, "grad_norm": 6.63777494430542, "learning_rate": 1.72985833243214e-05, "loss": 2.359, "step": 7998 }, { "epoch": 0.1, "grad_norm": 7.169704914093018, "learning_rate": 1.730074618795285e-05, "loss": 2.8263, "step": 7999 }, { "epoch": 0.1, "grad_norm": 8.571019172668457, "learning_rate": 1.73029090515843e-05, "loss": 2.4954, "step": 8000 }, { "epoch": 0.1, "grad_norm": 6.390571594238281, "learning_rate": 1.7305071915215747e-05, "loss": 2.3915, "step": 8001 }, { "epoch": 0.1, "grad_norm": 6.184631824493408, "learning_rate": 1.7307234778847194e-05, "loss": 2.1452, "step": 8002 }, { "epoch": 0.1, "grad_norm": 6.411122798919678, "learning_rate": 1.7309397642478643e-05, "loss": 2.3149, "step": 8003 }, { "epoch": 0.1, "grad_norm": 6.775904178619385, "learning_rate": 1.7311560506110093e-05, "loss": 3.0729, "step": 8004 }, { "epoch": 0.1, "grad_norm": 6.481380939483643, "learning_rate": 1.731372336974154e-05, "loss": 2.7074, "step": 8005 }, { "epoch": 0.1, "grad_norm": 6.833120822906494, "learning_rate": 1.7315886233372986e-05, "loss": 2.5601, "step": 8006 }, { "epoch": 0.1, "grad_norm": 7.296422481536865, "learning_rate": 1.7318049097004436e-05, "loss": 2.8638, "step": 8007 }, { "epoch": 0.1, "grad_norm": 6.056493282318115, "learning_rate": 1.7320211960635885e-05, "loss": 3.2048, "step": 8008 }, { "epoch": 0.1, "grad_norm": 7.784359455108643, "learning_rate": 1.7322374824267332e-05, "loss": 2.9094, "step": 8009 }, { "epoch": 0.1, "grad_norm": 6.631547451019287, "learning_rate": 1.7324537687898778e-05, "loss": 2.6483, "step": 8010 }, { "epoch": 0.1, "grad_norm": 6.285494804382324, "learning_rate": 1.7326700551530228e-05, "loss": 2.6932, "step": 8011 }, { "epoch": 0.1, "grad_norm": 6.228938102722168, "learning_rate": 1.7328863415161678e-05, "loss": 2.4295, "step": 8012 }, { "epoch": 0.1, "grad_norm": 6.13726806640625, "learning_rate": 1.7331026278793124e-05, "loss": 2.4977, "step": 8013 }, { "epoch": 0.1, "grad_norm": 6.774461269378662, "learning_rate": 1.733318914242457e-05, "loss": 2.668, "step": 8014 }, { "epoch": 0.1, "grad_norm": 6.9193902015686035, "learning_rate": 1.733535200605602e-05, "loss": 2.4823, "step": 8015 }, { "epoch": 0.1, "grad_norm": 6.468849182128906, "learning_rate": 1.733751486968747e-05, "loss": 2.684, "step": 8016 }, { "epoch": 0.1, "grad_norm": 6.512604236602783, "learning_rate": 1.7339677733318916e-05, "loss": 2.8606, "step": 8017 }, { "epoch": 0.1, "grad_norm": 6.39073371887207, "learning_rate": 1.7341840596950363e-05, "loss": 2.8812, "step": 8018 }, { "epoch": 0.1, "grad_norm": 7.896798133850098, "learning_rate": 1.7344003460581812e-05, "loss": 2.9391, "step": 8019 }, { "epoch": 0.1, "grad_norm": 7.029452323913574, "learning_rate": 1.7346166324213262e-05, "loss": 2.6126, "step": 8020 }, { "epoch": 0.1, "grad_norm": 7.033297538757324, "learning_rate": 1.734832918784471e-05, "loss": 2.8184, "step": 8021 }, { "epoch": 0.1, "grad_norm": 6.995606899261475, "learning_rate": 1.7350492051476155e-05, "loss": 2.4609, "step": 8022 }, { "epoch": 0.1, "grad_norm": 6.218132019042969, "learning_rate": 1.7352654915107604e-05, "loss": 2.9337, "step": 8023 }, { "epoch": 0.1, "grad_norm": 6.31102180480957, "learning_rate": 1.7354817778739054e-05, "loss": 2.8431, "step": 8024 }, { "epoch": 0.1, "grad_norm": 6.29506778717041, "learning_rate": 1.73569806423705e-05, "loss": 2.7641, "step": 8025 }, { "epoch": 0.1, "grad_norm": 5.906515598297119, "learning_rate": 1.7359143506001947e-05, "loss": 2.576, "step": 8026 }, { "epoch": 0.1, "grad_norm": 6.22052001953125, "learning_rate": 1.7361306369633397e-05, "loss": 2.3922, "step": 8027 }, { "epoch": 0.1, "grad_norm": 6.605632305145264, "learning_rate": 1.7363469233264843e-05, "loss": 2.2444, "step": 8028 }, { "epoch": 0.1, "grad_norm": 5.66322660446167, "learning_rate": 1.7365632096896293e-05, "loss": 2.4699, "step": 8029 }, { "epoch": 0.1, "grad_norm": 7.050535202026367, "learning_rate": 1.736779496052774e-05, "loss": 2.5756, "step": 8030 }, { "epoch": 0.1, "grad_norm": 6.607261657714844, "learning_rate": 1.7369957824159185e-05, "loss": 3.0415, "step": 8031 }, { "epoch": 0.1, "grad_norm": 5.324228763580322, "learning_rate": 1.7372120687790635e-05, "loss": 2.0487, "step": 8032 }, { "epoch": 0.1, "grad_norm": 6.064085960388184, "learning_rate": 1.7374283551422085e-05, "loss": 1.8516, "step": 8033 }, { "epoch": 0.1, "grad_norm": 7.365179061889648, "learning_rate": 1.737644641505353e-05, "loss": 2.9059, "step": 8034 }, { "epoch": 0.1, "grad_norm": 5.662432670593262, "learning_rate": 1.737860927868498e-05, "loss": 2.3995, "step": 8035 }, { "epoch": 0.1, "grad_norm": 6.394716262817383, "learning_rate": 1.7380772142316427e-05, "loss": 2.5084, "step": 8036 }, { "epoch": 0.1, "grad_norm": 6.50938081741333, "learning_rate": 1.7382935005947877e-05, "loss": 2.418, "step": 8037 }, { "epoch": 0.1, "grad_norm": 6.383720397949219, "learning_rate": 1.7385097869579324e-05, "loss": 2.3672, "step": 8038 }, { "epoch": 0.1, "grad_norm": 8.67235279083252, "learning_rate": 1.7387260733210773e-05, "loss": 2.7672, "step": 8039 }, { "epoch": 0.1, "grad_norm": 6.330708980560303, "learning_rate": 1.738942359684222e-05, "loss": 2.6039, "step": 8040 }, { "epoch": 0.1, "grad_norm": 6.709590911865234, "learning_rate": 1.739158646047367e-05, "loss": 2.2192, "step": 8041 }, { "epoch": 0.1, "grad_norm": 8.076472282409668, "learning_rate": 1.7393749324105116e-05, "loss": 2.816, "step": 8042 }, { "epoch": 0.1, "grad_norm": 6.110167026519775, "learning_rate": 1.7395912187736565e-05, "loss": 2.2616, "step": 8043 }, { "epoch": 0.1, "grad_norm": 6.926767349243164, "learning_rate": 1.7398075051368012e-05, "loss": 2.5335, "step": 8044 }, { "epoch": 0.1, "grad_norm": 7.344071865081787, "learning_rate": 1.740023791499946e-05, "loss": 3.1563, "step": 8045 }, { "epoch": 0.1, "grad_norm": 7.867467880249023, "learning_rate": 1.7402400778630908e-05, "loss": 2.9017, "step": 8046 }, { "epoch": 0.1, "grad_norm": 6.9199442863464355, "learning_rate": 1.7404563642262358e-05, "loss": 2.6882, "step": 8047 }, { "epoch": 0.1, "grad_norm": 7.3141188621521, "learning_rate": 1.7406726505893804e-05, "loss": 2.1804, "step": 8048 }, { "epoch": 0.1, "grad_norm": 6.360255241394043, "learning_rate": 1.7408889369525254e-05, "loss": 2.3097, "step": 8049 }, { "epoch": 0.1, "grad_norm": 6.612864017486572, "learning_rate": 1.74110522331567e-05, "loss": 2.2569, "step": 8050 }, { "epoch": 0.1, "grad_norm": 6.493436813354492, "learning_rate": 1.741321509678815e-05, "loss": 3.0329, "step": 8051 }, { "epoch": 0.1, "grad_norm": 5.882873058319092, "learning_rate": 1.7415377960419596e-05, "loss": 2.4478, "step": 8052 }, { "epoch": 0.1, "grad_norm": 7.283227920532227, "learning_rate": 1.7417540824051046e-05, "loss": 2.8913, "step": 8053 }, { "epoch": 0.1, "grad_norm": 6.892005443572998, "learning_rate": 1.7419703687682492e-05, "loss": 2.4372, "step": 8054 }, { "epoch": 0.1, "grad_norm": 7.2851338386535645, "learning_rate": 1.7421866551313942e-05, "loss": 2.9316, "step": 8055 }, { "epoch": 0.1, "grad_norm": 7.740244388580322, "learning_rate": 1.742402941494539e-05, "loss": 3.2955, "step": 8056 }, { "epoch": 0.1, "grad_norm": 7.99102258682251, "learning_rate": 1.7426192278576838e-05, "loss": 3.0257, "step": 8057 }, { "epoch": 0.1, "grad_norm": 6.693574905395508, "learning_rate": 1.7428355142208285e-05, "loss": 2.9517, "step": 8058 }, { "epoch": 0.1, "grad_norm": 7.3151021003723145, "learning_rate": 1.7430518005839734e-05, "loss": 3.4388, "step": 8059 }, { "epoch": 0.1, "grad_norm": 6.385921001434326, "learning_rate": 1.743268086947118e-05, "loss": 2.5595, "step": 8060 }, { "epoch": 0.1, "grad_norm": 6.586477756500244, "learning_rate": 1.743484373310263e-05, "loss": 2.2624, "step": 8061 }, { "epoch": 0.1, "grad_norm": 6.478043556213379, "learning_rate": 1.7437006596734077e-05, "loss": 2.5231, "step": 8062 }, { "epoch": 0.1, "grad_norm": 7.153264045715332, "learning_rate": 1.7439169460365526e-05, "loss": 3.01, "step": 8063 }, { "epoch": 0.1, "grad_norm": 7.617661952972412, "learning_rate": 1.7441332323996973e-05, "loss": 2.3521, "step": 8064 }, { "epoch": 0.1, "grad_norm": 5.857311248779297, "learning_rate": 1.744349518762842e-05, "loss": 2.8358, "step": 8065 }, { "epoch": 0.1, "grad_norm": 6.146877765655518, "learning_rate": 1.744565805125987e-05, "loss": 2.3165, "step": 8066 }, { "epoch": 0.1, "grad_norm": 7.036512851715088, "learning_rate": 1.744782091489132e-05, "loss": 2.1577, "step": 8067 }, { "epoch": 0.1, "grad_norm": 6.380383491516113, "learning_rate": 1.7449983778522765e-05, "loss": 2.8094, "step": 8068 }, { "epoch": 0.1, "grad_norm": 7.071660995483398, "learning_rate": 1.745214664215421e-05, "loss": 2.8106, "step": 8069 }, { "epoch": 0.1, "grad_norm": 6.6150736808776855, "learning_rate": 1.745430950578566e-05, "loss": 2.5061, "step": 8070 }, { "epoch": 0.1, "grad_norm": 6.458188056945801, "learning_rate": 1.745647236941711e-05, "loss": 2.5289, "step": 8071 }, { "epoch": 0.1, "grad_norm": 6.64628791809082, "learning_rate": 1.7458635233048557e-05, "loss": 2.6238, "step": 8072 }, { "epoch": 0.1, "grad_norm": 6.5097455978393555, "learning_rate": 1.7460798096680004e-05, "loss": 2.4189, "step": 8073 }, { "epoch": 0.1, "grad_norm": 6.0523762702941895, "learning_rate": 1.7462960960311453e-05, "loss": 1.9133, "step": 8074 }, { "epoch": 0.1, "grad_norm": 5.878019332885742, "learning_rate": 1.7465123823942903e-05, "loss": 2.1483, "step": 8075 }, { "epoch": 0.1, "grad_norm": 5.92595911026001, "learning_rate": 1.746728668757435e-05, "loss": 2.7978, "step": 8076 }, { "epoch": 0.1, "grad_norm": 5.86104679107666, "learning_rate": 1.7469449551205796e-05, "loss": 2.3684, "step": 8077 }, { "epoch": 0.1, "grad_norm": 7.59367036819458, "learning_rate": 1.7471612414837246e-05, "loss": 2.7792, "step": 8078 }, { "epoch": 0.1, "grad_norm": 5.7373785972595215, "learning_rate": 1.7473775278468695e-05, "loss": 2.3637, "step": 8079 }, { "epoch": 0.1, "grad_norm": 6.0519561767578125, "learning_rate": 1.747593814210014e-05, "loss": 2.5274, "step": 8080 }, { "epoch": 0.1, "grad_norm": 6.1778244972229, "learning_rate": 1.7478101005731588e-05, "loss": 2.7118, "step": 8081 }, { "epoch": 0.1, "grad_norm": 6.31422233581543, "learning_rate": 1.7480263869363038e-05, "loss": 2.6688, "step": 8082 }, { "epoch": 0.1, "grad_norm": 6.437875270843506, "learning_rate": 1.7482426732994488e-05, "loss": 3.0406, "step": 8083 }, { "epoch": 0.1, "grad_norm": 7.26101016998291, "learning_rate": 1.7484589596625934e-05, "loss": 2.906, "step": 8084 }, { "epoch": 0.1, "grad_norm": 7.23479700088501, "learning_rate": 1.748675246025738e-05, "loss": 2.8849, "step": 8085 }, { "epoch": 0.1, "grad_norm": 6.322641372680664, "learning_rate": 1.748891532388883e-05, "loss": 2.4895, "step": 8086 }, { "epoch": 0.1, "grad_norm": 6.191206455230713, "learning_rate": 1.749107818752028e-05, "loss": 2.2176, "step": 8087 }, { "epoch": 0.1, "grad_norm": 7.4788737297058105, "learning_rate": 1.7493241051151726e-05, "loss": 3.2942, "step": 8088 }, { "epoch": 0.1, "grad_norm": 6.316243648529053, "learning_rate": 1.7495403914783172e-05, "loss": 1.8932, "step": 8089 }, { "epoch": 0.1, "grad_norm": 6.304199695587158, "learning_rate": 1.7497566778414622e-05, "loss": 2.5241, "step": 8090 }, { "epoch": 0.11, "grad_norm": 6.361100196838379, "learning_rate": 1.7499729642046072e-05, "loss": 2.3443, "step": 8091 }, { "epoch": 0.11, "grad_norm": 6.412224769592285, "learning_rate": 1.7501892505677518e-05, "loss": 2.7005, "step": 8092 }, { "epoch": 0.11, "grad_norm": 6.984325885772705, "learning_rate": 1.7504055369308965e-05, "loss": 2.5218, "step": 8093 }, { "epoch": 0.11, "grad_norm": 6.613852024078369, "learning_rate": 1.7506218232940414e-05, "loss": 2.2844, "step": 8094 }, { "epoch": 0.11, "grad_norm": 6.966646194458008, "learning_rate": 1.7508381096571864e-05, "loss": 2.4364, "step": 8095 }, { "epoch": 0.11, "grad_norm": 7.8215765953063965, "learning_rate": 1.751054396020331e-05, "loss": 2.7903, "step": 8096 }, { "epoch": 0.11, "grad_norm": 7.663681507110596, "learning_rate": 1.751270682383476e-05, "loss": 2.8276, "step": 8097 }, { "epoch": 0.11, "grad_norm": 6.605759620666504, "learning_rate": 1.7514869687466207e-05, "loss": 2.2909, "step": 8098 }, { "epoch": 0.11, "grad_norm": 6.082563877105713, "learning_rate": 1.7517032551097656e-05, "loss": 2.055, "step": 8099 }, { "epoch": 0.11, "grad_norm": 5.765298366546631, "learning_rate": 1.7519195414729103e-05, "loss": 2.2897, "step": 8100 }, { "epoch": 0.11, "grad_norm": 6.128190994262695, "learning_rate": 1.7521358278360552e-05, "loss": 2.3777, "step": 8101 }, { "epoch": 0.11, "grad_norm": 6.264378070831299, "learning_rate": 1.7523521141992e-05, "loss": 2.3625, "step": 8102 }, { "epoch": 0.11, "grad_norm": 6.1659016609191895, "learning_rate": 1.7525684005623445e-05, "loss": 2.3301, "step": 8103 }, { "epoch": 0.11, "grad_norm": 5.492692470550537, "learning_rate": 1.7527846869254895e-05, "loss": 1.9758, "step": 8104 }, { "epoch": 0.11, "grad_norm": 6.050100803375244, "learning_rate": 1.7530009732886345e-05, "loss": 2.5318, "step": 8105 }, { "epoch": 0.11, "grad_norm": 6.539767742156982, "learning_rate": 1.753217259651779e-05, "loss": 2.233, "step": 8106 }, { "epoch": 0.11, "grad_norm": 5.948841571807861, "learning_rate": 1.7534335460149237e-05, "loss": 2.349, "step": 8107 }, { "epoch": 0.11, "grad_norm": 8.072933197021484, "learning_rate": 1.7536498323780687e-05, "loss": 2.7641, "step": 8108 }, { "epoch": 0.11, "grad_norm": 6.766626834869385, "learning_rate": 1.7538661187412137e-05, "loss": 2.5125, "step": 8109 }, { "epoch": 0.11, "grad_norm": 6.214371204376221, "learning_rate": 1.7540824051043583e-05, "loss": 2.7774, "step": 8110 }, { "epoch": 0.11, "grad_norm": 5.972259044647217, "learning_rate": 1.754298691467503e-05, "loss": 2.4906, "step": 8111 }, { "epoch": 0.11, "grad_norm": 6.657710075378418, "learning_rate": 1.754514977830648e-05, "loss": 2.8215, "step": 8112 }, { "epoch": 0.11, "grad_norm": 6.470979690551758, "learning_rate": 1.754731264193793e-05, "loss": 2.1934, "step": 8113 }, { "epoch": 0.11, "grad_norm": 6.018238544464111, "learning_rate": 1.7549475505569375e-05, "loss": 2.1447, "step": 8114 }, { "epoch": 0.11, "grad_norm": 6.650839805603027, "learning_rate": 1.7551638369200822e-05, "loss": 2.9061, "step": 8115 }, { "epoch": 0.11, "grad_norm": 6.701148986816406, "learning_rate": 1.755380123283227e-05, "loss": 2.737, "step": 8116 }, { "epoch": 0.11, "grad_norm": 7.168111801147461, "learning_rate": 1.755596409646372e-05, "loss": 2.9273, "step": 8117 }, { "epoch": 0.11, "grad_norm": 6.223862171173096, "learning_rate": 1.7558126960095168e-05, "loss": 2.2603, "step": 8118 }, { "epoch": 0.11, "grad_norm": 6.428025722503662, "learning_rate": 1.7560289823726614e-05, "loss": 2.5122, "step": 8119 }, { "epoch": 0.11, "grad_norm": 6.755375385284424, "learning_rate": 1.7562452687358064e-05, "loss": 3.1813, "step": 8120 }, { "epoch": 0.11, "grad_norm": 6.192777156829834, "learning_rate": 1.7564615550989513e-05, "loss": 2.2561, "step": 8121 }, { "epoch": 0.11, "grad_norm": 7.18704891204834, "learning_rate": 1.756677841462096e-05, "loss": 2.368, "step": 8122 }, { "epoch": 0.11, "grad_norm": 5.826396942138672, "learning_rate": 1.7568941278252406e-05, "loss": 2.214, "step": 8123 }, { "epoch": 0.11, "grad_norm": 7.1876068115234375, "learning_rate": 1.7571104141883856e-05, "loss": 3.1137, "step": 8124 }, { "epoch": 0.11, "grad_norm": 6.071263313293457, "learning_rate": 1.7573267005515306e-05, "loss": 2.5671, "step": 8125 }, { "epoch": 0.11, "grad_norm": 6.519482135772705, "learning_rate": 1.7575429869146752e-05, "loss": 2.4762, "step": 8126 }, { "epoch": 0.11, "grad_norm": 6.386119365692139, "learning_rate": 1.75775927327782e-05, "loss": 2.5667, "step": 8127 }, { "epoch": 0.11, "grad_norm": 6.27731466293335, "learning_rate": 1.7579755596409648e-05, "loss": 2.7561, "step": 8128 }, { "epoch": 0.11, "grad_norm": 6.214239120483398, "learning_rate": 1.7581918460041098e-05, "loss": 2.3788, "step": 8129 }, { "epoch": 0.11, "grad_norm": 7.830803394317627, "learning_rate": 1.7584081323672544e-05, "loss": 2.848, "step": 8130 }, { "epoch": 0.11, "grad_norm": 6.400282382965088, "learning_rate": 1.758624418730399e-05, "loss": 2.7128, "step": 8131 }, { "epoch": 0.11, "grad_norm": 5.799713134765625, "learning_rate": 1.758840705093544e-05, "loss": 2.5471, "step": 8132 }, { "epoch": 0.11, "grad_norm": 7.595289707183838, "learning_rate": 1.759056991456689e-05, "loss": 2.5487, "step": 8133 }, { "epoch": 0.11, "grad_norm": 5.981612205505371, "learning_rate": 1.7592732778198336e-05, "loss": 2.434, "step": 8134 }, { "epoch": 0.11, "grad_norm": 6.068450927734375, "learning_rate": 1.7594895641829783e-05, "loss": 2.7082, "step": 8135 }, { "epoch": 0.11, "grad_norm": 6.16422176361084, "learning_rate": 1.7597058505461232e-05, "loss": 2.4572, "step": 8136 }, { "epoch": 0.11, "grad_norm": 6.984602928161621, "learning_rate": 1.759922136909268e-05, "loss": 2.7688, "step": 8137 }, { "epoch": 0.11, "grad_norm": 7.0311150550842285, "learning_rate": 1.760138423272413e-05, "loss": 2.9983, "step": 8138 }, { "epoch": 0.11, "grad_norm": 7.586055278778076, "learning_rate": 1.7603547096355575e-05, "loss": 3.2154, "step": 8139 }, { "epoch": 0.11, "grad_norm": 5.999974727630615, "learning_rate": 1.7605709959987025e-05, "loss": 2.7387, "step": 8140 }, { "epoch": 0.11, "grad_norm": 6.291100025177002, "learning_rate": 1.760787282361847e-05, "loss": 2.7467, "step": 8141 }, { "epoch": 0.11, "grad_norm": 7.404244899749756, "learning_rate": 1.761003568724992e-05, "loss": 3.2036, "step": 8142 }, { "epoch": 0.11, "grad_norm": 5.721336364746094, "learning_rate": 1.7612198550881367e-05, "loss": 2.4703, "step": 8143 }, { "epoch": 0.11, "grad_norm": 6.511951446533203, "learning_rate": 1.7614361414512817e-05, "loss": 2.6192, "step": 8144 }, { "epoch": 0.11, "grad_norm": 6.926935195922852, "learning_rate": 1.7616524278144263e-05, "loss": 3.1373, "step": 8145 }, { "epoch": 0.11, "grad_norm": 6.255186557769775, "learning_rate": 1.7618687141775713e-05, "loss": 1.9501, "step": 8146 }, { "epoch": 0.11, "grad_norm": 6.315193176269531, "learning_rate": 1.762085000540716e-05, "loss": 2.6118, "step": 8147 }, { "epoch": 0.11, "grad_norm": 6.873105525970459, "learning_rate": 1.762301286903861e-05, "loss": 2.7385, "step": 8148 }, { "epoch": 0.11, "grad_norm": 6.391262531280518, "learning_rate": 1.7625175732670055e-05, "loss": 2.7085, "step": 8149 }, { "epoch": 0.11, "grad_norm": 7.158865928649902, "learning_rate": 1.7627338596301505e-05, "loss": 2.7718, "step": 8150 }, { "epoch": 0.11, "grad_norm": 5.966769695281982, "learning_rate": 1.762950145993295e-05, "loss": 2.5047, "step": 8151 }, { "epoch": 0.11, "grad_norm": 6.3260626792907715, "learning_rate": 1.76316643235644e-05, "loss": 2.2234, "step": 8152 }, { "epoch": 0.11, "grad_norm": 6.4661335945129395, "learning_rate": 1.7633827187195848e-05, "loss": 2.5526, "step": 8153 }, { "epoch": 0.11, "grad_norm": 6.074036121368408, "learning_rate": 1.7635990050827297e-05, "loss": 2.2288, "step": 8154 }, { "epoch": 0.11, "grad_norm": 6.466347694396973, "learning_rate": 1.7638152914458744e-05, "loss": 2.8838, "step": 8155 }, { "epoch": 0.11, "grad_norm": 5.900957107543945, "learning_rate": 1.7640315778090194e-05, "loss": 2.5955, "step": 8156 }, { "epoch": 0.11, "grad_norm": 6.56151008605957, "learning_rate": 1.764247864172164e-05, "loss": 2.7575, "step": 8157 }, { "epoch": 0.11, "grad_norm": 6.638400077819824, "learning_rate": 1.764464150535309e-05, "loss": 2.3735, "step": 8158 }, { "epoch": 0.11, "grad_norm": 6.8924479484558105, "learning_rate": 1.7646804368984536e-05, "loss": 2.7578, "step": 8159 }, { "epoch": 0.11, "grad_norm": 7.586605072021484, "learning_rate": 1.7648967232615986e-05, "loss": 2.5155, "step": 8160 }, { "epoch": 0.11, "grad_norm": 6.607039451599121, "learning_rate": 1.7651130096247432e-05, "loss": 2.9378, "step": 8161 }, { "epoch": 0.11, "grad_norm": 5.278227806091309, "learning_rate": 1.7653292959878882e-05, "loss": 2.0241, "step": 8162 }, { "epoch": 0.11, "grad_norm": 5.703961372375488, "learning_rate": 1.765545582351033e-05, "loss": 2.074, "step": 8163 }, { "epoch": 0.11, "grad_norm": 6.539029598236084, "learning_rate": 1.7657618687141778e-05, "loss": 2.6622, "step": 8164 }, { "epoch": 0.11, "grad_norm": 7.041287899017334, "learning_rate": 1.7659781550773224e-05, "loss": 2.4992, "step": 8165 }, { "epoch": 0.11, "grad_norm": 8.311017990112305, "learning_rate": 1.7661944414404674e-05, "loss": 2.9768, "step": 8166 }, { "epoch": 0.11, "grad_norm": 5.329909801483154, "learning_rate": 1.7664107278036124e-05, "loss": 2.1896, "step": 8167 }, { "epoch": 0.11, "grad_norm": 7.505887508392334, "learning_rate": 1.766627014166757e-05, "loss": 2.4205, "step": 8168 }, { "epoch": 0.11, "grad_norm": 6.275876522064209, "learning_rate": 1.7668433005299016e-05, "loss": 2.6363, "step": 8169 }, { "epoch": 0.11, "grad_norm": 6.5533952713012695, "learning_rate": 1.7670595868930466e-05, "loss": 2.3095, "step": 8170 }, { "epoch": 0.11, "grad_norm": 6.29157829284668, "learning_rate": 1.7672758732561916e-05, "loss": 2.7693, "step": 8171 }, { "epoch": 0.11, "grad_norm": 6.9418768882751465, "learning_rate": 1.7674921596193362e-05, "loss": 2.5804, "step": 8172 }, { "epoch": 0.11, "grad_norm": 7.325221061706543, "learning_rate": 1.767708445982481e-05, "loss": 3.0405, "step": 8173 }, { "epoch": 0.11, "grad_norm": 5.7100043296813965, "learning_rate": 1.7679247323456255e-05, "loss": 2.5863, "step": 8174 }, { "epoch": 0.11, "grad_norm": 6.464103698730469, "learning_rate": 1.7681410187087705e-05, "loss": 2.6545, "step": 8175 }, { "epoch": 0.11, "grad_norm": 7.254886627197266, "learning_rate": 1.7683573050719155e-05, "loss": 3.0762, "step": 8176 }, { "epoch": 0.11, "grad_norm": 5.578364849090576, "learning_rate": 1.76857359143506e-05, "loss": 2.2373, "step": 8177 }, { "epoch": 0.11, "grad_norm": 6.688211917877197, "learning_rate": 1.7687898777982047e-05, "loss": 2.3855, "step": 8178 }, { "epoch": 0.11, "grad_norm": 6.1043620109558105, "learning_rate": 1.7690061641613497e-05, "loss": 2.5272, "step": 8179 }, { "epoch": 0.11, "grad_norm": 6.698019981384277, "learning_rate": 1.7692224505244947e-05, "loss": 2.8322, "step": 8180 }, { "epoch": 0.11, "grad_norm": 5.751254081726074, "learning_rate": 1.7694387368876393e-05, "loss": 2.3646, "step": 8181 }, { "epoch": 0.11, "grad_norm": 5.391142845153809, "learning_rate": 1.769655023250784e-05, "loss": 2.3741, "step": 8182 }, { "epoch": 0.11, "grad_norm": 5.7428669929504395, "learning_rate": 1.769871309613929e-05, "loss": 2.1155, "step": 8183 }, { "epoch": 0.11, "grad_norm": 5.417857646942139, "learning_rate": 1.770087595977074e-05, "loss": 2.5674, "step": 8184 }, { "epoch": 0.11, "grad_norm": 6.397596836090088, "learning_rate": 1.7703038823402185e-05, "loss": 2.3686, "step": 8185 }, { "epoch": 0.11, "grad_norm": 6.931630611419678, "learning_rate": 1.770520168703363e-05, "loss": 2.8493, "step": 8186 }, { "epoch": 0.11, "grad_norm": 6.260601043701172, "learning_rate": 1.770736455066508e-05, "loss": 2.9287, "step": 8187 }, { "epoch": 0.11, "grad_norm": 6.098097801208496, "learning_rate": 1.770952741429653e-05, "loss": 2.0443, "step": 8188 }, { "epoch": 0.11, "grad_norm": 6.607945442199707, "learning_rate": 1.7711690277927977e-05, "loss": 2.7297, "step": 8189 }, { "epoch": 0.11, "grad_norm": 6.963106632232666, "learning_rate": 1.7713853141559424e-05, "loss": 3.0845, "step": 8190 }, { "epoch": 0.11, "grad_norm": 7.031360626220703, "learning_rate": 1.7716016005190874e-05, "loss": 2.8616, "step": 8191 }, { "epoch": 0.11, "grad_norm": 6.790035247802734, "learning_rate": 1.7718178868822323e-05, "loss": 2.3515, "step": 8192 }, { "epoch": 0.11, "grad_norm": 6.085229396820068, "learning_rate": 1.772034173245377e-05, "loss": 2.1297, "step": 8193 }, { "epoch": 0.11, "grad_norm": 6.1030192375183105, "learning_rate": 1.772250459608522e-05, "loss": 2.2792, "step": 8194 }, { "epoch": 0.11, "grad_norm": 6.277714729309082, "learning_rate": 1.7724667459716666e-05, "loss": 2.6039, "step": 8195 }, { "epoch": 0.11, "grad_norm": 6.042291164398193, "learning_rate": 1.7726830323348116e-05, "loss": 2.7843, "step": 8196 }, { "epoch": 0.11, "grad_norm": 6.0211405754089355, "learning_rate": 1.7728993186979562e-05, "loss": 2.3664, "step": 8197 }, { "epoch": 0.11, "grad_norm": 6.586150169372559, "learning_rate": 1.773115605061101e-05, "loss": 2.5622, "step": 8198 }, { "epoch": 0.11, "grad_norm": 6.876523017883301, "learning_rate": 1.7733318914242458e-05, "loss": 2.5014, "step": 8199 }, { "epoch": 0.11, "grad_norm": 7.309729099273682, "learning_rate": 1.7735481777873908e-05, "loss": 2.7488, "step": 8200 }, { "epoch": 0.11, "grad_norm": 6.26230525970459, "learning_rate": 1.7737644641505354e-05, "loss": 2.5526, "step": 8201 }, { "epoch": 0.11, "grad_norm": 6.505983829498291, "learning_rate": 1.7739807505136804e-05, "loss": 2.7729, "step": 8202 }, { "epoch": 0.11, "grad_norm": 6.4572577476501465, "learning_rate": 1.774197036876825e-05, "loss": 2.3488, "step": 8203 }, { "epoch": 0.11, "grad_norm": 6.542860507965088, "learning_rate": 1.77441332323997e-05, "loss": 2.4635, "step": 8204 }, { "epoch": 0.11, "grad_norm": 6.398279666900635, "learning_rate": 1.7746296096031146e-05, "loss": 2.522, "step": 8205 }, { "epoch": 0.11, "grad_norm": 6.305028915405273, "learning_rate": 1.7748458959662596e-05, "loss": 2.9542, "step": 8206 }, { "epoch": 0.11, "grad_norm": 6.107056617736816, "learning_rate": 1.7750621823294042e-05, "loss": 2.4349, "step": 8207 }, { "epoch": 0.11, "grad_norm": 6.597794532775879, "learning_rate": 1.7752784686925492e-05, "loss": 2.781, "step": 8208 }, { "epoch": 0.11, "grad_norm": 6.191397190093994, "learning_rate": 1.775494755055694e-05, "loss": 2.6773, "step": 8209 }, { "epoch": 0.11, "grad_norm": 6.591459274291992, "learning_rate": 1.7757110414188388e-05, "loss": 2.6277, "step": 8210 }, { "epoch": 0.11, "grad_norm": 6.270894527435303, "learning_rate": 1.7759273277819835e-05, "loss": 2.4172, "step": 8211 }, { "epoch": 0.11, "grad_norm": 6.18719482421875, "learning_rate": 1.776143614145128e-05, "loss": 2.346, "step": 8212 }, { "epoch": 0.11, "grad_norm": 6.187580108642578, "learning_rate": 1.776359900508273e-05, "loss": 3.0254, "step": 8213 }, { "epoch": 0.11, "grad_norm": 5.5080246925354, "learning_rate": 1.776576186871418e-05, "loss": 2.435, "step": 8214 }, { "epoch": 0.11, "grad_norm": 6.984537601470947, "learning_rate": 1.7767924732345627e-05, "loss": 2.88, "step": 8215 }, { "epoch": 0.11, "grad_norm": 6.224177837371826, "learning_rate": 1.7770087595977073e-05, "loss": 2.6259, "step": 8216 }, { "epoch": 0.11, "grad_norm": 6.452852249145508, "learning_rate": 1.7772250459608523e-05, "loss": 2.8847, "step": 8217 }, { "epoch": 0.11, "grad_norm": 7.009904861450195, "learning_rate": 1.7774413323239973e-05, "loss": 2.6531, "step": 8218 }, { "epoch": 0.11, "grad_norm": 5.969778060913086, "learning_rate": 1.777657618687142e-05, "loss": 2.4469, "step": 8219 }, { "epoch": 0.11, "grad_norm": 6.52335262298584, "learning_rate": 1.7778739050502865e-05, "loss": 2.8919, "step": 8220 }, { "epoch": 0.11, "grad_norm": 6.1778154373168945, "learning_rate": 1.7780901914134315e-05, "loss": 2.5854, "step": 8221 }, { "epoch": 0.11, "grad_norm": 6.461878299713135, "learning_rate": 1.7783064777765765e-05, "loss": 2.4352, "step": 8222 }, { "epoch": 0.11, "grad_norm": 6.1606316566467285, "learning_rate": 1.778522764139721e-05, "loss": 2.7653, "step": 8223 }, { "epoch": 0.11, "grad_norm": 6.504136562347412, "learning_rate": 1.7787390505028658e-05, "loss": 2.7038, "step": 8224 }, { "epoch": 0.11, "grad_norm": 5.934930801391602, "learning_rate": 1.7789553368660107e-05, "loss": 2.333, "step": 8225 }, { "epoch": 0.11, "grad_norm": 7.2866435050964355, "learning_rate": 1.7791716232291557e-05, "loss": 2.5099, "step": 8226 }, { "epoch": 0.11, "grad_norm": 7.143360614776611, "learning_rate": 1.7793879095923003e-05, "loss": 2.4875, "step": 8227 }, { "epoch": 0.11, "grad_norm": 6.4670538902282715, "learning_rate": 1.779604195955445e-05, "loss": 2.5747, "step": 8228 }, { "epoch": 0.11, "grad_norm": 6.434304237365723, "learning_rate": 1.77982048231859e-05, "loss": 2.5596, "step": 8229 }, { "epoch": 0.11, "grad_norm": 5.919358253479004, "learning_rate": 1.780036768681735e-05, "loss": 2.3177, "step": 8230 }, { "epoch": 0.11, "grad_norm": 6.450562953948975, "learning_rate": 1.7802530550448796e-05, "loss": 2.5883, "step": 8231 }, { "epoch": 0.11, "grad_norm": 6.111826419830322, "learning_rate": 1.7804693414080242e-05, "loss": 2.4641, "step": 8232 }, { "epoch": 0.11, "grad_norm": 6.264053821563721, "learning_rate": 1.7806856277711692e-05, "loss": 2.8043, "step": 8233 }, { "epoch": 0.11, "grad_norm": 6.605232238769531, "learning_rate": 1.780901914134314e-05, "loss": 2.5647, "step": 8234 }, { "epoch": 0.11, "grad_norm": 6.33895206451416, "learning_rate": 1.7811182004974588e-05, "loss": 2.6752, "step": 8235 }, { "epoch": 0.11, "grad_norm": 6.3897833824157715, "learning_rate": 1.7813344868606034e-05, "loss": 2.2103, "step": 8236 }, { "epoch": 0.11, "grad_norm": 6.565052032470703, "learning_rate": 1.7815507732237484e-05, "loss": 2.5779, "step": 8237 }, { "epoch": 0.11, "grad_norm": 6.944730281829834, "learning_rate": 1.7817670595868934e-05, "loss": 2.9938, "step": 8238 }, { "epoch": 0.11, "grad_norm": 6.323854923248291, "learning_rate": 1.781983345950038e-05, "loss": 2.3279, "step": 8239 }, { "epoch": 0.11, "grad_norm": 5.833718299865723, "learning_rate": 1.7821996323131826e-05, "loss": 2.5698, "step": 8240 }, { "epoch": 0.11, "grad_norm": 6.377895832061768, "learning_rate": 1.7824159186763276e-05, "loss": 2.9244, "step": 8241 }, { "epoch": 0.11, "grad_norm": 6.848227024078369, "learning_rate": 1.7826322050394726e-05, "loss": 2.6463, "step": 8242 }, { "epoch": 0.11, "grad_norm": 6.129732131958008, "learning_rate": 1.7828484914026172e-05, "loss": 2.3398, "step": 8243 }, { "epoch": 0.11, "grad_norm": 6.62762451171875, "learning_rate": 1.783064777765762e-05, "loss": 2.8383, "step": 8244 }, { "epoch": 0.11, "grad_norm": 7.085474491119385, "learning_rate": 1.783281064128907e-05, "loss": 2.518, "step": 8245 }, { "epoch": 0.11, "grad_norm": 7.3595781326293945, "learning_rate": 1.7834973504920515e-05, "loss": 2.9057, "step": 8246 }, { "epoch": 0.11, "grad_norm": 6.624875545501709, "learning_rate": 1.7837136368551964e-05, "loss": 2.635, "step": 8247 }, { "epoch": 0.11, "grad_norm": 5.482291221618652, "learning_rate": 1.783929923218341e-05, "loss": 2.4892, "step": 8248 }, { "epoch": 0.11, "grad_norm": 5.5531158447265625, "learning_rate": 1.784146209581486e-05, "loss": 2.4126, "step": 8249 }, { "epoch": 0.11, "grad_norm": 6.524246692657471, "learning_rate": 1.7843624959446307e-05, "loss": 2.3255, "step": 8250 }, { "epoch": 0.11, "grad_norm": 5.8971781730651855, "learning_rate": 1.7845787823077757e-05, "loss": 2.404, "step": 8251 }, { "epoch": 0.11, "grad_norm": 6.536860942840576, "learning_rate": 1.7847950686709203e-05, "loss": 2.4426, "step": 8252 }, { "epoch": 0.11, "grad_norm": 7.854151248931885, "learning_rate": 1.7850113550340653e-05, "loss": 3.2962, "step": 8253 }, { "epoch": 0.11, "grad_norm": 6.063627243041992, "learning_rate": 1.78522764139721e-05, "loss": 2.0629, "step": 8254 }, { "epoch": 0.11, "grad_norm": 5.813772678375244, "learning_rate": 1.785443927760355e-05, "loss": 2.2313, "step": 8255 }, { "epoch": 0.11, "grad_norm": 6.809683799743652, "learning_rate": 1.7856602141235e-05, "loss": 2.6877, "step": 8256 }, { "epoch": 0.11, "grad_norm": 7.543091297149658, "learning_rate": 1.7858765004866445e-05, "loss": 2.3512, "step": 8257 }, { "epoch": 0.11, "grad_norm": 6.479732513427734, "learning_rate": 1.786092786849789e-05, "loss": 2.4471, "step": 8258 }, { "epoch": 0.11, "grad_norm": 7.128788948059082, "learning_rate": 1.786309073212934e-05, "loss": 2.5944, "step": 8259 }, { "epoch": 0.11, "grad_norm": 6.317924499511719, "learning_rate": 1.786525359576079e-05, "loss": 2.2507, "step": 8260 }, { "epoch": 0.11, "grad_norm": 6.925371170043945, "learning_rate": 1.7867416459392237e-05, "loss": 2.9379, "step": 8261 }, { "epoch": 0.11, "grad_norm": 6.954626560211182, "learning_rate": 1.7869579323023684e-05, "loss": 2.9778, "step": 8262 }, { "epoch": 0.11, "grad_norm": 6.1763529777526855, "learning_rate": 1.7871742186655133e-05, "loss": 2.3621, "step": 8263 }, { "epoch": 0.11, "grad_norm": 6.556295394897461, "learning_rate": 1.7873905050286583e-05, "loss": 2.5365, "step": 8264 }, { "epoch": 0.11, "grad_norm": 5.984853267669678, "learning_rate": 1.787606791391803e-05, "loss": 2.3176, "step": 8265 }, { "epoch": 0.11, "grad_norm": 6.97292947769165, "learning_rate": 1.7878230777549476e-05, "loss": 3.2108, "step": 8266 }, { "epoch": 0.11, "grad_norm": 7.183937072753906, "learning_rate": 1.7880393641180925e-05, "loss": 2.5837, "step": 8267 }, { "epoch": 0.11, "grad_norm": 6.230603218078613, "learning_rate": 1.7882556504812375e-05, "loss": 1.9292, "step": 8268 }, { "epoch": 0.11, "grad_norm": 7.860132217407227, "learning_rate": 1.788471936844382e-05, "loss": 2.7877, "step": 8269 }, { "epoch": 0.11, "grad_norm": 7.596988677978516, "learning_rate": 1.7886882232075268e-05, "loss": 2.964, "step": 8270 }, { "epoch": 0.11, "grad_norm": 6.366698741912842, "learning_rate": 1.7889045095706718e-05, "loss": 3.1303, "step": 8271 }, { "epoch": 0.11, "grad_norm": 7.085962295532227, "learning_rate": 1.7891207959338167e-05, "loss": 2.7432, "step": 8272 }, { "epoch": 0.11, "grad_norm": 6.278125286102295, "learning_rate": 1.7893370822969614e-05, "loss": 3.1252, "step": 8273 }, { "epoch": 0.11, "grad_norm": 6.734260082244873, "learning_rate": 1.789553368660106e-05, "loss": 2.5374, "step": 8274 }, { "epoch": 0.11, "grad_norm": 5.808079719543457, "learning_rate": 1.789769655023251e-05, "loss": 2.641, "step": 8275 }, { "epoch": 0.11, "grad_norm": 7.256545066833496, "learning_rate": 1.789985941386396e-05, "loss": 2.8573, "step": 8276 }, { "epoch": 0.11, "grad_norm": 6.115304470062256, "learning_rate": 1.7902022277495406e-05, "loss": 2.8401, "step": 8277 }, { "epoch": 0.11, "grad_norm": 6.24293851852417, "learning_rate": 1.7904185141126852e-05, "loss": 2.3856, "step": 8278 }, { "epoch": 0.11, "grad_norm": 5.499818801879883, "learning_rate": 1.7906348004758302e-05, "loss": 2.4042, "step": 8279 }, { "epoch": 0.11, "grad_norm": 7.159998893737793, "learning_rate": 1.7908510868389752e-05, "loss": 2.4022, "step": 8280 }, { "epoch": 0.11, "grad_norm": 6.098472595214844, "learning_rate": 1.7910673732021198e-05, "loss": 2.5326, "step": 8281 }, { "epoch": 0.11, "grad_norm": 5.528675079345703, "learning_rate": 1.7912836595652645e-05, "loss": 2.4256, "step": 8282 }, { "epoch": 0.11, "grad_norm": 5.837705612182617, "learning_rate": 1.791499945928409e-05, "loss": 2.2675, "step": 8283 }, { "epoch": 0.11, "grad_norm": 5.9859514236450195, "learning_rate": 1.791716232291554e-05, "loss": 2.882, "step": 8284 }, { "epoch": 0.11, "grad_norm": 6.0786261558532715, "learning_rate": 1.791932518654699e-05, "loss": 2.3862, "step": 8285 }, { "epoch": 0.11, "grad_norm": 6.31445837020874, "learning_rate": 1.7921488050178437e-05, "loss": 2.7315, "step": 8286 }, { "epoch": 0.11, "grad_norm": 6.482954025268555, "learning_rate": 1.7923650913809883e-05, "loss": 2.8362, "step": 8287 }, { "epoch": 0.11, "grad_norm": 7.011681079864502, "learning_rate": 1.7925813777441333e-05, "loss": 2.536, "step": 8288 }, { "epoch": 0.11, "grad_norm": 7.363842010498047, "learning_rate": 1.7927976641072783e-05, "loss": 2.6853, "step": 8289 }, { "epoch": 0.11, "grad_norm": 6.46686315536499, "learning_rate": 1.793013950470423e-05, "loss": 2.6621, "step": 8290 }, { "epoch": 0.11, "grad_norm": 6.467171669006348, "learning_rate": 1.793230236833568e-05, "loss": 2.5821, "step": 8291 }, { "epoch": 0.11, "grad_norm": 6.400685787200928, "learning_rate": 1.7934465231967125e-05, "loss": 2.5607, "step": 8292 }, { "epoch": 0.11, "grad_norm": 6.128498077392578, "learning_rate": 1.7936628095598575e-05, "loss": 2.3989, "step": 8293 }, { "epoch": 0.11, "grad_norm": 6.000344753265381, "learning_rate": 1.793879095923002e-05, "loss": 2.0765, "step": 8294 }, { "epoch": 0.11, "grad_norm": 6.152158260345459, "learning_rate": 1.794095382286147e-05, "loss": 2.3525, "step": 8295 }, { "epoch": 0.11, "grad_norm": 6.372097969055176, "learning_rate": 1.7943116686492917e-05, "loss": 2.6944, "step": 8296 }, { "epoch": 0.11, "grad_norm": 6.669358253479004, "learning_rate": 1.7945279550124367e-05, "loss": 2.489, "step": 8297 }, { "epoch": 0.11, "grad_norm": 5.701211929321289, "learning_rate": 1.7947442413755813e-05, "loss": 2.7789, "step": 8298 }, { "epoch": 0.11, "grad_norm": 7.464479446411133, "learning_rate": 1.7949605277387263e-05, "loss": 2.7201, "step": 8299 }, { "epoch": 0.11, "grad_norm": 6.420837879180908, "learning_rate": 1.795176814101871e-05, "loss": 2.0224, "step": 8300 }, { "epoch": 0.11, "grad_norm": 6.497344017028809, "learning_rate": 1.795393100465016e-05, "loss": 2.9287, "step": 8301 }, { "epoch": 0.11, "grad_norm": 6.111985206604004, "learning_rate": 1.7956093868281606e-05, "loss": 2.67, "step": 8302 }, { "epoch": 0.11, "grad_norm": 6.491896629333496, "learning_rate": 1.7958256731913055e-05, "loss": 2.1388, "step": 8303 }, { "epoch": 0.11, "grad_norm": 7.110454559326172, "learning_rate": 1.79604195955445e-05, "loss": 2.6669, "step": 8304 }, { "epoch": 0.11, "grad_norm": 6.655109882354736, "learning_rate": 1.796258245917595e-05, "loss": 2.3719, "step": 8305 }, { "epoch": 0.11, "grad_norm": 5.710826396942139, "learning_rate": 1.7964745322807398e-05, "loss": 2.6192, "step": 8306 }, { "epoch": 0.11, "grad_norm": 6.036640167236328, "learning_rate": 1.7966908186438847e-05, "loss": 2.2899, "step": 8307 }, { "epoch": 0.11, "grad_norm": 6.096197605133057, "learning_rate": 1.7969071050070294e-05, "loss": 2.0044, "step": 8308 }, { "epoch": 0.11, "grad_norm": 6.581196308135986, "learning_rate": 1.7971233913701744e-05, "loss": 2.8393, "step": 8309 }, { "epoch": 0.11, "grad_norm": 6.318344593048096, "learning_rate": 1.797339677733319e-05, "loss": 2.6661, "step": 8310 }, { "epoch": 0.11, "grad_norm": 6.643087863922119, "learning_rate": 1.797555964096464e-05, "loss": 2.5389, "step": 8311 }, { "epoch": 0.11, "grad_norm": 6.9987473487854, "learning_rate": 1.7977722504596086e-05, "loss": 2.1058, "step": 8312 }, { "epoch": 0.11, "grad_norm": 6.45599365234375, "learning_rate": 1.7979885368227536e-05, "loss": 2.1252, "step": 8313 }, { "epoch": 0.11, "grad_norm": 6.754414081573486, "learning_rate": 1.7982048231858982e-05, "loss": 2.0249, "step": 8314 }, { "epoch": 0.11, "grad_norm": 6.036952018737793, "learning_rate": 1.7984211095490432e-05, "loss": 2.4712, "step": 8315 }, { "epoch": 0.11, "grad_norm": 5.601334571838379, "learning_rate": 1.7986373959121878e-05, "loss": 2.5522, "step": 8316 }, { "epoch": 0.11, "grad_norm": 6.618199348449707, "learning_rate": 1.7988536822753328e-05, "loss": 3.0387, "step": 8317 }, { "epoch": 0.11, "grad_norm": 6.562100887298584, "learning_rate": 1.7990699686384774e-05, "loss": 2.7106, "step": 8318 }, { "epoch": 0.11, "grad_norm": 6.44839334487915, "learning_rate": 1.7992862550016224e-05, "loss": 2.4073, "step": 8319 }, { "epoch": 0.11, "grad_norm": 6.711750507354736, "learning_rate": 1.799502541364767e-05, "loss": 2.3277, "step": 8320 }, { "epoch": 0.11, "grad_norm": 6.131924629211426, "learning_rate": 1.7997188277279117e-05, "loss": 2.7135, "step": 8321 }, { "epoch": 0.11, "grad_norm": 5.731955528259277, "learning_rate": 1.7999351140910567e-05, "loss": 2.5985, "step": 8322 }, { "epoch": 0.11, "grad_norm": 6.105549335479736, "learning_rate": 1.8001514004542016e-05, "loss": 2.7288, "step": 8323 }, { "epoch": 0.11, "grad_norm": 6.080223560333252, "learning_rate": 1.8003676868173463e-05, "loss": 2.5116, "step": 8324 }, { "epoch": 0.11, "grad_norm": 6.632386207580566, "learning_rate": 1.800583973180491e-05, "loss": 2.3472, "step": 8325 }, { "epoch": 0.11, "grad_norm": 6.133911609649658, "learning_rate": 1.800800259543636e-05, "loss": 2.4614, "step": 8326 }, { "epoch": 0.11, "grad_norm": 6.794127941131592, "learning_rate": 1.801016545906781e-05, "loss": 3.0466, "step": 8327 }, { "epoch": 0.11, "grad_norm": 5.10144567489624, "learning_rate": 1.8012328322699255e-05, "loss": 1.823, "step": 8328 }, { "epoch": 0.11, "grad_norm": 6.863183498382568, "learning_rate": 1.80144911863307e-05, "loss": 2.3527, "step": 8329 }, { "epoch": 0.11, "grad_norm": 6.234238147735596, "learning_rate": 1.801665404996215e-05, "loss": 2.9448, "step": 8330 }, { "epoch": 0.11, "grad_norm": 6.374583721160889, "learning_rate": 1.80188169135936e-05, "loss": 2.5546, "step": 8331 }, { "epoch": 0.11, "grad_norm": 6.267165660858154, "learning_rate": 1.8020979777225047e-05, "loss": 2.5646, "step": 8332 }, { "epoch": 0.11, "grad_norm": 6.804826736450195, "learning_rate": 1.8023142640856493e-05, "loss": 2.7437, "step": 8333 }, { "epoch": 0.11, "grad_norm": 5.519817352294922, "learning_rate": 1.8025305504487943e-05, "loss": 2.138, "step": 8334 }, { "epoch": 0.11, "grad_norm": 6.105126857757568, "learning_rate": 1.8027468368119393e-05, "loss": 2.314, "step": 8335 }, { "epoch": 0.11, "grad_norm": 6.118855953216553, "learning_rate": 1.802963123175084e-05, "loss": 2.5041, "step": 8336 }, { "epoch": 0.11, "grad_norm": 6.737832069396973, "learning_rate": 1.8031794095382286e-05, "loss": 2.6957, "step": 8337 }, { "epoch": 0.11, "grad_norm": 5.912059783935547, "learning_rate": 1.8033956959013735e-05, "loss": 3.0838, "step": 8338 }, { "epoch": 0.11, "grad_norm": 6.0725579261779785, "learning_rate": 1.8036119822645185e-05, "loss": 2.6779, "step": 8339 }, { "epoch": 0.11, "grad_norm": 5.5686516761779785, "learning_rate": 1.803828268627663e-05, "loss": 2.4138, "step": 8340 }, { "epoch": 0.11, "grad_norm": 5.594192981719971, "learning_rate": 1.8040445549908078e-05, "loss": 2.1234, "step": 8341 }, { "epoch": 0.11, "grad_norm": 7.057566165924072, "learning_rate": 1.8042608413539528e-05, "loss": 2.7816, "step": 8342 }, { "epoch": 0.11, "grad_norm": 6.015064239501953, "learning_rate": 1.8044771277170977e-05, "loss": 2.758, "step": 8343 }, { "epoch": 0.11, "grad_norm": 6.115769863128662, "learning_rate": 1.8046934140802424e-05, "loss": 2.7161, "step": 8344 }, { "epoch": 0.11, "grad_norm": 5.195924282073975, "learning_rate": 1.804909700443387e-05, "loss": 1.8642, "step": 8345 }, { "epoch": 0.11, "grad_norm": 6.526481628417969, "learning_rate": 1.805125986806532e-05, "loss": 2.9498, "step": 8346 }, { "epoch": 0.11, "grad_norm": 6.313782691955566, "learning_rate": 1.805342273169677e-05, "loss": 2.4729, "step": 8347 }, { "epoch": 0.11, "grad_norm": 6.060029029846191, "learning_rate": 1.8055585595328216e-05, "loss": 2.6879, "step": 8348 }, { "epoch": 0.11, "grad_norm": 5.276512622833252, "learning_rate": 1.8057748458959662e-05, "loss": 2.2151, "step": 8349 }, { "epoch": 0.11, "grad_norm": 6.819054126739502, "learning_rate": 1.8059911322591112e-05, "loss": 2.1078, "step": 8350 }, { "epoch": 0.11, "grad_norm": 6.717698574066162, "learning_rate": 1.8062074186222562e-05, "loss": 2.7518, "step": 8351 }, { "epoch": 0.11, "grad_norm": 5.957390308380127, "learning_rate": 1.8064237049854008e-05, "loss": 2.6485, "step": 8352 }, { "epoch": 0.11, "grad_norm": 6.370759010314941, "learning_rate": 1.8066399913485458e-05, "loss": 2.484, "step": 8353 }, { "epoch": 0.11, "grad_norm": 5.4360222816467285, "learning_rate": 1.8068562777116904e-05, "loss": 2.7005, "step": 8354 }, { "epoch": 0.11, "grad_norm": 5.915811061859131, "learning_rate": 1.8070725640748354e-05, "loss": 1.9028, "step": 8355 }, { "epoch": 0.11, "grad_norm": 5.707669734954834, "learning_rate": 1.80728885043798e-05, "loss": 2.4681, "step": 8356 }, { "epoch": 0.11, "grad_norm": 6.476108074188232, "learning_rate": 1.807505136801125e-05, "loss": 2.2872, "step": 8357 }, { "epoch": 0.11, "grad_norm": 6.627201557159424, "learning_rate": 1.8077214231642696e-05, "loss": 2.8213, "step": 8358 }, { "epoch": 0.11, "grad_norm": 6.202064514160156, "learning_rate": 1.8079377095274143e-05, "loss": 2.2499, "step": 8359 }, { "epoch": 0.11, "grad_norm": 6.514004707336426, "learning_rate": 1.8081539958905592e-05, "loss": 3.0338, "step": 8360 }, { "epoch": 0.11, "grad_norm": 6.119204044342041, "learning_rate": 1.8083702822537042e-05, "loss": 2.7235, "step": 8361 }, { "epoch": 0.11, "grad_norm": 5.853837013244629, "learning_rate": 1.808586568616849e-05, "loss": 2.1957, "step": 8362 }, { "epoch": 0.11, "grad_norm": 6.521533012390137, "learning_rate": 1.8088028549799935e-05, "loss": 2.5177, "step": 8363 }, { "epoch": 0.11, "grad_norm": 6.50314998626709, "learning_rate": 1.8090191413431385e-05, "loss": 2.7838, "step": 8364 }, { "epoch": 0.11, "grad_norm": 6.106027126312256, "learning_rate": 1.8092354277062834e-05, "loss": 2.7575, "step": 8365 }, { "epoch": 0.11, "grad_norm": 6.0676774978637695, "learning_rate": 1.809451714069428e-05, "loss": 2.1773, "step": 8366 }, { "epoch": 0.11, "grad_norm": 6.1992974281311035, "learning_rate": 1.8096680004325727e-05, "loss": 2.4336, "step": 8367 }, { "epoch": 0.11, "grad_norm": 5.692852020263672, "learning_rate": 1.8098842867957177e-05, "loss": 2.4222, "step": 8368 }, { "epoch": 0.11, "grad_norm": 5.897858619689941, "learning_rate": 1.8101005731588627e-05, "loss": 2.4885, "step": 8369 }, { "epoch": 0.11, "grad_norm": 6.15432596206665, "learning_rate": 1.8103168595220073e-05, "loss": 2.6241, "step": 8370 }, { "epoch": 0.11, "grad_norm": 6.9322662353515625, "learning_rate": 1.810533145885152e-05, "loss": 2.6108, "step": 8371 }, { "epoch": 0.11, "grad_norm": 5.786375522613525, "learning_rate": 1.810749432248297e-05, "loss": 2.3307, "step": 8372 }, { "epoch": 0.11, "grad_norm": 6.750543117523193, "learning_rate": 1.810965718611442e-05, "loss": 2.7258, "step": 8373 }, { "epoch": 0.11, "grad_norm": 5.757523536682129, "learning_rate": 1.8111820049745865e-05, "loss": 2.8235, "step": 8374 }, { "epoch": 0.11, "grad_norm": 5.671698570251465, "learning_rate": 1.811398291337731e-05, "loss": 2.169, "step": 8375 }, { "epoch": 0.11, "grad_norm": 5.933284282684326, "learning_rate": 1.811614577700876e-05, "loss": 2.3776, "step": 8376 }, { "epoch": 0.11, "grad_norm": 6.104861259460449, "learning_rate": 1.811830864064021e-05, "loss": 2.1872, "step": 8377 }, { "epoch": 0.11, "grad_norm": 5.50623893737793, "learning_rate": 1.8120471504271657e-05, "loss": 2.4768, "step": 8378 }, { "epoch": 0.11, "grad_norm": 5.782991886138916, "learning_rate": 1.8122634367903104e-05, "loss": 1.7702, "step": 8379 }, { "epoch": 0.11, "grad_norm": 6.989423751831055, "learning_rate": 1.8124797231534553e-05, "loss": 2.8929, "step": 8380 }, { "epoch": 0.11, "grad_norm": 5.493648529052734, "learning_rate": 1.8126960095166003e-05, "loss": 2.6563, "step": 8381 }, { "epoch": 0.11, "grad_norm": 6.0187153816223145, "learning_rate": 1.812912295879745e-05, "loss": 2.354, "step": 8382 }, { "epoch": 0.11, "grad_norm": 6.255660057067871, "learning_rate": 1.8131285822428896e-05, "loss": 2.5055, "step": 8383 }, { "epoch": 0.11, "grad_norm": 9.475513458251953, "learning_rate": 1.8133448686060346e-05, "loss": 2.0797, "step": 8384 }, { "epoch": 0.11, "grad_norm": 6.740869522094727, "learning_rate": 1.8135611549691795e-05, "loss": 2.6786, "step": 8385 }, { "epoch": 0.11, "grad_norm": 5.667056083679199, "learning_rate": 1.8137774413323242e-05, "loss": 2.6679, "step": 8386 }, { "epoch": 0.11, "grad_norm": 6.062081813812256, "learning_rate": 1.8139937276954688e-05, "loss": 2.3275, "step": 8387 }, { "epoch": 0.11, "grad_norm": 5.996132850646973, "learning_rate": 1.8142100140586138e-05, "loss": 2.3502, "step": 8388 }, { "epoch": 0.11, "grad_norm": 6.450332164764404, "learning_rate": 1.8144263004217588e-05, "loss": 2.4911, "step": 8389 }, { "epoch": 0.11, "grad_norm": 6.244843482971191, "learning_rate": 1.8146425867849034e-05, "loss": 2.2812, "step": 8390 }, { "epoch": 0.11, "grad_norm": 7.075207710266113, "learning_rate": 1.814858873148048e-05, "loss": 2.165, "step": 8391 }, { "epoch": 0.11, "grad_norm": 7.110391616821289, "learning_rate": 1.815075159511193e-05, "loss": 2.978, "step": 8392 }, { "epoch": 0.11, "grad_norm": 7.185091495513916, "learning_rate": 1.8152914458743376e-05, "loss": 2.9065, "step": 8393 }, { "epoch": 0.11, "grad_norm": 6.4664740562438965, "learning_rate": 1.8155077322374826e-05, "loss": 2.5466, "step": 8394 }, { "epoch": 0.11, "grad_norm": 7.003311634063721, "learning_rate": 1.8157240186006273e-05, "loss": 2.5482, "step": 8395 }, { "epoch": 0.11, "grad_norm": 5.752942085266113, "learning_rate": 1.8159403049637722e-05, "loss": 2.7445, "step": 8396 }, { "epoch": 0.11, "grad_norm": 6.385970115661621, "learning_rate": 1.816156591326917e-05, "loss": 2.6505, "step": 8397 }, { "epoch": 0.11, "grad_norm": 5.462051868438721, "learning_rate": 1.816372877690062e-05, "loss": 2.6948, "step": 8398 }, { "epoch": 0.11, "grad_norm": 6.399593353271484, "learning_rate": 1.8165891640532065e-05, "loss": 2.6705, "step": 8399 }, { "epoch": 0.11, "grad_norm": 6.190540790557861, "learning_rate": 1.8168054504163515e-05, "loss": 2.9735, "step": 8400 }, { "epoch": 0.11, "grad_norm": 6.601414203643799, "learning_rate": 1.817021736779496e-05, "loss": 2.7768, "step": 8401 }, { "epoch": 0.11, "grad_norm": 6.316686630249023, "learning_rate": 1.817238023142641e-05, "loss": 2.6481, "step": 8402 }, { "epoch": 0.11, "grad_norm": 7.287530422210693, "learning_rate": 1.8174543095057857e-05, "loss": 2.5851, "step": 8403 }, { "epoch": 0.11, "grad_norm": 7.027937412261963, "learning_rate": 1.8176705958689307e-05, "loss": 2.7331, "step": 8404 }, { "epoch": 0.11, "grad_norm": 7.071684837341309, "learning_rate": 1.8178868822320753e-05, "loss": 2.7706, "step": 8405 }, { "epoch": 0.11, "grad_norm": 6.39011812210083, "learning_rate": 1.8181031685952203e-05, "loss": 2.7283, "step": 8406 }, { "epoch": 0.11, "grad_norm": 6.444826602935791, "learning_rate": 1.818319454958365e-05, "loss": 2.5209, "step": 8407 }, { "epoch": 0.11, "grad_norm": 6.380544185638428, "learning_rate": 1.81853574132151e-05, "loss": 2.5356, "step": 8408 }, { "epoch": 0.11, "grad_norm": 5.49528169631958, "learning_rate": 1.8187520276846545e-05, "loss": 2.1532, "step": 8409 }, { "epoch": 0.11, "grad_norm": 5.913356781005859, "learning_rate": 1.8189683140477995e-05, "loss": 2.6249, "step": 8410 }, { "epoch": 0.11, "grad_norm": 6.3933868408203125, "learning_rate": 1.819184600410944e-05, "loss": 2.4145, "step": 8411 }, { "epoch": 0.11, "grad_norm": 5.876865863800049, "learning_rate": 1.819400886774089e-05, "loss": 2.7787, "step": 8412 }, { "epoch": 0.11, "grad_norm": 5.989374160766602, "learning_rate": 1.8196171731372337e-05, "loss": 2.8232, "step": 8413 }, { "epoch": 0.11, "grad_norm": 6.6651225090026855, "learning_rate": 1.8198334595003787e-05, "loss": 2.6742, "step": 8414 }, { "epoch": 0.11, "grad_norm": 7.866893291473389, "learning_rate": 1.8200497458635234e-05, "loss": 2.5058, "step": 8415 }, { "epoch": 0.11, "grad_norm": 6.967931747436523, "learning_rate": 1.8202660322266683e-05, "loss": 2.3489, "step": 8416 }, { "epoch": 0.11, "grad_norm": 7.091005802154541, "learning_rate": 1.820482318589813e-05, "loss": 2.4711, "step": 8417 }, { "epoch": 0.11, "grad_norm": 6.864259243011475, "learning_rate": 1.820698604952958e-05, "loss": 2.9725, "step": 8418 }, { "epoch": 0.11, "grad_norm": 6.185471534729004, "learning_rate": 1.820914891316103e-05, "loss": 2.3971, "step": 8419 }, { "epoch": 0.11, "grad_norm": 5.262834072113037, "learning_rate": 1.8211311776792476e-05, "loss": 1.9532, "step": 8420 }, { "epoch": 0.11, "grad_norm": 6.2223286628723145, "learning_rate": 1.8213474640423922e-05, "loss": 2.5789, "step": 8421 }, { "epoch": 0.11, "grad_norm": 5.9462971687316895, "learning_rate": 1.821563750405537e-05, "loss": 2.6634, "step": 8422 }, { "epoch": 0.11, "grad_norm": 6.197583198547363, "learning_rate": 1.821780036768682e-05, "loss": 2.3542, "step": 8423 }, { "epoch": 0.11, "grad_norm": 6.455636978149414, "learning_rate": 1.8219963231318268e-05, "loss": 2.6416, "step": 8424 }, { "epoch": 0.11, "grad_norm": 5.739650726318359, "learning_rate": 1.8222126094949714e-05, "loss": 2.6007, "step": 8425 }, { "epoch": 0.11, "grad_norm": 6.781277656555176, "learning_rate": 1.8224288958581164e-05, "loss": 2.4996, "step": 8426 }, { "epoch": 0.11, "grad_norm": 6.8357253074646, "learning_rate": 1.8226451822212614e-05, "loss": 2.9157, "step": 8427 }, { "epoch": 0.11, "grad_norm": 6.197393894195557, "learning_rate": 1.822861468584406e-05, "loss": 2.679, "step": 8428 }, { "epoch": 0.11, "grad_norm": 6.48576545715332, "learning_rate": 1.8230777549475506e-05, "loss": 2.7099, "step": 8429 }, { "epoch": 0.11, "grad_norm": 6.756275177001953, "learning_rate": 1.8232940413106953e-05, "loss": 2.3775, "step": 8430 }, { "epoch": 0.11, "grad_norm": 6.488674640655518, "learning_rate": 1.8235103276738402e-05, "loss": 2.6036, "step": 8431 }, { "epoch": 0.11, "grad_norm": 6.109202861785889, "learning_rate": 1.8237266140369852e-05, "loss": 2.8451, "step": 8432 }, { "epoch": 0.11, "grad_norm": 6.107335567474365, "learning_rate": 1.82394290040013e-05, "loss": 2.549, "step": 8433 }, { "epoch": 0.11, "grad_norm": 6.834075450897217, "learning_rate": 1.8241591867632745e-05, "loss": 2.621, "step": 8434 }, { "epoch": 0.11, "grad_norm": 5.280097484588623, "learning_rate": 1.8243754731264195e-05, "loss": 2.1785, "step": 8435 }, { "epoch": 0.11, "grad_norm": 6.516717910766602, "learning_rate": 1.8245917594895644e-05, "loss": 3.0925, "step": 8436 }, { "epoch": 0.11, "grad_norm": 5.840514659881592, "learning_rate": 1.824808045852709e-05, "loss": 2.2386, "step": 8437 }, { "epoch": 0.11, "grad_norm": 5.202542304992676, "learning_rate": 1.8250243322158537e-05, "loss": 2.2249, "step": 8438 }, { "epoch": 0.11, "grad_norm": 6.221738338470459, "learning_rate": 1.8252406185789987e-05, "loss": 2.8709, "step": 8439 }, { "epoch": 0.11, "grad_norm": 6.633298397064209, "learning_rate": 1.8254569049421437e-05, "loss": 2.7435, "step": 8440 }, { "epoch": 0.11, "grad_norm": 6.038609027862549, "learning_rate": 1.8256731913052883e-05, "loss": 2.3392, "step": 8441 }, { "epoch": 0.11, "grad_norm": 6.035416603088379, "learning_rate": 1.825889477668433e-05, "loss": 2.5569, "step": 8442 }, { "epoch": 0.11, "grad_norm": 6.110121250152588, "learning_rate": 1.826105764031578e-05, "loss": 2.9079, "step": 8443 }, { "epoch": 0.11, "grad_norm": 7.074954986572266, "learning_rate": 1.826322050394723e-05, "loss": 2.824, "step": 8444 }, { "epoch": 0.11, "grad_norm": 7.875762462615967, "learning_rate": 1.8265383367578675e-05, "loss": 3.3839, "step": 8445 }, { "epoch": 0.11, "grad_norm": 7.371803283691406, "learning_rate": 1.826754623121012e-05, "loss": 2.638, "step": 8446 }, { "epoch": 0.11, "grad_norm": 6.242247104644775, "learning_rate": 1.826970909484157e-05, "loss": 2.6575, "step": 8447 }, { "epoch": 0.11, "grad_norm": 6.39581298828125, "learning_rate": 1.827187195847302e-05, "loss": 2.9793, "step": 8448 }, { "epoch": 0.11, "grad_norm": 7.5298566818237305, "learning_rate": 1.8274034822104467e-05, "loss": 2.7005, "step": 8449 }, { "epoch": 0.11, "grad_norm": 6.800642490386963, "learning_rate": 1.8276197685735917e-05, "loss": 2.5831, "step": 8450 }, { "epoch": 0.11, "grad_norm": 6.903744697570801, "learning_rate": 1.8278360549367363e-05, "loss": 2.3636, "step": 8451 }, { "epoch": 0.11, "grad_norm": 5.552672863006592, "learning_rate": 1.8280523412998813e-05, "loss": 2.3165, "step": 8452 }, { "epoch": 0.11, "grad_norm": 5.937969207763672, "learning_rate": 1.828268627663026e-05, "loss": 2.5127, "step": 8453 }, { "epoch": 0.11, "grad_norm": 5.692302227020264, "learning_rate": 1.828484914026171e-05, "loss": 2.4071, "step": 8454 }, { "epoch": 0.11, "grad_norm": 6.766292572021484, "learning_rate": 1.8287012003893156e-05, "loss": 2.7813, "step": 8455 }, { "epoch": 0.11, "grad_norm": 6.018545627593994, "learning_rate": 1.8289174867524605e-05, "loss": 2.5739, "step": 8456 }, { "epoch": 0.11, "grad_norm": 6.234808444976807, "learning_rate": 1.8291337731156052e-05, "loss": 2.7338, "step": 8457 }, { "epoch": 0.11, "grad_norm": 6.104979038238525, "learning_rate": 1.82935005947875e-05, "loss": 2.2031, "step": 8458 }, { "epoch": 0.11, "grad_norm": 5.906702041625977, "learning_rate": 1.8295663458418948e-05, "loss": 2.25, "step": 8459 }, { "epoch": 0.11, "grad_norm": 5.6753458976745605, "learning_rate": 1.8297826322050398e-05, "loss": 2.4446, "step": 8460 }, { "epoch": 0.11, "grad_norm": 5.739463806152344, "learning_rate": 1.8299989185681844e-05, "loss": 2.5269, "step": 8461 }, { "epoch": 0.11, "grad_norm": 5.826568126678467, "learning_rate": 1.8302152049313294e-05, "loss": 2.6569, "step": 8462 }, { "epoch": 0.11, "grad_norm": 6.3356242179870605, "learning_rate": 1.830431491294474e-05, "loss": 2.5408, "step": 8463 }, { "epoch": 0.11, "grad_norm": 6.571255683898926, "learning_rate": 1.830647777657619e-05, "loss": 2.4841, "step": 8464 }, { "epoch": 0.11, "grad_norm": 6.9034905433654785, "learning_rate": 1.8308640640207636e-05, "loss": 2.8366, "step": 8465 }, { "epoch": 0.11, "grad_norm": 6.347291946411133, "learning_rate": 1.8310803503839086e-05, "loss": 2.8427, "step": 8466 }, { "epoch": 0.11, "grad_norm": 5.935938835144043, "learning_rate": 1.8312966367470532e-05, "loss": 2.2328, "step": 8467 }, { "epoch": 0.11, "grad_norm": 6.103870391845703, "learning_rate": 1.831512923110198e-05, "loss": 2.5066, "step": 8468 }, { "epoch": 0.11, "grad_norm": 6.540243625640869, "learning_rate": 1.831729209473343e-05, "loss": 3.32, "step": 8469 }, { "epoch": 0.11, "grad_norm": 7.100099563598633, "learning_rate": 1.8319454958364878e-05, "loss": 2.3806, "step": 8470 }, { "epoch": 0.11, "grad_norm": 7.334843635559082, "learning_rate": 1.8321617821996324e-05, "loss": 2.1657, "step": 8471 }, { "epoch": 0.11, "grad_norm": 6.515365123748779, "learning_rate": 1.832378068562777e-05, "loss": 2.9584, "step": 8472 }, { "epoch": 0.11, "grad_norm": 5.638703346252441, "learning_rate": 1.832594354925922e-05, "loss": 2.1145, "step": 8473 }, { "epoch": 0.11, "grad_norm": 7.141870498657227, "learning_rate": 1.832810641289067e-05, "loss": 2.7462, "step": 8474 }, { "epoch": 0.11, "grad_norm": 6.020914077758789, "learning_rate": 1.8330269276522117e-05, "loss": 1.9299, "step": 8475 }, { "epoch": 0.11, "grad_norm": 5.444030284881592, "learning_rate": 1.8332432140153563e-05, "loss": 2.2494, "step": 8476 }, { "epoch": 0.11, "grad_norm": 6.189877510070801, "learning_rate": 1.8334595003785013e-05, "loss": 2.4054, "step": 8477 }, { "epoch": 0.11, "grad_norm": 6.17364501953125, "learning_rate": 1.8336757867416462e-05, "loss": 2.4659, "step": 8478 }, { "epoch": 0.11, "grad_norm": 5.8736042976379395, "learning_rate": 1.833892073104791e-05, "loss": 2.3094, "step": 8479 }, { "epoch": 0.11, "grad_norm": 6.155414581298828, "learning_rate": 1.8341083594679355e-05, "loss": 2.533, "step": 8480 }, { "epoch": 0.11, "grad_norm": 7.048592567443848, "learning_rate": 1.8343246458310805e-05, "loss": 2.8093, "step": 8481 }, { "epoch": 0.11, "grad_norm": 5.737355709075928, "learning_rate": 1.8345409321942255e-05, "loss": 2.6772, "step": 8482 }, { "epoch": 0.11, "grad_norm": 6.548849582672119, "learning_rate": 1.83475721855737e-05, "loss": 2.1087, "step": 8483 }, { "epoch": 0.11, "grad_norm": 6.6805830001831055, "learning_rate": 1.8349735049205147e-05, "loss": 2.4964, "step": 8484 }, { "epoch": 0.11, "grad_norm": 5.617951393127441, "learning_rate": 1.8351897912836597e-05, "loss": 1.758, "step": 8485 }, { "epoch": 0.11, "grad_norm": 5.722009658813477, "learning_rate": 1.8354060776468047e-05, "loss": 2.0441, "step": 8486 }, { "epoch": 0.11, "grad_norm": 6.869797229766846, "learning_rate": 1.8356223640099493e-05, "loss": 2.6384, "step": 8487 }, { "epoch": 0.11, "grad_norm": 5.729758262634277, "learning_rate": 1.835838650373094e-05, "loss": 2.2683, "step": 8488 }, { "epoch": 0.11, "grad_norm": 5.577845096588135, "learning_rate": 1.836054936736239e-05, "loss": 2.1748, "step": 8489 }, { "epoch": 0.11, "grad_norm": 5.997822284698486, "learning_rate": 1.836271223099384e-05, "loss": 2.4351, "step": 8490 }, { "epoch": 0.11, "grad_norm": 5.685295104980469, "learning_rate": 1.8364875094625285e-05, "loss": 2.4735, "step": 8491 }, { "epoch": 0.11, "grad_norm": 5.979963779449463, "learning_rate": 1.8367037958256732e-05, "loss": 2.4423, "step": 8492 }, { "epoch": 0.11, "grad_norm": 6.272975444793701, "learning_rate": 1.836920082188818e-05, "loss": 2.5993, "step": 8493 }, { "epoch": 0.11, "grad_norm": 7.269587516784668, "learning_rate": 1.837136368551963e-05, "loss": 2.6741, "step": 8494 }, { "epoch": 0.11, "grad_norm": 5.9699554443359375, "learning_rate": 1.8373526549151078e-05, "loss": 2.5745, "step": 8495 }, { "epoch": 0.11, "grad_norm": 5.760495185852051, "learning_rate": 1.8375689412782524e-05, "loss": 2.2337, "step": 8496 }, { "epoch": 0.11, "grad_norm": 6.263226509094238, "learning_rate": 1.8377852276413974e-05, "loss": 2.3815, "step": 8497 }, { "epoch": 0.11, "grad_norm": 6.099328517913818, "learning_rate": 1.8380015140045423e-05, "loss": 2.6254, "step": 8498 }, { "epoch": 0.11, "grad_norm": 6.2375617027282715, "learning_rate": 1.838217800367687e-05, "loss": 2.7355, "step": 8499 }, { "epoch": 0.11, "grad_norm": 5.46812629699707, "learning_rate": 1.8384340867308316e-05, "loss": 2.159, "step": 8500 }, { "epoch": 0.11, "grad_norm": 6.290828227996826, "learning_rate": 1.8386503730939766e-05, "loss": 2.5591, "step": 8501 }, { "epoch": 0.11, "grad_norm": 6.051310062408447, "learning_rate": 1.8388666594571212e-05, "loss": 2.6375, "step": 8502 }, { "epoch": 0.11, "grad_norm": 7.724433422088623, "learning_rate": 1.8390829458202662e-05, "loss": 2.8293, "step": 8503 }, { "epoch": 0.11, "grad_norm": 7.241683006286621, "learning_rate": 1.839299232183411e-05, "loss": 2.8967, "step": 8504 }, { "epoch": 0.11, "grad_norm": 7.252857685089111, "learning_rate": 1.8395155185465558e-05, "loss": 2.9808, "step": 8505 }, { "epoch": 0.11, "grad_norm": 6.889863014221191, "learning_rate": 1.8397318049097004e-05, "loss": 2.4179, "step": 8506 }, { "epoch": 0.11, "grad_norm": 7.4556565284729, "learning_rate": 1.8399480912728454e-05, "loss": 2.954, "step": 8507 }, { "epoch": 0.11, "grad_norm": 5.727986812591553, "learning_rate": 1.84016437763599e-05, "loss": 2.5832, "step": 8508 }, { "epoch": 0.11, "grad_norm": 6.016510486602783, "learning_rate": 1.840380663999135e-05, "loss": 2.5325, "step": 8509 }, { "epoch": 0.11, "grad_norm": 5.880553722381592, "learning_rate": 1.8405969503622797e-05, "loss": 2.5025, "step": 8510 }, { "epoch": 0.11, "grad_norm": 6.519139766693115, "learning_rate": 1.8408132367254246e-05, "loss": 2.7233, "step": 8511 }, { "epoch": 0.11, "grad_norm": 6.177113056182861, "learning_rate": 1.8410295230885696e-05, "loss": 2.4194, "step": 8512 }, { "epoch": 0.11, "grad_norm": 6.645711421966553, "learning_rate": 1.8412458094517143e-05, "loss": 2.7642, "step": 8513 }, { "epoch": 0.11, "grad_norm": 6.506302356719971, "learning_rate": 1.841462095814859e-05, "loss": 2.3683, "step": 8514 }, { "epoch": 0.11, "grad_norm": 7.439166069030762, "learning_rate": 1.841678382178004e-05, "loss": 2.3661, "step": 8515 }, { "epoch": 0.11, "grad_norm": 5.683035373687744, "learning_rate": 1.841894668541149e-05, "loss": 2.1665, "step": 8516 }, { "epoch": 0.11, "grad_norm": 5.164599418640137, "learning_rate": 1.8421109549042935e-05, "loss": 2.0896, "step": 8517 }, { "epoch": 0.11, "grad_norm": 6.565767288208008, "learning_rate": 1.842327241267438e-05, "loss": 2.297, "step": 8518 }, { "epoch": 0.11, "grad_norm": 6.512270450592041, "learning_rate": 1.842543527630583e-05, "loss": 2.5559, "step": 8519 }, { "epoch": 0.11, "grad_norm": 6.055228233337402, "learning_rate": 1.842759813993728e-05, "loss": 2.5521, "step": 8520 }, { "epoch": 0.11, "grad_norm": 5.892161846160889, "learning_rate": 1.8429761003568727e-05, "loss": 2.4034, "step": 8521 }, { "epoch": 0.11, "grad_norm": 6.927343368530273, "learning_rate": 1.8431923867200173e-05, "loss": 2.9312, "step": 8522 }, { "epoch": 0.11, "grad_norm": 7.346673488616943, "learning_rate": 1.8434086730831623e-05, "loss": 2.904, "step": 8523 }, { "epoch": 0.11, "grad_norm": 5.67008113861084, "learning_rate": 1.8436249594463073e-05, "loss": 2.1063, "step": 8524 }, { "epoch": 0.11, "grad_norm": 5.696236610412598, "learning_rate": 1.843841245809452e-05, "loss": 2.4244, "step": 8525 }, { "epoch": 0.11, "grad_norm": 6.3692402839660645, "learning_rate": 1.8440575321725966e-05, "loss": 2.5681, "step": 8526 }, { "epoch": 0.11, "grad_norm": 7.796359062194824, "learning_rate": 1.8442738185357415e-05, "loss": 2.9077, "step": 8527 }, { "epoch": 0.11, "grad_norm": 6.622256755828857, "learning_rate": 1.8444901048988865e-05, "loss": 2.4601, "step": 8528 }, { "epoch": 0.11, "grad_norm": 6.855388164520264, "learning_rate": 1.844706391262031e-05, "loss": 2.5935, "step": 8529 }, { "epoch": 0.11, "grad_norm": 6.991682052612305, "learning_rate": 1.8449226776251758e-05, "loss": 2.7928, "step": 8530 }, { "epoch": 0.11, "grad_norm": 5.06329345703125, "learning_rate": 1.8451389639883207e-05, "loss": 1.875, "step": 8531 }, { "epoch": 0.11, "grad_norm": 6.590300559997559, "learning_rate": 1.8453552503514657e-05, "loss": 2.5608, "step": 8532 }, { "epoch": 0.11, "grad_norm": 5.472159385681152, "learning_rate": 1.8455715367146104e-05, "loss": 1.9332, "step": 8533 }, { "epoch": 0.11, "grad_norm": 6.012777805328369, "learning_rate": 1.845787823077755e-05, "loss": 2.4797, "step": 8534 }, { "epoch": 0.11, "grad_norm": 6.5497517585754395, "learning_rate": 1.8460041094409e-05, "loss": 3.0138, "step": 8535 }, { "epoch": 0.11, "grad_norm": 6.052003383636475, "learning_rate": 1.846220395804045e-05, "loss": 2.2301, "step": 8536 }, { "epoch": 0.11, "grad_norm": 6.772624969482422, "learning_rate": 1.8464366821671896e-05, "loss": 2.8768, "step": 8537 }, { "epoch": 0.11, "grad_norm": 7.028099060058594, "learning_rate": 1.8466529685303342e-05, "loss": 2.3302, "step": 8538 }, { "epoch": 0.11, "grad_norm": 6.197144031524658, "learning_rate": 1.846869254893479e-05, "loss": 2.3439, "step": 8539 }, { "epoch": 0.11, "grad_norm": 6.369575023651123, "learning_rate": 1.8470855412566238e-05, "loss": 2.1138, "step": 8540 }, { "epoch": 0.11, "grad_norm": 7.198338508605957, "learning_rate": 1.8473018276197688e-05, "loss": 2.4641, "step": 8541 }, { "epoch": 0.11, "grad_norm": 5.808257102966309, "learning_rate": 1.8475181139829134e-05, "loss": 2.2777, "step": 8542 }, { "epoch": 0.11, "grad_norm": 6.066967010498047, "learning_rate": 1.847734400346058e-05, "loss": 2.6048, "step": 8543 }, { "epoch": 0.11, "grad_norm": 7.014857292175293, "learning_rate": 1.847950686709203e-05, "loss": 2.8413, "step": 8544 }, { "epoch": 0.11, "grad_norm": 6.368676662445068, "learning_rate": 1.848166973072348e-05, "loss": 2.0698, "step": 8545 }, { "epoch": 0.11, "grad_norm": 6.867926597595215, "learning_rate": 1.8483832594354927e-05, "loss": 2.4939, "step": 8546 }, { "epoch": 0.11, "grad_norm": 6.223100662231445, "learning_rate": 1.8485995457986376e-05, "loss": 2.7593, "step": 8547 }, { "epoch": 0.11, "grad_norm": 5.808620452880859, "learning_rate": 1.8488158321617823e-05, "loss": 2.5183, "step": 8548 }, { "epoch": 0.11, "grad_norm": 7.445527076721191, "learning_rate": 1.8490321185249272e-05, "loss": 3.0685, "step": 8549 }, { "epoch": 0.11, "grad_norm": 6.616603374481201, "learning_rate": 1.849248404888072e-05, "loss": 3.0822, "step": 8550 }, { "epoch": 0.11, "grad_norm": 5.920831680297852, "learning_rate": 1.849464691251217e-05, "loss": 2.7289, "step": 8551 }, { "epoch": 0.11, "grad_norm": 5.67869758605957, "learning_rate": 1.8496809776143615e-05, "loss": 2.3434, "step": 8552 }, { "epoch": 0.11, "grad_norm": 6.497627258300781, "learning_rate": 1.8498972639775065e-05, "loss": 2.4986, "step": 8553 }, { "epoch": 0.11, "grad_norm": 6.450940132141113, "learning_rate": 1.850113550340651e-05, "loss": 2.3097, "step": 8554 }, { "epoch": 0.11, "grad_norm": 7.422195911407471, "learning_rate": 1.850329836703796e-05, "loss": 2.1826, "step": 8555 }, { "epoch": 0.11, "grad_norm": 6.426480293273926, "learning_rate": 1.8505461230669407e-05, "loss": 2.2542, "step": 8556 }, { "epoch": 0.11, "grad_norm": 6.397987365722656, "learning_rate": 1.8507624094300857e-05, "loss": 2.9832, "step": 8557 }, { "epoch": 0.11, "grad_norm": 6.1333818435668945, "learning_rate": 1.8509786957932303e-05, "loss": 2.5501, "step": 8558 }, { "epoch": 0.11, "grad_norm": 6.5187578201293945, "learning_rate": 1.8511949821563753e-05, "loss": 2.6287, "step": 8559 }, { "epoch": 0.11, "grad_norm": 5.861410140991211, "learning_rate": 1.85141126851952e-05, "loss": 2.3174, "step": 8560 }, { "epoch": 0.11, "grad_norm": 6.02473258972168, "learning_rate": 1.851627554882665e-05, "loss": 2.4668, "step": 8561 }, { "epoch": 0.11, "grad_norm": 5.409026145935059, "learning_rate": 1.8518438412458095e-05, "loss": 2.8621, "step": 8562 }, { "epoch": 0.11, "grad_norm": 6.094655990600586, "learning_rate": 1.8520601276089545e-05, "loss": 2.53, "step": 8563 }, { "epoch": 0.11, "grad_norm": 6.189924240112305, "learning_rate": 1.852276413972099e-05, "loss": 3.0898, "step": 8564 }, { "epoch": 0.11, "grad_norm": 5.20726203918457, "learning_rate": 1.852492700335244e-05, "loss": 2.0938, "step": 8565 }, { "epoch": 0.11, "grad_norm": 5.273891448974609, "learning_rate": 1.8527089866983888e-05, "loss": 2.3249, "step": 8566 }, { "epoch": 0.11, "grad_norm": 6.07585334777832, "learning_rate": 1.8529252730615337e-05, "loss": 2.5413, "step": 8567 }, { "epoch": 0.11, "grad_norm": 6.988293170928955, "learning_rate": 1.8531415594246784e-05, "loss": 2.7555, "step": 8568 }, { "epoch": 0.11, "grad_norm": 5.831587791442871, "learning_rate": 1.8533578457878233e-05, "loss": 2.3683, "step": 8569 }, { "epoch": 0.11, "grad_norm": 6.294037818908691, "learning_rate": 1.853574132150968e-05, "loss": 2.4095, "step": 8570 }, { "epoch": 0.11, "grad_norm": 6.178578853607178, "learning_rate": 1.853790418514113e-05, "loss": 2.1518, "step": 8571 }, { "epoch": 0.11, "grad_norm": 6.096070766448975, "learning_rate": 1.8540067048772576e-05, "loss": 2.8482, "step": 8572 }, { "epoch": 0.11, "grad_norm": 7.083271026611328, "learning_rate": 1.8542229912404026e-05, "loss": 2.3235, "step": 8573 }, { "epoch": 0.11, "grad_norm": 6.309591293334961, "learning_rate": 1.8544392776035472e-05, "loss": 2.9264, "step": 8574 }, { "epoch": 0.11, "grad_norm": 6.3151445388793945, "learning_rate": 1.8546555639666922e-05, "loss": 2.8755, "step": 8575 }, { "epoch": 0.11, "grad_norm": 6.915375709533691, "learning_rate": 1.8548718503298368e-05, "loss": 3.0029, "step": 8576 }, { "epoch": 0.11, "grad_norm": 5.722705841064453, "learning_rate": 1.8550881366929814e-05, "loss": 2.5907, "step": 8577 }, { "epoch": 0.11, "grad_norm": 6.437211513519287, "learning_rate": 1.8553044230561264e-05, "loss": 2.1724, "step": 8578 }, { "epoch": 0.11, "grad_norm": 6.167977333068848, "learning_rate": 1.8555207094192714e-05, "loss": 2.5725, "step": 8579 }, { "epoch": 0.11, "grad_norm": 6.937167167663574, "learning_rate": 1.855736995782416e-05, "loss": 2.8389, "step": 8580 }, { "epoch": 0.11, "grad_norm": 5.9089531898498535, "learning_rate": 1.8559532821455607e-05, "loss": 2.5527, "step": 8581 }, { "epoch": 0.11, "grad_norm": 6.943124294281006, "learning_rate": 1.8561695685087056e-05, "loss": 2.7648, "step": 8582 }, { "epoch": 0.11, "grad_norm": 5.947827339172363, "learning_rate": 1.8563858548718506e-05, "loss": 2.2468, "step": 8583 }, { "epoch": 0.11, "grad_norm": 7.453056335449219, "learning_rate": 1.8566021412349952e-05, "loss": 2.9549, "step": 8584 }, { "epoch": 0.11, "grad_norm": 5.321691513061523, "learning_rate": 1.85681842759814e-05, "loss": 1.8595, "step": 8585 }, { "epoch": 0.11, "grad_norm": 5.761586666107178, "learning_rate": 1.857034713961285e-05, "loss": 2.3741, "step": 8586 }, { "epoch": 0.11, "grad_norm": 6.343958854675293, "learning_rate": 1.8572510003244298e-05, "loss": 2.4709, "step": 8587 }, { "epoch": 0.11, "grad_norm": 6.3394551277160645, "learning_rate": 1.8574672866875745e-05, "loss": 2.4905, "step": 8588 }, { "epoch": 0.11, "grad_norm": 6.218583106994629, "learning_rate": 1.857683573050719e-05, "loss": 2.7563, "step": 8589 }, { "epoch": 0.11, "grad_norm": 6.10454797744751, "learning_rate": 1.857899859413864e-05, "loss": 2.5463, "step": 8590 }, { "epoch": 0.11, "grad_norm": 6.063538074493408, "learning_rate": 1.858116145777009e-05, "loss": 2.5896, "step": 8591 }, { "epoch": 0.11, "grad_norm": 6.230379581451416, "learning_rate": 1.8583324321401537e-05, "loss": 2.7246, "step": 8592 }, { "epoch": 0.11, "grad_norm": 5.9373369216918945, "learning_rate": 1.8585487185032983e-05, "loss": 2.5892, "step": 8593 }, { "epoch": 0.11, "grad_norm": 6.410993576049805, "learning_rate": 1.8587650048664433e-05, "loss": 2.7081, "step": 8594 }, { "epoch": 0.11, "grad_norm": 6.463663101196289, "learning_rate": 1.8589812912295883e-05, "loss": 2.4215, "step": 8595 }, { "epoch": 0.11, "grad_norm": 5.480867862701416, "learning_rate": 1.859197577592733e-05, "loss": 2.4133, "step": 8596 }, { "epoch": 0.11, "grad_norm": 6.27965784072876, "learning_rate": 1.8594138639558775e-05, "loss": 2.4747, "step": 8597 }, { "epoch": 0.11, "grad_norm": 5.714555263519287, "learning_rate": 1.8596301503190225e-05, "loss": 2.653, "step": 8598 }, { "epoch": 0.11, "grad_norm": 6.04032039642334, "learning_rate": 1.8598464366821675e-05, "loss": 2.6231, "step": 8599 }, { "epoch": 0.11, "grad_norm": 5.328405857086182, "learning_rate": 1.860062723045312e-05, "loss": 2.6759, "step": 8600 }, { "epoch": 0.11, "grad_norm": 6.547062397003174, "learning_rate": 1.8602790094084568e-05, "loss": 3.3579, "step": 8601 }, { "epoch": 0.11, "grad_norm": 5.435521125793457, "learning_rate": 1.8604952957716017e-05, "loss": 1.8362, "step": 8602 }, { "epoch": 0.11, "grad_norm": 5.458180904388428, "learning_rate": 1.8607115821347467e-05, "loss": 2.2086, "step": 8603 }, { "epoch": 0.11, "grad_norm": 6.131399631500244, "learning_rate": 1.8609278684978913e-05, "loss": 2.4551, "step": 8604 }, { "epoch": 0.11, "grad_norm": 5.718270301818848, "learning_rate": 1.861144154861036e-05, "loss": 2.5893, "step": 8605 }, { "epoch": 0.11, "grad_norm": 5.384771823883057, "learning_rate": 1.861360441224181e-05, "loss": 2.0809, "step": 8606 }, { "epoch": 0.11, "grad_norm": 6.196657180786133, "learning_rate": 1.861576727587326e-05, "loss": 2.6955, "step": 8607 }, { "epoch": 0.11, "grad_norm": 6.088946342468262, "learning_rate": 1.8617930139504706e-05, "loss": 2.3769, "step": 8608 }, { "epoch": 0.11, "grad_norm": 5.735405921936035, "learning_rate": 1.8620093003136155e-05, "loss": 1.842, "step": 8609 }, { "epoch": 0.11, "grad_norm": 6.617453098297119, "learning_rate": 1.8622255866767602e-05, "loss": 2.7377, "step": 8610 }, { "epoch": 0.11, "grad_norm": 5.457193374633789, "learning_rate": 1.8624418730399048e-05, "loss": 2.3122, "step": 8611 }, { "epoch": 0.11, "grad_norm": 6.250528335571289, "learning_rate": 1.8626581594030498e-05, "loss": 2.2047, "step": 8612 }, { "epoch": 0.11, "grad_norm": 6.224671363830566, "learning_rate": 1.8628744457661948e-05, "loss": 2.3203, "step": 8613 }, { "epoch": 0.11, "grad_norm": 6.305305004119873, "learning_rate": 1.8630907321293394e-05, "loss": 2.0837, "step": 8614 }, { "epoch": 0.11, "grad_norm": 6.13735294342041, "learning_rate": 1.863307018492484e-05, "loss": 2.5293, "step": 8615 }, { "epoch": 0.11, "grad_norm": 6.496958255767822, "learning_rate": 1.863523304855629e-05, "loss": 2.531, "step": 8616 }, { "epoch": 0.11, "grad_norm": 6.566200256347656, "learning_rate": 1.863739591218774e-05, "loss": 3.0295, "step": 8617 }, { "epoch": 0.11, "grad_norm": 6.565997123718262, "learning_rate": 1.8639558775819186e-05, "loss": 2.7821, "step": 8618 }, { "epoch": 0.11, "grad_norm": 6.028968334197998, "learning_rate": 1.8641721639450633e-05, "loss": 2.642, "step": 8619 }, { "epoch": 0.11, "grad_norm": 5.7343668937683105, "learning_rate": 1.8643884503082082e-05, "loss": 2.3793, "step": 8620 }, { "epoch": 0.11, "grad_norm": 6.14324426651001, "learning_rate": 1.8646047366713532e-05, "loss": 2.3765, "step": 8621 }, { "epoch": 0.11, "grad_norm": 5.709408283233643, "learning_rate": 1.864821023034498e-05, "loss": 2.3874, "step": 8622 }, { "epoch": 0.11, "grad_norm": 7.378720283508301, "learning_rate": 1.8650373093976425e-05, "loss": 2.4415, "step": 8623 }, { "epoch": 0.11, "grad_norm": 6.1382155418396, "learning_rate": 1.8652535957607874e-05, "loss": 2.4299, "step": 8624 }, { "epoch": 0.11, "grad_norm": 5.963290214538574, "learning_rate": 1.8654698821239324e-05, "loss": 2.9547, "step": 8625 }, { "epoch": 0.11, "grad_norm": 6.792378902435303, "learning_rate": 1.865686168487077e-05, "loss": 3.2783, "step": 8626 }, { "epoch": 0.11, "grad_norm": 5.123610019683838, "learning_rate": 1.8659024548502217e-05, "loss": 2.1703, "step": 8627 }, { "epoch": 0.11, "grad_norm": 5.449238300323486, "learning_rate": 1.8661187412133667e-05, "loss": 2.3614, "step": 8628 }, { "epoch": 0.11, "grad_norm": 5.675135135650635, "learning_rate": 1.8663350275765116e-05, "loss": 2.3526, "step": 8629 }, { "epoch": 0.11, "grad_norm": 5.521292209625244, "learning_rate": 1.8665513139396563e-05, "loss": 2.0857, "step": 8630 }, { "epoch": 0.11, "grad_norm": 6.080729007720947, "learning_rate": 1.866767600302801e-05, "loss": 2.2624, "step": 8631 }, { "epoch": 0.11, "grad_norm": 5.695257186889648, "learning_rate": 1.866983886665946e-05, "loss": 2.3595, "step": 8632 }, { "epoch": 0.11, "grad_norm": 6.798067092895508, "learning_rate": 1.867200173029091e-05, "loss": 2.5508, "step": 8633 }, { "epoch": 0.11, "grad_norm": 6.232858657836914, "learning_rate": 1.8674164593922355e-05, "loss": 2.7709, "step": 8634 }, { "epoch": 0.11, "grad_norm": 5.82635498046875, "learning_rate": 1.86763274575538e-05, "loss": 2.6139, "step": 8635 }, { "epoch": 0.11, "grad_norm": 5.898416042327881, "learning_rate": 1.867849032118525e-05, "loss": 2.3573, "step": 8636 }, { "epoch": 0.11, "grad_norm": 5.8349833488464355, "learning_rate": 1.86806531848167e-05, "loss": 2.6309, "step": 8637 }, { "epoch": 0.11, "grad_norm": 5.655893325805664, "learning_rate": 1.8682816048448147e-05, "loss": 2.0719, "step": 8638 }, { "epoch": 0.11, "grad_norm": 5.268506050109863, "learning_rate": 1.8684978912079594e-05, "loss": 2.0169, "step": 8639 }, { "epoch": 0.11, "grad_norm": 6.142941951751709, "learning_rate": 1.8687141775711043e-05, "loss": 3.0027, "step": 8640 }, { "epoch": 0.11, "grad_norm": 6.720235824584961, "learning_rate": 1.8689304639342493e-05, "loss": 2.5887, "step": 8641 }, { "epoch": 0.11, "grad_norm": 5.755038261413574, "learning_rate": 1.869146750297394e-05, "loss": 2.6335, "step": 8642 }, { "epoch": 0.11, "grad_norm": 5.480355262756348, "learning_rate": 1.8693630366605386e-05, "loss": 2.4943, "step": 8643 }, { "epoch": 0.11, "grad_norm": 5.933934688568115, "learning_rate": 1.8695793230236836e-05, "loss": 2.5073, "step": 8644 }, { "epoch": 0.11, "grad_norm": 7.740485668182373, "learning_rate": 1.8697956093868285e-05, "loss": 3.3707, "step": 8645 }, { "epoch": 0.11, "grad_norm": 6.081950664520264, "learning_rate": 1.870011895749973e-05, "loss": 2.6223, "step": 8646 }, { "epoch": 0.11, "grad_norm": 6.047679424285889, "learning_rate": 1.8702281821131178e-05, "loss": 2.5535, "step": 8647 }, { "epoch": 0.11, "grad_norm": 6.213584899902344, "learning_rate": 1.8704444684762628e-05, "loss": 2.5958, "step": 8648 }, { "epoch": 0.11, "grad_norm": 5.527181625366211, "learning_rate": 1.8706607548394074e-05, "loss": 2.6125, "step": 8649 }, { "epoch": 0.11, "grad_norm": 6.155506134033203, "learning_rate": 1.8708770412025524e-05, "loss": 2.4466, "step": 8650 }, { "epoch": 0.11, "grad_norm": 5.748922348022461, "learning_rate": 1.871093327565697e-05, "loss": 2.514, "step": 8651 }, { "epoch": 0.11, "grad_norm": 7.225730895996094, "learning_rate": 1.871309613928842e-05, "loss": 2.7916, "step": 8652 }, { "epoch": 0.11, "grad_norm": 5.769023895263672, "learning_rate": 1.8715259002919866e-05, "loss": 2.5458, "step": 8653 }, { "epoch": 0.11, "grad_norm": 6.637198448181152, "learning_rate": 1.8717421866551316e-05, "loss": 2.4237, "step": 8654 }, { "epoch": 0.11, "grad_norm": 7.147541522979736, "learning_rate": 1.8719584730182762e-05, "loss": 2.5604, "step": 8655 }, { "epoch": 0.11, "grad_norm": 6.171792030334473, "learning_rate": 1.8721747593814212e-05, "loss": 2.5062, "step": 8656 }, { "epoch": 0.11, "grad_norm": 6.714565753936768, "learning_rate": 1.872391045744566e-05, "loss": 2.7301, "step": 8657 }, { "epoch": 0.11, "grad_norm": 6.866672039031982, "learning_rate": 1.8726073321077108e-05, "loss": 2.7487, "step": 8658 }, { "epoch": 0.11, "grad_norm": 4.965131759643555, "learning_rate": 1.8728236184708555e-05, "loss": 1.8954, "step": 8659 }, { "epoch": 0.11, "grad_norm": 6.830198764801025, "learning_rate": 1.8730399048340004e-05, "loss": 2.3327, "step": 8660 }, { "epoch": 0.11, "grad_norm": 5.943989276885986, "learning_rate": 1.873256191197145e-05, "loss": 2.5148, "step": 8661 }, { "epoch": 0.11, "grad_norm": 6.0936174392700195, "learning_rate": 1.87347247756029e-05, "loss": 2.3538, "step": 8662 }, { "epoch": 0.11, "grad_norm": 6.316725254058838, "learning_rate": 1.8736887639234347e-05, "loss": 2.634, "step": 8663 }, { "epoch": 0.11, "grad_norm": 6.473006248474121, "learning_rate": 1.8739050502865797e-05, "loss": 2.3452, "step": 8664 }, { "epoch": 0.11, "grad_norm": 6.227208614349365, "learning_rate": 1.8741213366497243e-05, "loss": 2.3459, "step": 8665 }, { "epoch": 0.11, "grad_norm": 7.905007839202881, "learning_rate": 1.8743376230128693e-05, "loss": 2.9239, "step": 8666 }, { "epoch": 0.11, "grad_norm": 6.150725841522217, "learning_rate": 1.874553909376014e-05, "loss": 2.5664, "step": 8667 }, { "epoch": 0.11, "grad_norm": 6.622819423675537, "learning_rate": 1.874770195739159e-05, "loss": 2.1458, "step": 8668 }, { "epoch": 0.11, "grad_norm": 5.372737884521484, "learning_rate": 1.8749864821023035e-05, "loss": 1.9159, "step": 8669 }, { "epoch": 0.11, "grad_norm": 6.509154796600342, "learning_rate": 1.8752027684654485e-05, "loss": 2.9039, "step": 8670 }, { "epoch": 0.11, "grad_norm": 5.68664026260376, "learning_rate": 1.875419054828593e-05, "loss": 2.6545, "step": 8671 }, { "epoch": 0.11, "grad_norm": 6.656307697296143, "learning_rate": 1.875635341191738e-05, "loss": 2.383, "step": 8672 }, { "epoch": 0.11, "grad_norm": 6.39442253112793, "learning_rate": 1.8758516275548827e-05, "loss": 2.8842, "step": 8673 }, { "epoch": 0.11, "grad_norm": 5.968860149383545, "learning_rate": 1.8760679139180277e-05, "loss": 3.1596, "step": 8674 }, { "epoch": 0.11, "grad_norm": 6.329663276672363, "learning_rate": 1.8762842002811727e-05, "loss": 2.5013, "step": 8675 }, { "epoch": 0.11, "grad_norm": 6.892392158508301, "learning_rate": 1.8765004866443173e-05, "loss": 2.8394, "step": 8676 }, { "epoch": 0.11, "grad_norm": 5.725630760192871, "learning_rate": 1.876716773007462e-05, "loss": 2.6568, "step": 8677 }, { "epoch": 0.11, "grad_norm": 6.128683567047119, "learning_rate": 1.876933059370607e-05, "loss": 3.1796, "step": 8678 }, { "epoch": 0.11, "grad_norm": 6.438597202301025, "learning_rate": 1.877149345733752e-05, "loss": 2.2658, "step": 8679 }, { "epoch": 0.11, "grad_norm": 5.913486480712891, "learning_rate": 1.8773656320968965e-05, "loss": 2.6339, "step": 8680 }, { "epoch": 0.11, "grad_norm": 5.542836666107178, "learning_rate": 1.877581918460041e-05, "loss": 2.5084, "step": 8681 }, { "epoch": 0.11, "grad_norm": 5.610534191131592, "learning_rate": 1.877798204823186e-05, "loss": 2.147, "step": 8682 }, { "epoch": 0.11, "grad_norm": 5.7697272300720215, "learning_rate": 1.8780144911863308e-05, "loss": 2.3197, "step": 8683 }, { "epoch": 0.11, "grad_norm": 6.026426792144775, "learning_rate": 1.8782307775494758e-05, "loss": 2.299, "step": 8684 }, { "epoch": 0.11, "grad_norm": 5.652222156524658, "learning_rate": 1.8784470639126204e-05, "loss": 2.0878, "step": 8685 }, { "epoch": 0.11, "grad_norm": 5.900905609130859, "learning_rate": 1.878663350275765e-05, "loss": 2.4613, "step": 8686 }, { "epoch": 0.11, "grad_norm": 6.469293117523193, "learning_rate": 1.87887963663891e-05, "loss": 2.6302, "step": 8687 }, { "epoch": 0.11, "grad_norm": 6.068037986755371, "learning_rate": 1.879095923002055e-05, "loss": 2.9412, "step": 8688 }, { "epoch": 0.11, "grad_norm": 5.8648247718811035, "learning_rate": 1.8793122093651996e-05, "loss": 2.8851, "step": 8689 }, { "epoch": 0.11, "grad_norm": 5.2756218910217285, "learning_rate": 1.8795284957283442e-05, "loss": 1.8584, "step": 8690 }, { "epoch": 0.11, "grad_norm": 6.279531478881836, "learning_rate": 1.8797447820914892e-05, "loss": 2.5883, "step": 8691 }, { "epoch": 0.11, "grad_norm": 6.187509536743164, "learning_rate": 1.8799610684546342e-05, "loss": 2.4809, "step": 8692 }, { "epoch": 0.11, "grad_norm": 5.483823776245117, "learning_rate": 1.8801773548177788e-05, "loss": 2.3726, "step": 8693 }, { "epoch": 0.11, "grad_norm": 6.06759786605835, "learning_rate": 1.8803936411809235e-05, "loss": 2.6189, "step": 8694 }, { "epoch": 0.11, "grad_norm": 5.591255187988281, "learning_rate": 1.8806099275440684e-05, "loss": 2.2053, "step": 8695 }, { "epoch": 0.11, "grad_norm": 5.364502906799316, "learning_rate": 1.8808262139072134e-05, "loss": 2.5346, "step": 8696 }, { "epoch": 0.11, "grad_norm": 4.859394073486328, "learning_rate": 1.881042500270358e-05, "loss": 1.9701, "step": 8697 }, { "epoch": 0.11, "grad_norm": 5.62632942199707, "learning_rate": 1.8812587866335027e-05, "loss": 2.5204, "step": 8698 }, { "epoch": 0.11, "grad_norm": 6.020790100097656, "learning_rate": 1.8814750729966477e-05, "loss": 2.7568, "step": 8699 }, { "epoch": 0.11, "grad_norm": 6.1117329597473145, "learning_rate": 1.8816913593597926e-05, "loss": 2.4546, "step": 8700 }, { "epoch": 0.11, "grad_norm": 6.475948810577393, "learning_rate": 1.8819076457229373e-05, "loss": 2.3772, "step": 8701 }, { "epoch": 0.11, "grad_norm": 6.822643756866455, "learning_rate": 1.882123932086082e-05, "loss": 2.6384, "step": 8702 }, { "epoch": 0.11, "grad_norm": 6.563300132751465, "learning_rate": 1.882340218449227e-05, "loss": 3.3365, "step": 8703 }, { "epoch": 0.11, "grad_norm": 6.0706706047058105, "learning_rate": 1.882556504812372e-05, "loss": 2.9136, "step": 8704 }, { "epoch": 0.11, "grad_norm": 6.741620063781738, "learning_rate": 1.8827727911755165e-05, "loss": 2.2165, "step": 8705 }, { "epoch": 0.11, "grad_norm": 6.033780574798584, "learning_rate": 1.8829890775386615e-05, "loss": 2.3006, "step": 8706 }, { "epoch": 0.11, "grad_norm": 6.737532615661621, "learning_rate": 1.883205363901806e-05, "loss": 2.487, "step": 8707 }, { "epoch": 0.11, "grad_norm": 6.1154093742370605, "learning_rate": 1.883421650264951e-05, "loss": 1.934, "step": 8708 }, { "epoch": 0.11, "grad_norm": 6.540918350219727, "learning_rate": 1.8836379366280957e-05, "loss": 2.3626, "step": 8709 }, { "epoch": 0.11, "grad_norm": 5.847850799560547, "learning_rate": 1.8838542229912407e-05, "loss": 1.9909, "step": 8710 }, { "epoch": 0.11, "grad_norm": 5.578953742980957, "learning_rate": 1.8840705093543853e-05, "loss": 1.9342, "step": 8711 }, { "epoch": 0.11, "grad_norm": 5.974784851074219, "learning_rate": 1.8842867957175303e-05, "loss": 2.6016, "step": 8712 }, { "epoch": 0.11, "grad_norm": 5.891733646392822, "learning_rate": 1.884503082080675e-05, "loss": 2.5358, "step": 8713 }, { "epoch": 0.11, "grad_norm": 6.675291061401367, "learning_rate": 1.88471936844382e-05, "loss": 2.864, "step": 8714 }, { "epoch": 0.11, "grad_norm": 6.83646297454834, "learning_rate": 1.8849356548069645e-05, "loss": 2.7603, "step": 8715 }, { "epoch": 0.11, "grad_norm": 7.523776531219482, "learning_rate": 1.8851519411701095e-05, "loss": 3.2475, "step": 8716 }, { "epoch": 0.11, "grad_norm": 5.83597469329834, "learning_rate": 1.885368227533254e-05, "loss": 2.9021, "step": 8717 }, { "epoch": 0.11, "grad_norm": 5.602538585662842, "learning_rate": 1.885584513896399e-05, "loss": 2.4355, "step": 8718 }, { "epoch": 0.11, "grad_norm": 6.935359001159668, "learning_rate": 1.8858008002595438e-05, "loss": 2.4679, "step": 8719 }, { "epoch": 0.11, "grad_norm": 6.490828990936279, "learning_rate": 1.8860170866226884e-05, "loss": 2.9484, "step": 8720 }, { "epoch": 0.11, "grad_norm": 5.800402641296387, "learning_rate": 1.8862333729858334e-05, "loss": 1.7843, "step": 8721 }, { "epoch": 0.11, "grad_norm": 5.714121341705322, "learning_rate": 1.8864496593489783e-05, "loss": 2.4823, "step": 8722 }, { "epoch": 0.11, "grad_norm": 5.942351818084717, "learning_rate": 1.886665945712123e-05, "loss": 2.4981, "step": 8723 }, { "epoch": 0.11, "grad_norm": 6.862405300140381, "learning_rate": 1.8868822320752676e-05, "loss": 2.7213, "step": 8724 }, { "epoch": 0.11, "grad_norm": 5.993440628051758, "learning_rate": 1.8870985184384126e-05, "loss": 2.5872, "step": 8725 }, { "epoch": 0.11, "grad_norm": 6.5498199462890625, "learning_rate": 1.8873148048015576e-05, "loss": 2.1176, "step": 8726 }, { "epoch": 0.11, "grad_norm": 6.688770294189453, "learning_rate": 1.8875310911647022e-05, "loss": 2.5374, "step": 8727 }, { "epoch": 0.11, "grad_norm": 6.546471118927002, "learning_rate": 1.887747377527847e-05, "loss": 2.8285, "step": 8728 }, { "epoch": 0.11, "grad_norm": 6.4755859375, "learning_rate": 1.8879636638909918e-05, "loss": 2.5406, "step": 8729 }, { "epoch": 0.11, "grad_norm": 6.058895587921143, "learning_rate": 1.8881799502541368e-05, "loss": 2.5106, "step": 8730 }, { "epoch": 0.11, "grad_norm": 5.3231096267700195, "learning_rate": 1.8883962366172814e-05, "loss": 2.3262, "step": 8731 }, { "epoch": 0.11, "grad_norm": 6.185008525848389, "learning_rate": 1.888612522980426e-05, "loss": 2.4473, "step": 8732 }, { "epoch": 0.11, "grad_norm": 5.970093250274658, "learning_rate": 1.888828809343571e-05, "loss": 2.3645, "step": 8733 }, { "epoch": 0.11, "grad_norm": 5.744266033172607, "learning_rate": 1.889045095706716e-05, "loss": 2.5862, "step": 8734 }, { "epoch": 0.11, "grad_norm": 6.123724460601807, "learning_rate": 1.8892613820698606e-05, "loss": 2.2512, "step": 8735 }, { "epoch": 0.11, "grad_norm": 6.69174337387085, "learning_rate": 1.8894776684330053e-05, "loss": 2.443, "step": 8736 }, { "epoch": 0.11, "grad_norm": 5.930188179016113, "learning_rate": 1.8896939547961503e-05, "loss": 2.5257, "step": 8737 }, { "epoch": 0.11, "grad_norm": 6.754269599914551, "learning_rate": 1.8899102411592952e-05, "loss": 2.3921, "step": 8738 }, { "epoch": 0.11, "grad_norm": 6.3291239738464355, "learning_rate": 1.89012652752244e-05, "loss": 2.4786, "step": 8739 }, { "epoch": 0.11, "grad_norm": 5.546037197113037, "learning_rate": 1.8903428138855845e-05, "loss": 2.6574, "step": 8740 }, { "epoch": 0.11, "grad_norm": 6.8749284744262695, "learning_rate": 1.8905591002487295e-05, "loss": 2.5049, "step": 8741 }, { "epoch": 0.11, "grad_norm": 6.3127899169921875, "learning_rate": 1.8907753866118744e-05, "loss": 2.8198, "step": 8742 }, { "epoch": 0.11, "grad_norm": 5.95977258682251, "learning_rate": 1.890991672975019e-05, "loss": 1.9532, "step": 8743 }, { "epoch": 0.11, "grad_norm": 6.349859714508057, "learning_rate": 1.8912079593381637e-05, "loss": 2.9566, "step": 8744 }, { "epoch": 0.11, "grad_norm": 5.731784820556641, "learning_rate": 1.8914242457013087e-05, "loss": 2.7783, "step": 8745 }, { "epoch": 0.11, "grad_norm": 5.919917106628418, "learning_rate": 1.8916405320644537e-05, "loss": 2.8256, "step": 8746 }, { "epoch": 0.11, "grad_norm": 5.8553972244262695, "learning_rate": 1.8918568184275983e-05, "loss": 2.4732, "step": 8747 }, { "epoch": 0.11, "grad_norm": 7.095501899719238, "learning_rate": 1.892073104790743e-05, "loss": 3.114, "step": 8748 }, { "epoch": 0.11, "grad_norm": 6.42944860458374, "learning_rate": 1.892289391153888e-05, "loss": 2.3451, "step": 8749 }, { "epoch": 0.11, "grad_norm": 6.288423538208008, "learning_rate": 1.892505677517033e-05, "loss": 2.1962, "step": 8750 }, { "epoch": 0.11, "grad_norm": 5.786958694458008, "learning_rate": 1.8927219638801775e-05, "loss": 2.6185, "step": 8751 }, { "epoch": 0.11, "grad_norm": 6.166219711303711, "learning_rate": 1.892938250243322e-05, "loss": 2.8833, "step": 8752 }, { "epoch": 0.11, "grad_norm": 5.337929725646973, "learning_rate": 1.893154536606467e-05, "loss": 2.6123, "step": 8753 }, { "epoch": 0.11, "grad_norm": 7.419249057769775, "learning_rate": 1.893370822969612e-05, "loss": 2.503, "step": 8754 }, { "epoch": 0.11, "grad_norm": 7.747531414031982, "learning_rate": 1.8935871093327567e-05, "loss": 2.8555, "step": 8755 }, { "epoch": 0.11, "grad_norm": 5.457276344299316, "learning_rate": 1.8938033956959014e-05, "loss": 2.4414, "step": 8756 }, { "epoch": 0.11, "grad_norm": 6.1620025634765625, "learning_rate": 1.8940196820590464e-05, "loss": 2.4589, "step": 8757 }, { "epoch": 0.11, "grad_norm": 6.348432540893555, "learning_rate": 1.894235968422191e-05, "loss": 3.0524, "step": 8758 }, { "epoch": 0.11, "grad_norm": 6.161167621612549, "learning_rate": 1.894452254785336e-05, "loss": 2.5557, "step": 8759 }, { "epoch": 0.11, "grad_norm": 5.871808052062988, "learning_rate": 1.8946685411484806e-05, "loss": 3.0584, "step": 8760 }, { "epoch": 0.11, "grad_norm": 6.576602935791016, "learning_rate": 1.8948848275116256e-05, "loss": 2.9247, "step": 8761 }, { "epoch": 0.11, "grad_norm": 6.517605781555176, "learning_rate": 1.8951011138747702e-05, "loss": 2.9855, "step": 8762 }, { "epoch": 0.11, "grad_norm": 6.432336807250977, "learning_rate": 1.8953174002379152e-05, "loss": 2.678, "step": 8763 }, { "epoch": 0.11, "grad_norm": 6.11106014251709, "learning_rate": 1.8955336866010598e-05, "loss": 2.4098, "step": 8764 }, { "epoch": 0.11, "grad_norm": 5.620787620544434, "learning_rate": 1.8957499729642048e-05, "loss": 2.312, "step": 8765 }, { "epoch": 0.11, "grad_norm": 5.604639053344727, "learning_rate": 1.8959662593273494e-05, "loss": 2.0203, "step": 8766 }, { "epoch": 0.11, "grad_norm": 5.697131156921387, "learning_rate": 1.8961825456904944e-05, "loss": 2.2283, "step": 8767 }, { "epoch": 0.11, "grad_norm": 6.665136814117432, "learning_rate": 1.8963988320536394e-05, "loss": 2.4508, "step": 8768 }, { "epoch": 0.11, "grad_norm": 6.429535388946533, "learning_rate": 1.896615118416784e-05, "loss": 2.9014, "step": 8769 }, { "epoch": 0.11, "grad_norm": 5.456122875213623, "learning_rate": 1.8968314047799287e-05, "loss": 2.5539, "step": 8770 }, { "epoch": 0.11, "grad_norm": 5.8808913230896, "learning_rate": 1.8970476911430736e-05, "loss": 2.4622, "step": 8771 }, { "epoch": 0.11, "grad_norm": 4.680912494659424, "learning_rate": 1.8972639775062186e-05, "loss": 1.8329, "step": 8772 }, { "epoch": 0.11, "grad_norm": 5.6644287109375, "learning_rate": 1.8974802638693632e-05, "loss": 1.8959, "step": 8773 }, { "epoch": 0.11, "grad_norm": 7.187738418579102, "learning_rate": 1.897696550232508e-05, "loss": 2.7228, "step": 8774 }, { "epoch": 0.11, "grad_norm": 6.218008518218994, "learning_rate": 1.897912836595653e-05, "loss": 2.8829, "step": 8775 }, { "epoch": 0.11, "grad_norm": 6.028497695922852, "learning_rate": 1.8981291229587978e-05, "loss": 2.6603, "step": 8776 }, { "epoch": 0.11, "grad_norm": 5.372254848480225, "learning_rate": 1.8983454093219425e-05, "loss": 2.1706, "step": 8777 }, { "epoch": 0.11, "grad_norm": 5.528109073638916, "learning_rate": 1.898561695685087e-05, "loss": 2.2865, "step": 8778 }, { "epoch": 0.11, "grad_norm": 5.950655460357666, "learning_rate": 1.898777982048232e-05, "loss": 2.3156, "step": 8779 }, { "epoch": 0.11, "grad_norm": 5.871123790740967, "learning_rate": 1.898994268411377e-05, "loss": 1.9517, "step": 8780 }, { "epoch": 0.11, "grad_norm": 5.5174078941345215, "learning_rate": 1.8992105547745217e-05, "loss": 2.2107, "step": 8781 }, { "epoch": 0.11, "grad_norm": 6.26154088973999, "learning_rate": 1.8994268411376663e-05, "loss": 2.2372, "step": 8782 }, { "epoch": 0.11, "grad_norm": 5.897322654724121, "learning_rate": 1.8996431275008113e-05, "loss": 2.4743, "step": 8783 }, { "epoch": 0.11, "grad_norm": 6.024313449859619, "learning_rate": 1.8998594138639563e-05, "loss": 2.4518, "step": 8784 }, { "epoch": 0.11, "grad_norm": 7.092275142669678, "learning_rate": 1.900075700227101e-05, "loss": 2.869, "step": 8785 }, { "epoch": 0.11, "grad_norm": 5.9908857345581055, "learning_rate": 1.9002919865902455e-05, "loss": 2.2094, "step": 8786 }, { "epoch": 0.11, "grad_norm": 6.188835620880127, "learning_rate": 1.9005082729533905e-05, "loss": 2.7215, "step": 8787 }, { "epoch": 0.11, "grad_norm": 5.761233329772949, "learning_rate": 1.9007245593165355e-05, "loss": 2.4043, "step": 8788 }, { "epoch": 0.11, "grad_norm": 6.537540435791016, "learning_rate": 1.90094084567968e-05, "loss": 2.8367, "step": 8789 }, { "epoch": 0.11, "grad_norm": 5.520723819732666, "learning_rate": 1.9011571320428248e-05, "loss": 2.5226, "step": 8790 }, { "epoch": 0.11, "grad_norm": 6.267303466796875, "learning_rate": 1.9013734184059697e-05, "loss": 2.5924, "step": 8791 }, { "epoch": 0.11, "grad_norm": 6.422314167022705, "learning_rate": 1.9015897047691144e-05, "loss": 2.9825, "step": 8792 }, { "epoch": 0.11, "grad_norm": 5.243934154510498, "learning_rate": 1.9018059911322593e-05, "loss": 2.1455, "step": 8793 }, { "epoch": 0.11, "grad_norm": 5.720952033996582, "learning_rate": 1.902022277495404e-05, "loss": 2.5056, "step": 8794 }, { "epoch": 0.11, "grad_norm": 5.180423259735107, "learning_rate": 1.9022385638585486e-05, "loss": 2.4709, "step": 8795 }, { "epoch": 0.11, "grad_norm": 5.688825607299805, "learning_rate": 1.9024548502216936e-05, "loss": 2.4391, "step": 8796 }, { "epoch": 0.11, "grad_norm": 5.754472255706787, "learning_rate": 1.9026711365848386e-05, "loss": 2.1893, "step": 8797 }, { "epoch": 0.11, "grad_norm": 5.992738723754883, "learning_rate": 1.9028874229479832e-05, "loss": 2.6064, "step": 8798 }, { "epoch": 0.11, "grad_norm": 5.2147417068481445, "learning_rate": 1.9031037093111278e-05, "loss": 2.4309, "step": 8799 }, { "epoch": 0.11, "grad_norm": 5.274062156677246, "learning_rate": 1.9033199956742728e-05, "loss": 2.1731, "step": 8800 }, { "epoch": 0.11, "grad_norm": 5.51175594329834, "learning_rate": 1.9035362820374178e-05, "loss": 2.5476, "step": 8801 }, { "epoch": 0.11, "grad_norm": 6.071288585662842, "learning_rate": 1.9037525684005624e-05, "loss": 2.6031, "step": 8802 }, { "epoch": 0.11, "grad_norm": 5.161947250366211, "learning_rate": 1.9039688547637074e-05, "loss": 2.1625, "step": 8803 }, { "epoch": 0.11, "grad_norm": 6.28507137298584, "learning_rate": 1.904185141126852e-05, "loss": 2.351, "step": 8804 }, { "epoch": 0.11, "grad_norm": 6.339354991912842, "learning_rate": 1.904401427489997e-05, "loss": 2.9789, "step": 8805 }, { "epoch": 0.11, "grad_norm": 6.088259220123291, "learning_rate": 1.9046177138531416e-05, "loss": 2.5745, "step": 8806 }, { "epoch": 0.11, "grad_norm": 5.769765377044678, "learning_rate": 1.9048340002162866e-05, "loss": 2.2159, "step": 8807 }, { "epoch": 0.11, "grad_norm": 6.128194332122803, "learning_rate": 1.9050502865794312e-05, "loss": 2.8977, "step": 8808 }, { "epoch": 0.11, "grad_norm": 6.145595073699951, "learning_rate": 1.9052665729425762e-05, "loss": 2.1432, "step": 8809 }, { "epoch": 0.11, "grad_norm": 6.888703346252441, "learning_rate": 1.905482859305721e-05, "loss": 2.1527, "step": 8810 }, { "epoch": 0.11, "grad_norm": 6.642890930175781, "learning_rate": 1.9056991456688658e-05, "loss": 2.4893, "step": 8811 }, { "epoch": 0.11, "grad_norm": 6.290327548980713, "learning_rate": 1.9059154320320105e-05, "loss": 2.462, "step": 8812 }, { "epoch": 0.11, "grad_norm": 5.386040210723877, "learning_rate": 1.9061317183951554e-05, "loss": 2.0126, "step": 8813 }, { "epoch": 0.11, "grad_norm": 6.307253837585449, "learning_rate": 1.9063480047583e-05, "loss": 2.2861, "step": 8814 }, { "epoch": 0.11, "grad_norm": 6.244283199310303, "learning_rate": 1.906564291121445e-05, "loss": 2.8358, "step": 8815 }, { "epoch": 0.11, "grad_norm": 5.921229362487793, "learning_rate": 1.9067805774845897e-05, "loss": 2.773, "step": 8816 }, { "epoch": 0.11, "grad_norm": 5.933825492858887, "learning_rate": 1.9069968638477347e-05, "loss": 2.3858, "step": 8817 }, { "epoch": 0.11, "grad_norm": 5.771323204040527, "learning_rate": 1.9072131502108793e-05, "loss": 2.3713, "step": 8818 }, { "epoch": 0.11, "grad_norm": 5.562242031097412, "learning_rate": 1.9074294365740243e-05, "loss": 2.6841, "step": 8819 }, { "epoch": 0.11, "grad_norm": 6.065028667449951, "learning_rate": 1.907645722937169e-05, "loss": 2.4429, "step": 8820 }, { "epoch": 0.11, "grad_norm": 5.870835781097412, "learning_rate": 1.907862009300314e-05, "loss": 2.4166, "step": 8821 }, { "epoch": 0.11, "grad_norm": 6.237168312072754, "learning_rate": 1.9080782956634585e-05, "loss": 2.1302, "step": 8822 }, { "epoch": 0.11, "grad_norm": 5.982668399810791, "learning_rate": 1.9082945820266035e-05, "loss": 2.776, "step": 8823 }, { "epoch": 0.11, "grad_norm": 5.930811405181885, "learning_rate": 1.908510868389748e-05, "loss": 2.4999, "step": 8824 }, { "epoch": 0.11, "grad_norm": 6.0392374992370605, "learning_rate": 1.908727154752893e-05, "loss": 2.67, "step": 8825 }, { "epoch": 0.11, "grad_norm": 6.323683738708496, "learning_rate": 1.9089434411160377e-05, "loss": 2.4655, "step": 8826 }, { "epoch": 0.11, "grad_norm": 6.408637523651123, "learning_rate": 1.9091597274791827e-05, "loss": 2.7028, "step": 8827 }, { "epoch": 0.11, "grad_norm": 5.174905776977539, "learning_rate": 1.9093760138423273e-05, "loss": 2.1629, "step": 8828 }, { "epoch": 0.11, "grad_norm": 6.650381565093994, "learning_rate": 1.909592300205472e-05, "loss": 2.7563, "step": 8829 }, { "epoch": 0.11, "grad_norm": 5.245987415313721, "learning_rate": 1.909808586568617e-05, "loss": 2.0994, "step": 8830 }, { "epoch": 0.11, "grad_norm": 6.33651876449585, "learning_rate": 1.910024872931762e-05, "loss": 2.7422, "step": 8831 }, { "epoch": 0.11, "grad_norm": 7.166902542114258, "learning_rate": 1.9102411592949066e-05, "loss": 2.7524, "step": 8832 }, { "epoch": 0.11, "grad_norm": 6.982742786407471, "learning_rate": 1.9104574456580512e-05, "loss": 2.7981, "step": 8833 }, { "epoch": 0.11, "grad_norm": 6.465542793273926, "learning_rate": 1.9106737320211962e-05, "loss": 2.8645, "step": 8834 }, { "epoch": 0.11, "grad_norm": 6.191837787628174, "learning_rate": 1.910890018384341e-05, "loss": 2.288, "step": 8835 }, { "epoch": 0.11, "grad_norm": 5.681331634521484, "learning_rate": 1.9111063047474858e-05, "loss": 2.3308, "step": 8836 }, { "epoch": 0.11, "grad_norm": 6.904380798339844, "learning_rate": 1.9113225911106304e-05, "loss": 2.8998, "step": 8837 }, { "epoch": 0.11, "grad_norm": 5.41400146484375, "learning_rate": 1.9115388774737754e-05, "loss": 2.1924, "step": 8838 }, { "epoch": 0.11, "grad_norm": 6.2979583740234375, "learning_rate": 1.9117551638369204e-05, "loss": 2.5794, "step": 8839 }, { "epoch": 0.11, "grad_norm": 5.207274436950684, "learning_rate": 1.911971450200065e-05, "loss": 2.4285, "step": 8840 }, { "epoch": 0.11, "grad_norm": 5.742431163787842, "learning_rate": 1.9121877365632096e-05, "loss": 2.784, "step": 8841 }, { "epoch": 0.11, "grad_norm": 5.2789506912231445, "learning_rate": 1.9124040229263546e-05, "loss": 2.3383, "step": 8842 }, { "epoch": 0.11, "grad_norm": 6.513305187225342, "learning_rate": 1.9126203092894996e-05, "loss": 2.6563, "step": 8843 }, { "epoch": 0.11, "grad_norm": 6.651068210601807, "learning_rate": 1.9128365956526442e-05, "loss": 2.6452, "step": 8844 }, { "epoch": 0.11, "grad_norm": 5.265775680541992, "learning_rate": 1.913052882015789e-05, "loss": 2.0186, "step": 8845 }, { "epoch": 0.11, "grad_norm": 5.918349742889404, "learning_rate": 1.913269168378934e-05, "loss": 2.1858, "step": 8846 }, { "epoch": 0.11, "grad_norm": 5.640147686004639, "learning_rate": 1.9134854547420788e-05, "loss": 2.6242, "step": 8847 }, { "epoch": 0.11, "grad_norm": 6.479733943939209, "learning_rate": 1.9137017411052234e-05, "loss": 2.8656, "step": 8848 }, { "epoch": 0.11, "grad_norm": 6.1119489669799805, "learning_rate": 1.913918027468368e-05, "loss": 2.4464, "step": 8849 }, { "epoch": 0.11, "grad_norm": 5.6536760330200195, "learning_rate": 1.914134313831513e-05, "loss": 2.012, "step": 8850 }, { "epoch": 0.11, "grad_norm": 5.820356845855713, "learning_rate": 1.914350600194658e-05, "loss": 2.3844, "step": 8851 }, { "epoch": 0.11, "grad_norm": 5.64784574508667, "learning_rate": 1.9145668865578027e-05, "loss": 2.2406, "step": 8852 }, { "epoch": 0.11, "grad_norm": 5.53784704208374, "learning_rate": 1.9147831729209473e-05, "loss": 2.1815, "step": 8853 }, { "epoch": 0.11, "grad_norm": 6.338232040405273, "learning_rate": 1.9149994592840923e-05, "loss": 2.5537, "step": 8854 }, { "epoch": 0.11, "grad_norm": 6.136760234832764, "learning_rate": 1.9152157456472373e-05, "loss": 2.6367, "step": 8855 }, { "epoch": 0.11, "grad_norm": 5.958284854888916, "learning_rate": 1.915432032010382e-05, "loss": 2.1011, "step": 8856 }, { "epoch": 0.11, "grad_norm": 6.233363151550293, "learning_rate": 1.9156483183735265e-05, "loss": 2.2838, "step": 8857 }, { "epoch": 0.11, "grad_norm": 5.538802623748779, "learning_rate": 1.9158646047366715e-05, "loss": 2.779, "step": 8858 }, { "epoch": 0.11, "grad_norm": 6.318634033203125, "learning_rate": 1.9160808910998165e-05, "loss": 2.8462, "step": 8859 }, { "epoch": 0.11, "grad_norm": 5.921529293060303, "learning_rate": 1.916297177462961e-05, "loss": 2.7443, "step": 8860 }, { "epoch": 0.11, "grad_norm": 5.458154201507568, "learning_rate": 1.9165134638261057e-05, "loss": 2.494, "step": 8861 }, { "epoch": 0.12, "grad_norm": 6.237978458404541, "learning_rate": 1.9167297501892507e-05, "loss": 2.8728, "step": 8862 }, { "epoch": 0.12, "grad_norm": 7.132005214691162, "learning_rate": 1.9169460365523957e-05, "loss": 2.959, "step": 8863 }, { "epoch": 0.12, "grad_norm": 6.343231201171875, "learning_rate": 1.9171623229155403e-05, "loss": 2.7567, "step": 8864 }, { "epoch": 0.12, "grad_norm": 6.858755111694336, "learning_rate": 1.9173786092786853e-05, "loss": 2.4953, "step": 8865 }, { "epoch": 0.12, "grad_norm": 5.319248199462891, "learning_rate": 1.91759489564183e-05, "loss": 1.9901, "step": 8866 }, { "epoch": 0.12, "grad_norm": 5.740202903747559, "learning_rate": 1.9178111820049746e-05, "loss": 2.6728, "step": 8867 }, { "epoch": 0.12, "grad_norm": 6.508849143981934, "learning_rate": 1.9180274683681195e-05, "loss": 2.6706, "step": 8868 }, { "epoch": 0.12, "grad_norm": 6.741283416748047, "learning_rate": 1.9182437547312645e-05, "loss": 2.6346, "step": 8869 }, { "epoch": 0.12, "grad_norm": 6.16009521484375, "learning_rate": 1.918460041094409e-05, "loss": 2.6084, "step": 8870 }, { "epoch": 0.12, "grad_norm": 6.172438144683838, "learning_rate": 1.9186763274575538e-05, "loss": 2.5701, "step": 8871 }, { "epoch": 0.12, "grad_norm": 6.883700370788574, "learning_rate": 1.9188926138206988e-05, "loss": 3.2806, "step": 8872 }, { "epoch": 0.12, "grad_norm": 6.720185279846191, "learning_rate": 1.9191089001838437e-05, "loss": 2.4953, "step": 8873 }, { "epoch": 0.12, "grad_norm": 5.834897994995117, "learning_rate": 1.9193251865469884e-05, "loss": 2.5392, "step": 8874 }, { "epoch": 0.12, "grad_norm": 6.499139785766602, "learning_rate": 1.919541472910133e-05, "loss": 2.4325, "step": 8875 }, { "epoch": 0.12, "grad_norm": 6.606823921203613, "learning_rate": 1.919757759273278e-05, "loss": 2.5627, "step": 8876 }, { "epoch": 0.12, "grad_norm": 6.168908596038818, "learning_rate": 1.919974045636423e-05, "loss": 2.6041, "step": 8877 }, { "epoch": 0.12, "grad_norm": 5.942627429962158, "learning_rate": 1.9201903319995676e-05, "loss": 2.9533, "step": 8878 }, { "epoch": 0.12, "grad_norm": 6.493383407592773, "learning_rate": 1.9204066183627122e-05, "loss": 2.2333, "step": 8879 }, { "epoch": 0.12, "grad_norm": 5.791729927062988, "learning_rate": 1.9206229047258572e-05, "loss": 2.4169, "step": 8880 }, { "epoch": 0.12, "grad_norm": 5.936859607696533, "learning_rate": 1.9208391910890022e-05, "loss": 2.187, "step": 8881 }, { "epoch": 0.12, "grad_norm": 6.098814010620117, "learning_rate": 1.9210554774521468e-05, "loss": 2.3239, "step": 8882 }, { "epoch": 0.12, "grad_norm": 6.013365268707275, "learning_rate": 1.9212717638152915e-05, "loss": 2.8314, "step": 8883 }, { "epoch": 0.12, "grad_norm": 5.548616886138916, "learning_rate": 1.9214880501784364e-05, "loss": 2.4373, "step": 8884 }, { "epoch": 0.12, "grad_norm": 6.970212936401367, "learning_rate": 1.9217043365415814e-05, "loss": 2.6418, "step": 8885 }, { "epoch": 0.12, "grad_norm": 5.790700912475586, "learning_rate": 1.921920622904726e-05, "loss": 2.5616, "step": 8886 }, { "epoch": 0.12, "grad_norm": 6.446634769439697, "learning_rate": 1.9221369092678707e-05, "loss": 3.0458, "step": 8887 }, { "epoch": 0.12, "grad_norm": 7.093394756317139, "learning_rate": 1.9223531956310156e-05, "loss": 2.4302, "step": 8888 }, { "epoch": 0.12, "grad_norm": 6.480655193328857, "learning_rate": 1.9225694819941606e-05, "loss": 2.5449, "step": 8889 }, { "epoch": 0.12, "grad_norm": 6.913225173950195, "learning_rate": 1.9227857683573053e-05, "loss": 2.9758, "step": 8890 }, { "epoch": 0.12, "grad_norm": 6.317853927612305, "learning_rate": 1.92300205472045e-05, "loss": 2.4192, "step": 8891 }, { "epoch": 0.12, "grad_norm": 5.127600193023682, "learning_rate": 1.923218341083595e-05, "loss": 2.1791, "step": 8892 }, { "epoch": 0.12, "grad_norm": 6.819687843322754, "learning_rate": 1.92343462744674e-05, "loss": 2.6071, "step": 8893 }, { "epoch": 0.12, "grad_norm": 6.78018045425415, "learning_rate": 1.9236509138098845e-05, "loss": 2.7297, "step": 8894 }, { "epoch": 0.12, "grad_norm": 5.884312152862549, "learning_rate": 1.923867200173029e-05, "loss": 2.9133, "step": 8895 }, { "epoch": 0.12, "grad_norm": 6.75093936920166, "learning_rate": 1.924083486536174e-05, "loss": 3.1373, "step": 8896 }, { "epoch": 0.12, "grad_norm": 6.095934867858887, "learning_rate": 1.924299772899319e-05, "loss": 2.9945, "step": 8897 }, { "epoch": 0.12, "grad_norm": 6.455990791320801, "learning_rate": 1.9245160592624637e-05, "loss": 2.4783, "step": 8898 }, { "epoch": 0.12, "grad_norm": 5.822202205657959, "learning_rate": 1.9247323456256083e-05, "loss": 2.7492, "step": 8899 }, { "epoch": 0.12, "grad_norm": 7.818787097930908, "learning_rate": 1.9249486319887533e-05, "loss": 2.5345, "step": 8900 }, { "epoch": 0.12, "grad_norm": 5.9430389404296875, "learning_rate": 1.925164918351898e-05, "loss": 1.8571, "step": 8901 }, { "epoch": 0.12, "grad_norm": 6.364274024963379, "learning_rate": 1.925381204715043e-05, "loss": 2.6876, "step": 8902 }, { "epoch": 0.12, "grad_norm": 6.566896438598633, "learning_rate": 1.9255974910781876e-05, "loss": 2.6297, "step": 8903 }, { "epoch": 0.12, "grad_norm": 6.368007183074951, "learning_rate": 1.9258137774413325e-05, "loss": 2.672, "step": 8904 }, { "epoch": 0.12, "grad_norm": 6.804859161376953, "learning_rate": 1.926030063804477e-05, "loss": 2.7673, "step": 8905 }, { "epoch": 0.12, "grad_norm": 7.021200656890869, "learning_rate": 1.926246350167622e-05, "loss": 2.9002, "step": 8906 }, { "epoch": 0.12, "grad_norm": 5.715339660644531, "learning_rate": 1.9264626365307668e-05, "loss": 2.6136, "step": 8907 }, { "epoch": 0.12, "grad_norm": 6.72646427154541, "learning_rate": 1.9266789228939118e-05, "loss": 2.5211, "step": 8908 }, { "epoch": 0.12, "grad_norm": 5.863593578338623, "learning_rate": 1.9268952092570564e-05, "loss": 2.5261, "step": 8909 }, { "epoch": 0.12, "grad_norm": 5.258490562438965, "learning_rate": 1.9271114956202014e-05, "loss": 1.9807, "step": 8910 }, { "epoch": 0.12, "grad_norm": 5.840970993041992, "learning_rate": 1.927327781983346e-05, "loss": 2.7026, "step": 8911 }, { "epoch": 0.12, "grad_norm": 5.635013580322266, "learning_rate": 1.927544068346491e-05, "loss": 2.9385, "step": 8912 }, { "epoch": 0.12, "grad_norm": 5.359719753265381, "learning_rate": 1.9277603547096356e-05, "loss": 2.4243, "step": 8913 }, { "epoch": 0.12, "grad_norm": 6.114288330078125, "learning_rate": 1.9279766410727806e-05, "loss": 2.4704, "step": 8914 }, { "epoch": 0.12, "grad_norm": 5.836162567138672, "learning_rate": 1.9281929274359252e-05, "loss": 2.613, "step": 8915 }, { "epoch": 0.12, "grad_norm": 5.350597381591797, "learning_rate": 1.9284092137990702e-05, "loss": 2.11, "step": 8916 }, { "epoch": 0.12, "grad_norm": 5.594137191772461, "learning_rate": 1.9286255001622148e-05, "loss": 2.2477, "step": 8917 }, { "epoch": 0.12, "grad_norm": 5.796825408935547, "learning_rate": 1.9288417865253598e-05, "loss": 2.8234, "step": 8918 }, { "epoch": 0.12, "grad_norm": 6.29586935043335, "learning_rate": 1.9290580728885044e-05, "loss": 2.4899, "step": 8919 }, { "epoch": 0.12, "grad_norm": 5.638261318206787, "learning_rate": 1.9292743592516494e-05, "loss": 2.6382, "step": 8920 }, { "epoch": 0.12, "grad_norm": 5.271968841552734, "learning_rate": 1.929490645614794e-05, "loss": 2.1438, "step": 8921 }, { "epoch": 0.12, "grad_norm": 5.915347099304199, "learning_rate": 1.929706931977939e-05, "loss": 2.8805, "step": 8922 }, { "epoch": 0.12, "grad_norm": 5.673249244689941, "learning_rate": 1.9299232183410837e-05, "loss": 2.135, "step": 8923 }, { "epoch": 0.12, "grad_norm": 5.649080276489258, "learning_rate": 1.9301395047042286e-05, "loss": 2.767, "step": 8924 }, { "epoch": 0.12, "grad_norm": 6.47099494934082, "learning_rate": 1.9303557910673733e-05, "loss": 3.0123, "step": 8925 }, { "epoch": 0.12, "grad_norm": 6.494179725646973, "learning_rate": 1.9305720774305182e-05, "loss": 2.8956, "step": 8926 }, { "epoch": 0.12, "grad_norm": 5.6394734382629395, "learning_rate": 1.930788363793663e-05, "loss": 2.3054, "step": 8927 }, { "epoch": 0.12, "grad_norm": 5.364325046539307, "learning_rate": 1.931004650156808e-05, "loss": 2.4969, "step": 8928 }, { "epoch": 0.12, "grad_norm": 7.09882926940918, "learning_rate": 1.9312209365199525e-05, "loss": 3.2299, "step": 8929 }, { "epoch": 0.12, "grad_norm": 5.186707019805908, "learning_rate": 1.9314372228830975e-05, "loss": 2.1011, "step": 8930 }, { "epoch": 0.12, "grad_norm": 6.386739730834961, "learning_rate": 1.9316535092462424e-05, "loss": 2.6077, "step": 8931 }, { "epoch": 0.12, "grad_norm": 5.627198696136475, "learning_rate": 1.931869795609387e-05, "loss": 2.3657, "step": 8932 }, { "epoch": 0.12, "grad_norm": 5.559570789337158, "learning_rate": 1.9320860819725317e-05, "loss": 1.7885, "step": 8933 }, { "epoch": 0.12, "grad_norm": 6.025157451629639, "learning_rate": 1.9323023683356767e-05, "loss": 2.3069, "step": 8934 }, { "epoch": 0.12, "grad_norm": 5.854732513427734, "learning_rate": 1.9325186546988217e-05, "loss": 2.5261, "step": 8935 }, { "epoch": 0.12, "grad_norm": 6.085501670837402, "learning_rate": 1.9327349410619663e-05, "loss": 2.5763, "step": 8936 }, { "epoch": 0.12, "grad_norm": 5.511232376098633, "learning_rate": 1.932951227425111e-05, "loss": 2.286, "step": 8937 }, { "epoch": 0.12, "grad_norm": 6.560737609863281, "learning_rate": 1.9331675137882556e-05, "loss": 2.6368, "step": 8938 }, { "epoch": 0.12, "grad_norm": 6.150827884674072, "learning_rate": 1.9333838001514005e-05, "loss": 2.2614, "step": 8939 }, { "epoch": 0.12, "grad_norm": 5.554131031036377, "learning_rate": 1.9336000865145455e-05, "loss": 2.2305, "step": 8940 }, { "epoch": 0.12, "grad_norm": 6.537549018859863, "learning_rate": 1.93381637287769e-05, "loss": 2.9741, "step": 8941 }, { "epoch": 0.12, "grad_norm": 6.517223358154297, "learning_rate": 1.9340326592408348e-05, "loss": 2.8856, "step": 8942 }, { "epoch": 0.12, "grad_norm": 7.019258499145508, "learning_rate": 1.9342489456039798e-05, "loss": 2.6586, "step": 8943 }, { "epoch": 0.12, "grad_norm": 6.639008522033691, "learning_rate": 1.9344652319671247e-05, "loss": 2.8159, "step": 8944 }, { "epoch": 0.12, "grad_norm": 4.857549667358398, "learning_rate": 1.9346815183302694e-05, "loss": 2.0844, "step": 8945 }, { "epoch": 0.12, "grad_norm": 6.324179649353027, "learning_rate": 1.934897804693414e-05, "loss": 2.4628, "step": 8946 }, { "epoch": 0.12, "grad_norm": 5.4988555908203125, "learning_rate": 1.935114091056559e-05, "loss": 2.4784, "step": 8947 }, { "epoch": 0.12, "grad_norm": 6.1077399253845215, "learning_rate": 1.935330377419704e-05, "loss": 2.4105, "step": 8948 }, { "epoch": 0.12, "grad_norm": 5.133305549621582, "learning_rate": 1.9355466637828486e-05, "loss": 2.3792, "step": 8949 }, { "epoch": 0.12, "grad_norm": 6.074028968811035, "learning_rate": 1.9357629501459932e-05, "loss": 2.1879, "step": 8950 }, { "epoch": 0.12, "grad_norm": 5.694988250732422, "learning_rate": 1.9359792365091382e-05, "loss": 2.2, "step": 8951 }, { "epoch": 0.12, "grad_norm": 6.378546714782715, "learning_rate": 1.9361955228722832e-05, "loss": 2.8151, "step": 8952 }, { "epoch": 0.12, "grad_norm": 5.4985432624816895, "learning_rate": 1.9364118092354278e-05, "loss": 2.0547, "step": 8953 }, { "epoch": 0.12, "grad_norm": 5.1179094314575195, "learning_rate": 1.9366280955985724e-05, "loss": 2.3872, "step": 8954 }, { "epoch": 0.12, "grad_norm": 6.110751628875732, "learning_rate": 1.9368443819617174e-05, "loss": 2.3678, "step": 8955 }, { "epoch": 0.12, "grad_norm": 6.024858474731445, "learning_rate": 1.9370606683248624e-05, "loss": 2.5286, "step": 8956 }, { "epoch": 0.12, "grad_norm": 5.823538303375244, "learning_rate": 1.937276954688007e-05, "loss": 2.7096, "step": 8957 }, { "epoch": 0.12, "grad_norm": 5.229547500610352, "learning_rate": 1.9374932410511517e-05, "loss": 2.1221, "step": 8958 }, { "epoch": 0.12, "grad_norm": 6.411685943603516, "learning_rate": 1.9377095274142966e-05, "loss": 2.6272, "step": 8959 }, { "epoch": 0.12, "grad_norm": 5.968459129333496, "learning_rate": 1.9379258137774416e-05, "loss": 2.6385, "step": 8960 }, { "epoch": 0.12, "grad_norm": 7.625675201416016, "learning_rate": 1.9381421001405863e-05, "loss": 2.6037, "step": 8961 }, { "epoch": 0.12, "grad_norm": 5.761295795440674, "learning_rate": 1.9383583865037312e-05, "loss": 2.1756, "step": 8962 }, { "epoch": 0.12, "grad_norm": 5.789668083190918, "learning_rate": 1.938574672866876e-05, "loss": 2.5552, "step": 8963 }, { "epoch": 0.12, "grad_norm": 5.7992472648620605, "learning_rate": 1.938790959230021e-05, "loss": 2.2946, "step": 8964 }, { "epoch": 0.12, "grad_norm": 5.5210957527160645, "learning_rate": 1.9390072455931655e-05, "loss": 2.4734, "step": 8965 }, { "epoch": 0.12, "grad_norm": 5.3555145263671875, "learning_rate": 1.9392235319563104e-05, "loss": 2.0581, "step": 8966 }, { "epoch": 0.12, "grad_norm": 6.258331298828125, "learning_rate": 1.939439818319455e-05, "loss": 2.5903, "step": 8967 }, { "epoch": 0.12, "grad_norm": 5.744433879852295, "learning_rate": 1.9396561046826e-05, "loss": 2.4391, "step": 8968 }, { "epoch": 0.12, "grad_norm": 5.32655143737793, "learning_rate": 1.9398723910457447e-05, "loss": 2.0511, "step": 8969 }, { "epoch": 0.12, "grad_norm": 5.2531962394714355, "learning_rate": 1.9400886774088897e-05, "loss": 2.4821, "step": 8970 }, { "epoch": 0.12, "grad_norm": 6.226799488067627, "learning_rate": 1.9403049637720343e-05, "loss": 2.6431, "step": 8971 }, { "epoch": 0.12, "grad_norm": 6.062629222869873, "learning_rate": 1.9405212501351793e-05, "loss": 2.9839, "step": 8972 }, { "epoch": 0.12, "grad_norm": 5.901914119720459, "learning_rate": 1.940737536498324e-05, "loss": 2.3542, "step": 8973 }, { "epoch": 0.12, "grad_norm": 6.7538933753967285, "learning_rate": 1.940953822861469e-05, "loss": 2.8008, "step": 8974 }, { "epoch": 0.12, "grad_norm": 5.9098896980285645, "learning_rate": 1.9411701092246135e-05, "loss": 2.3134, "step": 8975 }, { "epoch": 0.12, "grad_norm": 5.525598049163818, "learning_rate": 1.941386395587758e-05, "loss": 2.2357, "step": 8976 }, { "epoch": 0.12, "grad_norm": 5.246382713317871, "learning_rate": 1.941602681950903e-05, "loss": 2.2798, "step": 8977 }, { "epoch": 0.12, "grad_norm": 6.123507022857666, "learning_rate": 1.941818968314048e-05, "loss": 2.4728, "step": 8978 }, { "epoch": 0.12, "grad_norm": 5.597875118255615, "learning_rate": 1.9420352546771927e-05, "loss": 2.5203, "step": 8979 }, { "epoch": 0.12, "grad_norm": 5.523171424865723, "learning_rate": 1.9422515410403374e-05, "loss": 2.7217, "step": 8980 }, { "epoch": 0.12, "grad_norm": 5.255061626434326, "learning_rate": 1.9424678274034824e-05, "loss": 2.2002, "step": 8981 }, { "epoch": 0.12, "grad_norm": 5.788706302642822, "learning_rate": 1.9426841137666273e-05, "loss": 2.5871, "step": 8982 }, { "epoch": 0.12, "grad_norm": 6.010961055755615, "learning_rate": 1.942900400129772e-05, "loss": 2.4285, "step": 8983 }, { "epoch": 0.12, "grad_norm": 6.111354351043701, "learning_rate": 1.9431166864929166e-05, "loss": 2.4483, "step": 8984 }, { "epoch": 0.12, "grad_norm": 5.660409927368164, "learning_rate": 1.9433329728560616e-05, "loss": 2.4974, "step": 8985 }, { "epoch": 0.12, "grad_norm": 5.941059589385986, "learning_rate": 1.9435492592192065e-05, "loss": 2.7598, "step": 8986 }, { "epoch": 0.12, "grad_norm": 6.35488224029541, "learning_rate": 1.9437655455823512e-05, "loss": 2.3757, "step": 8987 }, { "epoch": 0.12, "grad_norm": 5.86126184463501, "learning_rate": 1.9439818319454958e-05, "loss": 2.437, "step": 8988 }, { "epoch": 0.12, "grad_norm": 6.627330780029297, "learning_rate": 1.9441981183086408e-05, "loss": 2.721, "step": 8989 }, { "epoch": 0.12, "grad_norm": 5.887073040008545, "learning_rate": 1.9444144046717858e-05, "loss": 2.5791, "step": 8990 }, { "epoch": 0.12, "grad_norm": 5.871262073516846, "learning_rate": 1.9446306910349304e-05, "loss": 2.6434, "step": 8991 }, { "epoch": 0.12, "grad_norm": 5.4714741706848145, "learning_rate": 1.944846977398075e-05, "loss": 2.6272, "step": 8992 }, { "epoch": 0.12, "grad_norm": 5.586060047149658, "learning_rate": 1.94506326376122e-05, "loss": 2.1131, "step": 8993 }, { "epoch": 0.12, "grad_norm": 5.047486782073975, "learning_rate": 1.945279550124365e-05, "loss": 2.1842, "step": 8994 }, { "epoch": 0.12, "grad_norm": 5.829801082611084, "learning_rate": 1.9454958364875096e-05, "loss": 2.6508, "step": 8995 }, { "epoch": 0.12, "grad_norm": 5.920542240142822, "learning_rate": 1.9457121228506543e-05, "loss": 2.2228, "step": 8996 }, { "epoch": 0.12, "grad_norm": 4.863570690155029, "learning_rate": 1.9459284092137992e-05, "loss": 1.9797, "step": 8997 }, { "epoch": 0.12, "grad_norm": 5.349177837371826, "learning_rate": 1.9461446955769442e-05, "loss": 2.4865, "step": 8998 }, { "epoch": 0.12, "grad_norm": 5.981565952301025, "learning_rate": 1.946360981940089e-05, "loss": 2.5451, "step": 8999 }, { "epoch": 0.12, "grad_norm": 5.627706050872803, "learning_rate": 1.9465772683032335e-05, "loss": 2.4673, "step": 9000 }, { "epoch": 0.12, "grad_norm": 6.2111334800720215, "learning_rate": 1.9467935546663785e-05, "loss": 2.1452, "step": 9001 }, { "epoch": 0.12, "grad_norm": 5.56013298034668, "learning_rate": 1.9470098410295234e-05, "loss": 2.5292, "step": 9002 }, { "epoch": 0.12, "grad_norm": 6.352970600128174, "learning_rate": 1.947226127392668e-05, "loss": 2.2438, "step": 9003 }, { "epoch": 0.12, "grad_norm": 5.549426078796387, "learning_rate": 1.9474424137558127e-05, "loss": 2.3507, "step": 9004 }, { "epoch": 0.12, "grad_norm": 7.036637783050537, "learning_rate": 1.9476587001189577e-05, "loss": 2.9052, "step": 9005 }, { "epoch": 0.12, "grad_norm": 6.482902526855469, "learning_rate": 1.9478749864821026e-05, "loss": 2.8882, "step": 9006 }, { "epoch": 0.12, "grad_norm": 5.468772888183594, "learning_rate": 1.9480912728452473e-05, "loss": 2.4264, "step": 9007 }, { "epoch": 0.12, "grad_norm": 5.719051361083984, "learning_rate": 1.948307559208392e-05, "loss": 1.9804, "step": 9008 }, { "epoch": 0.12, "grad_norm": 5.251045227050781, "learning_rate": 1.948523845571537e-05, "loss": 2.5994, "step": 9009 }, { "epoch": 0.12, "grad_norm": 5.892673015594482, "learning_rate": 1.948740131934682e-05, "loss": 2.2388, "step": 9010 }, { "epoch": 0.12, "grad_norm": 5.678600788116455, "learning_rate": 1.9489564182978265e-05, "loss": 2.3161, "step": 9011 }, { "epoch": 0.12, "grad_norm": 6.69303035736084, "learning_rate": 1.949172704660971e-05, "loss": 3.4478, "step": 9012 }, { "epoch": 0.12, "grad_norm": 4.963356971740723, "learning_rate": 1.949388991024116e-05, "loss": 2.447, "step": 9013 }, { "epoch": 0.12, "grad_norm": 6.200000762939453, "learning_rate": 1.9496052773872608e-05, "loss": 2.8329, "step": 9014 }, { "epoch": 0.12, "grad_norm": 6.322200298309326, "learning_rate": 1.9498215637504057e-05, "loss": 2.3845, "step": 9015 }, { "epoch": 0.12, "grad_norm": 6.026486873626709, "learning_rate": 1.9500378501135504e-05, "loss": 2.564, "step": 9016 }, { "epoch": 0.12, "grad_norm": 6.707377910614014, "learning_rate": 1.9502541364766953e-05, "loss": 2.8559, "step": 9017 }, { "epoch": 0.12, "grad_norm": 7.023696422576904, "learning_rate": 1.95047042283984e-05, "loss": 2.4433, "step": 9018 }, { "epoch": 0.12, "grad_norm": 6.3335065841674805, "learning_rate": 1.950686709202985e-05, "loss": 2.2909, "step": 9019 }, { "epoch": 0.12, "grad_norm": 6.252453327178955, "learning_rate": 1.9509029955661296e-05, "loss": 2.4649, "step": 9020 }, { "epoch": 0.12, "grad_norm": 5.923767566680908, "learning_rate": 1.9511192819292746e-05, "loss": 2.672, "step": 9021 }, { "epoch": 0.12, "grad_norm": 5.312451362609863, "learning_rate": 1.9513355682924192e-05, "loss": 2.3502, "step": 9022 }, { "epoch": 0.12, "grad_norm": 6.338072776794434, "learning_rate": 1.951551854655564e-05, "loss": 2.6394, "step": 9023 }, { "epoch": 0.12, "grad_norm": 5.61760139465332, "learning_rate": 1.951768141018709e-05, "loss": 2.0395, "step": 9024 }, { "epoch": 0.12, "grad_norm": 5.459754943847656, "learning_rate": 1.9519844273818538e-05, "loss": 1.741, "step": 9025 }, { "epoch": 0.12, "grad_norm": 5.686699867248535, "learning_rate": 1.9522007137449984e-05, "loss": 2.3938, "step": 9026 }, { "epoch": 0.12, "grad_norm": 5.628036975860596, "learning_rate": 1.9524170001081434e-05, "loss": 2.5577, "step": 9027 }, { "epoch": 0.12, "grad_norm": 6.611979961395264, "learning_rate": 1.9526332864712884e-05, "loss": 2.9591, "step": 9028 }, { "epoch": 0.12, "grad_norm": 6.414653778076172, "learning_rate": 1.952849572834433e-05, "loss": 2.5533, "step": 9029 }, { "epoch": 0.12, "grad_norm": 5.454022407531738, "learning_rate": 1.9530658591975776e-05, "loss": 2.4446, "step": 9030 }, { "epoch": 0.12, "grad_norm": 5.706058502197266, "learning_rate": 1.9532821455607226e-05, "loss": 3.0161, "step": 9031 }, { "epoch": 0.12, "grad_norm": 6.341161251068115, "learning_rate": 1.9534984319238676e-05, "loss": 2.4668, "step": 9032 }, { "epoch": 0.12, "grad_norm": 5.876203536987305, "learning_rate": 1.9537147182870122e-05, "loss": 2.1568, "step": 9033 }, { "epoch": 0.12, "grad_norm": 6.416567802429199, "learning_rate": 1.953931004650157e-05, "loss": 2.4965, "step": 9034 }, { "epoch": 0.12, "grad_norm": 5.788267612457275, "learning_rate": 1.9541472910133018e-05, "loss": 2.436, "step": 9035 }, { "epoch": 0.12, "grad_norm": 5.1801652908325195, "learning_rate": 1.9543635773764468e-05, "loss": 2.1243, "step": 9036 }, { "epoch": 0.12, "grad_norm": 6.66480827331543, "learning_rate": 1.9545798637395914e-05, "loss": 2.7843, "step": 9037 }, { "epoch": 0.12, "grad_norm": 6.008944511413574, "learning_rate": 1.954796150102736e-05, "loss": 2.7408, "step": 9038 }, { "epoch": 0.12, "grad_norm": 6.0597243309021, "learning_rate": 1.955012436465881e-05, "loss": 2.2293, "step": 9039 }, { "epoch": 0.12, "grad_norm": 5.305458068847656, "learning_rate": 1.955228722829026e-05, "loss": 2.0996, "step": 9040 }, { "epoch": 0.12, "grad_norm": 6.923102378845215, "learning_rate": 1.9554450091921707e-05, "loss": 2.5866, "step": 9041 }, { "epoch": 0.12, "grad_norm": 6.164401531219482, "learning_rate": 1.9556612955553153e-05, "loss": 2.2662, "step": 9042 }, { "epoch": 0.12, "grad_norm": 5.515057563781738, "learning_rate": 1.9558775819184603e-05, "loss": 2.2335, "step": 9043 }, { "epoch": 0.12, "grad_norm": 5.363313674926758, "learning_rate": 1.9560938682816052e-05, "loss": 2.3276, "step": 9044 }, { "epoch": 0.12, "grad_norm": 5.790585517883301, "learning_rate": 1.95631015464475e-05, "loss": 2.4701, "step": 9045 }, { "epoch": 0.12, "grad_norm": 5.923894882202148, "learning_rate": 1.9565264410078945e-05, "loss": 2.4199, "step": 9046 }, { "epoch": 0.12, "grad_norm": 6.557370185852051, "learning_rate": 1.9567427273710395e-05, "loss": 3.0571, "step": 9047 }, { "epoch": 0.12, "grad_norm": 7.05325984954834, "learning_rate": 1.956959013734184e-05, "loss": 2.9692, "step": 9048 }, { "epoch": 0.12, "grad_norm": 6.592388153076172, "learning_rate": 1.957175300097329e-05, "loss": 2.5684, "step": 9049 }, { "epoch": 0.12, "grad_norm": 5.868624687194824, "learning_rate": 1.9573915864604737e-05, "loss": 2.6413, "step": 9050 }, { "epoch": 0.12, "grad_norm": 5.6616668701171875, "learning_rate": 1.9576078728236184e-05, "loss": 2.4906, "step": 9051 }, { "epoch": 0.12, "grad_norm": 5.207388877868652, "learning_rate": 1.9578241591867633e-05, "loss": 2.0343, "step": 9052 }, { "epoch": 0.12, "grad_norm": 6.390260696411133, "learning_rate": 1.9580404455499083e-05, "loss": 2.125, "step": 9053 }, { "epoch": 0.12, "grad_norm": 5.169686317443848, "learning_rate": 1.958256731913053e-05, "loss": 2.4395, "step": 9054 }, { "epoch": 0.12, "grad_norm": 5.675914287567139, "learning_rate": 1.9584730182761976e-05, "loss": 2.3924, "step": 9055 }, { "epoch": 0.12, "grad_norm": 6.921135425567627, "learning_rate": 1.9586893046393426e-05, "loss": 3.3292, "step": 9056 }, { "epoch": 0.12, "grad_norm": 6.476700782775879, "learning_rate": 1.9589055910024875e-05, "loss": 2.1303, "step": 9057 }, { "epoch": 0.12, "grad_norm": 5.225740432739258, "learning_rate": 1.9591218773656322e-05, "loss": 1.9837, "step": 9058 }, { "epoch": 0.12, "grad_norm": 5.623349189758301, "learning_rate": 1.959338163728777e-05, "loss": 2.3155, "step": 9059 }, { "epoch": 0.12, "grad_norm": 5.805389881134033, "learning_rate": 1.9595544500919218e-05, "loss": 2.9922, "step": 9060 }, { "epoch": 0.12, "grad_norm": 5.99221134185791, "learning_rate": 1.9597707364550668e-05, "loss": 2.6717, "step": 9061 }, { "epoch": 0.12, "grad_norm": 5.3551926612854, "learning_rate": 1.9599870228182114e-05, "loss": 2.4817, "step": 9062 }, { "epoch": 0.12, "grad_norm": 5.809715747833252, "learning_rate": 1.9602033091813564e-05, "loss": 2.5399, "step": 9063 }, { "epoch": 0.12, "grad_norm": 6.235161304473877, "learning_rate": 1.960419595544501e-05, "loss": 2.5752, "step": 9064 }, { "epoch": 0.12, "grad_norm": 5.739316463470459, "learning_rate": 1.960635881907646e-05, "loss": 2.5941, "step": 9065 }, { "epoch": 0.12, "grad_norm": 5.969075679779053, "learning_rate": 1.9608521682707906e-05, "loss": 2.5523, "step": 9066 }, { "epoch": 0.12, "grad_norm": 6.050864219665527, "learning_rate": 1.9610684546339356e-05, "loss": 2.3517, "step": 9067 }, { "epoch": 0.12, "grad_norm": 5.871397972106934, "learning_rate": 1.9612847409970802e-05, "loss": 2.3502, "step": 9068 }, { "epoch": 0.12, "grad_norm": 7.266826152801514, "learning_rate": 1.9615010273602252e-05, "loss": 3.0262, "step": 9069 }, { "epoch": 0.12, "grad_norm": 5.961787700653076, "learning_rate": 1.96171731372337e-05, "loss": 2.3291, "step": 9070 }, { "epoch": 0.12, "grad_norm": 5.2544636726379395, "learning_rate": 1.9619336000865148e-05, "loss": 2.0445, "step": 9071 }, { "epoch": 0.12, "grad_norm": 6.31404972076416, "learning_rate": 1.9621498864496594e-05, "loss": 3.0021, "step": 9072 }, { "epoch": 0.12, "grad_norm": 6.6932525634765625, "learning_rate": 1.9623661728128044e-05, "loss": 2.354, "step": 9073 }, { "epoch": 0.12, "grad_norm": 6.007938861846924, "learning_rate": 1.962582459175949e-05, "loss": 2.7883, "step": 9074 }, { "epoch": 0.12, "grad_norm": 5.943949222564697, "learning_rate": 1.962798745539094e-05, "loss": 2.3898, "step": 9075 }, { "epoch": 0.12, "grad_norm": 5.818996906280518, "learning_rate": 1.9630150319022387e-05, "loss": 2.8415, "step": 9076 }, { "epoch": 0.12, "grad_norm": 5.646419048309326, "learning_rate": 1.9632313182653836e-05, "loss": 2.1526, "step": 9077 }, { "epoch": 0.12, "grad_norm": 5.91992712020874, "learning_rate": 1.9634476046285283e-05, "loss": 2.3697, "step": 9078 }, { "epoch": 0.12, "grad_norm": 5.725616931915283, "learning_rate": 1.9636638909916732e-05, "loss": 2.1694, "step": 9079 }, { "epoch": 0.12, "grad_norm": 6.759684085845947, "learning_rate": 1.963880177354818e-05, "loss": 2.8029, "step": 9080 }, { "epoch": 0.12, "grad_norm": 6.10646390914917, "learning_rate": 1.964096463717963e-05, "loss": 1.9812, "step": 9081 }, { "epoch": 0.12, "grad_norm": 5.22628927230835, "learning_rate": 1.9643127500811075e-05, "loss": 2.3155, "step": 9082 }, { "epoch": 0.12, "grad_norm": 5.531426429748535, "learning_rate": 1.9645290364442525e-05, "loss": 2.2072, "step": 9083 }, { "epoch": 0.12, "grad_norm": 5.997307777404785, "learning_rate": 1.964745322807397e-05, "loss": 2.2699, "step": 9084 }, { "epoch": 0.12, "grad_norm": 5.2258172035217285, "learning_rate": 1.9649616091705417e-05, "loss": 2.2492, "step": 9085 }, { "epoch": 0.12, "grad_norm": 5.222631454467773, "learning_rate": 1.9651778955336867e-05, "loss": 2.6478, "step": 9086 }, { "epoch": 0.12, "grad_norm": 5.715466022491455, "learning_rate": 1.9653941818968317e-05, "loss": 2.3089, "step": 9087 }, { "epoch": 0.12, "grad_norm": 5.7220234870910645, "learning_rate": 1.9656104682599763e-05, "loss": 2.5792, "step": 9088 }, { "epoch": 0.12, "grad_norm": 5.5858473777771, "learning_rate": 1.965826754623121e-05, "loss": 2.3043, "step": 9089 }, { "epoch": 0.12, "grad_norm": 5.717605113983154, "learning_rate": 1.966043040986266e-05, "loss": 2.3268, "step": 9090 }, { "epoch": 0.12, "grad_norm": 6.573559761047363, "learning_rate": 1.966259327349411e-05, "loss": 2.3093, "step": 9091 }, { "epoch": 0.12, "grad_norm": 7.076873302459717, "learning_rate": 1.9664756137125555e-05, "loss": 2.8057, "step": 9092 }, { "epoch": 0.12, "grad_norm": 6.062839031219482, "learning_rate": 1.9666919000757002e-05, "loss": 2.4896, "step": 9093 }, { "epoch": 0.12, "grad_norm": 6.508147716522217, "learning_rate": 1.966908186438845e-05, "loss": 2.9225, "step": 9094 }, { "epoch": 0.12, "grad_norm": 5.8744378089904785, "learning_rate": 1.96712447280199e-05, "loss": 2.5724, "step": 9095 }, { "epoch": 0.12, "grad_norm": 5.943075656890869, "learning_rate": 1.9673407591651348e-05, "loss": 2.277, "step": 9096 }, { "epoch": 0.12, "grad_norm": 6.260663032531738, "learning_rate": 1.9675570455282794e-05, "loss": 2.6555, "step": 9097 }, { "epoch": 0.12, "grad_norm": 5.927616119384766, "learning_rate": 1.9677733318914244e-05, "loss": 2.8245, "step": 9098 }, { "epoch": 0.12, "grad_norm": 5.995017051696777, "learning_rate": 1.9679896182545694e-05, "loss": 2.3755, "step": 9099 }, { "epoch": 0.12, "grad_norm": 5.659659385681152, "learning_rate": 1.968205904617714e-05, "loss": 2.0374, "step": 9100 }, { "epoch": 0.12, "grad_norm": 5.079534530639648, "learning_rate": 1.9684221909808586e-05, "loss": 2.0777, "step": 9101 }, { "epoch": 0.12, "grad_norm": 6.019917964935303, "learning_rate": 1.9686384773440036e-05, "loss": 2.8716, "step": 9102 }, { "epoch": 0.12, "grad_norm": 5.599632740020752, "learning_rate": 1.9688547637071486e-05, "loss": 2.1138, "step": 9103 }, { "epoch": 0.12, "grad_norm": 6.711424827575684, "learning_rate": 1.9690710500702932e-05, "loss": 2.493, "step": 9104 }, { "epoch": 0.12, "grad_norm": 7.09113073348999, "learning_rate": 1.969287336433438e-05, "loss": 2.5231, "step": 9105 }, { "epoch": 0.12, "grad_norm": 6.810942649841309, "learning_rate": 1.9695036227965828e-05, "loss": 2.5982, "step": 9106 }, { "epoch": 0.12, "grad_norm": 5.7776408195495605, "learning_rate": 1.9697199091597278e-05, "loss": 2.1904, "step": 9107 }, { "epoch": 0.12, "grad_norm": 6.008259296417236, "learning_rate": 1.9699361955228724e-05, "loss": 2.4613, "step": 9108 }, { "epoch": 0.12, "grad_norm": 6.073721885681152, "learning_rate": 1.970152481886017e-05, "loss": 2.5786, "step": 9109 }, { "epoch": 0.12, "grad_norm": 6.0497870445251465, "learning_rate": 1.970368768249162e-05, "loss": 2.6463, "step": 9110 }, { "epoch": 0.12, "grad_norm": 6.135735988616943, "learning_rate": 1.970585054612307e-05, "loss": 2.6944, "step": 9111 }, { "epoch": 0.12, "grad_norm": 5.791622161865234, "learning_rate": 1.9708013409754516e-05, "loss": 2.347, "step": 9112 }, { "epoch": 0.12, "grad_norm": 5.939692497253418, "learning_rate": 1.9710176273385963e-05, "loss": 1.9103, "step": 9113 }, { "epoch": 0.12, "grad_norm": 5.354442596435547, "learning_rate": 1.9712339137017413e-05, "loss": 2.2582, "step": 9114 }, { "epoch": 0.12, "grad_norm": 6.087283134460449, "learning_rate": 1.9714502000648862e-05, "loss": 2.5378, "step": 9115 }, { "epoch": 0.12, "grad_norm": 5.298548221588135, "learning_rate": 1.971666486428031e-05, "loss": 2.5486, "step": 9116 }, { "epoch": 0.12, "grad_norm": 6.0333709716796875, "learning_rate": 1.9718827727911755e-05, "loss": 2.4077, "step": 9117 }, { "epoch": 0.12, "grad_norm": 5.342400074005127, "learning_rate": 1.9720990591543205e-05, "loss": 2.8902, "step": 9118 }, { "epoch": 0.12, "grad_norm": 6.428393840789795, "learning_rate": 1.9723153455174655e-05, "loss": 2.4955, "step": 9119 }, { "epoch": 0.12, "grad_norm": 6.351571083068848, "learning_rate": 1.97253163188061e-05, "loss": 2.4453, "step": 9120 }, { "epoch": 0.12, "grad_norm": 5.326047897338867, "learning_rate": 1.972747918243755e-05, "loss": 2.1516, "step": 9121 }, { "epoch": 0.12, "grad_norm": 5.911513328552246, "learning_rate": 1.9729642046068997e-05, "loss": 2.4554, "step": 9122 }, { "epoch": 0.12, "grad_norm": 5.983452796936035, "learning_rate": 1.9731804909700443e-05, "loss": 2.8184, "step": 9123 }, { "epoch": 0.12, "grad_norm": 5.542727947235107, "learning_rate": 1.9733967773331893e-05, "loss": 2.6264, "step": 9124 }, { "epoch": 0.12, "grad_norm": 6.123360633850098, "learning_rate": 1.9736130636963343e-05, "loss": 2.9406, "step": 9125 }, { "epoch": 0.12, "grad_norm": 6.366767883300781, "learning_rate": 1.973829350059479e-05, "loss": 2.6393, "step": 9126 }, { "epoch": 0.12, "grad_norm": 6.0129523277282715, "learning_rate": 1.9740456364226236e-05, "loss": 2.5534, "step": 9127 }, { "epoch": 0.12, "grad_norm": 6.279966831207275, "learning_rate": 1.9742619227857685e-05, "loss": 2.9396, "step": 9128 }, { "epoch": 0.12, "grad_norm": 5.948243141174316, "learning_rate": 1.9744782091489135e-05, "loss": 2.5281, "step": 9129 }, { "epoch": 0.12, "grad_norm": 5.670843601226807, "learning_rate": 1.974694495512058e-05, "loss": 2.3123, "step": 9130 }, { "epoch": 0.12, "grad_norm": 6.433663368225098, "learning_rate": 1.9749107818752028e-05, "loss": 2.6312, "step": 9131 }, { "epoch": 0.12, "grad_norm": 5.9942145347595215, "learning_rate": 1.9751270682383477e-05, "loss": 2.7958, "step": 9132 }, { "epoch": 0.12, "grad_norm": 6.013616561889648, "learning_rate": 1.9753433546014927e-05, "loss": 2.5892, "step": 9133 }, { "epoch": 0.12, "grad_norm": 5.770537853240967, "learning_rate": 1.9755596409646374e-05, "loss": 2.6114, "step": 9134 }, { "epoch": 0.12, "grad_norm": 5.607624530792236, "learning_rate": 1.975775927327782e-05, "loss": 2.2663, "step": 9135 }, { "epoch": 0.12, "grad_norm": 5.949797630310059, "learning_rate": 1.975992213690927e-05, "loss": 2.1655, "step": 9136 }, { "epoch": 0.12, "grad_norm": 6.432382106781006, "learning_rate": 1.976208500054072e-05, "loss": 2.211, "step": 9137 }, { "epoch": 0.12, "grad_norm": 5.764739990234375, "learning_rate": 1.9764247864172166e-05, "loss": 2.2353, "step": 9138 }, { "epoch": 0.12, "grad_norm": 5.923744201660156, "learning_rate": 1.9766410727803612e-05, "loss": 2.5772, "step": 9139 }, { "epoch": 0.12, "grad_norm": 5.337305068969727, "learning_rate": 1.9768573591435062e-05, "loss": 2.5916, "step": 9140 }, { "epoch": 0.12, "grad_norm": 6.019854545593262, "learning_rate": 1.977073645506651e-05, "loss": 2.36, "step": 9141 }, { "epoch": 0.12, "grad_norm": 5.5511698722839355, "learning_rate": 1.9772899318697958e-05, "loss": 2.3641, "step": 9142 }, { "epoch": 0.12, "grad_norm": 6.074387550354004, "learning_rate": 1.9775062182329404e-05, "loss": 2.1278, "step": 9143 }, { "epoch": 0.12, "grad_norm": 6.347116470336914, "learning_rate": 1.9777225045960854e-05, "loss": 3.0001, "step": 9144 }, { "epoch": 0.12, "grad_norm": 5.413164138793945, "learning_rate": 1.9779387909592304e-05, "loss": 2.602, "step": 9145 }, { "epoch": 0.12, "grad_norm": 6.047226428985596, "learning_rate": 1.978155077322375e-05, "loss": 2.5226, "step": 9146 }, { "epoch": 0.12, "grad_norm": 6.049524784088135, "learning_rate": 1.9783713636855197e-05, "loss": 1.9664, "step": 9147 }, { "epoch": 0.12, "grad_norm": 6.14737606048584, "learning_rate": 1.9785876500486646e-05, "loss": 2.7706, "step": 9148 }, { "epoch": 0.12, "grad_norm": 5.628752708435059, "learning_rate": 1.9788039364118096e-05, "loss": 2.4307, "step": 9149 }, { "epoch": 0.12, "grad_norm": 6.136503219604492, "learning_rate": 1.9790202227749542e-05, "loss": 3.0947, "step": 9150 }, { "epoch": 0.12, "grad_norm": 5.765201568603516, "learning_rate": 1.979236509138099e-05, "loss": 2.3818, "step": 9151 }, { "epoch": 0.12, "grad_norm": 7.061962127685547, "learning_rate": 1.979452795501244e-05, "loss": 2.8975, "step": 9152 }, { "epoch": 0.12, "grad_norm": 7.0591559410095215, "learning_rate": 1.9796690818643888e-05, "loss": 3.5071, "step": 9153 }, { "epoch": 0.12, "grad_norm": 5.524771213531494, "learning_rate": 1.9798853682275335e-05, "loss": 2.2857, "step": 9154 }, { "epoch": 0.12, "grad_norm": 6.782273292541504, "learning_rate": 1.980101654590678e-05, "loss": 2.1981, "step": 9155 }, { "epoch": 0.12, "grad_norm": 5.5401787757873535, "learning_rate": 1.980317940953823e-05, "loss": 2.532, "step": 9156 }, { "epoch": 0.12, "grad_norm": 5.412862777709961, "learning_rate": 1.9805342273169677e-05, "loss": 2.0225, "step": 9157 }, { "epoch": 0.12, "grad_norm": 5.48831033706665, "learning_rate": 1.9807505136801127e-05, "loss": 2.4559, "step": 9158 }, { "epoch": 0.12, "grad_norm": 6.327822208404541, "learning_rate": 1.9809668000432573e-05, "loss": 2.4538, "step": 9159 }, { "epoch": 0.12, "grad_norm": 5.584938049316406, "learning_rate": 1.9811830864064023e-05, "loss": 2.3531, "step": 9160 }, { "epoch": 0.12, "grad_norm": 5.395111560821533, "learning_rate": 1.981399372769547e-05, "loss": 2.1549, "step": 9161 }, { "epoch": 0.12, "grad_norm": 5.405800819396973, "learning_rate": 1.981615659132692e-05, "loss": 1.9314, "step": 9162 }, { "epoch": 0.12, "grad_norm": 5.621685981750488, "learning_rate": 1.9818319454958365e-05, "loss": 2.4033, "step": 9163 }, { "epoch": 0.12, "grad_norm": 6.390172004699707, "learning_rate": 1.9820482318589815e-05, "loss": 2.5264, "step": 9164 }, { "epoch": 0.12, "grad_norm": 6.835832118988037, "learning_rate": 1.982264518222126e-05, "loss": 2.7804, "step": 9165 }, { "epoch": 0.12, "grad_norm": 5.8499932289123535, "learning_rate": 1.982480804585271e-05, "loss": 2.7012, "step": 9166 }, { "epoch": 0.12, "grad_norm": 6.313164234161377, "learning_rate": 1.9826970909484158e-05, "loss": 2.7601, "step": 9167 }, { "epoch": 0.12, "grad_norm": 5.95470666885376, "learning_rate": 1.9829133773115607e-05, "loss": 2.6772, "step": 9168 }, { "epoch": 0.12, "grad_norm": 5.629491806030273, "learning_rate": 1.9831296636747054e-05, "loss": 2.4515, "step": 9169 }, { "epoch": 0.12, "grad_norm": 5.969650745391846, "learning_rate": 1.9833459500378503e-05, "loss": 2.5062, "step": 9170 }, { "epoch": 0.12, "grad_norm": 6.546888828277588, "learning_rate": 1.983562236400995e-05, "loss": 3.06, "step": 9171 }, { "epoch": 0.12, "grad_norm": 5.21159029006958, "learning_rate": 1.98377852276414e-05, "loss": 2.7264, "step": 9172 }, { "epoch": 0.12, "grad_norm": 6.183351516723633, "learning_rate": 1.9839948091272846e-05, "loss": 2.6921, "step": 9173 }, { "epoch": 0.12, "grad_norm": 6.082056045532227, "learning_rate": 1.9842110954904296e-05, "loss": 2.2137, "step": 9174 }, { "epoch": 0.12, "grad_norm": 5.4375433921813965, "learning_rate": 1.9844273818535742e-05, "loss": 2.7993, "step": 9175 }, { "epoch": 0.12, "grad_norm": 5.826198101043701, "learning_rate": 1.9846436682167192e-05, "loss": 2.3959, "step": 9176 }, { "epoch": 0.12, "grad_norm": 6.034268856048584, "learning_rate": 1.9848599545798638e-05, "loss": 2.7058, "step": 9177 }, { "epoch": 0.12, "grad_norm": 5.894867897033691, "learning_rate": 1.9850762409430088e-05, "loss": 2.6113, "step": 9178 }, { "epoch": 0.12, "grad_norm": 6.513812065124512, "learning_rate": 1.9852925273061534e-05, "loss": 2.6036, "step": 9179 }, { "epoch": 0.12, "grad_norm": 5.873082160949707, "learning_rate": 1.9855088136692984e-05, "loss": 2.6996, "step": 9180 }, { "epoch": 0.12, "grad_norm": 5.951517581939697, "learning_rate": 1.985725100032443e-05, "loss": 3.0619, "step": 9181 }, { "epoch": 0.12, "grad_norm": 5.862232208251953, "learning_rate": 1.985941386395588e-05, "loss": 2.2586, "step": 9182 }, { "epoch": 0.12, "grad_norm": 5.2906293869018555, "learning_rate": 1.9861576727587326e-05, "loss": 2.8885, "step": 9183 }, { "epoch": 0.12, "grad_norm": 5.875442028045654, "learning_rate": 1.9863739591218776e-05, "loss": 2.167, "step": 9184 }, { "epoch": 0.12, "grad_norm": 6.122348308563232, "learning_rate": 1.9865902454850222e-05, "loss": 2.6003, "step": 9185 }, { "epoch": 0.12, "grad_norm": 5.037684917449951, "learning_rate": 1.9868065318481672e-05, "loss": 2.3188, "step": 9186 }, { "epoch": 0.12, "grad_norm": 5.554008483886719, "learning_rate": 1.9870228182113122e-05, "loss": 2.4451, "step": 9187 }, { "epoch": 0.12, "grad_norm": 5.167474746704102, "learning_rate": 1.987239104574457e-05, "loss": 2.5611, "step": 9188 }, { "epoch": 0.12, "grad_norm": 6.154602527618408, "learning_rate": 1.9874553909376015e-05, "loss": 2.4696, "step": 9189 }, { "epoch": 0.12, "grad_norm": 5.852519989013672, "learning_rate": 1.9876716773007464e-05, "loss": 2.4659, "step": 9190 }, { "epoch": 0.12, "grad_norm": 5.254316329956055, "learning_rate": 1.9878879636638914e-05, "loss": 2.2297, "step": 9191 }, { "epoch": 0.12, "grad_norm": 6.172750949859619, "learning_rate": 1.988104250027036e-05, "loss": 2.5779, "step": 9192 }, { "epoch": 0.12, "grad_norm": 6.39804744720459, "learning_rate": 1.9883205363901807e-05, "loss": 2.3214, "step": 9193 }, { "epoch": 0.12, "grad_norm": 5.282159805297852, "learning_rate": 1.9885368227533253e-05, "loss": 2.1148, "step": 9194 }, { "epoch": 0.12, "grad_norm": 5.892698764801025, "learning_rate": 1.9887531091164703e-05, "loss": 2.9127, "step": 9195 }, { "epoch": 0.12, "grad_norm": 6.961541175842285, "learning_rate": 1.9889693954796153e-05, "loss": 2.4497, "step": 9196 }, { "epoch": 0.12, "grad_norm": 5.252357482910156, "learning_rate": 1.98918568184276e-05, "loss": 2.3154, "step": 9197 }, { "epoch": 0.12, "grad_norm": 7.217569351196289, "learning_rate": 1.9894019682059045e-05, "loss": 2.4963, "step": 9198 }, { "epoch": 0.12, "grad_norm": 5.907473087310791, "learning_rate": 1.9896182545690495e-05, "loss": 2.6801, "step": 9199 }, { "epoch": 0.12, "grad_norm": 6.661372661590576, "learning_rate": 1.9898345409321945e-05, "loss": 2.236, "step": 9200 }, { "epoch": 0.12, "grad_norm": 5.676732540130615, "learning_rate": 1.990050827295339e-05, "loss": 2.5375, "step": 9201 }, { "epoch": 0.12, "grad_norm": 5.530054092407227, "learning_rate": 1.9902671136584838e-05, "loss": 2.1036, "step": 9202 }, { "epoch": 0.12, "grad_norm": 6.390771865844727, "learning_rate": 1.9904834000216287e-05, "loss": 2.7574, "step": 9203 }, { "epoch": 0.12, "grad_norm": 6.05402946472168, "learning_rate": 1.9906996863847737e-05, "loss": 2.3732, "step": 9204 }, { "epoch": 0.12, "grad_norm": 5.813143253326416, "learning_rate": 1.9909159727479184e-05, "loss": 2.9063, "step": 9205 }, { "epoch": 0.12, "grad_norm": 6.967738151550293, "learning_rate": 1.991132259111063e-05, "loss": 3.4353, "step": 9206 }, { "epoch": 0.12, "grad_norm": 5.3312201499938965, "learning_rate": 1.991348545474208e-05, "loss": 1.9291, "step": 9207 }, { "epoch": 0.12, "grad_norm": 5.798177719116211, "learning_rate": 1.991564831837353e-05, "loss": 2.6827, "step": 9208 }, { "epoch": 0.12, "grad_norm": 5.501561641693115, "learning_rate": 1.9917811182004976e-05, "loss": 2.9405, "step": 9209 }, { "epoch": 0.12, "grad_norm": 5.760981559753418, "learning_rate": 1.9919974045636422e-05, "loss": 2.1605, "step": 9210 }, { "epoch": 0.12, "grad_norm": 5.508267402648926, "learning_rate": 1.9922136909267872e-05, "loss": 2.7998, "step": 9211 }, { "epoch": 0.12, "grad_norm": 6.0689897537231445, "learning_rate": 1.992429977289932e-05, "loss": 2.6699, "step": 9212 }, { "epoch": 0.12, "grad_norm": 5.143905162811279, "learning_rate": 1.9926462636530768e-05, "loss": 2.0883, "step": 9213 }, { "epoch": 0.12, "grad_norm": 6.547295570373535, "learning_rate": 1.9928625500162214e-05, "loss": 2.5246, "step": 9214 }, { "epoch": 0.12, "grad_norm": 6.034207820892334, "learning_rate": 1.9930788363793664e-05, "loss": 2.4556, "step": 9215 }, { "epoch": 0.12, "grad_norm": 6.060361385345459, "learning_rate": 1.9932951227425114e-05, "loss": 2.8775, "step": 9216 }, { "epoch": 0.12, "grad_norm": 6.508169651031494, "learning_rate": 1.993511409105656e-05, "loss": 2.7146, "step": 9217 }, { "epoch": 0.12, "grad_norm": 5.631445407867432, "learning_rate": 1.993727695468801e-05, "loss": 2.3116, "step": 9218 }, { "epoch": 0.12, "grad_norm": 5.693487167358398, "learning_rate": 1.9939439818319456e-05, "loss": 2.532, "step": 9219 }, { "epoch": 0.12, "grad_norm": 5.690913200378418, "learning_rate": 1.9941602681950906e-05, "loss": 2.0153, "step": 9220 }, { "epoch": 0.12, "grad_norm": 6.046535968780518, "learning_rate": 1.9943765545582352e-05, "loss": 2.5831, "step": 9221 }, { "epoch": 0.12, "grad_norm": 5.640242099761963, "learning_rate": 1.9945928409213802e-05, "loss": 2.5026, "step": 9222 }, { "epoch": 0.12, "grad_norm": 6.211941719055176, "learning_rate": 1.994809127284525e-05, "loss": 2.3688, "step": 9223 }, { "epoch": 0.12, "grad_norm": 5.538566589355469, "learning_rate": 1.9950254136476698e-05, "loss": 2.2269, "step": 9224 }, { "epoch": 0.12, "grad_norm": 5.849209785461426, "learning_rate": 1.9952417000108145e-05, "loss": 2.0369, "step": 9225 }, { "epoch": 0.12, "grad_norm": 5.970457553863525, "learning_rate": 1.9954579863739594e-05, "loss": 2.674, "step": 9226 }, { "epoch": 0.12, "grad_norm": 5.457762241363525, "learning_rate": 1.995674272737104e-05, "loss": 2.2578, "step": 9227 }, { "epoch": 0.12, "grad_norm": 4.9475626945495605, "learning_rate": 1.995890559100249e-05, "loss": 1.8818, "step": 9228 }, { "epoch": 0.12, "grad_norm": 5.2650604248046875, "learning_rate": 1.9961068454633937e-05, "loss": 2.2805, "step": 9229 }, { "epoch": 0.12, "grad_norm": 5.996617794036865, "learning_rate": 1.9963231318265386e-05, "loss": 2.5474, "step": 9230 }, { "epoch": 0.12, "grad_norm": 5.908936023712158, "learning_rate": 1.9965394181896833e-05, "loss": 2.075, "step": 9231 }, { "epoch": 0.12, "grad_norm": 6.525459289550781, "learning_rate": 1.996755704552828e-05, "loss": 2.5578, "step": 9232 }, { "epoch": 0.12, "grad_norm": 5.812904357910156, "learning_rate": 1.996971990915973e-05, "loss": 2.1538, "step": 9233 }, { "epoch": 0.12, "grad_norm": 5.986192226409912, "learning_rate": 1.997188277279118e-05, "loss": 2.5809, "step": 9234 }, { "epoch": 0.12, "grad_norm": 5.888993263244629, "learning_rate": 1.9974045636422625e-05, "loss": 2.4027, "step": 9235 }, { "epoch": 0.12, "grad_norm": 5.972288131713867, "learning_rate": 1.997620850005407e-05, "loss": 2.607, "step": 9236 }, { "epoch": 0.12, "grad_norm": 5.129820823669434, "learning_rate": 1.997837136368552e-05, "loss": 1.8242, "step": 9237 }, { "epoch": 0.12, "grad_norm": 6.967933177947998, "learning_rate": 1.998053422731697e-05, "loss": 2.6851, "step": 9238 }, { "epoch": 0.12, "grad_norm": 6.471746921539307, "learning_rate": 1.9982697090948417e-05, "loss": 2.8487, "step": 9239 }, { "epoch": 0.12, "grad_norm": 6.0939178466796875, "learning_rate": 1.9984859954579864e-05, "loss": 2.4878, "step": 9240 }, { "epoch": 0.12, "grad_norm": 5.960986614227295, "learning_rate": 1.9987022818211313e-05, "loss": 2.4949, "step": 9241 }, { "epoch": 0.12, "grad_norm": 5.251101016998291, "learning_rate": 1.9989185681842763e-05, "loss": 2.0187, "step": 9242 }, { "epoch": 0.12, "grad_norm": 5.355048179626465, "learning_rate": 1.999134854547421e-05, "loss": 2.1767, "step": 9243 }, { "epoch": 0.12, "grad_norm": 6.593698978424072, "learning_rate": 1.9993511409105656e-05, "loss": 2.4471, "step": 9244 }, { "epoch": 0.12, "grad_norm": 5.804337501525879, "learning_rate": 1.9995674272737106e-05, "loss": 2.326, "step": 9245 }, { "epoch": 0.12, "grad_norm": 6.3799519538879395, "learning_rate": 1.9997837136368555e-05, "loss": 3.0507, "step": 9246 }, { "epoch": 0.12, "grad_norm": 6.044947147369385, "learning_rate": 2e-05, "loss": 3.1705, "step": 9247 }, { "epoch": 0.12, "grad_norm": 7.310060501098633, "learning_rate": 1.9999999999447916e-05, "loss": 2.8934, "step": 9248 }, { "epoch": 0.12, "grad_norm": 6.246899127960205, "learning_rate": 1.999999999779166e-05, "loss": 2.4447, "step": 9249 }, { "epoch": 0.12, "grad_norm": 4.8101067543029785, "learning_rate": 1.999999999503124e-05, "loss": 2.0225, "step": 9250 }, { "epoch": 0.12, "grad_norm": 5.162338733673096, "learning_rate": 1.9999999991166644e-05, "loss": 1.9847, "step": 9251 }, { "epoch": 0.12, "grad_norm": 5.444738388061523, "learning_rate": 1.999999998619788e-05, "loss": 2.7585, "step": 9252 }, { "epoch": 0.12, "grad_norm": 5.27466344833374, "learning_rate": 1.999999998012495e-05, "loss": 2.6329, "step": 9253 }, { "epoch": 0.12, "grad_norm": 6.018951416015625, "learning_rate": 1.9999999972947846e-05, "loss": 2.0065, "step": 9254 }, { "epoch": 0.12, "grad_norm": 5.691699028015137, "learning_rate": 1.999999996466657e-05, "loss": 2.2358, "step": 9255 }, { "epoch": 0.12, "grad_norm": 6.75949239730835, "learning_rate": 1.999999995528113e-05, "loss": 2.8438, "step": 9256 }, { "epoch": 0.12, "grad_norm": 5.828890800476074, "learning_rate": 1.999999994479152e-05, "loss": 2.4529, "step": 9257 }, { "epoch": 0.12, "grad_norm": 5.181392669677734, "learning_rate": 1.999999993319774e-05, "loss": 2.1835, "step": 9258 }, { "epoch": 0.12, "grad_norm": 4.716440200805664, "learning_rate": 1.9999999920499786e-05, "loss": 1.7093, "step": 9259 }, { "epoch": 0.12, "grad_norm": 5.761344909667969, "learning_rate": 1.9999999906697666e-05, "loss": 2.4827, "step": 9260 }, { "epoch": 0.12, "grad_norm": 6.229907035827637, "learning_rate": 1.9999999891791377e-05, "loss": 3.0636, "step": 9261 }, { "epoch": 0.12, "grad_norm": 4.705442428588867, "learning_rate": 1.9999999875780916e-05, "loss": 2.3322, "step": 9262 }, { "epoch": 0.12, "grad_norm": 6.607435703277588, "learning_rate": 1.9999999858666286e-05, "loss": 2.6997, "step": 9263 }, { "epoch": 0.12, "grad_norm": 5.7478814125061035, "learning_rate": 1.999999984044749e-05, "loss": 2.1309, "step": 9264 }, { "epoch": 0.12, "grad_norm": 5.0110650062561035, "learning_rate": 1.999999982112452e-05, "loss": 1.9616, "step": 9265 }, { "epoch": 0.12, "grad_norm": 7.3515543937683105, "learning_rate": 1.9999999800697378e-05, "loss": 2.6044, "step": 9266 }, { "epoch": 0.12, "grad_norm": 4.757129669189453, "learning_rate": 1.999999977916607e-05, "loss": 2.124, "step": 9267 }, { "epoch": 0.12, "grad_norm": 5.792810440063477, "learning_rate": 1.9999999756530593e-05, "loss": 2.3801, "step": 9268 }, { "epoch": 0.12, "grad_norm": 5.06804084777832, "learning_rate": 1.9999999732790946e-05, "loss": 1.98, "step": 9269 }, { "epoch": 0.12, "grad_norm": 5.606499195098877, "learning_rate": 1.999999970794713e-05, "loss": 2.5661, "step": 9270 }, { "epoch": 0.12, "grad_norm": 6.473219871520996, "learning_rate": 1.9999999681999144e-05, "loss": 2.7987, "step": 9271 }, { "epoch": 0.12, "grad_norm": 6.199167251586914, "learning_rate": 1.9999999654946985e-05, "loss": 2.6813, "step": 9272 }, { "epoch": 0.12, "grad_norm": 5.027141571044922, "learning_rate": 1.999999962679066e-05, "loss": 2.6098, "step": 9273 }, { "epoch": 0.12, "grad_norm": 5.750716209411621, "learning_rate": 1.9999999597530166e-05, "loss": 2.5373, "step": 9274 }, { "epoch": 0.12, "grad_norm": 6.942811489105225, "learning_rate": 1.99999995671655e-05, "loss": 2.9907, "step": 9275 }, { "epoch": 0.12, "grad_norm": 5.072338581085205, "learning_rate": 1.9999999535696666e-05, "loss": 2.0455, "step": 9276 }, { "epoch": 0.12, "grad_norm": 5.491209506988525, "learning_rate": 1.999999950312366e-05, "loss": 2.4645, "step": 9277 }, { "epoch": 0.12, "grad_norm": 5.950072765350342, "learning_rate": 1.999999946944649e-05, "loss": 2.777, "step": 9278 }, { "epoch": 0.12, "grad_norm": 5.985475063323975, "learning_rate": 1.9999999434665144e-05, "loss": 2.5921, "step": 9279 }, { "epoch": 0.12, "grad_norm": 5.748819351196289, "learning_rate": 1.999999939877963e-05, "loss": 2.7034, "step": 9280 }, { "epoch": 0.12, "grad_norm": 5.249011993408203, "learning_rate": 1.999999936178995e-05, "loss": 2.2615, "step": 9281 }, { "epoch": 0.12, "grad_norm": 5.174450874328613, "learning_rate": 1.9999999323696096e-05, "loss": 2.363, "step": 9282 }, { "epoch": 0.12, "grad_norm": 6.291382312774658, "learning_rate": 1.9999999284498074e-05, "loss": 2.6308, "step": 9283 }, { "epoch": 0.12, "grad_norm": 6.118205547332764, "learning_rate": 1.9999999244195884e-05, "loss": 2.7921, "step": 9284 }, { "epoch": 0.12, "grad_norm": 6.211999416351318, "learning_rate": 1.9999999202789522e-05, "loss": 2.8956, "step": 9285 }, { "epoch": 0.12, "grad_norm": 6.190925121307373, "learning_rate": 1.9999999160278992e-05, "loss": 2.3558, "step": 9286 }, { "epoch": 0.12, "grad_norm": 5.80772066116333, "learning_rate": 1.9999999116664293e-05, "loss": 2.4707, "step": 9287 }, { "epoch": 0.12, "grad_norm": 5.037163257598877, "learning_rate": 1.9999999071945422e-05, "loss": 2.1935, "step": 9288 }, { "epoch": 0.12, "grad_norm": 6.159605026245117, "learning_rate": 1.9999999026122383e-05, "loss": 2.5848, "step": 9289 }, { "epoch": 0.12, "grad_norm": 6.0628886222839355, "learning_rate": 1.9999998979195176e-05, "loss": 2.5941, "step": 9290 }, { "epoch": 0.12, "grad_norm": 7.153975486755371, "learning_rate": 1.9999998931163796e-05, "loss": 2.7042, "step": 9291 }, { "epoch": 0.12, "grad_norm": 7.244606971740723, "learning_rate": 1.999999888202825e-05, "loss": 3.4936, "step": 9292 }, { "epoch": 0.12, "grad_norm": 5.407569885253906, "learning_rate": 1.9999998831788532e-05, "loss": 2.6459, "step": 9293 }, { "epoch": 0.12, "grad_norm": 6.464082717895508, "learning_rate": 1.9999998780444644e-05, "loss": 2.892, "step": 9294 }, { "epoch": 0.12, "grad_norm": 6.344151496887207, "learning_rate": 1.9999998727996588e-05, "loss": 2.7204, "step": 9295 }, { "epoch": 0.12, "grad_norm": 4.7687788009643555, "learning_rate": 1.9999998674444363e-05, "loss": 2.1166, "step": 9296 }, { "epoch": 0.12, "grad_norm": 5.976130485534668, "learning_rate": 1.999999861978797e-05, "loss": 2.4392, "step": 9297 }, { "epoch": 0.12, "grad_norm": 5.774656295776367, "learning_rate": 1.9999998564027404e-05, "loss": 2.1642, "step": 9298 }, { "epoch": 0.12, "grad_norm": 5.97593355178833, "learning_rate": 1.9999998507162667e-05, "loss": 2.395, "step": 9299 }, { "epoch": 0.12, "grad_norm": 6.290213108062744, "learning_rate": 1.9999998449193765e-05, "loss": 2.9505, "step": 9300 }, { "epoch": 0.12, "grad_norm": 6.732825756072998, "learning_rate": 1.999999839012069e-05, "loss": 2.8037, "step": 9301 }, { "epoch": 0.12, "grad_norm": 6.503515720367432, "learning_rate": 1.999999832994345e-05, "loss": 3.2559, "step": 9302 }, { "epoch": 0.12, "grad_norm": 4.422624111175537, "learning_rate": 1.9999998268662035e-05, "loss": 1.8399, "step": 9303 }, { "epoch": 0.12, "grad_norm": 5.652207851409912, "learning_rate": 1.9999998206276452e-05, "loss": 2.4466, "step": 9304 }, { "epoch": 0.12, "grad_norm": 6.618023872375488, "learning_rate": 1.99999981427867e-05, "loss": 2.4435, "step": 9305 }, { "epoch": 0.12, "grad_norm": 6.420572280883789, "learning_rate": 1.9999998078192782e-05, "loss": 2.191, "step": 9306 }, { "epoch": 0.12, "grad_norm": 5.768450736999512, "learning_rate": 1.999999801249469e-05, "loss": 2.6997, "step": 9307 }, { "epoch": 0.12, "grad_norm": 5.272779941558838, "learning_rate": 1.9999997945692435e-05, "loss": 2.0713, "step": 9308 }, { "epoch": 0.12, "grad_norm": 5.138142108917236, "learning_rate": 1.9999997877786003e-05, "loss": 1.9623, "step": 9309 }, { "epoch": 0.12, "grad_norm": 5.917236328125, "learning_rate": 1.9999997808775403e-05, "loss": 2.6564, "step": 9310 }, { "epoch": 0.12, "grad_norm": 6.519253253936768, "learning_rate": 1.9999997738660638e-05, "loss": 2.6103, "step": 9311 }, { "epoch": 0.12, "grad_norm": 6.194901943206787, "learning_rate": 1.9999997667441698e-05, "loss": 3.3395, "step": 9312 }, { "epoch": 0.12, "grad_norm": 5.565614700317383, "learning_rate": 1.9999997595118593e-05, "loss": 2.4064, "step": 9313 }, { "epoch": 0.12, "grad_norm": 5.447997093200684, "learning_rate": 1.999999752169132e-05, "loss": 1.7678, "step": 9314 }, { "epoch": 0.12, "grad_norm": 5.64526891708374, "learning_rate": 1.9999997447159874e-05, "loss": 2.4716, "step": 9315 }, { "epoch": 0.12, "grad_norm": 6.039468288421631, "learning_rate": 1.9999997371524256e-05, "loss": 2.7514, "step": 9316 }, { "epoch": 0.12, "grad_norm": 5.6954498291015625, "learning_rate": 1.9999997294784474e-05, "loss": 2.8448, "step": 9317 }, { "epoch": 0.12, "grad_norm": 5.993619918823242, "learning_rate": 1.999999721694052e-05, "loss": 2.7427, "step": 9318 }, { "epoch": 0.12, "grad_norm": 5.355303764343262, "learning_rate": 1.9999997137992397e-05, "loss": 2.3317, "step": 9319 }, { "epoch": 0.12, "grad_norm": 4.810409069061279, "learning_rate": 1.9999997057940106e-05, "loss": 1.8562, "step": 9320 }, { "epoch": 0.12, "grad_norm": 5.48574686050415, "learning_rate": 1.9999996976783644e-05, "loss": 2.3808, "step": 9321 }, { "epoch": 0.12, "grad_norm": 6.370235919952393, "learning_rate": 1.9999996894523016e-05, "loss": 2.6476, "step": 9322 }, { "epoch": 0.12, "grad_norm": 5.129692077636719, "learning_rate": 1.9999996811158216e-05, "loss": 2.1288, "step": 9323 }, { "epoch": 0.12, "grad_norm": 5.267079830169678, "learning_rate": 1.9999996726689244e-05, "loss": 2.1376, "step": 9324 }, { "epoch": 0.12, "grad_norm": 4.890445709228516, "learning_rate": 1.9999996641116108e-05, "loss": 2.2806, "step": 9325 }, { "epoch": 0.12, "grad_norm": 5.128025054931641, "learning_rate": 1.9999996554438796e-05, "loss": 2.4854, "step": 9326 }, { "epoch": 0.12, "grad_norm": 5.301689147949219, "learning_rate": 1.9999996466657323e-05, "loss": 2.2176, "step": 9327 }, { "epoch": 0.12, "grad_norm": 6.343381404876709, "learning_rate": 1.9999996377771674e-05, "loss": 2.2563, "step": 9328 }, { "epoch": 0.12, "grad_norm": 5.25252628326416, "learning_rate": 1.999999628778186e-05, "loss": 2.1435, "step": 9329 }, { "epoch": 0.12, "grad_norm": 5.58942985534668, "learning_rate": 1.9999996196687875e-05, "loss": 2.1312, "step": 9330 }, { "epoch": 0.12, "grad_norm": 5.306055545806885, "learning_rate": 1.9999996104489717e-05, "loss": 2.249, "step": 9331 }, { "epoch": 0.12, "grad_norm": 5.296668529510498, "learning_rate": 1.99999960111874e-05, "loss": 2.1473, "step": 9332 }, { "epoch": 0.12, "grad_norm": 6.072818756103516, "learning_rate": 1.9999995916780904e-05, "loss": 2.5106, "step": 9333 }, { "epoch": 0.12, "grad_norm": 5.152476787567139, "learning_rate": 1.999999582127024e-05, "loss": 2.5701, "step": 9334 }, { "epoch": 0.12, "grad_norm": 5.625085353851318, "learning_rate": 1.999999572465541e-05, "loss": 2.6523, "step": 9335 }, { "epoch": 0.12, "grad_norm": 6.107723712921143, "learning_rate": 1.999999562693641e-05, "loss": 2.6679, "step": 9336 }, { "epoch": 0.12, "grad_norm": 5.307980060577393, "learning_rate": 1.999999552811324e-05, "loss": 2.0867, "step": 9337 }, { "epoch": 0.12, "grad_norm": 6.153618335723877, "learning_rate": 1.99999954281859e-05, "loss": 2.6429, "step": 9338 }, { "epoch": 0.12, "grad_norm": 6.1956892013549805, "learning_rate": 1.9999995327154395e-05, "loss": 2.3445, "step": 9339 }, { "epoch": 0.12, "grad_norm": 6.118619918823242, "learning_rate": 1.999999522501872e-05, "loss": 2.7568, "step": 9340 }, { "epoch": 0.12, "grad_norm": 6.5118489265441895, "learning_rate": 1.9999995121778873e-05, "loss": 2.8088, "step": 9341 }, { "epoch": 0.12, "grad_norm": 6.245302200317383, "learning_rate": 1.9999995017434857e-05, "loss": 2.1264, "step": 9342 }, { "epoch": 0.12, "grad_norm": 5.980774879455566, "learning_rate": 1.9999994911986675e-05, "loss": 2.3541, "step": 9343 }, { "epoch": 0.12, "grad_norm": 6.2539496421813965, "learning_rate": 1.999999480543432e-05, "loss": 2.3992, "step": 9344 }, { "epoch": 0.12, "grad_norm": 5.341752052307129, "learning_rate": 1.99999946977778e-05, "loss": 1.99, "step": 9345 }, { "epoch": 0.12, "grad_norm": 6.15991735458374, "learning_rate": 1.9999994589017108e-05, "loss": 2.352, "step": 9346 }, { "epoch": 0.12, "grad_norm": 5.815503120422363, "learning_rate": 1.9999994479152246e-05, "loss": 2.3646, "step": 9347 }, { "epoch": 0.12, "grad_norm": 5.783111572265625, "learning_rate": 1.9999994368183215e-05, "loss": 2.2618, "step": 9348 }, { "epoch": 0.12, "grad_norm": 5.813600063323975, "learning_rate": 1.999999425611002e-05, "loss": 2.5128, "step": 9349 }, { "epoch": 0.12, "grad_norm": 6.189085960388184, "learning_rate": 1.999999414293265e-05, "loss": 2.4134, "step": 9350 }, { "epoch": 0.12, "grad_norm": 5.458600997924805, "learning_rate": 1.9999994028651115e-05, "loss": 2.0222, "step": 9351 }, { "epoch": 0.12, "grad_norm": 7.835294723510742, "learning_rate": 1.999999391326541e-05, "loss": 2.7592, "step": 9352 }, { "epoch": 0.12, "grad_norm": 5.970528602600098, "learning_rate": 1.9999993796775534e-05, "loss": 2.44, "step": 9353 }, { "epoch": 0.12, "grad_norm": 5.994283676147461, "learning_rate": 1.999999367918149e-05, "loss": 2.3473, "step": 9354 }, { "epoch": 0.12, "grad_norm": 5.407227039337158, "learning_rate": 1.9999993560483276e-05, "loss": 2.6866, "step": 9355 }, { "epoch": 0.12, "grad_norm": 7.06574010848999, "learning_rate": 1.9999993440680897e-05, "loss": 3.1058, "step": 9356 }, { "epoch": 0.12, "grad_norm": 5.571343421936035, "learning_rate": 1.9999993319774344e-05, "loss": 2.9055, "step": 9357 }, { "epoch": 0.12, "grad_norm": 6.377732753753662, "learning_rate": 1.999999319776363e-05, "loss": 2.4868, "step": 9358 }, { "epoch": 0.12, "grad_norm": 6.148699760437012, "learning_rate": 1.9999993074648742e-05, "loss": 2.4612, "step": 9359 }, { "epoch": 0.12, "grad_norm": 5.915914058685303, "learning_rate": 1.9999992950429683e-05, "loss": 2.6508, "step": 9360 }, { "epoch": 0.12, "grad_norm": 5.853206157684326, "learning_rate": 1.9999992825106456e-05, "loss": 2.3405, "step": 9361 }, { "epoch": 0.12, "grad_norm": 5.601466655731201, "learning_rate": 1.9999992698679063e-05, "loss": 2.4544, "step": 9362 }, { "epoch": 0.12, "grad_norm": 6.198080062866211, "learning_rate": 1.9999992571147496e-05, "loss": 2.5985, "step": 9363 }, { "epoch": 0.12, "grad_norm": 5.727558135986328, "learning_rate": 1.9999992442511767e-05, "loss": 2.6225, "step": 9364 }, { "epoch": 0.12, "grad_norm": 6.287287712097168, "learning_rate": 1.9999992312771862e-05, "loss": 1.9088, "step": 9365 }, { "epoch": 0.12, "grad_norm": 5.7022223472595215, "learning_rate": 1.9999992181927793e-05, "loss": 2.142, "step": 9366 }, { "epoch": 0.12, "grad_norm": 4.831046104431152, "learning_rate": 1.9999992049979555e-05, "loss": 2.1063, "step": 9367 }, { "epoch": 0.12, "grad_norm": 5.448161602020264, "learning_rate": 1.9999991916927145e-05, "loss": 2.5619, "step": 9368 }, { "epoch": 0.12, "grad_norm": 5.138493537902832, "learning_rate": 1.999999178277057e-05, "loss": 2.0364, "step": 9369 }, { "epoch": 0.12, "grad_norm": 6.265532970428467, "learning_rate": 1.9999991647509827e-05, "loss": 2.451, "step": 9370 }, { "epoch": 0.12, "grad_norm": 6.970465660095215, "learning_rate": 1.9999991511144912e-05, "loss": 3.4848, "step": 9371 }, { "epoch": 0.12, "grad_norm": 7.235257625579834, "learning_rate": 1.999999137367583e-05, "loss": 2.5079, "step": 9372 }, { "epoch": 0.12, "grad_norm": 6.372746467590332, "learning_rate": 1.9999991235102577e-05, "loss": 3.0197, "step": 9373 }, { "epoch": 0.12, "grad_norm": 5.688083648681641, "learning_rate": 1.999999109542516e-05, "loss": 2.6667, "step": 9374 }, { "epoch": 0.12, "grad_norm": 5.450468063354492, "learning_rate": 1.9999990954643568e-05, "loss": 2.3872, "step": 9375 }, { "epoch": 0.12, "grad_norm": 5.223294734954834, "learning_rate": 1.999999081275781e-05, "loss": 2.0759, "step": 9376 }, { "epoch": 0.12, "grad_norm": 5.773074150085449, "learning_rate": 1.9999990669767885e-05, "loss": 2.3252, "step": 9377 }, { "epoch": 0.12, "grad_norm": 5.937618732452393, "learning_rate": 1.999999052567379e-05, "loss": 2.7664, "step": 9378 }, { "epoch": 0.12, "grad_norm": 5.81212043762207, "learning_rate": 1.999999038047553e-05, "loss": 2.564, "step": 9379 }, { "epoch": 0.12, "grad_norm": 5.803635597229004, "learning_rate": 1.9999990234173098e-05, "loss": 2.3056, "step": 9380 }, { "epoch": 0.12, "grad_norm": 5.710065841674805, "learning_rate": 1.9999990086766495e-05, "loss": 1.9379, "step": 9381 }, { "epoch": 0.12, "grad_norm": 5.724199295043945, "learning_rate": 1.9999989938255727e-05, "loss": 2.5009, "step": 9382 }, { "epoch": 0.12, "grad_norm": 6.0106353759765625, "learning_rate": 1.999998978864079e-05, "loss": 2.781, "step": 9383 }, { "epoch": 0.12, "grad_norm": 5.246847152709961, "learning_rate": 1.9999989637921683e-05, "loss": 2.4793, "step": 9384 }, { "epoch": 0.12, "grad_norm": 6.072144508361816, "learning_rate": 1.999998948609841e-05, "loss": 2.8682, "step": 9385 }, { "epoch": 0.12, "grad_norm": 5.645201683044434, "learning_rate": 1.999998933317097e-05, "loss": 3.0794, "step": 9386 }, { "epoch": 0.12, "grad_norm": 6.3531317710876465, "learning_rate": 1.9999989179139355e-05, "loss": 2.7109, "step": 9387 }, { "epoch": 0.12, "grad_norm": 5.9895830154418945, "learning_rate": 1.9999989024003574e-05, "loss": 2.8672, "step": 9388 }, { "epoch": 0.12, "grad_norm": 5.662266254425049, "learning_rate": 1.999998886776363e-05, "loss": 2.4479, "step": 9389 }, { "epoch": 0.12, "grad_norm": 6.552511692047119, "learning_rate": 1.999998871041951e-05, "loss": 2.2731, "step": 9390 }, { "epoch": 0.12, "grad_norm": 5.540102005004883, "learning_rate": 1.9999988551971225e-05, "loss": 2.1484, "step": 9391 }, { "epoch": 0.12, "grad_norm": 5.299467086791992, "learning_rate": 1.9999988392418772e-05, "loss": 2.1079, "step": 9392 }, { "epoch": 0.12, "grad_norm": 6.3955912590026855, "learning_rate": 1.9999988231762152e-05, "loss": 2.7458, "step": 9393 }, { "epoch": 0.12, "grad_norm": 5.244866847991943, "learning_rate": 1.999998807000136e-05, "loss": 2.187, "step": 9394 }, { "epoch": 0.12, "grad_norm": 5.537261486053467, "learning_rate": 1.9999987907136402e-05, "loss": 2.2813, "step": 9395 }, { "epoch": 0.12, "grad_norm": 5.880566120147705, "learning_rate": 1.9999987743167276e-05, "loss": 2.7803, "step": 9396 }, { "epoch": 0.12, "grad_norm": 6.040311336517334, "learning_rate": 1.999998757809398e-05, "loss": 2.6559, "step": 9397 }, { "epoch": 0.12, "grad_norm": 5.368263244628906, "learning_rate": 1.9999987411916516e-05, "loss": 2.0882, "step": 9398 }, { "epoch": 0.12, "grad_norm": 6.126481533050537, "learning_rate": 1.9999987244634885e-05, "loss": 3.0138, "step": 9399 }, { "epoch": 0.12, "grad_norm": 5.387876510620117, "learning_rate": 1.9999987076249082e-05, "loss": 1.9413, "step": 9400 }, { "epoch": 0.12, "grad_norm": 4.973618030548096, "learning_rate": 1.9999986906759114e-05, "loss": 2.2413, "step": 9401 }, { "epoch": 0.12, "grad_norm": 5.642082214355469, "learning_rate": 1.999998673616498e-05, "loss": 2.318, "step": 9402 }, { "epoch": 0.12, "grad_norm": 5.862476348876953, "learning_rate": 1.9999986564466676e-05, "loss": 2.2207, "step": 9403 }, { "epoch": 0.12, "grad_norm": 6.186092853546143, "learning_rate": 1.9999986391664203e-05, "loss": 2.7865, "step": 9404 }, { "epoch": 0.12, "grad_norm": 5.25841760635376, "learning_rate": 1.999998621775756e-05, "loss": 2.3846, "step": 9405 }, { "epoch": 0.12, "grad_norm": 5.926302909851074, "learning_rate": 1.999998604274675e-05, "loss": 2.3726, "step": 9406 }, { "epoch": 0.12, "grad_norm": 6.157643795013428, "learning_rate": 1.9999985866631776e-05, "loss": 2.886, "step": 9407 }, { "epoch": 0.12, "grad_norm": 5.88646936416626, "learning_rate": 1.999998568941263e-05, "loss": 2.409, "step": 9408 }, { "epoch": 0.12, "grad_norm": 5.883012771606445, "learning_rate": 1.9999985511089317e-05, "loss": 2.4744, "step": 9409 }, { "epoch": 0.12, "grad_norm": 5.604555130004883, "learning_rate": 1.9999985331661836e-05, "loss": 2.8612, "step": 9410 }, { "epoch": 0.12, "grad_norm": 6.303854942321777, "learning_rate": 1.9999985151130187e-05, "loss": 2.5683, "step": 9411 }, { "epoch": 0.12, "grad_norm": 5.521998882293701, "learning_rate": 1.999998496949437e-05, "loss": 2.5074, "step": 9412 }, { "epoch": 0.12, "grad_norm": 6.298323154449463, "learning_rate": 1.9999984786754384e-05, "loss": 2.1495, "step": 9413 }, { "epoch": 0.12, "grad_norm": 6.647917747497559, "learning_rate": 1.999998460291023e-05, "loss": 2.7554, "step": 9414 }, { "epoch": 0.12, "grad_norm": 5.445492267608643, "learning_rate": 1.9999984417961907e-05, "loss": 2.4435, "step": 9415 }, { "epoch": 0.12, "grad_norm": 5.255932807922363, "learning_rate": 1.9999984231909416e-05, "loss": 2.2525, "step": 9416 }, { "epoch": 0.12, "grad_norm": 5.839693069458008, "learning_rate": 1.9999984044752763e-05, "loss": 2.5172, "step": 9417 }, { "epoch": 0.12, "grad_norm": 5.308437824249268, "learning_rate": 1.999998385649194e-05, "loss": 2.5001, "step": 9418 }, { "epoch": 0.12, "grad_norm": 5.254206657409668, "learning_rate": 1.9999983667126942e-05, "loss": 2.2901, "step": 9419 }, { "epoch": 0.12, "grad_norm": 5.463030815124512, "learning_rate": 1.9999983476657784e-05, "loss": 2.7427, "step": 9420 }, { "epoch": 0.12, "grad_norm": 5.9812774658203125, "learning_rate": 1.9999983285084454e-05, "loss": 2.6908, "step": 9421 }, { "epoch": 0.12, "grad_norm": 4.977238655090332, "learning_rate": 1.999998309240696e-05, "loss": 1.8497, "step": 9422 }, { "epoch": 0.12, "grad_norm": 6.631804943084717, "learning_rate": 1.9999982898625293e-05, "loss": 2.6816, "step": 9423 }, { "epoch": 0.12, "grad_norm": 7.080934047698975, "learning_rate": 1.999998270373946e-05, "loss": 2.6454, "step": 9424 }, { "epoch": 0.12, "grad_norm": 6.949439525604248, "learning_rate": 1.999998250774946e-05, "loss": 3.0757, "step": 9425 }, { "epoch": 0.12, "grad_norm": 6.083422660827637, "learning_rate": 1.9999982310655296e-05, "loss": 2.1469, "step": 9426 }, { "epoch": 0.12, "grad_norm": 7.252900123596191, "learning_rate": 1.999998211245696e-05, "loss": 2.815, "step": 9427 }, { "epoch": 0.12, "grad_norm": 5.433562755584717, "learning_rate": 1.9999981913154456e-05, "loss": 2.4255, "step": 9428 }, { "epoch": 0.12, "grad_norm": 6.178565979003906, "learning_rate": 1.9999981712747786e-05, "loss": 3.291, "step": 9429 }, { "epoch": 0.12, "grad_norm": 5.2900710105896, "learning_rate": 1.9999981511236947e-05, "loss": 2.488, "step": 9430 }, { "epoch": 0.12, "grad_norm": 5.194973468780518, "learning_rate": 1.9999981308621943e-05, "loss": 2.0059, "step": 9431 }, { "epoch": 0.12, "grad_norm": 5.559176921844482, "learning_rate": 1.9999981104902767e-05, "loss": 2.303, "step": 9432 }, { "epoch": 0.12, "grad_norm": 5.749264717102051, "learning_rate": 1.9999980900079426e-05, "loss": 2.4344, "step": 9433 }, { "epoch": 0.12, "grad_norm": 5.90114164352417, "learning_rate": 1.999998069415192e-05, "loss": 2.7926, "step": 9434 }, { "epoch": 0.12, "grad_norm": 4.937808990478516, "learning_rate": 1.9999980487120242e-05, "loss": 1.9295, "step": 9435 }, { "epoch": 0.12, "grad_norm": 6.084866046905518, "learning_rate": 1.99999802789844e-05, "loss": 2.2233, "step": 9436 }, { "epoch": 0.12, "grad_norm": 5.751319885253906, "learning_rate": 1.9999980069744388e-05, "loss": 2.9735, "step": 9437 }, { "epoch": 0.12, "grad_norm": 6.310868740081787, "learning_rate": 1.999997985940021e-05, "loss": 2.5032, "step": 9438 }, { "epoch": 0.12, "grad_norm": 5.5361247062683105, "learning_rate": 1.9999979647951867e-05, "loss": 2.7623, "step": 9439 }, { "epoch": 0.12, "grad_norm": 5.319387435913086, "learning_rate": 1.9999979435399354e-05, "loss": 2.9261, "step": 9440 }, { "epoch": 0.12, "grad_norm": 5.656595230102539, "learning_rate": 1.9999979221742672e-05, "loss": 2.4605, "step": 9441 }, { "epoch": 0.12, "grad_norm": 5.733743190765381, "learning_rate": 1.9999979006981826e-05, "loss": 1.8027, "step": 9442 }, { "epoch": 0.12, "grad_norm": 5.057083606719971, "learning_rate": 1.9999978791116807e-05, "loss": 2.1488, "step": 9443 }, { "epoch": 0.12, "grad_norm": 5.50647497177124, "learning_rate": 1.9999978574147624e-05, "loss": 2.5595, "step": 9444 }, { "epoch": 0.12, "grad_norm": 5.408568382263184, "learning_rate": 1.9999978356074275e-05, "loss": 2.0669, "step": 9445 }, { "epoch": 0.12, "grad_norm": 5.902955532073975, "learning_rate": 1.9999978136896758e-05, "loss": 2.6154, "step": 9446 }, { "epoch": 0.12, "grad_norm": 5.827919006347656, "learning_rate": 1.9999977916615073e-05, "loss": 2.6382, "step": 9447 }, { "epoch": 0.12, "grad_norm": 6.079471111297607, "learning_rate": 1.9999977695229222e-05, "loss": 2.8749, "step": 9448 }, { "epoch": 0.12, "grad_norm": 5.999644756317139, "learning_rate": 1.9999977472739203e-05, "loss": 2.5489, "step": 9449 }, { "epoch": 0.12, "grad_norm": 6.014369964599609, "learning_rate": 1.9999977249145016e-05, "loss": 2.6685, "step": 9450 }, { "epoch": 0.12, "grad_norm": 5.034068584442139, "learning_rate": 1.9999977024446664e-05, "loss": 2.1401, "step": 9451 }, { "epoch": 0.12, "grad_norm": 6.617315292358398, "learning_rate": 1.9999976798644143e-05, "loss": 3.0892, "step": 9452 }, { "epoch": 0.12, "grad_norm": 5.919337749481201, "learning_rate": 1.9999976571737457e-05, "loss": 2.2388, "step": 9453 }, { "epoch": 0.12, "grad_norm": 6.483886241912842, "learning_rate": 1.9999976343726603e-05, "loss": 2.6348, "step": 9454 }, { "epoch": 0.12, "grad_norm": 6.519753456115723, "learning_rate": 1.999997611461158e-05, "loss": 2.4982, "step": 9455 }, { "epoch": 0.12, "grad_norm": 5.769966125488281, "learning_rate": 1.9999975884392392e-05, "loss": 2.7906, "step": 9456 }, { "epoch": 0.12, "grad_norm": 6.500121593475342, "learning_rate": 1.9999975653069036e-05, "loss": 3.1128, "step": 9457 }, { "epoch": 0.12, "grad_norm": 5.410768985748291, "learning_rate": 1.9999975420641515e-05, "loss": 2.5842, "step": 9458 }, { "epoch": 0.12, "grad_norm": 4.772837162017822, "learning_rate": 1.9999975187109825e-05, "loss": 1.8105, "step": 9459 }, { "epoch": 0.12, "grad_norm": 5.448964595794678, "learning_rate": 1.999997495247397e-05, "loss": 2.2973, "step": 9460 }, { "epoch": 0.12, "grad_norm": 6.429152965545654, "learning_rate": 1.9999974716733947e-05, "loss": 2.4945, "step": 9461 }, { "epoch": 0.12, "grad_norm": 6.107858657836914, "learning_rate": 1.9999974479889756e-05, "loss": 3.0485, "step": 9462 }, { "epoch": 0.12, "grad_norm": 5.1339311599731445, "learning_rate": 1.99999742419414e-05, "loss": 2.409, "step": 9463 }, { "epoch": 0.12, "grad_norm": 5.815486431121826, "learning_rate": 1.9999974002888874e-05, "loss": 2.5949, "step": 9464 }, { "epoch": 0.12, "grad_norm": 5.560460567474365, "learning_rate": 1.9999973762732184e-05, "loss": 2.8089, "step": 9465 }, { "epoch": 0.12, "grad_norm": 5.599948406219482, "learning_rate": 1.999997352147133e-05, "loss": 2.314, "step": 9466 }, { "epoch": 0.12, "grad_norm": 5.98983097076416, "learning_rate": 1.9999973279106305e-05, "loss": 2.9239, "step": 9467 }, { "epoch": 0.12, "grad_norm": 6.234588623046875, "learning_rate": 1.9999973035637113e-05, "loss": 2.6287, "step": 9468 }, { "epoch": 0.12, "grad_norm": 5.374390125274658, "learning_rate": 1.9999972791063756e-05, "loss": 2.6323, "step": 9469 }, { "epoch": 0.12, "grad_norm": 5.433921813964844, "learning_rate": 1.9999972545386234e-05, "loss": 2.3921, "step": 9470 }, { "epoch": 0.12, "grad_norm": 5.752783298492432, "learning_rate": 1.9999972298604544e-05, "loss": 2.719, "step": 9471 }, { "epoch": 0.12, "grad_norm": 6.125173568725586, "learning_rate": 1.9999972050718685e-05, "loss": 2.3318, "step": 9472 }, { "epoch": 0.12, "grad_norm": 6.730288982391357, "learning_rate": 1.999997180172866e-05, "loss": 2.8428, "step": 9473 }, { "epoch": 0.12, "grad_norm": 5.653431415557861, "learning_rate": 1.999997155163447e-05, "loss": 2.5715, "step": 9474 }, { "epoch": 0.12, "grad_norm": 5.93103551864624, "learning_rate": 1.9999971300436114e-05, "loss": 2.1855, "step": 9475 }, { "epoch": 0.12, "grad_norm": 6.387020587921143, "learning_rate": 1.999997104813359e-05, "loss": 2.3293, "step": 9476 }, { "epoch": 0.12, "grad_norm": 5.291046619415283, "learning_rate": 1.99999707947269e-05, "loss": 2.521, "step": 9477 }, { "epoch": 0.12, "grad_norm": 5.836363792419434, "learning_rate": 1.9999970540216044e-05, "loss": 1.905, "step": 9478 }, { "epoch": 0.12, "grad_norm": 5.964914321899414, "learning_rate": 1.9999970284601023e-05, "loss": 2.0527, "step": 9479 }, { "epoch": 0.12, "grad_norm": 6.046021938323975, "learning_rate": 1.999997002788183e-05, "loss": 2.6324, "step": 9480 }, { "epoch": 0.12, "grad_norm": 5.391945838928223, "learning_rate": 1.999996977005848e-05, "loss": 2.3869, "step": 9481 }, { "epoch": 0.12, "grad_norm": 5.831295013427734, "learning_rate": 1.9999969511130956e-05, "loss": 2.6911, "step": 9482 }, { "epoch": 0.12, "grad_norm": 5.981383800506592, "learning_rate": 1.999996925109927e-05, "loss": 2.7864, "step": 9483 }, { "epoch": 0.12, "grad_norm": 5.392453670501709, "learning_rate": 1.9999968989963415e-05, "loss": 2.1894, "step": 9484 }, { "epoch": 0.12, "grad_norm": 4.838772296905518, "learning_rate": 1.9999968727723394e-05, "loss": 2.3319, "step": 9485 }, { "epoch": 0.12, "grad_norm": 7.398254871368408, "learning_rate": 1.9999968464379207e-05, "loss": 3.0513, "step": 9486 }, { "epoch": 0.12, "grad_norm": 5.374623775482178, "learning_rate": 1.9999968199930856e-05, "loss": 2.1701, "step": 9487 }, { "epoch": 0.12, "grad_norm": 5.107382774353027, "learning_rate": 1.9999967934378336e-05, "loss": 2.5927, "step": 9488 }, { "epoch": 0.12, "grad_norm": 5.203587055206299, "learning_rate": 1.999996766772165e-05, "loss": 2.139, "step": 9489 }, { "epoch": 0.12, "grad_norm": 4.842303276062012, "learning_rate": 1.99999673999608e-05, "loss": 2.1837, "step": 9490 }, { "epoch": 0.12, "grad_norm": 4.9690022468566895, "learning_rate": 1.9999967131095782e-05, "loss": 1.801, "step": 9491 }, { "epoch": 0.12, "grad_norm": 6.00001859664917, "learning_rate": 1.99999668611266e-05, "loss": 2.4663, "step": 9492 }, { "epoch": 0.12, "grad_norm": 5.3616943359375, "learning_rate": 1.999996659005325e-05, "loss": 2.2451, "step": 9493 }, { "epoch": 0.12, "grad_norm": 5.745017051696777, "learning_rate": 1.9999966317875736e-05, "loss": 2.8162, "step": 9494 }, { "epoch": 0.12, "grad_norm": 5.65357780456543, "learning_rate": 1.9999966044594054e-05, "loss": 2.301, "step": 9495 }, { "epoch": 0.12, "grad_norm": 5.1518096923828125, "learning_rate": 1.999996577020821e-05, "loss": 1.9056, "step": 9496 }, { "epoch": 0.12, "grad_norm": 6.3934221267700195, "learning_rate": 1.9999965494718198e-05, "loss": 2.5381, "step": 9497 }, { "epoch": 0.12, "grad_norm": 5.507431983947754, "learning_rate": 1.9999965218124017e-05, "loss": 2.2543, "step": 9498 }, { "epoch": 0.12, "grad_norm": 5.127645492553711, "learning_rate": 1.9999964940425675e-05, "loss": 2.5475, "step": 9499 }, { "epoch": 0.12, "grad_norm": 5.703267574310303, "learning_rate": 1.9999964661623164e-05, "loss": 2.2412, "step": 9500 }, { "epoch": 0.12, "grad_norm": 5.66806697845459, "learning_rate": 1.999996438171649e-05, "loss": 2.3809, "step": 9501 }, { "epoch": 0.12, "grad_norm": 5.217545509338379, "learning_rate": 1.9999964100705644e-05, "loss": 2.2217, "step": 9502 }, { "epoch": 0.12, "grad_norm": 5.219440937042236, "learning_rate": 1.999996381859064e-05, "loss": 2.5182, "step": 9503 }, { "epoch": 0.12, "grad_norm": 5.838109016418457, "learning_rate": 1.9999963535371464e-05, "loss": 2.2601, "step": 9504 }, { "epoch": 0.12, "grad_norm": 5.083373546600342, "learning_rate": 1.9999963251048125e-05, "loss": 2.0838, "step": 9505 }, { "epoch": 0.12, "grad_norm": 5.195124626159668, "learning_rate": 1.999996296562062e-05, "loss": 2.5174, "step": 9506 }, { "epoch": 0.12, "grad_norm": 5.529825687408447, "learning_rate": 1.999996267908895e-05, "loss": 2.4521, "step": 9507 }, { "epoch": 0.12, "grad_norm": 4.508051872253418, "learning_rate": 1.9999962391453116e-05, "loss": 2.3139, "step": 9508 }, { "epoch": 0.12, "grad_norm": 4.714776992797852, "learning_rate": 1.9999962102713114e-05, "loss": 2.2783, "step": 9509 }, { "epoch": 0.12, "grad_norm": 6.066493511199951, "learning_rate": 1.999996181286895e-05, "loss": 2.7899, "step": 9510 }, { "epoch": 0.12, "grad_norm": 5.439560890197754, "learning_rate": 1.9999961521920616e-05, "loss": 2.5625, "step": 9511 }, { "epoch": 0.12, "grad_norm": 5.986692428588867, "learning_rate": 1.9999961229868118e-05, "loss": 2.6592, "step": 9512 }, { "epoch": 0.12, "grad_norm": 5.374180793762207, "learning_rate": 1.9999960936711458e-05, "loss": 2.3327, "step": 9513 }, { "epoch": 0.12, "grad_norm": 6.029965400695801, "learning_rate": 1.999996064245063e-05, "loss": 2.2858, "step": 9514 }, { "epoch": 0.12, "grad_norm": 5.099582672119141, "learning_rate": 1.9999960347085637e-05, "loss": 2.2101, "step": 9515 }, { "epoch": 0.12, "grad_norm": 5.938945770263672, "learning_rate": 1.999996005061648e-05, "loss": 2.2058, "step": 9516 }, { "epoch": 0.12, "grad_norm": 6.077001571655273, "learning_rate": 1.9999959753043155e-05, "loss": 2.6772, "step": 9517 }, { "epoch": 0.12, "grad_norm": 7.171144485473633, "learning_rate": 1.9999959454365664e-05, "loss": 2.8389, "step": 9518 }, { "epoch": 0.12, "grad_norm": 6.468217372894287, "learning_rate": 1.999995915458401e-05, "loss": 3.0498, "step": 9519 }, { "epoch": 0.12, "grad_norm": 5.562965393066406, "learning_rate": 1.9999958853698192e-05, "loss": 2.2608, "step": 9520 }, { "epoch": 0.12, "grad_norm": 5.426872730255127, "learning_rate": 1.999995855170821e-05, "loss": 2.1883, "step": 9521 }, { "epoch": 0.12, "grad_norm": 7.080284595489502, "learning_rate": 1.9999958248614057e-05, "loss": 2.6142, "step": 9522 }, { "epoch": 0.12, "grad_norm": 6.898589611053467, "learning_rate": 1.9999957944415743e-05, "loss": 2.4278, "step": 9523 }, { "epoch": 0.12, "grad_norm": 6.844054222106934, "learning_rate": 1.9999957639113265e-05, "loss": 2.6349, "step": 9524 }, { "epoch": 0.12, "grad_norm": 6.164281368255615, "learning_rate": 1.9999957332706618e-05, "loss": 2.4171, "step": 9525 }, { "epoch": 0.12, "grad_norm": 6.299881458282471, "learning_rate": 1.9999957025195812e-05, "loss": 2.5223, "step": 9526 }, { "epoch": 0.12, "grad_norm": 5.822054862976074, "learning_rate": 1.999995671658084e-05, "loss": 2.3239, "step": 9527 }, { "epoch": 0.12, "grad_norm": 5.346337795257568, "learning_rate": 1.9999956406861696e-05, "loss": 2.5505, "step": 9528 }, { "epoch": 0.12, "grad_norm": 5.804269790649414, "learning_rate": 1.9999956096038393e-05, "loss": 2.9073, "step": 9529 }, { "epoch": 0.12, "grad_norm": 4.776869297027588, "learning_rate": 1.9999955784110924e-05, "loss": 2.1128, "step": 9530 }, { "epoch": 0.12, "grad_norm": 5.508131980895996, "learning_rate": 1.999995547107929e-05, "loss": 2.6875, "step": 9531 }, { "epoch": 0.12, "grad_norm": 6.057164192199707, "learning_rate": 1.9999955156943494e-05, "loss": 2.5965, "step": 9532 }, { "epoch": 0.12, "grad_norm": 5.644495010375977, "learning_rate": 1.999995484170353e-05, "loss": 2.5551, "step": 9533 }, { "epoch": 0.12, "grad_norm": 5.077588081359863, "learning_rate": 1.99999545253594e-05, "loss": 1.876, "step": 9534 }, { "epoch": 0.12, "grad_norm": 5.408042907714844, "learning_rate": 1.999995420791111e-05, "loss": 2.4282, "step": 9535 }, { "epoch": 0.12, "grad_norm": 5.918310642242432, "learning_rate": 1.999995388935865e-05, "loss": 2.5801, "step": 9536 }, { "epoch": 0.12, "grad_norm": 6.7859206199646, "learning_rate": 1.999995356970203e-05, "loss": 2.1578, "step": 9537 }, { "epoch": 0.12, "grad_norm": 6.013772487640381, "learning_rate": 1.9999953248941245e-05, "loss": 2.9764, "step": 9538 }, { "epoch": 0.12, "grad_norm": 6.0669426918029785, "learning_rate": 1.9999952927076297e-05, "loss": 3.0141, "step": 9539 }, { "epoch": 0.12, "grad_norm": 6.645596504211426, "learning_rate": 1.999995260410718e-05, "loss": 2.6226, "step": 9540 }, { "epoch": 0.12, "grad_norm": 6.406651496887207, "learning_rate": 1.99999522800339e-05, "loss": 2.6052, "step": 9541 }, { "epoch": 0.12, "grad_norm": 5.401613712310791, "learning_rate": 1.9999951954856458e-05, "loss": 2.2254, "step": 9542 }, { "epoch": 0.12, "grad_norm": 5.649628162384033, "learning_rate": 1.9999951628574847e-05, "loss": 2.2858, "step": 9543 }, { "epoch": 0.12, "grad_norm": 6.4766974449157715, "learning_rate": 1.9999951301189074e-05, "loss": 2.473, "step": 9544 }, { "epoch": 0.12, "grad_norm": 6.00382661819458, "learning_rate": 1.999995097269914e-05, "loss": 2.5878, "step": 9545 }, { "epoch": 0.12, "grad_norm": 5.89046573638916, "learning_rate": 1.999995064310504e-05, "loss": 2.8861, "step": 9546 }, { "epoch": 0.12, "grad_norm": 6.007609844207764, "learning_rate": 1.9999950312406776e-05, "loss": 2.5309, "step": 9547 }, { "epoch": 0.12, "grad_norm": 6.2877984046936035, "learning_rate": 1.9999949980604346e-05, "loss": 2.1382, "step": 9548 }, { "epoch": 0.12, "grad_norm": 5.6783952713012695, "learning_rate": 1.999994964769775e-05, "loss": 2.9831, "step": 9549 }, { "epoch": 0.12, "grad_norm": 5.712231636047363, "learning_rate": 1.9999949313686992e-05, "loss": 2.4674, "step": 9550 }, { "epoch": 0.12, "grad_norm": 6.9290452003479, "learning_rate": 1.999994897857207e-05, "loss": 2.5891, "step": 9551 }, { "epoch": 0.12, "grad_norm": 5.404115200042725, "learning_rate": 1.9999948642352988e-05, "loss": 2.5479, "step": 9552 }, { "epoch": 0.12, "grad_norm": 5.806311130523682, "learning_rate": 1.9999948305029737e-05, "loss": 2.6351, "step": 9553 }, { "epoch": 0.12, "grad_norm": 5.429923057556152, "learning_rate": 1.9999947966602324e-05, "loss": 2.493, "step": 9554 }, { "epoch": 0.12, "grad_norm": 6.574697494506836, "learning_rate": 1.9999947627070746e-05, "loss": 2.5342, "step": 9555 }, { "epoch": 0.12, "grad_norm": 5.856566905975342, "learning_rate": 1.9999947286435003e-05, "loss": 2.678, "step": 9556 }, { "epoch": 0.12, "grad_norm": 4.979762077331543, "learning_rate": 1.99999469446951e-05, "loss": 2.3967, "step": 9557 }, { "epoch": 0.12, "grad_norm": 5.9244561195373535, "learning_rate": 1.999994660185103e-05, "loss": 2.5822, "step": 9558 }, { "epoch": 0.12, "grad_norm": 5.463019847869873, "learning_rate": 1.9999946257902797e-05, "loss": 2.7403, "step": 9559 }, { "epoch": 0.12, "grad_norm": 5.734491348266602, "learning_rate": 1.9999945912850404e-05, "loss": 2.676, "step": 9560 }, { "epoch": 0.12, "grad_norm": 6.399394512176514, "learning_rate": 1.9999945566693842e-05, "loss": 2.2874, "step": 9561 }, { "epoch": 0.12, "grad_norm": 5.830671310424805, "learning_rate": 1.999994521943312e-05, "loss": 2.7725, "step": 9562 }, { "epoch": 0.12, "grad_norm": 6.766200065612793, "learning_rate": 1.999994487106823e-05, "loss": 2.8367, "step": 9563 }, { "epoch": 0.12, "grad_norm": 5.360352993011475, "learning_rate": 1.999994452159918e-05, "loss": 2.4767, "step": 9564 }, { "epoch": 0.12, "grad_norm": 5.7382402420043945, "learning_rate": 1.9999944171025966e-05, "loss": 2.6798, "step": 9565 }, { "epoch": 0.12, "grad_norm": 6.604632377624512, "learning_rate": 1.9999943819348586e-05, "loss": 2.7417, "step": 9566 }, { "epoch": 0.12, "grad_norm": 5.731563091278076, "learning_rate": 1.9999943466567044e-05, "loss": 2.6694, "step": 9567 }, { "epoch": 0.12, "grad_norm": 6.346935272216797, "learning_rate": 1.999994311268134e-05, "loss": 2.6231, "step": 9568 }, { "epoch": 0.12, "grad_norm": 5.540862083435059, "learning_rate": 1.9999942757691473e-05, "loss": 2.317, "step": 9569 }, { "epoch": 0.12, "grad_norm": 5.331707000732422, "learning_rate": 1.9999942401597443e-05, "loss": 2.5636, "step": 9570 }, { "epoch": 0.12, "grad_norm": 5.820589065551758, "learning_rate": 1.9999942044399247e-05, "loss": 2.8342, "step": 9571 }, { "epoch": 0.12, "grad_norm": 6.827341079711914, "learning_rate": 1.9999941686096887e-05, "loss": 2.843, "step": 9572 }, { "epoch": 0.12, "grad_norm": 5.453071117401123, "learning_rate": 1.999994132669037e-05, "loss": 2.3331, "step": 9573 }, { "epoch": 0.12, "grad_norm": 5.815117835998535, "learning_rate": 1.9999940966179682e-05, "loss": 3.026, "step": 9574 }, { "epoch": 0.12, "grad_norm": 5.448243618011475, "learning_rate": 1.9999940604564837e-05, "loss": 2.4243, "step": 9575 }, { "epoch": 0.12, "grad_norm": 5.580719947814941, "learning_rate": 1.9999940241845827e-05, "loss": 2.0293, "step": 9576 }, { "epoch": 0.12, "grad_norm": 5.0846452713012695, "learning_rate": 1.9999939878022652e-05, "loss": 2.2601, "step": 9577 }, { "epoch": 0.12, "grad_norm": 5.305818557739258, "learning_rate": 1.9999939513095315e-05, "loss": 2.1818, "step": 9578 }, { "epoch": 0.12, "grad_norm": 5.296003818511963, "learning_rate": 1.9999939147063813e-05, "loss": 2.9342, "step": 9579 }, { "epoch": 0.12, "grad_norm": 6.693587779998779, "learning_rate": 1.9999938779928153e-05, "loss": 3.0051, "step": 9580 }, { "epoch": 0.12, "grad_norm": 5.935116291046143, "learning_rate": 1.9999938411688328e-05, "loss": 2.3627, "step": 9581 }, { "epoch": 0.12, "grad_norm": 6.229569911956787, "learning_rate": 1.9999938042344338e-05, "loss": 2.7648, "step": 9582 }, { "epoch": 0.12, "grad_norm": 5.232210159301758, "learning_rate": 1.999993767189619e-05, "loss": 2.2601, "step": 9583 }, { "epoch": 0.12, "grad_norm": 5.289175987243652, "learning_rate": 1.9999937300343872e-05, "loss": 2.3344, "step": 9584 }, { "epoch": 0.12, "grad_norm": 5.611778259277344, "learning_rate": 1.9999936927687397e-05, "loss": 1.9526, "step": 9585 }, { "epoch": 0.12, "grad_norm": 6.133121490478516, "learning_rate": 1.9999936553926757e-05, "loss": 2.3596, "step": 9586 }, { "epoch": 0.12, "grad_norm": 5.904094219207764, "learning_rate": 1.9999936179061955e-05, "loss": 2.3832, "step": 9587 }, { "epoch": 0.12, "grad_norm": 5.04616117477417, "learning_rate": 1.999993580309299e-05, "loss": 1.9572, "step": 9588 }, { "epoch": 0.12, "grad_norm": 5.280906677246094, "learning_rate": 1.9999935426019866e-05, "loss": 2.3682, "step": 9589 }, { "epoch": 0.12, "grad_norm": 6.187143802642822, "learning_rate": 1.9999935047842576e-05, "loss": 3.3202, "step": 9590 }, { "epoch": 0.12, "grad_norm": 5.238595485687256, "learning_rate": 1.999993466856112e-05, "loss": 1.7407, "step": 9591 }, { "epoch": 0.12, "grad_norm": 5.0422444343566895, "learning_rate": 1.9999934288175507e-05, "loss": 2.1165, "step": 9592 }, { "epoch": 0.12, "grad_norm": 5.405186653137207, "learning_rate": 1.9999933906685732e-05, "loss": 2.9372, "step": 9593 }, { "epoch": 0.12, "grad_norm": 5.6646599769592285, "learning_rate": 1.9999933524091792e-05, "loss": 2.5608, "step": 9594 }, { "epoch": 0.12, "grad_norm": 6.184089183807373, "learning_rate": 1.9999933140393687e-05, "loss": 2.681, "step": 9595 }, { "epoch": 0.12, "grad_norm": 5.713804244995117, "learning_rate": 1.9999932755591426e-05, "loss": 2.5542, "step": 9596 }, { "epoch": 0.12, "grad_norm": 6.4227471351623535, "learning_rate": 1.9999932369685e-05, "loss": 2.6356, "step": 9597 }, { "epoch": 0.12, "grad_norm": 5.158885478973389, "learning_rate": 1.999993198267441e-05, "loss": 2.4178, "step": 9598 }, { "epoch": 0.12, "grad_norm": 4.569433212280273, "learning_rate": 1.999993159455966e-05, "loss": 2.2534, "step": 9599 }, { "epoch": 0.12, "grad_norm": 5.4829535484313965, "learning_rate": 1.999993120534075e-05, "loss": 2.5118, "step": 9600 }, { "epoch": 0.12, "grad_norm": 5.55564022064209, "learning_rate": 1.999993081501767e-05, "loss": 2.6299, "step": 9601 }, { "epoch": 0.12, "grad_norm": 6.203643798828125, "learning_rate": 1.9999930423590437e-05, "loss": 2.3893, "step": 9602 }, { "epoch": 0.12, "grad_norm": 5.54606294631958, "learning_rate": 1.9999930031059038e-05, "loss": 2.386, "step": 9603 }, { "epoch": 0.12, "grad_norm": 5.46633243560791, "learning_rate": 1.9999929637423478e-05, "loss": 2.4381, "step": 9604 }, { "epoch": 0.12, "grad_norm": 5.219058990478516, "learning_rate": 1.9999929242683756e-05, "loss": 2.6007, "step": 9605 }, { "epoch": 0.12, "grad_norm": 6.049563884735107, "learning_rate": 1.9999928846839873e-05, "loss": 2.3204, "step": 9606 }, { "epoch": 0.12, "grad_norm": 6.156798362731934, "learning_rate": 1.9999928449891824e-05, "loss": 2.5173, "step": 9607 }, { "epoch": 0.12, "grad_norm": 5.513121128082275, "learning_rate": 1.9999928051839617e-05, "loss": 2.2048, "step": 9608 }, { "epoch": 0.12, "grad_norm": 6.175306797027588, "learning_rate": 1.9999927652683245e-05, "loss": 2.8176, "step": 9609 }, { "epoch": 0.12, "grad_norm": 4.847548007965088, "learning_rate": 1.9999927252422715e-05, "loss": 2.414, "step": 9610 }, { "epoch": 0.12, "grad_norm": 6.092710018157959, "learning_rate": 1.9999926851058023e-05, "loss": 2.4308, "step": 9611 }, { "epoch": 0.12, "grad_norm": 5.454433917999268, "learning_rate": 1.999992644858917e-05, "loss": 2.3145, "step": 9612 }, { "epoch": 0.12, "grad_norm": 5.7956767082214355, "learning_rate": 1.9999926045016155e-05, "loss": 2.3562, "step": 9613 }, { "epoch": 0.12, "grad_norm": 5.23743200302124, "learning_rate": 1.9999925640338975e-05, "loss": 2.6616, "step": 9614 }, { "epoch": 0.12, "grad_norm": 5.70847225189209, "learning_rate": 1.9999925234557636e-05, "loss": 2.5839, "step": 9615 }, { "epoch": 0.12, "grad_norm": 5.462133884429932, "learning_rate": 1.9999924827672136e-05, "loss": 1.9829, "step": 9616 }, { "epoch": 0.12, "grad_norm": 5.757789134979248, "learning_rate": 1.9999924419682474e-05, "loss": 2.8959, "step": 9617 }, { "epoch": 0.12, "grad_norm": 6.309150218963623, "learning_rate": 1.999992401058865e-05, "loss": 2.5032, "step": 9618 }, { "epoch": 0.12, "grad_norm": 6.7624192237854, "learning_rate": 1.9999923600390666e-05, "loss": 2.308, "step": 9619 }, { "epoch": 0.12, "grad_norm": 5.62717866897583, "learning_rate": 1.9999923189088522e-05, "loss": 2.3378, "step": 9620 }, { "epoch": 0.12, "grad_norm": 4.589144229888916, "learning_rate": 1.9999922776682217e-05, "loss": 1.8359, "step": 9621 }, { "epoch": 0.12, "grad_norm": 6.103856086730957, "learning_rate": 1.9999922363171747e-05, "loss": 3.1721, "step": 9622 }, { "epoch": 0.12, "grad_norm": 5.69934606552124, "learning_rate": 1.999992194855712e-05, "loss": 2.2365, "step": 9623 }, { "epoch": 0.12, "grad_norm": 5.062212944030762, "learning_rate": 1.999992153283833e-05, "loss": 2.1781, "step": 9624 }, { "epoch": 0.12, "grad_norm": 5.8789591789245605, "learning_rate": 1.9999921116015378e-05, "loss": 2.9699, "step": 9625 }, { "epoch": 0.12, "grad_norm": 4.964593887329102, "learning_rate": 1.9999920698088268e-05, "loss": 1.7038, "step": 9626 }, { "epoch": 0.12, "grad_norm": 4.81646203994751, "learning_rate": 1.9999920279056996e-05, "loss": 2.3105, "step": 9627 }, { "epoch": 0.12, "grad_norm": 5.924152374267578, "learning_rate": 1.9999919858921563e-05, "loss": 3.0497, "step": 9628 }, { "epoch": 0.12, "grad_norm": 5.95221471786499, "learning_rate": 1.9999919437681968e-05, "loss": 2.943, "step": 9629 }, { "epoch": 0.12, "grad_norm": 5.641091346740723, "learning_rate": 1.999991901533821e-05, "loss": 2.221, "step": 9630 }, { "epoch": 0.12, "grad_norm": 5.851719379425049, "learning_rate": 1.9999918591890293e-05, "loss": 2.2934, "step": 9631 }, { "epoch": 0.13, "grad_norm": 5.831170082092285, "learning_rate": 1.999991816733822e-05, "loss": 2.4556, "step": 9632 }, { "epoch": 0.13, "grad_norm": 5.119227886199951, "learning_rate": 1.9999917741681982e-05, "loss": 2.4776, "step": 9633 }, { "epoch": 0.13, "grad_norm": 5.798698425292969, "learning_rate": 1.9999917314921582e-05, "loss": 2.7265, "step": 9634 }, { "epoch": 0.13, "grad_norm": 5.8268585205078125, "learning_rate": 1.9999916887057024e-05, "loss": 2.59, "step": 9635 }, { "epoch": 0.13, "grad_norm": 6.054327964782715, "learning_rate": 1.9999916458088308e-05, "loss": 2.2593, "step": 9636 }, { "epoch": 0.13, "grad_norm": 6.656147003173828, "learning_rate": 1.999991602801543e-05, "loss": 2.5377, "step": 9637 }, { "epoch": 0.13, "grad_norm": 4.7638840675354, "learning_rate": 1.999991559683839e-05, "loss": 1.8099, "step": 9638 }, { "epoch": 0.13, "grad_norm": 5.429349899291992, "learning_rate": 1.999991516455719e-05, "loss": 2.3193, "step": 9639 }, { "epoch": 0.13, "grad_norm": 4.974747180938721, "learning_rate": 1.999991473117183e-05, "loss": 1.9635, "step": 9640 }, { "epoch": 0.13, "grad_norm": 5.417895317077637, "learning_rate": 1.999991429668231e-05, "loss": 2.3478, "step": 9641 }, { "epoch": 0.13, "grad_norm": 5.51317024230957, "learning_rate": 1.999991386108863e-05, "loss": 2.5353, "step": 9642 }, { "epoch": 0.13, "grad_norm": 5.589086055755615, "learning_rate": 1.9999913424390788e-05, "loss": 2.3752, "step": 9643 }, { "epoch": 0.13, "grad_norm": 5.601330280303955, "learning_rate": 1.999991298658879e-05, "loss": 2.2098, "step": 9644 }, { "epoch": 0.13, "grad_norm": 5.989184379577637, "learning_rate": 1.9999912547682628e-05, "loss": 2.201, "step": 9645 }, { "epoch": 0.13, "grad_norm": 5.358433246612549, "learning_rate": 1.9999912107672305e-05, "loss": 2.6912, "step": 9646 }, { "epoch": 0.13, "grad_norm": 5.993339538574219, "learning_rate": 1.9999911666557824e-05, "loss": 3.0772, "step": 9647 }, { "epoch": 0.13, "grad_norm": 5.199431896209717, "learning_rate": 1.9999911224339185e-05, "loss": 2.3455, "step": 9648 }, { "epoch": 0.13, "grad_norm": 5.039295673370361, "learning_rate": 1.9999910781016384e-05, "loss": 2.1088, "step": 9649 }, { "epoch": 0.13, "grad_norm": 6.225580215454102, "learning_rate": 1.9999910336589425e-05, "loss": 2.4687, "step": 9650 }, { "epoch": 0.13, "grad_norm": 5.603896141052246, "learning_rate": 1.9999909891058304e-05, "loss": 2.7037, "step": 9651 }, { "epoch": 0.13, "grad_norm": 5.481950759887695, "learning_rate": 1.9999909444423025e-05, "loss": 2.2589, "step": 9652 }, { "epoch": 0.13, "grad_norm": 5.862218379974365, "learning_rate": 1.9999908996683584e-05, "loss": 2.5138, "step": 9653 }, { "epoch": 0.13, "grad_norm": 5.161261558532715, "learning_rate": 1.9999908547839985e-05, "loss": 2.2923, "step": 9654 }, { "epoch": 0.13, "grad_norm": 6.0661187171936035, "learning_rate": 1.9999908097892228e-05, "loss": 2.7863, "step": 9655 }, { "epoch": 0.13, "grad_norm": 5.309394836425781, "learning_rate": 1.999990764684031e-05, "loss": 2.5717, "step": 9656 }, { "epoch": 0.13, "grad_norm": 5.874429225921631, "learning_rate": 1.9999907194684232e-05, "loss": 2.6487, "step": 9657 }, { "epoch": 0.13, "grad_norm": 4.974059581756592, "learning_rate": 1.9999906741423996e-05, "loss": 2.7133, "step": 9658 }, { "epoch": 0.13, "grad_norm": 5.7218427658081055, "learning_rate": 1.99999062870596e-05, "loss": 1.9785, "step": 9659 }, { "epoch": 0.13, "grad_norm": 5.1507463455200195, "learning_rate": 1.9999905831591044e-05, "loss": 2.277, "step": 9660 }, { "epoch": 0.13, "grad_norm": 4.9115729331970215, "learning_rate": 1.999990537501833e-05, "loss": 1.9615, "step": 9661 }, { "epoch": 0.13, "grad_norm": 5.3577470779418945, "learning_rate": 1.9999904917341455e-05, "loss": 2.3007, "step": 9662 }, { "epoch": 0.13, "grad_norm": 5.194732666015625, "learning_rate": 1.999990445856042e-05, "loss": 2.1402, "step": 9663 }, { "epoch": 0.13, "grad_norm": 5.469214916229248, "learning_rate": 1.999990399867523e-05, "loss": 2.4045, "step": 9664 }, { "epoch": 0.13, "grad_norm": 5.252039432525635, "learning_rate": 1.999990353768588e-05, "loss": 2.162, "step": 9665 }, { "epoch": 0.13, "grad_norm": 5.651328086853027, "learning_rate": 1.9999903075592367e-05, "loss": 2.9129, "step": 9666 }, { "epoch": 0.13, "grad_norm": 6.34231424331665, "learning_rate": 1.9999902612394697e-05, "loss": 2.6685, "step": 9667 }, { "epoch": 0.13, "grad_norm": 5.412652492523193, "learning_rate": 1.999990214809287e-05, "loss": 2.5495, "step": 9668 }, { "epoch": 0.13, "grad_norm": 5.5218634605407715, "learning_rate": 1.9999901682686886e-05, "loss": 2.2181, "step": 9669 }, { "epoch": 0.13, "grad_norm": 4.874581336975098, "learning_rate": 1.9999901216176738e-05, "loss": 1.9286, "step": 9670 }, { "epoch": 0.13, "grad_norm": 5.157168865203857, "learning_rate": 1.9999900748562434e-05, "loss": 2.4109, "step": 9671 }, { "epoch": 0.13, "grad_norm": 5.777820587158203, "learning_rate": 1.999990027984397e-05, "loss": 2.5777, "step": 9672 }, { "epoch": 0.13, "grad_norm": 5.395297527313232, "learning_rate": 1.9999899810021347e-05, "loss": 2.3195, "step": 9673 }, { "epoch": 0.13, "grad_norm": 6.0803375244140625, "learning_rate": 1.999989933909457e-05, "loss": 2.5342, "step": 9674 }, { "epoch": 0.13, "grad_norm": 6.205013275146484, "learning_rate": 1.999989886706363e-05, "loss": 2.2738, "step": 9675 }, { "epoch": 0.13, "grad_norm": 5.570084571838379, "learning_rate": 1.999989839392853e-05, "loss": 2.3303, "step": 9676 }, { "epoch": 0.13, "grad_norm": 5.027329444885254, "learning_rate": 1.9999897919689275e-05, "loss": 2.2838, "step": 9677 }, { "epoch": 0.13, "grad_norm": 5.551204681396484, "learning_rate": 1.999989744434586e-05, "loss": 2.4387, "step": 9678 }, { "epoch": 0.13, "grad_norm": 5.230228424072266, "learning_rate": 1.9999896967898288e-05, "loss": 2.1133, "step": 9679 }, { "epoch": 0.13, "grad_norm": 5.267390251159668, "learning_rate": 1.9999896490346557e-05, "loss": 2.2288, "step": 9680 }, { "epoch": 0.13, "grad_norm": 6.111279010772705, "learning_rate": 1.9999896011690668e-05, "loss": 2.1596, "step": 9681 }, { "epoch": 0.13, "grad_norm": 5.603428363800049, "learning_rate": 1.999989553193062e-05, "loss": 2.1894, "step": 9682 }, { "epoch": 0.13, "grad_norm": 5.700031280517578, "learning_rate": 1.9999895051066415e-05, "loss": 2.3718, "step": 9683 }, { "epoch": 0.13, "grad_norm": 5.540928363800049, "learning_rate": 1.999989456909805e-05, "loss": 1.9889, "step": 9684 }, { "epoch": 0.13, "grad_norm": 5.859004974365234, "learning_rate": 1.9999894086025532e-05, "loss": 2.5175, "step": 9685 }, { "epoch": 0.13, "grad_norm": 7.036720275878906, "learning_rate": 1.999989360184885e-05, "loss": 3.0154, "step": 9686 }, { "epoch": 0.13, "grad_norm": 6.0407562255859375, "learning_rate": 1.9999893116568012e-05, "loss": 2.5861, "step": 9687 }, { "epoch": 0.13, "grad_norm": 5.866534233093262, "learning_rate": 1.999989263018302e-05, "loss": 2.7627, "step": 9688 }, { "epoch": 0.13, "grad_norm": 5.436172962188721, "learning_rate": 1.9999892142693867e-05, "loss": 2.5711, "step": 9689 }, { "epoch": 0.13, "grad_norm": 5.819499492645264, "learning_rate": 1.9999891654100553e-05, "loss": 2.3658, "step": 9690 }, { "epoch": 0.13, "grad_norm": 6.898200511932373, "learning_rate": 1.9999891164403085e-05, "loss": 3.7307, "step": 9691 }, { "epoch": 0.13, "grad_norm": 5.900123596191406, "learning_rate": 1.9999890673601458e-05, "loss": 2.5313, "step": 9692 }, { "epoch": 0.13, "grad_norm": 5.577465534210205, "learning_rate": 1.9999890181695676e-05, "loss": 2.2891, "step": 9693 }, { "epoch": 0.13, "grad_norm": 6.419095516204834, "learning_rate": 1.9999889688685736e-05, "loss": 2.699, "step": 9694 }, { "epoch": 0.13, "grad_norm": 5.081936359405518, "learning_rate": 1.9999889194571635e-05, "loss": 1.958, "step": 9695 }, { "epoch": 0.13, "grad_norm": 5.560680389404297, "learning_rate": 1.999988869935338e-05, "loss": 2.6698, "step": 9696 }, { "epoch": 0.13, "grad_norm": 6.486210346221924, "learning_rate": 1.9999888203030967e-05, "loss": 2.7865, "step": 9697 }, { "epoch": 0.13, "grad_norm": 4.6901445388793945, "learning_rate": 1.9999887705604397e-05, "loss": 2.0736, "step": 9698 }, { "epoch": 0.13, "grad_norm": 5.941183567047119, "learning_rate": 1.999988720707367e-05, "loss": 2.7549, "step": 9699 }, { "epoch": 0.13, "grad_norm": 4.976129531860352, "learning_rate": 1.9999886707438783e-05, "loss": 1.6901, "step": 9700 }, { "epoch": 0.13, "grad_norm": 5.525296688079834, "learning_rate": 1.999988620669974e-05, "loss": 2.3251, "step": 9701 }, { "epoch": 0.13, "grad_norm": 6.066704750061035, "learning_rate": 1.999988570485654e-05, "loss": 2.8629, "step": 9702 }, { "epoch": 0.13, "grad_norm": 6.0453715324401855, "learning_rate": 1.9999885201909185e-05, "loss": 2.7888, "step": 9703 }, { "epoch": 0.13, "grad_norm": 5.677940368652344, "learning_rate": 1.999988469785767e-05, "loss": 2.4224, "step": 9704 }, { "epoch": 0.13, "grad_norm": 4.965030670166016, "learning_rate": 1.9999884192702002e-05, "loss": 2.5083, "step": 9705 }, { "epoch": 0.13, "grad_norm": 5.9551873207092285, "learning_rate": 1.9999883686442173e-05, "loss": 2.9165, "step": 9706 }, { "epoch": 0.13, "grad_norm": 5.559757709503174, "learning_rate": 1.999988317907819e-05, "loss": 2.6028, "step": 9707 }, { "epoch": 0.13, "grad_norm": 5.600219249725342, "learning_rate": 1.9999882670610047e-05, "loss": 2.4094, "step": 9708 }, { "epoch": 0.13, "grad_norm": 6.070824146270752, "learning_rate": 1.999988216103775e-05, "loss": 2.8288, "step": 9709 }, { "epoch": 0.13, "grad_norm": 5.94686222076416, "learning_rate": 1.99998816503613e-05, "loss": 2.9617, "step": 9710 }, { "epoch": 0.13, "grad_norm": 4.786436557769775, "learning_rate": 1.9999881138580685e-05, "loss": 2.0896, "step": 9711 }, { "epoch": 0.13, "grad_norm": 5.237726211547852, "learning_rate": 1.999988062569592e-05, "loss": 2.5226, "step": 9712 }, { "epoch": 0.13, "grad_norm": 4.874541282653809, "learning_rate": 1.9999880111706996e-05, "loss": 1.7825, "step": 9713 }, { "epoch": 0.13, "grad_norm": 5.148716926574707, "learning_rate": 1.9999879596613915e-05, "loss": 2.3569, "step": 9714 }, { "epoch": 0.13, "grad_norm": 5.195078372955322, "learning_rate": 1.999987908041668e-05, "loss": 2.5281, "step": 9715 }, { "epoch": 0.13, "grad_norm": 6.1444597244262695, "learning_rate": 1.9999878563115284e-05, "loss": 3.3305, "step": 9716 }, { "epoch": 0.13, "grad_norm": 6.697595596313477, "learning_rate": 1.9999878044709734e-05, "loss": 2.6746, "step": 9717 }, { "epoch": 0.13, "grad_norm": 5.796417236328125, "learning_rate": 1.999987752520003e-05, "loss": 2.2596, "step": 9718 }, { "epoch": 0.13, "grad_norm": 5.644107341766357, "learning_rate": 1.999987700458617e-05, "loss": 2.4237, "step": 9719 }, { "epoch": 0.13, "grad_norm": 5.217671871185303, "learning_rate": 1.9999876482868153e-05, "loss": 2.4278, "step": 9720 }, { "epoch": 0.13, "grad_norm": 5.599127769470215, "learning_rate": 1.9999875960045977e-05, "loss": 2.4157, "step": 9721 }, { "epoch": 0.13, "grad_norm": 5.945165157318115, "learning_rate": 1.999987543611965e-05, "loss": 2.2079, "step": 9722 }, { "epoch": 0.13, "grad_norm": 5.767475128173828, "learning_rate": 1.9999874911089164e-05, "loss": 2.3977, "step": 9723 }, { "epoch": 0.13, "grad_norm": 5.783619403839111, "learning_rate": 1.9999874384954523e-05, "loss": 2.512, "step": 9724 }, { "epoch": 0.13, "grad_norm": 5.542359828948975, "learning_rate": 1.9999873857715724e-05, "loss": 2.9658, "step": 9725 }, { "epoch": 0.13, "grad_norm": 6.433932781219482, "learning_rate": 1.9999873329372774e-05, "loss": 2.988, "step": 9726 }, { "epoch": 0.13, "grad_norm": 6.4565205574035645, "learning_rate": 1.9999872799925665e-05, "loss": 2.7407, "step": 9727 }, { "epoch": 0.13, "grad_norm": 5.140969753265381, "learning_rate": 1.9999872269374402e-05, "loss": 2.3655, "step": 9728 }, { "epoch": 0.13, "grad_norm": 6.136523246765137, "learning_rate": 1.9999871737718984e-05, "loss": 2.4448, "step": 9729 }, { "epoch": 0.13, "grad_norm": 5.317419528961182, "learning_rate": 1.9999871204959407e-05, "loss": 2.6779, "step": 9730 }, { "epoch": 0.13, "grad_norm": 6.194451332092285, "learning_rate": 1.999987067109568e-05, "loss": 2.9268, "step": 9731 }, { "epoch": 0.13, "grad_norm": 5.626246452331543, "learning_rate": 1.999987013612779e-05, "loss": 2.7415, "step": 9732 }, { "epoch": 0.13, "grad_norm": 5.362029075622559, "learning_rate": 1.999986960005575e-05, "loss": 2.5921, "step": 9733 }, { "epoch": 0.13, "grad_norm": 5.814487457275391, "learning_rate": 1.9999869062879552e-05, "loss": 2.5865, "step": 9734 }, { "epoch": 0.13, "grad_norm": 5.417242050170898, "learning_rate": 1.99998685245992e-05, "loss": 2.3865, "step": 9735 }, { "epoch": 0.13, "grad_norm": 6.872177600860596, "learning_rate": 1.9999867985214695e-05, "loss": 2.9287, "step": 9736 }, { "epoch": 0.13, "grad_norm": 6.0337090492248535, "learning_rate": 1.9999867444726034e-05, "loss": 2.4894, "step": 9737 }, { "epoch": 0.13, "grad_norm": 6.442519187927246, "learning_rate": 1.9999866903133215e-05, "loss": 2.5795, "step": 9738 }, { "epoch": 0.13, "grad_norm": 5.501981735229492, "learning_rate": 1.9999866360436245e-05, "loss": 2.2547, "step": 9739 }, { "epoch": 0.13, "grad_norm": 5.248324394226074, "learning_rate": 1.999986581663512e-05, "loss": 2.1502, "step": 9740 }, { "epoch": 0.13, "grad_norm": 5.532377243041992, "learning_rate": 1.9999865271729835e-05, "loss": 2.5625, "step": 9741 }, { "epoch": 0.13, "grad_norm": 5.698651313781738, "learning_rate": 1.99998647257204e-05, "loss": 2.8325, "step": 9742 }, { "epoch": 0.13, "grad_norm": 4.86293888092041, "learning_rate": 1.999986417860681e-05, "loss": 2.071, "step": 9743 }, { "epoch": 0.13, "grad_norm": 5.840376853942871, "learning_rate": 1.9999863630389065e-05, "loss": 2.546, "step": 9744 }, { "epoch": 0.13, "grad_norm": 5.4627861976623535, "learning_rate": 1.9999863081067162e-05, "loss": 2.8343, "step": 9745 }, { "epoch": 0.13, "grad_norm": 5.939870357513428, "learning_rate": 1.999986253064111e-05, "loss": 2.9084, "step": 9746 }, { "epoch": 0.13, "grad_norm": 6.064189910888672, "learning_rate": 1.99998619791109e-05, "loss": 2.2269, "step": 9747 }, { "epoch": 0.13, "grad_norm": 5.465937614440918, "learning_rate": 1.9999861426476537e-05, "loss": 2.6388, "step": 9748 }, { "epoch": 0.13, "grad_norm": 4.896765232086182, "learning_rate": 1.999986087273802e-05, "loss": 2.1093, "step": 9749 }, { "epoch": 0.13, "grad_norm": 5.334708213806152, "learning_rate": 1.9999860317895347e-05, "loss": 2.8005, "step": 9750 }, { "epoch": 0.13, "grad_norm": 6.064077854156494, "learning_rate": 1.9999859761948517e-05, "loss": 2.5403, "step": 9751 }, { "epoch": 0.13, "grad_norm": 5.294746398925781, "learning_rate": 1.999985920489754e-05, "loss": 2.2423, "step": 9752 }, { "epoch": 0.13, "grad_norm": 6.102099418640137, "learning_rate": 1.9999858646742405e-05, "loss": 2.6786, "step": 9753 }, { "epoch": 0.13, "grad_norm": 5.761444091796875, "learning_rate": 1.9999858087483114e-05, "loss": 2.7547, "step": 9754 }, { "epoch": 0.13, "grad_norm": 6.356704235076904, "learning_rate": 1.9999857527119673e-05, "loss": 2.7331, "step": 9755 }, { "epoch": 0.13, "grad_norm": 5.3722734451293945, "learning_rate": 1.9999856965652076e-05, "loss": 2.2139, "step": 9756 }, { "epoch": 0.13, "grad_norm": 5.842587471008301, "learning_rate": 1.9999856403080328e-05, "loss": 2.3678, "step": 9757 }, { "epoch": 0.13, "grad_norm": 5.115921497344971, "learning_rate": 1.999985583940442e-05, "loss": 2.4744, "step": 9758 }, { "epoch": 0.13, "grad_norm": 4.910447120666504, "learning_rate": 1.9999855274624363e-05, "loss": 1.9632, "step": 9759 }, { "epoch": 0.13, "grad_norm": 5.630148887634277, "learning_rate": 1.999985470874015e-05, "loss": 2.4595, "step": 9760 }, { "epoch": 0.13, "grad_norm": 4.986459255218506, "learning_rate": 1.9999854141751786e-05, "loss": 2.1258, "step": 9761 }, { "epoch": 0.13, "grad_norm": 6.230332374572754, "learning_rate": 1.9999853573659267e-05, "loss": 2.9757, "step": 9762 }, { "epoch": 0.13, "grad_norm": 5.2256245613098145, "learning_rate": 1.9999853004462596e-05, "loss": 2.3511, "step": 9763 }, { "epoch": 0.13, "grad_norm": 5.281379222869873, "learning_rate": 1.9999852434161767e-05, "loss": 2.6267, "step": 9764 }, { "epoch": 0.13, "grad_norm": 5.716549873352051, "learning_rate": 1.999985186275679e-05, "loss": 2.3964, "step": 9765 }, { "epoch": 0.13, "grad_norm": 5.809209823608398, "learning_rate": 1.9999851290247658e-05, "loss": 2.8702, "step": 9766 }, { "epoch": 0.13, "grad_norm": 6.163857460021973, "learning_rate": 1.999985071663437e-05, "loss": 2.6696, "step": 9767 }, { "epoch": 0.13, "grad_norm": 5.433800220489502, "learning_rate": 1.9999850141916933e-05, "loss": 2.1636, "step": 9768 }, { "epoch": 0.13, "grad_norm": 5.694543838500977, "learning_rate": 1.999984956609534e-05, "loss": 2.4923, "step": 9769 }, { "epoch": 0.13, "grad_norm": 4.995551586151123, "learning_rate": 1.9999848989169598e-05, "loss": 1.6755, "step": 9770 }, { "epoch": 0.13, "grad_norm": 6.185279846191406, "learning_rate": 1.99998484111397e-05, "loss": 3.0414, "step": 9771 }, { "epoch": 0.13, "grad_norm": 5.971691608428955, "learning_rate": 1.9999847832005644e-05, "loss": 2.7839, "step": 9772 }, { "epoch": 0.13, "grad_norm": 5.020112037658691, "learning_rate": 1.999984725176744e-05, "loss": 2.4065, "step": 9773 }, { "epoch": 0.13, "grad_norm": 4.8083930015563965, "learning_rate": 1.9999846670425084e-05, "loss": 1.7521, "step": 9774 }, { "epoch": 0.13, "grad_norm": 5.9858856201171875, "learning_rate": 1.9999846087978575e-05, "loss": 2.6502, "step": 9775 }, { "epoch": 0.13, "grad_norm": 6.369102954864502, "learning_rate": 1.9999845504427915e-05, "loss": 3.0227, "step": 9776 }, { "epoch": 0.13, "grad_norm": 6.71766471862793, "learning_rate": 1.9999844919773103e-05, "loss": 2.2383, "step": 9777 }, { "epoch": 0.13, "grad_norm": 5.439168930053711, "learning_rate": 1.9999844334014132e-05, "loss": 2.3353, "step": 9778 }, { "epoch": 0.13, "grad_norm": 5.39556884765625, "learning_rate": 1.9999843747151014e-05, "loss": 2.1468, "step": 9779 }, { "epoch": 0.13, "grad_norm": 5.928988933563232, "learning_rate": 1.9999843159183744e-05, "loss": 2.5487, "step": 9780 }, { "epoch": 0.13, "grad_norm": 5.374914646148682, "learning_rate": 1.9999842570112316e-05, "loss": 2.0635, "step": 9781 }, { "epoch": 0.13, "grad_norm": 5.465679168701172, "learning_rate": 1.999984197993674e-05, "loss": 2.9966, "step": 9782 }, { "epoch": 0.13, "grad_norm": 6.202785968780518, "learning_rate": 1.9999841388657012e-05, "loss": 2.9325, "step": 9783 }, { "epoch": 0.13, "grad_norm": 5.4848432540893555, "learning_rate": 1.999984079627313e-05, "loss": 2.0868, "step": 9784 }, { "epoch": 0.13, "grad_norm": 5.734785556793213, "learning_rate": 1.9999840202785096e-05, "loss": 2.4536, "step": 9785 }, { "epoch": 0.13, "grad_norm": 5.96933650970459, "learning_rate": 1.9999839608192913e-05, "loss": 2.2456, "step": 9786 }, { "epoch": 0.13, "grad_norm": 5.377588272094727, "learning_rate": 1.9999839012496576e-05, "loss": 2.4078, "step": 9787 }, { "epoch": 0.13, "grad_norm": 5.237778186798096, "learning_rate": 1.9999838415696088e-05, "loss": 2.5234, "step": 9788 }, { "epoch": 0.13, "grad_norm": 6.056416034698486, "learning_rate": 1.9999837817791444e-05, "loss": 2.59, "step": 9789 }, { "epoch": 0.13, "grad_norm": 5.2824273109436035, "learning_rate": 1.9999837218782653e-05, "loss": 2.4645, "step": 9790 }, { "epoch": 0.13, "grad_norm": 5.442831039428711, "learning_rate": 1.999983661866971e-05, "loss": 2.2086, "step": 9791 }, { "epoch": 0.13, "grad_norm": 4.998197555541992, "learning_rate": 1.9999836017452615e-05, "loss": 2.4078, "step": 9792 }, { "epoch": 0.13, "grad_norm": 5.1859941482543945, "learning_rate": 1.999983541513137e-05, "loss": 2.3027, "step": 9793 }, { "epoch": 0.13, "grad_norm": 5.155570030212402, "learning_rate": 1.999983481170597e-05, "loss": 2.2978, "step": 9794 }, { "epoch": 0.13, "grad_norm": 6.307559490203857, "learning_rate": 1.999983420717642e-05, "loss": 2.5491, "step": 9795 }, { "epoch": 0.13, "grad_norm": 4.550986289978027, "learning_rate": 1.999983360154272e-05, "loss": 1.6968, "step": 9796 }, { "epoch": 0.13, "grad_norm": 5.377610206604004, "learning_rate": 1.9999832994804864e-05, "loss": 2.8751, "step": 9797 }, { "epoch": 0.13, "grad_norm": 5.32708215713501, "learning_rate": 1.999983238696286e-05, "loss": 2.3806, "step": 9798 }, { "epoch": 0.13, "grad_norm": 5.338069438934326, "learning_rate": 1.999983177801671e-05, "loss": 2.3951, "step": 9799 }, { "epoch": 0.13, "grad_norm": 5.7518439292907715, "learning_rate": 1.99998311679664e-05, "loss": 2.0796, "step": 9800 }, { "epoch": 0.13, "grad_norm": 6.178782939910889, "learning_rate": 1.9999830556811944e-05, "loss": 2.8862, "step": 9801 }, { "epoch": 0.13, "grad_norm": 5.312097549438477, "learning_rate": 1.9999829944553335e-05, "loss": 2.2839, "step": 9802 }, { "epoch": 0.13, "grad_norm": 5.620138645172119, "learning_rate": 1.9999829331190574e-05, "loss": 2.6482, "step": 9803 }, { "epoch": 0.13, "grad_norm": 5.981736183166504, "learning_rate": 1.9999828716723665e-05, "loss": 2.7035, "step": 9804 }, { "epoch": 0.13, "grad_norm": 4.5028204917907715, "learning_rate": 1.9999828101152605e-05, "loss": 1.9418, "step": 9805 }, { "epoch": 0.13, "grad_norm": 5.198555946350098, "learning_rate": 1.9999827484477396e-05, "loss": 2.3251, "step": 9806 }, { "epoch": 0.13, "grad_norm": 5.797850131988525, "learning_rate": 1.9999826866698032e-05, "loss": 2.9431, "step": 9807 }, { "epoch": 0.13, "grad_norm": 5.565020561218262, "learning_rate": 1.999982624781452e-05, "loss": 2.4601, "step": 9808 }, { "epoch": 0.13, "grad_norm": 5.10686731338501, "learning_rate": 1.9999825627826858e-05, "loss": 2.372, "step": 9809 }, { "epoch": 0.13, "grad_norm": 5.244102478027344, "learning_rate": 1.9999825006735043e-05, "loss": 2.4161, "step": 9810 }, { "epoch": 0.13, "grad_norm": 6.258089542388916, "learning_rate": 1.999982438453908e-05, "loss": 2.6984, "step": 9811 }, { "epoch": 0.13, "grad_norm": 6.286654949188232, "learning_rate": 1.9999823761238966e-05, "loss": 2.7669, "step": 9812 }, { "epoch": 0.13, "grad_norm": 6.622378349304199, "learning_rate": 1.99998231368347e-05, "loss": 2.8299, "step": 9813 }, { "epoch": 0.13, "grad_norm": 5.420362949371338, "learning_rate": 1.9999822511326287e-05, "loss": 2.3641, "step": 9814 }, { "epoch": 0.13, "grad_norm": 6.014963626861572, "learning_rate": 1.9999821884713725e-05, "loss": 2.4542, "step": 9815 }, { "epoch": 0.13, "grad_norm": 5.276346683502197, "learning_rate": 1.9999821256997008e-05, "loss": 2.2441, "step": 9816 }, { "epoch": 0.13, "grad_norm": 5.607856750488281, "learning_rate": 1.9999820628176147e-05, "loss": 2.766, "step": 9817 }, { "epoch": 0.13, "grad_norm": 4.85811185836792, "learning_rate": 1.999981999825113e-05, "loss": 2.1336, "step": 9818 }, { "epoch": 0.13, "grad_norm": 5.280383110046387, "learning_rate": 1.999981936722197e-05, "loss": 2.3625, "step": 9819 }, { "epoch": 0.13, "grad_norm": 5.113104343414307, "learning_rate": 1.9999818735088653e-05, "loss": 2.8393, "step": 9820 }, { "epoch": 0.13, "grad_norm": 4.983563423156738, "learning_rate": 1.9999818101851193e-05, "loss": 2.2703, "step": 9821 }, { "epoch": 0.13, "grad_norm": 6.217905521392822, "learning_rate": 1.999981746750958e-05, "loss": 2.4052, "step": 9822 }, { "epoch": 0.13, "grad_norm": 5.598326683044434, "learning_rate": 1.999981683206382e-05, "loss": 2.2991, "step": 9823 }, { "epoch": 0.13, "grad_norm": 5.180360794067383, "learning_rate": 1.9999816195513905e-05, "loss": 2.3804, "step": 9824 }, { "epoch": 0.13, "grad_norm": 5.185080051422119, "learning_rate": 1.9999815557859845e-05, "loss": 2.0891, "step": 9825 }, { "epoch": 0.13, "grad_norm": 5.458906650543213, "learning_rate": 1.9999814919101634e-05, "loss": 2.7443, "step": 9826 }, { "epoch": 0.13, "grad_norm": 5.894456386566162, "learning_rate": 1.9999814279239274e-05, "loss": 2.6945, "step": 9827 }, { "epoch": 0.13, "grad_norm": 4.725700855255127, "learning_rate": 1.9999813638272767e-05, "loss": 1.879, "step": 9828 }, { "epoch": 0.13, "grad_norm": 5.229445934295654, "learning_rate": 1.999981299620211e-05, "loss": 2.5482, "step": 9829 }, { "epoch": 0.13, "grad_norm": 5.349068641662598, "learning_rate": 1.9999812353027304e-05, "loss": 2.7484, "step": 9830 }, { "epoch": 0.13, "grad_norm": 4.677070140838623, "learning_rate": 1.9999811708748345e-05, "loss": 1.8956, "step": 9831 }, { "epoch": 0.13, "grad_norm": 5.79595947265625, "learning_rate": 1.9999811063365242e-05, "loss": 2.9353, "step": 9832 }, { "epoch": 0.13, "grad_norm": 4.5329790115356445, "learning_rate": 1.999981041687799e-05, "loss": 2.4312, "step": 9833 }, { "epoch": 0.13, "grad_norm": 6.18299674987793, "learning_rate": 1.9999809769286587e-05, "loss": 2.7848, "step": 9834 }, { "epoch": 0.13, "grad_norm": 5.4093403816223145, "learning_rate": 1.9999809120591036e-05, "loss": 2.8357, "step": 9835 }, { "epoch": 0.13, "grad_norm": 4.809648036956787, "learning_rate": 1.9999808470791337e-05, "loss": 2.2645, "step": 9836 }, { "epoch": 0.13, "grad_norm": 5.9433698654174805, "learning_rate": 1.999980781988749e-05, "loss": 2.3603, "step": 9837 }, { "epoch": 0.13, "grad_norm": 5.147488594055176, "learning_rate": 1.9999807167879494e-05, "loss": 2.2349, "step": 9838 }, { "epoch": 0.13, "grad_norm": 5.766440391540527, "learning_rate": 1.9999806514767348e-05, "loss": 2.774, "step": 9839 }, { "epoch": 0.13, "grad_norm": 4.9576334953308105, "learning_rate": 1.999980586055106e-05, "loss": 2.2999, "step": 9840 }, { "epoch": 0.13, "grad_norm": 5.131114959716797, "learning_rate": 1.9999805205230616e-05, "loss": 1.9821, "step": 9841 }, { "epoch": 0.13, "grad_norm": 4.347094535827637, "learning_rate": 1.9999804548806025e-05, "loss": 1.7829, "step": 9842 }, { "epoch": 0.13, "grad_norm": 5.396083354949951, "learning_rate": 1.999980389127729e-05, "loss": 2.1636, "step": 9843 }, { "epoch": 0.13, "grad_norm": 6.080385684967041, "learning_rate": 1.9999803232644405e-05, "loss": 2.9688, "step": 9844 }, { "epoch": 0.13, "grad_norm": 6.392953395843506, "learning_rate": 1.9999802572907373e-05, "loss": 2.616, "step": 9845 }, { "epoch": 0.13, "grad_norm": 5.95050573348999, "learning_rate": 1.999980191206619e-05, "loss": 2.6365, "step": 9846 }, { "epoch": 0.13, "grad_norm": 5.4886603355407715, "learning_rate": 1.999980125012086e-05, "loss": 2.3929, "step": 9847 }, { "epoch": 0.13, "grad_norm": 5.119182109832764, "learning_rate": 1.9999800587071385e-05, "loss": 2.624, "step": 9848 }, { "epoch": 0.13, "grad_norm": 6.179019451141357, "learning_rate": 1.9999799922917763e-05, "loss": 2.2326, "step": 9849 }, { "epoch": 0.13, "grad_norm": 5.559062480926514, "learning_rate": 1.9999799257659987e-05, "loss": 2.7271, "step": 9850 }, { "epoch": 0.13, "grad_norm": 5.440300464630127, "learning_rate": 1.999979859129807e-05, "loss": 2.8735, "step": 9851 }, { "epoch": 0.13, "grad_norm": 6.170294761657715, "learning_rate": 1.9999797923832e-05, "loss": 2.7627, "step": 9852 }, { "epoch": 0.13, "grad_norm": 6.290626049041748, "learning_rate": 1.999979725526179e-05, "loss": 2.8072, "step": 9853 }, { "epoch": 0.13, "grad_norm": 6.29032564163208, "learning_rate": 1.999979658558743e-05, "loss": 2.7474, "step": 9854 }, { "epoch": 0.13, "grad_norm": 5.742497444152832, "learning_rate": 1.9999795914808923e-05, "loss": 2.2286, "step": 9855 }, { "epoch": 0.13, "grad_norm": 6.0382184982299805, "learning_rate": 1.9999795242926265e-05, "loss": 2.5382, "step": 9856 }, { "epoch": 0.13, "grad_norm": 5.365371227264404, "learning_rate": 1.9999794569939462e-05, "loss": 2.4606, "step": 9857 }, { "epoch": 0.13, "grad_norm": 6.2061028480529785, "learning_rate": 1.9999793895848515e-05, "loss": 2.7437, "step": 9858 }, { "epoch": 0.13, "grad_norm": 4.955264568328857, "learning_rate": 1.9999793220653416e-05, "loss": 2.673, "step": 9859 }, { "epoch": 0.13, "grad_norm": 6.238409519195557, "learning_rate": 1.9999792544354173e-05, "loss": 2.5607, "step": 9860 }, { "epoch": 0.13, "grad_norm": 4.845652103424072, "learning_rate": 1.9999791866950785e-05, "loss": 2.2677, "step": 9861 }, { "epoch": 0.13, "grad_norm": 4.784956932067871, "learning_rate": 1.9999791188443248e-05, "loss": 2.1649, "step": 9862 }, { "epoch": 0.13, "grad_norm": 5.293886184692383, "learning_rate": 1.9999790508831564e-05, "loss": 2.3314, "step": 9863 }, { "epoch": 0.13, "grad_norm": 5.5853447914123535, "learning_rate": 1.999978982811573e-05, "loss": 2.609, "step": 9864 }, { "epoch": 0.13, "grad_norm": 5.530358791351318, "learning_rate": 1.9999789146295757e-05, "loss": 2.3676, "step": 9865 }, { "epoch": 0.13, "grad_norm": 5.660069942474365, "learning_rate": 1.9999788463371635e-05, "loss": 2.4472, "step": 9866 }, { "epoch": 0.13, "grad_norm": 5.1764817237854, "learning_rate": 1.999978777934337e-05, "loss": 2.4131, "step": 9867 }, { "epoch": 0.13, "grad_norm": 5.305903911590576, "learning_rate": 1.999978709421095e-05, "loss": 2.1668, "step": 9868 }, { "epoch": 0.13, "grad_norm": 4.907223701477051, "learning_rate": 1.9999786407974388e-05, "loss": 2.392, "step": 9869 }, { "epoch": 0.13, "grad_norm": 5.526730060577393, "learning_rate": 1.9999785720633683e-05, "loss": 2.2183, "step": 9870 }, { "epoch": 0.13, "grad_norm": 6.384815692901611, "learning_rate": 1.999978503218883e-05, "loss": 2.23, "step": 9871 }, { "epoch": 0.13, "grad_norm": 4.862071990966797, "learning_rate": 1.999978434263983e-05, "loss": 2.2869, "step": 9872 }, { "epoch": 0.13, "grad_norm": 5.726324081420898, "learning_rate": 1.9999783651986685e-05, "loss": 2.6409, "step": 9873 }, { "epoch": 0.13, "grad_norm": 5.795183181762695, "learning_rate": 1.9999782960229396e-05, "loss": 2.3233, "step": 9874 }, { "epoch": 0.13, "grad_norm": 5.255523204803467, "learning_rate": 1.9999782267367958e-05, "loss": 2.4658, "step": 9875 }, { "epoch": 0.13, "grad_norm": 6.289722919464111, "learning_rate": 1.9999781573402375e-05, "loss": 2.6308, "step": 9876 }, { "epoch": 0.13, "grad_norm": 5.434426784515381, "learning_rate": 1.9999780878332647e-05, "loss": 2.5204, "step": 9877 }, { "epoch": 0.13, "grad_norm": 5.925014972686768, "learning_rate": 1.9999780182158775e-05, "loss": 2.0168, "step": 9878 }, { "epoch": 0.13, "grad_norm": 5.39937162399292, "learning_rate": 1.999977948488076e-05, "loss": 2.3611, "step": 9879 }, { "epoch": 0.13, "grad_norm": 6.191429138183594, "learning_rate": 1.999977878649859e-05, "loss": 2.7828, "step": 9880 }, { "epoch": 0.13, "grad_norm": 5.9359636306762695, "learning_rate": 1.9999778087012284e-05, "loss": 3.0791, "step": 9881 }, { "epoch": 0.13, "grad_norm": 6.580071926116943, "learning_rate": 1.999977738642183e-05, "loss": 2.924, "step": 9882 }, { "epoch": 0.13, "grad_norm": 6.511672019958496, "learning_rate": 1.999977668472723e-05, "loss": 2.7466, "step": 9883 }, { "epoch": 0.13, "grad_norm": 5.832028865814209, "learning_rate": 1.9999775981928487e-05, "loss": 2.7793, "step": 9884 }, { "epoch": 0.13, "grad_norm": 5.3605828285217285, "learning_rate": 1.9999775278025598e-05, "loss": 2.7713, "step": 9885 }, { "epoch": 0.13, "grad_norm": 5.858046054840088, "learning_rate": 1.9999774573018565e-05, "loss": 2.6053, "step": 9886 }, { "epoch": 0.13, "grad_norm": 6.829052448272705, "learning_rate": 1.9999773866907387e-05, "loss": 2.5903, "step": 9887 }, { "epoch": 0.13, "grad_norm": 5.220241069793701, "learning_rate": 1.999977315969206e-05, "loss": 2.5611, "step": 9888 }, { "epoch": 0.13, "grad_norm": 5.0106329917907715, "learning_rate": 1.9999772451372593e-05, "loss": 1.9712, "step": 9889 }, { "epoch": 0.13, "grad_norm": 5.542309284210205, "learning_rate": 1.999977174194898e-05, "loss": 2.6219, "step": 9890 }, { "epoch": 0.13, "grad_norm": 6.091899394989014, "learning_rate": 1.9999771031421224e-05, "loss": 2.8244, "step": 9891 }, { "epoch": 0.13, "grad_norm": 5.96087646484375, "learning_rate": 1.9999770319789323e-05, "loss": 2.3845, "step": 9892 }, { "epoch": 0.13, "grad_norm": 6.017244338989258, "learning_rate": 1.9999769607053277e-05, "loss": 3.0377, "step": 9893 }, { "epoch": 0.13, "grad_norm": 5.639407634735107, "learning_rate": 1.999976889321309e-05, "loss": 2.6864, "step": 9894 }, { "epoch": 0.13, "grad_norm": 5.210408687591553, "learning_rate": 1.9999768178268753e-05, "loss": 2.6995, "step": 9895 }, { "epoch": 0.13, "grad_norm": 5.74293327331543, "learning_rate": 1.9999767462220276e-05, "loss": 2.1118, "step": 9896 }, { "epoch": 0.13, "grad_norm": 5.235174179077148, "learning_rate": 1.999976674506765e-05, "loss": 2.2071, "step": 9897 }, { "epoch": 0.13, "grad_norm": 5.7834014892578125, "learning_rate": 1.9999766026810888e-05, "loss": 2.73, "step": 9898 }, { "epoch": 0.13, "grad_norm": 5.447909832000732, "learning_rate": 1.9999765307449977e-05, "loss": 2.7624, "step": 9899 }, { "epoch": 0.13, "grad_norm": 5.7163262367248535, "learning_rate": 1.9999764586984924e-05, "loss": 2.716, "step": 9900 }, { "epoch": 0.13, "grad_norm": 5.332581996917725, "learning_rate": 1.9999763865415727e-05, "loss": 2.8026, "step": 9901 }, { "epoch": 0.13, "grad_norm": 5.246612071990967, "learning_rate": 1.9999763142742385e-05, "loss": 2.0964, "step": 9902 }, { "epoch": 0.13, "grad_norm": 5.574736595153809, "learning_rate": 1.9999762418964902e-05, "loss": 2.7571, "step": 9903 }, { "epoch": 0.13, "grad_norm": 5.354038715362549, "learning_rate": 1.9999761694083274e-05, "loss": 2.3224, "step": 9904 }, { "epoch": 0.13, "grad_norm": 5.6098127365112305, "learning_rate": 1.9999760968097505e-05, "loss": 2.6472, "step": 9905 }, { "epoch": 0.13, "grad_norm": 5.74841833114624, "learning_rate": 1.9999760241007587e-05, "loss": 2.4605, "step": 9906 }, { "epoch": 0.13, "grad_norm": 6.0032243728637695, "learning_rate": 1.999975951281353e-05, "loss": 2.6566, "step": 9907 }, { "epoch": 0.13, "grad_norm": 5.312676906585693, "learning_rate": 1.9999758783515332e-05, "loss": 2.7878, "step": 9908 }, { "epoch": 0.13, "grad_norm": 5.899139881134033, "learning_rate": 1.9999758053112987e-05, "loss": 3.0318, "step": 9909 }, { "epoch": 0.13, "grad_norm": 5.121237277984619, "learning_rate": 1.99997573216065e-05, "loss": 2.3878, "step": 9910 }, { "epoch": 0.13, "grad_norm": 5.197172164916992, "learning_rate": 1.999975658899587e-05, "loss": 2.1664, "step": 9911 }, { "epoch": 0.13, "grad_norm": 5.383448123931885, "learning_rate": 1.9999755855281098e-05, "loss": 2.3633, "step": 9912 }, { "epoch": 0.13, "grad_norm": 5.649479389190674, "learning_rate": 1.9999755120462185e-05, "loss": 2.1784, "step": 9913 }, { "epoch": 0.13, "grad_norm": 5.681156635284424, "learning_rate": 1.9999754384539126e-05, "loss": 2.508, "step": 9914 }, { "epoch": 0.13, "grad_norm": 5.87701416015625, "learning_rate": 1.9999753647511927e-05, "loss": 2.0999, "step": 9915 }, { "epoch": 0.13, "grad_norm": 6.052829742431641, "learning_rate": 1.9999752909380586e-05, "loss": 2.9511, "step": 9916 }, { "epoch": 0.13, "grad_norm": 5.48020076751709, "learning_rate": 1.99997521701451e-05, "loss": 2.1031, "step": 9917 }, { "epoch": 0.13, "grad_norm": 5.393052577972412, "learning_rate": 1.999975142980547e-05, "loss": 2.6588, "step": 9918 }, { "epoch": 0.13, "grad_norm": 6.560813903808594, "learning_rate": 1.9999750688361702e-05, "loss": 2.6784, "step": 9919 }, { "epoch": 0.13, "grad_norm": 5.947097301483154, "learning_rate": 1.9999749945813792e-05, "loss": 2.6981, "step": 9920 }, { "epoch": 0.13, "grad_norm": 5.480867862701416, "learning_rate": 1.999974920216174e-05, "loss": 2.5312, "step": 9921 }, { "epoch": 0.13, "grad_norm": 6.032566070556641, "learning_rate": 1.9999748457405542e-05, "loss": 2.8075, "step": 9922 }, { "epoch": 0.13, "grad_norm": 5.507818698883057, "learning_rate": 1.999974771154521e-05, "loss": 2.6021, "step": 9923 }, { "epoch": 0.13, "grad_norm": 4.749311923980713, "learning_rate": 1.9999746964580727e-05, "loss": 2.1462, "step": 9924 }, { "epoch": 0.13, "grad_norm": 6.205353736877441, "learning_rate": 1.9999746216512104e-05, "loss": 2.7111, "step": 9925 }, { "epoch": 0.13, "grad_norm": 5.252908706665039, "learning_rate": 1.9999745467339343e-05, "loss": 2.2558, "step": 9926 }, { "epoch": 0.13, "grad_norm": 6.072942733764648, "learning_rate": 1.9999744717062438e-05, "loss": 2.776, "step": 9927 }, { "epoch": 0.13, "grad_norm": 4.714381217956543, "learning_rate": 1.9999743965681394e-05, "loss": 2.2516, "step": 9928 }, { "epoch": 0.13, "grad_norm": 5.117406845092773, "learning_rate": 1.999974321319621e-05, "loss": 2.8024, "step": 9929 }, { "epoch": 0.13, "grad_norm": 5.582014560699463, "learning_rate": 1.999974245960688e-05, "loss": 2.187, "step": 9930 }, { "epoch": 0.13, "grad_norm": 5.8071489334106445, "learning_rate": 1.9999741704913412e-05, "loss": 2.3887, "step": 9931 }, { "epoch": 0.13, "grad_norm": 4.873057842254639, "learning_rate": 1.99997409491158e-05, "loss": 2.0451, "step": 9932 }, { "epoch": 0.13, "grad_norm": 5.607027530670166, "learning_rate": 1.999974019221405e-05, "loss": 2.5519, "step": 9933 }, { "epoch": 0.13, "grad_norm": 5.637145042419434, "learning_rate": 1.9999739434208155e-05, "loss": 2.1753, "step": 9934 }, { "epoch": 0.13, "grad_norm": 5.440680980682373, "learning_rate": 1.9999738675098125e-05, "loss": 2.3723, "step": 9935 }, { "epoch": 0.13, "grad_norm": 5.322792053222656, "learning_rate": 1.9999737914883948e-05, "loss": 2.2479, "step": 9936 }, { "epoch": 0.13, "grad_norm": 5.868253707885742, "learning_rate": 1.9999737153565632e-05, "loss": 2.604, "step": 9937 }, { "epoch": 0.13, "grad_norm": 5.984304904937744, "learning_rate": 1.999973639114318e-05, "loss": 2.9827, "step": 9938 }, { "epoch": 0.13, "grad_norm": 5.165986061096191, "learning_rate": 1.999973562761658e-05, "loss": 2.3667, "step": 9939 }, { "epoch": 0.13, "grad_norm": 5.579363822937012, "learning_rate": 1.9999734862985844e-05, "loss": 2.706, "step": 9940 }, { "epoch": 0.13, "grad_norm": 5.7857279777526855, "learning_rate": 1.999973409725097e-05, "loss": 2.5327, "step": 9941 }, { "epoch": 0.13, "grad_norm": 5.37269401550293, "learning_rate": 1.999973333041195e-05, "loss": 2.402, "step": 9942 }, { "epoch": 0.13, "grad_norm": 5.153353691101074, "learning_rate": 1.9999732562468795e-05, "loss": 2.3752, "step": 9943 }, { "epoch": 0.13, "grad_norm": 6.307171821594238, "learning_rate": 1.9999731793421493e-05, "loss": 2.841, "step": 9944 }, { "epoch": 0.13, "grad_norm": 5.542601108551025, "learning_rate": 1.999973102327006e-05, "loss": 2.5013, "step": 9945 }, { "epoch": 0.13, "grad_norm": 5.598844051361084, "learning_rate": 1.9999730252014484e-05, "loss": 2.5555, "step": 9946 }, { "epoch": 0.13, "grad_norm": 5.292207717895508, "learning_rate": 1.9999729479654765e-05, "loss": 2.3854, "step": 9947 }, { "epoch": 0.13, "grad_norm": 6.001494884490967, "learning_rate": 1.999972870619091e-05, "loss": 2.8272, "step": 9948 }, { "epoch": 0.13, "grad_norm": 5.18741512298584, "learning_rate": 1.999972793162291e-05, "loss": 2.1199, "step": 9949 }, { "epoch": 0.13, "grad_norm": 5.709320068359375, "learning_rate": 1.999972715595077e-05, "loss": 2.1539, "step": 9950 }, { "epoch": 0.13, "grad_norm": 5.22352409362793, "learning_rate": 1.9999726379174497e-05, "loss": 2.3883, "step": 9951 }, { "epoch": 0.13, "grad_norm": 4.930230617523193, "learning_rate": 1.9999725601294082e-05, "loss": 2.1982, "step": 9952 }, { "epoch": 0.13, "grad_norm": 6.094799995422363, "learning_rate": 1.9999724822309526e-05, "loss": 2.745, "step": 9953 }, { "epoch": 0.13, "grad_norm": 6.364938735961914, "learning_rate": 1.9999724042220835e-05, "loss": 3.4374, "step": 9954 }, { "epoch": 0.13, "grad_norm": 5.645570278167725, "learning_rate": 1.9999723261028003e-05, "loss": 2.8697, "step": 9955 }, { "epoch": 0.13, "grad_norm": 6.113912582397461, "learning_rate": 1.999972247873103e-05, "loss": 2.4526, "step": 9956 }, { "epoch": 0.13, "grad_norm": 5.218770503997803, "learning_rate": 1.9999721695329917e-05, "loss": 2.1507, "step": 9957 }, { "epoch": 0.13, "grad_norm": 6.138766765594482, "learning_rate": 1.9999720910824668e-05, "loss": 3.0335, "step": 9958 }, { "epoch": 0.13, "grad_norm": 5.675309658050537, "learning_rate": 1.9999720125215277e-05, "loss": 2.676, "step": 9959 }, { "epoch": 0.13, "grad_norm": 5.027432918548584, "learning_rate": 1.999971933850175e-05, "loss": 2.2511, "step": 9960 }, { "epoch": 0.13, "grad_norm": 5.099667549133301, "learning_rate": 1.999971855068408e-05, "loss": 2.435, "step": 9961 }, { "epoch": 0.13, "grad_norm": 4.836271286010742, "learning_rate": 1.9999717761762277e-05, "loss": 2.3084, "step": 9962 }, { "epoch": 0.13, "grad_norm": 5.204466342926025, "learning_rate": 1.9999716971736338e-05, "loss": 2.2293, "step": 9963 }, { "epoch": 0.13, "grad_norm": 5.424601078033447, "learning_rate": 1.9999716180606254e-05, "loss": 2.6971, "step": 9964 }, { "epoch": 0.13, "grad_norm": 4.883664608001709, "learning_rate": 1.9999715388372032e-05, "loss": 1.9811, "step": 9965 }, { "epoch": 0.13, "grad_norm": 4.757072448730469, "learning_rate": 1.9999714595033675e-05, "loss": 2.5225, "step": 9966 }, { "epoch": 0.13, "grad_norm": 5.922347068786621, "learning_rate": 1.9999713800591178e-05, "loss": 2.2767, "step": 9967 }, { "epoch": 0.13, "grad_norm": 5.420594215393066, "learning_rate": 1.9999713005044542e-05, "loss": 2.3748, "step": 9968 }, { "epoch": 0.13, "grad_norm": 5.997220993041992, "learning_rate": 1.999971220839377e-05, "loss": 2.5519, "step": 9969 }, { "epoch": 0.13, "grad_norm": 4.707643985748291, "learning_rate": 1.999971141063886e-05, "loss": 1.9953, "step": 9970 }, { "epoch": 0.13, "grad_norm": 5.198553562164307, "learning_rate": 1.999971061177981e-05, "loss": 2.7813, "step": 9971 }, { "epoch": 0.13, "grad_norm": 4.971798419952393, "learning_rate": 1.9999709811816626e-05, "loss": 2.2611, "step": 9972 }, { "epoch": 0.13, "grad_norm": 5.475210189819336, "learning_rate": 1.99997090107493e-05, "loss": 2.3131, "step": 9973 }, { "epoch": 0.13, "grad_norm": 5.389121055603027, "learning_rate": 1.9999708208577838e-05, "loss": 2.7231, "step": 9974 }, { "epoch": 0.13, "grad_norm": 4.564779758453369, "learning_rate": 1.9999707405302243e-05, "loss": 2.2146, "step": 9975 }, { "epoch": 0.13, "grad_norm": 5.841198444366455, "learning_rate": 1.9999706600922507e-05, "loss": 2.9432, "step": 9976 }, { "epoch": 0.13, "grad_norm": 5.663082122802734, "learning_rate": 1.999970579543863e-05, "loss": 2.9503, "step": 9977 }, { "epoch": 0.13, "grad_norm": 5.451046943664551, "learning_rate": 1.9999704988850622e-05, "loss": 2.0864, "step": 9978 }, { "epoch": 0.13, "grad_norm": 5.426102638244629, "learning_rate": 1.9999704181158476e-05, "loss": 2.4629, "step": 9979 }, { "epoch": 0.13, "grad_norm": 5.265434741973877, "learning_rate": 1.9999703372362188e-05, "loss": 2.7796, "step": 9980 }, { "epoch": 0.13, "grad_norm": 5.257169723510742, "learning_rate": 1.999970256246177e-05, "loss": 2.879, "step": 9981 }, { "epoch": 0.13, "grad_norm": 4.725194454193115, "learning_rate": 1.999970175145721e-05, "loss": 2.3066, "step": 9982 }, { "epoch": 0.13, "grad_norm": 5.500418186187744, "learning_rate": 1.9999700939348515e-05, "loss": 2.354, "step": 9983 }, { "epoch": 0.13, "grad_norm": 5.20835542678833, "learning_rate": 1.9999700126135685e-05, "loss": 2.4347, "step": 9984 }, { "epoch": 0.13, "grad_norm": 5.465036392211914, "learning_rate": 1.9999699311818718e-05, "loss": 2.6409, "step": 9985 }, { "epoch": 0.13, "grad_norm": 4.9545207023620605, "learning_rate": 1.999969849639761e-05, "loss": 2.3539, "step": 9986 }, { "epoch": 0.13, "grad_norm": 4.890516757965088, "learning_rate": 1.9999697679872373e-05, "loss": 1.8796, "step": 9987 }, { "epoch": 0.13, "grad_norm": 6.438554763793945, "learning_rate": 1.9999696862242996e-05, "loss": 3.2547, "step": 9988 }, { "epoch": 0.13, "grad_norm": 5.316963195800781, "learning_rate": 1.999969604350948e-05, "loss": 2.5216, "step": 9989 }, { "epoch": 0.13, "grad_norm": 5.585529327392578, "learning_rate": 1.999969522367183e-05, "loss": 2.7053, "step": 9990 }, { "epoch": 0.13, "grad_norm": 5.439752578735352, "learning_rate": 1.9999694402730046e-05, "loss": 2.7132, "step": 9991 }, { "epoch": 0.13, "grad_norm": 5.150540828704834, "learning_rate": 1.9999693580684127e-05, "loss": 2.3029, "step": 9992 }, { "epoch": 0.13, "grad_norm": 5.223323345184326, "learning_rate": 1.9999692757534066e-05, "loss": 2.4491, "step": 9993 }, { "epoch": 0.13, "grad_norm": 5.669987678527832, "learning_rate": 1.9999691933279874e-05, "loss": 3.062, "step": 9994 }, { "epoch": 0.13, "grad_norm": 5.2642436027526855, "learning_rate": 1.9999691107921545e-05, "loss": 2.4078, "step": 9995 }, { "epoch": 0.13, "grad_norm": 4.675353050231934, "learning_rate": 1.999969028145908e-05, "loss": 2.2848, "step": 9996 }, { "epoch": 0.13, "grad_norm": 5.050381183624268, "learning_rate": 1.9999689453892482e-05, "loss": 2.2359, "step": 9997 }, { "epoch": 0.13, "grad_norm": 5.208996295928955, "learning_rate": 1.9999688625221745e-05, "loss": 2.3275, "step": 9998 }, { "epoch": 0.13, "grad_norm": 5.17495584487915, "learning_rate": 1.9999687795446878e-05, "loss": 2.0371, "step": 9999 }, { "epoch": 0.13, "grad_norm": 4.61837100982666, "learning_rate": 1.9999686964567872e-05, "loss": 1.8563, "step": 10000 }, { "epoch": 0.13, "grad_norm": 6.220460414886475, "learning_rate": 1.999968613258473e-05, "loss": 2.7272, "step": 10001 }, { "epoch": 0.13, "grad_norm": 5.739479064941406, "learning_rate": 1.9999685299497457e-05, "loss": 2.2373, "step": 10002 }, { "epoch": 0.13, "grad_norm": 4.9480156898498535, "learning_rate": 1.9999684465306047e-05, "loss": 2.3863, "step": 10003 }, { "epoch": 0.13, "grad_norm": 6.412378787994385, "learning_rate": 1.9999683630010503e-05, "loss": 3.206, "step": 10004 }, { "epoch": 0.13, "grad_norm": 4.49773645401001, "learning_rate": 1.9999682793610818e-05, "loss": 2.015, "step": 10005 }, { "epoch": 0.13, "grad_norm": 5.509541034698486, "learning_rate": 1.9999681956107005e-05, "loss": 2.8317, "step": 10006 }, { "epoch": 0.13, "grad_norm": 5.384940147399902, "learning_rate": 1.9999681117499058e-05, "loss": 2.419, "step": 10007 }, { "epoch": 0.13, "grad_norm": 5.0725274085998535, "learning_rate": 1.9999680277786972e-05, "loss": 2.5811, "step": 10008 }, { "epoch": 0.13, "grad_norm": 7.297532558441162, "learning_rate": 1.9999679436970755e-05, "loss": 2.6653, "step": 10009 }, { "epoch": 0.13, "grad_norm": 5.5739593505859375, "learning_rate": 1.9999678595050404e-05, "loss": 2.3892, "step": 10010 }, { "epoch": 0.13, "grad_norm": 4.969461441040039, "learning_rate": 1.9999677752025918e-05, "loss": 2.8651, "step": 10011 }, { "epoch": 0.13, "grad_norm": 5.0097808837890625, "learning_rate": 1.9999676907897298e-05, "loss": 2.0007, "step": 10012 }, { "epoch": 0.13, "grad_norm": 5.837946891784668, "learning_rate": 1.9999676062664543e-05, "loss": 1.8194, "step": 10013 }, { "epoch": 0.13, "grad_norm": 5.063373565673828, "learning_rate": 1.9999675216327654e-05, "loss": 2.3488, "step": 10014 }, { "epoch": 0.13, "grad_norm": 5.549603462219238, "learning_rate": 1.9999674368886636e-05, "loss": 2.6039, "step": 10015 }, { "epoch": 0.13, "grad_norm": 5.647200584411621, "learning_rate": 1.9999673520341478e-05, "loss": 2.6624, "step": 10016 }, { "epoch": 0.13, "grad_norm": 5.288131237030029, "learning_rate": 1.9999672670692188e-05, "loss": 2.6209, "step": 10017 }, { "epoch": 0.13, "grad_norm": 5.791133880615234, "learning_rate": 1.9999671819938767e-05, "loss": 2.1102, "step": 10018 }, { "epoch": 0.13, "grad_norm": 5.463635444641113, "learning_rate": 1.999967096808121e-05, "loss": 2.6285, "step": 10019 }, { "epoch": 0.13, "grad_norm": 5.1980719566345215, "learning_rate": 1.999967011511952e-05, "loss": 2.2935, "step": 10020 }, { "epoch": 0.13, "grad_norm": 5.473869323730469, "learning_rate": 1.9999669261053697e-05, "loss": 2.3638, "step": 10021 }, { "epoch": 0.13, "grad_norm": 5.100375652313232, "learning_rate": 1.9999668405883745e-05, "loss": 2.0106, "step": 10022 }, { "epoch": 0.13, "grad_norm": 4.92728853225708, "learning_rate": 1.999966754960965e-05, "loss": 1.8463, "step": 10023 }, { "epoch": 0.13, "grad_norm": 5.33973503112793, "learning_rate": 1.999966669223143e-05, "loss": 2.65, "step": 10024 }, { "epoch": 0.13, "grad_norm": 6.187189102172852, "learning_rate": 1.9999665833749077e-05, "loss": 2.5103, "step": 10025 }, { "epoch": 0.13, "grad_norm": 5.449306488037109, "learning_rate": 1.999966497416259e-05, "loss": 2.3942, "step": 10026 }, { "epoch": 0.13, "grad_norm": 5.834860324859619, "learning_rate": 1.999966411347197e-05, "loss": 2.5653, "step": 10027 }, { "epoch": 0.13, "grad_norm": 5.477726459503174, "learning_rate": 1.9999663251677215e-05, "loss": 2.5259, "step": 10028 }, { "epoch": 0.13, "grad_norm": 4.862590789794922, "learning_rate": 1.999966238877833e-05, "loss": 2.509, "step": 10029 }, { "epoch": 0.13, "grad_norm": 5.395468711853027, "learning_rate": 1.9999661524775313e-05, "loss": 2.0944, "step": 10030 }, { "epoch": 0.13, "grad_norm": 5.96921443939209, "learning_rate": 1.9999660659668163e-05, "loss": 2.7533, "step": 10031 }, { "epoch": 0.13, "grad_norm": 5.521668434143066, "learning_rate": 1.9999659793456882e-05, "loss": 2.6201, "step": 10032 }, { "epoch": 0.13, "grad_norm": 4.7363128662109375, "learning_rate": 1.9999658926141467e-05, "loss": 2.0797, "step": 10033 }, { "epoch": 0.13, "grad_norm": 6.000758647918701, "learning_rate": 1.9999658057721924e-05, "loss": 2.7566, "step": 10034 }, { "epoch": 0.13, "grad_norm": 5.278075218200684, "learning_rate": 1.9999657188198242e-05, "loss": 2.5792, "step": 10035 }, { "epoch": 0.13, "grad_norm": 6.621555805206299, "learning_rate": 1.9999656317570433e-05, "loss": 3.387, "step": 10036 }, { "epoch": 0.13, "grad_norm": 5.4103593826293945, "learning_rate": 1.9999655445838493e-05, "loss": 2.5275, "step": 10037 }, { "epoch": 0.13, "grad_norm": 4.476330280303955, "learning_rate": 1.999965457300242e-05, "loss": 1.853, "step": 10038 }, { "epoch": 0.13, "grad_norm": 5.30727481842041, "learning_rate": 1.9999653699062213e-05, "loss": 2.5701, "step": 10039 }, { "epoch": 0.13, "grad_norm": 5.305818557739258, "learning_rate": 1.999965282401788e-05, "loss": 2.4234, "step": 10040 }, { "epoch": 0.13, "grad_norm": 5.230493545532227, "learning_rate": 1.999965194786941e-05, "loss": 2.563, "step": 10041 }, { "epoch": 0.13, "grad_norm": 5.55306339263916, "learning_rate": 1.9999651070616815e-05, "loss": 2.6588, "step": 10042 }, { "epoch": 0.13, "grad_norm": 5.47067928314209, "learning_rate": 1.9999650192260085e-05, "loss": 2.3511, "step": 10043 }, { "epoch": 0.13, "grad_norm": 5.824985504150391, "learning_rate": 1.9999649312799223e-05, "loss": 2.3085, "step": 10044 }, { "epoch": 0.13, "grad_norm": 4.6484293937683105, "learning_rate": 1.999964843223423e-05, "loss": 2.1751, "step": 10045 }, { "epoch": 0.13, "grad_norm": 5.33190393447876, "learning_rate": 1.999964755056511e-05, "loss": 2.345, "step": 10046 }, { "epoch": 0.13, "grad_norm": 5.268886566162109, "learning_rate": 1.9999646667791858e-05, "loss": 2.1836, "step": 10047 }, { "epoch": 0.13, "grad_norm": 5.127226829528809, "learning_rate": 1.999964578391447e-05, "loss": 2.703, "step": 10048 }, { "epoch": 0.13, "grad_norm": 5.560047149658203, "learning_rate": 1.9999644898932958e-05, "loss": 2.7817, "step": 10049 }, { "epoch": 0.13, "grad_norm": 6.50777006149292, "learning_rate": 1.9999644012847312e-05, "loss": 2.2134, "step": 10050 }, { "epoch": 0.13, "grad_norm": 5.685068607330322, "learning_rate": 1.9999643125657536e-05, "loss": 2.0827, "step": 10051 }, { "epoch": 0.13, "grad_norm": 5.437464714050293, "learning_rate": 1.9999642237363632e-05, "loss": 2.4422, "step": 10052 }, { "epoch": 0.13, "grad_norm": 5.800394058227539, "learning_rate": 1.9999641347965596e-05, "loss": 2.3307, "step": 10053 }, { "epoch": 0.13, "grad_norm": 5.845876216888428, "learning_rate": 1.9999640457463433e-05, "loss": 2.9211, "step": 10054 }, { "epoch": 0.13, "grad_norm": 6.245619773864746, "learning_rate": 1.9999639565857136e-05, "loss": 2.8762, "step": 10055 }, { "epoch": 0.13, "grad_norm": 5.398568630218506, "learning_rate": 1.999963867314671e-05, "loss": 2.6579, "step": 10056 }, { "epoch": 0.13, "grad_norm": 5.213172912597656, "learning_rate": 1.9999637779332154e-05, "loss": 2.3042, "step": 10057 }, { "epoch": 0.13, "grad_norm": 5.472138404846191, "learning_rate": 1.999963688441347e-05, "loss": 2.3775, "step": 10058 }, { "epoch": 0.13, "grad_norm": 5.6400346755981445, "learning_rate": 1.9999635988390657e-05, "loss": 2.45, "step": 10059 }, { "epoch": 0.13, "grad_norm": 6.219076156616211, "learning_rate": 1.999963509126371e-05, "loss": 2.8248, "step": 10060 }, { "epoch": 0.13, "grad_norm": 5.3100786209106445, "learning_rate": 1.999963419303264e-05, "loss": 2.6366, "step": 10061 }, { "epoch": 0.13, "grad_norm": 6.257635116577148, "learning_rate": 1.9999633293697437e-05, "loss": 3.0159, "step": 10062 }, { "epoch": 0.13, "grad_norm": 5.6755805015563965, "learning_rate": 1.9999632393258107e-05, "loss": 2.7194, "step": 10063 }, { "epoch": 0.13, "grad_norm": 5.310375690460205, "learning_rate": 1.9999631491714642e-05, "loss": 2.3029, "step": 10064 }, { "epoch": 0.13, "grad_norm": 4.884968280792236, "learning_rate": 1.9999630589067053e-05, "loss": 2.3583, "step": 10065 }, { "epoch": 0.13, "grad_norm": 6.164969444274902, "learning_rate": 1.9999629685315337e-05, "loss": 2.1986, "step": 10066 }, { "epoch": 0.13, "grad_norm": 5.812868118286133, "learning_rate": 1.999962878045949e-05, "loss": 2.6371, "step": 10067 }, { "epoch": 0.13, "grad_norm": 5.14797830581665, "learning_rate": 1.9999627874499513e-05, "loss": 2.2331, "step": 10068 }, { "epoch": 0.13, "grad_norm": 5.342215538024902, "learning_rate": 1.999962696743541e-05, "loss": 2.4619, "step": 10069 }, { "epoch": 0.13, "grad_norm": 5.688112258911133, "learning_rate": 1.9999626059267175e-05, "loss": 2.4537, "step": 10070 }, { "epoch": 0.13, "grad_norm": 5.211683750152588, "learning_rate": 1.9999625149994816e-05, "loss": 2.4051, "step": 10071 }, { "epoch": 0.13, "grad_norm": 5.450534343719482, "learning_rate": 1.9999624239618326e-05, "loss": 2.3756, "step": 10072 }, { "epoch": 0.13, "grad_norm": 5.503137588500977, "learning_rate": 1.9999623328137705e-05, "loss": 2.5702, "step": 10073 }, { "epoch": 0.13, "grad_norm": 5.467910289764404, "learning_rate": 1.999962241555296e-05, "loss": 2.3821, "step": 10074 }, { "epoch": 0.13, "grad_norm": 5.774941921234131, "learning_rate": 1.999962150186409e-05, "loss": 2.9827, "step": 10075 }, { "epoch": 0.13, "grad_norm": 5.223016738891602, "learning_rate": 1.9999620587071084e-05, "loss": 1.987, "step": 10076 }, { "epoch": 0.13, "grad_norm": 5.761370658874512, "learning_rate": 1.9999619671173955e-05, "loss": 2.7125, "step": 10077 }, { "epoch": 0.13, "grad_norm": 5.438714981079102, "learning_rate": 1.9999618754172698e-05, "loss": 2.5629, "step": 10078 }, { "epoch": 0.13, "grad_norm": 4.797371864318848, "learning_rate": 1.9999617836067317e-05, "loss": 1.5932, "step": 10079 }, { "epoch": 0.13, "grad_norm": 5.308262348175049, "learning_rate": 1.99996169168578e-05, "loss": 2.8255, "step": 10080 }, { "epoch": 0.13, "grad_norm": 5.445788860321045, "learning_rate": 1.9999615996544164e-05, "loss": 2.4004, "step": 10081 }, { "epoch": 0.13, "grad_norm": 5.7157158851623535, "learning_rate": 1.9999615075126396e-05, "loss": 2.2845, "step": 10082 }, { "epoch": 0.13, "grad_norm": 6.045007228851318, "learning_rate": 1.99996141526045e-05, "loss": 2.5319, "step": 10083 }, { "epoch": 0.13, "grad_norm": 4.798138618469238, "learning_rate": 1.999961322897848e-05, "loss": 1.9163, "step": 10084 }, { "epoch": 0.13, "grad_norm": 5.696242332458496, "learning_rate": 1.9999612304248336e-05, "loss": 2.7418, "step": 10085 }, { "epoch": 0.13, "grad_norm": 6.091297626495361, "learning_rate": 1.999961137841406e-05, "loss": 3.0467, "step": 10086 }, { "epoch": 0.13, "grad_norm": 4.949251651763916, "learning_rate": 1.9999610451475657e-05, "loss": 2.1144, "step": 10087 }, { "epoch": 0.13, "grad_norm": 5.861688613891602, "learning_rate": 1.999960952343313e-05, "loss": 2.6793, "step": 10088 }, { "epoch": 0.13, "grad_norm": 5.793869495391846, "learning_rate": 1.9999608594286474e-05, "loss": 2.8112, "step": 10089 }, { "epoch": 0.13, "grad_norm": 5.549623489379883, "learning_rate": 1.9999607664035694e-05, "loss": 2.487, "step": 10090 }, { "epoch": 0.13, "grad_norm": 5.766852378845215, "learning_rate": 1.9999606732680786e-05, "loss": 2.8301, "step": 10091 }, { "epoch": 0.13, "grad_norm": 5.194030284881592, "learning_rate": 1.9999605800221754e-05, "loss": 2.1844, "step": 10092 }, { "epoch": 0.13, "grad_norm": 5.423338413238525, "learning_rate": 1.9999604866658594e-05, "loss": 2.4647, "step": 10093 }, { "epoch": 0.13, "grad_norm": 5.470794200897217, "learning_rate": 1.9999603931991306e-05, "loss": 2.8619, "step": 10094 }, { "epoch": 0.13, "grad_norm": 5.4702534675598145, "learning_rate": 1.9999602996219894e-05, "loss": 2.9407, "step": 10095 }, { "epoch": 0.13, "grad_norm": 5.981429100036621, "learning_rate": 1.9999602059344358e-05, "loss": 2.559, "step": 10096 }, { "epoch": 0.13, "grad_norm": 5.459500789642334, "learning_rate": 1.9999601121364697e-05, "loss": 2.3094, "step": 10097 }, { "epoch": 0.13, "grad_norm": 5.079380512237549, "learning_rate": 1.9999600182280905e-05, "loss": 2.603, "step": 10098 }, { "epoch": 0.13, "grad_norm": 4.827220439910889, "learning_rate": 1.9999599242092992e-05, "loss": 1.9812, "step": 10099 }, { "epoch": 0.13, "grad_norm": 4.984822750091553, "learning_rate": 1.999959830080095e-05, "loss": 2.1929, "step": 10100 }, { "epoch": 0.13, "grad_norm": 5.990591526031494, "learning_rate": 1.9999597358404786e-05, "loss": 2.5891, "step": 10101 }, { "epoch": 0.13, "grad_norm": 4.690168857574463, "learning_rate": 1.99995964149045e-05, "loss": 1.7154, "step": 10102 }, { "epoch": 0.13, "grad_norm": 5.837810039520264, "learning_rate": 1.9999595470300082e-05, "loss": 2.5964, "step": 10103 }, { "epoch": 0.13, "grad_norm": 5.352457046508789, "learning_rate": 1.999959452459154e-05, "loss": 2.237, "step": 10104 }, { "epoch": 0.13, "grad_norm": 5.33603572845459, "learning_rate": 1.9999593577778875e-05, "loss": 2.1803, "step": 10105 }, { "epoch": 0.13, "grad_norm": 6.253325939178467, "learning_rate": 1.9999592629862084e-05, "loss": 2.7731, "step": 10106 }, { "epoch": 0.13, "grad_norm": 5.732785701751709, "learning_rate": 1.999959168084117e-05, "loss": 2.8223, "step": 10107 }, { "epoch": 0.13, "grad_norm": 5.50583553314209, "learning_rate": 1.9999590730716133e-05, "loss": 2.1807, "step": 10108 }, { "epoch": 0.13, "grad_norm": 5.072847366333008, "learning_rate": 1.999958977948697e-05, "loss": 2.55, "step": 10109 }, { "epoch": 0.13, "grad_norm": 5.295651435852051, "learning_rate": 1.9999588827153678e-05, "loss": 2.3569, "step": 10110 }, { "epoch": 0.13, "grad_norm": 5.326240062713623, "learning_rate": 1.999958787371627e-05, "loss": 2.6831, "step": 10111 }, { "epoch": 0.13, "grad_norm": 5.71162223815918, "learning_rate": 1.9999586919174732e-05, "loss": 2.2388, "step": 10112 }, { "epoch": 0.13, "grad_norm": 5.645767688751221, "learning_rate": 1.999958596352907e-05, "loss": 2.6283, "step": 10113 }, { "epoch": 0.13, "grad_norm": 4.85174036026001, "learning_rate": 1.999958500677929e-05, "loss": 2.0623, "step": 10114 }, { "epoch": 0.13, "grad_norm": 4.894235610961914, "learning_rate": 1.999958404892538e-05, "loss": 2.4538, "step": 10115 }, { "epoch": 0.13, "grad_norm": 5.788656711578369, "learning_rate": 1.9999583089967348e-05, "loss": 2.3165, "step": 10116 }, { "epoch": 0.13, "grad_norm": 5.418711185455322, "learning_rate": 1.999958212990519e-05, "loss": 2.6864, "step": 10117 }, { "epoch": 0.13, "grad_norm": 5.621740341186523, "learning_rate": 1.9999581168738913e-05, "loss": 2.7185, "step": 10118 }, { "epoch": 0.13, "grad_norm": 5.40449333190918, "learning_rate": 1.999958020646851e-05, "loss": 2.4047, "step": 10119 }, { "epoch": 0.13, "grad_norm": 5.316451072692871, "learning_rate": 1.9999579243093983e-05, "loss": 2.6938, "step": 10120 }, { "epoch": 0.13, "grad_norm": 4.733452320098877, "learning_rate": 1.9999578278615334e-05, "loss": 2.4161, "step": 10121 }, { "epoch": 0.13, "grad_norm": 6.022932529449463, "learning_rate": 1.9999577313032567e-05, "loss": 2.5935, "step": 10122 }, { "epoch": 0.13, "grad_norm": 5.325408458709717, "learning_rate": 1.9999576346345672e-05, "loss": 2.157, "step": 10123 }, { "epoch": 0.13, "grad_norm": 5.804910182952881, "learning_rate": 1.999957537855465e-05, "loss": 2.2131, "step": 10124 }, { "epoch": 0.13, "grad_norm": 5.341468334197998, "learning_rate": 1.999957440965951e-05, "loss": 2.5437, "step": 10125 }, { "epoch": 0.13, "grad_norm": 5.2744646072387695, "learning_rate": 1.9999573439660248e-05, "loss": 2.0999, "step": 10126 }, { "epoch": 0.13, "grad_norm": 5.922471523284912, "learning_rate": 1.9999572468556862e-05, "loss": 2.4172, "step": 10127 }, { "epoch": 0.13, "grad_norm": 5.779613971710205, "learning_rate": 1.9999571496349352e-05, "loss": 2.6003, "step": 10128 }, { "epoch": 0.13, "grad_norm": 5.683196067810059, "learning_rate": 1.9999570523037724e-05, "loss": 2.7516, "step": 10129 }, { "epoch": 0.13, "grad_norm": 6.021291255950928, "learning_rate": 1.9999569548621972e-05, "loss": 2.7716, "step": 10130 }, { "epoch": 0.13, "grad_norm": 6.674376010894775, "learning_rate": 1.9999568573102092e-05, "loss": 2.4712, "step": 10131 }, { "epoch": 0.13, "grad_norm": 5.695205211639404, "learning_rate": 1.9999567596478098e-05, "loss": 2.6053, "step": 10132 }, { "epoch": 0.13, "grad_norm": 5.3453779220581055, "learning_rate": 1.9999566618749976e-05, "loss": 2.5329, "step": 10133 }, { "epoch": 0.13, "grad_norm": 4.005855083465576, "learning_rate": 1.9999565639917737e-05, "loss": 1.6778, "step": 10134 }, { "epoch": 0.13, "grad_norm": 4.919674873352051, "learning_rate": 1.9999564659981376e-05, "loss": 2.3344, "step": 10135 }, { "epoch": 0.13, "grad_norm": 5.997109889984131, "learning_rate": 1.999956367894089e-05, "loss": 2.7215, "step": 10136 }, { "epoch": 0.13, "grad_norm": 6.106285095214844, "learning_rate": 1.9999562696796286e-05, "loss": 2.8391, "step": 10137 }, { "epoch": 0.13, "grad_norm": 5.496445178985596, "learning_rate": 1.999956171354756e-05, "loss": 2.6492, "step": 10138 }, { "epoch": 0.13, "grad_norm": 5.064724445343018, "learning_rate": 1.999956072919471e-05, "loss": 2.0546, "step": 10139 }, { "epoch": 0.13, "grad_norm": 5.338379383087158, "learning_rate": 1.9999559743737742e-05, "loss": 2.4357, "step": 10140 }, { "epoch": 0.13, "grad_norm": 5.78061580657959, "learning_rate": 1.9999558757176652e-05, "loss": 2.2935, "step": 10141 }, { "epoch": 0.13, "grad_norm": 6.160563945770264, "learning_rate": 1.9999557769511438e-05, "loss": 2.8429, "step": 10142 }, { "epoch": 0.13, "grad_norm": 4.780494213104248, "learning_rate": 1.9999556780742106e-05, "loss": 1.9083, "step": 10143 }, { "epoch": 0.13, "grad_norm": 4.6943769454956055, "learning_rate": 1.9999555790868657e-05, "loss": 1.9268, "step": 10144 }, { "epoch": 0.13, "grad_norm": 6.350560188293457, "learning_rate": 1.999955479989108e-05, "loss": 3.163, "step": 10145 }, { "epoch": 0.13, "grad_norm": 4.411236763000488, "learning_rate": 1.9999553807809388e-05, "loss": 1.7336, "step": 10146 }, { "epoch": 0.13, "grad_norm": 5.75031852722168, "learning_rate": 1.9999552814623576e-05, "loss": 2.5294, "step": 10147 }, { "epoch": 0.13, "grad_norm": 5.57257604598999, "learning_rate": 1.999955182033364e-05, "loss": 2.2338, "step": 10148 }, { "epoch": 0.13, "grad_norm": 5.543344497680664, "learning_rate": 1.9999550824939584e-05, "loss": 2.4413, "step": 10149 }, { "epoch": 0.13, "grad_norm": 4.642663478851318, "learning_rate": 1.9999549828441412e-05, "loss": 2.0375, "step": 10150 }, { "epoch": 0.13, "grad_norm": 5.5119547843933105, "learning_rate": 1.9999548830839116e-05, "loss": 2.2416, "step": 10151 }, { "epoch": 0.13, "grad_norm": 5.551476955413818, "learning_rate": 1.99995478321327e-05, "loss": 2.617, "step": 10152 }, { "epoch": 0.13, "grad_norm": 5.233338356018066, "learning_rate": 1.9999546832322167e-05, "loss": 2.0767, "step": 10153 }, { "epoch": 0.13, "grad_norm": 5.924254417419434, "learning_rate": 1.999954583140751e-05, "loss": 2.3264, "step": 10154 }, { "epoch": 0.13, "grad_norm": 5.44086217880249, "learning_rate": 1.9999544829388737e-05, "loss": 2.411, "step": 10155 }, { "epoch": 0.13, "grad_norm": 5.713037967681885, "learning_rate": 1.9999543826265846e-05, "loss": 2.1107, "step": 10156 }, { "epoch": 0.13, "grad_norm": 4.671113967895508, "learning_rate": 1.9999542822038833e-05, "loss": 2.1191, "step": 10157 }, { "epoch": 0.13, "grad_norm": 5.281075477600098, "learning_rate": 1.9999541816707703e-05, "loss": 2.5772, "step": 10158 }, { "epoch": 0.13, "grad_norm": 5.3638997077941895, "learning_rate": 1.9999540810272453e-05, "loss": 2.4112, "step": 10159 }, { "epoch": 0.13, "grad_norm": 6.1281352043151855, "learning_rate": 1.9999539802733084e-05, "loss": 2.5793, "step": 10160 }, { "epoch": 0.13, "grad_norm": 5.7836995124816895, "learning_rate": 1.9999538794089598e-05, "loss": 2.4758, "step": 10161 }, { "epoch": 0.13, "grad_norm": 5.725590705871582, "learning_rate": 1.9999537784341987e-05, "loss": 2.5311, "step": 10162 }, { "epoch": 0.13, "grad_norm": 5.549458980560303, "learning_rate": 1.9999536773490266e-05, "loss": 2.5798, "step": 10163 }, { "epoch": 0.13, "grad_norm": 4.953955173492432, "learning_rate": 1.999953576153442e-05, "loss": 2.0866, "step": 10164 }, { "epoch": 0.13, "grad_norm": 4.903487682342529, "learning_rate": 1.9999534748474457e-05, "loss": 2.2198, "step": 10165 }, { "epoch": 0.13, "grad_norm": 5.60330867767334, "learning_rate": 1.9999533734310376e-05, "loss": 2.3484, "step": 10166 }, { "epoch": 0.13, "grad_norm": 5.345220565795898, "learning_rate": 1.9999532719042177e-05, "loss": 2.2054, "step": 10167 }, { "epoch": 0.13, "grad_norm": 5.527586460113525, "learning_rate": 1.999953170266986e-05, "loss": 2.3681, "step": 10168 }, { "epoch": 0.13, "grad_norm": 5.715392589569092, "learning_rate": 1.9999530685193424e-05, "loss": 2.3366, "step": 10169 }, { "epoch": 0.13, "grad_norm": 5.8569159507751465, "learning_rate": 1.9999529666612873e-05, "loss": 2.743, "step": 10170 }, { "epoch": 0.13, "grad_norm": 5.750509738922119, "learning_rate": 1.99995286469282e-05, "loss": 2.8415, "step": 10171 }, { "epoch": 0.13, "grad_norm": 7.225132942199707, "learning_rate": 1.9999527626139414e-05, "loss": 2.6152, "step": 10172 }, { "epoch": 0.13, "grad_norm": 6.026980876922607, "learning_rate": 1.999952660424651e-05, "loss": 2.4684, "step": 10173 }, { "epoch": 0.13, "grad_norm": 5.7162017822265625, "learning_rate": 1.9999525581249484e-05, "loss": 2.6378, "step": 10174 }, { "epoch": 0.13, "grad_norm": 5.4862847328186035, "learning_rate": 1.9999524557148345e-05, "loss": 2.4168, "step": 10175 }, { "epoch": 0.13, "grad_norm": 5.129925727844238, "learning_rate": 1.999952353194309e-05, "loss": 2.1357, "step": 10176 }, { "epoch": 0.13, "grad_norm": 5.935206890106201, "learning_rate": 1.9999522505633717e-05, "loss": 2.6422, "step": 10177 }, { "epoch": 0.13, "grad_norm": 5.363907337188721, "learning_rate": 1.9999521478220224e-05, "loss": 2.1887, "step": 10178 }, { "epoch": 0.13, "grad_norm": 5.599534511566162, "learning_rate": 1.9999520449702618e-05, "loss": 2.4961, "step": 10179 }, { "epoch": 0.13, "grad_norm": 5.398186683654785, "learning_rate": 1.999951942008089e-05, "loss": 2.0021, "step": 10180 }, { "epoch": 0.13, "grad_norm": 5.303134918212891, "learning_rate": 1.9999518389355052e-05, "loss": 2.2869, "step": 10181 }, { "epoch": 0.13, "grad_norm": 4.937140941619873, "learning_rate": 1.9999517357525093e-05, "loss": 2.235, "step": 10182 }, { "epoch": 0.13, "grad_norm": 5.396292686462402, "learning_rate": 1.999951632459102e-05, "loss": 2.3936, "step": 10183 }, { "epoch": 0.13, "grad_norm": 6.338315486907959, "learning_rate": 1.9999515290552828e-05, "loss": 2.9962, "step": 10184 }, { "epoch": 0.13, "grad_norm": 5.863398551940918, "learning_rate": 1.9999514255410522e-05, "loss": 2.7541, "step": 10185 }, { "epoch": 0.13, "grad_norm": 5.386826038360596, "learning_rate": 1.99995132191641e-05, "loss": 2.438, "step": 10186 }, { "epoch": 0.13, "grad_norm": 5.421504497528076, "learning_rate": 1.9999512181813562e-05, "loss": 2.2488, "step": 10187 }, { "epoch": 0.13, "grad_norm": 5.315975189208984, "learning_rate": 1.9999511143358904e-05, "loss": 2.5367, "step": 10188 }, { "epoch": 0.13, "grad_norm": 5.544501781463623, "learning_rate": 1.9999510103800138e-05, "loss": 2.5557, "step": 10189 }, { "epoch": 0.13, "grad_norm": 5.049028396606445, "learning_rate": 1.999950906313725e-05, "loss": 2.1575, "step": 10190 }, { "epoch": 0.13, "grad_norm": 5.617339611053467, "learning_rate": 1.999950802137025e-05, "loss": 2.2053, "step": 10191 }, { "epoch": 0.13, "grad_norm": 4.828122138977051, "learning_rate": 1.9999506978499132e-05, "loss": 2.1682, "step": 10192 }, { "epoch": 0.13, "grad_norm": 5.235500812530518, "learning_rate": 1.9999505934523903e-05, "loss": 2.3119, "step": 10193 }, { "epoch": 0.13, "grad_norm": 4.579885959625244, "learning_rate": 1.9999504889444556e-05, "loss": 2.2008, "step": 10194 }, { "epoch": 0.13, "grad_norm": 6.317058086395264, "learning_rate": 1.9999503843261095e-05, "loss": 2.4594, "step": 10195 }, { "epoch": 0.13, "grad_norm": 5.6555376052856445, "learning_rate": 1.999950279597352e-05, "loss": 2.7789, "step": 10196 }, { "epoch": 0.13, "grad_norm": 5.1878862380981445, "learning_rate": 1.9999501747581823e-05, "loss": 2.1898, "step": 10197 }, { "epoch": 0.13, "grad_norm": 6.360954284667969, "learning_rate": 1.999950069808602e-05, "loss": 3.2604, "step": 10198 }, { "epoch": 0.13, "grad_norm": 5.358856678009033, "learning_rate": 1.99994996474861e-05, "loss": 2.3086, "step": 10199 }, { "epoch": 0.13, "grad_norm": 5.312686920166016, "learning_rate": 1.9999498595782065e-05, "loss": 2.291, "step": 10200 }, { "epoch": 0.13, "grad_norm": 5.0168375968933105, "learning_rate": 1.9999497542973915e-05, "loss": 2.53, "step": 10201 }, { "epoch": 0.13, "grad_norm": 5.622304439544678, "learning_rate": 1.9999496489061655e-05, "loss": 2.4375, "step": 10202 }, { "epoch": 0.13, "grad_norm": 5.420966625213623, "learning_rate": 1.9999495434045276e-05, "loss": 2.2021, "step": 10203 }, { "epoch": 0.13, "grad_norm": 4.691073894500732, "learning_rate": 1.9999494377924784e-05, "loss": 2.2874, "step": 10204 }, { "epoch": 0.13, "grad_norm": 4.969547748565674, "learning_rate": 1.9999493320700177e-05, "loss": 2.2254, "step": 10205 }, { "epoch": 0.13, "grad_norm": 5.402833938598633, "learning_rate": 1.999949226237146e-05, "loss": 2.5415, "step": 10206 }, { "epoch": 0.13, "grad_norm": 4.775497913360596, "learning_rate": 1.9999491202938627e-05, "loss": 2.5433, "step": 10207 }, { "epoch": 0.13, "grad_norm": 4.921910762786865, "learning_rate": 1.999949014240168e-05, "loss": 1.8547, "step": 10208 }, { "epoch": 0.13, "grad_norm": 5.3299336433410645, "learning_rate": 1.9999489080760624e-05, "loss": 2.3834, "step": 10209 }, { "epoch": 0.13, "grad_norm": 6.204709529876709, "learning_rate": 1.999948801801545e-05, "loss": 2.3634, "step": 10210 }, { "epoch": 0.13, "grad_norm": 6.0451340675354, "learning_rate": 1.9999486954166165e-05, "loss": 2.3873, "step": 10211 }, { "epoch": 0.13, "grad_norm": 5.502793312072754, "learning_rate": 1.9999485889212765e-05, "loss": 2.4802, "step": 10212 }, { "epoch": 0.13, "grad_norm": 4.9859137535095215, "learning_rate": 1.9999484823155255e-05, "loss": 1.8488, "step": 10213 }, { "epoch": 0.13, "grad_norm": 5.640886306762695, "learning_rate": 1.9999483755993633e-05, "loss": 2.5957, "step": 10214 }, { "epoch": 0.13, "grad_norm": 6.349592685699463, "learning_rate": 1.9999482687727894e-05, "loss": 2.5619, "step": 10215 }, { "epoch": 0.13, "grad_norm": 4.744068622589111, "learning_rate": 1.9999481618358045e-05, "loss": 2.1067, "step": 10216 }, { "epoch": 0.13, "grad_norm": 4.883432388305664, "learning_rate": 1.9999480547884084e-05, "loss": 2.1219, "step": 10217 }, { "epoch": 0.13, "grad_norm": 5.5689191818237305, "learning_rate": 1.999947947630601e-05, "loss": 1.9501, "step": 10218 }, { "epoch": 0.13, "grad_norm": 5.260814189910889, "learning_rate": 1.999947840362382e-05, "loss": 2.6276, "step": 10219 }, { "epoch": 0.13, "grad_norm": 5.15578031539917, "learning_rate": 1.9999477329837524e-05, "loss": 2.205, "step": 10220 }, { "epoch": 0.13, "grad_norm": 5.589395999908447, "learning_rate": 1.9999476254947113e-05, "loss": 2.0657, "step": 10221 }, { "epoch": 0.13, "grad_norm": 5.726709365844727, "learning_rate": 1.9999475178952592e-05, "loss": 2.8981, "step": 10222 }, { "epoch": 0.13, "grad_norm": 5.154362201690674, "learning_rate": 1.9999474101853956e-05, "loss": 2.7104, "step": 10223 }, { "epoch": 0.13, "grad_norm": 5.552913665771484, "learning_rate": 1.9999473023651213e-05, "loss": 2.3737, "step": 10224 }, { "epoch": 0.13, "grad_norm": 5.436859130859375, "learning_rate": 1.9999471944344356e-05, "loss": 2.3994, "step": 10225 }, { "epoch": 0.13, "grad_norm": 6.004275798797607, "learning_rate": 1.999947086393339e-05, "loss": 2.7067, "step": 10226 }, { "epoch": 0.13, "grad_norm": 5.368735313415527, "learning_rate": 1.999946978241831e-05, "loss": 2.344, "step": 10227 }, { "epoch": 0.13, "grad_norm": 5.965420722961426, "learning_rate": 1.999946869979912e-05, "loss": 2.6155, "step": 10228 }, { "epoch": 0.13, "grad_norm": 5.805778980255127, "learning_rate": 1.9999467616075814e-05, "loss": 2.5056, "step": 10229 }, { "epoch": 0.13, "grad_norm": 4.796544075012207, "learning_rate": 1.9999466531248406e-05, "loss": 2.0529, "step": 10230 }, { "epoch": 0.13, "grad_norm": 4.380945205688477, "learning_rate": 1.999946544531688e-05, "loss": 1.8961, "step": 10231 }, { "epoch": 0.13, "grad_norm": 5.089148044586182, "learning_rate": 1.9999464358281247e-05, "loss": 2.1175, "step": 10232 }, { "epoch": 0.13, "grad_norm": 4.624680995941162, "learning_rate": 1.9999463270141503e-05, "loss": 2.2261, "step": 10233 }, { "epoch": 0.13, "grad_norm": 4.665546417236328, "learning_rate": 1.999946218089765e-05, "loss": 2.3881, "step": 10234 }, { "epoch": 0.13, "grad_norm": 4.726602077484131, "learning_rate": 1.9999461090549683e-05, "loss": 2.1178, "step": 10235 }, { "epoch": 0.13, "grad_norm": 5.350476264953613, "learning_rate": 1.999945999909761e-05, "loss": 2.4765, "step": 10236 }, { "epoch": 0.13, "grad_norm": 5.457244873046875, "learning_rate": 1.9999458906541425e-05, "loss": 2.3239, "step": 10237 }, { "epoch": 0.13, "grad_norm": 4.5922064781188965, "learning_rate": 1.999945781288113e-05, "loss": 2.303, "step": 10238 }, { "epoch": 0.13, "grad_norm": 4.731668949127197, "learning_rate": 1.9999456718116725e-05, "loss": 1.9747, "step": 10239 }, { "epoch": 0.13, "grad_norm": 5.559168815612793, "learning_rate": 1.999945562224821e-05, "loss": 2.5129, "step": 10240 }, { "epoch": 0.13, "grad_norm": 5.662050724029541, "learning_rate": 1.9999454525275585e-05, "loss": 2.5714, "step": 10241 }, { "epoch": 0.13, "grad_norm": 5.569206237792969, "learning_rate": 1.9999453427198853e-05, "loss": 2.4938, "step": 10242 }, { "epoch": 0.13, "grad_norm": 5.236237049102783, "learning_rate": 1.999945232801801e-05, "loss": 2.4524, "step": 10243 }, { "epoch": 0.13, "grad_norm": 4.849227428436279, "learning_rate": 1.9999451227733058e-05, "loss": 2.4369, "step": 10244 }, { "epoch": 0.13, "grad_norm": 6.004542350769043, "learning_rate": 1.9999450126343997e-05, "loss": 2.6703, "step": 10245 }, { "epoch": 0.13, "grad_norm": 5.254395484924316, "learning_rate": 1.9999449023850826e-05, "loss": 2.5639, "step": 10246 }, { "epoch": 0.13, "grad_norm": 5.65567684173584, "learning_rate": 1.999944792025355e-05, "loss": 2.2545, "step": 10247 }, { "epoch": 0.13, "grad_norm": 5.055978775024414, "learning_rate": 1.999944681555216e-05, "loss": 2.6118, "step": 10248 }, { "epoch": 0.13, "grad_norm": 6.213339328765869, "learning_rate": 1.9999445709746664e-05, "loss": 2.7836, "step": 10249 }, { "epoch": 0.13, "grad_norm": 4.618037700653076, "learning_rate": 1.9999444602837063e-05, "loss": 1.953, "step": 10250 }, { "epoch": 0.13, "grad_norm": 5.1914143562316895, "learning_rate": 1.9999443494823347e-05, "loss": 2.7889, "step": 10251 }, { "epoch": 0.13, "grad_norm": 5.446054458618164, "learning_rate": 1.9999442385705524e-05, "loss": 2.2536, "step": 10252 }, { "epoch": 0.13, "grad_norm": 5.90007209777832, "learning_rate": 1.9999441275483598e-05, "loss": 2.8049, "step": 10253 }, { "epoch": 0.13, "grad_norm": 5.724555015563965, "learning_rate": 1.9999440164157563e-05, "loss": 2.5785, "step": 10254 }, { "epoch": 0.13, "grad_norm": 5.065841197967529, "learning_rate": 1.9999439051727415e-05, "loss": 2.4554, "step": 10255 }, { "epoch": 0.13, "grad_norm": 5.454145908355713, "learning_rate": 1.9999437938193162e-05, "loss": 2.2742, "step": 10256 }, { "epoch": 0.13, "grad_norm": 5.086942672729492, "learning_rate": 1.9999436823554802e-05, "loss": 2.2581, "step": 10257 }, { "epoch": 0.13, "grad_norm": 5.17227840423584, "learning_rate": 1.9999435707812335e-05, "loss": 2.8759, "step": 10258 }, { "epoch": 0.13, "grad_norm": 5.2287492752075195, "learning_rate": 1.999943459096576e-05, "loss": 2.1276, "step": 10259 }, { "epoch": 0.13, "grad_norm": 4.516557216644287, "learning_rate": 1.9999433473015077e-05, "loss": 2.1341, "step": 10260 }, { "epoch": 0.13, "grad_norm": 4.7225494384765625, "learning_rate": 1.999943235396029e-05, "loss": 2.6314, "step": 10261 }, { "epoch": 0.13, "grad_norm": 4.609950542449951, "learning_rate": 1.999943123380139e-05, "loss": 2.2791, "step": 10262 }, { "epoch": 0.13, "grad_norm": 5.458486557006836, "learning_rate": 1.999943011253839e-05, "loss": 2.4416, "step": 10263 }, { "epoch": 0.13, "grad_norm": 4.971062183380127, "learning_rate": 1.999942899017128e-05, "loss": 2.1081, "step": 10264 }, { "epoch": 0.13, "grad_norm": 5.510566234588623, "learning_rate": 1.9999427866700064e-05, "loss": 2.5048, "step": 10265 }, { "epoch": 0.13, "grad_norm": 5.410811424255371, "learning_rate": 1.999942674212474e-05, "loss": 2.1331, "step": 10266 }, { "epoch": 0.13, "grad_norm": 5.152415752410889, "learning_rate": 1.9999425616445312e-05, "loss": 2.7107, "step": 10267 }, { "epoch": 0.13, "grad_norm": 5.451257228851318, "learning_rate": 1.9999424489661777e-05, "loss": 2.2807, "step": 10268 }, { "epoch": 0.13, "grad_norm": 5.923196792602539, "learning_rate": 1.9999423361774135e-05, "loss": 2.5975, "step": 10269 }, { "epoch": 0.13, "grad_norm": 5.07298469543457, "learning_rate": 1.9999422232782385e-05, "loss": 2.5939, "step": 10270 }, { "epoch": 0.13, "grad_norm": 4.794642448425293, "learning_rate": 1.9999421102686534e-05, "loss": 2.5371, "step": 10271 }, { "epoch": 0.13, "grad_norm": 5.6067633628845215, "learning_rate": 1.9999419971486572e-05, "loss": 2.6962, "step": 10272 }, { "epoch": 0.13, "grad_norm": 5.812892436981201, "learning_rate": 1.999941883918251e-05, "loss": 2.6031, "step": 10273 }, { "epoch": 0.13, "grad_norm": 5.240294933319092, "learning_rate": 1.999941770577434e-05, "loss": 2.437, "step": 10274 }, { "epoch": 0.13, "grad_norm": 4.853691101074219, "learning_rate": 1.9999416571262063e-05, "loss": 2.1893, "step": 10275 }, { "epoch": 0.13, "grad_norm": 5.775742530822754, "learning_rate": 1.9999415435645685e-05, "loss": 2.5253, "step": 10276 }, { "epoch": 0.13, "grad_norm": 5.160867691040039, "learning_rate": 1.9999414298925197e-05, "loss": 2.1641, "step": 10277 }, { "epoch": 0.13, "grad_norm": 5.2486114501953125, "learning_rate": 1.9999413161100607e-05, "loss": 2.7858, "step": 10278 }, { "epoch": 0.13, "grad_norm": 5.542981147766113, "learning_rate": 1.999941202217191e-05, "loss": 2.7713, "step": 10279 }, { "epoch": 0.13, "grad_norm": 5.458410263061523, "learning_rate": 1.999941088213911e-05, "loss": 2.2531, "step": 10280 }, { "epoch": 0.13, "grad_norm": 6.428280353546143, "learning_rate": 1.9999409741002205e-05, "loss": 2.3198, "step": 10281 }, { "epoch": 0.13, "grad_norm": 5.5742716789245605, "learning_rate": 1.9999408598761192e-05, "loss": 2.2953, "step": 10282 }, { "epoch": 0.13, "grad_norm": 5.146677017211914, "learning_rate": 1.9999407455416083e-05, "loss": 2.2257, "step": 10283 }, { "epoch": 0.13, "grad_norm": 5.267866134643555, "learning_rate": 1.9999406310966862e-05, "loss": 2.5669, "step": 10284 }, { "epoch": 0.13, "grad_norm": 5.375489711761475, "learning_rate": 1.999940516541354e-05, "loss": 2.7817, "step": 10285 }, { "epoch": 0.13, "grad_norm": 6.028268814086914, "learning_rate": 1.9999404018756116e-05, "loss": 2.8748, "step": 10286 }, { "epoch": 0.13, "grad_norm": 4.8649091720581055, "learning_rate": 1.9999402870994583e-05, "loss": 2.6261, "step": 10287 }, { "epoch": 0.13, "grad_norm": 5.999342441558838, "learning_rate": 1.999940172212895e-05, "loss": 2.6038, "step": 10288 }, { "epoch": 0.13, "grad_norm": 5.76495885848999, "learning_rate": 1.9999400572159213e-05, "loss": 2.4795, "step": 10289 }, { "epoch": 0.13, "grad_norm": 5.099034786224365, "learning_rate": 1.999939942108537e-05, "loss": 2.2868, "step": 10290 }, { "epoch": 0.13, "grad_norm": 6.072359085083008, "learning_rate": 1.999939826890743e-05, "loss": 2.7, "step": 10291 }, { "epoch": 0.13, "grad_norm": 5.3289690017700195, "learning_rate": 1.999939711562538e-05, "loss": 2.2412, "step": 10292 }, { "epoch": 0.13, "grad_norm": 5.322723388671875, "learning_rate": 1.999939596123923e-05, "loss": 2.3899, "step": 10293 }, { "epoch": 0.13, "grad_norm": 5.600654125213623, "learning_rate": 1.999939480574898e-05, "loss": 2.5849, "step": 10294 }, { "epoch": 0.13, "grad_norm": 5.642651557922363, "learning_rate": 1.999939364915462e-05, "loss": 2.6702, "step": 10295 }, { "epoch": 0.13, "grad_norm": 5.168509006500244, "learning_rate": 1.999939249145616e-05, "loss": 2.5774, "step": 10296 }, { "epoch": 0.13, "grad_norm": 6.11777925491333, "learning_rate": 1.99993913326536e-05, "loss": 2.8901, "step": 10297 }, { "epoch": 0.13, "grad_norm": 5.4072723388671875, "learning_rate": 1.9999390172746935e-05, "loss": 2.4816, "step": 10298 }, { "epoch": 0.13, "grad_norm": 6.710785388946533, "learning_rate": 1.999938901173617e-05, "loss": 3.4807, "step": 10299 }, { "epoch": 0.13, "grad_norm": 5.429654598236084, "learning_rate": 1.9999387849621302e-05, "loss": 2.5531, "step": 10300 }, { "epoch": 0.13, "grad_norm": 4.80813455581665, "learning_rate": 1.999938668640233e-05, "loss": 2.2203, "step": 10301 }, { "epoch": 0.13, "grad_norm": 5.813036918640137, "learning_rate": 1.9999385522079256e-05, "loss": 2.3654, "step": 10302 }, { "epoch": 0.13, "grad_norm": 5.310596466064453, "learning_rate": 1.9999384356652082e-05, "loss": 2.5865, "step": 10303 }, { "epoch": 0.13, "grad_norm": 7.150815486907959, "learning_rate": 1.9999383190120808e-05, "loss": 2.624, "step": 10304 }, { "epoch": 0.13, "grad_norm": 5.6140546798706055, "learning_rate": 1.999938202248543e-05, "loss": 2.6038, "step": 10305 }, { "epoch": 0.13, "grad_norm": 5.649886131286621, "learning_rate": 1.999938085374595e-05, "loss": 2.571, "step": 10306 }, { "epoch": 0.13, "grad_norm": 5.556650638580322, "learning_rate": 1.999937968390237e-05, "loss": 2.8554, "step": 10307 }, { "epoch": 0.13, "grad_norm": 5.429641246795654, "learning_rate": 1.999937851295469e-05, "loss": 2.5661, "step": 10308 }, { "epoch": 0.13, "grad_norm": 5.484854221343994, "learning_rate": 1.9999377340902905e-05, "loss": 2.8432, "step": 10309 }, { "epoch": 0.13, "grad_norm": 5.6541829109191895, "learning_rate": 1.9999376167747023e-05, "loss": 2.7789, "step": 10310 }, { "epoch": 0.13, "grad_norm": 5.529729843139648, "learning_rate": 1.999937499348704e-05, "loss": 2.647, "step": 10311 }, { "epoch": 0.13, "grad_norm": 5.043858528137207, "learning_rate": 1.9999373818122954e-05, "loss": 2.4597, "step": 10312 }, { "epoch": 0.13, "grad_norm": 5.306018829345703, "learning_rate": 1.999937264165477e-05, "loss": 2.6363, "step": 10313 }, { "epoch": 0.13, "grad_norm": 5.359303951263428, "learning_rate": 1.9999371464082485e-05, "loss": 1.9811, "step": 10314 }, { "epoch": 0.13, "grad_norm": 6.095974445343018, "learning_rate": 1.9999370285406096e-05, "loss": 2.1728, "step": 10315 }, { "epoch": 0.13, "grad_norm": 4.866225242614746, "learning_rate": 1.9999369105625614e-05, "loss": 2.5017, "step": 10316 }, { "epoch": 0.13, "grad_norm": 5.544230937957764, "learning_rate": 1.9999367924741024e-05, "loss": 2.5621, "step": 10317 }, { "epoch": 0.13, "grad_norm": 5.538342475891113, "learning_rate": 1.999936674275234e-05, "loss": 2.7215, "step": 10318 }, { "epoch": 0.13, "grad_norm": 5.029030799865723, "learning_rate": 1.9999365559659555e-05, "loss": 2.2115, "step": 10319 }, { "epoch": 0.13, "grad_norm": 5.1282219886779785, "learning_rate": 1.999936437546267e-05, "loss": 2.3642, "step": 10320 }, { "epoch": 0.13, "grad_norm": 5.000607013702393, "learning_rate": 1.9999363190161684e-05, "loss": 2.6247, "step": 10321 }, { "epoch": 0.13, "grad_norm": 4.828582286834717, "learning_rate": 1.99993620037566e-05, "loss": 2.3453, "step": 10322 }, { "epoch": 0.13, "grad_norm": 4.646968364715576, "learning_rate": 1.9999360816247417e-05, "loss": 2.1776, "step": 10323 }, { "epoch": 0.13, "grad_norm": 4.1287736892700195, "learning_rate": 1.9999359627634133e-05, "loss": 2.2702, "step": 10324 }, { "epoch": 0.13, "grad_norm": 4.481476306915283, "learning_rate": 1.9999358437916754e-05, "loss": 1.8233, "step": 10325 }, { "epoch": 0.13, "grad_norm": 5.205948829650879, "learning_rate": 1.9999357247095275e-05, "loss": 2.6052, "step": 10326 }, { "epoch": 0.13, "grad_norm": 4.647755146026611, "learning_rate": 1.9999356055169695e-05, "loss": 2.3089, "step": 10327 }, { "epoch": 0.13, "grad_norm": 5.843029975891113, "learning_rate": 1.9999354862140018e-05, "loss": 2.7799, "step": 10328 }, { "epoch": 0.13, "grad_norm": 4.848170757293701, "learning_rate": 1.999935366800624e-05, "loss": 2.3708, "step": 10329 }, { "epoch": 0.13, "grad_norm": 5.392114639282227, "learning_rate": 1.999935247276837e-05, "loss": 2.3656, "step": 10330 }, { "epoch": 0.13, "grad_norm": 4.887248992919922, "learning_rate": 1.9999351276426397e-05, "loss": 2.3236, "step": 10331 }, { "epoch": 0.13, "grad_norm": 5.039449691772461, "learning_rate": 1.9999350078980324e-05, "loss": 2.6251, "step": 10332 }, { "epoch": 0.13, "grad_norm": 6.163435459136963, "learning_rate": 1.9999348880430157e-05, "loss": 2.8364, "step": 10333 }, { "epoch": 0.13, "grad_norm": 4.960732460021973, "learning_rate": 1.9999347680775893e-05, "loss": 2.1026, "step": 10334 }, { "epoch": 0.13, "grad_norm": 4.439153671264648, "learning_rate": 1.999934648001753e-05, "loss": 1.8395, "step": 10335 }, { "epoch": 0.13, "grad_norm": 5.315093040466309, "learning_rate": 1.9999345278155066e-05, "loss": 2.0475, "step": 10336 }, { "epoch": 0.13, "grad_norm": 5.615527629852295, "learning_rate": 1.999934407518851e-05, "loss": 2.7386, "step": 10337 }, { "epoch": 0.13, "grad_norm": 6.026104927062988, "learning_rate": 1.9999342871117853e-05, "loss": 2.5358, "step": 10338 }, { "epoch": 0.13, "grad_norm": 5.490182399749756, "learning_rate": 1.9999341665943103e-05, "loss": 2.4996, "step": 10339 }, { "epoch": 0.13, "grad_norm": 5.062554359436035, "learning_rate": 1.999934045966425e-05, "loss": 2.1407, "step": 10340 }, { "epoch": 0.13, "grad_norm": 6.213738441467285, "learning_rate": 1.9999339252281306e-05, "loss": 2.525, "step": 10341 }, { "epoch": 0.13, "grad_norm": 5.172341346740723, "learning_rate": 1.9999338043794264e-05, "loss": 2.5504, "step": 10342 }, { "epoch": 0.13, "grad_norm": 4.94805383682251, "learning_rate": 1.9999336834203124e-05, "loss": 2.0848, "step": 10343 }, { "epoch": 0.13, "grad_norm": 5.5234150886535645, "learning_rate": 1.9999335623507887e-05, "loss": 2.9212, "step": 10344 }, { "epoch": 0.13, "grad_norm": 5.673065185546875, "learning_rate": 1.9999334411708556e-05, "loss": 2.2565, "step": 10345 }, { "epoch": 0.13, "grad_norm": 4.684093952178955, "learning_rate": 1.9999333198805125e-05, "loss": 2.0007, "step": 10346 }, { "epoch": 0.13, "grad_norm": 5.00786828994751, "learning_rate": 1.99993319847976e-05, "loss": 2.2212, "step": 10347 }, { "epoch": 0.13, "grad_norm": 5.551456928253174, "learning_rate": 1.999933076968598e-05, "loss": 2.4842, "step": 10348 }, { "epoch": 0.13, "grad_norm": 5.6909589767456055, "learning_rate": 1.9999329553470263e-05, "loss": 2.2006, "step": 10349 }, { "epoch": 0.13, "grad_norm": 5.2439374923706055, "learning_rate": 1.9999328336150453e-05, "loss": 2.5223, "step": 10350 }, { "epoch": 0.13, "grad_norm": 5.060962200164795, "learning_rate": 1.9999327117726545e-05, "loss": 2.4563, "step": 10351 }, { "epoch": 0.13, "grad_norm": 5.349265098571777, "learning_rate": 1.9999325898198543e-05, "loss": 2.2495, "step": 10352 }, { "epoch": 0.13, "grad_norm": 5.364990711212158, "learning_rate": 1.9999324677566445e-05, "loss": 2.2828, "step": 10353 }, { "epoch": 0.13, "grad_norm": 4.887967109680176, "learning_rate": 1.999932345583025e-05, "loss": 1.9805, "step": 10354 }, { "epoch": 0.13, "grad_norm": 6.090427875518799, "learning_rate": 1.9999322232989965e-05, "loss": 2.6941, "step": 10355 }, { "epoch": 0.13, "grad_norm": 5.720134735107422, "learning_rate": 1.999932100904558e-05, "loss": 2.6812, "step": 10356 }, { "epoch": 0.13, "grad_norm": 5.0057854652404785, "learning_rate": 1.9999319783997103e-05, "loss": 2.2736, "step": 10357 }, { "epoch": 0.13, "grad_norm": 6.276862144470215, "learning_rate": 1.999931855784453e-05, "loss": 2.3247, "step": 10358 }, { "epoch": 0.13, "grad_norm": 5.824465751647949, "learning_rate": 1.9999317330587866e-05, "loss": 2.5628, "step": 10359 }, { "epoch": 0.13, "grad_norm": 5.3733720779418945, "learning_rate": 1.9999316102227106e-05, "loss": 2.5827, "step": 10360 }, { "epoch": 0.13, "grad_norm": 4.345843315124512, "learning_rate": 1.999931487276225e-05, "loss": 2.2134, "step": 10361 }, { "epoch": 0.13, "grad_norm": 6.126992225646973, "learning_rate": 1.99993136421933e-05, "loss": 2.3025, "step": 10362 }, { "epoch": 0.13, "grad_norm": 4.9628400802612305, "learning_rate": 1.9999312410520257e-05, "loss": 2.5944, "step": 10363 }, { "epoch": 0.13, "grad_norm": 5.256290912628174, "learning_rate": 1.9999311177743122e-05, "loss": 2.616, "step": 10364 }, { "epoch": 0.13, "grad_norm": 5.971593856811523, "learning_rate": 1.999930994386189e-05, "loss": 2.941, "step": 10365 }, { "epoch": 0.13, "grad_norm": 5.537909030914307, "learning_rate": 1.9999308708876566e-05, "loss": 2.9914, "step": 10366 }, { "epoch": 0.13, "grad_norm": 5.261909008026123, "learning_rate": 1.999930747278715e-05, "loss": 2.2098, "step": 10367 }, { "epoch": 0.13, "grad_norm": 4.847784042358398, "learning_rate": 1.9999306235593642e-05, "loss": 2.3405, "step": 10368 }, { "epoch": 0.13, "grad_norm": 5.210468769073486, "learning_rate": 1.9999304997296038e-05, "loss": 2.4305, "step": 10369 }, { "epoch": 0.13, "grad_norm": 4.977744102478027, "learning_rate": 1.9999303757894342e-05, "loss": 2.3256, "step": 10370 }, { "epoch": 0.13, "grad_norm": 4.2636637687683105, "learning_rate": 1.9999302517388553e-05, "loss": 1.7605, "step": 10371 }, { "epoch": 0.13, "grad_norm": 5.101670742034912, "learning_rate": 1.999930127577867e-05, "loss": 2.4992, "step": 10372 }, { "epoch": 0.13, "grad_norm": 5.07343053817749, "learning_rate": 1.99993000330647e-05, "loss": 2.2819, "step": 10373 }, { "epoch": 0.13, "grad_norm": 5.462769508361816, "learning_rate": 1.9999298789246633e-05, "loss": 2.3256, "step": 10374 }, { "epoch": 0.13, "grad_norm": 5.470128536224365, "learning_rate": 1.9999297544324472e-05, "loss": 2.7423, "step": 10375 }, { "epoch": 0.13, "grad_norm": 5.3418450355529785, "learning_rate": 1.9999296298298223e-05, "loss": 2.3711, "step": 10376 }, { "epoch": 0.13, "grad_norm": 4.648618698120117, "learning_rate": 1.999929505116788e-05, "loss": 1.6917, "step": 10377 }, { "epoch": 0.13, "grad_norm": 5.102953910827637, "learning_rate": 1.9999293802933445e-05, "loss": 2.2818, "step": 10378 }, { "epoch": 0.13, "grad_norm": 4.908062934875488, "learning_rate": 1.9999292553594922e-05, "loss": 2.5077, "step": 10379 }, { "epoch": 0.13, "grad_norm": 4.950351715087891, "learning_rate": 1.99992913031523e-05, "loss": 2.1382, "step": 10380 }, { "epoch": 0.13, "grad_norm": 4.73634672164917, "learning_rate": 1.9999290051605597e-05, "loss": 2.0939, "step": 10381 }, { "epoch": 0.13, "grad_norm": 6.469321250915527, "learning_rate": 1.9999288798954795e-05, "loss": 2.8299, "step": 10382 }, { "epoch": 0.13, "grad_norm": 5.165849208831787, "learning_rate": 1.9999287545199903e-05, "loss": 2.3979, "step": 10383 }, { "epoch": 0.13, "grad_norm": 5.611594200134277, "learning_rate": 1.999928629034092e-05, "loss": 2.2496, "step": 10384 }, { "epoch": 0.13, "grad_norm": 6.06630277633667, "learning_rate": 1.9999285034377848e-05, "loss": 2.5989, "step": 10385 }, { "epoch": 0.13, "grad_norm": 5.549764156341553, "learning_rate": 1.9999283777310684e-05, "loss": 2.5473, "step": 10386 }, { "epoch": 0.13, "grad_norm": 5.40296745300293, "learning_rate": 1.999928251913943e-05, "loss": 2.6117, "step": 10387 }, { "epoch": 0.13, "grad_norm": 5.457500457763672, "learning_rate": 1.9999281259864082e-05, "loss": 2.4394, "step": 10388 }, { "epoch": 0.13, "grad_norm": 5.274238586425781, "learning_rate": 1.999927999948465e-05, "loss": 2.4635, "step": 10389 }, { "epoch": 0.13, "grad_norm": 5.530942440032959, "learning_rate": 1.9999278738001125e-05, "loss": 2.5024, "step": 10390 }, { "epoch": 0.13, "grad_norm": 4.883192539215088, "learning_rate": 1.999927747541351e-05, "loss": 2.2621, "step": 10391 }, { "epoch": 0.13, "grad_norm": 4.8345112800598145, "learning_rate": 1.9999276211721802e-05, "loss": 1.8801, "step": 10392 }, { "epoch": 0.13, "grad_norm": 5.2364044189453125, "learning_rate": 1.9999274946926008e-05, "loss": 2.7806, "step": 10393 }, { "epoch": 0.13, "grad_norm": 5.907235145568848, "learning_rate": 1.9999273681026124e-05, "loss": 2.394, "step": 10394 }, { "epoch": 0.13, "grad_norm": 5.306849479675293, "learning_rate": 1.999927241402215e-05, "loss": 2.3627, "step": 10395 }, { "epoch": 0.13, "grad_norm": 5.423491954803467, "learning_rate": 1.9999271145914084e-05, "loss": 2.2658, "step": 10396 }, { "epoch": 0.13, "grad_norm": 5.237621307373047, "learning_rate": 1.999926987670193e-05, "loss": 2.1703, "step": 10397 }, { "epoch": 0.13, "grad_norm": 4.863316059112549, "learning_rate": 1.9999268606385692e-05, "loss": 2.1353, "step": 10398 }, { "epoch": 0.13, "grad_norm": 5.1324143409729, "learning_rate": 1.9999267334965362e-05, "loss": 2.4249, "step": 10399 }, { "epoch": 0.13, "grad_norm": 5.670971870422363, "learning_rate": 1.999926606244094e-05, "loss": 2.5071, "step": 10400 }, { "epoch": 0.13, "grad_norm": 5.2930908203125, "learning_rate": 1.9999264788812437e-05, "loss": 2.4582, "step": 10401 }, { "epoch": 0.13, "grad_norm": 5.319901466369629, "learning_rate": 1.999926351407984e-05, "loss": 2.6245, "step": 10402 }, { "epoch": 0.14, "grad_norm": 5.808530807495117, "learning_rate": 1.9999262238243154e-05, "loss": 2.617, "step": 10403 }, { "epoch": 0.14, "grad_norm": 5.024529933929443, "learning_rate": 1.9999260961302382e-05, "loss": 2.5252, "step": 10404 }, { "epoch": 0.14, "grad_norm": 5.142290115356445, "learning_rate": 1.999925968325752e-05, "loss": 2.8833, "step": 10405 }, { "epoch": 0.14, "grad_norm": 5.32914924621582, "learning_rate": 1.9999258404108572e-05, "loss": 2.3745, "step": 10406 }, { "epoch": 0.14, "grad_norm": 5.600548267364502, "learning_rate": 1.9999257123855535e-05, "loss": 2.4083, "step": 10407 }, { "epoch": 0.14, "grad_norm": 5.55905818939209, "learning_rate": 1.9999255842498414e-05, "loss": 2.6613, "step": 10408 }, { "epoch": 0.14, "grad_norm": 5.014645099639893, "learning_rate": 1.99992545600372e-05, "loss": 2.1666, "step": 10409 }, { "epoch": 0.14, "grad_norm": 4.766005992889404, "learning_rate": 1.99992532764719e-05, "loss": 2.2546, "step": 10410 }, { "epoch": 0.14, "grad_norm": 4.899058818817139, "learning_rate": 1.9999251991802516e-05, "loss": 2.3738, "step": 10411 }, { "epoch": 0.14, "grad_norm": 4.797243118286133, "learning_rate": 1.9999250706029043e-05, "loss": 2.2926, "step": 10412 }, { "epoch": 0.14, "grad_norm": 5.728796005249023, "learning_rate": 1.9999249419151483e-05, "loss": 2.9648, "step": 10413 }, { "epoch": 0.14, "grad_norm": 4.861010551452637, "learning_rate": 1.9999248131169836e-05, "loss": 2.655, "step": 10414 }, { "epoch": 0.14, "grad_norm": 6.208797454833984, "learning_rate": 1.9999246842084102e-05, "loss": 2.0583, "step": 10415 }, { "epoch": 0.14, "grad_norm": 5.032931804656982, "learning_rate": 1.999924555189428e-05, "loss": 2.1614, "step": 10416 }, { "epoch": 0.14, "grad_norm": 5.421762943267822, "learning_rate": 1.9999244260600376e-05, "loss": 2.3672, "step": 10417 }, { "epoch": 0.14, "grad_norm": 5.356228351593018, "learning_rate": 1.9999242968202384e-05, "loss": 2.4267, "step": 10418 }, { "epoch": 0.14, "grad_norm": 5.439685821533203, "learning_rate": 1.9999241674700305e-05, "loss": 2.5925, "step": 10419 }, { "epoch": 0.14, "grad_norm": 5.490311622619629, "learning_rate": 1.9999240380094142e-05, "loss": 1.8084, "step": 10420 }, { "epoch": 0.14, "grad_norm": 4.9602460861206055, "learning_rate": 1.999923908438389e-05, "loss": 2.0966, "step": 10421 }, { "epoch": 0.14, "grad_norm": 4.955573081970215, "learning_rate": 1.9999237787569555e-05, "loss": 2.4342, "step": 10422 }, { "epoch": 0.14, "grad_norm": 4.886013031005859, "learning_rate": 1.9999236489651134e-05, "loss": 2.4538, "step": 10423 }, { "epoch": 0.14, "grad_norm": 5.409223556518555, "learning_rate": 1.9999235190628626e-05, "loss": 2.3367, "step": 10424 }, { "epoch": 0.14, "grad_norm": 4.523763656616211, "learning_rate": 1.999923389050203e-05, "loss": 2.1321, "step": 10425 }, { "epoch": 0.14, "grad_norm": 4.583577632904053, "learning_rate": 1.9999232589271356e-05, "loss": 2.0524, "step": 10426 }, { "epoch": 0.14, "grad_norm": 4.6510539054870605, "learning_rate": 1.9999231286936593e-05, "loss": 1.972, "step": 10427 }, { "epoch": 0.14, "grad_norm": 5.859370231628418, "learning_rate": 1.9999229983497747e-05, "loss": 2.2455, "step": 10428 }, { "epoch": 0.14, "grad_norm": 5.1397929191589355, "learning_rate": 1.9999228678954817e-05, "loss": 2.7802, "step": 10429 }, { "epoch": 0.14, "grad_norm": 5.321130752563477, "learning_rate": 1.99992273733078e-05, "loss": 2.4834, "step": 10430 }, { "epoch": 0.14, "grad_norm": 5.417185306549072, "learning_rate": 1.99992260665567e-05, "loss": 3.1902, "step": 10431 }, { "epoch": 0.14, "grad_norm": 5.510776042938232, "learning_rate": 1.9999224758701514e-05, "loss": 2.3033, "step": 10432 }, { "epoch": 0.14, "grad_norm": 5.86580228805542, "learning_rate": 1.9999223449742246e-05, "loss": 2.7303, "step": 10433 }, { "epoch": 0.14, "grad_norm": 4.8305840492248535, "learning_rate": 1.9999222139678894e-05, "loss": 2.0149, "step": 10434 }, { "epoch": 0.14, "grad_norm": 5.0529046058654785, "learning_rate": 1.999922082851146e-05, "loss": 2.0936, "step": 10435 }, { "epoch": 0.14, "grad_norm": 5.4293928146362305, "learning_rate": 1.999921951623994e-05, "loss": 3.1497, "step": 10436 }, { "epoch": 0.14, "grad_norm": 6.687467098236084, "learning_rate": 1.9999218202864336e-05, "loss": 2.9453, "step": 10437 }, { "epoch": 0.14, "grad_norm": 5.722705364227295, "learning_rate": 1.999921688838465e-05, "loss": 2.2961, "step": 10438 }, { "epoch": 0.14, "grad_norm": 6.892923355102539, "learning_rate": 1.9999215572800878e-05, "loss": 2.5291, "step": 10439 }, { "epoch": 0.14, "grad_norm": 4.458103656768799, "learning_rate": 1.9999214256113024e-05, "loss": 1.9176, "step": 10440 }, { "epoch": 0.14, "grad_norm": 5.640517711639404, "learning_rate": 1.9999212938321092e-05, "loss": 2.9253, "step": 10441 }, { "epoch": 0.14, "grad_norm": 5.026042461395264, "learning_rate": 1.999921161942507e-05, "loss": 2.7687, "step": 10442 }, { "epoch": 0.14, "grad_norm": 4.66480827331543, "learning_rate": 1.9999210299424972e-05, "loss": 2.2317, "step": 10443 }, { "epoch": 0.14, "grad_norm": 5.187930107116699, "learning_rate": 1.9999208978320786e-05, "loss": 2.3891, "step": 10444 }, { "epoch": 0.14, "grad_norm": 5.630730152130127, "learning_rate": 1.9999207656112523e-05, "loss": 2.4724, "step": 10445 }, { "epoch": 0.14, "grad_norm": 5.386885166168213, "learning_rate": 1.9999206332800173e-05, "loss": 2.3055, "step": 10446 }, { "epoch": 0.14, "grad_norm": 4.467687129974365, "learning_rate": 1.9999205008383743e-05, "loss": 1.9605, "step": 10447 }, { "epoch": 0.14, "grad_norm": 4.893235683441162, "learning_rate": 1.9999203682863232e-05, "loss": 2.1091, "step": 10448 }, { "epoch": 0.14, "grad_norm": 5.759307384490967, "learning_rate": 1.9999202356238638e-05, "loss": 2.6697, "step": 10449 }, { "epoch": 0.14, "grad_norm": 6.160223960876465, "learning_rate": 1.9999201028509963e-05, "loss": 2.533, "step": 10450 }, { "epoch": 0.14, "grad_norm": 5.274964809417725, "learning_rate": 1.9999199699677208e-05, "loss": 2.629, "step": 10451 }, { "epoch": 0.14, "grad_norm": 5.078407287597656, "learning_rate": 1.9999198369740365e-05, "loss": 2.2314, "step": 10452 }, { "epoch": 0.14, "grad_norm": 5.059476375579834, "learning_rate": 1.999919703869945e-05, "loss": 2.4027, "step": 10453 }, { "epoch": 0.14, "grad_norm": 5.238671779632568, "learning_rate": 1.999919570655445e-05, "loss": 2.1966, "step": 10454 }, { "epoch": 0.14, "grad_norm": 6.147860050201416, "learning_rate": 1.9999194373305367e-05, "loss": 2.7311, "step": 10455 }, { "epoch": 0.14, "grad_norm": 5.559408664703369, "learning_rate": 1.9999193038952207e-05, "loss": 2.3637, "step": 10456 }, { "epoch": 0.14, "grad_norm": 4.571577072143555, "learning_rate": 1.9999191703494966e-05, "loss": 2.3733, "step": 10457 }, { "epoch": 0.14, "grad_norm": 5.38240909576416, "learning_rate": 1.9999190366933642e-05, "loss": 2.8428, "step": 10458 }, { "epoch": 0.14, "grad_norm": 4.875784397125244, "learning_rate": 1.9999189029268237e-05, "loss": 2.1618, "step": 10459 }, { "epoch": 0.14, "grad_norm": 4.763365745544434, "learning_rate": 1.9999187690498756e-05, "loss": 2.4886, "step": 10460 }, { "epoch": 0.14, "grad_norm": 4.47808313369751, "learning_rate": 1.9999186350625194e-05, "loss": 2.049, "step": 10461 }, { "epoch": 0.14, "grad_norm": 4.457658290863037, "learning_rate": 1.999918500964755e-05, "loss": 1.8962, "step": 10462 }, { "epoch": 0.14, "grad_norm": 5.038662433624268, "learning_rate": 1.999918366756583e-05, "loss": 2.2203, "step": 10463 }, { "epoch": 0.14, "grad_norm": 4.835596561431885, "learning_rate": 1.9999182324380026e-05, "loss": 2.2153, "step": 10464 }, { "epoch": 0.14, "grad_norm": 5.173667907714844, "learning_rate": 1.9999180980090146e-05, "loss": 2.2284, "step": 10465 }, { "epoch": 0.14, "grad_norm": 5.334711074829102, "learning_rate": 1.9999179634696183e-05, "loss": 2.2131, "step": 10466 }, { "epoch": 0.14, "grad_norm": 5.805552005767822, "learning_rate": 1.9999178288198145e-05, "loss": 2.8426, "step": 10467 }, { "epoch": 0.14, "grad_norm": 4.6071553230285645, "learning_rate": 1.9999176940596028e-05, "loss": 2.3002, "step": 10468 }, { "epoch": 0.14, "grad_norm": 6.208066463470459, "learning_rate": 1.999917559188983e-05, "loss": 2.9126, "step": 10469 }, { "epoch": 0.14, "grad_norm": 5.122445583343506, "learning_rate": 1.9999174242079555e-05, "loss": 2.2036, "step": 10470 }, { "epoch": 0.14, "grad_norm": 5.686153888702393, "learning_rate": 1.99991728911652e-05, "loss": 2.1884, "step": 10471 }, { "epoch": 0.14, "grad_norm": 5.001164436340332, "learning_rate": 1.9999171539146768e-05, "loss": 2.0432, "step": 10472 }, { "epoch": 0.14, "grad_norm": 4.816197872161865, "learning_rate": 1.9999170186024256e-05, "loss": 1.979, "step": 10473 }, { "epoch": 0.14, "grad_norm": 5.983483791351318, "learning_rate": 1.999916883179767e-05, "loss": 2.5017, "step": 10474 }, { "epoch": 0.14, "grad_norm": 5.300493240356445, "learning_rate": 1.9999167476467e-05, "loss": 2.3137, "step": 10475 }, { "epoch": 0.14, "grad_norm": 5.001335144042969, "learning_rate": 1.999916612003226e-05, "loss": 2.1423, "step": 10476 }, { "epoch": 0.14, "grad_norm": 5.92333984375, "learning_rate": 1.9999164762493436e-05, "loss": 3.0602, "step": 10477 }, { "epoch": 0.14, "grad_norm": 4.74050235748291, "learning_rate": 1.9999163403850536e-05, "loss": 1.9946, "step": 10478 }, { "epoch": 0.14, "grad_norm": 5.460805892944336, "learning_rate": 1.9999162044103558e-05, "loss": 2.4529, "step": 10479 }, { "epoch": 0.14, "grad_norm": 5.065199375152588, "learning_rate": 1.9999160683252507e-05, "loss": 2.771, "step": 10480 }, { "epoch": 0.14, "grad_norm": 6.171753406524658, "learning_rate": 1.9999159321297376e-05, "loss": 2.8479, "step": 10481 }, { "epoch": 0.14, "grad_norm": 5.98696756362915, "learning_rate": 1.9999157958238167e-05, "loss": 2.7112, "step": 10482 }, { "epoch": 0.14, "grad_norm": 6.866856575012207, "learning_rate": 1.9999156594074882e-05, "loss": 2.6046, "step": 10483 }, { "epoch": 0.14, "grad_norm": 5.770179271697998, "learning_rate": 1.9999155228807523e-05, "loss": 2.922, "step": 10484 }, { "epoch": 0.14, "grad_norm": 5.010042190551758, "learning_rate": 1.9999153862436083e-05, "loss": 2.4235, "step": 10485 }, { "epoch": 0.14, "grad_norm": 5.219456195831299, "learning_rate": 1.999915249496057e-05, "loss": 2.3963, "step": 10486 }, { "epoch": 0.14, "grad_norm": 5.566780090332031, "learning_rate": 1.999915112638098e-05, "loss": 2.87, "step": 10487 }, { "epoch": 0.14, "grad_norm": 5.184285640716553, "learning_rate": 1.9999149756697317e-05, "loss": 2.3143, "step": 10488 }, { "epoch": 0.14, "grad_norm": 4.96247673034668, "learning_rate": 1.9999148385909574e-05, "loss": 2.1086, "step": 10489 }, { "epoch": 0.14, "grad_norm": 4.97116756439209, "learning_rate": 1.999914701401776e-05, "loss": 1.7899, "step": 10490 }, { "epoch": 0.14, "grad_norm": 4.816816329956055, "learning_rate": 1.9999145641021866e-05, "loss": 2.2903, "step": 10491 }, { "epoch": 0.14, "grad_norm": 4.562138080596924, "learning_rate": 1.9999144266921898e-05, "loss": 2.1073, "step": 10492 }, { "epoch": 0.14, "grad_norm": 5.816997051239014, "learning_rate": 1.9999142891717857e-05, "loss": 2.9247, "step": 10493 }, { "epoch": 0.14, "grad_norm": 4.872710704803467, "learning_rate": 1.9999141515409738e-05, "loss": 1.9407, "step": 10494 }, { "epoch": 0.14, "grad_norm": 5.187795639038086, "learning_rate": 1.9999140137997543e-05, "loss": 2.5757, "step": 10495 }, { "epoch": 0.14, "grad_norm": 4.885178565979004, "learning_rate": 1.9999138759481278e-05, "loss": 2.4755, "step": 10496 }, { "epoch": 0.14, "grad_norm": 5.160801887512207, "learning_rate": 1.9999137379860935e-05, "loss": 2.1401, "step": 10497 }, { "epoch": 0.14, "grad_norm": 5.418368816375732, "learning_rate": 1.999913599913652e-05, "loss": 2.6779, "step": 10498 }, { "epoch": 0.14, "grad_norm": 5.2733917236328125, "learning_rate": 1.999913461730803e-05, "loss": 2.3648, "step": 10499 }, { "epoch": 0.14, "grad_norm": 5.5239691734313965, "learning_rate": 1.9999133234375467e-05, "loss": 2.9284, "step": 10500 }, { "epoch": 0.14, "grad_norm": 4.607630252838135, "learning_rate": 1.9999131850338823e-05, "loss": 2.1198, "step": 10501 }, { "epoch": 0.14, "grad_norm": 4.432924270629883, "learning_rate": 1.9999130465198113e-05, "loss": 1.7211, "step": 10502 }, { "epoch": 0.14, "grad_norm": 5.373757839202881, "learning_rate": 1.9999129078953326e-05, "loss": 2.8674, "step": 10503 }, { "epoch": 0.14, "grad_norm": 4.682501792907715, "learning_rate": 1.999912769160447e-05, "loss": 2.2934, "step": 10504 }, { "epoch": 0.14, "grad_norm": 5.065104007720947, "learning_rate": 1.9999126303151534e-05, "loss": 2.4992, "step": 10505 }, { "epoch": 0.14, "grad_norm": 4.930964946746826, "learning_rate": 1.999912491359453e-05, "loss": 2.0413, "step": 10506 }, { "epoch": 0.14, "grad_norm": 5.5449137687683105, "learning_rate": 1.999912352293345e-05, "loss": 2.5357, "step": 10507 }, { "epoch": 0.14, "grad_norm": 4.288496494293213, "learning_rate": 1.9999122131168297e-05, "loss": 2.1826, "step": 10508 }, { "epoch": 0.14, "grad_norm": 5.283101558685303, "learning_rate": 1.9999120738299075e-05, "loss": 2.5525, "step": 10509 }, { "epoch": 0.14, "grad_norm": 5.671206951141357, "learning_rate": 1.9999119344325776e-05, "loss": 2.5471, "step": 10510 }, { "epoch": 0.14, "grad_norm": 4.722690105438232, "learning_rate": 1.9999117949248407e-05, "loss": 2.114, "step": 10511 }, { "epoch": 0.14, "grad_norm": 4.973424434661865, "learning_rate": 1.9999116553066965e-05, "loss": 2.2252, "step": 10512 }, { "epoch": 0.14, "grad_norm": 5.226615905761719, "learning_rate": 1.999911515578145e-05, "loss": 2.1309, "step": 10513 }, { "epoch": 0.14, "grad_norm": 5.35310173034668, "learning_rate": 1.9999113757391862e-05, "loss": 2.9211, "step": 10514 }, { "epoch": 0.14, "grad_norm": 5.052610874176025, "learning_rate": 1.9999112357898206e-05, "loss": 2.6215, "step": 10515 }, { "epoch": 0.14, "grad_norm": 4.885446548461914, "learning_rate": 1.9999110957300476e-05, "loss": 1.9888, "step": 10516 }, { "epoch": 0.14, "grad_norm": 4.706874370574951, "learning_rate": 1.9999109555598676e-05, "loss": 2.012, "step": 10517 }, { "epoch": 0.14, "grad_norm": 4.630272388458252, "learning_rate": 1.9999108152792802e-05, "loss": 2.0038, "step": 10518 }, { "epoch": 0.14, "grad_norm": 5.877945899963379, "learning_rate": 1.999910674888286e-05, "loss": 2.4167, "step": 10519 }, { "epoch": 0.14, "grad_norm": 5.6126322746276855, "learning_rate": 1.9999105343868844e-05, "loss": 2.4316, "step": 10520 }, { "epoch": 0.14, "grad_norm": 5.18665075302124, "learning_rate": 1.999910393775076e-05, "loss": 2.228, "step": 10521 }, { "epoch": 0.14, "grad_norm": 4.948860168457031, "learning_rate": 1.9999102530528605e-05, "loss": 2.0819, "step": 10522 }, { "epoch": 0.14, "grad_norm": 5.994711875915527, "learning_rate": 1.9999101122202377e-05, "loss": 2.914, "step": 10523 }, { "epoch": 0.14, "grad_norm": 5.063834190368652, "learning_rate": 1.999909971277208e-05, "loss": 2.1929, "step": 10524 }, { "epoch": 0.14, "grad_norm": 6.1570329666137695, "learning_rate": 1.9999098302237714e-05, "loss": 1.9775, "step": 10525 }, { "epoch": 0.14, "grad_norm": 5.119844436645508, "learning_rate": 1.9999096890599276e-05, "loss": 2.2709, "step": 10526 }, { "epoch": 0.14, "grad_norm": 5.93972110748291, "learning_rate": 1.999909547785677e-05, "loss": 2.4073, "step": 10527 }, { "epoch": 0.14, "grad_norm": 4.755625247955322, "learning_rate": 1.9999094064010195e-05, "loss": 2.0305, "step": 10528 }, { "epoch": 0.14, "grad_norm": 5.703031063079834, "learning_rate": 1.9999092649059546e-05, "loss": 2.6483, "step": 10529 }, { "epoch": 0.14, "grad_norm": 4.904429912567139, "learning_rate": 1.999909123300483e-05, "loss": 2.2139, "step": 10530 }, { "epoch": 0.14, "grad_norm": 5.504095077514648, "learning_rate": 1.9999089815846044e-05, "loss": 2.2554, "step": 10531 }, { "epoch": 0.14, "grad_norm": 5.312086582183838, "learning_rate": 1.999908839758319e-05, "loss": 2.367, "step": 10532 }, { "epoch": 0.14, "grad_norm": 5.852933406829834, "learning_rate": 1.9999086978216265e-05, "loss": 2.6666, "step": 10533 }, { "epoch": 0.14, "grad_norm": 5.092503547668457, "learning_rate": 1.9999085557745275e-05, "loss": 2.445, "step": 10534 }, { "epoch": 0.14, "grad_norm": 5.351500988006592, "learning_rate": 1.9999084136170212e-05, "loss": 2.4817, "step": 10535 }, { "epoch": 0.14, "grad_norm": 5.224130630493164, "learning_rate": 1.9999082713491082e-05, "loss": 2.7388, "step": 10536 }, { "epoch": 0.14, "grad_norm": 5.977982997894287, "learning_rate": 1.9999081289707885e-05, "loss": 2.4542, "step": 10537 }, { "epoch": 0.14, "grad_norm": 6.036070346832275, "learning_rate": 1.9999079864820618e-05, "loss": 2.5696, "step": 10538 }, { "epoch": 0.14, "grad_norm": 5.156966209411621, "learning_rate": 1.9999078438829284e-05, "loss": 2.3324, "step": 10539 }, { "epoch": 0.14, "grad_norm": 4.921981334686279, "learning_rate": 1.9999077011733883e-05, "loss": 1.791, "step": 10540 }, { "epoch": 0.14, "grad_norm": 5.39154577255249, "learning_rate": 1.9999075583534412e-05, "loss": 2.3821, "step": 10541 }, { "epoch": 0.14, "grad_norm": 5.379451274871826, "learning_rate": 1.9999074154230875e-05, "loss": 2.0092, "step": 10542 }, { "epoch": 0.14, "grad_norm": 5.123778820037842, "learning_rate": 1.999907272382327e-05, "loss": 2.853, "step": 10543 }, { "epoch": 0.14, "grad_norm": 4.877075672149658, "learning_rate": 1.99990712923116e-05, "loss": 2.5673, "step": 10544 }, { "epoch": 0.14, "grad_norm": 4.973273754119873, "learning_rate": 1.9999069859695858e-05, "loss": 2.3371, "step": 10545 }, { "epoch": 0.14, "grad_norm": 5.512240886688232, "learning_rate": 1.9999068425976053e-05, "loss": 2.8514, "step": 10546 }, { "epoch": 0.14, "grad_norm": 4.71647310256958, "learning_rate": 1.999906699115218e-05, "loss": 2.2734, "step": 10547 }, { "epoch": 0.14, "grad_norm": 5.271170616149902, "learning_rate": 1.999906555522424e-05, "loss": 2.9366, "step": 10548 }, { "epoch": 0.14, "grad_norm": 4.8735032081604, "learning_rate": 1.9999064118192234e-05, "loss": 2.1786, "step": 10549 }, { "epoch": 0.14, "grad_norm": 4.9407219886779785, "learning_rate": 1.9999062680056162e-05, "loss": 2.212, "step": 10550 }, { "epoch": 0.14, "grad_norm": 4.551806449890137, "learning_rate": 1.9999061240816024e-05, "loss": 2.3255, "step": 10551 }, { "epoch": 0.14, "grad_norm": 4.896955966949463, "learning_rate": 1.9999059800471818e-05, "loss": 2.7328, "step": 10552 }, { "epoch": 0.14, "grad_norm": 4.50044059753418, "learning_rate": 1.9999058359023546e-05, "loss": 2.1692, "step": 10553 }, { "epoch": 0.14, "grad_norm": 5.477920055389404, "learning_rate": 1.999905691647121e-05, "loss": 2.562, "step": 10554 }, { "epoch": 0.14, "grad_norm": 5.341190814971924, "learning_rate": 1.999905547281481e-05, "loss": 2.9113, "step": 10555 }, { "epoch": 0.14, "grad_norm": 5.263304233551025, "learning_rate": 1.999905402805434e-05, "loss": 2.851, "step": 10556 }, { "epoch": 0.14, "grad_norm": 5.419874668121338, "learning_rate": 1.9999052582189812e-05, "loss": 2.1626, "step": 10557 }, { "epoch": 0.14, "grad_norm": 5.331277370452881, "learning_rate": 1.9999051135221213e-05, "loss": 2.7464, "step": 10558 }, { "epoch": 0.14, "grad_norm": 5.456125259399414, "learning_rate": 1.999904968714855e-05, "loss": 2.507, "step": 10559 }, { "epoch": 0.14, "grad_norm": 4.884128570556641, "learning_rate": 1.9999048237971827e-05, "loss": 2.4877, "step": 10560 }, { "epoch": 0.14, "grad_norm": 5.518564224243164, "learning_rate": 1.999904678769103e-05, "loss": 2.2885, "step": 10561 }, { "epoch": 0.14, "grad_norm": 5.328637599945068, "learning_rate": 1.9999045336306177e-05, "loss": 2.7096, "step": 10562 }, { "epoch": 0.14, "grad_norm": 5.045444965362549, "learning_rate": 1.9999043883817258e-05, "loss": 2.3422, "step": 10563 }, { "epoch": 0.14, "grad_norm": 5.339309215545654, "learning_rate": 1.9999042430224274e-05, "loss": 2.3467, "step": 10564 }, { "epoch": 0.14, "grad_norm": 4.940018177032471, "learning_rate": 1.9999040975527224e-05, "loss": 2.5645, "step": 10565 }, { "epoch": 0.14, "grad_norm": 4.753615856170654, "learning_rate": 1.9999039519726114e-05, "loss": 2.6181, "step": 10566 }, { "epoch": 0.14, "grad_norm": 4.9566521644592285, "learning_rate": 1.999903806282094e-05, "loss": 1.9771, "step": 10567 }, { "epoch": 0.14, "grad_norm": 5.742598056793213, "learning_rate": 1.99990366048117e-05, "loss": 2.0883, "step": 10568 }, { "epoch": 0.14, "grad_norm": 5.343355655670166, "learning_rate": 1.9999035145698397e-05, "loss": 2.2791, "step": 10569 }, { "epoch": 0.14, "grad_norm": 5.739771842956543, "learning_rate": 1.999903368548103e-05, "loss": 2.5814, "step": 10570 }, { "epoch": 0.14, "grad_norm": 5.345561981201172, "learning_rate": 1.9999032224159602e-05, "loss": 2.5958, "step": 10571 }, { "epoch": 0.14, "grad_norm": 5.339529514312744, "learning_rate": 1.9999030761734112e-05, "loss": 2.1478, "step": 10572 }, { "epoch": 0.14, "grad_norm": 5.090882778167725, "learning_rate": 1.9999029298204558e-05, "loss": 2.3964, "step": 10573 }, { "epoch": 0.14, "grad_norm": 8.045585632324219, "learning_rate": 1.9999027833570944e-05, "loss": 2.269, "step": 10574 }, { "epoch": 0.14, "grad_norm": 5.474395751953125, "learning_rate": 1.9999026367833263e-05, "loss": 2.6796, "step": 10575 }, { "epoch": 0.14, "grad_norm": 5.582679748535156, "learning_rate": 1.9999024900991523e-05, "loss": 2.519, "step": 10576 }, { "epoch": 0.14, "grad_norm": 6.120410442352295, "learning_rate": 1.9999023433045722e-05, "loss": 3.0157, "step": 10577 }, { "epoch": 0.14, "grad_norm": 4.498624801635742, "learning_rate": 1.9999021963995858e-05, "loss": 1.8509, "step": 10578 }, { "epoch": 0.14, "grad_norm": 5.249791622161865, "learning_rate": 1.999902049384193e-05, "loss": 2.8678, "step": 10579 }, { "epoch": 0.14, "grad_norm": 4.767254829406738, "learning_rate": 1.9999019022583943e-05, "loss": 2.3597, "step": 10580 }, { "epoch": 0.14, "grad_norm": 5.934590816497803, "learning_rate": 1.9999017550221895e-05, "loss": 3.0725, "step": 10581 }, { "epoch": 0.14, "grad_norm": 4.534481048583984, "learning_rate": 1.9999016076755784e-05, "loss": 2.1903, "step": 10582 }, { "epoch": 0.14, "grad_norm": 6.00679349899292, "learning_rate": 1.9999014602185613e-05, "loss": 2.4398, "step": 10583 }, { "epoch": 0.14, "grad_norm": 5.231523036956787, "learning_rate": 1.9999013126511382e-05, "loss": 2.1268, "step": 10584 }, { "epoch": 0.14, "grad_norm": 5.0374979972839355, "learning_rate": 1.9999011649733088e-05, "loss": 3.0512, "step": 10585 }, { "epoch": 0.14, "grad_norm": 4.490081310272217, "learning_rate": 1.9999010171850737e-05, "loss": 2.1499, "step": 10586 }, { "epoch": 0.14, "grad_norm": 6.009731769561768, "learning_rate": 1.9999008692864323e-05, "loss": 2.6116, "step": 10587 }, { "epoch": 0.14, "grad_norm": 4.393918991088867, "learning_rate": 1.9999007212773848e-05, "loss": 2.2364, "step": 10588 }, { "epoch": 0.14, "grad_norm": 4.746800422668457, "learning_rate": 1.9999005731579314e-05, "loss": 2.2514, "step": 10589 }, { "epoch": 0.14, "grad_norm": 4.823975086212158, "learning_rate": 1.9999004249280723e-05, "loss": 2.249, "step": 10590 }, { "epoch": 0.14, "grad_norm": 5.41724157333374, "learning_rate": 1.9999002765878068e-05, "loss": 3.0734, "step": 10591 }, { "epoch": 0.14, "grad_norm": 5.530779838562012, "learning_rate": 1.9999001281371354e-05, "loss": 2.5736, "step": 10592 }, { "epoch": 0.14, "grad_norm": 4.97713565826416, "learning_rate": 1.9998999795760583e-05, "loss": 2.3079, "step": 10593 }, { "epoch": 0.14, "grad_norm": 5.542154312133789, "learning_rate": 1.999899830904575e-05, "loss": 2.5536, "step": 10594 }, { "epoch": 0.14, "grad_norm": 5.808990001678467, "learning_rate": 1.999899682122686e-05, "loss": 2.8857, "step": 10595 }, { "epoch": 0.14, "grad_norm": 5.1309123039245605, "learning_rate": 1.9998995332303913e-05, "loss": 2.291, "step": 10596 }, { "epoch": 0.14, "grad_norm": 4.633787631988525, "learning_rate": 1.9998993842276905e-05, "loss": 2.3858, "step": 10597 }, { "epoch": 0.14, "grad_norm": 5.135173797607422, "learning_rate": 1.9998992351145837e-05, "loss": 2.8228, "step": 10598 }, { "epoch": 0.14, "grad_norm": 5.885075092315674, "learning_rate": 1.9998990858910713e-05, "loss": 2.6939, "step": 10599 }, { "epoch": 0.14, "grad_norm": 7.1249799728393555, "learning_rate": 1.999898936557153e-05, "loss": 2.3404, "step": 10600 }, { "epoch": 0.14, "grad_norm": 5.630924701690674, "learning_rate": 1.9998987871128287e-05, "loss": 2.7702, "step": 10601 }, { "epoch": 0.14, "grad_norm": 4.921910285949707, "learning_rate": 1.999898637558099e-05, "loss": 2.1868, "step": 10602 }, { "epoch": 0.14, "grad_norm": 4.843430995941162, "learning_rate": 1.9998984878929632e-05, "loss": 2.2711, "step": 10603 }, { "epoch": 0.14, "grad_norm": 5.373794078826904, "learning_rate": 1.9998983381174218e-05, "loss": 2.9391, "step": 10604 }, { "epoch": 0.14, "grad_norm": 5.441704273223877, "learning_rate": 1.9998981882314747e-05, "loss": 2.6225, "step": 10605 }, { "epoch": 0.14, "grad_norm": 5.3392767906188965, "learning_rate": 1.999898038235122e-05, "loss": 2.6317, "step": 10606 }, { "epoch": 0.14, "grad_norm": 5.877254486083984, "learning_rate": 1.9998978881283632e-05, "loss": 2.3615, "step": 10607 }, { "epoch": 0.14, "grad_norm": 4.653182506561279, "learning_rate": 1.9998977379111988e-05, "loss": 2.0243, "step": 10608 }, { "epoch": 0.14, "grad_norm": 4.93212890625, "learning_rate": 1.9998975875836287e-05, "loss": 2.4232, "step": 10609 }, { "epoch": 0.14, "grad_norm": 4.654815673828125, "learning_rate": 1.9998974371456533e-05, "loss": 1.6642, "step": 10610 }, { "epoch": 0.14, "grad_norm": 4.67749547958374, "learning_rate": 1.999897286597272e-05, "loss": 2.6044, "step": 10611 }, { "epoch": 0.14, "grad_norm": 5.72236967086792, "learning_rate": 1.9998971359384848e-05, "loss": 2.6866, "step": 10612 }, { "epoch": 0.14, "grad_norm": 4.898291110992432, "learning_rate": 1.9998969851692924e-05, "loss": 2.5445, "step": 10613 }, { "epoch": 0.14, "grad_norm": 5.7165207862854, "learning_rate": 1.9998968342896944e-05, "loss": 2.3172, "step": 10614 }, { "epoch": 0.14, "grad_norm": 5.069063186645508, "learning_rate": 1.9998966832996906e-05, "loss": 2.4001, "step": 10615 }, { "epoch": 0.14, "grad_norm": 5.4939422607421875, "learning_rate": 1.9998965321992813e-05, "loss": 3.0035, "step": 10616 }, { "epoch": 0.14, "grad_norm": 4.70174503326416, "learning_rate": 1.999896380988467e-05, "loss": 2.21, "step": 10617 }, { "epoch": 0.14, "grad_norm": 5.111044883728027, "learning_rate": 1.9998962296672465e-05, "loss": 2.0837, "step": 10618 }, { "epoch": 0.14, "grad_norm": 5.468536376953125, "learning_rate": 1.9998960782356205e-05, "loss": 2.394, "step": 10619 }, { "epoch": 0.14, "grad_norm": 4.78354024887085, "learning_rate": 1.9998959266935895e-05, "loss": 2.1751, "step": 10620 }, { "epoch": 0.14, "grad_norm": 5.352304935455322, "learning_rate": 1.9998957750411525e-05, "loss": 2.4147, "step": 10621 }, { "epoch": 0.14, "grad_norm": 5.351561069488525, "learning_rate": 1.9998956232783105e-05, "loss": 2.5677, "step": 10622 }, { "epoch": 0.14, "grad_norm": 5.8803863525390625, "learning_rate": 1.9998954714050628e-05, "loss": 2.59, "step": 10623 }, { "epoch": 0.14, "grad_norm": 4.684932708740234, "learning_rate": 1.9998953194214095e-05, "loss": 2.0133, "step": 10624 }, { "epoch": 0.14, "grad_norm": 4.526780128479004, "learning_rate": 1.999895167327351e-05, "loss": 2.1128, "step": 10625 }, { "epoch": 0.14, "grad_norm": 5.94562292098999, "learning_rate": 1.9998950151228872e-05, "loss": 3.0233, "step": 10626 }, { "epoch": 0.14, "grad_norm": 5.040877342224121, "learning_rate": 1.999894862808018e-05, "loss": 1.765, "step": 10627 }, { "epoch": 0.14, "grad_norm": 5.622892379760742, "learning_rate": 1.9998947103827433e-05, "loss": 2.4323, "step": 10628 }, { "epoch": 0.14, "grad_norm": 4.791590690612793, "learning_rate": 1.9998945578470634e-05, "loss": 2.1448, "step": 10629 }, { "epoch": 0.14, "grad_norm": 5.0984883308410645, "learning_rate": 1.999894405200978e-05, "loss": 2.3669, "step": 10630 }, { "epoch": 0.14, "grad_norm": 5.3582940101623535, "learning_rate": 1.9998942524444875e-05, "loss": 2.423, "step": 10631 }, { "epoch": 0.14, "grad_norm": 4.540129661560059, "learning_rate": 1.9998940995775916e-05, "loss": 1.6151, "step": 10632 }, { "epoch": 0.14, "grad_norm": 5.161205291748047, "learning_rate": 1.9998939466002904e-05, "loss": 2.4127, "step": 10633 }, { "epoch": 0.14, "grad_norm": 4.8427252769470215, "learning_rate": 1.9998937935125838e-05, "loss": 1.9863, "step": 10634 }, { "epoch": 0.14, "grad_norm": 5.638560771942139, "learning_rate": 1.9998936403144726e-05, "loss": 2.4152, "step": 10635 }, { "epoch": 0.14, "grad_norm": 6.651238441467285, "learning_rate": 1.9998934870059554e-05, "loss": 2.7368, "step": 10636 }, { "epoch": 0.14, "grad_norm": 5.409428119659424, "learning_rate": 1.9998933335870335e-05, "loss": 2.6291, "step": 10637 }, { "epoch": 0.14, "grad_norm": 5.626274585723877, "learning_rate": 1.9998931800577063e-05, "loss": 2.6154, "step": 10638 }, { "epoch": 0.14, "grad_norm": 5.5825090408325195, "learning_rate": 1.999893026417974e-05, "loss": 2.4483, "step": 10639 }, { "epoch": 0.14, "grad_norm": 5.861503601074219, "learning_rate": 1.9998928726678364e-05, "loss": 2.714, "step": 10640 }, { "epoch": 0.14, "grad_norm": 4.82443904876709, "learning_rate": 1.9998927188072935e-05, "loss": 2.0452, "step": 10641 }, { "epoch": 0.14, "grad_norm": 4.8149895668029785, "learning_rate": 1.9998925648363458e-05, "loss": 2.404, "step": 10642 }, { "epoch": 0.14, "grad_norm": 5.324881553649902, "learning_rate": 1.999892410754993e-05, "loss": 2.8382, "step": 10643 }, { "epoch": 0.14, "grad_norm": 5.427830696105957, "learning_rate": 1.999892256563235e-05, "loss": 2.5852, "step": 10644 }, { "epoch": 0.14, "grad_norm": 5.131953239440918, "learning_rate": 1.9998921022610718e-05, "loss": 2.41, "step": 10645 }, { "epoch": 0.14, "grad_norm": 5.238674640655518, "learning_rate": 1.9998919478485034e-05, "loss": 2.2744, "step": 10646 }, { "epoch": 0.14, "grad_norm": 5.737619876861572, "learning_rate": 1.9998917933255303e-05, "loss": 2.1467, "step": 10647 }, { "epoch": 0.14, "grad_norm": 4.377594947814941, "learning_rate": 1.999891638692152e-05, "loss": 2.2999, "step": 10648 }, { "epoch": 0.14, "grad_norm": 4.709027290344238, "learning_rate": 1.999891483948369e-05, "loss": 2.3603, "step": 10649 }, { "epoch": 0.14, "grad_norm": 4.700076103210449, "learning_rate": 1.999891329094181e-05, "loss": 2.3809, "step": 10650 }, { "epoch": 0.14, "grad_norm": 4.820590972900391, "learning_rate": 1.9998911741295876e-05, "loss": 2.2253, "step": 10651 }, { "epoch": 0.14, "grad_norm": 5.074827194213867, "learning_rate": 1.9998910190545896e-05, "loss": 2.4225, "step": 10652 }, { "epoch": 0.14, "grad_norm": 4.608361721038818, "learning_rate": 1.9998908638691866e-05, "loss": 2.394, "step": 10653 }, { "epoch": 0.14, "grad_norm": 5.689714431762695, "learning_rate": 1.9998907085733787e-05, "loss": 2.0577, "step": 10654 }, { "epoch": 0.14, "grad_norm": 4.9115705490112305, "learning_rate": 1.9998905531671657e-05, "loss": 2.5577, "step": 10655 }, { "epoch": 0.14, "grad_norm": 5.169522285461426, "learning_rate": 1.9998903976505478e-05, "loss": 2.8448, "step": 10656 }, { "epoch": 0.14, "grad_norm": 5.212917327880859, "learning_rate": 1.9998902420235256e-05, "loss": 2.3171, "step": 10657 }, { "epoch": 0.14, "grad_norm": 5.186188697814941, "learning_rate": 1.999890086286098e-05, "loss": 2.2382, "step": 10658 }, { "epoch": 0.14, "grad_norm": 6.003454208374023, "learning_rate": 1.9998899304382658e-05, "loss": 2.8526, "step": 10659 }, { "epoch": 0.14, "grad_norm": 5.161011695861816, "learning_rate": 1.9998897744800286e-05, "loss": 2.007, "step": 10660 }, { "epoch": 0.14, "grad_norm": 4.634092807769775, "learning_rate": 1.9998896184113868e-05, "loss": 1.9865, "step": 10661 }, { "epoch": 0.14, "grad_norm": 5.452394485473633, "learning_rate": 1.99988946223234e-05, "loss": 2.6647, "step": 10662 }, { "epoch": 0.14, "grad_norm": 4.554851055145264, "learning_rate": 1.999889305942889e-05, "loss": 1.98, "step": 10663 }, { "epoch": 0.14, "grad_norm": 5.469756603240967, "learning_rate": 1.9998891495430328e-05, "loss": 2.4157, "step": 10664 }, { "epoch": 0.14, "grad_norm": 5.283473968505859, "learning_rate": 1.9998889930327717e-05, "loss": 2.3591, "step": 10665 }, { "epoch": 0.14, "grad_norm": 4.568671703338623, "learning_rate": 1.9998888364121063e-05, "loss": 2.5738, "step": 10666 }, { "epoch": 0.14, "grad_norm": 4.924442768096924, "learning_rate": 1.999888679681036e-05, "loss": 2.1317, "step": 10667 }, { "epoch": 0.14, "grad_norm": 4.389651775360107, "learning_rate": 1.999888522839561e-05, "loss": 1.9774, "step": 10668 }, { "epoch": 0.14, "grad_norm": 4.402400016784668, "learning_rate": 1.9998883658876815e-05, "loss": 2.1572, "step": 10669 }, { "epoch": 0.14, "grad_norm": 4.966737747192383, "learning_rate": 1.9998882088253972e-05, "loss": 2.2665, "step": 10670 }, { "epoch": 0.14, "grad_norm": 4.993348121643066, "learning_rate": 1.9998880516527082e-05, "loss": 2.7926, "step": 10671 }, { "epoch": 0.14, "grad_norm": 5.3294806480407715, "learning_rate": 1.999887894369615e-05, "loss": 2.5806, "step": 10672 }, { "epoch": 0.14, "grad_norm": 5.8005194664001465, "learning_rate": 1.999887736976117e-05, "loss": 2.9176, "step": 10673 }, { "epoch": 0.14, "grad_norm": 6.337424278259277, "learning_rate": 1.999887579472214e-05, "loss": 2.9132, "step": 10674 }, { "epoch": 0.14, "grad_norm": 5.407525539398193, "learning_rate": 1.9998874218579073e-05, "loss": 2.6817, "step": 10675 }, { "epoch": 0.14, "grad_norm": 4.6612443923950195, "learning_rate": 1.999887264133195e-05, "loss": 2.3838, "step": 10676 }, { "epoch": 0.14, "grad_norm": 5.861412525177002, "learning_rate": 1.9998871062980792e-05, "loss": 2.1795, "step": 10677 }, { "epoch": 0.14, "grad_norm": 5.233585357666016, "learning_rate": 1.9998869483525584e-05, "loss": 2.3571, "step": 10678 }, { "epoch": 0.14, "grad_norm": 5.171879768371582, "learning_rate": 1.9998867902966333e-05, "loss": 3.1663, "step": 10679 }, { "epoch": 0.14, "grad_norm": 5.783146381378174, "learning_rate": 1.9998866321303036e-05, "loss": 3.0146, "step": 10680 }, { "epoch": 0.14, "grad_norm": 5.570554256439209, "learning_rate": 1.9998864738535695e-05, "loss": 2.7176, "step": 10681 }, { "epoch": 0.14, "grad_norm": 4.680483818054199, "learning_rate": 1.999886315466431e-05, "loss": 2.133, "step": 10682 }, { "epoch": 0.14, "grad_norm": 4.895158290863037, "learning_rate": 1.9998861569688877e-05, "loss": 2.4686, "step": 10683 }, { "epoch": 0.14, "grad_norm": 6.129465103149414, "learning_rate": 1.9998859983609407e-05, "loss": 2.9012, "step": 10684 }, { "epoch": 0.14, "grad_norm": 5.304957389831543, "learning_rate": 1.9998858396425888e-05, "loss": 2.4321, "step": 10685 }, { "epoch": 0.14, "grad_norm": 5.292823314666748, "learning_rate": 1.9998856808138328e-05, "loss": 2.2082, "step": 10686 }, { "epoch": 0.14, "grad_norm": 4.789447784423828, "learning_rate": 1.9998855218746726e-05, "loss": 2.0592, "step": 10687 }, { "epoch": 0.14, "grad_norm": 6.028285980224609, "learning_rate": 1.9998853628251077e-05, "loss": 2.4766, "step": 10688 }, { "epoch": 0.14, "grad_norm": 5.521266460418701, "learning_rate": 1.9998852036651385e-05, "loss": 2.7525, "step": 10689 }, { "epoch": 0.14, "grad_norm": 5.39053201675415, "learning_rate": 1.9998850443947653e-05, "loss": 2.8101, "step": 10690 }, { "epoch": 0.14, "grad_norm": 5.257492542266846, "learning_rate": 1.9998848850139875e-05, "loss": 2.301, "step": 10691 }, { "epoch": 0.14, "grad_norm": 4.656033515930176, "learning_rate": 1.9998847255228057e-05, "loss": 2.0839, "step": 10692 }, { "epoch": 0.14, "grad_norm": 5.040134429931641, "learning_rate": 1.9998845659212196e-05, "loss": 1.9285, "step": 10693 }, { "epoch": 0.14, "grad_norm": 4.84234619140625, "learning_rate": 1.999884406209229e-05, "loss": 2.2394, "step": 10694 }, { "epoch": 0.14, "grad_norm": 5.030129432678223, "learning_rate": 1.9998842463868348e-05, "loss": 2.1484, "step": 10695 }, { "epoch": 0.14, "grad_norm": 5.081399440765381, "learning_rate": 1.9998840864540358e-05, "loss": 2.1653, "step": 10696 }, { "epoch": 0.14, "grad_norm": 5.069599628448486, "learning_rate": 1.999883926410833e-05, "loss": 2.6634, "step": 10697 }, { "epoch": 0.14, "grad_norm": 4.290017604827881, "learning_rate": 1.999883766257226e-05, "loss": 1.6806, "step": 10698 }, { "epoch": 0.14, "grad_norm": 5.366523265838623, "learning_rate": 1.999883605993215e-05, "loss": 2.8405, "step": 10699 }, { "epoch": 0.14, "grad_norm": 5.555426120758057, "learning_rate": 1.9998834456187993e-05, "loss": 2.5928, "step": 10700 }, { "epoch": 0.14, "grad_norm": 5.381985187530518, "learning_rate": 1.99988328513398e-05, "loss": 1.976, "step": 10701 }, { "epoch": 0.14, "grad_norm": 4.638188362121582, "learning_rate": 1.9998831245387566e-05, "loss": 2.3443, "step": 10702 }, { "epoch": 0.14, "grad_norm": 5.40903902053833, "learning_rate": 1.999882963833129e-05, "loss": 2.3789, "step": 10703 }, { "epoch": 0.14, "grad_norm": 5.6346964836120605, "learning_rate": 1.9998828030170977e-05, "loss": 2.2022, "step": 10704 }, { "epoch": 0.14, "grad_norm": 5.325688362121582, "learning_rate": 1.999882642090662e-05, "loss": 2.3276, "step": 10705 }, { "epoch": 0.14, "grad_norm": 3.9072978496551514, "learning_rate": 1.9998824810538226e-05, "loss": 1.6814, "step": 10706 }, { "epoch": 0.14, "grad_norm": 5.491178512573242, "learning_rate": 1.999882319906579e-05, "loss": 2.4853, "step": 10707 }, { "epoch": 0.14, "grad_norm": 4.979422092437744, "learning_rate": 1.9998821586489316e-05, "loss": 2.5756, "step": 10708 }, { "epoch": 0.14, "grad_norm": 5.348427772521973, "learning_rate": 1.99988199728088e-05, "loss": 2.4733, "step": 10709 }, { "epoch": 0.14, "grad_norm": 5.165615081787109, "learning_rate": 1.9998818358024244e-05, "loss": 2.2797, "step": 10710 }, { "epoch": 0.14, "grad_norm": 5.196829795837402, "learning_rate": 1.9998816742135648e-05, "loss": 2.4369, "step": 10711 }, { "epoch": 0.14, "grad_norm": 4.609689712524414, "learning_rate": 1.9998815125143016e-05, "loss": 2.1565, "step": 10712 }, { "epoch": 0.14, "grad_norm": 4.994034290313721, "learning_rate": 1.9998813507046348e-05, "loss": 2.6748, "step": 10713 }, { "epoch": 0.14, "grad_norm": 4.97459077835083, "learning_rate": 1.9998811887845636e-05, "loss": 1.9107, "step": 10714 }, { "epoch": 0.14, "grad_norm": 5.366399765014648, "learning_rate": 1.9998810267540886e-05, "loss": 2.3214, "step": 10715 }, { "epoch": 0.14, "grad_norm": 5.050634860992432, "learning_rate": 1.99988086461321e-05, "loss": 2.1106, "step": 10716 }, { "epoch": 0.14, "grad_norm": 5.405420780181885, "learning_rate": 1.9998807023619275e-05, "loss": 2.4666, "step": 10717 }, { "epoch": 0.14, "grad_norm": 4.972688674926758, "learning_rate": 1.999880540000241e-05, "loss": 2.267, "step": 10718 }, { "epoch": 0.14, "grad_norm": 4.948624134063721, "learning_rate": 1.9998803775281512e-05, "loss": 2.7916, "step": 10719 }, { "epoch": 0.14, "grad_norm": 5.191647529602051, "learning_rate": 1.9998802149456576e-05, "loss": 2.6285, "step": 10720 }, { "epoch": 0.14, "grad_norm": 4.601511001586914, "learning_rate": 1.9998800522527597e-05, "loss": 2.5298, "step": 10721 }, { "epoch": 0.14, "grad_norm": 5.696746349334717, "learning_rate": 1.9998798894494585e-05, "loss": 2.5408, "step": 10722 }, { "epoch": 0.14, "grad_norm": 6.109124660491943, "learning_rate": 1.9998797265357534e-05, "loss": 2.1134, "step": 10723 }, { "epoch": 0.14, "grad_norm": 5.693892478942871, "learning_rate": 1.999879563511645e-05, "loss": 2.7511, "step": 10724 }, { "epoch": 0.14, "grad_norm": 4.837368011474609, "learning_rate": 1.9998794003771325e-05, "loss": 2.1339, "step": 10725 }, { "epoch": 0.14, "grad_norm": 4.997993469238281, "learning_rate": 1.9998792371322165e-05, "loss": 2.5768, "step": 10726 }, { "epoch": 0.14, "grad_norm": 5.026813983917236, "learning_rate": 1.9998790737768968e-05, "loss": 2.7822, "step": 10727 }, { "epoch": 0.14, "grad_norm": 5.479526996612549, "learning_rate": 1.999878910311174e-05, "loss": 2.4859, "step": 10728 }, { "epoch": 0.14, "grad_norm": 5.173796653747559, "learning_rate": 1.999878746735047e-05, "loss": 2.2569, "step": 10729 }, { "epoch": 0.14, "grad_norm": 5.138092517852783, "learning_rate": 1.9998785830485167e-05, "loss": 2.3742, "step": 10730 }, { "epoch": 0.14, "grad_norm": 6.156699180603027, "learning_rate": 1.9998784192515825e-05, "loss": 2.9523, "step": 10731 }, { "epoch": 0.14, "grad_norm": 4.854026794433594, "learning_rate": 1.999878255344245e-05, "loss": 2.0816, "step": 10732 }, { "epoch": 0.14, "grad_norm": 4.309128284454346, "learning_rate": 1.999878091326504e-05, "loss": 1.9037, "step": 10733 }, { "epoch": 0.14, "grad_norm": 5.40744161605835, "learning_rate": 1.9998779271983596e-05, "loss": 2.6035, "step": 10734 }, { "epoch": 0.14, "grad_norm": 5.140194892883301, "learning_rate": 1.9998777629598115e-05, "loss": 2.4075, "step": 10735 }, { "epoch": 0.14, "grad_norm": 5.48853063583374, "learning_rate": 1.9998775986108602e-05, "loss": 2.6255, "step": 10736 }, { "epoch": 0.14, "grad_norm": 5.419802188873291, "learning_rate": 1.9998774341515053e-05, "loss": 2.7725, "step": 10737 }, { "epoch": 0.14, "grad_norm": 5.1187663078308105, "learning_rate": 1.9998772695817467e-05, "loss": 2.7862, "step": 10738 }, { "epoch": 0.14, "grad_norm": 5.073129177093506, "learning_rate": 1.999877104901585e-05, "loss": 2.2448, "step": 10739 }, { "epoch": 0.14, "grad_norm": 5.299009323120117, "learning_rate": 1.99987694011102e-05, "loss": 2.3503, "step": 10740 }, { "epoch": 0.14, "grad_norm": 4.759143829345703, "learning_rate": 1.9998767752100515e-05, "loss": 2.56, "step": 10741 }, { "epoch": 0.14, "grad_norm": 5.48726224899292, "learning_rate": 1.999876610198679e-05, "loss": 2.6105, "step": 10742 }, { "epoch": 0.14, "grad_norm": 5.026348114013672, "learning_rate": 1.999876445076904e-05, "loss": 2.2118, "step": 10743 }, { "epoch": 0.14, "grad_norm": 4.555227279663086, "learning_rate": 1.9998762798447254e-05, "loss": 2.2392, "step": 10744 }, { "epoch": 0.14, "grad_norm": 5.207577228546143, "learning_rate": 1.9998761145021438e-05, "loss": 2.2549, "step": 10745 }, { "epoch": 0.14, "grad_norm": 5.371060848236084, "learning_rate": 1.9998759490491582e-05, "loss": 2.5649, "step": 10746 }, { "epoch": 0.14, "grad_norm": 5.323713302612305, "learning_rate": 1.99987578348577e-05, "loss": 2.4028, "step": 10747 }, { "epoch": 0.14, "grad_norm": 5.786265850067139, "learning_rate": 1.9998756178119786e-05, "loss": 3.2771, "step": 10748 }, { "epoch": 0.14, "grad_norm": 5.709527492523193, "learning_rate": 1.9998754520277838e-05, "loss": 2.7778, "step": 10749 }, { "epoch": 0.14, "grad_norm": 5.569483280181885, "learning_rate": 1.9998752861331854e-05, "loss": 2.6528, "step": 10750 }, { "epoch": 0.14, "grad_norm": 4.761411190032959, "learning_rate": 1.999875120128184e-05, "loss": 1.852, "step": 10751 }, { "epoch": 0.14, "grad_norm": 5.9596147537231445, "learning_rate": 1.9998749540127794e-05, "loss": 2.7684, "step": 10752 }, { "epoch": 0.14, "grad_norm": 5.3328752517700195, "learning_rate": 1.999874787786972e-05, "loss": 2.8238, "step": 10753 }, { "epoch": 0.14, "grad_norm": 5.547866344451904, "learning_rate": 1.9998746214507612e-05, "loss": 2.2677, "step": 10754 }, { "epoch": 0.14, "grad_norm": 4.782856464385986, "learning_rate": 1.999874455004147e-05, "loss": 2.0045, "step": 10755 }, { "epoch": 0.14, "grad_norm": 5.7962870597839355, "learning_rate": 1.99987428844713e-05, "loss": 2.7071, "step": 10756 }, { "epoch": 0.14, "grad_norm": 5.147905349731445, "learning_rate": 1.99987412177971e-05, "loss": 2.2056, "step": 10757 }, { "epoch": 0.14, "grad_norm": 4.302932262420654, "learning_rate": 1.999873955001887e-05, "loss": 1.7749, "step": 10758 }, { "epoch": 0.14, "grad_norm": 4.978294372558594, "learning_rate": 1.9998737881136608e-05, "loss": 2.4683, "step": 10759 }, { "epoch": 0.14, "grad_norm": 4.838001728057861, "learning_rate": 1.9998736211150315e-05, "loss": 2.3249, "step": 10760 }, { "epoch": 0.14, "grad_norm": 5.407434463500977, "learning_rate": 1.9998734540059993e-05, "loss": 2.8869, "step": 10761 }, { "epoch": 0.14, "grad_norm": 5.354990005493164, "learning_rate": 1.999873286786564e-05, "loss": 2.7126, "step": 10762 }, { "epoch": 0.14, "grad_norm": 4.904406547546387, "learning_rate": 1.999873119456726e-05, "loss": 2.2446, "step": 10763 }, { "epoch": 0.14, "grad_norm": 5.100893020629883, "learning_rate": 1.9998729520164845e-05, "loss": 2.0713, "step": 10764 }, { "epoch": 0.14, "grad_norm": 5.057940483093262, "learning_rate": 1.9998727844658405e-05, "loss": 2.2631, "step": 10765 }, { "epoch": 0.14, "grad_norm": 4.797706127166748, "learning_rate": 1.9998726168047935e-05, "loss": 2.506, "step": 10766 }, { "epoch": 0.14, "grad_norm": 5.607776641845703, "learning_rate": 1.9998724490333436e-05, "loss": 2.2909, "step": 10767 }, { "epoch": 0.14, "grad_norm": 4.795867443084717, "learning_rate": 1.9998722811514907e-05, "loss": 2.313, "step": 10768 }, { "epoch": 0.14, "grad_norm": 4.460236072540283, "learning_rate": 1.9998721131592348e-05, "loss": 1.91, "step": 10769 }, { "epoch": 0.14, "grad_norm": 4.6573638916015625, "learning_rate": 1.9998719450565763e-05, "loss": 1.9016, "step": 10770 }, { "epoch": 0.14, "grad_norm": 5.341267108917236, "learning_rate": 1.9998717768435153e-05, "loss": 1.9638, "step": 10771 }, { "epoch": 0.14, "grad_norm": 5.028470039367676, "learning_rate": 1.9998716085200512e-05, "loss": 2.2808, "step": 10772 }, { "epoch": 0.14, "grad_norm": 5.755209445953369, "learning_rate": 1.999871440086184e-05, "loss": 2.7276, "step": 10773 }, { "epoch": 0.14, "grad_norm": 5.547613620758057, "learning_rate": 1.9998712715419143e-05, "loss": 2.2703, "step": 10774 }, { "epoch": 0.14, "grad_norm": 5.496642589569092, "learning_rate": 1.9998711028872418e-05, "loss": 2.6776, "step": 10775 }, { "epoch": 0.14, "grad_norm": 4.985424995422363, "learning_rate": 1.9998709341221666e-05, "loss": 2.2042, "step": 10776 }, { "epoch": 0.14, "grad_norm": 5.253659725189209, "learning_rate": 1.999870765246689e-05, "loss": 2.7896, "step": 10777 }, { "epoch": 0.14, "grad_norm": 5.170636177062988, "learning_rate": 1.999870596260808e-05, "loss": 2.6515, "step": 10778 }, { "epoch": 0.14, "grad_norm": 5.568111896514893, "learning_rate": 1.9998704271645248e-05, "loss": 2.8317, "step": 10779 }, { "epoch": 0.14, "grad_norm": 5.829154968261719, "learning_rate": 1.999870257957839e-05, "loss": 2.3457, "step": 10780 }, { "epoch": 0.14, "grad_norm": 4.553460121154785, "learning_rate": 1.9998700886407506e-05, "loss": 2.5639, "step": 10781 }, { "epoch": 0.14, "grad_norm": 4.9412078857421875, "learning_rate": 1.999869919213259e-05, "loss": 2.2442, "step": 10782 }, { "epoch": 0.14, "grad_norm": 5.033747673034668, "learning_rate": 1.9998697496753653e-05, "loss": 2.427, "step": 10783 }, { "epoch": 0.14, "grad_norm": 4.549057483673096, "learning_rate": 1.999869580027069e-05, "loss": 1.9173, "step": 10784 }, { "epoch": 0.14, "grad_norm": 5.063900947570801, "learning_rate": 1.99986941026837e-05, "loss": 2.4525, "step": 10785 }, { "epoch": 0.14, "grad_norm": 4.194186210632324, "learning_rate": 1.9998692403992686e-05, "loss": 2.0873, "step": 10786 }, { "epoch": 0.14, "grad_norm": 5.4164814949035645, "learning_rate": 1.9998690704197644e-05, "loss": 2.4251, "step": 10787 }, { "epoch": 0.14, "grad_norm": 4.9243245124816895, "learning_rate": 1.999868900329858e-05, "loss": 1.9933, "step": 10788 }, { "epoch": 0.14, "grad_norm": 5.221303939819336, "learning_rate": 1.9998687301295487e-05, "loss": 2.2799, "step": 10789 }, { "epoch": 0.14, "grad_norm": 5.033943176269531, "learning_rate": 1.9998685598188373e-05, "loss": 2.3327, "step": 10790 }, { "epoch": 0.14, "grad_norm": 5.430209159851074, "learning_rate": 1.9998683893977233e-05, "loss": 2.4941, "step": 10791 }, { "epoch": 0.14, "grad_norm": 5.4388933181762695, "learning_rate": 1.999868218866207e-05, "loss": 2.3468, "step": 10792 }, { "epoch": 0.14, "grad_norm": 4.985300540924072, "learning_rate": 1.999868048224288e-05, "loss": 2.6184, "step": 10793 }, { "epoch": 0.14, "grad_norm": 5.035857200622559, "learning_rate": 1.999867877471967e-05, "loss": 1.9973, "step": 10794 }, { "epoch": 0.14, "grad_norm": 5.964973449707031, "learning_rate": 1.9998677066092435e-05, "loss": 3.2527, "step": 10795 }, { "epoch": 0.14, "grad_norm": 6.159068584442139, "learning_rate": 1.9998675356361174e-05, "loss": 3.0322, "step": 10796 }, { "epoch": 0.14, "grad_norm": 4.393654823303223, "learning_rate": 1.999867364552589e-05, "loss": 2.5581, "step": 10797 }, { "epoch": 0.14, "grad_norm": 5.574179172515869, "learning_rate": 1.9998671933586585e-05, "loss": 2.7971, "step": 10798 }, { "epoch": 0.14, "grad_norm": 5.599788665771484, "learning_rate": 1.999867022054326e-05, "loss": 3.0409, "step": 10799 }, { "epoch": 0.14, "grad_norm": 5.8011956214904785, "learning_rate": 1.9998668506395908e-05, "loss": 2.7715, "step": 10800 }, { "epoch": 0.14, "grad_norm": 6.072423458099365, "learning_rate": 1.9998666791144533e-05, "loss": 2.5895, "step": 10801 }, { "epoch": 0.14, "grad_norm": 4.769620418548584, "learning_rate": 1.9998665074789136e-05, "loss": 1.9567, "step": 10802 }, { "epoch": 0.14, "grad_norm": 5.0236592292785645, "learning_rate": 1.9998663357329716e-05, "loss": 2.8471, "step": 10803 }, { "epoch": 0.14, "grad_norm": 5.602749347686768, "learning_rate": 1.9998661638766274e-05, "loss": 2.0899, "step": 10804 }, { "epoch": 0.14, "grad_norm": 5.189527988433838, "learning_rate": 1.9998659919098812e-05, "loss": 2.2504, "step": 10805 }, { "epoch": 0.14, "grad_norm": 4.750943183898926, "learning_rate": 1.9998658198327327e-05, "loss": 2.5223, "step": 10806 }, { "epoch": 0.14, "grad_norm": 6.008481025695801, "learning_rate": 1.9998656476451823e-05, "loss": 2.1537, "step": 10807 }, { "epoch": 0.14, "grad_norm": 5.60718297958374, "learning_rate": 1.9998654753472296e-05, "loss": 2.7223, "step": 10808 }, { "epoch": 0.14, "grad_norm": 5.068779945373535, "learning_rate": 1.9998653029388747e-05, "loss": 2.41, "step": 10809 }, { "epoch": 0.14, "grad_norm": 4.62015962600708, "learning_rate": 1.9998651304201178e-05, "loss": 1.9242, "step": 10810 }, { "epoch": 0.14, "grad_norm": 4.975138187408447, "learning_rate": 1.9998649577909586e-05, "loss": 2.0536, "step": 10811 }, { "epoch": 0.14, "grad_norm": 4.8840765953063965, "learning_rate": 1.9998647850513975e-05, "loss": 2.0966, "step": 10812 }, { "epoch": 0.14, "grad_norm": 4.868178367614746, "learning_rate": 1.9998646122014345e-05, "loss": 1.9433, "step": 10813 }, { "epoch": 0.14, "grad_norm": 4.933088779449463, "learning_rate": 1.9998644392410692e-05, "loss": 2.5239, "step": 10814 }, { "epoch": 0.14, "grad_norm": 5.152756214141846, "learning_rate": 1.9998642661703023e-05, "loss": 2.4227, "step": 10815 }, { "epoch": 0.14, "grad_norm": 5.166901111602783, "learning_rate": 1.999864092989133e-05, "loss": 1.9577, "step": 10816 }, { "epoch": 0.14, "grad_norm": 5.814486026763916, "learning_rate": 1.9998639196975623e-05, "loss": 2.2807, "step": 10817 }, { "epoch": 0.14, "grad_norm": 4.853291034698486, "learning_rate": 1.999863746295589e-05, "loss": 2.7915, "step": 10818 }, { "epoch": 0.14, "grad_norm": 4.45302152633667, "learning_rate": 1.9998635727832143e-05, "loss": 2.4469, "step": 10819 }, { "epoch": 0.14, "grad_norm": 5.3507280349731445, "learning_rate": 1.9998633991604374e-05, "loss": 2.3494, "step": 10820 }, { "epoch": 0.14, "grad_norm": 4.5266194343566895, "learning_rate": 1.999863225427259e-05, "loss": 1.9301, "step": 10821 }, { "epoch": 0.14, "grad_norm": 4.9211812019348145, "learning_rate": 1.9998630515836777e-05, "loss": 2.217, "step": 10822 }, { "epoch": 0.14, "grad_norm": 5.49170446395874, "learning_rate": 1.9998628776296957e-05, "loss": 2.9749, "step": 10823 }, { "epoch": 0.14, "grad_norm": 5.541472434997559, "learning_rate": 1.9998627035653113e-05, "loss": 2.8084, "step": 10824 }, { "epoch": 0.14, "grad_norm": 4.944738388061523, "learning_rate": 1.9998625293905254e-05, "loss": 2.6235, "step": 10825 }, { "epoch": 0.14, "grad_norm": 5.166446685791016, "learning_rate": 1.9998623551053376e-05, "loss": 2.5112, "step": 10826 }, { "epoch": 0.14, "grad_norm": 6.359969615936279, "learning_rate": 1.999862180709748e-05, "loss": 2.4714, "step": 10827 }, { "epoch": 0.14, "grad_norm": 5.044716835021973, "learning_rate": 1.9998620062037567e-05, "loss": 2.3761, "step": 10828 }, { "epoch": 0.14, "grad_norm": 4.635523319244385, "learning_rate": 1.9998618315873637e-05, "loss": 2.3909, "step": 10829 }, { "epoch": 0.14, "grad_norm": 4.62977409362793, "learning_rate": 1.9998616568605688e-05, "loss": 1.6875, "step": 10830 }, { "epoch": 0.14, "grad_norm": 5.35310173034668, "learning_rate": 1.9998614820233723e-05, "loss": 2.539, "step": 10831 }, { "epoch": 0.14, "grad_norm": 5.482527256011963, "learning_rate": 1.999861307075774e-05, "loss": 2.7619, "step": 10832 }, { "epoch": 0.14, "grad_norm": 5.850215911865234, "learning_rate": 1.9998611320177745e-05, "loss": 2.9224, "step": 10833 }, { "epoch": 0.14, "grad_norm": 5.081671237945557, "learning_rate": 1.9998609568493728e-05, "loss": 2.3204, "step": 10834 }, { "epoch": 0.14, "grad_norm": 5.3717780113220215, "learning_rate": 1.9998607815705696e-05, "loss": 2.2809, "step": 10835 }, { "epoch": 0.14, "grad_norm": 5.753139495849609, "learning_rate": 1.9998606061813654e-05, "loss": 3.169, "step": 10836 }, { "epoch": 0.14, "grad_norm": 5.018474578857422, "learning_rate": 1.999860430681759e-05, "loss": 2.727, "step": 10837 }, { "epoch": 0.14, "grad_norm": 4.215005397796631, "learning_rate": 1.9998602550717513e-05, "loss": 2.1253, "step": 10838 }, { "epoch": 0.14, "grad_norm": 5.225269794464111, "learning_rate": 1.999860079351342e-05, "loss": 2.7837, "step": 10839 }, { "epoch": 0.14, "grad_norm": 5.218149185180664, "learning_rate": 1.9998599035205314e-05, "loss": 2.7852, "step": 10840 }, { "epoch": 0.14, "grad_norm": 5.65709114074707, "learning_rate": 1.999859727579319e-05, "loss": 2.3812, "step": 10841 }, { "epoch": 0.14, "grad_norm": 5.0232343673706055, "learning_rate": 1.9998595515277052e-05, "loss": 2.119, "step": 10842 }, { "epoch": 0.14, "grad_norm": 5.182992935180664, "learning_rate": 1.99985937536569e-05, "loss": 2.4385, "step": 10843 }, { "epoch": 0.14, "grad_norm": 5.125441074371338, "learning_rate": 1.9998591990932733e-05, "loss": 2.3743, "step": 10844 }, { "epoch": 0.14, "grad_norm": 4.85852575302124, "learning_rate": 1.999859022710455e-05, "loss": 2.4934, "step": 10845 }, { "epoch": 0.14, "grad_norm": 4.8741984367370605, "learning_rate": 1.9998588462172356e-05, "loss": 2.2408, "step": 10846 }, { "epoch": 0.14, "grad_norm": 5.257804870605469, "learning_rate": 1.9998586696136146e-05, "loss": 2.1163, "step": 10847 }, { "epoch": 0.14, "grad_norm": 4.396851062774658, "learning_rate": 1.9998584928995926e-05, "loss": 1.9831, "step": 10848 }, { "epoch": 0.14, "grad_norm": 4.93571662902832, "learning_rate": 1.999858316075169e-05, "loss": 2.2491, "step": 10849 }, { "epoch": 0.14, "grad_norm": 4.636838436126709, "learning_rate": 1.999858139140344e-05, "loss": 2.1661, "step": 10850 }, { "epoch": 0.14, "grad_norm": 6.875833988189697, "learning_rate": 1.999857962095118e-05, "loss": 3.1017, "step": 10851 }, { "epoch": 0.14, "grad_norm": 5.488060474395752, "learning_rate": 1.9998577849394904e-05, "loss": 2.436, "step": 10852 }, { "epoch": 0.14, "grad_norm": 5.504672527313232, "learning_rate": 1.9998576076734615e-05, "loss": 2.7047, "step": 10853 }, { "epoch": 0.14, "grad_norm": 4.666312217712402, "learning_rate": 1.9998574302970316e-05, "loss": 2.2307, "step": 10854 }, { "epoch": 0.14, "grad_norm": 5.2245378494262695, "learning_rate": 1.9998572528102005e-05, "loss": 2.4235, "step": 10855 }, { "epoch": 0.14, "grad_norm": 4.819474220275879, "learning_rate": 1.999857075212968e-05, "loss": 2.3705, "step": 10856 }, { "epoch": 0.14, "grad_norm": 4.985865592956543, "learning_rate": 1.9998568975053345e-05, "loss": 2.2425, "step": 10857 }, { "epoch": 0.14, "grad_norm": 4.915460109710693, "learning_rate": 1.9998567196872996e-05, "loss": 2.8361, "step": 10858 }, { "epoch": 0.14, "grad_norm": 4.5570878982543945, "learning_rate": 1.999856541758864e-05, "loss": 2.297, "step": 10859 }, { "epoch": 0.14, "grad_norm": 5.6661224365234375, "learning_rate": 1.9998563637200268e-05, "loss": 2.3764, "step": 10860 }, { "epoch": 0.14, "grad_norm": 5.666129112243652, "learning_rate": 1.9998561855707885e-05, "loss": 2.3753, "step": 10861 }, { "epoch": 0.14, "grad_norm": 5.291025638580322, "learning_rate": 1.9998560073111496e-05, "loss": 2.5182, "step": 10862 }, { "epoch": 0.14, "grad_norm": 5.51937198638916, "learning_rate": 1.999855828941109e-05, "loss": 2.2929, "step": 10863 }, { "epoch": 0.14, "grad_norm": 4.558935165405273, "learning_rate": 1.999855650460668e-05, "loss": 1.8937, "step": 10864 }, { "epoch": 0.14, "grad_norm": 5.6619462966918945, "learning_rate": 1.9998554718698253e-05, "loss": 2.1445, "step": 10865 }, { "epoch": 0.14, "grad_norm": 4.9858269691467285, "learning_rate": 1.999855293168582e-05, "loss": 2.2514, "step": 10866 }, { "epoch": 0.14, "grad_norm": 6.6898651123046875, "learning_rate": 1.9998551143569375e-05, "loss": 2.3095, "step": 10867 }, { "epoch": 0.14, "grad_norm": 4.652912139892578, "learning_rate": 1.9998549354348924e-05, "loss": 2.4227, "step": 10868 }, { "epoch": 0.14, "grad_norm": 4.7769622802734375, "learning_rate": 1.999854756402446e-05, "loss": 2.4105, "step": 10869 }, { "epoch": 0.14, "grad_norm": 4.728517055511475, "learning_rate": 1.9998545772595988e-05, "loss": 2.1727, "step": 10870 }, { "epoch": 0.14, "grad_norm": 4.890629768371582, "learning_rate": 1.9998543980063506e-05, "loss": 2.1294, "step": 10871 }, { "epoch": 0.14, "grad_norm": 4.007595062255859, "learning_rate": 1.999854218642702e-05, "loss": 1.956, "step": 10872 }, { "epoch": 0.14, "grad_norm": 4.53235387802124, "learning_rate": 1.999854039168652e-05, "loss": 1.9701, "step": 10873 }, { "epoch": 0.14, "grad_norm": 4.710683822631836, "learning_rate": 1.9998538595842012e-05, "loss": 1.8534, "step": 10874 }, { "epoch": 0.14, "grad_norm": 6.083786964416504, "learning_rate": 1.9998536798893497e-05, "loss": 2.7063, "step": 10875 }, { "epoch": 0.14, "grad_norm": 5.287598133087158, "learning_rate": 1.9998535000840972e-05, "loss": 2.5127, "step": 10876 }, { "epoch": 0.14, "grad_norm": 5.491759300231934, "learning_rate": 1.9998533201684442e-05, "loss": 2.5984, "step": 10877 }, { "epoch": 0.14, "grad_norm": 6.219573974609375, "learning_rate": 1.9998531401423903e-05, "loss": 2.9525, "step": 10878 }, { "epoch": 0.14, "grad_norm": 5.272427558898926, "learning_rate": 1.9998529600059354e-05, "loss": 2.6803, "step": 10879 }, { "epoch": 0.14, "grad_norm": 5.30443000793457, "learning_rate": 1.9998527797590803e-05, "loss": 2.6054, "step": 10880 }, { "epoch": 0.14, "grad_norm": 6.390637397766113, "learning_rate": 1.999852599401824e-05, "loss": 2.4208, "step": 10881 }, { "epoch": 0.14, "grad_norm": 5.355656147003174, "learning_rate": 1.9998524189341673e-05, "loss": 2.7528, "step": 10882 }, { "epoch": 0.14, "grad_norm": 5.934586524963379, "learning_rate": 1.9998522383561098e-05, "loss": 2.776, "step": 10883 }, { "epoch": 0.14, "grad_norm": 5.27937126159668, "learning_rate": 1.999852057667652e-05, "loss": 2.3986, "step": 10884 }, { "epoch": 0.14, "grad_norm": 5.2942423820495605, "learning_rate": 1.999851876868793e-05, "loss": 2.2787, "step": 10885 }, { "epoch": 0.14, "grad_norm": 4.941829681396484, "learning_rate": 1.9998516959595337e-05, "loss": 2.3313, "step": 10886 }, { "epoch": 0.14, "grad_norm": 4.656696796417236, "learning_rate": 1.999851514939874e-05, "loss": 2.2302, "step": 10887 }, { "epoch": 0.14, "grad_norm": 5.17599630355835, "learning_rate": 1.9998513338098135e-05, "loss": 2.6626, "step": 10888 }, { "epoch": 0.14, "grad_norm": 5.017104625701904, "learning_rate": 1.999851152569352e-05, "loss": 2.5843, "step": 10889 }, { "epoch": 0.14, "grad_norm": 5.715426445007324, "learning_rate": 1.9998509712184905e-05, "loss": 2.8771, "step": 10890 }, { "epoch": 0.14, "grad_norm": 5.135800838470459, "learning_rate": 1.9998507897572287e-05, "loss": 2.7137, "step": 10891 }, { "epoch": 0.14, "grad_norm": 4.321661472320557, "learning_rate": 1.999850608185566e-05, "loss": 2.0526, "step": 10892 }, { "epoch": 0.14, "grad_norm": 5.355914115905762, "learning_rate": 1.999850426503503e-05, "loss": 2.9181, "step": 10893 }, { "epoch": 0.14, "grad_norm": 5.077833652496338, "learning_rate": 1.9998502447110394e-05, "loss": 2.7396, "step": 10894 }, { "epoch": 0.14, "grad_norm": 4.8482985496521, "learning_rate": 1.9998500628081756e-05, "loss": 2.4633, "step": 10895 }, { "epoch": 0.14, "grad_norm": 4.717056751251221, "learning_rate": 1.9998498807949112e-05, "loss": 2.3549, "step": 10896 }, { "epoch": 0.14, "grad_norm": 4.929256916046143, "learning_rate": 1.9998496986712466e-05, "loss": 2.8654, "step": 10897 }, { "epoch": 0.14, "grad_norm": 5.282547473907471, "learning_rate": 1.9998495164371813e-05, "loss": 2.3528, "step": 10898 }, { "epoch": 0.14, "grad_norm": 4.605570316314697, "learning_rate": 1.999849334092716e-05, "loss": 2.3174, "step": 10899 }, { "epoch": 0.14, "grad_norm": 5.032188415527344, "learning_rate": 1.9998491516378502e-05, "loss": 2.304, "step": 10900 }, { "epoch": 0.14, "grad_norm": 5.710386753082275, "learning_rate": 1.9998489690725843e-05, "loss": 2.6059, "step": 10901 }, { "epoch": 0.14, "grad_norm": 5.041755199432373, "learning_rate": 1.9998487863969177e-05, "loss": 2.4879, "step": 10902 }, { "epoch": 0.14, "grad_norm": 5.550607204437256, "learning_rate": 1.999848603610851e-05, "loss": 2.4279, "step": 10903 }, { "epoch": 0.14, "grad_norm": 4.604456901550293, "learning_rate": 1.999848420714384e-05, "loss": 2.2791, "step": 10904 }, { "epoch": 0.14, "grad_norm": 4.848237991333008, "learning_rate": 1.999848237707517e-05, "loss": 2.3691, "step": 10905 }, { "epoch": 0.14, "grad_norm": 5.208245754241943, "learning_rate": 1.9998480545902496e-05, "loss": 2.3783, "step": 10906 }, { "epoch": 0.14, "grad_norm": 5.041154384613037, "learning_rate": 1.9998478713625822e-05, "loss": 2.4445, "step": 10907 }, { "epoch": 0.14, "grad_norm": 6.0613579750061035, "learning_rate": 1.9998476880245142e-05, "loss": 2.6714, "step": 10908 }, { "epoch": 0.14, "grad_norm": 4.661981582641602, "learning_rate": 1.9998475045760467e-05, "loss": 2.0572, "step": 10909 }, { "epoch": 0.14, "grad_norm": 5.303521156311035, "learning_rate": 1.9998473210171786e-05, "loss": 2.4175, "step": 10910 }, { "epoch": 0.14, "grad_norm": 4.6199164390563965, "learning_rate": 1.9998471373479106e-05, "loss": 2.6561, "step": 10911 }, { "epoch": 0.14, "grad_norm": 5.630120277404785, "learning_rate": 1.9998469535682427e-05, "loss": 2.7, "step": 10912 }, { "epoch": 0.14, "grad_norm": 6.2443342208862305, "learning_rate": 1.9998467696781742e-05, "loss": 3.2516, "step": 10913 }, { "epoch": 0.14, "grad_norm": 5.519718647003174, "learning_rate": 1.9998465856777058e-05, "loss": 2.0078, "step": 10914 }, { "epoch": 0.14, "grad_norm": 5.473255157470703, "learning_rate": 1.9998464015668375e-05, "loss": 2.5179, "step": 10915 }, { "epoch": 0.14, "grad_norm": 5.006696701049805, "learning_rate": 1.9998462173455696e-05, "loss": 2.4526, "step": 10916 }, { "epoch": 0.14, "grad_norm": 4.911094665527344, "learning_rate": 1.999846033013901e-05, "loss": 2.4903, "step": 10917 }, { "epoch": 0.14, "grad_norm": 5.268555164337158, "learning_rate": 1.9998458485718328e-05, "loss": 2.3293, "step": 10918 }, { "epoch": 0.14, "grad_norm": 4.696012020111084, "learning_rate": 1.9998456640193646e-05, "loss": 2.1914, "step": 10919 }, { "epoch": 0.14, "grad_norm": 5.898371696472168, "learning_rate": 1.9998454793564964e-05, "loss": 2.7616, "step": 10920 }, { "epoch": 0.14, "grad_norm": 5.025698661804199, "learning_rate": 1.9998452945832283e-05, "loss": 2.85, "step": 10921 }, { "epoch": 0.14, "grad_norm": 4.784668922424316, "learning_rate": 1.9998451096995607e-05, "loss": 2.0957, "step": 10922 }, { "epoch": 0.14, "grad_norm": 4.5665740966796875, "learning_rate": 1.999844924705493e-05, "loss": 1.8991, "step": 10923 }, { "epoch": 0.14, "grad_norm": 5.162466049194336, "learning_rate": 1.999844739601025e-05, "loss": 2.5396, "step": 10924 }, { "epoch": 0.14, "grad_norm": 5.690184116363525, "learning_rate": 1.9998445543861578e-05, "loss": 2.8584, "step": 10925 }, { "epoch": 0.14, "grad_norm": 4.808882713317871, "learning_rate": 1.9998443690608905e-05, "loss": 2.6469, "step": 10926 }, { "epoch": 0.14, "grad_norm": 5.6122636795043945, "learning_rate": 1.9998441836252237e-05, "loss": 2.4741, "step": 10927 }, { "epoch": 0.14, "grad_norm": 5.717144966125488, "learning_rate": 1.9998439980791567e-05, "loss": 2.7842, "step": 10928 }, { "epoch": 0.14, "grad_norm": 4.449309825897217, "learning_rate": 1.99984381242269e-05, "loss": 1.9684, "step": 10929 }, { "epoch": 0.14, "grad_norm": 5.808070182800293, "learning_rate": 1.999843626655824e-05, "loss": 2.5923, "step": 10930 }, { "epoch": 0.14, "grad_norm": 4.938586711883545, "learning_rate": 1.9998434407785582e-05, "loss": 2.102, "step": 10931 }, { "epoch": 0.14, "grad_norm": 4.882091522216797, "learning_rate": 1.9998432547908923e-05, "loss": 2.9157, "step": 10932 }, { "epoch": 0.14, "grad_norm": 4.985003471374512, "learning_rate": 1.999843068692827e-05, "loss": 2.2584, "step": 10933 }, { "epoch": 0.14, "grad_norm": 5.932925701141357, "learning_rate": 1.9998428824843623e-05, "loss": 2.3545, "step": 10934 }, { "epoch": 0.14, "grad_norm": 5.105830192565918, "learning_rate": 1.9998426961654976e-05, "loss": 2.904, "step": 10935 }, { "epoch": 0.14, "grad_norm": 5.972875118255615, "learning_rate": 1.9998425097362334e-05, "loss": 2.9439, "step": 10936 }, { "epoch": 0.14, "grad_norm": 5.471678256988525, "learning_rate": 1.99984232319657e-05, "loss": 2.7045, "step": 10937 }, { "epoch": 0.14, "grad_norm": 4.9026994705200195, "learning_rate": 1.999842136546507e-05, "loss": 2.0826, "step": 10938 }, { "epoch": 0.14, "grad_norm": 4.521109104156494, "learning_rate": 1.999841949786044e-05, "loss": 1.9452, "step": 10939 }, { "epoch": 0.14, "grad_norm": 5.542167663574219, "learning_rate": 1.9998417629151816e-05, "loss": 3.1671, "step": 10940 }, { "epoch": 0.14, "grad_norm": 5.129067420959473, "learning_rate": 1.99984157593392e-05, "loss": 2.595, "step": 10941 }, { "epoch": 0.14, "grad_norm": 5.58939790725708, "learning_rate": 1.9998413888422586e-05, "loss": 2.337, "step": 10942 }, { "epoch": 0.14, "grad_norm": 5.343206882476807, "learning_rate": 1.999841201640198e-05, "loss": 2.5879, "step": 10943 }, { "epoch": 0.14, "grad_norm": 4.800091743469238, "learning_rate": 1.9998410143277377e-05, "loss": 1.8492, "step": 10944 }, { "epoch": 0.14, "grad_norm": 4.481967449188232, "learning_rate": 1.999840826904878e-05, "loss": 2.1894, "step": 10945 }, { "epoch": 0.14, "grad_norm": 4.591334819793701, "learning_rate": 1.9998406393716192e-05, "loss": 1.8672, "step": 10946 }, { "epoch": 0.14, "grad_norm": 5.195478916168213, "learning_rate": 1.9998404517279608e-05, "loss": 2.4766, "step": 10947 }, { "epoch": 0.14, "grad_norm": 5.060213088989258, "learning_rate": 1.9998402639739032e-05, "loss": 2.2205, "step": 10948 }, { "epoch": 0.14, "grad_norm": 5.332921981811523, "learning_rate": 1.9998400761094463e-05, "loss": 2.6338, "step": 10949 }, { "epoch": 0.14, "grad_norm": 4.353298187255859, "learning_rate": 1.99983988813459e-05, "loss": 2.282, "step": 10950 }, { "epoch": 0.14, "grad_norm": 4.758183002471924, "learning_rate": 1.9998397000493342e-05, "loss": 1.9157, "step": 10951 }, { "epoch": 0.14, "grad_norm": 4.506327152252197, "learning_rate": 1.9998395118536794e-05, "loss": 1.9483, "step": 10952 }, { "epoch": 0.14, "grad_norm": 5.205545902252197, "learning_rate": 1.9998393235476256e-05, "loss": 2.3422, "step": 10953 }, { "epoch": 0.14, "grad_norm": 4.604341983795166, "learning_rate": 1.9998391351311723e-05, "loss": 2.4012, "step": 10954 }, { "epoch": 0.14, "grad_norm": 5.044713020324707, "learning_rate": 1.9998389466043194e-05, "loss": 2.5231, "step": 10955 }, { "epoch": 0.14, "grad_norm": 6.228366851806641, "learning_rate": 1.999838757967068e-05, "loss": 2.7065, "step": 10956 }, { "epoch": 0.14, "grad_norm": 5.019720077514648, "learning_rate": 1.999838569219417e-05, "loss": 2.2423, "step": 10957 }, { "epoch": 0.14, "grad_norm": 4.678028583526611, "learning_rate": 1.999838380361367e-05, "loss": 2.6297, "step": 10958 }, { "epoch": 0.14, "grad_norm": 4.529874801635742, "learning_rate": 1.999838191392918e-05, "loss": 1.9812, "step": 10959 }, { "epoch": 0.14, "grad_norm": 5.717193126678467, "learning_rate": 1.9998380023140696e-05, "loss": 2.8493, "step": 10960 }, { "epoch": 0.14, "grad_norm": 5.308444976806641, "learning_rate": 1.999837813124822e-05, "loss": 2.6842, "step": 10961 }, { "epoch": 0.14, "grad_norm": 4.159672260284424, "learning_rate": 1.999837623825176e-05, "loss": 2.1531, "step": 10962 }, { "epoch": 0.14, "grad_norm": 4.956879615783691, "learning_rate": 1.9998374344151306e-05, "loss": 2.5977, "step": 10963 }, { "epoch": 0.14, "grad_norm": 5.24650239944458, "learning_rate": 1.999837244894686e-05, "loss": 2.546, "step": 10964 }, { "epoch": 0.14, "grad_norm": 5.105040550231934, "learning_rate": 1.999837055263843e-05, "loss": 2.5517, "step": 10965 }, { "epoch": 0.14, "grad_norm": 5.639381408691406, "learning_rate": 1.9998368655226003e-05, "loss": 2.4941, "step": 10966 }, { "epoch": 0.14, "grad_norm": 6.031809329986572, "learning_rate": 1.999836675670959e-05, "loss": 2.7655, "step": 10967 }, { "epoch": 0.14, "grad_norm": 4.687793731689453, "learning_rate": 1.9998364857089186e-05, "loss": 2.2732, "step": 10968 }, { "epoch": 0.14, "grad_norm": 5.066740989685059, "learning_rate": 1.9998362956364796e-05, "loss": 2.6407, "step": 10969 }, { "epoch": 0.14, "grad_norm": 4.508164882659912, "learning_rate": 1.9998361054536414e-05, "loss": 1.92, "step": 10970 }, { "epoch": 0.14, "grad_norm": 5.606775283813477, "learning_rate": 1.9998359151604047e-05, "loss": 2.3638, "step": 10971 }, { "epoch": 0.14, "grad_norm": 6.292539596557617, "learning_rate": 1.9998357247567687e-05, "loss": 2.661, "step": 10972 }, { "epoch": 0.14, "grad_norm": 5.5830278396606445, "learning_rate": 1.999835534242734e-05, "loss": 3.0488, "step": 10973 }, { "epoch": 0.14, "grad_norm": 4.844064712524414, "learning_rate": 1.9998353436183007e-05, "loss": 2.3544, "step": 10974 }, { "epoch": 0.14, "grad_norm": 5.057672023773193, "learning_rate": 1.9998351528834688e-05, "loss": 2.4093, "step": 10975 }, { "epoch": 0.14, "grad_norm": 5.306272029876709, "learning_rate": 1.9998349620382376e-05, "loss": 2.451, "step": 10976 }, { "epoch": 0.14, "grad_norm": 5.877108573913574, "learning_rate": 1.999834771082608e-05, "loss": 2.6658, "step": 10977 }, { "epoch": 0.14, "grad_norm": 5.898967266082764, "learning_rate": 1.9998345800165795e-05, "loss": 2.0917, "step": 10978 }, { "epoch": 0.14, "grad_norm": 4.726624965667725, "learning_rate": 1.9998343888401524e-05, "loss": 2.0938, "step": 10979 }, { "epoch": 0.14, "grad_norm": 5.105170249938965, "learning_rate": 1.9998341975533266e-05, "loss": 2.6101, "step": 10980 }, { "epoch": 0.14, "grad_norm": 4.617990970611572, "learning_rate": 1.999834006156102e-05, "loss": 2.2823, "step": 10981 }, { "epoch": 0.14, "grad_norm": 4.802999496459961, "learning_rate": 1.999833814648479e-05, "loss": 2.188, "step": 10982 }, { "epoch": 0.14, "grad_norm": 5.111630916595459, "learning_rate": 1.9998336230304575e-05, "loss": 2.5926, "step": 10983 }, { "epoch": 0.14, "grad_norm": 5.333271026611328, "learning_rate": 1.9998334313020373e-05, "loss": 2.5283, "step": 10984 }, { "epoch": 0.14, "grad_norm": 4.337570667266846, "learning_rate": 1.999833239463218e-05, "loss": 1.6623, "step": 10985 }, { "epoch": 0.14, "grad_norm": 5.102300643920898, "learning_rate": 1.9998330475140008e-05, "loss": 1.9594, "step": 10986 }, { "epoch": 0.14, "grad_norm": 4.716602802276611, "learning_rate": 1.999832855454385e-05, "loss": 2.3185, "step": 10987 }, { "epoch": 0.14, "grad_norm": 4.924274921417236, "learning_rate": 1.9998326632843705e-05, "loss": 1.9872, "step": 10988 }, { "epoch": 0.14, "grad_norm": 5.28535270690918, "learning_rate": 1.9998324710039575e-05, "loss": 2.4276, "step": 10989 }, { "epoch": 0.14, "grad_norm": 5.265490531921387, "learning_rate": 1.999832278613146e-05, "loss": 2.3917, "step": 10990 }, { "epoch": 0.14, "grad_norm": 6.041810512542725, "learning_rate": 1.999832086111936e-05, "loss": 2.8624, "step": 10991 }, { "epoch": 0.14, "grad_norm": 5.927036762237549, "learning_rate": 1.9998318935003276e-05, "loss": 2.7111, "step": 10992 }, { "epoch": 0.14, "grad_norm": 5.3189802169799805, "learning_rate": 1.999831700778321e-05, "loss": 2.6735, "step": 10993 }, { "epoch": 0.14, "grad_norm": 4.949843406677246, "learning_rate": 1.9998315079459157e-05, "loss": 2.2774, "step": 10994 }, { "epoch": 0.14, "grad_norm": 5.333057403564453, "learning_rate": 1.9998313150031124e-05, "loss": 2.851, "step": 10995 }, { "epoch": 0.14, "grad_norm": 5.298807621002197, "learning_rate": 1.9998311219499106e-05, "loss": 1.9036, "step": 10996 }, { "epoch": 0.14, "grad_norm": 5.242720603942871, "learning_rate": 1.9998309287863106e-05, "loss": 2.3761, "step": 10997 }, { "epoch": 0.14, "grad_norm": 4.596470355987549, "learning_rate": 1.9998307355123124e-05, "loss": 2.2899, "step": 10998 }, { "epoch": 0.14, "grad_norm": 5.172076225280762, "learning_rate": 1.9998305421279153e-05, "loss": 2.7111, "step": 10999 }, { "epoch": 0.14, "grad_norm": 5.619128704071045, "learning_rate": 1.9998303486331207e-05, "loss": 2.5169, "step": 11000 }, { "epoch": 0.14, "grad_norm": 5.715588569641113, "learning_rate": 1.9998301550279275e-05, "loss": 2.8003, "step": 11001 }, { "epoch": 0.14, "grad_norm": 5.392800331115723, "learning_rate": 1.999829961312336e-05, "loss": 2.1231, "step": 11002 }, { "epoch": 0.14, "grad_norm": 5.271116733551025, "learning_rate": 1.9998297674863465e-05, "loss": 2.3301, "step": 11003 }, { "epoch": 0.14, "grad_norm": 5.329634189605713, "learning_rate": 1.999829573549959e-05, "loss": 2.1301, "step": 11004 }, { "epoch": 0.14, "grad_norm": 5.613402366638184, "learning_rate": 1.9998293795031727e-05, "loss": 2.5718, "step": 11005 }, { "epoch": 0.14, "grad_norm": 4.994675636291504, "learning_rate": 1.9998291853459888e-05, "loss": 2.4959, "step": 11006 }, { "epoch": 0.14, "grad_norm": 5.406466484069824, "learning_rate": 1.9998289910784067e-05, "loss": 2.9413, "step": 11007 }, { "epoch": 0.14, "grad_norm": 4.817638874053955, "learning_rate": 1.9998287967004267e-05, "loss": 2.2622, "step": 11008 }, { "epoch": 0.14, "grad_norm": 4.521103382110596, "learning_rate": 1.9998286022120482e-05, "loss": 1.5534, "step": 11009 }, { "epoch": 0.14, "grad_norm": 4.804098129272461, "learning_rate": 1.999828407613272e-05, "loss": 1.9732, "step": 11010 }, { "epoch": 0.14, "grad_norm": 4.596996307373047, "learning_rate": 1.9998282129040976e-05, "loss": 2.3418, "step": 11011 }, { "epoch": 0.14, "grad_norm": 5.595040321350098, "learning_rate": 1.9998280180845254e-05, "loss": 2.7362, "step": 11012 }, { "epoch": 0.14, "grad_norm": 4.757376670837402, "learning_rate": 1.999827823154555e-05, "loss": 2.0679, "step": 11013 }, { "epoch": 0.14, "grad_norm": 4.916123390197754, "learning_rate": 1.999827628114187e-05, "loss": 2.248, "step": 11014 }, { "epoch": 0.14, "grad_norm": 4.760556221008301, "learning_rate": 1.9998274329634205e-05, "loss": 2.4993, "step": 11015 }, { "epoch": 0.14, "grad_norm": 4.31339693069458, "learning_rate": 1.9998272377022565e-05, "loss": 2.0782, "step": 11016 }, { "epoch": 0.14, "grad_norm": 4.576648235321045, "learning_rate": 1.9998270423306944e-05, "loss": 1.9466, "step": 11017 }, { "epoch": 0.14, "grad_norm": 5.4126362800598145, "learning_rate": 1.9998268468487347e-05, "loss": 2.6078, "step": 11018 }, { "epoch": 0.14, "grad_norm": 5.350746154785156, "learning_rate": 1.999826651256377e-05, "loss": 2.5695, "step": 11019 }, { "epoch": 0.14, "grad_norm": 5.239620208740234, "learning_rate": 1.9998264555536214e-05, "loss": 2.4484, "step": 11020 }, { "epoch": 0.14, "grad_norm": 4.896872520446777, "learning_rate": 1.999826259740468e-05, "loss": 2.6012, "step": 11021 }, { "epoch": 0.14, "grad_norm": 4.847893714904785, "learning_rate": 1.9998260638169173e-05, "loss": 2.4103, "step": 11022 }, { "epoch": 0.14, "grad_norm": 4.535330772399902, "learning_rate": 1.9998258677829683e-05, "loss": 1.6514, "step": 11023 }, { "epoch": 0.14, "grad_norm": 4.973206043243408, "learning_rate": 1.9998256716386217e-05, "loss": 2.7516, "step": 11024 }, { "epoch": 0.14, "grad_norm": 5.194704055786133, "learning_rate": 1.9998254753838773e-05, "loss": 2.3706, "step": 11025 }, { "epoch": 0.14, "grad_norm": 4.632778167724609, "learning_rate": 1.9998252790187353e-05, "loss": 2.1433, "step": 11026 }, { "epoch": 0.14, "grad_norm": 5.48057222366333, "learning_rate": 1.9998250825431955e-05, "loss": 2.2606, "step": 11027 }, { "epoch": 0.14, "grad_norm": 4.942724227905273, "learning_rate": 1.999824885957258e-05, "loss": 2.1524, "step": 11028 }, { "epoch": 0.14, "grad_norm": 5.417444229125977, "learning_rate": 1.9998246892609236e-05, "loss": 2.7233, "step": 11029 }, { "epoch": 0.14, "grad_norm": 6.007899761199951, "learning_rate": 1.9998244924541908e-05, "loss": 2.6428, "step": 11030 }, { "epoch": 0.14, "grad_norm": 5.7455339431762695, "learning_rate": 1.9998242955370605e-05, "loss": 2.4798, "step": 11031 }, { "epoch": 0.14, "grad_norm": 5.185837745666504, "learning_rate": 1.999824098509533e-05, "loss": 2.1633, "step": 11032 }, { "epoch": 0.14, "grad_norm": 5.370060443878174, "learning_rate": 1.9998239013716077e-05, "loss": 2.6556, "step": 11033 }, { "epoch": 0.14, "grad_norm": 5.291707515716553, "learning_rate": 1.9998237041232848e-05, "loss": 2.6675, "step": 11034 }, { "epoch": 0.14, "grad_norm": 5.8728861808776855, "learning_rate": 1.9998235067645647e-05, "loss": 2.3323, "step": 11035 }, { "epoch": 0.14, "grad_norm": 4.741186141967773, "learning_rate": 1.9998233092954468e-05, "loss": 1.9892, "step": 11036 }, { "epoch": 0.14, "grad_norm": 6.00979471206665, "learning_rate": 1.9998231117159316e-05, "loss": 2.7961, "step": 11037 }, { "epoch": 0.14, "grad_norm": 5.1465935707092285, "learning_rate": 1.999822914026019e-05, "loss": 2.284, "step": 11038 }, { "epoch": 0.14, "grad_norm": 4.895798206329346, "learning_rate": 1.9998227162257087e-05, "loss": 2.1696, "step": 11039 }, { "epoch": 0.14, "grad_norm": 5.292486667633057, "learning_rate": 1.9998225183150014e-05, "loss": 2.2495, "step": 11040 }, { "epoch": 0.14, "grad_norm": 5.996176242828369, "learning_rate": 1.9998223202938968e-05, "loss": 3.167, "step": 11041 }, { "epoch": 0.14, "grad_norm": 4.691737651824951, "learning_rate": 1.9998221221623947e-05, "loss": 2.2753, "step": 11042 }, { "epoch": 0.14, "grad_norm": 5.0117669105529785, "learning_rate": 1.999821923920495e-05, "loss": 2.4838, "step": 11043 }, { "epoch": 0.14, "grad_norm": 5.6331467628479, "learning_rate": 1.999821725568198e-05, "loss": 2.6382, "step": 11044 }, { "epoch": 0.14, "grad_norm": 4.817885398864746, "learning_rate": 1.999821527105504e-05, "loss": 2.271, "step": 11045 }, { "epoch": 0.14, "grad_norm": 5.162413597106934, "learning_rate": 1.999821328532413e-05, "loss": 2.2724, "step": 11046 }, { "epoch": 0.14, "grad_norm": 5.412532329559326, "learning_rate": 1.999821129848924e-05, "loss": 2.4251, "step": 11047 }, { "epoch": 0.14, "grad_norm": 4.78953218460083, "learning_rate": 1.9998209310550382e-05, "loss": 2.6112, "step": 11048 }, { "epoch": 0.14, "grad_norm": 5.000235557556152, "learning_rate": 1.9998207321507554e-05, "loss": 2.1382, "step": 11049 }, { "epoch": 0.14, "grad_norm": 4.683457374572754, "learning_rate": 1.999820533136075e-05, "loss": 2.0418, "step": 11050 }, { "epoch": 0.14, "grad_norm": 4.73747444152832, "learning_rate": 1.999820334010998e-05, "loss": 2.1816, "step": 11051 }, { "epoch": 0.14, "grad_norm": 5.405496120452881, "learning_rate": 1.9998201347755235e-05, "loss": 2.6479, "step": 11052 }, { "epoch": 0.14, "grad_norm": 5.0519280433654785, "learning_rate": 1.9998199354296523e-05, "loss": 2.337, "step": 11053 }, { "epoch": 0.14, "grad_norm": 5.461981296539307, "learning_rate": 1.9998197359733832e-05, "loss": 2.1419, "step": 11054 }, { "epoch": 0.14, "grad_norm": 5.0908331871032715, "learning_rate": 1.999819536406718e-05, "loss": 2.1459, "step": 11055 }, { "epoch": 0.14, "grad_norm": 5.512566566467285, "learning_rate": 1.999819336729655e-05, "loss": 2.3564, "step": 11056 }, { "epoch": 0.14, "grad_norm": 5.45795202255249, "learning_rate": 1.9998191369421954e-05, "loss": 2.3901, "step": 11057 }, { "epoch": 0.14, "grad_norm": 5.104385852813721, "learning_rate": 1.9998189370443386e-05, "loss": 2.1582, "step": 11058 }, { "epoch": 0.14, "grad_norm": 6.677594184875488, "learning_rate": 1.999818737036085e-05, "loss": 3.2989, "step": 11059 }, { "epoch": 0.14, "grad_norm": 5.5734124183654785, "learning_rate": 1.999818536917434e-05, "loss": 2.4493, "step": 11060 }, { "epoch": 0.14, "grad_norm": 4.989889144897461, "learning_rate": 1.9998183366883866e-05, "loss": 2.4324, "step": 11061 }, { "epoch": 0.14, "grad_norm": 5.159143924713135, "learning_rate": 1.9998181363489423e-05, "loss": 1.9592, "step": 11062 }, { "epoch": 0.14, "grad_norm": 4.911818504333496, "learning_rate": 1.999817935899101e-05, "loss": 2.1323, "step": 11063 }, { "epoch": 0.14, "grad_norm": 4.357361316680908, "learning_rate": 1.9998177353388625e-05, "loss": 2.2679, "step": 11064 }, { "epoch": 0.14, "grad_norm": 5.049713611602783, "learning_rate": 1.9998175346682274e-05, "loss": 2.5437, "step": 11065 }, { "epoch": 0.14, "grad_norm": 4.890560626983643, "learning_rate": 1.9998173338871957e-05, "loss": 2.404, "step": 11066 }, { "epoch": 0.14, "grad_norm": 5.383610248565674, "learning_rate": 1.9998171329957668e-05, "loss": 3.0157, "step": 11067 }, { "epoch": 0.14, "grad_norm": 5.498964309692383, "learning_rate": 1.9998169319939414e-05, "loss": 1.9691, "step": 11068 }, { "epoch": 0.14, "grad_norm": 6.1363725662231445, "learning_rate": 1.9998167308817195e-05, "loss": 2.7516, "step": 11069 }, { "epoch": 0.14, "grad_norm": 4.992476940155029, "learning_rate": 1.9998165296591004e-05, "loss": 1.7971, "step": 11070 }, { "epoch": 0.14, "grad_norm": 4.866754531860352, "learning_rate": 1.9998163283260848e-05, "loss": 2.2219, "step": 11071 }, { "epoch": 0.14, "grad_norm": 4.658689975738525, "learning_rate": 1.9998161268826724e-05, "loss": 2.3438, "step": 11072 }, { "epoch": 0.14, "grad_norm": 4.861176490783691, "learning_rate": 1.9998159253288634e-05, "loss": 2.5237, "step": 11073 }, { "epoch": 0.14, "grad_norm": 4.825049877166748, "learning_rate": 1.9998157236646576e-05, "loss": 2.2279, "step": 11074 }, { "epoch": 0.14, "grad_norm": 4.6481032371521, "learning_rate": 1.9998155218900555e-05, "loss": 2.0865, "step": 11075 }, { "epoch": 0.14, "grad_norm": 5.091829776763916, "learning_rate": 1.9998153200050567e-05, "loss": 2.3905, "step": 11076 }, { "epoch": 0.14, "grad_norm": 4.917641639709473, "learning_rate": 1.9998151180096613e-05, "loss": 2.8232, "step": 11077 }, { "epoch": 0.14, "grad_norm": 4.919463634490967, "learning_rate": 1.999814915903869e-05, "loss": 2.1177, "step": 11078 }, { "epoch": 0.14, "grad_norm": 5.199683666229248, "learning_rate": 1.999814713687681e-05, "loss": 2.484, "step": 11079 }, { "epoch": 0.14, "grad_norm": 6.20115327835083, "learning_rate": 1.9998145113610958e-05, "loss": 2.8708, "step": 11080 }, { "epoch": 0.14, "grad_norm": 5.910895347595215, "learning_rate": 1.9998143089241143e-05, "loss": 2.2223, "step": 11081 }, { "epoch": 0.14, "grad_norm": 4.702602863311768, "learning_rate": 1.9998141063767364e-05, "loss": 2.5785, "step": 11082 }, { "epoch": 0.14, "grad_norm": 6.33436393737793, "learning_rate": 1.999813903718962e-05, "loss": 3.2308, "step": 11083 }, { "epoch": 0.14, "grad_norm": 5.2644782066345215, "learning_rate": 1.999813700950791e-05, "loss": 2.0948, "step": 11084 }, { "epoch": 0.14, "grad_norm": 5.481776237487793, "learning_rate": 1.9998134980722238e-05, "loss": 2.9097, "step": 11085 }, { "epoch": 0.14, "grad_norm": 5.50043249130249, "learning_rate": 1.99981329508326e-05, "loss": 2.5348, "step": 11086 }, { "epoch": 0.14, "grad_norm": 5.349383354187012, "learning_rate": 1.9998130919839003e-05, "loss": 2.5718, "step": 11087 }, { "epoch": 0.14, "grad_norm": 4.155120372772217, "learning_rate": 1.9998128887741443e-05, "loss": 1.5922, "step": 11088 }, { "epoch": 0.14, "grad_norm": 4.874853134155273, "learning_rate": 1.9998126854539914e-05, "loss": 2.1611, "step": 11089 }, { "epoch": 0.14, "grad_norm": 4.764949321746826, "learning_rate": 1.9998124820234427e-05, "loss": 2.7061, "step": 11090 }, { "epoch": 0.14, "grad_norm": 5.581380844116211, "learning_rate": 1.9998122784824975e-05, "loss": 2.4584, "step": 11091 }, { "epoch": 0.14, "grad_norm": 4.3195414543151855, "learning_rate": 1.999812074831156e-05, "loss": 1.9469, "step": 11092 }, { "epoch": 0.14, "grad_norm": 4.426231384277344, "learning_rate": 1.999811871069419e-05, "loss": 2.0921, "step": 11093 }, { "epoch": 0.14, "grad_norm": 5.3431267738342285, "learning_rate": 1.9998116671972848e-05, "loss": 2.1673, "step": 11094 }, { "epoch": 0.14, "grad_norm": 7.7080559730529785, "learning_rate": 1.999811463214755e-05, "loss": 2.2902, "step": 11095 }, { "epoch": 0.14, "grad_norm": 5.280825138092041, "learning_rate": 1.9998112591218288e-05, "loss": 2.6716, "step": 11096 }, { "epoch": 0.14, "grad_norm": 5.719499588012695, "learning_rate": 1.9998110549185068e-05, "loss": 2.4498, "step": 11097 }, { "epoch": 0.14, "grad_norm": 4.6066365242004395, "learning_rate": 1.9998108506047884e-05, "loss": 2.2703, "step": 11098 }, { "epoch": 0.14, "grad_norm": 5.002701282501221, "learning_rate": 1.999810646180674e-05, "loss": 2.29, "step": 11099 }, { "epoch": 0.14, "grad_norm": 6.114543914794922, "learning_rate": 1.9998104416461636e-05, "loss": 3.1787, "step": 11100 }, { "epoch": 0.14, "grad_norm": 5.063688278198242, "learning_rate": 1.999810237001257e-05, "loss": 2.1933, "step": 11101 }, { "epoch": 0.14, "grad_norm": 4.502621650695801, "learning_rate": 1.9998100322459545e-05, "loss": 2.0755, "step": 11102 }, { "epoch": 0.14, "grad_norm": 5.150018692016602, "learning_rate": 1.9998098273802562e-05, "loss": 2.4609, "step": 11103 }, { "epoch": 0.14, "grad_norm": 4.5916056632995605, "learning_rate": 1.9998096224041617e-05, "loss": 2.104, "step": 11104 }, { "epoch": 0.14, "grad_norm": 5.518771648406982, "learning_rate": 1.9998094173176714e-05, "loss": 2.3242, "step": 11105 }, { "epoch": 0.14, "grad_norm": 4.343324661254883, "learning_rate": 1.999809212120785e-05, "loss": 2.2071, "step": 11106 }, { "epoch": 0.14, "grad_norm": 4.859689712524414, "learning_rate": 1.999809006813503e-05, "loss": 2.2291, "step": 11107 }, { "epoch": 0.14, "grad_norm": 5.683920860290527, "learning_rate": 1.9998088013958247e-05, "loss": 2.3566, "step": 11108 }, { "epoch": 0.14, "grad_norm": 4.6925859451293945, "learning_rate": 1.999808595867751e-05, "loss": 2.5018, "step": 11109 }, { "epoch": 0.14, "grad_norm": 4.801153659820557, "learning_rate": 1.9998083902292812e-05, "loss": 2.6189, "step": 11110 }, { "epoch": 0.14, "grad_norm": 4.981138229370117, "learning_rate": 1.9998081844804155e-05, "loss": 2.2534, "step": 11111 }, { "epoch": 0.14, "grad_norm": 4.148940563201904, "learning_rate": 1.9998079786211543e-05, "loss": 2.1116, "step": 11112 }, { "epoch": 0.14, "grad_norm": 5.246407508850098, "learning_rate": 1.9998077726514973e-05, "loss": 2.4854, "step": 11113 }, { "epoch": 0.14, "grad_norm": 5.021104335784912, "learning_rate": 1.999807566571444e-05, "loss": 2.6523, "step": 11114 }, { "epoch": 0.14, "grad_norm": 5.532702445983887, "learning_rate": 1.9998073603809954e-05, "loss": 2.9226, "step": 11115 }, { "epoch": 0.14, "grad_norm": 5.019958972930908, "learning_rate": 1.9998071540801512e-05, "loss": 2.6124, "step": 11116 }, { "epoch": 0.14, "grad_norm": 5.993872165679932, "learning_rate": 1.9998069476689115e-05, "loss": 2.3184, "step": 11117 }, { "epoch": 0.14, "grad_norm": 4.507575988769531, "learning_rate": 1.999806741147276e-05, "loss": 2.3366, "step": 11118 }, { "epoch": 0.14, "grad_norm": 6.04006814956665, "learning_rate": 1.9998065345152446e-05, "loss": 2.0528, "step": 11119 }, { "epoch": 0.14, "grad_norm": 5.709449291229248, "learning_rate": 1.9998063277728178e-05, "loss": 2.7548, "step": 11120 }, { "epoch": 0.14, "grad_norm": 5.010003089904785, "learning_rate": 1.9998061209199954e-05, "loss": 1.9717, "step": 11121 }, { "epoch": 0.14, "grad_norm": 5.759344577789307, "learning_rate": 1.9998059139567772e-05, "loss": 2.6031, "step": 11122 }, { "epoch": 0.14, "grad_norm": 4.591101169586182, "learning_rate": 1.999805706883164e-05, "loss": 2.7556, "step": 11123 }, { "epoch": 0.14, "grad_norm": 5.473169803619385, "learning_rate": 1.999805499699155e-05, "loss": 2.641, "step": 11124 }, { "epoch": 0.14, "grad_norm": 4.64430570602417, "learning_rate": 1.9998052924047507e-05, "loss": 2.2841, "step": 11125 }, { "epoch": 0.14, "grad_norm": 5.578569412231445, "learning_rate": 1.9998050849999505e-05, "loss": 2.984, "step": 11126 }, { "epoch": 0.14, "grad_norm": 5.9642109870910645, "learning_rate": 1.999804877484755e-05, "loss": 2.5019, "step": 11127 }, { "epoch": 0.14, "grad_norm": 5.235599994659424, "learning_rate": 1.9998046698591643e-05, "loss": 2.6141, "step": 11128 }, { "epoch": 0.14, "grad_norm": 5.048774242401123, "learning_rate": 1.999804462123178e-05, "loss": 2.6223, "step": 11129 }, { "epoch": 0.14, "grad_norm": 5.000527381896973, "learning_rate": 1.9998042542767964e-05, "loss": 2.682, "step": 11130 }, { "epoch": 0.14, "grad_norm": 5.105626583099365, "learning_rate": 1.9998040463200198e-05, "loss": 2.1926, "step": 11131 }, { "epoch": 0.14, "grad_norm": 4.408524036407471, "learning_rate": 1.9998038382528473e-05, "loss": 1.8258, "step": 11132 }, { "epoch": 0.14, "grad_norm": 4.9632673263549805, "learning_rate": 1.9998036300752796e-05, "loss": 2.5238, "step": 11133 }, { "epoch": 0.14, "grad_norm": 5.049656867980957, "learning_rate": 1.9998034217873168e-05, "loss": 2.7532, "step": 11134 }, { "epoch": 0.14, "grad_norm": 5.006823539733887, "learning_rate": 1.9998032133889588e-05, "loss": 2.1577, "step": 11135 }, { "epoch": 0.14, "grad_norm": 4.880799293518066, "learning_rate": 1.9998030048802053e-05, "loss": 2.2168, "step": 11136 }, { "epoch": 0.14, "grad_norm": 5.099804401397705, "learning_rate": 1.9998027962610567e-05, "loss": 2.2158, "step": 11137 }, { "epoch": 0.14, "grad_norm": 4.286923408508301, "learning_rate": 1.999802587531513e-05, "loss": 1.9483, "step": 11138 }, { "epoch": 0.14, "grad_norm": 4.423673629760742, "learning_rate": 1.9998023786915743e-05, "loss": 2.1322, "step": 11139 }, { "epoch": 0.14, "grad_norm": 5.140262126922607, "learning_rate": 1.99980216974124e-05, "loss": 1.869, "step": 11140 }, { "epoch": 0.14, "grad_norm": 5.310471057891846, "learning_rate": 1.999801960680511e-05, "loss": 2.4883, "step": 11141 }, { "epoch": 0.14, "grad_norm": 5.303409099578857, "learning_rate": 1.9998017515093865e-05, "loss": 2.17, "step": 11142 }, { "epoch": 0.14, "grad_norm": 4.8354291915893555, "learning_rate": 1.999801542227867e-05, "loss": 2.313, "step": 11143 }, { "epoch": 0.14, "grad_norm": 4.803122520446777, "learning_rate": 1.9998013328359528e-05, "loss": 2.1921, "step": 11144 }, { "epoch": 0.14, "grad_norm": 5.236734867095947, "learning_rate": 1.9998011233336432e-05, "loss": 2.7383, "step": 11145 }, { "epoch": 0.14, "grad_norm": 5.611680030822754, "learning_rate": 1.9998009137209385e-05, "loss": 2.4364, "step": 11146 }, { "epoch": 0.14, "grad_norm": 4.8566460609436035, "learning_rate": 1.9998007039978393e-05, "loss": 2.8311, "step": 11147 }, { "epoch": 0.14, "grad_norm": 5.133406162261963, "learning_rate": 1.9998004941643446e-05, "loss": 2.3351, "step": 11148 }, { "epoch": 0.14, "grad_norm": 4.233078479766846, "learning_rate": 1.9998002842204557e-05, "loss": 2.1257, "step": 11149 }, { "epoch": 0.14, "grad_norm": 4.659419059753418, "learning_rate": 1.999800074166171e-05, "loss": 2.2517, "step": 11150 }, { "epoch": 0.14, "grad_norm": 5.187221050262451, "learning_rate": 1.9997998640014922e-05, "loss": 2.4798, "step": 11151 }, { "epoch": 0.14, "grad_norm": 4.895944118499756, "learning_rate": 1.9997996537264182e-05, "loss": 2.1493, "step": 11152 }, { "epoch": 0.14, "grad_norm": 5.520892143249512, "learning_rate": 1.9997994433409494e-05, "loss": 2.6137, "step": 11153 }, { "epoch": 0.14, "grad_norm": 4.232888221740723, "learning_rate": 1.9997992328450854e-05, "loss": 2.1935, "step": 11154 }, { "epoch": 0.14, "grad_norm": 5.565230846405029, "learning_rate": 1.9997990222388273e-05, "loss": 2.2782, "step": 11155 }, { "epoch": 0.14, "grad_norm": 5.277542591094971, "learning_rate": 1.9997988115221737e-05, "loss": 2.579, "step": 11156 }, { "epoch": 0.14, "grad_norm": 4.918145179748535, "learning_rate": 1.999798600695126e-05, "loss": 2.6375, "step": 11157 }, { "epoch": 0.14, "grad_norm": 4.815584659576416, "learning_rate": 1.999798389757683e-05, "loss": 2.0224, "step": 11158 }, { "epoch": 0.14, "grad_norm": 5.492978096008301, "learning_rate": 1.9997981787098456e-05, "loss": 2.6566, "step": 11159 }, { "epoch": 0.14, "grad_norm": 5.175149917602539, "learning_rate": 1.9997979675516138e-05, "loss": 2.2823, "step": 11160 }, { "epoch": 0.14, "grad_norm": 5.020607948303223, "learning_rate": 1.9997977562829867e-05, "loss": 2.4453, "step": 11161 }, { "epoch": 0.14, "grad_norm": 5.020918369293213, "learning_rate": 1.9997975449039656e-05, "loss": 2.4366, "step": 11162 }, { "epoch": 0.14, "grad_norm": 4.790877342224121, "learning_rate": 1.9997973334145496e-05, "loss": 2.5227, "step": 11163 }, { "epoch": 0.14, "grad_norm": 5.727531433105469, "learning_rate": 1.999797121814739e-05, "loss": 2.3303, "step": 11164 }, { "epoch": 0.14, "grad_norm": 5.357617378234863, "learning_rate": 1.9997969101045334e-05, "loss": 2.9363, "step": 11165 }, { "epoch": 0.14, "grad_norm": 4.828731060028076, "learning_rate": 1.999796698283934e-05, "loss": 2.2217, "step": 11166 }, { "epoch": 0.14, "grad_norm": 5.332019805908203, "learning_rate": 1.9997964863529397e-05, "loss": 2.3797, "step": 11167 }, { "epoch": 0.14, "grad_norm": 6.29599666595459, "learning_rate": 1.999796274311551e-05, "loss": 2.8562, "step": 11168 }, { "epoch": 0.14, "grad_norm": 4.302181243896484, "learning_rate": 1.9997960621597677e-05, "loss": 2.3321, "step": 11169 }, { "epoch": 0.14, "grad_norm": 4.53381872177124, "learning_rate": 1.9997958498975904e-05, "loss": 2.3929, "step": 11170 }, { "epoch": 0.14, "grad_norm": 4.472082614898682, "learning_rate": 1.9997956375250182e-05, "loss": 2.1322, "step": 11171 }, { "epoch": 0.14, "grad_norm": 5.040953636169434, "learning_rate": 1.999795425042052e-05, "loss": 2.9825, "step": 11172 }, { "epoch": 0.15, "grad_norm": 4.495089530944824, "learning_rate": 1.999795212448691e-05, "loss": 2.4491, "step": 11173 }, { "epoch": 0.15, "grad_norm": 4.698598861694336, "learning_rate": 1.9997949997449357e-05, "loss": 2.0847, "step": 11174 }, { "epoch": 0.15, "grad_norm": 4.824702739715576, "learning_rate": 1.999794786930786e-05, "loss": 2.1755, "step": 11175 }, { "epoch": 0.15, "grad_norm": 4.330004692077637, "learning_rate": 1.9997945740062424e-05, "loss": 1.8744, "step": 11176 }, { "epoch": 0.15, "grad_norm": 5.143649578094482, "learning_rate": 1.9997943609713043e-05, "loss": 2.6969, "step": 11177 }, { "epoch": 0.15, "grad_norm": 4.7971110343933105, "learning_rate": 1.9997941478259718e-05, "loss": 2.658, "step": 11178 }, { "epoch": 0.15, "grad_norm": 5.080422401428223, "learning_rate": 1.9997939345702454e-05, "loss": 2.108, "step": 11179 }, { "epoch": 0.15, "grad_norm": 4.40609884262085, "learning_rate": 1.9997937212041246e-05, "loss": 2.2209, "step": 11180 }, { "epoch": 0.15, "grad_norm": 4.865253448486328, "learning_rate": 1.9997935077276093e-05, "loss": 2.3762, "step": 11181 }, { "epoch": 0.15, "grad_norm": 5.58675479888916, "learning_rate": 1.9997932941407005e-05, "loss": 2.8261, "step": 11182 }, { "epoch": 0.15, "grad_norm": 4.886160850524902, "learning_rate": 1.999793080443397e-05, "loss": 2.4246, "step": 11183 }, { "epoch": 0.15, "grad_norm": 5.552563190460205, "learning_rate": 1.9997928666356995e-05, "loss": 2.624, "step": 11184 }, { "epoch": 0.15, "grad_norm": 5.3161163330078125, "learning_rate": 1.9997926527176083e-05, "loss": 2.3268, "step": 11185 }, { "epoch": 0.15, "grad_norm": 5.052109241485596, "learning_rate": 1.9997924386891226e-05, "loss": 2.4585, "step": 11186 }, { "epoch": 0.15, "grad_norm": 5.587706089019775, "learning_rate": 1.9997922245502427e-05, "loss": 2.4161, "step": 11187 }, { "epoch": 0.15, "grad_norm": 4.76718807220459, "learning_rate": 1.9997920103009694e-05, "loss": 2.0613, "step": 11188 }, { "epoch": 0.15, "grad_norm": 5.715013027191162, "learning_rate": 1.9997917959413017e-05, "loss": 2.9341, "step": 11189 }, { "epoch": 0.15, "grad_norm": 5.036962985992432, "learning_rate": 1.9997915814712397e-05, "loss": 2.2807, "step": 11190 }, { "epoch": 0.15, "grad_norm": 4.89911413192749, "learning_rate": 1.9997913668907844e-05, "loss": 2.5514, "step": 11191 }, { "epoch": 0.15, "grad_norm": 5.736342906951904, "learning_rate": 1.9997911521999348e-05, "loss": 2.6331, "step": 11192 }, { "epoch": 0.15, "grad_norm": 5.210705757141113, "learning_rate": 1.9997909373986915e-05, "loss": 2.6036, "step": 11193 }, { "epoch": 0.15, "grad_norm": 5.149136543273926, "learning_rate": 1.999790722487054e-05, "loss": 2.3066, "step": 11194 }, { "epoch": 0.15, "grad_norm": 4.518942356109619, "learning_rate": 1.999790507465023e-05, "loss": 2.2169, "step": 11195 }, { "epoch": 0.15, "grad_norm": 4.862683296203613, "learning_rate": 1.9997902923325977e-05, "loss": 2.4846, "step": 11196 }, { "epoch": 0.15, "grad_norm": 4.574051856994629, "learning_rate": 1.999790077089779e-05, "loss": 2.0415, "step": 11197 }, { "epoch": 0.15, "grad_norm": 6.2927021980285645, "learning_rate": 1.9997898617365664e-05, "loss": 2.6711, "step": 11198 }, { "epoch": 0.15, "grad_norm": 5.075451374053955, "learning_rate": 1.9997896462729602e-05, "loss": 2.1908, "step": 11199 }, { "epoch": 0.15, "grad_norm": 5.352385997772217, "learning_rate": 1.99978943069896e-05, "loss": 2.4365, "step": 11200 }, { "epoch": 0.15, "grad_norm": 5.343147277832031, "learning_rate": 1.9997892150145665e-05, "loss": 2.6346, "step": 11201 }, { "epoch": 0.15, "grad_norm": 5.131103038787842, "learning_rate": 1.9997889992197786e-05, "loss": 2.6463, "step": 11202 }, { "epoch": 0.15, "grad_norm": 4.827982425689697, "learning_rate": 1.9997887833145976e-05, "loss": 2.1925, "step": 11203 }, { "epoch": 0.15, "grad_norm": 4.57584810256958, "learning_rate": 1.9997885672990227e-05, "loss": 1.8837, "step": 11204 }, { "epoch": 0.15, "grad_norm": 4.959936618804932, "learning_rate": 1.9997883511730544e-05, "loss": 1.7676, "step": 11205 }, { "epoch": 0.15, "grad_norm": 4.839504718780518, "learning_rate": 1.9997881349366923e-05, "loss": 1.6188, "step": 11206 }, { "epoch": 0.15, "grad_norm": 5.507995128631592, "learning_rate": 1.9997879185899367e-05, "loss": 2.6105, "step": 11207 }, { "epoch": 0.15, "grad_norm": 4.745828628540039, "learning_rate": 1.9997877021327877e-05, "loss": 2.0751, "step": 11208 }, { "epoch": 0.15, "grad_norm": 4.799959182739258, "learning_rate": 1.9997874855652452e-05, "loss": 1.9957, "step": 11209 }, { "epoch": 0.15, "grad_norm": 4.597701549530029, "learning_rate": 1.999787268887309e-05, "loss": 2.5138, "step": 11210 }, { "epoch": 0.15, "grad_norm": 4.886073589324951, "learning_rate": 1.999787052098979e-05, "loss": 2.1144, "step": 11211 }, { "epoch": 0.15, "grad_norm": 4.714745044708252, "learning_rate": 1.9997868352002562e-05, "loss": 2.6498, "step": 11212 }, { "epoch": 0.15, "grad_norm": 5.880171298980713, "learning_rate": 1.9997866181911395e-05, "loss": 2.1163, "step": 11213 }, { "epoch": 0.15, "grad_norm": 5.312000274658203, "learning_rate": 1.9997864010716297e-05, "loss": 2.3286, "step": 11214 }, { "epoch": 0.15, "grad_norm": 5.194820880889893, "learning_rate": 1.9997861838417264e-05, "loss": 2.0272, "step": 11215 }, { "epoch": 0.15, "grad_norm": 4.956632137298584, "learning_rate": 1.9997859665014296e-05, "loss": 2.263, "step": 11216 }, { "epoch": 0.15, "grad_norm": 4.367481708526611, "learning_rate": 1.9997857490507394e-05, "loss": 2.1123, "step": 11217 }, { "epoch": 0.15, "grad_norm": 5.257806777954102, "learning_rate": 1.9997855314896564e-05, "loss": 2.4688, "step": 11218 }, { "epoch": 0.15, "grad_norm": 5.603707313537598, "learning_rate": 1.99978531381818e-05, "loss": 2.2, "step": 11219 }, { "epoch": 0.15, "grad_norm": 4.845039367675781, "learning_rate": 1.99978509603631e-05, "loss": 2.6817, "step": 11220 }, { "epoch": 0.15, "grad_norm": 5.143016815185547, "learning_rate": 1.999784878144047e-05, "loss": 1.9896, "step": 11221 }, { "epoch": 0.15, "grad_norm": 4.613219738006592, "learning_rate": 1.9997846601413907e-05, "loss": 1.8639, "step": 11222 }, { "epoch": 0.15, "grad_norm": 5.345661163330078, "learning_rate": 1.999784442028341e-05, "loss": 2.2799, "step": 11223 }, { "epoch": 0.15, "grad_norm": 4.969351768493652, "learning_rate": 1.9997842238048983e-05, "loss": 2.051, "step": 11224 }, { "epoch": 0.15, "grad_norm": 5.113687992095947, "learning_rate": 1.9997840054710627e-05, "loss": 2.145, "step": 11225 }, { "epoch": 0.15, "grad_norm": 4.40806245803833, "learning_rate": 1.9997837870268336e-05, "loss": 2.205, "step": 11226 }, { "epoch": 0.15, "grad_norm": 5.316740036010742, "learning_rate": 1.9997835684722118e-05, "loss": 1.9649, "step": 11227 }, { "epoch": 0.15, "grad_norm": 5.035679340362549, "learning_rate": 1.9997833498071965e-05, "loss": 2.4063, "step": 11228 }, { "epoch": 0.15, "grad_norm": 4.409108638763428, "learning_rate": 1.9997831310317884e-05, "loss": 1.6015, "step": 11229 }, { "epoch": 0.15, "grad_norm": 4.933159351348877, "learning_rate": 1.9997829121459876e-05, "loss": 2.569, "step": 11230 }, { "epoch": 0.15, "grad_norm": 4.827747344970703, "learning_rate": 1.9997826931497936e-05, "loss": 2.3792, "step": 11231 }, { "epoch": 0.15, "grad_norm": 5.038032531738281, "learning_rate": 1.9997824740432064e-05, "loss": 2.1382, "step": 11232 }, { "epoch": 0.15, "grad_norm": 4.928173065185547, "learning_rate": 1.9997822548262262e-05, "loss": 2.5305, "step": 11233 }, { "epoch": 0.15, "grad_norm": 5.016833305358887, "learning_rate": 1.999782035498853e-05, "loss": 2.4519, "step": 11234 }, { "epoch": 0.15, "grad_norm": 4.822693347930908, "learning_rate": 1.9997818160610873e-05, "loss": 1.9953, "step": 11235 }, { "epoch": 0.15, "grad_norm": 5.075071334838867, "learning_rate": 1.9997815965129287e-05, "loss": 2.5603, "step": 11236 }, { "epoch": 0.15, "grad_norm": 4.7344160079956055, "learning_rate": 1.999781376854377e-05, "loss": 2.0874, "step": 11237 }, { "epoch": 0.15, "grad_norm": 5.149618625640869, "learning_rate": 1.9997811570854325e-05, "loss": 2.1405, "step": 11238 }, { "epoch": 0.15, "grad_norm": 4.87419319152832, "learning_rate": 1.9997809372060955e-05, "loss": 2.2726, "step": 11239 }, { "epoch": 0.15, "grad_norm": 6.1632280349731445, "learning_rate": 1.9997807172163654e-05, "loss": 2.7024, "step": 11240 }, { "epoch": 0.15, "grad_norm": 5.098714828491211, "learning_rate": 1.9997804971162425e-05, "loss": 2.3904, "step": 11241 }, { "epoch": 0.15, "grad_norm": 4.631245136260986, "learning_rate": 1.999780276905727e-05, "loss": 2.6414, "step": 11242 }, { "epoch": 0.15, "grad_norm": 4.439002990722656, "learning_rate": 1.999780056584819e-05, "loss": 2.1075, "step": 11243 }, { "epoch": 0.15, "grad_norm": 5.973437309265137, "learning_rate": 1.9997798361535185e-05, "loss": 2.56, "step": 11244 }, { "epoch": 0.15, "grad_norm": 5.278111457824707, "learning_rate": 1.9997796156118244e-05, "loss": 2.4938, "step": 11245 }, { "epoch": 0.15, "grad_norm": 4.538680553436279, "learning_rate": 1.9997793949597386e-05, "loss": 2.5252, "step": 11246 }, { "epoch": 0.15, "grad_norm": 4.745448112487793, "learning_rate": 1.9997791741972597e-05, "loss": 2.4487, "step": 11247 }, { "epoch": 0.15, "grad_norm": 6.129512310028076, "learning_rate": 1.9997789533243886e-05, "loss": 3.6168, "step": 11248 }, { "epoch": 0.15, "grad_norm": 4.925975799560547, "learning_rate": 1.9997787323411245e-05, "loss": 2.1361, "step": 11249 }, { "epoch": 0.15, "grad_norm": 4.381882190704346, "learning_rate": 1.9997785112474682e-05, "loss": 2.1898, "step": 11250 }, { "epoch": 0.15, "grad_norm": 4.737696170806885, "learning_rate": 1.999778290043419e-05, "loss": 2.1945, "step": 11251 }, { "epoch": 0.15, "grad_norm": 5.109547138214111, "learning_rate": 1.999778068728978e-05, "loss": 2.3216, "step": 11252 }, { "epoch": 0.15, "grad_norm": 5.000277042388916, "learning_rate": 1.999777847304144e-05, "loss": 2.107, "step": 11253 }, { "epoch": 0.15, "grad_norm": 5.399135589599609, "learning_rate": 1.9997776257689176e-05, "loss": 1.9383, "step": 11254 }, { "epoch": 0.15, "grad_norm": 4.7012248039245605, "learning_rate": 1.999777404123299e-05, "loss": 1.9914, "step": 11255 }, { "epoch": 0.15, "grad_norm": 4.7216477394104, "learning_rate": 1.9997771823672877e-05, "loss": 2.3537, "step": 11256 }, { "epoch": 0.15, "grad_norm": 4.707441329956055, "learning_rate": 1.9997769605008843e-05, "loss": 2.3034, "step": 11257 }, { "epoch": 0.15, "grad_norm": 4.915625095367432, "learning_rate": 1.9997767385240884e-05, "loss": 2.3206, "step": 11258 }, { "epoch": 0.15, "grad_norm": 5.250576496124268, "learning_rate": 1.9997765164369004e-05, "loss": 2.2046, "step": 11259 }, { "epoch": 0.15, "grad_norm": 4.965013027191162, "learning_rate": 1.99977629423932e-05, "loss": 2.4962, "step": 11260 }, { "epoch": 0.15, "grad_norm": 5.017146587371826, "learning_rate": 1.999776071931347e-05, "loss": 2.2458, "step": 11261 }, { "epoch": 0.15, "grad_norm": 5.027545928955078, "learning_rate": 1.9997758495129825e-05, "loss": 2.6797, "step": 11262 }, { "epoch": 0.15, "grad_norm": 4.618886470794678, "learning_rate": 1.9997756269842254e-05, "loss": 2.1015, "step": 11263 }, { "epoch": 0.15, "grad_norm": 5.324641704559326, "learning_rate": 1.999775404345076e-05, "loss": 2.2534, "step": 11264 }, { "epoch": 0.15, "grad_norm": 5.326112747192383, "learning_rate": 1.9997751815955345e-05, "loss": 2.4604, "step": 11265 }, { "epoch": 0.15, "grad_norm": 4.505351543426514, "learning_rate": 1.9997749587356007e-05, "loss": 2.1074, "step": 11266 }, { "epoch": 0.15, "grad_norm": 5.511181831359863, "learning_rate": 1.999774735765275e-05, "loss": 2.5061, "step": 11267 }, { "epoch": 0.15, "grad_norm": 4.428267002105713, "learning_rate": 1.9997745126845572e-05, "loss": 2.139, "step": 11268 }, { "epoch": 0.15, "grad_norm": 4.7902936935424805, "learning_rate": 1.9997742894934475e-05, "loss": 1.913, "step": 11269 }, { "epoch": 0.15, "grad_norm": 5.443109512329102, "learning_rate": 1.9997740661919453e-05, "loss": 2.7302, "step": 11270 }, { "epoch": 0.15, "grad_norm": 5.630751609802246, "learning_rate": 1.9997738427800516e-05, "loss": 2.5556, "step": 11271 }, { "epoch": 0.15, "grad_norm": 4.409295082092285, "learning_rate": 1.9997736192577655e-05, "loss": 2.1715, "step": 11272 }, { "epoch": 0.15, "grad_norm": 5.103425025939941, "learning_rate": 1.9997733956250877e-05, "loss": 2.6094, "step": 11273 }, { "epoch": 0.15, "grad_norm": 4.558008193969727, "learning_rate": 1.9997731718820174e-05, "loss": 2.3509, "step": 11274 }, { "epoch": 0.15, "grad_norm": 4.564906120300293, "learning_rate": 1.999772948028556e-05, "loss": 2.3782, "step": 11275 }, { "epoch": 0.15, "grad_norm": 6.056887626647949, "learning_rate": 1.999772724064702e-05, "loss": 3.0771, "step": 11276 }, { "epoch": 0.15, "grad_norm": 4.958873271942139, "learning_rate": 1.9997724999904565e-05, "loss": 2.4567, "step": 11277 }, { "epoch": 0.15, "grad_norm": 4.0618510246276855, "learning_rate": 1.9997722758058194e-05, "loss": 2.0089, "step": 11278 }, { "epoch": 0.15, "grad_norm": 4.753291606903076, "learning_rate": 1.99977205151079e-05, "loss": 2.0943, "step": 11279 }, { "epoch": 0.15, "grad_norm": 5.318093776702881, "learning_rate": 1.9997718271053687e-05, "loss": 2.6013, "step": 11280 }, { "epoch": 0.15, "grad_norm": 4.840256214141846, "learning_rate": 1.9997716025895563e-05, "loss": 2.0623, "step": 11281 }, { "epoch": 0.15, "grad_norm": 5.4180989265441895, "learning_rate": 1.9997713779633515e-05, "loss": 2.7745, "step": 11282 }, { "epoch": 0.15, "grad_norm": 5.3799591064453125, "learning_rate": 1.9997711532267556e-05, "loss": 2.5047, "step": 11283 }, { "epoch": 0.15, "grad_norm": 4.556662559509277, "learning_rate": 1.9997709283797672e-05, "loss": 1.8881, "step": 11284 }, { "epoch": 0.15, "grad_norm": 4.50703763961792, "learning_rate": 1.9997707034223877e-05, "loss": 1.9596, "step": 11285 }, { "epoch": 0.15, "grad_norm": 4.999790668487549, "learning_rate": 1.9997704783546162e-05, "loss": 2.4217, "step": 11286 }, { "epoch": 0.15, "grad_norm": 4.468057632446289, "learning_rate": 1.9997702531764535e-05, "loss": 2.3101, "step": 11287 }, { "epoch": 0.15, "grad_norm": 5.422187328338623, "learning_rate": 1.999770027887899e-05, "loss": 2.7171, "step": 11288 }, { "epoch": 0.15, "grad_norm": 4.2408976554870605, "learning_rate": 1.999769802488953e-05, "loss": 2.0228, "step": 11289 }, { "epoch": 0.15, "grad_norm": 5.12920618057251, "learning_rate": 1.9997695769796156e-05, "loss": 2.5295, "step": 11290 }, { "epoch": 0.15, "grad_norm": 5.284627437591553, "learning_rate": 1.9997693513598862e-05, "loss": 2.8636, "step": 11291 }, { "epoch": 0.15, "grad_norm": 5.054642200469971, "learning_rate": 1.9997691256297657e-05, "loss": 2.64, "step": 11292 }, { "epoch": 0.15, "grad_norm": 4.409368515014648, "learning_rate": 1.9997688997892534e-05, "loss": 1.8682, "step": 11293 }, { "epoch": 0.15, "grad_norm": 4.906461238861084, "learning_rate": 1.99976867383835e-05, "loss": 2.8232, "step": 11294 }, { "epoch": 0.15, "grad_norm": 4.260860443115234, "learning_rate": 1.9997684477770553e-05, "loss": 2.3799, "step": 11295 }, { "epoch": 0.15, "grad_norm": 4.7295002937316895, "learning_rate": 1.9997682216053687e-05, "loss": 2.1309, "step": 11296 }, { "epoch": 0.15, "grad_norm": 4.592033863067627, "learning_rate": 1.999767995323291e-05, "loss": 2.1235, "step": 11297 }, { "epoch": 0.15, "grad_norm": 5.653772830963135, "learning_rate": 1.9997677689308216e-05, "loss": 2.5241, "step": 11298 }, { "epoch": 0.15, "grad_norm": 4.690654754638672, "learning_rate": 1.999767542427961e-05, "loss": 1.9379, "step": 11299 }, { "epoch": 0.15, "grad_norm": 6.507436275482178, "learning_rate": 1.9997673158147095e-05, "loss": 3.1792, "step": 11300 }, { "epoch": 0.15, "grad_norm": 4.983591556549072, "learning_rate": 1.9997670890910665e-05, "loss": 2.2729, "step": 11301 }, { "epoch": 0.15, "grad_norm": 5.054078578948975, "learning_rate": 1.9997668622570323e-05, "loss": 2.3687, "step": 11302 }, { "epoch": 0.15, "grad_norm": 5.361355781555176, "learning_rate": 1.9997666353126068e-05, "loss": 2.6101, "step": 11303 }, { "epoch": 0.15, "grad_norm": 5.57389497756958, "learning_rate": 1.99976640825779e-05, "loss": 2.3646, "step": 11304 }, { "epoch": 0.15, "grad_norm": 5.598033905029297, "learning_rate": 1.9997661810925827e-05, "loss": 2.7122, "step": 11305 }, { "epoch": 0.15, "grad_norm": 5.270165920257568, "learning_rate": 1.9997659538169835e-05, "loss": 2.7878, "step": 11306 }, { "epoch": 0.15, "grad_norm": 6.135677814483643, "learning_rate": 1.9997657264309932e-05, "loss": 2.9358, "step": 11307 }, { "epoch": 0.15, "grad_norm": 5.531177997589111, "learning_rate": 1.999765498934612e-05, "loss": 2.4714, "step": 11308 }, { "epoch": 0.15, "grad_norm": 5.374509334564209, "learning_rate": 1.99976527132784e-05, "loss": 2.3098, "step": 11309 }, { "epoch": 0.15, "grad_norm": 4.409125804901123, "learning_rate": 1.9997650436106764e-05, "loss": 2.2465, "step": 11310 }, { "epoch": 0.15, "grad_norm": 4.771947860717773, "learning_rate": 1.999764815783122e-05, "loss": 2.3413, "step": 11311 }, { "epoch": 0.15, "grad_norm": 4.798633098602295, "learning_rate": 1.9997645878451767e-05, "loss": 2.407, "step": 11312 }, { "epoch": 0.15, "grad_norm": 3.9951987266540527, "learning_rate": 1.9997643597968405e-05, "loss": 1.8747, "step": 11313 }, { "epoch": 0.15, "grad_norm": 4.735403060913086, "learning_rate": 1.9997641316381133e-05, "loss": 2.5482, "step": 11314 }, { "epoch": 0.15, "grad_norm": 4.910928726196289, "learning_rate": 1.999763903368995e-05, "loss": 2.0789, "step": 11315 }, { "epoch": 0.15, "grad_norm": 4.892088890075684, "learning_rate": 1.999763674989486e-05, "loss": 2.3035, "step": 11316 }, { "epoch": 0.15, "grad_norm": 5.498495101928711, "learning_rate": 1.999763446499586e-05, "loss": 2.6536, "step": 11317 }, { "epoch": 0.15, "grad_norm": 4.627153396606445, "learning_rate": 1.9997632178992953e-05, "loss": 2.5994, "step": 11318 }, { "epoch": 0.15, "grad_norm": 4.151047706604004, "learning_rate": 1.9997629891886136e-05, "loss": 1.9032, "step": 11319 }, { "epoch": 0.15, "grad_norm": 4.331418991088867, "learning_rate": 1.999762760367541e-05, "loss": 2.3613, "step": 11320 }, { "epoch": 0.15, "grad_norm": 4.958971977233887, "learning_rate": 1.999762531436078e-05, "loss": 2.3694, "step": 11321 }, { "epoch": 0.15, "grad_norm": 5.071577548980713, "learning_rate": 1.999762302394224e-05, "loss": 2.2802, "step": 11322 }, { "epoch": 0.15, "grad_norm": 5.371603488922119, "learning_rate": 1.9997620732419792e-05, "loss": 2.4964, "step": 11323 }, { "epoch": 0.15, "grad_norm": 5.134786605834961, "learning_rate": 1.9997618439793444e-05, "loss": 2.7524, "step": 11324 }, { "epoch": 0.15, "grad_norm": 5.124605655670166, "learning_rate": 1.999761614606318e-05, "loss": 2.7982, "step": 11325 }, { "epoch": 0.15, "grad_norm": 4.611158847808838, "learning_rate": 1.9997613851229015e-05, "loss": 1.9508, "step": 11326 }, { "epoch": 0.15, "grad_norm": 4.284425735473633, "learning_rate": 1.999761155529094e-05, "loss": 2.021, "step": 11327 }, { "epoch": 0.15, "grad_norm": 4.999773979187012, "learning_rate": 1.9997609258248963e-05, "loss": 2.5082, "step": 11328 }, { "epoch": 0.15, "grad_norm": 5.580482482910156, "learning_rate": 1.999760696010308e-05, "loss": 2.736, "step": 11329 }, { "epoch": 0.15, "grad_norm": 4.837707042694092, "learning_rate": 1.999760466085329e-05, "loss": 2.1083, "step": 11330 }, { "epoch": 0.15, "grad_norm": 4.9016947746276855, "learning_rate": 1.9997602360499596e-05, "loss": 2.5299, "step": 11331 }, { "epoch": 0.15, "grad_norm": 5.426939964294434, "learning_rate": 1.9997600059041994e-05, "loss": 2.5018, "step": 11332 }, { "epoch": 0.15, "grad_norm": 4.933835029602051, "learning_rate": 1.9997597756480492e-05, "loss": 3.0188, "step": 11333 }, { "epoch": 0.15, "grad_norm": 3.8688502311706543, "learning_rate": 1.9997595452815082e-05, "loss": 1.8551, "step": 11334 }, { "epoch": 0.15, "grad_norm": 5.10812520980835, "learning_rate": 1.9997593148045768e-05, "loss": 2.298, "step": 11335 }, { "epoch": 0.15, "grad_norm": 6.056151390075684, "learning_rate": 1.9997590842172553e-05, "loss": 2.5058, "step": 11336 }, { "epoch": 0.15, "grad_norm": 4.509955883026123, "learning_rate": 1.999758853519543e-05, "loss": 2.2718, "step": 11337 }, { "epoch": 0.15, "grad_norm": 5.062664985656738, "learning_rate": 1.9997586227114407e-05, "loss": 2.2588, "step": 11338 }, { "epoch": 0.15, "grad_norm": 4.334741115570068, "learning_rate": 1.999758391792948e-05, "loss": 2.1099, "step": 11339 }, { "epoch": 0.15, "grad_norm": 5.151236534118652, "learning_rate": 1.999758160764065e-05, "loss": 2.5156, "step": 11340 }, { "epoch": 0.15, "grad_norm": 4.864813327789307, "learning_rate": 1.999757929624792e-05, "loss": 2.1383, "step": 11341 }, { "epoch": 0.15, "grad_norm": 4.90118408203125, "learning_rate": 1.9997576983751282e-05, "loss": 2.3117, "step": 11342 }, { "epoch": 0.15, "grad_norm": 4.805215358734131, "learning_rate": 1.9997574670150745e-05, "loss": 2.5722, "step": 11343 }, { "epoch": 0.15, "grad_norm": 5.415935039520264, "learning_rate": 1.9997572355446307e-05, "loss": 2.6389, "step": 11344 }, { "epoch": 0.15, "grad_norm": 5.085854530334473, "learning_rate": 1.9997570039637968e-05, "loss": 2.5907, "step": 11345 }, { "epoch": 0.15, "grad_norm": 4.748152732849121, "learning_rate": 1.9997567722725725e-05, "loss": 2.4679, "step": 11346 }, { "epoch": 0.15, "grad_norm": 4.789626121520996, "learning_rate": 1.999756540470958e-05, "loss": 2.4596, "step": 11347 }, { "epoch": 0.15, "grad_norm": 5.562849044799805, "learning_rate": 1.9997563085589537e-05, "loss": 2.2365, "step": 11348 }, { "epoch": 0.15, "grad_norm": 4.7914299964904785, "learning_rate": 1.9997560765365592e-05, "loss": 2.0908, "step": 11349 }, { "epoch": 0.15, "grad_norm": 4.205611228942871, "learning_rate": 1.999755844403775e-05, "loss": 2.1868, "step": 11350 }, { "epoch": 0.15, "grad_norm": 4.357641220092773, "learning_rate": 1.9997556121606006e-05, "loss": 2.4496, "step": 11351 }, { "epoch": 0.15, "grad_norm": 4.050079822540283, "learning_rate": 1.999755379807036e-05, "loss": 1.9694, "step": 11352 }, { "epoch": 0.15, "grad_norm": 5.260752201080322, "learning_rate": 1.9997551473430816e-05, "loss": 2.2446, "step": 11353 }, { "epoch": 0.15, "grad_norm": 4.965195655822754, "learning_rate": 1.999754914768737e-05, "loss": 2.3408, "step": 11354 }, { "epoch": 0.15, "grad_norm": 4.6611480712890625, "learning_rate": 1.999754682084003e-05, "loss": 2.1058, "step": 11355 }, { "epoch": 0.15, "grad_norm": 4.865828514099121, "learning_rate": 1.999754449288879e-05, "loss": 2.4182, "step": 11356 }, { "epoch": 0.15, "grad_norm": 5.292959213256836, "learning_rate": 1.999754216383365e-05, "loss": 2.5211, "step": 11357 }, { "epoch": 0.15, "grad_norm": 4.718908786773682, "learning_rate": 1.999753983367461e-05, "loss": 1.8485, "step": 11358 }, { "epoch": 0.15, "grad_norm": 5.347341537475586, "learning_rate": 1.9997537502411678e-05, "loss": 2.2211, "step": 11359 }, { "epoch": 0.15, "grad_norm": 4.123287677764893, "learning_rate": 1.999753517004484e-05, "loss": 2.0149, "step": 11360 }, { "epoch": 0.15, "grad_norm": 4.7759480476379395, "learning_rate": 1.999753283657411e-05, "loss": 2.6298, "step": 11361 }, { "epoch": 0.15, "grad_norm": 5.569991588592529, "learning_rate": 1.9997530501999482e-05, "loss": 1.9946, "step": 11362 }, { "epoch": 0.15, "grad_norm": 4.742502212524414, "learning_rate": 1.9997528166320956e-05, "loss": 2.069, "step": 11363 }, { "epoch": 0.15, "grad_norm": 4.9325971603393555, "learning_rate": 1.9997525829538533e-05, "loss": 1.9272, "step": 11364 }, { "epoch": 0.15, "grad_norm": 4.28345251083374, "learning_rate": 1.9997523491652216e-05, "loss": 2.0773, "step": 11365 }, { "epoch": 0.15, "grad_norm": 4.639435768127441, "learning_rate": 1.9997521152662002e-05, "loss": 2.3711, "step": 11366 }, { "epoch": 0.15, "grad_norm": 5.268267631530762, "learning_rate": 1.999751881256789e-05, "loss": 2.4381, "step": 11367 }, { "epoch": 0.15, "grad_norm": 5.188127040863037, "learning_rate": 1.9997516471369885e-05, "loss": 2.6528, "step": 11368 }, { "epoch": 0.15, "grad_norm": 5.152918815612793, "learning_rate": 1.999751412906798e-05, "loss": 1.9037, "step": 11369 }, { "epoch": 0.15, "grad_norm": 5.690572738647461, "learning_rate": 1.9997511785662184e-05, "loss": 2.5284, "step": 11370 }, { "epoch": 0.15, "grad_norm": 5.0076584815979, "learning_rate": 1.9997509441152493e-05, "loss": 2.9537, "step": 11371 }, { "epoch": 0.15, "grad_norm": 4.60724401473999, "learning_rate": 1.9997507095538905e-05, "loss": 2.1149, "step": 11372 }, { "epoch": 0.15, "grad_norm": 4.877052307128906, "learning_rate": 1.9997504748821426e-05, "loss": 1.8307, "step": 11373 }, { "epoch": 0.15, "grad_norm": 5.4709882736206055, "learning_rate": 1.999750240100005e-05, "loss": 2.1394, "step": 11374 }, { "epoch": 0.15, "grad_norm": 4.933738708496094, "learning_rate": 1.9997500052074782e-05, "loss": 2.575, "step": 11375 }, { "epoch": 0.15, "grad_norm": 5.266234874725342, "learning_rate": 1.9997497702045618e-05, "loss": 2.393, "step": 11376 }, { "epoch": 0.15, "grad_norm": 4.829405784606934, "learning_rate": 1.9997495350912563e-05, "loss": 2.2161, "step": 11377 }, { "epoch": 0.15, "grad_norm": 4.868677139282227, "learning_rate": 1.9997492998675614e-05, "loss": 2.3341, "step": 11378 }, { "epoch": 0.15, "grad_norm": 4.182559490203857, "learning_rate": 1.999749064533477e-05, "loss": 2.1035, "step": 11379 }, { "epoch": 0.15, "grad_norm": 5.113391399383545, "learning_rate": 1.9997488290890037e-05, "loss": 2.5012, "step": 11380 }, { "epoch": 0.15, "grad_norm": 4.961242198944092, "learning_rate": 1.999748593534141e-05, "loss": 3.0109, "step": 11381 }, { "epoch": 0.15, "grad_norm": 4.486690044403076, "learning_rate": 1.999748357868889e-05, "loss": 2.5782, "step": 11382 }, { "epoch": 0.15, "grad_norm": 4.6936821937561035, "learning_rate": 1.999748122093248e-05, "loss": 2.0571, "step": 11383 }, { "epoch": 0.15, "grad_norm": 5.339630126953125, "learning_rate": 1.9997478862072176e-05, "loss": 2.3996, "step": 11384 }, { "epoch": 0.15, "grad_norm": 4.822375297546387, "learning_rate": 1.9997476502107986e-05, "loss": 2.1121, "step": 11385 }, { "epoch": 0.15, "grad_norm": 5.047452449798584, "learning_rate": 1.99974741410399e-05, "loss": 2.3714, "step": 11386 }, { "epoch": 0.15, "grad_norm": 4.902906894683838, "learning_rate": 1.9997471778867924e-05, "loss": 2.5711, "step": 11387 }, { "epoch": 0.15, "grad_norm": 4.9772233963012695, "learning_rate": 1.999746941559206e-05, "loss": 2.215, "step": 11388 }, { "epoch": 0.15, "grad_norm": 4.598562240600586, "learning_rate": 1.9997467051212306e-05, "loss": 1.898, "step": 11389 }, { "epoch": 0.15, "grad_norm": 4.820766448974609, "learning_rate": 1.999746468572866e-05, "loss": 1.6954, "step": 11390 }, { "epoch": 0.15, "grad_norm": 5.2930588722229, "learning_rate": 1.9997462319141122e-05, "loss": 2.1889, "step": 11391 }, { "epoch": 0.15, "grad_norm": 5.568788528442383, "learning_rate": 1.9997459951449695e-05, "loss": 2.9344, "step": 11392 }, { "epoch": 0.15, "grad_norm": 5.571865558624268, "learning_rate": 1.9997457582654383e-05, "loss": 2.8437, "step": 11393 }, { "epoch": 0.15, "grad_norm": 5.100225448608398, "learning_rate": 1.9997455212755177e-05, "loss": 2.124, "step": 11394 }, { "epoch": 0.15, "grad_norm": 4.939313888549805, "learning_rate": 1.9997452841752088e-05, "loss": 2.6607, "step": 11395 }, { "epoch": 0.15, "grad_norm": 4.984408855438232, "learning_rate": 1.9997450469645105e-05, "loss": 2.6657, "step": 11396 }, { "epoch": 0.15, "grad_norm": 4.423975944519043, "learning_rate": 1.9997448096434238e-05, "loss": 2.3281, "step": 11397 }, { "epoch": 0.15, "grad_norm": 4.872848987579346, "learning_rate": 1.9997445722119483e-05, "loss": 2.2129, "step": 11398 }, { "epoch": 0.15, "grad_norm": 6.011124610900879, "learning_rate": 1.9997443346700838e-05, "loss": 2.6289, "step": 11399 }, { "epoch": 0.15, "grad_norm": 4.648543357849121, "learning_rate": 1.9997440970178306e-05, "loss": 1.786, "step": 11400 }, { "epoch": 0.15, "grad_norm": 4.918817043304443, "learning_rate": 1.999743859255189e-05, "loss": 2.3869, "step": 11401 }, { "epoch": 0.15, "grad_norm": 4.875014305114746, "learning_rate": 1.9997436213821583e-05, "loss": 2.2473, "step": 11402 }, { "epoch": 0.15, "grad_norm": 5.138087749481201, "learning_rate": 1.9997433833987393e-05, "loss": 2.2412, "step": 11403 }, { "epoch": 0.15, "grad_norm": 5.033865928649902, "learning_rate": 1.9997431453049315e-05, "loss": 2.1491, "step": 11404 }, { "epoch": 0.15, "grad_norm": 4.610629081726074, "learning_rate": 1.999742907100735e-05, "loss": 2.3167, "step": 11405 }, { "epoch": 0.15, "grad_norm": 4.612403392791748, "learning_rate": 1.99974266878615e-05, "loss": 2.5506, "step": 11406 }, { "epoch": 0.15, "grad_norm": 4.759679317474365, "learning_rate": 1.9997424303611765e-05, "loss": 2.3936, "step": 11407 }, { "epoch": 0.15, "grad_norm": 5.730996608734131, "learning_rate": 1.9997421918258145e-05, "loss": 2.3008, "step": 11408 }, { "epoch": 0.15, "grad_norm": 6.824808120727539, "learning_rate": 1.999741953180064e-05, "loss": 2.639, "step": 11409 }, { "epoch": 0.15, "grad_norm": 5.223978519439697, "learning_rate": 1.9997417144239248e-05, "loss": 2.4159, "step": 11410 }, { "epoch": 0.15, "grad_norm": 5.258780479431152, "learning_rate": 1.9997414755573973e-05, "loss": 2.2874, "step": 11411 }, { "epoch": 0.15, "grad_norm": 5.451685428619385, "learning_rate": 1.9997412365804814e-05, "loss": 2.3382, "step": 11412 }, { "epoch": 0.15, "grad_norm": 4.738980293273926, "learning_rate": 1.9997409974931772e-05, "loss": 2.4403, "step": 11413 }, { "epoch": 0.15, "grad_norm": 4.299703598022461, "learning_rate": 1.9997407582954846e-05, "loss": 1.9077, "step": 11414 }, { "epoch": 0.15, "grad_norm": 4.342742443084717, "learning_rate": 1.9997405189874036e-05, "loss": 1.9935, "step": 11415 }, { "epoch": 0.15, "grad_norm": 5.039480209350586, "learning_rate": 1.999740279568934e-05, "loss": 2.1638, "step": 11416 }, { "epoch": 0.15, "grad_norm": 4.3978753089904785, "learning_rate": 1.9997400400400764e-05, "loss": 2.1378, "step": 11417 }, { "epoch": 0.15, "grad_norm": 5.009077548980713, "learning_rate": 1.9997398004008306e-05, "loss": 2.4674, "step": 11418 }, { "epoch": 0.15, "grad_norm": 4.904417514801025, "learning_rate": 1.9997395606511964e-05, "loss": 2.8803, "step": 11419 }, { "epoch": 0.15, "grad_norm": 4.33004903793335, "learning_rate": 1.9997393207911742e-05, "loss": 1.8216, "step": 11420 }, { "epoch": 0.15, "grad_norm": 5.074908256530762, "learning_rate": 1.9997390808207636e-05, "loss": 2.5096, "step": 11421 }, { "epoch": 0.15, "grad_norm": 5.777812480926514, "learning_rate": 1.9997388407399652e-05, "loss": 2.7202, "step": 11422 }, { "epoch": 0.15, "grad_norm": 5.081552982330322, "learning_rate": 1.9997386005487782e-05, "loss": 1.9996, "step": 11423 }, { "epoch": 0.15, "grad_norm": 4.470632076263428, "learning_rate": 1.9997383602472034e-05, "loss": 2.0529, "step": 11424 }, { "epoch": 0.15, "grad_norm": 4.51606559753418, "learning_rate": 1.9997381198352403e-05, "loss": 2.2477, "step": 11425 }, { "epoch": 0.15, "grad_norm": 4.542312145233154, "learning_rate": 1.9997378793128894e-05, "loss": 2.0203, "step": 11426 }, { "epoch": 0.15, "grad_norm": 5.684574127197266, "learning_rate": 1.9997376386801502e-05, "loss": 2.3979, "step": 11427 }, { "epoch": 0.15, "grad_norm": 4.683465480804443, "learning_rate": 1.9997373979370233e-05, "loss": 2.394, "step": 11428 }, { "epoch": 0.15, "grad_norm": 4.691163539886475, "learning_rate": 1.9997371570835083e-05, "loss": 2.0632, "step": 11429 }, { "epoch": 0.15, "grad_norm": 4.632529258728027, "learning_rate": 1.9997369161196053e-05, "loss": 2.1707, "step": 11430 }, { "epoch": 0.15, "grad_norm": 5.02486515045166, "learning_rate": 1.9997366750453146e-05, "loss": 1.937, "step": 11431 }, { "epoch": 0.15, "grad_norm": 5.245747089385986, "learning_rate": 1.9997364338606358e-05, "loss": 2.6956, "step": 11432 }, { "epoch": 0.15, "grad_norm": 4.50661039352417, "learning_rate": 1.9997361925655693e-05, "loss": 2.1106, "step": 11433 }, { "epoch": 0.15, "grad_norm": 5.589365005493164, "learning_rate": 1.9997359511601148e-05, "loss": 2.6669, "step": 11434 }, { "epoch": 0.15, "grad_norm": 5.2703986167907715, "learning_rate": 1.9997357096442726e-05, "loss": 1.988, "step": 11435 }, { "epoch": 0.15, "grad_norm": 5.225074768066406, "learning_rate": 1.9997354680180427e-05, "loss": 2.6104, "step": 11436 }, { "epoch": 0.15, "grad_norm": 4.871609210968018, "learning_rate": 1.9997352262814247e-05, "loss": 2.2274, "step": 11437 }, { "epoch": 0.15, "grad_norm": 5.2165985107421875, "learning_rate": 1.999734984434419e-05, "loss": 2.7208, "step": 11438 }, { "epoch": 0.15, "grad_norm": 5.808216571807861, "learning_rate": 1.999734742477026e-05, "loss": 2.2762, "step": 11439 }, { "epoch": 0.15, "grad_norm": 5.267235279083252, "learning_rate": 1.999734500409245e-05, "loss": 2.3905, "step": 11440 }, { "epoch": 0.15, "grad_norm": 4.761905670166016, "learning_rate": 1.9997342582310764e-05, "loss": 2.5554, "step": 11441 }, { "epoch": 0.15, "grad_norm": 6.6047844886779785, "learning_rate": 1.9997340159425203e-05, "loss": 2.7155, "step": 11442 }, { "epoch": 0.15, "grad_norm": 4.638735771179199, "learning_rate": 1.9997337735435764e-05, "loss": 2.1796, "step": 11443 }, { "epoch": 0.15, "grad_norm": 5.238842010498047, "learning_rate": 1.999733531034245e-05, "loss": 2.5672, "step": 11444 }, { "epoch": 0.15, "grad_norm": 5.524338722229004, "learning_rate": 1.9997332884145262e-05, "loss": 2.3925, "step": 11445 }, { "epoch": 0.15, "grad_norm": 5.527456760406494, "learning_rate": 1.99973304568442e-05, "loss": 2.2659, "step": 11446 }, { "epoch": 0.15, "grad_norm": 5.299385070800781, "learning_rate": 1.999732802843926e-05, "loss": 2.7212, "step": 11447 }, { "epoch": 0.15, "grad_norm": 4.642227649688721, "learning_rate": 1.999732559893045e-05, "loss": 2.0596, "step": 11448 }, { "epoch": 0.15, "grad_norm": 5.310620307922363, "learning_rate": 1.9997323168317758e-05, "loss": 2.7093, "step": 11449 }, { "epoch": 0.15, "grad_norm": 4.948049545288086, "learning_rate": 1.9997320736601197e-05, "loss": 2.173, "step": 11450 }, { "epoch": 0.15, "grad_norm": 4.830377578735352, "learning_rate": 1.9997318303780762e-05, "loss": 2.1568, "step": 11451 }, { "epoch": 0.15, "grad_norm": 5.141327857971191, "learning_rate": 1.9997315869856453e-05, "loss": 2.7004, "step": 11452 }, { "epoch": 0.15, "grad_norm": 4.615834712982178, "learning_rate": 1.9997313434828268e-05, "loss": 2.7663, "step": 11453 }, { "epoch": 0.15, "grad_norm": 5.3742356300354, "learning_rate": 1.9997310998696212e-05, "loss": 2.6084, "step": 11454 }, { "epoch": 0.15, "grad_norm": 4.8342814445495605, "learning_rate": 1.9997308561460286e-05, "loss": 2.5948, "step": 11455 }, { "epoch": 0.15, "grad_norm": 5.2148003578186035, "learning_rate": 1.9997306123120483e-05, "loss": 2.7865, "step": 11456 }, { "epoch": 0.15, "grad_norm": 4.946709632873535, "learning_rate": 1.9997303683676813e-05, "loss": 2.2752, "step": 11457 }, { "epoch": 0.15, "grad_norm": 5.4261555671691895, "learning_rate": 1.999730124312927e-05, "loss": 2.0874, "step": 11458 }, { "epoch": 0.15, "grad_norm": 5.123889923095703, "learning_rate": 1.999729880147785e-05, "loss": 2.4616, "step": 11459 }, { "epoch": 0.15, "grad_norm": 5.06733512878418, "learning_rate": 1.999729635872256e-05, "loss": 2.1277, "step": 11460 }, { "epoch": 0.15, "grad_norm": 6.481297969818115, "learning_rate": 1.9997293914863403e-05, "loss": 2.5512, "step": 11461 }, { "epoch": 0.15, "grad_norm": 5.92506217956543, "learning_rate": 1.9997291469900372e-05, "loss": 2.5108, "step": 11462 }, { "epoch": 0.15, "grad_norm": 4.835283279418945, "learning_rate": 1.9997289023833473e-05, "loss": 2.5565, "step": 11463 }, { "epoch": 0.15, "grad_norm": 5.2854719161987305, "learning_rate": 1.99972865766627e-05, "loss": 2.177, "step": 11464 }, { "epoch": 0.15, "grad_norm": 4.9380621910095215, "learning_rate": 1.9997284128388062e-05, "loss": 2.2103, "step": 11465 }, { "epoch": 0.15, "grad_norm": 6.147071838378906, "learning_rate": 1.999728167900955e-05, "loss": 2.6574, "step": 11466 }, { "epoch": 0.15, "grad_norm": 4.595754146575928, "learning_rate": 1.999727922852717e-05, "loss": 2.5619, "step": 11467 }, { "epoch": 0.15, "grad_norm": 4.987776756286621, "learning_rate": 1.999727677694092e-05, "loss": 2.2118, "step": 11468 }, { "epoch": 0.15, "grad_norm": 4.772886276245117, "learning_rate": 1.99972743242508e-05, "loss": 2.4503, "step": 11469 }, { "epoch": 0.15, "grad_norm": 5.215106964111328, "learning_rate": 1.9997271870456813e-05, "loss": 2.2136, "step": 11470 }, { "epoch": 0.15, "grad_norm": 4.733057975769043, "learning_rate": 1.999726941555896e-05, "loss": 2.2147, "step": 11471 }, { "epoch": 0.15, "grad_norm": 4.779830455780029, "learning_rate": 1.9997266959557235e-05, "loss": 2.1811, "step": 11472 }, { "epoch": 0.15, "grad_norm": 5.141154766082764, "learning_rate": 1.9997264502451644e-05, "loss": 2.6508, "step": 11473 }, { "epoch": 0.15, "grad_norm": 5.371994972229004, "learning_rate": 1.9997262044242186e-05, "loss": 2.6314, "step": 11474 }, { "epoch": 0.15, "grad_norm": 4.976776599884033, "learning_rate": 1.9997259584928858e-05, "loss": 2.0442, "step": 11475 }, { "epoch": 0.15, "grad_norm": 5.504467964172363, "learning_rate": 1.9997257124511663e-05, "loss": 2.6458, "step": 11476 }, { "epoch": 0.15, "grad_norm": 4.833428382873535, "learning_rate": 1.9997254662990608e-05, "loss": 2.7737, "step": 11477 }, { "epoch": 0.15, "grad_norm": 4.657750129699707, "learning_rate": 1.999725220036568e-05, "loss": 2.1629, "step": 11478 }, { "epoch": 0.15, "grad_norm": 5.196630477905273, "learning_rate": 1.9997249736636886e-05, "loss": 2.0011, "step": 11479 }, { "epoch": 0.15, "grad_norm": 6.185527324676514, "learning_rate": 1.9997247271804227e-05, "loss": 2.7576, "step": 11480 }, { "epoch": 0.15, "grad_norm": 4.444816589355469, "learning_rate": 1.99972448058677e-05, "loss": 1.9636, "step": 11481 }, { "epoch": 0.15, "grad_norm": 5.415119647979736, "learning_rate": 1.999724233882731e-05, "loss": 3.0567, "step": 11482 }, { "epoch": 0.15, "grad_norm": 4.918342590332031, "learning_rate": 1.9997239870683055e-05, "loss": 2.097, "step": 11483 }, { "epoch": 0.15, "grad_norm": 4.339874744415283, "learning_rate": 1.9997237401434935e-05, "loss": 2.0382, "step": 11484 }, { "epoch": 0.15, "grad_norm": 4.672611236572266, "learning_rate": 1.999723493108295e-05, "loss": 2.6043, "step": 11485 }, { "epoch": 0.15, "grad_norm": 5.355491638183594, "learning_rate": 1.99972324596271e-05, "loss": 2.4834, "step": 11486 }, { "epoch": 0.15, "grad_norm": 4.4882121086120605, "learning_rate": 1.9997229987067387e-05, "loss": 1.9247, "step": 11487 }, { "epoch": 0.15, "grad_norm": 5.272222995758057, "learning_rate": 1.999722751340381e-05, "loss": 2.6943, "step": 11488 }, { "epoch": 0.15, "grad_norm": 4.909092903137207, "learning_rate": 1.999722503863637e-05, "loss": 2.5181, "step": 11489 }, { "epoch": 0.15, "grad_norm": 4.929593086242676, "learning_rate": 1.9997222562765067e-05, "loss": 2.3535, "step": 11490 }, { "epoch": 0.15, "grad_norm": 4.837856292724609, "learning_rate": 1.99972200857899e-05, "loss": 2.4576, "step": 11491 }, { "epoch": 0.15, "grad_norm": 5.561097621917725, "learning_rate": 1.9997217607710868e-05, "loss": 2.7505, "step": 11492 }, { "epoch": 0.15, "grad_norm": 4.18861722946167, "learning_rate": 1.999721512852798e-05, "loss": 1.8306, "step": 11493 }, { "epoch": 0.15, "grad_norm": 4.374745845794678, "learning_rate": 1.9997212648241225e-05, "loss": 1.7433, "step": 11494 }, { "epoch": 0.15, "grad_norm": 4.365983009338379, "learning_rate": 1.9997210166850606e-05, "loss": 2.1297, "step": 11495 }, { "epoch": 0.15, "grad_norm": 5.549460411071777, "learning_rate": 1.999720768435613e-05, "loss": 2.8727, "step": 11496 }, { "epoch": 0.15, "grad_norm": 5.023843288421631, "learning_rate": 1.999720520075779e-05, "loss": 2.7566, "step": 11497 }, { "epoch": 0.15, "grad_norm": 6.969391345977783, "learning_rate": 1.999720271605559e-05, "loss": 2.4224, "step": 11498 }, { "epoch": 0.15, "grad_norm": 4.755181312561035, "learning_rate": 1.999720023024953e-05, "loss": 2.0111, "step": 11499 }, { "epoch": 0.15, "grad_norm": 6.385354518890381, "learning_rate": 1.9997197743339608e-05, "loss": 3.0152, "step": 11500 }, { "epoch": 0.15, "grad_norm": 5.27016019821167, "learning_rate": 1.999719525532583e-05, "loss": 2.8174, "step": 11501 }, { "epoch": 0.15, "grad_norm": 4.725455284118652, "learning_rate": 1.9997192766208186e-05, "loss": 2.3527, "step": 11502 }, { "epoch": 0.15, "grad_norm": 4.847960472106934, "learning_rate": 1.9997190275986683e-05, "loss": 2.2114, "step": 11503 }, { "epoch": 0.15, "grad_norm": 5.190352916717529, "learning_rate": 1.9997187784661324e-05, "loss": 2.2344, "step": 11504 }, { "epoch": 0.15, "grad_norm": 5.277295112609863, "learning_rate": 1.9997185292232104e-05, "loss": 2.2128, "step": 11505 }, { "epoch": 0.15, "grad_norm": 5.390535354614258, "learning_rate": 1.9997182798699028e-05, "loss": 2.8213, "step": 11506 }, { "epoch": 0.15, "grad_norm": 4.858962059020996, "learning_rate": 1.9997180304062088e-05, "loss": 2.2271, "step": 11507 }, { "epoch": 0.15, "grad_norm": 4.863373756408691, "learning_rate": 1.9997177808321295e-05, "loss": 2.5557, "step": 11508 }, { "epoch": 0.15, "grad_norm": 4.620484828948975, "learning_rate": 1.999717531147664e-05, "loss": 2.2631, "step": 11509 }, { "epoch": 0.15, "grad_norm": 4.705679416656494, "learning_rate": 1.9997172813528128e-05, "loss": 2.2223, "step": 11510 }, { "epoch": 0.15, "grad_norm": 5.505234241485596, "learning_rate": 1.9997170314475757e-05, "loss": 2.9107, "step": 11511 }, { "epoch": 0.15, "grad_norm": 4.421535491943359, "learning_rate": 1.9997167814319534e-05, "loss": 2.2451, "step": 11512 }, { "epoch": 0.15, "grad_norm": 5.257740020751953, "learning_rate": 1.999716531305945e-05, "loss": 2.3004, "step": 11513 }, { "epoch": 0.15, "grad_norm": 4.978649139404297, "learning_rate": 1.9997162810695513e-05, "loss": 2.5987, "step": 11514 }, { "epoch": 0.15, "grad_norm": 4.4992523193359375, "learning_rate": 1.9997160307227713e-05, "loss": 2.4641, "step": 11515 }, { "epoch": 0.15, "grad_norm": 5.057868957519531, "learning_rate": 1.9997157802656062e-05, "loss": 2.3262, "step": 11516 }, { "epoch": 0.15, "grad_norm": 5.901381969451904, "learning_rate": 1.9997155296980555e-05, "loss": 2.9228, "step": 11517 }, { "epoch": 0.15, "grad_norm": 4.624748706817627, "learning_rate": 1.9997152790201195e-05, "loss": 2.3431, "step": 11518 }, { "epoch": 0.15, "grad_norm": 5.390963077545166, "learning_rate": 1.999715028231797e-05, "loss": 2.4603, "step": 11519 }, { "epoch": 0.15, "grad_norm": 4.870250701904297, "learning_rate": 1.99971477733309e-05, "loss": 2.6443, "step": 11520 }, { "epoch": 0.15, "grad_norm": 5.395279407501221, "learning_rate": 1.999714526323997e-05, "loss": 2.5123, "step": 11521 }, { "epoch": 0.15, "grad_norm": 4.944080829620361, "learning_rate": 1.999714275204519e-05, "loss": 1.812, "step": 11522 }, { "epoch": 0.15, "grad_norm": 5.69357967376709, "learning_rate": 1.999714023974655e-05, "loss": 2.4121, "step": 11523 }, { "epoch": 0.15, "grad_norm": 4.791024684906006, "learning_rate": 1.999713772634406e-05, "loss": 2.1152, "step": 11524 }, { "epoch": 0.15, "grad_norm": 5.160465240478516, "learning_rate": 1.9997135211837713e-05, "loss": 2.5395, "step": 11525 }, { "epoch": 0.15, "grad_norm": 4.558347702026367, "learning_rate": 1.9997132696227516e-05, "loss": 2.0502, "step": 11526 }, { "epoch": 0.15, "grad_norm": 5.32716703414917, "learning_rate": 1.9997130179513465e-05, "loss": 2.5223, "step": 11527 }, { "epoch": 0.15, "grad_norm": 4.331745624542236, "learning_rate": 1.9997127661695565e-05, "loss": 2.1776, "step": 11528 }, { "epoch": 0.15, "grad_norm": 4.703280925750732, "learning_rate": 1.9997125142773807e-05, "loss": 2.3731, "step": 11529 }, { "epoch": 0.15, "grad_norm": 4.599111557006836, "learning_rate": 1.99971226227482e-05, "loss": 2.3631, "step": 11530 }, { "epoch": 0.15, "grad_norm": 4.551423072814941, "learning_rate": 1.999712010161874e-05, "loss": 1.9373, "step": 11531 }, { "epoch": 0.15, "grad_norm": 5.719842910766602, "learning_rate": 1.9997117579385425e-05, "loss": 2.6701, "step": 11532 }, { "epoch": 0.15, "grad_norm": 4.726271629333496, "learning_rate": 1.999711505604826e-05, "loss": 2.6679, "step": 11533 }, { "epoch": 0.15, "grad_norm": 6.134802341461182, "learning_rate": 1.9997112531607247e-05, "loss": 2.852, "step": 11534 }, { "epoch": 0.15, "grad_norm": 5.093177318572998, "learning_rate": 1.9997110006062384e-05, "loss": 2.5354, "step": 11535 }, { "epoch": 0.15, "grad_norm": 5.885502815246582, "learning_rate": 1.9997107479413666e-05, "loss": 2.0678, "step": 11536 }, { "epoch": 0.15, "grad_norm": 5.399373531341553, "learning_rate": 1.99971049516611e-05, "loss": 2.0708, "step": 11537 }, { "epoch": 0.15, "grad_norm": 5.116969585418701, "learning_rate": 1.9997102422804686e-05, "loss": 2.8826, "step": 11538 }, { "epoch": 0.15, "grad_norm": 5.084625244140625, "learning_rate": 1.999709989284442e-05, "loss": 2.8261, "step": 11539 }, { "epoch": 0.15, "grad_norm": 4.924742698669434, "learning_rate": 1.9997097361780305e-05, "loss": 2.3501, "step": 11540 }, { "epoch": 0.15, "grad_norm": 4.942935466766357, "learning_rate": 1.999709482961234e-05, "loss": 2.1649, "step": 11541 }, { "epoch": 0.15, "grad_norm": 4.708191394805908, "learning_rate": 1.9997092296340525e-05, "loss": 2.6728, "step": 11542 }, { "epoch": 0.15, "grad_norm": 6.2084431648254395, "learning_rate": 1.9997089761964865e-05, "loss": 3.2567, "step": 11543 }, { "epoch": 0.15, "grad_norm": 5.40977668762207, "learning_rate": 1.9997087226485355e-05, "loss": 2.8202, "step": 11544 }, { "epoch": 0.15, "grad_norm": 6.453098297119141, "learning_rate": 1.9997084689901995e-05, "loss": 2.9018, "step": 11545 }, { "epoch": 0.15, "grad_norm": 5.352470874786377, "learning_rate": 1.999708215221479e-05, "loss": 2.6089, "step": 11546 }, { "epoch": 0.15, "grad_norm": 4.904977798461914, "learning_rate": 1.999707961342374e-05, "loss": 2.3995, "step": 11547 }, { "epoch": 0.15, "grad_norm": 4.621095180511475, "learning_rate": 1.9997077073528835e-05, "loss": 2.6359, "step": 11548 }, { "epoch": 0.15, "grad_norm": 5.605673789978027, "learning_rate": 1.9997074532530086e-05, "loss": 2.3962, "step": 11549 }, { "epoch": 0.15, "grad_norm": 4.807569980621338, "learning_rate": 1.9997071990427493e-05, "loss": 1.979, "step": 11550 }, { "epoch": 0.15, "grad_norm": 5.028337478637695, "learning_rate": 1.9997069447221054e-05, "loss": 2.2245, "step": 11551 }, { "epoch": 0.15, "grad_norm": 5.2031636238098145, "learning_rate": 1.9997066902910768e-05, "loss": 2.5374, "step": 11552 }, { "epoch": 0.15, "grad_norm": 5.9415764808654785, "learning_rate": 1.9997064357496635e-05, "loss": 2.5845, "step": 11553 }, { "epoch": 0.15, "grad_norm": 4.793817520141602, "learning_rate": 1.9997061810978656e-05, "loss": 2.3874, "step": 11554 }, { "epoch": 0.15, "grad_norm": 5.410774230957031, "learning_rate": 1.9997059263356834e-05, "loss": 2.4585, "step": 11555 }, { "epoch": 0.15, "grad_norm": 4.457921981811523, "learning_rate": 1.9997056714631166e-05, "loss": 2.2977, "step": 11556 }, { "epoch": 0.15, "grad_norm": 5.512365818023682, "learning_rate": 1.9997054164801654e-05, "loss": 2.5594, "step": 11557 }, { "epoch": 0.15, "grad_norm": 4.844578742980957, "learning_rate": 1.9997051613868295e-05, "loss": 2.5543, "step": 11558 }, { "epoch": 0.15, "grad_norm": 5.065969944000244, "learning_rate": 1.9997049061831094e-05, "loss": 2.1865, "step": 11559 }, { "epoch": 0.15, "grad_norm": 5.47794246673584, "learning_rate": 1.999704650869005e-05, "loss": 2.7379, "step": 11560 }, { "epoch": 0.15, "grad_norm": 5.037389755249023, "learning_rate": 1.9997043954445157e-05, "loss": 2.3753, "step": 11561 }, { "epoch": 0.15, "grad_norm": 4.664182662963867, "learning_rate": 1.999704139909643e-05, "loss": 2.4847, "step": 11562 }, { "epoch": 0.15, "grad_norm": 5.78065824508667, "learning_rate": 1.9997038842643852e-05, "loss": 2.5673, "step": 11563 }, { "epoch": 0.15, "grad_norm": 4.198675155639648, "learning_rate": 1.9997036285087438e-05, "loss": 2.2188, "step": 11564 }, { "epoch": 0.15, "grad_norm": 4.299830436706543, "learning_rate": 1.9997033726427174e-05, "loss": 2.0567, "step": 11565 }, { "epoch": 0.15, "grad_norm": 5.381239891052246, "learning_rate": 1.9997031166663073e-05, "loss": 2.5588, "step": 11566 }, { "epoch": 0.15, "grad_norm": 6.5121259689331055, "learning_rate": 1.999702860579513e-05, "loss": 2.8137, "step": 11567 }, { "epoch": 0.15, "grad_norm": 4.714628219604492, "learning_rate": 1.9997026043823343e-05, "loss": 2.785, "step": 11568 }, { "epoch": 0.15, "grad_norm": 5.020940780639648, "learning_rate": 1.999702348074772e-05, "loss": 2.262, "step": 11569 }, { "epoch": 0.15, "grad_norm": 4.520066738128662, "learning_rate": 1.999702091656825e-05, "loss": 1.8825, "step": 11570 }, { "epoch": 0.15, "grad_norm": 5.259427547454834, "learning_rate": 1.9997018351284944e-05, "loss": 2.3003, "step": 11571 }, { "epoch": 0.15, "grad_norm": 4.451279640197754, "learning_rate": 1.9997015784897795e-05, "loss": 2.4238, "step": 11572 }, { "epoch": 0.15, "grad_norm": 6.202217102050781, "learning_rate": 1.9997013217406805e-05, "loss": 2.6469, "step": 11573 }, { "epoch": 0.15, "grad_norm": 4.519150257110596, "learning_rate": 1.9997010648811973e-05, "loss": 1.7693, "step": 11574 }, { "epoch": 0.15, "grad_norm": 4.078777313232422, "learning_rate": 1.9997008079113308e-05, "loss": 1.945, "step": 11575 }, { "epoch": 0.15, "grad_norm": 5.040008544921875, "learning_rate": 1.99970055083108e-05, "loss": 2.6638, "step": 11576 }, { "epoch": 0.15, "grad_norm": 4.752825736999512, "learning_rate": 1.999700293640445e-05, "loss": 2.3781, "step": 11577 }, { "epoch": 0.15, "grad_norm": 4.72965669631958, "learning_rate": 1.9997000363394267e-05, "loss": 2.345, "step": 11578 }, { "epoch": 0.15, "grad_norm": 5.310125350952148, "learning_rate": 1.9996997789280242e-05, "loss": 2.8149, "step": 11579 }, { "epoch": 0.15, "grad_norm": 5.536340713500977, "learning_rate": 1.999699521406238e-05, "loss": 2.367, "step": 11580 }, { "epoch": 0.15, "grad_norm": 4.4546003341674805, "learning_rate": 1.999699263774068e-05, "loss": 2.1317, "step": 11581 }, { "epoch": 0.15, "grad_norm": 5.108304023742676, "learning_rate": 1.9996990060315144e-05, "loss": 2.5869, "step": 11582 }, { "epoch": 0.15, "grad_norm": 5.336627006530762, "learning_rate": 1.999698748178577e-05, "loss": 1.929, "step": 11583 }, { "epoch": 0.15, "grad_norm": 5.228722095489502, "learning_rate": 1.9996984902152557e-05, "loss": 2.5756, "step": 11584 }, { "epoch": 0.15, "grad_norm": 5.2453742027282715, "learning_rate": 1.9996982321415508e-05, "loss": 2.5114, "step": 11585 }, { "epoch": 0.15, "grad_norm": 4.735183238983154, "learning_rate": 1.999697973957462e-05, "loss": 2.3483, "step": 11586 }, { "epoch": 0.15, "grad_norm": 5.989079475402832, "learning_rate": 1.9996977156629903e-05, "loss": 2.6186, "step": 11587 }, { "epoch": 0.15, "grad_norm": 5.18109655380249, "learning_rate": 1.9996974572581344e-05, "loss": 2.3532, "step": 11588 }, { "epoch": 0.15, "grad_norm": 4.996594429016113, "learning_rate": 1.9996971987428952e-05, "loss": 1.901, "step": 11589 }, { "epoch": 0.15, "grad_norm": 4.767280578613281, "learning_rate": 1.9996969401172724e-05, "loss": 2.5269, "step": 11590 }, { "epoch": 0.15, "grad_norm": 6.365972518920898, "learning_rate": 1.9996966813812663e-05, "loss": 2.8035, "step": 11591 }, { "epoch": 0.15, "grad_norm": 4.962021350860596, "learning_rate": 1.9996964225348762e-05, "loss": 2.2929, "step": 11592 }, { "epoch": 0.15, "grad_norm": 4.519159317016602, "learning_rate": 1.999696163578103e-05, "loss": 1.7616, "step": 11593 }, { "epoch": 0.15, "grad_norm": 5.0196027755737305, "learning_rate": 1.9996959045109464e-05, "loss": 3.2238, "step": 11594 }, { "epoch": 0.15, "grad_norm": 4.591782569885254, "learning_rate": 1.9996956453334064e-05, "loss": 2.1017, "step": 11595 }, { "epoch": 0.15, "grad_norm": 5.668819427490234, "learning_rate": 1.999695386045483e-05, "loss": 2.0371, "step": 11596 }, { "epoch": 0.15, "grad_norm": 4.885736465454102, "learning_rate": 1.9996951266471765e-05, "loss": 2.1783, "step": 11597 }, { "epoch": 0.15, "grad_norm": 4.728342533111572, "learning_rate": 1.9996948671384863e-05, "loss": 2.3186, "step": 11598 }, { "epoch": 0.15, "grad_norm": 5.05061674118042, "learning_rate": 1.999694607519413e-05, "loss": 2.5138, "step": 11599 }, { "epoch": 0.15, "grad_norm": 4.971958637237549, "learning_rate": 1.9996943477899563e-05, "loss": 2.2076, "step": 11600 }, { "epoch": 0.15, "grad_norm": 5.233386993408203, "learning_rate": 1.9996940879501168e-05, "loss": 2.5778, "step": 11601 }, { "epoch": 0.15, "grad_norm": 5.321639060974121, "learning_rate": 1.999693827999894e-05, "loss": 2.4091, "step": 11602 }, { "epoch": 0.15, "grad_norm": 4.825845241546631, "learning_rate": 1.9996935679392876e-05, "loss": 2.3263, "step": 11603 }, { "epoch": 0.15, "grad_norm": 4.954217910766602, "learning_rate": 1.9996933077682986e-05, "loss": 2.3237, "step": 11604 }, { "epoch": 0.15, "grad_norm": 4.91518497467041, "learning_rate": 1.9996930474869263e-05, "loss": 2.3617, "step": 11605 }, { "epoch": 0.15, "grad_norm": 4.66402006149292, "learning_rate": 1.9996927870951706e-05, "loss": 2.2102, "step": 11606 }, { "epoch": 0.15, "grad_norm": 4.2083306312561035, "learning_rate": 1.9996925265930324e-05, "loss": 1.9136, "step": 11607 }, { "epoch": 0.15, "grad_norm": 5.338372230529785, "learning_rate": 1.9996922659805108e-05, "loss": 2.4752, "step": 11608 }, { "epoch": 0.15, "grad_norm": 4.899264335632324, "learning_rate": 1.9996920052576066e-05, "loss": 2.5388, "step": 11609 }, { "epoch": 0.15, "grad_norm": 4.304023265838623, "learning_rate": 1.9996917444243192e-05, "loss": 2.0641, "step": 11610 }, { "epoch": 0.15, "grad_norm": 4.896451473236084, "learning_rate": 1.9996914834806487e-05, "loss": 2.4588, "step": 11611 }, { "epoch": 0.15, "grad_norm": 5.0187764167785645, "learning_rate": 1.9996912224265953e-05, "loss": 2.1021, "step": 11612 }, { "epoch": 0.15, "grad_norm": 4.417504787445068, "learning_rate": 1.9996909612621593e-05, "loss": 2.1127, "step": 11613 }, { "epoch": 0.15, "grad_norm": 4.996898174285889, "learning_rate": 1.9996906999873403e-05, "loss": 2.1802, "step": 11614 }, { "epoch": 0.15, "grad_norm": 4.801451206207275, "learning_rate": 1.9996904386021384e-05, "loss": 2.0725, "step": 11615 }, { "epoch": 0.15, "grad_norm": 4.382615089416504, "learning_rate": 1.999690177106554e-05, "loss": 2.2972, "step": 11616 }, { "epoch": 0.15, "grad_norm": 5.136987686157227, "learning_rate": 1.9996899155005866e-05, "loss": 2.4583, "step": 11617 }, { "epoch": 0.15, "grad_norm": 5.208677291870117, "learning_rate": 1.9996896537842368e-05, "loss": 2.5481, "step": 11618 }, { "epoch": 0.15, "grad_norm": 4.771947860717773, "learning_rate": 1.999689391957504e-05, "loss": 2.4642, "step": 11619 }, { "epoch": 0.15, "grad_norm": 4.586548328399658, "learning_rate": 1.9996891300203886e-05, "loss": 2.2568, "step": 11620 }, { "epoch": 0.15, "grad_norm": 4.590854644775391, "learning_rate": 1.9996888679728906e-05, "loss": 2.2525, "step": 11621 }, { "epoch": 0.15, "grad_norm": 4.323793411254883, "learning_rate": 1.99968860581501e-05, "loss": 1.9261, "step": 11622 }, { "epoch": 0.15, "grad_norm": 4.53071928024292, "learning_rate": 1.9996883435467467e-05, "loss": 2.2425, "step": 11623 }, { "epoch": 0.15, "grad_norm": 4.182562351226807, "learning_rate": 1.999688081168101e-05, "loss": 1.7295, "step": 11624 }, { "epoch": 0.15, "grad_norm": 4.707059860229492, "learning_rate": 1.9996878186790726e-05, "loss": 2.0074, "step": 11625 }, { "epoch": 0.15, "grad_norm": 4.499849796295166, "learning_rate": 1.9996875560796618e-05, "loss": 2.0207, "step": 11626 }, { "epoch": 0.15, "grad_norm": 5.339979648590088, "learning_rate": 1.9996872933698687e-05, "loss": 2.469, "step": 11627 }, { "epoch": 0.15, "grad_norm": 6.085370063781738, "learning_rate": 1.999687030549693e-05, "loss": 3.0007, "step": 11628 }, { "epoch": 0.15, "grad_norm": 5.118659496307373, "learning_rate": 1.9996867676191346e-05, "loss": 2.0657, "step": 11629 }, { "epoch": 0.15, "grad_norm": 5.746268272399902, "learning_rate": 1.9996865045781944e-05, "loss": 2.8131, "step": 11630 }, { "epoch": 0.15, "grad_norm": 5.56079626083374, "learning_rate": 1.9996862414268715e-05, "loss": 2.5981, "step": 11631 }, { "epoch": 0.15, "grad_norm": 5.213558197021484, "learning_rate": 1.9996859781651663e-05, "loss": 2.3167, "step": 11632 }, { "epoch": 0.15, "grad_norm": 5.164259910583496, "learning_rate": 1.9996857147930788e-05, "loss": 2.6832, "step": 11633 }, { "epoch": 0.15, "grad_norm": 6.008781909942627, "learning_rate": 1.999685451310609e-05, "loss": 2.6782, "step": 11634 }, { "epoch": 0.15, "grad_norm": 4.601418495178223, "learning_rate": 1.9996851877177573e-05, "loss": 2.1186, "step": 11635 }, { "epoch": 0.15, "grad_norm": 5.034898281097412, "learning_rate": 1.999684924014523e-05, "loss": 2.6098, "step": 11636 }, { "epoch": 0.15, "grad_norm": 4.597867488861084, "learning_rate": 1.9996846602009067e-05, "loss": 2.2535, "step": 11637 }, { "epoch": 0.15, "grad_norm": 5.759201526641846, "learning_rate": 1.9996843962769082e-05, "loss": 2.3408, "step": 11638 }, { "epoch": 0.15, "grad_norm": 5.221966743469238, "learning_rate": 1.9996841322425277e-05, "loss": 2.728, "step": 11639 }, { "epoch": 0.15, "grad_norm": 4.283171653747559, "learning_rate": 1.999683868097765e-05, "loss": 2.0365, "step": 11640 }, { "epoch": 0.15, "grad_norm": 5.076529502868652, "learning_rate": 1.9996836038426202e-05, "loss": 2.1811, "step": 11641 }, { "epoch": 0.15, "grad_norm": 5.040167808532715, "learning_rate": 1.9996833394770935e-05, "loss": 2.576, "step": 11642 }, { "epoch": 0.15, "grad_norm": 4.354977607727051, "learning_rate": 1.9996830750011845e-05, "loss": 2.0887, "step": 11643 }, { "epoch": 0.15, "grad_norm": 5.794002056121826, "learning_rate": 1.999682810414894e-05, "loss": 2.3824, "step": 11644 }, { "epoch": 0.15, "grad_norm": 5.683473587036133, "learning_rate": 1.9996825457182215e-05, "loss": 2.4492, "step": 11645 }, { "epoch": 0.15, "grad_norm": 5.357799053192139, "learning_rate": 1.9996822809111667e-05, "loss": 2.873, "step": 11646 }, { "epoch": 0.15, "grad_norm": 5.067392349243164, "learning_rate": 1.9996820159937303e-05, "loss": 2.7551, "step": 11647 }, { "epoch": 0.15, "grad_norm": 5.67154598236084, "learning_rate": 1.999681750965912e-05, "loss": 2.3011, "step": 11648 }, { "epoch": 0.15, "grad_norm": 3.763920307159424, "learning_rate": 1.999681485827712e-05, "loss": 1.7834, "step": 11649 }, { "epoch": 0.15, "grad_norm": 5.4561920166015625, "learning_rate": 1.9996812205791297e-05, "loss": 2.2293, "step": 11650 }, { "epoch": 0.15, "grad_norm": 4.6578216552734375, "learning_rate": 1.999680955220166e-05, "loss": 2.0536, "step": 11651 }, { "epoch": 0.15, "grad_norm": 5.297674179077148, "learning_rate": 1.9996806897508208e-05, "loss": 2.3189, "step": 11652 }, { "epoch": 0.15, "grad_norm": 5.249897003173828, "learning_rate": 1.9996804241710934e-05, "loss": 2.5038, "step": 11653 }, { "epoch": 0.15, "grad_norm": 5.289384365081787, "learning_rate": 1.9996801584809847e-05, "loss": 2.3823, "step": 11654 }, { "epoch": 0.15, "grad_norm": 4.6099724769592285, "learning_rate": 1.999679892680494e-05, "loss": 2.0065, "step": 11655 }, { "epoch": 0.15, "grad_norm": 4.489526748657227, "learning_rate": 1.9996796267696222e-05, "loss": 2.3447, "step": 11656 }, { "epoch": 0.15, "grad_norm": 4.64655065536499, "learning_rate": 1.9996793607483684e-05, "loss": 2.1539, "step": 11657 }, { "epoch": 0.15, "grad_norm": 5.688505172729492, "learning_rate": 1.999679094616733e-05, "loss": 2.4629, "step": 11658 }, { "epoch": 0.15, "grad_norm": 5.029387474060059, "learning_rate": 1.9996788283747162e-05, "loss": 2.6945, "step": 11659 }, { "epoch": 0.15, "grad_norm": 4.540671348571777, "learning_rate": 1.999678562022318e-05, "loss": 1.7455, "step": 11660 }, { "epoch": 0.15, "grad_norm": 5.188668727874756, "learning_rate": 1.9996782955595384e-05, "loss": 2.6221, "step": 11661 }, { "epoch": 0.15, "grad_norm": 5.123263835906982, "learning_rate": 1.9996780289863768e-05, "loss": 2.527, "step": 11662 }, { "epoch": 0.15, "grad_norm": 4.612757205963135, "learning_rate": 1.9996777623028344e-05, "loss": 2.2158, "step": 11663 }, { "epoch": 0.15, "grad_norm": 4.695635795593262, "learning_rate": 1.9996774955089103e-05, "loss": 2.1334, "step": 11664 }, { "epoch": 0.15, "grad_norm": 4.864478588104248, "learning_rate": 1.999677228604605e-05, "loss": 2.3361, "step": 11665 }, { "epoch": 0.15, "grad_norm": 5.15611457824707, "learning_rate": 1.999676961589918e-05, "loss": 2.6393, "step": 11666 }, { "epoch": 0.15, "grad_norm": 5.320051670074463, "learning_rate": 1.9996766944648502e-05, "loss": 2.4928, "step": 11667 }, { "epoch": 0.15, "grad_norm": 4.827033042907715, "learning_rate": 1.999676427229401e-05, "loss": 2.5055, "step": 11668 }, { "epoch": 0.15, "grad_norm": 5.039051055908203, "learning_rate": 1.9996761598835707e-05, "loss": 2.1687, "step": 11669 }, { "epoch": 0.15, "grad_norm": 4.934206485748291, "learning_rate": 1.9996758924273587e-05, "loss": 2.3813, "step": 11670 }, { "epoch": 0.15, "grad_norm": 5.259105205535889, "learning_rate": 1.9996756248607657e-05, "loss": 2.9914, "step": 11671 }, { "epoch": 0.15, "grad_norm": 5.639698028564453, "learning_rate": 1.999675357183792e-05, "loss": 2.5987, "step": 11672 }, { "epoch": 0.15, "grad_norm": 4.805471897125244, "learning_rate": 1.9996750893964364e-05, "loss": 2.9981, "step": 11673 }, { "epoch": 0.15, "grad_norm": 5.741859436035156, "learning_rate": 1.9996748214987004e-05, "loss": 2.4465, "step": 11674 }, { "epoch": 0.15, "grad_norm": 4.088096618652344, "learning_rate": 1.999674553490583e-05, "loss": 2.3993, "step": 11675 }, { "epoch": 0.15, "grad_norm": 5.07551908493042, "learning_rate": 1.9996742853720848e-05, "loss": 2.5561, "step": 11676 }, { "epoch": 0.15, "grad_norm": 5.011531352996826, "learning_rate": 1.9996740171432052e-05, "loss": 1.9269, "step": 11677 }, { "epoch": 0.15, "grad_norm": 5.32713508605957, "learning_rate": 1.999673748803945e-05, "loss": 2.4404, "step": 11678 }, { "epoch": 0.15, "grad_norm": 5.742001056671143, "learning_rate": 1.9996734803543038e-05, "loss": 2.4254, "step": 11679 }, { "epoch": 0.15, "grad_norm": 5.714741230010986, "learning_rate": 1.9996732117942818e-05, "loss": 2.4268, "step": 11680 }, { "epoch": 0.15, "grad_norm": 4.685866832733154, "learning_rate": 1.9996729431238785e-05, "loss": 2.0388, "step": 11681 }, { "epoch": 0.15, "grad_norm": 4.6180620193481445, "learning_rate": 1.9996726743430947e-05, "loss": 1.9308, "step": 11682 }, { "epoch": 0.15, "grad_norm": 3.9892890453338623, "learning_rate": 1.99967240545193e-05, "loss": 2.1572, "step": 11683 }, { "epoch": 0.15, "grad_norm": 5.836544513702393, "learning_rate": 1.9996721364503846e-05, "loss": 2.9579, "step": 11684 }, { "epoch": 0.15, "grad_norm": 4.8264241218566895, "learning_rate": 1.9996718673384583e-05, "loss": 2.4855, "step": 11685 }, { "epoch": 0.15, "grad_norm": 4.556820392608643, "learning_rate": 1.999671598116151e-05, "loss": 2.3674, "step": 11686 }, { "epoch": 0.15, "grad_norm": 4.698071002960205, "learning_rate": 1.9996713287834633e-05, "loss": 1.9009, "step": 11687 }, { "epoch": 0.15, "grad_norm": 5.310426712036133, "learning_rate": 1.9996710593403953e-05, "loss": 2.6172, "step": 11688 }, { "epoch": 0.15, "grad_norm": 4.526586055755615, "learning_rate": 1.9996707897869463e-05, "loss": 2.137, "step": 11689 }, { "epoch": 0.15, "grad_norm": 4.945224285125732, "learning_rate": 1.9996705201231167e-05, "loss": 2.7306, "step": 11690 }, { "epoch": 0.15, "grad_norm": 4.075563430786133, "learning_rate": 1.9996702503489062e-05, "loss": 2.2472, "step": 11691 }, { "epoch": 0.15, "grad_norm": 4.787052631378174, "learning_rate": 1.9996699804643155e-05, "loss": 2.5309, "step": 11692 }, { "epoch": 0.15, "grad_norm": 4.257717132568359, "learning_rate": 1.9996697104693445e-05, "loss": 2.2194, "step": 11693 }, { "epoch": 0.15, "grad_norm": 5.147042751312256, "learning_rate": 1.9996694403639925e-05, "loss": 2.0881, "step": 11694 }, { "epoch": 0.15, "grad_norm": 5.663466453552246, "learning_rate": 1.99966917014826e-05, "loss": 2.7693, "step": 11695 }, { "epoch": 0.15, "grad_norm": 4.185898780822754, "learning_rate": 1.9996688998221472e-05, "loss": 2.2537, "step": 11696 }, { "epoch": 0.15, "grad_norm": 4.310549736022949, "learning_rate": 1.9996686293856542e-05, "loss": 2.1298, "step": 11697 }, { "epoch": 0.15, "grad_norm": 4.878964424133301, "learning_rate": 1.999668358838781e-05, "loss": 2.5643, "step": 11698 }, { "epoch": 0.15, "grad_norm": 4.82213830947876, "learning_rate": 1.999668088181527e-05, "loss": 2.6477, "step": 11699 }, { "epoch": 0.15, "grad_norm": 5.502222061157227, "learning_rate": 1.999667817413893e-05, "loss": 2.8536, "step": 11700 }, { "epoch": 0.15, "grad_norm": 4.646450519561768, "learning_rate": 1.9996675465358783e-05, "loss": 2.2899, "step": 11701 }, { "epoch": 0.15, "grad_norm": 5.035329341888428, "learning_rate": 1.9996672755474836e-05, "loss": 1.8394, "step": 11702 }, { "epoch": 0.15, "grad_norm": 5.09835958480835, "learning_rate": 1.9996670044487088e-05, "loss": 2.6028, "step": 11703 }, { "epoch": 0.15, "grad_norm": 5.035196781158447, "learning_rate": 1.9996667332395536e-05, "loss": 2.3448, "step": 11704 }, { "epoch": 0.15, "grad_norm": 4.722517490386963, "learning_rate": 1.9996664619200183e-05, "loss": 2.2544, "step": 11705 }, { "epoch": 0.15, "grad_norm": 5.233933925628662, "learning_rate": 1.999666190490103e-05, "loss": 2.4789, "step": 11706 }, { "epoch": 0.15, "grad_norm": 4.443356037139893, "learning_rate": 1.9996659189498074e-05, "loss": 2.2898, "step": 11707 }, { "epoch": 0.15, "grad_norm": 4.23872184753418, "learning_rate": 1.9996656472991317e-05, "loss": 1.9408, "step": 11708 }, { "epoch": 0.15, "grad_norm": 4.522138595581055, "learning_rate": 1.9996653755380763e-05, "loss": 2.304, "step": 11709 }, { "epoch": 0.15, "grad_norm": 4.429575443267822, "learning_rate": 1.9996651036666407e-05, "loss": 2.4169, "step": 11710 }, { "epoch": 0.15, "grad_norm": 4.481004238128662, "learning_rate": 1.9996648316848248e-05, "loss": 2.0124, "step": 11711 }, { "epoch": 0.15, "grad_norm": 4.909153461456299, "learning_rate": 1.999664559592629e-05, "loss": 2.4039, "step": 11712 }, { "epoch": 0.15, "grad_norm": 4.648471355438232, "learning_rate": 1.9996642873900537e-05, "loss": 2.2739, "step": 11713 }, { "epoch": 0.15, "grad_norm": 5.287430763244629, "learning_rate": 1.999664015077098e-05, "loss": 2.3912, "step": 11714 }, { "epoch": 0.15, "grad_norm": 4.600931167602539, "learning_rate": 1.999663742653763e-05, "loss": 2.2071, "step": 11715 }, { "epoch": 0.15, "grad_norm": 4.681671142578125, "learning_rate": 1.9996634701200478e-05, "loss": 1.9714, "step": 11716 }, { "epoch": 0.15, "grad_norm": 5.053953170776367, "learning_rate": 1.9996631974759525e-05, "loss": 2.3315, "step": 11717 }, { "epoch": 0.15, "grad_norm": 4.6451640129089355, "learning_rate": 1.9996629247214782e-05, "loss": 2.1769, "step": 11718 }, { "epoch": 0.15, "grad_norm": 4.352179527282715, "learning_rate": 1.9996626518566237e-05, "loss": 1.797, "step": 11719 }, { "epoch": 0.15, "grad_norm": 4.731192111968994, "learning_rate": 1.9996623788813893e-05, "loss": 2.3391, "step": 11720 }, { "epoch": 0.15, "grad_norm": 4.750482082366943, "learning_rate": 1.9996621057957753e-05, "loss": 2.6452, "step": 11721 }, { "epoch": 0.15, "grad_norm": 4.917037487030029, "learning_rate": 1.9996618325997818e-05, "loss": 2.5627, "step": 11722 }, { "epoch": 0.15, "grad_norm": 4.619096279144287, "learning_rate": 1.9996615592934087e-05, "loss": 1.9453, "step": 11723 }, { "epoch": 0.15, "grad_norm": 5.481803894042969, "learning_rate": 1.999661285876656e-05, "loss": 2.4315, "step": 11724 }, { "epoch": 0.15, "grad_norm": 4.869987487792969, "learning_rate": 1.9996610123495237e-05, "loss": 2.4426, "step": 11725 }, { "epoch": 0.15, "grad_norm": 5.033904075622559, "learning_rate": 1.999660738712012e-05, "loss": 2.6278, "step": 11726 }, { "epoch": 0.15, "grad_norm": 5.127845764160156, "learning_rate": 1.9996604649641204e-05, "loss": 2.4108, "step": 11727 }, { "epoch": 0.15, "grad_norm": 5.1346588134765625, "learning_rate": 1.9996601911058494e-05, "loss": 2.1514, "step": 11728 }, { "epoch": 0.15, "grad_norm": 4.8101372718811035, "learning_rate": 1.9996599171371995e-05, "loss": 2.643, "step": 11729 }, { "epoch": 0.15, "grad_norm": 4.261755466461182, "learning_rate": 1.9996596430581697e-05, "loss": 2.1278, "step": 11730 }, { "epoch": 0.15, "grad_norm": 5.900538921356201, "learning_rate": 1.9996593688687607e-05, "loss": 2.4332, "step": 11731 }, { "epoch": 0.15, "grad_norm": 4.452801704406738, "learning_rate": 1.999659094568972e-05, "loss": 2.052, "step": 11732 }, { "epoch": 0.15, "grad_norm": 5.456794738769531, "learning_rate": 1.9996588201588042e-05, "loss": 2.3777, "step": 11733 }, { "epoch": 0.15, "grad_norm": 4.406388282775879, "learning_rate": 1.9996585456382575e-05, "loss": 2.2105, "step": 11734 }, { "epoch": 0.15, "grad_norm": 5.508000373840332, "learning_rate": 1.9996582710073312e-05, "loss": 2.6107, "step": 11735 }, { "epoch": 0.15, "grad_norm": 4.101746082305908, "learning_rate": 1.9996579962660253e-05, "loss": 1.8601, "step": 11736 }, { "epoch": 0.15, "grad_norm": 5.099445343017578, "learning_rate": 1.999657721414341e-05, "loss": 2.3953, "step": 11737 }, { "epoch": 0.15, "grad_norm": 4.387517929077148, "learning_rate": 1.9996574464522768e-05, "loss": 2.4432, "step": 11738 }, { "epoch": 0.15, "grad_norm": 4.6580491065979, "learning_rate": 1.999657171379834e-05, "loss": 2.889, "step": 11739 }, { "epoch": 0.15, "grad_norm": 4.925025463104248, "learning_rate": 1.9996568961970117e-05, "loss": 2.2532, "step": 11740 }, { "epoch": 0.15, "grad_norm": 4.6001739501953125, "learning_rate": 1.9996566209038104e-05, "loss": 2.6113, "step": 11741 }, { "epoch": 0.15, "grad_norm": 4.842058181762695, "learning_rate": 1.9996563455002304e-05, "loss": 2.6657, "step": 11742 }, { "epoch": 0.15, "grad_norm": 5.42285680770874, "learning_rate": 1.9996560699862706e-05, "loss": 2.392, "step": 11743 }, { "epoch": 0.15, "grad_norm": 4.532752990722656, "learning_rate": 1.9996557943619325e-05, "loss": 2.5465, "step": 11744 }, { "epoch": 0.15, "grad_norm": 4.98185396194458, "learning_rate": 1.999655518627215e-05, "loss": 2.2201, "step": 11745 }, { "epoch": 0.15, "grad_norm": 4.782646179199219, "learning_rate": 1.999655242782119e-05, "loss": 2.8203, "step": 11746 }, { "epoch": 0.15, "grad_norm": 4.882551193237305, "learning_rate": 1.999654966826644e-05, "loss": 2.8335, "step": 11747 }, { "epoch": 0.15, "grad_norm": 4.445708751678467, "learning_rate": 1.99965469076079e-05, "loss": 2.1164, "step": 11748 }, { "epoch": 0.15, "grad_norm": 4.536911487579346, "learning_rate": 1.999654414584557e-05, "loss": 2.096, "step": 11749 }, { "epoch": 0.15, "grad_norm": 4.408207416534424, "learning_rate": 1.9996541382979455e-05, "loss": 2.3595, "step": 11750 }, { "epoch": 0.15, "grad_norm": 5.523553848266602, "learning_rate": 1.999653861900955e-05, "loss": 2.5942, "step": 11751 }, { "epoch": 0.15, "grad_norm": 4.853283882141113, "learning_rate": 1.9996535853935862e-05, "loss": 2.219, "step": 11752 }, { "epoch": 0.15, "grad_norm": 5.134686470031738, "learning_rate": 1.999653308775838e-05, "loss": 2.0886, "step": 11753 }, { "epoch": 0.15, "grad_norm": 4.409463882446289, "learning_rate": 1.9996530320477117e-05, "loss": 2.0911, "step": 11754 }, { "epoch": 0.15, "grad_norm": 4.84066915512085, "learning_rate": 1.9996527552092065e-05, "loss": 2.2692, "step": 11755 }, { "epoch": 0.15, "grad_norm": 4.718732833862305, "learning_rate": 1.9996524782603227e-05, "loss": 2.2795, "step": 11756 }, { "epoch": 0.15, "grad_norm": 4.406778335571289, "learning_rate": 1.9996522012010603e-05, "loss": 2.3299, "step": 11757 }, { "epoch": 0.15, "grad_norm": 4.960170745849609, "learning_rate": 1.999651924031419e-05, "loss": 2.5202, "step": 11758 }, { "epoch": 0.15, "grad_norm": 4.586254596710205, "learning_rate": 1.9996516467514e-05, "loss": 2.4831, "step": 11759 }, { "epoch": 0.15, "grad_norm": 4.922512531280518, "learning_rate": 1.9996513693610015e-05, "loss": 2.2118, "step": 11760 }, { "epoch": 0.15, "grad_norm": 4.217237949371338, "learning_rate": 1.9996510918602252e-05, "loss": 2.0034, "step": 11761 }, { "epoch": 0.15, "grad_norm": 4.417545795440674, "learning_rate": 1.9996508142490704e-05, "loss": 2.1945, "step": 11762 }, { "epoch": 0.15, "grad_norm": 5.376797676086426, "learning_rate": 1.999650536527537e-05, "loss": 2.772, "step": 11763 }, { "epoch": 0.15, "grad_norm": 5.394772529602051, "learning_rate": 1.9996502586956254e-05, "loss": 2.6747, "step": 11764 }, { "epoch": 0.15, "grad_norm": 4.782960414886475, "learning_rate": 1.9996499807533353e-05, "loss": 2.1258, "step": 11765 }, { "epoch": 0.15, "grad_norm": 4.404119968414307, "learning_rate": 1.999649702700667e-05, "loss": 1.8673, "step": 11766 }, { "epoch": 0.15, "grad_norm": 4.714208602905273, "learning_rate": 1.9996494245376203e-05, "loss": 2.3369, "step": 11767 }, { "epoch": 0.15, "grad_norm": 4.820945739746094, "learning_rate": 1.9996491462641955e-05, "loss": 2.7941, "step": 11768 }, { "epoch": 0.15, "grad_norm": 4.652576446533203, "learning_rate": 1.999648867880392e-05, "loss": 2.112, "step": 11769 }, { "epoch": 0.15, "grad_norm": 5.396099090576172, "learning_rate": 1.999648589386211e-05, "loss": 2.5601, "step": 11770 }, { "epoch": 0.15, "grad_norm": 4.753321647644043, "learning_rate": 1.9996483107816514e-05, "loss": 2.3799, "step": 11771 }, { "epoch": 0.15, "grad_norm": 5.176448822021484, "learning_rate": 1.999648032066714e-05, "loss": 2.5631, "step": 11772 }, { "epoch": 0.15, "grad_norm": 5.27855110168457, "learning_rate": 1.999647753241398e-05, "loss": 2.9574, "step": 11773 }, { "epoch": 0.15, "grad_norm": 4.380523204803467, "learning_rate": 1.999647474305704e-05, "loss": 2.1581, "step": 11774 }, { "epoch": 0.15, "grad_norm": 5.256270408630371, "learning_rate": 1.9996471952596327e-05, "loss": 2.699, "step": 11775 }, { "epoch": 0.15, "grad_norm": 4.9401631355285645, "learning_rate": 1.9996469161031825e-05, "loss": 2.5107, "step": 11776 }, { "epoch": 0.15, "grad_norm": 5.173591613769531, "learning_rate": 1.9996466368363547e-05, "loss": 2.1223, "step": 11777 }, { "epoch": 0.15, "grad_norm": 4.295653343200684, "learning_rate": 1.999646357459149e-05, "loss": 2.269, "step": 11778 }, { "epoch": 0.15, "grad_norm": 4.839058876037598, "learning_rate": 1.9996460779715656e-05, "loss": 2.342, "step": 11779 }, { "epoch": 0.15, "grad_norm": 4.821531295776367, "learning_rate": 1.9996457983736038e-05, "loss": 3.0374, "step": 11780 }, { "epoch": 0.15, "grad_norm": 4.797796726226807, "learning_rate": 1.9996455186652642e-05, "loss": 2.3965, "step": 11781 }, { "epoch": 0.15, "grad_norm": 4.818159580230713, "learning_rate": 1.9996452388465474e-05, "loss": 2.1241, "step": 11782 }, { "epoch": 0.15, "grad_norm": 5.1923828125, "learning_rate": 1.999644958917452e-05, "loss": 2.8905, "step": 11783 }, { "epoch": 0.15, "grad_norm": 4.618932247161865, "learning_rate": 1.9996446788779794e-05, "loss": 1.9859, "step": 11784 }, { "epoch": 0.15, "grad_norm": 4.924516677856445, "learning_rate": 1.9996443987281286e-05, "loss": 2.2897, "step": 11785 }, { "epoch": 0.15, "grad_norm": 4.255308628082275, "learning_rate": 1.9996441184679006e-05, "loss": 2.155, "step": 11786 }, { "epoch": 0.15, "grad_norm": 4.886401176452637, "learning_rate": 1.9996438380972947e-05, "loss": 2.4802, "step": 11787 }, { "epoch": 0.15, "grad_norm": 4.91118860244751, "learning_rate": 1.999643557616311e-05, "loss": 2.6379, "step": 11788 }, { "epoch": 0.15, "grad_norm": 7.066431522369385, "learning_rate": 1.99964327702495e-05, "loss": 2.9261, "step": 11789 }, { "epoch": 0.15, "grad_norm": 4.362457275390625, "learning_rate": 1.999642996323211e-05, "loss": 2.3032, "step": 11790 }, { "epoch": 0.15, "grad_norm": 4.9781951904296875, "learning_rate": 1.9996427155110948e-05, "loss": 2.2249, "step": 11791 }, { "epoch": 0.15, "grad_norm": 4.380919456481934, "learning_rate": 1.999642434588601e-05, "loss": 2.2656, "step": 11792 }, { "epoch": 0.15, "grad_norm": 4.579751491546631, "learning_rate": 1.9996421535557297e-05, "loss": 2.0687, "step": 11793 }, { "epoch": 0.15, "grad_norm": 5.263104438781738, "learning_rate": 1.999641872412481e-05, "loss": 2.0745, "step": 11794 }, { "epoch": 0.15, "grad_norm": 4.630977153778076, "learning_rate": 1.999641591158855e-05, "loss": 2.385, "step": 11795 }, { "epoch": 0.15, "grad_norm": 5.084254741668701, "learning_rate": 1.9996413097948514e-05, "loss": 2.2962, "step": 11796 }, { "epoch": 0.15, "grad_norm": 4.784982681274414, "learning_rate": 1.9996410283204705e-05, "loss": 2.3219, "step": 11797 }, { "epoch": 0.15, "grad_norm": 4.639289855957031, "learning_rate": 1.9996407467357123e-05, "loss": 2.0696, "step": 11798 }, { "epoch": 0.15, "grad_norm": 4.779251575469971, "learning_rate": 1.9996404650405766e-05, "loss": 2.2976, "step": 11799 }, { "epoch": 0.15, "grad_norm": 4.782533168792725, "learning_rate": 1.9996401832350637e-05, "loss": 2.6264, "step": 11800 }, { "epoch": 0.15, "grad_norm": 4.519457817077637, "learning_rate": 1.999639901319174e-05, "loss": 2.1363, "step": 11801 }, { "epoch": 0.15, "grad_norm": 4.368236064910889, "learning_rate": 1.9996396192929067e-05, "loss": 1.9982, "step": 11802 }, { "epoch": 0.15, "grad_norm": 5.7932257652282715, "learning_rate": 1.999639337156262e-05, "loss": 2.6045, "step": 11803 }, { "epoch": 0.15, "grad_norm": 4.5247955322265625, "learning_rate": 1.9996390549092405e-05, "loss": 2.191, "step": 11804 }, { "epoch": 0.15, "grad_norm": 4.468942165374756, "learning_rate": 1.999638772551842e-05, "loss": 2.0536, "step": 11805 }, { "epoch": 0.15, "grad_norm": 4.53692626953125, "learning_rate": 1.999638490084066e-05, "loss": 2.3107, "step": 11806 }, { "epoch": 0.15, "grad_norm": 4.509849548339844, "learning_rate": 1.9996382075059135e-05, "loss": 2.3805, "step": 11807 }, { "epoch": 0.15, "grad_norm": 5.351863861083984, "learning_rate": 1.9996379248173836e-05, "loss": 2.5091, "step": 11808 }, { "epoch": 0.15, "grad_norm": 5.882505893707275, "learning_rate": 1.9996376420184766e-05, "loss": 2.5964, "step": 11809 }, { "epoch": 0.15, "grad_norm": 5.324265003204346, "learning_rate": 1.999637359109193e-05, "loss": 2.7625, "step": 11810 }, { "epoch": 0.15, "grad_norm": 4.500983238220215, "learning_rate": 1.9996370760895326e-05, "loss": 2.16, "step": 11811 }, { "epoch": 0.15, "grad_norm": 4.753170013427734, "learning_rate": 1.999636792959495e-05, "loss": 2.343, "step": 11812 }, { "epoch": 0.15, "grad_norm": 4.528636932373047, "learning_rate": 1.99963650971908e-05, "loss": 2.2921, "step": 11813 }, { "epoch": 0.15, "grad_norm": 4.946966648101807, "learning_rate": 1.999636226368289e-05, "loss": 2.491, "step": 11814 }, { "epoch": 0.15, "grad_norm": 5.3107123374938965, "learning_rate": 1.999635942907121e-05, "loss": 3.0894, "step": 11815 }, { "epoch": 0.15, "grad_norm": 4.313024997711182, "learning_rate": 1.999635659335576e-05, "loss": 1.9826, "step": 11816 }, { "epoch": 0.15, "grad_norm": 4.848712921142578, "learning_rate": 1.9996353756536545e-05, "loss": 2.1619, "step": 11817 }, { "epoch": 0.15, "grad_norm": 5.762054920196533, "learning_rate": 1.9996350918613564e-05, "loss": 2.6885, "step": 11818 }, { "epoch": 0.15, "grad_norm": 4.323840141296387, "learning_rate": 1.9996348079586814e-05, "loss": 2.0054, "step": 11819 }, { "epoch": 0.15, "grad_norm": 4.84572696685791, "learning_rate": 1.99963452394563e-05, "loss": 2.4045, "step": 11820 }, { "epoch": 0.15, "grad_norm": 5.041342258453369, "learning_rate": 1.999634239822202e-05, "loss": 2.1992, "step": 11821 }, { "epoch": 0.15, "grad_norm": 5.446953773498535, "learning_rate": 1.999633955588397e-05, "loss": 2.4784, "step": 11822 }, { "epoch": 0.15, "grad_norm": 4.797178745269775, "learning_rate": 1.999633671244216e-05, "loss": 2.4445, "step": 11823 }, { "epoch": 0.15, "grad_norm": 4.546116352081299, "learning_rate": 1.999633386789658e-05, "loss": 2.0623, "step": 11824 }, { "epoch": 0.15, "grad_norm": 4.630508899688721, "learning_rate": 1.9996331022247234e-05, "loss": 2.5862, "step": 11825 }, { "epoch": 0.15, "grad_norm": 5.356350898742676, "learning_rate": 1.9996328175494127e-05, "loss": 2.7326, "step": 11826 }, { "epoch": 0.15, "grad_norm": 4.241877555847168, "learning_rate": 1.9996325327637255e-05, "loss": 2.0899, "step": 11827 }, { "epoch": 0.15, "grad_norm": 5.04209566116333, "learning_rate": 1.9996322478676618e-05, "loss": 3.0396, "step": 11828 }, { "epoch": 0.15, "grad_norm": 4.990772724151611, "learning_rate": 1.999631962861222e-05, "loss": 2.6416, "step": 11829 }, { "epoch": 0.15, "grad_norm": 4.0595574378967285, "learning_rate": 1.9996316777444055e-05, "loss": 1.7552, "step": 11830 }, { "epoch": 0.15, "grad_norm": 4.901318073272705, "learning_rate": 1.9996313925172132e-05, "loss": 2.0166, "step": 11831 }, { "epoch": 0.15, "grad_norm": 4.929865837097168, "learning_rate": 1.9996311071796444e-05, "loss": 2.6422, "step": 11832 }, { "epoch": 0.15, "grad_norm": 4.579031944274902, "learning_rate": 1.999630821731699e-05, "loss": 2.2276, "step": 11833 }, { "epoch": 0.15, "grad_norm": 4.524979591369629, "learning_rate": 1.9996305361733783e-05, "loss": 2.4715, "step": 11834 }, { "epoch": 0.15, "grad_norm": 5.2587995529174805, "learning_rate": 1.9996302505046806e-05, "loss": 2.8562, "step": 11835 }, { "epoch": 0.15, "grad_norm": 4.684773921966553, "learning_rate": 1.9996299647256067e-05, "loss": 2.175, "step": 11836 }, { "epoch": 0.15, "grad_norm": 4.7382049560546875, "learning_rate": 1.9996296788361573e-05, "loss": 2.2896, "step": 11837 }, { "epoch": 0.15, "grad_norm": 4.821780681610107, "learning_rate": 1.9996293928363314e-05, "loss": 2.9512, "step": 11838 }, { "epoch": 0.15, "grad_norm": 5.064957141876221, "learning_rate": 1.9996291067261296e-05, "loss": 2.3504, "step": 11839 }, { "epoch": 0.15, "grad_norm": 4.830749988555908, "learning_rate": 1.9996288205055517e-05, "loss": 2.4136, "step": 11840 }, { "epoch": 0.15, "grad_norm": 4.65692663192749, "learning_rate": 1.999628534174598e-05, "loss": 2.307, "step": 11841 }, { "epoch": 0.15, "grad_norm": 4.881789207458496, "learning_rate": 1.9996282477332683e-05, "loss": 2.4877, "step": 11842 }, { "epoch": 0.15, "grad_norm": 4.91018533706665, "learning_rate": 1.9996279611815628e-05, "loss": 2.5569, "step": 11843 }, { "epoch": 0.15, "grad_norm": 4.887543678283691, "learning_rate": 1.999627674519481e-05, "loss": 2.2911, "step": 11844 }, { "epoch": 0.15, "grad_norm": 4.763067722320557, "learning_rate": 1.999627387747024e-05, "loss": 2.4596, "step": 11845 }, { "epoch": 0.15, "grad_norm": 5.236109733581543, "learning_rate": 1.9996271008641905e-05, "loss": 2.9666, "step": 11846 }, { "epoch": 0.15, "grad_norm": 4.453221321105957, "learning_rate": 1.9996268138709817e-05, "loss": 2.5977, "step": 11847 }, { "epoch": 0.15, "grad_norm": 4.827600479125977, "learning_rate": 1.9996265267673966e-05, "loss": 2.3357, "step": 11848 }, { "epoch": 0.15, "grad_norm": 4.8638505935668945, "learning_rate": 1.999626239553436e-05, "loss": 3.1153, "step": 11849 }, { "epoch": 0.15, "grad_norm": 4.7440571784973145, "learning_rate": 1.9996259522291e-05, "loss": 2.2821, "step": 11850 }, { "epoch": 0.15, "grad_norm": 5.484438896179199, "learning_rate": 1.999625664794388e-05, "loss": 2.804, "step": 11851 }, { "epoch": 0.15, "grad_norm": 5.067374229431152, "learning_rate": 1.9996253772493006e-05, "loss": 2.6042, "step": 11852 }, { "epoch": 0.15, "grad_norm": 4.724935531616211, "learning_rate": 1.9996250895938373e-05, "loss": 2.0737, "step": 11853 }, { "epoch": 0.15, "grad_norm": 4.688680648803711, "learning_rate": 1.999624801827999e-05, "loss": 2.2081, "step": 11854 }, { "epoch": 0.15, "grad_norm": 5.120787143707275, "learning_rate": 1.9996245139517846e-05, "loss": 2.2481, "step": 11855 }, { "epoch": 0.15, "grad_norm": 4.334275722503662, "learning_rate": 1.999624225965195e-05, "loss": 2.1202, "step": 11856 }, { "epoch": 0.15, "grad_norm": 5.256467819213867, "learning_rate": 1.9996239378682298e-05, "loss": 2.0655, "step": 11857 }, { "epoch": 0.15, "grad_norm": 4.933478355407715, "learning_rate": 1.9996236496608893e-05, "loss": 2.2064, "step": 11858 }, { "epoch": 0.15, "grad_norm": 4.541304111480713, "learning_rate": 1.999623361343173e-05, "loss": 2.0279, "step": 11859 }, { "epoch": 0.15, "grad_norm": 4.570938587188721, "learning_rate": 1.999623072915082e-05, "loss": 2.6433, "step": 11860 }, { "epoch": 0.15, "grad_norm": 4.566458702087402, "learning_rate": 1.999622784376615e-05, "loss": 2.3811, "step": 11861 }, { "epoch": 0.15, "grad_norm": 4.583580493927002, "learning_rate": 1.9996224957277727e-05, "loss": 2.0696, "step": 11862 }, { "epoch": 0.15, "grad_norm": 5.927268981933594, "learning_rate": 1.9996222069685557e-05, "loss": 2.8732, "step": 11863 }, { "epoch": 0.15, "grad_norm": 5.099071979522705, "learning_rate": 1.9996219180989628e-05, "loss": 2.2581, "step": 11864 }, { "epoch": 0.15, "grad_norm": 4.505218982696533, "learning_rate": 1.999621629118995e-05, "loss": 2.1711, "step": 11865 }, { "epoch": 0.15, "grad_norm": 4.517594814300537, "learning_rate": 1.9996213400286523e-05, "loss": 2.4988, "step": 11866 }, { "epoch": 0.15, "grad_norm": 4.708552360534668, "learning_rate": 1.999621050827934e-05, "loss": 2.1497, "step": 11867 }, { "epoch": 0.15, "grad_norm": 5.546460151672363, "learning_rate": 1.9996207615168407e-05, "loss": 2.5423, "step": 11868 }, { "epoch": 0.15, "grad_norm": 4.369483947753906, "learning_rate": 1.9996204720953724e-05, "loss": 2.1445, "step": 11869 }, { "epoch": 0.15, "grad_norm": 4.808987617492676, "learning_rate": 1.999620182563529e-05, "loss": 2.3529, "step": 11870 }, { "epoch": 0.15, "grad_norm": 4.3805694580078125, "learning_rate": 1.9996198929213105e-05, "loss": 2.185, "step": 11871 }, { "epoch": 0.15, "grad_norm": 4.875415325164795, "learning_rate": 1.999619603168717e-05, "loss": 1.9179, "step": 11872 }, { "epoch": 0.15, "grad_norm": 4.767016410827637, "learning_rate": 1.9996193133057486e-05, "loss": 2.4309, "step": 11873 }, { "epoch": 0.15, "grad_norm": 4.522890567779541, "learning_rate": 1.999619023332405e-05, "loss": 2.364, "step": 11874 }, { "epoch": 0.15, "grad_norm": 4.835131645202637, "learning_rate": 1.999618733248687e-05, "loss": 2.4611, "step": 11875 }, { "epoch": 0.15, "grad_norm": 5.225505352020264, "learning_rate": 1.999618443054594e-05, "loss": 2.2852, "step": 11876 }, { "epoch": 0.15, "grad_norm": 4.802770137786865, "learning_rate": 1.999618152750126e-05, "loss": 2.5939, "step": 11877 }, { "epoch": 0.15, "grad_norm": 4.696397304534912, "learning_rate": 1.999617862335283e-05, "loss": 2.24, "step": 11878 }, { "epoch": 0.15, "grad_norm": 4.598180770874023, "learning_rate": 1.9996175718100653e-05, "loss": 2.2577, "step": 11879 }, { "epoch": 0.15, "grad_norm": 5.310779571533203, "learning_rate": 1.9996172811744735e-05, "loss": 2.6032, "step": 11880 }, { "epoch": 0.15, "grad_norm": 5.0346150398254395, "learning_rate": 1.9996169904285065e-05, "loss": 2.4604, "step": 11881 }, { "epoch": 0.15, "grad_norm": 4.792277812957764, "learning_rate": 1.9996166995721647e-05, "loss": 2.387, "step": 11882 }, { "epoch": 0.15, "grad_norm": 5.298042297363281, "learning_rate": 1.9996164086054483e-05, "loss": 2.4408, "step": 11883 }, { "epoch": 0.15, "grad_norm": 4.945539951324463, "learning_rate": 1.9996161175283575e-05, "loss": 2.4278, "step": 11884 }, { "epoch": 0.15, "grad_norm": 4.5090765953063965, "learning_rate": 1.999615826340892e-05, "loss": 2.1345, "step": 11885 }, { "epoch": 0.15, "grad_norm": 4.97412109375, "learning_rate": 1.999615535043052e-05, "loss": 2.184, "step": 11886 }, { "epoch": 0.15, "grad_norm": 4.5943284034729, "learning_rate": 1.9996152436348374e-05, "loss": 2.2891, "step": 11887 }, { "epoch": 0.15, "grad_norm": 4.45049524307251, "learning_rate": 1.9996149521162483e-05, "loss": 2.0941, "step": 11888 }, { "epoch": 0.15, "grad_norm": 5.0521111488342285, "learning_rate": 1.9996146604872847e-05, "loss": 2.4821, "step": 11889 }, { "epoch": 0.15, "grad_norm": 4.626863956451416, "learning_rate": 1.9996143687479473e-05, "loss": 2.1533, "step": 11890 }, { "epoch": 0.15, "grad_norm": 5.073476314544678, "learning_rate": 1.9996140768982347e-05, "loss": 2.3291, "step": 11891 }, { "epoch": 0.15, "grad_norm": 4.969851493835449, "learning_rate": 1.999613784938148e-05, "loss": 2.4135, "step": 11892 }, { "epoch": 0.15, "grad_norm": 4.603159427642822, "learning_rate": 1.9996134928676874e-05, "loss": 2.2269, "step": 11893 }, { "epoch": 0.15, "grad_norm": 4.905965805053711, "learning_rate": 1.999613200686852e-05, "loss": 2.7384, "step": 11894 }, { "epoch": 0.15, "grad_norm": 5.743789196014404, "learning_rate": 1.9996129083956424e-05, "loss": 2.533, "step": 11895 }, { "epoch": 0.15, "grad_norm": 4.668099403381348, "learning_rate": 1.9996126159940587e-05, "loss": 2.1325, "step": 11896 }, { "epoch": 0.15, "grad_norm": 4.766663074493408, "learning_rate": 1.999612323482101e-05, "loss": 2.2075, "step": 11897 }, { "epoch": 0.15, "grad_norm": 4.6036834716796875, "learning_rate": 1.999612030859769e-05, "loss": 2.4803, "step": 11898 }, { "epoch": 0.15, "grad_norm": 4.333978176116943, "learning_rate": 1.9996117381270627e-05, "loss": 2.3237, "step": 11899 }, { "epoch": 0.15, "grad_norm": 4.952898025512695, "learning_rate": 1.9996114452839824e-05, "loss": 2.0952, "step": 11900 }, { "epoch": 0.15, "grad_norm": 4.859930992126465, "learning_rate": 1.9996111523305283e-05, "loss": 2.6075, "step": 11901 }, { "epoch": 0.15, "grad_norm": 4.877254009246826, "learning_rate": 1.9996108592666997e-05, "loss": 2.1005, "step": 11902 }, { "epoch": 0.15, "grad_norm": 4.80818510055542, "learning_rate": 1.9996105660924972e-05, "loss": 2.2457, "step": 11903 }, { "epoch": 0.15, "grad_norm": 5.599745273590088, "learning_rate": 1.999610272807921e-05, "loss": 2.8249, "step": 11904 }, { "epoch": 0.15, "grad_norm": 4.864953994750977, "learning_rate": 1.9996099794129706e-05, "loss": 2.1544, "step": 11905 }, { "epoch": 0.15, "grad_norm": 4.20546293258667, "learning_rate": 1.9996096859076464e-05, "loss": 2.1863, "step": 11906 }, { "epoch": 0.15, "grad_norm": 6.014378070831299, "learning_rate": 1.9996093922919484e-05, "loss": 2.7132, "step": 11907 }, { "epoch": 0.15, "grad_norm": 4.853306770324707, "learning_rate": 1.9996090985658765e-05, "loss": 2.6844, "step": 11908 }, { "epoch": 0.15, "grad_norm": 4.842008113861084, "learning_rate": 1.999608804729431e-05, "loss": 1.9244, "step": 11909 }, { "epoch": 0.15, "grad_norm": 4.78061056137085, "learning_rate": 1.9996085107826114e-05, "loss": 2.4641, "step": 11910 }, { "epoch": 0.15, "grad_norm": 5.4818525314331055, "learning_rate": 1.999608216725418e-05, "loss": 2.2639, "step": 11911 }, { "epoch": 0.15, "grad_norm": 4.504592418670654, "learning_rate": 1.9996079225578514e-05, "loss": 2.1875, "step": 11912 }, { "epoch": 0.15, "grad_norm": 5.289825916290283, "learning_rate": 1.9996076282799108e-05, "loss": 2.3727, "step": 11913 }, { "epoch": 0.15, "grad_norm": 4.294004917144775, "learning_rate": 1.9996073338915964e-05, "loss": 2.0625, "step": 11914 }, { "epoch": 0.15, "grad_norm": 4.575957775115967, "learning_rate": 1.999607039392909e-05, "loss": 2.1127, "step": 11915 }, { "epoch": 0.15, "grad_norm": 4.669485092163086, "learning_rate": 1.9996067447838475e-05, "loss": 2.4041, "step": 11916 }, { "epoch": 0.15, "grad_norm": 5.535792827606201, "learning_rate": 1.9996064500644127e-05, "loss": 2.6215, "step": 11917 }, { "epoch": 0.15, "grad_norm": 4.169999122619629, "learning_rate": 1.999606155234604e-05, "loss": 2.3239, "step": 11918 }, { "epoch": 0.15, "grad_norm": 4.1596360206604, "learning_rate": 1.999605860294422e-05, "loss": 1.7972, "step": 11919 }, { "epoch": 0.15, "grad_norm": 4.591264247894287, "learning_rate": 1.9996055652438668e-05, "loss": 2.382, "step": 11920 }, { "epoch": 0.15, "grad_norm": 4.803915023803711, "learning_rate": 1.9996052700829377e-05, "loss": 2.0172, "step": 11921 }, { "epoch": 0.15, "grad_norm": 5.203818321228027, "learning_rate": 1.999604974811636e-05, "loss": 2.4544, "step": 11922 }, { "epoch": 0.15, "grad_norm": 4.378808975219727, "learning_rate": 1.9996046794299602e-05, "loss": 2.0297, "step": 11923 }, { "epoch": 0.15, "grad_norm": 4.666331768035889, "learning_rate": 1.9996043839379114e-05, "loss": 2.1799, "step": 11924 }, { "epoch": 0.15, "grad_norm": 5.390605449676514, "learning_rate": 1.9996040883354895e-05, "loss": 2.6315, "step": 11925 }, { "epoch": 0.15, "grad_norm": 5.020298957824707, "learning_rate": 1.9996037926226937e-05, "loss": 2.0476, "step": 11926 }, { "epoch": 0.15, "grad_norm": 4.334738254547119, "learning_rate": 1.9996034967995252e-05, "loss": 1.9971, "step": 11927 }, { "epoch": 0.15, "grad_norm": 4.474824905395508, "learning_rate": 1.9996032008659835e-05, "loss": 2.1761, "step": 11928 }, { "epoch": 0.15, "grad_norm": 4.288699626922607, "learning_rate": 1.999602904822069e-05, "loss": 2.1351, "step": 11929 }, { "epoch": 0.15, "grad_norm": 4.582059383392334, "learning_rate": 1.9996026086677807e-05, "loss": 1.9579, "step": 11930 }, { "epoch": 0.15, "grad_norm": 4.985689640045166, "learning_rate": 1.9996023124031193e-05, "loss": 2.914, "step": 11931 }, { "epoch": 0.15, "grad_norm": 5.05501651763916, "learning_rate": 1.9996020160280854e-05, "loss": 2.9401, "step": 11932 }, { "epoch": 0.15, "grad_norm": 4.909205913543701, "learning_rate": 1.9996017195426784e-05, "loss": 2.5784, "step": 11933 }, { "epoch": 0.15, "grad_norm": 5.309829235076904, "learning_rate": 1.999601422946898e-05, "loss": 2.5006, "step": 11934 }, { "epoch": 0.15, "grad_norm": 4.665940284729004, "learning_rate": 1.999601126240745e-05, "loss": 2.1359, "step": 11935 }, { "epoch": 0.15, "grad_norm": 4.6430206298828125, "learning_rate": 1.9996008294242188e-05, "loss": 2.1477, "step": 11936 }, { "epoch": 0.15, "grad_norm": 4.49600076675415, "learning_rate": 1.99960053249732e-05, "loss": 1.984, "step": 11937 }, { "epoch": 0.15, "grad_norm": 4.292303562164307, "learning_rate": 1.9996002354600482e-05, "loss": 2.4495, "step": 11938 }, { "epoch": 0.15, "grad_norm": 4.399609565734863, "learning_rate": 1.9995999383124034e-05, "loss": 1.9617, "step": 11939 }, { "epoch": 0.15, "grad_norm": 5.2192463874816895, "learning_rate": 1.999599641054386e-05, "loss": 2.6097, "step": 11940 }, { "epoch": 0.15, "grad_norm": 4.408681869506836, "learning_rate": 1.9995993436859957e-05, "loss": 1.9239, "step": 11941 }, { "epoch": 0.15, "grad_norm": 5.349087238311768, "learning_rate": 1.9995990462072328e-05, "loss": 2.3818, "step": 11942 }, { "epoch": 0.15, "grad_norm": 4.94680643081665, "learning_rate": 1.999598748618097e-05, "loss": 2.0001, "step": 11943 }, { "epoch": 0.16, "grad_norm": 4.421529769897461, "learning_rate": 1.999598450918589e-05, "loss": 2.3023, "step": 11944 }, { "epoch": 0.16, "grad_norm": 4.517470359802246, "learning_rate": 1.9995981531087084e-05, "loss": 2.3948, "step": 11945 }, { "epoch": 0.16, "grad_norm": 4.451902866363525, "learning_rate": 1.9995978551884547e-05, "loss": 2.1752, "step": 11946 }, { "epoch": 0.16, "grad_norm": 4.370559215545654, "learning_rate": 1.9995975571578285e-05, "loss": 2.3477, "step": 11947 }, { "epoch": 0.16, "grad_norm": 4.368209362030029, "learning_rate": 1.99959725901683e-05, "loss": 2.1263, "step": 11948 }, { "epoch": 0.16, "grad_norm": 4.650289535522461, "learning_rate": 1.9995969607654587e-05, "loss": 2.0425, "step": 11949 }, { "epoch": 0.16, "grad_norm": 4.9309821128845215, "learning_rate": 1.9995966624037155e-05, "loss": 2.5952, "step": 11950 }, { "epoch": 0.16, "grad_norm": 4.41806697845459, "learning_rate": 1.9995963639315995e-05, "loss": 2.4862, "step": 11951 }, { "epoch": 0.16, "grad_norm": 5.183659553527832, "learning_rate": 1.999596065349111e-05, "loss": 2.3607, "step": 11952 }, { "epoch": 0.16, "grad_norm": 5.125725269317627, "learning_rate": 1.9995957666562504e-05, "loss": 2.7872, "step": 11953 }, { "epoch": 0.16, "grad_norm": 4.579009056091309, "learning_rate": 1.9995954678530173e-05, "loss": 1.6842, "step": 11954 }, { "epoch": 0.16, "grad_norm": 4.927674293518066, "learning_rate": 1.999595168939412e-05, "loss": 2.5385, "step": 11955 }, { "epoch": 0.16, "grad_norm": 4.774107456207275, "learning_rate": 1.9995948699154345e-05, "loss": 2.5435, "step": 11956 }, { "epoch": 0.16, "grad_norm": 4.789923667907715, "learning_rate": 1.9995945707810844e-05, "loss": 2.5855, "step": 11957 }, { "epoch": 0.16, "grad_norm": 5.527104377746582, "learning_rate": 1.9995942715363625e-05, "loss": 3.1464, "step": 11958 }, { "epoch": 0.16, "grad_norm": 5.152043342590332, "learning_rate": 1.999593972181268e-05, "loss": 2.2031, "step": 11959 }, { "epoch": 0.16, "grad_norm": 4.851963520050049, "learning_rate": 1.999593672715802e-05, "loss": 2.2141, "step": 11960 }, { "epoch": 0.16, "grad_norm": 5.151956081390381, "learning_rate": 1.9995933731399634e-05, "loss": 2.236, "step": 11961 }, { "epoch": 0.16, "grad_norm": 4.588259696960449, "learning_rate": 1.999593073453753e-05, "loss": 2.2986, "step": 11962 }, { "epoch": 0.16, "grad_norm": 4.966047286987305, "learning_rate": 1.9995927736571702e-05, "loss": 2.0124, "step": 11963 }, { "epoch": 0.16, "grad_norm": 4.126498222351074, "learning_rate": 1.999592473750216e-05, "loss": 2.0601, "step": 11964 }, { "epoch": 0.16, "grad_norm": 4.8259196281433105, "learning_rate": 1.9995921737328893e-05, "loss": 2.3242, "step": 11965 }, { "epoch": 0.16, "grad_norm": 5.019575119018555, "learning_rate": 1.9995918736051907e-05, "loss": 1.9492, "step": 11966 }, { "epoch": 0.16, "grad_norm": 4.9292707443237305, "learning_rate": 1.9995915733671205e-05, "loss": 2.4849, "step": 11967 }, { "epoch": 0.16, "grad_norm": 4.912522792816162, "learning_rate": 1.999591273018678e-05, "loss": 2.0865, "step": 11968 }, { "epoch": 0.16, "grad_norm": 4.847902297973633, "learning_rate": 1.9995909725598642e-05, "loss": 1.9361, "step": 11969 }, { "epoch": 0.16, "grad_norm": 4.375716686248779, "learning_rate": 1.9995906719906782e-05, "loss": 2.373, "step": 11970 }, { "epoch": 0.16, "grad_norm": 5.578191757202148, "learning_rate": 1.9995903713111205e-05, "loss": 2.6706, "step": 11971 }, { "epoch": 0.16, "grad_norm": 5.488204479217529, "learning_rate": 1.9995900705211916e-05, "loss": 2.4205, "step": 11972 }, { "epoch": 0.16, "grad_norm": 4.613049507141113, "learning_rate": 1.9995897696208903e-05, "loss": 2.1479, "step": 11973 }, { "epoch": 0.16, "grad_norm": 5.395111083984375, "learning_rate": 1.999589468610218e-05, "loss": 2.4708, "step": 11974 }, { "epoch": 0.16, "grad_norm": 5.22776985168457, "learning_rate": 1.999589167489173e-05, "loss": 2.2683, "step": 11975 }, { "epoch": 0.16, "grad_norm": 5.029239177703857, "learning_rate": 1.9995888662577574e-05, "loss": 2.6491, "step": 11976 }, { "epoch": 0.16, "grad_norm": 5.1273345947265625, "learning_rate": 1.99958856491597e-05, "loss": 2.51, "step": 11977 }, { "epoch": 0.16, "grad_norm": 5.038321495056152, "learning_rate": 1.9995882634638108e-05, "loss": 2.4571, "step": 11978 }, { "epoch": 0.16, "grad_norm": 5.005212306976318, "learning_rate": 1.99958796190128e-05, "loss": 1.8044, "step": 11979 }, { "epoch": 0.16, "grad_norm": 4.690053462982178, "learning_rate": 1.999587660228378e-05, "loss": 2.0945, "step": 11980 }, { "epoch": 0.16, "grad_norm": 5.3989105224609375, "learning_rate": 1.9995873584451045e-05, "loss": 2.6351, "step": 11981 }, { "epoch": 0.16, "grad_norm": 5.043126106262207, "learning_rate": 1.9995870565514595e-05, "loss": 2.7379, "step": 11982 }, { "epoch": 0.16, "grad_norm": 4.700244903564453, "learning_rate": 1.9995867545474435e-05, "loss": 2.2917, "step": 11983 }, { "epoch": 0.16, "grad_norm": 4.311906337738037, "learning_rate": 1.999586452433056e-05, "loss": 1.9737, "step": 11984 }, { "epoch": 0.16, "grad_norm": 4.663003921508789, "learning_rate": 1.999586150208297e-05, "loss": 2.2918, "step": 11985 }, { "epoch": 0.16, "grad_norm": 5.138838768005371, "learning_rate": 1.9995858478731666e-05, "loss": 2.1338, "step": 11986 }, { "epoch": 0.16, "grad_norm": 5.153351306915283, "learning_rate": 1.9995855454276654e-05, "loss": 2.2459, "step": 11987 }, { "epoch": 0.16, "grad_norm": 4.72244119644165, "learning_rate": 1.9995852428717928e-05, "loss": 2.1683, "step": 11988 }, { "epoch": 0.16, "grad_norm": 4.503098964691162, "learning_rate": 1.999584940205549e-05, "loss": 2.2127, "step": 11989 }, { "epoch": 0.16, "grad_norm": 4.228592395782471, "learning_rate": 1.9995846374289343e-05, "loss": 2.1161, "step": 11990 }, { "epoch": 0.16, "grad_norm": 4.8770647048950195, "learning_rate": 1.999584334541948e-05, "loss": 2.9786, "step": 11991 }, { "epoch": 0.16, "grad_norm": 4.219470500946045, "learning_rate": 1.999584031544591e-05, "loss": 2.0839, "step": 11992 }, { "epoch": 0.16, "grad_norm": 4.892737865447998, "learning_rate": 1.999583728436863e-05, "loss": 2.5086, "step": 11993 }, { "epoch": 0.16, "grad_norm": 4.5292229652404785, "learning_rate": 1.9995834252187637e-05, "loss": 2.0487, "step": 11994 }, { "epoch": 0.16, "grad_norm": 4.680090427398682, "learning_rate": 1.999583121890294e-05, "loss": 2.2382, "step": 11995 }, { "epoch": 0.16, "grad_norm": 4.396605491638184, "learning_rate": 1.999582818451453e-05, "loss": 1.9294, "step": 11996 }, { "epoch": 0.16, "grad_norm": 4.460887432098389, "learning_rate": 1.9995825149022407e-05, "loss": 2.2171, "step": 11997 }, { "epoch": 0.16, "grad_norm": 4.269615173339844, "learning_rate": 1.999582211242658e-05, "loss": 2.1388, "step": 11998 }, { "epoch": 0.16, "grad_norm": 4.975322723388672, "learning_rate": 1.9995819074727044e-05, "loss": 2.0456, "step": 11999 }, { "epoch": 0.16, "grad_norm": 4.685303688049316, "learning_rate": 1.99958160359238e-05, "loss": 2.1622, "step": 12000 }, { "epoch": 0.16, "grad_norm": 4.317094326019287, "learning_rate": 1.999581299601685e-05, "loss": 2.1596, "step": 12001 }, { "epoch": 0.16, "grad_norm": 4.9166154861450195, "learning_rate": 1.999580995500619e-05, "loss": 2.2177, "step": 12002 }, { "epoch": 0.16, "grad_norm": 4.649271488189697, "learning_rate": 1.9995806912891825e-05, "loss": 2.2124, "step": 12003 }, { "epoch": 0.16, "grad_norm": 5.623600959777832, "learning_rate": 1.9995803869673752e-05, "loss": 2.6198, "step": 12004 }, { "epoch": 0.16, "grad_norm": 5.318458080291748, "learning_rate": 1.999580082535197e-05, "loss": 2.3631, "step": 12005 }, { "epoch": 0.16, "grad_norm": 4.720561981201172, "learning_rate": 1.9995797779926485e-05, "loss": 2.1222, "step": 12006 }, { "epoch": 0.16, "grad_norm": 4.98959493637085, "learning_rate": 1.9995794733397297e-05, "loss": 2.397, "step": 12007 }, { "epoch": 0.16, "grad_norm": 5.433919906616211, "learning_rate": 1.9995791685764398e-05, "loss": 2.4038, "step": 12008 }, { "epoch": 0.16, "grad_norm": 3.861424446105957, "learning_rate": 1.9995788637027797e-05, "loss": 2.4995, "step": 12009 }, { "epoch": 0.16, "grad_norm": 4.24832820892334, "learning_rate": 1.9995785587187493e-05, "loss": 2.1898, "step": 12010 }, { "epoch": 0.16, "grad_norm": 5.15977144241333, "learning_rate": 1.999578253624348e-05, "loss": 2.4027, "step": 12011 }, { "epoch": 0.16, "grad_norm": 4.732285022735596, "learning_rate": 1.9995779484195768e-05, "loss": 2.4117, "step": 12012 }, { "epoch": 0.16, "grad_norm": 5.182693004608154, "learning_rate": 1.999577643104435e-05, "loss": 2.1989, "step": 12013 }, { "epoch": 0.16, "grad_norm": 4.857228755950928, "learning_rate": 1.9995773376789226e-05, "loss": 2.6413, "step": 12014 }, { "epoch": 0.16, "grad_norm": 4.73222017288208, "learning_rate": 1.9995770321430403e-05, "loss": 2.2043, "step": 12015 }, { "epoch": 0.16, "grad_norm": 4.779355049133301, "learning_rate": 1.9995767264967876e-05, "loss": 2.1286, "step": 12016 }, { "epoch": 0.16, "grad_norm": 4.6839799880981445, "learning_rate": 1.999576420740165e-05, "loss": 2.3151, "step": 12017 }, { "epoch": 0.16, "grad_norm": 4.867372035980225, "learning_rate": 1.9995761148731717e-05, "loss": 2.1639, "step": 12018 }, { "epoch": 0.16, "grad_norm": 5.366036891937256, "learning_rate": 1.9995758088958085e-05, "loss": 2.5526, "step": 12019 }, { "epoch": 0.16, "grad_norm": 5.235069751739502, "learning_rate": 1.999575502808075e-05, "loss": 2.6287, "step": 12020 }, { "epoch": 0.16, "grad_norm": 5.406099319458008, "learning_rate": 1.9995751966099717e-05, "loss": 2.0885, "step": 12021 }, { "epoch": 0.16, "grad_norm": 5.203527450561523, "learning_rate": 1.999574890301498e-05, "loss": 2.5267, "step": 12022 }, { "epoch": 0.16, "grad_norm": 4.555879592895508, "learning_rate": 1.999574583882655e-05, "loss": 2.2638, "step": 12023 }, { "epoch": 0.16, "grad_norm": 4.403968811035156, "learning_rate": 1.999574277353441e-05, "loss": 2.3613, "step": 12024 }, { "epoch": 0.16, "grad_norm": 5.380655288696289, "learning_rate": 1.9995739707138577e-05, "loss": 2.5862, "step": 12025 }, { "epoch": 0.16, "grad_norm": 4.939687252044678, "learning_rate": 1.9995736639639042e-05, "loss": 2.537, "step": 12026 }, { "epoch": 0.16, "grad_norm": 4.286696434020996, "learning_rate": 1.999573357103581e-05, "loss": 1.8565, "step": 12027 }, { "epoch": 0.16, "grad_norm": 5.233171463012695, "learning_rate": 1.9995730501328875e-05, "loss": 2.2671, "step": 12028 }, { "epoch": 0.16, "grad_norm": 4.932047367095947, "learning_rate": 1.9995727430518247e-05, "loss": 2.2956, "step": 12029 }, { "epoch": 0.16, "grad_norm": 4.971335411071777, "learning_rate": 1.999572435860392e-05, "loss": 2.7668, "step": 12030 }, { "epoch": 0.16, "grad_norm": 5.489153861999512, "learning_rate": 1.9995721285585893e-05, "loss": 2.5626, "step": 12031 }, { "epoch": 0.16, "grad_norm": 4.905402660369873, "learning_rate": 1.9995718211464172e-05, "loss": 2.4362, "step": 12032 }, { "epoch": 0.16, "grad_norm": 5.4451212882995605, "learning_rate": 1.999571513623875e-05, "loss": 2.6581, "step": 12033 }, { "epoch": 0.16, "grad_norm": 4.410176753997803, "learning_rate": 1.9995712059909636e-05, "loss": 2.1276, "step": 12034 }, { "epoch": 0.16, "grad_norm": 5.670633792877197, "learning_rate": 1.9995708982476823e-05, "loss": 2.5566, "step": 12035 }, { "epoch": 0.16, "grad_norm": 5.440207481384277, "learning_rate": 1.9995705903940313e-05, "loss": 2.5929, "step": 12036 }, { "epoch": 0.16, "grad_norm": 4.329995155334473, "learning_rate": 1.9995702824300112e-05, "loss": 1.8396, "step": 12037 }, { "epoch": 0.16, "grad_norm": 4.376788139343262, "learning_rate": 1.9995699743556214e-05, "loss": 1.7628, "step": 12038 }, { "epoch": 0.16, "grad_norm": 5.218412399291992, "learning_rate": 1.9995696661708618e-05, "loss": 2.8065, "step": 12039 }, { "epoch": 0.16, "grad_norm": 5.074086666107178, "learning_rate": 1.999569357875733e-05, "loss": 2.1053, "step": 12040 }, { "epoch": 0.16, "grad_norm": 5.235805988311768, "learning_rate": 1.9995690494702347e-05, "loss": 2.2159, "step": 12041 }, { "epoch": 0.16, "grad_norm": 5.422032833099365, "learning_rate": 1.9995687409543673e-05, "loss": 1.9003, "step": 12042 }, { "epoch": 0.16, "grad_norm": 4.632087230682373, "learning_rate": 1.9995684323281304e-05, "loss": 2.0633, "step": 12043 }, { "epoch": 0.16, "grad_norm": 3.8526463508605957, "learning_rate": 1.999568123591524e-05, "loss": 1.5497, "step": 12044 }, { "epoch": 0.16, "grad_norm": 4.788515567779541, "learning_rate": 1.9995678147445484e-05, "loss": 2.3694, "step": 12045 }, { "epoch": 0.16, "grad_norm": 5.409346580505371, "learning_rate": 1.9995675057872036e-05, "loss": 2.5856, "step": 12046 }, { "epoch": 0.16, "grad_norm": 4.395573139190674, "learning_rate": 1.9995671967194898e-05, "loss": 2.0179, "step": 12047 }, { "epoch": 0.16, "grad_norm": 5.029330253601074, "learning_rate": 1.9995668875414065e-05, "loss": 2.6022, "step": 12048 }, { "epoch": 0.16, "grad_norm": 5.272491931915283, "learning_rate": 1.9995665782529542e-05, "loss": 2.6423, "step": 12049 }, { "epoch": 0.16, "grad_norm": 4.399967193603516, "learning_rate": 1.9995662688541328e-05, "loss": 2.2475, "step": 12050 }, { "epoch": 0.16, "grad_norm": 4.749519348144531, "learning_rate": 1.9995659593449423e-05, "loss": 2.4643, "step": 12051 }, { "epoch": 0.16, "grad_norm": 4.491019248962402, "learning_rate": 1.9995656497253828e-05, "loss": 2.2336, "step": 12052 }, { "epoch": 0.16, "grad_norm": 4.748576641082764, "learning_rate": 1.999565339995454e-05, "loss": 2.2969, "step": 12053 }, { "epoch": 0.16, "grad_norm": 4.142461776733398, "learning_rate": 1.9995650301551568e-05, "loss": 1.516, "step": 12054 }, { "epoch": 0.16, "grad_norm": 4.397106647491455, "learning_rate": 1.9995647202044904e-05, "loss": 2.2565, "step": 12055 }, { "epoch": 0.16, "grad_norm": 5.145200252532959, "learning_rate": 1.999564410143455e-05, "loss": 2.599, "step": 12056 }, { "epoch": 0.16, "grad_norm": 4.537180423736572, "learning_rate": 1.9995640999720507e-05, "loss": 2.4447, "step": 12057 }, { "epoch": 0.16, "grad_norm": 4.8906707763671875, "learning_rate": 1.9995637896902774e-05, "loss": 2.3259, "step": 12058 }, { "epoch": 0.16, "grad_norm": 4.892354488372803, "learning_rate": 1.9995634792981358e-05, "loss": 2.2049, "step": 12059 }, { "epoch": 0.16, "grad_norm": 4.499412536621094, "learning_rate": 1.999563168795625e-05, "loss": 2.093, "step": 12060 }, { "epoch": 0.16, "grad_norm": 4.341023921966553, "learning_rate": 1.999562858182746e-05, "loss": 2.3008, "step": 12061 }, { "epoch": 0.16, "grad_norm": 5.918900489807129, "learning_rate": 1.9995625474594977e-05, "loss": 2.7448, "step": 12062 }, { "epoch": 0.16, "grad_norm": 5.089796543121338, "learning_rate": 1.999562236625881e-05, "loss": 2.5355, "step": 12063 }, { "epoch": 0.16, "grad_norm": 4.853800296783447, "learning_rate": 1.9995619256818957e-05, "loss": 2.5534, "step": 12064 }, { "epoch": 0.16, "grad_norm": 4.665558815002441, "learning_rate": 1.999561614627542e-05, "loss": 1.9868, "step": 12065 }, { "epoch": 0.16, "grad_norm": 4.945859909057617, "learning_rate": 1.9995613034628195e-05, "loss": 2.5574, "step": 12066 }, { "epoch": 0.16, "grad_norm": 4.746424674987793, "learning_rate": 1.9995609921877283e-05, "loss": 2.216, "step": 12067 }, { "epoch": 0.16, "grad_norm": 4.474122047424316, "learning_rate": 1.9995606808022688e-05, "loss": 2.3758, "step": 12068 }, { "epoch": 0.16, "grad_norm": 4.104760646820068, "learning_rate": 1.9995603693064408e-05, "loss": 2.1096, "step": 12069 }, { "epoch": 0.16, "grad_norm": 5.165018081665039, "learning_rate": 1.9995600577002444e-05, "loss": 2.6181, "step": 12070 }, { "epoch": 0.16, "grad_norm": 4.684467792510986, "learning_rate": 1.9995597459836796e-05, "loss": 2.5121, "step": 12071 }, { "epoch": 0.16, "grad_norm": 4.130618572235107, "learning_rate": 1.9995594341567465e-05, "loss": 2.2571, "step": 12072 }, { "epoch": 0.16, "grad_norm": 5.2070393562316895, "learning_rate": 1.999559122219445e-05, "loss": 2.6505, "step": 12073 }, { "epoch": 0.16, "grad_norm": 4.990045070648193, "learning_rate": 1.999558810171775e-05, "loss": 2.4309, "step": 12074 }, { "epoch": 0.16, "grad_norm": 4.770551681518555, "learning_rate": 1.9995584980137373e-05, "loss": 2.095, "step": 12075 }, { "epoch": 0.16, "grad_norm": 4.880731582641602, "learning_rate": 1.999558185745331e-05, "loss": 2.6288, "step": 12076 }, { "epoch": 0.16, "grad_norm": 4.597332000732422, "learning_rate": 1.9995578733665565e-05, "loss": 1.9506, "step": 12077 }, { "epoch": 0.16, "grad_norm": 4.751954555511475, "learning_rate": 1.999557560877414e-05, "loss": 1.9729, "step": 12078 }, { "epoch": 0.16, "grad_norm": 4.759018898010254, "learning_rate": 1.9995572482779034e-05, "loss": 2.4937, "step": 12079 }, { "epoch": 0.16, "grad_norm": 5.174431800842285, "learning_rate": 1.9995569355680248e-05, "loss": 2.6116, "step": 12080 }, { "epoch": 0.16, "grad_norm": 4.853924751281738, "learning_rate": 1.999556622747778e-05, "loss": 2.1308, "step": 12081 }, { "epoch": 0.16, "grad_norm": 4.420258045196533, "learning_rate": 1.999556309817163e-05, "loss": 2.0923, "step": 12082 }, { "epoch": 0.16, "grad_norm": 4.702569961547852, "learning_rate": 1.9995559967761803e-05, "loss": 2.1924, "step": 12083 }, { "epoch": 0.16, "grad_norm": 5.216681003570557, "learning_rate": 1.99955568362483e-05, "loss": 2.924, "step": 12084 }, { "epoch": 0.16, "grad_norm": 4.892042636871338, "learning_rate": 1.999555370363111e-05, "loss": 1.8749, "step": 12085 }, { "epoch": 0.16, "grad_norm": 4.967736721038818, "learning_rate": 1.9995550569910244e-05, "loss": 2.3639, "step": 12086 }, { "epoch": 0.16, "grad_norm": 5.140823841094971, "learning_rate": 1.9995547435085704e-05, "loss": 2.5986, "step": 12087 }, { "epoch": 0.16, "grad_norm": 5.200072765350342, "learning_rate": 1.9995544299157484e-05, "loss": 2.1272, "step": 12088 }, { "epoch": 0.16, "grad_norm": 4.638062000274658, "learning_rate": 1.9995541162125583e-05, "loss": 2.1808, "step": 12089 }, { "epoch": 0.16, "grad_norm": 4.968697547912598, "learning_rate": 1.999553802399001e-05, "loss": 2.1342, "step": 12090 }, { "epoch": 0.16, "grad_norm": 4.274662971496582, "learning_rate": 1.9995534884750753e-05, "loss": 2.2406, "step": 12091 }, { "epoch": 0.16, "grad_norm": 4.609947681427002, "learning_rate": 1.9995531744407825e-05, "loss": 2.2512, "step": 12092 }, { "epoch": 0.16, "grad_norm": 5.071583271026611, "learning_rate": 1.999552860296122e-05, "loss": 2.5342, "step": 12093 }, { "epoch": 0.16, "grad_norm": 4.653371810913086, "learning_rate": 1.9995525460410935e-05, "loss": 2.2272, "step": 12094 }, { "epoch": 0.16, "grad_norm": 5.058157920837402, "learning_rate": 1.999552231675698e-05, "loss": 2.3942, "step": 12095 }, { "epoch": 0.16, "grad_norm": 5.13107442855835, "learning_rate": 1.9995519171999348e-05, "loss": 2.3213, "step": 12096 }, { "epoch": 0.16, "grad_norm": 4.327820301055908, "learning_rate": 1.9995516026138036e-05, "loss": 1.8377, "step": 12097 }, { "epoch": 0.16, "grad_norm": 5.240952014923096, "learning_rate": 1.9995512879173055e-05, "loss": 2.7318, "step": 12098 }, { "epoch": 0.16, "grad_norm": 5.409509181976318, "learning_rate": 1.99955097311044e-05, "loss": 2.7089, "step": 12099 }, { "epoch": 0.16, "grad_norm": 4.5103631019592285, "learning_rate": 1.999550658193207e-05, "loss": 2.3853, "step": 12100 }, { "epoch": 0.16, "grad_norm": 6.037903785705566, "learning_rate": 1.9995503431656064e-05, "loss": 3.0458, "step": 12101 }, { "epoch": 0.16, "grad_norm": 5.34804105758667, "learning_rate": 1.999550028027639e-05, "loss": 3.0853, "step": 12102 }, { "epoch": 0.16, "grad_norm": 4.498994827270508, "learning_rate": 1.999549712779304e-05, "loss": 2.1587, "step": 12103 }, { "epoch": 0.16, "grad_norm": 4.858367443084717, "learning_rate": 1.9995493974206016e-05, "loss": 1.6909, "step": 12104 }, { "epoch": 0.16, "grad_norm": 4.688413619995117, "learning_rate": 1.9995490819515325e-05, "loss": 2.0708, "step": 12105 }, { "epoch": 0.16, "grad_norm": 4.795093536376953, "learning_rate": 1.9995487663720956e-05, "loss": 2.461, "step": 12106 }, { "epoch": 0.16, "grad_norm": 5.340782165527344, "learning_rate": 1.999548450682292e-05, "loss": 2.6139, "step": 12107 }, { "epoch": 0.16, "grad_norm": 4.9247212409973145, "learning_rate": 1.9995481348821212e-05, "loss": 2.3156, "step": 12108 }, { "epoch": 0.16, "grad_norm": 4.068656921386719, "learning_rate": 1.9995478189715832e-05, "loss": 1.7474, "step": 12109 }, { "epoch": 0.16, "grad_norm": 4.941333293914795, "learning_rate": 1.9995475029506783e-05, "loss": 2.0612, "step": 12110 }, { "epoch": 0.16, "grad_norm": 5.311863422393799, "learning_rate": 1.9995471868194062e-05, "loss": 2.6016, "step": 12111 }, { "epoch": 0.16, "grad_norm": 4.5511674880981445, "learning_rate": 1.9995468705777675e-05, "loss": 2.4231, "step": 12112 }, { "epoch": 0.16, "grad_norm": 5.012114524841309, "learning_rate": 1.9995465542257614e-05, "loss": 2.3675, "step": 12113 }, { "epoch": 0.16, "grad_norm": 5.407666206359863, "learning_rate": 1.9995462377633886e-05, "loss": 2.6644, "step": 12114 }, { "epoch": 0.16, "grad_norm": 4.360743999481201, "learning_rate": 1.999545921190649e-05, "loss": 2.0138, "step": 12115 }, { "epoch": 0.16, "grad_norm": 4.430713653564453, "learning_rate": 1.999545604507543e-05, "loss": 1.9037, "step": 12116 }, { "epoch": 0.16, "grad_norm": 4.946162223815918, "learning_rate": 1.9995452877140697e-05, "loss": 2.2054, "step": 12117 }, { "epoch": 0.16, "grad_norm": 4.850154876708984, "learning_rate": 1.9995449708102294e-05, "loss": 2.2774, "step": 12118 }, { "epoch": 0.16, "grad_norm": 4.8756327629089355, "learning_rate": 1.9995446537960228e-05, "loss": 2.3406, "step": 12119 }, { "epoch": 0.16, "grad_norm": 4.48494815826416, "learning_rate": 1.9995443366714494e-05, "loss": 1.9961, "step": 12120 }, { "epoch": 0.16, "grad_norm": 5.225815296173096, "learning_rate": 1.999544019436509e-05, "loss": 2.5302, "step": 12121 }, { "epoch": 0.16, "grad_norm": 4.872832775115967, "learning_rate": 1.9995437020912027e-05, "loss": 2.5818, "step": 12122 }, { "epoch": 0.16, "grad_norm": 5.360434532165527, "learning_rate": 1.9995433846355296e-05, "loss": 2.2188, "step": 12123 }, { "epoch": 0.16, "grad_norm": 4.7588701248168945, "learning_rate": 1.9995430670694894e-05, "loss": 2.7426, "step": 12124 }, { "epoch": 0.16, "grad_norm": 4.791999340057373, "learning_rate": 1.9995427493930833e-05, "loss": 2.4875, "step": 12125 }, { "epoch": 0.16, "grad_norm": 4.649946212768555, "learning_rate": 1.9995424316063105e-05, "loss": 2.3546, "step": 12126 }, { "epoch": 0.16, "grad_norm": 4.730215549468994, "learning_rate": 1.999542113709171e-05, "loss": 2.066, "step": 12127 }, { "epoch": 0.16, "grad_norm": 4.218090534210205, "learning_rate": 1.9995417957016657e-05, "loss": 2.0927, "step": 12128 }, { "epoch": 0.16, "grad_norm": 5.719149112701416, "learning_rate": 1.9995414775837937e-05, "loss": 3.2262, "step": 12129 }, { "epoch": 0.16, "grad_norm": 4.627964019775391, "learning_rate": 1.9995411593555555e-05, "loss": 2.2244, "step": 12130 }, { "epoch": 0.16, "grad_norm": 4.358612060546875, "learning_rate": 1.9995408410169505e-05, "loss": 2.2535, "step": 12131 }, { "epoch": 0.16, "grad_norm": 4.089609146118164, "learning_rate": 1.9995405225679795e-05, "loss": 2.036, "step": 12132 }, { "epoch": 0.16, "grad_norm": 4.080386638641357, "learning_rate": 1.9995402040086424e-05, "loss": 2.0757, "step": 12133 }, { "epoch": 0.16, "grad_norm": 5.0318121910095215, "learning_rate": 1.999539885338939e-05, "loss": 2.4613, "step": 12134 }, { "epoch": 0.16, "grad_norm": 4.8645124435424805, "learning_rate": 1.9995395665588696e-05, "loss": 2.4093, "step": 12135 }, { "epoch": 0.16, "grad_norm": 4.910184860229492, "learning_rate": 1.999539247668434e-05, "loss": 2.2494, "step": 12136 }, { "epoch": 0.16, "grad_norm": 5.766737937927246, "learning_rate": 1.9995389286676324e-05, "loss": 2.3362, "step": 12137 }, { "epoch": 0.16, "grad_norm": 5.514227867126465, "learning_rate": 1.9995386095564642e-05, "loss": 2.8365, "step": 12138 }, { "epoch": 0.16, "grad_norm": 4.448685169219971, "learning_rate": 1.9995382903349307e-05, "loss": 2.1589, "step": 12139 }, { "epoch": 0.16, "grad_norm": 5.400124549865723, "learning_rate": 1.999537971003031e-05, "loss": 2.6487, "step": 12140 }, { "epoch": 0.16, "grad_norm": 4.900454044342041, "learning_rate": 1.9995376515607653e-05, "loss": 2.5952, "step": 12141 }, { "epoch": 0.16, "grad_norm": 4.876445770263672, "learning_rate": 1.9995373320081338e-05, "loss": 2.0863, "step": 12142 }, { "epoch": 0.16, "grad_norm": 5.156450271606445, "learning_rate": 1.9995370123451362e-05, "loss": 2.1908, "step": 12143 }, { "epoch": 0.16, "grad_norm": 4.895848751068115, "learning_rate": 1.9995366925717726e-05, "loss": 2.0392, "step": 12144 }, { "epoch": 0.16, "grad_norm": 4.6389594078063965, "learning_rate": 1.9995363726880433e-05, "loss": 2.3267, "step": 12145 }, { "epoch": 0.16, "grad_norm": 5.283362865447998, "learning_rate": 1.9995360526939483e-05, "loss": 2.4967, "step": 12146 }, { "epoch": 0.16, "grad_norm": 5.341631889343262, "learning_rate": 1.9995357325894877e-05, "loss": 2.7291, "step": 12147 }, { "epoch": 0.16, "grad_norm": 6.561631679534912, "learning_rate": 1.9995354123746617e-05, "loss": 3.4241, "step": 12148 }, { "epoch": 0.16, "grad_norm": 5.222659587860107, "learning_rate": 1.9995350920494693e-05, "loss": 2.6886, "step": 12149 }, { "epoch": 0.16, "grad_norm": 5.103693008422852, "learning_rate": 1.9995347716139116e-05, "loss": 2.5794, "step": 12150 }, { "epoch": 0.16, "grad_norm": 4.702090263366699, "learning_rate": 1.9995344510679886e-05, "loss": 2.1242, "step": 12151 }, { "epoch": 0.16, "grad_norm": 4.389296531677246, "learning_rate": 1.9995341304116995e-05, "loss": 2.1623, "step": 12152 }, { "epoch": 0.16, "grad_norm": 5.3070502281188965, "learning_rate": 1.999533809645045e-05, "loss": 2.9543, "step": 12153 }, { "epoch": 0.16, "grad_norm": 4.258294582366943, "learning_rate": 1.9995334887680254e-05, "loss": 2.2117, "step": 12154 }, { "epoch": 0.16, "grad_norm": 4.853222370147705, "learning_rate": 1.9995331677806403e-05, "loss": 2.142, "step": 12155 }, { "epoch": 0.16, "grad_norm": 4.1621551513671875, "learning_rate": 1.999532846682889e-05, "loss": 2.0621, "step": 12156 }, { "epoch": 0.16, "grad_norm": 4.377334117889404, "learning_rate": 1.999532525474773e-05, "loss": 2.1618, "step": 12157 }, { "epoch": 0.16, "grad_norm": 5.590035915374756, "learning_rate": 1.999532204156292e-05, "loss": 2.258, "step": 12158 }, { "epoch": 0.16, "grad_norm": 4.718630790710449, "learning_rate": 1.9995318827274448e-05, "loss": 2.1605, "step": 12159 }, { "epoch": 0.16, "grad_norm": 4.345024108886719, "learning_rate": 1.999531561188233e-05, "loss": 2.1911, "step": 12160 }, { "epoch": 0.16, "grad_norm": 4.737756252288818, "learning_rate": 1.9995312395386554e-05, "loss": 2.1669, "step": 12161 }, { "epoch": 0.16, "grad_norm": 3.8895695209503174, "learning_rate": 1.9995309177787133e-05, "loss": 1.4758, "step": 12162 }, { "epoch": 0.16, "grad_norm": 4.231827735900879, "learning_rate": 1.9995305959084054e-05, "loss": 2.6049, "step": 12163 }, { "epoch": 0.16, "grad_norm": 4.6897664070129395, "learning_rate": 1.9995302739277326e-05, "loss": 2.2975, "step": 12164 }, { "epoch": 0.16, "grad_norm": 4.993682861328125, "learning_rate": 1.9995299518366947e-05, "loss": 2.6664, "step": 12165 }, { "epoch": 0.16, "grad_norm": 4.316343307495117, "learning_rate": 1.999529629635292e-05, "loss": 2.17, "step": 12166 }, { "epoch": 0.16, "grad_norm": 5.050573348999023, "learning_rate": 1.9995293073235237e-05, "loss": 2.2937, "step": 12167 }, { "epoch": 0.16, "grad_norm": 4.717189311981201, "learning_rate": 1.9995289849013908e-05, "loss": 2.0055, "step": 12168 }, { "epoch": 0.16, "grad_norm": 4.443099021911621, "learning_rate": 1.999528662368893e-05, "loss": 1.9997, "step": 12169 }, { "epoch": 0.16, "grad_norm": 5.036698341369629, "learning_rate": 1.99952833972603e-05, "loss": 2.5615, "step": 12170 }, { "epoch": 0.16, "grad_norm": 5.014312744140625, "learning_rate": 1.999528016972802e-05, "loss": 2.4816, "step": 12171 }, { "epoch": 0.16, "grad_norm": 4.149054527282715, "learning_rate": 1.9995276941092093e-05, "loss": 1.9581, "step": 12172 }, { "epoch": 0.16, "grad_norm": 4.9368367195129395, "learning_rate": 1.999527371135252e-05, "loss": 2.2432, "step": 12173 }, { "epoch": 0.16, "grad_norm": 4.776236534118652, "learning_rate": 1.99952704805093e-05, "loss": 2.1651, "step": 12174 }, { "epoch": 0.16, "grad_norm": 3.839940309524536, "learning_rate": 1.999526724856243e-05, "loss": 1.7725, "step": 12175 }, { "epoch": 0.16, "grad_norm": 5.209006309509277, "learning_rate": 1.9995264015511914e-05, "loss": 2.4683, "step": 12176 }, { "epoch": 0.16, "grad_norm": 4.442383766174316, "learning_rate": 1.9995260781357752e-05, "loss": 2.504, "step": 12177 }, { "epoch": 0.16, "grad_norm": 5.872051239013672, "learning_rate": 1.999525754609994e-05, "loss": 2.1485, "step": 12178 }, { "epoch": 0.16, "grad_norm": 4.915126323699951, "learning_rate": 1.9995254309738484e-05, "loss": 2.1259, "step": 12179 }, { "epoch": 0.16, "grad_norm": 4.6193928718566895, "learning_rate": 1.9995251072273385e-05, "loss": 2.0792, "step": 12180 }, { "epoch": 0.16, "grad_norm": 4.36903715133667, "learning_rate": 1.999524783370464e-05, "loss": 2.3832, "step": 12181 }, { "epoch": 0.16, "grad_norm": 5.018526554107666, "learning_rate": 1.9995244594032248e-05, "loss": 2.1506, "step": 12182 }, { "epoch": 0.16, "grad_norm": 5.18349027633667, "learning_rate": 1.999524135325621e-05, "loss": 2.4878, "step": 12183 }, { "epoch": 0.16, "grad_norm": 4.752861976623535, "learning_rate": 1.999523811137653e-05, "loss": 2.0843, "step": 12184 }, { "epoch": 0.16, "grad_norm": 4.803138256072998, "learning_rate": 1.9995234868393205e-05, "loss": 2.263, "step": 12185 }, { "epoch": 0.16, "grad_norm": 4.824693202972412, "learning_rate": 1.9995231624306236e-05, "loss": 2.3761, "step": 12186 }, { "epoch": 0.16, "grad_norm": 4.443912506103516, "learning_rate": 1.9995228379115628e-05, "loss": 2.2559, "step": 12187 }, { "epoch": 0.16, "grad_norm": 5.303732395172119, "learning_rate": 1.9995225132821373e-05, "loss": 2.6976, "step": 12188 }, { "epoch": 0.16, "grad_norm": 4.710349082946777, "learning_rate": 1.9995221885423478e-05, "loss": 2.2851, "step": 12189 }, { "epoch": 0.16, "grad_norm": 4.524722576141357, "learning_rate": 1.999521863692194e-05, "loss": 2.4044, "step": 12190 }, { "epoch": 0.16, "grad_norm": 4.948032855987549, "learning_rate": 1.999521538731676e-05, "loss": 2.4156, "step": 12191 }, { "epoch": 0.16, "grad_norm": 4.4618024826049805, "learning_rate": 1.9995212136607936e-05, "loss": 2.1864, "step": 12192 }, { "epoch": 0.16, "grad_norm": 4.759461879730225, "learning_rate": 1.9995208884795475e-05, "loss": 2.8868, "step": 12193 }, { "epoch": 0.16, "grad_norm": 4.116726398468018, "learning_rate": 1.9995205631879374e-05, "loss": 1.6921, "step": 12194 }, { "epoch": 0.16, "grad_norm": 4.60854434967041, "learning_rate": 1.999520237785963e-05, "loss": 2.4249, "step": 12195 }, { "epoch": 0.16, "grad_norm": 4.077910900115967, "learning_rate": 1.9995199122736245e-05, "loss": 2.0906, "step": 12196 }, { "epoch": 0.16, "grad_norm": 5.196506023406982, "learning_rate": 1.9995195866509224e-05, "loss": 2.1816, "step": 12197 }, { "epoch": 0.16, "grad_norm": 4.595986843109131, "learning_rate": 1.9995192609178563e-05, "loss": 2.0789, "step": 12198 }, { "epoch": 0.16, "grad_norm": 4.7031707763671875, "learning_rate": 1.9995189350744263e-05, "loss": 2.4987, "step": 12199 }, { "epoch": 0.16, "grad_norm": 5.315986156463623, "learning_rate": 1.9995186091206325e-05, "loss": 2.2755, "step": 12200 }, { "epoch": 0.16, "grad_norm": 4.7476701736450195, "learning_rate": 1.9995182830564745e-05, "loss": 2.2846, "step": 12201 }, { "epoch": 0.16, "grad_norm": 4.579911231994629, "learning_rate": 1.999517956881953e-05, "loss": 2.0513, "step": 12202 }, { "epoch": 0.16, "grad_norm": 4.5574750900268555, "learning_rate": 1.999517630597068e-05, "loss": 2.3358, "step": 12203 }, { "epoch": 0.16, "grad_norm": 5.064960956573486, "learning_rate": 1.999517304201819e-05, "loss": 2.2762, "step": 12204 }, { "epoch": 0.16, "grad_norm": 5.352512836456299, "learning_rate": 1.9995169776962065e-05, "loss": 2.2469, "step": 12205 }, { "epoch": 0.16, "grad_norm": 4.93815279006958, "learning_rate": 1.9995166510802302e-05, "loss": 2.8009, "step": 12206 }, { "epoch": 0.16, "grad_norm": 4.395145893096924, "learning_rate": 1.9995163243538903e-05, "loss": 2.0831, "step": 12207 }, { "epoch": 0.16, "grad_norm": 4.101535320281982, "learning_rate": 1.999515997517187e-05, "loss": 1.9265, "step": 12208 }, { "epoch": 0.16, "grad_norm": 4.304832458496094, "learning_rate": 1.99951567057012e-05, "loss": 2.2029, "step": 12209 }, { "epoch": 0.16, "grad_norm": 4.577499866485596, "learning_rate": 1.9995153435126896e-05, "loss": 2.2861, "step": 12210 }, { "epoch": 0.16, "grad_norm": 4.588957786560059, "learning_rate": 1.999515016344896e-05, "loss": 2.0886, "step": 12211 }, { "epoch": 0.16, "grad_norm": 4.734562873840332, "learning_rate": 1.9995146890667386e-05, "loss": 2.3573, "step": 12212 }, { "epoch": 0.16, "grad_norm": 5.405834197998047, "learning_rate": 1.9995143616782177e-05, "loss": 2.7432, "step": 12213 }, { "epoch": 0.16, "grad_norm": 4.830966949462891, "learning_rate": 1.999514034179334e-05, "loss": 2.216, "step": 12214 }, { "epoch": 0.16, "grad_norm": 4.320373058319092, "learning_rate": 1.999513706570087e-05, "loss": 2.3499, "step": 12215 }, { "epoch": 0.16, "grad_norm": 4.942784309387207, "learning_rate": 1.999513378850476e-05, "loss": 2.1331, "step": 12216 }, { "epoch": 0.16, "grad_norm": 5.134283542633057, "learning_rate": 1.9995130510205024e-05, "loss": 2.9525, "step": 12217 }, { "epoch": 0.16, "grad_norm": 4.886086463928223, "learning_rate": 1.9995127230801653e-05, "loss": 2.2559, "step": 12218 }, { "epoch": 0.16, "grad_norm": 5.364373683929443, "learning_rate": 1.999512395029465e-05, "loss": 2.6574, "step": 12219 }, { "epoch": 0.16, "grad_norm": 4.688169002532959, "learning_rate": 1.9995120668684017e-05, "loss": 2.496, "step": 12220 }, { "epoch": 0.16, "grad_norm": 4.064562797546387, "learning_rate": 1.9995117385969757e-05, "loss": 1.8405, "step": 12221 }, { "epoch": 0.16, "grad_norm": 4.62737512588501, "learning_rate": 1.999511410215186e-05, "loss": 2.0303, "step": 12222 }, { "epoch": 0.16, "grad_norm": 4.677414894104004, "learning_rate": 1.9995110817230335e-05, "loss": 2.2601, "step": 12223 }, { "epoch": 0.16, "grad_norm": 4.444288730621338, "learning_rate": 1.9995107531205183e-05, "loss": 1.8412, "step": 12224 }, { "epoch": 0.16, "grad_norm": 5.693845748901367, "learning_rate": 1.9995104244076398e-05, "loss": 2.9292, "step": 12225 }, { "epoch": 0.16, "grad_norm": 4.530627727508545, "learning_rate": 1.9995100955843987e-05, "loss": 2.0276, "step": 12226 }, { "epoch": 0.16, "grad_norm": 4.739412784576416, "learning_rate": 1.9995097666507945e-05, "loss": 2.3695, "step": 12227 }, { "epoch": 0.16, "grad_norm": 4.600285053253174, "learning_rate": 1.9995094376068274e-05, "loss": 2.4225, "step": 12228 }, { "epoch": 0.16, "grad_norm": 4.442237854003906, "learning_rate": 1.9995091084524977e-05, "loss": 1.9646, "step": 12229 }, { "epoch": 0.16, "grad_norm": 4.105595111846924, "learning_rate": 1.999508779187805e-05, "loss": 1.8727, "step": 12230 }, { "epoch": 0.16, "grad_norm": 5.100274562835693, "learning_rate": 1.99950844981275e-05, "loss": 2.8016, "step": 12231 }, { "epoch": 0.16, "grad_norm": 4.538180828094482, "learning_rate": 1.999508120327332e-05, "loss": 2.0832, "step": 12232 }, { "epoch": 0.16, "grad_norm": 5.2592997550964355, "learning_rate": 1.9995077907315514e-05, "loss": 2.6706, "step": 12233 }, { "epoch": 0.16, "grad_norm": 4.491343021392822, "learning_rate": 1.9995074610254082e-05, "loss": 2.1919, "step": 12234 }, { "epoch": 0.16, "grad_norm": 3.8846542835235596, "learning_rate": 1.9995071312089023e-05, "loss": 1.6964, "step": 12235 }, { "epoch": 0.16, "grad_norm": 4.553421974182129, "learning_rate": 1.999506801282034e-05, "loss": 2.0797, "step": 12236 }, { "epoch": 0.16, "grad_norm": 5.107456207275391, "learning_rate": 1.999506471244803e-05, "loss": 2.3386, "step": 12237 }, { "epoch": 0.16, "grad_norm": 3.8952839374542236, "learning_rate": 1.99950614109721e-05, "loss": 1.7721, "step": 12238 }, { "epoch": 0.16, "grad_norm": 4.199173450469971, "learning_rate": 1.9995058108392542e-05, "loss": 1.9778, "step": 12239 }, { "epoch": 0.16, "grad_norm": 5.017547130584717, "learning_rate": 1.999505480470936e-05, "loss": 3.219, "step": 12240 }, { "epoch": 0.16, "grad_norm": 4.514392852783203, "learning_rate": 1.9995051499922552e-05, "loss": 2.0538, "step": 12241 }, { "epoch": 0.16, "grad_norm": 4.805665016174316, "learning_rate": 1.9995048194032126e-05, "loss": 2.4992, "step": 12242 }, { "epoch": 0.16, "grad_norm": 5.3421502113342285, "learning_rate": 1.9995044887038074e-05, "loss": 2.1386, "step": 12243 }, { "epoch": 0.16, "grad_norm": 4.980048656463623, "learning_rate": 1.9995041578940402e-05, "loss": 2.6861, "step": 12244 }, { "epoch": 0.16, "grad_norm": 5.068554401397705, "learning_rate": 1.9995038269739104e-05, "loss": 1.9548, "step": 12245 }, { "epoch": 0.16, "grad_norm": 4.778740882873535, "learning_rate": 1.9995034959434186e-05, "loss": 2.3928, "step": 12246 }, { "epoch": 0.16, "grad_norm": 4.671125888824463, "learning_rate": 1.9995031648025645e-05, "loss": 2.3512, "step": 12247 }, { "epoch": 0.16, "grad_norm": 4.753431797027588, "learning_rate": 1.9995028335513488e-05, "loss": 2.2109, "step": 12248 }, { "epoch": 0.16, "grad_norm": 4.656976699829102, "learning_rate": 1.9995025021897705e-05, "loss": 1.9191, "step": 12249 }, { "epoch": 0.16, "grad_norm": 4.74257230758667, "learning_rate": 1.9995021707178302e-05, "loss": 2.6154, "step": 12250 }, { "epoch": 0.16, "grad_norm": 4.895369052886963, "learning_rate": 1.9995018391355283e-05, "loss": 2.5505, "step": 12251 }, { "epoch": 0.16, "grad_norm": 4.912349700927734, "learning_rate": 1.9995015074428645e-05, "loss": 2.8281, "step": 12252 }, { "epoch": 0.16, "grad_norm": 5.303030490875244, "learning_rate": 1.9995011756398383e-05, "loss": 2.8462, "step": 12253 }, { "epoch": 0.16, "grad_norm": 4.425755977630615, "learning_rate": 1.9995008437264502e-05, "loss": 2.1863, "step": 12254 }, { "epoch": 0.16, "grad_norm": 4.670279502868652, "learning_rate": 1.9995005117027005e-05, "loss": 2.5041, "step": 12255 }, { "epoch": 0.16, "grad_norm": 5.014029026031494, "learning_rate": 1.9995001795685888e-05, "loss": 2.7791, "step": 12256 }, { "epoch": 0.16, "grad_norm": 5.019250392913818, "learning_rate": 1.9994998473241155e-05, "loss": 2.677, "step": 12257 }, { "epoch": 0.16, "grad_norm": 4.370760440826416, "learning_rate": 1.9994995149692803e-05, "loss": 2.3052, "step": 12258 }, { "epoch": 0.16, "grad_norm": 4.1120123863220215, "learning_rate": 1.9994991825040837e-05, "loss": 1.904, "step": 12259 }, { "epoch": 0.16, "grad_norm": 5.250273704528809, "learning_rate": 1.9994988499285256e-05, "loss": 2.5683, "step": 12260 }, { "epoch": 0.16, "grad_norm": 4.814823150634766, "learning_rate": 1.9994985172426055e-05, "loss": 2.504, "step": 12261 }, { "epoch": 0.16, "grad_norm": 4.617379665374756, "learning_rate": 1.9994981844463238e-05, "loss": 1.9281, "step": 12262 }, { "epoch": 0.16, "grad_norm": 4.510617733001709, "learning_rate": 1.9994978515396805e-05, "loss": 2.0067, "step": 12263 }, { "epoch": 0.16, "grad_norm": 5.139542102813721, "learning_rate": 1.9994975185226755e-05, "loss": 2.375, "step": 12264 }, { "epoch": 0.16, "grad_norm": 4.43529748916626, "learning_rate": 1.9994971853953093e-05, "loss": 2.0744, "step": 12265 }, { "epoch": 0.16, "grad_norm": 4.441888809204102, "learning_rate": 1.9994968521575818e-05, "loss": 2.2068, "step": 12266 }, { "epoch": 0.16, "grad_norm": 4.3101983070373535, "learning_rate": 1.9994965188094926e-05, "loss": 2.4074, "step": 12267 }, { "epoch": 0.16, "grad_norm": 5.189949989318848, "learning_rate": 1.9994961853510422e-05, "loss": 2.3265, "step": 12268 }, { "epoch": 0.16, "grad_norm": 4.4362006187438965, "learning_rate": 1.9994958517822302e-05, "loss": 2.0024, "step": 12269 }, { "epoch": 0.16, "grad_norm": 4.72829532623291, "learning_rate": 1.999495518103057e-05, "loss": 2.3205, "step": 12270 }, { "epoch": 0.16, "grad_norm": 4.8539276123046875, "learning_rate": 1.9994951843135227e-05, "loss": 2.5339, "step": 12271 }, { "epoch": 0.16, "grad_norm": 4.706472873687744, "learning_rate": 1.9994948504136272e-05, "loss": 2.7006, "step": 12272 }, { "epoch": 0.16, "grad_norm": 4.717450141906738, "learning_rate": 1.9994945164033704e-05, "loss": 2.4306, "step": 12273 }, { "epoch": 0.16, "grad_norm": 3.918419599533081, "learning_rate": 1.9994941822827523e-05, "loss": 1.8596, "step": 12274 }, { "epoch": 0.16, "grad_norm": 5.016216278076172, "learning_rate": 1.9994938480517737e-05, "loss": 2.525, "step": 12275 }, { "epoch": 0.16, "grad_norm": 4.5682549476623535, "learning_rate": 1.9994935137104334e-05, "loss": 2.2218, "step": 12276 }, { "epoch": 0.16, "grad_norm": 4.719927787780762, "learning_rate": 1.999493179258732e-05, "loss": 2.3732, "step": 12277 }, { "epoch": 0.16, "grad_norm": 4.522276401519775, "learning_rate": 1.99949284469667e-05, "loss": 2.5104, "step": 12278 }, { "epoch": 0.16, "grad_norm": 4.206416606903076, "learning_rate": 1.999492510024247e-05, "loss": 1.6515, "step": 12279 }, { "epoch": 0.16, "grad_norm": 4.412840366363525, "learning_rate": 1.999492175241463e-05, "loss": 2.1277, "step": 12280 }, { "epoch": 0.16, "grad_norm": 4.7512969970703125, "learning_rate": 1.9994918403483182e-05, "loss": 2.5394, "step": 12281 }, { "epoch": 0.16, "grad_norm": 4.626897811889648, "learning_rate": 1.9994915053448123e-05, "loss": 2.5012, "step": 12282 }, { "epoch": 0.16, "grad_norm": 4.781474590301514, "learning_rate": 1.9994911702309458e-05, "loss": 2.5098, "step": 12283 }, { "epoch": 0.16, "grad_norm": 4.270496845245361, "learning_rate": 1.9994908350067183e-05, "loss": 2.1101, "step": 12284 }, { "epoch": 0.16, "grad_norm": 4.994543075561523, "learning_rate": 1.99949049967213e-05, "loss": 2.5053, "step": 12285 }, { "epoch": 0.16, "grad_norm": 5.026581287384033, "learning_rate": 1.9994901642271812e-05, "loss": 2.4653, "step": 12286 }, { "epoch": 0.16, "grad_norm": 5.419445514678955, "learning_rate": 1.9994898286718716e-05, "loss": 2.8467, "step": 12287 }, { "epoch": 0.16, "grad_norm": 5.520662784576416, "learning_rate": 1.9994894930062014e-05, "loss": 2.3289, "step": 12288 }, { "epoch": 0.16, "grad_norm": 5.001556396484375, "learning_rate": 1.999489157230171e-05, "loss": 2.2162, "step": 12289 }, { "epoch": 0.16, "grad_norm": 4.478918552398682, "learning_rate": 1.9994888213437792e-05, "loss": 2.539, "step": 12290 }, { "epoch": 0.16, "grad_norm": 4.944058895111084, "learning_rate": 1.9994884853470276e-05, "loss": 2.91, "step": 12291 }, { "epoch": 0.16, "grad_norm": 4.258758068084717, "learning_rate": 1.9994881492399153e-05, "loss": 1.9357, "step": 12292 }, { "epoch": 0.16, "grad_norm": 4.616855144500732, "learning_rate": 1.9994878130224428e-05, "loss": 2.6307, "step": 12293 }, { "epoch": 0.16, "grad_norm": 4.807067394256592, "learning_rate": 1.9994874766946093e-05, "loss": 2.1582, "step": 12294 }, { "epoch": 0.16, "grad_norm": 4.722599029541016, "learning_rate": 1.9994871402564156e-05, "loss": 2.2804, "step": 12295 }, { "epoch": 0.16, "grad_norm": 4.751645088195801, "learning_rate": 1.999486803707862e-05, "loss": 2.8258, "step": 12296 }, { "epoch": 0.16, "grad_norm": 4.610633373260498, "learning_rate": 1.9994864670489477e-05, "loss": 2.166, "step": 12297 }, { "epoch": 0.16, "grad_norm": 4.654092788696289, "learning_rate": 1.9994861302796732e-05, "loss": 2.3179, "step": 12298 }, { "epoch": 0.16, "grad_norm": 4.877201080322266, "learning_rate": 1.9994857934000388e-05, "loss": 2.705, "step": 12299 }, { "epoch": 0.16, "grad_norm": 5.3780131340026855, "learning_rate": 1.9994854564100437e-05, "loss": 2.2401, "step": 12300 }, { "epoch": 0.16, "grad_norm": 5.26255464553833, "learning_rate": 1.9994851193096888e-05, "loss": 2.5254, "step": 12301 }, { "epoch": 0.16, "grad_norm": 4.868591785430908, "learning_rate": 1.9994847820989736e-05, "loss": 2.4366, "step": 12302 }, { "epoch": 0.16, "grad_norm": 5.043830871582031, "learning_rate": 1.9994844447778984e-05, "loss": 2.6125, "step": 12303 }, { "epoch": 0.16, "grad_norm": 4.777437210083008, "learning_rate": 1.999484107346463e-05, "loss": 2.5605, "step": 12304 }, { "epoch": 0.16, "grad_norm": 5.026634693145752, "learning_rate": 1.9994837698046677e-05, "loss": 2.6388, "step": 12305 }, { "epoch": 0.16, "grad_norm": 5.554520606994629, "learning_rate": 1.9994834321525128e-05, "loss": 2.2826, "step": 12306 }, { "epoch": 0.16, "grad_norm": 4.165430068969727, "learning_rate": 1.9994830943899973e-05, "loss": 2.6039, "step": 12307 }, { "epoch": 0.16, "grad_norm": 4.49168586730957, "learning_rate": 1.9994827565171226e-05, "loss": 2.5222, "step": 12308 }, { "epoch": 0.16, "grad_norm": 5.815671920776367, "learning_rate": 1.9994824185338875e-05, "loss": 2.3316, "step": 12309 }, { "epoch": 0.16, "grad_norm": 5.458662033081055, "learning_rate": 1.999482080440293e-05, "loss": 2.5176, "step": 12310 }, { "epoch": 0.16, "grad_norm": 4.951757907867432, "learning_rate": 1.999481742236338e-05, "loss": 2.7634, "step": 12311 }, { "epoch": 0.16, "grad_norm": 5.326369285583496, "learning_rate": 1.9994814039220243e-05, "loss": 2.9608, "step": 12312 }, { "epoch": 0.16, "grad_norm": 4.657902717590332, "learning_rate": 1.9994810654973503e-05, "loss": 2.0894, "step": 12313 }, { "epoch": 0.16, "grad_norm": 4.47396993637085, "learning_rate": 1.9994807269623167e-05, "loss": 2.2802, "step": 12314 }, { "epoch": 0.16, "grad_norm": 4.527629375457764, "learning_rate": 1.9994803883169235e-05, "loss": 1.8834, "step": 12315 }, { "epoch": 0.16, "grad_norm": 5.386363983154297, "learning_rate": 1.9994800495611707e-05, "loss": 2.5248, "step": 12316 }, { "epoch": 0.16, "grad_norm": 4.735013961791992, "learning_rate": 1.999479710695058e-05, "loss": 2.5049, "step": 12317 }, { "epoch": 0.16, "grad_norm": 4.014523506164551, "learning_rate": 1.9994793717185864e-05, "loss": 2.1697, "step": 12318 }, { "epoch": 0.16, "grad_norm": 4.504000663757324, "learning_rate": 1.999479032631755e-05, "loss": 2.2357, "step": 12319 }, { "epoch": 0.16, "grad_norm": 4.40258264541626, "learning_rate": 1.999478693434564e-05, "loss": 2.308, "step": 12320 }, { "epoch": 0.16, "grad_norm": 4.494103908538818, "learning_rate": 1.999478354127014e-05, "loss": 2.4161, "step": 12321 }, { "epoch": 0.16, "grad_norm": 4.628570079803467, "learning_rate": 1.9994780147091045e-05, "loss": 2.1447, "step": 12322 }, { "epoch": 0.16, "grad_norm": 4.200405120849609, "learning_rate": 1.9994776751808356e-05, "loss": 2.3339, "step": 12323 }, { "epoch": 0.16, "grad_norm": 5.090634822845459, "learning_rate": 1.9994773355422072e-05, "loss": 2.3733, "step": 12324 }, { "epoch": 0.16, "grad_norm": 4.960862159729004, "learning_rate": 1.99947699579322e-05, "loss": 2.3439, "step": 12325 }, { "epoch": 0.16, "grad_norm": 4.401991367340088, "learning_rate": 1.9994766559338733e-05, "loss": 2.3382, "step": 12326 }, { "epoch": 0.16, "grad_norm": 5.277640342712402, "learning_rate": 1.9994763159641674e-05, "loss": 2.5656, "step": 12327 }, { "epoch": 0.16, "grad_norm": 5.1721625328063965, "learning_rate": 1.9994759758841024e-05, "loss": 2.5852, "step": 12328 }, { "epoch": 0.16, "grad_norm": 4.212710380554199, "learning_rate": 1.999475635693678e-05, "loss": 2.2045, "step": 12329 }, { "epoch": 0.16, "grad_norm": 5.206492900848389, "learning_rate": 1.999475295392895e-05, "loss": 2.4226, "step": 12330 }, { "epoch": 0.16, "grad_norm": 4.351473808288574, "learning_rate": 1.999474954981753e-05, "loss": 2.106, "step": 12331 }, { "epoch": 0.16, "grad_norm": 5.139228820800781, "learning_rate": 1.9994746144602516e-05, "loss": 2.0049, "step": 12332 }, { "epoch": 0.16, "grad_norm": 4.889039039611816, "learning_rate": 1.9994742738283914e-05, "loss": 2.1902, "step": 12333 }, { "epoch": 0.16, "grad_norm": 4.678363800048828, "learning_rate": 1.9994739330861725e-05, "loss": 2.5448, "step": 12334 }, { "epoch": 0.16, "grad_norm": 4.703399181365967, "learning_rate": 1.9994735922335947e-05, "loss": 2.7646, "step": 12335 }, { "epoch": 0.16, "grad_norm": 4.962507724761963, "learning_rate": 1.9994732512706577e-05, "loss": 2.7001, "step": 12336 }, { "epoch": 0.16, "grad_norm": 5.099864959716797, "learning_rate": 1.9994729101973622e-05, "loss": 2.4332, "step": 12337 }, { "epoch": 0.16, "grad_norm": 4.3174052238464355, "learning_rate": 1.9994725690137077e-05, "loss": 2.144, "step": 12338 }, { "epoch": 0.16, "grad_norm": 5.664813995361328, "learning_rate": 1.9994722277196947e-05, "loss": 2.5795, "step": 12339 }, { "epoch": 0.16, "grad_norm": 4.677440166473389, "learning_rate": 1.999471886315323e-05, "loss": 1.9573, "step": 12340 }, { "epoch": 0.16, "grad_norm": 4.702722072601318, "learning_rate": 1.9994715448005925e-05, "loss": 2.5216, "step": 12341 }, { "epoch": 0.16, "grad_norm": 4.947996139526367, "learning_rate": 1.9994712031755038e-05, "loss": 2.3903, "step": 12342 }, { "epoch": 0.16, "grad_norm": 4.947399616241455, "learning_rate": 1.999470861440056e-05, "loss": 2.304, "step": 12343 }, { "epoch": 0.16, "grad_norm": 4.064013957977295, "learning_rate": 1.9994705195942503e-05, "loss": 1.9495, "step": 12344 }, { "epoch": 0.16, "grad_norm": 4.271759033203125, "learning_rate": 1.9994701776380855e-05, "loss": 2.2647, "step": 12345 }, { "epoch": 0.16, "grad_norm": 4.899185657501221, "learning_rate": 1.999469835571562e-05, "loss": 2.3076, "step": 12346 }, { "epoch": 0.16, "grad_norm": 4.203751564025879, "learning_rate": 1.999469493394681e-05, "loss": 2.0006, "step": 12347 }, { "epoch": 0.16, "grad_norm": 4.228447437286377, "learning_rate": 1.999469151107441e-05, "loss": 2.3486, "step": 12348 }, { "epoch": 0.16, "grad_norm": 4.5286335945129395, "learning_rate": 1.9994688087098427e-05, "loss": 2.2966, "step": 12349 }, { "epoch": 0.16, "grad_norm": 4.895717144012451, "learning_rate": 1.999468466201886e-05, "loss": 2.2732, "step": 12350 }, { "epoch": 0.16, "grad_norm": 5.291358947753906, "learning_rate": 1.999468123583571e-05, "loss": 2.701, "step": 12351 }, { "epoch": 0.16, "grad_norm": 4.8841753005981445, "learning_rate": 1.9994677808548978e-05, "loss": 2.2237, "step": 12352 }, { "epoch": 0.16, "grad_norm": 4.619782447814941, "learning_rate": 1.9994674380158665e-05, "loss": 2.2455, "step": 12353 }, { "epoch": 0.16, "grad_norm": 4.9257354736328125, "learning_rate": 1.999467095066477e-05, "loss": 2.612, "step": 12354 }, { "epoch": 0.16, "grad_norm": 5.060259819030762, "learning_rate": 1.9994667520067296e-05, "loss": 2.7587, "step": 12355 }, { "epoch": 0.16, "grad_norm": 4.196366310119629, "learning_rate": 1.999466408836624e-05, "loss": 2.1604, "step": 12356 }, { "epoch": 0.16, "grad_norm": 4.533842086791992, "learning_rate": 1.99946606555616e-05, "loss": 2.4759, "step": 12357 }, { "epoch": 0.16, "grad_norm": 4.858612537384033, "learning_rate": 1.9994657221653387e-05, "loss": 2.3988, "step": 12358 }, { "epoch": 0.16, "grad_norm": 5.305985927581787, "learning_rate": 1.9994653786641587e-05, "loss": 2.6339, "step": 12359 }, { "epoch": 0.16, "grad_norm": 4.632864475250244, "learning_rate": 1.999465035052621e-05, "loss": 2.3179, "step": 12360 }, { "epoch": 0.16, "grad_norm": 3.8055295944213867, "learning_rate": 1.9994646913307257e-05, "loss": 1.8026, "step": 12361 }, { "epoch": 0.16, "grad_norm": 4.7155442237854, "learning_rate": 1.9994643474984724e-05, "loss": 1.6654, "step": 12362 }, { "epoch": 0.16, "grad_norm": 4.653791904449463, "learning_rate": 1.9994640035558612e-05, "loss": 2.1233, "step": 12363 }, { "epoch": 0.16, "grad_norm": 4.940434455871582, "learning_rate": 1.999463659502892e-05, "loss": 2.6324, "step": 12364 }, { "epoch": 0.16, "grad_norm": 5.4724626541137695, "learning_rate": 1.9994633153395654e-05, "loss": 2.6261, "step": 12365 }, { "epoch": 0.16, "grad_norm": 5.096858501434326, "learning_rate": 1.999462971065881e-05, "loss": 2.7609, "step": 12366 }, { "epoch": 0.16, "grad_norm": 4.002224922180176, "learning_rate": 1.9994626266818388e-05, "loss": 1.4546, "step": 12367 }, { "epoch": 0.16, "grad_norm": 4.292705535888672, "learning_rate": 1.9994622821874392e-05, "loss": 1.9906, "step": 12368 }, { "epoch": 0.16, "grad_norm": 4.678934097290039, "learning_rate": 1.9994619375826816e-05, "loss": 2.5099, "step": 12369 }, { "epoch": 0.16, "grad_norm": 5.158237934112549, "learning_rate": 1.999461592867567e-05, "loss": 2.8602, "step": 12370 }, { "epoch": 0.16, "grad_norm": 5.37701416015625, "learning_rate": 1.9994612480420946e-05, "loss": 2.8186, "step": 12371 }, { "epoch": 0.16, "grad_norm": 4.217406272888184, "learning_rate": 1.9994609031062648e-05, "loss": 1.7631, "step": 12372 }, { "epoch": 0.16, "grad_norm": 4.808892250061035, "learning_rate": 1.9994605580600774e-05, "loss": 2.2256, "step": 12373 }, { "epoch": 0.16, "grad_norm": 4.902188301086426, "learning_rate": 1.9994602129035328e-05, "loss": 2.2314, "step": 12374 }, { "epoch": 0.16, "grad_norm": 4.639349460601807, "learning_rate": 1.9994598676366306e-05, "loss": 2.1379, "step": 12375 }, { "epoch": 0.16, "grad_norm": 5.112982273101807, "learning_rate": 1.9994595222593712e-05, "loss": 2.2415, "step": 12376 }, { "epoch": 0.16, "grad_norm": 4.8795294761657715, "learning_rate": 1.9994591767717546e-05, "loss": 1.9727, "step": 12377 }, { "epoch": 0.16, "grad_norm": 4.823515892028809, "learning_rate": 1.9994588311737805e-05, "loss": 2.2019, "step": 12378 }, { "epoch": 0.16, "grad_norm": 4.041085720062256, "learning_rate": 1.9994584854654495e-05, "loss": 1.6022, "step": 12379 }, { "epoch": 0.16, "grad_norm": 5.16605806350708, "learning_rate": 1.9994581396467613e-05, "loss": 2.7266, "step": 12380 }, { "epoch": 0.16, "grad_norm": 4.488144874572754, "learning_rate": 1.999457793717716e-05, "loss": 1.9542, "step": 12381 }, { "epoch": 0.16, "grad_norm": 5.103454113006592, "learning_rate": 1.9994574476783132e-05, "loss": 2.4726, "step": 12382 }, { "epoch": 0.16, "grad_norm": 5.216377258300781, "learning_rate": 1.9994571015285533e-05, "loss": 2.6823, "step": 12383 }, { "epoch": 0.16, "grad_norm": 4.180167198181152, "learning_rate": 1.9994567552684366e-05, "loss": 2.0838, "step": 12384 }, { "epoch": 0.16, "grad_norm": 5.088379859924316, "learning_rate": 1.999456408897963e-05, "loss": 2.3319, "step": 12385 }, { "epoch": 0.16, "grad_norm": 5.033322811126709, "learning_rate": 1.9994560624171325e-05, "loss": 2.992, "step": 12386 }, { "epoch": 0.16, "grad_norm": 4.908443450927734, "learning_rate": 1.9994557158259452e-05, "loss": 2.3433, "step": 12387 }, { "epoch": 0.16, "grad_norm": 4.768362522125244, "learning_rate": 1.9994553691244006e-05, "loss": 2.4235, "step": 12388 }, { "epoch": 0.16, "grad_norm": 4.984334468841553, "learning_rate": 1.9994550223125e-05, "loss": 2.1047, "step": 12389 }, { "epoch": 0.16, "grad_norm": 4.496577739715576, "learning_rate": 1.999454675390242e-05, "loss": 2.1434, "step": 12390 }, { "epoch": 0.16, "grad_norm": 4.584146022796631, "learning_rate": 1.999454328357627e-05, "loss": 2.1706, "step": 12391 }, { "epoch": 0.16, "grad_norm": 4.864652633666992, "learning_rate": 1.9994539812146557e-05, "loss": 2.4266, "step": 12392 }, { "epoch": 0.16, "grad_norm": 4.951892375946045, "learning_rate": 1.9994536339613274e-05, "loss": 2.7265, "step": 12393 }, { "epoch": 0.16, "grad_norm": 3.9817049503326416, "learning_rate": 1.999453286597643e-05, "loss": 1.9953, "step": 12394 }, { "epoch": 0.16, "grad_norm": 5.085692405700684, "learning_rate": 1.9994529391236017e-05, "loss": 2.6037, "step": 12395 }, { "epoch": 0.16, "grad_norm": 4.908407688140869, "learning_rate": 1.9994525915392035e-05, "loss": 2.3899, "step": 12396 }, { "epoch": 0.16, "grad_norm": 4.714427471160889, "learning_rate": 1.999452243844449e-05, "loss": 2.1732, "step": 12397 }, { "epoch": 0.16, "grad_norm": 4.435211181640625, "learning_rate": 1.999451896039338e-05, "loss": 2.1517, "step": 12398 }, { "epoch": 0.16, "grad_norm": 4.409881591796875, "learning_rate": 1.999451548123871e-05, "loss": 2.1959, "step": 12399 }, { "epoch": 0.16, "grad_norm": 5.047558784484863, "learning_rate": 1.9994512000980474e-05, "loss": 2.3989, "step": 12400 }, { "epoch": 0.16, "grad_norm": 4.995269775390625, "learning_rate": 1.999450851961867e-05, "loss": 2.6346, "step": 12401 }, { "epoch": 0.16, "grad_norm": 4.341604232788086, "learning_rate": 1.9994505037153305e-05, "loss": 2.5179, "step": 12402 }, { "epoch": 0.16, "grad_norm": 5.151611804962158, "learning_rate": 1.999450155358438e-05, "loss": 2.8925, "step": 12403 }, { "epoch": 0.16, "grad_norm": 4.339518070220947, "learning_rate": 1.999449806891189e-05, "loss": 2.2027, "step": 12404 }, { "epoch": 0.16, "grad_norm": 5.198904991149902, "learning_rate": 1.9994494583135838e-05, "loss": 2.1642, "step": 12405 }, { "epoch": 0.16, "grad_norm": 4.757402420043945, "learning_rate": 1.9994491096256223e-05, "loss": 2.5135, "step": 12406 }, { "epoch": 0.16, "grad_norm": 4.4477643966674805, "learning_rate": 1.999448760827305e-05, "loss": 2.2338, "step": 12407 }, { "epoch": 0.16, "grad_norm": 4.484582901000977, "learning_rate": 1.999448411918631e-05, "loss": 1.9098, "step": 12408 }, { "epoch": 0.16, "grad_norm": 4.72814416885376, "learning_rate": 1.9994480628996013e-05, "loss": 2.3096, "step": 12409 }, { "epoch": 0.16, "grad_norm": 4.643751621246338, "learning_rate": 1.9994477137702157e-05, "loss": 2.6086, "step": 12410 }, { "epoch": 0.16, "grad_norm": 4.495089530944824, "learning_rate": 1.999447364530474e-05, "loss": 2.5534, "step": 12411 }, { "epoch": 0.16, "grad_norm": 5.0065016746521, "learning_rate": 1.9994470151803765e-05, "loss": 2.4221, "step": 12412 }, { "epoch": 0.16, "grad_norm": 4.542356491088867, "learning_rate": 1.999446665719923e-05, "loss": 1.91, "step": 12413 }, { "epoch": 0.16, "grad_norm": 3.92244553565979, "learning_rate": 1.9994463161491136e-05, "loss": 1.9657, "step": 12414 }, { "epoch": 0.16, "grad_norm": 4.507784366607666, "learning_rate": 1.999445966467948e-05, "loss": 2.1208, "step": 12415 }, { "epoch": 0.16, "grad_norm": 5.83372163772583, "learning_rate": 1.9994456166764273e-05, "loss": 2.9242, "step": 12416 }, { "epoch": 0.16, "grad_norm": 5.597304344177246, "learning_rate": 1.9994452667745507e-05, "loss": 2.6314, "step": 12417 }, { "epoch": 0.16, "grad_norm": 4.765246391296387, "learning_rate": 1.999444916762318e-05, "loss": 2.6295, "step": 12418 }, { "epoch": 0.16, "grad_norm": 5.009864330291748, "learning_rate": 1.99944456663973e-05, "loss": 2.1319, "step": 12419 }, { "epoch": 0.16, "grad_norm": 5.136650562286377, "learning_rate": 1.999444216406786e-05, "loss": 2.2321, "step": 12420 }, { "epoch": 0.16, "grad_norm": 4.647899627685547, "learning_rate": 1.9994438660634867e-05, "loss": 2.3932, "step": 12421 }, { "epoch": 0.16, "grad_norm": 4.636209487915039, "learning_rate": 1.9994435156098318e-05, "loss": 1.9133, "step": 12422 }, { "epoch": 0.16, "grad_norm": 4.35921573638916, "learning_rate": 1.9994431650458214e-05, "loss": 2.1251, "step": 12423 }, { "epoch": 0.16, "grad_norm": 5.061992645263672, "learning_rate": 1.999442814371455e-05, "loss": 2.747, "step": 12424 }, { "epoch": 0.16, "grad_norm": 4.137241840362549, "learning_rate": 1.9994424635867337e-05, "loss": 1.5622, "step": 12425 }, { "epoch": 0.16, "grad_norm": 4.846744060516357, "learning_rate": 1.999442112691657e-05, "loss": 2.152, "step": 12426 }, { "epoch": 0.16, "grad_norm": 4.496919631958008, "learning_rate": 1.999441761686225e-05, "loss": 2.4531, "step": 12427 }, { "epoch": 0.16, "grad_norm": 4.583195686340332, "learning_rate": 1.9994414105704373e-05, "loss": 1.9736, "step": 12428 }, { "epoch": 0.16, "grad_norm": 5.891076564788818, "learning_rate": 1.9994410593442945e-05, "loss": 2.5196, "step": 12429 }, { "epoch": 0.16, "grad_norm": 3.9811878204345703, "learning_rate": 1.9994407080077965e-05, "loss": 1.9528, "step": 12430 }, { "epoch": 0.16, "grad_norm": 4.473548889160156, "learning_rate": 1.9994403565609436e-05, "loss": 2.0673, "step": 12431 }, { "epoch": 0.16, "grad_norm": 4.475098609924316, "learning_rate": 1.999440005003735e-05, "loss": 2.2728, "step": 12432 }, { "epoch": 0.16, "grad_norm": 5.3032989501953125, "learning_rate": 1.9994396533361714e-05, "loss": 2.7325, "step": 12433 }, { "epoch": 0.16, "grad_norm": 4.646926403045654, "learning_rate": 1.999439301558253e-05, "loss": 2.1091, "step": 12434 }, { "epoch": 0.16, "grad_norm": 5.233576774597168, "learning_rate": 1.999438949669979e-05, "loss": 2.2158, "step": 12435 }, { "epoch": 0.16, "grad_norm": 5.023800373077393, "learning_rate": 1.9994385976713504e-05, "loss": 2.434, "step": 12436 }, { "epoch": 0.16, "grad_norm": 5.157833576202393, "learning_rate": 1.9994382455623668e-05, "loss": 2.9772, "step": 12437 }, { "epoch": 0.16, "grad_norm": 5.533669471740723, "learning_rate": 1.999437893343028e-05, "loss": 2.6296, "step": 12438 }, { "epoch": 0.16, "grad_norm": 4.4358229637146, "learning_rate": 1.9994375410133347e-05, "loss": 2.1887, "step": 12439 }, { "epoch": 0.16, "grad_norm": 5.019318580627441, "learning_rate": 1.9994371885732863e-05, "loss": 2.2689, "step": 12440 }, { "epoch": 0.16, "grad_norm": 5.043728351593018, "learning_rate": 1.999436836022883e-05, "loss": 2.6958, "step": 12441 }, { "epoch": 0.16, "grad_norm": 4.451081275939941, "learning_rate": 1.999436483362125e-05, "loss": 1.9042, "step": 12442 }, { "epoch": 0.16, "grad_norm": 5.1980414390563965, "learning_rate": 1.9994361305910123e-05, "loss": 2.6684, "step": 12443 }, { "epoch": 0.16, "grad_norm": 5.3092145919799805, "learning_rate": 1.999435777709545e-05, "loss": 2.8465, "step": 12444 }, { "epoch": 0.16, "grad_norm": 4.795688152313232, "learning_rate": 1.9994354247177228e-05, "loss": 2.4754, "step": 12445 }, { "epoch": 0.16, "grad_norm": 4.832864284515381, "learning_rate": 1.999435071615546e-05, "loss": 2.3061, "step": 12446 }, { "epoch": 0.16, "grad_norm": 4.537280082702637, "learning_rate": 1.9994347184030147e-05, "loss": 2.0515, "step": 12447 }, { "epoch": 0.16, "grad_norm": 4.489362716674805, "learning_rate": 1.999434365080129e-05, "loss": 2.5494, "step": 12448 }, { "epoch": 0.16, "grad_norm": 4.508523464202881, "learning_rate": 1.9994340116468886e-05, "loss": 2.1533, "step": 12449 }, { "epoch": 0.16, "grad_norm": 5.074887275695801, "learning_rate": 1.9994336581032937e-05, "loss": 2.7016, "step": 12450 }, { "epoch": 0.16, "grad_norm": 4.948628902435303, "learning_rate": 1.9994333044493444e-05, "loss": 2.2341, "step": 12451 }, { "epoch": 0.16, "grad_norm": 4.261110782623291, "learning_rate": 1.9994329506850406e-05, "loss": 2.1115, "step": 12452 }, { "epoch": 0.16, "grad_norm": 5.346983909606934, "learning_rate": 1.9994325968103826e-05, "loss": 2.478, "step": 12453 }, { "epoch": 0.16, "grad_norm": 4.988148212432861, "learning_rate": 1.99943224282537e-05, "loss": 2.3354, "step": 12454 }, { "epoch": 0.16, "grad_norm": 5.210200786590576, "learning_rate": 1.9994318887300038e-05, "loss": 2.2549, "step": 12455 }, { "epoch": 0.16, "grad_norm": 3.97902774810791, "learning_rate": 1.9994315345242827e-05, "loss": 1.9042, "step": 12456 }, { "epoch": 0.16, "grad_norm": 5.048275947570801, "learning_rate": 1.9994311802082077e-05, "loss": 2.5474, "step": 12457 }, { "epoch": 0.16, "grad_norm": 4.682741165161133, "learning_rate": 1.9994308257817786e-05, "loss": 2.1549, "step": 12458 }, { "epoch": 0.16, "grad_norm": 4.149623394012451, "learning_rate": 1.9994304712449956e-05, "loss": 2.1026, "step": 12459 }, { "epoch": 0.16, "grad_norm": 4.834197521209717, "learning_rate": 1.999430116597858e-05, "loss": 2.1485, "step": 12460 }, { "epoch": 0.16, "grad_norm": 4.360889434814453, "learning_rate": 1.9994297618403665e-05, "loss": 2.224, "step": 12461 }, { "epoch": 0.16, "grad_norm": 5.073518753051758, "learning_rate": 1.999429406972521e-05, "loss": 2.6475, "step": 12462 }, { "epoch": 0.16, "grad_norm": 3.9226112365722656, "learning_rate": 1.9994290519943215e-05, "loss": 1.7632, "step": 12463 }, { "epoch": 0.16, "grad_norm": 5.075344562530518, "learning_rate": 1.9994286969057684e-05, "loss": 2.5516, "step": 12464 }, { "epoch": 0.16, "grad_norm": 5.430224895477295, "learning_rate": 1.999428341706861e-05, "loss": 2.4208, "step": 12465 }, { "epoch": 0.16, "grad_norm": 4.508321762084961, "learning_rate": 1.9994279863976e-05, "loss": 2.3327, "step": 12466 }, { "epoch": 0.16, "grad_norm": 4.419991970062256, "learning_rate": 1.999427630977985e-05, "loss": 2.4509, "step": 12467 }, { "epoch": 0.16, "grad_norm": 4.840566635131836, "learning_rate": 1.9994272754480167e-05, "loss": 2.5252, "step": 12468 }, { "epoch": 0.16, "grad_norm": 5.242283344268799, "learning_rate": 1.9994269198076945e-05, "loss": 2.2885, "step": 12469 }, { "epoch": 0.16, "grad_norm": 5.528021335601807, "learning_rate": 1.9994265640570184e-05, "loss": 2.385, "step": 12470 }, { "epoch": 0.16, "grad_norm": 5.379014015197754, "learning_rate": 1.9994262081959886e-05, "loss": 2.4809, "step": 12471 }, { "epoch": 0.16, "grad_norm": 4.40098762512207, "learning_rate": 1.9994258522246052e-05, "loss": 2.3788, "step": 12472 }, { "epoch": 0.16, "grad_norm": 4.57075309753418, "learning_rate": 1.9994254961428684e-05, "loss": 2.3725, "step": 12473 }, { "epoch": 0.16, "grad_norm": 4.923068046569824, "learning_rate": 1.9994251399507777e-05, "loss": 2.3458, "step": 12474 }, { "epoch": 0.16, "grad_norm": 4.091531753540039, "learning_rate": 1.999424783648334e-05, "loss": 1.8451, "step": 12475 }, { "epoch": 0.16, "grad_norm": 4.417881965637207, "learning_rate": 1.9994244272355366e-05, "loss": 1.9799, "step": 12476 }, { "epoch": 0.16, "grad_norm": 4.962376594543457, "learning_rate": 1.999424070712386e-05, "loss": 2.3517, "step": 12477 }, { "epoch": 0.16, "grad_norm": 4.845852375030518, "learning_rate": 1.9994237140788816e-05, "loss": 2.3647, "step": 12478 }, { "epoch": 0.16, "grad_norm": 4.877926349639893, "learning_rate": 1.9994233573350242e-05, "loss": 2.2761, "step": 12479 }, { "epoch": 0.16, "grad_norm": 5.019411563873291, "learning_rate": 1.9994230004808136e-05, "loss": 2.6252, "step": 12480 }, { "epoch": 0.16, "grad_norm": 4.391210079193115, "learning_rate": 1.9994226435162496e-05, "loss": 2.0639, "step": 12481 }, { "epoch": 0.16, "grad_norm": 4.827850341796875, "learning_rate": 1.9994222864413324e-05, "loss": 2.3818, "step": 12482 }, { "epoch": 0.16, "grad_norm": 4.465351581573486, "learning_rate": 1.999421929256062e-05, "loss": 2.1593, "step": 12483 }, { "epoch": 0.16, "grad_norm": 4.034181594848633, "learning_rate": 1.9994215719604386e-05, "loss": 1.8694, "step": 12484 }, { "epoch": 0.16, "grad_norm": 4.376369953155518, "learning_rate": 1.999421214554462e-05, "loss": 2.1303, "step": 12485 }, { "epoch": 0.16, "grad_norm": 4.264467239379883, "learning_rate": 1.9994208570381322e-05, "loss": 1.9677, "step": 12486 }, { "epoch": 0.16, "grad_norm": 4.109961032867432, "learning_rate": 1.9994204994114496e-05, "loss": 1.8491, "step": 12487 }, { "epoch": 0.16, "grad_norm": 3.674642324447632, "learning_rate": 1.999420141674414e-05, "loss": 1.7214, "step": 12488 }, { "epoch": 0.16, "grad_norm": 5.233423709869385, "learning_rate": 1.9994197838270253e-05, "loss": 2.5153, "step": 12489 }, { "epoch": 0.16, "grad_norm": 3.8499579429626465, "learning_rate": 1.9994194258692837e-05, "loss": 2.1382, "step": 12490 }, { "epoch": 0.16, "grad_norm": 4.9915385246276855, "learning_rate": 1.9994190678011892e-05, "loss": 2.4792, "step": 12491 }, { "epoch": 0.16, "grad_norm": 5.477427005767822, "learning_rate": 1.999418709622742e-05, "loss": 2.637, "step": 12492 }, { "epoch": 0.16, "grad_norm": 4.93066930770874, "learning_rate": 1.999418351333942e-05, "loss": 2.7091, "step": 12493 }, { "epoch": 0.16, "grad_norm": 3.812260866165161, "learning_rate": 1.9994179929347895e-05, "loss": 1.6831, "step": 12494 }, { "epoch": 0.16, "grad_norm": 4.538064002990723, "learning_rate": 1.9994176344252838e-05, "loss": 2.1925, "step": 12495 }, { "epoch": 0.16, "grad_norm": 4.625162601470947, "learning_rate": 1.9994172758054256e-05, "loss": 1.8997, "step": 12496 }, { "epoch": 0.16, "grad_norm": 4.713883399963379, "learning_rate": 1.999416917075215e-05, "loss": 2.2605, "step": 12497 }, { "epoch": 0.16, "grad_norm": 4.495133399963379, "learning_rate": 1.9994165582346515e-05, "loss": 2.357, "step": 12498 }, { "epoch": 0.16, "grad_norm": 5.098232269287109, "learning_rate": 1.999416199283736e-05, "loss": 2.8815, "step": 12499 }, { "epoch": 0.16, "grad_norm": 5.255323886871338, "learning_rate": 1.999415840222468e-05, "loss": 2.7404, "step": 12500 }, { "epoch": 0.16, "grad_norm": 5.33528995513916, "learning_rate": 1.999415481050847e-05, "loss": 2.1793, "step": 12501 }, { "epoch": 0.16, "grad_norm": 5.0780463218688965, "learning_rate": 1.9994151217688736e-05, "loss": 2.0799, "step": 12502 }, { "epoch": 0.16, "grad_norm": 4.6171674728393555, "learning_rate": 1.999414762376548e-05, "loss": 2.317, "step": 12503 }, { "epoch": 0.16, "grad_norm": 5.3651123046875, "learning_rate": 1.99941440287387e-05, "loss": 2.9469, "step": 12504 }, { "epoch": 0.16, "grad_norm": 4.169868469238281, "learning_rate": 1.9994140432608398e-05, "loss": 2.1602, "step": 12505 }, { "epoch": 0.16, "grad_norm": 4.444197654724121, "learning_rate": 1.9994136835374573e-05, "loss": 2.2383, "step": 12506 }, { "epoch": 0.16, "grad_norm": 4.269342422485352, "learning_rate": 1.9994133237037223e-05, "loss": 2.2471, "step": 12507 }, { "epoch": 0.16, "grad_norm": 4.871679306030273, "learning_rate": 1.9994129637596355e-05, "loss": 2.5138, "step": 12508 }, { "epoch": 0.16, "grad_norm": 4.833482265472412, "learning_rate": 1.9994126037051966e-05, "loss": 2.3288, "step": 12509 }, { "epoch": 0.16, "grad_norm": 4.40289831161499, "learning_rate": 1.999412243540405e-05, "loss": 1.7553, "step": 12510 }, { "epoch": 0.16, "grad_norm": 4.52134370803833, "learning_rate": 1.999411883265262e-05, "loss": 2.0443, "step": 12511 }, { "epoch": 0.16, "grad_norm": 4.354568004608154, "learning_rate": 1.9994115228797666e-05, "loss": 1.9337, "step": 12512 }, { "epoch": 0.16, "grad_norm": 4.384368896484375, "learning_rate": 1.9994111623839195e-05, "loss": 2.3476, "step": 12513 }, { "epoch": 0.16, "grad_norm": 5.385881423950195, "learning_rate": 1.99941080177772e-05, "loss": 2.6614, "step": 12514 }, { "epoch": 0.16, "grad_norm": 4.659101963043213, "learning_rate": 1.999410441061169e-05, "loss": 2.2367, "step": 12515 }, { "epoch": 0.16, "grad_norm": 4.573820114135742, "learning_rate": 1.999410080234266e-05, "loss": 1.6302, "step": 12516 }, { "epoch": 0.16, "grad_norm": 5.53462028503418, "learning_rate": 1.999409719297011e-05, "loss": 2.3277, "step": 12517 }, { "epoch": 0.16, "grad_norm": 5.68405294418335, "learning_rate": 1.9994093582494044e-05, "loss": 2.5125, "step": 12518 }, { "epoch": 0.16, "grad_norm": 4.04881477355957, "learning_rate": 1.9994089970914462e-05, "loss": 1.6286, "step": 12519 }, { "epoch": 0.16, "grad_norm": 3.7229361534118652, "learning_rate": 1.999408635823136e-05, "loss": 1.7273, "step": 12520 }, { "epoch": 0.16, "grad_norm": 5.61466121673584, "learning_rate": 1.999408274444474e-05, "loss": 2.607, "step": 12521 }, { "epoch": 0.16, "grad_norm": 5.271885395050049, "learning_rate": 1.999407912955461e-05, "loss": 2.4092, "step": 12522 }, { "epoch": 0.16, "grad_norm": 3.6377270221710205, "learning_rate": 1.999407551356096e-05, "loss": 1.7982, "step": 12523 }, { "epoch": 0.16, "grad_norm": 4.882618427276611, "learning_rate": 1.9994071896463794e-05, "loss": 2.2944, "step": 12524 }, { "epoch": 0.16, "grad_norm": 3.762986183166504, "learning_rate": 1.9994068278263115e-05, "loss": 2.0314, "step": 12525 }, { "epoch": 0.16, "grad_norm": 5.483335018157959, "learning_rate": 1.999406465895892e-05, "loss": 2.5281, "step": 12526 }, { "epoch": 0.16, "grad_norm": 4.4760050773620605, "learning_rate": 1.9994061038551213e-05, "loss": 2.0976, "step": 12527 }, { "epoch": 0.16, "grad_norm": 4.489151954650879, "learning_rate": 1.9994057417039986e-05, "loss": 2.1123, "step": 12528 }, { "epoch": 0.16, "grad_norm": 4.376626968383789, "learning_rate": 1.999405379442525e-05, "loss": 1.8824, "step": 12529 }, { "epoch": 0.16, "grad_norm": 4.685856819152832, "learning_rate": 1.9994050170707e-05, "loss": 2.2664, "step": 12530 }, { "epoch": 0.16, "grad_norm": 4.632082939147949, "learning_rate": 1.999404654588524e-05, "loss": 2.3045, "step": 12531 }, { "epoch": 0.16, "grad_norm": 4.799190521240234, "learning_rate": 1.9994042919959963e-05, "loss": 1.9611, "step": 12532 }, { "epoch": 0.16, "grad_norm": 4.163972854614258, "learning_rate": 1.9994039292931178e-05, "loss": 1.958, "step": 12533 }, { "epoch": 0.16, "grad_norm": 4.644217491149902, "learning_rate": 1.9994035664798882e-05, "loss": 2.1265, "step": 12534 }, { "epoch": 0.16, "grad_norm": 4.484249591827393, "learning_rate": 1.999403203556307e-05, "loss": 2.235, "step": 12535 }, { "epoch": 0.16, "grad_norm": 5.592034339904785, "learning_rate": 1.9994028405223752e-05, "loss": 2.3829, "step": 12536 }, { "epoch": 0.16, "grad_norm": 5.1984171867370605, "learning_rate": 1.9994024773780922e-05, "loss": 2.3814, "step": 12537 }, { "epoch": 0.16, "grad_norm": 5.367483139038086, "learning_rate": 1.999402114123458e-05, "loss": 2.6949, "step": 12538 }, { "epoch": 0.16, "grad_norm": 4.309726238250732, "learning_rate": 1.999401750758473e-05, "loss": 2.064, "step": 12539 }, { "epoch": 0.16, "grad_norm": 5.658254146575928, "learning_rate": 1.9994013872831373e-05, "loss": 2.5554, "step": 12540 }, { "epoch": 0.16, "grad_norm": 5.249820709228516, "learning_rate": 1.9994010236974505e-05, "loss": 2.0771, "step": 12541 }, { "epoch": 0.16, "grad_norm": 4.585327625274658, "learning_rate": 1.999400660001413e-05, "loss": 2.2208, "step": 12542 }, { "epoch": 0.16, "grad_norm": 4.705560207366943, "learning_rate": 1.9994002961950247e-05, "loss": 2.49, "step": 12543 }, { "epoch": 0.16, "grad_norm": 5.108259677886963, "learning_rate": 1.9993999322782856e-05, "loss": 2.308, "step": 12544 }, { "epoch": 0.16, "grad_norm": 4.4619245529174805, "learning_rate": 1.999399568251196e-05, "loss": 2.1123, "step": 12545 }, { "epoch": 0.16, "grad_norm": 4.6759538650512695, "learning_rate": 1.9993992041137554e-05, "loss": 2.4177, "step": 12546 }, { "epoch": 0.16, "grad_norm": 4.38537073135376, "learning_rate": 1.9993988398659644e-05, "loss": 1.9137, "step": 12547 }, { "epoch": 0.16, "grad_norm": 4.618974208831787, "learning_rate": 1.9993984755078225e-05, "loss": 2.1312, "step": 12548 }, { "epoch": 0.16, "grad_norm": 5.2394585609436035, "learning_rate": 1.9993981110393303e-05, "loss": 2.5484, "step": 12549 }, { "epoch": 0.16, "grad_norm": 4.616937637329102, "learning_rate": 1.9993977464604875e-05, "loss": 2.2764, "step": 12550 }, { "epoch": 0.16, "grad_norm": 4.698223114013672, "learning_rate": 1.9993973817712944e-05, "loss": 2.122, "step": 12551 }, { "epoch": 0.16, "grad_norm": 4.867253303527832, "learning_rate": 1.9993970169717508e-05, "loss": 2.4752, "step": 12552 }, { "epoch": 0.16, "grad_norm": 4.7960100173950195, "learning_rate": 1.9993966520618568e-05, "loss": 2.2361, "step": 12553 }, { "epoch": 0.16, "grad_norm": 5.439713001251221, "learning_rate": 1.9993962870416123e-05, "loss": 2.4089, "step": 12554 }, { "epoch": 0.16, "grad_norm": 4.409394264221191, "learning_rate": 1.9993959219110178e-05, "loss": 2.1007, "step": 12555 }, { "epoch": 0.16, "grad_norm": 4.667548656463623, "learning_rate": 1.9993955566700728e-05, "loss": 1.6345, "step": 12556 }, { "epoch": 0.16, "grad_norm": 4.737171649932861, "learning_rate": 1.9993951913187777e-05, "loss": 2.4464, "step": 12557 }, { "epoch": 0.16, "grad_norm": 4.302194118499756, "learning_rate": 1.999394825857132e-05, "loss": 2.4584, "step": 12558 }, { "epoch": 0.16, "grad_norm": 5.224601745605469, "learning_rate": 1.999394460285137e-05, "loss": 2.4736, "step": 12559 }, { "epoch": 0.16, "grad_norm": 4.671046733856201, "learning_rate": 1.999394094602791e-05, "loss": 2.2829, "step": 12560 }, { "epoch": 0.16, "grad_norm": 5.164055824279785, "learning_rate": 1.9993937288100957e-05, "loss": 2.5354, "step": 12561 }, { "epoch": 0.16, "grad_norm": 4.3790364265441895, "learning_rate": 1.9993933629070498e-05, "loss": 2.2507, "step": 12562 }, { "epoch": 0.16, "grad_norm": 4.142065048217773, "learning_rate": 1.9993929968936544e-05, "loss": 2.0236, "step": 12563 }, { "epoch": 0.16, "grad_norm": 4.937648296356201, "learning_rate": 1.9993926307699087e-05, "loss": 2.1724, "step": 12564 }, { "epoch": 0.16, "grad_norm": 4.7081804275512695, "learning_rate": 1.999392264535813e-05, "loss": 2.5023, "step": 12565 }, { "epoch": 0.16, "grad_norm": 4.652537822723389, "learning_rate": 1.999391898191368e-05, "loss": 2.353, "step": 12566 }, { "epoch": 0.16, "grad_norm": 4.2691969871521, "learning_rate": 1.9993915317365728e-05, "loss": 2.0931, "step": 12567 }, { "epoch": 0.16, "grad_norm": 4.853679656982422, "learning_rate": 1.999391165171428e-05, "loss": 2.3172, "step": 12568 }, { "epoch": 0.16, "grad_norm": 4.73844575881958, "learning_rate": 1.999390798495933e-05, "loss": 2.6162, "step": 12569 }, { "epoch": 0.16, "grad_norm": 5.409597873687744, "learning_rate": 1.9993904317100888e-05, "loss": 2.3677, "step": 12570 }, { "epoch": 0.16, "grad_norm": 4.439749240875244, "learning_rate": 1.9993900648138947e-05, "loss": 1.8718, "step": 12571 }, { "epoch": 0.16, "grad_norm": 5.63251256942749, "learning_rate": 1.999389697807351e-05, "loss": 3.0994, "step": 12572 }, { "epoch": 0.16, "grad_norm": 4.491039752960205, "learning_rate": 1.9993893306904576e-05, "loss": 1.9597, "step": 12573 }, { "epoch": 0.16, "grad_norm": 5.347276210784912, "learning_rate": 1.999388963463215e-05, "loss": 2.6835, "step": 12574 }, { "epoch": 0.16, "grad_norm": 5.195314407348633, "learning_rate": 1.999388596125623e-05, "loss": 2.7141, "step": 12575 }, { "epoch": 0.16, "grad_norm": 4.165405750274658, "learning_rate": 1.999388228677681e-05, "loss": 2.0218, "step": 12576 }, { "epoch": 0.16, "grad_norm": 4.71677827835083, "learning_rate": 1.9993878611193897e-05, "loss": 2.6309, "step": 12577 }, { "epoch": 0.16, "grad_norm": 5.695891857147217, "learning_rate": 1.9993874934507494e-05, "loss": 2.57, "step": 12578 }, { "epoch": 0.16, "grad_norm": 4.7454938888549805, "learning_rate": 1.9993871256717593e-05, "loss": 2.4511, "step": 12579 }, { "epoch": 0.16, "grad_norm": 4.906606197357178, "learning_rate": 1.9993867577824204e-05, "loss": 2.653, "step": 12580 }, { "epoch": 0.16, "grad_norm": 4.982384204864502, "learning_rate": 1.9993863897827318e-05, "loss": 2.5566, "step": 12581 }, { "epoch": 0.16, "grad_norm": 4.612549781799316, "learning_rate": 1.9993860216726945e-05, "loss": 2.3042, "step": 12582 }, { "epoch": 0.16, "grad_norm": 5.698512554168701, "learning_rate": 1.9993856534523074e-05, "loss": 2.6585, "step": 12583 }, { "epoch": 0.16, "grad_norm": 5.18314790725708, "learning_rate": 1.9993852851215715e-05, "loss": 2.9273, "step": 12584 }, { "epoch": 0.16, "grad_norm": 4.475957870483398, "learning_rate": 1.9993849166804866e-05, "loss": 2.4173, "step": 12585 }, { "epoch": 0.16, "grad_norm": 4.486523628234863, "learning_rate": 1.9993845481290526e-05, "loss": 2.2286, "step": 12586 }, { "epoch": 0.16, "grad_norm": 5.239188194274902, "learning_rate": 1.9993841794672695e-05, "loss": 2.6377, "step": 12587 }, { "epoch": 0.16, "grad_norm": 4.304327487945557, "learning_rate": 1.9993838106951373e-05, "loss": 2.1325, "step": 12588 }, { "epoch": 0.16, "grad_norm": 5.033464431762695, "learning_rate": 1.9993834418126564e-05, "loss": 2.1272, "step": 12589 }, { "epoch": 0.16, "grad_norm": 4.794559955596924, "learning_rate": 1.9993830728198267e-05, "loss": 2.8019, "step": 12590 }, { "epoch": 0.16, "grad_norm": 4.349186897277832, "learning_rate": 1.999382703716648e-05, "loss": 2.0912, "step": 12591 }, { "epoch": 0.16, "grad_norm": 4.713449478149414, "learning_rate": 1.9993823345031205e-05, "loss": 3.0602, "step": 12592 }, { "epoch": 0.16, "grad_norm": 4.444426536560059, "learning_rate": 1.9993819651792443e-05, "loss": 2.2179, "step": 12593 }, { "epoch": 0.16, "grad_norm": 4.615180492401123, "learning_rate": 1.999381595745019e-05, "loss": 2.0298, "step": 12594 }, { "epoch": 0.16, "grad_norm": 4.185062408447266, "learning_rate": 1.9993812262004456e-05, "loss": 1.7989, "step": 12595 }, { "epoch": 0.16, "grad_norm": 5.229289531707764, "learning_rate": 1.999380856545523e-05, "loss": 2.2741, "step": 12596 }, { "epoch": 0.16, "grad_norm": 4.768967628479004, "learning_rate": 1.9993804867802523e-05, "loss": 2.1692, "step": 12597 }, { "epoch": 0.16, "grad_norm": 4.031741142272949, "learning_rate": 1.9993801169046327e-05, "loss": 2.0318, "step": 12598 }, { "epoch": 0.16, "grad_norm": 4.9786601066589355, "learning_rate": 1.9993797469186647e-05, "loss": 2.264, "step": 12599 }, { "epoch": 0.16, "grad_norm": 4.639925956726074, "learning_rate": 1.9993793768223483e-05, "loss": 2.2865, "step": 12600 }, { "epoch": 0.16, "grad_norm": 4.515322208404541, "learning_rate": 1.9993790066156835e-05, "loss": 2.2134, "step": 12601 }, { "epoch": 0.16, "grad_norm": 4.546453475952148, "learning_rate": 1.99937863629867e-05, "loss": 2.5625, "step": 12602 }, { "epoch": 0.16, "grad_norm": 4.427395820617676, "learning_rate": 1.9993782658713083e-05, "loss": 2.0522, "step": 12603 }, { "epoch": 0.16, "grad_norm": 4.493978500366211, "learning_rate": 1.9993778953335983e-05, "loss": 2.4858, "step": 12604 }, { "epoch": 0.16, "grad_norm": 4.2305121421813965, "learning_rate": 1.99937752468554e-05, "loss": 2.0033, "step": 12605 }, { "epoch": 0.16, "grad_norm": 4.350450038909912, "learning_rate": 1.9993771539271338e-05, "loss": 1.7603, "step": 12606 }, { "epoch": 0.16, "grad_norm": 4.323578834533691, "learning_rate": 1.999376783058379e-05, "loss": 2.346, "step": 12607 }, { "epoch": 0.16, "grad_norm": 4.831646919250488, "learning_rate": 1.999376412079276e-05, "loss": 2.5227, "step": 12608 }, { "epoch": 0.16, "grad_norm": 4.884199619293213, "learning_rate": 1.9993760409898253e-05, "loss": 2.3212, "step": 12609 }, { "epoch": 0.16, "grad_norm": 4.543961524963379, "learning_rate": 1.9993756697900262e-05, "loss": 2.3802, "step": 12610 }, { "epoch": 0.16, "grad_norm": 4.896425247192383, "learning_rate": 1.9993752984798794e-05, "loss": 2.1974, "step": 12611 }, { "epoch": 0.16, "grad_norm": 5.30673885345459, "learning_rate": 1.9993749270593842e-05, "loss": 2.6737, "step": 12612 }, { "epoch": 0.16, "grad_norm": 4.396398544311523, "learning_rate": 1.999374555528541e-05, "loss": 2.4179, "step": 12613 }, { "epoch": 0.16, "grad_norm": 5.89572811126709, "learning_rate": 1.9993741838873503e-05, "loss": 3.0056, "step": 12614 }, { "epoch": 0.16, "grad_norm": 5.169562339782715, "learning_rate": 1.9993738121358116e-05, "loss": 3.0103, "step": 12615 }, { "epoch": 0.16, "grad_norm": 4.748502254486084, "learning_rate": 1.9993734402739248e-05, "loss": 2.3907, "step": 12616 }, { "epoch": 0.16, "grad_norm": 4.5222039222717285, "learning_rate": 1.9993730683016906e-05, "loss": 2.1548, "step": 12617 }, { "epoch": 0.16, "grad_norm": 5.427084445953369, "learning_rate": 1.9993726962191084e-05, "loss": 2.9057, "step": 12618 }, { "epoch": 0.16, "grad_norm": 4.647733211517334, "learning_rate": 1.9993723240261788e-05, "loss": 2.4524, "step": 12619 }, { "epoch": 0.16, "grad_norm": 4.305237293243408, "learning_rate": 1.999371951722901e-05, "loss": 2.2441, "step": 12620 }, { "epoch": 0.16, "grad_norm": 4.253269195556641, "learning_rate": 1.9993715793092763e-05, "loss": 2.0216, "step": 12621 }, { "epoch": 0.16, "grad_norm": 3.9381375312805176, "learning_rate": 1.9993712067853035e-05, "loss": 1.8601, "step": 12622 }, { "epoch": 0.16, "grad_norm": 3.945206642150879, "learning_rate": 1.9993708341509834e-05, "loss": 1.9562, "step": 12623 }, { "epoch": 0.16, "grad_norm": 6.052948474884033, "learning_rate": 1.9993704614063158e-05, "loss": 2.6704, "step": 12624 }, { "epoch": 0.16, "grad_norm": 4.948209762573242, "learning_rate": 1.9993700885513005e-05, "loss": 2.6027, "step": 12625 }, { "epoch": 0.16, "grad_norm": 4.598477840423584, "learning_rate": 1.9993697155859378e-05, "loss": 2.0867, "step": 12626 }, { "epoch": 0.16, "grad_norm": 4.455875396728516, "learning_rate": 1.999369342510228e-05, "loss": 2.0952, "step": 12627 }, { "epoch": 0.16, "grad_norm": 5.200475215911865, "learning_rate": 1.999368969324171e-05, "loss": 2.3247, "step": 12628 }, { "epoch": 0.16, "grad_norm": 4.590578556060791, "learning_rate": 1.9993685960277665e-05, "loss": 2.5443, "step": 12629 }, { "epoch": 0.16, "grad_norm": 4.284414768218994, "learning_rate": 1.9993682226210146e-05, "loss": 2.251, "step": 12630 }, { "epoch": 0.16, "grad_norm": 5.5188517570495605, "learning_rate": 1.9993678491039156e-05, "loss": 2.772, "step": 12631 }, { "epoch": 0.16, "grad_norm": 4.254933834075928, "learning_rate": 1.9993674754764693e-05, "loss": 1.7907, "step": 12632 }, { "epoch": 0.16, "grad_norm": 4.7841267585754395, "learning_rate": 1.9993671017386763e-05, "loss": 2.2032, "step": 12633 }, { "epoch": 0.16, "grad_norm": 4.664913654327393, "learning_rate": 1.999366727890536e-05, "loss": 2.5962, "step": 12634 }, { "epoch": 0.16, "grad_norm": 5.272148132324219, "learning_rate": 1.9993663539320484e-05, "loss": 2.3701, "step": 12635 }, { "epoch": 0.16, "grad_norm": 4.830695152282715, "learning_rate": 1.999365979863214e-05, "loss": 2.2832, "step": 12636 }, { "epoch": 0.16, "grad_norm": 5.102426052093506, "learning_rate": 1.9993656056840327e-05, "loss": 2.4554, "step": 12637 }, { "epoch": 0.16, "grad_norm": 4.858977317810059, "learning_rate": 1.9993652313945044e-05, "loss": 2.1466, "step": 12638 }, { "epoch": 0.16, "grad_norm": 4.950339317321777, "learning_rate": 1.999364856994629e-05, "loss": 2.5897, "step": 12639 }, { "epoch": 0.16, "grad_norm": 4.670877933502197, "learning_rate": 1.999364482484407e-05, "loss": 2.4607, "step": 12640 }, { "epoch": 0.16, "grad_norm": 5.074166297912598, "learning_rate": 1.9993641078638383e-05, "loss": 2.3305, "step": 12641 }, { "epoch": 0.16, "grad_norm": 5.187329292297363, "learning_rate": 1.999363733132923e-05, "loss": 2.5127, "step": 12642 }, { "epoch": 0.16, "grad_norm": 4.342235565185547, "learning_rate": 1.9993633582916607e-05, "loss": 2.4475, "step": 12643 }, { "epoch": 0.16, "grad_norm": 4.612916946411133, "learning_rate": 1.9993629833400515e-05, "loss": 2.557, "step": 12644 }, { "epoch": 0.16, "grad_norm": 4.717013835906982, "learning_rate": 1.9993626082780963e-05, "loss": 2.3364, "step": 12645 }, { "epoch": 0.16, "grad_norm": 4.6684770584106445, "learning_rate": 1.999362233105794e-05, "loss": 2.3095, "step": 12646 }, { "epoch": 0.16, "grad_norm": 5.4456868171691895, "learning_rate": 1.9993618578231453e-05, "loss": 2.6883, "step": 12647 }, { "epoch": 0.16, "grad_norm": 4.535741806030273, "learning_rate": 1.9993614824301503e-05, "loss": 2.3009, "step": 12648 }, { "epoch": 0.16, "grad_norm": 4.163057327270508, "learning_rate": 1.9993611069268086e-05, "loss": 1.6927, "step": 12649 }, { "epoch": 0.16, "grad_norm": 4.556146144866943, "learning_rate": 1.9993607313131205e-05, "loss": 2.0698, "step": 12650 }, { "epoch": 0.16, "grad_norm": 4.678808212280273, "learning_rate": 1.999360355589086e-05, "loss": 2.6896, "step": 12651 }, { "epoch": 0.16, "grad_norm": 4.658749580383301, "learning_rate": 1.999359979754705e-05, "loss": 2.7014, "step": 12652 }, { "epoch": 0.16, "grad_norm": 5.173757553100586, "learning_rate": 1.9993596038099783e-05, "loss": 2.5287, "step": 12653 }, { "epoch": 0.16, "grad_norm": 4.691448211669922, "learning_rate": 1.999359227754905e-05, "loss": 2.4823, "step": 12654 }, { "epoch": 0.16, "grad_norm": 5.440535068511963, "learning_rate": 1.9993588515894854e-05, "loss": 2.5083, "step": 12655 }, { "epoch": 0.16, "grad_norm": 4.438226222991943, "learning_rate": 1.9993584753137198e-05, "loss": 1.954, "step": 12656 }, { "epoch": 0.16, "grad_norm": 4.570897102355957, "learning_rate": 1.9993580989276078e-05, "loss": 2.176, "step": 12657 }, { "epoch": 0.16, "grad_norm": 4.481299877166748, "learning_rate": 1.9993577224311497e-05, "loss": 2.0274, "step": 12658 }, { "epoch": 0.16, "grad_norm": 4.4496660232543945, "learning_rate": 1.999357345824346e-05, "loss": 2.268, "step": 12659 }, { "epoch": 0.16, "grad_norm": 4.962306499481201, "learning_rate": 1.999356969107196e-05, "loss": 2.2411, "step": 12660 }, { "epoch": 0.16, "grad_norm": 4.845116138458252, "learning_rate": 1.9993565922796997e-05, "loss": 2.2764, "step": 12661 }, { "epoch": 0.16, "grad_norm": 4.444343566894531, "learning_rate": 1.999356215341858e-05, "loss": 2.2193, "step": 12662 }, { "epoch": 0.16, "grad_norm": 4.707401752471924, "learning_rate": 1.9993558382936704e-05, "loss": 1.9794, "step": 12663 }, { "epoch": 0.16, "grad_norm": 4.634373188018799, "learning_rate": 1.9993554611351368e-05, "loss": 2.2819, "step": 12664 }, { "epoch": 0.16, "grad_norm": 4.796378135681152, "learning_rate": 1.9993550838662572e-05, "loss": 1.8998, "step": 12665 }, { "epoch": 0.16, "grad_norm": 4.592530250549316, "learning_rate": 1.999354706487032e-05, "loss": 2.205, "step": 12666 }, { "epoch": 0.16, "grad_norm": 4.944199085235596, "learning_rate": 1.9993543289974613e-05, "loss": 2.5661, "step": 12667 }, { "epoch": 0.16, "grad_norm": 4.396770477294922, "learning_rate": 1.9993539513975446e-05, "loss": 2.2491, "step": 12668 }, { "epoch": 0.16, "grad_norm": 4.5920186042785645, "learning_rate": 1.9993535736872826e-05, "loss": 2.535, "step": 12669 }, { "epoch": 0.16, "grad_norm": 4.857746601104736, "learning_rate": 1.999353195866675e-05, "loss": 2.6007, "step": 12670 }, { "epoch": 0.16, "grad_norm": 4.606523036956787, "learning_rate": 1.9993528179357215e-05, "loss": 2.4425, "step": 12671 }, { "epoch": 0.16, "grad_norm": 4.7066168785095215, "learning_rate": 1.9993524398944227e-05, "loss": 2.2395, "step": 12672 }, { "epoch": 0.16, "grad_norm": 4.727542400360107, "learning_rate": 1.9993520617427786e-05, "loss": 2.4382, "step": 12673 }, { "epoch": 0.16, "grad_norm": 4.8864264488220215, "learning_rate": 1.9993516834807888e-05, "loss": 2.497, "step": 12674 }, { "epoch": 0.16, "grad_norm": 3.945115566253662, "learning_rate": 1.9993513051084537e-05, "loss": 1.752, "step": 12675 }, { "epoch": 0.16, "grad_norm": 4.732886791229248, "learning_rate": 1.9993509266257735e-05, "loss": 2.3595, "step": 12676 }, { "epoch": 0.16, "grad_norm": 4.133309364318848, "learning_rate": 1.9993505480327476e-05, "loss": 1.688, "step": 12677 }, { "epoch": 0.16, "grad_norm": 4.430782318115234, "learning_rate": 1.9993501693293767e-05, "loss": 2.4381, "step": 12678 }, { "epoch": 0.16, "grad_norm": 4.419760704040527, "learning_rate": 1.999349790515661e-05, "loss": 2.1791, "step": 12679 }, { "epoch": 0.16, "grad_norm": 4.409965515136719, "learning_rate": 1.9993494115915992e-05, "loss": 2.2264, "step": 12680 }, { "epoch": 0.16, "grad_norm": 4.666378974914551, "learning_rate": 1.9993490325571926e-05, "loss": 2.2707, "step": 12681 }, { "epoch": 0.16, "grad_norm": 4.445356369018555, "learning_rate": 1.999348653412441e-05, "loss": 2.3123, "step": 12682 }, { "epoch": 0.16, "grad_norm": 4.58917236328125, "learning_rate": 1.9993482741573444e-05, "loss": 2.4285, "step": 12683 }, { "epoch": 0.16, "grad_norm": 4.480139255523682, "learning_rate": 1.999347894791903e-05, "loss": 2.3115, "step": 12684 }, { "epoch": 0.16, "grad_norm": 3.973433017730713, "learning_rate": 1.9993475153161164e-05, "loss": 1.7674, "step": 12685 }, { "epoch": 0.16, "grad_norm": 4.523685455322266, "learning_rate": 1.999347135729985e-05, "loss": 2.4359, "step": 12686 }, { "epoch": 0.16, "grad_norm": 5.063723087310791, "learning_rate": 1.9993467560335084e-05, "loss": 2.4664, "step": 12687 }, { "epoch": 0.16, "grad_norm": 4.478879928588867, "learning_rate": 1.9993463762266874e-05, "loss": 1.8866, "step": 12688 }, { "epoch": 0.16, "grad_norm": 4.525343894958496, "learning_rate": 1.9993459963095213e-05, "loss": 1.8858, "step": 12689 }, { "epoch": 0.16, "grad_norm": 4.427163124084473, "learning_rate": 1.9993456162820106e-05, "loss": 2.1511, "step": 12690 }, { "epoch": 0.16, "grad_norm": 4.843690872192383, "learning_rate": 1.9993452361441552e-05, "loss": 2.3664, "step": 12691 }, { "epoch": 0.16, "grad_norm": 4.896684646606445, "learning_rate": 1.999344855895955e-05, "loss": 2.4877, "step": 12692 }, { "epoch": 0.16, "grad_norm": 4.228965759277344, "learning_rate": 1.9993444755374104e-05, "loss": 2.1707, "step": 12693 }, { "epoch": 0.16, "grad_norm": 4.373833656311035, "learning_rate": 1.999344095068521e-05, "loss": 1.6902, "step": 12694 }, { "epoch": 0.16, "grad_norm": 5.425629615783691, "learning_rate": 1.9993437144892872e-05, "loss": 2.5107, "step": 12695 }, { "epoch": 0.16, "grad_norm": 4.494009494781494, "learning_rate": 1.9993433337997087e-05, "loss": 2.2673, "step": 12696 }, { "epoch": 0.16, "grad_norm": 4.34672212600708, "learning_rate": 1.999342952999786e-05, "loss": 2.0632, "step": 12697 }, { "epoch": 0.16, "grad_norm": 5.4568281173706055, "learning_rate": 1.9993425720895185e-05, "loss": 2.4953, "step": 12698 }, { "epoch": 0.16, "grad_norm": 4.748597621917725, "learning_rate": 1.999342191068907e-05, "loss": 2.5609, "step": 12699 }, { "epoch": 0.16, "grad_norm": 4.049221515655518, "learning_rate": 1.999341809937951e-05, "loss": 2.1622, "step": 12700 }, { "epoch": 0.16, "grad_norm": 4.493687152862549, "learning_rate": 1.9993414286966508e-05, "loss": 2.4923, "step": 12701 }, { "epoch": 0.16, "grad_norm": 5.062831878662109, "learning_rate": 1.999341047345006e-05, "loss": 2.7097, "step": 12702 }, { "epoch": 0.16, "grad_norm": 4.439589977264404, "learning_rate": 1.9993406658830175e-05, "loss": 2.0786, "step": 12703 }, { "epoch": 0.16, "grad_norm": 5.2050089836120605, "learning_rate": 1.9993402843106847e-05, "loss": 2.5038, "step": 12704 }, { "epoch": 0.16, "grad_norm": 4.903962135314941, "learning_rate": 1.9993399026280076e-05, "loss": 2.3929, "step": 12705 }, { "epoch": 0.16, "grad_norm": 4.052535057067871, "learning_rate": 1.9993395208349865e-05, "loss": 2.2132, "step": 12706 }, { "epoch": 0.16, "grad_norm": 4.6919145584106445, "learning_rate": 1.9993391389316213e-05, "loss": 2.433, "step": 12707 }, { "epoch": 0.16, "grad_norm": 4.524722099304199, "learning_rate": 1.9993387569179122e-05, "loss": 2.3086, "step": 12708 }, { "epoch": 0.16, "grad_norm": 4.971895217895508, "learning_rate": 1.9993383747938594e-05, "loss": 2.4065, "step": 12709 }, { "epoch": 0.16, "grad_norm": 4.505787372589111, "learning_rate": 1.9993379925594626e-05, "loss": 2.026, "step": 12710 }, { "epoch": 0.16, "grad_norm": 4.592741966247559, "learning_rate": 1.999337610214722e-05, "loss": 1.9958, "step": 12711 }, { "epoch": 0.16, "grad_norm": 4.36541748046875, "learning_rate": 1.9993372277596367e-05, "loss": 1.8427, "step": 12712 }, { "epoch": 0.16, "grad_norm": 4.874792575836182, "learning_rate": 1.9993368451942086e-05, "loss": 2.3841, "step": 12713 }, { "epoch": 0.16, "grad_norm": 4.8156585693359375, "learning_rate": 1.999336462518436e-05, "loss": 2.2814, "step": 12714 }, { "epoch": 0.17, "grad_norm": 5.1120100021362305, "learning_rate": 1.9993360797323204e-05, "loss": 2.5574, "step": 12715 }, { "epoch": 0.17, "grad_norm": 4.744541645050049, "learning_rate": 1.999335696835861e-05, "loss": 2.2277, "step": 12716 }, { "epoch": 0.17, "grad_norm": 5.584117412567139, "learning_rate": 1.999335313829058e-05, "loss": 2.85, "step": 12717 }, { "epoch": 0.17, "grad_norm": 4.2574310302734375, "learning_rate": 1.999334930711911e-05, "loss": 1.8532, "step": 12718 }, { "epoch": 0.17, "grad_norm": 5.190009593963623, "learning_rate": 1.9993345474844207e-05, "loss": 2.9698, "step": 12719 }, { "epoch": 0.17, "grad_norm": 4.479468822479248, "learning_rate": 1.999334164146587e-05, "loss": 2.0605, "step": 12720 }, { "epoch": 0.17, "grad_norm": 4.312263488769531, "learning_rate": 1.9993337806984096e-05, "loss": 2.3299, "step": 12721 }, { "epoch": 0.17, "grad_norm": 5.753214359283447, "learning_rate": 1.9993333971398893e-05, "loss": 3.0274, "step": 12722 }, { "epoch": 0.17, "grad_norm": 3.957627773284912, "learning_rate": 1.9993330134710252e-05, "loss": 1.6978, "step": 12723 }, { "epoch": 0.17, "grad_norm": 4.774328231811523, "learning_rate": 1.999332629691818e-05, "loss": 2.3862, "step": 12724 }, { "epoch": 0.17, "grad_norm": 4.5559492111206055, "learning_rate": 1.9993322458022676e-05, "loss": 1.8318, "step": 12725 }, { "epoch": 0.17, "grad_norm": 4.966384410858154, "learning_rate": 1.999331861802374e-05, "loss": 2.0788, "step": 12726 }, { "epoch": 0.17, "grad_norm": 4.758889675140381, "learning_rate": 1.9993314776921366e-05, "loss": 2.0905, "step": 12727 }, { "epoch": 0.17, "grad_norm": 4.732698917388916, "learning_rate": 1.9993310934715566e-05, "loss": 2.0261, "step": 12728 }, { "epoch": 0.17, "grad_norm": 4.743049144744873, "learning_rate": 1.9993307091406334e-05, "loss": 2.3828, "step": 12729 }, { "epoch": 0.17, "grad_norm": 4.840507984161377, "learning_rate": 1.999330324699367e-05, "loss": 1.9564, "step": 12730 }, { "epoch": 0.17, "grad_norm": 4.801084041595459, "learning_rate": 1.999329940147758e-05, "loss": 2.2953, "step": 12731 }, { "epoch": 0.17, "grad_norm": 6.741942405700684, "learning_rate": 1.9993295554858053e-05, "loss": 3.3748, "step": 12732 }, { "epoch": 0.17, "grad_norm": 4.582865238189697, "learning_rate": 1.99932917071351e-05, "loss": 2.2941, "step": 12733 }, { "epoch": 0.17, "grad_norm": 4.501564025878906, "learning_rate": 1.999328785830872e-05, "loss": 2.3421, "step": 12734 }, { "epoch": 0.17, "grad_norm": 4.320775985717773, "learning_rate": 1.999328400837891e-05, "loss": 3.015, "step": 12735 }, { "epoch": 0.17, "grad_norm": 4.630004405975342, "learning_rate": 1.9993280157345673e-05, "loss": 2.347, "step": 12736 }, { "epoch": 0.17, "grad_norm": 4.294827461242676, "learning_rate": 1.9993276305209005e-05, "loss": 2.0943, "step": 12737 }, { "epoch": 0.17, "grad_norm": 4.414182186126709, "learning_rate": 1.9993272451968914e-05, "loss": 2.2073, "step": 12738 }, { "epoch": 0.17, "grad_norm": 4.254794120788574, "learning_rate": 1.9993268597625393e-05, "loss": 2.3412, "step": 12739 }, { "epoch": 0.17, "grad_norm": 4.308663368225098, "learning_rate": 1.999326474217845e-05, "loss": 2.1227, "step": 12740 }, { "epoch": 0.17, "grad_norm": 4.970479965209961, "learning_rate": 1.999326088562808e-05, "loss": 2.4668, "step": 12741 }, { "epoch": 0.17, "grad_norm": 5.354552745819092, "learning_rate": 1.999325702797428e-05, "loss": 2.4308, "step": 12742 }, { "epoch": 0.17, "grad_norm": 4.978754043579102, "learning_rate": 1.9993253169217056e-05, "loss": 2.2787, "step": 12743 }, { "epoch": 0.17, "grad_norm": 4.227240562438965, "learning_rate": 1.999324930935641e-05, "loss": 2.2254, "step": 12744 }, { "epoch": 0.17, "grad_norm": 4.299243450164795, "learning_rate": 1.9993245448392337e-05, "loss": 2.1374, "step": 12745 }, { "epoch": 0.17, "grad_norm": 4.443274021148682, "learning_rate": 1.999324158632484e-05, "loss": 2.5794, "step": 12746 }, { "epoch": 0.17, "grad_norm": 4.16209602355957, "learning_rate": 1.9993237723153923e-05, "loss": 1.7709, "step": 12747 }, { "epoch": 0.17, "grad_norm": 5.02715539932251, "learning_rate": 1.999323385887958e-05, "loss": 2.8017, "step": 12748 }, { "epoch": 0.17, "grad_norm": 4.352784633636475, "learning_rate": 1.9993229993501816e-05, "loss": 1.5909, "step": 12749 }, { "epoch": 0.17, "grad_norm": 4.404326438903809, "learning_rate": 1.9993226127020632e-05, "loss": 1.8797, "step": 12750 }, { "epoch": 0.17, "grad_norm": 4.4264302253723145, "learning_rate": 1.999322225943602e-05, "loss": 2.1168, "step": 12751 }, { "epoch": 0.17, "grad_norm": 5.230850696563721, "learning_rate": 1.9993218390747994e-05, "loss": 2.3722, "step": 12752 }, { "epoch": 0.17, "grad_norm": 4.644216060638428, "learning_rate": 1.9993214520956544e-05, "loss": 2.6974, "step": 12753 }, { "epoch": 0.17, "grad_norm": 4.127298831939697, "learning_rate": 1.999321065006167e-05, "loss": 1.9107, "step": 12754 }, { "epoch": 0.17, "grad_norm": 3.9819726943969727, "learning_rate": 1.9993206778063384e-05, "loss": 1.8426, "step": 12755 }, { "epoch": 0.17, "grad_norm": 4.440592288970947, "learning_rate": 1.9993202904961672e-05, "loss": 2.0774, "step": 12756 }, { "epoch": 0.17, "grad_norm": 4.053404808044434, "learning_rate": 1.9993199030756543e-05, "loss": 1.5474, "step": 12757 }, { "epoch": 0.17, "grad_norm": 4.662166595458984, "learning_rate": 1.9993195155447995e-05, "loss": 2.0713, "step": 12758 }, { "epoch": 0.17, "grad_norm": 4.7207441329956055, "learning_rate": 1.999319127903603e-05, "loss": 2.0767, "step": 12759 }, { "epoch": 0.17, "grad_norm": 5.106196880340576, "learning_rate": 1.9993187401520646e-05, "loss": 2.5745, "step": 12760 }, { "epoch": 0.17, "grad_norm": 5.429496765136719, "learning_rate": 1.999318352290184e-05, "loss": 2.4794, "step": 12761 }, { "epoch": 0.17, "grad_norm": 4.551084041595459, "learning_rate": 1.999317964317962e-05, "loss": 2.3156, "step": 12762 }, { "epoch": 0.17, "grad_norm": 5.095762729644775, "learning_rate": 1.9993175762353988e-05, "loss": 2.366, "step": 12763 }, { "epoch": 0.17, "grad_norm": 4.460298538208008, "learning_rate": 1.999317188042494e-05, "loss": 2.3489, "step": 12764 }, { "epoch": 0.17, "grad_norm": 4.333319664001465, "learning_rate": 1.999316799739247e-05, "loss": 2.2971, "step": 12765 }, { "epoch": 0.17, "grad_norm": 4.5944414138793945, "learning_rate": 1.9993164113256587e-05, "loss": 1.9834, "step": 12766 }, { "epoch": 0.17, "grad_norm": 4.123499393463135, "learning_rate": 1.999316022801729e-05, "loss": 1.9511, "step": 12767 }, { "epoch": 0.17, "grad_norm": 5.095733165740967, "learning_rate": 1.9993156341674578e-05, "loss": 2.6239, "step": 12768 }, { "epoch": 0.17, "grad_norm": 5.269919395446777, "learning_rate": 1.999315245422845e-05, "loss": 2.1626, "step": 12769 }, { "epoch": 0.17, "grad_norm": 4.837510585784912, "learning_rate": 1.9993148565678914e-05, "loss": 2.5581, "step": 12770 }, { "epoch": 0.17, "grad_norm": 5.05563497543335, "learning_rate": 1.999314467602596e-05, "loss": 2.5258, "step": 12771 }, { "epoch": 0.17, "grad_norm": 4.501990795135498, "learning_rate": 1.9993140785269595e-05, "loss": 2.3318, "step": 12772 }, { "epoch": 0.17, "grad_norm": 4.391523838043213, "learning_rate": 1.9993136893409816e-05, "loss": 2.282, "step": 12773 }, { "epoch": 0.17, "grad_norm": 4.293076038360596, "learning_rate": 1.999313300044663e-05, "loss": 1.9094, "step": 12774 }, { "epoch": 0.17, "grad_norm": 4.876295566558838, "learning_rate": 1.9993129106380026e-05, "loss": 2.2515, "step": 12775 }, { "epoch": 0.17, "grad_norm": 5.547547340393066, "learning_rate": 1.9993125211210015e-05, "loss": 2.2652, "step": 12776 }, { "epoch": 0.17, "grad_norm": 4.4664387702941895, "learning_rate": 1.9993121314936595e-05, "loss": 2.1533, "step": 12777 }, { "epoch": 0.17, "grad_norm": 4.3580427169799805, "learning_rate": 1.999311741755976e-05, "loss": 2.1943, "step": 12778 }, { "epoch": 0.17, "grad_norm": 4.70742654800415, "learning_rate": 1.999311351907952e-05, "loss": 2.556, "step": 12779 }, { "epoch": 0.17, "grad_norm": 5.530946731567383, "learning_rate": 1.9993109619495867e-05, "loss": 2.6461, "step": 12780 }, { "epoch": 0.17, "grad_norm": 4.9018473625183105, "learning_rate": 1.999310571880881e-05, "loss": 2.2571, "step": 12781 }, { "epoch": 0.17, "grad_norm": 3.778658390045166, "learning_rate": 1.9993101817018338e-05, "loss": 1.6561, "step": 12782 }, { "epoch": 0.17, "grad_norm": 4.052066326141357, "learning_rate": 1.999309791412446e-05, "loss": 2.002, "step": 12783 }, { "epoch": 0.17, "grad_norm": 5.543269157409668, "learning_rate": 1.9993094010127177e-05, "loss": 2.6311, "step": 12784 }, { "epoch": 0.17, "grad_norm": 4.376779556274414, "learning_rate": 1.9993090105026488e-05, "loss": 2.1033, "step": 12785 }, { "epoch": 0.17, "grad_norm": 4.653139591217041, "learning_rate": 1.9993086198822392e-05, "loss": 2.4396, "step": 12786 }, { "epoch": 0.17, "grad_norm": 4.450307846069336, "learning_rate": 1.9993082291514887e-05, "loss": 2.4941, "step": 12787 }, { "epoch": 0.17, "grad_norm": 5.58467435836792, "learning_rate": 1.9993078383103976e-05, "loss": 2.3636, "step": 12788 }, { "epoch": 0.17, "grad_norm": 4.250171661376953, "learning_rate": 1.999307447358966e-05, "loss": 2.2689, "step": 12789 }, { "epoch": 0.17, "grad_norm": 4.763043403625488, "learning_rate": 1.9993070562971942e-05, "loss": 2.3076, "step": 12790 }, { "epoch": 0.17, "grad_norm": 4.9646406173706055, "learning_rate": 1.9993066651250816e-05, "loss": 2.2632, "step": 12791 }, { "epoch": 0.17, "grad_norm": 5.021633625030518, "learning_rate": 1.9993062738426284e-05, "loss": 2.4064, "step": 12792 }, { "epoch": 0.17, "grad_norm": 5.266993999481201, "learning_rate": 1.9993058824498352e-05, "loss": 2.4263, "step": 12793 }, { "epoch": 0.17, "grad_norm": 5.014400959014893, "learning_rate": 1.9993054909467018e-05, "loss": 2.63, "step": 12794 }, { "epoch": 0.17, "grad_norm": 4.652273654937744, "learning_rate": 1.9993050993332278e-05, "loss": 2.7736, "step": 12795 }, { "epoch": 0.17, "grad_norm": 4.426362037658691, "learning_rate": 1.999304707609414e-05, "loss": 1.6183, "step": 12796 }, { "epoch": 0.17, "grad_norm": 4.141024589538574, "learning_rate": 1.9993043157752593e-05, "loss": 1.6576, "step": 12797 }, { "epoch": 0.17, "grad_norm": 5.605798244476318, "learning_rate": 1.999303923830765e-05, "loss": 2.1492, "step": 12798 }, { "epoch": 0.17, "grad_norm": 3.8616526126861572, "learning_rate": 1.9993035317759307e-05, "loss": 2.3144, "step": 12799 }, { "epoch": 0.17, "grad_norm": 4.713241100311279, "learning_rate": 1.999303139610756e-05, "loss": 2.0234, "step": 12800 }, { "epoch": 0.17, "grad_norm": 4.461298942565918, "learning_rate": 1.9993027473352413e-05, "loss": 2.2685, "step": 12801 }, { "epoch": 0.17, "grad_norm": 4.464441776275635, "learning_rate": 1.9993023549493867e-05, "loss": 1.8789, "step": 12802 }, { "epoch": 0.17, "grad_norm": 4.503680229187012, "learning_rate": 1.9993019624531922e-05, "loss": 2.2721, "step": 12803 }, { "epoch": 0.17, "grad_norm": 4.743252277374268, "learning_rate": 1.9993015698466578e-05, "loss": 2.5589, "step": 12804 }, { "epoch": 0.17, "grad_norm": 4.6731133460998535, "learning_rate": 1.9993011771297834e-05, "loss": 2.6964, "step": 12805 }, { "epoch": 0.17, "grad_norm": 5.03872013092041, "learning_rate": 1.9993007843025694e-05, "loss": 2.3711, "step": 12806 }, { "epoch": 0.17, "grad_norm": 4.444615840911865, "learning_rate": 1.9993003913650155e-05, "loss": 2.312, "step": 12807 }, { "epoch": 0.17, "grad_norm": 4.412850379943848, "learning_rate": 1.999299998317122e-05, "loss": 2.0943, "step": 12808 }, { "epoch": 0.17, "grad_norm": 4.279388427734375, "learning_rate": 1.9992996051588886e-05, "loss": 1.9023, "step": 12809 }, { "epoch": 0.17, "grad_norm": 4.7813520431518555, "learning_rate": 1.999299211890316e-05, "loss": 2.2824, "step": 12810 }, { "epoch": 0.17, "grad_norm": 5.085231304168701, "learning_rate": 1.9992988185114037e-05, "loss": 2.9418, "step": 12811 }, { "epoch": 0.17, "grad_norm": 4.437471389770508, "learning_rate": 1.9992984250221514e-05, "loss": 1.9998, "step": 12812 }, { "epoch": 0.17, "grad_norm": 4.621908187866211, "learning_rate": 1.99929803142256e-05, "loss": 1.992, "step": 12813 }, { "epoch": 0.17, "grad_norm": 5.498351097106934, "learning_rate": 1.999297637712629e-05, "loss": 2.2129, "step": 12814 }, { "epoch": 0.17, "grad_norm": 4.497053623199463, "learning_rate": 1.999297243892359e-05, "loss": 2.1064, "step": 12815 }, { "epoch": 0.17, "grad_norm": 5.048168659210205, "learning_rate": 1.999296849961749e-05, "loss": 2.614, "step": 12816 }, { "epoch": 0.17, "grad_norm": 4.902340412139893, "learning_rate": 1.9992964559208e-05, "loss": 2.1385, "step": 12817 }, { "epoch": 0.17, "grad_norm": 4.2607035636901855, "learning_rate": 1.9992960617695115e-05, "loss": 1.9995, "step": 12818 }, { "epoch": 0.17, "grad_norm": 4.43245792388916, "learning_rate": 1.999295667507884e-05, "loss": 2.2473, "step": 12819 }, { "epoch": 0.17, "grad_norm": 5.711310386657715, "learning_rate": 1.9992952731359172e-05, "loss": 2.7237, "step": 12820 }, { "epoch": 0.17, "grad_norm": 4.66813850402832, "learning_rate": 1.9992948786536114e-05, "loss": 2.2587, "step": 12821 }, { "epoch": 0.17, "grad_norm": 5.476913928985596, "learning_rate": 1.9992944840609663e-05, "loss": 3.1088, "step": 12822 }, { "epoch": 0.17, "grad_norm": 5.374139308929443, "learning_rate": 1.9992940893579822e-05, "loss": 3.0576, "step": 12823 }, { "epoch": 0.17, "grad_norm": 4.721897125244141, "learning_rate": 1.999293694544659e-05, "loss": 2.4506, "step": 12824 }, { "epoch": 0.17, "grad_norm": 4.244673252105713, "learning_rate": 1.999293299620997e-05, "loss": 2.0847, "step": 12825 }, { "epoch": 0.17, "grad_norm": 4.750399589538574, "learning_rate": 1.999292904586996e-05, "loss": 2.625, "step": 12826 }, { "epoch": 0.17, "grad_norm": 4.330352783203125, "learning_rate": 1.999292509442656e-05, "loss": 2.1119, "step": 12827 }, { "epoch": 0.17, "grad_norm": 5.198670387268066, "learning_rate": 1.9992921141879774e-05, "loss": 2.5488, "step": 12828 }, { "epoch": 0.17, "grad_norm": 4.986922740936279, "learning_rate": 1.99929171882296e-05, "loss": 2.7654, "step": 12829 }, { "epoch": 0.17, "grad_norm": 4.4344635009765625, "learning_rate": 1.9992913233476035e-05, "loss": 2.493, "step": 12830 }, { "epoch": 0.17, "grad_norm": 4.084278583526611, "learning_rate": 1.9992909277619085e-05, "loss": 2.0341, "step": 12831 }, { "epoch": 0.17, "grad_norm": 4.332655429840088, "learning_rate": 1.999290532065875e-05, "loss": 2.0376, "step": 12832 }, { "epoch": 0.17, "grad_norm": 4.8703083992004395, "learning_rate": 1.9992901362595024e-05, "loss": 2.5484, "step": 12833 }, { "epoch": 0.17, "grad_norm": 5.13329553604126, "learning_rate": 1.9992897403427913e-05, "loss": 2.2321, "step": 12834 }, { "epoch": 0.17, "grad_norm": 5.308158874511719, "learning_rate": 1.999289344315742e-05, "loss": 2.2528, "step": 12835 }, { "epoch": 0.17, "grad_norm": 4.634584426879883, "learning_rate": 1.999288948178354e-05, "loss": 2.6189, "step": 12836 }, { "epoch": 0.17, "grad_norm": 4.933273792266846, "learning_rate": 1.9992885519306276e-05, "loss": 2.17, "step": 12837 }, { "epoch": 0.17, "grad_norm": 4.794870376586914, "learning_rate": 1.999288155572563e-05, "loss": 2.4981, "step": 12838 }, { "epoch": 0.17, "grad_norm": 4.897562026977539, "learning_rate": 1.9992877591041596e-05, "loss": 2.3075, "step": 12839 }, { "epoch": 0.17, "grad_norm": 4.458056449890137, "learning_rate": 1.999287362525418e-05, "loss": 2.3949, "step": 12840 }, { "epoch": 0.17, "grad_norm": 4.411736488342285, "learning_rate": 1.9992869658363383e-05, "loss": 2.163, "step": 12841 }, { "epoch": 0.17, "grad_norm": 5.431575775146484, "learning_rate": 1.9992865690369203e-05, "loss": 2.1145, "step": 12842 }, { "epoch": 0.17, "grad_norm": 4.8514885902404785, "learning_rate": 1.999286172127164e-05, "loss": 2.1714, "step": 12843 }, { "epoch": 0.17, "grad_norm": 4.374162197113037, "learning_rate": 1.99928577510707e-05, "loss": 1.9404, "step": 12844 }, { "epoch": 0.17, "grad_norm": 4.559693336486816, "learning_rate": 1.9992853779766372e-05, "loss": 2.5142, "step": 12845 }, { "epoch": 0.17, "grad_norm": 3.7867093086242676, "learning_rate": 1.9992849807358666e-05, "loss": 1.9212, "step": 12846 }, { "epoch": 0.17, "grad_norm": 4.329111099243164, "learning_rate": 1.9992845833847584e-05, "loss": 2.0698, "step": 12847 }, { "epoch": 0.17, "grad_norm": 5.329410552978516, "learning_rate": 1.999284185923312e-05, "loss": 2.741, "step": 12848 }, { "epoch": 0.17, "grad_norm": 3.962747812271118, "learning_rate": 1.9992837883515274e-05, "loss": 2.0146, "step": 12849 }, { "epoch": 0.17, "grad_norm": 4.542811870574951, "learning_rate": 1.999283390669405e-05, "loss": 2.1962, "step": 12850 }, { "epoch": 0.17, "grad_norm": 4.5653076171875, "learning_rate": 1.9992829928769447e-05, "loss": 2.1789, "step": 12851 }, { "epoch": 0.17, "grad_norm": 4.3441243171691895, "learning_rate": 1.999282594974147e-05, "loss": 2.2106, "step": 12852 }, { "epoch": 0.17, "grad_norm": 5.7176594734191895, "learning_rate": 1.999282196961011e-05, "loss": 2.3255, "step": 12853 }, { "epoch": 0.17, "grad_norm": 4.7025675773620605, "learning_rate": 1.999281798837538e-05, "loss": 2.6535, "step": 12854 }, { "epoch": 0.17, "grad_norm": 4.314369201660156, "learning_rate": 1.9992814006037266e-05, "loss": 2.1799, "step": 12855 }, { "epoch": 0.17, "grad_norm": 4.560941219329834, "learning_rate": 1.999281002259578e-05, "loss": 2.2853, "step": 12856 }, { "epoch": 0.17, "grad_norm": 5.088746547698975, "learning_rate": 1.9992806038050915e-05, "loss": 2.2808, "step": 12857 }, { "epoch": 0.17, "grad_norm": 5.321258544921875, "learning_rate": 1.999280205240268e-05, "loss": 2.6348, "step": 12858 }, { "epoch": 0.17, "grad_norm": 4.494105339050293, "learning_rate": 1.9992798065651068e-05, "loss": 1.9965, "step": 12859 }, { "epoch": 0.17, "grad_norm": 4.704769611358643, "learning_rate": 1.999279407779608e-05, "loss": 2.62, "step": 12860 }, { "epoch": 0.17, "grad_norm": 3.964383125305176, "learning_rate": 1.9992790088837715e-05, "loss": 2.2912, "step": 12861 }, { "epoch": 0.17, "grad_norm": 4.519333362579346, "learning_rate": 1.9992786098775983e-05, "loss": 2.2074, "step": 12862 }, { "epoch": 0.17, "grad_norm": 5.049473285675049, "learning_rate": 1.9992782107610874e-05, "loss": 1.9186, "step": 12863 }, { "epoch": 0.17, "grad_norm": 4.7011799812316895, "learning_rate": 1.9992778115342393e-05, "loss": 2.0878, "step": 12864 }, { "epoch": 0.17, "grad_norm": 4.287178993225098, "learning_rate": 1.999277412197054e-05, "loss": 2.5133, "step": 12865 }, { "epoch": 0.17, "grad_norm": 4.063138484954834, "learning_rate": 1.9992770127495315e-05, "loss": 1.9599, "step": 12866 }, { "epoch": 0.17, "grad_norm": 4.882050514221191, "learning_rate": 1.999276613191672e-05, "loss": 1.7191, "step": 12867 }, { "epoch": 0.17, "grad_norm": 4.803492069244385, "learning_rate": 1.9992762135234755e-05, "loss": 2.2862, "step": 12868 }, { "epoch": 0.17, "grad_norm": 4.768144607543945, "learning_rate": 1.9992758137449417e-05, "loss": 2.1686, "step": 12869 }, { "epoch": 0.17, "grad_norm": 5.557990550994873, "learning_rate": 1.999275413856071e-05, "loss": 2.8847, "step": 12870 }, { "epoch": 0.17, "grad_norm": 4.040143966674805, "learning_rate": 1.9992750138568633e-05, "loss": 1.9265, "step": 12871 }, { "epoch": 0.17, "grad_norm": 4.731392860412598, "learning_rate": 1.9992746137473188e-05, "loss": 2.4222, "step": 12872 }, { "epoch": 0.17, "grad_norm": 4.58158540725708, "learning_rate": 1.999274213527437e-05, "loss": 2.3237, "step": 12873 }, { "epoch": 0.17, "grad_norm": 4.786690711975098, "learning_rate": 1.999273813197219e-05, "loss": 2.1886, "step": 12874 }, { "epoch": 0.17, "grad_norm": 3.732649326324463, "learning_rate": 1.9992734127566637e-05, "loss": 1.7308, "step": 12875 }, { "epoch": 0.17, "grad_norm": 5.188466548919678, "learning_rate": 1.9992730122057717e-05, "loss": 2.4678, "step": 12876 }, { "epoch": 0.17, "grad_norm": 4.971794128417969, "learning_rate": 1.9992726115445434e-05, "loss": 2.339, "step": 12877 }, { "epoch": 0.17, "grad_norm": 4.109872817993164, "learning_rate": 1.9992722107729783e-05, "loss": 1.8431, "step": 12878 }, { "epoch": 0.17, "grad_norm": 4.873902320861816, "learning_rate": 1.9992718098910763e-05, "loss": 2.3503, "step": 12879 }, { "epoch": 0.17, "grad_norm": 4.419763565063477, "learning_rate": 1.999271408898838e-05, "loss": 2.1622, "step": 12880 }, { "epoch": 0.17, "grad_norm": 4.741941928863525, "learning_rate": 1.999271007796263e-05, "loss": 2.3495, "step": 12881 }, { "epoch": 0.17, "grad_norm": 4.85014009475708, "learning_rate": 1.9992706065833517e-05, "loss": 2.0478, "step": 12882 }, { "epoch": 0.17, "grad_norm": 5.002934455871582, "learning_rate": 1.9992702052601042e-05, "loss": 2.6928, "step": 12883 }, { "epoch": 0.17, "grad_norm": 5.116668224334717, "learning_rate": 1.9992698038265198e-05, "loss": 2.5495, "step": 12884 }, { "epoch": 0.17, "grad_norm": 4.204075336456299, "learning_rate": 1.9992694022825992e-05, "loss": 1.9745, "step": 12885 }, { "epoch": 0.17, "grad_norm": 4.007495880126953, "learning_rate": 1.9992690006283428e-05, "loss": 1.9868, "step": 12886 }, { "epoch": 0.17, "grad_norm": 5.110581874847412, "learning_rate": 1.9992685988637498e-05, "loss": 2.5046, "step": 12887 }, { "epoch": 0.17, "grad_norm": 5.262054443359375, "learning_rate": 1.9992681969888202e-05, "loss": 2.4262, "step": 12888 }, { "epoch": 0.17, "grad_norm": 4.592303276062012, "learning_rate": 1.999267795003555e-05, "loss": 2.2687, "step": 12889 }, { "epoch": 0.17, "grad_norm": 4.646817207336426, "learning_rate": 1.9992673929079535e-05, "loss": 2.2628, "step": 12890 }, { "epoch": 0.17, "grad_norm": 5.89870548248291, "learning_rate": 1.999266990702016e-05, "loss": 2.9056, "step": 12891 }, { "epoch": 0.17, "grad_norm": 4.355645656585693, "learning_rate": 1.9992665883857424e-05, "loss": 2.1576, "step": 12892 }, { "epoch": 0.17, "grad_norm": 5.285682678222656, "learning_rate": 1.9992661859591328e-05, "loss": 2.3887, "step": 12893 }, { "epoch": 0.17, "grad_norm": 5.67573881149292, "learning_rate": 1.999265783422187e-05, "loss": 2.5277, "step": 12894 }, { "epoch": 0.17, "grad_norm": 4.409317970275879, "learning_rate": 1.9992653807749057e-05, "loss": 2.2693, "step": 12895 }, { "epoch": 0.17, "grad_norm": 5.316338062286377, "learning_rate": 1.9992649780172882e-05, "loss": 2.194, "step": 12896 }, { "epoch": 0.17, "grad_norm": 4.687917232513428, "learning_rate": 1.9992645751493352e-05, "loss": 2.2969, "step": 12897 }, { "epoch": 0.17, "grad_norm": 4.407048225402832, "learning_rate": 1.9992641721710467e-05, "loss": 2.7275, "step": 12898 }, { "epoch": 0.17, "grad_norm": 5.13293981552124, "learning_rate": 1.999263769082422e-05, "loss": 2.4998, "step": 12899 }, { "epoch": 0.17, "grad_norm": 4.706865310668945, "learning_rate": 1.9992633658834616e-05, "loss": 2.2915, "step": 12900 }, { "epoch": 0.17, "grad_norm": 5.138160228729248, "learning_rate": 1.9992629625741658e-05, "loss": 3.0875, "step": 12901 }, { "epoch": 0.17, "grad_norm": 4.3511505126953125, "learning_rate": 1.999262559154534e-05, "loss": 1.8346, "step": 12902 }, { "epoch": 0.17, "grad_norm": 4.299135684967041, "learning_rate": 1.9992621556245676e-05, "loss": 2.27, "step": 12903 }, { "epoch": 0.17, "grad_norm": 4.721521854400635, "learning_rate": 1.9992617519842648e-05, "loss": 2.505, "step": 12904 }, { "epoch": 0.17, "grad_norm": 4.549760341644287, "learning_rate": 1.9992613482336272e-05, "loss": 2.3392, "step": 12905 }, { "epoch": 0.17, "grad_norm": 4.6868109703063965, "learning_rate": 1.9992609443726538e-05, "loss": 2.5079, "step": 12906 }, { "epoch": 0.17, "grad_norm": 4.935458183288574, "learning_rate": 1.999260540401345e-05, "loss": 2.2942, "step": 12907 }, { "epoch": 0.17, "grad_norm": 4.81335973739624, "learning_rate": 1.999260136319701e-05, "loss": 2.4906, "step": 12908 }, { "epoch": 0.17, "grad_norm": 4.8008551597595215, "learning_rate": 1.9992597321277216e-05, "loss": 2.1352, "step": 12909 }, { "epoch": 0.17, "grad_norm": 4.613293647766113, "learning_rate": 1.999259327825407e-05, "loss": 2.2823, "step": 12910 }, { "epoch": 0.17, "grad_norm": 4.8144025802612305, "learning_rate": 1.9992589234127573e-05, "loss": 2.4864, "step": 12911 }, { "epoch": 0.17, "grad_norm": 4.319824695587158, "learning_rate": 1.9992585188897727e-05, "loss": 2.0146, "step": 12912 }, { "epoch": 0.17, "grad_norm": 4.365667343139648, "learning_rate": 1.9992581142564526e-05, "loss": 1.7513, "step": 12913 }, { "epoch": 0.17, "grad_norm": 4.351280689239502, "learning_rate": 1.9992577095127976e-05, "loss": 2.2168, "step": 12914 }, { "epoch": 0.17, "grad_norm": 4.775496482849121, "learning_rate": 1.9992573046588078e-05, "loss": 2.5825, "step": 12915 }, { "epoch": 0.17, "grad_norm": 4.195987701416016, "learning_rate": 1.9992568996944828e-05, "loss": 1.9994, "step": 12916 }, { "epoch": 0.17, "grad_norm": 4.510256290435791, "learning_rate": 1.999256494619823e-05, "loss": 2.3445, "step": 12917 }, { "epoch": 0.17, "grad_norm": 4.554749488830566, "learning_rate": 1.9992560894348282e-05, "loss": 2.4168, "step": 12918 }, { "epoch": 0.17, "grad_norm": 4.696681022644043, "learning_rate": 1.9992556841394986e-05, "loss": 2.3968, "step": 12919 }, { "epoch": 0.17, "grad_norm": 4.1033220291137695, "learning_rate": 1.9992552787338342e-05, "loss": 2.0015, "step": 12920 }, { "epoch": 0.17, "grad_norm": 5.498895645141602, "learning_rate": 1.9992548732178353e-05, "loss": 2.5322, "step": 12921 }, { "epoch": 0.17, "grad_norm": 4.346324443817139, "learning_rate": 1.9992544675915015e-05, "loss": 1.9289, "step": 12922 }, { "epoch": 0.17, "grad_norm": 4.591768741607666, "learning_rate": 1.9992540618548332e-05, "loss": 2.5773, "step": 12923 }, { "epoch": 0.17, "grad_norm": 4.219705104827881, "learning_rate": 1.99925365600783e-05, "loss": 2.0216, "step": 12924 }, { "epoch": 0.17, "grad_norm": 4.7119646072387695, "learning_rate": 1.9992532500504924e-05, "loss": 1.8969, "step": 12925 }, { "epoch": 0.17, "grad_norm": 4.686766624450684, "learning_rate": 1.9992528439828206e-05, "loss": 2.4566, "step": 12926 }, { "epoch": 0.17, "grad_norm": 4.314820289611816, "learning_rate": 1.9992524378048136e-05, "loss": 2.308, "step": 12927 }, { "epoch": 0.17, "grad_norm": 4.289636135101318, "learning_rate": 1.999252031516473e-05, "loss": 2.0355, "step": 12928 }, { "epoch": 0.17, "grad_norm": 4.285118579864502, "learning_rate": 1.9992516251177974e-05, "loss": 2.4421, "step": 12929 }, { "epoch": 0.17, "grad_norm": 5.133802890777588, "learning_rate": 1.999251218608788e-05, "loss": 2.497, "step": 12930 }, { "epoch": 0.17, "grad_norm": 4.575719833374023, "learning_rate": 1.9992508119894438e-05, "loss": 1.9813, "step": 12931 }, { "epoch": 0.17, "grad_norm": 4.826447486877441, "learning_rate": 1.9992504052597656e-05, "loss": 2.4789, "step": 12932 }, { "epoch": 0.17, "grad_norm": 4.80359411239624, "learning_rate": 1.9992499984197532e-05, "loss": 1.8919, "step": 12933 }, { "epoch": 0.17, "grad_norm": 4.132397651672363, "learning_rate": 1.9992495914694066e-05, "loss": 1.959, "step": 12934 }, { "epoch": 0.17, "grad_norm": 4.717064380645752, "learning_rate": 1.9992491844087262e-05, "loss": 2.1773, "step": 12935 }, { "epoch": 0.17, "grad_norm": 5.4306840896606445, "learning_rate": 1.9992487772377113e-05, "loss": 2.5846, "step": 12936 }, { "epoch": 0.17, "grad_norm": 4.786918640136719, "learning_rate": 1.9992483699563626e-05, "loss": 2.5495, "step": 12937 }, { "epoch": 0.17, "grad_norm": 4.647321701049805, "learning_rate": 1.99924796256468e-05, "loss": 2.536, "step": 12938 }, { "epoch": 0.17, "grad_norm": 5.210820198059082, "learning_rate": 1.9992475550626632e-05, "loss": 2.3764, "step": 12939 }, { "epoch": 0.17, "grad_norm": 4.072408199310303, "learning_rate": 1.9992471474503127e-05, "loss": 2.2112, "step": 12940 }, { "epoch": 0.17, "grad_norm": 5.600205421447754, "learning_rate": 1.9992467397276286e-05, "loss": 2.4342, "step": 12941 }, { "epoch": 0.17, "grad_norm": 5.201791763305664, "learning_rate": 1.9992463318946103e-05, "loss": 2.293, "step": 12942 }, { "epoch": 0.17, "grad_norm": 4.772923469543457, "learning_rate": 1.9992459239512586e-05, "loss": 2.491, "step": 12943 }, { "epoch": 0.17, "grad_norm": 5.396586894989014, "learning_rate": 1.9992455158975727e-05, "loss": 2.7753, "step": 12944 }, { "epoch": 0.17, "grad_norm": 4.455406665802002, "learning_rate": 1.9992451077335536e-05, "loss": 1.9331, "step": 12945 }, { "epoch": 0.17, "grad_norm": 4.5080461502075195, "learning_rate": 1.9992446994592007e-05, "loss": 2.1105, "step": 12946 }, { "epoch": 0.17, "grad_norm": 4.784358501434326, "learning_rate": 1.999244291074514e-05, "loss": 1.9868, "step": 12947 }, { "epoch": 0.17, "grad_norm": 5.436280250549316, "learning_rate": 1.999243882579494e-05, "loss": 2.5903, "step": 12948 }, { "epoch": 0.17, "grad_norm": 4.624366283416748, "learning_rate": 1.999243473974141e-05, "loss": 2.3017, "step": 12949 }, { "epoch": 0.17, "grad_norm": 5.07973051071167, "learning_rate": 1.9992430652584537e-05, "loss": 2.6644, "step": 12950 }, { "epoch": 0.17, "grad_norm": 5.76607608795166, "learning_rate": 1.9992426564324337e-05, "loss": 2.656, "step": 12951 }, { "epoch": 0.17, "grad_norm": 4.158864974975586, "learning_rate": 1.9992422474960798e-05, "loss": 2.2508, "step": 12952 }, { "epoch": 0.17, "grad_norm": 4.29421854019165, "learning_rate": 1.999241838449393e-05, "loss": 1.9015, "step": 12953 }, { "epoch": 0.17, "grad_norm": 4.887359619140625, "learning_rate": 1.999241429292373e-05, "loss": 2.5663, "step": 12954 }, { "epoch": 0.17, "grad_norm": 4.7498884201049805, "learning_rate": 1.9992410200250196e-05, "loss": 2.3176, "step": 12955 }, { "epoch": 0.17, "grad_norm": 4.294635772705078, "learning_rate": 1.999240610647333e-05, "loss": 2.1014, "step": 12956 }, { "epoch": 0.17, "grad_norm": 4.8361334800720215, "learning_rate": 1.9992402011593134e-05, "loss": 2.3546, "step": 12957 }, { "epoch": 0.17, "grad_norm": 4.954380035400391, "learning_rate": 1.9992397915609606e-05, "loss": 2.4409, "step": 12958 }, { "epoch": 0.17, "grad_norm": 4.204455375671387, "learning_rate": 1.999239381852275e-05, "loss": 2.072, "step": 12959 }, { "epoch": 0.17, "grad_norm": 4.855573654174805, "learning_rate": 1.9992389720332565e-05, "loss": 2.6145, "step": 12960 }, { "epoch": 0.17, "grad_norm": 4.753971576690674, "learning_rate": 1.999238562103905e-05, "loss": 2.2795, "step": 12961 }, { "epoch": 0.17, "grad_norm": 4.5737199783325195, "learning_rate": 1.99923815206422e-05, "loss": 2.1402, "step": 12962 }, { "epoch": 0.17, "grad_norm": 4.79337215423584, "learning_rate": 1.999237741914203e-05, "loss": 2.2711, "step": 12963 }, { "epoch": 0.17, "grad_norm": 4.600409507751465, "learning_rate": 1.9992373316538528e-05, "loss": 2.4182, "step": 12964 }, { "epoch": 0.17, "grad_norm": 4.284512042999268, "learning_rate": 1.9992369212831696e-05, "loss": 2.4682, "step": 12965 }, { "epoch": 0.17, "grad_norm": 4.948541164398193, "learning_rate": 1.9992365108021542e-05, "loss": 2.4676, "step": 12966 }, { "epoch": 0.17, "grad_norm": 4.35867977142334, "learning_rate": 1.9992361002108057e-05, "loss": 2.1479, "step": 12967 }, { "epoch": 0.17, "grad_norm": 4.626272201538086, "learning_rate": 1.9992356895091247e-05, "loss": 2.545, "step": 12968 }, { "epoch": 0.17, "grad_norm": 4.973013401031494, "learning_rate": 1.9992352786971115e-05, "loss": 2.2261, "step": 12969 }, { "epoch": 0.17, "grad_norm": 4.263187408447266, "learning_rate": 1.9992348677747656e-05, "loss": 2.13, "step": 12970 }, { "epoch": 0.17, "grad_norm": 4.494139671325684, "learning_rate": 1.9992344567420868e-05, "loss": 2.5478, "step": 12971 }, { "epoch": 0.17, "grad_norm": 4.124292373657227, "learning_rate": 1.999234045599076e-05, "loss": 2.0152, "step": 12972 }, { "epoch": 0.17, "grad_norm": 4.7217817306518555, "learning_rate": 1.9992336343457325e-05, "loss": 2.395, "step": 12973 }, { "epoch": 0.17, "grad_norm": 4.421388149261475, "learning_rate": 1.9992332229820566e-05, "loss": 2.1386, "step": 12974 }, { "epoch": 0.17, "grad_norm": 4.87585973739624, "learning_rate": 1.999232811508049e-05, "loss": 2.8138, "step": 12975 }, { "epoch": 0.17, "grad_norm": 4.383354663848877, "learning_rate": 1.9992323999237085e-05, "loss": 2.0468, "step": 12976 }, { "epoch": 0.17, "grad_norm": 4.2765302658081055, "learning_rate": 1.9992319882290362e-05, "loss": 1.8668, "step": 12977 }, { "epoch": 0.17, "grad_norm": 4.225453853607178, "learning_rate": 1.9992315764240318e-05, "loss": 2.1654, "step": 12978 }, { "epoch": 0.17, "grad_norm": 5.144755840301514, "learning_rate": 1.9992311645086952e-05, "loss": 2.3021, "step": 12979 }, { "epoch": 0.17, "grad_norm": 4.161968231201172, "learning_rate": 1.999230752483026e-05, "loss": 2.198, "step": 12980 }, { "epoch": 0.17, "grad_norm": 3.7788004875183105, "learning_rate": 1.9992303403470253e-05, "loss": 1.8297, "step": 12981 }, { "epoch": 0.17, "grad_norm": 4.243636608123779, "learning_rate": 1.9992299281006926e-05, "loss": 1.936, "step": 12982 }, { "epoch": 0.17, "grad_norm": 4.279439926147461, "learning_rate": 1.999229515744028e-05, "loss": 1.7011, "step": 12983 }, { "epoch": 0.17, "grad_norm": 4.632265567779541, "learning_rate": 1.9992291032770313e-05, "loss": 2.0994, "step": 12984 }, { "epoch": 0.17, "grad_norm": 3.5414023399353027, "learning_rate": 1.999228690699703e-05, "loss": 1.5407, "step": 12985 }, { "epoch": 0.17, "grad_norm": 4.762264251708984, "learning_rate": 1.9992282780120423e-05, "loss": 2.4227, "step": 12986 }, { "epoch": 0.17, "grad_norm": 4.966531753540039, "learning_rate": 1.9992278652140503e-05, "loss": 2.6499, "step": 12987 }, { "epoch": 0.17, "grad_norm": 4.424343109130859, "learning_rate": 1.9992274523057265e-05, "loss": 2.0254, "step": 12988 }, { "epoch": 0.17, "grad_norm": 4.1124701499938965, "learning_rate": 1.9992270392870713e-05, "loss": 1.7796, "step": 12989 }, { "epoch": 0.17, "grad_norm": 4.146139144897461, "learning_rate": 1.9992266261580842e-05, "loss": 1.9714, "step": 12990 }, { "epoch": 0.17, "grad_norm": 4.755130290985107, "learning_rate": 1.9992262129187657e-05, "loss": 2.4611, "step": 12991 }, { "epoch": 0.17, "grad_norm": 4.948306083679199, "learning_rate": 1.9992257995691157e-05, "loss": 2.4792, "step": 12992 }, { "epoch": 0.17, "grad_norm": 4.629185199737549, "learning_rate": 1.9992253861091343e-05, "loss": 1.9705, "step": 12993 }, { "epoch": 0.17, "grad_norm": 4.305873394012451, "learning_rate": 1.999224972538821e-05, "loss": 2.5522, "step": 12994 }, { "epoch": 0.17, "grad_norm": 4.818337917327881, "learning_rate": 1.9992245588581766e-05, "loss": 2.4729, "step": 12995 }, { "epoch": 0.17, "grad_norm": 4.615042209625244, "learning_rate": 1.999224145067201e-05, "loss": 2.4024, "step": 12996 }, { "epoch": 0.17, "grad_norm": 4.39635705947876, "learning_rate": 1.999223731165894e-05, "loss": 1.8522, "step": 12997 }, { "epoch": 0.17, "grad_norm": 4.802613735198975, "learning_rate": 1.9992233171542554e-05, "loss": 2.6455, "step": 12998 }, { "epoch": 0.17, "grad_norm": 4.868955135345459, "learning_rate": 1.9992229030322862e-05, "loss": 2.5858, "step": 12999 }, { "epoch": 0.17, "grad_norm": 5.381633281707764, "learning_rate": 1.9992224887999852e-05, "loss": 2.7801, "step": 13000 }, { "epoch": 0.17, "grad_norm": 4.547694206237793, "learning_rate": 1.9992220744573534e-05, "loss": 2.3986, "step": 13001 }, { "epoch": 0.17, "grad_norm": 4.759273529052734, "learning_rate": 1.999221660004391e-05, "loss": 2.3498, "step": 13002 }, { "epoch": 0.17, "grad_norm": 4.626165390014648, "learning_rate": 1.9992212454410968e-05, "loss": 2.229, "step": 13003 }, { "epoch": 0.17, "grad_norm": 4.40376091003418, "learning_rate": 1.999220830767472e-05, "loss": 2.1728, "step": 13004 }, { "epoch": 0.17, "grad_norm": 4.912513256072998, "learning_rate": 1.999220415983516e-05, "loss": 2.622, "step": 13005 }, { "epoch": 0.17, "grad_norm": 4.4410247802734375, "learning_rate": 1.9992200010892296e-05, "loss": 2.2484, "step": 13006 }, { "epoch": 0.17, "grad_norm": 4.736684799194336, "learning_rate": 1.9992195860846118e-05, "loss": 2.4859, "step": 13007 }, { "epoch": 0.17, "grad_norm": 4.461970806121826, "learning_rate": 1.9992191709696635e-05, "loss": 1.9174, "step": 13008 }, { "epoch": 0.17, "grad_norm": 4.576940536499023, "learning_rate": 1.9992187557443844e-05, "loss": 2.5809, "step": 13009 }, { "epoch": 0.17, "grad_norm": 4.7619476318359375, "learning_rate": 1.999218340408775e-05, "loss": 2.3826, "step": 13010 }, { "epoch": 0.17, "grad_norm": 4.202635288238525, "learning_rate": 1.9992179249628342e-05, "loss": 2.0074, "step": 13011 }, { "epoch": 0.17, "grad_norm": 4.552720546722412, "learning_rate": 1.9992175094065634e-05, "loss": 2.7104, "step": 13012 }, { "epoch": 0.17, "grad_norm": 5.005443572998047, "learning_rate": 1.999217093739962e-05, "loss": 2.3256, "step": 13013 }, { "epoch": 0.17, "grad_norm": 4.276623249053955, "learning_rate": 1.9992166779630296e-05, "loss": 2.0909, "step": 13014 }, { "epoch": 0.17, "grad_norm": 4.766499996185303, "learning_rate": 1.999216262075767e-05, "loss": 2.3316, "step": 13015 }, { "epoch": 0.17, "grad_norm": 4.0814948081970215, "learning_rate": 1.999215846078174e-05, "loss": 2.2466, "step": 13016 }, { "epoch": 0.17, "grad_norm": 4.1322712898254395, "learning_rate": 1.9992154299702506e-05, "loss": 2.4133, "step": 13017 }, { "epoch": 0.17, "grad_norm": 4.915233612060547, "learning_rate": 1.999215013751997e-05, "loss": 2.3146, "step": 13018 }, { "epoch": 0.17, "grad_norm": 4.440155506134033, "learning_rate": 1.999214597423413e-05, "loss": 1.8604, "step": 13019 }, { "epoch": 0.17, "grad_norm": 4.048025608062744, "learning_rate": 1.9992141809844986e-05, "loss": 1.8318, "step": 13020 }, { "epoch": 0.17, "grad_norm": 5.120628356933594, "learning_rate": 1.999213764435254e-05, "loss": 2.5331, "step": 13021 }, { "epoch": 0.17, "grad_norm": 4.900915145874023, "learning_rate": 1.9992133477756796e-05, "loss": 2.5163, "step": 13022 }, { "epoch": 0.17, "grad_norm": 4.673200607299805, "learning_rate": 1.999212931005775e-05, "loss": 2.2551, "step": 13023 }, { "epoch": 0.17, "grad_norm": 4.960524559020996, "learning_rate": 1.99921251412554e-05, "loss": 2.4243, "step": 13024 }, { "epoch": 0.17, "grad_norm": 4.143899917602539, "learning_rate": 1.9992120971349753e-05, "loss": 2.4112, "step": 13025 }, { "epoch": 0.17, "grad_norm": 5.120055675506592, "learning_rate": 1.9992116800340807e-05, "loss": 2.6721, "step": 13026 }, { "epoch": 0.17, "grad_norm": 4.6902313232421875, "learning_rate": 1.999211262822856e-05, "loss": 2.6035, "step": 13027 }, { "epoch": 0.17, "grad_norm": 4.355711936950684, "learning_rate": 1.9992108455013014e-05, "loss": 2.1415, "step": 13028 }, { "epoch": 0.17, "grad_norm": 4.889894008636475, "learning_rate": 1.9992104280694168e-05, "loss": 2.476, "step": 13029 }, { "epoch": 0.17, "grad_norm": 4.713906288146973, "learning_rate": 1.9992100105272024e-05, "loss": 2.2851, "step": 13030 }, { "epoch": 0.17, "grad_norm": 4.528378963470459, "learning_rate": 1.999209592874659e-05, "loss": 2.7216, "step": 13031 }, { "epoch": 0.17, "grad_norm": 4.859793186187744, "learning_rate": 1.9992091751117853e-05, "loss": 2.6185, "step": 13032 }, { "epoch": 0.17, "grad_norm": 5.043634414672852, "learning_rate": 1.999208757238582e-05, "loss": 1.9587, "step": 13033 }, { "epoch": 0.17, "grad_norm": 5.1043243408203125, "learning_rate": 1.999208339255049e-05, "loss": 2.3452, "step": 13034 }, { "epoch": 0.17, "grad_norm": 4.581301689147949, "learning_rate": 1.9992079211611867e-05, "loss": 2.3413, "step": 13035 }, { "epoch": 0.17, "grad_norm": 4.322786331176758, "learning_rate": 1.9992075029569947e-05, "loss": 2.3025, "step": 13036 }, { "epoch": 0.17, "grad_norm": 4.8302741050720215, "learning_rate": 1.9992070846424733e-05, "loss": 2.7766, "step": 13037 }, { "epoch": 0.17, "grad_norm": 4.624013423919678, "learning_rate": 1.9992066662176228e-05, "loss": 2.1688, "step": 13038 }, { "epoch": 0.17, "grad_norm": 4.310302734375, "learning_rate": 1.9992062476824425e-05, "loss": 2.2302, "step": 13039 }, { "epoch": 0.17, "grad_norm": 4.223495006561279, "learning_rate": 1.9992058290369328e-05, "loss": 2.4285, "step": 13040 }, { "epoch": 0.17, "grad_norm": 4.616428375244141, "learning_rate": 1.9992054102810944e-05, "loss": 2.1321, "step": 13041 }, { "epoch": 0.17, "grad_norm": 4.333306789398193, "learning_rate": 1.999204991414926e-05, "loss": 1.9281, "step": 13042 }, { "epoch": 0.17, "grad_norm": 4.731851577758789, "learning_rate": 1.999204572438429e-05, "loss": 2.7125, "step": 13043 }, { "epoch": 0.17, "grad_norm": 4.349193572998047, "learning_rate": 1.9992041533516028e-05, "loss": 2.1624, "step": 13044 }, { "epoch": 0.17, "grad_norm": 5.156905651092529, "learning_rate": 1.9992037341544476e-05, "loss": 2.3931, "step": 13045 }, { "epoch": 0.17, "grad_norm": 4.632485866546631, "learning_rate": 1.999203314846963e-05, "loss": 2.1495, "step": 13046 }, { "epoch": 0.17, "grad_norm": 4.951341152191162, "learning_rate": 1.9992028954291497e-05, "loss": 2.3516, "step": 13047 }, { "epoch": 0.17, "grad_norm": 3.8772974014282227, "learning_rate": 1.9992024759010073e-05, "loss": 1.9771, "step": 13048 }, { "epoch": 0.17, "grad_norm": 4.62883186340332, "learning_rate": 1.999202056262536e-05, "loss": 2.3637, "step": 13049 }, { "epoch": 0.17, "grad_norm": 4.584240436553955, "learning_rate": 1.999201636513736e-05, "loss": 2.5405, "step": 13050 }, { "epoch": 0.17, "grad_norm": 4.432614803314209, "learning_rate": 1.999201216654607e-05, "loss": 2.1748, "step": 13051 }, { "epoch": 0.17, "grad_norm": 5.0948405265808105, "learning_rate": 1.999200796685149e-05, "loss": 2.4787, "step": 13052 }, { "epoch": 0.17, "grad_norm": 4.45470666885376, "learning_rate": 1.999200376605363e-05, "loss": 2.0733, "step": 13053 }, { "epoch": 0.17, "grad_norm": 4.0363664627075195, "learning_rate": 1.9991999564152476e-05, "loss": 1.999, "step": 13054 }, { "epoch": 0.17, "grad_norm": 4.14666223526001, "learning_rate": 1.999199536114804e-05, "loss": 2.0722, "step": 13055 }, { "epoch": 0.17, "grad_norm": 4.626898288726807, "learning_rate": 1.9991991157040318e-05, "loss": 2.1183, "step": 13056 }, { "epoch": 0.17, "grad_norm": 5.17907190322876, "learning_rate": 1.9991986951829306e-05, "loss": 2.4469, "step": 13057 }, { "epoch": 0.17, "grad_norm": 4.196246147155762, "learning_rate": 1.9991982745515014e-05, "loss": 2.3526, "step": 13058 }, { "epoch": 0.17, "grad_norm": 4.403114318847656, "learning_rate": 1.9991978538097437e-05, "loss": 2.1341, "step": 13059 }, { "epoch": 0.17, "grad_norm": 4.368368625640869, "learning_rate": 1.9991974329576576e-05, "loss": 2.1016, "step": 13060 }, { "epoch": 0.17, "grad_norm": 4.551812171936035, "learning_rate": 1.9991970119952427e-05, "loss": 2.0853, "step": 13061 }, { "epoch": 0.17, "grad_norm": 4.510312080383301, "learning_rate": 1.9991965909225e-05, "loss": 2.2643, "step": 13062 }, { "epoch": 0.17, "grad_norm": 4.596641540527344, "learning_rate": 1.9991961697394292e-05, "loss": 2.4635, "step": 13063 }, { "epoch": 0.17, "grad_norm": 4.558793544769287, "learning_rate": 1.9991957484460298e-05, "loss": 2.4104, "step": 13064 }, { "epoch": 0.17, "grad_norm": 4.5688862800598145, "learning_rate": 1.9991953270423023e-05, "loss": 2.4199, "step": 13065 }, { "epoch": 0.17, "grad_norm": 4.741030216217041, "learning_rate": 1.9991949055282467e-05, "loss": 2.5261, "step": 13066 }, { "epoch": 0.17, "grad_norm": 3.75866436958313, "learning_rate": 1.999194483903863e-05, "loss": 1.6137, "step": 13067 }, { "epoch": 0.17, "grad_norm": 4.24034309387207, "learning_rate": 1.9991940621691514e-05, "loss": 1.7967, "step": 13068 }, { "epoch": 0.17, "grad_norm": 4.568973541259766, "learning_rate": 1.999193640324112e-05, "loss": 2.3628, "step": 13069 }, { "epoch": 0.17, "grad_norm": 5.047839164733887, "learning_rate": 1.9991932183687445e-05, "loss": 2.397, "step": 13070 }, { "epoch": 0.17, "grad_norm": 4.639161586761475, "learning_rate": 1.999192796303049e-05, "loss": 2.137, "step": 13071 }, { "epoch": 0.17, "grad_norm": 4.463656425476074, "learning_rate": 1.9991923741270256e-05, "loss": 2.0708, "step": 13072 }, { "epoch": 0.17, "grad_norm": 4.535966396331787, "learning_rate": 1.9991919518406745e-05, "loss": 2.0159, "step": 13073 }, { "epoch": 0.17, "grad_norm": 4.53131628036499, "learning_rate": 1.9991915294439957e-05, "loss": 2.2962, "step": 13074 }, { "epoch": 0.17, "grad_norm": 4.641707897186279, "learning_rate": 1.9991911069369895e-05, "loss": 2.6076, "step": 13075 }, { "epoch": 0.17, "grad_norm": 4.437922954559326, "learning_rate": 1.999190684319655e-05, "loss": 1.7902, "step": 13076 }, { "epoch": 0.17, "grad_norm": 4.77732515335083, "learning_rate": 1.9991902615919933e-05, "loss": 2.5611, "step": 13077 }, { "epoch": 0.17, "grad_norm": 3.916377544403076, "learning_rate": 1.9991898387540042e-05, "loss": 2.2467, "step": 13078 }, { "epoch": 0.17, "grad_norm": 4.731943607330322, "learning_rate": 1.9991894158056874e-05, "loss": 2.6334, "step": 13079 }, { "epoch": 0.17, "grad_norm": 5.631216526031494, "learning_rate": 1.999188992747043e-05, "loss": 2.5654, "step": 13080 }, { "epoch": 0.17, "grad_norm": 4.338334560394287, "learning_rate": 1.9991885695780713e-05, "loss": 2.2642, "step": 13081 }, { "epoch": 0.17, "grad_norm": 4.475568771362305, "learning_rate": 1.999188146298772e-05, "loss": 2.011, "step": 13082 }, { "epoch": 0.17, "grad_norm": 4.810760974884033, "learning_rate": 1.9991877229091456e-05, "loss": 2.3157, "step": 13083 }, { "epoch": 0.17, "grad_norm": 5.0500569343566895, "learning_rate": 1.999187299409192e-05, "loss": 2.5492, "step": 13084 }, { "epoch": 0.17, "grad_norm": 4.835294723510742, "learning_rate": 1.9991868757989113e-05, "loss": 2.3746, "step": 13085 }, { "epoch": 0.17, "grad_norm": 4.927416801452637, "learning_rate": 1.9991864520783028e-05, "loss": 2.6367, "step": 13086 }, { "epoch": 0.17, "grad_norm": 4.387719631195068, "learning_rate": 1.9991860282473675e-05, "loss": 2.4606, "step": 13087 }, { "epoch": 0.17, "grad_norm": 4.596551895141602, "learning_rate": 1.9991856043061055e-05, "loss": 2.0461, "step": 13088 }, { "epoch": 0.17, "grad_norm": 4.9969706535339355, "learning_rate": 1.999185180254516e-05, "loss": 2.3727, "step": 13089 }, { "epoch": 0.17, "grad_norm": 4.361392021179199, "learning_rate": 1.9991847560926e-05, "loss": 2.3773, "step": 13090 }, { "epoch": 0.17, "grad_norm": 4.4560394287109375, "learning_rate": 1.9991843318203564e-05, "loss": 2.3345, "step": 13091 }, { "epoch": 0.17, "grad_norm": 4.741962432861328, "learning_rate": 1.9991839074377862e-05, "loss": 2.2915, "step": 13092 }, { "epoch": 0.17, "grad_norm": 5.377442836761475, "learning_rate": 1.999183482944889e-05, "loss": 2.2229, "step": 13093 }, { "epoch": 0.17, "grad_norm": 4.726420879364014, "learning_rate": 1.9991830583416652e-05, "loss": 2.4235, "step": 13094 }, { "epoch": 0.17, "grad_norm": 5.066073417663574, "learning_rate": 1.999182633628115e-05, "loss": 2.2771, "step": 13095 }, { "epoch": 0.17, "grad_norm": 4.7315497398376465, "learning_rate": 1.9991822088042374e-05, "loss": 2.4477, "step": 13096 }, { "epoch": 0.17, "grad_norm": 4.353466510772705, "learning_rate": 1.9991817838700332e-05, "loss": 1.8254, "step": 13097 }, { "epoch": 0.17, "grad_norm": 5.185457229614258, "learning_rate": 1.999181358825503e-05, "loss": 2.8695, "step": 13098 }, { "epoch": 0.17, "grad_norm": 4.068173408508301, "learning_rate": 1.9991809336706457e-05, "loss": 1.876, "step": 13099 }, { "epoch": 0.17, "grad_norm": 3.86262583732605, "learning_rate": 1.9991805084054618e-05, "loss": 1.7529, "step": 13100 }, { "epoch": 0.17, "grad_norm": 4.571854114532471, "learning_rate": 1.9991800830299514e-05, "loss": 2.3322, "step": 13101 }, { "epoch": 0.17, "grad_norm": 4.277148723602295, "learning_rate": 1.999179657544115e-05, "loss": 1.8189, "step": 13102 }, { "epoch": 0.17, "grad_norm": 4.492516994476318, "learning_rate": 1.999179231947952e-05, "loss": 1.8382, "step": 13103 }, { "epoch": 0.17, "grad_norm": 4.39759635925293, "learning_rate": 1.9991788062414627e-05, "loss": 2.308, "step": 13104 }, { "epoch": 0.17, "grad_norm": 5.308710098266602, "learning_rate": 1.999178380424647e-05, "loss": 2.7397, "step": 13105 }, { "epoch": 0.17, "grad_norm": 4.433001518249512, "learning_rate": 1.9991779544975052e-05, "loss": 2.2144, "step": 13106 }, { "epoch": 0.17, "grad_norm": 4.520156383514404, "learning_rate": 1.999177528460037e-05, "loss": 2.2696, "step": 13107 }, { "epoch": 0.17, "grad_norm": 3.7360899448394775, "learning_rate": 1.999177102312243e-05, "loss": 1.8709, "step": 13108 }, { "epoch": 0.17, "grad_norm": 4.7419610023498535, "learning_rate": 1.9991766760541225e-05, "loss": 2.5007, "step": 13109 }, { "epoch": 0.17, "grad_norm": 4.764918327331543, "learning_rate": 1.999176249685676e-05, "loss": 2.9199, "step": 13110 }, { "epoch": 0.17, "grad_norm": 4.1689934730529785, "learning_rate": 1.9991758232069036e-05, "loss": 2.1651, "step": 13111 }, { "epoch": 0.17, "grad_norm": 4.466528415679932, "learning_rate": 1.999175396617805e-05, "loss": 2.2804, "step": 13112 }, { "epoch": 0.17, "grad_norm": 4.454347133636475, "learning_rate": 1.9991749699183808e-05, "loss": 2.1731, "step": 13113 }, { "epoch": 0.17, "grad_norm": 4.97898006439209, "learning_rate": 1.9991745431086306e-05, "loss": 2.554, "step": 13114 }, { "epoch": 0.17, "grad_norm": 5.027466773986816, "learning_rate": 1.9991741161885546e-05, "loss": 2.8072, "step": 13115 }, { "epoch": 0.17, "grad_norm": 4.429535388946533, "learning_rate": 1.999173689158153e-05, "loss": 2.5926, "step": 13116 }, { "epoch": 0.17, "grad_norm": 4.455897808074951, "learning_rate": 1.9991732620174256e-05, "loss": 2.1599, "step": 13117 }, { "epoch": 0.17, "grad_norm": 5.076503753662109, "learning_rate": 1.9991728347663725e-05, "loss": 2.1437, "step": 13118 }, { "epoch": 0.17, "grad_norm": 4.291251182556152, "learning_rate": 1.9991724074049937e-05, "loss": 2.3911, "step": 13119 }, { "epoch": 0.17, "grad_norm": 4.595798492431641, "learning_rate": 1.9991719799332892e-05, "loss": 2.3706, "step": 13120 }, { "epoch": 0.17, "grad_norm": 4.441412448883057, "learning_rate": 1.999171552351259e-05, "loss": 2.1769, "step": 13121 }, { "epoch": 0.17, "grad_norm": 4.641469478607178, "learning_rate": 1.999171124658904e-05, "loss": 2.1235, "step": 13122 }, { "epoch": 0.17, "grad_norm": 4.432361602783203, "learning_rate": 1.999170696856223e-05, "loss": 2.1398, "step": 13123 }, { "epoch": 0.17, "grad_norm": 4.375837802886963, "learning_rate": 1.9991702689432167e-05, "loss": 2.1447, "step": 13124 }, { "epoch": 0.17, "grad_norm": 4.956554889678955, "learning_rate": 1.999169840919885e-05, "loss": 2.5394, "step": 13125 }, { "epoch": 0.17, "grad_norm": 4.626706600189209, "learning_rate": 1.9991694127862284e-05, "loss": 2.1048, "step": 13126 }, { "epoch": 0.17, "grad_norm": 4.534173965454102, "learning_rate": 1.999168984542246e-05, "loss": 2.0557, "step": 13127 }, { "epoch": 0.17, "grad_norm": 4.484701633453369, "learning_rate": 1.999168556187939e-05, "loss": 2.233, "step": 13128 }, { "epoch": 0.17, "grad_norm": 4.361428737640381, "learning_rate": 1.9991681277233062e-05, "loss": 2.3747, "step": 13129 }, { "epoch": 0.17, "grad_norm": 4.296651363372803, "learning_rate": 1.9991676991483488e-05, "loss": 2.2486, "step": 13130 }, { "epoch": 0.17, "grad_norm": 4.992788791656494, "learning_rate": 1.999167270463066e-05, "loss": 2.9062, "step": 13131 }, { "epoch": 0.17, "grad_norm": 4.786208629608154, "learning_rate": 1.9991668416674585e-05, "loss": 2.6067, "step": 13132 }, { "epoch": 0.17, "grad_norm": 4.749430179595947, "learning_rate": 1.9991664127615256e-05, "loss": 2.397, "step": 13133 }, { "epoch": 0.17, "grad_norm": 4.719705581665039, "learning_rate": 1.999165983745268e-05, "loss": 2.1094, "step": 13134 }, { "epoch": 0.17, "grad_norm": 4.589778423309326, "learning_rate": 1.9991655546186855e-05, "loss": 2.2019, "step": 13135 }, { "epoch": 0.17, "grad_norm": 4.769685745239258, "learning_rate": 1.999165125381778e-05, "loss": 2.679, "step": 13136 }, { "epoch": 0.17, "grad_norm": 4.597898483276367, "learning_rate": 1.999164696034546e-05, "loss": 2.3629, "step": 13137 }, { "epoch": 0.17, "grad_norm": 4.527941703796387, "learning_rate": 1.9991642665769894e-05, "loss": 2.3045, "step": 13138 }, { "epoch": 0.17, "grad_norm": 5.046926498413086, "learning_rate": 1.9991638370091077e-05, "loss": 2.0975, "step": 13139 }, { "epoch": 0.17, "grad_norm": 4.49773645401001, "learning_rate": 1.9991634073309014e-05, "loss": 2.497, "step": 13140 }, { "epoch": 0.17, "grad_norm": 5.135488510131836, "learning_rate": 1.9991629775423707e-05, "loss": 2.3667, "step": 13141 }, { "epoch": 0.17, "grad_norm": 5.353927135467529, "learning_rate": 1.9991625476435154e-05, "loss": 1.9768, "step": 13142 }, { "epoch": 0.17, "grad_norm": 4.301982402801514, "learning_rate": 1.999162117634336e-05, "loss": 1.9056, "step": 13143 }, { "epoch": 0.17, "grad_norm": 4.654133319854736, "learning_rate": 1.9991616875148313e-05, "loss": 2.3517, "step": 13144 }, { "epoch": 0.17, "grad_norm": 4.914588451385498, "learning_rate": 1.999161257285003e-05, "loss": 2.5081, "step": 13145 }, { "epoch": 0.17, "grad_norm": 6.720312118530273, "learning_rate": 1.9991608269448498e-05, "loss": 2.729, "step": 13146 }, { "epoch": 0.17, "grad_norm": 4.238478660583496, "learning_rate": 1.9991603964943727e-05, "loss": 1.9262, "step": 13147 }, { "epoch": 0.17, "grad_norm": 5.560182571411133, "learning_rate": 1.9991599659335712e-05, "loss": 2.6249, "step": 13148 }, { "epoch": 0.17, "grad_norm": 4.48821496963501, "learning_rate": 1.9991595352624454e-05, "loss": 1.9644, "step": 13149 }, { "epoch": 0.17, "grad_norm": 4.335267066955566, "learning_rate": 1.9991591044809953e-05, "loss": 1.8854, "step": 13150 }, { "epoch": 0.17, "grad_norm": 4.6978349685668945, "learning_rate": 1.9991586735892215e-05, "loss": 2.5381, "step": 13151 }, { "epoch": 0.17, "grad_norm": 4.019748687744141, "learning_rate": 1.9991582425871233e-05, "loss": 1.8648, "step": 13152 }, { "epoch": 0.17, "grad_norm": 4.891899108886719, "learning_rate": 1.999157811474701e-05, "loss": 2.2759, "step": 13153 }, { "epoch": 0.17, "grad_norm": 4.932554721832275, "learning_rate": 1.999157380251955e-05, "loss": 2.7158, "step": 13154 }, { "epoch": 0.17, "grad_norm": 4.336145401000977, "learning_rate": 1.9991569489188848e-05, "loss": 1.7446, "step": 13155 }, { "epoch": 0.17, "grad_norm": 4.666767120361328, "learning_rate": 1.999156517475491e-05, "loss": 2.2104, "step": 13156 }, { "epoch": 0.17, "grad_norm": 4.604068756103516, "learning_rate": 1.9991560859217734e-05, "loss": 2.2027, "step": 13157 }, { "epoch": 0.17, "grad_norm": 4.511218547821045, "learning_rate": 1.999155654257732e-05, "loss": 2.3855, "step": 13158 }, { "epoch": 0.17, "grad_norm": 4.088251113891602, "learning_rate": 1.9991552224833666e-05, "loss": 2.5082, "step": 13159 }, { "epoch": 0.17, "grad_norm": 4.831174850463867, "learning_rate": 1.9991547905986778e-05, "loss": 2.7819, "step": 13160 }, { "epoch": 0.17, "grad_norm": 4.333658218383789, "learning_rate": 1.999154358603665e-05, "loss": 2.4044, "step": 13161 }, { "epoch": 0.17, "grad_norm": 3.202131509780884, "learning_rate": 1.999153926498329e-05, "loss": 1.2633, "step": 13162 }, { "epoch": 0.17, "grad_norm": 4.384894847869873, "learning_rate": 1.999153494282669e-05, "loss": 2.2655, "step": 13163 }, { "epoch": 0.17, "grad_norm": 4.368701457977295, "learning_rate": 1.999153061956686e-05, "loss": 1.8293, "step": 13164 }, { "epoch": 0.17, "grad_norm": 4.830847263336182, "learning_rate": 1.999152629520379e-05, "loss": 1.9515, "step": 13165 }, { "epoch": 0.17, "grad_norm": 4.035557270050049, "learning_rate": 1.999152196973749e-05, "loss": 1.9, "step": 13166 }, { "epoch": 0.17, "grad_norm": 4.024352550506592, "learning_rate": 1.9991517643167956e-05, "loss": 1.6139, "step": 13167 }, { "epoch": 0.17, "grad_norm": 4.067078590393066, "learning_rate": 1.999151331549519e-05, "loss": 2.1082, "step": 13168 }, { "epoch": 0.17, "grad_norm": 4.3539958000183105, "learning_rate": 1.999150898671919e-05, "loss": 2.0459, "step": 13169 }, { "epoch": 0.17, "grad_norm": 5.319108009338379, "learning_rate": 1.9991504656839957e-05, "loss": 2.798, "step": 13170 }, { "epoch": 0.17, "grad_norm": 5.387237548828125, "learning_rate": 1.9991500325857495e-05, "loss": 3.1, "step": 13171 }, { "epoch": 0.17, "grad_norm": 4.028017997741699, "learning_rate": 1.99914959937718e-05, "loss": 1.8896, "step": 13172 }, { "epoch": 0.17, "grad_norm": 4.975110054016113, "learning_rate": 1.9991491660582874e-05, "loss": 2.1547, "step": 13173 }, { "epoch": 0.17, "grad_norm": 4.41092586517334, "learning_rate": 1.999148732629072e-05, "loss": 2.0199, "step": 13174 }, { "epoch": 0.17, "grad_norm": 4.48363733291626, "learning_rate": 1.999148299089533e-05, "loss": 2.1541, "step": 13175 }, { "epoch": 0.17, "grad_norm": 4.447613716125488, "learning_rate": 1.999147865439672e-05, "loss": 2.2359, "step": 13176 }, { "epoch": 0.17, "grad_norm": 4.238359451293945, "learning_rate": 1.9991474316794878e-05, "loss": 2.1257, "step": 13177 }, { "epoch": 0.17, "grad_norm": 4.388242244720459, "learning_rate": 1.9991469978089807e-05, "loss": 2.1962, "step": 13178 }, { "epoch": 0.17, "grad_norm": 4.833856105804443, "learning_rate": 1.9991465638281505e-05, "loss": 2.6761, "step": 13179 }, { "epoch": 0.17, "grad_norm": 4.47876501083374, "learning_rate": 1.9991461297369978e-05, "loss": 2.3157, "step": 13180 }, { "epoch": 0.17, "grad_norm": 4.7493391036987305, "learning_rate": 1.9991456955355227e-05, "loss": 2.3252, "step": 13181 }, { "epoch": 0.17, "grad_norm": 5.41299295425415, "learning_rate": 1.9991452612237246e-05, "loss": 2.3355, "step": 13182 }, { "epoch": 0.17, "grad_norm": 4.805627822875977, "learning_rate": 1.9991448268016042e-05, "loss": 2.2718, "step": 13183 }, { "epoch": 0.17, "grad_norm": 4.708531379699707, "learning_rate": 1.9991443922691608e-05, "loss": 2.4401, "step": 13184 }, { "epoch": 0.17, "grad_norm": 5.155713081359863, "learning_rate": 1.9991439576263954e-05, "loss": 2.5542, "step": 13185 }, { "epoch": 0.17, "grad_norm": 4.528026103973389, "learning_rate": 1.999143522873307e-05, "loss": 2.0855, "step": 13186 }, { "epoch": 0.17, "grad_norm": 4.888026714324951, "learning_rate": 1.9991430880098967e-05, "loss": 2.509, "step": 13187 }, { "epoch": 0.17, "grad_norm": 5.18172550201416, "learning_rate": 1.999142653036164e-05, "loss": 2.5495, "step": 13188 }, { "epoch": 0.17, "grad_norm": 4.424293518066406, "learning_rate": 1.999142217952109e-05, "loss": 1.8777, "step": 13189 }, { "epoch": 0.17, "grad_norm": 5.29502010345459, "learning_rate": 1.9991417827577315e-05, "loss": 2.7235, "step": 13190 }, { "epoch": 0.17, "grad_norm": 4.871888160705566, "learning_rate": 1.9991413474530322e-05, "loss": 2.7041, "step": 13191 }, { "epoch": 0.17, "grad_norm": 4.764937877655029, "learning_rate": 1.9991409120380103e-05, "loss": 2.5889, "step": 13192 }, { "epoch": 0.17, "grad_norm": 4.172772407531738, "learning_rate": 1.9991404765126664e-05, "loss": 2.4551, "step": 13193 }, { "epoch": 0.17, "grad_norm": 4.535677909851074, "learning_rate": 1.999140040877001e-05, "loss": 2.4242, "step": 13194 }, { "epoch": 0.17, "grad_norm": 4.624521732330322, "learning_rate": 1.9991396051310124e-05, "loss": 2.3112, "step": 13195 }, { "epoch": 0.17, "grad_norm": 4.257235527038574, "learning_rate": 1.999139169274703e-05, "loss": 2.5617, "step": 13196 }, { "epoch": 0.17, "grad_norm": 4.2304277420043945, "learning_rate": 1.999138733308071e-05, "loss": 2.5895, "step": 13197 }, { "epoch": 0.17, "grad_norm": 4.206518173217773, "learning_rate": 1.9991382972311174e-05, "loss": 2.235, "step": 13198 }, { "epoch": 0.17, "grad_norm": 4.247049331665039, "learning_rate": 1.999137861043842e-05, "loss": 2.459, "step": 13199 }, { "epoch": 0.17, "grad_norm": 4.9405694007873535, "learning_rate": 1.9991374247462446e-05, "loss": 2.6575, "step": 13200 }, { "epoch": 0.17, "grad_norm": 4.196819305419922, "learning_rate": 1.999136988338326e-05, "loss": 2.274, "step": 13201 }, { "epoch": 0.17, "grad_norm": 4.8709611892700195, "learning_rate": 1.9991365518200853e-05, "loss": 2.205, "step": 13202 }, { "epoch": 0.17, "grad_norm": 4.83056640625, "learning_rate": 1.9991361151915233e-05, "loss": 1.9082, "step": 13203 }, { "epoch": 0.17, "grad_norm": 5.620519638061523, "learning_rate": 1.999135678452639e-05, "loss": 2.3499, "step": 13204 }, { "epoch": 0.17, "grad_norm": 4.417604923248291, "learning_rate": 1.999135241603434e-05, "loss": 2.1423, "step": 13205 }, { "epoch": 0.17, "grad_norm": 4.483242988586426, "learning_rate": 1.999134804643907e-05, "loss": 2.1956, "step": 13206 }, { "epoch": 0.17, "grad_norm": 4.358292579650879, "learning_rate": 1.999134367574059e-05, "loss": 2.1512, "step": 13207 }, { "epoch": 0.17, "grad_norm": 5.365274906158447, "learning_rate": 1.9991339303938894e-05, "loss": 2.4783, "step": 13208 }, { "epoch": 0.17, "grad_norm": 4.364157199859619, "learning_rate": 1.9991334931033983e-05, "loss": 1.9046, "step": 13209 }, { "epoch": 0.17, "grad_norm": 4.650261402130127, "learning_rate": 1.999133055702586e-05, "loss": 1.9795, "step": 13210 }, { "epoch": 0.17, "grad_norm": 4.905891418457031, "learning_rate": 1.9991326181914522e-05, "loss": 2.5152, "step": 13211 }, { "epoch": 0.17, "grad_norm": 3.76157546043396, "learning_rate": 1.9991321805699976e-05, "loss": 1.6261, "step": 13212 }, { "epoch": 0.17, "grad_norm": 4.159187316894531, "learning_rate": 1.999131742838222e-05, "loss": 1.9974, "step": 13213 }, { "epoch": 0.17, "grad_norm": 4.5756659507751465, "learning_rate": 1.999131304996125e-05, "loss": 1.7474, "step": 13214 }, { "epoch": 0.17, "grad_norm": 5.2368316650390625, "learning_rate": 1.9991308670437072e-05, "loss": 2.5956, "step": 13215 }, { "epoch": 0.17, "grad_norm": 4.812662124633789, "learning_rate": 1.999130428980968e-05, "loss": 2.4167, "step": 13216 }, { "epoch": 0.17, "grad_norm": 4.980422496795654, "learning_rate": 1.9991299908079083e-05, "loss": 2.7049, "step": 13217 }, { "epoch": 0.17, "grad_norm": 4.668308258056641, "learning_rate": 1.9991295525245272e-05, "loss": 2.2493, "step": 13218 }, { "epoch": 0.17, "grad_norm": 4.231028079986572, "learning_rate": 1.9991291141308255e-05, "loss": 1.8532, "step": 13219 }, { "epoch": 0.17, "grad_norm": 4.8596110343933105, "learning_rate": 1.9991286756268032e-05, "loss": 2.4923, "step": 13220 }, { "epoch": 0.17, "grad_norm": 4.936870098114014, "learning_rate": 1.99912823701246e-05, "loss": 2.2581, "step": 13221 }, { "epoch": 0.17, "grad_norm": 4.322427272796631, "learning_rate": 1.999127798287796e-05, "loss": 2.2147, "step": 13222 }, { "epoch": 0.17, "grad_norm": 4.878986358642578, "learning_rate": 1.9991273594528115e-05, "loss": 2.443, "step": 13223 }, { "epoch": 0.17, "grad_norm": 4.664405345916748, "learning_rate": 1.999126920507506e-05, "loss": 2.2271, "step": 13224 }, { "epoch": 0.17, "grad_norm": 4.517578601837158, "learning_rate": 1.9991264814518803e-05, "loss": 2.4274, "step": 13225 }, { "epoch": 0.17, "grad_norm": 4.5680060386657715, "learning_rate": 1.999126042285934e-05, "loss": 2.6979, "step": 13226 }, { "epoch": 0.17, "grad_norm": 4.647835731506348, "learning_rate": 1.9991256030096672e-05, "loss": 2.0002, "step": 13227 }, { "epoch": 0.17, "grad_norm": 4.557388782501221, "learning_rate": 1.99912516362308e-05, "loss": 2.1687, "step": 13228 }, { "epoch": 0.17, "grad_norm": 4.681727886199951, "learning_rate": 1.9991247241261727e-05, "loss": 2.2049, "step": 13229 }, { "epoch": 0.17, "grad_norm": 4.782141208648682, "learning_rate": 1.9991242845189445e-05, "loss": 2.59, "step": 13230 }, { "epoch": 0.17, "grad_norm": 4.085665702819824, "learning_rate": 1.9991238448013964e-05, "loss": 2.0614, "step": 13231 }, { "epoch": 0.17, "grad_norm": 4.285740852355957, "learning_rate": 1.9991234049735283e-05, "loss": 2.1733, "step": 13232 }, { "epoch": 0.17, "grad_norm": 5.19195032119751, "learning_rate": 1.9991229650353396e-05, "loss": 2.6398, "step": 13233 }, { "epoch": 0.17, "grad_norm": 4.00526237487793, "learning_rate": 1.999122524986831e-05, "loss": 1.9329, "step": 13234 }, { "epoch": 0.17, "grad_norm": 4.157565116882324, "learning_rate": 1.999122084828002e-05, "loss": 1.9109, "step": 13235 }, { "epoch": 0.17, "grad_norm": 4.809205532073975, "learning_rate": 1.9991216445588532e-05, "loss": 2.4305, "step": 13236 }, { "epoch": 0.17, "grad_norm": 4.657844543457031, "learning_rate": 1.9991212041793848e-05, "loss": 2.4556, "step": 13237 }, { "epoch": 0.17, "grad_norm": 4.148647785186768, "learning_rate": 1.999120763689596e-05, "loss": 1.6861, "step": 13238 }, { "epoch": 0.17, "grad_norm": 4.38839864730835, "learning_rate": 1.9991203230894874e-05, "loss": 2.3058, "step": 13239 }, { "epoch": 0.17, "grad_norm": 4.483307838439941, "learning_rate": 1.999119882379059e-05, "loss": 2.2158, "step": 13240 }, { "epoch": 0.17, "grad_norm": 4.428165912628174, "learning_rate": 1.999119441558311e-05, "loss": 2.2733, "step": 13241 }, { "epoch": 0.17, "grad_norm": 4.761056423187256, "learning_rate": 1.999119000627243e-05, "loss": 2.5615, "step": 13242 }, { "epoch": 0.17, "grad_norm": 4.176554203033447, "learning_rate": 1.9991185595858552e-05, "loss": 1.9255, "step": 13243 }, { "epoch": 0.17, "grad_norm": 4.664291858673096, "learning_rate": 1.9991181184341482e-05, "loss": 2.6579, "step": 13244 }, { "epoch": 0.17, "grad_norm": 4.342270374298096, "learning_rate": 1.9991176771721212e-05, "loss": 2.5031, "step": 13245 }, { "epoch": 0.17, "grad_norm": 5.006978988647461, "learning_rate": 1.999117235799775e-05, "loss": 2.457, "step": 13246 }, { "epoch": 0.17, "grad_norm": 4.601694107055664, "learning_rate": 1.999116794317109e-05, "loss": 2.6679, "step": 13247 }, { "epoch": 0.17, "grad_norm": 4.412411689758301, "learning_rate": 1.9991163527241236e-05, "loss": 2.2722, "step": 13248 }, { "epoch": 0.17, "grad_norm": 5.846407890319824, "learning_rate": 1.999115911020819e-05, "loss": 2.4321, "step": 13249 }, { "epoch": 0.17, "grad_norm": 4.865503311157227, "learning_rate": 1.999115469207195e-05, "loss": 2.4814, "step": 13250 }, { "epoch": 0.17, "grad_norm": 4.87617301940918, "learning_rate": 1.9991150272832517e-05, "loss": 2.088, "step": 13251 }, { "epoch": 0.17, "grad_norm": 4.535380840301514, "learning_rate": 1.999114585248989e-05, "loss": 2.1916, "step": 13252 }, { "epoch": 0.17, "grad_norm": 4.719103813171387, "learning_rate": 1.9991141431044074e-05, "loss": 2.3092, "step": 13253 }, { "epoch": 0.17, "grad_norm": 4.535665512084961, "learning_rate": 1.999113700849506e-05, "loss": 2.7028, "step": 13254 }, { "epoch": 0.17, "grad_norm": 4.89709997177124, "learning_rate": 1.999113258484286e-05, "loss": 2.0702, "step": 13255 }, { "epoch": 0.17, "grad_norm": 4.73859977722168, "learning_rate": 1.999112816008747e-05, "loss": 2.6005, "step": 13256 }, { "epoch": 0.17, "grad_norm": 4.616312503814697, "learning_rate": 1.999112373422889e-05, "loss": 2.0128, "step": 13257 }, { "epoch": 0.17, "grad_norm": 4.670926094055176, "learning_rate": 1.999111930726712e-05, "loss": 1.9202, "step": 13258 }, { "epoch": 0.17, "grad_norm": 4.7425055503845215, "learning_rate": 1.9991114879202158e-05, "loss": 2.6659, "step": 13259 }, { "epoch": 0.17, "grad_norm": 5.642739295959473, "learning_rate": 1.999111045003401e-05, "loss": 2.0443, "step": 13260 }, { "epoch": 0.17, "grad_norm": 4.789144515991211, "learning_rate": 1.999110601976267e-05, "loss": 2.5219, "step": 13261 }, { "epoch": 0.17, "grad_norm": 4.255070686340332, "learning_rate": 1.999110158838815e-05, "loss": 1.7801, "step": 13262 }, { "epoch": 0.17, "grad_norm": 4.873724937438965, "learning_rate": 1.9991097155910438e-05, "loss": 2.4204, "step": 13263 }, { "epoch": 0.17, "grad_norm": 4.849061012268066, "learning_rate": 1.999109272232954e-05, "loss": 2.5483, "step": 13264 }, { "epoch": 0.17, "grad_norm": 3.8439745903015137, "learning_rate": 1.9991088287645456e-05, "loss": 1.7402, "step": 13265 }, { "epoch": 0.17, "grad_norm": 5.235604286193848, "learning_rate": 1.9991083851858185e-05, "loss": 2.6415, "step": 13266 }, { "epoch": 0.17, "grad_norm": 5.453592777252197, "learning_rate": 1.999107941496773e-05, "loss": 2.9249, "step": 13267 }, { "epoch": 0.17, "grad_norm": 4.589276313781738, "learning_rate": 1.999107497697409e-05, "loss": 2.1453, "step": 13268 }, { "epoch": 0.17, "grad_norm": 4.932601451873779, "learning_rate": 1.999107053787727e-05, "loss": 2.0367, "step": 13269 }, { "epoch": 0.17, "grad_norm": 4.7783966064453125, "learning_rate": 1.999106609767726e-05, "loss": 2.469, "step": 13270 }, { "epoch": 0.17, "grad_norm": 4.726748943328857, "learning_rate": 1.999106165637407e-05, "loss": 2.2243, "step": 13271 }, { "epoch": 0.17, "grad_norm": 4.511629104614258, "learning_rate": 1.9991057213967697e-05, "loss": 2.3146, "step": 13272 }, { "epoch": 0.17, "grad_norm": 4.614133834838867, "learning_rate": 1.999105277045814e-05, "loss": 2.6837, "step": 13273 }, { "epoch": 0.17, "grad_norm": 4.082084655761719, "learning_rate": 1.9991048325845403e-05, "loss": 2.0142, "step": 13274 }, { "epoch": 0.17, "grad_norm": 4.333456993103027, "learning_rate": 1.9991043880129486e-05, "loss": 2.065, "step": 13275 }, { "epoch": 0.17, "grad_norm": 4.734191417694092, "learning_rate": 1.9991039433310386e-05, "loss": 2.7428, "step": 13276 }, { "epoch": 0.17, "grad_norm": 4.607398509979248, "learning_rate": 1.9991034985388107e-05, "loss": 2.5295, "step": 13277 }, { "epoch": 0.17, "grad_norm": 4.3465118408203125, "learning_rate": 1.999103053636265e-05, "loss": 1.7973, "step": 13278 }, { "epoch": 0.17, "grad_norm": 5.241004943847656, "learning_rate": 1.999102608623401e-05, "loss": 2.8129, "step": 13279 }, { "epoch": 0.17, "grad_norm": 4.656013011932373, "learning_rate": 1.999102163500219e-05, "loss": 2.4613, "step": 13280 }, { "epoch": 0.17, "grad_norm": 4.311720371246338, "learning_rate": 1.9991017182667197e-05, "loss": 2.0432, "step": 13281 }, { "epoch": 0.17, "grad_norm": 3.738762855529785, "learning_rate": 1.9991012729229023e-05, "loss": 1.5655, "step": 13282 }, { "epoch": 0.17, "grad_norm": 3.7790729999542236, "learning_rate": 1.9991008274687673e-05, "loss": 1.9112, "step": 13283 }, { "epoch": 0.17, "grad_norm": 4.303285598754883, "learning_rate": 1.9991003819043144e-05, "loss": 2.3444, "step": 13284 }, { "epoch": 0.17, "grad_norm": 4.867535591125488, "learning_rate": 1.999099936229544e-05, "loss": 2.456, "step": 13285 }, { "epoch": 0.17, "grad_norm": 4.642795562744141, "learning_rate": 1.999099490444456e-05, "loss": 2.2198, "step": 13286 }, { "epoch": 0.17, "grad_norm": 4.374855995178223, "learning_rate": 1.9990990445490505e-05, "loss": 2.0942, "step": 13287 }, { "epoch": 0.17, "grad_norm": 4.737785339355469, "learning_rate": 1.9990985985433277e-05, "loss": 2.2938, "step": 13288 }, { "epoch": 0.17, "grad_norm": 4.947596073150635, "learning_rate": 1.9990981524272873e-05, "loss": 2.1354, "step": 13289 }, { "epoch": 0.17, "grad_norm": 4.737178325653076, "learning_rate": 1.9990977062009293e-05, "loss": 1.9204, "step": 13290 }, { "epoch": 0.17, "grad_norm": 4.721139907836914, "learning_rate": 1.999097259864254e-05, "loss": 2.5533, "step": 13291 }, { "epoch": 0.17, "grad_norm": 4.687568187713623, "learning_rate": 1.999096813417262e-05, "loss": 2.6944, "step": 13292 }, { "epoch": 0.17, "grad_norm": 4.601081848144531, "learning_rate": 1.9990963668599522e-05, "loss": 2.1548, "step": 13293 }, { "epoch": 0.17, "grad_norm": 4.835039138793945, "learning_rate": 1.9990959201923253e-05, "loss": 2.4108, "step": 13294 }, { "epoch": 0.17, "grad_norm": 4.5081071853637695, "learning_rate": 1.999095473414381e-05, "loss": 2.2278, "step": 13295 }, { "epoch": 0.17, "grad_norm": 4.827999591827393, "learning_rate": 1.99909502652612e-05, "loss": 2.1594, "step": 13296 }, { "epoch": 0.17, "grad_norm": 4.169717788696289, "learning_rate": 1.999094579527542e-05, "loss": 1.9151, "step": 13297 }, { "epoch": 0.17, "grad_norm": 4.471851825714111, "learning_rate": 1.999094132418647e-05, "loss": 2.3306, "step": 13298 }, { "epoch": 0.17, "grad_norm": 4.247395992279053, "learning_rate": 1.999093685199435e-05, "loss": 1.7046, "step": 13299 }, { "epoch": 0.17, "grad_norm": 5.722105503082275, "learning_rate": 1.999093237869906e-05, "loss": 2.5754, "step": 13300 }, { "epoch": 0.17, "grad_norm": 4.605302810668945, "learning_rate": 1.9990927904300603e-05, "loss": 2.5892, "step": 13301 }, { "epoch": 0.17, "grad_norm": 4.196570873260498, "learning_rate": 1.9990923428798976e-05, "loss": 2.3596, "step": 13302 }, { "epoch": 0.17, "grad_norm": 4.933729648590088, "learning_rate": 1.9990918952194183e-05, "loss": 2.4576, "step": 13303 }, { "epoch": 0.17, "grad_norm": 4.468828201293945, "learning_rate": 1.9990914474486224e-05, "loss": 2.4243, "step": 13304 }, { "epoch": 0.17, "grad_norm": 5.11685848236084, "learning_rate": 1.9990909995675097e-05, "loss": 2.4577, "step": 13305 }, { "epoch": 0.17, "grad_norm": 4.388351917266846, "learning_rate": 1.9990905515760803e-05, "loss": 1.7974, "step": 13306 }, { "epoch": 0.17, "grad_norm": 4.343481063842773, "learning_rate": 1.9990901034743345e-05, "loss": 2.4385, "step": 13307 }, { "epoch": 0.17, "grad_norm": 4.335254192352295, "learning_rate": 1.9990896552622724e-05, "loss": 1.7852, "step": 13308 }, { "epoch": 0.17, "grad_norm": 5.043567657470703, "learning_rate": 1.9990892069398934e-05, "loss": 2.9887, "step": 13309 }, { "epoch": 0.17, "grad_norm": 4.070785045623779, "learning_rate": 1.9990887585071982e-05, "loss": 1.8341, "step": 13310 }, { "epoch": 0.17, "grad_norm": 5.525538921356201, "learning_rate": 1.9990883099641868e-05, "loss": 2.2731, "step": 13311 }, { "epoch": 0.17, "grad_norm": 4.602045059204102, "learning_rate": 1.9990878613108592e-05, "loss": 2.2204, "step": 13312 }, { "epoch": 0.17, "grad_norm": 4.96380615234375, "learning_rate": 1.999087412547215e-05, "loss": 2.758, "step": 13313 }, { "epoch": 0.17, "grad_norm": 3.989612579345703, "learning_rate": 1.999086963673255e-05, "loss": 1.6971, "step": 13314 }, { "epoch": 0.17, "grad_norm": 4.24535608291626, "learning_rate": 1.9990865146889783e-05, "loss": 2.023, "step": 13315 }, { "epoch": 0.17, "grad_norm": 3.8592069149017334, "learning_rate": 1.999086065594386e-05, "loss": 1.762, "step": 13316 }, { "epoch": 0.17, "grad_norm": 4.827001571655273, "learning_rate": 1.999085616389477e-05, "loss": 2.1055, "step": 13317 }, { "epoch": 0.17, "grad_norm": 4.508605480194092, "learning_rate": 1.9990851670742526e-05, "loss": 1.84, "step": 13318 }, { "epoch": 0.17, "grad_norm": 5.882048606872559, "learning_rate": 1.999084717648712e-05, "loss": 2.6388, "step": 13319 }, { "epoch": 0.17, "grad_norm": 3.844261646270752, "learning_rate": 1.9990842681128558e-05, "loss": 2.1232, "step": 13320 }, { "epoch": 0.17, "grad_norm": 4.461956977844238, "learning_rate": 1.9990838184666836e-05, "loss": 2.3641, "step": 13321 }, { "epoch": 0.17, "grad_norm": 4.690323829650879, "learning_rate": 1.9990833687101955e-05, "loss": 1.868, "step": 13322 }, { "epoch": 0.17, "grad_norm": 3.7392079830169678, "learning_rate": 1.9990829188433915e-05, "loss": 1.724, "step": 13323 }, { "epoch": 0.17, "grad_norm": 3.8709678649902344, "learning_rate": 1.999082468866272e-05, "loss": 1.7724, "step": 13324 }, { "epoch": 0.17, "grad_norm": 4.995609283447266, "learning_rate": 1.999082018778837e-05, "loss": 2.4604, "step": 13325 }, { "epoch": 0.17, "grad_norm": 4.613690376281738, "learning_rate": 1.999081568581086e-05, "loss": 2.7042, "step": 13326 }, { "epoch": 0.17, "grad_norm": 5.225893497467041, "learning_rate": 1.9990811182730196e-05, "loss": 2.3325, "step": 13327 }, { "epoch": 0.17, "grad_norm": 4.449103832244873, "learning_rate": 1.9990806678546376e-05, "loss": 2.2109, "step": 13328 }, { "epoch": 0.17, "grad_norm": 4.6708831787109375, "learning_rate": 1.9990802173259404e-05, "loss": 1.6355, "step": 13329 }, { "epoch": 0.17, "grad_norm": 4.548458576202393, "learning_rate": 1.999079766686928e-05, "loss": 2.5158, "step": 13330 }, { "epoch": 0.17, "grad_norm": 4.554184436798096, "learning_rate": 1.9990793159375997e-05, "loss": 2.4265, "step": 13331 }, { "epoch": 0.17, "grad_norm": 4.629947185516357, "learning_rate": 1.9990788650779566e-05, "loss": 2.3139, "step": 13332 }, { "epoch": 0.17, "grad_norm": 4.665132999420166, "learning_rate": 1.9990784141079976e-05, "loss": 2.5488, "step": 13333 }, { "epoch": 0.17, "grad_norm": 4.448894023895264, "learning_rate": 1.999077963027724e-05, "loss": 2.0411, "step": 13334 }, { "epoch": 0.17, "grad_norm": 4.753785133361816, "learning_rate": 1.9990775118371347e-05, "loss": 2.1966, "step": 13335 }, { "epoch": 0.17, "grad_norm": 4.380643844604492, "learning_rate": 1.9990770605362307e-05, "loss": 2.1355, "step": 13336 }, { "epoch": 0.17, "grad_norm": 4.50689172744751, "learning_rate": 1.9990766091250116e-05, "loss": 2.2226, "step": 13337 }, { "epoch": 0.17, "grad_norm": 4.767742156982422, "learning_rate": 1.9990761576034776e-05, "loss": 2.6774, "step": 13338 }, { "epoch": 0.17, "grad_norm": 5.299630641937256, "learning_rate": 1.9990757059716284e-05, "loss": 2.9378, "step": 13339 }, { "epoch": 0.17, "grad_norm": 5.1230010986328125, "learning_rate": 1.9990752542294644e-05, "loss": 2.639, "step": 13340 }, { "epoch": 0.17, "grad_norm": 4.055613040924072, "learning_rate": 1.9990748023769855e-05, "loss": 1.9809, "step": 13341 }, { "epoch": 0.17, "grad_norm": 4.201326370239258, "learning_rate": 1.9990743504141917e-05, "loss": 1.8732, "step": 13342 }, { "epoch": 0.17, "grad_norm": 4.040425777435303, "learning_rate": 1.9990738983410834e-05, "loss": 2.0108, "step": 13343 }, { "epoch": 0.17, "grad_norm": 4.831068515777588, "learning_rate": 1.9990734461576603e-05, "loss": 2.9467, "step": 13344 }, { "epoch": 0.17, "grad_norm": 4.123953819274902, "learning_rate": 1.9990729938639223e-05, "loss": 2.0967, "step": 13345 }, { "epoch": 0.17, "grad_norm": 4.009420871734619, "learning_rate": 1.99907254145987e-05, "loss": 1.832, "step": 13346 }, { "epoch": 0.17, "grad_norm": 4.393282413482666, "learning_rate": 1.999072088945503e-05, "loss": 2.1907, "step": 13347 }, { "epoch": 0.17, "grad_norm": 4.049358367919922, "learning_rate": 1.9990716363208214e-05, "loss": 2.2628, "step": 13348 }, { "epoch": 0.17, "grad_norm": 4.289093017578125, "learning_rate": 1.9990711835858257e-05, "loss": 1.9762, "step": 13349 }, { "epoch": 0.17, "grad_norm": 4.927919864654541, "learning_rate": 1.999070730740515e-05, "loss": 2.5552, "step": 13350 }, { "epoch": 0.17, "grad_norm": 4.2385029792785645, "learning_rate": 1.9990702777848906e-05, "loss": 1.9, "step": 13351 }, { "epoch": 0.17, "grad_norm": 5.026007652282715, "learning_rate": 1.9990698247189516e-05, "loss": 2.3429, "step": 13352 }, { "epoch": 0.17, "grad_norm": 5.273721694946289, "learning_rate": 1.999069371542698e-05, "loss": 2.2712, "step": 13353 }, { "epoch": 0.17, "grad_norm": 4.022327899932861, "learning_rate": 1.9990689182561307e-05, "loss": 2.1183, "step": 13354 }, { "epoch": 0.17, "grad_norm": 4.722105979919434, "learning_rate": 1.999068464859249e-05, "loss": 3.0468, "step": 13355 }, { "epoch": 0.17, "grad_norm": 4.167282581329346, "learning_rate": 1.999068011352053e-05, "loss": 1.8958, "step": 13356 }, { "epoch": 0.17, "grad_norm": 5.019237518310547, "learning_rate": 1.9990675577345432e-05, "loss": 2.2587, "step": 13357 }, { "epoch": 0.17, "grad_norm": 4.43625545501709, "learning_rate": 1.9990671040067195e-05, "loss": 2.3246, "step": 13358 }, { "epoch": 0.17, "grad_norm": 4.504783630371094, "learning_rate": 1.999066650168582e-05, "loss": 1.9748, "step": 13359 }, { "epoch": 0.17, "grad_norm": 4.04803466796875, "learning_rate": 1.99906619622013e-05, "loss": 1.8006, "step": 13360 }, { "epoch": 0.17, "grad_norm": 4.146816253662109, "learning_rate": 1.9990657421613645e-05, "loss": 2.3035, "step": 13361 }, { "epoch": 0.17, "grad_norm": 4.757133483886719, "learning_rate": 1.999065287992285e-05, "loss": 2.2726, "step": 13362 }, { "epoch": 0.17, "grad_norm": 4.193867206573486, "learning_rate": 1.999064833712892e-05, "loss": 2.1522, "step": 13363 }, { "epoch": 0.17, "grad_norm": 4.4652862548828125, "learning_rate": 1.9990643793231853e-05, "loss": 2.168, "step": 13364 }, { "epoch": 0.17, "grad_norm": 3.8753321170806885, "learning_rate": 1.999063924823165e-05, "loss": 2.0947, "step": 13365 }, { "epoch": 0.17, "grad_norm": 4.306000232696533, "learning_rate": 1.9990634702128308e-05, "loss": 2.4365, "step": 13366 }, { "epoch": 0.17, "grad_norm": 4.16101598739624, "learning_rate": 1.999063015492183e-05, "loss": 2.2087, "step": 13367 }, { "epoch": 0.17, "grad_norm": 4.628796577453613, "learning_rate": 1.999062560661222e-05, "loss": 2.1199, "step": 13368 }, { "epoch": 0.17, "grad_norm": 5.153843879699707, "learning_rate": 1.9990621057199476e-05, "loss": 2.3524, "step": 13369 }, { "epoch": 0.17, "grad_norm": 4.224188804626465, "learning_rate": 1.9990616506683595e-05, "loss": 1.9097, "step": 13370 }, { "epoch": 0.17, "grad_norm": 4.470267295837402, "learning_rate": 1.999061195506458e-05, "loss": 1.9841, "step": 13371 }, { "epoch": 0.17, "grad_norm": 4.845715522766113, "learning_rate": 1.9990607402342436e-05, "loss": 2.1525, "step": 13372 }, { "epoch": 0.17, "grad_norm": 4.883098125457764, "learning_rate": 1.9990602848517152e-05, "loss": 2.7265, "step": 13373 }, { "epoch": 0.17, "grad_norm": 5.06780481338501, "learning_rate": 1.9990598293588744e-05, "loss": 2.3919, "step": 13374 }, { "epoch": 0.17, "grad_norm": 3.9404826164245605, "learning_rate": 1.99905937375572e-05, "loss": 2.0764, "step": 13375 }, { "epoch": 0.17, "grad_norm": 4.898808002471924, "learning_rate": 1.9990589180422525e-05, "loss": 2.455, "step": 13376 }, { "epoch": 0.17, "grad_norm": 4.24180269241333, "learning_rate": 1.9990584622184722e-05, "loss": 2.1808, "step": 13377 }, { "epoch": 0.17, "grad_norm": 4.822801113128662, "learning_rate": 1.9990580062843787e-05, "loss": 2.3015, "step": 13378 }, { "epoch": 0.17, "grad_norm": 4.2404303550720215, "learning_rate": 1.9990575502399727e-05, "loss": 2.3919, "step": 13379 }, { "epoch": 0.17, "grad_norm": 4.592014312744141, "learning_rate": 1.999057094085253e-05, "loss": 2.3192, "step": 13380 }, { "epoch": 0.17, "grad_norm": 4.469059467315674, "learning_rate": 1.9990566378202208e-05, "loss": 2.1029, "step": 13381 }, { "epoch": 0.17, "grad_norm": 4.938096046447754, "learning_rate": 1.999056181444876e-05, "loss": 2.3191, "step": 13382 }, { "epoch": 0.17, "grad_norm": 4.017247200012207, "learning_rate": 1.999055724959218e-05, "loss": 2.0957, "step": 13383 }, { "epoch": 0.17, "grad_norm": 4.899898052215576, "learning_rate": 1.999055268363248e-05, "loss": 2.197, "step": 13384 }, { "epoch": 0.17, "grad_norm": 5.1967453956604, "learning_rate": 1.999054811656965e-05, "loss": 2.5763, "step": 13385 }, { "epoch": 0.17, "grad_norm": 4.555583953857422, "learning_rate": 1.999054354840369e-05, "loss": 2.243, "step": 13386 }, { "epoch": 0.17, "grad_norm": 4.1745500564575195, "learning_rate": 1.999053897913461e-05, "loss": 2.3031, "step": 13387 }, { "epoch": 0.17, "grad_norm": 5.065612316131592, "learning_rate": 1.99905344087624e-05, "loss": 2.3722, "step": 13388 }, { "epoch": 0.17, "grad_norm": 4.335407257080078, "learning_rate": 1.9990529837287068e-05, "loss": 2.1375, "step": 13389 }, { "epoch": 0.17, "grad_norm": 4.330642223358154, "learning_rate": 1.9990525264708617e-05, "loss": 2.4455, "step": 13390 }, { "epoch": 0.17, "grad_norm": 4.816597938537598, "learning_rate": 1.9990520691027035e-05, "loss": 2.7218, "step": 13391 }, { "epoch": 0.17, "grad_norm": 5.378001689910889, "learning_rate": 1.9990516116242334e-05, "loss": 2.4759, "step": 13392 }, { "epoch": 0.17, "grad_norm": 4.634626388549805, "learning_rate": 1.999051154035451e-05, "loss": 2.9275, "step": 13393 }, { "epoch": 0.17, "grad_norm": 4.462621688842773, "learning_rate": 1.9990506963363562e-05, "loss": 2.51, "step": 13394 }, { "epoch": 0.17, "grad_norm": 4.194620609283447, "learning_rate": 1.9990502385269494e-05, "loss": 2.3219, "step": 13395 }, { "epoch": 0.17, "grad_norm": 4.812442302703857, "learning_rate": 1.9990497806072307e-05, "loss": 1.9755, "step": 13396 }, { "epoch": 0.17, "grad_norm": 4.136824131011963, "learning_rate": 1.9990493225771996e-05, "loss": 2.0884, "step": 13397 }, { "epoch": 0.17, "grad_norm": 4.457479000091553, "learning_rate": 1.9990488644368566e-05, "loss": 2.09, "step": 13398 }, { "epoch": 0.17, "grad_norm": 4.476028919219971, "learning_rate": 1.999048406186202e-05, "loss": 1.9734, "step": 13399 }, { "epoch": 0.17, "grad_norm": 4.187513828277588, "learning_rate": 1.9990479478252353e-05, "loss": 1.9983, "step": 13400 }, { "epoch": 0.17, "grad_norm": 4.894084453582764, "learning_rate": 1.9990474893539566e-05, "loss": 2.3541, "step": 13401 }, { "epoch": 0.17, "grad_norm": 4.876465797424316, "learning_rate": 1.999047030772366e-05, "loss": 2.4214, "step": 13402 }, { "epoch": 0.17, "grad_norm": 4.6484832763671875, "learning_rate": 1.999046572080464e-05, "loss": 2.0052, "step": 13403 }, { "epoch": 0.17, "grad_norm": 4.333771705627441, "learning_rate": 1.9990461132782502e-05, "loss": 1.8511, "step": 13404 }, { "epoch": 0.17, "grad_norm": 4.423350811004639, "learning_rate": 1.9990456543657246e-05, "loss": 1.868, "step": 13405 }, { "epoch": 0.17, "grad_norm": 5.104581356048584, "learning_rate": 1.9990451953428875e-05, "loss": 2.2437, "step": 13406 }, { "epoch": 0.17, "grad_norm": 4.882191181182861, "learning_rate": 1.9990447362097392e-05, "loss": 2.3323, "step": 13407 }, { "epoch": 0.17, "grad_norm": 4.830873489379883, "learning_rate": 1.999044276966279e-05, "loss": 2.705, "step": 13408 }, { "epoch": 0.17, "grad_norm": 4.457634449005127, "learning_rate": 1.9990438176125077e-05, "loss": 2.4414, "step": 13409 }, { "epoch": 0.17, "grad_norm": 4.421976089477539, "learning_rate": 1.9990433581484247e-05, "loss": 1.9864, "step": 13410 }, { "epoch": 0.17, "grad_norm": 4.335270881652832, "learning_rate": 1.9990428985740302e-05, "loss": 2.3582, "step": 13411 }, { "epoch": 0.17, "grad_norm": 5.3495683670043945, "learning_rate": 1.999042438889325e-05, "loss": 2.4617, "step": 13412 }, { "epoch": 0.17, "grad_norm": 4.603435039520264, "learning_rate": 1.9990419790943082e-05, "loss": 2.1171, "step": 13413 }, { "epoch": 0.17, "grad_norm": 4.1624579429626465, "learning_rate": 1.99904151918898e-05, "loss": 2.0289, "step": 13414 }, { "epoch": 0.17, "grad_norm": 4.930926322937012, "learning_rate": 1.999041059173341e-05, "loss": 2.4756, "step": 13415 }, { "epoch": 0.17, "grad_norm": 4.686928749084473, "learning_rate": 1.999040599047391e-05, "loss": 2.6295, "step": 13416 }, { "epoch": 0.17, "grad_norm": 4.637304782867432, "learning_rate": 1.9990401388111296e-05, "loss": 2.1197, "step": 13417 }, { "epoch": 0.17, "grad_norm": 4.351053237915039, "learning_rate": 1.9990396784645572e-05, "loss": 2.1386, "step": 13418 }, { "epoch": 0.17, "grad_norm": 4.7178263664245605, "learning_rate": 1.9990392180076744e-05, "loss": 1.979, "step": 13419 }, { "epoch": 0.17, "grad_norm": 5.208208084106445, "learning_rate": 1.9990387574404804e-05, "loss": 2.3663, "step": 13420 }, { "epoch": 0.17, "grad_norm": 4.810403823852539, "learning_rate": 1.9990382967629756e-05, "loss": 2.7692, "step": 13421 }, { "epoch": 0.17, "grad_norm": 4.398562431335449, "learning_rate": 1.99903783597516e-05, "loss": 2.4976, "step": 13422 }, { "epoch": 0.17, "grad_norm": 4.33819580078125, "learning_rate": 1.9990373750770337e-05, "loss": 2.1691, "step": 13423 }, { "epoch": 0.17, "grad_norm": 4.986894607543945, "learning_rate": 1.9990369140685968e-05, "loss": 2.2324, "step": 13424 }, { "epoch": 0.17, "grad_norm": 5.170466899871826, "learning_rate": 1.9990364529498492e-05, "loss": 2.6434, "step": 13425 }, { "epoch": 0.17, "grad_norm": 4.757166862487793, "learning_rate": 1.9990359917207908e-05, "loss": 2.3533, "step": 13426 }, { "epoch": 0.17, "grad_norm": 4.275818347930908, "learning_rate": 1.9990355303814223e-05, "loss": 1.8419, "step": 13427 }, { "epoch": 0.17, "grad_norm": 4.550116062164307, "learning_rate": 1.999035068931743e-05, "loss": 2.5262, "step": 13428 }, { "epoch": 0.17, "grad_norm": 4.664318561553955, "learning_rate": 1.9990346073717535e-05, "loss": 2.4717, "step": 13429 }, { "epoch": 0.17, "grad_norm": 4.292061805725098, "learning_rate": 1.9990341457014532e-05, "loss": 2.3593, "step": 13430 }, { "epoch": 0.17, "grad_norm": 3.900608777999878, "learning_rate": 1.9990336839208432e-05, "loss": 1.8391, "step": 13431 }, { "epoch": 0.17, "grad_norm": 5.078938007354736, "learning_rate": 1.9990332220299224e-05, "loss": 2.4486, "step": 13432 }, { "epoch": 0.17, "grad_norm": 5.360442638397217, "learning_rate": 1.9990327600286918e-05, "loss": 2.3177, "step": 13433 }, { "epoch": 0.17, "grad_norm": 4.395846843719482, "learning_rate": 1.9990322979171508e-05, "loss": 2.2862, "step": 13434 }, { "epoch": 0.17, "grad_norm": 5.312134742736816, "learning_rate": 1.9990318356953e-05, "loss": 2.1858, "step": 13435 }, { "epoch": 0.17, "grad_norm": 4.627810955047607, "learning_rate": 1.9990313733631386e-05, "loss": 2.2139, "step": 13436 }, { "epoch": 0.17, "grad_norm": 5.240856647491455, "learning_rate": 1.9990309109206676e-05, "loss": 2.5609, "step": 13437 }, { "epoch": 0.17, "grad_norm": 4.054281234741211, "learning_rate": 1.9990304483678864e-05, "loss": 2.0454, "step": 13438 }, { "epoch": 0.17, "grad_norm": 4.48068904876709, "learning_rate": 1.9990299857047954e-05, "loss": 2.4367, "step": 13439 }, { "epoch": 0.17, "grad_norm": 4.278367042541504, "learning_rate": 1.9990295229313944e-05, "loss": 2.0565, "step": 13440 }, { "epoch": 0.17, "grad_norm": 4.234507083892822, "learning_rate": 1.9990290600476842e-05, "loss": 1.837, "step": 13441 }, { "epoch": 0.17, "grad_norm": 4.226072311401367, "learning_rate": 1.9990285970536636e-05, "loss": 2.0545, "step": 13442 }, { "epoch": 0.17, "grad_norm": 4.988399982452393, "learning_rate": 1.9990281339493335e-05, "loss": 2.2528, "step": 13443 }, { "epoch": 0.17, "grad_norm": 4.311945915222168, "learning_rate": 1.9990276707346936e-05, "loss": 2.242, "step": 13444 }, { "epoch": 0.17, "grad_norm": 4.475631237030029, "learning_rate": 1.999027207409744e-05, "loss": 1.9583, "step": 13445 }, { "epoch": 0.17, "grad_norm": 4.272570610046387, "learning_rate": 1.9990267439744853e-05, "loss": 1.9149, "step": 13446 }, { "epoch": 0.17, "grad_norm": 4.210896968841553, "learning_rate": 1.9990262804289168e-05, "loss": 2.1394, "step": 13447 }, { "epoch": 0.17, "grad_norm": 5.04305362701416, "learning_rate": 1.9990258167730388e-05, "loss": 3.0689, "step": 13448 }, { "epoch": 0.17, "grad_norm": 4.771261692047119, "learning_rate": 1.9990253530068518e-05, "loss": 2.8193, "step": 13449 }, { "epoch": 0.17, "grad_norm": 4.847923278808594, "learning_rate": 1.9990248891303553e-05, "loss": 2.3998, "step": 13450 }, { "epoch": 0.17, "grad_norm": 5.095672130584717, "learning_rate": 1.999024425143549e-05, "loss": 2.4314, "step": 13451 }, { "epoch": 0.17, "grad_norm": 4.148305892944336, "learning_rate": 1.999023961046434e-05, "loss": 2.2025, "step": 13452 }, { "epoch": 0.17, "grad_norm": 4.620162487030029, "learning_rate": 1.9990234968390095e-05, "loss": 2.6588, "step": 13453 }, { "epoch": 0.17, "grad_norm": 4.775552272796631, "learning_rate": 1.999023032521276e-05, "loss": 2.7523, "step": 13454 }, { "epoch": 0.17, "grad_norm": 5.589901447296143, "learning_rate": 1.9990225680932336e-05, "loss": 2.5159, "step": 13455 }, { "epoch": 0.17, "grad_norm": 4.181347370147705, "learning_rate": 1.999022103554882e-05, "loss": 2.0038, "step": 13456 }, { "epoch": 0.17, "grad_norm": 4.899084568023682, "learning_rate": 1.999021638906221e-05, "loss": 2.6254, "step": 13457 }, { "epoch": 0.17, "grad_norm": 4.859081745147705, "learning_rate": 1.9990211741472517e-05, "loss": 2.6165, "step": 13458 }, { "epoch": 0.17, "grad_norm": 4.735118865966797, "learning_rate": 1.9990207092779733e-05, "loss": 2.1935, "step": 13459 }, { "epoch": 0.17, "grad_norm": 4.604483127593994, "learning_rate": 1.9990202442983858e-05, "loss": 2.6101, "step": 13460 }, { "epoch": 0.17, "grad_norm": 4.641529560089111, "learning_rate": 1.99901977920849e-05, "loss": 2.3117, "step": 13461 }, { "epoch": 0.17, "grad_norm": 4.612090110778809, "learning_rate": 1.999019314008285e-05, "loss": 2.3295, "step": 13462 }, { "epoch": 0.17, "grad_norm": 4.6110734939575195, "learning_rate": 1.9990188486977714e-05, "loss": 2.007, "step": 13463 }, { "epoch": 0.17, "grad_norm": 4.4872918128967285, "learning_rate": 1.9990183832769496e-05, "loss": 2.1399, "step": 13464 }, { "epoch": 0.17, "grad_norm": 4.666362285614014, "learning_rate": 1.9990179177458186e-05, "loss": 2.674, "step": 13465 }, { "epoch": 0.17, "grad_norm": 4.311765670776367, "learning_rate": 1.9990174521043792e-05, "loss": 2.0742, "step": 13466 }, { "epoch": 0.17, "grad_norm": 4.864057540893555, "learning_rate": 1.9990169863526318e-05, "loss": 2.2297, "step": 13467 }, { "epoch": 0.17, "grad_norm": 4.86057186126709, "learning_rate": 1.9990165204905755e-05, "loss": 2.6788, "step": 13468 }, { "epoch": 0.17, "grad_norm": 4.016477584838867, "learning_rate": 1.9990160545182112e-05, "loss": 1.7718, "step": 13469 }, { "epoch": 0.17, "grad_norm": 5.298035621643066, "learning_rate": 1.999015588435538e-05, "loss": 2.6123, "step": 13470 }, { "epoch": 0.17, "grad_norm": 4.455286026000977, "learning_rate": 1.999015122242557e-05, "loss": 1.9208, "step": 13471 }, { "epoch": 0.17, "grad_norm": 5.058951377868652, "learning_rate": 1.9990146559392677e-05, "loss": 2.2904, "step": 13472 }, { "epoch": 0.17, "grad_norm": 4.6155290603637695, "learning_rate": 1.9990141895256704e-05, "loss": 2.4164, "step": 13473 }, { "epoch": 0.17, "grad_norm": 4.641715049743652, "learning_rate": 1.9990137230017646e-05, "loss": 2.2084, "step": 13474 }, { "epoch": 0.17, "grad_norm": 4.429736614227295, "learning_rate": 1.999013256367551e-05, "loss": 2.4416, "step": 13475 }, { "epoch": 0.17, "grad_norm": 4.437530994415283, "learning_rate": 1.9990127896230292e-05, "loss": 2.0375, "step": 13476 }, { "epoch": 0.17, "grad_norm": 4.984241008758545, "learning_rate": 1.9990123227682e-05, "loss": 2.7215, "step": 13477 }, { "epoch": 0.17, "grad_norm": 4.759271144866943, "learning_rate": 1.999011855803062e-05, "loss": 2.5011, "step": 13478 }, { "epoch": 0.17, "grad_norm": 4.448195934295654, "learning_rate": 1.999011388727617e-05, "loss": 2.5208, "step": 13479 }, { "epoch": 0.17, "grad_norm": 4.486763954162598, "learning_rate": 1.9990109215418637e-05, "loss": 1.952, "step": 13480 }, { "epoch": 0.17, "grad_norm": 4.7864460945129395, "learning_rate": 1.9990104542458027e-05, "loss": 2.1731, "step": 13481 }, { "epoch": 0.17, "grad_norm": 5.473147869110107, "learning_rate": 1.999009986839434e-05, "loss": 2.6124, "step": 13482 }, { "epoch": 0.17, "grad_norm": 4.3773932456970215, "learning_rate": 1.999009519322758e-05, "loss": 2.3015, "step": 13483 }, { "epoch": 0.17, "grad_norm": 3.7071595191955566, "learning_rate": 1.999009051695774e-05, "loss": 1.9237, "step": 13484 }, { "epoch": 0.18, "grad_norm": 3.882171869277954, "learning_rate": 1.9990085839584827e-05, "loss": 1.9167, "step": 13485 }, { "epoch": 0.18, "grad_norm": 4.335762977600098, "learning_rate": 1.9990081161108836e-05, "loss": 1.8322, "step": 13486 }, { "epoch": 0.18, "grad_norm": 4.523681640625, "learning_rate": 1.9990076481529768e-05, "loss": 2.578, "step": 13487 }, { "epoch": 0.18, "grad_norm": 5.087576866149902, "learning_rate": 1.9990071800847633e-05, "loss": 2.3582, "step": 13488 }, { "epoch": 0.18, "grad_norm": 4.251394271850586, "learning_rate": 1.999006711906242e-05, "loss": 1.9587, "step": 13489 }, { "epoch": 0.18, "grad_norm": 4.637671947479248, "learning_rate": 1.9990062436174137e-05, "loss": 2.5367, "step": 13490 }, { "epoch": 0.18, "grad_norm": 4.579165458679199, "learning_rate": 1.999005775218278e-05, "loss": 2.9317, "step": 13491 }, { "epoch": 0.18, "grad_norm": 4.380072116851807, "learning_rate": 1.9990053067088352e-05, "loss": 2.1842, "step": 13492 }, { "epoch": 0.18, "grad_norm": 4.471294403076172, "learning_rate": 1.9990048380890853e-05, "loss": 2.3114, "step": 13493 }, { "epoch": 0.18, "grad_norm": 3.9675254821777344, "learning_rate": 1.999004369359028e-05, "loss": 1.7429, "step": 13494 }, { "epoch": 0.18, "grad_norm": 4.8042378425598145, "learning_rate": 1.999003900518664e-05, "loss": 2.2284, "step": 13495 }, { "epoch": 0.18, "grad_norm": 4.913441181182861, "learning_rate": 1.9990034315679933e-05, "loss": 2.5259, "step": 13496 }, { "epoch": 0.18, "grad_norm": 4.1439127922058105, "learning_rate": 1.999002962507015e-05, "loss": 2.1298, "step": 13497 }, { "epoch": 0.18, "grad_norm": 4.546418190002441, "learning_rate": 1.9990024933357303e-05, "loss": 2.1422, "step": 13498 }, { "epoch": 0.18, "grad_norm": 4.014786720275879, "learning_rate": 1.9990020240541387e-05, "loss": 2.4638, "step": 13499 }, { "epoch": 0.18, "grad_norm": 4.307523727416992, "learning_rate": 1.99900155466224e-05, "loss": 2.2878, "step": 13500 }, { "epoch": 0.18, "grad_norm": 4.4460368156433105, "learning_rate": 1.999001085160035e-05, "loss": 2.4435, "step": 13501 }, { "epoch": 0.18, "grad_norm": 4.694075584411621, "learning_rate": 1.999000615547523e-05, "loss": 2.3113, "step": 13502 }, { "epoch": 0.18, "grad_norm": 4.687921524047852, "learning_rate": 1.9990001458247047e-05, "loss": 2.9667, "step": 13503 }, { "epoch": 0.18, "grad_norm": 4.397662162780762, "learning_rate": 1.9989996759915794e-05, "loss": 2.3702, "step": 13504 }, { "epoch": 0.18, "grad_norm": 4.513963222503662, "learning_rate": 1.998999206048148e-05, "loss": 2.5381, "step": 13505 }, { "epoch": 0.18, "grad_norm": 4.0005717277526855, "learning_rate": 1.9989987359944097e-05, "loss": 2.0994, "step": 13506 }, { "epoch": 0.18, "grad_norm": 4.461301803588867, "learning_rate": 1.9989982658303657e-05, "loss": 2.0903, "step": 13507 }, { "epoch": 0.18, "grad_norm": 4.785233497619629, "learning_rate": 1.998997795556015e-05, "loss": 2.8179, "step": 13508 }, { "epoch": 0.18, "grad_norm": 5.670864582061768, "learning_rate": 1.9989973251713577e-05, "loss": 2.8371, "step": 13509 }, { "epoch": 0.18, "grad_norm": 4.471692085266113, "learning_rate": 1.9989968546763944e-05, "loss": 2.1786, "step": 13510 }, { "epoch": 0.18, "grad_norm": 4.859864234924316, "learning_rate": 1.9989963840711248e-05, "loss": 2.7598, "step": 13511 }, { "epoch": 0.18, "grad_norm": 4.22047758102417, "learning_rate": 1.998995913355549e-05, "loss": 2.0109, "step": 13512 }, { "epoch": 0.18, "grad_norm": 3.8341522216796875, "learning_rate": 1.9989954425296673e-05, "loss": 1.6966, "step": 13513 }, { "epoch": 0.18, "grad_norm": 4.971161365509033, "learning_rate": 1.9989949715934796e-05, "loss": 2.2425, "step": 13514 }, { "epoch": 0.18, "grad_norm": 4.343339443206787, "learning_rate": 1.9989945005469857e-05, "loss": 1.8684, "step": 13515 }, { "epoch": 0.18, "grad_norm": 4.213619232177734, "learning_rate": 1.998994029390186e-05, "loss": 2.491, "step": 13516 }, { "epoch": 0.18, "grad_norm": 4.558745861053467, "learning_rate": 1.9989935581230802e-05, "loss": 2.6278, "step": 13517 }, { "epoch": 0.18, "grad_norm": 5.416643142700195, "learning_rate": 1.998993086745669e-05, "loss": 2.5403, "step": 13518 }, { "epoch": 0.18, "grad_norm": 4.30906343460083, "learning_rate": 1.998992615257952e-05, "loss": 2.2876, "step": 13519 }, { "epoch": 0.18, "grad_norm": 4.757150650024414, "learning_rate": 1.9989921436599284e-05, "loss": 2.5997, "step": 13520 }, { "epoch": 0.18, "grad_norm": 4.229072570800781, "learning_rate": 1.9989916719516e-05, "loss": 2.0123, "step": 13521 }, { "epoch": 0.18, "grad_norm": 4.967330455780029, "learning_rate": 1.9989912001329658e-05, "loss": 2.1606, "step": 13522 }, { "epoch": 0.18, "grad_norm": 3.9589502811431885, "learning_rate": 1.9989907282040256e-05, "loss": 1.8507, "step": 13523 }, { "epoch": 0.18, "grad_norm": 4.498048782348633, "learning_rate": 1.9989902561647803e-05, "loss": 1.8435, "step": 13524 }, { "epoch": 0.18, "grad_norm": 5.011392593383789, "learning_rate": 1.9989897840152294e-05, "loss": 2.3602, "step": 13525 }, { "epoch": 0.18, "grad_norm": 4.468774318695068, "learning_rate": 1.998989311755373e-05, "loss": 2.2735, "step": 13526 }, { "epoch": 0.18, "grad_norm": 4.466690540313721, "learning_rate": 1.9989888393852116e-05, "loss": 2.2783, "step": 13527 }, { "epoch": 0.18, "grad_norm": 4.6945390701293945, "learning_rate": 1.9989883669047445e-05, "loss": 2.3135, "step": 13528 }, { "epoch": 0.18, "grad_norm": 4.315732002258301, "learning_rate": 1.998987894313972e-05, "loss": 2.3546, "step": 13529 }, { "epoch": 0.18, "grad_norm": 5.0807976722717285, "learning_rate": 1.9989874216128946e-05, "loss": 2.8431, "step": 13530 }, { "epoch": 0.18, "grad_norm": 4.820970058441162, "learning_rate": 1.998986948801512e-05, "loss": 2.3434, "step": 13531 }, { "epoch": 0.18, "grad_norm": 4.145129680633545, "learning_rate": 1.9989864758798245e-05, "loss": 1.9193, "step": 13532 }, { "epoch": 0.18, "grad_norm": 4.692264556884766, "learning_rate": 1.9989860028478315e-05, "loss": 2.276, "step": 13533 }, { "epoch": 0.18, "grad_norm": 4.84425687789917, "learning_rate": 1.998985529705534e-05, "loss": 2.6944, "step": 13534 }, { "epoch": 0.18, "grad_norm": 4.398650169372559, "learning_rate": 1.9989850564529312e-05, "loss": 2.1665, "step": 13535 }, { "epoch": 0.18, "grad_norm": 4.7355804443359375, "learning_rate": 1.9989845830900235e-05, "loss": 1.9222, "step": 13536 }, { "epoch": 0.18, "grad_norm": 4.965299606323242, "learning_rate": 1.998984109616811e-05, "loss": 3.0769, "step": 13537 }, { "epoch": 0.18, "grad_norm": 4.799397945404053, "learning_rate": 1.9989836360332937e-05, "loss": 2.3345, "step": 13538 }, { "epoch": 0.18, "grad_norm": 4.516617774963379, "learning_rate": 1.998983162339472e-05, "loss": 2.0786, "step": 13539 }, { "epoch": 0.18, "grad_norm": 3.3479883670806885, "learning_rate": 1.998982688535345e-05, "loss": 1.8837, "step": 13540 }, { "epoch": 0.18, "grad_norm": 4.69639778137207, "learning_rate": 1.9989822146209135e-05, "loss": 2.3947, "step": 13541 }, { "epoch": 0.18, "grad_norm": 3.611433982849121, "learning_rate": 1.9989817405961776e-05, "loss": 1.942, "step": 13542 }, { "epoch": 0.18, "grad_norm": 4.241630554199219, "learning_rate": 1.9989812664611374e-05, "loss": 1.916, "step": 13543 }, { "epoch": 0.18, "grad_norm": 4.796136379241943, "learning_rate": 1.998980792215792e-05, "loss": 2.9597, "step": 13544 }, { "epoch": 0.18, "grad_norm": 4.245503902435303, "learning_rate": 1.9989803178601428e-05, "loss": 2.2029, "step": 13545 }, { "epoch": 0.18, "grad_norm": 4.843629837036133, "learning_rate": 1.998979843394189e-05, "loss": 1.8222, "step": 13546 }, { "epoch": 0.18, "grad_norm": 4.915460586547852, "learning_rate": 1.998979368817931e-05, "loss": 2.6655, "step": 13547 }, { "epoch": 0.18, "grad_norm": 4.531651020050049, "learning_rate": 1.9989788941313688e-05, "loss": 2.0498, "step": 13548 }, { "epoch": 0.18, "grad_norm": 4.352298259735107, "learning_rate": 1.998978419334502e-05, "loss": 2.1185, "step": 13549 }, { "epoch": 0.18, "grad_norm": 4.427636623382568, "learning_rate": 1.9989779444273316e-05, "loss": 1.8892, "step": 13550 }, { "epoch": 0.18, "grad_norm": 4.301252365112305, "learning_rate": 1.998977469409857e-05, "loss": 2.1299, "step": 13551 }, { "epoch": 0.18, "grad_norm": 4.8184733390808105, "learning_rate": 1.9989769942820778e-05, "loss": 2.5642, "step": 13552 }, { "epoch": 0.18, "grad_norm": 4.56788969039917, "learning_rate": 1.998976519043995e-05, "loss": 2.111, "step": 13553 }, { "epoch": 0.18, "grad_norm": 3.917407512664795, "learning_rate": 1.998976043695608e-05, "loss": 1.9122, "step": 13554 }, { "epoch": 0.18, "grad_norm": 4.4679999351501465, "learning_rate": 1.9989755682369173e-05, "loss": 2.1863, "step": 13555 }, { "epoch": 0.18, "grad_norm": 4.436145782470703, "learning_rate": 1.9989750926679224e-05, "loss": 2.7754, "step": 13556 }, { "epoch": 0.18, "grad_norm": 4.101840496063232, "learning_rate": 1.9989746169886242e-05, "loss": 1.8055, "step": 13557 }, { "epoch": 0.18, "grad_norm": 4.542287826538086, "learning_rate": 1.998974141199022e-05, "loss": 2.109, "step": 13558 }, { "epoch": 0.18, "grad_norm": 4.481408596038818, "learning_rate": 1.998973665299116e-05, "loss": 2.4448, "step": 13559 }, { "epoch": 0.18, "grad_norm": 4.780111789703369, "learning_rate": 1.998973189288907e-05, "loss": 2.2442, "step": 13560 }, { "epoch": 0.18, "grad_norm": 4.312210559844971, "learning_rate": 1.9989727131683936e-05, "loss": 2.789, "step": 13561 }, { "epoch": 0.18, "grad_norm": 4.103761196136475, "learning_rate": 1.998972236937577e-05, "loss": 1.8208, "step": 13562 }, { "epoch": 0.18, "grad_norm": 4.555433750152588, "learning_rate": 1.9989717605964568e-05, "loss": 2.1513, "step": 13563 }, { "epoch": 0.18, "grad_norm": 4.15320348739624, "learning_rate": 1.9989712841450335e-05, "loss": 2.2537, "step": 13564 }, { "epoch": 0.18, "grad_norm": 4.881692409515381, "learning_rate": 1.9989708075833065e-05, "loss": 2.1605, "step": 13565 }, { "epoch": 0.18, "grad_norm": 4.480855941772461, "learning_rate": 1.9989703309112762e-05, "loss": 2.5292, "step": 13566 }, { "epoch": 0.18, "grad_norm": 4.7238383293151855, "learning_rate": 1.998969854128943e-05, "loss": 2.3225, "step": 13567 }, { "epoch": 0.18, "grad_norm": 4.5307393074035645, "learning_rate": 1.9989693772363063e-05, "loss": 2.4025, "step": 13568 }, { "epoch": 0.18, "grad_norm": 4.397276878356934, "learning_rate": 1.9989689002333663e-05, "loss": 2.6804, "step": 13569 }, { "epoch": 0.18, "grad_norm": 4.8066582679748535, "learning_rate": 1.9989684231201233e-05, "loss": 2.462, "step": 13570 }, { "epoch": 0.18, "grad_norm": 4.8592095375061035, "learning_rate": 1.998967945896577e-05, "loss": 2.5848, "step": 13571 }, { "epoch": 0.18, "grad_norm": 4.207786560058594, "learning_rate": 1.998967468562728e-05, "loss": 1.8596, "step": 13572 }, { "epoch": 0.18, "grad_norm": 4.199997425079346, "learning_rate": 1.998966991118576e-05, "loss": 2.2344, "step": 13573 }, { "epoch": 0.18, "grad_norm": 4.138522148132324, "learning_rate": 1.998966513564121e-05, "loss": 2.1753, "step": 13574 }, { "epoch": 0.18, "grad_norm": 4.908089637756348, "learning_rate": 1.9989660358993634e-05, "loss": 2.5103, "step": 13575 }, { "epoch": 0.18, "grad_norm": 4.771650791168213, "learning_rate": 1.9989655581243027e-05, "loss": 2.0072, "step": 13576 }, { "epoch": 0.18, "grad_norm": 4.336050987243652, "learning_rate": 1.9989650802389394e-05, "loss": 2.3068, "step": 13577 }, { "epoch": 0.18, "grad_norm": 3.7191667556762695, "learning_rate": 1.9989646022432734e-05, "loss": 1.7549, "step": 13578 }, { "epoch": 0.18, "grad_norm": 4.0759992599487305, "learning_rate": 1.9989641241373048e-05, "loss": 1.8996, "step": 13579 }, { "epoch": 0.18, "grad_norm": 4.60429573059082, "learning_rate": 1.9989636459210335e-05, "loss": 2.2678, "step": 13580 }, { "epoch": 0.18, "grad_norm": 4.037755012512207, "learning_rate": 1.9989631675944595e-05, "loss": 2.0362, "step": 13581 }, { "epoch": 0.18, "grad_norm": 4.213531970977783, "learning_rate": 1.9989626891575835e-05, "loss": 2.213, "step": 13582 }, { "epoch": 0.18, "grad_norm": 5.216425895690918, "learning_rate": 1.998962210610405e-05, "loss": 2.9253, "step": 13583 }, { "epoch": 0.18, "grad_norm": 4.88228178024292, "learning_rate": 1.9989617319529236e-05, "loss": 2.2493, "step": 13584 }, { "epoch": 0.18, "grad_norm": 5.243107795715332, "learning_rate": 1.9989612531851403e-05, "loss": 2.5198, "step": 13585 }, { "epoch": 0.18, "grad_norm": 4.553443908691406, "learning_rate": 1.9989607743070548e-05, "loss": 2.5606, "step": 13586 }, { "epoch": 0.18, "grad_norm": 4.442758560180664, "learning_rate": 1.9989602953186668e-05, "loss": 2.3385, "step": 13587 }, { "epoch": 0.18, "grad_norm": 5.3963942527771, "learning_rate": 1.998959816219977e-05, "loss": 2.5453, "step": 13588 }, { "epoch": 0.18, "grad_norm": 4.35858678817749, "learning_rate": 1.9989593370109847e-05, "loss": 2.2911, "step": 13589 }, { "epoch": 0.18, "grad_norm": 4.510293960571289, "learning_rate": 1.99895885769169e-05, "loss": 2.0338, "step": 13590 }, { "epoch": 0.18, "grad_norm": 4.751743316650391, "learning_rate": 1.9989583782620943e-05, "loss": 2.8686, "step": 13591 }, { "epoch": 0.18, "grad_norm": 4.748647212982178, "learning_rate": 1.998957898722196e-05, "loss": 2.2987, "step": 13592 }, { "epoch": 0.18, "grad_norm": 4.678414344787598, "learning_rate": 1.998957419071996e-05, "loss": 2.6695, "step": 13593 }, { "epoch": 0.18, "grad_norm": 3.992722511291504, "learning_rate": 1.998956939311494e-05, "loss": 2.2071, "step": 13594 }, { "epoch": 0.18, "grad_norm": 4.744458198547363, "learning_rate": 1.9989564594406903e-05, "loss": 2.5411, "step": 13595 }, { "epoch": 0.18, "grad_norm": 5.3322553634643555, "learning_rate": 1.998955979459585e-05, "loss": 2.5769, "step": 13596 }, { "epoch": 0.18, "grad_norm": 5.059849739074707, "learning_rate": 1.9989554993681777e-05, "loss": 2.0971, "step": 13597 }, { "epoch": 0.18, "grad_norm": 3.980931043624878, "learning_rate": 1.998955019166469e-05, "loss": 1.9056, "step": 13598 }, { "epoch": 0.18, "grad_norm": 5.057332515716553, "learning_rate": 1.9989545388544586e-05, "loss": 2.7053, "step": 13599 }, { "epoch": 0.18, "grad_norm": 4.548840522766113, "learning_rate": 1.998954058432147e-05, "loss": 2.4234, "step": 13600 }, { "epoch": 0.18, "grad_norm": 5.220074653625488, "learning_rate": 1.9989535778995335e-05, "loss": 2.6003, "step": 13601 }, { "epoch": 0.18, "grad_norm": 3.995304822921753, "learning_rate": 1.9989530972566186e-05, "loss": 1.9956, "step": 13602 }, { "epoch": 0.18, "grad_norm": 4.756234169006348, "learning_rate": 1.9989526165034023e-05, "loss": 2.436, "step": 13603 }, { "epoch": 0.18, "grad_norm": 4.183319091796875, "learning_rate": 1.998952135639885e-05, "loss": 2.1301, "step": 13604 }, { "epoch": 0.18, "grad_norm": 4.318985939025879, "learning_rate": 1.9989516546660664e-05, "loss": 2.1535, "step": 13605 }, { "epoch": 0.18, "grad_norm": 5.077226161956787, "learning_rate": 1.9989511735819462e-05, "loss": 2.4213, "step": 13606 }, { "epoch": 0.18, "grad_norm": 4.516814231872559, "learning_rate": 1.9989506923875253e-05, "loss": 2.6864, "step": 13607 }, { "epoch": 0.18, "grad_norm": 4.612672328948975, "learning_rate": 1.998950211082803e-05, "loss": 2.1561, "step": 13608 }, { "epoch": 0.18, "grad_norm": 4.955330848693848, "learning_rate": 1.9989497296677797e-05, "loss": 2.5021, "step": 13609 }, { "epoch": 0.18, "grad_norm": 5.026834487915039, "learning_rate": 1.9989492481424556e-05, "loss": 2.0551, "step": 13610 }, { "epoch": 0.18, "grad_norm": 4.979813098907471, "learning_rate": 1.9989487665068305e-05, "loss": 2.3844, "step": 13611 }, { "epoch": 0.18, "grad_norm": 4.840725421905518, "learning_rate": 1.9989482847609045e-05, "loss": 2.1606, "step": 13612 }, { "epoch": 0.18, "grad_norm": 4.467456817626953, "learning_rate": 1.9989478029046774e-05, "loss": 2.1953, "step": 13613 }, { "epoch": 0.18, "grad_norm": 4.465298175811768, "learning_rate": 1.9989473209381498e-05, "loss": 2.2326, "step": 13614 }, { "epoch": 0.18, "grad_norm": 4.535134315490723, "learning_rate": 1.9989468388613212e-05, "loss": 2.1999, "step": 13615 }, { "epoch": 0.18, "grad_norm": 5.167264938354492, "learning_rate": 1.9989463566741923e-05, "loss": 2.2981, "step": 13616 }, { "epoch": 0.18, "grad_norm": 4.09352445602417, "learning_rate": 1.998945874376762e-05, "loss": 2.1615, "step": 13617 }, { "epoch": 0.18, "grad_norm": 6.077152252197266, "learning_rate": 1.998945391969032e-05, "loss": 2.4732, "step": 13618 }, { "epoch": 0.18, "grad_norm": 4.424075603485107, "learning_rate": 1.998944909451001e-05, "loss": 2.2536, "step": 13619 }, { "epoch": 0.18, "grad_norm": 4.192066669464111, "learning_rate": 1.99894442682267e-05, "loss": 1.7812, "step": 13620 }, { "epoch": 0.18, "grad_norm": 5.062605857849121, "learning_rate": 1.9989439440840383e-05, "loss": 2.17, "step": 13621 }, { "epoch": 0.18, "grad_norm": 5.7763800621032715, "learning_rate": 1.9989434612351063e-05, "loss": 2.3909, "step": 13622 }, { "epoch": 0.18, "grad_norm": 4.733475685119629, "learning_rate": 1.9989429782758737e-05, "loss": 2.6509, "step": 13623 }, { "epoch": 0.18, "grad_norm": 4.377261161804199, "learning_rate": 1.998942495206341e-05, "loss": 1.8324, "step": 13624 }, { "epoch": 0.18, "grad_norm": 4.462042808532715, "learning_rate": 1.9989420120265085e-05, "loss": 2.4765, "step": 13625 }, { "epoch": 0.18, "grad_norm": 4.350403785705566, "learning_rate": 1.9989415287363754e-05, "loss": 2.3007, "step": 13626 }, { "epoch": 0.18, "grad_norm": 4.958655834197998, "learning_rate": 1.9989410453359422e-05, "loss": 2.5463, "step": 13627 }, { "epoch": 0.18, "grad_norm": 3.9834084510803223, "learning_rate": 1.9989405618252095e-05, "loss": 1.922, "step": 13628 }, { "epoch": 0.18, "grad_norm": 4.5477519035339355, "learning_rate": 1.998940078204176e-05, "loss": 2.0336, "step": 13629 }, { "epoch": 0.18, "grad_norm": 4.307192325592041, "learning_rate": 1.998939594472843e-05, "loss": 2.5117, "step": 13630 }, { "epoch": 0.18, "grad_norm": 4.373239040374756, "learning_rate": 1.9989391106312102e-05, "loss": 2.2406, "step": 13631 }, { "epoch": 0.18, "grad_norm": 4.298811435699463, "learning_rate": 1.9989386266792773e-05, "loss": 2.0299, "step": 13632 }, { "epoch": 0.18, "grad_norm": 4.631649971008301, "learning_rate": 1.9989381426170448e-05, "loss": 2.8549, "step": 13633 }, { "epoch": 0.18, "grad_norm": 4.588550090789795, "learning_rate": 1.9989376584445127e-05, "loss": 1.9576, "step": 13634 }, { "epoch": 0.18, "grad_norm": 3.611297845840454, "learning_rate": 1.998937174161681e-05, "loss": 1.6875, "step": 13635 }, { "epoch": 0.18, "grad_norm": 4.651081085205078, "learning_rate": 1.9989366897685493e-05, "loss": 2.3641, "step": 13636 }, { "epoch": 0.18, "grad_norm": 5.2365403175354, "learning_rate": 1.9989362052651183e-05, "loss": 2.9888, "step": 13637 }, { "epoch": 0.18, "grad_norm": 5.263967514038086, "learning_rate": 1.998935720651388e-05, "loss": 2.881, "step": 13638 }, { "epoch": 0.18, "grad_norm": 4.342981815338135, "learning_rate": 1.9989352359273576e-05, "loss": 2.2139, "step": 13639 }, { "epoch": 0.18, "grad_norm": 4.562254428863525, "learning_rate": 1.9989347510930284e-05, "loss": 2.2848, "step": 13640 }, { "epoch": 0.18, "grad_norm": 4.179756164550781, "learning_rate": 1.9989342661483997e-05, "loss": 2.1657, "step": 13641 }, { "epoch": 0.18, "grad_norm": 4.339375972747803, "learning_rate": 1.9989337810934717e-05, "loss": 2.3253, "step": 13642 }, { "epoch": 0.18, "grad_norm": 4.747276306152344, "learning_rate": 1.9989332959282444e-05, "loss": 2.0377, "step": 13643 }, { "epoch": 0.18, "grad_norm": 4.994261264801025, "learning_rate": 1.998932810652718e-05, "loss": 2.5533, "step": 13644 }, { "epoch": 0.18, "grad_norm": 3.990678071975708, "learning_rate": 1.9989323252668923e-05, "loss": 2.2952, "step": 13645 }, { "epoch": 0.18, "grad_norm": 5.408407688140869, "learning_rate": 1.9989318397707676e-05, "loss": 2.118, "step": 13646 }, { "epoch": 0.18, "grad_norm": 4.975526809692383, "learning_rate": 1.9989313541643442e-05, "loss": 2.3184, "step": 13647 }, { "epoch": 0.18, "grad_norm": 4.992402076721191, "learning_rate": 1.9989308684476215e-05, "loss": 2.4293, "step": 13648 }, { "epoch": 0.18, "grad_norm": 4.660031318664551, "learning_rate": 1.9989303826206e-05, "loss": 2.4001, "step": 13649 }, { "epoch": 0.18, "grad_norm": 4.355020523071289, "learning_rate": 1.9989298966832794e-05, "loss": 1.8197, "step": 13650 }, { "epoch": 0.18, "grad_norm": 4.829975128173828, "learning_rate": 1.9989294106356603e-05, "loss": 2.4117, "step": 13651 }, { "epoch": 0.18, "grad_norm": 4.060517311096191, "learning_rate": 1.9989289244777423e-05, "loss": 1.9695, "step": 13652 }, { "epoch": 0.18, "grad_norm": 3.932772159576416, "learning_rate": 1.9989284382095256e-05, "loss": 2.1219, "step": 13653 }, { "epoch": 0.18, "grad_norm": 4.781240463256836, "learning_rate": 1.9989279518310104e-05, "loss": 2.5794, "step": 13654 }, { "epoch": 0.18, "grad_norm": 4.661479473114014, "learning_rate": 1.9989274653421962e-05, "loss": 2.1026, "step": 13655 }, { "epoch": 0.18, "grad_norm": 4.528968334197998, "learning_rate": 1.9989269787430837e-05, "loss": 2.731, "step": 13656 }, { "epoch": 0.18, "grad_norm": 4.796468734741211, "learning_rate": 1.9989264920336727e-05, "loss": 2.011, "step": 13657 }, { "epoch": 0.18, "grad_norm": 5.679324626922607, "learning_rate": 1.9989260052139634e-05, "loss": 2.9503, "step": 13658 }, { "epoch": 0.18, "grad_norm": 4.754339694976807, "learning_rate": 1.998925518283956e-05, "loss": 2.21, "step": 13659 }, { "epoch": 0.18, "grad_norm": 5.18125057220459, "learning_rate": 1.9989250312436493e-05, "loss": 1.9962, "step": 13660 }, { "epoch": 0.18, "grad_norm": 5.0040764808654785, "learning_rate": 1.9989245440930453e-05, "loss": 2.3865, "step": 13661 }, { "epoch": 0.18, "grad_norm": 4.773068904876709, "learning_rate": 1.9989240568321426e-05, "loss": 2.5536, "step": 13662 }, { "epoch": 0.18, "grad_norm": 4.878274917602539, "learning_rate": 1.998923569460942e-05, "loss": 2.4371, "step": 13663 }, { "epoch": 0.18, "grad_norm": 4.671384334564209, "learning_rate": 1.9989230819794428e-05, "loss": 2.2704, "step": 13664 }, { "epoch": 0.18, "grad_norm": 4.700289249420166, "learning_rate": 1.9989225943876464e-05, "loss": 1.9625, "step": 13665 }, { "epoch": 0.18, "grad_norm": 4.873106956481934, "learning_rate": 1.9989221066855513e-05, "loss": 2.3637, "step": 13666 }, { "epoch": 0.18, "grad_norm": 4.538351535797119, "learning_rate": 1.9989216188731584e-05, "loss": 1.9526, "step": 13667 }, { "epoch": 0.18, "grad_norm": 5.216032981872559, "learning_rate": 1.998921130950468e-05, "loss": 2.3725, "step": 13668 }, { "epoch": 0.18, "grad_norm": 4.827005863189697, "learning_rate": 1.998920642917479e-05, "loss": 2.2462, "step": 13669 }, { "epoch": 0.18, "grad_norm": 4.395163536071777, "learning_rate": 1.9989201547741927e-05, "loss": 2.3787, "step": 13670 }, { "epoch": 0.18, "grad_norm": 4.909709930419922, "learning_rate": 1.9989196665206084e-05, "loss": 2.7387, "step": 13671 }, { "epoch": 0.18, "grad_norm": 5.433443069458008, "learning_rate": 1.998919178156727e-05, "loss": 2.4719, "step": 13672 }, { "epoch": 0.18, "grad_norm": 4.4255523681640625, "learning_rate": 1.9989186896825474e-05, "loss": 2.1265, "step": 13673 }, { "epoch": 0.18, "grad_norm": 3.7038304805755615, "learning_rate": 1.9989182010980707e-05, "loss": 1.729, "step": 13674 }, { "epoch": 0.18, "grad_norm": 7.236738681793213, "learning_rate": 1.998917712403296e-05, "loss": 2.7065, "step": 13675 }, { "epoch": 0.18, "grad_norm": 4.568717002868652, "learning_rate": 1.9989172235982242e-05, "loss": 2.412, "step": 13676 }, { "epoch": 0.18, "grad_norm": 4.410878658294678, "learning_rate": 1.9989167346828545e-05, "loss": 2.111, "step": 13677 }, { "epoch": 0.18, "grad_norm": 4.384735584259033, "learning_rate": 1.998916245657188e-05, "loss": 2.3319, "step": 13678 }, { "epoch": 0.18, "grad_norm": 3.6764020919799805, "learning_rate": 1.9989157565212242e-05, "loss": 1.7628, "step": 13679 }, { "epoch": 0.18, "grad_norm": 4.617441654205322, "learning_rate": 1.9989152672749627e-05, "loss": 2.4338, "step": 13680 }, { "epoch": 0.18, "grad_norm": 4.6609625816345215, "learning_rate": 1.9989147779184043e-05, "loss": 2.6814, "step": 13681 }, { "epoch": 0.18, "grad_norm": 4.656504154205322, "learning_rate": 1.9989142884515486e-05, "loss": 2.4767, "step": 13682 }, { "epoch": 0.18, "grad_norm": 5.104503154754639, "learning_rate": 1.998913798874396e-05, "loss": 2.1418, "step": 13683 }, { "epoch": 0.18, "grad_norm": 3.7324633598327637, "learning_rate": 1.9989133091869463e-05, "loss": 1.779, "step": 13684 }, { "epoch": 0.18, "grad_norm": 4.761821269989014, "learning_rate": 1.9989128193891996e-05, "loss": 2.5658, "step": 13685 }, { "epoch": 0.18, "grad_norm": 3.836397647857666, "learning_rate": 1.998912329481156e-05, "loss": 1.6154, "step": 13686 }, { "epoch": 0.18, "grad_norm": 4.515083312988281, "learning_rate": 1.9989118394628155e-05, "loss": 2.9506, "step": 13687 }, { "epoch": 0.18, "grad_norm": 4.738914489746094, "learning_rate": 1.9989113493341784e-05, "loss": 2.1487, "step": 13688 }, { "epoch": 0.18, "grad_norm": 4.243679046630859, "learning_rate": 1.998910859095244e-05, "loss": 2.3105, "step": 13689 }, { "epoch": 0.18, "grad_norm": 4.651264190673828, "learning_rate": 1.9989103687460136e-05, "loss": 1.9654, "step": 13690 }, { "epoch": 0.18, "grad_norm": 4.306619167327881, "learning_rate": 1.998909878286486e-05, "loss": 2.3409, "step": 13691 }, { "epoch": 0.18, "grad_norm": 4.436585903167725, "learning_rate": 1.998909387716662e-05, "loss": 2.0099, "step": 13692 }, { "epoch": 0.18, "grad_norm": 4.286801815032959, "learning_rate": 1.9989088970365415e-05, "loss": 2.2842, "step": 13693 }, { "epoch": 0.18, "grad_norm": 4.393006801605225, "learning_rate": 1.9989084062461247e-05, "loss": 1.7654, "step": 13694 }, { "epoch": 0.18, "grad_norm": 4.809586048126221, "learning_rate": 1.998907915345411e-05, "loss": 2.637, "step": 13695 }, { "epoch": 0.18, "grad_norm": 4.5054426193237305, "learning_rate": 1.998907424334401e-05, "loss": 2.4683, "step": 13696 }, { "epoch": 0.18, "grad_norm": 4.004508018493652, "learning_rate": 1.998906933213095e-05, "loss": 1.9487, "step": 13697 }, { "epoch": 0.18, "grad_norm": 4.8195624351501465, "learning_rate": 1.9989064419814926e-05, "loss": 2.9376, "step": 13698 }, { "epoch": 0.18, "grad_norm": 4.187139511108398, "learning_rate": 1.998905950639594e-05, "loss": 2.1073, "step": 13699 }, { "epoch": 0.18, "grad_norm": 4.861268520355225, "learning_rate": 1.998905459187399e-05, "loss": 2.8918, "step": 13700 }, { "epoch": 0.18, "grad_norm": 4.079925537109375, "learning_rate": 1.998904967624908e-05, "loss": 2.0177, "step": 13701 }, { "epoch": 0.18, "grad_norm": 4.472370624542236, "learning_rate": 1.998904475952121e-05, "loss": 2.2208, "step": 13702 }, { "epoch": 0.18, "grad_norm": 3.922327756881714, "learning_rate": 1.9989039841690383e-05, "loss": 2.1319, "step": 13703 }, { "epoch": 0.18, "grad_norm": 4.165738582611084, "learning_rate": 1.9989034922756592e-05, "loss": 2.227, "step": 13704 }, { "epoch": 0.18, "grad_norm": 4.740450859069824, "learning_rate": 1.9989030002719843e-05, "loss": 2.7307, "step": 13705 }, { "epoch": 0.18, "grad_norm": 4.739034175872803, "learning_rate": 1.9989025081580138e-05, "loss": 2.7221, "step": 13706 }, { "epoch": 0.18, "grad_norm": 4.163937568664551, "learning_rate": 1.998902015933747e-05, "loss": 1.7155, "step": 13707 }, { "epoch": 0.18, "grad_norm": 4.7404561042785645, "learning_rate": 1.9989015235991852e-05, "loss": 2.6383, "step": 13708 }, { "epoch": 0.18, "grad_norm": 4.463885307312012, "learning_rate": 1.9989010311543274e-05, "loss": 2.3616, "step": 13709 }, { "epoch": 0.18, "grad_norm": 4.554862976074219, "learning_rate": 1.9989005385991737e-05, "loss": 2.4006, "step": 13710 }, { "epoch": 0.18, "grad_norm": 4.4103240966796875, "learning_rate": 1.9989000459337247e-05, "loss": 2.2393, "step": 13711 }, { "epoch": 0.18, "grad_norm": 5.094907760620117, "learning_rate": 1.9988995531579803e-05, "loss": 2.7134, "step": 13712 }, { "epoch": 0.18, "grad_norm": 4.675873279571533, "learning_rate": 1.9988990602719403e-05, "loss": 2.2429, "step": 13713 }, { "epoch": 0.18, "grad_norm": 4.953831195831299, "learning_rate": 1.9988985672756047e-05, "loss": 2.2327, "step": 13714 }, { "epoch": 0.18, "grad_norm": 5.132626533508301, "learning_rate": 1.998898074168974e-05, "loss": 2.4386, "step": 13715 }, { "epoch": 0.18, "grad_norm": 4.417701244354248, "learning_rate": 1.998897580952048e-05, "loss": 2.2896, "step": 13716 }, { "epoch": 0.18, "grad_norm": 4.087934970855713, "learning_rate": 1.9988970876248265e-05, "loss": 2.0641, "step": 13717 }, { "epoch": 0.18, "grad_norm": 4.128647327423096, "learning_rate": 1.99889659418731e-05, "loss": 2.0483, "step": 13718 }, { "epoch": 0.18, "grad_norm": 4.533995628356934, "learning_rate": 1.9988961006394985e-05, "loss": 2.1027, "step": 13719 }, { "epoch": 0.18, "grad_norm": 3.8662376403808594, "learning_rate": 1.998895606981392e-05, "loss": 1.9043, "step": 13720 }, { "epoch": 0.18, "grad_norm": 4.648121356964111, "learning_rate": 1.99889511321299e-05, "loss": 2.4834, "step": 13721 }, { "epoch": 0.18, "grad_norm": 4.656615257263184, "learning_rate": 1.9988946193342934e-05, "loss": 2.6103, "step": 13722 }, { "epoch": 0.18, "grad_norm": 4.2914042472839355, "learning_rate": 1.9988941253453017e-05, "loss": 2.0674, "step": 13723 }, { "epoch": 0.18, "grad_norm": 5.291516304016113, "learning_rate": 1.9988936312460154e-05, "loss": 2.615, "step": 13724 }, { "epoch": 0.18, "grad_norm": 4.681386470794678, "learning_rate": 1.9988931370364342e-05, "loss": 2.1072, "step": 13725 }, { "epoch": 0.18, "grad_norm": 4.480697154998779, "learning_rate": 1.9988926427165582e-05, "loss": 2.228, "step": 13726 }, { "epoch": 0.18, "grad_norm": 4.895979881286621, "learning_rate": 1.9988921482863876e-05, "loss": 2.2553, "step": 13727 }, { "epoch": 0.18, "grad_norm": 4.173348426818848, "learning_rate": 1.998891653745922e-05, "loss": 2.0894, "step": 13728 }, { "epoch": 0.18, "grad_norm": 4.203343391418457, "learning_rate": 1.9988911590951622e-05, "loss": 1.7801, "step": 13729 }, { "epoch": 0.18, "grad_norm": 3.9932520389556885, "learning_rate": 1.9988906643341077e-05, "loss": 1.9869, "step": 13730 }, { "epoch": 0.18, "grad_norm": 4.946023941040039, "learning_rate": 1.998890169462759e-05, "loss": 2.6184, "step": 13731 }, { "epoch": 0.18, "grad_norm": 4.272813320159912, "learning_rate": 1.9988896744811155e-05, "loss": 2.1031, "step": 13732 }, { "epoch": 0.18, "grad_norm": 4.836383819580078, "learning_rate": 1.9988891793891777e-05, "loss": 2.2168, "step": 13733 }, { "epoch": 0.18, "grad_norm": 5.046113967895508, "learning_rate": 1.9988886841869457e-05, "loss": 2.8788, "step": 13734 }, { "epoch": 0.18, "grad_norm": 4.653476238250732, "learning_rate": 1.9988881888744193e-05, "loss": 2.4006, "step": 13735 }, { "epoch": 0.18, "grad_norm": 4.2425456047058105, "learning_rate": 1.998887693451599e-05, "loss": 1.8415, "step": 13736 }, { "epoch": 0.18, "grad_norm": 3.8230977058410645, "learning_rate": 1.9988871979184844e-05, "loss": 1.8398, "step": 13737 }, { "epoch": 0.18, "grad_norm": 4.3354339599609375, "learning_rate": 1.9988867022750757e-05, "loss": 2.2125, "step": 13738 }, { "epoch": 0.18, "grad_norm": 4.704217433929443, "learning_rate": 1.9988862065213728e-05, "loss": 2.016, "step": 13739 }, { "epoch": 0.18, "grad_norm": 4.613288402557373, "learning_rate": 1.9988857106573763e-05, "loss": 2.1138, "step": 13740 }, { "epoch": 0.18, "grad_norm": 4.080596923828125, "learning_rate": 1.9988852146830854e-05, "loss": 2.1195, "step": 13741 }, { "epoch": 0.18, "grad_norm": 4.8757171630859375, "learning_rate": 1.998884718598501e-05, "loss": 1.9643, "step": 13742 }, { "epoch": 0.18, "grad_norm": 4.637223243713379, "learning_rate": 1.9988842224036226e-05, "loss": 2.0619, "step": 13743 }, { "epoch": 0.18, "grad_norm": 3.857982635498047, "learning_rate": 1.9988837260984504e-05, "loss": 1.8555, "step": 13744 }, { "epoch": 0.18, "grad_norm": 4.493814945220947, "learning_rate": 1.9988832296829847e-05, "loss": 2.5497, "step": 13745 }, { "epoch": 0.18, "grad_norm": 4.4168829917907715, "learning_rate": 1.9988827331572248e-05, "loss": 2.3234, "step": 13746 }, { "epoch": 0.18, "grad_norm": 4.60626220703125, "learning_rate": 1.998882236521172e-05, "loss": 2.8842, "step": 13747 }, { "epoch": 0.18, "grad_norm": 4.6521148681640625, "learning_rate": 1.9988817397748255e-05, "loss": 2.1488, "step": 13748 }, { "epoch": 0.18, "grad_norm": 4.428354263305664, "learning_rate": 1.998881242918185e-05, "loss": 2.336, "step": 13749 }, { "epoch": 0.18, "grad_norm": 3.6278159618377686, "learning_rate": 1.9988807459512514e-05, "loss": 1.7717, "step": 13750 }, { "epoch": 0.18, "grad_norm": 4.421643257141113, "learning_rate": 1.9988802488740246e-05, "loss": 2.321, "step": 13751 }, { "epoch": 0.18, "grad_norm": 5.507791042327881, "learning_rate": 1.9988797516865043e-05, "loss": 2.4032, "step": 13752 }, { "epoch": 0.18, "grad_norm": 4.887559413909912, "learning_rate": 1.998879254388691e-05, "loss": 2.4013, "step": 13753 }, { "epoch": 0.18, "grad_norm": 4.276300430297852, "learning_rate": 1.9988787569805842e-05, "loss": 2.371, "step": 13754 }, { "epoch": 0.18, "grad_norm": 4.687263011932373, "learning_rate": 1.9988782594621843e-05, "loss": 2.0527, "step": 13755 }, { "epoch": 0.18, "grad_norm": 4.714634418487549, "learning_rate": 1.998877761833491e-05, "loss": 2.1091, "step": 13756 }, { "epoch": 0.18, "grad_norm": 4.547544002532959, "learning_rate": 1.9988772640945048e-05, "loss": 1.988, "step": 13757 }, { "epoch": 0.18, "grad_norm": 4.891152381896973, "learning_rate": 1.998876766245226e-05, "loss": 2.3323, "step": 13758 }, { "epoch": 0.18, "grad_norm": 3.7174270153045654, "learning_rate": 1.998876268285654e-05, "loss": 2.0262, "step": 13759 }, { "epoch": 0.18, "grad_norm": 4.4741926193237305, "learning_rate": 1.9988757702157892e-05, "loss": 2.26, "step": 13760 }, { "epoch": 0.18, "grad_norm": 4.245270252227783, "learning_rate": 1.9988752720356313e-05, "loss": 1.7855, "step": 13761 }, { "epoch": 0.18, "grad_norm": 4.282844543457031, "learning_rate": 1.998874773745181e-05, "loss": 2.4452, "step": 13762 }, { "epoch": 0.18, "grad_norm": 4.200479984283447, "learning_rate": 1.9988742753444374e-05, "loss": 1.9383, "step": 13763 }, { "epoch": 0.18, "grad_norm": 5.269410133361816, "learning_rate": 1.9988737768334017e-05, "loss": 2.6809, "step": 13764 }, { "epoch": 0.18, "grad_norm": 4.236199855804443, "learning_rate": 1.998873278212073e-05, "loss": 2.0676, "step": 13765 }, { "epoch": 0.18, "grad_norm": 4.402541160583496, "learning_rate": 1.998872779480452e-05, "loss": 1.6736, "step": 13766 }, { "epoch": 0.18, "grad_norm": 4.753429412841797, "learning_rate": 1.9988722806385385e-05, "loss": 2.4716, "step": 13767 }, { "epoch": 0.18, "grad_norm": 4.972461700439453, "learning_rate": 1.9988717816863325e-05, "loss": 2.1211, "step": 13768 }, { "epoch": 0.18, "grad_norm": 4.16320276260376, "learning_rate": 1.9988712826238343e-05, "loss": 2.1254, "step": 13769 }, { "epoch": 0.18, "grad_norm": 4.753805637359619, "learning_rate": 1.9988707834510436e-05, "loss": 2.3386, "step": 13770 }, { "epoch": 0.18, "grad_norm": 4.47402811050415, "learning_rate": 1.9988702841679604e-05, "loss": 2.282, "step": 13771 }, { "epoch": 0.18, "grad_norm": 4.5557732582092285, "learning_rate": 1.998869784774585e-05, "loss": 2.5179, "step": 13772 }, { "epoch": 0.18, "grad_norm": 4.642094135284424, "learning_rate": 1.9988692852709178e-05, "loss": 2.1656, "step": 13773 }, { "epoch": 0.18, "grad_norm": 5.297358512878418, "learning_rate": 1.998868785656958e-05, "loss": 3.0328, "step": 13774 }, { "epoch": 0.18, "grad_norm": 5.230227470397949, "learning_rate": 1.9988682859327063e-05, "loss": 2.5985, "step": 13775 }, { "epoch": 0.18, "grad_norm": 4.368359565734863, "learning_rate": 1.998867786098163e-05, "loss": 2.1619, "step": 13776 }, { "epoch": 0.18, "grad_norm": 4.558750629425049, "learning_rate": 1.9988672861533274e-05, "loss": 2.2726, "step": 13777 }, { "epoch": 0.18, "grad_norm": 4.259431838989258, "learning_rate": 1.9988667860982e-05, "loss": 2.0092, "step": 13778 }, { "epoch": 0.18, "grad_norm": 4.766435146331787, "learning_rate": 1.9988662859327807e-05, "loss": 2.2391, "step": 13779 }, { "epoch": 0.18, "grad_norm": 4.294477939605713, "learning_rate": 1.9988657856570697e-05, "loss": 2.115, "step": 13780 }, { "epoch": 0.18, "grad_norm": 4.2531657218933105, "learning_rate": 1.998865285271067e-05, "loss": 2.2583, "step": 13781 }, { "epoch": 0.18, "grad_norm": 4.787529468536377, "learning_rate": 1.9988647847747723e-05, "loss": 2.5841, "step": 13782 }, { "epoch": 0.18, "grad_norm": 4.549615859985352, "learning_rate": 1.9988642841681863e-05, "loss": 2.4534, "step": 13783 }, { "epoch": 0.18, "grad_norm": 4.426246166229248, "learning_rate": 1.9988637834513084e-05, "loss": 1.8771, "step": 13784 }, { "epoch": 0.18, "grad_norm": 5.38248872756958, "learning_rate": 1.9988632826241393e-05, "loss": 2.3621, "step": 13785 }, { "epoch": 0.18, "grad_norm": 4.296921253204346, "learning_rate": 1.998862781686679e-05, "loss": 1.9555, "step": 13786 }, { "epoch": 0.18, "grad_norm": 4.244059085845947, "learning_rate": 1.9988622806389268e-05, "loss": 2.2001, "step": 13787 }, { "epoch": 0.18, "grad_norm": 4.744285583496094, "learning_rate": 1.9988617794808836e-05, "loss": 2.3064, "step": 13788 }, { "epoch": 0.18, "grad_norm": 4.681697368621826, "learning_rate": 1.998861278212549e-05, "loss": 2.175, "step": 13789 }, { "epoch": 0.18, "grad_norm": 4.68200159072876, "learning_rate": 1.998860776833923e-05, "loss": 2.8511, "step": 13790 }, { "epoch": 0.18, "grad_norm": 5.892437934875488, "learning_rate": 1.998860275345006e-05, "loss": 2.9275, "step": 13791 }, { "epoch": 0.18, "grad_norm": 4.240290641784668, "learning_rate": 1.998859773745798e-05, "loss": 1.64, "step": 13792 }, { "epoch": 0.18, "grad_norm": 4.861053943634033, "learning_rate": 1.9988592720362983e-05, "loss": 2.5077, "step": 13793 }, { "epoch": 0.18, "grad_norm": 3.6103320121765137, "learning_rate": 1.9988587702165083e-05, "loss": 1.7321, "step": 13794 }, { "epoch": 0.18, "grad_norm": 5.4993672370910645, "learning_rate": 1.9988582682864272e-05, "loss": 2.7073, "step": 13795 }, { "epoch": 0.18, "grad_norm": 4.071563720703125, "learning_rate": 1.9988577662460552e-05, "loss": 2.0444, "step": 13796 }, { "epoch": 0.18, "grad_norm": 4.701729774475098, "learning_rate": 1.998857264095392e-05, "loss": 2.6193, "step": 13797 }, { "epoch": 0.18, "grad_norm": 4.011246681213379, "learning_rate": 1.9988567618344382e-05, "loss": 2.1901, "step": 13798 }, { "epoch": 0.18, "grad_norm": 4.253027439117432, "learning_rate": 1.9988562594631938e-05, "loss": 2.0664, "step": 13799 }, { "epoch": 0.18, "grad_norm": 4.881491661071777, "learning_rate": 1.998855756981659e-05, "loss": 2.2465, "step": 13800 }, { "epoch": 0.18, "grad_norm": 3.955721378326416, "learning_rate": 1.9988552543898333e-05, "loss": 1.9155, "step": 13801 }, { "epoch": 0.18, "grad_norm": 4.227466106414795, "learning_rate": 1.998854751687717e-05, "loss": 2.235, "step": 13802 }, { "epoch": 0.18, "grad_norm": 4.842202186584473, "learning_rate": 1.99885424887531e-05, "loss": 2.5296, "step": 13803 }, { "epoch": 0.18, "grad_norm": 3.969930648803711, "learning_rate": 1.9988537459526126e-05, "loss": 1.9405, "step": 13804 }, { "epoch": 0.18, "grad_norm": 4.501908302307129, "learning_rate": 1.998853242919625e-05, "loss": 2.3773, "step": 13805 }, { "epoch": 0.18, "grad_norm": 5.1643853187561035, "learning_rate": 1.9988527397763468e-05, "loss": 2.533, "step": 13806 }, { "epoch": 0.18, "grad_norm": 4.612739086151123, "learning_rate": 1.9988522365227786e-05, "loss": 2.5397, "step": 13807 }, { "epoch": 0.18, "grad_norm": 4.458236217498779, "learning_rate": 1.99885173315892e-05, "loss": 2.4054, "step": 13808 }, { "epoch": 0.18, "grad_norm": 3.9339919090270996, "learning_rate": 1.9988512296847714e-05, "loss": 2.0315, "step": 13809 }, { "epoch": 0.18, "grad_norm": 4.081721305847168, "learning_rate": 1.9988507261003326e-05, "loss": 1.7584, "step": 13810 }, { "epoch": 0.18, "grad_norm": 4.733093738555908, "learning_rate": 1.9988502224056036e-05, "loss": 2.1223, "step": 13811 }, { "epoch": 0.18, "grad_norm": 4.271683216094971, "learning_rate": 1.998849718600585e-05, "loss": 2.3844, "step": 13812 }, { "epoch": 0.18, "grad_norm": 4.586292743682861, "learning_rate": 1.9988492146852757e-05, "loss": 2.3498, "step": 13813 }, { "epoch": 0.18, "grad_norm": 5.267391681671143, "learning_rate": 1.9988487106596773e-05, "loss": 2.2202, "step": 13814 }, { "epoch": 0.18, "grad_norm": 4.913673400878906, "learning_rate": 1.9988482065237886e-05, "loss": 2.3112, "step": 13815 }, { "epoch": 0.18, "grad_norm": 4.29075813293457, "learning_rate": 1.99884770227761e-05, "loss": 1.9593, "step": 13816 }, { "epoch": 0.18, "grad_norm": 4.330733299255371, "learning_rate": 1.998847197921142e-05, "loss": 2.0159, "step": 13817 }, { "epoch": 0.18, "grad_norm": 4.786392688751221, "learning_rate": 1.998846693454384e-05, "loss": 2.2693, "step": 13818 }, { "epoch": 0.18, "grad_norm": 4.778112888336182, "learning_rate": 1.9988461888773366e-05, "loss": 2.3526, "step": 13819 }, { "epoch": 0.18, "grad_norm": 4.831200122833252, "learning_rate": 1.99884568419e-05, "loss": 2.239, "step": 13820 }, { "epoch": 0.18, "grad_norm": 4.760076999664307, "learning_rate": 1.9988451793923734e-05, "loss": 2.1494, "step": 13821 }, { "epoch": 0.18, "grad_norm": 3.964933156967163, "learning_rate": 1.998844674484457e-05, "loss": 2.0075, "step": 13822 }, { "epoch": 0.18, "grad_norm": 4.430073261260986, "learning_rate": 1.998844169466252e-05, "loss": 2.2025, "step": 13823 }, { "epoch": 0.18, "grad_norm": 4.343410015106201, "learning_rate": 1.998843664337757e-05, "loss": 2.1048, "step": 13824 }, { "epoch": 0.18, "grad_norm": 4.4799418449401855, "learning_rate": 1.9988431590989732e-05, "loss": 2.0757, "step": 13825 }, { "epoch": 0.18, "grad_norm": 4.599262714385986, "learning_rate": 1.9988426537498998e-05, "loss": 2.3227, "step": 13826 }, { "epoch": 0.18, "grad_norm": 4.621365070343018, "learning_rate": 1.9988421482905376e-05, "loss": 2.2799, "step": 13827 }, { "epoch": 0.18, "grad_norm": 4.994604110717773, "learning_rate": 1.998841642720886e-05, "loss": 2.6835, "step": 13828 }, { "epoch": 0.18, "grad_norm": 4.870229721069336, "learning_rate": 1.9988411370409456e-05, "loss": 2.7086, "step": 13829 }, { "epoch": 0.18, "grad_norm": 4.740037441253662, "learning_rate": 1.9988406312507158e-05, "loss": 2.4412, "step": 13830 }, { "epoch": 0.18, "grad_norm": 4.70639705657959, "learning_rate": 1.9988401253501975e-05, "loss": 2.6003, "step": 13831 }, { "epoch": 0.18, "grad_norm": 4.8004302978515625, "learning_rate": 1.99883961933939e-05, "loss": 2.7293, "step": 13832 }, { "epoch": 0.18, "grad_norm": 4.395411014556885, "learning_rate": 1.9988391132182936e-05, "loss": 1.8267, "step": 13833 }, { "epoch": 0.18, "grad_norm": 3.8818767070770264, "learning_rate": 1.9988386069869086e-05, "loss": 1.7229, "step": 13834 }, { "epoch": 0.18, "grad_norm": 4.507387161254883, "learning_rate": 1.998838100645235e-05, "loss": 2.4986, "step": 13835 }, { "epoch": 0.18, "grad_norm": 4.68223237991333, "learning_rate": 1.9988375941932725e-05, "loss": 1.8417, "step": 13836 }, { "epoch": 0.18, "grad_norm": 4.619378089904785, "learning_rate": 1.9988370876310212e-05, "loss": 2.4114, "step": 13837 }, { "epoch": 0.18, "grad_norm": 4.791966438293457, "learning_rate": 1.998836580958482e-05, "loss": 2.262, "step": 13838 }, { "epoch": 0.18, "grad_norm": 3.994507074356079, "learning_rate": 1.9988360741756537e-05, "loss": 1.6508, "step": 13839 }, { "epoch": 0.18, "grad_norm": 5.0491790771484375, "learning_rate": 1.9988355672825372e-05, "loss": 2.6727, "step": 13840 }, { "epoch": 0.18, "grad_norm": 5.22847843170166, "learning_rate": 1.998835060279132e-05, "loss": 2.2523, "step": 13841 }, { "epoch": 0.18, "grad_norm": 4.909914493560791, "learning_rate": 1.9988345531654388e-05, "loss": 2.406, "step": 13842 }, { "epoch": 0.18, "grad_norm": 4.526825428009033, "learning_rate": 1.998834045941457e-05, "loss": 2.2553, "step": 13843 }, { "epoch": 0.18, "grad_norm": 4.232032299041748, "learning_rate": 1.9988335386071873e-05, "loss": 1.9781, "step": 13844 }, { "epoch": 0.18, "grad_norm": 4.810744762420654, "learning_rate": 1.9988330311626293e-05, "loss": 2.3878, "step": 13845 }, { "epoch": 0.18, "grad_norm": 4.25054931640625, "learning_rate": 1.998832523607783e-05, "loss": 1.9199, "step": 13846 }, { "epoch": 0.18, "grad_norm": 4.4873480796813965, "learning_rate": 1.998832015942649e-05, "loss": 2.2139, "step": 13847 }, { "epoch": 0.18, "grad_norm": 3.854055404663086, "learning_rate": 1.998831508167227e-05, "loss": 1.9216, "step": 13848 }, { "epoch": 0.18, "grad_norm": 4.884199619293213, "learning_rate": 1.998831000281517e-05, "loss": 2.5438, "step": 13849 }, { "epoch": 0.18, "grad_norm": 4.348665714263916, "learning_rate": 1.9988304922855187e-05, "loss": 2.2654, "step": 13850 }, { "epoch": 0.18, "grad_norm": 4.628180503845215, "learning_rate": 1.998829984179233e-05, "loss": 2.3877, "step": 13851 }, { "epoch": 0.18, "grad_norm": 3.8584914207458496, "learning_rate": 1.9988294759626594e-05, "loss": 1.9934, "step": 13852 }, { "epoch": 0.18, "grad_norm": 4.70529842376709, "learning_rate": 1.998828967635798e-05, "loss": 2.4099, "step": 13853 }, { "epoch": 0.18, "grad_norm": 4.575780391693115, "learning_rate": 1.998828459198649e-05, "loss": 2.2542, "step": 13854 }, { "epoch": 0.18, "grad_norm": 3.758838653564453, "learning_rate": 1.9988279506512125e-05, "loss": 2.0191, "step": 13855 }, { "epoch": 0.18, "grad_norm": 4.636816501617432, "learning_rate": 1.9988274419934885e-05, "loss": 2.0191, "step": 13856 }, { "epoch": 0.18, "grad_norm": 5.681851387023926, "learning_rate": 1.9988269332254767e-05, "loss": 2.9801, "step": 13857 }, { "epoch": 0.18, "grad_norm": 4.386650085449219, "learning_rate": 1.998826424347178e-05, "loss": 2.1422, "step": 13858 }, { "epoch": 0.18, "grad_norm": 4.784187316894531, "learning_rate": 1.9988259153585912e-05, "loss": 2.6953, "step": 13859 }, { "epoch": 0.18, "grad_norm": 5.414065837860107, "learning_rate": 1.9988254062597175e-05, "loss": 2.4961, "step": 13860 }, { "epoch": 0.18, "grad_norm": 4.150619983673096, "learning_rate": 1.9988248970505568e-05, "loss": 1.9392, "step": 13861 }, { "epoch": 0.18, "grad_norm": 4.7832818031311035, "learning_rate": 1.9988243877311082e-05, "loss": 2.1251, "step": 13862 }, { "epoch": 0.18, "grad_norm": 4.10559606552124, "learning_rate": 1.998823878301373e-05, "loss": 2.1687, "step": 13863 }, { "epoch": 0.18, "grad_norm": 4.784896373748779, "learning_rate": 1.9988233687613503e-05, "loss": 1.9242, "step": 13864 }, { "epoch": 0.18, "grad_norm": 4.599996566772461, "learning_rate": 1.998822859111041e-05, "loss": 2.3279, "step": 13865 }, { "epoch": 0.18, "grad_norm": 4.7979559898376465, "learning_rate": 1.9988223493504444e-05, "loss": 2.6106, "step": 13866 }, { "epoch": 0.18, "grad_norm": 4.316260814666748, "learning_rate": 1.9988218394795608e-05, "loss": 2.4013, "step": 13867 }, { "epoch": 0.18, "grad_norm": 4.877228260040283, "learning_rate": 1.9988213294983905e-05, "loss": 2.7501, "step": 13868 }, { "epoch": 0.18, "grad_norm": 3.7697103023529053, "learning_rate": 1.998820819406933e-05, "loss": 2.1509, "step": 13869 }, { "epoch": 0.18, "grad_norm": 4.673757553100586, "learning_rate": 1.9988203092051893e-05, "loss": 2.4124, "step": 13870 }, { "epoch": 0.18, "grad_norm": 5.407771110534668, "learning_rate": 1.9988197988931587e-05, "loss": 2.8477, "step": 13871 }, { "epoch": 0.18, "grad_norm": 4.623876571655273, "learning_rate": 1.998819288470841e-05, "loss": 1.9124, "step": 13872 }, { "epoch": 0.18, "grad_norm": 4.506262302398682, "learning_rate": 1.9988187779382375e-05, "loss": 2.5909, "step": 13873 }, { "epoch": 0.18, "grad_norm": 4.168071746826172, "learning_rate": 1.998818267295347e-05, "loss": 1.7597, "step": 13874 }, { "epoch": 0.18, "grad_norm": 4.666757106781006, "learning_rate": 1.99881775654217e-05, "loss": 2.3853, "step": 13875 }, { "epoch": 0.18, "grad_norm": 4.769224643707275, "learning_rate": 1.9988172456787067e-05, "loss": 2.2421, "step": 13876 }, { "epoch": 0.18, "grad_norm": 4.103901386260986, "learning_rate": 1.998816734704957e-05, "loss": 2.0312, "step": 13877 }, { "epoch": 0.18, "grad_norm": 4.139318466186523, "learning_rate": 1.998816223620921e-05, "loss": 1.6688, "step": 13878 }, { "epoch": 0.18, "grad_norm": 4.674832344055176, "learning_rate": 1.9988157124265986e-05, "loss": 2.3196, "step": 13879 }, { "epoch": 0.18, "grad_norm": 4.209442615509033, "learning_rate": 1.9988152011219905e-05, "loss": 2.1258, "step": 13880 }, { "epoch": 0.18, "grad_norm": 4.349122047424316, "learning_rate": 1.9988146897070956e-05, "loss": 1.8291, "step": 13881 }, { "epoch": 0.18, "grad_norm": 5.168506145477295, "learning_rate": 1.998814178181915e-05, "loss": 2.4955, "step": 13882 }, { "epoch": 0.18, "grad_norm": 5.721834182739258, "learning_rate": 1.998813666546448e-05, "loss": 2.8358, "step": 13883 }, { "epoch": 0.18, "grad_norm": 4.737621784210205, "learning_rate": 1.9988131548006956e-05, "loss": 2.3332, "step": 13884 }, { "epoch": 0.18, "grad_norm": 4.365714073181152, "learning_rate": 1.998812642944657e-05, "loss": 2.3478, "step": 13885 }, { "epoch": 0.18, "grad_norm": 4.186678886413574, "learning_rate": 1.9988121309783324e-05, "loss": 1.8323, "step": 13886 }, { "epoch": 0.18, "grad_norm": 4.074003219604492, "learning_rate": 1.9988116189017218e-05, "loss": 2.1095, "step": 13887 }, { "epoch": 0.18, "grad_norm": 4.104129791259766, "learning_rate": 1.998811106714826e-05, "loss": 2.2321, "step": 13888 }, { "epoch": 0.18, "grad_norm": 5.2472734451293945, "learning_rate": 1.9988105944176445e-05, "loss": 2.4989, "step": 13889 }, { "epoch": 0.18, "grad_norm": 4.080410480499268, "learning_rate": 1.9988100820101768e-05, "loss": 2.1756, "step": 13890 }, { "epoch": 0.18, "grad_norm": 4.932647228240967, "learning_rate": 1.998809569492424e-05, "loss": 2.4099, "step": 13891 }, { "epoch": 0.18, "grad_norm": 3.7821707725524902, "learning_rate": 1.9988090568643855e-05, "loss": 1.6283, "step": 13892 }, { "epoch": 0.18, "grad_norm": 4.463929653167725, "learning_rate": 1.9988085441260616e-05, "loss": 2.2161, "step": 13893 }, { "epoch": 0.18, "grad_norm": 3.894442558288574, "learning_rate": 1.9988080312774523e-05, "loss": 1.5696, "step": 13894 }, { "epoch": 0.18, "grad_norm": 4.811089038848877, "learning_rate": 1.9988075183185573e-05, "loss": 2.7263, "step": 13895 }, { "epoch": 0.18, "grad_norm": 4.8189616203308105, "learning_rate": 1.9988070052493776e-05, "loss": 2.1642, "step": 13896 }, { "epoch": 0.18, "grad_norm": 4.748133659362793, "learning_rate": 1.9988064920699122e-05, "loss": 2.0025, "step": 13897 }, { "epoch": 0.18, "grad_norm": 4.322805881500244, "learning_rate": 1.9988059787801618e-05, "loss": 1.8418, "step": 13898 }, { "epoch": 0.18, "grad_norm": 4.6208600997924805, "learning_rate": 1.9988054653801263e-05, "loss": 2.0516, "step": 13899 }, { "epoch": 0.18, "grad_norm": 4.745690822601318, "learning_rate": 1.9988049518698057e-05, "loss": 2.1042, "step": 13900 }, { "epoch": 0.18, "grad_norm": 4.581640243530273, "learning_rate": 1.9988044382492e-05, "loss": 2.25, "step": 13901 }, { "epoch": 0.18, "grad_norm": 4.229738712310791, "learning_rate": 1.9988039245183094e-05, "loss": 2.2474, "step": 13902 }, { "epoch": 0.18, "grad_norm": 4.833459377288818, "learning_rate": 1.998803410677134e-05, "loss": 2.7573, "step": 13903 }, { "epoch": 0.18, "grad_norm": 4.132355690002441, "learning_rate": 1.9988028967256737e-05, "loss": 1.7155, "step": 13904 }, { "epoch": 0.18, "grad_norm": 4.6631083488464355, "learning_rate": 1.9988023826639286e-05, "loss": 2.5278, "step": 13905 }, { "epoch": 0.18, "grad_norm": 4.3458685874938965, "learning_rate": 1.9988018684918988e-05, "loss": 2.2801, "step": 13906 }, { "epoch": 0.18, "grad_norm": 4.9631195068359375, "learning_rate": 1.9988013542095843e-05, "loss": 2.5891, "step": 13907 }, { "epoch": 0.18, "grad_norm": 4.979144096374512, "learning_rate": 1.998800839816985e-05, "loss": 2.3239, "step": 13908 }, { "epoch": 0.18, "grad_norm": 3.8446145057678223, "learning_rate": 1.9988003253141018e-05, "loss": 2.0453, "step": 13909 }, { "epoch": 0.18, "grad_norm": 4.7262282371521, "learning_rate": 1.9987998107009335e-05, "loss": 2.403, "step": 13910 }, { "epoch": 0.18, "grad_norm": 4.263004779815674, "learning_rate": 1.998799295977481e-05, "loss": 2.0998, "step": 13911 }, { "epoch": 0.18, "grad_norm": 4.316336631774902, "learning_rate": 1.9987987811437438e-05, "loss": 2.4458, "step": 13912 }, { "epoch": 0.18, "grad_norm": 4.620569229125977, "learning_rate": 1.9987982661997224e-05, "loss": 2.3113, "step": 13913 }, { "epoch": 0.18, "grad_norm": 4.60243558883667, "learning_rate": 1.998797751145417e-05, "loss": 2.0182, "step": 13914 }, { "epoch": 0.18, "grad_norm": 4.309152603149414, "learning_rate": 1.9987972359808272e-05, "loss": 2.2954, "step": 13915 }, { "epoch": 0.18, "grad_norm": 4.62611722946167, "learning_rate": 1.998796720705953e-05, "loss": 2.6765, "step": 13916 }, { "epoch": 0.18, "grad_norm": 4.581267356872559, "learning_rate": 1.998796205320795e-05, "loss": 2.5273, "step": 13917 }, { "epoch": 0.18, "grad_norm": 4.659019947052002, "learning_rate": 1.998795689825353e-05, "loss": 2.5082, "step": 13918 }, { "epoch": 0.18, "grad_norm": 4.360029697418213, "learning_rate": 1.9987951742196267e-05, "loss": 2.1287, "step": 13919 }, { "epoch": 0.18, "grad_norm": 4.306521892547607, "learning_rate": 1.9987946585036168e-05, "loss": 2.6798, "step": 13920 }, { "epoch": 0.18, "grad_norm": 4.509636878967285, "learning_rate": 1.9987941426773228e-05, "loss": 2.0308, "step": 13921 }, { "epoch": 0.18, "grad_norm": 4.754624366760254, "learning_rate": 1.998793626740745e-05, "loss": 2.6521, "step": 13922 }, { "epoch": 0.18, "grad_norm": 4.5463547706604, "learning_rate": 1.9987931106938835e-05, "loss": 2.7606, "step": 13923 }, { "epoch": 0.18, "grad_norm": 4.222080230712891, "learning_rate": 1.9987925945367385e-05, "loss": 2.164, "step": 13924 }, { "epoch": 0.18, "grad_norm": 5.474710941314697, "learning_rate": 1.9987920782693094e-05, "loss": 2.4975, "step": 13925 }, { "epoch": 0.18, "grad_norm": 4.011866092681885, "learning_rate": 1.9987915618915973e-05, "loss": 2.1036, "step": 13926 }, { "epoch": 0.18, "grad_norm": 4.565189361572266, "learning_rate": 1.998791045403601e-05, "loss": 1.9664, "step": 13927 }, { "epoch": 0.18, "grad_norm": 4.041968822479248, "learning_rate": 1.9987905288053214e-05, "loss": 1.8227, "step": 13928 }, { "epoch": 0.18, "grad_norm": 4.090735912322998, "learning_rate": 1.9987900120967586e-05, "loss": 2.1775, "step": 13929 }, { "epoch": 0.18, "grad_norm": 4.43843412399292, "learning_rate": 1.9987894952779125e-05, "loss": 2.1021, "step": 13930 }, { "epoch": 0.18, "grad_norm": 5.353108882904053, "learning_rate": 1.998788978348783e-05, "loss": 2.3065, "step": 13931 }, { "epoch": 0.18, "grad_norm": 4.031158447265625, "learning_rate": 1.99878846130937e-05, "loss": 2.1067, "step": 13932 }, { "epoch": 0.18, "grad_norm": 4.617420673370361, "learning_rate": 1.9987879441596742e-05, "loss": 2.7077, "step": 13933 }, { "epoch": 0.18, "grad_norm": 4.302832126617432, "learning_rate": 1.998787426899695e-05, "loss": 2.2905, "step": 13934 }, { "epoch": 0.18, "grad_norm": 4.350325107574463, "learning_rate": 1.9987869095294328e-05, "loss": 2.3779, "step": 13935 }, { "epoch": 0.18, "grad_norm": 5.239475250244141, "learning_rate": 1.9987863920488875e-05, "loss": 2.7963, "step": 13936 }, { "epoch": 0.18, "grad_norm": 4.205315589904785, "learning_rate": 1.9987858744580596e-05, "loss": 2.2043, "step": 13937 }, { "epoch": 0.18, "grad_norm": 4.4123854637146, "learning_rate": 1.9987853567569483e-05, "loss": 2.023, "step": 13938 }, { "epoch": 0.18, "grad_norm": 5.089180946350098, "learning_rate": 1.9987848389455547e-05, "loss": 2.3238, "step": 13939 }, { "epoch": 0.18, "grad_norm": 4.4160895347595215, "learning_rate": 1.9987843210238778e-05, "loss": 2.1785, "step": 13940 }, { "epoch": 0.18, "grad_norm": 4.714012622833252, "learning_rate": 1.9987838029919185e-05, "loss": 2.6827, "step": 13941 }, { "epoch": 0.18, "grad_norm": 5.223880290985107, "learning_rate": 1.9987832848496762e-05, "loss": 3.1443, "step": 13942 }, { "epoch": 0.18, "grad_norm": 4.5471649169921875, "learning_rate": 1.9987827665971516e-05, "loss": 2.0735, "step": 13943 }, { "epoch": 0.18, "grad_norm": 4.336728572845459, "learning_rate": 1.9987822482343443e-05, "loss": 2.7233, "step": 13944 }, { "epoch": 0.18, "grad_norm": 3.9670097827911377, "learning_rate": 1.9987817297612543e-05, "loss": 1.9416, "step": 13945 }, { "epoch": 0.18, "grad_norm": 4.273519515991211, "learning_rate": 1.9987812111778823e-05, "loss": 2.0853, "step": 13946 }, { "epoch": 0.18, "grad_norm": 4.267110347747803, "learning_rate": 1.9987806924842273e-05, "loss": 2.0609, "step": 13947 }, { "epoch": 0.18, "grad_norm": 4.25001335144043, "learning_rate": 1.9987801736802903e-05, "loss": 1.9774, "step": 13948 }, { "epoch": 0.18, "grad_norm": 4.066343307495117, "learning_rate": 1.998779654766071e-05, "loss": 1.7807, "step": 13949 }, { "epoch": 0.18, "grad_norm": 5.086217403411865, "learning_rate": 1.9987791357415697e-05, "loss": 2.3922, "step": 13950 }, { "epoch": 0.18, "grad_norm": 4.499380588531494, "learning_rate": 1.998778616606786e-05, "loss": 2.2891, "step": 13951 }, { "epoch": 0.18, "grad_norm": 4.341362476348877, "learning_rate": 1.9987780973617204e-05, "loss": 2.0681, "step": 13952 }, { "epoch": 0.18, "grad_norm": 4.894327163696289, "learning_rate": 1.9987775780063724e-05, "loss": 2.575, "step": 13953 }, { "epoch": 0.18, "grad_norm": 4.8281168937683105, "learning_rate": 1.9987770585407428e-05, "loss": 2.4684, "step": 13954 }, { "epoch": 0.18, "grad_norm": 4.53763484954834, "learning_rate": 1.998776538964831e-05, "loss": 1.9442, "step": 13955 }, { "epoch": 0.18, "grad_norm": 4.452340126037598, "learning_rate": 1.9987760192786375e-05, "loss": 2.4157, "step": 13956 }, { "epoch": 0.18, "grad_norm": 4.391175746917725, "learning_rate": 1.9987754994821622e-05, "loss": 2.729, "step": 13957 }, { "epoch": 0.18, "grad_norm": 5.025847911834717, "learning_rate": 1.998774979575405e-05, "loss": 2.8363, "step": 13958 }, { "epoch": 0.18, "grad_norm": 4.3625030517578125, "learning_rate": 1.998774459558366e-05, "loss": 2.8564, "step": 13959 }, { "epoch": 0.18, "grad_norm": 4.546248912811279, "learning_rate": 1.9987739394310454e-05, "loss": 2.3126, "step": 13960 }, { "epoch": 0.18, "grad_norm": 4.529508590698242, "learning_rate": 1.9987734191934435e-05, "loss": 2.4155, "step": 13961 }, { "epoch": 0.18, "grad_norm": 4.312575340270996, "learning_rate": 1.9987728988455596e-05, "loss": 2.2915, "step": 13962 }, { "epoch": 0.18, "grad_norm": 4.927656173706055, "learning_rate": 1.9987723783873947e-05, "loss": 2.4414, "step": 13963 }, { "epoch": 0.18, "grad_norm": 4.052318096160889, "learning_rate": 1.998771857818948e-05, "loss": 2.1773, "step": 13964 }, { "epoch": 0.18, "grad_norm": 4.060871601104736, "learning_rate": 1.9987713371402203e-05, "loss": 2.0913, "step": 13965 }, { "epoch": 0.18, "grad_norm": 4.874138355255127, "learning_rate": 1.998770816351211e-05, "loss": 2.3883, "step": 13966 }, { "epoch": 0.18, "grad_norm": 3.996511459350586, "learning_rate": 1.998770295451921e-05, "loss": 1.9529, "step": 13967 }, { "epoch": 0.18, "grad_norm": 4.6231279373168945, "learning_rate": 1.9987697744423495e-05, "loss": 2.1759, "step": 13968 }, { "epoch": 0.18, "grad_norm": 4.803279399871826, "learning_rate": 1.9987692533224963e-05, "loss": 2.727, "step": 13969 }, { "epoch": 0.18, "grad_norm": 5.18209171295166, "learning_rate": 1.9987687320923626e-05, "loss": 2.987, "step": 13970 }, { "epoch": 0.18, "grad_norm": 4.824145793914795, "learning_rate": 1.9987682107519478e-05, "loss": 2.9848, "step": 13971 }, { "epoch": 0.18, "grad_norm": 4.511308670043945, "learning_rate": 1.998767689301252e-05, "loss": 1.9421, "step": 13972 }, { "epoch": 0.18, "grad_norm": 3.725386381149292, "learning_rate": 1.9987671677402757e-05, "loss": 1.5385, "step": 13973 }, { "epoch": 0.18, "grad_norm": 4.051525592803955, "learning_rate": 1.9987666460690177e-05, "loss": 1.745, "step": 13974 }, { "epoch": 0.18, "grad_norm": 4.121970176696777, "learning_rate": 1.9987661242874797e-05, "loss": 1.9569, "step": 13975 }, { "epoch": 0.18, "grad_norm": 4.365799427032471, "learning_rate": 1.9987656023956607e-05, "loss": 2.4945, "step": 13976 }, { "epoch": 0.18, "grad_norm": 4.6639885902404785, "learning_rate": 1.998765080393561e-05, "loss": 2.0707, "step": 13977 }, { "epoch": 0.18, "grad_norm": 4.487457275390625, "learning_rate": 1.9987645582811805e-05, "loss": 1.8362, "step": 13978 }, { "epoch": 0.18, "grad_norm": 4.1765336990356445, "learning_rate": 1.99876403605852e-05, "loss": 1.8288, "step": 13979 }, { "epoch": 0.18, "grad_norm": 4.3768439292907715, "learning_rate": 1.9987635137255784e-05, "loss": 2.2109, "step": 13980 }, { "epoch": 0.18, "grad_norm": 4.307078838348389, "learning_rate": 1.9987629912823566e-05, "loss": 2.0632, "step": 13981 }, { "epoch": 0.18, "grad_norm": 4.221817493438721, "learning_rate": 1.9987624687288548e-05, "loss": 2.1329, "step": 13982 }, { "epoch": 0.18, "grad_norm": 4.499715805053711, "learning_rate": 1.9987619460650723e-05, "loss": 2.334, "step": 13983 }, { "epoch": 0.18, "grad_norm": 4.235601425170898, "learning_rate": 1.9987614232910096e-05, "loss": 1.8937, "step": 13984 }, { "epoch": 0.18, "grad_norm": 3.6554441452026367, "learning_rate": 1.9987609004066666e-05, "loss": 1.8677, "step": 13985 }, { "epoch": 0.18, "grad_norm": 4.499410629272461, "learning_rate": 1.9987603774120436e-05, "loss": 2.3436, "step": 13986 }, { "epoch": 0.18, "grad_norm": 4.189736366271973, "learning_rate": 1.9987598543071403e-05, "loss": 1.9726, "step": 13987 }, { "epoch": 0.18, "grad_norm": 4.849615097045898, "learning_rate": 1.998759331091957e-05, "loss": 2.627, "step": 13988 }, { "epoch": 0.18, "grad_norm": 3.844062566757202, "learning_rate": 1.9987588077664938e-05, "loss": 1.8768, "step": 13989 }, { "epoch": 0.18, "grad_norm": 4.600218772888184, "learning_rate": 1.9987582843307506e-05, "loss": 2.1907, "step": 13990 }, { "epoch": 0.18, "grad_norm": 4.300650596618652, "learning_rate": 1.9987577607847277e-05, "loss": 1.8729, "step": 13991 }, { "epoch": 0.18, "grad_norm": 4.9183125495910645, "learning_rate": 1.998757237128425e-05, "loss": 2.178, "step": 13992 }, { "epoch": 0.18, "grad_norm": 5.042866230010986, "learning_rate": 1.9987567133618425e-05, "loss": 2.0375, "step": 13993 }, { "epoch": 0.18, "grad_norm": 4.926701545715332, "learning_rate": 1.9987561894849805e-05, "loss": 2.4206, "step": 13994 }, { "epoch": 0.18, "grad_norm": 3.9367077350616455, "learning_rate": 1.9987556654978385e-05, "loss": 2.0804, "step": 13995 }, { "epoch": 0.18, "grad_norm": 4.214657783508301, "learning_rate": 1.9987551414004172e-05, "loss": 2.036, "step": 13996 }, { "epoch": 0.18, "grad_norm": 4.694620609283447, "learning_rate": 1.9987546171927163e-05, "loss": 2.3117, "step": 13997 }, { "epoch": 0.18, "grad_norm": 4.783729076385498, "learning_rate": 1.9987540928747358e-05, "loss": 2.5674, "step": 13998 }, { "epoch": 0.18, "grad_norm": 5.103672504425049, "learning_rate": 1.9987535684464763e-05, "loss": 2.3896, "step": 13999 }, { "epoch": 0.18, "grad_norm": 4.155974388122559, "learning_rate": 1.9987530439079373e-05, "loss": 2.0929, "step": 14000 }, { "epoch": 0.18, "grad_norm": 5.287112236022949, "learning_rate": 1.998752519259119e-05, "loss": 2.3258, "step": 14001 }, { "epoch": 0.18, "grad_norm": 4.82365608215332, "learning_rate": 1.9987519945000212e-05, "loss": 3.0279, "step": 14002 }, { "epoch": 0.18, "grad_norm": 5.035998344421387, "learning_rate": 1.9987514696306446e-05, "loss": 2.9392, "step": 14003 }, { "epoch": 0.18, "grad_norm": 4.87091064453125, "learning_rate": 1.9987509446509884e-05, "loss": 2.5659, "step": 14004 }, { "epoch": 0.18, "grad_norm": 4.913960933685303, "learning_rate": 1.998750419561054e-05, "loss": 2.3154, "step": 14005 }, { "epoch": 0.18, "grad_norm": 4.700836658477783, "learning_rate": 1.9987498943608397e-05, "loss": 2.5193, "step": 14006 }, { "epoch": 0.18, "grad_norm": 5.4643683433532715, "learning_rate": 1.998749369050347e-05, "loss": 2.822, "step": 14007 }, { "epoch": 0.18, "grad_norm": 4.810218334197998, "learning_rate": 1.9987488436295753e-05, "loss": 2.0927, "step": 14008 }, { "epoch": 0.18, "grad_norm": 4.282737731933594, "learning_rate": 1.998748318098525e-05, "loss": 2.2341, "step": 14009 }, { "epoch": 0.18, "grad_norm": 4.212810039520264, "learning_rate": 1.9987477924571954e-05, "loss": 2.1469, "step": 14010 }, { "epoch": 0.18, "grad_norm": 4.191547393798828, "learning_rate": 1.9987472667055876e-05, "loss": 2.1006, "step": 14011 }, { "epoch": 0.18, "grad_norm": 4.230093479156494, "learning_rate": 1.9987467408437008e-05, "loss": 2.1501, "step": 14012 }, { "epoch": 0.18, "grad_norm": 4.644474506378174, "learning_rate": 1.9987462148715357e-05, "loss": 2.3464, "step": 14013 }, { "epoch": 0.18, "grad_norm": 4.168874740600586, "learning_rate": 1.998745688789092e-05, "loss": 2.2577, "step": 14014 }, { "epoch": 0.18, "grad_norm": 4.879915714263916, "learning_rate": 1.9987451625963698e-05, "loss": 2.6879, "step": 14015 }, { "epoch": 0.18, "grad_norm": 4.571920871734619, "learning_rate": 1.9987446362933693e-05, "loss": 2.0685, "step": 14016 }, { "epoch": 0.18, "grad_norm": 4.281394004821777, "learning_rate": 1.9987441098800904e-05, "loss": 2.0353, "step": 14017 }, { "epoch": 0.18, "grad_norm": 4.377927780151367, "learning_rate": 1.998743583356533e-05, "loss": 2.2265, "step": 14018 }, { "epoch": 0.18, "grad_norm": 4.070380687713623, "learning_rate": 1.9987430567226977e-05, "loss": 1.9741, "step": 14019 }, { "epoch": 0.18, "grad_norm": 4.265462875366211, "learning_rate": 1.9987425299785837e-05, "loss": 1.8987, "step": 14020 }, { "epoch": 0.18, "grad_norm": 5.173040390014648, "learning_rate": 1.998742003124192e-05, "loss": 2.4252, "step": 14021 }, { "epoch": 0.18, "grad_norm": 5.145983695983887, "learning_rate": 1.9987414761595224e-05, "loss": 2.9272, "step": 14022 }, { "epoch": 0.18, "grad_norm": 5.270602226257324, "learning_rate": 1.9987409490845743e-05, "loss": 2.757, "step": 14023 }, { "epoch": 0.18, "grad_norm": 4.822906970977783, "learning_rate": 1.9987404218993486e-05, "loss": 1.9317, "step": 14024 }, { "epoch": 0.18, "grad_norm": 4.077288627624512, "learning_rate": 1.998739894603845e-05, "loss": 1.9528, "step": 14025 }, { "epoch": 0.18, "grad_norm": 4.067139625549316, "learning_rate": 1.9987393671980634e-05, "loss": 1.7065, "step": 14026 }, { "epoch": 0.18, "grad_norm": 4.450578212738037, "learning_rate": 1.9987388396820043e-05, "loss": 2.0531, "step": 14027 }, { "epoch": 0.18, "grad_norm": 4.929566383361816, "learning_rate": 1.9987383120556673e-05, "loss": 2.1294, "step": 14028 }, { "epoch": 0.18, "grad_norm": 4.281188011169434, "learning_rate": 1.9987377843190526e-05, "loss": 2.3319, "step": 14029 }, { "epoch": 0.18, "grad_norm": 4.863198757171631, "learning_rate": 1.9987372564721604e-05, "loss": 2.7199, "step": 14030 }, { "epoch": 0.18, "grad_norm": 4.346127986907959, "learning_rate": 1.9987367285149905e-05, "loss": 2.1353, "step": 14031 }, { "epoch": 0.18, "grad_norm": 4.714461803436279, "learning_rate": 1.998736200447543e-05, "loss": 2.4822, "step": 14032 }, { "epoch": 0.18, "grad_norm": 4.4099555015563965, "learning_rate": 1.9987356722698184e-05, "loss": 2.2379, "step": 14033 }, { "epoch": 0.18, "grad_norm": 4.9859209060668945, "learning_rate": 1.998735143981817e-05, "loss": 2.4741, "step": 14034 }, { "epoch": 0.18, "grad_norm": 4.41294527053833, "learning_rate": 1.9987346155835373e-05, "loss": 2.0535, "step": 14035 }, { "epoch": 0.18, "grad_norm": 4.207385540008545, "learning_rate": 1.9987340870749806e-05, "loss": 2.3438, "step": 14036 }, { "epoch": 0.18, "grad_norm": 3.9430441856384277, "learning_rate": 1.998733558456147e-05, "loss": 1.7998, "step": 14037 }, { "epoch": 0.18, "grad_norm": 4.340242862701416, "learning_rate": 1.9987330297270363e-05, "loss": 2.6198, "step": 14038 }, { "epoch": 0.18, "grad_norm": 4.570297718048096, "learning_rate": 1.998732500887648e-05, "loss": 2.4958, "step": 14039 }, { "epoch": 0.18, "grad_norm": 3.59372615814209, "learning_rate": 1.998731971937983e-05, "loss": 1.8732, "step": 14040 }, { "epoch": 0.18, "grad_norm": 4.608931064605713, "learning_rate": 1.9987314428780413e-05, "loss": 2.3503, "step": 14041 }, { "epoch": 0.18, "grad_norm": 4.3673624992370605, "learning_rate": 1.9987309137078224e-05, "loss": 2.4766, "step": 14042 }, { "epoch": 0.18, "grad_norm": 5.002110481262207, "learning_rate": 1.9987303844273266e-05, "loss": 2.2186, "step": 14043 }, { "epoch": 0.18, "grad_norm": 4.87161922454834, "learning_rate": 1.998729855036554e-05, "loss": 2.508, "step": 14044 }, { "epoch": 0.18, "grad_norm": 4.7778215408325195, "learning_rate": 1.998729325535505e-05, "loss": 3.0381, "step": 14045 }, { "epoch": 0.18, "grad_norm": 4.989300727844238, "learning_rate": 1.9987287959241792e-05, "loss": 2.494, "step": 14046 }, { "epoch": 0.18, "grad_norm": 4.477170944213867, "learning_rate": 1.9987282662025768e-05, "loss": 2.3875, "step": 14047 }, { "epoch": 0.18, "grad_norm": 4.255449295043945, "learning_rate": 1.9987277363706977e-05, "loss": 1.9433, "step": 14048 }, { "epoch": 0.18, "grad_norm": 4.979955673217773, "learning_rate": 1.9987272064285424e-05, "loss": 1.812, "step": 14049 }, { "epoch": 0.18, "grad_norm": 4.667741775512695, "learning_rate": 1.9987266763761106e-05, "loss": 2.3162, "step": 14050 }, { "epoch": 0.18, "grad_norm": 4.674494743347168, "learning_rate": 1.9987261462134025e-05, "loss": 2.001, "step": 14051 }, { "epoch": 0.18, "grad_norm": 3.802342176437378, "learning_rate": 1.9987256159404178e-05, "loss": 2.1818, "step": 14052 }, { "epoch": 0.18, "grad_norm": 4.484698295593262, "learning_rate": 1.998725085557157e-05, "loss": 2.4316, "step": 14053 }, { "epoch": 0.18, "grad_norm": 4.922063827514648, "learning_rate": 1.9987245550636197e-05, "loss": 1.9474, "step": 14054 }, { "epoch": 0.18, "grad_norm": 4.223681926727295, "learning_rate": 1.9987240244598067e-05, "loss": 1.9756, "step": 14055 }, { "epoch": 0.18, "grad_norm": 4.340301990509033, "learning_rate": 1.9987234937457174e-05, "loss": 2.4493, "step": 14056 }, { "epoch": 0.18, "grad_norm": 3.7698237895965576, "learning_rate": 1.9987229629213526e-05, "loss": 1.7916, "step": 14057 }, { "epoch": 0.18, "grad_norm": 4.2978515625, "learning_rate": 1.998722431986711e-05, "loss": 2.2867, "step": 14058 }, { "epoch": 0.18, "grad_norm": 4.0673370361328125, "learning_rate": 1.998721900941794e-05, "loss": 1.8164, "step": 14059 }, { "epoch": 0.18, "grad_norm": 5.245460033416748, "learning_rate": 1.9987213697866013e-05, "loss": 2.2993, "step": 14060 }, { "epoch": 0.18, "grad_norm": 4.053170680999756, "learning_rate": 1.9987208385211322e-05, "loss": 1.8418, "step": 14061 }, { "epoch": 0.18, "grad_norm": 4.22600793838501, "learning_rate": 1.998720307145388e-05, "loss": 2.1156, "step": 14062 }, { "epoch": 0.18, "grad_norm": 4.693276882171631, "learning_rate": 1.998719775659368e-05, "loss": 2.3135, "step": 14063 }, { "epoch": 0.18, "grad_norm": 4.276937007904053, "learning_rate": 1.998719244063072e-05, "loss": 2.1, "step": 14064 }, { "epoch": 0.18, "grad_norm": 4.813436985015869, "learning_rate": 1.998718712356501e-05, "loss": 2.5955, "step": 14065 }, { "epoch": 0.18, "grad_norm": 4.765401840209961, "learning_rate": 1.9987181805396543e-05, "loss": 2.4307, "step": 14066 }, { "epoch": 0.18, "grad_norm": 4.183908462524414, "learning_rate": 1.998717648612532e-05, "loss": 2.2371, "step": 14067 }, { "epoch": 0.18, "grad_norm": 4.574198246002197, "learning_rate": 1.9987171165751342e-05, "loss": 2.3048, "step": 14068 }, { "epoch": 0.18, "grad_norm": 4.318888187408447, "learning_rate": 1.9987165844274617e-05, "loss": 2.6947, "step": 14069 }, { "epoch": 0.18, "grad_norm": 4.261184215545654, "learning_rate": 1.9987160521695132e-05, "loss": 2.2249, "step": 14070 }, { "epoch": 0.18, "grad_norm": 5.122681617736816, "learning_rate": 1.99871551980129e-05, "loss": 2.5822, "step": 14071 }, { "epoch": 0.18, "grad_norm": 4.493227005004883, "learning_rate": 1.9987149873227917e-05, "loss": 2.3625, "step": 14072 }, { "epoch": 0.18, "grad_norm": 4.310608386993408, "learning_rate": 1.998714454734018e-05, "loss": 2.3215, "step": 14073 }, { "epoch": 0.18, "grad_norm": 4.469099998474121, "learning_rate": 1.9987139220349692e-05, "loss": 2.1789, "step": 14074 }, { "epoch": 0.18, "grad_norm": 4.509777069091797, "learning_rate": 1.998713389225646e-05, "loss": 2.5378, "step": 14075 }, { "epoch": 0.18, "grad_norm": 4.527596950531006, "learning_rate": 1.9987128563060476e-05, "loss": 2.7476, "step": 14076 }, { "epoch": 0.18, "grad_norm": 5.230167388916016, "learning_rate": 1.9987123232761744e-05, "loss": 2.034, "step": 14077 }, { "epoch": 0.18, "grad_norm": 4.32633113861084, "learning_rate": 1.9987117901360262e-05, "loss": 2.2647, "step": 14078 }, { "epoch": 0.18, "grad_norm": 4.976600646972656, "learning_rate": 1.9987112568856035e-05, "loss": 2.3027, "step": 14079 }, { "epoch": 0.18, "grad_norm": 4.448851108551025, "learning_rate": 1.9987107235249062e-05, "loss": 2.1013, "step": 14080 }, { "epoch": 0.18, "grad_norm": 4.347334384918213, "learning_rate": 1.998710190053934e-05, "loss": 2.0126, "step": 14081 }, { "epoch": 0.18, "grad_norm": 4.674292087554932, "learning_rate": 1.9987096564726874e-05, "loss": 2.6706, "step": 14082 }, { "epoch": 0.18, "grad_norm": 4.418411731719971, "learning_rate": 1.9987091227811665e-05, "loss": 2.5587, "step": 14083 }, { "epoch": 0.18, "grad_norm": 4.698522090911865, "learning_rate": 1.9987085889793708e-05, "loss": 2.5855, "step": 14084 }, { "epoch": 0.18, "grad_norm": 5.671481609344482, "learning_rate": 1.998708055067301e-05, "loss": 2.2591, "step": 14085 }, { "epoch": 0.18, "grad_norm": 3.594289779663086, "learning_rate": 1.9987075210449566e-05, "loss": 1.7555, "step": 14086 }, { "epoch": 0.18, "grad_norm": 4.282749652862549, "learning_rate": 1.9987069869123386e-05, "loss": 2.2456, "step": 14087 }, { "epoch": 0.18, "grad_norm": 5.0048675537109375, "learning_rate": 1.9987064526694457e-05, "loss": 1.9731, "step": 14088 }, { "epoch": 0.18, "grad_norm": 5.011548042297363, "learning_rate": 1.998705918316279e-05, "loss": 3.0239, "step": 14089 }, { "epoch": 0.18, "grad_norm": 4.648346424102783, "learning_rate": 1.998705383852838e-05, "loss": 2.4404, "step": 14090 }, { "epoch": 0.18, "grad_norm": 4.630107402801514, "learning_rate": 1.998704849279123e-05, "loss": 2.2458, "step": 14091 }, { "epoch": 0.18, "grad_norm": 4.3627824783325195, "learning_rate": 1.9987043145951347e-05, "loss": 2.4335, "step": 14092 }, { "epoch": 0.18, "grad_norm": 4.743342876434326, "learning_rate": 1.9987037798008718e-05, "loss": 2.1681, "step": 14093 }, { "epoch": 0.18, "grad_norm": 4.293964862823486, "learning_rate": 1.9987032448963354e-05, "loss": 2.0429, "step": 14094 }, { "epoch": 0.18, "grad_norm": 4.586354732513428, "learning_rate": 1.9987027098815247e-05, "loss": 2.6799, "step": 14095 }, { "epoch": 0.18, "grad_norm": 5.333573818206787, "learning_rate": 1.998702174756441e-05, "loss": 2.354, "step": 14096 }, { "epoch": 0.18, "grad_norm": 4.357401371002197, "learning_rate": 1.998701639521083e-05, "loss": 2.5349, "step": 14097 }, { "epoch": 0.18, "grad_norm": 4.775345325469971, "learning_rate": 1.9987011041754517e-05, "loss": 2.6988, "step": 14098 }, { "epoch": 0.18, "grad_norm": 4.871994495391846, "learning_rate": 1.9987005687195466e-05, "loss": 2.8786, "step": 14099 }, { "epoch": 0.18, "grad_norm": 3.6496403217315674, "learning_rate": 1.9987000331533683e-05, "loss": 1.8437, "step": 14100 }, { "epoch": 0.18, "grad_norm": 4.41222620010376, "learning_rate": 1.9986994974769165e-05, "loss": 2.4831, "step": 14101 }, { "epoch": 0.18, "grad_norm": 4.703390598297119, "learning_rate": 1.9986989616901912e-05, "loss": 2.4119, "step": 14102 }, { "epoch": 0.18, "grad_norm": 4.382725238800049, "learning_rate": 1.998698425793193e-05, "loss": 2.1664, "step": 14103 }, { "epoch": 0.18, "grad_norm": 4.699751853942871, "learning_rate": 1.998697889785921e-05, "loss": 1.8539, "step": 14104 }, { "epoch": 0.18, "grad_norm": 4.2026777267456055, "learning_rate": 1.998697353668376e-05, "loss": 2.0284, "step": 14105 }, { "epoch": 0.18, "grad_norm": 4.543116092681885, "learning_rate": 1.9986968174405584e-05, "loss": 2.0976, "step": 14106 }, { "epoch": 0.18, "grad_norm": 4.498281955718994, "learning_rate": 1.998696281102467e-05, "loss": 2.4583, "step": 14107 }, { "epoch": 0.18, "grad_norm": 4.095114231109619, "learning_rate": 1.998695744654103e-05, "loss": 2.0317, "step": 14108 }, { "epoch": 0.18, "grad_norm": 4.189915180206299, "learning_rate": 1.998695208095466e-05, "loss": 2.0435, "step": 14109 }, { "epoch": 0.18, "grad_norm": 4.030698776245117, "learning_rate": 1.9986946714265557e-05, "loss": 2.1628, "step": 14110 }, { "epoch": 0.18, "grad_norm": 4.646031379699707, "learning_rate": 1.998694134647373e-05, "loss": 2.5882, "step": 14111 }, { "epoch": 0.18, "grad_norm": 4.494781017303467, "learning_rate": 1.9986935977579175e-05, "loss": 2.2582, "step": 14112 }, { "epoch": 0.18, "grad_norm": 4.113405704498291, "learning_rate": 1.9986930607581892e-05, "loss": 2.0309, "step": 14113 }, { "epoch": 0.18, "grad_norm": 3.941164493560791, "learning_rate": 1.9986925236481883e-05, "loss": 2.073, "step": 14114 }, { "epoch": 0.18, "grad_norm": 4.6330742835998535, "learning_rate": 1.9986919864279147e-05, "loss": 2.4545, "step": 14115 }, { "epoch": 0.18, "grad_norm": 4.031282424926758, "learning_rate": 1.9986914490973688e-05, "loss": 1.9713, "step": 14116 }, { "epoch": 0.18, "grad_norm": 4.53983736038208, "learning_rate": 1.99869091165655e-05, "loss": 2.0713, "step": 14117 }, { "epoch": 0.18, "grad_norm": 4.518949508666992, "learning_rate": 1.9986903741054592e-05, "loss": 2.3791, "step": 14118 }, { "epoch": 0.18, "grad_norm": 4.682703018188477, "learning_rate": 1.9986898364440955e-05, "loss": 2.4333, "step": 14119 }, { "epoch": 0.18, "grad_norm": 3.3810646533966064, "learning_rate": 1.99868929867246e-05, "loss": 1.6786, "step": 14120 }, { "epoch": 0.18, "grad_norm": 4.412724018096924, "learning_rate": 1.9986887607905522e-05, "loss": 2.378, "step": 14121 }, { "epoch": 0.18, "grad_norm": 5.090981960296631, "learning_rate": 1.998688222798372e-05, "loss": 2.8312, "step": 14122 }, { "epoch": 0.18, "grad_norm": 4.169541358947754, "learning_rate": 1.99868768469592e-05, "loss": 1.8833, "step": 14123 }, { "epoch": 0.18, "grad_norm": 4.617278575897217, "learning_rate": 1.9986871464831956e-05, "loss": 2.2694, "step": 14124 }, { "epoch": 0.18, "grad_norm": 4.433929443359375, "learning_rate": 1.9986866081601996e-05, "loss": 1.9794, "step": 14125 }, { "epoch": 0.18, "grad_norm": 4.371583938598633, "learning_rate": 1.9986860697269313e-05, "loss": 2.137, "step": 14126 }, { "epoch": 0.18, "grad_norm": 4.091494560241699, "learning_rate": 1.998685531183391e-05, "loss": 2.1983, "step": 14127 }, { "epoch": 0.18, "grad_norm": 4.282941818237305, "learning_rate": 1.998684992529579e-05, "loss": 2.2711, "step": 14128 }, { "epoch": 0.18, "grad_norm": 4.266471862792969, "learning_rate": 1.9986844537654956e-05, "loss": 1.9966, "step": 14129 }, { "epoch": 0.18, "grad_norm": 4.703651428222656, "learning_rate": 1.99868391489114e-05, "loss": 2.2812, "step": 14130 }, { "epoch": 0.18, "grad_norm": 4.724820137023926, "learning_rate": 1.998683375906513e-05, "loss": 2.5265, "step": 14131 }, { "epoch": 0.18, "grad_norm": 3.936143636703491, "learning_rate": 1.9986828368116145e-05, "loss": 2.1086, "step": 14132 }, { "epoch": 0.18, "grad_norm": 5.37601375579834, "learning_rate": 1.998682297606444e-05, "loss": 2.1791, "step": 14133 }, { "epoch": 0.18, "grad_norm": 4.578895092010498, "learning_rate": 1.9986817582910025e-05, "loss": 2.2758, "step": 14134 }, { "epoch": 0.18, "grad_norm": 4.406754493713379, "learning_rate": 1.9986812188652895e-05, "loss": 2.4023, "step": 14135 }, { "epoch": 0.18, "grad_norm": 4.5024800300598145, "learning_rate": 1.9986806793293053e-05, "loss": 2.3794, "step": 14136 }, { "epoch": 0.18, "grad_norm": 4.379785060882568, "learning_rate": 1.9986801396830497e-05, "loss": 2.4553, "step": 14137 }, { "epoch": 0.18, "grad_norm": 4.3287553787231445, "learning_rate": 1.9986795999265225e-05, "loss": 2.4558, "step": 14138 }, { "epoch": 0.18, "grad_norm": 3.9972126483917236, "learning_rate": 1.9986790600597245e-05, "loss": 1.837, "step": 14139 }, { "epoch": 0.18, "grad_norm": 4.034024238586426, "learning_rate": 1.9986785200826554e-05, "loss": 2.1403, "step": 14140 }, { "epoch": 0.18, "grad_norm": 4.986600875854492, "learning_rate": 1.998677979995315e-05, "loss": 2.6229, "step": 14141 }, { "epoch": 0.18, "grad_norm": 4.838857173919678, "learning_rate": 1.9986774397977036e-05, "loss": 2.6122, "step": 14142 }, { "epoch": 0.18, "grad_norm": 4.602822780609131, "learning_rate": 1.9986768994898217e-05, "loss": 1.9146, "step": 14143 }, { "epoch": 0.18, "grad_norm": 4.150525093078613, "learning_rate": 1.9986763590716687e-05, "loss": 2.0041, "step": 14144 }, { "epoch": 0.18, "grad_norm": 4.474818706512451, "learning_rate": 1.9986758185432448e-05, "loss": 2.2158, "step": 14145 }, { "epoch": 0.18, "grad_norm": 4.610186576843262, "learning_rate": 1.99867527790455e-05, "loss": 2.3585, "step": 14146 }, { "epoch": 0.18, "grad_norm": 4.406601905822754, "learning_rate": 1.998674737155585e-05, "loss": 2.2539, "step": 14147 }, { "epoch": 0.18, "grad_norm": 4.279137134552002, "learning_rate": 1.998674196296349e-05, "loss": 2.1823, "step": 14148 }, { "epoch": 0.18, "grad_norm": 4.8763275146484375, "learning_rate": 1.9986736553268424e-05, "loss": 2.4645, "step": 14149 }, { "epoch": 0.18, "grad_norm": 4.475849151611328, "learning_rate": 1.9986731142470655e-05, "loss": 1.9795, "step": 14150 }, { "epoch": 0.18, "grad_norm": 3.88700008392334, "learning_rate": 1.998672573057018e-05, "loss": 2.0186, "step": 14151 }, { "epoch": 0.18, "grad_norm": 4.040409564971924, "learning_rate": 1.9986720317567e-05, "loss": 1.8036, "step": 14152 }, { "epoch": 0.18, "grad_norm": 4.17612886428833, "learning_rate": 1.9986714903461118e-05, "loss": 1.9689, "step": 14153 }, { "epoch": 0.18, "grad_norm": 5.12786340713501, "learning_rate": 1.9986709488252533e-05, "loss": 2.5722, "step": 14154 }, { "epoch": 0.18, "grad_norm": 4.611968517303467, "learning_rate": 1.9986704071941246e-05, "loss": 2.5205, "step": 14155 }, { "epoch": 0.18, "grad_norm": 3.623688220977783, "learning_rate": 1.9986698654527255e-05, "loss": 1.3813, "step": 14156 }, { "epoch": 0.18, "grad_norm": 4.696049213409424, "learning_rate": 1.998669323601057e-05, "loss": 1.9584, "step": 14157 }, { "epoch": 0.18, "grad_norm": 5.283667087554932, "learning_rate": 1.998668781639118e-05, "loss": 2.8718, "step": 14158 }, { "epoch": 0.18, "grad_norm": 4.1646318435668945, "learning_rate": 1.998668239566909e-05, "loss": 2.0126, "step": 14159 }, { "epoch": 0.18, "grad_norm": 4.367808818817139, "learning_rate": 1.99866769738443e-05, "loss": 2.0585, "step": 14160 }, { "epoch": 0.18, "grad_norm": 3.682263135910034, "learning_rate": 1.9986671550916812e-05, "loss": 1.7051, "step": 14161 }, { "epoch": 0.18, "grad_norm": 5.100513458251953, "learning_rate": 1.9986666126886626e-05, "loss": 2.064, "step": 14162 }, { "epoch": 0.18, "grad_norm": 4.174334526062012, "learning_rate": 1.9986660701753746e-05, "loss": 2.3459, "step": 14163 }, { "epoch": 0.18, "grad_norm": 4.189294338226318, "learning_rate": 1.9986655275518164e-05, "loss": 1.8763, "step": 14164 }, { "epoch": 0.18, "grad_norm": 4.020230293273926, "learning_rate": 1.9986649848179888e-05, "loss": 2.0337, "step": 14165 }, { "epoch": 0.18, "grad_norm": 4.459935188293457, "learning_rate": 1.998664441973892e-05, "loss": 2.4115, "step": 14166 }, { "epoch": 0.18, "grad_norm": 4.4282660484313965, "learning_rate": 1.9986638990195254e-05, "loss": 2.4122, "step": 14167 }, { "epoch": 0.18, "grad_norm": 5.269876480102539, "learning_rate": 1.998663355954889e-05, "loss": 2.3468, "step": 14168 }, { "epoch": 0.18, "grad_norm": 4.444557189941406, "learning_rate": 1.998662812779984e-05, "loss": 2.1883, "step": 14169 }, { "epoch": 0.18, "grad_norm": 5.166380405426025, "learning_rate": 1.998662269494809e-05, "loss": 2.3329, "step": 14170 }, { "epoch": 0.18, "grad_norm": 4.173636436462402, "learning_rate": 1.998661726099365e-05, "loss": 2.2914, "step": 14171 }, { "epoch": 0.18, "grad_norm": 4.207357406616211, "learning_rate": 1.9986611825936517e-05, "loss": 2.2233, "step": 14172 }, { "epoch": 0.18, "grad_norm": 4.1016645431518555, "learning_rate": 1.9986606389776694e-05, "loss": 2.1376, "step": 14173 }, { "epoch": 0.18, "grad_norm": 4.416130065917969, "learning_rate": 1.998660095251418e-05, "loss": 2.2429, "step": 14174 }, { "epoch": 0.18, "grad_norm": 4.333789825439453, "learning_rate": 1.9986595514148978e-05, "loss": 2.0447, "step": 14175 }, { "epoch": 0.18, "grad_norm": 4.7691473960876465, "learning_rate": 1.998659007468108e-05, "loss": 2.6291, "step": 14176 }, { "epoch": 0.18, "grad_norm": 4.861827850341797, "learning_rate": 1.99865846341105e-05, "loss": 2.3224, "step": 14177 }, { "epoch": 0.18, "grad_norm": 4.300387859344482, "learning_rate": 1.9986579192437228e-05, "loss": 1.8812, "step": 14178 }, { "epoch": 0.18, "grad_norm": 5.2248334884643555, "learning_rate": 1.9986573749661267e-05, "loss": 2.3821, "step": 14179 }, { "epoch": 0.18, "grad_norm": 4.9822468757629395, "learning_rate": 1.9986568305782623e-05, "loss": 2.7609, "step": 14180 }, { "epoch": 0.18, "grad_norm": 4.578269958496094, "learning_rate": 1.998656286080129e-05, "loss": 2.4912, "step": 14181 }, { "epoch": 0.18, "grad_norm": 3.841707229614258, "learning_rate": 1.998655741471727e-05, "loss": 1.7456, "step": 14182 }, { "epoch": 0.18, "grad_norm": 4.581357002258301, "learning_rate": 1.998655196753057e-05, "loss": 2.5222, "step": 14183 }, { "epoch": 0.18, "grad_norm": 4.8274641036987305, "learning_rate": 1.998654651924118e-05, "loss": 2.9288, "step": 14184 }, { "epoch": 0.18, "grad_norm": 4.3236894607543945, "learning_rate": 1.9986541069849106e-05, "loss": 2.2554, "step": 14185 }, { "epoch": 0.18, "grad_norm": 4.082208633422852, "learning_rate": 1.998653561935435e-05, "loss": 1.9777, "step": 14186 }, { "epoch": 0.18, "grad_norm": 4.802998065948486, "learning_rate": 1.998653016775691e-05, "loss": 2.1595, "step": 14187 }, { "epoch": 0.18, "grad_norm": 4.629563331604004, "learning_rate": 1.998652471505679e-05, "loss": 2.7086, "step": 14188 }, { "epoch": 0.18, "grad_norm": 4.159627437591553, "learning_rate": 1.9986519261253985e-05, "loss": 2.0217, "step": 14189 }, { "epoch": 0.18, "grad_norm": 4.69852352142334, "learning_rate": 1.99865138063485e-05, "loss": 1.886, "step": 14190 }, { "epoch": 0.18, "grad_norm": 4.388012409210205, "learning_rate": 1.9986508350340336e-05, "loss": 2.3765, "step": 14191 }, { "epoch": 0.18, "grad_norm": 4.2595930099487305, "learning_rate": 1.998650289322949e-05, "loss": 2.249, "step": 14192 }, { "epoch": 0.18, "grad_norm": 4.5443243980407715, "learning_rate": 1.9986497435015963e-05, "loss": 2.0869, "step": 14193 }, { "epoch": 0.18, "grad_norm": 4.850910186767578, "learning_rate": 1.9986491975699764e-05, "loss": 2.3751, "step": 14194 }, { "epoch": 0.18, "grad_norm": 3.9043054580688477, "learning_rate": 1.998648651528088e-05, "loss": 2.0973, "step": 14195 }, { "epoch": 0.18, "grad_norm": 4.630000114440918, "learning_rate": 1.998648105375932e-05, "loss": 2.4415, "step": 14196 }, { "epoch": 0.18, "grad_norm": 4.687661170959473, "learning_rate": 1.9986475591135086e-05, "loss": 2.0774, "step": 14197 }, { "epoch": 0.18, "grad_norm": 4.607484817504883, "learning_rate": 1.9986470127408172e-05, "loss": 2.6611, "step": 14198 }, { "epoch": 0.18, "grad_norm": 4.1916327476501465, "learning_rate": 1.9986464662578588e-05, "loss": 2.1154, "step": 14199 }, { "epoch": 0.18, "grad_norm": 4.699650287628174, "learning_rate": 1.9986459196646322e-05, "loss": 2.0012, "step": 14200 }, { "epoch": 0.18, "grad_norm": 3.9347665309906006, "learning_rate": 1.9986453729611383e-05, "loss": 1.728, "step": 14201 }, { "epoch": 0.18, "grad_norm": 4.042327404022217, "learning_rate": 1.9986448261473775e-05, "loss": 2.1156, "step": 14202 }, { "epoch": 0.18, "grad_norm": 3.9415628910064697, "learning_rate": 1.9986442792233487e-05, "loss": 1.7744, "step": 14203 }, { "epoch": 0.18, "grad_norm": 4.2935404777526855, "learning_rate": 1.998643732189053e-05, "loss": 2.0892, "step": 14204 }, { "epoch": 0.18, "grad_norm": 4.477507591247559, "learning_rate": 1.99864318504449e-05, "loss": 2.0654, "step": 14205 }, { "epoch": 0.18, "grad_norm": 4.389122009277344, "learning_rate": 1.99864263778966e-05, "loss": 2.1114, "step": 14206 }, { "epoch": 0.18, "grad_norm": 3.967388391494751, "learning_rate": 1.998642090424563e-05, "loss": 2.0158, "step": 14207 }, { "epoch": 0.18, "grad_norm": 4.547857761383057, "learning_rate": 1.9986415429491986e-05, "loss": 2.1841, "step": 14208 }, { "epoch": 0.18, "grad_norm": 4.67773962020874, "learning_rate": 1.9986409953635673e-05, "loss": 2.0542, "step": 14209 }, { "epoch": 0.18, "grad_norm": 4.076555252075195, "learning_rate": 1.998640447667669e-05, "loss": 2.0662, "step": 14210 }, { "epoch": 0.18, "grad_norm": 4.5322418212890625, "learning_rate": 1.9986398998615043e-05, "loss": 1.9675, "step": 14211 }, { "epoch": 0.18, "grad_norm": 4.297439098358154, "learning_rate": 1.9986393519450727e-05, "loss": 2.138, "step": 14212 }, { "epoch": 0.18, "grad_norm": 4.836276054382324, "learning_rate": 1.998638803918374e-05, "loss": 2.2856, "step": 14213 }, { "epoch": 0.18, "grad_norm": 4.298810958862305, "learning_rate": 1.998638255781409e-05, "loss": 2.0508, "step": 14214 }, { "epoch": 0.18, "grad_norm": 4.300309658050537, "learning_rate": 1.998637707534177e-05, "loss": 2.3221, "step": 14215 }, { "epoch": 0.18, "grad_norm": 5.151665210723877, "learning_rate": 1.9986371591766788e-05, "loss": 2.6098, "step": 14216 }, { "epoch": 0.18, "grad_norm": 4.527304172515869, "learning_rate": 1.998636610708914e-05, "loss": 2.5092, "step": 14217 }, { "epoch": 0.18, "grad_norm": 4.708105087280273, "learning_rate": 1.998636062130883e-05, "loss": 2.4169, "step": 14218 }, { "epoch": 0.18, "grad_norm": 4.032897472381592, "learning_rate": 1.9986355134425852e-05, "loss": 2.0308, "step": 14219 }, { "epoch": 0.18, "grad_norm": 4.4152021408081055, "learning_rate": 1.9986349646440213e-05, "loss": 2.1449, "step": 14220 }, { "epoch": 0.18, "grad_norm": 4.039608955383301, "learning_rate": 1.9986344157351914e-05, "loss": 1.9362, "step": 14221 }, { "epoch": 0.18, "grad_norm": 3.939809799194336, "learning_rate": 1.998633866716095e-05, "loss": 2.264, "step": 14222 }, { "epoch": 0.18, "grad_norm": 3.942058563232422, "learning_rate": 1.998633317586733e-05, "loss": 1.604, "step": 14223 }, { "epoch": 0.18, "grad_norm": 4.516855239868164, "learning_rate": 1.998632768347104e-05, "loss": 2.2597, "step": 14224 }, { "epoch": 0.18, "grad_norm": 4.54718542098999, "learning_rate": 1.9986322189972098e-05, "loss": 2.3461, "step": 14225 }, { "epoch": 0.18, "grad_norm": 5.236942768096924, "learning_rate": 1.9986316695370494e-05, "loss": 2.0873, "step": 14226 }, { "epoch": 0.18, "grad_norm": 5.060459613800049, "learning_rate": 1.998631119966623e-05, "loss": 2.5003, "step": 14227 }, { "epoch": 0.18, "grad_norm": 4.553661823272705, "learning_rate": 1.998630570285931e-05, "loss": 2.3057, "step": 14228 }, { "epoch": 0.18, "grad_norm": 4.49742317199707, "learning_rate": 1.998630020494973e-05, "loss": 2.2897, "step": 14229 }, { "epoch": 0.18, "grad_norm": 4.116176128387451, "learning_rate": 1.9986294705937498e-05, "loss": 2.0785, "step": 14230 }, { "epoch": 0.18, "grad_norm": 5.0050883293151855, "learning_rate": 1.9986289205822605e-05, "loss": 2.8097, "step": 14231 }, { "epoch": 0.18, "grad_norm": 4.220829963684082, "learning_rate": 1.9986283704605057e-05, "loss": 2.1436, "step": 14232 }, { "epoch": 0.18, "grad_norm": 5.080482482910156, "learning_rate": 1.9986278202284853e-05, "loss": 2.53, "step": 14233 }, { "epoch": 0.18, "grad_norm": 5.4355692863464355, "learning_rate": 1.9986272698861998e-05, "loss": 2.8332, "step": 14234 }, { "epoch": 0.18, "grad_norm": 4.622243881225586, "learning_rate": 1.9986267194336485e-05, "loss": 2.0039, "step": 14235 }, { "epoch": 0.18, "grad_norm": 3.7386796474456787, "learning_rate": 1.998626168870832e-05, "loss": 1.902, "step": 14236 }, { "epoch": 0.18, "grad_norm": 4.463401794433594, "learning_rate": 1.9986256181977502e-05, "loss": 2.0288, "step": 14237 }, { "epoch": 0.18, "grad_norm": 4.371326923370361, "learning_rate": 1.998625067414403e-05, "loss": 2.2437, "step": 14238 }, { "epoch": 0.18, "grad_norm": 4.289981365203857, "learning_rate": 1.9986245165207913e-05, "loss": 2.3642, "step": 14239 }, { "epoch": 0.18, "grad_norm": 5.063274383544922, "learning_rate": 1.998623965516914e-05, "loss": 2.3657, "step": 14240 }, { "epoch": 0.18, "grad_norm": 4.750138759613037, "learning_rate": 1.998623414402772e-05, "loss": 2.3122, "step": 14241 }, { "epoch": 0.18, "grad_norm": 4.511466979980469, "learning_rate": 1.9986228631783646e-05, "loss": 2.5255, "step": 14242 }, { "epoch": 0.18, "grad_norm": 4.0734734535217285, "learning_rate": 1.9986223118436925e-05, "loss": 1.9491, "step": 14243 }, { "epoch": 0.18, "grad_norm": 4.46796989440918, "learning_rate": 1.9986217603987558e-05, "loss": 2.0894, "step": 14244 }, { "epoch": 0.18, "grad_norm": 4.452682971954346, "learning_rate": 1.998621208843554e-05, "loss": 2.3314, "step": 14245 }, { "epoch": 0.18, "grad_norm": 5.506025791168213, "learning_rate": 1.9986206571780875e-05, "loss": 2.1949, "step": 14246 }, { "epoch": 0.18, "grad_norm": 4.700343132019043, "learning_rate": 1.9986201054023563e-05, "loss": 1.9915, "step": 14247 }, { "epoch": 0.18, "grad_norm": 5.066288948059082, "learning_rate": 1.9986195535163607e-05, "loss": 2.162, "step": 14248 }, { "epoch": 0.18, "grad_norm": 4.726134300231934, "learning_rate": 1.9986190015201004e-05, "loss": 2.065, "step": 14249 }, { "epoch": 0.18, "grad_norm": 4.403445720672607, "learning_rate": 1.998618449413576e-05, "loss": 1.8627, "step": 14250 }, { "epoch": 0.18, "grad_norm": 3.9683492183685303, "learning_rate": 1.9986178971967866e-05, "loss": 1.6309, "step": 14251 }, { "epoch": 0.18, "grad_norm": 4.20247745513916, "learning_rate": 1.998617344869733e-05, "loss": 2.0927, "step": 14252 }, { "epoch": 0.18, "grad_norm": 4.160782337188721, "learning_rate": 1.9986167924324153e-05, "loss": 2.4048, "step": 14253 }, { "epoch": 0.18, "grad_norm": 4.48523473739624, "learning_rate": 1.998616239884833e-05, "loss": 1.8931, "step": 14254 }, { "epoch": 0.18, "grad_norm": 4.460635185241699, "learning_rate": 1.9986156872269872e-05, "loss": 2.398, "step": 14255 }, { "epoch": 0.19, "grad_norm": 4.212780475616455, "learning_rate": 1.998615134458877e-05, "loss": 2.4532, "step": 14256 }, { "epoch": 0.19, "grad_norm": 4.716455936431885, "learning_rate": 1.9986145815805026e-05, "loss": 2.1858, "step": 14257 }, { "epoch": 0.19, "grad_norm": 4.408437728881836, "learning_rate": 1.998614028591864e-05, "loss": 2.1427, "step": 14258 }, { "epoch": 0.19, "grad_norm": 3.947179079055786, "learning_rate": 1.9986134754929618e-05, "loss": 2.1049, "step": 14259 }, { "epoch": 0.19, "grad_norm": 4.657158374786377, "learning_rate": 1.9986129222837957e-05, "loss": 2.1746, "step": 14260 }, { "epoch": 0.19, "grad_norm": 3.9495880603790283, "learning_rate": 1.9986123689643656e-05, "loss": 1.6534, "step": 14261 }, { "epoch": 0.19, "grad_norm": 4.585160255432129, "learning_rate": 1.9986118155346718e-05, "loss": 2.1804, "step": 14262 }, { "epoch": 0.19, "grad_norm": 3.998253345489502, "learning_rate": 1.9986112619947146e-05, "loss": 2.153, "step": 14263 }, { "epoch": 0.19, "grad_norm": 4.998622417449951, "learning_rate": 1.9986107083444937e-05, "loss": 2.8907, "step": 14264 }, { "epoch": 0.19, "grad_norm": 5.00671911239624, "learning_rate": 1.998610154584009e-05, "loss": 3.1497, "step": 14265 }, { "epoch": 0.19, "grad_norm": 3.742821216583252, "learning_rate": 1.9986096007132608e-05, "loss": 1.6343, "step": 14266 }, { "epoch": 0.19, "grad_norm": 4.019918918609619, "learning_rate": 1.9986090467322494e-05, "loss": 1.9329, "step": 14267 }, { "epoch": 0.19, "grad_norm": 4.2533159255981445, "learning_rate": 1.9986084926409744e-05, "loss": 2.1671, "step": 14268 }, { "epoch": 0.19, "grad_norm": 3.7056901454925537, "learning_rate": 1.9986079384394364e-05, "loss": 1.9573, "step": 14269 }, { "epoch": 0.19, "grad_norm": 4.465987205505371, "learning_rate": 1.9986073841276346e-05, "loss": 2.0983, "step": 14270 }, { "epoch": 0.19, "grad_norm": 4.315191268920898, "learning_rate": 1.9986068297055702e-05, "loss": 2.093, "step": 14271 }, { "epoch": 0.19, "grad_norm": 4.278530597686768, "learning_rate": 1.998606275173242e-05, "loss": 2.2059, "step": 14272 }, { "epoch": 0.19, "grad_norm": 4.426839351654053, "learning_rate": 1.9986057205306513e-05, "loss": 2.0166, "step": 14273 }, { "epoch": 0.19, "grad_norm": 4.996886253356934, "learning_rate": 1.9986051657777975e-05, "loss": 2.4203, "step": 14274 }, { "epoch": 0.19, "grad_norm": 4.647979259490967, "learning_rate": 1.9986046109146806e-05, "loss": 2.2281, "step": 14275 }, { "epoch": 0.19, "grad_norm": 4.931005954742432, "learning_rate": 1.998604055941301e-05, "loss": 2.7334, "step": 14276 }, { "epoch": 0.19, "grad_norm": 4.5817975997924805, "learning_rate": 1.9986035008576582e-05, "loss": 2.417, "step": 14277 }, { "epoch": 0.19, "grad_norm": 4.541940212249756, "learning_rate": 1.998602945663753e-05, "loss": 2.4193, "step": 14278 }, { "epoch": 0.19, "grad_norm": 3.7508249282836914, "learning_rate": 1.9986023903595847e-05, "loss": 1.5768, "step": 14279 }, { "epoch": 0.19, "grad_norm": 3.961724281311035, "learning_rate": 1.998601834945154e-05, "loss": 2.0067, "step": 14280 }, { "epoch": 0.19, "grad_norm": 4.13129997253418, "learning_rate": 1.998601279420461e-05, "loss": 1.7412, "step": 14281 }, { "epoch": 0.19, "grad_norm": 4.03481388092041, "learning_rate": 1.998600723785505e-05, "loss": 1.9628, "step": 14282 }, { "epoch": 0.19, "grad_norm": 4.060113430023193, "learning_rate": 1.9986001680402868e-05, "loss": 1.9612, "step": 14283 }, { "epoch": 0.19, "grad_norm": 3.548337697982788, "learning_rate": 1.998599612184806e-05, "loss": 1.6579, "step": 14284 }, { "epoch": 0.19, "grad_norm": 4.636675834655762, "learning_rate": 1.998599056219063e-05, "loss": 2.207, "step": 14285 }, { "epoch": 0.19, "grad_norm": 4.813174247741699, "learning_rate": 1.9985985001430578e-05, "loss": 2.3837, "step": 14286 }, { "epoch": 0.19, "grad_norm": 4.6820478439331055, "learning_rate": 1.9985979439567904e-05, "loss": 2.5433, "step": 14287 }, { "epoch": 0.19, "grad_norm": 4.659643173217773, "learning_rate": 1.9985973876602607e-05, "loss": 2.932, "step": 14288 }, { "epoch": 0.19, "grad_norm": 4.718502044677734, "learning_rate": 1.998596831253469e-05, "loss": 2.4175, "step": 14289 }, { "epoch": 0.19, "grad_norm": 4.703545570373535, "learning_rate": 1.998596274736415e-05, "loss": 2.3015, "step": 14290 }, { "epoch": 0.19, "grad_norm": 4.339473724365234, "learning_rate": 1.9985957181090993e-05, "loss": 2.3285, "step": 14291 }, { "epoch": 0.19, "grad_norm": 4.6960368156433105, "learning_rate": 1.998595161371522e-05, "loss": 2.4789, "step": 14292 }, { "epoch": 0.19, "grad_norm": 5.007978916168213, "learning_rate": 1.998594604523682e-05, "loss": 2.4624, "step": 14293 }, { "epoch": 0.19, "grad_norm": 4.492223262786865, "learning_rate": 1.998594047565581e-05, "loss": 2.2683, "step": 14294 }, { "epoch": 0.19, "grad_norm": 4.054882049560547, "learning_rate": 1.9985934904972177e-05, "loss": 2.1149, "step": 14295 }, { "epoch": 0.19, "grad_norm": 4.733531475067139, "learning_rate": 1.9985929333185933e-05, "loss": 2.5902, "step": 14296 }, { "epoch": 0.19, "grad_norm": 5.485479831695557, "learning_rate": 1.998592376029707e-05, "loss": 2.5595, "step": 14297 }, { "epoch": 0.19, "grad_norm": 4.21347713470459, "learning_rate": 1.9985918186305592e-05, "loss": 1.9474, "step": 14298 }, { "epoch": 0.19, "grad_norm": 4.066742420196533, "learning_rate": 1.99859126112115e-05, "loss": 2.1963, "step": 14299 }, { "epoch": 0.19, "grad_norm": 3.845738172531128, "learning_rate": 1.998590703501479e-05, "loss": 1.8992, "step": 14300 }, { "epoch": 0.19, "grad_norm": 4.581493854522705, "learning_rate": 1.998590145771547e-05, "loss": 2.6323, "step": 14301 }, { "epoch": 0.19, "grad_norm": 4.906214237213135, "learning_rate": 1.9985895879313536e-05, "loss": 2.024, "step": 14302 }, { "epoch": 0.19, "grad_norm": 4.4977617263793945, "learning_rate": 1.9985890299808993e-05, "loss": 1.8933, "step": 14303 }, { "epoch": 0.19, "grad_norm": 4.209259986877441, "learning_rate": 1.9985884719201832e-05, "loss": 1.8765, "step": 14304 }, { "epoch": 0.19, "grad_norm": 4.0913214683532715, "learning_rate": 1.9985879137492062e-05, "loss": 1.9894, "step": 14305 }, { "epoch": 0.19, "grad_norm": 4.656472206115723, "learning_rate": 1.9985873554679685e-05, "loss": 2.6931, "step": 14306 }, { "epoch": 0.19, "grad_norm": 4.340729236602783, "learning_rate": 1.9985867970764695e-05, "loss": 2.1378, "step": 14307 }, { "epoch": 0.19, "grad_norm": 4.606222152709961, "learning_rate": 1.99858623857471e-05, "loss": 2.067, "step": 14308 }, { "epoch": 0.19, "grad_norm": 4.770516395568848, "learning_rate": 1.998585679962689e-05, "loss": 2.3255, "step": 14309 }, { "epoch": 0.19, "grad_norm": 4.171594142913818, "learning_rate": 1.9985851212404076e-05, "loss": 1.9302, "step": 14310 }, { "epoch": 0.19, "grad_norm": 4.133462905883789, "learning_rate": 1.9985845624078653e-05, "loss": 2.2926, "step": 14311 }, { "epoch": 0.19, "grad_norm": 4.113479137420654, "learning_rate": 1.9985840034650624e-05, "loss": 2.2761, "step": 14312 }, { "epoch": 0.19, "grad_norm": 4.283117771148682, "learning_rate": 1.9985834444119988e-05, "loss": 2.3021, "step": 14313 }, { "epoch": 0.19, "grad_norm": 4.6827778816223145, "learning_rate": 1.998582885248675e-05, "loss": 2.0744, "step": 14314 }, { "epoch": 0.19, "grad_norm": 4.384220123291016, "learning_rate": 1.9985823259750904e-05, "loss": 2.0038, "step": 14315 }, { "epoch": 0.19, "grad_norm": 4.732237339019775, "learning_rate": 1.9985817665912452e-05, "loss": 2.2929, "step": 14316 }, { "epoch": 0.19, "grad_norm": 4.36037540435791, "learning_rate": 1.9985812070971397e-05, "loss": 2.2979, "step": 14317 }, { "epoch": 0.19, "grad_norm": 4.49339485168457, "learning_rate": 1.9985806474927742e-05, "loss": 2.3765, "step": 14318 }, { "epoch": 0.19, "grad_norm": 4.269418239593506, "learning_rate": 1.9985800877781484e-05, "loss": 1.846, "step": 14319 }, { "epoch": 0.19, "grad_norm": 4.141536235809326, "learning_rate": 1.998579527953262e-05, "loss": 2.1924, "step": 14320 }, { "epoch": 0.19, "grad_norm": 5.015845775604248, "learning_rate": 1.998578968018116e-05, "loss": 2.5956, "step": 14321 }, { "epoch": 0.19, "grad_norm": 4.558530807495117, "learning_rate": 1.99857840797271e-05, "loss": 2.4915, "step": 14322 }, { "epoch": 0.19, "grad_norm": 4.274542808532715, "learning_rate": 1.9985778478170435e-05, "loss": 2.2525, "step": 14323 }, { "epoch": 0.19, "grad_norm": 3.6094114780426025, "learning_rate": 1.9985772875511175e-05, "loss": 1.8215, "step": 14324 }, { "epoch": 0.19, "grad_norm": 4.063408851623535, "learning_rate": 1.998576727174931e-05, "loss": 2.1798, "step": 14325 }, { "epoch": 0.19, "grad_norm": 4.435074806213379, "learning_rate": 1.9985761666884853e-05, "loss": 2.3114, "step": 14326 }, { "epoch": 0.19, "grad_norm": 3.7471225261688232, "learning_rate": 1.9985756060917797e-05, "loss": 1.8485, "step": 14327 }, { "epoch": 0.19, "grad_norm": 4.8740234375, "learning_rate": 1.998575045384814e-05, "loss": 2.7205, "step": 14328 }, { "epoch": 0.19, "grad_norm": 4.56201696395874, "learning_rate": 1.9985744845675893e-05, "loss": 2.1978, "step": 14329 }, { "epoch": 0.19, "grad_norm": 4.590728282928467, "learning_rate": 1.998573923640105e-05, "loss": 2.2172, "step": 14330 }, { "epoch": 0.19, "grad_norm": 4.798819065093994, "learning_rate": 1.9985733626023607e-05, "loss": 2.2243, "step": 14331 }, { "epoch": 0.19, "grad_norm": 4.587538719177246, "learning_rate": 1.9985728014543573e-05, "loss": 2.2182, "step": 14332 }, { "epoch": 0.19, "grad_norm": 4.427412509918213, "learning_rate": 1.9985722401960946e-05, "loss": 2.1121, "step": 14333 }, { "epoch": 0.19, "grad_norm": 3.925260305404663, "learning_rate": 1.9985716788275723e-05, "loss": 2.0134, "step": 14334 }, { "epoch": 0.19, "grad_norm": 4.062974452972412, "learning_rate": 1.998571117348791e-05, "loss": 2.0715, "step": 14335 }, { "epoch": 0.19, "grad_norm": 4.540980815887451, "learning_rate": 1.9985705557597504e-05, "loss": 2.2004, "step": 14336 }, { "epoch": 0.19, "grad_norm": 5.5835137367248535, "learning_rate": 1.9985699940604502e-05, "loss": 2.7554, "step": 14337 }, { "epoch": 0.19, "grad_norm": 5.179768085479736, "learning_rate": 1.9985694322508914e-05, "loss": 2.7798, "step": 14338 }, { "epoch": 0.19, "grad_norm": 4.309642791748047, "learning_rate": 1.9985688703310736e-05, "loss": 2.4341, "step": 14339 }, { "epoch": 0.19, "grad_norm": 3.619776487350464, "learning_rate": 1.9985683083009972e-05, "loss": 1.4039, "step": 14340 }, { "epoch": 0.19, "grad_norm": 4.526325702667236, "learning_rate": 1.9985677461606615e-05, "loss": 2.0851, "step": 14341 }, { "epoch": 0.19, "grad_norm": 4.209808349609375, "learning_rate": 1.9985671839100668e-05, "loss": 1.8151, "step": 14342 }, { "epoch": 0.19, "grad_norm": 4.378542900085449, "learning_rate": 1.998566621549214e-05, "loss": 1.8415, "step": 14343 }, { "epoch": 0.19, "grad_norm": 5.098639488220215, "learning_rate": 1.9985660590781016e-05, "loss": 2.4843, "step": 14344 }, { "epoch": 0.19, "grad_norm": 3.6863455772399902, "learning_rate": 1.998565496496731e-05, "loss": 1.9562, "step": 14345 }, { "epoch": 0.19, "grad_norm": 4.667219638824463, "learning_rate": 1.998564933805102e-05, "loss": 2.2888, "step": 14346 }, { "epoch": 0.19, "grad_norm": 4.302071571350098, "learning_rate": 1.9985643710032146e-05, "loss": 1.5279, "step": 14347 }, { "epoch": 0.19, "grad_norm": 4.99605131149292, "learning_rate": 1.9985638080910682e-05, "loss": 2.2103, "step": 14348 }, { "epoch": 0.19, "grad_norm": 3.8756089210510254, "learning_rate": 1.998563245068664e-05, "loss": 2.3647, "step": 14349 }, { "epoch": 0.19, "grad_norm": 4.567441940307617, "learning_rate": 1.998562681936001e-05, "loss": 2.2277, "step": 14350 }, { "epoch": 0.19, "grad_norm": 4.651425838470459, "learning_rate": 1.99856211869308e-05, "loss": 2.2543, "step": 14351 }, { "epoch": 0.19, "grad_norm": 4.026160717010498, "learning_rate": 1.998561555339901e-05, "loss": 2.0351, "step": 14352 }, { "epoch": 0.19, "grad_norm": 4.295382976531982, "learning_rate": 1.9985609918764636e-05, "loss": 2.3054, "step": 14353 }, { "epoch": 0.19, "grad_norm": 4.772730827331543, "learning_rate": 1.998560428302768e-05, "loss": 2.5272, "step": 14354 }, { "epoch": 0.19, "grad_norm": 5.2172064781188965, "learning_rate": 1.9985598646188146e-05, "loss": 2.565, "step": 14355 }, { "epoch": 0.19, "grad_norm": 4.3691630363464355, "learning_rate": 1.998559300824603e-05, "loss": 2.4265, "step": 14356 }, { "epoch": 0.19, "grad_norm": 4.768436431884766, "learning_rate": 1.9985587369201338e-05, "loss": 2.3027, "step": 14357 }, { "epoch": 0.19, "grad_norm": 4.375308990478516, "learning_rate": 1.9985581729054067e-05, "loss": 2.3366, "step": 14358 }, { "epoch": 0.19, "grad_norm": 4.795823097229004, "learning_rate": 1.9985576087804216e-05, "loss": 2.8495, "step": 14359 }, { "epoch": 0.19, "grad_norm": 4.0401225090026855, "learning_rate": 1.9985570445451793e-05, "loss": 2.1107, "step": 14360 }, { "epoch": 0.19, "grad_norm": 3.9922406673431396, "learning_rate": 1.998556480199679e-05, "loss": 1.8571, "step": 14361 }, { "epoch": 0.19, "grad_norm": 4.971881866455078, "learning_rate": 1.998555915743921e-05, "loss": 2.5937, "step": 14362 }, { "epoch": 0.19, "grad_norm": 4.390839576721191, "learning_rate": 1.9985553511779057e-05, "loss": 2.4713, "step": 14363 }, { "epoch": 0.19, "grad_norm": 4.838074684143066, "learning_rate": 1.998554786501633e-05, "loss": 2.4886, "step": 14364 }, { "epoch": 0.19, "grad_norm": 5.394981384277344, "learning_rate": 1.998554221715103e-05, "loss": 2.3268, "step": 14365 }, { "epoch": 0.19, "grad_norm": 3.9414279460906982, "learning_rate": 1.9985536568183154e-05, "loss": 1.6336, "step": 14366 }, { "epoch": 0.19, "grad_norm": 4.729243755340576, "learning_rate": 1.9985530918112705e-05, "loss": 2.3323, "step": 14367 }, { "epoch": 0.19, "grad_norm": 5.337800025939941, "learning_rate": 1.9985525266939688e-05, "loss": 2.7745, "step": 14368 }, { "epoch": 0.19, "grad_norm": 4.866057395935059, "learning_rate": 1.9985519614664094e-05, "loss": 2.3748, "step": 14369 }, { "epoch": 0.19, "grad_norm": 5.086320877075195, "learning_rate": 1.9985513961285934e-05, "loss": 2.4067, "step": 14370 }, { "epoch": 0.19, "grad_norm": 4.686248302459717, "learning_rate": 1.9985508306805202e-05, "loss": 1.9099, "step": 14371 }, { "epoch": 0.19, "grad_norm": 4.841989517211914, "learning_rate": 1.99855026512219e-05, "loss": 2.4811, "step": 14372 }, { "epoch": 0.19, "grad_norm": 4.418820381164551, "learning_rate": 1.998549699453603e-05, "loss": 1.7449, "step": 14373 }, { "epoch": 0.19, "grad_norm": 4.396607875823975, "learning_rate": 1.998549133674759e-05, "loss": 2.6266, "step": 14374 }, { "epoch": 0.19, "grad_norm": 4.110665321350098, "learning_rate": 1.9985485677856584e-05, "loss": 1.8541, "step": 14375 }, { "epoch": 0.19, "grad_norm": 4.584826946258545, "learning_rate": 1.998548001786301e-05, "loss": 1.9285, "step": 14376 }, { "epoch": 0.19, "grad_norm": 4.400036811828613, "learning_rate": 1.998547435676687e-05, "loss": 2.3857, "step": 14377 }, { "epoch": 0.19, "grad_norm": 4.256311416625977, "learning_rate": 1.9985468694568165e-05, "loss": 2.0868, "step": 14378 }, { "epoch": 0.19, "grad_norm": 5.0010833740234375, "learning_rate": 1.9985463031266896e-05, "loss": 2.5499, "step": 14379 }, { "epoch": 0.19, "grad_norm": 4.757951736450195, "learning_rate": 1.998545736686306e-05, "loss": 2.8124, "step": 14380 }, { "epoch": 0.19, "grad_norm": 3.97409987449646, "learning_rate": 1.9985451701356663e-05, "loss": 1.7474, "step": 14381 }, { "epoch": 0.19, "grad_norm": 3.9722321033477783, "learning_rate": 1.9985446034747696e-05, "loss": 2.0842, "step": 14382 }, { "epoch": 0.19, "grad_norm": 3.768204689025879, "learning_rate": 1.9985440367036174e-05, "loss": 2.0883, "step": 14383 }, { "epoch": 0.19, "grad_norm": 4.6973090171813965, "learning_rate": 1.9985434698222086e-05, "loss": 2.3474, "step": 14384 }, { "epoch": 0.19, "grad_norm": 5.221142292022705, "learning_rate": 1.998542902830544e-05, "loss": 2.7153, "step": 14385 }, { "epoch": 0.19, "grad_norm": 4.446409702301025, "learning_rate": 1.998542335728623e-05, "loss": 2.3656, "step": 14386 }, { "epoch": 0.19, "grad_norm": 4.224267959594727, "learning_rate": 1.998541768516446e-05, "loss": 2.0963, "step": 14387 }, { "epoch": 0.19, "grad_norm": 5.231186389923096, "learning_rate": 1.9985412011940132e-05, "loss": 2.6034, "step": 14388 }, { "epoch": 0.19, "grad_norm": 4.1341471672058105, "learning_rate": 1.998540633761324e-05, "loss": 2.1482, "step": 14389 }, { "epoch": 0.19, "grad_norm": 4.634074687957764, "learning_rate": 1.9985400662183795e-05, "loss": 1.8942, "step": 14390 }, { "epoch": 0.19, "grad_norm": 4.6021342277526855, "learning_rate": 1.9985394985651794e-05, "loss": 1.8456, "step": 14391 }, { "epoch": 0.19, "grad_norm": 4.381480693817139, "learning_rate": 1.9985389308017233e-05, "loss": 2.1221, "step": 14392 }, { "epoch": 0.19, "grad_norm": 4.113040924072266, "learning_rate": 1.9985383629280116e-05, "loss": 2.0957, "step": 14393 }, { "epoch": 0.19, "grad_norm": 4.113271236419678, "learning_rate": 1.9985377949440444e-05, "loss": 1.8487, "step": 14394 }, { "epoch": 0.19, "grad_norm": 4.5533576011657715, "learning_rate": 1.9985372268498216e-05, "loss": 1.9892, "step": 14395 }, { "epoch": 0.19, "grad_norm": 4.159041404724121, "learning_rate": 1.9985366586453433e-05, "loss": 2.4938, "step": 14396 }, { "epoch": 0.19, "grad_norm": 4.162121772766113, "learning_rate": 1.9985360903306097e-05, "loss": 2.2964, "step": 14397 }, { "epoch": 0.19, "grad_norm": 5.065903663635254, "learning_rate": 1.9985355219056205e-05, "loss": 2.848, "step": 14398 }, { "epoch": 0.19, "grad_norm": 4.371587753295898, "learning_rate": 1.9985349533703765e-05, "loss": 2.4807, "step": 14399 }, { "epoch": 0.19, "grad_norm": 3.993978500366211, "learning_rate": 1.998534384724877e-05, "loss": 1.9394, "step": 14400 }, { "epoch": 0.19, "grad_norm": 4.313905715942383, "learning_rate": 1.9985338159691227e-05, "loss": 2.0492, "step": 14401 }, { "epoch": 0.19, "grad_norm": 3.9665191173553467, "learning_rate": 1.998533247103113e-05, "loss": 2.0356, "step": 14402 }, { "epoch": 0.19, "grad_norm": 4.197910785675049, "learning_rate": 1.998532678126848e-05, "loss": 1.7235, "step": 14403 }, { "epoch": 0.19, "grad_norm": 5.10652494430542, "learning_rate": 1.998532109040329e-05, "loss": 2.5231, "step": 14404 }, { "epoch": 0.19, "grad_norm": 5.061537265777588, "learning_rate": 1.9985315398435544e-05, "loss": 2.7221, "step": 14405 }, { "epoch": 0.19, "grad_norm": 3.677243947982788, "learning_rate": 1.9985309705365248e-05, "loss": 1.6049, "step": 14406 }, { "epoch": 0.19, "grad_norm": 5.098809242248535, "learning_rate": 1.998530401119241e-05, "loss": 2.1622, "step": 14407 }, { "epoch": 0.19, "grad_norm": 4.765665531158447, "learning_rate": 1.9985298315917022e-05, "loss": 2.2439, "step": 14408 }, { "epoch": 0.19, "grad_norm": 3.61875319480896, "learning_rate": 1.9985292619539086e-05, "loss": 1.8514, "step": 14409 }, { "epoch": 0.19, "grad_norm": 4.563216209411621, "learning_rate": 1.9985286922058608e-05, "loss": 2.0837, "step": 14410 }, { "epoch": 0.19, "grad_norm": 4.59070348739624, "learning_rate": 1.9985281223475584e-05, "loss": 2.4373, "step": 14411 }, { "epoch": 0.19, "grad_norm": 4.593616008758545, "learning_rate": 1.9985275523790014e-05, "loss": 2.3153, "step": 14412 }, { "epoch": 0.19, "grad_norm": 4.467137813568115, "learning_rate": 1.9985269823001903e-05, "loss": 1.8613, "step": 14413 }, { "epoch": 0.19, "grad_norm": 4.910007476806641, "learning_rate": 1.9985264121111246e-05, "loss": 2.169, "step": 14414 }, { "epoch": 0.19, "grad_norm": 4.147118091583252, "learning_rate": 1.9985258418118046e-05, "loss": 2.3322, "step": 14415 }, { "epoch": 0.19, "grad_norm": 4.653083324432373, "learning_rate": 1.9985252714022305e-05, "loss": 2.1165, "step": 14416 }, { "epoch": 0.19, "grad_norm": 4.6666083335876465, "learning_rate": 1.9985247008824025e-05, "loss": 2.4326, "step": 14417 }, { "epoch": 0.19, "grad_norm": 4.518150806427002, "learning_rate": 1.9985241302523203e-05, "loss": 2.1599, "step": 14418 }, { "epoch": 0.19, "grad_norm": 4.074950218200684, "learning_rate": 1.9985235595119842e-05, "loss": 2.0232, "step": 14419 }, { "epoch": 0.19, "grad_norm": 4.614339828491211, "learning_rate": 1.998522988661394e-05, "loss": 2.287, "step": 14420 }, { "epoch": 0.19, "grad_norm": 3.951037883758545, "learning_rate": 1.9985224177005497e-05, "loss": 2.013, "step": 14421 }, { "epoch": 0.19, "grad_norm": 4.517737865447998, "learning_rate": 1.998521846629452e-05, "loss": 2.9064, "step": 14422 }, { "epoch": 0.19, "grad_norm": 4.4985671043396, "learning_rate": 1.9985212754481004e-05, "loss": 2.2699, "step": 14423 }, { "epoch": 0.19, "grad_norm": 4.506141185760498, "learning_rate": 1.9985207041564946e-05, "loss": 2.3178, "step": 14424 }, { "epoch": 0.19, "grad_norm": 4.788051128387451, "learning_rate": 1.998520132754636e-05, "loss": 2.8246, "step": 14425 }, { "epoch": 0.19, "grad_norm": 4.13426399230957, "learning_rate": 1.9985195612425234e-05, "loss": 1.9446, "step": 14426 }, { "epoch": 0.19, "grad_norm": 4.437416076660156, "learning_rate": 1.9985189896201573e-05, "loss": 2.2234, "step": 14427 }, { "epoch": 0.19, "grad_norm": 4.9425458908081055, "learning_rate": 1.998518417887538e-05, "loss": 2.711, "step": 14428 }, { "epoch": 0.19, "grad_norm": 4.2272725105285645, "learning_rate": 1.998517846044665e-05, "loss": 2.1169, "step": 14429 }, { "epoch": 0.19, "grad_norm": 4.074219703674316, "learning_rate": 1.998517274091539e-05, "loss": 2.4118, "step": 14430 }, { "epoch": 0.19, "grad_norm": 4.420470714569092, "learning_rate": 1.9985167020281593e-05, "loss": 2.1092, "step": 14431 }, { "epoch": 0.19, "grad_norm": 4.324364185333252, "learning_rate": 1.998516129854527e-05, "loss": 2.0133, "step": 14432 }, { "epoch": 0.19, "grad_norm": 4.408358097076416, "learning_rate": 1.9985155575706414e-05, "loss": 2.2051, "step": 14433 }, { "epoch": 0.19, "grad_norm": 5.022355556488037, "learning_rate": 1.9985149851765026e-05, "loss": 2.3019, "step": 14434 }, { "epoch": 0.19, "grad_norm": 3.99960994720459, "learning_rate": 1.9985144126721106e-05, "loss": 2.0301, "step": 14435 }, { "epoch": 0.19, "grad_norm": 5.317275524139404, "learning_rate": 1.998513840057466e-05, "loss": 2.5434, "step": 14436 }, { "epoch": 0.19, "grad_norm": 3.8757760524749756, "learning_rate": 1.9985132673325684e-05, "loss": 1.7387, "step": 14437 }, { "epoch": 0.19, "grad_norm": 4.3787641525268555, "learning_rate": 1.9985126944974182e-05, "loss": 2.3459, "step": 14438 }, { "epoch": 0.19, "grad_norm": 4.284261703491211, "learning_rate": 1.998512121552015e-05, "loss": 2.1823, "step": 14439 }, { "epoch": 0.19, "grad_norm": 4.619556427001953, "learning_rate": 1.998511548496359e-05, "loss": 2.196, "step": 14440 }, { "epoch": 0.19, "grad_norm": 4.219614028930664, "learning_rate": 1.9985109753304507e-05, "loss": 1.6236, "step": 14441 }, { "epoch": 0.19, "grad_norm": 4.713326930999756, "learning_rate": 1.99851040205429e-05, "loss": 2.6713, "step": 14442 }, { "epoch": 0.19, "grad_norm": 4.275091171264648, "learning_rate": 1.9985098286678766e-05, "loss": 2.3329, "step": 14443 }, { "epoch": 0.19, "grad_norm": 4.534102916717529, "learning_rate": 1.9985092551712106e-05, "loss": 2.4803, "step": 14444 }, { "epoch": 0.19, "grad_norm": 4.625237941741943, "learning_rate": 1.9985086815642924e-05, "loss": 2.3738, "step": 14445 }, { "epoch": 0.19, "grad_norm": 4.8088836669921875, "learning_rate": 1.9985081078471217e-05, "loss": 2.6222, "step": 14446 }, { "epoch": 0.19, "grad_norm": 3.8148248195648193, "learning_rate": 1.9985075340196995e-05, "loss": 1.9529, "step": 14447 }, { "epoch": 0.19, "grad_norm": 3.8190622329711914, "learning_rate": 1.998506960082024e-05, "loss": 1.7754, "step": 14448 }, { "epoch": 0.19, "grad_norm": 4.515834331512451, "learning_rate": 1.998506386034097e-05, "loss": 2.361, "step": 14449 }, { "epoch": 0.19, "grad_norm": 4.6566290855407715, "learning_rate": 1.998505811875918e-05, "loss": 2.0696, "step": 14450 }, { "epoch": 0.19, "grad_norm": 5.11345911026001, "learning_rate": 1.998505237607487e-05, "loss": 2.7001, "step": 14451 }, { "epoch": 0.19, "grad_norm": 3.989577293395996, "learning_rate": 1.998504663228804e-05, "loss": 1.7866, "step": 14452 }, { "epoch": 0.19, "grad_norm": 4.203860282897949, "learning_rate": 1.9985040887398694e-05, "loss": 2.1183, "step": 14453 }, { "epoch": 0.19, "grad_norm": 4.402892589569092, "learning_rate": 1.9985035141406826e-05, "loss": 1.9694, "step": 14454 }, { "epoch": 0.19, "grad_norm": 4.992178916931152, "learning_rate": 1.9985029394312443e-05, "loss": 2.2552, "step": 14455 }, { "epoch": 0.19, "grad_norm": 4.028443336486816, "learning_rate": 1.9985023646115542e-05, "loss": 1.8442, "step": 14456 }, { "epoch": 0.19, "grad_norm": 4.025208473205566, "learning_rate": 1.998501789681613e-05, "loss": 2.2726, "step": 14457 }, { "epoch": 0.19, "grad_norm": 4.6535186767578125, "learning_rate": 1.9985012146414194e-05, "loss": 2.275, "step": 14458 }, { "epoch": 0.19, "grad_norm": 4.536770343780518, "learning_rate": 1.998500639490975e-05, "loss": 2.367, "step": 14459 }, { "epoch": 0.19, "grad_norm": 4.883126735687256, "learning_rate": 1.998500064230279e-05, "loss": 3.034, "step": 14460 }, { "epoch": 0.19, "grad_norm": 3.7434637546539307, "learning_rate": 1.9984994888593316e-05, "loss": 2.0243, "step": 14461 }, { "epoch": 0.19, "grad_norm": 4.154439926147461, "learning_rate": 1.9984989133781328e-05, "loss": 1.9908, "step": 14462 }, { "epoch": 0.19, "grad_norm": 4.084211349487305, "learning_rate": 1.998498337786683e-05, "loss": 1.8336, "step": 14463 }, { "epoch": 0.19, "grad_norm": 4.3253912925720215, "learning_rate": 1.998497762084982e-05, "loss": 2.2833, "step": 14464 }, { "epoch": 0.19, "grad_norm": 4.6646833419799805, "learning_rate": 1.9984971862730297e-05, "loss": 2.162, "step": 14465 }, { "epoch": 0.19, "grad_norm": 4.018996715545654, "learning_rate": 1.9984966103508266e-05, "loss": 2.164, "step": 14466 }, { "epoch": 0.19, "grad_norm": 4.538030624389648, "learning_rate": 1.9984960343183726e-05, "loss": 2.6173, "step": 14467 }, { "epoch": 0.19, "grad_norm": 4.375753402709961, "learning_rate": 1.9984954581756675e-05, "loss": 2.3104, "step": 14468 }, { "epoch": 0.19, "grad_norm": 5.004620552062988, "learning_rate": 1.9984948819227116e-05, "loss": 2.7534, "step": 14469 }, { "epoch": 0.19, "grad_norm": 4.738550662994385, "learning_rate": 1.9984943055595048e-05, "loss": 2.4711, "step": 14470 }, { "epoch": 0.19, "grad_norm": 4.670289516448975, "learning_rate": 1.9984937290860476e-05, "loss": 2.8541, "step": 14471 }, { "epoch": 0.19, "grad_norm": 4.2530059814453125, "learning_rate": 1.9984931525023398e-05, "loss": 1.7893, "step": 14472 }, { "epoch": 0.19, "grad_norm": 4.039532661437988, "learning_rate": 1.998492575808381e-05, "loss": 1.7593, "step": 14473 }, { "epoch": 0.19, "grad_norm": 4.296736240386963, "learning_rate": 1.998491999004172e-05, "loss": 1.9586, "step": 14474 }, { "epoch": 0.19, "grad_norm": 4.321922302246094, "learning_rate": 1.9984914220897123e-05, "loss": 2.6287, "step": 14475 }, { "epoch": 0.19, "grad_norm": 4.289681911468506, "learning_rate": 1.9984908450650023e-05, "loss": 2.6409, "step": 14476 }, { "epoch": 0.19, "grad_norm": 4.498598575592041, "learning_rate": 1.998490267930042e-05, "loss": 2.3328, "step": 14477 }, { "epoch": 0.19, "grad_norm": 4.4866862297058105, "learning_rate": 1.9984896906848318e-05, "loss": 2.2145, "step": 14478 }, { "epoch": 0.19, "grad_norm": 4.360952377319336, "learning_rate": 1.998489113329371e-05, "loss": 2.0634, "step": 14479 }, { "epoch": 0.19, "grad_norm": 4.497835636138916, "learning_rate": 1.9984885358636597e-05, "loss": 2.2873, "step": 14480 }, { "epoch": 0.19, "grad_norm": 4.775356769561768, "learning_rate": 1.998487958287699e-05, "loss": 2.3608, "step": 14481 }, { "epoch": 0.19, "grad_norm": 4.768008708953857, "learning_rate": 1.998487380601488e-05, "loss": 2.6876, "step": 14482 }, { "epoch": 0.19, "grad_norm": 4.392327785491943, "learning_rate": 1.9984868028050273e-05, "loss": 1.9281, "step": 14483 }, { "epoch": 0.19, "grad_norm": 4.4297919273376465, "learning_rate": 1.9984862248983165e-05, "loss": 1.9674, "step": 14484 }, { "epoch": 0.19, "grad_norm": 4.775796890258789, "learning_rate": 1.9984856468813558e-05, "loss": 2.5196, "step": 14485 }, { "epoch": 0.19, "grad_norm": 4.854778289794922, "learning_rate": 1.9984850687541453e-05, "loss": 2.8333, "step": 14486 }, { "epoch": 0.19, "grad_norm": 5.406858921051025, "learning_rate": 1.9984844905166854e-05, "loss": 2.8986, "step": 14487 }, { "epoch": 0.19, "grad_norm": 4.354869365692139, "learning_rate": 1.9984839121689756e-05, "loss": 2.5278, "step": 14488 }, { "epoch": 0.19, "grad_norm": 4.372695446014404, "learning_rate": 1.9984833337110164e-05, "loss": 1.8883, "step": 14489 }, { "epoch": 0.19, "grad_norm": 5.1596269607543945, "learning_rate": 1.9984827551428077e-05, "loss": 2.8406, "step": 14490 }, { "epoch": 0.19, "grad_norm": 3.802205801010132, "learning_rate": 1.9984821764643495e-05, "loss": 1.7536, "step": 14491 }, { "epoch": 0.19, "grad_norm": 4.648021697998047, "learning_rate": 1.998481597675642e-05, "loss": 2.0461, "step": 14492 }, { "epoch": 0.19, "grad_norm": 4.099653244018555, "learning_rate": 1.998481018776685e-05, "loss": 2.1336, "step": 14493 }, { "epoch": 0.19, "grad_norm": 4.960946083068848, "learning_rate": 1.998480439767479e-05, "loss": 2.6468, "step": 14494 }, { "epoch": 0.19, "grad_norm": 5.4465742111206055, "learning_rate": 1.9984798606480235e-05, "loss": 2.6225, "step": 14495 }, { "epoch": 0.19, "grad_norm": 4.03444242477417, "learning_rate": 1.998479281418319e-05, "loss": 1.7747, "step": 14496 }, { "epoch": 0.19, "grad_norm": 4.149356365203857, "learning_rate": 1.9984787020783658e-05, "loss": 2.0868, "step": 14497 }, { "epoch": 0.19, "grad_norm": 4.493574619293213, "learning_rate": 1.9984781226281633e-05, "loss": 1.9652, "step": 14498 }, { "epoch": 0.19, "grad_norm": 4.874976634979248, "learning_rate": 1.9984775430677118e-05, "loss": 2.8327, "step": 14499 }, { "epoch": 0.19, "grad_norm": 4.013669013977051, "learning_rate": 1.9984769633970118e-05, "loss": 2.1675, "step": 14500 }, { "epoch": 0.19, "grad_norm": 4.319431304931641, "learning_rate": 1.9984763836160626e-05, "loss": 2.0017, "step": 14501 }, { "epoch": 0.19, "grad_norm": 4.452126502990723, "learning_rate": 1.9984758037248647e-05, "loss": 2.5868, "step": 14502 }, { "epoch": 0.19, "grad_norm": 4.94252347946167, "learning_rate": 1.9984752237234183e-05, "loss": 1.6964, "step": 14503 }, { "epoch": 0.19, "grad_norm": 4.621551513671875, "learning_rate": 1.9984746436117235e-05, "loss": 2.5563, "step": 14504 }, { "epoch": 0.19, "grad_norm": 3.9501793384552, "learning_rate": 1.9984740633897795e-05, "loss": 1.9987, "step": 14505 }, { "epoch": 0.19, "grad_norm": 4.441221237182617, "learning_rate": 1.9984734830575878e-05, "loss": 2.3383, "step": 14506 }, { "epoch": 0.19, "grad_norm": 5.147508144378662, "learning_rate": 1.9984729026151473e-05, "loss": 2.344, "step": 14507 }, { "epoch": 0.19, "grad_norm": 4.789654731750488, "learning_rate": 1.9984723220624583e-05, "loss": 2.0459, "step": 14508 }, { "epoch": 0.19, "grad_norm": 4.721354007720947, "learning_rate": 1.9984717413995215e-05, "loss": 2.8785, "step": 14509 }, { "epoch": 0.19, "grad_norm": 3.8028714656829834, "learning_rate": 1.998471160626336e-05, "loss": 2.0107, "step": 14510 }, { "epoch": 0.19, "grad_norm": 4.398150444030762, "learning_rate": 1.9984705797429026e-05, "loss": 2.1672, "step": 14511 }, { "epoch": 0.19, "grad_norm": 4.112902641296387, "learning_rate": 1.9984699987492212e-05, "loss": 1.9475, "step": 14512 }, { "epoch": 0.19, "grad_norm": 4.544276237487793, "learning_rate": 1.9984694176452913e-05, "loss": 2.5094, "step": 14513 }, { "epoch": 0.19, "grad_norm": 4.199952602386475, "learning_rate": 1.9984688364311136e-05, "loss": 2.1728, "step": 14514 }, { "epoch": 0.19, "grad_norm": 4.285614013671875, "learning_rate": 1.9984682551066885e-05, "loss": 2.4881, "step": 14515 }, { "epoch": 0.19, "grad_norm": 4.190463066101074, "learning_rate": 1.9984676736720153e-05, "loss": 2.2106, "step": 14516 }, { "epoch": 0.19, "grad_norm": 4.3015666007995605, "learning_rate": 1.998467092127094e-05, "loss": 2.0406, "step": 14517 }, { "epoch": 0.19, "grad_norm": 4.245194911956787, "learning_rate": 1.9984665104719255e-05, "loss": 2.2689, "step": 14518 }, { "epoch": 0.19, "grad_norm": 4.467631816864014, "learning_rate": 1.998465928706509e-05, "loss": 1.983, "step": 14519 }, { "epoch": 0.19, "grad_norm": 4.4525628089904785, "learning_rate": 1.998465346830845e-05, "loss": 2.3221, "step": 14520 }, { "epoch": 0.19, "grad_norm": 4.482085704803467, "learning_rate": 1.9984647648449336e-05, "loss": 2.4032, "step": 14521 }, { "epoch": 0.19, "grad_norm": 5.201516628265381, "learning_rate": 1.9984641827487744e-05, "loss": 2.2611, "step": 14522 }, { "epoch": 0.19, "grad_norm": 4.653609275817871, "learning_rate": 1.998463600542368e-05, "loss": 2.1711, "step": 14523 }, { "epoch": 0.19, "grad_norm": 4.209876537322998, "learning_rate": 1.9984630182257147e-05, "loss": 1.9292, "step": 14524 }, { "epoch": 0.19, "grad_norm": 4.821700096130371, "learning_rate": 1.9984624357988135e-05, "loss": 2.7462, "step": 14525 }, { "epoch": 0.19, "grad_norm": 4.660754680633545, "learning_rate": 1.9984618532616656e-05, "loss": 2.3293, "step": 14526 }, { "epoch": 0.19, "grad_norm": 4.1360392570495605, "learning_rate": 1.9984612706142702e-05, "loss": 2.348, "step": 14527 }, { "epoch": 0.19, "grad_norm": 4.295842170715332, "learning_rate": 1.998460687856628e-05, "loss": 2.428, "step": 14528 }, { "epoch": 0.19, "grad_norm": 4.426731109619141, "learning_rate": 1.998460104988739e-05, "loss": 2.1166, "step": 14529 }, { "epoch": 0.19, "grad_norm": 4.455653667449951, "learning_rate": 1.9984595220106026e-05, "loss": 2.179, "step": 14530 }, { "epoch": 0.19, "grad_norm": 4.410898685455322, "learning_rate": 1.9984589389222195e-05, "loss": 1.6602, "step": 14531 }, { "epoch": 0.19, "grad_norm": 4.573181629180908, "learning_rate": 1.9984583557235897e-05, "loss": 2.2952, "step": 14532 }, { "epoch": 0.19, "grad_norm": 4.323240756988525, "learning_rate": 1.998457772414713e-05, "loss": 2.1683, "step": 14533 }, { "epoch": 0.19, "grad_norm": 4.665161609649658, "learning_rate": 1.9984571889955897e-05, "loss": 2.0953, "step": 14534 }, { "epoch": 0.19, "grad_norm": 4.121818542480469, "learning_rate": 1.9984566054662196e-05, "loss": 1.8876, "step": 14535 }, { "epoch": 0.19, "grad_norm": 4.149878978729248, "learning_rate": 1.998456021826603e-05, "loss": 2.4153, "step": 14536 }, { "epoch": 0.19, "grad_norm": 3.991392135620117, "learning_rate": 1.99845543807674e-05, "loss": 2.1941, "step": 14537 }, { "epoch": 0.19, "grad_norm": 4.674256324768066, "learning_rate": 1.9984548542166307e-05, "loss": 2.1421, "step": 14538 }, { "epoch": 0.19, "grad_norm": 3.8585824966430664, "learning_rate": 1.998454270246275e-05, "loss": 1.9859, "step": 14539 }, { "epoch": 0.19, "grad_norm": 4.655036926269531, "learning_rate": 1.998453686165673e-05, "loss": 2.1999, "step": 14540 }, { "epoch": 0.19, "grad_norm": 3.7138030529022217, "learning_rate": 1.9984531019748248e-05, "loss": 1.5502, "step": 14541 }, { "epoch": 0.19, "grad_norm": 4.94143533706665, "learning_rate": 1.9984525176737304e-05, "loss": 2.677, "step": 14542 }, { "epoch": 0.19, "grad_norm": 4.015456199645996, "learning_rate": 1.99845193326239e-05, "loss": 2.0449, "step": 14543 }, { "epoch": 0.19, "grad_norm": 4.250494480133057, "learning_rate": 1.9984513487408034e-05, "loss": 2.5962, "step": 14544 }, { "epoch": 0.19, "grad_norm": 4.542487144470215, "learning_rate": 1.998450764108971e-05, "loss": 2.1312, "step": 14545 }, { "epoch": 0.19, "grad_norm": 4.149415016174316, "learning_rate": 1.9984501793668924e-05, "loss": 2.3592, "step": 14546 }, { "epoch": 0.19, "grad_norm": 4.218315124511719, "learning_rate": 1.998449594514568e-05, "loss": 2.2787, "step": 14547 }, { "epoch": 0.19, "grad_norm": 4.4432268142700195, "learning_rate": 1.998449009551998e-05, "loss": 2.4294, "step": 14548 }, { "epoch": 0.19, "grad_norm": 4.633265495300293, "learning_rate": 1.9984484244791825e-05, "loss": 2.5173, "step": 14549 }, { "epoch": 0.19, "grad_norm": 5.200128555297852, "learning_rate": 1.9984478392961208e-05, "loss": 2.5058, "step": 14550 }, { "epoch": 0.19, "grad_norm": 3.75299072265625, "learning_rate": 1.998447254002814e-05, "loss": 1.6918, "step": 14551 }, { "epoch": 0.19, "grad_norm": 4.0335893630981445, "learning_rate": 1.9984466685992613e-05, "loss": 1.9488, "step": 14552 }, { "epoch": 0.19, "grad_norm": 3.9166691303253174, "learning_rate": 1.9984460830854634e-05, "loss": 1.946, "step": 14553 }, { "epoch": 0.19, "grad_norm": 4.120650768280029, "learning_rate": 1.9984454974614204e-05, "loss": 2.2537, "step": 14554 }, { "epoch": 0.19, "grad_norm": 4.317564487457275, "learning_rate": 1.9984449117271317e-05, "loss": 2.3002, "step": 14555 }, { "epoch": 0.19, "grad_norm": 4.646549224853516, "learning_rate": 1.9984443258825976e-05, "loss": 2.1303, "step": 14556 }, { "epoch": 0.19, "grad_norm": 3.9956777095794678, "learning_rate": 1.9984437399278188e-05, "loss": 2.1178, "step": 14557 }, { "epoch": 0.19, "grad_norm": 4.746360778808594, "learning_rate": 1.9984431538627946e-05, "loss": 2.8285, "step": 14558 }, { "epoch": 0.19, "grad_norm": 4.604310035705566, "learning_rate": 1.9984425676875253e-05, "loss": 2.5393, "step": 14559 }, { "epoch": 0.19, "grad_norm": 4.208877086639404, "learning_rate": 1.998441981402011e-05, "loss": 1.9809, "step": 14560 }, { "epoch": 0.19, "grad_norm": 5.066770076751709, "learning_rate": 1.9984413950062515e-05, "loss": 2.5322, "step": 14561 }, { "epoch": 0.19, "grad_norm": 4.292809963226318, "learning_rate": 1.9984408085002477e-05, "loss": 2.4274, "step": 14562 }, { "epoch": 0.19, "grad_norm": 4.37033224105835, "learning_rate": 1.9984402218839988e-05, "loss": 1.9502, "step": 14563 }, { "epoch": 0.19, "grad_norm": 4.719605445861816, "learning_rate": 1.9984396351575052e-05, "loss": 2.7903, "step": 14564 }, { "epoch": 0.19, "grad_norm": 4.39917516708374, "learning_rate": 1.998439048320767e-05, "loss": 2.2008, "step": 14565 }, { "epoch": 0.19, "grad_norm": 4.143481254577637, "learning_rate": 1.9984384613737838e-05, "loss": 2.0018, "step": 14566 }, { "epoch": 0.19, "grad_norm": 4.750271320343018, "learning_rate": 1.9984378743165564e-05, "loss": 2.3579, "step": 14567 }, { "epoch": 0.19, "grad_norm": 4.580400466918945, "learning_rate": 1.9984372871490845e-05, "loss": 2.3545, "step": 14568 }, { "epoch": 0.19, "grad_norm": 4.307094573974609, "learning_rate": 1.998436699871368e-05, "loss": 2.0498, "step": 14569 }, { "epoch": 0.19, "grad_norm": 4.254444599151611, "learning_rate": 1.9984361124834074e-05, "loss": 1.9942, "step": 14570 }, { "epoch": 0.19, "grad_norm": 4.608694076538086, "learning_rate": 1.9984355249852024e-05, "loss": 2.4027, "step": 14571 }, { "epoch": 0.19, "grad_norm": 4.582331657409668, "learning_rate": 1.9984349373767533e-05, "loss": 2.4666, "step": 14572 }, { "epoch": 0.19, "grad_norm": 4.702995300292969, "learning_rate": 1.99843434965806e-05, "loss": 2.4587, "step": 14573 }, { "epoch": 0.19, "grad_norm": 4.38971471786499, "learning_rate": 1.9984337618291224e-05, "loss": 2.4512, "step": 14574 }, { "epoch": 0.19, "grad_norm": 4.482569217681885, "learning_rate": 1.998433173889941e-05, "loss": 2.7707, "step": 14575 }, { "epoch": 0.19, "grad_norm": 4.821359634399414, "learning_rate": 1.9984325858405157e-05, "loss": 2.6303, "step": 14576 }, { "epoch": 0.19, "grad_norm": 5.008803844451904, "learning_rate": 1.998431997680846e-05, "loss": 2.2127, "step": 14577 }, { "epoch": 0.19, "grad_norm": 4.862883567810059, "learning_rate": 1.998431409410933e-05, "loss": 2.701, "step": 14578 }, { "epoch": 0.19, "grad_norm": 5.287559509277344, "learning_rate": 1.998430821030776e-05, "loss": 2.924, "step": 14579 }, { "epoch": 0.19, "grad_norm": 4.499840259552002, "learning_rate": 1.9984302325403756e-05, "loss": 1.9295, "step": 14580 }, { "epoch": 0.19, "grad_norm": 4.1798014640808105, "learning_rate": 1.998429643939731e-05, "loss": 2.1414, "step": 14581 }, { "epoch": 0.19, "grad_norm": 4.723503589630127, "learning_rate": 1.9984290552288433e-05, "loss": 2.3328, "step": 14582 }, { "epoch": 0.19, "grad_norm": 4.449731349945068, "learning_rate": 1.998428466407712e-05, "loss": 2.3791, "step": 14583 }, { "epoch": 0.19, "grad_norm": 4.617086887359619, "learning_rate": 1.9984278774763372e-05, "loss": 2.5939, "step": 14584 }, { "epoch": 0.19, "grad_norm": 4.262602806091309, "learning_rate": 1.998427288434719e-05, "loss": 2.2977, "step": 14585 }, { "epoch": 0.19, "grad_norm": 4.477181434631348, "learning_rate": 1.9984266992828576e-05, "loss": 2.3749, "step": 14586 }, { "epoch": 0.19, "grad_norm": 4.1355814933776855, "learning_rate": 1.998426110020753e-05, "loss": 2.2673, "step": 14587 }, { "epoch": 0.19, "grad_norm": 4.356017589569092, "learning_rate": 1.9984255206484052e-05, "loss": 2.3556, "step": 14588 }, { "epoch": 0.19, "grad_norm": 4.970034122467041, "learning_rate": 1.9984249311658138e-05, "loss": 3.0826, "step": 14589 }, { "epoch": 0.19, "grad_norm": 4.5541276931762695, "learning_rate": 1.99842434157298e-05, "loss": 2.2217, "step": 14590 }, { "epoch": 0.19, "grad_norm": 4.723984241485596, "learning_rate": 1.9984237518699028e-05, "loss": 2.3914, "step": 14591 }, { "epoch": 0.19, "grad_norm": 4.234049320220947, "learning_rate": 1.998423162056583e-05, "loss": 2.0662, "step": 14592 }, { "epoch": 0.19, "grad_norm": 4.575653553009033, "learning_rate": 1.99842257213302e-05, "loss": 2.6375, "step": 14593 }, { "epoch": 0.19, "grad_norm": 4.497339725494385, "learning_rate": 1.9984219820992142e-05, "loss": 2.8177, "step": 14594 }, { "epoch": 0.19, "grad_norm": 4.348679542541504, "learning_rate": 1.998421391955166e-05, "loss": 2.4353, "step": 14595 }, { "epoch": 0.19, "grad_norm": 4.366496562957764, "learning_rate": 1.998420801700875e-05, "loss": 2.2265, "step": 14596 }, { "epoch": 0.19, "grad_norm": 4.2993483543396, "learning_rate": 1.9984202113363413e-05, "loss": 2.6608, "step": 14597 }, { "epoch": 0.19, "grad_norm": 4.502804279327393, "learning_rate": 1.9984196208615653e-05, "loss": 2.2234, "step": 14598 }, { "epoch": 0.19, "grad_norm": 4.542330741882324, "learning_rate": 1.9984190302765466e-05, "loss": 2.2156, "step": 14599 }, { "epoch": 0.19, "grad_norm": 3.7805018424987793, "learning_rate": 1.998418439581286e-05, "loss": 1.5406, "step": 14600 }, { "epoch": 0.19, "grad_norm": 4.119508743286133, "learning_rate": 1.9984178487757825e-05, "loss": 2.0465, "step": 14601 }, { "epoch": 0.19, "grad_norm": 4.286911487579346, "learning_rate": 1.9984172578600367e-05, "loss": 2.3057, "step": 14602 }, { "epoch": 0.19, "grad_norm": 4.20055627822876, "learning_rate": 1.998416666834049e-05, "loss": 2.1238, "step": 14603 }, { "epoch": 0.19, "grad_norm": 4.242522716522217, "learning_rate": 1.998416075697819e-05, "loss": 2.198, "step": 14604 }, { "epoch": 0.19, "grad_norm": 4.622196674346924, "learning_rate": 1.9984154844513466e-05, "loss": 2.3061, "step": 14605 }, { "epoch": 0.19, "grad_norm": 4.547444820404053, "learning_rate": 1.998414893094633e-05, "loss": 2.6275, "step": 14606 }, { "epoch": 0.19, "grad_norm": 4.568131446838379, "learning_rate": 1.998414301627677e-05, "loss": 2.5154, "step": 14607 }, { "epoch": 0.19, "grad_norm": 4.935013771057129, "learning_rate": 1.998413710050479e-05, "loss": 2.661, "step": 14608 }, { "epoch": 0.19, "grad_norm": 4.964855670928955, "learning_rate": 1.9984131183630396e-05, "loss": 2.5516, "step": 14609 }, { "epoch": 0.19, "grad_norm": 5.093544960021973, "learning_rate": 1.9984125265653584e-05, "loss": 2.2285, "step": 14610 }, { "epoch": 0.19, "grad_norm": 4.2076802253723145, "learning_rate": 1.998411934657435e-05, "loss": 1.9411, "step": 14611 }, { "epoch": 0.19, "grad_norm": 4.37895393371582, "learning_rate": 1.9984113426392702e-05, "loss": 1.8956, "step": 14612 }, { "epoch": 0.19, "grad_norm": 4.808600902557373, "learning_rate": 1.998410750510864e-05, "loss": 2.5315, "step": 14613 }, { "epoch": 0.19, "grad_norm": 4.521633625030518, "learning_rate": 1.998410158272216e-05, "loss": 2.1181, "step": 14614 }, { "epoch": 0.19, "grad_norm": 4.4054436683654785, "learning_rate": 1.998409565923327e-05, "loss": 2.1646, "step": 14615 }, { "epoch": 0.19, "grad_norm": 4.319633483886719, "learning_rate": 1.9984089734641965e-05, "loss": 2.0146, "step": 14616 }, { "epoch": 0.19, "grad_norm": 4.832048416137695, "learning_rate": 1.9984083808948246e-05, "loss": 2.3525, "step": 14617 }, { "epoch": 0.19, "grad_norm": 4.593879222869873, "learning_rate": 1.9984077882152117e-05, "loss": 2.6259, "step": 14618 }, { "epoch": 0.19, "grad_norm": 4.490903854370117, "learning_rate": 1.9984071954253577e-05, "loss": 2.1579, "step": 14619 }, { "epoch": 0.19, "grad_norm": 4.741896152496338, "learning_rate": 1.9984066025252625e-05, "loss": 2.4944, "step": 14620 }, { "epoch": 0.19, "grad_norm": 4.1670331954956055, "learning_rate": 1.998406009514926e-05, "loss": 2.0558, "step": 14621 }, { "epoch": 0.19, "grad_norm": 3.850864887237549, "learning_rate": 1.998405416394349e-05, "loss": 2.3326, "step": 14622 }, { "epoch": 0.19, "grad_norm": 4.376675128936768, "learning_rate": 1.9984048231635307e-05, "loss": 2.3105, "step": 14623 }, { "epoch": 0.19, "grad_norm": 4.695605278015137, "learning_rate": 1.9984042298224717e-05, "loss": 2.0721, "step": 14624 }, { "epoch": 0.19, "grad_norm": 4.708892822265625, "learning_rate": 1.9984036363711718e-05, "loss": 2.2693, "step": 14625 }, { "epoch": 0.19, "grad_norm": 4.635470390319824, "learning_rate": 1.9984030428096316e-05, "loss": 2.2623, "step": 14626 }, { "epoch": 0.19, "grad_norm": 4.60138463973999, "learning_rate": 1.9984024491378503e-05, "loss": 2.3009, "step": 14627 }, { "epoch": 0.19, "grad_norm": 4.282951354980469, "learning_rate": 1.9984018553558287e-05, "loss": 2.1184, "step": 14628 }, { "epoch": 0.19, "grad_norm": 4.007142543792725, "learning_rate": 1.9984012614635665e-05, "loss": 1.8422, "step": 14629 }, { "epoch": 0.19, "grad_norm": 4.242541313171387, "learning_rate": 1.998400667461064e-05, "loss": 2.2627, "step": 14630 }, { "epoch": 0.19, "grad_norm": 5.049932479858398, "learning_rate": 1.998400073348321e-05, "loss": 2.0406, "step": 14631 }, { "epoch": 0.19, "grad_norm": 4.742064952850342, "learning_rate": 1.998399479125338e-05, "loss": 2.5552, "step": 14632 }, { "epoch": 0.19, "grad_norm": 4.6298651695251465, "learning_rate": 1.9983988847921144e-05, "loss": 2.3898, "step": 14633 }, { "epoch": 0.19, "grad_norm": 4.401328086853027, "learning_rate": 1.9983982903486505e-05, "loss": 2.1795, "step": 14634 }, { "epoch": 0.19, "grad_norm": 4.539331912994385, "learning_rate": 1.9983976957949467e-05, "loss": 2.1848, "step": 14635 }, { "epoch": 0.19, "grad_norm": 4.7797651290893555, "learning_rate": 1.998397101131003e-05, "loss": 2.5829, "step": 14636 }, { "epoch": 0.19, "grad_norm": 4.958370685577393, "learning_rate": 1.9983965063568188e-05, "loss": 2.7077, "step": 14637 }, { "epoch": 0.19, "grad_norm": 4.611642837524414, "learning_rate": 1.9983959114723953e-05, "loss": 2.4596, "step": 14638 }, { "epoch": 0.19, "grad_norm": 4.3248610496521, "learning_rate": 1.9983953164777316e-05, "loss": 2.2577, "step": 14639 }, { "epoch": 0.19, "grad_norm": 4.336768627166748, "learning_rate": 1.9983947213728282e-05, "loss": 2.0105, "step": 14640 }, { "epoch": 0.19, "grad_norm": 3.6901164054870605, "learning_rate": 1.998394126157685e-05, "loss": 1.6674, "step": 14641 }, { "epoch": 0.19, "grad_norm": 4.433701515197754, "learning_rate": 1.9983935308323024e-05, "loss": 2.0984, "step": 14642 }, { "epoch": 0.19, "grad_norm": 4.483633995056152, "learning_rate": 1.99839293539668e-05, "loss": 2.3707, "step": 14643 }, { "epoch": 0.19, "grad_norm": 3.789186477661133, "learning_rate": 1.998392339850818e-05, "loss": 2.047, "step": 14644 }, { "epoch": 0.19, "grad_norm": 3.956850290298462, "learning_rate": 1.9983917441947172e-05, "loss": 2.0694, "step": 14645 }, { "epoch": 0.19, "grad_norm": 4.3553266525268555, "learning_rate": 1.9983911484283766e-05, "loss": 2.0612, "step": 14646 }, { "epoch": 0.19, "grad_norm": 4.377066612243652, "learning_rate": 1.9983905525517967e-05, "loss": 2.3724, "step": 14647 }, { "epoch": 0.19, "grad_norm": 4.797732830047607, "learning_rate": 1.9983899565649772e-05, "loss": 2.5296, "step": 14648 }, { "epoch": 0.19, "grad_norm": 4.687637805938721, "learning_rate": 1.9983893604679187e-05, "loss": 2.1796, "step": 14649 }, { "epoch": 0.19, "grad_norm": 4.312335968017578, "learning_rate": 1.9983887642606212e-05, "loss": 2.1342, "step": 14650 }, { "epoch": 0.19, "grad_norm": 3.778470754623413, "learning_rate": 1.9983881679430848e-05, "loss": 1.715, "step": 14651 }, { "epoch": 0.19, "grad_norm": 4.075899600982666, "learning_rate": 1.998387571515309e-05, "loss": 2.1989, "step": 14652 }, { "epoch": 0.19, "grad_norm": 3.937389373779297, "learning_rate": 1.9983869749772947e-05, "loss": 1.9638, "step": 14653 }, { "epoch": 0.19, "grad_norm": 4.507306098937988, "learning_rate": 1.998386378329041e-05, "loss": 2.6793, "step": 14654 }, { "epoch": 0.19, "grad_norm": 4.855542182922363, "learning_rate": 1.998385781570549e-05, "loss": 2.2491, "step": 14655 }, { "epoch": 0.19, "grad_norm": 4.5837907791137695, "learning_rate": 1.9983851847018178e-05, "loss": 2.4881, "step": 14656 }, { "epoch": 0.19, "grad_norm": 4.630714416503906, "learning_rate": 1.9983845877228486e-05, "loss": 2.3258, "step": 14657 }, { "epoch": 0.19, "grad_norm": 3.8969497680664062, "learning_rate": 1.99838399063364e-05, "loss": 1.9512, "step": 14658 }, { "epoch": 0.19, "grad_norm": 4.615667819976807, "learning_rate": 1.9983833934341932e-05, "loss": 2.3978, "step": 14659 }, { "epoch": 0.19, "grad_norm": 5.050666332244873, "learning_rate": 1.9983827961245085e-05, "loss": 2.3458, "step": 14660 }, { "epoch": 0.19, "grad_norm": 3.911980152130127, "learning_rate": 1.9983821987045848e-05, "loss": 1.8641, "step": 14661 }, { "epoch": 0.19, "grad_norm": 4.917266368865967, "learning_rate": 1.998381601174423e-05, "loss": 2.6459, "step": 14662 }, { "epoch": 0.19, "grad_norm": 4.514812469482422, "learning_rate": 1.9983810035340228e-05, "loss": 2.6431, "step": 14663 }, { "epoch": 0.19, "grad_norm": 4.649775981903076, "learning_rate": 1.9983804057833845e-05, "loss": 2.2085, "step": 14664 }, { "epoch": 0.19, "grad_norm": 4.492013454437256, "learning_rate": 1.998379807922508e-05, "loss": 2.3419, "step": 14665 }, { "epoch": 0.19, "grad_norm": 4.742650985717773, "learning_rate": 1.998379209951393e-05, "loss": 2.7788, "step": 14666 }, { "epoch": 0.19, "grad_norm": 5.144779682159424, "learning_rate": 1.9983786118700407e-05, "loss": 2.2818, "step": 14667 }, { "epoch": 0.19, "grad_norm": 4.101676940917969, "learning_rate": 1.9983780136784503e-05, "loss": 1.7342, "step": 14668 }, { "epoch": 0.19, "grad_norm": 4.561905860900879, "learning_rate": 1.9983774153766216e-05, "loss": 1.9373, "step": 14669 }, { "epoch": 0.19, "grad_norm": 3.73054575920105, "learning_rate": 1.9983768169645557e-05, "loss": 1.6336, "step": 14670 }, { "epoch": 0.19, "grad_norm": 4.3304219245910645, "learning_rate": 1.9983762184422518e-05, "loss": 2.2376, "step": 14671 }, { "epoch": 0.19, "grad_norm": 4.855432033538818, "learning_rate": 1.9983756198097103e-05, "loss": 2.2766, "step": 14672 }, { "epoch": 0.19, "grad_norm": 4.265719413757324, "learning_rate": 1.9983750210669308e-05, "loss": 1.9498, "step": 14673 }, { "epoch": 0.19, "grad_norm": 4.489530563354492, "learning_rate": 1.9983744222139144e-05, "loss": 2.7086, "step": 14674 }, { "epoch": 0.19, "grad_norm": 4.815532207489014, "learning_rate": 1.99837382325066e-05, "loss": 2.6723, "step": 14675 }, { "epoch": 0.19, "grad_norm": 4.736531734466553, "learning_rate": 1.9983732241771684e-05, "loss": 2.3799, "step": 14676 }, { "epoch": 0.19, "grad_norm": 4.3518452644348145, "learning_rate": 1.9983726249934396e-05, "loss": 2.3267, "step": 14677 }, { "epoch": 0.19, "grad_norm": 4.7601776123046875, "learning_rate": 1.998372025699473e-05, "loss": 2.6124, "step": 14678 }, { "epoch": 0.19, "grad_norm": 5.0549774169921875, "learning_rate": 1.99837142629527e-05, "loss": 2.3654, "step": 14679 }, { "epoch": 0.19, "grad_norm": 4.58358097076416, "learning_rate": 1.9983708267808292e-05, "loss": 2.1288, "step": 14680 }, { "epoch": 0.19, "grad_norm": 4.280506134033203, "learning_rate": 1.9983702271561513e-05, "loss": 2.1688, "step": 14681 }, { "epoch": 0.19, "grad_norm": 4.302192211151123, "learning_rate": 1.9983696274212367e-05, "loss": 2.2382, "step": 14682 }, { "epoch": 0.19, "grad_norm": 3.838953733444214, "learning_rate": 1.9983690275760852e-05, "loss": 1.9414, "step": 14683 }, { "epoch": 0.19, "grad_norm": 4.380890369415283, "learning_rate": 1.9983684276206967e-05, "loss": 2.2077, "step": 14684 }, { "epoch": 0.19, "grad_norm": 4.322887420654297, "learning_rate": 1.9983678275550714e-05, "loss": 2.0302, "step": 14685 }, { "epoch": 0.19, "grad_norm": 4.382876873016357, "learning_rate": 1.9983672273792094e-05, "loss": 1.777, "step": 14686 }, { "epoch": 0.19, "grad_norm": 5.626663684844971, "learning_rate": 1.9983666270931105e-05, "loss": 2.4655, "step": 14687 }, { "epoch": 0.19, "grad_norm": 4.483994960784912, "learning_rate": 1.9983660266967753e-05, "loss": 2.3406, "step": 14688 }, { "epoch": 0.19, "grad_norm": 4.314785480499268, "learning_rate": 1.9983654261902036e-05, "loss": 2.1423, "step": 14689 }, { "epoch": 0.19, "grad_norm": 4.228076934814453, "learning_rate": 1.998364825573395e-05, "loss": 2.2221, "step": 14690 }, { "epoch": 0.19, "grad_norm": 4.703142166137695, "learning_rate": 1.9983642248463503e-05, "loss": 2.3899, "step": 14691 }, { "epoch": 0.19, "grad_norm": 4.493273735046387, "learning_rate": 1.998363624009069e-05, "loss": 2.0543, "step": 14692 }, { "epoch": 0.19, "grad_norm": 4.218460559844971, "learning_rate": 1.9983630230615516e-05, "loss": 2.3402, "step": 14693 }, { "epoch": 0.19, "grad_norm": 4.872367858886719, "learning_rate": 1.998362422003798e-05, "loss": 2.2878, "step": 14694 }, { "epoch": 0.19, "grad_norm": 3.7405076026916504, "learning_rate": 1.9983618208358083e-05, "loss": 1.844, "step": 14695 }, { "epoch": 0.19, "grad_norm": 4.638162136077881, "learning_rate": 1.9983612195575824e-05, "loss": 2.287, "step": 14696 }, { "epoch": 0.19, "grad_norm": 4.650001525878906, "learning_rate": 1.9983606181691202e-05, "loss": 2.4311, "step": 14697 }, { "epoch": 0.19, "grad_norm": 4.771653652191162, "learning_rate": 1.9983600166704225e-05, "loss": 2.3336, "step": 14698 }, { "epoch": 0.19, "grad_norm": 4.1863274574279785, "learning_rate": 1.998359415061489e-05, "loss": 1.8855, "step": 14699 }, { "epoch": 0.19, "grad_norm": 4.3238205909729, "learning_rate": 1.9983588133423197e-05, "loss": 2.1407, "step": 14700 }, { "epoch": 0.19, "grad_norm": 4.236594200134277, "learning_rate": 1.9983582115129142e-05, "loss": 2.6234, "step": 14701 }, { "epoch": 0.19, "grad_norm": 5.14934778213501, "learning_rate": 1.998357609573273e-05, "loss": 2.4326, "step": 14702 }, { "epoch": 0.19, "grad_norm": 4.902523517608643, "learning_rate": 1.9983570075233965e-05, "loss": 2.9441, "step": 14703 }, { "epoch": 0.19, "grad_norm": 4.657225608825684, "learning_rate": 1.9983564053632843e-05, "loss": 2.5323, "step": 14704 }, { "epoch": 0.19, "grad_norm": 3.8002636432647705, "learning_rate": 1.998355803092937e-05, "loss": 1.9447, "step": 14705 }, { "epoch": 0.19, "grad_norm": 4.629294395446777, "learning_rate": 1.9983552007123535e-05, "loss": 2.6842, "step": 14706 }, { "epoch": 0.19, "grad_norm": 5.5217390060424805, "learning_rate": 1.9983545982215353e-05, "loss": 2.6661, "step": 14707 }, { "epoch": 0.19, "grad_norm": 4.570513725280762, "learning_rate": 1.9983539956204818e-05, "loss": 2.7274, "step": 14708 }, { "epoch": 0.19, "grad_norm": 4.509810447692871, "learning_rate": 1.9983533929091927e-05, "loss": 1.9853, "step": 14709 }, { "epoch": 0.19, "grad_norm": 4.253626823425293, "learning_rate": 1.9983527900876687e-05, "loss": 2.4732, "step": 14710 }, { "epoch": 0.19, "grad_norm": 4.476745128631592, "learning_rate": 1.9983521871559098e-05, "loss": 1.909, "step": 14711 }, { "epoch": 0.19, "grad_norm": 3.7511956691741943, "learning_rate": 1.9983515841139153e-05, "loss": 1.8758, "step": 14712 }, { "epoch": 0.19, "grad_norm": 4.167636394500732, "learning_rate": 1.9983509809616863e-05, "loss": 2.2445, "step": 14713 }, { "epoch": 0.19, "grad_norm": 4.808417797088623, "learning_rate": 1.9983503776992224e-05, "loss": 2.4227, "step": 14714 }, { "epoch": 0.19, "grad_norm": 4.8146185874938965, "learning_rate": 1.9983497743265236e-05, "loss": 2.7844, "step": 14715 }, { "epoch": 0.19, "grad_norm": 4.391319274902344, "learning_rate": 1.99834917084359e-05, "loss": 2.1988, "step": 14716 }, { "epoch": 0.19, "grad_norm": 4.1493940353393555, "learning_rate": 1.9983485672504216e-05, "loss": 2.1955, "step": 14717 }, { "epoch": 0.19, "grad_norm": 4.16542911529541, "learning_rate": 1.9983479635470187e-05, "loss": 2.3293, "step": 14718 }, { "epoch": 0.19, "grad_norm": 4.802727699279785, "learning_rate": 1.9983473597333813e-05, "loss": 2.3416, "step": 14719 }, { "epoch": 0.19, "grad_norm": 4.516780853271484, "learning_rate": 1.9983467558095097e-05, "loss": 2.5266, "step": 14720 }, { "epoch": 0.19, "grad_norm": 4.190368175506592, "learning_rate": 1.998346151775403e-05, "loss": 2.2272, "step": 14721 }, { "epoch": 0.19, "grad_norm": 4.391120433807373, "learning_rate": 1.9983455476310624e-05, "loss": 2.2345, "step": 14722 }, { "epoch": 0.19, "grad_norm": 4.389275074005127, "learning_rate": 1.9983449433764878e-05, "loss": 1.8702, "step": 14723 }, { "epoch": 0.19, "grad_norm": 3.91508412361145, "learning_rate": 1.9983443390116782e-05, "loss": 1.7399, "step": 14724 }, { "epoch": 0.19, "grad_norm": 4.54485559463501, "learning_rate": 1.998343734536635e-05, "loss": 2.1479, "step": 14725 }, { "epoch": 0.19, "grad_norm": 4.302280426025391, "learning_rate": 1.9983431299513575e-05, "loss": 2.5185, "step": 14726 }, { "epoch": 0.19, "grad_norm": 4.076253890991211, "learning_rate": 1.9983425252558463e-05, "loss": 2.4061, "step": 14727 }, { "epoch": 0.19, "grad_norm": 4.454805374145508, "learning_rate": 1.9983419204501005e-05, "loss": 2.2009, "step": 14728 }, { "epoch": 0.19, "grad_norm": 3.9305360317230225, "learning_rate": 1.9983413155341216e-05, "loss": 1.6364, "step": 14729 }, { "epoch": 0.19, "grad_norm": 4.362677574157715, "learning_rate": 1.9983407105079084e-05, "loss": 2.4241, "step": 14730 }, { "epoch": 0.19, "grad_norm": 4.606936454772949, "learning_rate": 1.9983401053714617e-05, "loss": 2.7727, "step": 14731 }, { "epoch": 0.19, "grad_norm": 4.417177677154541, "learning_rate": 1.998339500124781e-05, "loss": 1.914, "step": 14732 }, { "epoch": 0.19, "grad_norm": 5.9948039054870605, "learning_rate": 1.998338894767867e-05, "loss": 2.1647, "step": 14733 }, { "epoch": 0.19, "grad_norm": 3.957521915435791, "learning_rate": 1.9983382893007192e-05, "loss": 1.9525, "step": 14734 }, { "epoch": 0.19, "grad_norm": 4.152438163757324, "learning_rate": 1.998337683723338e-05, "loss": 1.8373, "step": 14735 }, { "epoch": 0.19, "grad_norm": 4.371068954467773, "learning_rate": 1.9983370780357235e-05, "loss": 1.9861, "step": 14736 }, { "epoch": 0.19, "grad_norm": 3.9666872024536133, "learning_rate": 1.9983364722378755e-05, "loss": 2.1485, "step": 14737 }, { "epoch": 0.19, "grad_norm": 3.961866855621338, "learning_rate": 1.9983358663297943e-05, "loss": 2.1534, "step": 14738 }, { "epoch": 0.19, "grad_norm": 4.3834147453308105, "learning_rate": 1.99833526031148e-05, "loss": 2.1906, "step": 14739 }, { "epoch": 0.19, "grad_norm": 5.015593528747559, "learning_rate": 1.9983346541829322e-05, "loss": 2.4276, "step": 14740 }, { "epoch": 0.19, "grad_norm": 4.5290751457214355, "learning_rate": 1.9983340479441514e-05, "loss": 2.687, "step": 14741 }, { "epoch": 0.19, "grad_norm": 3.390995979309082, "learning_rate": 1.9983334415951377e-05, "loss": 1.429, "step": 14742 }, { "epoch": 0.19, "grad_norm": 6.407441139221191, "learning_rate": 1.998332835135891e-05, "loss": 2.3868, "step": 14743 }, { "epoch": 0.19, "grad_norm": 4.156374931335449, "learning_rate": 1.9983322285664117e-05, "loss": 2.0955, "step": 14744 }, { "epoch": 0.19, "grad_norm": 4.356954097747803, "learning_rate": 1.9983316218866994e-05, "loss": 2.809, "step": 14745 }, { "epoch": 0.19, "grad_norm": 4.083048343658447, "learning_rate": 1.9983310150967542e-05, "loss": 2.3858, "step": 14746 }, { "epoch": 0.19, "grad_norm": 4.325100421905518, "learning_rate": 1.9983304081965765e-05, "loss": 1.9452, "step": 14747 }, { "epoch": 0.19, "grad_norm": 3.8345043659210205, "learning_rate": 1.998329801186166e-05, "loss": 2.1369, "step": 14748 }, { "epoch": 0.19, "grad_norm": 3.7885758876800537, "learning_rate": 1.9983291940655232e-05, "loss": 2.0555, "step": 14749 }, { "epoch": 0.19, "grad_norm": 4.376031875610352, "learning_rate": 1.9983285868346476e-05, "loss": 2.1751, "step": 14750 }, { "epoch": 0.19, "grad_norm": 4.582479953765869, "learning_rate": 1.99832797949354e-05, "loss": 2.5375, "step": 14751 }, { "epoch": 0.19, "grad_norm": 4.11591911315918, "learning_rate": 1.9983273720421998e-05, "loss": 2.0605, "step": 14752 }, { "epoch": 0.19, "grad_norm": 4.398451805114746, "learning_rate": 1.9983267644806273e-05, "loss": 2.3919, "step": 14753 }, { "epoch": 0.19, "grad_norm": 4.002729415893555, "learning_rate": 1.9983261568088225e-05, "loss": 1.8003, "step": 14754 }, { "epoch": 0.19, "grad_norm": 5.05837345123291, "learning_rate": 1.998325549026786e-05, "loss": 2.6878, "step": 14755 }, { "epoch": 0.19, "grad_norm": 3.7632498741149902, "learning_rate": 1.998324941134517e-05, "loss": 1.8729, "step": 14756 }, { "epoch": 0.19, "grad_norm": 4.7421746253967285, "learning_rate": 1.9983243331320162e-05, "loss": 2.1618, "step": 14757 }, { "epoch": 0.19, "grad_norm": 4.469725608825684, "learning_rate": 1.998323725019283e-05, "loss": 2.4128, "step": 14758 }, { "epoch": 0.19, "grad_norm": 4.802826881408691, "learning_rate": 1.9983231167963188e-05, "loss": 2.2896, "step": 14759 }, { "epoch": 0.19, "grad_norm": 4.626326084136963, "learning_rate": 1.998322508463122e-05, "loss": 2.2281, "step": 14760 }, { "epoch": 0.19, "grad_norm": 4.848660945892334, "learning_rate": 1.998321900019694e-05, "loss": 2.6406, "step": 14761 }, { "epoch": 0.19, "grad_norm": 4.334237098693848, "learning_rate": 1.9983212914660337e-05, "loss": 1.8643, "step": 14762 }, { "epoch": 0.19, "grad_norm": 4.184408664703369, "learning_rate": 1.9983206828021423e-05, "loss": 2.0778, "step": 14763 }, { "epoch": 0.19, "grad_norm": 3.828303575515747, "learning_rate": 1.998320074028019e-05, "loss": 1.9825, "step": 14764 }, { "epoch": 0.19, "grad_norm": 4.546274662017822, "learning_rate": 1.9983194651436647e-05, "loss": 2.0983, "step": 14765 }, { "epoch": 0.19, "grad_norm": 4.083334445953369, "learning_rate": 1.9983188561490788e-05, "loss": 2.0275, "step": 14766 }, { "epoch": 0.19, "grad_norm": 4.7833075523376465, "learning_rate": 1.9983182470442613e-05, "loss": 2.3389, "step": 14767 }, { "epoch": 0.19, "grad_norm": 3.6180100440979004, "learning_rate": 1.9983176378292127e-05, "loss": 1.7942, "step": 14768 }, { "epoch": 0.19, "grad_norm": 4.561583518981934, "learning_rate": 1.998317028503933e-05, "loss": 2.3692, "step": 14769 }, { "epoch": 0.19, "grad_norm": 4.052065849304199, "learning_rate": 1.998316419068422e-05, "loss": 2.1358, "step": 14770 }, { "epoch": 0.19, "grad_norm": 4.82046365737915, "learning_rate": 1.99831580952268e-05, "loss": 2.1383, "step": 14771 }, { "epoch": 0.19, "grad_norm": 4.645232677459717, "learning_rate": 1.9983151998667072e-05, "loss": 2.3563, "step": 14772 }, { "epoch": 0.19, "grad_norm": 4.409960746765137, "learning_rate": 1.998314590100503e-05, "loss": 2.2442, "step": 14773 }, { "epoch": 0.19, "grad_norm": 4.763492107391357, "learning_rate": 1.9983139802240685e-05, "loss": 2.224, "step": 14774 }, { "epoch": 0.19, "grad_norm": 4.028239727020264, "learning_rate": 1.9983133702374027e-05, "loss": 2.3812, "step": 14775 }, { "epoch": 0.19, "grad_norm": 4.536679267883301, "learning_rate": 1.9983127601405067e-05, "loss": 2.3997, "step": 14776 }, { "epoch": 0.19, "grad_norm": 4.142921447753906, "learning_rate": 1.9983121499333796e-05, "loss": 2.2995, "step": 14777 }, { "epoch": 0.19, "grad_norm": 4.251694679260254, "learning_rate": 1.998311539616022e-05, "loss": 2.2065, "step": 14778 }, { "epoch": 0.19, "grad_norm": 4.173031330108643, "learning_rate": 1.9983109291884338e-05, "loss": 2.2021, "step": 14779 }, { "epoch": 0.19, "grad_norm": 4.549596309661865, "learning_rate": 1.998310318650615e-05, "loss": 2.6339, "step": 14780 }, { "epoch": 0.19, "grad_norm": 4.207091331481934, "learning_rate": 1.998309708002566e-05, "loss": 1.9931, "step": 14781 }, { "epoch": 0.19, "grad_norm": 4.34049654006958, "learning_rate": 1.9983090972442868e-05, "loss": 2.1556, "step": 14782 }, { "epoch": 0.19, "grad_norm": 4.288107395172119, "learning_rate": 1.9983084863757773e-05, "loss": 2.3219, "step": 14783 }, { "epoch": 0.19, "grad_norm": 4.645194053649902, "learning_rate": 1.9983078753970374e-05, "loss": 2.2653, "step": 14784 }, { "epoch": 0.19, "grad_norm": 4.1193952560424805, "learning_rate": 1.9983072643080677e-05, "loss": 2.1697, "step": 14785 }, { "epoch": 0.19, "grad_norm": 5.340877532958984, "learning_rate": 1.9983066531088678e-05, "loss": 2.6252, "step": 14786 }, { "epoch": 0.19, "grad_norm": 4.604493141174316, "learning_rate": 1.9983060417994377e-05, "loss": 2.2538, "step": 14787 }, { "epoch": 0.19, "grad_norm": 4.350520610809326, "learning_rate": 1.998305430379778e-05, "loss": 1.9276, "step": 14788 }, { "epoch": 0.19, "grad_norm": 4.048399448394775, "learning_rate": 1.998304818849888e-05, "loss": 2.1006, "step": 14789 }, { "epoch": 0.19, "grad_norm": 4.107206344604492, "learning_rate": 1.9983042072097686e-05, "loss": 1.8085, "step": 14790 }, { "epoch": 0.19, "grad_norm": 4.1822662353515625, "learning_rate": 1.9983035954594196e-05, "loss": 2.1802, "step": 14791 }, { "epoch": 0.19, "grad_norm": 4.263236045837402, "learning_rate": 1.9983029835988404e-05, "loss": 2.0006, "step": 14792 }, { "epoch": 0.19, "grad_norm": 4.473174571990967, "learning_rate": 1.998302371628032e-05, "loss": 2.2052, "step": 14793 }, { "epoch": 0.19, "grad_norm": 4.51744270324707, "learning_rate": 1.998301759546994e-05, "loss": 2.3, "step": 14794 }, { "epoch": 0.19, "grad_norm": 4.021395683288574, "learning_rate": 1.9983011473557268e-05, "loss": 2.0182, "step": 14795 }, { "epoch": 0.19, "grad_norm": 4.430123805999756, "learning_rate": 1.99830053505423e-05, "loss": 2.5218, "step": 14796 }, { "epoch": 0.19, "grad_norm": 4.815581321716309, "learning_rate": 1.9982999226425038e-05, "loss": 2.2267, "step": 14797 }, { "epoch": 0.19, "grad_norm": 4.555628776550293, "learning_rate": 1.9982993101205484e-05, "loss": 2.1912, "step": 14798 }, { "epoch": 0.19, "grad_norm": 4.051893711090088, "learning_rate": 1.998298697488364e-05, "loss": 2.0771, "step": 14799 }, { "epoch": 0.19, "grad_norm": 4.513871669769287, "learning_rate": 1.99829808474595e-05, "loss": 2.2091, "step": 14800 }, { "epoch": 0.19, "grad_norm": 4.097082138061523, "learning_rate": 1.9982974718933075e-05, "loss": 1.9844, "step": 14801 }, { "epoch": 0.19, "grad_norm": 4.162374019622803, "learning_rate": 1.9982968589304356e-05, "loss": 2.301, "step": 14802 }, { "epoch": 0.19, "grad_norm": 4.502251148223877, "learning_rate": 1.9982962458573352e-05, "loss": 2.503, "step": 14803 }, { "epoch": 0.19, "grad_norm": 5.076058387756348, "learning_rate": 1.9982956326740057e-05, "loss": 2.6129, "step": 14804 }, { "epoch": 0.19, "grad_norm": 4.623456001281738, "learning_rate": 1.9982950193804474e-05, "loss": 2.4632, "step": 14805 }, { "epoch": 0.19, "grad_norm": 5.1678466796875, "learning_rate": 1.9982944059766606e-05, "loss": 1.821, "step": 14806 }, { "epoch": 0.19, "grad_norm": 4.599082946777344, "learning_rate": 1.998293792462645e-05, "loss": 2.4654, "step": 14807 }, { "epoch": 0.19, "grad_norm": 4.630925178527832, "learning_rate": 1.998293178838401e-05, "loss": 1.9906, "step": 14808 }, { "epoch": 0.19, "grad_norm": 4.826778888702393, "learning_rate": 1.998292565103928e-05, "loss": 2.3948, "step": 14809 }, { "epoch": 0.19, "grad_norm": 3.9945502281188965, "learning_rate": 1.998291951259227e-05, "loss": 2.368, "step": 14810 }, { "epoch": 0.19, "grad_norm": 5.184581756591797, "learning_rate": 1.9982913373042977e-05, "loss": 1.9345, "step": 14811 }, { "epoch": 0.19, "grad_norm": 4.956267356872559, "learning_rate": 1.99829072323914e-05, "loss": 2.5915, "step": 14812 }, { "epoch": 0.19, "grad_norm": 4.377978801727295, "learning_rate": 1.998290109063754e-05, "loss": 2.392, "step": 14813 }, { "epoch": 0.19, "grad_norm": 4.20081901550293, "learning_rate": 1.9982894947781396e-05, "loss": 2.3462, "step": 14814 }, { "epoch": 0.19, "grad_norm": 4.188158988952637, "learning_rate": 1.9982888803822976e-05, "loss": 1.9967, "step": 14815 }, { "epoch": 0.19, "grad_norm": 4.232126235961914, "learning_rate": 1.9982882658762276e-05, "loss": 1.8823, "step": 14816 }, { "epoch": 0.19, "grad_norm": 4.379583835601807, "learning_rate": 1.998287651259929e-05, "loss": 1.8472, "step": 14817 }, { "epoch": 0.19, "grad_norm": 4.893995761871338, "learning_rate": 1.998287036533403e-05, "loss": 2.2996, "step": 14818 }, { "epoch": 0.19, "grad_norm": 4.781900882720947, "learning_rate": 1.998286421696649e-05, "loss": 2.6854, "step": 14819 }, { "epoch": 0.19, "grad_norm": 4.134164810180664, "learning_rate": 1.9982858067496674e-05, "loss": 2.1618, "step": 14820 }, { "epoch": 0.19, "grad_norm": 3.7281196117401123, "learning_rate": 1.998285191692458e-05, "loss": 1.7846, "step": 14821 }, { "epoch": 0.19, "grad_norm": 4.1704792976379395, "learning_rate": 1.998284576525021e-05, "loss": 2.1128, "step": 14822 }, { "epoch": 0.19, "grad_norm": 4.074560642242432, "learning_rate": 1.9982839612473564e-05, "loss": 2.2478, "step": 14823 }, { "epoch": 0.19, "grad_norm": 4.217455863952637, "learning_rate": 1.9982833458594644e-05, "loss": 1.917, "step": 14824 }, { "epoch": 0.19, "grad_norm": 4.763837814331055, "learning_rate": 1.998282730361345e-05, "loss": 2.3742, "step": 14825 }, { "epoch": 0.19, "grad_norm": 3.983306884765625, "learning_rate": 1.998282114752998e-05, "loss": 1.9348, "step": 14826 }, { "epoch": 0.19, "grad_norm": 3.9346659183502197, "learning_rate": 1.998281499034424e-05, "loss": 1.8579, "step": 14827 }, { "epoch": 0.19, "grad_norm": 3.6900899410247803, "learning_rate": 1.9982808832056225e-05, "loss": 1.9172, "step": 14828 }, { "epoch": 0.19, "grad_norm": 5.065816402435303, "learning_rate": 1.9982802672665943e-05, "loss": 2.6334, "step": 14829 }, { "epoch": 0.19, "grad_norm": 4.679914951324463, "learning_rate": 1.9982796512173386e-05, "loss": 2.2144, "step": 14830 }, { "epoch": 0.19, "grad_norm": 4.117374897003174, "learning_rate": 1.9982790350578562e-05, "loss": 1.7194, "step": 14831 }, { "epoch": 0.19, "grad_norm": 4.253676891326904, "learning_rate": 1.9982784187881466e-05, "loss": 1.8324, "step": 14832 }, { "epoch": 0.19, "grad_norm": 3.864431858062744, "learning_rate": 1.9982778024082103e-05, "loss": 1.6971, "step": 14833 }, { "epoch": 0.19, "grad_norm": 4.035835266113281, "learning_rate": 1.9982771859180472e-05, "loss": 1.9338, "step": 14834 }, { "epoch": 0.19, "grad_norm": 4.270191192626953, "learning_rate": 1.9982765693176574e-05, "loss": 2.4018, "step": 14835 }, { "epoch": 0.19, "grad_norm": 4.576832294464111, "learning_rate": 1.9982759526070404e-05, "loss": 2.5059, "step": 14836 }, { "epoch": 0.19, "grad_norm": 4.676778793334961, "learning_rate": 1.9982753357861973e-05, "loss": 2.3589, "step": 14837 }, { "epoch": 0.19, "grad_norm": 4.253069877624512, "learning_rate": 1.998274718855128e-05, "loss": 2.052, "step": 14838 }, { "epoch": 0.19, "grad_norm": 4.674861907958984, "learning_rate": 1.9982741018138316e-05, "loss": 2.4631, "step": 14839 }, { "epoch": 0.19, "grad_norm": 5.1799468994140625, "learning_rate": 1.9982734846623092e-05, "loss": 1.9583, "step": 14840 }, { "epoch": 0.19, "grad_norm": 5.144081115722656, "learning_rate": 1.99827286740056e-05, "loss": 2.4953, "step": 14841 }, { "epoch": 0.19, "grad_norm": 5.078728675842285, "learning_rate": 1.9982722500285852e-05, "loss": 2.494, "step": 14842 }, { "epoch": 0.19, "grad_norm": 4.0544891357421875, "learning_rate": 1.998271632546384e-05, "loss": 2.5201, "step": 14843 }, { "epoch": 0.19, "grad_norm": 4.322124481201172, "learning_rate": 1.9982710149539562e-05, "loss": 2.0087, "step": 14844 }, { "epoch": 0.19, "grad_norm": 4.765674591064453, "learning_rate": 1.9982703972513027e-05, "loss": 2.6664, "step": 14845 }, { "epoch": 0.19, "grad_norm": 4.205389976501465, "learning_rate": 1.998269779438423e-05, "loss": 1.9777, "step": 14846 }, { "epoch": 0.19, "grad_norm": 3.9182119369506836, "learning_rate": 1.9982691615153178e-05, "loss": 2.0105, "step": 14847 }, { "epoch": 0.19, "grad_norm": 4.314565181732178, "learning_rate": 1.9982685434819864e-05, "loss": 1.8379, "step": 14848 }, { "epoch": 0.19, "grad_norm": 4.202420234680176, "learning_rate": 1.9982679253384296e-05, "loss": 1.9333, "step": 14849 }, { "epoch": 0.19, "grad_norm": 4.478123188018799, "learning_rate": 1.9982673070846466e-05, "loss": 2.1256, "step": 14850 }, { "epoch": 0.19, "grad_norm": 5.134142875671387, "learning_rate": 1.998266688720638e-05, "loss": 2.308, "step": 14851 }, { "epoch": 0.19, "grad_norm": 3.658109188079834, "learning_rate": 1.9982660702464042e-05, "loss": 1.7584, "step": 14852 }, { "epoch": 0.19, "grad_norm": 3.8657069206237793, "learning_rate": 1.9982654516619447e-05, "loss": 2.0734, "step": 14853 }, { "epoch": 0.19, "grad_norm": 4.1197829246521, "learning_rate": 1.9982648329672594e-05, "loss": 2.2322, "step": 14854 }, { "epoch": 0.19, "grad_norm": 4.3985700607299805, "learning_rate": 1.998264214162349e-05, "loss": 2.1263, "step": 14855 }, { "epoch": 0.19, "grad_norm": 4.612940788269043, "learning_rate": 1.9982635952472137e-05, "loss": 2.4414, "step": 14856 }, { "epoch": 0.19, "grad_norm": 4.65735387802124, "learning_rate": 1.998262976221853e-05, "loss": 2.0207, "step": 14857 }, { "epoch": 0.19, "grad_norm": 4.387218475341797, "learning_rate": 1.9982623570862666e-05, "loss": 2.1678, "step": 14858 }, { "epoch": 0.19, "grad_norm": 4.262618064880371, "learning_rate": 1.9982617378404557e-05, "loss": 2.2326, "step": 14859 }, { "epoch": 0.19, "grad_norm": 4.3675031661987305, "learning_rate": 1.9982611184844193e-05, "loss": 2.6457, "step": 14860 }, { "epoch": 0.19, "grad_norm": 4.005722999572754, "learning_rate": 1.998260499018158e-05, "loss": 2.2049, "step": 14861 }, { "epoch": 0.19, "grad_norm": 4.788550853729248, "learning_rate": 1.998259879441672e-05, "loss": 2.2828, "step": 14862 }, { "epoch": 0.19, "grad_norm": 4.364377021789551, "learning_rate": 1.9982592597549613e-05, "loss": 2.2262, "step": 14863 }, { "epoch": 0.19, "grad_norm": 3.401785373687744, "learning_rate": 1.9982586399580257e-05, "loss": 1.88, "step": 14864 }, { "epoch": 0.19, "grad_norm": 3.9371016025543213, "learning_rate": 1.9982580200508656e-05, "loss": 2.2148, "step": 14865 }, { "epoch": 0.19, "grad_norm": 4.244755744934082, "learning_rate": 1.9982574000334805e-05, "loss": 2.1394, "step": 14866 }, { "epoch": 0.19, "grad_norm": 4.483395099639893, "learning_rate": 1.9982567799058713e-05, "loss": 2.1637, "step": 14867 }, { "epoch": 0.19, "grad_norm": 4.582268238067627, "learning_rate": 1.9982561596680373e-05, "loss": 2.3237, "step": 14868 }, { "epoch": 0.19, "grad_norm": 4.504789352416992, "learning_rate": 1.9982555393199786e-05, "loss": 2.5571, "step": 14869 }, { "epoch": 0.19, "grad_norm": 4.477421283721924, "learning_rate": 1.998254918861696e-05, "loss": 2.3501, "step": 14870 }, { "epoch": 0.19, "grad_norm": 4.62004280090332, "learning_rate": 1.998254298293189e-05, "loss": 1.9671, "step": 14871 }, { "epoch": 0.19, "grad_norm": 6.0486650466918945, "learning_rate": 1.9982536776144576e-05, "loss": 2.2855, "step": 14872 }, { "epoch": 0.19, "grad_norm": 5.558917999267578, "learning_rate": 1.9982530568255023e-05, "loss": 2.5595, "step": 14873 }, { "epoch": 0.19, "grad_norm": 4.476992130279541, "learning_rate": 1.998252435926323e-05, "loss": 1.8612, "step": 14874 }, { "epoch": 0.19, "grad_norm": 4.278225898742676, "learning_rate": 1.9982518149169193e-05, "loss": 2.0648, "step": 14875 }, { "epoch": 0.19, "grad_norm": 4.143152236938477, "learning_rate": 1.9982511937972922e-05, "loss": 2.2982, "step": 14876 }, { "epoch": 0.19, "grad_norm": 4.730434894561768, "learning_rate": 1.9982505725674408e-05, "loss": 2.0553, "step": 14877 }, { "epoch": 0.19, "grad_norm": 3.4862914085388184, "learning_rate": 1.998249951227366e-05, "loss": 1.7706, "step": 14878 }, { "epoch": 0.19, "grad_norm": 4.397824287414551, "learning_rate": 1.998249329777067e-05, "loss": 2.1262, "step": 14879 }, { "epoch": 0.19, "grad_norm": 3.4708750247955322, "learning_rate": 1.9982487082165443e-05, "loss": 1.6735, "step": 14880 }, { "epoch": 0.19, "grad_norm": 4.683155536651611, "learning_rate": 1.9982480865457987e-05, "loss": 2.4272, "step": 14881 }, { "epoch": 0.19, "grad_norm": 4.545281410217285, "learning_rate": 1.9982474647648292e-05, "loss": 2.1792, "step": 14882 }, { "epoch": 0.19, "grad_norm": 5.491332530975342, "learning_rate": 1.9982468428736358e-05, "loss": 2.5376, "step": 14883 }, { "epoch": 0.19, "grad_norm": 4.562077522277832, "learning_rate": 1.9982462208722196e-05, "loss": 2.2462, "step": 14884 }, { "epoch": 0.19, "grad_norm": 4.0170488357543945, "learning_rate": 1.9982455987605797e-05, "loss": 2.17, "step": 14885 }, { "epoch": 0.19, "grad_norm": 3.7567367553710938, "learning_rate": 1.9982449765387168e-05, "loss": 1.9049, "step": 14886 }, { "epoch": 0.19, "grad_norm": 3.6969356536865234, "learning_rate": 1.998244354206631e-05, "loss": 1.8403, "step": 14887 }, { "epoch": 0.19, "grad_norm": 4.510796070098877, "learning_rate": 1.9982437317643218e-05, "loss": 2.3232, "step": 14888 }, { "epoch": 0.19, "grad_norm": 4.786943435668945, "learning_rate": 1.9982431092117894e-05, "loss": 2.2872, "step": 14889 }, { "epoch": 0.19, "grad_norm": 3.9659101963043213, "learning_rate": 1.998242486549034e-05, "loss": 1.9363, "step": 14890 }, { "epoch": 0.19, "grad_norm": 4.516561985015869, "learning_rate": 1.998241863776056e-05, "loss": 2.0732, "step": 14891 }, { "epoch": 0.19, "grad_norm": 4.047966957092285, "learning_rate": 1.998241240892855e-05, "loss": 1.9376, "step": 14892 }, { "epoch": 0.19, "grad_norm": 4.656898498535156, "learning_rate": 1.9982406178994308e-05, "loss": 2.3269, "step": 14893 }, { "epoch": 0.19, "grad_norm": 4.192235469818115, "learning_rate": 1.9982399947957845e-05, "loss": 2.38, "step": 14894 }, { "epoch": 0.19, "grad_norm": 5.225299835205078, "learning_rate": 1.9982393715819156e-05, "loss": 2.8468, "step": 14895 }, { "epoch": 0.19, "grad_norm": 4.332930088043213, "learning_rate": 1.9982387482578236e-05, "loss": 2.3312, "step": 14896 }, { "epoch": 0.19, "grad_norm": 3.8531980514526367, "learning_rate": 1.9982381248235096e-05, "loss": 1.9696, "step": 14897 }, { "epoch": 0.19, "grad_norm": 3.903982162475586, "learning_rate": 1.998237501278973e-05, "loss": 1.8442, "step": 14898 }, { "epoch": 0.19, "grad_norm": 4.379482269287109, "learning_rate": 1.998236877624214e-05, "loss": 2.8085, "step": 14899 }, { "epoch": 0.19, "grad_norm": 4.488481521606445, "learning_rate": 1.9982362538592326e-05, "loss": 2.2824, "step": 14900 }, { "epoch": 0.19, "grad_norm": 4.119595527648926, "learning_rate": 1.9982356299840292e-05, "loss": 1.7759, "step": 14901 }, { "epoch": 0.19, "grad_norm": 3.780573606491089, "learning_rate": 1.9982350059986037e-05, "loss": 2.0387, "step": 14902 }, { "epoch": 0.19, "grad_norm": 3.7569503784179688, "learning_rate": 1.998234381902956e-05, "loss": 1.8589, "step": 14903 }, { "epoch": 0.19, "grad_norm": 4.1677775382995605, "learning_rate": 1.9982337576970866e-05, "loss": 2.2054, "step": 14904 }, { "epoch": 0.19, "grad_norm": 3.715505838394165, "learning_rate": 1.9982331333809947e-05, "loss": 1.997, "step": 14905 }, { "epoch": 0.19, "grad_norm": 4.161685466766357, "learning_rate": 1.9982325089546814e-05, "loss": 1.992, "step": 14906 }, { "epoch": 0.19, "grad_norm": 4.8910746574401855, "learning_rate": 1.998231884418146e-05, "loss": 2.4135, "step": 14907 }, { "epoch": 0.19, "grad_norm": 3.3987770080566406, "learning_rate": 1.9982312597713888e-05, "loss": 1.5771, "step": 14908 }, { "epoch": 0.19, "grad_norm": 4.932730197906494, "learning_rate": 1.99823063501441e-05, "loss": 3.0663, "step": 14909 }, { "epoch": 0.19, "grad_norm": 4.137594223022461, "learning_rate": 1.9982300101472098e-05, "loss": 1.9498, "step": 14910 }, { "epoch": 0.19, "grad_norm": 3.845353841781616, "learning_rate": 1.998229385169788e-05, "loss": 1.8732, "step": 14911 }, { "epoch": 0.19, "grad_norm": 4.741830348968506, "learning_rate": 1.9982287600821447e-05, "loss": 2.6982, "step": 14912 }, { "epoch": 0.19, "grad_norm": 4.1194562911987305, "learning_rate": 1.99822813488428e-05, "loss": 2.0882, "step": 14913 }, { "epoch": 0.19, "grad_norm": 4.579422473907471, "learning_rate": 1.9982275095761942e-05, "loss": 2.0452, "step": 14914 }, { "epoch": 0.19, "grad_norm": 4.622072696685791, "learning_rate": 1.9982268841578867e-05, "loss": 2.534, "step": 14915 }, { "epoch": 0.19, "grad_norm": 4.9870381355285645, "learning_rate": 1.9982262586293583e-05, "loss": 2.5439, "step": 14916 }, { "epoch": 0.19, "grad_norm": 4.376262664794922, "learning_rate": 1.9982256329906085e-05, "loss": 1.9564, "step": 14917 }, { "epoch": 0.19, "grad_norm": 3.7980098724365234, "learning_rate": 1.9982250072416384e-05, "loss": 2.0422, "step": 14918 }, { "epoch": 0.19, "grad_norm": 5.70959997177124, "learning_rate": 1.9982243813824466e-05, "loss": 2.6163, "step": 14919 }, { "epoch": 0.19, "grad_norm": 4.598010540008545, "learning_rate": 1.998223755413034e-05, "loss": 2.3714, "step": 14920 }, { "epoch": 0.19, "grad_norm": 5.005671501159668, "learning_rate": 1.9982231293334006e-05, "loss": 2.3588, "step": 14921 }, { "epoch": 0.19, "grad_norm": 4.621491432189941, "learning_rate": 1.9982225031435468e-05, "loss": 2.1592, "step": 14922 }, { "epoch": 0.19, "grad_norm": 3.8460681438446045, "learning_rate": 1.998221876843472e-05, "loss": 1.7273, "step": 14923 }, { "epoch": 0.19, "grad_norm": 4.319901943206787, "learning_rate": 1.9982212504331765e-05, "loss": 2.3126, "step": 14924 }, { "epoch": 0.19, "grad_norm": 4.851681232452393, "learning_rate": 1.9982206239126606e-05, "loss": 2.5333, "step": 14925 }, { "epoch": 0.19, "grad_norm": 4.239163398742676, "learning_rate": 1.998219997281924e-05, "loss": 2.2748, "step": 14926 }, { "epoch": 0.19, "grad_norm": 4.851820468902588, "learning_rate": 1.9982193705409673e-05, "loss": 2.5226, "step": 14927 }, { "epoch": 0.19, "grad_norm": 5.067200183868408, "learning_rate": 1.9982187436897898e-05, "loss": 2.2851, "step": 14928 }, { "epoch": 0.19, "grad_norm": 4.618920803070068, "learning_rate": 1.9982181167283923e-05, "loss": 2.0141, "step": 14929 }, { "epoch": 0.19, "grad_norm": 5.000265121459961, "learning_rate": 1.9982174896567744e-05, "loss": 2.7604, "step": 14930 }, { "epoch": 0.19, "grad_norm": 5.126736640930176, "learning_rate": 1.9982168624749366e-05, "loss": 2.5107, "step": 14931 }, { "epoch": 0.19, "grad_norm": 4.0618896484375, "learning_rate": 1.9982162351828788e-05, "loss": 1.9174, "step": 14932 }, { "epoch": 0.19, "grad_norm": 4.6900315284729, "learning_rate": 1.9982156077806007e-05, "loss": 2.4542, "step": 14933 }, { "epoch": 0.19, "grad_norm": 3.9806487560272217, "learning_rate": 1.998214980268103e-05, "loss": 2.1539, "step": 14934 }, { "epoch": 0.19, "grad_norm": 4.360888481140137, "learning_rate": 1.998214352645385e-05, "loss": 2.0781, "step": 14935 }, { "epoch": 0.19, "grad_norm": 4.8285136222839355, "learning_rate": 1.9982137249124473e-05, "loss": 2.6102, "step": 14936 }, { "epoch": 0.19, "grad_norm": 4.753215789794922, "learning_rate": 1.99821309706929e-05, "loss": 2.2893, "step": 14937 }, { "epoch": 0.19, "grad_norm": 4.470273017883301, "learning_rate": 1.998212469115913e-05, "loss": 2.323, "step": 14938 }, { "epoch": 0.19, "grad_norm": 3.921170949935913, "learning_rate": 1.9982118410523163e-05, "loss": 2.3282, "step": 14939 }, { "epoch": 0.19, "grad_norm": 4.632988452911377, "learning_rate": 1.9982112128785003e-05, "loss": 2.2494, "step": 14940 }, { "epoch": 0.19, "grad_norm": 4.346860408782959, "learning_rate": 1.998210584594465e-05, "loss": 2.2135, "step": 14941 }, { "epoch": 0.19, "grad_norm": 4.78318977355957, "learning_rate": 1.99820995620021e-05, "loss": 2.0712, "step": 14942 }, { "epoch": 0.19, "grad_norm": 4.146889686584473, "learning_rate": 1.998209327695736e-05, "loss": 2.0093, "step": 14943 }, { "epoch": 0.19, "grad_norm": 4.221525192260742, "learning_rate": 1.9982086990810422e-05, "loss": 1.8223, "step": 14944 }, { "epoch": 0.19, "grad_norm": 4.946859836578369, "learning_rate": 1.9982080703561295e-05, "loss": 2.1567, "step": 14945 }, { "epoch": 0.19, "grad_norm": 4.7780961990356445, "learning_rate": 1.9982074415209976e-05, "loss": 2.2812, "step": 14946 }, { "epoch": 0.19, "grad_norm": 4.5372490882873535, "learning_rate": 1.998206812575647e-05, "loss": 2.2728, "step": 14947 }, { "epoch": 0.19, "grad_norm": 4.586424827575684, "learning_rate": 1.998206183520077e-05, "loss": 2.3058, "step": 14948 }, { "epoch": 0.19, "grad_norm": 4.6907806396484375, "learning_rate": 1.9982055543542885e-05, "loss": 2.2453, "step": 14949 }, { "epoch": 0.19, "grad_norm": 4.825423240661621, "learning_rate": 1.998204925078281e-05, "loss": 2.4303, "step": 14950 }, { "epoch": 0.19, "grad_norm": 4.379467010498047, "learning_rate": 1.9982042956920545e-05, "loss": 2.2206, "step": 14951 }, { "epoch": 0.19, "grad_norm": 4.614194393157959, "learning_rate": 1.9982036661956097e-05, "loss": 2.2579, "step": 14952 }, { "epoch": 0.19, "grad_norm": 3.94061017036438, "learning_rate": 1.9982030365889463e-05, "loss": 2.4085, "step": 14953 }, { "epoch": 0.19, "grad_norm": 3.7484960556030273, "learning_rate": 1.998202406872064e-05, "loss": 1.857, "step": 14954 }, { "epoch": 0.19, "grad_norm": 4.3034563064575195, "learning_rate": 1.9982017770449635e-05, "loss": 2.2704, "step": 14955 }, { "epoch": 0.19, "grad_norm": 4.881503105163574, "learning_rate": 1.9982011471076445e-05, "loss": 2.5485, "step": 14956 }, { "epoch": 0.19, "grad_norm": 4.7665886878967285, "learning_rate": 1.9982005170601072e-05, "loss": 2.6156, "step": 14957 }, { "epoch": 0.19, "grad_norm": 4.354236125946045, "learning_rate": 1.9981998869023512e-05, "loss": 2.0282, "step": 14958 }, { "epoch": 0.19, "grad_norm": 5.274124622344971, "learning_rate": 1.9981992566343777e-05, "loss": 2.6909, "step": 14959 }, { "epoch": 0.19, "grad_norm": 4.7534565925598145, "learning_rate": 1.9981986262561858e-05, "loss": 2.2891, "step": 14960 }, { "epoch": 0.19, "grad_norm": 4.558889389038086, "learning_rate": 1.9981979957677756e-05, "loss": 2.4067, "step": 14961 }, { "epoch": 0.19, "grad_norm": 4.886285781860352, "learning_rate": 1.998197365169148e-05, "loss": 2.0617, "step": 14962 }, { "epoch": 0.19, "grad_norm": 4.719277858734131, "learning_rate": 1.9981967344603018e-05, "loss": 2.0189, "step": 14963 }, { "epoch": 0.19, "grad_norm": 4.217291831970215, "learning_rate": 1.998196103641238e-05, "loss": 2.2267, "step": 14964 }, { "epoch": 0.19, "grad_norm": 3.4481985569000244, "learning_rate": 1.9981954727119564e-05, "loss": 1.457, "step": 14965 }, { "epoch": 0.19, "grad_norm": 4.65937614440918, "learning_rate": 1.998194841672457e-05, "loss": 1.7582, "step": 14966 }, { "epoch": 0.19, "grad_norm": 4.2637152671813965, "learning_rate": 1.9981942105227402e-05, "loss": 2.1186, "step": 14967 }, { "epoch": 0.19, "grad_norm": 3.8406920433044434, "learning_rate": 1.9981935792628057e-05, "loss": 1.8146, "step": 14968 }, { "epoch": 0.19, "grad_norm": 4.72059965133667, "learning_rate": 1.9981929478926535e-05, "loss": 2.2872, "step": 14969 }, { "epoch": 0.19, "grad_norm": 4.709349632263184, "learning_rate": 1.998192316412284e-05, "loss": 2.4056, "step": 14970 }, { "epoch": 0.19, "grad_norm": 4.9059977531433105, "learning_rate": 1.9981916848216974e-05, "loss": 2.2186, "step": 14971 }, { "epoch": 0.19, "grad_norm": 4.509825706481934, "learning_rate": 1.9981910531208934e-05, "loss": 2.2033, "step": 14972 }, { "epoch": 0.19, "grad_norm": 4.151770114898682, "learning_rate": 1.9981904213098717e-05, "loss": 2.0102, "step": 14973 }, { "epoch": 0.19, "grad_norm": 4.3932013511657715, "learning_rate": 1.9981897893886332e-05, "loss": 2.425, "step": 14974 }, { "epoch": 0.19, "grad_norm": 4.417911529541016, "learning_rate": 1.9981891573571777e-05, "loss": 2.2945, "step": 14975 }, { "epoch": 0.19, "grad_norm": 4.93684720993042, "learning_rate": 1.9981885252155052e-05, "loss": 2.8585, "step": 14976 }, { "epoch": 0.19, "grad_norm": 4.413371562957764, "learning_rate": 1.9981878929636155e-05, "loss": 1.9144, "step": 14977 }, { "epoch": 0.19, "grad_norm": 4.163049697875977, "learning_rate": 1.998187260601509e-05, "loss": 1.921, "step": 14978 }, { "epoch": 0.19, "grad_norm": 4.814452648162842, "learning_rate": 1.998186628129186e-05, "loss": 1.9474, "step": 14979 }, { "epoch": 0.19, "grad_norm": 4.44819450378418, "learning_rate": 1.998185995546646e-05, "loss": 1.7935, "step": 14980 }, { "epoch": 0.19, "grad_norm": 4.814516067504883, "learning_rate": 1.998185362853889e-05, "loss": 2.5513, "step": 14981 }, { "epoch": 0.19, "grad_norm": 4.361288070678711, "learning_rate": 1.998184730050916e-05, "loss": 2.2851, "step": 14982 }, { "epoch": 0.19, "grad_norm": 5.0485053062438965, "learning_rate": 1.9981840971377264e-05, "loss": 2.4663, "step": 14983 }, { "epoch": 0.19, "grad_norm": 3.6778788566589355, "learning_rate": 1.99818346411432e-05, "loss": 1.6803, "step": 14984 }, { "epoch": 0.19, "grad_norm": 4.553186416625977, "learning_rate": 1.9981828309806975e-05, "loss": 2.6177, "step": 14985 }, { "epoch": 0.19, "grad_norm": 4.542206287384033, "learning_rate": 1.9981821977368587e-05, "loss": 2.3431, "step": 14986 }, { "epoch": 0.19, "grad_norm": 4.633423328399658, "learning_rate": 1.9981815643828032e-05, "loss": 2.4038, "step": 14987 }, { "epoch": 0.19, "grad_norm": 4.510908126831055, "learning_rate": 1.998180930918532e-05, "loss": 2.4118, "step": 14988 }, { "epoch": 0.19, "grad_norm": 3.8933358192443848, "learning_rate": 1.9981802973440446e-05, "loss": 2.1728, "step": 14989 }, { "epoch": 0.19, "grad_norm": 4.021912097930908, "learning_rate": 1.998179663659341e-05, "loss": 1.9063, "step": 14990 }, { "epoch": 0.19, "grad_norm": 4.152196884155273, "learning_rate": 1.9981790298644215e-05, "loss": 1.802, "step": 14991 }, { "epoch": 0.19, "grad_norm": 3.591662883758545, "learning_rate": 1.998178395959286e-05, "loss": 1.627, "step": 14992 }, { "epoch": 0.19, "grad_norm": 4.543684482574463, "learning_rate": 1.9981777619439348e-05, "loss": 2.7278, "step": 14993 }, { "epoch": 0.19, "grad_norm": 4.101637840270996, "learning_rate": 1.9981771278183678e-05, "loss": 1.9701, "step": 14994 }, { "epoch": 0.19, "grad_norm": 4.389479637145996, "learning_rate": 1.998176493582585e-05, "loss": 1.834, "step": 14995 }, { "epoch": 0.19, "grad_norm": 4.42367696762085, "learning_rate": 1.998175859236587e-05, "loss": 2.6321, "step": 14996 }, { "epoch": 0.19, "grad_norm": 4.578293323516846, "learning_rate": 1.9981752247803732e-05, "loss": 1.9526, "step": 14997 }, { "epoch": 0.19, "grad_norm": 4.855299472808838, "learning_rate": 1.998174590213944e-05, "loss": 2.6079, "step": 14998 }, { "epoch": 0.19, "grad_norm": 4.251790523529053, "learning_rate": 1.9981739555372994e-05, "loss": 2.5817, "step": 14999 }, { "epoch": 0.19, "grad_norm": 4.234216213226318, "learning_rate": 1.998173320750439e-05, "loss": 2.105, "step": 15000 }, { "epoch": 0.19, "grad_norm": 4.316723346710205, "learning_rate": 1.9981726858533635e-05, "loss": 2.0774, "step": 15001 }, { "epoch": 0.19, "grad_norm": 3.6771080493927, "learning_rate": 1.9981720508460732e-05, "loss": 1.6344, "step": 15002 }, { "epoch": 0.19, "grad_norm": 4.642273426055908, "learning_rate": 1.9981714157285674e-05, "loss": 2.7274, "step": 15003 }, { "epoch": 0.19, "grad_norm": 4.1230878829956055, "learning_rate": 1.998170780500847e-05, "loss": 1.8098, "step": 15004 }, { "epoch": 0.19, "grad_norm": 3.7941300868988037, "learning_rate": 1.9981701451629112e-05, "loss": 1.9062, "step": 15005 }, { "epoch": 0.19, "grad_norm": 4.1395673751831055, "learning_rate": 1.9981695097147606e-05, "loss": 2.1253, "step": 15006 }, { "epoch": 0.19, "grad_norm": 4.453337669372559, "learning_rate": 1.998168874156395e-05, "loss": 2.2356, "step": 15007 }, { "epoch": 0.19, "grad_norm": 4.808499813079834, "learning_rate": 1.9981682384878147e-05, "loss": 2.3286, "step": 15008 }, { "epoch": 0.19, "grad_norm": 4.483442306518555, "learning_rate": 1.9981676027090197e-05, "loss": 2.4611, "step": 15009 }, { "epoch": 0.19, "grad_norm": 4.38091516494751, "learning_rate": 1.9981669668200105e-05, "loss": 1.839, "step": 15010 }, { "epoch": 0.19, "grad_norm": 4.409127235412598, "learning_rate": 1.998166330820786e-05, "loss": 2.1545, "step": 15011 }, { "epoch": 0.19, "grad_norm": 4.048798084259033, "learning_rate": 1.9981656947113473e-05, "loss": 2.0983, "step": 15012 }, { "epoch": 0.19, "grad_norm": 4.530207633972168, "learning_rate": 1.9981650584916944e-05, "loss": 2.1323, "step": 15013 }, { "epoch": 0.19, "grad_norm": 4.714200019836426, "learning_rate": 1.9981644221618272e-05, "loss": 2.0612, "step": 15014 }, { "epoch": 0.19, "grad_norm": 4.070802688598633, "learning_rate": 1.998163785721745e-05, "loss": 2.244, "step": 15015 }, { "epoch": 0.19, "grad_norm": 4.4534196853637695, "learning_rate": 1.998163149171449e-05, "loss": 2.0266, "step": 15016 }, { "epoch": 0.19, "grad_norm": 4.261216163635254, "learning_rate": 1.9981625125109392e-05, "loss": 1.9675, "step": 15017 }, { "epoch": 0.19, "grad_norm": 4.2796831130981445, "learning_rate": 1.998161875740215e-05, "loss": 2.3409, "step": 15018 }, { "epoch": 0.19, "grad_norm": 3.628798007965088, "learning_rate": 1.9981612388592768e-05, "loss": 1.8117, "step": 15019 }, { "epoch": 0.19, "grad_norm": 3.9232308864593506, "learning_rate": 1.998160601868125e-05, "loss": 1.8865, "step": 15020 }, { "epoch": 0.19, "grad_norm": 4.043813228607178, "learning_rate": 1.9981599647667588e-05, "loss": 2.1476, "step": 15021 }, { "epoch": 0.19, "grad_norm": 5.135302543640137, "learning_rate": 1.998159327555179e-05, "loss": 2.9576, "step": 15022 }, { "epoch": 0.19, "grad_norm": 4.48235559463501, "learning_rate": 1.9981586902333855e-05, "loss": 2.1531, "step": 15023 }, { "epoch": 0.19, "grad_norm": 4.531869411468506, "learning_rate": 1.9981580528013784e-05, "loss": 2.6685, "step": 15024 }, { "epoch": 0.19, "grad_norm": 4.467935562133789, "learning_rate": 1.9981574152591577e-05, "loss": 2.2414, "step": 15025 }, { "epoch": 0.2, "grad_norm": 4.290855407714844, "learning_rate": 1.9981567776067234e-05, "loss": 1.9096, "step": 15026 }, { "epoch": 0.2, "grad_norm": 4.036255836486816, "learning_rate": 1.998156139844076e-05, "loss": 2.2248, "step": 15027 }, { "epoch": 0.2, "grad_norm": 4.9499640464782715, "learning_rate": 1.9981555019712146e-05, "loss": 2.1363, "step": 15028 }, { "epoch": 0.2, "grad_norm": 4.584598064422607, "learning_rate": 1.9981548639881404e-05, "loss": 2.4603, "step": 15029 }, { "epoch": 0.2, "grad_norm": 4.712059497833252, "learning_rate": 1.9981542258948526e-05, "loss": 2.203, "step": 15030 }, { "epoch": 0.2, "grad_norm": 4.945537090301514, "learning_rate": 1.998153587691352e-05, "loss": 2.5142, "step": 15031 }, { "epoch": 0.2, "grad_norm": 4.028591156005859, "learning_rate": 1.998152949377638e-05, "loss": 2.3593, "step": 15032 }, { "epoch": 0.2, "grad_norm": 4.187914848327637, "learning_rate": 1.9981523109537113e-05, "loss": 2.1132, "step": 15033 }, { "epoch": 0.2, "grad_norm": 4.901366710662842, "learning_rate": 1.9981516724195716e-05, "loss": 2.3558, "step": 15034 }, { "epoch": 0.2, "grad_norm": 4.008631706237793, "learning_rate": 1.9981510337752188e-05, "loss": 1.9769, "step": 15035 }, { "epoch": 0.2, "grad_norm": 3.62054443359375, "learning_rate": 1.9981503950206534e-05, "loss": 1.7796, "step": 15036 }, { "epoch": 0.2, "grad_norm": 4.318140506744385, "learning_rate": 1.998149756155875e-05, "loss": 2.2068, "step": 15037 }, { "epoch": 0.2, "grad_norm": 4.730523109436035, "learning_rate": 1.9981491171808843e-05, "loss": 2.2008, "step": 15038 }, { "epoch": 0.2, "grad_norm": 4.1318440437316895, "learning_rate": 1.998148478095681e-05, "loss": 2.049, "step": 15039 }, { "epoch": 0.2, "grad_norm": 5.423193454742432, "learning_rate": 1.9981478389002647e-05, "loss": 2.9738, "step": 15040 }, { "epoch": 0.2, "grad_norm": 4.27618932723999, "learning_rate": 1.9981471995946363e-05, "loss": 2.1338, "step": 15041 }, { "epoch": 0.2, "grad_norm": 4.745567321777344, "learning_rate": 1.9981465601787953e-05, "loss": 2.589, "step": 15042 }, { "epoch": 0.2, "grad_norm": 4.788174152374268, "learning_rate": 1.998145920652742e-05, "loss": 1.9776, "step": 15043 }, { "epoch": 0.2, "grad_norm": 4.4591145515441895, "learning_rate": 1.998145281016477e-05, "loss": 2.2733, "step": 15044 }, { "epoch": 0.2, "grad_norm": 3.750396490097046, "learning_rate": 1.9981446412699996e-05, "loss": 1.8222, "step": 15045 }, { "epoch": 0.2, "grad_norm": 3.9239087104797363, "learning_rate": 1.9981440014133095e-05, "loss": 1.5758, "step": 15046 }, { "epoch": 0.2, "grad_norm": 4.272238731384277, "learning_rate": 1.998143361446408e-05, "loss": 2.1737, "step": 15047 }, { "epoch": 0.2, "grad_norm": 4.685910224914551, "learning_rate": 1.9981427213692943e-05, "loss": 2.5088, "step": 15048 }, { "epoch": 0.2, "grad_norm": 4.027853488922119, "learning_rate": 1.9981420811819687e-05, "loss": 2.0398, "step": 15049 }, { "epoch": 0.2, "grad_norm": 4.651631832122803, "learning_rate": 1.9981414408844314e-05, "loss": 2.5195, "step": 15050 }, { "epoch": 0.2, "grad_norm": 4.573421478271484, "learning_rate": 1.9981408004766824e-05, "loss": 2.147, "step": 15051 }, { "epoch": 0.2, "grad_norm": 4.539375305175781, "learning_rate": 1.9981401599587214e-05, "loss": 2.0918, "step": 15052 }, { "epoch": 0.2, "grad_norm": 4.223010540008545, "learning_rate": 1.998139519330549e-05, "loss": 2.3683, "step": 15053 }, { "epoch": 0.2, "grad_norm": 5.061431884765625, "learning_rate": 1.998138878592165e-05, "loss": 2.663, "step": 15054 }, { "epoch": 0.2, "grad_norm": 3.7470781803131104, "learning_rate": 1.9981382377435695e-05, "loss": 1.8169, "step": 15055 }, { "epoch": 0.2, "grad_norm": 4.220045566558838, "learning_rate": 1.9981375967847625e-05, "loss": 2.5364, "step": 15056 }, { "epoch": 0.2, "grad_norm": 5.314875602722168, "learning_rate": 1.9981369557157446e-05, "loss": 2.2637, "step": 15057 }, { "epoch": 0.2, "grad_norm": 4.070012092590332, "learning_rate": 1.9981363145365152e-05, "loss": 2.1678, "step": 15058 }, { "epoch": 0.2, "grad_norm": 4.2083420753479, "learning_rate": 1.9981356732470746e-05, "loss": 2.2225, "step": 15059 }, { "epoch": 0.2, "grad_norm": 4.483668327331543, "learning_rate": 1.998135031847423e-05, "loss": 2.2089, "step": 15060 }, { "epoch": 0.2, "grad_norm": 5.2185893058776855, "learning_rate": 1.99813439033756e-05, "loss": 2.3771, "step": 15061 }, { "epoch": 0.2, "grad_norm": 4.51342248916626, "learning_rate": 1.9981337487174865e-05, "loss": 1.9702, "step": 15062 }, { "epoch": 0.2, "grad_norm": 4.508563995361328, "learning_rate": 1.9981331069872015e-05, "loss": 2.7916, "step": 15063 }, { "epoch": 0.2, "grad_norm": 4.363519191741943, "learning_rate": 1.9981324651467063e-05, "loss": 2.2728, "step": 15064 }, { "epoch": 0.2, "grad_norm": 4.9092841148376465, "learning_rate": 1.9981318231960004e-05, "loss": 2.112, "step": 15065 }, { "epoch": 0.2, "grad_norm": 3.8512299060821533, "learning_rate": 1.9981311811350835e-05, "loss": 1.9934, "step": 15066 }, { "epoch": 0.2, "grad_norm": 3.915893316268921, "learning_rate": 1.998130538963956e-05, "loss": 1.9917, "step": 15067 }, { "epoch": 0.2, "grad_norm": 4.822920322418213, "learning_rate": 1.9981298966826177e-05, "loss": 2.0934, "step": 15068 }, { "epoch": 0.2, "grad_norm": 4.054629325866699, "learning_rate": 1.9981292542910693e-05, "loss": 2.1033, "step": 15069 }, { "epoch": 0.2, "grad_norm": 4.374443054199219, "learning_rate": 1.9981286117893105e-05, "loss": 2.4608, "step": 15070 }, { "epoch": 0.2, "grad_norm": 4.592408180236816, "learning_rate": 1.998127969177341e-05, "loss": 2.2419, "step": 15071 }, { "epoch": 0.2, "grad_norm": 4.3596510887146, "learning_rate": 1.9981273264551616e-05, "loss": 2.4025, "step": 15072 }, { "epoch": 0.2, "grad_norm": 4.380721569061279, "learning_rate": 1.998126683622772e-05, "loss": 2.1709, "step": 15073 }, { "epoch": 0.2, "grad_norm": 4.130439281463623, "learning_rate": 1.998126040680172e-05, "loss": 2.2963, "step": 15074 }, { "epoch": 0.2, "grad_norm": 4.724384784698486, "learning_rate": 1.9981253976273623e-05, "loss": 2.3662, "step": 15075 }, { "epoch": 0.2, "grad_norm": 4.416982173919678, "learning_rate": 1.998124754464342e-05, "loss": 2.0489, "step": 15076 }, { "epoch": 0.2, "grad_norm": 4.437446594238281, "learning_rate": 1.9981241111911128e-05, "loss": 2.0329, "step": 15077 }, { "epoch": 0.2, "grad_norm": 5.071950912475586, "learning_rate": 1.998123467807673e-05, "loss": 1.8726, "step": 15078 }, { "epoch": 0.2, "grad_norm": 4.350112438201904, "learning_rate": 1.9981228243140236e-05, "loss": 1.8528, "step": 15079 }, { "epoch": 0.2, "grad_norm": 5.355628490447998, "learning_rate": 1.9981221807101648e-05, "loss": 2.0714, "step": 15080 }, { "epoch": 0.2, "grad_norm": 4.847919464111328, "learning_rate": 1.998121536996096e-05, "loss": 2.574, "step": 15081 }, { "epoch": 0.2, "grad_norm": 4.865298748016357, "learning_rate": 1.9981208931718176e-05, "loss": 2.0705, "step": 15082 }, { "epoch": 0.2, "grad_norm": 4.0630927085876465, "learning_rate": 1.9981202492373303e-05, "loss": 2.1157, "step": 15083 }, { "epoch": 0.2, "grad_norm": 4.848119258880615, "learning_rate": 1.9981196051926332e-05, "loss": 2.4857, "step": 15084 }, { "epoch": 0.2, "grad_norm": 4.61726713180542, "learning_rate": 1.9981189610377266e-05, "loss": 2.2378, "step": 15085 }, { "epoch": 0.2, "grad_norm": 4.723144054412842, "learning_rate": 1.9981183167726108e-05, "loss": 2.6863, "step": 15086 }, { "epoch": 0.2, "grad_norm": 4.6969218254089355, "learning_rate": 1.998117672397286e-05, "loss": 2.1449, "step": 15087 }, { "epoch": 0.2, "grad_norm": 4.08413553237915, "learning_rate": 1.9981170279117518e-05, "loss": 2.2682, "step": 15088 }, { "epoch": 0.2, "grad_norm": 4.501406192779541, "learning_rate": 1.998116383316009e-05, "loss": 2.5625, "step": 15089 }, { "epoch": 0.2, "grad_norm": 4.992645740509033, "learning_rate": 1.998115738610057e-05, "loss": 2.5656, "step": 15090 }, { "epoch": 0.2, "grad_norm": 4.661669731140137, "learning_rate": 1.9981150937938957e-05, "loss": 2.4237, "step": 15091 }, { "epoch": 0.2, "grad_norm": 4.377716064453125, "learning_rate": 1.998114448867526e-05, "loss": 2.1571, "step": 15092 }, { "epoch": 0.2, "grad_norm": 4.491008758544922, "learning_rate": 1.9981138038309472e-05, "loss": 2.7018, "step": 15093 }, { "epoch": 0.2, "grad_norm": 3.952458381652832, "learning_rate": 1.99811315868416e-05, "loss": 1.781, "step": 15094 }, { "epoch": 0.2, "grad_norm": 4.313022136688232, "learning_rate": 1.9981125134271638e-05, "loss": 2.0914, "step": 15095 }, { "epoch": 0.2, "grad_norm": 4.525606632232666, "learning_rate": 1.9981118680599596e-05, "loss": 1.8594, "step": 15096 }, { "epoch": 0.2, "grad_norm": 4.105460166931152, "learning_rate": 1.998111222582547e-05, "loss": 1.8798, "step": 15097 }, { "epoch": 0.2, "grad_norm": 4.746646881103516, "learning_rate": 1.9981105769949254e-05, "loss": 2.5886, "step": 15098 }, { "epoch": 0.2, "grad_norm": 5.180272579193115, "learning_rate": 1.9981099312970954e-05, "loss": 2.2368, "step": 15099 }, { "epoch": 0.2, "grad_norm": 4.486976623535156, "learning_rate": 1.998109285489058e-05, "loss": 2.6613, "step": 15100 }, { "epoch": 0.2, "grad_norm": 4.689830780029297, "learning_rate": 1.9981086395708114e-05, "loss": 2.3156, "step": 15101 }, { "epoch": 0.2, "grad_norm": 4.498418807983398, "learning_rate": 1.998107993542357e-05, "loss": 2.5002, "step": 15102 }, { "epoch": 0.2, "grad_norm": 4.579139232635498, "learning_rate": 1.998107347403695e-05, "loss": 2.265, "step": 15103 }, { "epoch": 0.2, "grad_norm": 4.558854579925537, "learning_rate": 1.9981067011548244e-05, "loss": 2.1, "step": 15104 }, { "epoch": 0.2, "grad_norm": 5.318640232086182, "learning_rate": 1.9981060547957463e-05, "loss": 2.0919, "step": 15105 }, { "epoch": 0.2, "grad_norm": 4.511526107788086, "learning_rate": 1.99810540832646e-05, "loss": 2.4457, "step": 15106 }, { "epoch": 0.2, "grad_norm": 4.113360404968262, "learning_rate": 1.9981047617469664e-05, "loss": 1.7851, "step": 15107 }, { "epoch": 0.2, "grad_norm": 3.9115753173828125, "learning_rate": 1.998104115057265e-05, "loss": 1.7036, "step": 15108 }, { "epoch": 0.2, "grad_norm": 4.687986850738525, "learning_rate": 1.9981034682573556e-05, "loss": 2.2513, "step": 15109 }, { "epoch": 0.2, "grad_norm": 3.8742074966430664, "learning_rate": 1.9981028213472388e-05, "loss": 2.1039, "step": 15110 }, { "epoch": 0.2, "grad_norm": 3.816455841064453, "learning_rate": 1.9981021743269146e-05, "loss": 2.2251, "step": 15111 }, { "epoch": 0.2, "grad_norm": 4.170754909515381, "learning_rate": 1.998101527196383e-05, "loss": 1.9852, "step": 15112 }, { "epoch": 0.2, "grad_norm": 4.517704486846924, "learning_rate": 1.998100879955644e-05, "loss": 2.4044, "step": 15113 }, { "epoch": 0.2, "grad_norm": 4.72701358795166, "learning_rate": 1.998100232604698e-05, "loss": 2.6485, "step": 15114 }, { "epoch": 0.2, "grad_norm": 4.746607780456543, "learning_rate": 1.9980995851435442e-05, "loss": 2.9142, "step": 15115 }, { "epoch": 0.2, "grad_norm": 4.589298248291016, "learning_rate": 1.9980989375721837e-05, "loss": 2.1559, "step": 15116 }, { "epoch": 0.2, "grad_norm": 5.154946804046631, "learning_rate": 1.998098289890616e-05, "loss": 2.6717, "step": 15117 }, { "epoch": 0.2, "grad_norm": 4.790086269378662, "learning_rate": 1.9980976420988414e-05, "loss": 2.58, "step": 15118 }, { "epoch": 0.2, "grad_norm": 4.13169002532959, "learning_rate": 1.99809699419686e-05, "loss": 2.2248, "step": 15119 }, { "epoch": 0.2, "grad_norm": 3.5906710624694824, "learning_rate": 1.9980963461846716e-05, "loss": 1.5693, "step": 15120 }, { "epoch": 0.2, "grad_norm": 4.3073930740356445, "learning_rate": 1.9980956980622762e-05, "loss": 2.6695, "step": 15121 }, { "epoch": 0.2, "grad_norm": 3.9316749572753906, "learning_rate": 1.9980950498296744e-05, "loss": 2.3915, "step": 15122 }, { "epoch": 0.2, "grad_norm": 4.220512390136719, "learning_rate": 1.998094401486866e-05, "loss": 2.4076, "step": 15123 }, { "epoch": 0.2, "grad_norm": 5.100727558135986, "learning_rate": 1.998093753033851e-05, "loss": 2.1935, "step": 15124 }, { "epoch": 0.2, "grad_norm": 4.191021919250488, "learning_rate": 1.9980931044706296e-05, "loss": 2.0247, "step": 15125 }, { "epoch": 0.2, "grad_norm": 4.800817966461182, "learning_rate": 1.9980924557972016e-05, "loss": 2.686, "step": 15126 }, { "epoch": 0.2, "grad_norm": 3.9069502353668213, "learning_rate": 1.9980918070135673e-05, "loss": 1.8514, "step": 15127 }, { "epoch": 0.2, "grad_norm": 4.766290664672852, "learning_rate": 1.9980911581197268e-05, "loss": 2.7262, "step": 15128 }, { "epoch": 0.2, "grad_norm": 4.841934680938721, "learning_rate": 1.9980905091156802e-05, "loss": 2.3524, "step": 15129 }, { "epoch": 0.2, "grad_norm": 4.553263187408447, "learning_rate": 1.9980898600014272e-05, "loss": 2.6873, "step": 15130 }, { "epoch": 0.2, "grad_norm": 4.427062034606934, "learning_rate": 1.9980892107769684e-05, "loss": 2.4078, "step": 15131 }, { "epoch": 0.2, "grad_norm": 3.877183198928833, "learning_rate": 1.9980885614423035e-05, "loss": 1.9904, "step": 15132 }, { "epoch": 0.2, "grad_norm": 4.280111789703369, "learning_rate": 1.998087911997433e-05, "loss": 1.7932, "step": 15133 }, { "epoch": 0.2, "grad_norm": 4.817061424255371, "learning_rate": 1.998087262442356e-05, "loss": 2.5265, "step": 15134 }, { "epoch": 0.2, "grad_norm": 3.902625322341919, "learning_rate": 1.998086612777074e-05, "loss": 2.1069, "step": 15135 }, { "epoch": 0.2, "grad_norm": 4.527263164520264, "learning_rate": 1.9980859630015857e-05, "loss": 2.2051, "step": 15136 }, { "epoch": 0.2, "grad_norm": 4.340601921081543, "learning_rate": 1.998085313115892e-05, "loss": 2.1054, "step": 15137 }, { "epoch": 0.2, "grad_norm": 5.258053779602051, "learning_rate": 1.998084663119993e-05, "loss": 2.6429, "step": 15138 }, { "epoch": 0.2, "grad_norm": 4.233216285705566, "learning_rate": 1.998084013013888e-05, "loss": 2.0249, "step": 15139 }, { "epoch": 0.2, "grad_norm": 4.241320610046387, "learning_rate": 1.9980833627975777e-05, "loss": 2.3106, "step": 15140 }, { "epoch": 0.2, "grad_norm": 4.61948299407959, "learning_rate": 1.9980827124710623e-05, "loss": 2.1447, "step": 15141 }, { "epoch": 0.2, "grad_norm": 4.5278472900390625, "learning_rate": 1.9980820620343415e-05, "loss": 2.4811, "step": 15142 }, { "epoch": 0.2, "grad_norm": 3.9926517009735107, "learning_rate": 1.9980814114874156e-05, "loss": 2.0466, "step": 15143 }, { "epoch": 0.2, "grad_norm": 4.1234564781188965, "learning_rate": 1.9980807608302846e-05, "loss": 2.2716, "step": 15144 }, { "epoch": 0.2, "grad_norm": 4.244182586669922, "learning_rate": 1.9980801100629486e-05, "loss": 1.9681, "step": 15145 }, { "epoch": 0.2, "grad_norm": 4.057407379150391, "learning_rate": 1.9980794591854074e-05, "loss": 1.7458, "step": 15146 }, { "epoch": 0.2, "grad_norm": 5.1280999183654785, "learning_rate": 1.9980788081976615e-05, "loss": 2.2552, "step": 15147 }, { "epoch": 0.2, "grad_norm": 4.018417835235596, "learning_rate": 1.9980781570997105e-05, "loss": 2.2625, "step": 15148 }, { "epoch": 0.2, "grad_norm": 3.5688018798828125, "learning_rate": 1.9980775058915548e-05, "loss": 1.8579, "step": 15149 }, { "epoch": 0.2, "grad_norm": 4.536154747009277, "learning_rate": 1.9980768545731946e-05, "loss": 2.6157, "step": 15150 }, { "epoch": 0.2, "grad_norm": 4.807644367218018, "learning_rate": 1.9980762031446298e-05, "loss": 2.4941, "step": 15151 }, { "epoch": 0.2, "grad_norm": 4.748058319091797, "learning_rate": 1.9980755516058605e-05, "loss": 2.1138, "step": 15152 }, { "epoch": 0.2, "grad_norm": 3.931058645248413, "learning_rate": 1.9980748999568864e-05, "loss": 1.987, "step": 15153 }, { "epoch": 0.2, "grad_norm": 3.8172245025634766, "learning_rate": 1.9980742481977083e-05, "loss": 1.6945, "step": 15154 }, { "epoch": 0.2, "grad_norm": 4.76387882232666, "learning_rate": 1.9980735963283255e-05, "loss": 2.6687, "step": 15155 }, { "epoch": 0.2, "grad_norm": 4.643990993499756, "learning_rate": 1.9980729443487385e-05, "loss": 1.9731, "step": 15156 }, { "epoch": 0.2, "grad_norm": 4.423389434814453, "learning_rate": 1.9980722922589476e-05, "loss": 2.3155, "step": 15157 }, { "epoch": 0.2, "grad_norm": 4.7625041007995605, "learning_rate": 1.998071640058952e-05, "loss": 2.6615, "step": 15158 }, { "epoch": 0.2, "grad_norm": 4.453067302703857, "learning_rate": 1.998070987748753e-05, "loss": 2.215, "step": 15159 }, { "epoch": 0.2, "grad_norm": 4.606618881225586, "learning_rate": 1.9980703353283498e-05, "loss": 2.2922, "step": 15160 }, { "epoch": 0.2, "grad_norm": 4.691338539123535, "learning_rate": 1.9980696827977426e-05, "loss": 2.8268, "step": 15161 }, { "epoch": 0.2, "grad_norm": 4.483641624450684, "learning_rate": 1.998069030156932e-05, "loss": 2.116, "step": 15162 }, { "epoch": 0.2, "grad_norm": 4.530156135559082, "learning_rate": 1.9980683774059172e-05, "loss": 2.4201, "step": 15163 }, { "epoch": 0.2, "grad_norm": 4.1854634284973145, "learning_rate": 1.9980677245446987e-05, "loss": 2.4619, "step": 15164 }, { "epoch": 0.2, "grad_norm": 4.220689296722412, "learning_rate": 1.9980670715732765e-05, "loss": 2.0643, "step": 15165 }, { "epoch": 0.2, "grad_norm": 4.4572434425354, "learning_rate": 1.9980664184916513e-05, "loss": 2.1344, "step": 15166 }, { "epoch": 0.2, "grad_norm": 4.13104248046875, "learning_rate": 1.9980657652998223e-05, "loss": 2.1017, "step": 15167 }, { "epoch": 0.2, "grad_norm": 4.076494216918945, "learning_rate": 1.99806511199779e-05, "loss": 2.1481, "step": 15168 }, { "epoch": 0.2, "grad_norm": 4.184763431549072, "learning_rate": 1.998064458585554e-05, "loss": 1.9149, "step": 15169 }, { "epoch": 0.2, "grad_norm": 4.4232497215271, "learning_rate": 1.998063805063115e-05, "loss": 2.3189, "step": 15170 }, { "epoch": 0.2, "grad_norm": 4.2145562171936035, "learning_rate": 1.998063151430473e-05, "loss": 2.3583, "step": 15171 }, { "epoch": 0.2, "grad_norm": 4.503676414489746, "learning_rate": 1.9980624976876276e-05, "loss": 1.9085, "step": 15172 }, { "epoch": 0.2, "grad_norm": 4.1128339767456055, "learning_rate": 1.9980618438345797e-05, "loss": 2.1191, "step": 15173 }, { "epoch": 0.2, "grad_norm": 4.936868667602539, "learning_rate": 1.9980611898713283e-05, "loss": 2.3675, "step": 15174 }, { "epoch": 0.2, "grad_norm": 4.812470436096191, "learning_rate": 1.9980605357978743e-05, "loss": 2.4642, "step": 15175 }, { "epoch": 0.2, "grad_norm": 4.68981409072876, "learning_rate": 1.9980598816142175e-05, "loss": 2.6144, "step": 15176 }, { "epoch": 0.2, "grad_norm": 6.910434246063232, "learning_rate": 1.9980592273203577e-05, "loss": 2.5054, "step": 15177 }, { "epoch": 0.2, "grad_norm": 4.356433391571045, "learning_rate": 1.9980585729162955e-05, "loss": 2.1357, "step": 15178 }, { "epoch": 0.2, "grad_norm": 4.211226940155029, "learning_rate": 1.9980579184020302e-05, "loss": 1.8567, "step": 15179 }, { "epoch": 0.2, "grad_norm": 4.446516513824463, "learning_rate": 1.998057263777563e-05, "loss": 2.3193, "step": 15180 }, { "epoch": 0.2, "grad_norm": 4.248812198638916, "learning_rate": 1.998056609042893e-05, "loss": 2.2034, "step": 15181 }, { "epoch": 0.2, "grad_norm": 4.3869948387146, "learning_rate": 1.9980559541980205e-05, "loss": 2.4118, "step": 15182 }, { "epoch": 0.2, "grad_norm": 4.768543243408203, "learning_rate": 1.998055299242946e-05, "loss": 2.1859, "step": 15183 }, { "epoch": 0.2, "grad_norm": 3.9656155109405518, "learning_rate": 1.9980546441776693e-05, "loss": 2.0369, "step": 15184 }, { "epoch": 0.2, "grad_norm": 4.062192440032959, "learning_rate": 1.9980539890021902e-05, "loss": 1.9397, "step": 15185 }, { "epoch": 0.2, "grad_norm": 4.716184139251709, "learning_rate": 1.998053333716509e-05, "loss": 2.659, "step": 15186 }, { "epoch": 0.2, "grad_norm": 4.692562580108643, "learning_rate": 1.998052678320626e-05, "loss": 2.2142, "step": 15187 }, { "epoch": 0.2, "grad_norm": 4.867834568023682, "learning_rate": 1.998052022814541e-05, "loss": 2.2882, "step": 15188 }, { "epoch": 0.2, "grad_norm": 3.9662930965423584, "learning_rate": 1.998051367198254e-05, "loss": 1.8701, "step": 15189 }, { "epoch": 0.2, "grad_norm": 4.478696823120117, "learning_rate": 1.9980507114717652e-05, "loss": 2.4995, "step": 15190 }, { "epoch": 0.2, "grad_norm": 4.119495868682861, "learning_rate": 1.9980500556350748e-05, "loss": 1.8569, "step": 15191 }, { "epoch": 0.2, "grad_norm": 4.553506851196289, "learning_rate": 1.9980493996881828e-05, "loss": 2.6439, "step": 15192 }, { "epoch": 0.2, "grad_norm": 4.263066291809082, "learning_rate": 1.998048743631089e-05, "loss": 1.8781, "step": 15193 }, { "epoch": 0.2, "grad_norm": 3.793355941772461, "learning_rate": 1.9980480874637935e-05, "loss": 1.9602, "step": 15194 }, { "epoch": 0.2, "grad_norm": 3.9172840118408203, "learning_rate": 1.998047431186297e-05, "loss": 2.1465, "step": 15195 }, { "epoch": 0.2, "grad_norm": 4.473212718963623, "learning_rate": 1.998046774798599e-05, "loss": 2.3679, "step": 15196 }, { "epoch": 0.2, "grad_norm": 3.8123080730438232, "learning_rate": 1.9980461183006997e-05, "loss": 1.6745, "step": 15197 }, { "epoch": 0.2, "grad_norm": 4.07371711730957, "learning_rate": 1.9980454616925992e-05, "loss": 2.1029, "step": 15198 }, { "epoch": 0.2, "grad_norm": 4.604018688201904, "learning_rate": 1.9980448049742973e-05, "loss": 2.2031, "step": 15199 }, { "epoch": 0.2, "grad_norm": 4.719146728515625, "learning_rate": 1.9980441481457947e-05, "loss": 2.6184, "step": 15200 }, { "epoch": 0.2, "grad_norm": 4.221049785614014, "learning_rate": 1.9980434912070907e-05, "loss": 2.2728, "step": 15201 }, { "epoch": 0.2, "grad_norm": 4.521275997161865, "learning_rate": 1.998042834158186e-05, "loss": 2.4269, "step": 15202 }, { "epoch": 0.2, "grad_norm": 4.030503749847412, "learning_rate": 1.9980421769990805e-05, "loss": 2.0055, "step": 15203 }, { "epoch": 0.2, "grad_norm": 4.642164707183838, "learning_rate": 1.998041519729774e-05, "loss": 2.3935, "step": 15204 }, { "epoch": 0.2, "grad_norm": 4.0664753913879395, "learning_rate": 1.9980408623502668e-05, "loss": 1.7566, "step": 15205 }, { "epoch": 0.2, "grad_norm": 4.286587238311768, "learning_rate": 1.998040204860559e-05, "loss": 2.1712, "step": 15206 }, { "epoch": 0.2, "grad_norm": 4.381092071533203, "learning_rate": 1.9980395472606508e-05, "loss": 2.0005, "step": 15207 }, { "epoch": 0.2, "grad_norm": 4.14070463180542, "learning_rate": 1.998038889550542e-05, "loss": 2.0462, "step": 15208 }, { "epoch": 0.2, "grad_norm": 4.375069618225098, "learning_rate": 1.9980382317302327e-05, "loss": 2.7136, "step": 15209 }, { "epoch": 0.2, "grad_norm": 4.653295993804932, "learning_rate": 1.998037573799723e-05, "loss": 2.1917, "step": 15210 }, { "epoch": 0.2, "grad_norm": 4.133543014526367, "learning_rate": 1.9980369157590132e-05, "loss": 2.1692, "step": 15211 }, { "epoch": 0.2, "grad_norm": 4.496231555938721, "learning_rate": 1.9980362576081035e-05, "loss": 2.0561, "step": 15212 }, { "epoch": 0.2, "grad_norm": 4.885518550872803, "learning_rate": 1.9980355993469927e-05, "loss": 2.2186, "step": 15213 }, { "epoch": 0.2, "grad_norm": 4.116592884063721, "learning_rate": 1.9980349409756826e-05, "loss": 2.4066, "step": 15214 }, { "epoch": 0.2, "grad_norm": 4.338729381561279, "learning_rate": 1.9980342824941726e-05, "loss": 2.4478, "step": 15215 }, { "epoch": 0.2, "grad_norm": 4.503648281097412, "learning_rate": 1.998033623902462e-05, "loss": 2.3062, "step": 15216 }, { "epoch": 0.2, "grad_norm": 3.9459068775177, "learning_rate": 1.9980329652005524e-05, "loss": 1.9411, "step": 15217 }, { "epoch": 0.2, "grad_norm": 4.339076519012451, "learning_rate": 1.9980323063884426e-05, "loss": 1.9552, "step": 15218 }, { "epoch": 0.2, "grad_norm": 4.355114459991455, "learning_rate": 1.998031647466133e-05, "loss": 2.0406, "step": 15219 }, { "epoch": 0.2, "grad_norm": 5.789880752563477, "learning_rate": 1.998030988433624e-05, "loss": 3.1248, "step": 15220 }, { "epoch": 0.2, "grad_norm": 4.112720489501953, "learning_rate": 1.998030329290915e-05, "loss": 2.0466, "step": 15221 }, { "epoch": 0.2, "grad_norm": 5.011165142059326, "learning_rate": 1.998029670038007e-05, "loss": 2.3671, "step": 15222 }, { "epoch": 0.2, "grad_norm": 3.918333053588867, "learning_rate": 1.9980290106748993e-05, "loss": 1.9713, "step": 15223 }, { "epoch": 0.2, "grad_norm": 4.532506465911865, "learning_rate": 1.9980283512015925e-05, "loss": 2.5113, "step": 15224 }, { "epoch": 0.2, "grad_norm": 4.341846942901611, "learning_rate": 1.9980276916180865e-05, "loss": 2.0167, "step": 15225 }, { "epoch": 0.2, "grad_norm": 4.9716796875, "learning_rate": 1.998027031924381e-05, "loss": 2.6171, "step": 15226 }, { "epoch": 0.2, "grad_norm": 3.7177798748016357, "learning_rate": 1.9980263721204768e-05, "loss": 1.6074, "step": 15227 }, { "epoch": 0.2, "grad_norm": 4.230385780334473, "learning_rate": 1.998025712206373e-05, "loss": 2.4813, "step": 15228 }, { "epoch": 0.2, "grad_norm": 3.4444797039031982, "learning_rate": 1.9980250521820704e-05, "loss": 1.751, "step": 15229 }, { "epoch": 0.2, "grad_norm": 3.9408059120178223, "learning_rate": 1.998024392047569e-05, "loss": 1.9561, "step": 15230 }, { "epoch": 0.2, "grad_norm": 4.505558013916016, "learning_rate": 1.998023731802869e-05, "loss": 2.268, "step": 15231 }, { "epoch": 0.2, "grad_norm": 4.541830539703369, "learning_rate": 1.9980230714479702e-05, "loss": 2.2831, "step": 15232 }, { "epoch": 0.2, "grad_norm": 4.511039733886719, "learning_rate": 1.9980224109828726e-05, "loss": 2.1735, "step": 15233 }, { "epoch": 0.2, "grad_norm": 3.4930310249328613, "learning_rate": 1.9980217504075763e-05, "loss": 1.6042, "step": 15234 }, { "epoch": 0.2, "grad_norm": 4.304093360900879, "learning_rate": 1.9980210897220814e-05, "loss": 2.3059, "step": 15235 }, { "epoch": 0.2, "grad_norm": 4.147520065307617, "learning_rate": 1.9980204289263882e-05, "loss": 1.9262, "step": 15236 }, { "epoch": 0.2, "grad_norm": 4.474002361297607, "learning_rate": 1.9980197680204964e-05, "loss": 2.2622, "step": 15237 }, { "epoch": 0.2, "grad_norm": 3.9920690059661865, "learning_rate": 1.9980191070044066e-05, "loss": 2.3309, "step": 15238 }, { "epoch": 0.2, "grad_norm": 4.155295372009277, "learning_rate": 1.9980184458781184e-05, "loss": 2.4979, "step": 15239 }, { "epoch": 0.2, "grad_norm": 4.623631954193115, "learning_rate": 1.9980177846416324e-05, "loss": 2.7792, "step": 15240 }, { "epoch": 0.2, "grad_norm": 3.8145530223846436, "learning_rate": 1.9980171232949476e-05, "loss": 1.9207, "step": 15241 }, { "epoch": 0.2, "grad_norm": 4.340486526489258, "learning_rate": 1.9980164618380652e-05, "loss": 2.1857, "step": 15242 }, { "epoch": 0.2, "grad_norm": 4.597064018249512, "learning_rate": 1.998015800270985e-05, "loss": 2.1714, "step": 15243 }, { "epoch": 0.2, "grad_norm": 4.82539701461792, "learning_rate": 1.9980151385937066e-05, "loss": 2.6569, "step": 15244 }, { "epoch": 0.2, "grad_norm": 4.503752708435059, "learning_rate": 1.9980144768062306e-05, "loss": 2.4589, "step": 15245 }, { "epoch": 0.2, "grad_norm": 4.328433513641357, "learning_rate": 1.9980138149085567e-05, "loss": 2.1899, "step": 15246 }, { "epoch": 0.2, "grad_norm": 4.32564115524292, "learning_rate": 1.998013152900685e-05, "loss": 2.3121, "step": 15247 }, { "epoch": 0.2, "grad_norm": 4.265255928039551, "learning_rate": 1.9980124907826163e-05, "loss": 2.5903, "step": 15248 }, { "epoch": 0.2, "grad_norm": 4.608950138092041, "learning_rate": 1.99801182855435e-05, "loss": 2.1271, "step": 15249 }, { "epoch": 0.2, "grad_norm": 3.976250171661377, "learning_rate": 1.9980111662158857e-05, "loss": 2.1887, "step": 15250 }, { "epoch": 0.2, "grad_norm": 3.9231925010681152, "learning_rate": 1.998010503767224e-05, "loss": 2.09, "step": 15251 }, { "epoch": 0.2, "grad_norm": 4.229857444763184, "learning_rate": 1.9980098412083656e-05, "loss": 2.1062, "step": 15252 }, { "epoch": 0.2, "grad_norm": 4.1454758644104, "learning_rate": 1.9980091785393097e-05, "loss": 2.5042, "step": 15253 }, { "epoch": 0.2, "grad_norm": 4.366222381591797, "learning_rate": 1.9980085157600567e-05, "loss": 2.2455, "step": 15254 }, { "epoch": 0.2, "grad_norm": 4.483700752258301, "learning_rate": 1.9980078528706065e-05, "loss": 1.9028, "step": 15255 }, { "epoch": 0.2, "grad_norm": 3.7513508796691895, "learning_rate": 1.9980071898709593e-05, "loss": 2.0892, "step": 15256 }, { "epoch": 0.2, "grad_norm": 4.321831703186035, "learning_rate": 1.9980065267611155e-05, "loss": 2.2497, "step": 15257 }, { "epoch": 0.2, "grad_norm": 4.47859525680542, "learning_rate": 1.9980058635410748e-05, "loss": 2.3468, "step": 15258 }, { "epoch": 0.2, "grad_norm": 4.390801429748535, "learning_rate": 1.998005200210837e-05, "loss": 2.4413, "step": 15259 }, { "epoch": 0.2, "grad_norm": 3.9448907375335693, "learning_rate": 1.9980045367704028e-05, "loss": 2.2197, "step": 15260 }, { "epoch": 0.2, "grad_norm": 4.553284645080566, "learning_rate": 1.9980038732197715e-05, "loss": 1.96, "step": 15261 }, { "epoch": 0.2, "grad_norm": 4.1418232917785645, "learning_rate": 1.998003209558944e-05, "loss": 2.1996, "step": 15262 }, { "epoch": 0.2, "grad_norm": 4.241208076477051, "learning_rate": 1.99800254578792e-05, "loss": 2.2103, "step": 15263 }, { "epoch": 0.2, "grad_norm": 4.4516119956970215, "learning_rate": 1.9980018819066997e-05, "loss": 2.4541, "step": 15264 }, { "epoch": 0.2, "grad_norm": 3.9463889598846436, "learning_rate": 1.9980012179152826e-05, "loss": 2.3997, "step": 15265 }, { "epoch": 0.2, "grad_norm": 4.589404582977295, "learning_rate": 1.9980005538136696e-05, "loss": 2.7502, "step": 15266 }, { "epoch": 0.2, "grad_norm": 4.79764461517334, "learning_rate": 1.9979998896018604e-05, "loss": 2.598, "step": 15267 }, { "epoch": 0.2, "grad_norm": 4.596065044403076, "learning_rate": 1.9979992252798552e-05, "loss": 2.2796, "step": 15268 }, { "epoch": 0.2, "grad_norm": 4.471271991729736, "learning_rate": 1.9979985608476537e-05, "loss": 2.4441, "step": 15269 }, { "epoch": 0.2, "grad_norm": 4.628696441650391, "learning_rate": 1.9979978963052563e-05, "loss": 2.3367, "step": 15270 }, { "epoch": 0.2, "grad_norm": 4.717861652374268, "learning_rate": 1.9979972316526633e-05, "loss": 2.4884, "step": 15271 }, { "epoch": 0.2, "grad_norm": 4.611765384674072, "learning_rate": 1.997996566889874e-05, "loss": 1.8622, "step": 15272 }, { "epoch": 0.2, "grad_norm": 4.968084812164307, "learning_rate": 1.997995902016889e-05, "loss": 2.4929, "step": 15273 }, { "epoch": 0.2, "grad_norm": 4.514298915863037, "learning_rate": 1.9979952370337086e-05, "loss": 2.375, "step": 15274 }, { "epoch": 0.2, "grad_norm": 4.733613967895508, "learning_rate": 1.9979945719403325e-05, "loss": 2.1703, "step": 15275 }, { "epoch": 0.2, "grad_norm": 4.553056716918945, "learning_rate": 1.9979939067367605e-05, "loss": 2.1876, "step": 15276 }, { "epoch": 0.2, "grad_norm": 5.152634620666504, "learning_rate": 1.9979932414229936e-05, "loss": 2.7492, "step": 15277 }, { "epoch": 0.2, "grad_norm": 4.834233283996582, "learning_rate": 1.9979925759990314e-05, "loss": 2.2024, "step": 15278 }, { "epoch": 0.2, "grad_norm": 4.405328273773193, "learning_rate": 1.9979919104648732e-05, "loss": 2.0234, "step": 15279 }, { "epoch": 0.2, "grad_norm": 4.604680061340332, "learning_rate": 1.99799124482052e-05, "loss": 2.7681, "step": 15280 }, { "epoch": 0.2, "grad_norm": 4.509479522705078, "learning_rate": 1.997990579065972e-05, "loss": 2.4536, "step": 15281 }, { "epoch": 0.2, "grad_norm": 4.6463541984558105, "learning_rate": 1.9979899132012286e-05, "loss": 2.1773, "step": 15282 }, { "epoch": 0.2, "grad_norm": 4.8315911293029785, "learning_rate": 1.9979892472262904e-05, "loss": 2.3472, "step": 15283 }, { "epoch": 0.2, "grad_norm": 4.59011697769165, "learning_rate": 1.997988581141157e-05, "loss": 2.4796, "step": 15284 }, { "epoch": 0.2, "grad_norm": 4.595742225646973, "learning_rate": 1.9979879149458287e-05, "loss": 2.1071, "step": 15285 }, { "epoch": 0.2, "grad_norm": 4.967422962188721, "learning_rate": 1.997987248640306e-05, "loss": 2.6117, "step": 15286 }, { "epoch": 0.2, "grad_norm": 4.130478858947754, "learning_rate": 1.9979865822245882e-05, "loss": 2.0073, "step": 15287 }, { "epoch": 0.2, "grad_norm": 4.294676303863525, "learning_rate": 1.997985915698676e-05, "loss": 2.1989, "step": 15288 }, { "epoch": 0.2, "grad_norm": 4.200153350830078, "learning_rate": 1.997985249062569e-05, "loss": 2.344, "step": 15289 }, { "epoch": 0.2, "grad_norm": 4.613924980163574, "learning_rate": 1.9979845823162676e-05, "loss": 1.9491, "step": 15290 }, { "epoch": 0.2, "grad_norm": 4.580280303955078, "learning_rate": 1.9979839154597717e-05, "loss": 2.0966, "step": 15291 }, { "epoch": 0.2, "grad_norm": 3.8725085258483887, "learning_rate": 1.9979832484930817e-05, "loss": 2.4223, "step": 15292 }, { "epoch": 0.2, "grad_norm": 4.9288787841796875, "learning_rate": 1.997982581416197e-05, "loss": 3.0683, "step": 15293 }, { "epoch": 0.2, "grad_norm": 4.849405288696289, "learning_rate": 1.9979819142291184e-05, "loss": 2.3584, "step": 15294 }, { "epoch": 0.2, "grad_norm": 3.991415500640869, "learning_rate": 1.9979812469318456e-05, "loss": 2.3068, "step": 15295 }, { "epoch": 0.2, "grad_norm": 4.456639289855957, "learning_rate": 1.997980579524379e-05, "loss": 2.3777, "step": 15296 }, { "epoch": 0.2, "grad_norm": 5.005468845367432, "learning_rate": 1.997979912006718e-05, "loss": 2.7471, "step": 15297 }, { "epoch": 0.2, "grad_norm": 4.105931282043457, "learning_rate": 1.997979244378863e-05, "loss": 2.1934, "step": 15298 }, { "epoch": 0.2, "grad_norm": 4.929733753204346, "learning_rate": 1.9979785766408147e-05, "loss": 2.4913, "step": 15299 }, { "epoch": 0.2, "grad_norm": 5.104826927185059, "learning_rate": 1.997977908792572e-05, "loss": 2.6621, "step": 15300 }, { "epoch": 0.2, "grad_norm": 4.670741081237793, "learning_rate": 1.997977240834136e-05, "loss": 2.5394, "step": 15301 }, { "epoch": 0.2, "grad_norm": 4.574732303619385, "learning_rate": 1.9979765727655064e-05, "loss": 2.3381, "step": 15302 }, { "epoch": 0.2, "grad_norm": 4.213190078735352, "learning_rate": 1.9979759045866833e-05, "loss": 2.5815, "step": 15303 }, { "epoch": 0.2, "grad_norm": 6.166733741760254, "learning_rate": 1.9979752362976664e-05, "loss": 2.4162, "step": 15304 }, { "epoch": 0.2, "grad_norm": 4.629156112670898, "learning_rate": 1.9979745678984566e-05, "loss": 1.9742, "step": 15305 }, { "epoch": 0.2, "grad_norm": 4.663384437561035, "learning_rate": 1.9979738993890532e-05, "loss": 2.5253, "step": 15306 }, { "epoch": 0.2, "grad_norm": 4.478745937347412, "learning_rate": 1.9979732307694563e-05, "loss": 2.3153, "step": 15307 }, { "epoch": 0.2, "grad_norm": 4.041675090789795, "learning_rate": 1.9979725620396664e-05, "loss": 2.1914, "step": 15308 }, { "epoch": 0.2, "grad_norm": 4.043710231781006, "learning_rate": 1.9979718931996834e-05, "loss": 1.9297, "step": 15309 }, { "epoch": 0.2, "grad_norm": 4.3306050300598145, "learning_rate": 1.9979712242495078e-05, "loss": 2.4246, "step": 15310 }, { "epoch": 0.2, "grad_norm": 5.251163005828857, "learning_rate": 1.9979705551891386e-05, "loss": 2.9674, "step": 15311 }, { "epoch": 0.2, "grad_norm": 4.167317867279053, "learning_rate": 1.997969886018577e-05, "loss": 2.1904, "step": 15312 }, { "epoch": 0.2, "grad_norm": 4.384130477905273, "learning_rate": 1.9979692167378223e-05, "loss": 2.7805, "step": 15313 }, { "epoch": 0.2, "grad_norm": 4.048710823059082, "learning_rate": 1.9979685473468748e-05, "loss": 2.1705, "step": 15314 }, { "epoch": 0.2, "grad_norm": 4.622816562652588, "learning_rate": 1.997967877845735e-05, "loss": 2.4234, "step": 15315 }, { "epoch": 0.2, "grad_norm": 5.065551280975342, "learning_rate": 1.9979672082344025e-05, "loss": 2.6782, "step": 15316 }, { "epoch": 0.2, "grad_norm": 4.296047210693359, "learning_rate": 1.997966538512877e-05, "loss": 2.1122, "step": 15317 }, { "epoch": 0.2, "grad_norm": 4.854038238525391, "learning_rate": 1.9979658686811596e-05, "loss": 2.2493, "step": 15318 }, { "epoch": 0.2, "grad_norm": 4.822791576385498, "learning_rate": 1.9979651987392497e-05, "loss": 2.7652, "step": 15319 }, { "epoch": 0.2, "grad_norm": 4.198803901672363, "learning_rate": 1.9979645286871476e-05, "loss": 2.2007, "step": 15320 }, { "epoch": 0.2, "grad_norm": 3.8876545429229736, "learning_rate": 1.997963858524853e-05, "loss": 2.0141, "step": 15321 }, { "epoch": 0.2, "grad_norm": 4.904576778411865, "learning_rate": 1.9979631882523667e-05, "loss": 2.6931, "step": 15322 }, { "epoch": 0.2, "grad_norm": 3.9745302200317383, "learning_rate": 1.997962517869688e-05, "loss": 2.2766, "step": 15323 }, { "epoch": 0.2, "grad_norm": 5.266428470611572, "learning_rate": 1.9979618473768174e-05, "loss": 2.6153, "step": 15324 }, { "epoch": 0.2, "grad_norm": 4.606686115264893, "learning_rate": 1.997961176773755e-05, "loss": 2.8274, "step": 15325 }, { "epoch": 0.2, "grad_norm": 4.348974227905273, "learning_rate": 1.9979605060605005e-05, "loss": 1.6319, "step": 15326 }, { "epoch": 0.2, "grad_norm": 4.803779125213623, "learning_rate": 1.9979598352370543e-05, "loss": 2.443, "step": 15327 }, { "epoch": 0.2, "grad_norm": 4.860045433044434, "learning_rate": 1.9979591643034165e-05, "loss": 2.3665, "step": 15328 }, { "epoch": 0.2, "grad_norm": 3.697286605834961, "learning_rate": 1.997958493259587e-05, "loss": 1.7183, "step": 15329 }, { "epoch": 0.2, "grad_norm": 4.5303521156311035, "learning_rate": 1.997957822105566e-05, "loss": 2.5514, "step": 15330 }, { "epoch": 0.2, "grad_norm": 4.111180782318115, "learning_rate": 1.9979571508413534e-05, "loss": 2.2684, "step": 15331 }, { "epoch": 0.2, "grad_norm": 4.3257951736450195, "learning_rate": 1.99795647946695e-05, "loss": 2.1782, "step": 15332 }, { "epoch": 0.2, "grad_norm": 3.9119186401367188, "learning_rate": 1.9979558079823544e-05, "loss": 2.0741, "step": 15333 }, { "epoch": 0.2, "grad_norm": 4.268965721130371, "learning_rate": 1.997955136387568e-05, "loss": 2.4364, "step": 15334 }, { "epoch": 0.2, "grad_norm": 4.434479236602783, "learning_rate": 1.9979544646825903e-05, "loss": 2.2958, "step": 15335 }, { "epoch": 0.2, "grad_norm": 4.270910263061523, "learning_rate": 1.9979537928674216e-05, "loss": 2.1652, "step": 15336 }, { "epoch": 0.2, "grad_norm": 5.050973415374756, "learning_rate": 1.9979531209420618e-05, "loss": 2.5673, "step": 15337 }, { "epoch": 0.2, "grad_norm": 4.336853504180908, "learning_rate": 1.997952448906511e-05, "loss": 2.1718, "step": 15338 }, { "epoch": 0.2, "grad_norm": 4.251143932342529, "learning_rate": 1.99795177676077e-05, "loss": 1.8459, "step": 15339 }, { "epoch": 0.2, "grad_norm": 3.9004852771759033, "learning_rate": 1.997951104504837e-05, "loss": 1.9648, "step": 15340 }, { "epoch": 0.2, "grad_norm": 4.2563958168029785, "learning_rate": 1.997950432138714e-05, "loss": 2.4726, "step": 15341 }, { "epoch": 0.2, "grad_norm": 5.112297534942627, "learning_rate": 1.9979497596624003e-05, "loss": 2.2945, "step": 15342 }, { "epoch": 0.2, "grad_norm": 4.537629127502441, "learning_rate": 1.9979490870758957e-05, "loss": 2.3613, "step": 15343 }, { "epoch": 0.2, "grad_norm": 3.6977851390838623, "learning_rate": 1.997948414379201e-05, "loss": 1.8398, "step": 15344 }, { "epoch": 0.2, "grad_norm": 4.216941833496094, "learning_rate": 1.9979477415723155e-05, "loss": 2.0402, "step": 15345 }, { "epoch": 0.2, "grad_norm": 5.439946174621582, "learning_rate": 1.9979470686552396e-05, "loss": 2.9592, "step": 15346 }, { "epoch": 0.2, "grad_norm": 3.874769687652588, "learning_rate": 1.997946395627974e-05, "loss": 1.8383, "step": 15347 }, { "epoch": 0.2, "grad_norm": 4.723268508911133, "learning_rate": 1.9979457224905177e-05, "loss": 1.9681, "step": 15348 }, { "epoch": 0.2, "grad_norm": 4.5937724113464355, "learning_rate": 1.9979450492428713e-05, "loss": 2.1151, "step": 15349 }, { "epoch": 0.2, "grad_norm": 3.692629814147949, "learning_rate": 1.997944375885035e-05, "loss": 1.6097, "step": 15350 }, { "epoch": 0.2, "grad_norm": 4.211348533630371, "learning_rate": 1.9979437024170087e-05, "loss": 2.1522, "step": 15351 }, { "epoch": 0.2, "grad_norm": 4.452347755432129, "learning_rate": 1.997943028838792e-05, "loss": 1.7456, "step": 15352 }, { "epoch": 0.2, "grad_norm": 4.6148176193237305, "learning_rate": 1.9979423551503864e-05, "loss": 2.33, "step": 15353 }, { "epoch": 0.2, "grad_norm": 4.080563545227051, "learning_rate": 1.99794168135179e-05, "loss": 2.0214, "step": 15354 }, { "epoch": 0.2, "grad_norm": 4.687322616577148, "learning_rate": 1.9979410074430047e-05, "loss": 2.3204, "step": 15355 }, { "epoch": 0.2, "grad_norm": 3.9389867782592773, "learning_rate": 1.9979403334240294e-05, "loss": 2.0712, "step": 15356 }, { "epoch": 0.2, "grad_norm": 5.064651012420654, "learning_rate": 1.9979396592948646e-05, "loss": 2.8324, "step": 15357 }, { "epoch": 0.2, "grad_norm": 5.431707859039307, "learning_rate": 1.9979389850555104e-05, "loss": 2.4025, "step": 15358 }, { "epoch": 0.2, "grad_norm": 4.719159126281738, "learning_rate": 1.997938310705967e-05, "loss": 2.6041, "step": 15359 }, { "epoch": 0.2, "grad_norm": 5.013566017150879, "learning_rate": 1.997937636246234e-05, "loss": 2.6031, "step": 15360 }, { "epoch": 0.2, "grad_norm": 4.443265914916992, "learning_rate": 1.997936961676312e-05, "loss": 2.2598, "step": 15361 }, { "epoch": 0.2, "grad_norm": 4.224661827087402, "learning_rate": 1.9979362869962004e-05, "loss": 2.1497, "step": 15362 }, { "epoch": 0.2, "grad_norm": 3.9162678718566895, "learning_rate": 1.9979356122059e-05, "loss": 1.8354, "step": 15363 }, { "epoch": 0.2, "grad_norm": 4.2963480949401855, "learning_rate": 1.9979349373054104e-05, "loss": 2.1579, "step": 15364 }, { "epoch": 0.2, "grad_norm": 4.788063049316406, "learning_rate": 1.997934262294732e-05, "loss": 2.0826, "step": 15365 }, { "epoch": 0.2, "grad_norm": 3.661710739135742, "learning_rate": 1.9979335871738645e-05, "loss": 1.9129, "step": 15366 }, { "epoch": 0.2, "grad_norm": 3.765838623046875, "learning_rate": 1.9979329119428084e-05, "loss": 1.5848, "step": 15367 }, { "epoch": 0.2, "grad_norm": 4.78205680847168, "learning_rate": 1.997932236601564e-05, "loss": 2.3239, "step": 15368 }, { "epoch": 0.2, "grad_norm": 4.573483467102051, "learning_rate": 1.9979315611501306e-05, "loss": 1.9886, "step": 15369 }, { "epoch": 0.2, "grad_norm": 4.214826583862305, "learning_rate": 1.997930885588508e-05, "loss": 1.9596, "step": 15370 }, { "epoch": 0.2, "grad_norm": 4.327518463134766, "learning_rate": 1.9979302099166978e-05, "loss": 1.8152, "step": 15371 }, { "epoch": 0.2, "grad_norm": 4.622026443481445, "learning_rate": 1.997929534134699e-05, "loss": 2.7547, "step": 15372 }, { "epoch": 0.2, "grad_norm": 4.93196964263916, "learning_rate": 1.9979288582425114e-05, "loss": 2.3398, "step": 15373 }, { "epoch": 0.2, "grad_norm": 4.555859565734863, "learning_rate": 1.997928182240136e-05, "loss": 2.3042, "step": 15374 }, { "epoch": 0.2, "grad_norm": 3.3791284561157227, "learning_rate": 1.997927506127572e-05, "loss": 1.7604, "step": 15375 }, { "epoch": 0.2, "grad_norm": 4.39216947555542, "learning_rate": 1.9979268299048204e-05, "loss": 2.0344, "step": 15376 }, { "epoch": 0.2, "grad_norm": 4.111532688140869, "learning_rate": 1.99792615357188e-05, "loss": 1.6682, "step": 15377 }, { "epoch": 0.2, "grad_norm": 4.083187580108643, "learning_rate": 1.9979254771287522e-05, "loss": 1.7861, "step": 15378 }, { "epoch": 0.2, "grad_norm": 4.230874538421631, "learning_rate": 1.9979248005754364e-05, "loss": 2.0842, "step": 15379 }, { "epoch": 0.2, "grad_norm": 4.135164260864258, "learning_rate": 1.9979241239119328e-05, "loss": 2.5968, "step": 15380 }, { "epoch": 0.2, "grad_norm": 5.687538146972656, "learning_rate": 1.9979234471382414e-05, "loss": 2.1753, "step": 15381 }, { "epoch": 0.2, "grad_norm": 4.077368259429932, "learning_rate": 1.997922770254362e-05, "loss": 2.3563, "step": 15382 }, { "epoch": 0.2, "grad_norm": 4.722318172454834, "learning_rate": 1.9979220932602955e-05, "loss": 2.3945, "step": 15383 }, { "epoch": 0.2, "grad_norm": 5.075766563415527, "learning_rate": 1.9979214161560413e-05, "loss": 2.4878, "step": 15384 }, { "epoch": 0.2, "grad_norm": 4.230952262878418, "learning_rate": 1.9979207389415994e-05, "loss": 2.2323, "step": 15385 }, { "epoch": 0.2, "grad_norm": 4.658911228179932, "learning_rate": 1.9979200616169706e-05, "loss": 2.2571, "step": 15386 }, { "epoch": 0.2, "grad_norm": 4.094665050506592, "learning_rate": 1.997919384182154e-05, "loss": 1.9202, "step": 15387 }, { "epoch": 0.2, "grad_norm": 4.821954727172852, "learning_rate": 1.9979187066371504e-05, "loss": 2.4801, "step": 15388 }, { "epoch": 0.2, "grad_norm": 4.165030479431152, "learning_rate": 1.9979180289819596e-05, "loss": 2.2863, "step": 15389 }, { "epoch": 0.2, "grad_norm": 3.532320737838745, "learning_rate": 1.997917351216582e-05, "loss": 1.6929, "step": 15390 }, { "epoch": 0.2, "grad_norm": 4.443198204040527, "learning_rate": 1.997916673341017e-05, "loss": 2.1878, "step": 15391 }, { "epoch": 0.2, "grad_norm": 3.926964521408081, "learning_rate": 1.997915995355265e-05, "loss": 2.0095, "step": 15392 }, { "epoch": 0.2, "grad_norm": 4.988936424255371, "learning_rate": 1.9979153172593264e-05, "loss": 2.5657, "step": 15393 }, { "epoch": 0.2, "grad_norm": 4.434813022613525, "learning_rate": 1.997914639053201e-05, "loss": 2.5172, "step": 15394 }, { "epoch": 0.2, "grad_norm": 4.699798107147217, "learning_rate": 1.9979139607368888e-05, "loss": 2.3061, "step": 15395 }, { "epoch": 0.2, "grad_norm": 3.893340587615967, "learning_rate": 1.99791328231039e-05, "loss": 2.2065, "step": 15396 }, { "epoch": 0.2, "grad_norm": 4.563174724578857, "learning_rate": 1.997912603773705e-05, "loss": 2.0341, "step": 15397 }, { "epoch": 0.2, "grad_norm": 4.1910905838012695, "learning_rate": 1.997911925126833e-05, "loss": 1.8552, "step": 15398 }, { "epoch": 0.2, "grad_norm": 4.881649494171143, "learning_rate": 1.9979112463697747e-05, "loss": 2.1251, "step": 15399 }, { "epoch": 0.2, "grad_norm": 3.58746075630188, "learning_rate": 1.9979105675025303e-05, "loss": 1.793, "step": 15400 }, { "epoch": 0.2, "grad_norm": 3.8320815563201904, "learning_rate": 1.9979098885250994e-05, "loss": 1.7842, "step": 15401 }, { "epoch": 0.2, "grad_norm": 4.052009582519531, "learning_rate": 1.9979092094374827e-05, "loss": 2.0859, "step": 15402 }, { "epoch": 0.2, "grad_norm": 4.721722602844238, "learning_rate": 1.9979085302396795e-05, "loss": 2.5386, "step": 15403 }, { "epoch": 0.2, "grad_norm": 3.788008451461792, "learning_rate": 1.9979078509316903e-05, "loss": 1.6266, "step": 15404 }, { "epoch": 0.2, "grad_norm": 4.043633937835693, "learning_rate": 1.9979071715135152e-05, "loss": 2.0861, "step": 15405 }, { "epoch": 0.2, "grad_norm": 3.8396036624908447, "learning_rate": 1.9979064919851544e-05, "loss": 2.0506, "step": 15406 }, { "epoch": 0.2, "grad_norm": 4.166713237762451, "learning_rate": 1.9979058123466075e-05, "loss": 2.204, "step": 15407 }, { "epoch": 0.2, "grad_norm": 4.871299743652344, "learning_rate": 1.9979051325978752e-05, "loss": 2.0556, "step": 15408 }, { "epoch": 0.2, "grad_norm": 3.9122729301452637, "learning_rate": 1.9979044527389567e-05, "loss": 1.9517, "step": 15409 }, { "epoch": 0.2, "grad_norm": 4.995724201202393, "learning_rate": 1.9979037727698532e-05, "loss": 2.546, "step": 15410 }, { "epoch": 0.2, "grad_norm": 4.064031600952148, "learning_rate": 1.997903092690564e-05, "loss": 2.117, "step": 15411 }, { "epoch": 0.2, "grad_norm": 3.675332546234131, "learning_rate": 1.9979024125010894e-05, "loss": 1.9755, "step": 15412 }, { "epoch": 0.2, "grad_norm": 4.455132961273193, "learning_rate": 1.9979017322014292e-05, "loss": 2.3959, "step": 15413 }, { "epoch": 0.2, "grad_norm": 4.79699182510376, "learning_rate": 1.997901051791584e-05, "loss": 2.3987, "step": 15414 }, { "epoch": 0.2, "grad_norm": 4.683689594268799, "learning_rate": 1.9979003712715536e-05, "loss": 2.0829, "step": 15415 }, { "epoch": 0.2, "grad_norm": 3.9512112140655518, "learning_rate": 1.997899690641338e-05, "loss": 1.5789, "step": 15416 }, { "epoch": 0.2, "grad_norm": 4.846491813659668, "learning_rate": 1.997899009900937e-05, "loss": 2.6125, "step": 15417 }, { "epoch": 0.2, "grad_norm": 4.9477009773254395, "learning_rate": 1.9978983290503515e-05, "loss": 2.8356, "step": 15418 }, { "epoch": 0.2, "grad_norm": 4.643484115600586, "learning_rate": 1.9978976480895808e-05, "loss": 2.2754, "step": 15419 }, { "epoch": 0.2, "grad_norm": 4.735405921936035, "learning_rate": 1.9978969670186256e-05, "loss": 2.4379, "step": 15420 }, { "epoch": 0.2, "grad_norm": 4.288224697113037, "learning_rate": 1.9978962858374853e-05, "loss": 2.3245, "step": 15421 }, { "epoch": 0.2, "grad_norm": 4.147627830505371, "learning_rate": 1.9978956045461606e-05, "loss": 1.9475, "step": 15422 }, { "epoch": 0.2, "grad_norm": 4.315036296844482, "learning_rate": 1.997894923144651e-05, "loss": 2.2615, "step": 15423 }, { "epoch": 0.2, "grad_norm": 4.102737903594971, "learning_rate": 1.997894241632957e-05, "loss": 2.0469, "step": 15424 }, { "epoch": 0.2, "grad_norm": 4.1008806228637695, "learning_rate": 1.9978935600110783e-05, "loss": 2.0784, "step": 15425 }, { "epoch": 0.2, "grad_norm": 4.772441864013672, "learning_rate": 1.9978928782790156e-05, "loss": 1.8888, "step": 15426 }, { "epoch": 0.2, "grad_norm": 3.7435617446899414, "learning_rate": 1.9978921964367685e-05, "loss": 1.8511, "step": 15427 }, { "epoch": 0.2, "grad_norm": 4.667945384979248, "learning_rate": 1.9978915144843373e-05, "loss": 2.7287, "step": 15428 }, { "epoch": 0.2, "grad_norm": 4.225515842437744, "learning_rate": 1.997890832421722e-05, "loss": 1.9499, "step": 15429 }, { "epoch": 0.2, "grad_norm": 4.368856906890869, "learning_rate": 1.9978901502489223e-05, "loss": 2.2946, "step": 15430 }, { "epoch": 0.2, "grad_norm": 4.557089805603027, "learning_rate": 1.997889467965939e-05, "loss": 2.4633, "step": 15431 }, { "epoch": 0.2, "grad_norm": 4.295909881591797, "learning_rate": 1.997888785572771e-05, "loss": 2.1867, "step": 15432 }, { "epoch": 0.2, "grad_norm": 4.355757713317871, "learning_rate": 1.9978881030694198e-05, "loss": 1.9671, "step": 15433 }, { "epoch": 0.2, "grad_norm": 4.0405378341674805, "learning_rate": 1.9978874204558848e-05, "loss": 1.9649, "step": 15434 }, { "epoch": 0.2, "grad_norm": 4.590426921844482, "learning_rate": 1.997886737732166e-05, "loss": 1.9354, "step": 15435 }, { "epoch": 0.2, "grad_norm": 4.901127815246582, "learning_rate": 1.9978860548982636e-05, "loss": 2.2758, "step": 15436 }, { "epoch": 0.2, "grad_norm": 4.258020877838135, "learning_rate": 1.9978853719541774e-05, "loss": 2.03, "step": 15437 }, { "epoch": 0.2, "grad_norm": 4.986496925354004, "learning_rate": 1.9978846888999078e-05, "loss": 2.1769, "step": 15438 }, { "epoch": 0.2, "grad_norm": 4.0412702560424805, "learning_rate": 1.997884005735455e-05, "loss": 2.0101, "step": 15439 }, { "epoch": 0.2, "grad_norm": 4.501669883728027, "learning_rate": 1.997883322460819e-05, "loss": 2.3401, "step": 15440 }, { "epoch": 0.2, "grad_norm": 5.076687812805176, "learning_rate": 1.9978826390759995e-05, "loss": 2.8722, "step": 15441 }, { "epoch": 0.2, "grad_norm": 4.44186544418335, "learning_rate": 1.9978819555809967e-05, "loss": 2.6761, "step": 15442 }, { "epoch": 0.2, "grad_norm": 4.320607662200928, "learning_rate": 1.997881271975811e-05, "loss": 2.2435, "step": 15443 }, { "epoch": 0.2, "grad_norm": 3.6898107528686523, "learning_rate": 1.9978805882604424e-05, "loss": 1.9993, "step": 15444 }, { "epoch": 0.2, "grad_norm": 4.397338390350342, "learning_rate": 1.997879904434891e-05, "loss": 2.5419, "step": 15445 }, { "epoch": 0.2, "grad_norm": 4.494856834411621, "learning_rate": 1.9978792204991562e-05, "loss": 2.0563, "step": 15446 }, { "epoch": 0.2, "grad_norm": 4.2757887840271, "learning_rate": 1.997878536453239e-05, "loss": 2.0917, "step": 15447 }, { "epoch": 0.2, "grad_norm": 4.328336238861084, "learning_rate": 1.9978778522971388e-05, "loss": 2.208, "step": 15448 }, { "epoch": 0.2, "grad_norm": 4.459947109222412, "learning_rate": 1.997877168030856e-05, "loss": 2.2409, "step": 15449 }, { "epoch": 0.2, "grad_norm": 4.110901832580566, "learning_rate": 1.9978764836543905e-05, "loss": 2.115, "step": 15450 }, { "epoch": 0.2, "grad_norm": 4.366110324859619, "learning_rate": 1.9978757991677428e-05, "loss": 2.1525, "step": 15451 }, { "epoch": 0.2, "grad_norm": 4.380190849304199, "learning_rate": 1.9978751145709127e-05, "loss": 2.4969, "step": 15452 }, { "epoch": 0.2, "grad_norm": 4.395824909210205, "learning_rate": 1.9978744298639002e-05, "loss": 2.027, "step": 15453 }, { "epoch": 0.2, "grad_norm": 4.244774341583252, "learning_rate": 1.9978737450467053e-05, "loss": 2.6519, "step": 15454 }, { "epoch": 0.2, "grad_norm": 3.816279411315918, "learning_rate": 1.9978730601193287e-05, "loss": 2.4972, "step": 15455 }, { "epoch": 0.2, "grad_norm": 4.0569071769714355, "learning_rate": 1.9978723750817694e-05, "loss": 2.1, "step": 15456 }, { "epoch": 0.2, "grad_norm": 4.8818678855896, "learning_rate": 1.9978716899340284e-05, "loss": 2.394, "step": 15457 }, { "epoch": 0.2, "grad_norm": 4.19577693939209, "learning_rate": 1.997871004676105e-05, "loss": 2.3557, "step": 15458 }, { "epoch": 0.2, "grad_norm": 4.754899024963379, "learning_rate": 1.997870319308e-05, "loss": 2.48, "step": 15459 }, { "epoch": 0.2, "grad_norm": 3.969616413116455, "learning_rate": 1.9978696338297134e-05, "loss": 1.9083, "step": 15460 }, { "epoch": 0.2, "grad_norm": 4.011049747467041, "learning_rate": 1.997868948241245e-05, "loss": 1.8282, "step": 15461 }, { "epoch": 0.2, "grad_norm": 4.337042331695557, "learning_rate": 1.9978682625425946e-05, "loss": 1.9506, "step": 15462 }, { "epoch": 0.2, "grad_norm": 4.40885591506958, "learning_rate": 1.9978675767337627e-05, "loss": 2.4094, "step": 15463 }, { "epoch": 0.2, "grad_norm": 4.561457633972168, "learning_rate": 1.9978668908147494e-05, "loss": 2.3291, "step": 15464 }, { "epoch": 0.2, "grad_norm": 3.8296332359313965, "learning_rate": 1.9978662047855547e-05, "loss": 1.8554, "step": 15465 }, { "epoch": 0.2, "grad_norm": 4.764369964599609, "learning_rate": 1.9978655186461787e-05, "loss": 2.4939, "step": 15466 }, { "epoch": 0.2, "grad_norm": 4.249947547912598, "learning_rate": 1.9978648323966213e-05, "loss": 1.7884, "step": 15467 }, { "epoch": 0.2, "grad_norm": 4.400415897369385, "learning_rate": 1.9978641460368825e-05, "loss": 2.3296, "step": 15468 }, { "epoch": 0.2, "grad_norm": 4.066311836242676, "learning_rate": 1.997863459566963e-05, "loss": 2.1621, "step": 15469 }, { "epoch": 0.2, "grad_norm": 4.137875556945801, "learning_rate": 1.9978627729868622e-05, "loss": 2.0426, "step": 15470 }, { "epoch": 0.2, "grad_norm": 4.1803202629089355, "learning_rate": 1.9978620862965803e-05, "loss": 1.9265, "step": 15471 }, { "epoch": 0.2, "grad_norm": 4.757083892822266, "learning_rate": 1.9978613994961174e-05, "loss": 2.2347, "step": 15472 }, { "epoch": 0.2, "grad_norm": 4.51753044128418, "learning_rate": 1.9978607125854742e-05, "loss": 2.5148, "step": 15473 }, { "epoch": 0.2, "grad_norm": 4.957581520080566, "learning_rate": 1.99786002556465e-05, "loss": 2.6209, "step": 15474 }, { "epoch": 0.2, "grad_norm": 4.226772785186768, "learning_rate": 1.9978593384336453e-05, "loss": 2.4923, "step": 15475 }, { "epoch": 0.2, "grad_norm": 3.887770175933838, "learning_rate": 1.9978586511924596e-05, "loss": 2.1446, "step": 15476 }, { "epoch": 0.2, "grad_norm": 4.6572699546813965, "learning_rate": 1.9978579638410936e-05, "loss": 2.2466, "step": 15477 }, { "epoch": 0.2, "grad_norm": 4.0010833740234375, "learning_rate": 1.9978572763795472e-05, "loss": 2.4473, "step": 15478 }, { "epoch": 0.2, "grad_norm": 4.054202079772949, "learning_rate": 1.9978565888078202e-05, "loss": 1.8241, "step": 15479 }, { "epoch": 0.2, "grad_norm": 3.807842493057251, "learning_rate": 1.997855901125913e-05, "loss": 1.7945, "step": 15480 }, { "epoch": 0.2, "grad_norm": 4.951267242431641, "learning_rate": 1.9978552133338257e-05, "loss": 2.4758, "step": 15481 }, { "epoch": 0.2, "grad_norm": 4.557794094085693, "learning_rate": 1.9978545254315583e-05, "loss": 2.3353, "step": 15482 }, { "epoch": 0.2, "grad_norm": 3.94527268409729, "learning_rate": 1.9978538374191108e-05, "loss": 2.1629, "step": 15483 }, { "epoch": 0.2, "grad_norm": 4.345099925994873, "learning_rate": 1.997853149296483e-05, "loss": 2.2746, "step": 15484 }, { "epoch": 0.2, "grad_norm": 4.87848424911499, "learning_rate": 1.9978524610636756e-05, "loss": 2.4269, "step": 15485 }, { "epoch": 0.2, "grad_norm": 4.410762786865234, "learning_rate": 1.9978517727206884e-05, "loss": 2.2233, "step": 15486 }, { "epoch": 0.2, "grad_norm": 5.082663059234619, "learning_rate": 1.9978510842675213e-05, "loss": 2.3716, "step": 15487 }, { "epoch": 0.2, "grad_norm": 4.319828987121582, "learning_rate": 1.9978503957041744e-05, "loss": 2.3155, "step": 15488 }, { "epoch": 0.2, "grad_norm": 4.40386962890625, "learning_rate": 1.997849707030648e-05, "loss": 1.9834, "step": 15489 }, { "epoch": 0.2, "grad_norm": 4.398983478546143, "learning_rate": 1.9978490182469418e-05, "loss": 1.9813, "step": 15490 }, { "epoch": 0.2, "grad_norm": 4.229344367980957, "learning_rate": 1.9978483293530566e-05, "loss": 1.9765, "step": 15491 }, { "epoch": 0.2, "grad_norm": 4.549172401428223, "learning_rate": 1.997847640348992e-05, "loss": 2.4032, "step": 15492 }, { "epoch": 0.2, "grad_norm": 4.2725138664245605, "learning_rate": 1.997846951234748e-05, "loss": 2.0673, "step": 15493 }, { "epoch": 0.2, "grad_norm": 3.94911789894104, "learning_rate": 1.9978462620103244e-05, "loss": 2.2324, "step": 15494 }, { "epoch": 0.2, "grad_norm": 4.666902542114258, "learning_rate": 1.997845572675722e-05, "loss": 2.5328, "step": 15495 }, { "epoch": 0.2, "grad_norm": 4.45157527923584, "learning_rate": 1.9978448832309403e-05, "loss": 2.2232, "step": 15496 }, { "epoch": 0.2, "grad_norm": 4.047799587249756, "learning_rate": 1.99784419367598e-05, "loss": 1.765, "step": 15497 }, { "epoch": 0.2, "grad_norm": 4.3613810539245605, "learning_rate": 1.9978435040108403e-05, "loss": 2.45, "step": 15498 }, { "epoch": 0.2, "grad_norm": 5.084423542022705, "learning_rate": 1.9978428142355222e-05, "loss": 2.1899, "step": 15499 }, { "epoch": 0.2, "grad_norm": 5.39471435546875, "learning_rate": 1.997842124350025e-05, "loss": 2.7437, "step": 15500 }, { "epoch": 0.2, "grad_norm": 4.49472713470459, "learning_rate": 1.9978414343543492e-05, "loss": 2.3738, "step": 15501 }, { "epoch": 0.2, "grad_norm": 4.459755897521973, "learning_rate": 1.9978407442484947e-05, "loss": 2.4338, "step": 15502 }, { "epoch": 0.2, "grad_norm": 4.4059553146362305, "learning_rate": 1.997840054032462e-05, "loss": 1.8431, "step": 15503 }, { "epoch": 0.2, "grad_norm": 4.105100154876709, "learning_rate": 1.99783936370625e-05, "loss": 2.1316, "step": 15504 }, { "epoch": 0.2, "grad_norm": 4.7703142166137695, "learning_rate": 1.9978386732698604e-05, "loss": 2.3074, "step": 15505 }, { "epoch": 0.2, "grad_norm": 4.716729640960693, "learning_rate": 1.9978379827232923e-05, "loss": 2.3081, "step": 15506 }, { "epoch": 0.2, "grad_norm": 3.8984856605529785, "learning_rate": 1.997837292066546e-05, "loss": 1.8839, "step": 15507 }, { "epoch": 0.2, "grad_norm": 4.714389801025391, "learning_rate": 1.997836601299621e-05, "loss": 1.9503, "step": 15508 }, { "epoch": 0.2, "grad_norm": 4.974388122558594, "learning_rate": 1.9978359104225188e-05, "loss": 1.9681, "step": 15509 }, { "epoch": 0.2, "grad_norm": 4.251732349395752, "learning_rate": 1.997835219435238e-05, "loss": 1.925, "step": 15510 }, { "epoch": 0.2, "grad_norm": 4.246705532073975, "learning_rate": 1.997834528337779e-05, "loss": 2.0258, "step": 15511 }, { "epoch": 0.2, "grad_norm": 4.290140151977539, "learning_rate": 1.9978338371301428e-05, "loss": 2.3454, "step": 15512 }, { "epoch": 0.2, "grad_norm": 4.019914150238037, "learning_rate": 1.9978331458123285e-05, "loss": 2.3637, "step": 15513 }, { "epoch": 0.2, "grad_norm": 4.4329729080200195, "learning_rate": 1.9978324543843366e-05, "loss": 1.8416, "step": 15514 }, { "epoch": 0.2, "grad_norm": 3.9109556674957275, "learning_rate": 1.997831762846167e-05, "loss": 1.8417, "step": 15515 }, { "epoch": 0.2, "grad_norm": 4.426055908203125, "learning_rate": 1.9978310711978198e-05, "loss": 2.4381, "step": 15516 }, { "epoch": 0.2, "grad_norm": 4.708679676055908, "learning_rate": 1.9978303794392953e-05, "loss": 2.6256, "step": 15517 }, { "epoch": 0.2, "grad_norm": 4.83919620513916, "learning_rate": 1.997829687570593e-05, "loss": 2.5025, "step": 15518 }, { "epoch": 0.2, "grad_norm": 4.216188907623291, "learning_rate": 1.997828995591714e-05, "loss": 1.9352, "step": 15519 }, { "epoch": 0.2, "grad_norm": 4.152790069580078, "learning_rate": 1.9978283035026572e-05, "loss": 2.3384, "step": 15520 }, { "epoch": 0.2, "grad_norm": 4.520959377288818, "learning_rate": 1.9978276113034235e-05, "loss": 2.0393, "step": 15521 }, { "epoch": 0.2, "grad_norm": 4.516191482543945, "learning_rate": 1.9978269189940125e-05, "loss": 2.3282, "step": 15522 }, { "epoch": 0.2, "grad_norm": 4.183093070983887, "learning_rate": 1.9978262265744245e-05, "loss": 2.0258, "step": 15523 }, { "epoch": 0.2, "grad_norm": 4.196780204772949, "learning_rate": 1.99782553404466e-05, "loss": 2.1609, "step": 15524 }, { "epoch": 0.2, "grad_norm": 4.006222724914551, "learning_rate": 1.997824841404718e-05, "loss": 2.2375, "step": 15525 }, { "epoch": 0.2, "grad_norm": 4.92562198638916, "learning_rate": 1.9978241486545994e-05, "loss": 2.3756, "step": 15526 }, { "epoch": 0.2, "grad_norm": 4.405766487121582, "learning_rate": 1.9978234557943043e-05, "loss": 2.1403, "step": 15527 }, { "epoch": 0.2, "grad_norm": 4.256051540374756, "learning_rate": 1.9978227628238325e-05, "loss": 2.42, "step": 15528 }, { "epoch": 0.2, "grad_norm": 4.561025142669678, "learning_rate": 1.997822069743184e-05, "loss": 2.4957, "step": 15529 }, { "epoch": 0.2, "grad_norm": 4.546108722686768, "learning_rate": 1.9978213765523594e-05, "loss": 1.9926, "step": 15530 }, { "epoch": 0.2, "grad_norm": 4.345462799072266, "learning_rate": 1.9978206832513577e-05, "loss": 1.9395, "step": 15531 }, { "epoch": 0.2, "grad_norm": 3.7474632263183594, "learning_rate": 1.9978199898401804e-05, "loss": 1.6411, "step": 15532 }, { "epoch": 0.2, "grad_norm": 4.257511138916016, "learning_rate": 1.9978192963188262e-05, "loss": 1.8761, "step": 15533 }, { "epoch": 0.2, "grad_norm": 3.8208658695220947, "learning_rate": 1.9978186026872964e-05, "loss": 2.0182, "step": 15534 }, { "epoch": 0.2, "grad_norm": 4.56786584854126, "learning_rate": 1.99781790894559e-05, "loss": 2.3986, "step": 15535 }, { "epoch": 0.2, "grad_norm": 4.179819583892822, "learning_rate": 1.9978172150937078e-05, "loss": 2.2896, "step": 15536 }, { "epoch": 0.2, "grad_norm": 4.167489051818848, "learning_rate": 1.9978165211316498e-05, "loss": 2.2077, "step": 15537 }, { "epoch": 0.2, "grad_norm": 4.543004512786865, "learning_rate": 1.9978158270594158e-05, "loss": 1.9945, "step": 15538 }, { "epoch": 0.2, "grad_norm": 4.438534259796143, "learning_rate": 1.9978151328770062e-05, "loss": 2.4737, "step": 15539 }, { "epoch": 0.2, "grad_norm": 4.276235580444336, "learning_rate": 1.9978144385844204e-05, "loss": 2.1899, "step": 15540 }, { "epoch": 0.2, "grad_norm": 4.720442295074463, "learning_rate": 1.9978137441816596e-05, "loss": 2.1215, "step": 15541 }, { "epoch": 0.2, "grad_norm": 4.351494312286377, "learning_rate": 1.9978130496687225e-05, "loss": 2.4973, "step": 15542 }, { "epoch": 0.2, "grad_norm": 4.41424036026001, "learning_rate": 1.9978123550456105e-05, "loss": 2.2709, "step": 15543 }, { "epoch": 0.2, "grad_norm": 4.109429836273193, "learning_rate": 1.997811660312323e-05, "loss": 2.0208, "step": 15544 }, { "epoch": 0.2, "grad_norm": 4.759026050567627, "learning_rate": 1.9978109654688596e-05, "loss": 2.5418, "step": 15545 }, { "epoch": 0.2, "grad_norm": 4.269306182861328, "learning_rate": 1.9978102705152215e-05, "loss": 2.396, "step": 15546 }, { "epoch": 0.2, "grad_norm": 4.659605503082275, "learning_rate": 1.997809575451408e-05, "loss": 2.1707, "step": 15547 }, { "epoch": 0.2, "grad_norm": 4.277069568634033, "learning_rate": 1.9978088802774197e-05, "loss": 2.3233, "step": 15548 }, { "epoch": 0.2, "grad_norm": 4.399735450744629, "learning_rate": 1.997808184993256e-05, "loss": 1.976, "step": 15549 }, { "epoch": 0.2, "grad_norm": 4.191700458526611, "learning_rate": 1.9978074895989175e-05, "loss": 2.3194, "step": 15550 }, { "epoch": 0.2, "grad_norm": 4.115909576416016, "learning_rate": 1.997806794094404e-05, "loss": 2.3123, "step": 15551 }, { "epoch": 0.2, "grad_norm": 4.118836879730225, "learning_rate": 1.997806098479716e-05, "loss": 1.8216, "step": 15552 }, { "epoch": 0.2, "grad_norm": 5.01195764541626, "learning_rate": 1.997805402754853e-05, "loss": 2.9693, "step": 15553 }, { "epoch": 0.2, "grad_norm": 3.9563851356506348, "learning_rate": 1.9978047069198153e-05, "loss": 2.0599, "step": 15554 }, { "epoch": 0.2, "grad_norm": 3.9992380142211914, "learning_rate": 1.9978040109746034e-05, "loss": 1.7765, "step": 15555 }, { "epoch": 0.2, "grad_norm": 3.761711835861206, "learning_rate": 1.997803314919217e-05, "loss": 1.8315, "step": 15556 }, { "epoch": 0.2, "grad_norm": 4.127561092376709, "learning_rate": 1.9978026187536556e-05, "loss": 2.2779, "step": 15557 }, { "epoch": 0.2, "grad_norm": 4.275899887084961, "learning_rate": 1.9978019224779203e-05, "loss": 2.145, "step": 15558 }, { "epoch": 0.2, "grad_norm": 4.8771796226501465, "learning_rate": 1.9978012260920107e-05, "loss": 2.3963, "step": 15559 }, { "epoch": 0.2, "grad_norm": 4.130923271179199, "learning_rate": 1.997800529595927e-05, "loss": 1.9721, "step": 15560 }, { "epoch": 0.2, "grad_norm": 5.141072750091553, "learning_rate": 1.9977998329896692e-05, "loss": 2.7199, "step": 15561 }, { "epoch": 0.2, "grad_norm": 3.7785611152648926, "learning_rate": 1.997799136273237e-05, "loss": 1.5597, "step": 15562 }, { "epoch": 0.2, "grad_norm": 3.882521629333496, "learning_rate": 1.9977984394466314e-05, "loss": 2.1569, "step": 15563 }, { "epoch": 0.2, "grad_norm": 4.282042026519775, "learning_rate": 1.9977977425098512e-05, "loss": 2.6672, "step": 15564 }, { "epoch": 0.2, "grad_norm": 4.5111470222473145, "learning_rate": 1.997797045462898e-05, "loss": 3.0041, "step": 15565 }, { "epoch": 0.2, "grad_norm": 4.378716945648193, "learning_rate": 1.997796348305771e-05, "loss": 1.9831, "step": 15566 }, { "epoch": 0.2, "grad_norm": 4.746182441711426, "learning_rate": 1.9977956510384694e-05, "loss": 2.5404, "step": 15567 }, { "epoch": 0.2, "grad_norm": 4.5029120445251465, "learning_rate": 1.997794953660995e-05, "loss": 2.4604, "step": 15568 }, { "epoch": 0.2, "grad_norm": 4.2271342277526855, "learning_rate": 1.9977942561733472e-05, "loss": 2.2947, "step": 15569 }, { "epoch": 0.2, "grad_norm": 4.075370788574219, "learning_rate": 1.9977935585755256e-05, "loss": 1.9782, "step": 15570 }, { "epoch": 0.2, "grad_norm": 4.511455535888672, "learning_rate": 1.997792860867531e-05, "loss": 1.9895, "step": 15571 }, { "epoch": 0.2, "grad_norm": 4.081428527832031, "learning_rate": 1.997792163049363e-05, "loss": 1.8092, "step": 15572 }, { "epoch": 0.2, "grad_norm": 5.0035271644592285, "learning_rate": 1.9977914651210218e-05, "loss": 2.1521, "step": 15573 }, { "epoch": 0.2, "grad_norm": 4.835687637329102, "learning_rate": 1.9977907670825076e-05, "loss": 2.457, "step": 15574 }, { "epoch": 0.2, "grad_norm": 4.157716751098633, "learning_rate": 1.99779006893382e-05, "loss": 2.4609, "step": 15575 }, { "epoch": 0.2, "grad_norm": 4.451684951782227, "learning_rate": 1.9977893706749602e-05, "loss": 2.0981, "step": 15576 }, { "epoch": 0.2, "grad_norm": 3.733830690383911, "learning_rate": 1.997788672305927e-05, "loss": 1.6149, "step": 15577 }, { "epoch": 0.2, "grad_norm": 4.199152946472168, "learning_rate": 1.9977879738267208e-05, "loss": 2.4425, "step": 15578 }, { "epoch": 0.2, "grad_norm": 3.86612606048584, "learning_rate": 1.997787275237342e-05, "loss": 2.031, "step": 15579 }, { "epoch": 0.2, "grad_norm": 4.597973823547363, "learning_rate": 1.9977865765377912e-05, "loss": 2.2168, "step": 15580 }, { "epoch": 0.2, "grad_norm": 5.133955955505371, "learning_rate": 1.997785877728067e-05, "loss": 2.503, "step": 15581 }, { "epoch": 0.2, "grad_norm": 4.921510696411133, "learning_rate": 1.9977851788081707e-05, "loss": 2.2862, "step": 15582 }, { "epoch": 0.2, "grad_norm": 4.123103141784668, "learning_rate": 1.997784479778102e-05, "loss": 2.5613, "step": 15583 }, { "epoch": 0.2, "grad_norm": 4.051821708679199, "learning_rate": 1.997783780637861e-05, "loss": 2.1656, "step": 15584 }, { "epoch": 0.2, "grad_norm": 4.40843391418457, "learning_rate": 1.9977830813874476e-05, "loss": 2.007, "step": 15585 }, { "epoch": 0.2, "grad_norm": 4.94284725189209, "learning_rate": 1.997782382026862e-05, "loss": 3.032, "step": 15586 }, { "epoch": 0.2, "grad_norm": 4.72102165222168, "learning_rate": 1.9977816825561046e-05, "loss": 2.4531, "step": 15587 }, { "epoch": 0.2, "grad_norm": 4.530117988586426, "learning_rate": 1.997780982975175e-05, "loss": 1.9836, "step": 15588 }, { "epoch": 0.2, "grad_norm": 4.66946268081665, "learning_rate": 1.997780283284073e-05, "loss": 2.2935, "step": 15589 }, { "epoch": 0.2, "grad_norm": 4.852494716644287, "learning_rate": 1.9977795834827995e-05, "loss": 2.3413, "step": 15590 }, { "epoch": 0.2, "grad_norm": 4.475719451904297, "learning_rate": 1.9977788835713542e-05, "loss": 2.5898, "step": 15591 }, { "epoch": 0.2, "grad_norm": 4.470993995666504, "learning_rate": 1.9977781835497373e-05, "loss": 2.5037, "step": 15592 }, { "epoch": 0.2, "grad_norm": 3.588801622390747, "learning_rate": 1.9977774834179486e-05, "loss": 1.7476, "step": 15593 }, { "epoch": 0.2, "grad_norm": 4.492420196533203, "learning_rate": 1.9977767831759886e-05, "loss": 2.4357, "step": 15594 }, { "epoch": 0.2, "grad_norm": 4.812926292419434, "learning_rate": 1.9977760828238568e-05, "loss": 2.5311, "step": 15595 }, { "epoch": 0.2, "grad_norm": 4.21267557144165, "learning_rate": 1.9977753823615537e-05, "loss": 2.0429, "step": 15596 }, { "epoch": 0.2, "grad_norm": 4.2332234382629395, "learning_rate": 1.997774681789079e-05, "loss": 2.588, "step": 15597 }, { "epoch": 0.2, "grad_norm": 4.251152038574219, "learning_rate": 1.9977739811064337e-05, "loss": 2.4565, "step": 15598 }, { "epoch": 0.2, "grad_norm": 4.160054683685303, "learning_rate": 1.997773280313617e-05, "loss": 2.2046, "step": 15599 }, { "epoch": 0.2, "grad_norm": 5.360334396362305, "learning_rate": 1.997772579410629e-05, "loss": 2.6003, "step": 15600 }, { "epoch": 0.2, "grad_norm": 5.17326021194458, "learning_rate": 1.99777187839747e-05, "loss": 2.7211, "step": 15601 }, { "epoch": 0.2, "grad_norm": 4.748886585235596, "learning_rate": 1.9977711772741402e-05, "loss": 2.1633, "step": 15602 }, { "epoch": 0.2, "grad_norm": 4.234918117523193, "learning_rate": 1.9977704760406394e-05, "loss": 1.9422, "step": 15603 }, { "epoch": 0.2, "grad_norm": 4.083731651306152, "learning_rate": 1.9977697746969678e-05, "loss": 2.5034, "step": 15604 }, { "epoch": 0.2, "grad_norm": 3.8231992721557617, "learning_rate": 1.9977690732431252e-05, "loss": 1.5922, "step": 15605 }, { "epoch": 0.2, "grad_norm": 4.955036640167236, "learning_rate": 1.9977683716791125e-05, "loss": 2.6906, "step": 15606 }, { "epoch": 0.2, "grad_norm": 3.9730238914489746, "learning_rate": 1.997767670004929e-05, "loss": 2.1063, "step": 15607 }, { "epoch": 0.2, "grad_norm": 4.677797317504883, "learning_rate": 1.9977669682205752e-05, "loss": 2.4853, "step": 15608 }, { "epoch": 0.2, "grad_norm": 4.505822658538818, "learning_rate": 1.997766266326051e-05, "loss": 2.1253, "step": 15609 }, { "epoch": 0.2, "grad_norm": 4.411340713500977, "learning_rate": 1.997765564321356e-05, "loss": 2.0094, "step": 15610 }, { "epoch": 0.2, "grad_norm": 4.481847763061523, "learning_rate": 1.9977648622064914e-05, "loss": 2.4252, "step": 15611 }, { "epoch": 0.2, "grad_norm": 4.681706428527832, "learning_rate": 1.9977641599814563e-05, "loss": 2.4032, "step": 15612 }, { "epoch": 0.2, "grad_norm": 4.704679489135742, "learning_rate": 1.997763457646251e-05, "loss": 2.4323, "step": 15613 }, { "epoch": 0.2, "grad_norm": 3.9616787433624268, "learning_rate": 1.997762755200876e-05, "loss": 1.845, "step": 15614 }, { "epoch": 0.2, "grad_norm": 4.197118282318115, "learning_rate": 1.9977620526453306e-05, "loss": 2.0911, "step": 15615 }, { "epoch": 0.2, "grad_norm": 4.0637526512146, "learning_rate": 1.9977613499796157e-05, "loss": 1.9852, "step": 15616 }, { "epoch": 0.2, "grad_norm": 4.24874210357666, "learning_rate": 1.9977606472037312e-05, "loss": 1.8748, "step": 15617 }, { "epoch": 0.2, "grad_norm": 4.862092971801758, "learning_rate": 1.9977599443176766e-05, "loss": 2.7697, "step": 15618 }, { "epoch": 0.2, "grad_norm": 4.2416911125183105, "learning_rate": 1.9977592413214525e-05, "loss": 1.886, "step": 15619 }, { "epoch": 0.2, "grad_norm": 4.683785915374756, "learning_rate": 1.9977585382150592e-05, "loss": 2.1903, "step": 15620 }, { "epoch": 0.2, "grad_norm": 4.252310752868652, "learning_rate": 1.9977578349984957e-05, "loss": 2.0235, "step": 15621 }, { "epoch": 0.2, "grad_norm": 3.86232328414917, "learning_rate": 1.9977571316717634e-05, "loss": 1.9409, "step": 15622 }, { "epoch": 0.2, "grad_norm": 4.065740585327148, "learning_rate": 1.9977564282348612e-05, "loss": 2.1201, "step": 15623 }, { "epoch": 0.2, "grad_norm": 5.223880290985107, "learning_rate": 1.9977557246877906e-05, "loss": 2.6434, "step": 15624 }, { "epoch": 0.2, "grad_norm": 3.8286073207855225, "learning_rate": 1.9977550210305504e-05, "loss": 1.9368, "step": 15625 }, { "epoch": 0.2, "grad_norm": 4.651027202606201, "learning_rate": 1.997754317263141e-05, "loss": 2.1061, "step": 15626 }, { "epoch": 0.2, "grad_norm": 4.308676242828369, "learning_rate": 1.997753613385563e-05, "loss": 2.3751, "step": 15627 }, { "epoch": 0.2, "grad_norm": 4.56522798538208, "learning_rate": 1.9977529093978155e-05, "loss": 2.2689, "step": 15628 }, { "epoch": 0.2, "grad_norm": 5.138084411621094, "learning_rate": 1.9977522052998994e-05, "loss": 2.4384, "step": 15629 }, { "epoch": 0.2, "grad_norm": 3.8962535858154297, "learning_rate": 1.9977515010918147e-05, "loss": 1.7009, "step": 15630 }, { "epoch": 0.2, "grad_norm": 4.549780368804932, "learning_rate": 1.997750796773561e-05, "loss": 2.1574, "step": 15631 }, { "epoch": 0.2, "grad_norm": 3.777376174926758, "learning_rate": 1.997750092345139e-05, "loss": 1.8987, "step": 15632 }, { "epoch": 0.2, "grad_norm": 3.861215114593506, "learning_rate": 1.9977493878065484e-05, "loss": 1.6035, "step": 15633 }, { "epoch": 0.2, "grad_norm": 4.937371730804443, "learning_rate": 1.9977486831577894e-05, "loss": 2.5219, "step": 15634 }, { "epoch": 0.2, "grad_norm": 4.774874687194824, "learning_rate": 1.9977479783988616e-05, "loss": 1.991, "step": 15635 }, { "epoch": 0.2, "grad_norm": 4.691425323486328, "learning_rate": 1.997747273529766e-05, "loss": 2.232, "step": 15636 }, { "epoch": 0.2, "grad_norm": 4.92581844329834, "learning_rate": 1.997746568550502e-05, "loss": 2.8281, "step": 15637 }, { "epoch": 0.2, "grad_norm": 3.8425464630126953, "learning_rate": 1.9977458634610696e-05, "loss": 2.0803, "step": 15638 }, { "epoch": 0.2, "grad_norm": 5.2129106521606445, "learning_rate": 1.9977451582614695e-05, "loss": 2.1916, "step": 15639 }, { "epoch": 0.2, "grad_norm": 3.6573221683502197, "learning_rate": 1.9977444529517012e-05, "loss": 1.8804, "step": 15640 }, { "epoch": 0.2, "grad_norm": 4.261052131652832, "learning_rate": 1.997743747531765e-05, "loss": 1.7682, "step": 15641 }, { "epoch": 0.2, "grad_norm": 4.521426677703857, "learning_rate": 1.997743042001661e-05, "loss": 2.3143, "step": 15642 }, { "epoch": 0.2, "grad_norm": 4.246732711791992, "learning_rate": 1.9977423363613893e-05, "loss": 2.2591, "step": 15643 }, { "epoch": 0.2, "grad_norm": 6.759105682373047, "learning_rate": 1.9977416306109498e-05, "loss": 2.4155, "step": 15644 }, { "epoch": 0.2, "grad_norm": 4.150235652923584, "learning_rate": 1.9977409247503424e-05, "loss": 2.1333, "step": 15645 }, { "epoch": 0.2, "grad_norm": 4.772291660308838, "learning_rate": 1.9977402187795683e-05, "loss": 2.5645, "step": 15646 }, { "epoch": 0.2, "grad_norm": 4.108341217041016, "learning_rate": 1.997739512698626e-05, "loss": 2.1929, "step": 15647 }, { "epoch": 0.2, "grad_norm": 4.411865234375, "learning_rate": 1.9977388065075166e-05, "loss": 2.2488, "step": 15648 }, { "epoch": 0.2, "grad_norm": 4.817422866821289, "learning_rate": 1.99773810020624e-05, "loss": 2.3813, "step": 15649 }, { "epoch": 0.2, "grad_norm": 4.425929069519043, "learning_rate": 1.997737393794796e-05, "loss": 2.3876, "step": 15650 }, { "epoch": 0.2, "grad_norm": 4.83490514755249, "learning_rate": 1.9977366872731852e-05, "loss": 2.5093, "step": 15651 }, { "epoch": 0.2, "grad_norm": 4.2709736824035645, "learning_rate": 1.997735980641407e-05, "loss": 2.1353, "step": 15652 }, { "epoch": 0.2, "grad_norm": 4.553196430206299, "learning_rate": 1.997735273899462e-05, "loss": 2.2367, "step": 15653 }, { "epoch": 0.2, "grad_norm": 4.471006870269775, "learning_rate": 1.99773456704735e-05, "loss": 2.1735, "step": 15654 }, { "epoch": 0.2, "grad_norm": 4.5639495849609375, "learning_rate": 1.9977338600850712e-05, "loss": 1.9796, "step": 15655 }, { "epoch": 0.2, "grad_norm": 4.298710346221924, "learning_rate": 1.9977331530126257e-05, "loss": 2.2384, "step": 15656 }, { "epoch": 0.2, "grad_norm": 4.645875453948975, "learning_rate": 1.9977324458300137e-05, "loss": 2.2373, "step": 15657 }, { "epoch": 0.2, "grad_norm": 3.8991756439208984, "learning_rate": 1.9977317385372346e-05, "loss": 1.7679, "step": 15658 }, { "epoch": 0.2, "grad_norm": 4.458983898162842, "learning_rate": 1.9977310311342894e-05, "loss": 2.1325, "step": 15659 }, { "epoch": 0.2, "grad_norm": 4.752949237823486, "learning_rate": 1.9977303236211777e-05, "loss": 2.0596, "step": 15660 }, { "epoch": 0.2, "grad_norm": 4.431856632232666, "learning_rate": 1.9977296159978996e-05, "loss": 2.0971, "step": 15661 }, { "epoch": 0.2, "grad_norm": 4.244964599609375, "learning_rate": 1.9977289082644553e-05, "loss": 2.3895, "step": 15662 }, { "epoch": 0.2, "grad_norm": 3.978606700897217, "learning_rate": 1.9977282004208446e-05, "loss": 2.0672, "step": 15663 }, { "epoch": 0.2, "grad_norm": 4.423789978027344, "learning_rate": 1.997727492467068e-05, "loss": 2.3868, "step": 15664 }, { "epoch": 0.2, "grad_norm": 4.378438472747803, "learning_rate": 1.997726784403125e-05, "loss": 2.4196, "step": 15665 }, { "epoch": 0.2, "grad_norm": 5.102541923522949, "learning_rate": 1.9977260762290164e-05, "loss": 2.7396, "step": 15666 }, { "epoch": 0.2, "grad_norm": 4.051455020904541, "learning_rate": 1.997725367944742e-05, "loss": 1.81, "step": 15667 }, { "epoch": 0.2, "grad_norm": 4.524204730987549, "learning_rate": 1.9977246595503015e-05, "loss": 2.1058, "step": 15668 }, { "epoch": 0.2, "grad_norm": 4.198702335357666, "learning_rate": 1.9977239510456954e-05, "loss": 1.9883, "step": 15669 }, { "epoch": 0.2, "grad_norm": 4.830541133880615, "learning_rate": 1.9977232424309235e-05, "loss": 2.8133, "step": 15670 }, { "epoch": 0.2, "grad_norm": 4.212435722351074, "learning_rate": 1.9977225337059862e-05, "loss": 1.9896, "step": 15671 }, { "epoch": 0.2, "grad_norm": 4.380545139312744, "learning_rate": 1.9977218248708838e-05, "loss": 2.1759, "step": 15672 }, { "epoch": 0.2, "grad_norm": 4.209687232971191, "learning_rate": 1.9977211159256152e-05, "loss": 2.3978, "step": 15673 }, { "epoch": 0.2, "grad_norm": 4.428238868713379, "learning_rate": 1.997720406870182e-05, "loss": 2.5187, "step": 15674 }, { "epoch": 0.2, "grad_norm": 4.464115619659424, "learning_rate": 1.9977196977045828e-05, "loss": 1.9155, "step": 15675 }, { "epoch": 0.2, "grad_norm": 3.9736337661743164, "learning_rate": 1.997718988428819e-05, "loss": 2.2634, "step": 15676 }, { "epoch": 0.2, "grad_norm": 4.713077068328857, "learning_rate": 1.9977182790428896e-05, "loss": 2.1614, "step": 15677 }, { "epoch": 0.2, "grad_norm": 4.26299524307251, "learning_rate": 1.9977175695467956e-05, "loss": 2.2772, "step": 15678 }, { "epoch": 0.2, "grad_norm": 4.38852596282959, "learning_rate": 1.9977168599405364e-05, "loss": 2.529, "step": 15679 }, { "epoch": 0.2, "grad_norm": 4.00416374206543, "learning_rate": 1.9977161502241124e-05, "loss": 2.2532, "step": 15680 }, { "epoch": 0.2, "grad_norm": 3.8532137870788574, "learning_rate": 1.9977154403975237e-05, "loss": 1.9707, "step": 15681 }, { "epoch": 0.2, "grad_norm": 4.749820709228516, "learning_rate": 1.9977147304607702e-05, "loss": 2.3876, "step": 15682 }, { "epoch": 0.2, "grad_norm": 4.337556838989258, "learning_rate": 1.9977140204138523e-05, "loss": 2.395, "step": 15683 }, { "epoch": 0.2, "grad_norm": 4.86665153503418, "learning_rate": 1.9977133102567693e-05, "loss": 2.3473, "step": 15684 }, { "epoch": 0.2, "grad_norm": 3.8700456619262695, "learning_rate": 1.9977125999895225e-05, "loss": 1.8294, "step": 15685 }, { "epoch": 0.2, "grad_norm": 4.810683727264404, "learning_rate": 1.997711889612111e-05, "loss": 2.6692, "step": 15686 }, { "epoch": 0.2, "grad_norm": 4.403834342956543, "learning_rate": 1.997711179124535e-05, "loss": 2.3507, "step": 15687 }, { "epoch": 0.2, "grad_norm": 4.306574821472168, "learning_rate": 1.997710468526795e-05, "loss": 2.092, "step": 15688 }, { "epoch": 0.2, "grad_norm": 4.148085117340088, "learning_rate": 1.997709757818891e-05, "loss": 2.3137, "step": 15689 }, { "epoch": 0.2, "grad_norm": 4.1985321044921875, "learning_rate": 1.9977090470008223e-05, "loss": 2.1568, "step": 15690 }, { "epoch": 0.2, "grad_norm": 5.428717136383057, "learning_rate": 1.9977083360725903e-05, "loss": 2.5171, "step": 15691 }, { "epoch": 0.2, "grad_norm": 4.416327953338623, "learning_rate": 1.997707625034194e-05, "loss": 1.9873, "step": 15692 }, { "epoch": 0.2, "grad_norm": 4.464540958404541, "learning_rate": 1.9977069138856334e-05, "loss": 2.2161, "step": 15693 }, { "epoch": 0.2, "grad_norm": 4.405782699584961, "learning_rate": 1.99770620262691e-05, "loss": 1.9679, "step": 15694 }, { "epoch": 0.2, "grad_norm": 4.617864608764648, "learning_rate": 1.9977054912580222e-05, "loss": 2.3531, "step": 15695 }, { "epoch": 0.2, "grad_norm": 4.305485725402832, "learning_rate": 1.997704779778971e-05, "loss": 2.4526, "step": 15696 }, { "epoch": 0.2, "grad_norm": 4.0746870040893555, "learning_rate": 1.9977040681897564e-05, "loss": 2.1587, "step": 15697 }, { "epoch": 0.2, "grad_norm": 4.0633344650268555, "learning_rate": 1.997703356490378e-05, "loss": 2.4087, "step": 15698 }, { "epoch": 0.2, "grad_norm": 3.9478139877319336, "learning_rate": 1.997702644680837e-05, "loss": 1.7946, "step": 15699 }, { "epoch": 0.2, "grad_norm": 4.185517311096191, "learning_rate": 1.9977019327611318e-05, "loss": 2.4334, "step": 15700 }, { "epoch": 0.2, "grad_norm": 4.8243865966796875, "learning_rate": 1.9977012207312637e-05, "loss": 2.8756, "step": 15701 }, { "epoch": 0.2, "grad_norm": 3.8708178997039795, "learning_rate": 1.9977005085912325e-05, "loss": 1.981, "step": 15702 }, { "epoch": 0.2, "grad_norm": 4.523853778839111, "learning_rate": 1.9976997963410382e-05, "loss": 2.5264, "step": 15703 }, { "epoch": 0.2, "grad_norm": 4.657924652099609, "learning_rate": 1.997699083980681e-05, "loss": 2.3987, "step": 15704 }, { "epoch": 0.2, "grad_norm": 3.9466607570648193, "learning_rate": 1.9976983715101608e-05, "loss": 1.9717, "step": 15705 }, { "epoch": 0.2, "grad_norm": 4.759490966796875, "learning_rate": 1.997697658929478e-05, "loss": 2.7308, "step": 15706 }, { "epoch": 0.2, "grad_norm": 4.315077781677246, "learning_rate": 1.9976969462386318e-05, "loss": 2.2371, "step": 15707 }, { "epoch": 0.2, "grad_norm": 4.1957573890686035, "learning_rate": 1.9976962334376235e-05, "loss": 1.8097, "step": 15708 }, { "epoch": 0.2, "grad_norm": 4.241976261138916, "learning_rate": 1.9976955205264525e-05, "loss": 2.0042, "step": 15709 }, { "epoch": 0.2, "grad_norm": 3.592970371246338, "learning_rate": 1.997694807505119e-05, "loss": 1.9057, "step": 15710 }, { "epoch": 0.2, "grad_norm": 4.647404193878174, "learning_rate": 1.997694094373623e-05, "loss": 2.3438, "step": 15711 }, { "epoch": 0.2, "grad_norm": 4.441201210021973, "learning_rate": 1.9976933811319648e-05, "loss": 1.8643, "step": 15712 }, { "epoch": 0.2, "grad_norm": 5.318508625030518, "learning_rate": 1.9976926677801446e-05, "loss": 2.518, "step": 15713 }, { "epoch": 0.2, "grad_norm": 4.804535865783691, "learning_rate": 1.9976919543181616e-05, "loss": 2.6397, "step": 15714 }, { "epoch": 0.2, "grad_norm": 4.275427341461182, "learning_rate": 1.9976912407460166e-05, "loss": 2.2653, "step": 15715 }, { "epoch": 0.2, "grad_norm": 4.302213191986084, "learning_rate": 1.99769052706371e-05, "loss": 2.0366, "step": 15716 }, { "epoch": 0.2, "grad_norm": 4.4163618087768555, "learning_rate": 1.997689813271241e-05, "loss": 2.0437, "step": 15717 }, { "epoch": 0.2, "grad_norm": 4.2300214767456055, "learning_rate": 1.9976890993686102e-05, "loss": 2.2735, "step": 15718 }, { "epoch": 0.2, "grad_norm": 4.689201354980469, "learning_rate": 1.9976883853558176e-05, "loss": 3.0688, "step": 15719 }, { "epoch": 0.2, "grad_norm": 4.572388648986816, "learning_rate": 1.9976876712328633e-05, "loss": 2.169, "step": 15720 }, { "epoch": 0.2, "grad_norm": 4.510225772857666, "learning_rate": 1.9976869569997473e-05, "loss": 2.2017, "step": 15721 }, { "epoch": 0.2, "grad_norm": 4.483590602874756, "learning_rate": 1.99768624265647e-05, "loss": 2.25, "step": 15722 }, { "epoch": 0.2, "grad_norm": 4.182180404663086, "learning_rate": 1.9976855282030308e-05, "loss": 2.5384, "step": 15723 }, { "epoch": 0.2, "grad_norm": 3.8817458152770996, "learning_rate": 1.9976848136394303e-05, "loss": 2.181, "step": 15724 }, { "epoch": 0.2, "grad_norm": 4.5884904861450195, "learning_rate": 1.9976840989656688e-05, "loss": 2.6995, "step": 15725 }, { "epoch": 0.2, "grad_norm": 4.3288960456848145, "learning_rate": 1.997683384181746e-05, "loss": 2.5802, "step": 15726 }, { "epoch": 0.2, "grad_norm": 3.721836566925049, "learning_rate": 1.9976826692876616e-05, "loss": 1.6571, "step": 15727 }, { "epoch": 0.2, "grad_norm": 4.336682319641113, "learning_rate": 1.9976819542834166e-05, "loss": 2.1586, "step": 15728 }, { "epoch": 0.2, "grad_norm": 4.043184280395508, "learning_rate": 1.9976812391690103e-05, "loss": 2.037, "step": 15729 }, { "epoch": 0.2, "grad_norm": 4.263831615447998, "learning_rate": 1.997680523944443e-05, "loss": 1.9907, "step": 15730 }, { "epoch": 0.2, "grad_norm": 4.610480308532715, "learning_rate": 1.9976798086097148e-05, "loss": 2.4041, "step": 15731 }, { "epoch": 0.2, "grad_norm": 4.760210990905762, "learning_rate": 1.997679093164826e-05, "loss": 2.458, "step": 15732 }, { "epoch": 0.2, "grad_norm": 3.896714448928833, "learning_rate": 1.9976783776097765e-05, "loss": 2.0038, "step": 15733 }, { "epoch": 0.2, "grad_norm": 5.3200154304504395, "learning_rate": 1.9976776619445663e-05, "loss": 2.8433, "step": 15734 }, { "epoch": 0.2, "grad_norm": 4.008632659912109, "learning_rate": 1.9976769461691957e-05, "loss": 2.3314, "step": 15735 }, { "epoch": 0.2, "grad_norm": 4.299372673034668, "learning_rate": 1.9976762302836645e-05, "loss": 2.5479, "step": 15736 }, { "epoch": 0.2, "grad_norm": 4.017544269561768, "learning_rate": 1.997675514287973e-05, "loss": 1.8443, "step": 15737 }, { "epoch": 0.2, "grad_norm": 4.6542792320251465, "learning_rate": 1.9976747981821212e-05, "loss": 2.3846, "step": 15738 }, { "epoch": 0.2, "grad_norm": 4.681424140930176, "learning_rate": 1.9976740819661092e-05, "loss": 2.3894, "step": 15739 }, { "epoch": 0.2, "grad_norm": 4.1375412940979, "learning_rate": 1.9976733656399368e-05, "loss": 2.4801, "step": 15740 }, { "epoch": 0.2, "grad_norm": 4.631800651550293, "learning_rate": 1.9976726492036048e-05, "loss": 2.7665, "step": 15741 }, { "epoch": 0.2, "grad_norm": 3.927727222442627, "learning_rate": 1.9976719326571127e-05, "loss": 2.0568, "step": 15742 }, { "epoch": 0.2, "grad_norm": 4.740096569061279, "learning_rate": 1.9976712160004603e-05, "loss": 2.6713, "step": 15743 }, { "epoch": 0.2, "grad_norm": 3.6346163749694824, "learning_rate": 1.997670499233648e-05, "loss": 1.5693, "step": 15744 }, { "epoch": 0.2, "grad_norm": 3.8802459239959717, "learning_rate": 1.9976697823566767e-05, "loss": 1.6316, "step": 15745 }, { "epoch": 0.2, "grad_norm": 4.553741455078125, "learning_rate": 1.9976690653695452e-05, "loss": 2.1182, "step": 15746 }, { "epoch": 0.2, "grad_norm": 4.343223571777344, "learning_rate": 1.9976683482722543e-05, "loss": 2.2692, "step": 15747 }, { "epoch": 0.2, "grad_norm": 5.154659748077393, "learning_rate": 1.9976676310648038e-05, "loss": 3.0205, "step": 15748 }, { "epoch": 0.2, "grad_norm": 4.099193096160889, "learning_rate": 1.997666913747194e-05, "loss": 1.7099, "step": 15749 }, { "epoch": 0.2, "grad_norm": 4.752029895782471, "learning_rate": 1.9976661963194247e-05, "loss": 2.4555, "step": 15750 }, { "epoch": 0.2, "grad_norm": 3.9407970905303955, "learning_rate": 1.997665478781496e-05, "loss": 2.0028, "step": 15751 }, { "epoch": 0.2, "grad_norm": 4.101338863372803, "learning_rate": 1.9976647611334083e-05, "loss": 2.1679, "step": 15752 }, { "epoch": 0.2, "grad_norm": 4.885722637176514, "learning_rate": 1.9976640433751614e-05, "loss": 2.8536, "step": 15753 }, { "epoch": 0.2, "grad_norm": 4.608248710632324, "learning_rate": 1.9976633255067555e-05, "loss": 2.7256, "step": 15754 }, { "epoch": 0.2, "grad_norm": 4.240291595458984, "learning_rate": 1.9976626075281905e-05, "loss": 2.7741, "step": 15755 }, { "epoch": 0.2, "grad_norm": 4.067309379577637, "learning_rate": 1.997661889439467e-05, "loss": 1.8824, "step": 15756 }, { "epoch": 0.2, "grad_norm": 4.087800979614258, "learning_rate": 1.9976611712405844e-05, "loss": 2.6916, "step": 15757 }, { "epoch": 0.2, "grad_norm": 4.18494987487793, "learning_rate": 1.997660452931543e-05, "loss": 2.1932, "step": 15758 }, { "epoch": 0.2, "grad_norm": 3.7664730548858643, "learning_rate": 1.997659734512343e-05, "loss": 2.1696, "step": 15759 }, { "epoch": 0.2, "grad_norm": 4.165872573852539, "learning_rate": 1.9976590159829846e-05, "loss": 1.7668, "step": 15760 }, { "epoch": 0.2, "grad_norm": 4.262963771820068, "learning_rate": 1.9976582973434676e-05, "loss": 2.0609, "step": 15761 }, { "epoch": 0.2, "grad_norm": 3.9979403018951416, "learning_rate": 1.9976575785937924e-05, "loss": 2.2157, "step": 15762 }, { "epoch": 0.2, "grad_norm": 4.701674461364746, "learning_rate": 1.9976568597339588e-05, "loss": 2.9158, "step": 15763 }, { "epoch": 0.2, "grad_norm": 3.717231273651123, "learning_rate": 1.997656140763967e-05, "loss": 1.8385, "step": 15764 }, { "epoch": 0.2, "grad_norm": 3.823129653930664, "learning_rate": 1.997655421683817e-05, "loss": 1.7568, "step": 15765 }, { "epoch": 0.2, "grad_norm": 4.4862189292907715, "learning_rate": 1.9976547024935088e-05, "loss": 2.5238, "step": 15766 }, { "epoch": 0.2, "grad_norm": 4.0766401290893555, "learning_rate": 1.9976539831930425e-05, "loss": 2.0483, "step": 15767 }, { "epoch": 0.2, "grad_norm": 4.5951313972473145, "learning_rate": 1.9976532637824183e-05, "loss": 2.7096, "step": 15768 }, { "epoch": 0.2, "grad_norm": 4.2075395584106445, "learning_rate": 1.9976525442616365e-05, "loss": 2.1498, "step": 15769 }, { "epoch": 0.2, "grad_norm": 4.337894916534424, "learning_rate": 1.997651824630697e-05, "loss": 2.7153, "step": 15770 }, { "epoch": 0.2, "grad_norm": 4.411537170410156, "learning_rate": 1.9976511048895994e-05, "loss": 1.9384, "step": 15771 }, { "epoch": 0.2, "grad_norm": 6.233180999755859, "learning_rate": 1.9976503850383443e-05, "loss": 2.9227, "step": 15772 }, { "epoch": 0.2, "grad_norm": 4.3032307624816895, "learning_rate": 1.9976496650769318e-05, "loss": 2.7022, "step": 15773 }, { "epoch": 0.2, "grad_norm": 3.8679921627044678, "learning_rate": 1.997648945005362e-05, "loss": 1.9008, "step": 15774 }, { "epoch": 0.2, "grad_norm": 5.296370029449463, "learning_rate": 1.9976482248236346e-05, "loss": 2.7936, "step": 15775 }, { "epoch": 0.2, "grad_norm": 4.508509635925293, "learning_rate": 1.9976475045317502e-05, "loss": 1.9012, "step": 15776 }, { "epoch": 0.2, "grad_norm": 4.3938117027282715, "learning_rate": 1.997646784129708e-05, "loss": 2.1887, "step": 15777 }, { "epoch": 0.2, "grad_norm": 4.094775676727295, "learning_rate": 1.997646063617509e-05, "loss": 1.9006, "step": 15778 }, { "epoch": 0.2, "grad_norm": 4.416779041290283, "learning_rate": 1.997645342995153e-05, "loss": 2.4221, "step": 15779 }, { "epoch": 0.2, "grad_norm": 4.333614826202393, "learning_rate": 1.99764462226264e-05, "loss": 1.7406, "step": 15780 }, { "epoch": 0.2, "grad_norm": 3.7894809246063232, "learning_rate": 1.9976439014199705e-05, "loss": 1.7999, "step": 15781 }, { "epoch": 0.2, "grad_norm": 4.157242298126221, "learning_rate": 1.9976431804671436e-05, "loss": 2.2, "step": 15782 }, { "epoch": 0.2, "grad_norm": 3.896134614944458, "learning_rate": 1.99764245940416e-05, "loss": 1.9845, "step": 15783 }, { "epoch": 0.2, "grad_norm": 4.212671279907227, "learning_rate": 1.9976417382310202e-05, "loss": 2.0527, "step": 15784 }, { "epoch": 0.2, "grad_norm": 4.242246150970459, "learning_rate": 1.9976410169477237e-05, "loss": 1.8376, "step": 15785 }, { "epoch": 0.2, "grad_norm": 3.9664864540100098, "learning_rate": 1.9976402955542706e-05, "loss": 1.6151, "step": 15786 }, { "epoch": 0.2, "grad_norm": 4.3208723068237305, "learning_rate": 1.9976395740506608e-05, "loss": 2.7699, "step": 15787 }, { "epoch": 0.2, "grad_norm": 4.753607273101807, "learning_rate": 1.9976388524368947e-05, "loss": 2.6958, "step": 15788 }, { "epoch": 0.2, "grad_norm": 3.973221778869629, "learning_rate": 1.9976381307129727e-05, "loss": 1.9314, "step": 15789 }, { "epoch": 0.2, "grad_norm": 4.382957935333252, "learning_rate": 1.9976374088788944e-05, "loss": 2.2355, "step": 15790 }, { "epoch": 0.2, "grad_norm": 5.191096305847168, "learning_rate": 1.9976366869346598e-05, "loss": 2.3792, "step": 15791 }, { "epoch": 0.2, "grad_norm": 4.680094242095947, "learning_rate": 1.9976359648802696e-05, "loss": 2.4036, "step": 15792 }, { "epoch": 0.2, "grad_norm": 3.3892085552215576, "learning_rate": 1.9976352427157234e-05, "loss": 1.7399, "step": 15793 }, { "epoch": 0.2, "grad_norm": 4.540426731109619, "learning_rate": 1.997634520441021e-05, "loss": 2.2118, "step": 15794 }, { "epoch": 0.2, "grad_norm": 3.725201368331909, "learning_rate": 1.9976337980561632e-05, "loss": 1.8804, "step": 15795 }, { "epoch": 0.2, "grad_norm": 4.414482116699219, "learning_rate": 1.9976330755611495e-05, "loss": 2.1813, "step": 15796 }, { "epoch": 0.21, "grad_norm": 5.027854919433594, "learning_rate": 1.99763235295598e-05, "loss": 2.3127, "step": 15797 }, { "epoch": 0.21, "grad_norm": 4.1548027992248535, "learning_rate": 1.9976316302406552e-05, "loss": 2.2367, "step": 15798 }, { "epoch": 0.21, "grad_norm": 4.670012474060059, "learning_rate": 1.997630907415175e-05, "loss": 2.2066, "step": 15799 }, { "epoch": 0.21, "grad_norm": 4.313360691070557, "learning_rate": 1.9976301844795394e-05, "loss": 2.0455, "step": 15800 }, { "epoch": 0.21, "grad_norm": 4.564408302307129, "learning_rate": 1.997629461433748e-05, "loss": 2.0668, "step": 15801 }, { "epoch": 0.21, "grad_norm": 4.076961994171143, "learning_rate": 1.997628738277802e-05, "loss": 2.0658, "step": 15802 }, { "epoch": 0.21, "grad_norm": 4.125972747802734, "learning_rate": 1.9976280150117006e-05, "loss": 2.162, "step": 15803 }, { "epoch": 0.21, "grad_norm": 4.337308406829834, "learning_rate": 1.9976272916354446e-05, "loss": 2.326, "step": 15804 }, { "epoch": 0.21, "grad_norm": 4.5271687507629395, "learning_rate": 1.997626568149033e-05, "loss": 2.0947, "step": 15805 }, { "epoch": 0.21, "grad_norm": 4.689648628234863, "learning_rate": 1.9976258445524665e-05, "loss": 2.5606, "step": 15806 }, { "epoch": 0.21, "grad_norm": 4.1678266525268555, "learning_rate": 1.9976251208457458e-05, "loss": 2.0244, "step": 15807 }, { "epoch": 0.21, "grad_norm": 4.172209739685059, "learning_rate": 1.9976243970288697e-05, "loss": 2.4013, "step": 15808 }, { "epoch": 0.21, "grad_norm": 3.933974027633667, "learning_rate": 1.9976236731018394e-05, "loss": 2.0554, "step": 15809 }, { "epoch": 0.21, "grad_norm": 4.653967380523682, "learning_rate": 1.9976229490646545e-05, "loss": 2.4655, "step": 15810 }, { "epoch": 0.21, "grad_norm": 4.525683403015137, "learning_rate": 1.9976222249173146e-05, "loss": 2.2574, "step": 15811 }, { "epoch": 0.21, "grad_norm": 4.085020542144775, "learning_rate": 1.997621500659821e-05, "loss": 1.965, "step": 15812 }, { "epoch": 0.21, "grad_norm": 3.8863489627838135, "learning_rate": 1.9976207762921725e-05, "loss": 2.1468, "step": 15813 }, { "epoch": 0.21, "grad_norm": 4.547411918640137, "learning_rate": 1.9976200518143698e-05, "loss": 2.4101, "step": 15814 }, { "epoch": 0.21, "grad_norm": 4.216238498687744, "learning_rate": 1.997619327226413e-05, "loss": 2.1056, "step": 15815 }, { "epoch": 0.21, "grad_norm": 4.583121299743652, "learning_rate": 1.997618602528302e-05, "loss": 2.2314, "step": 15816 }, { "epoch": 0.21, "grad_norm": 4.345722198486328, "learning_rate": 1.9976178777200373e-05, "loss": 2.2248, "step": 15817 }, { "epoch": 0.21, "grad_norm": 3.54453706741333, "learning_rate": 1.9976171528016186e-05, "loss": 1.8182, "step": 15818 }, { "epoch": 0.21, "grad_norm": 3.5181665420532227, "learning_rate": 1.997616427773046e-05, "loss": 1.6751, "step": 15819 }, { "epoch": 0.21, "grad_norm": 4.176915168762207, "learning_rate": 1.9976157026343195e-05, "loss": 1.9947, "step": 15820 }, { "epoch": 0.21, "grad_norm": 4.429725170135498, "learning_rate": 1.9976149773854394e-05, "loss": 1.8744, "step": 15821 }, { "epoch": 0.21, "grad_norm": 3.9011270999908447, "learning_rate": 1.9976142520264057e-05, "loss": 2.2898, "step": 15822 }, { "epoch": 0.21, "grad_norm": 4.319012641906738, "learning_rate": 1.9976135265572184e-05, "loss": 2.2785, "step": 15823 }, { "epoch": 0.21, "grad_norm": 4.688788414001465, "learning_rate": 1.9976128009778775e-05, "loss": 2.3693, "step": 15824 }, { "epoch": 0.21, "grad_norm": 4.515916347503662, "learning_rate": 1.9976120752883834e-05, "loss": 2.4763, "step": 15825 }, { "epoch": 0.21, "grad_norm": 4.468559265136719, "learning_rate": 1.997611349488736e-05, "loss": 2.0245, "step": 15826 }, { "epoch": 0.21, "grad_norm": 4.0847039222717285, "learning_rate": 1.997610623578935e-05, "loss": 2.0424, "step": 15827 }, { "epoch": 0.21, "grad_norm": 3.884690523147583, "learning_rate": 1.9976098975589816e-05, "loss": 2.1862, "step": 15828 }, { "epoch": 0.21, "grad_norm": 4.690298080444336, "learning_rate": 1.9976091714288744e-05, "loss": 2.2593, "step": 15829 }, { "epoch": 0.21, "grad_norm": 4.314324855804443, "learning_rate": 1.9976084451886148e-05, "loss": 2.3759, "step": 15830 }, { "epoch": 0.21, "grad_norm": 4.995893955230713, "learning_rate": 1.997607718838202e-05, "loss": 2.6977, "step": 15831 }, { "epoch": 0.21, "grad_norm": 4.125174045562744, "learning_rate": 1.9976069923776364e-05, "loss": 2.111, "step": 15832 }, { "epoch": 0.21, "grad_norm": 4.191359043121338, "learning_rate": 1.9976062658069183e-05, "loss": 2.2028, "step": 15833 }, { "epoch": 0.21, "grad_norm": 4.628026962280273, "learning_rate": 1.9976055391260473e-05, "loss": 2.594, "step": 15834 }, { "epoch": 0.21, "grad_norm": 4.009130477905273, "learning_rate": 1.9976048123350238e-05, "loss": 2.3759, "step": 15835 }, { "epoch": 0.21, "grad_norm": 4.531440258026123, "learning_rate": 1.997604085433848e-05, "loss": 2.3479, "step": 15836 }, { "epoch": 0.21, "grad_norm": 4.000336647033691, "learning_rate": 1.9976033584225194e-05, "loss": 1.6378, "step": 15837 }, { "epoch": 0.21, "grad_norm": 4.359925270080566, "learning_rate": 1.9976026313010388e-05, "loss": 2.1583, "step": 15838 }, { "epoch": 0.21, "grad_norm": 4.553194999694824, "learning_rate": 1.9976019040694057e-05, "loss": 1.9604, "step": 15839 }, { "epoch": 0.21, "grad_norm": 4.203441619873047, "learning_rate": 1.9976011767276203e-05, "loss": 2.1961, "step": 15840 }, { "epoch": 0.21, "grad_norm": 3.9166502952575684, "learning_rate": 1.9976004492756834e-05, "loss": 1.9075, "step": 15841 }, { "epoch": 0.21, "grad_norm": 4.343888759613037, "learning_rate": 1.997599721713594e-05, "loss": 2.2667, "step": 15842 }, { "epoch": 0.21, "grad_norm": 4.363466262817383, "learning_rate": 1.997598994041353e-05, "loss": 2.2757, "step": 15843 }, { "epoch": 0.21, "grad_norm": 4.199173450469971, "learning_rate": 1.9975982662589596e-05, "loss": 1.911, "step": 15844 }, { "epoch": 0.21, "grad_norm": 4.207782745361328, "learning_rate": 1.9975975383664144e-05, "loss": 2.2053, "step": 15845 }, { "epoch": 0.21, "grad_norm": 4.463388919830322, "learning_rate": 1.9975968103637184e-05, "loss": 2.4206, "step": 15846 }, { "epoch": 0.21, "grad_norm": 4.221379280090332, "learning_rate": 1.99759608225087e-05, "loss": 2.0382, "step": 15847 }, { "epoch": 0.21, "grad_norm": 4.081934928894043, "learning_rate": 1.9975953540278703e-05, "loss": 1.7644, "step": 15848 }, { "epoch": 0.21, "grad_norm": 3.714003801345825, "learning_rate": 1.9975946256947193e-05, "loss": 2.3253, "step": 15849 }, { "epoch": 0.21, "grad_norm": 3.917267322540283, "learning_rate": 1.9975938972514167e-05, "loss": 1.855, "step": 15850 }, { "epoch": 0.21, "grad_norm": 5.524631977081299, "learning_rate": 1.997593168697963e-05, "loss": 2.5764, "step": 15851 }, { "epoch": 0.21, "grad_norm": 3.8397700786590576, "learning_rate": 1.997592440034358e-05, "loss": 1.9074, "step": 15852 }, { "epoch": 0.21, "grad_norm": 4.317114353179932, "learning_rate": 1.9975917112606016e-05, "loss": 2.3785, "step": 15853 }, { "epoch": 0.21, "grad_norm": 5.530608654022217, "learning_rate": 1.9975909823766945e-05, "loss": 2.8631, "step": 15854 }, { "epoch": 0.21, "grad_norm": 4.560372352600098, "learning_rate": 1.9975902533826362e-05, "loss": 2.4184, "step": 15855 }, { "epoch": 0.21, "grad_norm": 3.711221218109131, "learning_rate": 1.9975895242784274e-05, "loss": 1.7562, "step": 15856 }, { "epoch": 0.21, "grad_norm": 4.096747398376465, "learning_rate": 1.9975887950640676e-05, "loss": 2.2253, "step": 15857 }, { "epoch": 0.21, "grad_norm": 4.567947864532471, "learning_rate": 1.997588065739557e-05, "loss": 2.4474, "step": 15858 }, { "epoch": 0.21, "grad_norm": 3.784414291381836, "learning_rate": 1.9975873363048956e-05, "loss": 1.7375, "step": 15859 }, { "epoch": 0.21, "grad_norm": 3.9928269386291504, "learning_rate": 1.997586606760084e-05, "loss": 2.2512, "step": 15860 }, { "epoch": 0.21, "grad_norm": 3.711806058883667, "learning_rate": 1.9975858771051217e-05, "loss": 1.8432, "step": 15861 }, { "epoch": 0.21, "grad_norm": 4.211962699890137, "learning_rate": 1.9975851473400086e-05, "loss": 2.2239, "step": 15862 }, { "epoch": 0.21, "grad_norm": 3.965832471847534, "learning_rate": 1.997584417464746e-05, "loss": 1.8769, "step": 15863 }, { "epoch": 0.21, "grad_norm": 3.762861728668213, "learning_rate": 1.9975836874793325e-05, "loss": 1.7768, "step": 15864 }, { "epoch": 0.21, "grad_norm": 3.6525163650512695, "learning_rate": 1.9975829573837692e-05, "loss": 1.8578, "step": 15865 }, { "epoch": 0.21, "grad_norm": 4.060824394226074, "learning_rate": 1.9975822271780556e-05, "loss": 2.2321, "step": 15866 }, { "epoch": 0.21, "grad_norm": 4.513862133026123, "learning_rate": 1.9975814968621922e-05, "loss": 2.543, "step": 15867 }, { "epoch": 0.21, "grad_norm": 3.9284989833831787, "learning_rate": 1.997580766436179e-05, "loss": 2.4094, "step": 15868 }, { "epoch": 0.21, "grad_norm": 4.899227142333984, "learning_rate": 1.9975800359000155e-05, "loss": 2.8665, "step": 15869 }, { "epoch": 0.21, "grad_norm": 4.613742351531982, "learning_rate": 1.9975793052537026e-05, "loss": 2.5895, "step": 15870 }, { "epoch": 0.21, "grad_norm": 5.199118137359619, "learning_rate": 1.9975785744972397e-05, "loss": 2.6182, "step": 15871 }, { "epoch": 0.21, "grad_norm": 4.167404651641846, "learning_rate": 1.9975778436306274e-05, "loss": 2.387, "step": 15872 }, { "epoch": 0.21, "grad_norm": 4.145509243011475, "learning_rate": 1.9975771126538655e-05, "loss": 2.1268, "step": 15873 }, { "epoch": 0.21, "grad_norm": 4.569780349731445, "learning_rate": 1.997576381566954e-05, "loss": 2.0559, "step": 15874 }, { "epoch": 0.21, "grad_norm": 4.241247177124023, "learning_rate": 1.997575650369894e-05, "loss": 1.9194, "step": 15875 }, { "epoch": 0.21, "grad_norm": 3.881704568862915, "learning_rate": 1.9975749190626838e-05, "loss": 2.1496, "step": 15876 }, { "epoch": 0.21, "grad_norm": 3.756450891494751, "learning_rate": 1.997574187645325e-05, "loss": 2.0716, "step": 15877 }, { "epoch": 0.21, "grad_norm": 4.071229934692383, "learning_rate": 1.9975734561178168e-05, "loss": 1.7119, "step": 15878 }, { "epoch": 0.21, "grad_norm": 4.24782133102417, "learning_rate": 1.997572724480159e-05, "loss": 2.409, "step": 15879 }, { "epoch": 0.21, "grad_norm": 4.181184768676758, "learning_rate": 1.997571992732353e-05, "loss": 2.0609, "step": 15880 }, { "epoch": 0.21, "grad_norm": 3.707761764526367, "learning_rate": 1.997571260874398e-05, "loss": 1.9579, "step": 15881 }, { "epoch": 0.21, "grad_norm": 4.440412998199463, "learning_rate": 1.9975705289062942e-05, "loss": 2.1794, "step": 15882 }, { "epoch": 0.21, "grad_norm": 4.345154762268066, "learning_rate": 1.9975697968280416e-05, "loss": 2.5298, "step": 15883 }, { "epoch": 0.21, "grad_norm": 4.475282192230225, "learning_rate": 1.9975690646396404e-05, "loss": 2.4518, "step": 15884 }, { "epoch": 0.21, "grad_norm": 4.386129856109619, "learning_rate": 1.9975683323410907e-05, "loss": 2.2152, "step": 15885 }, { "epoch": 0.21, "grad_norm": 5.102297782897949, "learning_rate": 1.9975675999323926e-05, "loss": 2.945, "step": 15886 }, { "epoch": 0.21, "grad_norm": 3.7209055423736572, "learning_rate": 1.997566867413546e-05, "loss": 2.0478, "step": 15887 }, { "epoch": 0.21, "grad_norm": 4.109642028808594, "learning_rate": 1.997566134784551e-05, "loss": 2.192, "step": 15888 }, { "epoch": 0.21, "grad_norm": 4.519856929779053, "learning_rate": 1.9975654020454077e-05, "loss": 1.926, "step": 15889 }, { "epoch": 0.21, "grad_norm": 3.998107671737671, "learning_rate": 1.9975646691961165e-05, "loss": 1.9189, "step": 15890 }, { "epoch": 0.21, "grad_norm": 4.069443702697754, "learning_rate": 1.9975639362366772e-05, "loss": 1.92, "step": 15891 }, { "epoch": 0.21, "grad_norm": 3.6463913917541504, "learning_rate": 1.99756320316709e-05, "loss": 1.7374, "step": 15892 }, { "epoch": 0.21, "grad_norm": 4.435347080230713, "learning_rate": 1.9975624699873547e-05, "loss": 2.3384, "step": 15893 }, { "epoch": 0.21, "grad_norm": 4.669504165649414, "learning_rate": 1.997561736697472e-05, "loss": 2.293, "step": 15894 }, { "epoch": 0.21, "grad_norm": 4.178791046142578, "learning_rate": 1.997561003297441e-05, "loss": 2.2757, "step": 15895 }, { "epoch": 0.21, "grad_norm": 3.858590841293335, "learning_rate": 1.9975602697872625e-05, "loss": 2.0304, "step": 15896 }, { "epoch": 0.21, "grad_norm": 4.0188822746276855, "learning_rate": 1.9975595361669365e-05, "loss": 2.5582, "step": 15897 }, { "epoch": 0.21, "grad_norm": 4.336331844329834, "learning_rate": 1.9975588024364628e-05, "loss": 2.3973, "step": 15898 }, { "epoch": 0.21, "grad_norm": 3.792739152908325, "learning_rate": 1.997558068595842e-05, "loss": 1.9753, "step": 15899 }, { "epoch": 0.21, "grad_norm": 3.6285276412963867, "learning_rate": 1.9975573346450738e-05, "loss": 1.9687, "step": 15900 }, { "epoch": 0.21, "grad_norm": 3.767624616622925, "learning_rate": 1.997556600584158e-05, "loss": 1.6694, "step": 15901 }, { "epoch": 0.21, "grad_norm": 3.8014626502990723, "learning_rate": 1.9975558664130955e-05, "loss": 2.0272, "step": 15902 }, { "epoch": 0.21, "grad_norm": 5.143958568572998, "learning_rate": 1.9975551321318857e-05, "loss": 2.9779, "step": 15903 }, { "epoch": 0.21, "grad_norm": 4.100592613220215, "learning_rate": 1.9975543977405287e-05, "loss": 1.9421, "step": 15904 }, { "epoch": 0.21, "grad_norm": 3.7696521282196045, "learning_rate": 1.9975536632390252e-05, "loss": 1.8304, "step": 15905 }, { "epoch": 0.21, "grad_norm": 4.413320541381836, "learning_rate": 1.9975529286273746e-05, "loss": 2.4463, "step": 15906 }, { "epoch": 0.21, "grad_norm": 3.7381396293640137, "learning_rate": 1.997552193905577e-05, "loss": 1.9064, "step": 15907 }, { "epoch": 0.21, "grad_norm": 4.498565673828125, "learning_rate": 1.9975514590736333e-05, "loss": 2.1547, "step": 15908 }, { "epoch": 0.21, "grad_norm": 4.5624213218688965, "learning_rate": 1.9975507241315426e-05, "loss": 2.2607, "step": 15909 }, { "epoch": 0.21, "grad_norm": 4.064627647399902, "learning_rate": 1.9975499890793054e-05, "loss": 2.4673, "step": 15910 }, { "epoch": 0.21, "grad_norm": 4.889171600341797, "learning_rate": 1.9975492539169217e-05, "loss": 2.7919, "step": 15911 }, { "epoch": 0.21, "grad_norm": 4.152987003326416, "learning_rate": 1.997548518644392e-05, "loss": 2.1198, "step": 15912 }, { "epoch": 0.21, "grad_norm": 5.045387268066406, "learning_rate": 1.9975477832617157e-05, "loss": 2.6454, "step": 15913 }, { "epoch": 0.21, "grad_norm": 4.857189178466797, "learning_rate": 1.9975470477688933e-05, "loss": 2.1096, "step": 15914 }, { "epoch": 0.21, "grad_norm": 4.27750825881958, "learning_rate": 1.9975463121659245e-05, "loss": 2.2714, "step": 15915 }, { "epoch": 0.21, "grad_norm": 3.416497230529785, "learning_rate": 1.9975455764528102e-05, "loss": 1.6155, "step": 15916 }, { "epoch": 0.21, "grad_norm": 4.165848255157471, "learning_rate": 1.9975448406295497e-05, "loss": 2.1207, "step": 15917 }, { "epoch": 0.21, "grad_norm": 3.9159138202667236, "learning_rate": 1.9975441046961432e-05, "loss": 1.9942, "step": 15918 }, { "epoch": 0.21, "grad_norm": 3.62998628616333, "learning_rate": 1.9975433686525908e-05, "loss": 1.6138, "step": 15919 }, { "epoch": 0.21, "grad_norm": 3.75331711769104, "learning_rate": 1.997542632498893e-05, "loss": 1.9046, "step": 15920 }, { "epoch": 0.21, "grad_norm": 4.27189302444458, "learning_rate": 1.9975418962350494e-05, "loss": 2.3921, "step": 15921 }, { "epoch": 0.21, "grad_norm": 4.064119815826416, "learning_rate": 1.9975411598610603e-05, "loss": 1.9259, "step": 15922 }, { "epoch": 0.21, "grad_norm": 3.9766347408294678, "learning_rate": 1.9975404233769258e-05, "loss": 2.3346, "step": 15923 }, { "epoch": 0.21, "grad_norm": 3.766608476638794, "learning_rate": 1.997539686782646e-05, "loss": 1.7659, "step": 15924 }, { "epoch": 0.21, "grad_norm": 4.463730812072754, "learning_rate": 1.9975389500782204e-05, "loss": 2.473, "step": 15925 }, { "epoch": 0.21, "grad_norm": 3.8321352005004883, "learning_rate": 1.99753821326365e-05, "loss": 1.8059, "step": 15926 }, { "epoch": 0.21, "grad_norm": 4.217014789581299, "learning_rate": 1.9975374763389346e-05, "loss": 1.9055, "step": 15927 }, { "epoch": 0.21, "grad_norm": 4.359696388244629, "learning_rate": 1.9975367393040738e-05, "loss": 1.8472, "step": 15928 }, { "epoch": 0.21, "grad_norm": 3.953325033187866, "learning_rate": 1.997536002159068e-05, "loss": 1.9067, "step": 15929 }, { "epoch": 0.21, "grad_norm": 5.322265148162842, "learning_rate": 1.9975352649039176e-05, "loss": 2.6175, "step": 15930 }, { "epoch": 0.21, "grad_norm": 3.8606326580047607, "learning_rate": 1.997534527538622e-05, "loss": 1.7789, "step": 15931 }, { "epoch": 0.21, "grad_norm": 4.71002197265625, "learning_rate": 1.997533790063182e-05, "loss": 2.5582, "step": 15932 }, { "epoch": 0.21, "grad_norm": 4.326179504394531, "learning_rate": 1.9975330524775973e-05, "loss": 1.9799, "step": 15933 }, { "epoch": 0.21, "grad_norm": 4.543169975280762, "learning_rate": 1.997532314781868e-05, "loss": 2.3393, "step": 15934 }, { "epoch": 0.21, "grad_norm": 3.8036653995513916, "learning_rate": 1.9975315769759943e-05, "loss": 1.9587, "step": 15935 }, { "epoch": 0.21, "grad_norm": 4.611262798309326, "learning_rate": 1.9975308390599757e-05, "loss": 2.2433, "step": 15936 }, { "epoch": 0.21, "grad_norm": 4.7335710525512695, "learning_rate": 1.9975301010338133e-05, "loss": 2.4605, "step": 15937 }, { "epoch": 0.21, "grad_norm": 3.7163937091827393, "learning_rate": 1.9975293628975065e-05, "loss": 1.6124, "step": 15938 }, { "epoch": 0.21, "grad_norm": 4.107633113861084, "learning_rate": 1.9975286246510555e-05, "loss": 2.1395, "step": 15939 }, { "epoch": 0.21, "grad_norm": 4.323264122009277, "learning_rate": 1.9975278862944605e-05, "loss": 1.7883, "step": 15940 }, { "epoch": 0.21, "grad_norm": 3.9710240364074707, "learning_rate": 1.997527147827721e-05, "loss": 2.0918, "step": 15941 }, { "epoch": 0.21, "grad_norm": 4.3351826667785645, "learning_rate": 1.9975264092508385e-05, "loss": 1.8586, "step": 15942 }, { "epoch": 0.21, "grad_norm": 4.380300998687744, "learning_rate": 1.9975256705638115e-05, "loss": 2.0446, "step": 15943 }, { "epoch": 0.21, "grad_norm": 4.886444091796875, "learning_rate": 1.9975249317666407e-05, "loss": 2.1674, "step": 15944 }, { "epoch": 0.21, "grad_norm": 4.251138210296631, "learning_rate": 1.997524192859327e-05, "loss": 2.0655, "step": 15945 }, { "epoch": 0.21, "grad_norm": 4.434206962585449, "learning_rate": 1.997523453841869e-05, "loss": 2.2993, "step": 15946 }, { "epoch": 0.21, "grad_norm": 3.9384796619415283, "learning_rate": 1.9975227147142676e-05, "loss": 2.0634, "step": 15947 }, { "epoch": 0.21, "grad_norm": 4.629106521606445, "learning_rate": 1.9975219754765228e-05, "loss": 2.4818, "step": 15948 }, { "epoch": 0.21, "grad_norm": 4.852952480316162, "learning_rate": 1.9975212361286346e-05, "loss": 2.6131, "step": 15949 }, { "epoch": 0.21, "grad_norm": 4.200699806213379, "learning_rate": 1.997520496670603e-05, "loss": 2.1765, "step": 15950 }, { "epoch": 0.21, "grad_norm": 4.506475448608398, "learning_rate": 1.9975197571024287e-05, "loss": 2.3734, "step": 15951 }, { "epoch": 0.21, "grad_norm": 4.5686445236206055, "learning_rate": 1.997519017424111e-05, "loss": 2.2562, "step": 15952 }, { "epoch": 0.21, "grad_norm": 4.398160934448242, "learning_rate": 1.99751827763565e-05, "loss": 2.5051, "step": 15953 }, { "epoch": 0.21, "grad_norm": 4.356109619140625, "learning_rate": 1.9975175377370464e-05, "loss": 2.3972, "step": 15954 }, { "epoch": 0.21, "grad_norm": 4.671848773956299, "learning_rate": 1.9975167977282997e-05, "loss": 2.47, "step": 15955 }, { "epoch": 0.21, "grad_norm": 4.466048240661621, "learning_rate": 1.9975160576094103e-05, "loss": 2.3814, "step": 15956 }, { "epoch": 0.21, "grad_norm": 4.136626243591309, "learning_rate": 1.9975153173803784e-05, "loss": 1.9585, "step": 15957 }, { "epoch": 0.21, "grad_norm": 4.752613544464111, "learning_rate": 1.997514577041204e-05, "loss": 2.7821, "step": 15958 }, { "epoch": 0.21, "grad_norm": 4.536401271820068, "learning_rate": 1.9975138365918865e-05, "loss": 2.1742, "step": 15959 }, { "epoch": 0.21, "grad_norm": 3.6943228244781494, "learning_rate": 1.997513096032427e-05, "loss": 1.8615, "step": 15960 }, { "epoch": 0.21, "grad_norm": 3.8639636039733887, "learning_rate": 1.997512355362825e-05, "loss": 2.0735, "step": 15961 }, { "epoch": 0.21, "grad_norm": 3.5328993797302246, "learning_rate": 1.9975116145830808e-05, "loss": 1.7747, "step": 15962 }, { "epoch": 0.21, "grad_norm": 3.6685736179351807, "learning_rate": 1.9975108736931945e-05, "loss": 1.9427, "step": 15963 }, { "epoch": 0.21, "grad_norm": 4.0492095947265625, "learning_rate": 1.997510132693166e-05, "loss": 1.9073, "step": 15964 }, { "epoch": 0.21, "grad_norm": 4.2555036544799805, "learning_rate": 1.997509391582995e-05, "loss": 2.5973, "step": 15965 }, { "epoch": 0.21, "grad_norm": 4.066501617431641, "learning_rate": 1.9975086503626827e-05, "loss": 1.928, "step": 15966 }, { "epoch": 0.21, "grad_norm": 4.271275043487549, "learning_rate": 1.997507909032228e-05, "loss": 2.1493, "step": 15967 }, { "epoch": 0.21, "grad_norm": 4.89072322845459, "learning_rate": 1.997507167591632e-05, "loss": 2.4247, "step": 15968 }, { "epoch": 0.21, "grad_norm": 3.954749822616577, "learning_rate": 1.997506426040894e-05, "loss": 2.1167, "step": 15969 }, { "epoch": 0.21, "grad_norm": 4.90227746963501, "learning_rate": 1.9975056843800145e-05, "loss": 2.2571, "step": 15970 }, { "epoch": 0.21, "grad_norm": 4.9157185554504395, "learning_rate": 1.997504942608993e-05, "loss": 2.284, "step": 15971 }, { "epoch": 0.21, "grad_norm": 4.963230609893799, "learning_rate": 1.9975042007278306e-05, "loss": 2.0388, "step": 15972 }, { "epoch": 0.21, "grad_norm": 3.646299123764038, "learning_rate": 1.9975034587365264e-05, "loss": 1.609, "step": 15973 }, { "epoch": 0.21, "grad_norm": 4.412540435791016, "learning_rate": 1.997502716635081e-05, "loss": 2.2493, "step": 15974 }, { "epoch": 0.21, "grad_norm": 4.374776840209961, "learning_rate": 1.9975019744234948e-05, "loss": 2.1065, "step": 15975 }, { "epoch": 0.21, "grad_norm": 4.079343795776367, "learning_rate": 1.997501232101767e-05, "loss": 1.9773, "step": 15976 }, { "epoch": 0.21, "grad_norm": 3.9649033546447754, "learning_rate": 1.997500489669898e-05, "loss": 2.0836, "step": 15977 }, { "epoch": 0.21, "grad_norm": 3.9903481006622314, "learning_rate": 1.9974997471278886e-05, "loss": 1.8988, "step": 15978 }, { "epoch": 0.21, "grad_norm": 3.991853952407837, "learning_rate": 1.9974990044757377e-05, "loss": 2.0613, "step": 15979 }, { "epoch": 0.21, "grad_norm": 4.066000461578369, "learning_rate": 1.9974982617134465e-05, "loss": 2.0479, "step": 15980 }, { "epoch": 0.21, "grad_norm": 3.9632863998413086, "learning_rate": 1.9974975188410146e-05, "loss": 1.6486, "step": 15981 }, { "epoch": 0.21, "grad_norm": 4.391363620758057, "learning_rate": 1.9974967758584416e-05, "loss": 1.9894, "step": 15982 }, { "epoch": 0.21, "grad_norm": 4.122739791870117, "learning_rate": 1.997496032765728e-05, "loss": 2.0586, "step": 15983 }, { "epoch": 0.21, "grad_norm": 4.234616756439209, "learning_rate": 1.997495289562874e-05, "loss": 2.2022, "step": 15984 }, { "epoch": 0.21, "grad_norm": 4.380184650421143, "learning_rate": 1.99749454624988e-05, "loss": 2.2987, "step": 15985 }, { "epoch": 0.21, "grad_norm": 4.116730690002441, "learning_rate": 1.9974938028267456e-05, "loss": 1.7731, "step": 15986 }, { "epoch": 0.21, "grad_norm": 3.811436653137207, "learning_rate": 1.9974930592934708e-05, "loss": 1.8521, "step": 15987 }, { "epoch": 0.21, "grad_norm": 4.4644927978515625, "learning_rate": 1.9974923156500555e-05, "loss": 1.9757, "step": 15988 }, { "epoch": 0.21, "grad_norm": 4.743566036224365, "learning_rate": 1.9974915718965006e-05, "loss": 2.5788, "step": 15989 }, { "epoch": 0.21, "grad_norm": 4.414793491363525, "learning_rate": 1.9974908280328057e-05, "loss": 2.2962, "step": 15990 }, { "epoch": 0.21, "grad_norm": 4.974040508270264, "learning_rate": 1.9974900840589707e-05, "loss": 2.41, "step": 15991 }, { "epoch": 0.21, "grad_norm": 4.292206764221191, "learning_rate": 1.9974893399749957e-05, "loss": 1.9845, "step": 15992 }, { "epoch": 0.21, "grad_norm": 4.307910442352295, "learning_rate": 1.9974885957808814e-05, "loss": 2.2385, "step": 15993 }, { "epoch": 0.21, "grad_norm": 4.735483169555664, "learning_rate": 1.9974878514766273e-05, "loss": 2.8763, "step": 15994 }, { "epoch": 0.21, "grad_norm": 5.275736331939697, "learning_rate": 1.9974871070622336e-05, "loss": 3.094, "step": 15995 }, { "epoch": 0.21, "grad_norm": 3.76644229888916, "learning_rate": 1.9974863625377e-05, "loss": 1.9764, "step": 15996 }, { "epoch": 0.21, "grad_norm": 4.473854064941406, "learning_rate": 1.9974856179030273e-05, "loss": 2.2921, "step": 15997 }, { "epoch": 0.21, "grad_norm": 4.856691360473633, "learning_rate": 1.9974848731582152e-05, "loss": 2.6704, "step": 15998 }, { "epoch": 0.21, "grad_norm": 3.66563081741333, "learning_rate": 1.9974841283032643e-05, "loss": 1.8263, "step": 15999 }, { "epoch": 0.21, "grad_norm": 3.8492915630340576, "learning_rate": 1.9974833833381738e-05, "loss": 1.713, "step": 16000 }, { "epoch": 0.21, "grad_norm": 4.772765636444092, "learning_rate": 1.9974826382629443e-05, "loss": 2.4438, "step": 16001 }, { "epoch": 0.21, "grad_norm": 4.70821475982666, "learning_rate": 1.9974818930775757e-05, "loss": 2.2523, "step": 16002 }, { "epoch": 0.21, "grad_norm": 4.254929542541504, "learning_rate": 1.997481147782068e-05, "loss": 2.5241, "step": 16003 }, { "epoch": 0.21, "grad_norm": 4.283604145050049, "learning_rate": 1.997480402376422e-05, "loss": 1.8037, "step": 16004 }, { "epoch": 0.21, "grad_norm": 4.151856422424316, "learning_rate": 1.9974796568606366e-05, "loss": 1.9446, "step": 16005 }, { "epoch": 0.21, "grad_norm": 4.115746021270752, "learning_rate": 1.9974789112347127e-05, "loss": 1.9443, "step": 16006 }, { "epoch": 0.21, "grad_norm": 4.568209171295166, "learning_rate": 1.9974781654986507e-05, "loss": 2.0747, "step": 16007 }, { "epoch": 0.21, "grad_norm": 4.887279033660889, "learning_rate": 1.9974774196524498e-05, "loss": 2.0993, "step": 16008 }, { "epoch": 0.21, "grad_norm": 3.8077094554901123, "learning_rate": 1.9974766736961105e-05, "loss": 1.822, "step": 16009 }, { "epoch": 0.21, "grad_norm": 4.431918144226074, "learning_rate": 1.997475927629633e-05, "loss": 2.5251, "step": 16010 }, { "epoch": 0.21, "grad_norm": 4.421673774719238, "learning_rate": 1.997475181453017e-05, "loss": 2.1838, "step": 16011 }, { "epoch": 0.21, "grad_norm": 3.8349368572235107, "learning_rate": 1.997474435166263e-05, "loss": 2.0066, "step": 16012 }, { "epoch": 0.21, "grad_norm": 4.375859260559082, "learning_rate": 1.997473688769371e-05, "loss": 2.1766, "step": 16013 }, { "epoch": 0.21, "grad_norm": 4.567678451538086, "learning_rate": 1.9974729422623406e-05, "loss": 2.215, "step": 16014 }, { "epoch": 0.21, "grad_norm": 4.62803840637207, "learning_rate": 1.9974721956451723e-05, "loss": 2.4561, "step": 16015 }, { "epoch": 0.21, "grad_norm": 4.436315536499023, "learning_rate": 1.9974714489178666e-05, "loss": 2.4035, "step": 16016 }, { "epoch": 0.21, "grad_norm": 3.864281177520752, "learning_rate": 1.9974707020804226e-05, "loss": 2.1625, "step": 16017 }, { "epoch": 0.21, "grad_norm": 4.938634395599365, "learning_rate": 1.997469955132841e-05, "loss": 2.4578, "step": 16018 }, { "epoch": 0.21, "grad_norm": 4.600559234619141, "learning_rate": 1.9974692080751224e-05, "loss": 2.4968, "step": 16019 }, { "epoch": 0.21, "grad_norm": 4.192385196685791, "learning_rate": 1.9974684609072658e-05, "loss": 2.3479, "step": 16020 }, { "epoch": 0.21, "grad_norm": 3.8497369289398193, "learning_rate": 1.9974677136292718e-05, "loss": 2.261, "step": 16021 }, { "epoch": 0.21, "grad_norm": 4.5330915451049805, "learning_rate": 1.9974669662411402e-05, "loss": 2.0982, "step": 16022 }, { "epoch": 0.21, "grad_norm": 4.307577133178711, "learning_rate": 1.9974662187428716e-05, "loss": 1.8477, "step": 16023 }, { "epoch": 0.21, "grad_norm": 5.140035629272461, "learning_rate": 1.997465471134466e-05, "loss": 2.449, "step": 16024 }, { "epoch": 0.21, "grad_norm": 4.619837284088135, "learning_rate": 1.9974647234159228e-05, "loss": 2.2079, "step": 16025 }, { "epoch": 0.21, "grad_norm": 4.229742050170898, "learning_rate": 1.997463975587243e-05, "loss": 2.0916, "step": 16026 }, { "epoch": 0.21, "grad_norm": 4.550575256347656, "learning_rate": 1.997463227648426e-05, "loss": 2.3031, "step": 16027 }, { "epoch": 0.21, "grad_norm": 4.345615863800049, "learning_rate": 1.9974624795994722e-05, "loss": 1.9165, "step": 16028 }, { "epoch": 0.21, "grad_norm": 4.368555545806885, "learning_rate": 1.9974617314403817e-05, "loss": 1.796, "step": 16029 }, { "epoch": 0.21, "grad_norm": 4.553576469421387, "learning_rate": 1.9974609831711546e-05, "loss": 2.4731, "step": 16030 }, { "epoch": 0.21, "grad_norm": 4.279126167297363, "learning_rate": 1.9974602347917908e-05, "loss": 2.3369, "step": 16031 }, { "epoch": 0.21, "grad_norm": 4.324181079864502, "learning_rate": 1.9974594863022904e-05, "loss": 1.9269, "step": 16032 }, { "epoch": 0.21, "grad_norm": 4.488579273223877, "learning_rate": 1.9974587377026533e-05, "loss": 2.517, "step": 16033 }, { "epoch": 0.21, "grad_norm": 4.021167755126953, "learning_rate": 1.99745798899288e-05, "loss": 2.5065, "step": 16034 }, { "epoch": 0.21, "grad_norm": 4.696248531341553, "learning_rate": 1.9974572401729706e-05, "loss": 2.6571, "step": 16035 }, { "epoch": 0.21, "grad_norm": 4.227461338043213, "learning_rate": 1.997456491242925e-05, "loss": 2.2928, "step": 16036 }, { "epoch": 0.21, "grad_norm": 4.405061721801758, "learning_rate": 1.997455742202743e-05, "loss": 2.4532, "step": 16037 }, { "epoch": 0.21, "grad_norm": 4.2504963874816895, "learning_rate": 1.997454993052425e-05, "loss": 2.2218, "step": 16038 }, { "epoch": 0.21, "grad_norm": 4.839466094970703, "learning_rate": 1.9974542437919715e-05, "loss": 2.4312, "step": 16039 }, { "epoch": 0.21, "grad_norm": 5.068326473236084, "learning_rate": 1.997453494421382e-05, "loss": 2.6773, "step": 16040 }, { "epoch": 0.21, "grad_norm": 4.622369289398193, "learning_rate": 1.9974527449406565e-05, "loss": 2.4492, "step": 16041 }, { "epoch": 0.21, "grad_norm": 4.507327556610107, "learning_rate": 1.997451995349795e-05, "loss": 2.1843, "step": 16042 }, { "epoch": 0.21, "grad_norm": 4.693589210510254, "learning_rate": 1.9974512456487983e-05, "loss": 2.1559, "step": 16043 }, { "epoch": 0.21, "grad_norm": 4.493215084075928, "learning_rate": 1.997450495837666e-05, "loss": 2.2577, "step": 16044 }, { "epoch": 0.21, "grad_norm": 3.6249992847442627, "learning_rate": 1.9974497459163982e-05, "loss": 1.706, "step": 16045 }, { "epoch": 0.21, "grad_norm": 4.020590782165527, "learning_rate": 1.997448995884995e-05, "loss": 1.8237, "step": 16046 }, { "epoch": 0.21, "grad_norm": 5.115073204040527, "learning_rate": 1.9974482457434564e-05, "loss": 2.3424, "step": 16047 }, { "epoch": 0.21, "grad_norm": 4.266253471374512, "learning_rate": 1.9974474954917832e-05, "loss": 2.4487, "step": 16048 }, { "epoch": 0.21, "grad_norm": 4.093057155609131, "learning_rate": 1.997446745129974e-05, "loss": 1.7316, "step": 16049 }, { "epoch": 0.21, "grad_norm": 3.9676315784454346, "learning_rate": 1.9974459946580302e-05, "loss": 1.7899, "step": 16050 }, { "epoch": 0.21, "grad_norm": 4.750936508178711, "learning_rate": 1.9974452440759515e-05, "loss": 2.9992, "step": 16051 }, { "epoch": 0.21, "grad_norm": 3.6538589000701904, "learning_rate": 1.997444493383738e-05, "loss": 1.8705, "step": 16052 }, { "epoch": 0.21, "grad_norm": 4.635017395019531, "learning_rate": 1.9974437425813896e-05, "loss": 2.2144, "step": 16053 }, { "epoch": 0.21, "grad_norm": 3.884053945541382, "learning_rate": 1.9974429916689063e-05, "loss": 2.0694, "step": 16054 }, { "epoch": 0.21, "grad_norm": 4.026153087615967, "learning_rate": 1.9974422406462885e-05, "loss": 1.8645, "step": 16055 }, { "epoch": 0.21, "grad_norm": 4.217270374298096, "learning_rate": 1.997441489513536e-05, "loss": 2.1458, "step": 16056 }, { "epoch": 0.21, "grad_norm": 3.795250177383423, "learning_rate": 1.9974407382706493e-05, "loss": 1.8336, "step": 16057 }, { "epoch": 0.21, "grad_norm": 4.0065460205078125, "learning_rate": 1.9974399869176283e-05, "loss": 1.8621, "step": 16058 }, { "epoch": 0.21, "grad_norm": 5.037421703338623, "learning_rate": 1.9974392354544727e-05, "loss": 2.908, "step": 16059 }, { "epoch": 0.21, "grad_norm": 4.483493328094482, "learning_rate": 1.9974384838811828e-05, "loss": 2.5858, "step": 16060 }, { "epoch": 0.21, "grad_norm": 4.277219295501709, "learning_rate": 1.9974377321977593e-05, "loss": 2.0922, "step": 16061 }, { "epoch": 0.21, "grad_norm": 4.609938144683838, "learning_rate": 1.9974369804042012e-05, "loss": 2.1607, "step": 16062 }, { "epoch": 0.21, "grad_norm": 4.424716472625732, "learning_rate": 1.9974362285005095e-05, "loss": 2.6959, "step": 16063 }, { "epoch": 0.21, "grad_norm": 4.258895397186279, "learning_rate": 1.9974354764866835e-05, "loss": 2.0145, "step": 16064 }, { "epoch": 0.21, "grad_norm": 4.857244491577148, "learning_rate": 1.997434724362724e-05, "loss": 2.5399, "step": 16065 }, { "epoch": 0.21, "grad_norm": 4.518958568572998, "learning_rate": 1.9974339721286308e-05, "loss": 2.3247, "step": 16066 }, { "epoch": 0.21, "grad_norm": 3.915980815887451, "learning_rate": 1.997433219784404e-05, "loss": 1.9281, "step": 16067 }, { "epoch": 0.21, "grad_norm": 4.090770244598389, "learning_rate": 1.9974324673300437e-05, "loss": 1.9433, "step": 16068 }, { "epoch": 0.21, "grad_norm": 3.9587337970733643, "learning_rate": 1.9974317147655498e-05, "loss": 2.0703, "step": 16069 }, { "epoch": 0.21, "grad_norm": 4.503875255584717, "learning_rate": 1.9974309620909222e-05, "loss": 2.0426, "step": 16070 }, { "epoch": 0.21, "grad_norm": 4.322810173034668, "learning_rate": 1.997430209306162e-05, "loss": 2.4077, "step": 16071 }, { "epoch": 0.21, "grad_norm": 4.77992057800293, "learning_rate": 1.997429456411268e-05, "loss": 2.5073, "step": 16072 }, { "epoch": 0.21, "grad_norm": 4.481997013092041, "learning_rate": 1.997428703406241e-05, "loss": 2.0776, "step": 16073 }, { "epoch": 0.21, "grad_norm": 4.50986385345459, "learning_rate": 1.9974279502910812e-05, "loss": 2.2791, "step": 16074 }, { "epoch": 0.21, "grad_norm": 4.143205642700195, "learning_rate": 1.9974271970657884e-05, "loss": 2.5048, "step": 16075 }, { "epoch": 0.21, "grad_norm": 4.293602466583252, "learning_rate": 1.9974264437303623e-05, "loss": 1.9825, "step": 16076 }, { "epoch": 0.21, "grad_norm": 5.27936315536499, "learning_rate": 1.9974256902848037e-05, "loss": 2.1049, "step": 16077 }, { "epoch": 0.21, "grad_norm": 5.162219047546387, "learning_rate": 1.9974249367291125e-05, "loss": 2.7992, "step": 16078 }, { "epoch": 0.21, "grad_norm": 4.886758327484131, "learning_rate": 1.9974241830632883e-05, "loss": 2.5743, "step": 16079 }, { "epoch": 0.21, "grad_norm": 4.356543064117432, "learning_rate": 1.997423429287332e-05, "loss": 2.5515, "step": 16080 }, { "epoch": 0.21, "grad_norm": 4.113330364227295, "learning_rate": 1.997422675401243e-05, "loss": 1.9226, "step": 16081 }, { "epoch": 0.21, "grad_norm": 4.645307540893555, "learning_rate": 1.9974219214050214e-05, "loss": 2.1798, "step": 16082 }, { "epoch": 0.21, "grad_norm": 3.8338632583618164, "learning_rate": 1.997421167298668e-05, "loss": 2.0053, "step": 16083 }, { "epoch": 0.21, "grad_norm": 3.503300905227661, "learning_rate": 1.997420413082182e-05, "loss": 1.512, "step": 16084 }, { "epoch": 0.21, "grad_norm": 3.942068099975586, "learning_rate": 1.9974196587555637e-05, "loss": 2.1803, "step": 16085 }, { "epoch": 0.21, "grad_norm": 3.73036527633667, "learning_rate": 1.997418904318814e-05, "loss": 1.9845, "step": 16086 }, { "epoch": 0.21, "grad_norm": 3.727287530899048, "learning_rate": 1.997418149771932e-05, "loss": 1.8563, "step": 16087 }, { "epoch": 0.21, "grad_norm": 4.820948600769043, "learning_rate": 1.997417395114918e-05, "loss": 1.9874, "step": 16088 }, { "epoch": 0.21, "grad_norm": 4.769244194030762, "learning_rate": 1.997416640347772e-05, "loss": 2.4006, "step": 16089 }, { "epoch": 0.21, "grad_norm": 4.397634029388428, "learning_rate": 1.9974158854704946e-05, "loss": 1.9963, "step": 16090 }, { "epoch": 0.21, "grad_norm": 4.1879777908325195, "learning_rate": 1.9974151304830856e-05, "loss": 2.2359, "step": 16091 }, { "epoch": 0.21, "grad_norm": 4.2336506843566895, "learning_rate": 1.997414375385545e-05, "loss": 2.063, "step": 16092 }, { "epoch": 0.21, "grad_norm": 4.0568037033081055, "learning_rate": 1.9974136201778726e-05, "loss": 1.9039, "step": 16093 }, { "epoch": 0.21, "grad_norm": 4.215350151062012, "learning_rate": 1.9974128648600693e-05, "loss": 1.9177, "step": 16094 }, { "epoch": 0.21, "grad_norm": 4.234943866729736, "learning_rate": 1.9974121094321344e-05, "loss": 2.0946, "step": 16095 }, { "epoch": 0.21, "grad_norm": 4.413479328155518, "learning_rate": 1.9974113538940686e-05, "loss": 2.4986, "step": 16096 }, { "epoch": 0.21, "grad_norm": 4.068106174468994, "learning_rate": 1.9974105982458712e-05, "loss": 1.7531, "step": 16097 }, { "epoch": 0.21, "grad_norm": 4.108391284942627, "learning_rate": 1.9974098424875433e-05, "loss": 2.2582, "step": 16098 }, { "epoch": 0.21, "grad_norm": 4.413048267364502, "learning_rate": 1.997409086619084e-05, "loss": 2.4542, "step": 16099 }, { "epoch": 0.21, "grad_norm": 4.346156597137451, "learning_rate": 1.997408330640494e-05, "loss": 2.1859, "step": 16100 }, { "epoch": 0.21, "grad_norm": 4.699396133422852, "learning_rate": 1.997407574551773e-05, "loss": 2.2897, "step": 16101 }, { "epoch": 0.21, "grad_norm": 4.1159491539001465, "learning_rate": 1.9974068183529215e-05, "loss": 1.958, "step": 16102 }, { "epoch": 0.21, "grad_norm": 4.687519550323486, "learning_rate": 1.9974060620439392e-05, "loss": 2.2094, "step": 16103 }, { "epoch": 0.21, "grad_norm": 3.869513511657715, "learning_rate": 1.9974053056248267e-05, "loss": 1.785, "step": 16104 }, { "epoch": 0.21, "grad_norm": 4.190550804138184, "learning_rate": 1.9974045490955833e-05, "loss": 1.9935, "step": 16105 }, { "epoch": 0.21, "grad_norm": 4.460225582122803, "learning_rate": 1.9974037924562097e-05, "loss": 2.407, "step": 16106 }, { "epoch": 0.21, "grad_norm": 4.989743709564209, "learning_rate": 1.9974030357067055e-05, "loss": 2.828, "step": 16107 }, { "epoch": 0.21, "grad_norm": 4.0948920249938965, "learning_rate": 1.9974022788470715e-05, "loss": 1.7074, "step": 16108 }, { "epoch": 0.21, "grad_norm": 4.151796817779541, "learning_rate": 1.9974015218773072e-05, "loss": 2.1402, "step": 16109 }, { "epoch": 0.21, "grad_norm": 4.984410285949707, "learning_rate": 1.997400764797413e-05, "loss": 3.0666, "step": 16110 }, { "epoch": 0.21, "grad_norm": 4.447888374328613, "learning_rate": 1.9974000076073888e-05, "loss": 2.2124, "step": 16111 }, { "epoch": 0.21, "grad_norm": 4.489902019500732, "learning_rate": 1.9973992503072346e-05, "loss": 2.3743, "step": 16112 }, { "epoch": 0.21, "grad_norm": 4.48164701461792, "learning_rate": 1.9973984928969506e-05, "loss": 1.9628, "step": 16113 }, { "epoch": 0.21, "grad_norm": 3.5471582412719727, "learning_rate": 1.997397735376537e-05, "loss": 1.7595, "step": 16114 }, { "epoch": 0.21, "grad_norm": 4.923675060272217, "learning_rate": 1.9973969777459935e-05, "loss": 2.506, "step": 16115 }, { "epoch": 0.21, "grad_norm": 4.66567325592041, "learning_rate": 1.997396220005321e-05, "loss": 2.7321, "step": 16116 }, { "epoch": 0.21, "grad_norm": 4.454740524291992, "learning_rate": 1.9973954621545184e-05, "loss": 2.6254, "step": 16117 }, { "epoch": 0.21, "grad_norm": 4.053475856781006, "learning_rate": 1.997394704193587e-05, "loss": 2.3711, "step": 16118 }, { "epoch": 0.21, "grad_norm": 4.026801586151123, "learning_rate": 1.9973939461225258e-05, "loss": 2.3349, "step": 16119 }, { "epoch": 0.21, "grad_norm": 4.388067722320557, "learning_rate": 1.9973931879413357e-05, "loss": 2.1121, "step": 16120 }, { "epoch": 0.21, "grad_norm": 4.410154342651367, "learning_rate": 1.997392429650016e-05, "loss": 2.0394, "step": 16121 }, { "epoch": 0.21, "grad_norm": 4.069884300231934, "learning_rate": 1.997391671248568e-05, "loss": 1.7767, "step": 16122 }, { "epoch": 0.21, "grad_norm": 4.528343677520752, "learning_rate": 1.9973909127369906e-05, "loss": 2.0332, "step": 16123 }, { "epoch": 0.21, "grad_norm": 4.623687744140625, "learning_rate": 1.9973901541152844e-05, "loss": 2.5218, "step": 16124 }, { "epoch": 0.21, "grad_norm": 4.928347587585449, "learning_rate": 1.9973893953834494e-05, "loss": 2.6587, "step": 16125 }, { "epoch": 0.21, "grad_norm": 4.606868743896484, "learning_rate": 1.9973886365414855e-05, "loss": 2.0578, "step": 16126 }, { "epoch": 0.21, "grad_norm": 4.4051690101623535, "learning_rate": 1.997387877589393e-05, "loss": 2.4304, "step": 16127 }, { "epoch": 0.21, "grad_norm": 3.8611502647399902, "learning_rate": 1.9973871185271726e-05, "loss": 1.9647, "step": 16128 }, { "epoch": 0.21, "grad_norm": 4.5907816886901855, "learning_rate": 1.9973863593548232e-05, "loss": 2.2943, "step": 16129 }, { "epoch": 0.21, "grad_norm": 4.367226600646973, "learning_rate": 1.9973856000723452e-05, "loss": 1.9464, "step": 16130 }, { "epoch": 0.21, "grad_norm": 3.8884832859039307, "learning_rate": 1.997384840679739e-05, "loss": 1.6617, "step": 16131 }, { "epoch": 0.21, "grad_norm": 4.603103160858154, "learning_rate": 1.9973840811770052e-05, "loss": 2.2509, "step": 16132 }, { "epoch": 0.21, "grad_norm": 4.405789375305176, "learning_rate": 1.9973833215641428e-05, "loss": 2.1005, "step": 16133 }, { "epoch": 0.21, "grad_norm": 4.302097797393799, "learning_rate": 1.9973825618411525e-05, "loss": 2.3428, "step": 16134 }, { "epoch": 0.21, "grad_norm": 4.471526622772217, "learning_rate": 1.997381802008034e-05, "loss": 2.0131, "step": 16135 }, { "epoch": 0.21, "grad_norm": 4.311367988586426, "learning_rate": 1.9973810420647877e-05, "loss": 2.3945, "step": 16136 }, { "epoch": 0.21, "grad_norm": 3.877444267272949, "learning_rate": 1.9973802820114136e-05, "loss": 2.1431, "step": 16137 }, { "epoch": 0.21, "grad_norm": 5.444668769836426, "learning_rate": 1.9973795218479123e-05, "loss": 2.5229, "step": 16138 }, { "epoch": 0.21, "grad_norm": 5.080872535705566, "learning_rate": 1.9973787615742828e-05, "loss": 2.1944, "step": 16139 }, { "epoch": 0.21, "grad_norm": 4.2783589363098145, "learning_rate": 1.997378001190526e-05, "loss": 2.2285, "step": 16140 }, { "epoch": 0.21, "grad_norm": 4.827141284942627, "learning_rate": 1.9973772406966417e-05, "loss": 2.73, "step": 16141 }, { "epoch": 0.21, "grad_norm": 4.406558036804199, "learning_rate": 1.9973764800926298e-05, "loss": 2.2175, "step": 16142 }, { "epoch": 0.21, "grad_norm": 4.394301414489746, "learning_rate": 1.997375719378491e-05, "loss": 2.2167, "step": 16143 }, { "epoch": 0.21, "grad_norm": 4.070405006408691, "learning_rate": 1.9973749585542247e-05, "loss": 1.9626, "step": 16144 }, { "epoch": 0.21, "grad_norm": 4.4545979499816895, "learning_rate": 1.9973741976198316e-05, "loss": 2.2542, "step": 16145 }, { "epoch": 0.21, "grad_norm": 4.606106281280518, "learning_rate": 1.9973734365753114e-05, "loss": 2.4845, "step": 16146 }, { "epoch": 0.21, "grad_norm": 3.958449125289917, "learning_rate": 1.997372675420664e-05, "loss": 2.2614, "step": 16147 }, { "epoch": 0.21, "grad_norm": 3.811725378036499, "learning_rate": 1.9973719141558897e-05, "loss": 2.1121, "step": 16148 }, { "epoch": 0.21, "grad_norm": 4.703139305114746, "learning_rate": 1.997371152780989e-05, "loss": 2.6759, "step": 16149 }, { "epoch": 0.21, "grad_norm": 4.634704113006592, "learning_rate": 1.9973703912959612e-05, "loss": 2.2065, "step": 16150 }, { "epoch": 0.21, "grad_norm": 4.225659370422363, "learning_rate": 1.997369629700807e-05, "loss": 1.7605, "step": 16151 }, { "epoch": 0.21, "grad_norm": 4.5130486488342285, "learning_rate": 1.9973688679955263e-05, "loss": 2.1665, "step": 16152 }, { "epoch": 0.21, "grad_norm": 3.802128553390503, "learning_rate": 1.997368106180119e-05, "loss": 1.9954, "step": 16153 }, { "epoch": 0.21, "grad_norm": 4.0598297119140625, "learning_rate": 1.9973673442545856e-05, "loss": 2.178, "step": 16154 }, { "epoch": 0.21, "grad_norm": 4.343287467956543, "learning_rate": 1.9973665822189257e-05, "loss": 2.2711, "step": 16155 }, { "epoch": 0.21, "grad_norm": 5.4660539627075195, "learning_rate": 1.9973658200731396e-05, "loss": 2.1843, "step": 16156 }, { "epoch": 0.21, "grad_norm": 4.041895866394043, "learning_rate": 1.9973650578172277e-05, "loss": 1.7916, "step": 16157 }, { "epoch": 0.21, "grad_norm": 3.7425014972686768, "learning_rate": 1.9973642954511897e-05, "loss": 1.8945, "step": 16158 }, { "epoch": 0.21, "grad_norm": 4.103961944580078, "learning_rate": 1.9973635329750255e-05, "loss": 1.8549, "step": 16159 }, { "epoch": 0.21, "grad_norm": 3.992547035217285, "learning_rate": 1.9973627703887356e-05, "loss": 2.1116, "step": 16160 }, { "epoch": 0.21, "grad_norm": 3.8814477920532227, "learning_rate": 1.9973620076923198e-05, "loss": 1.8524, "step": 16161 }, { "epoch": 0.21, "grad_norm": 5.162468910217285, "learning_rate": 1.9973612448857786e-05, "loss": 2.6488, "step": 16162 }, { "epoch": 0.21, "grad_norm": 4.579641819000244, "learning_rate": 1.9973604819691116e-05, "loss": 2.467, "step": 16163 }, { "epoch": 0.21, "grad_norm": 4.255311012268066, "learning_rate": 1.9973597189423187e-05, "loss": 2.3571, "step": 16164 }, { "epoch": 0.21, "grad_norm": 4.072140216827393, "learning_rate": 1.997358955805401e-05, "loss": 2.0757, "step": 16165 }, { "epoch": 0.21, "grad_norm": 4.491877555847168, "learning_rate": 1.9973581925583577e-05, "loss": 2.682, "step": 16166 }, { "epoch": 0.21, "grad_norm": 4.111354827880859, "learning_rate": 1.997357429201189e-05, "loss": 2.3318, "step": 16167 }, { "epoch": 0.21, "grad_norm": 4.559754371643066, "learning_rate": 1.997356665733895e-05, "loss": 2.2175, "step": 16168 }, { "epoch": 0.21, "grad_norm": 4.605471611022949, "learning_rate": 1.9973559021564767e-05, "loss": 2.4337, "step": 16169 }, { "epoch": 0.21, "grad_norm": 4.911857604980469, "learning_rate": 1.9973551384689328e-05, "loss": 2.1926, "step": 16170 }, { "epoch": 0.21, "grad_norm": 4.591651916503906, "learning_rate": 1.997354374671264e-05, "loss": 2.0534, "step": 16171 }, { "epoch": 0.21, "grad_norm": 4.265807628631592, "learning_rate": 1.9973536107634702e-05, "loss": 1.7803, "step": 16172 }, { "epoch": 0.21, "grad_norm": 4.8239665031433105, "learning_rate": 1.997352846745552e-05, "loss": 2.7284, "step": 16173 }, { "epoch": 0.21, "grad_norm": 4.188533782958984, "learning_rate": 1.997352082617509e-05, "loss": 2.2459, "step": 16174 }, { "epoch": 0.21, "grad_norm": 4.359582424163818, "learning_rate": 1.997351318379341e-05, "loss": 2.2567, "step": 16175 }, { "epoch": 0.21, "grad_norm": 4.3604536056518555, "learning_rate": 1.997350554031049e-05, "loss": 2.2974, "step": 16176 }, { "epoch": 0.21, "grad_norm": 4.385730743408203, "learning_rate": 1.9973497895726326e-05, "loss": 2.0313, "step": 16177 }, { "epoch": 0.21, "grad_norm": 4.585736274719238, "learning_rate": 1.9973490250040918e-05, "loss": 2.1373, "step": 16178 }, { "epoch": 0.21, "grad_norm": 4.516119003295898, "learning_rate": 1.9973482603254265e-05, "loss": 2.3012, "step": 16179 }, { "epoch": 0.21, "grad_norm": 4.556217193603516, "learning_rate": 1.997347495536637e-05, "loss": 2.3817, "step": 16180 }, { "epoch": 0.21, "grad_norm": 4.339495658874512, "learning_rate": 1.9973467306377235e-05, "loss": 2.2616, "step": 16181 }, { "epoch": 0.21, "grad_norm": 4.36104679107666, "learning_rate": 1.9973459656286863e-05, "loss": 2.3791, "step": 16182 }, { "epoch": 0.21, "grad_norm": 4.2593607902526855, "learning_rate": 1.997345200509525e-05, "loss": 2.1869, "step": 16183 }, { "epoch": 0.21, "grad_norm": 4.32806396484375, "learning_rate": 1.9973444352802397e-05, "loss": 2.323, "step": 16184 }, { "epoch": 0.21, "grad_norm": 4.256782054901123, "learning_rate": 1.9973436699408307e-05, "loss": 1.9036, "step": 16185 }, { "epoch": 0.21, "grad_norm": 4.180462837219238, "learning_rate": 1.997342904491298e-05, "loss": 2.279, "step": 16186 }, { "epoch": 0.21, "grad_norm": 4.96281623840332, "learning_rate": 1.9973421389316422e-05, "loss": 2.2761, "step": 16187 }, { "epoch": 0.21, "grad_norm": 4.045446395874023, "learning_rate": 1.9973413732618627e-05, "loss": 2.0432, "step": 16188 }, { "epoch": 0.21, "grad_norm": 4.24640417098999, "learning_rate": 1.9973406074819593e-05, "loss": 1.9435, "step": 16189 }, { "epoch": 0.21, "grad_norm": 4.505069732666016, "learning_rate": 1.9973398415919332e-05, "loss": 2.0402, "step": 16190 }, { "epoch": 0.21, "grad_norm": 4.245862007141113, "learning_rate": 1.9973390755917834e-05, "loss": 1.9357, "step": 16191 }, { "epoch": 0.21, "grad_norm": 4.383202075958252, "learning_rate": 1.9973383094815107e-05, "loss": 2.4882, "step": 16192 }, { "epoch": 0.21, "grad_norm": 4.295919418334961, "learning_rate": 1.9973375432611147e-05, "loss": 2.2718, "step": 16193 }, { "epoch": 0.21, "grad_norm": 4.090313911437988, "learning_rate": 1.9973367769305962e-05, "loss": 2.0723, "step": 16194 }, { "epoch": 0.21, "grad_norm": 4.104245185852051, "learning_rate": 1.9973360104899543e-05, "loss": 2.4182, "step": 16195 }, { "epoch": 0.21, "grad_norm": 4.30534553527832, "learning_rate": 1.99733524393919e-05, "loss": 2.2106, "step": 16196 }, { "epoch": 0.21, "grad_norm": 3.7708401679992676, "learning_rate": 1.9973344772783026e-05, "loss": 2.0048, "step": 16197 }, { "epoch": 0.21, "grad_norm": 4.02587890625, "learning_rate": 1.9973337105072928e-05, "loss": 1.7126, "step": 16198 }, { "epoch": 0.21, "grad_norm": 4.467806339263916, "learning_rate": 1.9973329436261602e-05, "loss": 2.2574, "step": 16199 }, { "epoch": 0.21, "grad_norm": 4.42022180557251, "learning_rate": 1.9973321766349052e-05, "loss": 2.455, "step": 16200 }, { "epoch": 0.21, "grad_norm": 3.730607032775879, "learning_rate": 1.9973314095335282e-05, "loss": 2.2767, "step": 16201 }, { "epoch": 0.21, "grad_norm": 4.010070323944092, "learning_rate": 1.9973306423220284e-05, "loss": 2.2467, "step": 16202 }, { "epoch": 0.21, "grad_norm": 4.678711891174316, "learning_rate": 1.9973298750004068e-05, "loss": 2.1899, "step": 16203 }, { "epoch": 0.21, "grad_norm": 4.188848495483398, "learning_rate": 1.9973291075686625e-05, "loss": 2.3044, "step": 16204 }, { "epoch": 0.21, "grad_norm": 4.636356353759766, "learning_rate": 1.9973283400267965e-05, "loss": 2.283, "step": 16205 }, { "epoch": 0.21, "grad_norm": 4.241157531738281, "learning_rate": 1.9973275723748083e-05, "loss": 2.5368, "step": 16206 }, { "epoch": 0.21, "grad_norm": 4.324210166931152, "learning_rate": 1.9973268046126985e-05, "loss": 2.3674, "step": 16207 }, { "epoch": 0.21, "grad_norm": 4.459686756134033, "learning_rate": 1.997326036740467e-05, "loss": 2.5283, "step": 16208 }, { "epoch": 0.21, "grad_norm": 3.6542932987213135, "learning_rate": 1.9973252687581132e-05, "loss": 1.7094, "step": 16209 }, { "epoch": 0.21, "grad_norm": 4.298191070556641, "learning_rate": 1.997324500665638e-05, "loss": 2.3581, "step": 16210 }, { "epoch": 0.21, "grad_norm": 4.9195942878723145, "learning_rate": 1.9973237324630417e-05, "loss": 2.4856, "step": 16211 }, { "epoch": 0.21, "grad_norm": 4.322357177734375, "learning_rate": 1.9973229641503238e-05, "loss": 2.3838, "step": 16212 }, { "epoch": 0.21, "grad_norm": 4.158738136291504, "learning_rate": 1.9973221957274842e-05, "loss": 1.9831, "step": 16213 }, { "epoch": 0.21, "grad_norm": 4.427499771118164, "learning_rate": 1.9973214271945232e-05, "loss": 2.5851, "step": 16214 }, { "epoch": 0.21, "grad_norm": 4.5431599617004395, "learning_rate": 1.9973206585514415e-05, "loss": 2.3408, "step": 16215 }, { "epoch": 0.21, "grad_norm": 4.486869812011719, "learning_rate": 1.9973198897982384e-05, "loss": 2.3815, "step": 16216 }, { "epoch": 0.21, "grad_norm": 4.235763072967529, "learning_rate": 1.9973191209349142e-05, "loss": 1.9869, "step": 16217 }, { "epoch": 0.21, "grad_norm": 4.318610668182373, "learning_rate": 1.9973183519614693e-05, "loss": 2.3341, "step": 16218 }, { "epoch": 0.21, "grad_norm": 3.810239553451538, "learning_rate": 1.9973175828779037e-05, "loss": 1.7177, "step": 16219 }, { "epoch": 0.21, "grad_norm": 4.3854756355285645, "learning_rate": 1.9973168136842167e-05, "loss": 2.0877, "step": 16220 }, { "epoch": 0.21, "grad_norm": 4.407405376434326, "learning_rate": 1.9973160443804093e-05, "loss": 2.4928, "step": 16221 }, { "epoch": 0.21, "grad_norm": 4.225485324859619, "learning_rate": 1.9973152749664815e-05, "loss": 2.2819, "step": 16222 }, { "epoch": 0.21, "grad_norm": 4.368983268737793, "learning_rate": 1.997314505442433e-05, "loss": 2.0429, "step": 16223 }, { "epoch": 0.21, "grad_norm": 4.273592948913574, "learning_rate": 1.9973137358082638e-05, "loss": 2.2668, "step": 16224 }, { "epoch": 0.21, "grad_norm": 4.526426315307617, "learning_rate": 1.997312966063975e-05, "loss": 2.3402, "step": 16225 }, { "epoch": 0.21, "grad_norm": 4.651634216308594, "learning_rate": 1.9973121962095652e-05, "loss": 2.5113, "step": 16226 }, { "epoch": 0.21, "grad_norm": 3.835244655609131, "learning_rate": 1.9973114262450352e-05, "loss": 2.2169, "step": 16227 }, { "epoch": 0.21, "grad_norm": 3.9391093254089355, "learning_rate": 1.9973106561703855e-05, "loss": 2.1904, "step": 16228 }, { "epoch": 0.21, "grad_norm": 3.8255410194396973, "learning_rate": 1.9973098859856157e-05, "loss": 1.5191, "step": 16229 }, { "epoch": 0.21, "grad_norm": 4.023573398590088, "learning_rate": 1.9973091156907255e-05, "loss": 2.0795, "step": 16230 }, { "epoch": 0.21, "grad_norm": 4.0104265213012695, "learning_rate": 1.997308345285716e-05, "loss": 2.5868, "step": 16231 }, { "epoch": 0.21, "grad_norm": 4.745743751525879, "learning_rate": 1.9973075747705864e-05, "loss": 2.9608, "step": 16232 }, { "epoch": 0.21, "grad_norm": 3.942290782928467, "learning_rate": 1.9973068041453375e-05, "loss": 1.9714, "step": 16233 }, { "epoch": 0.21, "grad_norm": 5.679055690765381, "learning_rate": 1.997306033409969e-05, "loss": 2.4868, "step": 16234 }, { "epoch": 0.21, "grad_norm": 3.933696746826172, "learning_rate": 1.9973052625644805e-05, "loss": 2.2182, "step": 16235 }, { "epoch": 0.21, "grad_norm": 3.2206356525421143, "learning_rate": 1.997304491608873e-05, "loss": 1.426, "step": 16236 }, { "epoch": 0.21, "grad_norm": 4.529945373535156, "learning_rate": 1.997303720543146e-05, "loss": 2.4423, "step": 16237 }, { "epoch": 0.21, "grad_norm": 4.316847324371338, "learning_rate": 1.9973029493673e-05, "loss": 2.1692, "step": 16238 }, { "epoch": 0.21, "grad_norm": 4.200464248657227, "learning_rate": 1.9973021780813345e-05, "loss": 1.9387, "step": 16239 }, { "epoch": 0.21, "grad_norm": 3.8200745582580566, "learning_rate": 1.99730140668525e-05, "loss": 1.4985, "step": 16240 }, { "epoch": 0.21, "grad_norm": 3.758347749710083, "learning_rate": 1.9973006351790465e-05, "loss": 1.8999, "step": 16241 }, { "epoch": 0.21, "grad_norm": 3.9082212448120117, "learning_rate": 1.9972998635627243e-05, "loss": 1.9934, "step": 16242 }, { "epoch": 0.21, "grad_norm": 3.547900676727295, "learning_rate": 1.997299091836283e-05, "loss": 1.6973, "step": 16243 }, { "epoch": 0.21, "grad_norm": 4.633162021636963, "learning_rate": 1.9972983199997232e-05, "loss": 2.5547, "step": 16244 }, { "epoch": 0.21, "grad_norm": 3.6502158641815186, "learning_rate": 1.9972975480530446e-05, "loss": 1.5283, "step": 16245 }, { "epoch": 0.21, "grad_norm": 4.028741359710693, "learning_rate": 1.9972967759962473e-05, "loss": 1.8986, "step": 16246 }, { "epoch": 0.21, "grad_norm": 3.3875958919525146, "learning_rate": 1.997296003829332e-05, "loss": 1.9755, "step": 16247 }, { "epoch": 0.21, "grad_norm": 4.888978958129883, "learning_rate": 1.9972952315522977e-05, "loss": 2.2533, "step": 16248 }, { "epoch": 0.21, "grad_norm": 4.789900302886963, "learning_rate": 1.9972944591651457e-05, "loss": 2.4265, "step": 16249 }, { "epoch": 0.21, "grad_norm": 4.059145450592041, "learning_rate": 1.997293686667875e-05, "loss": 1.9458, "step": 16250 }, { "epoch": 0.21, "grad_norm": 4.534018516540527, "learning_rate": 1.9972929140604863e-05, "loss": 2.2661, "step": 16251 }, { "epoch": 0.21, "grad_norm": 3.9837753772735596, "learning_rate": 1.9972921413429796e-05, "loss": 1.7232, "step": 16252 }, { "epoch": 0.21, "grad_norm": 5.340126037597656, "learning_rate": 1.997291368515355e-05, "loss": 3.2851, "step": 16253 }, { "epoch": 0.21, "grad_norm": 4.524316310882568, "learning_rate": 1.997290595577612e-05, "loss": 2.4857, "step": 16254 }, { "epoch": 0.21, "grad_norm": 4.042933464050293, "learning_rate": 1.9972898225297517e-05, "loss": 1.9361, "step": 16255 }, { "epoch": 0.21, "grad_norm": 4.509505271911621, "learning_rate": 1.9972890493717737e-05, "loss": 2.0227, "step": 16256 }, { "epoch": 0.21, "grad_norm": 3.8916172981262207, "learning_rate": 1.997288276103678e-05, "loss": 2.2576, "step": 16257 }, { "epoch": 0.21, "grad_norm": 4.422789096832275, "learning_rate": 1.9972875027254645e-05, "loss": 2.1772, "step": 16258 }, { "epoch": 0.21, "grad_norm": 4.405124664306641, "learning_rate": 1.9972867292371338e-05, "loss": 2.1639, "step": 16259 }, { "epoch": 0.21, "grad_norm": 3.887929677963257, "learning_rate": 1.9972859556386854e-05, "loss": 1.8481, "step": 16260 }, { "epoch": 0.21, "grad_norm": 4.152780532836914, "learning_rate": 1.9972851819301203e-05, "loss": 1.8529, "step": 16261 }, { "epoch": 0.21, "grad_norm": 4.245559215545654, "learning_rate": 1.9972844081114375e-05, "loss": 2.2457, "step": 16262 }, { "epoch": 0.21, "grad_norm": 4.239809513092041, "learning_rate": 1.9972836341826375e-05, "loss": 1.7445, "step": 16263 }, { "epoch": 0.21, "grad_norm": 4.130683898925781, "learning_rate": 1.997282860143721e-05, "loss": 2.1818, "step": 16264 }, { "epoch": 0.21, "grad_norm": 4.245309829711914, "learning_rate": 1.9972820859946874e-05, "loss": 2.0346, "step": 16265 }, { "epoch": 0.21, "grad_norm": 4.6200127601623535, "learning_rate": 1.9972813117355366e-05, "loss": 2.1901, "step": 16266 }, { "epoch": 0.21, "grad_norm": 4.294144630432129, "learning_rate": 1.9972805373662693e-05, "loss": 1.9705, "step": 16267 }, { "epoch": 0.21, "grad_norm": 4.680232048034668, "learning_rate": 1.9972797628868852e-05, "loss": 2.1416, "step": 16268 }, { "epoch": 0.21, "grad_norm": 4.078852653503418, "learning_rate": 1.9972789882973846e-05, "loss": 2.1169, "step": 16269 }, { "epoch": 0.21, "grad_norm": 4.29640531539917, "learning_rate": 1.9972782135977672e-05, "loss": 1.8189, "step": 16270 }, { "epoch": 0.21, "grad_norm": 3.9897096157073975, "learning_rate": 1.9972774387880336e-05, "loss": 2.1323, "step": 16271 }, { "epoch": 0.21, "grad_norm": 4.485411167144775, "learning_rate": 1.9972766638681836e-05, "loss": 2.3326, "step": 16272 }, { "epoch": 0.21, "grad_norm": 4.132328033447266, "learning_rate": 1.9972758888382177e-05, "loss": 2.4441, "step": 16273 }, { "epoch": 0.21, "grad_norm": 4.142960548400879, "learning_rate": 1.997275113698135e-05, "loss": 2.0892, "step": 16274 }, { "epoch": 0.21, "grad_norm": 5.7295308113098145, "learning_rate": 1.9972743384479365e-05, "loss": 2.047, "step": 16275 }, { "epoch": 0.21, "grad_norm": 4.05756139755249, "learning_rate": 1.9972735630876223e-05, "loss": 1.895, "step": 16276 }, { "epoch": 0.21, "grad_norm": 3.7727997303009033, "learning_rate": 1.9972727876171917e-05, "loss": 2.0188, "step": 16277 }, { "epoch": 0.21, "grad_norm": 4.417667388916016, "learning_rate": 1.9972720120366452e-05, "loss": 2.257, "step": 16278 }, { "epoch": 0.21, "grad_norm": 4.126269340515137, "learning_rate": 1.9972712363459834e-05, "loss": 2.0736, "step": 16279 }, { "epoch": 0.21, "grad_norm": 4.000183582305908, "learning_rate": 1.9972704605452056e-05, "loss": 2.0008, "step": 16280 }, { "epoch": 0.21, "grad_norm": 4.458343505859375, "learning_rate": 1.9972696846343125e-05, "loss": 2.3695, "step": 16281 }, { "epoch": 0.21, "grad_norm": 4.779115200042725, "learning_rate": 1.9972689086133038e-05, "loss": 2.7077, "step": 16282 }, { "epoch": 0.21, "grad_norm": 4.473811149597168, "learning_rate": 1.9972681324821798e-05, "loss": 1.9351, "step": 16283 }, { "epoch": 0.21, "grad_norm": 3.9749741554260254, "learning_rate": 1.99726735624094e-05, "loss": 1.8713, "step": 16284 }, { "epoch": 0.21, "grad_norm": 3.9573142528533936, "learning_rate": 1.9972665798895855e-05, "loss": 1.9918, "step": 16285 }, { "epoch": 0.21, "grad_norm": 4.682225227355957, "learning_rate": 1.9972658034281156e-05, "loss": 2.0879, "step": 16286 }, { "epoch": 0.21, "grad_norm": 4.89155912399292, "learning_rate": 1.9972650268565307e-05, "loss": 3.1662, "step": 16287 }, { "epoch": 0.21, "grad_norm": 4.380592346191406, "learning_rate": 1.997264250174831e-05, "loss": 1.9503, "step": 16288 }, { "epoch": 0.21, "grad_norm": 5.131933212280273, "learning_rate": 1.9972634733830157e-05, "loss": 3.1039, "step": 16289 }, { "epoch": 0.21, "grad_norm": 4.322289943695068, "learning_rate": 1.9972626964810863e-05, "loss": 2.2752, "step": 16290 }, { "epoch": 0.21, "grad_norm": 4.5296549797058105, "learning_rate": 1.997261919469042e-05, "loss": 2.1154, "step": 16291 }, { "epoch": 0.21, "grad_norm": 4.575369834899902, "learning_rate": 1.997261142346883e-05, "loss": 2.1906, "step": 16292 }, { "epoch": 0.21, "grad_norm": 4.1441650390625, "learning_rate": 1.9972603651146096e-05, "loss": 2.2125, "step": 16293 }, { "epoch": 0.21, "grad_norm": 3.9944474697113037, "learning_rate": 1.9972595877722217e-05, "loss": 1.8999, "step": 16294 }, { "epoch": 0.21, "grad_norm": 4.249787330627441, "learning_rate": 1.997258810319719e-05, "loss": 2.0074, "step": 16295 }, { "epoch": 0.21, "grad_norm": 4.280144691467285, "learning_rate": 1.9972580327571027e-05, "loss": 2.3099, "step": 16296 }, { "epoch": 0.21, "grad_norm": 4.199949741363525, "learning_rate": 1.997257255084372e-05, "loss": 2.4052, "step": 16297 }, { "epoch": 0.21, "grad_norm": 4.202551364898682, "learning_rate": 1.997256477301527e-05, "loss": 2.1957, "step": 16298 }, { "epoch": 0.21, "grad_norm": 4.685677528381348, "learning_rate": 1.997255699408568e-05, "loss": 2.087, "step": 16299 }, { "epoch": 0.21, "grad_norm": 4.397066593170166, "learning_rate": 1.997254921405495e-05, "loss": 1.8245, "step": 16300 }, { "epoch": 0.21, "grad_norm": 4.982439041137695, "learning_rate": 1.9972541432923084e-05, "loss": 2.7322, "step": 16301 }, { "epoch": 0.21, "grad_norm": 3.963088274002075, "learning_rate": 1.9972533650690076e-05, "loss": 2.2294, "step": 16302 }, { "epoch": 0.21, "grad_norm": 4.281265735626221, "learning_rate": 1.997252586735593e-05, "loss": 2.2181, "step": 16303 }, { "epoch": 0.21, "grad_norm": 3.9567177295684814, "learning_rate": 1.9972518082920654e-05, "loss": 1.7441, "step": 16304 }, { "epoch": 0.21, "grad_norm": 3.8360049724578857, "learning_rate": 1.997251029738424e-05, "loss": 1.8743, "step": 16305 }, { "epoch": 0.21, "grad_norm": 4.794510364532471, "learning_rate": 1.9972502510746692e-05, "loss": 2.0368, "step": 16306 }, { "epoch": 0.21, "grad_norm": 4.092384338378906, "learning_rate": 1.997249472300801e-05, "loss": 2.2703, "step": 16307 }, { "epoch": 0.21, "grad_norm": 4.530204772949219, "learning_rate": 1.9972486934168196e-05, "loss": 2.3679, "step": 16308 }, { "epoch": 0.21, "grad_norm": 4.197330951690674, "learning_rate": 1.997247914422725e-05, "loss": 2.3422, "step": 16309 }, { "epoch": 0.21, "grad_norm": 3.5994608402252197, "learning_rate": 1.9972471353185173e-05, "loss": 1.7481, "step": 16310 }, { "epoch": 0.21, "grad_norm": 3.9710023403167725, "learning_rate": 1.9972463561041967e-05, "loss": 1.9444, "step": 16311 }, { "epoch": 0.21, "grad_norm": 4.25507116317749, "learning_rate": 1.9972455767797632e-05, "loss": 2.209, "step": 16312 }, { "epoch": 0.21, "grad_norm": 4.631453037261963, "learning_rate": 1.9972447973452165e-05, "loss": 1.9244, "step": 16313 }, { "epoch": 0.21, "grad_norm": 4.457644939422607, "learning_rate": 1.9972440178005575e-05, "loss": 2.2777, "step": 16314 }, { "epoch": 0.21, "grad_norm": 4.209293842315674, "learning_rate": 1.9972432381457855e-05, "loss": 2.1806, "step": 16315 }, { "epoch": 0.21, "grad_norm": 4.08178186416626, "learning_rate": 1.997242458380901e-05, "loss": 2.091, "step": 16316 }, { "epoch": 0.21, "grad_norm": 4.099030494689941, "learning_rate": 1.9972416785059042e-05, "loss": 2.1326, "step": 16317 }, { "epoch": 0.21, "grad_norm": 3.91105055809021, "learning_rate": 1.997240898520795e-05, "loss": 2.0039, "step": 16318 }, { "epoch": 0.21, "grad_norm": 5.234610080718994, "learning_rate": 1.9972401184255732e-05, "loss": 2.7249, "step": 16319 }, { "epoch": 0.21, "grad_norm": 4.397969722747803, "learning_rate": 1.9972393382202394e-05, "loss": 2.4298, "step": 16320 }, { "epoch": 0.21, "grad_norm": 4.206408500671387, "learning_rate": 1.9972385579047936e-05, "loss": 2.0508, "step": 16321 }, { "epoch": 0.21, "grad_norm": 4.154628276824951, "learning_rate": 1.9972377774792356e-05, "loss": 2.2548, "step": 16322 }, { "epoch": 0.21, "grad_norm": 3.9858946800231934, "learning_rate": 1.9972369969435653e-05, "loss": 1.6875, "step": 16323 }, { "epoch": 0.21, "grad_norm": 4.614896297454834, "learning_rate": 1.9972362162977834e-05, "loss": 2.5186, "step": 16324 }, { "epoch": 0.21, "grad_norm": 4.08406400680542, "learning_rate": 1.99723543554189e-05, "loss": 2.2625, "step": 16325 }, { "epoch": 0.21, "grad_norm": 4.073922634124756, "learning_rate": 1.9972346546758844e-05, "loss": 2.4978, "step": 16326 }, { "epoch": 0.21, "grad_norm": 4.518715858459473, "learning_rate": 1.9972338736997674e-05, "loss": 2.2551, "step": 16327 }, { "epoch": 0.21, "grad_norm": 4.62926721572876, "learning_rate": 1.997233092613539e-05, "loss": 2.5682, "step": 16328 }, { "epoch": 0.21, "grad_norm": 4.305211067199707, "learning_rate": 1.997232311417199e-05, "loss": 2.0295, "step": 16329 }, { "epoch": 0.21, "grad_norm": 4.9267377853393555, "learning_rate": 1.9972315301107476e-05, "loss": 2.8157, "step": 16330 }, { "epoch": 0.21, "grad_norm": 3.945528268814087, "learning_rate": 1.9972307486941848e-05, "loss": 1.5845, "step": 16331 }, { "epoch": 0.21, "grad_norm": 4.272035598754883, "learning_rate": 1.997229967167511e-05, "loss": 2.048, "step": 16332 }, { "epoch": 0.21, "grad_norm": 4.078681945800781, "learning_rate": 1.997229185530726e-05, "loss": 2.0777, "step": 16333 }, { "epoch": 0.21, "grad_norm": 4.478896141052246, "learning_rate": 1.99722840378383e-05, "loss": 2.1735, "step": 16334 }, { "epoch": 0.21, "grad_norm": 4.853461742401123, "learning_rate": 1.997227621926823e-05, "loss": 2.3048, "step": 16335 }, { "epoch": 0.21, "grad_norm": 4.557734489440918, "learning_rate": 1.9972268399597054e-05, "loss": 2.4322, "step": 16336 }, { "epoch": 0.21, "grad_norm": 4.386146068572998, "learning_rate": 1.9972260578824768e-05, "loss": 2.1233, "step": 16337 }, { "epoch": 0.21, "grad_norm": 5.136478900909424, "learning_rate": 1.9972252756951377e-05, "loss": 2.5342, "step": 16338 }, { "epoch": 0.21, "grad_norm": 3.5522055625915527, "learning_rate": 1.997224493397688e-05, "loss": 1.8704, "step": 16339 }, { "epoch": 0.21, "grad_norm": 4.138025760650635, "learning_rate": 1.9972237109901278e-05, "loss": 2.1766, "step": 16340 }, { "epoch": 0.21, "grad_norm": 4.640761852264404, "learning_rate": 1.997222928472457e-05, "loss": 2.523, "step": 16341 }, { "epoch": 0.21, "grad_norm": 4.674239635467529, "learning_rate": 1.997222145844676e-05, "loss": 2.3466, "step": 16342 }, { "epoch": 0.21, "grad_norm": 4.394548416137695, "learning_rate": 1.9972213631067844e-05, "loss": 2.4562, "step": 16343 }, { "epoch": 0.21, "grad_norm": 4.478116512298584, "learning_rate": 1.9972205802587833e-05, "loss": 2.5206, "step": 16344 }, { "epoch": 0.21, "grad_norm": 3.8134446144104004, "learning_rate": 1.9972197973006716e-05, "loss": 2.1607, "step": 16345 }, { "epoch": 0.21, "grad_norm": 3.80196475982666, "learning_rate": 1.9972190142324504e-05, "loss": 1.7969, "step": 16346 }, { "epoch": 0.21, "grad_norm": 4.086705207824707, "learning_rate": 1.9972182310541187e-05, "loss": 2.1791, "step": 16347 }, { "epoch": 0.21, "grad_norm": 4.17879056930542, "learning_rate": 1.9972174477656777e-05, "loss": 2.109, "step": 16348 }, { "epoch": 0.21, "grad_norm": 5.193641185760498, "learning_rate": 1.9972166643671265e-05, "loss": 2.5728, "step": 16349 }, { "epoch": 0.21, "grad_norm": 3.571110963821411, "learning_rate": 1.997215880858466e-05, "loss": 1.7425, "step": 16350 }, { "epoch": 0.21, "grad_norm": 4.00970458984375, "learning_rate": 1.9972150972396962e-05, "loss": 1.8568, "step": 16351 }, { "epoch": 0.21, "grad_norm": 4.356159210205078, "learning_rate": 1.9972143135108168e-05, "loss": 1.9659, "step": 16352 }, { "epoch": 0.21, "grad_norm": 4.8167595863342285, "learning_rate": 1.9972135296718275e-05, "loss": 2.3443, "step": 16353 }, { "epoch": 0.21, "grad_norm": 4.985016345977783, "learning_rate": 1.9972127457227293e-05, "loss": 2.5928, "step": 16354 }, { "epoch": 0.21, "grad_norm": 4.564814567565918, "learning_rate": 1.997211961663522e-05, "loss": 2.8094, "step": 16355 }, { "epoch": 0.21, "grad_norm": 4.032374858856201, "learning_rate": 1.9972111774942053e-05, "loss": 2.0529, "step": 16356 }, { "epoch": 0.21, "grad_norm": 3.845670461654663, "learning_rate": 1.9972103932147798e-05, "loss": 1.735, "step": 16357 }, { "epoch": 0.21, "grad_norm": 4.796000957489014, "learning_rate": 1.997209608825245e-05, "loss": 2.3688, "step": 16358 }, { "epoch": 0.21, "grad_norm": 4.600316047668457, "learning_rate": 1.997208824325602e-05, "loss": 1.9758, "step": 16359 }, { "epoch": 0.21, "grad_norm": 4.374851703643799, "learning_rate": 1.9972080397158496e-05, "loss": 2.117, "step": 16360 }, { "epoch": 0.21, "grad_norm": 3.3864617347717285, "learning_rate": 1.997207254995989e-05, "loss": 1.4099, "step": 16361 }, { "epoch": 0.21, "grad_norm": 3.8590023517608643, "learning_rate": 1.9972064701660196e-05, "loss": 1.8917, "step": 16362 }, { "epoch": 0.21, "grad_norm": 3.578829050064087, "learning_rate": 1.9972056852259413e-05, "loss": 1.7178, "step": 16363 }, { "epoch": 0.21, "grad_norm": 4.32914400100708, "learning_rate": 1.997204900175755e-05, "loss": 2.7655, "step": 16364 }, { "epoch": 0.21, "grad_norm": 4.182252883911133, "learning_rate": 1.99720411501546e-05, "loss": 2.2834, "step": 16365 }, { "epoch": 0.21, "grad_norm": 4.405464172363281, "learning_rate": 1.9972033297450573e-05, "loss": 1.8274, "step": 16366 }, { "epoch": 0.21, "grad_norm": 3.925297737121582, "learning_rate": 1.9972025443645462e-05, "loss": 2.1404, "step": 16367 }, { "epoch": 0.21, "grad_norm": 4.216248035430908, "learning_rate": 1.997201758873927e-05, "loss": 2.2857, "step": 16368 }, { "epoch": 0.21, "grad_norm": 4.070960998535156, "learning_rate": 1.9972009732731996e-05, "loss": 2.2788, "step": 16369 }, { "epoch": 0.21, "grad_norm": 4.425295352935791, "learning_rate": 1.9972001875623644e-05, "loss": 2.2757, "step": 16370 }, { "epoch": 0.21, "grad_norm": 4.645224571228027, "learning_rate": 1.9971994017414216e-05, "loss": 2.0462, "step": 16371 }, { "epoch": 0.21, "grad_norm": 3.7804346084594727, "learning_rate": 1.997198615810371e-05, "loss": 2.0782, "step": 16372 }, { "epoch": 0.21, "grad_norm": 3.9601657390594482, "learning_rate": 1.9971978297692126e-05, "loss": 1.7408, "step": 16373 }, { "epoch": 0.21, "grad_norm": 4.633029460906982, "learning_rate": 1.9971970436179466e-05, "loss": 2.6618, "step": 16374 }, { "epoch": 0.21, "grad_norm": 4.502002716064453, "learning_rate": 1.997196257356573e-05, "loss": 2.5009, "step": 16375 }, { "epoch": 0.21, "grad_norm": 4.683364391326904, "learning_rate": 1.9971954709850925e-05, "loss": 2.775, "step": 16376 }, { "epoch": 0.21, "grad_norm": 4.044293403625488, "learning_rate": 1.9971946845035047e-05, "loss": 2.1937, "step": 16377 }, { "epoch": 0.21, "grad_norm": 3.9496071338653564, "learning_rate": 1.9971938979118094e-05, "loss": 1.6732, "step": 16378 }, { "epoch": 0.21, "grad_norm": 3.8983442783355713, "learning_rate": 1.997193111210007e-05, "loss": 2.1195, "step": 16379 }, { "epoch": 0.21, "grad_norm": 4.425592422485352, "learning_rate": 1.9971923243980977e-05, "loss": 2.139, "step": 16380 }, { "epoch": 0.21, "grad_norm": 4.593349456787109, "learning_rate": 1.9971915374760812e-05, "loss": 2.4609, "step": 16381 }, { "epoch": 0.21, "grad_norm": 4.602244853973389, "learning_rate": 1.997190750443958e-05, "loss": 1.9033, "step": 16382 }, { "epoch": 0.21, "grad_norm": 3.996356725692749, "learning_rate": 1.997189963301728e-05, "loss": 1.8796, "step": 16383 }, { "epoch": 0.21, "grad_norm": 3.8466739654541016, "learning_rate": 1.9971891760493915e-05, "loss": 2.5392, "step": 16384 }, { "epoch": 0.21, "grad_norm": 3.704622268676758, "learning_rate": 1.997188388686948e-05, "loss": 1.8255, "step": 16385 }, { "epoch": 0.21, "grad_norm": 4.064257621765137, "learning_rate": 1.997187601214398e-05, "loss": 2.2103, "step": 16386 }, { "epoch": 0.21, "grad_norm": 4.749755382537842, "learning_rate": 1.9971868136317417e-05, "loss": 2.3939, "step": 16387 }, { "epoch": 0.21, "grad_norm": 4.405355930328369, "learning_rate": 1.997186025938979e-05, "loss": 2.06, "step": 16388 }, { "epoch": 0.21, "grad_norm": 4.08318567276001, "learning_rate": 1.9971852381361104e-05, "loss": 1.7597, "step": 16389 }, { "epoch": 0.21, "grad_norm": 4.777575492858887, "learning_rate": 1.997184450223136e-05, "loss": 2.6688, "step": 16390 }, { "epoch": 0.21, "grad_norm": 3.7783889770507812, "learning_rate": 1.9971836622000548e-05, "loss": 2.1109, "step": 16391 }, { "epoch": 0.21, "grad_norm": 4.000679969787598, "learning_rate": 1.9971828740668676e-05, "loss": 2.2552, "step": 16392 }, { "epoch": 0.21, "grad_norm": 4.502628803253174, "learning_rate": 1.9971820858235743e-05, "loss": 2.4097, "step": 16393 }, { "epoch": 0.21, "grad_norm": 4.77426815032959, "learning_rate": 1.9971812974701755e-05, "loss": 2.0958, "step": 16394 }, { "epoch": 0.21, "grad_norm": 4.445237159729004, "learning_rate": 1.9971805090066712e-05, "loss": 2.0753, "step": 16395 }, { "epoch": 0.21, "grad_norm": 4.18986701965332, "learning_rate": 1.997179720433061e-05, "loss": 1.9842, "step": 16396 }, { "epoch": 0.21, "grad_norm": 4.64576530456543, "learning_rate": 1.9971789317493452e-05, "loss": 2.2662, "step": 16397 }, { "epoch": 0.21, "grad_norm": 4.49131441116333, "learning_rate": 1.9971781429555238e-05, "loss": 2.3713, "step": 16398 }, { "epoch": 0.21, "grad_norm": 4.90303373336792, "learning_rate": 1.9971773540515973e-05, "loss": 2.0329, "step": 16399 }, { "epoch": 0.21, "grad_norm": 4.217397689819336, "learning_rate": 1.9971765650375653e-05, "loss": 2.141, "step": 16400 }, { "epoch": 0.21, "grad_norm": 4.883430004119873, "learning_rate": 1.9971757759134285e-05, "loss": 2.4557, "step": 16401 }, { "epoch": 0.21, "grad_norm": 4.103353023529053, "learning_rate": 1.9971749866791862e-05, "loss": 2.0005, "step": 16402 }, { "epoch": 0.21, "grad_norm": 4.054234027862549, "learning_rate": 1.997174197334839e-05, "loss": 2.0629, "step": 16403 }, { "epoch": 0.21, "grad_norm": 4.198331356048584, "learning_rate": 1.9971734078803866e-05, "loss": 1.6795, "step": 16404 }, { "epoch": 0.21, "grad_norm": 3.7983896732330322, "learning_rate": 1.9971726183158296e-05, "loss": 1.7989, "step": 16405 }, { "epoch": 0.21, "grad_norm": 4.439173221588135, "learning_rate": 1.997171828641168e-05, "loss": 2.4597, "step": 16406 }, { "epoch": 0.21, "grad_norm": 3.941720724105835, "learning_rate": 1.9971710388564013e-05, "loss": 2.6009, "step": 16407 }, { "epoch": 0.21, "grad_norm": 4.230388164520264, "learning_rate": 1.9971702489615302e-05, "loss": 2.0143, "step": 16408 }, { "epoch": 0.21, "grad_norm": 3.96121883392334, "learning_rate": 1.9971694589565547e-05, "loss": 1.8504, "step": 16409 }, { "epoch": 0.21, "grad_norm": 3.607701301574707, "learning_rate": 1.9971686688414748e-05, "loss": 1.6148, "step": 16410 }, { "epoch": 0.21, "grad_norm": 4.444249629974365, "learning_rate": 1.9971678786162904e-05, "loss": 2.4409, "step": 16411 }, { "epoch": 0.21, "grad_norm": 3.601541757583618, "learning_rate": 1.9971670882810015e-05, "loss": 1.7418, "step": 16412 }, { "epoch": 0.21, "grad_norm": 5.112332820892334, "learning_rate": 1.997166297835609e-05, "loss": 2.4375, "step": 16413 }, { "epoch": 0.21, "grad_norm": 4.076874732971191, "learning_rate": 1.9971655072801117e-05, "loss": 1.9517, "step": 16414 }, { "epoch": 0.21, "grad_norm": 4.423877716064453, "learning_rate": 1.997164716614511e-05, "loss": 2.2735, "step": 16415 }, { "epoch": 0.21, "grad_norm": 3.8362133502960205, "learning_rate": 1.997163925838806e-05, "loss": 1.8707, "step": 16416 }, { "epoch": 0.21, "grad_norm": 3.7046351432800293, "learning_rate": 1.9971631349529974e-05, "loss": 1.7455, "step": 16417 }, { "epoch": 0.21, "grad_norm": 3.907788038253784, "learning_rate": 1.997162343957085e-05, "loss": 1.6896, "step": 16418 }, { "epoch": 0.21, "grad_norm": 4.10551643371582, "learning_rate": 1.997161552851069e-05, "loss": 1.638, "step": 16419 }, { "epoch": 0.21, "grad_norm": 3.916835308074951, "learning_rate": 1.9971607616349497e-05, "loss": 1.8899, "step": 16420 }, { "epoch": 0.21, "grad_norm": 4.409732818603516, "learning_rate": 1.9971599703087268e-05, "loss": 2.4253, "step": 16421 }, { "epoch": 0.21, "grad_norm": 4.188427925109863, "learning_rate": 1.9971591788724005e-05, "loss": 2.4297, "step": 16422 }, { "epoch": 0.21, "grad_norm": 4.285727024078369, "learning_rate": 1.9971583873259707e-05, "loss": 2.4026, "step": 16423 }, { "epoch": 0.21, "grad_norm": 4.288252353668213, "learning_rate": 1.9971575956694378e-05, "loss": 2.3073, "step": 16424 }, { "epoch": 0.21, "grad_norm": 4.092189788818359, "learning_rate": 1.9971568039028018e-05, "loss": 2.3913, "step": 16425 }, { "epoch": 0.21, "grad_norm": 4.5025458335876465, "learning_rate": 1.9971560120260627e-05, "loss": 2.2685, "step": 16426 }, { "epoch": 0.21, "grad_norm": 4.872178077697754, "learning_rate": 1.997155220039221e-05, "loss": 3.0301, "step": 16427 }, { "epoch": 0.21, "grad_norm": 4.351108074188232, "learning_rate": 1.997154427942276e-05, "loss": 2.2442, "step": 16428 }, { "epoch": 0.21, "grad_norm": 4.362000465393066, "learning_rate": 1.9971536357352286e-05, "loss": 2.1812, "step": 16429 }, { "epoch": 0.21, "grad_norm": 4.440372467041016, "learning_rate": 1.997152843418078e-05, "loss": 2.4545, "step": 16430 }, { "epoch": 0.21, "grad_norm": 3.9902961254119873, "learning_rate": 1.9971520509908252e-05, "loss": 2.1815, "step": 16431 }, { "epoch": 0.21, "grad_norm": 3.761075973510742, "learning_rate": 1.99715125845347e-05, "loss": 1.967, "step": 16432 }, { "epoch": 0.21, "grad_norm": 4.4955668449401855, "learning_rate": 1.9971504658060125e-05, "loss": 2.1801, "step": 16433 }, { "epoch": 0.21, "grad_norm": 3.843292713165283, "learning_rate": 1.9971496730484523e-05, "loss": 1.9791, "step": 16434 }, { "epoch": 0.21, "grad_norm": 4.78224515914917, "learning_rate": 1.99714888018079e-05, "loss": 3.0631, "step": 16435 }, { "epoch": 0.21, "grad_norm": 4.442633628845215, "learning_rate": 1.9971480872030257e-05, "loss": 2.4192, "step": 16436 }, { "epoch": 0.21, "grad_norm": 3.782372236251831, "learning_rate": 1.997147294115159e-05, "loss": 2.1063, "step": 16437 }, { "epoch": 0.21, "grad_norm": 4.598505973815918, "learning_rate": 1.9971465009171904e-05, "loss": 2.8207, "step": 16438 }, { "epoch": 0.21, "grad_norm": 4.505384922027588, "learning_rate": 1.99714570760912e-05, "loss": 2.3621, "step": 16439 }, { "epoch": 0.21, "grad_norm": 3.8545587062835693, "learning_rate": 1.9971449141909478e-05, "loss": 1.9559, "step": 16440 }, { "epoch": 0.21, "grad_norm": 4.268080234527588, "learning_rate": 1.9971441206626737e-05, "loss": 2.1574, "step": 16441 }, { "epoch": 0.21, "grad_norm": 4.155834197998047, "learning_rate": 1.9971433270242982e-05, "loss": 1.8524, "step": 16442 }, { "epoch": 0.21, "grad_norm": 3.6161372661590576, "learning_rate": 1.9971425332758213e-05, "loss": 1.691, "step": 16443 }, { "epoch": 0.21, "grad_norm": 4.849700927734375, "learning_rate": 1.9971417394172426e-05, "loss": 2.5967, "step": 16444 }, { "epoch": 0.21, "grad_norm": 4.30581521987915, "learning_rate": 1.9971409454485626e-05, "loss": 1.9897, "step": 16445 }, { "epoch": 0.21, "grad_norm": 4.8969902992248535, "learning_rate": 1.9971401513697818e-05, "loss": 2.4901, "step": 16446 }, { "epoch": 0.21, "grad_norm": 5.429630756378174, "learning_rate": 1.9971393571808992e-05, "loss": 2.6903, "step": 16447 }, { "epoch": 0.21, "grad_norm": 5.0927605628967285, "learning_rate": 1.9971385628819156e-05, "loss": 2.4768, "step": 16448 }, { "epoch": 0.21, "grad_norm": 4.443706512451172, "learning_rate": 1.9971377684728313e-05, "loss": 2.4285, "step": 16449 }, { "epoch": 0.21, "grad_norm": 4.505790710449219, "learning_rate": 1.997136973953646e-05, "loss": 2.6718, "step": 16450 }, { "epoch": 0.21, "grad_norm": 4.154331684112549, "learning_rate": 1.9971361793243597e-05, "loss": 2.0378, "step": 16451 }, { "epoch": 0.21, "grad_norm": 4.218125820159912, "learning_rate": 1.9971353845849725e-05, "loss": 2.2845, "step": 16452 }, { "epoch": 0.21, "grad_norm": 4.000930309295654, "learning_rate": 1.997134589735485e-05, "loss": 1.9045, "step": 16453 }, { "epoch": 0.21, "grad_norm": 4.018893241882324, "learning_rate": 1.9971337947758965e-05, "loss": 2.1513, "step": 16454 }, { "epoch": 0.21, "grad_norm": 4.549992561340332, "learning_rate": 1.9971329997062078e-05, "loss": 2.4746, "step": 16455 }, { "epoch": 0.21, "grad_norm": 4.483987331390381, "learning_rate": 1.9971322045264187e-05, "loss": 2.4906, "step": 16456 }, { "epoch": 0.21, "grad_norm": 3.6429030895233154, "learning_rate": 1.9971314092365292e-05, "loss": 1.7069, "step": 16457 }, { "epoch": 0.21, "grad_norm": 4.5715436935424805, "learning_rate": 1.9971306138365396e-05, "loss": 2.3952, "step": 16458 }, { "epoch": 0.21, "grad_norm": 4.221672058105469, "learning_rate": 1.9971298183264497e-05, "loss": 2.2803, "step": 16459 }, { "epoch": 0.21, "grad_norm": 4.361061096191406, "learning_rate": 1.99712902270626e-05, "loss": 2.3861, "step": 16460 }, { "epoch": 0.21, "grad_norm": 4.304427623748779, "learning_rate": 1.9971282269759703e-05, "loss": 1.7974, "step": 16461 }, { "epoch": 0.21, "grad_norm": 5.035087585449219, "learning_rate": 1.9971274311355805e-05, "loss": 2.3277, "step": 16462 }, { "epoch": 0.21, "grad_norm": 4.8927226066589355, "learning_rate": 1.997126635185091e-05, "loss": 2.4897, "step": 16463 }, { "epoch": 0.21, "grad_norm": 4.326672077178955, "learning_rate": 1.9971258391245018e-05, "loss": 1.9725, "step": 16464 }, { "epoch": 0.21, "grad_norm": 3.885066032409668, "learning_rate": 1.9971250429538132e-05, "loss": 1.8109, "step": 16465 }, { "epoch": 0.21, "grad_norm": 4.213484287261963, "learning_rate": 1.9971242466730246e-05, "loss": 2.1735, "step": 16466 }, { "epoch": 0.21, "grad_norm": 4.174249649047852, "learning_rate": 1.997123450282137e-05, "loss": 2.1318, "step": 16467 }, { "epoch": 0.21, "grad_norm": 3.7827935218811035, "learning_rate": 1.99712265378115e-05, "loss": 1.8804, "step": 16468 }, { "epoch": 0.21, "grad_norm": 5.23413610458374, "learning_rate": 1.9971218571700634e-05, "loss": 2.5525, "step": 16469 }, { "epoch": 0.21, "grad_norm": 4.125908374786377, "learning_rate": 1.997121060448878e-05, "loss": 2.0177, "step": 16470 }, { "epoch": 0.21, "grad_norm": 4.21854829788208, "learning_rate": 1.9971202636175935e-05, "loss": 2.0235, "step": 16471 }, { "epoch": 0.21, "grad_norm": 3.3118765354156494, "learning_rate": 1.99711946667621e-05, "loss": 1.5012, "step": 16472 }, { "epoch": 0.21, "grad_norm": 4.828587532043457, "learning_rate": 1.9971186696247274e-05, "loss": 2.4438, "step": 16473 }, { "epoch": 0.21, "grad_norm": 4.899196147918701, "learning_rate": 1.997117872463146e-05, "loss": 3.2899, "step": 16474 }, { "epoch": 0.21, "grad_norm": 4.5885162353515625, "learning_rate": 1.997117075191466e-05, "loss": 1.9614, "step": 16475 }, { "epoch": 0.21, "grad_norm": 3.6378273963928223, "learning_rate": 1.9971162778096875e-05, "loss": 1.9469, "step": 16476 }, { "epoch": 0.21, "grad_norm": 4.751986503601074, "learning_rate": 1.99711548031781e-05, "loss": 2.1855, "step": 16477 }, { "epoch": 0.21, "grad_norm": 4.6457343101501465, "learning_rate": 1.9971146827158344e-05, "loss": 2.301, "step": 16478 }, { "epoch": 0.21, "grad_norm": 4.1820549964904785, "learning_rate": 1.9971138850037603e-05, "loss": 2.7889, "step": 16479 }, { "epoch": 0.21, "grad_norm": 4.527878761291504, "learning_rate": 1.997113087181588e-05, "loss": 2.3303, "step": 16480 }, { "epoch": 0.21, "grad_norm": 4.81076192855835, "learning_rate": 1.9971122892493174e-05, "loss": 2.3307, "step": 16481 }, { "epoch": 0.21, "grad_norm": 4.127875804901123, "learning_rate": 1.997111491206949e-05, "loss": 2.1862, "step": 16482 }, { "epoch": 0.21, "grad_norm": 4.768167018890381, "learning_rate": 1.997110693054482e-05, "loss": 2.5079, "step": 16483 }, { "epoch": 0.21, "grad_norm": 4.804640293121338, "learning_rate": 1.9971098947919176e-05, "loss": 2.3676, "step": 16484 }, { "epoch": 0.21, "grad_norm": 3.964804172515869, "learning_rate": 1.997109096419255e-05, "loss": 2.4605, "step": 16485 }, { "epoch": 0.21, "grad_norm": 5.017655849456787, "learning_rate": 1.9971082979364945e-05, "loss": 2.0178, "step": 16486 }, { "epoch": 0.21, "grad_norm": 4.485256671905518, "learning_rate": 1.9971074993436366e-05, "loss": 1.9362, "step": 16487 }, { "epoch": 0.21, "grad_norm": 4.4595866203308105, "learning_rate": 1.9971067006406814e-05, "loss": 2.1726, "step": 16488 }, { "epoch": 0.21, "grad_norm": 4.399577617645264, "learning_rate": 1.997105901827628e-05, "loss": 2.2538, "step": 16489 }, { "epoch": 0.21, "grad_norm": 3.7831318378448486, "learning_rate": 1.997105102904478e-05, "loss": 1.83, "step": 16490 }, { "epoch": 0.21, "grad_norm": 4.797606945037842, "learning_rate": 1.9971043038712303e-05, "loss": 2.7516, "step": 16491 }, { "epoch": 0.21, "grad_norm": 4.749159336090088, "learning_rate": 1.9971035047278853e-05, "loss": 2.2791, "step": 16492 }, { "epoch": 0.21, "grad_norm": 4.157660961151123, "learning_rate": 1.997102705474443e-05, "loss": 2.1902, "step": 16493 }, { "epoch": 0.21, "grad_norm": 4.294620990753174, "learning_rate": 1.997101906110904e-05, "loss": 2.2825, "step": 16494 }, { "epoch": 0.21, "grad_norm": 3.781174659729004, "learning_rate": 1.9971011066372678e-05, "loss": 1.9319, "step": 16495 }, { "epoch": 0.21, "grad_norm": 4.499472618103027, "learning_rate": 1.9971003070535348e-05, "loss": 2.0306, "step": 16496 }, { "epoch": 0.21, "grad_norm": 4.738282680511475, "learning_rate": 1.997099507359705e-05, "loss": 2.3344, "step": 16497 }, { "epoch": 0.21, "grad_norm": 4.845463275909424, "learning_rate": 1.9970987075557785e-05, "loss": 2.3847, "step": 16498 }, { "epoch": 0.21, "grad_norm": 3.927253484725952, "learning_rate": 1.9970979076417552e-05, "loss": 1.9112, "step": 16499 }, { "epoch": 0.21, "grad_norm": 5.294838905334473, "learning_rate": 1.997097107617636e-05, "loss": 2.7669, "step": 16500 }, { "epoch": 0.21, "grad_norm": 4.39170503616333, "learning_rate": 1.9970963074834197e-05, "loss": 2.2575, "step": 16501 }, { "epoch": 0.21, "grad_norm": 4.311341762542725, "learning_rate": 1.9970955072391074e-05, "loss": 2.3757, "step": 16502 }, { "epoch": 0.21, "grad_norm": 4.039053916931152, "learning_rate": 1.9970947068846988e-05, "loss": 1.8839, "step": 16503 }, { "epoch": 0.21, "grad_norm": 3.6765053272247314, "learning_rate": 1.997093906420194e-05, "loss": 1.8158, "step": 16504 }, { "epoch": 0.21, "grad_norm": 3.785242795944214, "learning_rate": 1.997093105845593e-05, "loss": 1.5912, "step": 16505 }, { "epoch": 0.21, "grad_norm": 4.477118492126465, "learning_rate": 1.997092305160896e-05, "loss": 2.2212, "step": 16506 }, { "epoch": 0.21, "grad_norm": 3.871422529220581, "learning_rate": 1.997091504366103e-05, "loss": 2.15, "step": 16507 }, { "epoch": 0.21, "grad_norm": 3.646418809890747, "learning_rate": 1.9970907034612143e-05, "loss": 2.0502, "step": 16508 }, { "epoch": 0.21, "grad_norm": 3.4482080936431885, "learning_rate": 1.99708990244623e-05, "loss": 1.8536, "step": 16509 }, { "epoch": 0.21, "grad_norm": 4.565735340118408, "learning_rate": 1.9970891013211502e-05, "loss": 2.4598, "step": 16510 }, { "epoch": 0.21, "grad_norm": 4.791543483734131, "learning_rate": 1.9970883000859748e-05, "loss": 2.7147, "step": 16511 }, { "epoch": 0.21, "grad_norm": 4.1945576667785645, "learning_rate": 1.9970874987407036e-05, "loss": 2.0831, "step": 16512 }, { "epoch": 0.21, "grad_norm": 4.056637763977051, "learning_rate": 1.9970866972853372e-05, "loss": 2.163, "step": 16513 }, { "epoch": 0.21, "grad_norm": 4.757598876953125, "learning_rate": 1.9970858957198754e-05, "loss": 2.7277, "step": 16514 }, { "epoch": 0.21, "grad_norm": 3.9056763648986816, "learning_rate": 1.9970850940443184e-05, "loss": 2.3143, "step": 16515 }, { "epoch": 0.21, "grad_norm": 4.647045612335205, "learning_rate": 1.9970842922586663e-05, "loss": 2.1703, "step": 16516 }, { "epoch": 0.21, "grad_norm": 5.695962905883789, "learning_rate": 1.9970834903629193e-05, "loss": 2.8304, "step": 16517 }, { "epoch": 0.21, "grad_norm": 4.1277313232421875, "learning_rate": 1.9970826883570774e-05, "loss": 2.3391, "step": 16518 }, { "epoch": 0.21, "grad_norm": 3.950685977935791, "learning_rate": 1.9970818862411408e-05, "loss": 1.7805, "step": 16519 }, { "epoch": 0.21, "grad_norm": 4.090921401977539, "learning_rate": 1.9970810840151092e-05, "loss": 1.9625, "step": 16520 }, { "epoch": 0.21, "grad_norm": 4.709878444671631, "learning_rate": 1.997080281678983e-05, "loss": 1.8381, "step": 16521 }, { "epoch": 0.21, "grad_norm": 4.693686485290527, "learning_rate": 1.997079479232762e-05, "loss": 2.3049, "step": 16522 }, { "epoch": 0.21, "grad_norm": 3.92586612701416, "learning_rate": 1.9970786766764467e-05, "loss": 2.1254, "step": 16523 }, { "epoch": 0.21, "grad_norm": 4.685851573944092, "learning_rate": 1.997077874010037e-05, "loss": 2.4025, "step": 16524 }, { "epoch": 0.21, "grad_norm": 4.764357566833496, "learning_rate": 1.997077071233533e-05, "loss": 2.5296, "step": 16525 }, { "epoch": 0.21, "grad_norm": 4.413835525512695, "learning_rate": 1.9970762683469348e-05, "loss": 2.1829, "step": 16526 }, { "epoch": 0.21, "grad_norm": 4.180485725402832, "learning_rate": 1.9970754653502425e-05, "loss": 1.9861, "step": 16527 }, { "epoch": 0.21, "grad_norm": 4.606992244720459, "learning_rate": 1.997074662243456e-05, "loss": 2.0582, "step": 16528 }, { "epoch": 0.21, "grad_norm": 4.450106620788574, "learning_rate": 1.9970738590265757e-05, "loss": 2.3097, "step": 16529 }, { "epoch": 0.21, "grad_norm": 4.496275901794434, "learning_rate": 1.9970730556996015e-05, "loss": 2.366, "step": 16530 }, { "epoch": 0.21, "grad_norm": 4.099129676818848, "learning_rate": 1.9970722522625334e-05, "loss": 1.8266, "step": 16531 }, { "epoch": 0.21, "grad_norm": 4.37338924407959, "learning_rate": 1.9970714487153717e-05, "loss": 2.4728, "step": 16532 }, { "epoch": 0.21, "grad_norm": 4.5903706550598145, "learning_rate": 1.9970706450581163e-05, "loss": 2.5232, "step": 16533 }, { "epoch": 0.21, "grad_norm": 4.381993770599365, "learning_rate": 1.9970698412907677e-05, "loss": 2.2817, "step": 16534 }, { "epoch": 0.21, "grad_norm": 3.5753421783447266, "learning_rate": 1.997069037413325e-05, "loss": 1.9682, "step": 16535 }, { "epoch": 0.21, "grad_norm": 4.345672607421875, "learning_rate": 1.9970682334257896e-05, "loss": 2.3171, "step": 16536 }, { "epoch": 0.21, "grad_norm": 3.9665944576263428, "learning_rate": 1.9970674293281608e-05, "loss": 1.9457, "step": 16537 }, { "epoch": 0.21, "grad_norm": 4.30338191986084, "learning_rate": 1.9970666251204387e-05, "loss": 2.1689, "step": 16538 }, { "epoch": 0.21, "grad_norm": 3.956974506378174, "learning_rate": 1.9970658208026237e-05, "loss": 2.0438, "step": 16539 }, { "epoch": 0.21, "grad_norm": 4.520275115966797, "learning_rate": 1.9970650163747157e-05, "loss": 2.2205, "step": 16540 }, { "epoch": 0.21, "grad_norm": 3.9637644290924072, "learning_rate": 1.9970642118367145e-05, "loss": 1.7905, "step": 16541 }, { "epoch": 0.21, "grad_norm": 4.460674285888672, "learning_rate": 1.997063407188621e-05, "loss": 2.0659, "step": 16542 }, { "epoch": 0.21, "grad_norm": 4.615842342376709, "learning_rate": 1.9970626024304343e-05, "loss": 2.1343, "step": 16543 }, { "epoch": 0.21, "grad_norm": 4.218652725219727, "learning_rate": 1.9970617975621552e-05, "loss": 2.3599, "step": 16544 }, { "epoch": 0.21, "grad_norm": 4.235142707824707, "learning_rate": 1.9970609925837836e-05, "loss": 1.822, "step": 16545 }, { "epoch": 0.21, "grad_norm": 3.854320526123047, "learning_rate": 1.9970601874953194e-05, "loss": 1.762, "step": 16546 }, { "epoch": 0.21, "grad_norm": 4.0023040771484375, "learning_rate": 1.997059382296763e-05, "loss": 2.0536, "step": 16547 }, { "epoch": 0.21, "grad_norm": 4.272873878479004, "learning_rate": 1.9970585769881145e-05, "loss": 2.1427, "step": 16548 }, { "epoch": 0.21, "grad_norm": 3.548417329788208, "learning_rate": 1.997057771569373e-05, "loss": 1.8191, "step": 16549 }, { "epoch": 0.21, "grad_norm": 4.254775047302246, "learning_rate": 1.9970569660405402e-05, "loss": 2.0687, "step": 16550 }, { "epoch": 0.21, "grad_norm": 3.7766647338867188, "learning_rate": 1.9970561604016154e-05, "loss": 2.1017, "step": 16551 }, { "epoch": 0.21, "grad_norm": 4.657205104827881, "learning_rate": 1.9970553546525986e-05, "loss": 2.0046, "step": 16552 }, { "epoch": 0.21, "grad_norm": 4.304282188415527, "learning_rate": 1.99705454879349e-05, "loss": 2.1665, "step": 16553 }, { "epoch": 0.21, "grad_norm": 4.181889057159424, "learning_rate": 1.9970537428242893e-05, "loss": 2.0947, "step": 16554 }, { "epoch": 0.21, "grad_norm": 4.755042552947998, "learning_rate": 1.997052936744997e-05, "loss": 2.3763, "step": 16555 }, { "epoch": 0.21, "grad_norm": 3.7899084091186523, "learning_rate": 1.9970521305556133e-05, "loss": 1.826, "step": 16556 }, { "epoch": 0.21, "grad_norm": 4.052846431732178, "learning_rate": 1.9970513242561383e-05, "loss": 2.0509, "step": 16557 }, { "epoch": 0.21, "grad_norm": 5.19553279876709, "learning_rate": 1.9970505178465717e-05, "loss": 2.3956, "step": 16558 }, { "epoch": 0.21, "grad_norm": 3.6727542877197266, "learning_rate": 1.9970497113269142e-05, "loss": 1.6351, "step": 16559 }, { "epoch": 0.21, "grad_norm": 3.867741107940674, "learning_rate": 1.9970489046971648e-05, "loss": 1.9408, "step": 16560 }, { "epoch": 0.21, "grad_norm": 4.233098983764648, "learning_rate": 1.9970480979573244e-05, "loss": 2.5366, "step": 16561 }, { "epoch": 0.21, "grad_norm": 4.722075462341309, "learning_rate": 1.997047291107394e-05, "loss": 2.5197, "step": 16562 }, { "epoch": 0.21, "grad_norm": 4.256981372833252, "learning_rate": 1.9970464841473714e-05, "loss": 1.7388, "step": 16563 }, { "epoch": 0.21, "grad_norm": 3.7506258487701416, "learning_rate": 1.9970456770772583e-05, "loss": 1.9415, "step": 16564 }, { "epoch": 0.21, "grad_norm": 4.4698381423950195, "learning_rate": 1.9970448698970547e-05, "loss": 2.2219, "step": 16565 }, { "epoch": 0.21, "grad_norm": 4.441654205322266, "learning_rate": 1.9970440626067602e-05, "loss": 2.0599, "step": 16566 }, { "epoch": 0.22, "grad_norm": 4.141611576080322, "learning_rate": 1.9970432552063754e-05, "loss": 2.5368, "step": 16567 }, { "epoch": 0.22, "grad_norm": 4.430323123931885, "learning_rate": 1.9970424476959e-05, "loss": 2.5393, "step": 16568 }, { "epoch": 0.22, "grad_norm": 4.213985443115234, "learning_rate": 1.9970416400753342e-05, "loss": 2.1604, "step": 16569 }, { "epoch": 0.22, "grad_norm": 4.354084491729736, "learning_rate": 1.9970408323446778e-05, "loss": 2.1995, "step": 16570 }, { "epoch": 0.22, "grad_norm": 5.05520486831665, "learning_rate": 1.9970400245039318e-05, "loss": 2.3888, "step": 16571 }, { "epoch": 0.22, "grad_norm": 4.813080787658691, "learning_rate": 1.997039216553095e-05, "loss": 2.3939, "step": 16572 }, { "epoch": 0.22, "grad_norm": 4.301477432250977, "learning_rate": 1.9970384084921684e-05, "loss": 2.0498, "step": 16573 }, { "epoch": 0.22, "grad_norm": 4.553921699523926, "learning_rate": 1.9970376003211517e-05, "loss": 1.8754, "step": 16574 }, { "epoch": 0.22, "grad_norm": 3.8115880489349365, "learning_rate": 1.9970367920400455e-05, "loss": 2.0724, "step": 16575 }, { "epoch": 0.22, "grad_norm": 3.7775731086730957, "learning_rate": 1.9970359836488494e-05, "loss": 2.2238, "step": 16576 }, { "epoch": 0.22, "grad_norm": 4.236513614654541, "learning_rate": 1.9970351751475634e-05, "loss": 2.4832, "step": 16577 }, { "epoch": 0.22, "grad_norm": 4.480657577514648, "learning_rate": 1.9970343665361878e-05, "loss": 2.3234, "step": 16578 }, { "epoch": 0.22, "grad_norm": 3.980294704437256, "learning_rate": 1.997033557814723e-05, "loss": 2.4839, "step": 16579 }, { "epoch": 0.22, "grad_norm": 3.850651264190674, "learning_rate": 1.9970327489831687e-05, "loss": 1.9435, "step": 16580 }, { "epoch": 0.22, "grad_norm": 3.7018210887908936, "learning_rate": 1.997031940041525e-05, "loss": 1.9569, "step": 16581 }, { "epoch": 0.22, "grad_norm": 4.082957744598389, "learning_rate": 1.9970311309897918e-05, "loss": 2.2695, "step": 16582 }, { "epoch": 0.22, "grad_norm": 4.304728984832764, "learning_rate": 1.99703032182797e-05, "loss": 2.5958, "step": 16583 }, { "epoch": 0.22, "grad_norm": 4.159384250640869, "learning_rate": 1.9970295125560587e-05, "loss": 2.5205, "step": 16584 }, { "epoch": 0.22, "grad_norm": 4.35482120513916, "learning_rate": 1.9970287031740586e-05, "loss": 2.4966, "step": 16585 }, { "epoch": 0.22, "grad_norm": 4.709456443786621, "learning_rate": 1.9970278936819697e-05, "loss": 2.701, "step": 16586 }, { "epoch": 0.22, "grad_norm": 4.581921577453613, "learning_rate": 1.9970270840797916e-05, "loss": 2.4683, "step": 16587 }, { "epoch": 0.22, "grad_norm": 4.618490219116211, "learning_rate": 1.9970262743675253e-05, "loss": 2.2159, "step": 16588 }, { "epoch": 0.22, "grad_norm": 3.8187925815582275, "learning_rate": 1.9970254645451698e-05, "loss": 1.4838, "step": 16589 }, { "epoch": 0.22, "grad_norm": 4.188662052154541, "learning_rate": 1.9970246546127264e-05, "loss": 2.1142, "step": 16590 }, { "epoch": 0.22, "grad_norm": 4.194814682006836, "learning_rate": 1.9970238445701945e-05, "loss": 2.2571, "step": 16591 }, { "epoch": 0.22, "grad_norm": 4.03156852722168, "learning_rate": 1.9970230344175737e-05, "loss": 2.5389, "step": 16592 }, { "epoch": 0.22, "grad_norm": 3.9358162879943848, "learning_rate": 1.997022224154865e-05, "loss": 2.2944, "step": 16593 }, { "epoch": 0.22, "grad_norm": 4.325407981872559, "learning_rate": 1.997021413782068e-05, "loss": 2.322, "step": 16594 }, { "epoch": 0.22, "grad_norm": 3.685969829559326, "learning_rate": 1.9970206032991832e-05, "loss": 2.0543, "step": 16595 }, { "epoch": 0.22, "grad_norm": 4.335400581359863, "learning_rate": 1.9970197927062103e-05, "loss": 2.1067, "step": 16596 }, { "epoch": 0.22, "grad_norm": 4.0294060707092285, "learning_rate": 1.9970189820031492e-05, "loss": 2.1542, "step": 16597 }, { "epoch": 0.22, "grad_norm": 4.358894348144531, "learning_rate": 1.9970181711900006e-05, "loss": 2.0923, "step": 16598 }, { "epoch": 0.22, "grad_norm": 4.212900161743164, "learning_rate": 1.9970173602667645e-05, "loss": 2.2756, "step": 16599 }, { "epoch": 0.22, "grad_norm": 4.145659446716309, "learning_rate": 1.9970165492334402e-05, "loss": 1.6785, "step": 16600 }, { "epoch": 0.22, "grad_norm": 4.362633228302002, "learning_rate": 1.9970157380900287e-05, "loss": 2.6673, "step": 16601 }, { "epoch": 0.22, "grad_norm": 3.5688958168029785, "learning_rate": 1.99701492683653e-05, "loss": 1.5024, "step": 16602 }, { "epoch": 0.22, "grad_norm": 3.9778923988342285, "learning_rate": 1.9970141154729436e-05, "loss": 1.9351, "step": 16603 }, { "epoch": 0.22, "grad_norm": 3.8571388721466064, "learning_rate": 1.99701330399927e-05, "loss": 2.183, "step": 16604 }, { "epoch": 0.22, "grad_norm": 3.69573974609375, "learning_rate": 1.997012492415509e-05, "loss": 1.9289, "step": 16605 }, { "epoch": 0.22, "grad_norm": 4.10559606552124, "learning_rate": 1.997011680721661e-05, "loss": 2.1146, "step": 16606 }, { "epoch": 0.22, "grad_norm": 4.263547420501709, "learning_rate": 1.997010868917726e-05, "loss": 2.167, "step": 16607 }, { "epoch": 0.22, "grad_norm": 4.419260501861572, "learning_rate": 1.9970100570037043e-05, "loss": 2.8615, "step": 16608 }, { "epoch": 0.22, "grad_norm": 4.18880558013916, "learning_rate": 1.9970092449795957e-05, "loss": 1.9512, "step": 16609 }, { "epoch": 0.22, "grad_norm": 4.068741321563721, "learning_rate": 1.9970084328454003e-05, "loss": 1.9908, "step": 16610 }, { "epoch": 0.22, "grad_norm": 4.171075344085693, "learning_rate": 1.997007620601118e-05, "loss": 2.0499, "step": 16611 }, { "epoch": 0.22, "grad_norm": 4.00620174407959, "learning_rate": 1.9970068082467497e-05, "loss": 2.3671, "step": 16612 }, { "epoch": 0.22, "grad_norm": 3.7183456420898438, "learning_rate": 1.9970059957822947e-05, "loss": 1.7581, "step": 16613 }, { "epoch": 0.22, "grad_norm": 4.290940761566162, "learning_rate": 1.9970051832077533e-05, "loss": 2.1198, "step": 16614 }, { "epoch": 0.22, "grad_norm": 4.468106269836426, "learning_rate": 1.997004370523125e-05, "loss": 2.1618, "step": 16615 }, { "epoch": 0.22, "grad_norm": 3.7739500999450684, "learning_rate": 1.9970035577284113e-05, "loss": 2.1014, "step": 16616 }, { "epoch": 0.22, "grad_norm": 4.11401891708374, "learning_rate": 1.9970027448236115e-05, "loss": 2.1646, "step": 16617 }, { "epoch": 0.22, "grad_norm": 4.47886323928833, "learning_rate": 1.9970019318087254e-05, "loss": 1.7773, "step": 16618 }, { "epoch": 0.22, "grad_norm": 3.958425283432007, "learning_rate": 1.9970011186837535e-05, "loss": 1.9855, "step": 16619 }, { "epoch": 0.22, "grad_norm": 4.624138832092285, "learning_rate": 1.9970003054486958e-05, "loss": 2.2691, "step": 16620 }, { "epoch": 0.22, "grad_norm": 4.221149921417236, "learning_rate": 1.996999492103552e-05, "loss": 2.2784, "step": 16621 }, { "epoch": 0.22, "grad_norm": 4.889537334442139, "learning_rate": 1.996998678648323e-05, "loss": 2.8166, "step": 16622 }, { "epoch": 0.22, "grad_norm": 3.714901924133301, "learning_rate": 1.996997865083008e-05, "loss": 1.9394, "step": 16623 }, { "epoch": 0.22, "grad_norm": 3.978125810623169, "learning_rate": 1.9969970514076078e-05, "loss": 1.9324, "step": 16624 }, { "epoch": 0.22, "grad_norm": 4.044342517852783, "learning_rate": 1.996996237622122e-05, "loss": 2.0699, "step": 16625 }, { "epoch": 0.22, "grad_norm": 4.942835807800293, "learning_rate": 1.9969954237265514e-05, "loss": 2.1208, "step": 16626 }, { "epoch": 0.22, "grad_norm": 4.726114749908447, "learning_rate": 1.996994609720895e-05, "loss": 2.1934, "step": 16627 }, { "epoch": 0.22, "grad_norm": 4.554875373840332, "learning_rate": 1.9969937956051537e-05, "loss": 2.5891, "step": 16628 }, { "epoch": 0.22, "grad_norm": 4.496059417724609, "learning_rate": 1.9969929813793273e-05, "loss": 2.2838, "step": 16629 }, { "epoch": 0.22, "grad_norm": 4.174305438995361, "learning_rate": 1.996992167043416e-05, "loss": 2.2121, "step": 16630 }, { "epoch": 0.22, "grad_norm": 4.592151165008545, "learning_rate": 1.99699135259742e-05, "loss": 2.0185, "step": 16631 }, { "epoch": 0.22, "grad_norm": 3.827974557876587, "learning_rate": 1.996990538041339e-05, "loss": 1.7861, "step": 16632 }, { "epoch": 0.22, "grad_norm": 4.299079418182373, "learning_rate": 1.9969897233751737e-05, "loss": 2.3948, "step": 16633 }, { "epoch": 0.22, "grad_norm": 4.1340556144714355, "learning_rate": 1.9969889085989236e-05, "loss": 2.0774, "step": 16634 }, { "epoch": 0.22, "grad_norm": 4.339442253112793, "learning_rate": 1.996988093712589e-05, "loss": 2.0945, "step": 16635 }, { "epoch": 0.22, "grad_norm": 4.840633869171143, "learning_rate": 1.99698727871617e-05, "loss": 2.3869, "step": 16636 }, { "epoch": 0.22, "grad_norm": 4.1192145347595215, "learning_rate": 1.9969864636096666e-05, "loss": 1.9465, "step": 16637 }, { "epoch": 0.22, "grad_norm": 4.356480598449707, "learning_rate": 1.996985648393079e-05, "loss": 2.1147, "step": 16638 }, { "epoch": 0.22, "grad_norm": 4.913179397583008, "learning_rate": 1.9969848330664076e-05, "loss": 2.8389, "step": 16639 }, { "epoch": 0.22, "grad_norm": 4.865675926208496, "learning_rate": 1.9969840176296517e-05, "loss": 2.3309, "step": 16640 }, { "epoch": 0.22, "grad_norm": 4.374258518218994, "learning_rate": 1.996983202082812e-05, "loss": 1.9485, "step": 16641 }, { "epoch": 0.22, "grad_norm": 4.279393672943115, "learning_rate": 1.9969823864258886e-05, "loss": 1.8935, "step": 16642 }, { "epoch": 0.22, "grad_norm": 3.654104471206665, "learning_rate": 1.9969815706588814e-05, "loss": 1.7851, "step": 16643 }, { "epoch": 0.22, "grad_norm": 4.077334880828857, "learning_rate": 1.9969807547817904e-05, "loss": 1.8452, "step": 16644 }, { "epoch": 0.22, "grad_norm": 3.848681926727295, "learning_rate": 1.996979938794616e-05, "loss": 2.2636, "step": 16645 }, { "epoch": 0.22, "grad_norm": 3.2485711574554443, "learning_rate": 1.996979122697358e-05, "loss": 1.6705, "step": 16646 }, { "epoch": 0.22, "grad_norm": 4.198360919952393, "learning_rate": 1.9969783064900162e-05, "loss": 2.2336, "step": 16647 }, { "epoch": 0.22, "grad_norm": 4.216000080108643, "learning_rate": 1.9969774901725914e-05, "loss": 2.2492, "step": 16648 }, { "epoch": 0.22, "grad_norm": 5.003149032592773, "learning_rate": 1.9969766737450835e-05, "loss": 2.5179, "step": 16649 }, { "epoch": 0.22, "grad_norm": 3.6797544956207275, "learning_rate": 1.9969758572074924e-05, "loss": 1.907, "step": 16650 }, { "epoch": 0.22, "grad_norm": 4.287243843078613, "learning_rate": 1.9969750405598182e-05, "loss": 2.2493, "step": 16651 }, { "epoch": 0.22, "grad_norm": 3.926860809326172, "learning_rate": 1.996974223802061e-05, "loss": 1.8814, "step": 16652 }, { "epoch": 0.22, "grad_norm": 4.105186462402344, "learning_rate": 1.996973406934221e-05, "loss": 2.2603, "step": 16653 }, { "epoch": 0.22, "grad_norm": 4.520981311798096, "learning_rate": 1.996972589956298e-05, "loss": 2.1187, "step": 16654 }, { "epoch": 0.22, "grad_norm": 4.00299072265625, "learning_rate": 1.9969717728682928e-05, "loss": 2.2869, "step": 16655 }, { "epoch": 0.22, "grad_norm": 4.843175888061523, "learning_rate": 1.9969709556702045e-05, "loss": 2.3837, "step": 16656 }, { "epoch": 0.22, "grad_norm": 4.2019877433776855, "learning_rate": 1.996970138362034e-05, "loss": 1.7117, "step": 16657 }, { "epoch": 0.22, "grad_norm": 4.0971527099609375, "learning_rate": 1.996969320943781e-05, "loss": 2.4219, "step": 16658 }, { "epoch": 0.22, "grad_norm": 4.643895626068115, "learning_rate": 1.9969685034154457e-05, "loss": 2.4464, "step": 16659 }, { "epoch": 0.22, "grad_norm": 4.1750054359436035, "learning_rate": 1.9969676857770283e-05, "loss": 1.7524, "step": 16660 }, { "epoch": 0.22, "grad_norm": 4.221214771270752, "learning_rate": 1.9969668680285284e-05, "loss": 1.9943, "step": 16661 }, { "epoch": 0.22, "grad_norm": 3.737841844558716, "learning_rate": 1.9969660501699465e-05, "loss": 2.0035, "step": 16662 }, { "epoch": 0.22, "grad_norm": 4.504734039306641, "learning_rate": 1.9969652322012828e-05, "loss": 2.1923, "step": 16663 }, { "epoch": 0.22, "grad_norm": 4.232248306274414, "learning_rate": 1.9969644141225376e-05, "loss": 1.6127, "step": 16664 }, { "epoch": 0.22, "grad_norm": 3.647430658340454, "learning_rate": 1.99696359593371e-05, "loss": 1.7152, "step": 16665 }, { "epoch": 0.22, "grad_norm": 4.832480430603027, "learning_rate": 1.9969627776348007e-05, "loss": 2.6099, "step": 16666 }, { "epoch": 0.22, "grad_norm": 4.917209148406982, "learning_rate": 1.9969619592258103e-05, "loss": 2.3989, "step": 16667 }, { "epoch": 0.22, "grad_norm": 4.6328277587890625, "learning_rate": 1.996961140706738e-05, "loss": 2.1994, "step": 16668 }, { "epoch": 0.22, "grad_norm": 3.6931307315826416, "learning_rate": 1.9969603220775845e-05, "loss": 1.9748, "step": 16669 }, { "epoch": 0.22, "grad_norm": 4.574906349182129, "learning_rate": 1.9969595033383492e-05, "loss": 2.6653, "step": 16670 }, { "epoch": 0.22, "grad_norm": 4.433218002319336, "learning_rate": 1.996958684489033e-05, "loss": 2.2596, "step": 16671 }, { "epoch": 0.22, "grad_norm": 4.185986518859863, "learning_rate": 1.996957865529636e-05, "loss": 2.105, "step": 16672 }, { "epoch": 0.22, "grad_norm": 4.265335559844971, "learning_rate": 1.9969570464601574e-05, "loss": 2.075, "step": 16673 }, { "epoch": 0.22, "grad_norm": 3.7873406410217285, "learning_rate": 1.996956227280598e-05, "loss": 2.0803, "step": 16674 }, { "epoch": 0.22, "grad_norm": 4.639370918273926, "learning_rate": 1.9969554079909576e-05, "loss": 2.9228, "step": 16675 }, { "epoch": 0.22, "grad_norm": 4.135369777679443, "learning_rate": 1.9969545885912365e-05, "loss": 2.3994, "step": 16676 }, { "epoch": 0.22, "grad_norm": 4.535195350646973, "learning_rate": 1.9969537690814346e-05, "loss": 2.3575, "step": 16677 }, { "epoch": 0.22, "grad_norm": 4.829367637634277, "learning_rate": 1.9969529494615527e-05, "loss": 2.6084, "step": 16678 }, { "epoch": 0.22, "grad_norm": 4.679894924163818, "learning_rate": 1.9969521297315896e-05, "loss": 2.2092, "step": 16679 }, { "epoch": 0.22, "grad_norm": 4.818275451660156, "learning_rate": 1.9969513098915462e-05, "loss": 2.5122, "step": 16680 }, { "epoch": 0.22, "grad_norm": 3.973069906234741, "learning_rate": 1.9969504899414227e-05, "loss": 2.0384, "step": 16681 }, { "epoch": 0.22, "grad_norm": 4.251798629760742, "learning_rate": 1.9969496698812185e-05, "loss": 2.133, "step": 16682 }, { "epoch": 0.22, "grad_norm": 3.787574052810669, "learning_rate": 1.9969488497109345e-05, "loss": 1.9705, "step": 16683 }, { "epoch": 0.22, "grad_norm": 4.846024036407471, "learning_rate": 1.99694802943057e-05, "loss": 2.4492, "step": 16684 }, { "epoch": 0.22, "grad_norm": 4.641073703765869, "learning_rate": 1.9969472090401264e-05, "loss": 2.2575, "step": 16685 }, { "epoch": 0.22, "grad_norm": 4.789765357971191, "learning_rate": 1.9969463885396023e-05, "loss": 2.1788, "step": 16686 }, { "epoch": 0.22, "grad_norm": 3.9545705318450928, "learning_rate": 1.9969455679289984e-05, "loss": 2.038, "step": 16687 }, { "epoch": 0.22, "grad_norm": 4.474745273590088, "learning_rate": 1.996944747208315e-05, "loss": 2.4112, "step": 16688 }, { "epoch": 0.22, "grad_norm": 4.119502067565918, "learning_rate": 1.9969439263775522e-05, "loss": 1.7987, "step": 16689 }, { "epoch": 0.22, "grad_norm": 4.60391092300415, "learning_rate": 1.996943105436709e-05, "loss": 2.3738, "step": 16690 }, { "epoch": 0.22, "grad_norm": 4.178908348083496, "learning_rate": 1.9969422843857874e-05, "loss": 2.4856, "step": 16691 }, { "epoch": 0.22, "grad_norm": 4.294042110443115, "learning_rate": 1.996941463224786e-05, "loss": 2.5346, "step": 16692 }, { "epoch": 0.22, "grad_norm": 3.8051116466522217, "learning_rate": 1.996940641953705e-05, "loss": 1.9438, "step": 16693 }, { "epoch": 0.22, "grad_norm": 5.453519344329834, "learning_rate": 1.9969398205725455e-05, "loss": 2.471, "step": 16694 }, { "epoch": 0.22, "grad_norm": 4.888430118560791, "learning_rate": 1.9969389990813065e-05, "loss": 2.9823, "step": 16695 }, { "epoch": 0.22, "grad_norm": 4.201983451843262, "learning_rate": 1.996938177479989e-05, "loss": 2.2005, "step": 16696 }, { "epoch": 0.22, "grad_norm": 4.221504211425781, "learning_rate": 1.9969373557685918e-05, "loss": 2.7608, "step": 16697 }, { "epoch": 0.22, "grad_norm": 3.975280284881592, "learning_rate": 1.9969365339471164e-05, "loss": 2.2612, "step": 16698 }, { "epoch": 0.22, "grad_norm": 5.225852966308594, "learning_rate": 1.996935712015562e-05, "loss": 3.1157, "step": 16699 }, { "epoch": 0.22, "grad_norm": 3.9914650917053223, "learning_rate": 1.996934889973929e-05, "loss": 2.1172, "step": 16700 }, { "epoch": 0.22, "grad_norm": 3.956892728805542, "learning_rate": 1.996934067822218e-05, "loss": 1.792, "step": 16701 }, { "epoch": 0.22, "grad_norm": 4.123817443847656, "learning_rate": 1.996933245560428e-05, "loss": 2.1599, "step": 16702 }, { "epoch": 0.22, "grad_norm": 4.374632358551025, "learning_rate": 1.9969324231885597e-05, "loss": 1.9458, "step": 16703 }, { "epoch": 0.22, "grad_norm": 4.712252140045166, "learning_rate": 1.9969316007066134e-05, "loss": 2.3742, "step": 16704 }, { "epoch": 0.22, "grad_norm": 4.197150707244873, "learning_rate": 1.9969307781145888e-05, "loss": 2.1644, "step": 16705 }, { "epoch": 0.22, "grad_norm": 4.202478885650635, "learning_rate": 1.996929955412486e-05, "loss": 2.0678, "step": 16706 }, { "epoch": 0.22, "grad_norm": 4.534388542175293, "learning_rate": 1.9969291326003053e-05, "loss": 2.4923, "step": 16707 }, { "epoch": 0.22, "grad_norm": 4.579103469848633, "learning_rate": 1.996928309678047e-05, "loss": 2.2648, "step": 16708 }, { "epoch": 0.22, "grad_norm": 4.540823936462402, "learning_rate": 1.9969274866457104e-05, "loss": 2.3754, "step": 16709 }, { "epoch": 0.22, "grad_norm": 4.231592178344727, "learning_rate": 1.9969266635032966e-05, "loss": 2.176, "step": 16710 }, { "epoch": 0.22, "grad_norm": 4.157196044921875, "learning_rate": 1.9969258402508048e-05, "loss": 2.338, "step": 16711 }, { "epoch": 0.22, "grad_norm": 4.504724025726318, "learning_rate": 1.9969250168882356e-05, "loss": 2.3624, "step": 16712 }, { "epoch": 0.22, "grad_norm": 3.990617036819458, "learning_rate": 1.996924193415589e-05, "loss": 1.8277, "step": 16713 }, { "epoch": 0.22, "grad_norm": 4.434162139892578, "learning_rate": 1.996923369832865e-05, "loss": 2.0431, "step": 16714 }, { "epoch": 0.22, "grad_norm": 4.262613773345947, "learning_rate": 1.9969225461400642e-05, "loss": 2.1791, "step": 16715 }, { "epoch": 0.22, "grad_norm": 4.3015546798706055, "learning_rate": 1.9969217223371856e-05, "loss": 1.8834, "step": 16716 }, { "epoch": 0.22, "grad_norm": 4.459645748138428, "learning_rate": 1.99692089842423e-05, "loss": 2.3467, "step": 16717 }, { "epoch": 0.22, "grad_norm": 4.414496421813965, "learning_rate": 1.9969200744011977e-05, "loss": 2.3807, "step": 16718 }, { "epoch": 0.22, "grad_norm": 4.745204448699951, "learning_rate": 1.9969192502680884e-05, "loss": 2.5213, "step": 16719 }, { "epoch": 0.22, "grad_norm": 3.9526658058166504, "learning_rate": 1.996918426024902e-05, "loss": 1.7797, "step": 16720 }, { "epoch": 0.22, "grad_norm": 3.9138004779815674, "learning_rate": 1.9969176016716395e-05, "loss": 1.7637, "step": 16721 }, { "epoch": 0.22, "grad_norm": 4.128586769104004, "learning_rate": 1.9969167772083e-05, "loss": 2.0577, "step": 16722 }, { "epoch": 0.22, "grad_norm": 4.35015869140625, "learning_rate": 1.996915952634884e-05, "loss": 2.5626, "step": 16723 }, { "epoch": 0.22, "grad_norm": 4.204846382141113, "learning_rate": 1.9969151279513918e-05, "loss": 2.3991, "step": 16724 }, { "epoch": 0.22, "grad_norm": 4.773756980895996, "learning_rate": 1.9969143031578228e-05, "loss": 2.6429, "step": 16725 }, { "epoch": 0.22, "grad_norm": 4.374152183532715, "learning_rate": 1.9969134782541778e-05, "loss": 2.1767, "step": 16726 }, { "epoch": 0.22, "grad_norm": 4.513891220092773, "learning_rate": 1.9969126532404568e-05, "loss": 2.5702, "step": 16727 }, { "epoch": 0.22, "grad_norm": 3.9095845222473145, "learning_rate": 1.9969118281166594e-05, "loss": 2.2508, "step": 16728 }, { "epoch": 0.22, "grad_norm": 4.364722728729248, "learning_rate": 1.9969110028827864e-05, "loss": 2.423, "step": 16729 }, { "epoch": 0.22, "grad_norm": 3.6058008670806885, "learning_rate": 1.9969101775388374e-05, "loss": 1.9004, "step": 16730 }, { "epoch": 0.22, "grad_norm": 3.897355079650879, "learning_rate": 1.9969093520848124e-05, "loss": 2.2296, "step": 16731 }, { "epoch": 0.22, "grad_norm": 4.352839946746826, "learning_rate": 1.9969085265207118e-05, "loss": 2.1481, "step": 16732 }, { "epoch": 0.22, "grad_norm": 4.396707534790039, "learning_rate": 1.9969077008465354e-05, "loss": 1.7114, "step": 16733 }, { "epoch": 0.22, "grad_norm": 4.375271320343018, "learning_rate": 1.9969068750622838e-05, "loss": 2.3197, "step": 16734 }, { "epoch": 0.22, "grad_norm": 4.533532619476318, "learning_rate": 1.9969060491679565e-05, "loss": 2.2678, "step": 16735 }, { "epoch": 0.22, "grad_norm": 4.704681873321533, "learning_rate": 1.9969052231635542e-05, "loss": 2.4337, "step": 16736 }, { "epoch": 0.22, "grad_norm": 4.053093433380127, "learning_rate": 1.9969043970490763e-05, "loss": 2.3184, "step": 16737 }, { "epoch": 0.22, "grad_norm": 4.178330421447754, "learning_rate": 1.9969035708245234e-05, "loss": 2.1091, "step": 16738 }, { "epoch": 0.22, "grad_norm": 4.093526840209961, "learning_rate": 1.9969027444898955e-05, "loss": 1.9891, "step": 16739 }, { "epoch": 0.22, "grad_norm": 4.220961093902588, "learning_rate": 1.9969019180451926e-05, "loss": 2.2507, "step": 16740 }, { "epoch": 0.22, "grad_norm": 4.1377081871032715, "learning_rate": 1.9969010914904148e-05, "loss": 2.4891, "step": 16741 }, { "epoch": 0.22, "grad_norm": 4.3554887771606445, "learning_rate": 1.9969002648255623e-05, "loss": 1.9927, "step": 16742 }, { "epoch": 0.22, "grad_norm": 3.940009117126465, "learning_rate": 1.9968994380506348e-05, "loss": 1.944, "step": 16743 }, { "epoch": 0.22, "grad_norm": 3.877753496170044, "learning_rate": 1.9968986111656327e-05, "loss": 1.9361, "step": 16744 }, { "epoch": 0.22, "grad_norm": 4.272834777832031, "learning_rate": 1.9968977841705562e-05, "loss": 2.2683, "step": 16745 }, { "epoch": 0.22, "grad_norm": 4.125756740570068, "learning_rate": 1.9968969570654055e-05, "loss": 2.2391, "step": 16746 }, { "epoch": 0.22, "grad_norm": 4.23895788192749, "learning_rate": 1.9968961298501805e-05, "loss": 2.3046, "step": 16747 }, { "epoch": 0.22, "grad_norm": 3.7836034297943115, "learning_rate": 1.9968953025248808e-05, "loss": 1.5641, "step": 16748 }, { "epoch": 0.22, "grad_norm": 3.887960195541382, "learning_rate": 1.9968944750895072e-05, "loss": 1.7619, "step": 16749 }, { "epoch": 0.22, "grad_norm": 4.318161964416504, "learning_rate": 1.9968936475440596e-05, "loss": 2.4865, "step": 16750 }, { "epoch": 0.22, "grad_norm": 4.791220664978027, "learning_rate": 1.996892819888538e-05, "loss": 2.4019, "step": 16751 }, { "epoch": 0.22, "grad_norm": 4.086742877960205, "learning_rate": 1.9968919921229425e-05, "loss": 2.2985, "step": 16752 }, { "epoch": 0.22, "grad_norm": 4.953344821929932, "learning_rate": 1.996891164247273e-05, "loss": 2.3836, "step": 16753 }, { "epoch": 0.22, "grad_norm": 4.324511528015137, "learning_rate": 1.99689033626153e-05, "loss": 2.0255, "step": 16754 }, { "epoch": 0.22, "grad_norm": 4.531703948974609, "learning_rate": 1.9968895081657135e-05, "loss": 2.6034, "step": 16755 }, { "epoch": 0.22, "grad_norm": 4.079343795776367, "learning_rate": 1.996888679959823e-05, "loss": 2.0806, "step": 16756 }, { "epoch": 0.22, "grad_norm": 4.271872043609619, "learning_rate": 1.9968878516438598e-05, "loss": 2.1494, "step": 16757 }, { "epoch": 0.22, "grad_norm": 3.5928077697753906, "learning_rate": 1.9968870232178225e-05, "loss": 1.812, "step": 16758 }, { "epoch": 0.22, "grad_norm": 4.594698429107666, "learning_rate": 1.9968861946817126e-05, "loss": 2.2895, "step": 16759 }, { "epoch": 0.22, "grad_norm": 3.6205713748931885, "learning_rate": 1.9968853660355294e-05, "loss": 1.8658, "step": 16760 }, { "epoch": 0.22, "grad_norm": 3.750201463699341, "learning_rate": 1.996884537279273e-05, "loss": 2.0394, "step": 16761 }, { "epoch": 0.22, "grad_norm": 3.94330096244812, "learning_rate": 1.9968837084129436e-05, "loss": 1.9458, "step": 16762 }, { "epoch": 0.22, "grad_norm": 4.367397308349609, "learning_rate": 1.9968828794365412e-05, "loss": 1.6181, "step": 16763 }, { "epoch": 0.22, "grad_norm": 4.446852684020996, "learning_rate": 1.9968820503500666e-05, "loss": 2.6689, "step": 16764 }, { "epoch": 0.22, "grad_norm": 3.526716947555542, "learning_rate": 1.996881221153519e-05, "loss": 1.5496, "step": 16765 }, { "epoch": 0.22, "grad_norm": 4.561562538146973, "learning_rate": 1.9968803918468988e-05, "loss": 2.0765, "step": 16766 }, { "epoch": 0.22, "grad_norm": 4.78642463684082, "learning_rate": 1.996879562430206e-05, "loss": 2.5147, "step": 16767 }, { "epoch": 0.22, "grad_norm": 4.19269323348999, "learning_rate": 1.9968787329034406e-05, "loss": 2.0955, "step": 16768 }, { "epoch": 0.22, "grad_norm": 4.4345502853393555, "learning_rate": 1.996877903266603e-05, "loss": 1.9924, "step": 16769 }, { "epoch": 0.22, "grad_norm": 3.7970752716064453, "learning_rate": 1.9968770735196933e-05, "loss": 1.8069, "step": 16770 }, { "epoch": 0.22, "grad_norm": 3.970505952835083, "learning_rate": 1.9968762436627114e-05, "loss": 2.1313, "step": 16771 }, { "epoch": 0.22, "grad_norm": 4.276482105255127, "learning_rate": 1.9968754136956576e-05, "loss": 2.144, "step": 16772 }, { "epoch": 0.22, "grad_norm": 4.469385147094727, "learning_rate": 1.996874583618532e-05, "loss": 2.1626, "step": 16773 }, { "epoch": 0.22, "grad_norm": 4.029240608215332, "learning_rate": 1.996873753431334e-05, "loss": 1.8852, "step": 16774 }, { "epoch": 0.22, "grad_norm": 4.7229533195495605, "learning_rate": 1.9968729231340646e-05, "loss": 2.4383, "step": 16775 }, { "epoch": 0.22, "grad_norm": 4.4253411293029785, "learning_rate": 1.996872092726723e-05, "loss": 2.7344, "step": 16776 }, { "epoch": 0.22, "grad_norm": 4.511361122131348, "learning_rate": 1.9968712622093106e-05, "loss": 2.2121, "step": 16777 }, { "epoch": 0.22, "grad_norm": 4.357509613037109, "learning_rate": 1.9968704315818262e-05, "loss": 2.0761, "step": 16778 }, { "epoch": 0.22, "grad_norm": 4.806342601776123, "learning_rate": 1.9968696008442702e-05, "loss": 2.4654, "step": 16779 }, { "epoch": 0.22, "grad_norm": 3.8129446506500244, "learning_rate": 1.9968687699966437e-05, "loss": 1.8071, "step": 16780 }, { "epoch": 0.22, "grad_norm": 3.5132925510406494, "learning_rate": 1.9968679390389452e-05, "loss": 1.8849, "step": 16781 }, { "epoch": 0.22, "grad_norm": 3.968238353729248, "learning_rate": 1.9968671079711758e-05, "loss": 1.9099, "step": 16782 }, { "epoch": 0.22, "grad_norm": 4.519952774047852, "learning_rate": 1.9968662767933355e-05, "loss": 2.2991, "step": 16783 }, { "epoch": 0.22, "grad_norm": 4.247682571411133, "learning_rate": 1.996865445505424e-05, "loss": 2.0384, "step": 16784 }, { "epoch": 0.22, "grad_norm": 3.9969677925109863, "learning_rate": 1.9968646141074418e-05, "loss": 2.3729, "step": 16785 }, { "epoch": 0.22, "grad_norm": 4.5215325355529785, "learning_rate": 1.9968637825993887e-05, "loss": 2.3578, "step": 16786 }, { "epoch": 0.22, "grad_norm": 4.669930934906006, "learning_rate": 1.996862950981265e-05, "loss": 2.3229, "step": 16787 }, { "epoch": 0.22, "grad_norm": 3.8457839488983154, "learning_rate": 1.996862119253071e-05, "loss": 2.0251, "step": 16788 }, { "epoch": 0.22, "grad_norm": 4.405794143676758, "learning_rate": 1.996861287414806e-05, "loss": 2.3628, "step": 16789 }, { "epoch": 0.22, "grad_norm": 4.564093589782715, "learning_rate": 1.996860455466471e-05, "loss": 2.3248, "step": 16790 }, { "epoch": 0.22, "grad_norm": 4.507601737976074, "learning_rate": 1.9968596234080655e-05, "loss": 2.3689, "step": 16791 }, { "epoch": 0.22, "grad_norm": 4.049942970275879, "learning_rate": 1.99685879123959e-05, "loss": 2.0516, "step": 16792 }, { "epoch": 0.22, "grad_norm": 3.898308753967285, "learning_rate": 1.9968579589610442e-05, "loss": 1.7422, "step": 16793 }, { "epoch": 0.22, "grad_norm": 4.39485502243042, "learning_rate": 1.9968571265724283e-05, "loss": 2.0838, "step": 16794 }, { "epoch": 0.22, "grad_norm": 4.039687156677246, "learning_rate": 1.9968562940737427e-05, "loss": 2.1285, "step": 16795 }, { "epoch": 0.22, "grad_norm": 4.220388412475586, "learning_rate": 1.996855461464987e-05, "loss": 2.3169, "step": 16796 }, { "epoch": 0.22, "grad_norm": 4.242717266082764, "learning_rate": 1.9968546287461616e-05, "loss": 2.2935, "step": 16797 }, { "epoch": 0.22, "grad_norm": 4.761200428009033, "learning_rate": 1.9968537959172667e-05, "loss": 2.3271, "step": 16798 }, { "epoch": 0.22, "grad_norm": 5.320737838745117, "learning_rate": 1.996852962978302e-05, "loss": 2.8271, "step": 16799 }, { "epoch": 0.22, "grad_norm": 4.439545631408691, "learning_rate": 1.996852129929268e-05, "loss": 2.4044, "step": 16800 }, { "epoch": 0.22, "grad_norm": 4.401105880737305, "learning_rate": 1.9968512967701647e-05, "loss": 1.9235, "step": 16801 }, { "epoch": 0.22, "grad_norm": 4.6600189208984375, "learning_rate": 1.996850463500992e-05, "loss": 2.188, "step": 16802 }, { "epoch": 0.22, "grad_norm": 3.7149083614349365, "learning_rate": 1.9968496301217496e-05, "loss": 1.9067, "step": 16803 }, { "epoch": 0.22, "grad_norm": 3.9581425189971924, "learning_rate": 1.9968487966324387e-05, "loss": 1.8487, "step": 16804 }, { "epoch": 0.22, "grad_norm": 3.689136505126953, "learning_rate": 1.9968479630330585e-05, "loss": 1.7518, "step": 16805 }, { "epoch": 0.22, "grad_norm": 4.009921073913574, "learning_rate": 1.9968471293236095e-05, "loss": 1.8877, "step": 16806 }, { "epoch": 0.22, "grad_norm": 4.127885341644287, "learning_rate": 1.9968462955040917e-05, "loss": 2.2311, "step": 16807 }, { "epoch": 0.22, "grad_norm": 3.8035430908203125, "learning_rate": 1.9968454615745056e-05, "loss": 1.7268, "step": 16808 }, { "epoch": 0.22, "grad_norm": 4.779735088348389, "learning_rate": 1.99684462753485e-05, "loss": 2.4342, "step": 16809 }, { "epoch": 0.22, "grad_norm": 4.315995693206787, "learning_rate": 1.9968437933851264e-05, "loss": 2.222, "step": 16810 }, { "epoch": 0.22, "grad_norm": 3.9272942543029785, "learning_rate": 1.996842959125334e-05, "loss": 2.1037, "step": 16811 }, { "epoch": 0.22, "grad_norm": 4.306417942047119, "learning_rate": 1.996842124755473e-05, "loss": 2.1559, "step": 16812 }, { "epoch": 0.22, "grad_norm": 4.297574520111084, "learning_rate": 1.9968412902755446e-05, "loss": 2.2236, "step": 16813 }, { "epoch": 0.22, "grad_norm": 4.284270286560059, "learning_rate": 1.9968404556855473e-05, "loss": 2.0285, "step": 16814 }, { "epoch": 0.22, "grad_norm": 4.003608226776123, "learning_rate": 1.996839620985482e-05, "loss": 2.041, "step": 16815 }, { "epoch": 0.22, "grad_norm": 3.8648648262023926, "learning_rate": 1.996838786175349e-05, "loss": 1.7906, "step": 16816 }, { "epoch": 0.22, "grad_norm": 3.7020506858825684, "learning_rate": 1.996837951255148e-05, "loss": 1.8628, "step": 16817 }, { "epoch": 0.22, "grad_norm": 4.101565361022949, "learning_rate": 1.996837116224879e-05, "loss": 2.2323, "step": 16818 }, { "epoch": 0.22, "grad_norm": 3.801694631576538, "learning_rate": 1.9968362810845422e-05, "loss": 2.0571, "step": 16819 }, { "epoch": 0.22, "grad_norm": 4.434092998504639, "learning_rate": 1.996835445834138e-05, "loss": 2.1151, "step": 16820 }, { "epoch": 0.22, "grad_norm": 3.699824810028076, "learning_rate": 1.996834610473666e-05, "loss": 2.1396, "step": 16821 }, { "epoch": 0.22, "grad_norm": 4.958114147186279, "learning_rate": 1.9968337750031267e-05, "loss": 2.2439, "step": 16822 }, { "epoch": 0.22, "grad_norm": 4.637004375457764, "learning_rate": 1.99683293942252e-05, "loss": 2.3992, "step": 16823 }, { "epoch": 0.22, "grad_norm": 4.686432361602783, "learning_rate": 1.996832103731846e-05, "loss": 2.5983, "step": 16824 }, { "epoch": 0.22, "grad_norm": 4.08035135269165, "learning_rate": 1.996831267931105e-05, "loss": 2.0632, "step": 16825 }, { "epoch": 0.22, "grad_norm": 4.27025842666626, "learning_rate": 1.9968304320202968e-05, "loss": 1.5294, "step": 16826 }, { "epoch": 0.22, "grad_norm": 4.196990966796875, "learning_rate": 1.9968295959994216e-05, "loss": 1.9318, "step": 16827 }, { "epoch": 0.22, "grad_norm": 4.2865705490112305, "learning_rate": 1.9968287598684796e-05, "loss": 1.8302, "step": 16828 }, { "epoch": 0.22, "grad_norm": 4.061117172241211, "learning_rate": 1.9968279236274707e-05, "loss": 1.7031, "step": 16829 }, { "epoch": 0.22, "grad_norm": 4.530511856079102, "learning_rate": 1.996827087276395e-05, "loss": 2.2263, "step": 16830 }, { "epoch": 0.22, "grad_norm": 3.241203546524048, "learning_rate": 1.9968262508152527e-05, "loss": 1.2546, "step": 16831 }, { "epoch": 0.22, "grad_norm": 3.7974443435668945, "learning_rate": 1.996825414244044e-05, "loss": 1.9684, "step": 16832 }, { "epoch": 0.22, "grad_norm": 4.156065940856934, "learning_rate": 1.996824577562769e-05, "loss": 2.0881, "step": 16833 }, { "epoch": 0.22, "grad_norm": 3.904153347015381, "learning_rate": 1.9968237407714272e-05, "loss": 2.109, "step": 16834 }, { "epoch": 0.22, "grad_norm": 4.4805684089660645, "learning_rate": 1.9968229038700196e-05, "loss": 2.4822, "step": 16835 }, { "epoch": 0.22, "grad_norm": 3.944275379180908, "learning_rate": 1.9968220668585454e-05, "loss": 2.1821, "step": 16836 }, { "epoch": 0.22, "grad_norm": 4.39286470413208, "learning_rate": 1.9968212297370055e-05, "loss": 2.3524, "step": 16837 }, { "epoch": 0.22, "grad_norm": 4.221611976623535, "learning_rate": 1.9968203925053994e-05, "loss": 2.4457, "step": 16838 }, { "epoch": 0.22, "grad_norm": 4.46548318862915, "learning_rate": 1.9968195551637277e-05, "loss": 2.2568, "step": 16839 }, { "epoch": 0.22, "grad_norm": 4.210291862487793, "learning_rate": 1.99681871771199e-05, "loss": 1.9677, "step": 16840 }, { "epoch": 0.22, "grad_norm": 5.4410552978515625, "learning_rate": 1.9968178801501866e-05, "loss": 2.405, "step": 16841 }, { "epoch": 0.22, "grad_norm": 4.208526611328125, "learning_rate": 1.9968170424783175e-05, "loss": 2.1479, "step": 16842 }, { "epoch": 0.22, "grad_norm": 4.571198463439941, "learning_rate": 1.996816204696383e-05, "loss": 2.632, "step": 16843 }, { "epoch": 0.22, "grad_norm": 3.5872418880462646, "learning_rate": 1.996815366804383e-05, "loss": 1.7523, "step": 16844 }, { "epoch": 0.22, "grad_norm": 4.492805480957031, "learning_rate": 1.9968145288023182e-05, "loss": 2.4249, "step": 16845 }, { "epoch": 0.22, "grad_norm": 4.513419151306152, "learning_rate": 1.9968136906901878e-05, "loss": 2.0781, "step": 16846 }, { "epoch": 0.22, "grad_norm": 3.487743854522705, "learning_rate": 1.996812852467992e-05, "loss": 1.641, "step": 16847 }, { "epoch": 0.22, "grad_norm": 4.1024580001831055, "learning_rate": 1.996812014135731e-05, "loss": 2.3128, "step": 16848 }, { "epoch": 0.22, "grad_norm": 4.270388126373291, "learning_rate": 1.9968111756934056e-05, "loss": 2.0541, "step": 16849 }, { "epoch": 0.22, "grad_norm": 3.8453354835510254, "learning_rate": 1.9968103371410152e-05, "loss": 1.941, "step": 16850 }, { "epoch": 0.22, "grad_norm": 4.228949069976807, "learning_rate": 1.99680949847856e-05, "loss": 1.794, "step": 16851 }, { "epoch": 0.22, "grad_norm": 4.548916816711426, "learning_rate": 1.99680865970604e-05, "loss": 2.4309, "step": 16852 }, { "epoch": 0.22, "grad_norm": 4.8350348472595215, "learning_rate": 1.9968078208234555e-05, "loss": 2.3874, "step": 16853 }, { "epoch": 0.22, "grad_norm": 4.358104228973389, "learning_rate": 1.9968069818308066e-05, "loss": 2.3305, "step": 16854 }, { "epoch": 0.22, "grad_norm": 4.0917463302612305, "learning_rate": 1.9968061427280928e-05, "loss": 2.3161, "step": 16855 }, { "epoch": 0.22, "grad_norm": 4.417464256286621, "learning_rate": 1.9968053035153156e-05, "loss": 2.6353, "step": 16856 }, { "epoch": 0.22, "grad_norm": 4.31387186050415, "learning_rate": 1.9968044641924735e-05, "loss": 2.2879, "step": 16857 }, { "epoch": 0.22, "grad_norm": 4.188350200653076, "learning_rate": 1.9968036247595673e-05, "loss": 2.2847, "step": 16858 }, { "epoch": 0.22, "grad_norm": 4.469507694244385, "learning_rate": 1.9968027852165973e-05, "loss": 2.6732, "step": 16859 }, { "epoch": 0.22, "grad_norm": 4.543539047241211, "learning_rate": 1.9968019455635632e-05, "loss": 2.2168, "step": 16860 }, { "epoch": 0.22, "grad_norm": 4.124383449554443, "learning_rate": 1.9968011058004656e-05, "loss": 1.9384, "step": 16861 }, { "epoch": 0.22, "grad_norm": 4.354973316192627, "learning_rate": 1.996800265927304e-05, "loss": 1.9153, "step": 16862 }, { "epoch": 0.22, "grad_norm": 4.407303810119629, "learning_rate": 1.9967994259440784e-05, "loss": 2.55, "step": 16863 }, { "epoch": 0.22, "grad_norm": 4.6989827156066895, "learning_rate": 1.9967985858507894e-05, "loss": 2.3307, "step": 16864 }, { "epoch": 0.22, "grad_norm": 4.179277420043945, "learning_rate": 1.9967977456474373e-05, "loss": 2.1024, "step": 16865 }, { "epoch": 0.22, "grad_norm": 4.005543231964111, "learning_rate": 1.9967969053340214e-05, "loss": 2.1874, "step": 16866 }, { "epoch": 0.22, "grad_norm": 4.019686698913574, "learning_rate": 1.9967960649105423e-05, "loss": 2.1506, "step": 16867 }, { "epoch": 0.22, "grad_norm": 4.219637393951416, "learning_rate": 1.9967952243770005e-05, "loss": 1.8632, "step": 16868 }, { "epoch": 0.22, "grad_norm": 4.298046112060547, "learning_rate": 1.996794383733395e-05, "loss": 2.4032, "step": 16869 }, { "epoch": 0.22, "grad_norm": 4.477790832519531, "learning_rate": 1.9967935429797265e-05, "loss": 2.5768, "step": 16870 }, { "epoch": 0.22, "grad_norm": 4.064940929412842, "learning_rate": 1.9967927021159953e-05, "loss": 2.025, "step": 16871 }, { "epoch": 0.22, "grad_norm": 4.302270412445068, "learning_rate": 1.9967918611422014e-05, "loss": 1.8791, "step": 16872 }, { "epoch": 0.22, "grad_norm": 3.835254192352295, "learning_rate": 1.9967910200583446e-05, "loss": 2.1519, "step": 16873 }, { "epoch": 0.22, "grad_norm": 4.898058891296387, "learning_rate": 1.996790178864425e-05, "loss": 2.4176, "step": 16874 }, { "epoch": 0.22, "grad_norm": 4.449195384979248, "learning_rate": 1.996789337560443e-05, "loss": 2.1798, "step": 16875 }, { "epoch": 0.22, "grad_norm": 4.125551700592041, "learning_rate": 1.9967884961463987e-05, "loss": 1.9073, "step": 16876 }, { "epoch": 0.22, "grad_norm": 4.154303073883057, "learning_rate": 1.9967876546222918e-05, "loss": 2.0158, "step": 16877 }, { "epoch": 0.22, "grad_norm": 3.6751065254211426, "learning_rate": 1.9967868129881228e-05, "loss": 1.8688, "step": 16878 }, { "epoch": 0.22, "grad_norm": 4.447895526885986, "learning_rate": 1.9967859712438917e-05, "loss": 2.2126, "step": 16879 }, { "epoch": 0.22, "grad_norm": 3.5805206298828125, "learning_rate": 1.9967851293895982e-05, "loss": 1.7359, "step": 16880 }, { "epoch": 0.22, "grad_norm": 4.439427852630615, "learning_rate": 1.996784287425243e-05, "loss": 2.3278, "step": 16881 }, { "epoch": 0.22, "grad_norm": 3.8192930221557617, "learning_rate": 1.9967834453508255e-05, "loss": 1.8145, "step": 16882 }, { "epoch": 0.22, "grad_norm": 4.469201564788818, "learning_rate": 1.9967826031663467e-05, "loss": 2.0299, "step": 16883 }, { "epoch": 0.22, "grad_norm": 3.9101388454437256, "learning_rate": 1.996781760871806e-05, "loss": 2.1123, "step": 16884 }, { "epoch": 0.22, "grad_norm": 4.076002597808838, "learning_rate": 1.9967809184672034e-05, "loss": 2.1058, "step": 16885 }, { "epoch": 0.22, "grad_norm": 3.6809513568878174, "learning_rate": 1.9967800759525396e-05, "loss": 1.8096, "step": 16886 }, { "epoch": 0.22, "grad_norm": 3.943399667739868, "learning_rate": 1.9967792333278148e-05, "loss": 1.944, "step": 16887 }, { "epoch": 0.22, "grad_norm": 4.146187782287598, "learning_rate": 1.9967783905930278e-05, "loss": 2.07, "step": 16888 }, { "epoch": 0.22, "grad_norm": 4.067744255065918, "learning_rate": 1.99677754774818e-05, "loss": 2.1314, "step": 16889 }, { "epoch": 0.22, "grad_norm": 4.533777713775635, "learning_rate": 1.9967767047932712e-05, "loss": 2.0597, "step": 16890 }, { "epoch": 0.22, "grad_norm": 4.206625938415527, "learning_rate": 1.996775861728301e-05, "loss": 1.9563, "step": 16891 }, { "epoch": 0.22, "grad_norm": 4.124368667602539, "learning_rate": 1.9967750185532696e-05, "loss": 2.3348, "step": 16892 }, { "epoch": 0.22, "grad_norm": 4.369669437408447, "learning_rate": 1.996774175268178e-05, "loss": 2.0768, "step": 16893 }, { "epoch": 0.22, "grad_norm": 3.8252463340759277, "learning_rate": 1.9967733318730254e-05, "loss": 1.6652, "step": 16894 }, { "epoch": 0.22, "grad_norm": 4.079951286315918, "learning_rate": 1.9967724883678118e-05, "loss": 2.0408, "step": 16895 }, { "epoch": 0.22, "grad_norm": 4.00437068939209, "learning_rate": 1.9967716447525378e-05, "loss": 1.9802, "step": 16896 }, { "epoch": 0.22, "grad_norm": 4.262406349182129, "learning_rate": 1.9967708010272033e-05, "loss": 2.2296, "step": 16897 }, { "epoch": 0.22, "grad_norm": 4.912131309509277, "learning_rate": 1.9967699571918085e-05, "loss": 2.7065, "step": 16898 }, { "epoch": 0.22, "grad_norm": 4.545315265655518, "learning_rate": 1.9967691132463533e-05, "loss": 2.3298, "step": 16899 }, { "epoch": 0.22, "grad_norm": 4.768500804901123, "learning_rate": 1.996768269190838e-05, "loss": 2.521, "step": 16900 }, { "epoch": 0.22, "grad_norm": 3.6755902767181396, "learning_rate": 1.9967674250252626e-05, "loss": 1.929, "step": 16901 }, { "epoch": 0.22, "grad_norm": 4.24953031539917, "learning_rate": 1.9967665807496268e-05, "loss": 2.1098, "step": 16902 }, { "epoch": 0.22, "grad_norm": 4.252329349517822, "learning_rate": 1.9967657363639312e-05, "loss": 2.1104, "step": 16903 }, { "epoch": 0.22, "grad_norm": 4.138506889343262, "learning_rate": 1.9967648918681756e-05, "loss": 1.969, "step": 16904 }, { "epoch": 0.22, "grad_norm": 4.1338276863098145, "learning_rate": 1.9967640472623606e-05, "loss": 2.3357, "step": 16905 }, { "epoch": 0.22, "grad_norm": 5.06650447845459, "learning_rate": 1.996763202546486e-05, "loss": 3.0981, "step": 16906 }, { "epoch": 0.22, "grad_norm": 4.279932022094727, "learning_rate": 1.9967623577205514e-05, "loss": 2.2395, "step": 16907 }, { "epoch": 0.22, "grad_norm": 4.153616428375244, "learning_rate": 1.9967615127845576e-05, "loss": 2.0371, "step": 16908 }, { "epoch": 0.22, "grad_norm": 4.469601154327393, "learning_rate": 1.9967606677385046e-05, "loss": 2.1616, "step": 16909 }, { "epoch": 0.22, "grad_norm": 3.8577611446380615, "learning_rate": 1.996759822582392e-05, "loss": 1.8738, "step": 16910 }, { "epoch": 0.22, "grad_norm": 4.448635578155518, "learning_rate": 1.9967589773162203e-05, "loss": 2.3985, "step": 16911 }, { "epoch": 0.22, "grad_norm": 3.493396043777466, "learning_rate": 1.9967581319399895e-05, "loss": 1.418, "step": 16912 }, { "epoch": 0.22, "grad_norm": 3.3546481132507324, "learning_rate": 1.9967572864536997e-05, "loss": 1.5117, "step": 16913 }, { "epoch": 0.22, "grad_norm": 3.667977809906006, "learning_rate": 1.996756440857351e-05, "loss": 1.9884, "step": 16914 }, { "epoch": 0.22, "grad_norm": 4.005819320678711, "learning_rate": 1.9967555951509435e-05, "loss": 2.2729, "step": 16915 }, { "epoch": 0.22, "grad_norm": 4.590994834899902, "learning_rate": 1.996754749334477e-05, "loss": 2.3985, "step": 16916 }, { "epoch": 0.22, "grad_norm": 4.920730113983154, "learning_rate": 1.9967539034079522e-05, "loss": 2.4881, "step": 16917 }, { "epoch": 0.22, "grad_norm": 4.599342346191406, "learning_rate": 1.9967530573713688e-05, "loss": 2.2372, "step": 16918 }, { "epoch": 0.22, "grad_norm": 4.424160957336426, "learning_rate": 1.996752211224727e-05, "loss": 2.2672, "step": 16919 }, { "epoch": 0.22, "grad_norm": 4.423065662384033, "learning_rate": 1.9967513649680268e-05, "loss": 2.1541, "step": 16920 }, { "epoch": 0.22, "grad_norm": 4.346581935882568, "learning_rate": 1.9967505186012682e-05, "loss": 2.2757, "step": 16921 }, { "epoch": 0.22, "grad_norm": 3.867067813873291, "learning_rate": 1.9967496721244513e-05, "loss": 1.7716, "step": 16922 }, { "epoch": 0.22, "grad_norm": 4.741251468658447, "learning_rate": 1.996748825537577e-05, "loss": 2.3491, "step": 16923 }, { "epoch": 0.22, "grad_norm": 4.996963977813721, "learning_rate": 1.996747978840644e-05, "loss": 2.4117, "step": 16924 }, { "epoch": 0.22, "grad_norm": 4.177013397216797, "learning_rate": 1.9967471320336533e-05, "loss": 1.8071, "step": 16925 }, { "epoch": 0.22, "grad_norm": 4.577324390411377, "learning_rate": 1.9967462851166052e-05, "loss": 2.4202, "step": 16926 }, { "epoch": 0.22, "grad_norm": 3.951266288757324, "learning_rate": 1.9967454380894988e-05, "loss": 2.0078, "step": 16927 }, { "epoch": 0.22, "grad_norm": 4.303818225860596, "learning_rate": 1.996744590952335e-05, "loss": 2.4882, "step": 16928 }, { "epoch": 0.22, "grad_norm": 4.674989223480225, "learning_rate": 1.996743743705114e-05, "loss": 2.2994, "step": 16929 }, { "epoch": 0.22, "grad_norm": 4.65194845199585, "learning_rate": 1.9967428963478352e-05, "loss": 2.1584, "step": 16930 }, { "epoch": 0.22, "grad_norm": 4.23369836807251, "learning_rate": 1.9967420488804993e-05, "loss": 2.0779, "step": 16931 }, { "epoch": 0.22, "grad_norm": 3.4169583320617676, "learning_rate": 1.996741201303106e-05, "loss": 1.6424, "step": 16932 }, { "epoch": 0.22, "grad_norm": 4.688455104827881, "learning_rate": 1.9967403536156558e-05, "loss": 2.6153, "step": 16933 }, { "epoch": 0.22, "grad_norm": 4.2593913078308105, "learning_rate": 1.996739505818148e-05, "loss": 2.0003, "step": 16934 }, { "epoch": 0.22, "grad_norm": 4.105918884277344, "learning_rate": 1.996738657910584e-05, "loss": 1.9803, "step": 16935 }, { "epoch": 0.22, "grad_norm": 4.197372913360596, "learning_rate": 1.9967378098929628e-05, "loss": 2.0872, "step": 16936 }, { "epoch": 0.22, "grad_norm": 3.8691279888153076, "learning_rate": 1.9967369617652847e-05, "loss": 1.7061, "step": 16937 }, { "epoch": 0.22, "grad_norm": 4.832493305206299, "learning_rate": 1.99673611352755e-05, "loss": 2.3831, "step": 16938 }, { "epoch": 0.22, "grad_norm": 4.226932525634766, "learning_rate": 1.996735265179759e-05, "loss": 2.4644, "step": 16939 }, { "epoch": 0.22, "grad_norm": 4.222316741943359, "learning_rate": 1.9967344167219115e-05, "loss": 2.4473, "step": 16940 }, { "epoch": 0.22, "grad_norm": 4.481902599334717, "learning_rate": 1.9967335681540074e-05, "loss": 2.5211, "step": 16941 }, { "epoch": 0.22, "grad_norm": 4.272235870361328, "learning_rate": 1.996732719476047e-05, "loss": 2.2774, "step": 16942 }, { "epoch": 0.22, "grad_norm": 4.459693908691406, "learning_rate": 1.9967318706880305e-05, "loss": 2.1685, "step": 16943 }, { "epoch": 0.22, "grad_norm": 4.171292781829834, "learning_rate": 1.9967310217899576e-05, "loss": 2.3571, "step": 16944 }, { "epoch": 0.22, "grad_norm": 4.142923355102539, "learning_rate": 1.9967301727818288e-05, "loss": 1.9279, "step": 16945 }, { "epoch": 0.22, "grad_norm": 4.292602062225342, "learning_rate": 1.9967293236636443e-05, "loss": 2.2336, "step": 16946 }, { "epoch": 0.22, "grad_norm": 4.388615131378174, "learning_rate": 1.9967284744354035e-05, "loss": 2.0793, "step": 16947 }, { "epoch": 0.22, "grad_norm": 5.3009538650512695, "learning_rate": 1.9967276250971074e-05, "loss": 2.4, "step": 16948 }, { "epoch": 0.22, "grad_norm": 3.9869656562805176, "learning_rate": 1.996726775648756e-05, "loss": 1.8172, "step": 16949 }, { "epoch": 0.22, "grad_norm": 4.027814865112305, "learning_rate": 1.9967259260903484e-05, "loss": 1.8608, "step": 16950 }, { "epoch": 0.22, "grad_norm": 4.403231143951416, "learning_rate": 1.9967250764218855e-05, "loss": 2.1556, "step": 16951 }, { "epoch": 0.22, "grad_norm": 3.892507314682007, "learning_rate": 1.9967242266433674e-05, "loss": 2.0349, "step": 16952 }, { "epoch": 0.22, "grad_norm": 4.565835952758789, "learning_rate": 1.996723376754794e-05, "loss": 2.2133, "step": 16953 }, { "epoch": 0.22, "grad_norm": 4.883802890777588, "learning_rate": 1.996722526756165e-05, "loss": 2.5391, "step": 16954 }, { "epoch": 0.22, "grad_norm": 4.812324523925781, "learning_rate": 1.9967216766474816e-05, "loss": 2.3179, "step": 16955 }, { "epoch": 0.22, "grad_norm": 4.918213844299316, "learning_rate": 1.9967208264287428e-05, "loss": 2.7122, "step": 16956 }, { "epoch": 0.22, "grad_norm": 4.567811489105225, "learning_rate": 1.9967199760999494e-05, "loss": 2.3245, "step": 16957 }, { "epoch": 0.22, "grad_norm": 4.348727703094482, "learning_rate": 1.996719125661101e-05, "loss": 2.3663, "step": 16958 }, { "epoch": 0.22, "grad_norm": 3.9921061992645264, "learning_rate": 1.9967182751121982e-05, "loss": 2.2325, "step": 16959 }, { "epoch": 0.22, "grad_norm": 4.358543395996094, "learning_rate": 1.99671742445324e-05, "loss": 2.0481, "step": 16960 }, { "epoch": 0.22, "grad_norm": 4.431434631347656, "learning_rate": 1.996716573684228e-05, "loss": 2.1696, "step": 16961 }, { "epoch": 0.22, "grad_norm": 3.9523541927337646, "learning_rate": 1.9967157228051613e-05, "loss": 2.1529, "step": 16962 }, { "epoch": 0.22, "grad_norm": 3.969054698944092, "learning_rate": 1.9967148718160407e-05, "loss": 2.2426, "step": 16963 }, { "epoch": 0.22, "grad_norm": 3.8066647052764893, "learning_rate": 1.9967140207168653e-05, "loss": 1.7501, "step": 16964 }, { "epoch": 0.22, "grad_norm": 5.185628890991211, "learning_rate": 1.9967131695076364e-05, "loss": 2.5923, "step": 16965 }, { "epoch": 0.22, "grad_norm": 4.443305015563965, "learning_rate": 1.9967123181883527e-05, "loss": 2.0978, "step": 16966 }, { "epoch": 0.22, "grad_norm": 4.500893592834473, "learning_rate": 1.9967114667590155e-05, "loss": 2.3765, "step": 16967 }, { "epoch": 0.22, "grad_norm": 5.008704662322998, "learning_rate": 1.9967106152196246e-05, "loss": 2.5735, "step": 16968 }, { "epoch": 0.22, "grad_norm": 4.284459590911865, "learning_rate": 1.9967097635701794e-05, "loss": 1.7359, "step": 16969 }, { "epoch": 0.22, "grad_norm": 4.446544647216797, "learning_rate": 1.996708911810681e-05, "loss": 2.222, "step": 16970 }, { "epoch": 0.22, "grad_norm": 3.7796473503112793, "learning_rate": 1.9967080599411287e-05, "loss": 1.9781, "step": 16971 }, { "epoch": 0.22, "grad_norm": 4.66507625579834, "learning_rate": 1.996707207961523e-05, "loss": 2.3408, "step": 16972 }, { "epoch": 0.22, "grad_norm": 4.417724609375, "learning_rate": 1.9967063558718642e-05, "loss": 2.5704, "step": 16973 }, { "epoch": 0.22, "grad_norm": 4.382221221923828, "learning_rate": 1.9967055036721515e-05, "loss": 2.3513, "step": 16974 }, { "epoch": 0.22, "grad_norm": 3.806659460067749, "learning_rate": 1.9967046513623862e-05, "loss": 2.0486, "step": 16975 }, { "epoch": 0.22, "grad_norm": 4.426748275756836, "learning_rate": 1.9967037989425676e-05, "loss": 2.0268, "step": 16976 }, { "epoch": 0.22, "grad_norm": 4.5277533531188965, "learning_rate": 1.996702946412696e-05, "loss": 2.7565, "step": 16977 }, { "epoch": 0.22, "grad_norm": 3.821429491043091, "learning_rate": 1.9967020937727714e-05, "loss": 2.0008, "step": 16978 }, { "epoch": 0.22, "grad_norm": 4.723226070404053, "learning_rate": 1.9967012410227943e-05, "loss": 2.279, "step": 16979 }, { "epoch": 0.22, "grad_norm": 4.180716514587402, "learning_rate": 1.996700388162764e-05, "loss": 2.6421, "step": 16980 }, { "epoch": 0.22, "grad_norm": 3.815769910812378, "learning_rate": 1.9966995351926814e-05, "loss": 1.6097, "step": 16981 }, { "epoch": 0.22, "grad_norm": 5.117020130157471, "learning_rate": 1.996698682112546e-05, "loss": 2.4704, "step": 16982 }, { "epoch": 0.22, "grad_norm": 4.214147567749023, "learning_rate": 1.9966978289223588e-05, "loss": 2.0966, "step": 16983 }, { "epoch": 0.22, "grad_norm": 4.344893932342529, "learning_rate": 1.9966969756221185e-05, "loss": 2.2537, "step": 16984 }, { "epoch": 0.22, "grad_norm": 4.3681464195251465, "learning_rate": 1.9966961222118263e-05, "loss": 2.0979, "step": 16985 }, { "epoch": 0.22, "grad_norm": 4.31758451461792, "learning_rate": 1.996695268691482e-05, "loss": 2.0923, "step": 16986 }, { "epoch": 0.22, "grad_norm": 4.327649116516113, "learning_rate": 1.996694415061085e-05, "loss": 2.2958, "step": 16987 }, { "epoch": 0.22, "grad_norm": 4.284855842590332, "learning_rate": 1.9966935613206367e-05, "loss": 2.467, "step": 16988 }, { "epoch": 0.22, "grad_norm": 4.468924522399902, "learning_rate": 1.9966927074701364e-05, "loss": 2.2502, "step": 16989 }, { "epoch": 0.22, "grad_norm": 5.070211887359619, "learning_rate": 1.9966918535095842e-05, "loss": 3.1175, "step": 16990 }, { "epoch": 0.22, "grad_norm": 4.325638294219971, "learning_rate": 1.9966909994389804e-05, "loss": 2.8099, "step": 16991 }, { "epoch": 0.22, "grad_norm": 3.864992618560791, "learning_rate": 1.996690145258325e-05, "loss": 1.6424, "step": 16992 }, { "epoch": 0.22, "grad_norm": 4.530059337615967, "learning_rate": 1.9966892909676182e-05, "loss": 2.6289, "step": 16993 }, { "epoch": 0.22, "grad_norm": 4.452538013458252, "learning_rate": 1.9966884365668595e-05, "loss": 2.0411, "step": 16994 }, { "epoch": 0.22, "grad_norm": 4.134395122528076, "learning_rate": 1.99668758205605e-05, "loss": 2.1515, "step": 16995 }, { "epoch": 0.22, "grad_norm": 4.386471748352051, "learning_rate": 1.996686727435189e-05, "loss": 2.0147, "step": 16996 }, { "epoch": 0.22, "grad_norm": 4.621356010437012, "learning_rate": 1.996685872704277e-05, "loss": 2.6907, "step": 16997 }, { "epoch": 0.22, "grad_norm": 4.479735851287842, "learning_rate": 1.996685017863314e-05, "loss": 2.1926, "step": 16998 }, { "epoch": 0.22, "grad_norm": 3.9057250022888184, "learning_rate": 1.9966841629123e-05, "loss": 2.1747, "step": 16999 }, { "epoch": 0.22, "grad_norm": 4.839929103851318, "learning_rate": 1.996683307851235e-05, "loss": 2.3467, "step": 17000 }, { "epoch": 0.22, "grad_norm": 3.9097485542297363, "learning_rate": 1.9966824526801197e-05, "loss": 1.9376, "step": 17001 }, { "epoch": 0.22, "grad_norm": 4.05303955078125, "learning_rate": 1.9966815973989536e-05, "loss": 2.1414, "step": 17002 }, { "epoch": 0.22, "grad_norm": 4.760036468505859, "learning_rate": 1.9966807420077367e-05, "loss": 2.4144, "step": 17003 }, { "epoch": 0.22, "grad_norm": 4.425610065460205, "learning_rate": 1.9966798865064698e-05, "loss": 2.4829, "step": 17004 }, { "epoch": 0.22, "grad_norm": 4.070572376251221, "learning_rate": 1.996679030895152e-05, "loss": 1.8131, "step": 17005 }, { "epoch": 0.22, "grad_norm": 3.9229936599731445, "learning_rate": 1.996678175173784e-05, "loss": 2.0117, "step": 17006 }, { "epoch": 0.22, "grad_norm": 3.942122459411621, "learning_rate": 1.996677319342366e-05, "loss": 2.096, "step": 17007 }, { "epoch": 0.22, "grad_norm": 4.743741512298584, "learning_rate": 1.996676463400898e-05, "loss": 2.7393, "step": 17008 }, { "epoch": 0.22, "grad_norm": 4.5182719230651855, "learning_rate": 1.99667560734938e-05, "loss": 2.2666, "step": 17009 }, { "epoch": 0.22, "grad_norm": 4.597648620605469, "learning_rate": 1.996674751187812e-05, "loss": 2.1643, "step": 17010 }, { "epoch": 0.22, "grad_norm": 4.447445392608643, "learning_rate": 1.9966738949161943e-05, "loss": 2.0121, "step": 17011 }, { "epoch": 0.22, "grad_norm": 4.919744968414307, "learning_rate": 1.9966730385345264e-05, "loss": 2.2099, "step": 17012 }, { "epoch": 0.22, "grad_norm": 4.256072998046875, "learning_rate": 1.9966721820428094e-05, "loss": 2.2414, "step": 17013 }, { "epoch": 0.22, "grad_norm": 4.58834981918335, "learning_rate": 1.9966713254410427e-05, "loss": 2.0198, "step": 17014 }, { "epoch": 0.22, "grad_norm": 4.010199546813965, "learning_rate": 1.9966704687292265e-05, "loss": 2.1663, "step": 17015 }, { "epoch": 0.22, "grad_norm": 3.795815944671631, "learning_rate": 1.996669611907361e-05, "loss": 1.7842, "step": 17016 }, { "epoch": 0.22, "grad_norm": 3.7410850524902344, "learning_rate": 1.9966687549754463e-05, "loss": 1.8646, "step": 17017 }, { "epoch": 0.22, "grad_norm": 4.743411064147949, "learning_rate": 1.9966678979334828e-05, "loss": 2.2314, "step": 17018 }, { "epoch": 0.22, "grad_norm": 4.336313247680664, "learning_rate": 1.99666704078147e-05, "loss": 2.295, "step": 17019 }, { "epoch": 0.22, "grad_norm": 4.895068645477295, "learning_rate": 1.9966661835194083e-05, "loss": 2.7383, "step": 17020 }, { "epoch": 0.22, "grad_norm": 3.5608749389648438, "learning_rate": 1.9966653261472973e-05, "loss": 1.8076, "step": 17021 }, { "epoch": 0.22, "grad_norm": 4.085938930511475, "learning_rate": 1.996664468665138e-05, "loss": 1.8747, "step": 17022 }, { "epoch": 0.22, "grad_norm": 3.7901430130004883, "learning_rate": 1.99666361107293e-05, "loss": 1.7075, "step": 17023 }, { "epoch": 0.22, "grad_norm": 4.225141525268555, "learning_rate": 1.9966627533706732e-05, "loss": 2.3484, "step": 17024 }, { "epoch": 0.22, "grad_norm": 4.09721040725708, "learning_rate": 1.9966618955583684e-05, "loss": 2.1558, "step": 17025 }, { "epoch": 0.22, "grad_norm": 4.49536657333374, "learning_rate": 1.996661037636015e-05, "loss": 2.5754, "step": 17026 }, { "epoch": 0.22, "grad_norm": 4.170502185821533, "learning_rate": 1.9966601796036128e-05, "loss": 2.0239, "step": 17027 }, { "epoch": 0.22, "grad_norm": 4.207200050354004, "learning_rate": 1.996659321461163e-05, "loss": 1.9765, "step": 17028 }, { "epoch": 0.22, "grad_norm": 4.417466640472412, "learning_rate": 1.996658463208665e-05, "loss": 2.3646, "step": 17029 }, { "epoch": 0.22, "grad_norm": 4.09881067276001, "learning_rate": 1.996657604846119e-05, "loss": 1.9867, "step": 17030 }, { "epoch": 0.22, "grad_norm": 4.215193748474121, "learning_rate": 1.996656746373525e-05, "loss": 1.8737, "step": 17031 }, { "epoch": 0.22, "grad_norm": 3.9531075954437256, "learning_rate": 1.996655887790883e-05, "loss": 2.2904, "step": 17032 }, { "epoch": 0.22, "grad_norm": 3.594496965408325, "learning_rate": 1.9966550290981936e-05, "loss": 1.6308, "step": 17033 }, { "epoch": 0.22, "grad_norm": 4.332050323486328, "learning_rate": 1.9966541702954566e-05, "loss": 2.1175, "step": 17034 }, { "epoch": 0.22, "grad_norm": 4.760721206665039, "learning_rate": 1.996653311382672e-05, "loss": 2.3572, "step": 17035 }, { "epoch": 0.22, "grad_norm": 4.849227428436279, "learning_rate": 1.99665245235984e-05, "loss": 2.4349, "step": 17036 }, { "epoch": 0.22, "grad_norm": 4.358071327209473, "learning_rate": 1.9966515932269602e-05, "loss": 2.1393, "step": 17037 }, { "epoch": 0.22, "grad_norm": 4.425847053527832, "learning_rate": 1.9966507339840337e-05, "loss": 2.5544, "step": 17038 }, { "epoch": 0.22, "grad_norm": 4.540461540222168, "learning_rate": 1.99664987463106e-05, "loss": 2.0911, "step": 17039 }, { "epoch": 0.22, "grad_norm": 4.473416328430176, "learning_rate": 1.996649015168039e-05, "loss": 2.035, "step": 17040 }, { "epoch": 0.22, "grad_norm": 4.404378890991211, "learning_rate": 1.9966481555949713e-05, "loss": 2.1549, "step": 17041 }, { "epoch": 0.22, "grad_norm": 4.591191291809082, "learning_rate": 1.9966472959118563e-05, "loss": 2.0022, "step": 17042 }, { "epoch": 0.22, "grad_norm": 3.9348390102386475, "learning_rate": 1.996646436118695e-05, "loss": 1.9767, "step": 17043 }, { "epoch": 0.22, "grad_norm": 4.57891321182251, "learning_rate": 1.996645576215487e-05, "loss": 2.4144, "step": 17044 }, { "epoch": 0.22, "grad_norm": 4.400040626525879, "learning_rate": 1.9966447162022323e-05, "loss": 2.5109, "step": 17045 }, { "epoch": 0.22, "grad_norm": 4.38118839263916, "learning_rate": 1.9966438560789306e-05, "loss": 2.1612, "step": 17046 }, { "epoch": 0.22, "grad_norm": 3.7964704036712646, "learning_rate": 1.9966429958455834e-05, "loss": 1.8016, "step": 17047 }, { "epoch": 0.22, "grad_norm": 3.965118885040283, "learning_rate": 1.9966421355021893e-05, "loss": 2.0939, "step": 17048 }, { "epoch": 0.22, "grad_norm": 4.651243209838867, "learning_rate": 1.996641275048749e-05, "loss": 2.5896, "step": 17049 }, { "epoch": 0.22, "grad_norm": 4.135872840881348, "learning_rate": 1.996640414485263e-05, "loss": 2.2817, "step": 17050 }, { "epoch": 0.22, "grad_norm": 4.072013854980469, "learning_rate": 1.996639553811731e-05, "loss": 2.3635, "step": 17051 }, { "epoch": 0.22, "grad_norm": 3.962263584136963, "learning_rate": 1.9966386930281528e-05, "loss": 2.0398, "step": 17052 }, { "epoch": 0.22, "grad_norm": 4.2043585777282715, "learning_rate": 1.996637832134529e-05, "loss": 2.1422, "step": 17053 }, { "epoch": 0.22, "grad_norm": 4.799726963043213, "learning_rate": 1.996636971130859e-05, "loss": 2.6786, "step": 17054 }, { "epoch": 0.22, "grad_norm": 4.371220111846924, "learning_rate": 1.9966361100171437e-05, "loss": 2.0949, "step": 17055 }, { "epoch": 0.22, "grad_norm": 3.922985076904297, "learning_rate": 1.996635248793383e-05, "loss": 1.7399, "step": 17056 }, { "epoch": 0.22, "grad_norm": 4.129830837249756, "learning_rate": 1.9966343874595765e-05, "loss": 2.3259, "step": 17057 }, { "epoch": 0.22, "grad_norm": 4.330163478851318, "learning_rate": 1.996633526015725e-05, "loss": 2.312, "step": 17058 }, { "epoch": 0.22, "grad_norm": 3.493870735168457, "learning_rate": 1.996632664461828e-05, "loss": 1.9177, "step": 17059 }, { "epoch": 0.22, "grad_norm": 3.80293607711792, "learning_rate": 1.9966318027978862e-05, "loss": 1.5466, "step": 17060 }, { "epoch": 0.22, "grad_norm": 3.8888633251190186, "learning_rate": 1.9966309410238992e-05, "loss": 1.7506, "step": 17061 }, { "epoch": 0.22, "grad_norm": 4.202181339263916, "learning_rate": 1.996630079139867e-05, "loss": 2.2183, "step": 17062 }, { "epoch": 0.22, "grad_norm": 3.7893898487091064, "learning_rate": 1.99662921714579e-05, "loss": 1.798, "step": 17063 }, { "epoch": 0.22, "grad_norm": 4.40960693359375, "learning_rate": 1.9966283550416686e-05, "loss": 2.377, "step": 17064 }, { "epoch": 0.22, "grad_norm": 4.3856425285339355, "learning_rate": 1.996627492827502e-05, "loss": 2.208, "step": 17065 }, { "epoch": 0.22, "grad_norm": 4.303086280822754, "learning_rate": 1.996626630503291e-05, "loss": 2.2725, "step": 17066 }, { "epoch": 0.22, "grad_norm": 3.8731744289398193, "learning_rate": 1.9966257680690355e-05, "loss": 2.1851, "step": 17067 }, { "epoch": 0.22, "grad_norm": 4.624186992645264, "learning_rate": 1.9966249055247357e-05, "loss": 1.9187, "step": 17068 }, { "epoch": 0.22, "grad_norm": 4.759547233581543, "learning_rate": 1.9966240428703915e-05, "loss": 2.7915, "step": 17069 }, { "epoch": 0.22, "grad_norm": 4.072115421295166, "learning_rate": 1.996623180106003e-05, "loss": 2.3521, "step": 17070 }, { "epoch": 0.22, "grad_norm": 4.498871803283691, "learning_rate": 1.996622317231571e-05, "loss": 2.1958, "step": 17071 }, { "epoch": 0.22, "grad_norm": 4.237387180328369, "learning_rate": 1.9966214542470945e-05, "loss": 1.79, "step": 17072 }, { "epoch": 0.22, "grad_norm": 4.8159379959106445, "learning_rate": 1.996620591152574e-05, "loss": 2.227, "step": 17073 }, { "epoch": 0.22, "grad_norm": 4.7339301109313965, "learning_rate": 1.99661972794801e-05, "loss": 2.6076, "step": 17074 }, { "epoch": 0.22, "grad_norm": 4.4571990966796875, "learning_rate": 1.996618864633402e-05, "loss": 2.2078, "step": 17075 }, { "epoch": 0.22, "grad_norm": 3.7541422843933105, "learning_rate": 1.9966180012087503e-05, "loss": 2.2577, "step": 17076 }, { "epoch": 0.22, "grad_norm": 4.213251113891602, "learning_rate": 1.9966171376740552e-05, "loss": 1.9356, "step": 17077 }, { "epoch": 0.22, "grad_norm": 5.112081050872803, "learning_rate": 1.9966162740293167e-05, "loss": 2.1381, "step": 17078 }, { "epoch": 0.22, "grad_norm": 3.971406936645508, "learning_rate": 1.9966154102745347e-05, "loss": 2.0389, "step": 17079 }, { "epoch": 0.22, "grad_norm": 3.362060785293579, "learning_rate": 1.99661454640971e-05, "loss": 1.4224, "step": 17080 }, { "epoch": 0.22, "grad_norm": 3.777188301086426, "learning_rate": 1.9966136824348416e-05, "loss": 1.8569, "step": 17081 }, { "epoch": 0.22, "grad_norm": 3.71928071975708, "learning_rate": 1.9966128183499302e-05, "loss": 1.846, "step": 17082 }, { "epoch": 0.22, "grad_norm": 4.72711706161499, "learning_rate": 1.9966119541549756e-05, "loss": 2.429, "step": 17083 }, { "epoch": 0.22, "grad_norm": 3.8275198936462402, "learning_rate": 1.9966110898499786e-05, "loss": 1.8138, "step": 17084 }, { "epoch": 0.22, "grad_norm": 4.641992092132568, "learning_rate": 1.996610225434939e-05, "loss": 2.4749, "step": 17085 }, { "epoch": 0.22, "grad_norm": 3.9608540534973145, "learning_rate": 1.9966093609098563e-05, "loss": 1.882, "step": 17086 }, { "epoch": 0.22, "grad_norm": 4.270479679107666, "learning_rate": 1.996608496274731e-05, "loss": 2.5688, "step": 17087 }, { "epoch": 0.22, "grad_norm": 3.906425714492798, "learning_rate": 1.9966076315295634e-05, "loss": 1.4796, "step": 17088 }, { "epoch": 0.22, "grad_norm": 4.207115173339844, "learning_rate": 1.996606766674353e-05, "loss": 2.2167, "step": 17089 }, { "epoch": 0.22, "grad_norm": 4.077493190765381, "learning_rate": 1.9966059017091007e-05, "loss": 2.0737, "step": 17090 }, { "epoch": 0.22, "grad_norm": 3.9889938831329346, "learning_rate": 1.9966050366338062e-05, "loss": 2.3364, "step": 17091 }, { "epoch": 0.22, "grad_norm": 4.46645975112915, "learning_rate": 1.9966041714484695e-05, "loss": 2.2183, "step": 17092 }, { "epoch": 0.22, "grad_norm": 3.7402923107147217, "learning_rate": 1.9966033061530908e-05, "loss": 2.2654, "step": 17093 }, { "epoch": 0.22, "grad_norm": 3.9880411624908447, "learning_rate": 1.9966024407476703e-05, "loss": 1.8679, "step": 17094 }, { "epoch": 0.22, "grad_norm": 4.136749267578125, "learning_rate": 1.996601575232208e-05, "loss": 1.8668, "step": 17095 }, { "epoch": 0.22, "grad_norm": 4.446537971496582, "learning_rate": 1.996600709606704e-05, "loss": 2.5055, "step": 17096 }, { "epoch": 0.22, "grad_norm": 3.889648199081421, "learning_rate": 1.996599843871158e-05, "loss": 1.8267, "step": 17097 }, { "epoch": 0.22, "grad_norm": 4.028013229370117, "learning_rate": 1.9965989780255707e-05, "loss": 2.5626, "step": 17098 }, { "epoch": 0.22, "grad_norm": 4.330018520355225, "learning_rate": 1.996598112069942e-05, "loss": 2.2136, "step": 17099 }, { "epoch": 0.22, "grad_norm": 4.388360023498535, "learning_rate": 1.996597246004272e-05, "loss": 2.3395, "step": 17100 }, { "epoch": 0.22, "grad_norm": 4.654629707336426, "learning_rate": 1.9965963798285605e-05, "loss": 2.5375, "step": 17101 }, { "epoch": 0.22, "grad_norm": 3.6484737396240234, "learning_rate": 1.996595513542808e-05, "loss": 1.8939, "step": 17102 }, { "epoch": 0.22, "grad_norm": 3.920151948928833, "learning_rate": 1.9965946471470147e-05, "loss": 1.7605, "step": 17103 }, { "epoch": 0.22, "grad_norm": 4.551156520843506, "learning_rate": 1.9965937806411804e-05, "loss": 1.9442, "step": 17104 }, { "epoch": 0.22, "grad_norm": 4.0020670890808105, "learning_rate": 1.996592914025305e-05, "loss": 1.8308, "step": 17105 }, { "epoch": 0.22, "grad_norm": 4.334708213806152, "learning_rate": 1.996592047299389e-05, "loss": 1.942, "step": 17106 }, { "epoch": 0.22, "grad_norm": 4.542428016662598, "learning_rate": 1.996591180463432e-05, "loss": 2.3738, "step": 17107 }, { "epoch": 0.22, "grad_norm": 4.1461358070373535, "learning_rate": 1.9965903135174345e-05, "loss": 2.0106, "step": 17108 }, { "epoch": 0.22, "grad_norm": 4.33561897277832, "learning_rate": 1.9965894464613972e-05, "loss": 2.5954, "step": 17109 }, { "epoch": 0.22, "grad_norm": 4.295038223266602, "learning_rate": 1.996588579295319e-05, "loss": 2.2936, "step": 17110 }, { "epoch": 0.22, "grad_norm": 4.424861431121826, "learning_rate": 1.9965877120192005e-05, "loss": 2.5485, "step": 17111 }, { "epoch": 0.22, "grad_norm": 3.873298406600952, "learning_rate": 1.9965868446330416e-05, "loss": 1.9057, "step": 17112 }, { "epoch": 0.22, "grad_norm": 4.728815078735352, "learning_rate": 1.996585977136843e-05, "loss": 2.4005, "step": 17113 }, { "epoch": 0.22, "grad_norm": 4.980397701263428, "learning_rate": 1.9965851095306044e-05, "loss": 2.1589, "step": 17114 }, { "epoch": 0.22, "grad_norm": 4.3833909034729, "learning_rate": 1.9965842418143255e-05, "loss": 2.1819, "step": 17115 }, { "epoch": 0.22, "grad_norm": 4.878068447113037, "learning_rate": 1.9965833739880072e-05, "loss": 2.3757, "step": 17116 }, { "epoch": 0.22, "grad_norm": 4.351688385009766, "learning_rate": 1.996582506051649e-05, "loss": 2.2872, "step": 17117 }, { "epoch": 0.22, "grad_norm": 3.752328872680664, "learning_rate": 1.996581638005251e-05, "loss": 1.9585, "step": 17118 }, { "epoch": 0.22, "grad_norm": 4.237076282501221, "learning_rate": 1.996580769848814e-05, "loss": 1.7967, "step": 17119 }, { "epoch": 0.22, "grad_norm": 4.790412902832031, "learning_rate": 1.996579901582337e-05, "loss": 2.2013, "step": 17120 }, { "epoch": 0.22, "grad_norm": 4.170185089111328, "learning_rate": 1.996579033205821e-05, "loss": 1.849, "step": 17121 }, { "epoch": 0.22, "grad_norm": 4.675230979919434, "learning_rate": 1.9965781647192656e-05, "loss": 2.2014, "step": 17122 }, { "epoch": 0.22, "grad_norm": 4.236873149871826, "learning_rate": 1.9965772961226712e-05, "loss": 2.4152, "step": 17123 }, { "epoch": 0.22, "grad_norm": 3.9205071926116943, "learning_rate": 1.9965764274160376e-05, "loss": 1.9147, "step": 17124 }, { "epoch": 0.22, "grad_norm": 4.008286952972412, "learning_rate": 1.9965755585993654e-05, "loss": 2.0959, "step": 17125 }, { "epoch": 0.22, "grad_norm": 3.919654607772827, "learning_rate": 1.996574689672654e-05, "loss": 1.9322, "step": 17126 }, { "epoch": 0.22, "grad_norm": 4.942570209503174, "learning_rate": 1.996573820635904e-05, "loss": 2.4042, "step": 17127 }, { "epoch": 0.22, "grad_norm": 4.94204568862915, "learning_rate": 1.9965729514891153e-05, "loss": 2.5407, "step": 17128 }, { "epoch": 0.22, "grad_norm": 3.9959311485290527, "learning_rate": 1.9965720822322884e-05, "loss": 1.9824, "step": 17129 }, { "epoch": 0.22, "grad_norm": 5.429800033569336, "learning_rate": 1.9965712128654225e-05, "loss": 1.909, "step": 17130 }, { "epoch": 0.22, "grad_norm": 5.149401664733887, "learning_rate": 1.9965703433885183e-05, "loss": 2.5445, "step": 17131 }, { "epoch": 0.22, "grad_norm": 4.407553195953369, "learning_rate": 1.996569473801576e-05, "loss": 2.2335, "step": 17132 }, { "epoch": 0.22, "grad_norm": 4.582963943481445, "learning_rate": 1.9965686041045956e-05, "loss": 2.3584, "step": 17133 }, { "epoch": 0.22, "grad_norm": 3.8090949058532715, "learning_rate": 1.9965677342975768e-05, "loss": 2.0728, "step": 17134 }, { "epoch": 0.22, "grad_norm": 4.197194576263428, "learning_rate": 1.9965668643805203e-05, "loss": 2.1111, "step": 17135 }, { "epoch": 0.22, "grad_norm": 4.242641448974609, "learning_rate": 1.9965659943534258e-05, "loss": 2.4045, "step": 17136 }, { "epoch": 0.22, "grad_norm": 4.178502082824707, "learning_rate": 1.9965651242162935e-05, "loss": 2.2446, "step": 17137 }, { "epoch": 0.22, "grad_norm": 4.3389105796813965, "learning_rate": 1.9965642539691236e-05, "loss": 1.6899, "step": 17138 }, { "epoch": 0.22, "grad_norm": 3.972116470336914, "learning_rate": 1.996563383611916e-05, "loss": 2.0087, "step": 17139 }, { "epoch": 0.22, "grad_norm": 4.112865924835205, "learning_rate": 1.996562513144671e-05, "loss": 2.5402, "step": 17140 }, { "epoch": 0.22, "grad_norm": 4.56178092956543, "learning_rate": 1.9965616425673885e-05, "loss": 2.5173, "step": 17141 }, { "epoch": 0.22, "grad_norm": 4.046512603759766, "learning_rate": 1.9965607718800687e-05, "loss": 2.0996, "step": 17142 }, { "epoch": 0.22, "grad_norm": 4.540547847747803, "learning_rate": 1.9965599010827115e-05, "loss": 2.6321, "step": 17143 }, { "epoch": 0.22, "grad_norm": 4.133866786956787, "learning_rate": 1.9965590301753173e-05, "loss": 2.6256, "step": 17144 }, { "epoch": 0.22, "grad_norm": 4.569063663482666, "learning_rate": 1.9965581591578865e-05, "loss": 2.2903, "step": 17145 }, { "epoch": 0.22, "grad_norm": 4.533634185791016, "learning_rate": 1.9965572880304182e-05, "loss": 2.3753, "step": 17146 }, { "epoch": 0.22, "grad_norm": 4.0049519538879395, "learning_rate": 1.9965564167929133e-05, "loss": 1.7948, "step": 17147 }, { "epoch": 0.22, "grad_norm": 4.023061752319336, "learning_rate": 1.9965555454453716e-05, "loss": 2.1361, "step": 17148 }, { "epoch": 0.22, "grad_norm": 3.9454987049102783, "learning_rate": 1.9965546739877936e-05, "loss": 2.0382, "step": 17149 }, { "epoch": 0.22, "grad_norm": 4.462888717651367, "learning_rate": 1.9965538024201786e-05, "loss": 2.1892, "step": 17150 }, { "epoch": 0.22, "grad_norm": 3.8445873260498047, "learning_rate": 1.9965529307425272e-05, "loss": 2.1342, "step": 17151 }, { "epoch": 0.22, "grad_norm": 3.537729024887085, "learning_rate": 1.9965520589548398e-05, "loss": 1.5584, "step": 17152 }, { "epoch": 0.22, "grad_norm": 3.9256436824798584, "learning_rate": 1.996551187057116e-05, "loss": 2.3432, "step": 17153 }, { "epoch": 0.22, "grad_norm": 4.040251731872559, "learning_rate": 1.9965503150493555e-05, "loss": 1.9996, "step": 17154 }, { "epoch": 0.22, "grad_norm": 4.3793439865112305, "learning_rate": 1.9965494429315594e-05, "loss": 2.6353, "step": 17155 }, { "epoch": 0.22, "grad_norm": 4.264583587646484, "learning_rate": 1.9965485707037276e-05, "loss": 2.0815, "step": 17156 }, { "epoch": 0.22, "grad_norm": 4.458551406860352, "learning_rate": 1.9965476983658597e-05, "loss": 2.297, "step": 17157 }, { "epoch": 0.22, "grad_norm": 4.353806972503662, "learning_rate": 1.996546825917956e-05, "loss": 2.1766, "step": 17158 }, { "epoch": 0.22, "grad_norm": 3.4842448234558105, "learning_rate": 1.9965459533600167e-05, "loss": 1.6731, "step": 17159 }, { "epoch": 0.22, "grad_norm": 4.143701076507568, "learning_rate": 1.9965450806920415e-05, "loss": 2.3988, "step": 17160 }, { "epoch": 0.22, "grad_norm": 4.11649751663208, "learning_rate": 1.9965442079140313e-05, "loss": 2.1658, "step": 17161 }, { "epoch": 0.22, "grad_norm": 4.3158183097839355, "learning_rate": 1.9965433350259854e-05, "loss": 1.9335, "step": 17162 }, { "epoch": 0.22, "grad_norm": 3.607611656188965, "learning_rate": 1.9965424620279042e-05, "loss": 1.6481, "step": 17163 }, { "epoch": 0.22, "grad_norm": 4.401788711547852, "learning_rate": 1.996541588919788e-05, "loss": 2.1213, "step": 17164 }, { "epoch": 0.22, "grad_norm": 3.9474802017211914, "learning_rate": 1.9965407157016364e-05, "loss": 2.0693, "step": 17165 }, { "epoch": 0.22, "grad_norm": 3.991297960281372, "learning_rate": 1.99653984237345e-05, "loss": 2.1493, "step": 17166 }, { "epoch": 0.22, "grad_norm": 4.2836456298828125, "learning_rate": 1.996538968935229e-05, "loss": 2.3257, "step": 17167 }, { "epoch": 0.22, "grad_norm": 4.63701057434082, "learning_rate": 1.9965380953869723e-05, "loss": 2.1922, "step": 17168 }, { "epoch": 0.22, "grad_norm": 3.834726095199585, "learning_rate": 1.9965372217286818e-05, "loss": 1.6845, "step": 17169 }, { "epoch": 0.22, "grad_norm": 3.9791688919067383, "learning_rate": 1.9965363479603562e-05, "loss": 2.17, "step": 17170 }, { "epoch": 0.22, "grad_norm": 4.495367527008057, "learning_rate": 1.9965354740819963e-05, "loss": 2.161, "step": 17171 }, { "epoch": 0.22, "grad_norm": 3.4971470832824707, "learning_rate": 1.9965346000936018e-05, "loss": 1.9497, "step": 17172 }, { "epoch": 0.22, "grad_norm": 4.412813186645508, "learning_rate": 1.996533725995173e-05, "loss": 2.5017, "step": 17173 }, { "epoch": 0.22, "grad_norm": 4.04407262802124, "learning_rate": 1.99653285178671e-05, "loss": 2.1893, "step": 17174 }, { "epoch": 0.22, "grad_norm": 3.7260117530822754, "learning_rate": 1.996531977468213e-05, "loss": 2.0193, "step": 17175 }, { "epoch": 0.22, "grad_norm": 4.42978048324585, "learning_rate": 1.9965311030396818e-05, "loss": 2.1638, "step": 17176 }, { "epoch": 0.22, "grad_norm": 4.173986434936523, "learning_rate": 1.9965302285011166e-05, "loss": 2.0264, "step": 17177 }, { "epoch": 0.22, "grad_norm": 4.505021572113037, "learning_rate": 1.996529353852518e-05, "loss": 2.9297, "step": 17178 }, { "epoch": 0.22, "grad_norm": 3.954677104949951, "learning_rate": 1.9965284790938854e-05, "loss": 2.3097, "step": 17179 }, { "epoch": 0.22, "grad_norm": 3.9913790225982666, "learning_rate": 1.996527604225219e-05, "loss": 2.5215, "step": 17180 }, { "epoch": 0.22, "grad_norm": 4.732451915740967, "learning_rate": 1.9965267292465193e-05, "loss": 1.9, "step": 17181 }, { "epoch": 0.22, "grad_norm": 3.972106695175171, "learning_rate": 1.9965258541577856e-05, "loss": 2.2143, "step": 17182 }, { "epoch": 0.22, "grad_norm": 4.0671281814575195, "learning_rate": 1.9965249789590192e-05, "loss": 1.6431, "step": 17183 }, { "epoch": 0.22, "grad_norm": 4.4163360595703125, "learning_rate": 1.9965241036502192e-05, "loss": 2.1583, "step": 17184 }, { "epoch": 0.22, "grad_norm": 4.178427219390869, "learning_rate": 1.996523228231386e-05, "loss": 1.9679, "step": 17185 }, { "epoch": 0.22, "grad_norm": 3.7881157398223877, "learning_rate": 1.99652235270252e-05, "loss": 2.0701, "step": 17186 }, { "epoch": 0.22, "grad_norm": 5.316526889801025, "learning_rate": 1.9965214770636208e-05, "loss": 2.5541, "step": 17187 }, { "epoch": 0.22, "grad_norm": 4.430954933166504, "learning_rate": 1.9965206013146886e-05, "loss": 2.0885, "step": 17188 }, { "epoch": 0.22, "grad_norm": 4.5820231437683105, "learning_rate": 1.9965197254557238e-05, "loss": 2.4861, "step": 17189 }, { "epoch": 0.22, "grad_norm": 4.000499725341797, "learning_rate": 1.9965188494867263e-05, "loss": 2.2674, "step": 17190 }, { "epoch": 0.22, "grad_norm": 4.575469017028809, "learning_rate": 1.9965179734076963e-05, "loss": 2.698, "step": 17191 }, { "epoch": 0.22, "grad_norm": 4.022863388061523, "learning_rate": 1.9965170972186335e-05, "loss": 1.9714, "step": 17192 }, { "epoch": 0.22, "grad_norm": 3.946122646331787, "learning_rate": 1.9965162209195385e-05, "loss": 2.0229, "step": 17193 }, { "epoch": 0.22, "grad_norm": 4.079875946044922, "learning_rate": 1.9965153445104116e-05, "loss": 1.9734, "step": 17194 }, { "epoch": 0.22, "grad_norm": 4.244551181793213, "learning_rate": 1.996514467991252e-05, "loss": 2.0489, "step": 17195 }, { "epoch": 0.22, "grad_norm": 3.792269468307495, "learning_rate": 1.9965135913620605e-05, "loss": 2.1012, "step": 17196 }, { "epoch": 0.22, "grad_norm": 4.150293827056885, "learning_rate": 1.9965127146228367e-05, "loss": 2.2047, "step": 17197 }, { "epoch": 0.22, "grad_norm": 3.955662965774536, "learning_rate": 1.9965118377735812e-05, "loss": 1.9687, "step": 17198 }, { "epoch": 0.22, "grad_norm": 3.9285264015197754, "learning_rate": 1.9965109608142938e-05, "loss": 2.266, "step": 17199 }, { "epoch": 0.22, "grad_norm": 4.370039463043213, "learning_rate": 1.9965100837449748e-05, "loss": 2.2813, "step": 17200 }, { "epoch": 0.22, "grad_norm": 5.115530490875244, "learning_rate": 1.9965092065656246e-05, "loss": 2.1923, "step": 17201 }, { "epoch": 0.22, "grad_norm": 4.744689464569092, "learning_rate": 1.996508329276242e-05, "loss": 2.2785, "step": 17202 }, { "epoch": 0.22, "grad_norm": 3.8094608783721924, "learning_rate": 1.9965074518768285e-05, "loss": 2.0512, "step": 17203 }, { "epoch": 0.22, "grad_norm": 4.685549736022949, "learning_rate": 1.9965065743673837e-05, "loss": 2.3971, "step": 17204 }, { "epoch": 0.22, "grad_norm": 5.167141437530518, "learning_rate": 1.9965056967479077e-05, "loss": 2.6569, "step": 17205 }, { "epoch": 0.22, "grad_norm": 4.140505313873291, "learning_rate": 1.9965048190184003e-05, "loss": 1.9467, "step": 17206 }, { "epoch": 0.22, "grad_norm": 4.67145299911499, "learning_rate": 1.9965039411788618e-05, "loss": 1.9758, "step": 17207 }, { "epoch": 0.22, "grad_norm": 5.070371150970459, "learning_rate": 1.9965030632292926e-05, "loss": 2.578, "step": 17208 }, { "epoch": 0.22, "grad_norm": 4.624985694885254, "learning_rate": 1.9965021851696928e-05, "loss": 1.8882, "step": 17209 }, { "epoch": 0.22, "grad_norm": 4.479825019836426, "learning_rate": 1.9965013070000617e-05, "loss": 2.4133, "step": 17210 }, { "epoch": 0.22, "grad_norm": 4.515099048614502, "learning_rate": 1.9965004287204e-05, "loss": 2.9128, "step": 17211 }, { "epoch": 0.22, "grad_norm": 4.859493255615234, "learning_rate": 1.996499550330708e-05, "loss": 2.5436, "step": 17212 }, { "epoch": 0.22, "grad_norm": 4.075995445251465, "learning_rate": 1.9964986718309856e-05, "loss": 1.9401, "step": 17213 }, { "epoch": 0.22, "grad_norm": 3.2258522510528564, "learning_rate": 1.9964977932212325e-05, "loss": 1.389, "step": 17214 }, { "epoch": 0.22, "grad_norm": 3.7424073219299316, "learning_rate": 1.9964969145014495e-05, "loss": 1.9394, "step": 17215 }, { "epoch": 0.22, "grad_norm": 4.225037574768066, "learning_rate": 1.9964960356716362e-05, "loss": 1.9175, "step": 17216 }, { "epoch": 0.22, "grad_norm": 4.361298084259033, "learning_rate": 1.996495156731793e-05, "loss": 2.2126, "step": 17217 }, { "epoch": 0.22, "grad_norm": 3.8300318717956543, "learning_rate": 1.9964942776819195e-05, "loss": 1.8918, "step": 17218 }, { "epoch": 0.22, "grad_norm": 3.959036111831665, "learning_rate": 1.996493398522016e-05, "loss": 1.9221, "step": 17219 }, { "epoch": 0.22, "grad_norm": 4.1459550857543945, "learning_rate": 1.9964925192520832e-05, "loss": 2.4332, "step": 17220 }, { "epoch": 0.22, "grad_norm": 4.634983539581299, "learning_rate": 1.9964916398721207e-05, "loss": 2.3225, "step": 17221 }, { "epoch": 0.22, "grad_norm": 3.9286861419677734, "learning_rate": 1.9964907603821285e-05, "loss": 1.9645, "step": 17222 }, { "epoch": 0.22, "grad_norm": 4.382444381713867, "learning_rate": 1.996489880782107e-05, "loss": 1.9844, "step": 17223 }, { "epoch": 0.22, "grad_norm": 4.084870338439941, "learning_rate": 1.9964890010720556e-05, "loss": 2.173, "step": 17224 }, { "epoch": 0.22, "grad_norm": 4.391706943511963, "learning_rate": 1.996488121251975e-05, "loss": 2.0305, "step": 17225 }, { "epoch": 0.22, "grad_norm": 4.25776481628418, "learning_rate": 1.9964872413218653e-05, "loss": 2.1559, "step": 17226 }, { "epoch": 0.22, "grad_norm": 4.165195941925049, "learning_rate": 1.996486361281727e-05, "loss": 1.9914, "step": 17227 }, { "epoch": 0.22, "grad_norm": 4.129258155822754, "learning_rate": 1.996485481131559e-05, "loss": 2.1166, "step": 17228 }, { "epoch": 0.22, "grad_norm": 4.354475498199463, "learning_rate": 1.9964846008713623e-05, "loss": 2.0116, "step": 17229 }, { "epoch": 0.22, "grad_norm": 4.2710676193237305, "learning_rate": 1.9964837205011372e-05, "loss": 2.1299, "step": 17230 }, { "epoch": 0.22, "grad_norm": 4.829367637634277, "learning_rate": 1.996482840020883e-05, "loss": 2.4005, "step": 17231 }, { "epoch": 0.22, "grad_norm": 3.950096368789673, "learning_rate": 1.9964819594306e-05, "loss": 1.7555, "step": 17232 }, { "epoch": 0.22, "grad_norm": 4.813612461090088, "learning_rate": 1.9964810787302887e-05, "loss": 2.4643, "step": 17233 }, { "epoch": 0.22, "grad_norm": 3.7926902770996094, "learning_rate": 1.9964801979199494e-05, "loss": 1.9324, "step": 17234 }, { "epoch": 0.22, "grad_norm": 4.188370704650879, "learning_rate": 1.9964793169995813e-05, "loss": 2.2504, "step": 17235 }, { "epoch": 0.22, "grad_norm": 4.255410671234131, "learning_rate": 1.9964784359691852e-05, "loss": 2.3943, "step": 17236 }, { "epoch": 0.22, "grad_norm": 4.209874153137207, "learning_rate": 1.9964775548287605e-05, "loss": 1.8273, "step": 17237 }, { "epoch": 0.22, "grad_norm": 4.3274946212768555, "learning_rate": 1.9964766735783084e-05, "loss": 2.406, "step": 17238 }, { "epoch": 0.22, "grad_norm": 3.509821891784668, "learning_rate": 1.9964757922178283e-05, "loss": 1.8649, "step": 17239 }, { "epoch": 0.22, "grad_norm": 4.307028770446777, "learning_rate": 1.99647491074732e-05, "loss": 2.4178, "step": 17240 }, { "epoch": 0.22, "grad_norm": 4.552897930145264, "learning_rate": 1.996474029166784e-05, "loss": 2.6723, "step": 17241 }, { "epoch": 0.22, "grad_norm": 4.6751933097839355, "learning_rate": 1.9964731474762208e-05, "loss": 1.9425, "step": 17242 }, { "epoch": 0.22, "grad_norm": 4.073802471160889, "learning_rate": 1.9964722656756295e-05, "loss": 1.7049, "step": 17243 }, { "epoch": 0.22, "grad_norm": 4.221043586730957, "learning_rate": 1.996471383765011e-05, "loss": 2.3577, "step": 17244 }, { "epoch": 0.22, "grad_norm": 4.08399772644043, "learning_rate": 1.9964705017443653e-05, "loss": 2.0923, "step": 17245 }, { "epoch": 0.22, "grad_norm": 4.107312202453613, "learning_rate": 1.996469619613692e-05, "loss": 2.0313, "step": 17246 }, { "epoch": 0.22, "grad_norm": 4.493102550506592, "learning_rate": 1.996468737372992e-05, "loss": 2.5693, "step": 17247 }, { "epoch": 0.22, "grad_norm": 4.01517391204834, "learning_rate": 1.9964678550222647e-05, "loss": 1.8555, "step": 17248 }, { "epoch": 0.22, "grad_norm": 4.825817584991455, "learning_rate": 1.9964669725615105e-05, "loss": 2.6236, "step": 17249 }, { "epoch": 0.22, "grad_norm": 4.146678447723389, "learning_rate": 1.9964660899907294e-05, "loss": 2.1152, "step": 17250 }, { "epoch": 0.22, "grad_norm": 3.9387104511260986, "learning_rate": 1.9964652073099215e-05, "loss": 2.0708, "step": 17251 }, { "epoch": 0.22, "grad_norm": 4.21918249130249, "learning_rate": 1.9964643245190867e-05, "loss": 2.2445, "step": 17252 }, { "epoch": 0.22, "grad_norm": 3.8830339908599854, "learning_rate": 1.9964634416182258e-05, "loss": 1.6647, "step": 17253 }, { "epoch": 0.22, "grad_norm": 4.485076427459717, "learning_rate": 1.9964625586073382e-05, "loss": 2.6093, "step": 17254 }, { "epoch": 0.22, "grad_norm": 4.322872161865234, "learning_rate": 1.9964616754864242e-05, "loss": 2.2807, "step": 17255 }, { "epoch": 0.22, "grad_norm": 4.221618175506592, "learning_rate": 1.9964607922554843e-05, "loss": 2.3284, "step": 17256 }, { "epoch": 0.22, "grad_norm": 4.350305557250977, "learning_rate": 1.996459908914518e-05, "loss": 2.3834, "step": 17257 }, { "epoch": 0.22, "grad_norm": 4.630918502807617, "learning_rate": 1.9964590254635256e-05, "loss": 2.9332, "step": 17258 }, { "epoch": 0.22, "grad_norm": 5.0321245193481445, "learning_rate": 1.9964581419025068e-05, "loss": 2.3912, "step": 17259 }, { "epoch": 0.22, "grad_norm": 4.357033729553223, "learning_rate": 1.9964572582314625e-05, "loss": 2.3554, "step": 17260 }, { "epoch": 0.22, "grad_norm": 3.4823009967803955, "learning_rate": 1.9964563744503923e-05, "loss": 1.5677, "step": 17261 }, { "epoch": 0.22, "grad_norm": 4.136662006378174, "learning_rate": 1.9964554905592966e-05, "loss": 1.956, "step": 17262 }, { "epoch": 0.22, "grad_norm": 4.237222671508789, "learning_rate": 1.996454606558175e-05, "loss": 2.0968, "step": 17263 }, { "epoch": 0.22, "grad_norm": 4.478634834289551, "learning_rate": 1.9964537224470283e-05, "loss": 2.4961, "step": 17264 }, { "epoch": 0.22, "grad_norm": 3.773808002471924, "learning_rate": 1.996452838225856e-05, "loss": 1.9456, "step": 17265 }, { "epoch": 0.22, "grad_norm": 3.8355536460876465, "learning_rate": 1.9964519538946587e-05, "loss": 1.9345, "step": 17266 }, { "epoch": 0.22, "grad_norm": 4.526544570922852, "learning_rate": 1.9964510694534354e-05, "loss": 2.7173, "step": 17267 }, { "epoch": 0.22, "grad_norm": 4.0607099533081055, "learning_rate": 1.9964501849021877e-05, "loss": 2.2253, "step": 17268 }, { "epoch": 0.22, "grad_norm": 4.23645544052124, "learning_rate": 1.9964493002409147e-05, "loss": 1.8906, "step": 17269 }, { "epoch": 0.22, "grad_norm": 4.9808831214904785, "learning_rate": 1.996448415469617e-05, "loss": 2.6899, "step": 17270 }, { "epoch": 0.22, "grad_norm": 4.449711322784424, "learning_rate": 1.9964475305882943e-05, "loss": 2.4017, "step": 17271 }, { "epoch": 0.22, "grad_norm": 4.832873344421387, "learning_rate": 1.9964466455969472e-05, "loss": 2.8065, "step": 17272 }, { "epoch": 0.22, "grad_norm": 4.430552005767822, "learning_rate": 1.9964457604955753e-05, "loss": 2.4363, "step": 17273 }, { "epoch": 0.22, "grad_norm": 3.58497953414917, "learning_rate": 1.996444875284179e-05, "loss": 1.7951, "step": 17274 }, { "epoch": 0.22, "grad_norm": 4.307592391967773, "learning_rate": 1.996443989962758e-05, "loss": 2.1644, "step": 17275 }, { "epoch": 0.22, "grad_norm": 3.6925976276397705, "learning_rate": 1.9964431045313125e-05, "loss": 1.9531, "step": 17276 }, { "epoch": 0.22, "grad_norm": 4.354220390319824, "learning_rate": 1.9964422189898432e-05, "loss": 2.5114, "step": 17277 }, { "epoch": 0.22, "grad_norm": 4.350801944732666, "learning_rate": 1.9964413333383498e-05, "loss": 1.8739, "step": 17278 }, { "epoch": 0.22, "grad_norm": 4.402603626251221, "learning_rate": 1.996440447576832e-05, "loss": 2.1871, "step": 17279 }, { "epoch": 0.22, "grad_norm": 4.592244625091553, "learning_rate": 1.9964395617052905e-05, "loss": 1.84, "step": 17280 }, { "epoch": 0.22, "grad_norm": 4.166320323944092, "learning_rate": 1.9964386757237253e-05, "loss": 2.1648, "step": 17281 }, { "epoch": 0.22, "grad_norm": 4.440915584564209, "learning_rate": 1.996437789632136e-05, "loss": 2.0091, "step": 17282 }, { "epoch": 0.22, "grad_norm": 4.81564998626709, "learning_rate": 1.9964369034305234e-05, "loss": 2.739, "step": 17283 }, { "epoch": 0.22, "grad_norm": 3.933415651321411, "learning_rate": 1.996436017118887e-05, "loss": 1.9479, "step": 17284 }, { "epoch": 0.22, "grad_norm": 4.1138200759887695, "learning_rate": 1.9964351306972273e-05, "loss": 2.067, "step": 17285 }, { "epoch": 0.22, "grad_norm": 5.100243091583252, "learning_rate": 1.9964342441655447e-05, "loss": 2.4266, "step": 17286 }, { "epoch": 0.22, "grad_norm": 3.904660701751709, "learning_rate": 1.9964333575238382e-05, "loss": 1.7491, "step": 17287 }, { "epoch": 0.22, "grad_norm": 4.175206184387207, "learning_rate": 1.9964324707721086e-05, "loss": 2.3418, "step": 17288 }, { "epoch": 0.22, "grad_norm": 4.31862211227417, "learning_rate": 1.9964315839103562e-05, "loss": 2.4918, "step": 17289 }, { "epoch": 0.22, "grad_norm": 3.9866528511047363, "learning_rate": 1.996430696938581e-05, "loss": 2.0422, "step": 17290 }, { "epoch": 0.22, "grad_norm": 4.040596008300781, "learning_rate": 1.9964298098567827e-05, "loss": 1.9864, "step": 17291 }, { "epoch": 0.22, "grad_norm": 4.096959114074707, "learning_rate": 1.9964289226649616e-05, "loss": 2.1596, "step": 17292 }, { "epoch": 0.22, "grad_norm": 4.64438009262085, "learning_rate": 1.9964280353631177e-05, "loss": 2.3537, "step": 17293 }, { "epoch": 0.22, "grad_norm": 4.832378387451172, "learning_rate": 1.9964271479512516e-05, "loss": 2.3431, "step": 17294 }, { "epoch": 0.22, "grad_norm": 4.526149272918701, "learning_rate": 1.996426260429363e-05, "loss": 2.5832, "step": 17295 }, { "epoch": 0.22, "grad_norm": 4.207765102386475, "learning_rate": 1.9964253727974517e-05, "loss": 2.3221, "step": 17296 }, { "epoch": 0.22, "grad_norm": 4.123274803161621, "learning_rate": 1.9964244850555186e-05, "loss": 2.0778, "step": 17297 }, { "epoch": 0.22, "grad_norm": 4.131882190704346, "learning_rate": 1.996423597203563e-05, "loss": 2.315, "step": 17298 }, { "epoch": 0.22, "grad_norm": 4.589804649353027, "learning_rate": 1.9964227092415857e-05, "loss": 2.4728, "step": 17299 }, { "epoch": 0.22, "grad_norm": 3.588839530944824, "learning_rate": 1.9964218211695858e-05, "loss": 1.5174, "step": 17300 }, { "epoch": 0.22, "grad_norm": 4.830472469329834, "learning_rate": 1.9964209329875642e-05, "loss": 2.1972, "step": 17301 }, { "epoch": 0.22, "grad_norm": 4.176151752471924, "learning_rate": 1.9964200446955215e-05, "loss": 2.4302, "step": 17302 }, { "epoch": 0.22, "grad_norm": 3.9571592807769775, "learning_rate": 1.9964191562934566e-05, "loss": 2.1233, "step": 17303 }, { "epoch": 0.22, "grad_norm": 4.252103328704834, "learning_rate": 1.99641826778137e-05, "loss": 1.9668, "step": 17304 }, { "epoch": 0.22, "grad_norm": 3.568713426589966, "learning_rate": 1.9964173791592622e-05, "loss": 1.8126, "step": 17305 }, { "epoch": 0.22, "grad_norm": 4.553459167480469, "learning_rate": 1.9964164904271327e-05, "loss": 2.0017, "step": 17306 }, { "epoch": 0.22, "grad_norm": 3.8654465675354004, "learning_rate": 1.996415601584982e-05, "loss": 2.0358, "step": 17307 }, { "epoch": 0.22, "grad_norm": 4.567370891571045, "learning_rate": 1.9964147126328103e-05, "loss": 1.9645, "step": 17308 }, { "epoch": 0.22, "grad_norm": 4.54727840423584, "learning_rate": 1.996413823570617e-05, "loss": 2.1129, "step": 17309 }, { "epoch": 0.22, "grad_norm": 4.446267604827881, "learning_rate": 1.9964129343984034e-05, "loss": 2.8747, "step": 17310 }, { "epoch": 0.22, "grad_norm": 4.434800148010254, "learning_rate": 1.9964120451161686e-05, "loss": 1.8984, "step": 17311 }, { "epoch": 0.22, "grad_norm": 3.9200165271759033, "learning_rate": 1.996411155723913e-05, "loss": 2.0312, "step": 17312 }, { "epoch": 0.22, "grad_norm": 4.5058913230896, "learning_rate": 1.996410266221637e-05, "loss": 2.0298, "step": 17313 }, { "epoch": 0.22, "grad_norm": 4.146353244781494, "learning_rate": 1.99640937660934e-05, "loss": 2.3009, "step": 17314 }, { "epoch": 0.22, "grad_norm": 4.8479437828063965, "learning_rate": 1.9964084868870226e-05, "loss": 2.6153, "step": 17315 }, { "epoch": 0.22, "grad_norm": 4.962101936340332, "learning_rate": 1.996407597054685e-05, "loss": 2.4142, "step": 17316 }, { "epoch": 0.22, "grad_norm": 4.10679292678833, "learning_rate": 1.996406707112327e-05, "loss": 2.052, "step": 17317 }, { "epoch": 0.22, "grad_norm": 4.501832962036133, "learning_rate": 1.9964058170599486e-05, "loss": 2.2831, "step": 17318 }, { "epoch": 0.22, "grad_norm": 4.279021739959717, "learning_rate": 1.9964049268975503e-05, "loss": 2.7228, "step": 17319 }, { "epoch": 0.22, "grad_norm": 3.8735854625701904, "learning_rate": 1.9964040366251317e-05, "loss": 2.0517, "step": 17320 }, { "epoch": 0.22, "grad_norm": 4.8920183181762695, "learning_rate": 1.9964031462426937e-05, "loss": 2.3382, "step": 17321 }, { "epoch": 0.22, "grad_norm": 4.110260009765625, "learning_rate": 1.9964022557502355e-05, "loss": 2.3253, "step": 17322 }, { "epoch": 0.22, "grad_norm": 4.173102378845215, "learning_rate": 1.9964013651477576e-05, "loss": 2.1421, "step": 17323 }, { "epoch": 0.22, "grad_norm": 4.010387897491455, "learning_rate": 1.99640047443526e-05, "loss": 2.0365, "step": 17324 }, { "epoch": 0.22, "grad_norm": 4.038000106811523, "learning_rate": 1.9963995836127436e-05, "loss": 2.454, "step": 17325 }, { "epoch": 0.22, "grad_norm": 3.994088888168335, "learning_rate": 1.9963986926802067e-05, "loss": 2.0891, "step": 17326 }, { "epoch": 0.22, "grad_norm": 4.405471324920654, "learning_rate": 1.9963978016376512e-05, "loss": 1.936, "step": 17327 }, { "epoch": 0.22, "grad_norm": 4.073815822601318, "learning_rate": 1.9963969104850762e-05, "loss": 2.2685, "step": 17328 }, { "epoch": 0.22, "grad_norm": 4.463619709014893, "learning_rate": 1.996396019222482e-05, "loss": 1.8613, "step": 17329 }, { "epoch": 0.22, "grad_norm": 4.332534313201904, "learning_rate": 1.9963951278498693e-05, "loss": 2.0894, "step": 17330 }, { "epoch": 0.22, "grad_norm": 5.141669750213623, "learning_rate": 1.996394236367237e-05, "loss": 2.6987, "step": 17331 }, { "epoch": 0.22, "grad_norm": 3.489431381225586, "learning_rate": 1.9963933447745865e-05, "loss": 1.5255, "step": 17332 }, { "epoch": 0.22, "grad_norm": 4.697132110595703, "learning_rate": 1.996392453071917e-05, "loss": 1.9929, "step": 17333 }, { "epoch": 0.22, "grad_norm": 3.94207501411438, "learning_rate": 1.9963915612592288e-05, "loss": 2.1954, "step": 17334 }, { "epoch": 0.22, "grad_norm": 3.7020223140716553, "learning_rate": 1.996390669336522e-05, "loss": 1.9683, "step": 17335 }, { "epoch": 0.22, "grad_norm": 4.331671714782715, "learning_rate": 1.9963897773037967e-05, "loss": 2.2576, "step": 17336 }, { "epoch": 0.22, "grad_norm": 4.2008137702941895, "learning_rate": 1.9963888851610534e-05, "loss": 1.8721, "step": 17337 }, { "epoch": 0.23, "grad_norm": 4.292079448699951, "learning_rate": 1.9963879929082918e-05, "loss": 2.5015, "step": 17338 }, { "epoch": 0.23, "grad_norm": 4.088766574859619, "learning_rate": 1.996387100545512e-05, "loss": 1.9147, "step": 17339 }, { "epoch": 0.23, "grad_norm": 3.9130935668945312, "learning_rate": 1.9963862080727142e-05, "loss": 1.9486, "step": 17340 }, { "epoch": 0.23, "grad_norm": 3.8658902645111084, "learning_rate": 1.9963853154898984e-05, "loss": 1.8279, "step": 17341 }, { "epoch": 0.23, "grad_norm": 4.132661819458008, "learning_rate": 1.9963844227970644e-05, "loss": 1.8655, "step": 17342 }, { "epoch": 0.23, "grad_norm": 4.379392147064209, "learning_rate": 1.996383529994213e-05, "loss": 2.4416, "step": 17343 }, { "epoch": 0.23, "grad_norm": 4.580626964569092, "learning_rate": 1.9963826370813438e-05, "loss": 2.2199, "step": 17344 }, { "epoch": 0.23, "grad_norm": 3.939347267150879, "learning_rate": 1.9963817440584574e-05, "loss": 2.1091, "step": 17345 }, { "epoch": 0.23, "grad_norm": 4.738082408905029, "learning_rate": 1.9963808509255536e-05, "loss": 2.6248, "step": 17346 }, { "epoch": 0.23, "grad_norm": 3.584709882736206, "learning_rate": 1.996379957682632e-05, "loss": 1.8582, "step": 17347 }, { "epoch": 0.23, "grad_norm": 4.422426223754883, "learning_rate": 1.9963790643296935e-05, "loss": 2.5055, "step": 17348 }, { "epoch": 0.23, "grad_norm": 4.005945682525635, "learning_rate": 1.9963781708667376e-05, "loss": 1.7514, "step": 17349 }, { "epoch": 0.23, "grad_norm": 4.725750923156738, "learning_rate": 1.996377277293765e-05, "loss": 2.587, "step": 17350 }, { "epoch": 0.23, "grad_norm": 3.6102490425109863, "learning_rate": 1.996376383610775e-05, "loss": 1.9685, "step": 17351 }, { "epoch": 0.23, "grad_norm": 4.694684982299805, "learning_rate": 1.9963754898177682e-05, "loss": 2.2746, "step": 17352 }, { "epoch": 0.23, "grad_norm": 4.075532913208008, "learning_rate": 1.9963745959147448e-05, "loss": 2.2459, "step": 17353 }, { "epoch": 0.23, "grad_norm": 4.1618218421936035, "learning_rate": 1.9963737019017046e-05, "loss": 2.4237, "step": 17354 }, { "epoch": 0.23, "grad_norm": 4.186347484588623, "learning_rate": 1.996372807778648e-05, "loss": 2.1009, "step": 17355 }, { "epoch": 0.23, "grad_norm": 3.9959146976470947, "learning_rate": 1.996371913545575e-05, "loss": 1.9267, "step": 17356 }, { "epoch": 0.23, "grad_norm": 3.683405876159668, "learning_rate": 1.9963710192024853e-05, "loss": 1.9972, "step": 17357 }, { "epoch": 0.23, "grad_norm": 4.239079475402832, "learning_rate": 1.9963701247493797e-05, "loss": 2.4302, "step": 17358 }, { "epoch": 0.23, "grad_norm": 3.7797629833221436, "learning_rate": 1.9963692301862578e-05, "loss": 1.7436, "step": 17359 }, { "epoch": 0.23, "grad_norm": 3.6659107208251953, "learning_rate": 1.9963683355131198e-05, "loss": 1.7867, "step": 17360 }, { "epoch": 0.23, "grad_norm": 4.972646236419678, "learning_rate": 1.9963674407299657e-05, "loss": 2.779, "step": 17361 }, { "epoch": 0.23, "grad_norm": 3.909849166870117, "learning_rate": 1.996366545836796e-05, "loss": 1.9204, "step": 17362 }, { "epoch": 0.23, "grad_norm": 4.168725967407227, "learning_rate": 1.9963656508336106e-05, "loss": 2.3737, "step": 17363 }, { "epoch": 0.23, "grad_norm": 4.045619964599609, "learning_rate": 1.9963647557204092e-05, "loss": 2.0692, "step": 17364 }, { "epoch": 0.23, "grad_norm": 4.593830585479736, "learning_rate": 1.9963638604971924e-05, "loss": 2.4449, "step": 17365 }, { "epoch": 0.23, "grad_norm": 4.0804443359375, "learning_rate": 1.99636296516396e-05, "loss": 2.2096, "step": 17366 }, { "epoch": 0.23, "grad_norm": 4.441762447357178, "learning_rate": 1.9963620697207125e-05, "loss": 2.1277, "step": 17367 }, { "epoch": 0.23, "grad_norm": 4.639977931976318, "learning_rate": 1.9963611741674496e-05, "loss": 2.2017, "step": 17368 }, { "epoch": 0.23, "grad_norm": 3.930239677429199, "learning_rate": 1.9963602785041715e-05, "loss": 2.2053, "step": 17369 }, { "epoch": 0.23, "grad_norm": 4.088381767272949, "learning_rate": 1.9963593827308783e-05, "loss": 2.0484, "step": 17370 }, { "epoch": 0.23, "grad_norm": 4.8870649337768555, "learning_rate": 1.99635848684757e-05, "loss": 3.0233, "step": 17371 }, { "epoch": 0.23, "grad_norm": 3.9668209552764893, "learning_rate": 1.996357590854247e-05, "loss": 2.2809, "step": 17372 }, { "epoch": 0.23, "grad_norm": 3.9583616256713867, "learning_rate": 1.996356694750909e-05, "loss": 2.2002, "step": 17373 }, { "epoch": 0.23, "grad_norm": 4.938107490539551, "learning_rate": 1.9963557985375564e-05, "loss": 2.4842, "step": 17374 }, { "epoch": 0.23, "grad_norm": 4.0442938804626465, "learning_rate": 1.9963549022141895e-05, "loss": 2.017, "step": 17375 }, { "epoch": 0.23, "grad_norm": 4.174388408660889, "learning_rate": 1.9963540057808077e-05, "loss": 2.0891, "step": 17376 }, { "epoch": 0.23, "grad_norm": 4.013363838195801, "learning_rate": 1.9963531092374118e-05, "loss": 1.9546, "step": 17377 }, { "epoch": 0.23, "grad_norm": 4.064201354980469, "learning_rate": 1.9963522125840015e-05, "loss": 1.9713, "step": 17378 }, { "epoch": 0.23, "grad_norm": 4.76345682144165, "learning_rate": 1.9963513158205773e-05, "loss": 2.4284, "step": 17379 }, { "epoch": 0.23, "grad_norm": 4.595170021057129, "learning_rate": 1.9963504189471384e-05, "loss": 2.6847, "step": 17380 }, { "epoch": 0.23, "grad_norm": 4.371406078338623, "learning_rate": 1.9963495219636862e-05, "loss": 1.9972, "step": 17381 }, { "epoch": 0.23, "grad_norm": 4.762630462646484, "learning_rate": 1.9963486248702197e-05, "loss": 2.5443, "step": 17382 }, { "epoch": 0.23, "grad_norm": 4.052637577056885, "learning_rate": 1.9963477276667395e-05, "loss": 2.2216, "step": 17383 }, { "epoch": 0.23, "grad_norm": 4.737083911895752, "learning_rate": 1.9963468303532456e-05, "loss": 2.8435, "step": 17384 }, { "epoch": 0.23, "grad_norm": 3.997270345687866, "learning_rate": 1.996345932929738e-05, "loss": 1.8739, "step": 17385 }, { "epoch": 0.23, "grad_norm": 4.477347373962402, "learning_rate": 1.996345035396217e-05, "loss": 2.5129, "step": 17386 }, { "epoch": 0.23, "grad_norm": 4.557318687438965, "learning_rate": 1.9963441377526828e-05, "loss": 2.1301, "step": 17387 }, { "epoch": 0.23, "grad_norm": 4.569859027862549, "learning_rate": 1.996343239999135e-05, "loss": 2.3856, "step": 17388 }, { "epoch": 0.23, "grad_norm": 3.907646894454956, "learning_rate": 1.9963423421355742e-05, "loss": 2.3825, "step": 17389 }, { "epoch": 0.23, "grad_norm": 4.4738240242004395, "learning_rate": 1.996341444162e-05, "loss": 2.242, "step": 17390 }, { "epoch": 0.23, "grad_norm": 3.9408764839172363, "learning_rate": 1.996340546078413e-05, "loss": 2.0277, "step": 17391 }, { "epoch": 0.23, "grad_norm": 3.844719409942627, "learning_rate": 1.9963396478848134e-05, "loss": 1.9616, "step": 17392 }, { "epoch": 0.23, "grad_norm": 4.269389629364014, "learning_rate": 1.9963387495812007e-05, "loss": 2.0352, "step": 17393 }, { "epoch": 0.23, "grad_norm": 4.537705421447754, "learning_rate": 1.9963378511675753e-05, "loss": 2.5374, "step": 17394 }, { "epoch": 0.23, "grad_norm": 4.369192123413086, "learning_rate": 1.996336952643937e-05, "loss": 1.9476, "step": 17395 }, { "epoch": 0.23, "grad_norm": 4.800515651702881, "learning_rate": 1.9963360540102868e-05, "loss": 2.3897, "step": 17396 }, { "epoch": 0.23, "grad_norm": 3.669602632522583, "learning_rate": 1.996335155266624e-05, "loss": 2.1495, "step": 17397 }, { "epoch": 0.23, "grad_norm": 4.161421775817871, "learning_rate": 1.9963342564129487e-05, "loss": 2.3735, "step": 17398 }, { "epoch": 0.23, "grad_norm": 4.3000807762146, "learning_rate": 1.9963333574492615e-05, "loss": 2.0761, "step": 17399 }, { "epoch": 0.23, "grad_norm": 3.5886852741241455, "learning_rate": 1.996332458375562e-05, "loss": 1.6474, "step": 17400 }, { "epoch": 0.23, "grad_norm": 4.205067157745361, "learning_rate": 1.9963315591918502e-05, "loss": 2.2617, "step": 17401 }, { "epoch": 0.23, "grad_norm": 3.868908643722534, "learning_rate": 1.996330659898127e-05, "loss": 2.0935, "step": 17402 }, { "epoch": 0.23, "grad_norm": 4.3475236892700195, "learning_rate": 1.9963297604943915e-05, "loss": 2.1418, "step": 17403 }, { "epoch": 0.23, "grad_norm": 3.843501329421997, "learning_rate": 1.996328860980645e-05, "loss": 2.0711, "step": 17404 }, { "epoch": 0.23, "grad_norm": 3.9449610710144043, "learning_rate": 1.9963279613568863e-05, "loss": 1.9803, "step": 17405 }, { "epoch": 0.23, "grad_norm": 5.144030570983887, "learning_rate": 1.996327061623116e-05, "loss": 2.3879, "step": 17406 }, { "epoch": 0.23, "grad_norm": 4.053491592407227, "learning_rate": 1.9963261617793345e-05, "loss": 2.1841, "step": 17407 }, { "epoch": 0.23, "grad_norm": 4.8378520011901855, "learning_rate": 1.996325261825542e-05, "loss": 2.903, "step": 17408 }, { "epoch": 0.23, "grad_norm": 4.734529495239258, "learning_rate": 1.9963243617617377e-05, "loss": 2.1883, "step": 17409 }, { "epoch": 0.23, "grad_norm": 4.3091959953308105, "learning_rate": 1.9963234615879227e-05, "loss": 2.6385, "step": 17410 }, { "epoch": 0.23, "grad_norm": 5.481428146362305, "learning_rate": 1.9963225613040967e-05, "loss": 2.958, "step": 17411 }, { "epoch": 0.23, "grad_norm": 4.556845188140869, "learning_rate": 1.9963216609102594e-05, "loss": 1.9417, "step": 17412 }, { "epoch": 0.23, "grad_norm": 4.034186363220215, "learning_rate": 1.9963207604064116e-05, "loss": 2.1065, "step": 17413 }, { "epoch": 0.23, "grad_norm": 4.663837909698486, "learning_rate": 1.996319859792553e-05, "loss": 2.3445, "step": 17414 }, { "epoch": 0.23, "grad_norm": 4.0266642570495605, "learning_rate": 1.996318959068684e-05, "loss": 2.3174, "step": 17415 }, { "epoch": 0.23, "grad_norm": 4.730576515197754, "learning_rate": 1.9963180582348044e-05, "loss": 2.2351, "step": 17416 }, { "epoch": 0.23, "grad_norm": 3.694434404373169, "learning_rate": 1.996317157290914e-05, "loss": 2.0331, "step": 17417 }, { "epoch": 0.23, "grad_norm": 4.391165256500244, "learning_rate": 1.996316256237014e-05, "loss": 2.2409, "step": 17418 }, { "epoch": 0.23, "grad_norm": 4.078717231750488, "learning_rate": 1.996315355073103e-05, "loss": 2.2122, "step": 17419 }, { "epoch": 0.23, "grad_norm": 3.3544440269470215, "learning_rate": 1.9963144537991824e-05, "loss": 1.6996, "step": 17420 }, { "epoch": 0.23, "grad_norm": 4.255072116851807, "learning_rate": 1.9963135524152517e-05, "loss": 2.1807, "step": 17421 }, { "epoch": 0.23, "grad_norm": 4.120334625244141, "learning_rate": 1.9963126509213107e-05, "loss": 2.2315, "step": 17422 }, { "epoch": 0.23, "grad_norm": 3.452899932861328, "learning_rate": 1.9963117493173606e-05, "loss": 1.8103, "step": 17423 }, { "epoch": 0.23, "grad_norm": 4.427767276763916, "learning_rate": 1.9963108476034004e-05, "loss": 2.3129, "step": 17424 }, { "epoch": 0.23, "grad_norm": 4.267590522766113, "learning_rate": 1.9963099457794304e-05, "loss": 2.3322, "step": 17425 }, { "epoch": 0.23, "grad_norm": 4.222615718841553, "learning_rate": 1.996309043845451e-05, "loss": 1.8107, "step": 17426 }, { "epoch": 0.23, "grad_norm": 4.031184196472168, "learning_rate": 1.9963081418014622e-05, "loss": 2.1611, "step": 17427 }, { "epoch": 0.23, "grad_norm": 4.404300689697266, "learning_rate": 1.996307239647464e-05, "loss": 2.1175, "step": 17428 }, { "epoch": 0.23, "grad_norm": 4.0453596115112305, "learning_rate": 1.9963063373834567e-05, "loss": 2.3505, "step": 17429 }, { "epoch": 0.23, "grad_norm": 4.012684345245361, "learning_rate": 1.99630543500944e-05, "loss": 2.1009, "step": 17430 }, { "epoch": 0.23, "grad_norm": 4.095292091369629, "learning_rate": 1.9963045325254148e-05, "loss": 2.2828, "step": 17431 }, { "epoch": 0.23, "grad_norm": 3.3445277214050293, "learning_rate": 1.9963036299313803e-05, "loss": 1.6643, "step": 17432 }, { "epoch": 0.23, "grad_norm": 4.425975799560547, "learning_rate": 1.9963027272273373e-05, "loss": 2.1539, "step": 17433 }, { "epoch": 0.23, "grad_norm": 3.629167318344116, "learning_rate": 1.9963018244132854e-05, "loss": 1.9153, "step": 17434 }, { "epoch": 0.23, "grad_norm": 4.225146770477295, "learning_rate": 1.9963009214892248e-05, "loss": 2.1766, "step": 17435 }, { "epoch": 0.23, "grad_norm": 3.941122055053711, "learning_rate": 1.9963000184551554e-05, "loss": 2.1175, "step": 17436 }, { "epoch": 0.23, "grad_norm": 4.780111789703369, "learning_rate": 1.996299115311078e-05, "loss": 2.3283, "step": 17437 }, { "epoch": 0.23, "grad_norm": 3.848569631576538, "learning_rate": 1.996298212056992e-05, "loss": 1.8476, "step": 17438 }, { "epoch": 0.23, "grad_norm": 3.7630014419555664, "learning_rate": 1.9962973086928984e-05, "loss": 2.1101, "step": 17439 }, { "epoch": 0.23, "grad_norm": 3.809601068496704, "learning_rate": 1.996296405218796e-05, "loss": 1.9702, "step": 17440 }, { "epoch": 0.23, "grad_norm": 4.264123916625977, "learning_rate": 1.9962955016346857e-05, "loss": 2.1061, "step": 17441 }, { "epoch": 0.23, "grad_norm": 4.272258758544922, "learning_rate": 1.9962945979405675e-05, "loss": 2.2522, "step": 17442 }, { "epoch": 0.23, "grad_norm": 3.880280017852783, "learning_rate": 1.9962936941364415e-05, "loss": 2.0039, "step": 17443 }, { "epoch": 0.23, "grad_norm": 4.060068130493164, "learning_rate": 1.996292790222308e-05, "loss": 1.8437, "step": 17444 }, { "epoch": 0.23, "grad_norm": 4.024496555328369, "learning_rate": 1.9962918861981663e-05, "loss": 1.9892, "step": 17445 }, { "epoch": 0.23, "grad_norm": 4.419495105743408, "learning_rate": 1.9962909820640176e-05, "loss": 2.4874, "step": 17446 }, { "epoch": 0.23, "grad_norm": 3.514630079269409, "learning_rate": 1.9962900778198616e-05, "loss": 2.068, "step": 17447 }, { "epoch": 0.23, "grad_norm": 4.52409029006958, "learning_rate": 1.9962891734656978e-05, "loss": 2.0156, "step": 17448 }, { "epoch": 0.23, "grad_norm": 3.93644118309021, "learning_rate": 1.9962882690015267e-05, "loss": 2.3006, "step": 17449 }, { "epoch": 0.23, "grad_norm": 4.253432750701904, "learning_rate": 1.996287364427349e-05, "loss": 2.3592, "step": 17450 }, { "epoch": 0.23, "grad_norm": 4.07754373550415, "learning_rate": 1.9962864597431643e-05, "loss": 2.2033, "step": 17451 }, { "epoch": 0.23, "grad_norm": 3.963874101638794, "learning_rate": 1.9962855549489723e-05, "loss": 1.8419, "step": 17452 }, { "epoch": 0.23, "grad_norm": 4.472999572753906, "learning_rate": 1.9962846500447733e-05, "loss": 2.3142, "step": 17453 }, { "epoch": 0.23, "grad_norm": 4.318856716156006, "learning_rate": 1.996283745030568e-05, "loss": 2.3409, "step": 17454 }, { "epoch": 0.23, "grad_norm": 3.53243088722229, "learning_rate": 1.9962828399063555e-05, "loss": 1.6671, "step": 17455 }, { "epoch": 0.23, "grad_norm": 3.7712721824645996, "learning_rate": 1.996281934672137e-05, "loss": 1.5611, "step": 17456 }, { "epoch": 0.23, "grad_norm": 4.832724571228027, "learning_rate": 1.996281029327912e-05, "loss": 2.3418, "step": 17457 }, { "epoch": 0.23, "grad_norm": 4.611542701721191, "learning_rate": 1.9962801238736806e-05, "loss": 2.0976, "step": 17458 }, { "epoch": 0.23, "grad_norm": 4.340738773345947, "learning_rate": 1.996279218309443e-05, "loss": 2.4182, "step": 17459 }, { "epoch": 0.23, "grad_norm": 4.2618327140808105, "learning_rate": 1.9962783126351994e-05, "loss": 1.9973, "step": 17460 }, { "epoch": 0.23, "grad_norm": 4.80177116394043, "learning_rate": 1.9962774068509495e-05, "loss": 2.4384, "step": 17461 }, { "epoch": 0.23, "grad_norm": 4.372261047363281, "learning_rate": 1.996276500956694e-05, "loss": 2.088, "step": 17462 }, { "epoch": 0.23, "grad_norm": 4.036883354187012, "learning_rate": 1.9962755949524325e-05, "loss": 2.0904, "step": 17463 }, { "epoch": 0.23, "grad_norm": 4.620778560638428, "learning_rate": 1.996274688838165e-05, "loss": 2.0909, "step": 17464 }, { "epoch": 0.23, "grad_norm": 3.953688383102417, "learning_rate": 1.9962737826138924e-05, "loss": 2.1971, "step": 17465 }, { "epoch": 0.23, "grad_norm": 4.373997688293457, "learning_rate": 1.996272876279614e-05, "loss": 2.2718, "step": 17466 }, { "epoch": 0.23, "grad_norm": 3.383999824523926, "learning_rate": 1.99627196983533e-05, "loss": 1.5064, "step": 17467 }, { "epoch": 0.23, "grad_norm": 4.87093448638916, "learning_rate": 1.9962710632810405e-05, "loss": 2.6466, "step": 17468 }, { "epoch": 0.23, "grad_norm": 4.650026798248291, "learning_rate": 1.9962701566167465e-05, "loss": 2.0981, "step": 17469 }, { "epoch": 0.23, "grad_norm": 3.732757329940796, "learning_rate": 1.996269249842447e-05, "loss": 2.112, "step": 17470 }, { "epoch": 0.23, "grad_norm": 3.9853625297546387, "learning_rate": 1.996268342958142e-05, "loss": 2.0431, "step": 17471 }, { "epoch": 0.23, "grad_norm": 3.782088279724121, "learning_rate": 1.9962674359638328e-05, "loss": 2.0038, "step": 17472 }, { "epoch": 0.23, "grad_norm": 3.9317221641540527, "learning_rate": 1.9962665288595183e-05, "loss": 1.9005, "step": 17473 }, { "epoch": 0.23, "grad_norm": 4.519224643707275, "learning_rate": 1.9962656216451996e-05, "loss": 2.3318, "step": 17474 }, { "epoch": 0.23, "grad_norm": 3.576188325881958, "learning_rate": 1.996264714320876e-05, "loss": 1.6331, "step": 17475 }, { "epoch": 0.23, "grad_norm": 4.209419250488281, "learning_rate": 1.9962638068865478e-05, "loss": 1.9823, "step": 17476 }, { "epoch": 0.23, "grad_norm": 4.198975086212158, "learning_rate": 1.9962628993422152e-05, "loss": 2.1568, "step": 17477 }, { "epoch": 0.23, "grad_norm": 4.221142292022705, "learning_rate": 1.9962619916878782e-05, "loss": 1.9727, "step": 17478 }, { "epoch": 0.23, "grad_norm": 4.319725036621094, "learning_rate": 1.996261083923537e-05, "loss": 2.1891, "step": 17479 }, { "epoch": 0.23, "grad_norm": 4.070407390594482, "learning_rate": 1.9962601760491917e-05, "loss": 1.945, "step": 17480 }, { "epoch": 0.23, "grad_norm": 3.7830746173858643, "learning_rate": 1.9962592680648425e-05, "loss": 1.8139, "step": 17481 }, { "epoch": 0.23, "grad_norm": 4.438711643218994, "learning_rate": 1.996258359970489e-05, "loss": 2.4708, "step": 17482 }, { "epoch": 0.23, "grad_norm": 3.8830454349517822, "learning_rate": 1.9962574517661323e-05, "loss": 1.8042, "step": 17483 }, { "epoch": 0.23, "grad_norm": 4.212221145629883, "learning_rate": 1.9962565434517712e-05, "loss": 2.1505, "step": 17484 }, { "epoch": 0.23, "grad_norm": 3.938105344772339, "learning_rate": 1.996255635027407e-05, "loss": 2.1324, "step": 17485 }, { "epoch": 0.23, "grad_norm": 4.088898658752441, "learning_rate": 1.996254726493039e-05, "loss": 2.0395, "step": 17486 }, { "epoch": 0.23, "grad_norm": 4.164245128631592, "learning_rate": 1.9962538178486676e-05, "loss": 2.0076, "step": 17487 }, { "epoch": 0.23, "grad_norm": 3.7855587005615234, "learning_rate": 1.996252909094293e-05, "loss": 1.7619, "step": 17488 }, { "epoch": 0.23, "grad_norm": 4.353041648864746, "learning_rate": 1.996252000229915e-05, "loss": 2.3632, "step": 17489 }, { "epoch": 0.23, "grad_norm": 4.004187107086182, "learning_rate": 1.9962510912555338e-05, "loss": 1.8757, "step": 17490 }, { "epoch": 0.23, "grad_norm": 3.8714346885681152, "learning_rate": 1.99625018217115e-05, "loss": 2.0986, "step": 17491 }, { "epoch": 0.23, "grad_norm": 4.412347793579102, "learning_rate": 1.996249272976763e-05, "loss": 2.5313, "step": 17492 }, { "epoch": 0.23, "grad_norm": 3.9052538871765137, "learning_rate": 1.9962483636723734e-05, "loss": 2.2341, "step": 17493 }, { "epoch": 0.23, "grad_norm": 4.2225775718688965, "learning_rate": 1.996247454257981e-05, "loss": 1.8572, "step": 17494 }, { "epoch": 0.23, "grad_norm": 3.7367918491363525, "learning_rate": 1.996246544733586e-05, "loss": 1.7857, "step": 17495 }, { "epoch": 0.23, "grad_norm": 4.295860290527344, "learning_rate": 1.996245635099188e-05, "loss": 2.2697, "step": 17496 }, { "epoch": 0.23, "grad_norm": 3.8658623695373535, "learning_rate": 1.996244725354788e-05, "loss": 2.0986, "step": 17497 }, { "epoch": 0.23, "grad_norm": 4.652743339538574, "learning_rate": 1.9962438155003856e-05, "loss": 2.5592, "step": 17498 }, { "epoch": 0.23, "grad_norm": 3.8508763313293457, "learning_rate": 1.996242905535981e-05, "loss": 1.8331, "step": 17499 }, { "epoch": 0.23, "grad_norm": 4.480083465576172, "learning_rate": 1.9962419954615743e-05, "loss": 2.4804, "step": 17500 }, { "epoch": 0.23, "grad_norm": 4.036876678466797, "learning_rate": 1.9962410852771654e-05, "loss": 1.7511, "step": 17501 }, { "epoch": 0.23, "grad_norm": 4.051039218902588, "learning_rate": 1.9962401749827548e-05, "loss": 2.0515, "step": 17502 }, { "epoch": 0.23, "grad_norm": 4.78624963760376, "learning_rate": 1.9962392645783423e-05, "loss": 2.336, "step": 17503 }, { "epoch": 0.23, "grad_norm": 4.037922382354736, "learning_rate": 1.996238354063928e-05, "loss": 2.1972, "step": 17504 }, { "epoch": 0.23, "grad_norm": 4.480708122253418, "learning_rate": 1.996237443439512e-05, "loss": 2.2321, "step": 17505 }, { "epoch": 0.23, "grad_norm": 4.349857330322266, "learning_rate": 1.996236532705095e-05, "loss": 2.1213, "step": 17506 }, { "epoch": 0.23, "grad_norm": 5.1201605796813965, "learning_rate": 1.9962356218606766e-05, "loss": 2.4911, "step": 17507 }, { "epoch": 0.23, "grad_norm": 4.4825663566589355, "learning_rate": 1.9962347109062562e-05, "loss": 2.2682, "step": 17508 }, { "epoch": 0.23, "grad_norm": 4.466457366943359, "learning_rate": 1.996233799841835e-05, "loss": 2.4961, "step": 17509 }, { "epoch": 0.23, "grad_norm": 4.560506343841553, "learning_rate": 1.9962328886674127e-05, "loss": 2.0482, "step": 17510 }, { "epoch": 0.23, "grad_norm": 4.045881748199463, "learning_rate": 1.9962319773829892e-05, "loss": 2.4258, "step": 17511 }, { "epoch": 0.23, "grad_norm": 3.9719479084014893, "learning_rate": 1.996231065988565e-05, "loss": 2.7, "step": 17512 }, { "epoch": 0.23, "grad_norm": 4.654129505157471, "learning_rate": 1.9962301544841397e-05, "loss": 2.0793, "step": 17513 }, { "epoch": 0.23, "grad_norm": 3.84222412109375, "learning_rate": 1.996229242869714e-05, "loss": 1.9437, "step": 17514 }, { "epoch": 0.23, "grad_norm": 4.2940497398376465, "learning_rate": 1.9962283311452877e-05, "loss": 2.47, "step": 17515 }, { "epoch": 0.23, "grad_norm": 4.118537425994873, "learning_rate": 1.9962274193108607e-05, "loss": 2.0927, "step": 17516 }, { "epoch": 0.23, "grad_norm": 4.583016872406006, "learning_rate": 1.9962265073664332e-05, "loss": 1.8338, "step": 17517 }, { "epoch": 0.23, "grad_norm": 4.767356872558594, "learning_rate": 1.9962255953120057e-05, "loss": 2.3578, "step": 17518 }, { "epoch": 0.23, "grad_norm": 4.428865909576416, "learning_rate": 1.9962246831475777e-05, "loss": 2.1235, "step": 17519 }, { "epoch": 0.23, "grad_norm": 3.887402296066284, "learning_rate": 1.99622377087315e-05, "loss": 1.7353, "step": 17520 }, { "epoch": 0.23, "grad_norm": 4.52340030670166, "learning_rate": 1.9962228584887217e-05, "loss": 2.1829, "step": 17521 }, { "epoch": 0.23, "grad_norm": 4.47353982925415, "learning_rate": 1.996221945994294e-05, "loss": 2.6342, "step": 17522 }, { "epoch": 0.23, "grad_norm": 3.793175458908081, "learning_rate": 1.996221033389866e-05, "loss": 1.892, "step": 17523 }, { "epoch": 0.23, "grad_norm": 4.565255641937256, "learning_rate": 1.996220120675439e-05, "loss": 2.6168, "step": 17524 }, { "epoch": 0.23, "grad_norm": 4.04329776763916, "learning_rate": 1.996219207851012e-05, "loss": 2.027, "step": 17525 }, { "epoch": 0.23, "grad_norm": 3.976102590560913, "learning_rate": 1.9962182949165853e-05, "loss": 2.0979, "step": 17526 }, { "epoch": 0.23, "grad_norm": 4.695575714111328, "learning_rate": 1.9962173818721596e-05, "loss": 2.1612, "step": 17527 }, { "epoch": 0.23, "grad_norm": 4.0599365234375, "learning_rate": 1.996216468717734e-05, "loss": 2.5566, "step": 17528 }, { "epoch": 0.23, "grad_norm": 4.183202266693115, "learning_rate": 1.9962155554533098e-05, "loss": 2.2158, "step": 17529 }, { "epoch": 0.23, "grad_norm": 4.055242538452148, "learning_rate": 1.9962146420788864e-05, "loss": 1.8389, "step": 17530 }, { "epoch": 0.23, "grad_norm": 4.626469612121582, "learning_rate": 1.996213728594464e-05, "loss": 2.6019, "step": 17531 }, { "epoch": 0.23, "grad_norm": 3.8586068153381348, "learning_rate": 1.9962128150000427e-05, "loss": 2.0398, "step": 17532 }, { "epoch": 0.23, "grad_norm": 4.585236072540283, "learning_rate": 1.996211901295622e-05, "loss": 2.6512, "step": 17533 }, { "epoch": 0.23, "grad_norm": 4.147642135620117, "learning_rate": 1.9962109874812034e-05, "loss": 1.671, "step": 17534 }, { "epoch": 0.23, "grad_norm": 4.27953577041626, "learning_rate": 1.996210073556786e-05, "loss": 2.3709, "step": 17535 }, { "epoch": 0.23, "grad_norm": 4.794002532958984, "learning_rate": 1.99620915952237e-05, "loss": 2.8464, "step": 17536 }, { "epoch": 0.23, "grad_norm": 3.8782968521118164, "learning_rate": 1.9962082453779555e-05, "loss": 1.8067, "step": 17537 }, { "epoch": 0.23, "grad_norm": 4.011862277984619, "learning_rate": 1.996207331123543e-05, "loss": 1.8862, "step": 17538 }, { "epoch": 0.23, "grad_norm": 4.912693023681641, "learning_rate": 1.996206416759132e-05, "loss": 2.3931, "step": 17539 }, { "epoch": 0.23, "grad_norm": 4.058554172515869, "learning_rate": 1.996205502284723e-05, "loss": 2.0218, "step": 17540 }, { "epoch": 0.23, "grad_norm": 4.649911403656006, "learning_rate": 1.996204587700316e-05, "loss": 2.2677, "step": 17541 }, { "epoch": 0.23, "grad_norm": 4.024483680725098, "learning_rate": 1.9962036730059115e-05, "loss": 1.8582, "step": 17542 }, { "epoch": 0.23, "grad_norm": 3.9684348106384277, "learning_rate": 1.9962027582015088e-05, "loss": 2.3164, "step": 17543 }, { "epoch": 0.23, "grad_norm": 4.129769802093506, "learning_rate": 1.9962018432871087e-05, "loss": 2.0001, "step": 17544 }, { "epoch": 0.23, "grad_norm": 3.579347610473633, "learning_rate": 1.996200928262711e-05, "loss": 1.8568, "step": 17545 }, { "epoch": 0.23, "grad_norm": 4.034500598907471, "learning_rate": 1.9962000131283154e-05, "loss": 1.832, "step": 17546 }, { "epoch": 0.23, "grad_norm": 4.110189914703369, "learning_rate": 1.9961990978839228e-05, "loss": 2.1269, "step": 17547 }, { "epoch": 0.23, "grad_norm": 3.9955413341522217, "learning_rate": 1.9961981825295328e-05, "loss": 1.8517, "step": 17548 }, { "epoch": 0.23, "grad_norm": 4.331658840179443, "learning_rate": 1.9961972670651455e-05, "loss": 1.7795, "step": 17549 }, { "epoch": 0.23, "grad_norm": 4.848576068878174, "learning_rate": 1.9961963514907614e-05, "loss": 2.405, "step": 17550 }, { "epoch": 0.23, "grad_norm": 3.8066930770874023, "learning_rate": 1.99619543580638e-05, "loss": 1.6375, "step": 17551 }, { "epoch": 0.23, "grad_norm": 4.056615829467773, "learning_rate": 1.996194520012002e-05, "loss": 2.1487, "step": 17552 }, { "epoch": 0.23, "grad_norm": 4.199993133544922, "learning_rate": 1.996193604107627e-05, "loss": 2.3646, "step": 17553 }, { "epoch": 0.23, "grad_norm": 4.228349208831787, "learning_rate": 1.9961926880932555e-05, "loss": 1.936, "step": 17554 }, { "epoch": 0.23, "grad_norm": 4.576873302459717, "learning_rate": 1.996191771968887e-05, "loss": 2.1863, "step": 17555 }, { "epoch": 0.23, "grad_norm": 4.26427698135376, "learning_rate": 1.9961908557345228e-05, "loss": 2.2783, "step": 17556 }, { "epoch": 0.23, "grad_norm": 3.947540521621704, "learning_rate": 1.9961899393901617e-05, "loss": 2.05, "step": 17557 }, { "epoch": 0.23, "grad_norm": 3.8074870109558105, "learning_rate": 1.9961890229358042e-05, "loss": 1.7118, "step": 17558 }, { "epoch": 0.23, "grad_norm": 4.409600257873535, "learning_rate": 1.996188106371451e-05, "loss": 2.0098, "step": 17559 }, { "epoch": 0.23, "grad_norm": 4.627596378326416, "learning_rate": 1.9961871896971015e-05, "loss": 2.0218, "step": 17560 }, { "epoch": 0.23, "grad_norm": 4.275244235992432, "learning_rate": 1.996186272912756e-05, "loss": 2.4398, "step": 17561 }, { "epoch": 0.23, "grad_norm": 3.66103196144104, "learning_rate": 1.9961853560184146e-05, "loss": 1.6948, "step": 17562 }, { "epoch": 0.23, "grad_norm": 3.9043221473693848, "learning_rate": 1.9961844390140776e-05, "loss": 2.228, "step": 17563 }, { "epoch": 0.23, "grad_norm": 4.008868217468262, "learning_rate": 1.9961835218997446e-05, "loss": 2.2477, "step": 17564 }, { "epoch": 0.23, "grad_norm": 4.687460422515869, "learning_rate": 1.9961826046754166e-05, "loss": 2.4245, "step": 17565 }, { "epoch": 0.23, "grad_norm": 4.391247272491455, "learning_rate": 1.996181687341093e-05, "loss": 2.2376, "step": 17566 }, { "epoch": 0.23, "grad_norm": 4.26018762588501, "learning_rate": 1.9961807698967737e-05, "loss": 2.1758, "step": 17567 }, { "epoch": 0.23, "grad_norm": 4.998486518859863, "learning_rate": 1.9961798523424593e-05, "loss": 2.6952, "step": 17568 }, { "epoch": 0.23, "grad_norm": 4.170516490936279, "learning_rate": 1.9961789346781498e-05, "loss": 2.3079, "step": 17569 }, { "epoch": 0.23, "grad_norm": 4.666228294372559, "learning_rate": 1.9961780169038453e-05, "loss": 2.6722, "step": 17570 }, { "epoch": 0.23, "grad_norm": 4.269695281982422, "learning_rate": 1.9961770990195455e-05, "loss": 2.419, "step": 17571 }, { "epoch": 0.23, "grad_norm": 4.102054119110107, "learning_rate": 1.9961761810252513e-05, "loss": 1.9884, "step": 17572 }, { "epoch": 0.23, "grad_norm": 4.421220779418945, "learning_rate": 1.996175262920962e-05, "loss": 2.349, "step": 17573 }, { "epoch": 0.23, "grad_norm": 4.206658840179443, "learning_rate": 1.9961743447066785e-05, "loss": 2.3179, "step": 17574 }, { "epoch": 0.23, "grad_norm": 4.463343143463135, "learning_rate": 1.9961734263824e-05, "loss": 2.4163, "step": 17575 }, { "epoch": 0.23, "grad_norm": 3.794975757598877, "learning_rate": 1.996172507948127e-05, "loss": 2.25, "step": 17576 }, { "epoch": 0.23, "grad_norm": 4.521674633026123, "learning_rate": 1.99617158940386e-05, "loss": 2.4099, "step": 17577 }, { "epoch": 0.23, "grad_norm": 4.544346809387207, "learning_rate": 1.9961706707495984e-05, "loss": 2.5714, "step": 17578 }, { "epoch": 0.23, "grad_norm": 4.455445289611816, "learning_rate": 1.996169751985343e-05, "loss": 2.3992, "step": 17579 }, { "epoch": 0.23, "grad_norm": 4.994776248931885, "learning_rate": 1.9961688331110935e-05, "loss": 2.464, "step": 17580 }, { "epoch": 0.23, "grad_norm": 4.202686309814453, "learning_rate": 1.99616791412685e-05, "loss": 2.0554, "step": 17581 }, { "epoch": 0.23, "grad_norm": 3.8068697452545166, "learning_rate": 1.9961669950326126e-05, "loss": 1.969, "step": 17582 }, { "epoch": 0.23, "grad_norm": 4.449558734893799, "learning_rate": 1.9961660758283816e-05, "loss": 2.2585, "step": 17583 }, { "epoch": 0.23, "grad_norm": 4.238184452056885, "learning_rate": 1.996165156514157e-05, "loss": 1.9147, "step": 17584 }, { "epoch": 0.23, "grad_norm": 4.152242660522461, "learning_rate": 1.9961642370899384e-05, "loss": 2.2544, "step": 17585 }, { "epoch": 0.23, "grad_norm": 4.47110652923584, "learning_rate": 1.9961633175557267e-05, "loss": 2.3252, "step": 17586 }, { "epoch": 0.23, "grad_norm": 4.127469062805176, "learning_rate": 1.9961623979115218e-05, "loss": 2.1854, "step": 17587 }, { "epoch": 0.23, "grad_norm": 4.652220249176025, "learning_rate": 1.9961614781573235e-05, "loss": 2.5041, "step": 17588 }, { "epoch": 0.23, "grad_norm": 4.069544315338135, "learning_rate": 1.996160558293132e-05, "loss": 2.1974, "step": 17589 }, { "epoch": 0.23, "grad_norm": 4.20449161529541, "learning_rate": 1.9961596383189477e-05, "loss": 1.9293, "step": 17590 }, { "epoch": 0.23, "grad_norm": 3.793161630630493, "learning_rate": 1.9961587182347704e-05, "loss": 1.7485, "step": 17591 }, { "epoch": 0.23, "grad_norm": 4.10884428024292, "learning_rate": 1.9961577980406002e-05, "loss": 2.2478, "step": 17592 }, { "epoch": 0.23, "grad_norm": 3.961076259613037, "learning_rate": 1.9961568777364377e-05, "loss": 1.8881, "step": 17593 }, { "epoch": 0.23, "grad_norm": 3.8618738651275635, "learning_rate": 1.9961559573222818e-05, "loss": 1.9082, "step": 17594 }, { "epoch": 0.23, "grad_norm": 4.619154453277588, "learning_rate": 1.996155036798134e-05, "loss": 2.5137, "step": 17595 }, { "epoch": 0.23, "grad_norm": 3.9055848121643066, "learning_rate": 1.9961541161639935e-05, "loss": 2.1097, "step": 17596 }, { "epoch": 0.23, "grad_norm": 3.8498265743255615, "learning_rate": 1.9961531954198607e-05, "loss": 1.9377, "step": 17597 }, { "epoch": 0.23, "grad_norm": 4.147780418395996, "learning_rate": 1.996152274565736e-05, "loss": 2.2348, "step": 17598 }, { "epoch": 0.23, "grad_norm": 4.36940336227417, "learning_rate": 1.996151353601619e-05, "loss": 2.3717, "step": 17599 }, { "epoch": 0.23, "grad_norm": 4.801020622253418, "learning_rate": 1.9961504325275095e-05, "loss": 2.4612, "step": 17600 }, { "epoch": 0.23, "grad_norm": 4.317132472991943, "learning_rate": 1.9961495113434085e-05, "loss": 2.1592, "step": 17601 }, { "epoch": 0.23, "grad_norm": 4.428150177001953, "learning_rate": 1.996148590049316e-05, "loss": 2.2203, "step": 17602 }, { "epoch": 0.23, "grad_norm": 4.966385364532471, "learning_rate": 1.9961476686452315e-05, "loss": 2.3163, "step": 17603 }, { "epoch": 0.23, "grad_norm": 4.418056964874268, "learning_rate": 1.9961467471311553e-05, "loss": 2.7595, "step": 17604 }, { "epoch": 0.23, "grad_norm": 4.43817138671875, "learning_rate": 1.9961458255070877e-05, "loss": 2.4875, "step": 17605 }, { "epoch": 0.23, "grad_norm": 3.8857247829437256, "learning_rate": 1.996144903773029e-05, "loss": 1.8332, "step": 17606 }, { "epoch": 0.23, "grad_norm": 4.157998561859131, "learning_rate": 1.9961439819289784e-05, "loss": 1.8225, "step": 17607 }, { "epoch": 0.23, "grad_norm": 4.280293941497803, "learning_rate": 1.9961430599749373e-05, "loss": 2.4123, "step": 17608 }, { "epoch": 0.23, "grad_norm": 4.638604640960693, "learning_rate": 1.9961421379109046e-05, "loss": 2.4208, "step": 17609 }, { "epoch": 0.23, "grad_norm": 3.996504068374634, "learning_rate": 1.996141215736881e-05, "loss": 1.9178, "step": 17610 }, { "epoch": 0.23, "grad_norm": 3.7183756828308105, "learning_rate": 1.9961402934528663e-05, "loss": 2.0228, "step": 17611 }, { "epoch": 0.23, "grad_norm": 4.691773414611816, "learning_rate": 1.9961393710588614e-05, "loss": 2.5291, "step": 17612 }, { "epoch": 0.23, "grad_norm": 4.4449310302734375, "learning_rate": 1.996138448554865e-05, "loss": 2.0192, "step": 17613 }, { "epoch": 0.23, "grad_norm": 3.7362444400787354, "learning_rate": 1.9961375259408787e-05, "loss": 1.4682, "step": 17614 }, { "epoch": 0.23, "grad_norm": 4.685112953186035, "learning_rate": 1.996136603216902e-05, "loss": 2.4061, "step": 17615 }, { "epoch": 0.23, "grad_norm": 3.906221628189087, "learning_rate": 1.9961356803829345e-05, "loss": 1.9893, "step": 17616 }, { "epoch": 0.23, "grad_norm": 4.4060587882995605, "learning_rate": 1.9961347574389765e-05, "loss": 2.4808, "step": 17617 }, { "epoch": 0.23, "grad_norm": 4.883724689483643, "learning_rate": 1.9961338343850286e-05, "loss": 2.1524, "step": 17618 }, { "epoch": 0.23, "grad_norm": 4.200420379638672, "learning_rate": 1.996132911221091e-05, "loss": 2.2929, "step": 17619 }, { "epoch": 0.23, "grad_norm": 4.146621227264404, "learning_rate": 1.996131987947163e-05, "loss": 1.966, "step": 17620 }, { "epoch": 0.23, "grad_norm": 4.227590084075928, "learning_rate": 1.9961310645632452e-05, "loss": 1.8872, "step": 17621 }, { "epoch": 0.23, "grad_norm": 3.3537559509277344, "learning_rate": 1.9961301410693378e-05, "loss": 1.5065, "step": 17622 }, { "epoch": 0.23, "grad_norm": 4.307442665100098, "learning_rate": 1.996129217465441e-05, "loss": 2.0724, "step": 17623 }, { "epoch": 0.23, "grad_norm": 4.286566257476807, "learning_rate": 1.9961282937515536e-05, "loss": 2.3676, "step": 17624 }, { "epoch": 0.23, "grad_norm": 3.876481771469116, "learning_rate": 1.9961273699276775e-05, "loss": 2.1716, "step": 17625 }, { "epoch": 0.23, "grad_norm": 5.106813907623291, "learning_rate": 1.996126445993812e-05, "loss": 2.3032, "step": 17626 }, { "epoch": 0.23, "grad_norm": 4.549445629119873, "learning_rate": 1.9961255219499574e-05, "loss": 2.1943, "step": 17627 }, { "epoch": 0.23, "grad_norm": 4.9796142578125, "learning_rate": 1.996124597796113e-05, "loss": 2.7097, "step": 17628 }, { "epoch": 0.23, "grad_norm": 3.719735622406006, "learning_rate": 1.9961236735322803e-05, "loss": 1.625, "step": 17629 }, { "epoch": 0.23, "grad_norm": 3.8003060817718506, "learning_rate": 1.9961227491584582e-05, "loss": 1.9737, "step": 17630 }, { "epoch": 0.23, "grad_norm": 3.7818431854248047, "learning_rate": 1.9961218246746475e-05, "loss": 1.6174, "step": 17631 }, { "epoch": 0.23, "grad_norm": 4.336060523986816, "learning_rate": 1.996120900080848e-05, "loss": 1.7981, "step": 17632 }, { "epoch": 0.23, "grad_norm": 4.226834774017334, "learning_rate": 1.9961199753770593e-05, "loss": 2.56, "step": 17633 }, { "epoch": 0.23, "grad_norm": 4.164657115936279, "learning_rate": 1.996119050563283e-05, "loss": 2.103, "step": 17634 }, { "epoch": 0.23, "grad_norm": 4.4212822914123535, "learning_rate": 1.9961181256395175e-05, "loss": 2.5906, "step": 17635 }, { "epoch": 0.23, "grad_norm": 4.318653583526611, "learning_rate": 1.996117200605764e-05, "loss": 2.0906, "step": 17636 }, { "epoch": 0.23, "grad_norm": 3.7641427516937256, "learning_rate": 1.996116275462022e-05, "loss": 1.7279, "step": 17637 }, { "epoch": 0.23, "grad_norm": 3.983105182647705, "learning_rate": 1.996115350208292e-05, "loss": 2.0169, "step": 17638 }, { "epoch": 0.23, "grad_norm": 4.628017902374268, "learning_rate": 1.9961144248445742e-05, "loss": 2.044, "step": 17639 }, { "epoch": 0.23, "grad_norm": 4.8229265213012695, "learning_rate": 1.9961134993708683e-05, "loss": 2.2186, "step": 17640 }, { "epoch": 0.23, "grad_norm": 4.2653303146362305, "learning_rate": 1.9961125737871745e-05, "loss": 2.0314, "step": 17641 }, { "epoch": 0.23, "grad_norm": 3.5305211544036865, "learning_rate": 1.9961116480934928e-05, "loss": 1.4484, "step": 17642 }, { "epoch": 0.23, "grad_norm": 3.9384145736694336, "learning_rate": 1.9961107222898238e-05, "loss": 1.9547, "step": 17643 }, { "epoch": 0.23, "grad_norm": 3.7539782524108887, "learning_rate": 1.996109796376167e-05, "loss": 1.7197, "step": 17644 }, { "epoch": 0.23, "grad_norm": 5.103827953338623, "learning_rate": 1.9961088703525232e-05, "loss": 2.5533, "step": 17645 }, { "epoch": 0.23, "grad_norm": 3.8202872276306152, "learning_rate": 1.996107944218892e-05, "loss": 1.6256, "step": 17646 }, { "epoch": 0.23, "grad_norm": 4.508013725280762, "learning_rate": 1.9961070179752733e-05, "loss": 2.1903, "step": 17647 }, { "epoch": 0.23, "grad_norm": 4.6596856117248535, "learning_rate": 1.9961060916216676e-05, "loss": 1.8877, "step": 17648 }, { "epoch": 0.23, "grad_norm": 3.98287034034729, "learning_rate": 1.996105165158075e-05, "loss": 2.4027, "step": 17649 }, { "epoch": 0.23, "grad_norm": 3.727849006652832, "learning_rate": 1.9961042385844953e-05, "loss": 1.9824, "step": 17650 }, { "epoch": 0.23, "grad_norm": 3.7240796089172363, "learning_rate": 1.996103311900929e-05, "loss": 1.7506, "step": 17651 }, { "epoch": 0.23, "grad_norm": 4.227171897888184, "learning_rate": 1.9961023851073756e-05, "loss": 2.2703, "step": 17652 }, { "epoch": 0.23, "grad_norm": 3.8862569332122803, "learning_rate": 1.9961014582038357e-05, "loss": 2.0576, "step": 17653 }, { "epoch": 0.23, "grad_norm": 5.007362365722656, "learning_rate": 1.9961005311903096e-05, "loss": 2.7427, "step": 17654 }, { "epoch": 0.23, "grad_norm": 4.859324932098389, "learning_rate": 1.996099604066797e-05, "loss": 2.2397, "step": 17655 }, { "epoch": 0.23, "grad_norm": 4.239256381988525, "learning_rate": 1.996098676833298e-05, "loss": 1.9354, "step": 17656 }, { "epoch": 0.23, "grad_norm": 4.352973461151123, "learning_rate": 1.996097749489813e-05, "loss": 2.5849, "step": 17657 }, { "epoch": 0.23, "grad_norm": 4.1316704750061035, "learning_rate": 1.996096822036342e-05, "loss": 1.9617, "step": 17658 }, { "epoch": 0.23, "grad_norm": 3.6760635375976562, "learning_rate": 1.996095894472885e-05, "loss": 1.9564, "step": 17659 }, { "epoch": 0.23, "grad_norm": 4.2294511795043945, "learning_rate": 1.9960949667994412e-05, "loss": 2.2128, "step": 17660 }, { "epoch": 0.23, "grad_norm": 4.5534234046936035, "learning_rate": 1.9960940390160126e-05, "loss": 2.1325, "step": 17661 }, { "epoch": 0.23, "grad_norm": 4.405620098114014, "learning_rate": 1.9960931111225982e-05, "loss": 2.0973, "step": 17662 }, { "epoch": 0.23, "grad_norm": 4.623856067657471, "learning_rate": 1.996092183119198e-05, "loss": 2.6495, "step": 17663 }, { "epoch": 0.23, "grad_norm": 3.953831911087036, "learning_rate": 1.9960912550058124e-05, "loss": 1.9891, "step": 17664 }, { "epoch": 0.23, "grad_norm": 5.062533378601074, "learning_rate": 1.9960903267824414e-05, "loss": 2.5507, "step": 17665 }, { "epoch": 0.23, "grad_norm": 4.482722282409668, "learning_rate": 1.996089398449085e-05, "loss": 2.1054, "step": 17666 }, { "epoch": 0.23, "grad_norm": 3.9632110595703125, "learning_rate": 1.9960884700057437e-05, "loss": 1.8909, "step": 17667 }, { "epoch": 0.23, "grad_norm": 4.132906913757324, "learning_rate": 1.9960875414524174e-05, "loss": 2.2745, "step": 17668 }, { "epoch": 0.23, "grad_norm": 4.635927200317383, "learning_rate": 1.996086612789106e-05, "loss": 2.0224, "step": 17669 }, { "epoch": 0.23, "grad_norm": 3.920130968093872, "learning_rate": 1.9960856840158093e-05, "loss": 1.9412, "step": 17670 }, { "epoch": 0.23, "grad_norm": 3.634565591812134, "learning_rate": 1.9960847551325284e-05, "loss": 1.8072, "step": 17671 }, { "epoch": 0.23, "grad_norm": 4.889660358428955, "learning_rate": 1.9960838261392627e-05, "loss": 2.0724, "step": 17672 }, { "epoch": 0.23, "grad_norm": 4.111757755279541, "learning_rate": 1.9960828970360128e-05, "loss": 2.0356, "step": 17673 }, { "epoch": 0.23, "grad_norm": 4.079285621643066, "learning_rate": 1.9960819678227777e-05, "loss": 1.9972, "step": 17674 }, { "epoch": 0.23, "grad_norm": 4.3111419677734375, "learning_rate": 1.9960810384995585e-05, "loss": 2.3539, "step": 17675 }, { "epoch": 0.23, "grad_norm": 4.556095123291016, "learning_rate": 1.9960801090663554e-05, "loss": 2.128, "step": 17676 }, { "epoch": 0.23, "grad_norm": 4.565251350402832, "learning_rate": 1.996079179523168e-05, "loss": 2.3963, "step": 17677 }, { "epoch": 0.23, "grad_norm": 3.789872407913208, "learning_rate": 1.9960782498699963e-05, "loss": 2.0199, "step": 17678 }, { "epoch": 0.23, "grad_norm": 3.727269172668457, "learning_rate": 1.996077320106841e-05, "loss": 1.8389, "step": 17679 }, { "epoch": 0.23, "grad_norm": 4.46635103225708, "learning_rate": 1.996076390233702e-05, "loss": 2.0921, "step": 17680 }, { "epoch": 0.23, "grad_norm": 4.626352787017822, "learning_rate": 1.996075460250579e-05, "loss": 2.2834, "step": 17681 }, { "epoch": 0.23, "grad_norm": 4.169631481170654, "learning_rate": 1.996074530157472e-05, "loss": 2.0297, "step": 17682 }, { "epoch": 0.23, "grad_norm": 3.965559959411621, "learning_rate": 1.9960735999543817e-05, "loss": 1.9274, "step": 17683 }, { "epoch": 0.23, "grad_norm": 4.119779586791992, "learning_rate": 1.9960726696413082e-05, "loss": 1.7748, "step": 17684 }, { "epoch": 0.23, "grad_norm": 4.008552074432373, "learning_rate": 1.9960717392182512e-05, "loss": 2.17, "step": 17685 }, { "epoch": 0.23, "grad_norm": 4.284956932067871, "learning_rate": 1.9960708086852114e-05, "loss": 2.2899, "step": 17686 }, { "epoch": 0.23, "grad_norm": 4.2338547706604, "learning_rate": 1.996069878042188e-05, "loss": 2.1719, "step": 17687 }, { "epoch": 0.23, "grad_norm": 4.367312908172607, "learning_rate": 1.9960689472891817e-05, "loss": 2.4899, "step": 17688 }, { "epoch": 0.23, "grad_norm": 3.922811269760132, "learning_rate": 1.9960680164261927e-05, "loss": 2.0753, "step": 17689 }, { "epoch": 0.23, "grad_norm": 3.794415235519409, "learning_rate": 1.9960670854532206e-05, "loss": 1.6383, "step": 17690 }, { "epoch": 0.23, "grad_norm": 4.318903923034668, "learning_rate": 1.996066154370266e-05, "loss": 2.1875, "step": 17691 }, { "epoch": 0.23, "grad_norm": 4.027774810791016, "learning_rate": 1.996065223177329e-05, "loss": 1.9659, "step": 17692 }, { "epoch": 0.23, "grad_norm": 4.389060020446777, "learning_rate": 1.996064291874409e-05, "loss": 2.6305, "step": 17693 }, { "epoch": 0.23, "grad_norm": 4.668229579925537, "learning_rate": 1.996063360461507e-05, "loss": 2.4897, "step": 17694 }, { "epoch": 0.23, "grad_norm": 4.481019020080566, "learning_rate": 1.9960624289386224e-05, "loss": 2.1094, "step": 17695 }, { "epoch": 0.23, "grad_norm": 4.270423412322998, "learning_rate": 1.9960614973057557e-05, "loss": 1.7707, "step": 17696 }, { "epoch": 0.23, "grad_norm": 3.4764347076416016, "learning_rate": 1.996060565562907e-05, "loss": 1.6146, "step": 17697 }, { "epoch": 0.23, "grad_norm": 4.469573020935059, "learning_rate": 1.9960596337100762e-05, "loss": 2.5801, "step": 17698 }, { "epoch": 0.23, "grad_norm": 4.423196315765381, "learning_rate": 1.9960587017472637e-05, "loss": 2.4387, "step": 17699 }, { "epoch": 0.23, "grad_norm": 4.064341068267822, "learning_rate": 1.996057769674469e-05, "loss": 2.2396, "step": 17700 }, { "epoch": 0.23, "grad_norm": 4.283603668212891, "learning_rate": 1.996056837491693e-05, "loss": 2.1625, "step": 17701 }, { "epoch": 0.23, "grad_norm": 4.058230400085449, "learning_rate": 1.9960559051989355e-05, "loss": 2.0531, "step": 17702 }, { "epoch": 0.23, "grad_norm": 3.562413454055786, "learning_rate": 1.9960549727961965e-05, "loss": 1.7384, "step": 17703 }, { "epoch": 0.23, "grad_norm": 4.049871444702148, "learning_rate": 1.9960540402834757e-05, "loss": 2.0456, "step": 17704 }, { "epoch": 0.23, "grad_norm": 4.204655170440674, "learning_rate": 1.996053107660774e-05, "loss": 1.7944, "step": 17705 }, { "epoch": 0.23, "grad_norm": 4.1999335289001465, "learning_rate": 1.996052174928091e-05, "loss": 2.2306, "step": 17706 }, { "epoch": 0.23, "grad_norm": 4.096086502075195, "learning_rate": 1.9960512420854275e-05, "loss": 2.3178, "step": 17707 }, { "epoch": 0.23, "grad_norm": 4.11049222946167, "learning_rate": 1.9960503091327826e-05, "loss": 2.1048, "step": 17708 }, { "epoch": 0.23, "grad_norm": 3.995504856109619, "learning_rate": 1.9960493760701568e-05, "loss": 1.9183, "step": 17709 }, { "epoch": 0.23, "grad_norm": 3.8798301219940186, "learning_rate": 1.99604844289755e-05, "loss": 2.1228, "step": 17710 }, { "epoch": 0.23, "grad_norm": 4.094404697418213, "learning_rate": 1.996047509614963e-05, "loss": 2.2161, "step": 17711 }, { "epoch": 0.23, "grad_norm": 4.103283405303955, "learning_rate": 1.996046576222395e-05, "loss": 2.3393, "step": 17712 }, { "epoch": 0.23, "grad_norm": 3.9084761142730713, "learning_rate": 1.9960456427198468e-05, "loss": 2.2122, "step": 17713 }, { "epoch": 0.23, "grad_norm": 3.396890640258789, "learning_rate": 1.9960447091073187e-05, "loss": 1.7297, "step": 17714 }, { "epoch": 0.23, "grad_norm": 4.489639759063721, "learning_rate": 1.99604377538481e-05, "loss": 2.4175, "step": 17715 }, { "epoch": 0.23, "grad_norm": 4.056857109069824, "learning_rate": 1.996042841552321e-05, "loss": 1.9958, "step": 17716 }, { "epoch": 0.23, "grad_norm": 4.390764236450195, "learning_rate": 1.9960419076098522e-05, "loss": 2.5109, "step": 17717 }, { "epoch": 0.23, "grad_norm": 4.383767604827881, "learning_rate": 1.9960409735574034e-05, "loss": 2.1791, "step": 17718 }, { "epoch": 0.23, "grad_norm": 4.4960808753967285, "learning_rate": 1.9960400393949748e-05, "loss": 2.7467, "step": 17719 }, { "epoch": 0.23, "grad_norm": 3.960150957107544, "learning_rate": 1.9960391051225664e-05, "loss": 2.238, "step": 17720 }, { "epoch": 0.23, "grad_norm": 4.948431015014648, "learning_rate": 1.9960381707401783e-05, "loss": 2.3092, "step": 17721 }, { "epoch": 0.23, "grad_norm": 3.965358257293701, "learning_rate": 1.996037236247811e-05, "loss": 2.0034, "step": 17722 }, { "epoch": 0.23, "grad_norm": 3.8347365856170654, "learning_rate": 1.996036301645464e-05, "loss": 1.92, "step": 17723 }, { "epoch": 0.23, "grad_norm": 3.780090570449829, "learning_rate": 1.996035366933138e-05, "loss": 1.7144, "step": 17724 }, { "epoch": 0.23, "grad_norm": 4.624272346496582, "learning_rate": 1.9960344321108323e-05, "loss": 2.5498, "step": 17725 }, { "epoch": 0.23, "grad_norm": 4.55118465423584, "learning_rate": 1.996033497178548e-05, "loss": 2.0618, "step": 17726 }, { "epoch": 0.23, "grad_norm": 4.17814302444458, "learning_rate": 1.9960325621362842e-05, "loss": 2.1388, "step": 17727 }, { "epoch": 0.23, "grad_norm": 5.560735702514648, "learning_rate": 1.996031626984042e-05, "loss": 2.2627, "step": 17728 }, { "epoch": 0.23, "grad_norm": 5.0398688316345215, "learning_rate": 1.996030691721821e-05, "loss": 2.5568, "step": 17729 }, { "epoch": 0.23, "grad_norm": 4.305756092071533, "learning_rate": 1.996029756349621e-05, "loss": 2.2459, "step": 17730 }, { "epoch": 0.23, "grad_norm": 3.619615077972412, "learning_rate": 1.9960288208674427e-05, "loss": 1.6774, "step": 17731 }, { "epoch": 0.23, "grad_norm": 3.8104565143585205, "learning_rate": 1.9960278852752858e-05, "loss": 2.2151, "step": 17732 }, { "epoch": 0.23, "grad_norm": 4.158882141113281, "learning_rate": 1.9960269495731505e-05, "loss": 2.164, "step": 17733 }, { "epoch": 0.23, "grad_norm": 3.7599475383758545, "learning_rate": 1.996026013761037e-05, "loss": 1.7516, "step": 17734 }, { "epoch": 0.23, "grad_norm": 3.895338773727417, "learning_rate": 1.996025077838945e-05, "loss": 1.9944, "step": 17735 }, { "epoch": 0.23, "grad_norm": 4.116291046142578, "learning_rate": 1.9960241418068752e-05, "loss": 2.0234, "step": 17736 }, { "epoch": 0.23, "grad_norm": 4.896916389465332, "learning_rate": 1.9960232056648276e-05, "loss": 2.2265, "step": 17737 }, { "epoch": 0.23, "grad_norm": 4.464535713195801, "learning_rate": 1.996022269412802e-05, "loss": 1.9161, "step": 17738 }, { "epoch": 0.23, "grad_norm": 4.377864837646484, "learning_rate": 1.9960213330507985e-05, "loss": 2.244, "step": 17739 }, { "epoch": 0.23, "grad_norm": 3.62292742729187, "learning_rate": 1.9960203965788177e-05, "loss": 1.8056, "step": 17740 }, { "epoch": 0.23, "grad_norm": 4.389168739318848, "learning_rate": 1.996019459996859e-05, "loss": 2.4228, "step": 17741 }, { "epoch": 0.23, "grad_norm": 4.052196979522705, "learning_rate": 1.996018523304923e-05, "loss": 2.0758, "step": 17742 }, { "epoch": 0.23, "grad_norm": 4.221771240234375, "learning_rate": 1.9960175865030094e-05, "loss": 2.0137, "step": 17743 }, { "epoch": 0.23, "grad_norm": 4.880364418029785, "learning_rate": 1.996016649591119e-05, "loss": 2.1614, "step": 17744 }, { "epoch": 0.23, "grad_norm": 4.462826728820801, "learning_rate": 1.996015712569251e-05, "loss": 2.0693, "step": 17745 }, { "epoch": 0.23, "grad_norm": 4.0065693855285645, "learning_rate": 1.9960147754374062e-05, "loss": 2.0503, "step": 17746 }, { "epoch": 0.23, "grad_norm": 4.676377773284912, "learning_rate": 1.9960138381955846e-05, "loss": 2.479, "step": 17747 }, { "epoch": 0.23, "grad_norm": 4.004758358001709, "learning_rate": 1.9960129008437863e-05, "loss": 1.8184, "step": 17748 }, { "epoch": 0.23, "grad_norm": 3.8075990676879883, "learning_rate": 1.996011963382011e-05, "loss": 2.0097, "step": 17749 }, { "epoch": 0.23, "grad_norm": 4.083260536193848, "learning_rate": 1.996011025810259e-05, "loss": 1.9763, "step": 17750 }, { "epoch": 0.23, "grad_norm": 4.025755882263184, "learning_rate": 1.996010088128531e-05, "loss": 2.1264, "step": 17751 }, { "epoch": 0.23, "grad_norm": 4.081103324890137, "learning_rate": 1.9960091503368256e-05, "loss": 2.367, "step": 17752 }, { "epoch": 0.23, "grad_norm": 4.187819004058838, "learning_rate": 1.9960082124351446e-05, "loss": 2.0099, "step": 17753 }, { "epoch": 0.23, "grad_norm": 3.984957218170166, "learning_rate": 1.9960072744234876e-05, "loss": 1.9477, "step": 17754 }, { "epoch": 0.23, "grad_norm": 4.0419135093688965, "learning_rate": 1.996006336301854e-05, "loss": 2.2661, "step": 17755 }, { "epoch": 0.23, "grad_norm": 4.285394191741943, "learning_rate": 1.9960053980702448e-05, "loss": 2.4798, "step": 17756 }, { "epoch": 0.23, "grad_norm": 4.138875484466553, "learning_rate": 1.9960044597286593e-05, "loss": 2.18, "step": 17757 }, { "epoch": 0.23, "grad_norm": 4.005358695983887, "learning_rate": 1.9960035212770984e-05, "loss": 2.1717, "step": 17758 }, { "epoch": 0.23, "grad_norm": 3.285291910171509, "learning_rate": 1.9960025827155615e-05, "loss": 1.5845, "step": 17759 }, { "epoch": 0.23, "grad_norm": 4.322659492492676, "learning_rate": 1.996001644044049e-05, "loss": 2.0791, "step": 17760 }, { "epoch": 0.23, "grad_norm": 4.080808162689209, "learning_rate": 1.9960007052625613e-05, "loss": 2.0918, "step": 17761 }, { "epoch": 0.23, "grad_norm": 3.537681818008423, "learning_rate": 1.9959997663710983e-05, "loss": 1.7374, "step": 17762 }, { "epoch": 0.23, "grad_norm": 4.381173133850098, "learning_rate": 1.9959988273696595e-05, "loss": 2.3378, "step": 17763 }, { "epoch": 0.23, "grad_norm": 4.0940728187561035, "learning_rate": 1.9959978882582458e-05, "loss": 2.1986, "step": 17764 }, { "epoch": 0.23, "grad_norm": 4.278557300567627, "learning_rate": 1.995996949036857e-05, "loss": 2.2204, "step": 17765 }, { "epoch": 0.23, "grad_norm": 3.7362077236175537, "learning_rate": 1.9959960097054935e-05, "loss": 1.7864, "step": 17766 }, { "epoch": 0.23, "grad_norm": 4.4586944580078125, "learning_rate": 1.995995070264155e-05, "loss": 2.1533, "step": 17767 }, { "epoch": 0.23, "grad_norm": 4.341891765594482, "learning_rate": 1.9959941307128418e-05, "loss": 2.8146, "step": 17768 }, { "epoch": 0.23, "grad_norm": 4.220378398895264, "learning_rate": 1.995993191051554e-05, "loss": 2.1117, "step": 17769 }, { "epoch": 0.23, "grad_norm": 4.380168437957764, "learning_rate": 1.9959922512802913e-05, "loss": 2.1115, "step": 17770 }, { "epoch": 0.23, "grad_norm": 4.466073036193848, "learning_rate": 1.9959913113990544e-05, "loss": 2.143, "step": 17771 }, { "epoch": 0.23, "grad_norm": 4.4571309089660645, "learning_rate": 1.995990371407843e-05, "loss": 2.0358, "step": 17772 }, { "epoch": 0.23, "grad_norm": 4.664978504180908, "learning_rate": 1.9959894313066574e-05, "loss": 2.0874, "step": 17773 }, { "epoch": 0.23, "grad_norm": 4.28139066696167, "learning_rate": 1.9959884910954978e-05, "loss": 2.3622, "step": 17774 }, { "epoch": 0.23, "grad_norm": 4.0900702476501465, "learning_rate": 1.9959875507743644e-05, "loss": 2.4102, "step": 17775 }, { "epoch": 0.23, "grad_norm": 4.58065938949585, "learning_rate": 1.9959866103432568e-05, "loss": 2.5528, "step": 17776 }, { "epoch": 0.23, "grad_norm": 4.611166000366211, "learning_rate": 1.9959856698021754e-05, "loss": 1.9053, "step": 17777 }, { "epoch": 0.23, "grad_norm": 4.162250518798828, "learning_rate": 1.99598472915112e-05, "loss": 2.0443, "step": 17778 }, { "epoch": 0.23, "grad_norm": 5.520500659942627, "learning_rate": 1.9959837883900913e-05, "loss": 2.5736, "step": 17779 }, { "epoch": 0.23, "grad_norm": 4.290781497955322, "learning_rate": 1.9959828475190892e-05, "loss": 1.8749, "step": 17780 }, { "epoch": 0.23, "grad_norm": 4.176438808441162, "learning_rate": 1.9959819065381135e-05, "loss": 2.415, "step": 17781 }, { "epoch": 0.23, "grad_norm": 4.20319938659668, "learning_rate": 1.9959809654471648e-05, "loss": 2.3738, "step": 17782 }, { "epoch": 0.23, "grad_norm": 3.9544568061828613, "learning_rate": 1.9959800242462427e-05, "loss": 2.2313, "step": 17783 }, { "epoch": 0.23, "grad_norm": 4.154629707336426, "learning_rate": 1.9959790829353473e-05, "loss": 1.9211, "step": 17784 }, { "epoch": 0.23, "grad_norm": 4.126742362976074, "learning_rate": 1.995978141514479e-05, "loss": 2.3178, "step": 17785 }, { "epoch": 0.23, "grad_norm": 4.465044021606445, "learning_rate": 1.9959771999836382e-05, "loss": 2.3786, "step": 17786 }, { "epoch": 0.23, "grad_norm": 4.668455123901367, "learning_rate": 1.995976258342824e-05, "loss": 2.1231, "step": 17787 }, { "epoch": 0.23, "grad_norm": 4.075489521026611, "learning_rate": 1.9959753165920378e-05, "loss": 2.2648, "step": 17788 }, { "epoch": 0.23, "grad_norm": 4.215548515319824, "learning_rate": 1.9959743747312784e-05, "loss": 1.9893, "step": 17789 }, { "epoch": 0.23, "grad_norm": 5.1019511222839355, "learning_rate": 1.995973432760547e-05, "loss": 2.329, "step": 17790 }, { "epoch": 0.23, "grad_norm": 4.445501804351807, "learning_rate": 1.995972490679843e-05, "loss": 2.1167, "step": 17791 }, { "epoch": 0.23, "grad_norm": 4.413228511810303, "learning_rate": 1.9959715484891667e-05, "loss": 2.2108, "step": 17792 }, { "epoch": 0.23, "grad_norm": 3.5656681060791016, "learning_rate": 1.9959706061885184e-05, "loss": 2.1972, "step": 17793 }, { "epoch": 0.23, "grad_norm": 4.6307172775268555, "learning_rate": 1.9959696637778977e-05, "loss": 2.1747, "step": 17794 }, { "epoch": 0.23, "grad_norm": 3.8274641036987305, "learning_rate": 1.9959687212573055e-05, "loss": 1.9794, "step": 17795 }, { "epoch": 0.23, "grad_norm": 5.435314178466797, "learning_rate": 1.9959677786267412e-05, "loss": 2.7538, "step": 17796 }, { "epoch": 0.23, "grad_norm": 4.47924280166626, "learning_rate": 1.9959668358862053e-05, "loss": 2.2038, "step": 17797 }, { "epoch": 0.23, "grad_norm": 3.976820230484009, "learning_rate": 1.995965893035698e-05, "loss": 2.2079, "step": 17798 }, { "epoch": 0.23, "grad_norm": 4.249461650848389, "learning_rate": 1.995964950075219e-05, "loss": 2.2824, "step": 17799 }, { "epoch": 0.23, "grad_norm": 3.9120428562164307, "learning_rate": 1.995964007004768e-05, "loss": 1.8977, "step": 17800 }, { "epoch": 0.23, "grad_norm": 4.079573154449463, "learning_rate": 1.9959630638243463e-05, "loss": 1.7874, "step": 17801 }, { "epoch": 0.23, "grad_norm": 4.265861511230469, "learning_rate": 1.995962120533953e-05, "loss": 2.4346, "step": 17802 }, { "epoch": 0.23, "grad_norm": 4.869982719421387, "learning_rate": 1.995961177133589e-05, "loss": 2.4147, "step": 17803 }, { "epoch": 0.23, "grad_norm": 3.911425828933716, "learning_rate": 1.9959602336232537e-05, "loss": 1.9252, "step": 17804 }, { "epoch": 0.23, "grad_norm": 3.7103636264801025, "learning_rate": 1.9959592900029477e-05, "loss": 1.9903, "step": 17805 }, { "epoch": 0.23, "grad_norm": 3.73457670211792, "learning_rate": 1.9959583462726707e-05, "loss": 1.991, "step": 17806 }, { "epoch": 0.23, "grad_norm": 4.127974987030029, "learning_rate": 1.995957402432423e-05, "loss": 2.0847, "step": 17807 }, { "epoch": 0.23, "grad_norm": 3.8449578285217285, "learning_rate": 1.995956458482205e-05, "loss": 2.1442, "step": 17808 }, { "epoch": 0.23, "grad_norm": 4.610749244689941, "learning_rate": 1.995955514422016e-05, "loss": 2.5599, "step": 17809 }, { "epoch": 0.23, "grad_norm": 4.1016035079956055, "learning_rate": 1.9959545702518573e-05, "loss": 2.1944, "step": 17810 }, { "epoch": 0.23, "grad_norm": 3.8310294151306152, "learning_rate": 1.9959536259717276e-05, "loss": 1.9909, "step": 17811 }, { "epoch": 0.23, "grad_norm": 4.499260425567627, "learning_rate": 1.9959526815816282e-05, "loss": 2.3921, "step": 17812 }, { "epoch": 0.23, "grad_norm": 4.045012474060059, "learning_rate": 1.9959517370815585e-05, "loss": 2.1591, "step": 17813 }, { "epoch": 0.23, "grad_norm": 5.831596374511719, "learning_rate": 1.995950792471519e-05, "loss": 2.8575, "step": 17814 }, { "epoch": 0.23, "grad_norm": 4.162813663482666, "learning_rate": 1.9959498477515097e-05, "loss": 1.8824, "step": 17815 }, { "epoch": 0.23, "grad_norm": 3.9758429527282715, "learning_rate": 1.9959489029215306e-05, "loss": 2.3638, "step": 17816 }, { "epoch": 0.23, "grad_norm": 3.584101915359497, "learning_rate": 1.9959479579815814e-05, "loss": 1.6048, "step": 17817 }, { "epoch": 0.23, "grad_norm": 4.7423930168151855, "learning_rate": 1.995947012931663e-05, "loss": 2.2102, "step": 17818 }, { "epoch": 0.23, "grad_norm": 4.0339274406433105, "learning_rate": 1.995946067771775e-05, "loss": 1.8304, "step": 17819 }, { "epoch": 0.23, "grad_norm": 3.873164176940918, "learning_rate": 1.9959451225019178e-05, "loss": 1.6092, "step": 17820 }, { "epoch": 0.23, "grad_norm": 4.109731197357178, "learning_rate": 1.9959441771220916e-05, "loss": 2.0924, "step": 17821 }, { "epoch": 0.23, "grad_norm": 4.284273624420166, "learning_rate": 1.9959432316322958e-05, "loss": 2.1824, "step": 17822 }, { "epoch": 0.23, "grad_norm": 4.764007568359375, "learning_rate": 1.9959422860325314e-05, "loss": 2.4952, "step": 17823 }, { "epoch": 0.23, "grad_norm": 4.428328037261963, "learning_rate": 1.9959413403227978e-05, "loss": 2.2211, "step": 17824 }, { "epoch": 0.23, "grad_norm": 4.629209995269775, "learning_rate": 1.995940394503095e-05, "loss": 2.0467, "step": 17825 }, { "epoch": 0.23, "grad_norm": 3.339359760284424, "learning_rate": 1.9959394485734243e-05, "loss": 1.4916, "step": 17826 }, { "epoch": 0.23, "grad_norm": 3.976449966430664, "learning_rate": 1.995938502533785e-05, "loss": 2.0898, "step": 17827 }, { "epoch": 0.23, "grad_norm": 4.3941826820373535, "learning_rate": 1.9959375563841765e-05, "loss": 2.3471, "step": 17828 }, { "epoch": 0.23, "grad_norm": 4.339880466461182, "learning_rate": 1.9959366101245998e-05, "loss": 1.9989, "step": 17829 }, { "epoch": 0.23, "grad_norm": 4.917272567749023, "learning_rate": 1.995935663755055e-05, "loss": 3.0125, "step": 17830 }, { "epoch": 0.23, "grad_norm": 3.9789798259735107, "learning_rate": 1.9959347172755418e-05, "loss": 1.756, "step": 17831 }, { "epoch": 0.23, "grad_norm": 4.449886322021484, "learning_rate": 1.9959337706860607e-05, "loss": 2.6756, "step": 17832 }, { "epoch": 0.23, "grad_norm": 4.0429558753967285, "learning_rate": 1.9959328239866118e-05, "loss": 2.1294, "step": 17833 }, { "epoch": 0.23, "grad_norm": 4.144214630126953, "learning_rate": 1.9959318771771945e-05, "loss": 2.1642, "step": 17834 }, { "epoch": 0.23, "grad_norm": 3.875513792037964, "learning_rate": 1.9959309302578097e-05, "loss": 1.7909, "step": 17835 }, { "epoch": 0.23, "grad_norm": 4.645778179168701, "learning_rate": 1.9959299832284573e-05, "loss": 2.3162, "step": 17836 }, { "epoch": 0.23, "grad_norm": 4.768964767456055, "learning_rate": 1.9959290360891374e-05, "loss": 1.9219, "step": 17837 }, { "epoch": 0.23, "grad_norm": 3.9578843116760254, "learning_rate": 1.9959280888398495e-05, "loss": 2.1731, "step": 17838 }, { "epoch": 0.23, "grad_norm": 4.276803016662598, "learning_rate": 1.995927141480595e-05, "loss": 2.4145, "step": 17839 }, { "epoch": 0.23, "grad_norm": 4.427609443664551, "learning_rate": 1.9959261940113727e-05, "loss": 2.1635, "step": 17840 }, { "epoch": 0.23, "grad_norm": 4.048436641693115, "learning_rate": 1.9959252464321834e-05, "loss": 1.7053, "step": 17841 }, { "epoch": 0.23, "grad_norm": 4.369920253753662, "learning_rate": 1.995924298743027e-05, "loss": 2.6966, "step": 17842 }, { "epoch": 0.23, "grad_norm": 4.83614444732666, "learning_rate": 1.995923350943904e-05, "loss": 2.6083, "step": 17843 }, { "epoch": 0.23, "grad_norm": 5.035306930541992, "learning_rate": 1.995922403034814e-05, "loss": 2.6427, "step": 17844 }, { "epoch": 0.23, "grad_norm": 4.139800071716309, "learning_rate": 1.9959214550157568e-05, "loss": 2.3458, "step": 17845 }, { "epoch": 0.23, "grad_norm": 3.971935987472534, "learning_rate": 1.9959205068867337e-05, "loss": 2.6124, "step": 17846 }, { "epoch": 0.23, "grad_norm": 3.9982423782348633, "learning_rate": 1.9959195586477437e-05, "loss": 1.8283, "step": 17847 }, { "epoch": 0.23, "grad_norm": 4.423402786254883, "learning_rate": 1.995918610298787e-05, "loss": 2.3952, "step": 17848 }, { "epoch": 0.23, "grad_norm": 4.103288173675537, "learning_rate": 1.9959176618398644e-05, "loss": 1.9464, "step": 17849 }, { "epoch": 0.23, "grad_norm": 4.574535369873047, "learning_rate": 1.9959167132709754e-05, "loss": 2.342, "step": 17850 }, { "epoch": 0.23, "grad_norm": 4.011410713195801, "learning_rate": 1.9959157645921206e-05, "loss": 2.4736, "step": 17851 }, { "epoch": 0.23, "grad_norm": 4.455999851226807, "learning_rate": 1.9959148158033e-05, "loss": 2.528, "step": 17852 }, { "epoch": 0.23, "grad_norm": 4.007405757904053, "learning_rate": 1.9959138669045125e-05, "loss": 1.8715, "step": 17853 }, { "epoch": 0.23, "grad_norm": 4.470283031463623, "learning_rate": 1.99591291789576e-05, "loss": 2.1821, "step": 17854 }, { "epoch": 0.23, "grad_norm": 4.71207857131958, "learning_rate": 1.995911968777042e-05, "loss": 2.5138, "step": 17855 }, { "epoch": 0.23, "grad_norm": 4.127486705780029, "learning_rate": 1.9959110195483577e-05, "loss": 2.2336, "step": 17856 }, { "epoch": 0.23, "grad_norm": 3.961121082305908, "learning_rate": 1.9959100702097082e-05, "loss": 1.8711, "step": 17857 }, { "epoch": 0.23, "grad_norm": 3.75278902053833, "learning_rate": 1.9959091207610935e-05, "loss": 2.011, "step": 17858 }, { "epoch": 0.23, "grad_norm": 4.500196933746338, "learning_rate": 1.9959081712025136e-05, "loss": 2.2088, "step": 17859 }, { "epoch": 0.23, "grad_norm": 3.923413038253784, "learning_rate": 1.9959072215339685e-05, "loss": 2.1281, "step": 17860 }, { "epoch": 0.23, "grad_norm": 4.119529724121094, "learning_rate": 1.995906271755458e-05, "loss": 2.3263, "step": 17861 }, { "epoch": 0.23, "grad_norm": 3.869147777557373, "learning_rate": 1.995905321866983e-05, "loss": 2.3072, "step": 17862 }, { "epoch": 0.23, "grad_norm": 4.511556625366211, "learning_rate": 1.995904371868543e-05, "loss": 2.1422, "step": 17863 }, { "epoch": 0.23, "grad_norm": 4.32337760925293, "learning_rate": 1.995903421760138e-05, "loss": 2.2281, "step": 17864 }, { "epoch": 0.23, "grad_norm": 4.314717769622803, "learning_rate": 1.9959024715417687e-05, "loss": 1.937, "step": 17865 }, { "epoch": 0.23, "grad_norm": 3.959662675857544, "learning_rate": 1.9959015212134347e-05, "loss": 2.0062, "step": 17866 }, { "epoch": 0.23, "grad_norm": 4.5527663230896, "learning_rate": 1.9959005707751366e-05, "loss": 2.1426, "step": 17867 }, { "epoch": 0.23, "grad_norm": 4.032273292541504, "learning_rate": 1.9958996202268736e-05, "loss": 2.2481, "step": 17868 }, { "epoch": 0.23, "grad_norm": 3.6249423027038574, "learning_rate": 1.9958986695686467e-05, "loss": 1.7303, "step": 17869 }, { "epoch": 0.23, "grad_norm": 3.9915812015533447, "learning_rate": 1.995897718800456e-05, "loss": 1.8397, "step": 17870 }, { "epoch": 0.23, "grad_norm": 3.715311050415039, "learning_rate": 1.9958967679223008e-05, "loss": 1.9144, "step": 17871 }, { "epoch": 0.23, "grad_norm": 3.5183653831481934, "learning_rate": 1.995895816934182e-05, "loss": 1.8394, "step": 17872 }, { "epoch": 0.23, "grad_norm": 4.39652156829834, "learning_rate": 1.9958948658360994e-05, "loss": 2.7426, "step": 17873 }, { "epoch": 0.23, "grad_norm": 5.163729190826416, "learning_rate": 1.995893914628053e-05, "loss": 2.9406, "step": 17874 }, { "epoch": 0.23, "grad_norm": 4.003574848175049, "learning_rate": 1.995892963310043e-05, "loss": 1.9958, "step": 17875 }, { "epoch": 0.23, "grad_norm": 3.7513134479522705, "learning_rate": 1.9958920118820695e-05, "loss": 1.8914, "step": 17876 }, { "epoch": 0.23, "grad_norm": 3.943269968032837, "learning_rate": 1.995891060344133e-05, "loss": 1.9087, "step": 17877 }, { "epoch": 0.23, "grad_norm": 4.319065570831299, "learning_rate": 1.9958901086962327e-05, "loss": 2.2098, "step": 17878 }, { "epoch": 0.23, "grad_norm": 4.189746379852295, "learning_rate": 1.9958891569383694e-05, "loss": 1.7293, "step": 17879 }, { "epoch": 0.23, "grad_norm": 4.132321357727051, "learning_rate": 1.995888205070543e-05, "loss": 2.093, "step": 17880 }, { "epoch": 0.23, "grad_norm": 3.857123851776123, "learning_rate": 1.995887253092754e-05, "loss": 1.7669, "step": 17881 }, { "epoch": 0.23, "grad_norm": 4.472414970397949, "learning_rate": 1.9958863010050017e-05, "loss": 2.2956, "step": 17882 }, { "epoch": 0.23, "grad_norm": 3.427607536315918, "learning_rate": 1.9958853488072867e-05, "loss": 1.6166, "step": 17883 }, { "epoch": 0.23, "grad_norm": 4.458231449127197, "learning_rate": 1.9958843964996092e-05, "loss": 2.4365, "step": 17884 }, { "epoch": 0.23, "grad_norm": 4.046107292175293, "learning_rate": 1.9958834440819692e-05, "loss": 1.5629, "step": 17885 }, { "epoch": 0.23, "grad_norm": 4.961225509643555, "learning_rate": 1.9958824915543667e-05, "loss": 2.3818, "step": 17886 }, { "epoch": 0.23, "grad_norm": 4.193946361541748, "learning_rate": 1.995881538916802e-05, "loss": 2.191, "step": 17887 }, { "epoch": 0.23, "grad_norm": 4.3685383796691895, "learning_rate": 1.9958805861692753e-05, "loss": 1.9117, "step": 17888 }, { "epoch": 0.23, "grad_norm": 4.070569038391113, "learning_rate": 1.995879633311786e-05, "loss": 1.9617, "step": 17889 }, { "epoch": 0.23, "grad_norm": 3.7370765209198, "learning_rate": 1.995878680344335e-05, "loss": 1.8485, "step": 17890 }, { "epoch": 0.23, "grad_norm": 3.794224500656128, "learning_rate": 1.995877727266922e-05, "loss": 1.6495, "step": 17891 }, { "epoch": 0.23, "grad_norm": 3.8435730934143066, "learning_rate": 1.9958767740795472e-05, "loss": 2.1414, "step": 17892 }, { "epoch": 0.23, "grad_norm": 4.075280666351318, "learning_rate": 1.9958758207822106e-05, "loss": 2.0794, "step": 17893 }, { "epoch": 0.23, "grad_norm": 4.079358100891113, "learning_rate": 1.9958748673749126e-05, "loss": 1.9959, "step": 17894 }, { "epoch": 0.23, "grad_norm": 4.260309219360352, "learning_rate": 1.9958739138576527e-05, "loss": 2.2534, "step": 17895 }, { "epoch": 0.23, "grad_norm": 4.269319534301758, "learning_rate": 1.995872960230432e-05, "loss": 2.1816, "step": 17896 }, { "epoch": 0.23, "grad_norm": 4.132532119750977, "learning_rate": 1.9958720064932496e-05, "loss": 2.418, "step": 17897 }, { "epoch": 0.23, "grad_norm": 4.857382774353027, "learning_rate": 1.9958710526461063e-05, "loss": 2.4496, "step": 17898 }, { "epoch": 0.23, "grad_norm": 3.9783153533935547, "learning_rate": 1.995870098689002e-05, "loss": 2.084, "step": 17899 }, { "epoch": 0.23, "grad_norm": 4.185488224029541, "learning_rate": 1.9958691446219363e-05, "loss": 1.6645, "step": 17900 }, { "epoch": 0.23, "grad_norm": 4.1131672859191895, "learning_rate": 1.9958681904449103e-05, "loss": 1.8465, "step": 17901 }, { "epoch": 0.23, "grad_norm": 3.898099660873413, "learning_rate": 1.995867236157923e-05, "loss": 2.1322, "step": 17902 }, { "epoch": 0.23, "grad_norm": 4.126163959503174, "learning_rate": 1.9958662817609755e-05, "loss": 2.1334, "step": 17903 }, { "epoch": 0.23, "grad_norm": 4.135072708129883, "learning_rate": 1.9958653272540675e-05, "loss": 2.1582, "step": 17904 }, { "epoch": 0.23, "grad_norm": 4.406450271606445, "learning_rate": 1.995864372637199e-05, "loss": 2.4868, "step": 17905 }, { "epoch": 0.23, "grad_norm": 4.9778900146484375, "learning_rate": 1.99586341791037e-05, "loss": 2.6336, "step": 17906 }, { "epoch": 0.23, "grad_norm": 3.874767303466797, "learning_rate": 1.9958624630735808e-05, "loss": 1.895, "step": 17907 }, { "epoch": 0.23, "grad_norm": 4.183900833129883, "learning_rate": 1.9958615081268315e-05, "loss": 1.916, "step": 17908 }, { "epoch": 0.23, "grad_norm": 4.385565757751465, "learning_rate": 1.995860553070122e-05, "loss": 2.1952, "step": 17909 }, { "epoch": 0.23, "grad_norm": 4.4874491691589355, "learning_rate": 1.9958595979034533e-05, "loss": 2.5114, "step": 17910 }, { "epoch": 0.23, "grad_norm": 4.4502763748168945, "learning_rate": 1.9958586426268244e-05, "loss": 2.413, "step": 17911 }, { "epoch": 0.23, "grad_norm": 4.137231826782227, "learning_rate": 1.9958576872402356e-05, "loss": 1.9557, "step": 17912 }, { "epoch": 0.23, "grad_norm": 4.8869853019714355, "learning_rate": 1.9958567317436875e-05, "loss": 2.9286, "step": 17913 }, { "epoch": 0.23, "grad_norm": 4.030605316162109, "learning_rate": 1.99585577613718e-05, "loss": 1.9319, "step": 17914 }, { "epoch": 0.23, "grad_norm": 4.757425785064697, "learning_rate": 1.9958548204207128e-05, "loss": 2.4516, "step": 17915 }, { "epoch": 0.23, "grad_norm": 4.096313953399658, "learning_rate": 1.9958538645942863e-05, "loss": 2.3085, "step": 17916 }, { "epoch": 0.23, "grad_norm": 4.036101341247559, "learning_rate": 1.995852908657901e-05, "loss": 2.1342, "step": 17917 }, { "epoch": 0.23, "grad_norm": 4.061570644378662, "learning_rate": 1.9958519526115566e-05, "loss": 1.964, "step": 17918 }, { "epoch": 0.23, "grad_norm": 4.162447929382324, "learning_rate": 1.995850996455253e-05, "loss": 2.0252, "step": 17919 }, { "epoch": 0.23, "grad_norm": 4.172389984130859, "learning_rate": 1.9958500401889905e-05, "loss": 2.3112, "step": 17920 }, { "epoch": 0.23, "grad_norm": 4.695836067199707, "learning_rate": 1.9958490838127695e-05, "loss": 2.746, "step": 17921 }, { "epoch": 0.23, "grad_norm": 4.213352203369141, "learning_rate": 1.99584812732659e-05, "loss": 2.3797, "step": 17922 }, { "epoch": 0.23, "grad_norm": 5.0049333572387695, "learning_rate": 1.9958471707304518e-05, "loss": 2.5285, "step": 17923 }, { "epoch": 0.23, "grad_norm": 4.4535088539123535, "learning_rate": 1.995846214024355e-05, "loss": 2.2509, "step": 17924 }, { "epoch": 0.23, "grad_norm": 4.178743362426758, "learning_rate": 1.9958452572083e-05, "loss": 1.8104, "step": 17925 }, { "epoch": 0.23, "grad_norm": 4.4767165184021, "learning_rate": 1.9958443002822866e-05, "loss": 2.3513, "step": 17926 }, { "epoch": 0.23, "grad_norm": 4.6582770347595215, "learning_rate": 1.9958433432463153e-05, "loss": 2.5393, "step": 17927 }, { "epoch": 0.23, "grad_norm": 4.407929420471191, "learning_rate": 1.9958423861003863e-05, "loss": 2.5239, "step": 17928 }, { "epoch": 0.23, "grad_norm": 4.439140319824219, "learning_rate": 1.995841428844499e-05, "loss": 2.0589, "step": 17929 }, { "epoch": 0.23, "grad_norm": 3.956794261932373, "learning_rate": 1.995840471478654e-05, "loss": 2.2387, "step": 17930 }, { "epoch": 0.23, "grad_norm": 3.960308313369751, "learning_rate": 1.9958395140028514e-05, "loss": 2.4216, "step": 17931 }, { "epoch": 0.23, "grad_norm": 3.5695412158966064, "learning_rate": 1.9958385564170913e-05, "loss": 1.9103, "step": 17932 }, { "epoch": 0.23, "grad_norm": 4.363765716552734, "learning_rate": 1.9958375987213732e-05, "loss": 2.4303, "step": 17933 }, { "epoch": 0.23, "grad_norm": 4.294456481933594, "learning_rate": 1.9958366409156984e-05, "loss": 2.4633, "step": 17934 }, { "epoch": 0.23, "grad_norm": 3.8959109783172607, "learning_rate": 1.995835683000066e-05, "loss": 2.0966, "step": 17935 }, { "epoch": 0.23, "grad_norm": 4.128552436828613, "learning_rate": 1.9958347249744764e-05, "loss": 1.8908, "step": 17936 }, { "epoch": 0.23, "grad_norm": 4.633918762207031, "learning_rate": 1.99583376683893e-05, "loss": 2.4586, "step": 17937 }, { "epoch": 0.23, "grad_norm": 4.229183673858643, "learning_rate": 1.9958328085934266e-05, "loss": 2.5382, "step": 17938 }, { "epoch": 0.23, "grad_norm": 4.613803386688232, "learning_rate": 1.9958318502379663e-05, "loss": 2.1689, "step": 17939 }, { "epoch": 0.23, "grad_norm": 4.104706287384033, "learning_rate": 1.995830891772549e-05, "loss": 2.1132, "step": 17940 }, { "epoch": 0.23, "grad_norm": 4.074917316436768, "learning_rate": 1.9958299331971757e-05, "loss": 1.652, "step": 17941 }, { "epoch": 0.23, "grad_norm": 4.334662437438965, "learning_rate": 1.9958289745118453e-05, "loss": 2.6539, "step": 17942 }, { "epoch": 0.23, "grad_norm": 4.773787021636963, "learning_rate": 1.9958280157165588e-05, "loss": 2.4464, "step": 17943 }, { "epoch": 0.23, "grad_norm": 4.088616371154785, "learning_rate": 1.995827056811316e-05, "loss": 2.076, "step": 17944 }, { "epoch": 0.23, "grad_norm": 4.088335037231445, "learning_rate": 1.9958260977961167e-05, "loss": 1.9699, "step": 17945 }, { "epoch": 0.23, "grad_norm": 4.307905673980713, "learning_rate": 1.9958251386709617e-05, "loss": 2.0969, "step": 17946 }, { "epoch": 0.23, "grad_norm": 4.410123825073242, "learning_rate": 1.9958241794358503e-05, "loss": 2.4514, "step": 17947 }, { "epoch": 0.23, "grad_norm": 3.5411858558654785, "learning_rate": 1.9958232200907832e-05, "loss": 1.7872, "step": 17948 }, { "epoch": 0.23, "grad_norm": 4.145425796508789, "learning_rate": 1.9958222606357604e-05, "loss": 2.09, "step": 17949 }, { "epoch": 0.23, "grad_norm": 4.00609827041626, "learning_rate": 1.9958213010707818e-05, "loss": 2.1019, "step": 17950 }, { "epoch": 0.23, "grad_norm": 4.145169734954834, "learning_rate": 1.9958203413958476e-05, "loss": 1.8532, "step": 17951 }, { "epoch": 0.23, "grad_norm": 5.047163963317871, "learning_rate": 1.9958193816109583e-05, "loss": 2.9222, "step": 17952 }, { "epoch": 0.23, "grad_norm": 4.583460330963135, "learning_rate": 1.9958184217161132e-05, "loss": 2.6057, "step": 17953 }, { "epoch": 0.23, "grad_norm": 4.213750839233398, "learning_rate": 1.9958174617113132e-05, "loss": 2.0814, "step": 17954 }, { "epoch": 0.23, "grad_norm": 3.956279993057251, "learning_rate": 1.9958165015965577e-05, "loss": 1.9991, "step": 17955 }, { "epoch": 0.23, "grad_norm": 4.564404487609863, "learning_rate": 1.9958155413718473e-05, "loss": 2.2559, "step": 17956 }, { "epoch": 0.23, "grad_norm": 3.95450758934021, "learning_rate": 1.9958145810371824e-05, "loss": 2.2436, "step": 17957 }, { "epoch": 0.23, "grad_norm": 4.281439304351807, "learning_rate": 1.995813620592562e-05, "loss": 2.3919, "step": 17958 }, { "epoch": 0.23, "grad_norm": 3.8330724239349365, "learning_rate": 1.9958126600379876e-05, "loss": 1.5604, "step": 17959 }, { "epoch": 0.23, "grad_norm": 3.9761016368865967, "learning_rate": 1.9958116993734576e-05, "loss": 1.9033, "step": 17960 }, { "epoch": 0.23, "grad_norm": 4.835717678070068, "learning_rate": 1.995810738598974e-05, "loss": 2.5549, "step": 17961 }, { "epoch": 0.23, "grad_norm": 4.668197154998779, "learning_rate": 1.9958097777145356e-05, "loss": 2.2724, "step": 17962 }, { "epoch": 0.23, "grad_norm": 3.712026834487915, "learning_rate": 1.995808816720143e-05, "loss": 1.8836, "step": 17963 }, { "epoch": 0.23, "grad_norm": 4.139925003051758, "learning_rate": 1.995807855615796e-05, "loss": 2.1679, "step": 17964 }, { "epoch": 0.23, "grad_norm": 4.228228569030762, "learning_rate": 1.995806894401495e-05, "loss": 1.9406, "step": 17965 }, { "epoch": 0.23, "grad_norm": 4.558824062347412, "learning_rate": 1.9958059330772403e-05, "loss": 2.7324, "step": 17966 }, { "epoch": 0.23, "grad_norm": 4.483501434326172, "learning_rate": 1.9958049716430318e-05, "loss": 2.3975, "step": 17967 }, { "epoch": 0.23, "grad_norm": 3.9681735038757324, "learning_rate": 1.9958040100988693e-05, "loss": 2.0419, "step": 17968 }, { "epoch": 0.23, "grad_norm": 4.2900567054748535, "learning_rate": 1.995803048444753e-05, "loss": 2.4522, "step": 17969 }, { "epoch": 0.23, "grad_norm": 4.674472808837891, "learning_rate": 1.9958020866806836e-05, "loss": 1.9858, "step": 17970 }, { "epoch": 0.23, "grad_norm": 4.481167316436768, "learning_rate": 1.9958011248066603e-05, "loss": 2.2307, "step": 17971 }, { "epoch": 0.23, "grad_norm": 4.519752025604248, "learning_rate": 1.9958001628226838e-05, "loss": 2.784, "step": 17972 }, { "epoch": 0.23, "grad_norm": 3.638517379760742, "learning_rate": 1.9957992007287542e-05, "loss": 1.6465, "step": 17973 }, { "epoch": 0.23, "grad_norm": 4.660680294036865, "learning_rate": 1.9957982385248713e-05, "loss": 2.2818, "step": 17974 }, { "epoch": 0.23, "grad_norm": 4.067091941833496, "learning_rate": 1.9957972762110353e-05, "loss": 2.0685, "step": 17975 }, { "epoch": 0.23, "grad_norm": 4.362613201141357, "learning_rate": 1.9957963137872464e-05, "loss": 2.4642, "step": 17976 }, { "epoch": 0.23, "grad_norm": 4.079712867736816, "learning_rate": 1.9957953512535048e-05, "loss": 2.1598, "step": 17977 }, { "epoch": 0.23, "grad_norm": 4.570691108703613, "learning_rate": 1.995794388609811e-05, "loss": 2.5083, "step": 17978 }, { "epoch": 0.23, "grad_norm": 5.081580638885498, "learning_rate": 1.995793425856164e-05, "loss": 2.132, "step": 17979 }, { "epoch": 0.23, "grad_norm": 4.338796138763428, "learning_rate": 1.9957924629925643e-05, "loss": 2.2533, "step": 17980 }, { "epoch": 0.23, "grad_norm": 4.063093185424805, "learning_rate": 1.9957915000190127e-05, "loss": 2.2299, "step": 17981 }, { "epoch": 0.23, "grad_norm": 4.203115463256836, "learning_rate": 1.9957905369355087e-05, "loss": 2.2369, "step": 17982 }, { "epoch": 0.23, "grad_norm": 3.9379706382751465, "learning_rate": 1.9957895737420525e-05, "loss": 2.1527, "step": 17983 }, { "epoch": 0.23, "grad_norm": 4.047821521759033, "learning_rate": 1.9957886104386442e-05, "loss": 1.8104, "step": 17984 }, { "epoch": 0.23, "grad_norm": 4.293508052825928, "learning_rate": 1.9957876470252843e-05, "loss": 1.748, "step": 17985 }, { "epoch": 0.23, "grad_norm": 4.177486896514893, "learning_rate": 1.995786683501972e-05, "loss": 1.9686, "step": 17986 }, { "epoch": 0.23, "grad_norm": 3.6562132835388184, "learning_rate": 1.9957857198687082e-05, "loss": 1.7198, "step": 17987 }, { "epoch": 0.23, "grad_norm": 4.627790927886963, "learning_rate": 1.9957847561254926e-05, "loss": 2.3412, "step": 17988 }, { "epoch": 0.23, "grad_norm": 4.592371940612793, "learning_rate": 1.9957837922723254e-05, "loss": 2.9282, "step": 17989 }, { "epoch": 0.23, "grad_norm": 4.461215496063232, "learning_rate": 1.9957828283092072e-05, "loss": 2.4704, "step": 17990 }, { "epoch": 0.23, "grad_norm": 4.725505352020264, "learning_rate": 1.9957818642361374e-05, "loss": 2.4951, "step": 17991 }, { "epoch": 0.23, "grad_norm": 3.779719114303589, "learning_rate": 1.9957809000531166e-05, "loss": 2.267, "step": 17992 }, { "epoch": 0.23, "grad_norm": 4.089411735534668, "learning_rate": 1.9957799357601445e-05, "loss": 1.991, "step": 17993 }, { "epoch": 0.23, "grad_norm": 4.138153076171875, "learning_rate": 1.9957789713572214e-05, "loss": 2.1003, "step": 17994 }, { "epoch": 0.23, "grad_norm": 4.118610382080078, "learning_rate": 1.9957780068443477e-05, "loss": 2.064, "step": 17995 }, { "epoch": 0.23, "grad_norm": 3.7463622093200684, "learning_rate": 1.995777042221523e-05, "loss": 1.9898, "step": 17996 }, { "epoch": 0.23, "grad_norm": 4.339897632598877, "learning_rate": 1.9957760774887476e-05, "loss": 2.0307, "step": 17997 }, { "epoch": 0.23, "grad_norm": 4.131803035736084, "learning_rate": 1.9957751126460216e-05, "loss": 2.1457, "step": 17998 }, { "epoch": 0.23, "grad_norm": 3.7541751861572266, "learning_rate": 1.9957741476933454e-05, "loss": 2.3723, "step": 17999 }, { "epoch": 0.23, "grad_norm": 3.978217124938965, "learning_rate": 1.9957731826307185e-05, "loss": 2.4078, "step": 18000 }, { "epoch": 0.23, "grad_norm": 3.8856210708618164, "learning_rate": 1.9957722174581412e-05, "loss": 1.9748, "step": 18001 }, { "epoch": 0.23, "grad_norm": 4.09514856338501, "learning_rate": 1.9957712521756144e-05, "loss": 2.4885, "step": 18002 }, { "epoch": 0.23, "grad_norm": 3.9072422981262207, "learning_rate": 1.995770286783137e-05, "loss": 1.911, "step": 18003 }, { "epoch": 0.23, "grad_norm": 4.294544696807861, "learning_rate": 1.99576932128071e-05, "loss": 2.0224, "step": 18004 }, { "epoch": 0.23, "grad_norm": 4.002171993255615, "learning_rate": 1.9957683556683328e-05, "loss": 2.1328, "step": 18005 }, { "epoch": 0.23, "grad_norm": 3.550431728363037, "learning_rate": 1.995767389946006e-05, "loss": 1.8754, "step": 18006 }, { "epoch": 0.23, "grad_norm": 4.25936222076416, "learning_rate": 1.99576642411373e-05, "loss": 2.144, "step": 18007 }, { "epoch": 0.23, "grad_norm": 4.671572208404541, "learning_rate": 1.995765458171504e-05, "loss": 2.3477, "step": 18008 }, { "epoch": 0.23, "grad_norm": 4.149235725402832, "learning_rate": 1.9957644921193288e-05, "loss": 2.1426, "step": 18009 }, { "epoch": 0.23, "grad_norm": 4.185347557067871, "learning_rate": 1.9957635259572043e-05, "loss": 2.1515, "step": 18010 }, { "epoch": 0.23, "grad_norm": 4.19073486328125, "learning_rate": 1.9957625596851306e-05, "loss": 2.2953, "step": 18011 }, { "epoch": 0.23, "grad_norm": 4.4034953117370605, "learning_rate": 1.9957615933031075e-05, "loss": 2.317, "step": 18012 }, { "epoch": 0.23, "grad_norm": 4.462993144989014, "learning_rate": 1.995760626811136e-05, "loss": 2.0066, "step": 18013 }, { "epoch": 0.23, "grad_norm": 4.0623860359191895, "learning_rate": 1.9957596602092153e-05, "loss": 2.1868, "step": 18014 }, { "epoch": 0.23, "grad_norm": 3.5546369552612305, "learning_rate": 1.9957586934973458e-05, "loss": 1.7209, "step": 18015 }, { "epoch": 0.23, "grad_norm": 4.878564357757568, "learning_rate": 1.995757726675528e-05, "loss": 2.6303, "step": 18016 }, { "epoch": 0.23, "grad_norm": 4.38139533996582, "learning_rate": 1.9957567597437616e-05, "loss": 2.2197, "step": 18017 }, { "epoch": 0.23, "grad_norm": 4.0369343757629395, "learning_rate": 1.9957557927020463e-05, "loss": 2.1528, "step": 18018 }, { "epoch": 0.23, "grad_norm": 3.932042121887207, "learning_rate": 1.995754825550383e-05, "loss": 1.7135, "step": 18019 }, { "epoch": 0.23, "grad_norm": 4.515132427215576, "learning_rate": 1.9957538582887715e-05, "loss": 2.1445, "step": 18020 }, { "epoch": 0.23, "grad_norm": 4.16336727142334, "learning_rate": 1.9957528909172118e-05, "loss": 2.1127, "step": 18021 }, { "epoch": 0.23, "grad_norm": 4.166482448577881, "learning_rate": 1.995751923435704e-05, "loss": 2.063, "step": 18022 }, { "epoch": 0.23, "grad_norm": 4.05803918838501, "learning_rate": 1.9957509558442484e-05, "loss": 2.4926, "step": 18023 }, { "epoch": 0.23, "grad_norm": 3.3497071266174316, "learning_rate": 1.9957499881428452e-05, "loss": 1.6114, "step": 18024 }, { "epoch": 0.23, "grad_norm": 5.166985511779785, "learning_rate": 1.9957490203314937e-05, "loss": 2.5362, "step": 18025 }, { "epoch": 0.23, "grad_norm": 4.086052417755127, "learning_rate": 1.995748052410195e-05, "loss": 1.8931, "step": 18026 }, { "epoch": 0.23, "grad_norm": 4.125395774841309, "learning_rate": 1.995747084378949e-05, "loss": 2.2251, "step": 18027 }, { "epoch": 0.23, "grad_norm": 4.358144283294678, "learning_rate": 1.9957461162377554e-05, "loss": 2.6431, "step": 18028 }, { "epoch": 0.23, "grad_norm": 4.234518051147461, "learning_rate": 1.9957451479866146e-05, "loss": 2.2941, "step": 18029 }, { "epoch": 0.23, "grad_norm": 4.190836429595947, "learning_rate": 1.9957441796255265e-05, "loss": 2.2576, "step": 18030 }, { "epoch": 0.23, "grad_norm": 3.5931286811828613, "learning_rate": 1.9957432111544916e-05, "loss": 1.8199, "step": 18031 }, { "epoch": 0.23, "grad_norm": 4.0606842041015625, "learning_rate": 1.9957422425735097e-05, "loss": 1.9235, "step": 18032 }, { "epoch": 0.23, "grad_norm": 3.589827299118042, "learning_rate": 1.995741273882581e-05, "loss": 1.7803, "step": 18033 }, { "epoch": 0.23, "grad_norm": 4.374697208404541, "learning_rate": 1.9957403050817055e-05, "loss": 2.0322, "step": 18034 }, { "epoch": 0.23, "grad_norm": 4.106228828430176, "learning_rate": 1.9957393361708832e-05, "loss": 2.0155, "step": 18035 }, { "epoch": 0.23, "grad_norm": 4.417142391204834, "learning_rate": 1.9957383671501144e-05, "loss": 2.6784, "step": 18036 }, { "epoch": 0.23, "grad_norm": 3.9375975131988525, "learning_rate": 1.9957373980193993e-05, "loss": 1.9175, "step": 18037 }, { "epoch": 0.23, "grad_norm": 4.077059268951416, "learning_rate": 1.995736428778738e-05, "loss": 2.0887, "step": 18038 }, { "epoch": 0.23, "grad_norm": 4.452512264251709, "learning_rate": 1.9957354594281305e-05, "loss": 2.55, "step": 18039 }, { "epoch": 0.23, "grad_norm": 4.159228324890137, "learning_rate": 1.9957344899675767e-05, "loss": 2.1889, "step": 18040 }, { "epoch": 0.23, "grad_norm": 4.270398139953613, "learning_rate": 1.995733520397077e-05, "loss": 2.236, "step": 18041 }, { "epoch": 0.23, "grad_norm": 4.707099914550781, "learning_rate": 1.9957325507166315e-05, "loss": 2.2676, "step": 18042 }, { "epoch": 0.23, "grad_norm": 4.169035911560059, "learning_rate": 1.9957315809262404e-05, "loss": 2.0322, "step": 18043 }, { "epoch": 0.23, "grad_norm": 4.707849979400635, "learning_rate": 1.9957306110259034e-05, "loss": 2.5455, "step": 18044 }, { "epoch": 0.23, "grad_norm": 3.802128553390503, "learning_rate": 1.995729641015621e-05, "loss": 1.9123, "step": 18045 }, { "epoch": 0.23, "grad_norm": 3.774919033050537, "learning_rate": 1.995728670895393e-05, "loss": 1.9107, "step": 18046 }, { "epoch": 0.23, "grad_norm": 4.933773040771484, "learning_rate": 1.9957277006652197e-05, "loss": 2.3183, "step": 18047 }, { "epoch": 0.23, "grad_norm": 4.077506065368652, "learning_rate": 1.9957267303251015e-05, "loss": 2.1081, "step": 18048 }, { "epoch": 0.23, "grad_norm": 4.084447860717773, "learning_rate": 1.9957257598750374e-05, "loss": 2.3816, "step": 18049 }, { "epoch": 0.23, "grad_norm": 3.6927573680877686, "learning_rate": 1.995724789315029e-05, "loss": 2.2416, "step": 18050 }, { "epoch": 0.23, "grad_norm": 4.184085369110107, "learning_rate": 1.9957238186450752e-05, "loss": 2.1953, "step": 18051 }, { "epoch": 0.23, "grad_norm": 4.3336663246154785, "learning_rate": 1.9957228478651772e-05, "loss": 2.3002, "step": 18052 }, { "epoch": 0.23, "grad_norm": 3.8554344177246094, "learning_rate": 1.995721876975334e-05, "loss": 1.9754, "step": 18053 }, { "epoch": 0.23, "grad_norm": 3.8268513679504395, "learning_rate": 1.995720905975546e-05, "loss": 2.2255, "step": 18054 }, { "epoch": 0.23, "grad_norm": 4.575255870819092, "learning_rate": 1.9957199348658144e-05, "loss": 2.2619, "step": 18055 }, { "epoch": 0.23, "grad_norm": 3.7823262214660645, "learning_rate": 1.995718963646138e-05, "loss": 1.9577, "step": 18056 }, { "epoch": 0.23, "grad_norm": 4.151242256164551, "learning_rate": 1.995717992316517e-05, "loss": 1.9348, "step": 18057 }, { "epoch": 0.23, "grad_norm": 4.166748046875, "learning_rate": 1.9957170208769522e-05, "loss": 2.0028, "step": 18058 }, { "epoch": 0.23, "grad_norm": 4.255900859832764, "learning_rate": 1.995716049327443e-05, "loss": 2.1446, "step": 18059 }, { "epoch": 0.23, "grad_norm": 4.687081336975098, "learning_rate": 1.99571507766799e-05, "loss": 2.0862, "step": 18060 }, { "epoch": 0.23, "grad_norm": 4.154877662658691, "learning_rate": 1.9957141058985932e-05, "loss": 2.1983, "step": 18061 }, { "epoch": 0.23, "grad_norm": 4.350256443023682, "learning_rate": 1.9957131340192532e-05, "loss": 2.1264, "step": 18062 }, { "epoch": 0.23, "grad_norm": 3.7204415798187256, "learning_rate": 1.995712162029969e-05, "loss": 2.1365, "step": 18063 }, { "epoch": 0.23, "grad_norm": 4.428857803344727, "learning_rate": 1.9957111899307412e-05, "loss": 2.2874, "step": 18064 }, { "epoch": 0.23, "grad_norm": 3.653571605682373, "learning_rate": 1.9957102177215703e-05, "loss": 2.0633, "step": 18065 }, { "epoch": 0.23, "grad_norm": 3.8094193935394287, "learning_rate": 1.995709245402456e-05, "loss": 2.0166, "step": 18066 }, { "epoch": 0.23, "grad_norm": 4.377095699310303, "learning_rate": 1.995708272973398e-05, "loss": 2.5048, "step": 18067 }, { "epoch": 0.23, "grad_norm": 3.7520904541015625, "learning_rate": 1.995707300434398e-05, "loss": 1.6827, "step": 18068 }, { "epoch": 0.23, "grad_norm": 4.041733264923096, "learning_rate": 1.9957063277854543e-05, "loss": 2.1058, "step": 18069 }, { "epoch": 0.23, "grad_norm": 4.060452938079834, "learning_rate": 1.9957053550265678e-05, "loss": 2.0567, "step": 18070 }, { "epoch": 0.23, "grad_norm": 4.396783828735352, "learning_rate": 1.9957043821577384e-05, "loss": 1.7227, "step": 18071 }, { "epoch": 0.23, "grad_norm": 3.8029987812042236, "learning_rate": 1.9957034091789666e-05, "loss": 2.0472, "step": 18072 }, { "epoch": 0.23, "grad_norm": 4.6470794677734375, "learning_rate": 1.9957024360902522e-05, "loss": 2.5665, "step": 18073 }, { "epoch": 0.23, "grad_norm": 4.016674518585205, "learning_rate": 1.9957014628915953e-05, "loss": 2.1732, "step": 18074 }, { "epoch": 0.23, "grad_norm": 4.070849895477295, "learning_rate": 1.995700489582996e-05, "loss": 1.8144, "step": 18075 }, { "epoch": 0.23, "grad_norm": 4.117918491363525, "learning_rate": 1.9956995161644544e-05, "loss": 2.0245, "step": 18076 }, { "epoch": 0.23, "grad_norm": 3.92887282371521, "learning_rate": 1.995698542635971e-05, "loss": 2.0978, "step": 18077 }, { "epoch": 0.23, "grad_norm": 4.459639549255371, "learning_rate": 1.9956975689975455e-05, "loss": 2.5029, "step": 18078 }, { "epoch": 0.23, "grad_norm": 4.190389633178711, "learning_rate": 1.995696595249178e-05, "loss": 2.4436, "step": 18079 }, { "epoch": 0.23, "grad_norm": 4.716184139251709, "learning_rate": 1.9956956213908686e-05, "loss": 2.5346, "step": 18080 }, { "epoch": 0.23, "grad_norm": 3.829042673110962, "learning_rate": 1.995694647422618e-05, "loss": 2.0472, "step": 18081 }, { "epoch": 0.23, "grad_norm": 4.209011554718018, "learning_rate": 1.9956936733444254e-05, "loss": 2.2446, "step": 18082 }, { "epoch": 0.23, "grad_norm": 4.709848880767822, "learning_rate": 1.9956926991562914e-05, "loss": 2.5176, "step": 18083 }, { "epoch": 0.23, "grad_norm": 4.54317569732666, "learning_rate": 1.9956917248582163e-05, "loss": 2.0229, "step": 18084 }, { "epoch": 0.23, "grad_norm": 4.806858062744141, "learning_rate": 1.9956907504501993e-05, "loss": 2.627, "step": 18085 }, { "epoch": 0.23, "grad_norm": 4.137813091278076, "learning_rate": 1.9956897759322415e-05, "loss": 2.1181, "step": 18086 }, { "epoch": 0.23, "grad_norm": 4.0006513595581055, "learning_rate": 1.9956888013043426e-05, "loss": 2.1642, "step": 18087 }, { "epoch": 0.23, "grad_norm": 3.8291919231414795, "learning_rate": 1.995687826566503e-05, "loss": 2.017, "step": 18088 }, { "epoch": 0.23, "grad_norm": 4.0760698318481445, "learning_rate": 1.9956868517187226e-05, "loss": 2.4099, "step": 18089 }, { "epoch": 0.23, "grad_norm": 4.010509967803955, "learning_rate": 1.9956858767610012e-05, "loss": 2.0157, "step": 18090 }, { "epoch": 0.23, "grad_norm": 3.9367148876190186, "learning_rate": 1.995684901693339e-05, "loss": 2.1196, "step": 18091 }, { "epoch": 0.23, "grad_norm": 4.201414108276367, "learning_rate": 1.9956839265157367e-05, "loss": 2.0508, "step": 18092 }, { "epoch": 0.23, "grad_norm": 3.839261293411255, "learning_rate": 1.995682951228194e-05, "loss": 2.1293, "step": 18093 }, { "epoch": 0.23, "grad_norm": 4.2684149742126465, "learning_rate": 1.995681975830711e-05, "loss": 1.9206, "step": 18094 }, { "epoch": 0.23, "grad_norm": 4.147326946258545, "learning_rate": 1.9956810003232877e-05, "loss": 2.2461, "step": 18095 }, { "epoch": 0.23, "grad_norm": 4.045230388641357, "learning_rate": 1.995680024705924e-05, "loss": 2.2566, "step": 18096 }, { "epoch": 0.23, "grad_norm": 4.236015796661377, "learning_rate": 1.9956790489786208e-05, "loss": 2.3546, "step": 18097 }, { "epoch": 0.23, "grad_norm": 3.9796762466430664, "learning_rate": 1.9956780731413777e-05, "loss": 2.0335, "step": 18098 }, { "epoch": 0.23, "grad_norm": 3.9310171604156494, "learning_rate": 1.9956770971941948e-05, "loss": 1.8067, "step": 18099 }, { "epoch": 0.23, "grad_norm": 3.606003761291504, "learning_rate": 1.995676121137072e-05, "loss": 1.8368, "step": 18100 }, { "epoch": 0.23, "grad_norm": 4.21793794631958, "learning_rate": 1.99567514497001e-05, "loss": 1.9543, "step": 18101 }, { "epoch": 0.23, "grad_norm": 4.1008501052856445, "learning_rate": 1.9956741686930083e-05, "loss": 1.8247, "step": 18102 }, { "epoch": 0.23, "grad_norm": 4.39069938659668, "learning_rate": 1.9956731923060676e-05, "loss": 2.2397, "step": 18103 }, { "epoch": 0.23, "grad_norm": 3.9377124309539795, "learning_rate": 1.9956722158091874e-05, "loss": 1.9585, "step": 18104 }, { "epoch": 0.23, "grad_norm": 4.539683818817139, "learning_rate": 1.995671239202368e-05, "loss": 2.4565, "step": 18105 }, { "epoch": 0.23, "grad_norm": 4.612430095672607, "learning_rate": 1.99567026248561e-05, "loss": 2.6171, "step": 18106 }, { "epoch": 0.23, "grad_norm": 4.304697513580322, "learning_rate": 1.9956692856589132e-05, "loss": 2.2835, "step": 18107 }, { "epoch": 0.24, "grad_norm": 4.924831867218018, "learning_rate": 1.9956683087222772e-05, "loss": 3.0124, "step": 18108 }, { "epoch": 0.24, "grad_norm": 4.167261600494385, "learning_rate": 1.9956673316757028e-05, "loss": 2.105, "step": 18109 }, { "epoch": 0.24, "grad_norm": 3.9571633338928223, "learning_rate": 1.9956663545191896e-05, "loss": 1.9258, "step": 18110 }, { "epoch": 0.24, "grad_norm": 4.152431488037109, "learning_rate": 1.995665377252738e-05, "loss": 2.3691, "step": 18111 }, { "epoch": 0.24, "grad_norm": 4.131272315979004, "learning_rate": 1.995664399876348e-05, "loss": 2.0468, "step": 18112 }, { "epoch": 0.24, "grad_norm": 4.062648773193359, "learning_rate": 1.99566342239002e-05, "loss": 2.0929, "step": 18113 }, { "epoch": 0.24, "grad_norm": 4.140014171600342, "learning_rate": 1.995662444793754e-05, "loss": 2.2369, "step": 18114 }, { "epoch": 0.24, "grad_norm": 4.315512657165527, "learning_rate": 1.9956614670875495e-05, "loss": 2.3533, "step": 18115 }, { "epoch": 0.24, "grad_norm": 4.137665748596191, "learning_rate": 1.995660489271407e-05, "loss": 2.1954, "step": 18116 }, { "epoch": 0.24, "grad_norm": 3.967681407928467, "learning_rate": 1.995659511345327e-05, "loss": 1.969, "step": 18117 }, { "epoch": 0.24, "grad_norm": 4.099052906036377, "learning_rate": 1.9956585333093095e-05, "loss": 1.8388, "step": 18118 }, { "epoch": 0.24, "grad_norm": 3.7062442302703857, "learning_rate": 1.995657555163354e-05, "loss": 1.63, "step": 18119 }, { "epoch": 0.24, "grad_norm": 4.185008525848389, "learning_rate": 1.9956565769074615e-05, "loss": 2.1512, "step": 18120 }, { "epoch": 0.24, "grad_norm": 4.42493200302124, "learning_rate": 1.9956555985416313e-05, "loss": 2.1102, "step": 18121 }, { "epoch": 0.24, "grad_norm": 4.1973795890808105, "learning_rate": 1.9956546200658636e-05, "loss": 2.2747, "step": 18122 }, { "epoch": 0.24, "grad_norm": 3.7926511764526367, "learning_rate": 1.9956536414801592e-05, "loss": 2.2306, "step": 18123 }, { "epoch": 0.24, "grad_norm": 4.462313175201416, "learning_rate": 1.9956526627845173e-05, "loss": 2.2748, "step": 18124 }, { "epoch": 0.24, "grad_norm": 4.144748210906982, "learning_rate": 1.9956516839789387e-05, "loss": 1.8381, "step": 18125 }, { "epoch": 0.24, "grad_norm": 3.9486544132232666, "learning_rate": 1.9956507050634234e-05, "loss": 2.2727, "step": 18126 }, { "epoch": 0.24, "grad_norm": 4.362306118011475, "learning_rate": 1.995649726037971e-05, "loss": 2.1351, "step": 18127 }, { "epoch": 0.24, "grad_norm": 4.135018825531006, "learning_rate": 1.995648746902582e-05, "loss": 2.7164, "step": 18128 }, { "epoch": 0.24, "grad_norm": 4.013012409210205, "learning_rate": 1.995647767657257e-05, "loss": 2.1582, "step": 18129 }, { "epoch": 0.24, "grad_norm": 4.526183128356934, "learning_rate": 1.9956467883019953e-05, "loss": 2.3168, "step": 18130 }, { "epoch": 0.24, "grad_norm": 4.107961177825928, "learning_rate": 1.9956458088367972e-05, "loss": 2.4949, "step": 18131 }, { "epoch": 0.24, "grad_norm": 4.228914737701416, "learning_rate": 1.995644829261663e-05, "loss": 2.0351, "step": 18132 }, { "epoch": 0.24, "grad_norm": 3.9374947547912598, "learning_rate": 1.9956438495765926e-05, "loss": 2.1941, "step": 18133 }, { "epoch": 0.24, "grad_norm": 3.813568353652954, "learning_rate": 1.9956428697815867e-05, "loss": 1.9481, "step": 18134 }, { "epoch": 0.24, "grad_norm": 3.9632320404052734, "learning_rate": 1.9956418898766447e-05, "loss": 2.1395, "step": 18135 }, { "epoch": 0.24, "grad_norm": 4.359327793121338, "learning_rate": 1.9956409098617663e-05, "loss": 2.27, "step": 18136 }, { "epoch": 0.24, "grad_norm": 4.520341396331787, "learning_rate": 1.9956399297369532e-05, "loss": 2.0951, "step": 18137 }, { "epoch": 0.24, "grad_norm": 3.7080202102661133, "learning_rate": 1.995638949502204e-05, "loss": 1.5536, "step": 18138 }, { "epoch": 0.24, "grad_norm": 4.344967365264893, "learning_rate": 1.9956379691575195e-05, "loss": 2.2298, "step": 18139 }, { "epoch": 0.24, "grad_norm": 4.085400581359863, "learning_rate": 1.9956369887028995e-05, "loss": 2.1958, "step": 18140 }, { "epoch": 0.24, "grad_norm": 4.467033863067627, "learning_rate": 1.9956360081383445e-05, "loss": 2.6667, "step": 18141 }, { "epoch": 0.24, "grad_norm": 3.4323954582214355, "learning_rate": 1.9956350274638545e-05, "loss": 1.6095, "step": 18142 }, { "epoch": 0.24, "grad_norm": 3.818930149078369, "learning_rate": 1.9956340466794294e-05, "loss": 1.7593, "step": 18143 }, { "epoch": 0.24, "grad_norm": 4.315155982971191, "learning_rate": 1.9956330657850693e-05, "loss": 2.7013, "step": 18144 }, { "epoch": 0.24, "grad_norm": 4.695371150970459, "learning_rate": 1.9956320847807744e-05, "loss": 2.801, "step": 18145 }, { "epoch": 0.24, "grad_norm": 3.7386045455932617, "learning_rate": 1.9956311036665448e-05, "loss": 1.5107, "step": 18146 }, { "epoch": 0.24, "grad_norm": 3.8055875301361084, "learning_rate": 1.995630122442381e-05, "loss": 2.2017, "step": 18147 }, { "epoch": 0.24, "grad_norm": 4.480145454406738, "learning_rate": 1.9956291411082822e-05, "loss": 2.3529, "step": 18148 }, { "epoch": 0.24, "grad_norm": 3.9888789653778076, "learning_rate": 1.9956281596642495e-05, "loss": 2.4641, "step": 18149 }, { "epoch": 0.24, "grad_norm": 3.7020998001098633, "learning_rate": 1.9956271781102825e-05, "loss": 1.7269, "step": 18150 }, { "epoch": 0.24, "grad_norm": 4.394322872161865, "learning_rate": 1.9956261964463814e-05, "loss": 2.6078, "step": 18151 }, { "epoch": 0.24, "grad_norm": 3.698240041732788, "learning_rate": 1.995625214672546e-05, "loss": 2.1972, "step": 18152 }, { "epoch": 0.24, "grad_norm": 3.8764092922210693, "learning_rate": 1.9956242327887767e-05, "loss": 2.4108, "step": 18153 }, { "epoch": 0.24, "grad_norm": 3.9332377910614014, "learning_rate": 1.995623250795074e-05, "loss": 1.997, "step": 18154 }, { "epoch": 0.24, "grad_norm": 3.658473014831543, "learning_rate": 1.995622268691437e-05, "loss": 1.8321, "step": 18155 }, { "epoch": 0.24, "grad_norm": 3.6484644412994385, "learning_rate": 1.9956212864778667e-05, "loss": 1.7856, "step": 18156 }, { "epoch": 0.24, "grad_norm": 3.7080800533294678, "learning_rate": 1.995620304154363e-05, "loss": 1.8948, "step": 18157 }, { "epoch": 0.24, "grad_norm": 4.058667182922363, "learning_rate": 1.995619321720926e-05, "loss": 2.2511, "step": 18158 }, { "epoch": 0.24, "grad_norm": 4.2320237159729, "learning_rate": 1.9956183391775554e-05, "loss": 2.3672, "step": 18159 }, { "epoch": 0.24, "grad_norm": 3.613452196121216, "learning_rate": 1.995617356524252e-05, "loss": 2.0779, "step": 18160 }, { "epoch": 0.24, "grad_norm": 4.896831035614014, "learning_rate": 1.9956163737610154e-05, "loss": 2.1154, "step": 18161 }, { "epoch": 0.24, "grad_norm": 4.518040657043457, "learning_rate": 1.995615390887846e-05, "loss": 3.0617, "step": 18162 }, { "epoch": 0.24, "grad_norm": 3.854050397872925, "learning_rate": 1.9956144079047436e-05, "loss": 2.0479, "step": 18163 }, { "epoch": 0.24, "grad_norm": 4.796370029449463, "learning_rate": 1.9956134248117085e-05, "loss": 2.2758, "step": 18164 }, { "epoch": 0.24, "grad_norm": 4.830611228942871, "learning_rate": 1.9956124416087408e-05, "loss": 2.3057, "step": 18165 }, { "epoch": 0.24, "grad_norm": 4.336064338684082, "learning_rate": 1.9956114582958407e-05, "loss": 2.1119, "step": 18166 }, { "epoch": 0.24, "grad_norm": 4.280087947845459, "learning_rate": 1.9956104748730082e-05, "loss": 2.2769, "step": 18167 }, { "epoch": 0.24, "grad_norm": 3.787907361984253, "learning_rate": 1.9956094913402435e-05, "loss": 1.9673, "step": 18168 }, { "epoch": 0.24, "grad_norm": 3.9997432231903076, "learning_rate": 1.9956085076975463e-05, "loss": 2.0861, "step": 18169 }, { "epoch": 0.24, "grad_norm": 4.509599208831787, "learning_rate": 1.9956075239449172e-05, "loss": 2.6559, "step": 18170 }, { "epoch": 0.24, "grad_norm": 3.4422969818115234, "learning_rate": 1.9956065400823558e-05, "loss": 1.8325, "step": 18171 }, { "epoch": 0.24, "grad_norm": 4.908946514129639, "learning_rate": 1.995605556109863e-05, "loss": 2.3219, "step": 18172 }, { "epoch": 0.24, "grad_norm": 3.98356556892395, "learning_rate": 1.9956045720274382e-05, "loss": 1.9114, "step": 18173 }, { "epoch": 0.24, "grad_norm": 4.108822345733643, "learning_rate": 1.995603587835082e-05, "loss": 2.1668, "step": 18174 }, { "epoch": 0.24, "grad_norm": 4.085148811340332, "learning_rate": 1.9956026035327943e-05, "loss": 2.3303, "step": 18175 }, { "epoch": 0.24, "grad_norm": 3.931703805923462, "learning_rate": 1.995601619120575e-05, "loss": 2.2459, "step": 18176 }, { "epoch": 0.24, "grad_norm": 4.017292022705078, "learning_rate": 1.995600634598424e-05, "loss": 1.9442, "step": 18177 }, { "epoch": 0.24, "grad_norm": 3.954346179962158, "learning_rate": 1.9955996499663427e-05, "loss": 2.0968, "step": 18178 }, { "epoch": 0.24, "grad_norm": 4.124944686889648, "learning_rate": 1.9955986652243296e-05, "loss": 2.4394, "step": 18179 }, { "epoch": 0.24, "grad_norm": 4.180533409118652, "learning_rate": 1.995597680372386e-05, "loss": 2.3991, "step": 18180 }, { "epoch": 0.24, "grad_norm": 4.1196088790893555, "learning_rate": 1.9955966954105112e-05, "loss": 2.2658, "step": 18181 }, { "epoch": 0.24, "grad_norm": 4.1664838790893555, "learning_rate": 1.9955957103387055e-05, "loss": 2.1643, "step": 18182 }, { "epoch": 0.24, "grad_norm": 4.428995132446289, "learning_rate": 1.9955947251569695e-05, "loss": 2.1535, "step": 18183 }, { "epoch": 0.24, "grad_norm": 3.4307169914245605, "learning_rate": 1.995593739865303e-05, "loss": 1.5715, "step": 18184 }, { "epoch": 0.24, "grad_norm": 3.960441827774048, "learning_rate": 1.9955927544637056e-05, "loss": 2.2013, "step": 18185 }, { "epoch": 0.24, "grad_norm": 3.715242862701416, "learning_rate": 1.9955917689521783e-05, "loss": 2.2507, "step": 18186 }, { "epoch": 0.24, "grad_norm": 3.900526285171509, "learning_rate": 1.9955907833307204e-05, "loss": 2.4402, "step": 18187 }, { "epoch": 0.24, "grad_norm": 3.677835464477539, "learning_rate": 1.995589797599333e-05, "loss": 2.0915, "step": 18188 }, { "epoch": 0.24, "grad_norm": 4.946736812591553, "learning_rate": 1.995588811758015e-05, "loss": 2.8965, "step": 18189 }, { "epoch": 0.24, "grad_norm": 3.6155881881713867, "learning_rate": 1.9955878258067672e-05, "loss": 1.8625, "step": 18190 }, { "epoch": 0.24, "grad_norm": 4.155622482299805, "learning_rate": 1.9955868397455898e-05, "loss": 2.2532, "step": 18191 }, { "epoch": 0.24, "grad_norm": 3.7164721488952637, "learning_rate": 1.9955858535744824e-05, "loss": 1.7065, "step": 18192 }, { "epoch": 0.24, "grad_norm": 4.7372870445251465, "learning_rate": 1.995584867293446e-05, "loss": 2.5221, "step": 18193 }, { "epoch": 0.24, "grad_norm": 5.044318199157715, "learning_rate": 1.99558388090248e-05, "loss": 2.4515, "step": 18194 }, { "epoch": 0.24, "grad_norm": 3.8740200996398926, "learning_rate": 1.9955828944015844e-05, "loss": 1.7613, "step": 18195 }, { "epoch": 0.24, "grad_norm": 4.300245761871338, "learning_rate": 1.9955819077907595e-05, "loss": 1.9909, "step": 18196 }, { "epoch": 0.24, "grad_norm": 4.048354148864746, "learning_rate": 1.9955809210700056e-05, "loss": 2.2943, "step": 18197 }, { "epoch": 0.24, "grad_norm": 4.157490253448486, "learning_rate": 1.9955799342393228e-05, "loss": 1.9425, "step": 18198 }, { "epoch": 0.24, "grad_norm": 3.983949899673462, "learning_rate": 1.9955789472987107e-05, "loss": 2.3434, "step": 18199 }, { "epoch": 0.24, "grad_norm": 3.633387565612793, "learning_rate": 1.99557796024817e-05, "loss": 1.9426, "step": 18200 }, { "epoch": 0.24, "grad_norm": 4.124646186828613, "learning_rate": 1.9955769730877007e-05, "loss": 2.3681, "step": 18201 }, { "epoch": 0.24, "grad_norm": 4.274794578552246, "learning_rate": 1.9955759858173027e-05, "loss": 2.69, "step": 18202 }, { "epoch": 0.24, "grad_norm": 4.395493984222412, "learning_rate": 1.9955749984369765e-05, "loss": 2.051, "step": 18203 }, { "epoch": 0.24, "grad_norm": 4.454409599304199, "learning_rate": 1.9955740109467213e-05, "loss": 2.357, "step": 18204 }, { "epoch": 0.24, "grad_norm": 4.549829006195068, "learning_rate": 1.9955730233465385e-05, "loss": 2.5621, "step": 18205 }, { "epoch": 0.24, "grad_norm": 4.221926689147949, "learning_rate": 1.9955720356364275e-05, "loss": 1.9817, "step": 18206 }, { "epoch": 0.24, "grad_norm": 4.410362243652344, "learning_rate": 1.9955710478163882e-05, "loss": 1.9295, "step": 18207 }, { "epoch": 0.24, "grad_norm": 4.376473426818848, "learning_rate": 1.9955700598864206e-05, "loss": 2.0832, "step": 18208 }, { "epoch": 0.24, "grad_norm": 4.1421027183532715, "learning_rate": 1.995569071846526e-05, "loss": 2.1534, "step": 18209 }, { "epoch": 0.24, "grad_norm": 4.0938944816589355, "learning_rate": 1.995568083696703e-05, "loss": 1.9829, "step": 18210 }, { "epoch": 0.24, "grad_norm": 4.031245231628418, "learning_rate": 1.995567095436953e-05, "loss": 2.1489, "step": 18211 }, { "epoch": 0.24, "grad_norm": 4.121889114379883, "learning_rate": 1.995566107067275e-05, "loss": 2.2194, "step": 18212 }, { "epoch": 0.24, "grad_norm": 4.6246466636657715, "learning_rate": 1.9955651185876697e-05, "loss": 2.3116, "step": 18213 }, { "epoch": 0.24, "grad_norm": 4.988661289215088, "learning_rate": 1.995564129998137e-05, "loss": 2.4788, "step": 18214 }, { "epoch": 0.24, "grad_norm": 4.418257236480713, "learning_rate": 1.9955631412986777e-05, "loss": 2.4463, "step": 18215 }, { "epoch": 0.24, "grad_norm": 4.566495418548584, "learning_rate": 1.9955621524892907e-05, "loss": 2.3492, "step": 18216 }, { "epoch": 0.24, "grad_norm": 3.9436774253845215, "learning_rate": 1.995561163569977e-05, "loss": 2.2989, "step": 18217 }, { "epoch": 0.24, "grad_norm": 4.117892265319824, "learning_rate": 1.9955601745407366e-05, "loss": 2.0089, "step": 18218 }, { "epoch": 0.24, "grad_norm": 4.032068729400635, "learning_rate": 1.9955591854015695e-05, "loss": 2.2047, "step": 18219 }, { "epoch": 0.24, "grad_norm": 3.888084650039673, "learning_rate": 1.9955581961524758e-05, "loss": 2.2282, "step": 18220 }, { "epoch": 0.24, "grad_norm": 3.7485015392303467, "learning_rate": 1.9955572067934552e-05, "loss": 2.0962, "step": 18221 }, { "epoch": 0.24, "grad_norm": 4.72690486907959, "learning_rate": 1.9955562173245084e-05, "loss": 2.5012, "step": 18222 }, { "epoch": 0.24, "grad_norm": 4.434104919433594, "learning_rate": 1.9955552277456353e-05, "loss": 2.4081, "step": 18223 }, { "epoch": 0.24, "grad_norm": 5.3912248611450195, "learning_rate": 1.995554238056836e-05, "loss": 2.0827, "step": 18224 }, { "epoch": 0.24, "grad_norm": 4.446160316467285, "learning_rate": 1.9955532482581107e-05, "loss": 1.9184, "step": 18225 }, { "epoch": 0.24, "grad_norm": 4.160194396972656, "learning_rate": 1.9955522583494593e-05, "loss": 2.2252, "step": 18226 }, { "epoch": 0.24, "grad_norm": 4.23595666885376, "learning_rate": 1.9955512683308822e-05, "loss": 1.9485, "step": 18227 }, { "epoch": 0.24, "grad_norm": 4.41909646987915, "learning_rate": 1.9955502782023793e-05, "loss": 2.0633, "step": 18228 }, { "epoch": 0.24, "grad_norm": 4.084144592285156, "learning_rate": 1.9955492879639508e-05, "loss": 2.1817, "step": 18229 }, { "epoch": 0.24, "grad_norm": 3.859602451324463, "learning_rate": 1.9955482976155967e-05, "loss": 1.9769, "step": 18230 }, { "epoch": 0.24, "grad_norm": 3.5861289501190186, "learning_rate": 1.995547307157317e-05, "loss": 1.8834, "step": 18231 }, { "epoch": 0.24, "grad_norm": 4.354996204376221, "learning_rate": 1.995546316589112e-05, "loss": 2.1166, "step": 18232 }, { "epoch": 0.24, "grad_norm": 4.148810386657715, "learning_rate": 1.9955453259109822e-05, "loss": 1.7863, "step": 18233 }, { "epoch": 0.24, "grad_norm": 4.2554731369018555, "learning_rate": 1.9955443351229272e-05, "loss": 2.7301, "step": 18234 }, { "epoch": 0.24, "grad_norm": 3.656419038772583, "learning_rate": 1.995543344224947e-05, "loss": 1.9674, "step": 18235 }, { "epoch": 0.24, "grad_norm": 4.01785945892334, "learning_rate": 1.995542353217042e-05, "loss": 1.9478, "step": 18236 }, { "epoch": 0.24, "grad_norm": 4.157394886016846, "learning_rate": 1.995541362099212e-05, "loss": 2.3393, "step": 18237 }, { "epoch": 0.24, "grad_norm": 3.844024181365967, "learning_rate": 1.9955403708714578e-05, "loss": 1.9608, "step": 18238 }, { "epoch": 0.24, "grad_norm": 4.1186323165893555, "learning_rate": 1.9955393795337787e-05, "loss": 1.9055, "step": 18239 }, { "epoch": 0.24, "grad_norm": 4.177091598510742, "learning_rate": 1.9955383880861755e-05, "loss": 2.5697, "step": 18240 }, { "epoch": 0.24, "grad_norm": 4.437134742736816, "learning_rate": 1.9955373965286477e-05, "loss": 2.1107, "step": 18241 }, { "epoch": 0.24, "grad_norm": 3.7706139087677, "learning_rate": 1.995536404861196e-05, "loss": 2.0945, "step": 18242 }, { "epoch": 0.24, "grad_norm": 4.257420063018799, "learning_rate": 1.9955354130838197e-05, "loss": 1.9426, "step": 18243 }, { "epoch": 0.24, "grad_norm": 3.748061180114746, "learning_rate": 1.9955344211965196e-05, "loss": 1.7962, "step": 18244 }, { "epoch": 0.24, "grad_norm": 3.4199438095092773, "learning_rate": 1.9955334291992957e-05, "loss": 1.3479, "step": 18245 }, { "epoch": 0.24, "grad_norm": 3.5896050930023193, "learning_rate": 1.9955324370921478e-05, "loss": 1.836, "step": 18246 }, { "epoch": 0.24, "grad_norm": 4.5308661460876465, "learning_rate": 1.9955314448750768e-05, "loss": 2.0824, "step": 18247 }, { "epoch": 0.24, "grad_norm": 3.66929030418396, "learning_rate": 1.9955304525480815e-05, "loss": 1.6789, "step": 18248 }, { "epoch": 0.24, "grad_norm": 3.914888858795166, "learning_rate": 1.995529460111163e-05, "loss": 1.9985, "step": 18249 }, { "epoch": 0.24, "grad_norm": 4.2528300285339355, "learning_rate": 1.9955284675643215e-05, "loss": 2.0338, "step": 18250 }, { "epoch": 0.24, "grad_norm": 4.285284996032715, "learning_rate": 1.9955274749075563e-05, "loss": 2.2848, "step": 18251 }, { "epoch": 0.24, "grad_norm": 4.5614519119262695, "learning_rate": 1.9955264821408683e-05, "loss": 2.6865, "step": 18252 }, { "epoch": 0.24, "grad_norm": 4.006863117218018, "learning_rate": 1.995525489264257e-05, "loss": 2.1189, "step": 18253 }, { "epoch": 0.24, "grad_norm": 4.337019443511963, "learning_rate": 1.995524496277723e-05, "loss": 1.9903, "step": 18254 }, { "epoch": 0.24, "grad_norm": 3.820835828781128, "learning_rate": 1.9955235031812664e-05, "loss": 1.5039, "step": 18255 }, { "epoch": 0.24, "grad_norm": 4.063208103179932, "learning_rate": 1.9955225099748867e-05, "loss": 2.2869, "step": 18256 }, { "epoch": 0.24, "grad_norm": 4.254223346710205, "learning_rate": 1.9955215166585847e-05, "loss": 2.1648, "step": 18257 }, { "epoch": 0.24, "grad_norm": 4.46958065032959, "learning_rate": 1.9955205232323602e-05, "loss": 2.495, "step": 18258 }, { "epoch": 0.24, "grad_norm": 4.592388153076172, "learning_rate": 1.9955195296962133e-05, "loss": 2.7415, "step": 18259 }, { "epoch": 0.24, "grad_norm": 3.452211618423462, "learning_rate": 1.995518536050144e-05, "loss": 1.7363, "step": 18260 }, { "epoch": 0.24, "grad_norm": 4.0791120529174805, "learning_rate": 1.9955175422941528e-05, "loss": 1.851, "step": 18261 }, { "epoch": 0.24, "grad_norm": 3.9614920616149902, "learning_rate": 1.9955165484282396e-05, "loss": 1.9633, "step": 18262 }, { "epoch": 0.24, "grad_norm": 4.03933048248291, "learning_rate": 1.9955155544524043e-05, "loss": 2.1548, "step": 18263 }, { "epoch": 0.24, "grad_norm": 4.572312355041504, "learning_rate": 1.9955145603666474e-05, "loss": 1.935, "step": 18264 }, { "epoch": 0.24, "grad_norm": 4.791810035705566, "learning_rate": 1.9955135661709688e-05, "loss": 2.4075, "step": 18265 }, { "epoch": 0.24, "grad_norm": 4.28015661239624, "learning_rate": 1.9955125718653683e-05, "loss": 2.0352, "step": 18266 }, { "epoch": 0.24, "grad_norm": 4.347559928894043, "learning_rate": 1.9955115774498466e-05, "loss": 2.6876, "step": 18267 }, { "epoch": 0.24, "grad_norm": 3.623124599456787, "learning_rate": 1.9955105829244034e-05, "loss": 1.9004, "step": 18268 }, { "epoch": 0.24, "grad_norm": 4.061944007873535, "learning_rate": 1.995509588289039e-05, "loss": 1.8242, "step": 18269 }, { "epoch": 0.24, "grad_norm": 3.7608373165130615, "learning_rate": 1.9955085935437536e-05, "loss": 1.5708, "step": 18270 }, { "epoch": 0.24, "grad_norm": 4.2574462890625, "learning_rate": 1.995507598688547e-05, "loss": 2.3663, "step": 18271 }, { "epoch": 0.24, "grad_norm": 4.306049346923828, "learning_rate": 1.9955066037234195e-05, "loss": 2.6951, "step": 18272 }, { "epoch": 0.24, "grad_norm": 4.497859477996826, "learning_rate": 1.995505608648371e-05, "loss": 2.3089, "step": 18273 }, { "epoch": 0.24, "grad_norm": 4.469075679779053, "learning_rate": 1.995504613463402e-05, "loss": 2.4866, "step": 18274 }, { "epoch": 0.24, "grad_norm": 4.810868263244629, "learning_rate": 1.9955036181685125e-05, "loss": 2.7904, "step": 18275 }, { "epoch": 0.24, "grad_norm": 4.319666385650635, "learning_rate": 1.9955026227637024e-05, "loss": 2.1127, "step": 18276 }, { "epoch": 0.24, "grad_norm": 5.578829288482666, "learning_rate": 1.9955016272489715e-05, "loss": 2.5759, "step": 18277 }, { "epoch": 0.24, "grad_norm": 4.5283966064453125, "learning_rate": 1.9955006316243208e-05, "loss": 2.0944, "step": 18278 }, { "epoch": 0.24, "grad_norm": 4.841416835784912, "learning_rate": 1.99549963588975e-05, "loss": 2.7215, "step": 18279 }, { "epoch": 0.24, "grad_norm": 3.3810606002807617, "learning_rate": 1.9954986400452593e-05, "loss": 1.6027, "step": 18280 }, { "epoch": 0.24, "grad_norm": 4.107321739196777, "learning_rate": 1.995497644090848e-05, "loss": 2.2064, "step": 18281 }, { "epoch": 0.24, "grad_norm": 4.643843173980713, "learning_rate": 1.9954966480265172e-05, "loss": 2.1628, "step": 18282 }, { "epoch": 0.24, "grad_norm": 4.817986488342285, "learning_rate": 1.995495651852267e-05, "loss": 2.4476, "step": 18283 }, { "epoch": 0.24, "grad_norm": 4.657252788543701, "learning_rate": 1.9954946555680966e-05, "loss": 2.3643, "step": 18284 }, { "epoch": 0.24, "grad_norm": 4.621373653411865, "learning_rate": 1.9954936591740073e-05, "loss": 1.9993, "step": 18285 }, { "epoch": 0.24, "grad_norm": 4.008300304412842, "learning_rate": 1.995492662669998e-05, "loss": 2.1462, "step": 18286 }, { "epoch": 0.24, "grad_norm": 4.359842300415039, "learning_rate": 1.99549166605607e-05, "loss": 2.3467, "step": 18287 }, { "epoch": 0.24, "grad_norm": 4.3528828620910645, "learning_rate": 1.9954906693322225e-05, "loss": 2.0768, "step": 18288 }, { "epoch": 0.24, "grad_norm": 3.786412477493286, "learning_rate": 1.995489672498456e-05, "loss": 2.0723, "step": 18289 }, { "epoch": 0.24, "grad_norm": 4.253054141998291, "learning_rate": 1.9954886755547706e-05, "loss": 1.9926, "step": 18290 }, { "epoch": 0.24, "grad_norm": 4.495238780975342, "learning_rate": 1.9954876785011662e-05, "loss": 2.0345, "step": 18291 }, { "epoch": 0.24, "grad_norm": 4.72563362121582, "learning_rate": 1.9954866813376433e-05, "loss": 2.3512, "step": 18292 }, { "epoch": 0.24, "grad_norm": 4.228023052215576, "learning_rate": 1.995485684064202e-05, "loss": 2.4207, "step": 18293 }, { "epoch": 0.24, "grad_norm": 4.1253228187561035, "learning_rate": 1.995484686680842e-05, "loss": 1.9034, "step": 18294 }, { "epoch": 0.24, "grad_norm": 3.264155387878418, "learning_rate": 1.9954836891875633e-05, "loss": 1.4508, "step": 18295 }, { "epoch": 0.24, "grad_norm": 4.174185752868652, "learning_rate": 1.9954826915843663e-05, "loss": 2.2392, "step": 18296 }, { "epoch": 0.24, "grad_norm": 3.5566775798797607, "learning_rate": 1.9954816938712515e-05, "loss": 1.9269, "step": 18297 }, { "epoch": 0.24, "grad_norm": 4.387127876281738, "learning_rate": 1.9954806960482183e-05, "loss": 2.3238, "step": 18298 }, { "epoch": 0.24, "grad_norm": 4.448996067047119, "learning_rate": 1.9954796981152672e-05, "loss": 1.9539, "step": 18299 }, { "epoch": 0.24, "grad_norm": 4.127622604370117, "learning_rate": 1.9954787000723985e-05, "loss": 2.2273, "step": 18300 }, { "epoch": 0.24, "grad_norm": 3.475599527359009, "learning_rate": 1.995477701919612e-05, "loss": 1.6528, "step": 18301 }, { "epoch": 0.24, "grad_norm": 3.783026695251465, "learning_rate": 1.9954767036569076e-05, "loss": 1.9149, "step": 18302 }, { "epoch": 0.24, "grad_norm": 4.031226634979248, "learning_rate": 1.995475705284286e-05, "loss": 2.0694, "step": 18303 }, { "epoch": 0.24, "grad_norm": 4.524837970733643, "learning_rate": 1.995474706801747e-05, "loss": 2.4307, "step": 18304 }, { "epoch": 0.24, "grad_norm": 4.2032976150512695, "learning_rate": 1.9954737082092904e-05, "loss": 2.2116, "step": 18305 }, { "epoch": 0.24, "grad_norm": 3.8907055854797363, "learning_rate": 1.995472709506917e-05, "loss": 2.1251, "step": 18306 }, { "epoch": 0.24, "grad_norm": 4.243636131286621, "learning_rate": 1.995471710694626e-05, "loss": 2.4504, "step": 18307 }, { "epoch": 0.24, "grad_norm": 4.011246681213379, "learning_rate": 1.9954707117724183e-05, "loss": 2.3138, "step": 18308 }, { "epoch": 0.24, "grad_norm": 4.431501865386963, "learning_rate": 1.9954697127402936e-05, "loss": 2.3121, "step": 18309 }, { "epoch": 0.24, "grad_norm": 4.285568714141846, "learning_rate": 1.9954687135982525e-05, "loss": 2.1416, "step": 18310 }, { "epoch": 0.24, "grad_norm": 4.104674816131592, "learning_rate": 1.9954677143462947e-05, "loss": 2.2262, "step": 18311 }, { "epoch": 0.24, "grad_norm": 4.808638095855713, "learning_rate": 1.99546671498442e-05, "loss": 2.3566, "step": 18312 }, { "epoch": 0.24, "grad_norm": 4.6147613525390625, "learning_rate": 1.9954657155126295e-05, "loss": 2.3893, "step": 18313 }, { "epoch": 0.24, "grad_norm": 4.2959442138671875, "learning_rate": 1.9954647159309224e-05, "loss": 2.0486, "step": 18314 }, { "epoch": 0.24, "grad_norm": 4.668445587158203, "learning_rate": 1.995463716239299e-05, "loss": 2.7811, "step": 18315 }, { "epoch": 0.24, "grad_norm": 4.1894307136535645, "learning_rate": 1.9954627164377593e-05, "loss": 2.1552, "step": 18316 }, { "epoch": 0.24, "grad_norm": 4.442202568054199, "learning_rate": 1.995461716526304e-05, "loss": 2.4121, "step": 18317 }, { "epoch": 0.24, "grad_norm": 3.7556703090667725, "learning_rate": 1.995460716504933e-05, "loss": 1.7334, "step": 18318 }, { "epoch": 0.24, "grad_norm": 3.783259391784668, "learning_rate": 1.9954597163736458e-05, "loss": 1.722, "step": 18319 }, { "epoch": 0.24, "grad_norm": 4.950934410095215, "learning_rate": 1.9954587161324433e-05, "loss": 2.116, "step": 18320 }, { "epoch": 0.24, "grad_norm": 3.7523460388183594, "learning_rate": 1.995457715781325e-05, "loss": 1.7139, "step": 18321 }, { "epoch": 0.24, "grad_norm": 4.540464878082275, "learning_rate": 1.9954567153202914e-05, "loss": 2.3496, "step": 18322 }, { "epoch": 0.24, "grad_norm": 3.5760130882263184, "learning_rate": 1.9954557147493426e-05, "loss": 1.8075, "step": 18323 }, { "epoch": 0.24, "grad_norm": 3.6983234882354736, "learning_rate": 1.9954547140684785e-05, "loss": 2.1134, "step": 18324 }, { "epoch": 0.24, "grad_norm": 4.329565525054932, "learning_rate": 1.9954537132776993e-05, "loss": 2.5202, "step": 18325 }, { "epoch": 0.24, "grad_norm": 4.2683844566345215, "learning_rate": 1.995452712377005e-05, "loss": 1.8213, "step": 18326 }, { "epoch": 0.24, "grad_norm": 3.823709726333618, "learning_rate": 1.995451711366396e-05, "loss": 2.2696, "step": 18327 }, { "epoch": 0.24, "grad_norm": 4.458833694458008, "learning_rate": 1.995450710245872e-05, "loss": 2.2945, "step": 18328 }, { "epoch": 0.24, "grad_norm": 4.029778957366943, "learning_rate": 1.995449709015434e-05, "loss": 1.7896, "step": 18329 }, { "epoch": 0.24, "grad_norm": 3.8681671619415283, "learning_rate": 1.995448707675081e-05, "loss": 1.9629, "step": 18330 }, { "epoch": 0.24, "grad_norm": 3.898789167404175, "learning_rate": 1.9954477062248133e-05, "loss": 1.9143, "step": 18331 }, { "epoch": 0.24, "grad_norm": 4.149134635925293, "learning_rate": 1.9954467046646318e-05, "loss": 2.3516, "step": 18332 }, { "epoch": 0.24, "grad_norm": 4.488105297088623, "learning_rate": 1.9954457029945358e-05, "loss": 2.3744, "step": 18333 }, { "epoch": 0.24, "grad_norm": 4.51379919052124, "learning_rate": 1.9954447012145258e-05, "loss": 2.0493, "step": 18334 }, { "epoch": 0.24, "grad_norm": 4.064085483551025, "learning_rate": 1.9954436993246018e-05, "loss": 1.9134, "step": 18335 }, { "epoch": 0.24, "grad_norm": 4.023717403411865, "learning_rate": 1.995442697324764e-05, "loss": 2.0336, "step": 18336 }, { "epoch": 0.24, "grad_norm": 4.437955379486084, "learning_rate": 1.9954416952150123e-05, "loss": 2.2722, "step": 18337 }, { "epoch": 0.24, "grad_norm": 4.385337829589844, "learning_rate": 1.9954406929953468e-05, "loss": 2.0813, "step": 18338 }, { "epoch": 0.24, "grad_norm": 3.7439281940460205, "learning_rate": 1.9954396906657682e-05, "loss": 1.9218, "step": 18339 }, { "epoch": 0.24, "grad_norm": 4.896524429321289, "learning_rate": 1.995438688226276e-05, "loss": 2.2663, "step": 18340 }, { "epoch": 0.24, "grad_norm": 3.6303112506866455, "learning_rate": 1.9954376856768704e-05, "loss": 1.9034, "step": 18341 }, { "epoch": 0.24, "grad_norm": 3.635920763015747, "learning_rate": 1.9954366830175518e-05, "loss": 1.6304, "step": 18342 }, { "epoch": 0.24, "grad_norm": 4.0485687255859375, "learning_rate": 1.9954356802483198e-05, "loss": 2.31, "step": 18343 }, { "epoch": 0.24, "grad_norm": 5.080014705657959, "learning_rate": 1.9954346773691748e-05, "loss": 2.0988, "step": 18344 }, { "epoch": 0.24, "grad_norm": 3.782050371170044, "learning_rate": 1.995433674380117e-05, "loss": 2.1136, "step": 18345 }, { "epoch": 0.24, "grad_norm": 3.6512553691864014, "learning_rate": 1.9954326712811464e-05, "loss": 1.5489, "step": 18346 }, { "epoch": 0.24, "grad_norm": 3.9862122535705566, "learning_rate": 1.9954316680722633e-05, "loss": 1.8892, "step": 18347 }, { "epoch": 0.24, "grad_norm": 4.30493688583374, "learning_rate": 1.9954306647534678e-05, "loss": 2.1033, "step": 18348 }, { "epoch": 0.24, "grad_norm": 3.3735034465789795, "learning_rate": 1.9954296613247597e-05, "loss": 1.4877, "step": 18349 }, { "epoch": 0.24, "grad_norm": 3.7342190742492676, "learning_rate": 1.995428657786139e-05, "loss": 2.0752, "step": 18350 }, { "epoch": 0.24, "grad_norm": 3.2227530479431152, "learning_rate": 1.9954276541376064e-05, "loss": 1.4616, "step": 18351 }, { "epoch": 0.24, "grad_norm": 3.7966315746307373, "learning_rate": 1.9954266503791616e-05, "loss": 1.8606, "step": 18352 }, { "epoch": 0.24, "grad_norm": 4.339945316314697, "learning_rate": 1.995425646510805e-05, "loss": 2.4098, "step": 18353 }, { "epoch": 0.24, "grad_norm": 4.298126697540283, "learning_rate": 1.9954246425325362e-05, "loss": 2.1932, "step": 18354 }, { "epoch": 0.24, "grad_norm": 4.306648254394531, "learning_rate": 1.995423638444356e-05, "loss": 1.9911, "step": 18355 }, { "epoch": 0.24, "grad_norm": 4.505475044250488, "learning_rate": 1.9954226342462638e-05, "loss": 2.1647, "step": 18356 }, { "epoch": 0.24, "grad_norm": 4.621774196624756, "learning_rate": 1.9954216299382602e-05, "loss": 2.103, "step": 18357 }, { "epoch": 0.24, "grad_norm": 5.7525248527526855, "learning_rate": 1.9954206255203454e-05, "loss": 2.7127, "step": 18358 }, { "epoch": 0.24, "grad_norm": 4.5680975914001465, "learning_rate": 1.9954196209925188e-05, "loss": 2.5166, "step": 18359 }, { "epoch": 0.24, "grad_norm": 4.678067684173584, "learning_rate": 1.995418616354781e-05, "loss": 2.7558, "step": 18360 }, { "epoch": 0.24, "grad_norm": 3.9412755966186523, "learning_rate": 1.9954176116071324e-05, "loss": 2.0874, "step": 18361 }, { "epoch": 0.24, "grad_norm": 4.088704586029053, "learning_rate": 1.995416606749573e-05, "loss": 2.3016, "step": 18362 }, { "epoch": 0.24, "grad_norm": 4.808866500854492, "learning_rate": 1.9954156017821023e-05, "loss": 2.2184, "step": 18363 }, { "epoch": 0.24, "grad_norm": 4.6097869873046875, "learning_rate": 1.9954145967047208e-05, "loss": 2.117, "step": 18364 }, { "epoch": 0.24, "grad_norm": 4.884203910827637, "learning_rate": 1.995413591517429e-05, "loss": 2.7436, "step": 18365 }, { "epoch": 0.24, "grad_norm": 4.205578804016113, "learning_rate": 1.9954125862202265e-05, "loss": 2.0982, "step": 18366 }, { "epoch": 0.24, "grad_norm": 4.257538318634033, "learning_rate": 1.9954115808131137e-05, "loss": 2.2457, "step": 18367 }, { "epoch": 0.24, "grad_norm": 4.915480613708496, "learning_rate": 1.9954105752960902e-05, "loss": 2.115, "step": 18368 }, { "epoch": 0.24, "grad_norm": 4.623272895812988, "learning_rate": 1.9954095696691567e-05, "loss": 2.2969, "step": 18369 }, { "epoch": 0.24, "grad_norm": 3.915458917617798, "learning_rate": 1.995408563932313e-05, "loss": 2.4748, "step": 18370 }, { "epoch": 0.24, "grad_norm": 4.275343418121338, "learning_rate": 1.9954075580855596e-05, "loss": 2.125, "step": 18371 }, { "epoch": 0.24, "grad_norm": 4.658552169799805, "learning_rate": 1.995406552128896e-05, "loss": 2.0715, "step": 18372 }, { "epoch": 0.24, "grad_norm": 4.765394687652588, "learning_rate": 1.9954055460623225e-05, "loss": 3.1169, "step": 18373 }, { "epoch": 0.24, "grad_norm": 4.681290626525879, "learning_rate": 1.9954045398858398e-05, "loss": 2.6583, "step": 18374 }, { "epoch": 0.24, "grad_norm": 4.2214837074279785, "learning_rate": 1.995403533599447e-05, "loss": 2.4161, "step": 18375 }, { "epoch": 0.24, "grad_norm": 4.035574913024902, "learning_rate": 1.9954025272031452e-05, "loss": 1.9118, "step": 18376 }, { "epoch": 0.24, "grad_norm": 3.714237689971924, "learning_rate": 1.9954015206969343e-05, "loss": 1.6667, "step": 18377 }, { "epoch": 0.24, "grad_norm": 3.9706501960754395, "learning_rate": 1.9954005140808138e-05, "loss": 2.309, "step": 18378 }, { "epoch": 0.24, "grad_norm": 4.348177433013916, "learning_rate": 1.995399507354784e-05, "loss": 2.6134, "step": 18379 }, { "epoch": 0.24, "grad_norm": 4.468237400054932, "learning_rate": 1.9953985005188454e-05, "loss": 2.0076, "step": 18380 }, { "epoch": 0.24, "grad_norm": 4.241028785705566, "learning_rate": 1.9953974935729983e-05, "loss": 1.9719, "step": 18381 }, { "epoch": 0.24, "grad_norm": 4.859642028808594, "learning_rate": 1.995396486517242e-05, "loss": 2.6931, "step": 18382 }, { "epoch": 0.24, "grad_norm": 4.642344951629639, "learning_rate": 1.995395479351577e-05, "loss": 1.9927, "step": 18383 }, { "epoch": 0.24, "grad_norm": 4.030661106109619, "learning_rate": 1.9953944720760036e-05, "loss": 2.1676, "step": 18384 }, { "epoch": 0.24, "grad_norm": 3.6746957302093506, "learning_rate": 1.9953934646905217e-05, "loss": 1.8148, "step": 18385 }, { "epoch": 0.24, "grad_norm": 3.6162073612213135, "learning_rate": 1.9953924571951315e-05, "loss": 1.8662, "step": 18386 }, { "epoch": 0.24, "grad_norm": 4.493967056274414, "learning_rate": 1.9953914495898333e-05, "loss": 2.0616, "step": 18387 }, { "epoch": 0.24, "grad_norm": 4.277963638305664, "learning_rate": 1.995390441874627e-05, "loss": 2.3151, "step": 18388 }, { "epoch": 0.24, "grad_norm": 4.283083915710449, "learning_rate": 1.9953894340495125e-05, "loss": 2.3408, "step": 18389 }, { "epoch": 0.24, "grad_norm": 4.61867094039917, "learning_rate": 1.9953884261144898e-05, "loss": 2.5974, "step": 18390 }, { "epoch": 0.24, "grad_norm": 3.8691508769989014, "learning_rate": 1.9953874180695598e-05, "loss": 1.9224, "step": 18391 }, { "epoch": 0.24, "grad_norm": 3.879298448562622, "learning_rate": 1.995386409914722e-05, "loss": 2.2832, "step": 18392 }, { "epoch": 0.24, "grad_norm": 3.6953420639038086, "learning_rate": 1.9953854016499768e-05, "loss": 1.9281, "step": 18393 }, { "epoch": 0.24, "grad_norm": 4.282097816467285, "learning_rate": 1.995384393275324e-05, "loss": 2.4469, "step": 18394 }, { "epoch": 0.24, "grad_norm": 4.220028400421143, "learning_rate": 1.9953833847907636e-05, "loss": 2.237, "step": 18395 }, { "epoch": 0.24, "grad_norm": 4.347559452056885, "learning_rate": 1.9953823761962963e-05, "loss": 2.205, "step": 18396 }, { "epoch": 0.24, "grad_norm": 4.802802085876465, "learning_rate": 1.995381367491922e-05, "loss": 2.1745, "step": 18397 }, { "epoch": 0.24, "grad_norm": 3.7975497245788574, "learning_rate": 1.9953803586776406e-05, "loss": 2.1226, "step": 18398 }, { "epoch": 0.24, "grad_norm": 4.029007911682129, "learning_rate": 1.9953793497534525e-05, "loss": 1.7289, "step": 18399 }, { "epoch": 0.24, "grad_norm": 4.011009216308594, "learning_rate": 1.9953783407193575e-05, "loss": 2.4546, "step": 18400 }, { "epoch": 0.24, "grad_norm": 4.3927083015441895, "learning_rate": 1.995377331575356e-05, "loss": 2.6588, "step": 18401 }, { "epoch": 0.24, "grad_norm": 3.8823843002319336, "learning_rate": 1.9953763223214473e-05, "loss": 2.4291, "step": 18402 }, { "epoch": 0.24, "grad_norm": 4.317251205444336, "learning_rate": 1.995375312957633e-05, "loss": 2.5015, "step": 18403 }, { "epoch": 0.24, "grad_norm": 4.493772029876709, "learning_rate": 1.995374303483912e-05, "loss": 2.7672, "step": 18404 }, { "epoch": 0.24, "grad_norm": 3.878021478652954, "learning_rate": 1.9953732939002845e-05, "loss": 2.4977, "step": 18405 }, { "epoch": 0.24, "grad_norm": 3.9550869464874268, "learning_rate": 1.9953722842067513e-05, "loss": 2.1151, "step": 18406 }, { "epoch": 0.24, "grad_norm": 3.9706387519836426, "learning_rate": 1.995371274403312e-05, "loss": 2.1283, "step": 18407 }, { "epoch": 0.24, "grad_norm": 3.868647813796997, "learning_rate": 1.995370264489967e-05, "loss": 2.0481, "step": 18408 }, { "epoch": 0.24, "grad_norm": 4.742164134979248, "learning_rate": 1.995369254466716e-05, "loss": 2.2926, "step": 18409 }, { "epoch": 0.24, "grad_norm": 4.272304534912109, "learning_rate": 1.9953682443335594e-05, "loss": 2.0574, "step": 18410 }, { "epoch": 0.24, "grad_norm": 4.701399803161621, "learning_rate": 1.9953672340904973e-05, "loss": 2.4069, "step": 18411 }, { "epoch": 0.24, "grad_norm": 3.8836288452148438, "learning_rate": 1.9953662237375296e-05, "loss": 2.1682, "step": 18412 }, { "epoch": 0.24, "grad_norm": 4.0609025955200195, "learning_rate": 1.9953652132746567e-05, "loss": 2.2552, "step": 18413 }, { "epoch": 0.24, "grad_norm": 4.246391773223877, "learning_rate": 1.9953642027018785e-05, "loss": 2.1837, "step": 18414 }, { "epoch": 0.24, "grad_norm": 3.7562787532806396, "learning_rate": 1.9953631920191955e-05, "loss": 1.6733, "step": 18415 }, { "epoch": 0.24, "grad_norm": 3.932884931564331, "learning_rate": 1.9953621812266072e-05, "loss": 1.8405, "step": 18416 }, { "epoch": 0.24, "grad_norm": 4.330657005310059, "learning_rate": 1.9953611703241144e-05, "loss": 1.9556, "step": 18417 }, { "epoch": 0.24, "grad_norm": 3.7947044372558594, "learning_rate": 1.9953601593117163e-05, "loss": 1.7087, "step": 18418 }, { "epoch": 0.24, "grad_norm": 3.9970755577087402, "learning_rate": 1.995359148189414e-05, "loss": 2.1337, "step": 18419 }, { "epoch": 0.24, "grad_norm": 4.5006937980651855, "learning_rate": 1.9953581369572072e-05, "loss": 1.962, "step": 18420 }, { "epoch": 0.24, "grad_norm": 4.187686920166016, "learning_rate": 1.9953571256150958e-05, "loss": 2.3878, "step": 18421 }, { "epoch": 0.24, "grad_norm": 4.075499057769775, "learning_rate": 1.99535611416308e-05, "loss": 1.8888, "step": 18422 }, { "epoch": 0.24, "grad_norm": 3.8964011669158936, "learning_rate": 1.99535510260116e-05, "loss": 2.4333, "step": 18423 }, { "epoch": 0.24, "grad_norm": 4.808071136474609, "learning_rate": 1.995354090929336e-05, "loss": 2.5541, "step": 18424 }, { "epoch": 0.24, "grad_norm": 4.176899433135986, "learning_rate": 1.995353079147608e-05, "loss": 2.0249, "step": 18425 }, { "epoch": 0.24, "grad_norm": 4.618113040924072, "learning_rate": 1.9953520672559762e-05, "loss": 2.2389, "step": 18426 }, { "epoch": 0.24, "grad_norm": 4.019519805908203, "learning_rate": 1.9953510552544405e-05, "loss": 1.8966, "step": 18427 }, { "epoch": 0.24, "grad_norm": 3.7674829959869385, "learning_rate": 1.9953500431430016e-05, "loss": 1.8982, "step": 18428 }, { "epoch": 0.24, "grad_norm": 4.042713165283203, "learning_rate": 1.9953490309216585e-05, "loss": 2.0231, "step": 18429 }, { "epoch": 0.24, "grad_norm": 3.911749839782715, "learning_rate": 1.9953480185904125e-05, "loss": 1.8511, "step": 18430 }, { "epoch": 0.24, "grad_norm": 4.208340167999268, "learning_rate": 1.995347006149263e-05, "loss": 2.4336, "step": 18431 }, { "epoch": 0.24, "grad_norm": 3.3763515949249268, "learning_rate": 1.99534599359821e-05, "loss": 1.9214, "step": 18432 }, { "epoch": 0.24, "grad_norm": 4.982323169708252, "learning_rate": 1.9953449809372546e-05, "loss": 2.9471, "step": 18433 }, { "epoch": 0.24, "grad_norm": 4.122245788574219, "learning_rate": 1.9953439681663958e-05, "loss": 2.1751, "step": 18434 }, { "epoch": 0.24, "grad_norm": 4.634612083435059, "learning_rate": 1.9953429552856342e-05, "loss": 2.4013, "step": 18435 }, { "epoch": 0.24, "grad_norm": 4.632099151611328, "learning_rate": 1.9953419422949696e-05, "loss": 2.4541, "step": 18436 }, { "epoch": 0.24, "grad_norm": 4.732856750488281, "learning_rate": 1.9953409291944026e-05, "loss": 2.0964, "step": 18437 }, { "epoch": 0.24, "grad_norm": 3.8282384872436523, "learning_rate": 1.9953399159839334e-05, "loss": 1.909, "step": 18438 }, { "epoch": 0.24, "grad_norm": 3.6896655559539795, "learning_rate": 1.9953389026635613e-05, "loss": 1.8665, "step": 18439 }, { "epoch": 0.24, "grad_norm": 4.5797119140625, "learning_rate": 1.995337889233287e-05, "loss": 2.3114, "step": 18440 }, { "epoch": 0.24, "grad_norm": 3.7966415882110596, "learning_rate": 1.9953368756931105e-05, "loss": 2.1447, "step": 18441 }, { "epoch": 0.24, "grad_norm": 3.4949004650115967, "learning_rate": 1.9953358620430322e-05, "loss": 1.689, "step": 18442 }, { "epoch": 0.24, "grad_norm": 3.7697858810424805, "learning_rate": 1.9953348482830517e-05, "loss": 1.8374, "step": 18443 }, { "epoch": 0.24, "grad_norm": 3.653258800506592, "learning_rate": 1.9953338344131694e-05, "loss": 1.8679, "step": 18444 }, { "epoch": 0.24, "grad_norm": 3.9636127948760986, "learning_rate": 1.995332820433385e-05, "loss": 2.0116, "step": 18445 }, { "epoch": 0.24, "grad_norm": 3.8085296154022217, "learning_rate": 1.9953318063436994e-05, "loss": 2.226, "step": 18446 }, { "epoch": 0.24, "grad_norm": 5.228693962097168, "learning_rate": 1.9953307921441122e-05, "loss": 2.4078, "step": 18447 }, { "epoch": 0.24, "grad_norm": 3.6349246501922607, "learning_rate": 1.9953297778346235e-05, "loss": 2.0676, "step": 18448 }, { "epoch": 0.24, "grad_norm": 4.715782642364502, "learning_rate": 1.9953287634152336e-05, "loss": 2.6531, "step": 18449 }, { "epoch": 0.24, "grad_norm": 4.190914630889893, "learning_rate": 1.9953277488859426e-05, "loss": 2.4254, "step": 18450 }, { "epoch": 0.24, "grad_norm": 4.0865092277526855, "learning_rate": 1.9953267342467507e-05, "loss": 2.2501, "step": 18451 }, { "epoch": 0.24, "grad_norm": 4.307384490966797, "learning_rate": 1.9953257194976573e-05, "loss": 2.2163, "step": 18452 }, { "epoch": 0.24, "grad_norm": 3.8242461681365967, "learning_rate": 1.9953247046386634e-05, "loss": 1.8118, "step": 18453 }, { "epoch": 0.24, "grad_norm": 4.6001877784729, "learning_rate": 1.9953236896697686e-05, "loss": 2.7036, "step": 18454 }, { "epoch": 0.24, "grad_norm": 4.293942451477051, "learning_rate": 1.9953226745909734e-05, "loss": 2.3376, "step": 18455 }, { "epoch": 0.24, "grad_norm": 3.6456055641174316, "learning_rate": 1.9953216594022774e-05, "loss": 1.9342, "step": 18456 }, { "epoch": 0.24, "grad_norm": 3.5961029529571533, "learning_rate": 1.9953206441036812e-05, "loss": 1.4616, "step": 18457 }, { "epoch": 0.24, "grad_norm": 3.8002538681030273, "learning_rate": 1.9953196286951848e-05, "loss": 2.1645, "step": 18458 }, { "epoch": 0.24, "grad_norm": 4.01981782913208, "learning_rate": 1.995318613176788e-05, "loss": 2.1116, "step": 18459 }, { "epoch": 0.24, "grad_norm": 4.250186443328857, "learning_rate": 1.9953175975484913e-05, "loss": 1.9075, "step": 18460 }, { "epoch": 0.24, "grad_norm": 3.952436923980713, "learning_rate": 1.9953165818102945e-05, "loss": 2.2035, "step": 18461 }, { "epoch": 0.24, "grad_norm": 4.281612873077393, "learning_rate": 1.9953155659621982e-05, "loss": 2.2075, "step": 18462 }, { "epoch": 0.24, "grad_norm": 3.919747829437256, "learning_rate": 1.9953145500042018e-05, "loss": 2.009, "step": 18463 }, { "epoch": 0.24, "grad_norm": 4.677478313446045, "learning_rate": 1.995313533936306e-05, "loss": 2.1235, "step": 18464 }, { "epoch": 0.24, "grad_norm": 4.4471540451049805, "learning_rate": 1.9953125177585108e-05, "loss": 2.7336, "step": 18465 }, { "epoch": 0.24, "grad_norm": 4.453718185424805, "learning_rate": 1.9953115014708156e-05, "loss": 2.2662, "step": 18466 }, { "epoch": 0.24, "grad_norm": 3.8108623027801514, "learning_rate": 1.995310485073222e-05, "loss": 1.9715, "step": 18467 }, { "epoch": 0.24, "grad_norm": 3.787583589553833, "learning_rate": 1.9953094685657288e-05, "loss": 1.9378, "step": 18468 }, { "epoch": 0.24, "grad_norm": 5.176345348358154, "learning_rate": 1.9953084519483362e-05, "loss": 2.0443, "step": 18469 }, { "epoch": 0.24, "grad_norm": 4.553221225738525, "learning_rate": 1.9953074352210454e-05, "loss": 2.1583, "step": 18470 }, { "epoch": 0.24, "grad_norm": 4.793455600738525, "learning_rate": 1.9953064183838552e-05, "loss": 2.4913, "step": 18471 }, { "epoch": 0.24, "grad_norm": 4.798027992248535, "learning_rate": 1.9953054014367666e-05, "loss": 2.6073, "step": 18472 }, { "epoch": 0.24, "grad_norm": 4.402039527893066, "learning_rate": 1.995304384379779e-05, "loss": 2.2128, "step": 18473 }, { "epoch": 0.24, "grad_norm": 4.1482367515563965, "learning_rate": 1.9953033672128935e-05, "loss": 1.7296, "step": 18474 }, { "epoch": 0.24, "grad_norm": 3.921769618988037, "learning_rate": 1.9953023499361094e-05, "loss": 1.8623, "step": 18475 }, { "epoch": 0.24, "grad_norm": 4.851273536682129, "learning_rate": 1.995301332549427e-05, "loss": 2.7051, "step": 18476 }, { "epoch": 0.24, "grad_norm": 4.279934883117676, "learning_rate": 1.995300315052846e-05, "loss": 2.1041, "step": 18477 }, { "epoch": 0.24, "grad_norm": 4.3203125, "learning_rate": 1.9952992974463676e-05, "loss": 2.4423, "step": 18478 }, { "epoch": 0.24, "grad_norm": 3.8014302253723145, "learning_rate": 1.995298279729991e-05, "loss": 1.8396, "step": 18479 }, { "epoch": 0.24, "grad_norm": 4.418035984039307, "learning_rate": 1.9952972619037164e-05, "loss": 2.2732, "step": 18480 }, { "epoch": 0.24, "grad_norm": 4.103533744812012, "learning_rate": 1.9952962439675443e-05, "loss": 2.0891, "step": 18481 }, { "epoch": 0.24, "grad_norm": 4.1985039710998535, "learning_rate": 1.9952952259214747e-05, "loss": 2.3568, "step": 18482 }, { "epoch": 0.24, "grad_norm": 4.149879455566406, "learning_rate": 1.9952942077655074e-05, "loss": 1.7496, "step": 18483 }, { "epoch": 0.24, "grad_norm": 4.2726311683654785, "learning_rate": 1.995293189499643e-05, "loss": 2.1133, "step": 18484 }, { "epoch": 0.24, "grad_norm": 3.7691450119018555, "learning_rate": 1.995292171123881e-05, "loss": 2.0804, "step": 18485 }, { "epoch": 0.24, "grad_norm": 3.915987968444824, "learning_rate": 1.995291152638222e-05, "loss": 2.0626, "step": 18486 }, { "epoch": 0.24, "grad_norm": 3.865661382675171, "learning_rate": 1.995290134042666e-05, "loss": 2.0294, "step": 18487 }, { "epoch": 0.24, "grad_norm": 3.8288733959198, "learning_rate": 1.9952891153372132e-05, "loss": 1.8533, "step": 18488 }, { "epoch": 0.24, "grad_norm": 4.766420841217041, "learning_rate": 1.995288096521864e-05, "loss": 2.5178, "step": 18489 }, { "epoch": 0.24, "grad_norm": 3.7630815505981445, "learning_rate": 1.9952870775966176e-05, "loss": 1.8898, "step": 18490 }, { "epoch": 0.24, "grad_norm": 3.782827377319336, "learning_rate": 1.9952860585614745e-05, "loss": 2.0951, "step": 18491 }, { "epoch": 0.24, "grad_norm": 4.216168403625488, "learning_rate": 1.9952850394164353e-05, "loss": 2.0725, "step": 18492 }, { "epoch": 0.24, "grad_norm": 4.631117820739746, "learning_rate": 1.9952840201614993e-05, "loss": 2.1136, "step": 18493 }, { "epoch": 0.24, "grad_norm": 4.836262226104736, "learning_rate": 1.9952830007966675e-05, "loss": 1.992, "step": 18494 }, { "epoch": 0.24, "grad_norm": 4.094234943389893, "learning_rate": 1.9952819813219394e-05, "loss": 2.3334, "step": 18495 }, { "epoch": 0.24, "grad_norm": 4.069117069244385, "learning_rate": 1.995280961737315e-05, "loss": 2.2728, "step": 18496 }, { "epoch": 0.24, "grad_norm": 4.01314115524292, "learning_rate": 1.995279942042795e-05, "loss": 2.1833, "step": 18497 }, { "epoch": 0.24, "grad_norm": 4.24406099319458, "learning_rate": 1.995278922238379e-05, "loss": 2.3826, "step": 18498 }, { "epoch": 0.24, "grad_norm": 5.107680320739746, "learning_rate": 1.9952779023240676e-05, "loss": 2.3451, "step": 18499 }, { "epoch": 0.24, "grad_norm": 3.8663389682769775, "learning_rate": 1.9952768822998607e-05, "loss": 2.1521, "step": 18500 }, { "epoch": 0.24, "grad_norm": 3.3704893589019775, "learning_rate": 1.995275862165758e-05, "loss": 1.5714, "step": 18501 }, { "epoch": 0.24, "grad_norm": 4.09675407409668, "learning_rate": 1.9952748419217603e-05, "loss": 1.8132, "step": 18502 }, { "epoch": 0.24, "grad_norm": 3.931774139404297, "learning_rate": 1.9952738215678668e-05, "loss": 1.727, "step": 18503 }, { "epoch": 0.24, "grad_norm": 4.5578999519348145, "learning_rate": 1.9952728011040786e-05, "loss": 2.1454, "step": 18504 }, { "epoch": 0.24, "grad_norm": 4.183850288391113, "learning_rate": 1.9952717805303953e-05, "loss": 2.024, "step": 18505 }, { "epoch": 0.24, "grad_norm": 4.035488128662109, "learning_rate": 1.9952707598468173e-05, "loss": 2.1687, "step": 18506 }, { "epoch": 0.24, "grad_norm": 5.094546794891357, "learning_rate": 1.9952697390533446e-05, "loss": 2.5432, "step": 18507 }, { "epoch": 0.24, "grad_norm": 3.972142457962036, "learning_rate": 1.9952687181499768e-05, "loss": 2.1131, "step": 18508 }, { "epoch": 0.24, "grad_norm": 4.26986026763916, "learning_rate": 1.995267697136715e-05, "loss": 2.0304, "step": 18509 }, { "epoch": 0.24, "grad_norm": 4.336973190307617, "learning_rate": 1.9952666760135583e-05, "loss": 2.3738, "step": 18510 }, { "epoch": 0.24, "grad_norm": 4.3717803955078125, "learning_rate": 1.9952656547805076e-05, "loss": 2.3568, "step": 18511 }, { "epoch": 0.24, "grad_norm": 4.385186672210693, "learning_rate": 1.9952646334375623e-05, "loss": 2.403, "step": 18512 }, { "epoch": 0.24, "grad_norm": 4.8364787101745605, "learning_rate": 1.995263611984723e-05, "loss": 2.9166, "step": 18513 }, { "epoch": 0.24, "grad_norm": 3.7654366493225098, "learning_rate": 1.99526259042199e-05, "loss": 1.9705, "step": 18514 }, { "epoch": 0.24, "grad_norm": 4.445459365844727, "learning_rate": 1.995261568749363e-05, "loss": 2.3073, "step": 18515 }, { "epoch": 0.24, "grad_norm": 4.1852874755859375, "learning_rate": 1.9952605469668422e-05, "loss": 2.0783, "step": 18516 }, { "epoch": 0.24, "grad_norm": 4.004304885864258, "learning_rate": 1.9952595250744276e-05, "loss": 2.5187, "step": 18517 }, { "epoch": 0.24, "grad_norm": 3.5682873725891113, "learning_rate": 1.9952585030721196e-05, "loss": 1.9853, "step": 18518 }, { "epoch": 0.24, "grad_norm": 4.727842330932617, "learning_rate": 1.9952574809599185e-05, "loss": 2.4266, "step": 18519 }, { "epoch": 0.24, "grad_norm": 3.872974157333374, "learning_rate": 1.9952564587378234e-05, "loss": 2.0371, "step": 18520 }, { "epoch": 0.24, "grad_norm": 3.7407586574554443, "learning_rate": 1.9952554364058356e-05, "loss": 2.0001, "step": 18521 }, { "epoch": 0.24, "grad_norm": 4.807343006134033, "learning_rate": 1.9952544139639544e-05, "loss": 2.6132, "step": 18522 }, { "epoch": 0.24, "grad_norm": 3.6778650283813477, "learning_rate": 1.9952533914121805e-05, "loss": 1.9745, "step": 18523 }, { "epoch": 0.24, "grad_norm": 3.7482285499572754, "learning_rate": 1.9952523687505136e-05, "loss": 1.8794, "step": 18524 }, { "epoch": 0.24, "grad_norm": 4.000119209289551, "learning_rate": 1.9952513459789543e-05, "loss": 2.2139, "step": 18525 }, { "epoch": 0.24, "grad_norm": 4.075865268707275, "learning_rate": 1.9952503230975016e-05, "loss": 2.2976, "step": 18526 }, { "epoch": 0.24, "grad_norm": 4.396695613861084, "learning_rate": 1.9952493001061573e-05, "loss": 1.8836, "step": 18527 }, { "epoch": 0.24, "grad_norm": 4.468029499053955, "learning_rate": 1.99524827700492e-05, "loss": 2.3073, "step": 18528 }, { "epoch": 0.24, "grad_norm": 4.113668918609619, "learning_rate": 1.9952472537937905e-05, "loss": 2.2861, "step": 18529 }, { "epoch": 0.24, "grad_norm": 4.367561340332031, "learning_rate": 1.9952462304727687e-05, "loss": 2.276, "step": 18530 }, { "epoch": 0.24, "grad_norm": 4.073010444641113, "learning_rate": 1.995245207041855e-05, "loss": 2.3258, "step": 18531 }, { "epoch": 0.24, "grad_norm": 3.8356621265411377, "learning_rate": 1.9952441835010498e-05, "loss": 2.0936, "step": 18532 }, { "epoch": 0.24, "grad_norm": 4.107468128204346, "learning_rate": 1.995243159850352e-05, "loss": 2.336, "step": 18533 }, { "epoch": 0.24, "grad_norm": 4.1512770652771, "learning_rate": 1.9952421360897628e-05, "loss": 2.091, "step": 18534 }, { "epoch": 0.24, "grad_norm": 4.290153980255127, "learning_rate": 1.995241112219282e-05, "loss": 2.0577, "step": 18535 }, { "epoch": 0.24, "grad_norm": 4.373159885406494, "learning_rate": 1.9952400882389094e-05, "loss": 2.3475, "step": 18536 }, { "epoch": 0.24, "grad_norm": 4.33902645111084, "learning_rate": 1.995239064148646e-05, "loss": 2.4048, "step": 18537 }, { "epoch": 0.24, "grad_norm": 4.183615207672119, "learning_rate": 1.995238039948491e-05, "loss": 1.9932, "step": 18538 }, { "epoch": 0.24, "grad_norm": 4.130062103271484, "learning_rate": 1.9952370156384447e-05, "loss": 2.1162, "step": 18539 }, { "epoch": 0.24, "grad_norm": 4.82126522064209, "learning_rate": 1.9952359912185075e-05, "loss": 2.1459, "step": 18540 }, { "epoch": 0.24, "grad_norm": 4.077949047088623, "learning_rate": 1.9952349666886796e-05, "loss": 1.8974, "step": 18541 }, { "epoch": 0.24, "grad_norm": 4.12552547454834, "learning_rate": 1.9952339420489603e-05, "loss": 2.1013, "step": 18542 }, { "epoch": 0.24, "grad_norm": 4.274193286895752, "learning_rate": 1.9952329172993504e-05, "loss": 2.4421, "step": 18543 }, { "epoch": 0.24, "grad_norm": 4.058186054229736, "learning_rate": 1.9952318924398505e-05, "loss": 2.1152, "step": 18544 }, { "epoch": 0.24, "grad_norm": 4.168400764465332, "learning_rate": 1.9952308674704596e-05, "loss": 2.0239, "step": 18545 }, { "epoch": 0.24, "grad_norm": 4.4148335456848145, "learning_rate": 1.9952298423911784e-05, "loss": 2.0965, "step": 18546 }, { "epoch": 0.24, "grad_norm": 4.380173206329346, "learning_rate": 1.995228817202007e-05, "loss": 2.1673, "step": 18547 }, { "epoch": 0.24, "grad_norm": 4.14035701751709, "learning_rate": 1.9952277919029456e-05, "loss": 2.1583, "step": 18548 }, { "epoch": 0.24, "grad_norm": 3.6980364322662354, "learning_rate": 1.995226766493994e-05, "loss": 2.1125, "step": 18549 }, { "epoch": 0.24, "grad_norm": 5.066097736358643, "learning_rate": 1.9952257409751522e-05, "loss": 2.7042, "step": 18550 }, { "epoch": 0.24, "grad_norm": 4.0500168800354, "learning_rate": 1.995224715346421e-05, "loss": 2.1121, "step": 18551 }, { "epoch": 0.24, "grad_norm": 3.6098082065582275, "learning_rate": 1.9952236896078002e-05, "loss": 1.6375, "step": 18552 }, { "epoch": 0.24, "grad_norm": 4.253503799438477, "learning_rate": 1.9952226637592897e-05, "loss": 2.5576, "step": 18553 }, { "epoch": 0.24, "grad_norm": 4.286248207092285, "learning_rate": 1.9952216378008892e-05, "loss": 2.2324, "step": 18554 }, { "epoch": 0.24, "grad_norm": 3.8444693088531494, "learning_rate": 1.9952206117326e-05, "loss": 1.6776, "step": 18555 }, { "epoch": 0.24, "grad_norm": 4.464425086975098, "learning_rate": 1.995219585554421e-05, "loss": 2.4145, "step": 18556 }, { "epoch": 0.24, "grad_norm": 4.042121410369873, "learning_rate": 1.9952185592663536e-05, "loss": 2.2365, "step": 18557 }, { "epoch": 0.24, "grad_norm": 4.389888286590576, "learning_rate": 1.9952175328683966e-05, "loss": 2.4832, "step": 18558 }, { "epoch": 0.24, "grad_norm": 3.8142101764678955, "learning_rate": 1.995216506360551e-05, "loss": 1.8282, "step": 18559 }, { "epoch": 0.24, "grad_norm": 4.111155986785889, "learning_rate": 1.9952154797428166e-05, "loss": 2.384, "step": 18560 }, { "epoch": 0.24, "grad_norm": 4.18826150894165, "learning_rate": 1.9952144530151933e-05, "loss": 1.9259, "step": 18561 }, { "epoch": 0.24, "grad_norm": 3.877965211868286, "learning_rate": 1.9952134261776818e-05, "loss": 2.1135, "step": 18562 }, { "epoch": 0.24, "grad_norm": 3.92390513420105, "learning_rate": 1.9952123992302816e-05, "loss": 2.1338, "step": 18563 }, { "epoch": 0.24, "grad_norm": 4.179442405700684, "learning_rate": 1.9952113721729927e-05, "loss": 2.1375, "step": 18564 }, { "epoch": 0.24, "grad_norm": 4.007573127746582, "learning_rate": 1.9952103450058163e-05, "loss": 2.4782, "step": 18565 }, { "epoch": 0.24, "grad_norm": 4.021139621734619, "learning_rate": 1.9952093177287515e-05, "loss": 2.1188, "step": 18566 }, { "epoch": 0.24, "grad_norm": 4.680933952331543, "learning_rate": 1.9952082903417985e-05, "loss": 2.214, "step": 18567 }, { "epoch": 0.24, "grad_norm": 4.442881107330322, "learning_rate": 1.995207262844958e-05, "loss": 1.8794, "step": 18568 }, { "epoch": 0.24, "grad_norm": 3.665153741836548, "learning_rate": 1.9952062352382295e-05, "loss": 2.0132, "step": 18569 }, { "epoch": 0.24, "grad_norm": 4.059228420257568, "learning_rate": 1.9952052075216133e-05, "loss": 2.1813, "step": 18570 }, { "epoch": 0.24, "grad_norm": 3.6190998554229736, "learning_rate": 1.9952041796951098e-05, "loss": 1.6982, "step": 18571 }, { "epoch": 0.24, "grad_norm": 4.4636549949646, "learning_rate": 1.9952031517587187e-05, "loss": 2.7041, "step": 18572 }, { "epoch": 0.24, "grad_norm": 4.225461483001709, "learning_rate": 1.9952021237124402e-05, "loss": 2.6804, "step": 18573 }, { "epoch": 0.24, "grad_norm": 4.6446380615234375, "learning_rate": 1.995201095556275e-05, "loss": 2.1481, "step": 18574 }, { "epoch": 0.24, "grad_norm": 4.331753730773926, "learning_rate": 1.995200067290222e-05, "loss": 2.2828, "step": 18575 }, { "epoch": 0.24, "grad_norm": 4.642465591430664, "learning_rate": 1.9951990389142827e-05, "loss": 2.751, "step": 18576 }, { "epoch": 0.24, "grad_norm": 4.246876239776611, "learning_rate": 1.995198010428456e-05, "loss": 2.2874, "step": 18577 }, { "epoch": 0.24, "grad_norm": 3.6967296600341797, "learning_rate": 1.9951969818327428e-05, "loss": 2.1216, "step": 18578 }, { "epoch": 0.24, "grad_norm": 4.292179584503174, "learning_rate": 1.995195953127143e-05, "loss": 2.1959, "step": 18579 }, { "epoch": 0.24, "grad_norm": 3.560563802719116, "learning_rate": 1.9951949243116564e-05, "loss": 1.47, "step": 18580 }, { "epoch": 0.24, "grad_norm": 4.687123775482178, "learning_rate": 1.9951938953862838e-05, "loss": 2.2944, "step": 18581 }, { "epoch": 0.24, "grad_norm": 4.225861072540283, "learning_rate": 1.9951928663510247e-05, "loss": 1.8371, "step": 18582 }, { "epoch": 0.24, "grad_norm": 4.061265468597412, "learning_rate": 1.9951918372058792e-05, "loss": 1.8906, "step": 18583 }, { "epoch": 0.24, "grad_norm": 4.576735019683838, "learning_rate": 1.9951908079508482e-05, "loss": 2.5021, "step": 18584 }, { "epoch": 0.24, "grad_norm": 4.194774627685547, "learning_rate": 1.995189778585931e-05, "loss": 2.0562, "step": 18585 }, { "epoch": 0.24, "grad_norm": 3.8762147426605225, "learning_rate": 1.9951887491111277e-05, "loss": 1.7571, "step": 18586 }, { "epoch": 0.24, "grad_norm": 3.664266586303711, "learning_rate": 1.995187719526439e-05, "loss": 1.9642, "step": 18587 }, { "epoch": 0.24, "grad_norm": 4.244561195373535, "learning_rate": 1.9951866898318646e-05, "loss": 2.2527, "step": 18588 }, { "epoch": 0.24, "grad_norm": 4.166467189788818, "learning_rate": 1.9951856600274046e-05, "loss": 2.4699, "step": 18589 }, { "epoch": 0.24, "grad_norm": 4.736210823059082, "learning_rate": 1.9951846301130594e-05, "loss": 2.6265, "step": 18590 }, { "epoch": 0.24, "grad_norm": 4.043822765350342, "learning_rate": 1.9951836000888286e-05, "loss": 2.1729, "step": 18591 }, { "epoch": 0.24, "grad_norm": 3.548739433288574, "learning_rate": 1.995182569954713e-05, "loss": 1.8936, "step": 18592 }, { "epoch": 0.24, "grad_norm": 3.824467420578003, "learning_rate": 1.995181539710712e-05, "loss": 2.0604, "step": 18593 }, { "epoch": 0.24, "grad_norm": 4.35698127746582, "learning_rate": 1.9951805093568265e-05, "loss": 2.4348, "step": 18594 }, { "epoch": 0.24, "grad_norm": 4.219587326049805, "learning_rate": 1.9951794788930557e-05, "loss": 2.1489, "step": 18595 }, { "epoch": 0.24, "grad_norm": 4.856856346130371, "learning_rate": 1.9951784483194008e-05, "loss": 2.7846, "step": 18596 }, { "epoch": 0.24, "grad_norm": 3.9983787536621094, "learning_rate": 1.995177417635861e-05, "loss": 1.869, "step": 18597 }, { "epoch": 0.24, "grad_norm": 3.6877505779266357, "learning_rate": 1.9951763868424368e-05, "loss": 2.0803, "step": 18598 }, { "epoch": 0.24, "grad_norm": 4.36719274520874, "learning_rate": 1.9951753559391284e-05, "loss": 2.1266, "step": 18599 }, { "epoch": 0.24, "grad_norm": 5.181199073791504, "learning_rate": 1.9951743249259352e-05, "loss": 2.5812, "step": 18600 }, { "epoch": 0.24, "grad_norm": 4.181905269622803, "learning_rate": 1.9951732938028584e-05, "loss": 1.8934, "step": 18601 }, { "epoch": 0.24, "grad_norm": 4.300561904907227, "learning_rate": 1.9951722625698973e-05, "loss": 2.5094, "step": 18602 }, { "epoch": 0.24, "grad_norm": 3.5708632469177246, "learning_rate": 1.9951712312270524e-05, "loss": 1.9275, "step": 18603 }, { "epoch": 0.24, "grad_norm": 4.626772403717041, "learning_rate": 1.995170199774324e-05, "loss": 2.7234, "step": 18604 }, { "epoch": 0.24, "grad_norm": 3.3386197090148926, "learning_rate": 1.9951691682117112e-05, "loss": 1.7946, "step": 18605 }, { "epoch": 0.24, "grad_norm": 4.377020835876465, "learning_rate": 1.9951681365392153e-05, "loss": 2.2405, "step": 18606 }, { "epoch": 0.24, "grad_norm": 3.8555715084075928, "learning_rate": 1.9951671047568362e-05, "loss": 2.0164, "step": 18607 }, { "epoch": 0.24, "grad_norm": 3.9635422229766846, "learning_rate": 1.9951660728645735e-05, "loss": 1.9058, "step": 18608 }, { "epoch": 0.24, "grad_norm": 3.950394868850708, "learning_rate": 1.9951650408624273e-05, "loss": 2.028, "step": 18609 }, { "epoch": 0.24, "grad_norm": 4.517138957977295, "learning_rate": 1.9951640087503982e-05, "loss": 2.7621, "step": 18610 }, { "epoch": 0.24, "grad_norm": 4.13899564743042, "learning_rate": 1.9951629765284862e-05, "loss": 2.0774, "step": 18611 }, { "epoch": 0.24, "grad_norm": 4.363129138946533, "learning_rate": 1.9951619441966917e-05, "loss": 2.1187, "step": 18612 }, { "epoch": 0.24, "grad_norm": 4.176173210144043, "learning_rate": 1.995160911755014e-05, "loss": 1.9938, "step": 18613 }, { "epoch": 0.24, "grad_norm": 3.5616254806518555, "learning_rate": 1.9951598792034534e-05, "loss": 1.7219, "step": 18614 }, { "epoch": 0.24, "grad_norm": 3.9032347202301025, "learning_rate": 1.995158846542011e-05, "loss": 1.6967, "step": 18615 }, { "epoch": 0.24, "grad_norm": 4.034849643707275, "learning_rate": 1.9951578137706856e-05, "loss": 2.1863, "step": 18616 }, { "epoch": 0.24, "grad_norm": 5.050420761108398, "learning_rate": 1.9951567808894777e-05, "loss": 3.222, "step": 18617 }, { "epoch": 0.24, "grad_norm": 3.4538280963897705, "learning_rate": 1.9951557478983884e-05, "loss": 1.8718, "step": 18618 }, { "epoch": 0.24, "grad_norm": 4.057657718658447, "learning_rate": 1.9951547147974164e-05, "loss": 2.0843, "step": 18619 }, { "epoch": 0.24, "grad_norm": 4.326353073120117, "learning_rate": 1.9951536815865627e-05, "loss": 2.3027, "step": 18620 }, { "epoch": 0.24, "grad_norm": 4.199683666229248, "learning_rate": 1.995152648265827e-05, "loss": 2.4255, "step": 18621 }, { "epoch": 0.24, "grad_norm": 4.640275955200195, "learning_rate": 1.9951516148352096e-05, "loss": 2.16, "step": 18622 }, { "epoch": 0.24, "grad_norm": 3.6744818687438965, "learning_rate": 1.9951505812947105e-05, "loss": 1.6017, "step": 18623 }, { "epoch": 0.24, "grad_norm": 4.305191993713379, "learning_rate": 1.99514954764433e-05, "loss": 1.8982, "step": 18624 }, { "epoch": 0.24, "grad_norm": 4.2904157638549805, "learning_rate": 1.9951485138840683e-05, "loss": 2.0247, "step": 18625 }, { "epoch": 0.24, "grad_norm": 3.5490453243255615, "learning_rate": 1.995147480013925e-05, "loss": 1.6635, "step": 18626 }, { "epoch": 0.24, "grad_norm": 4.138275146484375, "learning_rate": 1.9951464460339007e-05, "loss": 2.0904, "step": 18627 }, { "epoch": 0.24, "grad_norm": 4.861756801605225, "learning_rate": 1.9951454119439954e-05, "loss": 2.3556, "step": 18628 }, { "epoch": 0.24, "grad_norm": 4.534149646759033, "learning_rate": 1.995144377744209e-05, "loss": 2.1728, "step": 18629 }, { "epoch": 0.24, "grad_norm": 4.330044746398926, "learning_rate": 1.995143343434542e-05, "loss": 2.0004, "step": 18630 }, { "epoch": 0.24, "grad_norm": 4.620514392852783, "learning_rate": 1.9951423090149943e-05, "loss": 2.5056, "step": 18631 }, { "epoch": 0.24, "grad_norm": 3.879209041595459, "learning_rate": 1.9951412744855657e-05, "loss": 1.7778, "step": 18632 }, { "epoch": 0.24, "grad_norm": 4.787882328033447, "learning_rate": 1.9951402398462566e-05, "loss": 2.1754, "step": 18633 }, { "epoch": 0.24, "grad_norm": 3.395695447921753, "learning_rate": 1.9951392050970673e-05, "loss": 1.8558, "step": 18634 }, { "epoch": 0.24, "grad_norm": 4.284219741821289, "learning_rate": 1.995138170237998e-05, "loss": 2.1122, "step": 18635 }, { "epoch": 0.24, "grad_norm": 4.043757915496826, "learning_rate": 1.9951371352690483e-05, "loss": 1.9861, "step": 18636 }, { "epoch": 0.24, "grad_norm": 3.711447238922119, "learning_rate": 1.9951361001902185e-05, "loss": 1.8111, "step": 18637 }, { "epoch": 0.24, "grad_norm": 4.470158576965332, "learning_rate": 1.9951350650015086e-05, "loss": 2.2979, "step": 18638 }, { "epoch": 0.24, "grad_norm": 4.035046100616455, "learning_rate": 1.9951340297029192e-05, "loss": 1.8666, "step": 18639 }, { "epoch": 0.24, "grad_norm": 3.6754939556121826, "learning_rate": 1.9951329942944503e-05, "loss": 2.0855, "step": 18640 }, { "epoch": 0.24, "grad_norm": 4.421922206878662, "learning_rate": 1.9951319587761016e-05, "loss": 2.4875, "step": 18641 }, { "epoch": 0.24, "grad_norm": 4.703213214874268, "learning_rate": 1.9951309231478734e-05, "loss": 2.2938, "step": 18642 }, { "epoch": 0.24, "grad_norm": 4.646636962890625, "learning_rate": 1.9951298874097664e-05, "loss": 2.0676, "step": 18643 }, { "epoch": 0.24, "grad_norm": 4.253129005432129, "learning_rate": 1.9951288515617795e-05, "loss": 2.1266, "step": 18644 }, { "epoch": 0.24, "grad_norm": 3.9849274158477783, "learning_rate": 1.9951278156039135e-05, "loss": 1.9852, "step": 18645 }, { "epoch": 0.24, "grad_norm": 4.016342639923096, "learning_rate": 1.995126779536169e-05, "loss": 1.828, "step": 18646 }, { "epoch": 0.24, "grad_norm": 4.610888481140137, "learning_rate": 1.9951257433585455e-05, "loss": 2.2618, "step": 18647 }, { "epoch": 0.24, "grad_norm": 4.3685173988342285, "learning_rate": 1.9951247070710427e-05, "loss": 2.5709, "step": 18648 }, { "epoch": 0.24, "grad_norm": 4.6392822265625, "learning_rate": 1.995123670673662e-05, "loss": 2.4057, "step": 18649 }, { "epoch": 0.24, "grad_norm": 4.474701881408691, "learning_rate": 1.995122634166402e-05, "loss": 2.5689, "step": 18650 }, { "epoch": 0.24, "grad_norm": 3.694532632827759, "learning_rate": 1.995121597549264e-05, "loss": 1.967, "step": 18651 }, { "epoch": 0.24, "grad_norm": 4.4581756591796875, "learning_rate": 1.995120560822248e-05, "loss": 3.0099, "step": 18652 }, { "epoch": 0.24, "grad_norm": 4.233577251434326, "learning_rate": 1.9951195239853533e-05, "loss": 1.9911, "step": 18653 }, { "epoch": 0.24, "grad_norm": 3.9429588317871094, "learning_rate": 1.9951184870385807e-05, "loss": 2.006, "step": 18654 }, { "epoch": 0.24, "grad_norm": 4.392853736877441, "learning_rate": 1.99511744998193e-05, "loss": 2.1924, "step": 18655 }, { "epoch": 0.24, "grad_norm": 3.756713628768921, "learning_rate": 1.995116412815402e-05, "loss": 1.7437, "step": 18656 }, { "epoch": 0.24, "grad_norm": 4.007180690765381, "learning_rate": 1.9951153755389957e-05, "loss": 1.996, "step": 18657 }, { "epoch": 0.24, "grad_norm": 4.033413410186768, "learning_rate": 1.995114338152712e-05, "loss": 2.0319, "step": 18658 }, { "epoch": 0.24, "grad_norm": 3.321721315383911, "learning_rate": 1.9951133006565508e-05, "loss": 1.6966, "step": 18659 }, { "epoch": 0.24, "grad_norm": 3.9587554931640625, "learning_rate": 1.995112263050512e-05, "loss": 1.7506, "step": 18660 }, { "epoch": 0.24, "grad_norm": 4.432926177978516, "learning_rate": 1.995111225334596e-05, "loss": 2.4103, "step": 18661 }, { "epoch": 0.24, "grad_norm": 4.630805015563965, "learning_rate": 1.995110187508803e-05, "loss": 2.4935, "step": 18662 }, { "epoch": 0.24, "grad_norm": 4.591071128845215, "learning_rate": 1.995109149573133e-05, "loss": 2.0287, "step": 18663 }, { "epoch": 0.24, "grad_norm": 4.3297929763793945, "learning_rate": 1.995108111527586e-05, "loss": 2.1754, "step": 18664 }, { "epoch": 0.24, "grad_norm": 3.8996148109436035, "learning_rate": 1.995107073372162e-05, "loss": 1.8033, "step": 18665 }, { "epoch": 0.24, "grad_norm": 3.7531378269195557, "learning_rate": 1.9951060351068614e-05, "loss": 1.706, "step": 18666 }, { "epoch": 0.24, "grad_norm": 4.9768877029418945, "learning_rate": 1.9951049967316843e-05, "loss": 2.3925, "step": 18667 }, { "epoch": 0.24, "grad_norm": 4.2623467445373535, "learning_rate": 1.9951039582466305e-05, "loss": 2.3696, "step": 18668 }, { "epoch": 0.24, "grad_norm": 4.171173095703125, "learning_rate": 1.9951029196517006e-05, "loss": 2.2545, "step": 18669 }, { "epoch": 0.24, "grad_norm": 3.984994888305664, "learning_rate": 1.9951018809468942e-05, "loss": 1.8739, "step": 18670 }, { "epoch": 0.24, "grad_norm": 3.962535858154297, "learning_rate": 1.995100842132212e-05, "loss": 1.6283, "step": 18671 }, { "epoch": 0.24, "grad_norm": 4.0448737144470215, "learning_rate": 1.9950998032076535e-05, "loss": 2.1016, "step": 18672 }, { "epoch": 0.24, "grad_norm": 4.019045352935791, "learning_rate": 1.9950987641732192e-05, "loss": 2.0986, "step": 18673 }, { "epoch": 0.24, "grad_norm": 4.087467670440674, "learning_rate": 1.995097725028909e-05, "loss": 1.5687, "step": 18674 }, { "epoch": 0.24, "grad_norm": 4.627777099609375, "learning_rate": 1.9950966857747233e-05, "loss": 2.3992, "step": 18675 }, { "epoch": 0.24, "grad_norm": 6.234522342681885, "learning_rate": 1.995095646410662e-05, "loss": 2.7352, "step": 18676 }, { "epoch": 0.24, "grad_norm": 3.7308852672576904, "learning_rate": 1.9950946069367253e-05, "loss": 1.8541, "step": 18677 }, { "epoch": 0.24, "grad_norm": 4.527523040771484, "learning_rate": 1.9950935673529132e-05, "loss": 2.0113, "step": 18678 }, { "epoch": 0.24, "grad_norm": 4.035537242889404, "learning_rate": 1.995092527659226e-05, "loss": 2.2479, "step": 18679 }, { "epoch": 0.24, "grad_norm": 3.978917360305786, "learning_rate": 1.9950914878556634e-05, "loss": 1.7853, "step": 18680 }, { "epoch": 0.24, "grad_norm": 4.361893653869629, "learning_rate": 1.995090447942226e-05, "loss": 1.9449, "step": 18681 }, { "epoch": 0.24, "grad_norm": 4.421886444091797, "learning_rate": 1.995089407918914e-05, "loss": 2.3121, "step": 18682 }, { "epoch": 0.24, "grad_norm": 4.339885711669922, "learning_rate": 1.995088367785727e-05, "loss": 2.1787, "step": 18683 }, { "epoch": 0.24, "grad_norm": 4.184340000152588, "learning_rate": 1.9950873275426656e-05, "loss": 2.0217, "step": 18684 }, { "epoch": 0.24, "grad_norm": 4.549980163574219, "learning_rate": 1.9950862871897293e-05, "loss": 2.0348, "step": 18685 }, { "epoch": 0.24, "grad_norm": 3.8505735397338867, "learning_rate": 1.9950852467269186e-05, "loss": 1.9307, "step": 18686 }, { "epoch": 0.24, "grad_norm": 4.2760090827941895, "learning_rate": 1.9950842061542338e-05, "loss": 2.3363, "step": 18687 }, { "epoch": 0.24, "grad_norm": 4.378235340118408, "learning_rate": 1.9950831654716746e-05, "loss": 2.5101, "step": 18688 }, { "epoch": 0.24, "grad_norm": 3.8525540828704834, "learning_rate": 1.9950821246792417e-05, "loss": 1.9673, "step": 18689 }, { "epoch": 0.24, "grad_norm": 4.177121639251709, "learning_rate": 1.9950810837769344e-05, "loss": 2.0776, "step": 18690 }, { "epoch": 0.24, "grad_norm": 3.8723902702331543, "learning_rate": 1.9950800427647537e-05, "loss": 1.7112, "step": 18691 }, { "epoch": 0.24, "grad_norm": 3.7990918159484863, "learning_rate": 1.995079001642699e-05, "loss": 1.936, "step": 18692 }, { "epoch": 0.24, "grad_norm": 4.441401481628418, "learning_rate": 1.9950779604107708e-05, "loss": 2.0293, "step": 18693 }, { "epoch": 0.24, "grad_norm": 3.846287488937378, "learning_rate": 1.9950769190689692e-05, "loss": 2.3, "step": 18694 }, { "epoch": 0.24, "grad_norm": 4.037317752838135, "learning_rate": 1.995075877617294e-05, "loss": 1.9703, "step": 18695 }, { "epoch": 0.24, "grad_norm": 5.520115375518799, "learning_rate": 1.995074836055746e-05, "loss": 2.2607, "step": 18696 }, { "epoch": 0.24, "grad_norm": 4.881148338317871, "learning_rate": 1.9950737943843243e-05, "loss": 2.481, "step": 18697 }, { "epoch": 0.24, "grad_norm": 4.219540119171143, "learning_rate": 1.99507275260303e-05, "loss": 2.0203, "step": 18698 }, { "epoch": 0.24, "grad_norm": 3.730404853820801, "learning_rate": 1.9950717107118624e-05, "loss": 1.8406, "step": 18699 }, { "epoch": 0.24, "grad_norm": 4.1582465171813965, "learning_rate": 1.9950706687108222e-05, "loss": 2.3858, "step": 18700 }, { "epoch": 0.24, "grad_norm": 4.119438648223877, "learning_rate": 1.9950696265999094e-05, "loss": 2.6154, "step": 18701 }, { "epoch": 0.24, "grad_norm": 3.754690408706665, "learning_rate": 1.9950685843791238e-05, "loss": 1.9707, "step": 18702 }, { "epoch": 0.24, "grad_norm": 4.026762962341309, "learning_rate": 1.995067542048466e-05, "loss": 2.1338, "step": 18703 }, { "epoch": 0.24, "grad_norm": 4.219352722167969, "learning_rate": 1.9950664996079355e-05, "loss": 1.7491, "step": 18704 }, { "epoch": 0.24, "grad_norm": 3.988677978515625, "learning_rate": 1.995065457057533e-05, "loss": 2.1206, "step": 18705 }, { "epoch": 0.24, "grad_norm": 3.52993106842041, "learning_rate": 1.9950644143972588e-05, "loss": 1.8556, "step": 18706 }, { "epoch": 0.24, "grad_norm": 3.583073854446411, "learning_rate": 1.9950633716271123e-05, "loss": 1.7052, "step": 18707 }, { "epoch": 0.24, "grad_norm": 4.214550971984863, "learning_rate": 1.9950623287470935e-05, "loss": 2.0463, "step": 18708 }, { "epoch": 0.24, "grad_norm": 4.227837085723877, "learning_rate": 1.9950612857572037e-05, "loss": 2.2247, "step": 18709 }, { "epoch": 0.24, "grad_norm": 4.090055465698242, "learning_rate": 1.9950602426574416e-05, "loss": 1.8731, "step": 18710 }, { "epoch": 0.24, "grad_norm": 4.186220645904541, "learning_rate": 1.995059199447808e-05, "loss": 2.232, "step": 18711 }, { "epoch": 0.24, "grad_norm": 4.421445846557617, "learning_rate": 1.9950581561283035e-05, "loss": 2.3593, "step": 18712 }, { "epoch": 0.24, "grad_norm": 3.799133777618408, "learning_rate": 1.9950571126989272e-05, "loss": 2.243, "step": 18713 }, { "epoch": 0.24, "grad_norm": 4.126330375671387, "learning_rate": 1.99505606915968e-05, "loss": 2.1626, "step": 18714 }, { "epoch": 0.24, "grad_norm": 3.752825975418091, "learning_rate": 1.9950550255105617e-05, "loss": 1.7132, "step": 18715 }, { "epoch": 0.24, "grad_norm": 3.8344035148620605, "learning_rate": 1.995053981751572e-05, "loss": 1.9825, "step": 18716 }, { "epoch": 0.24, "grad_norm": 4.773158550262451, "learning_rate": 1.995052937882712e-05, "loss": 2.5137, "step": 18717 }, { "epoch": 0.24, "grad_norm": 3.6984691619873047, "learning_rate": 1.9950518939039815e-05, "loss": 1.9709, "step": 18718 }, { "epoch": 0.24, "grad_norm": 4.437272071838379, "learning_rate": 1.9950508498153798e-05, "loss": 2.1397, "step": 18719 }, { "epoch": 0.24, "grad_norm": 4.15488862991333, "learning_rate": 1.995049805616908e-05, "loss": 2.0139, "step": 18720 }, { "epoch": 0.24, "grad_norm": 4.6726975440979, "learning_rate": 1.9950487613085655e-05, "loss": 2.3915, "step": 18721 }, { "epoch": 0.24, "grad_norm": 4.145297050476074, "learning_rate": 1.9950477168903525e-05, "loss": 2.1046, "step": 18722 }, { "epoch": 0.24, "grad_norm": 5.905240535736084, "learning_rate": 1.99504667236227e-05, "loss": 2.0329, "step": 18723 }, { "epoch": 0.24, "grad_norm": 4.786080837249756, "learning_rate": 1.995045627724317e-05, "loss": 2.4624, "step": 18724 }, { "epoch": 0.24, "grad_norm": 4.58933162689209, "learning_rate": 1.9950445829764945e-05, "loss": 1.9167, "step": 18725 }, { "epoch": 0.24, "grad_norm": 4.101318359375, "learning_rate": 1.995043538118802e-05, "loss": 2.1064, "step": 18726 }, { "epoch": 0.24, "grad_norm": 4.102776050567627, "learning_rate": 1.9950424931512395e-05, "loss": 1.8959, "step": 18727 }, { "epoch": 0.24, "grad_norm": 5.48259162902832, "learning_rate": 1.9950414480738075e-05, "loss": 2.9969, "step": 18728 }, { "epoch": 0.24, "grad_norm": 4.098917484283447, "learning_rate": 1.9950404028865066e-05, "loss": 1.7502, "step": 18729 }, { "epoch": 0.24, "grad_norm": 4.2609357833862305, "learning_rate": 1.995039357589336e-05, "loss": 2.4038, "step": 18730 }, { "epoch": 0.24, "grad_norm": 4.16812801361084, "learning_rate": 1.995038312182296e-05, "loss": 1.9884, "step": 18731 }, { "epoch": 0.24, "grad_norm": 3.995818853378296, "learning_rate": 1.995037266665387e-05, "loss": 2.1187, "step": 18732 }, { "epoch": 0.24, "grad_norm": 5.51239538192749, "learning_rate": 1.995036221038609e-05, "loss": 2.5398, "step": 18733 }, { "epoch": 0.24, "grad_norm": 4.451761245727539, "learning_rate": 1.9950351753019623e-05, "loss": 2.1466, "step": 18734 }, { "epoch": 0.24, "grad_norm": 4.6998443603515625, "learning_rate": 1.995034129455447e-05, "loss": 2.3207, "step": 18735 }, { "epoch": 0.24, "grad_norm": 5.002884387969971, "learning_rate": 1.9950330834990625e-05, "loss": 1.8324, "step": 18736 }, { "epoch": 0.24, "grad_norm": 4.730864524841309, "learning_rate": 1.9950320374328097e-05, "loss": 2.6667, "step": 18737 }, { "epoch": 0.24, "grad_norm": 4.378442764282227, "learning_rate": 1.9950309912566886e-05, "loss": 2.3386, "step": 18738 }, { "epoch": 0.24, "grad_norm": 3.8068432807922363, "learning_rate": 1.995029944970699e-05, "loss": 2.4379, "step": 18739 }, { "epoch": 0.24, "grad_norm": 4.9052839279174805, "learning_rate": 1.9950288985748414e-05, "loss": 2.2526, "step": 18740 }, { "epoch": 0.24, "grad_norm": 4.358022212982178, "learning_rate": 1.9950278520691157e-05, "loss": 2.115, "step": 18741 }, { "epoch": 0.24, "grad_norm": 4.781054496765137, "learning_rate": 1.995026805453522e-05, "loss": 2.4056, "step": 18742 }, { "epoch": 0.24, "grad_norm": 4.262020587921143, "learning_rate": 1.9950257587280607e-05, "loss": 2.5692, "step": 18743 }, { "epoch": 0.24, "grad_norm": 3.8567686080932617, "learning_rate": 1.995024711892731e-05, "loss": 1.946, "step": 18744 }, { "epoch": 0.24, "grad_norm": 4.043335437774658, "learning_rate": 1.9950236649475342e-05, "loss": 1.9977, "step": 18745 }, { "epoch": 0.24, "grad_norm": 3.7802810668945312, "learning_rate": 1.9950226178924697e-05, "loss": 2.0678, "step": 18746 }, { "epoch": 0.24, "grad_norm": 4.542247772216797, "learning_rate": 1.9950215707275383e-05, "loss": 2.4107, "step": 18747 }, { "epoch": 0.24, "grad_norm": 3.644996166229248, "learning_rate": 1.995020523452739e-05, "loss": 1.7543, "step": 18748 }, { "epoch": 0.24, "grad_norm": 3.8571808338165283, "learning_rate": 1.995019476068073e-05, "loss": 1.9233, "step": 18749 }, { "epoch": 0.24, "grad_norm": 3.9977867603302, "learning_rate": 1.9950184285735398e-05, "loss": 2.3534, "step": 18750 }, { "epoch": 0.24, "grad_norm": 4.151456832885742, "learning_rate": 1.99501738096914e-05, "loss": 2.3199, "step": 18751 }, { "epoch": 0.24, "grad_norm": 4.244722366333008, "learning_rate": 1.995016333254873e-05, "loss": 2.3086, "step": 18752 }, { "epoch": 0.24, "grad_norm": 4.873843669891357, "learning_rate": 1.995015285430739e-05, "loss": 2.3292, "step": 18753 }, { "epoch": 0.24, "grad_norm": 4.146817207336426, "learning_rate": 1.9950142374967393e-05, "loss": 2.3006, "step": 18754 }, { "epoch": 0.24, "grad_norm": 4.004948616027832, "learning_rate": 1.9950131894528723e-05, "loss": 2.5774, "step": 18755 }, { "epoch": 0.24, "grad_norm": 4.099358558654785, "learning_rate": 1.9950121412991396e-05, "loss": 2.6224, "step": 18756 }, { "epoch": 0.24, "grad_norm": 4.4630446434021, "learning_rate": 1.9950110930355403e-05, "loss": 2.6258, "step": 18757 }, { "epoch": 0.24, "grad_norm": 4.170738697052002, "learning_rate": 1.995010044662075e-05, "loss": 1.967, "step": 18758 }, { "epoch": 0.24, "grad_norm": 4.295549392700195, "learning_rate": 1.995008996178744e-05, "loss": 2.1541, "step": 18759 }, { "epoch": 0.24, "grad_norm": 4.036794662475586, "learning_rate": 1.995007947585547e-05, "loss": 2.133, "step": 18760 }, { "epoch": 0.24, "grad_norm": 4.051267623901367, "learning_rate": 1.995006898882484e-05, "loss": 2.2647, "step": 18761 }, { "epoch": 0.24, "grad_norm": 3.5640852451324463, "learning_rate": 1.9950058500695553e-05, "loss": 1.8791, "step": 18762 }, { "epoch": 0.24, "grad_norm": 3.661745071411133, "learning_rate": 1.995004801146761e-05, "loss": 1.7848, "step": 18763 }, { "epoch": 0.24, "grad_norm": 4.273130416870117, "learning_rate": 1.995003752114102e-05, "loss": 2.2848, "step": 18764 }, { "epoch": 0.24, "grad_norm": 3.647250175476074, "learning_rate": 1.995002702971577e-05, "loss": 1.886, "step": 18765 }, { "epoch": 0.24, "grad_norm": 3.7128167152404785, "learning_rate": 1.995001653719187e-05, "loss": 1.9139, "step": 18766 }, { "epoch": 0.24, "grad_norm": 4.633198261260986, "learning_rate": 1.9950006043569322e-05, "loss": 2.7226, "step": 18767 }, { "epoch": 0.24, "grad_norm": 3.9588520526885986, "learning_rate": 1.994999554884812e-05, "loss": 2.0239, "step": 18768 }, { "epoch": 0.24, "grad_norm": 4.082831859588623, "learning_rate": 1.9949985053028273e-05, "loss": 2.1356, "step": 18769 }, { "epoch": 0.24, "grad_norm": 4.0043511390686035, "learning_rate": 1.994997455610978e-05, "loss": 2.184, "step": 18770 }, { "epoch": 0.24, "grad_norm": 4.30426549911499, "learning_rate": 1.9949964058092635e-05, "loss": 1.9136, "step": 18771 }, { "epoch": 0.24, "grad_norm": 4.163818359375, "learning_rate": 1.994995355897685e-05, "loss": 2.2402, "step": 18772 }, { "epoch": 0.24, "grad_norm": 4.219205379486084, "learning_rate": 1.994994305876242e-05, "loss": 2.139, "step": 18773 }, { "epoch": 0.24, "grad_norm": 4.643373012542725, "learning_rate": 1.994993255744935e-05, "loss": 2.3727, "step": 18774 }, { "epoch": 0.24, "grad_norm": 3.509007453918457, "learning_rate": 1.994992205503764e-05, "loss": 1.8582, "step": 18775 }, { "epoch": 0.24, "grad_norm": 4.177225589752197, "learning_rate": 1.9949911551527283e-05, "loss": 2.1973, "step": 18776 }, { "epoch": 0.24, "grad_norm": 3.4427220821380615, "learning_rate": 1.9949901046918293e-05, "loss": 1.5723, "step": 18777 }, { "epoch": 0.24, "grad_norm": 4.080624103546143, "learning_rate": 1.9949890541210658e-05, "loss": 2.2755, "step": 18778 }, { "epoch": 0.24, "grad_norm": 4.307379722595215, "learning_rate": 1.9949880034404393e-05, "loss": 2.1045, "step": 18779 }, { "epoch": 0.24, "grad_norm": 3.695519208908081, "learning_rate": 1.994986952649949e-05, "loss": 1.8797, "step": 18780 }, { "epoch": 0.24, "grad_norm": 4.592517852783203, "learning_rate": 1.9949859017495955e-05, "loss": 2.3335, "step": 18781 }, { "epoch": 0.24, "grad_norm": 4.281846523284912, "learning_rate": 1.9949848507393784e-05, "loss": 2.1497, "step": 18782 }, { "epoch": 0.24, "grad_norm": 4.7148003578186035, "learning_rate": 1.994983799619298e-05, "loss": 2.0694, "step": 18783 }, { "epoch": 0.24, "grad_norm": 3.6196184158325195, "learning_rate": 1.994982748389355e-05, "loss": 1.921, "step": 18784 }, { "epoch": 0.24, "grad_norm": 4.201292514801025, "learning_rate": 1.9949816970495484e-05, "loss": 2.5815, "step": 18785 }, { "epoch": 0.24, "grad_norm": 4.717881679534912, "learning_rate": 1.994980645599879e-05, "loss": 2.5839, "step": 18786 }, { "epoch": 0.24, "grad_norm": 4.20625638961792, "learning_rate": 1.9949795940403472e-05, "loss": 2.2601, "step": 18787 }, { "epoch": 0.24, "grad_norm": 3.8734681606292725, "learning_rate": 1.994978542370953e-05, "loss": 1.5144, "step": 18788 }, { "epoch": 0.24, "grad_norm": 3.753171682357788, "learning_rate": 1.994977490591696e-05, "loss": 1.9823, "step": 18789 }, { "epoch": 0.24, "grad_norm": 4.226529598236084, "learning_rate": 1.9949764387025764e-05, "loss": 2.3466, "step": 18790 }, { "epoch": 0.24, "grad_norm": 4.038303375244141, "learning_rate": 1.9949753867035947e-05, "loss": 2.0663, "step": 18791 }, { "epoch": 0.24, "grad_norm": 4.092591285705566, "learning_rate": 1.9949743345947508e-05, "loss": 1.9177, "step": 18792 }, { "epoch": 0.24, "grad_norm": 3.986257553100586, "learning_rate": 1.9949732823760447e-05, "loss": 1.8464, "step": 18793 }, { "epoch": 0.24, "grad_norm": 3.7160778045654297, "learning_rate": 1.994972230047477e-05, "loss": 1.8138, "step": 18794 }, { "epoch": 0.24, "grad_norm": 4.042917728424072, "learning_rate": 1.9949711776090472e-05, "loss": 2.2667, "step": 18795 }, { "epoch": 0.24, "grad_norm": 3.8857016563415527, "learning_rate": 1.9949701250607558e-05, "loss": 1.927, "step": 18796 }, { "epoch": 0.24, "grad_norm": 4.636261940002441, "learning_rate": 1.994969072402603e-05, "loss": 2.6163, "step": 18797 }, { "epoch": 0.24, "grad_norm": 5.124617576599121, "learning_rate": 1.9949680196345885e-05, "loss": 2.5981, "step": 18798 }, { "epoch": 0.24, "grad_norm": 4.197626113891602, "learning_rate": 1.9949669667567125e-05, "loss": 2.2366, "step": 18799 }, { "epoch": 0.24, "grad_norm": 4.319112300872803, "learning_rate": 1.9949659137689757e-05, "loss": 2.1759, "step": 18800 }, { "epoch": 0.24, "grad_norm": 3.972874879837036, "learning_rate": 1.9949648606713773e-05, "loss": 1.9753, "step": 18801 }, { "epoch": 0.24, "grad_norm": 4.285731315612793, "learning_rate": 1.9949638074639185e-05, "loss": 2.5746, "step": 18802 }, { "epoch": 0.24, "grad_norm": 4.29304313659668, "learning_rate": 1.9949627541465984e-05, "loss": 2.5507, "step": 18803 }, { "epoch": 0.24, "grad_norm": 3.928971529006958, "learning_rate": 1.9949617007194175e-05, "loss": 1.7658, "step": 18804 }, { "epoch": 0.24, "grad_norm": 3.503931999206543, "learning_rate": 1.994960647182376e-05, "loss": 1.6493, "step": 18805 }, { "epoch": 0.24, "grad_norm": 3.980236291885376, "learning_rate": 1.9949595935354742e-05, "loss": 2.2005, "step": 18806 }, { "epoch": 0.24, "grad_norm": 4.004824638366699, "learning_rate": 1.9949585397787118e-05, "loss": 2.1682, "step": 18807 }, { "epoch": 0.24, "grad_norm": 4.383069038391113, "learning_rate": 1.9949574859120892e-05, "loss": 2.4724, "step": 18808 }, { "epoch": 0.24, "grad_norm": 3.9117465019226074, "learning_rate": 1.9949564319356064e-05, "loss": 1.8234, "step": 18809 }, { "epoch": 0.24, "grad_norm": 4.202539443969727, "learning_rate": 1.994955377849263e-05, "loss": 2.2617, "step": 18810 }, { "epoch": 0.24, "grad_norm": 4.531868934631348, "learning_rate": 1.9949543236530603e-05, "loss": 2.2315, "step": 18811 }, { "epoch": 0.24, "grad_norm": 4.491209506988525, "learning_rate": 1.9949532693469974e-05, "loss": 2.6001, "step": 18812 }, { "epoch": 0.24, "grad_norm": 3.8198280334472656, "learning_rate": 1.9949522149310753e-05, "loss": 2.0441, "step": 18813 }, { "epoch": 0.24, "grad_norm": 4.7130446434021, "learning_rate": 1.994951160405293e-05, "loss": 2.4998, "step": 18814 }, { "epoch": 0.24, "grad_norm": 3.754500150680542, "learning_rate": 1.9949501057696515e-05, "loss": 2.1649, "step": 18815 }, { "epoch": 0.24, "grad_norm": 4.685612201690674, "learning_rate": 1.9949490510241506e-05, "loss": 2.9035, "step": 18816 }, { "epoch": 0.24, "grad_norm": 4.445709705352783, "learning_rate": 1.9949479961687905e-05, "loss": 2.3023, "step": 18817 }, { "epoch": 0.24, "grad_norm": 4.576241493225098, "learning_rate": 1.9949469412035712e-05, "loss": 2.0869, "step": 18818 }, { "epoch": 0.24, "grad_norm": 3.9418163299560547, "learning_rate": 1.9949458861284928e-05, "loss": 1.8768, "step": 18819 }, { "epoch": 0.24, "grad_norm": 4.336956024169922, "learning_rate": 1.9949448309435556e-05, "loss": 2.2689, "step": 18820 }, { "epoch": 0.24, "grad_norm": 3.932548761367798, "learning_rate": 1.9949437756487595e-05, "loss": 1.9608, "step": 18821 }, { "epoch": 0.24, "grad_norm": 4.050412178039551, "learning_rate": 1.994942720244105e-05, "loss": 1.9628, "step": 18822 }, { "epoch": 0.24, "grad_norm": 3.755267858505249, "learning_rate": 1.9949416647295917e-05, "loss": 1.8704, "step": 18823 }, { "epoch": 0.24, "grad_norm": 3.6676440238952637, "learning_rate": 1.99494060910522e-05, "loss": 1.8791, "step": 18824 }, { "epoch": 0.24, "grad_norm": 4.051819324493408, "learning_rate": 1.99493955337099e-05, "loss": 1.9961, "step": 18825 }, { "epoch": 0.24, "grad_norm": 4.176859378814697, "learning_rate": 1.9949384975269018e-05, "loss": 1.933, "step": 18826 }, { "epoch": 0.24, "grad_norm": 4.506024360656738, "learning_rate": 1.9949374415729555e-05, "loss": 2.4017, "step": 18827 }, { "epoch": 0.24, "grad_norm": 3.755828619003296, "learning_rate": 1.994936385509151e-05, "loss": 1.9375, "step": 18828 }, { "epoch": 0.24, "grad_norm": 4.538398742675781, "learning_rate": 1.994935329335489e-05, "loss": 2.4051, "step": 18829 }, { "epoch": 0.24, "grad_norm": 4.609405517578125, "learning_rate": 1.994934273051969e-05, "loss": 2.3945, "step": 18830 }, { "epoch": 0.24, "grad_norm": 4.382579803466797, "learning_rate": 1.9949332166585915e-05, "loss": 2.0908, "step": 18831 }, { "epoch": 0.24, "grad_norm": 3.712742328643799, "learning_rate": 1.9949321601553566e-05, "loss": 1.761, "step": 18832 }, { "epoch": 0.24, "grad_norm": 4.070563793182373, "learning_rate": 1.994931103542264e-05, "loss": 2.1921, "step": 18833 }, { "epoch": 0.24, "grad_norm": 4.19798469543457, "learning_rate": 1.9949300468193144e-05, "loss": 1.8002, "step": 18834 }, { "epoch": 0.24, "grad_norm": 4.321385383605957, "learning_rate": 1.9949289899865078e-05, "loss": 2.0319, "step": 18835 }, { "epoch": 0.24, "grad_norm": 4.278085708618164, "learning_rate": 1.9949279330438437e-05, "loss": 2.1797, "step": 18836 }, { "epoch": 0.24, "grad_norm": 4.322017192840576, "learning_rate": 1.9949268759913228e-05, "loss": 2.2676, "step": 18837 }, { "epoch": 0.24, "grad_norm": 4.579039573669434, "learning_rate": 1.994925818828945e-05, "loss": 2.2855, "step": 18838 }, { "epoch": 0.24, "grad_norm": 4.252439498901367, "learning_rate": 1.994924761556711e-05, "loss": 2.1604, "step": 18839 }, { "epoch": 0.24, "grad_norm": 4.415966033935547, "learning_rate": 1.99492370417462e-05, "loss": 1.9798, "step": 18840 }, { "epoch": 0.24, "grad_norm": 4.582923412322998, "learning_rate": 1.994922646682673e-05, "loss": 2.5618, "step": 18841 }, { "epoch": 0.24, "grad_norm": 3.3928415775299072, "learning_rate": 1.9949215890808694e-05, "loss": 1.6314, "step": 18842 }, { "epoch": 0.24, "grad_norm": 4.169544219970703, "learning_rate": 1.994920531369209e-05, "loss": 1.8085, "step": 18843 }, { "epoch": 0.24, "grad_norm": 4.079974174499512, "learning_rate": 1.994919473547693e-05, "loss": 2.1316, "step": 18844 }, { "epoch": 0.24, "grad_norm": 3.91701078414917, "learning_rate": 1.9949184156163214e-05, "loss": 1.8297, "step": 18845 }, { "epoch": 0.24, "grad_norm": 4.619073867797852, "learning_rate": 1.9949173575750935e-05, "loss": 2.3449, "step": 18846 }, { "epoch": 0.24, "grad_norm": 5.43549919128418, "learning_rate": 1.99491629942401e-05, "loss": 2.8358, "step": 18847 }, { "epoch": 0.24, "grad_norm": 4.012148857116699, "learning_rate": 1.9949152411630708e-05, "loss": 1.994, "step": 18848 }, { "epoch": 0.24, "grad_norm": 3.6437478065490723, "learning_rate": 1.9949141827922756e-05, "loss": 1.8065, "step": 18849 }, { "epoch": 0.24, "grad_norm": 4.176813125610352, "learning_rate": 1.9949131243116257e-05, "loss": 1.8767, "step": 18850 }, { "epoch": 0.24, "grad_norm": 4.009230136871338, "learning_rate": 1.9949120657211203e-05, "loss": 2.1401, "step": 18851 }, { "epoch": 0.24, "grad_norm": 4.451587677001953, "learning_rate": 1.99491100702076e-05, "loss": 2.8201, "step": 18852 }, { "epoch": 0.24, "grad_norm": 4.445758819580078, "learning_rate": 1.994909948210544e-05, "loss": 2.3447, "step": 18853 }, { "epoch": 0.24, "grad_norm": 3.7773430347442627, "learning_rate": 1.9949088892904734e-05, "loss": 1.8182, "step": 18854 }, { "epoch": 0.24, "grad_norm": 3.7509512901306152, "learning_rate": 1.994907830260548e-05, "loss": 1.9235, "step": 18855 }, { "epoch": 0.24, "grad_norm": 4.606667995452881, "learning_rate": 1.994906771120768e-05, "loss": 2.2815, "step": 18856 }, { "epoch": 0.24, "grad_norm": 3.8990843296051025, "learning_rate": 1.994905711871133e-05, "loss": 1.7944, "step": 18857 }, { "epoch": 0.24, "grad_norm": 4.072506904602051, "learning_rate": 1.994904652511644e-05, "loss": 2.1164, "step": 18858 }, { "epoch": 0.24, "grad_norm": 3.6714820861816406, "learning_rate": 1.9949035930423003e-05, "loss": 1.9458, "step": 18859 }, { "epoch": 0.24, "grad_norm": 4.082688808441162, "learning_rate": 1.9949025334631025e-05, "loss": 1.938, "step": 18860 }, { "epoch": 0.24, "grad_norm": 3.2686567306518555, "learning_rate": 1.9949014737740506e-05, "loss": 1.5551, "step": 18861 }, { "epoch": 0.24, "grad_norm": 3.6207034587860107, "learning_rate": 1.9949004139751447e-05, "loss": 1.753, "step": 18862 }, { "epoch": 0.24, "grad_norm": 4.348368167877197, "learning_rate": 1.994899354066385e-05, "loss": 2.2201, "step": 18863 }, { "epoch": 0.24, "grad_norm": 3.98051118850708, "learning_rate": 1.9948982940477712e-05, "loss": 2.0135, "step": 18864 }, { "epoch": 0.24, "grad_norm": 3.7807180881500244, "learning_rate": 1.994897233919304e-05, "loss": 2.0839, "step": 18865 }, { "epoch": 0.24, "grad_norm": 5.248201370239258, "learning_rate": 1.9948961736809832e-05, "loss": 2.3863, "step": 18866 }, { "epoch": 0.24, "grad_norm": 4.179032802581787, "learning_rate": 1.9948951133328093e-05, "loss": 2.246, "step": 18867 }, { "epoch": 0.24, "grad_norm": 4.365622520446777, "learning_rate": 1.994894052874782e-05, "loss": 2.0879, "step": 18868 }, { "epoch": 0.24, "grad_norm": 4.3702826499938965, "learning_rate": 1.9948929923069012e-05, "loss": 2.1926, "step": 18869 }, { "epoch": 0.24, "grad_norm": 4.123182773590088, "learning_rate": 1.9948919316291674e-05, "loss": 1.7858, "step": 18870 }, { "epoch": 0.24, "grad_norm": 4.250686168670654, "learning_rate": 1.994890870841581e-05, "loss": 2.0767, "step": 18871 }, { "epoch": 0.24, "grad_norm": 4.8561248779296875, "learning_rate": 1.9948898099441413e-05, "loss": 2.5928, "step": 18872 }, { "epoch": 0.24, "grad_norm": 4.447875499725342, "learning_rate": 1.994888748936849e-05, "loss": 2.3902, "step": 18873 }, { "epoch": 0.24, "grad_norm": 4.496001720428467, "learning_rate": 1.9948876878197043e-05, "loss": 2.2447, "step": 18874 }, { "epoch": 0.24, "grad_norm": 4.199726104736328, "learning_rate": 1.994886626592707e-05, "loss": 2.0891, "step": 18875 }, { "epoch": 0.24, "grad_norm": 3.9589781761169434, "learning_rate": 1.9948855652558575e-05, "loss": 1.9063, "step": 18876 }, { "epoch": 0.24, "grad_norm": 3.8272321224212646, "learning_rate": 1.9948845038091557e-05, "loss": 1.9226, "step": 18877 }, { "epoch": 0.24, "grad_norm": 4.293035984039307, "learning_rate": 1.9948834422526015e-05, "loss": 2.1328, "step": 18878 }, { "epoch": 0.25, "grad_norm": 3.9881742000579834, "learning_rate": 1.9948823805861956e-05, "loss": 1.8571, "step": 18879 }, { "epoch": 0.25, "grad_norm": 4.474067687988281, "learning_rate": 1.9948813188099377e-05, "loss": 2.4884, "step": 18880 }, { "epoch": 0.25, "grad_norm": 4.225884437561035, "learning_rate": 1.994880256923828e-05, "loss": 2.1747, "step": 18881 }, { "epoch": 0.25, "grad_norm": 4.221709728240967, "learning_rate": 1.9948791949278667e-05, "loss": 2.0277, "step": 18882 }, { "epoch": 0.25, "grad_norm": 4.359618186950684, "learning_rate": 1.9948781328220536e-05, "loss": 1.7732, "step": 18883 }, { "epoch": 0.25, "grad_norm": 4.750170707702637, "learning_rate": 1.9948770706063892e-05, "loss": 2.2238, "step": 18884 }, { "epoch": 0.25, "grad_norm": 3.9347245693206787, "learning_rate": 1.9948760082808738e-05, "loss": 1.8551, "step": 18885 }, { "epoch": 0.25, "grad_norm": 4.423939228057861, "learning_rate": 1.994874945845507e-05, "loss": 2.5784, "step": 18886 }, { "epoch": 0.25, "grad_norm": 4.061092853546143, "learning_rate": 1.994873883300289e-05, "loss": 1.9021, "step": 18887 }, { "epoch": 0.25, "grad_norm": 4.077447414398193, "learning_rate": 1.9948728206452203e-05, "loss": 2.1917, "step": 18888 }, { "epoch": 0.25, "grad_norm": 5.00070858001709, "learning_rate": 1.9948717578803008e-05, "loss": 2.6415, "step": 18889 }, { "epoch": 0.25, "grad_norm": 3.548088312149048, "learning_rate": 1.9948706950055302e-05, "loss": 1.7707, "step": 18890 }, { "epoch": 0.25, "grad_norm": 4.149027347564697, "learning_rate": 1.994869632020909e-05, "loss": 2.1161, "step": 18891 }, { "epoch": 0.25, "grad_norm": 4.518855094909668, "learning_rate": 1.9948685689264377e-05, "loss": 2.0355, "step": 18892 }, { "epoch": 0.25, "grad_norm": 3.9714481830596924, "learning_rate": 1.9948675057221157e-05, "loss": 2.2202, "step": 18893 }, { "epoch": 0.25, "grad_norm": 4.181593418121338, "learning_rate": 1.9948664424079438e-05, "loss": 2.4475, "step": 18894 }, { "epoch": 0.25, "grad_norm": 4.2087907791137695, "learning_rate": 1.9948653789839215e-05, "loss": 2.0346, "step": 18895 }, { "epoch": 0.25, "grad_norm": 4.28347110748291, "learning_rate": 1.9948643154500492e-05, "loss": 2.5665, "step": 18896 }, { "epoch": 0.25, "grad_norm": 3.958075523376465, "learning_rate": 1.9948632518063272e-05, "loss": 2.0885, "step": 18897 }, { "epoch": 0.25, "grad_norm": 4.151418685913086, "learning_rate": 1.9948621880527552e-05, "loss": 2.1188, "step": 18898 }, { "epoch": 0.25, "grad_norm": 3.818436861038208, "learning_rate": 1.9948611241893335e-05, "loss": 1.8359, "step": 18899 }, { "epoch": 0.25, "grad_norm": 4.452212810516357, "learning_rate": 1.9948600602160626e-05, "loss": 2.208, "step": 18900 }, { "epoch": 0.25, "grad_norm": 4.073630332946777, "learning_rate": 1.994858996132942e-05, "loss": 1.9884, "step": 18901 }, { "epoch": 0.25, "grad_norm": 4.205947399139404, "learning_rate": 1.9948579319399722e-05, "loss": 1.7399, "step": 18902 }, { "epoch": 0.25, "grad_norm": 3.9904308319091797, "learning_rate": 1.994856867637153e-05, "loss": 2.1156, "step": 18903 }, { "epoch": 0.25, "grad_norm": 4.381476879119873, "learning_rate": 1.9948558032244846e-05, "loss": 2.4578, "step": 18904 }, { "epoch": 0.25, "grad_norm": 4.486893177032471, "learning_rate": 1.9948547387019676e-05, "loss": 2.3045, "step": 18905 }, { "epoch": 0.25, "grad_norm": 3.886688232421875, "learning_rate": 1.9948536740696017e-05, "loss": 2.1139, "step": 18906 }, { "epoch": 0.25, "grad_norm": 6.826897144317627, "learning_rate": 1.9948526093273874e-05, "loss": 2.5014, "step": 18907 }, { "epoch": 0.25, "grad_norm": 4.102276802062988, "learning_rate": 1.9948515444753238e-05, "loss": 1.9373, "step": 18908 }, { "epoch": 0.25, "grad_norm": 3.9845094680786133, "learning_rate": 1.9948504795134122e-05, "loss": 2.1749, "step": 18909 }, { "epoch": 0.25, "grad_norm": 4.869855880737305, "learning_rate": 1.994849414441652e-05, "loss": 2.5097, "step": 18910 }, { "epoch": 0.25, "grad_norm": 4.367288589477539, "learning_rate": 1.9948483492600434e-05, "loss": 2.3133, "step": 18911 }, { "epoch": 0.25, "grad_norm": 4.570287227630615, "learning_rate": 1.9948472839685872e-05, "loss": 2.1123, "step": 18912 }, { "epoch": 0.25, "grad_norm": 3.8782849311828613, "learning_rate": 1.9948462185672827e-05, "loss": 2.0631, "step": 18913 }, { "epoch": 0.25, "grad_norm": 3.869499921798706, "learning_rate": 1.99484515305613e-05, "loss": 2.0162, "step": 18914 }, { "epoch": 0.25, "grad_norm": 3.58845853805542, "learning_rate": 1.9948440874351298e-05, "loss": 1.6991, "step": 18915 }, { "epoch": 0.25, "grad_norm": 3.60935115814209, "learning_rate": 1.994843021704282e-05, "loss": 2.0442, "step": 18916 }, { "epoch": 0.25, "grad_norm": 4.626890182495117, "learning_rate": 1.9948419558635863e-05, "loss": 2.5682, "step": 18917 }, { "epoch": 0.25, "grad_norm": 4.126289367675781, "learning_rate": 1.9948408899130436e-05, "loss": 2.3581, "step": 18918 }, { "epoch": 0.25, "grad_norm": 4.3780670166015625, "learning_rate": 1.9948398238526533e-05, "loss": 2.5983, "step": 18919 }, { "epoch": 0.25, "grad_norm": 3.7245519161224365, "learning_rate": 1.994838757682416e-05, "loss": 1.9427, "step": 18920 }, { "epoch": 0.25, "grad_norm": 4.213972091674805, "learning_rate": 1.9948376914023314e-05, "loss": 2.2767, "step": 18921 }, { "epoch": 0.25, "grad_norm": 4.104557514190674, "learning_rate": 1.9948366250124002e-05, "loss": 1.8546, "step": 18922 }, { "epoch": 0.25, "grad_norm": 4.062285423278809, "learning_rate": 1.9948355585126217e-05, "loss": 2.02, "step": 18923 }, { "epoch": 0.25, "grad_norm": 4.629411220550537, "learning_rate": 1.994834491902997e-05, "loss": 2.1384, "step": 18924 }, { "epoch": 0.25, "grad_norm": 4.233861923217773, "learning_rate": 1.994833425183525e-05, "loss": 2.0609, "step": 18925 }, { "epoch": 0.25, "grad_norm": 4.5220255851745605, "learning_rate": 1.9948323583542075e-05, "loss": 2.1682, "step": 18926 }, { "epoch": 0.25, "grad_norm": 4.652792930603027, "learning_rate": 1.9948312914150428e-05, "loss": 2.4086, "step": 18927 }, { "epoch": 0.25, "grad_norm": 4.400210857391357, "learning_rate": 1.994830224366032e-05, "loss": 2.823, "step": 18928 }, { "epoch": 0.25, "grad_norm": 4.027817249298096, "learning_rate": 1.9948291572071753e-05, "loss": 1.9802, "step": 18929 }, { "epoch": 0.25, "grad_norm": 4.150343894958496, "learning_rate": 1.9948280899384725e-05, "loss": 2.1993, "step": 18930 }, { "epoch": 0.25, "grad_norm": 3.7967629432678223, "learning_rate": 1.9948270225599238e-05, "loss": 1.9514, "step": 18931 }, { "epoch": 0.25, "grad_norm": 3.5855190753936768, "learning_rate": 1.994825955071529e-05, "loss": 1.9627, "step": 18932 }, { "epoch": 0.25, "grad_norm": 3.9384872913360596, "learning_rate": 1.994824887473289e-05, "loss": 1.8836, "step": 18933 }, { "epoch": 0.25, "grad_norm": 3.781473398208618, "learning_rate": 1.9948238197652032e-05, "loss": 2.0555, "step": 18934 }, { "epoch": 0.25, "grad_norm": 3.6011252403259277, "learning_rate": 1.9948227519472723e-05, "loss": 1.8444, "step": 18935 }, { "epoch": 0.25, "grad_norm": 3.927187442779541, "learning_rate": 1.9948216840194956e-05, "loss": 1.7299, "step": 18936 }, { "epoch": 0.25, "grad_norm": 3.7895150184631348, "learning_rate": 1.994820615981874e-05, "loss": 2.4061, "step": 18937 }, { "epoch": 0.25, "grad_norm": 3.82387638092041, "learning_rate": 1.9948195478344072e-05, "loss": 2.1156, "step": 18938 }, { "epoch": 0.25, "grad_norm": 3.95238995552063, "learning_rate": 1.9948184795770957e-05, "loss": 2.2103, "step": 18939 }, { "epoch": 0.25, "grad_norm": 4.291543483734131, "learning_rate": 1.9948174112099388e-05, "loss": 2.6458, "step": 18940 }, { "epoch": 0.25, "grad_norm": 4.334994792938232, "learning_rate": 1.9948163427329378e-05, "loss": 2.0239, "step": 18941 }, { "epoch": 0.25, "grad_norm": 4.612454414367676, "learning_rate": 1.994815274146092e-05, "loss": 2.2178, "step": 18942 }, { "epoch": 0.25, "grad_norm": 4.253182888031006, "learning_rate": 1.9948142054494015e-05, "loss": 2.2863, "step": 18943 }, { "epoch": 0.25, "grad_norm": 4.9526143074035645, "learning_rate": 1.994813136642867e-05, "loss": 2.1772, "step": 18944 }, { "epoch": 0.25, "grad_norm": 4.055072784423828, "learning_rate": 1.994812067726488e-05, "loss": 2.0479, "step": 18945 }, { "epoch": 0.25, "grad_norm": 3.919198989868164, "learning_rate": 1.994810998700265e-05, "loss": 1.9721, "step": 18946 }, { "epoch": 0.25, "grad_norm": 4.54762601852417, "learning_rate": 1.994809929564198e-05, "loss": 2.5927, "step": 18947 }, { "epoch": 0.25, "grad_norm": 3.763380527496338, "learning_rate": 1.994808860318287e-05, "loss": 1.9679, "step": 18948 }, { "epoch": 0.25, "grad_norm": 3.977231979370117, "learning_rate": 1.994807790962532e-05, "loss": 2.1853, "step": 18949 }, { "epoch": 0.25, "grad_norm": 4.039700984954834, "learning_rate": 1.9948067214969335e-05, "loss": 2.2155, "step": 18950 }, { "epoch": 0.25, "grad_norm": 3.78513503074646, "learning_rate": 1.994805651921492e-05, "loss": 2.1937, "step": 18951 }, { "epoch": 0.25, "grad_norm": 3.5261240005493164, "learning_rate": 1.994804582236206e-05, "loss": 1.8624, "step": 18952 }, { "epoch": 0.25, "grad_norm": 3.7842931747436523, "learning_rate": 1.9948035124410778e-05, "loss": 2.0127, "step": 18953 }, { "epoch": 0.25, "grad_norm": 4.070160865783691, "learning_rate": 1.994802442536106e-05, "loss": 1.9903, "step": 18954 }, { "epoch": 0.25, "grad_norm": 4.174620151519775, "learning_rate": 1.9948013725212908e-05, "loss": 1.9215, "step": 18955 }, { "epoch": 0.25, "grad_norm": 4.988969326019287, "learning_rate": 1.994800302396633e-05, "loss": 2.5318, "step": 18956 }, { "epoch": 0.25, "grad_norm": 4.040046215057373, "learning_rate": 1.9947992321621325e-05, "loss": 2.2378, "step": 18957 }, { "epoch": 0.25, "grad_norm": 4.4013471603393555, "learning_rate": 1.994798161817789e-05, "loss": 2.2351, "step": 18958 }, { "epoch": 0.25, "grad_norm": 3.52656626701355, "learning_rate": 1.9947970913636026e-05, "loss": 1.8645, "step": 18959 }, { "epoch": 0.25, "grad_norm": 4.4387593269348145, "learning_rate": 1.9947960207995744e-05, "loss": 2.4365, "step": 18960 }, { "epoch": 0.25, "grad_norm": 4.058426380157471, "learning_rate": 1.9947949501257035e-05, "loss": 2.3209, "step": 18961 }, { "epoch": 0.25, "grad_norm": 3.819032907485962, "learning_rate": 1.9947938793419905e-05, "loss": 1.8146, "step": 18962 }, { "epoch": 0.25, "grad_norm": 4.50397253036499, "learning_rate": 1.9947928084484353e-05, "loss": 2.4559, "step": 18963 }, { "epoch": 0.25, "grad_norm": 4.471827030181885, "learning_rate": 1.994791737445038e-05, "loss": 2.1684, "step": 18964 }, { "epoch": 0.25, "grad_norm": 3.898444890975952, "learning_rate": 1.9947906663317988e-05, "loss": 1.8597, "step": 18965 }, { "epoch": 0.25, "grad_norm": 4.653432369232178, "learning_rate": 1.9947895951087183e-05, "loss": 2.7984, "step": 18966 }, { "epoch": 0.25, "grad_norm": 4.70573091506958, "learning_rate": 1.9947885237757955e-05, "loss": 2.9913, "step": 18967 }, { "epoch": 0.25, "grad_norm": 4.250147819519043, "learning_rate": 1.9947874523330316e-05, "loss": 2.4525, "step": 18968 }, { "epoch": 0.25, "grad_norm": 3.5168309211730957, "learning_rate": 1.994786380780426e-05, "loss": 1.9103, "step": 18969 }, { "epoch": 0.25, "grad_norm": 3.8793370723724365, "learning_rate": 1.9947853091179794e-05, "loss": 1.8688, "step": 18970 }, { "epoch": 0.25, "grad_norm": 3.7604360580444336, "learning_rate": 1.9947842373456915e-05, "loss": 2.1051, "step": 18971 }, { "epoch": 0.25, "grad_norm": 3.868583917617798, "learning_rate": 1.9947831654635624e-05, "loss": 1.5736, "step": 18972 }, { "epoch": 0.25, "grad_norm": 4.060597896575928, "learning_rate": 1.9947820934715928e-05, "loss": 2.5894, "step": 18973 }, { "epoch": 0.25, "grad_norm": 4.4897966384887695, "learning_rate": 1.9947810213697817e-05, "loss": 2.3662, "step": 18974 }, { "epoch": 0.25, "grad_norm": 4.245142936706543, "learning_rate": 1.9947799491581303e-05, "loss": 2.3624, "step": 18975 }, { "epoch": 0.25, "grad_norm": 4.466820240020752, "learning_rate": 1.994778876836638e-05, "loss": 2.1341, "step": 18976 }, { "epoch": 0.25, "grad_norm": 4.256296634674072, "learning_rate": 1.994777804405306e-05, "loss": 1.9898, "step": 18977 }, { "epoch": 0.25, "grad_norm": 4.548855304718018, "learning_rate": 1.9947767318641332e-05, "loss": 2.1096, "step": 18978 }, { "epoch": 0.25, "grad_norm": 4.507237434387207, "learning_rate": 1.99477565921312e-05, "loss": 2.0068, "step": 18979 }, { "epoch": 0.25, "grad_norm": 4.151951313018799, "learning_rate": 1.994774586452267e-05, "loss": 2.1917, "step": 18980 }, { "epoch": 0.25, "grad_norm": 4.1518402099609375, "learning_rate": 1.9947735135815737e-05, "loss": 2.0239, "step": 18981 }, { "epoch": 0.25, "grad_norm": 4.028029441833496, "learning_rate": 1.9947724406010408e-05, "loss": 2.3451, "step": 18982 }, { "epoch": 0.25, "grad_norm": 4.165586948394775, "learning_rate": 1.994771367510668e-05, "loss": 2.3188, "step": 18983 }, { "epoch": 0.25, "grad_norm": 3.892784833908081, "learning_rate": 1.9947702943104557e-05, "loss": 2.1533, "step": 18984 }, { "epoch": 0.25, "grad_norm": 4.802414894104004, "learning_rate": 1.994769221000404e-05, "loss": 1.95, "step": 18985 }, { "epoch": 0.25, "grad_norm": 3.76872181892395, "learning_rate": 1.9947681475805122e-05, "loss": 1.8206, "step": 18986 }, { "epoch": 0.25, "grad_norm": 4.136807918548584, "learning_rate": 1.9947670740507817e-05, "loss": 2.2683, "step": 18987 }, { "epoch": 0.25, "grad_norm": 3.5812551975250244, "learning_rate": 1.994766000411212e-05, "loss": 1.6339, "step": 18988 }, { "epoch": 0.25, "grad_norm": 3.684319257736206, "learning_rate": 1.994764926661803e-05, "loss": 1.9291, "step": 18989 }, { "epoch": 0.25, "grad_norm": 3.941063165664673, "learning_rate": 1.9947638528025553e-05, "loss": 2.1472, "step": 18990 }, { "epoch": 0.25, "grad_norm": 3.4764223098754883, "learning_rate": 1.9947627788334688e-05, "loss": 1.7811, "step": 18991 }, { "epoch": 0.25, "grad_norm": 3.5334787368774414, "learning_rate": 1.9947617047545435e-05, "loss": 1.823, "step": 18992 }, { "epoch": 0.25, "grad_norm": 4.184815406799316, "learning_rate": 1.9947606305657797e-05, "loss": 2.1575, "step": 18993 }, { "epoch": 0.25, "grad_norm": 3.551302433013916, "learning_rate": 1.9947595562671774e-05, "loss": 1.67, "step": 18994 }, { "epoch": 0.25, "grad_norm": 4.7108869552612305, "learning_rate": 1.994758481858737e-05, "loss": 2.5847, "step": 18995 }, { "epoch": 0.25, "grad_norm": 4.823437213897705, "learning_rate": 1.994757407340458e-05, "loss": 2.1662, "step": 18996 }, { "epoch": 0.25, "grad_norm": 4.014651775360107, "learning_rate": 1.994756332712341e-05, "loss": 2.1637, "step": 18997 }, { "epoch": 0.25, "grad_norm": 4.024088382720947, "learning_rate": 1.9947552579743865e-05, "loss": 2.0298, "step": 18998 }, { "epoch": 0.25, "grad_norm": 3.9556236267089844, "learning_rate": 1.9947541831265934e-05, "loss": 2.5652, "step": 18999 }, { "epoch": 0.25, "grad_norm": 3.5635457038879395, "learning_rate": 1.994753108168963e-05, "loss": 1.9542, "step": 19000 }, { "epoch": 0.25, "grad_norm": 3.947410821914673, "learning_rate": 1.9947520331014953e-05, "loss": 2.0557, "step": 19001 }, { "epoch": 0.25, "grad_norm": 3.960996627807617, "learning_rate": 1.9947509579241895e-05, "loss": 2.2281, "step": 19002 }, { "epoch": 0.25, "grad_norm": 4.7152323722839355, "learning_rate": 1.9947498826370462e-05, "loss": 2.5232, "step": 19003 }, { "epoch": 0.25, "grad_norm": 4.2131195068359375, "learning_rate": 1.9947488072400665e-05, "loss": 2.0829, "step": 19004 }, { "epoch": 0.25, "grad_norm": 3.763601779937744, "learning_rate": 1.994747731733249e-05, "loss": 2.0204, "step": 19005 }, { "epoch": 0.25, "grad_norm": 4.6625657081604, "learning_rate": 1.9947466561165947e-05, "loss": 2.0853, "step": 19006 }, { "epoch": 0.25, "grad_norm": 4.081596374511719, "learning_rate": 1.9947455803901032e-05, "loss": 1.9399, "step": 19007 }, { "epoch": 0.25, "grad_norm": 3.9828333854675293, "learning_rate": 1.9947445045537753e-05, "loss": 2.0612, "step": 19008 }, { "epoch": 0.25, "grad_norm": 4.232419967651367, "learning_rate": 1.9947434286076102e-05, "loss": 2.4713, "step": 19009 }, { "epoch": 0.25, "grad_norm": 4.122562408447266, "learning_rate": 1.994742352551609e-05, "loss": 2.2914, "step": 19010 }, { "epoch": 0.25, "grad_norm": 4.123614311218262, "learning_rate": 1.994741276385771e-05, "loss": 1.8966, "step": 19011 }, { "epoch": 0.25, "grad_norm": 3.7957918643951416, "learning_rate": 1.9947402001100974e-05, "loss": 1.9391, "step": 19012 }, { "epoch": 0.25, "grad_norm": 4.046322822570801, "learning_rate": 1.994739123724587e-05, "loss": 2.0003, "step": 19013 }, { "epoch": 0.25, "grad_norm": 4.964431285858154, "learning_rate": 1.9947380472292405e-05, "loss": 2.6973, "step": 19014 }, { "epoch": 0.25, "grad_norm": 4.141018390655518, "learning_rate": 1.9947369706240584e-05, "loss": 2.2334, "step": 19015 }, { "epoch": 0.25, "grad_norm": 4.061453819274902, "learning_rate": 1.9947358939090402e-05, "loss": 2.2762, "step": 19016 }, { "epoch": 0.25, "grad_norm": 4.072558879852295, "learning_rate": 1.9947348170841866e-05, "loss": 1.9028, "step": 19017 }, { "epoch": 0.25, "grad_norm": 5.131922245025635, "learning_rate": 1.994733740149497e-05, "loss": 2.5377, "step": 19018 }, { "epoch": 0.25, "grad_norm": 4.11840295791626, "learning_rate": 1.9947326631049723e-05, "loss": 2.2556, "step": 19019 }, { "epoch": 0.25, "grad_norm": 3.919917345046997, "learning_rate": 1.9947315859506116e-05, "loss": 1.8391, "step": 19020 }, { "epoch": 0.25, "grad_norm": 4.714604377746582, "learning_rate": 1.9947305086864162e-05, "loss": 2.4735, "step": 19021 }, { "epoch": 0.25, "grad_norm": 3.8709475994110107, "learning_rate": 1.9947294313123854e-05, "loss": 1.7343, "step": 19022 }, { "epoch": 0.25, "grad_norm": 3.8097779750823975, "learning_rate": 1.99472835382852e-05, "loss": 1.8077, "step": 19023 }, { "epoch": 0.25, "grad_norm": 4.303415775299072, "learning_rate": 1.9947272762348194e-05, "loss": 2.0888, "step": 19024 }, { "epoch": 0.25, "grad_norm": 4.837655544281006, "learning_rate": 1.994726198531284e-05, "loss": 2.65, "step": 19025 }, { "epoch": 0.25, "grad_norm": 4.101627826690674, "learning_rate": 1.9947251207179143e-05, "loss": 2.6328, "step": 19026 }, { "epoch": 0.25, "grad_norm": 4.341216087341309, "learning_rate": 1.9947240427947097e-05, "loss": 2.191, "step": 19027 }, { "epoch": 0.25, "grad_norm": 4.176843166351318, "learning_rate": 1.994722964761671e-05, "loss": 2.5428, "step": 19028 }, { "epoch": 0.25, "grad_norm": 4.249050140380859, "learning_rate": 1.9947218866187978e-05, "loss": 1.8321, "step": 19029 }, { "epoch": 0.25, "grad_norm": 4.46403694152832, "learning_rate": 1.99472080836609e-05, "loss": 2.412, "step": 19030 }, { "epoch": 0.25, "grad_norm": 3.8618323802948, "learning_rate": 1.9947197300035488e-05, "loss": 2.0698, "step": 19031 }, { "epoch": 0.25, "grad_norm": 4.606912612915039, "learning_rate": 1.9947186515311734e-05, "loss": 2.0187, "step": 19032 }, { "epoch": 0.25, "grad_norm": 3.9387218952178955, "learning_rate": 1.9947175729489646e-05, "loss": 1.7057, "step": 19033 }, { "epoch": 0.25, "grad_norm": 4.252933025360107, "learning_rate": 1.9947164942569213e-05, "loss": 2.2386, "step": 19034 }, { "epoch": 0.25, "grad_norm": 4.108438968658447, "learning_rate": 1.994715415455045e-05, "loss": 2.3637, "step": 19035 }, { "epoch": 0.25, "grad_norm": 3.834239959716797, "learning_rate": 1.9947143365433353e-05, "loss": 1.8505, "step": 19036 }, { "epoch": 0.25, "grad_norm": 4.578585624694824, "learning_rate": 1.994713257521792e-05, "loss": 2.0932, "step": 19037 }, { "epoch": 0.25, "grad_norm": 4.191548824310303, "learning_rate": 1.9947121783904156e-05, "loss": 2.2942, "step": 19038 }, { "epoch": 0.25, "grad_norm": 3.5331361293792725, "learning_rate": 1.994711099149206e-05, "loss": 1.9013, "step": 19039 }, { "epoch": 0.25, "grad_norm": 3.8955349922180176, "learning_rate": 1.9947100197981636e-05, "loss": 2.0318, "step": 19040 }, { "epoch": 0.25, "grad_norm": 4.251002311706543, "learning_rate": 1.994708940337288e-05, "loss": 2.4185, "step": 19041 }, { "epoch": 0.25, "grad_norm": 4.170960426330566, "learning_rate": 1.99470786076658e-05, "loss": 2.2957, "step": 19042 }, { "epoch": 0.25, "grad_norm": 4.135786533355713, "learning_rate": 1.9947067810860392e-05, "loss": 2.0193, "step": 19043 }, { "epoch": 0.25, "grad_norm": 4.292699337005615, "learning_rate": 1.994705701295666e-05, "loss": 2.2216, "step": 19044 }, { "epoch": 0.25, "grad_norm": 4.012509346008301, "learning_rate": 1.9947046213954603e-05, "loss": 1.8269, "step": 19045 }, { "epoch": 0.25, "grad_norm": 4.00470495223999, "learning_rate": 1.9947035413854223e-05, "loss": 2.4916, "step": 19046 }, { "epoch": 0.25, "grad_norm": 3.932072401046753, "learning_rate": 1.9947024612655522e-05, "loss": 1.8538, "step": 19047 }, { "epoch": 0.25, "grad_norm": 4.373744487762451, "learning_rate": 1.99470138103585e-05, "loss": 2.4138, "step": 19048 }, { "epoch": 0.25, "grad_norm": 4.217865467071533, "learning_rate": 1.9947003006963165e-05, "loss": 2.0338, "step": 19049 }, { "epoch": 0.25, "grad_norm": 3.8029751777648926, "learning_rate": 1.9946992202469503e-05, "loss": 1.875, "step": 19050 }, { "epoch": 0.25, "grad_norm": 4.537752628326416, "learning_rate": 1.994698139687753e-05, "loss": 2.2715, "step": 19051 }, { "epoch": 0.25, "grad_norm": 4.795203685760498, "learning_rate": 1.994697059018724e-05, "loss": 2.347, "step": 19052 }, { "epoch": 0.25, "grad_norm": 3.4174468517303467, "learning_rate": 1.9946959782398636e-05, "loss": 1.7929, "step": 19053 }, { "epoch": 0.25, "grad_norm": 3.408055305480957, "learning_rate": 1.9946948973511716e-05, "loss": 1.7433, "step": 19054 }, { "epoch": 0.25, "grad_norm": 4.5312395095825195, "learning_rate": 1.994693816352649e-05, "loss": 2.6106, "step": 19055 }, { "epoch": 0.25, "grad_norm": 3.294800281524658, "learning_rate": 1.994692735244295e-05, "loss": 1.5578, "step": 19056 }, { "epoch": 0.25, "grad_norm": 3.8391647338867188, "learning_rate": 1.9946916540261098e-05, "loss": 1.7872, "step": 19057 }, { "epoch": 0.25, "grad_norm": 3.942033052444458, "learning_rate": 1.9946905726980942e-05, "loss": 2.0111, "step": 19058 }, { "epoch": 0.25, "grad_norm": 4.381657600402832, "learning_rate": 1.9946894912602473e-05, "loss": 1.9734, "step": 19059 }, { "epoch": 0.25, "grad_norm": 4.294107913970947, "learning_rate": 1.9946884097125704e-05, "loss": 2.2117, "step": 19060 }, { "epoch": 0.25, "grad_norm": 4.605683326721191, "learning_rate": 1.994687328055063e-05, "loss": 1.7384, "step": 19061 }, { "epoch": 0.25, "grad_norm": 4.6359968185424805, "learning_rate": 1.994686246287725e-05, "loss": 2.1796, "step": 19062 }, { "epoch": 0.25, "grad_norm": 4.1986260414123535, "learning_rate": 1.994685164410557e-05, "loss": 1.9731, "step": 19063 }, { "epoch": 0.25, "grad_norm": 3.5883023738861084, "learning_rate": 1.9946840824235587e-05, "loss": 1.8645, "step": 19064 }, { "epoch": 0.25, "grad_norm": 3.946523427963257, "learning_rate": 1.99468300032673e-05, "loss": 1.9327, "step": 19065 }, { "epoch": 0.25, "grad_norm": 4.6265177726745605, "learning_rate": 1.994681918120072e-05, "loss": 2.077, "step": 19066 }, { "epoch": 0.25, "grad_norm": 3.999556541442871, "learning_rate": 1.994680835803584e-05, "loss": 2.3391, "step": 19067 }, { "epoch": 0.25, "grad_norm": 4.035402774810791, "learning_rate": 1.9946797533772667e-05, "loss": 2.2806, "step": 19068 }, { "epoch": 0.25, "grad_norm": 4.080470085144043, "learning_rate": 1.9946786708411196e-05, "loss": 1.9958, "step": 19069 }, { "epoch": 0.25, "grad_norm": 4.072214126586914, "learning_rate": 1.9946775881951432e-05, "loss": 1.9058, "step": 19070 }, { "epoch": 0.25, "grad_norm": 3.719438314437866, "learning_rate": 1.9946765054393374e-05, "loss": 1.6591, "step": 19071 }, { "epoch": 0.25, "grad_norm": 4.479597568511963, "learning_rate": 1.9946754225737022e-05, "loss": 2.1791, "step": 19072 }, { "epoch": 0.25, "grad_norm": 4.349989414215088, "learning_rate": 1.9946743395982385e-05, "loss": 2.1706, "step": 19073 }, { "epoch": 0.25, "grad_norm": 3.7189528942108154, "learning_rate": 1.9946732565129453e-05, "loss": 1.8227, "step": 19074 }, { "epoch": 0.25, "grad_norm": 3.813610315322876, "learning_rate": 1.994672173317824e-05, "loss": 1.9276, "step": 19075 }, { "epoch": 0.25, "grad_norm": 3.4721522331237793, "learning_rate": 1.9946710900128737e-05, "loss": 1.801, "step": 19076 }, { "epoch": 0.25, "grad_norm": 3.959407329559326, "learning_rate": 1.9946700065980943e-05, "loss": 2.419, "step": 19077 }, { "epoch": 0.25, "grad_norm": 3.843553066253662, "learning_rate": 1.994668923073487e-05, "loss": 1.7377, "step": 19078 }, { "epoch": 0.25, "grad_norm": 4.011197090148926, "learning_rate": 1.9946678394390514e-05, "loss": 2.3793, "step": 19079 }, { "epoch": 0.25, "grad_norm": 3.686577796936035, "learning_rate": 1.9946667556947873e-05, "loss": 1.9939, "step": 19080 }, { "epoch": 0.25, "grad_norm": 5.069401264190674, "learning_rate": 1.9946656718406955e-05, "loss": 2.5469, "step": 19081 }, { "epoch": 0.25, "grad_norm": 4.915748119354248, "learning_rate": 1.9946645878767758e-05, "loss": 2.7317, "step": 19082 }, { "epoch": 0.25, "grad_norm": 3.9342000484466553, "learning_rate": 1.9946635038030277e-05, "loss": 2.1725, "step": 19083 }, { "epoch": 0.25, "grad_norm": 3.921966791152954, "learning_rate": 1.994662419619452e-05, "loss": 2.0404, "step": 19084 }, { "epoch": 0.25, "grad_norm": 3.6027421951293945, "learning_rate": 1.994661335326049e-05, "loss": 1.7303, "step": 19085 }, { "epoch": 0.25, "grad_norm": 3.7881035804748535, "learning_rate": 1.9946602509228187e-05, "loss": 2.4473, "step": 19086 }, { "epoch": 0.25, "grad_norm": 4.293323993682861, "learning_rate": 1.9946591664097607e-05, "loss": 2.1586, "step": 19087 }, { "epoch": 0.25, "grad_norm": 4.096999645233154, "learning_rate": 1.9946580817868754e-05, "loss": 2.0092, "step": 19088 }, { "epoch": 0.25, "grad_norm": 4.583132266998291, "learning_rate": 1.9946569970541628e-05, "loss": 2.0634, "step": 19089 }, { "epoch": 0.25, "grad_norm": 4.141847133636475, "learning_rate": 1.9946559122116236e-05, "loss": 1.9989, "step": 19090 }, { "epoch": 0.25, "grad_norm": 4.40590238571167, "learning_rate": 1.9946548272592574e-05, "loss": 1.6572, "step": 19091 }, { "epoch": 0.25, "grad_norm": 3.687744379043579, "learning_rate": 1.9946537421970643e-05, "loss": 1.6516, "step": 19092 }, { "epoch": 0.25, "grad_norm": 4.225584983825684, "learning_rate": 1.9946526570250445e-05, "loss": 2.3662, "step": 19093 }, { "epoch": 0.25, "grad_norm": 4.264952659606934, "learning_rate": 1.9946515717431984e-05, "loss": 2.442, "step": 19094 }, { "epoch": 0.25, "grad_norm": 3.9582908153533936, "learning_rate": 1.994650486351526e-05, "loss": 1.7083, "step": 19095 }, { "epoch": 0.25, "grad_norm": 4.354539394378662, "learning_rate": 1.994649400850027e-05, "loss": 2.1688, "step": 19096 }, { "epoch": 0.25, "grad_norm": 4.127285957336426, "learning_rate": 1.9946483152387017e-05, "loss": 2.3657, "step": 19097 }, { "epoch": 0.25, "grad_norm": 4.470579624176025, "learning_rate": 1.9946472295175508e-05, "loss": 2.6082, "step": 19098 }, { "epoch": 0.25, "grad_norm": 4.058378219604492, "learning_rate": 1.994646143686574e-05, "loss": 1.7012, "step": 19099 }, { "epoch": 0.25, "grad_norm": 3.518641710281372, "learning_rate": 1.9946450577457708e-05, "loss": 1.8208, "step": 19100 }, { "epoch": 0.25, "grad_norm": 4.0504865646362305, "learning_rate": 1.9946439716951424e-05, "loss": 2.1423, "step": 19101 }, { "epoch": 0.25, "grad_norm": 4.37053918838501, "learning_rate": 1.9946428855346883e-05, "loss": 1.9097, "step": 19102 }, { "epoch": 0.25, "grad_norm": 3.9719529151916504, "learning_rate": 1.9946417992644087e-05, "loss": 1.9101, "step": 19103 }, { "epoch": 0.25, "grad_norm": 3.783743381500244, "learning_rate": 1.9946407128843037e-05, "loss": 1.9435, "step": 19104 }, { "epoch": 0.25, "grad_norm": 4.296836853027344, "learning_rate": 1.994639626394374e-05, "loss": 1.8274, "step": 19105 }, { "epoch": 0.25, "grad_norm": 4.671823024749756, "learning_rate": 1.9946385397946187e-05, "loss": 2.4061, "step": 19106 }, { "epoch": 0.25, "grad_norm": 3.4275879859924316, "learning_rate": 1.9946374530850386e-05, "loss": 1.8397, "step": 19107 }, { "epoch": 0.25, "grad_norm": 4.013548374176025, "learning_rate": 1.9946363662656333e-05, "loss": 2.315, "step": 19108 }, { "epoch": 0.25, "grad_norm": 3.9233808517456055, "learning_rate": 1.9946352793364036e-05, "loss": 2.1247, "step": 19109 }, { "epoch": 0.25, "grad_norm": 3.767059326171875, "learning_rate": 1.9946341922973494e-05, "loss": 1.9097, "step": 19110 }, { "epoch": 0.25, "grad_norm": 4.400822162628174, "learning_rate": 1.9946331051484703e-05, "loss": 2.3296, "step": 19111 }, { "epoch": 0.25, "grad_norm": 3.7152976989746094, "learning_rate": 1.9946320178897672e-05, "loss": 2.3113, "step": 19112 }, { "epoch": 0.25, "grad_norm": 4.276791572570801, "learning_rate": 1.9946309305212395e-05, "loss": 2.2056, "step": 19113 }, { "epoch": 0.25, "grad_norm": 3.5393505096435547, "learning_rate": 1.9946298430428883e-05, "loss": 1.866, "step": 19114 }, { "epoch": 0.25, "grad_norm": 3.698606252670288, "learning_rate": 1.9946287554547128e-05, "loss": 1.9809, "step": 19115 }, { "epoch": 0.25, "grad_norm": 3.836225986480713, "learning_rate": 1.994627667756713e-05, "loss": 1.9702, "step": 19116 }, { "epoch": 0.25, "grad_norm": 4.168305397033691, "learning_rate": 1.99462657994889e-05, "loss": 1.7664, "step": 19117 }, { "epoch": 0.25, "grad_norm": 4.191102981567383, "learning_rate": 1.994625492031243e-05, "loss": 1.9103, "step": 19118 }, { "epoch": 0.25, "grad_norm": 4.174046516418457, "learning_rate": 1.994624404003773e-05, "loss": 1.998, "step": 19119 }, { "epoch": 0.25, "grad_norm": 4.051157474517822, "learning_rate": 1.9946233158664788e-05, "loss": 2.6107, "step": 19120 }, { "epoch": 0.25, "grad_norm": 3.9169938564300537, "learning_rate": 1.9946222276193618e-05, "loss": 1.9107, "step": 19121 }, { "epoch": 0.25, "grad_norm": 3.6299054622650146, "learning_rate": 1.9946211392624212e-05, "loss": 1.8321, "step": 19122 }, { "epoch": 0.25, "grad_norm": 3.744586229324341, "learning_rate": 1.994620050795658e-05, "loss": 2.0294, "step": 19123 }, { "epoch": 0.25, "grad_norm": 4.089025974273682, "learning_rate": 1.9946189622190717e-05, "loss": 2.1614, "step": 19124 }, { "epoch": 0.25, "grad_norm": 4.5633440017700195, "learning_rate": 1.9946178735326627e-05, "loss": 2.4803, "step": 19125 }, { "epoch": 0.25, "grad_norm": 3.516336441040039, "learning_rate": 1.9946167847364312e-05, "loss": 1.6868, "step": 19126 }, { "epoch": 0.25, "grad_norm": 4.312305450439453, "learning_rate": 1.9946156958303768e-05, "loss": 2.3489, "step": 19127 }, { "epoch": 0.25, "grad_norm": 4.441429615020752, "learning_rate": 1.9946146068145e-05, "loss": 2.0082, "step": 19128 }, { "epoch": 0.25, "grad_norm": 4.678877353668213, "learning_rate": 1.994613517688801e-05, "loss": 2.1783, "step": 19129 }, { "epoch": 0.25, "grad_norm": 3.9462552070617676, "learning_rate": 1.9946124284532795e-05, "loss": 2.1758, "step": 19130 }, { "epoch": 0.25, "grad_norm": 4.651412010192871, "learning_rate": 1.994611339107936e-05, "loss": 2.9938, "step": 19131 }, { "epoch": 0.25, "grad_norm": 3.567939519882202, "learning_rate": 1.994610249652771e-05, "loss": 1.5596, "step": 19132 }, { "epoch": 0.25, "grad_norm": 4.053582668304443, "learning_rate": 1.9946091600877838e-05, "loss": 2.3484, "step": 19133 }, { "epoch": 0.25, "grad_norm": 3.9756062030792236, "learning_rate": 1.994608070412975e-05, "loss": 1.9798, "step": 19134 }, { "epoch": 0.25, "grad_norm": 3.6187291145324707, "learning_rate": 1.9946069806283443e-05, "loss": 2.0673, "step": 19135 }, { "epoch": 0.25, "grad_norm": 3.858065128326416, "learning_rate": 1.9946058907338922e-05, "loss": 2.1465, "step": 19136 }, { "epoch": 0.25, "grad_norm": 4.281425952911377, "learning_rate": 1.994604800729619e-05, "loss": 2.1022, "step": 19137 }, { "epoch": 0.25, "grad_norm": 4.304704189300537, "learning_rate": 1.994603710615524e-05, "loss": 2.1748, "step": 19138 }, { "epoch": 0.25, "grad_norm": 3.964249849319458, "learning_rate": 1.9946026203916083e-05, "loss": 2.0937, "step": 19139 }, { "epoch": 0.25, "grad_norm": 3.856435537338257, "learning_rate": 1.9946015300578718e-05, "loss": 1.9385, "step": 19140 }, { "epoch": 0.25, "grad_norm": 4.364656925201416, "learning_rate": 1.9946004396143143e-05, "loss": 1.8437, "step": 19141 }, { "epoch": 0.25, "grad_norm": 4.211830139160156, "learning_rate": 1.9945993490609356e-05, "loss": 1.8537, "step": 19142 }, { "epoch": 0.25, "grad_norm": 4.073079586029053, "learning_rate": 1.9945982583977364e-05, "loss": 2.0906, "step": 19143 }, { "epoch": 0.25, "grad_norm": 4.117635726928711, "learning_rate": 1.994597167624717e-05, "loss": 2.2035, "step": 19144 }, { "epoch": 0.25, "grad_norm": 5.6437296867370605, "learning_rate": 1.994596076741877e-05, "loss": 2.4429, "step": 19145 }, { "epoch": 0.25, "grad_norm": 4.878939151763916, "learning_rate": 1.994594985749217e-05, "loss": 2.2652, "step": 19146 }, { "epoch": 0.25, "grad_norm": 4.827517986297607, "learning_rate": 1.9945938946467363e-05, "loss": 2.76, "step": 19147 }, { "epoch": 0.25, "grad_norm": 4.1212358474731445, "learning_rate": 1.994592803434436e-05, "loss": 2.2813, "step": 19148 }, { "epoch": 0.25, "grad_norm": 4.12117338180542, "learning_rate": 1.9945917121123158e-05, "loss": 2.3009, "step": 19149 }, { "epoch": 0.25, "grad_norm": 3.9761195182800293, "learning_rate": 1.9945906206803756e-05, "loss": 1.9068, "step": 19150 }, { "epoch": 0.25, "grad_norm": 3.6481900215148926, "learning_rate": 1.9945895291386155e-05, "loss": 1.7577, "step": 19151 }, { "epoch": 0.25, "grad_norm": 4.017996788024902, "learning_rate": 1.9945884374870363e-05, "loss": 2.1677, "step": 19152 }, { "epoch": 0.25, "grad_norm": 5.156970500946045, "learning_rate": 1.994587345725638e-05, "loss": 2.41, "step": 19153 }, { "epoch": 0.25, "grad_norm": 4.278343200683594, "learning_rate": 1.9945862538544193e-05, "loss": 2.2365, "step": 19154 }, { "epoch": 0.25, "grad_norm": 3.9015119075775146, "learning_rate": 1.994585161873382e-05, "loss": 2.0767, "step": 19155 }, { "epoch": 0.25, "grad_norm": 4.041911602020264, "learning_rate": 1.994584069782526e-05, "loss": 2.2882, "step": 19156 }, { "epoch": 0.25, "grad_norm": 3.9071614742279053, "learning_rate": 1.99458297758185e-05, "loss": 1.8254, "step": 19157 }, { "epoch": 0.25, "grad_norm": 4.868988513946533, "learning_rate": 1.994581885271356e-05, "loss": 2.2655, "step": 19158 }, { "epoch": 0.25, "grad_norm": 4.375226974487305, "learning_rate": 1.9945807928510433e-05, "loss": 2.369, "step": 19159 }, { "epoch": 0.25, "grad_norm": 3.6826090812683105, "learning_rate": 1.9945797003209116e-05, "loss": 2.0577, "step": 19160 }, { "epoch": 0.25, "grad_norm": 4.545778274536133, "learning_rate": 1.9945786076809618e-05, "loss": 2.3934, "step": 19161 }, { "epoch": 0.25, "grad_norm": 4.881951332092285, "learning_rate": 1.9945775149311935e-05, "loss": 2.685, "step": 19162 }, { "epoch": 0.25, "grad_norm": 4.160317897796631, "learning_rate": 1.9945764220716067e-05, "loss": 2.3482, "step": 19163 }, { "epoch": 0.25, "grad_norm": 4.473273277282715, "learning_rate": 1.9945753291022024e-05, "loss": 2.0526, "step": 19164 }, { "epoch": 0.25, "grad_norm": 4.16604471206665, "learning_rate": 1.9945742360229796e-05, "loss": 2.1573, "step": 19165 }, { "epoch": 0.25, "grad_norm": 4.326786518096924, "learning_rate": 1.994573142833939e-05, "loss": 2.8347, "step": 19166 }, { "epoch": 0.25, "grad_norm": 4.193688869476318, "learning_rate": 1.9945720495350806e-05, "loss": 2.0208, "step": 19167 }, { "epoch": 0.25, "grad_norm": 4.357266902923584, "learning_rate": 1.9945709561264047e-05, "loss": 2.2916, "step": 19168 }, { "epoch": 0.25, "grad_norm": 3.5001959800720215, "learning_rate": 1.9945698626079113e-05, "loss": 1.6844, "step": 19169 }, { "epoch": 0.25, "grad_norm": 4.046102523803711, "learning_rate": 1.9945687689796008e-05, "loss": 2.0961, "step": 19170 }, { "epoch": 0.25, "grad_norm": 4.259396076202393, "learning_rate": 1.9945676752414727e-05, "loss": 2.3405, "step": 19171 }, { "epoch": 0.25, "grad_norm": 4.812330722808838, "learning_rate": 1.9945665813935272e-05, "loss": 2.4796, "step": 19172 }, { "epoch": 0.25, "grad_norm": 4.58489990234375, "learning_rate": 1.994565487435765e-05, "loss": 2.5969, "step": 19173 }, { "epoch": 0.25, "grad_norm": 3.758204936981201, "learning_rate": 1.9945643933681858e-05, "loss": 1.8113, "step": 19174 }, { "epoch": 0.25, "grad_norm": 4.604275226593018, "learning_rate": 1.99456329919079e-05, "loss": 2.0339, "step": 19175 }, { "epoch": 0.25, "grad_norm": 4.839043617248535, "learning_rate": 1.9945622049035775e-05, "loss": 2.6937, "step": 19176 }, { "epoch": 0.25, "grad_norm": 5.029354095458984, "learning_rate": 1.9945611105065483e-05, "loss": 2.0283, "step": 19177 }, { "epoch": 0.25, "grad_norm": 4.36962890625, "learning_rate": 1.994560015999703e-05, "loss": 2.0474, "step": 19178 }, { "epoch": 0.25, "grad_norm": 5.020421981811523, "learning_rate": 1.994558921383041e-05, "loss": 2.6516, "step": 19179 }, { "epoch": 0.25, "grad_norm": 3.7420713901519775, "learning_rate": 1.994557826656563e-05, "loss": 1.9464, "step": 19180 }, { "epoch": 0.25, "grad_norm": 4.193752288818359, "learning_rate": 1.9945567318202686e-05, "loss": 2.0694, "step": 19181 }, { "epoch": 0.25, "grad_norm": 3.999559164047241, "learning_rate": 1.994555636874159e-05, "loss": 2.1718, "step": 19182 }, { "epoch": 0.25, "grad_norm": 4.618689060211182, "learning_rate": 1.994554541818233e-05, "loss": 2.2662, "step": 19183 }, { "epoch": 0.25, "grad_norm": 3.9147908687591553, "learning_rate": 1.9945534466524914e-05, "loss": 1.8078, "step": 19184 }, { "epoch": 0.25, "grad_norm": 3.4926199913024902, "learning_rate": 1.9945523513769346e-05, "loss": 1.6871, "step": 19185 }, { "epoch": 0.25, "grad_norm": 4.641477108001709, "learning_rate": 1.994551255991562e-05, "loss": 2.3089, "step": 19186 }, { "epoch": 0.25, "grad_norm": 3.8574106693267822, "learning_rate": 1.9945501604963738e-05, "loss": 1.8455, "step": 19187 }, { "epoch": 0.25, "grad_norm": 4.545434951782227, "learning_rate": 1.994549064891371e-05, "loss": 2.3795, "step": 19188 }, { "epoch": 0.25, "grad_norm": 4.059703826904297, "learning_rate": 1.9945479691765526e-05, "loss": 2.2042, "step": 19189 }, { "epoch": 0.25, "grad_norm": 3.938110828399658, "learning_rate": 1.9945468733519195e-05, "loss": 1.903, "step": 19190 }, { "epoch": 0.25, "grad_norm": 4.291016101837158, "learning_rate": 1.9945457774174717e-05, "loss": 2.2126, "step": 19191 }, { "epoch": 0.25, "grad_norm": 3.6757991313934326, "learning_rate": 1.994544681373209e-05, "loss": 2.1723, "step": 19192 }, { "epoch": 0.25, "grad_norm": 4.630047798156738, "learning_rate": 1.9945435852191316e-05, "loss": 2.4436, "step": 19193 }, { "epoch": 0.25, "grad_norm": 4.281033039093018, "learning_rate": 1.9945424889552398e-05, "loss": 2.0751, "step": 19194 }, { "epoch": 0.25, "grad_norm": 3.9242372512817383, "learning_rate": 1.9945413925815335e-05, "loss": 2.2168, "step": 19195 }, { "epoch": 0.25, "grad_norm": 4.423486232757568, "learning_rate": 1.994540296098013e-05, "loss": 2.5346, "step": 19196 }, { "epoch": 0.25, "grad_norm": 4.054373264312744, "learning_rate": 1.9945391995046787e-05, "loss": 2.0067, "step": 19197 }, { "epoch": 0.25, "grad_norm": 4.104221343994141, "learning_rate": 1.99453810280153e-05, "loss": 1.986, "step": 19198 }, { "epoch": 0.25, "grad_norm": 3.958326816558838, "learning_rate": 1.994537005988568e-05, "loss": 1.9907, "step": 19199 }, { "epoch": 0.25, "grad_norm": 4.094866752624512, "learning_rate": 1.9945359090657918e-05, "loss": 1.8695, "step": 19200 }, { "epoch": 0.25, "grad_norm": 4.039376258850098, "learning_rate": 1.9945348120332016e-05, "loss": 2.6023, "step": 19201 }, { "epoch": 0.25, "grad_norm": 4.0348219871521, "learning_rate": 1.9945337148907984e-05, "loss": 1.8963, "step": 19202 }, { "epoch": 0.25, "grad_norm": 5.023434638977051, "learning_rate": 1.9945326176385817e-05, "loss": 2.4008, "step": 19203 }, { "epoch": 0.25, "grad_norm": 3.950190544128418, "learning_rate": 1.9945315202765516e-05, "loss": 1.7912, "step": 19204 }, { "epoch": 0.25, "grad_norm": 3.509356737136841, "learning_rate": 1.9945304228047085e-05, "loss": 2.034, "step": 19205 }, { "epoch": 0.25, "grad_norm": 4.295016765594482, "learning_rate": 1.9945293252230526e-05, "loss": 2.2391, "step": 19206 }, { "epoch": 0.25, "grad_norm": 4.576829433441162, "learning_rate": 1.9945282275315833e-05, "loss": 2.3315, "step": 19207 }, { "epoch": 0.25, "grad_norm": 3.76554536819458, "learning_rate": 1.9945271297303016e-05, "loss": 1.7244, "step": 19208 }, { "epoch": 0.25, "grad_norm": 4.43674898147583, "learning_rate": 1.994526031819207e-05, "loss": 2.3909, "step": 19209 }, { "epoch": 0.25, "grad_norm": 4.346179008483887, "learning_rate": 1.9945249337983e-05, "loss": 2.3456, "step": 19210 }, { "epoch": 0.25, "grad_norm": 4.585451126098633, "learning_rate": 1.9945238356675803e-05, "loss": 2.1674, "step": 19211 }, { "epoch": 0.25, "grad_norm": 3.731550693511963, "learning_rate": 1.9945227374270483e-05, "loss": 2.0244, "step": 19212 }, { "epoch": 0.25, "grad_norm": 4.410526752471924, "learning_rate": 1.9945216390767043e-05, "loss": 2.2765, "step": 19213 }, { "epoch": 0.25, "grad_norm": 4.607865810394287, "learning_rate": 1.9945205406165485e-05, "loss": 2.5161, "step": 19214 }, { "epoch": 0.25, "grad_norm": 3.9618115425109863, "learning_rate": 1.9945194420465803e-05, "loss": 2.355, "step": 19215 }, { "epoch": 0.25, "grad_norm": 4.214495658874512, "learning_rate": 1.9945183433668008e-05, "loss": 2.3664, "step": 19216 }, { "epoch": 0.25, "grad_norm": 4.834873676300049, "learning_rate": 1.9945172445772088e-05, "loss": 2.4813, "step": 19217 }, { "epoch": 0.25, "grad_norm": 4.243317127227783, "learning_rate": 1.9945161456778058e-05, "loss": 1.9123, "step": 19218 }, { "epoch": 0.25, "grad_norm": 4.672025203704834, "learning_rate": 1.9945150466685914e-05, "loss": 2.2648, "step": 19219 }, { "epoch": 0.25, "grad_norm": 4.057665824890137, "learning_rate": 1.9945139475495657e-05, "loss": 2.5233, "step": 19220 }, { "epoch": 0.25, "grad_norm": 4.109401226043701, "learning_rate": 1.9945128483207285e-05, "loss": 2.1853, "step": 19221 }, { "epoch": 0.25, "grad_norm": 4.061063766479492, "learning_rate": 1.9945117489820803e-05, "loss": 2.1566, "step": 19222 }, { "epoch": 0.25, "grad_norm": 3.9709134101867676, "learning_rate": 1.9945106495336214e-05, "loss": 2.1177, "step": 19223 }, { "epoch": 0.25, "grad_norm": 4.443347454071045, "learning_rate": 1.994509549975351e-05, "loss": 1.8753, "step": 19224 }, { "epoch": 0.25, "grad_norm": 4.100096702575684, "learning_rate": 1.9945084503072704e-05, "loss": 2.1008, "step": 19225 }, { "epoch": 0.25, "grad_norm": 4.0567498207092285, "learning_rate": 1.994507350529379e-05, "loss": 1.7897, "step": 19226 }, { "epoch": 0.25, "grad_norm": 4.0107316970825195, "learning_rate": 1.9945062506416773e-05, "loss": 1.9929, "step": 19227 }, { "epoch": 0.25, "grad_norm": 4.7946062088012695, "learning_rate": 1.9945051506441652e-05, "loss": 2.1939, "step": 19228 }, { "epoch": 0.25, "grad_norm": 4.230583667755127, "learning_rate": 1.9945040505368427e-05, "loss": 1.6135, "step": 19229 }, { "epoch": 0.25, "grad_norm": 4.600652694702148, "learning_rate": 1.99450295031971e-05, "loss": 2.5411, "step": 19230 }, { "epoch": 0.25, "grad_norm": 4.129772186279297, "learning_rate": 1.9945018499927674e-05, "loss": 1.9179, "step": 19231 }, { "epoch": 0.25, "grad_norm": 4.078619956970215, "learning_rate": 1.994500749556015e-05, "loss": 1.821, "step": 19232 }, { "epoch": 0.25, "grad_norm": 4.051426410675049, "learning_rate": 1.994499649009453e-05, "loss": 2.0469, "step": 19233 }, { "epoch": 0.25, "grad_norm": 4.525594711303711, "learning_rate": 1.9944985483530813e-05, "loss": 2.598, "step": 19234 }, { "epoch": 0.25, "grad_norm": 4.2434468269348145, "learning_rate": 1.9944974475869e-05, "loss": 1.9834, "step": 19235 }, { "epoch": 0.25, "grad_norm": 4.442257404327393, "learning_rate": 1.9944963467109094e-05, "loss": 2.5742, "step": 19236 }, { "epoch": 0.25, "grad_norm": 3.892831802368164, "learning_rate": 1.9944952457251094e-05, "loss": 2.143, "step": 19237 }, { "epoch": 0.25, "grad_norm": 4.283548831939697, "learning_rate": 1.9944941446295e-05, "loss": 2.3498, "step": 19238 }, { "epoch": 0.25, "grad_norm": 3.6256372928619385, "learning_rate": 1.994493043424082e-05, "loss": 1.8501, "step": 19239 }, { "epoch": 0.25, "grad_norm": 3.797708511352539, "learning_rate": 1.994491942108855e-05, "loss": 1.8519, "step": 19240 }, { "epoch": 0.25, "grad_norm": 3.932178497314453, "learning_rate": 1.994490840683819e-05, "loss": 2.0326, "step": 19241 }, { "epoch": 0.25, "grad_norm": 4.0630412101745605, "learning_rate": 1.9944897391489746e-05, "loss": 2.1882, "step": 19242 }, { "epoch": 0.25, "grad_norm": 4.18507719039917, "learning_rate": 1.9944886375043215e-05, "loss": 2.0618, "step": 19243 }, { "epoch": 0.25, "grad_norm": 4.0415849685668945, "learning_rate": 1.99448753574986e-05, "loss": 2.1159, "step": 19244 }, { "epoch": 0.25, "grad_norm": 4.542459964752197, "learning_rate": 1.9944864338855904e-05, "loss": 2.4068, "step": 19245 }, { "epoch": 0.25, "grad_norm": 4.230745792388916, "learning_rate": 1.9944853319115124e-05, "loss": 2.3035, "step": 19246 }, { "epoch": 0.25, "grad_norm": 4.397265434265137, "learning_rate": 1.9944842298276263e-05, "loss": 2.3749, "step": 19247 }, { "epoch": 0.25, "grad_norm": 3.6242024898529053, "learning_rate": 1.9944831276339326e-05, "loss": 1.6625, "step": 19248 }, { "epoch": 0.25, "grad_norm": 4.239387512207031, "learning_rate": 1.994482025330431e-05, "loss": 2.6333, "step": 19249 }, { "epoch": 0.25, "grad_norm": 4.623335838317871, "learning_rate": 1.9944809229171212e-05, "loss": 2.489, "step": 19250 }, { "epoch": 0.25, "grad_norm": 4.677533149719238, "learning_rate": 1.9944798203940045e-05, "loss": 2.5742, "step": 19251 }, { "epoch": 0.25, "grad_norm": 4.10589599609375, "learning_rate": 1.9944787177610802e-05, "loss": 1.8377, "step": 19252 }, { "epoch": 0.25, "grad_norm": 3.91131591796875, "learning_rate": 1.9944776150183482e-05, "loss": 1.813, "step": 19253 }, { "epoch": 0.25, "grad_norm": 3.7537450790405273, "learning_rate": 1.9944765121658096e-05, "loss": 1.7461, "step": 19254 }, { "epoch": 0.25, "grad_norm": 4.387922763824463, "learning_rate": 1.9944754092034636e-05, "loss": 2.2935, "step": 19255 }, { "epoch": 0.25, "grad_norm": 3.9176952838897705, "learning_rate": 1.9944743061313107e-05, "loss": 1.7324, "step": 19256 }, { "epoch": 0.25, "grad_norm": 4.2432146072387695, "learning_rate": 1.9944732029493507e-05, "loss": 2.1572, "step": 19257 }, { "epoch": 0.25, "grad_norm": 3.807539939880371, "learning_rate": 1.9944720996575842e-05, "loss": 2.0747, "step": 19258 }, { "epoch": 0.25, "grad_norm": 4.089491367340088, "learning_rate": 1.994470996256011e-05, "loss": 2.2605, "step": 19259 }, { "epoch": 0.25, "grad_norm": 4.565218448638916, "learning_rate": 1.9944698927446315e-05, "loss": 2.4841, "step": 19260 }, { "epoch": 0.25, "grad_norm": 3.781275510787964, "learning_rate": 1.9944687891234457e-05, "loss": 2.1334, "step": 19261 }, { "epoch": 0.25, "grad_norm": 3.7118635177612305, "learning_rate": 1.9944676853924533e-05, "loss": 1.9998, "step": 19262 }, { "epoch": 0.25, "grad_norm": 3.9720351696014404, "learning_rate": 1.9944665815516552e-05, "loss": 2.1475, "step": 19263 }, { "epoch": 0.25, "grad_norm": 5.026503086090088, "learning_rate": 1.9944654776010512e-05, "loss": 1.9484, "step": 19264 }, { "epoch": 0.25, "grad_norm": 4.07269287109375, "learning_rate": 1.9944643735406412e-05, "loss": 2.1147, "step": 19265 }, { "epoch": 0.25, "grad_norm": 4.091413497924805, "learning_rate": 1.9944632693704252e-05, "loss": 2.1853, "step": 19266 }, { "epoch": 0.25, "grad_norm": 4.502718925476074, "learning_rate": 1.9944621650904037e-05, "loss": 1.9802, "step": 19267 }, { "epoch": 0.25, "grad_norm": 4.553983688354492, "learning_rate": 1.9944610607005765e-05, "loss": 2.5151, "step": 19268 }, { "epoch": 0.25, "grad_norm": 3.763044595718384, "learning_rate": 1.9944599562009443e-05, "loss": 1.9441, "step": 19269 }, { "epoch": 0.25, "grad_norm": 4.281139373779297, "learning_rate": 1.994458851591507e-05, "loss": 2.3059, "step": 19270 }, { "epoch": 0.25, "grad_norm": 4.084808349609375, "learning_rate": 1.994457746872264e-05, "loss": 2.2682, "step": 19271 }, { "epoch": 0.25, "grad_norm": 4.717162609100342, "learning_rate": 1.9944566420432165e-05, "loss": 2.5365, "step": 19272 }, { "epoch": 0.25, "grad_norm": 3.6139564514160156, "learning_rate": 1.994455537104364e-05, "loss": 2.2069, "step": 19273 }, { "epoch": 0.25, "grad_norm": 3.9556477069854736, "learning_rate": 1.9944544320557067e-05, "loss": 1.6548, "step": 19274 }, { "epoch": 0.25, "grad_norm": 4.572234153747559, "learning_rate": 1.9944533268972445e-05, "loss": 2.4178, "step": 19275 }, { "epoch": 0.25, "grad_norm": 3.975613832473755, "learning_rate": 1.994452221628978e-05, "loss": 1.8714, "step": 19276 }, { "epoch": 0.25, "grad_norm": 3.807861804962158, "learning_rate": 1.994451116250907e-05, "loss": 1.8507, "step": 19277 }, { "epoch": 0.25, "grad_norm": 3.278007984161377, "learning_rate": 1.994450010763032e-05, "loss": 1.4744, "step": 19278 }, { "epoch": 0.25, "grad_norm": 4.339329719543457, "learning_rate": 1.9944489051653524e-05, "loss": 2.4445, "step": 19279 }, { "epoch": 0.25, "grad_norm": 4.000926494598389, "learning_rate": 1.9944477994578693e-05, "loss": 1.9575, "step": 19280 }, { "epoch": 0.25, "grad_norm": 3.714047431945801, "learning_rate": 1.9944466936405818e-05, "loss": 1.9991, "step": 19281 }, { "epoch": 0.25, "grad_norm": 4.122231483459473, "learning_rate": 1.9944455877134908e-05, "loss": 1.8525, "step": 19282 }, { "epoch": 0.25, "grad_norm": 5.801141738891602, "learning_rate": 1.994444481676596e-05, "loss": 2.3448, "step": 19283 }, { "epoch": 0.25, "grad_norm": 3.7032310962677, "learning_rate": 1.994443375529898e-05, "loss": 1.8533, "step": 19284 }, { "epoch": 0.25, "grad_norm": 3.6821742057800293, "learning_rate": 1.9944422692733965e-05, "loss": 2.1387, "step": 19285 }, { "epoch": 0.25, "grad_norm": 4.376418113708496, "learning_rate": 1.994441162907091e-05, "loss": 1.9752, "step": 19286 }, { "epoch": 0.25, "grad_norm": 3.9987213611602783, "learning_rate": 1.994440056430983e-05, "loss": 2.4813, "step": 19287 }, { "epoch": 0.25, "grad_norm": 4.751378536224365, "learning_rate": 1.994438949845072e-05, "loss": 2.1582, "step": 19288 }, { "epoch": 0.25, "grad_norm": 4.031061172485352, "learning_rate": 1.9944378431493577e-05, "loss": 2.3095, "step": 19289 }, { "epoch": 0.25, "grad_norm": 4.103209495544434, "learning_rate": 1.994436736343841e-05, "loss": 1.9652, "step": 19290 }, { "epoch": 0.25, "grad_norm": 4.753909587860107, "learning_rate": 1.994435629428521e-05, "loss": 2.2317, "step": 19291 }, { "epoch": 0.25, "grad_norm": 3.543010950088501, "learning_rate": 1.994434522403399e-05, "loss": 1.9465, "step": 19292 }, { "epoch": 0.25, "grad_norm": 4.447648525238037, "learning_rate": 1.9944334152684744e-05, "loss": 2.5609, "step": 19293 }, { "epoch": 0.25, "grad_norm": 3.992743968963623, "learning_rate": 1.9944323080237474e-05, "loss": 1.891, "step": 19294 }, { "epoch": 0.25, "grad_norm": 4.390812397003174, "learning_rate": 1.9944312006692185e-05, "loss": 2.2614, "step": 19295 }, { "epoch": 0.25, "grad_norm": 3.8559279441833496, "learning_rate": 1.994430093204887e-05, "loss": 1.9548, "step": 19296 }, { "epoch": 0.25, "grad_norm": 4.223861217498779, "learning_rate": 1.994428985630754e-05, "loss": 2.1413, "step": 19297 }, { "epoch": 0.25, "grad_norm": 3.665555000305176, "learning_rate": 1.994427877946819e-05, "loss": 1.6877, "step": 19298 }, { "epoch": 0.25, "grad_norm": 3.919869899749756, "learning_rate": 1.994426770153082e-05, "loss": 1.9867, "step": 19299 }, { "epoch": 0.25, "grad_norm": 4.312719821929932, "learning_rate": 1.994425662249544e-05, "loss": 2.0608, "step": 19300 }, { "epoch": 0.25, "grad_norm": 3.930685520172119, "learning_rate": 1.994424554236204e-05, "loss": 1.9967, "step": 19301 }, { "epoch": 0.25, "grad_norm": 4.2946367263793945, "learning_rate": 1.994423446113063e-05, "loss": 2.3488, "step": 19302 }, { "epoch": 0.25, "grad_norm": 4.909261226654053, "learning_rate": 1.9944223378801205e-05, "loss": 2.2832, "step": 19303 }, { "epoch": 0.25, "grad_norm": 3.9582302570343018, "learning_rate": 1.9944212295373773e-05, "loss": 2.2479, "step": 19304 }, { "epoch": 0.25, "grad_norm": 3.8389344215393066, "learning_rate": 1.9944201210848328e-05, "loss": 1.7874, "step": 19305 }, { "epoch": 0.25, "grad_norm": 4.153881072998047, "learning_rate": 1.9944190125224877e-05, "loss": 2.2415, "step": 19306 }, { "epoch": 0.25, "grad_norm": 4.132147789001465, "learning_rate": 1.9944179038503418e-05, "loss": 2.0386, "step": 19307 }, { "epoch": 0.25, "grad_norm": 3.618870973587036, "learning_rate": 1.994416795068395e-05, "loss": 1.5299, "step": 19308 }, { "epoch": 0.25, "grad_norm": 4.104698181152344, "learning_rate": 1.994415686176648e-05, "loss": 2.1099, "step": 19309 }, { "epoch": 0.25, "grad_norm": 4.000235557556152, "learning_rate": 1.9944145771751008e-05, "loss": 1.6442, "step": 19310 }, { "epoch": 0.25, "grad_norm": 5.474789142608643, "learning_rate": 1.9944134680637527e-05, "loss": 2.5558, "step": 19311 }, { "epoch": 0.25, "grad_norm": 3.9574368000030518, "learning_rate": 1.994412358842605e-05, "loss": 1.8935, "step": 19312 }, { "epoch": 0.25, "grad_norm": 4.082218647003174, "learning_rate": 1.9944112495116572e-05, "loss": 1.9851, "step": 19313 }, { "epoch": 0.25, "grad_norm": 3.8454127311706543, "learning_rate": 1.9944101400709096e-05, "loss": 2.0902, "step": 19314 }, { "epoch": 0.25, "grad_norm": 4.215682506561279, "learning_rate": 1.994409030520362e-05, "loss": 2.1019, "step": 19315 }, { "epoch": 0.25, "grad_norm": 4.023764610290527, "learning_rate": 1.9944079208600153e-05, "loss": 1.8815, "step": 19316 }, { "epoch": 0.25, "grad_norm": 3.742544174194336, "learning_rate": 1.9944068110898683e-05, "loss": 1.7265, "step": 19317 }, { "epoch": 0.25, "grad_norm": 4.098876953125, "learning_rate": 1.994405701209923e-05, "loss": 2.1799, "step": 19318 }, { "epoch": 0.25, "grad_norm": 3.912813186645508, "learning_rate": 1.9944045912201776e-05, "loss": 2.2037, "step": 19319 }, { "epoch": 0.25, "grad_norm": 4.517022609710693, "learning_rate": 1.994403481120633e-05, "loss": 2.7122, "step": 19320 }, { "epoch": 0.25, "grad_norm": 4.332633018493652, "learning_rate": 1.9944023709112897e-05, "loss": 1.9483, "step": 19321 }, { "epoch": 0.25, "grad_norm": 4.488947868347168, "learning_rate": 1.9944012605921475e-05, "loss": 2.5909, "step": 19322 }, { "epoch": 0.25, "grad_norm": 4.067687034606934, "learning_rate": 1.9944001501632068e-05, "loss": 2.1418, "step": 19323 }, { "epoch": 0.25, "grad_norm": 3.8701224327087402, "learning_rate": 1.994399039624467e-05, "loss": 1.8346, "step": 19324 }, { "epoch": 0.25, "grad_norm": 4.031516075134277, "learning_rate": 1.994397928975929e-05, "loss": 1.7388, "step": 19325 }, { "epoch": 0.25, "grad_norm": 3.6850790977478027, "learning_rate": 1.9943968182175923e-05, "loss": 1.9608, "step": 19326 }, { "epoch": 0.25, "grad_norm": 4.557472229003906, "learning_rate": 1.9943957073494572e-05, "loss": 2.0031, "step": 19327 }, { "epoch": 0.25, "grad_norm": 4.070809841156006, "learning_rate": 1.9943945963715243e-05, "loss": 1.9399, "step": 19328 }, { "epoch": 0.25, "grad_norm": 3.7192893028259277, "learning_rate": 1.9943934852837935e-05, "loss": 1.9706, "step": 19329 }, { "epoch": 0.25, "grad_norm": 4.534698963165283, "learning_rate": 1.9943923740862643e-05, "loss": 2.6345, "step": 19330 }, { "epoch": 0.25, "grad_norm": 4.660130023956299, "learning_rate": 1.9943912627789375e-05, "loss": 2.6922, "step": 19331 }, { "epoch": 0.25, "grad_norm": 4.910376071929932, "learning_rate": 1.9943901513618133e-05, "loss": 2.579, "step": 19332 }, { "epoch": 0.25, "grad_norm": 4.990977764129639, "learning_rate": 1.9943890398348912e-05, "loss": 2.5394, "step": 19333 }, { "epoch": 0.25, "grad_norm": 4.682915210723877, "learning_rate": 1.994387928198172e-05, "loss": 2.2397, "step": 19334 }, { "epoch": 0.25, "grad_norm": 4.007721424102783, "learning_rate": 1.9943868164516553e-05, "loss": 2.1042, "step": 19335 }, { "epoch": 0.25, "grad_norm": 4.327559471130371, "learning_rate": 1.9943857045953414e-05, "loss": 2.1728, "step": 19336 }, { "epoch": 0.25, "grad_norm": 4.053951740264893, "learning_rate": 1.9943845926292304e-05, "loss": 2.1203, "step": 19337 }, { "epoch": 0.25, "grad_norm": 3.9286346435546875, "learning_rate": 1.9943834805533225e-05, "loss": 2.0276, "step": 19338 }, { "epoch": 0.25, "grad_norm": 4.592172622680664, "learning_rate": 1.994382368367618e-05, "loss": 2.776, "step": 19339 }, { "epoch": 0.25, "grad_norm": 4.112705230712891, "learning_rate": 1.9943812560721167e-05, "loss": 2.4307, "step": 19340 }, { "epoch": 0.25, "grad_norm": 4.297118663787842, "learning_rate": 1.9943801436668188e-05, "loss": 1.9488, "step": 19341 }, { "epoch": 0.25, "grad_norm": 4.287009239196777, "learning_rate": 1.9943790311517247e-05, "loss": 2.1753, "step": 19342 }, { "epoch": 0.25, "grad_norm": 4.124431133270264, "learning_rate": 1.9943779185268338e-05, "loss": 1.9703, "step": 19343 }, { "epoch": 0.25, "grad_norm": 4.440592288970947, "learning_rate": 1.994376805792147e-05, "loss": 2.1692, "step": 19344 }, { "epoch": 0.25, "grad_norm": 4.209737777709961, "learning_rate": 1.994375692947664e-05, "loss": 2.3122, "step": 19345 }, { "epoch": 0.25, "grad_norm": 4.270514965057373, "learning_rate": 1.9943745799933853e-05, "loss": 2.2461, "step": 19346 }, { "epoch": 0.25, "grad_norm": 3.8837387561798096, "learning_rate": 1.994373466929311e-05, "loss": 1.8222, "step": 19347 }, { "epoch": 0.25, "grad_norm": 4.428551197052002, "learning_rate": 1.9943723537554406e-05, "loss": 2.2737, "step": 19348 }, { "epoch": 0.25, "grad_norm": 3.857952117919922, "learning_rate": 1.9943712404717743e-05, "loss": 1.8321, "step": 19349 }, { "epoch": 0.25, "grad_norm": 4.124759674072266, "learning_rate": 1.994370127078313e-05, "loss": 1.973, "step": 19350 }, { "epoch": 0.25, "grad_norm": 4.833749771118164, "learning_rate": 1.994369013575056e-05, "loss": 2.5803, "step": 19351 }, { "epoch": 0.25, "grad_norm": 4.812317371368408, "learning_rate": 1.9943678999620044e-05, "loss": 1.8557, "step": 19352 }, { "epoch": 0.25, "grad_norm": 3.3819704055786133, "learning_rate": 1.9943667862391572e-05, "loss": 1.716, "step": 19353 }, { "epoch": 0.25, "grad_norm": 3.705414056777954, "learning_rate": 1.9943656724065156e-05, "loss": 1.7224, "step": 19354 }, { "epoch": 0.25, "grad_norm": 4.4342265129089355, "learning_rate": 1.9943645584640788e-05, "loss": 2.1568, "step": 19355 }, { "epoch": 0.25, "grad_norm": 4.276762008666992, "learning_rate": 1.994363444411847e-05, "loss": 2.2583, "step": 19356 }, { "epoch": 0.25, "grad_norm": 3.933530330657959, "learning_rate": 1.994362330249821e-05, "loss": 1.8728, "step": 19357 }, { "epoch": 0.25, "grad_norm": 4.336792469024658, "learning_rate": 1.9943612159780005e-05, "loss": 2.2646, "step": 19358 }, { "epoch": 0.25, "grad_norm": 4.9130682945251465, "learning_rate": 1.9943601015963853e-05, "loss": 2.7175, "step": 19359 }, { "epoch": 0.25, "grad_norm": 4.060894966125488, "learning_rate": 1.9943589871049763e-05, "loss": 2.1592, "step": 19360 }, { "epoch": 0.25, "grad_norm": 3.9065141677856445, "learning_rate": 1.994357872503773e-05, "loss": 1.7739, "step": 19361 }, { "epoch": 0.25, "grad_norm": 4.559895992279053, "learning_rate": 1.9943567577927757e-05, "loss": 2.0624, "step": 19362 }, { "epoch": 0.25, "grad_norm": 3.8699512481689453, "learning_rate": 1.9943556429719844e-05, "loss": 2.0092, "step": 19363 }, { "epoch": 0.25, "grad_norm": 4.087926864624023, "learning_rate": 1.9943545280413997e-05, "loss": 1.9461, "step": 19364 }, { "epoch": 0.25, "grad_norm": 4.178502559661865, "learning_rate": 1.9943534130010212e-05, "loss": 1.9489, "step": 19365 }, { "epoch": 0.25, "grad_norm": 4.397349834442139, "learning_rate": 1.9943522978508493e-05, "loss": 2.1016, "step": 19366 }, { "epoch": 0.25, "grad_norm": 4.4968671798706055, "learning_rate": 1.994351182590884e-05, "loss": 2.1763, "step": 19367 }, { "epoch": 0.25, "grad_norm": 4.176757335662842, "learning_rate": 1.9943500672211256e-05, "loss": 2.3233, "step": 19368 }, { "epoch": 0.25, "grad_norm": 4.275697231292725, "learning_rate": 1.9943489517415737e-05, "loss": 2.1234, "step": 19369 }, { "epoch": 0.25, "grad_norm": 4.161069869995117, "learning_rate": 1.994347836152229e-05, "loss": 2.0081, "step": 19370 }, { "epoch": 0.25, "grad_norm": 3.6738696098327637, "learning_rate": 1.9943467204530917e-05, "loss": 1.9468, "step": 19371 }, { "epoch": 0.25, "grad_norm": 4.43070650100708, "learning_rate": 1.9943456046441613e-05, "loss": 2.5123, "step": 19372 }, { "epoch": 0.25, "grad_norm": 4.501965045928955, "learning_rate": 1.9943444887254385e-05, "loss": 1.9537, "step": 19373 }, { "epoch": 0.25, "grad_norm": 4.617671489715576, "learning_rate": 1.994343372696923e-05, "loss": 2.2334, "step": 19374 }, { "epoch": 0.25, "grad_norm": 4.6108269691467285, "learning_rate": 1.9943422565586155e-05, "loss": 2.361, "step": 19375 }, { "epoch": 0.25, "grad_norm": 4.061872482299805, "learning_rate": 1.9943411403105154e-05, "loss": 1.8642, "step": 19376 }, { "epoch": 0.25, "grad_norm": 3.5717453956604004, "learning_rate": 1.9943400239526233e-05, "loss": 1.82, "step": 19377 }, { "epoch": 0.25, "grad_norm": 4.5500102043151855, "learning_rate": 1.994338907484939e-05, "loss": 2.3902, "step": 19378 }, { "epoch": 0.25, "grad_norm": 4.227559566497803, "learning_rate": 1.994337790907463e-05, "loss": 2.7483, "step": 19379 }, { "epoch": 0.25, "grad_norm": 3.4186131954193115, "learning_rate": 1.994336674220195e-05, "loss": 1.7582, "step": 19380 }, { "epoch": 0.25, "grad_norm": 4.432355880737305, "learning_rate": 1.9943355574231356e-05, "loss": 1.9348, "step": 19381 }, { "epoch": 0.25, "grad_norm": 4.232828617095947, "learning_rate": 1.9943344405162848e-05, "loss": 2.4713, "step": 19382 }, { "epoch": 0.25, "grad_norm": 4.2759270668029785, "learning_rate": 1.9943333234996426e-05, "loss": 2.6092, "step": 19383 }, { "epoch": 0.25, "grad_norm": 4.535914897918701, "learning_rate": 1.994332206373209e-05, "loss": 2.6612, "step": 19384 }, { "epoch": 0.25, "grad_norm": 3.947753429412842, "learning_rate": 1.994331089136984e-05, "loss": 1.9706, "step": 19385 }, { "epoch": 0.25, "grad_norm": 4.277866840362549, "learning_rate": 1.9943299717909683e-05, "loss": 2.2263, "step": 19386 }, { "epoch": 0.25, "grad_norm": 4.164119243621826, "learning_rate": 1.9943288543351615e-05, "loss": 2.538, "step": 19387 }, { "epoch": 0.25, "grad_norm": 4.5409722328186035, "learning_rate": 1.994327736769564e-05, "loss": 2.415, "step": 19388 }, { "epoch": 0.25, "grad_norm": 4.118879318237305, "learning_rate": 1.994326619094176e-05, "loss": 1.8573, "step": 19389 }, { "epoch": 0.25, "grad_norm": 4.212170124053955, "learning_rate": 1.9943255013089976e-05, "loss": 2.4161, "step": 19390 }, { "epoch": 0.25, "grad_norm": 3.7279157638549805, "learning_rate": 1.9943243834140283e-05, "loss": 1.951, "step": 19391 }, { "epoch": 0.25, "grad_norm": 3.7707929611206055, "learning_rate": 1.994323265409269e-05, "loss": 1.9316, "step": 19392 }, { "epoch": 0.25, "grad_norm": 4.604652404785156, "learning_rate": 1.9943221472947195e-05, "loss": 2.8763, "step": 19393 }, { "epoch": 0.25, "grad_norm": 3.8583266735076904, "learning_rate": 1.9943210290703798e-05, "loss": 2.0517, "step": 19394 }, { "epoch": 0.25, "grad_norm": 3.991006851196289, "learning_rate": 1.9943199107362507e-05, "loss": 2.0633, "step": 19395 }, { "epoch": 0.25, "grad_norm": 4.020025253295898, "learning_rate": 1.994318792292331e-05, "loss": 2.2711, "step": 19396 }, { "epoch": 0.25, "grad_norm": 4.208208084106445, "learning_rate": 1.9943176737386226e-05, "loss": 2.5045, "step": 19397 }, { "epoch": 0.25, "grad_norm": 4.3163347244262695, "learning_rate": 1.994316555075124e-05, "loss": 2.0944, "step": 19398 }, { "epoch": 0.25, "grad_norm": 4.081602573394775, "learning_rate": 1.994315436301836e-05, "loss": 2.0287, "step": 19399 }, { "epoch": 0.25, "grad_norm": 4.231806755065918, "learning_rate": 1.9943143174187592e-05, "loss": 2.0198, "step": 19400 }, { "epoch": 0.25, "grad_norm": 3.5892221927642822, "learning_rate": 1.994313198425893e-05, "loss": 1.9572, "step": 19401 }, { "epoch": 0.25, "grad_norm": 3.360366106033325, "learning_rate": 1.9943120793232373e-05, "loss": 1.5195, "step": 19402 }, { "epoch": 0.25, "grad_norm": 3.959885835647583, "learning_rate": 1.9943109601107932e-05, "loss": 2.0649, "step": 19403 }, { "epoch": 0.25, "grad_norm": 4.36702823638916, "learning_rate": 1.9943098407885602e-05, "loss": 2.1465, "step": 19404 }, { "epoch": 0.25, "grad_norm": 3.666804790496826, "learning_rate": 1.9943087213565385e-05, "loss": 2.018, "step": 19405 }, { "epoch": 0.25, "grad_norm": 3.575101613998413, "learning_rate": 1.994307601814728e-05, "loss": 2.0765, "step": 19406 }, { "epoch": 0.25, "grad_norm": 4.235132217407227, "learning_rate": 1.9943064821631294e-05, "loss": 2.1428, "step": 19407 }, { "epoch": 0.25, "grad_norm": 4.140958309173584, "learning_rate": 1.9943053624017423e-05, "loss": 2.174, "step": 19408 }, { "epoch": 0.25, "grad_norm": 5.1717658042907715, "learning_rate": 1.9943042425305668e-05, "loss": 3.2371, "step": 19409 }, { "epoch": 0.25, "grad_norm": 4.2742018699646, "learning_rate": 1.9943031225496037e-05, "loss": 1.8842, "step": 19410 }, { "epoch": 0.25, "grad_norm": 4.143057823181152, "learning_rate": 1.9943020024588526e-05, "loss": 1.9107, "step": 19411 }, { "epoch": 0.25, "grad_norm": 4.320228576660156, "learning_rate": 1.9943008822583135e-05, "loss": 2.3437, "step": 19412 }, { "epoch": 0.25, "grad_norm": 4.369653224945068, "learning_rate": 1.994299761947987e-05, "loss": 2.1406, "step": 19413 }, { "epoch": 0.25, "grad_norm": 4.055571556091309, "learning_rate": 1.9942986415278726e-05, "loss": 2.1367, "step": 19414 }, { "epoch": 0.25, "grad_norm": 4.097960948944092, "learning_rate": 1.9942975209979712e-05, "loss": 1.9209, "step": 19415 }, { "epoch": 0.25, "grad_norm": 4.366275787353516, "learning_rate": 1.994296400358282e-05, "loss": 2.2716, "step": 19416 }, { "epoch": 0.25, "grad_norm": 3.9243741035461426, "learning_rate": 1.9942952796088056e-05, "loss": 2.0935, "step": 19417 }, { "epoch": 0.25, "grad_norm": 4.950509071350098, "learning_rate": 1.9942941587495426e-05, "loss": 2.6905, "step": 19418 }, { "epoch": 0.25, "grad_norm": 3.59731125831604, "learning_rate": 1.9942930377804925e-05, "loss": 1.9681, "step": 19419 }, { "epoch": 0.25, "grad_norm": 4.0792646408081055, "learning_rate": 1.994291916701655e-05, "loss": 2.3323, "step": 19420 }, { "epoch": 0.25, "grad_norm": 4.083702087402344, "learning_rate": 1.9942907955130314e-05, "loss": 1.7636, "step": 19421 }, { "epoch": 0.25, "grad_norm": 4.055682182312012, "learning_rate": 1.9942896742146213e-05, "loss": 1.9929, "step": 19422 }, { "epoch": 0.25, "grad_norm": 4.305746555328369, "learning_rate": 1.9942885528064243e-05, "loss": 2.4788, "step": 19423 }, { "epoch": 0.25, "grad_norm": 4.060368061065674, "learning_rate": 1.9942874312884413e-05, "loss": 2.2195, "step": 19424 }, { "epoch": 0.25, "grad_norm": 3.7634692192077637, "learning_rate": 1.994286309660672e-05, "loss": 1.9168, "step": 19425 }, { "epoch": 0.25, "grad_norm": 3.9560134410858154, "learning_rate": 1.9942851879231168e-05, "loss": 2.6569, "step": 19426 }, { "epoch": 0.25, "grad_norm": 3.920356035232544, "learning_rate": 1.9942840660757755e-05, "loss": 2.0, "step": 19427 }, { "epoch": 0.25, "grad_norm": 4.2427287101745605, "learning_rate": 1.9942829441186483e-05, "loss": 1.7068, "step": 19428 }, { "epoch": 0.25, "grad_norm": 4.792674541473389, "learning_rate": 1.9942818220517358e-05, "loss": 2.2069, "step": 19429 }, { "epoch": 0.25, "grad_norm": 4.249953269958496, "learning_rate": 1.9942806998750373e-05, "loss": 2.5299, "step": 19430 }, { "epoch": 0.25, "grad_norm": 4.4757585525512695, "learning_rate": 1.9942795775885532e-05, "loss": 2.9434, "step": 19431 }, { "epoch": 0.25, "grad_norm": 4.052870273590088, "learning_rate": 1.994278455192284e-05, "loss": 2.1465, "step": 19432 }, { "epoch": 0.25, "grad_norm": 4.070826053619385, "learning_rate": 1.9942773326862297e-05, "loss": 1.974, "step": 19433 }, { "epoch": 0.25, "grad_norm": 4.305478096008301, "learning_rate": 1.9942762100703904e-05, "loss": 2.4089, "step": 19434 }, { "epoch": 0.25, "grad_norm": 4.746901035308838, "learning_rate": 1.9942750873447658e-05, "loss": 2.5544, "step": 19435 }, { "epoch": 0.25, "grad_norm": 4.749856472015381, "learning_rate": 1.9942739645093562e-05, "loss": 2.8878, "step": 19436 }, { "epoch": 0.25, "grad_norm": 4.535559177398682, "learning_rate": 1.9942728415641627e-05, "loss": 2.7765, "step": 19437 }, { "epoch": 0.25, "grad_norm": 3.8893532752990723, "learning_rate": 1.994271718509184e-05, "loss": 2.4246, "step": 19438 }, { "epoch": 0.25, "grad_norm": 4.779813289642334, "learning_rate": 1.9942705953444207e-05, "loss": 2.6004, "step": 19439 }, { "epoch": 0.25, "grad_norm": 3.53977108001709, "learning_rate": 1.9942694720698734e-05, "loss": 1.9059, "step": 19440 }, { "epoch": 0.25, "grad_norm": 4.210805416107178, "learning_rate": 1.9942683486855417e-05, "loss": 2.0665, "step": 19441 }, { "epoch": 0.25, "grad_norm": 4.281462669372559, "learning_rate": 1.9942672251914258e-05, "loss": 2.2587, "step": 19442 }, { "epoch": 0.25, "grad_norm": 3.5328962802886963, "learning_rate": 1.9942661015875262e-05, "loss": 1.8875, "step": 19443 }, { "epoch": 0.25, "grad_norm": 3.8956682682037354, "learning_rate": 1.9942649778738427e-05, "loss": 1.9206, "step": 19444 }, { "epoch": 0.25, "grad_norm": 4.01564359664917, "learning_rate": 1.9942638540503757e-05, "loss": 1.8999, "step": 19445 }, { "epoch": 0.25, "grad_norm": 3.6090564727783203, "learning_rate": 1.9942627301171246e-05, "loss": 1.7735, "step": 19446 }, { "epoch": 0.25, "grad_norm": 4.080475807189941, "learning_rate": 1.9942616060740904e-05, "loss": 2.1843, "step": 19447 }, { "epoch": 0.25, "grad_norm": 3.7219622135162354, "learning_rate": 1.9942604819212725e-05, "loss": 1.9893, "step": 19448 }, { "epoch": 0.25, "grad_norm": 4.0780110359191895, "learning_rate": 1.9942593576586717e-05, "loss": 2.639, "step": 19449 }, { "epoch": 0.25, "grad_norm": 3.609842538833618, "learning_rate": 1.9942582332862876e-05, "loss": 2.1039, "step": 19450 }, { "epoch": 0.25, "grad_norm": 3.5169975757598877, "learning_rate": 1.9942571088041206e-05, "loss": 1.6449, "step": 19451 }, { "epoch": 0.25, "grad_norm": 4.937888145446777, "learning_rate": 1.994255984212171e-05, "loss": 2.6808, "step": 19452 }, { "epoch": 0.25, "grad_norm": 3.6386659145355225, "learning_rate": 1.9942548595104382e-05, "loss": 1.8034, "step": 19453 }, { "epoch": 0.25, "grad_norm": 4.371743679046631, "learning_rate": 1.994253734698923e-05, "loss": 2.394, "step": 19454 }, { "epoch": 0.25, "grad_norm": 3.718075752258301, "learning_rate": 1.9942526097776254e-05, "loss": 1.5865, "step": 19455 }, { "epoch": 0.25, "grad_norm": 4.251014232635498, "learning_rate": 1.9942514847465455e-05, "loss": 2.2332, "step": 19456 }, { "epoch": 0.25, "grad_norm": 3.7744033336639404, "learning_rate": 1.9942503596056833e-05, "loss": 1.8787, "step": 19457 }, { "epoch": 0.25, "grad_norm": 3.6384968757629395, "learning_rate": 1.994249234355039e-05, "loss": 1.854, "step": 19458 }, { "epoch": 0.25, "grad_norm": 4.135141849517822, "learning_rate": 1.9942481089946126e-05, "loss": 1.9708, "step": 19459 }, { "epoch": 0.25, "grad_norm": 4.261479377746582, "learning_rate": 1.9942469835244044e-05, "loss": 2.223, "step": 19460 }, { "epoch": 0.25, "grad_norm": 4.514817714691162, "learning_rate": 1.9942458579444146e-05, "loss": 2.5662, "step": 19461 }, { "epoch": 0.25, "grad_norm": 4.336280345916748, "learning_rate": 1.994244732254643e-05, "loss": 2.4534, "step": 19462 }, { "epoch": 0.25, "grad_norm": 4.021470069885254, "learning_rate": 1.99424360645509e-05, "loss": 2.3316, "step": 19463 }, { "epoch": 0.25, "grad_norm": 3.948401927947998, "learning_rate": 1.9942424805457558e-05, "loss": 1.827, "step": 19464 }, { "epoch": 0.25, "grad_norm": 4.188469409942627, "learning_rate": 1.99424135452664e-05, "loss": 2.3157, "step": 19465 }, { "epoch": 0.25, "grad_norm": 4.244035243988037, "learning_rate": 1.994240228397743e-05, "loss": 1.9271, "step": 19466 }, { "epoch": 0.25, "grad_norm": 3.96675968170166, "learning_rate": 1.9942391021590656e-05, "loss": 2.2028, "step": 19467 }, { "epoch": 0.25, "grad_norm": 3.6969780921936035, "learning_rate": 1.994237975810607e-05, "loss": 1.9899, "step": 19468 }, { "epoch": 0.25, "grad_norm": 4.757069110870361, "learning_rate": 1.9942368493523675e-05, "loss": 2.0125, "step": 19469 }, { "epoch": 0.25, "grad_norm": 4.198019027709961, "learning_rate": 1.9942357227843477e-05, "loss": 1.8499, "step": 19470 }, { "epoch": 0.25, "grad_norm": 4.1564130783081055, "learning_rate": 1.9942345961065473e-05, "loss": 2.354, "step": 19471 }, { "epoch": 0.25, "grad_norm": 4.412965297698975, "learning_rate": 1.9942334693189663e-05, "loss": 2.3463, "step": 19472 }, { "epoch": 0.25, "grad_norm": 4.525328636169434, "learning_rate": 1.9942323424216052e-05, "loss": 2.2741, "step": 19473 }, { "epoch": 0.25, "grad_norm": 4.127838611602783, "learning_rate": 1.994231215414464e-05, "loss": 2.0028, "step": 19474 }, { "epoch": 0.25, "grad_norm": 4.765364646911621, "learning_rate": 1.9942300882975432e-05, "loss": 2.4361, "step": 19475 }, { "epoch": 0.25, "grad_norm": 3.7671549320220947, "learning_rate": 1.994228961070842e-05, "loss": 2.0833, "step": 19476 }, { "epoch": 0.25, "grad_norm": 4.330550670623779, "learning_rate": 1.994227833734361e-05, "loss": 2.2667, "step": 19477 }, { "epoch": 0.25, "grad_norm": 3.732872486114502, "learning_rate": 1.994226706288101e-05, "loss": 2.2381, "step": 19478 }, { "epoch": 0.25, "grad_norm": 3.8527884483337402, "learning_rate": 1.994225578732061e-05, "loss": 2.1083, "step": 19479 }, { "epoch": 0.25, "grad_norm": 4.068925380706787, "learning_rate": 1.994224451066242e-05, "loss": 2.2233, "step": 19480 }, { "epoch": 0.25, "grad_norm": 4.197841644287109, "learning_rate": 1.994223323290643e-05, "loss": 1.7387, "step": 19481 }, { "epoch": 0.25, "grad_norm": 3.6131439208984375, "learning_rate": 1.9942221954052658e-05, "loss": 1.7454, "step": 19482 }, { "epoch": 0.25, "grad_norm": 3.921595573425293, "learning_rate": 1.994221067410109e-05, "loss": 1.9935, "step": 19483 }, { "epoch": 0.25, "grad_norm": 4.391623020172119, "learning_rate": 1.9942199393051735e-05, "loss": 2.2398, "step": 19484 }, { "epoch": 0.25, "grad_norm": 3.7919464111328125, "learning_rate": 1.9942188110904596e-05, "loss": 2.1012, "step": 19485 }, { "epoch": 0.25, "grad_norm": 4.146433353424072, "learning_rate": 1.9942176827659665e-05, "loss": 1.6725, "step": 19486 }, { "epoch": 0.25, "grad_norm": 4.833984375, "learning_rate": 1.9942165543316952e-05, "loss": 2.9787, "step": 19487 }, { "epoch": 0.25, "grad_norm": 4.569305896759033, "learning_rate": 1.9942154257876454e-05, "loss": 2.5406, "step": 19488 }, { "epoch": 0.25, "grad_norm": 3.5709471702575684, "learning_rate": 1.994214297133817e-05, "loss": 1.6501, "step": 19489 }, { "epoch": 0.25, "grad_norm": 3.940479278564453, "learning_rate": 1.994213168370211e-05, "loss": 2.2979, "step": 19490 }, { "epoch": 0.25, "grad_norm": 4.050595760345459, "learning_rate": 1.994212039496827e-05, "loss": 1.728, "step": 19491 }, { "epoch": 0.25, "grad_norm": 3.9911088943481445, "learning_rate": 1.994210910513665e-05, "loss": 1.8855, "step": 19492 }, { "epoch": 0.25, "grad_norm": 3.8427069187164307, "learning_rate": 1.9942097814207255e-05, "loss": 2.2295, "step": 19493 }, { "epoch": 0.25, "grad_norm": 3.7925610542297363, "learning_rate": 1.994208652218008e-05, "loss": 1.7174, "step": 19494 }, { "epoch": 0.25, "grad_norm": 3.9059488773345947, "learning_rate": 1.9942075229055132e-05, "loss": 2.3331, "step": 19495 }, { "epoch": 0.25, "grad_norm": 3.916455030441284, "learning_rate": 1.994206393483241e-05, "loss": 2.1566, "step": 19496 }, { "epoch": 0.25, "grad_norm": 4.452004909515381, "learning_rate": 1.9942052639511914e-05, "loss": 2.5688, "step": 19497 }, { "epoch": 0.25, "grad_norm": 3.4505434036254883, "learning_rate": 1.994204134309365e-05, "loss": 1.4333, "step": 19498 }, { "epoch": 0.25, "grad_norm": 3.9147653579711914, "learning_rate": 1.994203004557761e-05, "loss": 1.8355, "step": 19499 }, { "epoch": 0.25, "grad_norm": 3.9251961708068848, "learning_rate": 1.9942018746963806e-05, "loss": 2.0913, "step": 19500 }, { "epoch": 0.25, "grad_norm": 4.331692218780518, "learning_rate": 1.9942007447252233e-05, "loss": 2.1345, "step": 19501 }, { "epoch": 0.25, "grad_norm": 4.822974681854248, "learning_rate": 1.9941996146442895e-05, "loss": 2.3572, "step": 19502 }, { "epoch": 0.25, "grad_norm": 4.36116361618042, "learning_rate": 1.9941984844535792e-05, "loss": 2.5609, "step": 19503 }, { "epoch": 0.25, "grad_norm": 4.136412143707275, "learning_rate": 1.994197354153092e-05, "loss": 2.0251, "step": 19504 }, { "epoch": 0.25, "grad_norm": 3.6667304039001465, "learning_rate": 1.9941962237428295e-05, "loss": 2.1966, "step": 19505 }, { "epoch": 0.25, "grad_norm": 4.833141326904297, "learning_rate": 1.99419509322279e-05, "loss": 2.2743, "step": 19506 }, { "epoch": 0.25, "grad_norm": 4.828363418579102, "learning_rate": 1.9941939625929748e-05, "loss": 2.7622, "step": 19507 }, { "epoch": 0.25, "grad_norm": 4.531763076782227, "learning_rate": 1.9941928318533837e-05, "loss": 2.3205, "step": 19508 }, { "epoch": 0.25, "grad_norm": 3.660878896713257, "learning_rate": 1.994191701004017e-05, "loss": 1.7947, "step": 19509 }, { "epoch": 0.25, "grad_norm": 4.183554172515869, "learning_rate": 1.9941905700448746e-05, "loss": 2.3372, "step": 19510 }, { "epoch": 0.25, "grad_norm": 4.072670936584473, "learning_rate": 1.9941894389759568e-05, "loss": 2.2972, "step": 19511 }, { "epoch": 0.25, "grad_norm": 5.042463779449463, "learning_rate": 1.9941883077972632e-05, "loss": 2.0834, "step": 19512 }, { "epoch": 0.25, "grad_norm": 3.944780111312866, "learning_rate": 1.9941871765087948e-05, "loss": 2.0718, "step": 19513 }, { "epoch": 0.25, "grad_norm": 4.116537094116211, "learning_rate": 1.994186045110551e-05, "loss": 2.1643, "step": 19514 }, { "epoch": 0.25, "grad_norm": 3.8888938426971436, "learning_rate": 1.9941849136025323e-05, "loss": 2.2588, "step": 19515 }, { "epoch": 0.25, "grad_norm": 3.897758722305298, "learning_rate": 1.9941837819847388e-05, "loss": 1.8048, "step": 19516 }, { "epoch": 0.25, "grad_norm": 4.182849884033203, "learning_rate": 1.9941826502571702e-05, "loss": 2.0195, "step": 19517 }, { "epoch": 0.25, "grad_norm": 3.8380963802337646, "learning_rate": 1.9941815184198275e-05, "loss": 1.7766, "step": 19518 }, { "epoch": 0.25, "grad_norm": 4.165027141571045, "learning_rate": 1.99418038647271e-05, "loss": 1.926, "step": 19519 }, { "epoch": 0.25, "grad_norm": 4.418661594390869, "learning_rate": 1.994179254415818e-05, "loss": 2.0406, "step": 19520 }, { "epoch": 0.25, "grad_norm": 4.274230480194092, "learning_rate": 1.994178122249152e-05, "loss": 2.1648, "step": 19521 }, { "epoch": 0.25, "grad_norm": 4.401187419891357, "learning_rate": 1.9941769899727116e-05, "loss": 2.3914, "step": 19522 }, { "epoch": 0.25, "grad_norm": 4.379659175872803, "learning_rate": 1.9941758575864973e-05, "loss": 2.5386, "step": 19523 }, { "epoch": 0.25, "grad_norm": 3.9522478580474854, "learning_rate": 1.9941747250905092e-05, "loss": 1.6164, "step": 19524 }, { "epoch": 0.25, "grad_norm": 4.089181423187256, "learning_rate": 1.9941735924847474e-05, "loss": 1.7447, "step": 19525 }, { "epoch": 0.25, "grad_norm": 4.4752197265625, "learning_rate": 1.9941724597692118e-05, "loss": 2.1781, "step": 19526 }, { "epoch": 0.25, "grad_norm": 4.88496208190918, "learning_rate": 1.994171326943903e-05, "loss": 2.1612, "step": 19527 }, { "epoch": 0.25, "grad_norm": 4.673039436340332, "learning_rate": 1.9941701940088202e-05, "loss": 2.6254, "step": 19528 }, { "epoch": 0.25, "grad_norm": 4.340988636016846, "learning_rate": 1.9941690609639646e-05, "loss": 2.2047, "step": 19529 }, { "epoch": 0.25, "grad_norm": 5.1533098220825195, "learning_rate": 1.9941679278093356e-05, "loss": 2.4286, "step": 19530 }, { "epoch": 0.25, "grad_norm": 3.7502946853637695, "learning_rate": 1.9941667945449342e-05, "loss": 1.739, "step": 19531 }, { "epoch": 0.25, "grad_norm": 4.277972221374512, "learning_rate": 1.9941656611707593e-05, "loss": 1.9284, "step": 19532 }, { "epoch": 0.25, "grad_norm": 4.4033308029174805, "learning_rate": 1.9941645276868117e-05, "loss": 2.0201, "step": 19533 }, { "epoch": 0.25, "grad_norm": 4.196259498596191, "learning_rate": 1.994163394093092e-05, "loss": 2.3826, "step": 19534 }, { "epoch": 0.25, "grad_norm": 3.3235814571380615, "learning_rate": 1.994162260389599e-05, "loss": 1.5802, "step": 19535 }, { "epoch": 0.25, "grad_norm": 5.2469401359558105, "learning_rate": 1.9941611265763342e-05, "loss": 2.685, "step": 19536 }, { "epoch": 0.25, "grad_norm": 3.771474838256836, "learning_rate": 1.9941599926532972e-05, "loss": 2.0237, "step": 19537 }, { "epoch": 0.25, "grad_norm": 3.6423094272613525, "learning_rate": 1.994158858620488e-05, "loss": 2.0092, "step": 19538 }, { "epoch": 0.25, "grad_norm": 3.60616135597229, "learning_rate": 1.9941577244779063e-05, "loss": 1.8078, "step": 19539 }, { "epoch": 0.25, "grad_norm": 4.891423225402832, "learning_rate": 1.9941565902255535e-05, "loss": 2.3437, "step": 19540 }, { "epoch": 0.25, "grad_norm": 4.506597518920898, "learning_rate": 1.994155455863428e-05, "loss": 2.0405, "step": 19541 }, { "epoch": 0.25, "grad_norm": 4.190593242645264, "learning_rate": 1.9941543213915318e-05, "loss": 2.4445, "step": 19542 }, { "epoch": 0.25, "grad_norm": 4.456590175628662, "learning_rate": 1.9941531868098637e-05, "loss": 2.0524, "step": 19543 }, { "epoch": 0.25, "grad_norm": 4.211596488952637, "learning_rate": 1.994152052118424e-05, "loss": 2.1777, "step": 19544 }, { "epoch": 0.25, "grad_norm": 3.3874075412750244, "learning_rate": 1.9941509173172132e-05, "loss": 1.8269, "step": 19545 }, { "epoch": 0.25, "grad_norm": 3.748880386352539, "learning_rate": 1.9941497824062312e-05, "loss": 1.9131, "step": 19546 }, { "epoch": 0.25, "grad_norm": 4.720961570739746, "learning_rate": 1.9941486473854785e-05, "loss": 2.5842, "step": 19547 }, { "epoch": 0.25, "grad_norm": 3.9141016006469727, "learning_rate": 1.9941475122549547e-05, "loss": 2.068, "step": 19548 }, { "epoch": 0.25, "grad_norm": 4.2287678718566895, "learning_rate": 1.9941463770146602e-05, "loss": 2.3653, "step": 19549 }, { "epoch": 0.25, "grad_norm": 4.2766804695129395, "learning_rate": 1.9941452416645953e-05, "loss": 2.185, "step": 19550 }, { "epoch": 0.25, "grad_norm": 3.88539981842041, "learning_rate": 1.9941441062047597e-05, "loss": 1.9131, "step": 19551 }, { "epoch": 0.25, "grad_norm": 4.858459949493408, "learning_rate": 1.9941429706351533e-05, "loss": 2.2919, "step": 19552 }, { "epoch": 0.25, "grad_norm": 4.098732948303223, "learning_rate": 1.9941418349557773e-05, "loss": 1.8662, "step": 19553 }, { "epoch": 0.25, "grad_norm": 4.106724739074707, "learning_rate": 1.9941406991666308e-05, "loss": 2.1538, "step": 19554 }, { "epoch": 0.25, "grad_norm": 4.167690277099609, "learning_rate": 1.9941395632677147e-05, "loss": 2.2431, "step": 19555 }, { "epoch": 0.25, "grad_norm": 4.190988540649414, "learning_rate": 1.9941384272590284e-05, "loss": 2.0188, "step": 19556 }, { "epoch": 0.25, "grad_norm": 4.0494585037231445, "learning_rate": 1.9941372911405722e-05, "loss": 1.9769, "step": 19557 }, { "epoch": 0.25, "grad_norm": 3.739110231399536, "learning_rate": 1.9941361549123466e-05, "loss": 1.7822, "step": 19558 }, { "epoch": 0.25, "grad_norm": 4.158454418182373, "learning_rate": 1.9941350185743516e-05, "loss": 2.5006, "step": 19559 }, { "epoch": 0.25, "grad_norm": 4.33592414855957, "learning_rate": 1.994133882126587e-05, "loss": 2.2212, "step": 19560 }, { "epoch": 0.25, "grad_norm": 4.36098575592041, "learning_rate": 1.9941327455690534e-05, "loss": 2.2125, "step": 19561 }, { "epoch": 0.25, "grad_norm": 4.2069807052612305, "learning_rate": 1.9941316089017503e-05, "loss": 2.5704, "step": 19562 }, { "epoch": 0.25, "grad_norm": 4.274834632873535, "learning_rate": 1.9941304721246785e-05, "loss": 2.2378, "step": 19563 }, { "epoch": 0.25, "grad_norm": 3.988734245300293, "learning_rate": 1.9941293352378377e-05, "loss": 2.0494, "step": 19564 }, { "epoch": 0.25, "grad_norm": 3.869981288909912, "learning_rate": 1.9941281982412285e-05, "loss": 1.7465, "step": 19565 }, { "epoch": 0.25, "grad_norm": 3.7338223457336426, "learning_rate": 1.9941270611348506e-05, "loss": 2.0644, "step": 19566 }, { "epoch": 0.25, "grad_norm": 3.745131254196167, "learning_rate": 1.994125923918704e-05, "loss": 2.02, "step": 19567 }, { "epoch": 0.25, "grad_norm": 3.769228458404541, "learning_rate": 1.9941247865927887e-05, "loss": 1.8187, "step": 19568 }, { "epoch": 0.25, "grad_norm": 3.843301296234131, "learning_rate": 1.9941236491571058e-05, "loss": 1.6827, "step": 19569 }, { "epoch": 0.25, "grad_norm": 4.051496505737305, "learning_rate": 1.994122511611654e-05, "loss": 2.1106, "step": 19570 }, { "epoch": 0.25, "grad_norm": 4.081105709075928, "learning_rate": 1.994121373956435e-05, "loss": 2.5256, "step": 19571 }, { "epoch": 0.25, "grad_norm": 4.4459357261657715, "learning_rate": 1.9941202361914478e-05, "loss": 2.5026, "step": 19572 }, { "epoch": 0.25, "grad_norm": 4.632483959197998, "learning_rate": 1.9941190983166928e-05, "loss": 2.4426, "step": 19573 }, { "epoch": 0.25, "grad_norm": 4.896371364593506, "learning_rate": 1.9941179603321704e-05, "loss": 2.2496, "step": 19574 }, { "epoch": 0.25, "grad_norm": 4.415667533874512, "learning_rate": 1.9941168222378804e-05, "loss": 2.432, "step": 19575 }, { "epoch": 0.25, "grad_norm": 4.403800964355469, "learning_rate": 1.994115684033823e-05, "loss": 1.8977, "step": 19576 }, { "epoch": 0.25, "grad_norm": 4.452061653137207, "learning_rate": 1.9941145457199983e-05, "loss": 2.0396, "step": 19577 }, { "epoch": 0.25, "grad_norm": 4.166552543640137, "learning_rate": 1.9941134072964066e-05, "loss": 1.8497, "step": 19578 }, { "epoch": 0.25, "grad_norm": 4.047451019287109, "learning_rate": 1.994112268763048e-05, "loss": 2.2523, "step": 19579 }, { "epoch": 0.25, "grad_norm": 4.033846855163574, "learning_rate": 1.9941111301199222e-05, "loss": 2.3529, "step": 19580 }, { "epoch": 0.25, "grad_norm": 4.394082069396973, "learning_rate": 1.99410999136703e-05, "loss": 2.2765, "step": 19581 }, { "epoch": 0.25, "grad_norm": 4.971466541290283, "learning_rate": 1.994108852504371e-05, "loss": 2.6096, "step": 19582 }, { "epoch": 0.25, "grad_norm": 4.346205234527588, "learning_rate": 1.9941077135319458e-05, "loss": 2.603, "step": 19583 }, { "epoch": 0.25, "grad_norm": 4.154763221740723, "learning_rate": 1.994106574449754e-05, "loss": 2.176, "step": 19584 }, { "epoch": 0.25, "grad_norm": 4.217316627502441, "learning_rate": 1.994105435257796e-05, "loss": 2.1374, "step": 19585 }, { "epoch": 0.25, "grad_norm": 4.089473247528076, "learning_rate": 1.9941042959560718e-05, "loss": 1.8414, "step": 19586 }, { "epoch": 0.25, "grad_norm": 4.422519683837891, "learning_rate": 1.9941031565445818e-05, "loss": 2.2397, "step": 19587 }, { "epoch": 0.25, "grad_norm": 4.159780502319336, "learning_rate": 1.9941020170233262e-05, "loss": 2.2534, "step": 19588 }, { "epoch": 0.25, "grad_norm": 3.8632380962371826, "learning_rate": 1.9941008773923046e-05, "loss": 2.0609, "step": 19589 }, { "epoch": 0.25, "grad_norm": 3.635681390762329, "learning_rate": 1.994099737651517e-05, "loss": 1.6861, "step": 19590 }, { "epoch": 0.25, "grad_norm": 4.180491924285889, "learning_rate": 1.9940985978009645e-05, "loss": 2.4351, "step": 19591 }, { "epoch": 0.25, "grad_norm": 3.602362871170044, "learning_rate": 1.9940974578406463e-05, "loss": 1.7055, "step": 19592 }, { "epoch": 0.25, "grad_norm": 4.2003092765808105, "learning_rate": 1.9940963177705632e-05, "loss": 1.9389, "step": 19593 }, { "epoch": 0.25, "grad_norm": 3.6043879985809326, "learning_rate": 1.994095177590715e-05, "loss": 1.6553, "step": 19594 }, { "epoch": 0.25, "grad_norm": 4.063288688659668, "learning_rate": 1.9940940373011016e-05, "loss": 2.1905, "step": 19595 }, { "epoch": 0.25, "grad_norm": 4.946638107299805, "learning_rate": 1.994092896901723e-05, "loss": 2.977, "step": 19596 }, { "epoch": 0.25, "grad_norm": 4.299592971801758, "learning_rate": 1.9940917563925802e-05, "loss": 2.2063, "step": 19597 }, { "epoch": 0.25, "grad_norm": 4.150951862335205, "learning_rate": 1.9940906157736726e-05, "loss": 2.2094, "step": 19598 }, { "epoch": 0.25, "grad_norm": 4.299482345581055, "learning_rate": 1.9940894750450006e-05, "loss": 2.2831, "step": 19599 }, { "epoch": 0.25, "grad_norm": 4.352697849273682, "learning_rate": 1.9940883342065644e-05, "loss": 1.9411, "step": 19600 }, { "epoch": 0.25, "grad_norm": 3.999600410461426, "learning_rate": 1.994087193258364e-05, "loss": 2.0988, "step": 19601 }, { "epoch": 0.25, "grad_norm": 3.3997061252593994, "learning_rate": 1.994086052200399e-05, "loss": 1.406, "step": 19602 }, { "epoch": 0.25, "grad_norm": 3.6852431297302246, "learning_rate": 1.9940849110326707e-05, "loss": 2.0344, "step": 19603 }, { "epoch": 0.25, "grad_norm": 4.29121208190918, "learning_rate": 1.9940837697551783e-05, "loss": 2.3741, "step": 19604 }, { "epoch": 0.25, "grad_norm": 4.293737411499023, "learning_rate": 1.994082628367922e-05, "loss": 2.1639, "step": 19605 }, { "epoch": 0.25, "grad_norm": 3.8902173042297363, "learning_rate": 1.994081486870902e-05, "loss": 2.0516, "step": 19606 }, { "epoch": 0.25, "grad_norm": 3.834775447845459, "learning_rate": 1.9940803452641192e-05, "loss": 1.9077, "step": 19607 }, { "epoch": 0.25, "grad_norm": 3.8179678916931152, "learning_rate": 1.9940792035475724e-05, "loss": 1.9153, "step": 19608 }, { "epoch": 0.25, "grad_norm": 3.5197136402130127, "learning_rate": 1.9940780617212626e-05, "loss": 1.6923, "step": 19609 }, { "epoch": 0.25, "grad_norm": 4.369640350341797, "learning_rate": 1.9940769197851896e-05, "loss": 2.3299, "step": 19610 }, { "epoch": 0.25, "grad_norm": 4.97031831741333, "learning_rate": 1.994075777739354e-05, "loss": 2.1624, "step": 19611 }, { "epoch": 0.25, "grad_norm": 4.169098377227783, "learning_rate": 1.9940746355837555e-05, "loss": 2.2377, "step": 19612 }, { "epoch": 0.25, "grad_norm": 4.19543981552124, "learning_rate": 1.994073493318394e-05, "loss": 1.8867, "step": 19613 }, { "epoch": 0.25, "grad_norm": 4.13582181930542, "learning_rate": 1.9940723509432703e-05, "loss": 2.2687, "step": 19614 }, { "epoch": 0.25, "grad_norm": 4.111593723297119, "learning_rate": 1.994071208458384e-05, "loss": 1.8086, "step": 19615 }, { "epoch": 0.25, "grad_norm": 3.9917404651641846, "learning_rate": 1.9940700658637353e-05, "loss": 2.1815, "step": 19616 }, { "epoch": 0.25, "grad_norm": 4.780526161193848, "learning_rate": 1.994068923159324e-05, "loss": 2.065, "step": 19617 }, { "epoch": 0.25, "grad_norm": 3.560851573944092, "learning_rate": 1.994067780345151e-05, "loss": 1.9922, "step": 19618 }, { "epoch": 0.25, "grad_norm": 4.68679666519165, "learning_rate": 1.9940666374212164e-05, "loss": 2.2688, "step": 19619 }, { "epoch": 0.25, "grad_norm": 3.720708131790161, "learning_rate": 1.9940654943875194e-05, "loss": 1.6897, "step": 19620 }, { "epoch": 0.25, "grad_norm": 4.202770233154297, "learning_rate": 1.9940643512440613e-05, "loss": 2.3551, "step": 19621 }, { "epoch": 0.25, "grad_norm": 4.239742279052734, "learning_rate": 1.994063207990841e-05, "loss": 2.0406, "step": 19622 }, { "epoch": 0.25, "grad_norm": 4.035427570343018, "learning_rate": 1.99406206462786e-05, "loss": 2.2392, "step": 19623 }, { "epoch": 0.25, "grad_norm": 4.368595600128174, "learning_rate": 1.994060921155117e-05, "loss": 2.7586, "step": 19624 }, { "epoch": 0.25, "grad_norm": 4.064131259918213, "learning_rate": 1.994059777572613e-05, "loss": 2.2134, "step": 19625 }, { "epoch": 0.25, "grad_norm": 4.032052993774414, "learning_rate": 1.994058633880348e-05, "loss": 2.1094, "step": 19626 }, { "epoch": 0.25, "grad_norm": 3.892430305480957, "learning_rate": 1.9940574900783224e-05, "loss": 1.92, "step": 19627 }, { "epoch": 0.25, "grad_norm": 3.754650592803955, "learning_rate": 1.9940563461665356e-05, "loss": 1.8575, "step": 19628 }, { "epoch": 0.25, "grad_norm": 5.260861396789551, "learning_rate": 1.994055202144988e-05, "loss": 3.1434, "step": 19629 }, { "epoch": 0.25, "grad_norm": 4.252096176147461, "learning_rate": 1.99405405801368e-05, "loss": 2.2159, "step": 19630 }, { "epoch": 0.25, "grad_norm": 4.268959999084473, "learning_rate": 1.9940529137726113e-05, "loss": 2.0624, "step": 19631 }, { "epoch": 0.25, "grad_norm": 5.621796607971191, "learning_rate": 1.9940517694217825e-05, "loss": 2.8556, "step": 19632 }, { "epoch": 0.25, "grad_norm": 4.985367298126221, "learning_rate": 1.9940506249611936e-05, "loss": 2.8908, "step": 19633 }, { "epoch": 0.25, "grad_norm": 4.015353679656982, "learning_rate": 1.994049480390845e-05, "loss": 1.9485, "step": 19634 }, { "epoch": 0.25, "grad_norm": 4.423545837402344, "learning_rate": 1.994048335710736e-05, "loss": 2.6169, "step": 19635 }, { "epoch": 0.25, "grad_norm": 4.0262041091918945, "learning_rate": 1.9940471909208673e-05, "loss": 2.1273, "step": 19636 }, { "epoch": 0.25, "grad_norm": 4.079819202423096, "learning_rate": 1.994046046021239e-05, "loss": 2.1946, "step": 19637 }, { "epoch": 0.25, "grad_norm": 4.082037448883057, "learning_rate": 1.994044901011851e-05, "loss": 2.4093, "step": 19638 }, { "epoch": 0.25, "grad_norm": 4.8126959800720215, "learning_rate": 1.9940437558927038e-05, "loss": 2.3564, "step": 19639 }, { "epoch": 0.25, "grad_norm": 3.8617184162139893, "learning_rate": 1.994042610663797e-05, "loss": 2.0833, "step": 19640 }, { "epoch": 0.25, "grad_norm": 4.19571590423584, "learning_rate": 1.9940414653251312e-05, "loss": 1.9582, "step": 19641 }, { "epoch": 0.25, "grad_norm": 4.306593894958496, "learning_rate": 1.9940403198767063e-05, "loss": 2.1246, "step": 19642 }, { "epoch": 0.25, "grad_norm": 4.409751892089844, "learning_rate": 1.9940391743185226e-05, "loss": 2.5178, "step": 19643 }, { "epoch": 0.25, "grad_norm": 4.591650009155273, "learning_rate": 1.99403802865058e-05, "loss": 2.3468, "step": 19644 }, { "epoch": 0.25, "grad_norm": 4.247588634490967, "learning_rate": 1.9940368828728788e-05, "loss": 2.0781, "step": 19645 }, { "epoch": 0.25, "grad_norm": 3.3443539142608643, "learning_rate": 1.994035736985419e-05, "loss": 1.4969, "step": 19646 }, { "epoch": 0.25, "grad_norm": 4.005080223083496, "learning_rate": 1.994034590988201e-05, "loss": 2.2498, "step": 19647 }, { "epoch": 0.25, "grad_norm": 4.212490081787109, "learning_rate": 1.9940334448812244e-05, "loss": 2.0958, "step": 19648 }, { "epoch": 0.25, "grad_norm": 4.629774570465088, "learning_rate": 1.99403229866449e-05, "loss": 2.1571, "step": 19649 }, { "epoch": 0.26, "grad_norm": 4.125953674316406, "learning_rate": 1.994031152337997e-05, "loss": 2.5873, "step": 19650 }, { "epoch": 0.26, "grad_norm": 3.567654848098755, "learning_rate": 1.9940300059017467e-05, "loss": 1.6632, "step": 19651 }, { "epoch": 0.26, "grad_norm": 3.7096400260925293, "learning_rate": 1.9940288593557386e-05, "loss": 1.9555, "step": 19652 }, { "epoch": 0.26, "grad_norm": 4.471035003662109, "learning_rate": 1.9940277126999726e-05, "loss": 2.2897, "step": 19653 }, { "epoch": 0.26, "grad_norm": 4.030473232269287, "learning_rate": 1.994026565934449e-05, "loss": 2.1777, "step": 19654 }, { "epoch": 0.26, "grad_norm": 3.8190765380859375, "learning_rate": 1.994025419059168e-05, "loss": 1.8527, "step": 19655 }, { "epoch": 0.26, "grad_norm": 4.714578151702881, "learning_rate": 1.99402427207413e-05, "loss": 2.043, "step": 19656 }, { "epoch": 0.26, "grad_norm": 3.780771255493164, "learning_rate": 1.9940231249793344e-05, "loss": 2.0116, "step": 19657 }, { "epoch": 0.26, "grad_norm": 3.67877459526062, "learning_rate": 1.9940219777747822e-05, "loss": 1.7646, "step": 19658 }, { "epoch": 0.26, "grad_norm": 4.463465690612793, "learning_rate": 1.994020830460473e-05, "loss": 2.6775, "step": 19659 }, { "epoch": 0.26, "grad_norm": 3.477189779281616, "learning_rate": 1.9940196830364072e-05, "loss": 1.8724, "step": 19660 }, { "epoch": 0.26, "grad_norm": 4.542232990264893, "learning_rate": 1.9940185355025845e-05, "loss": 2.4097, "step": 19661 }, { "epoch": 0.26, "grad_norm": 4.044005870819092, "learning_rate": 1.9940173878590053e-05, "loss": 2.0869, "step": 19662 }, { "epoch": 0.26, "grad_norm": 3.809298515319824, "learning_rate": 1.99401624010567e-05, "loss": 1.6666, "step": 19663 }, { "epoch": 0.26, "grad_norm": 4.192472457885742, "learning_rate": 1.9940150922425784e-05, "loss": 2.2467, "step": 19664 }, { "epoch": 0.26, "grad_norm": 4.451750755310059, "learning_rate": 1.9940139442697303e-05, "loss": 2.7991, "step": 19665 }, { "epoch": 0.26, "grad_norm": 4.059126853942871, "learning_rate": 1.994012796187126e-05, "loss": 1.9868, "step": 19666 }, { "epoch": 0.26, "grad_norm": 4.15778923034668, "learning_rate": 1.9940116479947668e-05, "loss": 2.2704, "step": 19667 }, { "epoch": 0.26, "grad_norm": 3.842259168624878, "learning_rate": 1.9940104996926513e-05, "loss": 2.1634, "step": 19668 }, { "epoch": 0.26, "grad_norm": 4.342134952545166, "learning_rate": 1.99400935128078e-05, "loss": 2.1397, "step": 19669 }, { "epoch": 0.26, "grad_norm": 3.794692039489746, "learning_rate": 1.9940082027591536e-05, "loss": 2.3688, "step": 19670 }, { "epoch": 0.26, "grad_norm": 3.6475155353546143, "learning_rate": 1.9940070541277713e-05, "loss": 1.5234, "step": 19671 }, { "epoch": 0.26, "grad_norm": 3.524846315383911, "learning_rate": 1.9940059053866342e-05, "loss": 1.7259, "step": 19672 }, { "epoch": 0.26, "grad_norm": 4.538772106170654, "learning_rate": 1.9940047565357417e-05, "loss": 2.6363, "step": 19673 }, { "epoch": 0.26, "grad_norm": 4.1303558349609375, "learning_rate": 1.9940036075750947e-05, "loss": 1.8407, "step": 19674 }, { "epoch": 0.26, "grad_norm": 3.6285018920898438, "learning_rate": 1.994002458504692e-05, "loss": 2.0459, "step": 19675 }, { "epoch": 0.26, "grad_norm": 3.788503408432007, "learning_rate": 1.9940013093245355e-05, "loss": 1.9117, "step": 19676 }, { "epoch": 0.26, "grad_norm": 4.029031753540039, "learning_rate": 1.9940001600346238e-05, "loss": 2.0886, "step": 19677 }, { "epoch": 0.26, "grad_norm": 4.045563697814941, "learning_rate": 1.9939990106349576e-05, "loss": 1.9549, "step": 19678 }, { "epoch": 0.26, "grad_norm": 4.0536298751831055, "learning_rate": 1.9939978611255375e-05, "loss": 2.2565, "step": 19679 }, { "epoch": 0.26, "grad_norm": 3.6303012371063232, "learning_rate": 1.9939967115063627e-05, "loss": 1.5479, "step": 19680 }, { "epoch": 0.26, "grad_norm": 4.436891078948975, "learning_rate": 1.993995561777434e-05, "loss": 2.2277, "step": 19681 }, { "epoch": 0.26, "grad_norm": 4.374346733093262, "learning_rate": 1.9939944119387514e-05, "loss": 2.1834, "step": 19682 }, { "epoch": 0.26, "grad_norm": 4.113265514373779, "learning_rate": 1.993993261990315e-05, "loss": 2.4555, "step": 19683 }, { "epoch": 0.26, "grad_norm": 4.186200141906738, "learning_rate": 1.9939921119321246e-05, "loss": 1.8548, "step": 19684 }, { "epoch": 0.26, "grad_norm": 4.748473167419434, "learning_rate": 1.9939909617641805e-05, "loss": 2.3422, "step": 19685 }, { "epoch": 0.26, "grad_norm": 4.7323808670043945, "learning_rate": 1.9939898114864837e-05, "loss": 2.0693, "step": 19686 }, { "epoch": 0.26, "grad_norm": 4.378364562988281, "learning_rate": 1.993988661099033e-05, "loss": 2.1298, "step": 19687 }, { "epoch": 0.26, "grad_norm": 4.08296012878418, "learning_rate": 1.993987510601829e-05, "loss": 1.9325, "step": 19688 }, { "epoch": 0.26, "grad_norm": 4.48704719543457, "learning_rate": 1.9939863599948718e-05, "loss": 2.2684, "step": 19689 }, { "epoch": 0.26, "grad_norm": 4.053762912750244, "learning_rate": 1.9939852092781618e-05, "loss": 1.9927, "step": 19690 }, { "epoch": 0.26, "grad_norm": 4.670725345611572, "learning_rate": 1.9939840584516994e-05, "loss": 2.0025, "step": 19691 }, { "epoch": 0.26, "grad_norm": 3.865410089492798, "learning_rate": 1.9939829075154836e-05, "loss": 1.844, "step": 19692 }, { "epoch": 0.26, "grad_norm": 4.05173921585083, "learning_rate": 1.993981756469516e-05, "loss": 1.879, "step": 19693 }, { "epoch": 0.26, "grad_norm": 4.059212684631348, "learning_rate": 1.9939806053137953e-05, "loss": 1.9657, "step": 19694 }, { "epoch": 0.26, "grad_norm": 4.251945495605469, "learning_rate": 1.9939794540483225e-05, "loss": 2.0985, "step": 19695 }, { "epoch": 0.26, "grad_norm": 4.888241291046143, "learning_rate": 1.9939783026730976e-05, "loss": 2.203, "step": 19696 }, { "epoch": 0.26, "grad_norm": 3.684699535369873, "learning_rate": 1.9939771511881206e-05, "loss": 1.8183, "step": 19697 }, { "epoch": 0.26, "grad_norm": 3.6762118339538574, "learning_rate": 1.9939759995933916e-05, "loss": 1.914, "step": 19698 }, { "epoch": 0.26, "grad_norm": 3.6559383869171143, "learning_rate": 1.993974847888911e-05, "loss": 1.4568, "step": 19699 }, { "epoch": 0.26, "grad_norm": 4.534544467926025, "learning_rate": 1.993973696074678e-05, "loss": 2.7245, "step": 19700 }, { "epoch": 0.26, "grad_norm": 4.2285847663879395, "learning_rate": 1.9939725441506942e-05, "loss": 1.7238, "step": 19701 }, { "epoch": 0.26, "grad_norm": 4.084443092346191, "learning_rate": 1.9939713921169588e-05, "loss": 2.0686, "step": 19702 }, { "epoch": 0.26, "grad_norm": 4.451988697052002, "learning_rate": 1.993970239973472e-05, "loss": 2.3706, "step": 19703 }, { "epoch": 0.26, "grad_norm": 4.608288764953613, "learning_rate": 1.993969087720234e-05, "loss": 2.6685, "step": 19704 }, { "epoch": 0.26, "grad_norm": 4.571742534637451, "learning_rate": 1.993967935357245e-05, "loss": 2.4018, "step": 19705 }, { "epoch": 0.26, "grad_norm": 4.010315418243408, "learning_rate": 1.993966782884505e-05, "loss": 2.162, "step": 19706 }, { "epoch": 0.26, "grad_norm": 4.055537700653076, "learning_rate": 1.9939656303020146e-05, "loss": 2.0297, "step": 19707 }, { "epoch": 0.26, "grad_norm": 4.454372406005859, "learning_rate": 1.9939644776097728e-05, "loss": 2.316, "step": 19708 }, { "epoch": 0.26, "grad_norm": 4.502566814422607, "learning_rate": 1.9939633248077812e-05, "loss": 2.2784, "step": 19709 }, { "epoch": 0.26, "grad_norm": 6.182973384857178, "learning_rate": 1.9939621718960385e-05, "loss": 2.0117, "step": 19710 }, { "epoch": 0.26, "grad_norm": 3.90775990486145, "learning_rate": 1.993961018874546e-05, "loss": 2.0829, "step": 19711 }, { "epoch": 0.26, "grad_norm": 4.311434268951416, "learning_rate": 1.9939598657433034e-05, "loss": 2.1219, "step": 19712 }, { "epoch": 0.26, "grad_norm": 4.072844505310059, "learning_rate": 1.9939587125023102e-05, "loss": 2.2357, "step": 19713 }, { "epoch": 0.26, "grad_norm": 4.600220203399658, "learning_rate": 1.993957559151568e-05, "loss": 2.195, "step": 19714 }, { "epoch": 0.26, "grad_norm": 3.8755972385406494, "learning_rate": 1.993956405691075e-05, "loss": 2.5888, "step": 19715 }, { "epoch": 0.26, "grad_norm": 3.937221050262451, "learning_rate": 1.993955252120833e-05, "loss": 1.5523, "step": 19716 }, { "epoch": 0.26, "grad_norm": 3.8822062015533447, "learning_rate": 1.9939540984408414e-05, "loss": 1.8629, "step": 19717 }, { "epoch": 0.26, "grad_norm": 3.9231321811676025, "learning_rate": 1.9939529446511003e-05, "loss": 1.9418, "step": 19718 }, { "epoch": 0.26, "grad_norm": 3.9303510189056396, "learning_rate": 1.9939517907516098e-05, "loss": 2.2537, "step": 19719 }, { "epoch": 0.26, "grad_norm": 3.411576271057129, "learning_rate": 1.9939506367423703e-05, "loss": 1.7383, "step": 19720 }, { "epoch": 0.26, "grad_norm": 4.458077907562256, "learning_rate": 1.993949482623382e-05, "loss": 2.5083, "step": 19721 }, { "epoch": 0.26, "grad_norm": 3.986286163330078, "learning_rate": 1.9939483283946445e-05, "loss": 1.9904, "step": 19722 }, { "epoch": 0.26, "grad_norm": 3.917496919631958, "learning_rate": 1.9939471740561585e-05, "loss": 1.9824, "step": 19723 }, { "epoch": 0.26, "grad_norm": 3.986689329147339, "learning_rate": 1.9939460196079235e-05, "loss": 1.9236, "step": 19724 }, { "epoch": 0.26, "grad_norm": 4.721978187561035, "learning_rate": 1.99394486504994e-05, "loss": 2.8179, "step": 19725 }, { "epoch": 0.26, "grad_norm": 3.9049837589263916, "learning_rate": 1.9939437103822084e-05, "loss": 1.7996, "step": 19726 }, { "epoch": 0.26, "grad_norm": 3.5864880084991455, "learning_rate": 1.9939425556047286e-05, "loss": 1.8996, "step": 19727 }, { "epoch": 0.26, "grad_norm": 4.543087005615234, "learning_rate": 1.9939414007175005e-05, "loss": 2.1588, "step": 19728 }, { "epoch": 0.26, "grad_norm": 4.023096084594727, "learning_rate": 1.9939402457205247e-05, "loss": 2.2268, "step": 19729 }, { "epoch": 0.26, "grad_norm": 4.167713642120361, "learning_rate": 1.993939090613801e-05, "loss": 2.2415, "step": 19730 }, { "epoch": 0.26, "grad_norm": 3.988527297973633, "learning_rate": 1.9939379353973293e-05, "loss": 2.1035, "step": 19731 }, { "epoch": 0.26, "grad_norm": 3.858152151107788, "learning_rate": 1.99393678007111e-05, "loss": 2.0224, "step": 19732 }, { "epoch": 0.26, "grad_norm": 4.485359191894531, "learning_rate": 1.9939356246351432e-05, "loss": 2.3697, "step": 19733 }, { "epoch": 0.26, "grad_norm": 4.7516961097717285, "learning_rate": 1.993934469089429e-05, "loss": 2.4345, "step": 19734 }, { "epoch": 0.26, "grad_norm": 3.60134220123291, "learning_rate": 1.993933313433968e-05, "loss": 1.83, "step": 19735 }, { "epoch": 0.26, "grad_norm": 4.130252838134766, "learning_rate": 1.9939321576687594e-05, "loss": 2.3036, "step": 19736 }, { "epoch": 0.26, "grad_norm": 3.954238176345825, "learning_rate": 1.993931001793804e-05, "loss": 2.1283, "step": 19737 }, { "epoch": 0.26, "grad_norm": 4.137977600097656, "learning_rate": 1.9939298458091022e-05, "loss": 2.0541, "step": 19738 }, { "epoch": 0.26, "grad_norm": 4.305263519287109, "learning_rate": 1.993928689714653e-05, "loss": 1.9763, "step": 19739 }, { "epoch": 0.26, "grad_norm": 4.696726322174072, "learning_rate": 1.9939275335104576e-05, "loss": 2.4813, "step": 19740 }, { "epoch": 0.26, "grad_norm": 4.2702155113220215, "learning_rate": 1.9939263771965157e-05, "loss": 1.8744, "step": 19741 }, { "epoch": 0.26, "grad_norm": 4.332123279571533, "learning_rate": 1.9939252207728274e-05, "loss": 2.0691, "step": 19742 }, { "epoch": 0.26, "grad_norm": 3.7708206176757812, "learning_rate": 1.9939240642393927e-05, "loss": 1.6144, "step": 19743 }, { "epoch": 0.26, "grad_norm": 4.325055122375488, "learning_rate": 1.993922907596212e-05, "loss": 1.8049, "step": 19744 }, { "epoch": 0.26, "grad_norm": 4.05527925491333, "learning_rate": 1.9939217508432855e-05, "loss": 2.5956, "step": 19745 }, { "epoch": 0.26, "grad_norm": 4.205085754394531, "learning_rate": 1.9939205939806132e-05, "loss": 1.8997, "step": 19746 }, { "epoch": 0.26, "grad_norm": 5.831775188446045, "learning_rate": 1.993919437008195e-05, "loss": 2.214, "step": 19747 }, { "epoch": 0.26, "grad_norm": 4.662138938903809, "learning_rate": 1.9939182799260314e-05, "loss": 2.4695, "step": 19748 }, { "epoch": 0.26, "grad_norm": 3.8524694442749023, "learning_rate": 1.993917122734122e-05, "loss": 2.2806, "step": 19749 }, { "epoch": 0.26, "grad_norm": 4.031355381011963, "learning_rate": 1.993915965432468e-05, "loss": 2.1267, "step": 19750 }, { "epoch": 0.26, "grad_norm": 4.272625923156738, "learning_rate": 1.9939148080210684e-05, "loss": 2.3449, "step": 19751 }, { "epoch": 0.26, "grad_norm": 4.604526042938232, "learning_rate": 1.993913650499924e-05, "loss": 2.2349, "step": 19752 }, { "epoch": 0.26, "grad_norm": 4.478212833404541, "learning_rate": 1.993912492869034e-05, "loss": 2.5171, "step": 19753 }, { "epoch": 0.26, "grad_norm": 4.360225200653076, "learning_rate": 1.9939113351284e-05, "loss": 2.0706, "step": 19754 }, { "epoch": 0.26, "grad_norm": 4.1798834800720215, "learning_rate": 1.993910177278021e-05, "loss": 2.1365, "step": 19755 }, { "epoch": 0.26, "grad_norm": 4.606409072875977, "learning_rate": 1.9939090193178972e-05, "loss": 2.5014, "step": 19756 }, { "epoch": 0.26, "grad_norm": 4.030826091766357, "learning_rate": 1.993907861248029e-05, "loss": 2.1051, "step": 19757 }, { "epoch": 0.26, "grad_norm": 4.393436908721924, "learning_rate": 1.9939067030684168e-05, "loss": 1.9502, "step": 19758 }, { "epoch": 0.26, "grad_norm": 3.7881245613098145, "learning_rate": 1.9939055447790605e-05, "loss": 2.2818, "step": 19759 }, { "epoch": 0.26, "grad_norm": 3.717597484588623, "learning_rate": 1.99390438637996e-05, "loss": 1.6751, "step": 19760 }, { "epoch": 0.26, "grad_norm": 4.079358100891113, "learning_rate": 1.9939032278711157e-05, "loss": 2.048, "step": 19761 }, { "epoch": 0.26, "grad_norm": 4.155632019042969, "learning_rate": 1.9939020692525276e-05, "loss": 2.0514, "step": 19762 }, { "epoch": 0.26, "grad_norm": 4.3357977867126465, "learning_rate": 1.9939009105241958e-05, "loss": 2.1121, "step": 19763 }, { "epoch": 0.26, "grad_norm": 3.6557843685150146, "learning_rate": 1.9938997516861204e-05, "loss": 2.1639, "step": 19764 }, { "epoch": 0.26, "grad_norm": 3.956270217895508, "learning_rate": 1.9938985927383013e-05, "loss": 2.1431, "step": 19765 }, { "epoch": 0.26, "grad_norm": 3.630430221557617, "learning_rate": 1.9938974336807394e-05, "loss": 1.8034, "step": 19766 }, { "epoch": 0.26, "grad_norm": 4.259457588195801, "learning_rate": 1.993896274513434e-05, "loss": 2.657, "step": 19767 }, { "epoch": 0.26, "grad_norm": 4.611410617828369, "learning_rate": 1.9938951152363862e-05, "loss": 2.8864, "step": 19768 }, { "epoch": 0.26, "grad_norm": 3.6357662677764893, "learning_rate": 1.993893955849595e-05, "loss": 1.6558, "step": 19769 }, { "epoch": 0.26, "grad_norm": 3.6733498573303223, "learning_rate": 1.9938927963530613e-05, "loss": 1.563, "step": 19770 }, { "epoch": 0.26, "grad_norm": 4.1174468994140625, "learning_rate": 1.993891636746785e-05, "loss": 2.0489, "step": 19771 }, { "epoch": 0.26, "grad_norm": 4.0146870613098145, "learning_rate": 1.993890477030766e-05, "loss": 2.0705, "step": 19772 }, { "epoch": 0.26, "grad_norm": 3.821040630340576, "learning_rate": 1.9938893172050048e-05, "loss": 2.0319, "step": 19773 }, { "epoch": 0.26, "grad_norm": 4.075718879699707, "learning_rate": 1.9938881572695013e-05, "loss": 1.8926, "step": 19774 }, { "epoch": 0.26, "grad_norm": 3.9098246097564697, "learning_rate": 1.9938869972242555e-05, "loss": 2.5546, "step": 19775 }, { "epoch": 0.26, "grad_norm": 3.9552791118621826, "learning_rate": 1.993885837069268e-05, "loss": 1.8692, "step": 19776 }, { "epoch": 0.26, "grad_norm": 4.028622150421143, "learning_rate": 1.9938846768045386e-05, "loss": 2.2764, "step": 19777 }, { "epoch": 0.26, "grad_norm": 4.191764831542969, "learning_rate": 1.993883516430067e-05, "loss": 2.1036, "step": 19778 }, { "epoch": 0.26, "grad_norm": 4.368307590484619, "learning_rate": 1.9938823559458546e-05, "loss": 2.2259, "step": 19779 }, { "epoch": 0.26, "grad_norm": 4.64015007019043, "learning_rate": 1.9938811953519002e-05, "loss": 2.213, "step": 19780 }, { "epoch": 0.26, "grad_norm": 4.03187370300293, "learning_rate": 1.9938800346482045e-05, "loss": 1.7561, "step": 19781 }, { "epoch": 0.26, "grad_norm": 4.086425304412842, "learning_rate": 1.993878873834768e-05, "loss": 1.9659, "step": 19782 }, { "epoch": 0.26, "grad_norm": 4.941117763519287, "learning_rate": 1.99387771291159e-05, "loss": 2.6115, "step": 19783 }, { "epoch": 0.26, "grad_norm": 4.0117316246032715, "learning_rate": 1.9938765518786708e-05, "loss": 1.9917, "step": 19784 }, { "epoch": 0.26, "grad_norm": 4.08282470703125, "learning_rate": 1.9938753907360112e-05, "loss": 1.9687, "step": 19785 }, { "epoch": 0.26, "grad_norm": 3.879232883453369, "learning_rate": 1.9938742294836107e-05, "loss": 2.0353, "step": 19786 }, { "epoch": 0.26, "grad_norm": 4.236489295959473, "learning_rate": 1.99387306812147e-05, "loss": 2.6368, "step": 19787 }, { "epoch": 0.26, "grad_norm": 3.6851444244384766, "learning_rate": 1.9938719066495882e-05, "loss": 1.7492, "step": 19788 }, { "epoch": 0.26, "grad_norm": 4.12018346786499, "learning_rate": 1.9938707450679663e-05, "loss": 2.524, "step": 19789 }, { "epoch": 0.26, "grad_norm": 3.842047691345215, "learning_rate": 1.9938695833766045e-05, "loss": 2.0263, "step": 19790 }, { "epoch": 0.26, "grad_norm": 4.559662818908691, "learning_rate": 1.9938684215755025e-05, "loss": 1.9139, "step": 19791 }, { "epoch": 0.26, "grad_norm": 3.730025053024292, "learning_rate": 1.9938672596646606e-05, "loss": 1.5599, "step": 19792 }, { "epoch": 0.26, "grad_norm": 4.277972221374512, "learning_rate": 1.9938660976440788e-05, "loss": 1.9663, "step": 19793 }, { "epoch": 0.26, "grad_norm": 3.8344228267669678, "learning_rate": 1.993864935513757e-05, "loss": 1.7308, "step": 19794 }, { "epoch": 0.26, "grad_norm": 3.8817408084869385, "learning_rate": 1.993863773273696e-05, "loss": 1.8665, "step": 19795 }, { "epoch": 0.26, "grad_norm": 3.9705746173858643, "learning_rate": 1.9938626109238956e-05, "loss": 1.929, "step": 19796 }, { "epoch": 0.26, "grad_norm": 4.366081714630127, "learning_rate": 1.993861448464356e-05, "loss": 2.1721, "step": 19797 }, { "epoch": 0.26, "grad_norm": 4.109889984130859, "learning_rate": 1.9938602858950773e-05, "loss": 2.1466, "step": 19798 }, { "epoch": 0.26, "grad_norm": 3.8571650981903076, "learning_rate": 1.9938591232160592e-05, "loss": 1.8194, "step": 19799 }, { "epoch": 0.26, "grad_norm": 4.31493616104126, "learning_rate": 1.9938579604273025e-05, "loss": 1.9494, "step": 19800 }, { "epoch": 0.26, "grad_norm": 3.1861867904663086, "learning_rate": 1.993856797528807e-05, "loss": 1.5191, "step": 19801 }, { "epoch": 0.26, "grad_norm": 4.329362392425537, "learning_rate": 1.9938556345205725e-05, "loss": 2.5162, "step": 19802 }, { "epoch": 0.26, "grad_norm": 3.954970598220825, "learning_rate": 1.9938544714025995e-05, "loss": 2.1954, "step": 19803 }, { "epoch": 0.26, "grad_norm": 4.228597164154053, "learning_rate": 1.9938533081748887e-05, "loss": 2.3379, "step": 19804 }, { "epoch": 0.26, "grad_norm": 3.797153949737549, "learning_rate": 1.993852144837439e-05, "loss": 2.017, "step": 19805 }, { "epoch": 0.26, "grad_norm": 4.240231037139893, "learning_rate": 1.9938509813902514e-05, "loss": 2.4157, "step": 19806 }, { "epoch": 0.26, "grad_norm": 4.5329389572143555, "learning_rate": 1.993849817833326e-05, "loss": 2.2722, "step": 19807 }, { "epoch": 0.26, "grad_norm": 4.278262615203857, "learning_rate": 1.9938486541666623e-05, "loss": 2.6837, "step": 19808 }, { "epoch": 0.26, "grad_norm": 3.7480902671813965, "learning_rate": 1.9938474903902612e-05, "loss": 1.9312, "step": 19809 }, { "epoch": 0.26, "grad_norm": 3.711355686187744, "learning_rate": 1.9938463265041225e-05, "loss": 1.7016, "step": 19810 }, { "epoch": 0.26, "grad_norm": 4.242558479309082, "learning_rate": 1.993845162508246e-05, "loss": 2.2876, "step": 19811 }, { "epoch": 0.26, "grad_norm": 4.434440612792969, "learning_rate": 1.9938439984026322e-05, "loss": 1.9017, "step": 19812 }, { "epoch": 0.26, "grad_norm": 4.304213523864746, "learning_rate": 1.9938428341872813e-05, "loss": 2.2227, "step": 19813 }, { "epoch": 0.26, "grad_norm": 3.9452641010284424, "learning_rate": 1.9938416698621932e-05, "loss": 1.9727, "step": 19814 }, { "epoch": 0.26, "grad_norm": 5.176230430603027, "learning_rate": 1.9938405054273683e-05, "loss": 2.9896, "step": 19815 }, { "epoch": 0.26, "grad_norm": 3.5331242084503174, "learning_rate": 1.993839340882806e-05, "loss": 2.0891, "step": 19816 }, { "epoch": 0.26, "grad_norm": 4.105738639831543, "learning_rate": 1.993838176228508e-05, "loss": 2.2897, "step": 19817 }, { "epoch": 0.26, "grad_norm": 4.376145839691162, "learning_rate": 1.9938370114644724e-05, "loss": 2.301, "step": 19818 }, { "epoch": 0.26, "grad_norm": 4.399998664855957, "learning_rate": 1.9938358465907007e-05, "loss": 2.4958, "step": 19819 }, { "epoch": 0.26, "grad_norm": 4.033256530761719, "learning_rate": 1.993834681607193e-05, "loss": 2.337, "step": 19820 }, { "epoch": 0.26, "grad_norm": 4.135070323944092, "learning_rate": 1.9938335165139486e-05, "loss": 2.208, "step": 19821 }, { "epoch": 0.26, "grad_norm": 4.323203086853027, "learning_rate": 1.9938323513109684e-05, "loss": 2.0608, "step": 19822 }, { "epoch": 0.26, "grad_norm": 3.9751713275909424, "learning_rate": 1.993831185998252e-05, "loss": 2.166, "step": 19823 }, { "epoch": 0.26, "grad_norm": 4.143633842468262, "learning_rate": 1.9938300205758e-05, "loss": 2.1385, "step": 19824 }, { "epoch": 0.26, "grad_norm": 4.240627765655518, "learning_rate": 1.9938288550436123e-05, "loss": 2.0012, "step": 19825 }, { "epoch": 0.26, "grad_norm": 4.0981011390686035, "learning_rate": 1.993827689401689e-05, "loss": 2.3004, "step": 19826 }, { "epoch": 0.26, "grad_norm": 3.577132225036621, "learning_rate": 1.99382652365003e-05, "loss": 1.8972, "step": 19827 }, { "epoch": 0.26, "grad_norm": 4.020647048950195, "learning_rate": 1.9938253577886358e-05, "loss": 2.0441, "step": 19828 }, { "epoch": 0.26, "grad_norm": 4.02100944519043, "learning_rate": 1.9938241918175065e-05, "loss": 1.9939, "step": 19829 }, { "epoch": 0.26, "grad_norm": 4.211004734039307, "learning_rate": 1.993823025736642e-05, "loss": 1.8603, "step": 19830 }, { "epoch": 0.26, "grad_norm": 3.53151273727417, "learning_rate": 1.993821859546043e-05, "loss": 1.9473, "step": 19831 }, { "epoch": 0.26, "grad_norm": 4.354271411895752, "learning_rate": 1.993820693245709e-05, "loss": 1.7081, "step": 19832 }, { "epoch": 0.26, "grad_norm": 4.6592020988464355, "learning_rate": 1.9938195268356404e-05, "loss": 2.1021, "step": 19833 }, { "epoch": 0.26, "grad_norm": 3.9815165996551514, "learning_rate": 1.993818360315837e-05, "loss": 1.8224, "step": 19834 }, { "epoch": 0.26, "grad_norm": 4.779998779296875, "learning_rate": 1.9938171936862993e-05, "loss": 3.1948, "step": 19835 }, { "epoch": 0.26, "grad_norm": 4.191551208496094, "learning_rate": 1.9938160269470273e-05, "loss": 1.9427, "step": 19836 }, { "epoch": 0.26, "grad_norm": 3.7240710258483887, "learning_rate": 1.9938148600980213e-05, "loss": 1.764, "step": 19837 }, { "epoch": 0.26, "grad_norm": 4.29422664642334, "learning_rate": 1.993813693139281e-05, "loss": 2.1229, "step": 19838 }, { "epoch": 0.26, "grad_norm": 4.592874050140381, "learning_rate": 1.993812526070807e-05, "loss": 2.2059, "step": 19839 }, { "epoch": 0.26, "grad_norm": 3.5536210536956787, "learning_rate": 1.9938113588925993e-05, "loss": 1.8993, "step": 19840 }, { "epoch": 0.26, "grad_norm": 4.050301551818848, "learning_rate": 1.993810191604658e-05, "loss": 2.3174, "step": 19841 }, { "epoch": 0.26, "grad_norm": 4.192773818969727, "learning_rate": 1.9938090242069833e-05, "loss": 2.0476, "step": 19842 }, { "epoch": 0.26, "grad_norm": 4.430079936981201, "learning_rate": 1.993807856699575e-05, "loss": 2.2401, "step": 19843 }, { "epoch": 0.26, "grad_norm": 4.666380882263184, "learning_rate": 1.9938066890824332e-05, "loss": 2.2858, "step": 19844 }, { "epoch": 0.26, "grad_norm": 4.079384803771973, "learning_rate": 1.9938055213555588e-05, "loss": 1.7996, "step": 19845 }, { "epoch": 0.26, "grad_norm": 5.351866722106934, "learning_rate": 1.993804353518951e-05, "loss": 2.5176, "step": 19846 }, { "epoch": 0.26, "grad_norm": 4.214743137359619, "learning_rate": 1.9938031855726107e-05, "loss": 2.1192, "step": 19847 }, { "epoch": 0.26, "grad_norm": 4.625613212585449, "learning_rate": 1.9938020175165373e-05, "loss": 2.233, "step": 19848 }, { "epoch": 0.26, "grad_norm": 4.8513617515563965, "learning_rate": 1.9938008493507314e-05, "loss": 2.4414, "step": 19849 }, { "epoch": 0.26, "grad_norm": 4.431394577026367, "learning_rate": 1.9937996810751934e-05, "loss": 2.1692, "step": 19850 }, { "epoch": 0.26, "grad_norm": 5.049071788787842, "learning_rate": 1.9937985126899227e-05, "loss": 2.5308, "step": 19851 }, { "epoch": 0.26, "grad_norm": 5.255755424499512, "learning_rate": 1.9937973441949195e-05, "loss": 2.3113, "step": 19852 }, { "epoch": 0.26, "grad_norm": 3.993828535079956, "learning_rate": 1.993796175590185e-05, "loss": 2.4268, "step": 19853 }, { "epoch": 0.26, "grad_norm": 4.97051477432251, "learning_rate": 1.993795006875718e-05, "loss": 2.1566, "step": 19854 }, { "epoch": 0.26, "grad_norm": 4.128593921661377, "learning_rate": 1.993793838051519e-05, "loss": 2.291, "step": 19855 }, { "epoch": 0.26, "grad_norm": 3.4689273834228516, "learning_rate": 1.9937926691175885e-05, "loss": 1.7496, "step": 19856 }, { "epoch": 0.26, "grad_norm": 3.821646213531494, "learning_rate": 1.993791500073927e-05, "loss": 2.0721, "step": 19857 }, { "epoch": 0.26, "grad_norm": 5.03914737701416, "learning_rate": 1.9937903309205335e-05, "loss": 2.7457, "step": 19858 }, { "epoch": 0.26, "grad_norm": 4.433999538421631, "learning_rate": 1.9937891616574086e-05, "loss": 2.3469, "step": 19859 }, { "epoch": 0.26, "grad_norm": 3.6678848266601562, "learning_rate": 1.9937879922845527e-05, "loss": 1.5167, "step": 19860 }, { "epoch": 0.26, "grad_norm": 3.816985607147217, "learning_rate": 1.9937868228019657e-05, "loss": 1.8951, "step": 19861 }, { "epoch": 0.26, "grad_norm": 4.226968288421631, "learning_rate": 1.993785653209648e-05, "loss": 2.8538, "step": 19862 }, { "epoch": 0.26, "grad_norm": 4.344313621520996, "learning_rate": 1.993784483507599e-05, "loss": 2.6087, "step": 19863 }, { "epoch": 0.26, "grad_norm": 4.397477626800537, "learning_rate": 1.9937833136958198e-05, "loss": 2.087, "step": 19864 }, { "epoch": 0.26, "grad_norm": 4.547492027282715, "learning_rate": 1.9937821437743098e-05, "loss": 2.2726, "step": 19865 }, { "epoch": 0.26, "grad_norm": 3.986776828765869, "learning_rate": 1.9937809737430694e-05, "loss": 1.873, "step": 19866 }, { "epoch": 0.26, "grad_norm": 4.028393745422363, "learning_rate": 1.9937798036020986e-05, "loss": 2.0631, "step": 19867 }, { "epoch": 0.26, "grad_norm": 4.88117790222168, "learning_rate": 1.993778633351398e-05, "loss": 2.1025, "step": 19868 }, { "epoch": 0.26, "grad_norm": 4.430961608886719, "learning_rate": 1.993777462990967e-05, "loss": 2.5066, "step": 19869 }, { "epoch": 0.26, "grad_norm": 4.2168145179748535, "learning_rate": 1.9937762925208068e-05, "loss": 2.5763, "step": 19870 }, { "epoch": 0.26, "grad_norm": 4.263657093048096, "learning_rate": 1.993775121940916e-05, "loss": 2.1386, "step": 19871 }, { "epoch": 0.26, "grad_norm": 3.906473159790039, "learning_rate": 1.9937739512512962e-05, "loss": 1.8231, "step": 19872 }, { "epoch": 0.26, "grad_norm": 4.024524211883545, "learning_rate": 1.9937727804519467e-05, "loss": 1.9492, "step": 19873 }, { "epoch": 0.26, "grad_norm": 4.729410648345947, "learning_rate": 1.9937716095428678e-05, "loss": 2.3689, "step": 19874 }, { "epoch": 0.26, "grad_norm": 4.897964954376221, "learning_rate": 1.9937704385240594e-05, "loss": 2.454, "step": 19875 }, { "epoch": 0.26, "grad_norm": 4.048922538757324, "learning_rate": 1.993769267395522e-05, "loss": 2.1497, "step": 19876 }, { "epoch": 0.26, "grad_norm": 4.168648719787598, "learning_rate": 1.993768096157256e-05, "loss": 2.1488, "step": 19877 }, { "epoch": 0.26, "grad_norm": 4.108426570892334, "learning_rate": 1.9937669248092605e-05, "loss": 1.9251, "step": 19878 }, { "epoch": 0.26, "grad_norm": 3.834463357925415, "learning_rate": 1.9937657533515367e-05, "loss": 2.1615, "step": 19879 }, { "epoch": 0.26, "grad_norm": 3.3053300380706787, "learning_rate": 1.993764581784084e-05, "loss": 1.7314, "step": 19880 }, { "epoch": 0.26, "grad_norm": 3.84592604637146, "learning_rate": 1.9937634101069032e-05, "loss": 1.794, "step": 19881 }, { "epoch": 0.26, "grad_norm": 4.287400245666504, "learning_rate": 1.993762238319994e-05, "loss": 2.1524, "step": 19882 }, { "epoch": 0.26, "grad_norm": 4.045449256896973, "learning_rate": 1.9937610664233565e-05, "loss": 2.0761, "step": 19883 }, { "epoch": 0.26, "grad_norm": 4.247876167297363, "learning_rate": 1.9937598944169908e-05, "loss": 2.392, "step": 19884 }, { "epoch": 0.26, "grad_norm": 3.7031030654907227, "learning_rate": 1.9937587223008974e-05, "loss": 1.8773, "step": 19885 }, { "epoch": 0.26, "grad_norm": 4.398301124572754, "learning_rate": 1.9937575500750763e-05, "loss": 2.4073, "step": 19886 }, { "epoch": 0.26, "grad_norm": 4.381344318389893, "learning_rate": 1.993756377739527e-05, "loss": 2.2004, "step": 19887 }, { "epoch": 0.26, "grad_norm": 3.7140533924102783, "learning_rate": 1.9937552052942503e-05, "loss": 2.0472, "step": 19888 }, { "epoch": 0.26, "grad_norm": 3.8769984245300293, "learning_rate": 1.9937540327392464e-05, "loss": 1.9922, "step": 19889 }, { "epoch": 0.26, "grad_norm": 4.1697773933410645, "learning_rate": 1.993752860074515e-05, "loss": 1.9719, "step": 19890 }, { "epoch": 0.26, "grad_norm": 4.07564115524292, "learning_rate": 1.9937516873000566e-05, "loss": 2.146, "step": 19891 }, { "epoch": 0.26, "grad_norm": 4.524931907653809, "learning_rate": 1.993750514415871e-05, "loss": 2.3849, "step": 19892 }, { "epoch": 0.26, "grad_norm": 3.502415180206299, "learning_rate": 1.9937493414219583e-05, "loss": 1.8345, "step": 19893 }, { "epoch": 0.26, "grad_norm": 4.575283050537109, "learning_rate": 1.993748168318319e-05, "loss": 2.0649, "step": 19894 }, { "epoch": 0.26, "grad_norm": 3.9087064266204834, "learning_rate": 1.9937469951049534e-05, "loss": 1.8625, "step": 19895 }, { "epoch": 0.26, "grad_norm": 4.020991325378418, "learning_rate": 1.9937458217818607e-05, "loss": 1.9496, "step": 19896 }, { "epoch": 0.26, "grad_norm": 4.693316459655762, "learning_rate": 1.993744648349042e-05, "loss": 2.559, "step": 19897 }, { "epoch": 0.26, "grad_norm": 4.043649673461914, "learning_rate": 1.9937434748064967e-05, "loss": 2.1601, "step": 19898 }, { "epoch": 0.26, "grad_norm": 4.214627742767334, "learning_rate": 1.9937423011542257e-05, "loss": 2.1413, "step": 19899 }, { "epoch": 0.26, "grad_norm": 5.336503505706787, "learning_rate": 1.9937411273922286e-05, "loss": 2.2415, "step": 19900 }, { "epoch": 0.26, "grad_norm": 4.075779438018799, "learning_rate": 1.993739953520505e-05, "loss": 2.246, "step": 19901 }, { "epoch": 0.26, "grad_norm": 4.493673801422119, "learning_rate": 1.9937387795390562e-05, "loss": 2.0154, "step": 19902 }, { "epoch": 0.26, "grad_norm": 4.016164779663086, "learning_rate": 1.9937376054478816e-05, "loss": 1.5809, "step": 19903 }, { "epoch": 0.26, "grad_norm": 4.161156177520752, "learning_rate": 1.993736431246982e-05, "loss": 2.2506, "step": 19904 }, { "epoch": 0.26, "grad_norm": 4.642040252685547, "learning_rate": 1.9937352569363565e-05, "loss": 1.9934, "step": 19905 }, { "epoch": 0.26, "grad_norm": 4.2822346687316895, "learning_rate": 1.9937340825160055e-05, "loss": 1.6244, "step": 19906 }, { "epoch": 0.26, "grad_norm": 3.7516825199127197, "learning_rate": 1.9937329079859302e-05, "loss": 2.1466, "step": 19907 }, { "epoch": 0.26, "grad_norm": 4.710654258728027, "learning_rate": 1.9937317333461296e-05, "loss": 2.6074, "step": 19908 }, { "epoch": 0.26, "grad_norm": 3.9190163612365723, "learning_rate": 1.993730558596604e-05, "loss": 1.8056, "step": 19909 }, { "epoch": 0.26, "grad_norm": 4.717161655426025, "learning_rate": 1.9937293837373537e-05, "loss": 2.2261, "step": 19910 }, { "epoch": 0.26, "grad_norm": 4.150884628295898, "learning_rate": 1.9937282087683787e-05, "loss": 1.9792, "step": 19911 }, { "epoch": 0.26, "grad_norm": 4.389639854431152, "learning_rate": 1.9937270336896795e-05, "loss": 2.7246, "step": 19912 }, { "epoch": 0.26, "grad_norm": 3.9098055362701416, "learning_rate": 1.993725858501256e-05, "loss": 2.3447, "step": 19913 }, { "epoch": 0.26, "grad_norm": 4.382754802703857, "learning_rate": 1.9937246832031084e-05, "loss": 2.0673, "step": 19914 }, { "epoch": 0.26, "grad_norm": 4.061171531677246, "learning_rate": 1.9937235077952363e-05, "loss": 2.0506, "step": 19915 }, { "epoch": 0.26, "grad_norm": 3.9906694889068604, "learning_rate": 1.9937223322776408e-05, "loss": 1.691, "step": 19916 }, { "epoch": 0.26, "grad_norm": 4.279658794403076, "learning_rate": 1.993721156650321e-05, "loss": 2.1672, "step": 19917 }, { "epoch": 0.26, "grad_norm": 4.139368057250977, "learning_rate": 1.993719980913278e-05, "loss": 2.6208, "step": 19918 }, { "epoch": 0.26, "grad_norm": 3.7334096431732178, "learning_rate": 1.993718805066511e-05, "loss": 1.9917, "step": 19919 }, { "epoch": 0.26, "grad_norm": 4.319728851318359, "learning_rate": 1.993717629110021e-05, "loss": 2.3174, "step": 19920 }, { "epoch": 0.26, "grad_norm": 4.875711441040039, "learning_rate": 1.993716453043807e-05, "loss": 2.5816, "step": 19921 }, { "epoch": 0.26, "grad_norm": 3.6149473190307617, "learning_rate": 1.9937152768678705e-05, "loss": 1.7594, "step": 19922 }, { "epoch": 0.26, "grad_norm": 4.517821311950684, "learning_rate": 1.993714100582211e-05, "loss": 2.5571, "step": 19923 }, { "epoch": 0.26, "grad_norm": 4.060359001159668, "learning_rate": 1.993712924186828e-05, "loss": 2.2731, "step": 19924 }, { "epoch": 0.26, "grad_norm": 4.22274923324585, "learning_rate": 1.993711747681723e-05, "loss": 1.6064, "step": 19925 }, { "epoch": 0.26, "grad_norm": 4.000070095062256, "learning_rate": 1.993710571066895e-05, "loss": 2.0181, "step": 19926 }, { "epoch": 0.26, "grad_norm": 4.385165214538574, "learning_rate": 1.9937093943423443e-05, "loss": 2.2309, "step": 19927 }, { "epoch": 0.26, "grad_norm": 3.4626197814941406, "learning_rate": 1.9937082175080714e-05, "loss": 1.9709, "step": 19928 }, { "epoch": 0.26, "grad_norm": 4.855739593505859, "learning_rate": 1.9937070405640765e-05, "loss": 2.5717, "step": 19929 }, { "epoch": 0.26, "grad_norm": 4.543110370635986, "learning_rate": 1.993705863510359e-05, "loss": 2.9573, "step": 19930 }, { "epoch": 0.26, "grad_norm": 4.015852928161621, "learning_rate": 1.99370468634692e-05, "loss": 2.3163, "step": 19931 }, { "epoch": 0.26, "grad_norm": 3.866717576980591, "learning_rate": 1.993703509073759e-05, "loss": 1.9538, "step": 19932 }, { "epoch": 0.26, "grad_norm": 4.090182304382324, "learning_rate": 1.993702331690876e-05, "loss": 1.7237, "step": 19933 }, { "epoch": 0.26, "grad_norm": 4.373220443725586, "learning_rate": 1.9937011541982718e-05, "loss": 2.4539, "step": 19934 }, { "epoch": 0.26, "grad_norm": 4.57352352142334, "learning_rate": 1.993699976595946e-05, "loss": 2.1211, "step": 19935 }, { "epoch": 0.26, "grad_norm": 4.4273529052734375, "learning_rate": 1.993698798883899e-05, "loss": 2.2594, "step": 19936 }, { "epoch": 0.26, "grad_norm": 4.1857380867004395, "learning_rate": 1.9936976210621305e-05, "loss": 2.0484, "step": 19937 }, { "epoch": 0.26, "grad_norm": 4.613154411315918, "learning_rate": 1.993696443130641e-05, "loss": 2.4643, "step": 19938 }, { "epoch": 0.26, "grad_norm": 3.7216291427612305, "learning_rate": 1.9936952650894304e-05, "loss": 1.7695, "step": 19939 }, { "epoch": 0.26, "grad_norm": 4.136347770690918, "learning_rate": 1.9936940869384993e-05, "loss": 2.1852, "step": 19940 }, { "epoch": 0.26, "grad_norm": 4.601489067077637, "learning_rate": 1.9936929086778473e-05, "loss": 2.8106, "step": 19941 }, { "epoch": 0.26, "grad_norm": 4.3911051750183105, "learning_rate": 1.9936917303074747e-05, "loss": 1.8142, "step": 19942 }, { "epoch": 0.26, "grad_norm": 3.5781402587890625, "learning_rate": 1.993690551827382e-05, "loss": 1.8997, "step": 19943 }, { "epoch": 0.26, "grad_norm": 3.885141372680664, "learning_rate": 1.9936893732375687e-05, "loss": 2.1529, "step": 19944 }, { "epoch": 0.26, "grad_norm": 3.953052043914795, "learning_rate": 1.9936881945380354e-05, "loss": 1.9148, "step": 19945 }, { "epoch": 0.26, "grad_norm": 4.540635108947754, "learning_rate": 1.993687015728782e-05, "loss": 2.6716, "step": 19946 }, { "epoch": 0.26, "grad_norm": 4.14754056930542, "learning_rate": 1.993685836809809e-05, "loss": 2.3071, "step": 19947 }, { "epoch": 0.26, "grad_norm": 3.783463954925537, "learning_rate": 1.9936846577811156e-05, "loss": 1.8281, "step": 19948 }, { "epoch": 0.26, "grad_norm": 4.553097724914551, "learning_rate": 1.993683478642703e-05, "loss": 2.5334, "step": 19949 }, { "epoch": 0.26, "grad_norm": 4.220886707305908, "learning_rate": 1.9936822993945708e-05, "loss": 2.0245, "step": 19950 }, { "epoch": 0.26, "grad_norm": 4.448840618133545, "learning_rate": 1.993681120036719e-05, "loss": 2.2252, "step": 19951 }, { "epoch": 0.26, "grad_norm": 4.114387035369873, "learning_rate": 1.993679940569148e-05, "loss": 2.0597, "step": 19952 }, { "epoch": 0.26, "grad_norm": 3.6700305938720703, "learning_rate": 1.9936787609918582e-05, "loss": 2.0185, "step": 19953 }, { "epoch": 0.26, "grad_norm": 4.100681304931641, "learning_rate": 1.993677581304849e-05, "loss": 1.8912, "step": 19954 }, { "epoch": 0.26, "grad_norm": 4.481338977813721, "learning_rate": 1.9936764015081215e-05, "loss": 2.483, "step": 19955 }, { "epoch": 0.26, "grad_norm": 3.6968467235565186, "learning_rate": 1.993675221601675e-05, "loss": 2.0197, "step": 19956 }, { "epoch": 0.26, "grad_norm": 4.048829078674316, "learning_rate": 1.99367404158551e-05, "loss": 2.2594, "step": 19957 }, { "epoch": 0.26, "grad_norm": 3.966996192932129, "learning_rate": 1.9936728614596263e-05, "loss": 1.987, "step": 19958 }, { "epoch": 0.26, "grad_norm": 4.052201747894287, "learning_rate": 1.993671681224024e-05, "loss": 1.8915, "step": 19959 }, { "epoch": 0.26, "grad_norm": 4.139946460723877, "learning_rate": 1.9936705008787037e-05, "loss": 2.141, "step": 19960 }, { "epoch": 0.26, "grad_norm": 4.278438568115234, "learning_rate": 1.9936693204236658e-05, "loss": 2.5461, "step": 19961 }, { "epoch": 0.26, "grad_norm": 4.093381404876709, "learning_rate": 1.9936681398589093e-05, "loss": 2.106, "step": 19962 }, { "epoch": 0.26, "grad_norm": 4.315193176269531, "learning_rate": 1.9936669591844353e-05, "loss": 1.8964, "step": 19963 }, { "epoch": 0.26, "grad_norm": 4.661324977874756, "learning_rate": 1.9936657784002435e-05, "loss": 2.8815, "step": 19964 }, { "epoch": 0.26, "grad_norm": 4.584590435028076, "learning_rate": 1.9936645975063344e-05, "loss": 2.2296, "step": 19965 }, { "epoch": 0.26, "grad_norm": 3.766897201538086, "learning_rate": 1.9936634165027075e-05, "loss": 1.9125, "step": 19966 }, { "epoch": 0.26, "grad_norm": 4.227521896362305, "learning_rate": 1.9936622353893633e-05, "loss": 2.0851, "step": 19967 }, { "epoch": 0.26, "grad_norm": 4.221475124359131, "learning_rate": 1.9936610541663023e-05, "loss": 2.3708, "step": 19968 }, { "epoch": 0.26, "grad_norm": 4.283149719238281, "learning_rate": 1.9936598728335238e-05, "loss": 1.8385, "step": 19969 }, { "epoch": 0.26, "grad_norm": 4.25095796585083, "learning_rate": 1.993658691391029e-05, "loss": 2.2151, "step": 19970 }, { "epoch": 0.26, "grad_norm": 3.9174964427948, "learning_rate": 1.993657509838817e-05, "loss": 2.0173, "step": 19971 }, { "epoch": 0.26, "grad_norm": 4.480676174163818, "learning_rate": 1.993656328176888e-05, "loss": 2.3707, "step": 19972 }, { "epoch": 0.26, "grad_norm": 4.149412631988525, "learning_rate": 1.9936551464052432e-05, "loss": 2.0458, "step": 19973 }, { "epoch": 0.26, "grad_norm": 3.746403217315674, "learning_rate": 1.9936539645238818e-05, "loss": 1.8894, "step": 19974 }, { "epoch": 0.26, "grad_norm": 3.8621859550476074, "learning_rate": 1.993652782532804e-05, "loss": 2.0894, "step": 19975 }, { "epoch": 0.26, "grad_norm": 3.8196451663970947, "learning_rate": 1.99365160043201e-05, "loss": 1.8198, "step": 19976 }, { "epoch": 0.26, "grad_norm": 3.9387426376342773, "learning_rate": 1.9936504182215004e-05, "loss": 1.9472, "step": 19977 }, { "epoch": 0.26, "grad_norm": 3.9899356365203857, "learning_rate": 1.9936492359012746e-05, "loss": 2.3166, "step": 19978 }, { "epoch": 0.26, "grad_norm": 3.78554368019104, "learning_rate": 1.9936480534713333e-05, "loss": 1.8419, "step": 19979 }, { "epoch": 0.26, "grad_norm": 3.6479146480560303, "learning_rate": 1.993646870931676e-05, "loss": 1.6628, "step": 19980 }, { "epoch": 0.26, "grad_norm": 5.127398490905762, "learning_rate": 1.9936456882823038e-05, "loss": 2.5774, "step": 19981 }, { "epoch": 0.26, "grad_norm": 4.049070835113525, "learning_rate": 1.9936445055232156e-05, "loss": 2.1421, "step": 19982 }, { "epoch": 0.26, "grad_norm": 3.9022786617279053, "learning_rate": 1.9936433226544127e-05, "loss": 1.7229, "step": 19983 }, { "epoch": 0.26, "grad_norm": 4.139493465423584, "learning_rate": 1.9936421396758945e-05, "loss": 2.9236, "step": 19984 }, { "epoch": 0.26, "grad_norm": 3.657266139984131, "learning_rate": 1.9936409565876612e-05, "loss": 1.7809, "step": 19985 }, { "epoch": 0.26, "grad_norm": 4.2991485595703125, "learning_rate": 1.993639773389713e-05, "loss": 2.2807, "step": 19986 }, { "epoch": 0.26, "grad_norm": 3.8764688968658447, "learning_rate": 1.993638590082051e-05, "loss": 1.7581, "step": 19987 }, { "epoch": 0.26, "grad_norm": 3.7450575828552246, "learning_rate": 1.9936374066646738e-05, "loss": 1.8595, "step": 19988 }, { "epoch": 0.26, "grad_norm": 4.011174201965332, "learning_rate": 1.993636223137582e-05, "loss": 1.8688, "step": 19989 }, { "epoch": 0.26, "grad_norm": 4.279287815093994, "learning_rate": 1.993635039500776e-05, "loss": 2.0265, "step": 19990 }, { "epoch": 0.26, "grad_norm": 3.9868786334991455, "learning_rate": 1.993633855754256e-05, "loss": 2.4395, "step": 19991 }, { "epoch": 0.26, "grad_norm": 3.882969617843628, "learning_rate": 1.9936326718980223e-05, "loss": 1.6921, "step": 19992 }, { "epoch": 0.26, "grad_norm": 4.326711177825928, "learning_rate": 1.9936314879320738e-05, "loss": 1.9613, "step": 19993 }, { "epoch": 0.26, "grad_norm": 4.811336994171143, "learning_rate": 1.993630303856412e-05, "loss": 2.7044, "step": 19994 }, { "epoch": 0.26, "grad_norm": 4.27886962890625, "learning_rate": 1.9936291196710367e-05, "loss": 2.2451, "step": 19995 }, { "epoch": 0.26, "grad_norm": 4.427332878112793, "learning_rate": 1.993627935375948e-05, "loss": 2.3296, "step": 19996 }, { "epoch": 0.26, "grad_norm": 4.331064224243164, "learning_rate": 1.9936267509711455e-05, "loss": 1.9216, "step": 19997 }, { "epoch": 0.26, "grad_norm": 4.428499698638916, "learning_rate": 1.99362556645663e-05, "loss": 2.3112, "step": 19998 }, { "epoch": 0.26, "grad_norm": 3.5492866039276123, "learning_rate": 1.9936243818324014e-05, "loss": 1.8799, "step": 19999 }, { "epoch": 0.26, "grad_norm": 3.995727062225342, "learning_rate": 1.9936231970984597e-05, "loss": 1.65, "step": 20000 }, { "epoch": 0.26, "grad_norm": 4.253725528717041, "learning_rate": 1.993622012254805e-05, "loss": 1.9262, "step": 20001 }, { "epoch": 0.26, "grad_norm": 3.2026913166046143, "learning_rate": 1.9936208273014378e-05, "loss": 1.5757, "step": 20002 }, { "epoch": 0.26, "grad_norm": 3.959512948989868, "learning_rate": 1.993619642238358e-05, "loss": 1.9181, "step": 20003 }, { "epoch": 0.26, "grad_norm": 4.153846740722656, "learning_rate": 1.9936184570655658e-05, "loss": 2.0493, "step": 20004 }, { "epoch": 0.26, "grad_norm": 4.104275703430176, "learning_rate": 1.993617271783061e-05, "loss": 2.2299, "step": 20005 }, { "epoch": 0.26, "grad_norm": 4.944378852844238, "learning_rate": 1.9936160863908444e-05, "loss": 2.4691, "step": 20006 }, { "epoch": 0.26, "grad_norm": 4.242530345916748, "learning_rate": 1.993614900888915e-05, "loss": 2.1579, "step": 20007 }, { "epoch": 0.26, "grad_norm": 4.812862396240234, "learning_rate": 1.9936137152772744e-05, "loss": 2.2571, "step": 20008 }, { "epoch": 0.26, "grad_norm": 9.27908992767334, "learning_rate": 1.9936125295559217e-05, "loss": 2.8857, "step": 20009 }, { "epoch": 0.26, "grad_norm": 3.5815773010253906, "learning_rate": 1.993611343724857e-05, "loss": 1.8598, "step": 20010 }, { "epoch": 0.26, "grad_norm": 4.018231391906738, "learning_rate": 1.9936101577840815e-05, "loss": 1.7181, "step": 20011 }, { "epoch": 0.26, "grad_norm": 4.129042148590088, "learning_rate": 1.9936089717335944e-05, "loss": 2.0131, "step": 20012 }, { "epoch": 0.26, "grad_norm": 3.8953874111175537, "learning_rate": 1.9936077855733955e-05, "loss": 2.2177, "step": 20013 }, { "epoch": 0.26, "grad_norm": 4.078856468200684, "learning_rate": 1.993606599303486e-05, "loss": 1.9381, "step": 20014 }, { "epoch": 0.26, "grad_norm": 4.28132438659668, "learning_rate": 1.993605412923865e-05, "loss": 2.1251, "step": 20015 }, { "epoch": 0.26, "grad_norm": 3.518378973007202, "learning_rate": 1.9936042264345334e-05, "loss": 1.9299, "step": 20016 }, { "epoch": 0.26, "grad_norm": 4.155153274536133, "learning_rate": 1.9936030398354908e-05, "loss": 2.0001, "step": 20017 }, { "epoch": 0.26, "grad_norm": 4.65361213684082, "learning_rate": 1.993601853126738e-05, "loss": 2.267, "step": 20018 }, { "epoch": 0.26, "grad_norm": 3.7116804122924805, "learning_rate": 1.993600666308274e-05, "loss": 1.8763, "step": 20019 }, { "epoch": 0.26, "grad_norm": 4.3417768478393555, "learning_rate": 1.9935994793801003e-05, "loss": 2.4835, "step": 20020 }, { "epoch": 0.26, "grad_norm": 4.018265247344971, "learning_rate": 1.993598292342216e-05, "loss": 2.189, "step": 20021 }, { "epoch": 0.26, "grad_norm": 4.379052639007568, "learning_rate": 1.9935971051946217e-05, "loss": 2.4581, "step": 20022 }, { "epoch": 0.26, "grad_norm": 4.50749397277832, "learning_rate": 1.9935959179373174e-05, "loss": 2.1661, "step": 20023 }, { "epoch": 0.26, "grad_norm": 3.811507225036621, "learning_rate": 1.9935947305703032e-05, "loss": 2.1606, "step": 20024 }, { "epoch": 0.26, "grad_norm": 3.850375175476074, "learning_rate": 1.9935935430935794e-05, "loss": 2.0111, "step": 20025 }, { "epoch": 0.26, "grad_norm": 3.9616196155548096, "learning_rate": 1.993592355507146e-05, "loss": 1.8195, "step": 20026 }, { "epoch": 0.26, "grad_norm": 4.340387344360352, "learning_rate": 1.993591167811003e-05, "loss": 2.3468, "step": 20027 }, { "epoch": 0.26, "grad_norm": 4.293125152587891, "learning_rate": 1.993589980005151e-05, "loss": 2.0849, "step": 20028 }, { "epoch": 0.26, "grad_norm": 4.833113193511963, "learning_rate": 1.9935887920895895e-05, "loss": 2.4717, "step": 20029 }, { "epoch": 0.26, "grad_norm": 4.422967433929443, "learning_rate": 1.993587604064319e-05, "loss": 2.2366, "step": 20030 }, { "epoch": 0.26, "grad_norm": 4.411367893218994, "learning_rate": 1.9935864159293397e-05, "loss": 1.905, "step": 20031 }, { "epoch": 0.26, "grad_norm": 3.8906924724578857, "learning_rate": 1.9935852276846514e-05, "loss": 1.9232, "step": 20032 }, { "epoch": 0.26, "grad_norm": 4.034314155578613, "learning_rate": 1.9935840393302547e-05, "loss": 2.1511, "step": 20033 }, { "epoch": 0.26, "grad_norm": 4.182768821716309, "learning_rate": 1.9935828508661492e-05, "loss": 2.0246, "step": 20034 }, { "epoch": 0.26, "grad_norm": 3.709578514099121, "learning_rate": 1.9935816622923358e-05, "loss": 1.7284, "step": 20035 }, { "epoch": 0.26, "grad_norm": 4.181471824645996, "learning_rate": 1.9935804736088132e-05, "loss": 2.0601, "step": 20036 }, { "epoch": 0.26, "grad_norm": 3.5183985233306885, "learning_rate": 1.9935792848155833e-05, "loss": 1.58, "step": 20037 }, { "epoch": 0.26, "grad_norm": 4.165890216827393, "learning_rate": 1.993578095912645e-05, "loss": 2.3375, "step": 20038 }, { "epoch": 0.26, "grad_norm": 4.292595386505127, "learning_rate": 1.993576906899999e-05, "loss": 2.4041, "step": 20039 }, { "epoch": 0.26, "grad_norm": 4.017538070678711, "learning_rate": 1.9935757177776453e-05, "loss": 1.8134, "step": 20040 }, { "epoch": 0.26, "grad_norm": 4.3949432373046875, "learning_rate": 1.9935745285455835e-05, "loss": 2.1868, "step": 20041 }, { "epoch": 0.26, "grad_norm": 3.7609076499938965, "learning_rate": 1.9935733392038147e-05, "loss": 1.9028, "step": 20042 }, { "epoch": 0.26, "grad_norm": 3.8822426795959473, "learning_rate": 1.9935721497523386e-05, "loss": 1.8853, "step": 20043 }, { "epoch": 0.26, "grad_norm": 3.9649181365966797, "learning_rate": 1.9935709601911547e-05, "loss": 1.9841, "step": 20044 }, { "epoch": 0.26, "grad_norm": 4.823812484741211, "learning_rate": 1.9935697705202645e-05, "loss": 2.1787, "step": 20045 }, { "epoch": 0.26, "grad_norm": 4.062495231628418, "learning_rate": 1.993568580739667e-05, "loss": 1.9441, "step": 20046 }, { "epoch": 0.26, "grad_norm": 4.328280448913574, "learning_rate": 1.9935673908493623e-05, "loss": 2.0754, "step": 20047 }, { "epoch": 0.26, "grad_norm": 4.108399868011475, "learning_rate": 1.9935662008493513e-05, "loss": 2.2808, "step": 20048 }, { "epoch": 0.26, "grad_norm": 4.9107794761657715, "learning_rate": 1.9935650107396337e-05, "loss": 1.8593, "step": 20049 }, { "epoch": 0.26, "grad_norm": 4.021245956420898, "learning_rate": 1.9935638205202097e-05, "loss": 2.2766, "step": 20050 }, { "epoch": 0.26, "grad_norm": 3.917186737060547, "learning_rate": 1.9935626301910793e-05, "loss": 2.0377, "step": 20051 }, { "epoch": 0.26, "grad_norm": 3.829163074493408, "learning_rate": 1.993561439752243e-05, "loss": 1.685, "step": 20052 }, { "epoch": 0.26, "grad_norm": 4.305392265319824, "learning_rate": 1.9935602492037002e-05, "loss": 2.2663, "step": 20053 }, { "epoch": 0.26, "grad_norm": 4.080236911773682, "learning_rate": 1.9935590585454515e-05, "loss": 2.1362, "step": 20054 }, { "epoch": 0.26, "grad_norm": 4.574577808380127, "learning_rate": 1.9935578677774974e-05, "loss": 2.3585, "step": 20055 }, { "epoch": 0.26, "grad_norm": 4.75154447555542, "learning_rate": 1.9935566768998376e-05, "loss": 2.2394, "step": 20056 }, { "epoch": 0.26, "grad_norm": 4.087058067321777, "learning_rate": 1.993555485912472e-05, "loss": 2.5708, "step": 20057 }, { "epoch": 0.26, "grad_norm": 3.6812009811401367, "learning_rate": 1.993554294815401e-05, "loss": 1.6852, "step": 20058 }, { "epoch": 0.26, "grad_norm": 4.620413303375244, "learning_rate": 1.9935531036086254e-05, "loss": 2.596, "step": 20059 }, { "epoch": 0.26, "grad_norm": 4.617361545562744, "learning_rate": 1.993551912292144e-05, "loss": 2.8157, "step": 20060 }, { "epoch": 0.26, "grad_norm": 4.301004886627197, "learning_rate": 1.993550720865958e-05, "loss": 1.4599, "step": 20061 }, { "epoch": 0.26, "grad_norm": 3.930168628692627, "learning_rate": 1.9935495293300668e-05, "loss": 2.5355, "step": 20062 }, { "epoch": 0.26, "grad_norm": 3.6462488174438477, "learning_rate": 1.993548337684471e-05, "loss": 1.7131, "step": 20063 }, { "epoch": 0.26, "grad_norm": 4.54675817489624, "learning_rate": 1.9935471459291706e-05, "loss": 2.692, "step": 20064 }, { "epoch": 0.26, "grad_norm": 4.685424327850342, "learning_rate": 1.9935459540641662e-05, "loss": 2.098, "step": 20065 }, { "epoch": 0.26, "grad_norm": 4.177743434906006, "learning_rate": 1.993544762089457e-05, "loss": 2.372, "step": 20066 }, { "epoch": 0.26, "grad_norm": 4.308095932006836, "learning_rate": 1.9935435700050435e-05, "loss": 2.0725, "step": 20067 }, { "epoch": 0.26, "grad_norm": 4.136600017547607, "learning_rate": 1.993542377810926e-05, "loss": 1.8405, "step": 20068 }, { "epoch": 0.26, "grad_norm": 4.201308250427246, "learning_rate": 1.993541185507105e-05, "loss": 1.9855, "step": 20069 }, { "epoch": 0.26, "grad_norm": 4.252732276916504, "learning_rate": 1.99353999309358e-05, "loss": 2.2169, "step": 20070 }, { "epoch": 0.26, "grad_norm": 3.726390838623047, "learning_rate": 1.993538800570351e-05, "loss": 1.8543, "step": 20071 }, { "epoch": 0.26, "grad_norm": 4.105838298797607, "learning_rate": 1.9935376079374187e-05, "loss": 2.1629, "step": 20072 }, { "epoch": 0.26, "grad_norm": 4.366840362548828, "learning_rate": 1.9935364151947828e-05, "loss": 1.9514, "step": 20073 }, { "epoch": 0.26, "grad_norm": 4.088689804077148, "learning_rate": 1.993535222342444e-05, "loss": 2.2067, "step": 20074 }, { "epoch": 0.26, "grad_norm": 4.214381694793701, "learning_rate": 1.9935340293804018e-05, "loss": 2.5223, "step": 20075 }, { "epoch": 0.26, "grad_norm": 4.284530162811279, "learning_rate": 1.9935328363086567e-05, "loss": 2.3852, "step": 20076 }, { "epoch": 0.26, "grad_norm": 4.281277656555176, "learning_rate": 1.9935316431272086e-05, "loss": 2.2142, "step": 20077 }, { "epoch": 0.26, "grad_norm": 3.92854380607605, "learning_rate": 1.9935304498360576e-05, "loss": 2.1298, "step": 20078 }, { "epoch": 0.26, "grad_norm": 3.7482826709747314, "learning_rate": 1.9935292564352042e-05, "loss": 2.376, "step": 20079 }, { "epoch": 0.26, "grad_norm": 4.320026397705078, "learning_rate": 1.9935280629246483e-05, "loss": 2.4364, "step": 20080 }, { "epoch": 0.26, "grad_norm": 4.619804382324219, "learning_rate": 1.99352686930439e-05, "loss": 2.1327, "step": 20081 }, { "epoch": 0.26, "grad_norm": 3.939081907272339, "learning_rate": 1.9935256755744295e-05, "loss": 2.0776, "step": 20082 }, { "epoch": 0.26, "grad_norm": 3.8113019466400146, "learning_rate": 1.9935244817347674e-05, "loss": 1.9622, "step": 20083 }, { "epoch": 0.26, "grad_norm": 4.549561023712158, "learning_rate": 1.9935232877854026e-05, "loss": 2.2529, "step": 20084 }, { "epoch": 0.26, "grad_norm": 3.4458279609680176, "learning_rate": 1.9935220937263362e-05, "loss": 1.6556, "step": 20085 }, { "epoch": 0.26, "grad_norm": 4.311899662017822, "learning_rate": 1.9935208995575682e-05, "loss": 2.1188, "step": 20086 }, { "epoch": 0.26, "grad_norm": 3.908724308013916, "learning_rate": 1.9935197052790983e-05, "loss": 2.0002, "step": 20087 }, { "epoch": 0.26, "grad_norm": 4.384981632232666, "learning_rate": 1.9935185108909275e-05, "loss": 2.1666, "step": 20088 }, { "epoch": 0.26, "grad_norm": 4.554654121398926, "learning_rate": 1.9935173163930553e-05, "loss": 2.3426, "step": 20089 }, { "epoch": 0.26, "grad_norm": 5.283195972442627, "learning_rate": 1.9935161217854816e-05, "loss": 2.2969, "step": 20090 }, { "epoch": 0.26, "grad_norm": 4.220272064208984, "learning_rate": 1.993514927068207e-05, "loss": 2.2145, "step": 20091 }, { "epoch": 0.26, "grad_norm": 4.547178745269775, "learning_rate": 1.9935137322412313e-05, "loss": 2.2402, "step": 20092 }, { "epoch": 0.26, "grad_norm": 4.1229071617126465, "learning_rate": 1.9935125373045555e-05, "loss": 1.8047, "step": 20093 }, { "epoch": 0.26, "grad_norm": 4.181777000427246, "learning_rate": 1.9935113422581787e-05, "loss": 2.0928, "step": 20094 }, { "epoch": 0.26, "grad_norm": 4.090522766113281, "learning_rate": 1.993510147102101e-05, "loss": 1.7764, "step": 20095 }, { "epoch": 0.26, "grad_norm": 3.5409233570098877, "learning_rate": 1.9935089518363237e-05, "loss": 1.9763, "step": 20096 }, { "epoch": 0.26, "grad_norm": 3.922353506088257, "learning_rate": 1.9935077564608455e-05, "loss": 1.9143, "step": 20097 }, { "epoch": 0.26, "grad_norm": 3.9074881076812744, "learning_rate": 1.9935065609756673e-05, "loss": 2.0302, "step": 20098 }, { "epoch": 0.26, "grad_norm": 3.6922624111175537, "learning_rate": 1.9935053653807893e-05, "loss": 1.9393, "step": 20099 }, { "epoch": 0.26, "grad_norm": 3.963557481765747, "learning_rate": 1.9935041696762113e-05, "loss": 1.7574, "step": 20100 }, { "epoch": 0.26, "grad_norm": 4.159547805786133, "learning_rate": 1.9935029738619338e-05, "loss": 2.1366, "step": 20101 }, { "epoch": 0.26, "grad_norm": 4.152477741241455, "learning_rate": 1.9935017779379566e-05, "loss": 2.2726, "step": 20102 }, { "epoch": 0.26, "grad_norm": 4.628138065338135, "learning_rate": 1.99350058190428e-05, "loss": 2.8668, "step": 20103 }, { "epoch": 0.26, "grad_norm": 4.378931522369385, "learning_rate": 1.9934993857609037e-05, "loss": 2.2417, "step": 20104 }, { "epoch": 0.26, "grad_norm": 4.331455707550049, "learning_rate": 1.993498189507829e-05, "loss": 2.5948, "step": 20105 }, { "epoch": 0.26, "grad_norm": 4.491311550140381, "learning_rate": 1.9934969931450544e-05, "loss": 2.1487, "step": 20106 }, { "epoch": 0.26, "grad_norm": 3.347571611404419, "learning_rate": 1.993495796672581e-05, "loss": 1.4499, "step": 20107 }, { "epoch": 0.26, "grad_norm": 4.196234703063965, "learning_rate": 1.9934946000904093e-05, "loss": 2.5044, "step": 20108 }, { "epoch": 0.26, "grad_norm": 4.382129192352295, "learning_rate": 1.993493403398539e-05, "loss": 2.4546, "step": 20109 }, { "epoch": 0.26, "grad_norm": 3.8114895820617676, "learning_rate": 1.9934922065969695e-05, "loss": 2.0839, "step": 20110 }, { "epoch": 0.26, "grad_norm": 4.508089065551758, "learning_rate": 1.993491009685702e-05, "loss": 2.1834, "step": 20111 }, { "epoch": 0.26, "grad_norm": 3.7696003913879395, "learning_rate": 1.9934898126647362e-05, "loss": 2.0803, "step": 20112 }, { "epoch": 0.26, "grad_norm": 3.805765151977539, "learning_rate": 1.9934886155340726e-05, "loss": 2.2013, "step": 20113 }, { "epoch": 0.26, "grad_norm": 4.789685249328613, "learning_rate": 1.9934874182937104e-05, "loss": 2.8342, "step": 20114 }, { "epoch": 0.26, "grad_norm": 3.8109145164489746, "learning_rate": 1.9934862209436507e-05, "loss": 1.8347, "step": 20115 }, { "epoch": 0.26, "grad_norm": 3.7531349658966064, "learning_rate": 1.993485023483893e-05, "loss": 1.8522, "step": 20116 }, { "epoch": 0.26, "grad_norm": 3.986276149749756, "learning_rate": 1.993483825914438e-05, "loss": 1.9464, "step": 20117 }, { "epoch": 0.26, "grad_norm": 4.014795780181885, "learning_rate": 1.9934826282352855e-05, "loss": 1.9012, "step": 20118 }, { "epoch": 0.26, "grad_norm": 3.6605308055877686, "learning_rate": 1.9934814304464356e-05, "loss": 2.2344, "step": 20119 }, { "epoch": 0.26, "grad_norm": 4.184957027435303, "learning_rate": 1.9934802325478884e-05, "loss": 2.2921, "step": 20120 }, { "epoch": 0.26, "grad_norm": 4.174584865570068, "learning_rate": 1.993479034539644e-05, "loss": 2.2017, "step": 20121 }, { "epoch": 0.26, "grad_norm": 3.861020088195801, "learning_rate": 1.9934778364217033e-05, "loss": 1.8976, "step": 20122 }, { "epoch": 0.26, "grad_norm": 4.089574813842773, "learning_rate": 1.9934766381940652e-05, "loss": 2.224, "step": 20123 }, { "epoch": 0.26, "grad_norm": 3.9883832931518555, "learning_rate": 1.9934754398567303e-05, "loss": 2.042, "step": 20124 }, { "epoch": 0.26, "grad_norm": 4.723259925842285, "learning_rate": 1.9934742414096992e-05, "loss": 2.5989, "step": 20125 }, { "epoch": 0.26, "grad_norm": 4.467871189117432, "learning_rate": 1.993473042852972e-05, "loss": 2.6919, "step": 20126 }, { "epoch": 0.26, "grad_norm": 3.638951539993286, "learning_rate": 1.993471844186548e-05, "loss": 2.0501, "step": 20127 }, { "epoch": 0.26, "grad_norm": 4.036396503448486, "learning_rate": 1.9934706454104277e-05, "loss": 1.8377, "step": 20128 }, { "epoch": 0.26, "grad_norm": 4.114701747894287, "learning_rate": 1.993469446524612e-05, "loss": 1.7256, "step": 20129 }, { "epoch": 0.26, "grad_norm": 4.438697338104248, "learning_rate": 1.9934682475291e-05, "loss": 2.2428, "step": 20130 }, { "epoch": 0.26, "grad_norm": 3.662654161453247, "learning_rate": 1.9934670484238923e-05, "loss": 1.7729, "step": 20131 }, { "epoch": 0.26, "grad_norm": 4.198250770568848, "learning_rate": 1.9934658492089892e-05, "loss": 2.5355, "step": 20132 }, { "epoch": 0.26, "grad_norm": 4.346597194671631, "learning_rate": 1.9934646498843903e-05, "loss": 2.4071, "step": 20133 }, { "epoch": 0.26, "grad_norm": 4.48044490814209, "learning_rate": 1.9934634504500962e-05, "loss": 1.9635, "step": 20134 }, { "epoch": 0.26, "grad_norm": 3.5978379249572754, "learning_rate": 1.993462250906107e-05, "loss": 1.6102, "step": 20135 }, { "epoch": 0.26, "grad_norm": 3.9104630947113037, "learning_rate": 1.9934610512524225e-05, "loss": 1.9809, "step": 20136 }, { "epoch": 0.26, "grad_norm": 3.6162917613983154, "learning_rate": 1.9934598514890432e-05, "loss": 1.8272, "step": 20137 }, { "epoch": 0.26, "grad_norm": 3.368802785873413, "learning_rate": 1.9934586516159688e-05, "loss": 1.6898, "step": 20138 }, { "epoch": 0.26, "grad_norm": 4.029378890991211, "learning_rate": 1.9934574516332002e-05, "loss": 2.2917, "step": 20139 }, { "epoch": 0.26, "grad_norm": 4.670548915863037, "learning_rate": 1.9934562515407365e-05, "loss": 2.3676, "step": 20140 }, { "epoch": 0.26, "grad_norm": 4.495560169219971, "learning_rate": 1.993455051338579e-05, "loss": 2.4334, "step": 20141 }, { "epoch": 0.26, "grad_norm": 3.927718162536621, "learning_rate": 1.9934538510267268e-05, "loss": 1.9801, "step": 20142 }, { "epoch": 0.26, "grad_norm": 3.8528969287872314, "learning_rate": 1.9934526506051803e-05, "loss": 2.2549, "step": 20143 }, { "epoch": 0.26, "grad_norm": 3.731326103210449, "learning_rate": 1.99345145007394e-05, "loss": 1.9981, "step": 20144 }, { "epoch": 0.26, "grad_norm": 4.299250602722168, "learning_rate": 1.9934502494330057e-05, "loss": 2.3292, "step": 20145 }, { "epoch": 0.26, "grad_norm": 3.483771800994873, "learning_rate": 1.993449048682378e-05, "loss": 1.5169, "step": 20146 }, { "epoch": 0.26, "grad_norm": 4.2108941078186035, "learning_rate": 1.9934478478220562e-05, "loss": 2.052, "step": 20147 }, { "epoch": 0.26, "grad_norm": 4.128800392150879, "learning_rate": 1.993446646852041e-05, "loss": 2.0239, "step": 20148 }, { "epoch": 0.26, "grad_norm": 4.494658946990967, "learning_rate": 1.9934454457723324e-05, "loss": 2.2119, "step": 20149 }, { "epoch": 0.26, "grad_norm": 4.3124566078186035, "learning_rate": 1.993444244582931e-05, "loss": 2.2445, "step": 20150 }, { "epoch": 0.26, "grad_norm": 3.98872709274292, "learning_rate": 1.9934430432838358e-05, "loss": 2.5307, "step": 20151 }, { "epoch": 0.26, "grad_norm": 3.746767997741699, "learning_rate": 1.9934418418750483e-05, "loss": 2.1675, "step": 20152 }, { "epoch": 0.26, "grad_norm": 4.711238384246826, "learning_rate": 1.9934406403565676e-05, "loss": 2.2962, "step": 20153 }, { "epoch": 0.26, "grad_norm": 4.008655071258545, "learning_rate": 1.9934394387283942e-05, "loss": 2.2427, "step": 20154 }, { "epoch": 0.26, "grad_norm": 4.552107334136963, "learning_rate": 1.9934382369905286e-05, "loss": 2.3847, "step": 20155 }, { "epoch": 0.26, "grad_norm": 4.657373905181885, "learning_rate": 1.9934370351429702e-05, "loss": 1.9152, "step": 20156 }, { "epoch": 0.26, "grad_norm": 4.233240127563477, "learning_rate": 1.9934358331857194e-05, "loss": 2.1193, "step": 20157 }, { "epoch": 0.26, "grad_norm": 4.095533847808838, "learning_rate": 1.9934346311187768e-05, "loss": 2.6955, "step": 20158 }, { "epoch": 0.26, "grad_norm": 4.6495490074157715, "learning_rate": 1.993433428942142e-05, "loss": 1.9065, "step": 20159 }, { "epoch": 0.26, "grad_norm": 4.238644123077393, "learning_rate": 1.993432226655815e-05, "loss": 2.3201, "step": 20160 }, { "epoch": 0.26, "grad_norm": 4.046550273895264, "learning_rate": 1.993431024259797e-05, "loss": 2.2822, "step": 20161 }, { "epoch": 0.26, "grad_norm": 3.986895799636841, "learning_rate": 1.9934298217540867e-05, "loss": 2.0973, "step": 20162 }, { "epoch": 0.26, "grad_norm": 3.4453189373016357, "learning_rate": 1.993428619138685e-05, "loss": 1.7965, "step": 20163 }, { "epoch": 0.26, "grad_norm": 4.692540168762207, "learning_rate": 1.993427416413592e-05, "loss": 2.4657, "step": 20164 }, { "epoch": 0.26, "grad_norm": 4.02698278427124, "learning_rate": 1.9934262135788075e-05, "loss": 2.0405, "step": 20165 }, { "epoch": 0.26, "grad_norm": 4.1140289306640625, "learning_rate": 1.9934250106343323e-05, "loss": 2.3076, "step": 20166 }, { "epoch": 0.26, "grad_norm": 4.393296241760254, "learning_rate": 1.993423807580166e-05, "loss": 2.3771, "step": 20167 }, { "epoch": 0.26, "grad_norm": 3.6659748554229736, "learning_rate": 1.993422604416309e-05, "loss": 1.9552, "step": 20168 }, { "epoch": 0.26, "grad_norm": 3.860633134841919, "learning_rate": 1.9934214011427607e-05, "loss": 2.1886, "step": 20169 }, { "epoch": 0.26, "grad_norm": 3.9461939334869385, "learning_rate": 1.9934201977595225e-05, "loss": 1.8664, "step": 20170 }, { "epoch": 0.26, "grad_norm": 3.9499382972717285, "learning_rate": 1.9934189942665935e-05, "loss": 2.1272, "step": 20171 }, { "epoch": 0.26, "grad_norm": 4.071500778198242, "learning_rate": 1.9934177906639743e-05, "loss": 1.7767, "step": 20172 }, { "epoch": 0.26, "grad_norm": 4.149800777435303, "learning_rate": 1.9934165869516648e-05, "loss": 2.7048, "step": 20173 }, { "epoch": 0.26, "grad_norm": 4.236618518829346, "learning_rate": 1.9934153831296655e-05, "loss": 2.2655, "step": 20174 }, { "epoch": 0.26, "grad_norm": 3.969792127609253, "learning_rate": 1.993414179197976e-05, "loss": 2.135, "step": 20175 }, { "epoch": 0.26, "grad_norm": 3.6845407485961914, "learning_rate": 1.993412975156597e-05, "loss": 1.8005, "step": 20176 }, { "epoch": 0.26, "grad_norm": 3.516230344772339, "learning_rate": 1.993411771005528e-05, "loss": 1.7211, "step": 20177 }, { "epoch": 0.26, "grad_norm": 4.330451488494873, "learning_rate": 1.9934105667447698e-05, "loss": 2.2917, "step": 20178 }, { "epoch": 0.26, "grad_norm": 4.914722919464111, "learning_rate": 1.993409362374322e-05, "loss": 2.6703, "step": 20179 }, { "epoch": 0.26, "grad_norm": 3.9166901111602783, "learning_rate": 1.9934081578941853e-05, "loss": 1.6929, "step": 20180 }, { "epoch": 0.26, "grad_norm": 3.760906219482422, "learning_rate": 1.9934069533043595e-05, "loss": 2.172, "step": 20181 }, { "epoch": 0.26, "grad_norm": 3.8619189262390137, "learning_rate": 1.9934057486048442e-05, "loss": 2.2293, "step": 20182 }, { "epoch": 0.26, "grad_norm": 3.693270683288574, "learning_rate": 1.9934045437956406e-05, "loss": 1.7003, "step": 20183 }, { "epoch": 0.26, "grad_norm": 4.006861209869385, "learning_rate": 1.993403338876748e-05, "loss": 2.3816, "step": 20184 }, { "epoch": 0.26, "grad_norm": 3.9347035884857178, "learning_rate": 1.9934021338481667e-05, "loss": 1.7784, "step": 20185 }, { "epoch": 0.26, "grad_norm": 4.695899963378906, "learning_rate": 1.993400928709897e-05, "loss": 2.1517, "step": 20186 }, { "epoch": 0.26, "grad_norm": 3.9608936309814453, "learning_rate": 1.993399723461939e-05, "loss": 1.7473, "step": 20187 }, { "epoch": 0.26, "grad_norm": 4.342446327209473, "learning_rate": 1.993398518104293e-05, "loss": 2.2003, "step": 20188 }, { "epoch": 0.26, "grad_norm": 4.546868324279785, "learning_rate": 1.993397312636959e-05, "loss": 2.3075, "step": 20189 }, { "epoch": 0.26, "grad_norm": 4.161525726318359, "learning_rate": 1.993396107059937e-05, "loss": 1.9299, "step": 20190 }, { "epoch": 0.26, "grad_norm": 4.303929805755615, "learning_rate": 1.9933949013732267e-05, "loss": 2.3159, "step": 20191 }, { "epoch": 0.26, "grad_norm": 3.3321828842163086, "learning_rate": 1.9933936955768295e-05, "loss": 1.7669, "step": 20192 }, { "epoch": 0.26, "grad_norm": 4.200976371765137, "learning_rate": 1.9933924896707443e-05, "loss": 2.4748, "step": 20193 }, { "epoch": 0.26, "grad_norm": 3.95194411277771, "learning_rate": 1.993391283654972e-05, "loss": 2.0497, "step": 20194 }, { "epoch": 0.26, "grad_norm": 4.585890769958496, "learning_rate": 1.9933900775295124e-05, "loss": 2.7119, "step": 20195 }, { "epoch": 0.26, "grad_norm": 3.6910107135772705, "learning_rate": 1.9933888712943657e-05, "loss": 1.9028, "step": 20196 }, { "epoch": 0.26, "grad_norm": 4.766327857971191, "learning_rate": 1.993387664949532e-05, "loss": 3.0882, "step": 20197 }, { "epoch": 0.26, "grad_norm": 4.655524730682373, "learning_rate": 1.9933864584950112e-05, "loss": 2.2365, "step": 20198 }, { "epoch": 0.26, "grad_norm": 3.8866965770721436, "learning_rate": 1.993385251930804e-05, "loss": 2.1726, "step": 20199 }, { "epoch": 0.26, "grad_norm": 4.18402624130249, "learning_rate": 1.99338404525691e-05, "loss": 2.2319, "step": 20200 }, { "epoch": 0.26, "grad_norm": 3.8999764919281006, "learning_rate": 1.9933828384733297e-05, "loss": 1.8838, "step": 20201 }, { "epoch": 0.26, "grad_norm": 4.264732837677002, "learning_rate": 1.9933816315800628e-05, "loss": 1.8848, "step": 20202 }, { "epoch": 0.26, "grad_norm": 4.009380340576172, "learning_rate": 1.99338042457711e-05, "loss": 2.4374, "step": 20203 }, { "epoch": 0.26, "grad_norm": 3.9172186851501465, "learning_rate": 1.993379217464471e-05, "loss": 2.0866, "step": 20204 }, { "epoch": 0.26, "grad_norm": 4.01906156539917, "learning_rate": 1.9933780102421463e-05, "loss": 2.0329, "step": 20205 }, { "epoch": 0.26, "grad_norm": 4.558675765991211, "learning_rate": 1.9933768029101353e-05, "loss": 2.4191, "step": 20206 }, { "epoch": 0.26, "grad_norm": 4.180459022521973, "learning_rate": 1.9933755954684393e-05, "loss": 2.4397, "step": 20207 }, { "epoch": 0.26, "grad_norm": 4.19126558303833, "learning_rate": 1.9933743879170576e-05, "loss": 2.3024, "step": 20208 }, { "epoch": 0.26, "grad_norm": 4.196140289306641, "learning_rate": 1.9933731802559903e-05, "loss": 2.0875, "step": 20209 }, { "epoch": 0.26, "grad_norm": 3.596362829208374, "learning_rate": 1.9933719724852382e-05, "loss": 1.8617, "step": 20210 }, { "epoch": 0.26, "grad_norm": 4.566625595092773, "learning_rate": 1.9933707646048005e-05, "loss": 2.6962, "step": 20211 }, { "epoch": 0.26, "grad_norm": 4.484437465667725, "learning_rate": 1.9933695566146778e-05, "loss": 1.6705, "step": 20212 }, { "epoch": 0.26, "grad_norm": 5.37533712387085, "learning_rate": 1.9933683485148704e-05, "loss": 2.9597, "step": 20213 }, { "epoch": 0.26, "grad_norm": 4.491657733917236, "learning_rate": 1.9933671403053783e-05, "loss": 2.3972, "step": 20214 }, { "epoch": 0.26, "grad_norm": 4.435246467590332, "learning_rate": 1.9933659319862016e-05, "loss": 1.8719, "step": 20215 }, { "epoch": 0.26, "grad_norm": 4.146576404571533, "learning_rate": 1.9933647235573405e-05, "loss": 1.9393, "step": 20216 }, { "epoch": 0.26, "grad_norm": 3.6757259368896484, "learning_rate": 1.9933635150187948e-05, "loss": 1.7855, "step": 20217 }, { "epoch": 0.26, "grad_norm": 4.5831685066223145, "learning_rate": 1.9933623063705654e-05, "loss": 2.654, "step": 20218 }, { "epoch": 0.26, "grad_norm": 4.50308084487915, "learning_rate": 1.9933610976126517e-05, "loss": 2.3219, "step": 20219 }, { "epoch": 0.26, "grad_norm": 3.801862955093384, "learning_rate": 1.993359888745054e-05, "loss": 1.9597, "step": 20220 }, { "epoch": 0.26, "grad_norm": 5.006508827209473, "learning_rate": 1.9933586797677727e-05, "loss": 1.9824, "step": 20221 }, { "epoch": 0.26, "grad_norm": 3.992751121520996, "learning_rate": 1.9933574706808077e-05, "loss": 1.9656, "step": 20222 }, { "epoch": 0.26, "grad_norm": 3.91310977935791, "learning_rate": 1.993356261484159e-05, "loss": 2.3361, "step": 20223 }, { "epoch": 0.26, "grad_norm": 3.6708834171295166, "learning_rate": 1.993355052177827e-05, "loss": 1.9298, "step": 20224 }, { "epoch": 0.26, "grad_norm": 4.087425231933594, "learning_rate": 1.993353842761812e-05, "loss": 1.9677, "step": 20225 }, { "epoch": 0.26, "grad_norm": 4.033565998077393, "learning_rate": 1.993352633236114e-05, "loss": 2.0267, "step": 20226 }, { "epoch": 0.26, "grad_norm": 3.953246831893921, "learning_rate": 1.9933514236007323e-05, "loss": 2.26, "step": 20227 }, { "epoch": 0.26, "grad_norm": 4.362458229064941, "learning_rate": 1.993350213855668e-05, "loss": 2.1869, "step": 20228 }, { "epoch": 0.26, "grad_norm": 4.757810592651367, "learning_rate": 1.9933490040009212e-05, "loss": 1.8503, "step": 20229 }, { "epoch": 0.26, "grad_norm": 3.6819167137145996, "learning_rate": 1.9933477940364918e-05, "loss": 1.8009, "step": 20230 }, { "epoch": 0.26, "grad_norm": 3.9521288871765137, "learning_rate": 1.9933465839623797e-05, "loss": 1.7043, "step": 20231 }, { "epoch": 0.26, "grad_norm": 4.0081706047058105, "learning_rate": 1.9933453737785856e-05, "loss": 2.0991, "step": 20232 }, { "epoch": 0.26, "grad_norm": 3.789290189743042, "learning_rate": 1.9933441634851092e-05, "loss": 1.759, "step": 20233 }, { "epoch": 0.26, "grad_norm": 3.9135231971740723, "learning_rate": 1.993342953081951e-05, "loss": 2.2347, "step": 20234 }, { "epoch": 0.26, "grad_norm": 3.397714376449585, "learning_rate": 1.9933417425691103e-05, "loss": 1.7671, "step": 20235 }, { "epoch": 0.26, "grad_norm": 3.990737199783325, "learning_rate": 1.993340531946588e-05, "loss": 1.9542, "step": 20236 }, { "epoch": 0.26, "grad_norm": 4.472171783447266, "learning_rate": 1.9933393212143846e-05, "loss": 2.6189, "step": 20237 }, { "epoch": 0.26, "grad_norm": 4.131335735321045, "learning_rate": 1.9933381103724994e-05, "loss": 2.4426, "step": 20238 }, { "epoch": 0.26, "grad_norm": 4.169563293457031, "learning_rate": 1.9933368994209327e-05, "loss": 2.0773, "step": 20239 }, { "epoch": 0.26, "grad_norm": 4.207061767578125, "learning_rate": 1.9933356883596847e-05, "loss": 2.1381, "step": 20240 }, { "epoch": 0.26, "grad_norm": 4.121243953704834, "learning_rate": 1.9933344771887557e-05, "loss": 1.9572, "step": 20241 }, { "epoch": 0.26, "grad_norm": 4.964822292327881, "learning_rate": 1.9933332659081458e-05, "loss": 2.5603, "step": 20242 }, { "epoch": 0.26, "grad_norm": 4.1948089599609375, "learning_rate": 1.9933320545178546e-05, "loss": 2.5312, "step": 20243 }, { "epoch": 0.26, "grad_norm": 4.466917514801025, "learning_rate": 1.993330843017883e-05, "loss": 2.1246, "step": 20244 }, { "epoch": 0.26, "grad_norm": 4.406714916229248, "learning_rate": 1.9933296314082307e-05, "loss": 2.0368, "step": 20245 }, { "epoch": 0.26, "grad_norm": 4.1805620193481445, "learning_rate": 1.9933284196888984e-05, "loss": 1.9868, "step": 20246 }, { "epoch": 0.26, "grad_norm": 3.859311580657959, "learning_rate": 1.9933272078598855e-05, "loss": 2.025, "step": 20247 }, { "epoch": 0.26, "grad_norm": 4.045276641845703, "learning_rate": 1.9933259959211923e-05, "loss": 2.0997, "step": 20248 }, { "epoch": 0.26, "grad_norm": 3.6667559146881104, "learning_rate": 1.9933247838728192e-05, "loss": 1.9672, "step": 20249 }, { "epoch": 0.26, "grad_norm": 3.981501340866089, "learning_rate": 1.9933235717147665e-05, "loss": 1.995, "step": 20250 }, { "epoch": 0.26, "grad_norm": 3.9729621410369873, "learning_rate": 1.9933223594470335e-05, "loss": 2.1764, "step": 20251 }, { "epoch": 0.26, "grad_norm": 4.155476093292236, "learning_rate": 1.993321147069621e-05, "loss": 1.9931, "step": 20252 }, { "epoch": 0.26, "grad_norm": 4.16851282119751, "learning_rate": 1.993319934582529e-05, "loss": 2.2252, "step": 20253 }, { "epoch": 0.26, "grad_norm": 4.1016716957092285, "learning_rate": 1.9933187219857578e-05, "loss": 2.0127, "step": 20254 }, { "epoch": 0.26, "grad_norm": 4.4315972328186035, "learning_rate": 1.9933175092793072e-05, "loss": 2.3782, "step": 20255 }, { "epoch": 0.26, "grad_norm": 3.386714458465576, "learning_rate": 1.9933162964631773e-05, "loss": 1.731, "step": 20256 }, { "epoch": 0.26, "grad_norm": 4.70259428024292, "learning_rate": 1.993315083537369e-05, "loss": 2.3412, "step": 20257 }, { "epoch": 0.26, "grad_norm": 3.807040214538574, "learning_rate": 1.9933138705018815e-05, "loss": 1.8985, "step": 20258 }, { "epoch": 0.26, "grad_norm": 4.34074592590332, "learning_rate": 1.9933126573567152e-05, "loss": 2.1477, "step": 20259 }, { "epoch": 0.26, "grad_norm": 4.298254013061523, "learning_rate": 1.9933114441018704e-05, "loss": 2.5884, "step": 20260 }, { "epoch": 0.26, "grad_norm": 4.653984069824219, "learning_rate": 1.9933102307373474e-05, "loss": 2.599, "step": 20261 }, { "epoch": 0.26, "grad_norm": 3.9994425773620605, "learning_rate": 1.9933090172631457e-05, "loss": 2.2336, "step": 20262 }, { "epoch": 0.26, "grad_norm": 4.506600856781006, "learning_rate": 1.9933078036792662e-05, "loss": 2.5031, "step": 20263 }, { "epoch": 0.26, "grad_norm": 4.456540107727051, "learning_rate": 1.9933065899857085e-05, "loss": 2.7392, "step": 20264 }, { "epoch": 0.26, "grad_norm": 3.930190324783325, "learning_rate": 1.9933053761824728e-05, "loss": 2.144, "step": 20265 }, { "epoch": 0.26, "grad_norm": 4.434183120727539, "learning_rate": 1.9933041622695596e-05, "loss": 2.418, "step": 20266 }, { "epoch": 0.26, "grad_norm": 4.366336822509766, "learning_rate": 1.9933029482469685e-05, "loss": 2.9392, "step": 20267 }, { "epoch": 0.26, "grad_norm": 3.979034662246704, "learning_rate": 1.9933017341147002e-05, "loss": 2.3171, "step": 20268 }, { "epoch": 0.26, "grad_norm": 4.368365287780762, "learning_rate": 1.993300519872754e-05, "loss": 2.1438, "step": 20269 }, { "epoch": 0.26, "grad_norm": 3.614835023880005, "learning_rate": 1.9932993055211313e-05, "loss": 1.8833, "step": 20270 }, { "epoch": 0.26, "grad_norm": 3.595540761947632, "learning_rate": 1.993298091059831e-05, "loss": 1.8812, "step": 20271 }, { "epoch": 0.26, "grad_norm": 4.253841876983643, "learning_rate": 1.993296876488854e-05, "loss": 2.0864, "step": 20272 }, { "epoch": 0.26, "grad_norm": 4.003468990325928, "learning_rate": 1.9932956618081998e-05, "loss": 1.7906, "step": 20273 }, { "epoch": 0.26, "grad_norm": 4.3552069664001465, "learning_rate": 1.9932944470178693e-05, "loss": 2.2305, "step": 20274 }, { "epoch": 0.26, "grad_norm": 3.9841978549957275, "learning_rate": 1.9932932321178618e-05, "loss": 2.0263, "step": 20275 }, { "epoch": 0.26, "grad_norm": 3.8829822540283203, "learning_rate": 1.9932920171081785e-05, "loss": 2.1929, "step": 20276 }, { "epoch": 0.26, "grad_norm": 3.586716890335083, "learning_rate": 1.9932908019888187e-05, "loss": 1.6422, "step": 20277 }, { "epoch": 0.26, "grad_norm": 4.145121097564697, "learning_rate": 1.9932895867597826e-05, "loss": 2.1577, "step": 20278 }, { "epoch": 0.26, "grad_norm": 3.7857394218444824, "learning_rate": 1.9932883714210704e-05, "loss": 2.1354, "step": 20279 }, { "epoch": 0.26, "grad_norm": 3.7433788776397705, "learning_rate": 1.9932871559726826e-05, "loss": 2.1596, "step": 20280 }, { "epoch": 0.26, "grad_norm": 4.208196640014648, "learning_rate": 1.9932859404146187e-05, "loss": 2.5069, "step": 20281 }, { "epoch": 0.26, "grad_norm": 3.887106418609619, "learning_rate": 1.9932847247468792e-05, "loss": 1.9081, "step": 20282 }, { "epoch": 0.26, "grad_norm": 4.323370456695557, "learning_rate": 1.9932835089694645e-05, "loss": 2.3658, "step": 20283 }, { "epoch": 0.26, "grad_norm": 4.369787693023682, "learning_rate": 1.9932822930823744e-05, "loss": 2.1336, "step": 20284 }, { "epoch": 0.26, "grad_norm": 4.206885814666748, "learning_rate": 1.9932810770856087e-05, "loss": 2.3694, "step": 20285 }, { "epoch": 0.26, "grad_norm": 3.6717820167541504, "learning_rate": 1.9932798609791685e-05, "loss": 2.0072, "step": 20286 }, { "epoch": 0.26, "grad_norm": 3.840121269226074, "learning_rate": 1.993278644763053e-05, "loss": 1.8862, "step": 20287 }, { "epoch": 0.26, "grad_norm": 4.269826889038086, "learning_rate": 1.9932774284372625e-05, "loss": 2.3294, "step": 20288 }, { "epoch": 0.26, "grad_norm": 3.8837392330169678, "learning_rate": 1.9932762120017978e-05, "loss": 1.8009, "step": 20289 }, { "epoch": 0.26, "grad_norm": 3.628326892852783, "learning_rate": 1.9932749954566582e-05, "loss": 1.9435, "step": 20290 }, { "epoch": 0.26, "grad_norm": 4.043380260467529, "learning_rate": 1.9932737788018445e-05, "loss": 1.7375, "step": 20291 }, { "epoch": 0.26, "grad_norm": 4.0137834548950195, "learning_rate": 1.993272562037356e-05, "loss": 1.8735, "step": 20292 }, { "epoch": 0.26, "grad_norm": 3.394437074661255, "learning_rate": 1.993271345163194e-05, "loss": 1.6751, "step": 20293 }, { "epoch": 0.26, "grad_norm": 3.5613934993743896, "learning_rate": 1.9932701281793576e-05, "loss": 2.0859, "step": 20294 }, { "epoch": 0.26, "grad_norm": 3.947056293487549, "learning_rate": 1.9932689110858476e-05, "loss": 1.9878, "step": 20295 }, { "epoch": 0.26, "grad_norm": 3.868805170059204, "learning_rate": 1.9932676938826634e-05, "loss": 1.8694, "step": 20296 }, { "epoch": 0.26, "grad_norm": 4.108523845672607, "learning_rate": 1.993266476569806e-05, "loss": 2.263, "step": 20297 }, { "epoch": 0.26, "grad_norm": 3.7416162490844727, "learning_rate": 1.993265259147275e-05, "loss": 1.7154, "step": 20298 }, { "epoch": 0.26, "grad_norm": 4.255414009094238, "learning_rate": 1.9932640416150705e-05, "loss": 2.4937, "step": 20299 }, { "epoch": 0.26, "grad_norm": 3.9200520515441895, "learning_rate": 1.993262823973193e-05, "loss": 2.1345, "step": 20300 }, { "epoch": 0.26, "grad_norm": 3.8110430240631104, "learning_rate": 1.9932616062216423e-05, "loss": 2.3403, "step": 20301 }, { "epoch": 0.26, "grad_norm": 3.959507465362549, "learning_rate": 1.993260388360419e-05, "loss": 2.2368, "step": 20302 }, { "epoch": 0.26, "grad_norm": 4.3255791664123535, "learning_rate": 1.9932591703895226e-05, "loss": 1.7473, "step": 20303 }, { "epoch": 0.26, "grad_norm": 4.345516204833984, "learning_rate": 1.9932579523089536e-05, "loss": 2.576, "step": 20304 }, { "epoch": 0.26, "grad_norm": 4.129347324371338, "learning_rate": 1.993256734118712e-05, "loss": 2.3905, "step": 20305 }, { "epoch": 0.26, "grad_norm": 4.071479797363281, "learning_rate": 1.993255515818798e-05, "loss": 1.8904, "step": 20306 }, { "epoch": 0.26, "grad_norm": 4.022784233093262, "learning_rate": 1.993254297409212e-05, "loss": 1.8695, "step": 20307 }, { "epoch": 0.26, "grad_norm": 3.5903072357177734, "learning_rate": 1.9932530788899536e-05, "loss": 2.0138, "step": 20308 }, { "epoch": 0.26, "grad_norm": 3.8979673385620117, "learning_rate": 1.9932518602610232e-05, "loss": 1.6801, "step": 20309 }, { "epoch": 0.26, "grad_norm": 4.1289191246032715, "learning_rate": 1.9932506415224213e-05, "loss": 2.21, "step": 20310 }, { "epoch": 0.26, "grad_norm": 3.6621365547180176, "learning_rate": 1.9932494226741473e-05, "loss": 1.9016, "step": 20311 }, { "epoch": 0.26, "grad_norm": 3.8915648460388184, "learning_rate": 1.993248203716202e-05, "loss": 2.1667, "step": 20312 }, { "epoch": 0.26, "grad_norm": 3.7817161083221436, "learning_rate": 1.993246984648585e-05, "loss": 2.2997, "step": 20313 }, { "epoch": 0.26, "grad_norm": 3.2882211208343506, "learning_rate": 1.9932457654712966e-05, "loss": 1.6618, "step": 20314 }, { "epoch": 0.26, "grad_norm": 3.9720280170440674, "learning_rate": 1.993244546184337e-05, "loss": 2.225, "step": 20315 }, { "epoch": 0.26, "grad_norm": 3.8543825149536133, "learning_rate": 1.993243326787707e-05, "loss": 2.2102, "step": 20316 }, { "epoch": 0.26, "grad_norm": 4.73820161819458, "learning_rate": 1.9932421072814056e-05, "loss": 2.2065, "step": 20317 }, { "epoch": 0.26, "grad_norm": 4.054030418395996, "learning_rate": 1.9932408876654334e-05, "loss": 2.2298, "step": 20318 }, { "epoch": 0.26, "grad_norm": 4.034531116485596, "learning_rate": 1.9932396679397905e-05, "loss": 1.9412, "step": 20319 }, { "epoch": 0.26, "grad_norm": 3.6855900287628174, "learning_rate": 1.993238448104477e-05, "loss": 2.1732, "step": 20320 }, { "epoch": 0.26, "grad_norm": 4.479857444763184, "learning_rate": 1.9932372281594937e-05, "loss": 2.0415, "step": 20321 }, { "epoch": 0.26, "grad_norm": 4.021234512329102, "learning_rate": 1.9932360081048395e-05, "loss": 1.801, "step": 20322 }, { "epoch": 0.26, "grad_norm": 3.8089687824249268, "learning_rate": 1.9932347879405155e-05, "loss": 1.8395, "step": 20323 }, { "epoch": 0.26, "grad_norm": 3.612243413925171, "learning_rate": 1.9932335676665214e-05, "loss": 1.7954, "step": 20324 }, { "epoch": 0.26, "grad_norm": 4.266246795654297, "learning_rate": 1.9932323472828575e-05, "loss": 2.4623, "step": 20325 }, { "epoch": 0.26, "grad_norm": 4.293485641479492, "learning_rate": 1.9932311267895243e-05, "loss": 2.1423, "step": 20326 }, { "epoch": 0.26, "grad_norm": 4.347574234008789, "learning_rate": 1.993229906186521e-05, "loss": 2.4419, "step": 20327 }, { "epoch": 0.26, "grad_norm": 4.585413932800293, "learning_rate": 1.9932286854738488e-05, "loss": 2.2214, "step": 20328 }, { "epoch": 0.26, "grad_norm": 3.9684677124023438, "learning_rate": 1.993227464651507e-05, "loss": 2.1982, "step": 20329 }, { "epoch": 0.26, "grad_norm": 4.042286396026611, "learning_rate": 1.993226243719496e-05, "loss": 2.3911, "step": 20330 }, { "epoch": 0.26, "grad_norm": 3.763606548309326, "learning_rate": 1.9932250226778156e-05, "loss": 1.7433, "step": 20331 }, { "epoch": 0.26, "grad_norm": 4.411806106567383, "learning_rate": 1.9932238015264668e-05, "loss": 1.7908, "step": 20332 }, { "epoch": 0.26, "grad_norm": 3.535005807876587, "learning_rate": 1.993222580265449e-05, "loss": 1.9945, "step": 20333 }, { "epoch": 0.26, "grad_norm": 3.581268072128296, "learning_rate": 1.993221358894763e-05, "loss": 1.727, "step": 20334 }, { "epoch": 0.26, "grad_norm": 4.462015628814697, "learning_rate": 1.993220137414408e-05, "loss": 2.0747, "step": 20335 }, { "epoch": 0.26, "grad_norm": 3.9569098949432373, "learning_rate": 1.993218915824385e-05, "loss": 1.9793, "step": 20336 }, { "epoch": 0.26, "grad_norm": 3.807772636413574, "learning_rate": 1.9932176941246934e-05, "loss": 2.1752, "step": 20337 }, { "epoch": 0.26, "grad_norm": 4.500911235809326, "learning_rate": 1.993216472315334e-05, "loss": 2.2745, "step": 20338 }, { "epoch": 0.26, "grad_norm": 3.5375449657440186, "learning_rate": 1.9932152503963066e-05, "loss": 1.8471, "step": 20339 }, { "epoch": 0.26, "grad_norm": 4.513862133026123, "learning_rate": 1.9932140283676116e-05, "loss": 2.0148, "step": 20340 }, { "epoch": 0.26, "grad_norm": 3.891995429992676, "learning_rate": 1.9932128062292485e-05, "loss": 1.9724, "step": 20341 }, { "epoch": 0.26, "grad_norm": 4.416954040527344, "learning_rate": 1.9932115839812184e-05, "loss": 2.125, "step": 20342 }, { "epoch": 0.26, "grad_norm": 3.8586044311523438, "learning_rate": 1.9932103616235202e-05, "loss": 2.0543, "step": 20343 }, { "epoch": 0.26, "grad_norm": 4.751982688903809, "learning_rate": 1.9932091391561553e-05, "loss": 1.8608, "step": 20344 }, { "epoch": 0.26, "grad_norm": 4.168881893157959, "learning_rate": 1.993207916579123e-05, "loss": 2.4882, "step": 20345 }, { "epoch": 0.26, "grad_norm": 4.09754753112793, "learning_rate": 1.9932066938924237e-05, "loss": 2.0917, "step": 20346 }, { "epoch": 0.26, "grad_norm": 3.559532880783081, "learning_rate": 1.9932054710960576e-05, "loss": 1.9762, "step": 20347 }, { "epoch": 0.26, "grad_norm": 3.2479166984558105, "learning_rate": 1.9932042481900245e-05, "loss": 1.4816, "step": 20348 }, { "epoch": 0.26, "grad_norm": 4.071162700653076, "learning_rate": 1.9932030251743254e-05, "loss": 2.5003, "step": 20349 }, { "epoch": 0.26, "grad_norm": 4.0560808181762695, "learning_rate": 1.9932018020489596e-05, "loss": 2.2879, "step": 20350 }, { "epoch": 0.26, "grad_norm": 3.6478521823883057, "learning_rate": 1.993200578813927e-05, "loss": 1.6607, "step": 20351 }, { "epoch": 0.26, "grad_norm": 4.017640590667725, "learning_rate": 1.9931993554692288e-05, "loss": 1.8316, "step": 20352 }, { "epoch": 0.26, "grad_norm": 4.11937952041626, "learning_rate": 1.993198132014864e-05, "loss": 2.1011, "step": 20353 }, { "epoch": 0.26, "grad_norm": 4.363526821136475, "learning_rate": 1.9931969084508335e-05, "loss": 2.0554, "step": 20354 }, { "epoch": 0.26, "grad_norm": 4.051141262054443, "learning_rate": 1.9931956847771375e-05, "loss": 2.2022, "step": 20355 }, { "epoch": 0.26, "grad_norm": 4.256369590759277, "learning_rate": 1.9931944609937758e-05, "loss": 2.2884, "step": 20356 }, { "epoch": 0.26, "grad_norm": 3.743475914001465, "learning_rate": 1.9931932371007484e-05, "loss": 1.5805, "step": 20357 }, { "epoch": 0.26, "grad_norm": 4.44750452041626, "learning_rate": 1.9931920130980553e-05, "loss": 2.5131, "step": 20358 }, { "epoch": 0.26, "grad_norm": 4.138271808624268, "learning_rate": 1.9931907889856975e-05, "loss": 2.2928, "step": 20359 }, { "epoch": 0.26, "grad_norm": 4.7238640785217285, "learning_rate": 1.9931895647636744e-05, "loss": 2.0531, "step": 20360 }, { "epoch": 0.26, "grad_norm": 3.9534096717834473, "learning_rate": 1.9931883404319863e-05, "loss": 1.9991, "step": 20361 }, { "epoch": 0.26, "grad_norm": 4.4399847984313965, "learning_rate": 1.9931871159906334e-05, "loss": 2.0248, "step": 20362 }, { "epoch": 0.26, "grad_norm": 4.218936920166016, "learning_rate": 1.9931858914396156e-05, "loss": 1.824, "step": 20363 }, { "epoch": 0.26, "grad_norm": 4.218588352203369, "learning_rate": 1.9931846667789334e-05, "loss": 2.0935, "step": 20364 }, { "epoch": 0.26, "grad_norm": 4.2209601402282715, "learning_rate": 1.9931834420085866e-05, "loss": 2.5396, "step": 20365 }, { "epoch": 0.26, "grad_norm": 4.369989395141602, "learning_rate": 1.9931822171285757e-05, "loss": 2.0756, "step": 20366 }, { "epoch": 0.26, "grad_norm": 4.2859625816345215, "learning_rate": 1.993180992138901e-05, "loss": 2.022, "step": 20367 }, { "epoch": 0.26, "grad_norm": 4.103828430175781, "learning_rate": 1.9931797670395613e-05, "loss": 1.9298, "step": 20368 }, { "epoch": 0.26, "grad_norm": 4.052725315093994, "learning_rate": 1.993178541830558e-05, "loss": 2.1234, "step": 20369 }, { "epoch": 0.26, "grad_norm": 3.9013633728027344, "learning_rate": 1.9931773165118916e-05, "loss": 2.0287, "step": 20370 }, { "epoch": 0.26, "grad_norm": 3.7581236362457275, "learning_rate": 1.993176091083561e-05, "loss": 1.784, "step": 20371 }, { "epoch": 0.26, "grad_norm": 4.597237586975098, "learning_rate": 1.9931748655455672e-05, "loss": 2.4011, "step": 20372 }, { "epoch": 0.26, "grad_norm": 4.082302093505859, "learning_rate": 1.99317363989791e-05, "loss": 2.0187, "step": 20373 }, { "epoch": 0.26, "grad_norm": 3.922236919403076, "learning_rate": 1.9931724141405896e-05, "loss": 2.1483, "step": 20374 }, { "epoch": 0.26, "grad_norm": 4.333569526672363, "learning_rate": 1.9931711882736058e-05, "loss": 2.2035, "step": 20375 }, { "epoch": 0.26, "grad_norm": 4.29524040222168, "learning_rate": 1.993169962296959e-05, "loss": 2.4018, "step": 20376 }, { "epoch": 0.26, "grad_norm": 4.108404636383057, "learning_rate": 1.99316873621065e-05, "loss": 2.0475, "step": 20377 }, { "epoch": 0.26, "grad_norm": 3.951671838760376, "learning_rate": 1.9931675100146778e-05, "loss": 2.2484, "step": 20378 }, { "epoch": 0.26, "grad_norm": 4.143736839294434, "learning_rate": 1.993166283709043e-05, "loss": 2.0743, "step": 20379 }, { "epoch": 0.26, "grad_norm": 4.141323089599609, "learning_rate": 1.993165057293746e-05, "loss": 2.0597, "step": 20380 }, { "epoch": 0.26, "grad_norm": 4.302436828613281, "learning_rate": 1.993163830768787e-05, "loss": 2.2775, "step": 20381 }, { "epoch": 0.26, "grad_norm": 3.7100343704223633, "learning_rate": 1.9931626041341657e-05, "loss": 2.1145, "step": 20382 }, { "epoch": 0.26, "grad_norm": 3.855710983276367, "learning_rate": 1.9931613773898825e-05, "loss": 2.3483, "step": 20383 }, { "epoch": 0.26, "grad_norm": 3.5999743938446045, "learning_rate": 1.993160150535937e-05, "loss": 1.7233, "step": 20384 }, { "epoch": 0.26, "grad_norm": 4.246655464172363, "learning_rate": 1.99315892357233e-05, "loss": 1.816, "step": 20385 }, { "epoch": 0.26, "grad_norm": 3.614824056625366, "learning_rate": 1.993157696499061e-05, "loss": 1.8684, "step": 20386 }, { "epoch": 0.26, "grad_norm": 4.1286773681640625, "learning_rate": 1.9931564693161314e-05, "loss": 2.3183, "step": 20387 }, { "epoch": 0.26, "grad_norm": 5.222372055053711, "learning_rate": 1.99315524202354e-05, "loss": 2.4616, "step": 20388 }, { "epoch": 0.26, "grad_norm": 5.0767998695373535, "learning_rate": 1.9931540146212876e-05, "loss": 2.5293, "step": 20389 }, { "epoch": 0.26, "grad_norm": 4.433199882507324, "learning_rate": 1.9931527871093736e-05, "loss": 2.5092, "step": 20390 }, { "epoch": 0.26, "grad_norm": 3.9818427562713623, "learning_rate": 1.9931515594877993e-05, "loss": 1.9775, "step": 20391 }, { "epoch": 0.26, "grad_norm": 4.345695495605469, "learning_rate": 1.993150331756564e-05, "loss": 2.4099, "step": 20392 }, { "epoch": 0.26, "grad_norm": 3.566204786300659, "learning_rate": 1.9931491039156682e-05, "loss": 2.0056, "step": 20393 }, { "epoch": 0.26, "grad_norm": 3.7108943462371826, "learning_rate": 1.9931478759651118e-05, "loss": 1.7887, "step": 20394 }, { "epoch": 0.26, "grad_norm": 4.212925910949707, "learning_rate": 1.9931466479048947e-05, "loss": 2.3263, "step": 20395 }, { "epoch": 0.26, "grad_norm": 4.3467278480529785, "learning_rate": 1.9931454197350177e-05, "loss": 2.6696, "step": 20396 }, { "epoch": 0.26, "grad_norm": 4.251941680908203, "learning_rate": 1.9931441914554805e-05, "loss": 2.2828, "step": 20397 }, { "epoch": 0.26, "grad_norm": 4.210996627807617, "learning_rate": 1.9931429630662832e-05, "loss": 1.8459, "step": 20398 }, { "epoch": 0.26, "grad_norm": 4.022512912750244, "learning_rate": 1.993141734567426e-05, "loss": 1.7683, "step": 20399 }, { "epoch": 0.26, "grad_norm": 4.517765045166016, "learning_rate": 1.9931405059589097e-05, "loss": 2.2126, "step": 20400 }, { "epoch": 0.26, "grad_norm": 3.8813188076019287, "learning_rate": 1.9931392772407334e-05, "loss": 2.0575, "step": 20401 }, { "epoch": 0.26, "grad_norm": 3.779376983642578, "learning_rate": 1.9931380484128978e-05, "loss": 1.7119, "step": 20402 }, { "epoch": 0.26, "grad_norm": 4.51440954208374, "learning_rate": 1.993136819475403e-05, "loss": 2.1313, "step": 20403 }, { "epoch": 0.26, "grad_norm": 3.8566536903381348, "learning_rate": 1.993135590428249e-05, "loss": 2.0816, "step": 20404 }, { "epoch": 0.26, "grad_norm": 4.405356407165527, "learning_rate": 1.9931343612714355e-05, "loss": 2.2186, "step": 20405 }, { "epoch": 0.26, "grad_norm": 3.983555316925049, "learning_rate": 1.9931331320049635e-05, "loss": 2.0957, "step": 20406 }, { "epoch": 0.26, "grad_norm": 4.224784851074219, "learning_rate": 1.9931319026288327e-05, "loss": 1.892, "step": 20407 }, { "epoch": 0.26, "grad_norm": 5.017052173614502, "learning_rate": 1.9931306731430436e-05, "loss": 2.5321, "step": 20408 }, { "epoch": 0.26, "grad_norm": 3.6392693519592285, "learning_rate": 1.9931294435475956e-05, "loss": 1.871, "step": 20409 }, { "epoch": 0.26, "grad_norm": 3.75970196723938, "learning_rate": 1.9931282138424897e-05, "loss": 1.824, "step": 20410 }, { "epoch": 0.26, "grad_norm": 4.814873218536377, "learning_rate": 1.9931269840277248e-05, "loss": 2.735, "step": 20411 }, { "epoch": 0.26, "grad_norm": 3.7097055912017822, "learning_rate": 1.9931257541033028e-05, "loss": 1.5013, "step": 20412 }, { "epoch": 0.26, "grad_norm": 4.985189914703369, "learning_rate": 1.9931245240692225e-05, "loss": 2.2091, "step": 20413 }, { "epoch": 0.26, "grad_norm": 3.8381052017211914, "learning_rate": 1.9931232939254843e-05, "loss": 1.6806, "step": 20414 }, { "epoch": 0.26, "grad_norm": 4.302628517150879, "learning_rate": 1.993122063672088e-05, "loss": 2.083, "step": 20415 }, { "epoch": 0.26, "grad_norm": 3.558183193206787, "learning_rate": 1.993120833309035e-05, "loss": 1.8068, "step": 20416 }, { "epoch": 0.26, "grad_norm": 4.159493446350098, "learning_rate": 1.9931196028363243e-05, "loss": 2.2623, "step": 20417 }, { "epoch": 0.26, "grad_norm": 3.9750146865844727, "learning_rate": 1.993118372253956e-05, "loss": 1.6918, "step": 20418 }, { "epoch": 0.26, "grad_norm": 3.7016561031341553, "learning_rate": 1.993117141561931e-05, "loss": 1.7347, "step": 20419 }, { "epoch": 0.27, "grad_norm": 4.1371073722839355, "learning_rate": 1.9931159107602493e-05, "loss": 2.1197, "step": 20420 }, { "epoch": 0.27, "grad_norm": 3.89211368560791, "learning_rate": 1.99311467984891e-05, "loss": 1.9176, "step": 20421 }, { "epoch": 0.27, "grad_norm": 4.525136470794678, "learning_rate": 1.993113448827914e-05, "loss": 2.2622, "step": 20422 }, { "epoch": 0.27, "grad_norm": 3.555377960205078, "learning_rate": 1.9931122176972623e-05, "loss": 1.6136, "step": 20423 }, { "epoch": 0.27, "grad_norm": 4.058402061462402, "learning_rate": 1.9931109864569532e-05, "loss": 1.99, "step": 20424 }, { "epoch": 0.27, "grad_norm": 4.906276226043701, "learning_rate": 1.9931097551069886e-05, "loss": 2.5825, "step": 20425 }, { "epoch": 0.27, "grad_norm": 4.31516170501709, "learning_rate": 1.9931085236473674e-05, "loss": 2.6235, "step": 20426 }, { "epoch": 0.27, "grad_norm": 3.7386133670806885, "learning_rate": 1.9931072920780904e-05, "loss": 1.7794, "step": 20427 }, { "epoch": 0.27, "grad_norm": 5.0961079597473145, "learning_rate": 1.993106060399157e-05, "loss": 2.2986, "step": 20428 }, { "epoch": 0.27, "grad_norm": 4.292500019073486, "learning_rate": 1.9931048286105684e-05, "loss": 2.3906, "step": 20429 }, { "epoch": 0.27, "grad_norm": 4.168118953704834, "learning_rate": 1.993103596712324e-05, "loss": 2.0818, "step": 20430 }, { "epoch": 0.27, "grad_norm": 5.312855243682861, "learning_rate": 1.9931023647044242e-05, "loss": 2.1485, "step": 20431 }, { "epoch": 0.27, "grad_norm": 4.507339000701904, "learning_rate": 1.993101132586869e-05, "loss": 2.5542, "step": 20432 }, { "epoch": 0.27, "grad_norm": 4.147518157958984, "learning_rate": 1.9930999003596583e-05, "loss": 2.229, "step": 20433 }, { "epoch": 0.27, "grad_norm": 3.8845927715301514, "learning_rate": 1.9930986680227926e-05, "loss": 1.9955, "step": 20434 }, { "epoch": 0.27, "grad_norm": 4.3316731452941895, "learning_rate": 1.9930974355762723e-05, "loss": 2.2157, "step": 20435 }, { "epoch": 0.27, "grad_norm": 3.8741865158081055, "learning_rate": 1.993096203020097e-05, "loss": 1.947, "step": 20436 }, { "epoch": 0.27, "grad_norm": 3.890211582183838, "learning_rate": 1.993094970354267e-05, "loss": 1.9179, "step": 20437 }, { "epoch": 0.27, "grad_norm": 4.326900482177734, "learning_rate": 1.9930937375787824e-05, "loss": 2.6042, "step": 20438 }, { "epoch": 0.27, "grad_norm": 3.9697389602661133, "learning_rate": 1.9930925046936434e-05, "loss": 2.106, "step": 20439 }, { "epoch": 0.27, "grad_norm": 3.5869808197021484, "learning_rate": 1.9930912716988503e-05, "loss": 2.0161, "step": 20440 }, { "epoch": 0.27, "grad_norm": 4.011281967163086, "learning_rate": 1.993090038594403e-05, "loss": 2.031, "step": 20441 }, { "epoch": 0.27, "grad_norm": 3.8977038860321045, "learning_rate": 1.9930888053803018e-05, "loss": 1.5737, "step": 20442 }, { "epoch": 0.27, "grad_norm": 4.4551849365234375, "learning_rate": 1.9930875720565467e-05, "loss": 2.2564, "step": 20443 }, { "epoch": 0.27, "grad_norm": 4.09979772567749, "learning_rate": 1.993086338623138e-05, "loss": 2.4242, "step": 20444 }, { "epoch": 0.27, "grad_norm": 4.872401237487793, "learning_rate": 1.9930851050800755e-05, "loss": 2.738, "step": 20445 }, { "epoch": 0.27, "grad_norm": 3.853806495666504, "learning_rate": 1.9930838714273597e-05, "loss": 2.3569, "step": 20446 }, { "epoch": 0.27, "grad_norm": 3.9328246116638184, "learning_rate": 1.9930826376649907e-05, "loss": 1.7768, "step": 20447 }, { "epoch": 0.27, "grad_norm": 4.1623148918151855, "learning_rate": 1.9930814037929682e-05, "loss": 2.3165, "step": 20448 }, { "epoch": 0.27, "grad_norm": 3.7542061805725098, "learning_rate": 1.9930801698112925e-05, "loss": 1.8614, "step": 20449 }, { "epoch": 0.27, "grad_norm": 4.287646770477295, "learning_rate": 1.9930789357199644e-05, "loss": 2.449, "step": 20450 }, { "epoch": 0.27, "grad_norm": 4.724610328674316, "learning_rate": 1.9930777015189834e-05, "loss": 2.8554, "step": 20451 }, { "epoch": 0.27, "grad_norm": 3.968489646911621, "learning_rate": 1.9930764672083497e-05, "loss": 2.4865, "step": 20452 }, { "epoch": 0.27, "grad_norm": 3.774796724319458, "learning_rate": 1.9930752327880638e-05, "loss": 2.2672, "step": 20453 }, { "epoch": 0.27, "grad_norm": 3.8955283164978027, "learning_rate": 1.993073998258125e-05, "loss": 2.3576, "step": 20454 }, { "epoch": 0.27, "grad_norm": 4.423148155212402, "learning_rate": 1.9930727636185345e-05, "loss": 2.4812, "step": 20455 }, { "epoch": 0.27, "grad_norm": 3.663684129714966, "learning_rate": 1.9930715288692918e-05, "loss": 1.5118, "step": 20456 }, { "epoch": 0.27, "grad_norm": 4.236234188079834, "learning_rate": 1.9930702940103966e-05, "loss": 2.2922, "step": 20457 }, { "epoch": 0.27, "grad_norm": 4.025634765625, "learning_rate": 1.9930690590418503e-05, "loss": 1.9258, "step": 20458 }, { "epoch": 0.27, "grad_norm": 4.194299221038818, "learning_rate": 1.993067823963652e-05, "loss": 2.3872, "step": 20459 }, { "epoch": 0.27, "grad_norm": 4.379152297973633, "learning_rate": 1.9930665887758023e-05, "loss": 2.6159, "step": 20460 }, { "epoch": 0.27, "grad_norm": 3.9442689418792725, "learning_rate": 1.993065353478301e-05, "loss": 2.1555, "step": 20461 }, { "epoch": 0.27, "grad_norm": 4.376056671142578, "learning_rate": 1.9930641180711485e-05, "loss": 1.9176, "step": 20462 }, { "epoch": 0.27, "grad_norm": 4.582681655883789, "learning_rate": 1.993062882554345e-05, "loss": 2.0813, "step": 20463 }, { "epoch": 0.27, "grad_norm": 4.682443141937256, "learning_rate": 1.9930616469278905e-05, "loss": 2.1923, "step": 20464 }, { "epoch": 0.27, "grad_norm": 4.1650614738464355, "learning_rate": 1.993060411191785e-05, "loss": 1.9717, "step": 20465 }, { "epoch": 0.27, "grad_norm": 3.665253162384033, "learning_rate": 1.993059175346029e-05, "loss": 1.8351, "step": 20466 }, { "epoch": 0.27, "grad_norm": 4.484789848327637, "learning_rate": 1.9930579393906222e-05, "loss": 2.4088, "step": 20467 }, { "epoch": 0.27, "grad_norm": 4.359847068786621, "learning_rate": 1.9930567033255654e-05, "loss": 2.011, "step": 20468 }, { "epoch": 0.27, "grad_norm": 3.7695882320404053, "learning_rate": 1.9930554671508577e-05, "loss": 2.0845, "step": 20469 }, { "epoch": 0.27, "grad_norm": 3.787916660308838, "learning_rate": 1.9930542308665003e-05, "loss": 2.5002, "step": 20470 }, { "epoch": 0.27, "grad_norm": 3.471635341644287, "learning_rate": 1.9930529944724924e-05, "loss": 1.8227, "step": 20471 }, { "epoch": 0.27, "grad_norm": 5.103334426879883, "learning_rate": 1.9930517579688348e-05, "loss": 2.5055, "step": 20472 }, { "epoch": 0.27, "grad_norm": 3.9990861415863037, "learning_rate": 1.9930505213555277e-05, "loss": 1.8234, "step": 20473 }, { "epoch": 0.27, "grad_norm": 3.9414117336273193, "learning_rate": 1.9930492846325705e-05, "loss": 2.1486, "step": 20474 }, { "epoch": 0.27, "grad_norm": 3.987196922302246, "learning_rate": 1.9930480477999642e-05, "loss": 2.1183, "step": 20475 }, { "epoch": 0.27, "grad_norm": 3.773081064224243, "learning_rate": 1.9930468108577082e-05, "loss": 1.7631, "step": 20476 }, { "epoch": 0.27, "grad_norm": 4.3984761238098145, "learning_rate": 1.9930455738058034e-05, "loss": 2.2603, "step": 20477 }, { "epoch": 0.27, "grad_norm": 3.555781364440918, "learning_rate": 1.9930443366442495e-05, "loss": 1.6897, "step": 20478 }, { "epoch": 0.27, "grad_norm": 4.082130432128906, "learning_rate": 1.9930430993730465e-05, "loss": 2.0406, "step": 20479 }, { "epoch": 0.27, "grad_norm": 4.209080696105957, "learning_rate": 1.9930418619921944e-05, "loss": 2.3044, "step": 20480 }, { "epoch": 0.27, "grad_norm": 3.974750518798828, "learning_rate": 1.9930406245016943e-05, "loss": 1.8989, "step": 20481 }, { "epoch": 0.27, "grad_norm": 3.618856430053711, "learning_rate": 1.9930393869015454e-05, "loss": 2.0118, "step": 20482 }, { "epoch": 0.27, "grad_norm": 3.8462812900543213, "learning_rate": 1.9930381491917477e-05, "loss": 2.0824, "step": 20483 }, { "epoch": 0.27, "grad_norm": 4.2581915855407715, "learning_rate": 1.993036911372302e-05, "loss": 2.6019, "step": 20484 }, { "epoch": 0.27, "grad_norm": 3.9175331592559814, "learning_rate": 1.993035673443208e-05, "loss": 2.2584, "step": 20485 }, { "epoch": 0.27, "grad_norm": 3.3502755165100098, "learning_rate": 1.9930344354044663e-05, "loss": 1.8976, "step": 20486 }, { "epoch": 0.27, "grad_norm": 3.713857650756836, "learning_rate": 1.993033197256077e-05, "loss": 1.6931, "step": 20487 }, { "epoch": 0.27, "grad_norm": 4.256346225738525, "learning_rate": 1.9930319589980396e-05, "loss": 2.1337, "step": 20488 }, { "epoch": 0.27, "grad_norm": 4.430305480957031, "learning_rate": 1.9930307206303545e-05, "loss": 1.708, "step": 20489 }, { "epoch": 0.27, "grad_norm": 4.1948089599609375, "learning_rate": 1.993029482153022e-05, "loss": 2.2376, "step": 20490 }, { "epoch": 0.27, "grad_norm": 4.3693132400512695, "learning_rate": 1.9930282435660424e-05, "loss": 2.1228, "step": 20491 }, { "epoch": 0.27, "grad_norm": 5.2495341300964355, "learning_rate": 1.9930270048694158e-05, "loss": 2.7058, "step": 20492 }, { "epoch": 0.27, "grad_norm": 3.5384292602539062, "learning_rate": 1.9930257660631418e-05, "loss": 2.029, "step": 20493 }, { "epoch": 0.27, "grad_norm": 4.009812831878662, "learning_rate": 1.993024527147221e-05, "loss": 1.6684, "step": 20494 }, { "epoch": 0.27, "grad_norm": 4.2202067375183105, "learning_rate": 1.9930232881216536e-05, "loss": 2.3393, "step": 20495 }, { "epoch": 0.27, "grad_norm": 4.126845836639404, "learning_rate": 1.9930220489864394e-05, "loss": 2.3123, "step": 20496 }, { "epoch": 0.27, "grad_norm": 3.5799038410186768, "learning_rate": 1.993020809741579e-05, "loss": 2.0331, "step": 20497 }, { "epoch": 0.27, "grad_norm": 3.467764377593994, "learning_rate": 1.993019570387072e-05, "loss": 1.7793, "step": 20498 }, { "epoch": 0.27, "grad_norm": 3.752480983734131, "learning_rate": 1.993018330922919e-05, "loss": 2.2092, "step": 20499 }, { "epoch": 0.27, "grad_norm": 4.119847774505615, "learning_rate": 1.9930170913491195e-05, "loss": 2.174, "step": 20500 }, { "epoch": 0.27, "grad_norm": 3.8471670150756836, "learning_rate": 1.9930158516656745e-05, "loss": 2.2903, "step": 20501 }, { "epoch": 0.27, "grad_norm": 4.159567356109619, "learning_rate": 1.9930146118725833e-05, "loss": 1.9666, "step": 20502 }, { "epoch": 0.27, "grad_norm": 4.445716857910156, "learning_rate": 1.993013371969847e-05, "loss": 2.0801, "step": 20503 }, { "epoch": 0.27, "grad_norm": 4.143642902374268, "learning_rate": 1.9930121319574647e-05, "loss": 2.1663, "step": 20504 }, { "epoch": 0.27, "grad_norm": 3.6788508892059326, "learning_rate": 1.993010891835437e-05, "loss": 1.9869, "step": 20505 }, { "epoch": 0.27, "grad_norm": 3.923358917236328, "learning_rate": 1.9930096516037645e-05, "loss": 1.7914, "step": 20506 }, { "epoch": 0.27, "grad_norm": 3.8589751720428467, "learning_rate": 1.9930084112624466e-05, "loss": 2.3062, "step": 20507 }, { "epoch": 0.27, "grad_norm": 4.499903678894043, "learning_rate": 1.9930071708114836e-05, "loss": 2.4463, "step": 20508 }, { "epoch": 0.27, "grad_norm": 4.1057939529418945, "learning_rate": 1.993005930250876e-05, "loss": 2.3635, "step": 20509 }, { "epoch": 0.27, "grad_norm": 3.838341474533081, "learning_rate": 1.9930046895806236e-05, "loss": 1.9031, "step": 20510 }, { "epoch": 0.27, "grad_norm": 4.599070072174072, "learning_rate": 1.9930034488007265e-05, "loss": 2.3448, "step": 20511 }, { "epoch": 0.27, "grad_norm": 3.9950969219207764, "learning_rate": 1.993002207911185e-05, "loss": 1.9294, "step": 20512 }, { "epoch": 0.27, "grad_norm": 3.932400941848755, "learning_rate": 1.9930009669119997e-05, "loss": 2.3505, "step": 20513 }, { "epoch": 0.27, "grad_norm": 4.906001091003418, "learning_rate": 1.99299972580317e-05, "loss": 2.9435, "step": 20514 }, { "epoch": 0.27, "grad_norm": 3.692974328994751, "learning_rate": 1.992998484584696e-05, "loss": 1.7429, "step": 20515 }, { "epoch": 0.27, "grad_norm": 4.329985618591309, "learning_rate": 1.992997243256578e-05, "loss": 2.1676, "step": 20516 }, { "epoch": 0.27, "grad_norm": 4.203260898590088, "learning_rate": 1.9929960018188167e-05, "loss": 2.0488, "step": 20517 }, { "epoch": 0.27, "grad_norm": 3.785921812057495, "learning_rate": 1.9929947602714116e-05, "loss": 1.7321, "step": 20518 }, { "epoch": 0.27, "grad_norm": 3.752469301223755, "learning_rate": 1.992993518614363e-05, "loss": 2.3156, "step": 20519 }, { "epoch": 0.27, "grad_norm": 4.009686470031738, "learning_rate": 1.9929922768476714e-05, "loss": 2.2201, "step": 20520 }, { "epoch": 0.27, "grad_norm": 4.529994964599609, "learning_rate": 1.9929910349713362e-05, "loss": 2.8, "step": 20521 }, { "epoch": 0.27, "grad_norm": 4.435249328613281, "learning_rate": 1.992989792985358e-05, "loss": 2.3449, "step": 20522 }, { "epoch": 0.27, "grad_norm": 3.817389965057373, "learning_rate": 1.992988550889737e-05, "loss": 1.9017, "step": 20523 }, { "epoch": 0.27, "grad_norm": 4.282207012176514, "learning_rate": 1.992987308684473e-05, "loss": 2.0905, "step": 20524 }, { "epoch": 0.27, "grad_norm": 4.4438862800598145, "learning_rate": 1.9929860663695665e-05, "loss": 2.1725, "step": 20525 }, { "epoch": 0.27, "grad_norm": 3.9230616092681885, "learning_rate": 1.9929848239450175e-05, "loss": 2.1149, "step": 20526 }, { "epoch": 0.27, "grad_norm": 3.81099009513855, "learning_rate": 1.9929835814108264e-05, "loss": 1.9748, "step": 20527 }, { "epoch": 0.27, "grad_norm": 3.7266640663146973, "learning_rate": 1.9929823387669927e-05, "loss": 1.9021, "step": 20528 }, { "epoch": 0.27, "grad_norm": 4.320706844329834, "learning_rate": 1.992981096013517e-05, "loss": 2.0906, "step": 20529 }, { "epoch": 0.27, "grad_norm": 3.941154956817627, "learning_rate": 1.9929798531503994e-05, "loss": 2.2536, "step": 20530 }, { "epoch": 0.27, "grad_norm": 4.303915023803711, "learning_rate": 1.99297861017764e-05, "loss": 2.7018, "step": 20531 }, { "epoch": 0.27, "grad_norm": 4.5443115234375, "learning_rate": 1.9929773670952392e-05, "loss": 2.6109, "step": 20532 }, { "epoch": 0.27, "grad_norm": 3.4073445796966553, "learning_rate": 1.9929761239031963e-05, "loss": 1.6786, "step": 20533 }, { "epoch": 0.27, "grad_norm": 4.2985029220581055, "learning_rate": 1.992974880601512e-05, "loss": 2.1679, "step": 20534 }, { "epoch": 0.27, "grad_norm": 4.933589458465576, "learning_rate": 1.9929736371901865e-05, "loss": 2.4716, "step": 20535 }, { "epoch": 0.27, "grad_norm": 3.7593870162963867, "learning_rate": 1.99297239366922e-05, "loss": 1.7747, "step": 20536 }, { "epoch": 0.27, "grad_norm": 3.6547889709472656, "learning_rate": 1.9929711500386126e-05, "loss": 1.8008, "step": 20537 }, { "epoch": 0.27, "grad_norm": 4.334352970123291, "learning_rate": 1.9929699062983643e-05, "loss": 2.1435, "step": 20538 }, { "epoch": 0.27, "grad_norm": 3.9885940551757812, "learning_rate": 1.9929686624484752e-05, "loss": 1.9291, "step": 20539 }, { "epoch": 0.27, "grad_norm": 4.446257591247559, "learning_rate": 1.9929674184889456e-05, "loss": 2.2341, "step": 20540 }, { "epoch": 0.27, "grad_norm": 3.919781446456909, "learning_rate": 1.9929661744197753e-05, "loss": 2.094, "step": 20541 }, { "epoch": 0.27, "grad_norm": 4.484367370605469, "learning_rate": 1.992964930240965e-05, "loss": 2.0185, "step": 20542 }, { "epoch": 0.27, "grad_norm": 3.9183149337768555, "learning_rate": 1.9929636859525143e-05, "loss": 2.061, "step": 20543 }, { "epoch": 0.27, "grad_norm": 3.8506922721862793, "learning_rate": 1.992962441554424e-05, "loss": 2.0651, "step": 20544 }, { "epoch": 0.27, "grad_norm": 3.8818624019622803, "learning_rate": 1.9929611970466932e-05, "loss": 1.9834, "step": 20545 }, { "epoch": 0.27, "grad_norm": 4.3422675132751465, "learning_rate": 1.992959952429323e-05, "loss": 2.1554, "step": 20546 }, { "epoch": 0.27, "grad_norm": 3.9743661880493164, "learning_rate": 1.992958707702313e-05, "loss": 1.9471, "step": 20547 }, { "epoch": 0.27, "grad_norm": 4.354536533355713, "learning_rate": 1.9929574628656634e-05, "loss": 2.3558, "step": 20548 }, { "epoch": 0.27, "grad_norm": 3.790682554244995, "learning_rate": 1.9929562179193746e-05, "loss": 1.7847, "step": 20549 }, { "epoch": 0.27, "grad_norm": 3.763404130935669, "learning_rate": 1.992954972863447e-05, "loss": 1.6698, "step": 20550 }, { "epoch": 0.27, "grad_norm": 3.8832454681396484, "learning_rate": 1.9929537276978795e-05, "loss": 1.7876, "step": 20551 }, { "epoch": 0.27, "grad_norm": 3.671642303466797, "learning_rate": 1.992952482422674e-05, "loss": 1.703, "step": 20552 }, { "epoch": 0.27, "grad_norm": 3.4024152755737305, "learning_rate": 1.9929512370378288e-05, "loss": 1.6445, "step": 20553 }, { "epoch": 0.27, "grad_norm": 4.528887748718262, "learning_rate": 1.9929499915433453e-05, "loss": 2.2658, "step": 20554 }, { "epoch": 0.27, "grad_norm": 3.2869391441345215, "learning_rate": 1.9929487459392236e-05, "loss": 1.7502, "step": 20555 }, { "epoch": 0.27, "grad_norm": 4.59214448928833, "learning_rate": 1.992947500225463e-05, "loss": 2.4203, "step": 20556 }, { "epoch": 0.27, "grad_norm": 4.653956890106201, "learning_rate": 1.9929462544020645e-05, "loss": 2.0621, "step": 20557 }, { "epoch": 0.27, "grad_norm": 4.191395282745361, "learning_rate": 1.9929450084690277e-05, "loss": 2.2203, "step": 20558 }, { "epoch": 0.27, "grad_norm": 3.6374340057373047, "learning_rate": 1.992943762426353e-05, "loss": 1.6208, "step": 20559 }, { "epoch": 0.27, "grad_norm": 4.667777061462402, "learning_rate": 1.9929425162740404e-05, "loss": 2.5923, "step": 20560 }, { "epoch": 0.27, "grad_norm": 4.694171905517578, "learning_rate": 1.9929412700120902e-05, "loss": 2.6828, "step": 20561 }, { "epoch": 0.27, "grad_norm": 3.7772176265716553, "learning_rate": 1.9929400236405025e-05, "loss": 1.6706, "step": 20562 }, { "epoch": 0.27, "grad_norm": 3.8241827487945557, "learning_rate": 1.992938777159277e-05, "loss": 2.1334, "step": 20563 }, { "epoch": 0.27, "grad_norm": 4.196652889251709, "learning_rate": 1.9929375305684146e-05, "loss": 1.9533, "step": 20564 }, { "epoch": 0.27, "grad_norm": 4.621365070343018, "learning_rate": 1.9929362838679148e-05, "loss": 2.2325, "step": 20565 }, { "epoch": 0.27, "grad_norm": 4.57261848449707, "learning_rate": 1.9929350370577784e-05, "loss": 2.3265, "step": 20566 }, { "epoch": 0.27, "grad_norm": 4.203921318054199, "learning_rate": 1.992933790138005e-05, "loss": 1.9919, "step": 20567 }, { "epoch": 0.27, "grad_norm": 3.696706533432007, "learning_rate": 1.9929325431085944e-05, "loss": 1.7943, "step": 20568 }, { "epoch": 0.27, "grad_norm": 4.789219379425049, "learning_rate": 1.9929312959695477e-05, "loss": 2.5592, "step": 20569 }, { "epoch": 0.27, "grad_norm": 4.182389736175537, "learning_rate": 1.9929300487208644e-05, "loss": 2.1454, "step": 20570 }, { "epoch": 0.27, "grad_norm": 4.170487880706787, "learning_rate": 1.9929288013625446e-05, "loss": 2.3934, "step": 20571 }, { "epoch": 0.27, "grad_norm": 4.7717204093933105, "learning_rate": 1.9929275538945886e-05, "loss": 2.1091, "step": 20572 }, { "epoch": 0.27, "grad_norm": 4.245279312133789, "learning_rate": 1.9929263063169967e-05, "loss": 2.4568, "step": 20573 }, { "epoch": 0.27, "grad_norm": 4.652575969696045, "learning_rate": 1.992925058629769e-05, "loss": 2.3057, "step": 20574 }, { "epoch": 0.27, "grad_norm": 3.9790802001953125, "learning_rate": 1.9929238108329056e-05, "loss": 1.928, "step": 20575 }, { "epoch": 0.27, "grad_norm": 4.459077835083008, "learning_rate": 1.992922562926406e-05, "loss": 2.3828, "step": 20576 }, { "epoch": 0.27, "grad_norm": 4.299531936645508, "learning_rate": 1.9929213149102716e-05, "loss": 2.1796, "step": 20577 }, { "epoch": 0.27, "grad_norm": 3.930121660232544, "learning_rate": 1.9929200667845013e-05, "loss": 1.9594, "step": 20578 }, { "epoch": 0.27, "grad_norm": 3.9208731651306152, "learning_rate": 1.9929188185490962e-05, "loss": 2.159, "step": 20579 }, { "epoch": 0.27, "grad_norm": 4.556215763092041, "learning_rate": 1.992917570204056e-05, "loss": 2.1706, "step": 20580 }, { "epoch": 0.27, "grad_norm": 3.9735331535339355, "learning_rate": 1.9929163217493803e-05, "loss": 2.0192, "step": 20581 }, { "epoch": 0.27, "grad_norm": 3.693052053451538, "learning_rate": 1.9929150731850705e-05, "loss": 1.7233, "step": 20582 }, { "epoch": 0.27, "grad_norm": 3.9186019897460938, "learning_rate": 1.9929138245111256e-05, "loss": 1.976, "step": 20583 }, { "epoch": 0.27, "grad_norm": 3.4613454341888428, "learning_rate": 1.992912575727546e-05, "loss": 1.5649, "step": 20584 }, { "epoch": 0.27, "grad_norm": 4.432916641235352, "learning_rate": 1.9929113268343325e-05, "loss": 2.1693, "step": 20585 }, { "epoch": 0.27, "grad_norm": 3.730508804321289, "learning_rate": 1.9929100778314843e-05, "loss": 2.0282, "step": 20586 }, { "epoch": 0.27, "grad_norm": 4.476165294647217, "learning_rate": 1.9929088287190023e-05, "loss": 2.1623, "step": 20587 }, { "epoch": 0.27, "grad_norm": 4.374103546142578, "learning_rate": 1.9929075794968865e-05, "loss": 2.1538, "step": 20588 }, { "epoch": 0.27, "grad_norm": 4.216711521148682, "learning_rate": 1.9929063301651364e-05, "loss": 2.4084, "step": 20589 }, { "epoch": 0.27, "grad_norm": 4.606785297393799, "learning_rate": 1.992905080723753e-05, "loss": 1.9153, "step": 20590 }, { "epoch": 0.27, "grad_norm": 4.137598991394043, "learning_rate": 1.9929038311727358e-05, "loss": 2.212, "step": 20591 }, { "epoch": 0.27, "grad_norm": 3.259577751159668, "learning_rate": 1.992902581512085e-05, "loss": 1.6322, "step": 20592 }, { "epoch": 0.27, "grad_norm": 3.54489803314209, "learning_rate": 1.992901331741801e-05, "loss": 1.5546, "step": 20593 }, { "epoch": 0.27, "grad_norm": 3.0941991806030273, "learning_rate": 1.9929000818618843e-05, "loss": 1.3658, "step": 20594 }, { "epoch": 0.27, "grad_norm": 3.9906973838806152, "learning_rate": 1.9928988318723342e-05, "loss": 2.0521, "step": 20595 }, { "epoch": 0.27, "grad_norm": 4.120047569274902, "learning_rate": 1.992897581773151e-05, "loss": 1.9031, "step": 20596 }, { "epoch": 0.27, "grad_norm": 4.674707412719727, "learning_rate": 1.9928963315643352e-05, "loss": 2.3392, "step": 20597 }, { "epoch": 0.27, "grad_norm": 3.8908870220184326, "learning_rate": 1.992895081245887e-05, "loss": 1.7551, "step": 20598 }, { "epoch": 0.27, "grad_norm": 3.2591476440429688, "learning_rate": 1.9928938308178063e-05, "loss": 1.5029, "step": 20599 }, { "epoch": 0.27, "grad_norm": 3.607522487640381, "learning_rate": 1.9928925802800934e-05, "loss": 1.8174, "step": 20600 }, { "epoch": 0.27, "grad_norm": 4.06835412979126, "learning_rate": 1.992891329632748e-05, "loss": 2.171, "step": 20601 }, { "epoch": 0.27, "grad_norm": 4.320252418518066, "learning_rate": 1.992890078875771e-05, "loss": 2.0859, "step": 20602 }, { "epoch": 0.27, "grad_norm": 4.167845249176025, "learning_rate": 1.9928888280091616e-05, "loss": 2.0822, "step": 20603 }, { "epoch": 0.27, "grad_norm": 4.483940124511719, "learning_rate": 1.9928875770329204e-05, "loss": 2.0767, "step": 20604 }, { "epoch": 0.27, "grad_norm": 3.767786979675293, "learning_rate": 1.992886325947048e-05, "loss": 2.1207, "step": 20605 }, { "epoch": 0.27, "grad_norm": 4.703094959259033, "learning_rate": 1.992885074751544e-05, "loss": 2.5437, "step": 20606 }, { "epoch": 0.27, "grad_norm": 4.218435287475586, "learning_rate": 1.9928838234464085e-05, "loss": 2.4385, "step": 20607 }, { "epoch": 0.27, "grad_norm": 4.277518272399902, "learning_rate": 1.9928825720316415e-05, "loss": 1.8646, "step": 20608 }, { "epoch": 0.27, "grad_norm": 4.141196250915527, "learning_rate": 1.992881320507244e-05, "loss": 2.0085, "step": 20609 }, { "epoch": 0.27, "grad_norm": 3.686041831970215, "learning_rate": 1.9928800688732154e-05, "loss": 1.9523, "step": 20610 }, { "epoch": 0.27, "grad_norm": 4.102981090545654, "learning_rate": 1.992878817129556e-05, "loss": 2.0291, "step": 20611 }, { "epoch": 0.27, "grad_norm": 4.3407182693481445, "learning_rate": 1.9928775652762654e-05, "loss": 2.4367, "step": 20612 }, { "epoch": 0.27, "grad_norm": 4.491166591644287, "learning_rate": 1.9928763133133447e-05, "loss": 2.2959, "step": 20613 }, { "epoch": 0.27, "grad_norm": 4.509035587310791, "learning_rate": 1.9928750612407936e-05, "loss": 2.3699, "step": 20614 }, { "epoch": 0.27, "grad_norm": 4.452573776245117, "learning_rate": 1.9928738090586127e-05, "loss": 2.2108, "step": 20615 }, { "epoch": 0.27, "grad_norm": 4.1763176918029785, "learning_rate": 1.992872556766801e-05, "loss": 2.1558, "step": 20616 }, { "epoch": 0.27, "grad_norm": 4.708856582641602, "learning_rate": 1.9928713043653595e-05, "loss": 2.6479, "step": 20617 }, { "epoch": 0.27, "grad_norm": 4.652231693267822, "learning_rate": 1.9928700518542882e-05, "loss": 2.6311, "step": 20618 }, { "epoch": 0.27, "grad_norm": 4.540732383728027, "learning_rate": 1.9928687992335875e-05, "loss": 1.9811, "step": 20619 }, { "epoch": 0.27, "grad_norm": 3.914177656173706, "learning_rate": 1.992867546503257e-05, "loss": 2.2961, "step": 20620 }, { "epoch": 0.27, "grad_norm": 4.26456356048584, "learning_rate": 1.9928662936632967e-05, "loss": 2.3141, "step": 20621 }, { "epoch": 0.27, "grad_norm": 3.8814589977264404, "learning_rate": 1.9928650407137077e-05, "loss": 2.3544, "step": 20622 }, { "epoch": 0.27, "grad_norm": 4.277149200439453, "learning_rate": 1.9928637876544892e-05, "loss": 2.0141, "step": 20623 }, { "epoch": 0.27, "grad_norm": 4.36476469039917, "learning_rate": 1.992862534485642e-05, "loss": 2.2241, "step": 20624 }, { "epoch": 0.27, "grad_norm": 3.890117645263672, "learning_rate": 1.9928612812071656e-05, "loss": 1.92, "step": 20625 }, { "epoch": 0.27, "grad_norm": 4.12051248550415, "learning_rate": 1.9928600278190605e-05, "loss": 1.8048, "step": 20626 }, { "epoch": 0.27, "grad_norm": 4.604636192321777, "learning_rate": 1.9928587743213273e-05, "loss": 2.297, "step": 20627 }, { "epoch": 0.27, "grad_norm": 3.765314817428589, "learning_rate": 1.992857520713965e-05, "loss": 2.3496, "step": 20628 }, { "epoch": 0.27, "grad_norm": 4.033431053161621, "learning_rate": 1.9928562669969746e-05, "loss": 1.8211, "step": 20629 }, { "epoch": 0.27, "grad_norm": 4.463249206542969, "learning_rate": 1.992855013170356e-05, "loss": 2.5209, "step": 20630 }, { "epoch": 0.27, "grad_norm": 3.940340042114258, "learning_rate": 1.9928537592341096e-05, "loss": 1.8457, "step": 20631 }, { "epoch": 0.27, "grad_norm": 4.406947612762451, "learning_rate": 1.9928525051882353e-05, "loss": 2.207, "step": 20632 }, { "epoch": 0.27, "grad_norm": 4.194075107574463, "learning_rate": 1.992851251032733e-05, "loss": 2.1308, "step": 20633 }, { "epoch": 0.27, "grad_norm": 3.980581045150757, "learning_rate": 1.992849996767603e-05, "loss": 1.8491, "step": 20634 }, { "epoch": 0.27, "grad_norm": 4.507056713104248, "learning_rate": 1.992848742392846e-05, "loss": 2.5073, "step": 20635 }, { "epoch": 0.27, "grad_norm": 3.51635479927063, "learning_rate": 1.9928474879084613e-05, "loss": 1.6761, "step": 20636 }, { "epoch": 0.27, "grad_norm": 3.6978700160980225, "learning_rate": 1.9928462333144492e-05, "loss": 1.9189, "step": 20637 }, { "epoch": 0.27, "grad_norm": 4.407719135284424, "learning_rate": 1.9928449786108105e-05, "loss": 2.637, "step": 20638 }, { "epoch": 0.27, "grad_norm": 4.409525394439697, "learning_rate": 1.9928437237975443e-05, "loss": 2.0768, "step": 20639 }, { "epoch": 0.27, "grad_norm": 4.013024806976318, "learning_rate": 1.992842468874652e-05, "loss": 1.9474, "step": 20640 }, { "epoch": 0.27, "grad_norm": 3.802823305130005, "learning_rate": 1.9928412138421325e-05, "loss": 1.7743, "step": 20641 }, { "epoch": 0.27, "grad_norm": 4.526016712188721, "learning_rate": 1.9928399586999868e-05, "loss": 2.3916, "step": 20642 }, { "epoch": 0.27, "grad_norm": 4.249054431915283, "learning_rate": 1.9928387034482147e-05, "loss": 2.3234, "step": 20643 }, { "epoch": 0.27, "grad_norm": 4.2745747566223145, "learning_rate": 1.992837448086816e-05, "loss": 2.0274, "step": 20644 }, { "epoch": 0.27, "grad_norm": 4.461627960205078, "learning_rate": 1.9928361926157914e-05, "loss": 2.3293, "step": 20645 }, { "epoch": 0.27, "grad_norm": 4.270464897155762, "learning_rate": 1.9928349370351408e-05, "loss": 1.9927, "step": 20646 }, { "epoch": 0.27, "grad_norm": 4.393044948577881, "learning_rate": 1.992833681344865e-05, "loss": 2.3776, "step": 20647 }, { "epoch": 0.27, "grad_norm": 4.346761226654053, "learning_rate": 1.992832425544963e-05, "loss": 2.1507, "step": 20648 }, { "epoch": 0.27, "grad_norm": 3.5150740146636963, "learning_rate": 1.9928311696354355e-05, "loss": 2.0087, "step": 20649 }, { "epoch": 0.27, "grad_norm": 3.7504565715789795, "learning_rate": 1.9928299136162824e-05, "loss": 1.6941, "step": 20650 }, { "epoch": 0.27, "grad_norm": 3.8271079063415527, "learning_rate": 1.992828657487504e-05, "loss": 1.7078, "step": 20651 }, { "epoch": 0.27, "grad_norm": 4.480051517486572, "learning_rate": 1.9928274012491008e-05, "loss": 2.3182, "step": 20652 }, { "epoch": 0.27, "grad_norm": 4.6764044761657715, "learning_rate": 1.9928261449010726e-05, "loss": 1.9354, "step": 20653 }, { "epoch": 0.27, "grad_norm": 4.097129821777344, "learning_rate": 1.992824888443419e-05, "loss": 2.07, "step": 20654 }, { "epoch": 0.27, "grad_norm": 3.835646629333496, "learning_rate": 1.9928236318761414e-05, "loss": 2.0783, "step": 20655 }, { "epoch": 0.27, "grad_norm": 4.0526123046875, "learning_rate": 1.9928223751992388e-05, "loss": 2.0089, "step": 20656 }, { "epoch": 0.27, "grad_norm": 4.323996543884277, "learning_rate": 1.992821118412712e-05, "loss": 2.0558, "step": 20657 }, { "epoch": 0.27, "grad_norm": 3.960052490234375, "learning_rate": 1.9928198615165607e-05, "loss": 2.1238, "step": 20658 }, { "epoch": 0.27, "grad_norm": 4.108084201812744, "learning_rate": 1.9928186045107857e-05, "loss": 2.1877, "step": 20659 }, { "epoch": 0.27, "grad_norm": 4.81778621673584, "learning_rate": 1.992817347395386e-05, "loss": 2.1522, "step": 20660 }, { "epoch": 0.27, "grad_norm": 4.493044853210449, "learning_rate": 1.992816090170363e-05, "loss": 2.3927, "step": 20661 }, { "epoch": 0.27, "grad_norm": 3.7140026092529297, "learning_rate": 1.992814832835716e-05, "loss": 1.8857, "step": 20662 }, { "epoch": 0.27, "grad_norm": 4.065035820007324, "learning_rate": 1.9928135753914454e-05, "loss": 2.0269, "step": 20663 }, { "epoch": 0.27, "grad_norm": 3.710956573486328, "learning_rate": 1.9928123178375512e-05, "loss": 1.8097, "step": 20664 }, { "epoch": 0.27, "grad_norm": 4.020358562469482, "learning_rate": 1.992811060174034e-05, "loss": 2.6902, "step": 20665 }, { "epoch": 0.27, "grad_norm": 4.061193943023682, "learning_rate": 1.9928098024008933e-05, "loss": 2.0362, "step": 20666 }, { "epoch": 0.27, "grad_norm": 3.677072763442993, "learning_rate": 1.99280854451813e-05, "loss": 2.0092, "step": 20667 }, { "epoch": 0.27, "grad_norm": 4.433876991271973, "learning_rate": 1.9928072865257434e-05, "loss": 2.8056, "step": 20668 }, { "epoch": 0.27, "grad_norm": 5.13379430770874, "learning_rate": 1.9928060284237342e-05, "loss": 2.6668, "step": 20669 }, { "epoch": 0.27, "grad_norm": 4.175055980682373, "learning_rate": 1.9928047702121024e-05, "loss": 1.9409, "step": 20670 }, { "epoch": 0.27, "grad_norm": 4.85922908782959, "learning_rate": 1.9928035118908476e-05, "loss": 2.3628, "step": 20671 }, { "epoch": 0.27, "grad_norm": 4.103166580200195, "learning_rate": 1.992802253459971e-05, "loss": 2.5549, "step": 20672 }, { "epoch": 0.27, "grad_norm": 4.23822021484375, "learning_rate": 1.9928009949194724e-05, "loss": 2.4187, "step": 20673 }, { "epoch": 0.27, "grad_norm": 3.707392930984497, "learning_rate": 1.9927997362693512e-05, "loss": 1.8283, "step": 20674 }, { "epoch": 0.27, "grad_norm": 3.797708034515381, "learning_rate": 1.992798477509608e-05, "loss": 2.1802, "step": 20675 }, { "epoch": 0.27, "grad_norm": 4.432938098907471, "learning_rate": 1.9927972186402434e-05, "loss": 2.7089, "step": 20676 }, { "epoch": 0.27, "grad_norm": 4.264862537384033, "learning_rate": 1.992795959661257e-05, "loss": 2.0838, "step": 20677 }, { "epoch": 0.27, "grad_norm": 3.4900083541870117, "learning_rate": 1.992794700572649e-05, "loss": 1.492, "step": 20678 }, { "epoch": 0.27, "grad_norm": 3.606372356414795, "learning_rate": 1.9927934413744196e-05, "loss": 1.5014, "step": 20679 }, { "epoch": 0.27, "grad_norm": 4.16061544418335, "learning_rate": 1.9927921820665693e-05, "loss": 2.0548, "step": 20680 }, { "epoch": 0.27, "grad_norm": 4.14194917678833, "learning_rate": 1.9927909226490976e-05, "loss": 2.117, "step": 20681 }, { "epoch": 0.27, "grad_norm": 3.776993989944458, "learning_rate": 1.992789663122005e-05, "loss": 1.9992, "step": 20682 }, { "epoch": 0.27, "grad_norm": 4.454812049865723, "learning_rate": 1.9927884034852914e-05, "loss": 2.4814, "step": 20683 }, { "epoch": 0.27, "grad_norm": 3.857128143310547, "learning_rate": 1.9927871437389576e-05, "loss": 1.8152, "step": 20684 }, { "epoch": 0.27, "grad_norm": 4.1255669593811035, "learning_rate": 1.9927858838830027e-05, "loss": 2.0978, "step": 20685 }, { "epoch": 0.27, "grad_norm": 4.238047122955322, "learning_rate": 1.9927846239174276e-05, "loss": 2.0716, "step": 20686 }, { "epoch": 0.27, "grad_norm": 4.498640060424805, "learning_rate": 1.992783363842232e-05, "loss": 2.4352, "step": 20687 }, { "epoch": 0.27, "grad_norm": 4.465196132659912, "learning_rate": 1.9927821036574168e-05, "loss": 2.409, "step": 20688 }, { "epoch": 0.27, "grad_norm": 4.599239826202393, "learning_rate": 1.9927808433629818e-05, "loss": 2.3984, "step": 20689 }, { "epoch": 0.27, "grad_norm": 4.17035436630249, "learning_rate": 1.992779582958926e-05, "loss": 1.8872, "step": 20690 }, { "epoch": 0.27, "grad_norm": 4.006468296051025, "learning_rate": 1.9927783224452512e-05, "loss": 1.981, "step": 20691 }, { "epoch": 0.27, "grad_norm": 5.209922790527344, "learning_rate": 1.9927770618219567e-05, "loss": 2.7169, "step": 20692 }, { "epoch": 0.27, "grad_norm": 3.711503028869629, "learning_rate": 1.992775801089043e-05, "loss": 1.9698, "step": 20693 }, { "epoch": 0.27, "grad_norm": 4.695430278778076, "learning_rate": 1.992774540246509e-05, "loss": 2.4468, "step": 20694 }, { "epoch": 0.27, "grad_norm": 4.642833232879639, "learning_rate": 1.9927732792943568e-05, "loss": 2.5188, "step": 20695 }, { "epoch": 0.27, "grad_norm": 4.050169467926025, "learning_rate": 1.9927720182325855e-05, "loss": 2.4806, "step": 20696 }, { "epoch": 0.27, "grad_norm": 3.647348165512085, "learning_rate": 1.9927707570611952e-05, "loss": 1.8343, "step": 20697 }, { "epoch": 0.27, "grad_norm": 4.3818182945251465, "learning_rate": 1.992769495780186e-05, "loss": 2.5164, "step": 20698 }, { "epoch": 0.27, "grad_norm": 4.075402736663818, "learning_rate": 1.9927682343895583e-05, "loss": 2.2962, "step": 20699 }, { "epoch": 0.27, "grad_norm": 3.9093329906463623, "learning_rate": 1.9927669728893123e-05, "loss": 2.1914, "step": 20700 }, { "epoch": 0.27, "grad_norm": 3.999579668045044, "learning_rate": 1.992765711279448e-05, "loss": 2.2001, "step": 20701 }, { "epoch": 0.27, "grad_norm": 4.450240612030029, "learning_rate": 1.9927644495599654e-05, "loss": 2.2775, "step": 20702 }, { "epoch": 0.27, "grad_norm": 4.224279880523682, "learning_rate": 1.992763187730865e-05, "loss": 2.4088, "step": 20703 }, { "epoch": 0.27, "grad_norm": 4.311537265777588, "learning_rate": 1.9927619257921462e-05, "loss": 2.3277, "step": 20704 }, { "epoch": 0.27, "grad_norm": 3.812007427215576, "learning_rate": 1.99276066374381e-05, "loss": 1.7635, "step": 20705 }, { "epoch": 0.27, "grad_norm": 3.9528486728668213, "learning_rate": 1.992759401585856e-05, "loss": 1.7913, "step": 20706 }, { "epoch": 0.27, "grad_norm": 4.416369438171387, "learning_rate": 1.9927581393182845e-05, "loss": 2.1468, "step": 20707 }, { "epoch": 0.27, "grad_norm": 3.617349624633789, "learning_rate": 1.9927568769410958e-05, "loss": 1.7982, "step": 20708 }, { "epoch": 0.27, "grad_norm": 3.990110158920288, "learning_rate": 1.9927556144542898e-05, "loss": 1.8131, "step": 20709 }, { "epoch": 0.27, "grad_norm": 4.19122314453125, "learning_rate": 1.992754351857867e-05, "loss": 2.2813, "step": 20710 }, { "epoch": 0.27, "grad_norm": 4.388702392578125, "learning_rate": 1.992753089151827e-05, "loss": 2.1769, "step": 20711 }, { "epoch": 0.27, "grad_norm": 4.025728225708008, "learning_rate": 1.99275182633617e-05, "loss": 2.368, "step": 20712 }, { "epoch": 0.27, "grad_norm": 3.998542547225952, "learning_rate": 1.992750563410897e-05, "loss": 1.8388, "step": 20713 }, { "epoch": 0.27, "grad_norm": 3.9365475177764893, "learning_rate": 1.9927493003760074e-05, "loss": 1.9802, "step": 20714 }, { "epoch": 0.27, "grad_norm": 3.4844274520874023, "learning_rate": 1.992748037231501e-05, "loss": 1.7744, "step": 20715 }, { "epoch": 0.27, "grad_norm": 4.131504535675049, "learning_rate": 1.9927467739773783e-05, "loss": 2.1279, "step": 20716 }, { "epoch": 0.27, "grad_norm": 4.381296634674072, "learning_rate": 1.99274551061364e-05, "loss": 2.1901, "step": 20717 }, { "epoch": 0.27, "grad_norm": 3.8931994438171387, "learning_rate": 1.9927442471402854e-05, "loss": 1.9889, "step": 20718 }, { "epoch": 0.27, "grad_norm": 3.951463460922241, "learning_rate": 1.992742983557315e-05, "loss": 1.8687, "step": 20719 }, { "epoch": 0.27, "grad_norm": 4.406893730163574, "learning_rate": 1.9927417198647293e-05, "loss": 2.3162, "step": 20720 }, { "epoch": 0.27, "grad_norm": 4.5405497550964355, "learning_rate": 1.992740456062528e-05, "loss": 1.816, "step": 20721 }, { "epoch": 0.27, "grad_norm": 5.275290012359619, "learning_rate": 1.992739192150711e-05, "loss": 2.6365, "step": 20722 }, { "epoch": 0.27, "grad_norm": 4.149313449859619, "learning_rate": 1.992737928129279e-05, "loss": 2.3081, "step": 20723 }, { "epoch": 0.27, "grad_norm": 4.569416046142578, "learning_rate": 1.9927366639982315e-05, "loss": 2.449, "step": 20724 }, { "epoch": 0.27, "grad_norm": 4.564822673797607, "learning_rate": 1.9927353997575693e-05, "loss": 2.5159, "step": 20725 }, { "epoch": 0.27, "grad_norm": 3.717954635620117, "learning_rate": 1.992734135407292e-05, "loss": 1.9342, "step": 20726 }, { "epoch": 0.27, "grad_norm": 3.728665351867676, "learning_rate": 1.9927328709474005e-05, "loss": 1.9999, "step": 20727 }, { "epoch": 0.27, "grad_norm": 3.8657827377319336, "learning_rate": 1.9927316063778943e-05, "loss": 2.0844, "step": 20728 }, { "epoch": 0.27, "grad_norm": 3.9213616847991943, "learning_rate": 1.9927303416987736e-05, "loss": 2.1815, "step": 20729 }, { "epoch": 0.27, "grad_norm": 4.478249549865723, "learning_rate": 1.9927290769100387e-05, "loss": 2.2853, "step": 20730 }, { "epoch": 0.27, "grad_norm": 4.125953197479248, "learning_rate": 1.9927278120116897e-05, "loss": 2.2905, "step": 20731 }, { "epoch": 0.27, "grad_norm": 4.110110282897949, "learning_rate": 1.9927265470037264e-05, "loss": 1.782, "step": 20732 }, { "epoch": 0.27, "grad_norm": 4.093946933746338, "learning_rate": 1.9927252818861496e-05, "loss": 2.0983, "step": 20733 }, { "epoch": 0.27, "grad_norm": 3.8253517150878906, "learning_rate": 1.992724016658959e-05, "loss": 1.9906, "step": 20734 }, { "epoch": 0.27, "grad_norm": 4.214470386505127, "learning_rate": 1.9927227513221547e-05, "loss": 2.044, "step": 20735 }, { "epoch": 0.27, "grad_norm": 3.516986131668091, "learning_rate": 1.992721485875737e-05, "loss": 1.6658, "step": 20736 }, { "epoch": 0.27, "grad_norm": 4.133295059204102, "learning_rate": 1.992720220319706e-05, "loss": 1.754, "step": 20737 }, { "epoch": 0.27, "grad_norm": 4.511711597442627, "learning_rate": 1.992718954654062e-05, "loss": 2.2289, "step": 20738 }, { "epoch": 0.27, "grad_norm": 3.5613510608673096, "learning_rate": 1.992717688878805e-05, "loss": 1.771, "step": 20739 }, { "epoch": 0.27, "grad_norm": 4.427135467529297, "learning_rate": 1.9927164229939348e-05, "loss": 2.3547, "step": 20740 }, { "epoch": 0.27, "grad_norm": 4.654850482940674, "learning_rate": 1.9927151569994522e-05, "loss": 2.1076, "step": 20741 }, { "epoch": 0.27, "grad_norm": 4.166781425476074, "learning_rate": 1.9927138908953568e-05, "loss": 2.0051, "step": 20742 }, { "epoch": 0.27, "grad_norm": 4.20916223526001, "learning_rate": 1.9927126246816492e-05, "loss": 1.959, "step": 20743 }, { "epoch": 0.27, "grad_norm": 4.175927639007568, "learning_rate": 1.992711358358329e-05, "loss": 2.0264, "step": 20744 }, { "epoch": 0.27, "grad_norm": 3.4234156608581543, "learning_rate": 1.992710091925397e-05, "loss": 1.6817, "step": 20745 }, { "epoch": 0.27, "grad_norm": 4.036995887756348, "learning_rate": 1.9927088253828525e-05, "loss": 2.3054, "step": 20746 }, { "epoch": 0.27, "grad_norm": 3.8740170001983643, "learning_rate": 1.9927075587306963e-05, "loss": 2.1541, "step": 20747 }, { "epoch": 0.27, "grad_norm": 3.5106120109558105, "learning_rate": 1.9927062919689282e-05, "loss": 1.6255, "step": 20748 }, { "epoch": 0.27, "grad_norm": 3.955368757247925, "learning_rate": 1.9927050250975487e-05, "loss": 2.3667, "step": 20749 }, { "epoch": 0.27, "grad_norm": 4.312599182128906, "learning_rate": 1.9927037581165577e-05, "loss": 2.4051, "step": 20750 }, { "epoch": 0.27, "grad_norm": 4.463983058929443, "learning_rate": 1.9927024910259552e-05, "loss": 2.3929, "step": 20751 }, { "epoch": 0.27, "grad_norm": 4.284254550933838, "learning_rate": 1.992701223825742e-05, "loss": 2.1437, "step": 20752 }, { "epoch": 0.27, "grad_norm": 4.470081806182861, "learning_rate": 1.9926999565159174e-05, "loss": 2.2474, "step": 20753 }, { "epoch": 0.27, "grad_norm": 3.903907060623169, "learning_rate": 1.992698689096482e-05, "loss": 1.8899, "step": 20754 }, { "epoch": 0.27, "grad_norm": 4.472834587097168, "learning_rate": 1.9926974215674354e-05, "loss": 2.3579, "step": 20755 }, { "epoch": 0.27, "grad_norm": 4.359817981719971, "learning_rate": 1.9926961539287782e-05, "loss": 2.3556, "step": 20756 }, { "epoch": 0.27, "grad_norm": 4.528748989105225, "learning_rate": 1.992694886180511e-05, "loss": 2.6752, "step": 20757 }, { "epoch": 0.27, "grad_norm": 4.619653701782227, "learning_rate": 1.992693618322633e-05, "loss": 2.6584, "step": 20758 }, { "epoch": 0.27, "grad_norm": 4.18324089050293, "learning_rate": 1.9926923503551448e-05, "loss": 2.5379, "step": 20759 }, { "epoch": 0.27, "grad_norm": 4.090983867645264, "learning_rate": 1.992691082278047e-05, "loss": 2.0662, "step": 20760 }, { "epoch": 0.27, "grad_norm": 3.9024529457092285, "learning_rate": 1.992689814091339e-05, "loss": 1.6704, "step": 20761 }, { "epoch": 0.27, "grad_norm": 4.080053329467773, "learning_rate": 1.9926885457950212e-05, "loss": 2.1645, "step": 20762 }, { "epoch": 0.27, "grad_norm": 3.9356815814971924, "learning_rate": 1.9926872773890938e-05, "loss": 1.8727, "step": 20763 }, { "epoch": 0.27, "grad_norm": 3.406219482421875, "learning_rate": 1.9926860088735566e-05, "loss": 1.7218, "step": 20764 }, { "epoch": 0.27, "grad_norm": 3.934832811355591, "learning_rate": 1.9926847402484102e-05, "loss": 1.9476, "step": 20765 }, { "epoch": 0.27, "grad_norm": 3.6282200813293457, "learning_rate": 1.9926834715136544e-05, "loss": 1.7416, "step": 20766 }, { "epoch": 0.27, "grad_norm": 4.056249141693115, "learning_rate": 1.99268220266929e-05, "loss": 1.9905, "step": 20767 }, { "epoch": 0.27, "grad_norm": 4.4672417640686035, "learning_rate": 1.9926809337153162e-05, "loss": 1.9483, "step": 20768 }, { "epoch": 0.27, "grad_norm": 3.795163631439209, "learning_rate": 1.9926796646517338e-05, "loss": 2.1122, "step": 20769 }, { "epoch": 0.27, "grad_norm": 3.983409881591797, "learning_rate": 1.9926783954785425e-05, "loss": 1.881, "step": 20770 }, { "epoch": 0.27, "grad_norm": 4.3238067626953125, "learning_rate": 1.992677126195743e-05, "loss": 1.9779, "step": 20771 }, { "epoch": 0.27, "grad_norm": 3.832477331161499, "learning_rate": 1.9926758568033348e-05, "loss": 1.8683, "step": 20772 }, { "epoch": 0.27, "grad_norm": 3.8152542114257812, "learning_rate": 1.9926745873013186e-05, "loss": 2.0693, "step": 20773 }, { "epoch": 0.27, "grad_norm": 4.191330909729004, "learning_rate": 1.9926733176896936e-05, "loss": 2.0407, "step": 20774 }, { "epoch": 0.27, "grad_norm": 3.6118321418762207, "learning_rate": 1.9926720479684612e-05, "loss": 2.1293, "step": 20775 }, { "epoch": 0.27, "grad_norm": 4.716482639312744, "learning_rate": 1.992670778137621e-05, "loss": 2.1816, "step": 20776 }, { "epoch": 0.27, "grad_norm": 4.50014591217041, "learning_rate": 1.9926695081971732e-05, "loss": 2.1538, "step": 20777 }, { "epoch": 0.27, "grad_norm": 3.5077133178710938, "learning_rate": 1.9926682381471175e-05, "loss": 1.6917, "step": 20778 }, { "epoch": 0.27, "grad_norm": 3.902352809906006, "learning_rate": 1.9926669679874548e-05, "loss": 1.9636, "step": 20779 }, { "epoch": 0.27, "grad_norm": 4.584418773651123, "learning_rate": 1.9926656977181843e-05, "loss": 2.5788, "step": 20780 }, { "epoch": 0.27, "grad_norm": 3.429516553878784, "learning_rate": 1.992664427339307e-05, "loss": 1.7314, "step": 20781 }, { "epoch": 0.27, "grad_norm": 3.831568956375122, "learning_rate": 1.9926631568508227e-05, "loss": 1.8907, "step": 20782 }, { "epoch": 0.27, "grad_norm": 4.025382995605469, "learning_rate": 1.9926618862527314e-05, "loss": 1.8662, "step": 20783 }, { "epoch": 0.27, "grad_norm": 4.715109348297119, "learning_rate": 1.9926606155450336e-05, "loss": 2.8504, "step": 20784 }, { "epoch": 0.27, "grad_norm": 4.296727180480957, "learning_rate": 1.992659344727729e-05, "loss": 2.5708, "step": 20785 }, { "epoch": 0.27, "grad_norm": 4.517719745635986, "learning_rate": 1.992658073800818e-05, "loss": 2.4778, "step": 20786 }, { "epoch": 0.27, "grad_norm": 3.8517165184020996, "learning_rate": 1.992656802764301e-05, "loss": 1.9519, "step": 20787 }, { "epoch": 0.27, "grad_norm": 4.604565143585205, "learning_rate": 1.9926555316181777e-05, "loss": 2.0405, "step": 20788 }, { "epoch": 0.27, "grad_norm": 3.7151153087615967, "learning_rate": 1.9926542603624482e-05, "loss": 2.1704, "step": 20789 }, { "epoch": 0.27, "grad_norm": 4.205578804016113, "learning_rate": 1.992652988997113e-05, "loss": 2.1042, "step": 20790 }, { "epoch": 0.27, "grad_norm": 4.680781841278076, "learning_rate": 1.992651717522172e-05, "loss": 2.3174, "step": 20791 }, { "epoch": 0.27, "grad_norm": 4.4686503410339355, "learning_rate": 1.9926504459376254e-05, "loss": 1.9645, "step": 20792 }, { "epoch": 0.27, "grad_norm": 3.9784369468688965, "learning_rate": 1.9926491742434734e-05, "loss": 2.0296, "step": 20793 }, { "epoch": 0.27, "grad_norm": 4.0805983543396, "learning_rate": 1.992647902439716e-05, "loss": 1.9808, "step": 20794 }, { "epoch": 0.27, "grad_norm": 3.9151415824890137, "learning_rate": 1.9926466305263535e-05, "loss": 2.1114, "step": 20795 }, { "epoch": 0.27, "grad_norm": 3.8483762741088867, "learning_rate": 1.9926453585033864e-05, "loss": 2.088, "step": 20796 }, { "epoch": 0.27, "grad_norm": 4.536091327667236, "learning_rate": 1.9926440863708135e-05, "loss": 2.0586, "step": 20797 }, { "epoch": 0.27, "grad_norm": 3.923110008239746, "learning_rate": 1.9926428141286366e-05, "loss": 2.2096, "step": 20798 }, { "epoch": 0.27, "grad_norm": 4.005709171295166, "learning_rate": 1.992641541776855e-05, "loss": 2.3243, "step": 20799 }, { "epoch": 0.27, "grad_norm": 4.055739402770996, "learning_rate": 1.9926402693154686e-05, "loss": 2.2549, "step": 20800 }, { "epoch": 0.27, "grad_norm": 4.271767616271973, "learning_rate": 1.992638996744478e-05, "loss": 2.5132, "step": 20801 }, { "epoch": 0.27, "grad_norm": 3.965733766555786, "learning_rate": 1.992637724063883e-05, "loss": 2.1357, "step": 20802 }, { "epoch": 0.27, "grad_norm": 4.923651695251465, "learning_rate": 1.9926364512736844e-05, "loss": 2.4616, "step": 20803 }, { "epoch": 0.27, "grad_norm": 4.370015621185303, "learning_rate": 1.992635178373882e-05, "loss": 2.3976, "step": 20804 }, { "epoch": 0.27, "grad_norm": 3.9319400787353516, "learning_rate": 1.992633905364475e-05, "loss": 2.0941, "step": 20805 }, { "epoch": 0.27, "grad_norm": 3.7018802165985107, "learning_rate": 1.9926326322454652e-05, "loss": 1.7236, "step": 20806 }, { "epoch": 0.27, "grad_norm": 4.014040946960449, "learning_rate": 1.9926313590168517e-05, "loss": 1.8446, "step": 20807 }, { "epoch": 0.27, "grad_norm": 4.069202423095703, "learning_rate": 1.9926300856786348e-05, "loss": 2.0708, "step": 20808 }, { "epoch": 0.27, "grad_norm": 4.113284587860107, "learning_rate": 1.9926288122308146e-05, "loss": 2.1944, "step": 20809 }, { "epoch": 0.27, "grad_norm": 3.619112968444824, "learning_rate": 1.9926275386733916e-05, "loss": 1.7303, "step": 20810 }, { "epoch": 0.27, "grad_norm": 3.636805534362793, "learning_rate": 1.9926262650063654e-05, "loss": 1.7327, "step": 20811 }, { "epoch": 0.27, "grad_norm": 3.69462251663208, "learning_rate": 1.9926249912297364e-05, "loss": 2.079, "step": 20812 }, { "epoch": 0.27, "grad_norm": 4.087643623352051, "learning_rate": 1.9926237173435048e-05, "loss": 2.3262, "step": 20813 }, { "epoch": 0.27, "grad_norm": 4.075960636138916, "learning_rate": 1.9926224433476712e-05, "loss": 2.115, "step": 20814 }, { "epoch": 0.27, "grad_norm": 4.340150356292725, "learning_rate": 1.9926211692422345e-05, "loss": 2.2408, "step": 20815 }, { "epoch": 0.27, "grad_norm": 4.05087423324585, "learning_rate": 1.992619895027196e-05, "loss": 2.3911, "step": 20816 }, { "epoch": 0.27, "grad_norm": 4.268445014953613, "learning_rate": 1.9926186207025553e-05, "loss": 2.4288, "step": 20817 }, { "epoch": 0.27, "grad_norm": 3.9643776416778564, "learning_rate": 1.9926173462683127e-05, "loss": 2.0022, "step": 20818 }, { "epoch": 0.27, "grad_norm": 3.6477532386779785, "learning_rate": 1.9926160717244684e-05, "loss": 1.8637, "step": 20819 }, { "epoch": 0.27, "grad_norm": 3.3533055782318115, "learning_rate": 1.9926147970710224e-05, "loss": 1.6221, "step": 20820 }, { "epoch": 0.27, "grad_norm": 4.580456733703613, "learning_rate": 1.9926135223079748e-05, "loss": 2.3494, "step": 20821 }, { "epoch": 0.27, "grad_norm": 4.071427822113037, "learning_rate": 1.992612247435326e-05, "loss": 2.0917, "step": 20822 }, { "epoch": 0.27, "grad_norm": 4.073963165283203, "learning_rate": 1.9926109724530757e-05, "loss": 1.9049, "step": 20823 }, { "epoch": 0.27, "grad_norm": 3.820665121078491, "learning_rate": 1.9926096973612244e-05, "loss": 1.966, "step": 20824 }, { "epoch": 0.27, "grad_norm": 4.702818393707275, "learning_rate": 1.9926084221597722e-05, "loss": 2.9172, "step": 20825 }, { "epoch": 0.27, "grad_norm": 3.6295218467712402, "learning_rate": 1.992607146848719e-05, "loss": 1.6857, "step": 20826 }, { "epoch": 0.27, "grad_norm": 3.962517738342285, "learning_rate": 1.9926058714280656e-05, "loss": 1.829, "step": 20827 }, { "epoch": 0.27, "grad_norm": 3.719639539718628, "learning_rate": 1.9926045958978114e-05, "loss": 2.4113, "step": 20828 }, { "epoch": 0.27, "grad_norm": 4.3939008712768555, "learning_rate": 1.9926033202579566e-05, "loss": 2.4859, "step": 20829 }, { "epoch": 0.27, "grad_norm": 4.166991710662842, "learning_rate": 1.9926020445085016e-05, "loss": 2.4166, "step": 20830 }, { "epoch": 0.27, "grad_norm": 5.077986717224121, "learning_rate": 1.992600768649447e-05, "loss": 2.3421, "step": 20831 }, { "epoch": 0.27, "grad_norm": 3.787581443786621, "learning_rate": 1.9925994926807922e-05, "loss": 2.0778, "step": 20832 }, { "epoch": 0.27, "grad_norm": 3.5635743141174316, "learning_rate": 1.992598216602537e-05, "loss": 1.6988, "step": 20833 }, { "epoch": 0.27, "grad_norm": 3.8420252799987793, "learning_rate": 1.9925969404146826e-05, "loss": 2.0961, "step": 20834 }, { "epoch": 0.27, "grad_norm": 4.799440383911133, "learning_rate": 1.9925956641172287e-05, "loss": 2.2716, "step": 20835 }, { "epoch": 0.27, "grad_norm": 4.278618812561035, "learning_rate": 1.9925943877101756e-05, "loss": 1.7861, "step": 20836 }, { "epoch": 0.27, "grad_norm": 3.9830069541931152, "learning_rate": 1.9925931111935228e-05, "loss": 2.1945, "step": 20837 }, { "epoch": 0.27, "grad_norm": 4.189311504364014, "learning_rate": 1.992591834567271e-05, "loss": 1.8403, "step": 20838 }, { "epoch": 0.27, "grad_norm": 4.563923358917236, "learning_rate": 1.9925905578314203e-05, "loss": 2.2568, "step": 20839 }, { "epoch": 0.27, "grad_norm": 4.2678985595703125, "learning_rate": 1.9925892809859707e-05, "loss": 2.1346, "step": 20840 }, { "epoch": 0.27, "grad_norm": 3.856978178024292, "learning_rate": 1.9925880040309224e-05, "loss": 2.0719, "step": 20841 }, { "epoch": 0.27, "grad_norm": 4.327990531921387, "learning_rate": 1.992586726966276e-05, "loss": 2.2084, "step": 20842 }, { "epoch": 0.27, "grad_norm": 4.4558281898498535, "learning_rate": 1.9925854497920304e-05, "loss": 2.4937, "step": 20843 }, { "epoch": 0.27, "grad_norm": 4.59482479095459, "learning_rate": 1.992584172508187e-05, "loss": 2.6459, "step": 20844 }, { "epoch": 0.27, "grad_norm": 4.515787601470947, "learning_rate": 1.9925828951147454e-05, "loss": 2.2748, "step": 20845 }, { "epoch": 0.27, "grad_norm": 3.9917333126068115, "learning_rate": 1.9925816176117058e-05, "loss": 2.1985, "step": 20846 }, { "epoch": 0.27, "grad_norm": 4.101279258728027, "learning_rate": 1.9925803399990686e-05, "loss": 2.2197, "step": 20847 }, { "epoch": 0.27, "grad_norm": 3.716905117034912, "learning_rate": 1.992579062276833e-05, "loss": 2.1059, "step": 20848 }, { "epoch": 0.27, "grad_norm": 4.1121416091918945, "learning_rate": 1.9925777844450004e-05, "loss": 1.9829, "step": 20849 }, { "epoch": 0.27, "grad_norm": 4.063420295715332, "learning_rate": 1.99257650650357e-05, "loss": 2.1719, "step": 20850 }, { "epoch": 0.27, "grad_norm": 4.521302223205566, "learning_rate": 1.9925752284525428e-05, "loss": 2.0333, "step": 20851 }, { "epoch": 0.27, "grad_norm": 4.5122880935668945, "learning_rate": 1.992573950291918e-05, "loss": 2.3016, "step": 20852 }, { "epoch": 0.27, "grad_norm": 3.774286985397339, "learning_rate": 1.9925726720216966e-05, "loss": 2.2925, "step": 20853 }, { "epoch": 0.27, "grad_norm": 4.0551886558532715, "learning_rate": 1.992571393641878e-05, "loss": 1.8223, "step": 20854 }, { "epoch": 0.27, "grad_norm": 4.264978885650635, "learning_rate": 1.9925701151524627e-05, "loss": 1.7688, "step": 20855 }, { "epoch": 0.27, "grad_norm": 4.266255855560303, "learning_rate": 1.992568836553451e-05, "loss": 1.9964, "step": 20856 }, { "epoch": 0.27, "grad_norm": 4.306110858917236, "learning_rate": 1.992567557844843e-05, "loss": 2.0373, "step": 20857 }, { "epoch": 0.27, "grad_norm": 4.154037952423096, "learning_rate": 1.9925662790266385e-05, "loss": 2.2048, "step": 20858 }, { "epoch": 0.27, "grad_norm": 4.20119047164917, "learning_rate": 1.9925650000988377e-05, "loss": 2.2503, "step": 20859 }, { "epoch": 0.27, "grad_norm": 4.174178123474121, "learning_rate": 1.992563721061441e-05, "loss": 2.0044, "step": 20860 }, { "epoch": 0.27, "grad_norm": 4.080992221832275, "learning_rate": 1.9925624419144486e-05, "loss": 2.2489, "step": 20861 }, { "epoch": 0.27, "grad_norm": 4.301136493682861, "learning_rate": 1.99256116265786e-05, "loss": 2.4885, "step": 20862 }, { "epoch": 0.27, "grad_norm": 4.101317882537842, "learning_rate": 1.9925598832916763e-05, "loss": 2.0201, "step": 20863 }, { "epoch": 0.27, "grad_norm": 4.192270278930664, "learning_rate": 1.992558603815897e-05, "loss": 2.436, "step": 20864 }, { "epoch": 0.27, "grad_norm": 4.307440757751465, "learning_rate": 1.9925573242305224e-05, "loss": 1.985, "step": 20865 }, { "epoch": 0.27, "grad_norm": 4.463769912719727, "learning_rate": 1.9925560445355526e-05, "loss": 2.395, "step": 20866 }, { "epoch": 0.27, "grad_norm": 4.689976215362549, "learning_rate": 1.9925547647309876e-05, "loss": 2.2723, "step": 20867 }, { "epoch": 0.27, "grad_norm": 4.457767486572266, "learning_rate": 1.992553484816828e-05, "loss": 2.0217, "step": 20868 }, { "epoch": 0.27, "grad_norm": 4.746998310089111, "learning_rate": 1.9925522047930736e-05, "loss": 2.3998, "step": 20869 }, { "epoch": 0.27, "grad_norm": 4.269191265106201, "learning_rate": 1.9925509246597246e-05, "loss": 2.2803, "step": 20870 }, { "epoch": 0.27, "grad_norm": 3.6897716522216797, "learning_rate": 1.9925496444167808e-05, "loss": 1.7988, "step": 20871 }, { "epoch": 0.27, "grad_norm": 4.4548516273498535, "learning_rate": 1.992548364064243e-05, "loss": 2.498, "step": 20872 }, { "epoch": 0.27, "grad_norm": 3.9591033458709717, "learning_rate": 1.992547083602111e-05, "loss": 2.4547, "step": 20873 }, { "epoch": 0.27, "grad_norm": 4.78331995010376, "learning_rate": 1.992545803030385e-05, "loss": 2.5202, "step": 20874 }, { "epoch": 0.27, "grad_norm": 4.285646438598633, "learning_rate": 1.992544522349065e-05, "loss": 2.1193, "step": 20875 }, { "epoch": 0.27, "grad_norm": 4.112438201904297, "learning_rate": 1.9925432415581515e-05, "loss": 1.908, "step": 20876 }, { "epoch": 0.27, "grad_norm": 4.539339542388916, "learning_rate": 1.992541960657644e-05, "loss": 1.8844, "step": 20877 }, { "epoch": 0.27, "grad_norm": 4.8057169914245605, "learning_rate": 1.992540679647543e-05, "loss": 2.3318, "step": 20878 }, { "epoch": 0.27, "grad_norm": 3.9447498321533203, "learning_rate": 1.992539398527849e-05, "loss": 2.439, "step": 20879 }, { "epoch": 0.27, "grad_norm": 3.7073512077331543, "learning_rate": 1.9925381172985617e-05, "loss": 1.7573, "step": 20880 }, { "epoch": 0.27, "grad_norm": 4.334427356719971, "learning_rate": 1.9925368359596816e-05, "loss": 2.1157, "step": 20881 }, { "epoch": 0.27, "grad_norm": 4.015194892883301, "learning_rate": 1.9925355545112083e-05, "loss": 2.1286, "step": 20882 }, { "epoch": 0.27, "grad_norm": 3.90226149559021, "learning_rate": 1.992534272953142e-05, "loss": 1.941, "step": 20883 }, { "epoch": 0.27, "grad_norm": 4.561120986938477, "learning_rate": 1.9925329912854834e-05, "loss": 1.9972, "step": 20884 }, { "epoch": 0.27, "grad_norm": 4.109804630279541, "learning_rate": 1.9925317095082325e-05, "loss": 2.147, "step": 20885 }, { "epoch": 0.27, "grad_norm": 3.6726765632629395, "learning_rate": 1.992530427621389e-05, "loss": 1.9556, "step": 20886 }, { "epoch": 0.27, "grad_norm": 4.377756595611572, "learning_rate": 1.9925291456249532e-05, "loss": 1.984, "step": 20887 }, { "epoch": 0.27, "grad_norm": 3.9972565174102783, "learning_rate": 1.9925278635189252e-05, "loss": 1.9305, "step": 20888 }, { "epoch": 0.27, "grad_norm": 4.35238790512085, "learning_rate": 1.992526581303306e-05, "loss": 2.0933, "step": 20889 }, { "epoch": 0.27, "grad_norm": 4.205996990203857, "learning_rate": 1.9925252989780946e-05, "loss": 1.7728, "step": 20890 }, { "epoch": 0.27, "grad_norm": 4.516195297241211, "learning_rate": 1.9925240165432914e-05, "loss": 2.3656, "step": 20891 }, { "epoch": 0.27, "grad_norm": 3.9845845699310303, "learning_rate": 1.992522733998897e-05, "loss": 2.0757, "step": 20892 }, { "epoch": 0.27, "grad_norm": 3.910478115081787, "learning_rate": 1.9925214513449108e-05, "loss": 2.0377, "step": 20893 }, { "epoch": 0.27, "grad_norm": 4.188969612121582, "learning_rate": 1.9925201685813337e-05, "loss": 2.0791, "step": 20894 }, { "epoch": 0.27, "grad_norm": 3.6738991737365723, "learning_rate": 1.9925188857081656e-05, "loss": 2.1413, "step": 20895 }, { "epoch": 0.27, "grad_norm": 4.218199729919434, "learning_rate": 1.9925176027254066e-05, "loss": 2.0274, "step": 20896 }, { "epoch": 0.27, "grad_norm": 4.049112796783447, "learning_rate": 1.9925163196330567e-05, "loss": 2.1186, "step": 20897 }, { "epoch": 0.27, "grad_norm": 4.4988112449646, "learning_rate": 1.9925150364311165e-05, "loss": 2.1749, "step": 20898 }, { "epoch": 0.27, "grad_norm": 4.307684421539307, "learning_rate": 1.9925137531195853e-05, "loss": 2.3688, "step": 20899 }, { "epoch": 0.27, "grad_norm": 3.879690170288086, "learning_rate": 1.9925124696984638e-05, "loss": 2.2365, "step": 20900 }, { "epoch": 0.27, "grad_norm": 4.002882480621338, "learning_rate": 1.9925111861677524e-05, "loss": 2.0284, "step": 20901 }, { "epoch": 0.27, "grad_norm": 3.8477590084075928, "learning_rate": 1.992509902527451e-05, "loss": 1.9072, "step": 20902 }, { "epoch": 0.27, "grad_norm": 5.0546698570251465, "learning_rate": 1.992508618777559e-05, "loss": 2.4236, "step": 20903 }, { "epoch": 0.27, "grad_norm": 3.9022395610809326, "learning_rate": 1.992507334918078e-05, "loss": 2.0132, "step": 20904 }, { "epoch": 0.27, "grad_norm": 4.468438625335693, "learning_rate": 1.9925060509490068e-05, "loss": 2.4059, "step": 20905 }, { "epoch": 0.27, "grad_norm": 4.369162559509277, "learning_rate": 1.9925047668703464e-05, "loss": 2.6548, "step": 20906 }, { "epoch": 0.27, "grad_norm": 3.915569543838501, "learning_rate": 1.9925034826820965e-05, "loss": 2.4127, "step": 20907 }, { "epoch": 0.27, "grad_norm": 3.7806057929992676, "learning_rate": 1.9925021983842575e-05, "loss": 2.3799, "step": 20908 }, { "epoch": 0.27, "grad_norm": 3.802706480026245, "learning_rate": 1.992500913976829e-05, "loss": 1.9047, "step": 20909 }, { "epoch": 0.27, "grad_norm": 4.303686141967773, "learning_rate": 1.992499629459812e-05, "loss": 1.9551, "step": 20910 }, { "epoch": 0.27, "grad_norm": 4.124557971954346, "learning_rate": 1.992498344833206e-05, "loss": 2.0284, "step": 20911 }, { "epoch": 0.27, "grad_norm": 4.387024402618408, "learning_rate": 1.9924970600970118e-05, "loss": 2.0341, "step": 20912 }, { "epoch": 0.27, "grad_norm": 4.089174747467041, "learning_rate": 1.9924957752512285e-05, "loss": 2.1008, "step": 20913 }, { "epoch": 0.27, "grad_norm": 3.994495153427124, "learning_rate": 1.9924944902958572e-05, "loss": 2.0831, "step": 20914 }, { "epoch": 0.27, "grad_norm": 3.842388153076172, "learning_rate": 1.9924932052308974e-05, "loss": 2.0043, "step": 20915 }, { "epoch": 0.27, "grad_norm": 3.8849284648895264, "learning_rate": 1.9924919200563495e-05, "loss": 2.289, "step": 20916 }, { "epoch": 0.27, "grad_norm": 4.304007530212402, "learning_rate": 1.992490634772214e-05, "loss": 1.968, "step": 20917 }, { "epoch": 0.27, "grad_norm": 4.150581359863281, "learning_rate": 1.99248934937849e-05, "loss": 2.1697, "step": 20918 }, { "epoch": 0.27, "grad_norm": 4.366603374481201, "learning_rate": 1.9924880638751792e-05, "loss": 2.1369, "step": 20919 }, { "epoch": 0.27, "grad_norm": 4.371137619018555, "learning_rate": 1.9924867782622806e-05, "loss": 2.3828, "step": 20920 }, { "epoch": 0.27, "grad_norm": 4.3687591552734375, "learning_rate": 1.9924854925397942e-05, "loss": 2.7243, "step": 20921 }, { "epoch": 0.27, "grad_norm": 3.6480281352996826, "learning_rate": 1.992484206707721e-05, "loss": 1.7988, "step": 20922 }, { "epoch": 0.27, "grad_norm": 4.0026655197143555, "learning_rate": 1.9924829207660605e-05, "loss": 1.9535, "step": 20923 }, { "epoch": 0.27, "grad_norm": 3.847083568572998, "learning_rate": 1.9924816347148132e-05, "loss": 1.722, "step": 20924 }, { "epoch": 0.27, "grad_norm": 3.684659719467163, "learning_rate": 1.992480348553979e-05, "loss": 1.7604, "step": 20925 }, { "epoch": 0.27, "grad_norm": 4.486881732940674, "learning_rate": 1.9924790622835583e-05, "loss": 2.3466, "step": 20926 }, { "epoch": 0.27, "grad_norm": 3.8275206089019775, "learning_rate": 1.992477775903551e-05, "loss": 2.0358, "step": 20927 }, { "epoch": 0.27, "grad_norm": 4.187657356262207, "learning_rate": 1.992476489413957e-05, "loss": 1.9849, "step": 20928 }, { "epoch": 0.27, "grad_norm": 3.8423385620117188, "learning_rate": 1.992475202814777e-05, "loss": 2.0908, "step": 20929 }, { "epoch": 0.27, "grad_norm": 3.3099429607391357, "learning_rate": 1.992473916106011e-05, "loss": 1.89, "step": 20930 }, { "epoch": 0.27, "grad_norm": 4.0038580894470215, "learning_rate": 1.9924726292876587e-05, "loss": 2.1305, "step": 20931 }, { "epoch": 0.27, "grad_norm": 4.214428901672363, "learning_rate": 1.992471342359721e-05, "loss": 2.3444, "step": 20932 }, { "epoch": 0.27, "grad_norm": 3.7016801834106445, "learning_rate": 1.9924700553221977e-05, "loss": 1.9457, "step": 20933 }, { "epoch": 0.27, "grad_norm": 3.996744155883789, "learning_rate": 1.9924687681750883e-05, "loss": 1.9917, "step": 20934 }, { "epoch": 0.27, "grad_norm": 4.735980987548828, "learning_rate": 1.992467480918394e-05, "loss": 2.3882, "step": 20935 }, { "epoch": 0.27, "grad_norm": 4.1100969314575195, "learning_rate": 1.9924661935521143e-05, "loss": 1.9911, "step": 20936 }, { "epoch": 0.27, "grad_norm": 3.9550609588623047, "learning_rate": 1.9924649060762494e-05, "loss": 1.868, "step": 20937 }, { "epoch": 0.27, "grad_norm": 3.9579594135284424, "learning_rate": 1.9924636184907995e-05, "loss": 1.8594, "step": 20938 }, { "epoch": 0.27, "grad_norm": 3.909780979156494, "learning_rate": 1.9924623307957648e-05, "loss": 1.7478, "step": 20939 }, { "epoch": 0.27, "grad_norm": 3.766894578933716, "learning_rate": 1.9924610429911457e-05, "loss": 2.0117, "step": 20940 }, { "epoch": 0.27, "grad_norm": 4.45396614074707, "learning_rate": 1.992459755076942e-05, "loss": 2.2004, "step": 20941 }, { "epoch": 0.27, "grad_norm": 4.177481174468994, "learning_rate": 1.9924584670531537e-05, "loss": 2.2216, "step": 20942 }, { "epoch": 0.27, "grad_norm": 4.148126602172852, "learning_rate": 1.992457178919781e-05, "loss": 2.2371, "step": 20943 }, { "epoch": 0.27, "grad_norm": 3.9597830772399902, "learning_rate": 1.9924558906768246e-05, "loss": 1.9404, "step": 20944 }, { "epoch": 0.27, "grad_norm": 3.816643238067627, "learning_rate": 1.9924546023242843e-05, "loss": 2.1473, "step": 20945 }, { "epoch": 0.27, "grad_norm": 3.7122280597686768, "learning_rate": 1.99245331386216e-05, "loss": 2.0346, "step": 20946 }, { "epoch": 0.27, "grad_norm": 4.140331268310547, "learning_rate": 1.992452025290452e-05, "loss": 2.2232, "step": 20947 }, { "epoch": 0.27, "grad_norm": 4.132875442504883, "learning_rate": 1.9924507366091602e-05, "loss": 2.2092, "step": 20948 }, { "epoch": 0.27, "grad_norm": 4.120358467102051, "learning_rate": 1.992449447818285e-05, "loss": 2.3699, "step": 20949 }, { "epoch": 0.27, "grad_norm": 4.020770072937012, "learning_rate": 1.992448158917827e-05, "loss": 1.6032, "step": 20950 }, { "epoch": 0.27, "grad_norm": 4.003241539001465, "learning_rate": 1.9924468699077855e-05, "loss": 1.9703, "step": 20951 }, { "epoch": 0.27, "grad_norm": 3.8577792644500732, "learning_rate": 1.9924455807881613e-05, "loss": 1.8897, "step": 20952 }, { "epoch": 0.27, "grad_norm": 4.875311851501465, "learning_rate": 1.9924442915589538e-05, "loss": 1.9869, "step": 20953 }, { "epoch": 0.27, "grad_norm": 4.474328517913818, "learning_rate": 1.9924430022201642e-05, "loss": 2.0988, "step": 20954 }, { "epoch": 0.27, "grad_norm": 3.5355942249298096, "learning_rate": 1.992441712771792e-05, "loss": 1.9556, "step": 20955 }, { "epoch": 0.27, "grad_norm": 3.7205846309661865, "learning_rate": 1.9924404232138367e-05, "loss": 1.6597, "step": 20956 }, { "epoch": 0.27, "grad_norm": 4.330239772796631, "learning_rate": 1.9924391335463e-05, "loss": 2.1791, "step": 20957 }, { "epoch": 0.27, "grad_norm": 3.329300880432129, "learning_rate": 1.9924378437691805e-05, "loss": 1.5433, "step": 20958 }, { "epoch": 0.27, "grad_norm": 4.243096351623535, "learning_rate": 1.9924365538824796e-05, "loss": 2.4008, "step": 20959 }, { "epoch": 0.27, "grad_norm": 4.0967326164245605, "learning_rate": 1.9924352638861967e-05, "loss": 2.0561, "step": 20960 }, { "epoch": 0.27, "grad_norm": 4.929362773895264, "learning_rate": 1.9924339737803317e-05, "loss": 2.0497, "step": 20961 }, { "epoch": 0.27, "grad_norm": 4.191218852996826, "learning_rate": 1.9924326835648858e-05, "loss": 2.0947, "step": 20962 }, { "epoch": 0.27, "grad_norm": 4.021031379699707, "learning_rate": 1.992431393239858e-05, "loss": 1.935, "step": 20963 }, { "epoch": 0.27, "grad_norm": 3.694932699203491, "learning_rate": 1.9924301028052492e-05, "loss": 1.9852, "step": 20964 }, { "epoch": 0.27, "grad_norm": 3.469651699066162, "learning_rate": 1.9924288122610593e-05, "loss": 1.6073, "step": 20965 }, { "epoch": 0.27, "grad_norm": 4.171295166015625, "learning_rate": 1.9924275216072884e-05, "loss": 2.0423, "step": 20966 }, { "epoch": 0.27, "grad_norm": 4.077709674835205, "learning_rate": 1.9924262308439365e-05, "loss": 2.3222, "step": 20967 }, { "epoch": 0.27, "grad_norm": 4.620412826538086, "learning_rate": 1.992424939971004e-05, "loss": 2.3406, "step": 20968 }, { "epoch": 0.27, "grad_norm": 4.58140230178833, "learning_rate": 1.9924236489884907e-05, "loss": 2.6507, "step": 20969 }, { "epoch": 0.27, "grad_norm": 5.7335076332092285, "learning_rate": 1.9924223578963975e-05, "loss": 2.1332, "step": 20970 }, { "epoch": 0.27, "grad_norm": 5.229110240936279, "learning_rate": 1.9924210666947236e-05, "loss": 2.2514, "step": 20971 }, { "epoch": 0.27, "grad_norm": 4.3927507400512695, "learning_rate": 1.9924197753834698e-05, "loss": 2.2833, "step": 20972 }, { "epoch": 0.27, "grad_norm": 4.316366672515869, "learning_rate": 1.992418483962636e-05, "loss": 2.0944, "step": 20973 }, { "epoch": 0.27, "grad_norm": 4.740118026733398, "learning_rate": 1.9924171924322222e-05, "loss": 2.6854, "step": 20974 }, { "epoch": 0.27, "grad_norm": 4.708709239959717, "learning_rate": 1.9924159007922288e-05, "loss": 2.441, "step": 20975 }, { "epoch": 0.27, "grad_norm": 3.9509735107421875, "learning_rate": 1.9924146090426558e-05, "loss": 2.1512, "step": 20976 }, { "epoch": 0.27, "grad_norm": 3.778381824493408, "learning_rate": 1.9924133171835038e-05, "loss": 1.9458, "step": 20977 }, { "epoch": 0.27, "grad_norm": 4.072695732116699, "learning_rate": 1.992412025214772e-05, "loss": 2.3919, "step": 20978 }, { "epoch": 0.27, "grad_norm": 4.330350399017334, "learning_rate": 1.9924107331364612e-05, "loss": 2.1559, "step": 20979 }, { "epoch": 0.27, "grad_norm": 3.938080310821533, "learning_rate": 1.9924094409485713e-05, "loss": 1.9934, "step": 20980 }, { "epoch": 0.27, "grad_norm": 3.5421712398529053, "learning_rate": 1.992408148651103e-05, "loss": 1.602, "step": 20981 }, { "epoch": 0.27, "grad_norm": 4.837985992431641, "learning_rate": 1.9924068562440557e-05, "loss": 2.642, "step": 20982 }, { "epoch": 0.27, "grad_norm": 3.677402973175049, "learning_rate": 1.99240556372743e-05, "loss": 1.7477, "step": 20983 }, { "epoch": 0.27, "grad_norm": 4.261800289154053, "learning_rate": 1.9924042711012255e-05, "loss": 2.1289, "step": 20984 }, { "epoch": 0.27, "grad_norm": 4.173195838928223, "learning_rate": 1.9924029783654432e-05, "loss": 2.2251, "step": 20985 }, { "epoch": 0.27, "grad_norm": 3.7800097465515137, "learning_rate": 1.9924016855200826e-05, "loss": 1.6895, "step": 20986 }, { "epoch": 0.27, "grad_norm": 4.203383922576904, "learning_rate": 1.992400392565144e-05, "loss": 2.013, "step": 20987 }, { "epoch": 0.27, "grad_norm": 4.189277648925781, "learning_rate": 1.9923990995006276e-05, "loss": 2.2793, "step": 20988 }, { "epoch": 0.27, "grad_norm": 4.084090232849121, "learning_rate": 1.9923978063265336e-05, "loss": 2.1179, "step": 20989 }, { "epoch": 0.27, "grad_norm": 4.0286736488342285, "learning_rate": 1.992396513042862e-05, "loss": 1.8392, "step": 20990 }, { "epoch": 0.27, "grad_norm": 4.480679035186768, "learning_rate": 1.992395219649613e-05, "loss": 2.4132, "step": 20991 }, { "epoch": 0.27, "grad_norm": 3.764671564102173, "learning_rate": 1.9923939261467867e-05, "loss": 1.6496, "step": 20992 }, { "epoch": 0.27, "grad_norm": 3.874107837677002, "learning_rate": 1.992392632534383e-05, "loss": 2.3529, "step": 20993 }, { "epoch": 0.27, "grad_norm": 4.211856842041016, "learning_rate": 1.9923913388124028e-05, "loss": 2.363, "step": 20994 }, { "epoch": 0.27, "grad_norm": 4.577627658843994, "learning_rate": 1.992390044980845e-05, "loss": 2.245, "step": 20995 }, { "epoch": 0.27, "grad_norm": 3.810930013656616, "learning_rate": 1.9923887510397113e-05, "loss": 1.842, "step": 20996 }, { "epoch": 0.27, "grad_norm": 4.611776828765869, "learning_rate": 1.992387456989001e-05, "loss": 1.6865, "step": 20997 }, { "epoch": 0.27, "grad_norm": 4.366415023803711, "learning_rate": 1.992386162828714e-05, "loss": 2.2076, "step": 20998 }, { "epoch": 0.27, "grad_norm": 4.0999321937561035, "learning_rate": 1.9923848685588507e-05, "loss": 1.8699, "step": 20999 }, { "epoch": 0.27, "grad_norm": 3.633394241333008, "learning_rate": 1.9923835741794114e-05, "loss": 1.8329, "step": 21000 }, { "epoch": 0.27, "grad_norm": 4.08695125579834, "learning_rate": 1.992382279690396e-05, "loss": 2.3386, "step": 21001 }, { "epoch": 0.27, "grad_norm": 4.555089950561523, "learning_rate": 1.992380985091805e-05, "loss": 2.6612, "step": 21002 }, { "epoch": 0.27, "grad_norm": 4.256783485412598, "learning_rate": 1.992379690383638e-05, "loss": 1.8692, "step": 21003 }, { "epoch": 0.27, "grad_norm": 3.713899612426758, "learning_rate": 1.9923783955658954e-05, "loss": 2.0764, "step": 21004 }, { "epoch": 0.27, "grad_norm": 4.093313694000244, "learning_rate": 1.9923771006385776e-05, "loss": 2.3546, "step": 21005 }, { "epoch": 0.27, "grad_norm": 3.6759331226348877, "learning_rate": 1.9923758056016846e-05, "loss": 2.044, "step": 21006 }, { "epoch": 0.27, "grad_norm": 4.0076189041137695, "learning_rate": 1.9923745104552163e-05, "loss": 2.0755, "step": 21007 }, { "epoch": 0.27, "grad_norm": 4.490671157836914, "learning_rate": 1.992373215199173e-05, "loss": 2.3253, "step": 21008 }, { "epoch": 0.27, "grad_norm": 4.74860143661499, "learning_rate": 1.9923719198335552e-05, "loss": 2.3533, "step": 21009 }, { "epoch": 0.27, "grad_norm": 3.7990620136260986, "learning_rate": 1.9923706243583623e-05, "loss": 1.8784, "step": 21010 }, { "epoch": 0.27, "grad_norm": 3.5959763526916504, "learning_rate": 1.992369328773595e-05, "loss": 1.8986, "step": 21011 }, { "epoch": 0.27, "grad_norm": 4.1335062980651855, "learning_rate": 1.992368033079253e-05, "loss": 2.7651, "step": 21012 }, { "epoch": 0.27, "grad_norm": 3.9429125785827637, "learning_rate": 1.9923667372753372e-05, "loss": 2.0626, "step": 21013 }, { "epoch": 0.27, "grad_norm": 4.2115278244018555, "learning_rate": 1.992365441361847e-05, "loss": 2.0005, "step": 21014 }, { "epoch": 0.27, "grad_norm": 4.900722980499268, "learning_rate": 1.992364145338783e-05, "loss": 2.173, "step": 21015 }, { "epoch": 0.27, "grad_norm": 3.9392220973968506, "learning_rate": 1.992362849206145e-05, "loss": 1.9668, "step": 21016 }, { "epoch": 0.27, "grad_norm": 4.051976203918457, "learning_rate": 1.9923615529639335e-05, "loss": 2.2464, "step": 21017 }, { "epoch": 0.27, "grad_norm": 4.283846855163574, "learning_rate": 1.9923602566121483e-05, "loss": 1.9873, "step": 21018 }, { "epoch": 0.27, "grad_norm": 4.331822395324707, "learning_rate": 1.99235896015079e-05, "loss": 2.2532, "step": 21019 }, { "epoch": 0.27, "grad_norm": 4.109920501708984, "learning_rate": 1.992357663579858e-05, "loss": 2.5647, "step": 21020 }, { "epoch": 0.27, "grad_norm": 4.297496318817139, "learning_rate": 1.9923563668993532e-05, "loss": 2.7324, "step": 21021 }, { "epoch": 0.27, "grad_norm": 4.163625240325928, "learning_rate": 1.9923550701092752e-05, "loss": 2.2811, "step": 21022 }, { "epoch": 0.27, "grad_norm": 3.9913899898529053, "learning_rate": 1.9923537732096242e-05, "loss": 2.3156, "step": 21023 }, { "epoch": 0.27, "grad_norm": 4.7569074630737305, "learning_rate": 1.9923524762004008e-05, "loss": 1.9489, "step": 21024 }, { "epoch": 0.27, "grad_norm": 4.14232873916626, "learning_rate": 1.992351179081605e-05, "loss": 1.9862, "step": 21025 }, { "epoch": 0.27, "grad_norm": 4.186316013336182, "learning_rate": 1.9923498818532363e-05, "loss": 2.4046, "step": 21026 }, { "epoch": 0.27, "grad_norm": 4.464112758636475, "learning_rate": 1.9923485845152956e-05, "loss": 2.1747, "step": 21027 }, { "epoch": 0.27, "grad_norm": 4.151054859161377, "learning_rate": 1.992347287067783e-05, "loss": 1.7142, "step": 21028 }, { "epoch": 0.27, "grad_norm": 3.779927968978882, "learning_rate": 1.992345989510698e-05, "loss": 1.7307, "step": 21029 }, { "epoch": 0.27, "grad_norm": 3.743999719619751, "learning_rate": 1.9923446918440415e-05, "loss": 1.8099, "step": 21030 }, { "epoch": 0.27, "grad_norm": 3.6241581439971924, "learning_rate": 1.992343394067813e-05, "loss": 1.9617, "step": 21031 }, { "epoch": 0.27, "grad_norm": 4.290160179138184, "learning_rate": 1.9923420961820136e-05, "loss": 2.0486, "step": 21032 }, { "epoch": 0.27, "grad_norm": 4.203474998474121, "learning_rate": 1.9923407981866422e-05, "loss": 2.0207, "step": 21033 }, { "epoch": 0.27, "grad_norm": 4.125453472137451, "learning_rate": 1.9923395000816997e-05, "loss": 2.453, "step": 21034 }, { "epoch": 0.27, "grad_norm": 4.1458940505981445, "learning_rate": 1.992338201867186e-05, "loss": 2.2776, "step": 21035 }, { "epoch": 0.27, "grad_norm": 3.8626580238342285, "learning_rate": 1.9923369035431014e-05, "loss": 1.8246, "step": 21036 }, { "epoch": 0.27, "grad_norm": 5.228790283203125, "learning_rate": 1.992335605109446e-05, "loss": 2.9492, "step": 21037 }, { "epoch": 0.27, "grad_norm": 3.641083002090454, "learning_rate": 1.99233430656622e-05, "loss": 1.9341, "step": 21038 }, { "epoch": 0.27, "grad_norm": 3.678165912628174, "learning_rate": 1.9923330079134234e-05, "loss": 2.0232, "step": 21039 }, { "epoch": 0.27, "grad_norm": 4.261023998260498, "learning_rate": 1.9923317091510562e-05, "loss": 2.6653, "step": 21040 }, { "epoch": 0.27, "grad_norm": 3.815516710281372, "learning_rate": 1.992330410279119e-05, "loss": 1.7417, "step": 21041 }, { "epoch": 0.27, "grad_norm": 3.9865152835845947, "learning_rate": 1.9923291112976114e-05, "loss": 2.2815, "step": 21042 }, { "epoch": 0.27, "grad_norm": 3.9057908058166504, "learning_rate": 1.992327812206534e-05, "loss": 2.0289, "step": 21043 }, { "epoch": 0.27, "grad_norm": 3.8884389400482178, "learning_rate": 1.992326513005887e-05, "loss": 2.0938, "step": 21044 }, { "epoch": 0.27, "grad_norm": 4.000949859619141, "learning_rate": 1.9923252136956698e-05, "loss": 1.7787, "step": 21045 }, { "epoch": 0.27, "grad_norm": 4.4778594970703125, "learning_rate": 1.9923239142758834e-05, "loss": 2.5445, "step": 21046 }, { "epoch": 0.27, "grad_norm": 4.007806301116943, "learning_rate": 1.992322614746528e-05, "loss": 1.9572, "step": 21047 }, { "epoch": 0.27, "grad_norm": 4.164754867553711, "learning_rate": 1.9923213151076025e-05, "loss": 2.0799, "step": 21048 }, { "epoch": 0.27, "grad_norm": 4.046420574188232, "learning_rate": 1.9923200153591084e-05, "loss": 2.3138, "step": 21049 }, { "epoch": 0.27, "grad_norm": 3.63220477104187, "learning_rate": 1.9923187155010452e-05, "loss": 2.0627, "step": 21050 }, { "epoch": 0.27, "grad_norm": 4.065985202789307, "learning_rate": 1.9923174155334135e-05, "loss": 2.2733, "step": 21051 }, { "epoch": 0.27, "grad_norm": 4.232672214508057, "learning_rate": 1.9923161154562126e-05, "loss": 1.9817, "step": 21052 }, { "epoch": 0.27, "grad_norm": 4.280035972595215, "learning_rate": 1.9923148152694437e-05, "loss": 2.1851, "step": 21053 }, { "epoch": 0.27, "grad_norm": 3.740856647491455, "learning_rate": 1.992313514973106e-05, "loss": 1.9662, "step": 21054 }, { "epoch": 0.27, "grad_norm": 3.4778213500976562, "learning_rate": 1.9923122145672004e-05, "loss": 1.9659, "step": 21055 }, { "epoch": 0.27, "grad_norm": 4.488327503204346, "learning_rate": 1.9923109140517264e-05, "loss": 2.6665, "step": 21056 }, { "epoch": 0.27, "grad_norm": 4.233565807342529, "learning_rate": 1.9923096134266843e-05, "loss": 2.1704, "step": 21057 }, { "epoch": 0.27, "grad_norm": 3.6086909770965576, "learning_rate": 1.9923083126920748e-05, "loss": 1.7056, "step": 21058 }, { "epoch": 0.27, "grad_norm": 4.060432434082031, "learning_rate": 1.9923070118478974e-05, "loss": 1.7984, "step": 21059 }, { "epoch": 0.27, "grad_norm": 3.7664506435394287, "learning_rate": 1.992305710894152e-05, "loss": 2.068, "step": 21060 }, { "epoch": 0.27, "grad_norm": 4.125385284423828, "learning_rate": 1.9923044098308398e-05, "loss": 2.6167, "step": 21061 }, { "epoch": 0.27, "grad_norm": 3.7959752082824707, "learning_rate": 1.9923031086579605e-05, "loss": 2.1452, "step": 21062 }, { "epoch": 0.27, "grad_norm": 3.9239675998687744, "learning_rate": 1.9923018073755138e-05, "loss": 1.7012, "step": 21063 }, { "epoch": 0.27, "grad_norm": 3.8718245029449463, "learning_rate": 1.9923005059835003e-05, "loss": 2.2846, "step": 21064 }, { "epoch": 0.27, "grad_norm": 3.8697919845581055, "learning_rate": 1.9922992044819197e-05, "loss": 2.2859, "step": 21065 }, { "epoch": 0.27, "grad_norm": 3.8238601684570312, "learning_rate": 1.9922979028707727e-05, "loss": 1.9609, "step": 21066 }, { "epoch": 0.27, "grad_norm": 4.06227970123291, "learning_rate": 1.9922966011500592e-05, "loss": 2.1208, "step": 21067 }, { "epoch": 0.27, "grad_norm": 4.305540561676025, "learning_rate": 1.9922952993197794e-05, "loss": 2.2521, "step": 21068 }, { "epoch": 0.27, "grad_norm": 3.5188512802124023, "learning_rate": 1.992293997379933e-05, "loss": 1.822, "step": 21069 }, { "epoch": 0.27, "grad_norm": 4.290369987487793, "learning_rate": 1.9922926953305207e-05, "loss": 1.9593, "step": 21070 }, { "epoch": 0.27, "grad_norm": 4.192923069000244, "learning_rate": 1.9922913931715423e-05, "loss": 1.8813, "step": 21071 }, { "epoch": 0.27, "grad_norm": 4.326876640319824, "learning_rate": 1.9922900909029984e-05, "loss": 2.334, "step": 21072 }, { "epoch": 0.27, "grad_norm": 3.7675437927246094, "learning_rate": 1.9922887885248888e-05, "loss": 2.2159, "step": 21073 }, { "epoch": 0.27, "grad_norm": 4.027259826660156, "learning_rate": 1.9922874860372135e-05, "loss": 2.152, "step": 21074 }, { "epoch": 0.27, "grad_norm": 4.499420166015625, "learning_rate": 1.992286183439973e-05, "loss": 2.5973, "step": 21075 }, { "epoch": 0.27, "grad_norm": 4.389410018920898, "learning_rate": 1.9922848807331672e-05, "loss": 2.2047, "step": 21076 }, { "epoch": 0.27, "grad_norm": 4.555290222167969, "learning_rate": 1.992283577916796e-05, "loss": 2.1605, "step": 21077 }, { "epoch": 0.27, "grad_norm": 3.722900152206421, "learning_rate": 1.9922822749908604e-05, "loss": 1.9474, "step": 21078 }, { "epoch": 0.27, "grad_norm": 4.233384132385254, "learning_rate": 1.9922809719553597e-05, "loss": 2.4107, "step": 21079 }, { "epoch": 0.27, "grad_norm": 4.621379375457764, "learning_rate": 1.9922796688102943e-05, "loss": 2.0478, "step": 21080 }, { "epoch": 0.27, "grad_norm": 3.5800514221191406, "learning_rate": 1.9922783655556645e-05, "loss": 1.9015, "step": 21081 }, { "epoch": 0.27, "grad_norm": 3.9190964698791504, "learning_rate": 1.9922770621914703e-05, "loss": 2.0559, "step": 21082 }, { "epoch": 0.27, "grad_norm": 3.6632704734802246, "learning_rate": 1.992275758717712e-05, "loss": 1.8999, "step": 21083 }, { "epoch": 0.27, "grad_norm": 3.8705737590789795, "learning_rate": 1.9922744551343895e-05, "loss": 1.502, "step": 21084 }, { "epoch": 0.27, "grad_norm": 4.81853723526001, "learning_rate": 1.992273151441503e-05, "loss": 2.1528, "step": 21085 }, { "epoch": 0.27, "grad_norm": 3.9133687019348145, "learning_rate": 1.992271847639053e-05, "loss": 1.7868, "step": 21086 }, { "epoch": 0.27, "grad_norm": 3.8922736644744873, "learning_rate": 1.992270543727039e-05, "loss": 1.7664, "step": 21087 }, { "epoch": 0.27, "grad_norm": 4.254021644592285, "learning_rate": 1.992269239705462e-05, "loss": 1.9937, "step": 21088 }, { "epoch": 0.27, "grad_norm": 4.908384799957275, "learning_rate": 1.992267935574321e-05, "loss": 2.4472, "step": 21089 }, { "epoch": 0.27, "grad_norm": 3.664872884750366, "learning_rate": 1.992266631333617e-05, "loss": 1.974, "step": 21090 }, { "epoch": 0.27, "grad_norm": 3.9981040954589844, "learning_rate": 1.9922653269833502e-05, "loss": 2.1601, "step": 21091 }, { "epoch": 0.27, "grad_norm": 4.097056865692139, "learning_rate": 1.9922640225235203e-05, "loss": 2.1116, "step": 21092 }, { "epoch": 0.27, "grad_norm": 4.106936931610107, "learning_rate": 1.9922627179541276e-05, "loss": 2.387, "step": 21093 }, { "epoch": 0.27, "grad_norm": 3.9547722339630127, "learning_rate": 1.992261413275172e-05, "loss": 1.9702, "step": 21094 }, { "epoch": 0.27, "grad_norm": 4.224384784698486, "learning_rate": 1.9922601084866542e-05, "loss": 2.0164, "step": 21095 }, { "epoch": 0.27, "grad_norm": 4.122386455535889, "learning_rate": 1.9922588035885742e-05, "loss": 2.0039, "step": 21096 }, { "epoch": 0.27, "grad_norm": 3.655029296875, "learning_rate": 1.9922574985809318e-05, "loss": 1.5895, "step": 21097 }, { "epoch": 0.27, "grad_norm": 3.539557695388794, "learning_rate": 1.9922561934637274e-05, "loss": 1.7157, "step": 21098 }, { "epoch": 0.27, "grad_norm": 3.9834139347076416, "learning_rate": 1.992254888236961e-05, "loss": 2.1612, "step": 21099 }, { "epoch": 0.27, "grad_norm": 3.2050015926361084, "learning_rate": 1.9922535829006328e-05, "loss": 1.4967, "step": 21100 }, { "epoch": 0.27, "grad_norm": 4.158331871032715, "learning_rate": 1.992252277454743e-05, "loss": 2.3731, "step": 21101 }, { "epoch": 0.27, "grad_norm": 3.8041303157806396, "learning_rate": 1.992250971899292e-05, "loss": 1.7562, "step": 21102 }, { "epoch": 0.27, "grad_norm": 4.166954517364502, "learning_rate": 1.992249666234279e-05, "loss": 2.1919, "step": 21103 }, { "epoch": 0.27, "grad_norm": 3.988508701324463, "learning_rate": 1.9922483604597055e-05, "loss": 2.028, "step": 21104 }, { "epoch": 0.27, "grad_norm": 4.4003190994262695, "learning_rate": 1.9922470545755707e-05, "loss": 2.2372, "step": 21105 }, { "epoch": 0.27, "grad_norm": 4.150514602661133, "learning_rate": 1.992245748581875e-05, "loss": 2.5134, "step": 21106 }, { "epoch": 0.27, "grad_norm": 4.589845180511475, "learning_rate": 1.9922444424786184e-05, "loss": 2.6103, "step": 21107 }, { "epoch": 0.27, "grad_norm": 4.574070930480957, "learning_rate": 1.9922431362658013e-05, "loss": 2.3678, "step": 21108 }, { "epoch": 0.27, "grad_norm": 3.551619052886963, "learning_rate": 1.9922418299434234e-05, "loss": 1.9171, "step": 21109 }, { "epoch": 0.27, "grad_norm": 4.20148229598999, "learning_rate": 1.9922405235114856e-05, "loss": 1.9147, "step": 21110 }, { "epoch": 0.27, "grad_norm": 4.318556785583496, "learning_rate": 1.9922392169699875e-05, "loss": 1.7679, "step": 21111 }, { "epoch": 0.27, "grad_norm": 3.448423147201538, "learning_rate": 1.992237910318929e-05, "loss": 1.8217, "step": 21112 }, { "epoch": 0.27, "grad_norm": 4.476178169250488, "learning_rate": 1.9922366035583113e-05, "loss": 2.4386, "step": 21113 }, { "epoch": 0.27, "grad_norm": 4.2729973793029785, "learning_rate": 1.9922352966881333e-05, "loss": 2.6945, "step": 21114 }, { "epoch": 0.27, "grad_norm": 3.8588757514953613, "learning_rate": 1.992233989708396e-05, "loss": 1.8643, "step": 21115 }, { "epoch": 0.27, "grad_norm": 4.320009708404541, "learning_rate": 1.992232682619099e-05, "loss": 1.7366, "step": 21116 }, { "epoch": 0.27, "grad_norm": 3.967501163482666, "learning_rate": 1.992231375420243e-05, "loss": 1.7509, "step": 21117 }, { "epoch": 0.27, "grad_norm": 4.2366943359375, "learning_rate": 1.9922300681118276e-05, "loss": 2.1675, "step": 21118 }, { "epoch": 0.27, "grad_norm": 4.452502727508545, "learning_rate": 1.992228760693853e-05, "loss": 2.1987, "step": 21119 }, { "epoch": 0.27, "grad_norm": 3.9714527130126953, "learning_rate": 1.9922274531663194e-05, "loss": 1.9749, "step": 21120 }, { "epoch": 0.27, "grad_norm": 4.2601823806762695, "learning_rate": 1.9922261455292275e-05, "loss": 1.9861, "step": 21121 }, { "epoch": 0.27, "grad_norm": 3.9302515983581543, "learning_rate": 1.9922248377825767e-05, "loss": 1.7472, "step": 21122 }, { "epoch": 0.27, "grad_norm": 3.5773394107818604, "learning_rate": 1.9922235299263675e-05, "loss": 1.6488, "step": 21123 }, { "epoch": 0.27, "grad_norm": 4.299548149108887, "learning_rate": 1.9922222219606e-05, "loss": 2.0759, "step": 21124 }, { "epoch": 0.27, "grad_norm": 3.8130078315734863, "learning_rate": 1.9922209138852747e-05, "loss": 2.163, "step": 21125 }, { "epoch": 0.27, "grad_norm": 4.282569885253906, "learning_rate": 1.992219605700391e-05, "loss": 2.1262, "step": 21126 }, { "epoch": 0.27, "grad_norm": 4.140136241912842, "learning_rate": 1.9922182974059495e-05, "loss": 1.9834, "step": 21127 }, { "epoch": 0.27, "grad_norm": 3.7995738983154297, "learning_rate": 1.9922169890019502e-05, "loss": 2.0173, "step": 21128 }, { "epoch": 0.27, "grad_norm": 4.2504963874816895, "learning_rate": 1.9922156804883934e-05, "loss": 2.1254, "step": 21129 }, { "epoch": 0.27, "grad_norm": 4.0562028884887695, "learning_rate": 1.9922143718652793e-05, "loss": 1.8698, "step": 21130 }, { "epoch": 0.27, "grad_norm": 4.113640308380127, "learning_rate": 1.992213063132608e-05, "loss": 1.9503, "step": 21131 }, { "epoch": 0.27, "grad_norm": 3.8309006690979004, "learning_rate": 1.992211754290379e-05, "loss": 1.7761, "step": 21132 }, { "epoch": 0.27, "grad_norm": 3.9741899967193604, "learning_rate": 1.9922104453385933e-05, "loss": 1.7546, "step": 21133 }, { "epoch": 0.27, "grad_norm": 3.575185537338257, "learning_rate": 1.9922091362772504e-05, "loss": 1.7436, "step": 21134 }, { "epoch": 0.27, "grad_norm": 3.629197359085083, "learning_rate": 1.9922078271063513e-05, "loss": 2.0405, "step": 21135 }, { "epoch": 0.27, "grad_norm": 3.683345079421997, "learning_rate": 1.9922065178258952e-05, "loss": 2.1552, "step": 21136 }, { "epoch": 0.27, "grad_norm": 3.904930830001831, "learning_rate": 1.992205208435883e-05, "loss": 2.0936, "step": 21137 }, { "epoch": 0.27, "grad_norm": 4.011906147003174, "learning_rate": 1.9922038989363143e-05, "loss": 2.0892, "step": 21138 }, { "epoch": 0.27, "grad_norm": 4.131063938140869, "learning_rate": 1.9922025893271898e-05, "loss": 2.1239, "step": 21139 }, { "epoch": 0.27, "grad_norm": 4.12360954284668, "learning_rate": 1.9922012796085087e-05, "loss": 2.0271, "step": 21140 }, { "epoch": 0.27, "grad_norm": 3.6585826873779297, "learning_rate": 1.992199969780272e-05, "loss": 1.7374, "step": 21141 }, { "epoch": 0.27, "grad_norm": 4.3192949295043945, "learning_rate": 1.9921986598424798e-05, "loss": 1.7134, "step": 21142 }, { "epoch": 0.27, "grad_norm": 3.650726795196533, "learning_rate": 1.9921973497951323e-05, "loss": 2.0396, "step": 21143 }, { "epoch": 0.27, "grad_norm": 3.702329158782959, "learning_rate": 1.992196039638229e-05, "loss": 1.8987, "step": 21144 }, { "epoch": 0.27, "grad_norm": 3.6931469440460205, "learning_rate": 1.9921947293717702e-05, "loss": 1.9793, "step": 21145 }, { "epoch": 0.27, "grad_norm": 4.592609405517578, "learning_rate": 1.9921934189957563e-05, "loss": 2.5113, "step": 21146 }, { "epoch": 0.27, "grad_norm": 4.160458564758301, "learning_rate": 1.9921921085101875e-05, "loss": 1.964, "step": 21147 }, { "epoch": 0.27, "grad_norm": 3.8960154056549072, "learning_rate": 1.992190797915064e-05, "loss": 1.932, "step": 21148 }, { "epoch": 0.27, "grad_norm": 4.123722076416016, "learning_rate": 1.992189487210386e-05, "loss": 1.6447, "step": 21149 }, { "epoch": 0.27, "grad_norm": 4.587039947509766, "learning_rate": 1.992188176396153e-05, "loss": 2.3825, "step": 21150 }, { "epoch": 0.27, "grad_norm": 4.034885883331299, "learning_rate": 1.9921868654723656e-05, "loss": 1.924, "step": 21151 }, { "epoch": 0.27, "grad_norm": 3.535621404647827, "learning_rate": 1.9921855544390244e-05, "loss": 1.7345, "step": 21152 }, { "epoch": 0.27, "grad_norm": 4.487105846405029, "learning_rate": 1.992184243296129e-05, "loss": 2.1568, "step": 21153 }, { "epoch": 0.27, "grad_norm": 4.050017833709717, "learning_rate": 1.992182932043679e-05, "loss": 1.854, "step": 21154 }, { "epoch": 0.27, "grad_norm": 3.970550775527954, "learning_rate": 1.992181620681676e-05, "loss": 1.8966, "step": 21155 }, { "epoch": 0.27, "grad_norm": 3.9288275241851807, "learning_rate": 1.992180309210119e-05, "loss": 2.2889, "step": 21156 }, { "epoch": 0.27, "grad_norm": 4.278364658355713, "learning_rate": 1.992178997629008e-05, "loss": 1.9934, "step": 21157 }, { "epoch": 0.27, "grad_norm": 5.224117279052734, "learning_rate": 1.992177685938344e-05, "loss": 2.3348, "step": 21158 }, { "epoch": 0.27, "grad_norm": 3.6257271766662598, "learning_rate": 1.992176374138127e-05, "loss": 1.7194, "step": 21159 }, { "epoch": 0.27, "grad_norm": 3.856160879135132, "learning_rate": 1.9921750622283566e-05, "loss": 1.9711, "step": 21160 }, { "epoch": 0.27, "grad_norm": 3.8474314212799072, "learning_rate": 1.9921737502090334e-05, "loss": 1.6034, "step": 21161 }, { "epoch": 0.27, "grad_norm": 4.081486701965332, "learning_rate": 1.9921724380801573e-05, "loss": 2.4518, "step": 21162 }, { "epoch": 0.27, "grad_norm": 4.052119255065918, "learning_rate": 1.9921711258417283e-05, "loss": 2.3346, "step": 21163 }, { "epoch": 0.27, "grad_norm": 4.439937591552734, "learning_rate": 1.992169813493747e-05, "loss": 2.7941, "step": 21164 }, { "epoch": 0.27, "grad_norm": 3.69720196723938, "learning_rate": 1.9921685010362132e-05, "loss": 1.9639, "step": 21165 }, { "epoch": 0.27, "grad_norm": 3.925851583480835, "learning_rate": 1.9921671884691273e-05, "loss": 2.1496, "step": 21166 }, { "epoch": 0.27, "grad_norm": 3.90152907371521, "learning_rate": 1.9921658757924893e-05, "loss": 2.1671, "step": 21167 }, { "epoch": 0.27, "grad_norm": 3.9107470512390137, "learning_rate": 1.9921645630062994e-05, "loss": 2.1961, "step": 21168 }, { "epoch": 0.27, "grad_norm": 3.974994659423828, "learning_rate": 1.9921632501105576e-05, "loss": 2.0408, "step": 21169 }, { "epoch": 0.27, "grad_norm": 4.305890083312988, "learning_rate": 1.992161937105264e-05, "loss": 2.2791, "step": 21170 }, { "epoch": 0.27, "grad_norm": 3.3701555728912354, "learning_rate": 1.9921606239904192e-05, "loss": 1.6611, "step": 21171 }, { "epoch": 0.27, "grad_norm": 4.044257640838623, "learning_rate": 1.992159310766023e-05, "loss": 2.2318, "step": 21172 }, { "epoch": 0.27, "grad_norm": 4.303560256958008, "learning_rate": 1.9921579974320754e-05, "loss": 2.4137, "step": 21173 }, { "epoch": 0.27, "grad_norm": 4.425353050231934, "learning_rate": 1.9921566839885767e-05, "loss": 2.7284, "step": 21174 }, { "epoch": 0.27, "grad_norm": 3.7710821628570557, "learning_rate": 1.9921553704355272e-05, "loss": 1.7489, "step": 21175 }, { "epoch": 0.27, "grad_norm": 4.153034210205078, "learning_rate": 1.9921540567729266e-05, "loss": 2.2296, "step": 21176 }, { "epoch": 0.27, "grad_norm": 4.423854351043701, "learning_rate": 1.9921527430007758e-05, "loss": 2.285, "step": 21177 }, { "epoch": 0.27, "grad_norm": 4.763607025146484, "learning_rate": 1.9921514291190744e-05, "loss": 2.3971, "step": 21178 }, { "epoch": 0.27, "grad_norm": 3.5426530838012695, "learning_rate": 1.9921501151278226e-05, "loss": 1.7336, "step": 21179 }, { "epoch": 0.27, "grad_norm": 3.66579532623291, "learning_rate": 1.9921488010270206e-05, "loss": 1.7545, "step": 21180 }, { "epoch": 0.27, "grad_norm": 4.402707576751709, "learning_rate": 1.9921474868166685e-05, "loss": 1.9983, "step": 21181 }, { "epoch": 0.27, "grad_norm": 4.677820205688477, "learning_rate": 1.9921461724967665e-05, "loss": 2.026, "step": 21182 }, { "epoch": 0.27, "grad_norm": 3.7935941219329834, "learning_rate": 1.9921448580673147e-05, "loss": 1.8575, "step": 21183 }, { "epoch": 0.27, "grad_norm": 5.012439727783203, "learning_rate": 1.9921435435283135e-05, "loss": 2.4271, "step": 21184 }, { "epoch": 0.27, "grad_norm": 4.237943649291992, "learning_rate": 1.9921422288797627e-05, "loss": 2.1935, "step": 21185 }, { "epoch": 0.27, "grad_norm": 3.8695271015167236, "learning_rate": 1.9921409141216625e-05, "loss": 2.339, "step": 21186 }, { "epoch": 0.27, "grad_norm": 4.272305488586426, "learning_rate": 1.992139599254013e-05, "loss": 1.8598, "step": 21187 }, { "epoch": 0.27, "grad_norm": 4.119228363037109, "learning_rate": 1.9921382842768146e-05, "loss": 2.1852, "step": 21188 }, { "epoch": 0.27, "grad_norm": 3.859644889831543, "learning_rate": 1.9921369691900673e-05, "loss": 2.406, "step": 21189 }, { "epoch": 0.27, "grad_norm": 3.9011125564575195, "learning_rate": 1.992135653993771e-05, "loss": 2.2796, "step": 21190 }, { "epoch": 0.28, "grad_norm": 4.001783847808838, "learning_rate": 1.9921343386879266e-05, "loss": 1.8545, "step": 21191 }, { "epoch": 0.28, "grad_norm": 4.434825897216797, "learning_rate": 1.9921330232725332e-05, "loss": 2.3544, "step": 21192 }, { "epoch": 0.28, "grad_norm": 4.2502264976501465, "learning_rate": 1.992131707747592e-05, "loss": 1.9829, "step": 21193 }, { "epoch": 0.28, "grad_norm": 4.3324432373046875, "learning_rate": 1.9921303921131024e-05, "loss": 2.466, "step": 21194 }, { "epoch": 0.28, "grad_norm": 3.821204900741577, "learning_rate": 1.992129076369065e-05, "loss": 1.6593, "step": 21195 }, { "epoch": 0.28, "grad_norm": 4.034829139709473, "learning_rate": 1.992127760515479e-05, "loss": 2.1258, "step": 21196 }, { "epoch": 0.28, "grad_norm": 4.071000576019287, "learning_rate": 1.9921264445523457e-05, "loss": 2.0161, "step": 21197 }, { "epoch": 0.28, "grad_norm": 4.266221046447754, "learning_rate": 1.992125128479665e-05, "loss": 2.2768, "step": 21198 }, { "epoch": 0.28, "grad_norm": 5.2636003494262695, "learning_rate": 1.9921238122974367e-05, "loss": 2.556, "step": 21199 }, { "epoch": 0.28, "grad_norm": 4.073200702667236, "learning_rate": 1.992122496005661e-05, "loss": 1.9612, "step": 21200 }, { "epoch": 0.28, "grad_norm": 4.041396141052246, "learning_rate": 1.992121179604338e-05, "loss": 2.307, "step": 21201 }, { "epoch": 0.28, "grad_norm": 4.21713924407959, "learning_rate": 1.9921198630934686e-05, "loss": 2.1783, "step": 21202 }, { "epoch": 0.28, "grad_norm": 3.9979310035705566, "learning_rate": 1.992118546473052e-05, "loss": 1.8101, "step": 21203 }, { "epoch": 0.28, "grad_norm": 4.3617448806762695, "learning_rate": 1.9921172297430888e-05, "loss": 2.5238, "step": 21204 }, { "epoch": 0.28, "grad_norm": 3.9499223232269287, "learning_rate": 1.9921159129035786e-05, "loss": 2.5526, "step": 21205 }, { "epoch": 0.28, "grad_norm": 3.9153940677642822, "learning_rate": 1.9921145959545223e-05, "loss": 1.9591, "step": 21206 }, { "epoch": 0.28, "grad_norm": 4.0309739112854, "learning_rate": 1.9921132788959195e-05, "loss": 1.9439, "step": 21207 }, { "epoch": 0.28, "grad_norm": 4.4863057136535645, "learning_rate": 1.992111961727771e-05, "loss": 2.47, "step": 21208 }, { "epoch": 0.28, "grad_norm": 3.5613813400268555, "learning_rate": 1.992110644450076e-05, "loss": 1.9134, "step": 21209 }, { "epoch": 0.28, "grad_norm": 4.151644706726074, "learning_rate": 1.9921093270628354e-05, "loss": 1.7552, "step": 21210 }, { "epoch": 0.28, "grad_norm": 4.292747497558594, "learning_rate": 1.992108009566049e-05, "loss": 2.0314, "step": 21211 }, { "epoch": 0.28, "grad_norm": 3.6266353130340576, "learning_rate": 1.992106691959717e-05, "loss": 1.91, "step": 21212 }, { "epoch": 0.28, "grad_norm": 3.7136192321777344, "learning_rate": 1.9921053742438398e-05, "loss": 1.981, "step": 21213 }, { "epoch": 0.28, "grad_norm": 4.014244556427002, "learning_rate": 1.9921040564184174e-05, "loss": 2.2645, "step": 21214 }, { "epoch": 0.28, "grad_norm": 4.38572359085083, "learning_rate": 1.9921027384834497e-05, "loss": 1.9399, "step": 21215 }, { "epoch": 0.28, "grad_norm": 4.4566473960876465, "learning_rate": 1.992101420438937e-05, "loss": 2.0942, "step": 21216 }, { "epoch": 0.28, "grad_norm": 4.238924026489258, "learning_rate": 1.9921001022848796e-05, "loss": 1.8739, "step": 21217 }, { "epoch": 0.28, "grad_norm": 4.434694290161133, "learning_rate": 1.9920987840212772e-05, "loss": 2.53, "step": 21218 }, { "epoch": 0.28, "grad_norm": 4.050570011138916, "learning_rate": 1.9920974656481305e-05, "loss": 2.1709, "step": 21219 }, { "epoch": 0.28, "grad_norm": 4.539417266845703, "learning_rate": 1.9920961471654397e-05, "loss": 2.9218, "step": 21220 }, { "epoch": 0.28, "grad_norm": 4.7862772941589355, "learning_rate": 1.9920948285732042e-05, "loss": 2.461, "step": 21221 }, { "epoch": 0.28, "grad_norm": 4.625918865203857, "learning_rate": 1.992093509871425e-05, "loss": 2.6808, "step": 21222 }, { "epoch": 0.28, "grad_norm": 4.609983444213867, "learning_rate": 1.9920921910601013e-05, "loss": 2.1243, "step": 21223 }, { "epoch": 0.28, "grad_norm": 3.9804110527038574, "learning_rate": 1.992090872139234e-05, "loss": 2.3609, "step": 21224 }, { "epoch": 0.28, "grad_norm": 4.110501766204834, "learning_rate": 1.9920895531088232e-05, "loss": 2.1923, "step": 21225 }, { "epoch": 0.28, "grad_norm": 4.227888584136963, "learning_rate": 1.992088233968869e-05, "loss": 2.3704, "step": 21226 }, { "epoch": 0.28, "grad_norm": 4.541038990020752, "learning_rate": 1.9920869147193712e-05, "loss": 2.3901, "step": 21227 }, { "epoch": 0.28, "grad_norm": 3.762312173843384, "learning_rate": 1.9920855953603303e-05, "loss": 2.1056, "step": 21228 }, { "epoch": 0.28, "grad_norm": 4.6431097984313965, "learning_rate": 1.9920842758917464e-05, "loss": 2.272, "step": 21229 }, { "epoch": 0.28, "grad_norm": 3.7200889587402344, "learning_rate": 1.992082956313619e-05, "loss": 1.7823, "step": 21230 }, { "epoch": 0.28, "grad_norm": 3.752835273742676, "learning_rate": 1.9920816366259494e-05, "loss": 1.7623, "step": 21231 }, { "epoch": 0.28, "grad_norm": 3.753790855407715, "learning_rate": 1.9920803168287366e-05, "loss": 2.0441, "step": 21232 }, { "epoch": 0.28, "grad_norm": 4.521903038024902, "learning_rate": 1.992078996921982e-05, "loss": 2.7356, "step": 21233 }, { "epoch": 0.28, "grad_norm": 3.9421727657318115, "learning_rate": 1.9920776769056848e-05, "loss": 2.0785, "step": 21234 }, { "epoch": 0.28, "grad_norm": 4.009324550628662, "learning_rate": 1.9920763567798453e-05, "loss": 2.4558, "step": 21235 }, { "epoch": 0.28, "grad_norm": 3.864135265350342, "learning_rate": 1.9920750365444637e-05, "loss": 2.0139, "step": 21236 }, { "epoch": 0.28, "grad_norm": 3.895678997039795, "learning_rate": 1.9920737161995402e-05, "loss": 1.9429, "step": 21237 }, { "epoch": 0.28, "grad_norm": 4.086447238922119, "learning_rate": 1.992072395745075e-05, "loss": 1.9287, "step": 21238 }, { "epoch": 0.28, "grad_norm": 4.102896690368652, "learning_rate": 1.992071075181068e-05, "loss": 1.912, "step": 21239 }, { "epoch": 0.28, "grad_norm": 3.8941335678100586, "learning_rate": 1.9920697545075197e-05, "loss": 2.0758, "step": 21240 }, { "epoch": 0.28, "grad_norm": 3.900883913040161, "learning_rate": 1.99206843372443e-05, "loss": 2.2435, "step": 21241 }, { "epoch": 0.28, "grad_norm": 5.277735233306885, "learning_rate": 1.992067112831799e-05, "loss": 2.0847, "step": 21242 }, { "epoch": 0.28, "grad_norm": 3.8793530464172363, "learning_rate": 1.9920657918296275e-05, "loss": 2.1212, "step": 21243 }, { "epoch": 0.28, "grad_norm": 3.5704054832458496, "learning_rate": 1.9920644707179145e-05, "loss": 1.9814, "step": 21244 }, { "epoch": 0.28, "grad_norm": 4.748977184295654, "learning_rate": 1.992063149496661e-05, "loss": 2.4582, "step": 21245 }, { "epoch": 0.28, "grad_norm": 3.940742015838623, "learning_rate": 1.992061828165867e-05, "loss": 2.2865, "step": 21246 }, { "epoch": 0.28, "grad_norm": 3.9950852394104004, "learning_rate": 1.9920605067255325e-05, "loss": 1.8243, "step": 21247 }, { "epoch": 0.28, "grad_norm": 3.5467727184295654, "learning_rate": 1.9920591851756575e-05, "loss": 1.826, "step": 21248 }, { "epoch": 0.28, "grad_norm": 4.48655366897583, "learning_rate": 1.9920578635162425e-05, "loss": 2.4561, "step": 21249 }, { "epoch": 0.28, "grad_norm": 5.111773490905762, "learning_rate": 1.9920565417472874e-05, "loss": 2.2969, "step": 21250 }, { "epoch": 0.28, "grad_norm": 3.709103584289551, "learning_rate": 1.9920552198687925e-05, "loss": 1.5967, "step": 21251 }, { "epoch": 0.28, "grad_norm": 3.963479518890381, "learning_rate": 1.9920538978807578e-05, "loss": 2.2214, "step": 21252 }, { "epoch": 0.28, "grad_norm": 4.0283894538879395, "learning_rate": 1.9920525757831835e-05, "loss": 2.3056, "step": 21253 }, { "epoch": 0.28, "grad_norm": 4.172975540161133, "learning_rate": 1.9920512535760695e-05, "loss": 2.0227, "step": 21254 }, { "epoch": 0.28, "grad_norm": 3.7905516624450684, "learning_rate": 1.992049931259417e-05, "loss": 1.8243, "step": 21255 }, { "epoch": 0.28, "grad_norm": 4.363894939422607, "learning_rate": 1.9920486088332247e-05, "loss": 1.9592, "step": 21256 }, { "epoch": 0.28, "grad_norm": 4.414824962615967, "learning_rate": 1.9920472862974934e-05, "loss": 2.2356, "step": 21257 }, { "epoch": 0.28, "grad_norm": 4.25384521484375, "learning_rate": 1.9920459636522235e-05, "loss": 2.0193, "step": 21258 }, { "epoch": 0.28, "grad_norm": 3.7797043323516846, "learning_rate": 1.9920446408974147e-05, "loss": 2.0691, "step": 21259 }, { "epoch": 0.28, "grad_norm": 4.0017828941345215, "learning_rate": 1.9920433180330675e-05, "loss": 2.2696, "step": 21260 }, { "epoch": 0.28, "grad_norm": 3.9957213401794434, "learning_rate": 1.9920419950591817e-05, "loss": 1.8554, "step": 21261 }, { "epoch": 0.28, "grad_norm": 4.007421493530273, "learning_rate": 1.992040671975758e-05, "loss": 2.2487, "step": 21262 }, { "epoch": 0.28, "grad_norm": 4.572605609893799, "learning_rate": 1.992039348782796e-05, "loss": 2.313, "step": 21263 }, { "epoch": 0.28, "grad_norm": 4.107333183288574, "learning_rate": 1.992038025480296e-05, "loss": 1.8513, "step": 21264 }, { "epoch": 0.28, "grad_norm": 4.095067501068115, "learning_rate": 1.992036702068258e-05, "loss": 2.4237, "step": 21265 }, { "epoch": 0.28, "grad_norm": 3.981680393218994, "learning_rate": 1.9920353785466826e-05, "loss": 2.0066, "step": 21266 }, { "epoch": 0.28, "grad_norm": 4.130771160125732, "learning_rate": 1.9920340549155694e-05, "loss": 2.223, "step": 21267 }, { "epoch": 0.28, "grad_norm": 3.5065529346466064, "learning_rate": 1.992032731174919e-05, "loss": 2.0324, "step": 21268 }, { "epoch": 0.28, "grad_norm": 3.259380340576172, "learning_rate": 1.9920314073247314e-05, "loss": 1.5168, "step": 21269 }, { "epoch": 0.28, "grad_norm": 3.6961958408355713, "learning_rate": 1.9920300833650067e-05, "loss": 2.0144, "step": 21270 }, { "epoch": 0.28, "grad_norm": 4.982455253601074, "learning_rate": 1.992028759295745e-05, "loss": 2.5475, "step": 21271 }, { "epoch": 0.28, "grad_norm": 3.545867919921875, "learning_rate": 1.9920274351169464e-05, "loss": 1.6529, "step": 21272 }, { "epoch": 0.28, "grad_norm": 4.2970476150512695, "learning_rate": 1.992026110828611e-05, "loss": 2.0763, "step": 21273 }, { "epoch": 0.28, "grad_norm": 4.069206237792969, "learning_rate": 1.9920247864307396e-05, "loss": 1.9299, "step": 21274 }, { "epoch": 0.28, "grad_norm": 4.060297012329102, "learning_rate": 1.9920234619233315e-05, "loss": 2.4019, "step": 21275 }, { "epoch": 0.28, "grad_norm": 4.651841640472412, "learning_rate": 1.992022137306387e-05, "loss": 2.047, "step": 21276 }, { "epoch": 0.28, "grad_norm": 3.981498956680298, "learning_rate": 1.9920208125799068e-05, "loss": 2.0382, "step": 21277 }, { "epoch": 0.28, "grad_norm": 3.5828418731689453, "learning_rate": 1.9920194877438902e-05, "loss": 1.775, "step": 21278 }, { "epoch": 0.28, "grad_norm": 4.336081504821777, "learning_rate": 1.9920181627983384e-05, "loss": 2.5108, "step": 21279 }, { "epoch": 0.28, "grad_norm": 4.678528785705566, "learning_rate": 1.9920168377432506e-05, "loss": 2.4276, "step": 21280 }, { "epoch": 0.28, "grad_norm": 3.8024277687072754, "learning_rate": 1.9920155125786273e-05, "loss": 1.9478, "step": 21281 }, { "epoch": 0.28, "grad_norm": 4.014674663543701, "learning_rate": 1.9920141873044687e-05, "loss": 2.0617, "step": 21282 }, { "epoch": 0.28, "grad_norm": 4.08493709564209, "learning_rate": 1.992012861920775e-05, "loss": 1.7271, "step": 21283 }, { "epoch": 0.28, "grad_norm": 4.190457344055176, "learning_rate": 1.9920115364275464e-05, "loss": 2.3476, "step": 21284 }, { "epoch": 0.28, "grad_norm": 4.472659587860107, "learning_rate": 1.9920102108247824e-05, "loss": 2.398, "step": 21285 }, { "epoch": 0.28, "grad_norm": 3.62856388092041, "learning_rate": 1.992008885112484e-05, "loss": 1.6695, "step": 21286 }, { "epoch": 0.28, "grad_norm": 4.479596138000488, "learning_rate": 1.992007559290651e-05, "loss": 2.4886, "step": 21287 }, { "epoch": 0.28, "grad_norm": 4.4440107345581055, "learning_rate": 1.9920062333592836e-05, "loss": 2.2485, "step": 21288 }, { "epoch": 0.28, "grad_norm": 4.681856155395508, "learning_rate": 1.9920049073183816e-05, "loss": 2.5298, "step": 21289 }, { "epoch": 0.28, "grad_norm": 4.423203945159912, "learning_rate": 1.9920035811679454e-05, "loss": 2.8008, "step": 21290 }, { "epoch": 0.28, "grad_norm": 3.7215702533721924, "learning_rate": 1.9920022549079756e-05, "loss": 1.9609, "step": 21291 }, { "epoch": 0.28, "grad_norm": 4.878152847290039, "learning_rate": 1.9920009285384716e-05, "loss": 2.4475, "step": 21292 }, { "epoch": 0.28, "grad_norm": 3.3673641681671143, "learning_rate": 1.991999602059434e-05, "loss": 1.6274, "step": 21293 }, { "epoch": 0.28, "grad_norm": 4.00044584274292, "learning_rate": 1.991998275470863e-05, "loss": 2.0722, "step": 21294 }, { "epoch": 0.28, "grad_norm": 3.808332920074463, "learning_rate": 1.9919969487727582e-05, "loss": 1.8605, "step": 21295 }, { "epoch": 0.28, "grad_norm": 3.52626633644104, "learning_rate": 1.9919956219651204e-05, "loss": 1.6085, "step": 21296 }, { "epoch": 0.28, "grad_norm": 4.405313968658447, "learning_rate": 1.991994295047949e-05, "loss": 2.5108, "step": 21297 }, { "epoch": 0.28, "grad_norm": 4.031277656555176, "learning_rate": 1.991992968021245e-05, "loss": 2.0025, "step": 21298 }, { "epoch": 0.28, "grad_norm": 3.99984073638916, "learning_rate": 1.991991640885008e-05, "loss": 1.7432, "step": 21299 }, { "epoch": 0.28, "grad_norm": 3.535220146179199, "learning_rate": 1.9919903136392383e-05, "loss": 1.793, "step": 21300 }, { "epoch": 0.28, "grad_norm": 4.110907554626465, "learning_rate": 1.991988986283936e-05, "loss": 2.4109, "step": 21301 }, { "epoch": 0.28, "grad_norm": 4.23576545715332, "learning_rate": 1.9919876588191012e-05, "loss": 2.1334, "step": 21302 }, { "epoch": 0.28, "grad_norm": 4.415754318237305, "learning_rate": 1.991986331244734e-05, "loss": 2.2815, "step": 21303 }, { "epoch": 0.28, "grad_norm": 4.96364164352417, "learning_rate": 1.9919850035608352e-05, "loss": 2.5956, "step": 21304 }, { "epoch": 0.28, "grad_norm": 4.107330799102783, "learning_rate": 1.991983675767404e-05, "loss": 2.4898, "step": 21305 }, { "epoch": 0.28, "grad_norm": 4.114729881286621, "learning_rate": 1.9919823478644415e-05, "loss": 2.3025, "step": 21306 }, { "epoch": 0.28, "grad_norm": 3.4974751472473145, "learning_rate": 1.991981019851947e-05, "loss": 1.5802, "step": 21307 }, { "epoch": 0.28, "grad_norm": 5.024693012237549, "learning_rate": 1.9919796917299207e-05, "loss": 2.6627, "step": 21308 }, { "epoch": 0.28, "grad_norm": 3.2303967475891113, "learning_rate": 1.9919783634983635e-05, "loss": 1.8789, "step": 21309 }, { "epoch": 0.28, "grad_norm": 3.7215235233306885, "learning_rate": 1.9919770351572748e-05, "loss": 1.8967, "step": 21310 }, { "epoch": 0.28, "grad_norm": 4.024771213531494, "learning_rate": 1.991975706706655e-05, "loss": 2.2995, "step": 21311 }, { "epoch": 0.28, "grad_norm": 4.3997483253479, "learning_rate": 1.991974378146504e-05, "loss": 2.126, "step": 21312 }, { "epoch": 0.28, "grad_norm": 4.092677116394043, "learning_rate": 1.9919730494768225e-05, "loss": 1.9654, "step": 21313 }, { "epoch": 0.28, "grad_norm": 4.455600261688232, "learning_rate": 1.9919717206976107e-05, "loss": 2.4398, "step": 21314 }, { "epoch": 0.28, "grad_norm": 4.310670375823975, "learning_rate": 1.991970391808868e-05, "loss": 2.1935, "step": 21315 }, { "epoch": 0.28, "grad_norm": 3.8832831382751465, "learning_rate": 1.9919690628105946e-05, "loss": 1.986, "step": 21316 }, { "epoch": 0.28, "grad_norm": 3.9097707271575928, "learning_rate": 1.9919677337027913e-05, "loss": 1.959, "step": 21317 }, { "epoch": 0.28, "grad_norm": 4.28603458404541, "learning_rate": 1.9919664044854582e-05, "loss": 2.2903, "step": 21318 }, { "epoch": 0.28, "grad_norm": 3.4802234172821045, "learning_rate": 1.991965075158595e-05, "loss": 1.5961, "step": 21319 }, { "epoch": 0.28, "grad_norm": 4.155106067657471, "learning_rate": 1.9919637457222018e-05, "loss": 2.0005, "step": 21320 }, { "epoch": 0.28, "grad_norm": 4.384541034698486, "learning_rate": 1.9919624161762795e-05, "loss": 2.4463, "step": 21321 }, { "epoch": 0.28, "grad_norm": 4.161215782165527, "learning_rate": 1.991961086520827e-05, "loss": 2.2858, "step": 21322 }, { "epoch": 0.28, "grad_norm": 3.6701347827911377, "learning_rate": 1.9919597567558458e-05, "loss": 2.0411, "step": 21323 }, { "epoch": 0.28, "grad_norm": 4.268776893615723, "learning_rate": 1.991958426881335e-05, "loss": 2.1619, "step": 21324 }, { "epoch": 0.28, "grad_norm": 3.733701467514038, "learning_rate": 1.9919570968972955e-05, "loss": 1.8649, "step": 21325 }, { "epoch": 0.28, "grad_norm": 4.385291576385498, "learning_rate": 1.9919557668037268e-05, "loss": 1.8222, "step": 21326 }, { "epoch": 0.28, "grad_norm": 4.193186283111572, "learning_rate": 1.9919544366006292e-05, "loss": 2.7358, "step": 21327 }, { "epoch": 0.28, "grad_norm": 4.364708423614502, "learning_rate": 1.9919531062880036e-05, "loss": 2.4904, "step": 21328 }, { "epoch": 0.28, "grad_norm": 4.177157878875732, "learning_rate": 1.991951775865849e-05, "loss": 2.2261, "step": 21329 }, { "epoch": 0.28, "grad_norm": 3.997077465057373, "learning_rate": 1.9919504453341665e-05, "loss": 2.1584, "step": 21330 }, { "epoch": 0.28, "grad_norm": 3.598120927810669, "learning_rate": 1.9919491146929557e-05, "loss": 1.6275, "step": 21331 }, { "epoch": 0.28, "grad_norm": 4.745302200317383, "learning_rate": 1.9919477839422168e-05, "loss": 2.0856, "step": 21332 }, { "epoch": 0.28, "grad_norm": 4.2969970703125, "learning_rate": 1.99194645308195e-05, "loss": 2.339, "step": 21333 }, { "epoch": 0.28, "grad_norm": 4.090710639953613, "learning_rate": 1.9919451221121556e-05, "loss": 2.1201, "step": 21334 }, { "epoch": 0.28, "grad_norm": 5.672575950622559, "learning_rate": 1.9919437910328337e-05, "loss": 2.9561, "step": 21335 }, { "epoch": 0.28, "grad_norm": 3.8788232803344727, "learning_rate": 1.9919424598439843e-05, "loss": 1.9476, "step": 21336 }, { "epoch": 0.28, "grad_norm": 3.8991708755493164, "learning_rate": 1.9919411285456074e-05, "loss": 1.8833, "step": 21337 }, { "epoch": 0.28, "grad_norm": 4.2355170249938965, "learning_rate": 1.9919397971377038e-05, "loss": 2.3879, "step": 21338 }, { "epoch": 0.28, "grad_norm": 4.4723286628723145, "learning_rate": 1.991938465620273e-05, "loss": 2.3932, "step": 21339 }, { "epoch": 0.28, "grad_norm": 3.8978354930877686, "learning_rate": 1.9919371339933155e-05, "loss": 2.0445, "step": 21340 }, { "epoch": 0.28, "grad_norm": 4.134318828582764, "learning_rate": 1.9919358022568308e-05, "loss": 2.6161, "step": 21341 }, { "epoch": 0.28, "grad_norm": 4.2550129890441895, "learning_rate": 1.99193447041082e-05, "loss": 2.3974, "step": 21342 }, { "epoch": 0.28, "grad_norm": 3.724592924118042, "learning_rate": 1.9919331384552828e-05, "loss": 1.5622, "step": 21343 }, { "epoch": 0.28, "grad_norm": 3.612999677658081, "learning_rate": 1.9919318063902192e-05, "loss": 1.6735, "step": 21344 }, { "epoch": 0.28, "grad_norm": 3.8383960723876953, "learning_rate": 1.9919304742156294e-05, "loss": 1.9632, "step": 21345 }, { "epoch": 0.28, "grad_norm": 4.471757888793945, "learning_rate": 1.9919291419315142e-05, "loss": 2.7745, "step": 21346 }, { "epoch": 0.28, "grad_norm": 3.8665995597839355, "learning_rate": 1.9919278095378726e-05, "loss": 1.9188, "step": 21347 }, { "epoch": 0.28, "grad_norm": 4.159954071044922, "learning_rate": 1.9919264770347056e-05, "loss": 2.381, "step": 21348 }, { "epoch": 0.28, "grad_norm": 3.867800712585449, "learning_rate": 1.991925144422013e-05, "loss": 2.0904, "step": 21349 }, { "epoch": 0.28, "grad_norm": 3.3036983013153076, "learning_rate": 1.9919238116997952e-05, "loss": 1.5154, "step": 21350 }, { "epoch": 0.28, "grad_norm": 4.535748481750488, "learning_rate": 1.9919224788680522e-05, "loss": 2.274, "step": 21351 }, { "epoch": 0.28, "grad_norm": 4.608798027038574, "learning_rate": 1.9919211459267838e-05, "loss": 2.3072, "step": 21352 }, { "epoch": 0.28, "grad_norm": 4.268254280090332, "learning_rate": 1.991919812875991e-05, "loss": 2.3582, "step": 21353 }, { "epoch": 0.28, "grad_norm": 3.803256034851074, "learning_rate": 1.9919184797156727e-05, "loss": 1.7559, "step": 21354 }, { "epoch": 0.28, "grad_norm": 3.511522054672241, "learning_rate": 1.9919171464458304e-05, "loss": 1.6502, "step": 21355 }, { "epoch": 0.28, "grad_norm": 4.525733470916748, "learning_rate": 1.9919158130664633e-05, "loss": 2.1911, "step": 21356 }, { "epoch": 0.28, "grad_norm": 4.167838096618652, "learning_rate": 1.991914479577572e-05, "loss": 1.9961, "step": 21357 }, { "epoch": 0.28, "grad_norm": 4.028182029724121, "learning_rate": 1.9919131459791566e-05, "loss": 1.9773, "step": 21358 }, { "epoch": 0.28, "grad_norm": 4.157041549682617, "learning_rate": 1.991911812271217e-05, "loss": 2.5408, "step": 21359 }, { "epoch": 0.28, "grad_norm": 4.062373161315918, "learning_rate": 1.9919104784537536e-05, "loss": 2.113, "step": 21360 }, { "epoch": 0.28, "grad_norm": 3.840372323989868, "learning_rate": 1.9919091445267666e-05, "loss": 2.0431, "step": 21361 }, { "epoch": 0.28, "grad_norm": 4.267221927642822, "learning_rate": 1.9919078104902554e-05, "loss": 2.2276, "step": 21362 }, { "epoch": 0.28, "grad_norm": 4.9475202560424805, "learning_rate": 1.9919064763442212e-05, "loss": 1.8918, "step": 21363 }, { "epoch": 0.28, "grad_norm": 3.974182605743408, "learning_rate": 1.9919051420886636e-05, "loss": 1.7175, "step": 21364 }, { "epoch": 0.28, "grad_norm": 3.9591422080993652, "learning_rate": 1.991903807723583e-05, "loss": 2.0431, "step": 21365 }, { "epoch": 0.28, "grad_norm": 3.644019365310669, "learning_rate": 1.9919024732489792e-05, "loss": 1.7067, "step": 21366 }, { "epoch": 0.28, "grad_norm": 3.8893887996673584, "learning_rate": 1.9919011386648525e-05, "loss": 1.9712, "step": 21367 }, { "epoch": 0.28, "grad_norm": 3.9742891788482666, "learning_rate": 1.9918998039712033e-05, "loss": 1.9591, "step": 21368 }, { "epoch": 0.28, "grad_norm": 4.694314956665039, "learning_rate": 1.9918984691680318e-05, "loss": 2.3763, "step": 21369 }, { "epoch": 0.28, "grad_norm": 4.610870838165283, "learning_rate": 1.9918971342553372e-05, "loss": 2.503, "step": 21370 }, { "epoch": 0.28, "grad_norm": 4.118624687194824, "learning_rate": 1.9918957992331206e-05, "loss": 2.3877, "step": 21371 }, { "epoch": 0.28, "grad_norm": 4.089606761932373, "learning_rate": 1.9918944641013823e-05, "loss": 1.9948, "step": 21372 }, { "epoch": 0.28, "grad_norm": 4.457275390625, "learning_rate": 1.9918931288601216e-05, "loss": 2.2016, "step": 21373 }, { "epoch": 0.28, "grad_norm": 4.4098968505859375, "learning_rate": 1.991891793509339e-05, "loss": 2.2068, "step": 21374 }, { "epoch": 0.28, "grad_norm": 4.305957317352295, "learning_rate": 1.991890458049035e-05, "loss": 2.4042, "step": 21375 }, { "epoch": 0.28, "grad_norm": 4.791134834289551, "learning_rate": 1.991889122479209e-05, "loss": 2.2443, "step": 21376 }, { "epoch": 0.28, "grad_norm": 5.631173133850098, "learning_rate": 1.991887786799862e-05, "loss": 2.3293, "step": 21377 }, { "epoch": 0.28, "grad_norm": 3.8415400981903076, "learning_rate": 1.9918864510109937e-05, "loss": 2.2311, "step": 21378 }, { "epoch": 0.28, "grad_norm": 4.153217315673828, "learning_rate": 1.9918851151126046e-05, "loss": 1.9927, "step": 21379 }, { "epoch": 0.28, "grad_norm": 3.8455939292907715, "learning_rate": 1.991883779104694e-05, "loss": 1.8209, "step": 21380 }, { "epoch": 0.28, "grad_norm": 4.6011061668396, "learning_rate": 1.991882442987263e-05, "loss": 2.4636, "step": 21381 }, { "epoch": 0.28, "grad_norm": 6.829638957977295, "learning_rate": 1.991881106760311e-05, "loss": 2.3748, "step": 21382 }, { "epoch": 0.28, "grad_norm": 3.8166909217834473, "learning_rate": 1.9918797704238386e-05, "loss": 1.8755, "step": 21383 }, { "epoch": 0.28, "grad_norm": 4.5875091552734375, "learning_rate": 1.991878433977846e-05, "loss": 2.4296, "step": 21384 }, { "epoch": 0.28, "grad_norm": 4.302220344543457, "learning_rate": 1.991877097422333e-05, "loss": 2.5165, "step": 21385 }, { "epoch": 0.28, "grad_norm": 3.5612175464630127, "learning_rate": 1.9918757607573e-05, "loss": 1.9054, "step": 21386 }, { "epoch": 0.28, "grad_norm": 4.275527477264404, "learning_rate": 1.9918744239827475e-05, "loss": 1.9131, "step": 21387 }, { "epoch": 0.28, "grad_norm": 4.293140411376953, "learning_rate": 1.991873087098675e-05, "loss": 2.2216, "step": 21388 }, { "epoch": 0.28, "grad_norm": 4.466552257537842, "learning_rate": 1.9918717501050826e-05, "loss": 2.1168, "step": 21389 }, { "epoch": 0.28, "grad_norm": 4.344511985778809, "learning_rate": 1.991870413001971e-05, "loss": 1.7421, "step": 21390 }, { "epoch": 0.28, "grad_norm": 3.9401369094848633, "learning_rate": 1.99186907578934e-05, "loss": 2.0199, "step": 21391 }, { "epoch": 0.28, "grad_norm": 3.6717841625213623, "learning_rate": 1.99186773846719e-05, "loss": 2.1152, "step": 21392 }, { "epoch": 0.28, "grad_norm": 3.8605315685272217, "learning_rate": 1.9918664010355206e-05, "loss": 1.6989, "step": 21393 }, { "epoch": 0.28, "grad_norm": 3.7933216094970703, "learning_rate": 1.9918650634943323e-05, "loss": 2.0777, "step": 21394 }, { "epoch": 0.28, "grad_norm": 3.3502039909362793, "learning_rate": 1.9918637258436258e-05, "loss": 1.3294, "step": 21395 }, { "epoch": 0.28, "grad_norm": 4.175124168395996, "learning_rate": 1.9918623880834003e-05, "loss": 2.3408, "step": 21396 }, { "epoch": 0.28, "grad_norm": 3.9428606033325195, "learning_rate": 1.9918610502136567e-05, "loss": 1.6654, "step": 21397 }, { "epoch": 0.28, "grad_norm": 4.050605297088623, "learning_rate": 1.9918597122343943e-05, "loss": 1.7953, "step": 21398 }, { "epoch": 0.28, "grad_norm": 3.792701482772827, "learning_rate": 1.991858374145614e-05, "loss": 1.8178, "step": 21399 }, { "epoch": 0.28, "grad_norm": 3.9159014225006104, "learning_rate": 1.991857035947316e-05, "loss": 2.0757, "step": 21400 }, { "epoch": 0.28, "grad_norm": 4.580268859863281, "learning_rate": 1.9918556976395e-05, "loss": 2.3731, "step": 21401 }, { "epoch": 0.28, "grad_norm": 4.176140785217285, "learning_rate": 1.991854359222166e-05, "loss": 2.0738, "step": 21402 }, { "epoch": 0.28, "grad_norm": 4.351862907409668, "learning_rate": 1.9918530206953145e-05, "loss": 2.2524, "step": 21403 }, { "epoch": 0.28, "grad_norm": 4.028441429138184, "learning_rate": 1.991851682058946e-05, "loss": 1.7235, "step": 21404 }, { "epoch": 0.28, "grad_norm": 3.9149527549743652, "learning_rate": 1.99185034331306e-05, "loss": 1.8961, "step": 21405 }, { "epoch": 0.28, "grad_norm": 3.9865379333496094, "learning_rate": 1.9918490044576568e-05, "loss": 2.1464, "step": 21406 }, { "epoch": 0.28, "grad_norm": 4.129775047302246, "learning_rate": 1.991847665492737e-05, "loss": 2.1213, "step": 21407 }, { "epoch": 0.28, "grad_norm": 3.7338602542877197, "learning_rate": 1.9918463264182998e-05, "loss": 1.704, "step": 21408 }, { "epoch": 0.28, "grad_norm": 5.008732795715332, "learning_rate": 1.9918449872343464e-05, "loss": 2.7285, "step": 21409 }, { "epoch": 0.28, "grad_norm": 3.623725175857544, "learning_rate": 1.9918436479408763e-05, "loss": 1.8146, "step": 21410 }, { "epoch": 0.28, "grad_norm": 3.8508706092834473, "learning_rate": 1.99184230853789e-05, "loss": 2.264, "step": 21411 }, { "epoch": 0.28, "grad_norm": 4.198422908782959, "learning_rate": 1.9918409690253874e-05, "loss": 1.8289, "step": 21412 }, { "epoch": 0.28, "grad_norm": 4.378411769866943, "learning_rate": 1.991839629403369e-05, "loss": 2.3525, "step": 21413 }, { "epoch": 0.28, "grad_norm": 4.486198902130127, "learning_rate": 1.991838289671834e-05, "loss": 2.0754, "step": 21414 }, { "epoch": 0.28, "grad_norm": 4.210818767547607, "learning_rate": 1.9918369498307837e-05, "loss": 2.0399, "step": 21415 }, { "epoch": 0.28, "grad_norm": 4.637997627258301, "learning_rate": 1.9918356098802177e-05, "loss": 2.7941, "step": 21416 }, { "epoch": 0.28, "grad_norm": 4.04212760925293, "learning_rate": 1.991834269820136e-05, "loss": 1.8402, "step": 21417 }, { "epoch": 0.28, "grad_norm": 4.293558120727539, "learning_rate": 1.9918329296505396e-05, "loss": 2.2108, "step": 21418 }, { "epoch": 0.28, "grad_norm": 4.350606918334961, "learning_rate": 1.9918315893714274e-05, "loss": 2.0011, "step": 21419 }, { "epoch": 0.28, "grad_norm": 3.913886308670044, "learning_rate": 1.9918302489828004e-05, "loss": 2.4066, "step": 21420 }, { "epoch": 0.28, "grad_norm": 4.012322902679443, "learning_rate": 1.9918289084846584e-05, "loss": 1.8405, "step": 21421 }, { "epoch": 0.28, "grad_norm": 4.378791809082031, "learning_rate": 1.991827567877002e-05, "loss": 2.3312, "step": 21422 }, { "epoch": 0.28, "grad_norm": 4.124448299407959, "learning_rate": 1.9918262271598304e-05, "loss": 2.3593, "step": 21423 }, { "epoch": 0.28, "grad_norm": 5.078383445739746, "learning_rate": 1.991824886333145e-05, "loss": 2.5914, "step": 21424 }, { "epoch": 0.28, "grad_norm": 4.948612213134766, "learning_rate": 1.9918235453969447e-05, "loss": 2.6137, "step": 21425 }, { "epoch": 0.28, "grad_norm": 4.0051589012146, "learning_rate": 1.9918222043512308e-05, "loss": 2.117, "step": 21426 }, { "epoch": 0.28, "grad_norm": 3.5823049545288086, "learning_rate": 1.9918208631960027e-05, "loss": 1.5242, "step": 21427 }, { "epoch": 0.28, "grad_norm": 4.149221420288086, "learning_rate": 1.9918195219312607e-05, "loss": 2.43, "step": 21428 }, { "epoch": 0.28, "grad_norm": 4.619089603424072, "learning_rate": 1.991818180557005e-05, "loss": 1.9153, "step": 21429 }, { "epoch": 0.28, "grad_norm": 4.546083927154541, "learning_rate": 1.991816839073236e-05, "loss": 2.4113, "step": 21430 }, { "epoch": 0.28, "grad_norm": 3.7529499530792236, "learning_rate": 1.9918154974799532e-05, "loss": 1.9722, "step": 21431 }, { "epoch": 0.28, "grad_norm": 4.028148174285889, "learning_rate": 1.9918141557771572e-05, "loss": 2.1332, "step": 21432 }, { "epoch": 0.28, "grad_norm": 4.754652976989746, "learning_rate": 1.9918128139648484e-05, "loss": 2.194, "step": 21433 }, { "epoch": 0.28, "grad_norm": 3.9067108631134033, "learning_rate": 1.9918114720430263e-05, "loss": 2.3106, "step": 21434 }, { "epoch": 0.28, "grad_norm": 4.005033016204834, "learning_rate": 1.9918101300116913e-05, "loss": 2.5275, "step": 21435 }, { "epoch": 0.28, "grad_norm": 3.535762310028076, "learning_rate": 1.991808787870844e-05, "loss": 1.8639, "step": 21436 }, { "epoch": 0.28, "grad_norm": 3.745405673980713, "learning_rate": 1.991807445620484e-05, "loss": 2.1126, "step": 21437 }, { "epoch": 0.28, "grad_norm": 4.078876495361328, "learning_rate": 1.9918061032606118e-05, "loss": 1.8885, "step": 21438 }, { "epoch": 0.28, "grad_norm": 4.059781074523926, "learning_rate": 1.9918047607912273e-05, "loss": 2.2529, "step": 21439 }, { "epoch": 0.28, "grad_norm": 3.695030689239502, "learning_rate": 1.9918034182123306e-05, "loss": 1.7297, "step": 21440 }, { "epoch": 0.28, "grad_norm": 4.123699188232422, "learning_rate": 1.991802075523922e-05, "loss": 1.8643, "step": 21441 }, { "epoch": 0.28, "grad_norm": 4.278009414672852, "learning_rate": 1.991800732726002e-05, "loss": 2.0916, "step": 21442 }, { "epoch": 0.28, "grad_norm": 3.9565958976745605, "learning_rate": 1.99179938981857e-05, "loss": 2.1112, "step": 21443 }, { "epoch": 0.28, "grad_norm": 4.521856307983398, "learning_rate": 1.9917980468016264e-05, "loss": 2.5075, "step": 21444 }, { "epoch": 0.28, "grad_norm": 3.9393932819366455, "learning_rate": 1.9917967036751714e-05, "loss": 2.1769, "step": 21445 }, { "epoch": 0.28, "grad_norm": 3.7083194255828857, "learning_rate": 1.9917953604392055e-05, "loss": 1.7528, "step": 21446 }, { "epoch": 0.28, "grad_norm": 3.9481189250946045, "learning_rate": 1.9917940170937287e-05, "loss": 2.1005, "step": 21447 }, { "epoch": 0.28, "grad_norm": 3.794231414794922, "learning_rate": 1.9917926736387407e-05, "loss": 1.6018, "step": 21448 }, { "epoch": 0.28, "grad_norm": 3.9293956756591797, "learning_rate": 1.9917913300742423e-05, "loss": 2.3292, "step": 21449 }, { "epoch": 0.28, "grad_norm": 4.880753993988037, "learning_rate": 1.991789986400233e-05, "loss": 2.2301, "step": 21450 }, { "epoch": 0.28, "grad_norm": 4.111353874206543, "learning_rate": 1.9917886426167134e-05, "loss": 1.9538, "step": 21451 }, { "epoch": 0.28, "grad_norm": 4.585722923278809, "learning_rate": 1.9917872987236834e-05, "loss": 2.3483, "step": 21452 }, { "epoch": 0.28, "grad_norm": 3.8773059844970703, "learning_rate": 1.9917859547211435e-05, "loss": 2.1088, "step": 21453 }, { "epoch": 0.28, "grad_norm": 4.438592433929443, "learning_rate": 1.9917846106090934e-05, "loss": 2.2368, "step": 21454 }, { "epoch": 0.28, "grad_norm": 4.003529071807861, "learning_rate": 1.9917832663875335e-05, "loss": 2.1241, "step": 21455 }, { "epoch": 0.28, "grad_norm": 5.118628978729248, "learning_rate": 1.9917819220564638e-05, "loss": 2.7843, "step": 21456 }, { "epoch": 0.28, "grad_norm": 4.141781806945801, "learning_rate": 1.9917805776158846e-05, "loss": 2.6932, "step": 21457 }, { "epoch": 0.28, "grad_norm": 4.343452453613281, "learning_rate": 1.9917792330657965e-05, "loss": 2.1088, "step": 21458 }, { "epoch": 0.28, "grad_norm": 4.355428695678711, "learning_rate": 1.9917778884061986e-05, "loss": 2.2305, "step": 21459 }, { "epoch": 0.28, "grad_norm": 4.419792175292969, "learning_rate": 1.9917765436370915e-05, "loss": 1.8677, "step": 21460 }, { "epoch": 0.28, "grad_norm": 4.078803539276123, "learning_rate": 1.9917751987584757e-05, "loss": 2.2483, "step": 21461 }, { "epoch": 0.28, "grad_norm": 3.9294965267181396, "learning_rate": 1.991773853770351e-05, "loss": 1.971, "step": 21462 }, { "epoch": 0.28, "grad_norm": 3.5826807022094727, "learning_rate": 1.9917725086727174e-05, "loss": 1.8679, "step": 21463 }, { "epoch": 0.28, "grad_norm": 4.565844535827637, "learning_rate": 1.9917711634655757e-05, "loss": 2.2601, "step": 21464 }, { "epoch": 0.28, "grad_norm": 4.677697658538818, "learning_rate": 1.9917698181489256e-05, "loss": 1.9323, "step": 21465 }, { "epoch": 0.28, "grad_norm": 3.811934471130371, "learning_rate": 1.9917684727227672e-05, "loss": 1.9551, "step": 21466 }, { "epoch": 0.28, "grad_norm": 3.931713819503784, "learning_rate": 1.9917671271871008e-05, "loss": 2.356, "step": 21467 }, { "epoch": 0.28, "grad_norm": 4.023696422576904, "learning_rate": 1.9917657815419265e-05, "loss": 2.039, "step": 21468 }, { "epoch": 0.28, "grad_norm": 3.914093494415283, "learning_rate": 1.991764435787244e-05, "loss": 1.6486, "step": 21469 }, { "epoch": 0.28, "grad_norm": 3.8283095359802246, "learning_rate": 1.9917630899230542e-05, "loss": 1.8018, "step": 21470 }, { "epoch": 0.28, "grad_norm": 4.084270000457764, "learning_rate": 1.9917617439493572e-05, "loss": 2.1003, "step": 21471 }, { "epoch": 0.28, "grad_norm": 4.3499345779418945, "learning_rate": 1.9917603978661524e-05, "loss": 2.1923, "step": 21472 }, { "epoch": 0.28, "grad_norm": 4.4224724769592285, "learning_rate": 1.9917590516734408e-05, "loss": 2.2134, "step": 21473 }, { "epoch": 0.28, "grad_norm": 4.502500534057617, "learning_rate": 1.991757705371222e-05, "loss": 2.697, "step": 21474 }, { "epoch": 0.28, "grad_norm": 4.200856685638428, "learning_rate": 1.9917563589594965e-05, "loss": 2.1019, "step": 21475 }, { "epoch": 0.28, "grad_norm": 4.464409351348877, "learning_rate": 1.9917550124382638e-05, "loss": 2.0617, "step": 21476 }, { "epoch": 0.28, "grad_norm": 4.065177917480469, "learning_rate": 1.991753665807525e-05, "loss": 2.0453, "step": 21477 }, { "epoch": 0.28, "grad_norm": 4.0247039794921875, "learning_rate": 1.9917523190672795e-05, "loss": 2.0195, "step": 21478 }, { "epoch": 0.28, "grad_norm": 4.268102169036865, "learning_rate": 1.9917509722175278e-05, "loss": 2.1931, "step": 21479 }, { "epoch": 0.28, "grad_norm": 4.804846286773682, "learning_rate": 1.9917496252582703e-05, "loss": 2.3239, "step": 21480 }, { "epoch": 0.28, "grad_norm": 3.6022753715515137, "learning_rate": 1.9917482781895064e-05, "loss": 1.7692, "step": 21481 }, { "epoch": 0.28, "grad_norm": 3.983543872833252, "learning_rate": 1.9917469310112368e-05, "loss": 2.0142, "step": 21482 }, { "epoch": 0.28, "grad_norm": 4.037694454193115, "learning_rate": 1.9917455837234613e-05, "loss": 2.3984, "step": 21483 }, { "epoch": 0.28, "grad_norm": 4.076418399810791, "learning_rate": 1.9917442363261804e-05, "loss": 1.9955, "step": 21484 }, { "epoch": 0.28, "grad_norm": 4.542364597320557, "learning_rate": 1.9917428888193944e-05, "loss": 2.3677, "step": 21485 }, { "epoch": 0.28, "grad_norm": 4.180120944976807, "learning_rate": 1.9917415412031026e-05, "loss": 2.181, "step": 21486 }, { "epoch": 0.28, "grad_norm": 4.654625415802002, "learning_rate": 1.9917401934773065e-05, "loss": 2.2484, "step": 21487 }, { "epoch": 0.28, "grad_norm": 4.000058174133301, "learning_rate": 1.991738845642005e-05, "loss": 2.0324, "step": 21488 }, { "epoch": 0.28, "grad_norm": 4.034073829650879, "learning_rate": 1.9917374976971985e-05, "loss": 2.396, "step": 21489 }, { "epoch": 0.28, "grad_norm": 4.225242614746094, "learning_rate": 1.9917361496428877e-05, "loss": 2.0762, "step": 21490 }, { "epoch": 0.28, "grad_norm": 3.9016592502593994, "learning_rate": 1.991734801479072e-05, "loss": 2.1217, "step": 21491 }, { "epoch": 0.28, "grad_norm": 4.983271598815918, "learning_rate": 1.991733453205752e-05, "loss": 2.5532, "step": 21492 }, { "epoch": 0.28, "grad_norm": 4.362184047698975, "learning_rate": 1.9917321048229284e-05, "loss": 2.45, "step": 21493 }, { "epoch": 0.28, "grad_norm": 5.492395877838135, "learning_rate": 1.9917307563306003e-05, "loss": 2.4316, "step": 21494 }, { "epoch": 0.28, "grad_norm": 4.080674648284912, "learning_rate": 1.9917294077287684e-05, "loss": 2.0665, "step": 21495 }, { "epoch": 0.28, "grad_norm": 4.256762981414795, "learning_rate": 1.9917280590174324e-05, "loss": 2.6338, "step": 21496 }, { "epoch": 0.28, "grad_norm": 3.369641065597534, "learning_rate": 1.9917267101965934e-05, "loss": 1.6241, "step": 21497 }, { "epoch": 0.28, "grad_norm": 4.535009384155273, "learning_rate": 1.9917253612662507e-05, "loss": 1.9003, "step": 21498 }, { "epoch": 0.28, "grad_norm": 4.021571159362793, "learning_rate": 1.9917240122264045e-05, "loss": 2.0187, "step": 21499 }, { "epoch": 0.28, "grad_norm": 3.969554901123047, "learning_rate": 1.9917226630770553e-05, "loss": 1.8198, "step": 21500 }, { "epoch": 0.28, "grad_norm": 4.276119709014893, "learning_rate": 1.9917213138182033e-05, "loss": 1.9306, "step": 21501 }, { "epoch": 0.28, "grad_norm": 3.7362422943115234, "learning_rate": 1.991719964449848e-05, "loss": 1.7315, "step": 21502 }, { "epoch": 0.28, "grad_norm": 3.9806630611419678, "learning_rate": 1.9917186149719902e-05, "loss": 1.9402, "step": 21503 }, { "epoch": 0.28, "grad_norm": 4.214168548583984, "learning_rate": 1.99171726538463e-05, "loss": 2.0919, "step": 21504 }, { "epoch": 0.28, "grad_norm": 3.9718329906463623, "learning_rate": 1.9917159156877672e-05, "loss": 2.1525, "step": 21505 }, { "epoch": 0.28, "grad_norm": 3.791985511779785, "learning_rate": 1.9917145658814023e-05, "loss": 1.7038, "step": 21506 }, { "epoch": 0.28, "grad_norm": 4.303567886352539, "learning_rate": 1.991713215965535e-05, "loss": 2.178, "step": 21507 }, { "epoch": 0.28, "grad_norm": 3.448058605194092, "learning_rate": 1.991711865940166e-05, "loss": 1.7028, "step": 21508 }, { "epoch": 0.28, "grad_norm": 4.517844200134277, "learning_rate": 1.991710515805295e-05, "loss": 2.3248, "step": 21509 }, { "epoch": 0.28, "grad_norm": 3.6835105419158936, "learning_rate": 1.9917091655609225e-05, "loss": 1.9535, "step": 21510 }, { "epoch": 0.28, "grad_norm": 4.529385089874268, "learning_rate": 1.9917078152070484e-05, "loss": 2.2446, "step": 21511 }, { "epoch": 0.28, "grad_norm": 4.218053340911865, "learning_rate": 1.991706464743673e-05, "loss": 1.9379, "step": 21512 }, { "epoch": 0.28, "grad_norm": 3.8244829177856445, "learning_rate": 1.991705114170796e-05, "loss": 1.8251, "step": 21513 }, { "epoch": 0.28, "grad_norm": 3.6276488304138184, "learning_rate": 1.9917037634884188e-05, "loss": 1.6692, "step": 21514 }, { "epoch": 0.28, "grad_norm": 4.921173572540283, "learning_rate": 1.99170241269654e-05, "loss": 1.9891, "step": 21515 }, { "epoch": 0.28, "grad_norm": 3.406320333480835, "learning_rate": 1.9917010617951606e-05, "loss": 1.7306, "step": 21516 }, { "epoch": 0.28, "grad_norm": 4.109382629394531, "learning_rate": 1.9916997107842803e-05, "loss": 1.9996, "step": 21517 }, { "epoch": 0.28, "grad_norm": 3.536576509475708, "learning_rate": 1.9916983596638996e-05, "loss": 1.6783, "step": 21518 }, { "epoch": 0.28, "grad_norm": 4.520694255828857, "learning_rate": 1.991697008434019e-05, "loss": 2.3374, "step": 21519 }, { "epoch": 0.28, "grad_norm": 4.033252239227295, "learning_rate": 1.991695657094638e-05, "loss": 2.665, "step": 21520 }, { "epoch": 0.28, "grad_norm": 4.14103889465332, "learning_rate": 1.9916943056457568e-05, "loss": 2.2211, "step": 21521 }, { "epoch": 0.28, "grad_norm": 4.2413506507873535, "learning_rate": 1.991692954087376e-05, "loss": 2.0514, "step": 21522 }, { "epoch": 0.28, "grad_norm": 4.173234462738037, "learning_rate": 1.9916916024194953e-05, "loss": 2.2783, "step": 21523 }, { "epoch": 0.28, "grad_norm": 4.334616184234619, "learning_rate": 1.991690250642115e-05, "loss": 2.3564, "step": 21524 }, { "epoch": 0.28, "grad_norm": 3.924753189086914, "learning_rate": 1.9916888987552355e-05, "loss": 1.9585, "step": 21525 }, { "epoch": 0.28, "grad_norm": 4.468816757202148, "learning_rate": 1.9916875467588564e-05, "loss": 2.2756, "step": 21526 }, { "epoch": 0.28, "grad_norm": 3.6439857482910156, "learning_rate": 1.9916861946529783e-05, "loss": 1.9913, "step": 21527 }, { "epoch": 0.28, "grad_norm": 4.425416469573975, "learning_rate": 1.9916848424376016e-05, "loss": 2.1979, "step": 21528 }, { "epoch": 0.28, "grad_norm": 4.401309967041016, "learning_rate": 1.9916834901127255e-05, "loss": 1.9305, "step": 21529 }, { "epoch": 0.28, "grad_norm": 3.932335615158081, "learning_rate": 1.991682137678351e-05, "loss": 1.8372, "step": 21530 }, { "epoch": 0.28, "grad_norm": 3.584195613861084, "learning_rate": 1.991680785134478e-05, "loss": 1.7664, "step": 21531 }, { "epoch": 0.28, "grad_norm": 3.806917428970337, "learning_rate": 1.9916794324811063e-05, "loss": 1.829, "step": 21532 }, { "epoch": 0.28, "grad_norm": 3.9633123874664307, "learning_rate": 1.9916780797182366e-05, "loss": 2.0678, "step": 21533 }, { "epoch": 0.28, "grad_norm": 4.392486572265625, "learning_rate": 1.991676726845869e-05, "loss": 2.2222, "step": 21534 }, { "epoch": 0.28, "grad_norm": 3.547321081161499, "learning_rate": 1.9916753738640032e-05, "loss": 1.8257, "step": 21535 }, { "epoch": 0.28, "grad_norm": 4.396369934082031, "learning_rate": 1.99167402077264e-05, "loss": 2.4672, "step": 21536 }, { "epoch": 0.28, "grad_norm": 3.948108434677124, "learning_rate": 1.9916726675717786e-05, "loss": 2.0406, "step": 21537 }, { "epoch": 0.28, "grad_norm": 3.974574565887451, "learning_rate": 1.99167131426142e-05, "loss": 2.0062, "step": 21538 }, { "epoch": 0.28, "grad_norm": 4.211256980895996, "learning_rate": 1.991669960841564e-05, "loss": 1.9141, "step": 21539 }, { "epoch": 0.28, "grad_norm": 4.228753566741943, "learning_rate": 1.9916686073122108e-05, "loss": 2.3126, "step": 21540 }, { "epoch": 0.28, "grad_norm": 3.639084815979004, "learning_rate": 1.9916672536733606e-05, "loss": 1.5915, "step": 21541 }, { "epoch": 0.28, "grad_norm": 4.0539093017578125, "learning_rate": 1.9916658999250135e-05, "loss": 2.0452, "step": 21542 }, { "epoch": 0.28, "grad_norm": 4.63796329498291, "learning_rate": 1.9916645460671694e-05, "loss": 2.4239, "step": 21543 }, { "epoch": 0.28, "grad_norm": 4.699102878570557, "learning_rate": 1.9916631920998293e-05, "loss": 2.4585, "step": 21544 }, { "epoch": 0.28, "grad_norm": 3.891251564025879, "learning_rate": 1.9916618380229923e-05, "loss": 2.2732, "step": 21545 }, { "epoch": 0.28, "grad_norm": 4.511833190917969, "learning_rate": 1.9916604838366593e-05, "loss": 1.9198, "step": 21546 }, { "epoch": 0.28, "grad_norm": 3.7891452312469482, "learning_rate": 1.99165912954083e-05, "loss": 1.8894, "step": 21547 }, { "epoch": 0.28, "grad_norm": 4.296207427978516, "learning_rate": 1.991657775135505e-05, "loss": 2.3304, "step": 21548 }, { "epoch": 0.28, "grad_norm": 3.927781343460083, "learning_rate": 1.9916564206206837e-05, "loss": 1.9984, "step": 21549 }, { "epoch": 0.28, "grad_norm": 3.6523916721343994, "learning_rate": 1.991655065996367e-05, "loss": 2.1799, "step": 21550 }, { "epoch": 0.28, "grad_norm": 3.7557315826416016, "learning_rate": 1.9916537112625545e-05, "loss": 2.0731, "step": 21551 }, { "epoch": 0.28, "grad_norm": 3.9992871284484863, "learning_rate": 1.9916523564192466e-05, "loss": 2.3682, "step": 21552 }, { "epoch": 0.28, "grad_norm": 3.7631711959838867, "learning_rate": 1.9916510014664436e-05, "loss": 2.1056, "step": 21553 }, { "epoch": 0.28, "grad_norm": 4.014278411865234, "learning_rate": 1.9916496464041458e-05, "loss": 2.2308, "step": 21554 }, { "epoch": 0.28, "grad_norm": 4.198272228240967, "learning_rate": 1.9916482912323526e-05, "loss": 2.0767, "step": 21555 }, { "epoch": 0.28, "grad_norm": 4.343640327453613, "learning_rate": 1.9916469359510646e-05, "loss": 2.1055, "step": 21556 }, { "epoch": 0.28, "grad_norm": 3.694025754928589, "learning_rate": 1.9916455805602823e-05, "loss": 1.6198, "step": 21557 }, { "epoch": 0.28, "grad_norm": 4.054576873779297, "learning_rate": 1.9916442250600054e-05, "loss": 1.8077, "step": 21558 }, { "epoch": 0.28, "grad_norm": 3.7439465522766113, "learning_rate": 1.991642869450234e-05, "loss": 1.6953, "step": 21559 }, { "epoch": 0.28, "grad_norm": 3.617258071899414, "learning_rate": 1.9916415137309684e-05, "loss": 1.9371, "step": 21560 }, { "epoch": 0.28, "grad_norm": 4.068371295928955, "learning_rate": 1.9916401579022088e-05, "loss": 2.1589, "step": 21561 }, { "epoch": 0.28, "grad_norm": 3.977116346359253, "learning_rate": 1.9916388019639555e-05, "loss": 2.1401, "step": 21562 }, { "epoch": 0.28, "grad_norm": 4.261056423187256, "learning_rate": 1.9916374459162083e-05, "loss": 2.253, "step": 21563 }, { "epoch": 0.28, "grad_norm": 3.9813852310180664, "learning_rate": 1.9916360897589676e-05, "loss": 2.1423, "step": 21564 }, { "epoch": 0.28, "grad_norm": 4.20335578918457, "learning_rate": 1.9916347334922333e-05, "loss": 2.4268, "step": 21565 }, { "epoch": 0.28, "grad_norm": 3.8138158321380615, "learning_rate": 1.9916333771160057e-05, "loss": 1.9548, "step": 21566 }, { "epoch": 0.28, "grad_norm": 4.1592841148376465, "learning_rate": 1.991632020630285e-05, "loss": 2.4962, "step": 21567 }, { "epoch": 0.28, "grad_norm": 4.130136489868164, "learning_rate": 1.9916306640350712e-05, "loss": 2.1373, "step": 21568 }, { "epoch": 0.28, "grad_norm": 3.959256410598755, "learning_rate": 1.9916293073303646e-05, "loss": 2.2026, "step": 21569 }, { "epoch": 0.28, "grad_norm": 3.992687225341797, "learning_rate": 1.9916279505161654e-05, "loss": 2.0847, "step": 21570 }, { "epoch": 0.28, "grad_norm": 3.489656925201416, "learning_rate": 1.9916265935924737e-05, "loss": 1.8669, "step": 21571 }, { "epoch": 0.28, "grad_norm": 4.385242462158203, "learning_rate": 1.9916252365592898e-05, "loss": 2.573, "step": 21572 }, { "epoch": 0.28, "grad_norm": 4.2537455558776855, "learning_rate": 1.9916238794166133e-05, "loss": 1.8738, "step": 21573 }, { "epoch": 0.28, "grad_norm": 4.896023750305176, "learning_rate": 1.991622522164445e-05, "loss": 2.47, "step": 21574 }, { "epoch": 0.28, "grad_norm": 4.209176063537598, "learning_rate": 1.9916211648027843e-05, "loss": 2.0565, "step": 21575 }, { "epoch": 0.28, "grad_norm": 3.9718170166015625, "learning_rate": 1.991619807331632e-05, "loss": 1.924, "step": 21576 }, { "epoch": 0.28, "grad_norm": 3.6500885486602783, "learning_rate": 1.991618449750988e-05, "loss": 1.8158, "step": 21577 }, { "epoch": 0.28, "grad_norm": 4.479090213775635, "learning_rate": 1.9916170920608525e-05, "loss": 2.0224, "step": 21578 }, { "epoch": 0.28, "grad_norm": 4.190891265869141, "learning_rate": 1.9916157342612257e-05, "loss": 1.8921, "step": 21579 }, { "epoch": 0.28, "grad_norm": 4.774134159088135, "learning_rate": 1.9916143763521077e-05, "loss": 2.4098, "step": 21580 }, { "epoch": 0.28, "grad_norm": 4.042956829071045, "learning_rate": 1.9916130183334985e-05, "loss": 2.4999, "step": 21581 }, { "epoch": 0.28, "grad_norm": 4.373342514038086, "learning_rate": 1.9916116602053988e-05, "loss": 1.8434, "step": 21582 }, { "epoch": 0.28, "grad_norm": 4.501405715942383, "learning_rate": 1.991610301967808e-05, "loss": 2.5105, "step": 21583 }, { "epoch": 0.28, "grad_norm": 4.406045913696289, "learning_rate": 1.9916089436207267e-05, "loss": 2.4432, "step": 21584 }, { "epoch": 0.28, "grad_norm": 4.042002201080322, "learning_rate": 1.991607585164155e-05, "loss": 2.0018, "step": 21585 }, { "epoch": 0.28, "grad_norm": 3.4525113105773926, "learning_rate": 1.991606226598093e-05, "loss": 1.7405, "step": 21586 }, { "epoch": 0.28, "grad_norm": 4.260037899017334, "learning_rate": 1.991604867922541e-05, "loss": 1.9903, "step": 21587 }, { "epoch": 0.28, "grad_norm": 4.167909145355225, "learning_rate": 1.9916035091374986e-05, "loss": 2.5484, "step": 21588 }, { "epoch": 0.28, "grad_norm": 4.098467826843262, "learning_rate": 1.9916021502429666e-05, "loss": 2.0516, "step": 21589 }, { "epoch": 0.28, "grad_norm": 4.378839492797852, "learning_rate": 1.9916007912389447e-05, "loss": 1.8758, "step": 21590 }, { "epoch": 0.28, "grad_norm": 4.153835773468018, "learning_rate": 1.9915994321254336e-05, "loss": 1.8658, "step": 21591 }, { "epoch": 0.28, "grad_norm": 3.6904478073120117, "learning_rate": 1.991598072902433e-05, "loss": 1.6401, "step": 21592 }, { "epoch": 0.28, "grad_norm": 4.259151935577393, "learning_rate": 1.991596713569943e-05, "loss": 2.3257, "step": 21593 }, { "epoch": 0.28, "grad_norm": 3.999680519104004, "learning_rate": 1.991595354127964e-05, "loss": 1.711, "step": 21594 }, { "epoch": 0.28, "grad_norm": 4.3888630867004395, "learning_rate": 1.9915939945764958e-05, "loss": 2.2338, "step": 21595 }, { "epoch": 0.28, "grad_norm": 3.4861578941345215, "learning_rate": 1.991592634915539e-05, "loss": 1.7716, "step": 21596 }, { "epoch": 0.28, "grad_norm": 4.538341999053955, "learning_rate": 1.9915912751450937e-05, "loss": 2.2978, "step": 21597 }, { "epoch": 0.28, "grad_norm": 4.281235694885254, "learning_rate": 1.9915899152651598e-05, "loss": 2.1255, "step": 21598 }, { "epoch": 0.28, "grad_norm": 4.831751823425293, "learning_rate": 1.9915885552757373e-05, "loss": 2.3785, "step": 21599 }, { "epoch": 0.28, "grad_norm": 4.164892673492432, "learning_rate": 1.9915871951768267e-05, "loss": 2.3376, "step": 21600 }, { "epoch": 0.28, "grad_norm": 4.09450101852417, "learning_rate": 1.9915858349684286e-05, "loss": 1.8227, "step": 21601 }, { "epoch": 0.28, "grad_norm": 3.9997267723083496, "learning_rate": 1.991584474650542e-05, "loss": 1.9216, "step": 21602 }, { "epoch": 0.28, "grad_norm": 3.6919922828674316, "learning_rate": 1.991583114223168e-05, "loss": 2.0241, "step": 21603 }, { "epoch": 0.28, "grad_norm": 4.124215126037598, "learning_rate": 1.991581753686306e-05, "loss": 2.2117, "step": 21604 }, { "epoch": 0.28, "grad_norm": 4.316547393798828, "learning_rate": 1.9915803930399568e-05, "loss": 2.3653, "step": 21605 }, { "epoch": 0.28, "grad_norm": 3.528118848800659, "learning_rate": 1.9915790322841203e-05, "loss": 1.519, "step": 21606 }, { "epoch": 0.28, "grad_norm": 4.066981315612793, "learning_rate": 1.9915776714187967e-05, "loss": 2.5256, "step": 21607 }, { "epoch": 0.28, "grad_norm": 3.6162407398223877, "learning_rate": 1.991576310443986e-05, "loss": 2.011, "step": 21608 }, { "epoch": 0.28, "grad_norm": 4.569202899932861, "learning_rate": 1.9915749493596887e-05, "loss": 2.5623, "step": 21609 }, { "epoch": 0.28, "grad_norm": 3.949650287628174, "learning_rate": 1.9915735881659043e-05, "loss": 2.0468, "step": 21610 }, { "epoch": 0.28, "grad_norm": 4.158560276031494, "learning_rate": 1.9915722268626335e-05, "loss": 2.2248, "step": 21611 }, { "epoch": 0.28, "grad_norm": 3.7932777404785156, "learning_rate": 1.9915708654498765e-05, "loss": 1.9349, "step": 21612 }, { "epoch": 0.28, "grad_norm": 4.956669807434082, "learning_rate": 1.991569503927633e-05, "loss": 2.7161, "step": 21613 }, { "epoch": 0.28, "grad_norm": 4.27845573425293, "learning_rate": 1.9915681422959036e-05, "loss": 2.2173, "step": 21614 }, { "epoch": 0.28, "grad_norm": 4.120034217834473, "learning_rate": 1.9915667805546882e-05, "loss": 1.6915, "step": 21615 }, { "epoch": 0.28, "grad_norm": 4.176011562347412, "learning_rate": 1.9915654187039868e-05, "loss": 2.0918, "step": 21616 }, { "epoch": 0.28, "grad_norm": 4.469414710998535, "learning_rate": 1.9915640567438e-05, "loss": 2.6268, "step": 21617 }, { "epoch": 0.28, "grad_norm": 3.84352445602417, "learning_rate": 1.9915626946741275e-05, "loss": 2.0901, "step": 21618 }, { "epoch": 0.28, "grad_norm": 2.961754083633423, "learning_rate": 1.9915613324949698e-05, "loss": 1.1628, "step": 21619 }, { "epoch": 0.28, "grad_norm": 4.159316539764404, "learning_rate": 1.9915599702063272e-05, "loss": 2.2335, "step": 21620 }, { "epoch": 0.28, "grad_norm": 4.050769805908203, "learning_rate": 1.991558607808199e-05, "loss": 1.9725, "step": 21621 }, { "epoch": 0.28, "grad_norm": 4.241339683532715, "learning_rate": 1.991557245300586e-05, "loss": 1.7952, "step": 21622 }, { "epoch": 0.28, "grad_norm": 4.133378982543945, "learning_rate": 1.9915558826834886e-05, "loss": 2.1572, "step": 21623 }, { "epoch": 0.28, "grad_norm": 3.7271792888641357, "learning_rate": 1.9915545199569062e-05, "loss": 1.9031, "step": 21624 }, { "epoch": 0.28, "grad_norm": 4.335901737213135, "learning_rate": 1.99155315712084e-05, "loss": 1.9812, "step": 21625 }, { "epoch": 0.28, "grad_norm": 3.716858148574829, "learning_rate": 1.991551794175289e-05, "loss": 1.8386, "step": 21626 }, { "epoch": 0.28, "grad_norm": 3.9397664070129395, "learning_rate": 1.9915504311202538e-05, "loss": 1.6621, "step": 21627 }, { "epoch": 0.28, "grad_norm": 4.186898708343506, "learning_rate": 1.9915490679557348e-05, "loss": 2.157, "step": 21628 }, { "epoch": 0.28, "grad_norm": 4.89000940322876, "learning_rate": 1.991547704681732e-05, "loss": 2.928, "step": 21629 }, { "epoch": 0.28, "grad_norm": 4.183668613433838, "learning_rate": 1.9915463412982455e-05, "loss": 1.9128, "step": 21630 }, { "epoch": 0.28, "grad_norm": 4.366217613220215, "learning_rate": 1.9915449778052752e-05, "loss": 2.1153, "step": 21631 }, { "epoch": 0.28, "grad_norm": 4.064390659332275, "learning_rate": 1.991543614202822e-05, "loss": 2.0416, "step": 21632 }, { "epoch": 0.28, "grad_norm": 4.301395416259766, "learning_rate": 1.991542250490885e-05, "loss": 2.6298, "step": 21633 }, { "epoch": 0.28, "grad_norm": 4.094289302825928, "learning_rate": 1.9915408866694653e-05, "loss": 2.1692, "step": 21634 }, { "epoch": 0.28, "grad_norm": 4.826318740844727, "learning_rate": 1.9915395227385624e-05, "loss": 2.4577, "step": 21635 }, { "epoch": 0.28, "grad_norm": 3.6794064044952393, "learning_rate": 1.9915381586981768e-05, "loss": 1.6518, "step": 21636 }, { "epoch": 0.28, "grad_norm": 4.122150421142578, "learning_rate": 1.9915367945483087e-05, "loss": 2.2884, "step": 21637 }, { "epoch": 0.28, "grad_norm": 4.38347864151001, "learning_rate": 1.991535430288958e-05, "loss": 2.2795, "step": 21638 }, { "epoch": 0.28, "grad_norm": 3.6716301441192627, "learning_rate": 1.991534065920125e-05, "loss": 1.8279, "step": 21639 }, { "epoch": 0.28, "grad_norm": 3.999626636505127, "learning_rate": 1.99153270144181e-05, "loss": 2.4737, "step": 21640 }, { "epoch": 0.28, "grad_norm": 3.698777675628662, "learning_rate": 1.9915313368540127e-05, "loss": 1.5363, "step": 21641 }, { "epoch": 0.28, "grad_norm": 4.144028663635254, "learning_rate": 1.9915299721567337e-05, "loss": 2.2901, "step": 21642 }, { "epoch": 0.28, "grad_norm": 4.117420673370361, "learning_rate": 1.991528607349973e-05, "loss": 2.2335, "step": 21643 }, { "epoch": 0.28, "grad_norm": 3.518223285675049, "learning_rate": 1.9915272424337305e-05, "loss": 1.792, "step": 21644 }, { "epoch": 0.28, "grad_norm": 3.9790666103363037, "learning_rate": 1.9915258774080067e-05, "loss": 2.1719, "step": 21645 }, { "epoch": 0.28, "grad_norm": 4.625423908233643, "learning_rate": 1.9915245122728015e-05, "loss": 2.8796, "step": 21646 }, { "epoch": 0.28, "grad_norm": 4.057379245758057, "learning_rate": 1.9915231470281153e-05, "loss": 2.6012, "step": 21647 }, { "epoch": 0.28, "grad_norm": 3.9920337200164795, "learning_rate": 1.991521781673948e-05, "loss": 2.031, "step": 21648 }, { "epoch": 0.28, "grad_norm": 4.557097911834717, "learning_rate": 1.9915204162103e-05, "loss": 2.4359, "step": 21649 }, { "epoch": 0.28, "grad_norm": 4.0107526779174805, "learning_rate": 1.9915190506371713e-05, "loss": 2.3604, "step": 21650 }, { "epoch": 0.28, "grad_norm": 3.783237934112549, "learning_rate": 1.991517684954562e-05, "loss": 2.1195, "step": 21651 }, { "epoch": 0.28, "grad_norm": 4.805863380432129, "learning_rate": 1.9915163191624725e-05, "loss": 2.3575, "step": 21652 }, { "epoch": 0.28, "grad_norm": 4.620567798614502, "learning_rate": 1.9915149532609028e-05, "loss": 2.112, "step": 21653 }, { "epoch": 0.28, "grad_norm": 4.342907905578613, "learning_rate": 1.9915135872498526e-05, "loss": 2.0897, "step": 21654 }, { "epoch": 0.28, "grad_norm": 4.169714450836182, "learning_rate": 1.991512221129323e-05, "loss": 2.0879, "step": 21655 }, { "epoch": 0.28, "grad_norm": 3.911738634109497, "learning_rate": 1.9915108548993133e-05, "loss": 2.1782, "step": 21656 }, { "epoch": 0.28, "grad_norm": 3.7399423122406006, "learning_rate": 1.991509488559824e-05, "loss": 1.7567, "step": 21657 }, { "epoch": 0.28, "grad_norm": 3.6875557899475098, "learning_rate": 1.9915081221108555e-05, "loss": 2.0366, "step": 21658 }, { "epoch": 0.28, "grad_norm": 3.526089906692505, "learning_rate": 1.9915067555524073e-05, "loss": 1.8503, "step": 21659 }, { "epoch": 0.28, "grad_norm": 3.6957545280456543, "learning_rate": 1.9915053888844804e-05, "loss": 1.798, "step": 21660 }, { "epoch": 0.28, "grad_norm": 3.961361885070801, "learning_rate": 1.991504022107074e-05, "loss": 1.8087, "step": 21661 }, { "epoch": 0.28, "grad_norm": 4.042009353637695, "learning_rate": 1.991502655220189e-05, "loss": 2.1074, "step": 21662 }, { "epoch": 0.28, "grad_norm": 3.8875555992126465, "learning_rate": 1.9915012882238253e-05, "loss": 1.7888, "step": 21663 }, { "epoch": 0.28, "grad_norm": 3.4617629051208496, "learning_rate": 1.991499921117983e-05, "loss": 1.9343, "step": 21664 }, { "epoch": 0.28, "grad_norm": 4.231815814971924, "learning_rate": 1.991498553902662e-05, "loss": 2.06, "step": 21665 }, { "epoch": 0.28, "grad_norm": 4.588986873626709, "learning_rate": 1.9914971865778632e-05, "loss": 2.472, "step": 21666 }, { "epoch": 0.28, "grad_norm": 4.586245059967041, "learning_rate": 1.991495819143586e-05, "loss": 2.7013, "step": 21667 }, { "epoch": 0.28, "grad_norm": 3.9330332279205322, "learning_rate": 1.9914944515998307e-05, "loss": 1.9378, "step": 21668 }, { "epoch": 0.28, "grad_norm": 3.806110382080078, "learning_rate": 1.991493083946598e-05, "loss": 2.16, "step": 21669 }, { "epoch": 0.28, "grad_norm": 3.5802414417266846, "learning_rate": 1.9914917161838876e-05, "loss": 1.8021, "step": 21670 }, { "epoch": 0.28, "grad_norm": 4.291901588439941, "learning_rate": 1.9914903483116998e-05, "loss": 2.32, "step": 21671 }, { "epoch": 0.28, "grad_norm": 3.922153949737549, "learning_rate": 1.9914889803300343e-05, "loss": 1.9567, "step": 21672 }, { "epoch": 0.28, "grad_norm": 4.471521377563477, "learning_rate": 1.9914876122388914e-05, "loss": 2.5709, "step": 21673 }, { "epoch": 0.28, "grad_norm": 3.736238479614258, "learning_rate": 1.991486244038272e-05, "loss": 1.8106, "step": 21674 }, { "epoch": 0.28, "grad_norm": 4.386834621429443, "learning_rate": 1.9914848757281752e-05, "loss": 2.2508, "step": 21675 }, { "epoch": 0.28, "grad_norm": 4.191119194030762, "learning_rate": 1.991483507308602e-05, "loss": 1.7067, "step": 21676 }, { "epoch": 0.28, "grad_norm": 3.540904998779297, "learning_rate": 1.991482138779552e-05, "loss": 2.0761, "step": 21677 }, { "epoch": 0.28, "grad_norm": 3.867184638977051, "learning_rate": 1.991480770141026e-05, "loss": 1.9067, "step": 21678 }, { "epoch": 0.28, "grad_norm": 4.059460639953613, "learning_rate": 1.991479401393023e-05, "loss": 1.9655, "step": 21679 }, { "epoch": 0.28, "grad_norm": 3.745798349380493, "learning_rate": 1.9914780325355445e-05, "loss": 2.4001, "step": 21680 }, { "epoch": 0.28, "grad_norm": 4.153713226318359, "learning_rate": 1.9914766635685896e-05, "loss": 2.2314, "step": 21681 }, { "epoch": 0.28, "grad_norm": 3.746924638748169, "learning_rate": 1.991475294492159e-05, "loss": 1.8644, "step": 21682 }, { "epoch": 0.28, "grad_norm": 4.289266586303711, "learning_rate": 1.9914739253062526e-05, "loss": 2.4378, "step": 21683 }, { "epoch": 0.28, "grad_norm": 4.363876819610596, "learning_rate": 1.9914725560108708e-05, "loss": 2.2762, "step": 21684 }, { "epoch": 0.28, "grad_norm": 4.251709938049316, "learning_rate": 1.9914711866060134e-05, "loss": 2.439, "step": 21685 }, { "epoch": 0.28, "grad_norm": 4.295785427093506, "learning_rate": 1.991469817091681e-05, "loss": 2.228, "step": 21686 }, { "epoch": 0.28, "grad_norm": 4.164144992828369, "learning_rate": 1.9914684474678735e-05, "loss": 2.2037, "step": 21687 }, { "epoch": 0.28, "grad_norm": 4.187911033630371, "learning_rate": 1.991467077734591e-05, "loss": 2.2556, "step": 21688 }, { "epoch": 0.28, "grad_norm": 4.105377674102783, "learning_rate": 1.9914657078918334e-05, "loss": 2.0952, "step": 21689 }, { "epoch": 0.28, "grad_norm": 3.572319507598877, "learning_rate": 1.9914643379396013e-05, "loss": 1.6682, "step": 21690 }, { "epoch": 0.28, "grad_norm": 4.170886039733887, "learning_rate": 1.9914629678778953e-05, "loss": 2.2026, "step": 21691 }, { "epoch": 0.28, "grad_norm": 4.008753776550293, "learning_rate": 1.9914615977067143e-05, "loss": 2.3944, "step": 21692 }, { "epoch": 0.28, "grad_norm": 3.703387975692749, "learning_rate": 1.9914602274260594e-05, "loss": 2.0061, "step": 21693 }, { "epoch": 0.28, "grad_norm": 3.572754383087158, "learning_rate": 1.9914588570359306e-05, "loss": 1.7743, "step": 21694 }, { "epoch": 0.28, "grad_norm": 3.2574141025543213, "learning_rate": 1.9914574865363275e-05, "loss": 1.7481, "step": 21695 }, { "epoch": 0.28, "grad_norm": 4.416241645812988, "learning_rate": 1.991456115927251e-05, "loss": 2.344, "step": 21696 }, { "epoch": 0.28, "grad_norm": 4.165769577026367, "learning_rate": 1.9914547452087006e-05, "loss": 1.9802, "step": 21697 }, { "epoch": 0.28, "grad_norm": 3.893235921859741, "learning_rate": 1.991453374380677e-05, "loss": 1.8962, "step": 21698 }, { "epoch": 0.28, "grad_norm": 4.341506004333496, "learning_rate": 1.9914520034431803e-05, "loss": 2.2972, "step": 21699 }, { "epoch": 0.28, "grad_norm": 3.983245611190796, "learning_rate": 1.9914506323962103e-05, "loss": 2.3992, "step": 21700 }, { "epoch": 0.28, "grad_norm": 4.470099925994873, "learning_rate": 1.9914492612397677e-05, "loss": 2.0454, "step": 21701 }, { "epoch": 0.28, "grad_norm": 3.916066884994507, "learning_rate": 1.9914478899738516e-05, "loss": 1.9035, "step": 21702 }, { "epoch": 0.28, "grad_norm": 4.12321662902832, "learning_rate": 1.991446518598463e-05, "loss": 2.0121, "step": 21703 }, { "epoch": 0.28, "grad_norm": 3.662562370300293, "learning_rate": 1.9914451471136025e-05, "loss": 1.842, "step": 21704 }, { "epoch": 0.28, "grad_norm": 3.791126251220703, "learning_rate": 1.991443775519269e-05, "loss": 1.8934, "step": 21705 }, { "epoch": 0.28, "grad_norm": 4.071908950805664, "learning_rate": 1.9914424038154636e-05, "loss": 2.0858, "step": 21706 }, { "epoch": 0.28, "grad_norm": 4.323849201202393, "learning_rate": 1.991441032002186e-05, "loss": 2.1773, "step": 21707 }, { "epoch": 0.28, "grad_norm": 4.168426036834717, "learning_rate": 1.9914396600794363e-05, "loss": 2.1797, "step": 21708 }, { "epoch": 0.28, "grad_norm": 4.572000980377197, "learning_rate": 1.9914382880472152e-05, "loss": 2.5981, "step": 21709 }, { "epoch": 0.28, "grad_norm": 3.933166027069092, "learning_rate": 1.9914369159055226e-05, "loss": 1.9888, "step": 21710 }, { "epoch": 0.28, "grad_norm": 3.8359551429748535, "learning_rate": 1.991435543654358e-05, "loss": 2.1531, "step": 21711 }, { "epoch": 0.28, "grad_norm": 4.488387584686279, "learning_rate": 1.9914341712937226e-05, "loss": 2.1572, "step": 21712 }, { "epoch": 0.28, "grad_norm": 3.976884126663208, "learning_rate": 1.9914327988236156e-05, "loss": 2.1827, "step": 21713 }, { "epoch": 0.28, "grad_norm": 3.3606767654418945, "learning_rate": 1.991431426244038e-05, "loss": 1.6166, "step": 21714 }, { "epoch": 0.28, "grad_norm": 3.912264823913574, "learning_rate": 1.9914300535549893e-05, "loss": 2.1479, "step": 21715 }, { "epoch": 0.28, "grad_norm": 4.200128078460693, "learning_rate": 1.99142868075647e-05, "loss": 2.4307, "step": 21716 }, { "epoch": 0.28, "grad_norm": 4.234504699707031, "learning_rate": 1.9914273078484802e-05, "loss": 1.988, "step": 21717 }, { "epoch": 0.28, "grad_norm": 4.19489860534668, "learning_rate": 1.9914259348310198e-05, "loss": 1.9288, "step": 21718 }, { "epoch": 0.28, "grad_norm": 3.899822235107422, "learning_rate": 1.9914245617040893e-05, "loss": 1.8826, "step": 21719 }, { "epoch": 0.28, "grad_norm": 3.9912216663360596, "learning_rate": 1.9914231884676885e-05, "loss": 2.2178, "step": 21720 }, { "epoch": 0.28, "grad_norm": 3.9418137073516846, "learning_rate": 1.9914218151218182e-05, "loss": 2.0426, "step": 21721 }, { "epoch": 0.28, "grad_norm": 4.401184558868408, "learning_rate": 1.9914204416664777e-05, "loss": 2.2449, "step": 21722 }, { "epoch": 0.28, "grad_norm": 4.999223709106445, "learning_rate": 1.9914190681016677e-05, "loss": 2.3328, "step": 21723 }, { "epoch": 0.28, "grad_norm": 4.029728889465332, "learning_rate": 1.9914176944273886e-05, "loss": 2.0682, "step": 21724 }, { "epoch": 0.28, "grad_norm": 4.468729496002197, "learning_rate": 1.9914163206436395e-05, "loss": 2.474, "step": 21725 }, { "epoch": 0.28, "grad_norm": 3.9885685443878174, "learning_rate": 1.9914149467504216e-05, "loss": 2.1261, "step": 21726 }, { "epoch": 0.28, "grad_norm": 4.441932678222656, "learning_rate": 1.9914135727477346e-05, "loss": 2.1041, "step": 21727 }, { "epoch": 0.28, "grad_norm": 4.014498233795166, "learning_rate": 1.9914121986355786e-05, "loss": 1.8904, "step": 21728 }, { "epoch": 0.28, "grad_norm": 4.624466896057129, "learning_rate": 1.9914108244139542e-05, "loss": 2.14, "step": 21729 }, { "epoch": 0.28, "grad_norm": 4.115913391113281, "learning_rate": 1.991409450082861e-05, "loss": 1.8367, "step": 21730 }, { "epoch": 0.28, "grad_norm": 4.605716228485107, "learning_rate": 1.9914080756422995e-05, "loss": 2.5137, "step": 21731 }, { "epoch": 0.28, "grad_norm": 4.306861400604248, "learning_rate": 1.9914067010922696e-05, "loss": 2.4789, "step": 21732 }, { "epoch": 0.28, "grad_norm": 4.329842567443848, "learning_rate": 1.9914053264327715e-05, "loss": 2.1187, "step": 21733 }, { "epoch": 0.28, "grad_norm": 3.46537184715271, "learning_rate": 1.991403951663806e-05, "loss": 2.2587, "step": 21734 }, { "epoch": 0.28, "grad_norm": 4.449861526489258, "learning_rate": 1.9914025767853718e-05, "loss": 2.4073, "step": 21735 }, { "epoch": 0.28, "grad_norm": 4.153562545776367, "learning_rate": 1.9914012017974705e-05, "loss": 1.8311, "step": 21736 }, { "epoch": 0.28, "grad_norm": 3.3659276962280273, "learning_rate": 1.9913998267001015e-05, "loss": 1.5687, "step": 21737 }, { "epoch": 0.28, "grad_norm": 3.851489543914795, "learning_rate": 1.9913984514932653e-05, "loss": 1.9476, "step": 21738 }, { "epoch": 0.28, "grad_norm": 4.241276264190674, "learning_rate": 1.9913970761769615e-05, "loss": 2.1645, "step": 21739 }, { "epoch": 0.28, "grad_norm": 3.4015841484069824, "learning_rate": 1.991395700751191e-05, "loss": 1.9522, "step": 21740 }, { "epoch": 0.28, "grad_norm": 3.859761953353882, "learning_rate": 1.9913943252159537e-05, "loss": 2.1274, "step": 21741 }, { "epoch": 0.28, "grad_norm": 4.381502628326416, "learning_rate": 1.9913929495712495e-05, "loss": 2.4945, "step": 21742 }, { "epoch": 0.28, "grad_norm": 3.768233060836792, "learning_rate": 1.9913915738170786e-05, "loss": 2.1526, "step": 21743 }, { "epoch": 0.28, "grad_norm": 4.38663911819458, "learning_rate": 1.9913901979534415e-05, "loss": 2.1118, "step": 21744 }, { "epoch": 0.28, "grad_norm": 3.817836046218872, "learning_rate": 1.9913888219803376e-05, "loss": 2.3728, "step": 21745 }, { "epoch": 0.28, "grad_norm": 4.504079341888428, "learning_rate": 1.991387445897768e-05, "loss": 2.374, "step": 21746 }, { "epoch": 0.28, "grad_norm": 3.6274139881134033, "learning_rate": 1.9913860697057324e-05, "loss": 2.0954, "step": 21747 }, { "epoch": 0.28, "grad_norm": 4.365284442901611, "learning_rate": 1.9913846934042308e-05, "loss": 2.4149, "step": 21748 }, { "epoch": 0.28, "grad_norm": 3.7093358039855957, "learning_rate": 1.9913833169932633e-05, "loss": 1.8272, "step": 21749 }, { "epoch": 0.28, "grad_norm": 4.12700080871582, "learning_rate": 1.9913819404728308e-05, "loss": 2.3864, "step": 21750 }, { "epoch": 0.28, "grad_norm": 4.103337287902832, "learning_rate": 1.9913805638429325e-05, "loss": 2.1766, "step": 21751 }, { "epoch": 0.28, "grad_norm": 4.08104944229126, "learning_rate": 1.991379187103569e-05, "loss": 2.182, "step": 21752 }, { "epoch": 0.28, "grad_norm": 4.43532133102417, "learning_rate": 1.9913778102547408e-05, "loss": 2.3432, "step": 21753 }, { "epoch": 0.28, "grad_norm": 4.333620071411133, "learning_rate": 1.991376433296447e-05, "loss": 2.247, "step": 21754 }, { "epoch": 0.28, "grad_norm": 3.863917112350464, "learning_rate": 1.991375056228689e-05, "loss": 2.0399, "step": 21755 }, { "epoch": 0.28, "grad_norm": 4.35998010635376, "learning_rate": 1.991373679051466e-05, "loss": 2.0888, "step": 21756 }, { "epoch": 0.28, "grad_norm": 4.937112808227539, "learning_rate": 1.991372301764779e-05, "loss": 2.4053, "step": 21757 }, { "epoch": 0.28, "grad_norm": 3.3089826107025146, "learning_rate": 1.9913709243686273e-05, "loss": 1.7108, "step": 21758 }, { "epoch": 0.28, "grad_norm": 5.190896987915039, "learning_rate": 1.9913695468630118e-05, "loss": 3.0633, "step": 21759 }, { "epoch": 0.28, "grad_norm": 4.5427327156066895, "learning_rate": 1.9913681692479318e-05, "loss": 2.2788, "step": 21760 }, { "epoch": 0.28, "grad_norm": 4.905472755432129, "learning_rate": 1.991366791523388e-05, "loss": 2.1069, "step": 21761 }, { "epoch": 0.28, "grad_norm": 4.168575763702393, "learning_rate": 1.991365413689381e-05, "loss": 2.5083, "step": 21762 }, { "epoch": 0.28, "grad_norm": 4.121818542480469, "learning_rate": 1.99136403574591e-05, "loss": 2.1042, "step": 21763 }, { "epoch": 0.28, "grad_norm": 3.7773149013519287, "learning_rate": 1.9913626576929756e-05, "loss": 2.0102, "step": 21764 }, { "epoch": 0.28, "grad_norm": 4.122497081756592, "learning_rate": 1.9913612795305778e-05, "loss": 1.6702, "step": 21765 }, { "epoch": 0.28, "grad_norm": 4.802441120147705, "learning_rate": 1.9913599012587173e-05, "loss": 2.3095, "step": 21766 }, { "epoch": 0.28, "grad_norm": 4.185340881347656, "learning_rate": 1.9913585228773934e-05, "loss": 2.1593, "step": 21767 }, { "epoch": 0.28, "grad_norm": 4.219711780548096, "learning_rate": 1.991357144386607e-05, "loss": 1.8297, "step": 21768 }, { "epoch": 0.28, "grad_norm": 3.9712467193603516, "learning_rate": 1.991355765786358e-05, "loss": 1.9746, "step": 21769 }, { "epoch": 0.28, "grad_norm": 3.43074631690979, "learning_rate": 1.9913543870766464e-05, "loss": 2.0581, "step": 21770 }, { "epoch": 0.28, "grad_norm": 5.373925685882568, "learning_rate": 1.9913530082574725e-05, "loss": 2.5453, "step": 21771 }, { "epoch": 0.28, "grad_norm": 3.868781328201294, "learning_rate": 1.9913516293288365e-05, "loss": 2.1516, "step": 21772 }, { "epoch": 0.28, "grad_norm": 4.622819423675537, "learning_rate": 1.991350250290738e-05, "loss": 2.4318, "step": 21773 }, { "epoch": 0.28, "grad_norm": 4.52796745300293, "learning_rate": 1.991348871143178e-05, "loss": 2.3809, "step": 21774 }, { "epoch": 0.28, "grad_norm": 3.9667246341705322, "learning_rate": 1.9913474918861565e-05, "loss": 1.9372, "step": 21775 }, { "epoch": 0.28, "grad_norm": 4.022397518157959, "learning_rate": 1.991346112519673e-05, "loss": 2.0075, "step": 21776 }, { "epoch": 0.28, "grad_norm": 4.686680316925049, "learning_rate": 1.9913447330437283e-05, "loss": 2.432, "step": 21777 }, { "epoch": 0.28, "grad_norm": 3.660663366317749, "learning_rate": 1.9913433534583223e-05, "loss": 1.6755, "step": 21778 }, { "epoch": 0.28, "grad_norm": 3.6828582286834717, "learning_rate": 1.9913419737634553e-05, "loss": 1.8947, "step": 21779 }, { "epoch": 0.28, "grad_norm": 4.324693202972412, "learning_rate": 1.9913405939591268e-05, "loss": 2.4333, "step": 21780 }, { "epoch": 0.28, "grad_norm": 4.042799949645996, "learning_rate": 1.991339214045338e-05, "loss": 2.4132, "step": 21781 }, { "epoch": 0.28, "grad_norm": 3.780618190765381, "learning_rate": 1.991337834022088e-05, "loss": 2.0174, "step": 21782 }, { "epoch": 0.28, "grad_norm": 3.8920400142669678, "learning_rate": 1.991336453889378e-05, "loss": 2.1184, "step": 21783 }, { "epoch": 0.28, "grad_norm": 3.8635759353637695, "learning_rate": 1.9913350736472076e-05, "loss": 1.8208, "step": 21784 }, { "epoch": 0.28, "grad_norm": 3.7905232906341553, "learning_rate": 1.9913336932955766e-05, "loss": 1.9134, "step": 21785 }, { "epoch": 0.28, "grad_norm": 4.479528903961182, "learning_rate": 1.991332312834486e-05, "loss": 2.0832, "step": 21786 }, { "epoch": 0.28, "grad_norm": 3.670898675918579, "learning_rate": 1.991330932263935e-05, "loss": 1.8214, "step": 21787 }, { "epoch": 0.28, "grad_norm": 4.277431488037109, "learning_rate": 1.9913295515839247e-05, "loss": 2.2667, "step": 21788 }, { "epoch": 0.28, "grad_norm": 3.9960484504699707, "learning_rate": 1.9913281707944546e-05, "loss": 1.7546, "step": 21789 }, { "epoch": 0.28, "grad_norm": 4.149848461151123, "learning_rate": 1.991326789895525e-05, "loss": 2.1159, "step": 21790 }, { "epoch": 0.28, "grad_norm": 4.001380920410156, "learning_rate": 1.9913254088871366e-05, "loss": 2.3953, "step": 21791 }, { "epoch": 0.28, "grad_norm": 3.5425772666931152, "learning_rate": 1.9913240277692884e-05, "loss": 1.6634, "step": 21792 }, { "epoch": 0.28, "grad_norm": 4.131638526916504, "learning_rate": 1.9913226465419816e-05, "loss": 2.5295, "step": 21793 }, { "epoch": 0.28, "grad_norm": 4.3721513748168945, "learning_rate": 1.9913212652052157e-05, "loss": 1.8791, "step": 21794 }, { "epoch": 0.28, "grad_norm": 4.5125885009765625, "learning_rate": 1.9913198837589915e-05, "loss": 2.471, "step": 21795 }, { "epoch": 0.28, "grad_norm": 5.033872604370117, "learning_rate": 1.9913185022033082e-05, "loss": 2.217, "step": 21796 }, { "epoch": 0.28, "grad_norm": 4.25316858291626, "learning_rate": 1.991317120538167e-05, "loss": 1.7598, "step": 21797 }, { "epoch": 0.28, "grad_norm": 4.395264148712158, "learning_rate": 1.9913157387635674e-05, "loss": 2.239, "step": 21798 }, { "epoch": 0.28, "grad_norm": 5.079422950744629, "learning_rate": 1.9913143568795098e-05, "loss": 2.6652, "step": 21799 }, { "epoch": 0.28, "grad_norm": 4.685564041137695, "learning_rate": 1.991312974885994e-05, "loss": 2.5254, "step": 21800 }, { "epoch": 0.28, "grad_norm": 3.6930768489837646, "learning_rate": 1.9913115927830206e-05, "loss": 1.7652, "step": 21801 }, { "epoch": 0.28, "grad_norm": 4.416604518890381, "learning_rate": 1.9913102105705896e-05, "loss": 2.3935, "step": 21802 }, { "epoch": 0.28, "grad_norm": 4.119677543640137, "learning_rate": 1.991308828248701e-05, "loss": 1.7792, "step": 21803 }, { "epoch": 0.28, "grad_norm": 4.491997718811035, "learning_rate": 1.9913074458173555e-05, "loss": 2.3471, "step": 21804 }, { "epoch": 0.28, "grad_norm": 4.056285858154297, "learning_rate": 1.9913060632765522e-05, "loss": 1.8196, "step": 21805 }, { "epoch": 0.28, "grad_norm": 3.741088390350342, "learning_rate": 1.9913046806262927e-05, "loss": 1.9487, "step": 21806 }, { "epoch": 0.28, "grad_norm": 4.003884792327881, "learning_rate": 1.991303297866576e-05, "loss": 2.1861, "step": 21807 }, { "epoch": 0.28, "grad_norm": 4.247148513793945, "learning_rate": 1.9913019149974023e-05, "loss": 2.4557, "step": 21808 }, { "epoch": 0.28, "grad_norm": 3.4776437282562256, "learning_rate": 1.991300532018772e-05, "loss": 1.7103, "step": 21809 }, { "epoch": 0.28, "grad_norm": 4.222310543060303, "learning_rate": 1.991299148930686e-05, "loss": 1.8477, "step": 21810 }, { "epoch": 0.28, "grad_norm": 4.759253025054932, "learning_rate": 1.9912977657331432e-05, "loss": 2.6595, "step": 21811 }, { "epoch": 0.28, "grad_norm": 3.90628719329834, "learning_rate": 1.9912963824261445e-05, "loss": 2.299, "step": 21812 }, { "epoch": 0.28, "grad_norm": 4.048402309417725, "learning_rate": 1.9912949990096898e-05, "loss": 1.9117, "step": 21813 }, { "epoch": 0.28, "grad_norm": 4.067041873931885, "learning_rate": 1.991293615483779e-05, "loss": 1.9368, "step": 21814 }, { "epoch": 0.28, "grad_norm": 3.5812828540802, "learning_rate": 1.991292231848413e-05, "loss": 1.9399, "step": 21815 }, { "epoch": 0.28, "grad_norm": 3.952626943588257, "learning_rate": 1.9912908481035912e-05, "loss": 2.4738, "step": 21816 }, { "epoch": 0.28, "grad_norm": 3.92911696434021, "learning_rate": 1.9912894642493146e-05, "loss": 2.4018, "step": 21817 }, { "epoch": 0.28, "grad_norm": 4.532124042510986, "learning_rate": 1.9912880802855824e-05, "loss": 2.4988, "step": 21818 }, { "epoch": 0.28, "grad_norm": 4.175066947937012, "learning_rate": 1.9912866962123953e-05, "loss": 1.8285, "step": 21819 }, { "epoch": 0.28, "grad_norm": 4.200149059295654, "learning_rate": 1.9912853120297532e-05, "loss": 2.2827, "step": 21820 }, { "epoch": 0.28, "grad_norm": 4.425530433654785, "learning_rate": 1.9912839277376565e-05, "loss": 2.1648, "step": 21821 }, { "epoch": 0.28, "grad_norm": 4.039953231811523, "learning_rate": 1.991282543336105e-05, "loss": 2.2773, "step": 21822 }, { "epoch": 0.28, "grad_norm": 4.002831935882568, "learning_rate": 1.9912811588251e-05, "loss": 2.2433, "step": 21823 }, { "epoch": 0.28, "grad_norm": 3.8505003452301025, "learning_rate": 1.9912797742046397e-05, "loss": 2.1802, "step": 21824 }, { "epoch": 0.28, "grad_norm": 4.372852802276611, "learning_rate": 1.9912783894747255e-05, "loss": 2.0817, "step": 21825 }, { "epoch": 0.28, "grad_norm": 5.121777534484863, "learning_rate": 1.9912770046353575e-05, "loss": 2.2656, "step": 21826 }, { "epoch": 0.28, "grad_norm": 3.9372317790985107, "learning_rate": 1.9912756196865358e-05, "loss": 2.1047, "step": 21827 }, { "epoch": 0.28, "grad_norm": 4.214560031890869, "learning_rate": 1.99127423462826e-05, "loss": 2.2682, "step": 21828 }, { "epoch": 0.28, "grad_norm": 3.724965810775757, "learning_rate": 1.9912728494605313e-05, "loss": 1.8241, "step": 21829 }, { "epoch": 0.28, "grad_norm": 3.7344136238098145, "learning_rate": 1.991271464183349e-05, "loss": 2.0447, "step": 21830 }, { "epoch": 0.28, "grad_norm": 5.043628215789795, "learning_rate": 1.9912700787967138e-05, "loss": 2.3409, "step": 21831 }, { "epoch": 0.28, "grad_norm": 4.633697986602783, "learning_rate": 1.9912686933006254e-05, "loss": 2.6449, "step": 21832 }, { "epoch": 0.28, "grad_norm": 3.6142446994781494, "learning_rate": 1.991267307695084e-05, "loss": 2.0292, "step": 21833 }, { "epoch": 0.28, "grad_norm": 3.3525164127349854, "learning_rate": 1.9912659219800897e-05, "loss": 1.6574, "step": 21834 }, { "epoch": 0.28, "grad_norm": 3.8322582244873047, "learning_rate": 1.991264536155643e-05, "loss": 2.0476, "step": 21835 }, { "epoch": 0.28, "grad_norm": 4.533554553985596, "learning_rate": 1.991263150221744e-05, "loss": 2.5987, "step": 21836 }, { "epoch": 0.28, "grad_norm": 3.350261926651001, "learning_rate": 1.9912617641783924e-05, "loss": 1.7749, "step": 21837 }, { "epoch": 0.28, "grad_norm": 3.8956971168518066, "learning_rate": 1.991260378025589e-05, "loss": 1.8333, "step": 21838 }, { "epoch": 0.28, "grad_norm": 3.463347911834717, "learning_rate": 1.9912589917633336e-05, "loss": 1.5418, "step": 21839 }, { "epoch": 0.28, "grad_norm": 3.6859378814697266, "learning_rate": 1.9912576053916263e-05, "loss": 1.7294, "step": 21840 }, { "epoch": 0.28, "grad_norm": 3.961700439453125, "learning_rate": 1.9912562189104675e-05, "loss": 2.042, "step": 21841 }, { "epoch": 0.28, "grad_norm": 3.8159682750701904, "learning_rate": 1.9912548323198568e-05, "loss": 2.1843, "step": 21842 }, { "epoch": 0.28, "grad_norm": 3.8586108684539795, "learning_rate": 1.991253445619795e-05, "loss": 1.7686, "step": 21843 }, { "epoch": 0.28, "grad_norm": 4.0692524909973145, "learning_rate": 1.9912520588102823e-05, "loss": 2.2921, "step": 21844 }, { "epoch": 0.28, "grad_norm": 3.6408987045288086, "learning_rate": 1.9912506718913182e-05, "loss": 1.8488, "step": 21845 }, { "epoch": 0.28, "grad_norm": 4.01743221282959, "learning_rate": 1.9912492848629036e-05, "loss": 1.9749, "step": 21846 }, { "epoch": 0.28, "grad_norm": 4.168835163116455, "learning_rate": 1.9912478977250374e-05, "loss": 2.1619, "step": 21847 }, { "epoch": 0.28, "grad_norm": 4.429807662963867, "learning_rate": 1.9912465104777214e-05, "loss": 2.3955, "step": 21848 }, { "epoch": 0.28, "grad_norm": 4.046496391296387, "learning_rate": 1.9912451231209548e-05, "loss": 2.0571, "step": 21849 }, { "epoch": 0.28, "grad_norm": 4.212314605712891, "learning_rate": 1.991243735654738e-05, "loss": 2.2222, "step": 21850 }, { "epoch": 0.28, "grad_norm": 4.091650485992432, "learning_rate": 1.991242348079071e-05, "loss": 2.3214, "step": 21851 }, { "epoch": 0.28, "grad_norm": 4.067215442657471, "learning_rate": 1.9912409603939537e-05, "loss": 2.0251, "step": 21852 }, { "epoch": 0.28, "grad_norm": 4.245033264160156, "learning_rate": 1.991239572599387e-05, "loss": 2.6661, "step": 21853 }, { "epoch": 0.28, "grad_norm": 4.219910144805908, "learning_rate": 1.9912381846953703e-05, "loss": 2.3246, "step": 21854 }, { "epoch": 0.28, "grad_norm": 4.2936177253723145, "learning_rate": 1.9912367966819045e-05, "loss": 2.2296, "step": 21855 }, { "epoch": 0.28, "grad_norm": 4.64958381652832, "learning_rate": 1.991235408558989e-05, "loss": 2.409, "step": 21856 }, { "epoch": 0.28, "grad_norm": 4.520552635192871, "learning_rate": 1.9912340203266245e-05, "loss": 2.8638, "step": 21857 }, { "epoch": 0.28, "grad_norm": 3.86647891998291, "learning_rate": 1.9912326319848108e-05, "loss": 2.1691, "step": 21858 }, { "epoch": 0.28, "grad_norm": 3.765660047531128, "learning_rate": 1.9912312435335482e-05, "loss": 1.8477, "step": 21859 }, { "epoch": 0.28, "grad_norm": 4.477412700653076, "learning_rate": 1.9912298549728367e-05, "loss": 2.4374, "step": 21860 }, { "epoch": 0.28, "grad_norm": 4.079647064208984, "learning_rate": 1.991228466302677e-05, "loss": 1.9866, "step": 21861 }, { "epoch": 0.28, "grad_norm": 4.218997955322266, "learning_rate": 1.9912270775230686e-05, "loss": 2.225, "step": 21862 }, { "epoch": 0.28, "grad_norm": 4.133369445800781, "learning_rate": 1.9912256886340123e-05, "loss": 2.3059, "step": 21863 }, { "epoch": 0.28, "grad_norm": 3.5363852977752686, "learning_rate": 1.9912242996355075e-05, "loss": 1.6893, "step": 21864 }, { "epoch": 0.28, "grad_norm": 4.1684441566467285, "learning_rate": 1.9912229105275545e-05, "loss": 2.0981, "step": 21865 }, { "epoch": 0.28, "grad_norm": 3.9370648860931396, "learning_rate": 1.991221521310154e-05, "loss": 2.3119, "step": 21866 }, { "epoch": 0.28, "grad_norm": 4.083895683288574, "learning_rate": 1.991220131983306e-05, "loss": 1.9125, "step": 21867 }, { "epoch": 0.28, "grad_norm": 4.419205665588379, "learning_rate": 1.99121874254701e-05, "loss": 2.0944, "step": 21868 }, { "epoch": 0.28, "grad_norm": 4.212542533874512, "learning_rate": 1.9912173530012667e-05, "loss": 2.2359, "step": 21869 }, { "epoch": 0.28, "grad_norm": 4.539696216583252, "learning_rate": 1.991215963346077e-05, "loss": 2.1308, "step": 21870 }, { "epoch": 0.28, "grad_norm": 4.0432515144348145, "learning_rate": 1.9912145735814392e-05, "loss": 2.4557, "step": 21871 }, { "epoch": 0.28, "grad_norm": 4.226556777954102, "learning_rate": 1.991213183707355e-05, "loss": 2.2569, "step": 21872 }, { "epoch": 0.28, "grad_norm": 4.11552619934082, "learning_rate": 1.991211793723824e-05, "loss": 2.172, "step": 21873 }, { "epoch": 0.28, "grad_norm": 3.964634895324707, "learning_rate": 1.9912104036308463e-05, "loss": 1.9786, "step": 21874 }, { "epoch": 0.28, "grad_norm": 4.256913661956787, "learning_rate": 1.9912090134284224e-05, "loss": 2.1607, "step": 21875 }, { "epoch": 0.28, "grad_norm": 4.158947944641113, "learning_rate": 1.991207623116552e-05, "loss": 2.1549, "step": 21876 }, { "epoch": 0.28, "grad_norm": 4.119540214538574, "learning_rate": 1.9912062326952354e-05, "loss": 2.4995, "step": 21877 }, { "epoch": 0.28, "grad_norm": 4.288646697998047, "learning_rate": 1.991204842164473e-05, "loss": 2.261, "step": 21878 }, { "epoch": 0.28, "grad_norm": 4.313154697418213, "learning_rate": 1.9912034515242645e-05, "loss": 1.7947, "step": 21879 }, { "epoch": 0.28, "grad_norm": 4.104356288909912, "learning_rate": 1.991202060774611e-05, "loss": 1.9838, "step": 21880 }, { "epoch": 0.28, "grad_norm": 3.859048366546631, "learning_rate": 1.991200669915511e-05, "loss": 2.0187, "step": 21881 }, { "epoch": 0.28, "grad_norm": 4.0085577964782715, "learning_rate": 1.9911992789469662e-05, "loss": 2.1819, "step": 21882 }, { "epoch": 0.28, "grad_norm": 3.671539783477783, "learning_rate": 1.991197887868976e-05, "loss": 1.9057, "step": 21883 }, { "epoch": 0.28, "grad_norm": 4.043033599853516, "learning_rate": 1.991196496681541e-05, "loss": 2.07, "step": 21884 }, { "epoch": 0.28, "grad_norm": 3.919969081878662, "learning_rate": 1.991195105384661e-05, "loss": 1.8833, "step": 21885 }, { "epoch": 0.28, "grad_norm": 4.578098297119141, "learning_rate": 1.9911937139783363e-05, "loss": 2.4945, "step": 21886 }, { "epoch": 0.28, "grad_norm": 4.3621625900268555, "learning_rate": 1.9911923224625665e-05, "loss": 2.5784, "step": 21887 }, { "epoch": 0.28, "grad_norm": 4.691409587860107, "learning_rate": 1.9911909308373528e-05, "loss": 2.3162, "step": 21888 }, { "epoch": 0.28, "grad_norm": 3.892754316329956, "learning_rate": 1.9911895391026944e-05, "loss": 1.9758, "step": 21889 }, { "epoch": 0.28, "grad_norm": 4.086189270019531, "learning_rate": 1.9911881472585922e-05, "loss": 2.0043, "step": 21890 }, { "epoch": 0.28, "grad_norm": 4.322220325469971, "learning_rate": 1.991186755305046e-05, "loss": 2.0942, "step": 21891 }, { "epoch": 0.28, "grad_norm": 3.3727545738220215, "learning_rate": 1.9911853632420558e-05, "loss": 1.6559, "step": 21892 }, { "epoch": 0.28, "grad_norm": 4.1092119216918945, "learning_rate": 1.991183971069622e-05, "loss": 2.1111, "step": 21893 }, { "epoch": 0.28, "grad_norm": 3.9686176776885986, "learning_rate": 1.9911825787877447e-05, "loss": 2.4879, "step": 21894 }, { "epoch": 0.28, "grad_norm": 3.484478712081909, "learning_rate": 1.991181186396424e-05, "loss": 1.8323, "step": 21895 }, { "epoch": 0.28, "grad_norm": 4.067102909088135, "learning_rate": 1.9911797938956598e-05, "loss": 2.1998, "step": 21896 }, { "epoch": 0.28, "grad_norm": 3.7157042026519775, "learning_rate": 1.991178401285453e-05, "loss": 1.8506, "step": 21897 }, { "epoch": 0.28, "grad_norm": 3.5178487300872803, "learning_rate": 1.9911770085658032e-05, "loss": 1.807, "step": 21898 }, { "epoch": 0.28, "grad_norm": 4.0041117668151855, "learning_rate": 1.9911756157367105e-05, "loss": 1.8434, "step": 21899 }, { "epoch": 0.28, "grad_norm": 4.170446872711182, "learning_rate": 1.9911742227981753e-05, "loss": 2.0228, "step": 21900 }, { "epoch": 0.28, "grad_norm": 4.164330959320068, "learning_rate": 1.9911728297501974e-05, "loss": 2.073, "step": 21901 }, { "epoch": 0.28, "grad_norm": 3.1611881256103516, "learning_rate": 1.9911714365927774e-05, "loss": 1.7195, "step": 21902 }, { "epoch": 0.28, "grad_norm": 3.997591018676758, "learning_rate": 1.9911700433259153e-05, "loss": 2.0827, "step": 21903 }, { "epoch": 0.28, "grad_norm": 3.9828851222991943, "learning_rate": 1.991168649949611e-05, "loss": 2.2184, "step": 21904 }, { "epoch": 0.28, "grad_norm": 5.050245761871338, "learning_rate": 1.9911672564638652e-05, "loss": 1.8826, "step": 21905 }, { "epoch": 0.28, "grad_norm": 4.040291786193848, "learning_rate": 1.9911658628686776e-05, "loss": 2.1278, "step": 21906 }, { "epoch": 0.28, "grad_norm": 3.8694844245910645, "learning_rate": 1.9911644691640482e-05, "loss": 2.162, "step": 21907 }, { "epoch": 0.28, "grad_norm": 4.192756652832031, "learning_rate": 1.9911630753499778e-05, "loss": 2.3121, "step": 21908 }, { "epoch": 0.28, "grad_norm": 4.786088466644287, "learning_rate": 1.991161681426466e-05, "loss": 2.6048, "step": 21909 }, { "epoch": 0.28, "grad_norm": 4.344480514526367, "learning_rate": 1.991160287393513e-05, "loss": 2.3894, "step": 21910 }, { "epoch": 0.28, "grad_norm": 4.122152328491211, "learning_rate": 1.9911588932511194e-05, "loss": 2.2341, "step": 21911 }, { "epoch": 0.28, "grad_norm": 4.210468292236328, "learning_rate": 1.9911574989992847e-05, "loss": 2.2431, "step": 21912 }, { "epoch": 0.28, "grad_norm": 3.9903948307037354, "learning_rate": 1.9911561046380096e-05, "loss": 2.0533, "step": 21913 }, { "epoch": 0.28, "grad_norm": 3.962998151779175, "learning_rate": 1.9911547101672942e-05, "loss": 2.3223, "step": 21914 }, { "epoch": 0.28, "grad_norm": 4.288772106170654, "learning_rate": 1.991153315587138e-05, "loss": 1.9124, "step": 21915 }, { "epoch": 0.28, "grad_norm": 4.8143792152404785, "learning_rate": 1.991151920897542e-05, "loss": 2.3778, "step": 21916 }, { "epoch": 0.28, "grad_norm": 3.8160905838012695, "learning_rate": 1.9911505260985062e-05, "loss": 2.181, "step": 21917 }, { "epoch": 0.28, "grad_norm": 4.112427711486816, "learning_rate": 1.9911491311900306e-05, "loss": 2.1461, "step": 21918 }, { "epoch": 0.28, "grad_norm": 4.155220031738281, "learning_rate": 1.991147736172115e-05, "loss": 1.9162, "step": 21919 }, { "epoch": 0.28, "grad_norm": 4.424904823303223, "learning_rate": 1.99114634104476e-05, "loss": 2.2808, "step": 21920 }, { "epoch": 0.28, "grad_norm": 4.554325580596924, "learning_rate": 1.9911449458079654e-05, "loss": 2.5867, "step": 21921 }, { "epoch": 0.28, "grad_norm": 3.9868948459625244, "learning_rate": 1.9911435504617316e-05, "loss": 2.1205, "step": 21922 }, { "epoch": 0.28, "grad_norm": 4.439155578613281, "learning_rate": 1.9911421550060592e-05, "loss": 1.8736, "step": 21923 }, { "epoch": 0.28, "grad_norm": 3.793499231338501, "learning_rate": 1.9911407594409477e-05, "loss": 1.7752, "step": 21924 }, { "epoch": 0.28, "grad_norm": 3.8792784214019775, "learning_rate": 1.991139363766397e-05, "loss": 2.0114, "step": 21925 }, { "epoch": 0.28, "grad_norm": 4.106453895568848, "learning_rate": 1.9911379679824083e-05, "loss": 2.448, "step": 21926 }, { "epoch": 0.28, "grad_norm": 3.5599405765533447, "learning_rate": 1.9911365720889807e-05, "loss": 1.6061, "step": 21927 }, { "epoch": 0.28, "grad_norm": 4.3008198738098145, "learning_rate": 1.991135176086115e-05, "loss": 2.1458, "step": 21928 }, { "epoch": 0.28, "grad_norm": 3.844709634780884, "learning_rate": 1.9911337799738115e-05, "loss": 2.1612, "step": 21929 }, { "epoch": 0.28, "grad_norm": 4.311017036437988, "learning_rate": 1.9911323837520696e-05, "loss": 2.1364, "step": 21930 }, { "epoch": 0.28, "grad_norm": 4.957146167755127, "learning_rate": 1.99113098742089e-05, "loss": 2.7287, "step": 21931 }, { "epoch": 0.28, "grad_norm": 4.099621772766113, "learning_rate": 1.9911295909802727e-05, "loss": 2.1191, "step": 21932 }, { "epoch": 0.28, "grad_norm": 4.202295303344727, "learning_rate": 1.9911281944302177e-05, "loss": 1.9276, "step": 21933 }, { "epoch": 0.28, "grad_norm": 4.314573287963867, "learning_rate": 1.9911267977707254e-05, "loss": 2.4185, "step": 21934 }, { "epoch": 0.28, "grad_norm": 3.911848545074463, "learning_rate": 1.991125401001796e-05, "loss": 1.9346, "step": 21935 }, { "epoch": 0.28, "grad_norm": 4.249546527862549, "learning_rate": 1.9911240041234295e-05, "loss": 2.4889, "step": 21936 }, { "epoch": 0.28, "grad_norm": 3.728086471557617, "learning_rate": 1.9911226071356262e-05, "loss": 1.913, "step": 21937 }, { "epoch": 0.28, "grad_norm": 3.8025002479553223, "learning_rate": 1.991121210038386e-05, "loss": 1.855, "step": 21938 }, { "epoch": 0.28, "grad_norm": 4.070052146911621, "learning_rate": 1.9911198128317094e-05, "loss": 2.114, "step": 21939 }, { "epoch": 0.28, "grad_norm": 3.810162305831909, "learning_rate": 1.991118415515596e-05, "loss": 1.7777, "step": 21940 }, { "epoch": 0.28, "grad_norm": 3.7721340656280518, "learning_rate": 1.991117018090047e-05, "loss": 1.7793, "step": 21941 }, { "epoch": 0.28, "grad_norm": 3.953078269958496, "learning_rate": 1.9911156205550613e-05, "loss": 1.8844, "step": 21942 }, { "epoch": 0.28, "grad_norm": 3.895847797393799, "learning_rate": 1.9911142229106398e-05, "loss": 2.1837, "step": 21943 }, { "epoch": 0.28, "grad_norm": 4.12499475479126, "learning_rate": 1.9911128251567823e-05, "loss": 2.1151, "step": 21944 }, { "epoch": 0.28, "grad_norm": 3.483590841293335, "learning_rate": 1.991111427293489e-05, "loss": 1.6817, "step": 21945 }, { "epoch": 0.28, "grad_norm": 3.784771680831909, "learning_rate": 1.9911100293207607e-05, "loss": 1.86, "step": 21946 }, { "epoch": 0.28, "grad_norm": 3.6172611713409424, "learning_rate": 1.991108631238597e-05, "loss": 1.8382, "step": 21947 }, { "epoch": 0.28, "grad_norm": 3.8373332023620605, "learning_rate": 1.9911072330469975e-05, "loss": 1.7311, "step": 21948 }, { "epoch": 0.28, "grad_norm": 4.571873188018799, "learning_rate": 1.9911058347459632e-05, "loss": 2.3793, "step": 21949 }, { "epoch": 0.28, "grad_norm": 3.875448703765869, "learning_rate": 1.9911044363354942e-05, "loss": 1.6527, "step": 21950 }, { "epoch": 0.28, "grad_norm": 4.197728157043457, "learning_rate": 1.9911030378155906e-05, "loss": 2.296, "step": 21951 }, { "epoch": 0.28, "grad_norm": 4.067967891693115, "learning_rate": 1.991101639186252e-05, "loss": 2.2591, "step": 21952 }, { "epoch": 0.28, "grad_norm": 4.34318208694458, "learning_rate": 1.9911002404474792e-05, "loss": 2.7148, "step": 21953 }, { "epoch": 0.28, "grad_norm": 3.9627108573913574, "learning_rate": 1.991098841599272e-05, "loss": 2.0488, "step": 21954 }, { "epoch": 0.28, "grad_norm": 4.280063629150391, "learning_rate": 1.9910974426416306e-05, "loss": 2.2312, "step": 21955 }, { "epoch": 0.28, "grad_norm": 4.487037658691406, "learning_rate": 1.9910960435745556e-05, "loss": 2.585, "step": 21956 }, { "epoch": 0.28, "grad_norm": 4.175924301147461, "learning_rate": 1.9910946443980466e-05, "loss": 1.7022, "step": 21957 }, { "epoch": 0.28, "grad_norm": 3.6682372093200684, "learning_rate": 1.9910932451121037e-05, "loss": 2.0892, "step": 21958 }, { "epoch": 0.28, "grad_norm": 4.340160369873047, "learning_rate": 1.9910918457167275e-05, "loss": 2.1552, "step": 21959 }, { "epoch": 0.28, "grad_norm": 4.043989658355713, "learning_rate": 1.9910904462119177e-05, "loss": 2.2077, "step": 21960 }, { "epoch": 0.29, "grad_norm": 4.033300876617432, "learning_rate": 1.991089046597675e-05, "loss": 2.0539, "step": 21961 }, { "epoch": 0.29, "grad_norm": 3.4586944580078125, "learning_rate": 1.9910876468739993e-05, "loss": 1.6982, "step": 21962 }, { "epoch": 0.29, "grad_norm": 4.044717788696289, "learning_rate": 1.9910862470408904e-05, "loss": 2.0556, "step": 21963 }, { "epoch": 0.29, "grad_norm": 4.185049057006836, "learning_rate": 1.991084847098349e-05, "loss": 2.1205, "step": 21964 }, { "epoch": 0.29, "grad_norm": 3.749441623687744, "learning_rate": 1.9910834470463748e-05, "loss": 1.6971, "step": 21965 }, { "epoch": 0.29, "grad_norm": 5.009381294250488, "learning_rate": 1.9910820468849684e-05, "loss": 1.933, "step": 21966 }, { "epoch": 0.29, "grad_norm": 4.6043171882629395, "learning_rate": 1.9910806466141298e-05, "loss": 2.2293, "step": 21967 }, { "epoch": 0.29, "grad_norm": 4.442728042602539, "learning_rate": 1.991079246233859e-05, "loss": 2.2206, "step": 21968 }, { "epoch": 0.29, "grad_norm": 3.6803646087646484, "learning_rate": 1.991077845744156e-05, "loss": 1.6825, "step": 21969 }, { "epoch": 0.29, "grad_norm": 4.430507183074951, "learning_rate": 1.9910764451450215e-05, "loss": 2.309, "step": 21970 }, { "epoch": 0.29, "grad_norm": 4.288082599639893, "learning_rate": 1.991075044436455e-05, "loss": 2.4279, "step": 21971 }, { "epoch": 0.29, "grad_norm": 4.154035568237305, "learning_rate": 1.9910736436184574e-05, "loss": 1.8379, "step": 21972 }, { "epoch": 0.29, "grad_norm": 5.364352703094482, "learning_rate": 1.9910722426910283e-05, "loss": 2.1043, "step": 21973 }, { "epoch": 0.29, "grad_norm": 3.988278865814209, "learning_rate": 1.991070841654168e-05, "loss": 2.283, "step": 21974 }, { "epoch": 0.29, "grad_norm": 4.472307205200195, "learning_rate": 1.9910694405078764e-05, "loss": 2.4428, "step": 21975 }, { "epoch": 0.29, "grad_norm": 3.887960195541382, "learning_rate": 1.9910680392521543e-05, "loss": 2.1085, "step": 21976 }, { "epoch": 0.29, "grad_norm": 4.020698547363281, "learning_rate": 1.9910666378870017e-05, "loss": 2.0784, "step": 21977 }, { "epoch": 0.29, "grad_norm": 4.365960597991943, "learning_rate": 1.9910652364124178e-05, "loss": 2.3844, "step": 21978 }, { "epoch": 0.29, "grad_norm": 3.6466753482818604, "learning_rate": 1.991063834828404e-05, "loss": 1.8973, "step": 21979 }, { "epoch": 0.29, "grad_norm": 3.987962484359741, "learning_rate": 1.99106243313496e-05, "loss": 2.1182, "step": 21980 }, { "epoch": 0.29, "grad_norm": 3.836049795150757, "learning_rate": 1.9910610313320854e-05, "loss": 1.9585, "step": 21981 }, { "epoch": 0.29, "grad_norm": 4.285791397094727, "learning_rate": 1.9910596294197813e-05, "loss": 2.775, "step": 21982 }, { "epoch": 0.29, "grad_norm": 3.232074499130249, "learning_rate": 1.9910582273980474e-05, "loss": 1.4021, "step": 21983 }, { "epoch": 0.29, "grad_norm": 3.846792221069336, "learning_rate": 1.9910568252668835e-05, "loss": 2.137, "step": 21984 }, { "epoch": 0.29, "grad_norm": 4.978330612182617, "learning_rate": 1.9910554230262904e-05, "loss": 2.5673, "step": 21985 }, { "epoch": 0.29, "grad_norm": 3.8609530925750732, "learning_rate": 1.9910540206762675e-05, "loss": 2.1565, "step": 21986 }, { "epoch": 0.29, "grad_norm": 5.002967834472656, "learning_rate": 1.991052618216816e-05, "loss": 3.3668, "step": 21987 }, { "epoch": 0.29, "grad_norm": 3.864698648452759, "learning_rate": 1.9910512156479354e-05, "loss": 1.5505, "step": 21988 }, { "epoch": 0.29, "grad_norm": 4.1261515617370605, "learning_rate": 1.9910498129696258e-05, "loss": 1.9631, "step": 21989 }, { "epoch": 0.29, "grad_norm": 4.096843242645264, "learning_rate": 1.9910484101818874e-05, "loss": 1.9531, "step": 21990 }, { "epoch": 0.29, "grad_norm": 4.649349689483643, "learning_rate": 1.9910470072847205e-05, "loss": 2.1106, "step": 21991 }, { "epoch": 0.29, "grad_norm": 4.040191173553467, "learning_rate": 1.9910456042781254e-05, "loss": 2.3716, "step": 21992 }, { "epoch": 0.29, "grad_norm": 3.578841209411621, "learning_rate": 1.9910442011621017e-05, "loss": 1.8082, "step": 21993 }, { "epoch": 0.29, "grad_norm": 4.292080402374268, "learning_rate": 1.9910427979366503e-05, "loss": 2.3886, "step": 21994 }, { "epoch": 0.29, "grad_norm": 4.478573322296143, "learning_rate": 1.9910413946017706e-05, "loss": 2.3894, "step": 21995 }, { "epoch": 0.29, "grad_norm": 4.19637393951416, "learning_rate": 1.9910399911574634e-05, "loss": 1.7963, "step": 21996 }, { "epoch": 0.29, "grad_norm": 3.9160523414611816, "learning_rate": 1.9910385876037287e-05, "loss": 2.1674, "step": 21997 }, { "epoch": 0.29, "grad_norm": 4.878360271453857, "learning_rate": 1.991037183940566e-05, "loss": 2.474, "step": 21998 }, { "epoch": 0.29, "grad_norm": 5.202932357788086, "learning_rate": 1.991035780167976e-05, "loss": 2.457, "step": 21999 }, { "epoch": 0.29, "grad_norm": 4.528501987457275, "learning_rate": 1.9910343762859593e-05, "loss": 2.465, "step": 22000 }, { "epoch": 0.29, "grad_norm": 4.042535305023193, "learning_rate": 1.9910329722945156e-05, "loss": 2.1209, "step": 22001 }, { "epoch": 0.29, "grad_norm": 4.283904075622559, "learning_rate": 1.9910315681936447e-05, "loss": 2.6761, "step": 22002 }, { "epoch": 0.29, "grad_norm": 3.4659013748168945, "learning_rate": 1.9910301639833473e-05, "loss": 1.7948, "step": 22003 }, { "epoch": 0.29, "grad_norm": 3.9698376655578613, "learning_rate": 1.9910287596636234e-05, "loss": 2.0708, "step": 22004 }, { "epoch": 0.29, "grad_norm": 3.7516677379608154, "learning_rate": 1.991027355234473e-05, "loss": 1.9787, "step": 22005 }, { "epoch": 0.29, "grad_norm": 4.64373779296875, "learning_rate": 1.9910259506958962e-05, "loss": 2.5327, "step": 22006 }, { "epoch": 0.29, "grad_norm": 3.986067295074463, "learning_rate": 1.9910245460478936e-05, "loss": 2.2213, "step": 22007 }, { "epoch": 0.29, "grad_norm": 3.925062894821167, "learning_rate": 1.991023141290465e-05, "loss": 1.9706, "step": 22008 }, { "epoch": 0.29, "grad_norm": 4.0267767906188965, "learning_rate": 1.9910217364236105e-05, "loss": 2.2079, "step": 22009 }, { "epoch": 0.29, "grad_norm": 4.636734962463379, "learning_rate": 1.9910203314473304e-05, "loss": 2.2858, "step": 22010 }, { "epoch": 0.29, "grad_norm": 4.2754316329956055, "learning_rate": 1.991018926361625e-05, "loss": 2.2304, "step": 22011 }, { "epoch": 0.29, "grad_norm": 4.5859293937683105, "learning_rate": 1.9910175211664944e-05, "loss": 2.6623, "step": 22012 }, { "epoch": 0.29, "grad_norm": 4.03964376449585, "learning_rate": 1.9910161158619382e-05, "loss": 2.2968, "step": 22013 }, { "epoch": 0.29, "grad_norm": 3.9336020946502686, "learning_rate": 1.9910147104479572e-05, "loss": 1.9188, "step": 22014 }, { "epoch": 0.29, "grad_norm": 4.517508029937744, "learning_rate": 1.9910133049245518e-05, "loss": 1.9108, "step": 22015 }, { "epoch": 0.29, "grad_norm": 3.787682056427002, "learning_rate": 1.9910118992917212e-05, "loss": 2.4673, "step": 22016 }, { "epoch": 0.29, "grad_norm": 3.915935516357422, "learning_rate": 1.991010493549466e-05, "loss": 2.1289, "step": 22017 }, { "epoch": 0.29, "grad_norm": 3.7079904079437256, "learning_rate": 1.991009087697787e-05, "loss": 1.5728, "step": 22018 }, { "epoch": 0.29, "grad_norm": 3.6411004066467285, "learning_rate": 1.9910076817366833e-05, "loss": 2.1463, "step": 22019 }, { "epoch": 0.29, "grad_norm": 3.7699522972106934, "learning_rate": 1.991006275666156e-05, "loss": 1.8232, "step": 22020 }, { "epoch": 0.29, "grad_norm": 3.6656737327575684, "learning_rate": 1.9910048694862044e-05, "loss": 1.6704, "step": 22021 }, { "epoch": 0.29, "grad_norm": 3.8183443546295166, "learning_rate": 1.991003463196829e-05, "loss": 2.4055, "step": 22022 }, { "epoch": 0.29, "grad_norm": 3.924766778945923, "learning_rate": 1.9910020567980303e-05, "loss": 1.9977, "step": 22023 }, { "epoch": 0.29, "grad_norm": 3.805609941482544, "learning_rate": 1.991000650289808e-05, "loss": 1.7303, "step": 22024 }, { "epoch": 0.29, "grad_norm": 3.949993133544922, "learning_rate": 1.9909992436721625e-05, "loss": 1.9568, "step": 22025 }, { "epoch": 0.29, "grad_norm": 4.074028968811035, "learning_rate": 1.9909978369450934e-05, "loss": 2.4136, "step": 22026 }, { "epoch": 0.29, "grad_norm": 4.171472072601318, "learning_rate": 1.990996430108602e-05, "loss": 1.9246, "step": 22027 }, { "epoch": 0.29, "grad_norm": 4.2252678871154785, "learning_rate": 1.9909950231626873e-05, "loss": 1.9254, "step": 22028 }, { "epoch": 0.29, "grad_norm": 3.82281756401062, "learning_rate": 1.9909936161073502e-05, "loss": 2.0309, "step": 22029 }, { "epoch": 0.29, "grad_norm": 4.067607402801514, "learning_rate": 1.9909922089425907e-05, "loss": 1.9013, "step": 22030 }, { "epoch": 0.29, "grad_norm": 4.813709259033203, "learning_rate": 1.990990801668409e-05, "loss": 2.6048, "step": 22031 }, { "epoch": 0.29, "grad_norm": 3.9511091709136963, "learning_rate": 1.9909893942848045e-05, "loss": 2.1221, "step": 22032 }, { "epoch": 0.29, "grad_norm": 4.335679054260254, "learning_rate": 1.9909879867917784e-05, "loss": 1.8216, "step": 22033 }, { "epoch": 0.29, "grad_norm": 3.689484119415283, "learning_rate": 1.99098657918933e-05, "loss": 2.0817, "step": 22034 }, { "epoch": 0.29, "grad_norm": 4.188053607940674, "learning_rate": 1.99098517147746e-05, "loss": 2.2136, "step": 22035 }, { "epoch": 0.29, "grad_norm": 4.896535396575928, "learning_rate": 1.9909837636561687e-05, "loss": 2.3458, "step": 22036 }, { "epoch": 0.29, "grad_norm": 3.594026565551758, "learning_rate": 1.9909823557254556e-05, "loss": 1.959, "step": 22037 }, { "epoch": 0.29, "grad_norm": 4.080934047698975, "learning_rate": 1.990980947685322e-05, "loss": 2.5113, "step": 22038 }, { "epoch": 0.29, "grad_norm": 4.3340044021606445, "learning_rate": 1.9909795395357663e-05, "loss": 2.516, "step": 22039 }, { "epoch": 0.29, "grad_norm": 3.9096310138702393, "learning_rate": 1.9909781312767903e-05, "loss": 1.7049, "step": 22040 }, { "epoch": 0.29, "grad_norm": 4.260075569152832, "learning_rate": 1.9909767229083933e-05, "loss": 2.5665, "step": 22041 }, { "epoch": 0.29, "grad_norm": 3.943775177001953, "learning_rate": 1.9909753144305756e-05, "loss": 2.4142, "step": 22042 }, { "epoch": 0.29, "grad_norm": 3.692958354949951, "learning_rate": 1.9909739058433374e-05, "loss": 1.9415, "step": 22043 }, { "epoch": 0.29, "grad_norm": 3.8037545680999756, "learning_rate": 1.990972497146679e-05, "loss": 2.1671, "step": 22044 }, { "epoch": 0.29, "grad_norm": 3.907717704772949, "learning_rate": 1.9909710883406003e-05, "loss": 2.119, "step": 22045 }, { "epoch": 0.29, "grad_norm": 5.004825592041016, "learning_rate": 1.990969679425102e-05, "loss": 2.0409, "step": 22046 }, { "epoch": 0.29, "grad_norm": 3.5691514015197754, "learning_rate": 1.9909682704001833e-05, "loss": 1.9689, "step": 22047 }, { "epoch": 0.29, "grad_norm": 4.746322154998779, "learning_rate": 1.990966861265845e-05, "loss": 2.561, "step": 22048 }, { "epoch": 0.29, "grad_norm": 4.376262187957764, "learning_rate": 1.990965452022087e-05, "loss": 2.2294, "step": 22049 }, { "epoch": 0.29, "grad_norm": 3.450855016708374, "learning_rate": 1.99096404266891e-05, "loss": 1.4685, "step": 22050 }, { "epoch": 0.29, "grad_norm": 5.149006366729736, "learning_rate": 1.9909626332063137e-05, "loss": 2.5523, "step": 22051 }, { "epoch": 0.29, "grad_norm": 4.473565578460693, "learning_rate": 1.990961223634298e-05, "loss": 2.1684, "step": 22052 }, { "epoch": 0.29, "grad_norm": 4.152981758117676, "learning_rate": 1.9909598139528635e-05, "loss": 1.7007, "step": 22053 }, { "epoch": 0.29, "grad_norm": 3.8983328342437744, "learning_rate": 1.9909584041620105e-05, "loss": 2.01, "step": 22054 }, { "epoch": 0.29, "grad_norm": 4.193006992340088, "learning_rate": 1.9909569942617385e-05, "loss": 2.383, "step": 22055 }, { "epoch": 0.29, "grad_norm": 4.096408367156982, "learning_rate": 1.990955584252048e-05, "loss": 1.926, "step": 22056 }, { "epoch": 0.29, "grad_norm": 4.2195539474487305, "learning_rate": 1.9909541741329397e-05, "loss": 2.4172, "step": 22057 }, { "epoch": 0.29, "grad_norm": 3.6592769622802734, "learning_rate": 1.990952763904413e-05, "loss": 1.7559, "step": 22058 }, { "epoch": 0.29, "grad_norm": 3.719599485397339, "learning_rate": 1.990951353566468e-05, "loss": 1.7313, "step": 22059 }, { "epoch": 0.29, "grad_norm": 4.054901599884033, "learning_rate": 1.9909499431191057e-05, "loss": 2.1689, "step": 22060 }, { "epoch": 0.29, "grad_norm": 3.939149856567383, "learning_rate": 1.9909485325623255e-05, "loss": 1.9423, "step": 22061 }, { "epoch": 0.29, "grad_norm": 4.173066139221191, "learning_rate": 1.9909471218961273e-05, "loss": 2.1103, "step": 22062 }, { "epoch": 0.29, "grad_norm": 4.439725399017334, "learning_rate": 1.9909457111205125e-05, "loss": 2.2218, "step": 22063 }, { "epoch": 0.29, "grad_norm": 3.8848273754119873, "learning_rate": 1.99094430023548e-05, "loss": 1.9177, "step": 22064 }, { "epoch": 0.29, "grad_norm": 3.9530413150787354, "learning_rate": 1.9909428892410304e-05, "loss": 1.7346, "step": 22065 }, { "epoch": 0.29, "grad_norm": 4.193789005279541, "learning_rate": 1.9909414781371642e-05, "loss": 2.3975, "step": 22066 }, { "epoch": 0.29, "grad_norm": 3.7405033111572266, "learning_rate": 1.990940066923881e-05, "loss": 2.0576, "step": 22067 }, { "epoch": 0.29, "grad_norm": 3.785088300704956, "learning_rate": 1.9909386556011815e-05, "loss": 1.5776, "step": 22068 }, { "epoch": 0.29, "grad_norm": 3.9170022010803223, "learning_rate": 1.990937244169065e-05, "loss": 2.261, "step": 22069 }, { "epoch": 0.29, "grad_norm": 4.276091575622559, "learning_rate": 1.9909358326275327e-05, "loss": 1.9819, "step": 22070 }, { "epoch": 0.29, "grad_norm": 3.8928329944610596, "learning_rate": 1.9909344209765842e-05, "loss": 2.01, "step": 22071 }, { "epoch": 0.29, "grad_norm": 3.5279152393341064, "learning_rate": 1.9909330092162197e-05, "loss": 1.623, "step": 22072 }, { "epoch": 0.29, "grad_norm": 3.836987018585205, "learning_rate": 1.9909315973464392e-05, "loss": 2.1618, "step": 22073 }, { "epoch": 0.29, "grad_norm": 3.6638293266296387, "learning_rate": 1.990930185367243e-05, "loss": 1.7428, "step": 22074 }, { "epoch": 0.29, "grad_norm": 4.365067958831787, "learning_rate": 1.9909287732786316e-05, "loss": 1.8908, "step": 22075 }, { "epoch": 0.29, "grad_norm": 3.5769734382629395, "learning_rate": 1.9909273610806048e-05, "loss": 1.8344, "step": 22076 }, { "epoch": 0.29, "grad_norm": 4.947819232940674, "learning_rate": 1.9909259487731627e-05, "loss": 1.9148, "step": 22077 }, { "epoch": 0.29, "grad_norm": 4.294246673583984, "learning_rate": 1.9909245363563057e-05, "loss": 2.4288, "step": 22078 }, { "epoch": 0.29, "grad_norm": 3.820134162902832, "learning_rate": 1.9909231238300337e-05, "loss": 2.2976, "step": 22079 }, { "epoch": 0.29, "grad_norm": 4.042435646057129, "learning_rate": 1.990921711194347e-05, "loss": 2.0079, "step": 22080 }, { "epoch": 0.29, "grad_norm": 3.729600191116333, "learning_rate": 1.9909202984492457e-05, "loss": 1.9381, "step": 22081 }, { "epoch": 0.29, "grad_norm": 4.430397033691406, "learning_rate": 1.9909188855947298e-05, "loss": 2.031, "step": 22082 }, { "epoch": 0.29, "grad_norm": 3.940337896347046, "learning_rate": 1.9909174726308e-05, "loss": 2.1509, "step": 22083 }, { "epoch": 0.29, "grad_norm": 4.091722011566162, "learning_rate": 1.990916059557456e-05, "loss": 2.2361, "step": 22084 }, { "epoch": 0.29, "grad_norm": 4.162154197692871, "learning_rate": 1.990914646374698e-05, "loss": 2.1802, "step": 22085 }, { "epoch": 0.29, "grad_norm": 4.44128942489624, "learning_rate": 1.990913233082526e-05, "loss": 2.3267, "step": 22086 }, { "epoch": 0.29, "grad_norm": 3.778127670288086, "learning_rate": 1.9909118196809408e-05, "loss": 2.0011, "step": 22087 }, { "epoch": 0.29, "grad_norm": 4.369661331176758, "learning_rate": 1.990910406169942e-05, "loss": 2.043, "step": 22088 }, { "epoch": 0.29, "grad_norm": 3.784050226211548, "learning_rate": 1.99090899254953e-05, "loss": 1.842, "step": 22089 }, { "epoch": 0.29, "grad_norm": 3.9978997707366943, "learning_rate": 1.9909075788197046e-05, "loss": 2.1875, "step": 22090 }, { "epoch": 0.29, "grad_norm": 3.8775930404663086, "learning_rate": 1.990906164980466e-05, "loss": 2.0221, "step": 22091 }, { "epoch": 0.29, "grad_norm": 3.689763307571411, "learning_rate": 1.990904751031815e-05, "loss": 1.9713, "step": 22092 }, { "epoch": 0.29, "grad_norm": 4.047388553619385, "learning_rate": 1.990903336973751e-05, "loss": 2.1239, "step": 22093 }, { "epoch": 0.29, "grad_norm": 3.451763391494751, "learning_rate": 1.9909019228062748e-05, "loss": 1.7311, "step": 22094 }, { "epoch": 0.29, "grad_norm": 3.5346360206604004, "learning_rate": 1.990900508529386e-05, "loss": 1.855, "step": 22095 }, { "epoch": 0.29, "grad_norm": 4.24036979675293, "learning_rate": 1.9908990941430847e-05, "loss": 2.4939, "step": 22096 }, { "epoch": 0.29, "grad_norm": 5.298299789428711, "learning_rate": 1.9908976796473713e-05, "loss": 2.8112, "step": 22097 }, { "epoch": 0.29, "grad_norm": 3.8049023151397705, "learning_rate": 1.9908962650422463e-05, "loss": 1.6055, "step": 22098 }, { "epoch": 0.29, "grad_norm": 3.982133150100708, "learning_rate": 1.9908948503277097e-05, "loss": 1.9778, "step": 22099 }, { "epoch": 0.29, "grad_norm": 3.8207345008850098, "learning_rate": 1.9908934355037612e-05, "loss": 1.8239, "step": 22100 }, { "epoch": 0.29, "grad_norm": 4.004533290863037, "learning_rate": 1.9908920205704014e-05, "loss": 1.9381, "step": 22101 }, { "epoch": 0.29, "grad_norm": 3.786904811859131, "learning_rate": 1.99089060552763e-05, "loss": 2.0434, "step": 22102 }, { "epoch": 0.29, "grad_norm": 3.9215054512023926, "learning_rate": 1.990889190375448e-05, "loss": 1.9776, "step": 22103 }, { "epoch": 0.29, "grad_norm": 4.1348419189453125, "learning_rate": 1.9908877751138547e-05, "loss": 2.0451, "step": 22104 }, { "epoch": 0.29, "grad_norm": 4.4328718185424805, "learning_rate": 1.9908863597428506e-05, "loss": 2.1483, "step": 22105 }, { "epoch": 0.29, "grad_norm": 3.8351686000823975, "learning_rate": 1.9908849442624357e-05, "loss": 2.4761, "step": 22106 }, { "epoch": 0.29, "grad_norm": 3.7801268100738525, "learning_rate": 1.9908835286726105e-05, "loss": 2.0064, "step": 22107 }, { "epoch": 0.29, "grad_norm": 3.762718677520752, "learning_rate": 1.9908821129733747e-05, "loss": 1.6917, "step": 22108 }, { "epoch": 0.29, "grad_norm": 3.915449619293213, "learning_rate": 1.990880697164729e-05, "loss": 2.1131, "step": 22109 }, { "epoch": 0.29, "grad_norm": 3.333653450012207, "learning_rate": 1.9908792812466732e-05, "loss": 1.6995, "step": 22110 }, { "epoch": 0.29, "grad_norm": 4.098236560821533, "learning_rate": 1.9908778652192077e-05, "loss": 1.9796, "step": 22111 }, { "epoch": 0.29, "grad_norm": 3.734299421310425, "learning_rate": 1.9908764490823324e-05, "loss": 1.8754, "step": 22112 }, { "epoch": 0.29, "grad_norm": 3.7073495388031006, "learning_rate": 1.9908750328360472e-05, "loss": 1.8897, "step": 22113 }, { "epoch": 0.29, "grad_norm": 4.485427379608154, "learning_rate": 1.9908736164803528e-05, "loss": 2.2866, "step": 22114 }, { "epoch": 0.29, "grad_norm": 3.89390230178833, "learning_rate": 1.9908722000152488e-05, "loss": 2.5402, "step": 22115 }, { "epoch": 0.29, "grad_norm": 3.7404897212982178, "learning_rate": 1.9908707834407363e-05, "loss": 1.7333, "step": 22116 }, { "epoch": 0.29, "grad_norm": 4.446821689605713, "learning_rate": 1.9908693667568145e-05, "loss": 2.0007, "step": 22117 }, { "epoch": 0.29, "grad_norm": 4.627101421356201, "learning_rate": 1.990867949963484e-05, "loss": 2.4537, "step": 22118 }, { "epoch": 0.29, "grad_norm": 3.9753804206848145, "learning_rate": 1.9908665330607448e-05, "loss": 2.1237, "step": 22119 }, { "epoch": 0.29, "grad_norm": 3.9588327407836914, "learning_rate": 1.990865116048597e-05, "loss": 2.1068, "step": 22120 }, { "epoch": 0.29, "grad_norm": 3.4916186332702637, "learning_rate": 1.9908636989270412e-05, "loss": 1.9701, "step": 22121 }, { "epoch": 0.29, "grad_norm": 3.9962611198425293, "learning_rate": 1.9908622816960772e-05, "loss": 2.1487, "step": 22122 }, { "epoch": 0.29, "grad_norm": 3.733088493347168, "learning_rate": 1.990860864355705e-05, "loss": 2.4266, "step": 22123 }, { "epoch": 0.29, "grad_norm": 3.2387962341308594, "learning_rate": 1.990859446905925e-05, "loss": 1.7318, "step": 22124 }, { "epoch": 0.29, "grad_norm": 5.021000862121582, "learning_rate": 1.9908580293467375e-05, "loss": 2.6152, "step": 22125 }, { "epoch": 0.29, "grad_norm": 3.7901251316070557, "learning_rate": 1.990856611678142e-05, "loss": 2.0096, "step": 22126 }, { "epoch": 0.29, "grad_norm": 3.6523516178131104, "learning_rate": 1.9908551939001396e-05, "loss": 1.8989, "step": 22127 }, { "epoch": 0.29, "grad_norm": 3.723459482192993, "learning_rate": 1.9908537760127298e-05, "loss": 1.7726, "step": 22128 }, { "epoch": 0.29, "grad_norm": 4.140043258666992, "learning_rate": 1.990852358015913e-05, "loss": 1.8527, "step": 22129 }, { "epoch": 0.29, "grad_norm": 3.889188051223755, "learning_rate": 1.9908509399096892e-05, "loss": 2.2511, "step": 22130 }, { "epoch": 0.29, "grad_norm": 4.217966079711914, "learning_rate": 1.9908495216940585e-05, "loss": 1.8369, "step": 22131 }, { "epoch": 0.29, "grad_norm": 4.275726318359375, "learning_rate": 1.9908481033690216e-05, "loss": 2.8228, "step": 22132 }, { "epoch": 0.29, "grad_norm": 4.173977375030518, "learning_rate": 1.990846684934578e-05, "loss": 1.7387, "step": 22133 }, { "epoch": 0.29, "grad_norm": 4.115201473236084, "learning_rate": 1.990845266390728e-05, "loss": 2.1934, "step": 22134 }, { "epoch": 0.29, "grad_norm": 4.601003646850586, "learning_rate": 1.990843847737472e-05, "loss": 1.4989, "step": 22135 }, { "epoch": 0.29, "grad_norm": 4.447672367095947, "learning_rate": 1.9908424289748104e-05, "loss": 2.0658, "step": 22136 }, { "epoch": 0.29, "grad_norm": 4.064955711364746, "learning_rate": 1.9908410101027423e-05, "loss": 2.2147, "step": 22137 }, { "epoch": 0.29, "grad_norm": 3.8934593200683594, "learning_rate": 1.9908395911212688e-05, "loss": 2.0147, "step": 22138 }, { "epoch": 0.29, "grad_norm": 4.27807092666626, "learning_rate": 1.99083817203039e-05, "loss": 2.1775, "step": 22139 }, { "epoch": 0.29, "grad_norm": 4.764930725097656, "learning_rate": 1.9908367528301056e-05, "loss": 2.5592, "step": 22140 }, { "epoch": 0.29, "grad_norm": 4.6837358474731445, "learning_rate": 1.9908353335204163e-05, "loss": 2.3114, "step": 22141 }, { "epoch": 0.29, "grad_norm": 4.116623878479004, "learning_rate": 1.990833914101322e-05, "loss": 2.1299, "step": 22142 }, { "epoch": 0.29, "grad_norm": 3.8847010135650635, "learning_rate": 1.9908324945728223e-05, "loss": 2.2195, "step": 22143 }, { "epoch": 0.29, "grad_norm": 4.205958843231201, "learning_rate": 1.9908310749349185e-05, "loss": 2.0905, "step": 22144 }, { "epoch": 0.29, "grad_norm": 3.957594871520996, "learning_rate": 1.99082965518761e-05, "loss": 2.0861, "step": 22145 }, { "epoch": 0.29, "grad_norm": 3.5303032398223877, "learning_rate": 1.9908282353308967e-05, "loss": 1.7565, "step": 22146 }, { "epoch": 0.29, "grad_norm": 4.1344523429870605, "learning_rate": 1.9908268153647795e-05, "loss": 2.0963, "step": 22147 }, { "epoch": 0.29, "grad_norm": 4.0030837059021, "learning_rate": 1.990825395289258e-05, "loss": 2.3237, "step": 22148 }, { "epoch": 0.29, "grad_norm": 3.79655385017395, "learning_rate": 1.990823975104333e-05, "loss": 1.6243, "step": 22149 }, { "epoch": 0.29, "grad_norm": 4.279975891113281, "learning_rate": 1.9908225548100036e-05, "loss": 2.1581, "step": 22150 }, { "epoch": 0.29, "grad_norm": 3.687717914581299, "learning_rate": 1.990821134406271e-05, "loss": 1.8228, "step": 22151 }, { "epoch": 0.29, "grad_norm": 4.718293190002441, "learning_rate": 1.9908197138931346e-05, "loss": 2.2639, "step": 22152 }, { "epoch": 0.29, "grad_norm": 4.0316314697265625, "learning_rate": 1.990818293270595e-05, "loss": 1.8474, "step": 22153 }, { "epoch": 0.29, "grad_norm": 4.114560604095459, "learning_rate": 1.9908168725386527e-05, "loss": 2.3041, "step": 22154 }, { "epoch": 0.29, "grad_norm": 4.610293388366699, "learning_rate": 1.990815451697307e-05, "loss": 1.8461, "step": 22155 }, { "epoch": 0.29, "grad_norm": 4.023144721984863, "learning_rate": 1.9908140307465583e-05, "loss": 2.3348, "step": 22156 }, { "epoch": 0.29, "grad_norm": 4.109936237335205, "learning_rate": 1.9908126096864073e-05, "loss": 1.8849, "step": 22157 }, { "epoch": 0.29, "grad_norm": 3.5038750171661377, "learning_rate": 1.9908111885168536e-05, "loss": 1.8348, "step": 22158 }, { "epoch": 0.29, "grad_norm": 4.187711238861084, "learning_rate": 1.9908097672378977e-05, "loss": 2.4664, "step": 22159 }, { "epoch": 0.29, "grad_norm": 4.241312503814697, "learning_rate": 1.9908083458495394e-05, "loss": 2.1258, "step": 22160 }, { "epoch": 0.29, "grad_norm": 3.761212110519409, "learning_rate": 1.990806924351779e-05, "loss": 1.7192, "step": 22161 }, { "epoch": 0.29, "grad_norm": 4.319293975830078, "learning_rate": 1.990805502744617e-05, "loss": 2.2921, "step": 22162 }, { "epoch": 0.29, "grad_norm": 4.0958123207092285, "learning_rate": 1.9908040810280532e-05, "loss": 2.01, "step": 22163 }, { "epoch": 0.29, "grad_norm": 4.217926025390625, "learning_rate": 1.9908026592020876e-05, "loss": 2.3552, "step": 22164 }, { "epoch": 0.29, "grad_norm": 4.108229160308838, "learning_rate": 1.9908012372667208e-05, "loss": 2.2178, "step": 22165 }, { "epoch": 0.29, "grad_norm": 5.032912731170654, "learning_rate": 1.9907998152219526e-05, "loss": 2.4994, "step": 22166 }, { "epoch": 0.29, "grad_norm": 3.9223828315734863, "learning_rate": 1.9907983930677834e-05, "loss": 2.142, "step": 22167 }, { "epoch": 0.29, "grad_norm": 4.308969497680664, "learning_rate": 1.990796970804213e-05, "loss": 2.0961, "step": 22168 }, { "epoch": 0.29, "grad_norm": 3.900399923324585, "learning_rate": 1.990795548431242e-05, "loss": 2.0389, "step": 22169 }, { "epoch": 0.29, "grad_norm": 4.294971942901611, "learning_rate": 1.9907941259488704e-05, "loss": 2.0065, "step": 22170 }, { "epoch": 0.29, "grad_norm": 4.168481826782227, "learning_rate": 1.9907927033570982e-05, "loss": 2.2222, "step": 22171 }, { "epoch": 0.29, "grad_norm": 4.333461284637451, "learning_rate": 1.9907912806559256e-05, "loss": 2.4258, "step": 22172 }, { "epoch": 0.29, "grad_norm": 3.5375475883483887, "learning_rate": 1.9907898578453532e-05, "loss": 1.5529, "step": 22173 }, { "epoch": 0.29, "grad_norm": 4.613928318023682, "learning_rate": 1.9907884349253804e-05, "loss": 2.4388, "step": 22174 }, { "epoch": 0.29, "grad_norm": 3.7956180572509766, "learning_rate": 1.990787011896008e-05, "loss": 2.097, "step": 22175 }, { "epoch": 0.29, "grad_norm": 3.745466947555542, "learning_rate": 1.9907855887572357e-05, "loss": 1.784, "step": 22176 }, { "epoch": 0.29, "grad_norm": 4.3991522789001465, "learning_rate": 1.990784165509064e-05, "loss": 2.283, "step": 22177 }, { "epoch": 0.29, "grad_norm": 4.066814422607422, "learning_rate": 1.9907827421514928e-05, "loss": 2.327, "step": 22178 }, { "epoch": 0.29, "grad_norm": 4.096816062927246, "learning_rate": 1.9907813186845224e-05, "loss": 1.944, "step": 22179 }, { "epoch": 0.29, "grad_norm": 4.314760208129883, "learning_rate": 1.990779895108153e-05, "loss": 1.9874, "step": 22180 }, { "epoch": 0.29, "grad_norm": 4.035043239593506, "learning_rate": 1.990778471422385e-05, "loss": 1.8911, "step": 22181 }, { "epoch": 0.29, "grad_norm": 3.7034831047058105, "learning_rate": 1.9907770476272177e-05, "loss": 2.1214, "step": 22182 }, { "epoch": 0.29, "grad_norm": 4.459349632263184, "learning_rate": 1.990775623722652e-05, "loss": 2.2066, "step": 22183 }, { "epoch": 0.29, "grad_norm": 4.3146867752075195, "learning_rate": 1.990774199708688e-05, "loss": 2.4471, "step": 22184 }, { "epoch": 0.29, "grad_norm": 4.1225056648254395, "learning_rate": 1.9907727755853255e-05, "loss": 2.3955, "step": 22185 }, { "epoch": 0.29, "grad_norm": 3.807184934616089, "learning_rate": 1.9907713513525652e-05, "loss": 1.9602, "step": 22186 }, { "epoch": 0.29, "grad_norm": 4.116793155670166, "learning_rate": 1.9907699270104065e-05, "loss": 2.3979, "step": 22187 }, { "epoch": 0.29, "grad_norm": 3.729832172393799, "learning_rate": 1.9907685025588504e-05, "loss": 1.8141, "step": 22188 }, { "epoch": 0.29, "grad_norm": 4.359343528747559, "learning_rate": 1.9907670779978963e-05, "loss": 2.4384, "step": 22189 }, { "epoch": 0.29, "grad_norm": 4.208593845367432, "learning_rate": 1.990765653327545e-05, "loss": 1.9227, "step": 22190 }, { "epoch": 0.29, "grad_norm": 4.526157379150391, "learning_rate": 1.990764228547796e-05, "loss": 2.4408, "step": 22191 }, { "epoch": 0.29, "grad_norm": 4.955901622772217, "learning_rate": 1.9907628036586503e-05, "loss": 2.2988, "step": 22192 }, { "epoch": 0.29, "grad_norm": 3.9061601161956787, "learning_rate": 1.990761378660107e-05, "loss": 1.8899, "step": 22193 }, { "epoch": 0.29, "grad_norm": 3.825291633605957, "learning_rate": 1.9907599535521673e-05, "loss": 2.1913, "step": 22194 }, { "epoch": 0.29, "grad_norm": 4.143065929412842, "learning_rate": 1.9907585283348307e-05, "loss": 2.7955, "step": 22195 }, { "epoch": 0.29, "grad_norm": 3.9136314392089844, "learning_rate": 1.9907571030080975e-05, "loss": 2.0774, "step": 22196 }, { "epoch": 0.29, "grad_norm": 4.678689956665039, "learning_rate": 1.9907556775719683e-05, "loss": 2.0092, "step": 22197 }, { "epoch": 0.29, "grad_norm": 4.314759731292725, "learning_rate": 1.9907542520264425e-05, "loss": 2.1691, "step": 22198 }, { "epoch": 0.29, "grad_norm": 4.053689002990723, "learning_rate": 1.990752826371521e-05, "loss": 2.1359, "step": 22199 }, { "epoch": 0.29, "grad_norm": 4.4746994972229, "learning_rate": 1.990751400607203e-05, "loss": 2.4977, "step": 22200 }, { "epoch": 0.29, "grad_norm": 3.9588851928710938, "learning_rate": 1.99074997473349e-05, "loss": 2.1105, "step": 22201 }, { "epoch": 0.29, "grad_norm": 3.716123342514038, "learning_rate": 1.9907485487503807e-05, "loss": 1.96, "step": 22202 }, { "epoch": 0.29, "grad_norm": 3.807690382003784, "learning_rate": 1.990747122657876e-05, "loss": 1.9766, "step": 22203 }, { "epoch": 0.29, "grad_norm": 4.209295749664307, "learning_rate": 1.9907456964559763e-05, "loss": 2.2249, "step": 22204 }, { "epoch": 0.29, "grad_norm": 3.6655590534210205, "learning_rate": 1.990744270144681e-05, "loss": 1.9287, "step": 22205 }, { "epoch": 0.29, "grad_norm": 3.882675886154175, "learning_rate": 1.9907428437239914e-05, "loss": 2.081, "step": 22206 }, { "epoch": 0.29, "grad_norm": 4.444141387939453, "learning_rate": 1.9907414171939066e-05, "loss": 2.3177, "step": 22207 }, { "epoch": 0.29, "grad_norm": 4.24997091293335, "learning_rate": 1.990739990554427e-05, "loss": 2.3964, "step": 22208 }, { "epoch": 0.29, "grad_norm": 4.055377006530762, "learning_rate": 1.9907385638055534e-05, "loss": 1.973, "step": 22209 }, { "epoch": 0.29, "grad_norm": 3.8258001804351807, "learning_rate": 1.990737136947285e-05, "loss": 2.0363, "step": 22210 }, { "epoch": 0.29, "grad_norm": 4.142792224884033, "learning_rate": 1.9907357099796227e-05, "loss": 1.8679, "step": 22211 }, { "epoch": 0.29, "grad_norm": 3.6142916679382324, "learning_rate": 1.9907342829025664e-05, "loss": 1.7528, "step": 22212 }, { "epoch": 0.29, "grad_norm": 4.965844631195068, "learning_rate": 1.9907328557161157e-05, "loss": 2.43, "step": 22213 }, { "epoch": 0.29, "grad_norm": 3.386481761932373, "learning_rate": 1.990731428420272e-05, "loss": 1.6855, "step": 22214 }, { "epoch": 0.29, "grad_norm": 4.139042854309082, "learning_rate": 1.990730001015034e-05, "loss": 2.0169, "step": 22215 }, { "epoch": 0.29, "grad_norm": 3.4703447818756104, "learning_rate": 1.990728573500403e-05, "loss": 1.8221, "step": 22216 }, { "epoch": 0.29, "grad_norm": 3.9176928997039795, "learning_rate": 1.9907271458763787e-05, "loss": 2.2592, "step": 22217 }, { "epoch": 0.29, "grad_norm": 4.220491409301758, "learning_rate": 1.9907257181429616e-05, "loss": 2.4949, "step": 22218 }, { "epoch": 0.29, "grad_norm": 4.420945167541504, "learning_rate": 1.9907242903001513e-05, "loss": 2.282, "step": 22219 }, { "epoch": 0.29, "grad_norm": 3.7665469646453857, "learning_rate": 1.9907228623479483e-05, "loss": 1.9652, "step": 22220 }, { "epoch": 0.29, "grad_norm": 3.6037850379943848, "learning_rate": 1.9907214342863524e-05, "loss": 1.8525, "step": 22221 }, { "epoch": 0.29, "grad_norm": 3.9643867015838623, "learning_rate": 1.9907200061153646e-05, "loss": 1.9846, "step": 22222 }, { "epoch": 0.29, "grad_norm": 4.028820514678955, "learning_rate": 1.9907185778349838e-05, "loss": 2.1606, "step": 22223 }, { "epoch": 0.29, "grad_norm": 4.097373008728027, "learning_rate": 1.9907171494452114e-05, "loss": 2.1975, "step": 22224 }, { "epoch": 0.29, "grad_norm": 3.879707098007202, "learning_rate": 1.9907157209460467e-05, "loss": 2.248, "step": 22225 }, { "epoch": 0.29, "grad_norm": 4.214197158813477, "learning_rate": 1.9907142923374905e-05, "loss": 2.1363, "step": 22226 }, { "epoch": 0.29, "grad_norm": 4.080361366271973, "learning_rate": 1.9907128636195426e-05, "loss": 1.887, "step": 22227 }, { "epoch": 0.29, "grad_norm": 4.4278788566589355, "learning_rate": 1.990711434792203e-05, "loss": 2.2666, "step": 22228 }, { "epoch": 0.29, "grad_norm": 3.7797763347625732, "learning_rate": 1.990710005855472e-05, "loss": 1.9731, "step": 22229 }, { "epoch": 0.29, "grad_norm": 3.652056932449341, "learning_rate": 1.99070857680935e-05, "loss": 1.6015, "step": 22230 }, { "epoch": 0.29, "grad_norm": 3.9541637897491455, "learning_rate": 1.990707147653837e-05, "loss": 1.9975, "step": 22231 }, { "epoch": 0.29, "grad_norm": 4.012492656707764, "learning_rate": 1.9907057183889332e-05, "loss": 1.9543, "step": 22232 }, { "epoch": 0.29, "grad_norm": 3.9013710021972656, "learning_rate": 1.990704289014638e-05, "loss": 2.155, "step": 22233 }, { "epoch": 0.29, "grad_norm": 4.317094326019287, "learning_rate": 1.990702859530953e-05, "loss": 2.1791, "step": 22234 }, { "epoch": 0.29, "grad_norm": 3.5662074089050293, "learning_rate": 1.9907014299378773e-05, "loss": 2.1979, "step": 22235 }, { "epoch": 0.29, "grad_norm": 4.828706741333008, "learning_rate": 1.9907000002354112e-05, "loss": 1.9663, "step": 22236 }, { "epoch": 0.29, "grad_norm": 3.383629322052002, "learning_rate": 1.9906985704235552e-05, "loss": 1.7247, "step": 22237 }, { "epoch": 0.29, "grad_norm": 3.8318378925323486, "learning_rate": 1.9906971405023093e-05, "loss": 1.7483, "step": 22238 }, { "epoch": 0.29, "grad_norm": 4.178491115570068, "learning_rate": 1.990695710471674e-05, "loss": 2.3186, "step": 22239 }, { "epoch": 0.29, "grad_norm": 3.5797581672668457, "learning_rate": 1.9906942803316488e-05, "loss": 1.724, "step": 22240 }, { "epoch": 0.29, "grad_norm": 3.852257013320923, "learning_rate": 1.990692850082234e-05, "loss": 2.1527, "step": 22241 }, { "epoch": 0.29, "grad_norm": 3.977102518081665, "learning_rate": 1.9906914197234297e-05, "loss": 1.8288, "step": 22242 }, { "epoch": 0.29, "grad_norm": 4.511916637420654, "learning_rate": 1.9906899892552364e-05, "loss": 2.4708, "step": 22243 }, { "epoch": 0.29, "grad_norm": 4.472805500030518, "learning_rate": 1.9906885586776545e-05, "loss": 2.2936, "step": 22244 }, { "epoch": 0.29, "grad_norm": 3.700007677078247, "learning_rate": 1.9906871279906833e-05, "loss": 1.7756, "step": 22245 }, { "epoch": 0.29, "grad_norm": 3.562302350997925, "learning_rate": 1.9906856971943236e-05, "loss": 1.9687, "step": 22246 }, { "epoch": 0.29, "grad_norm": 3.676509141921997, "learning_rate": 1.9906842662885754e-05, "loss": 2.0322, "step": 22247 }, { "epoch": 0.29, "grad_norm": 3.792165994644165, "learning_rate": 1.990682835273439e-05, "loss": 1.7185, "step": 22248 }, { "epoch": 0.29, "grad_norm": 4.418549537658691, "learning_rate": 1.9906814041489143e-05, "loss": 1.9764, "step": 22249 }, { "epoch": 0.29, "grad_norm": 4.146137714385986, "learning_rate": 1.9906799729150015e-05, "loss": 1.9556, "step": 22250 }, { "epoch": 0.29, "grad_norm": 3.9942665100097656, "learning_rate": 1.990678541571701e-05, "loss": 2.147, "step": 22251 }, { "epoch": 0.29, "grad_norm": 3.942553997039795, "learning_rate": 1.9906771101190125e-05, "loss": 2.2348, "step": 22252 }, { "epoch": 0.29, "grad_norm": 4.184426784515381, "learning_rate": 1.9906756785569367e-05, "loss": 1.8402, "step": 22253 }, { "epoch": 0.29, "grad_norm": 4.410342216491699, "learning_rate": 1.990674246885473e-05, "loss": 2.5243, "step": 22254 }, { "epoch": 0.29, "grad_norm": 4.406494617462158, "learning_rate": 1.9906728151046226e-05, "loss": 2.2438, "step": 22255 }, { "epoch": 0.29, "grad_norm": 4.466907501220703, "learning_rate": 1.9906713832143852e-05, "loss": 1.944, "step": 22256 }, { "epoch": 0.29, "grad_norm": 4.124417304992676, "learning_rate": 1.9906699512147603e-05, "loss": 2.1976, "step": 22257 }, { "epoch": 0.29, "grad_norm": 3.8414454460144043, "learning_rate": 1.990668519105749e-05, "loss": 2.0325, "step": 22258 }, { "epoch": 0.29, "grad_norm": 3.986767530441284, "learning_rate": 1.990667086887351e-05, "loss": 2.4061, "step": 22259 }, { "epoch": 0.29, "grad_norm": 4.7002763748168945, "learning_rate": 1.9906656545595666e-05, "loss": 2.4826, "step": 22260 }, { "epoch": 0.29, "grad_norm": 4.665022850036621, "learning_rate": 1.990664222122396e-05, "loss": 2.1764, "step": 22261 }, { "epoch": 0.29, "grad_norm": 4.09023380279541, "learning_rate": 1.990662789575839e-05, "loss": 1.8262, "step": 22262 }, { "epoch": 0.29, "grad_norm": 4.075938701629639, "learning_rate": 1.9906613569198963e-05, "loss": 1.9515, "step": 22263 }, { "epoch": 0.29, "grad_norm": 3.8217267990112305, "learning_rate": 1.9906599241545676e-05, "loss": 1.7571, "step": 22264 }, { "epoch": 0.29, "grad_norm": 4.074254989624023, "learning_rate": 1.9906584912798533e-05, "loss": 2.1263, "step": 22265 }, { "epoch": 0.29, "grad_norm": 4.470347881317139, "learning_rate": 1.9906570582957536e-05, "loss": 2.1728, "step": 22266 }, { "epoch": 0.29, "grad_norm": 3.9223783016204834, "learning_rate": 1.9906556252022683e-05, "loss": 2.1435, "step": 22267 }, { "epoch": 0.29, "grad_norm": 3.740952491760254, "learning_rate": 1.990654191999398e-05, "loss": 1.7147, "step": 22268 }, { "epoch": 0.29, "grad_norm": 4.41671895980835, "learning_rate": 1.9906527586871426e-05, "loss": 2.398, "step": 22269 }, { "epoch": 0.29, "grad_norm": 3.95365834236145, "learning_rate": 1.9906513252655023e-05, "loss": 2.1615, "step": 22270 }, { "epoch": 0.29, "grad_norm": 4.217646598815918, "learning_rate": 1.9906498917344774e-05, "loss": 2.3075, "step": 22271 }, { "epoch": 0.29, "grad_norm": 4.130718231201172, "learning_rate": 1.990648458094068e-05, "loss": 2.5603, "step": 22272 }, { "epoch": 0.29, "grad_norm": 3.62850284576416, "learning_rate": 1.9906470243442742e-05, "loss": 1.5602, "step": 22273 }, { "epoch": 0.29, "grad_norm": 3.930111885070801, "learning_rate": 1.990645590485096e-05, "loss": 2.443, "step": 22274 }, { "epoch": 0.29, "grad_norm": 4.52134895324707, "learning_rate": 1.9906441565165338e-05, "loss": 2.1824, "step": 22275 }, { "epoch": 0.29, "grad_norm": 3.3986105918884277, "learning_rate": 1.9906427224385875e-05, "loss": 1.9667, "step": 22276 }, { "epoch": 0.29, "grad_norm": 3.787839889526367, "learning_rate": 1.9906412882512578e-05, "loss": 1.9745, "step": 22277 }, { "epoch": 0.29, "grad_norm": 4.086117744445801, "learning_rate": 1.9906398539545443e-05, "loss": 2.0321, "step": 22278 }, { "epoch": 0.29, "grad_norm": 4.164732456207275, "learning_rate": 1.9906384195484473e-05, "loss": 2.2829, "step": 22279 }, { "epoch": 0.29, "grad_norm": 4.442957401275635, "learning_rate": 1.990636985032967e-05, "loss": 2.0919, "step": 22280 }, { "epoch": 0.29, "grad_norm": 4.265172481536865, "learning_rate": 1.9906355504081037e-05, "loss": 2.2098, "step": 22281 }, { "epoch": 0.29, "grad_norm": 4.086650371551514, "learning_rate": 1.9906341156738573e-05, "loss": 2.1791, "step": 22282 }, { "epoch": 0.29, "grad_norm": 4.412801265716553, "learning_rate": 1.990632680830228e-05, "loss": 2.1212, "step": 22283 }, { "epoch": 0.29, "grad_norm": 3.5775558948516846, "learning_rate": 1.9906312458772162e-05, "loss": 1.9835, "step": 22284 }, { "epoch": 0.29, "grad_norm": 4.379322052001953, "learning_rate": 1.990629810814822e-05, "loss": 1.9138, "step": 22285 }, { "epoch": 0.29, "grad_norm": 4.1965508460998535, "learning_rate": 1.9906283756430454e-05, "loss": 2.2362, "step": 22286 }, { "epoch": 0.29, "grad_norm": 4.824033737182617, "learning_rate": 1.9906269403618865e-05, "loss": 2.4008, "step": 22287 }, { "epoch": 0.29, "grad_norm": 4.754158020019531, "learning_rate": 1.9906255049713458e-05, "loss": 2.3445, "step": 22288 }, { "epoch": 0.29, "grad_norm": 3.684558153152466, "learning_rate": 1.990624069471423e-05, "loss": 1.8668, "step": 22289 }, { "epoch": 0.29, "grad_norm": 3.50443172454834, "learning_rate": 1.9906226338621187e-05, "loss": 1.6159, "step": 22290 }, { "epoch": 0.29, "grad_norm": 4.212428092956543, "learning_rate": 1.9906211981434324e-05, "loss": 2.0594, "step": 22291 }, { "epoch": 0.29, "grad_norm": 4.01043701171875, "learning_rate": 1.9906197623153653e-05, "loss": 1.8667, "step": 22292 }, { "epoch": 0.29, "grad_norm": 4.120639801025391, "learning_rate": 1.9906183263779168e-05, "loss": 1.895, "step": 22293 }, { "epoch": 0.29, "grad_norm": 4.372206687927246, "learning_rate": 1.990616890331087e-05, "loss": 1.9809, "step": 22294 }, { "epoch": 0.29, "grad_norm": 4.012256622314453, "learning_rate": 1.9906154541748765e-05, "loss": 2.0569, "step": 22295 }, { "epoch": 0.29, "grad_norm": 4.223135471343994, "learning_rate": 1.9906140179092853e-05, "loss": 2.3053, "step": 22296 }, { "epoch": 0.29, "grad_norm": 4.672656536102295, "learning_rate": 1.9906125815343135e-05, "loss": 3.1063, "step": 22297 }, { "epoch": 0.29, "grad_norm": 4.568138599395752, "learning_rate": 1.990611145049961e-05, "loss": 2.2922, "step": 22298 }, { "epoch": 0.29, "grad_norm": 4.11537504196167, "learning_rate": 1.9906097084562285e-05, "loss": 2.1784, "step": 22299 }, { "epoch": 0.29, "grad_norm": 3.609076976776123, "learning_rate": 1.9906082717531157e-05, "loss": 1.7514, "step": 22300 }, { "epoch": 0.29, "grad_norm": 4.7258710861206055, "learning_rate": 1.990606834940623e-05, "loss": 2.3382, "step": 22301 }, { "epoch": 0.29, "grad_norm": 3.8919711112976074, "learning_rate": 1.9906053980187506e-05, "loss": 2.1339, "step": 22302 }, { "epoch": 0.29, "grad_norm": 3.560708999633789, "learning_rate": 1.9906039609874983e-05, "loss": 1.5999, "step": 22303 }, { "epoch": 0.29, "grad_norm": 3.496450901031494, "learning_rate": 1.9906025238468666e-05, "loss": 1.8695, "step": 22304 }, { "epoch": 0.29, "grad_norm": 4.097922325134277, "learning_rate": 1.9906010865968558e-05, "loss": 2.3123, "step": 22305 }, { "epoch": 0.29, "grad_norm": 3.594099521636963, "learning_rate": 1.9905996492374656e-05, "loss": 1.7862, "step": 22306 }, { "epoch": 0.29, "grad_norm": 4.330763339996338, "learning_rate": 1.9905982117686964e-05, "loss": 2.2725, "step": 22307 }, { "epoch": 0.29, "grad_norm": 3.903313398361206, "learning_rate": 1.9905967741905488e-05, "loss": 2.0182, "step": 22308 }, { "epoch": 0.29, "grad_norm": 5.158168792724609, "learning_rate": 1.990595336503022e-05, "loss": 2.7288, "step": 22309 }, { "epoch": 0.29, "grad_norm": 3.224515438079834, "learning_rate": 1.9905938987061167e-05, "loss": 1.3066, "step": 22310 }, { "epoch": 0.29, "grad_norm": 3.615471601486206, "learning_rate": 1.9905924607998333e-05, "loss": 1.8772, "step": 22311 }, { "epoch": 0.29, "grad_norm": 5.032590389251709, "learning_rate": 1.9905910227841715e-05, "loss": 2.3161, "step": 22312 }, { "epoch": 0.29, "grad_norm": 4.1050944328308105, "learning_rate": 1.9905895846591316e-05, "loss": 2.0365, "step": 22313 }, { "epoch": 0.29, "grad_norm": 3.9389991760253906, "learning_rate": 1.9905881464247137e-05, "loss": 2.0399, "step": 22314 }, { "epoch": 0.29, "grad_norm": 3.5052616596221924, "learning_rate": 1.9905867080809184e-05, "loss": 1.8082, "step": 22315 }, { "epoch": 0.29, "grad_norm": 4.265537738800049, "learning_rate": 1.9905852696277455e-05, "loss": 2.196, "step": 22316 }, { "epoch": 0.29, "grad_norm": 3.315462827682495, "learning_rate": 1.990583831065195e-05, "loss": 1.6744, "step": 22317 }, { "epoch": 0.29, "grad_norm": 3.7177345752716064, "learning_rate": 1.990582392393267e-05, "loss": 1.7189, "step": 22318 }, { "epoch": 0.29, "grad_norm": 4.79276704788208, "learning_rate": 1.9905809536119624e-05, "loss": 2.3656, "step": 22319 }, { "epoch": 0.29, "grad_norm": 3.7818164825439453, "learning_rate": 1.9905795147212806e-05, "loss": 1.9835, "step": 22320 }, { "epoch": 0.29, "grad_norm": 4.511219501495361, "learning_rate": 1.9905780757212222e-05, "loss": 2.0815, "step": 22321 }, { "epoch": 0.29, "grad_norm": 4.529025554656982, "learning_rate": 1.9905766366117867e-05, "loss": 2.3455, "step": 22322 }, { "epoch": 0.29, "grad_norm": 4.52825927734375, "learning_rate": 1.9905751973929756e-05, "loss": 2.5914, "step": 22323 }, { "epoch": 0.29, "grad_norm": 4.325109481811523, "learning_rate": 1.9905737580647875e-05, "loss": 2.3123, "step": 22324 }, { "epoch": 0.29, "grad_norm": 4.461335182189941, "learning_rate": 1.9905723186272233e-05, "loss": 2.1217, "step": 22325 }, { "epoch": 0.29, "grad_norm": 4.695436000823975, "learning_rate": 1.9905708790802835e-05, "loss": 2.5837, "step": 22326 }, { "epoch": 0.29, "grad_norm": 3.3777966499328613, "learning_rate": 1.9905694394239673e-05, "loss": 1.579, "step": 22327 }, { "epoch": 0.29, "grad_norm": 4.45329475402832, "learning_rate": 1.9905679996582758e-05, "loss": 2.3316, "step": 22328 }, { "epoch": 0.29, "grad_norm": 3.3053040504455566, "learning_rate": 1.9905665597832086e-05, "loss": 1.4408, "step": 22329 }, { "epoch": 0.29, "grad_norm": 4.372128963470459, "learning_rate": 1.9905651197987664e-05, "loss": 2.2187, "step": 22330 }, { "epoch": 0.29, "grad_norm": 4.152142524719238, "learning_rate": 1.990563679704949e-05, "loss": 1.8533, "step": 22331 }, { "epoch": 0.29, "grad_norm": 3.582634925842285, "learning_rate": 1.990562239501756e-05, "loss": 1.4487, "step": 22332 }, { "epoch": 0.29, "grad_norm": 4.605379581451416, "learning_rate": 1.9905607991891885e-05, "loss": 2.1505, "step": 22333 }, { "epoch": 0.29, "grad_norm": 3.802870273590088, "learning_rate": 1.990559358767246e-05, "loss": 2.0061, "step": 22334 }, { "epoch": 0.29, "grad_norm": 4.201106548309326, "learning_rate": 1.9905579182359292e-05, "loss": 2.4153, "step": 22335 }, { "epoch": 0.29, "grad_norm": 3.689321279525757, "learning_rate": 1.990556477595238e-05, "loss": 1.9043, "step": 22336 }, { "epoch": 0.29, "grad_norm": 4.698666095733643, "learning_rate": 1.9905550368451727e-05, "loss": 2.3524, "step": 22337 }, { "epoch": 0.29, "grad_norm": 3.7480294704437256, "learning_rate": 1.9905535959857333e-05, "loss": 2.0596, "step": 22338 }, { "epoch": 0.29, "grad_norm": 3.9354376792907715, "learning_rate": 1.9905521550169196e-05, "loss": 2.2858, "step": 22339 }, { "epoch": 0.29, "grad_norm": 3.9075381755828857, "learning_rate": 1.9905507139387325e-05, "loss": 2.3659, "step": 22340 }, { "epoch": 0.29, "grad_norm": 4.547674655914307, "learning_rate": 1.990549272751172e-05, "loss": 2.2873, "step": 22341 }, { "epoch": 0.29, "grad_norm": 3.777406692504883, "learning_rate": 1.9905478314542376e-05, "loss": 1.6905, "step": 22342 }, { "epoch": 0.29, "grad_norm": 4.2681355476379395, "learning_rate": 1.99054639004793e-05, "loss": 2.3248, "step": 22343 }, { "epoch": 0.29, "grad_norm": 3.697885036468506, "learning_rate": 1.990544948532249e-05, "loss": 2.1795, "step": 22344 }, { "epoch": 0.29, "grad_norm": 4.0507097244262695, "learning_rate": 1.9905435069071956e-05, "loss": 2.3704, "step": 22345 }, { "epoch": 0.29, "grad_norm": 3.9266138076782227, "learning_rate": 1.990542065172769e-05, "loss": 2.0281, "step": 22346 }, { "epoch": 0.29, "grad_norm": 4.318018436431885, "learning_rate": 1.99054062332897e-05, "loss": 2.305, "step": 22347 }, { "epoch": 0.29, "grad_norm": 4.494470119476318, "learning_rate": 1.9905391813757983e-05, "loss": 2.228, "step": 22348 }, { "epoch": 0.29, "grad_norm": 4.073910713195801, "learning_rate": 1.9905377393132543e-05, "loss": 1.8765, "step": 22349 }, { "epoch": 0.29, "grad_norm": 4.935496807098389, "learning_rate": 1.9905362971413383e-05, "loss": 2.2854, "step": 22350 }, { "epoch": 0.29, "grad_norm": 3.9416840076446533, "learning_rate": 1.9905348548600504e-05, "loss": 1.9438, "step": 22351 }, { "epoch": 0.29, "grad_norm": 4.179495334625244, "learning_rate": 1.99053341246939e-05, "loss": 2.0404, "step": 22352 }, { "epoch": 0.29, "grad_norm": 3.993483543395996, "learning_rate": 1.9905319699693585e-05, "loss": 1.8817, "step": 22353 }, { "epoch": 0.29, "grad_norm": 4.30670690536499, "learning_rate": 1.9905305273599554e-05, "loss": 2.1346, "step": 22354 }, { "epoch": 0.29, "grad_norm": 3.6225435733795166, "learning_rate": 1.9905290846411808e-05, "loss": 1.8117, "step": 22355 }, { "epoch": 0.29, "grad_norm": 3.764319658279419, "learning_rate": 1.9905276418130352e-05, "loss": 1.6963, "step": 22356 }, { "epoch": 0.29, "grad_norm": 4.4641008377075195, "learning_rate": 1.990526198875518e-05, "loss": 2.5293, "step": 22357 }, { "epoch": 0.29, "grad_norm": 4.418838977813721, "learning_rate": 1.9905247558286303e-05, "loss": 2.0564, "step": 22358 }, { "epoch": 0.29, "grad_norm": 4.332991600036621, "learning_rate": 1.990523312672372e-05, "loss": 2.1871, "step": 22359 }, { "epoch": 0.29, "grad_norm": 3.7881879806518555, "learning_rate": 1.990521869406743e-05, "loss": 1.8095, "step": 22360 }, { "epoch": 0.29, "grad_norm": 4.000315189361572, "learning_rate": 1.9905204260317434e-05, "loss": 1.9156, "step": 22361 }, { "epoch": 0.29, "grad_norm": 4.084198474884033, "learning_rate": 1.9905189825473736e-05, "loss": 2.1548, "step": 22362 }, { "epoch": 0.29, "grad_norm": 3.644146203994751, "learning_rate": 1.9905175389536336e-05, "loss": 1.551, "step": 22363 }, { "epoch": 0.29, "grad_norm": 3.298609972000122, "learning_rate": 1.990516095250524e-05, "loss": 1.5979, "step": 22364 }, { "epoch": 0.29, "grad_norm": 3.2901315689086914, "learning_rate": 1.9905146514380445e-05, "loss": 1.7096, "step": 22365 }, { "epoch": 0.29, "grad_norm": 4.365687847137451, "learning_rate": 1.9905132075161954e-05, "loss": 2.7666, "step": 22366 }, { "epoch": 0.29, "grad_norm": 3.9399025440216064, "learning_rate": 1.9905117634849768e-05, "loss": 2.0443, "step": 22367 }, { "epoch": 0.29, "grad_norm": 3.8214828968048096, "learning_rate": 1.9905103193443885e-05, "loss": 1.9705, "step": 22368 }, { "epoch": 0.29, "grad_norm": 3.7527613639831543, "learning_rate": 1.9905088750944314e-05, "loss": 1.9225, "step": 22369 }, { "epoch": 0.29, "grad_norm": 3.8823163509368896, "learning_rate": 1.9905074307351054e-05, "loss": 2.3311, "step": 22370 }, { "epoch": 0.29, "grad_norm": 4.280679225921631, "learning_rate": 1.9905059862664108e-05, "loss": 2.4712, "step": 22371 }, { "epoch": 0.29, "grad_norm": 4.2257490158081055, "learning_rate": 1.990504541688347e-05, "loss": 2.0289, "step": 22372 }, { "epoch": 0.29, "grad_norm": 4.570096969604492, "learning_rate": 1.990503097000915e-05, "loss": 2.3704, "step": 22373 }, { "epoch": 0.29, "grad_norm": 4.488887786865234, "learning_rate": 1.9905016522041144e-05, "loss": 2.6047, "step": 22374 }, { "epoch": 0.29, "grad_norm": 4.50576114654541, "learning_rate": 1.990500207297946e-05, "loss": 2.1651, "step": 22375 }, { "epoch": 0.29, "grad_norm": 3.7335257530212402, "learning_rate": 1.9904987622824093e-05, "loss": 2.2276, "step": 22376 }, { "epoch": 0.29, "grad_norm": 4.007202625274658, "learning_rate": 1.9904973171575047e-05, "loss": 1.9498, "step": 22377 }, { "epoch": 0.29, "grad_norm": 4.237701416015625, "learning_rate": 1.9904958719232327e-05, "loss": 2.3231, "step": 22378 }, { "epoch": 0.29, "grad_norm": 3.794300079345703, "learning_rate": 1.990494426579593e-05, "loss": 2.1227, "step": 22379 }, { "epoch": 0.29, "grad_norm": 4.152174472808838, "learning_rate": 1.990492981126586e-05, "loss": 1.924, "step": 22380 }, { "epoch": 0.29, "grad_norm": 3.9915883541107178, "learning_rate": 1.9904915355642115e-05, "loss": 1.9059, "step": 22381 }, { "epoch": 0.29, "grad_norm": 3.8439595699310303, "learning_rate": 1.99049008989247e-05, "loss": 2.0571, "step": 22382 }, { "epoch": 0.29, "grad_norm": 3.6321873664855957, "learning_rate": 1.990488644111362e-05, "loss": 1.9394, "step": 22383 }, { "epoch": 0.29, "grad_norm": 4.0153279304504395, "learning_rate": 1.9904871982208866e-05, "loss": 1.9788, "step": 22384 }, { "epoch": 0.29, "grad_norm": 3.746011972427368, "learning_rate": 1.9904857522210448e-05, "loss": 1.8675, "step": 22385 }, { "epoch": 0.29, "grad_norm": 3.6084787845611572, "learning_rate": 1.9904843061118368e-05, "loss": 2.0331, "step": 22386 }, { "epoch": 0.29, "grad_norm": 3.9975850582122803, "learning_rate": 1.9904828598932622e-05, "loss": 1.9888, "step": 22387 }, { "epoch": 0.29, "grad_norm": 3.7600996494293213, "learning_rate": 1.990481413565322e-05, "loss": 1.7983, "step": 22388 }, { "epoch": 0.29, "grad_norm": 4.2433905601501465, "learning_rate": 1.9904799671280153e-05, "loss": 2.3146, "step": 22389 }, { "epoch": 0.29, "grad_norm": 4.126262664794922, "learning_rate": 1.990478520581343e-05, "loss": 2.5102, "step": 22390 }, { "epoch": 0.29, "grad_norm": 4.321104049682617, "learning_rate": 1.9904770739253054e-05, "loss": 2.0217, "step": 22391 }, { "epoch": 0.29, "grad_norm": 3.704106569290161, "learning_rate": 1.990475627159902e-05, "loss": 1.9384, "step": 22392 }, { "epoch": 0.29, "grad_norm": 4.692698955535889, "learning_rate": 1.9904741802851334e-05, "loss": 2.4018, "step": 22393 }, { "epoch": 0.29, "grad_norm": 3.6813201904296875, "learning_rate": 1.9904727333009993e-05, "loss": 2.0312, "step": 22394 }, { "epoch": 0.29, "grad_norm": 4.079331874847412, "learning_rate": 1.9904712862075008e-05, "loss": 1.91, "step": 22395 }, { "epoch": 0.29, "grad_norm": 3.8529043197631836, "learning_rate": 1.990469839004637e-05, "loss": 2.147, "step": 22396 }, { "epoch": 0.29, "grad_norm": 3.5230743885040283, "learning_rate": 1.9904683916924088e-05, "loss": 1.7378, "step": 22397 }, { "epoch": 0.29, "grad_norm": 4.759613037109375, "learning_rate": 1.990466944270816e-05, "loss": 2.2141, "step": 22398 }, { "epoch": 0.29, "grad_norm": 3.783539056777954, "learning_rate": 1.9904654967398586e-05, "loss": 1.639, "step": 22399 }, { "epoch": 0.29, "grad_norm": 4.035512924194336, "learning_rate": 1.9904640490995372e-05, "loss": 2.1147, "step": 22400 }, { "epoch": 0.29, "grad_norm": 3.4177584648132324, "learning_rate": 1.9904626013498517e-05, "loss": 1.6026, "step": 22401 }, { "epoch": 0.29, "grad_norm": 3.5337657928466797, "learning_rate": 1.9904611534908027e-05, "loss": 2.0219, "step": 22402 }, { "epoch": 0.29, "grad_norm": 3.1801979541778564, "learning_rate": 1.9904597055223896e-05, "loss": 1.5953, "step": 22403 }, { "epoch": 0.29, "grad_norm": 3.868971586227417, "learning_rate": 1.990458257444613e-05, "loss": 2.2712, "step": 22404 }, { "epoch": 0.29, "grad_norm": 4.375014781951904, "learning_rate": 1.9904568092574733e-05, "loss": 2.305, "step": 22405 }, { "epoch": 0.29, "grad_norm": 3.317843198776245, "learning_rate": 1.99045536096097e-05, "loss": 1.7228, "step": 22406 }, { "epoch": 0.29, "grad_norm": 3.335502862930298, "learning_rate": 1.9904539125551037e-05, "loss": 1.6068, "step": 22407 }, { "epoch": 0.29, "grad_norm": 4.40909481048584, "learning_rate": 1.9904524640398746e-05, "loss": 1.9772, "step": 22408 }, { "epoch": 0.29, "grad_norm": 3.8651037216186523, "learning_rate": 1.9904510154152827e-05, "loss": 2.0036, "step": 22409 }, { "epoch": 0.29, "grad_norm": 4.406988143920898, "learning_rate": 1.9904495666813283e-05, "loss": 2.2223, "step": 22410 }, { "epoch": 0.29, "grad_norm": 3.5866079330444336, "learning_rate": 1.990448117838011e-05, "loss": 2.0268, "step": 22411 }, { "epoch": 0.29, "grad_norm": 4.215826034545898, "learning_rate": 1.9904466688853318e-05, "loss": 2.203, "step": 22412 }, { "epoch": 0.29, "grad_norm": 3.9960408210754395, "learning_rate": 1.9904452198232904e-05, "loss": 2.2738, "step": 22413 }, { "epoch": 0.29, "grad_norm": 4.342125415802002, "learning_rate": 1.9904437706518872e-05, "loss": 2.0836, "step": 22414 }, { "epoch": 0.29, "grad_norm": 3.778043508529663, "learning_rate": 1.9904423213711222e-05, "loss": 1.9407, "step": 22415 }, { "epoch": 0.29, "grad_norm": 3.7751383781433105, "learning_rate": 1.990440871980995e-05, "loss": 1.8169, "step": 22416 }, { "epoch": 0.29, "grad_norm": 3.6533398628234863, "learning_rate": 1.9904394224815072e-05, "loss": 1.843, "step": 22417 }, { "epoch": 0.29, "grad_norm": 4.154345512390137, "learning_rate": 1.9904379728726576e-05, "loss": 2.2651, "step": 22418 }, { "epoch": 0.29, "grad_norm": 4.340866565704346, "learning_rate": 1.990436523154447e-05, "loss": 2.2062, "step": 22419 }, { "epoch": 0.29, "grad_norm": 4.091460704803467, "learning_rate": 1.990435073326875e-05, "loss": 2.3638, "step": 22420 }, { "epoch": 0.29, "grad_norm": 3.9092185497283936, "learning_rate": 1.9904336233899428e-05, "loss": 1.8916, "step": 22421 }, { "epoch": 0.29, "grad_norm": 4.388880252838135, "learning_rate": 1.9904321733436497e-05, "loss": 2.5077, "step": 22422 }, { "epoch": 0.29, "grad_norm": 3.769606590270996, "learning_rate": 1.990430723187996e-05, "loss": 1.7012, "step": 22423 }, { "epoch": 0.29, "grad_norm": 4.155336856842041, "learning_rate": 1.9904292729229817e-05, "loss": 2.5748, "step": 22424 }, { "epoch": 0.29, "grad_norm": 4.094029903411865, "learning_rate": 1.9904278225486074e-05, "loss": 2.1435, "step": 22425 }, { "epoch": 0.29, "grad_norm": 4.067488670349121, "learning_rate": 1.990426372064873e-05, "loss": 2.4189, "step": 22426 }, { "epoch": 0.29, "grad_norm": 3.8656628131866455, "learning_rate": 1.990424921471779e-05, "loss": 2.4429, "step": 22427 }, { "epoch": 0.29, "grad_norm": 4.356024742126465, "learning_rate": 1.990423470769325e-05, "loss": 2.1445, "step": 22428 }, { "epoch": 0.29, "grad_norm": 4.109136581420898, "learning_rate": 1.9904220199575114e-05, "loss": 1.9977, "step": 22429 }, { "epoch": 0.29, "grad_norm": 3.9922878742218018, "learning_rate": 1.990420569036339e-05, "loss": 2.0333, "step": 22430 }, { "epoch": 0.29, "grad_norm": 4.451341152191162, "learning_rate": 1.9904191180058066e-05, "loss": 2.1135, "step": 22431 }, { "epoch": 0.29, "grad_norm": 4.120941638946533, "learning_rate": 1.9904176668659158e-05, "loss": 2.2678, "step": 22432 }, { "epoch": 0.29, "grad_norm": 4.919968605041504, "learning_rate": 1.9904162156166656e-05, "loss": 2.1048, "step": 22433 }, { "epoch": 0.29, "grad_norm": 4.147128582000732, "learning_rate": 1.990414764258057e-05, "loss": 2.0433, "step": 22434 }, { "epoch": 0.29, "grad_norm": 3.928415298461914, "learning_rate": 1.9904133127900893e-05, "loss": 1.8731, "step": 22435 }, { "epoch": 0.29, "grad_norm": 3.951582908630371, "learning_rate": 1.9904118612127635e-05, "loss": 2.4155, "step": 22436 }, { "epoch": 0.29, "grad_norm": 4.2847795486450195, "learning_rate": 1.9904104095260794e-05, "loss": 2.5559, "step": 22437 }, { "epoch": 0.29, "grad_norm": 3.750776529312134, "learning_rate": 1.9904089577300372e-05, "loss": 1.9861, "step": 22438 }, { "epoch": 0.29, "grad_norm": 4.050950050354004, "learning_rate": 1.990407505824637e-05, "loss": 2.0094, "step": 22439 }, { "epoch": 0.29, "grad_norm": 4.586703777313232, "learning_rate": 1.990406053809879e-05, "loss": 2.5324, "step": 22440 }, { "epoch": 0.29, "grad_norm": 4.538647174835205, "learning_rate": 1.9904046016857633e-05, "loss": 2.2621, "step": 22441 }, { "epoch": 0.29, "grad_norm": 3.9710257053375244, "learning_rate": 1.9904031494522903e-05, "loss": 1.8961, "step": 22442 }, { "epoch": 0.29, "grad_norm": 3.884516716003418, "learning_rate": 1.99040169710946e-05, "loss": 2.2337, "step": 22443 }, { "epoch": 0.29, "grad_norm": 4.063614368438721, "learning_rate": 1.9904002446572724e-05, "loss": 1.8095, "step": 22444 }, { "epoch": 0.29, "grad_norm": 4.751559257507324, "learning_rate": 1.9903987920957282e-05, "loss": 2.5111, "step": 22445 }, { "epoch": 0.29, "grad_norm": 4.03535795211792, "learning_rate": 1.9903973394248267e-05, "loss": 1.8448, "step": 22446 }, { "epoch": 0.29, "grad_norm": 3.4312751293182373, "learning_rate": 1.990395886644569e-05, "loss": 1.5174, "step": 22447 }, { "epoch": 0.29, "grad_norm": 4.831080436706543, "learning_rate": 1.9903944337549543e-05, "loss": 2.6068, "step": 22448 }, { "epoch": 0.29, "grad_norm": 4.111822605133057, "learning_rate": 1.9903929807559833e-05, "loss": 2.1018, "step": 22449 }, { "epoch": 0.29, "grad_norm": 4.2851738929748535, "learning_rate": 1.9903915276476564e-05, "loss": 2.1665, "step": 22450 }, { "epoch": 0.29, "grad_norm": 3.857243299484253, "learning_rate": 1.9903900744299734e-05, "loss": 2.2062, "step": 22451 }, { "epoch": 0.29, "grad_norm": 3.838982582092285, "learning_rate": 1.9903886211029344e-05, "loss": 1.8114, "step": 22452 }, { "epoch": 0.29, "grad_norm": 4.704464912414551, "learning_rate": 1.9903871676665397e-05, "loss": 2.3678, "step": 22453 }, { "epoch": 0.29, "grad_norm": 4.389005661010742, "learning_rate": 1.9903857141207897e-05, "loss": 2.0043, "step": 22454 }, { "epoch": 0.29, "grad_norm": 4.140627861022949, "learning_rate": 1.990384260465684e-05, "loss": 1.6606, "step": 22455 }, { "epoch": 0.29, "grad_norm": 4.498624801635742, "learning_rate": 1.9903828067012233e-05, "loss": 2.3373, "step": 22456 }, { "epoch": 0.29, "grad_norm": 3.8495078086853027, "learning_rate": 1.9903813528274073e-05, "loss": 1.7695, "step": 22457 }, { "epoch": 0.29, "grad_norm": 3.5706427097320557, "learning_rate": 1.9903798988442366e-05, "loss": 1.8264, "step": 22458 }, { "epoch": 0.29, "grad_norm": 4.038104057312012, "learning_rate": 1.990378444751711e-05, "loss": 2.2808, "step": 22459 }, { "epoch": 0.29, "grad_norm": 3.911740303039551, "learning_rate": 1.9903769905498308e-05, "loss": 1.7822, "step": 22460 }, { "epoch": 0.29, "grad_norm": 4.5115509033203125, "learning_rate": 1.9903755362385965e-05, "loss": 2.1554, "step": 22461 }, { "epoch": 0.29, "grad_norm": 4.146066665649414, "learning_rate": 1.9903740818180078e-05, "loss": 2.2227, "step": 22462 }, { "epoch": 0.29, "grad_norm": 5.376228332519531, "learning_rate": 1.9903726272880648e-05, "loss": 2.7904, "step": 22463 }, { "epoch": 0.29, "grad_norm": 3.8494601249694824, "learning_rate": 1.990371172648768e-05, "loss": 1.9888, "step": 22464 }, { "epoch": 0.29, "grad_norm": 3.782505512237549, "learning_rate": 1.9903697179001175e-05, "loss": 2.0863, "step": 22465 }, { "epoch": 0.29, "grad_norm": 4.062546730041504, "learning_rate": 1.990368263042113e-05, "loss": 2.1582, "step": 22466 }, { "epoch": 0.29, "grad_norm": 4.250208854675293, "learning_rate": 1.9903668080747556e-05, "loss": 2.2526, "step": 22467 }, { "epoch": 0.29, "grad_norm": 3.8748133182525635, "learning_rate": 1.9903653529980443e-05, "loss": 1.6745, "step": 22468 }, { "epoch": 0.29, "grad_norm": 4.015970706939697, "learning_rate": 1.9903638978119804e-05, "loss": 2.0048, "step": 22469 }, { "epoch": 0.29, "grad_norm": 3.958099365234375, "learning_rate": 1.9903624425165633e-05, "loss": 1.9518, "step": 22470 }, { "epoch": 0.29, "grad_norm": 3.9463982582092285, "learning_rate": 1.9903609871117934e-05, "loss": 1.8061, "step": 22471 }, { "epoch": 0.29, "grad_norm": 4.422111511230469, "learning_rate": 1.9903595315976706e-05, "loss": 1.9225, "step": 22472 }, { "epoch": 0.29, "grad_norm": 3.8802196979522705, "learning_rate": 1.990358075974196e-05, "loss": 2.039, "step": 22473 }, { "epoch": 0.29, "grad_norm": 3.951509475708008, "learning_rate": 1.9903566202413685e-05, "loss": 1.6801, "step": 22474 }, { "epoch": 0.29, "grad_norm": 4.479893207550049, "learning_rate": 1.990355164399189e-05, "loss": 2.5544, "step": 22475 }, { "epoch": 0.29, "grad_norm": 3.6150259971618652, "learning_rate": 1.9903537084476572e-05, "loss": 1.8525, "step": 22476 }, { "epoch": 0.29, "grad_norm": 4.264878749847412, "learning_rate": 1.990352252386774e-05, "loss": 2.3847, "step": 22477 }, { "epoch": 0.29, "grad_norm": 4.672395706176758, "learning_rate": 1.9903507962165388e-05, "loss": 2.1445, "step": 22478 }, { "epoch": 0.29, "grad_norm": 4.077519416809082, "learning_rate": 1.9903493399369524e-05, "loss": 2.2926, "step": 22479 }, { "epoch": 0.29, "grad_norm": 3.4125850200653076, "learning_rate": 1.9903478835480143e-05, "loss": 1.7547, "step": 22480 }, { "epoch": 0.29, "grad_norm": 4.323334217071533, "learning_rate": 1.990346427049725e-05, "loss": 2.3156, "step": 22481 }, { "epoch": 0.29, "grad_norm": 4.0173516273498535, "learning_rate": 1.9903449704420846e-05, "loss": 2.2712, "step": 22482 }, { "epoch": 0.29, "grad_norm": 4.1757073402404785, "learning_rate": 1.9903435137250934e-05, "loss": 2.1789, "step": 22483 }, { "epoch": 0.29, "grad_norm": 3.952683448791504, "learning_rate": 1.9903420568987516e-05, "loss": 2.0061, "step": 22484 }, { "epoch": 0.29, "grad_norm": 4.51522970199585, "learning_rate": 1.990340599963059e-05, "loss": 2.6467, "step": 22485 }, { "epoch": 0.29, "grad_norm": 3.7823498249053955, "learning_rate": 1.9903391429180165e-05, "loss": 1.9133, "step": 22486 }, { "epoch": 0.29, "grad_norm": 3.8301806449890137, "learning_rate": 1.9903376857636232e-05, "loss": 1.6598, "step": 22487 }, { "epoch": 0.29, "grad_norm": 4.269881725311279, "learning_rate": 1.9903362284998803e-05, "loss": 1.8215, "step": 22488 }, { "epoch": 0.29, "grad_norm": 4.1130146980285645, "learning_rate": 1.9903347711267872e-05, "loss": 2.2302, "step": 22489 }, { "epoch": 0.29, "grad_norm": 4.264496803283691, "learning_rate": 1.990333313644344e-05, "loss": 2.083, "step": 22490 }, { "epoch": 0.29, "grad_norm": 3.28243350982666, "learning_rate": 1.990331856052552e-05, "loss": 1.6384, "step": 22491 }, { "epoch": 0.29, "grad_norm": 4.246237277984619, "learning_rate": 1.99033039835141e-05, "loss": 2.2914, "step": 22492 }, { "epoch": 0.29, "grad_norm": 4.226983070373535, "learning_rate": 1.990328940540919e-05, "loss": 2.3149, "step": 22493 }, { "epoch": 0.29, "grad_norm": 3.6702723503112793, "learning_rate": 1.9903274826210785e-05, "loss": 1.8796, "step": 22494 }, { "epoch": 0.29, "grad_norm": 4.027608394622803, "learning_rate": 1.9903260245918893e-05, "loss": 1.9851, "step": 22495 }, { "epoch": 0.29, "grad_norm": 4.5042524337768555, "learning_rate": 1.990324566453351e-05, "loss": 2.1751, "step": 22496 }, { "epoch": 0.29, "grad_norm": 4.333140850067139, "learning_rate": 1.9903231082054647e-05, "loss": 2.17, "step": 22497 }, { "epoch": 0.29, "grad_norm": 3.411984443664551, "learning_rate": 1.9903216498482294e-05, "loss": 1.8671, "step": 22498 }, { "epoch": 0.29, "grad_norm": 4.046570777893066, "learning_rate": 1.990320191381646e-05, "loss": 2.1066, "step": 22499 }, { "epoch": 0.29, "grad_norm": 4.97055196762085, "learning_rate": 1.9903187328057142e-05, "loss": 2.8131, "step": 22500 }, { "epoch": 0.29, "grad_norm": 3.3406827449798584, "learning_rate": 1.990317274120435e-05, "loss": 1.6771, "step": 22501 }, { "epoch": 0.29, "grad_norm": 3.825166940689087, "learning_rate": 1.9903158153258074e-05, "loss": 1.9226, "step": 22502 }, { "epoch": 0.29, "grad_norm": 3.4568448066711426, "learning_rate": 1.9903143564218323e-05, "loss": 1.4679, "step": 22503 }, { "epoch": 0.29, "grad_norm": 4.52194356918335, "learning_rate": 1.9903128974085097e-05, "loss": 1.999, "step": 22504 }, { "epoch": 0.29, "grad_norm": 4.189008712768555, "learning_rate": 1.99031143828584e-05, "loss": 2.0864, "step": 22505 }, { "epoch": 0.29, "grad_norm": 3.9414727687835693, "learning_rate": 1.9903099790538228e-05, "loss": 1.8407, "step": 22506 }, { "epoch": 0.29, "grad_norm": 4.513892650604248, "learning_rate": 1.990308519712459e-05, "loss": 2.4661, "step": 22507 }, { "epoch": 0.29, "grad_norm": 3.482795476913452, "learning_rate": 1.9903070602617474e-05, "loss": 1.6821, "step": 22508 }, { "epoch": 0.29, "grad_norm": 3.729384422302246, "learning_rate": 1.9903056007016898e-05, "loss": 1.7004, "step": 22509 }, { "epoch": 0.29, "grad_norm": 4.387443542480469, "learning_rate": 1.9903041410322856e-05, "loss": 1.976, "step": 22510 }, { "epoch": 0.29, "grad_norm": 4.201040744781494, "learning_rate": 1.9903026812535352e-05, "loss": 2.0079, "step": 22511 }, { "epoch": 0.29, "grad_norm": 4.3395209312438965, "learning_rate": 1.9903012213654383e-05, "loss": 2.5256, "step": 22512 }, { "epoch": 0.29, "grad_norm": 4.119121551513672, "learning_rate": 1.9902997613679952e-05, "loss": 2.1022, "step": 22513 }, { "epoch": 0.29, "grad_norm": 4.731935501098633, "learning_rate": 1.9902983012612066e-05, "loss": 2.4953, "step": 22514 }, { "epoch": 0.29, "grad_norm": 3.82495379447937, "learning_rate": 1.990296841045072e-05, "loss": 1.8174, "step": 22515 }, { "epoch": 0.29, "grad_norm": 4.263896465301514, "learning_rate": 1.990295380719592e-05, "loss": 2.5879, "step": 22516 }, { "epoch": 0.29, "grad_norm": 3.7943100929260254, "learning_rate": 1.9902939202847666e-05, "loss": 2.12, "step": 22517 }, { "epoch": 0.29, "grad_norm": 4.268696308135986, "learning_rate": 1.9902924597405956e-05, "loss": 2.2619, "step": 22518 }, { "epoch": 0.29, "grad_norm": 3.7233939170837402, "learning_rate": 1.9902909990870797e-05, "loss": 2.1643, "step": 22519 }, { "epoch": 0.29, "grad_norm": 3.912843942642212, "learning_rate": 1.9902895383242193e-05, "loss": 2.1575, "step": 22520 }, { "epoch": 0.29, "grad_norm": 3.74226975440979, "learning_rate": 1.9902880774520138e-05, "loss": 1.9745, "step": 22521 }, { "epoch": 0.29, "grad_norm": 3.748035192489624, "learning_rate": 1.9902866164704634e-05, "loss": 1.8898, "step": 22522 }, { "epoch": 0.29, "grad_norm": 4.08241081237793, "learning_rate": 1.990285155379569e-05, "loss": 2.3902, "step": 22523 }, { "epoch": 0.29, "grad_norm": 4.20372200012207, "learning_rate": 1.99028369417933e-05, "loss": 2.1767, "step": 22524 }, { "epoch": 0.29, "grad_norm": 4.1953582763671875, "learning_rate": 1.990282232869747e-05, "loss": 1.9303, "step": 22525 }, { "epoch": 0.29, "grad_norm": 4.348700046539307, "learning_rate": 1.99028077145082e-05, "loss": 2.3459, "step": 22526 }, { "epoch": 0.29, "grad_norm": 3.59328293800354, "learning_rate": 1.9902793099225494e-05, "loss": 1.6621, "step": 22527 }, { "epoch": 0.29, "grad_norm": 3.8383727073669434, "learning_rate": 1.990277848284935e-05, "loss": 1.7739, "step": 22528 }, { "epoch": 0.29, "grad_norm": 3.552973747253418, "learning_rate": 1.9902763865379773e-05, "loss": 1.6796, "step": 22529 }, { "epoch": 0.29, "grad_norm": 4.233720302581787, "learning_rate": 1.990274924681676e-05, "loss": 2.3027, "step": 22530 }, { "epoch": 0.29, "grad_norm": 4.028425693511963, "learning_rate": 1.990273462716032e-05, "loss": 2.3144, "step": 22531 }, { "epoch": 0.29, "grad_norm": 4.42658805847168, "learning_rate": 1.9902720006410447e-05, "loss": 2.0784, "step": 22532 }, { "epoch": 0.29, "grad_norm": 4.009160995483398, "learning_rate": 1.9902705384567143e-05, "loss": 2.077, "step": 22533 }, { "epoch": 0.29, "grad_norm": 3.512862205505371, "learning_rate": 1.9902690761630418e-05, "loss": 1.8826, "step": 22534 }, { "epoch": 0.29, "grad_norm": 3.934640645980835, "learning_rate": 1.9902676137600264e-05, "loss": 2.246, "step": 22535 }, { "epoch": 0.29, "grad_norm": 3.630826950073242, "learning_rate": 1.9902661512476687e-05, "loss": 2.1877, "step": 22536 }, { "epoch": 0.29, "grad_norm": 3.947176694869995, "learning_rate": 1.9902646886259687e-05, "loss": 1.8864, "step": 22537 }, { "epoch": 0.29, "grad_norm": 3.9898297786712646, "learning_rate": 1.990263225894927e-05, "loss": 1.9383, "step": 22538 }, { "epoch": 0.29, "grad_norm": 3.8136556148529053, "learning_rate": 1.9902617630545432e-05, "loss": 1.9334, "step": 22539 }, { "epoch": 0.29, "grad_norm": 3.7647976875305176, "learning_rate": 1.9902603001048176e-05, "loss": 2.0113, "step": 22540 }, { "epoch": 0.29, "grad_norm": 3.444396734237671, "learning_rate": 1.990258837045751e-05, "loss": 1.4965, "step": 22541 }, { "epoch": 0.29, "grad_norm": 4.2235541343688965, "learning_rate": 1.9902573738773427e-05, "loss": 2.5525, "step": 22542 }, { "epoch": 0.29, "grad_norm": 4.2847371101379395, "learning_rate": 1.990255910599593e-05, "loss": 1.739, "step": 22543 }, { "epoch": 0.29, "grad_norm": 4.237553596496582, "learning_rate": 1.9902544472125026e-05, "loss": 2.1112, "step": 22544 }, { "epoch": 0.29, "grad_norm": 3.9481277465820312, "learning_rate": 1.9902529837160708e-05, "loss": 2.0879, "step": 22545 }, { "epoch": 0.29, "grad_norm": 4.466728210449219, "learning_rate": 1.9902515201102988e-05, "loss": 2.2111, "step": 22546 }, { "epoch": 0.29, "grad_norm": 4.294085502624512, "learning_rate": 1.990250056395186e-05, "loss": 2.2265, "step": 22547 }, { "epoch": 0.29, "grad_norm": 4.737196445465088, "learning_rate": 1.990248592570733e-05, "loss": 2.6526, "step": 22548 }, { "epoch": 0.29, "grad_norm": 3.8720757961273193, "learning_rate": 1.9902471286369397e-05, "loss": 1.8992, "step": 22549 }, { "epoch": 0.29, "grad_norm": 3.8893845081329346, "learning_rate": 1.990245664593806e-05, "loss": 2.0265, "step": 22550 }, { "epoch": 0.29, "grad_norm": 4.188279628753662, "learning_rate": 1.9902442004413326e-05, "loss": 2.5971, "step": 22551 }, { "epoch": 0.29, "grad_norm": 3.786895990371704, "learning_rate": 1.9902427361795194e-05, "loss": 2.0101, "step": 22552 }, { "epoch": 0.29, "grad_norm": 3.752112865447998, "learning_rate": 1.9902412718083667e-05, "loss": 1.9472, "step": 22553 }, { "epoch": 0.29, "grad_norm": 3.764587163925171, "learning_rate": 1.9902398073278744e-05, "loss": 1.9747, "step": 22554 }, { "epoch": 0.29, "grad_norm": 4.448166370391846, "learning_rate": 1.9902383427380426e-05, "loss": 2.2719, "step": 22555 }, { "epoch": 0.29, "grad_norm": 3.678802728652954, "learning_rate": 1.9902368780388724e-05, "loss": 1.844, "step": 22556 }, { "epoch": 0.29, "grad_norm": 4.003427505493164, "learning_rate": 1.9902354132303628e-05, "loss": 2.2071, "step": 22557 }, { "epoch": 0.29, "grad_norm": 3.9902291297912598, "learning_rate": 1.9902339483125144e-05, "loss": 2.3353, "step": 22558 }, { "epoch": 0.29, "grad_norm": 4.087562561035156, "learning_rate": 1.9902324832853273e-05, "loss": 2.2138, "step": 22559 }, { "epoch": 0.29, "grad_norm": 3.8327982425689697, "learning_rate": 1.990231018148802e-05, "loss": 2.2296, "step": 22560 }, { "epoch": 0.29, "grad_norm": 4.33453893661499, "learning_rate": 1.990229552902938e-05, "loss": 1.9761, "step": 22561 }, { "epoch": 0.29, "grad_norm": 3.942652463912964, "learning_rate": 1.9902280875477363e-05, "loss": 2.022, "step": 22562 }, { "epoch": 0.29, "grad_norm": 4.483393669128418, "learning_rate": 1.9902266220831967e-05, "loss": 2.3957, "step": 22563 }, { "epoch": 0.29, "grad_norm": 4.82244873046875, "learning_rate": 1.9902251565093186e-05, "loss": 2.5878, "step": 22564 }, { "epoch": 0.29, "grad_norm": 4.868110656738281, "learning_rate": 1.9902236908261035e-05, "loss": 2.8104, "step": 22565 }, { "epoch": 0.29, "grad_norm": 3.817756414413452, "learning_rate": 1.9902222250335503e-05, "loss": 2.0973, "step": 22566 }, { "epoch": 0.29, "grad_norm": 3.694862127304077, "learning_rate": 1.9902207591316604e-05, "loss": 2.139, "step": 22567 }, { "epoch": 0.29, "grad_norm": 4.610971927642822, "learning_rate": 1.9902192931204328e-05, "loss": 2.3972, "step": 22568 }, { "epoch": 0.29, "grad_norm": 3.727687358856201, "learning_rate": 1.9902178269998688e-05, "loss": 1.9913, "step": 22569 }, { "epoch": 0.29, "grad_norm": 3.226125717163086, "learning_rate": 1.9902163607699674e-05, "loss": 1.5211, "step": 22570 }, { "epoch": 0.29, "grad_norm": 3.9283437728881836, "learning_rate": 1.9902148944307297e-05, "loss": 1.9722, "step": 22571 }, { "epoch": 0.29, "grad_norm": 4.501183032989502, "learning_rate": 1.990213427982155e-05, "loss": 2.5239, "step": 22572 }, { "epoch": 0.29, "grad_norm": 3.694277048110962, "learning_rate": 1.9902119614242444e-05, "loss": 2.0585, "step": 22573 }, { "epoch": 0.29, "grad_norm": 3.646078586578369, "learning_rate": 1.9902104947569973e-05, "loss": 1.8776, "step": 22574 }, { "epoch": 0.29, "grad_norm": 4.240541934967041, "learning_rate": 1.9902090279804144e-05, "loss": 1.8426, "step": 22575 }, { "epoch": 0.29, "grad_norm": 3.594836473464966, "learning_rate": 1.9902075610944955e-05, "loss": 1.977, "step": 22576 }, { "epoch": 0.29, "grad_norm": 4.34425687789917, "learning_rate": 1.990206094099241e-05, "loss": 1.8713, "step": 22577 }, { "epoch": 0.29, "grad_norm": 3.985898971557617, "learning_rate": 1.990204626994651e-05, "loss": 1.7764, "step": 22578 }, { "epoch": 0.29, "grad_norm": 4.462373733520508, "learning_rate": 1.9902031597807253e-05, "loss": 2.1029, "step": 22579 }, { "epoch": 0.29, "grad_norm": 4.143810272216797, "learning_rate": 1.9902016924574643e-05, "loss": 2.1262, "step": 22580 }, { "epoch": 0.29, "grad_norm": 4.323856830596924, "learning_rate": 1.9902002250248687e-05, "loss": 2.361, "step": 22581 }, { "epoch": 0.29, "grad_norm": 4.473555088043213, "learning_rate": 1.9901987574829377e-05, "loss": 2.2021, "step": 22582 }, { "epoch": 0.29, "grad_norm": 4.091012001037598, "learning_rate": 1.9901972898316723e-05, "loss": 1.9724, "step": 22583 }, { "epoch": 0.29, "grad_norm": 3.8373000621795654, "learning_rate": 1.9901958220710723e-05, "loss": 2.3603, "step": 22584 }, { "epoch": 0.29, "grad_norm": 4.040380954742432, "learning_rate": 1.9901943542011376e-05, "loss": 1.9374, "step": 22585 }, { "epoch": 0.29, "grad_norm": 4.13870096206665, "learning_rate": 1.9901928862218693e-05, "loss": 1.9142, "step": 22586 }, { "epoch": 0.29, "grad_norm": 4.06416654586792, "learning_rate": 1.9901914181332663e-05, "loss": 1.9804, "step": 22587 }, { "epoch": 0.29, "grad_norm": 3.982680320739746, "learning_rate": 1.9901899499353296e-05, "loss": 2.184, "step": 22588 }, { "epoch": 0.29, "grad_norm": 3.6853208541870117, "learning_rate": 1.990188481628059e-05, "loss": 2.1949, "step": 22589 }, { "epoch": 0.29, "grad_norm": 4.39858865737915, "learning_rate": 1.990187013211455e-05, "loss": 2.305, "step": 22590 }, { "epoch": 0.29, "grad_norm": 4.56801176071167, "learning_rate": 1.9901855446855172e-05, "loss": 2.3402, "step": 22591 }, { "epoch": 0.29, "grad_norm": 3.990173816680908, "learning_rate": 1.9901840760502463e-05, "loss": 1.8328, "step": 22592 }, { "epoch": 0.29, "grad_norm": 4.235346794128418, "learning_rate": 1.9901826073056423e-05, "loss": 2.3181, "step": 22593 }, { "epoch": 0.29, "grad_norm": 4.637533187866211, "learning_rate": 1.9901811384517057e-05, "loss": 2.2488, "step": 22594 }, { "epoch": 0.29, "grad_norm": 4.118386268615723, "learning_rate": 1.9901796694884358e-05, "loss": 2.0289, "step": 22595 }, { "epoch": 0.29, "grad_norm": 3.7413790225982666, "learning_rate": 1.9901782004158333e-05, "loss": 1.9681, "step": 22596 }, { "epoch": 0.29, "grad_norm": 3.9640049934387207, "learning_rate": 1.9901767312338984e-05, "loss": 1.8087, "step": 22597 }, { "epoch": 0.29, "grad_norm": 3.992236375808716, "learning_rate": 1.9901752619426316e-05, "loss": 2.2962, "step": 22598 }, { "epoch": 0.29, "grad_norm": 3.967738389968872, "learning_rate": 1.990173792542032e-05, "loss": 1.8664, "step": 22599 }, { "epoch": 0.29, "grad_norm": 5.869726657867432, "learning_rate": 1.9901723230321007e-05, "loss": 2.4816, "step": 22600 }, { "epoch": 0.29, "grad_norm": 3.7453114986419678, "learning_rate": 1.9901708534128377e-05, "loss": 1.7917, "step": 22601 }, { "epoch": 0.29, "grad_norm": 4.109940528869629, "learning_rate": 1.990169383684243e-05, "loss": 2.1985, "step": 22602 }, { "epoch": 0.29, "grad_norm": 4.004821300506592, "learning_rate": 1.9901679138463167e-05, "loss": 2.1013, "step": 22603 }, { "epoch": 0.29, "grad_norm": 3.7363269329071045, "learning_rate": 1.9901664438990588e-05, "loss": 1.7966, "step": 22604 }, { "epoch": 0.29, "grad_norm": 3.548610210418701, "learning_rate": 1.9901649738424706e-05, "loss": 1.8508, "step": 22605 }, { "epoch": 0.29, "grad_norm": 3.635870933532715, "learning_rate": 1.9901635036765504e-05, "loss": 2.5395, "step": 22606 }, { "epoch": 0.29, "grad_norm": 3.9333457946777344, "learning_rate": 1.9901620334013e-05, "loss": 2.2699, "step": 22607 }, { "epoch": 0.29, "grad_norm": 3.655778408050537, "learning_rate": 1.9901605630167186e-05, "loss": 1.9109, "step": 22608 }, { "epoch": 0.29, "grad_norm": 3.9042160511016846, "learning_rate": 1.9901590925228066e-05, "loss": 1.9882, "step": 22609 }, { "epoch": 0.29, "grad_norm": 3.846623659133911, "learning_rate": 1.9901576219195644e-05, "loss": 2.0736, "step": 22610 }, { "epoch": 0.29, "grad_norm": 4.021834850311279, "learning_rate": 1.990156151206992e-05, "loss": 2.0463, "step": 22611 }, { "epoch": 0.29, "grad_norm": 4.027489185333252, "learning_rate": 1.9901546803850898e-05, "loss": 1.9471, "step": 22612 }, { "epoch": 0.29, "grad_norm": 3.972775936126709, "learning_rate": 1.9901532094538574e-05, "loss": 1.8521, "step": 22613 }, { "epoch": 0.29, "grad_norm": 3.90144419670105, "learning_rate": 1.990151738413295e-05, "loss": 2.0454, "step": 22614 }, { "epoch": 0.29, "grad_norm": 4.629220008850098, "learning_rate": 1.9901502672634035e-05, "loss": 2.1543, "step": 22615 }, { "epoch": 0.29, "grad_norm": 3.877772092819214, "learning_rate": 1.9901487960041827e-05, "loss": 1.7556, "step": 22616 }, { "epoch": 0.29, "grad_norm": 3.8021178245544434, "learning_rate": 1.9901473246356323e-05, "loss": 1.7725, "step": 22617 }, { "epoch": 0.29, "grad_norm": 5.473092079162598, "learning_rate": 1.990145853157753e-05, "loss": 2.253, "step": 22618 }, { "epoch": 0.29, "grad_norm": 4.304577827453613, "learning_rate": 1.990144381570545e-05, "loss": 1.9893, "step": 22619 }, { "epoch": 0.29, "grad_norm": 3.9288034439086914, "learning_rate": 1.9901429098740082e-05, "loss": 2.0879, "step": 22620 }, { "epoch": 0.29, "grad_norm": 4.241372108459473, "learning_rate": 1.990141438068143e-05, "loss": 2.1699, "step": 22621 }, { "epoch": 0.29, "grad_norm": 4.148128986358643, "learning_rate": 1.990139966152949e-05, "loss": 2.1771, "step": 22622 }, { "epoch": 0.29, "grad_norm": 3.80818772315979, "learning_rate": 1.9901384941284267e-05, "loss": 2.3008, "step": 22623 }, { "epoch": 0.29, "grad_norm": 3.648974895477295, "learning_rate": 1.9901370219945762e-05, "loss": 1.6674, "step": 22624 }, { "epoch": 0.29, "grad_norm": 4.210301876068115, "learning_rate": 1.9901355497513983e-05, "loss": 2.3747, "step": 22625 }, { "epoch": 0.29, "grad_norm": 3.842353105545044, "learning_rate": 1.9901340773988924e-05, "loss": 1.8829, "step": 22626 }, { "epoch": 0.29, "grad_norm": 3.578197956085205, "learning_rate": 1.990132604937059e-05, "loss": 1.8221, "step": 22627 }, { "epoch": 0.29, "grad_norm": 5.805032253265381, "learning_rate": 1.990131132365898e-05, "loss": 1.9565, "step": 22628 }, { "epoch": 0.29, "grad_norm": 4.404558181762695, "learning_rate": 1.9901296596854097e-05, "loss": 2.3103, "step": 22629 }, { "epoch": 0.29, "grad_norm": 4.120184421539307, "learning_rate": 1.9901281868955942e-05, "loss": 1.7374, "step": 22630 }, { "epoch": 0.29, "grad_norm": 3.779583215713501, "learning_rate": 1.990126713996452e-05, "loss": 2.0776, "step": 22631 }, { "epoch": 0.29, "grad_norm": 3.9271440505981445, "learning_rate": 1.990125240987983e-05, "loss": 2.1986, "step": 22632 }, { "epoch": 0.29, "grad_norm": 4.072139263153076, "learning_rate": 1.9901237678701872e-05, "loss": 2.2587, "step": 22633 }, { "epoch": 0.29, "grad_norm": 6.449601650238037, "learning_rate": 1.9901222946430653e-05, "loss": 2.6572, "step": 22634 }, { "epoch": 0.29, "grad_norm": 4.106854438781738, "learning_rate": 1.9901208213066164e-05, "loss": 2.1757, "step": 22635 }, { "epoch": 0.29, "grad_norm": 4.087186813354492, "learning_rate": 1.990119347860842e-05, "loss": 2.2173, "step": 22636 }, { "epoch": 0.29, "grad_norm": 3.6459944248199463, "learning_rate": 1.9901178743057415e-05, "loss": 1.8667, "step": 22637 }, { "epoch": 0.29, "grad_norm": 3.9433302879333496, "learning_rate": 1.990116400641315e-05, "loss": 1.8592, "step": 22638 }, { "epoch": 0.29, "grad_norm": 4.589236736297607, "learning_rate": 1.990114926867563e-05, "loss": 2.159, "step": 22639 }, { "epoch": 0.29, "grad_norm": 3.3466522693634033, "learning_rate": 1.9901134529844856e-05, "loss": 1.9323, "step": 22640 }, { "epoch": 0.29, "grad_norm": 4.057908535003662, "learning_rate": 1.9901119789920826e-05, "loss": 2.1344, "step": 22641 }, { "epoch": 0.29, "grad_norm": 3.834012031555176, "learning_rate": 1.9901105048903547e-05, "loss": 1.6475, "step": 22642 }, { "epoch": 0.29, "grad_norm": 4.607934951782227, "learning_rate": 1.9901090306793017e-05, "loss": 2.5561, "step": 22643 }, { "epoch": 0.29, "grad_norm": 4.339140892028809, "learning_rate": 1.9901075563589238e-05, "loss": 2.0051, "step": 22644 }, { "epoch": 0.29, "grad_norm": 3.8267300128936768, "learning_rate": 1.9901060819292214e-05, "loss": 2.0426, "step": 22645 }, { "epoch": 0.29, "grad_norm": 4.381044864654541, "learning_rate": 1.9901046073901945e-05, "loss": 1.8774, "step": 22646 }, { "epoch": 0.29, "grad_norm": 4.654969692230225, "learning_rate": 1.990103132741843e-05, "loss": 1.8981, "step": 22647 }, { "epoch": 0.29, "grad_norm": 4.5175371170043945, "learning_rate": 1.9901016579841674e-05, "loss": 2.8619, "step": 22648 }, { "epoch": 0.29, "grad_norm": 4.2603840827941895, "learning_rate": 1.9901001831171677e-05, "loss": 1.8085, "step": 22649 }, { "epoch": 0.29, "grad_norm": 3.2212438583374023, "learning_rate": 1.9900987081408444e-05, "loss": 1.7503, "step": 22650 }, { "epoch": 0.29, "grad_norm": 3.781653881072998, "learning_rate": 1.9900972330551973e-05, "loss": 1.9195, "step": 22651 }, { "epoch": 0.29, "grad_norm": 3.779012680053711, "learning_rate": 1.9900957578602267e-05, "loss": 2.1927, "step": 22652 }, { "epoch": 0.29, "grad_norm": 4.030139923095703, "learning_rate": 1.9900942825559326e-05, "loss": 2.0778, "step": 22653 }, { "epoch": 0.29, "grad_norm": 4.068667411804199, "learning_rate": 1.9900928071423154e-05, "loss": 2.2129, "step": 22654 }, { "epoch": 0.29, "grad_norm": 4.2415056228637695, "learning_rate": 1.9900913316193754e-05, "loss": 2.2001, "step": 22655 }, { "epoch": 0.29, "grad_norm": 4.737491607666016, "learning_rate": 1.990089855987112e-05, "loss": 2.04, "step": 22656 }, { "epoch": 0.29, "grad_norm": 4.010738372802734, "learning_rate": 1.990088380245526e-05, "loss": 2.3008, "step": 22657 }, { "epoch": 0.29, "grad_norm": 3.7082111835479736, "learning_rate": 1.990086904394618e-05, "loss": 2.1179, "step": 22658 }, { "epoch": 0.29, "grad_norm": 4.236144542694092, "learning_rate": 1.990085428434387e-05, "loss": 2.1097, "step": 22659 }, { "epoch": 0.29, "grad_norm": 4.507319450378418, "learning_rate": 1.990083952364834e-05, "loss": 2.3463, "step": 22660 }, { "epoch": 0.29, "grad_norm": 4.288296222686768, "learning_rate": 1.990082476185959e-05, "loss": 2.5055, "step": 22661 }, { "epoch": 0.29, "grad_norm": 3.8880231380462646, "learning_rate": 1.990080999897762e-05, "loss": 2.1442, "step": 22662 }, { "epoch": 0.29, "grad_norm": 3.6892309188842773, "learning_rate": 1.990079523500243e-05, "loss": 1.9403, "step": 22663 }, { "epoch": 0.29, "grad_norm": 3.782371759414673, "learning_rate": 1.9900780469934028e-05, "loss": 2.0725, "step": 22664 }, { "epoch": 0.29, "grad_norm": 4.218002796173096, "learning_rate": 1.990076570377241e-05, "loss": 1.8757, "step": 22665 }, { "epoch": 0.29, "grad_norm": 3.7725937366485596, "learning_rate": 1.990075093651758e-05, "loss": 1.9785, "step": 22666 }, { "epoch": 0.29, "grad_norm": 4.249769687652588, "learning_rate": 1.9900736168169538e-05, "loss": 2.0224, "step": 22667 }, { "epoch": 0.29, "grad_norm": 3.6818184852600098, "learning_rate": 1.990072139872829e-05, "loss": 1.744, "step": 22668 }, { "epoch": 0.29, "grad_norm": 4.407542705535889, "learning_rate": 1.9900706628193834e-05, "loss": 1.995, "step": 22669 }, { "epoch": 0.29, "grad_norm": 4.085696697235107, "learning_rate": 1.990069185656617e-05, "loss": 2.2258, "step": 22670 }, { "epoch": 0.29, "grad_norm": 4.317594051361084, "learning_rate": 1.9900677083845302e-05, "loss": 3.0131, "step": 22671 }, { "epoch": 0.29, "grad_norm": 4.718000888824463, "learning_rate": 1.990066231003123e-05, "loss": 2.725, "step": 22672 }, { "epoch": 0.29, "grad_norm": 4.158336162567139, "learning_rate": 1.990064753512396e-05, "loss": 2.1605, "step": 22673 }, { "epoch": 0.29, "grad_norm": 4.5376973152160645, "learning_rate": 1.990063275912349e-05, "loss": 2.2432, "step": 22674 }, { "epoch": 0.29, "grad_norm": 3.167050361633301, "learning_rate": 1.990061798202982e-05, "loss": 1.5429, "step": 22675 }, { "epoch": 0.29, "grad_norm": 3.698974847793579, "learning_rate": 1.9900603203842953e-05, "loss": 2.002, "step": 22676 }, { "epoch": 0.29, "grad_norm": 4.406347274780273, "learning_rate": 1.9900588424562892e-05, "loss": 2.2882, "step": 22677 }, { "epoch": 0.29, "grad_norm": 4.21183967590332, "learning_rate": 1.9900573644189644e-05, "loss": 1.9487, "step": 22678 }, { "epoch": 0.29, "grad_norm": 4.207836627960205, "learning_rate": 1.9900558862723195e-05, "loss": 2.477, "step": 22679 }, { "epoch": 0.29, "grad_norm": 3.8547115325927734, "learning_rate": 1.990054408016356e-05, "loss": 1.8571, "step": 22680 }, { "epoch": 0.29, "grad_norm": 3.9497599601745605, "learning_rate": 1.9900529296510744e-05, "loss": 1.9874, "step": 22681 }, { "epoch": 0.29, "grad_norm": 4.010037899017334, "learning_rate": 1.9900514511764735e-05, "loss": 2.2586, "step": 22682 }, { "epoch": 0.29, "grad_norm": 4.057958126068115, "learning_rate": 1.9900499725925543e-05, "loss": 2.5291, "step": 22683 }, { "epoch": 0.29, "grad_norm": 4.0731000900268555, "learning_rate": 1.9900484938993166e-05, "loss": 2.0787, "step": 22684 }, { "epoch": 0.29, "grad_norm": 3.694578170776367, "learning_rate": 1.990047015096761e-05, "loss": 2.0349, "step": 22685 }, { "epoch": 0.29, "grad_norm": 4.238864898681641, "learning_rate": 1.990045536184887e-05, "loss": 1.9004, "step": 22686 }, { "epoch": 0.29, "grad_norm": 3.9539811611175537, "learning_rate": 1.9900440571636955e-05, "loss": 1.9547, "step": 22687 }, { "epoch": 0.29, "grad_norm": 3.7656211853027344, "learning_rate": 1.9900425780331862e-05, "loss": 1.8743, "step": 22688 }, { "epoch": 0.29, "grad_norm": 4.258091449737549, "learning_rate": 1.9900410987933596e-05, "loss": 2.5643, "step": 22689 }, { "epoch": 0.29, "grad_norm": 3.5152840614318848, "learning_rate": 1.9900396194442155e-05, "loss": 1.9808, "step": 22690 }, { "epoch": 0.29, "grad_norm": 3.584587335586548, "learning_rate": 1.9900381399857544e-05, "loss": 1.4664, "step": 22691 }, { "epoch": 0.29, "grad_norm": 3.98486328125, "learning_rate": 1.990036660417976e-05, "loss": 2.1377, "step": 22692 }, { "epoch": 0.29, "grad_norm": 3.639859437942505, "learning_rate": 1.990035180740881e-05, "loss": 1.8007, "step": 22693 }, { "epoch": 0.29, "grad_norm": 4.170965671539307, "learning_rate": 1.9900337009544694e-05, "loss": 2.3857, "step": 22694 }, { "epoch": 0.29, "grad_norm": 3.9850196838378906, "learning_rate": 1.990032221058741e-05, "loss": 1.9497, "step": 22695 }, { "epoch": 0.29, "grad_norm": 4.022523880004883, "learning_rate": 1.9900307410536963e-05, "loss": 1.9861, "step": 22696 }, { "epoch": 0.29, "grad_norm": 3.7563841342926025, "learning_rate": 1.9900292609393356e-05, "loss": 1.8656, "step": 22697 }, { "epoch": 0.29, "grad_norm": 3.8567099571228027, "learning_rate": 1.9900277807156588e-05, "loss": 2.0918, "step": 22698 }, { "epoch": 0.29, "grad_norm": 4.160699844360352, "learning_rate": 1.990026300382666e-05, "loss": 2.0868, "step": 22699 }, { "epoch": 0.29, "grad_norm": 4.296678066253662, "learning_rate": 1.9900248199403574e-05, "loss": 2.284, "step": 22700 }, { "epoch": 0.29, "grad_norm": 4.2766923904418945, "learning_rate": 1.9900233393887336e-05, "loss": 2.1124, "step": 22701 }, { "epoch": 0.29, "grad_norm": 4.15187931060791, "learning_rate": 1.9900218587277943e-05, "loss": 2.51, "step": 22702 }, { "epoch": 0.29, "grad_norm": 4.5024333000183105, "learning_rate": 1.99002037795754e-05, "loss": 2.1618, "step": 22703 }, { "epoch": 0.29, "grad_norm": 4.085465908050537, "learning_rate": 1.9900188970779704e-05, "loss": 2.0293, "step": 22704 }, { "epoch": 0.29, "grad_norm": 4.321706295013428, "learning_rate": 1.990017416089086e-05, "loss": 2.4358, "step": 22705 }, { "epoch": 0.29, "grad_norm": 3.78143048286438, "learning_rate": 1.9900159349908868e-05, "loss": 2.416, "step": 22706 }, { "epoch": 0.29, "grad_norm": 3.7998533248901367, "learning_rate": 1.990014453783373e-05, "loss": 1.5934, "step": 22707 }, { "epoch": 0.29, "grad_norm": 4.345265865325928, "learning_rate": 1.990012972466545e-05, "loss": 2.5031, "step": 22708 }, { "epoch": 0.29, "grad_norm": 3.78206467628479, "learning_rate": 1.9900114910404027e-05, "loss": 2.0424, "step": 22709 }, { "epoch": 0.29, "grad_norm": 3.6422157287597656, "learning_rate": 1.9900100095049463e-05, "loss": 1.9736, "step": 22710 }, { "epoch": 0.29, "grad_norm": 4.290879726409912, "learning_rate": 1.990008527860176e-05, "loss": 2.0067, "step": 22711 }, { "epoch": 0.29, "grad_norm": 4.306687355041504, "learning_rate": 1.990007046106092e-05, "loss": 2.3194, "step": 22712 }, { "epoch": 0.29, "grad_norm": 3.553280830383301, "learning_rate": 1.990005564242694e-05, "loss": 1.9673, "step": 22713 }, { "epoch": 0.29, "grad_norm": 3.7098684310913086, "learning_rate": 1.9900040822699834e-05, "loss": 1.8569, "step": 22714 }, { "epoch": 0.29, "grad_norm": 3.227306604385376, "learning_rate": 1.990002600187959e-05, "loss": 1.3558, "step": 22715 }, { "epoch": 0.29, "grad_norm": 4.163387775421143, "learning_rate": 1.9900011179966216e-05, "loss": 2.2336, "step": 22716 }, { "epoch": 0.29, "grad_norm": 3.8241395950317383, "learning_rate": 1.9899996356959713e-05, "loss": 1.9356, "step": 22717 }, { "epoch": 0.29, "grad_norm": 4.2053985595703125, "learning_rate": 1.9899981532860083e-05, "loss": 2.7724, "step": 22718 }, { "epoch": 0.29, "grad_norm": 3.434328556060791, "learning_rate": 1.9899966707667326e-05, "loss": 1.9187, "step": 22719 }, { "epoch": 0.29, "grad_norm": 4.481021404266357, "learning_rate": 1.9899951881381446e-05, "loss": 2.2114, "step": 22720 }, { "epoch": 0.29, "grad_norm": 3.729898691177368, "learning_rate": 1.9899937054002443e-05, "loss": 1.9466, "step": 22721 }, { "epoch": 0.29, "grad_norm": 4.504248142242432, "learning_rate": 1.989992222553032e-05, "loss": 2.333, "step": 22722 }, { "epoch": 0.29, "grad_norm": 5.15822696685791, "learning_rate": 1.9899907395965075e-05, "loss": 3.0364, "step": 22723 }, { "epoch": 0.29, "grad_norm": 4.218759059906006, "learning_rate": 1.9899892565306716e-05, "loss": 1.9591, "step": 22724 }, { "epoch": 0.29, "grad_norm": 5.084462642669678, "learning_rate": 1.9899877733555238e-05, "loss": 2.4446, "step": 22725 }, { "epoch": 0.29, "grad_norm": 4.068542003631592, "learning_rate": 1.9899862900710647e-05, "loss": 1.9261, "step": 22726 }, { "epoch": 0.29, "grad_norm": 3.958455801010132, "learning_rate": 1.9899848066772942e-05, "loss": 2.5964, "step": 22727 }, { "epoch": 0.29, "grad_norm": 4.276625633239746, "learning_rate": 1.9899833231742125e-05, "loss": 2.2421, "step": 22728 }, { "epoch": 0.29, "grad_norm": 4.466207027435303, "learning_rate": 1.98998183956182e-05, "loss": 2.4793, "step": 22729 }, { "epoch": 0.29, "grad_norm": 4.050022602081299, "learning_rate": 1.989980355840117e-05, "loss": 2.1799, "step": 22730 }, { "epoch": 0.29, "grad_norm": 3.5312047004699707, "learning_rate": 1.9899788720091026e-05, "loss": 1.4478, "step": 22731 }, { "epoch": 0.3, "grad_norm": 3.9855785369873047, "learning_rate": 1.9899773880687784e-05, "loss": 2.0082, "step": 22732 }, { "epoch": 0.3, "grad_norm": 4.550546169281006, "learning_rate": 1.9899759040191435e-05, "loss": 2.6425, "step": 22733 }, { "epoch": 0.3, "grad_norm": 4.242201328277588, "learning_rate": 1.9899744198601988e-05, "loss": 1.975, "step": 22734 }, { "epoch": 0.3, "grad_norm": 4.482921600341797, "learning_rate": 1.989972935591944e-05, "loss": 2.0856, "step": 22735 }, { "epoch": 0.3, "grad_norm": 3.570641040802002, "learning_rate": 1.9899714512143794e-05, "loss": 1.7454, "step": 22736 }, { "epoch": 0.3, "grad_norm": 4.379983901977539, "learning_rate": 1.9899699667275052e-05, "loss": 2.2197, "step": 22737 }, { "epoch": 0.3, "grad_norm": 4.063460826873779, "learning_rate": 1.9899684821313213e-05, "loss": 1.8642, "step": 22738 }, { "epoch": 0.3, "grad_norm": 3.7635767459869385, "learning_rate": 1.9899669974258282e-05, "loss": 2.258, "step": 22739 }, { "epoch": 0.3, "grad_norm": 3.6961328983306885, "learning_rate": 1.989965512611026e-05, "loss": 2.0254, "step": 22740 }, { "epoch": 0.3, "grad_norm": 4.173260688781738, "learning_rate": 1.989964027686915e-05, "loss": 1.8652, "step": 22741 }, { "epoch": 0.3, "grad_norm": 4.379106044769287, "learning_rate": 1.989962542653495e-05, "loss": 2.095, "step": 22742 }, { "epoch": 0.3, "grad_norm": 3.8286638259887695, "learning_rate": 1.9899610575107662e-05, "loss": 2.1953, "step": 22743 }, { "epoch": 0.3, "grad_norm": 4.140234470367432, "learning_rate": 1.989959572258729e-05, "loss": 1.9643, "step": 22744 }, { "epoch": 0.3, "grad_norm": 3.629068613052368, "learning_rate": 1.9899580868973835e-05, "loss": 2.119, "step": 22745 }, { "epoch": 0.3, "grad_norm": 3.811823844909668, "learning_rate": 1.9899566014267296e-05, "loss": 2.0072, "step": 22746 }, { "epoch": 0.3, "grad_norm": 4.583179473876953, "learning_rate": 1.9899551158467682e-05, "loss": 2.8061, "step": 22747 }, { "epoch": 0.3, "grad_norm": 5.467411041259766, "learning_rate": 1.9899536301574988e-05, "loss": 2.7577, "step": 22748 }, { "epoch": 0.3, "grad_norm": 3.9541494846343994, "learning_rate": 1.9899521443589215e-05, "loss": 1.9202, "step": 22749 }, { "epoch": 0.3, "grad_norm": 4.311159610748291, "learning_rate": 1.9899506584510367e-05, "loss": 1.7021, "step": 22750 }, { "epoch": 0.3, "grad_norm": 3.84806227684021, "learning_rate": 1.9899491724338446e-05, "loss": 1.7599, "step": 22751 }, { "epoch": 0.3, "grad_norm": 3.8913187980651855, "learning_rate": 1.9899476863073453e-05, "loss": 1.9477, "step": 22752 }, { "epoch": 0.3, "grad_norm": 3.8687222003936768, "learning_rate": 1.989946200071539e-05, "loss": 2.2194, "step": 22753 }, { "epoch": 0.3, "grad_norm": 4.006970405578613, "learning_rate": 1.989944713726426e-05, "loss": 2.1309, "step": 22754 }, { "epoch": 0.3, "grad_norm": 4.00167179107666, "learning_rate": 1.9899432272720064e-05, "loss": 2.3753, "step": 22755 }, { "epoch": 0.3, "grad_norm": 4.118442058563232, "learning_rate": 1.98994174070828e-05, "loss": 2.2614, "step": 22756 }, { "epoch": 0.3, "grad_norm": 4.135845184326172, "learning_rate": 1.989940254035247e-05, "loss": 1.7966, "step": 22757 }, { "epoch": 0.3, "grad_norm": 4.426350116729736, "learning_rate": 1.9899387672529085e-05, "loss": 2.6937, "step": 22758 }, { "epoch": 0.3, "grad_norm": 4.215397834777832, "learning_rate": 1.9899372803612637e-05, "loss": 1.9746, "step": 22759 }, { "epoch": 0.3, "grad_norm": 3.77254056930542, "learning_rate": 1.9899357933603127e-05, "loss": 1.9511, "step": 22760 }, { "epoch": 0.3, "grad_norm": 3.4670629501342773, "learning_rate": 1.989934306250056e-05, "loss": 1.7718, "step": 22761 }, { "epoch": 0.3, "grad_norm": 4.024878978729248, "learning_rate": 1.989932819030494e-05, "loss": 1.9933, "step": 22762 }, { "epoch": 0.3, "grad_norm": 3.712921619415283, "learning_rate": 1.9899313317016267e-05, "loss": 2.0463, "step": 22763 }, { "epoch": 0.3, "grad_norm": 4.265034198760986, "learning_rate": 1.989929844263454e-05, "loss": 2.3946, "step": 22764 }, { "epoch": 0.3, "grad_norm": 4.2830986976623535, "learning_rate": 1.9899283567159764e-05, "loss": 2.1558, "step": 22765 }, { "epoch": 0.3, "grad_norm": 4.2436203956604, "learning_rate": 1.989926869059194e-05, "loss": 2.3828, "step": 22766 }, { "epoch": 0.3, "grad_norm": 4.206227779388428, "learning_rate": 1.9899253812931064e-05, "loss": 2.2666, "step": 22767 }, { "epoch": 0.3, "grad_norm": 4.326817512512207, "learning_rate": 1.989923893417715e-05, "loss": 1.9193, "step": 22768 }, { "epoch": 0.3, "grad_norm": 4.0533857345581055, "learning_rate": 1.9899224054330184e-05, "loss": 2.1775, "step": 22769 }, { "epoch": 0.3, "grad_norm": 4.446964263916016, "learning_rate": 1.989920917339018e-05, "loss": 2.2962, "step": 22770 }, { "epoch": 0.3, "grad_norm": 3.457583427429199, "learning_rate": 1.9899194291357133e-05, "loss": 1.5691, "step": 22771 }, { "epoch": 0.3, "grad_norm": 3.91308331489563, "learning_rate": 1.989917940823105e-05, "loss": 2.1715, "step": 22772 }, { "epoch": 0.3, "grad_norm": 4.448330879211426, "learning_rate": 1.9899164524011928e-05, "loss": 1.9867, "step": 22773 }, { "epoch": 0.3, "grad_norm": 4.1286396980285645, "learning_rate": 1.989914963869977e-05, "loss": 2.1258, "step": 22774 }, { "epoch": 0.3, "grad_norm": 4.1932573318481445, "learning_rate": 1.989913475229458e-05, "loss": 1.8946, "step": 22775 }, { "epoch": 0.3, "grad_norm": 3.832974433898926, "learning_rate": 1.9899119864796356e-05, "loss": 1.7723, "step": 22776 }, { "epoch": 0.3, "grad_norm": 3.500572919845581, "learning_rate": 1.98991049762051e-05, "loss": 1.8361, "step": 22777 }, { "epoch": 0.3, "grad_norm": 4.179625034332275, "learning_rate": 1.9899090086520817e-05, "loss": 1.9324, "step": 22778 }, { "epoch": 0.3, "grad_norm": 4.205900192260742, "learning_rate": 1.9899075195743506e-05, "loss": 2.2521, "step": 22779 }, { "epoch": 0.3, "grad_norm": 4.254006862640381, "learning_rate": 1.989906030387317e-05, "loss": 1.7752, "step": 22780 }, { "epoch": 0.3, "grad_norm": 4.599991321563721, "learning_rate": 1.9899045410909808e-05, "loss": 1.9513, "step": 22781 }, { "epoch": 0.3, "grad_norm": 3.6994123458862305, "learning_rate": 1.9899030516853425e-05, "loss": 1.9105, "step": 22782 }, { "epoch": 0.3, "grad_norm": 4.2698869705200195, "learning_rate": 1.989901562170402e-05, "loss": 1.6439, "step": 22783 }, { "epoch": 0.3, "grad_norm": 3.9213814735412598, "learning_rate": 1.9899000725461595e-05, "loss": 1.9203, "step": 22784 }, { "epoch": 0.3, "grad_norm": 3.6346240043640137, "learning_rate": 1.9898985828126155e-05, "loss": 1.988, "step": 22785 }, { "epoch": 0.3, "grad_norm": 4.038201808929443, "learning_rate": 1.9898970929697696e-05, "loss": 2.3207, "step": 22786 }, { "epoch": 0.3, "grad_norm": 4.20891809463501, "learning_rate": 1.9898956030176223e-05, "loss": 1.816, "step": 22787 }, { "epoch": 0.3, "grad_norm": 4.052426338195801, "learning_rate": 1.989894112956174e-05, "loss": 2.4385, "step": 22788 }, { "epoch": 0.3, "grad_norm": 5.231398105621338, "learning_rate": 1.9898926227854246e-05, "loss": 2.6689, "step": 22789 }, { "epoch": 0.3, "grad_norm": 4.374009609222412, "learning_rate": 1.989891132505374e-05, "loss": 2.0903, "step": 22790 }, { "epoch": 0.3, "grad_norm": 4.2088727951049805, "learning_rate": 1.9898896421160224e-05, "loss": 2.1429, "step": 22791 }, { "epoch": 0.3, "grad_norm": 3.8151352405548096, "learning_rate": 1.9898881516173707e-05, "loss": 1.9572, "step": 22792 }, { "epoch": 0.3, "grad_norm": 3.652862548828125, "learning_rate": 1.9898866610094183e-05, "loss": 1.8912, "step": 22793 }, { "epoch": 0.3, "grad_norm": 4.645118713378906, "learning_rate": 1.9898851702921658e-05, "loss": 2.271, "step": 22794 }, { "epoch": 0.3, "grad_norm": 3.479020118713379, "learning_rate": 1.989883679465613e-05, "loss": 2.0014, "step": 22795 }, { "epoch": 0.3, "grad_norm": 3.9848108291625977, "learning_rate": 1.9898821885297604e-05, "loss": 2.0235, "step": 22796 }, { "epoch": 0.3, "grad_norm": 3.625302314758301, "learning_rate": 1.989880697484608e-05, "loss": 1.9636, "step": 22797 }, { "epoch": 0.3, "grad_norm": 3.4824838638305664, "learning_rate": 1.989879206330156e-05, "loss": 1.7301, "step": 22798 }, { "epoch": 0.3, "grad_norm": 4.134695053100586, "learning_rate": 1.9898777150664044e-05, "loss": 2.1617, "step": 22799 }, { "epoch": 0.3, "grad_norm": 3.679398775100708, "learning_rate": 1.9898762236933536e-05, "loss": 1.8354, "step": 22800 }, { "epoch": 0.3, "grad_norm": 4.052846908569336, "learning_rate": 1.9898747322110033e-05, "loss": 2.0937, "step": 22801 }, { "epoch": 0.3, "grad_norm": 4.226553916931152, "learning_rate": 1.9898732406193546e-05, "loss": 2.1461, "step": 22802 }, { "epoch": 0.3, "grad_norm": 4.615499019622803, "learning_rate": 1.9898717489184068e-05, "loss": 2.5599, "step": 22803 }, { "epoch": 0.3, "grad_norm": 3.927400588989258, "learning_rate": 1.9898702571081606e-05, "loss": 2.202, "step": 22804 }, { "epoch": 0.3, "grad_norm": 4.872286319732666, "learning_rate": 1.9898687651886156e-05, "loss": 2.0354, "step": 22805 }, { "epoch": 0.3, "grad_norm": 4.631285667419434, "learning_rate": 1.9898672731597725e-05, "loss": 2.4569, "step": 22806 }, { "epoch": 0.3, "grad_norm": 4.533773899078369, "learning_rate": 1.989865781021631e-05, "loss": 2.401, "step": 22807 }, { "epoch": 0.3, "grad_norm": 4.961392879486084, "learning_rate": 1.9898642887741918e-05, "loss": 2.2335, "step": 22808 }, { "epoch": 0.3, "grad_norm": 3.646592855453491, "learning_rate": 1.9898627964174548e-05, "loss": 2.117, "step": 22809 }, { "epoch": 0.3, "grad_norm": 4.687611103057861, "learning_rate": 1.98986130395142e-05, "loss": 2.2258, "step": 22810 }, { "epoch": 0.3, "grad_norm": 4.131224632263184, "learning_rate": 1.989859811376088e-05, "loss": 2.3523, "step": 22811 }, { "epoch": 0.3, "grad_norm": 3.905419111251831, "learning_rate": 1.9898583186914584e-05, "loss": 2.0399, "step": 22812 }, { "epoch": 0.3, "grad_norm": 4.04146146774292, "learning_rate": 1.9898568258975318e-05, "loss": 1.8771, "step": 22813 }, { "epoch": 0.3, "grad_norm": 4.1466498374938965, "learning_rate": 1.989855332994308e-05, "loss": 1.8424, "step": 22814 }, { "epoch": 0.3, "grad_norm": 3.962754964828491, "learning_rate": 1.9898538399817874e-05, "loss": 2.1841, "step": 22815 }, { "epoch": 0.3, "grad_norm": 4.003161430358887, "learning_rate": 1.9898523468599703e-05, "loss": 1.8649, "step": 22816 }, { "epoch": 0.3, "grad_norm": 4.1598615646362305, "learning_rate": 1.9898508536288568e-05, "loss": 2.8348, "step": 22817 }, { "epoch": 0.3, "grad_norm": 4.548873424530029, "learning_rate": 1.989849360288447e-05, "loss": 2.0591, "step": 22818 }, { "epoch": 0.3, "grad_norm": 4.38447904586792, "learning_rate": 1.9898478668387406e-05, "loss": 2.4014, "step": 22819 }, { "epoch": 0.3, "grad_norm": 4.031786918640137, "learning_rate": 1.9898463732797386e-05, "loss": 2.2931, "step": 22820 }, { "epoch": 0.3, "grad_norm": 3.844984292984009, "learning_rate": 1.9898448796114406e-05, "loss": 2.1694, "step": 22821 }, { "epoch": 0.3, "grad_norm": 4.44429874420166, "learning_rate": 1.989843385833847e-05, "loss": 2.5914, "step": 22822 }, { "epoch": 0.3, "grad_norm": 3.7768824100494385, "learning_rate": 1.989841891946958e-05, "loss": 1.6603, "step": 22823 }, { "epoch": 0.3, "grad_norm": 4.129742622375488, "learning_rate": 1.9898403979507735e-05, "loss": 2.1896, "step": 22824 }, { "epoch": 0.3, "grad_norm": 4.210737228393555, "learning_rate": 1.989838903845294e-05, "loss": 2.5934, "step": 22825 }, { "epoch": 0.3, "grad_norm": 3.8611505031585693, "learning_rate": 1.9898374096305194e-05, "loss": 1.9089, "step": 22826 }, { "epoch": 0.3, "grad_norm": 4.651010990142822, "learning_rate": 1.98983591530645e-05, "loss": 2.6464, "step": 22827 }, { "epoch": 0.3, "grad_norm": 4.397029399871826, "learning_rate": 1.9898344208730856e-05, "loss": 1.9868, "step": 22828 }, { "epoch": 0.3, "grad_norm": 4.615394115447998, "learning_rate": 1.989832926330427e-05, "loss": 2.0067, "step": 22829 }, { "epoch": 0.3, "grad_norm": 4.066532135009766, "learning_rate": 1.989831431678474e-05, "loss": 2.0863, "step": 22830 }, { "epoch": 0.3, "grad_norm": 3.401912212371826, "learning_rate": 1.9898299369172266e-05, "loss": 1.5667, "step": 22831 }, { "epoch": 0.3, "grad_norm": 3.891514539718628, "learning_rate": 1.9898284420466855e-05, "loss": 1.8353, "step": 22832 }, { "epoch": 0.3, "grad_norm": 4.335697174072266, "learning_rate": 1.9898269470668505e-05, "loss": 2.2318, "step": 22833 }, { "epoch": 0.3, "grad_norm": 3.9988009929656982, "learning_rate": 1.9898254519777218e-05, "loss": 1.9965, "step": 22834 }, { "epoch": 0.3, "grad_norm": 3.511762857437134, "learning_rate": 1.9898239567792993e-05, "loss": 1.7079, "step": 22835 }, { "epoch": 0.3, "grad_norm": 4.148287773132324, "learning_rate": 1.9898224614715836e-05, "loss": 1.8208, "step": 22836 }, { "epoch": 0.3, "grad_norm": 4.6666741371154785, "learning_rate": 1.9898209660545752e-05, "loss": 2.457, "step": 22837 }, { "epoch": 0.3, "grad_norm": 4.284521102905273, "learning_rate": 1.989819470528273e-05, "loss": 2.0514, "step": 22838 }, { "epoch": 0.3, "grad_norm": 3.7914018630981445, "learning_rate": 1.9898179748926784e-05, "loss": 2.1759, "step": 22839 }, { "epoch": 0.3, "grad_norm": 4.249617576599121, "learning_rate": 1.989816479147791e-05, "loss": 2.6742, "step": 22840 }, { "epoch": 0.3, "grad_norm": 3.745137929916382, "learning_rate": 1.9898149832936112e-05, "loss": 2.0744, "step": 22841 }, { "epoch": 0.3, "grad_norm": 3.863024950027466, "learning_rate": 1.9898134873301385e-05, "loss": 1.6958, "step": 22842 }, { "epoch": 0.3, "grad_norm": 3.8614275455474854, "learning_rate": 1.989811991257374e-05, "loss": 2.0185, "step": 22843 }, { "epoch": 0.3, "grad_norm": 3.515799045562744, "learning_rate": 1.9898104950753178e-05, "loss": 1.9822, "step": 22844 }, { "epoch": 0.3, "grad_norm": 4.350600242614746, "learning_rate": 1.989808998783969e-05, "loss": 2.1223, "step": 22845 }, { "epoch": 0.3, "grad_norm": 4.058485507965088, "learning_rate": 1.989807502383329e-05, "loss": 2.4521, "step": 22846 }, { "epoch": 0.3, "grad_norm": 3.7884128093719482, "learning_rate": 1.9898060058733975e-05, "loss": 1.7502, "step": 22847 }, { "epoch": 0.3, "grad_norm": 3.6964449882507324, "learning_rate": 1.9898045092541746e-05, "loss": 1.9451, "step": 22848 }, { "epoch": 0.3, "grad_norm": 4.063676357269287, "learning_rate": 1.98980301252566e-05, "loss": 1.9009, "step": 22849 }, { "epoch": 0.3, "grad_norm": 3.5847878456115723, "learning_rate": 1.9898015156878548e-05, "loss": 2.2357, "step": 22850 }, { "epoch": 0.3, "grad_norm": 4.658271312713623, "learning_rate": 1.9898000187407586e-05, "loss": 2.8123, "step": 22851 }, { "epoch": 0.3, "grad_norm": 3.852295398712158, "learning_rate": 1.9897985216843718e-05, "loss": 1.9944, "step": 22852 }, { "epoch": 0.3, "grad_norm": 3.7281060218811035, "learning_rate": 1.989797024518694e-05, "loss": 2.05, "step": 22853 }, { "epoch": 0.3, "grad_norm": 4.173577785491943, "learning_rate": 1.9897955272437267e-05, "loss": 1.98, "step": 22854 }, { "epoch": 0.3, "grad_norm": 3.9427053928375244, "learning_rate": 1.9897940298594683e-05, "loss": 2.0934, "step": 22855 }, { "epoch": 0.3, "grad_norm": 3.7262051105499268, "learning_rate": 1.9897925323659204e-05, "loss": 2.2644, "step": 22856 }, { "epoch": 0.3, "grad_norm": 3.617427349090576, "learning_rate": 1.989791034763082e-05, "loss": 2.0064, "step": 22857 }, { "epoch": 0.3, "grad_norm": 3.6459646224975586, "learning_rate": 1.9897895370509546e-05, "loss": 1.7331, "step": 22858 }, { "epoch": 0.3, "grad_norm": 3.9055607318878174, "learning_rate": 1.9897880392295374e-05, "loss": 2.1858, "step": 22859 }, { "epoch": 0.3, "grad_norm": 4.3060808181762695, "learning_rate": 1.9897865412988307e-05, "loss": 2.0499, "step": 22860 }, { "epoch": 0.3, "grad_norm": 3.964092969894409, "learning_rate": 1.9897850432588347e-05, "loss": 2.088, "step": 22861 }, { "epoch": 0.3, "grad_norm": 4.389913558959961, "learning_rate": 1.9897835451095497e-05, "loss": 2.3607, "step": 22862 }, { "epoch": 0.3, "grad_norm": 4.025681018829346, "learning_rate": 1.9897820468509762e-05, "loss": 2.1572, "step": 22863 }, { "epoch": 0.3, "grad_norm": 4.143309593200684, "learning_rate": 1.9897805484831134e-05, "loss": 2.2672, "step": 22864 }, { "epoch": 0.3, "grad_norm": 3.571173667907715, "learning_rate": 1.9897790500059623e-05, "loss": 1.4511, "step": 22865 }, { "epoch": 0.3, "grad_norm": 3.6387109756469727, "learning_rate": 1.989777551419523e-05, "loss": 1.7748, "step": 22866 }, { "epoch": 0.3, "grad_norm": 4.398543834686279, "learning_rate": 1.989776052723795e-05, "loss": 2.0247, "step": 22867 }, { "epoch": 0.3, "grad_norm": 3.650055170059204, "learning_rate": 1.989774553918779e-05, "loss": 1.8201, "step": 22868 }, { "epoch": 0.3, "grad_norm": 3.8351962566375732, "learning_rate": 1.9897730550044754e-05, "loss": 1.6636, "step": 22869 }, { "epoch": 0.3, "grad_norm": 3.6743922233581543, "learning_rate": 1.989771555980884e-05, "loss": 2.0142, "step": 22870 }, { "epoch": 0.3, "grad_norm": 3.580045461654663, "learning_rate": 1.9897700568480048e-05, "loss": 1.5802, "step": 22871 }, { "epoch": 0.3, "grad_norm": 3.898430347442627, "learning_rate": 1.9897685576058386e-05, "loss": 2.3083, "step": 22872 }, { "epoch": 0.3, "grad_norm": 3.8597452640533447, "learning_rate": 1.989767058254385e-05, "loss": 1.8297, "step": 22873 }, { "epoch": 0.3, "grad_norm": 3.855341672897339, "learning_rate": 1.9897655587936442e-05, "loss": 1.9412, "step": 22874 }, { "epoch": 0.3, "grad_norm": 4.3914594650268555, "learning_rate": 1.9897640592236164e-05, "loss": 2.4178, "step": 22875 }, { "epoch": 0.3, "grad_norm": 4.2754387855529785, "learning_rate": 1.989762559544302e-05, "loss": 2.4471, "step": 22876 }, { "epoch": 0.3, "grad_norm": 3.7388839721679688, "learning_rate": 1.9897610597557012e-05, "loss": 1.9637, "step": 22877 }, { "epoch": 0.3, "grad_norm": 3.723116874694824, "learning_rate": 1.9897595598578133e-05, "loss": 2.0664, "step": 22878 }, { "epoch": 0.3, "grad_norm": 4.09039306640625, "learning_rate": 1.98975805985064e-05, "loss": 1.8711, "step": 22879 }, { "epoch": 0.3, "grad_norm": 4.489072322845459, "learning_rate": 1.9897565597341802e-05, "loss": 2.5881, "step": 22880 }, { "epoch": 0.3, "grad_norm": 4.180746078491211, "learning_rate": 1.9897550595084346e-05, "loss": 2.4593, "step": 22881 }, { "epoch": 0.3, "grad_norm": 3.761134147644043, "learning_rate": 1.989753559173403e-05, "loss": 1.7321, "step": 22882 }, { "epoch": 0.3, "grad_norm": 3.8843135833740234, "learning_rate": 1.9897520587290862e-05, "loss": 2.4826, "step": 22883 }, { "epoch": 0.3, "grad_norm": 4.066295146942139, "learning_rate": 1.9897505581754836e-05, "loss": 2.1067, "step": 22884 }, { "epoch": 0.3, "grad_norm": 3.7584187984466553, "learning_rate": 1.989749057512596e-05, "loss": 2.0916, "step": 22885 }, { "epoch": 0.3, "grad_norm": 4.700049877166748, "learning_rate": 1.9897475567404233e-05, "loss": 2.3826, "step": 22886 }, { "epoch": 0.3, "grad_norm": 3.8083906173706055, "learning_rate": 1.9897460558589658e-05, "loss": 2.0184, "step": 22887 }, { "epoch": 0.3, "grad_norm": 4.4102959632873535, "learning_rate": 1.9897445548682234e-05, "loss": 2.4662, "step": 22888 }, { "epoch": 0.3, "grad_norm": 3.6177985668182373, "learning_rate": 1.9897430537681962e-05, "loss": 1.8542, "step": 22889 }, { "epoch": 0.3, "grad_norm": 3.2855982780456543, "learning_rate": 1.9897415525588848e-05, "loss": 1.801, "step": 22890 }, { "epoch": 0.3, "grad_norm": 3.898862361907959, "learning_rate": 1.989740051240289e-05, "loss": 2.1861, "step": 22891 }, { "epoch": 0.3, "grad_norm": 4.267538547515869, "learning_rate": 1.9897385498124094e-05, "loss": 2.4377, "step": 22892 }, { "epoch": 0.3, "grad_norm": 3.2024402618408203, "learning_rate": 1.9897370482752457e-05, "loss": 1.5532, "step": 22893 }, { "epoch": 0.3, "grad_norm": 3.9705283641815186, "learning_rate": 1.989735546628798e-05, "loss": 1.7687, "step": 22894 }, { "epoch": 0.3, "grad_norm": 4.026398181915283, "learning_rate": 1.9897340448730672e-05, "loss": 2.1769, "step": 22895 }, { "epoch": 0.3, "grad_norm": 4.29892635345459, "learning_rate": 1.9897325430080527e-05, "loss": 2.2103, "step": 22896 }, { "epoch": 0.3, "grad_norm": 3.9720137119293213, "learning_rate": 1.9897310410337546e-05, "loss": 2.1333, "step": 22897 }, { "epoch": 0.3, "grad_norm": 3.733607530593872, "learning_rate": 1.9897295389501738e-05, "loss": 1.5903, "step": 22898 }, { "epoch": 0.3, "grad_norm": 3.9231369495391846, "learning_rate": 1.98972803675731e-05, "loss": 1.7875, "step": 22899 }, { "epoch": 0.3, "grad_norm": 4.225621700286865, "learning_rate": 1.9897265344551635e-05, "loss": 2.0424, "step": 22900 }, { "epoch": 0.3, "grad_norm": 4.3470001220703125, "learning_rate": 1.9897250320437345e-05, "loss": 2.4347, "step": 22901 }, { "epoch": 0.3, "grad_norm": 4.007780075073242, "learning_rate": 1.9897235295230226e-05, "loss": 2.1521, "step": 22902 }, { "epoch": 0.3, "grad_norm": 4.186593532562256, "learning_rate": 1.989722026893029e-05, "loss": 2.0235, "step": 22903 }, { "epoch": 0.3, "grad_norm": 3.5364909172058105, "learning_rate": 1.989720524153753e-05, "loss": 1.8984, "step": 22904 }, { "epoch": 0.3, "grad_norm": 4.1324143409729, "learning_rate": 1.989719021305195e-05, "loss": 2.3831, "step": 22905 }, { "epoch": 0.3, "grad_norm": 4.152272701263428, "learning_rate": 1.9897175183473555e-05, "loss": 1.8872, "step": 22906 }, { "epoch": 0.3, "grad_norm": 3.742906332015991, "learning_rate": 1.9897160152802342e-05, "loss": 2.0345, "step": 22907 }, { "epoch": 0.3, "grad_norm": 4.012808799743652, "learning_rate": 1.9897145121038314e-05, "loss": 2.0165, "step": 22908 }, { "epoch": 0.3, "grad_norm": 4.2320451736450195, "learning_rate": 1.9897130088181475e-05, "loss": 2.2355, "step": 22909 }, { "epoch": 0.3, "grad_norm": 3.6340644359588623, "learning_rate": 1.9897115054231824e-05, "loss": 1.7391, "step": 22910 }, { "epoch": 0.3, "grad_norm": 3.985154151916504, "learning_rate": 1.989710001918936e-05, "loss": 1.7894, "step": 22911 }, { "epoch": 0.3, "grad_norm": 3.2364277839660645, "learning_rate": 1.9897084983054095e-05, "loss": 1.3235, "step": 22912 }, { "epoch": 0.3, "grad_norm": 3.5190908908843994, "learning_rate": 1.989706994582602e-05, "loss": 1.8818, "step": 22913 }, { "epoch": 0.3, "grad_norm": 4.264517307281494, "learning_rate": 1.989705490750514e-05, "loss": 2.663, "step": 22914 }, { "epoch": 0.3, "grad_norm": 4.134859085083008, "learning_rate": 1.989703986809146e-05, "loss": 2.25, "step": 22915 }, { "epoch": 0.3, "grad_norm": 3.873826265335083, "learning_rate": 1.9897024827584978e-05, "loss": 1.96, "step": 22916 }, { "epoch": 0.3, "grad_norm": 4.366524696350098, "learning_rate": 1.9897009785985694e-05, "loss": 2.1851, "step": 22917 }, { "epoch": 0.3, "grad_norm": 4.119992256164551, "learning_rate": 1.9896994743293617e-05, "loss": 2.4099, "step": 22918 }, { "epoch": 0.3, "grad_norm": 3.6701242923736572, "learning_rate": 1.989697969950874e-05, "loss": 1.9337, "step": 22919 }, { "epoch": 0.3, "grad_norm": 4.094356060028076, "learning_rate": 1.9896964654631068e-05, "loss": 2.0704, "step": 22920 }, { "epoch": 0.3, "grad_norm": 4.498606204986572, "learning_rate": 1.9896949608660606e-05, "loss": 1.7824, "step": 22921 }, { "epoch": 0.3, "grad_norm": 4.167422771453857, "learning_rate": 1.989693456159735e-05, "loss": 2.33, "step": 22922 }, { "epoch": 0.3, "grad_norm": 4.136218547821045, "learning_rate": 1.9896919513441308e-05, "loss": 2.2987, "step": 22923 }, { "epoch": 0.3, "grad_norm": 3.896522045135498, "learning_rate": 1.9896904464192473e-05, "loss": 2.0817, "step": 22924 }, { "epoch": 0.3, "grad_norm": 4.04468297958374, "learning_rate": 1.9896889413850858e-05, "loss": 2.0662, "step": 22925 }, { "epoch": 0.3, "grad_norm": 4.107512950897217, "learning_rate": 1.9896874362416455e-05, "loss": 2.466, "step": 22926 }, { "epoch": 0.3, "grad_norm": 3.4809510707855225, "learning_rate": 1.989685930988927e-05, "loss": 1.5672, "step": 22927 }, { "epoch": 0.3, "grad_norm": 4.354001045227051, "learning_rate": 1.9896844256269303e-05, "loss": 2.1483, "step": 22928 }, { "epoch": 0.3, "grad_norm": 3.8016889095306396, "learning_rate": 1.989682920155656e-05, "loss": 1.9349, "step": 22929 }, { "epoch": 0.3, "grad_norm": 4.491657257080078, "learning_rate": 1.9896814145751033e-05, "loss": 2.5479, "step": 22930 }, { "epoch": 0.3, "grad_norm": 4.321062088012695, "learning_rate": 1.9896799088852736e-05, "loss": 2.3169, "step": 22931 }, { "epoch": 0.3, "grad_norm": 3.907978057861328, "learning_rate": 1.989678403086166e-05, "loss": 2.4945, "step": 22932 }, { "epoch": 0.3, "grad_norm": 3.3596365451812744, "learning_rate": 1.9896768971777812e-05, "loss": 1.5044, "step": 22933 }, { "epoch": 0.3, "grad_norm": 4.1423773765563965, "learning_rate": 1.9896753911601195e-05, "loss": 2.0829, "step": 22934 }, { "epoch": 0.3, "grad_norm": 3.989847421646118, "learning_rate": 1.9896738850331808e-05, "loss": 1.8382, "step": 22935 }, { "epoch": 0.3, "grad_norm": 3.8795254230499268, "learning_rate": 1.9896723787969653e-05, "loss": 2.1984, "step": 22936 }, { "epoch": 0.3, "grad_norm": 4.306225776672363, "learning_rate": 1.9896708724514728e-05, "loss": 2.1744, "step": 22937 }, { "epoch": 0.3, "grad_norm": 4.404286861419678, "learning_rate": 1.9896693659967042e-05, "loss": 2.0242, "step": 22938 }, { "epoch": 0.3, "grad_norm": 3.69258189201355, "learning_rate": 1.9896678594326592e-05, "loss": 1.8901, "step": 22939 }, { "epoch": 0.3, "grad_norm": 3.7465131282806396, "learning_rate": 1.989666352759338e-05, "loss": 1.7987, "step": 22940 }, { "epoch": 0.3, "grad_norm": 4.0006422996521, "learning_rate": 1.989664845976741e-05, "loss": 1.9132, "step": 22941 }, { "epoch": 0.3, "grad_norm": 3.9775688648223877, "learning_rate": 1.9896633390848685e-05, "loss": 2.1161, "step": 22942 }, { "epoch": 0.3, "grad_norm": 3.6308584213256836, "learning_rate": 1.98966183208372e-05, "loss": 2.091, "step": 22943 }, { "epoch": 0.3, "grad_norm": 4.554623603820801, "learning_rate": 1.9896603249732963e-05, "loss": 2.0767, "step": 22944 }, { "epoch": 0.3, "grad_norm": 4.202062606811523, "learning_rate": 1.989658817753597e-05, "loss": 2.1888, "step": 22945 }, { "epoch": 0.3, "grad_norm": 4.152304649353027, "learning_rate": 1.9896573104246227e-05, "loss": 2.1936, "step": 22946 }, { "epoch": 0.3, "grad_norm": 4.171506881713867, "learning_rate": 1.9896558029863736e-05, "loss": 2.4299, "step": 22947 }, { "epoch": 0.3, "grad_norm": 3.564466714859009, "learning_rate": 1.9896542954388495e-05, "loss": 1.804, "step": 22948 }, { "epoch": 0.3, "grad_norm": 3.7849299907684326, "learning_rate": 1.9896527877820507e-05, "loss": 2.1211, "step": 22949 }, { "epoch": 0.3, "grad_norm": 3.740563154220581, "learning_rate": 1.9896512800159775e-05, "loss": 1.9072, "step": 22950 }, { "epoch": 0.3, "grad_norm": 4.3630266189575195, "learning_rate": 1.9896497721406303e-05, "loss": 2.343, "step": 22951 }, { "epoch": 0.3, "grad_norm": 3.9095029830932617, "learning_rate": 1.9896482641560087e-05, "loss": 1.7595, "step": 22952 }, { "epoch": 0.3, "grad_norm": 3.478301525115967, "learning_rate": 1.989646756062113e-05, "loss": 1.8245, "step": 22953 }, { "epoch": 0.3, "grad_norm": 4.02115535736084, "learning_rate": 1.9896452478589435e-05, "loss": 2.0901, "step": 22954 }, { "epoch": 0.3, "grad_norm": 3.7100069522857666, "learning_rate": 1.989643739546501e-05, "loss": 2.1137, "step": 22955 }, { "epoch": 0.3, "grad_norm": 4.169460773468018, "learning_rate": 1.9896422311247845e-05, "loss": 2.28, "step": 22956 }, { "epoch": 0.3, "grad_norm": 4.644205093383789, "learning_rate": 1.989640722593795e-05, "loss": 2.5715, "step": 22957 }, { "epoch": 0.3, "grad_norm": 4.025203227996826, "learning_rate": 1.9896392139535318e-05, "loss": 2.0416, "step": 22958 }, { "epoch": 0.3, "grad_norm": 3.8713622093200684, "learning_rate": 1.9896377052039965e-05, "loss": 2.4439, "step": 22959 }, { "epoch": 0.3, "grad_norm": 3.822122812271118, "learning_rate": 1.9896361963451878e-05, "loss": 2.0695, "step": 22960 }, { "epoch": 0.3, "grad_norm": 3.552396059036255, "learning_rate": 1.9896346873771067e-05, "loss": 1.5478, "step": 22961 }, { "epoch": 0.3, "grad_norm": 3.7205960750579834, "learning_rate": 1.989633178299753e-05, "loss": 1.8132, "step": 22962 }, { "epoch": 0.3, "grad_norm": 3.9853053092956543, "learning_rate": 1.989631669113127e-05, "loss": 2.1512, "step": 22963 }, { "epoch": 0.3, "grad_norm": 5.187310218811035, "learning_rate": 1.989630159817229e-05, "loss": 2.3346, "step": 22964 }, { "epoch": 0.3, "grad_norm": 4.1318583488464355, "learning_rate": 1.989628650412059e-05, "loss": 2.4514, "step": 22965 }, { "epoch": 0.3, "grad_norm": 4.090717315673828, "learning_rate": 1.9896271408976172e-05, "loss": 1.915, "step": 22966 }, { "epoch": 0.3, "grad_norm": 4.120509147644043, "learning_rate": 1.9896256312739036e-05, "loss": 2.3842, "step": 22967 }, { "epoch": 0.3, "grad_norm": 3.7003469467163086, "learning_rate": 1.989624121540919e-05, "loss": 1.5766, "step": 22968 }, { "epoch": 0.3, "grad_norm": 3.6266114711761475, "learning_rate": 1.9896226116986626e-05, "loss": 1.7916, "step": 22969 }, { "epoch": 0.3, "grad_norm": 4.173683166503906, "learning_rate": 1.9896211017471353e-05, "loss": 2.6117, "step": 22970 }, { "epoch": 0.3, "grad_norm": 4.212706565856934, "learning_rate": 1.989619591686337e-05, "loss": 2.4483, "step": 22971 }, { "epoch": 0.3, "grad_norm": 3.9619364738464355, "learning_rate": 1.9896180815162682e-05, "loss": 2.4511, "step": 22972 }, { "epoch": 0.3, "grad_norm": 4.067045211791992, "learning_rate": 1.9896165712369283e-05, "loss": 1.8923, "step": 22973 }, { "epoch": 0.3, "grad_norm": 3.863563060760498, "learning_rate": 1.989615060848318e-05, "loss": 1.9497, "step": 22974 }, { "epoch": 0.3, "grad_norm": 4.017895221710205, "learning_rate": 1.9896135503504376e-05, "loss": 2.0422, "step": 22975 }, { "epoch": 0.3, "grad_norm": 3.7622218132019043, "learning_rate": 1.9896120397432872e-05, "loss": 1.9316, "step": 22976 }, { "epoch": 0.3, "grad_norm": 3.94521427154541, "learning_rate": 1.9896105290268668e-05, "loss": 2.0681, "step": 22977 }, { "epoch": 0.3, "grad_norm": 3.616569995880127, "learning_rate": 1.9896090182011765e-05, "loss": 1.7042, "step": 22978 }, { "epoch": 0.3, "grad_norm": 4.382004261016846, "learning_rate": 1.9896075072662165e-05, "loss": 2.2767, "step": 22979 }, { "epoch": 0.3, "grad_norm": 3.7462592124938965, "learning_rate": 1.9896059962219872e-05, "loss": 2.0864, "step": 22980 }, { "epoch": 0.3, "grad_norm": 4.281938552856445, "learning_rate": 1.9896044850684887e-05, "loss": 2.1748, "step": 22981 }, { "epoch": 0.3, "grad_norm": 3.7992846965789795, "learning_rate": 1.989602973805721e-05, "loss": 2.2483, "step": 22982 }, { "epoch": 0.3, "grad_norm": 4.398818016052246, "learning_rate": 1.989601462433684e-05, "loss": 2.1706, "step": 22983 }, { "epoch": 0.3, "grad_norm": 4.11548376083374, "learning_rate": 1.9895999509523784e-05, "loss": 1.885, "step": 22984 }, { "epoch": 0.3, "grad_norm": 4.361670017242432, "learning_rate": 1.989598439361804e-05, "loss": 2.685, "step": 22985 }, { "epoch": 0.3, "grad_norm": 4.138566970825195, "learning_rate": 1.9895969276619614e-05, "loss": 2.0183, "step": 22986 }, { "epoch": 0.3, "grad_norm": 4.256028175354004, "learning_rate": 1.9895954158528506e-05, "loss": 1.7971, "step": 22987 }, { "epoch": 0.3, "grad_norm": 3.8222124576568604, "learning_rate": 1.9895939039344715e-05, "loss": 2.1128, "step": 22988 }, { "epoch": 0.3, "grad_norm": 4.189845085144043, "learning_rate": 1.9895923919068244e-05, "loss": 2.4107, "step": 22989 }, { "epoch": 0.3, "grad_norm": 3.928586721420288, "learning_rate": 1.9895908797699098e-05, "loss": 2.0856, "step": 22990 }, { "epoch": 0.3, "grad_norm": 3.8601558208465576, "learning_rate": 1.9895893675237272e-05, "loss": 1.7463, "step": 22991 }, { "epoch": 0.3, "grad_norm": 4.378598690032959, "learning_rate": 1.9895878551682774e-05, "loss": 2.5484, "step": 22992 }, { "epoch": 0.3, "grad_norm": 3.5400285720825195, "learning_rate": 1.9895863427035603e-05, "loss": 1.7592, "step": 22993 }, { "epoch": 0.3, "grad_norm": 3.6874823570251465, "learning_rate": 1.989584830129576e-05, "loss": 2.2689, "step": 22994 }, { "epoch": 0.3, "grad_norm": 4.097304344177246, "learning_rate": 1.9895833174463248e-05, "loss": 2.3379, "step": 22995 }, { "epoch": 0.3, "grad_norm": 4.081460475921631, "learning_rate": 1.9895818046538066e-05, "loss": 1.9583, "step": 22996 }, { "epoch": 0.3, "grad_norm": 3.8380978107452393, "learning_rate": 1.9895802917520223e-05, "loss": 2.3149, "step": 22997 }, { "epoch": 0.3, "grad_norm": 3.681065082550049, "learning_rate": 1.9895787787409713e-05, "loss": 2.1242, "step": 22998 }, { "epoch": 0.3, "grad_norm": 4.385785102844238, "learning_rate": 1.9895772656206538e-05, "loss": 2.5583, "step": 22999 }, { "epoch": 0.3, "grad_norm": 4.0796332359313965, "learning_rate": 1.9895757523910704e-05, "loss": 1.9704, "step": 23000 }, { "epoch": 0.3, "grad_norm": 3.710679531097412, "learning_rate": 1.9895742390522207e-05, "loss": 1.6938, "step": 23001 }, { "epoch": 0.3, "grad_norm": 4.043797492980957, "learning_rate": 1.989572725604106e-05, "loss": 2.0426, "step": 23002 }, { "epoch": 0.3, "grad_norm": 3.8604371547698975, "learning_rate": 1.9895712120467248e-05, "loss": 2.3642, "step": 23003 }, { "epoch": 0.3, "grad_norm": 3.8336546421051025, "learning_rate": 1.9895696983800785e-05, "loss": 2.0232, "step": 23004 }, { "epoch": 0.3, "grad_norm": 4.092528820037842, "learning_rate": 1.989568184604167e-05, "loss": 2.3615, "step": 23005 }, { "epoch": 0.3, "grad_norm": 4.337471961975098, "learning_rate": 1.9895666707189902e-05, "loss": 2.4581, "step": 23006 }, { "epoch": 0.3, "grad_norm": 4.782651901245117, "learning_rate": 1.9895651567245486e-05, "loss": 2.6299, "step": 23007 }, { "epoch": 0.3, "grad_norm": 3.979245185852051, "learning_rate": 1.989563642620842e-05, "loss": 1.906, "step": 23008 }, { "epoch": 0.3, "grad_norm": 4.171651363372803, "learning_rate": 1.989562128407871e-05, "loss": 2.2993, "step": 23009 }, { "epoch": 0.3, "grad_norm": 4.501865863800049, "learning_rate": 1.9895606140856357e-05, "loss": 2.46, "step": 23010 }, { "epoch": 0.3, "grad_norm": 4.078638553619385, "learning_rate": 1.989559099654136e-05, "loss": 1.865, "step": 23011 }, { "epoch": 0.3, "grad_norm": 3.7802135944366455, "learning_rate": 1.9895575851133723e-05, "loss": 2.0401, "step": 23012 }, { "epoch": 0.3, "grad_norm": 3.7031478881835938, "learning_rate": 1.989556070463344e-05, "loss": 1.6534, "step": 23013 }, { "epoch": 0.3, "grad_norm": 3.857396364212036, "learning_rate": 1.9895545557040524e-05, "loss": 1.9739, "step": 23014 }, { "epoch": 0.3, "grad_norm": 3.827441692352295, "learning_rate": 1.9895530408354972e-05, "loss": 1.924, "step": 23015 }, { "epoch": 0.3, "grad_norm": 3.7627577781677246, "learning_rate": 1.9895515258576788e-05, "loss": 1.7878, "step": 23016 }, { "epoch": 0.3, "grad_norm": 3.892260789871216, "learning_rate": 1.9895500107705968e-05, "loss": 1.7098, "step": 23017 }, { "epoch": 0.3, "grad_norm": 3.7758610248565674, "learning_rate": 1.9895484955742517e-05, "loss": 1.89, "step": 23018 }, { "epoch": 0.3, "grad_norm": 3.7010090351104736, "learning_rate": 1.9895469802686437e-05, "loss": 1.7235, "step": 23019 }, { "epoch": 0.3, "grad_norm": 4.351327419281006, "learning_rate": 1.9895454648537732e-05, "loss": 2.6522, "step": 23020 }, { "epoch": 0.3, "grad_norm": 4.118782043457031, "learning_rate": 1.98954394932964e-05, "loss": 2.6286, "step": 23021 }, { "epoch": 0.3, "grad_norm": 4.138044834136963, "learning_rate": 1.9895424336962437e-05, "loss": 2.1917, "step": 23022 }, { "epoch": 0.3, "grad_norm": 3.8242316246032715, "learning_rate": 1.9895409179535857e-05, "loss": 1.8852, "step": 23023 }, { "epoch": 0.3, "grad_norm": 4.763054847717285, "learning_rate": 1.9895394021016656e-05, "loss": 1.8309, "step": 23024 }, { "epoch": 0.3, "grad_norm": 4.438569068908691, "learning_rate": 1.9895378861404836e-05, "loss": 2.5086, "step": 23025 }, { "epoch": 0.3, "grad_norm": 4.193953514099121, "learning_rate": 1.9895363700700397e-05, "loss": 2.0805, "step": 23026 }, { "epoch": 0.3, "grad_norm": 4.0299391746521, "learning_rate": 1.989534853890334e-05, "loss": 1.9537, "step": 23027 }, { "epoch": 0.3, "grad_norm": 4.0862321853637695, "learning_rate": 1.9895333376013673e-05, "loss": 1.7766, "step": 23028 }, { "epoch": 0.3, "grad_norm": 4.164355278015137, "learning_rate": 1.989531821203139e-05, "loss": 2.25, "step": 23029 }, { "epoch": 0.3, "grad_norm": 3.8956973552703857, "learning_rate": 1.9895303046956495e-05, "loss": 1.9031, "step": 23030 }, { "epoch": 0.3, "grad_norm": 3.971407175064087, "learning_rate": 1.9895287880788996e-05, "loss": 2.0297, "step": 23031 }, { "epoch": 0.3, "grad_norm": 3.683901071548462, "learning_rate": 1.9895272713528885e-05, "loss": 1.711, "step": 23032 }, { "epoch": 0.3, "grad_norm": 3.506941080093384, "learning_rate": 1.989525754517617e-05, "loss": 2.0374, "step": 23033 }, { "epoch": 0.3, "grad_norm": 3.295764207839966, "learning_rate": 1.9895242375730846e-05, "loss": 1.7362, "step": 23034 }, { "epoch": 0.3, "grad_norm": 3.680387258529663, "learning_rate": 1.9895227205192924e-05, "loss": 1.9069, "step": 23035 }, { "epoch": 0.3, "grad_norm": 4.1255903244018555, "learning_rate": 1.9895212033562405e-05, "loss": 2.0779, "step": 23036 }, { "epoch": 0.3, "grad_norm": 4.353588581085205, "learning_rate": 1.9895196860839277e-05, "loss": 2.0089, "step": 23037 }, { "epoch": 0.3, "grad_norm": 4.578430652618408, "learning_rate": 1.989518168702356e-05, "loss": 2.299, "step": 23038 }, { "epoch": 0.3, "grad_norm": 4.631317615509033, "learning_rate": 1.989516651211524e-05, "loss": 2.5393, "step": 23039 }, { "epoch": 0.3, "grad_norm": 4.5669050216674805, "learning_rate": 1.989515133611433e-05, "loss": 2.5581, "step": 23040 }, { "epoch": 0.3, "grad_norm": 3.863754987716675, "learning_rate": 1.9895136159020828e-05, "loss": 2.264, "step": 23041 }, { "epoch": 0.3, "grad_norm": 4.616299629211426, "learning_rate": 1.9895120980834734e-05, "loss": 2.5155, "step": 23042 }, { "epoch": 0.3, "grad_norm": 4.080574035644531, "learning_rate": 1.989510580155605e-05, "loss": 2.2167, "step": 23043 }, { "epoch": 0.3, "grad_norm": 4.121558666229248, "learning_rate": 1.9895090621184776e-05, "loss": 1.9997, "step": 23044 }, { "epoch": 0.3, "grad_norm": 3.7751662731170654, "learning_rate": 1.9895075439720922e-05, "loss": 2.1341, "step": 23045 }, { "epoch": 0.3, "grad_norm": 3.4938511848449707, "learning_rate": 1.9895060257164477e-05, "loss": 1.7946, "step": 23046 }, { "epoch": 0.3, "grad_norm": 3.718834161758423, "learning_rate": 1.9895045073515455e-05, "loss": 1.9686, "step": 23047 }, { "epoch": 0.3, "grad_norm": 4.258033752441406, "learning_rate": 1.9895029888773848e-05, "loss": 1.8766, "step": 23048 }, { "epoch": 0.3, "grad_norm": 3.853832960128784, "learning_rate": 1.9895014702939666e-05, "loss": 2.0182, "step": 23049 }, { "epoch": 0.3, "grad_norm": 3.4643516540527344, "learning_rate": 1.9894999516012908e-05, "loss": 1.712, "step": 23050 }, { "epoch": 0.3, "grad_norm": 3.610034227371216, "learning_rate": 1.9894984327993568e-05, "loss": 1.8472, "step": 23051 }, { "epoch": 0.3, "grad_norm": 4.313111305236816, "learning_rate": 1.9894969138881657e-05, "loss": 2.157, "step": 23052 }, { "epoch": 0.3, "grad_norm": 4.105673789978027, "learning_rate": 1.9894953948677175e-05, "loss": 2.338, "step": 23053 }, { "epoch": 0.3, "grad_norm": 3.842921257019043, "learning_rate": 1.989493875738012e-05, "loss": 1.7902, "step": 23054 }, { "epoch": 0.3, "grad_norm": 3.989717721939087, "learning_rate": 1.9894923564990493e-05, "loss": 2.046, "step": 23055 }, { "epoch": 0.3, "grad_norm": 4.0995635986328125, "learning_rate": 1.9894908371508305e-05, "loss": 2.0981, "step": 23056 }, { "epoch": 0.3, "grad_norm": 4.139785289764404, "learning_rate": 1.9894893176933547e-05, "loss": 2.3592, "step": 23057 }, { "epoch": 0.3, "grad_norm": 4.214754104614258, "learning_rate": 1.9894877981266228e-05, "loss": 2.3546, "step": 23058 }, { "epoch": 0.3, "grad_norm": 3.682389497756958, "learning_rate": 1.9894862784506345e-05, "loss": 2.1358, "step": 23059 }, { "epoch": 0.3, "grad_norm": 3.506561040878296, "learning_rate": 1.9894847586653898e-05, "loss": 1.6045, "step": 23060 }, { "epoch": 0.3, "grad_norm": 4.366522312164307, "learning_rate": 1.9894832387708897e-05, "loss": 2.2256, "step": 23061 }, { "epoch": 0.3, "grad_norm": 3.6839635372161865, "learning_rate": 1.9894817187671335e-05, "loss": 1.7399, "step": 23062 }, { "epoch": 0.3, "grad_norm": 3.715487003326416, "learning_rate": 1.9894801986541223e-05, "loss": 1.7704, "step": 23063 }, { "epoch": 0.3, "grad_norm": 4.1843695640563965, "learning_rate": 1.989478678431855e-05, "loss": 2.1271, "step": 23064 }, { "epoch": 0.3, "grad_norm": 4.341441631317139, "learning_rate": 1.9894771581003326e-05, "loss": 2.4409, "step": 23065 }, { "epoch": 0.3, "grad_norm": 4.321254730224609, "learning_rate": 1.9894756376595555e-05, "loss": 1.9568, "step": 23066 }, { "epoch": 0.3, "grad_norm": 3.900662660598755, "learning_rate": 1.989474117109523e-05, "loss": 1.965, "step": 23067 }, { "epoch": 0.3, "grad_norm": 4.36685848236084, "learning_rate": 1.9894725964502363e-05, "loss": 2.2091, "step": 23068 }, { "epoch": 0.3, "grad_norm": 4.005530834197998, "learning_rate": 1.9894710756816947e-05, "loss": 2.2148, "step": 23069 }, { "epoch": 0.3, "grad_norm": 3.644728183746338, "learning_rate": 1.9894695548038988e-05, "loss": 1.5767, "step": 23070 }, { "epoch": 0.3, "grad_norm": 4.514923095703125, "learning_rate": 1.9894680338168485e-05, "loss": 2.3832, "step": 23071 }, { "epoch": 0.3, "grad_norm": 3.7275376319885254, "learning_rate": 1.9894665127205445e-05, "loss": 1.8047, "step": 23072 }, { "epoch": 0.3, "grad_norm": 4.02683162689209, "learning_rate": 1.9894649915149865e-05, "loss": 2.0795, "step": 23073 }, { "epoch": 0.3, "grad_norm": 4.093232154846191, "learning_rate": 1.9894634702001747e-05, "loss": 2.1954, "step": 23074 }, { "epoch": 0.3, "grad_norm": 3.972572088241577, "learning_rate": 1.9894619487761093e-05, "loss": 2.0368, "step": 23075 }, { "epoch": 0.3, "grad_norm": 4.519997596740723, "learning_rate": 1.9894604272427905e-05, "loss": 2.2881, "step": 23076 }, { "epoch": 0.3, "grad_norm": 4.285459518432617, "learning_rate": 1.9894589056002187e-05, "loss": 2.1885, "step": 23077 }, { "epoch": 0.3, "grad_norm": 3.5461761951446533, "learning_rate": 1.9894573838483936e-05, "loss": 1.6024, "step": 23078 }, { "epoch": 0.3, "grad_norm": 4.0547895431518555, "learning_rate": 1.989455861987316e-05, "loss": 2.2633, "step": 23079 }, { "epoch": 0.3, "grad_norm": 4.026736259460449, "learning_rate": 1.9894543400169852e-05, "loss": 2.1305, "step": 23080 }, { "epoch": 0.3, "grad_norm": 4.51900577545166, "learning_rate": 1.9894528179374023e-05, "loss": 2.3598, "step": 23081 }, { "epoch": 0.3, "grad_norm": 4.239145278930664, "learning_rate": 1.9894512957485664e-05, "loss": 2.2041, "step": 23082 }, { "epoch": 0.3, "grad_norm": 3.7850565910339355, "learning_rate": 1.989449773450479e-05, "loss": 1.9883, "step": 23083 }, { "epoch": 0.3, "grad_norm": 4.444901943206787, "learning_rate": 1.9894482510431393e-05, "loss": 2.0962, "step": 23084 }, { "epoch": 0.3, "grad_norm": 4.314205169677734, "learning_rate": 1.9894467285265474e-05, "loss": 2.0679, "step": 23085 }, { "epoch": 0.3, "grad_norm": 3.8545212745666504, "learning_rate": 1.9894452059007042e-05, "loss": 1.8405, "step": 23086 }, { "epoch": 0.3, "grad_norm": 4.649161338806152, "learning_rate": 1.9894436831656093e-05, "loss": 2.4978, "step": 23087 }, { "epoch": 0.3, "grad_norm": 3.9088187217712402, "learning_rate": 1.989442160321263e-05, "loss": 1.7462, "step": 23088 }, { "epoch": 0.3, "grad_norm": 4.184013843536377, "learning_rate": 1.9894406373676655e-05, "loss": 2.2643, "step": 23089 }, { "epoch": 0.3, "grad_norm": 3.9143028259277344, "learning_rate": 1.9894391143048174e-05, "loss": 1.9177, "step": 23090 }, { "epoch": 0.3, "grad_norm": 3.4962751865386963, "learning_rate": 1.989437591132718e-05, "loss": 1.9173, "step": 23091 }, { "epoch": 0.3, "grad_norm": 3.70670485496521, "learning_rate": 1.9894360678513677e-05, "loss": 1.78, "step": 23092 }, { "epoch": 0.3, "grad_norm": 3.7450568675994873, "learning_rate": 1.9894345444607672e-05, "loss": 1.5488, "step": 23093 }, { "epoch": 0.3, "grad_norm": 4.353614330291748, "learning_rate": 1.9894330209609164e-05, "loss": 2.4248, "step": 23094 }, { "epoch": 0.3, "grad_norm": 4.255100250244141, "learning_rate": 1.9894314973518153e-05, "loss": 2.2035, "step": 23095 }, { "epoch": 0.3, "grad_norm": 4.317346096038818, "learning_rate": 1.9894299736334643e-05, "loss": 2.0826, "step": 23096 }, { "epoch": 0.3, "grad_norm": 3.7087998390197754, "learning_rate": 1.9894284498058632e-05, "loss": 1.9853, "step": 23097 }, { "epoch": 0.3, "grad_norm": 4.454641819000244, "learning_rate": 1.9894269258690126e-05, "loss": 2.3619, "step": 23098 }, { "epoch": 0.3, "grad_norm": 4.077583312988281, "learning_rate": 1.9894254018229123e-05, "loss": 2.205, "step": 23099 }, { "epoch": 0.3, "grad_norm": 4.670332431793213, "learning_rate": 1.9894238776675627e-05, "loss": 2.2537, "step": 23100 }, { "epoch": 0.3, "grad_norm": 4.037581920623779, "learning_rate": 1.9894223534029638e-05, "loss": 2.0222, "step": 23101 }, { "epoch": 0.3, "grad_norm": 4.054427146911621, "learning_rate": 1.9894208290291163e-05, "loss": 1.9807, "step": 23102 }, { "epoch": 0.3, "grad_norm": 3.476473808288574, "learning_rate": 1.9894193045460195e-05, "loss": 2.1819, "step": 23103 }, { "epoch": 0.3, "grad_norm": 4.080208778381348, "learning_rate": 1.9894177799536744e-05, "loss": 2.0677, "step": 23104 }, { "epoch": 0.3, "grad_norm": 3.6457159519195557, "learning_rate": 1.9894162552520804e-05, "loss": 1.9888, "step": 23105 }, { "epoch": 0.3, "grad_norm": 4.139842510223389, "learning_rate": 1.9894147304412384e-05, "loss": 2.2932, "step": 23106 }, { "epoch": 0.3, "grad_norm": 3.9048919677734375, "learning_rate": 1.9894132055211482e-05, "loss": 1.9794, "step": 23107 }, { "epoch": 0.3, "grad_norm": 5.047466278076172, "learning_rate": 1.9894116804918097e-05, "loss": 2.747, "step": 23108 }, { "epoch": 0.3, "grad_norm": 3.9216156005859375, "learning_rate": 1.9894101553532236e-05, "loss": 2.4517, "step": 23109 }, { "epoch": 0.3, "grad_norm": 4.349145412445068, "learning_rate": 1.98940863010539e-05, "loss": 1.7689, "step": 23110 }, { "epoch": 0.3, "grad_norm": 3.866480588912964, "learning_rate": 1.9894071047483085e-05, "loss": 2.016, "step": 23111 }, { "epoch": 0.3, "grad_norm": 4.841888427734375, "learning_rate": 1.98940557928198e-05, "loss": 2.151, "step": 23112 }, { "epoch": 0.3, "grad_norm": 3.7834665775299072, "learning_rate": 1.9894040537064045e-05, "loss": 1.7268, "step": 23113 }, { "epoch": 0.3, "grad_norm": 4.688434600830078, "learning_rate": 1.9894025280215814e-05, "loss": 2.5344, "step": 23114 }, { "epoch": 0.3, "grad_norm": 3.8157095909118652, "learning_rate": 1.989401002227512e-05, "loss": 2.0432, "step": 23115 }, { "epoch": 0.3, "grad_norm": 3.685523748397827, "learning_rate": 1.9893994763241954e-05, "loss": 1.8253, "step": 23116 }, { "epoch": 0.3, "grad_norm": 3.8992919921875, "learning_rate": 1.989397950311633e-05, "loss": 1.8972, "step": 23117 }, { "epoch": 0.3, "grad_norm": 3.864527463912964, "learning_rate": 1.9893964241898235e-05, "loss": 1.9303, "step": 23118 }, { "epoch": 0.3, "grad_norm": 3.56152606010437, "learning_rate": 1.9893948979587685e-05, "loss": 1.7044, "step": 23119 }, { "epoch": 0.3, "grad_norm": 4.206576824188232, "learning_rate": 1.9893933716184676e-05, "loss": 2.2561, "step": 23120 }, { "epoch": 0.3, "grad_norm": 3.527045249938965, "learning_rate": 1.98939184516892e-05, "loss": 1.6553, "step": 23121 }, { "epoch": 0.3, "grad_norm": 4.289409637451172, "learning_rate": 1.9893903186101274e-05, "loss": 2.5668, "step": 23122 }, { "epoch": 0.3, "grad_norm": 3.8185203075408936, "learning_rate": 1.9893887919420895e-05, "loss": 2.1728, "step": 23123 }, { "epoch": 0.3, "grad_norm": 3.7749249935150146, "learning_rate": 1.989387265164806e-05, "loss": 2.0586, "step": 23124 }, { "epoch": 0.3, "grad_norm": 4.473927021026611, "learning_rate": 1.9893857382782776e-05, "loss": 2.3263, "step": 23125 }, { "epoch": 0.3, "grad_norm": 3.5846142768859863, "learning_rate": 1.989384211282504e-05, "loss": 1.5439, "step": 23126 }, { "epoch": 0.3, "grad_norm": 3.8306570053100586, "learning_rate": 1.9893826841774855e-05, "loss": 2.0316, "step": 23127 }, { "epoch": 0.3, "grad_norm": 3.6818747520446777, "learning_rate": 1.9893811569632228e-05, "loss": 2.0076, "step": 23128 }, { "epoch": 0.3, "grad_norm": 3.9335520267486572, "learning_rate": 1.9893796296397152e-05, "loss": 1.8032, "step": 23129 }, { "epoch": 0.3, "grad_norm": 3.619368076324463, "learning_rate": 1.9893781022069634e-05, "loss": 1.6202, "step": 23130 }, { "epoch": 0.3, "grad_norm": 3.777066946029663, "learning_rate": 1.9893765746649677e-05, "loss": 1.8215, "step": 23131 }, { "epoch": 0.3, "grad_norm": 4.125927925109863, "learning_rate": 1.9893750470137278e-05, "loss": 2.2977, "step": 23132 }, { "epoch": 0.3, "grad_norm": 4.314541339874268, "learning_rate": 1.989373519253244e-05, "loss": 1.9514, "step": 23133 }, { "epoch": 0.3, "grad_norm": 4.398264408111572, "learning_rate": 1.989371991383517e-05, "loss": 2.3577, "step": 23134 }, { "epoch": 0.3, "grad_norm": 3.9478955268859863, "learning_rate": 1.9893704634045462e-05, "loss": 1.6215, "step": 23135 }, { "epoch": 0.3, "grad_norm": 3.531891345977783, "learning_rate": 1.9893689353163322e-05, "loss": 1.829, "step": 23136 }, { "epoch": 0.3, "grad_norm": 3.4413068294525146, "learning_rate": 1.989367407118875e-05, "loss": 1.9011, "step": 23137 }, { "epoch": 0.3, "grad_norm": 5.074006080627441, "learning_rate": 1.989365878812175e-05, "loss": 2.5424, "step": 23138 }, { "epoch": 0.3, "grad_norm": 3.8577563762664795, "learning_rate": 1.9893643503962323e-05, "loss": 2.0124, "step": 23139 }, { "epoch": 0.3, "grad_norm": 3.8765041828155518, "learning_rate": 1.989362821871047e-05, "loss": 2.0581, "step": 23140 }, { "epoch": 0.3, "grad_norm": 4.501212120056152, "learning_rate": 1.989361293236619e-05, "loss": 2.1468, "step": 23141 }, { "epoch": 0.3, "grad_norm": 3.8639299869537354, "learning_rate": 1.9893597644929487e-05, "loss": 2.2684, "step": 23142 }, { "epoch": 0.3, "grad_norm": 3.996359348297119, "learning_rate": 1.9893582356400364e-05, "loss": 1.8485, "step": 23143 }, { "epoch": 0.3, "grad_norm": 4.032024383544922, "learning_rate": 1.9893567066778826e-05, "loss": 2.026, "step": 23144 }, { "epoch": 0.3, "grad_norm": 3.4538047313690186, "learning_rate": 1.9893551776064862e-05, "loss": 1.6501, "step": 23145 }, { "epoch": 0.3, "grad_norm": 4.827461242675781, "learning_rate": 1.9893536484258487e-05, "loss": 2.5211, "step": 23146 }, { "epoch": 0.3, "grad_norm": 3.7381680011749268, "learning_rate": 1.98935211913597e-05, "loss": 1.8203, "step": 23147 }, { "epoch": 0.3, "grad_norm": 4.076030731201172, "learning_rate": 1.9893505897368496e-05, "loss": 2.3432, "step": 23148 }, { "epoch": 0.3, "grad_norm": 3.8702850341796875, "learning_rate": 1.9893490602284882e-05, "loss": 1.9194, "step": 23149 }, { "epoch": 0.3, "grad_norm": 4.451109886169434, "learning_rate": 1.989347530610886e-05, "loss": 2.3646, "step": 23150 }, { "epoch": 0.3, "grad_norm": 4.437335014343262, "learning_rate": 1.989346000884043e-05, "loss": 2.0142, "step": 23151 }, { "epoch": 0.3, "grad_norm": 3.420058488845825, "learning_rate": 1.9893444710479593e-05, "loss": 1.8144, "step": 23152 }, { "epoch": 0.3, "grad_norm": 4.571031093597412, "learning_rate": 1.9893429411026357e-05, "loss": 2.2613, "step": 23153 }, { "epoch": 0.3, "grad_norm": 3.6233396530151367, "learning_rate": 1.9893414110480713e-05, "loss": 1.6555, "step": 23154 }, { "epoch": 0.3, "grad_norm": 4.4610066413879395, "learning_rate": 1.989339880884267e-05, "loss": 2.4236, "step": 23155 }, { "epoch": 0.3, "grad_norm": 4.397043228149414, "learning_rate": 1.989338350611223e-05, "loss": 2.0983, "step": 23156 }, { "epoch": 0.3, "grad_norm": 3.859422206878662, "learning_rate": 1.989336820228939e-05, "loss": 1.866, "step": 23157 }, { "epoch": 0.3, "grad_norm": 4.046595096588135, "learning_rate": 1.9893352897374152e-05, "loss": 2.3806, "step": 23158 }, { "epoch": 0.3, "grad_norm": 3.9215402603149414, "learning_rate": 1.9893337591366524e-05, "loss": 2.0923, "step": 23159 }, { "epoch": 0.3, "grad_norm": 3.9497382640838623, "learning_rate": 1.98933222842665e-05, "loss": 1.937, "step": 23160 }, { "epoch": 0.3, "grad_norm": 3.547257900238037, "learning_rate": 1.989330697607409e-05, "loss": 1.7086, "step": 23161 }, { "epoch": 0.3, "grad_norm": 3.5547642707824707, "learning_rate": 1.989329166678929e-05, "loss": 2.0825, "step": 23162 }, { "epoch": 0.3, "grad_norm": 3.6433722972869873, "learning_rate": 1.98932763564121e-05, "loss": 1.9963, "step": 23163 }, { "epoch": 0.3, "grad_norm": 3.749295473098755, "learning_rate": 1.9893261044942526e-05, "loss": 1.9957, "step": 23164 }, { "epoch": 0.3, "grad_norm": 3.96419095993042, "learning_rate": 1.989324573238057e-05, "loss": 2.2378, "step": 23165 }, { "epoch": 0.3, "grad_norm": 4.391991138458252, "learning_rate": 1.989323041872623e-05, "loss": 2.4061, "step": 23166 }, { "epoch": 0.3, "grad_norm": 4.051482677459717, "learning_rate": 1.989321510397951e-05, "loss": 1.9907, "step": 23167 }, { "epoch": 0.3, "grad_norm": 3.948737144470215, "learning_rate": 1.9893199788140412e-05, "loss": 1.7017, "step": 23168 }, { "epoch": 0.3, "grad_norm": 4.1353654861450195, "learning_rate": 1.9893184471208932e-05, "loss": 2.2887, "step": 23169 }, { "epoch": 0.3, "grad_norm": 3.8437695503234863, "learning_rate": 1.9893169153185085e-05, "loss": 1.8043, "step": 23170 }, { "epoch": 0.3, "grad_norm": 5.148796558380127, "learning_rate": 1.9893153834068857e-05, "loss": 2.134, "step": 23171 }, { "epoch": 0.3, "grad_norm": 3.7634787559509277, "learning_rate": 1.989313851386026e-05, "loss": 1.6808, "step": 23172 }, { "epoch": 0.3, "grad_norm": 3.8777313232421875, "learning_rate": 1.989312319255929e-05, "loss": 1.9672, "step": 23173 }, { "epoch": 0.3, "grad_norm": 3.662755012512207, "learning_rate": 1.9893107870165956e-05, "loss": 1.6906, "step": 23174 }, { "epoch": 0.3, "grad_norm": 4.010166168212891, "learning_rate": 1.989309254668025e-05, "loss": 2.073, "step": 23175 }, { "epoch": 0.3, "grad_norm": 4.368136405944824, "learning_rate": 1.989307722210218e-05, "loss": 2.6124, "step": 23176 }, { "epoch": 0.3, "grad_norm": 4.488168716430664, "learning_rate": 1.989306189643175e-05, "loss": 2.125, "step": 23177 }, { "epoch": 0.3, "grad_norm": 3.5844266414642334, "learning_rate": 1.9893046569668956e-05, "loss": 1.8591, "step": 23178 }, { "epoch": 0.3, "grad_norm": 4.053227424621582, "learning_rate": 1.9893031241813804e-05, "loss": 2.2982, "step": 23179 }, { "epoch": 0.3, "grad_norm": 4.090979099273682, "learning_rate": 1.9893015912866288e-05, "loss": 2.1706, "step": 23180 }, { "epoch": 0.3, "grad_norm": 3.9168076515197754, "learning_rate": 1.989300058282642e-05, "loss": 2.0041, "step": 23181 }, { "epoch": 0.3, "grad_norm": 4.241785526275635, "learning_rate": 1.9892985251694193e-05, "loss": 2.2192, "step": 23182 }, { "epoch": 0.3, "grad_norm": 4.741342067718506, "learning_rate": 1.989296991946962e-05, "loss": 2.4445, "step": 23183 }, { "epoch": 0.3, "grad_norm": 3.423090696334839, "learning_rate": 1.989295458615269e-05, "loss": 1.5378, "step": 23184 }, { "epoch": 0.3, "grad_norm": 4.217714309692383, "learning_rate": 1.9892939251743407e-05, "loss": 2.4934, "step": 23185 }, { "epoch": 0.3, "grad_norm": 7.0752081871032715, "learning_rate": 1.989292391624178e-05, "loss": 2.4329, "step": 23186 }, { "epoch": 0.3, "grad_norm": 4.494335174560547, "learning_rate": 1.9892908579647806e-05, "loss": 2.3952, "step": 23187 }, { "epoch": 0.3, "grad_norm": 4.022193431854248, "learning_rate": 1.9892893241961485e-05, "loss": 2.4719, "step": 23188 }, { "epoch": 0.3, "grad_norm": 4.0691657066345215, "learning_rate": 1.989287790318282e-05, "loss": 2.3404, "step": 23189 }, { "epoch": 0.3, "grad_norm": 4.382175922393799, "learning_rate": 1.9892862563311818e-05, "loss": 1.9219, "step": 23190 }, { "epoch": 0.3, "grad_norm": 3.77301025390625, "learning_rate": 1.9892847222348472e-05, "loss": 1.9183, "step": 23191 }, { "epoch": 0.3, "grad_norm": 3.9050607681274414, "learning_rate": 1.9892831880292792e-05, "loss": 1.7803, "step": 23192 }, { "epoch": 0.3, "grad_norm": 4.2039713859558105, "learning_rate": 1.9892816537144772e-05, "loss": 2.0527, "step": 23193 }, { "epoch": 0.3, "grad_norm": 3.6978752613067627, "learning_rate": 1.9892801192904418e-05, "loss": 1.7029, "step": 23194 }, { "epoch": 0.3, "grad_norm": 3.931040048599243, "learning_rate": 1.989278584757173e-05, "loss": 1.6011, "step": 23195 }, { "epoch": 0.3, "grad_norm": 3.715923309326172, "learning_rate": 1.9892770501146713e-05, "loss": 1.8398, "step": 23196 }, { "epoch": 0.3, "grad_norm": 3.7296080589294434, "learning_rate": 1.9892755153629365e-05, "loss": 2.3377, "step": 23197 }, { "epoch": 0.3, "grad_norm": 4.114867210388184, "learning_rate": 1.989273980501969e-05, "loss": 2.024, "step": 23198 }, { "epoch": 0.3, "grad_norm": 3.82304310798645, "learning_rate": 1.9892724455317688e-05, "loss": 1.8119, "step": 23199 }, { "epoch": 0.3, "grad_norm": 3.815668821334839, "learning_rate": 1.989270910452336e-05, "loss": 2.0562, "step": 23200 }, { "epoch": 0.3, "grad_norm": 3.5725417137145996, "learning_rate": 1.989269375263671e-05, "loss": 2.0109, "step": 23201 }, { "epoch": 0.3, "grad_norm": 4.058468341827393, "learning_rate": 1.9892678399657737e-05, "loss": 2.57, "step": 23202 }, { "epoch": 0.3, "grad_norm": 3.737945318222046, "learning_rate": 1.9892663045586448e-05, "loss": 1.6719, "step": 23203 }, { "epoch": 0.3, "grad_norm": 3.859286069869995, "learning_rate": 1.9892647690422842e-05, "loss": 2.0051, "step": 23204 }, { "epoch": 0.3, "grad_norm": 4.299705505371094, "learning_rate": 1.9892632334166916e-05, "loss": 2.5006, "step": 23205 }, { "epoch": 0.3, "grad_norm": 4.044569492340088, "learning_rate": 1.9892616976818676e-05, "loss": 2.0694, "step": 23206 }, { "epoch": 0.3, "grad_norm": 3.8071787357330322, "learning_rate": 1.9892601618378124e-05, "loss": 1.9515, "step": 23207 }, { "epoch": 0.3, "grad_norm": 4.60864782333374, "learning_rate": 1.9892586258845264e-05, "loss": 1.9465, "step": 23208 }, { "epoch": 0.3, "grad_norm": 4.372124671936035, "learning_rate": 1.9892570898220088e-05, "loss": 2.1664, "step": 23209 }, { "epoch": 0.3, "grad_norm": 4.257070064544678, "learning_rate": 1.9892555536502612e-05, "loss": 2.1534, "step": 23210 }, { "epoch": 0.3, "grad_norm": 4.055994033813477, "learning_rate": 1.9892540173692826e-05, "loss": 2.2696, "step": 23211 }, { "epoch": 0.3, "grad_norm": 4.1163105964660645, "learning_rate": 1.9892524809790737e-05, "loss": 2.2301, "step": 23212 }, { "epoch": 0.3, "grad_norm": 3.9963457584381104, "learning_rate": 1.9892509444796344e-05, "loss": 2.0598, "step": 23213 }, { "epoch": 0.3, "grad_norm": 3.3300743103027344, "learning_rate": 1.989249407870965e-05, "loss": 1.498, "step": 23214 }, { "epoch": 0.3, "grad_norm": 4.2456817626953125, "learning_rate": 1.9892478711530657e-05, "loss": 1.9901, "step": 23215 }, { "epoch": 0.3, "grad_norm": 3.5407867431640625, "learning_rate": 1.989246334325937e-05, "loss": 1.8539, "step": 23216 }, { "epoch": 0.3, "grad_norm": 4.1778244972229, "learning_rate": 1.989244797389578e-05, "loss": 1.9857, "step": 23217 }, { "epoch": 0.3, "grad_norm": 3.9190618991851807, "learning_rate": 1.9892432603439902e-05, "loss": 2.0051, "step": 23218 }, { "epoch": 0.3, "grad_norm": 4.330641269683838, "learning_rate": 1.9892417231891728e-05, "loss": 2.7222, "step": 23219 }, { "epoch": 0.3, "grad_norm": 3.996129274368286, "learning_rate": 1.9892401859251268e-05, "loss": 1.8951, "step": 23220 }, { "epoch": 0.3, "grad_norm": 4.207405090332031, "learning_rate": 1.9892386485518514e-05, "loss": 1.9222, "step": 23221 }, { "epoch": 0.3, "grad_norm": 4.56719970703125, "learning_rate": 1.9892371110693474e-05, "loss": 3.0257, "step": 23222 }, { "epoch": 0.3, "grad_norm": 3.762894630432129, "learning_rate": 1.9892355734776148e-05, "loss": 1.8611, "step": 23223 }, { "epoch": 0.3, "grad_norm": 4.505393028259277, "learning_rate": 1.989234035776654e-05, "loss": 2.307, "step": 23224 }, { "epoch": 0.3, "grad_norm": 3.660892963409424, "learning_rate": 1.989232497966465e-05, "loss": 2.1215, "step": 23225 }, { "epoch": 0.3, "grad_norm": 3.9784107208251953, "learning_rate": 1.989230960047048e-05, "loss": 1.9252, "step": 23226 }, { "epoch": 0.3, "grad_norm": 4.436726093292236, "learning_rate": 1.989229422018403e-05, "loss": 2.2393, "step": 23227 }, { "epoch": 0.3, "grad_norm": 4.3249640464782715, "learning_rate": 1.9892278838805302e-05, "loss": 1.8334, "step": 23228 }, { "epoch": 0.3, "grad_norm": 4.37312650680542, "learning_rate": 1.98922634563343e-05, "loss": 1.9566, "step": 23229 }, { "epoch": 0.3, "grad_norm": 3.9718515872955322, "learning_rate": 1.989224807277102e-05, "loss": 2.3264, "step": 23230 }, { "epoch": 0.3, "grad_norm": 3.501533269882202, "learning_rate": 1.9892232688115475e-05, "loss": 1.5878, "step": 23231 }, { "epoch": 0.3, "grad_norm": 3.707444429397583, "learning_rate": 1.9892217302367657e-05, "loss": 2.1358, "step": 23232 }, { "epoch": 0.3, "grad_norm": 4.650345325469971, "learning_rate": 1.9892201915527567e-05, "loss": 2.3772, "step": 23233 }, { "epoch": 0.3, "grad_norm": 4.353426933288574, "learning_rate": 1.989218652759521e-05, "loss": 2.3802, "step": 23234 }, { "epoch": 0.3, "grad_norm": 4.012331962585449, "learning_rate": 1.9892171138570592e-05, "loss": 2.5667, "step": 23235 }, { "epoch": 0.3, "grad_norm": 4.240157127380371, "learning_rate": 1.989215574845371e-05, "loss": 2.2052, "step": 23236 }, { "epoch": 0.3, "grad_norm": 3.4074673652648926, "learning_rate": 1.9892140357244563e-05, "loss": 1.8039, "step": 23237 }, { "epoch": 0.3, "grad_norm": 3.782491445541382, "learning_rate": 1.9892124964943157e-05, "loss": 1.81, "step": 23238 }, { "epoch": 0.3, "grad_norm": 3.8167099952697754, "learning_rate": 1.9892109571549495e-05, "loss": 1.8497, "step": 23239 }, { "epoch": 0.3, "grad_norm": 4.410500526428223, "learning_rate": 1.9892094177063573e-05, "loss": 2.1645, "step": 23240 }, { "epoch": 0.3, "grad_norm": 3.7094595432281494, "learning_rate": 1.9892078781485403e-05, "loss": 2.0569, "step": 23241 }, { "epoch": 0.3, "grad_norm": 4.558906078338623, "learning_rate": 1.9892063384814973e-05, "loss": 2.3652, "step": 23242 }, { "epoch": 0.3, "grad_norm": 3.6769890785217285, "learning_rate": 1.989204798705229e-05, "loss": 2.0055, "step": 23243 }, { "epoch": 0.3, "grad_norm": 3.9039719104766846, "learning_rate": 1.9892032588197364e-05, "loss": 1.9411, "step": 23244 }, { "epoch": 0.3, "grad_norm": 3.4504222869873047, "learning_rate": 1.9892017188250184e-05, "loss": 1.678, "step": 23245 }, { "epoch": 0.3, "grad_norm": 3.557973861694336, "learning_rate": 1.989200178721076e-05, "loss": 1.7122, "step": 23246 }, { "epoch": 0.3, "grad_norm": 3.809025526046753, "learning_rate": 1.989198638507909e-05, "loss": 2.0333, "step": 23247 }, { "epoch": 0.3, "grad_norm": 3.8493731021881104, "learning_rate": 1.9891970981855175e-05, "loss": 1.8404, "step": 23248 }, { "epoch": 0.3, "grad_norm": 4.20746374130249, "learning_rate": 1.9891955577539023e-05, "loss": 2.0739, "step": 23249 }, { "epoch": 0.3, "grad_norm": 4.019258499145508, "learning_rate": 1.9891940172130628e-05, "loss": 1.984, "step": 23250 }, { "epoch": 0.3, "grad_norm": 3.9930036067962646, "learning_rate": 1.9891924765629995e-05, "loss": 1.7402, "step": 23251 }, { "epoch": 0.3, "grad_norm": 3.9288277626037598, "learning_rate": 1.9891909358037127e-05, "loss": 1.9374, "step": 23252 }, { "epoch": 0.3, "grad_norm": 4.144259452819824, "learning_rate": 1.9891893949352026e-05, "loss": 2.295, "step": 23253 }, { "epoch": 0.3, "grad_norm": 4.044044017791748, "learning_rate": 1.989187853957469e-05, "loss": 2.0731, "step": 23254 }, { "epoch": 0.3, "grad_norm": 4.080416679382324, "learning_rate": 1.9891863128705125e-05, "loss": 2.2955, "step": 23255 }, { "epoch": 0.3, "grad_norm": 4.224486351013184, "learning_rate": 1.9891847716743325e-05, "loss": 2.0847, "step": 23256 }, { "epoch": 0.3, "grad_norm": 3.851581335067749, "learning_rate": 1.9891832303689303e-05, "loss": 1.8723, "step": 23257 }, { "epoch": 0.3, "grad_norm": 3.7913591861724854, "learning_rate": 1.9891816889543053e-05, "loss": 2.2726, "step": 23258 }, { "epoch": 0.3, "grad_norm": 3.861384868621826, "learning_rate": 1.9891801474304577e-05, "loss": 2.0866, "step": 23259 }, { "epoch": 0.3, "grad_norm": 3.6880459785461426, "learning_rate": 1.989178605797388e-05, "loss": 1.7282, "step": 23260 }, { "epoch": 0.3, "grad_norm": 4.058943748474121, "learning_rate": 1.9891770640550963e-05, "loss": 2.1683, "step": 23261 }, { "epoch": 0.3, "grad_norm": 4.193850994110107, "learning_rate": 1.989175522203582e-05, "loss": 2.0988, "step": 23262 }, { "epoch": 0.3, "grad_norm": 3.8891854286193848, "learning_rate": 1.9891739802428468e-05, "loss": 1.8851, "step": 23263 }, { "epoch": 0.3, "grad_norm": 4.709659576416016, "learning_rate": 1.9891724381728896e-05, "loss": 2.1375, "step": 23264 }, { "epoch": 0.3, "grad_norm": 3.9105615615844727, "learning_rate": 1.989170895993711e-05, "loss": 2.0511, "step": 23265 }, { "epoch": 0.3, "grad_norm": 3.965447425842285, "learning_rate": 1.9891693537053114e-05, "loss": 2.0348, "step": 23266 }, { "epoch": 0.3, "grad_norm": 3.7910139560699463, "learning_rate": 1.9891678113076907e-05, "loss": 2.0059, "step": 23267 }, { "epoch": 0.3, "grad_norm": 4.053134441375732, "learning_rate": 1.989166268800849e-05, "loss": 2.105, "step": 23268 }, { "epoch": 0.3, "grad_norm": 5.1940789222717285, "learning_rate": 1.9891647261847862e-05, "loss": 2.6617, "step": 23269 }, { "epoch": 0.3, "grad_norm": 3.921717405319214, "learning_rate": 1.989163183459503e-05, "loss": 2.0183, "step": 23270 }, { "epoch": 0.3, "grad_norm": 3.752560615539551, "learning_rate": 1.9891616406249997e-05, "loss": 2.1538, "step": 23271 }, { "epoch": 0.3, "grad_norm": 4.290190696716309, "learning_rate": 1.9891600976812758e-05, "loss": 2.4374, "step": 23272 }, { "epoch": 0.3, "grad_norm": 3.906069040298462, "learning_rate": 1.989158554628332e-05, "loss": 2.0094, "step": 23273 }, { "epoch": 0.3, "grad_norm": 4.037435054779053, "learning_rate": 1.9891570114661687e-05, "loss": 2.4106, "step": 23274 }, { "epoch": 0.3, "grad_norm": 3.8223087787628174, "learning_rate": 1.989155468194785e-05, "loss": 2.1203, "step": 23275 }, { "epoch": 0.3, "grad_norm": 3.833197832107544, "learning_rate": 1.9891539248141823e-05, "loss": 1.7713, "step": 23276 }, { "epoch": 0.3, "grad_norm": 3.9563846588134766, "learning_rate": 1.98915238132436e-05, "loss": 1.8935, "step": 23277 }, { "epoch": 0.3, "grad_norm": 3.938422679901123, "learning_rate": 1.9891508377253187e-05, "loss": 2.3145, "step": 23278 }, { "epoch": 0.3, "grad_norm": 4.2133097648620605, "learning_rate": 1.9891492940170582e-05, "loss": 2.5215, "step": 23279 }, { "epoch": 0.3, "grad_norm": 4.279882907867432, "learning_rate": 1.9891477501995785e-05, "loss": 2.2302, "step": 23280 }, { "epoch": 0.3, "grad_norm": 4.064009189605713, "learning_rate": 1.9891462062728807e-05, "loss": 2.329, "step": 23281 }, { "epoch": 0.3, "grad_norm": 4.658007621765137, "learning_rate": 1.9891446622369638e-05, "loss": 2.7565, "step": 23282 }, { "epoch": 0.3, "grad_norm": 4.483489990234375, "learning_rate": 1.989143118091829e-05, "loss": 2.3552, "step": 23283 }, { "epoch": 0.3, "grad_norm": 3.778059244155884, "learning_rate": 1.989141573837476e-05, "loss": 2.2588, "step": 23284 }, { "epoch": 0.3, "grad_norm": 3.7843596935272217, "learning_rate": 1.9891400294739047e-05, "loss": 1.7671, "step": 23285 }, { "epoch": 0.3, "grad_norm": 3.679723024368286, "learning_rate": 1.9891384850011156e-05, "loss": 1.672, "step": 23286 }, { "epoch": 0.3, "grad_norm": 3.220022201538086, "learning_rate": 1.989136940419109e-05, "loss": 1.4709, "step": 23287 }, { "epoch": 0.3, "grad_norm": 3.8812849521636963, "learning_rate": 1.9891353957278848e-05, "loss": 1.9711, "step": 23288 }, { "epoch": 0.3, "grad_norm": 4.221950054168701, "learning_rate": 1.9891338509274437e-05, "loss": 2.0287, "step": 23289 }, { "epoch": 0.3, "grad_norm": 4.094718933105469, "learning_rate": 1.989132306017785e-05, "loss": 2.141, "step": 23290 }, { "epoch": 0.3, "grad_norm": 3.18182373046875, "learning_rate": 1.9891307609989093e-05, "loss": 1.9582, "step": 23291 }, { "epoch": 0.3, "grad_norm": 3.7325074672698975, "learning_rate": 1.9891292158708165e-05, "loss": 1.7504, "step": 23292 }, { "epoch": 0.3, "grad_norm": 3.902245283126831, "learning_rate": 1.9891276706335077e-05, "loss": 2.1917, "step": 23293 }, { "epoch": 0.3, "grad_norm": 4.110717296600342, "learning_rate": 1.989126125286982e-05, "loss": 1.9651, "step": 23294 }, { "epoch": 0.3, "grad_norm": 3.959232807159424, "learning_rate": 1.98912457983124e-05, "loss": 2.1314, "step": 23295 }, { "epoch": 0.3, "grad_norm": 3.8492743968963623, "learning_rate": 1.989123034266282e-05, "loss": 1.8817, "step": 23296 }, { "epoch": 0.3, "grad_norm": 3.7578017711639404, "learning_rate": 1.9891214885921082e-05, "loss": 1.8592, "step": 23297 }, { "epoch": 0.3, "grad_norm": 4.126053810119629, "learning_rate": 1.9891199428087183e-05, "loss": 2.2454, "step": 23298 }, { "epoch": 0.3, "grad_norm": 4.2234015464782715, "learning_rate": 1.989118396916113e-05, "loss": 2.544, "step": 23299 }, { "epoch": 0.3, "grad_norm": 4.8941826820373535, "learning_rate": 1.9891168509142922e-05, "loss": 2.8577, "step": 23300 }, { "epoch": 0.3, "grad_norm": 3.9003958702087402, "learning_rate": 1.989115304803256e-05, "loss": 1.9793, "step": 23301 }, { "epoch": 0.3, "grad_norm": 5.162700653076172, "learning_rate": 1.9891137585830048e-05, "loss": 2.4365, "step": 23302 }, { "epoch": 0.3, "grad_norm": 4.11905574798584, "learning_rate": 1.9891122122535388e-05, "loss": 1.8334, "step": 23303 }, { "epoch": 0.3, "grad_norm": 4.006232738494873, "learning_rate": 1.9891106658148577e-05, "loss": 2.1722, "step": 23304 }, { "epoch": 0.3, "grad_norm": 3.6680831909179688, "learning_rate": 1.9891091192669623e-05, "loss": 1.8518, "step": 23305 }, { "epoch": 0.3, "grad_norm": 3.557194232940674, "learning_rate": 1.9891075726098525e-05, "loss": 1.6565, "step": 23306 }, { "epoch": 0.3, "grad_norm": 3.8203017711639404, "learning_rate": 1.989106025843528e-05, "loss": 2.1973, "step": 23307 }, { "epoch": 0.3, "grad_norm": 4.3487677574157715, "learning_rate": 1.98910447896799e-05, "loss": 2.1801, "step": 23308 }, { "epoch": 0.3, "grad_norm": 3.915902614593506, "learning_rate": 1.9891029319832376e-05, "loss": 2.2582, "step": 23309 }, { "epoch": 0.3, "grad_norm": 4.05670166015625, "learning_rate": 1.9891013848892715e-05, "loss": 2.1649, "step": 23310 }, { "epoch": 0.3, "grad_norm": 4.134002208709717, "learning_rate": 1.989099837686092e-05, "loss": 2.3327, "step": 23311 }, { "epoch": 0.3, "grad_norm": 4.11303186416626, "learning_rate": 1.9890982903736992e-05, "loss": 2.5895, "step": 23312 }, { "epoch": 0.3, "grad_norm": 4.093637466430664, "learning_rate": 1.9890967429520927e-05, "loss": 2.0654, "step": 23313 }, { "epoch": 0.3, "grad_norm": 3.8071982860565186, "learning_rate": 1.9890951954212738e-05, "loss": 1.7766, "step": 23314 }, { "epoch": 0.3, "grad_norm": 3.6791279315948486, "learning_rate": 1.989093647781242e-05, "loss": 1.6557, "step": 23315 }, { "epoch": 0.3, "grad_norm": 3.669583320617676, "learning_rate": 1.989092100031997e-05, "loss": 1.741, "step": 23316 }, { "epoch": 0.3, "grad_norm": 4.303121089935303, "learning_rate": 1.9890905521735395e-05, "loss": 2.3997, "step": 23317 }, { "epoch": 0.3, "grad_norm": 4.395397186279297, "learning_rate": 1.9890890042058698e-05, "loss": 2.2423, "step": 23318 }, { "epoch": 0.3, "grad_norm": 4.516530513763428, "learning_rate": 1.9890874561289877e-05, "loss": 2.4832, "step": 23319 }, { "epoch": 0.3, "grad_norm": 4.156576156616211, "learning_rate": 1.9890859079428937e-05, "loss": 2.1206, "step": 23320 }, { "epoch": 0.3, "grad_norm": 3.9485414028167725, "learning_rate": 1.989084359647588e-05, "loss": 2.0382, "step": 23321 }, { "epoch": 0.3, "grad_norm": 3.9135005474090576, "learning_rate": 1.9890828112430704e-05, "loss": 1.9114, "step": 23322 }, { "epoch": 0.3, "grad_norm": 3.835814952850342, "learning_rate": 1.9890812627293413e-05, "loss": 1.916, "step": 23323 }, { "epoch": 0.3, "grad_norm": 4.015039443969727, "learning_rate": 1.9890797141064008e-05, "loss": 2.3154, "step": 23324 }, { "epoch": 0.3, "grad_norm": 4.4572224617004395, "learning_rate": 1.9890781653742493e-05, "loss": 2.746, "step": 23325 }, { "epoch": 0.3, "grad_norm": 4.162439823150635, "learning_rate": 1.9890766165328864e-05, "loss": 2.3512, "step": 23326 }, { "epoch": 0.3, "grad_norm": 4.196985721588135, "learning_rate": 1.989075067582313e-05, "loss": 2.6695, "step": 23327 }, { "epoch": 0.3, "grad_norm": 4.692340850830078, "learning_rate": 1.989073518522529e-05, "loss": 2.5501, "step": 23328 }, { "epoch": 0.3, "grad_norm": 4.615701198577881, "learning_rate": 1.9890719693535342e-05, "loss": 2.4669, "step": 23329 }, { "epoch": 0.3, "grad_norm": 3.8430492877960205, "learning_rate": 1.9890704200753293e-05, "loss": 2.0132, "step": 23330 }, { "epoch": 0.3, "grad_norm": 3.916181802749634, "learning_rate": 1.9890688706879142e-05, "loss": 1.8999, "step": 23331 }, { "epoch": 0.3, "grad_norm": 4.301630020141602, "learning_rate": 1.9890673211912893e-05, "loss": 2.3695, "step": 23332 }, { "epoch": 0.3, "grad_norm": 3.9959185123443604, "learning_rate": 1.9890657715854542e-05, "loss": 2.1337, "step": 23333 }, { "epoch": 0.3, "grad_norm": 4.019157409667969, "learning_rate": 1.9890642218704097e-05, "loss": 2.0427, "step": 23334 }, { "epoch": 0.3, "grad_norm": 3.882284164428711, "learning_rate": 1.9890626720461556e-05, "loss": 1.8897, "step": 23335 }, { "epoch": 0.3, "grad_norm": 3.3430416584014893, "learning_rate": 1.9890611221126925e-05, "loss": 1.4818, "step": 23336 }, { "epoch": 0.3, "grad_norm": 4.55689001083374, "learning_rate": 1.98905957207002e-05, "loss": 1.935, "step": 23337 }, { "epoch": 0.3, "grad_norm": 3.597349166870117, "learning_rate": 1.9890580219181387e-05, "loss": 1.689, "step": 23338 }, { "epoch": 0.3, "grad_norm": 3.485116720199585, "learning_rate": 1.9890564716570484e-05, "loss": 1.8923, "step": 23339 }, { "epoch": 0.3, "grad_norm": 3.8145878314971924, "learning_rate": 1.9890549212867497e-05, "loss": 2.0192, "step": 23340 }, { "epoch": 0.3, "grad_norm": 4.559315204620361, "learning_rate": 1.9890533708072424e-05, "loss": 2.3755, "step": 23341 }, { "epoch": 0.3, "grad_norm": 4.247946739196777, "learning_rate": 1.9890518202185268e-05, "loss": 2.1539, "step": 23342 }, { "epoch": 0.3, "grad_norm": 4.204800128936768, "learning_rate": 1.989050269520603e-05, "loss": 2.1073, "step": 23343 }, { "epoch": 0.3, "grad_norm": 3.751678228378296, "learning_rate": 1.9890487187134718e-05, "loss": 1.7244, "step": 23344 }, { "epoch": 0.3, "grad_norm": 3.7929303646087646, "learning_rate": 1.9890471677971323e-05, "loss": 2.03, "step": 23345 }, { "epoch": 0.3, "grad_norm": 4.169190406799316, "learning_rate": 1.9890456167715856e-05, "loss": 2.0761, "step": 23346 }, { "epoch": 0.3, "grad_norm": 3.926854133605957, "learning_rate": 1.9890440656368312e-05, "loss": 2.2699, "step": 23347 }, { "epoch": 0.3, "grad_norm": 4.529273986816406, "learning_rate": 1.98904251439287e-05, "loss": 2.4352, "step": 23348 }, { "epoch": 0.3, "grad_norm": 4.186059951782227, "learning_rate": 1.9890409630397013e-05, "loss": 2.5744, "step": 23349 }, { "epoch": 0.3, "grad_norm": 3.421900987625122, "learning_rate": 1.9890394115773255e-05, "loss": 1.4385, "step": 23350 }, { "epoch": 0.3, "grad_norm": 4.404055595397949, "learning_rate": 1.9890378600057434e-05, "loss": 2.1544, "step": 23351 }, { "epoch": 0.3, "grad_norm": 4.481174945831299, "learning_rate": 1.9890363083249543e-05, "loss": 2.8, "step": 23352 }, { "epoch": 0.3, "grad_norm": 4.016655445098877, "learning_rate": 1.9890347565349593e-05, "loss": 2.1707, "step": 23353 }, { "epoch": 0.3, "grad_norm": 3.909540891647339, "learning_rate": 1.989033204635758e-05, "loss": 2.158, "step": 23354 }, { "epoch": 0.3, "grad_norm": 3.741549491882324, "learning_rate": 1.9890316526273502e-05, "loss": 2.1454, "step": 23355 }, { "epoch": 0.3, "grad_norm": 5.902995586395264, "learning_rate": 1.989030100509737e-05, "loss": 2.4884, "step": 23356 }, { "epoch": 0.3, "grad_norm": 4.693362236022949, "learning_rate": 1.989028548282918e-05, "loss": 2.204, "step": 23357 }, { "epoch": 0.3, "grad_norm": 4.071565628051758, "learning_rate": 1.9890269959468934e-05, "loss": 1.9216, "step": 23358 }, { "epoch": 0.3, "grad_norm": 4.170571327209473, "learning_rate": 1.9890254435016636e-05, "loss": 2.5178, "step": 23359 }, { "epoch": 0.3, "grad_norm": 3.890268564224243, "learning_rate": 1.9890238909472285e-05, "loss": 1.6843, "step": 23360 }, { "epoch": 0.3, "grad_norm": 4.226413726806641, "learning_rate": 1.989022338283588e-05, "loss": 1.9997, "step": 23361 }, { "epoch": 0.3, "grad_norm": 4.236897945404053, "learning_rate": 1.989020785510743e-05, "loss": 2.2568, "step": 23362 }, { "epoch": 0.3, "grad_norm": 4.015254497528076, "learning_rate": 1.9890192326286937e-05, "loss": 1.7207, "step": 23363 }, { "epoch": 0.3, "grad_norm": 4.229968070983887, "learning_rate": 1.9890176796374393e-05, "loss": 2.0715, "step": 23364 }, { "epoch": 0.3, "grad_norm": 4.401108741760254, "learning_rate": 1.989016126536981e-05, "loss": 2.1207, "step": 23365 }, { "epoch": 0.3, "grad_norm": 4.252239227294922, "learning_rate": 1.9890145733273184e-05, "loss": 1.9129, "step": 23366 }, { "epoch": 0.3, "grad_norm": 4.277008056640625, "learning_rate": 1.989013020008452e-05, "loss": 2.5354, "step": 23367 }, { "epoch": 0.3, "grad_norm": 4.589428424835205, "learning_rate": 1.9890114665803814e-05, "loss": 2.185, "step": 23368 }, { "epoch": 0.3, "grad_norm": 4.61681604385376, "learning_rate": 1.989009913043107e-05, "loss": 1.9499, "step": 23369 }, { "epoch": 0.3, "grad_norm": 3.973389148712158, "learning_rate": 1.9890083593966297e-05, "loss": 1.5948, "step": 23370 }, { "epoch": 0.3, "grad_norm": 3.9249088764190674, "learning_rate": 1.9890068056409488e-05, "loss": 1.6575, "step": 23371 }, { "epoch": 0.3, "grad_norm": 4.978504657745361, "learning_rate": 1.9890052517760646e-05, "loss": 2.3724, "step": 23372 }, { "epoch": 0.3, "grad_norm": 5.9107232093811035, "learning_rate": 1.989003697801978e-05, "loss": 2.8288, "step": 23373 }, { "epoch": 0.3, "grad_norm": 4.074819564819336, "learning_rate": 1.9890021437186882e-05, "loss": 2.2974, "step": 23374 }, { "epoch": 0.3, "grad_norm": 3.9256362915039062, "learning_rate": 1.9890005895261958e-05, "loss": 2.3215, "step": 23375 }, { "epoch": 0.3, "grad_norm": 4.195743560791016, "learning_rate": 1.988999035224501e-05, "loss": 2.0135, "step": 23376 }, { "epoch": 0.3, "grad_norm": 3.929422616958618, "learning_rate": 1.9889974808136042e-05, "loss": 2.0151, "step": 23377 }, { "epoch": 0.3, "grad_norm": 4.037558078765869, "learning_rate": 1.9889959262935048e-05, "loss": 1.8658, "step": 23378 }, { "epoch": 0.3, "grad_norm": 3.9746220111846924, "learning_rate": 1.988994371664204e-05, "loss": 2.0612, "step": 23379 }, { "epoch": 0.3, "grad_norm": 4.583277225494385, "learning_rate": 1.9889928169257008e-05, "loss": 2.2327, "step": 23380 }, { "epoch": 0.3, "grad_norm": 4.414013385772705, "learning_rate": 1.9889912620779964e-05, "loss": 2.1079, "step": 23381 }, { "epoch": 0.3, "grad_norm": 4.289599895477295, "learning_rate": 1.9889897071210903e-05, "loss": 2.2736, "step": 23382 }, { "epoch": 0.3, "grad_norm": 4.653954029083252, "learning_rate": 1.9889881520549833e-05, "loss": 2.5573, "step": 23383 }, { "epoch": 0.3, "grad_norm": 4.551776885986328, "learning_rate": 1.9889865968796754e-05, "loss": 2.3072, "step": 23384 }, { "epoch": 0.3, "grad_norm": 4.182258605957031, "learning_rate": 1.988985041595166e-05, "loss": 2.1581, "step": 23385 }, { "epoch": 0.3, "grad_norm": 3.9503042697906494, "learning_rate": 1.988983486201456e-05, "loss": 2.0005, "step": 23386 }, { "epoch": 0.3, "grad_norm": 4.342808723449707, "learning_rate": 1.988981930698546e-05, "loss": 2.3912, "step": 23387 }, { "epoch": 0.3, "grad_norm": 3.55092716217041, "learning_rate": 1.9889803750864352e-05, "loss": 1.8516, "step": 23388 }, { "epoch": 0.3, "grad_norm": 4.1659722328186035, "learning_rate": 1.988978819365124e-05, "loss": 2.0877, "step": 23389 }, { "epoch": 0.3, "grad_norm": 4.760610103607178, "learning_rate": 1.9889772635346133e-05, "loss": 1.8833, "step": 23390 }, { "epoch": 0.3, "grad_norm": 3.9202139377593994, "learning_rate": 1.9889757075949026e-05, "loss": 2.0203, "step": 23391 }, { "epoch": 0.3, "grad_norm": 4.078895568847656, "learning_rate": 1.988974151545992e-05, "loss": 2.7004, "step": 23392 }, { "epoch": 0.3, "grad_norm": 4.411016941070557, "learning_rate": 1.9889725953878817e-05, "loss": 2.3511, "step": 23393 }, { "epoch": 0.3, "grad_norm": 3.3864331245422363, "learning_rate": 1.9889710391205725e-05, "loss": 1.6808, "step": 23394 }, { "epoch": 0.3, "grad_norm": 3.865795612335205, "learning_rate": 1.988969482744064e-05, "loss": 1.592, "step": 23395 }, { "epoch": 0.3, "grad_norm": 4.2078633308410645, "learning_rate": 1.988967926258356e-05, "loss": 2.5414, "step": 23396 }, { "epoch": 0.3, "grad_norm": 4.202749252319336, "learning_rate": 1.9889663696634498e-05, "loss": 1.8781, "step": 23397 }, { "epoch": 0.3, "grad_norm": 3.5468690395355225, "learning_rate": 1.9889648129593447e-05, "loss": 1.4998, "step": 23398 }, { "epoch": 0.3, "grad_norm": 3.2758677005767822, "learning_rate": 1.988963256146041e-05, "loss": 1.6288, "step": 23399 }, { "epoch": 0.3, "grad_norm": 6.231438636779785, "learning_rate": 1.988961699223539e-05, "loss": 2.4671, "step": 23400 }, { "epoch": 0.3, "grad_norm": 3.8148248195648193, "learning_rate": 1.988960142191839e-05, "loss": 1.8566, "step": 23401 }, { "epoch": 0.3, "grad_norm": 4.208024501800537, "learning_rate": 1.988958585050941e-05, "loss": 2.3475, "step": 23402 }, { "epoch": 0.3, "grad_norm": 4.359589576721191, "learning_rate": 1.988957027800845e-05, "loss": 2.142, "step": 23403 }, { "epoch": 0.3, "grad_norm": 3.7911930084228516, "learning_rate": 1.9889554704415517e-05, "loss": 2.2395, "step": 23404 }, { "epoch": 0.3, "grad_norm": 3.5447776317596436, "learning_rate": 1.9889539129730607e-05, "loss": 1.924, "step": 23405 }, { "epoch": 0.3, "grad_norm": 4.315688610076904, "learning_rate": 1.9889523553953726e-05, "loss": 2.0918, "step": 23406 }, { "epoch": 0.3, "grad_norm": 3.7078235149383545, "learning_rate": 1.988950797708487e-05, "loss": 1.6349, "step": 23407 }, { "epoch": 0.3, "grad_norm": 3.715348958969116, "learning_rate": 1.9889492399124047e-05, "loss": 1.9725, "step": 23408 }, { "epoch": 0.3, "grad_norm": 3.2928996086120605, "learning_rate": 1.9889476820071256e-05, "loss": 1.7747, "step": 23409 }, { "epoch": 0.3, "grad_norm": 3.8429436683654785, "learning_rate": 1.98894612399265e-05, "loss": 1.9714, "step": 23410 }, { "epoch": 0.3, "grad_norm": 3.9183812141418457, "learning_rate": 1.988944565868978e-05, "loss": 2.1288, "step": 23411 }, { "epoch": 0.3, "grad_norm": 3.5553979873657227, "learning_rate": 1.9889430076361096e-05, "loss": 1.8851, "step": 23412 }, { "epoch": 0.3, "grad_norm": 4.027029991149902, "learning_rate": 1.9889414492940447e-05, "loss": 1.9639, "step": 23413 }, { "epoch": 0.3, "grad_norm": 4.068516254425049, "learning_rate": 1.9889398908427848e-05, "loss": 2.2752, "step": 23414 }, { "epoch": 0.3, "grad_norm": 3.104085922241211, "learning_rate": 1.9889383322823287e-05, "loss": 1.4605, "step": 23415 }, { "epoch": 0.3, "grad_norm": 3.9814300537109375, "learning_rate": 1.988936773612677e-05, "loss": 1.9859, "step": 23416 }, { "epoch": 0.3, "grad_norm": 4.320509433746338, "learning_rate": 1.9889352148338297e-05, "loss": 2.0861, "step": 23417 }, { "epoch": 0.3, "grad_norm": 4.350501537322998, "learning_rate": 1.9889336559457873e-05, "loss": 2.436, "step": 23418 }, { "epoch": 0.3, "grad_norm": 3.8118786811828613, "learning_rate": 1.98893209694855e-05, "loss": 1.937, "step": 23419 }, { "epoch": 0.3, "grad_norm": 3.9083728790283203, "learning_rate": 1.9889305378421178e-05, "loss": 1.9703, "step": 23420 }, { "epoch": 0.3, "grad_norm": 3.7524707317352295, "learning_rate": 1.9889289786264906e-05, "loss": 2.327, "step": 23421 }, { "epoch": 0.3, "grad_norm": 4.227956295013428, "learning_rate": 1.9889274193016696e-05, "loss": 1.6306, "step": 23422 }, { "epoch": 0.3, "grad_norm": 3.885605573654175, "learning_rate": 1.9889258598676536e-05, "loss": 1.8194, "step": 23423 }, { "epoch": 0.3, "grad_norm": 3.869723081588745, "learning_rate": 1.9889243003244435e-05, "loss": 2.0104, "step": 23424 }, { "epoch": 0.3, "grad_norm": 4.098839282989502, "learning_rate": 1.9889227406720396e-05, "loss": 2.2267, "step": 23425 }, { "epoch": 0.3, "grad_norm": 4.379004955291748, "learning_rate": 1.9889211809104414e-05, "loss": 2.0445, "step": 23426 }, { "epoch": 0.3, "grad_norm": 3.2326951026916504, "learning_rate": 1.9889196210396497e-05, "loss": 1.776, "step": 23427 }, { "epoch": 0.3, "grad_norm": 3.7681403160095215, "learning_rate": 1.9889180610596647e-05, "loss": 2.149, "step": 23428 }, { "epoch": 0.3, "grad_norm": 3.999559164047241, "learning_rate": 1.9889165009704863e-05, "loss": 2.4074, "step": 23429 }, { "epoch": 0.3, "grad_norm": 4.231451511383057, "learning_rate": 1.988914940772115e-05, "loss": 2.3276, "step": 23430 }, { "epoch": 0.3, "grad_norm": 4.126073360443115, "learning_rate": 1.98891338046455e-05, "loss": 2.0436, "step": 23431 }, { "epoch": 0.3, "grad_norm": 3.915266752243042, "learning_rate": 1.9889118200477928e-05, "loss": 2.0909, "step": 23432 }, { "epoch": 0.3, "grad_norm": 4.05150842666626, "learning_rate": 1.988910259521843e-05, "loss": 2.3562, "step": 23433 }, { "epoch": 0.3, "grad_norm": 4.0102691650390625, "learning_rate": 1.9889086988867002e-05, "loss": 2.0412, "step": 23434 }, { "epoch": 0.3, "grad_norm": 4.179862976074219, "learning_rate": 1.9889071381423656e-05, "loss": 1.8177, "step": 23435 }, { "epoch": 0.3, "grad_norm": 3.7728047370910645, "learning_rate": 1.9889055772888385e-05, "loss": 1.8632, "step": 23436 }, { "epoch": 0.3, "grad_norm": 4.272509574890137, "learning_rate": 1.9889040163261194e-05, "loss": 2.3035, "step": 23437 }, { "epoch": 0.3, "grad_norm": 3.684154510498047, "learning_rate": 1.988902455254209e-05, "loss": 1.6255, "step": 23438 }, { "epoch": 0.3, "grad_norm": 4.282339096069336, "learning_rate": 1.988900894073107e-05, "loss": 2.3961, "step": 23439 }, { "epoch": 0.3, "grad_norm": 4.252037525177002, "learning_rate": 1.988899332782813e-05, "loss": 2.1305, "step": 23440 }, { "epoch": 0.3, "grad_norm": 3.679914951324463, "learning_rate": 1.9888977713833282e-05, "loss": 1.8696, "step": 23441 }, { "epoch": 0.3, "grad_norm": 4.724677085876465, "learning_rate": 1.988896209874652e-05, "loss": 2.6618, "step": 23442 }, { "epoch": 0.3, "grad_norm": 3.533735990524292, "learning_rate": 1.988894648256785e-05, "loss": 1.7824, "step": 23443 }, { "epoch": 0.3, "grad_norm": 3.8412833213806152, "learning_rate": 1.9888930865297272e-05, "loss": 1.8155, "step": 23444 }, { "epoch": 0.3, "grad_norm": 4.551708221435547, "learning_rate": 1.988891524693479e-05, "loss": 2.7411, "step": 23445 }, { "epoch": 0.3, "grad_norm": 3.6860435009002686, "learning_rate": 1.9888899627480404e-05, "loss": 1.7288, "step": 23446 }, { "epoch": 0.3, "grad_norm": 4.975376129150391, "learning_rate": 1.9888884006934115e-05, "loss": 2.0108, "step": 23447 }, { "epoch": 0.3, "grad_norm": 3.9583802223205566, "learning_rate": 1.9888868385295926e-05, "loss": 2.0509, "step": 23448 }, { "epoch": 0.3, "grad_norm": 4.6717305183410645, "learning_rate": 1.9888852762565836e-05, "loss": 2.7212, "step": 23449 }, { "epoch": 0.3, "grad_norm": 3.7794582843780518, "learning_rate": 1.988883713874385e-05, "loss": 1.947, "step": 23450 }, { "epoch": 0.3, "grad_norm": 4.370059013366699, "learning_rate": 1.9888821513829968e-05, "loss": 2.2994, "step": 23451 }, { "epoch": 0.3, "grad_norm": 3.613347053527832, "learning_rate": 1.9888805887824193e-05, "loss": 2.0201, "step": 23452 }, { "epoch": 0.3, "grad_norm": 4.570078372955322, "learning_rate": 1.988879026072653e-05, "loss": 2.5673, "step": 23453 }, { "epoch": 0.3, "grad_norm": 4.099215984344482, "learning_rate": 1.9888774632536972e-05, "loss": 1.844, "step": 23454 }, { "epoch": 0.3, "grad_norm": 3.7945761680603027, "learning_rate": 1.9888759003255527e-05, "loss": 1.7326, "step": 23455 }, { "epoch": 0.3, "grad_norm": 4.318455219268799, "learning_rate": 1.988874337288219e-05, "loss": 2.0935, "step": 23456 }, { "epoch": 0.3, "grad_norm": 3.7179455757141113, "learning_rate": 1.9888727741416977e-05, "loss": 2.1611, "step": 23457 }, { "epoch": 0.3, "grad_norm": 3.777681589126587, "learning_rate": 1.9888712108859876e-05, "loss": 2.1207, "step": 23458 }, { "epoch": 0.3, "grad_norm": 3.2227275371551514, "learning_rate": 1.9888696475210895e-05, "loss": 1.4303, "step": 23459 }, { "epoch": 0.3, "grad_norm": 4.473508834838867, "learning_rate": 1.988868084047003e-05, "loss": 1.8683, "step": 23460 }, { "epoch": 0.3, "grad_norm": 3.9925858974456787, "learning_rate": 1.988866520463729e-05, "loss": 1.6476, "step": 23461 }, { "epoch": 0.3, "grad_norm": 3.377058982849121, "learning_rate": 1.9888649567712674e-05, "loss": 1.8387, "step": 23462 }, { "epoch": 0.3, "grad_norm": 4.063830852508545, "learning_rate": 1.9888633929696185e-05, "loss": 2.2257, "step": 23463 }, { "epoch": 0.3, "grad_norm": 3.977139472961426, "learning_rate": 1.988861829058782e-05, "loss": 2.0822, "step": 23464 }, { "epoch": 0.3, "grad_norm": 3.706373453140259, "learning_rate": 1.9888602650387584e-05, "loss": 1.822, "step": 23465 }, { "epoch": 0.3, "grad_norm": 3.9715850353240967, "learning_rate": 1.988858700909548e-05, "loss": 2.1159, "step": 23466 }, { "epoch": 0.3, "grad_norm": 4.003974914550781, "learning_rate": 1.9888571366711505e-05, "loss": 1.7793, "step": 23467 }, { "epoch": 0.3, "grad_norm": 4.257954120635986, "learning_rate": 1.988855572323567e-05, "loss": 2.2, "step": 23468 }, { "epoch": 0.3, "grad_norm": 4.28904390335083, "learning_rate": 1.9888540078667965e-05, "loss": 2.0637, "step": 23469 }, { "epoch": 0.3, "grad_norm": 3.8582162857055664, "learning_rate": 1.98885244330084e-05, "loss": 1.9786, "step": 23470 }, { "epoch": 0.3, "grad_norm": 4.1750969886779785, "learning_rate": 1.9888508786256974e-05, "loss": 2.0609, "step": 23471 }, { "epoch": 0.3, "grad_norm": 3.775874376296997, "learning_rate": 1.9888493138413686e-05, "loss": 2.0179, "step": 23472 }, { "epoch": 0.3, "grad_norm": 4.073462963104248, "learning_rate": 1.9888477489478542e-05, "loss": 1.7987, "step": 23473 }, { "epoch": 0.3, "grad_norm": 4.062949180603027, "learning_rate": 1.9888461839451546e-05, "loss": 2.4168, "step": 23474 }, { "epoch": 0.3, "grad_norm": 4.036409854888916, "learning_rate": 1.9888446188332694e-05, "loss": 2.3795, "step": 23475 }, { "epoch": 0.3, "grad_norm": 4.118657112121582, "learning_rate": 1.988843053612199e-05, "loss": 1.9208, "step": 23476 }, { "epoch": 0.3, "grad_norm": 3.4564309120178223, "learning_rate": 1.9888414882819435e-05, "loss": 1.6426, "step": 23477 }, { "epoch": 0.3, "grad_norm": 4.3996052742004395, "learning_rate": 1.988839922842503e-05, "loss": 2.3467, "step": 23478 }, { "epoch": 0.3, "grad_norm": 3.770573377609253, "learning_rate": 1.988838357293878e-05, "loss": 2.1774, "step": 23479 }, { "epoch": 0.3, "grad_norm": 3.8485565185546875, "learning_rate": 1.9888367916360684e-05, "loss": 2.4104, "step": 23480 }, { "epoch": 0.3, "grad_norm": 4.701595783233643, "learning_rate": 1.9888352258690743e-05, "loss": 2.5241, "step": 23481 }, { "epoch": 0.3, "grad_norm": 3.350224256515503, "learning_rate": 1.988833659992896e-05, "loss": 1.2431, "step": 23482 }, { "epoch": 0.3, "grad_norm": 3.9814369678497314, "learning_rate": 1.9888320940075338e-05, "loss": 2.0548, "step": 23483 }, { "epoch": 0.3, "grad_norm": 4.095105171203613, "learning_rate": 1.9888305279129876e-05, "loss": 2.1517, "step": 23484 }, { "epoch": 0.3, "grad_norm": 4.327674865722656, "learning_rate": 1.9888289617092583e-05, "loss": 2.188, "step": 23485 }, { "epoch": 0.3, "grad_norm": 4.194705486297607, "learning_rate": 1.9888273953963447e-05, "loss": 2.1149, "step": 23486 }, { "epoch": 0.3, "grad_norm": 3.419090986251831, "learning_rate": 1.9888258289742482e-05, "loss": 1.574, "step": 23487 }, { "epoch": 0.3, "grad_norm": 3.8462016582489014, "learning_rate": 1.988824262442969e-05, "loss": 2.4871, "step": 23488 }, { "epoch": 0.3, "grad_norm": 4.160234451293945, "learning_rate": 1.9888226958025063e-05, "loss": 2.1351, "step": 23489 }, { "epoch": 0.3, "grad_norm": 4.4087300300598145, "learning_rate": 1.988821129052861e-05, "loss": 2.3558, "step": 23490 }, { "epoch": 0.3, "grad_norm": 3.7320785522460938, "learning_rate": 1.9888195621940328e-05, "loss": 1.9402, "step": 23491 }, { "epoch": 0.3, "grad_norm": 3.6961216926574707, "learning_rate": 1.9888179952260223e-05, "loss": 2.0654, "step": 23492 }, { "epoch": 0.3, "grad_norm": 4.3030476570129395, "learning_rate": 1.9888164281488295e-05, "loss": 2.4554, "step": 23493 }, { "epoch": 0.3, "grad_norm": 4.131770133972168, "learning_rate": 1.9888148609624545e-05, "loss": 2.0643, "step": 23494 }, { "epoch": 0.3, "grad_norm": 4.114760398864746, "learning_rate": 1.9888132936668977e-05, "loss": 2.259, "step": 23495 }, { "epoch": 0.3, "grad_norm": 3.8138346672058105, "learning_rate": 1.988811726262159e-05, "loss": 2.1522, "step": 23496 }, { "epoch": 0.3, "grad_norm": 3.328447103500366, "learning_rate": 1.9888101587482387e-05, "loss": 1.5207, "step": 23497 }, { "epoch": 0.3, "grad_norm": 3.8621251583099365, "learning_rate": 1.9888085911251373e-05, "loss": 2.1996, "step": 23498 }, { "epoch": 0.3, "grad_norm": 4.000484943389893, "learning_rate": 1.9888070233928543e-05, "loss": 1.8108, "step": 23499 }, { "epoch": 0.3, "grad_norm": 3.678157329559326, "learning_rate": 1.9888054555513905e-05, "loss": 1.8108, "step": 23500 }, { "epoch": 0.3, "grad_norm": 4.052366256713867, "learning_rate": 1.9888038876007455e-05, "loss": 1.9623, "step": 23501 }, { "epoch": 0.31, "grad_norm": 3.908254384994507, "learning_rate": 1.98880231954092e-05, "loss": 2.1082, "step": 23502 }, { "epoch": 0.31, "grad_norm": 3.9044032096862793, "learning_rate": 1.9888007513719142e-05, "loss": 2.3011, "step": 23503 }, { "epoch": 0.31, "grad_norm": 3.9985768795013428, "learning_rate": 1.9887991830937274e-05, "loss": 2.0252, "step": 23504 }, { "epoch": 0.31, "grad_norm": 3.920776128768921, "learning_rate": 1.9887976147063606e-05, "loss": 1.8857, "step": 23505 }, { "epoch": 0.31, "grad_norm": 4.4056572914123535, "learning_rate": 1.988796046209814e-05, "loss": 2.3516, "step": 23506 }, { "epoch": 0.31, "grad_norm": 3.8709731101989746, "learning_rate": 1.9887944776040877e-05, "loss": 1.5905, "step": 23507 }, { "epoch": 0.31, "grad_norm": 4.299551010131836, "learning_rate": 1.9887929088891814e-05, "loss": 2.6085, "step": 23508 }, { "epoch": 0.31, "grad_norm": 4.068082332611084, "learning_rate": 1.9887913400650953e-05, "loss": 2.1717, "step": 23509 }, { "epoch": 0.31, "grad_norm": 4.043166160583496, "learning_rate": 1.98878977113183e-05, "loss": 2.0839, "step": 23510 }, { "epoch": 0.31, "grad_norm": 3.756781578063965, "learning_rate": 1.988788202089386e-05, "loss": 2.0421, "step": 23511 }, { "epoch": 0.31, "grad_norm": 3.583289623260498, "learning_rate": 1.988786632937763e-05, "loss": 1.8253, "step": 23512 }, { "epoch": 0.31, "grad_norm": 3.655780553817749, "learning_rate": 1.9887850636769608e-05, "loss": 1.8213, "step": 23513 }, { "epoch": 0.31, "grad_norm": 4.902281284332275, "learning_rate": 1.9887834943069803e-05, "loss": 2.8235, "step": 23514 }, { "epoch": 0.31, "grad_norm": 3.736457586288452, "learning_rate": 1.988781924827821e-05, "loss": 1.5939, "step": 23515 }, { "epoch": 0.31, "grad_norm": 3.7708332538604736, "learning_rate": 1.9887803552394835e-05, "loss": 1.8403, "step": 23516 }, { "epoch": 0.31, "grad_norm": 3.8457398414611816, "learning_rate": 1.9887787855419675e-05, "loss": 1.4063, "step": 23517 }, { "epoch": 0.31, "grad_norm": 3.9862232208251953, "learning_rate": 1.988777215735274e-05, "loss": 2.0192, "step": 23518 }, { "epoch": 0.31, "grad_norm": 3.7942590713500977, "learning_rate": 1.988775645819403e-05, "loss": 1.8594, "step": 23519 }, { "epoch": 0.31, "grad_norm": 4.244572162628174, "learning_rate": 1.988774075794354e-05, "loss": 2.1244, "step": 23520 }, { "epoch": 0.31, "grad_norm": 3.7510616779327393, "learning_rate": 1.9887725056601274e-05, "loss": 1.7543, "step": 23521 }, { "epoch": 0.31, "grad_norm": 4.086206436157227, "learning_rate": 1.9887709354167234e-05, "loss": 1.9441, "step": 23522 }, { "epoch": 0.31, "grad_norm": 3.916015863418579, "learning_rate": 1.988769365064143e-05, "loss": 1.9898, "step": 23523 }, { "epoch": 0.31, "grad_norm": 4.373450756072998, "learning_rate": 1.988767794602385e-05, "loss": 2.238, "step": 23524 }, { "epoch": 0.31, "grad_norm": 4.278793811798096, "learning_rate": 1.9887662240314508e-05, "loss": 2.3963, "step": 23525 }, { "epoch": 0.31, "grad_norm": 3.860877513885498, "learning_rate": 1.9887646533513397e-05, "loss": 1.7372, "step": 23526 }, { "epoch": 0.31, "grad_norm": 3.7740774154663086, "learning_rate": 1.9887630825620525e-05, "loss": 1.9741, "step": 23527 }, { "epoch": 0.31, "grad_norm": 4.378615379333496, "learning_rate": 1.9887615116635888e-05, "loss": 2.1023, "step": 23528 }, { "epoch": 0.31, "grad_norm": 3.6050608158111572, "learning_rate": 1.9887599406559494e-05, "loss": 1.518, "step": 23529 }, { "epoch": 0.31, "grad_norm": 3.73645281791687, "learning_rate": 1.988758369539134e-05, "loss": 1.6036, "step": 23530 }, { "epoch": 0.31, "grad_norm": 3.6592702865600586, "learning_rate": 1.9887567983131425e-05, "loss": 2.0852, "step": 23531 }, { "epoch": 0.31, "grad_norm": 3.8356711864471436, "learning_rate": 1.988755226977976e-05, "loss": 1.8528, "step": 23532 }, { "epoch": 0.31, "grad_norm": 4.02256965637207, "learning_rate": 1.9887536555336336e-05, "loss": 1.8691, "step": 23533 }, { "epoch": 0.31, "grad_norm": 4.846343994140625, "learning_rate": 1.9887520839801167e-05, "loss": 2.3667, "step": 23534 }, { "epoch": 0.31, "grad_norm": 5.214580059051514, "learning_rate": 1.9887505123174243e-05, "loss": 2.5629, "step": 23535 }, { "epoch": 0.31, "grad_norm": 4.219767093658447, "learning_rate": 1.9887489405455572e-05, "loss": 2.1837, "step": 23536 }, { "epoch": 0.31, "grad_norm": 4.020559787750244, "learning_rate": 1.9887473686645156e-05, "loss": 2.3532, "step": 23537 }, { "epoch": 0.31, "grad_norm": 4.853815078735352, "learning_rate": 1.9887457966742994e-05, "loss": 2.7046, "step": 23538 }, { "epoch": 0.31, "grad_norm": 3.722651720046997, "learning_rate": 1.9887442245749086e-05, "loss": 1.9491, "step": 23539 }, { "epoch": 0.31, "grad_norm": 4.082599639892578, "learning_rate": 1.988742652366344e-05, "loss": 2.0573, "step": 23540 }, { "epoch": 0.31, "grad_norm": 4.22964334487915, "learning_rate": 1.9887410800486054e-05, "loss": 2.3361, "step": 23541 }, { "epoch": 0.31, "grad_norm": 3.8402440547943115, "learning_rate": 1.988739507621693e-05, "loss": 1.9342, "step": 23542 }, { "epoch": 0.31, "grad_norm": 3.6680328845977783, "learning_rate": 1.9887379350856068e-05, "loss": 2.0457, "step": 23543 }, { "epoch": 0.31, "grad_norm": 4.26474666595459, "learning_rate": 1.9887363624403474e-05, "loss": 2.1783, "step": 23544 }, { "epoch": 0.31, "grad_norm": 4.035212516784668, "learning_rate": 1.9887347896859146e-05, "loss": 2.059, "step": 23545 }, { "epoch": 0.31, "grad_norm": 3.796698570251465, "learning_rate": 1.988733216822309e-05, "loss": 2.1379, "step": 23546 }, { "epoch": 0.31, "grad_norm": 4.293420314788818, "learning_rate": 1.98873164384953e-05, "loss": 2.3828, "step": 23547 }, { "epoch": 0.31, "grad_norm": 3.794271230697632, "learning_rate": 1.9887300707675785e-05, "loss": 1.846, "step": 23548 }, { "epoch": 0.31, "grad_norm": 3.9688239097595215, "learning_rate": 1.9887284975764545e-05, "loss": 1.5428, "step": 23549 }, { "epoch": 0.31, "grad_norm": 4.522562026977539, "learning_rate": 1.988726924276158e-05, "loss": 2.4737, "step": 23550 }, { "epoch": 0.31, "grad_norm": 4.0226640701293945, "learning_rate": 1.9887253508666893e-05, "loss": 2.1997, "step": 23551 }, { "epoch": 0.31, "grad_norm": 4.438762664794922, "learning_rate": 1.9887237773480485e-05, "loss": 2.0315, "step": 23552 }, { "epoch": 0.31, "grad_norm": 3.712653875350952, "learning_rate": 1.988722203720236e-05, "loss": 2.1268, "step": 23553 }, { "epoch": 0.31, "grad_norm": 3.9302544593811035, "learning_rate": 1.9887206299832514e-05, "loss": 2.0173, "step": 23554 }, { "epoch": 0.31, "grad_norm": 3.732574462890625, "learning_rate": 1.9887190561370958e-05, "loss": 1.8037, "step": 23555 }, { "epoch": 0.31, "grad_norm": 4.061233043670654, "learning_rate": 1.988717482181768e-05, "loss": 2.1847, "step": 23556 }, { "epoch": 0.31, "grad_norm": 3.829216241836548, "learning_rate": 1.98871590811727e-05, "loss": 1.922, "step": 23557 }, { "epoch": 0.31, "grad_norm": 4.41619873046875, "learning_rate": 1.9887143339436005e-05, "loss": 2.2718, "step": 23558 }, { "epoch": 0.31, "grad_norm": 4.2102789878845215, "learning_rate": 1.9887127596607604e-05, "loss": 2.1822, "step": 23559 }, { "epoch": 0.31, "grad_norm": 3.58957839012146, "learning_rate": 1.9887111852687493e-05, "loss": 1.6872, "step": 23560 }, { "epoch": 0.31, "grad_norm": 3.978300094604492, "learning_rate": 1.988709610767568e-05, "loss": 1.7611, "step": 23561 }, { "epoch": 0.31, "grad_norm": 3.7339439392089844, "learning_rate": 1.988708036157216e-05, "loss": 1.8518, "step": 23562 }, { "epoch": 0.31, "grad_norm": 4.2158026695251465, "learning_rate": 1.9887064614376944e-05, "loss": 1.8424, "step": 23563 }, { "epoch": 0.31, "grad_norm": 4.034082412719727, "learning_rate": 1.9887048866090026e-05, "loss": 2.2786, "step": 23564 }, { "epoch": 0.31, "grad_norm": 3.6331787109375, "learning_rate": 1.9887033116711407e-05, "loss": 1.9877, "step": 23565 }, { "epoch": 0.31, "grad_norm": 4.3372931480407715, "learning_rate": 1.9887017366241096e-05, "loss": 2.3389, "step": 23566 }, { "epoch": 0.31, "grad_norm": 3.2350103855133057, "learning_rate": 1.9887001614679088e-05, "loss": 1.6328, "step": 23567 }, { "epoch": 0.31, "grad_norm": 4.033432483673096, "learning_rate": 1.988698586202539e-05, "loss": 1.6723, "step": 23568 }, { "epoch": 0.31, "grad_norm": 3.9650425910949707, "learning_rate": 1.988697010828e-05, "loss": 1.9592, "step": 23569 }, { "epoch": 0.31, "grad_norm": 3.565850257873535, "learning_rate": 1.9886954353442917e-05, "loss": 1.7395, "step": 23570 }, { "epoch": 0.31, "grad_norm": 3.7297894954681396, "learning_rate": 1.9886938597514153e-05, "loss": 1.7863, "step": 23571 }, { "epoch": 0.31, "grad_norm": 3.7174453735351562, "learning_rate": 1.9886922840493695e-05, "loss": 1.8989, "step": 23572 }, { "epoch": 0.31, "grad_norm": 3.8244004249572754, "learning_rate": 1.988690708238156e-05, "loss": 1.9472, "step": 23573 }, { "epoch": 0.31, "grad_norm": 4.0063090324401855, "learning_rate": 1.9886891323177743e-05, "loss": 2.0867, "step": 23574 }, { "epoch": 0.31, "grad_norm": 4.292581081390381, "learning_rate": 1.988687556288224e-05, "loss": 2.2098, "step": 23575 }, { "epoch": 0.31, "grad_norm": 3.9675652980804443, "learning_rate": 1.988685980149506e-05, "loss": 2.0175, "step": 23576 }, { "epoch": 0.31, "grad_norm": 4.223226547241211, "learning_rate": 1.9886844039016206e-05, "loss": 2.2406, "step": 23577 }, { "epoch": 0.31, "grad_norm": 3.6087958812713623, "learning_rate": 1.9886828275445676e-05, "loss": 1.8563, "step": 23578 }, { "epoch": 0.31, "grad_norm": 4.856653213500977, "learning_rate": 1.988681251078347e-05, "loss": 2.6759, "step": 23579 }, { "epoch": 0.31, "grad_norm": 4.667789936065674, "learning_rate": 1.9886796745029594e-05, "loss": 2.1026, "step": 23580 }, { "epoch": 0.31, "grad_norm": 4.23948860168457, "learning_rate": 1.9886780978184045e-05, "loss": 2.1438, "step": 23581 }, { "epoch": 0.31, "grad_norm": 3.431382417678833, "learning_rate": 1.988676521024683e-05, "loss": 1.9178, "step": 23582 }, { "epoch": 0.31, "grad_norm": 4.7215447425842285, "learning_rate": 1.988674944121795e-05, "loss": 2.0213, "step": 23583 }, { "epoch": 0.31, "grad_norm": 3.66013503074646, "learning_rate": 1.98867336710974e-05, "loss": 2.1723, "step": 23584 }, { "epoch": 0.31, "grad_norm": 3.79103684425354, "learning_rate": 1.988671789988519e-05, "loss": 2.2192, "step": 23585 }, { "epoch": 0.31, "grad_norm": 4.562812805175781, "learning_rate": 1.988670212758132e-05, "loss": 2.1777, "step": 23586 }, { "epoch": 0.31, "grad_norm": 4.637188911437988, "learning_rate": 1.9886686354185792e-05, "loss": 2.0302, "step": 23587 }, { "epoch": 0.31, "grad_norm": 4.089221954345703, "learning_rate": 1.98866705796986e-05, "loss": 1.8709, "step": 23588 }, { "epoch": 0.31, "grad_norm": 3.4840810298919678, "learning_rate": 1.988665480411976e-05, "loss": 1.6804, "step": 23589 }, { "epoch": 0.31, "grad_norm": 5.009462356567383, "learning_rate": 1.988663902744926e-05, "loss": 2.4568, "step": 23590 }, { "epoch": 0.31, "grad_norm": 3.800640106201172, "learning_rate": 1.988662324968711e-05, "loss": 1.5469, "step": 23591 }, { "epoch": 0.31, "grad_norm": 4.3211212158203125, "learning_rate": 1.9886607470833302e-05, "loss": 2.1081, "step": 23592 }, { "epoch": 0.31, "grad_norm": 4.498840808868408, "learning_rate": 1.988659169088785e-05, "loss": 2.0297, "step": 23593 }, { "epoch": 0.31, "grad_norm": 3.743886947631836, "learning_rate": 1.9886575909850752e-05, "loss": 1.8705, "step": 23594 }, { "epoch": 0.31, "grad_norm": 4.07084321975708, "learning_rate": 1.9886560127722005e-05, "loss": 1.9487, "step": 23595 }, { "epoch": 0.31, "grad_norm": 3.5973262786865234, "learning_rate": 1.9886544344501615e-05, "loss": 1.7173, "step": 23596 }, { "epoch": 0.31, "grad_norm": 4.275271415710449, "learning_rate": 1.9886528560189587e-05, "loss": 2.2022, "step": 23597 }, { "epoch": 0.31, "grad_norm": 4.225884914398193, "learning_rate": 1.9886512774785912e-05, "loss": 2.2834, "step": 23598 }, { "epoch": 0.31, "grad_norm": 3.619438409805298, "learning_rate": 1.9886496988290605e-05, "loss": 1.8368, "step": 23599 }, { "epoch": 0.31, "grad_norm": 3.7337074279785156, "learning_rate": 1.988648120070366e-05, "loss": 2.0885, "step": 23600 }, { "epoch": 0.31, "grad_norm": 4.095818996429443, "learning_rate": 1.9886465412025073e-05, "loss": 1.8383, "step": 23601 }, { "epoch": 0.31, "grad_norm": 3.9148004055023193, "learning_rate": 1.988644962225486e-05, "loss": 1.9596, "step": 23602 }, { "epoch": 0.31, "grad_norm": 3.6104846000671387, "learning_rate": 1.988643383139301e-05, "loss": 2.0978, "step": 23603 }, { "epoch": 0.31, "grad_norm": 3.826575756072998, "learning_rate": 1.9886418039439534e-05, "loss": 1.95, "step": 23604 }, { "epoch": 0.31, "grad_norm": 4.281100749969482, "learning_rate": 1.9886402246394426e-05, "loss": 2.6114, "step": 23605 }, { "epoch": 0.31, "grad_norm": 3.5243825912475586, "learning_rate": 1.9886386452257692e-05, "loss": 2.082, "step": 23606 }, { "epoch": 0.31, "grad_norm": 4.118173122406006, "learning_rate": 1.9886370657029333e-05, "loss": 2.3164, "step": 23607 }, { "epoch": 0.31, "grad_norm": 4.120785236358643, "learning_rate": 1.9886354860709355e-05, "loss": 2.085, "step": 23608 }, { "epoch": 0.31, "grad_norm": 3.948641061782837, "learning_rate": 1.9886339063297752e-05, "loss": 1.989, "step": 23609 }, { "epoch": 0.31, "grad_norm": 4.144916534423828, "learning_rate": 1.988632326479453e-05, "loss": 2.1867, "step": 23610 }, { "epoch": 0.31, "grad_norm": 3.9883785247802734, "learning_rate": 1.988630746519969e-05, "loss": 2.4341, "step": 23611 }, { "epoch": 0.31, "grad_norm": 4.085121154785156, "learning_rate": 1.9886291664513236e-05, "loss": 1.7695, "step": 23612 }, { "epoch": 0.31, "grad_norm": 3.432833671569824, "learning_rate": 1.9886275862735164e-05, "loss": 1.9065, "step": 23613 }, { "epoch": 0.31, "grad_norm": 3.8537373542785645, "learning_rate": 1.9886260059865484e-05, "loss": 1.947, "step": 23614 }, { "epoch": 0.31, "grad_norm": 4.002563953399658, "learning_rate": 1.9886244255904188e-05, "loss": 2.3086, "step": 23615 }, { "epoch": 0.31, "grad_norm": 3.665092706680298, "learning_rate": 1.9886228450851287e-05, "loss": 1.7978, "step": 23616 }, { "epoch": 0.31, "grad_norm": 3.822126626968384, "learning_rate": 1.9886212644706777e-05, "loss": 2.1924, "step": 23617 }, { "epoch": 0.31, "grad_norm": 3.9302821159362793, "learning_rate": 1.9886196837470662e-05, "loss": 2.1479, "step": 23618 }, { "epoch": 0.31, "grad_norm": 4.133181095123291, "learning_rate": 1.9886181029142945e-05, "loss": 2.1777, "step": 23619 }, { "epoch": 0.31, "grad_norm": 4.6743693351745605, "learning_rate": 1.9886165219723623e-05, "loss": 2.4212, "step": 23620 }, { "epoch": 0.31, "grad_norm": 3.7056829929351807, "learning_rate": 1.98861494092127e-05, "loss": 1.5869, "step": 23621 }, { "epoch": 0.31, "grad_norm": 4.064515113830566, "learning_rate": 1.988613359761018e-05, "loss": 1.91, "step": 23622 }, { "epoch": 0.31, "grad_norm": 4.533421516418457, "learning_rate": 1.9886117784916063e-05, "loss": 2.4296, "step": 23623 }, { "epoch": 0.31, "grad_norm": 4.452511787414551, "learning_rate": 1.988610197113035e-05, "loss": 2.463, "step": 23624 }, { "epoch": 0.31, "grad_norm": 4.689346790313721, "learning_rate": 1.9886086156253043e-05, "loss": 2.544, "step": 23625 }, { "epoch": 0.31, "grad_norm": 4.663408279418945, "learning_rate": 1.988607034028415e-05, "loss": 2.3808, "step": 23626 }, { "epoch": 0.31, "grad_norm": 3.9898908138275146, "learning_rate": 1.9886054523223663e-05, "loss": 1.7993, "step": 23627 }, { "epoch": 0.31, "grad_norm": 4.797102928161621, "learning_rate": 1.9886038705071584e-05, "loss": 2.4833, "step": 23628 }, { "epoch": 0.31, "grad_norm": 4.173235893249512, "learning_rate": 1.9886022885827923e-05, "loss": 2.103, "step": 23629 }, { "epoch": 0.31, "grad_norm": 3.4884543418884277, "learning_rate": 1.9886007065492677e-05, "loss": 1.9466, "step": 23630 }, { "epoch": 0.31, "grad_norm": 4.470766544342041, "learning_rate": 1.988599124406585e-05, "loss": 2.6305, "step": 23631 }, { "epoch": 0.31, "grad_norm": 4.175132751464844, "learning_rate": 1.9885975421547438e-05, "loss": 2.3805, "step": 23632 }, { "epoch": 0.31, "grad_norm": 4.4716410636901855, "learning_rate": 1.9885959597937448e-05, "loss": 2.1069, "step": 23633 }, { "epoch": 0.31, "grad_norm": 4.140956878662109, "learning_rate": 1.988594377323588e-05, "loss": 2.1006, "step": 23634 }, { "epoch": 0.31, "grad_norm": 3.8542640209198, "learning_rate": 1.988592794744274e-05, "loss": 2.1056, "step": 23635 }, { "epoch": 0.31, "grad_norm": 4.105301856994629, "learning_rate": 1.9885912120558018e-05, "loss": 2.4822, "step": 23636 }, { "epoch": 0.31, "grad_norm": 4.2702250480651855, "learning_rate": 1.988589629258173e-05, "loss": 2.1286, "step": 23637 }, { "epoch": 0.31, "grad_norm": 4.088560581207275, "learning_rate": 1.9885880463513868e-05, "loss": 1.653, "step": 23638 }, { "epoch": 0.31, "grad_norm": 3.8116238117218018, "learning_rate": 1.9885864633354443e-05, "loss": 2.2276, "step": 23639 }, { "epoch": 0.31, "grad_norm": 4.042373180389404, "learning_rate": 1.9885848802103443e-05, "loss": 2.0988, "step": 23640 }, { "epoch": 0.31, "grad_norm": 4.071450233459473, "learning_rate": 1.9885832969760882e-05, "loss": 1.6142, "step": 23641 }, { "epoch": 0.31, "grad_norm": 3.7613677978515625, "learning_rate": 1.9885817136326756e-05, "loss": 1.6145, "step": 23642 }, { "epoch": 0.31, "grad_norm": 3.944676160812378, "learning_rate": 1.9885801301801066e-05, "loss": 1.9274, "step": 23643 }, { "epoch": 0.31, "grad_norm": 4.090122699737549, "learning_rate": 1.988578546618382e-05, "loss": 1.917, "step": 23644 }, { "epoch": 0.31, "grad_norm": 3.778116464614868, "learning_rate": 1.9885769629475012e-05, "loss": 1.7509, "step": 23645 }, { "epoch": 0.31, "grad_norm": 4.094799518585205, "learning_rate": 1.988575379167465e-05, "loss": 1.8679, "step": 23646 }, { "epoch": 0.31, "grad_norm": 4.203605651855469, "learning_rate": 1.9885737952782733e-05, "loss": 1.8867, "step": 23647 }, { "epoch": 0.31, "grad_norm": 4.243361473083496, "learning_rate": 1.988572211279926e-05, "loss": 2.2693, "step": 23648 }, { "epoch": 0.31, "grad_norm": 3.709137439727783, "learning_rate": 1.988570627172424e-05, "loss": 2.0841, "step": 23649 }, { "epoch": 0.31, "grad_norm": 3.888629674911499, "learning_rate": 1.9885690429557666e-05, "loss": 1.9405, "step": 23650 }, { "epoch": 0.31, "grad_norm": 3.848445177078247, "learning_rate": 1.9885674586299547e-05, "loss": 2.0778, "step": 23651 }, { "epoch": 0.31, "grad_norm": 5.524487018585205, "learning_rate": 1.988565874194988e-05, "loss": 2.5895, "step": 23652 }, { "epoch": 0.31, "grad_norm": 4.43388557434082, "learning_rate": 1.988564289650867e-05, "loss": 2.5004, "step": 23653 }, { "epoch": 0.31, "grad_norm": 3.942373752593994, "learning_rate": 1.9885627049975915e-05, "loss": 1.8031, "step": 23654 }, { "epoch": 0.31, "grad_norm": 4.61729097366333, "learning_rate": 1.9885611202351622e-05, "loss": 2.4156, "step": 23655 }, { "epoch": 0.31, "grad_norm": 3.694662570953369, "learning_rate": 1.988559535363579e-05, "loss": 1.699, "step": 23656 }, { "epoch": 0.31, "grad_norm": 4.125635147094727, "learning_rate": 1.9885579503828415e-05, "loss": 1.8904, "step": 23657 }, { "epoch": 0.31, "grad_norm": 4.344300746917725, "learning_rate": 1.988556365292951e-05, "loss": 2.3123, "step": 23658 }, { "epoch": 0.31, "grad_norm": 3.9340922832489014, "learning_rate": 1.988554780093907e-05, "loss": 2.1312, "step": 23659 }, { "epoch": 0.31, "grad_norm": 3.4338762760162354, "learning_rate": 1.9885531947857098e-05, "loss": 1.7853, "step": 23660 }, { "epoch": 0.31, "grad_norm": 3.541781425476074, "learning_rate": 1.9885516093683594e-05, "loss": 2.0731, "step": 23661 }, { "epoch": 0.31, "grad_norm": 4.00837516784668, "learning_rate": 1.9885500238418563e-05, "loss": 2.1233, "step": 23662 }, { "epoch": 0.31, "grad_norm": 3.5389866828918457, "learning_rate": 1.9885484382062004e-05, "loss": 2.0577, "step": 23663 }, { "epoch": 0.31, "grad_norm": 3.990471601486206, "learning_rate": 1.988546852461392e-05, "loss": 2.067, "step": 23664 }, { "epoch": 0.31, "grad_norm": 3.712249279022217, "learning_rate": 1.9885452666074312e-05, "loss": 2.1358, "step": 23665 }, { "epoch": 0.31, "grad_norm": 4.0222296714782715, "learning_rate": 1.9885436806443182e-05, "loss": 2.1485, "step": 23666 }, { "epoch": 0.31, "grad_norm": 4.142920017242432, "learning_rate": 1.988542094572053e-05, "loss": 2.3571, "step": 23667 }, { "epoch": 0.31, "grad_norm": 3.4549214839935303, "learning_rate": 1.9885405083906367e-05, "loss": 1.69, "step": 23668 }, { "epoch": 0.31, "grad_norm": 4.323197841644287, "learning_rate": 1.9885389221000682e-05, "loss": 1.9246, "step": 23669 }, { "epoch": 0.31, "grad_norm": 3.784794330596924, "learning_rate": 1.9885373357003483e-05, "loss": 1.7243, "step": 23670 }, { "epoch": 0.31, "grad_norm": 3.94028902053833, "learning_rate": 1.988535749191477e-05, "loss": 2.1358, "step": 23671 }, { "epoch": 0.31, "grad_norm": 4.089061737060547, "learning_rate": 1.9885341625734548e-05, "loss": 2.1538, "step": 23672 }, { "epoch": 0.31, "grad_norm": 4.551540851593018, "learning_rate": 1.9885325758462818e-05, "loss": 2.4897, "step": 23673 }, { "epoch": 0.31, "grad_norm": 4.17362642288208, "learning_rate": 1.988530989009958e-05, "loss": 2.2344, "step": 23674 }, { "epoch": 0.31, "grad_norm": 3.2910356521606445, "learning_rate": 1.9885294020644833e-05, "loss": 1.6641, "step": 23675 }, { "epoch": 0.31, "grad_norm": 3.6012661457061768, "learning_rate": 1.9885278150098583e-05, "loss": 1.7025, "step": 23676 }, { "epoch": 0.31, "grad_norm": 3.5661847591400146, "learning_rate": 1.988526227846083e-05, "loss": 2.0056, "step": 23677 }, { "epoch": 0.31, "grad_norm": 4.217507839202881, "learning_rate": 1.9885246405731577e-05, "loss": 2.1714, "step": 23678 }, { "epoch": 0.31, "grad_norm": 3.971179723739624, "learning_rate": 1.9885230531910825e-05, "loss": 2.045, "step": 23679 }, { "epoch": 0.31, "grad_norm": 4.062404155731201, "learning_rate": 1.988521465699858e-05, "loss": 2.5235, "step": 23680 }, { "epoch": 0.31, "grad_norm": 4.044680595397949, "learning_rate": 1.9885198780994832e-05, "loss": 2.4041, "step": 23681 }, { "epoch": 0.31, "grad_norm": 3.7615127563476562, "learning_rate": 1.9885182903899597e-05, "loss": 2.0157, "step": 23682 }, { "epoch": 0.31, "grad_norm": 3.613933801651001, "learning_rate": 1.9885167025712865e-05, "loss": 1.8816, "step": 23683 }, { "epoch": 0.31, "grad_norm": 4.070341110229492, "learning_rate": 1.9885151146434646e-05, "loss": 1.9065, "step": 23684 }, { "epoch": 0.31, "grad_norm": 3.7654311656951904, "learning_rate": 1.9885135266064937e-05, "loss": 1.8533, "step": 23685 }, { "epoch": 0.31, "grad_norm": 3.9905970096588135, "learning_rate": 1.9885119384603745e-05, "loss": 2.3805, "step": 23686 }, { "epoch": 0.31, "grad_norm": 3.846817970275879, "learning_rate": 1.9885103502051063e-05, "loss": 2.0457, "step": 23687 }, { "epoch": 0.31, "grad_norm": 3.651752471923828, "learning_rate": 1.98850876184069e-05, "loss": 1.818, "step": 23688 }, { "epoch": 0.31, "grad_norm": 4.246973037719727, "learning_rate": 1.9885071733671256e-05, "loss": 2.5216, "step": 23689 }, { "epoch": 0.31, "grad_norm": 3.8584229946136475, "learning_rate": 1.988505584784413e-05, "loss": 2.0169, "step": 23690 }, { "epoch": 0.31, "grad_norm": 4.603558540344238, "learning_rate": 1.988503996092553e-05, "loss": 1.9461, "step": 23691 }, { "epoch": 0.31, "grad_norm": 4.002505779266357, "learning_rate": 1.988502407291545e-05, "loss": 2.1119, "step": 23692 }, { "epoch": 0.31, "grad_norm": 3.9464497566223145, "learning_rate": 1.98850081838139e-05, "loss": 2.0351, "step": 23693 }, { "epoch": 0.31, "grad_norm": 3.9264533519744873, "learning_rate": 1.9884992293620876e-05, "loss": 1.9841, "step": 23694 }, { "epoch": 0.31, "grad_norm": 3.5451090335845947, "learning_rate": 1.988497640233638e-05, "loss": 1.5478, "step": 23695 }, { "epoch": 0.31, "grad_norm": 4.076396942138672, "learning_rate": 1.9884960509960415e-05, "loss": 2.2747, "step": 23696 }, { "epoch": 0.31, "grad_norm": 4.41869592666626, "learning_rate": 1.988494461649298e-05, "loss": 2.1887, "step": 23697 }, { "epoch": 0.31, "grad_norm": 3.554849624633789, "learning_rate": 1.9884928721934082e-05, "loss": 1.6295, "step": 23698 }, { "epoch": 0.31, "grad_norm": 4.505432605743408, "learning_rate": 1.9884912826283723e-05, "loss": 2.3277, "step": 23699 }, { "epoch": 0.31, "grad_norm": 4.016830921173096, "learning_rate": 1.9884896929541897e-05, "loss": 2.002, "step": 23700 }, { "epoch": 0.31, "grad_norm": 4.16250467300415, "learning_rate": 1.9884881031708615e-05, "loss": 2.1588, "step": 23701 }, { "epoch": 0.31, "grad_norm": 4.213192939758301, "learning_rate": 1.988486513278387e-05, "loss": 2.1786, "step": 23702 }, { "epoch": 0.31, "grad_norm": 3.6670968532562256, "learning_rate": 1.9884849232767673e-05, "loss": 2.0974, "step": 23703 }, { "epoch": 0.31, "grad_norm": 4.359889030456543, "learning_rate": 1.988483333166002e-05, "loss": 2.4692, "step": 23704 }, { "epoch": 0.31, "grad_norm": 4.108075141906738, "learning_rate": 1.988481742946091e-05, "loss": 1.8802, "step": 23705 }, { "epoch": 0.31, "grad_norm": 4.3720316886901855, "learning_rate": 1.988480152617035e-05, "loss": 2.465, "step": 23706 }, { "epoch": 0.31, "grad_norm": 4.184793472290039, "learning_rate": 1.9884785621788342e-05, "loss": 2.1191, "step": 23707 }, { "epoch": 0.31, "grad_norm": 3.9384605884552, "learning_rate": 1.9884769716314884e-05, "loss": 1.7859, "step": 23708 }, { "epoch": 0.31, "grad_norm": 3.581591844558716, "learning_rate": 1.988475380974998e-05, "loss": 1.8461, "step": 23709 }, { "epoch": 0.31, "grad_norm": 4.06475305557251, "learning_rate": 1.9884737902093634e-05, "loss": 2.1037, "step": 23710 }, { "epoch": 0.31, "grad_norm": 4.279673099517822, "learning_rate": 1.9884721993345844e-05, "loss": 2.1961, "step": 23711 }, { "epoch": 0.31, "grad_norm": 4.289836406707764, "learning_rate": 1.988470608350661e-05, "loss": 2.2465, "step": 23712 }, { "epoch": 0.31, "grad_norm": 4.303369045257568, "learning_rate": 1.988469017257594e-05, "loss": 2.0162, "step": 23713 }, { "epoch": 0.31, "grad_norm": 4.184370040893555, "learning_rate": 1.988467426055383e-05, "loss": 2.101, "step": 23714 }, { "epoch": 0.31, "grad_norm": 4.56353235244751, "learning_rate": 1.988465834744029e-05, "loss": 2.448, "step": 23715 }, { "epoch": 0.31, "grad_norm": 4.294472694396973, "learning_rate": 1.9884642433235307e-05, "loss": 2.4735, "step": 23716 }, { "epoch": 0.31, "grad_norm": 4.179165363311768, "learning_rate": 1.98846265179389e-05, "loss": 2.1811, "step": 23717 }, { "epoch": 0.31, "grad_norm": 3.5684750080108643, "learning_rate": 1.988461060155106e-05, "loss": 1.6486, "step": 23718 }, { "epoch": 0.31, "grad_norm": 3.3445069789886475, "learning_rate": 1.9884594684071787e-05, "loss": 1.4305, "step": 23719 }, { "epoch": 0.31, "grad_norm": 3.1755876541137695, "learning_rate": 1.9884578765501092e-05, "loss": 1.467, "step": 23720 }, { "epoch": 0.31, "grad_norm": 4.1007080078125, "learning_rate": 1.988456284583897e-05, "loss": 2.139, "step": 23721 }, { "epoch": 0.31, "grad_norm": 4.113475799560547, "learning_rate": 1.9884546925085426e-05, "loss": 2.0087, "step": 23722 }, { "epoch": 0.31, "grad_norm": 4.279797077178955, "learning_rate": 1.988453100324046e-05, "loss": 2.2156, "step": 23723 }, { "epoch": 0.31, "grad_norm": 3.470473527908325, "learning_rate": 1.9884515080304073e-05, "loss": 1.9722, "step": 23724 }, { "epoch": 0.31, "grad_norm": 4.18439245223999, "learning_rate": 1.9884499156276267e-05, "loss": 2.1037, "step": 23725 }, { "epoch": 0.31, "grad_norm": 3.811065673828125, "learning_rate": 1.9884483231157046e-05, "loss": 1.9544, "step": 23726 }, { "epoch": 0.31, "grad_norm": 4.253569602966309, "learning_rate": 1.9884467304946412e-05, "loss": 2.3465, "step": 23727 }, { "epoch": 0.31, "grad_norm": 3.9832394123077393, "learning_rate": 1.988445137764436e-05, "loss": 1.8076, "step": 23728 }, { "epoch": 0.31, "grad_norm": 4.903538227081299, "learning_rate": 1.98844354492509e-05, "loss": 2.3933, "step": 23729 }, { "epoch": 0.31, "grad_norm": 3.827498197555542, "learning_rate": 1.9884419519766034e-05, "loss": 1.814, "step": 23730 }, { "epoch": 0.31, "grad_norm": 4.386720180511475, "learning_rate": 1.9884403589189756e-05, "loss": 2.0696, "step": 23731 }, { "epoch": 0.31, "grad_norm": 3.5168983936309814, "learning_rate": 1.988438765752207e-05, "loss": 1.8046, "step": 23732 }, { "epoch": 0.31, "grad_norm": 4.674340724945068, "learning_rate": 1.9884371724762985e-05, "loss": 2.4842, "step": 23733 }, { "epoch": 0.31, "grad_norm": 3.804023027420044, "learning_rate": 1.9884355790912497e-05, "loss": 2.1163, "step": 23734 }, { "epoch": 0.31, "grad_norm": 3.5387237071990967, "learning_rate": 1.9884339855970607e-05, "loss": 1.723, "step": 23735 }, { "epoch": 0.31, "grad_norm": 4.7794108390808105, "learning_rate": 1.988432391993732e-05, "loss": 2.3544, "step": 23736 }, { "epoch": 0.31, "grad_norm": 3.7553672790527344, "learning_rate": 1.988430798281263e-05, "loss": 2.2746, "step": 23737 }, { "epoch": 0.31, "grad_norm": 4.31144905090332, "learning_rate": 1.9884292044596552e-05, "loss": 2.1212, "step": 23738 }, { "epoch": 0.31, "grad_norm": 3.7740046977996826, "learning_rate": 1.988427610528908e-05, "loss": 1.6271, "step": 23739 }, { "epoch": 0.31, "grad_norm": 4.059699058532715, "learning_rate": 1.988426016489021e-05, "loss": 2.2616, "step": 23740 }, { "epoch": 0.31, "grad_norm": 3.958300828933716, "learning_rate": 1.9884244223399955e-05, "loss": 2.0199, "step": 23741 }, { "epoch": 0.31, "grad_norm": 4.284557819366455, "learning_rate": 1.9884228280818312e-05, "loss": 2.1706, "step": 23742 }, { "epoch": 0.31, "grad_norm": 4.419948577880859, "learning_rate": 1.988421233714528e-05, "loss": 2.3926, "step": 23743 }, { "epoch": 0.31, "grad_norm": 4.0811357498168945, "learning_rate": 1.9884196392380865e-05, "loss": 2.15, "step": 23744 }, { "epoch": 0.31, "grad_norm": 3.5686049461364746, "learning_rate": 1.9884180446525067e-05, "loss": 1.5619, "step": 23745 }, { "epoch": 0.31, "grad_norm": 3.874677896499634, "learning_rate": 1.9884164499577884e-05, "loss": 2.0033, "step": 23746 }, { "epoch": 0.31, "grad_norm": 4.200413227081299, "learning_rate": 1.988414855153933e-05, "loss": 2.0326, "step": 23747 }, { "epoch": 0.31, "grad_norm": 4.472530364990234, "learning_rate": 1.988413260240939e-05, "loss": 2.3224, "step": 23748 }, { "epoch": 0.31, "grad_norm": 4.152092933654785, "learning_rate": 1.9884116652188077e-05, "loss": 2.2743, "step": 23749 }, { "epoch": 0.31, "grad_norm": 4.46774959564209, "learning_rate": 1.988410070087539e-05, "loss": 2.3181, "step": 23750 }, { "epoch": 0.31, "grad_norm": 4.299025535583496, "learning_rate": 1.988408474847133e-05, "loss": 2.4814, "step": 23751 }, { "epoch": 0.31, "grad_norm": 3.437818765640259, "learning_rate": 1.98840687949759e-05, "loss": 1.6526, "step": 23752 }, { "epoch": 0.31, "grad_norm": 3.9585230350494385, "learning_rate": 1.9884052840389102e-05, "loss": 1.9583, "step": 23753 }, { "epoch": 0.31, "grad_norm": 4.005355358123779, "learning_rate": 1.9884036884710934e-05, "loss": 2.1113, "step": 23754 }, { "epoch": 0.31, "grad_norm": 4.425078392028809, "learning_rate": 1.98840209279414e-05, "loss": 2.1336, "step": 23755 }, { "epoch": 0.31, "grad_norm": 4.210372447967529, "learning_rate": 1.9884004970080508e-05, "loss": 2.2089, "step": 23756 }, { "epoch": 0.31, "grad_norm": 4.183443069458008, "learning_rate": 1.9883989011128252e-05, "loss": 2.3634, "step": 23757 }, { "epoch": 0.31, "grad_norm": 3.8483774662017822, "learning_rate": 1.9883973051084636e-05, "loss": 2.1124, "step": 23758 }, { "epoch": 0.31, "grad_norm": 4.407794952392578, "learning_rate": 1.9883957089949658e-05, "loss": 2.2941, "step": 23759 }, { "epoch": 0.31, "grad_norm": 3.8616974353790283, "learning_rate": 1.988394112772333e-05, "loss": 1.984, "step": 23760 }, { "epoch": 0.31, "grad_norm": 4.013991355895996, "learning_rate": 1.988392516440564e-05, "loss": 1.9707, "step": 23761 }, { "epoch": 0.31, "grad_norm": 3.89353346824646, "learning_rate": 1.98839091999966e-05, "loss": 1.8967, "step": 23762 }, { "epoch": 0.31, "grad_norm": 3.6658804416656494, "learning_rate": 1.988389323449621e-05, "loss": 2.097, "step": 23763 }, { "epoch": 0.31, "grad_norm": 4.059556007385254, "learning_rate": 1.988387726790447e-05, "loss": 2.2379, "step": 23764 }, { "epoch": 0.31, "grad_norm": 4.133569240570068, "learning_rate": 1.9883861300221383e-05, "loss": 1.793, "step": 23765 }, { "epoch": 0.31, "grad_norm": 3.213994264602661, "learning_rate": 1.9883845331446947e-05, "loss": 1.4669, "step": 23766 }, { "epoch": 0.31, "grad_norm": 3.9353606700897217, "learning_rate": 1.988382936158117e-05, "loss": 1.9666, "step": 23767 }, { "epoch": 0.31, "grad_norm": 4.116227626800537, "learning_rate": 1.988381339062405e-05, "loss": 2.1427, "step": 23768 }, { "epoch": 0.31, "grad_norm": 3.9526360034942627, "learning_rate": 1.9883797418575588e-05, "loss": 1.8539, "step": 23769 }, { "epoch": 0.31, "grad_norm": 4.109251022338867, "learning_rate": 1.9883781445435788e-05, "loss": 2.4444, "step": 23770 }, { "epoch": 0.31, "grad_norm": 3.625631332397461, "learning_rate": 1.988376547120465e-05, "loss": 1.9127, "step": 23771 }, { "epoch": 0.31, "grad_norm": 4.643106460571289, "learning_rate": 1.988374949588218e-05, "loss": 2.879, "step": 23772 }, { "epoch": 0.31, "grad_norm": 4.542290210723877, "learning_rate": 1.988373351946837e-05, "loss": 1.9781, "step": 23773 }, { "epoch": 0.31, "grad_norm": 3.472371816635132, "learning_rate": 1.9883717541963234e-05, "loss": 1.8205, "step": 23774 }, { "epoch": 0.31, "grad_norm": 3.902252435684204, "learning_rate": 1.9883701563366763e-05, "loss": 1.9481, "step": 23775 }, { "epoch": 0.31, "grad_norm": 3.470911979675293, "learning_rate": 1.9883685583678965e-05, "loss": 1.6859, "step": 23776 }, { "epoch": 0.31, "grad_norm": 4.501235008239746, "learning_rate": 1.9883669602899843e-05, "loss": 1.9081, "step": 23777 }, { "epoch": 0.31, "grad_norm": 4.047355651855469, "learning_rate": 1.9883653621029397e-05, "loss": 2.0553, "step": 23778 }, { "epoch": 0.31, "grad_norm": 4.79103946685791, "learning_rate": 1.9883637638067627e-05, "loss": 2.8158, "step": 23779 }, { "epoch": 0.31, "grad_norm": 4.453634262084961, "learning_rate": 1.988362165401453e-05, "loss": 2.3894, "step": 23780 }, { "epoch": 0.31, "grad_norm": 4.3372907638549805, "learning_rate": 1.9883605668870118e-05, "loss": 2.171, "step": 23781 }, { "epoch": 0.31, "grad_norm": 3.801673412322998, "learning_rate": 1.988358968263439e-05, "loss": 1.7963, "step": 23782 }, { "epoch": 0.31, "grad_norm": 3.7425179481506348, "learning_rate": 1.9883573695307344e-05, "loss": 2.0816, "step": 23783 }, { "epoch": 0.31, "grad_norm": 4.119379043579102, "learning_rate": 1.9883557706888984e-05, "loss": 2.0291, "step": 23784 }, { "epoch": 0.31, "grad_norm": 3.9950389862060547, "learning_rate": 1.9883541717379314e-05, "loss": 2.5284, "step": 23785 }, { "epoch": 0.31, "grad_norm": 4.453464031219482, "learning_rate": 1.988352572677833e-05, "loss": 2.6644, "step": 23786 }, { "epoch": 0.31, "grad_norm": 4.100940227508545, "learning_rate": 1.988350973508604e-05, "loss": 2.2345, "step": 23787 }, { "epoch": 0.31, "grad_norm": 3.4969377517700195, "learning_rate": 1.9883493742302437e-05, "loss": 1.694, "step": 23788 }, { "epoch": 0.31, "grad_norm": 3.89457106590271, "learning_rate": 1.9883477748427532e-05, "loss": 2.3155, "step": 23789 }, { "epoch": 0.31, "grad_norm": 3.912961959838867, "learning_rate": 1.9883461753461326e-05, "loss": 1.8521, "step": 23790 }, { "epoch": 0.31, "grad_norm": 3.6595704555511475, "learning_rate": 1.9883445757403814e-05, "loss": 1.5757, "step": 23791 }, { "epoch": 0.31, "grad_norm": 4.22873067855835, "learning_rate": 1.9883429760255004e-05, "loss": 2.5918, "step": 23792 }, { "epoch": 0.31, "grad_norm": 4.018387794494629, "learning_rate": 1.9883413762014897e-05, "loss": 2.0355, "step": 23793 }, { "epoch": 0.31, "grad_norm": 4.521195888519287, "learning_rate": 1.988339776268349e-05, "loss": 2.3309, "step": 23794 }, { "epoch": 0.31, "grad_norm": 4.096506595611572, "learning_rate": 1.9883381762260794e-05, "loss": 2.0184, "step": 23795 }, { "epoch": 0.31, "grad_norm": 4.302511215209961, "learning_rate": 1.98833657607468e-05, "loss": 2.0577, "step": 23796 }, { "epoch": 0.31, "grad_norm": 3.9034323692321777, "learning_rate": 1.9883349758141516e-05, "loss": 2.2443, "step": 23797 }, { "epoch": 0.31, "grad_norm": 4.343876361846924, "learning_rate": 1.988333375444494e-05, "loss": 1.8746, "step": 23798 }, { "epoch": 0.31, "grad_norm": 4.133007526397705, "learning_rate": 1.9883317749657084e-05, "loss": 2.4001, "step": 23799 }, { "epoch": 0.31, "grad_norm": 4.054854393005371, "learning_rate": 1.9883301743777933e-05, "loss": 1.9759, "step": 23800 }, { "epoch": 0.31, "grad_norm": 3.8308355808258057, "learning_rate": 1.9883285736807502e-05, "loss": 1.9253, "step": 23801 }, { "epoch": 0.31, "grad_norm": 4.361831188201904, "learning_rate": 1.9883269728745787e-05, "loss": 2.4503, "step": 23802 }, { "epoch": 0.31, "grad_norm": 3.8430914878845215, "learning_rate": 1.9883253719592793e-05, "loss": 1.6189, "step": 23803 }, { "epoch": 0.31, "grad_norm": 4.365262031555176, "learning_rate": 1.9883237709348522e-05, "loss": 2.3994, "step": 23804 }, { "epoch": 0.31, "grad_norm": 4.264152526855469, "learning_rate": 1.988322169801297e-05, "loss": 2.0599, "step": 23805 }, { "epoch": 0.31, "grad_norm": 4.404865264892578, "learning_rate": 1.9883205685586143e-05, "loss": 2.623, "step": 23806 }, { "epoch": 0.31, "grad_norm": 3.973437547683716, "learning_rate": 1.9883189672068046e-05, "loss": 2.224, "step": 23807 }, { "epoch": 0.31, "grad_norm": 4.068108558654785, "learning_rate": 1.9883173657458675e-05, "loss": 2.4818, "step": 23808 }, { "epoch": 0.31, "grad_norm": 4.304001808166504, "learning_rate": 1.988315764175803e-05, "loss": 1.8139, "step": 23809 }, { "epoch": 0.31, "grad_norm": 3.49444842338562, "learning_rate": 1.9883141624966125e-05, "loss": 2.0787, "step": 23810 }, { "epoch": 0.31, "grad_norm": 4.393448829650879, "learning_rate": 1.9883125607082948e-05, "loss": 2.4514, "step": 23811 }, { "epoch": 0.31, "grad_norm": 3.5572080612182617, "learning_rate": 1.9883109588108508e-05, "loss": 1.8233, "step": 23812 }, { "epoch": 0.31, "grad_norm": 3.6270291805267334, "learning_rate": 1.9883093568042805e-05, "loss": 1.8411, "step": 23813 }, { "epoch": 0.31, "grad_norm": 3.6489768028259277, "learning_rate": 1.988307754688584e-05, "loss": 2.0622, "step": 23814 }, { "epoch": 0.31, "grad_norm": 4.583521842956543, "learning_rate": 1.9883061524637614e-05, "loss": 2.1035, "step": 23815 }, { "epoch": 0.31, "grad_norm": 3.7181363105773926, "learning_rate": 1.9883045501298133e-05, "loss": 1.938, "step": 23816 }, { "epoch": 0.31, "grad_norm": 4.141073703765869, "learning_rate": 1.9883029476867395e-05, "loss": 2.0489, "step": 23817 }, { "epoch": 0.31, "grad_norm": 3.33170747756958, "learning_rate": 1.9883013451345404e-05, "loss": 1.6723, "step": 23818 }, { "epoch": 0.31, "grad_norm": 4.032530784606934, "learning_rate": 1.988299742473216e-05, "loss": 2.452, "step": 23819 }, { "epoch": 0.31, "grad_norm": 4.123103618621826, "learning_rate": 1.9882981397027663e-05, "loss": 2.2551, "step": 23820 }, { "epoch": 0.31, "grad_norm": 3.5365147590637207, "learning_rate": 1.9882965368231917e-05, "loss": 1.7789, "step": 23821 }, { "epoch": 0.31, "grad_norm": 3.695885181427002, "learning_rate": 1.988294933834493e-05, "loss": 1.7505, "step": 23822 }, { "epoch": 0.31, "grad_norm": 3.8708834648132324, "learning_rate": 1.9882933307366694e-05, "loss": 2.3106, "step": 23823 }, { "epoch": 0.31, "grad_norm": 3.7791943550109863, "learning_rate": 1.9882917275297213e-05, "loss": 1.973, "step": 23824 }, { "epoch": 0.31, "grad_norm": 4.3346028327941895, "learning_rate": 1.9882901242136492e-05, "loss": 1.8584, "step": 23825 }, { "epoch": 0.31, "grad_norm": 4.127446174621582, "learning_rate": 1.9882885207884533e-05, "loss": 2.3033, "step": 23826 }, { "epoch": 0.31, "grad_norm": 3.653507709503174, "learning_rate": 1.9882869172541334e-05, "loss": 1.9834, "step": 23827 }, { "epoch": 0.31, "grad_norm": 3.651156425476074, "learning_rate": 1.9882853136106896e-05, "loss": 1.942, "step": 23828 }, { "epoch": 0.31, "grad_norm": 4.231325626373291, "learning_rate": 1.9882837098581228e-05, "loss": 2.1849, "step": 23829 }, { "epoch": 0.31, "grad_norm": 3.974094867706299, "learning_rate": 1.988282105996432e-05, "loss": 1.9192, "step": 23830 }, { "epoch": 0.31, "grad_norm": 4.219024181365967, "learning_rate": 1.9882805020256186e-05, "loss": 2.1699, "step": 23831 }, { "epoch": 0.31, "grad_norm": 4.301177501678467, "learning_rate": 1.9882788979456824e-05, "loss": 2.7904, "step": 23832 }, { "epoch": 0.31, "grad_norm": 3.9997594356536865, "learning_rate": 1.9882772937566233e-05, "loss": 2.2073, "step": 23833 }, { "epoch": 0.31, "grad_norm": 3.8335230350494385, "learning_rate": 1.9882756894584413e-05, "loss": 1.9519, "step": 23834 }, { "epoch": 0.31, "grad_norm": 3.8667690753936768, "learning_rate": 1.9882740850511374e-05, "loss": 1.5075, "step": 23835 }, { "epoch": 0.31, "grad_norm": 4.1361494064331055, "learning_rate": 1.9882724805347113e-05, "loss": 2.3461, "step": 23836 }, { "epoch": 0.31, "grad_norm": 4.277857780456543, "learning_rate": 1.9882708759091626e-05, "loss": 2.4004, "step": 23837 }, { "epoch": 0.31, "grad_norm": 4.278081893920898, "learning_rate": 1.9882692711744923e-05, "loss": 2.0615, "step": 23838 }, { "epoch": 0.31, "grad_norm": 4.166065692901611, "learning_rate": 1.9882676663307005e-05, "loss": 2.0543, "step": 23839 }, { "epoch": 0.31, "grad_norm": 3.8071999549865723, "learning_rate": 1.9882660613777867e-05, "loss": 1.9889, "step": 23840 }, { "epoch": 0.31, "grad_norm": 3.705338716506958, "learning_rate": 1.988264456315752e-05, "loss": 1.9301, "step": 23841 }, { "epoch": 0.31, "grad_norm": 4.270467281341553, "learning_rate": 1.988262851144596e-05, "loss": 2.3606, "step": 23842 }, { "epoch": 0.31, "grad_norm": 3.196465492248535, "learning_rate": 1.9882612458643192e-05, "loss": 1.3948, "step": 23843 }, { "epoch": 0.31, "grad_norm": 4.499903202056885, "learning_rate": 1.9882596404749216e-05, "loss": 2.4477, "step": 23844 }, { "epoch": 0.31, "grad_norm": 3.4524788856506348, "learning_rate": 1.9882580349764028e-05, "loss": 1.3958, "step": 23845 }, { "epoch": 0.31, "grad_norm": 4.033818244934082, "learning_rate": 1.988256429368764e-05, "loss": 1.9763, "step": 23846 }, { "epoch": 0.31, "grad_norm": 4.4860663414001465, "learning_rate": 1.988254823652005e-05, "loss": 2.1229, "step": 23847 }, { "epoch": 0.31, "grad_norm": 4.01611852645874, "learning_rate": 1.9882532178261257e-05, "loss": 1.9507, "step": 23848 }, { "epoch": 0.31, "grad_norm": 3.6964194774627686, "learning_rate": 1.9882516118911268e-05, "loss": 2.0862, "step": 23849 }, { "epoch": 0.31, "grad_norm": 3.764033794403076, "learning_rate": 1.988250005847008e-05, "loss": 1.8399, "step": 23850 }, { "epoch": 0.31, "grad_norm": 4.049926280975342, "learning_rate": 1.9882483996937692e-05, "loss": 2.1392, "step": 23851 }, { "epoch": 0.31, "grad_norm": 4.07292366027832, "learning_rate": 1.9882467934314117e-05, "loss": 2.2457, "step": 23852 }, { "epoch": 0.31, "grad_norm": 3.265231132507324, "learning_rate": 1.9882451870599343e-05, "loss": 1.5589, "step": 23853 }, { "epoch": 0.31, "grad_norm": 4.414419174194336, "learning_rate": 1.9882435805793383e-05, "loss": 1.9843, "step": 23854 }, { "epoch": 0.31, "grad_norm": 4.072949409484863, "learning_rate": 1.9882419739896232e-05, "loss": 2.1868, "step": 23855 }, { "epoch": 0.31, "grad_norm": 3.5535290241241455, "learning_rate": 1.9882403672907896e-05, "loss": 1.6117, "step": 23856 }, { "epoch": 0.31, "grad_norm": 4.011760711669922, "learning_rate": 1.9882387604828378e-05, "loss": 2.643, "step": 23857 }, { "epoch": 0.31, "grad_norm": 3.6469361782073975, "learning_rate": 1.988237153565767e-05, "loss": 2.0226, "step": 23858 }, { "epoch": 0.31, "grad_norm": 4.1626763343811035, "learning_rate": 1.9882355465395786e-05, "loss": 2.4122, "step": 23859 }, { "epoch": 0.31, "grad_norm": 3.6996326446533203, "learning_rate": 1.988233939404272e-05, "loss": 1.7445, "step": 23860 }, { "epoch": 0.31, "grad_norm": 3.999265432357788, "learning_rate": 1.9882323321598475e-05, "loss": 2.1402, "step": 23861 }, { "epoch": 0.31, "grad_norm": 4.3018927574157715, "learning_rate": 1.9882307248063055e-05, "loss": 2.1538, "step": 23862 }, { "epoch": 0.31, "grad_norm": 3.5047261714935303, "learning_rate": 1.988229117343646e-05, "loss": 1.5817, "step": 23863 }, { "epoch": 0.31, "grad_norm": 3.585232973098755, "learning_rate": 1.9882275097718694e-05, "loss": 1.4614, "step": 23864 }, { "epoch": 0.31, "grad_norm": 3.9830031394958496, "learning_rate": 1.988225902090976e-05, "loss": 1.7917, "step": 23865 }, { "epoch": 0.31, "grad_norm": 3.869124412536621, "learning_rate": 1.988224294300965e-05, "loss": 2.0981, "step": 23866 }, { "epoch": 0.31, "grad_norm": 4.151995658874512, "learning_rate": 1.9882226864018376e-05, "loss": 2.1355, "step": 23867 }, { "epoch": 0.31, "grad_norm": 4.0072245597839355, "learning_rate": 1.9882210783935936e-05, "loss": 1.9873, "step": 23868 }, { "epoch": 0.31, "grad_norm": 4.254624366760254, "learning_rate": 1.9882194702762333e-05, "loss": 2.1039, "step": 23869 }, { "epoch": 0.31, "grad_norm": 3.7671000957489014, "learning_rate": 1.9882178620497567e-05, "loss": 1.7266, "step": 23870 }, { "epoch": 0.31, "grad_norm": 3.668226480484009, "learning_rate": 1.9882162537141644e-05, "loss": 1.9246, "step": 23871 }, { "epoch": 0.31, "grad_norm": 4.102222919464111, "learning_rate": 1.988214645269456e-05, "loss": 2.1272, "step": 23872 }, { "epoch": 0.31, "grad_norm": 4.747189998626709, "learning_rate": 1.988213036715632e-05, "loss": 2.3645, "step": 23873 }, { "epoch": 0.31, "grad_norm": 4.670597553253174, "learning_rate": 1.9882114280526923e-05, "loss": 2.1633, "step": 23874 }, { "epoch": 0.31, "grad_norm": 3.1743974685668945, "learning_rate": 1.9882098192806375e-05, "loss": 1.3754, "step": 23875 }, { "epoch": 0.31, "grad_norm": 4.270395755767822, "learning_rate": 1.9882082103994675e-05, "loss": 2.6284, "step": 23876 }, { "epoch": 0.31, "grad_norm": 3.8388209342956543, "learning_rate": 1.9882066014091824e-05, "loss": 2.0661, "step": 23877 }, { "epoch": 0.31, "grad_norm": 4.244025230407715, "learning_rate": 1.9882049923097832e-05, "loss": 2.3105, "step": 23878 }, { "epoch": 0.31, "grad_norm": 4.2740654945373535, "learning_rate": 1.9882033831012685e-05, "loss": 2.2871, "step": 23879 }, { "epoch": 0.31, "grad_norm": 4.115016460418701, "learning_rate": 1.98820177378364e-05, "loss": 1.791, "step": 23880 }, { "epoch": 0.31, "grad_norm": 4.819066047668457, "learning_rate": 1.9882001643568972e-05, "loss": 2.032, "step": 23881 }, { "epoch": 0.31, "grad_norm": 4.257000923156738, "learning_rate": 1.9881985548210402e-05, "loss": 2.092, "step": 23882 }, { "epoch": 0.31, "grad_norm": 4.434286117553711, "learning_rate": 1.988196945176069e-05, "loss": 2.2088, "step": 23883 }, { "epoch": 0.31, "grad_norm": 4.182329177856445, "learning_rate": 1.9881953354219846e-05, "loss": 1.8491, "step": 23884 }, { "epoch": 0.31, "grad_norm": 4.470537185668945, "learning_rate": 1.9881937255587863e-05, "loss": 2.7361, "step": 23885 }, { "epoch": 0.31, "grad_norm": 4.7852606773376465, "learning_rate": 1.988192115586475e-05, "loss": 2.4701, "step": 23886 }, { "epoch": 0.31, "grad_norm": 4.104122638702393, "learning_rate": 1.9881905055050505e-05, "loss": 2.1905, "step": 23887 }, { "epoch": 0.31, "grad_norm": 4.169769763946533, "learning_rate": 1.988188895314513e-05, "loss": 2.1346, "step": 23888 }, { "epoch": 0.31, "grad_norm": 3.9404537677764893, "learning_rate": 1.9881872850148623e-05, "loss": 1.7862, "step": 23889 }, { "epoch": 0.31, "grad_norm": 4.729125022888184, "learning_rate": 1.9881856746060992e-05, "loss": 2.479, "step": 23890 }, { "epoch": 0.31, "grad_norm": 4.131462574005127, "learning_rate": 1.9881840640882237e-05, "loss": 2.4889, "step": 23891 }, { "epoch": 0.31, "grad_norm": 4.544416904449463, "learning_rate": 1.988182453461236e-05, "loss": 2.491, "step": 23892 }, { "epoch": 0.31, "grad_norm": 3.7898271083831787, "learning_rate": 1.988180842725136e-05, "loss": 2.0792, "step": 23893 }, { "epoch": 0.31, "grad_norm": 3.9990060329437256, "learning_rate": 1.988179231879924e-05, "loss": 2.1004, "step": 23894 }, { "epoch": 0.31, "grad_norm": 3.779864549636841, "learning_rate": 1.9881776209256004e-05, "loss": 1.6427, "step": 23895 }, { "epoch": 0.31, "grad_norm": 3.831164836883545, "learning_rate": 1.988176009862165e-05, "loss": 1.8701, "step": 23896 }, { "epoch": 0.31, "grad_norm": 3.866870880126953, "learning_rate": 1.9881743986896184e-05, "loss": 1.6266, "step": 23897 }, { "epoch": 0.31, "grad_norm": 4.237382411956787, "learning_rate": 1.9881727874079604e-05, "loss": 1.7109, "step": 23898 }, { "epoch": 0.31, "grad_norm": 3.4788272380828857, "learning_rate": 1.9881711760171917e-05, "loss": 1.5271, "step": 23899 }, { "epoch": 0.31, "grad_norm": 4.138326644897461, "learning_rate": 1.9881695645173117e-05, "loss": 2.2526, "step": 23900 }, { "epoch": 0.31, "grad_norm": 3.6334362030029297, "learning_rate": 1.9881679529083215e-05, "loss": 2.1949, "step": 23901 }, { "epoch": 0.31, "grad_norm": 3.950456380844116, "learning_rate": 1.9881663411902207e-05, "loss": 2.2149, "step": 23902 }, { "epoch": 0.31, "grad_norm": 3.444431781768799, "learning_rate": 1.988164729363009e-05, "loss": 1.7417, "step": 23903 }, { "epoch": 0.31, "grad_norm": 4.837049961090088, "learning_rate": 1.988163117426688e-05, "loss": 2.3918, "step": 23904 }, { "epoch": 0.31, "grad_norm": 4.56140661239624, "learning_rate": 1.9881615053812563e-05, "loss": 1.7013, "step": 23905 }, { "epoch": 0.31, "grad_norm": 4.122535228729248, "learning_rate": 1.9881598932267153e-05, "loss": 1.9984, "step": 23906 }, { "epoch": 0.31, "grad_norm": 4.19629430770874, "learning_rate": 1.9881582809630642e-05, "loss": 2.0739, "step": 23907 }, { "epoch": 0.31, "grad_norm": 3.8039932250976562, "learning_rate": 1.9881566685903042e-05, "loss": 1.8414, "step": 23908 }, { "epoch": 0.31, "grad_norm": 3.8355228900909424, "learning_rate": 1.9881550561084348e-05, "loss": 1.75, "step": 23909 }, { "epoch": 0.31, "grad_norm": 3.894838333129883, "learning_rate": 1.9881534435174563e-05, "loss": 2.1549, "step": 23910 }, { "epoch": 0.31, "grad_norm": 4.417268753051758, "learning_rate": 1.9881518308173685e-05, "loss": 2.4137, "step": 23911 }, { "epoch": 0.31, "grad_norm": 3.905365228652954, "learning_rate": 1.9881502180081727e-05, "loss": 2.0575, "step": 23912 }, { "epoch": 0.31, "grad_norm": 4.755784034729004, "learning_rate": 1.988148605089868e-05, "loss": 2.2615, "step": 23913 }, { "epoch": 0.31, "grad_norm": 3.831667900085449, "learning_rate": 1.9881469920624546e-05, "loss": 2.0768, "step": 23914 }, { "epoch": 0.31, "grad_norm": 3.9726173877716064, "learning_rate": 1.9881453789259335e-05, "loss": 2.0166, "step": 23915 }, { "epoch": 0.31, "grad_norm": 4.158377170562744, "learning_rate": 1.9881437656803042e-05, "loss": 2.0851, "step": 23916 }, { "epoch": 0.31, "grad_norm": 3.906479597091675, "learning_rate": 1.9881421523255667e-05, "loss": 1.8297, "step": 23917 }, { "epoch": 0.31, "grad_norm": 3.578678607940674, "learning_rate": 1.988140538861722e-05, "loss": 1.4022, "step": 23918 }, { "epoch": 0.31, "grad_norm": 5.0173773765563965, "learning_rate": 1.9881389252887696e-05, "loss": 2.358, "step": 23919 }, { "epoch": 0.31, "grad_norm": 4.126441478729248, "learning_rate": 1.98813731160671e-05, "loss": 1.7883, "step": 23920 }, { "epoch": 0.31, "grad_norm": 3.734790563583374, "learning_rate": 1.9881356978155435e-05, "loss": 1.4845, "step": 23921 }, { "epoch": 0.31, "grad_norm": 4.15410852432251, "learning_rate": 1.9881340839152696e-05, "loss": 2.0743, "step": 23922 }, { "epoch": 0.31, "grad_norm": 4.143603324890137, "learning_rate": 1.988132469905889e-05, "loss": 1.7291, "step": 23923 }, { "epoch": 0.31, "grad_norm": 4.185454845428467, "learning_rate": 1.9881308557874023e-05, "loss": 2.3695, "step": 23924 }, { "epoch": 0.31, "grad_norm": 4.496804237365723, "learning_rate": 1.988129241559809e-05, "loss": 1.8831, "step": 23925 }, { "epoch": 0.31, "grad_norm": 3.9905707836151123, "learning_rate": 1.988127627223109e-05, "loss": 1.9915, "step": 23926 }, { "epoch": 0.31, "grad_norm": 4.501553535461426, "learning_rate": 1.9881260127773036e-05, "loss": 2.1684, "step": 23927 }, { "epoch": 0.31, "grad_norm": 4.0126471519470215, "learning_rate": 1.9881243982223918e-05, "loss": 2.1937, "step": 23928 }, { "epoch": 0.31, "grad_norm": 3.808065891265869, "learning_rate": 1.9881227835583744e-05, "loss": 1.9256, "step": 23929 }, { "epoch": 0.31, "grad_norm": 4.176733493804932, "learning_rate": 1.9881211687852518e-05, "loss": 1.9266, "step": 23930 }, { "epoch": 0.31, "grad_norm": 3.39324688911438, "learning_rate": 1.9881195539030234e-05, "loss": 1.5967, "step": 23931 }, { "epoch": 0.31, "grad_norm": 4.252347469329834, "learning_rate": 1.98811793891169e-05, "loss": 1.9027, "step": 23932 }, { "epoch": 0.31, "grad_norm": 3.788220167160034, "learning_rate": 1.988116323811252e-05, "loss": 2.0971, "step": 23933 }, { "epoch": 0.31, "grad_norm": 3.798337459564209, "learning_rate": 1.988114708601709e-05, "loss": 2.0397, "step": 23934 }, { "epoch": 0.31, "grad_norm": 3.8503098487854004, "learning_rate": 1.9881130932830612e-05, "loss": 1.8971, "step": 23935 }, { "epoch": 0.31, "grad_norm": 4.06394100189209, "learning_rate": 1.988111477855309e-05, "loss": 2.4465, "step": 23936 }, { "epoch": 0.31, "grad_norm": 4.124107360839844, "learning_rate": 1.9881098623184524e-05, "loss": 1.691, "step": 23937 }, { "epoch": 0.31, "grad_norm": 3.8853349685668945, "learning_rate": 1.9881082466724918e-05, "loss": 2.1809, "step": 23938 }, { "epoch": 0.31, "grad_norm": 3.942110776901245, "learning_rate": 1.9881066309174275e-05, "loss": 1.8037, "step": 23939 }, { "epoch": 0.31, "grad_norm": 3.7252063751220703, "learning_rate": 1.9881050150532593e-05, "loss": 1.8519, "step": 23940 }, { "epoch": 0.31, "grad_norm": 4.020545959472656, "learning_rate": 1.9881033990799875e-05, "loss": 2.0425, "step": 23941 }, { "epoch": 0.31, "grad_norm": 4.193795204162598, "learning_rate": 1.9881017829976124e-05, "loss": 1.9473, "step": 23942 }, { "epoch": 0.31, "grad_norm": 5.135159969329834, "learning_rate": 1.988100166806134e-05, "loss": 2.3217, "step": 23943 }, { "epoch": 0.31, "grad_norm": 4.182311534881592, "learning_rate": 1.9880985505055528e-05, "loss": 2.0253, "step": 23944 }, { "epoch": 0.31, "grad_norm": 4.172926425933838, "learning_rate": 1.9880969340958683e-05, "loss": 2.0703, "step": 23945 }, { "epoch": 0.31, "grad_norm": 4.049033164978027, "learning_rate": 1.9880953175770815e-05, "loss": 1.9569, "step": 23946 }, { "epoch": 0.31, "grad_norm": 4.215927600860596, "learning_rate": 1.988093700949192e-05, "loss": 2.1648, "step": 23947 }, { "epoch": 0.31, "grad_norm": 4.415611267089844, "learning_rate": 1.9880920842122005e-05, "loss": 2.2916, "step": 23948 }, { "epoch": 0.31, "grad_norm": 4.565372467041016, "learning_rate": 1.988090467366107e-05, "loss": 2.2581, "step": 23949 }, { "epoch": 0.31, "grad_norm": 3.7646026611328125, "learning_rate": 1.988088850410911e-05, "loss": 1.7243, "step": 23950 }, { "epoch": 0.31, "grad_norm": 4.4979729652404785, "learning_rate": 1.9880872333466136e-05, "loss": 2.0365, "step": 23951 }, { "epoch": 0.31, "grad_norm": 3.9007818698883057, "learning_rate": 1.9880856161732143e-05, "loss": 2.0584, "step": 23952 }, { "epoch": 0.31, "grad_norm": 3.8926658630371094, "learning_rate": 1.988083998890714e-05, "loss": 2.0901, "step": 23953 }, { "epoch": 0.31, "grad_norm": 3.659865617752075, "learning_rate": 1.9880823814991125e-05, "loss": 1.8482, "step": 23954 }, { "epoch": 0.31, "grad_norm": 4.016839504241943, "learning_rate": 1.9880807639984095e-05, "loss": 1.8869, "step": 23955 }, { "epoch": 0.31, "grad_norm": 4.335190773010254, "learning_rate": 1.9880791463886057e-05, "loss": 2.1428, "step": 23956 }, { "epoch": 0.31, "grad_norm": 3.977417469024658, "learning_rate": 1.9880775286697013e-05, "loss": 2.069, "step": 23957 }, { "epoch": 0.31, "grad_norm": 3.913590669631958, "learning_rate": 1.9880759108416964e-05, "loss": 2.0845, "step": 23958 }, { "epoch": 0.31, "grad_norm": 4.301795482635498, "learning_rate": 1.9880742929045912e-05, "loss": 2.1341, "step": 23959 }, { "epoch": 0.31, "grad_norm": 4.475207805633545, "learning_rate": 1.9880726748583855e-05, "loss": 2.5188, "step": 23960 }, { "epoch": 0.31, "grad_norm": 4.103458881378174, "learning_rate": 1.9880710567030802e-05, "loss": 2.4048, "step": 23961 }, { "epoch": 0.31, "grad_norm": 4.113325119018555, "learning_rate": 1.988069438438675e-05, "loss": 1.9433, "step": 23962 }, { "epoch": 0.31, "grad_norm": 4.596792697906494, "learning_rate": 1.98806782006517e-05, "loss": 2.2003, "step": 23963 }, { "epoch": 0.31, "grad_norm": 3.9060254096984863, "learning_rate": 1.988066201582566e-05, "loss": 2.3122, "step": 23964 }, { "epoch": 0.31, "grad_norm": 4.74259614944458, "learning_rate": 1.9880645829908625e-05, "loss": 2.6803, "step": 23965 }, { "epoch": 0.31, "grad_norm": 3.909984588623047, "learning_rate": 1.98806296429006e-05, "loss": 2.1029, "step": 23966 }, { "epoch": 0.31, "grad_norm": 4.1145453453063965, "learning_rate": 1.988061345480158e-05, "loss": 2.2206, "step": 23967 }, { "epoch": 0.31, "grad_norm": 3.8739213943481445, "learning_rate": 1.9880597265611578e-05, "loss": 2.2514, "step": 23968 }, { "epoch": 0.31, "grad_norm": 4.028406143188477, "learning_rate": 1.988058107533059e-05, "loss": 2.4072, "step": 23969 }, { "epoch": 0.31, "grad_norm": 4.329470157623291, "learning_rate": 1.988056488395862e-05, "loss": 2.2994, "step": 23970 }, { "epoch": 0.31, "grad_norm": 4.024591445922852, "learning_rate": 1.9880548691495664e-05, "loss": 1.912, "step": 23971 }, { "epoch": 0.31, "grad_norm": 3.8304378986358643, "learning_rate": 1.988053249794173e-05, "loss": 2.0286, "step": 23972 }, { "epoch": 0.31, "grad_norm": 4.002945899963379, "learning_rate": 1.9880516303296814e-05, "loss": 2.5299, "step": 23973 }, { "epoch": 0.31, "grad_norm": 4.218808650970459, "learning_rate": 1.9880500107560927e-05, "loss": 1.9296, "step": 23974 }, { "epoch": 0.31, "grad_norm": 4.631807327270508, "learning_rate": 1.988048391073406e-05, "loss": 2.2012, "step": 23975 }, { "epoch": 0.31, "grad_norm": 3.9934911727905273, "learning_rate": 1.9880467712816223e-05, "loss": 1.8452, "step": 23976 }, { "epoch": 0.31, "grad_norm": 4.236912250518799, "learning_rate": 1.9880451513807414e-05, "loss": 2.2635, "step": 23977 }, { "epoch": 0.31, "grad_norm": 3.552429676055908, "learning_rate": 1.9880435313707637e-05, "loss": 2.0373, "step": 23978 }, { "epoch": 0.31, "grad_norm": 3.9156577587127686, "learning_rate": 1.9880419112516888e-05, "loss": 2.1655, "step": 23979 }, { "epoch": 0.31, "grad_norm": 4.260447025299072, "learning_rate": 1.9880402910235177e-05, "loss": 2.6323, "step": 23980 }, { "epoch": 0.31, "grad_norm": 3.9509096145629883, "learning_rate": 1.98803867068625e-05, "loss": 1.8702, "step": 23981 }, { "epoch": 0.31, "grad_norm": 4.167388439178467, "learning_rate": 1.9880370502398865e-05, "loss": 2.1553, "step": 23982 }, { "epoch": 0.31, "grad_norm": 3.8407399654388428, "learning_rate": 1.9880354296844263e-05, "loss": 1.8087, "step": 23983 }, { "epoch": 0.31, "grad_norm": 4.423655986785889, "learning_rate": 1.9880338090198706e-05, "loss": 2.3339, "step": 23984 }, { "epoch": 0.31, "grad_norm": 3.9133217334747314, "learning_rate": 1.988032188246219e-05, "loss": 2.2621, "step": 23985 }, { "epoch": 0.31, "grad_norm": 4.074039459228516, "learning_rate": 1.9880305673634722e-05, "loss": 2.3813, "step": 23986 }, { "epoch": 0.31, "grad_norm": 4.292867660522461, "learning_rate": 1.98802894637163e-05, "loss": 1.8171, "step": 23987 }, { "epoch": 0.31, "grad_norm": 3.316359281539917, "learning_rate": 1.9880273252706926e-05, "loss": 1.6419, "step": 23988 }, { "epoch": 0.31, "grad_norm": 4.497650146484375, "learning_rate": 1.98802570406066e-05, "loss": 2.4819, "step": 23989 }, { "epoch": 0.31, "grad_norm": 3.3845574855804443, "learning_rate": 1.9880240827415328e-05, "loss": 1.6479, "step": 23990 }, { "epoch": 0.31, "grad_norm": 3.708885908126831, "learning_rate": 1.988022461313311e-05, "loss": 1.6853, "step": 23991 }, { "epoch": 0.31, "grad_norm": 4.161561965942383, "learning_rate": 1.988020839775995e-05, "loss": 2.4016, "step": 23992 }, { "epoch": 0.31, "grad_norm": 3.7683229446411133, "learning_rate": 1.988019218129584e-05, "loss": 1.953, "step": 23993 }, { "epoch": 0.31, "grad_norm": 3.2587807178497314, "learning_rate": 1.9880175963740794e-05, "loss": 1.58, "step": 23994 }, { "epoch": 0.31, "grad_norm": 3.7556235790252686, "learning_rate": 1.988015974509481e-05, "loss": 2.0534, "step": 23995 }, { "epoch": 0.31, "grad_norm": 4.094293594360352, "learning_rate": 1.9880143525357888e-05, "loss": 1.8562, "step": 23996 }, { "epoch": 0.31, "grad_norm": 4.226029396057129, "learning_rate": 1.988012730453003e-05, "loss": 1.603, "step": 23997 }, { "epoch": 0.31, "grad_norm": 3.6397595405578613, "learning_rate": 1.9880111082611235e-05, "loss": 1.8742, "step": 23998 }, { "epoch": 0.31, "grad_norm": 4.505083084106445, "learning_rate": 1.9880094859601513e-05, "loss": 2.4199, "step": 23999 }, { "epoch": 0.31, "grad_norm": 3.914801836013794, "learning_rate": 1.9880078635500858e-05, "loss": 2.0528, "step": 24000 }, { "epoch": 0.31, "grad_norm": 4.450676441192627, "learning_rate": 1.9880062410309277e-05, "loss": 2.0455, "step": 24001 }, { "epoch": 0.31, "grad_norm": 4.501296520233154, "learning_rate": 1.988004618402677e-05, "loss": 2.5675, "step": 24002 }, { "epoch": 0.31, "grad_norm": 3.9638795852661133, "learning_rate": 1.9880029956653334e-05, "loss": 2.2083, "step": 24003 }, { "epoch": 0.31, "grad_norm": 3.4961764812469482, "learning_rate": 1.9880013728188978e-05, "loss": 1.7004, "step": 24004 }, { "epoch": 0.31, "grad_norm": 4.526034355163574, "learning_rate": 1.98799974986337e-05, "loss": 2.4545, "step": 24005 }, { "epoch": 0.31, "grad_norm": 3.959341287612915, "learning_rate": 1.9879981267987503e-05, "loss": 2.2803, "step": 24006 }, { "epoch": 0.31, "grad_norm": 4.318887710571289, "learning_rate": 1.987996503625039e-05, "loss": 2.2558, "step": 24007 }, { "epoch": 0.31, "grad_norm": 4.304013729095459, "learning_rate": 1.987994880342236e-05, "loss": 1.9, "step": 24008 }, { "epoch": 0.31, "grad_norm": 4.3840718269348145, "learning_rate": 1.9879932569503415e-05, "loss": 2.3677, "step": 24009 }, { "epoch": 0.31, "grad_norm": 4.223344802856445, "learning_rate": 1.9879916334493558e-05, "loss": 2.0335, "step": 24010 }, { "epoch": 0.31, "grad_norm": 4.262258529663086, "learning_rate": 1.9879900098392793e-05, "loss": 1.9266, "step": 24011 }, { "epoch": 0.31, "grad_norm": 3.761409282684326, "learning_rate": 1.9879883861201118e-05, "loss": 1.6996, "step": 24012 }, { "epoch": 0.31, "grad_norm": 3.9639625549316406, "learning_rate": 1.9879867622918535e-05, "loss": 1.9874, "step": 24013 }, { "epoch": 0.31, "grad_norm": 3.939119577407837, "learning_rate": 1.9879851383545048e-05, "loss": 2.3178, "step": 24014 }, { "epoch": 0.31, "grad_norm": 4.918507099151611, "learning_rate": 1.9879835143080658e-05, "loss": 2.1541, "step": 24015 }, { "epoch": 0.31, "grad_norm": 4.151493072509766, "learning_rate": 1.9879818901525367e-05, "loss": 1.9045, "step": 24016 }, { "epoch": 0.31, "grad_norm": 4.466175079345703, "learning_rate": 1.9879802658879175e-05, "loss": 2.0613, "step": 24017 }, { "epoch": 0.31, "grad_norm": 3.6588504314422607, "learning_rate": 1.9879786415142087e-05, "loss": 1.797, "step": 24018 }, { "epoch": 0.31, "grad_norm": 3.876018762588501, "learning_rate": 1.98797701703141e-05, "loss": 2.2403, "step": 24019 }, { "epoch": 0.31, "grad_norm": 3.02927827835083, "learning_rate": 1.987975392439522e-05, "loss": 1.3209, "step": 24020 }, { "epoch": 0.31, "grad_norm": 3.9759721755981445, "learning_rate": 1.9879737677385448e-05, "loss": 1.8871, "step": 24021 }, { "epoch": 0.31, "grad_norm": 3.539119243621826, "learning_rate": 1.9879721429284788e-05, "loss": 1.6124, "step": 24022 }, { "epoch": 0.31, "grad_norm": 4.181412696838379, "learning_rate": 1.9879705180093234e-05, "loss": 2.0129, "step": 24023 }, { "epoch": 0.31, "grad_norm": 4.388165473937988, "learning_rate": 1.9879688929810796e-05, "loss": 2.4066, "step": 24024 }, { "epoch": 0.31, "grad_norm": 3.7960071563720703, "learning_rate": 1.9879672678437472e-05, "loss": 1.7597, "step": 24025 }, { "epoch": 0.31, "grad_norm": 4.5009355545043945, "learning_rate": 1.9879656425973267e-05, "loss": 2.3221, "step": 24026 }, { "epoch": 0.31, "grad_norm": 3.8579509258270264, "learning_rate": 1.987964017241818e-05, "loss": 1.8353, "step": 24027 }, { "epoch": 0.31, "grad_norm": 3.421480894088745, "learning_rate": 1.9879623917772207e-05, "loss": 1.7707, "step": 24028 }, { "epoch": 0.31, "grad_norm": 3.714557647705078, "learning_rate": 1.9879607662035362e-05, "loss": 1.6305, "step": 24029 }, { "epoch": 0.31, "grad_norm": 3.777744770050049, "learning_rate": 1.987959140520764e-05, "loss": 1.733, "step": 24030 }, { "epoch": 0.31, "grad_norm": 3.8738489151000977, "learning_rate": 1.987957514728904e-05, "loss": 2.3015, "step": 24031 }, { "epoch": 0.31, "grad_norm": 4.143584728240967, "learning_rate": 1.987955888827957e-05, "loss": 2.275, "step": 24032 }, { "epoch": 0.31, "grad_norm": 4.3649396896362305, "learning_rate": 1.987954262817923e-05, "loss": 2.4097, "step": 24033 }, { "epoch": 0.31, "grad_norm": 4.643054485321045, "learning_rate": 1.987952636698802e-05, "loss": 2.5439, "step": 24034 }, { "epoch": 0.31, "grad_norm": 4.373265743255615, "learning_rate": 1.9879510104705943e-05, "loss": 2.2162, "step": 24035 }, { "epoch": 0.31, "grad_norm": 3.49987530708313, "learning_rate": 1.9879493841333e-05, "loss": 1.4565, "step": 24036 }, { "epoch": 0.31, "grad_norm": 3.8680315017700195, "learning_rate": 1.9879477576869192e-05, "loss": 1.9926, "step": 24037 }, { "epoch": 0.31, "grad_norm": 4.098644256591797, "learning_rate": 1.9879461311314524e-05, "loss": 2.2915, "step": 24038 }, { "epoch": 0.31, "grad_norm": 3.7835350036621094, "learning_rate": 1.9879445044668992e-05, "loss": 1.6861, "step": 24039 }, { "epoch": 0.31, "grad_norm": 3.7096755504608154, "learning_rate": 1.9879428776932607e-05, "loss": 1.9277, "step": 24040 }, { "epoch": 0.31, "grad_norm": 4.368227481842041, "learning_rate": 1.9879412508105362e-05, "loss": 2.3483, "step": 24041 }, { "epoch": 0.31, "grad_norm": 3.2078146934509277, "learning_rate": 1.9879396238187264e-05, "loss": 1.6341, "step": 24042 }, { "epoch": 0.31, "grad_norm": 3.9794461727142334, "learning_rate": 1.987937996717831e-05, "loss": 1.8453, "step": 24043 }, { "epoch": 0.31, "grad_norm": 3.9843361377716064, "learning_rate": 1.987936369507851e-05, "loss": 2.1025, "step": 24044 }, { "epoch": 0.31, "grad_norm": 3.8425543308258057, "learning_rate": 1.987934742188786e-05, "loss": 1.9993, "step": 24045 }, { "epoch": 0.31, "grad_norm": 3.8943560123443604, "learning_rate": 1.9879331147606356e-05, "loss": 1.8704, "step": 24046 }, { "epoch": 0.31, "grad_norm": 4.094720840454102, "learning_rate": 1.9879314872234013e-05, "loss": 2.1687, "step": 24047 }, { "epoch": 0.31, "grad_norm": 4.105532646179199, "learning_rate": 1.9879298595770824e-05, "loss": 2.1055, "step": 24048 }, { "epoch": 0.31, "grad_norm": 3.4378368854522705, "learning_rate": 1.987928231821679e-05, "loss": 2.0077, "step": 24049 }, { "epoch": 0.31, "grad_norm": 4.222729682922363, "learning_rate": 1.9879266039571917e-05, "loss": 2.184, "step": 24050 }, { "epoch": 0.31, "grad_norm": 3.959294319152832, "learning_rate": 1.9879249759836206e-05, "loss": 1.7545, "step": 24051 }, { "epoch": 0.31, "grad_norm": 4.346386432647705, "learning_rate": 1.9879233479009662e-05, "loss": 2.4155, "step": 24052 }, { "epoch": 0.31, "grad_norm": 4.12795352935791, "learning_rate": 1.987921719709228e-05, "loss": 1.9187, "step": 24053 }, { "epoch": 0.31, "grad_norm": 4.066689968109131, "learning_rate": 1.9879200914084066e-05, "loss": 2.268, "step": 24054 }, { "epoch": 0.31, "grad_norm": 3.5066611766815186, "learning_rate": 1.9879184629985017e-05, "loss": 1.5545, "step": 24055 }, { "epoch": 0.31, "grad_norm": 3.790342330932617, "learning_rate": 1.9879168344795143e-05, "loss": 2.0811, "step": 24056 }, { "epoch": 0.31, "grad_norm": 4.249728202819824, "learning_rate": 1.987915205851444e-05, "loss": 2.3547, "step": 24057 }, { "epoch": 0.31, "grad_norm": 4.2661943435668945, "learning_rate": 1.9879135771142905e-05, "loss": 2.6333, "step": 24058 }, { "epoch": 0.31, "grad_norm": 4.431358814239502, "learning_rate": 1.9879119482680552e-05, "loss": 2.53, "step": 24059 }, { "epoch": 0.31, "grad_norm": 3.776120185852051, "learning_rate": 1.9879103193127377e-05, "loss": 2.1272, "step": 24060 }, { "epoch": 0.31, "grad_norm": 3.996183395385742, "learning_rate": 1.987908690248338e-05, "loss": 2.0333, "step": 24061 }, { "epoch": 0.31, "grad_norm": 4.131536483764648, "learning_rate": 1.9879070610748562e-05, "loss": 2.3542, "step": 24062 }, { "epoch": 0.31, "grad_norm": 4.690213680267334, "learning_rate": 1.987905431792293e-05, "loss": 1.9361, "step": 24063 }, { "epoch": 0.31, "grad_norm": 3.673445701599121, "learning_rate": 1.987903802400648e-05, "loss": 1.6711, "step": 24064 }, { "epoch": 0.31, "grad_norm": 3.5435309410095215, "learning_rate": 1.987902172899922e-05, "loss": 1.6303, "step": 24065 }, { "epoch": 0.31, "grad_norm": 4.268212795257568, "learning_rate": 1.9879005432901146e-05, "loss": 2.2065, "step": 24066 }, { "epoch": 0.31, "grad_norm": 4.437704563140869, "learning_rate": 1.9878989135712263e-05, "loss": 2.1509, "step": 24067 }, { "epoch": 0.31, "grad_norm": 4.147599697113037, "learning_rate": 1.987897283743257e-05, "loss": 2.2156, "step": 24068 }, { "epoch": 0.31, "grad_norm": 4.170502185821533, "learning_rate": 1.9878956538062074e-05, "loss": 1.8421, "step": 24069 }, { "epoch": 0.31, "grad_norm": 4.278313636779785, "learning_rate": 1.987894023760077e-05, "loss": 2.1041, "step": 24070 }, { "epoch": 0.31, "grad_norm": 4.178470134735107, "learning_rate": 1.9878923936048666e-05, "loss": 1.9831, "step": 24071 }, { "epoch": 0.31, "grad_norm": 4.503110408782959, "learning_rate": 1.9878907633405766e-05, "loss": 2.1317, "step": 24072 }, { "epoch": 0.31, "grad_norm": 3.657270908355713, "learning_rate": 1.987889132967206e-05, "loss": 1.6756, "step": 24073 }, { "epoch": 0.31, "grad_norm": 4.011695384979248, "learning_rate": 1.9878875024847558e-05, "loss": 1.9011, "step": 24074 }, { "epoch": 0.31, "grad_norm": 4.114474296569824, "learning_rate": 1.987885871893226e-05, "loss": 2.2968, "step": 24075 }, { "epoch": 0.31, "grad_norm": 3.731745958328247, "learning_rate": 1.9878842411926168e-05, "loss": 2.055, "step": 24076 }, { "epoch": 0.31, "grad_norm": 3.563425064086914, "learning_rate": 1.9878826103829286e-05, "loss": 1.9946, "step": 24077 }, { "epoch": 0.31, "grad_norm": 4.289847373962402, "learning_rate": 1.9878809794641616e-05, "loss": 2.4909, "step": 24078 }, { "epoch": 0.31, "grad_norm": 3.8543918132781982, "learning_rate": 1.9878793484363154e-05, "loss": 2.516, "step": 24079 }, { "epoch": 0.31, "grad_norm": 3.868015766143799, "learning_rate": 1.9878777172993907e-05, "loss": 2.0169, "step": 24080 }, { "epoch": 0.31, "grad_norm": 3.7166221141815186, "learning_rate": 1.9878760860533877e-05, "loss": 2.186, "step": 24081 }, { "epoch": 0.31, "grad_norm": 4.492910385131836, "learning_rate": 1.9878744546983063e-05, "loss": 1.8556, "step": 24082 }, { "epoch": 0.31, "grad_norm": 4.349643707275391, "learning_rate": 1.987872823234147e-05, "loss": 2.2376, "step": 24083 }, { "epoch": 0.31, "grad_norm": 4.553001880645752, "learning_rate": 1.9878711916609094e-05, "loss": 2.2549, "step": 24084 }, { "epoch": 0.31, "grad_norm": 4.037333011627197, "learning_rate": 1.987869559978594e-05, "loss": 2.0595, "step": 24085 }, { "epoch": 0.31, "grad_norm": 3.6905598640441895, "learning_rate": 1.9878679281872015e-05, "loss": 2.1153, "step": 24086 }, { "epoch": 0.31, "grad_norm": 3.858783006668091, "learning_rate": 1.9878662962867314e-05, "loss": 1.9843, "step": 24087 }, { "epoch": 0.31, "grad_norm": 3.5703248977661133, "learning_rate": 1.9878646642771842e-05, "loss": 2.0246, "step": 24088 }, { "epoch": 0.31, "grad_norm": 3.9598958492279053, "learning_rate": 1.9878630321585598e-05, "loss": 2.0659, "step": 24089 }, { "epoch": 0.31, "grad_norm": 3.665022850036621, "learning_rate": 1.9878613999308585e-05, "loss": 1.745, "step": 24090 }, { "epoch": 0.31, "grad_norm": 4.074376583099365, "learning_rate": 1.9878597675940808e-05, "loss": 2.2892, "step": 24091 }, { "epoch": 0.31, "grad_norm": 3.368469476699829, "learning_rate": 1.9878581351482265e-05, "loss": 1.4698, "step": 24092 }, { "epoch": 0.31, "grad_norm": 4.268126964569092, "learning_rate": 1.9878565025932958e-05, "loss": 2.4246, "step": 24093 }, { "epoch": 0.31, "grad_norm": 4.346993446350098, "learning_rate": 1.9878548699292892e-05, "loss": 2.4196, "step": 24094 }, { "epoch": 0.31, "grad_norm": 4.0244059562683105, "learning_rate": 1.9878532371562065e-05, "loss": 2.1129, "step": 24095 }, { "epoch": 0.31, "grad_norm": 4.484743595123291, "learning_rate": 1.987851604274048e-05, "loss": 2.2268, "step": 24096 }, { "epoch": 0.31, "grad_norm": 4.450926780700684, "learning_rate": 1.9878499712828143e-05, "loss": 2.3467, "step": 24097 }, { "epoch": 0.31, "grad_norm": 3.820525646209717, "learning_rate": 1.9878483381825047e-05, "loss": 1.994, "step": 24098 }, { "epoch": 0.31, "grad_norm": 3.8730545043945312, "learning_rate": 1.9878467049731204e-05, "loss": 2.043, "step": 24099 }, { "epoch": 0.31, "grad_norm": 4.2086334228515625, "learning_rate": 1.9878450716546606e-05, "loss": 1.909, "step": 24100 }, { "epoch": 0.31, "grad_norm": 4.4774556159973145, "learning_rate": 1.987843438227126e-05, "loss": 2.315, "step": 24101 }, { "epoch": 0.31, "grad_norm": 3.7747161388397217, "learning_rate": 1.987841804690517e-05, "loss": 1.8824, "step": 24102 }, { "epoch": 0.31, "grad_norm": 3.6607141494750977, "learning_rate": 1.9878401710448334e-05, "loss": 1.6914, "step": 24103 }, { "epoch": 0.31, "grad_norm": 4.248675346374512, "learning_rate": 1.987838537290075e-05, "loss": 2.1974, "step": 24104 }, { "epoch": 0.31, "grad_norm": 4.448562145233154, "learning_rate": 1.987836903426243e-05, "loss": 2.1843, "step": 24105 }, { "epoch": 0.31, "grad_norm": 4.258464336395264, "learning_rate": 1.987835269453337e-05, "loss": 2.1036, "step": 24106 }, { "epoch": 0.31, "grad_norm": 3.824143409729004, "learning_rate": 1.9878336353713574e-05, "loss": 2.1499, "step": 24107 }, { "epoch": 0.31, "grad_norm": 3.823943853378296, "learning_rate": 1.987832001180304e-05, "loss": 1.8289, "step": 24108 }, { "epoch": 0.31, "grad_norm": 4.138948440551758, "learning_rate": 1.987830366880177e-05, "loss": 2.1995, "step": 24109 }, { "epoch": 0.31, "grad_norm": 3.822605848312378, "learning_rate": 1.987828732470977e-05, "loss": 1.9464, "step": 24110 }, { "epoch": 0.31, "grad_norm": 4.713536739349365, "learning_rate": 1.987827097952704e-05, "loss": 2.5948, "step": 24111 }, { "epoch": 0.31, "grad_norm": 4.46116304397583, "learning_rate": 1.9878254633253578e-05, "loss": 1.9586, "step": 24112 }, { "epoch": 0.31, "grad_norm": 4.919584274291992, "learning_rate": 1.987823828588939e-05, "loss": 1.8827, "step": 24113 }, { "epoch": 0.31, "grad_norm": 3.433358907699585, "learning_rate": 1.987822193743448e-05, "loss": 1.6562, "step": 24114 }, { "epoch": 0.31, "grad_norm": 4.562629699707031, "learning_rate": 1.9878205587888844e-05, "loss": 2.548, "step": 24115 }, { "epoch": 0.31, "grad_norm": 4.11693811416626, "learning_rate": 1.9878189237252487e-05, "loss": 1.8411, "step": 24116 }, { "epoch": 0.31, "grad_norm": 3.833186149597168, "learning_rate": 1.9878172885525413e-05, "loss": 2.1857, "step": 24117 }, { "epoch": 0.31, "grad_norm": 4.446139335632324, "learning_rate": 1.987815653270762e-05, "loss": 2.3091, "step": 24118 }, { "epoch": 0.31, "grad_norm": 4.357680797576904, "learning_rate": 1.987814017879911e-05, "loss": 1.8116, "step": 24119 }, { "epoch": 0.31, "grad_norm": 3.6477527618408203, "learning_rate": 1.9878123823799882e-05, "loss": 1.9631, "step": 24120 }, { "epoch": 0.31, "grad_norm": 4.366621017456055, "learning_rate": 1.9878107467709948e-05, "loss": 2.194, "step": 24121 }, { "epoch": 0.31, "grad_norm": 3.9682374000549316, "learning_rate": 1.98780911105293e-05, "loss": 2.1002, "step": 24122 }, { "epoch": 0.31, "grad_norm": 4.1438517570495605, "learning_rate": 1.987807475225794e-05, "loss": 2.1402, "step": 24123 }, { "epoch": 0.31, "grad_norm": 4.640621662139893, "learning_rate": 1.987805839289588e-05, "loss": 2.6274, "step": 24124 }, { "epoch": 0.31, "grad_norm": 3.3692800998687744, "learning_rate": 1.987804203244311e-05, "loss": 1.8683, "step": 24125 }, { "epoch": 0.31, "grad_norm": 4.233883380889893, "learning_rate": 1.987802567089964e-05, "loss": 2.127, "step": 24126 }, { "epoch": 0.31, "grad_norm": 4.478038787841797, "learning_rate": 1.9878009308265464e-05, "loss": 2.3157, "step": 24127 }, { "epoch": 0.31, "grad_norm": 4.390406608581543, "learning_rate": 1.9877992944540588e-05, "loss": 2.2065, "step": 24128 }, { "epoch": 0.31, "grad_norm": 4.033969879150391, "learning_rate": 1.987797657972502e-05, "loss": 2.2779, "step": 24129 }, { "epoch": 0.31, "grad_norm": 3.8645126819610596, "learning_rate": 1.9877960213818752e-05, "loss": 1.7752, "step": 24130 }, { "epoch": 0.31, "grad_norm": 3.767455577850342, "learning_rate": 1.987794384682179e-05, "loss": 2.1186, "step": 24131 }, { "epoch": 0.31, "grad_norm": 4.522188186645508, "learning_rate": 1.9877927478734133e-05, "loss": 1.9131, "step": 24132 }, { "epoch": 0.31, "grad_norm": 3.608402967453003, "learning_rate": 1.987791110955579e-05, "loss": 1.7429, "step": 24133 }, { "epoch": 0.31, "grad_norm": 3.676734209060669, "learning_rate": 1.9877894739286753e-05, "loss": 2.0792, "step": 24134 }, { "epoch": 0.31, "grad_norm": 3.7689740657806396, "learning_rate": 1.9877878367927034e-05, "loss": 1.9671, "step": 24135 }, { "epoch": 0.31, "grad_norm": 3.8485500812530518, "learning_rate": 1.9877861995476625e-05, "loss": 2.0294, "step": 24136 }, { "epoch": 0.31, "grad_norm": 3.676497459411621, "learning_rate": 1.9877845621935535e-05, "loss": 1.8561, "step": 24137 }, { "epoch": 0.31, "grad_norm": 3.4894590377807617, "learning_rate": 1.9877829247303765e-05, "loss": 1.982, "step": 24138 }, { "epoch": 0.31, "grad_norm": 4.657277584075928, "learning_rate": 1.987781287158131e-05, "loss": 2.0168, "step": 24139 }, { "epoch": 0.31, "grad_norm": 3.7163562774658203, "learning_rate": 1.987779649476818e-05, "loss": 1.9231, "step": 24140 }, { "epoch": 0.31, "grad_norm": 4.014022350311279, "learning_rate": 1.9877780116864373e-05, "loss": 1.7735, "step": 24141 }, { "epoch": 0.31, "grad_norm": 3.2735161781311035, "learning_rate": 1.9877763737869892e-05, "loss": 1.5407, "step": 24142 }, { "epoch": 0.31, "grad_norm": 4.041114330291748, "learning_rate": 1.9877747357784738e-05, "loss": 1.8882, "step": 24143 }, { "epoch": 0.31, "grad_norm": 4.0868024826049805, "learning_rate": 1.9877730976608913e-05, "loss": 2.4377, "step": 24144 }, { "epoch": 0.31, "grad_norm": 5.087981224060059, "learning_rate": 1.9877714594342422e-05, "loss": 2.5295, "step": 24145 }, { "epoch": 0.31, "grad_norm": 4.40866756439209, "learning_rate": 1.987769821098526e-05, "loss": 2.3925, "step": 24146 }, { "epoch": 0.31, "grad_norm": 3.5160043239593506, "learning_rate": 1.9877681826537432e-05, "loss": 1.8793, "step": 24147 }, { "epoch": 0.31, "grad_norm": 3.9794607162475586, "learning_rate": 1.9877665440998944e-05, "loss": 1.8439, "step": 24148 }, { "epoch": 0.31, "grad_norm": 3.640995979309082, "learning_rate": 1.9877649054369793e-05, "loss": 1.9526, "step": 24149 }, { "epoch": 0.31, "grad_norm": 3.6037371158599854, "learning_rate": 1.9877632666649978e-05, "loss": 1.806, "step": 24150 }, { "epoch": 0.31, "grad_norm": 4.371586322784424, "learning_rate": 1.987761627783951e-05, "loss": 1.8231, "step": 24151 }, { "epoch": 0.31, "grad_norm": 3.857757806777954, "learning_rate": 1.9877599887938382e-05, "loss": 2.0796, "step": 24152 }, { "epoch": 0.31, "grad_norm": 3.4126975536346436, "learning_rate": 1.98775834969466e-05, "loss": 1.6827, "step": 24153 }, { "epoch": 0.31, "grad_norm": 3.9679322242736816, "learning_rate": 1.9877567104864167e-05, "loss": 2.0502, "step": 24154 }, { "epoch": 0.31, "grad_norm": 4.1169657707214355, "learning_rate": 1.987755071169108e-05, "loss": 2.395, "step": 24155 }, { "epoch": 0.31, "grad_norm": 3.910447835922241, "learning_rate": 1.9877534317427346e-05, "loss": 2.1193, "step": 24156 }, { "epoch": 0.31, "grad_norm": 4.667511463165283, "learning_rate": 1.9877517922072965e-05, "loss": 2.5604, "step": 24157 }, { "epoch": 0.31, "grad_norm": 4.250264644622803, "learning_rate": 1.987750152562794e-05, "loss": 2.0512, "step": 24158 }, { "epoch": 0.31, "grad_norm": 3.501737594604492, "learning_rate": 1.9877485128092266e-05, "loss": 2.1018, "step": 24159 }, { "epoch": 0.31, "grad_norm": 4.028620719909668, "learning_rate": 1.9877468729465956e-05, "loss": 2.3631, "step": 24160 }, { "epoch": 0.31, "grad_norm": 5.162671089172363, "learning_rate": 1.9877452329749004e-05, "loss": 1.9306, "step": 24161 }, { "epoch": 0.31, "grad_norm": 3.5200114250183105, "learning_rate": 1.9877435928941412e-05, "loss": 1.7455, "step": 24162 }, { "epoch": 0.31, "grad_norm": 4.28717041015625, "learning_rate": 1.9877419527043184e-05, "loss": 2.1574, "step": 24163 }, { "epoch": 0.31, "grad_norm": 3.8366804122924805, "learning_rate": 1.987740312405432e-05, "loss": 1.9241, "step": 24164 }, { "epoch": 0.31, "grad_norm": 4.521238803863525, "learning_rate": 1.9877386719974826e-05, "loss": 2.5695, "step": 24165 }, { "epoch": 0.31, "grad_norm": 3.8252665996551514, "learning_rate": 1.98773703148047e-05, "loss": 1.9477, "step": 24166 }, { "epoch": 0.31, "grad_norm": 3.9404120445251465, "learning_rate": 1.9877353908543943e-05, "loss": 1.8663, "step": 24167 }, { "epoch": 0.31, "grad_norm": 3.774294137954712, "learning_rate": 1.987733750119256e-05, "loss": 2.0153, "step": 24168 }, { "epoch": 0.31, "grad_norm": 4.13795280456543, "learning_rate": 1.9877321092750553e-05, "loss": 2.1925, "step": 24169 }, { "epoch": 0.31, "grad_norm": 4.082700252532959, "learning_rate": 1.9877304683217918e-05, "loss": 2.3464, "step": 24170 }, { "epoch": 0.31, "grad_norm": 3.9394102096557617, "learning_rate": 1.9877288272594665e-05, "loss": 2.0148, "step": 24171 }, { "epoch": 0.31, "grad_norm": 3.9381353855133057, "learning_rate": 1.9877271860880788e-05, "loss": 1.9775, "step": 24172 }, { "epoch": 0.31, "grad_norm": 3.833327293395996, "learning_rate": 1.9877255448076296e-05, "loss": 1.837, "step": 24173 }, { "epoch": 0.31, "grad_norm": 3.6869845390319824, "learning_rate": 1.9877239034181185e-05, "loss": 1.7171, "step": 24174 }, { "epoch": 0.31, "grad_norm": 3.691180467605591, "learning_rate": 1.987722261919546e-05, "loss": 1.8221, "step": 24175 }, { "epoch": 0.31, "grad_norm": 3.6929073333740234, "learning_rate": 1.9877206203119127e-05, "loss": 1.9674, "step": 24176 }, { "epoch": 0.31, "grad_norm": 4.187746524810791, "learning_rate": 1.9877189785952175e-05, "loss": 2.7791, "step": 24177 }, { "epoch": 0.31, "grad_norm": 3.5899646282196045, "learning_rate": 1.9877173367694617e-05, "loss": 1.623, "step": 24178 }, { "epoch": 0.31, "grad_norm": 3.6942434310913086, "learning_rate": 1.9877156948346453e-05, "loss": 2.0529, "step": 24179 }, { "epoch": 0.31, "grad_norm": 3.3944201469421387, "learning_rate": 1.9877140527907683e-05, "loss": 1.9412, "step": 24180 }, { "epoch": 0.31, "grad_norm": 3.928753137588501, "learning_rate": 1.9877124106378308e-05, "loss": 2.1312, "step": 24181 }, { "epoch": 0.31, "grad_norm": 3.2550086975097656, "learning_rate": 1.987710768375833e-05, "loss": 1.6271, "step": 24182 }, { "epoch": 0.31, "grad_norm": 3.893242120742798, "learning_rate": 1.9877091260047753e-05, "loss": 2.1158, "step": 24183 }, { "epoch": 0.31, "grad_norm": 3.609936237335205, "learning_rate": 1.9877074835246577e-05, "loss": 1.9748, "step": 24184 }, { "epoch": 0.31, "grad_norm": 4.010500431060791, "learning_rate": 1.9877058409354806e-05, "loss": 2.2462, "step": 24185 }, { "epoch": 0.31, "grad_norm": 3.7080724239349365, "learning_rate": 1.987704198237244e-05, "loss": 1.7333, "step": 24186 }, { "epoch": 0.31, "grad_norm": 4.3072285652160645, "learning_rate": 1.9877025554299477e-05, "loss": 2.2797, "step": 24187 }, { "epoch": 0.31, "grad_norm": 4.134244918823242, "learning_rate": 1.9877009125135925e-05, "loss": 2.114, "step": 24188 }, { "epoch": 0.31, "grad_norm": 3.930633068084717, "learning_rate": 1.9876992694881785e-05, "loss": 1.9256, "step": 24189 }, { "epoch": 0.31, "grad_norm": 3.9155313968658447, "learning_rate": 1.9876976263537057e-05, "loss": 2.6347, "step": 24190 }, { "epoch": 0.31, "grad_norm": 3.722346544265747, "learning_rate": 1.9876959831101742e-05, "loss": 1.7265, "step": 24191 }, { "epoch": 0.31, "grad_norm": 4.6141676902771, "learning_rate": 1.9876943397575846e-05, "loss": 2.1176, "step": 24192 }, { "epoch": 0.31, "grad_norm": 3.1913294792175293, "learning_rate": 1.9876926962959368e-05, "loss": 1.4243, "step": 24193 }, { "epoch": 0.31, "grad_norm": 4.098053932189941, "learning_rate": 1.9876910527252304e-05, "loss": 2.0564, "step": 24194 }, { "epoch": 0.31, "grad_norm": 3.674766778945923, "learning_rate": 1.9876894090454665e-05, "loss": 1.8664, "step": 24195 }, { "epoch": 0.31, "grad_norm": 4.712647438049316, "learning_rate": 1.987687765256645e-05, "loss": 2.1958, "step": 24196 }, { "epoch": 0.31, "grad_norm": 4.106294631958008, "learning_rate": 1.9876861213587658e-05, "loss": 2.1986, "step": 24197 }, { "epoch": 0.31, "grad_norm": 3.7062838077545166, "learning_rate": 1.9876844773518297e-05, "loss": 1.5472, "step": 24198 }, { "epoch": 0.31, "grad_norm": 4.057616233825684, "learning_rate": 1.9876828332358363e-05, "loss": 1.9122, "step": 24199 }, { "epoch": 0.31, "grad_norm": 3.846583843231201, "learning_rate": 1.9876811890107855e-05, "loss": 2.164, "step": 24200 }, { "epoch": 0.31, "grad_norm": 3.914125919342041, "learning_rate": 1.9876795446766785e-05, "loss": 1.8053, "step": 24201 }, { "epoch": 0.31, "grad_norm": 4.23921012878418, "learning_rate": 1.987677900233515e-05, "loss": 2.0193, "step": 24202 }, { "epoch": 0.31, "grad_norm": 3.9006152153015137, "learning_rate": 1.9876762556812946e-05, "loss": 2.2819, "step": 24203 }, { "epoch": 0.31, "grad_norm": 3.776683807373047, "learning_rate": 1.9876746110200184e-05, "loss": 2.2019, "step": 24204 }, { "epoch": 0.31, "grad_norm": 3.745368480682373, "learning_rate": 1.987672966249686e-05, "loss": 2.0501, "step": 24205 }, { "epoch": 0.31, "grad_norm": 4.823757171630859, "learning_rate": 1.9876713213702978e-05, "loss": 2.4445, "step": 24206 }, { "epoch": 0.31, "grad_norm": 3.8253333568573, "learning_rate": 1.9876696763818538e-05, "loss": 2.0373, "step": 24207 }, { "epoch": 0.31, "grad_norm": 3.3209011554718018, "learning_rate": 1.9876680312843546e-05, "loss": 1.5515, "step": 24208 }, { "epoch": 0.31, "grad_norm": 4.386130332946777, "learning_rate": 1.9876663860777996e-05, "loss": 2.129, "step": 24209 }, { "epoch": 0.31, "grad_norm": 3.268240451812744, "learning_rate": 1.98766474076219e-05, "loss": 1.5191, "step": 24210 }, { "epoch": 0.31, "grad_norm": 3.7797951698303223, "learning_rate": 1.987663095337525e-05, "loss": 1.9909, "step": 24211 }, { "epoch": 0.31, "grad_norm": 4.2112040519714355, "learning_rate": 1.9876614498038057e-05, "loss": 2.2767, "step": 24212 }, { "epoch": 0.31, "grad_norm": 4.647199630737305, "learning_rate": 1.9876598041610314e-05, "loss": 2.4642, "step": 24213 }, { "epoch": 0.31, "grad_norm": 4.015270709991455, "learning_rate": 1.987658158409203e-05, "loss": 2.3536, "step": 24214 }, { "epoch": 0.31, "grad_norm": 4.066537857055664, "learning_rate": 1.98765651254832e-05, "loss": 2.202, "step": 24215 }, { "epoch": 0.31, "grad_norm": 4.156346321105957, "learning_rate": 1.9876548665783834e-05, "loss": 2.2048, "step": 24216 }, { "epoch": 0.31, "grad_norm": 3.986269950866699, "learning_rate": 1.987653220499393e-05, "loss": 2.0386, "step": 24217 }, { "epoch": 0.31, "grad_norm": 4.076180934906006, "learning_rate": 1.9876515743113483e-05, "loss": 2.1916, "step": 24218 }, { "epoch": 0.31, "grad_norm": 4.453785419464111, "learning_rate": 1.9876499280142506e-05, "loss": 2.6959, "step": 24219 }, { "epoch": 0.31, "grad_norm": 3.555129289627075, "learning_rate": 1.9876482816080997e-05, "loss": 1.9295, "step": 24220 }, { "epoch": 0.31, "grad_norm": 3.8128743171691895, "learning_rate": 1.9876466350928954e-05, "loss": 1.9852, "step": 24221 }, { "epoch": 0.31, "grad_norm": 4.612781047821045, "learning_rate": 1.9876449884686383e-05, "loss": 2.3457, "step": 24222 }, { "epoch": 0.31, "grad_norm": 3.4019038677215576, "learning_rate": 1.9876433417353284e-05, "loss": 1.7461, "step": 24223 }, { "epoch": 0.31, "grad_norm": 4.944045543670654, "learning_rate": 1.987641694892966e-05, "loss": 2.1307, "step": 24224 }, { "epoch": 0.31, "grad_norm": 4.262121677398682, "learning_rate": 1.987640047941551e-05, "loss": 2.3381, "step": 24225 }, { "epoch": 0.31, "grad_norm": 3.2075467109680176, "learning_rate": 1.9876384008810838e-05, "loss": 1.7358, "step": 24226 }, { "epoch": 0.31, "grad_norm": 3.7622230052948, "learning_rate": 1.9876367537115645e-05, "loss": 1.9479, "step": 24227 }, { "epoch": 0.31, "grad_norm": 4.00885009765625, "learning_rate": 1.9876351064329937e-05, "loss": 2.3679, "step": 24228 }, { "epoch": 0.31, "grad_norm": 4.120128154754639, "learning_rate": 1.987633459045371e-05, "loss": 2.0869, "step": 24229 }, { "epoch": 0.31, "grad_norm": 3.807202100753784, "learning_rate": 1.9876318115486966e-05, "loss": 2.0384, "step": 24230 }, { "epoch": 0.31, "grad_norm": 3.1486709117889404, "learning_rate": 1.9876301639429713e-05, "loss": 1.524, "step": 24231 }, { "epoch": 0.31, "grad_norm": 3.91794753074646, "learning_rate": 1.987628516228195e-05, "loss": 1.8981, "step": 24232 }, { "epoch": 0.31, "grad_norm": 4.094024658203125, "learning_rate": 1.9876268684043673e-05, "loss": 2.0001, "step": 24233 }, { "epoch": 0.31, "grad_norm": 4.773262977600098, "learning_rate": 1.987625220471489e-05, "loss": 2.3098, "step": 24234 }, { "epoch": 0.31, "grad_norm": 3.715467929840088, "learning_rate": 1.98762357242956e-05, "loss": 1.9451, "step": 24235 }, { "epoch": 0.31, "grad_norm": 3.441221237182617, "learning_rate": 1.987621924278581e-05, "loss": 1.6137, "step": 24236 }, { "epoch": 0.31, "grad_norm": 3.9249300956726074, "learning_rate": 1.9876202760185514e-05, "loss": 1.9159, "step": 24237 }, { "epoch": 0.31, "grad_norm": 3.9046366214752197, "learning_rate": 1.9876186276494718e-05, "loss": 1.7536, "step": 24238 }, { "epoch": 0.31, "grad_norm": 3.940683603286743, "learning_rate": 1.9876169791713425e-05, "loss": 2.0726, "step": 24239 }, { "epoch": 0.31, "grad_norm": 3.7492783069610596, "learning_rate": 1.987615330584163e-05, "loss": 2.1349, "step": 24240 }, { "epoch": 0.31, "grad_norm": 3.9501614570617676, "learning_rate": 1.9876136818879347e-05, "loss": 2.1721, "step": 24241 }, { "epoch": 0.31, "grad_norm": 4.428010940551758, "learning_rate": 1.987612033082657e-05, "loss": 2.26, "step": 24242 }, { "epoch": 0.31, "grad_norm": 4.26790189743042, "learning_rate": 1.98761038416833e-05, "loss": 2.0981, "step": 24243 }, { "epoch": 0.31, "grad_norm": 4.537304878234863, "learning_rate": 1.9876087351449544e-05, "loss": 2.3541, "step": 24244 }, { "epoch": 0.31, "grad_norm": 3.9786715507507324, "learning_rate": 1.9876070860125294e-05, "loss": 2.0807, "step": 24245 }, { "epoch": 0.31, "grad_norm": 3.7435696125030518, "learning_rate": 1.9876054367710564e-05, "loss": 1.7088, "step": 24246 }, { "epoch": 0.31, "grad_norm": 3.6689212322235107, "learning_rate": 1.9876037874205346e-05, "loss": 1.7406, "step": 24247 }, { "epoch": 0.31, "grad_norm": 4.239294052124023, "learning_rate": 1.9876021379609648e-05, "loss": 2.507, "step": 24248 }, { "epoch": 0.31, "grad_norm": 4.401719093322754, "learning_rate": 1.9876004883923467e-05, "loss": 2.9142, "step": 24249 }, { "epoch": 0.31, "grad_norm": 3.6865875720977783, "learning_rate": 1.987598838714681e-05, "loss": 1.8839, "step": 24250 }, { "epoch": 0.31, "grad_norm": 3.9345226287841797, "learning_rate": 1.9875971889279676e-05, "loss": 2.2012, "step": 24251 }, { "epoch": 0.31, "grad_norm": 4.354613780975342, "learning_rate": 1.9875955390322067e-05, "loss": 2.3617, "step": 24252 }, { "epoch": 0.31, "grad_norm": 3.788975477218628, "learning_rate": 1.9875938890273984e-05, "loss": 1.9113, "step": 24253 }, { "epoch": 0.31, "grad_norm": 4.312420845031738, "learning_rate": 1.987592238913543e-05, "loss": 2.1508, "step": 24254 }, { "epoch": 0.31, "grad_norm": 4.8931660652160645, "learning_rate": 1.987590588690641e-05, "loss": 2.5863, "step": 24255 }, { "epoch": 0.31, "grad_norm": 3.9391427040100098, "learning_rate": 1.987588938358692e-05, "loss": 2.122, "step": 24256 }, { "epoch": 0.31, "grad_norm": 3.965064287185669, "learning_rate": 1.9875872879176964e-05, "loss": 1.7113, "step": 24257 }, { "epoch": 0.31, "grad_norm": 3.719987154006958, "learning_rate": 1.9875856373676547e-05, "loss": 1.9476, "step": 24258 }, { "epoch": 0.31, "grad_norm": 3.5910227298736572, "learning_rate": 1.9875839867085663e-05, "loss": 1.683, "step": 24259 }, { "epoch": 0.31, "grad_norm": 3.756861925125122, "learning_rate": 1.987582335940432e-05, "loss": 1.8613, "step": 24260 }, { "epoch": 0.31, "grad_norm": 3.900580406188965, "learning_rate": 1.987580685063252e-05, "loss": 2.0063, "step": 24261 }, { "epoch": 0.31, "grad_norm": 3.9368736743927, "learning_rate": 1.9875790340770265e-05, "loss": 1.7952, "step": 24262 }, { "epoch": 0.31, "grad_norm": 3.8060131072998047, "learning_rate": 1.987577382981755e-05, "loss": 1.9403, "step": 24263 }, { "epoch": 0.31, "grad_norm": 4.537502288818359, "learning_rate": 1.9875757317774388e-05, "loss": 1.9267, "step": 24264 }, { "epoch": 0.31, "grad_norm": 3.495969295501709, "learning_rate": 1.987574080464077e-05, "loss": 1.8748, "step": 24265 }, { "epoch": 0.31, "grad_norm": 3.5699000358581543, "learning_rate": 1.9875724290416704e-05, "loss": 1.6853, "step": 24266 }, { "epoch": 0.31, "grad_norm": 3.5895183086395264, "learning_rate": 1.9875707775102194e-05, "loss": 1.8267, "step": 24267 }, { "epoch": 0.31, "grad_norm": 3.2030835151672363, "learning_rate": 1.9875691258697234e-05, "loss": 1.3272, "step": 24268 }, { "epoch": 0.31, "grad_norm": 3.6214582920074463, "learning_rate": 1.987567474120183e-05, "loss": 1.8585, "step": 24269 }, { "epoch": 0.31, "grad_norm": 4.070328235626221, "learning_rate": 1.9875658222615985e-05, "loss": 1.9707, "step": 24270 }, { "epoch": 0.31, "grad_norm": 4.857154846191406, "learning_rate": 1.9875641702939702e-05, "loss": 2.6438, "step": 24271 }, { "epoch": 0.31, "grad_norm": 3.9254345893859863, "learning_rate": 1.987562518217298e-05, "loss": 1.9491, "step": 24272 }, { "epoch": 0.32, "grad_norm": 4.375768184661865, "learning_rate": 1.987560866031582e-05, "loss": 2.3383, "step": 24273 }, { "epoch": 0.32, "grad_norm": 3.8631043434143066, "learning_rate": 1.9875592137368224e-05, "loss": 2.0654, "step": 24274 }, { "epoch": 0.32, "grad_norm": 3.9081802368164062, "learning_rate": 1.98755756133302e-05, "loss": 2.0528, "step": 24275 }, { "epoch": 0.32, "grad_norm": 4.301490783691406, "learning_rate": 1.987555908820174e-05, "loss": 2.6879, "step": 24276 }, { "epoch": 0.32, "grad_norm": 3.87943172454834, "learning_rate": 1.987554256198285e-05, "loss": 2.1234, "step": 24277 }, { "epoch": 0.32, "grad_norm": 4.258492946624756, "learning_rate": 1.9875526034673535e-05, "loss": 1.9161, "step": 24278 }, { "epoch": 0.32, "grad_norm": 3.4474523067474365, "learning_rate": 1.98755095062738e-05, "loss": 1.7501, "step": 24279 }, { "epoch": 0.32, "grad_norm": 3.6901047229766846, "learning_rate": 1.9875492976783634e-05, "loss": 1.623, "step": 24280 }, { "epoch": 0.32, "grad_norm": 4.118075847625732, "learning_rate": 1.9875476446203046e-05, "loss": 1.7506, "step": 24281 }, { "epoch": 0.32, "grad_norm": 4.018924236297607, "learning_rate": 1.987545991453204e-05, "loss": 2.1556, "step": 24282 }, { "epoch": 0.32, "grad_norm": 3.9216537475585938, "learning_rate": 1.9875443381770615e-05, "loss": 1.9318, "step": 24283 }, { "epoch": 0.32, "grad_norm": 4.33262300491333, "learning_rate": 1.987542684791877e-05, "loss": 2.4038, "step": 24284 }, { "epoch": 0.32, "grad_norm": 4.0105695724487305, "learning_rate": 1.9875410312976516e-05, "loss": 2.1747, "step": 24285 }, { "epoch": 0.32, "grad_norm": 3.7936336994171143, "learning_rate": 1.9875393776943848e-05, "loss": 1.8739, "step": 24286 }, { "epoch": 0.32, "grad_norm": 4.069534778594971, "learning_rate": 1.9875377239820767e-05, "loss": 2.0385, "step": 24287 }, { "epoch": 0.32, "grad_norm": 3.9267079830169678, "learning_rate": 1.987536070160728e-05, "loss": 2.1732, "step": 24288 }, { "epoch": 0.32, "grad_norm": 4.069152355194092, "learning_rate": 1.9875344162303384e-05, "loss": 2.0986, "step": 24289 }, { "epoch": 0.32, "grad_norm": 3.768193483352661, "learning_rate": 1.9875327621909083e-05, "loss": 1.9935, "step": 24290 }, { "epoch": 0.32, "grad_norm": 3.470198631286621, "learning_rate": 1.9875311080424375e-05, "loss": 1.9413, "step": 24291 }, { "epoch": 0.32, "grad_norm": 4.1466779708862305, "learning_rate": 1.987529453784927e-05, "loss": 2.4354, "step": 24292 }, { "epoch": 0.32, "grad_norm": 3.5004501342773438, "learning_rate": 1.9875277994183763e-05, "loss": 1.7701, "step": 24293 }, { "epoch": 0.32, "grad_norm": 3.8965694904327393, "learning_rate": 1.9875261449427854e-05, "loss": 2.4095, "step": 24294 }, { "epoch": 0.32, "grad_norm": 3.8695764541625977, "learning_rate": 1.987524490358155e-05, "loss": 1.9722, "step": 24295 }, { "epoch": 0.32, "grad_norm": 4.927338600158691, "learning_rate": 1.9875228356644854e-05, "loss": 2.4925, "step": 24296 }, { "epoch": 0.32, "grad_norm": 4.632991790771484, "learning_rate": 1.9875211808617766e-05, "loss": 2.2587, "step": 24297 }, { "epoch": 0.32, "grad_norm": 3.321046829223633, "learning_rate": 1.9875195259500285e-05, "loss": 1.7316, "step": 24298 }, { "epoch": 0.32, "grad_norm": 4.421176433563232, "learning_rate": 1.9875178709292416e-05, "loss": 2.0405, "step": 24299 }, { "epoch": 0.32, "grad_norm": 3.833254814147949, "learning_rate": 1.987516215799416e-05, "loss": 1.6596, "step": 24300 }, { "epoch": 0.32, "grad_norm": 4.19625186920166, "learning_rate": 1.9875145605605513e-05, "loss": 2.454, "step": 24301 }, { "epoch": 0.32, "grad_norm": 4.022425174713135, "learning_rate": 1.987512905212649e-05, "loss": 1.6955, "step": 24302 }, { "epoch": 0.32, "grad_norm": 4.271697521209717, "learning_rate": 1.9875112497557082e-05, "loss": 2.078, "step": 24303 }, { "epoch": 0.32, "grad_norm": 3.7489326000213623, "learning_rate": 1.9875095941897295e-05, "loss": 1.9801, "step": 24304 }, { "epoch": 0.32, "grad_norm": 4.090690612792969, "learning_rate": 1.987507938514713e-05, "loss": 1.9701, "step": 24305 }, { "epoch": 0.32, "grad_norm": 3.6853909492492676, "learning_rate": 1.9875062827306588e-05, "loss": 2.0536, "step": 24306 }, { "epoch": 0.32, "grad_norm": 3.967466354370117, "learning_rate": 1.9875046268375672e-05, "loss": 2.4026, "step": 24307 }, { "epoch": 0.32, "grad_norm": 4.322990417480469, "learning_rate": 1.987502970835438e-05, "loss": 2.3944, "step": 24308 }, { "epoch": 0.32, "grad_norm": 4.236984729766846, "learning_rate": 1.987501314724272e-05, "loss": 2.6414, "step": 24309 }, { "epoch": 0.32, "grad_norm": 4.111525535583496, "learning_rate": 1.9874996585040694e-05, "loss": 2.2254, "step": 24310 }, { "epoch": 0.32, "grad_norm": 4.194701671600342, "learning_rate": 1.9874980021748297e-05, "loss": 2.2018, "step": 24311 }, { "epoch": 0.32, "grad_norm": 3.6451261043548584, "learning_rate": 1.9874963457365534e-05, "loss": 1.7372, "step": 24312 }, { "epoch": 0.32, "grad_norm": 4.419431209564209, "learning_rate": 1.987494689189241e-05, "loss": 2.1068, "step": 24313 }, { "epoch": 0.32, "grad_norm": 3.7361505031585693, "learning_rate": 1.9874930325328923e-05, "loss": 1.6653, "step": 24314 }, { "epoch": 0.32, "grad_norm": 4.027487754821777, "learning_rate": 1.987491375767508e-05, "loss": 2.1674, "step": 24315 }, { "epoch": 0.32, "grad_norm": 3.978633165359497, "learning_rate": 1.9874897188930872e-05, "loss": 2.24, "step": 24316 }, { "epoch": 0.32, "grad_norm": 4.07313871383667, "learning_rate": 1.9874880619096314e-05, "loss": 2.0656, "step": 24317 }, { "epoch": 0.32, "grad_norm": 4.036099433898926, "learning_rate": 1.9874864048171397e-05, "loss": 2.151, "step": 24318 }, { "epoch": 0.32, "grad_norm": 3.7004940509796143, "learning_rate": 1.987484747615613e-05, "loss": 1.7648, "step": 24319 }, { "epoch": 0.32, "grad_norm": 3.6245346069335938, "learning_rate": 1.9874830903050513e-05, "loss": 2.1743, "step": 24320 }, { "epoch": 0.32, "grad_norm": 4.2686028480529785, "learning_rate": 1.9874814328854545e-05, "loss": 2.4403, "step": 24321 }, { "epoch": 0.32, "grad_norm": 4.156257629394531, "learning_rate": 1.9874797753568232e-05, "loss": 2.4413, "step": 24322 }, { "epoch": 0.32, "grad_norm": 4.139094829559326, "learning_rate": 1.987478117719157e-05, "loss": 2.2809, "step": 24323 }, { "epoch": 0.32, "grad_norm": 3.313204288482666, "learning_rate": 1.9874764599724568e-05, "loss": 1.6239, "step": 24324 }, { "epoch": 0.32, "grad_norm": 4.373759746551514, "learning_rate": 1.9874748021167224e-05, "loss": 2.3451, "step": 24325 }, { "epoch": 0.32, "grad_norm": 4.515878200531006, "learning_rate": 1.9874731441519542e-05, "loss": 2.6633, "step": 24326 }, { "epoch": 0.32, "grad_norm": 4.264474391937256, "learning_rate": 1.9874714860781518e-05, "loss": 2.1292, "step": 24327 }, { "epoch": 0.32, "grad_norm": 3.5203943252563477, "learning_rate": 1.9874698278953163e-05, "loss": 1.4958, "step": 24328 }, { "epoch": 0.32, "grad_norm": 4.264100551605225, "learning_rate": 1.9874681696034467e-05, "loss": 2.562, "step": 24329 }, { "epoch": 0.32, "grad_norm": 3.320878505706787, "learning_rate": 1.9874665112025442e-05, "loss": 1.6341, "step": 24330 }, { "epoch": 0.32, "grad_norm": 4.0475921630859375, "learning_rate": 1.987464852692609e-05, "loss": 1.8131, "step": 24331 }, { "epoch": 0.32, "grad_norm": 3.7730154991149902, "learning_rate": 1.9874631940736407e-05, "loss": 2.0973, "step": 24332 }, { "epoch": 0.32, "grad_norm": 4.670793533325195, "learning_rate": 1.9874615353456395e-05, "loss": 1.96, "step": 24333 }, { "epoch": 0.32, "grad_norm": 3.9259941577911377, "learning_rate": 1.9874598765086063e-05, "loss": 2.0193, "step": 24334 }, { "epoch": 0.32, "grad_norm": 4.091168403625488, "learning_rate": 1.9874582175625402e-05, "loss": 2.0715, "step": 24335 }, { "epoch": 0.32, "grad_norm": 3.5643794536590576, "learning_rate": 1.987456558507442e-05, "loss": 1.9549, "step": 24336 }, { "epoch": 0.32, "grad_norm": 4.447431564331055, "learning_rate": 1.987454899343312e-05, "loss": 1.9681, "step": 24337 }, { "epoch": 0.32, "grad_norm": 4.8582282066345215, "learning_rate": 1.9874532400701504e-05, "loss": 2.1988, "step": 24338 }, { "epoch": 0.32, "grad_norm": 3.7757277488708496, "learning_rate": 1.987451580687957e-05, "loss": 2.0085, "step": 24339 }, { "epoch": 0.32, "grad_norm": 4.38774299621582, "learning_rate": 1.9874499211967323e-05, "loss": 2.0698, "step": 24340 }, { "epoch": 0.32, "grad_norm": 3.5571799278259277, "learning_rate": 1.9874482615964763e-05, "loss": 1.6994, "step": 24341 }, { "epoch": 0.32, "grad_norm": 4.018024921417236, "learning_rate": 1.9874466018871895e-05, "loss": 1.9373, "step": 24342 }, { "epoch": 0.32, "grad_norm": 3.8563263416290283, "learning_rate": 1.9874449420688717e-05, "loss": 2.0216, "step": 24343 }, { "epoch": 0.32, "grad_norm": 3.7992258071899414, "learning_rate": 1.9874432821415227e-05, "loss": 1.755, "step": 24344 }, { "epoch": 0.32, "grad_norm": 4.386812686920166, "learning_rate": 1.987441622105144e-05, "loss": 2.3419, "step": 24345 }, { "epoch": 0.32, "grad_norm": 3.7238190174102783, "learning_rate": 1.9874399619597345e-05, "loss": 1.7932, "step": 24346 }, { "epoch": 0.32, "grad_norm": 3.9290096759796143, "learning_rate": 1.987438301705295e-05, "loss": 2.3874, "step": 24347 }, { "epoch": 0.32, "grad_norm": 4.790225982666016, "learning_rate": 1.9874366413418256e-05, "loss": 1.972, "step": 24348 }, { "epoch": 0.32, "grad_norm": 3.4749467372894287, "learning_rate": 1.9874349808693262e-05, "loss": 1.5208, "step": 24349 }, { "epoch": 0.32, "grad_norm": 3.6550588607788086, "learning_rate": 1.9874333202877975e-05, "loss": 1.6037, "step": 24350 }, { "epoch": 0.32, "grad_norm": 4.248902320861816, "learning_rate": 1.9874316595972396e-05, "loss": 2.2928, "step": 24351 }, { "epoch": 0.32, "grad_norm": 3.841217041015625, "learning_rate": 1.9874299987976524e-05, "loss": 2.1412, "step": 24352 }, { "epoch": 0.32, "grad_norm": 4.092426776885986, "learning_rate": 1.9874283378890357e-05, "loss": 1.9761, "step": 24353 }, { "epoch": 0.32, "grad_norm": 4.070991516113281, "learning_rate": 1.9874266768713907e-05, "loss": 2.5617, "step": 24354 }, { "epoch": 0.32, "grad_norm": 4.735146522521973, "learning_rate": 1.987425015744717e-05, "loss": 2.3308, "step": 24355 }, { "epoch": 0.32, "grad_norm": 3.8309013843536377, "learning_rate": 1.9874233545090144e-05, "loss": 1.7327, "step": 24356 }, { "epoch": 0.32, "grad_norm": 3.7985057830810547, "learning_rate": 1.987421693164284e-05, "loss": 1.5703, "step": 24357 }, { "epoch": 0.32, "grad_norm": 3.57854962348938, "learning_rate": 1.9874200317105253e-05, "loss": 1.8005, "step": 24358 }, { "epoch": 0.32, "grad_norm": 3.8674604892730713, "learning_rate": 1.9874183701477383e-05, "loss": 1.9177, "step": 24359 }, { "epoch": 0.32, "grad_norm": 3.7956159114837646, "learning_rate": 1.987416708475924e-05, "loss": 2.0136, "step": 24360 }, { "epoch": 0.32, "grad_norm": 4.317359924316406, "learning_rate": 1.9874150466950822e-05, "loss": 2.3752, "step": 24361 }, { "epoch": 0.32, "grad_norm": 4.207850933074951, "learning_rate": 1.9874133848052128e-05, "loss": 2.8303, "step": 24362 }, { "epoch": 0.32, "grad_norm": 3.6229660511016846, "learning_rate": 1.987411722806316e-05, "loss": 1.9378, "step": 24363 }, { "epoch": 0.32, "grad_norm": 3.6854677200317383, "learning_rate": 1.987410060698393e-05, "loss": 1.9853, "step": 24364 }, { "epoch": 0.32, "grad_norm": 4.360490322113037, "learning_rate": 1.9874083984814427e-05, "loss": 2.4234, "step": 24365 }, { "epoch": 0.32, "grad_norm": 4.2866740226745605, "learning_rate": 1.9874067361554655e-05, "loss": 2.0161, "step": 24366 }, { "epoch": 0.32, "grad_norm": 3.3036229610443115, "learning_rate": 1.987405073720462e-05, "loss": 1.7108, "step": 24367 }, { "epoch": 0.32, "grad_norm": 3.831132650375366, "learning_rate": 1.9874034111764323e-05, "loss": 1.869, "step": 24368 }, { "epoch": 0.32, "grad_norm": 3.9773333072662354, "learning_rate": 1.9874017485233764e-05, "loss": 2.162, "step": 24369 }, { "epoch": 0.32, "grad_norm": 4.225798606872559, "learning_rate": 1.9874000857612948e-05, "loss": 2.4739, "step": 24370 }, { "epoch": 0.32, "grad_norm": 3.901350498199463, "learning_rate": 1.987398422890187e-05, "loss": 2.1451, "step": 24371 }, { "epoch": 0.32, "grad_norm": 3.9837262630462646, "learning_rate": 1.987396759910054e-05, "loss": 2.1464, "step": 24372 }, { "epoch": 0.32, "grad_norm": 3.9183521270751953, "learning_rate": 1.9873950968208962e-05, "loss": 2.0712, "step": 24373 }, { "epoch": 0.32, "grad_norm": 3.3556652069091797, "learning_rate": 1.9873934336227126e-05, "loss": 1.7118, "step": 24374 }, { "epoch": 0.32, "grad_norm": 3.6140570640563965, "learning_rate": 1.9873917703155043e-05, "loss": 2.0962, "step": 24375 }, { "epoch": 0.32, "grad_norm": 4.455347061157227, "learning_rate": 1.987390106899271e-05, "loss": 2.397, "step": 24376 }, { "epoch": 0.32, "grad_norm": 4.253969192504883, "learning_rate": 1.987388443374013e-05, "loss": 2.4747, "step": 24377 }, { "epoch": 0.32, "grad_norm": 3.8376519680023193, "learning_rate": 1.9873867797397307e-05, "loss": 2.0893, "step": 24378 }, { "epoch": 0.32, "grad_norm": 4.369193077087402, "learning_rate": 1.9873851159964238e-05, "loss": 2.0121, "step": 24379 }, { "epoch": 0.32, "grad_norm": 4.18202543258667, "learning_rate": 1.9873834521440932e-05, "loss": 2.0395, "step": 24380 }, { "epoch": 0.32, "grad_norm": 4.019674301147461, "learning_rate": 1.987381788182739e-05, "loss": 2.2271, "step": 24381 }, { "epoch": 0.32, "grad_norm": 3.95300555229187, "learning_rate": 1.9873801241123604e-05, "loss": 2.2012, "step": 24382 }, { "epoch": 0.32, "grad_norm": 3.6981608867645264, "learning_rate": 1.987378459932959e-05, "loss": 2.0813, "step": 24383 }, { "epoch": 0.32, "grad_norm": 4.476992130279541, "learning_rate": 1.9873767956445337e-05, "loss": 2.2475, "step": 24384 }, { "epoch": 0.32, "grad_norm": 4.022282600402832, "learning_rate": 1.9873751312470856e-05, "loss": 2.008, "step": 24385 }, { "epoch": 0.32, "grad_norm": 3.991990089416504, "learning_rate": 1.9873734667406145e-05, "loss": 1.9092, "step": 24386 }, { "epoch": 0.32, "grad_norm": 4.144410133361816, "learning_rate": 1.98737180212512e-05, "loss": 2.4628, "step": 24387 }, { "epoch": 0.32, "grad_norm": 4.133439064025879, "learning_rate": 1.9873701374006038e-05, "loss": 2.4443, "step": 24388 }, { "epoch": 0.32, "grad_norm": 4.354748249053955, "learning_rate": 1.9873684725670648e-05, "loss": 2.2048, "step": 24389 }, { "epoch": 0.32, "grad_norm": 3.8243470191955566, "learning_rate": 1.9873668076245035e-05, "loss": 2.1015, "step": 24390 }, { "epoch": 0.32, "grad_norm": 3.7145984172821045, "learning_rate": 1.9873651425729203e-05, "loss": 1.6209, "step": 24391 }, { "epoch": 0.32, "grad_norm": 4.188894271850586, "learning_rate": 1.9873634774123152e-05, "loss": 1.811, "step": 24392 }, { "epoch": 0.32, "grad_norm": 4.853704929351807, "learning_rate": 1.9873618121426884e-05, "loss": 2.5165, "step": 24393 }, { "epoch": 0.32, "grad_norm": 3.746438503265381, "learning_rate": 1.98736014676404e-05, "loss": 1.7555, "step": 24394 }, { "epoch": 0.32, "grad_norm": 3.871724843978882, "learning_rate": 1.9873584812763704e-05, "loss": 2.6067, "step": 24395 }, { "epoch": 0.32, "grad_norm": 4.125870704650879, "learning_rate": 1.9873568156796798e-05, "loss": 2.2303, "step": 24396 }, { "epoch": 0.32, "grad_norm": 3.721306085586548, "learning_rate": 1.987355149973968e-05, "loss": 2.287, "step": 24397 }, { "epoch": 0.32, "grad_norm": 3.8348309993743896, "learning_rate": 1.9873534841592358e-05, "loss": 2.0661, "step": 24398 }, { "epoch": 0.32, "grad_norm": 4.0004563331604, "learning_rate": 1.9873518182354824e-05, "loss": 2.1958, "step": 24399 }, { "epoch": 0.32, "grad_norm": 3.3979835510253906, "learning_rate": 1.987350152202709e-05, "loss": 1.4823, "step": 24400 }, { "epoch": 0.32, "grad_norm": 3.309206008911133, "learning_rate": 1.9873484860609156e-05, "loss": 1.7025, "step": 24401 }, { "epoch": 0.32, "grad_norm": 3.441713809967041, "learning_rate": 1.9873468198101018e-05, "loss": 1.8307, "step": 24402 }, { "epoch": 0.32, "grad_norm": 3.921581268310547, "learning_rate": 1.9873451534502684e-05, "loss": 1.9937, "step": 24403 }, { "epoch": 0.32, "grad_norm": 4.220152378082275, "learning_rate": 1.987343486981415e-05, "loss": 1.9297, "step": 24404 }, { "epoch": 0.32, "grad_norm": 4.281701564788818, "learning_rate": 1.9873418204035425e-05, "loss": 2.1061, "step": 24405 }, { "epoch": 0.32, "grad_norm": 4.943142890930176, "learning_rate": 1.9873401537166507e-05, "loss": 2.5023, "step": 24406 }, { "epoch": 0.32, "grad_norm": 4.060999870300293, "learning_rate": 1.9873384869207397e-05, "loss": 1.8561, "step": 24407 }, { "epoch": 0.32, "grad_norm": 4.112906455993652, "learning_rate": 1.9873368200158098e-05, "loss": 2.474, "step": 24408 }, { "epoch": 0.32, "grad_norm": 3.9471335411071777, "learning_rate": 1.9873351530018613e-05, "loss": 2.16, "step": 24409 }, { "epoch": 0.32, "grad_norm": 4.49043083190918, "learning_rate": 1.987333485878894e-05, "loss": 2.4023, "step": 24410 }, { "epoch": 0.32, "grad_norm": 3.619931697845459, "learning_rate": 1.9873318186469083e-05, "loss": 1.9441, "step": 24411 }, { "epoch": 0.32, "grad_norm": 3.375983238220215, "learning_rate": 1.9873301513059048e-05, "loss": 1.7213, "step": 24412 }, { "epoch": 0.32, "grad_norm": 4.5596923828125, "learning_rate": 1.9873284838558828e-05, "loss": 2.3788, "step": 24413 }, { "epoch": 0.32, "grad_norm": 3.6711530685424805, "learning_rate": 1.9873268162968432e-05, "loss": 1.8542, "step": 24414 }, { "epoch": 0.32, "grad_norm": 4.259732246398926, "learning_rate": 1.987325148628786e-05, "loss": 1.9654, "step": 24415 }, { "epoch": 0.32, "grad_norm": 4.117828845977783, "learning_rate": 1.9873234808517113e-05, "loss": 2.0691, "step": 24416 }, { "epoch": 0.32, "grad_norm": 3.2724907398223877, "learning_rate": 1.9873218129656194e-05, "loss": 1.3327, "step": 24417 }, { "epoch": 0.32, "grad_norm": 4.358875751495361, "learning_rate": 1.9873201449705103e-05, "loss": 1.889, "step": 24418 }, { "epoch": 0.32, "grad_norm": 3.585008382797241, "learning_rate": 1.9873184768663847e-05, "loss": 1.8138, "step": 24419 }, { "epoch": 0.32, "grad_norm": 3.7713465690612793, "learning_rate": 1.9873168086532422e-05, "loss": 2.0071, "step": 24420 }, { "epoch": 0.32, "grad_norm": 4.063074588775635, "learning_rate": 1.9873151403310832e-05, "loss": 1.904, "step": 24421 }, { "epoch": 0.32, "grad_norm": 4.264977931976318, "learning_rate": 1.9873134718999077e-05, "loss": 2.3869, "step": 24422 }, { "epoch": 0.32, "grad_norm": 3.9971742630004883, "learning_rate": 1.987311803359716e-05, "loss": 1.8018, "step": 24423 }, { "epoch": 0.32, "grad_norm": 3.9917690753936768, "learning_rate": 1.9873101347105083e-05, "loss": 2.0728, "step": 24424 }, { "epoch": 0.32, "grad_norm": 4.506555080413818, "learning_rate": 1.987308465952285e-05, "loss": 2.1969, "step": 24425 }, { "epoch": 0.32, "grad_norm": 3.9606900215148926, "learning_rate": 1.987306797085046e-05, "loss": 1.8568, "step": 24426 }, { "epoch": 0.32, "grad_norm": 3.8858754634857178, "learning_rate": 1.9873051281087915e-05, "loss": 1.9993, "step": 24427 }, { "epoch": 0.32, "grad_norm": 3.6655385494232178, "learning_rate": 1.9873034590235218e-05, "loss": 1.8317, "step": 24428 }, { "epoch": 0.32, "grad_norm": 4.0945281982421875, "learning_rate": 1.9873017898292373e-05, "loss": 1.959, "step": 24429 }, { "epoch": 0.32, "grad_norm": 3.67397403717041, "learning_rate": 1.9873001205259377e-05, "loss": 1.7471, "step": 24430 }, { "epoch": 0.32, "grad_norm": 4.168645858764648, "learning_rate": 1.9872984511136235e-05, "loss": 1.8995, "step": 24431 }, { "epoch": 0.32, "grad_norm": 4.401792049407959, "learning_rate": 1.987296781592295e-05, "loss": 1.8606, "step": 24432 }, { "epoch": 0.32, "grad_norm": 3.713209390640259, "learning_rate": 1.9872951119619514e-05, "loss": 1.8478, "step": 24433 }, { "epoch": 0.32, "grad_norm": 4.454636096954346, "learning_rate": 1.9872934422225945e-05, "loss": 2.2844, "step": 24434 }, { "epoch": 0.32, "grad_norm": 3.665933609008789, "learning_rate": 1.9872917723742234e-05, "loss": 2.0296, "step": 24435 }, { "epoch": 0.32, "grad_norm": 3.335425615310669, "learning_rate": 1.9872901024168384e-05, "loss": 1.3459, "step": 24436 }, { "epoch": 0.32, "grad_norm": 4.623098850250244, "learning_rate": 1.98728843235044e-05, "loss": 2.0226, "step": 24437 }, { "epoch": 0.32, "grad_norm": 4.301891803741455, "learning_rate": 1.987286762175028e-05, "loss": 2.5653, "step": 24438 }, { "epoch": 0.32, "grad_norm": 4.353443145751953, "learning_rate": 1.9872850918906028e-05, "loss": 2.5247, "step": 24439 }, { "epoch": 0.32, "grad_norm": 3.864762783050537, "learning_rate": 1.987283421497165e-05, "loss": 1.9521, "step": 24440 }, { "epoch": 0.32, "grad_norm": 3.7295639514923096, "learning_rate": 1.987281750994714e-05, "loss": 2.0566, "step": 24441 }, { "epoch": 0.32, "grad_norm": 3.6448745727539062, "learning_rate": 1.9872800803832502e-05, "loss": 1.5786, "step": 24442 }, { "epoch": 0.32, "grad_norm": 3.5793731212615967, "learning_rate": 1.9872784096627743e-05, "loss": 1.9364, "step": 24443 }, { "epoch": 0.32, "grad_norm": 4.009851455688477, "learning_rate": 1.987276738833286e-05, "loss": 1.7475, "step": 24444 }, { "epoch": 0.32, "grad_norm": 4.705514907836914, "learning_rate": 1.9872750678947855e-05, "loss": 2.3521, "step": 24445 }, { "epoch": 0.32, "grad_norm": 3.8517727851867676, "learning_rate": 1.987273396847273e-05, "loss": 1.9949, "step": 24446 }, { "epoch": 0.32, "grad_norm": 3.2980313301086426, "learning_rate": 1.987271725690749e-05, "loss": 1.6348, "step": 24447 }, { "epoch": 0.32, "grad_norm": 4.4445037841796875, "learning_rate": 1.9872700544252134e-05, "loss": 2.1467, "step": 24448 }, { "epoch": 0.32, "grad_norm": 3.9805381298065186, "learning_rate": 1.9872683830506664e-05, "loss": 1.9687, "step": 24449 }, { "epoch": 0.32, "grad_norm": 4.347168922424316, "learning_rate": 1.987266711567108e-05, "loss": 2.4105, "step": 24450 }, { "epoch": 0.32, "grad_norm": 4.165886878967285, "learning_rate": 1.987265039974539e-05, "loss": 2.1982, "step": 24451 }, { "epoch": 0.32, "grad_norm": 3.8344345092773438, "learning_rate": 1.987263368272959e-05, "loss": 2.0005, "step": 24452 }, { "epoch": 0.32, "grad_norm": 4.080204486846924, "learning_rate": 1.9872616964623686e-05, "loss": 1.8919, "step": 24453 }, { "epoch": 0.32, "grad_norm": 4.498798370361328, "learning_rate": 1.9872600245427675e-05, "loss": 2.9015, "step": 24454 }, { "epoch": 0.32, "grad_norm": 3.666132926940918, "learning_rate": 1.987258352514156e-05, "loss": 1.8106, "step": 24455 }, { "epoch": 0.32, "grad_norm": 4.186601638793945, "learning_rate": 1.9872566803765346e-05, "loss": 1.9203, "step": 24456 }, { "epoch": 0.32, "grad_norm": 3.856851100921631, "learning_rate": 1.9872550081299032e-05, "loss": 2.0749, "step": 24457 }, { "epoch": 0.32, "grad_norm": 4.6185479164123535, "learning_rate": 1.9872533357742624e-05, "loss": 2.5851, "step": 24458 }, { "epoch": 0.32, "grad_norm": 3.850846290588379, "learning_rate": 1.987251663309612e-05, "loss": 2.0679, "step": 24459 }, { "epoch": 0.32, "grad_norm": 3.494950294494629, "learning_rate": 1.987249990735952e-05, "loss": 1.4874, "step": 24460 }, { "epoch": 0.32, "grad_norm": 4.376867771148682, "learning_rate": 1.987248318053283e-05, "loss": 2.577, "step": 24461 }, { "epoch": 0.32, "grad_norm": 3.395705461502075, "learning_rate": 1.987246645261605e-05, "loss": 1.8508, "step": 24462 }, { "epoch": 0.32, "grad_norm": 3.6922903060913086, "learning_rate": 1.9872449723609185e-05, "loss": 1.9268, "step": 24463 }, { "epoch": 0.32, "grad_norm": 4.144404888153076, "learning_rate": 1.9872432993512232e-05, "loss": 2.2038, "step": 24464 }, { "epoch": 0.32, "grad_norm": 4.1837568283081055, "learning_rate": 1.9872416262325192e-05, "loss": 2.3377, "step": 24465 }, { "epoch": 0.32, "grad_norm": 4.160608768463135, "learning_rate": 1.9872399530048075e-05, "loss": 2.2879, "step": 24466 }, { "epoch": 0.32, "grad_norm": 3.6074295043945312, "learning_rate": 1.9872382796680874e-05, "loss": 1.7707, "step": 24467 }, { "epoch": 0.32, "grad_norm": 4.562308311462402, "learning_rate": 1.98723660622236e-05, "loss": 2.621, "step": 24468 }, { "epoch": 0.32, "grad_norm": 4.513528823852539, "learning_rate": 1.987234932667624e-05, "loss": 2.3929, "step": 24469 }, { "epoch": 0.32, "grad_norm": 3.7646162509918213, "learning_rate": 1.987233259003881e-05, "loss": 2.1141, "step": 24470 }, { "epoch": 0.32, "grad_norm": 4.218074321746826, "learning_rate": 1.9872315852311308e-05, "loss": 2.2789, "step": 24471 }, { "epoch": 0.32, "grad_norm": 4.256773948669434, "learning_rate": 1.9872299113493735e-05, "loss": 2.5936, "step": 24472 }, { "epoch": 0.32, "grad_norm": 3.697059154510498, "learning_rate": 1.987228237358609e-05, "loss": 1.6303, "step": 24473 }, { "epoch": 0.32, "grad_norm": 3.741790533065796, "learning_rate": 1.987226563258838e-05, "loss": 2.0314, "step": 24474 }, { "epoch": 0.32, "grad_norm": 3.715885639190674, "learning_rate": 1.9872248890500605e-05, "loss": 2.1033, "step": 24475 }, { "epoch": 0.32, "grad_norm": 3.376039505004883, "learning_rate": 1.9872232147322764e-05, "loss": 1.91, "step": 24476 }, { "epoch": 0.32, "grad_norm": 3.4811463356018066, "learning_rate": 1.987221540305486e-05, "loss": 1.8213, "step": 24477 }, { "epoch": 0.32, "grad_norm": 4.033869743347168, "learning_rate": 1.98721986576969e-05, "loss": 2.051, "step": 24478 }, { "epoch": 0.32, "grad_norm": 3.9148330688476562, "learning_rate": 1.987218191124888e-05, "loss": 2.0851, "step": 24479 }, { "epoch": 0.32, "grad_norm": 3.606698751449585, "learning_rate": 1.9872165163710805e-05, "loss": 1.4323, "step": 24480 }, { "epoch": 0.32, "grad_norm": 3.732088088989258, "learning_rate": 1.9872148415082672e-05, "loss": 2.1002, "step": 24481 }, { "epoch": 0.32, "grad_norm": 4.189031600952148, "learning_rate": 1.987213166536449e-05, "loss": 2.2419, "step": 24482 }, { "epoch": 0.32, "grad_norm": 3.2721338272094727, "learning_rate": 1.9872114914556253e-05, "loss": 1.5154, "step": 24483 }, { "epoch": 0.32, "grad_norm": 3.8601739406585693, "learning_rate": 1.987209816265797e-05, "loss": 2.1397, "step": 24484 }, { "epoch": 0.32, "grad_norm": 3.7056028842926025, "learning_rate": 1.987208140966964e-05, "loss": 1.5973, "step": 24485 }, { "epoch": 0.32, "grad_norm": 4.669124603271484, "learning_rate": 1.987206465559126e-05, "loss": 2.6967, "step": 24486 }, { "epoch": 0.32, "grad_norm": 4.0044355392456055, "learning_rate": 1.9872047900422844e-05, "loss": 2.2165, "step": 24487 }, { "epoch": 0.32, "grad_norm": 4.186075210571289, "learning_rate": 1.9872031144164384e-05, "loss": 1.8532, "step": 24488 }, { "epoch": 0.32, "grad_norm": 5.105030536651611, "learning_rate": 1.987201438681588e-05, "loss": 2.1001, "step": 24489 }, { "epoch": 0.32, "grad_norm": 3.446387767791748, "learning_rate": 1.9871997628377342e-05, "loss": 1.4243, "step": 24490 }, { "epoch": 0.32, "grad_norm": 3.829652786254883, "learning_rate": 1.987198086884877e-05, "loss": 2.1721, "step": 24491 }, { "epoch": 0.32, "grad_norm": 4.009110927581787, "learning_rate": 1.9871964108230157e-05, "loss": 2.2555, "step": 24492 }, { "epoch": 0.32, "grad_norm": 3.389965295791626, "learning_rate": 1.9871947346521518e-05, "loss": 1.5225, "step": 24493 }, { "epoch": 0.32, "grad_norm": 4.319577217102051, "learning_rate": 1.9871930583722846e-05, "loss": 2.2213, "step": 24494 }, { "epoch": 0.32, "grad_norm": 3.995896816253662, "learning_rate": 1.9871913819834144e-05, "loss": 2.2624, "step": 24495 }, { "epoch": 0.32, "grad_norm": 4.392730236053467, "learning_rate": 1.987189705485542e-05, "loss": 2.3228, "step": 24496 }, { "epoch": 0.32, "grad_norm": 3.7004640102386475, "learning_rate": 1.9871880288786668e-05, "loss": 1.4674, "step": 24497 }, { "epoch": 0.32, "grad_norm": 3.600841999053955, "learning_rate": 1.987186352162789e-05, "loss": 2.2053, "step": 24498 }, { "epoch": 0.32, "grad_norm": 4.035797119140625, "learning_rate": 1.9871846753379094e-05, "loss": 2.1902, "step": 24499 }, { "epoch": 0.32, "grad_norm": 3.666517734527588, "learning_rate": 1.9871829984040277e-05, "loss": 1.8813, "step": 24500 }, { "epoch": 0.32, "grad_norm": 4.126009941101074, "learning_rate": 1.9871813213611447e-05, "loss": 2.189, "step": 24501 }, { "epoch": 0.32, "grad_norm": 3.7608022689819336, "learning_rate": 1.9871796442092598e-05, "loss": 2.1348, "step": 24502 }, { "epoch": 0.32, "grad_norm": 4.094299793243408, "learning_rate": 1.9871779669483736e-05, "loss": 2.096, "step": 24503 }, { "epoch": 0.32, "grad_norm": 4.458271026611328, "learning_rate": 1.987176289578486e-05, "loss": 2.1057, "step": 24504 }, { "epoch": 0.32, "grad_norm": 3.182075262069702, "learning_rate": 1.9871746120995977e-05, "loss": 1.4432, "step": 24505 }, { "epoch": 0.32, "grad_norm": 3.557501792907715, "learning_rate": 1.9871729345117084e-05, "loss": 1.9773, "step": 24506 }, { "epoch": 0.32, "grad_norm": 4.108611583709717, "learning_rate": 1.9871712568148184e-05, "loss": 2.1107, "step": 24507 }, { "epoch": 0.32, "grad_norm": 3.412263870239258, "learning_rate": 1.987169579008928e-05, "loss": 1.446, "step": 24508 }, { "epoch": 0.32, "grad_norm": 4.2204742431640625, "learning_rate": 1.9871679010940373e-05, "loss": 2.3063, "step": 24509 }, { "epoch": 0.32, "grad_norm": 4.107176303863525, "learning_rate": 1.987166223070147e-05, "loss": 2.1743, "step": 24510 }, { "epoch": 0.32, "grad_norm": 3.8629300594329834, "learning_rate": 1.987164544937256e-05, "loss": 1.9715, "step": 24511 }, { "epoch": 0.32, "grad_norm": 3.0433058738708496, "learning_rate": 1.9871628666953656e-05, "loss": 1.4525, "step": 24512 }, { "epoch": 0.32, "grad_norm": 4.344361782073975, "learning_rate": 1.9871611883444757e-05, "loss": 2.0406, "step": 24513 }, { "epoch": 0.32, "grad_norm": 3.556593894958496, "learning_rate": 1.9871595098845866e-05, "loss": 1.7589, "step": 24514 }, { "epoch": 0.32, "grad_norm": 3.9843063354492188, "learning_rate": 1.987157831315698e-05, "loss": 2.3243, "step": 24515 }, { "epoch": 0.32, "grad_norm": 3.8498382568359375, "learning_rate": 1.9871561526378106e-05, "loss": 1.7888, "step": 24516 }, { "epoch": 0.32, "grad_norm": 3.8111956119537354, "learning_rate": 1.9871544738509248e-05, "loss": 2.1733, "step": 24517 }, { "epoch": 0.32, "grad_norm": 3.147841215133667, "learning_rate": 1.98715279495504e-05, "loss": 1.8293, "step": 24518 }, { "epoch": 0.32, "grad_norm": 3.248164176940918, "learning_rate": 1.987151115950157e-05, "loss": 1.4938, "step": 24519 }, { "epoch": 0.32, "grad_norm": 4.271063804626465, "learning_rate": 1.9871494368362755e-05, "loss": 1.7128, "step": 24520 }, { "epoch": 0.32, "grad_norm": 3.776174783706665, "learning_rate": 1.987147757613396e-05, "loss": 2.2228, "step": 24521 }, { "epoch": 0.32, "grad_norm": 4.149323463439941, "learning_rate": 1.987146078281519e-05, "loss": 1.9101, "step": 24522 }, { "epoch": 0.32, "grad_norm": 4.115532875061035, "learning_rate": 1.9871443988406438e-05, "loss": 2.4177, "step": 24523 }, { "epoch": 0.32, "grad_norm": 3.5609304904937744, "learning_rate": 1.9871427192907713e-05, "loss": 1.9563, "step": 24524 }, { "epoch": 0.32, "grad_norm": 4.404036045074463, "learning_rate": 1.9871410396319016e-05, "loss": 2.5517, "step": 24525 }, { "epoch": 0.32, "grad_norm": 3.609419345855713, "learning_rate": 1.9871393598640347e-05, "loss": 2.0416, "step": 24526 }, { "epoch": 0.32, "grad_norm": 4.04787015914917, "learning_rate": 1.987137679987171e-05, "loss": 1.7746, "step": 24527 }, { "epoch": 0.32, "grad_norm": 4.241761684417725, "learning_rate": 1.9871360000013103e-05, "loss": 1.7772, "step": 24528 }, { "epoch": 0.32, "grad_norm": 4.090067386627197, "learning_rate": 1.987134319906453e-05, "loss": 1.8673, "step": 24529 }, { "epoch": 0.32, "grad_norm": 4.121644496917725, "learning_rate": 1.9871326397025998e-05, "loss": 1.9473, "step": 24530 }, { "epoch": 0.32, "grad_norm": 4.285637378692627, "learning_rate": 1.9871309593897502e-05, "loss": 2.5852, "step": 24531 }, { "epoch": 0.32, "grad_norm": 4.282782554626465, "learning_rate": 1.9871292789679044e-05, "loss": 2.4686, "step": 24532 }, { "epoch": 0.32, "grad_norm": 4.020381450653076, "learning_rate": 1.9871275984370628e-05, "loss": 2.2732, "step": 24533 }, { "epoch": 0.32, "grad_norm": 3.902674436569214, "learning_rate": 1.9871259177972256e-05, "loss": 2.0392, "step": 24534 }, { "epoch": 0.32, "grad_norm": 4.348830699920654, "learning_rate": 1.9871242370483933e-05, "loss": 2.2009, "step": 24535 }, { "epoch": 0.32, "grad_norm": 3.7235515117645264, "learning_rate": 1.9871225561905654e-05, "loss": 1.7918, "step": 24536 }, { "epoch": 0.32, "grad_norm": 3.846982002258301, "learning_rate": 1.9871208752237427e-05, "loss": 1.9301, "step": 24537 }, { "epoch": 0.32, "grad_norm": 3.8336052894592285, "learning_rate": 1.9871191941479248e-05, "loss": 1.8401, "step": 24538 }, { "epoch": 0.32, "grad_norm": 3.9840593338012695, "learning_rate": 1.987117512963112e-05, "loss": 2.0993, "step": 24539 }, { "epoch": 0.32, "grad_norm": 3.6712615489959717, "learning_rate": 1.9871158316693048e-05, "loss": 1.7577, "step": 24540 }, { "epoch": 0.32, "grad_norm": 5.411723613739014, "learning_rate": 1.9871141502665037e-05, "loss": 2.2137, "step": 24541 }, { "epoch": 0.32, "grad_norm": 3.544443368911743, "learning_rate": 1.987112468754708e-05, "loss": 1.4762, "step": 24542 }, { "epoch": 0.32, "grad_norm": 4.510655403137207, "learning_rate": 1.9871107871339187e-05, "loss": 2.2471, "step": 24543 }, { "epoch": 0.32, "grad_norm": 3.9531941413879395, "learning_rate": 1.9871091054041355e-05, "loss": 2.1413, "step": 24544 }, { "epoch": 0.32, "grad_norm": 3.8057610988616943, "learning_rate": 1.9871074235653584e-05, "loss": 2.0317, "step": 24545 }, { "epoch": 0.32, "grad_norm": 4.530510425567627, "learning_rate": 1.9871057416175882e-05, "loss": 1.8218, "step": 24546 }, { "epoch": 0.32, "grad_norm": 4.327255725860596, "learning_rate": 1.987104059560825e-05, "loss": 2.0725, "step": 24547 }, { "epoch": 0.32, "grad_norm": 4.608561992645264, "learning_rate": 1.9871023773950683e-05, "loss": 2.4929, "step": 24548 }, { "epoch": 0.32, "grad_norm": 3.820073127746582, "learning_rate": 1.987100695120319e-05, "loss": 2.3855, "step": 24549 }, { "epoch": 0.32, "grad_norm": 5.6577301025390625, "learning_rate": 1.987099012736577e-05, "loss": 2.5822, "step": 24550 }, { "epoch": 0.32, "grad_norm": 4.56978702545166, "learning_rate": 1.9870973302438422e-05, "loss": 2.4132, "step": 24551 }, { "epoch": 0.32, "grad_norm": 3.5739643573760986, "learning_rate": 1.987095647642116e-05, "loss": 1.6666, "step": 24552 }, { "epoch": 0.32, "grad_norm": 3.926917314529419, "learning_rate": 1.9870939649313966e-05, "loss": 1.7852, "step": 24553 }, { "epoch": 0.32, "grad_norm": 4.097470760345459, "learning_rate": 1.987092282111686e-05, "loss": 2.177, "step": 24554 }, { "epoch": 0.32, "grad_norm": 4.0770063400268555, "learning_rate": 1.987090599182983e-05, "loss": 2.3673, "step": 24555 }, { "epoch": 0.32, "grad_norm": 4.180966377258301, "learning_rate": 1.9870889161452892e-05, "loss": 2.1291, "step": 24556 }, { "epoch": 0.32, "grad_norm": 4.083854675292969, "learning_rate": 1.987087232998604e-05, "loss": 2.4843, "step": 24557 }, { "epoch": 0.32, "grad_norm": 3.756385564804077, "learning_rate": 1.987085549742927e-05, "loss": 2.0592, "step": 24558 }, { "epoch": 0.32, "grad_norm": 4.437815189361572, "learning_rate": 1.9870838663782594e-05, "loss": 2.1272, "step": 24559 }, { "epoch": 0.32, "grad_norm": 3.8556339740753174, "learning_rate": 1.987082182904601e-05, "loss": 2.3565, "step": 24560 }, { "epoch": 0.32, "grad_norm": 3.7962186336517334, "learning_rate": 1.9870804993219518e-05, "loss": 2.0263, "step": 24561 }, { "epoch": 0.32, "grad_norm": 3.880399227142334, "learning_rate": 1.9870788156303125e-05, "loss": 1.9805, "step": 24562 }, { "epoch": 0.32, "grad_norm": 3.385246992111206, "learning_rate": 1.9870771318296825e-05, "loss": 1.5657, "step": 24563 }, { "epoch": 0.32, "grad_norm": 3.924633502960205, "learning_rate": 1.987075447920063e-05, "loss": 2.3895, "step": 24564 }, { "epoch": 0.32, "grad_norm": 4.120338439941406, "learning_rate": 1.987073763901453e-05, "loss": 2.1676, "step": 24565 }, { "epoch": 0.32, "grad_norm": 3.918210983276367, "learning_rate": 1.9870720797738537e-05, "loss": 2.1279, "step": 24566 }, { "epoch": 0.32, "grad_norm": 4.450941562652588, "learning_rate": 1.987070395537265e-05, "loss": 1.8712, "step": 24567 }, { "epoch": 0.32, "grad_norm": 4.071579933166504, "learning_rate": 1.9870687111916868e-05, "loss": 2.0574, "step": 24568 }, { "epoch": 0.32, "grad_norm": 4.120004177093506, "learning_rate": 1.9870670267371195e-05, "loss": 2.6599, "step": 24569 }, { "epoch": 0.32, "grad_norm": 3.5293824672698975, "learning_rate": 1.987065342173563e-05, "loss": 1.6052, "step": 24570 }, { "epoch": 0.32, "grad_norm": 4.283315181732178, "learning_rate": 1.9870636575010184e-05, "loss": 1.972, "step": 24571 }, { "epoch": 0.32, "grad_norm": 4.528436660766602, "learning_rate": 1.9870619727194846e-05, "loss": 2.4118, "step": 24572 }, { "epoch": 0.32, "grad_norm": 3.58402419090271, "learning_rate": 1.9870602878289626e-05, "loss": 1.4935, "step": 24573 }, { "epoch": 0.32, "grad_norm": 3.899503469467163, "learning_rate": 1.9870586028294527e-05, "loss": 2.2173, "step": 24574 }, { "epoch": 0.32, "grad_norm": 4.684043884277344, "learning_rate": 1.9870569177209543e-05, "loss": 2.462, "step": 24575 }, { "epoch": 0.32, "grad_norm": 3.7536919116973877, "learning_rate": 1.987055232503468e-05, "loss": 1.8812, "step": 24576 }, { "epoch": 0.32, "grad_norm": 3.8348538875579834, "learning_rate": 1.9870535471769946e-05, "loss": 1.9747, "step": 24577 }, { "epoch": 0.32, "grad_norm": 4.65425443649292, "learning_rate": 1.9870518617415334e-05, "loss": 1.9282, "step": 24578 }, { "epoch": 0.32, "grad_norm": 4.259923934936523, "learning_rate": 1.987050176197085e-05, "loss": 2.5176, "step": 24579 }, { "epoch": 0.32, "grad_norm": 4.203073501586914, "learning_rate": 1.9870484905436494e-05, "loss": 2.0447, "step": 24580 }, { "epoch": 0.32, "grad_norm": 4.086024284362793, "learning_rate": 1.987046804781227e-05, "loss": 2.3031, "step": 24581 }, { "epoch": 0.32, "grad_norm": 3.9088375568389893, "learning_rate": 1.987045118909818e-05, "loss": 1.875, "step": 24582 }, { "epoch": 0.32, "grad_norm": 3.638498306274414, "learning_rate": 1.9870434329294224e-05, "loss": 2.0468, "step": 24583 }, { "epoch": 0.32, "grad_norm": 3.7060956954956055, "learning_rate": 1.9870417468400402e-05, "loss": 1.974, "step": 24584 }, { "epoch": 0.32, "grad_norm": 4.699275493621826, "learning_rate": 1.9870400606416724e-05, "loss": 2.4515, "step": 24585 }, { "epoch": 0.32, "grad_norm": 4.192836284637451, "learning_rate": 1.987038374334318e-05, "loss": 2.2988, "step": 24586 }, { "epoch": 0.32, "grad_norm": 4.0979156494140625, "learning_rate": 1.9870366879179785e-05, "loss": 2.038, "step": 24587 }, { "epoch": 0.32, "grad_norm": 3.663276195526123, "learning_rate": 1.9870350013926528e-05, "loss": 1.8464, "step": 24588 }, { "epoch": 0.32, "grad_norm": 4.114049911499023, "learning_rate": 1.987033314758342e-05, "loss": 2.2408, "step": 24589 }, { "epoch": 0.32, "grad_norm": 3.715902805328369, "learning_rate": 1.9870316280150458e-05, "loss": 1.703, "step": 24590 }, { "epoch": 0.32, "grad_norm": 3.6888086795806885, "learning_rate": 1.9870299411627647e-05, "loss": 1.8441, "step": 24591 }, { "epoch": 0.32, "grad_norm": 4.086912631988525, "learning_rate": 1.9870282542014986e-05, "loss": 1.8195, "step": 24592 }, { "epoch": 0.32, "grad_norm": 4.373195648193359, "learning_rate": 1.9870265671312482e-05, "loss": 2.2016, "step": 24593 }, { "epoch": 0.32, "grad_norm": 3.830221176147461, "learning_rate": 1.9870248799520128e-05, "loss": 2.1932, "step": 24594 }, { "epoch": 0.32, "grad_norm": 3.9530551433563232, "learning_rate": 1.9870231926637933e-05, "loss": 2.3707, "step": 24595 }, { "epoch": 0.32, "grad_norm": 4.741703033447266, "learning_rate": 1.98702150526659e-05, "loss": 1.981, "step": 24596 }, { "epoch": 0.32, "grad_norm": 4.355310440063477, "learning_rate": 1.9870198177604025e-05, "loss": 2.2194, "step": 24597 }, { "epoch": 0.32, "grad_norm": 4.160473346710205, "learning_rate": 1.9870181301452314e-05, "loss": 1.7454, "step": 24598 }, { "epoch": 0.32, "grad_norm": 4.118105888366699, "learning_rate": 1.9870164424210766e-05, "loss": 2.4259, "step": 24599 }, { "epoch": 0.32, "grad_norm": 3.6611056327819824, "learning_rate": 1.9870147545879385e-05, "loss": 1.705, "step": 24600 }, { "epoch": 0.32, "grad_norm": 4.365188121795654, "learning_rate": 1.987013066645817e-05, "loss": 2.2494, "step": 24601 }, { "epoch": 0.32, "grad_norm": 4.522053241729736, "learning_rate": 1.987011378594713e-05, "loss": 2.377, "step": 24602 }, { "epoch": 0.32, "grad_norm": 3.805302858352661, "learning_rate": 1.987009690434626e-05, "loss": 1.8442, "step": 24603 }, { "epoch": 0.32, "grad_norm": 3.977973461151123, "learning_rate": 1.9870080021655563e-05, "loss": 1.7471, "step": 24604 }, { "epoch": 0.32, "grad_norm": 3.7531657218933105, "learning_rate": 1.9870063137875043e-05, "loss": 1.7612, "step": 24605 }, { "epoch": 0.32, "grad_norm": 3.6378636360168457, "learning_rate": 1.98700462530047e-05, "loss": 1.8919, "step": 24606 }, { "epoch": 0.32, "grad_norm": 4.46487283706665, "learning_rate": 1.9870029367044533e-05, "loss": 2.5636, "step": 24607 }, { "epoch": 0.32, "grad_norm": 3.806439161300659, "learning_rate": 1.987001247999455e-05, "loss": 2.257, "step": 24608 }, { "epoch": 0.32, "grad_norm": 4.049983501434326, "learning_rate": 1.9869995591854755e-05, "loss": 2.1728, "step": 24609 }, { "epoch": 0.32, "grad_norm": 3.9333176612854004, "learning_rate": 1.986997870262514e-05, "loss": 2.0787, "step": 24610 }, { "epoch": 0.32, "grad_norm": 3.82887601852417, "learning_rate": 1.9869961812305712e-05, "loss": 1.6782, "step": 24611 }, { "epoch": 0.32, "grad_norm": 3.31030535697937, "learning_rate": 1.986994492089647e-05, "loss": 1.5508, "step": 24612 }, { "epoch": 0.32, "grad_norm": 4.193256378173828, "learning_rate": 1.9869928028397424e-05, "loss": 1.5778, "step": 24613 }, { "epoch": 0.32, "grad_norm": 4.1083197593688965, "learning_rate": 1.9869911134808568e-05, "loss": 2.1009, "step": 24614 }, { "epoch": 0.32, "grad_norm": 3.5343687534332275, "learning_rate": 1.9869894240129908e-05, "loss": 2.0022, "step": 24615 }, { "epoch": 0.32, "grad_norm": 4.361434459686279, "learning_rate": 1.9869877344361443e-05, "loss": 2.3527, "step": 24616 }, { "epoch": 0.32, "grad_norm": 3.9364657402038574, "learning_rate": 1.9869860447503174e-05, "loss": 1.7312, "step": 24617 }, { "epoch": 0.32, "grad_norm": 3.8438119888305664, "learning_rate": 1.986984354955511e-05, "loss": 1.9607, "step": 24618 }, { "epoch": 0.32, "grad_norm": 3.636305332183838, "learning_rate": 1.9869826650517243e-05, "loss": 1.7281, "step": 24619 }, { "epoch": 0.32, "grad_norm": 4.196942329406738, "learning_rate": 1.986980975038958e-05, "loss": 2.0966, "step": 24620 }, { "epoch": 0.32, "grad_norm": 4.216366767883301, "learning_rate": 1.9869792849172124e-05, "loss": 2.1422, "step": 24621 }, { "epoch": 0.32, "grad_norm": 3.6341335773468018, "learning_rate": 1.9869775946864876e-05, "loss": 1.9501, "step": 24622 }, { "epoch": 0.32, "grad_norm": 3.846900463104248, "learning_rate": 1.986975904346784e-05, "loss": 2.0893, "step": 24623 }, { "epoch": 0.32, "grad_norm": 4.092269420623779, "learning_rate": 1.9869742138981012e-05, "loss": 2.3263, "step": 24624 }, { "epoch": 0.32, "grad_norm": 4.099786281585693, "learning_rate": 1.9869725233404396e-05, "loss": 2.3426, "step": 24625 }, { "epoch": 0.32, "grad_norm": 3.94423246383667, "learning_rate": 1.9869708326737995e-05, "loss": 1.6923, "step": 24626 }, { "epoch": 0.32, "grad_norm": 4.402603626251221, "learning_rate": 1.986969141898181e-05, "loss": 2.4207, "step": 24627 }, { "epoch": 0.32, "grad_norm": 3.737037181854248, "learning_rate": 1.9869674510135848e-05, "loss": 2.0316, "step": 24628 }, { "epoch": 0.32, "grad_norm": 3.7588953971862793, "learning_rate": 1.9869657600200102e-05, "loss": 2.0948, "step": 24629 }, { "epoch": 0.32, "grad_norm": 4.050410270690918, "learning_rate": 1.986964068917458e-05, "loss": 2.315, "step": 24630 }, { "epoch": 0.32, "grad_norm": 3.6741600036621094, "learning_rate": 1.9869623777059284e-05, "loss": 1.845, "step": 24631 }, { "epoch": 0.32, "grad_norm": 4.2002763748168945, "learning_rate": 1.9869606863854215e-05, "loss": 1.9071, "step": 24632 }, { "epoch": 0.32, "grad_norm": 4.529409885406494, "learning_rate": 1.986958994955937e-05, "loss": 2.2286, "step": 24633 }, { "epoch": 0.32, "grad_norm": 4.6059465408325195, "learning_rate": 1.9869573034174757e-05, "loss": 2.5091, "step": 24634 }, { "epoch": 0.32, "grad_norm": 4.465925693511963, "learning_rate": 1.9869556117700375e-05, "loss": 2.5142, "step": 24635 }, { "epoch": 0.32, "grad_norm": 3.8645973205566406, "learning_rate": 1.9869539200136227e-05, "loss": 2.0572, "step": 24636 }, { "epoch": 0.32, "grad_norm": 4.250767707824707, "learning_rate": 1.9869522281482313e-05, "loss": 2.2902, "step": 24637 }, { "epoch": 0.32, "grad_norm": 4.1650567054748535, "learning_rate": 1.9869505361738634e-05, "loss": 2.5694, "step": 24638 }, { "epoch": 0.32, "grad_norm": 3.8902552127838135, "learning_rate": 1.9869488440905197e-05, "loss": 2.1453, "step": 24639 }, { "epoch": 0.32, "grad_norm": 3.9775116443634033, "learning_rate": 1.9869471518982004e-05, "loss": 2.225, "step": 24640 }, { "epoch": 0.32, "grad_norm": 4.221691131591797, "learning_rate": 1.9869454595969052e-05, "loss": 2.3525, "step": 24641 }, { "epoch": 0.32, "grad_norm": 3.680356025695801, "learning_rate": 1.9869437671866345e-05, "loss": 1.7206, "step": 24642 }, { "epoch": 0.32, "grad_norm": 4.134745121002197, "learning_rate": 1.9869420746673883e-05, "loss": 1.9743, "step": 24643 }, { "epoch": 0.32, "grad_norm": 3.9510796070098877, "learning_rate": 1.986940382039167e-05, "loss": 2.1348, "step": 24644 }, { "epoch": 0.32, "grad_norm": 4.048953056335449, "learning_rate": 1.9869386893019706e-05, "loss": 1.7412, "step": 24645 }, { "epoch": 0.32, "grad_norm": 3.600123167037964, "learning_rate": 1.9869369964557998e-05, "loss": 1.8588, "step": 24646 }, { "epoch": 0.32, "grad_norm": 3.9401135444641113, "learning_rate": 1.986935303500654e-05, "loss": 2.0152, "step": 24647 }, { "epoch": 0.32, "grad_norm": 3.9425723552703857, "learning_rate": 1.9869336104365343e-05, "loss": 2.2322, "step": 24648 }, { "epoch": 0.32, "grad_norm": 4.09993839263916, "learning_rate": 1.9869319172634397e-05, "loss": 2.2554, "step": 24649 }, { "epoch": 0.32, "grad_norm": 3.891129970550537, "learning_rate": 1.9869302239813718e-05, "loss": 1.6508, "step": 24650 }, { "epoch": 0.32, "grad_norm": 3.881547451019287, "learning_rate": 1.9869285305903298e-05, "loss": 2.1503, "step": 24651 }, { "epoch": 0.32, "grad_norm": 4.491387367248535, "learning_rate": 1.986926837090314e-05, "loss": 2.3896, "step": 24652 }, { "epoch": 0.32, "grad_norm": 4.19108247756958, "learning_rate": 1.9869251434813246e-05, "loss": 2.3698, "step": 24653 }, { "epoch": 0.32, "grad_norm": 3.6174252033233643, "learning_rate": 1.9869234497633624e-05, "loss": 1.6774, "step": 24654 }, { "epoch": 0.32, "grad_norm": 4.153694152832031, "learning_rate": 1.9869217559364267e-05, "loss": 1.963, "step": 24655 }, { "epoch": 0.32, "grad_norm": 3.5720064640045166, "learning_rate": 1.9869200620005183e-05, "loss": 1.5411, "step": 24656 }, { "epoch": 0.32, "grad_norm": 4.151142120361328, "learning_rate": 1.986918367955637e-05, "loss": 1.8911, "step": 24657 }, { "epoch": 0.32, "grad_norm": 4.306240558624268, "learning_rate": 1.9869166738017835e-05, "loss": 2.4834, "step": 24658 }, { "epoch": 0.32, "grad_norm": 4.243272304534912, "learning_rate": 1.9869149795389572e-05, "loss": 2.3177, "step": 24659 }, { "epoch": 0.32, "grad_norm": 3.3215277194976807, "learning_rate": 1.9869132851671588e-05, "loss": 1.8065, "step": 24660 }, { "epoch": 0.32, "grad_norm": 3.804391622543335, "learning_rate": 1.986911590686389e-05, "loss": 2.147, "step": 24661 }, { "epoch": 0.32, "grad_norm": 4.066583633422852, "learning_rate": 1.986909896096647e-05, "loss": 2.2405, "step": 24662 }, { "epoch": 0.32, "grad_norm": 3.537595510482788, "learning_rate": 1.9869082013979333e-05, "loss": 1.5232, "step": 24663 }, { "epoch": 0.32, "grad_norm": 4.264163970947266, "learning_rate": 1.9869065065902484e-05, "loss": 2.3202, "step": 24664 }, { "epoch": 0.32, "grad_norm": 3.5203330516815186, "learning_rate": 1.986904811673592e-05, "loss": 1.8099, "step": 24665 }, { "epoch": 0.32, "grad_norm": 4.29240608215332, "learning_rate": 1.9869031166479647e-05, "loss": 2.006, "step": 24666 }, { "epoch": 0.32, "grad_norm": 4.101914405822754, "learning_rate": 1.9869014215133667e-05, "loss": 2.1658, "step": 24667 }, { "epoch": 0.32, "grad_norm": 4.212657451629639, "learning_rate": 1.986899726269798e-05, "loss": 2.3319, "step": 24668 }, { "epoch": 0.32, "grad_norm": 4.0072407722473145, "learning_rate": 1.9868980309172587e-05, "loss": 1.9451, "step": 24669 }, { "epoch": 0.32, "grad_norm": 3.9612581729888916, "learning_rate": 1.9868963354557493e-05, "loss": 2.1648, "step": 24670 }, { "epoch": 0.32, "grad_norm": 3.961456537246704, "learning_rate": 1.98689463988527e-05, "loss": 1.9052, "step": 24671 }, { "epoch": 0.32, "grad_norm": 3.1143858432769775, "learning_rate": 1.9868929442058203e-05, "loss": 1.6046, "step": 24672 }, { "epoch": 0.32, "grad_norm": 3.8250529766082764, "learning_rate": 1.986891248417401e-05, "loss": 1.8782, "step": 24673 }, { "epoch": 0.32, "grad_norm": 3.7105486392974854, "learning_rate": 1.986889552520012e-05, "loss": 2.0107, "step": 24674 }, { "epoch": 0.32, "grad_norm": 3.9460222721099854, "learning_rate": 1.9868878565136542e-05, "loss": 1.9938, "step": 24675 }, { "epoch": 0.32, "grad_norm": 3.8241729736328125, "learning_rate": 1.986886160398327e-05, "loss": 2.0654, "step": 24676 }, { "epoch": 0.32, "grad_norm": 4.608048915863037, "learning_rate": 1.9868844641740305e-05, "loss": 2.8422, "step": 24677 }, { "epoch": 0.32, "grad_norm": 4.245017051696777, "learning_rate": 1.9868827678407653e-05, "loss": 2.5353, "step": 24678 }, { "epoch": 0.32, "grad_norm": 3.8535265922546387, "learning_rate": 1.9868810713985317e-05, "loss": 1.7172, "step": 24679 }, { "epoch": 0.32, "grad_norm": 4.299408435821533, "learning_rate": 1.9868793748473297e-05, "loss": 2.3631, "step": 24680 }, { "epoch": 0.32, "grad_norm": 3.783308267593384, "learning_rate": 1.9868776781871593e-05, "loss": 1.9641, "step": 24681 }, { "epoch": 0.32, "grad_norm": 4.254871845245361, "learning_rate": 1.9868759814180212e-05, "loss": 2.2189, "step": 24682 }, { "epoch": 0.32, "grad_norm": 3.7148563861846924, "learning_rate": 1.9868742845399153e-05, "loss": 1.7755, "step": 24683 }, { "epoch": 0.32, "grad_norm": 3.8324100971221924, "learning_rate": 1.9868725875528413e-05, "loss": 1.9652, "step": 24684 }, { "epoch": 0.32, "grad_norm": 4.310916900634766, "learning_rate": 1.9868708904568e-05, "loss": 2.429, "step": 24685 }, { "epoch": 0.32, "grad_norm": 3.7457497119903564, "learning_rate": 1.986869193251791e-05, "loss": 1.9876, "step": 24686 }, { "epoch": 0.32, "grad_norm": 4.346723556518555, "learning_rate": 1.9868674959378157e-05, "loss": 2.3091, "step": 24687 }, { "epoch": 0.32, "grad_norm": 3.3860230445861816, "learning_rate": 1.986865798514873e-05, "loss": 1.7728, "step": 24688 }, { "epoch": 0.32, "grad_norm": 4.487186431884766, "learning_rate": 1.9868641009829638e-05, "loss": 1.9668, "step": 24689 }, { "epoch": 0.32, "grad_norm": 4.4605631828308105, "learning_rate": 1.9868624033420878e-05, "loss": 2.7586, "step": 24690 }, { "epoch": 0.32, "grad_norm": 4.04176664352417, "learning_rate": 1.9868607055922454e-05, "loss": 1.9205, "step": 24691 }, { "epoch": 0.32, "grad_norm": 4.343925952911377, "learning_rate": 1.986859007733437e-05, "loss": 2.4398, "step": 24692 }, { "epoch": 0.32, "grad_norm": 3.720637559890747, "learning_rate": 1.986857309765663e-05, "loss": 1.9879, "step": 24693 }, { "epoch": 0.32, "grad_norm": 3.9001686573028564, "learning_rate": 1.9868556116889226e-05, "loss": 1.7672, "step": 24694 }, { "epoch": 0.32, "grad_norm": 4.700599193572998, "learning_rate": 1.9868539135032168e-05, "loss": 2.4047, "step": 24695 }, { "epoch": 0.32, "grad_norm": 4.34749698638916, "learning_rate": 1.9868522152085455e-05, "loss": 2.064, "step": 24696 }, { "epoch": 0.32, "grad_norm": 4.490475654602051, "learning_rate": 1.9868505168049092e-05, "loss": 2.3036, "step": 24697 }, { "epoch": 0.32, "grad_norm": 4.0134735107421875, "learning_rate": 1.986848818292308e-05, "loss": 2.2041, "step": 24698 }, { "epoch": 0.32, "grad_norm": 4.241981029510498, "learning_rate": 1.9868471196707415e-05, "loss": 2.3074, "step": 24699 }, { "epoch": 0.32, "grad_norm": 3.7603070735931396, "learning_rate": 1.9868454209402105e-05, "loss": 1.7654, "step": 24700 }, { "epoch": 0.32, "grad_norm": 3.7181711196899414, "learning_rate": 1.986843722100715e-05, "loss": 1.9049, "step": 24701 }, { "epoch": 0.32, "grad_norm": 4.44252872467041, "learning_rate": 1.986842023152255e-05, "loss": 2.6985, "step": 24702 }, { "epoch": 0.32, "grad_norm": 4.130817890167236, "learning_rate": 1.9868403240948316e-05, "loss": 2.014, "step": 24703 }, { "epoch": 0.32, "grad_norm": 4.242916107177734, "learning_rate": 1.986838624928444e-05, "loss": 2.1446, "step": 24704 }, { "epoch": 0.32, "grad_norm": 3.8548049926757812, "learning_rate": 1.9868369256530924e-05, "loss": 2.0093, "step": 24705 }, { "epoch": 0.32, "grad_norm": 4.027625560760498, "learning_rate": 1.9868352262687773e-05, "loss": 2.0371, "step": 24706 }, { "epoch": 0.32, "grad_norm": 3.564255475997925, "learning_rate": 1.986833526775499e-05, "loss": 2.1459, "step": 24707 }, { "epoch": 0.32, "grad_norm": 4.175909519195557, "learning_rate": 1.9868318271732574e-05, "loss": 1.8982, "step": 24708 }, { "epoch": 0.32, "grad_norm": 4.150198936462402, "learning_rate": 1.986830127462053e-05, "loss": 2.1296, "step": 24709 }, { "epoch": 0.32, "grad_norm": 3.5678935050964355, "learning_rate": 1.9868284276418855e-05, "loss": 1.7795, "step": 24710 }, { "epoch": 0.32, "grad_norm": 3.768404722213745, "learning_rate": 1.9868267277127558e-05, "loss": 1.708, "step": 24711 }, { "epoch": 0.32, "grad_norm": 3.8252944946289062, "learning_rate": 1.9868250276746634e-05, "loss": 1.6871, "step": 24712 }, { "epoch": 0.32, "grad_norm": 4.216634273529053, "learning_rate": 1.986823327527609e-05, "loss": 2.1086, "step": 24713 }, { "epoch": 0.32, "grad_norm": 4.244517803192139, "learning_rate": 1.9868216272715923e-05, "loss": 2.2734, "step": 24714 }, { "epoch": 0.32, "grad_norm": 4.068222999572754, "learning_rate": 1.986819926906614e-05, "loss": 2.126, "step": 24715 }, { "epoch": 0.32, "grad_norm": 3.9890267848968506, "learning_rate": 1.986818226432674e-05, "loss": 1.9845, "step": 24716 }, { "epoch": 0.32, "grad_norm": 3.947887420654297, "learning_rate": 1.9868165258497723e-05, "loss": 1.6549, "step": 24717 }, { "epoch": 0.32, "grad_norm": 4.670300483703613, "learning_rate": 1.9868148251579097e-05, "loss": 2.4527, "step": 24718 }, { "epoch": 0.32, "grad_norm": 4.338697910308838, "learning_rate": 1.9868131243570855e-05, "loss": 2.3958, "step": 24719 }, { "epoch": 0.32, "grad_norm": 3.483365535736084, "learning_rate": 1.986811423447301e-05, "loss": 1.6597, "step": 24720 }, { "epoch": 0.32, "grad_norm": 4.386631011962891, "learning_rate": 1.986809722428555e-05, "loss": 2.2974, "step": 24721 }, { "epoch": 0.32, "grad_norm": 4.749397277832031, "learning_rate": 1.986808021300849e-05, "loss": 2.5189, "step": 24722 }, { "epoch": 0.32, "grad_norm": 4.723531246185303, "learning_rate": 1.9868063200641826e-05, "loss": 2.6042, "step": 24723 }, { "epoch": 0.32, "grad_norm": 3.6969261169433594, "learning_rate": 1.986804618718556e-05, "loss": 2.2047, "step": 24724 }, { "epoch": 0.32, "grad_norm": 3.9177141189575195, "learning_rate": 1.9868029172639693e-05, "loss": 2.3505, "step": 24725 }, { "epoch": 0.32, "grad_norm": 4.9936299324035645, "learning_rate": 1.986801215700423e-05, "loss": 2.2887, "step": 24726 }, { "epoch": 0.32, "grad_norm": 4.580594062805176, "learning_rate": 1.9867995140279173e-05, "loss": 2.591, "step": 24727 }, { "epoch": 0.32, "grad_norm": 3.787729263305664, "learning_rate": 1.9867978122464518e-05, "loss": 1.5977, "step": 24728 }, { "epoch": 0.32, "grad_norm": 3.8909342288970947, "learning_rate": 1.986796110356027e-05, "loss": 1.8557, "step": 24729 }, { "epoch": 0.32, "grad_norm": 4.63158655166626, "learning_rate": 1.9867944083566435e-05, "loss": 2.1346, "step": 24730 }, { "epoch": 0.32, "grad_norm": 4.240646839141846, "learning_rate": 1.9867927062483012e-05, "loss": 1.9257, "step": 24731 }, { "epoch": 0.32, "grad_norm": 3.847628116607666, "learning_rate": 1.986791004031e-05, "loss": 2.4014, "step": 24732 }, { "epoch": 0.32, "grad_norm": 3.51933217048645, "learning_rate": 1.9867893017047403e-05, "loss": 1.7666, "step": 24733 }, { "epoch": 0.32, "grad_norm": 3.8035998344421387, "learning_rate": 1.9867875992695226e-05, "loss": 1.8724, "step": 24734 }, { "epoch": 0.32, "grad_norm": 3.893174648284912, "learning_rate": 1.9867858967253467e-05, "loss": 1.7622, "step": 24735 }, { "epoch": 0.32, "grad_norm": 4.489707946777344, "learning_rate": 1.986784194072213e-05, "loss": 2.1247, "step": 24736 }, { "epoch": 0.32, "grad_norm": 3.968679904937744, "learning_rate": 1.986782491310121e-05, "loss": 1.7866, "step": 24737 }, { "epoch": 0.32, "grad_norm": 3.9205126762390137, "learning_rate": 1.986780788439072e-05, "loss": 2.0478, "step": 24738 }, { "epoch": 0.32, "grad_norm": 3.9308741092681885, "learning_rate": 1.9867790854590655e-05, "loss": 1.9327, "step": 24739 }, { "epoch": 0.32, "grad_norm": 3.3486275672912598, "learning_rate": 1.986777382370102e-05, "loss": 1.751, "step": 24740 }, { "epoch": 0.32, "grad_norm": 4.02654504776001, "learning_rate": 1.9867756791721814e-05, "loss": 2.0436, "step": 24741 }, { "epoch": 0.32, "grad_norm": 3.9989004135131836, "learning_rate": 1.986773975865304e-05, "loss": 2.4814, "step": 24742 }, { "epoch": 0.32, "grad_norm": 4.308846473693848, "learning_rate": 1.9867722724494703e-05, "loss": 2.2087, "step": 24743 }, { "epoch": 0.32, "grad_norm": 3.9735021591186523, "learning_rate": 1.98677056892468e-05, "loss": 2.5329, "step": 24744 }, { "epoch": 0.32, "grad_norm": 3.497670888900757, "learning_rate": 1.9867688652909333e-05, "loss": 1.6815, "step": 24745 }, { "epoch": 0.32, "grad_norm": 4.087836265563965, "learning_rate": 1.986767161548231e-05, "loss": 1.8909, "step": 24746 }, { "epoch": 0.32, "grad_norm": 4.131470203399658, "learning_rate": 1.9867654576965725e-05, "loss": 1.9732, "step": 24747 }, { "epoch": 0.32, "grad_norm": 4.55019474029541, "learning_rate": 1.9867637537359583e-05, "loss": 2.8049, "step": 24748 }, { "epoch": 0.32, "grad_norm": 3.990907907485962, "learning_rate": 1.9867620496663888e-05, "loss": 1.7476, "step": 24749 }, { "epoch": 0.32, "grad_norm": 4.47332239151001, "learning_rate": 1.9867603454878642e-05, "loss": 1.9674, "step": 24750 }, { "epoch": 0.32, "grad_norm": 3.556710958480835, "learning_rate": 1.986758641200384e-05, "loss": 1.5977, "step": 24751 }, { "epoch": 0.32, "grad_norm": 3.8720498085021973, "learning_rate": 1.9867569368039493e-05, "loss": 1.9005, "step": 24752 }, { "epoch": 0.32, "grad_norm": 4.801461696624756, "learning_rate": 1.98675523229856e-05, "loss": 2.7401, "step": 24753 }, { "epoch": 0.32, "grad_norm": 4.132720947265625, "learning_rate": 1.986753527684216e-05, "loss": 2.4786, "step": 24754 }, { "epoch": 0.32, "grad_norm": 4.729537487030029, "learning_rate": 1.9867518229609177e-05, "loss": 2.6217, "step": 24755 }, { "epoch": 0.32, "grad_norm": 4.595329284667969, "learning_rate": 1.9867501181286652e-05, "loss": 2.2021, "step": 24756 }, { "epoch": 0.32, "grad_norm": 3.989349126815796, "learning_rate": 1.986748413187459e-05, "loss": 2.0953, "step": 24757 }, { "epoch": 0.32, "grad_norm": 3.6012368202209473, "learning_rate": 1.9867467081372987e-05, "loss": 1.6063, "step": 24758 }, { "epoch": 0.32, "grad_norm": 3.4268555641174316, "learning_rate": 1.986745002978185e-05, "loss": 1.7435, "step": 24759 }, { "epoch": 0.32, "grad_norm": 4.47121524810791, "learning_rate": 1.986743297710118e-05, "loss": 2.5589, "step": 24760 }, { "epoch": 0.32, "grad_norm": 3.7975316047668457, "learning_rate": 1.9867415923330978e-05, "loss": 1.9998, "step": 24761 }, { "epoch": 0.32, "grad_norm": 4.019698143005371, "learning_rate": 1.9867398868471243e-05, "loss": 2.0323, "step": 24762 }, { "epoch": 0.32, "grad_norm": 3.5923478603363037, "learning_rate": 1.9867381812521983e-05, "loss": 1.735, "step": 24763 }, { "epoch": 0.32, "grad_norm": 3.8786375522613525, "learning_rate": 1.986736475548319e-05, "loss": 2.2941, "step": 24764 }, { "epoch": 0.32, "grad_norm": 3.5341439247131348, "learning_rate": 1.986734769735488e-05, "loss": 1.8473, "step": 24765 }, { "epoch": 0.32, "grad_norm": 3.5600690841674805, "learning_rate": 1.9867330638137042e-05, "loss": 2.097, "step": 24766 }, { "epoch": 0.32, "grad_norm": 3.974933385848999, "learning_rate": 1.986731357782969e-05, "loss": 2.3705, "step": 24767 }, { "epoch": 0.32, "grad_norm": 3.576521873474121, "learning_rate": 1.9867296516432814e-05, "loss": 1.4384, "step": 24768 }, { "epoch": 0.32, "grad_norm": 3.615886688232422, "learning_rate": 1.9867279453946425e-05, "loss": 1.9652, "step": 24769 }, { "epoch": 0.32, "grad_norm": 3.854738473892212, "learning_rate": 1.9867262390370517e-05, "loss": 2.1815, "step": 24770 }, { "epoch": 0.32, "grad_norm": 3.1773438453674316, "learning_rate": 1.9867245325705098e-05, "loss": 1.6889, "step": 24771 }, { "epoch": 0.32, "grad_norm": 3.95898175239563, "learning_rate": 1.9867228259950165e-05, "loss": 2.2309, "step": 24772 }, { "epoch": 0.32, "grad_norm": 3.922743797302246, "learning_rate": 1.9867211193105723e-05, "loss": 2.1099, "step": 24773 }, { "epoch": 0.32, "grad_norm": 4.035739421844482, "learning_rate": 1.9867194125171776e-05, "loss": 1.722, "step": 24774 }, { "epoch": 0.32, "grad_norm": 3.4096202850341797, "learning_rate": 1.9867177056148322e-05, "loss": 1.4905, "step": 24775 }, { "epoch": 0.32, "grad_norm": 4.211600303649902, "learning_rate": 1.9867159986035367e-05, "loss": 2.5424, "step": 24776 }, { "epoch": 0.32, "grad_norm": 4.596067905426025, "learning_rate": 1.9867142914832906e-05, "loss": 2.4755, "step": 24777 }, { "epoch": 0.32, "grad_norm": 4.0722174644470215, "learning_rate": 1.986712584254095e-05, "loss": 2.1574, "step": 24778 }, { "epoch": 0.32, "grad_norm": 3.650367498397827, "learning_rate": 1.986710876915949e-05, "loss": 1.5922, "step": 24779 }, { "epoch": 0.32, "grad_norm": 3.8922860622406006, "learning_rate": 1.9867091694688536e-05, "loss": 2.0217, "step": 24780 }, { "epoch": 0.32, "grad_norm": 3.778102159500122, "learning_rate": 1.986707461912809e-05, "loss": 2.1752, "step": 24781 }, { "epoch": 0.32, "grad_norm": 4.151725769042969, "learning_rate": 1.986705754247815e-05, "loss": 2.0748, "step": 24782 }, { "epoch": 0.32, "grad_norm": 3.609510660171509, "learning_rate": 1.986704046473872e-05, "loss": 1.7526, "step": 24783 }, { "epoch": 0.32, "grad_norm": 3.7761166095733643, "learning_rate": 1.98670233859098e-05, "loss": 1.8548, "step": 24784 }, { "epoch": 0.32, "grad_norm": 4.2372355461120605, "learning_rate": 1.9867006305991392e-05, "loss": 2.2788, "step": 24785 }, { "epoch": 0.32, "grad_norm": 3.9280574321746826, "learning_rate": 1.9866989224983502e-05, "loss": 2.3728, "step": 24786 }, { "epoch": 0.32, "grad_norm": 4.032135009765625, "learning_rate": 1.9866972142886128e-05, "loss": 2.4886, "step": 24787 }, { "epoch": 0.32, "grad_norm": 3.8000810146331787, "learning_rate": 1.986695505969927e-05, "loss": 2.223, "step": 24788 }, { "epoch": 0.32, "grad_norm": 3.624318838119507, "learning_rate": 1.986693797542294e-05, "loss": 1.6925, "step": 24789 }, { "epoch": 0.32, "grad_norm": 4.53789758682251, "learning_rate": 1.9866920890057128e-05, "loss": 2.3868, "step": 24790 }, { "epoch": 0.32, "grad_norm": 3.619121551513672, "learning_rate": 1.9866903803601837e-05, "loss": 1.7737, "step": 24791 }, { "epoch": 0.32, "grad_norm": 3.3334553241729736, "learning_rate": 1.9866886716057076e-05, "loss": 1.4922, "step": 24792 }, { "epoch": 0.32, "grad_norm": 4.606813907623291, "learning_rate": 1.9866869627422843e-05, "loss": 2.2986, "step": 24793 }, { "epoch": 0.32, "grad_norm": 3.7307095527648926, "learning_rate": 1.986685253769914e-05, "loss": 1.8263, "step": 24794 }, { "epoch": 0.32, "grad_norm": 3.951021194458008, "learning_rate": 1.9866835446885974e-05, "loss": 1.9242, "step": 24795 }, { "epoch": 0.32, "grad_norm": 4.104890823364258, "learning_rate": 1.9866818354983336e-05, "loss": 2.3056, "step": 24796 }, { "epoch": 0.32, "grad_norm": 4.087052822113037, "learning_rate": 1.9866801261991235e-05, "loss": 2.5881, "step": 24797 }, { "epoch": 0.32, "grad_norm": 4.351891994476318, "learning_rate": 1.9866784167909673e-05, "loss": 2.2273, "step": 24798 }, { "epoch": 0.32, "grad_norm": 3.7337701320648193, "learning_rate": 1.986676707273865e-05, "loss": 1.7383, "step": 24799 }, { "epoch": 0.32, "grad_norm": 4.176537036895752, "learning_rate": 1.9866749976478167e-05, "loss": 2.1202, "step": 24800 }, { "epoch": 0.32, "grad_norm": 4.003261089324951, "learning_rate": 1.9866732879128233e-05, "loss": 2.2257, "step": 24801 }, { "epoch": 0.32, "grad_norm": 4.483567237854004, "learning_rate": 1.986671578068884e-05, "loss": 2.3567, "step": 24802 }, { "epoch": 0.32, "grad_norm": 3.6749632358551025, "learning_rate": 1.986669868115999e-05, "loss": 1.7196, "step": 24803 }, { "epoch": 0.32, "grad_norm": 4.08514404296875, "learning_rate": 1.9866681580541697e-05, "loss": 1.9598, "step": 24804 }, { "epoch": 0.32, "grad_norm": 4.476796627044678, "learning_rate": 1.986666447883395e-05, "loss": 2.815, "step": 24805 }, { "epoch": 0.32, "grad_norm": 4.329064846038818, "learning_rate": 1.986664737603676e-05, "loss": 2.3366, "step": 24806 }, { "epoch": 0.32, "grad_norm": 3.7772343158721924, "learning_rate": 1.9866630272150116e-05, "loss": 1.9875, "step": 24807 }, { "epoch": 0.32, "grad_norm": 4.0744404792785645, "learning_rate": 1.9866613167174037e-05, "loss": 2.3286, "step": 24808 }, { "epoch": 0.32, "grad_norm": 3.984421968460083, "learning_rate": 1.9866596061108514e-05, "loss": 1.8746, "step": 24809 }, { "epoch": 0.32, "grad_norm": 4.336127758026123, "learning_rate": 1.9866578953953554e-05, "loss": 2.1025, "step": 24810 }, { "epoch": 0.32, "grad_norm": 4.1309123039245605, "learning_rate": 1.986656184570915e-05, "loss": 2.3803, "step": 24811 }, { "epoch": 0.32, "grad_norm": 4.321372032165527, "learning_rate": 1.9866544736375317e-05, "loss": 2.3876, "step": 24812 }, { "epoch": 0.32, "grad_norm": 4.865678310394287, "learning_rate": 1.9866527625952046e-05, "loss": 2.4655, "step": 24813 }, { "epoch": 0.32, "grad_norm": 4.147444248199463, "learning_rate": 1.9866510514439346e-05, "loss": 2.3244, "step": 24814 }, { "epoch": 0.32, "grad_norm": 3.8602986335754395, "learning_rate": 1.9866493401837212e-05, "loss": 2.0586, "step": 24815 }, { "epoch": 0.32, "grad_norm": 3.559732437133789, "learning_rate": 1.986647628814565e-05, "loss": 1.9399, "step": 24816 }, { "epoch": 0.32, "grad_norm": 3.840479850769043, "learning_rate": 1.9866459173364667e-05, "loss": 1.9984, "step": 24817 }, { "epoch": 0.32, "grad_norm": 3.9041903018951416, "learning_rate": 1.9866442057494254e-05, "loss": 1.9511, "step": 24818 }, { "epoch": 0.32, "grad_norm": 3.6341605186462402, "learning_rate": 1.986642494053442e-05, "loss": 1.7333, "step": 24819 }, { "epoch": 0.32, "grad_norm": 4.762962818145752, "learning_rate": 1.9866407822485166e-05, "loss": 2.202, "step": 24820 }, { "epoch": 0.32, "grad_norm": 3.727297067642212, "learning_rate": 1.9866390703346492e-05, "loss": 1.5641, "step": 24821 }, { "epoch": 0.32, "grad_norm": 3.864150285720825, "learning_rate": 1.9866373583118402e-05, "loss": 2.1625, "step": 24822 }, { "epoch": 0.32, "grad_norm": 4.356705665588379, "learning_rate": 1.98663564618009e-05, "loss": 2.5486, "step": 24823 }, { "epoch": 0.32, "grad_norm": 3.796560764312744, "learning_rate": 1.986633933939398e-05, "loss": 2.0746, "step": 24824 }, { "epoch": 0.32, "grad_norm": 4.15639591217041, "learning_rate": 1.986632221589765e-05, "loss": 1.8156, "step": 24825 }, { "epoch": 0.32, "grad_norm": 4.374992847442627, "learning_rate": 1.986630509131191e-05, "loss": 2.6072, "step": 24826 }, { "epoch": 0.32, "grad_norm": 3.676669120788574, "learning_rate": 1.9866287965636765e-05, "loss": 1.5573, "step": 24827 }, { "epoch": 0.32, "grad_norm": 3.490185022354126, "learning_rate": 1.9866270838872213e-05, "loss": 1.6962, "step": 24828 }, { "epoch": 0.32, "grad_norm": 4.1974263191223145, "learning_rate": 1.9866253711018255e-05, "loss": 2.3211, "step": 24829 }, { "epoch": 0.32, "grad_norm": 3.7724199295043945, "learning_rate": 1.9866236582074897e-05, "loss": 1.9437, "step": 24830 }, { "epoch": 0.32, "grad_norm": 3.915618658065796, "learning_rate": 1.9866219452042143e-05, "loss": 1.828, "step": 24831 }, { "epoch": 0.32, "grad_norm": 3.968501329421997, "learning_rate": 1.9866202320919986e-05, "loss": 2.0578, "step": 24832 }, { "epoch": 0.32, "grad_norm": 4.60908842086792, "learning_rate": 1.9866185188708433e-05, "loss": 2.6787, "step": 24833 }, { "epoch": 0.32, "grad_norm": 3.3062245845794678, "learning_rate": 1.986616805540749e-05, "loss": 1.5787, "step": 24834 }, { "epoch": 0.32, "grad_norm": 4.34064245223999, "learning_rate": 1.986615092101715e-05, "loss": 2.3562, "step": 24835 }, { "epoch": 0.32, "grad_norm": 4.061059474945068, "learning_rate": 1.9866133785537423e-05, "loss": 1.752, "step": 24836 }, { "epoch": 0.32, "grad_norm": 3.568118095397949, "learning_rate": 1.9866116648968306e-05, "loss": 2.056, "step": 24837 }, { "epoch": 0.32, "grad_norm": 4.472674369812012, "learning_rate": 1.9866099511309802e-05, "loss": 2.9311, "step": 24838 }, { "epoch": 0.32, "grad_norm": 4.802292346954346, "learning_rate": 1.9866082372561913e-05, "loss": 2.0463, "step": 24839 }, { "epoch": 0.32, "grad_norm": 3.481785297393799, "learning_rate": 1.9866065232724644e-05, "loss": 2.0951, "step": 24840 }, { "epoch": 0.32, "grad_norm": 3.4463589191436768, "learning_rate": 1.986604809179799e-05, "loss": 1.7798, "step": 24841 }, { "epoch": 0.32, "grad_norm": 4.112120628356934, "learning_rate": 1.986603094978196e-05, "loss": 2.4324, "step": 24842 }, { "epoch": 0.32, "grad_norm": 3.667569160461426, "learning_rate": 1.986601380667655e-05, "loss": 1.5056, "step": 24843 }, { "epoch": 0.32, "grad_norm": 4.2581706047058105, "learning_rate": 1.9865996662481766e-05, "loss": 2.2267, "step": 24844 }, { "epoch": 0.32, "grad_norm": 3.8282511234283447, "learning_rate": 1.9865979517197608e-05, "loss": 1.9943, "step": 24845 }, { "epoch": 0.32, "grad_norm": 3.9421799182891846, "learning_rate": 1.986596237082408e-05, "loss": 1.9183, "step": 24846 }, { "epoch": 0.32, "grad_norm": 4.268388271331787, "learning_rate": 1.986594522336118e-05, "loss": 2.4468, "step": 24847 }, { "epoch": 0.32, "grad_norm": 3.700281858444214, "learning_rate": 1.9865928074808915e-05, "loss": 1.9544, "step": 24848 }, { "epoch": 0.32, "grad_norm": 4.141209602355957, "learning_rate": 1.986591092516728e-05, "loss": 2.2717, "step": 24849 }, { "epoch": 0.32, "grad_norm": 4.353316307067871, "learning_rate": 1.9865893774436288e-05, "loss": 1.9633, "step": 24850 }, { "epoch": 0.32, "grad_norm": 3.5198042392730713, "learning_rate": 1.986587662261593e-05, "loss": 2.0015, "step": 24851 }, { "epoch": 0.32, "grad_norm": 3.96220326423645, "learning_rate": 1.986585946970621e-05, "loss": 1.802, "step": 24852 }, { "epoch": 0.32, "grad_norm": 4.043088436126709, "learning_rate": 1.9865842315707133e-05, "loss": 1.8582, "step": 24853 }, { "epoch": 0.32, "grad_norm": 3.591463327407837, "learning_rate": 1.98658251606187e-05, "loss": 1.5214, "step": 24854 }, { "epoch": 0.32, "grad_norm": 3.9147582054138184, "learning_rate": 1.9865808004440913e-05, "loss": 1.6224, "step": 24855 }, { "epoch": 0.32, "grad_norm": 4.048557758331299, "learning_rate": 1.9865790847173773e-05, "loss": 1.9801, "step": 24856 }, { "epoch": 0.32, "grad_norm": 4.28727388381958, "learning_rate": 1.9865773688817284e-05, "loss": 2.1702, "step": 24857 }, { "epoch": 0.32, "grad_norm": 3.62131404876709, "learning_rate": 1.9865756529371443e-05, "loss": 1.9, "step": 24858 }, { "epoch": 0.32, "grad_norm": 3.8447012901306152, "learning_rate": 1.9865739368836257e-05, "loss": 2.055, "step": 24859 }, { "epoch": 0.32, "grad_norm": 3.7848236560821533, "learning_rate": 1.9865722207211725e-05, "loss": 2.0689, "step": 24860 }, { "epoch": 0.32, "grad_norm": 3.6629953384399414, "learning_rate": 1.986570504449785e-05, "loss": 2.0365, "step": 24861 }, { "epoch": 0.32, "grad_norm": 4.14157772064209, "learning_rate": 1.9865687880694636e-05, "loss": 2.2344, "step": 24862 }, { "epoch": 0.32, "grad_norm": 3.6215217113494873, "learning_rate": 1.9865670715802082e-05, "loss": 1.7459, "step": 24863 }, { "epoch": 0.32, "grad_norm": 3.3773341178894043, "learning_rate": 1.986565354982019e-05, "loss": 1.8028, "step": 24864 }, { "epoch": 0.32, "grad_norm": 4.036203861236572, "learning_rate": 1.986563638274896e-05, "loss": 2.2703, "step": 24865 }, { "epoch": 0.32, "grad_norm": 3.5302164554595947, "learning_rate": 1.9865619214588398e-05, "loss": 1.8471, "step": 24866 }, { "epoch": 0.32, "grad_norm": 3.6187191009521484, "learning_rate": 1.9865602045338506e-05, "loss": 1.9394, "step": 24867 }, { "epoch": 0.32, "grad_norm": 3.9857213497161865, "learning_rate": 1.9865584874999283e-05, "loss": 2.0157, "step": 24868 }, { "epoch": 0.32, "grad_norm": 3.373281717300415, "learning_rate": 1.986556770357073e-05, "loss": 1.6952, "step": 24869 }, { "epoch": 0.32, "grad_norm": 4.533719539642334, "learning_rate": 1.9865550531052854e-05, "loss": 2.5523, "step": 24870 }, { "epoch": 0.32, "grad_norm": 3.807309865951538, "learning_rate": 1.9865533357445656e-05, "loss": 1.9669, "step": 24871 }, { "epoch": 0.32, "grad_norm": 3.9105679988861084, "learning_rate": 1.986551618274913e-05, "loss": 2.3705, "step": 24872 }, { "epoch": 0.32, "grad_norm": 3.660506248474121, "learning_rate": 1.9865499006963284e-05, "loss": 1.7759, "step": 24873 }, { "epoch": 0.32, "grad_norm": 4.847094535827637, "learning_rate": 1.9865481830088124e-05, "loss": 2.5598, "step": 24874 }, { "epoch": 0.32, "grad_norm": 4.447354316711426, "learning_rate": 1.9865464652123646e-05, "loss": 2.4342, "step": 24875 }, { "epoch": 0.32, "grad_norm": 4.098264694213867, "learning_rate": 1.9865447473069852e-05, "loss": 1.995, "step": 24876 }, { "epoch": 0.32, "grad_norm": 3.6958024501800537, "learning_rate": 1.9865430292926744e-05, "loss": 2.1816, "step": 24877 }, { "epoch": 0.32, "grad_norm": 4.10140323638916, "learning_rate": 1.986541311169433e-05, "loss": 2.0457, "step": 24878 }, { "epoch": 0.32, "grad_norm": 3.7280476093292236, "learning_rate": 1.98653959293726e-05, "loss": 1.8801, "step": 24879 }, { "epoch": 0.32, "grad_norm": 3.960951328277588, "learning_rate": 1.986537874596157e-05, "loss": 2.1877, "step": 24880 }, { "epoch": 0.32, "grad_norm": 4.38079309463501, "learning_rate": 1.9865361561461228e-05, "loss": 2.3242, "step": 24881 }, { "epoch": 0.32, "grad_norm": 4.705415725708008, "learning_rate": 1.986534437587159e-05, "loss": 2.7435, "step": 24882 }, { "epoch": 0.32, "grad_norm": 3.578517198562622, "learning_rate": 1.9865327189192647e-05, "loss": 1.6243, "step": 24883 }, { "epoch": 0.32, "grad_norm": 3.9186670780181885, "learning_rate": 1.9865310001424403e-05, "loss": 1.7479, "step": 24884 }, { "epoch": 0.32, "grad_norm": 3.7992782592773438, "learning_rate": 1.9865292812566864e-05, "loss": 1.6522, "step": 24885 }, { "epoch": 0.32, "grad_norm": 3.5090701580047607, "learning_rate": 1.9865275622620027e-05, "loss": 1.6822, "step": 24886 }, { "epoch": 0.32, "grad_norm": 4.040552139282227, "learning_rate": 1.98652584315839e-05, "loss": 2.0503, "step": 24887 }, { "epoch": 0.32, "grad_norm": 3.9053447246551514, "learning_rate": 1.9865241239458477e-05, "loss": 1.8387, "step": 24888 }, { "epoch": 0.32, "grad_norm": 3.5091893672943115, "learning_rate": 1.9865224046243764e-05, "loss": 1.5667, "step": 24889 }, { "epoch": 0.32, "grad_norm": 5.045852184295654, "learning_rate": 1.9865206851939766e-05, "loss": 2.6858, "step": 24890 }, { "epoch": 0.32, "grad_norm": 3.4767913818359375, "learning_rate": 1.9865189656546477e-05, "loss": 1.5531, "step": 24891 }, { "epoch": 0.32, "grad_norm": 4.236862659454346, "learning_rate": 1.986517246006391e-05, "loss": 1.9834, "step": 24892 }, { "epoch": 0.32, "grad_norm": 4.39235258102417, "learning_rate": 1.9865155262492056e-05, "loss": 2.229, "step": 24893 }, { "epoch": 0.32, "grad_norm": 4.217859745025635, "learning_rate": 1.9865138063830924e-05, "loss": 2.2958, "step": 24894 }, { "epoch": 0.32, "grad_norm": 3.6765058040618896, "learning_rate": 1.986512086408051e-05, "loss": 1.5749, "step": 24895 }, { "epoch": 0.32, "grad_norm": 3.830214738845825, "learning_rate": 1.9865103663240823e-05, "loss": 1.8199, "step": 24896 }, { "epoch": 0.32, "grad_norm": 3.987213373184204, "learning_rate": 1.986508646131186e-05, "loss": 2.0164, "step": 24897 }, { "epoch": 0.32, "grad_norm": 3.9094107151031494, "learning_rate": 1.9865069258293623e-05, "loss": 2.0769, "step": 24898 }, { "epoch": 0.32, "grad_norm": 4.579214572906494, "learning_rate": 1.9865052054186117e-05, "loss": 2.3055, "step": 24899 }, { "epoch": 0.32, "grad_norm": 4.680727958679199, "learning_rate": 1.986503484898934e-05, "loss": 1.9672, "step": 24900 }, { "epoch": 0.32, "grad_norm": 3.4624133110046387, "learning_rate": 1.9865017642703298e-05, "loss": 1.8421, "step": 24901 }, { "epoch": 0.32, "grad_norm": 4.4198174476623535, "learning_rate": 1.986500043532799e-05, "loss": 2.0304, "step": 24902 }, { "epoch": 0.32, "grad_norm": 4.013145923614502, "learning_rate": 1.9864983226863413e-05, "loss": 2.0115, "step": 24903 }, { "epoch": 0.32, "grad_norm": 4.163908004760742, "learning_rate": 1.9864966017309583e-05, "loss": 2.1461, "step": 24904 }, { "epoch": 0.32, "grad_norm": 3.625389337539673, "learning_rate": 1.986494880666649e-05, "loss": 1.6302, "step": 24905 }, { "epoch": 0.32, "grad_norm": 4.235046863555908, "learning_rate": 1.9864931594934138e-05, "loss": 2.4419, "step": 24906 }, { "epoch": 0.32, "grad_norm": 3.5843708515167236, "learning_rate": 1.9864914382112533e-05, "loss": 1.8206, "step": 24907 }, { "epoch": 0.32, "grad_norm": 3.629701614379883, "learning_rate": 1.986489716820167e-05, "loss": 2.0172, "step": 24908 }, { "epoch": 0.32, "grad_norm": 4.134527683258057, "learning_rate": 1.9864879953201557e-05, "loss": 2.098, "step": 24909 }, { "epoch": 0.32, "grad_norm": 3.748256206512451, "learning_rate": 1.9864862737112194e-05, "loss": 1.7956, "step": 24910 }, { "epoch": 0.32, "grad_norm": 3.8804726600646973, "learning_rate": 1.9864845519933583e-05, "loss": 1.7387, "step": 24911 }, { "epoch": 0.32, "grad_norm": 3.5625662803649902, "learning_rate": 1.9864828301665726e-05, "loss": 1.5022, "step": 24912 }, { "epoch": 0.32, "grad_norm": 4.026766300201416, "learning_rate": 1.9864811082308628e-05, "loss": 2.2101, "step": 24913 }, { "epoch": 0.32, "grad_norm": 4.352559566497803, "learning_rate": 1.9864793861862283e-05, "loss": 2.3479, "step": 24914 }, { "epoch": 0.32, "grad_norm": 3.5906801223754883, "learning_rate": 1.9864776640326697e-05, "loss": 1.6275, "step": 24915 }, { "epoch": 0.32, "grad_norm": 4.368102550506592, "learning_rate": 1.9864759417701875e-05, "loss": 1.9802, "step": 24916 }, { "epoch": 0.32, "grad_norm": 3.977489948272705, "learning_rate": 1.9864742193987812e-05, "loss": 2.2998, "step": 24917 }, { "epoch": 0.32, "grad_norm": 4.265379905700684, "learning_rate": 1.9864724969184516e-05, "loss": 2.396, "step": 24918 }, { "epoch": 0.32, "grad_norm": 3.5017125606536865, "learning_rate": 1.986470774329199e-05, "loss": 1.9099, "step": 24919 }, { "epoch": 0.32, "grad_norm": 4.047435760498047, "learning_rate": 1.9864690516310226e-05, "loss": 2.037, "step": 24920 }, { "epoch": 0.32, "grad_norm": 4.612569332122803, "learning_rate": 1.986467328823924e-05, "loss": 2.3194, "step": 24921 }, { "epoch": 0.32, "grad_norm": 3.996065139770508, "learning_rate": 1.9864656059079026e-05, "loss": 2.3544, "step": 24922 }, { "epoch": 0.32, "grad_norm": 3.526700019836426, "learning_rate": 1.9864638828829583e-05, "loss": 2.0468, "step": 24923 }, { "epoch": 0.32, "grad_norm": 3.956639528274536, "learning_rate": 1.986462159749092e-05, "loss": 2.3199, "step": 24924 }, { "epoch": 0.32, "grad_norm": 4.1315107345581055, "learning_rate": 1.9864604365063035e-05, "loss": 1.6327, "step": 24925 }, { "epoch": 0.32, "grad_norm": 4.177799701690674, "learning_rate": 1.986458713154593e-05, "loss": 2.4238, "step": 24926 }, { "epoch": 0.32, "grad_norm": 3.8330109119415283, "learning_rate": 1.98645698969396e-05, "loss": 2.0946, "step": 24927 }, { "epoch": 0.32, "grad_norm": 3.4489829540252686, "learning_rate": 1.9864552661244065e-05, "loss": 1.7581, "step": 24928 }, { "epoch": 0.32, "grad_norm": 4.271345138549805, "learning_rate": 1.986453542445931e-05, "loss": 2.0568, "step": 24929 }, { "epoch": 0.32, "grad_norm": 4.187973976135254, "learning_rate": 1.9864518186585344e-05, "loss": 1.9783, "step": 24930 }, { "epoch": 0.32, "grad_norm": 3.796196937561035, "learning_rate": 1.9864500947622168e-05, "loss": 2.1874, "step": 24931 }, { "epoch": 0.32, "grad_norm": 3.765305995941162, "learning_rate": 1.9864483707569785e-05, "loss": 2.0995, "step": 24932 }, { "epoch": 0.32, "grad_norm": 3.6758131980895996, "learning_rate": 1.9864466466428192e-05, "loss": 2.0988, "step": 24933 }, { "epoch": 0.32, "grad_norm": 3.7041406631469727, "learning_rate": 1.98644492241974e-05, "loss": 1.9659, "step": 24934 }, { "epoch": 0.32, "grad_norm": 4.068721294403076, "learning_rate": 1.98644319808774e-05, "loss": 2.1122, "step": 24935 }, { "epoch": 0.32, "grad_norm": 4.293546676635742, "learning_rate": 1.9864414736468203e-05, "loss": 1.9287, "step": 24936 }, { "epoch": 0.32, "grad_norm": 4.150006294250488, "learning_rate": 1.9864397490969808e-05, "loss": 1.9729, "step": 24937 }, { "epoch": 0.32, "grad_norm": 3.8165223598480225, "learning_rate": 1.9864380244382213e-05, "loss": 2.0032, "step": 24938 }, { "epoch": 0.32, "grad_norm": 3.8207197189331055, "learning_rate": 1.9864362996705426e-05, "loss": 1.7621, "step": 24939 }, { "epoch": 0.32, "grad_norm": 4.135936737060547, "learning_rate": 1.9864345747939442e-05, "loss": 1.912, "step": 24940 }, { "epoch": 0.32, "grad_norm": 4.078593730926514, "learning_rate": 1.9864328498084273e-05, "loss": 2.3737, "step": 24941 }, { "epoch": 0.32, "grad_norm": 3.460876941680908, "learning_rate": 1.986431124713991e-05, "loss": 1.7878, "step": 24942 }, { "epoch": 0.32, "grad_norm": 4.231565475463867, "learning_rate": 1.986429399510636e-05, "loss": 2.5174, "step": 24943 }, { "epoch": 0.32, "grad_norm": 4.220617771148682, "learning_rate": 1.9864276741983626e-05, "loss": 2.1958, "step": 24944 }, { "epoch": 0.32, "grad_norm": 3.9022164344787598, "learning_rate": 1.9864259487771705e-05, "loss": 2.1787, "step": 24945 }, { "epoch": 0.32, "grad_norm": 4.070754051208496, "learning_rate": 1.986424223247061e-05, "loss": 2.4409, "step": 24946 }, { "epoch": 0.32, "grad_norm": 3.3564651012420654, "learning_rate": 1.986422497608033e-05, "loss": 1.6537, "step": 24947 }, { "epoch": 0.32, "grad_norm": 3.939429759979248, "learning_rate": 1.986420771860087e-05, "loss": 2.0198, "step": 24948 }, { "epoch": 0.32, "grad_norm": 4.746933460235596, "learning_rate": 1.9864190460032238e-05, "loss": 2.5066, "step": 24949 }, { "epoch": 0.32, "grad_norm": 4.171497344970703, "learning_rate": 1.986417320037443e-05, "loss": 2.1192, "step": 24950 }, { "epoch": 0.32, "grad_norm": 4.180923938751221, "learning_rate": 1.986415593962745e-05, "loss": 1.9454, "step": 24951 }, { "epoch": 0.32, "grad_norm": 3.8931686878204346, "learning_rate": 1.98641386777913e-05, "loss": 1.9581, "step": 24952 }, { "epoch": 0.32, "grad_norm": 3.694828510284424, "learning_rate": 1.9864121414865983e-05, "loss": 1.8354, "step": 24953 }, { "epoch": 0.32, "grad_norm": 4.139150619506836, "learning_rate": 1.9864104150851502e-05, "loss": 2.0395, "step": 24954 }, { "epoch": 0.32, "grad_norm": 3.6529550552368164, "learning_rate": 1.9864086885747852e-05, "loss": 2.0402, "step": 24955 }, { "epoch": 0.32, "grad_norm": 3.709425926208496, "learning_rate": 1.9864069619555043e-05, "loss": 1.9151, "step": 24956 }, { "epoch": 0.32, "grad_norm": 3.469177007675171, "learning_rate": 1.986405235227307e-05, "loss": 1.7323, "step": 24957 }, { "epoch": 0.32, "grad_norm": 3.8137950897216797, "learning_rate": 1.986403508390194e-05, "loss": 1.9945, "step": 24958 }, { "epoch": 0.32, "grad_norm": 4.403915882110596, "learning_rate": 1.9864017814441655e-05, "loss": 2.332, "step": 24959 }, { "epoch": 0.32, "grad_norm": 4.145206928253174, "learning_rate": 1.986400054389221e-05, "loss": 2.4811, "step": 24960 }, { "epoch": 0.32, "grad_norm": 3.7151153087615967, "learning_rate": 1.9863983272253616e-05, "loss": 1.9332, "step": 24961 }, { "epoch": 0.32, "grad_norm": 3.140481948852539, "learning_rate": 1.986396599952587e-05, "loss": 1.4637, "step": 24962 }, { "epoch": 0.32, "grad_norm": 4.3709635734558105, "learning_rate": 1.9863948725708975e-05, "loss": 2.4202, "step": 24963 }, { "epoch": 0.32, "grad_norm": 4.170511245727539, "learning_rate": 1.9863931450802935e-05, "loss": 1.7475, "step": 24964 }, { "epoch": 0.32, "grad_norm": 3.6920320987701416, "learning_rate": 1.9863914174807742e-05, "loss": 2.1855, "step": 24965 }, { "epoch": 0.32, "grad_norm": 4.4516496658325195, "learning_rate": 1.986389689772341e-05, "loss": 2.101, "step": 24966 }, { "epoch": 0.32, "grad_norm": 3.8486855030059814, "learning_rate": 1.986387961954994e-05, "loss": 1.9608, "step": 24967 }, { "epoch": 0.32, "grad_norm": 4.719847202301025, "learning_rate": 1.986386234028733e-05, "loss": 2.1154, "step": 24968 }, { "epoch": 0.32, "grad_norm": 4.409156799316406, "learning_rate": 1.9863845059935575e-05, "loss": 2.638, "step": 24969 }, { "epoch": 0.32, "grad_norm": 3.631038188934326, "learning_rate": 1.9863827778494692e-05, "loss": 1.9148, "step": 24970 }, { "epoch": 0.32, "grad_norm": 3.6908185482025146, "learning_rate": 1.986381049596467e-05, "loss": 1.8558, "step": 24971 }, { "epoch": 0.32, "grad_norm": 4.198519229888916, "learning_rate": 1.986379321234552e-05, "loss": 2.0847, "step": 24972 }, { "epoch": 0.32, "grad_norm": 4.220805644989014, "learning_rate": 1.9863775927637235e-05, "loss": 2.1184, "step": 24973 }, { "epoch": 0.32, "grad_norm": 3.9315133094787598, "learning_rate": 1.9863758641839825e-05, "loss": 1.9846, "step": 24974 }, { "epoch": 0.32, "grad_norm": 3.7327327728271484, "learning_rate": 1.986374135495329e-05, "loss": 1.986, "step": 24975 }, { "epoch": 0.32, "grad_norm": 3.9405081272125244, "learning_rate": 1.986372406697763e-05, "loss": 2.1523, "step": 24976 }, { "epoch": 0.32, "grad_norm": 3.8142950534820557, "learning_rate": 1.9863706777912847e-05, "loss": 1.7866, "step": 24977 }, { "epoch": 0.32, "grad_norm": 4.49226188659668, "learning_rate": 1.9863689487758943e-05, "loss": 2.3517, "step": 24978 }, { "epoch": 0.32, "grad_norm": 4.367557525634766, "learning_rate": 1.9863672196515917e-05, "loss": 2.5977, "step": 24979 }, { "epoch": 0.32, "grad_norm": 4.573734283447266, "learning_rate": 1.9863654904183776e-05, "loss": 2.6748, "step": 24980 }, { "epoch": 0.32, "grad_norm": 4.134267807006836, "learning_rate": 1.9863637610762524e-05, "loss": 2.4179, "step": 24981 }, { "epoch": 0.32, "grad_norm": 4.981654644012451, "learning_rate": 1.9863620316252157e-05, "loss": 2.9543, "step": 24982 }, { "epoch": 0.32, "grad_norm": 4.260098934173584, "learning_rate": 1.9863603020652678e-05, "loss": 2.4562, "step": 24983 }, { "epoch": 0.32, "grad_norm": 3.9178261756896973, "learning_rate": 1.986358572396409e-05, "loss": 1.9905, "step": 24984 }, { "epoch": 0.32, "grad_norm": 3.367795467376709, "learning_rate": 1.9863568426186396e-05, "loss": 1.2788, "step": 24985 }, { "epoch": 0.32, "grad_norm": 3.4735794067382812, "learning_rate": 1.9863551127319596e-05, "loss": 1.7715, "step": 24986 }, { "epoch": 0.32, "grad_norm": 4.188483238220215, "learning_rate": 1.986353382736369e-05, "loss": 2.0146, "step": 24987 }, { "epoch": 0.32, "grad_norm": 3.8219094276428223, "learning_rate": 1.9863516526318688e-05, "loss": 1.9524, "step": 24988 }, { "epoch": 0.32, "grad_norm": 3.6997275352478027, "learning_rate": 1.9863499224184584e-05, "loss": 1.7169, "step": 24989 }, { "epoch": 0.32, "grad_norm": 3.9162774085998535, "learning_rate": 1.986348192096138e-05, "loss": 2.1178, "step": 24990 }, { "epoch": 0.32, "grad_norm": 4.2298407554626465, "learning_rate": 1.9863464616649085e-05, "loss": 2.2521, "step": 24991 }, { "epoch": 0.32, "grad_norm": 3.855278491973877, "learning_rate": 1.9863447311247693e-05, "loss": 2.3197, "step": 24992 }, { "epoch": 0.32, "grad_norm": 3.7395992279052734, "learning_rate": 1.9863430004757207e-05, "loss": 2.0761, "step": 24993 }, { "epoch": 0.32, "grad_norm": 4.113420486450195, "learning_rate": 1.9863412697177633e-05, "loss": 1.9616, "step": 24994 }, { "epoch": 0.32, "grad_norm": 3.749732732772827, "learning_rate": 1.986339538850897e-05, "loss": 2.0631, "step": 24995 }, { "epoch": 0.32, "grad_norm": 4.198204517364502, "learning_rate": 1.9863378078751225e-05, "loss": 2.0241, "step": 24996 }, { "epoch": 0.32, "grad_norm": 4.331805229187012, "learning_rate": 1.986336076790439e-05, "loss": 1.8627, "step": 24997 }, { "epoch": 0.32, "grad_norm": 3.9519200325012207, "learning_rate": 1.986334345596848e-05, "loss": 2.1543, "step": 24998 }, { "epoch": 0.32, "grad_norm": 4.050539016723633, "learning_rate": 1.9863326142943486e-05, "loss": 1.8492, "step": 24999 }, { "epoch": 0.32, "grad_norm": 3.746901512145996, "learning_rate": 1.9863308828829408e-05, "loss": 2.0027, "step": 25000 }, { "epoch": 0.32, "grad_norm": 3.7446329593658447, "learning_rate": 1.986329151362626e-05, "loss": 1.977, "step": 25001 }, { "epoch": 0.32, "grad_norm": 4.1968913078308105, "learning_rate": 1.9863274197334034e-05, "loss": 2.0502, "step": 25002 }, { "epoch": 0.32, "grad_norm": 4.148970603942871, "learning_rate": 1.9863256879952737e-05, "loss": 1.6839, "step": 25003 }, { "epoch": 0.32, "grad_norm": 4.764533519744873, "learning_rate": 1.986323956148237e-05, "loss": 2.8106, "step": 25004 }, { "epoch": 0.32, "grad_norm": 4.2042460441589355, "learning_rate": 1.986322224192293e-05, "loss": 2.1946, "step": 25005 }, { "epoch": 0.32, "grad_norm": 3.8574535846710205, "learning_rate": 1.9863204921274427e-05, "loss": 2.2287, "step": 25006 }, { "epoch": 0.32, "grad_norm": 3.6953420639038086, "learning_rate": 1.9863187599536857e-05, "loss": 2.1436, "step": 25007 }, { "epoch": 0.32, "grad_norm": 3.800159454345703, "learning_rate": 1.9863170276710225e-05, "loss": 1.9979, "step": 25008 }, { "epoch": 0.32, "grad_norm": 4.406791687011719, "learning_rate": 1.986315295279453e-05, "loss": 2.0643, "step": 25009 }, { "epoch": 0.32, "grad_norm": 3.8572134971618652, "learning_rate": 1.9863135627789777e-05, "loss": 2.4533, "step": 25010 }, { "epoch": 0.32, "grad_norm": 4.406711101531982, "learning_rate": 1.9863118301695964e-05, "loss": 2.5506, "step": 25011 }, { "epoch": 0.32, "grad_norm": 3.6207215785980225, "learning_rate": 1.98631009745131e-05, "loss": 2.0771, "step": 25012 }, { "epoch": 0.32, "grad_norm": 4.219793796539307, "learning_rate": 1.9863083646241178e-05, "loss": 2.3916, "step": 25013 }, { "epoch": 0.32, "grad_norm": 3.72637677192688, "learning_rate": 1.9863066316880206e-05, "loss": 2.0278, "step": 25014 }, { "epoch": 0.32, "grad_norm": 3.8847436904907227, "learning_rate": 1.9863048986430183e-05, "loss": 2.0939, "step": 25015 }, { "epoch": 0.32, "grad_norm": 4.174421787261963, "learning_rate": 1.9863031654891113e-05, "loss": 2.2632, "step": 25016 }, { "epoch": 0.32, "grad_norm": 4.817842960357666, "learning_rate": 1.9863014322262996e-05, "loss": 2.0248, "step": 25017 }, { "epoch": 0.32, "grad_norm": 3.8452372550964355, "learning_rate": 1.9862996988545836e-05, "loss": 1.9364, "step": 25018 }, { "epoch": 0.32, "grad_norm": 3.787503242492676, "learning_rate": 1.9862979653739634e-05, "loss": 2.2895, "step": 25019 }, { "epoch": 0.32, "grad_norm": 4.25788688659668, "learning_rate": 1.9862962317844393e-05, "loss": 2.2071, "step": 25020 }, { "epoch": 0.32, "grad_norm": 4.142570972442627, "learning_rate": 1.986294498086011e-05, "loss": 2.1288, "step": 25021 }, { "epoch": 0.32, "grad_norm": 4.105769634246826, "learning_rate": 1.9862927642786795e-05, "loss": 2.2977, "step": 25022 }, { "epoch": 0.32, "grad_norm": 3.5455756187438965, "learning_rate": 1.9862910303624442e-05, "loss": 1.6923, "step": 25023 }, { "epoch": 0.32, "grad_norm": 3.6316142082214355, "learning_rate": 1.986289296337306e-05, "loss": 1.8874, "step": 25024 }, { "epoch": 0.32, "grad_norm": 3.666456699371338, "learning_rate": 1.986287562203264e-05, "loss": 2.0191, "step": 25025 }, { "epoch": 0.32, "grad_norm": 4.121302127838135, "learning_rate": 1.9862858279603195e-05, "loss": 2.1112, "step": 25026 }, { "epoch": 0.32, "grad_norm": 3.5097570419311523, "learning_rate": 1.9862840936084728e-05, "loss": 2.0174, "step": 25027 }, { "epoch": 0.32, "grad_norm": 4.726931095123291, "learning_rate": 1.986282359147723e-05, "loss": 2.5151, "step": 25028 }, { "epoch": 0.32, "grad_norm": 4.192911148071289, "learning_rate": 1.986280624578071e-05, "loss": 2.0792, "step": 25029 }, { "epoch": 0.32, "grad_norm": 3.679928779602051, "learning_rate": 1.9862788898995172e-05, "loss": 2.0075, "step": 25030 }, { "epoch": 0.32, "grad_norm": 4.254648208618164, "learning_rate": 1.986277155112061e-05, "loss": 1.8166, "step": 25031 }, { "epoch": 0.32, "grad_norm": 4.269240856170654, "learning_rate": 1.9862754202157033e-05, "loss": 1.7576, "step": 25032 }, { "epoch": 0.32, "grad_norm": 4.049345016479492, "learning_rate": 1.9862736852104442e-05, "loss": 2.0746, "step": 25033 }, { "epoch": 0.32, "grad_norm": 4.461515426635742, "learning_rate": 1.9862719500962838e-05, "loss": 2.0644, "step": 25034 }, { "epoch": 0.32, "grad_norm": 3.9440619945526123, "learning_rate": 1.986270214873222e-05, "loss": 2.2049, "step": 25035 }, { "epoch": 0.32, "grad_norm": 5.053102493286133, "learning_rate": 1.9862684795412593e-05, "loss": 2.5697, "step": 25036 }, { "epoch": 0.32, "grad_norm": 4.087937831878662, "learning_rate": 1.986266744100396e-05, "loss": 2.2493, "step": 25037 }, { "epoch": 0.32, "grad_norm": 3.928619384765625, "learning_rate": 1.9862650085506318e-05, "loss": 1.8383, "step": 25038 }, { "epoch": 0.32, "grad_norm": 4.0434465408325195, "learning_rate": 1.9862632728919674e-05, "loss": 1.838, "step": 25039 }, { "epoch": 0.32, "grad_norm": 4.296560287475586, "learning_rate": 1.9862615371244027e-05, "loss": 2.1065, "step": 25040 }, { "epoch": 0.32, "grad_norm": 4.037564754486084, "learning_rate": 1.986259801247938e-05, "loss": 2.173, "step": 25041 }, { "epoch": 0.32, "grad_norm": 3.9357924461364746, "learning_rate": 1.9862580652625737e-05, "loss": 2.1214, "step": 25042 }, { "epoch": 0.33, "grad_norm": 4.9958815574646, "learning_rate": 1.9862563291683097e-05, "loss": 2.9435, "step": 25043 }, { "epoch": 0.33, "grad_norm": 3.7608485221862793, "learning_rate": 1.986254592965146e-05, "loss": 2.1744, "step": 25044 }, { "epoch": 0.33, "grad_norm": 4.48883056640625, "learning_rate": 1.9862528566530833e-05, "loss": 1.8025, "step": 25045 }, { "epoch": 0.33, "grad_norm": 4.596338272094727, "learning_rate": 1.9862511202321218e-05, "loss": 2.5407, "step": 25046 }, { "epoch": 0.33, "grad_norm": 3.6341054439544678, "learning_rate": 1.9862493837022613e-05, "loss": 1.5353, "step": 25047 }, { "epoch": 0.33, "grad_norm": 3.9971415996551514, "learning_rate": 1.986247647063502e-05, "loss": 1.8444, "step": 25048 }, { "epoch": 0.33, "grad_norm": 4.029233932495117, "learning_rate": 1.986245910315844e-05, "loss": 2.2034, "step": 25049 }, { "epoch": 0.33, "grad_norm": 4.674548625946045, "learning_rate": 1.986244173459288e-05, "loss": 2.604, "step": 25050 }, { "epoch": 0.33, "grad_norm": 3.646256923675537, "learning_rate": 1.986242436493834e-05, "loss": 2.1399, "step": 25051 }, { "epoch": 0.33, "grad_norm": 4.041036605834961, "learning_rate": 1.986240699419482e-05, "loss": 1.9187, "step": 25052 }, { "epoch": 0.33, "grad_norm": 4.1439080238342285, "learning_rate": 1.9862389622362318e-05, "loss": 1.9792, "step": 25053 }, { "epoch": 0.33, "grad_norm": 4.122561931610107, "learning_rate": 1.986237224944085e-05, "loss": 2.2445, "step": 25054 }, { "epoch": 0.33, "grad_norm": 4.289710521697998, "learning_rate": 1.98623548754304e-05, "loss": 2.0707, "step": 25055 }, { "epoch": 0.33, "grad_norm": 4.1872334480285645, "learning_rate": 1.9862337500330984e-05, "loss": 2.1975, "step": 25056 }, { "epoch": 0.33, "grad_norm": 3.794830799102783, "learning_rate": 1.9862320124142598e-05, "loss": 2.3653, "step": 25057 }, { "epoch": 0.33, "grad_norm": 3.9015486240386963, "learning_rate": 1.9862302746865247e-05, "loss": 2.0671, "step": 25058 }, { "epoch": 0.33, "grad_norm": 4.4446306228637695, "learning_rate": 1.9862285368498926e-05, "loss": 1.8532, "step": 25059 }, { "epoch": 0.33, "grad_norm": 3.6272692680358887, "learning_rate": 1.9862267989043643e-05, "loss": 2.0407, "step": 25060 }, { "epoch": 0.33, "grad_norm": 3.796872854232788, "learning_rate": 1.9862250608499397e-05, "loss": 1.9274, "step": 25061 }, { "epoch": 0.33, "grad_norm": 4.275226593017578, "learning_rate": 1.9862233226866192e-05, "loss": 1.9165, "step": 25062 }, { "epoch": 0.33, "grad_norm": 3.712661027908325, "learning_rate": 1.986221584414403e-05, "loss": 2.0078, "step": 25063 }, { "epoch": 0.33, "grad_norm": 3.854438066482544, "learning_rate": 1.9862198460332912e-05, "loss": 1.9176, "step": 25064 }, { "epoch": 0.33, "grad_norm": 4.595845699310303, "learning_rate": 1.986218107543284e-05, "loss": 2.3527, "step": 25065 }, { "epoch": 0.33, "grad_norm": 4.073748588562012, "learning_rate": 1.9862163689443816e-05, "loss": 2.0281, "step": 25066 }, { "epoch": 0.33, "grad_norm": 4.5079026222229, "learning_rate": 1.9862146302365842e-05, "loss": 2.2906, "step": 25067 }, { "epoch": 0.33, "grad_norm": 4.50537633895874, "learning_rate": 1.986212891419892e-05, "loss": 2.6925, "step": 25068 }, { "epoch": 0.33, "grad_norm": 4.608235836029053, "learning_rate": 1.986211152494305e-05, "loss": 2.3119, "step": 25069 }, { "epoch": 0.33, "grad_norm": 3.7699241638183594, "learning_rate": 1.9862094134598236e-05, "loss": 1.9255, "step": 25070 }, { "epoch": 0.33, "grad_norm": 4.288858413696289, "learning_rate": 1.986207674316448e-05, "loss": 2.2672, "step": 25071 }, { "epoch": 0.33, "grad_norm": 4.887584209442139, "learning_rate": 1.9862059350641783e-05, "loss": 2.4762, "step": 25072 }, { "epoch": 0.33, "grad_norm": 3.9390079975128174, "learning_rate": 1.9862041957030146e-05, "loss": 2.0206, "step": 25073 }, { "epoch": 0.33, "grad_norm": 4.136254787445068, "learning_rate": 1.9862024562329574e-05, "loss": 2.2578, "step": 25074 }, { "epoch": 0.33, "grad_norm": 3.4772682189941406, "learning_rate": 1.9862007166540067e-05, "loss": 1.6695, "step": 25075 }, { "epoch": 0.33, "grad_norm": 3.7266440391540527, "learning_rate": 1.9861989769661628e-05, "loss": 1.9663, "step": 25076 }, { "epoch": 0.33, "grad_norm": 3.9039599895477295, "learning_rate": 1.9861972371694257e-05, "loss": 1.9541, "step": 25077 }, { "epoch": 0.33, "grad_norm": 4.033047199249268, "learning_rate": 1.9861954972637957e-05, "loss": 1.8448, "step": 25078 }, { "epoch": 0.33, "grad_norm": 4.013068199157715, "learning_rate": 1.9861937572492728e-05, "loss": 2.1434, "step": 25079 }, { "epoch": 0.33, "grad_norm": 4.065740585327148, "learning_rate": 1.9861920171258578e-05, "loss": 2.3481, "step": 25080 }, { "epoch": 0.33, "grad_norm": 3.7372589111328125, "learning_rate": 1.9861902768935502e-05, "loss": 1.8951, "step": 25081 }, { "epoch": 0.33, "grad_norm": 4.278488636016846, "learning_rate": 1.9861885365523505e-05, "loss": 2.4455, "step": 25082 }, { "epoch": 0.33, "grad_norm": 3.852796792984009, "learning_rate": 1.986186796102259e-05, "loss": 2.1, "step": 25083 }, { "epoch": 0.33, "grad_norm": 4.190178871154785, "learning_rate": 1.9861850555432755e-05, "loss": 1.8653, "step": 25084 }, { "epoch": 0.33, "grad_norm": 3.624311685562134, "learning_rate": 1.9861833148754006e-05, "loss": 1.8283, "step": 25085 }, { "epoch": 0.33, "grad_norm": 3.7655577659606934, "learning_rate": 1.9861815740986345e-05, "loss": 1.9664, "step": 25086 }, { "epoch": 0.33, "grad_norm": 4.1865034103393555, "learning_rate": 1.986179833212977e-05, "loss": 2.0021, "step": 25087 }, { "epoch": 0.33, "grad_norm": 4.088984489440918, "learning_rate": 1.9861780922184284e-05, "loss": 2.2103, "step": 25088 }, { "epoch": 0.33, "grad_norm": 4.453736305236816, "learning_rate": 1.9861763511149888e-05, "loss": 2.3459, "step": 25089 }, { "epoch": 0.33, "grad_norm": 4.08961296081543, "learning_rate": 1.986174609902659e-05, "loss": 2.5042, "step": 25090 }, { "epoch": 0.33, "grad_norm": 4.701409339904785, "learning_rate": 1.9861728685814388e-05, "loss": 2.0651, "step": 25091 }, { "epoch": 0.33, "grad_norm": 4.006587982177734, "learning_rate": 1.9861711271513284e-05, "loss": 2.2829, "step": 25092 }, { "epoch": 0.33, "grad_norm": 4.712395668029785, "learning_rate": 1.986169385612328e-05, "loss": 2.0558, "step": 25093 }, { "epoch": 0.33, "grad_norm": 4.321805000305176, "learning_rate": 1.9861676439644375e-05, "loss": 2.2824, "step": 25094 }, { "epoch": 0.33, "grad_norm": 4.274973392486572, "learning_rate": 1.9861659022076573e-05, "loss": 1.8424, "step": 25095 }, { "epoch": 0.33, "grad_norm": 4.699433326721191, "learning_rate": 1.986164160341988e-05, "loss": 2.4455, "step": 25096 }, { "epoch": 0.33, "grad_norm": 3.9378724098205566, "learning_rate": 1.9861624183674292e-05, "loss": 2.0839, "step": 25097 }, { "epoch": 0.33, "grad_norm": 3.93931245803833, "learning_rate": 1.9861606762839813e-05, "loss": 1.9686, "step": 25098 }, { "epoch": 0.33, "grad_norm": 3.297086238861084, "learning_rate": 1.9861589340916446e-05, "loss": 1.56, "step": 25099 }, { "epoch": 0.33, "grad_norm": 4.1012444496154785, "learning_rate": 1.9861571917904198e-05, "loss": 1.9381, "step": 25100 }, { "epoch": 0.33, "grad_norm": 3.8775017261505127, "learning_rate": 1.9861554493803058e-05, "loss": 1.7912, "step": 25101 }, { "epoch": 0.33, "grad_norm": 4.239542484283447, "learning_rate": 1.9861537068613037e-05, "loss": 2.1043, "step": 25102 }, { "epoch": 0.33, "grad_norm": 3.794475793838501, "learning_rate": 1.9861519642334135e-05, "loss": 1.466, "step": 25103 }, { "epoch": 0.33, "grad_norm": 3.992062568664551, "learning_rate": 1.9861502214966355e-05, "loss": 2.5584, "step": 25104 }, { "epoch": 0.33, "grad_norm": 3.879516839981079, "learning_rate": 1.9861484786509698e-05, "loss": 2.0712, "step": 25105 }, { "epoch": 0.33, "grad_norm": 4.002524375915527, "learning_rate": 1.9861467356964162e-05, "loss": 2.2195, "step": 25106 }, { "epoch": 0.33, "grad_norm": 3.4875733852386475, "learning_rate": 1.9861449926329756e-05, "loss": 1.9287, "step": 25107 }, { "epoch": 0.33, "grad_norm": 4.354691028594971, "learning_rate": 1.9861432494606482e-05, "loss": 2.4696, "step": 25108 }, { "epoch": 0.33, "grad_norm": 3.515852928161621, "learning_rate": 1.986141506179433e-05, "loss": 1.8829, "step": 25109 }, { "epoch": 0.33, "grad_norm": 4.1299214363098145, "learning_rate": 1.9861397627893317e-05, "loss": 2.0213, "step": 25110 }, { "epoch": 0.33, "grad_norm": 4.094552040100098, "learning_rate": 1.9861380192903437e-05, "loss": 2.0461, "step": 25111 }, { "epoch": 0.33, "grad_norm": 3.716496467590332, "learning_rate": 1.9861362756824693e-05, "loss": 1.9353, "step": 25112 }, { "epoch": 0.33, "grad_norm": 4.507640361785889, "learning_rate": 1.9861345319657087e-05, "loss": 2.5723, "step": 25113 }, { "epoch": 0.33, "grad_norm": 4.308902740478516, "learning_rate": 1.986132788140062e-05, "loss": 2.1763, "step": 25114 }, { "epoch": 0.33, "grad_norm": 3.8605988025665283, "learning_rate": 1.9861310442055297e-05, "loss": 2.0492, "step": 25115 }, { "epoch": 0.33, "grad_norm": 4.611175060272217, "learning_rate": 1.986129300162112e-05, "loss": 2.4442, "step": 25116 }, { "epoch": 0.33, "grad_norm": 3.694603443145752, "learning_rate": 1.9861275560098084e-05, "loss": 1.935, "step": 25117 }, { "epoch": 0.33, "grad_norm": 4.391449451446533, "learning_rate": 1.9861258117486198e-05, "loss": 2.2734, "step": 25118 }, { "epoch": 0.33, "grad_norm": 3.7621216773986816, "learning_rate": 1.986124067378546e-05, "loss": 2.0685, "step": 25119 }, { "epoch": 0.33, "grad_norm": 3.632936477661133, "learning_rate": 1.9861223228995877e-05, "loss": 2.0506, "step": 25120 }, { "epoch": 0.33, "grad_norm": 3.6164894104003906, "learning_rate": 1.9861205783117446e-05, "loss": 1.7856, "step": 25121 }, { "epoch": 0.33, "grad_norm": 3.762620210647583, "learning_rate": 1.986118833615017e-05, "loss": 1.9828, "step": 25122 }, { "epoch": 0.33, "grad_norm": 4.536606311798096, "learning_rate": 1.9861170888094055e-05, "loss": 2.207, "step": 25123 }, { "epoch": 0.33, "grad_norm": 4.201236248016357, "learning_rate": 1.9861153438949096e-05, "loss": 2.2334, "step": 25124 }, { "epoch": 0.33, "grad_norm": 4.6754889488220215, "learning_rate": 1.98611359887153e-05, "loss": 2.6628, "step": 25125 }, { "epoch": 0.33, "grad_norm": 3.3315975666046143, "learning_rate": 1.986111853739267e-05, "loss": 1.8158, "step": 25126 }, { "epoch": 0.33, "grad_norm": 4.25454044342041, "learning_rate": 1.98611010849812e-05, "loss": 2.4242, "step": 25127 }, { "epoch": 0.33, "grad_norm": 4.368804454803467, "learning_rate": 1.9861083631480896e-05, "loss": 2.4546, "step": 25128 }, { "epoch": 0.33, "grad_norm": 3.9397692680358887, "learning_rate": 1.9861066176891764e-05, "loss": 2.1149, "step": 25129 }, { "epoch": 0.33, "grad_norm": 4.015300273895264, "learning_rate": 1.9861048721213806e-05, "loss": 2.1855, "step": 25130 }, { "epoch": 0.33, "grad_norm": 3.7936558723449707, "learning_rate": 1.9861031264447017e-05, "loss": 1.5859, "step": 25131 }, { "epoch": 0.33, "grad_norm": 4.132277488708496, "learning_rate": 1.9861013806591404e-05, "loss": 2.2641, "step": 25132 }, { "epoch": 0.33, "grad_norm": 4.243268966674805, "learning_rate": 1.9860996347646968e-05, "loss": 2.0061, "step": 25133 }, { "epoch": 0.33, "grad_norm": 3.6777453422546387, "learning_rate": 1.9860978887613708e-05, "loss": 1.9228, "step": 25134 }, { "epoch": 0.33, "grad_norm": 4.065666675567627, "learning_rate": 1.986096142649163e-05, "loss": 2.4035, "step": 25135 }, { "epoch": 0.33, "grad_norm": 4.179133415222168, "learning_rate": 1.9860943964280738e-05, "loss": 1.9661, "step": 25136 }, { "epoch": 0.33, "grad_norm": 3.945286273956299, "learning_rate": 1.9860926500981027e-05, "loss": 2.3256, "step": 25137 }, { "epoch": 0.33, "grad_norm": 3.9896767139434814, "learning_rate": 1.9860909036592507e-05, "loss": 2.0679, "step": 25138 }, { "epoch": 0.33, "grad_norm": 3.762885093688965, "learning_rate": 1.986089157111517e-05, "loss": 1.9832, "step": 25139 }, { "epoch": 0.33, "grad_norm": 4.251684665679932, "learning_rate": 1.9860874104549026e-05, "loss": 2.4649, "step": 25140 }, { "epoch": 0.33, "grad_norm": 4.523189544677734, "learning_rate": 1.9860856636894073e-05, "loss": 2.6238, "step": 25141 }, { "epoch": 0.33, "grad_norm": 3.9743614196777344, "learning_rate": 1.9860839168150316e-05, "loss": 1.8444, "step": 25142 }, { "epoch": 0.33, "grad_norm": 4.263285160064697, "learning_rate": 1.9860821698317752e-05, "loss": 1.9882, "step": 25143 }, { "epoch": 0.33, "grad_norm": 4.1874847412109375, "learning_rate": 1.9860804227396385e-05, "loss": 2.0758, "step": 25144 }, { "epoch": 0.33, "grad_norm": 4.0340986251831055, "learning_rate": 1.986078675538622e-05, "loss": 2.2099, "step": 25145 }, { "epoch": 0.33, "grad_norm": 4.57582426071167, "learning_rate": 1.986076928228726e-05, "loss": 2.2485, "step": 25146 }, { "epoch": 0.33, "grad_norm": 3.5288968086242676, "learning_rate": 1.98607518080995e-05, "loss": 2.0821, "step": 25147 }, { "epoch": 0.33, "grad_norm": 3.8109331130981445, "learning_rate": 1.9860734332822946e-05, "loss": 2.2028, "step": 25148 }, { "epoch": 0.33, "grad_norm": 3.7671124935150146, "learning_rate": 1.98607168564576e-05, "loss": 2.0153, "step": 25149 }, { "epoch": 0.33, "grad_norm": 3.977383852005005, "learning_rate": 1.9860699379003464e-05, "loss": 2.1109, "step": 25150 }, { "epoch": 0.33, "grad_norm": 3.8982093334198, "learning_rate": 1.9860681900460542e-05, "loss": 1.9857, "step": 25151 }, { "epoch": 0.33, "grad_norm": 3.7805705070495605, "learning_rate": 1.986066442082883e-05, "loss": 1.9011, "step": 25152 }, { "epoch": 0.33, "grad_norm": 4.262625217437744, "learning_rate": 1.9860646940108333e-05, "loss": 2.181, "step": 25153 }, { "epoch": 0.33, "grad_norm": 4.453469753265381, "learning_rate": 1.9860629458299055e-05, "loss": 2.2497, "step": 25154 }, { "epoch": 0.33, "grad_norm": 4.239772796630859, "learning_rate": 1.9860611975400995e-05, "loss": 2.0006, "step": 25155 }, { "epoch": 0.33, "grad_norm": 4.565547943115234, "learning_rate": 1.9860594491414158e-05, "loss": 2.5415, "step": 25156 }, { "epoch": 0.33, "grad_norm": 4.092007637023926, "learning_rate": 1.9860577006338545e-05, "loss": 1.811, "step": 25157 }, { "epoch": 0.33, "grad_norm": 4.356128692626953, "learning_rate": 1.9860559520174153e-05, "loss": 1.9196, "step": 25158 }, { "epoch": 0.33, "grad_norm": 3.6733174324035645, "learning_rate": 1.986054203292099e-05, "loss": 2.1263, "step": 25159 }, { "epoch": 0.33, "grad_norm": 3.6683390140533447, "learning_rate": 1.9860524544579057e-05, "loss": 2.1277, "step": 25160 }, { "epoch": 0.33, "grad_norm": 4.352462291717529, "learning_rate": 1.9860507055148353e-05, "loss": 2.1091, "step": 25161 }, { "epoch": 0.33, "grad_norm": 4.125951766967773, "learning_rate": 1.986048956462888e-05, "loss": 2.1792, "step": 25162 }, { "epoch": 0.33, "grad_norm": 4.202004432678223, "learning_rate": 1.9860472073020644e-05, "loss": 2.0678, "step": 25163 }, { "epoch": 0.33, "grad_norm": 4.067206382751465, "learning_rate": 1.9860454580323646e-05, "loss": 2.0546, "step": 25164 }, { "epoch": 0.33, "grad_norm": 3.7145628929138184, "learning_rate": 1.9860437086537885e-05, "loss": 1.7911, "step": 25165 }, { "epoch": 0.33, "grad_norm": 4.046070575714111, "learning_rate": 1.9860419591663365e-05, "loss": 2.2927, "step": 25166 }, { "epoch": 0.33, "grad_norm": 3.8216941356658936, "learning_rate": 1.9860402095700086e-05, "loss": 1.7898, "step": 25167 }, { "epoch": 0.33, "grad_norm": 3.8336071968078613, "learning_rate": 1.9860384598648054e-05, "loss": 1.802, "step": 25168 }, { "epoch": 0.33, "grad_norm": 3.9463539123535156, "learning_rate": 1.9860367100507266e-05, "loss": 2.2853, "step": 25169 }, { "epoch": 0.33, "grad_norm": 3.9896957874298096, "learning_rate": 1.9860349601277726e-05, "loss": 2.3459, "step": 25170 }, { "epoch": 0.33, "grad_norm": 4.317629337310791, "learning_rate": 1.9860332100959437e-05, "loss": 1.9169, "step": 25171 }, { "epoch": 0.33, "grad_norm": 4.258448123931885, "learning_rate": 1.98603145995524e-05, "loss": 2.0418, "step": 25172 }, { "epoch": 0.33, "grad_norm": 3.432535171508789, "learning_rate": 1.986029709705662e-05, "loss": 1.5404, "step": 25173 }, { "epoch": 0.33, "grad_norm": 3.907593250274658, "learning_rate": 1.986027959347209e-05, "loss": 2.0214, "step": 25174 }, { "epoch": 0.33, "grad_norm": 4.8876633644104, "learning_rate": 1.9860262088798817e-05, "loss": 1.9519, "step": 25175 }, { "epoch": 0.33, "grad_norm": 4.331203937530518, "learning_rate": 1.986024458303681e-05, "loss": 2.1284, "step": 25176 }, { "epoch": 0.33, "grad_norm": 3.7669601440429688, "learning_rate": 1.9860227076186064e-05, "loss": 1.9368, "step": 25177 }, { "epoch": 0.33, "grad_norm": 3.962550401687622, "learning_rate": 1.986020956824658e-05, "loss": 2.4434, "step": 25178 }, { "epoch": 0.33, "grad_norm": 3.9190382957458496, "learning_rate": 1.986019205921836e-05, "loss": 2.3715, "step": 25179 }, { "epoch": 0.33, "grad_norm": 4.3898210525512695, "learning_rate": 1.9860174549101406e-05, "loss": 2.9945, "step": 25180 }, { "epoch": 0.33, "grad_norm": 3.710198402404785, "learning_rate": 1.9860157037895728e-05, "loss": 2.1655, "step": 25181 }, { "epoch": 0.33, "grad_norm": 4.096523284912109, "learning_rate": 1.9860139525601318e-05, "loss": 1.9136, "step": 25182 }, { "epoch": 0.33, "grad_norm": 3.742213010787964, "learning_rate": 1.9860122012218183e-05, "loss": 2.1298, "step": 25183 }, { "epoch": 0.33, "grad_norm": 3.8778817653656006, "learning_rate": 1.986010449774632e-05, "loss": 1.7935, "step": 25184 }, { "epoch": 0.33, "grad_norm": 4.543208122253418, "learning_rate": 1.9860086982185733e-05, "loss": 2.1714, "step": 25185 }, { "epoch": 0.33, "grad_norm": 3.377044439315796, "learning_rate": 1.9860069465536428e-05, "loss": 1.6387, "step": 25186 }, { "epoch": 0.33, "grad_norm": 3.6321818828582764, "learning_rate": 1.9860051947798405e-05, "loss": 1.931, "step": 25187 }, { "epoch": 0.33, "grad_norm": 3.8046319484710693, "learning_rate": 1.9860034428971663e-05, "loss": 2.4882, "step": 25188 }, { "epoch": 0.33, "grad_norm": 4.073830604553223, "learning_rate": 1.9860016909056206e-05, "loss": 2.1364, "step": 25189 }, { "epoch": 0.33, "grad_norm": 3.6396567821502686, "learning_rate": 1.9859999388052034e-05, "loss": 2.0031, "step": 25190 }, { "epoch": 0.33, "grad_norm": 4.02785062789917, "learning_rate": 1.9859981865959157e-05, "loss": 2.2101, "step": 25191 }, { "epoch": 0.33, "grad_norm": 3.9402458667755127, "learning_rate": 1.9859964342777565e-05, "loss": 2.0311, "step": 25192 }, { "epoch": 0.33, "grad_norm": 4.207383155822754, "learning_rate": 1.9859946818507268e-05, "loss": 2.0816, "step": 25193 }, { "epoch": 0.33, "grad_norm": 3.8554491996765137, "learning_rate": 1.9859929293148266e-05, "loss": 1.8861, "step": 25194 }, { "epoch": 0.33, "grad_norm": 3.896146059036255, "learning_rate": 1.985991176670056e-05, "loss": 2.2045, "step": 25195 }, { "epoch": 0.33, "grad_norm": 4.1279215812683105, "learning_rate": 1.985989423916415e-05, "loss": 2.4382, "step": 25196 }, { "epoch": 0.33, "grad_norm": 3.855003595352173, "learning_rate": 1.9859876710539043e-05, "loss": 1.7132, "step": 25197 }, { "epoch": 0.33, "grad_norm": 4.155524253845215, "learning_rate": 1.9859859180825238e-05, "loss": 2.4076, "step": 25198 }, { "epoch": 0.33, "grad_norm": 4.116643905639648, "learning_rate": 1.985984165002274e-05, "loss": 2.0652, "step": 25199 }, { "epoch": 0.33, "grad_norm": 3.890251636505127, "learning_rate": 1.9859824118131543e-05, "loss": 1.8944, "step": 25200 }, { "epoch": 0.33, "grad_norm": 4.335855484008789, "learning_rate": 1.9859806585151657e-05, "loss": 2.0392, "step": 25201 }, { "epoch": 0.33, "grad_norm": 4.149676322937012, "learning_rate": 1.985978905108308e-05, "loss": 2.1814, "step": 25202 }, { "epoch": 0.33, "grad_norm": 4.072813987731934, "learning_rate": 1.9859771515925816e-05, "loss": 2.2454, "step": 25203 }, { "epoch": 0.33, "grad_norm": 3.841852903366089, "learning_rate": 1.9859753979679866e-05, "loss": 2.097, "step": 25204 }, { "epoch": 0.33, "grad_norm": 3.895833730697632, "learning_rate": 1.985973644234523e-05, "loss": 2.3976, "step": 25205 }, { "epoch": 0.33, "grad_norm": 3.7194247245788574, "learning_rate": 1.9859718903921914e-05, "loss": 1.6953, "step": 25206 }, { "epoch": 0.33, "grad_norm": 5.022153377532959, "learning_rate": 1.9859701364409917e-05, "loss": 2.5877, "step": 25207 }, { "epoch": 0.33, "grad_norm": 3.2215654850006104, "learning_rate": 1.985968382380924e-05, "loss": 1.6993, "step": 25208 }, { "epoch": 0.33, "grad_norm": 3.720796585083008, "learning_rate": 1.985966628211989e-05, "loss": 2.0804, "step": 25209 }, { "epoch": 0.33, "grad_norm": 4.315974712371826, "learning_rate": 1.9859648739341866e-05, "loss": 1.7855, "step": 25210 }, { "epoch": 0.33, "grad_norm": 3.2554776668548584, "learning_rate": 1.9859631195475167e-05, "loss": 1.7034, "step": 25211 }, { "epoch": 0.33, "grad_norm": 3.812513828277588, "learning_rate": 1.9859613650519797e-05, "loss": 1.6459, "step": 25212 }, { "epoch": 0.33, "grad_norm": 4.138243675231934, "learning_rate": 1.985959610447576e-05, "loss": 2.2431, "step": 25213 }, { "epoch": 0.33, "grad_norm": 4.288421154022217, "learning_rate": 1.9859578557343056e-05, "loss": 2.4184, "step": 25214 }, { "epoch": 0.33, "grad_norm": 4.362796306610107, "learning_rate": 1.9859561009121686e-05, "loss": 2.3559, "step": 25215 }, { "epoch": 0.33, "grad_norm": 3.840507745742798, "learning_rate": 1.9859543459811655e-05, "loss": 2.0479, "step": 25216 }, { "epoch": 0.33, "grad_norm": 4.068177700042725, "learning_rate": 1.9859525909412963e-05, "loss": 2.2949, "step": 25217 }, { "epoch": 0.33, "grad_norm": 4.2944254875183105, "learning_rate": 1.985950835792561e-05, "loss": 2.3216, "step": 25218 }, { "epoch": 0.33, "grad_norm": 4.139455318450928, "learning_rate": 1.9859490805349603e-05, "loss": 1.9551, "step": 25219 }, { "epoch": 0.33, "grad_norm": 3.877997875213623, "learning_rate": 1.985947325168494e-05, "loss": 1.9452, "step": 25220 }, { "epoch": 0.33, "grad_norm": 4.14888334274292, "learning_rate": 1.9859455696931623e-05, "loss": 2.1215, "step": 25221 }, { "epoch": 0.33, "grad_norm": 3.796268939971924, "learning_rate": 1.985943814108965e-05, "loss": 1.5961, "step": 25222 }, { "epoch": 0.33, "grad_norm": 3.600182056427002, "learning_rate": 1.9859420584159036e-05, "loss": 1.7917, "step": 25223 }, { "epoch": 0.33, "grad_norm": 3.519557237625122, "learning_rate": 1.985940302613977e-05, "loss": 1.6279, "step": 25224 }, { "epoch": 0.33, "grad_norm": 3.905651330947876, "learning_rate": 1.985938546703186e-05, "loss": 1.7103, "step": 25225 }, { "epoch": 0.33, "grad_norm": 4.332520961761475, "learning_rate": 1.9859367906835308e-05, "loss": 2.4649, "step": 25226 }, { "epoch": 0.33, "grad_norm": 4.477601051330566, "learning_rate": 1.985935034555011e-05, "loss": 2.2396, "step": 25227 }, { "epoch": 0.33, "grad_norm": 5.063908576965332, "learning_rate": 1.985933278317628e-05, "loss": 2.7582, "step": 25228 }, { "epoch": 0.33, "grad_norm": 3.8567676544189453, "learning_rate": 1.9859315219713807e-05, "loss": 1.9946, "step": 25229 }, { "epoch": 0.33, "grad_norm": 3.7148454189300537, "learning_rate": 1.98592976551627e-05, "loss": 1.7051, "step": 25230 }, { "epoch": 0.33, "grad_norm": 4.110988140106201, "learning_rate": 1.9859280089522956e-05, "loss": 2.1342, "step": 25231 }, { "epoch": 0.33, "grad_norm": 4.603381633758545, "learning_rate": 1.9859262522794584e-05, "loss": 2.498, "step": 25232 }, { "epoch": 0.33, "grad_norm": 4.0284953117370605, "learning_rate": 1.9859244954977577e-05, "loss": 2.1363, "step": 25233 }, { "epoch": 0.33, "grad_norm": 4.107368469238281, "learning_rate": 1.985922738607195e-05, "loss": 1.7543, "step": 25234 }, { "epoch": 0.33, "grad_norm": 4.864278316497803, "learning_rate": 1.985920981607769e-05, "loss": 2.4188, "step": 25235 }, { "epoch": 0.33, "grad_norm": 4.36826229095459, "learning_rate": 1.9859192244994813e-05, "loss": 2.201, "step": 25236 }, { "epoch": 0.33, "grad_norm": 3.6776340007781982, "learning_rate": 1.985917467282331e-05, "loss": 1.8268, "step": 25237 }, { "epoch": 0.33, "grad_norm": 3.6088063716888428, "learning_rate": 1.9859157099563187e-05, "loss": 2.084, "step": 25238 }, { "epoch": 0.33, "grad_norm": 3.770212173461914, "learning_rate": 1.9859139525214445e-05, "loss": 2.1022, "step": 25239 }, { "epoch": 0.33, "grad_norm": 4.305344104766846, "learning_rate": 1.9859121949777087e-05, "loss": 2.5843, "step": 25240 }, { "epoch": 0.33, "grad_norm": 4.563063144683838, "learning_rate": 1.9859104373251115e-05, "loss": 2.1481, "step": 25241 }, { "epoch": 0.33, "grad_norm": 4.499453067779541, "learning_rate": 1.985908679563653e-05, "loss": 2.4092, "step": 25242 }, { "epoch": 0.33, "grad_norm": 4.184887886047363, "learning_rate": 1.9859069216933337e-05, "loss": 2.0003, "step": 25243 }, { "epoch": 0.33, "grad_norm": 3.5438485145568848, "learning_rate": 1.9859051637141534e-05, "loss": 1.6929, "step": 25244 }, { "epoch": 0.33, "grad_norm": 4.321535587310791, "learning_rate": 1.985903405626112e-05, "loss": 2.318, "step": 25245 }, { "epoch": 0.33, "grad_norm": 3.8265082836151123, "learning_rate": 1.985901647429211e-05, "loss": 1.7817, "step": 25246 }, { "epoch": 0.33, "grad_norm": 4.181319236755371, "learning_rate": 1.985899889123449e-05, "loss": 2.2079, "step": 25247 }, { "epoch": 0.33, "grad_norm": 3.9828853607177734, "learning_rate": 1.9858981307088274e-05, "loss": 1.9082, "step": 25248 }, { "epoch": 0.33, "grad_norm": 3.366838216781616, "learning_rate": 1.9858963721853458e-05, "loss": 1.6521, "step": 25249 }, { "epoch": 0.33, "grad_norm": 5.074569225311279, "learning_rate": 1.9858946135530042e-05, "loss": 2.2814, "step": 25250 }, { "epoch": 0.33, "grad_norm": 3.9601545333862305, "learning_rate": 1.9858928548118036e-05, "loss": 2.1931, "step": 25251 }, { "epoch": 0.33, "grad_norm": 4.0265092849731445, "learning_rate": 1.9858910959617434e-05, "loss": 2.3444, "step": 25252 }, { "epoch": 0.33, "grad_norm": 3.5152664184570312, "learning_rate": 1.985889337002824e-05, "loss": 1.7939, "step": 25253 }, { "epoch": 0.33, "grad_norm": 4.250807762145996, "learning_rate": 1.9858875779350457e-05, "loss": 2.5879, "step": 25254 }, { "epoch": 0.33, "grad_norm": 3.8692312240600586, "learning_rate": 1.9858858187584088e-05, "loss": 2.2475, "step": 25255 }, { "epoch": 0.33, "grad_norm": 3.8842456340789795, "learning_rate": 1.9858840594729134e-05, "loss": 1.9469, "step": 25256 }, { "epoch": 0.33, "grad_norm": 3.598818063735962, "learning_rate": 1.98588230007856e-05, "loss": 1.7344, "step": 25257 }, { "epoch": 0.33, "grad_norm": 3.825815439224243, "learning_rate": 1.9858805405753482e-05, "loss": 1.9447, "step": 25258 }, { "epoch": 0.33, "grad_norm": 3.878434181213379, "learning_rate": 1.985878780963278e-05, "loss": 1.9131, "step": 25259 }, { "epoch": 0.33, "grad_norm": 3.7199783325195312, "learning_rate": 1.9858770212423505e-05, "loss": 2.4013, "step": 25260 }, { "epoch": 0.33, "grad_norm": 4.305940628051758, "learning_rate": 1.9858752614125655e-05, "loss": 2.5159, "step": 25261 }, { "epoch": 0.33, "grad_norm": 4.298929691314697, "learning_rate": 1.9858735014739233e-05, "loss": 2.4234, "step": 25262 }, { "epoch": 0.33, "grad_norm": 3.878019332885742, "learning_rate": 1.9858717414264234e-05, "loss": 2.0872, "step": 25263 }, { "epoch": 0.33, "grad_norm": 4.421825885772705, "learning_rate": 1.985869981270067e-05, "loss": 2.232, "step": 25264 }, { "epoch": 0.33, "grad_norm": 4.630970478057861, "learning_rate": 1.9858682210048536e-05, "loss": 2.7075, "step": 25265 }, { "epoch": 0.33, "grad_norm": 3.620569944381714, "learning_rate": 1.985866460630784e-05, "loss": 2.0348, "step": 25266 }, { "epoch": 0.33, "grad_norm": 4.260927677154541, "learning_rate": 1.9858647001478576e-05, "loss": 2.3961, "step": 25267 }, { "epoch": 0.33, "grad_norm": 4.4742817878723145, "learning_rate": 1.985862939556075e-05, "loss": 2.2963, "step": 25268 }, { "epoch": 0.33, "grad_norm": 4.293624401092529, "learning_rate": 1.9858611788554365e-05, "loss": 1.4855, "step": 25269 }, { "epoch": 0.33, "grad_norm": 4.2978105545043945, "learning_rate": 1.9858594180459424e-05, "loss": 1.9955, "step": 25270 }, { "epoch": 0.33, "grad_norm": 3.660400867462158, "learning_rate": 1.9858576571275924e-05, "loss": 1.9376, "step": 25271 }, { "epoch": 0.33, "grad_norm": 4.022063255310059, "learning_rate": 1.985855896100387e-05, "loss": 2.0558, "step": 25272 }, { "epoch": 0.33, "grad_norm": 4.225983619689941, "learning_rate": 1.9858541349643266e-05, "loss": 2.5946, "step": 25273 }, { "epoch": 0.33, "grad_norm": 3.634967565536499, "learning_rate": 1.9858523737194112e-05, "loss": 1.7327, "step": 25274 }, { "epoch": 0.33, "grad_norm": 4.0956525802612305, "learning_rate": 1.9858506123656408e-05, "loss": 2.3647, "step": 25275 }, { "epoch": 0.33, "grad_norm": 4.822639465332031, "learning_rate": 1.9858488509030155e-05, "loss": 2.178, "step": 25276 }, { "epoch": 0.33, "grad_norm": 4.227029800415039, "learning_rate": 1.9858470893315364e-05, "loss": 2.2011, "step": 25277 }, { "epoch": 0.33, "grad_norm": 3.7263269424438477, "learning_rate": 1.9858453276512026e-05, "loss": 1.9135, "step": 25278 }, { "epoch": 0.33, "grad_norm": 3.993940830230713, "learning_rate": 1.9858435658620147e-05, "loss": 2.3329, "step": 25279 }, { "epoch": 0.33, "grad_norm": 4.046736240386963, "learning_rate": 1.9858418039639735e-05, "loss": 2.0601, "step": 25280 }, { "epoch": 0.33, "grad_norm": 3.8056890964508057, "learning_rate": 1.985840041957078e-05, "loss": 1.9501, "step": 25281 }, { "epoch": 0.33, "grad_norm": 4.363504409790039, "learning_rate": 1.9858382798413294e-05, "loss": 2.5317, "step": 25282 }, { "epoch": 0.33, "grad_norm": 3.9459705352783203, "learning_rate": 1.9858365176167275e-05, "loss": 1.987, "step": 25283 }, { "epoch": 0.33, "grad_norm": 3.96891713142395, "learning_rate": 1.9858347552832724e-05, "loss": 1.872, "step": 25284 }, { "epoch": 0.33, "grad_norm": 3.9281723499298096, "learning_rate": 1.9858329928409644e-05, "loss": 2.0612, "step": 25285 }, { "epoch": 0.33, "grad_norm": 4.896892547607422, "learning_rate": 1.985831230289804e-05, "loss": 1.9074, "step": 25286 }, { "epoch": 0.33, "grad_norm": 3.7373571395874023, "learning_rate": 1.9858294676297908e-05, "loss": 1.8866, "step": 25287 }, { "epoch": 0.33, "grad_norm": 3.5926437377929688, "learning_rate": 1.9858277048609252e-05, "loss": 2.015, "step": 25288 }, { "epoch": 0.33, "grad_norm": 4.21051025390625, "learning_rate": 1.9858259419832077e-05, "loss": 2.2286, "step": 25289 }, { "epoch": 0.33, "grad_norm": 3.1360738277435303, "learning_rate": 1.9858241789966384e-05, "loss": 1.4314, "step": 25290 }, { "epoch": 0.33, "grad_norm": 4.830245018005371, "learning_rate": 1.9858224159012175e-05, "loss": 2.3612, "step": 25291 }, { "epoch": 0.33, "grad_norm": 3.636059284210205, "learning_rate": 1.9858206526969444e-05, "loss": 1.6254, "step": 25292 }, { "epoch": 0.33, "grad_norm": 3.686837673187256, "learning_rate": 1.9858188893838205e-05, "loss": 2.1893, "step": 25293 }, { "epoch": 0.33, "grad_norm": 3.7125837802886963, "learning_rate": 1.9858171259618454e-05, "loss": 2.0181, "step": 25294 }, { "epoch": 0.33, "grad_norm": 4.158401012420654, "learning_rate": 1.9858153624310195e-05, "loss": 1.9183, "step": 25295 }, { "epoch": 0.33, "grad_norm": 6.8492584228515625, "learning_rate": 1.9858135987913427e-05, "loss": 2.7223, "step": 25296 }, { "epoch": 0.33, "grad_norm": 4.906438827514648, "learning_rate": 1.9858118350428153e-05, "loss": 2.7536, "step": 25297 }, { "epoch": 0.33, "grad_norm": 3.9514145851135254, "learning_rate": 1.9858100711854375e-05, "loss": 2.0711, "step": 25298 }, { "epoch": 0.33, "grad_norm": 3.81866717338562, "learning_rate": 1.98580830721921e-05, "loss": 1.4499, "step": 25299 }, { "epoch": 0.33, "grad_norm": 3.4163968563079834, "learning_rate": 1.985806543144132e-05, "loss": 1.4142, "step": 25300 }, { "epoch": 0.33, "grad_norm": 4.867303371429443, "learning_rate": 1.9858047789602044e-05, "loss": 2.7211, "step": 25301 }, { "epoch": 0.33, "grad_norm": 4.289685249328613, "learning_rate": 1.9858030146674272e-05, "loss": 2.4307, "step": 25302 }, { "epoch": 0.33, "grad_norm": 4.079410552978516, "learning_rate": 1.985801250265801e-05, "loss": 2.2393, "step": 25303 }, { "epoch": 0.33, "grad_norm": 3.274872303009033, "learning_rate": 1.985799485755325e-05, "loss": 1.9604, "step": 25304 }, { "epoch": 0.33, "grad_norm": 3.975446939468384, "learning_rate": 1.9857977211360005e-05, "loss": 1.9222, "step": 25305 }, { "epoch": 0.33, "grad_norm": 3.9477975368499756, "learning_rate": 1.9857959564078267e-05, "loss": 2.3376, "step": 25306 }, { "epoch": 0.33, "grad_norm": 3.563474178314209, "learning_rate": 1.985794191570805e-05, "loss": 1.7433, "step": 25307 }, { "epoch": 0.33, "grad_norm": 3.925618886947632, "learning_rate": 1.9857924266249344e-05, "loss": 1.9677, "step": 25308 }, { "epoch": 0.33, "grad_norm": 3.9263579845428467, "learning_rate": 1.985790661570216e-05, "loss": 1.7557, "step": 25309 }, { "epoch": 0.33, "grad_norm": 4.354927062988281, "learning_rate": 1.985788896406649e-05, "loss": 2.3934, "step": 25310 }, { "epoch": 0.33, "grad_norm": 4.208222389221191, "learning_rate": 1.9857871311342344e-05, "loss": 2.0987, "step": 25311 }, { "epoch": 0.33, "grad_norm": 4.357600688934326, "learning_rate": 1.9857853657529725e-05, "loss": 2.76, "step": 25312 }, { "epoch": 0.33, "grad_norm": 4.173896789550781, "learning_rate": 1.9857836002628628e-05, "loss": 2.578, "step": 25313 }, { "epoch": 0.33, "grad_norm": 3.819490909576416, "learning_rate": 1.985781834663906e-05, "loss": 2.0309, "step": 25314 }, { "epoch": 0.33, "grad_norm": 4.04708194732666, "learning_rate": 1.985780068956102e-05, "loss": 2.5125, "step": 25315 }, { "epoch": 0.33, "grad_norm": 3.0773708820343018, "learning_rate": 1.9857783031394512e-05, "loss": 1.3484, "step": 25316 }, { "epoch": 0.33, "grad_norm": 3.8053083419799805, "learning_rate": 1.985776537213954e-05, "loss": 2.2813, "step": 25317 }, { "epoch": 0.33, "grad_norm": 3.5213496685028076, "learning_rate": 1.9857747711796104e-05, "loss": 1.8881, "step": 25318 }, { "epoch": 0.33, "grad_norm": 3.6986677646636963, "learning_rate": 1.9857730050364203e-05, "loss": 1.6609, "step": 25319 }, { "epoch": 0.33, "grad_norm": 3.9733543395996094, "learning_rate": 1.985771238784384e-05, "loss": 1.9279, "step": 25320 }, { "epoch": 0.33, "grad_norm": 3.5910890102386475, "learning_rate": 1.985769472423502e-05, "loss": 2.0056, "step": 25321 }, { "epoch": 0.33, "grad_norm": 3.7214443683624268, "learning_rate": 1.9857677059537744e-05, "loss": 1.8192, "step": 25322 }, { "epoch": 0.33, "grad_norm": 4.299983501434326, "learning_rate": 1.9857659393752016e-05, "loss": 2.6623, "step": 25323 }, { "epoch": 0.33, "grad_norm": 4.185444355010986, "learning_rate": 1.985764172687783e-05, "loss": 2.0474, "step": 25324 }, { "epoch": 0.33, "grad_norm": 4.21364164352417, "learning_rate": 1.9857624058915194e-05, "loss": 2.2788, "step": 25325 }, { "epoch": 0.33, "grad_norm": 3.599008083343506, "learning_rate": 1.985760638986411e-05, "loss": 2.0187, "step": 25326 }, { "epoch": 0.33, "grad_norm": 4.907016277313232, "learning_rate": 1.985758871972458e-05, "loss": 2.7811, "step": 25327 }, { "epoch": 0.33, "grad_norm": 4.288336277008057, "learning_rate": 1.9857571048496606e-05, "loss": 2.3129, "step": 25328 }, { "epoch": 0.33, "grad_norm": 3.9711251258850098, "learning_rate": 1.9857553376180186e-05, "loss": 1.7799, "step": 25329 }, { "epoch": 0.33, "grad_norm": 3.5471816062927246, "learning_rate": 1.9857535702775325e-05, "loss": 1.7552, "step": 25330 }, { "epoch": 0.33, "grad_norm": 4.309708595275879, "learning_rate": 1.9857518028282027e-05, "loss": 2.1086, "step": 25331 }, { "epoch": 0.33, "grad_norm": 3.7687644958496094, "learning_rate": 1.985750035270029e-05, "loss": 2.2058, "step": 25332 }, { "epoch": 0.33, "grad_norm": 3.6586968898773193, "learning_rate": 1.9857482676030116e-05, "loss": 1.9257, "step": 25333 }, { "epoch": 0.33, "grad_norm": 4.2461934089660645, "learning_rate": 1.9857464998271513e-05, "loss": 2.5329, "step": 25334 }, { "epoch": 0.33, "grad_norm": 4.338064193725586, "learning_rate": 1.9857447319424475e-05, "loss": 2.6184, "step": 25335 }, { "epoch": 0.33, "grad_norm": 3.6936545372009277, "learning_rate": 1.985742963948901e-05, "loss": 1.7345, "step": 25336 }, { "epoch": 0.33, "grad_norm": 3.262392282485962, "learning_rate": 1.9857411958465116e-05, "loss": 1.7304, "step": 25337 }, { "epoch": 0.33, "grad_norm": 4.401744842529297, "learning_rate": 1.9857394276352797e-05, "loss": 2.4771, "step": 25338 }, { "epoch": 0.33, "grad_norm": 3.711597442626953, "learning_rate": 1.9857376593152055e-05, "loss": 1.853, "step": 25339 }, { "epoch": 0.33, "grad_norm": 3.946470022201538, "learning_rate": 1.985735890886289e-05, "loss": 2.1219, "step": 25340 }, { "epoch": 0.33, "grad_norm": 3.5226545333862305, "learning_rate": 1.9857341223485306e-05, "loss": 1.7898, "step": 25341 }, { "epoch": 0.33, "grad_norm": 3.4685869216918945, "learning_rate": 1.9857323537019305e-05, "loss": 1.5313, "step": 25342 }, { "epoch": 0.33, "grad_norm": 3.980085849761963, "learning_rate": 1.9857305849464888e-05, "loss": 2.1053, "step": 25343 }, { "epoch": 0.33, "grad_norm": 4.617813587188721, "learning_rate": 1.9857288160822057e-05, "loss": 2.7341, "step": 25344 }, { "epoch": 0.33, "grad_norm": 4.110257625579834, "learning_rate": 1.9857270471090816e-05, "loss": 2.2811, "step": 25345 }, { "epoch": 0.33, "grad_norm": 4.530849456787109, "learning_rate": 1.985725278027116e-05, "loss": 2.7008, "step": 25346 }, { "epoch": 0.33, "grad_norm": 3.8819336891174316, "learning_rate": 1.98572350883631e-05, "loss": 2.0425, "step": 25347 }, { "epoch": 0.33, "grad_norm": 3.318150758743286, "learning_rate": 1.9857217395366635e-05, "loss": 1.7198, "step": 25348 }, { "epoch": 0.33, "grad_norm": 3.7710108757019043, "learning_rate": 1.9857199701281763e-05, "loss": 1.8494, "step": 25349 }, { "epoch": 0.33, "grad_norm": 4.400841236114502, "learning_rate": 1.9857182006108488e-05, "loss": 2.7646, "step": 25350 }, { "epoch": 0.33, "grad_norm": 4.164058685302734, "learning_rate": 1.9857164309846816e-05, "loss": 2.3824, "step": 25351 }, { "epoch": 0.33, "grad_norm": 4.065594673156738, "learning_rate": 1.9857146612496743e-05, "loss": 2.2803, "step": 25352 }, { "epoch": 0.33, "grad_norm": 4.14541482925415, "learning_rate": 1.9857128914058278e-05, "loss": 2.6697, "step": 25353 }, { "epoch": 0.33, "grad_norm": 3.7345781326293945, "learning_rate": 1.9857111214531416e-05, "loss": 2.1815, "step": 25354 }, { "epoch": 0.33, "grad_norm": 3.937382936477661, "learning_rate": 1.9857093513916164e-05, "loss": 2.1759, "step": 25355 }, { "epoch": 0.33, "grad_norm": 3.6537420749664307, "learning_rate": 1.985707581221252e-05, "loss": 1.8101, "step": 25356 }, { "epoch": 0.33, "grad_norm": 3.800093412399292, "learning_rate": 1.9857058109420487e-05, "loss": 2.3035, "step": 25357 }, { "epoch": 0.33, "grad_norm": 3.537039041519165, "learning_rate": 1.9857040405540065e-05, "loss": 1.7804, "step": 25358 }, { "epoch": 0.33, "grad_norm": 3.4800100326538086, "learning_rate": 1.9857022700571264e-05, "loss": 1.5723, "step": 25359 }, { "epoch": 0.33, "grad_norm": 3.8168156147003174, "learning_rate": 1.9857004994514076e-05, "loss": 2.4079, "step": 25360 }, { "epoch": 0.33, "grad_norm": 4.12612247467041, "learning_rate": 1.9856987287368512e-05, "loss": 2.444, "step": 25361 }, { "epoch": 0.33, "grad_norm": 4.497659206390381, "learning_rate": 1.9856969579134568e-05, "loss": 2.1499, "step": 25362 }, { "epoch": 0.33, "grad_norm": 3.6546881198883057, "learning_rate": 1.985695186981224e-05, "loss": 1.8294, "step": 25363 }, { "epoch": 0.33, "grad_norm": 4.23823881149292, "learning_rate": 1.9856934159401547e-05, "loss": 1.9874, "step": 25364 }, { "epoch": 0.33, "grad_norm": 3.5758137702941895, "learning_rate": 1.9856916447902478e-05, "loss": 1.9019, "step": 25365 }, { "epoch": 0.33, "grad_norm": 4.052855491638184, "learning_rate": 1.9856898735315035e-05, "loss": 1.908, "step": 25366 }, { "epoch": 0.33, "grad_norm": 4.062585830688477, "learning_rate": 1.985688102163923e-05, "loss": 2.1361, "step": 25367 }, { "epoch": 0.33, "grad_norm": 4.261397361755371, "learning_rate": 1.985686330687505e-05, "loss": 2.0501, "step": 25368 }, { "epoch": 0.33, "grad_norm": 4.095063209533691, "learning_rate": 1.985684559102251e-05, "loss": 2.1317, "step": 25369 }, { "epoch": 0.33, "grad_norm": 3.208953380584717, "learning_rate": 1.985682787408161e-05, "loss": 1.3167, "step": 25370 }, { "epoch": 0.33, "grad_norm": 4.118300914764404, "learning_rate": 1.9856810156052343e-05, "loss": 2.237, "step": 25371 }, { "epoch": 0.33, "grad_norm": 2.9432239532470703, "learning_rate": 1.985679243693472e-05, "loss": 1.425, "step": 25372 }, { "epoch": 0.33, "grad_norm": 4.402863025665283, "learning_rate": 1.9856774716728738e-05, "loss": 2.2384, "step": 25373 }, { "epoch": 0.33, "grad_norm": 3.923673152923584, "learning_rate": 1.98567569954344e-05, "loss": 2.4991, "step": 25374 }, { "epoch": 0.33, "grad_norm": 4.369879722595215, "learning_rate": 1.9856739273051712e-05, "loss": 2.2319, "step": 25375 }, { "epoch": 0.33, "grad_norm": 4.051589012145996, "learning_rate": 1.985672154958067e-05, "loss": 2.1856, "step": 25376 }, { "epoch": 0.33, "grad_norm": 3.608848810195923, "learning_rate": 1.9856703825021282e-05, "loss": 1.8315, "step": 25377 }, { "epoch": 0.33, "grad_norm": 3.9821884632110596, "learning_rate": 1.9856686099373547e-05, "loss": 2.3135, "step": 25378 }, { "epoch": 0.33, "grad_norm": 3.6993248462677, "learning_rate": 1.9856668372637463e-05, "loss": 1.9081, "step": 25379 }, { "epoch": 0.33, "grad_norm": 4.416519641876221, "learning_rate": 1.9856650644813036e-05, "loss": 2.2404, "step": 25380 }, { "epoch": 0.33, "grad_norm": 4.386394023895264, "learning_rate": 1.9856632915900266e-05, "loss": 2.5464, "step": 25381 }, { "epoch": 0.33, "grad_norm": 3.6663966178894043, "learning_rate": 1.985661518589916e-05, "loss": 1.8271, "step": 25382 }, { "epoch": 0.33, "grad_norm": 3.3635005950927734, "learning_rate": 1.9856597454809717e-05, "loss": 1.6936, "step": 25383 }, { "epoch": 0.33, "grad_norm": 4.235503673553467, "learning_rate": 1.9856579722631934e-05, "loss": 1.9642, "step": 25384 }, { "epoch": 0.33, "grad_norm": 4.329076766967773, "learning_rate": 1.985656198936582e-05, "loss": 2.2906, "step": 25385 }, { "epoch": 0.33, "grad_norm": 4.57094669342041, "learning_rate": 1.9856544255011377e-05, "loss": 2.2302, "step": 25386 }, { "epoch": 0.33, "grad_norm": 4.413535118103027, "learning_rate": 1.98565265195686e-05, "loss": 2.1995, "step": 25387 }, { "epoch": 0.33, "grad_norm": 3.97818660736084, "learning_rate": 1.98565087830375e-05, "loss": 2.0813, "step": 25388 }, { "epoch": 0.33, "grad_norm": 4.051177978515625, "learning_rate": 1.985649104541807e-05, "loss": 2.4123, "step": 25389 }, { "epoch": 0.33, "grad_norm": 3.467913866043091, "learning_rate": 1.9856473306710317e-05, "loss": 1.7452, "step": 25390 }, { "epoch": 0.33, "grad_norm": 4.410880088806152, "learning_rate": 1.9856455566914245e-05, "loss": 2.2083, "step": 25391 }, { "epoch": 0.33, "grad_norm": 3.691917657852173, "learning_rate": 1.9856437826029847e-05, "loss": 2.0913, "step": 25392 }, { "epoch": 0.33, "grad_norm": 4.08491325378418, "learning_rate": 1.9856420084057137e-05, "loss": 2.0035, "step": 25393 }, { "epoch": 0.33, "grad_norm": 3.9665980339050293, "learning_rate": 1.985640234099611e-05, "loss": 1.9526, "step": 25394 }, { "epoch": 0.33, "grad_norm": 4.233813762664795, "learning_rate": 1.985638459684677e-05, "loss": 1.9818, "step": 25395 }, { "epoch": 0.33, "grad_norm": 4.221645355224609, "learning_rate": 1.9856366851609113e-05, "loss": 2.4455, "step": 25396 }, { "epoch": 0.33, "grad_norm": 4.436112880706787, "learning_rate": 1.985634910528315e-05, "loss": 2.3831, "step": 25397 }, { "epoch": 0.33, "grad_norm": 4.149008274078369, "learning_rate": 1.9856331357868875e-05, "loss": 2.1244, "step": 25398 }, { "epoch": 0.33, "grad_norm": 3.7919626235961914, "learning_rate": 1.9856313609366297e-05, "loss": 1.7268, "step": 25399 }, { "epoch": 0.33, "grad_norm": 4.179787635803223, "learning_rate": 1.9856295859775414e-05, "loss": 2.4649, "step": 25400 }, { "epoch": 0.33, "grad_norm": 4.186784267425537, "learning_rate": 1.985627810909623e-05, "loss": 2.3763, "step": 25401 }, { "epoch": 0.33, "grad_norm": 4.690212249755859, "learning_rate": 1.9856260357328742e-05, "loss": 2.3635, "step": 25402 }, { "epoch": 0.33, "grad_norm": 3.826190948486328, "learning_rate": 1.985624260447296e-05, "loss": 2.1175, "step": 25403 }, { "epoch": 0.33, "grad_norm": 4.237477779388428, "learning_rate": 1.985622485052888e-05, "loss": 2.1196, "step": 25404 }, { "epoch": 0.33, "grad_norm": 4.136421203613281, "learning_rate": 1.98562070954965e-05, "loss": 1.8214, "step": 25405 }, { "epoch": 0.33, "grad_norm": 3.4954094886779785, "learning_rate": 1.9856189339375834e-05, "loss": 1.661, "step": 25406 }, { "epoch": 0.33, "grad_norm": 4.0795979499816895, "learning_rate": 1.985617158216688e-05, "loss": 2.2974, "step": 25407 }, { "epoch": 0.33, "grad_norm": 3.901582717895508, "learning_rate": 1.985615382386963e-05, "loss": 2.1409, "step": 25408 }, { "epoch": 0.33, "grad_norm": 5.0515947341918945, "learning_rate": 1.98561360644841e-05, "loss": 2.7371, "step": 25409 }, { "epoch": 0.33, "grad_norm": 3.7139782905578613, "learning_rate": 1.9856118304010283e-05, "loss": 2.1381, "step": 25410 }, { "epoch": 0.33, "grad_norm": 3.4417455196380615, "learning_rate": 1.985610054244818e-05, "loss": 1.6787, "step": 25411 }, { "epoch": 0.33, "grad_norm": 3.4766392707824707, "learning_rate": 1.98560827797978e-05, "loss": 1.9292, "step": 25412 }, { "epoch": 0.33, "grad_norm": 4.037191867828369, "learning_rate": 1.985606501605914e-05, "loss": 2.0311, "step": 25413 }, { "epoch": 0.33, "grad_norm": 4.323462009429932, "learning_rate": 1.9856047251232204e-05, "loss": 2.0586, "step": 25414 }, { "epoch": 0.33, "grad_norm": 3.951052665710449, "learning_rate": 1.985602948531699e-05, "loss": 2.0384, "step": 25415 }, { "epoch": 0.33, "grad_norm": 3.6718552112579346, "learning_rate": 1.9856011718313512e-05, "loss": 1.8201, "step": 25416 }, { "epoch": 0.33, "grad_norm": 3.624601125717163, "learning_rate": 1.9855993950221752e-05, "loss": 1.6723, "step": 25417 }, { "epoch": 0.33, "grad_norm": 4.393163204193115, "learning_rate": 1.985597618104173e-05, "loss": 2.3619, "step": 25418 }, { "epoch": 0.33, "grad_norm": 4.550313949584961, "learning_rate": 1.985595841077344e-05, "loss": 1.9964, "step": 25419 }, { "epoch": 0.33, "grad_norm": 3.6764206886291504, "learning_rate": 1.9855940639416885e-05, "loss": 1.7185, "step": 25420 }, { "epoch": 0.33, "grad_norm": 3.67045521736145, "learning_rate": 1.9855922866972063e-05, "loss": 1.9281, "step": 25421 }, { "epoch": 0.33, "grad_norm": 3.809692144393921, "learning_rate": 1.9855905093438984e-05, "loss": 1.8842, "step": 25422 }, { "epoch": 0.33, "grad_norm": 3.6977813243865967, "learning_rate": 1.9855887318817643e-05, "loss": 1.9616, "step": 25423 }, { "epoch": 0.33, "grad_norm": 3.4201772212982178, "learning_rate": 1.985586954310805e-05, "loss": 1.5569, "step": 25424 }, { "epoch": 0.33, "grad_norm": 3.97713565826416, "learning_rate": 1.9855851766310196e-05, "loss": 2.0972, "step": 25425 }, { "epoch": 0.33, "grad_norm": 3.984421491622925, "learning_rate": 1.9855833988424092e-05, "loss": 2.5701, "step": 25426 }, { "epoch": 0.33, "grad_norm": 3.7272393703460693, "learning_rate": 1.9855816209449738e-05, "loss": 2.0749, "step": 25427 }, { "epoch": 0.33, "grad_norm": 3.768355131149292, "learning_rate": 1.985579842938713e-05, "loss": 2.1282, "step": 25428 }, { "epoch": 0.33, "grad_norm": 3.6764419078826904, "learning_rate": 1.985578064823628e-05, "loss": 1.6949, "step": 25429 }, { "epoch": 0.33, "grad_norm": 4.0863213539123535, "learning_rate": 1.9855762865997178e-05, "loss": 2.1134, "step": 25430 }, { "epoch": 0.33, "grad_norm": 4.074569225311279, "learning_rate": 1.9855745082669836e-05, "loss": 2.2087, "step": 25431 }, { "epoch": 0.33, "grad_norm": 4.0162529945373535, "learning_rate": 1.9855727298254254e-05, "loss": 2.4093, "step": 25432 }, { "epoch": 0.33, "grad_norm": 3.4142401218414307, "learning_rate": 1.985570951275043e-05, "loss": 1.3827, "step": 25433 }, { "epoch": 0.33, "grad_norm": 3.510596513748169, "learning_rate": 1.985569172615837e-05, "loss": 1.8541, "step": 25434 }, { "epoch": 0.33, "grad_norm": 4.093907356262207, "learning_rate": 1.9855673938478075e-05, "loss": 2.0151, "step": 25435 }, { "epoch": 0.33, "grad_norm": 3.6774752140045166, "learning_rate": 1.9855656149709546e-05, "loss": 1.9877, "step": 25436 }, { "epoch": 0.33, "grad_norm": 4.039875507354736, "learning_rate": 1.9855638359852783e-05, "loss": 2.1122, "step": 25437 }, { "epoch": 0.33, "grad_norm": 3.9881668090820312, "learning_rate": 1.9855620568907793e-05, "loss": 1.6118, "step": 25438 }, { "epoch": 0.33, "grad_norm": 4.1711273193359375, "learning_rate": 1.9855602776874576e-05, "loss": 2.4671, "step": 25439 }, { "epoch": 0.33, "grad_norm": 4.735293865203857, "learning_rate": 1.985558498375313e-05, "loss": 2.3927, "step": 25440 }, { "epoch": 0.33, "grad_norm": 4.0680108070373535, "learning_rate": 1.9855567189543463e-05, "loss": 1.8225, "step": 25441 }, { "epoch": 0.33, "grad_norm": 3.653881549835205, "learning_rate": 1.9855549394245574e-05, "loss": 1.5974, "step": 25442 }, { "epoch": 0.33, "grad_norm": 3.8056867122650146, "learning_rate": 1.9855531597859464e-05, "loss": 1.902, "step": 25443 }, { "epoch": 0.33, "grad_norm": 4.267029762268066, "learning_rate": 1.9855513800385137e-05, "loss": 2.6479, "step": 25444 }, { "epoch": 0.33, "grad_norm": 4.011100769042969, "learning_rate": 1.9855496001822594e-05, "loss": 1.9549, "step": 25445 }, { "epoch": 0.33, "grad_norm": 4.200180530548096, "learning_rate": 1.9855478202171836e-05, "loss": 2.0572, "step": 25446 }, { "epoch": 0.33, "grad_norm": 3.9261393547058105, "learning_rate": 1.9855460401432868e-05, "loss": 2.0288, "step": 25447 }, { "epoch": 0.33, "grad_norm": 4.037707805633545, "learning_rate": 1.9855442599605687e-05, "loss": 2.3784, "step": 25448 }, { "epoch": 0.33, "grad_norm": 4.186760902404785, "learning_rate": 1.9855424796690298e-05, "loss": 2.5161, "step": 25449 }, { "epoch": 0.33, "grad_norm": 4.128947734832764, "learning_rate": 1.9855406992686706e-05, "loss": 2.0231, "step": 25450 }, { "epoch": 0.33, "grad_norm": 4.342897415161133, "learning_rate": 1.985538918759491e-05, "loss": 2.1516, "step": 25451 }, { "epoch": 0.33, "grad_norm": 3.474238872528076, "learning_rate": 1.9855371381414908e-05, "loss": 1.7046, "step": 25452 }, { "epoch": 0.33, "grad_norm": 3.7966058254241943, "learning_rate": 1.985535357414671e-05, "loss": 1.6673, "step": 25453 }, { "epoch": 0.33, "grad_norm": 3.946666955947876, "learning_rate": 1.9855335765790313e-05, "loss": 1.9667, "step": 25454 }, { "epoch": 0.33, "grad_norm": 4.619274139404297, "learning_rate": 1.9855317956345716e-05, "loss": 2.4446, "step": 25455 }, { "epoch": 0.33, "grad_norm": 3.947113037109375, "learning_rate": 1.9855300145812927e-05, "loss": 2.0069, "step": 25456 }, { "epoch": 0.33, "grad_norm": 4.138064384460449, "learning_rate": 1.9855282334191947e-05, "loss": 1.6439, "step": 25457 }, { "epoch": 0.33, "grad_norm": 3.189310073852539, "learning_rate": 1.9855264521482774e-05, "loss": 1.4347, "step": 25458 }, { "epoch": 0.33, "grad_norm": 3.5821714401245117, "learning_rate": 1.9855246707685414e-05, "loss": 1.6814, "step": 25459 }, { "epoch": 0.33, "grad_norm": 3.528738260269165, "learning_rate": 1.9855228892799868e-05, "loss": 1.8291, "step": 25460 }, { "epoch": 0.33, "grad_norm": 3.940918445587158, "learning_rate": 1.9855211076826135e-05, "loss": 2.0684, "step": 25461 }, { "epoch": 0.33, "grad_norm": 4.207390785217285, "learning_rate": 1.9855193259764222e-05, "loss": 2.1121, "step": 25462 }, { "epoch": 0.33, "grad_norm": 3.857454299926758, "learning_rate": 1.985517544161413e-05, "loss": 2.0002, "step": 25463 }, { "epoch": 0.33, "grad_norm": 3.6745758056640625, "learning_rate": 1.9855157622375857e-05, "loss": 2.1196, "step": 25464 }, { "epoch": 0.33, "grad_norm": 3.9790122509002686, "learning_rate": 1.985513980204941e-05, "loss": 1.8543, "step": 25465 }, { "epoch": 0.33, "grad_norm": 4.068685531616211, "learning_rate": 1.9855121980634783e-05, "loss": 1.9678, "step": 25466 }, { "epoch": 0.33, "grad_norm": 4.262838840484619, "learning_rate": 1.9855104158131988e-05, "loss": 2.122, "step": 25467 }, { "epoch": 0.33, "grad_norm": 3.5249791145324707, "learning_rate": 1.985508633454102e-05, "loss": 1.6559, "step": 25468 }, { "epoch": 0.33, "grad_norm": 3.90476655960083, "learning_rate": 1.9855068509861882e-05, "loss": 1.7349, "step": 25469 }, { "epoch": 0.33, "grad_norm": 3.883342981338501, "learning_rate": 1.985505068409458e-05, "loss": 1.662, "step": 25470 }, { "epoch": 0.33, "grad_norm": 4.24395227432251, "learning_rate": 1.9855032857239115e-05, "loss": 2.0306, "step": 25471 }, { "epoch": 0.33, "grad_norm": 4.5602240562438965, "learning_rate": 1.9855015029295485e-05, "loss": 2.0074, "step": 25472 }, { "epoch": 0.33, "grad_norm": 3.170844554901123, "learning_rate": 1.9854997200263692e-05, "loss": 1.6533, "step": 25473 }, { "epoch": 0.33, "grad_norm": 4.185342311859131, "learning_rate": 1.9854979370143743e-05, "loss": 2.1486, "step": 25474 }, { "epoch": 0.33, "grad_norm": 4.250524520874023, "learning_rate": 1.9854961538935638e-05, "loss": 2.7257, "step": 25475 }, { "epoch": 0.33, "grad_norm": 3.3336691856384277, "learning_rate": 1.9854943706639377e-05, "loss": 1.6364, "step": 25476 }, { "epoch": 0.33, "grad_norm": 3.556979179382324, "learning_rate": 1.9854925873254963e-05, "loss": 1.8472, "step": 25477 }, { "epoch": 0.33, "grad_norm": 3.7542178630828857, "learning_rate": 1.9854908038782397e-05, "loss": 2.3315, "step": 25478 }, { "epoch": 0.33, "grad_norm": 3.8946492671966553, "learning_rate": 1.985489020322168e-05, "loss": 2.0016, "step": 25479 }, { "epoch": 0.33, "grad_norm": 3.6846628189086914, "learning_rate": 1.985487236657282e-05, "loss": 1.8238, "step": 25480 }, { "epoch": 0.33, "grad_norm": 3.638929843902588, "learning_rate": 1.985485452883581e-05, "loss": 1.5225, "step": 25481 }, { "epoch": 0.33, "grad_norm": 4.616853713989258, "learning_rate": 1.985483669001066e-05, "loss": 2.4267, "step": 25482 }, { "epoch": 0.33, "grad_norm": 3.9428441524505615, "learning_rate": 1.985481885009737e-05, "loss": 1.9041, "step": 25483 }, { "epoch": 0.33, "grad_norm": 3.8020293712615967, "learning_rate": 1.9854801009095942e-05, "loss": 2.0695, "step": 25484 }, { "epoch": 0.33, "grad_norm": 3.5007824897766113, "learning_rate": 1.985478316700637e-05, "loss": 1.8018, "step": 25485 }, { "epoch": 0.33, "grad_norm": 4.153321743011475, "learning_rate": 1.985476532382867e-05, "loss": 2.0493, "step": 25486 }, { "epoch": 0.33, "grad_norm": 3.6730802059173584, "learning_rate": 1.985474747956283e-05, "loss": 1.8106, "step": 25487 }, { "epoch": 0.33, "grad_norm": 4.139875888824463, "learning_rate": 1.9854729634208866e-05, "loss": 1.9735, "step": 25488 }, { "epoch": 0.33, "grad_norm": 4.400256156921387, "learning_rate": 1.985471178776677e-05, "loss": 2.6876, "step": 25489 }, { "epoch": 0.33, "grad_norm": 3.558614492416382, "learning_rate": 1.9854693940236545e-05, "loss": 1.8691, "step": 25490 }, { "epoch": 0.33, "grad_norm": 4.410339832305908, "learning_rate": 1.9854676091618196e-05, "loss": 2.4216, "step": 25491 }, { "epoch": 0.33, "grad_norm": 4.278016567230225, "learning_rate": 1.9854658241911722e-05, "loss": 2.3294, "step": 25492 }, { "epoch": 0.33, "grad_norm": 3.4416990280151367, "learning_rate": 1.985464039111713e-05, "loss": 1.6357, "step": 25493 }, { "epoch": 0.33, "grad_norm": 4.447800636291504, "learning_rate": 1.9854622539234413e-05, "loss": 2.1763, "step": 25494 }, { "epoch": 0.33, "grad_norm": 3.6173362731933594, "learning_rate": 1.9854604686263582e-05, "loss": 2.0317, "step": 25495 }, { "epoch": 0.33, "grad_norm": 3.355916976928711, "learning_rate": 1.9854586832204636e-05, "loss": 1.9281, "step": 25496 }, { "epoch": 0.33, "grad_norm": 4.059465408325195, "learning_rate": 1.9854568977057577e-05, "loss": 2.2031, "step": 25497 }, { "epoch": 0.33, "grad_norm": 3.964261531829834, "learning_rate": 1.9854551120822403e-05, "loss": 2.1027, "step": 25498 }, { "epoch": 0.33, "grad_norm": 4.115058898925781, "learning_rate": 1.985453326349912e-05, "loss": 2.1797, "step": 25499 }, { "epoch": 0.33, "grad_norm": 4.1918487548828125, "learning_rate": 1.9854515405087732e-05, "loss": 2.2264, "step": 25500 }, { "epoch": 0.33, "grad_norm": 4.581357479095459, "learning_rate": 1.9854497545588236e-05, "loss": 2.6678, "step": 25501 }, { "epoch": 0.33, "grad_norm": 4.8428850173950195, "learning_rate": 1.9854479685000637e-05, "loss": 2.8976, "step": 25502 }, { "epoch": 0.33, "grad_norm": 5.399225234985352, "learning_rate": 1.9854461823324937e-05, "loss": 3.1614, "step": 25503 }, { "epoch": 0.33, "grad_norm": 4.251430988311768, "learning_rate": 1.9854443960561138e-05, "loss": 2.5055, "step": 25504 }, { "epoch": 0.33, "grad_norm": 3.708385467529297, "learning_rate": 1.985442609670924e-05, "loss": 1.8978, "step": 25505 }, { "epoch": 0.33, "grad_norm": 3.90262770652771, "learning_rate": 1.9854408231769244e-05, "loss": 2.0761, "step": 25506 }, { "epoch": 0.33, "grad_norm": 4.099893569946289, "learning_rate": 1.9854390365741156e-05, "loss": 2.1939, "step": 25507 }, { "epoch": 0.33, "grad_norm": 4.081127643585205, "learning_rate": 1.9854372498624976e-05, "loss": 2.3634, "step": 25508 }, { "epoch": 0.33, "grad_norm": 3.932034969329834, "learning_rate": 1.985435463042071e-05, "loss": 2.0489, "step": 25509 }, { "epoch": 0.33, "grad_norm": 3.8536624908447266, "learning_rate": 1.985433676112835e-05, "loss": 1.9273, "step": 25510 }, { "epoch": 0.33, "grad_norm": 4.048465728759766, "learning_rate": 1.9854318890747904e-05, "loss": 2.5356, "step": 25511 }, { "epoch": 0.33, "grad_norm": 4.960590839385986, "learning_rate": 1.9854301019279378e-05, "loss": 2.4699, "step": 25512 }, { "epoch": 0.33, "grad_norm": 3.4788997173309326, "learning_rate": 1.985428314672277e-05, "loss": 1.5608, "step": 25513 }, { "epoch": 0.33, "grad_norm": 3.7936370372772217, "learning_rate": 1.985426527307808e-05, "loss": 1.9092, "step": 25514 }, { "epoch": 0.33, "grad_norm": 4.032467365264893, "learning_rate": 1.9854247398345313e-05, "loss": 2.1989, "step": 25515 }, { "epoch": 0.33, "grad_norm": 3.734095573425293, "learning_rate": 1.9854229522524467e-05, "loss": 1.8812, "step": 25516 }, { "epoch": 0.33, "grad_norm": 3.538255214691162, "learning_rate": 1.985421164561555e-05, "loss": 2.188, "step": 25517 }, { "epoch": 0.33, "grad_norm": 3.9650135040283203, "learning_rate": 1.9854193767618558e-05, "loss": 1.7999, "step": 25518 }, { "epoch": 0.33, "grad_norm": 3.6218957901000977, "learning_rate": 1.98541758885335e-05, "loss": 1.7329, "step": 25519 }, { "epoch": 0.33, "grad_norm": 3.5629849433898926, "learning_rate": 1.985415800836037e-05, "loss": 1.7382, "step": 25520 }, { "epoch": 0.33, "grad_norm": 3.4735774993896484, "learning_rate": 1.9854140127099177e-05, "loss": 2.0298, "step": 25521 }, { "epoch": 0.33, "grad_norm": 3.8269217014312744, "learning_rate": 1.985412224474992e-05, "loss": 1.7899, "step": 25522 }, { "epoch": 0.33, "grad_norm": 4.029417037963867, "learning_rate": 1.9854104361312598e-05, "loss": 1.8639, "step": 25523 }, { "epoch": 0.33, "grad_norm": 3.5858731269836426, "learning_rate": 1.9854086476787214e-05, "loss": 1.623, "step": 25524 }, { "epoch": 0.33, "grad_norm": 3.7716782093048096, "learning_rate": 1.9854068591173777e-05, "loss": 1.9397, "step": 25525 }, { "epoch": 0.33, "grad_norm": 3.801067352294922, "learning_rate": 1.9854050704472278e-05, "loss": 1.9691, "step": 25526 }, { "epoch": 0.33, "grad_norm": 3.535325288772583, "learning_rate": 1.9854032816682728e-05, "loss": 1.8829, "step": 25527 }, { "epoch": 0.33, "grad_norm": 3.7223434448242188, "learning_rate": 1.9854014927805126e-05, "loss": 2.0625, "step": 25528 }, { "epoch": 0.33, "grad_norm": 3.7441930770874023, "learning_rate": 1.9853997037839474e-05, "loss": 1.7837, "step": 25529 }, { "epoch": 0.33, "grad_norm": 3.8777616024017334, "learning_rate": 1.985397914678577e-05, "loss": 1.8287, "step": 25530 }, { "epoch": 0.33, "grad_norm": 3.9504175186157227, "learning_rate": 1.9853961254644023e-05, "loss": 1.7472, "step": 25531 }, { "epoch": 0.33, "grad_norm": 4.54725456237793, "learning_rate": 1.985394336141423e-05, "loss": 2.4947, "step": 25532 }, { "epoch": 0.33, "grad_norm": 4.067291259765625, "learning_rate": 1.9853925467096395e-05, "loss": 2.3731, "step": 25533 }, { "epoch": 0.33, "grad_norm": 3.9571568965911865, "learning_rate": 1.9853907571690518e-05, "loss": 2.0551, "step": 25534 }, { "epoch": 0.33, "grad_norm": 4.1097235679626465, "learning_rate": 1.9853889675196606e-05, "loss": 2.29, "step": 25535 }, { "epoch": 0.33, "grad_norm": 3.3811850547790527, "learning_rate": 1.9853871777614654e-05, "loss": 1.7228, "step": 25536 }, { "epoch": 0.33, "grad_norm": 3.985426187515259, "learning_rate": 1.9853853878944667e-05, "loss": 2.2139, "step": 25537 }, { "epoch": 0.33, "grad_norm": 3.838792562484741, "learning_rate": 1.985383597918665e-05, "loss": 1.8806, "step": 25538 }, { "epoch": 0.33, "grad_norm": 3.7460451126098633, "learning_rate": 1.9853818078340602e-05, "loss": 1.7317, "step": 25539 }, { "epoch": 0.33, "grad_norm": 3.363157272338867, "learning_rate": 1.9853800176406523e-05, "loss": 1.7365, "step": 25540 }, { "epoch": 0.33, "grad_norm": 3.7084012031555176, "learning_rate": 1.9853782273384417e-05, "loss": 1.9085, "step": 25541 }, { "epoch": 0.33, "grad_norm": 4.035400390625, "learning_rate": 1.985376436927429e-05, "loss": 2.273, "step": 25542 }, { "epoch": 0.33, "grad_norm": 3.6439690589904785, "learning_rate": 1.985374646407614e-05, "loss": 1.8303, "step": 25543 }, { "epoch": 0.33, "grad_norm": 4.464640140533447, "learning_rate": 1.9853728557789965e-05, "loss": 1.81, "step": 25544 }, { "epoch": 0.33, "grad_norm": 4.416048049926758, "learning_rate": 1.9853710650415773e-05, "loss": 2.1505, "step": 25545 }, { "epoch": 0.33, "grad_norm": 4.731289386749268, "learning_rate": 1.9853692741953568e-05, "loss": 1.942, "step": 25546 }, { "epoch": 0.33, "grad_norm": 3.526963710784912, "learning_rate": 1.9853674832403345e-05, "loss": 1.7192, "step": 25547 }, { "epoch": 0.33, "grad_norm": 3.999791383743286, "learning_rate": 1.9853656921765108e-05, "loss": 2.6275, "step": 25548 }, { "epoch": 0.33, "grad_norm": 3.543267011642456, "learning_rate": 1.9853639010038864e-05, "loss": 1.785, "step": 25549 }, { "epoch": 0.33, "grad_norm": 4.091733932495117, "learning_rate": 1.9853621097224603e-05, "loss": 1.8256, "step": 25550 }, { "epoch": 0.33, "grad_norm": 3.930532932281494, "learning_rate": 1.9853603183322342e-05, "loss": 1.9273, "step": 25551 }, { "epoch": 0.33, "grad_norm": 3.561979293823242, "learning_rate": 1.9853585268332077e-05, "loss": 1.719, "step": 25552 }, { "epoch": 0.33, "grad_norm": 3.377938747406006, "learning_rate": 1.9853567352253805e-05, "loss": 1.7938, "step": 25553 }, { "epoch": 0.33, "grad_norm": 3.6987829208374023, "learning_rate": 1.9853549435087533e-05, "loss": 1.8227, "step": 25554 }, { "epoch": 0.33, "grad_norm": 4.950443744659424, "learning_rate": 1.9853531516833264e-05, "loss": 2.3063, "step": 25555 }, { "epoch": 0.33, "grad_norm": 4.090979099273682, "learning_rate": 1.9853513597490998e-05, "loss": 2.1737, "step": 25556 }, { "epoch": 0.33, "grad_norm": 4.007039546966553, "learning_rate": 1.985349567706073e-05, "loss": 2.4322, "step": 25557 }, { "epoch": 0.33, "grad_norm": 3.800684690475464, "learning_rate": 1.9853477755542475e-05, "loss": 1.9164, "step": 25558 }, { "epoch": 0.33, "grad_norm": 3.83396053314209, "learning_rate": 1.9853459832936232e-05, "loss": 1.7891, "step": 25559 }, { "epoch": 0.33, "grad_norm": 4.377411365509033, "learning_rate": 1.9853441909241992e-05, "loss": 2.2942, "step": 25560 }, { "epoch": 0.33, "grad_norm": 4.1195220947265625, "learning_rate": 1.985342398445977e-05, "loss": 2.2307, "step": 25561 }, { "epoch": 0.33, "grad_norm": 4.1178879737854, "learning_rate": 1.9853406058589562e-05, "loss": 2.1191, "step": 25562 }, { "epoch": 0.33, "grad_norm": 4.5111985206604, "learning_rate": 1.985338813163137e-05, "loss": 2.0875, "step": 25563 }, { "epoch": 0.33, "grad_norm": 4.145697116851807, "learning_rate": 1.98533702035852e-05, "loss": 2.5046, "step": 25564 }, { "epoch": 0.33, "grad_norm": 3.572969913482666, "learning_rate": 1.985335227445105e-05, "loss": 1.4071, "step": 25565 }, { "epoch": 0.33, "grad_norm": 4.025489807128906, "learning_rate": 1.985333434422892e-05, "loss": 1.71, "step": 25566 }, { "epoch": 0.33, "grad_norm": 3.863502264022827, "learning_rate": 1.985331641291881e-05, "loss": 1.9695, "step": 25567 }, { "epoch": 0.33, "grad_norm": 3.5620439052581787, "learning_rate": 1.9853298480520734e-05, "loss": 2.0176, "step": 25568 }, { "epoch": 0.33, "grad_norm": 4.473036766052246, "learning_rate": 1.9853280547034684e-05, "loss": 2.2214, "step": 25569 }, { "epoch": 0.33, "grad_norm": 4.262366771697998, "learning_rate": 1.9853262612460665e-05, "loss": 2.1385, "step": 25570 }, { "epoch": 0.33, "grad_norm": 4.161717414855957, "learning_rate": 1.9853244676798683e-05, "loss": 1.824, "step": 25571 }, { "epoch": 0.33, "grad_norm": 4.330039024353027, "learning_rate": 1.985322674004873e-05, "loss": 2.3294, "step": 25572 }, { "epoch": 0.33, "grad_norm": 3.589341402053833, "learning_rate": 1.9853208802210813e-05, "loss": 1.7414, "step": 25573 }, { "epoch": 0.33, "grad_norm": 3.8442251682281494, "learning_rate": 1.985319086328494e-05, "loss": 1.8328, "step": 25574 }, { "epoch": 0.33, "grad_norm": 3.880920886993408, "learning_rate": 1.9853172923271105e-05, "loss": 1.9125, "step": 25575 }, { "epoch": 0.33, "grad_norm": 4.0736260414123535, "learning_rate": 1.9853154982169308e-05, "loss": 2.1889, "step": 25576 }, { "epoch": 0.33, "grad_norm": 4.00935697555542, "learning_rate": 1.985313703997956e-05, "loss": 2.2103, "step": 25577 }, { "epoch": 0.33, "grad_norm": 3.893080711364746, "learning_rate": 1.9853119096701862e-05, "loss": 2.1118, "step": 25578 }, { "epoch": 0.33, "grad_norm": 3.875981569290161, "learning_rate": 1.9853101152336207e-05, "loss": 2.3551, "step": 25579 }, { "epoch": 0.33, "grad_norm": 3.3821935653686523, "learning_rate": 1.9853083206882605e-05, "loss": 2.0162, "step": 25580 }, { "epoch": 0.33, "grad_norm": 3.581263303756714, "learning_rate": 1.9853065260341054e-05, "loss": 1.9337, "step": 25581 }, { "epoch": 0.33, "grad_norm": 3.923408269882202, "learning_rate": 1.9853047312711557e-05, "loss": 2.206, "step": 25582 }, { "epoch": 0.33, "grad_norm": 3.7744970321655273, "learning_rate": 1.9853029363994117e-05, "loss": 2.1795, "step": 25583 }, { "epoch": 0.33, "grad_norm": 3.9736392498016357, "learning_rate": 1.9853011414188738e-05, "loss": 2.0843, "step": 25584 }, { "epoch": 0.33, "grad_norm": 3.8922488689422607, "learning_rate": 1.9852993463295416e-05, "loss": 2.1835, "step": 25585 }, { "epoch": 0.33, "grad_norm": 3.506347417831421, "learning_rate": 1.985297551131416e-05, "loss": 1.5753, "step": 25586 }, { "epoch": 0.33, "grad_norm": 3.1214404106140137, "learning_rate": 1.9852957558244962e-05, "loss": 1.6071, "step": 25587 }, { "epoch": 0.33, "grad_norm": 4.033096790313721, "learning_rate": 1.9852939604087836e-05, "loss": 1.8435, "step": 25588 }, { "epoch": 0.33, "grad_norm": 4.555647850036621, "learning_rate": 1.9852921648842774e-05, "loss": 2.1942, "step": 25589 }, { "epoch": 0.33, "grad_norm": 3.6181557178497314, "learning_rate": 1.9852903692509786e-05, "loss": 1.9365, "step": 25590 }, { "epoch": 0.33, "grad_norm": 4.251628875732422, "learning_rate": 1.985288573508887e-05, "loss": 2.0734, "step": 25591 }, { "epoch": 0.33, "grad_norm": 3.570723295211792, "learning_rate": 1.9852867776580027e-05, "loss": 1.6079, "step": 25592 }, { "epoch": 0.33, "grad_norm": 3.7655014991760254, "learning_rate": 1.985284981698326e-05, "loss": 1.9873, "step": 25593 }, { "epoch": 0.33, "grad_norm": 3.983457565307617, "learning_rate": 1.9852831856298572e-05, "loss": 2.5868, "step": 25594 }, { "epoch": 0.33, "grad_norm": 3.954984664916992, "learning_rate": 1.9852813894525966e-05, "loss": 2.2921, "step": 25595 }, { "epoch": 0.33, "grad_norm": 3.8917574882507324, "learning_rate": 1.9852795931665437e-05, "loss": 2.2659, "step": 25596 }, { "epoch": 0.33, "grad_norm": 3.735621690750122, "learning_rate": 1.9852777967717e-05, "loss": 1.7993, "step": 25597 }, { "epoch": 0.33, "grad_norm": 3.7590067386627197, "learning_rate": 1.9852760002680644e-05, "loss": 2.1526, "step": 25598 }, { "epoch": 0.33, "grad_norm": 4.425048351287842, "learning_rate": 1.9852742036556375e-05, "loss": 2.1907, "step": 25599 }, { "epoch": 0.33, "grad_norm": 3.8277995586395264, "learning_rate": 1.9852724069344198e-05, "loss": 1.9613, "step": 25600 }, { "epoch": 0.33, "grad_norm": 3.9194581508636475, "learning_rate": 1.985270610104411e-05, "loss": 2.1869, "step": 25601 }, { "epoch": 0.33, "grad_norm": 4.11040735244751, "learning_rate": 1.985268813165612e-05, "loss": 2.1611, "step": 25602 }, { "epoch": 0.33, "grad_norm": 4.03356409072876, "learning_rate": 1.9852670161180224e-05, "loss": 1.8941, "step": 25603 }, { "epoch": 0.33, "grad_norm": 4.477169513702393, "learning_rate": 1.985265218961643e-05, "loss": 2.0187, "step": 25604 }, { "epoch": 0.33, "grad_norm": 4.156832218170166, "learning_rate": 1.985263421696473e-05, "loss": 1.9726, "step": 25605 }, { "epoch": 0.33, "grad_norm": 3.8717472553253174, "learning_rate": 1.985261624322514e-05, "loss": 2.038, "step": 25606 }, { "epoch": 0.33, "grad_norm": 4.224579811096191, "learning_rate": 1.9852598268397648e-05, "loss": 1.9847, "step": 25607 }, { "epoch": 0.33, "grad_norm": 4.502526760101318, "learning_rate": 1.985258029248226e-05, "loss": 2.3171, "step": 25608 }, { "epoch": 0.33, "grad_norm": 4.121284008026123, "learning_rate": 1.9852562315478986e-05, "loss": 2.1485, "step": 25609 }, { "epoch": 0.33, "grad_norm": 3.8351898193359375, "learning_rate": 1.985254433738782e-05, "loss": 1.7593, "step": 25610 }, { "epoch": 0.33, "grad_norm": 3.852614402770996, "learning_rate": 1.9852526358208764e-05, "loss": 2.1301, "step": 25611 }, { "epoch": 0.33, "grad_norm": 3.7735326290130615, "learning_rate": 1.9852508377941824e-05, "loss": 2.1419, "step": 25612 }, { "epoch": 0.33, "grad_norm": 3.673264503479004, "learning_rate": 1.9852490396586995e-05, "loss": 2.09, "step": 25613 }, { "epoch": 0.33, "grad_norm": 3.597686529159546, "learning_rate": 1.985247241414429e-05, "loss": 1.5421, "step": 25614 }, { "epoch": 0.33, "grad_norm": 4.16015625, "learning_rate": 1.9852454430613703e-05, "loss": 2.3416, "step": 25615 }, { "epoch": 0.33, "grad_norm": 3.606041669845581, "learning_rate": 1.985243644599524e-05, "loss": 1.8179, "step": 25616 }, { "epoch": 0.33, "grad_norm": 4.1632232666015625, "learning_rate": 1.9852418460288897e-05, "loss": 2.1988, "step": 25617 }, { "epoch": 0.33, "grad_norm": 4.229452133178711, "learning_rate": 1.985240047349468e-05, "loss": 2.4813, "step": 25618 }, { "epoch": 0.33, "grad_norm": 4.106551170349121, "learning_rate": 1.9852382485612592e-05, "loss": 1.9527, "step": 25619 }, { "epoch": 0.33, "grad_norm": 4.724817752838135, "learning_rate": 1.9852364496642632e-05, "loss": 2.5227, "step": 25620 }, { "epoch": 0.33, "grad_norm": 3.902167558670044, "learning_rate": 1.9852346506584807e-05, "loss": 1.9049, "step": 25621 }, { "epoch": 0.33, "grad_norm": 4.050365447998047, "learning_rate": 1.9852328515439118e-05, "loss": 1.6907, "step": 25622 }, { "epoch": 0.33, "grad_norm": 3.8118736743927, "learning_rate": 1.985231052320556e-05, "loss": 2.2619, "step": 25623 }, { "epoch": 0.33, "grad_norm": 3.673444986343384, "learning_rate": 1.985229252988414e-05, "loss": 2.263, "step": 25624 }, { "epoch": 0.33, "grad_norm": 4.036586761474609, "learning_rate": 1.985227453547486e-05, "loss": 2.2399, "step": 25625 }, { "epoch": 0.33, "grad_norm": 4.12037992477417, "learning_rate": 1.9852256539977723e-05, "loss": 1.9191, "step": 25626 }, { "epoch": 0.33, "grad_norm": 4.444700241088867, "learning_rate": 1.9852238543392734e-05, "loss": 2.4984, "step": 25627 }, { "epoch": 0.33, "grad_norm": 4.007823467254639, "learning_rate": 1.9852220545719883e-05, "loss": 2.2668, "step": 25628 }, { "epoch": 0.33, "grad_norm": 3.6294708251953125, "learning_rate": 1.9852202546959184e-05, "loss": 2.2516, "step": 25629 }, { "epoch": 0.33, "grad_norm": 3.905895233154297, "learning_rate": 1.9852184547110635e-05, "loss": 2.0515, "step": 25630 }, { "epoch": 0.33, "grad_norm": 4.204405307769775, "learning_rate": 1.9852166546174234e-05, "loss": 2.0533, "step": 25631 }, { "epoch": 0.33, "grad_norm": 4.378454208374023, "learning_rate": 1.985214854414999e-05, "loss": 1.9746, "step": 25632 }, { "epoch": 0.33, "grad_norm": 4.171804428100586, "learning_rate": 1.98521305410379e-05, "loss": 2.0017, "step": 25633 }, { "epoch": 0.33, "grad_norm": 3.661153793334961, "learning_rate": 1.9852112536837968e-05, "loss": 1.681, "step": 25634 }, { "epoch": 0.33, "grad_norm": 3.8936495780944824, "learning_rate": 1.9852094531550196e-05, "loss": 2.0873, "step": 25635 }, { "epoch": 0.33, "grad_norm": 4.713878631591797, "learning_rate": 1.9852076525174583e-05, "loss": 2.5814, "step": 25636 }, { "epoch": 0.33, "grad_norm": 4.325951099395752, "learning_rate": 1.985205851771114e-05, "loss": 2.5739, "step": 25637 }, { "epoch": 0.33, "grad_norm": 4.066107749938965, "learning_rate": 1.9852040509159858e-05, "loss": 2.1382, "step": 25638 }, { "epoch": 0.33, "grad_norm": 4.440251350402832, "learning_rate": 1.9852022499520746e-05, "loss": 1.9708, "step": 25639 }, { "epoch": 0.33, "grad_norm": 3.7759757041931152, "learning_rate": 1.98520044887938e-05, "loss": 1.7506, "step": 25640 }, { "epoch": 0.33, "grad_norm": 3.764716625213623, "learning_rate": 1.9851986476979026e-05, "loss": 1.6601, "step": 25641 }, { "epoch": 0.33, "grad_norm": 4.153554439544678, "learning_rate": 1.9851968464076428e-05, "loss": 2.4575, "step": 25642 }, { "epoch": 0.33, "grad_norm": 4.0862202644348145, "learning_rate": 1.9851950450086005e-05, "loss": 1.9228, "step": 25643 }, { "epoch": 0.33, "grad_norm": 3.9103565216064453, "learning_rate": 1.985193243500776e-05, "loss": 1.8412, "step": 25644 }, { "epoch": 0.33, "grad_norm": 3.9206440448760986, "learning_rate": 1.9851914418841692e-05, "loss": 2.1967, "step": 25645 }, { "epoch": 0.33, "grad_norm": 3.8457601070404053, "learning_rate": 1.9851896401587808e-05, "loss": 1.8015, "step": 25646 }, { "epoch": 0.33, "grad_norm": 4.886895179748535, "learning_rate": 1.9851878383246107e-05, "loss": 2.1614, "step": 25647 }, { "epoch": 0.33, "grad_norm": 4.564316272735596, "learning_rate": 1.9851860363816588e-05, "loss": 2.4322, "step": 25648 }, { "epoch": 0.33, "grad_norm": 4.309081554412842, "learning_rate": 1.9851842343299262e-05, "loss": 2.4221, "step": 25649 }, { "epoch": 0.33, "grad_norm": 4.321869850158691, "learning_rate": 1.9851824321694122e-05, "loss": 2.0414, "step": 25650 }, { "epoch": 0.33, "grad_norm": 3.9444875717163086, "learning_rate": 1.9851806299001172e-05, "loss": 1.7179, "step": 25651 }, { "epoch": 0.33, "grad_norm": 3.961402416229248, "learning_rate": 1.985178827522042e-05, "loss": 1.933, "step": 25652 }, { "epoch": 0.33, "grad_norm": 4.105198383331299, "learning_rate": 1.9851770250351863e-05, "loss": 2.1745, "step": 25653 }, { "epoch": 0.33, "grad_norm": 3.452914237976074, "learning_rate": 1.9851752224395498e-05, "loss": 1.558, "step": 25654 }, { "epoch": 0.33, "grad_norm": 4.191147804260254, "learning_rate": 1.985173419735134e-05, "loss": 2.1748, "step": 25655 }, { "epoch": 0.33, "grad_norm": 3.5584585666656494, "learning_rate": 1.9851716169219377e-05, "loss": 1.9032, "step": 25656 }, { "epoch": 0.33, "grad_norm": 4.1314473152160645, "learning_rate": 1.985169813999962e-05, "loss": 2.2673, "step": 25657 }, { "epoch": 0.33, "grad_norm": 4.19912576675415, "learning_rate": 1.9851680109692068e-05, "loss": 2.3083, "step": 25658 }, { "epoch": 0.33, "grad_norm": 3.93613862991333, "learning_rate": 1.9851662078296725e-05, "loss": 1.9294, "step": 25659 }, { "epoch": 0.33, "grad_norm": 4.784022808074951, "learning_rate": 1.9851644045813588e-05, "loss": 2.7865, "step": 25660 }, { "epoch": 0.33, "grad_norm": 3.6883044242858887, "learning_rate": 1.9851626012242668e-05, "loss": 2.371, "step": 25661 }, { "epoch": 0.33, "grad_norm": 4.440400123596191, "learning_rate": 1.9851607977583954e-05, "loss": 2.4416, "step": 25662 }, { "epoch": 0.33, "grad_norm": 4.231785774230957, "learning_rate": 1.985158994183746e-05, "loss": 2.2117, "step": 25663 }, { "epoch": 0.33, "grad_norm": 5.053293704986572, "learning_rate": 1.9851571905003183e-05, "loss": 2.1258, "step": 25664 }, { "epoch": 0.33, "grad_norm": 4.0417256355285645, "learning_rate": 1.9851553867081126e-05, "loss": 2.2376, "step": 25665 }, { "epoch": 0.33, "grad_norm": 3.4983203411102295, "learning_rate": 1.985153582807129e-05, "loss": 1.7318, "step": 25666 }, { "epoch": 0.33, "grad_norm": 3.7452216148376465, "learning_rate": 1.9851517787973674e-05, "loss": 1.9444, "step": 25667 }, { "epoch": 0.33, "grad_norm": 4.131732940673828, "learning_rate": 1.9851499746788287e-05, "loss": 2.3157, "step": 25668 }, { "epoch": 0.33, "grad_norm": 4.1681742668151855, "learning_rate": 1.9851481704515127e-05, "loss": 2.2057, "step": 25669 }, { "epoch": 0.33, "grad_norm": 4.710596084594727, "learning_rate": 1.9851463661154193e-05, "loss": 2.5713, "step": 25670 }, { "epoch": 0.33, "grad_norm": 3.9915146827697754, "learning_rate": 1.9851445616705493e-05, "loss": 2.0477, "step": 25671 }, { "epoch": 0.33, "grad_norm": 3.69651198387146, "learning_rate": 1.9851427571169026e-05, "loss": 1.9254, "step": 25672 }, { "epoch": 0.33, "grad_norm": 3.948270082473755, "learning_rate": 1.9851409524544793e-05, "loss": 1.9057, "step": 25673 }, { "epoch": 0.33, "grad_norm": 4.317414283752441, "learning_rate": 1.98513914768328e-05, "loss": 2.3702, "step": 25674 }, { "epoch": 0.33, "grad_norm": 3.715106248855591, "learning_rate": 1.9851373428033044e-05, "loss": 2.0334, "step": 25675 }, { "epoch": 0.33, "grad_norm": 4.265444755554199, "learning_rate": 1.985135537814553e-05, "loss": 1.955, "step": 25676 }, { "epoch": 0.33, "grad_norm": 4.434043884277344, "learning_rate": 1.9851337327170262e-05, "loss": 2.306, "step": 25677 }, { "epoch": 0.33, "grad_norm": 3.7999236583709717, "learning_rate": 1.9851319275107234e-05, "loss": 1.6382, "step": 25678 }, { "epoch": 0.33, "grad_norm": 3.8154218196868896, "learning_rate": 1.9851301221956456e-05, "loss": 1.9598, "step": 25679 }, { "epoch": 0.33, "grad_norm": 4.063305854797363, "learning_rate": 1.9851283167717925e-05, "loss": 2.147, "step": 25680 }, { "epoch": 0.33, "grad_norm": 4.016801834106445, "learning_rate": 1.9851265112391648e-05, "loss": 2.0339, "step": 25681 }, { "epoch": 0.33, "grad_norm": 3.719994068145752, "learning_rate": 1.9851247055977625e-05, "loss": 1.6852, "step": 25682 }, { "epoch": 0.33, "grad_norm": 3.9402661323547363, "learning_rate": 1.9851228998475856e-05, "loss": 1.928, "step": 25683 }, { "epoch": 0.33, "grad_norm": 3.8463194370269775, "learning_rate": 1.9851210939886343e-05, "loss": 1.9893, "step": 25684 }, { "epoch": 0.33, "grad_norm": 3.8732755184173584, "learning_rate": 1.985119288020909e-05, "loss": 1.9352, "step": 25685 }, { "epoch": 0.33, "grad_norm": 3.613203287124634, "learning_rate": 1.9851174819444098e-05, "loss": 1.8314, "step": 25686 }, { "epoch": 0.33, "grad_norm": 4.055156230926514, "learning_rate": 1.9851156757591367e-05, "loss": 1.9949, "step": 25687 }, { "epoch": 0.33, "grad_norm": 3.612741470336914, "learning_rate": 1.9851138694650905e-05, "loss": 1.9485, "step": 25688 }, { "epoch": 0.33, "grad_norm": 3.693795680999756, "learning_rate": 1.9851120630622706e-05, "loss": 1.7278, "step": 25689 }, { "epoch": 0.33, "grad_norm": 4.136421203613281, "learning_rate": 1.9851102565506778e-05, "loss": 2.2564, "step": 25690 }, { "epoch": 0.33, "grad_norm": 4.094226360321045, "learning_rate": 1.9851084499303124e-05, "loss": 1.7168, "step": 25691 }, { "epoch": 0.33, "grad_norm": 4.245975494384766, "learning_rate": 1.985106643201174e-05, "loss": 2.3447, "step": 25692 }, { "epoch": 0.33, "grad_norm": 4.3019585609436035, "learning_rate": 1.985104836363263e-05, "loss": 2.3748, "step": 25693 }, { "epoch": 0.33, "grad_norm": 3.2314586639404297, "learning_rate": 1.98510302941658e-05, "loss": 1.482, "step": 25694 }, { "epoch": 0.33, "grad_norm": 4.19079065322876, "learning_rate": 1.985101222361125e-05, "loss": 2.4399, "step": 25695 }, { "epoch": 0.33, "grad_norm": 4.039697647094727, "learning_rate": 1.985099415196898e-05, "loss": 2.1028, "step": 25696 }, { "epoch": 0.33, "grad_norm": 3.5273163318634033, "learning_rate": 1.985097607923899e-05, "loss": 1.7657, "step": 25697 }, { "epoch": 0.33, "grad_norm": 3.668329954147339, "learning_rate": 1.985095800542129e-05, "loss": 1.7803, "step": 25698 }, { "epoch": 0.33, "grad_norm": 4.361430644989014, "learning_rate": 1.9850939930515874e-05, "loss": 2.3325, "step": 25699 }, { "epoch": 0.33, "grad_norm": 4.25089168548584, "learning_rate": 1.985092185452275e-05, "loss": 2.8377, "step": 25700 }, { "epoch": 0.33, "grad_norm": 3.8211171627044678, "learning_rate": 1.985090377744191e-05, "loss": 2.043, "step": 25701 }, { "epoch": 0.33, "grad_norm": 3.9691479206085205, "learning_rate": 1.985088569927337e-05, "loss": 2.3653, "step": 25702 }, { "epoch": 0.33, "grad_norm": 4.084926605224609, "learning_rate": 1.9850867620017126e-05, "loss": 2.1054, "step": 25703 }, { "epoch": 0.33, "grad_norm": 4.4189629554748535, "learning_rate": 1.9850849539673175e-05, "loss": 2.233, "step": 25704 }, { "epoch": 0.33, "grad_norm": 3.9938924312591553, "learning_rate": 1.9850831458241526e-05, "loss": 1.9413, "step": 25705 }, { "epoch": 0.33, "grad_norm": 3.9224421977996826, "learning_rate": 1.9850813375722174e-05, "loss": 2.0467, "step": 25706 }, { "epoch": 0.33, "grad_norm": 3.5059661865234375, "learning_rate": 1.9850795292115127e-05, "loss": 1.5607, "step": 25707 }, { "epoch": 0.33, "grad_norm": 4.0726776123046875, "learning_rate": 1.9850777207420385e-05, "loss": 2.2712, "step": 25708 }, { "epoch": 0.33, "grad_norm": 3.4521799087524414, "learning_rate": 1.9850759121637954e-05, "loss": 1.8598, "step": 25709 }, { "epoch": 0.33, "grad_norm": 4.15250301361084, "learning_rate": 1.9850741034767828e-05, "loss": 2.1855, "step": 25710 }, { "epoch": 0.33, "grad_norm": 4.487582206726074, "learning_rate": 1.9850722946810013e-05, "loss": 2.9058, "step": 25711 }, { "epoch": 0.33, "grad_norm": 3.5399439334869385, "learning_rate": 1.985070485776451e-05, "loss": 2.009, "step": 25712 }, { "epoch": 0.33, "grad_norm": 4.0527119636535645, "learning_rate": 1.9850686767631328e-05, "loss": 2.0211, "step": 25713 }, { "epoch": 0.33, "grad_norm": 3.6610031127929688, "learning_rate": 1.9850668676410458e-05, "loss": 2.2473, "step": 25714 }, { "epoch": 0.33, "grad_norm": 3.834256887435913, "learning_rate": 1.9850650584101906e-05, "loss": 1.9152, "step": 25715 }, { "epoch": 0.33, "grad_norm": 4.1593804359436035, "learning_rate": 1.985063249070568e-05, "loss": 2.1475, "step": 25716 }, { "epoch": 0.33, "grad_norm": 3.4672913551330566, "learning_rate": 1.9850614396221773e-05, "loss": 1.7654, "step": 25717 }, { "epoch": 0.33, "grad_norm": 4.047234535217285, "learning_rate": 1.9850596300650193e-05, "loss": 1.7025, "step": 25718 }, { "epoch": 0.33, "grad_norm": 4.546236991882324, "learning_rate": 1.9850578203990937e-05, "loss": 2.402, "step": 25719 }, { "epoch": 0.33, "grad_norm": 3.931723117828369, "learning_rate": 1.9850560106244013e-05, "loss": 2.0206, "step": 25720 }, { "epoch": 0.33, "grad_norm": 3.6352529525756836, "learning_rate": 1.985054200740942e-05, "loss": 1.5638, "step": 25721 }, { "epoch": 0.33, "grad_norm": 3.503192186355591, "learning_rate": 1.9850523907487158e-05, "loss": 1.7092, "step": 25722 }, { "epoch": 0.33, "grad_norm": 3.3298158645629883, "learning_rate": 1.9850505806477233e-05, "loss": 1.7054, "step": 25723 }, { "epoch": 0.33, "grad_norm": 4.023340702056885, "learning_rate": 1.9850487704379646e-05, "loss": 2.6395, "step": 25724 }, { "epoch": 0.33, "grad_norm": 4.056247234344482, "learning_rate": 1.98504696011944e-05, "loss": 2.4603, "step": 25725 }, { "epoch": 0.33, "grad_norm": 3.992220163345337, "learning_rate": 1.985045149692149e-05, "loss": 2.273, "step": 25726 }, { "epoch": 0.33, "grad_norm": 3.6511781215667725, "learning_rate": 1.9850433391560925e-05, "loss": 1.7507, "step": 25727 }, { "epoch": 0.33, "grad_norm": 3.5325260162353516, "learning_rate": 1.9850415285112703e-05, "loss": 1.7598, "step": 25728 }, { "epoch": 0.33, "grad_norm": 3.244844675064087, "learning_rate": 1.985039717757683e-05, "loss": 1.551, "step": 25729 }, { "epoch": 0.33, "grad_norm": 4.130532264709473, "learning_rate": 1.9850379068953305e-05, "loss": 2.2582, "step": 25730 }, { "epoch": 0.33, "grad_norm": 4.323229789733887, "learning_rate": 1.9850360959242132e-05, "loss": 2.0075, "step": 25731 }, { "epoch": 0.33, "grad_norm": 4.14937686920166, "learning_rate": 1.985034284844331e-05, "loss": 2.1133, "step": 25732 }, { "epoch": 0.33, "grad_norm": 3.9667351245880127, "learning_rate": 1.9850324736556847e-05, "loss": 2.0015, "step": 25733 }, { "epoch": 0.33, "grad_norm": 3.6458470821380615, "learning_rate": 1.9850306623582737e-05, "loss": 1.8623, "step": 25734 }, { "epoch": 0.33, "grad_norm": 3.576573133468628, "learning_rate": 1.985028850952099e-05, "loss": 1.7918, "step": 25735 }, { "epoch": 0.33, "grad_norm": 4.488110065460205, "learning_rate": 1.98502703943716e-05, "loss": 1.9248, "step": 25736 }, { "epoch": 0.33, "grad_norm": 3.697601795196533, "learning_rate": 1.9850252278134575e-05, "loss": 1.742, "step": 25737 }, { "epoch": 0.33, "grad_norm": 4.36206579208374, "learning_rate": 1.9850234160809915e-05, "loss": 2.241, "step": 25738 }, { "epoch": 0.33, "grad_norm": 4.177418231964111, "learning_rate": 1.9850216042397623e-05, "loss": 2.1954, "step": 25739 }, { "epoch": 0.33, "grad_norm": 3.822385787963867, "learning_rate": 1.9850197922897697e-05, "loss": 2.3285, "step": 25740 }, { "epoch": 0.33, "grad_norm": 3.9882686138153076, "learning_rate": 1.9850179802310147e-05, "loss": 2.3575, "step": 25741 }, { "epoch": 0.33, "grad_norm": 3.787855863571167, "learning_rate": 1.9850161680634966e-05, "loss": 2.5068, "step": 25742 }, { "epoch": 0.33, "grad_norm": 3.8607842922210693, "learning_rate": 1.985014355787216e-05, "loss": 2.2605, "step": 25743 }, { "epoch": 0.33, "grad_norm": 3.9974746704101562, "learning_rate": 1.9850125434021735e-05, "loss": 1.9886, "step": 25744 }, { "epoch": 0.33, "grad_norm": 3.477160692214966, "learning_rate": 1.9850107309083685e-05, "loss": 1.8231, "step": 25745 }, { "epoch": 0.33, "grad_norm": 4.026139736175537, "learning_rate": 1.9850089183058018e-05, "loss": 2.2313, "step": 25746 }, { "epoch": 0.33, "grad_norm": 3.7258095741271973, "learning_rate": 1.9850071055944733e-05, "loss": 2.1096, "step": 25747 }, { "epoch": 0.33, "grad_norm": 4.0377373695373535, "learning_rate": 1.9850052927743834e-05, "loss": 2.0208, "step": 25748 }, { "epoch": 0.33, "grad_norm": 3.9928088188171387, "learning_rate": 1.985003479845532e-05, "loss": 2.1005, "step": 25749 }, { "epoch": 0.33, "grad_norm": 4.550259113311768, "learning_rate": 1.9850016668079197e-05, "loss": 2.5207, "step": 25750 }, { "epoch": 0.33, "grad_norm": 4.506165981292725, "learning_rate": 1.9849998536615466e-05, "loss": 2.5691, "step": 25751 }, { "epoch": 0.33, "grad_norm": 3.8847789764404297, "learning_rate": 1.9849980404064125e-05, "loss": 2.1719, "step": 25752 }, { "epoch": 0.33, "grad_norm": 3.6836366653442383, "learning_rate": 1.984996227042518e-05, "loss": 1.9303, "step": 25753 }, { "epoch": 0.33, "grad_norm": 3.4738099575042725, "learning_rate": 1.9849944135698634e-05, "loss": 1.8827, "step": 25754 }, { "epoch": 0.33, "grad_norm": 4.139431953430176, "learning_rate": 1.9849925999884484e-05, "loss": 2.2168, "step": 25755 }, { "epoch": 0.33, "grad_norm": 3.821863889694214, "learning_rate": 1.984990786298274e-05, "loss": 1.8534, "step": 25756 }, { "epoch": 0.33, "grad_norm": 3.598870277404785, "learning_rate": 1.9849889724993393e-05, "loss": 1.9001, "step": 25757 }, { "epoch": 0.33, "grad_norm": 3.590721368789673, "learning_rate": 1.9849871585916456e-05, "loss": 1.6851, "step": 25758 }, { "epoch": 0.33, "grad_norm": 3.740246534347534, "learning_rate": 1.9849853445751925e-05, "loss": 1.9368, "step": 25759 }, { "epoch": 0.33, "grad_norm": 3.569586753845215, "learning_rate": 1.9849835304499803e-05, "loss": 1.8554, "step": 25760 }, { "epoch": 0.33, "grad_norm": 3.911963701248169, "learning_rate": 1.9849817162160088e-05, "loss": 2.2476, "step": 25761 }, { "epoch": 0.33, "grad_norm": 3.955731153488159, "learning_rate": 1.984979901873279e-05, "loss": 2.0397, "step": 25762 }, { "epoch": 0.33, "grad_norm": 4.016998291015625, "learning_rate": 1.9849780874217907e-05, "loss": 1.9368, "step": 25763 }, { "epoch": 0.33, "grad_norm": 4.0397443771362305, "learning_rate": 1.984976272861544e-05, "loss": 2.2368, "step": 25764 }, { "epoch": 0.33, "grad_norm": 4.316715717315674, "learning_rate": 1.9849744581925395e-05, "loss": 1.985, "step": 25765 }, { "epoch": 0.33, "grad_norm": 4.886240005493164, "learning_rate": 1.9849726434147766e-05, "loss": 2.4921, "step": 25766 }, { "epoch": 0.33, "grad_norm": 4.0534987449646, "learning_rate": 1.9849708285282566e-05, "loss": 2.1414, "step": 25767 }, { "epoch": 0.33, "grad_norm": 3.3412134647369385, "learning_rate": 1.9849690135329783e-05, "loss": 1.4755, "step": 25768 }, { "epoch": 0.33, "grad_norm": 4.0284833908081055, "learning_rate": 1.9849671984289434e-05, "loss": 2.3254, "step": 25769 }, { "epoch": 0.33, "grad_norm": 4.125415802001953, "learning_rate": 1.9849653832161514e-05, "loss": 1.9972, "step": 25770 }, { "epoch": 0.33, "grad_norm": 3.721222400665283, "learning_rate": 1.984963567894602e-05, "loss": 1.8765, "step": 25771 }, { "epoch": 0.33, "grad_norm": 4.256104946136475, "learning_rate": 1.9849617524642965e-05, "loss": 2.1787, "step": 25772 }, { "epoch": 0.33, "grad_norm": 3.8405723571777344, "learning_rate": 1.9849599369252342e-05, "loss": 2.1374, "step": 25773 }, { "epoch": 0.33, "grad_norm": 4.0921549797058105, "learning_rate": 1.9849581212774156e-05, "loss": 2.3096, "step": 25774 }, { "epoch": 0.33, "grad_norm": 4.329068660736084, "learning_rate": 1.984956305520841e-05, "loss": 2.0345, "step": 25775 }, { "epoch": 0.33, "grad_norm": 2.9974396228790283, "learning_rate": 1.9849544896555105e-05, "loss": 1.4818, "step": 25776 }, { "epoch": 0.33, "grad_norm": 4.1083879470825195, "learning_rate": 1.9849526736814243e-05, "loss": 2.0176, "step": 25777 }, { "epoch": 0.33, "grad_norm": 3.419938802719116, "learning_rate": 1.9849508575985825e-05, "loss": 1.8254, "step": 25778 }, { "epoch": 0.33, "grad_norm": 3.596561908721924, "learning_rate": 1.9849490414069857e-05, "loss": 1.8265, "step": 25779 }, { "epoch": 0.33, "grad_norm": 4.000603199005127, "learning_rate": 1.9849472251066336e-05, "loss": 2.0453, "step": 25780 }, { "epoch": 0.33, "grad_norm": 3.8455464839935303, "learning_rate": 1.9849454086975265e-05, "loss": 2.0043, "step": 25781 }, { "epoch": 0.33, "grad_norm": 4.270411014556885, "learning_rate": 1.9849435921796648e-05, "loss": 2.3482, "step": 25782 }, { "epoch": 0.33, "grad_norm": 3.884737968444824, "learning_rate": 1.984941775553049e-05, "loss": 2.165, "step": 25783 }, { "epoch": 0.33, "grad_norm": 3.9995338916778564, "learning_rate": 1.9849399588176786e-05, "loss": 1.9313, "step": 25784 }, { "epoch": 0.33, "grad_norm": 3.986309766769409, "learning_rate": 1.984938141973554e-05, "loss": 2.461, "step": 25785 }, { "epoch": 0.33, "grad_norm": 4.310764312744141, "learning_rate": 1.9849363250206755e-05, "loss": 2.1469, "step": 25786 }, { "epoch": 0.33, "grad_norm": 4.104485034942627, "learning_rate": 1.9849345079590437e-05, "loss": 1.8455, "step": 25787 }, { "epoch": 0.33, "grad_norm": 4.11968994140625, "learning_rate": 1.984932690788658e-05, "loss": 2.1162, "step": 25788 }, { "epoch": 0.33, "grad_norm": 4.181787967681885, "learning_rate": 1.9849308735095194e-05, "loss": 2.0574, "step": 25789 }, { "epoch": 0.33, "grad_norm": 3.902909278869629, "learning_rate": 1.9849290561216278e-05, "loss": 1.6916, "step": 25790 }, { "epoch": 0.33, "grad_norm": 3.896855354309082, "learning_rate": 1.984927238624983e-05, "loss": 2.2435, "step": 25791 }, { "epoch": 0.33, "grad_norm": 3.9100935459136963, "learning_rate": 1.9849254210195855e-05, "loss": 1.9857, "step": 25792 }, { "epoch": 0.33, "grad_norm": 4.370516777038574, "learning_rate": 1.9849236033054354e-05, "loss": 2.2953, "step": 25793 }, { "epoch": 0.33, "grad_norm": 4.077975749969482, "learning_rate": 1.9849217854825335e-05, "loss": 2.0654, "step": 25794 }, { "epoch": 0.33, "grad_norm": 4.089919090270996, "learning_rate": 1.9849199675508793e-05, "loss": 2.1967, "step": 25795 }, { "epoch": 0.33, "grad_norm": 4.643202304840088, "learning_rate": 1.984918149510473e-05, "loss": 2.4736, "step": 25796 }, { "epoch": 0.33, "grad_norm": 3.824448585510254, "learning_rate": 1.984916331361315e-05, "loss": 1.9835, "step": 25797 }, { "epoch": 0.33, "grad_norm": 3.775757312774658, "learning_rate": 1.984914513103406e-05, "loss": 2.1379, "step": 25798 }, { "epoch": 0.33, "grad_norm": 3.971458911895752, "learning_rate": 1.9849126947367453e-05, "loss": 2.3699, "step": 25799 }, { "epoch": 0.33, "grad_norm": 4.248331069946289, "learning_rate": 1.9849108762613335e-05, "loss": 2.3345, "step": 25800 }, { "epoch": 0.33, "grad_norm": 4.819249629974365, "learning_rate": 1.9849090576771712e-05, "loss": 2.6663, "step": 25801 }, { "epoch": 0.33, "grad_norm": 4.06437873840332, "learning_rate": 1.984907238984258e-05, "loss": 1.998, "step": 25802 }, { "epoch": 0.33, "grad_norm": 4.228928089141846, "learning_rate": 1.9849054201825946e-05, "loss": 2.6174, "step": 25803 }, { "epoch": 0.33, "grad_norm": 3.4979398250579834, "learning_rate": 1.9849036012721806e-05, "loss": 1.5606, "step": 25804 }, { "epoch": 0.33, "grad_norm": 3.868062734603882, "learning_rate": 1.9849017822530167e-05, "loss": 1.9339, "step": 25805 }, { "epoch": 0.33, "grad_norm": 3.9703400135040283, "learning_rate": 1.984899963125103e-05, "loss": 2.0011, "step": 25806 }, { "epoch": 0.33, "grad_norm": 3.9627017974853516, "learning_rate": 1.9848981438884393e-05, "loss": 2.0761, "step": 25807 }, { "epoch": 0.33, "grad_norm": 3.8411667346954346, "learning_rate": 1.9848963245430264e-05, "loss": 1.9379, "step": 25808 }, { "epoch": 0.33, "grad_norm": 3.8812365531921387, "learning_rate": 1.9848945050888644e-05, "loss": 1.9138, "step": 25809 }, { "epoch": 0.33, "grad_norm": 3.465803861618042, "learning_rate": 1.9848926855259528e-05, "loss": 1.9812, "step": 25810 }, { "epoch": 0.33, "grad_norm": 3.432482957839966, "learning_rate": 1.984890865854293e-05, "loss": 1.5919, "step": 25811 }, { "epoch": 0.33, "grad_norm": 3.5441017150878906, "learning_rate": 1.984889046073884e-05, "loss": 1.9175, "step": 25812 }, { "epoch": 0.33, "grad_norm": 3.951528549194336, "learning_rate": 1.9848872261847268e-05, "loss": 1.7627, "step": 25813 }, { "epoch": 0.34, "grad_norm": 4.256555557250977, "learning_rate": 1.9848854061868215e-05, "loss": 2.2011, "step": 25814 }, { "epoch": 0.34, "grad_norm": 3.3930790424346924, "learning_rate": 1.9848835860801676e-05, "loss": 1.465, "step": 25815 }, { "epoch": 0.34, "grad_norm": 4.463486671447754, "learning_rate": 1.984881765864766e-05, "loss": 1.7356, "step": 25816 }, { "epoch": 0.34, "grad_norm": 3.9735403060913086, "learning_rate": 1.9848799455406172e-05, "loss": 2.1445, "step": 25817 }, { "epoch": 0.34, "grad_norm": 3.4173524379730225, "learning_rate": 1.9848781251077208e-05, "loss": 1.6695, "step": 25818 }, { "epoch": 0.34, "grad_norm": 4.324386119842529, "learning_rate": 1.984876304566077e-05, "loss": 2.2875, "step": 25819 }, { "epoch": 0.34, "grad_norm": 4.032585620880127, "learning_rate": 1.984874483915686e-05, "loss": 1.9398, "step": 25820 }, { "epoch": 0.34, "grad_norm": 3.503676414489746, "learning_rate": 1.984872663156548e-05, "loss": 1.8356, "step": 25821 }, { "epoch": 0.34, "grad_norm": 3.7700741291046143, "learning_rate": 1.984870842288664e-05, "loss": 2.289, "step": 25822 }, { "epoch": 0.34, "grad_norm": 4.236647129058838, "learning_rate": 1.9848690213120335e-05, "loss": 2.2535, "step": 25823 }, { "epoch": 0.34, "grad_norm": 3.8481454849243164, "learning_rate": 1.9848672002266564e-05, "loss": 1.5333, "step": 25824 }, { "epoch": 0.34, "grad_norm": 4.187697410583496, "learning_rate": 1.984865379032533e-05, "loss": 2.5469, "step": 25825 }, { "epoch": 0.34, "grad_norm": 3.8052265644073486, "learning_rate": 1.984863557729664e-05, "loss": 1.9128, "step": 25826 }, { "epoch": 0.34, "grad_norm": 3.994846820831299, "learning_rate": 1.98486173631805e-05, "loss": 2.1929, "step": 25827 }, { "epoch": 0.34, "grad_norm": 4.1248884201049805, "learning_rate": 1.9848599147976895e-05, "loss": 2.2649, "step": 25828 }, { "epoch": 0.34, "grad_norm": 3.9081735610961914, "learning_rate": 1.9848580931685844e-05, "loss": 2.1711, "step": 25829 }, { "epoch": 0.34, "grad_norm": 3.7277634143829346, "learning_rate": 1.984856271430734e-05, "loss": 1.8847, "step": 25830 }, { "epoch": 0.34, "grad_norm": 3.8819949626922607, "learning_rate": 1.984854449584139e-05, "loss": 2.0793, "step": 25831 }, { "epoch": 0.34, "grad_norm": 3.327422857284546, "learning_rate": 1.984852627628799e-05, "loss": 1.7597, "step": 25832 }, { "epoch": 0.34, "grad_norm": 5.327510833740234, "learning_rate": 1.9848508055647147e-05, "loss": 2.5535, "step": 25833 }, { "epoch": 0.34, "grad_norm": 3.5388851165771484, "learning_rate": 1.9848489833918866e-05, "loss": 1.7483, "step": 25834 }, { "epoch": 0.34, "grad_norm": 3.901937484741211, "learning_rate": 1.984847161110314e-05, "loss": 2.2154, "step": 25835 }, { "epoch": 0.34, "grad_norm": 4.287769794464111, "learning_rate": 1.9848453387199976e-05, "loss": 2.2846, "step": 25836 }, { "epoch": 0.34, "grad_norm": 4.035714149475098, "learning_rate": 1.9848435162209378e-05, "loss": 2.1771, "step": 25837 }, { "epoch": 0.34, "grad_norm": 3.431523084640503, "learning_rate": 1.9848416936131342e-05, "loss": 1.8861, "step": 25838 }, { "epoch": 0.34, "grad_norm": 3.8025963306427, "learning_rate": 1.9848398708965875e-05, "loss": 1.6852, "step": 25839 }, { "epoch": 0.34, "grad_norm": 3.9402015209198, "learning_rate": 1.984838048071298e-05, "loss": 1.9504, "step": 25840 }, { "epoch": 0.34, "grad_norm": 3.791030168533325, "learning_rate": 1.9848362251372654e-05, "loss": 1.9008, "step": 25841 }, { "epoch": 0.34, "grad_norm": 3.7232766151428223, "learning_rate": 1.9848344020944904e-05, "loss": 1.6829, "step": 25842 }, { "epoch": 0.34, "grad_norm": 4.430275917053223, "learning_rate": 1.984832578942973e-05, "loss": 2.7723, "step": 25843 }, { "epoch": 0.34, "grad_norm": 3.720492362976074, "learning_rate": 1.984830755682713e-05, "loss": 1.6296, "step": 25844 }, { "epoch": 0.34, "grad_norm": 3.4650537967681885, "learning_rate": 1.984828932313711e-05, "loss": 1.6058, "step": 25845 }, { "epoch": 0.34, "grad_norm": 4.383738040924072, "learning_rate": 1.9848271088359677e-05, "loss": 2.1112, "step": 25846 }, { "epoch": 0.34, "grad_norm": 3.5591187477111816, "learning_rate": 1.9848252852494822e-05, "loss": 1.7399, "step": 25847 }, { "epoch": 0.34, "grad_norm": 4.445565223693848, "learning_rate": 1.984823461554256e-05, "loss": 2.1496, "step": 25848 }, { "epoch": 0.34, "grad_norm": 4.356668472290039, "learning_rate": 1.9848216377502877e-05, "loss": 2.0304, "step": 25849 }, { "epoch": 0.34, "grad_norm": 4.49599027633667, "learning_rate": 1.984819813837579e-05, "loss": 2.4451, "step": 25850 }, { "epoch": 0.34, "grad_norm": 3.9755916595458984, "learning_rate": 1.9848179898161293e-05, "loss": 2.1279, "step": 25851 }, { "epoch": 0.34, "grad_norm": 3.7436177730560303, "learning_rate": 1.9848161656859388e-05, "loss": 2.0222, "step": 25852 }, { "epoch": 0.34, "grad_norm": 4.251412391662598, "learning_rate": 1.9848143414470082e-05, "loss": 2.4829, "step": 25853 }, { "epoch": 0.34, "grad_norm": 3.9339632987976074, "learning_rate": 1.9848125170993373e-05, "loss": 2.1288, "step": 25854 }, { "epoch": 0.34, "grad_norm": 3.8249309062957764, "learning_rate": 1.9848106926429262e-05, "loss": 2.1546, "step": 25855 }, { "epoch": 0.34, "grad_norm": 3.9648735523223877, "learning_rate": 1.984808868077776e-05, "loss": 1.9075, "step": 25856 }, { "epoch": 0.34, "grad_norm": 3.930271863937378, "learning_rate": 1.984807043403885e-05, "loss": 2.0513, "step": 25857 }, { "epoch": 0.34, "grad_norm": 3.563380241394043, "learning_rate": 1.9848052186212552e-05, "loss": 1.9235, "step": 25858 }, { "epoch": 0.34, "grad_norm": 4.2996826171875, "learning_rate": 1.9848033937298864e-05, "loss": 2.522, "step": 25859 }, { "epoch": 0.34, "grad_norm": 3.635115146636963, "learning_rate": 1.9848015687297785e-05, "loss": 1.9766, "step": 25860 }, { "epoch": 0.34, "grad_norm": 3.9718711376190186, "learning_rate": 1.984799743620932e-05, "loss": 2.075, "step": 25861 }, { "epoch": 0.34, "grad_norm": 3.680960178375244, "learning_rate": 1.9847979184033462e-05, "loss": 1.708, "step": 25862 }, { "epoch": 0.34, "grad_norm": 3.8227434158325195, "learning_rate": 1.9847960930770226e-05, "loss": 1.7205, "step": 25863 }, { "epoch": 0.34, "grad_norm": 4.167795181274414, "learning_rate": 1.984794267641961e-05, "loss": 2.104, "step": 25864 }, { "epoch": 0.34, "grad_norm": 4.730154037475586, "learning_rate": 1.9847924420981605e-05, "loss": 2.4547, "step": 25865 }, { "epoch": 0.34, "grad_norm": 3.982754945755005, "learning_rate": 1.984790616445623e-05, "loss": 1.8902, "step": 25866 }, { "epoch": 0.34, "grad_norm": 4.211399078369141, "learning_rate": 1.9847887906843476e-05, "loss": 2.0021, "step": 25867 }, { "epoch": 0.34, "grad_norm": 3.953888416290283, "learning_rate": 1.984786964814335e-05, "loss": 1.8558, "step": 25868 }, { "epoch": 0.34, "grad_norm": 4.576660633087158, "learning_rate": 1.984785138835585e-05, "loss": 2.1012, "step": 25869 }, { "epoch": 0.34, "grad_norm": 4.387521743774414, "learning_rate": 1.984783312748098e-05, "loss": 2.0908, "step": 25870 }, { "epoch": 0.34, "grad_norm": 3.7427680492401123, "learning_rate": 1.9847814865518745e-05, "loss": 1.8201, "step": 25871 }, { "epoch": 0.34, "grad_norm": 4.18021821975708, "learning_rate": 1.984779660246914e-05, "loss": 2.1884, "step": 25872 }, { "epoch": 0.34, "grad_norm": 3.7026569843292236, "learning_rate": 1.9847778338332172e-05, "loss": 1.7685, "step": 25873 }, { "epoch": 0.34, "grad_norm": 3.726966619491577, "learning_rate": 1.9847760073107845e-05, "loss": 1.9304, "step": 25874 }, { "epoch": 0.34, "grad_norm": 3.720935106277466, "learning_rate": 1.9847741806796157e-05, "loss": 1.8821, "step": 25875 }, { "epoch": 0.34, "grad_norm": 3.558291435241699, "learning_rate": 1.984772353939711e-05, "loss": 1.7769, "step": 25876 }, { "epoch": 0.34, "grad_norm": 3.8009865283966064, "learning_rate": 1.9847705270910708e-05, "loss": 1.7778, "step": 25877 }, { "epoch": 0.34, "grad_norm": 3.910036325454712, "learning_rate": 1.9847687001336954e-05, "loss": 2.4848, "step": 25878 }, { "epoch": 0.34, "grad_norm": 3.8727924823760986, "learning_rate": 1.9847668730675847e-05, "loss": 1.9264, "step": 25879 }, { "epoch": 0.34, "grad_norm": 3.890155553817749, "learning_rate": 1.984765045892739e-05, "loss": 1.959, "step": 25880 }, { "epoch": 0.34, "grad_norm": 3.5622620582580566, "learning_rate": 1.9847632186091587e-05, "loss": 2.0383, "step": 25881 }, { "epoch": 0.34, "grad_norm": 4.700601100921631, "learning_rate": 1.9847613912168434e-05, "loss": 2.383, "step": 25882 }, { "epoch": 0.34, "grad_norm": 3.426187038421631, "learning_rate": 1.984759563715794e-05, "loss": 1.75, "step": 25883 }, { "epoch": 0.34, "grad_norm": 3.2007713317871094, "learning_rate": 1.9847577361060106e-05, "loss": 1.7303, "step": 25884 }, { "epoch": 0.34, "grad_norm": 4.601456165313721, "learning_rate": 1.984755908387493e-05, "loss": 2.7989, "step": 25885 }, { "epoch": 0.34, "grad_norm": 4.154323577880859, "learning_rate": 1.984754080560242e-05, "loss": 2.3786, "step": 25886 }, { "epoch": 0.34, "grad_norm": 4.109786510467529, "learning_rate": 1.9847522526242572e-05, "loss": 2.0596, "step": 25887 }, { "epoch": 0.34, "grad_norm": 4.078126907348633, "learning_rate": 1.984750424579539e-05, "loss": 2.3722, "step": 25888 }, { "epoch": 0.34, "grad_norm": 4.736322402954102, "learning_rate": 1.9847485964260876e-05, "loss": 2.437, "step": 25889 }, { "epoch": 0.34, "grad_norm": 3.378119945526123, "learning_rate": 1.9847467681639033e-05, "loss": 1.7525, "step": 25890 }, { "epoch": 0.34, "grad_norm": 4.153289318084717, "learning_rate": 1.984744939792986e-05, "loss": 2.1599, "step": 25891 }, { "epoch": 0.34, "grad_norm": 4.152339935302734, "learning_rate": 1.9847431113133367e-05, "loss": 2.3552, "step": 25892 }, { "epoch": 0.34, "grad_norm": 4.359018802642822, "learning_rate": 1.984741282724955e-05, "loss": 2.3495, "step": 25893 }, { "epoch": 0.34, "grad_norm": 3.713496685028076, "learning_rate": 1.9847394540278406e-05, "loss": 1.8666, "step": 25894 }, { "epoch": 0.34, "grad_norm": 4.354537487030029, "learning_rate": 1.9847376252219944e-05, "loss": 2.2133, "step": 25895 }, { "epoch": 0.34, "grad_norm": 3.8522658348083496, "learning_rate": 1.984735796307417e-05, "loss": 2.035, "step": 25896 }, { "epoch": 0.34, "grad_norm": 3.923628330230713, "learning_rate": 1.9847339672841075e-05, "loss": 2.0711, "step": 25897 }, { "epoch": 0.34, "grad_norm": 3.793076753616333, "learning_rate": 1.984732138152067e-05, "loss": 1.8172, "step": 25898 }, { "epoch": 0.34, "grad_norm": 3.8967490196228027, "learning_rate": 1.9847303089112952e-05, "loss": 1.9722, "step": 25899 }, { "epoch": 0.34, "grad_norm": 4.503415584564209, "learning_rate": 1.9847284795617924e-05, "loss": 2.5714, "step": 25900 }, { "epoch": 0.34, "grad_norm": 4.383897304534912, "learning_rate": 1.984726650103559e-05, "loss": 2.3447, "step": 25901 }, { "epoch": 0.34, "grad_norm": 3.2887301445007324, "learning_rate": 1.9847248205365954e-05, "loss": 1.6933, "step": 25902 }, { "epoch": 0.34, "grad_norm": 4.125831604003906, "learning_rate": 1.9847229908609008e-05, "loss": 2.2914, "step": 25903 }, { "epoch": 0.34, "grad_norm": 3.983049154281616, "learning_rate": 1.9847211610764764e-05, "loss": 1.5769, "step": 25904 }, { "epoch": 0.34, "grad_norm": 4.293881416320801, "learning_rate": 1.984719331183322e-05, "loss": 2.363, "step": 25905 }, { "epoch": 0.34, "grad_norm": 3.969588041305542, "learning_rate": 1.9847175011814382e-05, "loss": 2.0568, "step": 25906 }, { "epoch": 0.34, "grad_norm": 4.083897113800049, "learning_rate": 1.9847156710708244e-05, "loss": 2.4452, "step": 25907 }, { "epoch": 0.34, "grad_norm": 3.5503528118133545, "learning_rate": 1.9847138408514818e-05, "loss": 1.8144, "step": 25908 }, { "epoch": 0.34, "grad_norm": 4.477118968963623, "learning_rate": 1.9847120105234096e-05, "loss": 2.3473, "step": 25909 }, { "epoch": 0.34, "grad_norm": 4.024990081787109, "learning_rate": 1.9847101800866085e-05, "loss": 1.7401, "step": 25910 }, { "epoch": 0.34, "grad_norm": 3.805548667907715, "learning_rate": 1.984708349541079e-05, "loss": 1.4162, "step": 25911 }, { "epoch": 0.34, "grad_norm": 4.227837085723877, "learning_rate": 1.9847065188868208e-05, "loss": 2.3891, "step": 25912 }, { "epoch": 0.34, "grad_norm": 3.6681902408599854, "learning_rate": 1.9847046881238344e-05, "loss": 2.2014, "step": 25913 }, { "epoch": 0.34, "grad_norm": 4.266081809997559, "learning_rate": 1.9847028572521202e-05, "loss": 2.3055, "step": 25914 }, { "epoch": 0.34, "grad_norm": 4.330992221832275, "learning_rate": 1.9847010262716778e-05, "loss": 2.537, "step": 25915 }, { "epoch": 0.34, "grad_norm": 3.933676242828369, "learning_rate": 1.9846991951825075e-05, "loss": 2.16, "step": 25916 }, { "epoch": 0.34, "grad_norm": 4.321989059448242, "learning_rate": 1.9846973639846097e-05, "loss": 2.3799, "step": 25917 }, { "epoch": 0.34, "grad_norm": 3.741205930709839, "learning_rate": 1.9846955326779848e-05, "loss": 1.9356, "step": 25918 }, { "epoch": 0.34, "grad_norm": 3.849052667617798, "learning_rate": 1.984693701262633e-05, "loss": 1.9446, "step": 25919 }, { "epoch": 0.34, "grad_norm": 3.6522738933563232, "learning_rate": 1.984691869738554e-05, "loss": 2.1885, "step": 25920 }, { "epoch": 0.34, "grad_norm": 3.9398155212402344, "learning_rate": 1.9846900381057485e-05, "loss": 2.1179, "step": 25921 }, { "epoch": 0.34, "grad_norm": 3.974883794784546, "learning_rate": 1.9846882063642165e-05, "loss": 2.0927, "step": 25922 }, { "epoch": 0.34, "grad_norm": 3.75765061378479, "learning_rate": 1.984686374513958e-05, "loss": 1.8423, "step": 25923 }, { "epoch": 0.34, "grad_norm": 4.192130088806152, "learning_rate": 1.9846845425549733e-05, "loss": 2.3516, "step": 25924 }, { "epoch": 0.34, "grad_norm": 3.2321224212646484, "learning_rate": 1.984682710487263e-05, "loss": 1.5491, "step": 25925 }, { "epoch": 0.34, "grad_norm": 3.439871311187744, "learning_rate": 1.984680878310827e-05, "loss": 1.7338, "step": 25926 }, { "epoch": 0.34, "grad_norm": 4.132746696472168, "learning_rate": 1.9846790460256657e-05, "loss": 2.3063, "step": 25927 }, { "epoch": 0.34, "grad_norm": 4.099266052246094, "learning_rate": 1.9846772136317787e-05, "loss": 2.3888, "step": 25928 }, { "epoch": 0.34, "grad_norm": 4.150462627410889, "learning_rate": 1.9846753811291673e-05, "loss": 2.4574, "step": 25929 }, { "epoch": 0.34, "grad_norm": 4.163423538208008, "learning_rate": 1.9846735485178304e-05, "loss": 2.1577, "step": 25930 }, { "epoch": 0.34, "grad_norm": 3.6031556129455566, "learning_rate": 1.984671715797769e-05, "loss": 1.8358, "step": 25931 }, { "epoch": 0.34, "grad_norm": 3.9702835083007812, "learning_rate": 1.984669882968983e-05, "loss": 1.9781, "step": 25932 }, { "epoch": 0.34, "grad_norm": 3.696791172027588, "learning_rate": 1.984668050031473e-05, "loss": 2.3338, "step": 25933 }, { "epoch": 0.34, "grad_norm": 4.197836399078369, "learning_rate": 1.984666216985239e-05, "loss": 2.0595, "step": 25934 }, { "epoch": 0.34, "grad_norm": 4.076621055603027, "learning_rate": 1.984664383830281e-05, "loss": 2.2218, "step": 25935 }, { "epoch": 0.34, "grad_norm": 4.174284934997559, "learning_rate": 1.9846625505665994e-05, "loss": 2.0944, "step": 25936 }, { "epoch": 0.34, "grad_norm": 4.461104869842529, "learning_rate": 1.9846607171941942e-05, "loss": 2.6229, "step": 25937 }, { "epoch": 0.34, "grad_norm": 3.6843926906585693, "learning_rate": 1.984658883713066e-05, "loss": 1.6894, "step": 25938 }, { "epoch": 0.34, "grad_norm": 3.638880729675293, "learning_rate": 1.9846570501232145e-05, "loss": 1.8294, "step": 25939 }, { "epoch": 0.34, "grad_norm": 4.366363525390625, "learning_rate": 1.9846552164246403e-05, "loss": 2.1248, "step": 25940 }, { "epoch": 0.34, "grad_norm": 3.647037982940674, "learning_rate": 1.9846533826173437e-05, "loss": 1.8506, "step": 25941 }, { "epoch": 0.34, "grad_norm": 3.569382667541504, "learning_rate": 1.9846515487013243e-05, "loss": 1.7139, "step": 25942 }, { "epoch": 0.34, "grad_norm": 4.360838413238525, "learning_rate": 1.9846497146765825e-05, "loss": 2.3648, "step": 25943 }, { "epoch": 0.34, "grad_norm": 4.035220146179199, "learning_rate": 1.9846478805431192e-05, "loss": 2.2212, "step": 25944 }, { "epoch": 0.34, "grad_norm": 3.8709583282470703, "learning_rate": 1.9846460463009335e-05, "loss": 2.0563, "step": 25945 }, { "epoch": 0.34, "grad_norm": 3.466219663619995, "learning_rate": 1.9846442119500267e-05, "loss": 1.9053, "step": 25946 }, { "epoch": 0.34, "grad_norm": 4.293635845184326, "learning_rate": 1.9846423774903982e-05, "loss": 1.9577, "step": 25947 }, { "epoch": 0.34, "grad_norm": 3.845438241958618, "learning_rate": 1.9846405429220486e-05, "loss": 2.4847, "step": 25948 }, { "epoch": 0.34, "grad_norm": 4.122180461883545, "learning_rate": 1.984638708244978e-05, "loss": 2.2981, "step": 25949 }, { "epoch": 0.34, "grad_norm": 4.397745132446289, "learning_rate": 1.9846368734591864e-05, "loss": 2.157, "step": 25950 }, { "epoch": 0.34, "grad_norm": 3.467998504638672, "learning_rate": 1.9846350385646742e-05, "loss": 1.7623, "step": 25951 }, { "epoch": 0.34, "grad_norm": 4.418949127197266, "learning_rate": 1.9846332035614416e-05, "loss": 2.2004, "step": 25952 }, { "epoch": 0.34, "grad_norm": 4.293998718261719, "learning_rate": 1.984631368449489e-05, "loss": 1.8767, "step": 25953 }, { "epoch": 0.34, "grad_norm": 3.7920591831207275, "learning_rate": 1.9846295332288162e-05, "loss": 1.8078, "step": 25954 }, { "epoch": 0.34, "grad_norm": 4.416858673095703, "learning_rate": 1.9846276978994234e-05, "loss": 2.3744, "step": 25955 }, { "epoch": 0.34, "grad_norm": 4.18995475769043, "learning_rate": 1.9846258624613112e-05, "loss": 1.8263, "step": 25956 }, { "epoch": 0.34, "grad_norm": 3.3808181285858154, "learning_rate": 1.98462402691448e-05, "loss": 1.6932, "step": 25957 }, { "epoch": 0.34, "grad_norm": 3.819612979888916, "learning_rate": 1.9846221912589293e-05, "loss": 1.8189, "step": 25958 }, { "epoch": 0.34, "grad_norm": 4.508869647979736, "learning_rate": 1.9846203554946593e-05, "loss": 2.1703, "step": 25959 }, { "epoch": 0.34, "grad_norm": 4.06485652923584, "learning_rate": 1.984618519621671e-05, "loss": 2.2657, "step": 25960 }, { "epoch": 0.34, "grad_norm": 4.347719192504883, "learning_rate": 1.9846166836399635e-05, "loss": 2.3641, "step": 25961 }, { "epoch": 0.34, "grad_norm": 3.994647264480591, "learning_rate": 1.984614847549538e-05, "loss": 2.1712, "step": 25962 }, { "epoch": 0.34, "grad_norm": 3.8711788654327393, "learning_rate": 1.9846130113503945e-05, "loss": 2.095, "step": 25963 }, { "epoch": 0.34, "grad_norm": 4.361548900604248, "learning_rate": 1.984611175042533e-05, "loss": 2.2128, "step": 25964 }, { "epoch": 0.34, "grad_norm": 4.081007957458496, "learning_rate": 1.984609338625953e-05, "loss": 1.9473, "step": 25965 }, { "epoch": 0.34, "grad_norm": 3.9885730743408203, "learning_rate": 1.984607502100656e-05, "loss": 1.8787, "step": 25966 }, { "epoch": 0.34, "grad_norm": 3.90165114402771, "learning_rate": 1.9846056654666418e-05, "loss": 1.9216, "step": 25967 }, { "epoch": 0.34, "grad_norm": 4.2176361083984375, "learning_rate": 1.98460382872391e-05, "loss": 2.1175, "step": 25968 }, { "epoch": 0.34, "grad_norm": 3.6265170574188232, "learning_rate": 1.9846019918724615e-05, "loss": 1.8173, "step": 25969 }, { "epoch": 0.34, "grad_norm": 3.6588432788848877, "learning_rate": 1.984600154912296e-05, "loss": 2.2506, "step": 25970 }, { "epoch": 0.34, "grad_norm": 4.385370254516602, "learning_rate": 1.9845983178434144e-05, "loss": 2.5084, "step": 25971 }, { "epoch": 0.34, "grad_norm": 4.178582668304443, "learning_rate": 1.984596480665816e-05, "loss": 2.3423, "step": 25972 }, { "epoch": 0.34, "grad_norm": 3.7081732749938965, "learning_rate": 1.9845946433795017e-05, "loss": 2.1485, "step": 25973 }, { "epoch": 0.34, "grad_norm": 4.107567310333252, "learning_rate": 1.9845928059844713e-05, "loss": 2.4289, "step": 25974 }, { "epoch": 0.34, "grad_norm": 4.549182891845703, "learning_rate": 1.9845909684807253e-05, "loss": 2.233, "step": 25975 }, { "epoch": 0.34, "grad_norm": 3.6766836643218994, "learning_rate": 1.9845891308682636e-05, "loss": 1.6419, "step": 25976 }, { "epoch": 0.34, "grad_norm": 4.695468425750732, "learning_rate": 1.9845872931470866e-05, "loss": 2.3004, "step": 25977 }, { "epoch": 0.34, "grad_norm": 3.7784476280212402, "learning_rate": 1.9845854553171942e-05, "loss": 1.763, "step": 25978 }, { "epoch": 0.34, "grad_norm": 3.645446538925171, "learning_rate": 1.9845836173785873e-05, "loss": 1.897, "step": 25979 }, { "epoch": 0.34, "grad_norm": 4.2366766929626465, "learning_rate": 1.9845817793312656e-05, "loss": 2.3958, "step": 25980 }, { "epoch": 0.34, "grad_norm": 3.2920961380004883, "learning_rate": 1.984579941175229e-05, "loss": 1.7278, "step": 25981 }, { "epoch": 0.34, "grad_norm": 4.050708770751953, "learning_rate": 1.9845781029104784e-05, "loss": 2.4024, "step": 25982 }, { "epoch": 0.34, "grad_norm": 4.733123779296875, "learning_rate": 1.984576264537014e-05, "loss": 1.8251, "step": 25983 }, { "epoch": 0.34, "grad_norm": 4.2652788162231445, "learning_rate": 1.984574426054835e-05, "loss": 2.5669, "step": 25984 }, { "epoch": 0.34, "grad_norm": 3.977334976196289, "learning_rate": 1.9845725874639425e-05, "loss": 2.4023, "step": 25985 }, { "epoch": 0.34, "grad_norm": 3.94911789894104, "learning_rate": 1.9845707487643367e-05, "loss": 2.0196, "step": 25986 }, { "epoch": 0.34, "grad_norm": 3.816314458847046, "learning_rate": 1.9845689099560173e-05, "loss": 2.2476, "step": 25987 }, { "epoch": 0.34, "grad_norm": 3.523789405822754, "learning_rate": 1.984567071038985e-05, "loss": 1.7618, "step": 25988 }, { "epoch": 0.34, "grad_norm": 4.208736419677734, "learning_rate": 1.9845652320132397e-05, "loss": 2.4088, "step": 25989 }, { "epoch": 0.34, "grad_norm": 3.856559991836548, "learning_rate": 1.9845633928787817e-05, "loss": 1.8335, "step": 25990 }, { "epoch": 0.34, "grad_norm": 3.7960894107818604, "learning_rate": 1.9845615536356112e-05, "loss": 1.813, "step": 25991 }, { "epoch": 0.34, "grad_norm": 3.6562018394470215, "learning_rate": 1.9845597142837284e-05, "loss": 1.7435, "step": 25992 }, { "epoch": 0.34, "grad_norm": 4.437608242034912, "learning_rate": 1.9845578748231336e-05, "loss": 1.9873, "step": 25993 }, { "epoch": 0.34, "grad_norm": 4.052530288696289, "learning_rate": 1.984556035253827e-05, "loss": 2.3466, "step": 25994 }, { "epoch": 0.34, "grad_norm": 3.6509897708892822, "learning_rate": 1.9845541955758083e-05, "loss": 1.6492, "step": 25995 }, { "epoch": 0.34, "grad_norm": 3.7841930389404297, "learning_rate": 1.9845523557890785e-05, "loss": 1.7284, "step": 25996 }, { "epoch": 0.34, "grad_norm": 3.288522243499756, "learning_rate": 1.9845505158936373e-05, "loss": 1.459, "step": 25997 }, { "epoch": 0.34, "grad_norm": 3.5900158882141113, "learning_rate": 1.9845486758894846e-05, "loss": 2.1128, "step": 25998 }, { "epoch": 0.34, "grad_norm": 3.5159051418304443, "learning_rate": 1.9845468357766216e-05, "loss": 1.6226, "step": 25999 }, { "epoch": 0.34, "grad_norm": 4.268218517303467, "learning_rate": 1.9845449955550477e-05, "loss": 2.5668, "step": 26000 }, { "epoch": 0.34, "grad_norm": 3.9455807209014893, "learning_rate": 1.9845431552247632e-05, "loss": 1.966, "step": 26001 }, { "epoch": 0.34, "grad_norm": 3.959630250930786, "learning_rate": 1.984541314785769e-05, "loss": 1.873, "step": 26002 }, { "epoch": 0.34, "grad_norm": 3.9981632232666016, "learning_rate": 1.9845394742380643e-05, "loss": 1.8065, "step": 26003 }, { "epoch": 0.34, "grad_norm": 4.136552333831787, "learning_rate": 1.9845376335816494e-05, "loss": 1.9783, "step": 26004 }, { "epoch": 0.34, "grad_norm": 3.727764129638672, "learning_rate": 1.9845357928165253e-05, "loss": 2.1652, "step": 26005 }, { "epoch": 0.34, "grad_norm": 3.6209323406219482, "learning_rate": 1.9845339519426917e-05, "loss": 2.1847, "step": 26006 }, { "epoch": 0.34, "grad_norm": 3.957761287689209, "learning_rate": 1.9845321109601488e-05, "loss": 1.9936, "step": 26007 }, { "epoch": 0.34, "grad_norm": 3.746249198913574, "learning_rate": 1.984530269868897e-05, "loss": 1.7792, "step": 26008 }, { "epoch": 0.34, "grad_norm": 4.469202995300293, "learning_rate": 1.984528428668936e-05, "loss": 2.5339, "step": 26009 }, { "epoch": 0.34, "grad_norm": 3.633193016052246, "learning_rate": 1.984526587360267e-05, "loss": 1.6858, "step": 26010 }, { "epoch": 0.34, "grad_norm": 4.688668727874756, "learning_rate": 1.9845247459428888e-05, "loss": 1.9194, "step": 26011 }, { "epoch": 0.34, "grad_norm": 4.308936595916748, "learning_rate": 1.984522904416803e-05, "loss": 1.9717, "step": 26012 }, { "epoch": 0.34, "grad_norm": 3.177534341812134, "learning_rate": 1.9845210627820087e-05, "loss": 1.4517, "step": 26013 }, { "epoch": 0.34, "grad_norm": 4.1333136558532715, "learning_rate": 1.9845192210385067e-05, "loss": 2.149, "step": 26014 }, { "epoch": 0.34, "grad_norm": 3.5332136154174805, "learning_rate": 1.984517379186297e-05, "loss": 2.1786, "step": 26015 }, { "epoch": 0.34, "grad_norm": 3.4344708919525146, "learning_rate": 1.98451553722538e-05, "loss": 1.8408, "step": 26016 }, { "epoch": 0.34, "grad_norm": 4.1425652503967285, "learning_rate": 1.984513695155756e-05, "loss": 1.7846, "step": 26017 }, { "epoch": 0.34, "grad_norm": 3.8846161365509033, "learning_rate": 1.9845118529774246e-05, "loss": 2.0494, "step": 26018 }, { "epoch": 0.34, "grad_norm": 3.9402029514312744, "learning_rate": 1.9845100106903866e-05, "loss": 1.7466, "step": 26019 }, { "epoch": 0.34, "grad_norm": 3.4056830406188965, "learning_rate": 1.9845081682946418e-05, "loss": 1.5011, "step": 26020 }, { "epoch": 0.34, "grad_norm": 4.132900238037109, "learning_rate": 1.9845063257901908e-05, "loss": 2.2071, "step": 26021 }, { "epoch": 0.34, "grad_norm": 4.528040409088135, "learning_rate": 1.9845044831770336e-05, "loss": 2.0653, "step": 26022 }, { "epoch": 0.34, "grad_norm": 3.8257057666778564, "learning_rate": 1.9845026404551702e-05, "loss": 1.7755, "step": 26023 }, { "epoch": 0.34, "grad_norm": 3.5899336338043213, "learning_rate": 1.9845007976246014e-05, "loss": 1.7738, "step": 26024 }, { "epoch": 0.34, "grad_norm": 3.6133549213409424, "learning_rate": 1.9844989546853267e-05, "loss": 1.7149, "step": 26025 }, { "epoch": 0.34, "grad_norm": 4.4173383712768555, "learning_rate": 1.9844971116373465e-05, "loss": 2.0024, "step": 26026 }, { "epoch": 0.34, "grad_norm": 4.322809219360352, "learning_rate": 1.9844952684806615e-05, "loss": 1.8881, "step": 26027 }, { "epoch": 0.34, "grad_norm": 4.795734882354736, "learning_rate": 1.9844934252152713e-05, "loss": 1.9718, "step": 26028 }, { "epoch": 0.34, "grad_norm": 3.7700324058532715, "learning_rate": 1.984491581841176e-05, "loss": 1.8739, "step": 26029 }, { "epoch": 0.34, "grad_norm": 3.613614082336426, "learning_rate": 1.9844897383583768e-05, "loss": 2.0221, "step": 26030 }, { "epoch": 0.34, "grad_norm": 3.7199606895446777, "learning_rate": 1.984487894766873e-05, "loss": 1.897, "step": 26031 }, { "epoch": 0.34, "grad_norm": 3.3106226921081543, "learning_rate": 1.9844860510666648e-05, "loss": 1.8068, "step": 26032 }, { "epoch": 0.34, "grad_norm": 3.8433890342712402, "learning_rate": 1.9844842072577525e-05, "loss": 2.1418, "step": 26033 }, { "epoch": 0.34, "grad_norm": 4.026299476623535, "learning_rate": 1.9844823633401368e-05, "loss": 2.0414, "step": 26034 }, { "epoch": 0.34, "grad_norm": 4.218103885650635, "learning_rate": 1.9844805193138176e-05, "loss": 2.6754, "step": 26035 }, { "epoch": 0.34, "grad_norm": 3.678628921508789, "learning_rate": 1.9844786751787947e-05, "loss": 2.3005, "step": 26036 }, { "epoch": 0.34, "grad_norm": 4.444812774658203, "learning_rate": 1.984476830935069e-05, "loss": 2.5846, "step": 26037 }, { "epoch": 0.34, "grad_norm": 3.779309034347534, "learning_rate": 1.9844749865826403e-05, "loss": 1.8581, "step": 26038 }, { "epoch": 0.34, "grad_norm": 4.399190425872803, "learning_rate": 1.9844731421215083e-05, "loss": 2.397, "step": 26039 }, { "epoch": 0.34, "grad_norm": 3.984578847885132, "learning_rate": 1.9844712975516745e-05, "loss": 2.2515, "step": 26040 }, { "epoch": 0.34, "grad_norm": 4.061046123504639, "learning_rate": 1.984469452873138e-05, "loss": 2.2686, "step": 26041 }, { "epoch": 0.34, "grad_norm": 4.729488849639893, "learning_rate": 1.9844676080858996e-05, "loss": 2.4153, "step": 26042 }, { "epoch": 0.34, "grad_norm": 3.402475595474243, "learning_rate": 1.984465763189959e-05, "loss": 1.9566, "step": 26043 }, { "epoch": 0.34, "grad_norm": 4.615382194519043, "learning_rate": 1.984463918185317e-05, "loss": 2.5494, "step": 26044 }, { "epoch": 0.34, "grad_norm": 3.8219854831695557, "learning_rate": 1.9844620730719733e-05, "loss": 1.9719, "step": 26045 }, { "epoch": 0.34, "grad_norm": 4.8080878257751465, "learning_rate": 1.984460227849928e-05, "loss": 2.3682, "step": 26046 }, { "epoch": 0.34, "grad_norm": 4.0244340896606445, "learning_rate": 1.984458382519182e-05, "loss": 2.1329, "step": 26047 }, { "epoch": 0.34, "grad_norm": 4.558438777923584, "learning_rate": 1.984456537079735e-05, "loss": 2.1204, "step": 26048 }, { "epoch": 0.34, "grad_norm": 3.8934149742126465, "learning_rate": 1.9844546915315873e-05, "loss": 1.9515, "step": 26049 }, { "epoch": 0.34, "grad_norm": 3.9182846546173096, "learning_rate": 1.984452845874739e-05, "loss": 2.1125, "step": 26050 }, { "epoch": 0.34, "grad_norm": 3.9350616931915283, "learning_rate": 1.9844510001091907e-05, "loss": 2.1328, "step": 26051 }, { "epoch": 0.34, "grad_norm": 3.960768222808838, "learning_rate": 1.984449154234942e-05, "loss": 2.115, "step": 26052 }, { "epoch": 0.34, "grad_norm": 3.967031240463257, "learning_rate": 1.9844473082519934e-05, "loss": 2.1694, "step": 26053 }, { "epoch": 0.34, "grad_norm": 4.227790832519531, "learning_rate": 1.9844454621603453e-05, "loss": 2.5724, "step": 26054 }, { "epoch": 0.34, "grad_norm": 4.233778953552246, "learning_rate": 1.9844436159599975e-05, "loss": 2.2811, "step": 26055 }, { "epoch": 0.34, "grad_norm": 3.731600522994995, "learning_rate": 1.9844417696509506e-05, "loss": 1.556, "step": 26056 }, { "epoch": 0.34, "grad_norm": 3.5373153686523438, "learning_rate": 1.9844399232332047e-05, "loss": 1.9991, "step": 26057 }, { "epoch": 0.34, "grad_norm": 3.9888393878936768, "learning_rate": 1.9844380767067597e-05, "loss": 1.5754, "step": 26058 }, { "epoch": 0.34, "grad_norm": 3.9060537815093994, "learning_rate": 1.984436230071616e-05, "loss": 2.5347, "step": 26059 }, { "epoch": 0.34, "grad_norm": 4.017452239990234, "learning_rate": 1.984434383327774e-05, "loss": 1.8173, "step": 26060 }, { "epoch": 0.34, "grad_norm": 4.169091701507568, "learning_rate": 1.9844325364752337e-05, "loss": 2.3861, "step": 26061 }, { "epoch": 0.34, "grad_norm": 4.790555953979492, "learning_rate": 1.9844306895139955e-05, "loss": 2.4589, "step": 26062 }, { "epoch": 0.34, "grad_norm": 3.6304211616516113, "learning_rate": 1.9844288424440593e-05, "loss": 1.6004, "step": 26063 }, { "epoch": 0.34, "grad_norm": 3.2733190059661865, "learning_rate": 1.9844269952654253e-05, "loss": 1.593, "step": 26064 }, { "epoch": 0.34, "grad_norm": 4.385847091674805, "learning_rate": 1.9844251479780944e-05, "loss": 2.8851, "step": 26065 }, { "epoch": 0.34, "grad_norm": 4.066067695617676, "learning_rate": 1.9844233005820657e-05, "loss": 2.2291, "step": 26066 }, { "epoch": 0.34, "grad_norm": 3.709742546081543, "learning_rate": 1.98442145307734e-05, "loss": 1.9553, "step": 26067 }, { "epoch": 0.34, "grad_norm": 3.676023244857788, "learning_rate": 1.9844196054639176e-05, "loss": 1.6714, "step": 26068 }, { "epoch": 0.34, "grad_norm": 3.815903425216675, "learning_rate": 1.9844177577417985e-05, "loss": 1.8617, "step": 26069 }, { "epoch": 0.34, "grad_norm": 3.8842387199401855, "learning_rate": 1.9844159099109834e-05, "loss": 1.7745, "step": 26070 }, { "epoch": 0.34, "grad_norm": 3.7156739234924316, "learning_rate": 1.9844140619714717e-05, "loss": 1.7172, "step": 26071 }, { "epoch": 0.34, "grad_norm": 3.687915563583374, "learning_rate": 1.984412213923264e-05, "loss": 1.7432, "step": 26072 }, { "epoch": 0.34, "grad_norm": 3.753936529159546, "learning_rate": 1.9844103657663604e-05, "loss": 1.9079, "step": 26073 }, { "epoch": 0.34, "grad_norm": 4.04660701751709, "learning_rate": 1.9844085175007616e-05, "loss": 1.9138, "step": 26074 }, { "epoch": 0.34, "grad_norm": 4.241140842437744, "learning_rate": 1.984406669126467e-05, "loss": 2.0039, "step": 26075 }, { "epoch": 0.34, "grad_norm": 4.036492347717285, "learning_rate": 1.9844048206434773e-05, "loss": 2.4228, "step": 26076 }, { "epoch": 0.34, "grad_norm": 4.181039810180664, "learning_rate": 1.984402972051793e-05, "loss": 2.2904, "step": 26077 }, { "epoch": 0.34, "grad_norm": 5.700067520141602, "learning_rate": 1.9844011233514134e-05, "loss": 2.3831, "step": 26078 }, { "epoch": 0.34, "grad_norm": 3.4677345752716064, "learning_rate": 1.9843992745423393e-05, "loss": 1.6274, "step": 26079 }, { "epoch": 0.34, "grad_norm": 3.9891319274902344, "learning_rate": 1.984397425624571e-05, "loss": 1.9555, "step": 26080 }, { "epoch": 0.34, "grad_norm": 3.8990297317504883, "learning_rate": 1.984395576598108e-05, "loss": 1.91, "step": 26081 }, { "epoch": 0.34, "grad_norm": 3.5099856853485107, "learning_rate": 1.9843937274629518e-05, "loss": 1.5612, "step": 26082 }, { "epoch": 0.34, "grad_norm": 4.56735372543335, "learning_rate": 1.9843918782191014e-05, "loss": 2.4351, "step": 26083 }, { "epoch": 0.34, "grad_norm": 4.743720054626465, "learning_rate": 1.9843900288665577e-05, "loss": 2.7897, "step": 26084 }, { "epoch": 0.34, "grad_norm": 4.182519435882568, "learning_rate": 1.98438817940532e-05, "loss": 2.1718, "step": 26085 }, { "epoch": 0.34, "grad_norm": 3.835078477859497, "learning_rate": 1.98438632983539e-05, "loss": 2.0634, "step": 26086 }, { "epoch": 0.34, "grad_norm": 3.4476075172424316, "learning_rate": 1.9843844801567663e-05, "loss": 1.6757, "step": 26087 }, { "epoch": 0.34, "grad_norm": 4.731142044067383, "learning_rate": 1.9843826303694504e-05, "loss": 2.4368, "step": 26088 }, { "epoch": 0.34, "grad_norm": 3.6221070289611816, "learning_rate": 1.984380780473442e-05, "loss": 1.8477, "step": 26089 }, { "epoch": 0.34, "grad_norm": 4.245360374450684, "learning_rate": 1.984378930468741e-05, "loss": 2.2622, "step": 26090 }, { "epoch": 0.34, "grad_norm": 4.26918888092041, "learning_rate": 1.9843770803553478e-05, "loss": 2.202, "step": 26091 }, { "epoch": 0.34, "grad_norm": 4.465672016143799, "learning_rate": 1.9843752301332625e-05, "loss": 1.9301, "step": 26092 }, { "epoch": 0.34, "grad_norm": 3.9565415382385254, "learning_rate": 1.984373379802486e-05, "loss": 1.6556, "step": 26093 }, { "epoch": 0.34, "grad_norm": 4.185340404510498, "learning_rate": 1.9843715293630174e-05, "loss": 2.4876, "step": 26094 }, { "epoch": 0.34, "grad_norm": 4.163882732391357, "learning_rate": 1.984369678814858e-05, "loss": 1.9717, "step": 26095 }, { "epoch": 0.34, "grad_norm": 3.623225450515747, "learning_rate": 1.984367828158007e-05, "loss": 1.8644, "step": 26096 }, { "epoch": 0.34, "grad_norm": 4.129310131072998, "learning_rate": 1.984365977392466e-05, "loss": 2.0592, "step": 26097 }, { "epoch": 0.34, "grad_norm": 3.394770860671997, "learning_rate": 1.9843641265182332e-05, "loss": 1.635, "step": 26098 }, { "epoch": 0.34, "grad_norm": 4.119596004486084, "learning_rate": 1.9843622755353104e-05, "loss": 1.8388, "step": 26099 }, { "epoch": 0.34, "grad_norm": 4.820556163787842, "learning_rate": 1.9843604244436973e-05, "loss": 2.4046, "step": 26100 }, { "epoch": 0.34, "grad_norm": 4.552235126495361, "learning_rate": 1.984358573243394e-05, "loss": 2.3368, "step": 26101 }, { "epoch": 0.34, "grad_norm": 4.1069254875183105, "learning_rate": 1.984356721934401e-05, "loss": 1.8653, "step": 26102 }, { "epoch": 0.34, "grad_norm": 3.999453067779541, "learning_rate": 1.9843548705167182e-05, "loss": 2.0908, "step": 26103 }, { "epoch": 0.34, "grad_norm": 3.266509771347046, "learning_rate": 1.9843530189903458e-05, "loss": 1.4093, "step": 26104 }, { "epoch": 0.34, "grad_norm": 3.837819814682007, "learning_rate": 1.9843511673552845e-05, "loss": 1.9847, "step": 26105 }, { "epoch": 0.34, "grad_norm": 4.047712326049805, "learning_rate": 1.984349315611534e-05, "loss": 2.2617, "step": 26106 }, { "epoch": 0.34, "grad_norm": 4.206180095672607, "learning_rate": 1.9843474637590942e-05, "loss": 2.2625, "step": 26107 }, { "epoch": 0.34, "grad_norm": 3.878999710083008, "learning_rate": 1.9843456117979662e-05, "loss": 1.8491, "step": 26108 }, { "epoch": 0.34, "grad_norm": 3.46339750289917, "learning_rate": 1.9843437597281497e-05, "loss": 1.7074, "step": 26109 }, { "epoch": 0.34, "grad_norm": 3.9198789596557617, "learning_rate": 1.9843419075496445e-05, "loss": 1.8436, "step": 26110 }, { "epoch": 0.34, "grad_norm": 3.640094518661499, "learning_rate": 1.984340055262452e-05, "loss": 1.8737, "step": 26111 }, { "epoch": 0.34, "grad_norm": 3.627131223678589, "learning_rate": 1.984338202866571e-05, "loss": 1.9992, "step": 26112 }, { "epoch": 0.34, "grad_norm": 4.262767791748047, "learning_rate": 1.9843363503620026e-05, "loss": 2.0565, "step": 26113 }, { "epoch": 0.34, "grad_norm": 3.7859010696411133, "learning_rate": 1.984334497748747e-05, "loss": 1.7682, "step": 26114 }, { "epoch": 0.34, "grad_norm": 3.5339317321777344, "learning_rate": 1.984332645026804e-05, "loss": 1.5489, "step": 26115 }, { "epoch": 0.34, "grad_norm": 3.5481228828430176, "learning_rate": 1.984330792196174e-05, "loss": 1.8185, "step": 26116 }, { "epoch": 0.34, "grad_norm": 3.995706081390381, "learning_rate": 1.984328939256857e-05, "loss": 1.9693, "step": 26117 }, { "epoch": 0.34, "grad_norm": 3.784341335296631, "learning_rate": 1.9843270862088536e-05, "loss": 1.8351, "step": 26118 }, { "epoch": 0.34, "grad_norm": 3.922576665878296, "learning_rate": 1.9843252330521637e-05, "loss": 2.2706, "step": 26119 }, { "epoch": 0.34, "grad_norm": 3.8932502269744873, "learning_rate": 1.9843233797867876e-05, "loss": 1.9813, "step": 26120 }, { "epoch": 0.34, "grad_norm": 4.02945613861084, "learning_rate": 1.9843215264127256e-05, "loss": 2.2426, "step": 26121 }, { "epoch": 0.34, "grad_norm": 3.845240831375122, "learning_rate": 1.9843196729299775e-05, "loss": 1.4842, "step": 26122 }, { "epoch": 0.34, "grad_norm": 4.50265645980835, "learning_rate": 1.984317819338544e-05, "loss": 2.4085, "step": 26123 }, { "epoch": 0.34, "grad_norm": 4.30194616317749, "learning_rate": 1.9843159656384253e-05, "loss": 2.2524, "step": 26124 }, { "epoch": 0.34, "grad_norm": 3.8219127655029297, "learning_rate": 1.9843141118296213e-05, "loss": 1.5159, "step": 26125 }, { "epoch": 0.34, "grad_norm": 4.148067474365234, "learning_rate": 1.9843122579121322e-05, "loss": 1.9667, "step": 26126 }, { "epoch": 0.34, "grad_norm": 4.074227333068848, "learning_rate": 1.9843104038859585e-05, "loss": 2.6335, "step": 26127 }, { "epoch": 0.34, "grad_norm": 3.9786477088928223, "learning_rate": 1.9843085497510997e-05, "loss": 2.1791, "step": 26128 }, { "epoch": 0.34, "grad_norm": 4.212026596069336, "learning_rate": 1.984306695507557e-05, "loss": 2.4038, "step": 26129 }, { "epoch": 0.34, "grad_norm": 3.8604397773742676, "learning_rate": 1.9843048411553303e-05, "loss": 1.9358, "step": 26130 }, { "epoch": 0.34, "grad_norm": 3.975490093231201, "learning_rate": 1.9843029866944197e-05, "loss": 2.2021, "step": 26131 }, { "epoch": 0.34, "grad_norm": 3.2246310710906982, "learning_rate": 1.984301132124825e-05, "loss": 1.5593, "step": 26132 }, { "epoch": 0.34, "grad_norm": 4.221138000488281, "learning_rate": 1.9842992774465467e-05, "loss": 2.2182, "step": 26133 }, { "epoch": 0.34, "grad_norm": 4.403132915496826, "learning_rate": 1.9842974226595854e-05, "loss": 2.2751, "step": 26134 }, { "epoch": 0.34, "grad_norm": 4.197953224182129, "learning_rate": 1.984295567763941e-05, "loss": 2.3311, "step": 26135 }, { "epoch": 0.34, "grad_norm": 3.8727822303771973, "learning_rate": 1.9842937127596132e-05, "loss": 1.6822, "step": 26136 }, { "epoch": 0.34, "grad_norm": 3.3466503620147705, "learning_rate": 1.984291857646603e-05, "loss": 1.7364, "step": 26137 }, { "epoch": 0.34, "grad_norm": 3.6124660968780518, "learning_rate": 1.9842900024249103e-05, "loss": 1.9388, "step": 26138 }, { "epoch": 0.34, "grad_norm": 3.9080562591552734, "learning_rate": 1.9842881470945352e-05, "loss": 2.1377, "step": 26139 }, { "epoch": 0.34, "grad_norm": 3.781165361404419, "learning_rate": 1.9842862916554778e-05, "loss": 2.0672, "step": 26140 }, { "epoch": 0.34, "grad_norm": 3.6338694095611572, "learning_rate": 1.9842844361077385e-05, "loss": 1.8022, "step": 26141 }, { "epoch": 0.34, "grad_norm": 3.903510808944702, "learning_rate": 1.9842825804513178e-05, "loss": 1.4863, "step": 26142 }, { "epoch": 0.34, "grad_norm": 4.016891002655029, "learning_rate": 1.9842807246862157e-05, "loss": 2.257, "step": 26143 }, { "epoch": 0.34, "grad_norm": 3.5428202152252197, "learning_rate": 1.984278868812432e-05, "loss": 1.7154, "step": 26144 }, { "epoch": 0.34, "grad_norm": 3.590078830718994, "learning_rate": 1.9842770128299674e-05, "loss": 1.7376, "step": 26145 }, { "epoch": 0.34, "grad_norm": 3.6252031326293945, "learning_rate": 1.984275156738822e-05, "loss": 1.6548, "step": 26146 }, { "epoch": 0.34, "grad_norm": 4.013343811035156, "learning_rate": 1.9842733005389954e-05, "loss": 2.2596, "step": 26147 }, { "epoch": 0.34, "grad_norm": 3.981109380722046, "learning_rate": 1.9842714442304888e-05, "loss": 1.9387, "step": 26148 }, { "epoch": 0.34, "grad_norm": 4.052400588989258, "learning_rate": 1.9842695878133017e-05, "loss": 1.8466, "step": 26149 }, { "epoch": 0.34, "grad_norm": 4.327376842498779, "learning_rate": 1.984267731287435e-05, "loss": 2.4039, "step": 26150 }, { "epoch": 0.34, "grad_norm": 3.706305503845215, "learning_rate": 1.9842658746528877e-05, "loss": 1.8648, "step": 26151 }, { "epoch": 0.34, "grad_norm": 4.745628833770752, "learning_rate": 1.9842640179096614e-05, "loss": 2.8327, "step": 26152 }, { "epoch": 0.34, "grad_norm": 4.434712886810303, "learning_rate": 1.9842621610577553e-05, "loss": 2.0976, "step": 26153 }, { "epoch": 0.34, "grad_norm": 4.046011447906494, "learning_rate": 1.9842603040971702e-05, "loss": 2.0421, "step": 26154 }, { "epoch": 0.34, "grad_norm": 4.415282249450684, "learning_rate": 1.9842584470279057e-05, "loss": 2.0082, "step": 26155 }, { "epoch": 0.34, "grad_norm": 3.5276105403900146, "learning_rate": 1.9842565898499628e-05, "loss": 1.9963, "step": 26156 }, { "epoch": 0.34, "grad_norm": 4.041837215423584, "learning_rate": 1.984254732563341e-05, "loss": 2.1554, "step": 26157 }, { "epoch": 0.34, "grad_norm": 3.589996337890625, "learning_rate": 1.984252875168041e-05, "loss": 1.7791, "step": 26158 }, { "epoch": 0.34, "grad_norm": 4.095710754394531, "learning_rate": 1.9842510176640623e-05, "loss": 2.2233, "step": 26159 }, { "epoch": 0.34, "grad_norm": 4.268110752105713, "learning_rate": 1.984249160051406e-05, "loss": 2.4572, "step": 26160 }, { "epoch": 0.34, "grad_norm": 4.174074649810791, "learning_rate": 1.9842473023300722e-05, "loss": 2.572, "step": 26161 }, { "epoch": 0.34, "grad_norm": 3.6244935989379883, "learning_rate": 1.98424544450006e-05, "loss": 1.8922, "step": 26162 }, { "epoch": 0.34, "grad_norm": 4.2025909423828125, "learning_rate": 1.984243586561371e-05, "loss": 2.2022, "step": 26163 }, { "epoch": 0.34, "grad_norm": 3.3952479362487793, "learning_rate": 1.9842417285140047e-05, "loss": 1.514, "step": 26164 }, { "epoch": 0.34, "grad_norm": 4.4597883224487305, "learning_rate": 1.9842398703579613e-05, "loss": 1.868, "step": 26165 }, { "epoch": 0.34, "grad_norm": 3.447467803955078, "learning_rate": 1.9842380120932412e-05, "loss": 1.6774, "step": 26166 }, { "epoch": 0.34, "grad_norm": 4.032259464263916, "learning_rate": 1.9842361537198444e-05, "loss": 1.8808, "step": 26167 }, { "epoch": 0.34, "grad_norm": 3.932466983795166, "learning_rate": 1.9842342952377716e-05, "loss": 1.7433, "step": 26168 }, { "epoch": 0.34, "grad_norm": 4.542582035064697, "learning_rate": 1.984232436647022e-05, "loss": 2.4257, "step": 26169 }, { "epoch": 0.34, "grad_norm": 3.9121124744415283, "learning_rate": 1.984230577947597e-05, "loss": 1.7222, "step": 26170 }, { "epoch": 0.34, "grad_norm": 4.43735408782959, "learning_rate": 1.984228719139496e-05, "loss": 1.9058, "step": 26171 }, { "epoch": 0.34, "grad_norm": 4.182741165161133, "learning_rate": 1.9842268602227195e-05, "loss": 2.2372, "step": 26172 }, { "epoch": 0.34, "grad_norm": 3.951801300048828, "learning_rate": 1.9842250011972677e-05, "loss": 1.643, "step": 26173 }, { "epoch": 0.34, "grad_norm": 3.651336431503296, "learning_rate": 1.9842231420631408e-05, "loss": 1.7091, "step": 26174 }, { "epoch": 0.34, "grad_norm": 4.210778713226318, "learning_rate": 1.9842212828203385e-05, "loss": 1.8511, "step": 26175 }, { "epoch": 0.34, "grad_norm": 3.921677589416504, "learning_rate": 1.9842194234688623e-05, "loss": 2.2075, "step": 26176 }, { "epoch": 0.34, "grad_norm": 4.374270439147949, "learning_rate": 1.984217564008711e-05, "loss": 2.3191, "step": 26177 }, { "epoch": 0.34, "grad_norm": 4.452078819274902, "learning_rate": 1.9842157044398855e-05, "loss": 2.0871, "step": 26178 }, { "epoch": 0.34, "grad_norm": 3.802358865737915, "learning_rate": 1.984213844762386e-05, "loss": 1.939, "step": 26179 }, { "epoch": 0.34, "grad_norm": 3.8841514587402344, "learning_rate": 1.9842119849762124e-05, "loss": 2.1494, "step": 26180 }, { "epoch": 0.34, "grad_norm": 4.291191101074219, "learning_rate": 1.9842101250813652e-05, "loss": 2.1309, "step": 26181 }, { "epoch": 0.34, "grad_norm": 5.806186199188232, "learning_rate": 1.9842082650778444e-05, "loss": 2.7198, "step": 26182 }, { "epoch": 0.34, "grad_norm": 4.1455488204956055, "learning_rate": 1.9842064049656506e-05, "loss": 1.9903, "step": 26183 }, { "epoch": 0.34, "grad_norm": 3.899322032928467, "learning_rate": 1.9842045447447835e-05, "loss": 2.1633, "step": 26184 }, { "epoch": 0.34, "grad_norm": 4.195610046386719, "learning_rate": 1.9842026844152434e-05, "loss": 2.4292, "step": 26185 }, { "epoch": 0.34, "grad_norm": 4.069189548492432, "learning_rate": 1.984200823977031e-05, "loss": 2.2719, "step": 26186 }, { "epoch": 0.34, "grad_norm": 3.650726318359375, "learning_rate": 1.984198963430146e-05, "loss": 2.0349, "step": 26187 }, { "epoch": 0.34, "grad_norm": 3.9506795406341553, "learning_rate": 1.9841971027745887e-05, "loss": 1.8713, "step": 26188 }, { "epoch": 0.34, "grad_norm": 3.9160561561584473, "learning_rate": 1.984195242010359e-05, "loss": 1.8953, "step": 26189 }, { "epoch": 0.34, "grad_norm": 4.887356758117676, "learning_rate": 1.9841933811374582e-05, "loss": 2.5498, "step": 26190 }, { "epoch": 0.34, "grad_norm": 4.129993438720703, "learning_rate": 1.984191520155885e-05, "loss": 2.0462, "step": 26191 }, { "epoch": 0.34, "grad_norm": 3.872515916824341, "learning_rate": 1.9841896590656406e-05, "loss": 2.1138, "step": 26192 }, { "epoch": 0.34, "grad_norm": 4.358657360076904, "learning_rate": 1.9841877978667252e-05, "loss": 2.1594, "step": 26193 }, { "epoch": 0.34, "grad_norm": 4.257307052612305, "learning_rate": 1.9841859365591385e-05, "loss": 1.9279, "step": 26194 }, { "epoch": 0.34, "grad_norm": 4.096940517425537, "learning_rate": 1.984184075142881e-05, "loss": 2.1896, "step": 26195 }, { "epoch": 0.34, "grad_norm": 3.8476758003234863, "learning_rate": 1.984182213617953e-05, "loss": 2.1306, "step": 26196 }, { "epoch": 0.34, "grad_norm": 4.100691795349121, "learning_rate": 1.9841803519843546e-05, "loss": 2.1616, "step": 26197 }, { "epoch": 0.34, "grad_norm": 4.23769474029541, "learning_rate": 1.984178490242086e-05, "loss": 2.3216, "step": 26198 }, { "epoch": 0.34, "grad_norm": 3.9555206298828125, "learning_rate": 1.9841766283911472e-05, "loss": 1.8072, "step": 26199 }, { "epoch": 0.34, "grad_norm": 3.3843274116516113, "learning_rate": 1.9841747664315386e-05, "loss": 1.9105, "step": 26200 }, { "epoch": 0.34, "grad_norm": 3.3996009826660156, "learning_rate": 1.9841729043632608e-05, "loss": 1.6971, "step": 26201 }, { "epoch": 0.34, "grad_norm": 3.465500593185425, "learning_rate": 1.984171042186313e-05, "loss": 1.7014, "step": 26202 }, { "epoch": 0.34, "grad_norm": 3.710693597793579, "learning_rate": 1.9841691799006967e-05, "loss": 2.2877, "step": 26203 }, { "epoch": 0.34, "grad_norm": 4.021554470062256, "learning_rate": 1.984167317506411e-05, "loss": 1.7828, "step": 26204 }, { "epoch": 0.34, "grad_norm": 4.037043571472168, "learning_rate": 1.9841654550034567e-05, "loss": 1.8941, "step": 26205 }, { "epoch": 0.34, "grad_norm": 3.811012029647827, "learning_rate": 1.9841635923918337e-05, "loss": 1.9792, "step": 26206 }, { "epoch": 0.34, "grad_norm": 4.648167610168457, "learning_rate": 1.9841617296715424e-05, "loss": 2.1584, "step": 26207 }, { "epoch": 0.34, "grad_norm": 3.678257465362549, "learning_rate": 1.984159866842583e-05, "loss": 2.0902, "step": 26208 }, { "epoch": 0.34, "grad_norm": 3.9293904304504395, "learning_rate": 1.9841580039049556e-05, "loss": 1.9706, "step": 26209 }, { "epoch": 0.34, "grad_norm": 4.405266761779785, "learning_rate": 1.9841561408586606e-05, "loss": 2.6053, "step": 26210 }, { "epoch": 0.34, "grad_norm": 3.9538445472717285, "learning_rate": 1.9841542777036978e-05, "loss": 2.0247, "step": 26211 }, { "epoch": 0.34, "grad_norm": 4.51356315612793, "learning_rate": 1.9841524144400678e-05, "loss": 2.4191, "step": 26212 }, { "epoch": 0.34, "grad_norm": 3.619030475616455, "learning_rate": 1.984150551067771e-05, "loss": 1.6397, "step": 26213 }, { "epoch": 0.34, "grad_norm": 3.92983341217041, "learning_rate": 1.9841486875868064e-05, "loss": 2.2226, "step": 26214 }, { "epoch": 0.34, "grad_norm": 3.639561414718628, "learning_rate": 1.9841468239971758e-05, "loss": 2.0722, "step": 26215 }, { "epoch": 0.34, "grad_norm": 3.845149040222168, "learning_rate": 1.9841449602988786e-05, "loss": 1.9561, "step": 26216 }, { "epoch": 0.34, "grad_norm": 4.572114944458008, "learning_rate": 1.984143096491915e-05, "loss": 2.4391, "step": 26217 }, { "epoch": 0.34, "grad_norm": 4.423295021057129, "learning_rate": 1.9841412325762853e-05, "loss": 2.1348, "step": 26218 }, { "epoch": 0.34, "grad_norm": 3.8416097164154053, "learning_rate": 1.98413936855199e-05, "loss": 2.0552, "step": 26219 }, { "epoch": 0.34, "grad_norm": 4.255554676055908, "learning_rate": 1.9841375044190286e-05, "loss": 2.4544, "step": 26220 }, { "epoch": 0.34, "grad_norm": 3.4809634685516357, "learning_rate": 1.984135640177402e-05, "loss": 1.7732, "step": 26221 }, { "epoch": 0.34, "grad_norm": 4.347461223602295, "learning_rate": 1.98413377582711e-05, "loss": 2.1917, "step": 26222 }, { "epoch": 0.34, "grad_norm": 4.098797798156738, "learning_rate": 1.9841319113681533e-05, "loss": 2.4469, "step": 26223 }, { "epoch": 0.34, "grad_norm": 3.9544742107391357, "learning_rate": 1.984130046800531e-05, "loss": 2.1579, "step": 26224 }, { "epoch": 0.34, "grad_norm": 4.371168613433838, "learning_rate": 1.9841281821242445e-05, "loss": 2.2701, "step": 26225 }, { "epoch": 0.34, "grad_norm": 3.9308178424835205, "learning_rate": 1.9841263173392935e-05, "loss": 1.9688, "step": 26226 }, { "epoch": 0.34, "grad_norm": 3.6152963638305664, "learning_rate": 1.9841244524456783e-05, "loss": 2.0822, "step": 26227 }, { "epoch": 0.34, "grad_norm": 3.974311113357544, "learning_rate": 1.9841225874433993e-05, "loss": 2.2396, "step": 26228 }, { "epoch": 0.34, "grad_norm": 4.376421928405762, "learning_rate": 1.984120722332456e-05, "loss": 2.3694, "step": 26229 }, { "epoch": 0.34, "grad_norm": 3.8486037254333496, "learning_rate": 1.9841188571128495e-05, "loss": 2.0729, "step": 26230 }, { "epoch": 0.34, "grad_norm": 3.708850383758545, "learning_rate": 1.9841169917845794e-05, "loss": 1.6479, "step": 26231 }, { "epoch": 0.34, "grad_norm": 4.155200481414795, "learning_rate": 1.9841151263476458e-05, "loss": 2.4261, "step": 26232 }, { "epoch": 0.34, "grad_norm": 4.140247821807861, "learning_rate": 1.9841132608020497e-05, "loss": 2.3609, "step": 26233 }, { "epoch": 0.34, "grad_norm": 3.944735527038574, "learning_rate": 1.9841113951477904e-05, "loss": 1.9889, "step": 26234 }, { "epoch": 0.34, "grad_norm": 3.775606870651245, "learning_rate": 1.9841095293848687e-05, "loss": 2.0088, "step": 26235 }, { "epoch": 0.34, "grad_norm": 3.636211633682251, "learning_rate": 1.9841076635132846e-05, "loss": 1.6802, "step": 26236 }, { "epoch": 0.34, "grad_norm": 3.892592668533325, "learning_rate": 1.9841057975330383e-05, "loss": 2.1762, "step": 26237 }, { "epoch": 0.34, "grad_norm": 4.052183151245117, "learning_rate": 1.98410393144413e-05, "loss": 2.3364, "step": 26238 }, { "epoch": 0.34, "grad_norm": 4.281386852264404, "learning_rate": 1.98410206524656e-05, "loss": 2.0542, "step": 26239 }, { "epoch": 0.34, "grad_norm": 3.8624532222747803, "learning_rate": 1.984100198940328e-05, "loss": 1.7131, "step": 26240 }, { "epoch": 0.34, "grad_norm": 3.6881866455078125, "learning_rate": 1.9840983325254352e-05, "loss": 2.1775, "step": 26241 }, { "epoch": 0.34, "grad_norm": 3.799582004547119, "learning_rate": 1.984096466001881e-05, "loss": 1.703, "step": 26242 }, { "epoch": 0.34, "grad_norm": 3.993809223175049, "learning_rate": 1.984094599369666e-05, "loss": 1.4387, "step": 26243 }, { "epoch": 0.34, "grad_norm": 4.006734848022461, "learning_rate": 1.9840927326287902e-05, "loss": 2.3765, "step": 26244 }, { "epoch": 0.34, "grad_norm": 3.8502113819122314, "learning_rate": 1.984090865779254e-05, "loss": 2.0886, "step": 26245 }, { "epoch": 0.34, "grad_norm": 3.907860040664673, "learning_rate": 1.9840889988210573e-05, "loss": 2.2899, "step": 26246 }, { "epoch": 0.34, "grad_norm": 3.996593475341797, "learning_rate": 1.9840871317542002e-05, "loss": 1.9222, "step": 26247 }, { "epoch": 0.34, "grad_norm": 3.673762083053589, "learning_rate": 1.984085264578684e-05, "loss": 1.7721, "step": 26248 }, { "epoch": 0.34, "grad_norm": 3.9451160430908203, "learning_rate": 1.9840833972945074e-05, "loss": 2.0476, "step": 26249 }, { "epoch": 0.34, "grad_norm": 4.182268142700195, "learning_rate": 1.9840815299016714e-05, "loss": 1.9281, "step": 26250 }, { "epoch": 0.34, "grad_norm": 3.562285900115967, "learning_rate": 1.9840796624001763e-05, "loss": 1.7905, "step": 26251 }, { "epoch": 0.34, "grad_norm": 3.6293342113494873, "learning_rate": 1.9840777947900218e-05, "loss": 1.7651, "step": 26252 }, { "epoch": 0.34, "grad_norm": 4.45373010635376, "learning_rate": 1.9840759270712085e-05, "loss": 2.0925, "step": 26253 }, { "epoch": 0.34, "grad_norm": 4.047065258026123, "learning_rate": 1.984074059243737e-05, "loss": 2.0455, "step": 26254 }, { "epoch": 0.34, "grad_norm": 3.8450469970703125, "learning_rate": 1.9840721913076064e-05, "loss": 2.0549, "step": 26255 }, { "epoch": 0.34, "grad_norm": 3.950646162033081, "learning_rate": 1.984070323262818e-05, "loss": 1.8881, "step": 26256 }, { "epoch": 0.34, "grad_norm": 4.2001543045043945, "learning_rate": 1.984068455109371e-05, "loss": 1.8206, "step": 26257 }, { "epoch": 0.34, "grad_norm": 3.8205792903900146, "learning_rate": 1.9840665868472668e-05, "loss": 2.0115, "step": 26258 }, { "epoch": 0.34, "grad_norm": 3.636521339416504, "learning_rate": 1.9840647184765045e-05, "loss": 1.8684, "step": 26259 }, { "epoch": 0.34, "grad_norm": 3.6683108806610107, "learning_rate": 1.9840628499970848e-05, "loss": 2.0891, "step": 26260 }, { "epoch": 0.34, "grad_norm": 3.8497235774993896, "learning_rate": 1.984060981409008e-05, "loss": 1.9716, "step": 26261 }, { "epoch": 0.34, "grad_norm": 4.009505748748779, "learning_rate": 1.9840591127122742e-05, "loss": 1.7994, "step": 26262 }, { "epoch": 0.34, "grad_norm": 3.8171558380126953, "learning_rate": 1.9840572439068837e-05, "loss": 1.7654, "step": 26263 }, { "epoch": 0.34, "grad_norm": 3.4002890586853027, "learning_rate": 1.984055374992836e-05, "loss": 1.5571, "step": 26264 }, { "epoch": 0.34, "grad_norm": 3.7599377632141113, "learning_rate": 1.9840535059701325e-05, "loss": 2.1185, "step": 26265 }, { "epoch": 0.34, "grad_norm": 4.282838344573975, "learning_rate": 1.9840516368387725e-05, "loss": 2.0701, "step": 26266 }, { "epoch": 0.34, "grad_norm": 4.04279088973999, "learning_rate": 1.984049767598757e-05, "loss": 2.4295, "step": 26267 }, { "epoch": 0.34, "grad_norm": 3.0180866718292236, "learning_rate": 1.984047898250085e-05, "loss": 1.5486, "step": 26268 }, { "epoch": 0.34, "grad_norm": 4.512703895568848, "learning_rate": 1.9840460287927578e-05, "loss": 2.4773, "step": 26269 }, { "epoch": 0.34, "grad_norm": 3.7861855030059814, "learning_rate": 1.984044159226775e-05, "loss": 1.8325, "step": 26270 }, { "epoch": 0.34, "grad_norm": 5.130395889282227, "learning_rate": 1.9840422895521373e-05, "loss": 2.455, "step": 26271 }, { "epoch": 0.34, "grad_norm": 4.145605087280273, "learning_rate": 1.9840404197688445e-05, "loss": 2.2578, "step": 26272 }, { "epoch": 0.34, "grad_norm": 4.1003336906433105, "learning_rate": 1.984038549876897e-05, "loss": 2.1987, "step": 26273 }, { "epoch": 0.34, "grad_norm": 3.906888723373413, "learning_rate": 1.984036679876295e-05, "loss": 2.0069, "step": 26274 }, { "epoch": 0.34, "grad_norm": 4.130745887756348, "learning_rate": 1.9840348097670385e-05, "loss": 2.3646, "step": 26275 }, { "epoch": 0.34, "grad_norm": 3.6655313968658447, "learning_rate": 1.984032939549128e-05, "loss": 1.6773, "step": 26276 }, { "epoch": 0.34, "grad_norm": 3.625065326690674, "learning_rate": 1.9840310692225632e-05, "loss": 1.7868, "step": 26277 }, { "epoch": 0.34, "grad_norm": 4.049015045166016, "learning_rate": 1.984029198787345e-05, "loss": 2.1494, "step": 26278 }, { "epoch": 0.34, "grad_norm": 4.116377353668213, "learning_rate": 1.9840273282434734e-05, "loss": 1.8544, "step": 26279 }, { "epoch": 0.34, "grad_norm": 3.682537078857422, "learning_rate": 1.9840254575909483e-05, "loss": 1.7414, "step": 26280 }, { "epoch": 0.34, "grad_norm": 3.7384254932403564, "learning_rate": 1.9840235868297702e-05, "loss": 1.5957, "step": 26281 }, { "epoch": 0.34, "grad_norm": 3.736496925354004, "learning_rate": 1.984021715959939e-05, "loss": 1.7262, "step": 26282 }, { "epoch": 0.34, "grad_norm": 3.73877215385437, "learning_rate": 1.9840198449814554e-05, "loss": 1.8253, "step": 26283 }, { "epoch": 0.34, "grad_norm": 3.6333441734313965, "learning_rate": 1.9840179738943194e-05, "loss": 2.0142, "step": 26284 }, { "epoch": 0.34, "grad_norm": 4.055039405822754, "learning_rate": 1.9840161026985308e-05, "loss": 2.1993, "step": 26285 }, { "epoch": 0.34, "grad_norm": 3.837898015975952, "learning_rate": 1.98401423139409e-05, "loss": 1.6896, "step": 26286 }, { "epoch": 0.34, "grad_norm": 4.0090203285217285, "learning_rate": 1.9840123599809976e-05, "loss": 2.0299, "step": 26287 }, { "epoch": 0.34, "grad_norm": 3.769731283187866, "learning_rate": 1.9840104884592534e-05, "loss": 1.6706, "step": 26288 }, { "epoch": 0.34, "grad_norm": 4.1383256912231445, "learning_rate": 1.984008616828858e-05, "loss": 2.191, "step": 26289 }, { "epoch": 0.34, "grad_norm": 3.997812032699585, "learning_rate": 1.984006745089811e-05, "loss": 2.1829, "step": 26290 }, { "epoch": 0.34, "grad_norm": 3.8547375202178955, "learning_rate": 1.984004873242113e-05, "loss": 2.3622, "step": 26291 }, { "epoch": 0.34, "grad_norm": 3.2808616161346436, "learning_rate": 1.9840030012857644e-05, "loss": 1.5173, "step": 26292 }, { "epoch": 0.34, "grad_norm": 3.980958938598633, "learning_rate": 1.9840011292207654e-05, "loss": 2.4301, "step": 26293 }, { "epoch": 0.34, "grad_norm": 4.114819049835205, "learning_rate": 1.9839992570471155e-05, "loss": 2.0728, "step": 26294 }, { "epoch": 0.34, "grad_norm": 4.152163505554199, "learning_rate": 1.9839973847648157e-05, "loss": 1.8642, "step": 26295 }, { "epoch": 0.34, "grad_norm": 3.5921521186828613, "learning_rate": 1.9839955123738656e-05, "loss": 1.9677, "step": 26296 }, { "epoch": 0.34, "grad_norm": 3.745516538619995, "learning_rate": 1.9839936398742662e-05, "loss": 1.7955, "step": 26297 }, { "epoch": 0.34, "grad_norm": 3.824097156524658, "learning_rate": 1.9839917672660166e-05, "loss": 1.4786, "step": 26298 }, { "epoch": 0.34, "grad_norm": 3.5215442180633545, "learning_rate": 1.9839898945491177e-05, "loss": 1.9266, "step": 26299 }, { "epoch": 0.34, "grad_norm": 3.6031088829040527, "learning_rate": 1.98398802172357e-05, "loss": 1.8365, "step": 26300 }, { "epoch": 0.34, "grad_norm": 3.312455415725708, "learning_rate": 1.9839861487893732e-05, "loss": 1.4771, "step": 26301 }, { "epoch": 0.34, "grad_norm": 4.227169513702393, "learning_rate": 1.9839842757465275e-05, "loss": 2.5048, "step": 26302 }, { "epoch": 0.34, "grad_norm": 3.8740148544311523, "learning_rate": 1.9839824025950333e-05, "loss": 2.1976, "step": 26303 }, { "epoch": 0.34, "grad_norm": 4.218230724334717, "learning_rate": 1.983980529334891e-05, "loss": 1.9988, "step": 26304 }, { "epoch": 0.34, "grad_norm": 4.037731647491455, "learning_rate": 1.9839786559661002e-05, "loss": 2.1084, "step": 26305 }, { "epoch": 0.34, "grad_norm": 3.9468202590942383, "learning_rate": 1.9839767824886616e-05, "loss": 2.1909, "step": 26306 }, { "epoch": 0.34, "grad_norm": 4.001460552215576, "learning_rate": 1.983974908902575e-05, "loss": 2.1755, "step": 26307 }, { "epoch": 0.34, "grad_norm": 3.8266148567199707, "learning_rate": 1.9839730352078414e-05, "loss": 1.8128, "step": 26308 }, { "epoch": 0.34, "grad_norm": 3.907301425933838, "learning_rate": 1.98397116140446e-05, "loss": 1.889, "step": 26309 }, { "epoch": 0.34, "grad_norm": 4.192088603973389, "learning_rate": 1.983969287492432e-05, "loss": 1.6291, "step": 26310 }, { "epoch": 0.34, "grad_norm": 3.846572160720825, "learning_rate": 1.9839674134717566e-05, "loss": 1.855, "step": 26311 }, { "epoch": 0.34, "grad_norm": 3.635930061340332, "learning_rate": 1.983965539342435e-05, "loss": 2.1361, "step": 26312 }, { "epoch": 0.34, "grad_norm": 3.903630018234253, "learning_rate": 1.9839636651044666e-05, "loss": 1.8073, "step": 26313 }, { "epoch": 0.34, "grad_norm": 3.65598726272583, "learning_rate": 1.9839617907578515e-05, "loss": 1.9696, "step": 26314 }, { "epoch": 0.34, "grad_norm": 3.5710928440093994, "learning_rate": 1.983959916302591e-05, "loss": 1.5911, "step": 26315 }, { "epoch": 0.34, "grad_norm": 3.6812644004821777, "learning_rate": 1.9839580417386843e-05, "loss": 2.1997, "step": 26316 }, { "epoch": 0.34, "grad_norm": 3.4568393230438232, "learning_rate": 1.983956167066132e-05, "loss": 1.7747, "step": 26317 }, { "epoch": 0.34, "grad_norm": 3.4867169857025146, "learning_rate": 1.9839542922849345e-05, "loss": 1.4784, "step": 26318 }, { "epoch": 0.34, "grad_norm": 3.84855580329895, "learning_rate": 1.9839524173950915e-05, "loss": 2.2458, "step": 26319 }, { "epoch": 0.34, "grad_norm": 3.7837541103363037, "learning_rate": 1.9839505423966034e-05, "loss": 1.9822, "step": 26320 }, { "epoch": 0.34, "grad_norm": 4.56569766998291, "learning_rate": 1.9839486672894703e-05, "loss": 2.429, "step": 26321 }, { "epoch": 0.34, "grad_norm": 4.221386909484863, "learning_rate": 1.983946792073693e-05, "loss": 2.3796, "step": 26322 }, { "epoch": 0.34, "grad_norm": 4.102799892425537, "learning_rate": 1.983944916749271e-05, "loss": 1.9846, "step": 26323 }, { "epoch": 0.34, "grad_norm": 4.0176191329956055, "learning_rate": 1.9839430413162052e-05, "loss": 1.9156, "step": 26324 }, { "epoch": 0.34, "grad_norm": 4.387041091918945, "learning_rate": 1.9839411657744948e-05, "loss": 2.0833, "step": 26325 }, { "epoch": 0.34, "grad_norm": 4.5408034324646, "learning_rate": 1.983939290124141e-05, "loss": 2.2888, "step": 26326 }, { "epoch": 0.34, "grad_norm": 3.8958306312561035, "learning_rate": 1.9839374143651435e-05, "loss": 1.8407, "step": 26327 }, { "epoch": 0.34, "grad_norm": 3.7260987758636475, "learning_rate": 1.9839355384975026e-05, "loss": 1.7942, "step": 26328 }, { "epoch": 0.34, "grad_norm": 3.6089513301849365, "learning_rate": 1.9839336625212186e-05, "loss": 1.7178, "step": 26329 }, { "epoch": 0.34, "grad_norm": 3.4137532711029053, "learning_rate": 1.9839317864362914e-05, "loss": 1.7443, "step": 26330 }, { "epoch": 0.34, "grad_norm": 4.2570905685424805, "learning_rate": 1.9839299102427217e-05, "loss": 2.0685, "step": 26331 }, { "epoch": 0.34, "grad_norm": 4.157824516296387, "learning_rate": 1.9839280339405093e-05, "loss": 2.2416, "step": 26332 }, { "epoch": 0.34, "grad_norm": 3.657787799835205, "learning_rate": 1.9839261575296546e-05, "loss": 1.9286, "step": 26333 }, { "epoch": 0.34, "grad_norm": 4.0387749671936035, "learning_rate": 1.983924281010158e-05, "loss": 2.3156, "step": 26334 }, { "epoch": 0.34, "grad_norm": 4.631213665008545, "learning_rate": 1.9839224043820193e-05, "loss": 2.3682, "step": 26335 }, { "epoch": 0.34, "grad_norm": 4.212562084197998, "learning_rate": 1.9839205276452386e-05, "loss": 2.1419, "step": 26336 }, { "epoch": 0.34, "grad_norm": 3.707846164703369, "learning_rate": 1.9839186507998167e-05, "loss": 2.0877, "step": 26337 }, { "epoch": 0.34, "grad_norm": 4.437475204467773, "learning_rate": 1.9839167738457534e-05, "loss": 2.3797, "step": 26338 }, { "epoch": 0.34, "grad_norm": 4.769163131713867, "learning_rate": 1.9839148967830487e-05, "loss": 2.7094, "step": 26339 }, { "epoch": 0.34, "grad_norm": 3.546113967895508, "learning_rate": 1.9839130196117038e-05, "loss": 1.7988, "step": 26340 }, { "epoch": 0.34, "grad_norm": 3.383645534515381, "learning_rate": 1.9839111423317178e-05, "loss": 1.5025, "step": 26341 }, { "epoch": 0.34, "grad_norm": 4.053619861602783, "learning_rate": 1.9839092649430914e-05, "loss": 1.9346, "step": 26342 }, { "epoch": 0.34, "grad_norm": 4.050906658172607, "learning_rate": 1.9839073874458245e-05, "loss": 2.1271, "step": 26343 }, { "epoch": 0.34, "grad_norm": 4.46980619430542, "learning_rate": 1.9839055098399175e-05, "loss": 1.9306, "step": 26344 }, { "epoch": 0.34, "grad_norm": 4.397058963775635, "learning_rate": 1.9839036321253708e-05, "loss": 2.0852, "step": 26345 }, { "epoch": 0.34, "grad_norm": 4.406789779663086, "learning_rate": 1.9839017543021846e-05, "loss": 2.2264, "step": 26346 }, { "epoch": 0.34, "grad_norm": 4.479260444641113, "learning_rate": 1.9838998763703586e-05, "loss": 2.6067, "step": 26347 }, { "epoch": 0.34, "grad_norm": 4.468563556671143, "learning_rate": 1.983897998329894e-05, "loss": 2.1079, "step": 26348 }, { "epoch": 0.34, "grad_norm": 4.228771686553955, "learning_rate": 1.9838961201807894e-05, "loss": 1.977, "step": 26349 }, { "epoch": 0.34, "grad_norm": 3.7414169311523438, "learning_rate": 1.983894241923047e-05, "loss": 1.7913, "step": 26350 }, { "epoch": 0.34, "grad_norm": 3.5010719299316406, "learning_rate": 1.9838923635566653e-05, "loss": 1.7644, "step": 26351 }, { "epoch": 0.34, "grad_norm": 4.296661853790283, "learning_rate": 1.9838904850816455e-05, "loss": 2.1267, "step": 26352 }, { "epoch": 0.34, "grad_norm": 4.085175514221191, "learning_rate": 1.9838886064979874e-05, "loss": 2.1953, "step": 26353 }, { "epoch": 0.34, "grad_norm": 3.106856107711792, "learning_rate": 1.983886727805691e-05, "loss": 1.574, "step": 26354 }, { "epoch": 0.34, "grad_norm": 4.802020072937012, "learning_rate": 1.9838848490047573e-05, "loss": 2.7797, "step": 26355 }, { "epoch": 0.34, "grad_norm": 3.782221794128418, "learning_rate": 1.983882970095186e-05, "loss": 2.3071, "step": 26356 }, { "epoch": 0.34, "grad_norm": 4.058922290802002, "learning_rate": 1.983881091076977e-05, "loss": 2.2453, "step": 26357 }, { "epoch": 0.34, "grad_norm": 3.916465997695923, "learning_rate": 1.9838792119501308e-05, "loss": 1.8762, "step": 26358 }, { "epoch": 0.34, "grad_norm": 3.667597770690918, "learning_rate": 1.9838773327146483e-05, "loss": 1.4675, "step": 26359 }, { "epoch": 0.34, "grad_norm": 4.479726314544678, "learning_rate": 1.9838754533705284e-05, "loss": 2.1382, "step": 26360 }, { "epoch": 0.34, "grad_norm": 3.6701393127441406, "learning_rate": 1.983873573917772e-05, "loss": 2.1336, "step": 26361 }, { "epoch": 0.34, "grad_norm": 4.138761043548584, "learning_rate": 1.9838716943563794e-05, "loss": 2.0396, "step": 26362 }, { "epoch": 0.34, "grad_norm": 3.7201781272888184, "learning_rate": 1.9838698146863507e-05, "loss": 2.1306, "step": 26363 }, { "epoch": 0.34, "grad_norm": 3.0353121757507324, "learning_rate": 1.9838679349076863e-05, "loss": 1.4414, "step": 26364 }, { "epoch": 0.34, "grad_norm": 4.552013397216797, "learning_rate": 1.9838660550203855e-05, "loss": 2.6124, "step": 26365 }, { "epoch": 0.34, "grad_norm": 3.924833297729492, "learning_rate": 1.9838641750244497e-05, "loss": 2.1588, "step": 26366 }, { "epoch": 0.34, "grad_norm": 3.7354423999786377, "learning_rate": 1.9838622949198785e-05, "loss": 2.1171, "step": 26367 }, { "epoch": 0.34, "grad_norm": 4.959699630737305, "learning_rate": 1.9838604147066727e-05, "loss": 2.385, "step": 26368 }, { "epoch": 0.34, "grad_norm": 4.576727390289307, "learning_rate": 1.9838585343848314e-05, "loss": 1.893, "step": 26369 }, { "epoch": 0.34, "grad_norm": 3.4820377826690674, "learning_rate": 1.9838566539543555e-05, "loss": 1.5492, "step": 26370 }, { "epoch": 0.34, "grad_norm": 4.078396320343018, "learning_rate": 1.9838547734152453e-05, "loss": 2.1195, "step": 26371 }, { "epoch": 0.34, "grad_norm": 3.8558456897735596, "learning_rate": 1.9838528927675007e-05, "loss": 1.9517, "step": 26372 }, { "epoch": 0.34, "grad_norm": 3.664952039718628, "learning_rate": 1.9838510120111224e-05, "loss": 2.0249, "step": 26373 }, { "epoch": 0.34, "grad_norm": 4.180379390716553, "learning_rate": 1.9838491311461098e-05, "loss": 2.4228, "step": 26374 }, { "epoch": 0.34, "grad_norm": 3.225902795791626, "learning_rate": 1.983847250172464e-05, "loss": 1.4236, "step": 26375 }, { "epoch": 0.34, "grad_norm": 3.53959059715271, "learning_rate": 1.9838453690901846e-05, "loss": 2.0733, "step": 26376 }, { "epoch": 0.34, "grad_norm": 3.971900463104248, "learning_rate": 1.9838434878992716e-05, "loss": 1.8937, "step": 26377 }, { "epoch": 0.34, "grad_norm": 3.741262674331665, "learning_rate": 1.983841606599726e-05, "loss": 2.1493, "step": 26378 }, { "epoch": 0.34, "grad_norm": 3.901179552078247, "learning_rate": 1.9838397251915474e-05, "loss": 2.4142, "step": 26379 }, { "epoch": 0.34, "grad_norm": 3.3099582195281982, "learning_rate": 1.983837843674736e-05, "loss": 1.6202, "step": 26380 }, { "epoch": 0.34, "grad_norm": 3.6570825576782227, "learning_rate": 1.983835962049293e-05, "loss": 1.7933, "step": 26381 }, { "epoch": 0.34, "grad_norm": 3.353511333465576, "learning_rate": 1.983834080315217e-05, "loss": 1.7894, "step": 26382 }, { "epoch": 0.34, "grad_norm": 3.8671445846557617, "learning_rate": 1.9838321984725095e-05, "loss": 1.9228, "step": 26383 }, { "epoch": 0.34, "grad_norm": 4.05963134765625, "learning_rate": 1.98383031652117e-05, "loss": 1.6902, "step": 26384 }, { "epoch": 0.34, "grad_norm": 3.805283784866333, "learning_rate": 1.9838284344611993e-05, "loss": 1.9631, "step": 26385 }, { "epoch": 0.34, "grad_norm": 3.923926591873169, "learning_rate": 1.983826552292597e-05, "loss": 2.1617, "step": 26386 }, { "epoch": 0.34, "grad_norm": 3.734410524368286, "learning_rate": 1.9838246700153634e-05, "loss": 1.9713, "step": 26387 }, { "epoch": 0.34, "grad_norm": 3.6474802494049072, "learning_rate": 1.983822787629499e-05, "loss": 1.865, "step": 26388 }, { "epoch": 0.34, "grad_norm": 4.6180419921875, "learning_rate": 1.9838209051350038e-05, "loss": 2.2321, "step": 26389 }, { "epoch": 0.34, "grad_norm": 3.4517710208892822, "learning_rate": 1.9838190225318782e-05, "loss": 2.0954, "step": 26390 }, { "epoch": 0.34, "grad_norm": 4.050531387329102, "learning_rate": 1.9838171398201224e-05, "loss": 2.0885, "step": 26391 }, { "epoch": 0.34, "grad_norm": 3.597393274307251, "learning_rate": 1.9838152569997362e-05, "loss": 1.9089, "step": 26392 }, { "epoch": 0.34, "grad_norm": 4.17769718170166, "learning_rate": 1.9838133740707205e-05, "loss": 2.2789, "step": 26393 }, { "epoch": 0.34, "grad_norm": 3.917022466659546, "learning_rate": 1.9838114910330744e-05, "loss": 2.2206, "step": 26394 }, { "epoch": 0.34, "grad_norm": 4.414881706237793, "learning_rate": 1.9838096078867995e-05, "loss": 2.1384, "step": 26395 }, { "epoch": 0.34, "grad_norm": 4.930187702178955, "learning_rate": 1.9838077246318952e-05, "loss": 2.3913, "step": 26396 }, { "epoch": 0.34, "grad_norm": 4.641992568969727, "learning_rate": 1.9838058412683618e-05, "loss": 2.1223, "step": 26397 }, { "epoch": 0.34, "grad_norm": 4.232603073120117, "learning_rate": 1.9838039577961993e-05, "loss": 2.3198, "step": 26398 }, { "epoch": 0.34, "grad_norm": 3.7999351024627686, "learning_rate": 1.9838020742154083e-05, "loss": 2.0279, "step": 26399 }, { "epoch": 0.34, "grad_norm": 4.6148271560668945, "learning_rate": 1.983800190525989e-05, "loss": 2.5664, "step": 26400 }, { "epoch": 0.34, "grad_norm": 3.5734899044036865, "learning_rate": 1.9837983067279413e-05, "loss": 1.6187, "step": 26401 }, { "epoch": 0.34, "grad_norm": 3.641373872756958, "learning_rate": 1.9837964228212655e-05, "loss": 1.8948, "step": 26402 }, { "epoch": 0.34, "grad_norm": 3.6734044551849365, "learning_rate": 1.9837945388059622e-05, "loss": 1.9232, "step": 26403 }, { "epoch": 0.34, "grad_norm": 3.9440298080444336, "learning_rate": 1.9837926546820313e-05, "loss": 1.9716, "step": 26404 }, { "epoch": 0.34, "grad_norm": 4.253240585327148, "learning_rate": 1.983790770449473e-05, "loss": 1.8855, "step": 26405 }, { "epoch": 0.34, "grad_norm": 4.0013837814331055, "learning_rate": 1.983788886108287e-05, "loss": 2.3362, "step": 26406 }, { "epoch": 0.34, "grad_norm": 4.086711883544922, "learning_rate": 1.9837870016584742e-05, "loss": 2.0742, "step": 26407 }, { "epoch": 0.34, "grad_norm": 4.3558807373046875, "learning_rate": 1.9837851171000348e-05, "loss": 2.4444, "step": 26408 }, { "epoch": 0.34, "grad_norm": 3.883168935775757, "learning_rate": 1.9837832324329687e-05, "loss": 1.5811, "step": 26409 }, { "epoch": 0.34, "grad_norm": 3.5790581703186035, "learning_rate": 1.9837813476572765e-05, "loss": 1.6947, "step": 26410 }, { "epoch": 0.34, "grad_norm": 4.41916036605835, "learning_rate": 1.983779462772958e-05, "loss": 2.4848, "step": 26411 }, { "epoch": 0.34, "grad_norm": 3.693413734436035, "learning_rate": 1.9837775777800137e-05, "loss": 1.5868, "step": 26412 }, { "epoch": 0.34, "grad_norm": 3.6411426067352295, "learning_rate": 1.9837756926784435e-05, "loss": 1.7656, "step": 26413 }, { "epoch": 0.34, "grad_norm": 3.7413127422332764, "learning_rate": 1.9837738074682477e-05, "loss": 1.8504, "step": 26414 }, { "epoch": 0.34, "grad_norm": 3.9642276763916016, "learning_rate": 1.9837719221494267e-05, "loss": 1.8588, "step": 26415 }, { "epoch": 0.34, "grad_norm": 4.865520477294922, "learning_rate": 1.9837700367219806e-05, "loss": 1.8489, "step": 26416 }, { "epoch": 0.34, "grad_norm": 4.177759170532227, "learning_rate": 1.9837681511859096e-05, "loss": 2.0956, "step": 26417 }, { "epoch": 0.34, "grad_norm": 3.867250680923462, "learning_rate": 1.9837662655412137e-05, "loss": 2.14, "step": 26418 }, { "epoch": 0.34, "grad_norm": 4.022645950317383, "learning_rate": 1.9837643797878936e-05, "loss": 2.3183, "step": 26419 }, { "epoch": 0.34, "grad_norm": 4.19891357421875, "learning_rate": 1.983762493925949e-05, "loss": 2.0529, "step": 26420 }, { "epoch": 0.34, "grad_norm": 4.497743129730225, "learning_rate": 1.9837606079553806e-05, "loss": 1.8228, "step": 26421 }, { "epoch": 0.34, "grad_norm": 4.587368488311768, "learning_rate": 1.9837587218761884e-05, "loss": 2.5375, "step": 26422 }, { "epoch": 0.34, "grad_norm": 4.010683536529541, "learning_rate": 1.983756835688372e-05, "loss": 2.0232, "step": 26423 }, { "epoch": 0.34, "grad_norm": 3.7435028553009033, "learning_rate": 1.9837549493919326e-05, "loss": 2.0477, "step": 26424 }, { "epoch": 0.34, "grad_norm": 3.332798719406128, "learning_rate": 1.98375306298687e-05, "loss": 1.5133, "step": 26425 }, { "epoch": 0.34, "grad_norm": 3.446066379547119, "learning_rate": 1.983751176473184e-05, "loss": 1.7274, "step": 26426 }, { "epoch": 0.34, "grad_norm": 4.036458969116211, "learning_rate": 1.9837492898508755e-05, "loss": 1.9038, "step": 26427 }, { "epoch": 0.34, "grad_norm": 3.2180662155151367, "learning_rate": 1.9837474031199442e-05, "loss": 1.6067, "step": 26428 }, { "epoch": 0.34, "grad_norm": 3.803823709487915, "learning_rate": 1.9837455162803905e-05, "loss": 2.0473, "step": 26429 }, { "epoch": 0.34, "grad_norm": 3.8324954509735107, "learning_rate": 1.983743629332215e-05, "loss": 2.0671, "step": 26430 }, { "epoch": 0.34, "grad_norm": 3.9165725708007812, "learning_rate": 1.9837417422754172e-05, "loss": 2.0543, "step": 26431 }, { "epoch": 0.34, "grad_norm": 4.434915065765381, "learning_rate": 1.9837398551099977e-05, "loss": 2.3539, "step": 26432 }, { "epoch": 0.34, "grad_norm": 3.587310314178467, "learning_rate": 1.9837379678359563e-05, "loss": 1.7306, "step": 26433 }, { "epoch": 0.34, "grad_norm": 4.192529201507568, "learning_rate": 1.983736080453294e-05, "loss": 2.0333, "step": 26434 }, { "epoch": 0.34, "grad_norm": 3.551342725753784, "learning_rate": 1.9837341929620102e-05, "loss": 1.6662, "step": 26435 }, { "epoch": 0.34, "grad_norm": 4.079762935638428, "learning_rate": 1.9837323053621055e-05, "loss": 1.9163, "step": 26436 }, { "epoch": 0.34, "grad_norm": 4.485111713409424, "learning_rate": 1.9837304176535803e-05, "loss": 2.6659, "step": 26437 }, { "epoch": 0.34, "grad_norm": 3.375908613204956, "learning_rate": 1.9837285298364343e-05, "loss": 1.8964, "step": 26438 }, { "epoch": 0.34, "grad_norm": 3.8515384197235107, "learning_rate": 1.9837266419106682e-05, "loss": 2.2269, "step": 26439 }, { "epoch": 0.34, "grad_norm": 4.205151557922363, "learning_rate": 1.983724753876282e-05, "loss": 2.0321, "step": 26440 }, { "epoch": 0.34, "grad_norm": 3.485846757888794, "learning_rate": 1.9837228657332757e-05, "loss": 1.8327, "step": 26441 }, { "epoch": 0.34, "grad_norm": 3.890244722366333, "learning_rate": 1.98372097748165e-05, "loss": 1.8594, "step": 26442 }, { "epoch": 0.34, "grad_norm": 3.603627920150757, "learning_rate": 1.9837190891214043e-05, "loss": 1.7712, "step": 26443 }, { "epoch": 0.34, "grad_norm": 4.064785003662109, "learning_rate": 1.9837172006525397e-05, "loss": 2.3413, "step": 26444 }, { "epoch": 0.34, "grad_norm": 3.713437080383301, "learning_rate": 1.983715312075056e-05, "loss": 1.8188, "step": 26445 }, { "epoch": 0.34, "grad_norm": 4.065375804901123, "learning_rate": 1.9837134233889534e-05, "loss": 1.814, "step": 26446 }, { "epoch": 0.34, "grad_norm": 4.716038227081299, "learning_rate": 1.983711534594232e-05, "loss": 2.231, "step": 26447 }, { "epoch": 0.34, "grad_norm": 4.57110595703125, "learning_rate": 1.9837096456908927e-05, "loss": 2.4264, "step": 26448 }, { "epoch": 0.34, "grad_norm": 3.6509830951690674, "learning_rate": 1.9837077566789346e-05, "loss": 1.7397, "step": 26449 }, { "epoch": 0.34, "grad_norm": 4.167213439941406, "learning_rate": 1.9837058675583587e-05, "loss": 2.0326, "step": 26450 }, { "epoch": 0.34, "grad_norm": 3.7574410438537598, "learning_rate": 1.9837039783291648e-05, "loss": 2.2463, "step": 26451 }, { "epoch": 0.34, "grad_norm": 3.807878255844116, "learning_rate": 1.9837020889913538e-05, "loss": 1.9286, "step": 26452 }, { "epoch": 0.34, "grad_norm": 4.643551349639893, "learning_rate": 1.9837001995449246e-05, "loss": 2.0368, "step": 26453 }, { "epoch": 0.34, "grad_norm": 3.4750828742980957, "learning_rate": 1.9836983099898788e-05, "loss": 2.0084, "step": 26454 }, { "epoch": 0.34, "grad_norm": 4.22622013092041, "learning_rate": 1.983696420326216e-05, "loss": 2.2083, "step": 26455 }, { "epoch": 0.34, "grad_norm": 3.682873487472534, "learning_rate": 1.9836945305539363e-05, "loss": 1.7467, "step": 26456 }, { "epoch": 0.34, "grad_norm": 4.305452346801758, "learning_rate": 1.9836926406730402e-05, "loss": 2.4177, "step": 26457 }, { "epoch": 0.34, "grad_norm": 3.7972335815429688, "learning_rate": 1.983690750683527e-05, "loss": 1.9418, "step": 26458 }, { "epoch": 0.34, "grad_norm": 3.6964166164398193, "learning_rate": 1.9836888605853987e-05, "loss": 1.8649, "step": 26459 }, { "epoch": 0.34, "grad_norm": 4.225133895874023, "learning_rate": 1.983686970378654e-05, "loss": 2.4835, "step": 26460 }, { "epoch": 0.34, "grad_norm": 3.7759485244750977, "learning_rate": 1.9836850800632933e-05, "loss": 2.1528, "step": 26461 }, { "epoch": 0.34, "grad_norm": 3.8861985206604004, "learning_rate": 1.9836831896393173e-05, "loss": 2.159, "step": 26462 }, { "epoch": 0.34, "grad_norm": 4.233482360839844, "learning_rate": 1.9836812991067263e-05, "loss": 2.1351, "step": 26463 }, { "epoch": 0.34, "grad_norm": 4.070346355438232, "learning_rate": 1.9836794084655197e-05, "loss": 1.9534, "step": 26464 }, { "epoch": 0.34, "grad_norm": 4.265103340148926, "learning_rate": 1.9836775177156986e-05, "loss": 2.412, "step": 26465 }, { "epoch": 0.34, "grad_norm": 3.5119965076446533, "learning_rate": 1.9836756268572625e-05, "loss": 1.7411, "step": 26466 }, { "epoch": 0.34, "grad_norm": 3.8926141262054443, "learning_rate": 1.983673735890212e-05, "loss": 1.9182, "step": 26467 }, { "epoch": 0.34, "grad_norm": 4.113955974578857, "learning_rate": 1.9836718448145476e-05, "loss": 2.3031, "step": 26468 }, { "epoch": 0.34, "grad_norm": 3.85980486869812, "learning_rate": 1.9836699536302685e-05, "loss": 2.1082, "step": 26469 }, { "epoch": 0.34, "grad_norm": 3.7670671939849854, "learning_rate": 1.983668062337376e-05, "loss": 1.8369, "step": 26470 }, { "epoch": 0.34, "grad_norm": 4.01381778717041, "learning_rate": 1.9836661709358698e-05, "loss": 2.0311, "step": 26471 }, { "epoch": 0.34, "grad_norm": 3.729098320007324, "learning_rate": 1.98366427942575e-05, "loss": 1.7125, "step": 26472 }, { "epoch": 0.34, "grad_norm": 4.130268096923828, "learning_rate": 1.983662387807017e-05, "loss": 2.2607, "step": 26473 }, { "epoch": 0.34, "grad_norm": 3.2141835689544678, "learning_rate": 1.9836604960796713e-05, "loss": 1.5832, "step": 26474 }, { "epoch": 0.34, "grad_norm": 4.713789939880371, "learning_rate": 1.9836586042437127e-05, "loss": 2.1007, "step": 26475 }, { "epoch": 0.34, "grad_norm": 3.8974151611328125, "learning_rate": 1.9836567122991412e-05, "loss": 2.2147, "step": 26476 }, { "epoch": 0.34, "grad_norm": 4.138011932373047, "learning_rate": 1.9836548202459577e-05, "loss": 2.4282, "step": 26477 }, { "epoch": 0.34, "grad_norm": 3.697814702987671, "learning_rate": 1.9836529280841615e-05, "loss": 1.9129, "step": 26478 }, { "epoch": 0.34, "grad_norm": 4.083367824554443, "learning_rate": 1.9836510358137537e-05, "loss": 2.0564, "step": 26479 }, { "epoch": 0.34, "grad_norm": 3.785818576812744, "learning_rate": 1.983649143434734e-05, "loss": 2.2766, "step": 26480 }, { "epoch": 0.34, "grad_norm": 4.0550336837768555, "learning_rate": 1.9836472509471028e-05, "loss": 2.0715, "step": 26481 }, { "epoch": 0.34, "grad_norm": 4.2104172706604, "learning_rate": 1.9836453583508604e-05, "loss": 2.4132, "step": 26482 }, { "epoch": 0.34, "grad_norm": 4.4894280433654785, "learning_rate": 1.9836434656460067e-05, "loss": 2.2457, "step": 26483 }, { "epoch": 0.34, "grad_norm": 4.201344966888428, "learning_rate": 1.983641572832542e-05, "loss": 2.4972, "step": 26484 }, { "epoch": 0.34, "grad_norm": 3.6590559482574463, "learning_rate": 1.983639679910467e-05, "loss": 1.9059, "step": 26485 }, { "epoch": 0.34, "grad_norm": 3.917788028717041, "learning_rate": 1.983637786879781e-05, "loss": 2.11, "step": 26486 }, { "epoch": 0.34, "grad_norm": 3.925415277481079, "learning_rate": 1.9836358937404848e-05, "loss": 1.847, "step": 26487 }, { "epoch": 0.34, "grad_norm": 3.966715097427368, "learning_rate": 1.9836340004925786e-05, "loss": 2.1566, "step": 26488 }, { "epoch": 0.34, "grad_norm": 3.138974905014038, "learning_rate": 1.9836321071360624e-05, "loss": 1.6078, "step": 26489 }, { "epoch": 0.34, "grad_norm": 4.5785346031188965, "learning_rate": 1.9836302136709367e-05, "loss": 2.7125, "step": 26490 }, { "epoch": 0.34, "grad_norm": 4.101443290710449, "learning_rate": 1.9836283200972016e-05, "loss": 2.0669, "step": 26491 }, { "epoch": 0.34, "grad_norm": 4.021734237670898, "learning_rate": 1.983626426414857e-05, "loss": 2.2855, "step": 26492 }, { "epoch": 0.34, "grad_norm": 3.866065263748169, "learning_rate": 1.9836245326239037e-05, "loss": 2.0198, "step": 26493 }, { "epoch": 0.34, "grad_norm": 4.267795085906982, "learning_rate": 1.9836226387243412e-05, "loss": 2.1612, "step": 26494 }, { "epoch": 0.34, "grad_norm": 4.129580020904541, "learning_rate": 1.98362074471617e-05, "loss": 2.3011, "step": 26495 }, { "epoch": 0.34, "grad_norm": 3.6989622116088867, "learning_rate": 1.9836188505993907e-05, "loss": 1.9024, "step": 26496 }, { "epoch": 0.34, "grad_norm": 4.019682884216309, "learning_rate": 1.983616956374003e-05, "loss": 1.9395, "step": 26497 }, { "epoch": 0.34, "grad_norm": 3.5081348419189453, "learning_rate": 1.9836150620400075e-05, "loss": 1.858, "step": 26498 }, { "epoch": 0.34, "grad_norm": 3.84968638420105, "learning_rate": 1.9836131675974043e-05, "loss": 1.9716, "step": 26499 }, { "epoch": 0.34, "grad_norm": 4.026146411895752, "learning_rate": 1.9836112730461933e-05, "loss": 1.934, "step": 26500 }, { "epoch": 0.34, "grad_norm": 4.348301887512207, "learning_rate": 1.9836093783863746e-05, "loss": 2.2793, "step": 26501 }, { "epoch": 0.34, "grad_norm": 3.769392490386963, "learning_rate": 1.983607483617949e-05, "loss": 1.8507, "step": 26502 }, { "epoch": 0.34, "grad_norm": 4.271664619445801, "learning_rate": 1.9836055887409166e-05, "loss": 2.3186, "step": 26503 }, { "epoch": 0.34, "grad_norm": 3.9220621585845947, "learning_rate": 1.9836036937552773e-05, "loss": 1.8314, "step": 26504 }, { "epoch": 0.34, "grad_norm": 4.2109761238098145, "learning_rate": 1.9836017986610317e-05, "loss": 2.2093, "step": 26505 }, { "epoch": 0.34, "grad_norm": 3.743793487548828, "learning_rate": 1.9835999034581796e-05, "loss": 1.6445, "step": 26506 }, { "epoch": 0.34, "grad_norm": 3.9757895469665527, "learning_rate": 1.9835980081467215e-05, "loss": 1.8719, "step": 26507 }, { "epoch": 0.34, "grad_norm": 3.418382406234741, "learning_rate": 1.9835961127266573e-05, "loss": 1.8475, "step": 26508 }, { "epoch": 0.34, "grad_norm": 3.9684507846832275, "learning_rate": 1.9835942171979875e-05, "loss": 2.1327, "step": 26509 }, { "epoch": 0.34, "grad_norm": 3.7655627727508545, "learning_rate": 1.983592321560712e-05, "loss": 1.6006, "step": 26510 }, { "epoch": 0.34, "grad_norm": 4.1619791984558105, "learning_rate": 1.983590425814832e-05, "loss": 1.9981, "step": 26511 }, { "epoch": 0.34, "grad_norm": 4.119438648223877, "learning_rate": 1.983588529960346e-05, "loss": 2.0042, "step": 26512 }, { "epoch": 0.34, "grad_norm": 3.3441035747528076, "learning_rate": 1.9835866339972555e-05, "loss": 2.0339, "step": 26513 }, { "epoch": 0.34, "grad_norm": 3.6386895179748535, "learning_rate": 1.9835847379255603e-05, "loss": 1.9931, "step": 26514 }, { "epoch": 0.34, "grad_norm": 3.3822100162506104, "learning_rate": 1.983582841745261e-05, "loss": 1.529, "step": 26515 }, { "epoch": 0.34, "grad_norm": 3.5346972942352295, "learning_rate": 1.983580945456357e-05, "loss": 1.8896, "step": 26516 }, { "epoch": 0.34, "grad_norm": 3.8266971111297607, "learning_rate": 1.983579049058849e-05, "loss": 1.9477, "step": 26517 }, { "epoch": 0.34, "grad_norm": 3.170480966567993, "learning_rate": 1.9835771525527377e-05, "loss": 1.5494, "step": 26518 }, { "epoch": 0.34, "grad_norm": 3.430607557296753, "learning_rate": 1.9835752559380222e-05, "loss": 1.914, "step": 26519 }, { "epoch": 0.34, "grad_norm": 3.667746067047119, "learning_rate": 1.9835733592147036e-05, "loss": 1.6655, "step": 26520 }, { "epoch": 0.34, "grad_norm": 3.895723581314087, "learning_rate": 1.9835714623827814e-05, "loss": 2.0607, "step": 26521 }, { "epoch": 0.34, "grad_norm": 3.4652469158172607, "learning_rate": 1.9835695654422568e-05, "loss": 1.868, "step": 26522 }, { "epoch": 0.34, "grad_norm": 4.405390739440918, "learning_rate": 1.9835676683931293e-05, "loss": 2.3838, "step": 26523 }, { "epoch": 0.34, "grad_norm": 3.365515947341919, "learning_rate": 1.9835657712353993e-05, "loss": 1.5521, "step": 26524 }, { "epoch": 0.34, "grad_norm": 4.449245452880859, "learning_rate": 1.983563873969067e-05, "loss": 2.0671, "step": 26525 }, { "epoch": 0.34, "grad_norm": 3.865370035171509, "learning_rate": 1.983561976594132e-05, "loss": 1.7888, "step": 26526 }, { "epoch": 0.34, "grad_norm": 4.524409770965576, "learning_rate": 1.9835600791105954e-05, "loss": 2.1243, "step": 26527 }, { "epoch": 0.34, "grad_norm": 4.001152992248535, "learning_rate": 1.983558181518457e-05, "loss": 1.901, "step": 26528 }, { "epoch": 0.34, "grad_norm": 4.261551856994629, "learning_rate": 1.9835562838177173e-05, "loss": 2.0619, "step": 26529 }, { "epoch": 0.34, "grad_norm": 3.594475269317627, "learning_rate": 1.9835543860083764e-05, "loss": 1.5923, "step": 26530 }, { "epoch": 0.34, "grad_norm": 4.155481815338135, "learning_rate": 1.983552488090434e-05, "loss": 2.2793, "step": 26531 }, { "epoch": 0.34, "grad_norm": 3.7081692218780518, "learning_rate": 1.983550590063891e-05, "loss": 1.8669, "step": 26532 }, { "epoch": 0.34, "grad_norm": 3.714566707611084, "learning_rate": 1.9835486919287472e-05, "loss": 2.1146, "step": 26533 }, { "epoch": 0.34, "grad_norm": 4.066374778747559, "learning_rate": 1.9835467936850034e-05, "loss": 2.1928, "step": 26534 }, { "epoch": 0.34, "grad_norm": 3.352907180786133, "learning_rate": 1.9835448953326584e-05, "loss": 1.6873, "step": 26535 }, { "epoch": 0.34, "grad_norm": 3.6857223510742188, "learning_rate": 1.983542996871714e-05, "loss": 1.8245, "step": 26536 }, { "epoch": 0.34, "grad_norm": 3.35654354095459, "learning_rate": 1.98354109830217e-05, "loss": 1.73, "step": 26537 }, { "epoch": 0.34, "grad_norm": 3.9043214321136475, "learning_rate": 1.983539199624026e-05, "loss": 1.7067, "step": 26538 }, { "epoch": 0.34, "grad_norm": 4.176903247833252, "learning_rate": 1.9835373008372825e-05, "loss": 1.8672, "step": 26539 }, { "epoch": 0.34, "grad_norm": 3.5063087940216064, "learning_rate": 1.98353540194194e-05, "loss": 1.5237, "step": 26540 }, { "epoch": 0.34, "grad_norm": 4.002431869506836, "learning_rate": 1.9835335029379984e-05, "loss": 2.017, "step": 26541 }, { "epoch": 0.34, "grad_norm": 4.166479587554932, "learning_rate": 1.983531603825458e-05, "loss": 2.3346, "step": 26542 }, { "epoch": 0.34, "grad_norm": 5.040102005004883, "learning_rate": 1.983529704604319e-05, "loss": 2.1611, "step": 26543 }, { "epoch": 0.34, "grad_norm": 3.4573922157287598, "learning_rate": 1.983527805274582e-05, "loss": 1.5614, "step": 26544 }, { "epoch": 0.34, "grad_norm": 4.304983139038086, "learning_rate": 1.9835259058362465e-05, "loss": 1.9044, "step": 26545 }, { "epoch": 0.34, "grad_norm": 4.495212078094482, "learning_rate": 1.983524006289313e-05, "loss": 2.5289, "step": 26546 }, { "epoch": 0.34, "grad_norm": 3.630309820175171, "learning_rate": 1.9835221066337818e-05, "loss": 1.6655, "step": 26547 }, { "epoch": 0.34, "grad_norm": 4.055059432983398, "learning_rate": 1.9835202068696532e-05, "loss": 2.2648, "step": 26548 }, { "epoch": 0.34, "grad_norm": 3.8763983249664307, "learning_rate": 1.983518306996927e-05, "loss": 1.8728, "step": 26549 }, { "epoch": 0.34, "grad_norm": 3.7891716957092285, "learning_rate": 1.9835164070156044e-05, "loss": 2.0443, "step": 26550 }, { "epoch": 0.34, "grad_norm": 4.894175052642822, "learning_rate": 1.983514506925684e-05, "loss": 2.6374, "step": 26551 }, { "epoch": 0.34, "grad_norm": 4.077974796295166, "learning_rate": 1.9835126067271675e-05, "loss": 2.105, "step": 26552 }, { "epoch": 0.34, "grad_norm": 4.180267810821533, "learning_rate": 1.9835107064200545e-05, "loss": 2.1894, "step": 26553 }, { "epoch": 0.34, "grad_norm": 3.517228364944458, "learning_rate": 1.983508806004345e-05, "loss": 1.5759, "step": 26554 }, { "epoch": 0.34, "grad_norm": 4.306237697601318, "learning_rate": 1.9835069054800397e-05, "loss": 2.4196, "step": 26555 }, { "epoch": 0.34, "grad_norm": 3.8888258934020996, "learning_rate": 1.9835050048471385e-05, "loss": 2.0766, "step": 26556 }, { "epoch": 0.34, "grad_norm": 4.112359046936035, "learning_rate": 1.9835031041056412e-05, "loss": 1.9828, "step": 26557 }, { "epoch": 0.34, "grad_norm": 4.2386932373046875, "learning_rate": 1.983501203255549e-05, "loss": 2.3146, "step": 26558 }, { "epoch": 0.34, "grad_norm": 4.01193904876709, "learning_rate": 1.9834993022968616e-05, "loss": 2.1184, "step": 26559 }, { "epoch": 0.34, "grad_norm": 3.743990659713745, "learning_rate": 1.983497401229579e-05, "loss": 1.778, "step": 26560 }, { "epoch": 0.34, "grad_norm": 4.096269607543945, "learning_rate": 1.9834955000537015e-05, "loss": 2.3205, "step": 26561 }, { "epoch": 0.34, "grad_norm": 3.7926855087280273, "learning_rate": 1.9834935987692297e-05, "loss": 2.314, "step": 26562 }, { "epoch": 0.34, "grad_norm": 4.446581840515137, "learning_rate": 1.9834916973761632e-05, "loss": 2.3663, "step": 26563 }, { "epoch": 0.34, "grad_norm": 3.764784097671509, "learning_rate": 1.9834897958745026e-05, "loss": 2.0302, "step": 26564 }, { "epoch": 0.34, "grad_norm": 3.5433878898620605, "learning_rate": 1.983487894264248e-05, "loss": 1.6968, "step": 26565 }, { "epoch": 0.34, "grad_norm": 3.835096836090088, "learning_rate": 1.9834859925454e-05, "loss": 2.0754, "step": 26566 }, { "epoch": 0.34, "grad_norm": 3.5082104206085205, "learning_rate": 1.983484090717958e-05, "loss": 1.5653, "step": 26567 }, { "epoch": 0.34, "grad_norm": 3.8962855339050293, "learning_rate": 1.983482188781923e-05, "loss": 2.0416, "step": 26568 }, { "epoch": 0.34, "grad_norm": 3.6993730068206787, "learning_rate": 1.9834802867372947e-05, "loss": 1.6361, "step": 26569 }, { "epoch": 0.34, "grad_norm": 3.730726957321167, "learning_rate": 1.9834783845840734e-05, "loss": 1.8016, "step": 26570 }, { "epoch": 0.34, "grad_norm": 3.6808929443359375, "learning_rate": 1.9834764823222597e-05, "loss": 2.1591, "step": 26571 }, { "epoch": 0.34, "grad_norm": 4.366365909576416, "learning_rate": 1.9834745799518534e-05, "loss": 2.3397, "step": 26572 }, { "epoch": 0.34, "grad_norm": 3.679792881011963, "learning_rate": 1.9834726774728547e-05, "loss": 1.6662, "step": 26573 }, { "epoch": 0.34, "grad_norm": 4.620400428771973, "learning_rate": 1.983470774885264e-05, "loss": 2.4313, "step": 26574 }, { "epoch": 0.34, "grad_norm": 3.827052116394043, "learning_rate": 1.9834688721890818e-05, "loss": 2.3118, "step": 26575 }, { "epoch": 0.34, "grad_norm": 3.8390307426452637, "learning_rate": 1.9834669693843075e-05, "loss": 2.1759, "step": 26576 }, { "epoch": 0.34, "grad_norm": 3.3406660556793213, "learning_rate": 1.9834650664709422e-05, "loss": 1.7372, "step": 26577 }, { "epoch": 0.34, "grad_norm": 4.351102352142334, "learning_rate": 1.9834631634489853e-05, "loss": 2.035, "step": 26578 }, { "epoch": 0.34, "grad_norm": 4.318978786468506, "learning_rate": 1.9834612603184374e-05, "loss": 2.288, "step": 26579 }, { "epoch": 0.34, "grad_norm": 4.159379482269287, "learning_rate": 1.9834593570792985e-05, "loss": 2.111, "step": 26580 }, { "epoch": 0.34, "grad_norm": 4.176924228668213, "learning_rate": 1.9834574537315693e-05, "loss": 2.2839, "step": 26581 }, { "epoch": 0.34, "grad_norm": 3.6906702518463135, "learning_rate": 1.9834555502752498e-05, "loss": 1.6681, "step": 26582 }, { "epoch": 0.34, "grad_norm": 3.7236931324005127, "learning_rate": 1.9834536467103396e-05, "loss": 2.1234, "step": 26583 }, { "epoch": 0.35, "grad_norm": 3.4141898155212402, "learning_rate": 1.98345174303684e-05, "loss": 1.643, "step": 26584 }, { "epoch": 0.35, "grad_norm": 4.324052810668945, "learning_rate": 1.9834498392547505e-05, "loss": 2.5576, "step": 26585 }, { "epoch": 0.35, "grad_norm": 4.074825763702393, "learning_rate": 1.983447935364071e-05, "loss": 1.7449, "step": 26586 }, { "epoch": 0.35, "grad_norm": 3.885099172592163, "learning_rate": 1.983446031364803e-05, "loss": 2.007, "step": 26587 }, { "epoch": 0.35, "grad_norm": 3.90810227394104, "learning_rate": 1.9834441272569453e-05, "loss": 1.9386, "step": 26588 }, { "epoch": 0.35, "grad_norm": 3.2561795711517334, "learning_rate": 1.9834422230404987e-05, "loss": 1.5512, "step": 26589 }, { "epoch": 0.35, "grad_norm": 4.022467136383057, "learning_rate": 1.983440318715464e-05, "loss": 1.963, "step": 26590 }, { "epoch": 0.35, "grad_norm": 4.321819305419922, "learning_rate": 1.9834384142818402e-05, "loss": 2.2819, "step": 26591 }, { "epoch": 0.35, "grad_norm": 3.615224599838257, "learning_rate": 1.9834365097396282e-05, "loss": 1.8658, "step": 26592 }, { "epoch": 0.35, "grad_norm": 3.104854106903076, "learning_rate": 1.983434605088828e-05, "loss": 1.4353, "step": 26593 }, { "epoch": 0.35, "grad_norm": 4.5574517250061035, "learning_rate": 1.98343270032944e-05, "loss": 2.4384, "step": 26594 }, { "epoch": 0.35, "grad_norm": 3.554670572280884, "learning_rate": 1.9834307954614646e-05, "loss": 1.5882, "step": 26595 }, { "epoch": 0.35, "grad_norm": 3.656517267227173, "learning_rate": 1.9834288904849015e-05, "loss": 1.7663, "step": 26596 }, { "epoch": 0.35, "grad_norm": 4.860119342803955, "learning_rate": 1.9834269853997516e-05, "loss": 2.4945, "step": 26597 }, { "epoch": 0.35, "grad_norm": 3.9428634643554688, "learning_rate": 1.983425080206014e-05, "loss": 1.895, "step": 26598 }, { "epoch": 0.35, "grad_norm": 4.744366645812988, "learning_rate": 1.9834231749036902e-05, "loss": 2.9872, "step": 26599 }, { "epoch": 0.35, "grad_norm": 3.8920984268188477, "learning_rate": 1.9834212694927792e-05, "loss": 1.7903, "step": 26600 }, { "epoch": 0.35, "grad_norm": 4.218562602996826, "learning_rate": 1.9834193639732823e-05, "loss": 2.2624, "step": 26601 }, { "epoch": 0.35, "grad_norm": 3.538504123687744, "learning_rate": 1.983417458345199e-05, "loss": 1.8033, "step": 26602 }, { "epoch": 0.35, "grad_norm": 3.129870891571045, "learning_rate": 1.9834155526085297e-05, "loss": 1.5931, "step": 26603 }, { "epoch": 0.35, "grad_norm": 4.5869245529174805, "learning_rate": 1.9834136467632747e-05, "loss": 2.5248, "step": 26604 }, { "epoch": 0.35, "grad_norm": 3.852799892425537, "learning_rate": 1.9834117408094342e-05, "loss": 2.049, "step": 26605 }, { "epoch": 0.35, "grad_norm": 3.0105130672454834, "learning_rate": 1.9834098347470084e-05, "loss": 1.4426, "step": 26606 }, { "epoch": 0.35, "grad_norm": 3.7180750370025635, "learning_rate": 1.983407928575997e-05, "loss": 2.0488, "step": 26607 }, { "epoch": 0.35, "grad_norm": 4.48767614364624, "learning_rate": 1.9834060222964013e-05, "loss": 2.4079, "step": 26608 }, { "epoch": 0.35, "grad_norm": 3.540207862854004, "learning_rate": 1.9834041159082205e-05, "loss": 1.6466, "step": 26609 }, { "epoch": 0.35, "grad_norm": 4.271398544311523, "learning_rate": 1.9834022094114556e-05, "loss": 2.0222, "step": 26610 }, { "epoch": 0.35, "grad_norm": 3.7296926975250244, "learning_rate": 1.983400302806106e-05, "loss": 1.9785, "step": 26611 }, { "epoch": 0.35, "grad_norm": 4.124882221221924, "learning_rate": 1.9833983960921728e-05, "loss": 2.4188, "step": 26612 }, { "epoch": 0.35, "grad_norm": 3.8551433086395264, "learning_rate": 1.9833964892696556e-05, "loss": 2.1081, "step": 26613 }, { "epoch": 0.35, "grad_norm": 3.9496121406555176, "learning_rate": 1.9833945823385546e-05, "loss": 1.8183, "step": 26614 }, { "epoch": 0.35, "grad_norm": 3.859713077545166, "learning_rate": 1.98339267529887e-05, "loss": 1.9468, "step": 26615 }, { "epoch": 0.35, "grad_norm": 4.167324542999268, "learning_rate": 1.9833907681506025e-05, "loss": 2.2444, "step": 26616 }, { "epoch": 0.35, "grad_norm": 4.250953674316406, "learning_rate": 1.9833888608937516e-05, "loss": 1.9333, "step": 26617 }, { "epoch": 0.35, "grad_norm": 4.428985595703125, "learning_rate": 1.983386953528318e-05, "loss": 2.2267, "step": 26618 }, { "epoch": 0.35, "grad_norm": 3.9853694438934326, "learning_rate": 1.9833850460543018e-05, "loss": 2.027, "step": 26619 }, { "epoch": 0.35, "grad_norm": 4.21069860458374, "learning_rate": 1.9833831384717036e-05, "loss": 2.4597, "step": 26620 }, { "epoch": 0.35, "grad_norm": 4.192768573760986, "learning_rate": 1.9833812307805226e-05, "loss": 1.6915, "step": 26621 }, { "epoch": 0.35, "grad_norm": 3.592050790786743, "learning_rate": 1.9833793229807598e-05, "loss": 1.6189, "step": 26622 }, { "epoch": 0.35, "grad_norm": 3.8704378604888916, "learning_rate": 1.9833774150724155e-05, "loss": 1.9261, "step": 26623 }, { "epoch": 0.35, "grad_norm": 4.292246341705322, "learning_rate": 1.9833755070554893e-05, "loss": 2.4987, "step": 26624 }, { "epoch": 0.35, "grad_norm": 3.831707239151001, "learning_rate": 1.983373598929982e-05, "loss": 1.8367, "step": 26625 }, { "epoch": 0.35, "grad_norm": 4.490051746368408, "learning_rate": 1.9833716906958936e-05, "loss": 2.3881, "step": 26626 }, { "epoch": 0.35, "grad_norm": 3.5294573307037354, "learning_rate": 1.9833697823532243e-05, "loss": 2.0151, "step": 26627 }, { "epoch": 0.35, "grad_norm": 4.100408554077148, "learning_rate": 1.9833678739019742e-05, "loss": 2.0085, "step": 26628 }, { "epoch": 0.35, "grad_norm": 4.877793788909912, "learning_rate": 1.9833659653421437e-05, "loss": 2.3935, "step": 26629 }, { "epoch": 0.35, "grad_norm": 3.4350693225860596, "learning_rate": 1.983364056673733e-05, "loss": 1.89, "step": 26630 }, { "epoch": 0.35, "grad_norm": 3.892139434814453, "learning_rate": 1.9833621478967418e-05, "loss": 2.1, "step": 26631 }, { "epoch": 0.35, "grad_norm": 3.8775601387023926, "learning_rate": 1.983360239011171e-05, "loss": 2.115, "step": 26632 }, { "epoch": 0.35, "grad_norm": 3.982694149017334, "learning_rate": 1.9833583300170204e-05, "loss": 1.823, "step": 26633 }, { "epoch": 0.35, "grad_norm": 3.9024949073791504, "learning_rate": 1.9833564209142908e-05, "loss": 1.7786, "step": 26634 }, { "epoch": 0.35, "grad_norm": 3.434370756149292, "learning_rate": 1.9833545117029815e-05, "loss": 1.6272, "step": 26635 }, { "epoch": 0.35, "grad_norm": 4.3172407150268555, "learning_rate": 1.9833526023830933e-05, "loss": 2.1847, "step": 26636 }, { "epoch": 0.35, "grad_norm": 3.5610063076019287, "learning_rate": 1.9833506929546264e-05, "loss": 1.5683, "step": 26637 }, { "epoch": 0.35, "grad_norm": 3.562033176422119, "learning_rate": 1.9833487834175807e-05, "loss": 2.0596, "step": 26638 }, { "epoch": 0.35, "grad_norm": 3.931488513946533, "learning_rate": 1.983346873771957e-05, "loss": 1.9463, "step": 26639 }, { "epoch": 0.35, "grad_norm": 3.7632479667663574, "learning_rate": 1.983344964017755e-05, "loss": 1.8026, "step": 26640 }, { "epoch": 0.35, "grad_norm": 4.274645805358887, "learning_rate": 1.9833430541549747e-05, "loss": 2.1048, "step": 26641 }, { "epoch": 0.35, "grad_norm": 4.2426557540893555, "learning_rate": 1.983341144183617e-05, "loss": 2.3412, "step": 26642 }, { "epoch": 0.35, "grad_norm": 4.2018938064575195, "learning_rate": 1.9833392341036814e-05, "loss": 2.227, "step": 26643 }, { "epoch": 0.35, "grad_norm": 4.452885627746582, "learning_rate": 1.9833373239151687e-05, "loss": 2.1412, "step": 26644 }, { "epoch": 0.35, "grad_norm": 3.392749071121216, "learning_rate": 1.983335413618079e-05, "loss": 1.3748, "step": 26645 }, { "epoch": 0.35, "grad_norm": 3.8570539951324463, "learning_rate": 1.9833335032124122e-05, "loss": 1.75, "step": 26646 }, { "epoch": 0.35, "grad_norm": 3.8526546955108643, "learning_rate": 1.9833315926981686e-05, "loss": 2.2417, "step": 26647 }, { "epoch": 0.35, "grad_norm": 3.3964269161224365, "learning_rate": 1.9833296820753487e-05, "loss": 1.6423, "step": 26648 }, { "epoch": 0.35, "grad_norm": 4.01478385925293, "learning_rate": 1.9833277713439527e-05, "loss": 2.1427, "step": 26649 }, { "epoch": 0.35, "grad_norm": 3.9573047161102295, "learning_rate": 1.98332586050398e-05, "loss": 1.987, "step": 26650 }, { "epoch": 0.35, "grad_norm": 4.611915111541748, "learning_rate": 1.983323949555432e-05, "loss": 2.7891, "step": 26651 }, { "epoch": 0.35, "grad_norm": 4.413525581359863, "learning_rate": 1.983322038498308e-05, "loss": 2.3653, "step": 26652 }, { "epoch": 0.35, "grad_norm": 3.9474236965179443, "learning_rate": 1.983320127332609e-05, "loss": 2.2307, "step": 26653 }, { "epoch": 0.35, "grad_norm": 3.599668264389038, "learning_rate": 1.9833182160583345e-05, "loss": 1.8998, "step": 26654 }, { "epoch": 0.35, "grad_norm": 4.164652347564697, "learning_rate": 1.9833163046754847e-05, "loss": 2.2875, "step": 26655 }, { "epoch": 0.35, "grad_norm": 4.482100486755371, "learning_rate": 1.9833143931840606e-05, "loss": 2.126, "step": 26656 }, { "epoch": 0.35, "grad_norm": 3.6234757900238037, "learning_rate": 1.9833124815840617e-05, "loss": 1.9397, "step": 26657 }, { "epoch": 0.35, "grad_norm": 4.04209041595459, "learning_rate": 1.9833105698754885e-05, "loss": 1.6513, "step": 26658 }, { "epoch": 0.35, "grad_norm": 4.183794975280762, "learning_rate": 1.9833086580583412e-05, "loss": 2.4592, "step": 26659 }, { "epoch": 0.35, "grad_norm": 3.644407272338867, "learning_rate": 1.9833067461326196e-05, "loss": 1.7576, "step": 26660 }, { "epoch": 0.35, "grad_norm": 4.1072211265563965, "learning_rate": 1.9833048340983246e-05, "loss": 1.8275, "step": 26661 }, { "epoch": 0.35, "grad_norm": 4.341220378875732, "learning_rate": 1.983302921955456e-05, "loss": 2.6635, "step": 26662 }, { "epoch": 0.35, "grad_norm": 3.4087822437286377, "learning_rate": 1.983301009704014e-05, "loss": 1.4396, "step": 26663 }, { "epoch": 0.35, "grad_norm": 3.616884469985962, "learning_rate": 1.983299097343999e-05, "loss": 1.5463, "step": 26664 }, { "epoch": 0.35, "grad_norm": 4.0053582191467285, "learning_rate": 1.9832971848754107e-05, "loss": 1.964, "step": 26665 }, { "epoch": 0.35, "grad_norm": 3.8378610610961914, "learning_rate": 1.9832952722982502e-05, "loss": 2.1473, "step": 26666 }, { "epoch": 0.35, "grad_norm": 4.8932108879089355, "learning_rate": 1.983293359612517e-05, "loss": 2.2887, "step": 26667 }, { "epoch": 0.35, "grad_norm": 3.7912862300872803, "learning_rate": 1.9832914468182113e-05, "loss": 1.6739, "step": 26668 }, { "epoch": 0.35, "grad_norm": 3.5765140056610107, "learning_rate": 1.9832895339153338e-05, "loss": 1.7119, "step": 26669 }, { "epoch": 0.35, "grad_norm": 4.05955171585083, "learning_rate": 1.9832876209038846e-05, "loss": 2.4293, "step": 26670 }, { "epoch": 0.35, "grad_norm": 3.315807580947876, "learning_rate": 1.9832857077838635e-05, "loss": 1.5423, "step": 26671 }, { "epoch": 0.35, "grad_norm": 3.4597833156585693, "learning_rate": 1.9832837945552714e-05, "loss": 1.9124, "step": 26672 }, { "epoch": 0.35, "grad_norm": 3.6269497871398926, "learning_rate": 1.9832818812181075e-05, "loss": 1.722, "step": 26673 }, { "epoch": 0.35, "grad_norm": 3.6322498321533203, "learning_rate": 1.983279967772373e-05, "loss": 1.7682, "step": 26674 }, { "epoch": 0.35, "grad_norm": 3.48772931098938, "learning_rate": 1.9832780542180676e-05, "loss": 2.1067, "step": 26675 }, { "epoch": 0.35, "grad_norm": 3.9194228649139404, "learning_rate": 1.9832761405551915e-05, "loss": 1.8768, "step": 26676 }, { "epoch": 0.35, "grad_norm": 4.069603443145752, "learning_rate": 1.983274226783745e-05, "loss": 2.1748, "step": 26677 }, { "epoch": 0.35, "grad_norm": 3.781118392944336, "learning_rate": 1.9832723129037287e-05, "loss": 1.6615, "step": 26678 }, { "epoch": 0.35, "grad_norm": 4.424892902374268, "learning_rate": 1.9832703989151424e-05, "loss": 2.0568, "step": 26679 }, { "epoch": 0.35, "grad_norm": 4.121720314025879, "learning_rate": 1.983268484817986e-05, "loss": 2.0218, "step": 26680 }, { "epoch": 0.35, "grad_norm": 4.070247650146484, "learning_rate": 1.98326657061226e-05, "loss": 1.9333, "step": 26681 }, { "epoch": 0.35, "grad_norm": 3.4244801998138428, "learning_rate": 1.9832646562979655e-05, "loss": 1.471, "step": 26682 }, { "epoch": 0.35, "grad_norm": 4.333176136016846, "learning_rate": 1.9832627418751013e-05, "loss": 2.226, "step": 26683 }, { "epoch": 0.35, "grad_norm": 4.11588716506958, "learning_rate": 1.983260827343668e-05, "loss": 2.5618, "step": 26684 }, { "epoch": 0.35, "grad_norm": 3.8929803371429443, "learning_rate": 1.9832589127036667e-05, "loss": 2.048, "step": 26685 }, { "epoch": 0.35, "grad_norm": 4.109168529510498, "learning_rate": 1.9832569979550963e-05, "loss": 2.3143, "step": 26686 }, { "epoch": 0.35, "grad_norm": 4.244062423706055, "learning_rate": 1.983255083097958e-05, "loss": 2.3732, "step": 26687 }, { "epoch": 0.35, "grad_norm": 4.084561824798584, "learning_rate": 1.9832531681322513e-05, "loss": 2.2468, "step": 26688 }, { "epoch": 0.35, "grad_norm": 3.5542726516723633, "learning_rate": 1.983251253057977e-05, "loss": 2.0046, "step": 26689 }, { "epoch": 0.35, "grad_norm": 3.689398765563965, "learning_rate": 1.9832493378751353e-05, "loss": 2.2235, "step": 26690 }, { "epoch": 0.35, "grad_norm": 4.073275566101074, "learning_rate": 1.983247422583726e-05, "loss": 2.1035, "step": 26691 }, { "epoch": 0.35, "grad_norm": 3.9964897632598877, "learning_rate": 1.9832455071837495e-05, "loss": 1.9548, "step": 26692 }, { "epoch": 0.35, "grad_norm": 4.205728530883789, "learning_rate": 1.983243591675206e-05, "loss": 2.2557, "step": 26693 }, { "epoch": 0.35, "grad_norm": 3.4851040840148926, "learning_rate": 1.9832416760580957e-05, "loss": 1.922, "step": 26694 }, { "epoch": 0.35, "grad_norm": 4.106000900268555, "learning_rate": 1.983239760332419e-05, "loss": 2.1717, "step": 26695 }, { "epoch": 0.35, "grad_norm": 3.945042848587036, "learning_rate": 1.9832378444981755e-05, "loss": 2.5013, "step": 26696 }, { "epoch": 0.35, "grad_norm": 3.617093086242676, "learning_rate": 1.9832359285553663e-05, "loss": 1.8012, "step": 26697 }, { "epoch": 0.35, "grad_norm": 3.8469479084014893, "learning_rate": 1.9832340125039914e-05, "loss": 2.0108, "step": 26698 }, { "epoch": 0.35, "grad_norm": 4.178289890289307, "learning_rate": 1.9832320963440503e-05, "loss": 2.5295, "step": 26699 }, { "epoch": 0.35, "grad_norm": 3.6172714233398438, "learning_rate": 1.9832301800755437e-05, "loss": 1.8704, "step": 26700 }, { "epoch": 0.35, "grad_norm": 3.8262927532196045, "learning_rate": 1.983228263698472e-05, "loss": 2.0301, "step": 26701 }, { "epoch": 0.35, "grad_norm": 3.5924158096313477, "learning_rate": 1.9832263472128354e-05, "loss": 1.5644, "step": 26702 }, { "epoch": 0.35, "grad_norm": 3.5818235874176025, "learning_rate": 1.9832244306186334e-05, "loss": 1.8819, "step": 26703 }, { "epoch": 0.35, "grad_norm": 3.560870885848999, "learning_rate": 1.9832225139158668e-05, "loss": 1.6741, "step": 26704 }, { "epoch": 0.35, "grad_norm": 3.7770655155181885, "learning_rate": 1.9832205971045364e-05, "loss": 1.928, "step": 26705 }, { "epoch": 0.35, "grad_norm": 3.8925371170043945, "learning_rate": 1.983218680184641e-05, "loss": 1.8977, "step": 26706 }, { "epoch": 0.35, "grad_norm": 3.4161882400512695, "learning_rate": 1.9832167631561822e-05, "loss": 1.5471, "step": 26707 }, { "epoch": 0.35, "grad_norm": 3.584101438522339, "learning_rate": 1.983214846019159e-05, "loss": 1.9947, "step": 26708 }, { "epoch": 0.35, "grad_norm": 4.78127384185791, "learning_rate": 1.983212928773573e-05, "loss": 2.8227, "step": 26709 }, { "epoch": 0.35, "grad_norm": 3.7909905910491943, "learning_rate": 1.983211011419423e-05, "loss": 2.1312, "step": 26710 }, { "epoch": 0.35, "grad_norm": 4.2253193855285645, "learning_rate": 1.9832090939567098e-05, "loss": 2.3243, "step": 26711 }, { "epoch": 0.35, "grad_norm": 4.075401782989502, "learning_rate": 1.9832071763854336e-05, "loss": 2.0473, "step": 26712 }, { "epoch": 0.35, "grad_norm": 4.663324356079102, "learning_rate": 1.9832052587055952e-05, "loss": 1.9813, "step": 26713 }, { "epoch": 0.35, "grad_norm": 3.803239107131958, "learning_rate": 1.983203340917194e-05, "loss": 1.7668, "step": 26714 }, { "epoch": 0.35, "grad_norm": 3.8488879203796387, "learning_rate": 1.9832014230202302e-05, "loss": 2.2406, "step": 26715 }, { "epoch": 0.35, "grad_norm": 4.339341640472412, "learning_rate": 1.9831995050147043e-05, "loss": 2.0329, "step": 26716 }, { "epoch": 0.35, "grad_norm": 4.9345703125, "learning_rate": 1.983197586900617e-05, "loss": 2.4338, "step": 26717 }, { "epoch": 0.35, "grad_norm": 3.861957311630249, "learning_rate": 1.9831956686779676e-05, "loss": 1.8602, "step": 26718 }, { "epoch": 0.35, "grad_norm": 4.145265579223633, "learning_rate": 1.9831937503467565e-05, "loss": 2.3222, "step": 26719 }, { "epoch": 0.35, "grad_norm": 4.091412544250488, "learning_rate": 1.9831918319069845e-05, "loss": 2.1833, "step": 26720 }, { "epoch": 0.35, "grad_norm": 4.204451084136963, "learning_rate": 1.9831899133586514e-05, "loss": 1.9836, "step": 26721 }, { "epoch": 0.35, "grad_norm": 4.4489827156066895, "learning_rate": 1.9831879947017572e-05, "loss": 2.5154, "step": 26722 }, { "epoch": 0.35, "grad_norm": 3.1941044330596924, "learning_rate": 1.9831860759363028e-05, "loss": 1.3239, "step": 26723 }, { "epoch": 0.35, "grad_norm": 4.0532546043396, "learning_rate": 1.9831841570622875e-05, "loss": 2.0327, "step": 26724 }, { "epoch": 0.35, "grad_norm": 3.444873809814453, "learning_rate": 1.9831822380797125e-05, "loss": 1.666, "step": 26725 }, { "epoch": 0.35, "grad_norm": 4.7076287269592285, "learning_rate": 1.983180318988577e-05, "loss": 1.9084, "step": 26726 }, { "epoch": 0.35, "grad_norm": 3.955416202545166, "learning_rate": 1.983178399788882e-05, "loss": 1.9163, "step": 26727 }, { "epoch": 0.35, "grad_norm": 4.048296928405762, "learning_rate": 1.983176480480627e-05, "loss": 1.9597, "step": 26728 }, { "epoch": 0.35, "grad_norm": 3.963975667953491, "learning_rate": 1.9831745610638132e-05, "loss": 2.3822, "step": 26729 }, { "epoch": 0.35, "grad_norm": 4.497639179229736, "learning_rate": 1.98317264153844e-05, "loss": 2.5869, "step": 26730 }, { "epoch": 0.35, "grad_norm": 4.349751949310303, "learning_rate": 1.9831707219045083e-05, "loss": 2.3998, "step": 26731 }, { "epoch": 0.35, "grad_norm": 3.7328290939331055, "learning_rate": 1.9831688021620176e-05, "loss": 2.2355, "step": 26732 }, { "epoch": 0.35, "grad_norm": 3.945352792739868, "learning_rate": 1.983166882310968e-05, "loss": 1.9912, "step": 26733 }, { "epoch": 0.35, "grad_norm": 3.927293539047241, "learning_rate": 1.9831649623513606e-05, "loss": 2.0604, "step": 26734 }, { "epoch": 0.35, "grad_norm": 3.486692190170288, "learning_rate": 1.9831630422831946e-05, "loss": 1.8681, "step": 26735 }, { "epoch": 0.35, "grad_norm": 3.802561044692993, "learning_rate": 1.983161122106471e-05, "loss": 2.123, "step": 26736 }, { "epoch": 0.35, "grad_norm": 3.227065324783325, "learning_rate": 1.98315920182119e-05, "loss": 1.4572, "step": 26737 }, { "epoch": 0.35, "grad_norm": 3.4022724628448486, "learning_rate": 1.983157281427351e-05, "loss": 1.9251, "step": 26738 }, { "epoch": 0.35, "grad_norm": 4.485835552215576, "learning_rate": 1.983155360924955e-05, "loss": 2.0302, "step": 26739 }, { "epoch": 0.35, "grad_norm": 4.533114910125732, "learning_rate": 1.983153440314002e-05, "loss": 2.2236, "step": 26740 }, { "epoch": 0.35, "grad_norm": 3.8201138973236084, "learning_rate": 1.9831515195944924e-05, "loss": 1.8412, "step": 26741 }, { "epoch": 0.35, "grad_norm": 3.5312488079071045, "learning_rate": 1.9831495987664257e-05, "loss": 1.6071, "step": 26742 }, { "epoch": 0.35, "grad_norm": 4.535791873931885, "learning_rate": 1.983147677829803e-05, "loss": 2.5535, "step": 26743 }, { "epoch": 0.35, "grad_norm": 4.529564380645752, "learning_rate": 1.983145756784624e-05, "loss": 2.8124, "step": 26744 }, { "epoch": 0.35, "grad_norm": 3.617072582244873, "learning_rate": 1.983143835630889e-05, "loss": 1.857, "step": 26745 }, { "epoch": 0.35, "grad_norm": 4.119487285614014, "learning_rate": 1.983141914368598e-05, "loss": 1.6485, "step": 26746 }, { "epoch": 0.35, "grad_norm": 3.951663017272949, "learning_rate": 1.983139992997752e-05, "loss": 2.1811, "step": 26747 }, { "epoch": 0.35, "grad_norm": 3.494830369949341, "learning_rate": 1.9831380715183504e-05, "loss": 1.9783, "step": 26748 }, { "epoch": 0.35, "grad_norm": 4.062542915344238, "learning_rate": 1.9831361499303935e-05, "loss": 2.4717, "step": 26749 }, { "epoch": 0.35, "grad_norm": 4.518055438995361, "learning_rate": 1.9831342282338818e-05, "loss": 2.109, "step": 26750 }, { "epoch": 0.35, "grad_norm": 4.6714677810668945, "learning_rate": 1.9831323064288158e-05, "loss": 2.3441, "step": 26751 }, { "epoch": 0.35, "grad_norm": 4.289785861968994, "learning_rate": 1.9831303845151947e-05, "loss": 2.3035, "step": 26752 }, { "epoch": 0.35, "grad_norm": 3.9766974449157715, "learning_rate": 1.9831284624930196e-05, "loss": 1.9176, "step": 26753 }, { "epoch": 0.35, "grad_norm": 4.348808288574219, "learning_rate": 1.9831265403622907e-05, "loss": 2.1414, "step": 26754 }, { "epoch": 0.35, "grad_norm": 4.024979114532471, "learning_rate": 1.9831246181230075e-05, "loss": 2.2602, "step": 26755 }, { "epoch": 0.35, "grad_norm": 4.421732425689697, "learning_rate": 1.983122695775171e-05, "loss": 2.0658, "step": 26756 }, { "epoch": 0.35, "grad_norm": 4.041445732116699, "learning_rate": 1.9831207733187806e-05, "loss": 2.2841, "step": 26757 }, { "epoch": 0.35, "grad_norm": 3.999215841293335, "learning_rate": 1.9831188507538376e-05, "loss": 2.304, "step": 26758 }, { "epoch": 0.35, "grad_norm": 4.141293525695801, "learning_rate": 1.9831169280803412e-05, "loss": 2.1791, "step": 26759 }, { "epoch": 0.35, "grad_norm": 4.542544364929199, "learning_rate": 1.9831150052982918e-05, "loss": 2.0132, "step": 26760 }, { "epoch": 0.35, "grad_norm": 4.007665634155273, "learning_rate": 1.9831130824076904e-05, "loss": 2.1519, "step": 26761 }, { "epoch": 0.35, "grad_norm": 3.8126087188720703, "learning_rate": 1.9831111594085362e-05, "loss": 2.2891, "step": 26762 }, { "epoch": 0.35, "grad_norm": 4.137672424316406, "learning_rate": 1.98310923630083e-05, "loss": 1.9693, "step": 26763 }, { "epoch": 0.35, "grad_norm": 4.78178071975708, "learning_rate": 1.9831073130845723e-05, "loss": 2.3151, "step": 26764 }, { "epoch": 0.35, "grad_norm": 4.377998352050781, "learning_rate": 1.9831053897597625e-05, "loss": 2.7035, "step": 26765 }, { "epoch": 0.35, "grad_norm": 3.825623035430908, "learning_rate": 1.983103466326401e-05, "loss": 2.182, "step": 26766 }, { "epoch": 0.35, "grad_norm": 4.063125133514404, "learning_rate": 1.983101542784488e-05, "loss": 2.2996, "step": 26767 }, { "epoch": 0.35, "grad_norm": 4.099000930786133, "learning_rate": 1.9830996191340243e-05, "loss": 2.2213, "step": 26768 }, { "epoch": 0.35, "grad_norm": 4.063518047332764, "learning_rate": 1.98309769537501e-05, "loss": 2.444, "step": 26769 }, { "epoch": 0.35, "grad_norm": 4.824848651885986, "learning_rate": 1.9830957715074446e-05, "loss": 2.7202, "step": 26770 }, { "epoch": 0.35, "grad_norm": 3.6887691020965576, "learning_rate": 1.983093847531329e-05, "loss": 1.8764, "step": 26771 }, { "epoch": 0.35, "grad_norm": 4.113806247711182, "learning_rate": 1.9830919234466626e-05, "loss": 2.016, "step": 26772 }, { "epoch": 0.35, "grad_norm": 4.383037090301514, "learning_rate": 1.983089999253447e-05, "loss": 2.3298, "step": 26773 }, { "epoch": 0.35, "grad_norm": 3.893486499786377, "learning_rate": 1.983088074951681e-05, "loss": 2.363, "step": 26774 }, { "epoch": 0.35, "grad_norm": 3.7623467445373535, "learning_rate": 1.9830861505413657e-05, "loss": 1.8529, "step": 26775 }, { "epoch": 0.35, "grad_norm": 4.167281627655029, "learning_rate": 1.983084226022501e-05, "loss": 1.9266, "step": 26776 }, { "epoch": 0.35, "grad_norm": 4.241173267364502, "learning_rate": 1.983082301395087e-05, "loss": 2.0397, "step": 26777 }, { "epoch": 0.35, "grad_norm": 4.134915828704834, "learning_rate": 1.983080376659124e-05, "loss": 1.9048, "step": 26778 }, { "epoch": 0.35, "grad_norm": 4.597983360290527, "learning_rate": 1.9830784518146123e-05, "loss": 2.597, "step": 26779 }, { "epoch": 0.35, "grad_norm": 3.6862571239471436, "learning_rate": 1.983076526861552e-05, "loss": 1.7023, "step": 26780 }, { "epoch": 0.35, "grad_norm": 3.5755672454833984, "learning_rate": 1.9830746017999435e-05, "loss": 1.7535, "step": 26781 }, { "epoch": 0.35, "grad_norm": 3.91044020652771, "learning_rate": 1.983072676629787e-05, "loss": 1.9494, "step": 26782 }, { "epoch": 0.35, "grad_norm": 3.894333600997925, "learning_rate": 1.9830707513510823e-05, "loss": 2.2677, "step": 26783 }, { "epoch": 0.35, "grad_norm": 3.9378201961517334, "learning_rate": 1.98306882596383e-05, "loss": 1.5194, "step": 26784 }, { "epoch": 0.35, "grad_norm": 3.9001102447509766, "learning_rate": 1.98306690046803e-05, "loss": 1.9611, "step": 26785 }, { "epoch": 0.35, "grad_norm": 3.8385841846466064, "learning_rate": 1.9830649748636833e-05, "loss": 2.221, "step": 26786 }, { "epoch": 0.35, "grad_norm": 4.084465980529785, "learning_rate": 1.983063049150789e-05, "loss": 2.0846, "step": 26787 }, { "epoch": 0.35, "grad_norm": 3.929384469985962, "learning_rate": 1.9830611233293483e-05, "loss": 2.1527, "step": 26788 }, { "epoch": 0.35, "grad_norm": 3.5908873081207275, "learning_rate": 1.9830591973993606e-05, "loss": 1.9612, "step": 26789 }, { "epoch": 0.35, "grad_norm": 4.019558429718018, "learning_rate": 1.983057271360827e-05, "loss": 2.1185, "step": 26790 }, { "epoch": 0.35, "grad_norm": 3.9597668647766113, "learning_rate": 1.9830553452137467e-05, "loss": 2.0378, "step": 26791 }, { "epoch": 0.35, "grad_norm": 3.6637673377990723, "learning_rate": 1.9830534189581206e-05, "loss": 1.9915, "step": 26792 }, { "epoch": 0.35, "grad_norm": 3.701275110244751, "learning_rate": 1.9830514925939485e-05, "loss": 1.6332, "step": 26793 }, { "epoch": 0.35, "grad_norm": 3.9685049057006836, "learning_rate": 1.9830495661212312e-05, "loss": 1.8878, "step": 26794 }, { "epoch": 0.35, "grad_norm": 3.303882360458374, "learning_rate": 1.9830476395399683e-05, "loss": 1.7405, "step": 26795 }, { "epoch": 0.35, "grad_norm": 4.90326452255249, "learning_rate": 1.9830457128501605e-05, "loss": 2.0334, "step": 26796 }, { "epoch": 0.35, "grad_norm": 4.207725524902344, "learning_rate": 1.9830437860518078e-05, "loss": 2.0551, "step": 26797 }, { "epoch": 0.35, "grad_norm": 3.550239086151123, "learning_rate": 1.9830418591449102e-05, "loss": 2.0868, "step": 26798 }, { "epoch": 0.35, "grad_norm": 4.329525947570801, "learning_rate": 1.9830399321294677e-05, "loss": 2.7142, "step": 26799 }, { "epoch": 0.35, "grad_norm": 3.7065317630767822, "learning_rate": 1.9830380050054817e-05, "loss": 1.9831, "step": 26800 }, { "epoch": 0.35, "grad_norm": 3.5952024459838867, "learning_rate": 1.983036077772951e-05, "loss": 1.9708, "step": 26801 }, { "epoch": 0.35, "grad_norm": 4.13185977935791, "learning_rate": 1.983034150431877e-05, "loss": 2.0318, "step": 26802 }, { "epoch": 0.35, "grad_norm": 4.006681442260742, "learning_rate": 1.9830322229822586e-05, "loss": 1.832, "step": 26803 }, { "epoch": 0.35, "grad_norm": 3.612626314163208, "learning_rate": 1.9830302954240973e-05, "loss": 1.8482, "step": 26804 }, { "epoch": 0.35, "grad_norm": 4.001890659332275, "learning_rate": 1.9830283677573925e-05, "loss": 2.2063, "step": 26805 }, { "epoch": 0.35, "grad_norm": 4.098911285400391, "learning_rate": 1.9830264399821452e-05, "loss": 2.041, "step": 26806 }, { "epoch": 0.35, "grad_norm": 3.9695918560028076, "learning_rate": 1.983024512098355e-05, "loss": 1.9132, "step": 26807 }, { "epoch": 0.35, "grad_norm": 3.6679890155792236, "learning_rate": 1.9830225841060217e-05, "loss": 1.9855, "step": 26808 }, { "epoch": 0.35, "grad_norm": 3.9144370555877686, "learning_rate": 1.9830206560051464e-05, "loss": 2.1586, "step": 26809 }, { "epoch": 0.35, "grad_norm": 3.900390148162842, "learning_rate": 1.983018727795729e-05, "loss": 1.8134, "step": 26810 }, { "epoch": 0.35, "grad_norm": 3.8976311683654785, "learning_rate": 1.9830167994777697e-05, "loss": 2.0868, "step": 26811 }, { "epoch": 0.35, "grad_norm": 3.8244073390960693, "learning_rate": 1.9830148710512683e-05, "loss": 1.9943, "step": 26812 }, { "epoch": 0.35, "grad_norm": 3.6518197059631348, "learning_rate": 1.9830129425162256e-05, "loss": 2.0389, "step": 26813 }, { "epoch": 0.35, "grad_norm": 4.579280376434326, "learning_rate": 1.9830110138726415e-05, "loss": 2.3634, "step": 26814 }, { "epoch": 0.35, "grad_norm": 3.8660504817962646, "learning_rate": 1.9830090851205165e-05, "loss": 1.8778, "step": 26815 }, { "epoch": 0.35, "grad_norm": 4.745010852813721, "learning_rate": 1.9830071562598503e-05, "loss": 2.5207, "step": 26816 }, { "epoch": 0.35, "grad_norm": 3.459157705307007, "learning_rate": 1.9830052272906437e-05, "loss": 1.6274, "step": 26817 }, { "epoch": 0.35, "grad_norm": 3.9531431198120117, "learning_rate": 1.9830032982128965e-05, "loss": 2.0218, "step": 26818 }, { "epoch": 0.35, "grad_norm": 4.1002197265625, "learning_rate": 1.9830013690266092e-05, "loss": 2.2123, "step": 26819 }, { "epoch": 0.35, "grad_norm": 3.758251428604126, "learning_rate": 1.9829994397317818e-05, "loss": 1.8266, "step": 26820 }, { "epoch": 0.35, "grad_norm": 3.998497724533081, "learning_rate": 1.9829975103284148e-05, "loss": 2.3249, "step": 26821 }, { "epoch": 0.35, "grad_norm": 3.5261611938476562, "learning_rate": 1.9829955808165077e-05, "loss": 1.6602, "step": 26822 }, { "epoch": 0.35, "grad_norm": 4.441483020782471, "learning_rate": 1.9829936511960618e-05, "loss": 2.3265, "step": 26823 }, { "epoch": 0.35, "grad_norm": 4.611352443695068, "learning_rate": 1.982991721467076e-05, "loss": 2.8309, "step": 26824 }, { "epoch": 0.35, "grad_norm": 3.561295747756958, "learning_rate": 1.982989791629552e-05, "loss": 1.9268, "step": 26825 }, { "epoch": 0.35, "grad_norm": 3.7262675762176514, "learning_rate": 1.982987861683489e-05, "loss": 1.8502, "step": 26826 }, { "epoch": 0.35, "grad_norm": 3.5405080318450928, "learning_rate": 1.9829859316288875e-05, "loss": 1.9154, "step": 26827 }, { "epoch": 0.35, "grad_norm": 4.16030740737915, "learning_rate": 1.9829840014657472e-05, "loss": 1.9573, "step": 26828 }, { "epoch": 0.35, "grad_norm": 3.874711513519287, "learning_rate": 1.982982071194069e-05, "loss": 2.1559, "step": 26829 }, { "epoch": 0.35, "grad_norm": 3.484759569168091, "learning_rate": 1.982980140813853e-05, "loss": 2.0572, "step": 26830 }, { "epoch": 0.35, "grad_norm": 3.960387706756592, "learning_rate": 1.9829782103250997e-05, "loss": 1.5723, "step": 26831 }, { "epoch": 0.35, "grad_norm": 4.433709144592285, "learning_rate": 1.9829762797278083e-05, "loss": 2.2356, "step": 26832 }, { "epoch": 0.35, "grad_norm": 3.5122900009155273, "learning_rate": 1.9829743490219802e-05, "loss": 1.8703, "step": 26833 }, { "epoch": 0.35, "grad_norm": 3.3425912857055664, "learning_rate": 1.9829724182076146e-05, "loss": 1.5937, "step": 26834 }, { "epoch": 0.35, "grad_norm": 3.7706282138824463, "learning_rate": 1.9829704872847125e-05, "loss": 1.7524, "step": 26835 }, { "epoch": 0.35, "grad_norm": 4.866633892059326, "learning_rate": 1.9829685562532737e-05, "loss": 2.253, "step": 26836 }, { "epoch": 0.35, "grad_norm": 3.9079294204711914, "learning_rate": 1.9829666251132985e-05, "loss": 2.2579, "step": 26837 }, { "epoch": 0.35, "grad_norm": 3.7539548873901367, "learning_rate": 1.982964693864787e-05, "loss": 1.8338, "step": 26838 }, { "epoch": 0.35, "grad_norm": 3.5970733165740967, "learning_rate": 1.9829627625077394e-05, "loss": 1.4846, "step": 26839 }, { "epoch": 0.35, "grad_norm": 3.7809228897094727, "learning_rate": 1.9829608310421565e-05, "loss": 2.0469, "step": 26840 }, { "epoch": 0.35, "grad_norm": 4.038088798522949, "learning_rate": 1.9829588994680376e-05, "loss": 1.8555, "step": 26841 }, { "epoch": 0.35, "grad_norm": 4.1945719718933105, "learning_rate": 1.9829569677853836e-05, "loss": 2.2248, "step": 26842 }, { "epoch": 0.35, "grad_norm": 4.521093368530273, "learning_rate": 1.9829550359941942e-05, "loss": 2.2832, "step": 26843 }, { "epoch": 0.35, "grad_norm": 4.3859076499938965, "learning_rate": 1.9829531040944704e-05, "loss": 1.944, "step": 26844 }, { "epoch": 0.35, "grad_norm": 3.9966471195220947, "learning_rate": 1.9829511720862115e-05, "loss": 2.1661, "step": 26845 }, { "epoch": 0.35, "grad_norm": 4.207127094268799, "learning_rate": 1.982949239969418e-05, "loss": 2.3459, "step": 26846 }, { "epoch": 0.35, "grad_norm": 3.9006264209747314, "learning_rate": 1.9829473077440905e-05, "loss": 2.0351, "step": 26847 }, { "epoch": 0.35, "grad_norm": 3.899399518966675, "learning_rate": 1.9829453754102288e-05, "loss": 2.3506, "step": 26848 }, { "epoch": 0.35, "grad_norm": 4.869020462036133, "learning_rate": 1.9829434429678333e-05, "loss": 2.789, "step": 26849 }, { "epoch": 0.35, "grad_norm": 4.253282070159912, "learning_rate": 1.982941510416904e-05, "loss": 2.153, "step": 26850 }, { "epoch": 0.35, "grad_norm": 4.214224338531494, "learning_rate": 1.9829395777574416e-05, "loss": 2.3054, "step": 26851 }, { "epoch": 0.35, "grad_norm": 4.122220993041992, "learning_rate": 1.982937644989446e-05, "loss": 2.0804, "step": 26852 }, { "epoch": 0.35, "grad_norm": 4.468835353851318, "learning_rate": 1.982935712112917e-05, "loss": 1.9541, "step": 26853 }, { "epoch": 0.35, "grad_norm": 3.7049524784088135, "learning_rate": 1.9829337791278557e-05, "loss": 2.0258, "step": 26854 }, { "epoch": 0.35, "grad_norm": 4.824758052825928, "learning_rate": 1.9829318460342616e-05, "loss": 3.1558, "step": 26855 }, { "epoch": 0.35, "grad_norm": 4.11270809173584, "learning_rate": 1.9829299128321352e-05, "loss": 1.8549, "step": 26856 }, { "epoch": 0.35, "grad_norm": 4.340245723724365, "learning_rate": 1.9829279795214767e-05, "loss": 2.3406, "step": 26857 }, { "epoch": 0.35, "grad_norm": 3.8640213012695312, "learning_rate": 1.9829260461022863e-05, "loss": 2.2232, "step": 26858 }, { "epoch": 0.35, "grad_norm": 3.9642579555511475, "learning_rate": 1.9829241125745638e-05, "loss": 1.9161, "step": 26859 }, { "epoch": 0.35, "grad_norm": 3.9431779384613037, "learning_rate": 1.9829221789383103e-05, "loss": 1.773, "step": 26860 }, { "epoch": 0.35, "grad_norm": 4.241359233856201, "learning_rate": 1.982920245193525e-05, "loss": 2.6428, "step": 26861 }, { "epoch": 0.35, "grad_norm": 4.066153526306152, "learning_rate": 1.9829183113402092e-05, "loss": 2.2961, "step": 26862 }, { "epoch": 0.35, "grad_norm": 3.9847288131713867, "learning_rate": 1.9829163773783624e-05, "loss": 2.3412, "step": 26863 }, { "epoch": 0.35, "grad_norm": 3.8131906986236572, "learning_rate": 1.9829144433079848e-05, "loss": 1.8769, "step": 26864 }, { "epoch": 0.35, "grad_norm": 3.9788756370544434, "learning_rate": 1.9829125091290766e-05, "loss": 2.0716, "step": 26865 }, { "epoch": 0.35, "grad_norm": 3.7499282360076904, "learning_rate": 1.9829105748416384e-05, "loss": 2.2109, "step": 26866 }, { "epoch": 0.35, "grad_norm": 4.256555557250977, "learning_rate": 1.9829086404456702e-05, "loss": 2.1278, "step": 26867 }, { "epoch": 0.35, "grad_norm": 4.43690824508667, "learning_rate": 1.982906705941172e-05, "loss": 2.2346, "step": 26868 }, { "epoch": 0.35, "grad_norm": 3.358555316925049, "learning_rate": 1.9829047713281445e-05, "loss": 1.5156, "step": 26869 }, { "epoch": 0.35, "grad_norm": 3.9706411361694336, "learning_rate": 1.982902836606588e-05, "loss": 2.0213, "step": 26870 }, { "epoch": 0.35, "grad_norm": 3.6963632106781006, "learning_rate": 1.9829009017765016e-05, "loss": 1.9554, "step": 26871 }, { "epoch": 0.35, "grad_norm": 4.356752395629883, "learning_rate": 1.9828989668378868e-05, "loss": 2.4022, "step": 26872 }, { "epoch": 0.35, "grad_norm": 3.5472538471221924, "learning_rate": 1.982897031790743e-05, "loss": 2.0197, "step": 26873 }, { "epoch": 0.35, "grad_norm": 4.371097087860107, "learning_rate": 1.9828950966350707e-05, "loss": 2.0962, "step": 26874 }, { "epoch": 0.35, "grad_norm": 4.388209819793701, "learning_rate": 1.9828931613708703e-05, "loss": 2.1235, "step": 26875 }, { "epoch": 0.35, "grad_norm": 5.297712802886963, "learning_rate": 1.9828912259981417e-05, "loss": 3.1266, "step": 26876 }, { "epoch": 0.35, "grad_norm": 3.975147247314453, "learning_rate": 1.982889290516885e-05, "loss": 2.0324, "step": 26877 }, { "epoch": 0.35, "grad_norm": 3.8352210521698, "learning_rate": 1.9828873549271012e-05, "loss": 1.8408, "step": 26878 }, { "epoch": 0.35, "grad_norm": 3.810741901397705, "learning_rate": 1.9828854192287894e-05, "loss": 1.8439, "step": 26879 }, { "epoch": 0.35, "grad_norm": 3.5746145248413086, "learning_rate": 1.9828834834219506e-05, "loss": 1.6271, "step": 26880 }, { "epoch": 0.35, "grad_norm": 4.53661584854126, "learning_rate": 1.982881547506585e-05, "loss": 1.9372, "step": 26881 }, { "epoch": 0.35, "grad_norm": 3.8523406982421875, "learning_rate": 1.9828796114826922e-05, "loss": 2.2308, "step": 26882 }, { "epoch": 0.35, "grad_norm": 3.695077657699585, "learning_rate": 1.982877675350273e-05, "loss": 1.9568, "step": 26883 }, { "epoch": 0.35, "grad_norm": 4.2886738777160645, "learning_rate": 1.9828757391093273e-05, "loss": 2.1964, "step": 26884 }, { "epoch": 0.35, "grad_norm": 3.893301486968994, "learning_rate": 1.982873802759856e-05, "loss": 1.591, "step": 26885 }, { "epoch": 0.35, "grad_norm": 3.647932291030884, "learning_rate": 1.982871866301858e-05, "loss": 1.7702, "step": 26886 }, { "epoch": 0.35, "grad_norm": 3.810856580734253, "learning_rate": 1.9828699297353348e-05, "loss": 1.7897, "step": 26887 }, { "epoch": 0.35, "grad_norm": 3.913062334060669, "learning_rate": 1.982867993060286e-05, "loss": 1.9932, "step": 26888 }, { "epoch": 0.35, "grad_norm": 3.6643424034118652, "learning_rate": 1.9828660562767114e-05, "loss": 1.8829, "step": 26889 }, { "epoch": 0.35, "grad_norm": 3.5848960876464844, "learning_rate": 1.9828641193846123e-05, "loss": 2.0467, "step": 26890 }, { "epoch": 0.35, "grad_norm": 4.211221694946289, "learning_rate": 1.982862182383988e-05, "loss": 2.253, "step": 26891 }, { "epoch": 0.35, "grad_norm": 4.136984348297119, "learning_rate": 1.9828602452748394e-05, "loss": 1.9294, "step": 26892 }, { "epoch": 0.35, "grad_norm": 4.211555480957031, "learning_rate": 1.982858308057166e-05, "loss": 2.095, "step": 26893 }, { "epoch": 0.35, "grad_norm": 3.971168041229248, "learning_rate": 1.9828563707309686e-05, "loss": 2.103, "step": 26894 }, { "epoch": 0.35, "grad_norm": 4.06830358505249, "learning_rate": 1.982854433296247e-05, "loss": 2.1681, "step": 26895 }, { "epoch": 0.35, "grad_norm": 3.8532280921936035, "learning_rate": 1.9828524957530015e-05, "loss": 1.8602, "step": 26896 }, { "epoch": 0.35, "grad_norm": 3.5230679512023926, "learning_rate": 1.982850558101233e-05, "loss": 1.7172, "step": 26897 }, { "epoch": 0.35, "grad_norm": 3.9494223594665527, "learning_rate": 1.9828486203409402e-05, "loss": 1.8492, "step": 26898 }, { "epoch": 0.35, "grad_norm": 3.384645938873291, "learning_rate": 1.982846682472125e-05, "loss": 1.7654, "step": 26899 }, { "epoch": 0.35, "grad_norm": 4.153275012969971, "learning_rate": 1.9828447444947864e-05, "loss": 2.3112, "step": 26900 }, { "epoch": 0.35, "grad_norm": 4.3292365074157715, "learning_rate": 1.9828428064089255e-05, "loss": 2.1889, "step": 26901 }, { "epoch": 0.35, "grad_norm": 3.5178720951080322, "learning_rate": 1.9828408682145417e-05, "loss": 1.7576, "step": 26902 }, { "epoch": 0.35, "grad_norm": 4.099374771118164, "learning_rate": 1.9828389299116358e-05, "loss": 2.3658, "step": 26903 }, { "epoch": 0.35, "grad_norm": 3.8952393531799316, "learning_rate": 1.9828369915002076e-05, "loss": 1.6419, "step": 26904 }, { "epoch": 0.35, "grad_norm": 4.282251834869385, "learning_rate": 1.982835052980258e-05, "loss": 2.5706, "step": 26905 }, { "epoch": 0.35, "grad_norm": 3.9267730712890625, "learning_rate": 1.9828331143517864e-05, "loss": 2.0175, "step": 26906 }, { "epoch": 0.35, "grad_norm": 4.252142429351807, "learning_rate": 1.982831175614793e-05, "loss": 2.3246, "step": 26907 }, { "epoch": 0.35, "grad_norm": 4.240710258483887, "learning_rate": 1.9828292367692787e-05, "loss": 2.0207, "step": 26908 }, { "epoch": 0.35, "grad_norm": 4.136656761169434, "learning_rate": 1.9828272978152436e-05, "loss": 2.4971, "step": 26909 }, { "epoch": 0.35, "grad_norm": 4.125362873077393, "learning_rate": 1.9828253587526873e-05, "loss": 2.3245, "step": 26910 }, { "epoch": 0.35, "grad_norm": 3.3484556674957275, "learning_rate": 1.982823419581611e-05, "loss": 1.5976, "step": 26911 }, { "epoch": 0.35, "grad_norm": 4.271584987640381, "learning_rate": 1.982821480302014e-05, "loss": 1.9942, "step": 26912 }, { "epoch": 0.35, "grad_norm": 3.721773386001587, "learning_rate": 1.9828195409138964e-05, "loss": 1.8403, "step": 26913 }, { "epoch": 0.35, "grad_norm": 3.779114246368408, "learning_rate": 1.982817601417259e-05, "loss": 1.4242, "step": 26914 }, { "epoch": 0.35, "grad_norm": 3.719005584716797, "learning_rate": 1.9828156618121023e-05, "loss": 1.6171, "step": 26915 }, { "epoch": 0.35, "grad_norm": 4.083793640136719, "learning_rate": 1.9828137220984257e-05, "loss": 2.0411, "step": 26916 }, { "epoch": 0.35, "grad_norm": 3.1958250999450684, "learning_rate": 1.98281178227623e-05, "loss": 1.733, "step": 26917 }, { "epoch": 0.35, "grad_norm": 3.657073974609375, "learning_rate": 1.9828098423455154e-05, "loss": 1.687, "step": 26918 }, { "epoch": 0.35, "grad_norm": 4.087247371673584, "learning_rate": 1.9828079023062813e-05, "loss": 2.511, "step": 26919 }, { "epoch": 0.35, "grad_norm": 4.087744235992432, "learning_rate": 1.9828059621585288e-05, "loss": 2.0135, "step": 26920 }, { "epoch": 0.35, "grad_norm": 3.957341432571411, "learning_rate": 1.982804021902258e-05, "loss": 2.1294, "step": 26921 }, { "epoch": 0.35, "grad_norm": 3.7707886695861816, "learning_rate": 1.982802081537469e-05, "loss": 1.9809, "step": 26922 }, { "epoch": 0.35, "grad_norm": 3.9750828742980957, "learning_rate": 1.982800141064162e-05, "loss": 2.2985, "step": 26923 }, { "epoch": 0.35, "grad_norm": 4.09065580368042, "learning_rate": 1.982798200482337e-05, "loss": 2.1647, "step": 26924 }, { "epoch": 0.35, "grad_norm": 3.6062796115875244, "learning_rate": 1.9827962597919944e-05, "loss": 1.7434, "step": 26925 }, { "epoch": 0.35, "grad_norm": 4.791954040527344, "learning_rate": 1.9827943189931344e-05, "loss": 2.241, "step": 26926 }, { "epoch": 0.35, "grad_norm": 4.303659915924072, "learning_rate": 1.9827923780857573e-05, "loss": 2.4609, "step": 26927 }, { "epoch": 0.35, "grad_norm": 4.2364606857299805, "learning_rate": 1.9827904370698638e-05, "loss": 2.1238, "step": 26928 }, { "epoch": 0.35, "grad_norm": 3.7803122997283936, "learning_rate": 1.9827884959454528e-05, "loss": 1.9645, "step": 26929 }, { "epoch": 0.35, "grad_norm": 3.6125402450561523, "learning_rate": 1.9827865547125253e-05, "loss": 1.8879, "step": 26930 }, { "epoch": 0.35, "grad_norm": 3.663083553314209, "learning_rate": 1.9827846133710818e-05, "loss": 1.979, "step": 26931 }, { "epoch": 0.35, "grad_norm": 3.681641101837158, "learning_rate": 1.982782671921122e-05, "loss": 2.045, "step": 26932 }, { "epoch": 0.35, "grad_norm": 4.704119682312012, "learning_rate": 1.9827807303626464e-05, "loss": 2.1239, "step": 26933 }, { "epoch": 0.35, "grad_norm": 4.016756534576416, "learning_rate": 1.9827787886956552e-05, "loss": 2.3479, "step": 26934 }, { "epoch": 0.35, "grad_norm": 4.145920276641846, "learning_rate": 1.9827768469201486e-05, "loss": 2.231, "step": 26935 }, { "epoch": 0.35, "grad_norm": 4.073997497558594, "learning_rate": 1.9827749050361266e-05, "loss": 2.308, "step": 26936 }, { "epoch": 0.35, "grad_norm": 3.7169713973999023, "learning_rate": 1.98277296304359e-05, "loss": 1.9451, "step": 26937 }, { "epoch": 0.35, "grad_norm": 4.529075622558594, "learning_rate": 1.982771020942538e-05, "loss": 2.0493, "step": 26938 }, { "epoch": 0.35, "grad_norm": 4.166497707366943, "learning_rate": 1.9827690787329717e-05, "loss": 2.4911, "step": 26939 }, { "epoch": 0.35, "grad_norm": 3.6349194049835205, "learning_rate": 1.982767136414891e-05, "loss": 1.7897, "step": 26940 }, { "epoch": 0.35, "grad_norm": 3.1883468627929688, "learning_rate": 1.982765193988296e-05, "loss": 1.5849, "step": 26941 }, { "epoch": 0.35, "grad_norm": 3.6782968044281006, "learning_rate": 1.9827632514531872e-05, "loss": 1.8328, "step": 26942 }, { "epoch": 0.35, "grad_norm": 3.9456493854522705, "learning_rate": 1.9827613088095647e-05, "loss": 2.1505, "step": 26943 }, { "epoch": 0.35, "grad_norm": 3.6800267696380615, "learning_rate": 1.9827593660574287e-05, "loss": 1.914, "step": 26944 }, { "epoch": 0.35, "grad_norm": 3.522249698638916, "learning_rate": 1.982757423196779e-05, "loss": 1.7431, "step": 26945 }, { "epoch": 0.35, "grad_norm": 3.3355674743652344, "learning_rate": 1.9827554802276164e-05, "loss": 1.4808, "step": 26946 }, { "epoch": 0.35, "grad_norm": 4.583188533782959, "learning_rate": 1.9827535371499413e-05, "loss": 2.6462, "step": 26947 }, { "epoch": 0.35, "grad_norm": 3.973843574523926, "learning_rate": 1.9827515939637535e-05, "loss": 2.1879, "step": 26948 }, { "epoch": 0.35, "grad_norm": 4.31123161315918, "learning_rate": 1.9827496506690527e-05, "loss": 2.1275, "step": 26949 }, { "epoch": 0.35, "grad_norm": 4.774312973022461, "learning_rate": 1.98274770726584e-05, "loss": 2.2245, "step": 26950 }, { "epoch": 0.35, "grad_norm": 4.055876731872559, "learning_rate": 1.9827457637541152e-05, "loss": 2.2932, "step": 26951 }, { "epoch": 0.35, "grad_norm": 4.328588008880615, "learning_rate": 1.982743820133879e-05, "loss": 2.3771, "step": 26952 }, { "epoch": 0.35, "grad_norm": 3.951809883117676, "learning_rate": 1.9827418764051306e-05, "loss": 2.2951, "step": 26953 }, { "epoch": 0.35, "grad_norm": 4.315804958343506, "learning_rate": 1.982739932567871e-05, "loss": 2.276, "step": 26954 }, { "epoch": 0.35, "grad_norm": 4.15837287902832, "learning_rate": 1.9827379886221006e-05, "loss": 2.6496, "step": 26955 }, { "epoch": 0.35, "grad_norm": 4.3232293128967285, "learning_rate": 1.982736044567819e-05, "loss": 2.3675, "step": 26956 }, { "epoch": 0.35, "grad_norm": 4.576560974121094, "learning_rate": 1.9827341004050265e-05, "loss": 2.3502, "step": 26957 }, { "epoch": 0.35, "grad_norm": 4.207335948944092, "learning_rate": 1.9827321561337235e-05, "loss": 2.1426, "step": 26958 }, { "epoch": 0.35, "grad_norm": 3.2855381965637207, "learning_rate": 1.9827302117539104e-05, "loss": 1.4708, "step": 26959 }, { "epoch": 0.35, "grad_norm": 4.2225542068481445, "learning_rate": 1.9827282672655873e-05, "loss": 2.1039, "step": 26960 }, { "epoch": 0.35, "grad_norm": 3.648203134536743, "learning_rate": 1.982726322668754e-05, "loss": 1.9126, "step": 26961 }, { "epoch": 0.35, "grad_norm": 4.510008811950684, "learning_rate": 1.9827243779634112e-05, "loss": 2.1076, "step": 26962 }, { "epoch": 0.35, "grad_norm": 4.065502643585205, "learning_rate": 1.9827224331495588e-05, "loss": 2.2061, "step": 26963 }, { "epoch": 0.35, "grad_norm": 3.8485894203186035, "learning_rate": 1.9827204882271974e-05, "loss": 2.2886, "step": 26964 }, { "epoch": 0.35, "grad_norm": 5.327865123748779, "learning_rate": 1.982718543196327e-05, "loss": 2.3257, "step": 26965 }, { "epoch": 0.35, "grad_norm": 4.20441198348999, "learning_rate": 1.9827165980569477e-05, "loss": 1.8683, "step": 26966 }, { "epoch": 0.35, "grad_norm": 3.799755334854126, "learning_rate": 1.98271465280906e-05, "loss": 1.9541, "step": 26967 }, { "epoch": 0.35, "grad_norm": 3.822751522064209, "learning_rate": 1.9827127074526637e-05, "loss": 1.9796, "step": 26968 }, { "epoch": 0.35, "grad_norm": 4.002532005310059, "learning_rate": 1.9827107619877594e-05, "loss": 2.0824, "step": 26969 }, { "epoch": 0.35, "grad_norm": 4.080221652984619, "learning_rate": 1.982708816414347e-05, "loss": 1.9887, "step": 26970 }, { "epoch": 0.35, "grad_norm": 3.472175121307373, "learning_rate": 1.982706870732427e-05, "loss": 1.7633, "step": 26971 }, { "epoch": 0.35, "grad_norm": 3.4662177562713623, "learning_rate": 1.9827049249419994e-05, "loss": 1.6823, "step": 26972 }, { "epoch": 0.35, "grad_norm": 3.795747756958008, "learning_rate": 1.9827029790430646e-05, "loss": 1.7116, "step": 26973 }, { "epoch": 0.35, "grad_norm": 3.7520053386688232, "learning_rate": 1.9827010330356225e-05, "loss": 2.1608, "step": 26974 }, { "epoch": 0.35, "grad_norm": 4.22049617767334, "learning_rate": 1.9826990869196738e-05, "loss": 2.1484, "step": 26975 }, { "epoch": 0.35, "grad_norm": 3.7522661685943604, "learning_rate": 1.982697140695218e-05, "loss": 2.2297, "step": 26976 }, { "epoch": 0.35, "grad_norm": 3.309915542602539, "learning_rate": 1.9826951943622565e-05, "loss": 1.5173, "step": 26977 }, { "epoch": 0.35, "grad_norm": 4.1166300773620605, "learning_rate": 1.9826932479207883e-05, "loss": 1.7167, "step": 26978 }, { "epoch": 0.35, "grad_norm": 3.65144944190979, "learning_rate": 1.982691301370814e-05, "loss": 1.9896, "step": 26979 }, { "epoch": 0.35, "grad_norm": 3.933181047439575, "learning_rate": 1.9826893547123344e-05, "loss": 2.158, "step": 26980 }, { "epoch": 0.35, "grad_norm": 4.382958889007568, "learning_rate": 1.982687407945349e-05, "loss": 2.3103, "step": 26981 }, { "epoch": 0.35, "grad_norm": 3.6923577785491943, "learning_rate": 1.982685461069858e-05, "loss": 1.9413, "step": 26982 }, { "epoch": 0.35, "grad_norm": 3.738237142562866, "learning_rate": 1.982683514085862e-05, "loss": 1.9171, "step": 26983 }, { "epoch": 0.35, "grad_norm": 5.067402362823486, "learning_rate": 1.9826815669933614e-05, "loss": 2.4182, "step": 26984 }, { "epoch": 0.35, "grad_norm": 3.859402656555176, "learning_rate": 1.9826796197923553e-05, "loss": 1.7707, "step": 26985 }, { "epoch": 0.35, "grad_norm": 4.418461799621582, "learning_rate": 1.9826776724828454e-05, "loss": 2.1803, "step": 26986 }, { "epoch": 0.35, "grad_norm": 3.481151580810547, "learning_rate": 1.9826757250648308e-05, "loss": 1.9256, "step": 26987 }, { "epoch": 0.35, "grad_norm": 3.7115607261657715, "learning_rate": 1.9826737775383124e-05, "loss": 1.6528, "step": 26988 }, { "epoch": 0.35, "grad_norm": 3.645315647125244, "learning_rate": 1.98267182990329e-05, "loss": 1.8394, "step": 26989 }, { "epoch": 0.35, "grad_norm": 3.913231134414673, "learning_rate": 1.982669882159764e-05, "loss": 2.1769, "step": 26990 }, { "epoch": 0.35, "grad_norm": 3.460373640060425, "learning_rate": 1.9826679343077346e-05, "loss": 1.6292, "step": 26991 }, { "epoch": 0.35, "grad_norm": 4.0952677726745605, "learning_rate": 1.982665986347202e-05, "loss": 2.3628, "step": 26992 }, { "epoch": 0.35, "grad_norm": 3.911832809448242, "learning_rate": 1.9826640382781663e-05, "loss": 2.4287, "step": 26993 }, { "epoch": 0.35, "grad_norm": 4.396920680999756, "learning_rate": 1.982662090100628e-05, "loss": 2.5005, "step": 26994 }, { "epoch": 0.35, "grad_norm": 3.817617654800415, "learning_rate": 1.9826601418145868e-05, "loss": 2.1132, "step": 26995 }, { "epoch": 0.35, "grad_norm": 4.237934112548828, "learning_rate": 1.9826581934200436e-05, "loss": 2.2018, "step": 26996 }, { "epoch": 0.35, "grad_norm": 3.9833457469940186, "learning_rate": 1.982656244916998e-05, "loss": 2.2261, "step": 26997 }, { "epoch": 0.35, "grad_norm": 4.182979583740234, "learning_rate": 1.9826542963054508e-05, "loss": 2.152, "step": 26998 }, { "epoch": 0.35, "grad_norm": 4.009753704071045, "learning_rate": 1.982652347585402e-05, "loss": 2.0297, "step": 26999 }, { "epoch": 0.35, "grad_norm": 3.8828041553497314, "learning_rate": 1.982650398756851e-05, "loss": 2.2104, "step": 27000 }, { "epoch": 0.35, "grad_norm": 3.749025583267212, "learning_rate": 1.982648449819799e-05, "loss": 2.118, "step": 27001 }, { "epoch": 0.35, "grad_norm": 4.04307746887207, "learning_rate": 1.9826465007742462e-05, "loss": 1.7867, "step": 27002 }, { "epoch": 0.35, "grad_norm": 3.910517692565918, "learning_rate": 1.9826445516201926e-05, "loss": 2.1096, "step": 27003 }, { "epoch": 0.35, "grad_norm": 4.562839031219482, "learning_rate": 1.982642602357638e-05, "loss": 2.2706, "step": 27004 }, { "epoch": 0.35, "grad_norm": 4.009239196777344, "learning_rate": 1.9826406529865834e-05, "loss": 2.3009, "step": 27005 }, { "epoch": 0.35, "grad_norm": 4.046966075897217, "learning_rate": 1.9826387035070286e-05, "loss": 1.9188, "step": 27006 }, { "epoch": 0.35, "grad_norm": 4.024011135101318, "learning_rate": 1.9826367539189736e-05, "loss": 1.9059, "step": 27007 }, { "epoch": 0.35, "grad_norm": 4.1547627449035645, "learning_rate": 1.9826348042224187e-05, "loss": 2.2325, "step": 27008 }, { "epoch": 0.35, "grad_norm": 4.200163841247559, "learning_rate": 1.9826328544173644e-05, "loss": 2.2495, "step": 27009 }, { "epoch": 0.35, "grad_norm": 3.436472177505493, "learning_rate": 1.9826309045038105e-05, "loss": 1.6721, "step": 27010 }, { "epoch": 0.35, "grad_norm": 4.136249542236328, "learning_rate": 1.982628954481758e-05, "loss": 2.2192, "step": 27011 }, { "epoch": 0.35, "grad_norm": 4.706307411193848, "learning_rate": 1.982627004351206e-05, "loss": 2.0274, "step": 27012 }, { "epoch": 0.35, "grad_norm": 4.747231960296631, "learning_rate": 1.982625054112156e-05, "loss": 2.5149, "step": 27013 }, { "epoch": 0.35, "grad_norm": 3.953632354736328, "learning_rate": 1.9826231037646073e-05, "loss": 2.1072, "step": 27014 }, { "epoch": 0.35, "grad_norm": 4.071174621582031, "learning_rate": 1.98262115330856e-05, "loss": 2.0081, "step": 27015 }, { "epoch": 0.35, "grad_norm": 4.049001216888428, "learning_rate": 1.982619202744015e-05, "loss": 2.2777, "step": 27016 }, { "epoch": 0.35, "grad_norm": 3.755692958831787, "learning_rate": 1.9826172520709716e-05, "loss": 2.045, "step": 27017 }, { "epoch": 0.35, "grad_norm": 4.071987152099609, "learning_rate": 1.9826153012894313e-05, "loss": 2.2079, "step": 27018 }, { "epoch": 0.35, "grad_norm": 4.649916648864746, "learning_rate": 1.9826133503993935e-05, "loss": 2.5361, "step": 27019 }, { "epoch": 0.35, "grad_norm": 4.175264358520508, "learning_rate": 1.982611399400858e-05, "loss": 1.8909, "step": 27020 }, { "epoch": 0.35, "grad_norm": 3.627652883529663, "learning_rate": 1.9826094482938257e-05, "loss": 1.8724, "step": 27021 }, { "epoch": 0.35, "grad_norm": 4.004426002502441, "learning_rate": 1.9826074970782972e-05, "loss": 2.2901, "step": 27022 }, { "epoch": 0.35, "grad_norm": 4.269763469696045, "learning_rate": 1.9826055457542716e-05, "loss": 1.7074, "step": 27023 }, { "epoch": 0.35, "grad_norm": 3.7049286365509033, "learning_rate": 1.9826035943217498e-05, "loss": 1.8086, "step": 27024 }, { "epoch": 0.35, "grad_norm": 3.876262903213501, "learning_rate": 1.982601642780732e-05, "loss": 2.0177, "step": 27025 }, { "epoch": 0.35, "grad_norm": 3.9193034172058105, "learning_rate": 1.9825996911312182e-05, "loss": 1.9522, "step": 27026 }, { "epoch": 0.35, "grad_norm": 3.6084370613098145, "learning_rate": 1.9825977393732088e-05, "loss": 1.8289, "step": 27027 }, { "epoch": 0.35, "grad_norm": 4.49962043762207, "learning_rate": 1.982595787506704e-05, "loss": 2.0038, "step": 27028 }, { "epoch": 0.35, "grad_norm": 3.5750632286071777, "learning_rate": 1.9825938355317036e-05, "loss": 1.8654, "step": 27029 }, { "epoch": 0.35, "grad_norm": 4.345395565032959, "learning_rate": 1.9825918834482086e-05, "loss": 2.2447, "step": 27030 }, { "epoch": 0.35, "grad_norm": 3.9688267707824707, "learning_rate": 1.9825899312562184e-05, "loss": 2.1417, "step": 27031 }, { "epoch": 0.35, "grad_norm": 3.821904182434082, "learning_rate": 1.9825879789557342e-05, "loss": 1.7219, "step": 27032 }, { "epoch": 0.35, "grad_norm": 4.447120666503906, "learning_rate": 1.9825860265467552e-05, "loss": 2.761, "step": 27033 }, { "epoch": 0.35, "grad_norm": 3.6911699771881104, "learning_rate": 1.982584074029282e-05, "loss": 1.7415, "step": 27034 }, { "epoch": 0.35, "grad_norm": 4.568526268005371, "learning_rate": 1.982582121403315e-05, "loss": 2.1794, "step": 27035 }, { "epoch": 0.35, "grad_norm": 3.6099374294281006, "learning_rate": 1.9825801686688538e-05, "loss": 2.103, "step": 27036 }, { "epoch": 0.35, "grad_norm": 4.247210502624512, "learning_rate": 1.9825782158258995e-05, "loss": 2.4808, "step": 27037 }, { "epoch": 0.35, "grad_norm": 3.9370696544647217, "learning_rate": 1.9825762628744522e-05, "loss": 2.1372, "step": 27038 }, { "epoch": 0.35, "grad_norm": 4.697386264801025, "learning_rate": 1.982574309814511e-05, "loss": 2.1957, "step": 27039 }, { "epoch": 0.35, "grad_norm": 4.2604546546936035, "learning_rate": 1.9825723566460777e-05, "loss": 2.6681, "step": 27040 }, { "epoch": 0.35, "grad_norm": 4.365277290344238, "learning_rate": 1.9825704033691515e-05, "loss": 1.9624, "step": 27041 }, { "epoch": 0.35, "grad_norm": 3.2178752422332764, "learning_rate": 1.982568449983733e-05, "loss": 1.6856, "step": 27042 }, { "epoch": 0.35, "grad_norm": 3.5160107612609863, "learning_rate": 1.9825664964898216e-05, "loss": 1.7578, "step": 27043 }, { "epoch": 0.35, "grad_norm": 3.703824520111084, "learning_rate": 1.9825645428874186e-05, "loss": 1.935, "step": 27044 }, { "epoch": 0.35, "grad_norm": 4.057374954223633, "learning_rate": 1.9825625891765242e-05, "loss": 2.3595, "step": 27045 }, { "epoch": 0.35, "grad_norm": 3.5191421508789062, "learning_rate": 1.9825606353571375e-05, "loss": 1.6235, "step": 27046 }, { "epoch": 0.35, "grad_norm": 3.8677055835723877, "learning_rate": 1.98255868142926e-05, "loss": 1.6605, "step": 27047 }, { "epoch": 0.35, "grad_norm": 4.828877925872803, "learning_rate": 1.9825567273928912e-05, "loss": 2.7241, "step": 27048 }, { "epoch": 0.35, "grad_norm": 3.7341196537017822, "learning_rate": 1.9825547732480314e-05, "loss": 1.5165, "step": 27049 }, { "epoch": 0.35, "grad_norm": 3.7018067836761475, "learning_rate": 1.982552818994681e-05, "loss": 2.013, "step": 27050 }, { "epoch": 0.35, "grad_norm": 3.404719829559326, "learning_rate": 1.98255086463284e-05, "loss": 1.6408, "step": 27051 }, { "epoch": 0.35, "grad_norm": 3.536623954772949, "learning_rate": 1.9825489101625087e-05, "loss": 2.0561, "step": 27052 }, { "epoch": 0.35, "grad_norm": 4.096966743469238, "learning_rate": 1.9825469555836874e-05, "loss": 1.867, "step": 27053 }, { "epoch": 0.35, "grad_norm": 3.241882801055908, "learning_rate": 1.9825450008963762e-05, "loss": 1.5175, "step": 27054 }, { "epoch": 0.35, "grad_norm": 4.154307842254639, "learning_rate": 1.9825430461005756e-05, "loss": 2.2807, "step": 27055 }, { "epoch": 0.35, "grad_norm": 3.8342809677124023, "learning_rate": 1.9825410911962853e-05, "loss": 1.9701, "step": 27056 }, { "epoch": 0.35, "grad_norm": 3.926123857498169, "learning_rate": 1.9825391361835057e-05, "loss": 2.1078, "step": 27057 }, { "epoch": 0.35, "grad_norm": 3.7494735717773438, "learning_rate": 1.9825371810622374e-05, "loss": 1.7373, "step": 27058 }, { "epoch": 0.35, "grad_norm": 3.903672218322754, "learning_rate": 1.98253522583248e-05, "loss": 2.0953, "step": 27059 }, { "epoch": 0.35, "grad_norm": 4.143683433532715, "learning_rate": 1.9825332704942343e-05, "loss": 2.1999, "step": 27060 }, { "epoch": 0.35, "grad_norm": 3.719550371170044, "learning_rate": 1.9825313150475e-05, "loss": 1.7542, "step": 27061 }, { "epoch": 0.35, "grad_norm": 3.3804070949554443, "learning_rate": 1.9825293594922778e-05, "loss": 1.4411, "step": 27062 }, { "epoch": 0.35, "grad_norm": 4.229006767272949, "learning_rate": 1.982527403828568e-05, "loss": 2.0614, "step": 27063 }, { "epoch": 0.35, "grad_norm": 3.7968978881835938, "learning_rate": 1.9825254480563698e-05, "loss": 1.9873, "step": 27064 }, { "epoch": 0.35, "grad_norm": 3.4309587478637695, "learning_rate": 1.9825234921756846e-05, "loss": 2.0519, "step": 27065 }, { "epoch": 0.35, "grad_norm": 3.864819288253784, "learning_rate": 1.982521536186512e-05, "loss": 1.7541, "step": 27066 }, { "epoch": 0.35, "grad_norm": 4.071322441101074, "learning_rate": 1.9825195800888524e-05, "loss": 2.5457, "step": 27067 }, { "epoch": 0.35, "grad_norm": 3.5854294300079346, "learning_rate": 1.982517623882706e-05, "loss": 1.9092, "step": 27068 }, { "epoch": 0.35, "grad_norm": 3.9664528369903564, "learning_rate": 1.9825156675680727e-05, "loss": 2.0978, "step": 27069 }, { "epoch": 0.35, "grad_norm": 4.076970100402832, "learning_rate": 1.982513711144953e-05, "loss": 2.1134, "step": 27070 }, { "epoch": 0.35, "grad_norm": 3.7873823642730713, "learning_rate": 1.9825117546133476e-05, "loss": 1.8738, "step": 27071 }, { "epoch": 0.35, "grad_norm": 4.018298625946045, "learning_rate": 1.982509797973256e-05, "loss": 1.9943, "step": 27072 }, { "epoch": 0.35, "grad_norm": 3.919097423553467, "learning_rate": 1.982507841224679e-05, "loss": 2.1905, "step": 27073 }, { "epoch": 0.35, "grad_norm": 3.790125608444214, "learning_rate": 1.982505884367616e-05, "loss": 2.1391, "step": 27074 }, { "epoch": 0.35, "grad_norm": 4.3832244873046875, "learning_rate": 1.9825039274020675e-05, "loss": 2.128, "step": 27075 }, { "epoch": 0.35, "grad_norm": 4.004334449768066, "learning_rate": 1.9825019703280342e-05, "loss": 2.3928, "step": 27076 }, { "epoch": 0.35, "grad_norm": 3.6967179775238037, "learning_rate": 1.982500013145516e-05, "loss": 1.7828, "step": 27077 }, { "epoch": 0.35, "grad_norm": 3.414808988571167, "learning_rate": 1.9824980558545133e-05, "loss": 1.7517, "step": 27078 }, { "epoch": 0.35, "grad_norm": 3.705474853515625, "learning_rate": 1.982496098455026e-05, "loss": 2.1701, "step": 27079 }, { "epoch": 0.35, "grad_norm": 4.248988628387451, "learning_rate": 1.9824941409470542e-05, "loss": 2.365, "step": 27080 }, { "epoch": 0.35, "grad_norm": 4.563882827758789, "learning_rate": 1.9824921833305987e-05, "loss": 2.5485, "step": 27081 }, { "epoch": 0.35, "grad_norm": 3.5822982788085938, "learning_rate": 1.9824902256056592e-05, "loss": 1.9538, "step": 27082 }, { "epoch": 0.35, "grad_norm": 3.8471949100494385, "learning_rate": 1.9824882677722362e-05, "loss": 1.8615, "step": 27083 }, { "epoch": 0.35, "grad_norm": 3.737276315689087, "learning_rate": 1.98248630983033e-05, "loss": 2.1061, "step": 27084 }, { "epoch": 0.35, "grad_norm": 4.321389198303223, "learning_rate": 1.9824843517799404e-05, "loss": 2.49, "step": 27085 }, { "epoch": 0.35, "grad_norm": 3.6159884929656982, "learning_rate": 1.982482393621068e-05, "loss": 2.09, "step": 27086 }, { "epoch": 0.35, "grad_norm": 3.619431257247925, "learning_rate": 1.9824804353537127e-05, "loss": 1.6154, "step": 27087 }, { "epoch": 0.35, "grad_norm": 3.1726605892181396, "learning_rate": 1.982478476977875e-05, "loss": 1.3909, "step": 27088 }, { "epoch": 0.35, "grad_norm": 3.8269171714782715, "learning_rate": 1.982476518493555e-05, "loss": 1.9268, "step": 27089 }, { "epoch": 0.35, "grad_norm": 3.6996238231658936, "learning_rate": 1.982474559900753e-05, "loss": 1.634, "step": 27090 }, { "epoch": 0.35, "grad_norm": 3.654738426208496, "learning_rate": 1.982472601199469e-05, "loss": 1.8571, "step": 27091 }, { "epoch": 0.35, "grad_norm": 4.00936222076416, "learning_rate": 1.9824706423897035e-05, "loss": 1.908, "step": 27092 }, { "epoch": 0.35, "grad_norm": 3.8651270866394043, "learning_rate": 1.9824686834714566e-05, "loss": 2.2014, "step": 27093 }, { "epoch": 0.35, "grad_norm": 3.9357945919036865, "learning_rate": 1.9824667244447285e-05, "loss": 1.9189, "step": 27094 }, { "epoch": 0.35, "grad_norm": 3.7198755741119385, "learning_rate": 1.982464765309519e-05, "loss": 2.0736, "step": 27095 }, { "epoch": 0.35, "grad_norm": 3.9374475479125977, "learning_rate": 1.9824628060658292e-05, "loss": 2.3285, "step": 27096 }, { "epoch": 0.35, "grad_norm": 3.918619394302368, "learning_rate": 1.9824608467136585e-05, "loss": 1.9916, "step": 27097 }, { "epoch": 0.35, "grad_norm": 3.696322202682495, "learning_rate": 1.982458887253008e-05, "loss": 1.8522, "step": 27098 }, { "epoch": 0.35, "grad_norm": 4.190439701080322, "learning_rate": 1.9824569276838768e-05, "loss": 2.0202, "step": 27099 }, { "epoch": 0.35, "grad_norm": 4.772432804107666, "learning_rate": 1.982454968006266e-05, "loss": 2.6886, "step": 27100 }, { "epoch": 0.35, "grad_norm": 5.080267906188965, "learning_rate": 1.9824530082201754e-05, "loss": 2.7602, "step": 27101 }, { "epoch": 0.35, "grad_norm": 3.925255298614502, "learning_rate": 1.982451048325605e-05, "loss": 1.7262, "step": 27102 }, { "epoch": 0.35, "grad_norm": 3.653778314590454, "learning_rate": 1.982449088322556e-05, "loss": 2.0887, "step": 27103 }, { "epoch": 0.35, "grad_norm": 3.6820757389068604, "learning_rate": 1.9824471282110273e-05, "loss": 1.9846, "step": 27104 }, { "epoch": 0.35, "grad_norm": 4.181591987609863, "learning_rate": 1.98244516799102e-05, "loss": 1.9272, "step": 27105 }, { "epoch": 0.35, "grad_norm": 3.9163620471954346, "learning_rate": 1.9824432076625345e-05, "loss": 1.8511, "step": 27106 }, { "epoch": 0.35, "grad_norm": 4.000644683837891, "learning_rate": 1.98244124722557e-05, "loss": 2.1667, "step": 27107 }, { "epoch": 0.35, "grad_norm": 4.316951274871826, "learning_rate": 1.9824392866801278e-05, "loss": 2.165, "step": 27108 }, { "epoch": 0.35, "grad_norm": 3.400728702545166, "learning_rate": 1.9824373260262073e-05, "loss": 1.8384, "step": 27109 }, { "epoch": 0.35, "grad_norm": 3.7651004791259766, "learning_rate": 1.9824353652638094e-05, "loss": 2.1649, "step": 27110 }, { "epoch": 0.35, "grad_norm": 3.888024091720581, "learning_rate": 1.9824334043929333e-05, "loss": 2.0302, "step": 27111 }, { "epoch": 0.35, "grad_norm": 4.150640964508057, "learning_rate": 1.9824314434135805e-05, "loss": 2.2817, "step": 27112 }, { "epoch": 0.35, "grad_norm": 4.134558200836182, "learning_rate": 1.9824294823257504e-05, "loss": 1.747, "step": 27113 }, { "epoch": 0.35, "grad_norm": 3.879232883453369, "learning_rate": 1.9824275211294436e-05, "loss": 1.648, "step": 27114 }, { "epoch": 0.35, "grad_norm": 5.114383220672607, "learning_rate": 1.9824255598246597e-05, "loss": 2.2551, "step": 27115 }, { "epoch": 0.35, "grad_norm": 4.215968132019043, "learning_rate": 1.9824235984113996e-05, "loss": 2.0001, "step": 27116 }, { "epoch": 0.35, "grad_norm": 4.072531223297119, "learning_rate": 1.9824216368896634e-05, "loss": 1.9611, "step": 27117 }, { "epoch": 0.35, "grad_norm": 3.804795742034912, "learning_rate": 1.982419675259451e-05, "loss": 2.0156, "step": 27118 }, { "epoch": 0.35, "grad_norm": 3.5719077587127686, "learning_rate": 1.982417713520763e-05, "loss": 1.907, "step": 27119 }, { "epoch": 0.35, "grad_norm": 3.709059000015259, "learning_rate": 1.982415751673599e-05, "loss": 2.0592, "step": 27120 }, { "epoch": 0.35, "grad_norm": 4.100866317749023, "learning_rate": 1.98241378971796e-05, "loss": 2.2158, "step": 27121 }, { "epoch": 0.35, "grad_norm": 4.149795055389404, "learning_rate": 1.982411827653846e-05, "loss": 2.2205, "step": 27122 }, { "epoch": 0.35, "grad_norm": 4.074233531951904, "learning_rate": 1.9824098654812566e-05, "loss": 1.9575, "step": 27123 }, { "epoch": 0.35, "grad_norm": 3.932352066040039, "learning_rate": 1.982407903200193e-05, "loss": 1.9441, "step": 27124 }, { "epoch": 0.35, "grad_norm": 4.31895637512207, "learning_rate": 1.9824059408106546e-05, "loss": 2.2216, "step": 27125 }, { "epoch": 0.35, "grad_norm": 3.4309825897216797, "learning_rate": 1.9824039783126417e-05, "loss": 1.5922, "step": 27126 }, { "epoch": 0.35, "grad_norm": 3.9132773876190186, "learning_rate": 1.9824020157061552e-05, "loss": 2.2879, "step": 27127 }, { "epoch": 0.35, "grad_norm": 3.847715377807617, "learning_rate": 1.9824000529911945e-05, "loss": 1.8117, "step": 27128 }, { "epoch": 0.35, "grad_norm": 3.9420385360717773, "learning_rate": 1.9823980901677605e-05, "loss": 2.1976, "step": 27129 }, { "epoch": 0.35, "grad_norm": 3.7475202083587646, "learning_rate": 1.9823961272358527e-05, "loss": 1.8977, "step": 27130 }, { "epoch": 0.35, "grad_norm": 4.210230350494385, "learning_rate": 1.9823941641954718e-05, "loss": 2.4245, "step": 27131 }, { "epoch": 0.35, "grad_norm": 4.071472644805908, "learning_rate": 1.982392201046618e-05, "loss": 2.104, "step": 27132 }, { "epoch": 0.35, "grad_norm": 4.061581134796143, "learning_rate": 1.9823902377892915e-05, "loss": 2.1141, "step": 27133 }, { "epoch": 0.35, "grad_norm": 3.7622642517089844, "learning_rate": 1.9823882744234924e-05, "loss": 1.8065, "step": 27134 }, { "epoch": 0.35, "grad_norm": 3.4809203147888184, "learning_rate": 1.982386310949221e-05, "loss": 2.011, "step": 27135 }, { "epoch": 0.35, "grad_norm": 3.729653835296631, "learning_rate": 1.9823843473664774e-05, "loss": 2.2505, "step": 27136 }, { "epoch": 0.35, "grad_norm": 3.910349130630493, "learning_rate": 1.9823823836752618e-05, "loss": 2.233, "step": 27137 }, { "epoch": 0.35, "grad_norm": 4.1823201179504395, "learning_rate": 1.9823804198755748e-05, "loss": 2.4309, "step": 27138 }, { "epoch": 0.35, "grad_norm": 4.11358118057251, "learning_rate": 1.982378455967416e-05, "loss": 2.4139, "step": 27139 }, { "epoch": 0.35, "grad_norm": 3.5105316638946533, "learning_rate": 1.9823764919507864e-05, "loss": 1.759, "step": 27140 }, { "epoch": 0.35, "grad_norm": 4.584065914154053, "learning_rate": 1.9823745278256853e-05, "loss": 2.6813, "step": 27141 }, { "epoch": 0.35, "grad_norm": 3.5335545539855957, "learning_rate": 1.982372563592114e-05, "loss": 2.1, "step": 27142 }, { "epoch": 0.35, "grad_norm": 4.212527751922607, "learning_rate": 1.9823705992500714e-05, "loss": 1.9622, "step": 27143 }, { "epoch": 0.35, "grad_norm": 3.8663246631622314, "learning_rate": 1.9823686347995588e-05, "loss": 2.0117, "step": 27144 }, { "epoch": 0.35, "grad_norm": 3.6411492824554443, "learning_rate": 1.982366670240576e-05, "loss": 1.9374, "step": 27145 }, { "epoch": 0.35, "grad_norm": 4.242074966430664, "learning_rate": 1.9823647055731234e-05, "loss": 2.0507, "step": 27146 }, { "epoch": 0.35, "grad_norm": 3.2818453311920166, "learning_rate": 1.9823627407972004e-05, "loss": 1.7795, "step": 27147 }, { "epoch": 0.35, "grad_norm": 4.052355766296387, "learning_rate": 1.9823607759128085e-05, "loss": 2.1333, "step": 27148 }, { "epoch": 0.35, "grad_norm": 3.115469217300415, "learning_rate": 1.9823588109199472e-05, "loss": 1.341, "step": 27149 }, { "epoch": 0.35, "grad_norm": 3.3770837783813477, "learning_rate": 1.982356845818617e-05, "loss": 1.7922, "step": 27150 }, { "epoch": 0.35, "grad_norm": 4.005000591278076, "learning_rate": 1.9823548806088176e-05, "loss": 2.0132, "step": 27151 }, { "epoch": 0.35, "grad_norm": 3.598724842071533, "learning_rate": 1.9823529152905497e-05, "loss": 1.6125, "step": 27152 }, { "epoch": 0.35, "grad_norm": 3.762066125869751, "learning_rate": 1.9823509498638134e-05, "loss": 2.26, "step": 27153 }, { "epoch": 0.35, "grad_norm": 4.407718658447266, "learning_rate": 1.9823489843286088e-05, "loss": 2.4173, "step": 27154 }, { "epoch": 0.35, "grad_norm": 3.625277280807495, "learning_rate": 1.982347018684936e-05, "loss": 1.9618, "step": 27155 }, { "epoch": 0.35, "grad_norm": 3.8771026134490967, "learning_rate": 1.982345052932796e-05, "loss": 2.631, "step": 27156 }, { "epoch": 0.35, "grad_norm": 3.7063004970550537, "learning_rate": 1.982343087072188e-05, "loss": 2.1911, "step": 27157 }, { "epoch": 0.35, "grad_norm": 4.256312370300293, "learning_rate": 1.982341121103113e-05, "loss": 2.2537, "step": 27158 }, { "epoch": 0.35, "grad_norm": 3.7047338485717773, "learning_rate": 1.9823391550255706e-05, "loss": 1.6284, "step": 27159 }, { "epoch": 0.35, "grad_norm": 3.7012789249420166, "learning_rate": 1.9823371888395614e-05, "loss": 2.3719, "step": 27160 }, { "epoch": 0.35, "grad_norm": 5.075660705566406, "learning_rate": 1.9823352225450852e-05, "loss": 2.3554, "step": 27161 }, { "epoch": 0.35, "grad_norm": 3.6278696060180664, "learning_rate": 1.9823332561421428e-05, "loss": 1.6996, "step": 27162 }, { "epoch": 0.35, "grad_norm": 3.9104058742523193, "learning_rate": 1.982331289630734e-05, "loss": 2.0538, "step": 27163 }, { "epoch": 0.35, "grad_norm": 3.7584989070892334, "learning_rate": 1.9823293230108594e-05, "loss": 1.6547, "step": 27164 }, { "epoch": 0.35, "grad_norm": 3.8396594524383545, "learning_rate": 1.982327356282519e-05, "loss": 1.7534, "step": 27165 }, { "epoch": 0.35, "grad_norm": 4.05218505859375, "learning_rate": 1.9823253894457128e-05, "loss": 2.2607, "step": 27166 }, { "epoch": 0.35, "grad_norm": 4.274078369140625, "learning_rate": 1.9823234225004412e-05, "loss": 2.2154, "step": 27167 }, { "epoch": 0.35, "grad_norm": 4.108244895935059, "learning_rate": 1.9823214554467044e-05, "loss": 1.9558, "step": 27168 }, { "epoch": 0.35, "grad_norm": 4.036996364593506, "learning_rate": 1.9823194882845026e-05, "loss": 2.4619, "step": 27169 }, { "epoch": 0.35, "grad_norm": 4.399521350860596, "learning_rate": 1.9823175210138363e-05, "loss": 2.2643, "step": 27170 }, { "epoch": 0.35, "grad_norm": 3.857389450073242, "learning_rate": 1.9823155536347056e-05, "loss": 2.1984, "step": 27171 }, { "epoch": 0.35, "grad_norm": 4.07185697555542, "learning_rate": 1.98231358614711e-05, "loss": 1.8084, "step": 27172 }, { "epoch": 0.35, "grad_norm": 4.5015482902526855, "learning_rate": 1.982311618551051e-05, "loss": 2.6225, "step": 27173 }, { "epoch": 0.35, "grad_norm": 3.872211456298828, "learning_rate": 1.9823096508465278e-05, "loss": 1.6265, "step": 27174 }, { "epoch": 0.35, "grad_norm": 3.862992525100708, "learning_rate": 1.982307683033541e-05, "loss": 2.2922, "step": 27175 }, { "epoch": 0.35, "grad_norm": 4.506941318511963, "learning_rate": 1.982305715112091e-05, "loss": 2.0811, "step": 27176 }, { "epoch": 0.35, "grad_norm": 3.503988027572632, "learning_rate": 1.982303747082177e-05, "loss": 1.8127, "step": 27177 }, { "epoch": 0.35, "grad_norm": 4.3168535232543945, "learning_rate": 1.982301778943801e-05, "loss": 1.891, "step": 27178 }, { "epoch": 0.35, "grad_norm": 3.520329713821411, "learning_rate": 1.9822998106969617e-05, "loss": 1.6423, "step": 27179 }, { "epoch": 0.35, "grad_norm": 3.7477951049804688, "learning_rate": 1.9822978423416598e-05, "loss": 1.8784, "step": 27180 }, { "epoch": 0.35, "grad_norm": 3.810450553894043, "learning_rate": 1.9822958738778953e-05, "loss": 1.6355, "step": 27181 }, { "epoch": 0.35, "grad_norm": 4.079878330230713, "learning_rate": 1.982293905305669e-05, "loss": 2.0858, "step": 27182 }, { "epoch": 0.35, "grad_norm": 4.093715667724609, "learning_rate": 1.9822919366249808e-05, "loss": 1.7493, "step": 27183 }, { "epoch": 0.35, "grad_norm": 4.086605548858643, "learning_rate": 1.982289967835831e-05, "loss": 1.9841, "step": 27184 }, { "epoch": 0.35, "grad_norm": 3.89900279045105, "learning_rate": 1.9822879989382196e-05, "loss": 2.4555, "step": 27185 }, { "epoch": 0.35, "grad_norm": 4.487255573272705, "learning_rate": 1.982286029932147e-05, "loss": 2.5627, "step": 27186 }, { "epoch": 0.35, "grad_norm": 3.7845704555511475, "learning_rate": 1.9822840608176133e-05, "loss": 2.1384, "step": 27187 }, { "epoch": 0.35, "grad_norm": 3.969123125076294, "learning_rate": 1.9822820915946187e-05, "loss": 1.9148, "step": 27188 }, { "epoch": 0.35, "grad_norm": 3.6597485542297363, "learning_rate": 1.9822801222631638e-05, "loss": 1.8201, "step": 27189 }, { "epoch": 0.35, "grad_norm": 4.356002330780029, "learning_rate": 1.9822781528232484e-05, "loss": 2.0459, "step": 27190 }, { "epoch": 0.35, "grad_norm": 4.110029220581055, "learning_rate": 1.9822761832748725e-05, "loss": 1.9805, "step": 27191 }, { "epoch": 0.35, "grad_norm": 3.6899595260620117, "learning_rate": 1.982274213618037e-05, "loss": 2.004, "step": 27192 }, { "epoch": 0.35, "grad_norm": 3.7782645225524902, "learning_rate": 1.9822722438527418e-05, "loss": 2.3734, "step": 27193 }, { "epoch": 0.35, "grad_norm": 3.9524312019348145, "learning_rate": 1.9822702739789867e-05, "loss": 2.1046, "step": 27194 }, { "epoch": 0.35, "grad_norm": 4.150345325469971, "learning_rate": 1.9822683039967727e-05, "loss": 1.9954, "step": 27195 }, { "epoch": 0.35, "grad_norm": 4.108353614807129, "learning_rate": 1.982266333906099e-05, "loss": 1.9707, "step": 27196 }, { "epoch": 0.35, "grad_norm": 3.6469812393188477, "learning_rate": 1.982264363706967e-05, "loss": 1.6809, "step": 27197 }, { "epoch": 0.35, "grad_norm": 3.4193036556243896, "learning_rate": 1.9822623933993763e-05, "loss": 1.6651, "step": 27198 }, { "epoch": 0.35, "grad_norm": 3.8101518154144287, "learning_rate": 1.9822604229833273e-05, "loss": 1.91, "step": 27199 }, { "epoch": 0.35, "grad_norm": 4.228043556213379, "learning_rate": 1.9822584524588197e-05, "loss": 1.9613, "step": 27200 }, { "epoch": 0.35, "grad_norm": 3.7129263877868652, "learning_rate": 1.982256481825854e-05, "loss": 1.6654, "step": 27201 }, { "epoch": 0.35, "grad_norm": 3.478835344314575, "learning_rate": 1.9822545110844312e-05, "loss": 1.7583, "step": 27202 }, { "epoch": 0.35, "grad_norm": 3.7926394939422607, "learning_rate": 1.9822525402345502e-05, "loss": 2.331, "step": 27203 }, { "epoch": 0.35, "grad_norm": 3.432922124862671, "learning_rate": 1.982250569276212e-05, "loss": 1.8292, "step": 27204 }, { "epoch": 0.35, "grad_norm": 4.174010753631592, "learning_rate": 1.9822485982094168e-05, "loss": 1.9841, "step": 27205 }, { "epoch": 0.35, "grad_norm": 4.164388656616211, "learning_rate": 1.9822466270341647e-05, "loss": 1.9535, "step": 27206 }, { "epoch": 0.35, "grad_norm": 4.263652324676514, "learning_rate": 1.9822446557504558e-05, "loss": 2.1444, "step": 27207 }, { "epoch": 0.35, "grad_norm": 3.9072811603546143, "learning_rate": 1.9822426843582904e-05, "loss": 2.444, "step": 27208 }, { "epoch": 0.35, "grad_norm": 3.9030659198760986, "learning_rate": 1.982240712857669e-05, "loss": 2.0692, "step": 27209 }, { "epoch": 0.35, "grad_norm": 3.8581907749176025, "learning_rate": 1.9822387412485912e-05, "loss": 2.2727, "step": 27210 }, { "epoch": 0.35, "grad_norm": 3.9670817852020264, "learning_rate": 1.9822367695310578e-05, "loss": 2.1325, "step": 27211 }, { "epoch": 0.35, "grad_norm": 3.7329676151275635, "learning_rate": 1.982234797705069e-05, "loss": 2.0156, "step": 27212 }, { "epoch": 0.35, "grad_norm": 3.555514097213745, "learning_rate": 1.9822328257706242e-05, "loss": 1.8193, "step": 27213 }, { "epoch": 0.35, "grad_norm": 3.789872169494629, "learning_rate": 1.9822308537277247e-05, "loss": 1.9332, "step": 27214 }, { "epoch": 0.35, "grad_norm": 4.175966262817383, "learning_rate": 1.98222888157637e-05, "loss": 2.1369, "step": 27215 }, { "epoch": 0.35, "grad_norm": 3.83627986907959, "learning_rate": 1.9822269093165608e-05, "loss": 2.26, "step": 27216 }, { "epoch": 0.35, "grad_norm": 4.0931477546691895, "learning_rate": 1.982224936948297e-05, "loss": 2.2987, "step": 27217 }, { "epoch": 0.35, "grad_norm": 3.707688570022583, "learning_rate": 1.9822229644715788e-05, "loss": 1.6012, "step": 27218 }, { "epoch": 0.35, "grad_norm": 3.687281608581543, "learning_rate": 1.9822209918864068e-05, "loss": 1.7163, "step": 27219 }, { "epoch": 0.35, "grad_norm": 3.776512861251831, "learning_rate": 1.9822190191927807e-05, "loss": 1.9267, "step": 27220 }, { "epoch": 0.35, "grad_norm": 3.1654372215270996, "learning_rate": 1.9822170463907012e-05, "loss": 1.684, "step": 27221 }, { "epoch": 0.35, "grad_norm": 4.098867893218994, "learning_rate": 1.982215073480168e-05, "loss": 2.1113, "step": 27222 }, { "epoch": 0.35, "grad_norm": 4.311990737915039, "learning_rate": 1.9822131004611816e-05, "loss": 2.136, "step": 27223 }, { "epoch": 0.35, "grad_norm": 3.70550274848938, "learning_rate": 1.982211127333742e-05, "loss": 1.8196, "step": 27224 }, { "epoch": 0.35, "grad_norm": 3.942990779876709, "learning_rate": 1.9822091540978503e-05, "loss": 1.9246, "step": 27225 }, { "epoch": 0.35, "grad_norm": 3.922736883163452, "learning_rate": 1.9822071807535057e-05, "loss": 1.9382, "step": 27226 }, { "epoch": 0.35, "grad_norm": 3.1609766483306885, "learning_rate": 1.9822052073007088e-05, "loss": 1.4054, "step": 27227 }, { "epoch": 0.35, "grad_norm": 4.09212589263916, "learning_rate": 1.9822032337394597e-05, "loss": 2.1294, "step": 27228 }, { "epoch": 0.35, "grad_norm": 3.4236016273498535, "learning_rate": 1.9822012600697586e-05, "loss": 1.9403, "step": 27229 }, { "epoch": 0.35, "grad_norm": 4.049520969390869, "learning_rate": 1.982199286291606e-05, "loss": 2.0425, "step": 27230 }, { "epoch": 0.35, "grad_norm": 4.082945346832275, "learning_rate": 1.982197312405002e-05, "loss": 1.9802, "step": 27231 }, { "epoch": 0.35, "grad_norm": 3.998758316040039, "learning_rate": 1.9821953384099462e-05, "loss": 2.0536, "step": 27232 }, { "epoch": 0.35, "grad_norm": 3.5942723751068115, "learning_rate": 1.98219336430644e-05, "loss": 2.0344, "step": 27233 }, { "epoch": 0.35, "grad_norm": 4.527509689331055, "learning_rate": 1.982191390094483e-05, "loss": 2.3265, "step": 27234 }, { "epoch": 0.35, "grad_norm": 4.383853912353516, "learning_rate": 1.982189415774075e-05, "loss": 2.2059, "step": 27235 }, { "epoch": 0.35, "grad_norm": 4.139986038208008, "learning_rate": 1.982187441345217e-05, "loss": 2.2431, "step": 27236 }, { "epoch": 0.35, "grad_norm": 3.6543221473693848, "learning_rate": 1.9821854668079085e-05, "loss": 1.7959, "step": 27237 }, { "epoch": 0.35, "grad_norm": 3.5415685176849365, "learning_rate": 1.9821834921621503e-05, "loss": 1.8993, "step": 27238 }, { "epoch": 0.35, "grad_norm": 3.4643166065216064, "learning_rate": 1.982181517407942e-05, "loss": 1.6504, "step": 27239 }, { "epoch": 0.35, "grad_norm": 4.245015621185303, "learning_rate": 1.982179542545285e-05, "loss": 2.345, "step": 27240 }, { "epoch": 0.35, "grad_norm": 3.78934645652771, "learning_rate": 1.9821775675741783e-05, "loss": 1.7296, "step": 27241 }, { "epoch": 0.35, "grad_norm": 4.7995924949646, "learning_rate": 1.9821755924946223e-05, "loss": 2.2515, "step": 27242 }, { "epoch": 0.35, "grad_norm": 3.6899023056030273, "learning_rate": 1.9821736173066174e-05, "loss": 2.0696, "step": 27243 }, { "epoch": 0.35, "grad_norm": 3.5267887115478516, "learning_rate": 1.982171642010164e-05, "loss": 1.9403, "step": 27244 }, { "epoch": 0.35, "grad_norm": 4.275173664093018, "learning_rate": 1.9821696666052624e-05, "loss": 2.2584, "step": 27245 }, { "epoch": 0.35, "grad_norm": 3.784660577774048, "learning_rate": 1.9821676910919125e-05, "loss": 2.144, "step": 27246 }, { "epoch": 0.35, "grad_norm": 3.615346908569336, "learning_rate": 1.9821657154701145e-05, "loss": 2.0067, "step": 27247 }, { "epoch": 0.35, "grad_norm": 4.486062049865723, "learning_rate": 1.982163739739869e-05, "loss": 2.4792, "step": 27248 }, { "epoch": 0.35, "grad_norm": 3.4917917251586914, "learning_rate": 1.9821617639011755e-05, "loss": 1.7657, "step": 27249 }, { "epoch": 0.35, "grad_norm": 3.634838104248047, "learning_rate": 1.982159787954035e-05, "loss": 1.7544, "step": 27250 }, { "epoch": 0.35, "grad_norm": 3.7906603813171387, "learning_rate": 1.9821578118984473e-05, "loss": 2.0271, "step": 27251 }, { "epoch": 0.35, "grad_norm": 4.111458778381348, "learning_rate": 1.982155835734413e-05, "loss": 2.2545, "step": 27252 }, { "epoch": 0.35, "grad_norm": 3.999391794204712, "learning_rate": 1.9821538594619318e-05, "loss": 2.186, "step": 27253 }, { "epoch": 0.35, "grad_norm": 3.8241329193115234, "learning_rate": 1.982151883081004e-05, "loss": 1.9896, "step": 27254 }, { "epoch": 0.35, "grad_norm": 3.989341974258423, "learning_rate": 1.98214990659163e-05, "loss": 2.5106, "step": 27255 }, { "epoch": 0.35, "grad_norm": 3.8684821128845215, "learning_rate": 1.9821479299938103e-05, "loss": 2.2342, "step": 27256 }, { "epoch": 0.35, "grad_norm": 3.6226367950439453, "learning_rate": 1.9821459532875445e-05, "loss": 1.7829, "step": 27257 }, { "epoch": 0.35, "grad_norm": 4.224526882171631, "learning_rate": 1.9821439764728334e-05, "loss": 2.6947, "step": 27258 }, { "epoch": 0.35, "grad_norm": 4.223232269287109, "learning_rate": 1.9821419995496767e-05, "loss": 2.0168, "step": 27259 }, { "epoch": 0.35, "grad_norm": 3.6649250984191895, "learning_rate": 1.982140022518075e-05, "loss": 1.8175, "step": 27260 }, { "epoch": 0.35, "grad_norm": 4.5034379959106445, "learning_rate": 1.9821380453780285e-05, "loss": 2.3945, "step": 27261 }, { "epoch": 0.35, "grad_norm": 3.729691982269287, "learning_rate": 1.982136068129537e-05, "loss": 2.1055, "step": 27262 }, { "epoch": 0.35, "grad_norm": 3.4150009155273438, "learning_rate": 1.982134090772601e-05, "loss": 1.7723, "step": 27263 }, { "epoch": 0.35, "grad_norm": 6.582322597503662, "learning_rate": 1.982132113307221e-05, "loss": 2.0079, "step": 27264 }, { "epoch": 0.35, "grad_norm": 4.024595737457275, "learning_rate": 1.982130135733397e-05, "loss": 1.9936, "step": 27265 }, { "epoch": 0.35, "grad_norm": 4.436814785003662, "learning_rate": 1.982128158051129e-05, "loss": 2.1387, "step": 27266 }, { "epoch": 0.35, "grad_norm": 3.7658729553222656, "learning_rate": 1.9821261802604175e-05, "loss": 1.9063, "step": 27267 }, { "epoch": 0.35, "grad_norm": 3.761718273162842, "learning_rate": 1.9821242023612623e-05, "loss": 1.9703, "step": 27268 }, { "epoch": 0.35, "grad_norm": 3.9869203567504883, "learning_rate": 1.9821222243536642e-05, "loss": 2.1031, "step": 27269 }, { "epoch": 0.35, "grad_norm": 4.279815196990967, "learning_rate": 1.982120246237623e-05, "loss": 1.9572, "step": 27270 }, { "epoch": 0.35, "grad_norm": 3.7273736000061035, "learning_rate": 1.9821182680131393e-05, "loss": 1.866, "step": 27271 }, { "epoch": 0.35, "grad_norm": 3.7343385219573975, "learning_rate": 1.982116289680213e-05, "loss": 2.1194, "step": 27272 }, { "epoch": 0.35, "grad_norm": 4.5186991691589355, "learning_rate": 1.982114311238844e-05, "loss": 2.7247, "step": 27273 }, { "epoch": 0.35, "grad_norm": 3.9013588428497314, "learning_rate": 1.982112332689033e-05, "loss": 2.0409, "step": 27274 }, { "epoch": 0.35, "grad_norm": 4.341969966888428, "learning_rate": 1.982110354030781e-05, "loss": 2.1547, "step": 27275 }, { "epoch": 0.35, "grad_norm": 3.722909450531006, "learning_rate": 1.9821083752640865e-05, "loss": 2.2704, "step": 27276 }, { "epoch": 0.35, "grad_norm": 3.9923300743103027, "learning_rate": 1.9821063963889505e-05, "loss": 1.96, "step": 27277 }, { "epoch": 0.35, "grad_norm": 3.3900012969970703, "learning_rate": 1.982104417405374e-05, "loss": 1.8852, "step": 27278 }, { "epoch": 0.35, "grad_norm": 4.100962162017822, "learning_rate": 1.982102438313356e-05, "loss": 2.3524, "step": 27279 }, { "epoch": 0.35, "grad_norm": 4.131293296813965, "learning_rate": 1.9821004591128972e-05, "loss": 2.3327, "step": 27280 }, { "epoch": 0.35, "grad_norm": 3.8636221885681152, "learning_rate": 1.982098479803998e-05, "loss": 1.9997, "step": 27281 }, { "epoch": 0.35, "grad_norm": 3.4910495281219482, "learning_rate": 1.9820965003866587e-05, "loss": 1.6579, "step": 27282 }, { "epoch": 0.35, "grad_norm": 3.479121446609497, "learning_rate": 1.9820945208608787e-05, "loss": 1.7343, "step": 27283 }, { "epoch": 0.35, "grad_norm": 3.84536075592041, "learning_rate": 1.9820925412266592e-05, "loss": 2.2032, "step": 27284 }, { "epoch": 0.35, "grad_norm": 3.5303308963775635, "learning_rate": 1.982090561484e-05, "loss": 1.8539, "step": 27285 }, { "epoch": 0.35, "grad_norm": 3.858036756515503, "learning_rate": 1.9820885816329013e-05, "loss": 1.9027, "step": 27286 }, { "epoch": 0.35, "grad_norm": 3.6600732803344727, "learning_rate": 1.9820866016733636e-05, "loss": 1.9988, "step": 27287 }, { "epoch": 0.35, "grad_norm": 4.080792427062988, "learning_rate": 1.9820846216053864e-05, "loss": 1.9508, "step": 27288 }, { "epoch": 0.35, "grad_norm": 3.561718463897705, "learning_rate": 1.9820826414289706e-05, "loss": 1.9365, "step": 27289 }, { "epoch": 0.35, "grad_norm": 3.9448652267456055, "learning_rate": 1.9820806611441163e-05, "loss": 2.1911, "step": 27290 }, { "epoch": 0.35, "grad_norm": 3.9128103256225586, "learning_rate": 1.9820786807508235e-05, "loss": 1.9264, "step": 27291 }, { "epoch": 0.35, "grad_norm": 4.146219253540039, "learning_rate": 1.982076700249093e-05, "loss": 2.4333, "step": 27292 }, { "epoch": 0.35, "grad_norm": 3.5979604721069336, "learning_rate": 1.982074719638924e-05, "loss": 1.88, "step": 27293 }, { "epoch": 0.35, "grad_norm": 4.041165351867676, "learning_rate": 1.9820727389203174e-05, "loss": 2.1985, "step": 27294 }, { "epoch": 0.35, "grad_norm": 4.070696830749512, "learning_rate": 1.9820707580932733e-05, "loss": 1.763, "step": 27295 }, { "epoch": 0.35, "grad_norm": 4.0648193359375, "learning_rate": 1.9820687771577923e-05, "loss": 2.545, "step": 27296 }, { "epoch": 0.35, "grad_norm": 3.932673454284668, "learning_rate": 1.982066796113874e-05, "loss": 1.8389, "step": 27297 }, { "epoch": 0.35, "grad_norm": 4.1091084480285645, "learning_rate": 1.9820648149615186e-05, "loss": 2.1191, "step": 27298 }, { "epoch": 0.35, "grad_norm": 4.328396320343018, "learning_rate": 1.982062833700727e-05, "loss": 2.2852, "step": 27299 }, { "epoch": 0.35, "grad_norm": 3.7057759761810303, "learning_rate": 1.9820608523314986e-05, "loss": 1.9908, "step": 27300 }, { "epoch": 0.35, "grad_norm": 3.5528481006622314, "learning_rate": 1.9820588708538343e-05, "loss": 1.6892, "step": 27301 }, { "epoch": 0.35, "grad_norm": 4.007756233215332, "learning_rate": 1.982056889267734e-05, "loss": 2.1204, "step": 27302 }, { "epoch": 0.35, "grad_norm": 3.838040828704834, "learning_rate": 1.982054907573198e-05, "loss": 1.8372, "step": 27303 }, { "epoch": 0.35, "grad_norm": 4.132654666900635, "learning_rate": 1.9820529257702263e-05, "loss": 2.1403, "step": 27304 }, { "epoch": 0.35, "grad_norm": 4.044121742248535, "learning_rate": 1.9820509438588197e-05, "loss": 1.9829, "step": 27305 }, { "epoch": 0.35, "grad_norm": 3.8774466514587402, "learning_rate": 1.9820489618389777e-05, "loss": 2.0344, "step": 27306 }, { "epoch": 0.35, "grad_norm": 3.985090732574463, "learning_rate": 1.9820469797107005e-05, "loss": 2.2993, "step": 27307 }, { "epoch": 0.35, "grad_norm": 3.9480979442596436, "learning_rate": 1.9820449974739893e-05, "loss": 1.9165, "step": 27308 }, { "epoch": 0.35, "grad_norm": 3.7814958095550537, "learning_rate": 1.9820430151288432e-05, "loss": 1.9468, "step": 27309 }, { "epoch": 0.35, "grad_norm": 3.660154342651367, "learning_rate": 1.9820410326752634e-05, "loss": 2.262, "step": 27310 }, { "epoch": 0.35, "grad_norm": 3.5927846431732178, "learning_rate": 1.982039050113249e-05, "loss": 1.9415, "step": 27311 }, { "epoch": 0.35, "grad_norm": 3.357067108154297, "learning_rate": 1.9820370674428014e-05, "loss": 1.7291, "step": 27312 }, { "epoch": 0.35, "grad_norm": 4.283767223358154, "learning_rate": 1.98203508466392e-05, "loss": 2.0318, "step": 27313 }, { "epoch": 0.35, "grad_norm": 4.686897277832031, "learning_rate": 1.982033101776605e-05, "loss": 2.4455, "step": 27314 }, { "epoch": 0.35, "grad_norm": 4.98245906829834, "learning_rate": 1.982031118780857e-05, "loss": 2.3826, "step": 27315 }, { "epoch": 0.35, "grad_norm": 4.391617298126221, "learning_rate": 1.9820291356766767e-05, "loss": 2.5841, "step": 27316 }, { "epoch": 0.35, "grad_norm": 3.8117308616638184, "learning_rate": 1.9820271524640628e-05, "loss": 1.8477, "step": 27317 }, { "epoch": 0.35, "grad_norm": 3.6512136459350586, "learning_rate": 1.982025169143017e-05, "loss": 1.7797, "step": 27318 }, { "epoch": 0.35, "grad_norm": 4.057882785797119, "learning_rate": 1.982023185713539e-05, "loss": 2.2425, "step": 27319 }, { "epoch": 0.35, "grad_norm": 4.596173286437988, "learning_rate": 1.982021202175629e-05, "loss": 2.739, "step": 27320 }, { "epoch": 0.35, "grad_norm": 3.8688101768493652, "learning_rate": 1.9820192185292867e-05, "loss": 2.1805, "step": 27321 }, { "epoch": 0.35, "grad_norm": 5.095461845397949, "learning_rate": 1.982017234774513e-05, "loss": 2.4745, "step": 27322 }, { "epoch": 0.35, "grad_norm": 3.9696578979492188, "learning_rate": 1.9820152509113086e-05, "loss": 2.0064, "step": 27323 }, { "epoch": 0.35, "grad_norm": 3.4505043029785156, "learning_rate": 1.9820132669396722e-05, "loss": 1.7536, "step": 27324 }, { "epoch": 0.35, "grad_norm": 4.162705421447754, "learning_rate": 1.9820112828596052e-05, "loss": 1.9901, "step": 27325 }, { "epoch": 0.35, "grad_norm": 4.216669082641602, "learning_rate": 1.9820092986711077e-05, "loss": 2.2065, "step": 27326 }, { "epoch": 0.35, "grad_norm": 3.5425915718078613, "learning_rate": 1.9820073143741792e-05, "loss": 1.9254, "step": 27327 }, { "epoch": 0.35, "grad_norm": 4.2403693199157715, "learning_rate": 1.982005329968821e-05, "loss": 2.181, "step": 27328 }, { "epoch": 0.35, "grad_norm": 3.279402256011963, "learning_rate": 1.9820033454550324e-05, "loss": 1.7644, "step": 27329 }, { "epoch": 0.35, "grad_norm": 3.992304801940918, "learning_rate": 1.982001360832814e-05, "loss": 1.9727, "step": 27330 }, { "epoch": 0.35, "grad_norm": 4.061139106750488, "learning_rate": 1.9819993761021662e-05, "loss": 2.4096, "step": 27331 }, { "epoch": 0.35, "grad_norm": 3.6056935787200928, "learning_rate": 1.981997391263089e-05, "loss": 1.9591, "step": 27332 }, { "epoch": 0.35, "grad_norm": 4.069424152374268, "learning_rate": 1.9819954063155824e-05, "loss": 1.9883, "step": 27333 }, { "epoch": 0.35, "grad_norm": 3.308506727218628, "learning_rate": 1.981993421259647e-05, "loss": 1.6935, "step": 27334 }, { "epoch": 0.35, "grad_norm": 3.879520893096924, "learning_rate": 1.981991436095283e-05, "loss": 2.036, "step": 27335 }, { "epoch": 0.35, "grad_norm": 4.435160160064697, "learning_rate": 1.98198945082249e-05, "loss": 2.3082, "step": 27336 }, { "epoch": 0.35, "grad_norm": 3.8304176330566406, "learning_rate": 1.981987465441269e-05, "loss": 2.1679, "step": 27337 }, { "epoch": 0.35, "grad_norm": 4.020602226257324, "learning_rate": 1.98198547995162e-05, "loss": 1.9379, "step": 27338 }, { "epoch": 0.35, "grad_norm": 3.9482457637786865, "learning_rate": 1.981983494353543e-05, "loss": 1.9117, "step": 27339 }, { "epoch": 0.35, "grad_norm": 3.5880534648895264, "learning_rate": 1.9819815086470386e-05, "loss": 1.7709, "step": 27340 }, { "epoch": 0.35, "grad_norm": 3.7788071632385254, "learning_rate": 1.9819795228321067e-05, "loss": 1.9313, "step": 27341 }, { "epoch": 0.35, "grad_norm": 3.997941732406616, "learning_rate": 1.9819775369087473e-05, "loss": 2.6236, "step": 27342 }, { "epoch": 0.35, "grad_norm": 4.019903182983398, "learning_rate": 1.981975550876961e-05, "loss": 2.0837, "step": 27343 }, { "epoch": 0.35, "grad_norm": 4.31234884262085, "learning_rate": 1.9819735647367483e-05, "loss": 2.2251, "step": 27344 }, { "epoch": 0.35, "grad_norm": 3.626938581466675, "learning_rate": 1.981971578488109e-05, "loss": 2.0101, "step": 27345 }, { "epoch": 0.35, "grad_norm": 4.33237361907959, "learning_rate": 1.9819695921310427e-05, "loss": 2.1704, "step": 27346 }, { "epoch": 0.35, "grad_norm": 3.9422202110290527, "learning_rate": 1.9819676056655513e-05, "loss": 2.0496, "step": 27347 }, { "epoch": 0.35, "grad_norm": 4.8573384284973145, "learning_rate": 1.9819656190916333e-05, "loss": 2.3859, "step": 27348 }, { "epoch": 0.35, "grad_norm": 3.899770975112915, "learning_rate": 1.98196363240929e-05, "loss": 1.7941, "step": 27349 }, { "epoch": 0.35, "grad_norm": 3.529815912246704, "learning_rate": 1.9819616456185213e-05, "loss": 1.6188, "step": 27350 }, { "epoch": 0.35, "grad_norm": 4.015085697174072, "learning_rate": 1.981959658719327e-05, "loss": 2.0745, "step": 27351 }, { "epoch": 0.35, "grad_norm": 3.380164861679077, "learning_rate": 1.9819576717117077e-05, "loss": 1.4696, "step": 27352 }, { "epoch": 0.35, "grad_norm": 3.974254846572876, "learning_rate": 1.981955684595664e-05, "loss": 2.0419, "step": 27353 }, { "epoch": 0.35, "grad_norm": 3.960561752319336, "learning_rate": 1.9819536973711953e-05, "loss": 2.0844, "step": 27354 }, { "epoch": 0.36, "grad_norm": 3.6007394790649414, "learning_rate": 1.9819517100383025e-05, "loss": 1.9709, "step": 27355 }, { "epoch": 0.36, "grad_norm": 4.008804798126221, "learning_rate": 1.9819497225969858e-05, "loss": 1.8634, "step": 27356 }, { "epoch": 0.36, "grad_norm": 4.045074462890625, "learning_rate": 1.9819477350472448e-05, "loss": 2.2331, "step": 27357 }, { "epoch": 0.36, "grad_norm": 4.660915851593018, "learning_rate": 1.9819457473890805e-05, "loss": 2.7479, "step": 27358 }, { "epoch": 0.36, "grad_norm": 3.8818907737731934, "learning_rate": 1.9819437596224923e-05, "loss": 1.8385, "step": 27359 }, { "epoch": 0.36, "grad_norm": 4.449219703674316, "learning_rate": 1.981941771747481e-05, "loss": 2.0116, "step": 27360 }, { "epoch": 0.36, "grad_norm": 3.97749400138855, "learning_rate": 1.9819397837640467e-05, "loss": 2.1067, "step": 27361 }, { "epoch": 0.36, "grad_norm": 4.43917179107666, "learning_rate": 1.98193779567219e-05, "loss": 2.2975, "step": 27362 }, { "epoch": 0.36, "grad_norm": 4.104349136352539, "learning_rate": 1.98193580747191e-05, "loss": 2.4987, "step": 27363 }, { "epoch": 0.36, "grad_norm": 4.0689592361450195, "learning_rate": 1.981933819163208e-05, "loss": 2.1834, "step": 27364 }, { "epoch": 0.36, "grad_norm": 3.8595662117004395, "learning_rate": 1.9819318307460838e-05, "loss": 1.6927, "step": 27365 }, { "epoch": 0.36, "grad_norm": 3.622239828109741, "learning_rate": 1.9819298422205375e-05, "loss": 2.1047, "step": 27366 }, { "epoch": 0.36, "grad_norm": 4.114792346954346, "learning_rate": 1.98192785358657e-05, "loss": 2.3051, "step": 27367 }, { "epoch": 0.36, "grad_norm": 3.4053726196289062, "learning_rate": 1.9819258648441804e-05, "loss": 1.5803, "step": 27368 }, { "epoch": 0.36, "grad_norm": 4.365724086761475, "learning_rate": 1.98192387599337e-05, "loss": 2.3538, "step": 27369 }, { "epoch": 0.36, "grad_norm": 3.674591541290283, "learning_rate": 1.981921887034138e-05, "loss": 2.016, "step": 27370 }, { "epoch": 0.36, "grad_norm": 3.769432306289673, "learning_rate": 1.981919897966486e-05, "loss": 1.6802, "step": 27371 }, { "epoch": 0.36, "grad_norm": 4.0583600997924805, "learning_rate": 1.9819179087904125e-05, "loss": 2.3546, "step": 27372 }, { "epoch": 0.36, "grad_norm": 3.7211060523986816, "learning_rate": 1.981915919505919e-05, "loss": 1.7696, "step": 27373 }, { "epoch": 0.36, "grad_norm": 4.13356351852417, "learning_rate": 1.9819139301130053e-05, "loss": 2.0245, "step": 27374 }, { "epoch": 0.36, "grad_norm": 3.9925379753112793, "learning_rate": 1.9819119406116716e-05, "loss": 2.3363, "step": 27375 }, { "epoch": 0.36, "grad_norm": 3.5607800483703613, "learning_rate": 1.9819099510019183e-05, "loss": 2.2107, "step": 27376 }, { "epoch": 0.36, "grad_norm": 3.757617235183716, "learning_rate": 1.9819079612837452e-05, "loss": 2.0634, "step": 27377 }, { "epoch": 0.36, "grad_norm": 4.312187671661377, "learning_rate": 1.9819059714571532e-05, "loss": 2.0465, "step": 27378 }, { "epoch": 0.36, "grad_norm": 3.41015887260437, "learning_rate": 1.981903981522142e-05, "loss": 1.7954, "step": 27379 }, { "epoch": 0.36, "grad_norm": 3.7877495288848877, "learning_rate": 1.981901991478712e-05, "loss": 2.081, "step": 27380 }, { "epoch": 0.36, "grad_norm": 3.600252151489258, "learning_rate": 1.981900001326863e-05, "loss": 1.9609, "step": 27381 }, { "epoch": 0.36, "grad_norm": 3.3513402938842773, "learning_rate": 1.981898011066596e-05, "loss": 1.6028, "step": 27382 }, { "epoch": 0.36, "grad_norm": 3.7801523208618164, "learning_rate": 1.9818960206979106e-05, "loss": 1.7564, "step": 27383 }, { "epoch": 0.36, "grad_norm": 4.027955055236816, "learning_rate": 1.981894030220807e-05, "loss": 2.1951, "step": 27384 }, { "epoch": 0.36, "grad_norm": 4.148510932922363, "learning_rate": 1.9818920396352858e-05, "loss": 2.3001, "step": 27385 }, { "epoch": 0.36, "grad_norm": 3.986764907836914, "learning_rate": 1.981890048941347e-05, "loss": 2.2042, "step": 27386 }, { "epoch": 0.36, "grad_norm": 3.747964382171631, "learning_rate": 1.9818880581389913e-05, "loss": 1.8605, "step": 27387 }, { "epoch": 0.36, "grad_norm": 3.929790735244751, "learning_rate": 1.981886067228218e-05, "loss": 2.2022, "step": 27388 }, { "epoch": 0.36, "grad_norm": 3.71439266204834, "learning_rate": 1.981884076209028e-05, "loss": 1.5683, "step": 27389 }, { "epoch": 0.36, "grad_norm": 3.90179443359375, "learning_rate": 1.9818820850814216e-05, "loss": 1.6846, "step": 27390 }, { "epoch": 0.36, "grad_norm": 3.4095916748046875, "learning_rate": 1.9818800938453986e-05, "loss": 1.8046, "step": 27391 }, { "epoch": 0.36, "grad_norm": 3.558987855911255, "learning_rate": 1.9818781025009592e-05, "loss": 1.7393, "step": 27392 }, { "epoch": 0.36, "grad_norm": 3.6157679557800293, "learning_rate": 1.981876111048104e-05, "loss": 2.1012, "step": 27393 }, { "epoch": 0.36, "grad_norm": 4.166466236114502, "learning_rate": 1.9818741194868328e-05, "loss": 2.0682, "step": 27394 }, { "epoch": 0.36, "grad_norm": 3.539672613143921, "learning_rate": 1.9818721278171462e-05, "loss": 1.9867, "step": 27395 }, { "epoch": 0.36, "grad_norm": 3.8574304580688477, "learning_rate": 1.9818701360390442e-05, "loss": 2.1549, "step": 27396 }, { "epoch": 0.36, "grad_norm": 4.158032417297363, "learning_rate": 1.981868144152527e-05, "loss": 1.8638, "step": 27397 }, { "epoch": 0.36, "grad_norm": 4.417689323425293, "learning_rate": 1.9818661521575954e-05, "loss": 2.2439, "step": 27398 }, { "epoch": 0.36, "grad_norm": 3.6469855308532715, "learning_rate": 1.9818641600542486e-05, "loss": 2.2085, "step": 27399 }, { "epoch": 0.36, "grad_norm": 3.6814422607421875, "learning_rate": 1.981862167842487e-05, "loss": 2.053, "step": 27400 }, { "epoch": 0.36, "grad_norm": 3.558637857437134, "learning_rate": 1.981860175522312e-05, "loss": 1.923, "step": 27401 }, { "epoch": 0.36, "grad_norm": 3.7342450618743896, "learning_rate": 1.9818581830937226e-05, "loss": 1.8603, "step": 27402 }, { "epoch": 0.36, "grad_norm": 4.704434871673584, "learning_rate": 1.9818561905567193e-05, "loss": 1.9288, "step": 27403 }, { "epoch": 0.36, "grad_norm": 4.006227016448975, "learning_rate": 1.9818541979113023e-05, "loss": 1.8429, "step": 27404 }, { "epoch": 0.36, "grad_norm": 3.9615280628204346, "learning_rate": 1.9818522051574725e-05, "loss": 2.1923, "step": 27405 }, { "epoch": 0.36, "grad_norm": 3.8346920013427734, "learning_rate": 1.9818502122952293e-05, "loss": 2.0372, "step": 27406 }, { "epoch": 0.36, "grad_norm": 4.107015609741211, "learning_rate": 1.981848219324573e-05, "loss": 2.0301, "step": 27407 }, { "epoch": 0.36, "grad_norm": 3.4889190196990967, "learning_rate": 1.9818462262455042e-05, "loss": 1.7882, "step": 27408 }, { "epoch": 0.36, "grad_norm": 4.450140953063965, "learning_rate": 1.9818442330580227e-05, "loss": 2.3585, "step": 27409 }, { "epoch": 0.36, "grad_norm": 3.942678451538086, "learning_rate": 1.9818422397621287e-05, "loss": 2.237, "step": 27410 }, { "epoch": 0.36, "grad_norm": 4.509683609008789, "learning_rate": 1.9818402463578234e-05, "loss": 2.5725, "step": 27411 }, { "epoch": 0.36, "grad_norm": 3.8763890266418457, "learning_rate": 1.9818382528451056e-05, "loss": 2.2502, "step": 27412 }, { "epoch": 0.36, "grad_norm": 3.9689061641693115, "learning_rate": 1.9818362592239768e-05, "loss": 1.8078, "step": 27413 }, { "epoch": 0.36, "grad_norm": 3.361963987350464, "learning_rate": 1.981834265494436e-05, "loss": 1.6435, "step": 27414 }, { "epoch": 0.36, "grad_norm": 4.335671901702881, "learning_rate": 1.9818322716564845e-05, "loss": 2.5874, "step": 27415 }, { "epoch": 0.36, "grad_norm": 3.4380974769592285, "learning_rate": 1.9818302777101218e-05, "loss": 1.5956, "step": 27416 }, { "epoch": 0.36, "grad_norm": 3.719831705093384, "learning_rate": 1.9818282836553487e-05, "loss": 1.8922, "step": 27417 }, { "epoch": 0.36, "grad_norm": 3.860382556915283, "learning_rate": 1.9818262894921648e-05, "loss": 2.1721, "step": 27418 }, { "epoch": 0.36, "grad_norm": 3.5469348430633545, "learning_rate": 1.981824295220571e-05, "loss": 2.0078, "step": 27419 }, { "epoch": 0.36, "grad_norm": 4.0860700607299805, "learning_rate": 1.9818223008405666e-05, "loss": 2.3516, "step": 27420 }, { "epoch": 0.36, "grad_norm": 3.8971827030181885, "learning_rate": 1.9818203063521526e-05, "loss": 2.2369, "step": 27421 }, { "epoch": 0.36, "grad_norm": 3.632852077484131, "learning_rate": 1.981818311755329e-05, "loss": 1.9492, "step": 27422 }, { "epoch": 0.36, "grad_norm": 4.3971991539001465, "learning_rate": 1.9818163170500958e-05, "loss": 2.0923, "step": 27423 }, { "epoch": 0.36, "grad_norm": 3.586338758468628, "learning_rate": 1.981814322236454e-05, "loss": 1.899, "step": 27424 }, { "epoch": 0.36, "grad_norm": 3.510463237762451, "learning_rate": 1.9818123273144028e-05, "loss": 1.7114, "step": 27425 }, { "epoch": 0.36, "grad_norm": 3.3749969005584717, "learning_rate": 1.981810332283943e-05, "loss": 1.7097, "step": 27426 }, { "epoch": 0.36, "grad_norm": 3.412520408630371, "learning_rate": 1.9818083371450743e-05, "loss": 1.6459, "step": 27427 }, { "epoch": 0.36, "grad_norm": 3.723623514175415, "learning_rate": 1.9818063418977978e-05, "loss": 1.9718, "step": 27428 }, { "epoch": 0.36, "grad_norm": 3.29604172706604, "learning_rate": 1.981804346542113e-05, "loss": 1.4073, "step": 27429 }, { "epoch": 0.36, "grad_norm": 3.7015628814697266, "learning_rate": 1.9818023510780205e-05, "loss": 1.9577, "step": 27430 }, { "epoch": 0.36, "grad_norm": 3.56937837600708, "learning_rate": 1.9818003555055202e-05, "loss": 2.0424, "step": 27431 }, { "epoch": 0.36, "grad_norm": 3.2249562740325928, "learning_rate": 1.9817983598246126e-05, "loss": 1.7001, "step": 27432 }, { "epoch": 0.36, "grad_norm": 3.8358049392700195, "learning_rate": 1.981796364035298e-05, "loss": 2.0198, "step": 27433 }, { "epoch": 0.36, "grad_norm": 3.410578727722168, "learning_rate": 1.981794368137576e-05, "loss": 1.7492, "step": 27434 }, { "epoch": 0.36, "grad_norm": 3.6587276458740234, "learning_rate": 1.9817923721314472e-05, "loss": 2.1743, "step": 27435 }, { "epoch": 0.36, "grad_norm": 4.436335563659668, "learning_rate": 1.9817903760169123e-05, "loss": 2.3675, "step": 27436 }, { "epoch": 0.36, "grad_norm": 3.2930877208709717, "learning_rate": 1.9817883797939706e-05, "loss": 1.3774, "step": 27437 }, { "epoch": 0.36, "grad_norm": 3.3907816410064697, "learning_rate": 1.981786383462623e-05, "loss": 1.4069, "step": 27438 }, { "epoch": 0.36, "grad_norm": 4.23276424407959, "learning_rate": 1.9817843870228697e-05, "loss": 2.5438, "step": 27439 }, { "epoch": 0.36, "grad_norm": 3.6555368900299072, "learning_rate": 1.9817823904747104e-05, "loss": 1.9421, "step": 27440 }, { "epoch": 0.36, "grad_norm": 3.8913707733154297, "learning_rate": 1.9817803938181458e-05, "loss": 2.1212, "step": 27441 }, { "epoch": 0.36, "grad_norm": 3.8401968479156494, "learning_rate": 1.9817783970531763e-05, "loss": 1.7715, "step": 27442 }, { "epoch": 0.36, "grad_norm": 4.442810535430908, "learning_rate": 1.9817764001798017e-05, "loss": 2.3601, "step": 27443 }, { "epoch": 0.36, "grad_norm": 3.529308557510376, "learning_rate": 1.9817744031980222e-05, "loss": 1.9542, "step": 27444 }, { "epoch": 0.36, "grad_norm": 4.202883720397949, "learning_rate": 1.981772406107838e-05, "loss": 1.9615, "step": 27445 }, { "epoch": 0.36, "grad_norm": 4.763254642486572, "learning_rate": 1.9817704089092497e-05, "loss": 2.3113, "step": 27446 }, { "epoch": 0.36, "grad_norm": 3.9103357791900635, "learning_rate": 1.9817684116022573e-05, "loss": 1.9585, "step": 27447 }, { "epoch": 0.36, "grad_norm": 3.4913229942321777, "learning_rate": 1.981766414186861e-05, "loss": 1.9289, "step": 27448 }, { "epoch": 0.36, "grad_norm": 3.694826364517212, "learning_rate": 1.981764416663061e-05, "loss": 1.7297, "step": 27449 }, { "epoch": 0.36, "grad_norm": 3.621771812438965, "learning_rate": 1.9817624190308575e-05, "loss": 1.6049, "step": 27450 }, { "epoch": 0.36, "grad_norm": 3.989320993423462, "learning_rate": 1.981760421290251e-05, "loss": 1.6655, "step": 27451 }, { "epoch": 0.36, "grad_norm": 4.23784875869751, "learning_rate": 1.9817584234412412e-05, "loss": 1.9385, "step": 27452 }, { "epoch": 0.36, "grad_norm": 3.729339838027954, "learning_rate": 1.9817564254838285e-05, "loss": 1.6274, "step": 27453 }, { "epoch": 0.36, "grad_norm": 3.8266026973724365, "learning_rate": 1.9817544274180135e-05, "loss": 2.0201, "step": 27454 }, { "epoch": 0.36, "grad_norm": 3.9989945888519287, "learning_rate": 1.9817524292437963e-05, "loss": 1.8835, "step": 27455 }, { "epoch": 0.36, "grad_norm": 4.179439544677734, "learning_rate": 1.9817504309611764e-05, "loss": 2.0595, "step": 27456 }, { "epoch": 0.36, "grad_norm": 3.5831005573272705, "learning_rate": 1.9817484325701554e-05, "loss": 1.8442, "step": 27457 }, { "epoch": 0.36, "grad_norm": 3.7410149574279785, "learning_rate": 1.9817464340707324e-05, "loss": 2.0019, "step": 27458 }, { "epoch": 0.36, "grad_norm": 4.229098796844482, "learning_rate": 1.9817444354629078e-05, "loss": 1.9241, "step": 27459 }, { "epoch": 0.36, "grad_norm": 3.5341832637786865, "learning_rate": 1.9817424367466816e-05, "loss": 1.5142, "step": 27460 }, { "epoch": 0.36, "grad_norm": 4.264534950256348, "learning_rate": 1.9817404379220552e-05, "loss": 2.3798, "step": 27461 }, { "epoch": 0.36, "grad_norm": 3.7936935424804688, "learning_rate": 1.9817384389890276e-05, "loss": 1.7078, "step": 27462 }, { "epoch": 0.36, "grad_norm": 3.676699638366699, "learning_rate": 1.981736439947599e-05, "loss": 2.3399, "step": 27463 }, { "epoch": 0.36, "grad_norm": 3.9574246406555176, "learning_rate": 1.9817344407977706e-05, "loss": 1.9078, "step": 27464 }, { "epoch": 0.36, "grad_norm": 3.6900269985198975, "learning_rate": 1.981732441539542e-05, "loss": 2.009, "step": 27465 }, { "epoch": 0.36, "grad_norm": 4.357723712921143, "learning_rate": 1.9817304421729134e-05, "loss": 2.1084, "step": 27466 }, { "epoch": 0.36, "grad_norm": 3.8895623683929443, "learning_rate": 1.981728442697885e-05, "loss": 1.7147, "step": 27467 }, { "epoch": 0.36, "grad_norm": 3.978959798812866, "learning_rate": 1.981726443114457e-05, "loss": 2.253, "step": 27468 }, { "epoch": 0.36, "grad_norm": 4.142663955688477, "learning_rate": 1.98172444342263e-05, "loss": 1.9251, "step": 27469 }, { "epoch": 0.36, "grad_norm": 4.634858131408691, "learning_rate": 1.981722443622404e-05, "loss": 1.9113, "step": 27470 }, { "epoch": 0.36, "grad_norm": 3.8109912872314453, "learning_rate": 1.981720443713779e-05, "loss": 1.8268, "step": 27471 }, { "epoch": 0.36, "grad_norm": 3.431706428527832, "learning_rate": 1.9817184436967557e-05, "loss": 1.8239, "step": 27472 }, { "epoch": 0.36, "grad_norm": 3.57919979095459, "learning_rate": 1.981716443571334e-05, "loss": 1.678, "step": 27473 }, { "epoch": 0.36, "grad_norm": 3.7600550651550293, "learning_rate": 1.9817144433375138e-05, "loss": 1.5525, "step": 27474 }, { "epoch": 0.36, "grad_norm": 3.856203556060791, "learning_rate": 1.9817124429952958e-05, "loss": 2.0443, "step": 27475 }, { "epoch": 0.36, "grad_norm": 3.698195695877075, "learning_rate": 1.9817104425446802e-05, "loss": 1.7984, "step": 27476 }, { "epoch": 0.36, "grad_norm": 3.923764705657959, "learning_rate": 1.981708441985667e-05, "loss": 1.8459, "step": 27477 }, { "epoch": 0.36, "grad_norm": 3.7045319080352783, "learning_rate": 1.981706441318257e-05, "loss": 2.0491, "step": 27478 }, { "epoch": 0.36, "grad_norm": 4.594089984893799, "learning_rate": 1.9817044405424494e-05, "loss": 2.4951, "step": 27479 }, { "epoch": 0.36, "grad_norm": 3.7052223682403564, "learning_rate": 1.9817024396582448e-05, "loss": 2.0919, "step": 27480 }, { "epoch": 0.36, "grad_norm": 4.002490997314453, "learning_rate": 1.981700438665644e-05, "loss": 2.1487, "step": 27481 }, { "epoch": 0.36, "grad_norm": 3.593512535095215, "learning_rate": 1.9816984375646467e-05, "loss": 2.0603, "step": 27482 }, { "epoch": 0.36, "grad_norm": 4.012092113494873, "learning_rate": 1.9816964363552533e-05, "loss": 2.5126, "step": 27483 }, { "epoch": 0.36, "grad_norm": 4.6808905601501465, "learning_rate": 1.981694435037464e-05, "loss": 2.3726, "step": 27484 }, { "epoch": 0.36, "grad_norm": 3.9573891162872314, "learning_rate": 1.981692433611279e-05, "loss": 1.772, "step": 27485 }, { "epoch": 0.36, "grad_norm": 4.238359451293945, "learning_rate": 1.9816904320766987e-05, "loss": 2.6118, "step": 27486 }, { "epoch": 0.36, "grad_norm": 4.0488362312316895, "learning_rate": 1.9816884304337226e-05, "loss": 2.174, "step": 27487 }, { "epoch": 0.36, "grad_norm": 4.398979663848877, "learning_rate": 1.9816864286823518e-05, "loss": 2.1481, "step": 27488 }, { "epoch": 0.36, "grad_norm": 3.9670534133911133, "learning_rate": 1.9816844268225858e-05, "loss": 1.7467, "step": 27489 }, { "epoch": 0.36, "grad_norm": 3.579768657684326, "learning_rate": 1.9816824248544256e-05, "loss": 1.7844, "step": 27490 }, { "epoch": 0.36, "grad_norm": 3.7692792415618896, "learning_rate": 1.9816804227778707e-05, "loss": 1.7189, "step": 27491 }, { "epoch": 0.36, "grad_norm": 4.100662708282471, "learning_rate": 1.981678420592922e-05, "loss": 2.3705, "step": 27492 }, { "epoch": 0.36, "grad_norm": 3.731038808822632, "learning_rate": 1.9816764182995788e-05, "loss": 1.7555, "step": 27493 }, { "epoch": 0.36, "grad_norm": 4.499124050140381, "learning_rate": 1.981674415897842e-05, "loss": 2.4265, "step": 27494 }, { "epoch": 0.36, "grad_norm": 4.168710708618164, "learning_rate": 1.9816724133877123e-05, "loss": 2.0252, "step": 27495 }, { "epoch": 0.36, "grad_norm": 4.617830753326416, "learning_rate": 1.9816704107691888e-05, "loss": 2.6229, "step": 27496 }, { "epoch": 0.36, "grad_norm": 3.5211124420166016, "learning_rate": 1.9816684080422725e-05, "loss": 1.7252, "step": 27497 }, { "epoch": 0.36, "grad_norm": 3.7606582641601562, "learning_rate": 1.981666405206963e-05, "loss": 1.8104, "step": 27498 }, { "epoch": 0.36, "grad_norm": 4.079957962036133, "learning_rate": 1.9816644022632608e-05, "loss": 1.8394, "step": 27499 }, { "epoch": 0.36, "grad_norm": 5.038482189178467, "learning_rate": 1.9816623992111665e-05, "loss": 2.565, "step": 27500 }, { "epoch": 0.36, "grad_norm": 3.790531873703003, "learning_rate": 1.98166039605068e-05, "loss": 1.8938, "step": 27501 }, { "epoch": 0.36, "grad_norm": 3.8748691082000732, "learning_rate": 1.9816583927818016e-05, "loss": 2.2325, "step": 27502 }, { "epoch": 0.36, "grad_norm": 3.6813130378723145, "learning_rate": 1.9816563894045314e-05, "loss": 2.198, "step": 27503 }, { "epoch": 0.36, "grad_norm": 4.0283308029174805, "learning_rate": 1.9816543859188697e-05, "loss": 2.2321, "step": 27504 }, { "epoch": 0.36, "grad_norm": 4.365664958953857, "learning_rate": 1.9816523823248162e-05, "loss": 1.9266, "step": 27505 }, { "epoch": 0.36, "grad_norm": 4.250714302062988, "learning_rate": 1.9816503786223724e-05, "loss": 2.209, "step": 27506 }, { "epoch": 0.36, "grad_norm": 3.599696636199951, "learning_rate": 1.9816483748115375e-05, "loss": 2.1513, "step": 27507 }, { "epoch": 0.36, "grad_norm": 3.6442551612854004, "learning_rate": 1.981646370892312e-05, "loss": 1.7807, "step": 27508 }, { "epoch": 0.36, "grad_norm": 3.64634108543396, "learning_rate": 1.9816443668646958e-05, "loss": 1.5934, "step": 27509 }, { "epoch": 0.36, "grad_norm": 3.5990593433380127, "learning_rate": 1.9816423627286893e-05, "loss": 1.7815, "step": 27510 }, { "epoch": 0.36, "grad_norm": 3.421462297439575, "learning_rate": 1.981640358484293e-05, "loss": 1.5275, "step": 27511 }, { "epoch": 0.36, "grad_norm": 3.860041618347168, "learning_rate": 1.9816383541315073e-05, "loss": 2.2627, "step": 27512 }, { "epoch": 0.36, "grad_norm": 3.641117811203003, "learning_rate": 1.981636349670332e-05, "loss": 1.8343, "step": 27513 }, { "epoch": 0.36, "grad_norm": 4.086450576782227, "learning_rate": 1.981634345100767e-05, "loss": 2.1313, "step": 27514 }, { "epoch": 0.36, "grad_norm": 4.050398826599121, "learning_rate": 1.981632340422813e-05, "loss": 2.3507, "step": 27515 }, { "epoch": 0.36, "grad_norm": 3.9380688667297363, "learning_rate": 1.9816303356364705e-05, "loss": 1.659, "step": 27516 }, { "epoch": 0.36, "grad_norm": 3.4660439491271973, "learning_rate": 1.981628330741739e-05, "loss": 1.729, "step": 27517 }, { "epoch": 0.36, "grad_norm": 4.092050075531006, "learning_rate": 1.981626325738619e-05, "loss": 2.3775, "step": 27518 }, { "epoch": 0.36, "grad_norm": 3.9731991291046143, "learning_rate": 1.9816243206271113e-05, "loss": 2.0706, "step": 27519 }, { "epoch": 0.36, "grad_norm": 4.034520149230957, "learning_rate": 1.9816223154072154e-05, "loss": 2.0854, "step": 27520 }, { "epoch": 0.36, "grad_norm": 3.930966854095459, "learning_rate": 1.9816203100789314e-05, "loss": 2.0737, "step": 27521 }, { "epoch": 0.36, "grad_norm": 3.651772975921631, "learning_rate": 1.98161830464226e-05, "loss": 1.8108, "step": 27522 }, { "epoch": 0.36, "grad_norm": 3.6457977294921875, "learning_rate": 1.9816162990972015e-05, "loss": 2.0163, "step": 27523 }, { "epoch": 0.36, "grad_norm": 3.537325143814087, "learning_rate": 1.9816142934437562e-05, "loss": 1.7969, "step": 27524 }, { "epoch": 0.36, "grad_norm": 3.645918607711792, "learning_rate": 1.9816122876819235e-05, "loss": 1.8362, "step": 27525 }, { "epoch": 0.36, "grad_norm": 3.9897401332855225, "learning_rate": 1.9816102818117042e-05, "loss": 2.0612, "step": 27526 }, { "epoch": 0.36, "grad_norm": 3.7343533039093018, "learning_rate": 1.9816082758330985e-05, "loss": 1.9982, "step": 27527 }, { "epoch": 0.36, "grad_norm": 3.585935115814209, "learning_rate": 1.981606269746107e-05, "loss": 1.6775, "step": 27528 }, { "epoch": 0.36, "grad_norm": 4.171779155731201, "learning_rate": 1.981604263550729e-05, "loss": 2.1462, "step": 27529 }, { "epoch": 0.36, "grad_norm": 4.009692668914795, "learning_rate": 1.981602257246965e-05, "loss": 1.8036, "step": 27530 }, { "epoch": 0.36, "grad_norm": 3.8597464561462402, "learning_rate": 1.9816002508348162e-05, "loss": 1.9937, "step": 27531 }, { "epoch": 0.36, "grad_norm": 4.710925579071045, "learning_rate": 1.981598244314282e-05, "loss": 2.0076, "step": 27532 }, { "epoch": 0.36, "grad_norm": 3.917513132095337, "learning_rate": 1.9815962376853622e-05, "loss": 2.4536, "step": 27533 }, { "epoch": 0.36, "grad_norm": 4.08571720123291, "learning_rate": 1.981594230948058e-05, "loss": 2.3325, "step": 27534 }, { "epoch": 0.36, "grad_norm": 3.805788278579712, "learning_rate": 1.9815922241023687e-05, "loss": 2.0442, "step": 27535 }, { "epoch": 0.36, "grad_norm": 3.8418657779693604, "learning_rate": 1.9815902171482952e-05, "loss": 2.0915, "step": 27536 }, { "epoch": 0.36, "grad_norm": 4.080912113189697, "learning_rate": 1.9815882100858373e-05, "loss": 2.136, "step": 27537 }, { "epoch": 0.36, "grad_norm": 3.774658441543579, "learning_rate": 1.981586202914996e-05, "loss": 2.0345, "step": 27538 }, { "epoch": 0.36, "grad_norm": 3.7051968574523926, "learning_rate": 1.98158419563577e-05, "loss": 1.8413, "step": 27539 }, { "epoch": 0.36, "grad_norm": 4.0213727951049805, "learning_rate": 1.9815821882481613e-05, "loss": 1.9911, "step": 27540 }, { "epoch": 0.36, "grad_norm": 4.230111122131348, "learning_rate": 1.9815801807521686e-05, "loss": 2.2679, "step": 27541 }, { "epoch": 0.36, "grad_norm": 4.536388874053955, "learning_rate": 1.9815781731477933e-05, "loss": 1.9918, "step": 27542 }, { "epoch": 0.36, "grad_norm": 3.896233558654785, "learning_rate": 1.981576165435035e-05, "loss": 1.9684, "step": 27543 }, { "epoch": 0.36, "grad_norm": 3.499906301498413, "learning_rate": 1.981574157613894e-05, "loss": 1.7184, "step": 27544 }, { "epoch": 0.36, "grad_norm": 4.396976947784424, "learning_rate": 1.9815721496843704e-05, "loss": 2.5924, "step": 27545 }, { "epoch": 0.36, "grad_norm": 3.684619188308716, "learning_rate": 1.9815701416464647e-05, "loss": 1.9398, "step": 27546 }, { "epoch": 0.36, "grad_norm": 3.531611919403076, "learning_rate": 1.981568133500177e-05, "loss": 1.5829, "step": 27547 }, { "epoch": 0.36, "grad_norm": 3.454007387161255, "learning_rate": 1.9815661252455077e-05, "loss": 1.9377, "step": 27548 }, { "epoch": 0.36, "grad_norm": 4.186891555786133, "learning_rate": 1.981564116882457e-05, "loss": 1.821, "step": 27549 }, { "epoch": 0.36, "grad_norm": 3.9639153480529785, "learning_rate": 1.9815621084110244e-05, "loss": 2.3024, "step": 27550 }, { "epoch": 0.36, "grad_norm": 3.9528167247772217, "learning_rate": 1.981560099831211e-05, "loss": 1.8775, "step": 27551 }, { "epoch": 0.36, "grad_norm": 3.817584991455078, "learning_rate": 1.9815580911430164e-05, "loss": 1.9465, "step": 27552 }, { "epoch": 0.36, "grad_norm": 3.9322588443756104, "learning_rate": 1.9815560823464416e-05, "loss": 2.1117, "step": 27553 }, { "epoch": 0.36, "grad_norm": 4.0344462394714355, "learning_rate": 1.981554073441486e-05, "loss": 2.352, "step": 27554 }, { "epoch": 0.36, "grad_norm": 4.072168827056885, "learning_rate": 1.9815520644281505e-05, "loss": 1.9008, "step": 27555 }, { "epoch": 0.36, "grad_norm": 3.5135750770568848, "learning_rate": 1.981550055306435e-05, "loss": 2.016, "step": 27556 }, { "epoch": 0.36, "grad_norm": 3.750282049179077, "learning_rate": 1.9815480460763394e-05, "loss": 1.8035, "step": 27557 }, { "epoch": 0.36, "grad_norm": 3.99873948097229, "learning_rate": 1.9815460367378644e-05, "loss": 2.2451, "step": 27558 }, { "epoch": 0.36, "grad_norm": 3.6997954845428467, "learning_rate": 1.9815440272910102e-05, "loss": 1.8608, "step": 27559 }, { "epoch": 0.36, "grad_norm": 3.6053011417388916, "learning_rate": 1.9815420177357767e-05, "loss": 1.875, "step": 27560 }, { "epoch": 0.36, "grad_norm": 4.210494518280029, "learning_rate": 1.9815400080721644e-05, "loss": 2.756, "step": 27561 }, { "epoch": 0.36, "grad_norm": 4.193328380584717, "learning_rate": 1.981537998300173e-05, "loss": 2.1195, "step": 27562 }, { "epoch": 0.36, "grad_norm": 4.190094470977783, "learning_rate": 1.981535988419804e-05, "loss": 1.9573, "step": 27563 }, { "epoch": 0.36, "grad_norm": 4.059311389923096, "learning_rate": 1.981533978431056e-05, "loss": 2.0124, "step": 27564 }, { "epoch": 0.36, "grad_norm": 3.692902088165283, "learning_rate": 1.9815319683339305e-05, "loss": 1.9632, "step": 27565 }, { "epoch": 0.36, "grad_norm": 3.5969231128692627, "learning_rate": 1.9815299581284272e-05, "loss": 2.2042, "step": 27566 }, { "epoch": 0.36, "grad_norm": 3.902240514755249, "learning_rate": 1.981527947814546e-05, "loss": 2.0248, "step": 27567 }, { "epoch": 0.36, "grad_norm": 4.043280601501465, "learning_rate": 1.981525937392288e-05, "loss": 2.1263, "step": 27568 }, { "epoch": 0.36, "grad_norm": 3.784257650375366, "learning_rate": 1.9815239268616522e-05, "loss": 1.9682, "step": 27569 }, { "epoch": 0.36, "grad_norm": 3.7366132736206055, "learning_rate": 1.98152191622264e-05, "loss": 1.9834, "step": 27570 }, { "epoch": 0.36, "grad_norm": 3.5370900630950928, "learning_rate": 1.9815199054752508e-05, "loss": 1.8347, "step": 27571 }, { "epoch": 0.36, "grad_norm": 3.7173798084259033, "learning_rate": 1.9815178946194857e-05, "loss": 2.2693, "step": 27572 }, { "epoch": 0.36, "grad_norm": 3.92832612991333, "learning_rate": 1.9815158836553437e-05, "loss": 2.1262, "step": 27573 }, { "epoch": 0.36, "grad_norm": 3.9226651191711426, "learning_rate": 1.981513872582826e-05, "loss": 2.1422, "step": 27574 }, { "epoch": 0.36, "grad_norm": 3.8761990070343018, "learning_rate": 1.9815118614019325e-05, "loss": 2.2627, "step": 27575 }, { "epoch": 0.36, "grad_norm": 3.903740882873535, "learning_rate": 1.9815098501126634e-05, "loss": 2.1629, "step": 27576 }, { "epoch": 0.36, "grad_norm": 4.1094651222229, "learning_rate": 1.981507838715019e-05, "loss": 2.1073, "step": 27577 }, { "epoch": 0.36, "grad_norm": 3.7269678115844727, "learning_rate": 1.9815058272089996e-05, "loss": 1.9868, "step": 27578 }, { "epoch": 0.36, "grad_norm": 4.437435150146484, "learning_rate": 1.981503815594605e-05, "loss": 2.1853, "step": 27579 }, { "epoch": 0.36, "grad_norm": 4.05794095993042, "learning_rate": 1.9815018038718362e-05, "loss": 1.9505, "step": 27580 }, { "epoch": 0.36, "grad_norm": 3.9711196422576904, "learning_rate": 1.9814997920406923e-05, "loss": 1.9194, "step": 27581 }, { "epoch": 0.36, "grad_norm": 3.474855899810791, "learning_rate": 1.9814977801011746e-05, "loss": 1.6453, "step": 27582 }, { "epoch": 0.36, "grad_norm": 3.859670877456665, "learning_rate": 1.9814957680532828e-05, "loss": 2.1394, "step": 27583 }, { "epoch": 0.36, "grad_norm": 4.389676570892334, "learning_rate": 1.9814937558970174e-05, "loss": 2.8727, "step": 27584 }, { "epoch": 0.36, "grad_norm": 3.768378734588623, "learning_rate": 1.9814917436323783e-05, "loss": 2.2902, "step": 27585 }, { "epoch": 0.36, "grad_norm": 3.4451165199279785, "learning_rate": 1.981489731259366e-05, "loss": 1.7634, "step": 27586 }, { "epoch": 0.36, "grad_norm": 4.021571159362793, "learning_rate": 1.98148771877798e-05, "loss": 2.0563, "step": 27587 }, { "epoch": 0.36, "grad_norm": 3.9258275032043457, "learning_rate": 1.981485706188222e-05, "loss": 1.812, "step": 27588 }, { "epoch": 0.36, "grad_norm": 3.3483357429504395, "learning_rate": 1.9814836934900907e-05, "loss": 1.7768, "step": 27589 }, { "epoch": 0.36, "grad_norm": 3.734158515930176, "learning_rate": 1.981481680683587e-05, "loss": 2.0025, "step": 27590 }, { "epoch": 0.36, "grad_norm": 4.140298366546631, "learning_rate": 1.9814796677687115e-05, "loss": 2.1659, "step": 27591 }, { "epoch": 0.36, "grad_norm": 3.6026344299316406, "learning_rate": 1.9814776547454632e-05, "loss": 1.9318, "step": 27592 }, { "epoch": 0.36, "grad_norm": 3.560760974884033, "learning_rate": 1.9814756416138436e-05, "loss": 1.91, "step": 27593 }, { "epoch": 0.36, "grad_norm": 3.913271903991699, "learning_rate": 1.9814736283738524e-05, "loss": 2.0974, "step": 27594 }, { "epoch": 0.36, "grad_norm": 4.012119293212891, "learning_rate": 1.9814716150254902e-05, "loss": 2.3537, "step": 27595 }, { "epoch": 0.36, "grad_norm": 3.9030187129974365, "learning_rate": 1.9814696015687566e-05, "loss": 2.1, "step": 27596 }, { "epoch": 0.36, "grad_norm": 3.690459966659546, "learning_rate": 1.981467588003652e-05, "loss": 2.0132, "step": 27597 }, { "epoch": 0.36, "grad_norm": 4.113909721374512, "learning_rate": 1.9814655743301768e-05, "loss": 2.1842, "step": 27598 }, { "epoch": 0.36, "grad_norm": 3.7853808403015137, "learning_rate": 1.9814635605483312e-05, "loss": 1.8277, "step": 27599 }, { "epoch": 0.36, "grad_norm": 4.294497013092041, "learning_rate": 1.9814615466581153e-05, "loss": 2.2117, "step": 27600 }, { "epoch": 0.36, "grad_norm": 3.5162363052368164, "learning_rate": 1.9814595326595292e-05, "loss": 1.5472, "step": 27601 }, { "epoch": 0.36, "grad_norm": 4.156948566436768, "learning_rate": 1.9814575185525738e-05, "loss": 2.0689, "step": 27602 }, { "epoch": 0.36, "grad_norm": 3.8745226860046387, "learning_rate": 1.9814555043372487e-05, "loss": 1.9933, "step": 27603 }, { "epoch": 0.36, "grad_norm": 4.585824966430664, "learning_rate": 1.981453490013554e-05, "loss": 2.3952, "step": 27604 }, { "epoch": 0.36, "grad_norm": 3.980307102203369, "learning_rate": 1.98145147558149e-05, "loss": 1.8435, "step": 27605 }, { "epoch": 0.36, "grad_norm": 4.077503681182861, "learning_rate": 1.9814494610410575e-05, "loss": 2.1715, "step": 27606 }, { "epoch": 0.36, "grad_norm": 4.056481838226318, "learning_rate": 1.9814474463922565e-05, "loss": 1.6638, "step": 27607 }, { "epoch": 0.36, "grad_norm": 3.8088560104370117, "learning_rate": 1.9814454316350867e-05, "loss": 1.9335, "step": 27608 }, { "epoch": 0.36, "grad_norm": 3.819265365600586, "learning_rate": 1.981443416769549e-05, "loss": 1.843, "step": 27609 }, { "epoch": 0.36, "grad_norm": 3.7937803268432617, "learning_rate": 1.981441401795643e-05, "loss": 1.7737, "step": 27610 }, { "epoch": 0.36, "grad_norm": 4.041507720947266, "learning_rate": 1.9814393867133692e-05, "loss": 2.3526, "step": 27611 }, { "epoch": 0.36, "grad_norm": 3.8363943099975586, "learning_rate": 1.981437371522728e-05, "loss": 1.8804, "step": 27612 }, { "epoch": 0.36, "grad_norm": 4.299401760101318, "learning_rate": 1.9814353562237193e-05, "loss": 2.2221, "step": 27613 }, { "epoch": 0.36, "grad_norm": 3.3714189529418945, "learning_rate": 1.9814333408163437e-05, "loss": 1.6408, "step": 27614 }, { "epoch": 0.36, "grad_norm": 3.9075756072998047, "learning_rate": 1.981431325300601e-05, "loss": 2.2223, "step": 27615 }, { "epoch": 0.36, "grad_norm": 4.100164890289307, "learning_rate": 1.9814293096764917e-05, "loss": 1.8452, "step": 27616 }, { "epoch": 0.36, "grad_norm": 4.041095733642578, "learning_rate": 1.981427293944016e-05, "loss": 1.905, "step": 27617 }, { "epoch": 0.36, "grad_norm": 3.5822103023529053, "learning_rate": 1.9814252781031743e-05, "loss": 1.8051, "step": 27618 }, { "epoch": 0.36, "grad_norm": 3.5141985416412354, "learning_rate": 1.9814232621539663e-05, "loss": 1.7302, "step": 27619 }, { "epoch": 0.36, "grad_norm": 3.660423517227173, "learning_rate": 1.9814212460963922e-05, "loss": 2.0216, "step": 27620 }, { "epoch": 0.36, "grad_norm": 4.2414398193359375, "learning_rate": 1.9814192299304532e-05, "loss": 1.9001, "step": 27621 }, { "epoch": 0.36, "grad_norm": 4.211598873138428, "learning_rate": 1.9814172136561485e-05, "loss": 2.4671, "step": 27622 }, { "epoch": 0.36, "grad_norm": 3.9736440181732178, "learning_rate": 1.9814151972734788e-05, "loss": 2.1261, "step": 27623 }, { "epoch": 0.36, "grad_norm": 3.6930277347564697, "learning_rate": 1.981413180782444e-05, "loss": 1.9068, "step": 27624 }, { "epoch": 0.36, "grad_norm": 3.522366523742676, "learning_rate": 1.9814111641830448e-05, "loss": 1.7178, "step": 27625 }, { "epoch": 0.36, "grad_norm": 3.7839856147766113, "learning_rate": 1.981409147475281e-05, "loss": 2.3089, "step": 27626 }, { "epoch": 0.36, "grad_norm": 3.7490391731262207, "learning_rate": 1.9814071306591532e-05, "loss": 1.6963, "step": 27627 }, { "epoch": 0.36, "grad_norm": 3.3827431201934814, "learning_rate": 1.9814051137346616e-05, "loss": 1.8315, "step": 27628 }, { "epoch": 0.36, "grad_norm": 3.9386050701141357, "learning_rate": 1.9814030967018057e-05, "loss": 2.0857, "step": 27629 }, { "epoch": 0.36, "grad_norm": 4.401181221008301, "learning_rate": 1.981401079560586e-05, "loss": 2.2949, "step": 27630 }, { "epoch": 0.36, "grad_norm": 4.381798267364502, "learning_rate": 1.9813990623110036e-05, "loss": 2.4999, "step": 27631 }, { "epoch": 0.36, "grad_norm": 3.7233145236968994, "learning_rate": 1.981397044953058e-05, "loss": 1.5726, "step": 27632 }, { "epoch": 0.36, "grad_norm": 3.267350673675537, "learning_rate": 1.9813950274867494e-05, "loss": 1.2861, "step": 27633 }, { "epoch": 0.36, "grad_norm": 4.161404132843018, "learning_rate": 1.981393009912078e-05, "loss": 2.4519, "step": 27634 }, { "epoch": 0.36, "grad_norm": 3.953932285308838, "learning_rate": 1.9813909922290447e-05, "loss": 1.8907, "step": 27635 }, { "epoch": 0.36, "grad_norm": 3.744936227798462, "learning_rate": 1.9813889744376488e-05, "loss": 1.8512, "step": 27636 }, { "epoch": 0.36, "grad_norm": 4.257767200469971, "learning_rate": 1.981386956537891e-05, "loss": 1.8467, "step": 27637 }, { "epoch": 0.36, "grad_norm": 3.773935556411743, "learning_rate": 1.981384938529771e-05, "loss": 2.0797, "step": 27638 }, { "epoch": 0.36, "grad_norm": 4.0520243644714355, "learning_rate": 1.9813829204132903e-05, "loss": 2.1804, "step": 27639 }, { "epoch": 0.36, "grad_norm": 4.019687652587891, "learning_rate": 1.9813809021884473e-05, "loss": 2.2396, "step": 27640 }, { "epoch": 0.36, "grad_norm": 4.2932233810424805, "learning_rate": 1.981378883855244e-05, "loss": 1.9965, "step": 27641 }, { "epoch": 0.36, "grad_norm": 3.584907293319702, "learning_rate": 1.9813768654136795e-05, "loss": 1.9079, "step": 27642 }, { "epoch": 0.36, "grad_norm": 3.7960197925567627, "learning_rate": 1.981374846863754e-05, "loss": 1.9858, "step": 27643 }, { "epoch": 0.36, "grad_norm": 4.233276844024658, "learning_rate": 1.9813728282054687e-05, "loss": 2.3539, "step": 27644 }, { "epoch": 0.36, "grad_norm": 4.183286190032959, "learning_rate": 1.981370809438823e-05, "loss": 2.1338, "step": 27645 }, { "epoch": 0.36, "grad_norm": 4.209591388702393, "learning_rate": 1.9813687905638172e-05, "loss": 1.8092, "step": 27646 }, { "epoch": 0.36, "grad_norm": 3.9649717807769775, "learning_rate": 1.9813667715804517e-05, "loss": 1.6524, "step": 27647 }, { "epoch": 0.36, "grad_norm": 3.8906004428863525, "learning_rate": 1.9813647524887264e-05, "loss": 2.2146, "step": 27648 }, { "epoch": 0.36, "grad_norm": 3.806210994720459, "learning_rate": 1.981362733288642e-05, "loss": 2.1439, "step": 27649 }, { "epoch": 0.36, "grad_norm": 4.008014678955078, "learning_rate": 1.9813607139801984e-05, "loss": 2.116, "step": 27650 }, { "epoch": 0.36, "grad_norm": 3.4574756622314453, "learning_rate": 1.981358694563396e-05, "loss": 1.7291, "step": 27651 }, { "epoch": 0.36, "grad_norm": 3.872846841812134, "learning_rate": 1.981356675038235e-05, "loss": 2.363, "step": 27652 }, { "epoch": 0.36, "grad_norm": 3.813687324523926, "learning_rate": 1.9813546554047156e-05, "loss": 1.9355, "step": 27653 }, { "epoch": 0.36, "grad_norm": 3.6379969120025635, "learning_rate": 1.981352635662838e-05, "loss": 1.71, "step": 27654 }, { "epoch": 0.36, "grad_norm": 4.059844017028809, "learning_rate": 1.9813506158126024e-05, "loss": 2.1887, "step": 27655 }, { "epoch": 0.36, "grad_norm": 4.0779852867126465, "learning_rate": 1.981348595854009e-05, "loss": 2.3852, "step": 27656 }, { "epoch": 0.36, "grad_norm": 3.883622646331787, "learning_rate": 1.981346575787058e-05, "loss": 1.8449, "step": 27657 }, { "epoch": 0.36, "grad_norm": 4.403926372528076, "learning_rate": 1.9813445556117498e-05, "loss": 2.5417, "step": 27658 }, { "epoch": 0.36, "grad_norm": 4.315877437591553, "learning_rate": 1.9813425353280845e-05, "loss": 2.3794, "step": 27659 }, { "epoch": 0.36, "grad_norm": 3.8614673614501953, "learning_rate": 1.9813405149360622e-05, "loss": 2.1983, "step": 27660 }, { "epoch": 0.36, "grad_norm": 3.9669296741485596, "learning_rate": 1.9813384944356835e-05, "loss": 2.0313, "step": 27661 }, { "epoch": 0.36, "grad_norm": 4.381324768066406, "learning_rate": 1.9813364738269482e-05, "loss": 2.2423, "step": 27662 }, { "epoch": 0.36, "grad_norm": 4.095590591430664, "learning_rate": 1.9813344531098568e-05, "loss": 2.2467, "step": 27663 }, { "epoch": 0.36, "grad_norm": 4.09067964553833, "learning_rate": 1.9813324322844095e-05, "loss": 1.9501, "step": 27664 }, { "epoch": 0.36, "grad_norm": 4.263105869293213, "learning_rate": 1.9813304113506064e-05, "loss": 2.6599, "step": 27665 }, { "epoch": 0.36, "grad_norm": 4.212759017944336, "learning_rate": 1.9813283903084474e-05, "loss": 2.013, "step": 27666 }, { "epoch": 0.36, "grad_norm": 4.196096420288086, "learning_rate": 1.9813263691579336e-05, "loss": 2.0112, "step": 27667 }, { "epoch": 0.36, "grad_norm": 4.24513053894043, "learning_rate": 1.9813243478990646e-05, "loss": 2.1766, "step": 27668 }, { "epoch": 0.36, "grad_norm": 3.9903903007507324, "learning_rate": 1.9813223265318405e-05, "loss": 2.0489, "step": 27669 }, { "epoch": 0.36, "grad_norm": 3.9888663291931152, "learning_rate": 1.981320305056262e-05, "loss": 2.1635, "step": 27670 }, { "epoch": 0.36, "grad_norm": 3.595890760421753, "learning_rate": 1.981318283472329e-05, "loss": 1.6106, "step": 27671 }, { "epoch": 0.36, "grad_norm": 4.07939338684082, "learning_rate": 1.981316261780042e-05, "loss": 2.1754, "step": 27672 }, { "epoch": 0.36, "grad_norm": 4.021485805511475, "learning_rate": 1.9813142399794008e-05, "loss": 1.8929, "step": 27673 }, { "epoch": 0.36, "grad_norm": 3.6853888034820557, "learning_rate": 1.981312218070406e-05, "loss": 1.8818, "step": 27674 }, { "epoch": 0.36, "grad_norm": 3.822726249694824, "learning_rate": 1.9813101960530576e-05, "loss": 2.1171, "step": 27675 }, { "epoch": 0.36, "grad_norm": 3.338672637939453, "learning_rate": 1.9813081739273558e-05, "loss": 1.7528, "step": 27676 }, { "epoch": 0.36, "grad_norm": 3.47885799407959, "learning_rate": 1.9813061516933012e-05, "loss": 1.7585, "step": 27677 }, { "epoch": 0.36, "grad_norm": 4.127649307250977, "learning_rate": 1.9813041293508934e-05, "loss": 1.9096, "step": 27678 }, { "epoch": 0.36, "grad_norm": 3.8920321464538574, "learning_rate": 1.9813021069001332e-05, "loss": 2.2202, "step": 27679 }, { "epoch": 0.36, "grad_norm": 4.308099746704102, "learning_rate": 1.9813000843410206e-05, "loss": 1.9656, "step": 27680 }, { "epoch": 0.36, "grad_norm": 4.1551513671875, "learning_rate": 1.9812980616735558e-05, "loss": 1.9973, "step": 27681 }, { "epoch": 0.36, "grad_norm": 3.8284378051757812, "learning_rate": 1.9812960388977393e-05, "loss": 1.7586, "step": 27682 }, { "epoch": 0.36, "grad_norm": 3.5727696418762207, "learning_rate": 1.9812940160135706e-05, "loss": 1.8725, "step": 27683 }, { "epoch": 0.36, "grad_norm": 4.454657077789307, "learning_rate": 1.981291993021051e-05, "loss": 2.1425, "step": 27684 }, { "epoch": 0.36, "grad_norm": 3.7302656173706055, "learning_rate": 1.9812899699201797e-05, "loss": 1.8116, "step": 27685 }, { "epoch": 0.36, "grad_norm": 3.990549325942993, "learning_rate": 1.981287946710957e-05, "loss": 2.1417, "step": 27686 }, { "epoch": 0.36, "grad_norm": 3.781085252761841, "learning_rate": 1.981285923393384e-05, "loss": 2.1227, "step": 27687 }, { "epoch": 0.36, "grad_norm": 3.792487382888794, "learning_rate": 1.98128389996746e-05, "loss": 1.9249, "step": 27688 }, { "epoch": 0.36, "grad_norm": 4.294372081756592, "learning_rate": 1.981281876433186e-05, "loss": 2.2168, "step": 27689 }, { "epoch": 0.36, "grad_norm": 3.9580397605895996, "learning_rate": 1.9812798527905616e-05, "loss": 2.1584, "step": 27690 }, { "epoch": 0.36, "grad_norm": 3.6730873584747314, "learning_rate": 1.9812778290395872e-05, "loss": 1.7943, "step": 27691 }, { "epoch": 0.36, "grad_norm": 3.5697453022003174, "learning_rate": 1.9812758051802634e-05, "loss": 2.0698, "step": 27692 }, { "epoch": 0.36, "grad_norm": 3.9490957260131836, "learning_rate": 1.98127378121259e-05, "loss": 1.9913, "step": 27693 }, { "epoch": 0.36, "grad_norm": 3.3988850116729736, "learning_rate": 1.9812717571365672e-05, "loss": 1.6416, "step": 27694 }, { "epoch": 0.36, "grad_norm": 4.498408317565918, "learning_rate": 1.9812697329521952e-05, "loss": 2.3473, "step": 27695 }, { "epoch": 0.36, "grad_norm": 3.819211006164551, "learning_rate": 1.9812677086594748e-05, "loss": 1.9663, "step": 27696 }, { "epoch": 0.36, "grad_norm": 3.649815559387207, "learning_rate": 1.9812656842584056e-05, "loss": 1.9026, "step": 27697 }, { "epoch": 0.36, "grad_norm": 3.369713306427002, "learning_rate": 1.9812636597489878e-05, "loss": 1.4056, "step": 27698 }, { "epoch": 0.36, "grad_norm": 3.7731919288635254, "learning_rate": 1.981261635131222e-05, "loss": 2.3235, "step": 27699 }, { "epoch": 0.36, "grad_norm": 3.577707529067993, "learning_rate": 1.9812596104051086e-05, "loss": 1.6675, "step": 27700 }, { "epoch": 0.36, "grad_norm": 4.286660194396973, "learning_rate": 1.9812575855706472e-05, "loss": 2.063, "step": 27701 }, { "epoch": 0.36, "grad_norm": 4.026738166809082, "learning_rate": 1.981255560627838e-05, "loss": 1.9324, "step": 27702 }, { "epoch": 0.36, "grad_norm": 3.4554073810577393, "learning_rate": 1.981253535576682e-05, "loss": 1.7292, "step": 27703 }, { "epoch": 0.36, "grad_norm": 3.840179681777954, "learning_rate": 1.981251510417179e-05, "loss": 2.2496, "step": 27704 }, { "epoch": 0.36, "grad_norm": 3.9628868103027344, "learning_rate": 1.9812494851493285e-05, "loss": 2.4929, "step": 27705 }, { "epoch": 0.36, "grad_norm": 3.8634307384490967, "learning_rate": 1.981247459773132e-05, "loss": 2.0555, "step": 27706 }, { "epoch": 0.36, "grad_norm": 3.9302735328674316, "learning_rate": 1.981245434288589e-05, "loss": 2.0252, "step": 27707 }, { "epoch": 0.36, "grad_norm": 3.9524333477020264, "learning_rate": 1.9812434086957e-05, "loss": 2.1714, "step": 27708 }, { "epoch": 0.36, "grad_norm": 4.69701623916626, "learning_rate": 1.981241382994465e-05, "loss": 2.8694, "step": 27709 }, { "epoch": 0.36, "grad_norm": 4.334181785583496, "learning_rate": 1.981239357184884e-05, "loss": 2.6769, "step": 27710 }, { "epoch": 0.36, "grad_norm": 3.844019889831543, "learning_rate": 1.9812373312669578e-05, "loss": 2.019, "step": 27711 }, { "epoch": 0.36, "grad_norm": 3.8151841163635254, "learning_rate": 1.9812353052406865e-05, "loss": 2.3192, "step": 27712 }, { "epoch": 0.36, "grad_norm": 4.365219593048096, "learning_rate": 1.98123327910607e-05, "loss": 2.0378, "step": 27713 }, { "epoch": 0.36, "grad_norm": 3.72888445854187, "learning_rate": 1.9812312528631087e-05, "loss": 1.7716, "step": 27714 }, { "epoch": 0.36, "grad_norm": 4.020360946655273, "learning_rate": 1.9812292265118028e-05, "loss": 2.1739, "step": 27715 }, { "epoch": 0.36, "grad_norm": 3.72283935546875, "learning_rate": 1.9812272000521525e-05, "loss": 1.9185, "step": 27716 }, { "epoch": 0.36, "grad_norm": 3.854945421218872, "learning_rate": 1.9812251734841583e-05, "loss": 2.0811, "step": 27717 }, { "epoch": 0.36, "grad_norm": 4.5226922035217285, "learning_rate": 1.9812231468078204e-05, "loss": 2.1994, "step": 27718 }, { "epoch": 0.36, "grad_norm": 3.6077308654785156, "learning_rate": 1.9812211200231382e-05, "loss": 1.7302, "step": 27719 }, { "epoch": 0.36, "grad_norm": 4.199674606323242, "learning_rate": 1.9812190931301127e-05, "loss": 2.0252, "step": 27720 }, { "epoch": 0.36, "grad_norm": 3.920168161392212, "learning_rate": 1.9812170661287442e-05, "loss": 2.0022, "step": 27721 }, { "epoch": 0.36, "grad_norm": 4.26131010055542, "learning_rate": 1.9812150390190325e-05, "loss": 2.3594, "step": 27722 }, { "epoch": 0.36, "grad_norm": 3.3976356983184814, "learning_rate": 1.981213011800978e-05, "loss": 1.6546, "step": 27723 }, { "epoch": 0.36, "grad_norm": 3.363359212875366, "learning_rate": 1.981210984474581e-05, "loss": 1.9369, "step": 27724 }, { "epoch": 0.36, "grad_norm": 4.286771774291992, "learning_rate": 1.9812089570398417e-05, "loss": 2.2001, "step": 27725 }, { "epoch": 0.36, "grad_norm": 3.613258123397827, "learning_rate": 1.9812069294967604e-05, "loss": 1.8285, "step": 27726 }, { "epoch": 0.36, "grad_norm": 3.8031342029571533, "learning_rate": 1.981204901845337e-05, "loss": 1.9908, "step": 27727 }, { "epoch": 0.36, "grad_norm": 4.13236141204834, "learning_rate": 1.981202874085572e-05, "loss": 2.101, "step": 27728 }, { "epoch": 0.36, "grad_norm": 3.782534122467041, "learning_rate": 1.9812008462174655e-05, "loss": 1.6972, "step": 27729 }, { "epoch": 0.36, "grad_norm": 3.8708059787750244, "learning_rate": 1.9811988182410178e-05, "loss": 2.3246, "step": 27730 }, { "epoch": 0.36, "grad_norm": 4.072222709655762, "learning_rate": 1.981196790156229e-05, "loss": 2.3179, "step": 27731 }, { "epoch": 0.36, "grad_norm": 4.418169021606445, "learning_rate": 1.9811947619631e-05, "loss": 2.2083, "step": 27732 }, { "epoch": 0.36, "grad_norm": 4.169797420501709, "learning_rate": 1.98119273366163e-05, "loss": 2.1254, "step": 27733 }, { "epoch": 0.36, "grad_norm": 4.067375183105469, "learning_rate": 1.9811907052518196e-05, "loss": 2.2595, "step": 27734 }, { "epoch": 0.36, "grad_norm": 3.82789945602417, "learning_rate": 1.9811886767336692e-05, "loss": 1.9218, "step": 27735 }, { "epoch": 0.36, "grad_norm": 3.6959524154663086, "learning_rate": 1.981186648107179e-05, "loss": 1.8288, "step": 27736 }, { "epoch": 0.36, "grad_norm": 4.1814751625061035, "learning_rate": 1.981184619372349e-05, "loss": 2.0577, "step": 27737 }, { "epoch": 0.36, "grad_norm": 3.9932143688201904, "learning_rate": 1.9811825905291796e-05, "loss": 2.2361, "step": 27738 }, { "epoch": 0.36, "grad_norm": 3.7719597816467285, "learning_rate": 1.9811805615776713e-05, "loss": 1.8778, "step": 27739 }, { "epoch": 0.36, "grad_norm": 3.853454113006592, "learning_rate": 1.9811785325178237e-05, "loss": 2.0713, "step": 27740 }, { "epoch": 0.36, "grad_norm": 3.7720494270324707, "learning_rate": 1.9811765033496372e-05, "loss": 2.382, "step": 27741 }, { "epoch": 0.36, "grad_norm": 3.4349327087402344, "learning_rate": 1.9811744740731125e-05, "loss": 1.7546, "step": 27742 }, { "epoch": 0.36, "grad_norm": 3.380007743835449, "learning_rate": 1.9811724446882495e-05, "loss": 1.8836, "step": 27743 }, { "epoch": 0.36, "grad_norm": 3.593538999557495, "learning_rate": 1.981170415195048e-05, "loss": 2.1105, "step": 27744 }, { "epoch": 0.36, "grad_norm": 4.1270976066589355, "learning_rate": 1.981168385593509e-05, "loss": 2.2102, "step": 27745 }, { "epoch": 0.36, "grad_norm": 3.76916241645813, "learning_rate": 1.9811663558836324e-05, "loss": 2.2001, "step": 27746 }, { "epoch": 0.36, "grad_norm": 3.796328544616699, "learning_rate": 1.9811643260654182e-05, "loss": 1.7741, "step": 27747 }, { "epoch": 0.36, "grad_norm": 3.4771595001220703, "learning_rate": 1.981162296138867e-05, "loss": 1.7354, "step": 27748 }, { "epoch": 0.36, "grad_norm": 3.1984667778015137, "learning_rate": 1.9811602661039786e-05, "loss": 1.6571, "step": 27749 }, { "epoch": 0.36, "grad_norm": 4.462131023406982, "learning_rate": 1.9811582359607535e-05, "loss": 2.6368, "step": 27750 }, { "epoch": 0.36, "grad_norm": 3.9773576259613037, "learning_rate": 1.981156205709192e-05, "loss": 2.2964, "step": 27751 }, { "epoch": 0.36, "grad_norm": 4.740195274353027, "learning_rate": 1.981154175349294e-05, "loss": 2.1877, "step": 27752 }, { "epoch": 0.36, "grad_norm": 3.71632719039917, "learning_rate": 1.9811521448810604e-05, "loss": 1.808, "step": 27753 }, { "epoch": 0.36, "grad_norm": 4.138430595397949, "learning_rate": 1.9811501143044906e-05, "loss": 2.2452, "step": 27754 }, { "epoch": 0.36, "grad_norm": 3.6107406616210938, "learning_rate": 1.9811480836195852e-05, "loss": 2.0746, "step": 27755 }, { "epoch": 0.36, "grad_norm": 3.1131176948547363, "learning_rate": 1.9811460528263447e-05, "loss": 1.7141, "step": 27756 }, { "epoch": 0.36, "grad_norm": 4.0323381423950195, "learning_rate": 1.9811440219247686e-05, "loss": 2.0849, "step": 27757 }, { "epoch": 0.36, "grad_norm": 3.7388298511505127, "learning_rate": 1.9811419909148577e-05, "loss": 1.8719, "step": 27758 }, { "epoch": 0.36, "grad_norm": 3.9302761554718018, "learning_rate": 1.981139959796612e-05, "loss": 2.3297, "step": 27759 }, { "epoch": 0.36, "grad_norm": 3.8107433319091797, "learning_rate": 1.981137928570032e-05, "loss": 1.829, "step": 27760 }, { "epoch": 0.36, "grad_norm": 3.4704065322875977, "learning_rate": 1.9811358972351177e-05, "loss": 2.0368, "step": 27761 }, { "epoch": 0.36, "grad_norm": 3.8304243087768555, "learning_rate": 1.981133865791869e-05, "loss": 2.0803, "step": 27762 }, { "epoch": 0.36, "grad_norm": 3.5714259147644043, "learning_rate": 1.981131834240287e-05, "loss": 1.8063, "step": 27763 }, { "epoch": 0.36, "grad_norm": 3.5241127014160156, "learning_rate": 1.9811298025803713e-05, "loss": 1.9685, "step": 27764 }, { "epoch": 0.36, "grad_norm": 3.6798903942108154, "learning_rate": 1.9811277708121223e-05, "loss": 1.9356, "step": 27765 }, { "epoch": 0.36, "grad_norm": 3.659674882888794, "learning_rate": 1.9811257389355398e-05, "loss": 1.9741, "step": 27766 }, { "epoch": 0.36, "grad_norm": 3.8990936279296875, "learning_rate": 1.9811237069506245e-05, "loss": 2.2774, "step": 27767 }, { "epoch": 0.36, "grad_norm": 4.049057960510254, "learning_rate": 1.9811216748573764e-05, "loss": 2.4067, "step": 27768 }, { "epoch": 0.36, "grad_norm": 3.7811532020568848, "learning_rate": 1.981119642655796e-05, "loss": 1.7993, "step": 27769 }, { "epoch": 0.36, "grad_norm": 3.022414445877075, "learning_rate": 1.9811176103458832e-05, "loss": 1.4929, "step": 27770 }, { "epoch": 0.36, "grad_norm": 3.671168565750122, "learning_rate": 1.9811155779276388e-05, "loss": 1.9768, "step": 27771 }, { "epoch": 0.36, "grad_norm": 4.232965469360352, "learning_rate": 1.9811135454010622e-05, "loss": 2.3317, "step": 27772 }, { "epoch": 0.36, "grad_norm": 4.423627853393555, "learning_rate": 1.981111512766154e-05, "loss": 2.157, "step": 27773 }, { "epoch": 0.36, "grad_norm": 4.120040416717529, "learning_rate": 1.9811094800229143e-05, "loss": 2.0047, "step": 27774 }, { "epoch": 0.36, "grad_norm": 3.5909411907196045, "learning_rate": 1.9811074471713437e-05, "loss": 1.6577, "step": 27775 }, { "epoch": 0.36, "grad_norm": 4.42214298248291, "learning_rate": 1.9811054142114424e-05, "loss": 2.5259, "step": 27776 }, { "epoch": 0.36, "grad_norm": 4.252243518829346, "learning_rate": 1.9811033811432103e-05, "loss": 2.1414, "step": 27777 }, { "epoch": 0.36, "grad_norm": 4.054529190063477, "learning_rate": 1.9811013479666473e-05, "loss": 2.3696, "step": 27778 }, { "epoch": 0.36, "grad_norm": 3.8978328704833984, "learning_rate": 1.9810993146817547e-05, "loss": 1.761, "step": 27779 }, { "epoch": 0.36, "grad_norm": 3.9586520195007324, "learning_rate": 1.981097281288532e-05, "loss": 2.0457, "step": 27780 }, { "epoch": 0.36, "grad_norm": 3.9403228759765625, "learning_rate": 1.9810952477869788e-05, "loss": 1.9318, "step": 27781 }, { "epoch": 0.36, "grad_norm": 3.9795479774475098, "learning_rate": 1.981093214177097e-05, "loss": 2.05, "step": 27782 }, { "epoch": 0.36, "grad_norm": 4.239035606384277, "learning_rate": 1.9810911804588852e-05, "loss": 1.917, "step": 27783 }, { "epoch": 0.36, "grad_norm": 3.883474826812744, "learning_rate": 1.9810891466323444e-05, "loss": 1.9387, "step": 27784 }, { "epoch": 0.36, "grad_norm": 3.5752389430999756, "learning_rate": 1.9810871126974753e-05, "loss": 1.7563, "step": 27785 }, { "epoch": 0.36, "grad_norm": 3.728607416152954, "learning_rate": 1.9810850786542767e-05, "loss": 1.9163, "step": 27786 }, { "epoch": 0.36, "grad_norm": 3.909219264984131, "learning_rate": 1.98108304450275e-05, "loss": 2.0631, "step": 27787 }, { "epoch": 0.36, "grad_norm": 4.009792804718018, "learning_rate": 1.981081010242895e-05, "loss": 2.1637, "step": 27788 }, { "epoch": 0.36, "grad_norm": 4.189492225646973, "learning_rate": 1.9810789758747123e-05, "loss": 1.9847, "step": 27789 }, { "epoch": 0.36, "grad_norm": 4.2163825035095215, "learning_rate": 1.9810769413982015e-05, "loss": 1.938, "step": 27790 }, { "epoch": 0.36, "grad_norm": 3.380187511444092, "learning_rate": 1.9810749068133637e-05, "loss": 1.7784, "step": 27791 }, { "epoch": 0.36, "grad_norm": 3.5453109741210938, "learning_rate": 1.981072872120198e-05, "loss": 1.7925, "step": 27792 }, { "epoch": 0.36, "grad_norm": 3.6738359928131104, "learning_rate": 1.9810708373187055e-05, "loss": 1.8567, "step": 27793 }, { "epoch": 0.36, "grad_norm": 4.42990779876709, "learning_rate": 1.981068802408886e-05, "loss": 2.3373, "step": 27794 }, { "epoch": 0.36, "grad_norm": 3.8913869857788086, "learning_rate": 1.9810667673907398e-05, "loss": 1.8273, "step": 27795 }, { "epoch": 0.36, "grad_norm": 3.959754705429077, "learning_rate": 1.9810647322642674e-05, "loss": 2.1881, "step": 27796 }, { "epoch": 0.36, "grad_norm": 3.3861429691314697, "learning_rate": 1.9810626970294687e-05, "loss": 1.7055, "step": 27797 }, { "epoch": 0.36, "grad_norm": 4.314398765563965, "learning_rate": 1.9810606616863442e-05, "loss": 2.3801, "step": 27798 }, { "epoch": 0.36, "grad_norm": 3.6115100383758545, "learning_rate": 1.9810586262348938e-05, "loss": 1.7549, "step": 27799 }, { "epoch": 0.36, "grad_norm": 4.197246074676514, "learning_rate": 1.981056590675118e-05, "loss": 2.1299, "step": 27800 }, { "epoch": 0.36, "grad_norm": 3.9088456630706787, "learning_rate": 1.9810545550070165e-05, "loss": 2.39, "step": 27801 }, { "epoch": 0.36, "grad_norm": 3.267423629760742, "learning_rate": 1.9810525192305903e-05, "loss": 1.4577, "step": 27802 }, { "epoch": 0.36, "grad_norm": 4.656209468841553, "learning_rate": 1.9810504833458394e-05, "loss": 2.5848, "step": 27803 }, { "epoch": 0.36, "grad_norm": 3.505385637283325, "learning_rate": 1.9810484473527632e-05, "loss": 1.931, "step": 27804 }, { "epoch": 0.36, "grad_norm": 3.7117068767547607, "learning_rate": 1.9810464112513634e-05, "loss": 2.2537, "step": 27805 }, { "epoch": 0.36, "grad_norm": 4.636855125427246, "learning_rate": 1.9810443750416392e-05, "loss": 2.4705, "step": 27806 }, { "epoch": 0.36, "grad_norm": 3.5987706184387207, "learning_rate": 1.981042338723591e-05, "loss": 1.7448, "step": 27807 }, { "epoch": 0.36, "grad_norm": 3.8772222995758057, "learning_rate": 1.9810403022972188e-05, "loss": 2.4552, "step": 27808 }, { "epoch": 0.36, "grad_norm": 3.4875102043151855, "learning_rate": 1.9810382657625237e-05, "loss": 1.8432, "step": 27809 }, { "epoch": 0.36, "grad_norm": 3.935882568359375, "learning_rate": 1.981036229119505e-05, "loss": 1.7798, "step": 27810 }, { "epoch": 0.36, "grad_norm": 3.6542437076568604, "learning_rate": 1.981034192368163e-05, "loss": 1.9958, "step": 27811 }, { "epoch": 0.36, "grad_norm": 3.807227373123169, "learning_rate": 1.9810321555084986e-05, "loss": 1.9652, "step": 27812 }, { "epoch": 0.36, "grad_norm": 4.182093620300293, "learning_rate": 1.9810301185405116e-05, "loss": 1.9528, "step": 27813 }, { "epoch": 0.36, "grad_norm": 4.175350189208984, "learning_rate": 1.981028081464202e-05, "loss": 2.2665, "step": 27814 }, { "epoch": 0.36, "grad_norm": 3.767242193222046, "learning_rate": 1.9810260442795702e-05, "loss": 1.899, "step": 27815 }, { "epoch": 0.36, "grad_norm": 3.9958059787750244, "learning_rate": 1.9810240069866166e-05, "loss": 1.9871, "step": 27816 }, { "epoch": 0.36, "grad_norm": 3.3885297775268555, "learning_rate": 1.9810219695853413e-05, "loss": 1.8059, "step": 27817 }, { "epoch": 0.36, "grad_norm": 4.041010856628418, "learning_rate": 1.9810199320757445e-05, "loss": 2.3022, "step": 27818 }, { "epoch": 0.36, "grad_norm": 4.028384208679199, "learning_rate": 1.9810178944578263e-05, "loss": 1.8644, "step": 27819 }, { "epoch": 0.36, "grad_norm": 3.976952075958252, "learning_rate": 1.9810158567315872e-05, "loss": 2.1878, "step": 27820 }, { "epoch": 0.36, "grad_norm": 3.4892618656158447, "learning_rate": 1.9810138188970275e-05, "loss": 1.6866, "step": 27821 }, { "epoch": 0.36, "grad_norm": 3.4276669025421143, "learning_rate": 1.9810117809541475e-05, "loss": 1.6248, "step": 27822 }, { "epoch": 0.36, "grad_norm": 4.100031852722168, "learning_rate": 1.9810097429029466e-05, "loss": 2.3452, "step": 27823 }, { "epoch": 0.36, "grad_norm": 4.746285438537598, "learning_rate": 1.9810077047434254e-05, "loss": 2.3802, "step": 27824 }, { "epoch": 0.36, "grad_norm": 3.954284191131592, "learning_rate": 1.9810056664755846e-05, "loss": 1.6732, "step": 27825 }, { "epoch": 0.36, "grad_norm": 3.7653019428253174, "learning_rate": 1.9810036280994246e-05, "loss": 1.865, "step": 27826 }, { "epoch": 0.36, "grad_norm": 4.028039455413818, "learning_rate": 1.9810015896149446e-05, "loss": 2.2139, "step": 27827 }, { "epoch": 0.36, "grad_norm": 4.02999210357666, "learning_rate": 1.9809995510221454e-05, "loss": 1.8818, "step": 27828 }, { "epoch": 0.36, "grad_norm": 3.4444355964660645, "learning_rate": 1.9809975123210273e-05, "loss": 2.1055, "step": 27829 }, { "epoch": 0.36, "grad_norm": 4.12063455581665, "learning_rate": 1.9809954735115906e-05, "loss": 2.3011, "step": 27830 }, { "epoch": 0.36, "grad_norm": 4.289140701293945, "learning_rate": 1.9809934345938354e-05, "loss": 2.4547, "step": 27831 }, { "epoch": 0.36, "grad_norm": 3.924435615539551, "learning_rate": 1.980991395567762e-05, "loss": 1.8428, "step": 27832 }, { "epoch": 0.36, "grad_norm": 3.9498960971832275, "learning_rate": 1.9809893564333698e-05, "loss": 2.3394, "step": 27833 }, { "epoch": 0.36, "grad_norm": 3.6656177043914795, "learning_rate": 1.98098731719066e-05, "loss": 1.9833, "step": 27834 }, { "epoch": 0.36, "grad_norm": 3.5430169105529785, "learning_rate": 1.980985277839633e-05, "loss": 1.6127, "step": 27835 }, { "epoch": 0.36, "grad_norm": 4.223998546600342, "learning_rate": 1.9809832383802882e-05, "loss": 2.5984, "step": 27836 }, { "epoch": 0.36, "grad_norm": 3.6019701957702637, "learning_rate": 1.9809811988126262e-05, "loss": 1.8894, "step": 27837 }, { "epoch": 0.36, "grad_norm": 4.1013078689575195, "learning_rate": 1.9809791591366474e-05, "loss": 2.0853, "step": 27838 }, { "epoch": 0.36, "grad_norm": 3.5813820362091064, "learning_rate": 1.980977119352352e-05, "loss": 1.9423, "step": 27839 }, { "epoch": 0.36, "grad_norm": 4.682153701782227, "learning_rate": 1.9809750794597398e-05, "loss": 2.9888, "step": 27840 }, { "epoch": 0.36, "grad_norm": 3.3603315353393555, "learning_rate": 1.9809730394588113e-05, "loss": 1.7303, "step": 27841 }, { "epoch": 0.36, "grad_norm": 4.199971675872803, "learning_rate": 1.980970999349567e-05, "loss": 2.5526, "step": 27842 }, { "epoch": 0.36, "grad_norm": 3.9064431190490723, "learning_rate": 1.9809689591320064e-05, "loss": 1.937, "step": 27843 }, { "epoch": 0.36, "grad_norm": 3.452315092086792, "learning_rate": 1.9809669188061304e-05, "loss": 1.7296, "step": 27844 }, { "epoch": 0.36, "grad_norm": 3.533169746398926, "learning_rate": 1.980964878371939e-05, "loss": 2.0721, "step": 27845 }, { "epoch": 0.36, "grad_norm": 4.211177825927734, "learning_rate": 1.9809628378294324e-05, "loss": 2.1734, "step": 27846 }, { "epoch": 0.36, "grad_norm": 4.084140777587891, "learning_rate": 1.9809607971786115e-05, "loss": 2.3064, "step": 27847 }, { "epoch": 0.36, "grad_norm": 3.764291286468506, "learning_rate": 1.980958756419475e-05, "loss": 1.9723, "step": 27848 }, { "epoch": 0.36, "grad_norm": 3.710191488265991, "learning_rate": 1.9809567155520244e-05, "loss": 1.6915, "step": 27849 }, { "epoch": 0.36, "grad_norm": 3.6589503288269043, "learning_rate": 1.9809546745762596e-05, "loss": 2.1174, "step": 27850 }, { "epoch": 0.36, "grad_norm": 4.0699543952941895, "learning_rate": 1.9809526334921806e-05, "loss": 2.5383, "step": 27851 }, { "epoch": 0.36, "grad_norm": 3.8449318408966064, "learning_rate": 1.980950592299788e-05, "loss": 2.0749, "step": 27852 }, { "epoch": 0.36, "grad_norm": 4.333728313446045, "learning_rate": 1.9809485509990812e-05, "loss": 2.3031, "step": 27853 }, { "epoch": 0.36, "grad_norm": 3.925323247909546, "learning_rate": 1.9809465095900618e-05, "loss": 2.0862, "step": 27854 }, { "epoch": 0.36, "grad_norm": 3.811691999435425, "learning_rate": 1.980944468072729e-05, "loss": 1.8693, "step": 27855 }, { "epoch": 0.36, "grad_norm": 3.6391496658325195, "learning_rate": 1.9809424264470835e-05, "loss": 1.6434, "step": 27856 }, { "epoch": 0.36, "grad_norm": 3.246771812438965, "learning_rate": 1.980940384713125e-05, "loss": 1.8476, "step": 27857 }, { "epoch": 0.36, "grad_norm": 3.897688388824463, "learning_rate": 1.980938342870854e-05, "loss": 2.2099, "step": 27858 }, { "epoch": 0.36, "grad_norm": 4.0607686042785645, "learning_rate": 1.9809363009202707e-05, "loss": 2.1021, "step": 27859 }, { "epoch": 0.36, "grad_norm": 4.083628177642822, "learning_rate": 1.9809342588613754e-05, "loss": 2.0839, "step": 27860 }, { "epoch": 0.36, "grad_norm": 3.8223962783813477, "learning_rate": 1.9809322166941687e-05, "loss": 2.0228, "step": 27861 }, { "epoch": 0.36, "grad_norm": 3.971301794052124, "learning_rate": 1.98093017441865e-05, "loss": 2.1764, "step": 27862 }, { "epoch": 0.36, "grad_norm": 3.3620128631591797, "learning_rate": 1.9809281320348204e-05, "loss": 1.4357, "step": 27863 }, { "epoch": 0.36, "grad_norm": 3.8031234741210938, "learning_rate": 1.9809260895426793e-05, "loss": 1.7687, "step": 27864 }, { "epoch": 0.36, "grad_norm": 3.624699592590332, "learning_rate": 1.9809240469422274e-05, "loss": 1.7564, "step": 27865 }, { "epoch": 0.36, "grad_norm": 4.089574813842773, "learning_rate": 1.980922004233465e-05, "loss": 1.8074, "step": 27866 }, { "epoch": 0.36, "grad_norm": 4.350522518157959, "learning_rate": 1.9809199614163923e-05, "loss": 2.1992, "step": 27867 }, { "epoch": 0.36, "grad_norm": 3.852933406829834, "learning_rate": 1.980917918491009e-05, "loss": 2.1538, "step": 27868 }, { "epoch": 0.36, "grad_norm": 3.9282631874084473, "learning_rate": 1.980915875457316e-05, "loss": 2.06, "step": 27869 }, { "epoch": 0.36, "grad_norm": 4.026673316955566, "learning_rate": 1.9809138323153133e-05, "loss": 2.3899, "step": 27870 }, { "epoch": 0.36, "grad_norm": 3.850752592086792, "learning_rate": 1.980911789065001e-05, "loss": 1.9584, "step": 27871 }, { "epoch": 0.36, "grad_norm": 3.5244367122650146, "learning_rate": 1.980909745706379e-05, "loss": 1.7863, "step": 27872 }, { "epoch": 0.36, "grad_norm": 3.0139145851135254, "learning_rate": 1.9809077022394484e-05, "loss": 1.4153, "step": 27873 }, { "epoch": 0.36, "grad_norm": 4.07448673248291, "learning_rate": 1.980905658664209e-05, "loss": 2.3587, "step": 27874 }, { "epoch": 0.36, "grad_norm": 3.765204906463623, "learning_rate": 1.9809036149806608e-05, "loss": 2.0414, "step": 27875 }, { "epoch": 0.36, "grad_norm": 4.090045928955078, "learning_rate": 1.980901571188804e-05, "loss": 1.8181, "step": 27876 }, { "epoch": 0.36, "grad_norm": 3.1284873485565186, "learning_rate": 1.980899527288639e-05, "loss": 1.8534, "step": 27877 }, { "epoch": 0.36, "grad_norm": 3.7712509632110596, "learning_rate": 1.9808974832801664e-05, "loss": 1.9066, "step": 27878 }, { "epoch": 0.36, "grad_norm": 3.390197277069092, "learning_rate": 1.9808954391633863e-05, "loss": 1.7987, "step": 27879 }, { "epoch": 0.36, "grad_norm": 4.462306976318359, "learning_rate": 1.980893394938298e-05, "loss": 1.6789, "step": 27880 }, { "epoch": 0.36, "grad_norm": 3.792699098587036, "learning_rate": 1.9808913506049028e-05, "loss": 1.9439, "step": 27881 }, { "epoch": 0.36, "grad_norm": 3.917470693588257, "learning_rate": 1.9808893061632005e-05, "loss": 1.8864, "step": 27882 }, { "epoch": 0.36, "grad_norm": 4.079187870025635, "learning_rate": 1.9808872616131914e-05, "loss": 1.8652, "step": 27883 }, { "epoch": 0.36, "grad_norm": 4.011742115020752, "learning_rate": 1.9808852169548757e-05, "loss": 2.284, "step": 27884 }, { "epoch": 0.36, "grad_norm": 3.5387461185455322, "learning_rate": 1.9808831721882536e-05, "loss": 1.9998, "step": 27885 }, { "epoch": 0.36, "grad_norm": 3.550137758255005, "learning_rate": 1.9808811273133254e-05, "loss": 1.5966, "step": 27886 }, { "epoch": 0.36, "grad_norm": 4.192013740539551, "learning_rate": 1.9808790823300912e-05, "loss": 2.4945, "step": 27887 }, { "epoch": 0.36, "grad_norm": 4.488698959350586, "learning_rate": 1.9808770372385517e-05, "loss": 2.0007, "step": 27888 }, { "epoch": 0.36, "grad_norm": 4.29712438583374, "learning_rate": 1.980874992038706e-05, "loss": 2.4236, "step": 27889 }, { "epoch": 0.36, "grad_norm": 4.137863636016846, "learning_rate": 1.9808729467305558e-05, "loss": 1.883, "step": 27890 }, { "epoch": 0.36, "grad_norm": 4.034943103790283, "learning_rate": 1.9808709013141e-05, "loss": 1.9523, "step": 27891 }, { "epoch": 0.36, "grad_norm": 4.082348346710205, "learning_rate": 1.9808688557893395e-05, "loss": 2.359, "step": 27892 }, { "epoch": 0.36, "grad_norm": 3.8942861557006836, "learning_rate": 1.980866810156275e-05, "loss": 2.1537, "step": 27893 }, { "epoch": 0.36, "grad_norm": 3.7579333782196045, "learning_rate": 1.980864764414906e-05, "loss": 2.0106, "step": 27894 }, { "epoch": 0.36, "grad_norm": 4.205079078674316, "learning_rate": 1.9808627185652325e-05, "loss": 2.0883, "step": 27895 }, { "epoch": 0.36, "grad_norm": 4.483007907867432, "learning_rate": 1.980860672607255e-05, "loss": 2.4788, "step": 27896 }, { "epoch": 0.36, "grad_norm": 3.786041021347046, "learning_rate": 1.9808586265409745e-05, "loss": 1.882, "step": 27897 }, { "epoch": 0.36, "grad_norm": 3.4161932468414307, "learning_rate": 1.98085658036639e-05, "loss": 1.793, "step": 27898 }, { "epoch": 0.36, "grad_norm": 3.4803524017333984, "learning_rate": 1.9808545340835024e-05, "loss": 1.5539, "step": 27899 }, { "epoch": 0.36, "grad_norm": 3.967613935470581, "learning_rate": 1.9808524876923118e-05, "loss": 2.2977, "step": 27900 }, { "epoch": 0.36, "grad_norm": 4.303341388702393, "learning_rate": 1.9808504411928188e-05, "loss": 1.9294, "step": 27901 }, { "epoch": 0.36, "grad_norm": 4.299962997436523, "learning_rate": 1.980848394585023e-05, "loss": 2.4461, "step": 27902 }, { "epoch": 0.36, "grad_norm": 3.8890507221221924, "learning_rate": 1.9808463478689246e-05, "loss": 1.6486, "step": 27903 }, { "epoch": 0.36, "grad_norm": 4.66893196105957, "learning_rate": 1.9808443010445247e-05, "loss": 2.4698, "step": 27904 }, { "epoch": 0.36, "grad_norm": 3.9757332801818848, "learning_rate": 1.9808422541118225e-05, "loss": 2.0019, "step": 27905 }, { "epoch": 0.36, "grad_norm": 3.9076008796691895, "learning_rate": 1.980840207070819e-05, "loss": 2.1033, "step": 27906 }, { "epoch": 0.36, "grad_norm": 3.8204474449157715, "learning_rate": 1.980838159921514e-05, "loss": 1.6103, "step": 27907 }, { "epoch": 0.36, "grad_norm": 3.767930269241333, "learning_rate": 1.9808361126639078e-05, "loss": 1.6257, "step": 27908 }, { "epoch": 0.36, "grad_norm": 4.0437188148498535, "learning_rate": 1.9808340652980004e-05, "loss": 1.9757, "step": 27909 }, { "epoch": 0.36, "grad_norm": 4.265703201293945, "learning_rate": 1.9808320178237927e-05, "loss": 2.1591, "step": 27910 }, { "epoch": 0.36, "grad_norm": 4.244842052459717, "learning_rate": 1.9808299702412844e-05, "loss": 2.2299, "step": 27911 }, { "epoch": 0.36, "grad_norm": 3.918621778488159, "learning_rate": 1.9808279225504755e-05, "loss": 1.7352, "step": 27912 }, { "epoch": 0.36, "grad_norm": 3.8862509727478027, "learning_rate": 1.980825874751367e-05, "loss": 2.1332, "step": 27913 }, { "epoch": 0.36, "grad_norm": 3.473386764526367, "learning_rate": 1.9808238268439583e-05, "loss": 2.0966, "step": 27914 }, { "epoch": 0.36, "grad_norm": 4.062744617462158, "learning_rate": 1.9808217788282503e-05, "loss": 2.0769, "step": 27915 }, { "epoch": 0.36, "grad_norm": 3.8024327754974365, "learning_rate": 1.980819730704243e-05, "loss": 1.9543, "step": 27916 }, { "epoch": 0.36, "grad_norm": 3.5665857791900635, "learning_rate": 1.980817682471936e-05, "loss": 1.8127, "step": 27917 }, { "epoch": 0.36, "grad_norm": 4.114813804626465, "learning_rate": 1.9808156341313306e-05, "loss": 2.3481, "step": 27918 }, { "epoch": 0.36, "grad_norm": 4.049529075622559, "learning_rate": 1.9808135856824265e-05, "loss": 1.8846, "step": 27919 }, { "epoch": 0.36, "grad_norm": 4.431116104125977, "learning_rate": 1.980811537125224e-05, "loss": 2.3359, "step": 27920 }, { "epoch": 0.36, "grad_norm": 3.630298614501953, "learning_rate": 1.980809488459723e-05, "loss": 1.6965, "step": 27921 }, { "epoch": 0.36, "grad_norm": 3.7179818153381348, "learning_rate": 1.980807439685924e-05, "loss": 2.0612, "step": 27922 }, { "epoch": 0.36, "grad_norm": 3.6478641033172607, "learning_rate": 1.9808053908038275e-05, "loss": 1.5556, "step": 27923 }, { "epoch": 0.36, "grad_norm": 3.38663649559021, "learning_rate": 1.9808033418134332e-05, "loss": 1.6923, "step": 27924 }, { "epoch": 0.36, "grad_norm": 3.6660659313201904, "learning_rate": 1.9808012927147414e-05, "loss": 1.7304, "step": 27925 }, { "epoch": 0.36, "grad_norm": 3.6509628295898438, "learning_rate": 1.980799243507753e-05, "loss": 1.8602, "step": 27926 }, { "epoch": 0.36, "grad_norm": 3.6007275581359863, "learning_rate": 1.9807971941924674e-05, "loss": 1.9421, "step": 27927 }, { "epoch": 0.36, "grad_norm": 3.405480146408081, "learning_rate": 1.9807951447688853e-05, "loss": 1.8011, "step": 27928 }, { "epoch": 0.36, "grad_norm": 3.7545971870422363, "learning_rate": 1.9807930952370065e-05, "loss": 1.9622, "step": 27929 }, { "epoch": 0.36, "grad_norm": 4.528227806091309, "learning_rate": 1.9807910455968316e-05, "loss": 2.4678, "step": 27930 }, { "epoch": 0.36, "grad_norm": 4.0443291664123535, "learning_rate": 1.980788995848361e-05, "loss": 2.0447, "step": 27931 }, { "epoch": 0.36, "grad_norm": 3.352753162384033, "learning_rate": 1.9807869459915945e-05, "loss": 1.587, "step": 27932 }, { "epoch": 0.36, "grad_norm": 3.784884452819824, "learning_rate": 1.9807848960265326e-05, "loss": 1.9052, "step": 27933 }, { "epoch": 0.36, "grad_norm": 3.9211769104003906, "learning_rate": 1.980782845953175e-05, "loss": 2.1804, "step": 27934 }, { "epoch": 0.36, "grad_norm": 3.60056471824646, "learning_rate": 1.980780795771523e-05, "loss": 2.0495, "step": 27935 }, { "epoch": 0.36, "grad_norm": 3.5995521545410156, "learning_rate": 1.9807787454815755e-05, "loss": 1.8753, "step": 27936 }, { "epoch": 0.36, "grad_norm": 4.074568748474121, "learning_rate": 1.9807766950833336e-05, "loss": 2.1911, "step": 27937 }, { "epoch": 0.36, "grad_norm": 3.639469623565674, "learning_rate": 1.980774644576797e-05, "loss": 1.9787, "step": 27938 }, { "epoch": 0.36, "grad_norm": 3.899005174636841, "learning_rate": 1.9807725939619668e-05, "loss": 1.761, "step": 27939 }, { "epoch": 0.36, "grad_norm": 3.8754758834838867, "learning_rate": 1.9807705432388426e-05, "loss": 1.9481, "step": 27940 }, { "epoch": 0.36, "grad_norm": 3.9723093509674072, "learning_rate": 1.9807684924074246e-05, "loss": 1.8851, "step": 27941 }, { "epoch": 0.36, "grad_norm": 4.022511005401611, "learning_rate": 1.980766441467713e-05, "loss": 2.0897, "step": 27942 }, { "epoch": 0.36, "grad_norm": 4.069015979766846, "learning_rate": 1.9807643904197084e-05, "loss": 2.2891, "step": 27943 }, { "epoch": 0.36, "grad_norm": 3.7506048679351807, "learning_rate": 1.9807623392634102e-05, "loss": 1.8631, "step": 27944 }, { "epoch": 0.36, "grad_norm": 3.8505163192749023, "learning_rate": 1.98076028799882e-05, "loss": 2.0775, "step": 27945 }, { "epoch": 0.36, "grad_norm": 3.761763334274292, "learning_rate": 1.9807582366259365e-05, "loss": 1.8935, "step": 27946 }, { "epoch": 0.36, "grad_norm": 3.8262033462524414, "learning_rate": 1.980756185144761e-05, "loss": 1.9819, "step": 27947 }, { "epoch": 0.36, "grad_norm": 4.801921844482422, "learning_rate": 1.9807541335552934e-05, "loss": 2.2674, "step": 27948 }, { "epoch": 0.36, "grad_norm": 4.255814075469971, "learning_rate": 1.980752081857534e-05, "loss": 2.1806, "step": 27949 }, { "epoch": 0.36, "grad_norm": 4.527841091156006, "learning_rate": 1.9807500300514828e-05, "loss": 2.1588, "step": 27950 }, { "epoch": 0.36, "grad_norm": 4.134469985961914, "learning_rate": 1.9807479781371403e-05, "loss": 2.1785, "step": 27951 }, { "epoch": 0.36, "grad_norm": 4.016840934753418, "learning_rate": 1.9807459261145063e-05, "loss": 1.6114, "step": 27952 }, { "epoch": 0.36, "grad_norm": 3.572908639907837, "learning_rate": 1.9807438739835815e-05, "loss": 1.7576, "step": 27953 }, { "epoch": 0.36, "grad_norm": 3.6434130668640137, "learning_rate": 1.9807418217443656e-05, "loss": 1.8041, "step": 27954 }, { "epoch": 0.36, "grad_norm": 4.042424201965332, "learning_rate": 1.9807397693968596e-05, "loss": 2.5227, "step": 27955 }, { "epoch": 0.36, "grad_norm": 3.6985669136047363, "learning_rate": 1.980737716941063e-05, "loss": 2.0268, "step": 27956 }, { "epoch": 0.36, "grad_norm": 4.03312349319458, "learning_rate": 1.9807356643769766e-05, "loss": 1.9558, "step": 27957 }, { "epoch": 0.36, "grad_norm": 3.77038836479187, "learning_rate": 1.9807336117046002e-05, "loss": 2.2021, "step": 27958 }, { "epoch": 0.36, "grad_norm": 3.411325216293335, "learning_rate": 1.980731558923934e-05, "loss": 1.6224, "step": 27959 }, { "epoch": 0.36, "grad_norm": 3.6067774295806885, "learning_rate": 1.980729506034979e-05, "loss": 1.9902, "step": 27960 }, { "epoch": 0.36, "grad_norm": 4.3663763999938965, "learning_rate": 1.9807274530377342e-05, "loss": 2.3127, "step": 27961 }, { "epoch": 0.36, "grad_norm": 4.310068130493164, "learning_rate": 1.9807253999322004e-05, "loss": 2.2178, "step": 27962 }, { "epoch": 0.36, "grad_norm": 3.8430979251861572, "learning_rate": 1.9807233467183783e-05, "loss": 2.0742, "step": 27963 }, { "epoch": 0.36, "grad_norm": 4.248108863830566, "learning_rate": 1.9807212933962673e-05, "loss": 2.1757, "step": 27964 }, { "epoch": 0.36, "grad_norm": 4.218009948730469, "learning_rate": 1.9807192399658683e-05, "loss": 2.1755, "step": 27965 }, { "epoch": 0.36, "grad_norm": 3.6343536376953125, "learning_rate": 1.9807171864271812e-05, "loss": 1.6892, "step": 27966 }, { "epoch": 0.36, "grad_norm": 4.272615432739258, "learning_rate": 1.9807151327802064e-05, "loss": 2.3252, "step": 27967 }, { "epoch": 0.36, "grad_norm": 3.873887062072754, "learning_rate": 1.9807130790249438e-05, "loss": 2.2739, "step": 27968 }, { "epoch": 0.36, "grad_norm": 3.3806493282318115, "learning_rate": 1.9807110251613942e-05, "loss": 1.483, "step": 27969 }, { "epoch": 0.36, "grad_norm": 4.2098212242126465, "learning_rate": 1.980708971189557e-05, "loss": 2.3041, "step": 27970 }, { "epoch": 0.36, "grad_norm": 3.9069604873657227, "learning_rate": 1.980706917109433e-05, "loss": 1.7434, "step": 27971 }, { "epoch": 0.36, "grad_norm": 3.9796841144561768, "learning_rate": 1.9807048629210225e-05, "loss": 2.0166, "step": 27972 }, { "epoch": 0.36, "grad_norm": 3.9173898696899414, "learning_rate": 1.9807028086243257e-05, "loss": 2.1183, "step": 27973 }, { "epoch": 0.36, "grad_norm": 4.474874496459961, "learning_rate": 1.9807007542193424e-05, "loss": 2.114, "step": 27974 }, { "epoch": 0.36, "grad_norm": 4.085780143737793, "learning_rate": 1.980698699706073e-05, "loss": 1.7898, "step": 27975 }, { "epoch": 0.36, "grad_norm": 4.142542362213135, "learning_rate": 1.980696645084518e-05, "loss": 2.3841, "step": 27976 }, { "epoch": 0.36, "grad_norm": 4.088687896728516, "learning_rate": 1.9806945903546774e-05, "loss": 2.2547, "step": 27977 }, { "epoch": 0.36, "grad_norm": 3.7276053428649902, "learning_rate": 1.9806925355165516e-05, "loss": 1.9504, "step": 27978 }, { "epoch": 0.36, "grad_norm": 3.5288138389587402, "learning_rate": 1.9806904805701405e-05, "loss": 1.7541, "step": 27979 }, { "epoch": 0.36, "grad_norm": 3.6691713333129883, "learning_rate": 1.9806884255154447e-05, "loss": 1.9866, "step": 27980 }, { "epoch": 0.36, "grad_norm": 3.602808713912964, "learning_rate": 1.980686370352464e-05, "loss": 1.8821, "step": 27981 }, { "epoch": 0.36, "grad_norm": 3.444678783416748, "learning_rate": 1.9806843150811993e-05, "loss": 1.7968, "step": 27982 }, { "epoch": 0.36, "grad_norm": 3.8490381240844727, "learning_rate": 1.98068225970165e-05, "loss": 1.7378, "step": 27983 }, { "epoch": 0.36, "grad_norm": 3.508131504058838, "learning_rate": 1.980680204213817e-05, "loss": 1.4494, "step": 27984 }, { "epoch": 0.36, "grad_norm": 4.067254066467285, "learning_rate": 1.9806781486177003e-05, "loss": 2.1756, "step": 27985 }, { "epoch": 0.36, "grad_norm": 3.6922028064727783, "learning_rate": 1.9806760929133e-05, "loss": 2.0384, "step": 27986 }, { "epoch": 0.36, "grad_norm": 3.1319503784179688, "learning_rate": 1.9806740371006164e-05, "loss": 1.6024, "step": 27987 }, { "epoch": 0.36, "grad_norm": 4.851800441741943, "learning_rate": 1.98067198117965e-05, "loss": 2.6495, "step": 27988 }, { "epoch": 0.36, "grad_norm": 3.7171339988708496, "learning_rate": 1.9806699251504003e-05, "loss": 2.175, "step": 27989 }, { "epoch": 0.36, "grad_norm": 3.956150770187378, "learning_rate": 1.9806678690128684e-05, "loss": 2.1379, "step": 27990 }, { "epoch": 0.36, "grad_norm": 3.7447028160095215, "learning_rate": 1.9806658127670538e-05, "loss": 1.9699, "step": 27991 }, { "epoch": 0.36, "grad_norm": 3.937148332595825, "learning_rate": 1.9806637564129573e-05, "loss": 2.0287, "step": 27992 }, { "epoch": 0.36, "grad_norm": 4.134027481079102, "learning_rate": 1.9806616999505788e-05, "loss": 2.377, "step": 27993 }, { "epoch": 0.36, "grad_norm": 3.165053129196167, "learning_rate": 1.9806596433799186e-05, "loss": 1.5851, "step": 27994 }, { "epoch": 0.36, "grad_norm": 3.723203420639038, "learning_rate": 1.9806575867009768e-05, "loss": 1.8594, "step": 27995 }, { "epoch": 0.36, "grad_norm": 3.750584125518799, "learning_rate": 1.980655529913754e-05, "loss": 1.8866, "step": 27996 }, { "epoch": 0.36, "grad_norm": 3.785148859024048, "learning_rate": 1.9806534730182503e-05, "loss": 1.9815, "step": 27997 }, { "epoch": 0.36, "grad_norm": 3.998516798019409, "learning_rate": 1.9806514160144656e-05, "loss": 2.0484, "step": 27998 }, { "epoch": 0.36, "grad_norm": 3.761784553527832, "learning_rate": 1.9806493589024006e-05, "loss": 2.0544, "step": 27999 }, { "epoch": 0.36, "grad_norm": 4.138552665710449, "learning_rate": 1.980647301682055e-05, "loss": 1.8887, "step": 28000 }, { "epoch": 0.36, "grad_norm": 3.2704076766967773, "learning_rate": 1.980645244353429e-05, "loss": 1.7749, "step": 28001 }, { "epoch": 0.36, "grad_norm": 4.275131702423096, "learning_rate": 1.9806431869165236e-05, "loss": 2.8037, "step": 28002 }, { "epoch": 0.36, "grad_norm": 3.954164981842041, "learning_rate": 1.9806411293713385e-05, "loss": 1.8382, "step": 28003 }, { "epoch": 0.36, "grad_norm": 3.9891395568847656, "learning_rate": 1.980639071717874e-05, "loss": 2.1074, "step": 28004 }, { "epoch": 0.36, "grad_norm": 4.06980037689209, "learning_rate": 1.98063701395613e-05, "loss": 2.0311, "step": 28005 }, { "epoch": 0.36, "grad_norm": 3.427567958831787, "learning_rate": 1.9806349560861075e-05, "loss": 1.8963, "step": 28006 }, { "epoch": 0.36, "grad_norm": 4.097609996795654, "learning_rate": 1.980632898107806e-05, "loss": 2.2246, "step": 28007 }, { "epoch": 0.36, "grad_norm": 4.347663402557373, "learning_rate": 1.9806308400212262e-05, "loss": 1.8703, "step": 28008 }, { "epoch": 0.36, "grad_norm": 3.782410144805908, "learning_rate": 1.9806287818263678e-05, "loss": 1.9253, "step": 28009 }, { "epoch": 0.36, "grad_norm": 3.4202542304992676, "learning_rate": 1.9806267235232315e-05, "loss": 2.0269, "step": 28010 }, { "epoch": 0.36, "grad_norm": 4.063338756561279, "learning_rate": 1.9806246651118176e-05, "loss": 2.2047, "step": 28011 }, { "epoch": 0.36, "grad_norm": 3.8682987689971924, "learning_rate": 1.9806226065921255e-05, "loss": 1.7278, "step": 28012 }, { "epoch": 0.36, "grad_norm": 3.8891007900238037, "learning_rate": 1.9806205479641565e-05, "loss": 2.1542, "step": 28013 }, { "epoch": 0.36, "grad_norm": 4.101349353790283, "learning_rate": 1.9806184892279102e-05, "loss": 2.4375, "step": 28014 }, { "epoch": 0.36, "grad_norm": 3.8658785820007324, "learning_rate": 1.980616430383387e-05, "loss": 1.864, "step": 28015 }, { "epoch": 0.36, "grad_norm": 3.368356466293335, "learning_rate": 1.9806143714305874e-05, "loss": 1.7578, "step": 28016 }, { "epoch": 0.36, "grad_norm": 3.576127290725708, "learning_rate": 1.980612312369511e-05, "loss": 2.1061, "step": 28017 }, { "epoch": 0.36, "grad_norm": 3.540614128112793, "learning_rate": 1.9806102532001583e-05, "loss": 1.8499, "step": 28018 }, { "epoch": 0.36, "grad_norm": 4.110457897186279, "learning_rate": 1.98060819392253e-05, "loss": 2.3311, "step": 28019 }, { "epoch": 0.36, "grad_norm": 4.105559825897217, "learning_rate": 1.9806061345366253e-05, "loss": 2.2665, "step": 28020 }, { "epoch": 0.36, "grad_norm": 3.5727617740631104, "learning_rate": 1.9806040750424458e-05, "loss": 1.685, "step": 28021 }, { "epoch": 0.36, "grad_norm": 3.516390323638916, "learning_rate": 1.9806020154399904e-05, "loss": 1.8055, "step": 28022 }, { "epoch": 0.36, "grad_norm": 3.494166851043701, "learning_rate": 1.9805999557292602e-05, "loss": 1.9669, "step": 28023 }, { "epoch": 0.36, "grad_norm": 3.589479923248291, "learning_rate": 1.980597895910255e-05, "loss": 1.9896, "step": 28024 }, { "epoch": 0.36, "grad_norm": 3.61543869972229, "learning_rate": 1.9805958359829752e-05, "loss": 1.8799, "step": 28025 }, { "epoch": 0.36, "grad_norm": 4.512337684631348, "learning_rate": 1.980593775947421e-05, "loss": 2.3925, "step": 28026 }, { "epoch": 0.36, "grad_norm": 4.478307247161865, "learning_rate": 1.9805917158035924e-05, "loss": 1.9775, "step": 28027 }, { "epoch": 0.36, "grad_norm": 4.183848857879639, "learning_rate": 1.98058965555149e-05, "loss": 2.3077, "step": 28028 }, { "epoch": 0.36, "grad_norm": 4.331993103027344, "learning_rate": 1.9805875951911138e-05, "loss": 2.4605, "step": 28029 }, { "epoch": 0.36, "grad_norm": 3.827543020248413, "learning_rate": 1.9805855347224643e-05, "loss": 1.9976, "step": 28030 }, { "epoch": 0.36, "grad_norm": 4.060161113739014, "learning_rate": 1.9805834741455416e-05, "loss": 2.34, "step": 28031 }, { "epoch": 0.36, "grad_norm": 3.839959144592285, "learning_rate": 1.980581413460345e-05, "loss": 2.4093, "step": 28032 }, { "epoch": 0.36, "grad_norm": 3.3417773246765137, "learning_rate": 1.9805793526668763e-05, "loss": 1.6076, "step": 28033 }, { "epoch": 0.36, "grad_norm": 3.8468997478485107, "learning_rate": 1.980577291765135e-05, "loss": 2.0111, "step": 28034 }, { "epoch": 0.36, "grad_norm": 3.5979058742523193, "learning_rate": 1.9805752307551214e-05, "loss": 1.8432, "step": 28035 }, { "epoch": 0.36, "grad_norm": 4.700645446777344, "learning_rate": 1.9805731696368355e-05, "loss": 1.9044, "step": 28036 }, { "epoch": 0.36, "grad_norm": 3.8455417156219482, "learning_rate": 1.9805711084102778e-05, "loss": 1.9554, "step": 28037 }, { "epoch": 0.36, "grad_norm": 3.8346197605133057, "learning_rate": 1.9805690470754483e-05, "loss": 2.0251, "step": 28038 }, { "epoch": 0.36, "grad_norm": 3.8014461994171143, "learning_rate": 1.980566985632347e-05, "loss": 2.5407, "step": 28039 }, { "epoch": 0.36, "grad_norm": 3.940314292907715, "learning_rate": 1.980564924080975e-05, "loss": 1.9037, "step": 28040 }, { "epoch": 0.36, "grad_norm": 3.3564867973327637, "learning_rate": 1.9805628624213316e-05, "loss": 1.4593, "step": 28041 }, { "epoch": 0.36, "grad_norm": 4.15713357925415, "learning_rate": 1.980560800653418e-05, "loss": 2.2266, "step": 28042 }, { "epoch": 0.36, "grad_norm": 4.231529235839844, "learning_rate": 1.980558738777233e-05, "loss": 2.3316, "step": 28043 }, { "epoch": 0.36, "grad_norm": 3.81291127204895, "learning_rate": 1.9805566767927784e-05, "loss": 2.3672, "step": 28044 }, { "epoch": 0.36, "grad_norm": 3.7223477363586426, "learning_rate": 1.9805546147000533e-05, "loss": 1.8642, "step": 28045 }, { "epoch": 0.36, "grad_norm": 4.497034549713135, "learning_rate": 1.9805525524990587e-05, "loss": 2.3029, "step": 28046 }, { "epoch": 0.36, "grad_norm": 3.652296781539917, "learning_rate": 1.9805504901897944e-05, "loss": 1.8201, "step": 28047 }, { "epoch": 0.36, "grad_norm": 3.7009363174438477, "learning_rate": 1.9805484277722603e-05, "loss": 1.9687, "step": 28048 }, { "epoch": 0.36, "grad_norm": 4.894650459289551, "learning_rate": 1.9805463652464577e-05, "loss": 2.6481, "step": 28049 }, { "epoch": 0.36, "grad_norm": 3.7873926162719727, "learning_rate": 1.9805443026123858e-05, "loss": 1.7053, "step": 28050 }, { "epoch": 0.36, "grad_norm": 3.906616449356079, "learning_rate": 1.9805422398700447e-05, "loss": 2.1517, "step": 28051 }, { "epoch": 0.36, "grad_norm": 4.33595609664917, "learning_rate": 1.9805401770194358e-05, "loss": 2.4143, "step": 28052 }, { "epoch": 0.36, "grad_norm": 4.376701831817627, "learning_rate": 1.9805381140605582e-05, "loss": 2.238, "step": 28053 }, { "epoch": 0.36, "grad_norm": 3.610347032546997, "learning_rate": 1.9805360509934125e-05, "loss": 1.7049, "step": 28054 }, { "epoch": 0.36, "grad_norm": 3.53074312210083, "learning_rate": 1.9805339878179994e-05, "loss": 1.7498, "step": 28055 }, { "epoch": 0.36, "grad_norm": 3.932582139968872, "learning_rate": 1.980531924534318e-05, "loss": 2.4119, "step": 28056 }, { "epoch": 0.36, "grad_norm": 4.0208845138549805, "learning_rate": 1.98052986114237e-05, "loss": 1.8715, "step": 28057 }, { "epoch": 0.36, "grad_norm": 4.1260199546813965, "learning_rate": 1.9805277976421545e-05, "loss": 2.5903, "step": 28058 }, { "epoch": 0.36, "grad_norm": 4.310278415679932, "learning_rate": 1.9805257340336724e-05, "loss": 2.0864, "step": 28059 }, { "epoch": 0.36, "grad_norm": 3.8467049598693848, "learning_rate": 1.9805236703169232e-05, "loss": 1.9724, "step": 28060 }, { "epoch": 0.36, "grad_norm": 4.457405090332031, "learning_rate": 1.980521606491908e-05, "loss": 2.1585, "step": 28061 }, { "epoch": 0.36, "grad_norm": 3.1853201389312744, "learning_rate": 1.9805195425586264e-05, "loss": 1.5582, "step": 28062 }, { "epoch": 0.36, "grad_norm": 3.814633369445801, "learning_rate": 1.9805174785170784e-05, "loss": 1.8183, "step": 28063 }, { "epoch": 0.36, "grad_norm": 4.243645668029785, "learning_rate": 1.980515414367265e-05, "loss": 2.2096, "step": 28064 }, { "epoch": 0.36, "grad_norm": 3.651312828063965, "learning_rate": 1.9805133501091862e-05, "loss": 1.8156, "step": 28065 }, { "epoch": 0.36, "grad_norm": 3.930999279022217, "learning_rate": 1.980511285742842e-05, "loss": 2.3797, "step": 28066 }, { "epoch": 0.36, "grad_norm": 3.400693655014038, "learning_rate": 1.9805092212682325e-05, "loss": 1.8056, "step": 28067 }, { "epoch": 0.36, "grad_norm": 3.1863834857940674, "learning_rate": 1.9805071566853583e-05, "loss": 1.5926, "step": 28068 }, { "epoch": 0.36, "grad_norm": 3.769300699234009, "learning_rate": 1.9805050919942194e-05, "loss": 2.0174, "step": 28069 }, { "epoch": 0.36, "grad_norm": 3.704486846923828, "learning_rate": 1.9805030271948164e-05, "loss": 1.7427, "step": 28070 }, { "epoch": 0.36, "grad_norm": 3.8040382862091064, "learning_rate": 1.980500962287149e-05, "loss": 2.4523, "step": 28071 }, { "epoch": 0.36, "grad_norm": 3.860267400741577, "learning_rate": 1.9804988972712177e-05, "loss": 1.9675, "step": 28072 }, { "epoch": 0.36, "grad_norm": 3.6693053245544434, "learning_rate": 1.9804968321470227e-05, "loss": 2.0051, "step": 28073 }, { "epoch": 0.36, "grad_norm": 3.8161532878875732, "learning_rate": 1.9804947669145643e-05, "loss": 2.1412, "step": 28074 }, { "epoch": 0.36, "grad_norm": 4.010778903961182, "learning_rate": 1.9804927015738422e-05, "loss": 1.9239, "step": 28075 }, { "epoch": 0.36, "grad_norm": 4.054476737976074, "learning_rate": 1.9804906361248575e-05, "loss": 2.2433, "step": 28076 }, { "epoch": 0.36, "grad_norm": 3.882784366607666, "learning_rate": 1.98048857056761e-05, "loss": 2.0841, "step": 28077 }, { "epoch": 0.36, "grad_norm": 4.238120079040527, "learning_rate": 1.9804865049020997e-05, "loss": 1.6234, "step": 28078 }, { "epoch": 0.36, "grad_norm": 4.240159034729004, "learning_rate": 1.9804844391283273e-05, "loss": 2.3914, "step": 28079 }, { "epoch": 0.36, "grad_norm": 5.077322483062744, "learning_rate": 1.9804823732462925e-05, "loss": 2.4239, "step": 28080 }, { "epoch": 0.36, "grad_norm": 4.303864002227783, "learning_rate": 1.9804803072559958e-05, "loss": 2.2405, "step": 28081 }, { "epoch": 0.36, "grad_norm": 4.234029769897461, "learning_rate": 1.9804782411574378e-05, "loss": 2.0727, "step": 28082 }, { "epoch": 0.36, "grad_norm": 3.6110692024230957, "learning_rate": 1.9804761749506184e-05, "loss": 1.8307, "step": 28083 }, { "epoch": 0.36, "grad_norm": 3.9589757919311523, "learning_rate": 1.9804741086355374e-05, "loss": 1.7986, "step": 28084 }, { "epoch": 0.36, "grad_norm": 4.385982513427734, "learning_rate": 1.9804720422121958e-05, "loss": 2.4853, "step": 28085 }, { "epoch": 0.36, "grad_norm": 3.7065682411193848, "learning_rate": 1.980469975680593e-05, "loss": 2.1842, "step": 28086 }, { "epoch": 0.36, "grad_norm": 3.710937976837158, "learning_rate": 1.98046790904073e-05, "loss": 1.6552, "step": 28087 }, { "epoch": 0.36, "grad_norm": 3.692850351333618, "learning_rate": 1.9804658422926067e-05, "loss": 2.1253, "step": 28088 }, { "epoch": 0.36, "grad_norm": 4.110816478729248, "learning_rate": 1.980463775436223e-05, "loss": 2.4108, "step": 28089 }, { "epoch": 0.36, "grad_norm": 3.090756416320801, "learning_rate": 1.98046170847158e-05, "loss": 1.6314, "step": 28090 }, { "epoch": 0.36, "grad_norm": 3.9228484630584717, "learning_rate": 1.9804596413986773e-05, "loss": 2.1408, "step": 28091 }, { "epoch": 0.36, "grad_norm": 4.116783142089844, "learning_rate": 1.980457574217515e-05, "loss": 2.1614, "step": 28092 }, { "epoch": 0.36, "grad_norm": 4.120394706726074, "learning_rate": 1.9804555069280936e-05, "loss": 2.3014, "step": 28093 }, { "epoch": 0.36, "grad_norm": 3.93992018699646, "learning_rate": 1.9804534395304133e-05, "loss": 2.1404, "step": 28094 }, { "epoch": 0.36, "grad_norm": 4.4205474853515625, "learning_rate": 1.980451372024474e-05, "loss": 2.5932, "step": 28095 }, { "epoch": 0.36, "grad_norm": 3.507750988006592, "learning_rate": 1.9804493044102767e-05, "loss": 1.7393, "step": 28096 }, { "epoch": 0.36, "grad_norm": 3.950657606124878, "learning_rate": 1.980447236687821e-05, "loss": 2.1499, "step": 28097 }, { "epoch": 0.36, "grad_norm": 3.5677649974823, "learning_rate": 1.9804451688571073e-05, "loss": 1.7028, "step": 28098 }, { "epoch": 0.36, "grad_norm": 4.155029773712158, "learning_rate": 1.980443100918136e-05, "loss": 1.9085, "step": 28099 }, { "epoch": 0.36, "grad_norm": 3.8941097259521484, "learning_rate": 1.980441032870907e-05, "loss": 1.7592, "step": 28100 }, { "epoch": 0.36, "grad_norm": 3.8402278423309326, "learning_rate": 1.9804389647154205e-05, "loss": 1.8876, "step": 28101 }, { "epoch": 0.36, "grad_norm": 3.8129758834838867, "learning_rate": 1.9804368964516768e-05, "loss": 1.9837, "step": 28102 }, { "epoch": 0.36, "grad_norm": 4.017940998077393, "learning_rate": 1.9804348280796766e-05, "loss": 2.035, "step": 28103 }, { "epoch": 0.36, "grad_norm": 3.716681957244873, "learning_rate": 1.9804327595994194e-05, "loss": 1.9656, "step": 28104 }, { "epoch": 0.36, "grad_norm": 3.7105767726898193, "learning_rate": 1.980430691010906e-05, "loss": 1.8958, "step": 28105 }, { "epoch": 0.36, "grad_norm": 3.778031826019287, "learning_rate": 1.9804286223141368e-05, "loss": 1.7997, "step": 28106 }, { "epoch": 0.36, "grad_norm": 4.281643867492676, "learning_rate": 1.9804265535091113e-05, "loss": 2.4794, "step": 28107 }, { "epoch": 0.36, "grad_norm": 3.777657985687256, "learning_rate": 1.98042448459583e-05, "loss": 1.9063, "step": 28108 }, { "epoch": 0.36, "grad_norm": 3.6777045726776123, "learning_rate": 1.9804224155742933e-05, "loss": 2.1757, "step": 28109 }, { "epoch": 0.36, "grad_norm": 3.4735310077667236, "learning_rate": 1.980420346444501e-05, "loss": 1.7881, "step": 28110 }, { "epoch": 0.36, "grad_norm": 3.6171348094940186, "learning_rate": 1.980418277206454e-05, "loss": 1.6336, "step": 28111 }, { "epoch": 0.36, "grad_norm": 3.6760642528533936, "learning_rate": 1.9804162078601522e-05, "loss": 1.9254, "step": 28112 }, { "epoch": 0.36, "grad_norm": 4.078512191772461, "learning_rate": 1.980414138405596e-05, "loss": 2.0645, "step": 28113 }, { "epoch": 0.36, "grad_norm": 4.742659568786621, "learning_rate": 1.980412068842785e-05, "loss": 2.5697, "step": 28114 }, { "epoch": 0.36, "grad_norm": 4.071633815765381, "learning_rate": 1.98040999917172e-05, "loss": 2.0386, "step": 28115 }, { "epoch": 0.36, "grad_norm": 3.3121695518493652, "learning_rate": 1.9804079293924016e-05, "loss": 1.7164, "step": 28116 }, { "epoch": 0.36, "grad_norm": 3.5243356227874756, "learning_rate": 1.980405859504829e-05, "loss": 1.651, "step": 28117 }, { "epoch": 0.36, "grad_norm": 3.740527868270874, "learning_rate": 1.9804037895090032e-05, "loss": 1.6866, "step": 28118 }, { "epoch": 0.36, "grad_norm": 4.0282392501831055, "learning_rate": 1.980401719404924e-05, "loss": 1.9906, "step": 28119 }, { "epoch": 0.36, "grad_norm": 3.627357244491577, "learning_rate": 1.9803996491925915e-05, "loss": 2.0219, "step": 28120 }, { "epoch": 0.36, "grad_norm": 4.307430267333984, "learning_rate": 1.980397578872007e-05, "loss": 2.3336, "step": 28121 }, { "epoch": 0.36, "grad_norm": 3.7279131412506104, "learning_rate": 1.9803955084431698e-05, "loss": 1.9893, "step": 28122 }, { "epoch": 0.36, "grad_norm": 3.924410104751587, "learning_rate": 1.9803934379060798e-05, "loss": 2.1669, "step": 28123 }, { "epoch": 0.36, "grad_norm": 3.3766872882843018, "learning_rate": 1.980391367260738e-05, "loss": 1.3837, "step": 28124 }, { "epoch": 0.36, "grad_norm": 3.49792218208313, "learning_rate": 1.9803892965071444e-05, "loss": 1.7632, "step": 28125 }, { "epoch": 0.37, "grad_norm": 3.118474006652832, "learning_rate": 1.9803872256452993e-05, "loss": 1.5696, "step": 28126 }, { "epoch": 0.37, "grad_norm": 3.1019113063812256, "learning_rate": 1.9803851546752027e-05, "loss": 1.6686, "step": 28127 }, { "epoch": 0.37, "grad_norm": 4.218879699707031, "learning_rate": 1.980383083596855e-05, "loss": 2.4513, "step": 28128 }, { "epoch": 0.37, "grad_norm": 4.189600467681885, "learning_rate": 1.9803810124102564e-05, "loss": 2.4907, "step": 28129 }, { "epoch": 0.37, "grad_norm": 3.8779683113098145, "learning_rate": 1.980378941115407e-05, "loss": 1.8653, "step": 28130 }, { "epoch": 0.37, "grad_norm": 4.345383167266846, "learning_rate": 1.9803768697123068e-05, "loss": 2.1391, "step": 28131 }, { "epoch": 0.37, "grad_norm": 3.9614310264587402, "learning_rate": 1.980374798200957e-05, "loss": 2.0228, "step": 28132 }, { "epoch": 0.37, "grad_norm": 3.982755184173584, "learning_rate": 1.9803727265813567e-05, "loss": 2.0355, "step": 28133 }, { "epoch": 0.37, "grad_norm": 3.7818899154663086, "learning_rate": 1.980370654853507e-05, "loss": 1.7708, "step": 28134 }, { "epoch": 0.37, "grad_norm": 4.056725978851318, "learning_rate": 1.9803685830174078e-05, "loss": 2.3704, "step": 28135 }, { "epoch": 0.37, "grad_norm": 3.8467891216278076, "learning_rate": 1.9803665110730587e-05, "loss": 1.469, "step": 28136 }, { "epoch": 0.37, "grad_norm": 3.7514841556549072, "learning_rate": 1.980364439020461e-05, "loss": 1.8284, "step": 28137 }, { "epoch": 0.37, "grad_norm": 3.805588722229004, "learning_rate": 1.9803623668596144e-05, "loss": 1.5632, "step": 28138 }, { "epoch": 0.37, "grad_norm": 4.887326240539551, "learning_rate": 1.980360294590519e-05, "loss": 2.4856, "step": 28139 }, { "epoch": 0.37, "grad_norm": 3.7827341556549072, "learning_rate": 1.9803582222131753e-05, "loss": 2.0921, "step": 28140 }, { "epoch": 0.37, "grad_norm": 3.549774646759033, "learning_rate": 1.9803561497275833e-05, "loss": 1.7791, "step": 28141 }, { "epoch": 0.37, "grad_norm": 4.0654683113098145, "learning_rate": 1.980354077133743e-05, "loss": 2.0806, "step": 28142 }, { "epoch": 0.37, "grad_norm": 3.757810115814209, "learning_rate": 1.980352004431656e-05, "loss": 2.0178, "step": 28143 }, { "epoch": 0.37, "grad_norm": 3.3084349632263184, "learning_rate": 1.9803499316213207e-05, "loss": 1.6124, "step": 28144 }, { "epoch": 0.37, "grad_norm": 4.224225044250488, "learning_rate": 1.9803478587027386e-05, "loss": 2.1949, "step": 28145 }, { "epoch": 0.37, "grad_norm": 3.6020872592926025, "learning_rate": 1.980345785675909e-05, "loss": 1.8881, "step": 28146 }, { "epoch": 0.37, "grad_norm": 4.575000286102295, "learning_rate": 1.9803437125408326e-05, "loss": 2.3815, "step": 28147 }, { "epoch": 0.37, "grad_norm": 3.7444584369659424, "learning_rate": 1.98034163929751e-05, "loss": 2.0356, "step": 28148 }, { "epoch": 0.37, "grad_norm": 3.49865984916687, "learning_rate": 1.980339565945941e-05, "loss": 1.8917, "step": 28149 }, { "epoch": 0.37, "grad_norm": 3.345217704772949, "learning_rate": 1.9803374924861255e-05, "loss": 1.6188, "step": 28150 }, { "epoch": 0.37, "grad_norm": 3.9305050373077393, "learning_rate": 1.9803354189180646e-05, "loss": 2.0382, "step": 28151 }, { "epoch": 0.37, "grad_norm": 3.7254488468170166, "learning_rate": 1.9803333452417577e-05, "loss": 1.8545, "step": 28152 }, { "epoch": 0.37, "grad_norm": 4.1351318359375, "learning_rate": 1.9803312714572055e-05, "loss": 2.212, "step": 28153 }, { "epoch": 0.37, "grad_norm": 3.720686912536621, "learning_rate": 1.9803291975644078e-05, "loss": 1.982, "step": 28154 }, { "epoch": 0.37, "grad_norm": 3.8030922412872314, "learning_rate": 1.9803271235633657e-05, "loss": 1.9102, "step": 28155 }, { "epoch": 0.37, "grad_norm": 4.1413984298706055, "learning_rate": 1.9803250494540783e-05, "loss": 2.1744, "step": 28156 }, { "epoch": 0.37, "grad_norm": 3.7914562225341797, "learning_rate": 1.980322975236547e-05, "loss": 1.6691, "step": 28157 }, { "epoch": 0.37, "grad_norm": 3.969005823135376, "learning_rate": 1.980320900910771e-05, "loss": 1.9421, "step": 28158 }, { "epoch": 0.37, "grad_norm": 3.652817487716675, "learning_rate": 1.9803188264767508e-05, "loss": 1.9441, "step": 28159 }, { "epoch": 0.37, "grad_norm": 3.670732021331787, "learning_rate": 1.980316751934487e-05, "loss": 1.5374, "step": 28160 }, { "epoch": 0.37, "grad_norm": 3.791534423828125, "learning_rate": 1.9803146772839796e-05, "loss": 2.2018, "step": 28161 }, { "epoch": 0.37, "grad_norm": 3.567270278930664, "learning_rate": 1.9803126025252286e-05, "loss": 2.0919, "step": 28162 }, { "epoch": 0.37, "grad_norm": 3.449761390686035, "learning_rate": 1.980310527658235e-05, "loss": 1.6299, "step": 28163 }, { "epoch": 0.37, "grad_norm": 3.4891669750213623, "learning_rate": 1.980308452682998e-05, "loss": 1.8859, "step": 28164 }, { "epoch": 0.37, "grad_norm": 4.070998191833496, "learning_rate": 1.9803063775995183e-05, "loss": 2.38, "step": 28165 }, { "epoch": 0.37, "grad_norm": 3.8018791675567627, "learning_rate": 1.9803043024077963e-05, "loss": 2.0285, "step": 28166 }, { "epoch": 0.37, "grad_norm": 3.9159440994262695, "learning_rate": 1.980302227107832e-05, "loss": 2.0282, "step": 28167 }, { "epoch": 0.37, "grad_norm": 3.6339313983917236, "learning_rate": 1.9803001516996256e-05, "loss": 1.6922, "step": 28168 }, { "epoch": 0.37, "grad_norm": 3.9640026092529297, "learning_rate": 1.980298076183178e-05, "loss": 2.1451, "step": 28169 }, { "epoch": 0.37, "grad_norm": 4.128853797912598, "learning_rate": 1.9802960005584882e-05, "loss": 2.0658, "step": 28170 }, { "epoch": 0.37, "grad_norm": 4.462397575378418, "learning_rate": 1.980293924825557e-05, "loss": 2.394, "step": 28171 }, { "epoch": 0.37, "grad_norm": 4.259158611297607, "learning_rate": 1.9802918489843855e-05, "loss": 2.7842, "step": 28172 }, { "epoch": 0.37, "grad_norm": 3.637732744216919, "learning_rate": 1.9802897730349725e-05, "loss": 1.8443, "step": 28173 }, { "epoch": 0.37, "grad_norm": 4.264204978942871, "learning_rate": 1.9802876969773192e-05, "loss": 2.4021, "step": 28174 }, { "epoch": 0.37, "grad_norm": 3.613344430923462, "learning_rate": 1.9802856208114253e-05, "loss": 1.8853, "step": 28175 }, { "epoch": 0.37, "grad_norm": 3.790388822555542, "learning_rate": 1.9802835445372914e-05, "loss": 1.9612, "step": 28176 }, { "epoch": 0.37, "grad_norm": 4.367836952209473, "learning_rate": 1.9802814681549175e-05, "loss": 2.1827, "step": 28177 }, { "epoch": 0.37, "grad_norm": 3.6292953491210938, "learning_rate": 1.980279391664304e-05, "loss": 1.8528, "step": 28178 }, { "epoch": 0.37, "grad_norm": 4.04172945022583, "learning_rate": 1.980277315065451e-05, "loss": 2.4472, "step": 28179 }, { "epoch": 0.37, "grad_norm": 3.604039430618286, "learning_rate": 1.9802752383583585e-05, "loss": 1.7338, "step": 28180 }, { "epoch": 0.37, "grad_norm": 3.9482200145721436, "learning_rate": 1.980273161543027e-05, "loss": 1.8715, "step": 28181 }, { "epoch": 0.37, "grad_norm": 3.7945117950439453, "learning_rate": 1.980271084619457e-05, "loss": 2.2662, "step": 28182 }, { "epoch": 0.37, "grad_norm": 3.7142672538757324, "learning_rate": 1.980269007587648e-05, "loss": 1.7869, "step": 28183 }, { "epoch": 0.37, "grad_norm": 4.379141807556152, "learning_rate": 1.9802669304476013e-05, "loss": 2.2625, "step": 28184 }, { "epoch": 0.37, "grad_norm": 4.681943893432617, "learning_rate": 1.980264853199316e-05, "loss": 2.877, "step": 28185 }, { "epoch": 0.37, "grad_norm": 4.250970840454102, "learning_rate": 1.9802627758427934e-05, "loss": 2.1111, "step": 28186 }, { "epoch": 0.37, "grad_norm": 3.749774694442749, "learning_rate": 1.9802606983780327e-05, "loss": 1.8828, "step": 28187 }, { "epoch": 0.37, "grad_norm": 3.9867939949035645, "learning_rate": 1.9802586208050348e-05, "loss": 2.5575, "step": 28188 }, { "epoch": 0.37, "grad_norm": 3.439997434616089, "learning_rate": 1.9802565431237992e-05, "loss": 1.7853, "step": 28189 }, { "epoch": 0.37, "grad_norm": 3.976003885269165, "learning_rate": 1.980254465334327e-05, "loss": 1.9891, "step": 28190 }, { "epoch": 0.37, "grad_norm": 3.4299910068511963, "learning_rate": 1.980252387436618e-05, "loss": 1.5381, "step": 28191 }, { "epoch": 0.37, "grad_norm": 4.173340320587158, "learning_rate": 1.9802503094306728e-05, "loss": 2.4307, "step": 28192 }, { "epoch": 0.37, "grad_norm": 3.4920952320098877, "learning_rate": 1.9802482313164912e-05, "loss": 1.7164, "step": 28193 }, { "epoch": 0.37, "grad_norm": 3.868055820465088, "learning_rate": 1.9802461530940735e-05, "loss": 2.0219, "step": 28194 }, { "epoch": 0.37, "grad_norm": 3.6754212379455566, "learning_rate": 1.9802440747634198e-05, "loss": 2.1366, "step": 28195 }, { "epoch": 0.37, "grad_norm": 3.560549736022949, "learning_rate": 1.980241996324531e-05, "loss": 1.7236, "step": 28196 }, { "epoch": 0.37, "grad_norm": 3.3939781188964844, "learning_rate": 1.9802399177774065e-05, "loss": 1.7225, "step": 28197 }, { "epoch": 0.37, "grad_norm": 3.865856170654297, "learning_rate": 1.9802378391220468e-05, "loss": 2.321, "step": 28198 }, { "epoch": 0.37, "grad_norm": 3.499976396560669, "learning_rate": 1.980235760358453e-05, "loss": 1.9297, "step": 28199 }, { "epoch": 0.37, "grad_norm": 3.792168140411377, "learning_rate": 1.9802336814866234e-05, "loss": 1.5912, "step": 28200 }, { "epoch": 0.37, "grad_norm": 4.281800270080566, "learning_rate": 1.98023160250656e-05, "loss": 2.1272, "step": 28201 }, { "epoch": 0.37, "grad_norm": 3.8838117122650146, "learning_rate": 1.9802295234182622e-05, "loss": 2.1391, "step": 28202 }, { "epoch": 0.37, "grad_norm": 3.9252703189849854, "learning_rate": 1.9802274442217305e-05, "loss": 2.0468, "step": 28203 }, { "epoch": 0.37, "grad_norm": 3.543227195739746, "learning_rate": 1.9802253649169652e-05, "loss": 1.6699, "step": 28204 }, { "epoch": 0.37, "grad_norm": 3.8304550647735596, "learning_rate": 1.9802232855039664e-05, "loss": 2.118, "step": 28205 }, { "epoch": 0.37, "grad_norm": 3.617424726486206, "learning_rate": 1.980221205982734e-05, "loss": 2.0583, "step": 28206 }, { "epoch": 0.37, "grad_norm": 4.1995463371276855, "learning_rate": 1.980219126353269e-05, "loss": 1.9777, "step": 28207 }, { "epoch": 0.37, "grad_norm": 3.384730815887451, "learning_rate": 1.980217046615571e-05, "loss": 1.7884, "step": 28208 }, { "epoch": 0.37, "grad_norm": 3.7808876037597656, "learning_rate": 1.9802149667696403e-05, "loss": 1.9229, "step": 28209 }, { "epoch": 0.37, "grad_norm": 3.916853904724121, "learning_rate": 1.9802128868154772e-05, "loss": 2.1033, "step": 28210 }, { "epoch": 0.37, "grad_norm": 3.8818328380584717, "learning_rate": 1.980210806753082e-05, "loss": 1.9434, "step": 28211 }, { "epoch": 0.37, "grad_norm": 3.2897870540618896, "learning_rate": 1.980208726582455e-05, "loss": 1.5755, "step": 28212 }, { "epoch": 0.37, "grad_norm": 4.031137943267822, "learning_rate": 1.980206646303596e-05, "loss": 1.9085, "step": 28213 }, { "epoch": 0.37, "grad_norm": 4.451114654541016, "learning_rate": 1.980204565916506e-05, "loss": 2.117, "step": 28214 }, { "epoch": 0.37, "grad_norm": 3.783679962158203, "learning_rate": 1.9802024854211846e-05, "loss": 2.1168, "step": 28215 }, { "epoch": 0.37, "grad_norm": 3.9623937606811523, "learning_rate": 1.980200404817632e-05, "loss": 1.9807, "step": 28216 }, { "epoch": 0.37, "grad_norm": 4.64260721206665, "learning_rate": 1.980198324105849e-05, "loss": 2.0109, "step": 28217 }, { "epoch": 0.37, "grad_norm": 3.7518177032470703, "learning_rate": 1.9801962432858353e-05, "loss": 1.9874, "step": 28218 }, { "epoch": 0.37, "grad_norm": 4.195886611938477, "learning_rate": 1.9801941623575915e-05, "loss": 2.2999, "step": 28219 }, { "epoch": 0.37, "grad_norm": 3.842665672302246, "learning_rate": 1.980192081321117e-05, "loss": 2.4424, "step": 28220 }, { "epoch": 0.37, "grad_norm": 3.4414658546447754, "learning_rate": 1.9801900001764137e-05, "loss": 1.6285, "step": 28221 }, { "epoch": 0.37, "grad_norm": 3.786742925643921, "learning_rate": 1.98018791892348e-05, "loss": 1.875, "step": 28222 }, { "epoch": 0.37, "grad_norm": 3.910003900527954, "learning_rate": 1.980185837562317e-05, "loss": 2.3672, "step": 28223 }, { "epoch": 0.37, "grad_norm": 4.154327392578125, "learning_rate": 1.980183756092925e-05, "loss": 2.5747, "step": 28224 }, { "epoch": 0.37, "grad_norm": 3.683558702468872, "learning_rate": 1.9801816745153042e-05, "loss": 1.8809, "step": 28225 }, { "epoch": 0.37, "grad_norm": 3.8818233013153076, "learning_rate": 1.9801795928294548e-05, "loss": 1.7959, "step": 28226 }, { "epoch": 0.37, "grad_norm": 3.6743991374969482, "learning_rate": 1.9801775110353765e-05, "loss": 2.2171, "step": 28227 }, { "epoch": 0.37, "grad_norm": 4.207501411437988, "learning_rate": 1.9801754291330702e-05, "loss": 2.2573, "step": 28228 }, { "epoch": 0.37, "grad_norm": 3.9937503337860107, "learning_rate": 1.980173347122536e-05, "loss": 2.4866, "step": 28229 }, { "epoch": 0.37, "grad_norm": 3.4223439693450928, "learning_rate": 1.9801712650037743e-05, "loss": 1.4777, "step": 28230 }, { "epoch": 0.37, "grad_norm": 3.4371497631073, "learning_rate": 1.9801691827767846e-05, "loss": 1.7233, "step": 28231 }, { "epoch": 0.37, "grad_norm": 3.431776285171509, "learning_rate": 1.9801671004415675e-05, "loss": 1.5066, "step": 28232 }, { "epoch": 0.37, "grad_norm": 3.5299060344696045, "learning_rate": 1.9801650179981237e-05, "loss": 1.836, "step": 28233 }, { "epoch": 0.37, "grad_norm": 3.441678047180176, "learning_rate": 1.9801629354464528e-05, "loss": 1.625, "step": 28234 }, { "epoch": 0.37, "grad_norm": 4.181521892547607, "learning_rate": 1.9801608527865555e-05, "loss": 1.9237, "step": 28235 }, { "epoch": 0.37, "grad_norm": 3.5330042839050293, "learning_rate": 1.9801587700184317e-05, "loss": 1.9441, "step": 28236 }, { "epoch": 0.37, "grad_norm": 3.5639660358428955, "learning_rate": 1.9801566871420815e-05, "loss": 1.5201, "step": 28237 }, { "epoch": 0.37, "grad_norm": 3.69747257232666, "learning_rate": 1.9801546041575056e-05, "loss": 1.8905, "step": 28238 }, { "epoch": 0.37, "grad_norm": 3.468242645263672, "learning_rate": 1.9801525210647043e-05, "loss": 1.8223, "step": 28239 }, { "epoch": 0.37, "grad_norm": 3.8864898681640625, "learning_rate": 1.9801504378636773e-05, "loss": 2.1846, "step": 28240 }, { "epoch": 0.37, "grad_norm": 3.5251617431640625, "learning_rate": 1.9801483545544248e-05, "loss": 1.9012, "step": 28241 }, { "epoch": 0.37, "grad_norm": 4.140627384185791, "learning_rate": 1.9801462711369477e-05, "loss": 2.0833, "step": 28242 }, { "epoch": 0.37, "grad_norm": 4.169143199920654, "learning_rate": 1.9801441876112454e-05, "loss": 1.8368, "step": 28243 }, { "epoch": 0.37, "grad_norm": 4.373768329620361, "learning_rate": 1.9801421039773188e-05, "loss": 2.0792, "step": 28244 }, { "epoch": 0.37, "grad_norm": 3.6315555572509766, "learning_rate": 1.9801400202351678e-05, "loss": 1.7922, "step": 28245 }, { "epoch": 0.37, "grad_norm": 4.5522236824035645, "learning_rate": 1.980137936384793e-05, "loss": 1.8695, "step": 28246 }, { "epoch": 0.37, "grad_norm": 3.3461945056915283, "learning_rate": 1.9801358524261944e-05, "loss": 1.6318, "step": 28247 }, { "epoch": 0.37, "grad_norm": 4.223656177520752, "learning_rate": 1.980133768359372e-05, "loss": 2.0796, "step": 28248 }, { "epoch": 0.37, "grad_norm": 4.44162654876709, "learning_rate": 1.9801316841843258e-05, "loss": 2.103, "step": 28249 }, { "epoch": 0.37, "grad_norm": 3.486271858215332, "learning_rate": 1.980129599901057e-05, "loss": 1.5534, "step": 28250 }, { "epoch": 0.37, "grad_norm": 3.649082660675049, "learning_rate": 1.9801275155095647e-05, "loss": 2.1764, "step": 28251 }, { "epoch": 0.37, "grad_norm": 3.673696279525757, "learning_rate": 1.9801254310098498e-05, "loss": 1.5628, "step": 28252 }, { "epoch": 0.37, "grad_norm": 4.481714725494385, "learning_rate": 1.980123346401913e-05, "loss": 2.4249, "step": 28253 }, { "epoch": 0.37, "grad_norm": 3.7922556400299072, "learning_rate": 1.9801212616857533e-05, "loss": 2.4754, "step": 28254 }, { "epoch": 0.37, "grad_norm": 4.054638862609863, "learning_rate": 1.980119176861372e-05, "loss": 2.2676, "step": 28255 }, { "epoch": 0.37, "grad_norm": 4.335914134979248, "learning_rate": 1.9801170919287687e-05, "loss": 2.2345, "step": 28256 }, { "epoch": 0.37, "grad_norm": 2.853633403778076, "learning_rate": 1.980115006887944e-05, "loss": 1.3189, "step": 28257 }, { "epoch": 0.37, "grad_norm": 3.90944766998291, "learning_rate": 1.980112921738898e-05, "loss": 1.68, "step": 28258 }, { "epoch": 0.37, "grad_norm": 3.6085400581359863, "learning_rate": 1.9801108364816305e-05, "loss": 1.6386, "step": 28259 }, { "epoch": 0.37, "grad_norm": 3.815964460372925, "learning_rate": 1.9801087511161426e-05, "loss": 1.7997, "step": 28260 }, { "epoch": 0.37, "grad_norm": 4.153319835662842, "learning_rate": 1.980106665642434e-05, "loss": 2.4233, "step": 28261 }, { "epoch": 0.37, "grad_norm": 3.9467315673828125, "learning_rate": 1.9801045800605047e-05, "loss": 2.004, "step": 28262 }, { "epoch": 0.37, "grad_norm": 4.737447261810303, "learning_rate": 1.9801024943703555e-05, "loss": 2.2304, "step": 28263 }, { "epoch": 0.37, "grad_norm": 4.230428695678711, "learning_rate": 1.9801004085719863e-05, "loss": 2.2841, "step": 28264 }, { "epoch": 0.37, "grad_norm": 3.972503423690796, "learning_rate": 1.9800983226653974e-05, "loss": 1.8739, "step": 28265 }, { "epoch": 0.37, "grad_norm": 3.606834650039673, "learning_rate": 1.980096236650589e-05, "loss": 1.619, "step": 28266 }, { "epoch": 0.37, "grad_norm": 4.377460479736328, "learning_rate": 1.980094150527561e-05, "loss": 2.0459, "step": 28267 }, { "epoch": 0.37, "grad_norm": 3.951913595199585, "learning_rate": 1.9800920642963147e-05, "loss": 2.2383, "step": 28268 }, { "epoch": 0.37, "grad_norm": 4.452670097351074, "learning_rate": 1.980089977956849e-05, "loss": 2.1301, "step": 28269 }, { "epoch": 0.37, "grad_norm": 4.243063926696777, "learning_rate": 1.980087891509165e-05, "loss": 1.9069, "step": 28270 }, { "epoch": 0.37, "grad_norm": 3.8752644062042236, "learning_rate": 1.9800858049532627e-05, "loss": 2.1564, "step": 28271 }, { "epoch": 0.37, "grad_norm": 3.8779611587524414, "learning_rate": 1.980083718289142e-05, "loss": 2.1768, "step": 28272 }, { "epoch": 0.37, "grad_norm": 4.027087688446045, "learning_rate": 1.980081631516804e-05, "loss": 2.2455, "step": 28273 }, { "epoch": 0.37, "grad_norm": 3.7894346714019775, "learning_rate": 1.980079544636248e-05, "loss": 1.8297, "step": 28274 }, { "epoch": 0.37, "grad_norm": 3.651332378387451, "learning_rate": 1.9800774576474745e-05, "loss": 1.7756, "step": 28275 }, { "epoch": 0.37, "grad_norm": 3.832350492477417, "learning_rate": 1.980075370550484e-05, "loss": 1.8702, "step": 28276 }, { "epoch": 0.37, "grad_norm": 3.9515368938446045, "learning_rate": 1.9800732833452765e-05, "loss": 1.7994, "step": 28277 }, { "epoch": 0.37, "grad_norm": 3.2803683280944824, "learning_rate": 1.980071196031852e-05, "loss": 1.5756, "step": 28278 }, { "epoch": 0.37, "grad_norm": 4.165101528167725, "learning_rate": 1.9800691086102116e-05, "loss": 2.0493, "step": 28279 }, { "epoch": 0.37, "grad_norm": 3.8732035160064697, "learning_rate": 1.9800670210803546e-05, "loss": 2.247, "step": 28280 }, { "epoch": 0.37, "grad_norm": 3.8981120586395264, "learning_rate": 1.9800649334422816e-05, "loss": 1.8117, "step": 28281 }, { "epoch": 0.37, "grad_norm": 3.964414358139038, "learning_rate": 1.9800628456959928e-05, "loss": 1.8321, "step": 28282 }, { "epoch": 0.37, "grad_norm": 4.375979423522949, "learning_rate": 1.9800607578414883e-05, "loss": 2.2513, "step": 28283 }, { "epoch": 0.37, "grad_norm": 3.9769465923309326, "learning_rate": 1.9800586698787687e-05, "loss": 2.0692, "step": 28284 }, { "epoch": 0.37, "grad_norm": 4.269630432128906, "learning_rate": 1.9800565818078338e-05, "loss": 2.5668, "step": 28285 }, { "epoch": 0.37, "grad_norm": 4.099216938018799, "learning_rate": 1.9800544936286843e-05, "loss": 1.9228, "step": 28286 }, { "epoch": 0.37, "grad_norm": 4.412238121032715, "learning_rate": 1.98005240534132e-05, "loss": 2.171, "step": 28287 }, { "epoch": 0.37, "grad_norm": 3.5761497020721436, "learning_rate": 1.980050316945741e-05, "loss": 1.8699, "step": 28288 }, { "epoch": 0.37, "grad_norm": 4.028847694396973, "learning_rate": 1.9800482284419483e-05, "loss": 2.079, "step": 28289 }, { "epoch": 0.37, "grad_norm": 4.529924392700195, "learning_rate": 1.9800461398299413e-05, "loss": 2.2981, "step": 28290 }, { "epoch": 0.37, "grad_norm": 3.7821385860443115, "learning_rate": 1.9800440511097207e-05, "loss": 2.4144, "step": 28291 }, { "epoch": 0.37, "grad_norm": 4.583939552307129, "learning_rate": 1.9800419622812867e-05, "loss": 2.3224, "step": 28292 }, { "epoch": 0.37, "grad_norm": 3.7965896129608154, "learning_rate": 1.9800398733446394e-05, "loss": 1.8604, "step": 28293 }, { "epoch": 0.37, "grad_norm": 4.532847881317139, "learning_rate": 1.980037784299779e-05, "loss": 2.1089, "step": 28294 }, { "epoch": 0.37, "grad_norm": 3.759960889816284, "learning_rate": 1.9800356951467055e-05, "loss": 1.8439, "step": 28295 }, { "epoch": 0.37, "grad_norm": 3.48850417137146, "learning_rate": 1.98003360588542e-05, "loss": 1.8728, "step": 28296 }, { "epoch": 0.37, "grad_norm": 3.5172760486602783, "learning_rate": 1.980031516515922e-05, "loss": 1.8792, "step": 28297 }, { "epoch": 0.37, "grad_norm": 4.294108867645264, "learning_rate": 1.980029427038212e-05, "loss": 2.1717, "step": 28298 }, { "epoch": 0.37, "grad_norm": 3.8616719245910645, "learning_rate": 1.98002733745229e-05, "loss": 1.7618, "step": 28299 }, { "epoch": 0.37, "grad_norm": 3.8802483081817627, "learning_rate": 1.980025247758156e-05, "loss": 2.085, "step": 28300 }, { "epoch": 0.37, "grad_norm": 3.791827917098999, "learning_rate": 1.980023157955811e-05, "loss": 2.302, "step": 28301 }, { "epoch": 0.37, "grad_norm": 4.413743495941162, "learning_rate": 1.980021068045255e-05, "loss": 2.5092, "step": 28302 }, { "epoch": 0.37, "grad_norm": 3.72910213470459, "learning_rate": 1.9800189780264874e-05, "loss": 1.5359, "step": 28303 }, { "epoch": 0.37, "grad_norm": 4.190402030944824, "learning_rate": 1.9800168878995095e-05, "loss": 2.3552, "step": 28304 }, { "epoch": 0.37, "grad_norm": 4.469699382781982, "learning_rate": 1.980014797664321e-05, "loss": 2.4163, "step": 28305 }, { "epoch": 0.37, "grad_norm": 4.054511070251465, "learning_rate": 1.9800127073209225e-05, "loss": 2.1827, "step": 28306 }, { "epoch": 0.37, "grad_norm": 4.249385356903076, "learning_rate": 1.9800106168693137e-05, "loss": 2.0178, "step": 28307 }, { "epoch": 0.37, "grad_norm": 3.439662456512451, "learning_rate": 1.980008526309495e-05, "loss": 1.8078, "step": 28308 }, { "epoch": 0.37, "grad_norm": 4.1962432861328125, "learning_rate": 1.980006435641467e-05, "loss": 2.3751, "step": 28309 }, { "epoch": 0.37, "grad_norm": 4.53890323638916, "learning_rate": 1.9800043448652296e-05, "loss": 2.5459, "step": 28310 }, { "epoch": 0.37, "grad_norm": 4.499697208404541, "learning_rate": 1.980002253980783e-05, "loss": 2.336, "step": 28311 }, { "epoch": 0.37, "grad_norm": 3.6747336387634277, "learning_rate": 1.9800001629881275e-05, "loss": 2.2952, "step": 28312 }, { "epoch": 0.37, "grad_norm": 4.2274675369262695, "learning_rate": 1.9799980718872636e-05, "loss": 1.8784, "step": 28313 }, { "epoch": 0.37, "grad_norm": 3.708549737930298, "learning_rate": 1.979995980678191e-05, "loss": 2.4165, "step": 28314 }, { "epoch": 0.37, "grad_norm": 4.046371936798096, "learning_rate": 1.9799938893609104e-05, "loss": 2.4755, "step": 28315 }, { "epoch": 0.37, "grad_norm": 3.616528272628784, "learning_rate": 1.9799917979354218e-05, "loss": 1.9486, "step": 28316 }, { "epoch": 0.37, "grad_norm": 3.2254018783569336, "learning_rate": 1.9799897064017254e-05, "loss": 1.4516, "step": 28317 }, { "epoch": 0.37, "grad_norm": 3.7569916248321533, "learning_rate": 1.9799876147598216e-05, "loss": 2.1877, "step": 28318 }, { "epoch": 0.37, "grad_norm": 3.832274913787842, "learning_rate": 1.9799855230097103e-05, "loss": 2.2148, "step": 28319 }, { "epoch": 0.37, "grad_norm": 3.7173099517822266, "learning_rate": 1.9799834311513923e-05, "loss": 2.0104, "step": 28320 }, { "epoch": 0.37, "grad_norm": 4.238694667816162, "learning_rate": 1.9799813391848675e-05, "loss": 1.8835, "step": 28321 }, { "epoch": 0.37, "grad_norm": 3.4579224586486816, "learning_rate": 1.979979247110136e-05, "loss": 1.9551, "step": 28322 }, { "epoch": 0.37, "grad_norm": 3.698509931564331, "learning_rate": 1.9799771549271983e-05, "loss": 1.7325, "step": 28323 }, { "epoch": 0.37, "grad_norm": 3.8927576541900635, "learning_rate": 1.9799750626360542e-05, "loss": 2.34, "step": 28324 }, { "epoch": 0.37, "grad_norm": 3.754171371459961, "learning_rate": 1.9799729702367044e-05, "loss": 1.8462, "step": 28325 }, { "epoch": 0.37, "grad_norm": 4.531864166259766, "learning_rate": 1.979970877729149e-05, "loss": 2.0708, "step": 28326 }, { "epoch": 0.37, "grad_norm": 3.8067543506622314, "learning_rate": 1.979968785113388e-05, "loss": 2.4309, "step": 28327 }, { "epoch": 0.37, "grad_norm": 3.518712043762207, "learning_rate": 1.979966692389422e-05, "loss": 1.8105, "step": 28328 }, { "epoch": 0.37, "grad_norm": 4.233952522277832, "learning_rate": 1.9799645995572513e-05, "loss": 2.4853, "step": 28329 }, { "epoch": 0.37, "grad_norm": 4.880305767059326, "learning_rate": 1.9799625066168758e-05, "loss": 2.5532, "step": 28330 }, { "epoch": 0.37, "grad_norm": 4.301541805267334, "learning_rate": 1.9799604135682955e-05, "loss": 2.936, "step": 28331 }, { "epoch": 0.37, "grad_norm": 3.944164752960205, "learning_rate": 1.979958320411511e-05, "loss": 1.7292, "step": 28332 }, { "epoch": 0.37, "grad_norm": 3.947279691696167, "learning_rate": 1.9799562271465225e-05, "loss": 1.8523, "step": 28333 }, { "epoch": 0.37, "grad_norm": 4.3600358963012695, "learning_rate": 1.9799541337733305e-05, "loss": 2.3066, "step": 28334 }, { "epoch": 0.37, "grad_norm": 4.382669925689697, "learning_rate": 1.9799520402919343e-05, "loss": 1.8774, "step": 28335 }, { "epoch": 0.37, "grad_norm": 3.2468390464782715, "learning_rate": 1.979949946702335e-05, "loss": 1.3806, "step": 28336 }, { "epoch": 0.37, "grad_norm": 4.318386554718018, "learning_rate": 1.979947853004533e-05, "loss": 2.4533, "step": 28337 }, { "epoch": 0.37, "grad_norm": 4.116631031036377, "learning_rate": 1.979945759198528e-05, "loss": 1.9117, "step": 28338 }, { "epoch": 0.37, "grad_norm": 3.8381221294403076, "learning_rate": 1.97994366528432e-05, "loss": 1.9244, "step": 28339 }, { "epoch": 0.37, "grad_norm": 4.213726043701172, "learning_rate": 1.97994157126191e-05, "loss": 2.2516, "step": 28340 }, { "epoch": 0.37, "grad_norm": 3.175144910812378, "learning_rate": 1.9799394771312978e-05, "loss": 1.6676, "step": 28341 }, { "epoch": 0.37, "grad_norm": 3.7864503860473633, "learning_rate": 1.9799373828924838e-05, "loss": 1.6898, "step": 28342 }, { "epoch": 0.37, "grad_norm": 3.660174608230591, "learning_rate": 1.9799352885454678e-05, "loss": 1.632, "step": 28343 }, { "epoch": 0.37, "grad_norm": 4.009968280792236, "learning_rate": 1.97993319409025e-05, "loss": 2.374, "step": 28344 }, { "epoch": 0.37, "grad_norm": 3.790022134780884, "learning_rate": 1.9799310995268313e-05, "loss": 2.15, "step": 28345 }, { "epoch": 0.37, "grad_norm": 3.914517879486084, "learning_rate": 1.9799290048552116e-05, "loss": 2.2942, "step": 28346 }, { "epoch": 0.37, "grad_norm": 4.338380336761475, "learning_rate": 1.9799269100753912e-05, "loss": 1.8454, "step": 28347 }, { "epoch": 0.37, "grad_norm": 4.196358680725098, "learning_rate": 1.9799248151873702e-05, "loss": 2.2011, "step": 28348 }, { "epoch": 0.37, "grad_norm": 3.713874101638794, "learning_rate": 1.9799227201911488e-05, "loss": 1.6257, "step": 28349 }, { "epoch": 0.37, "grad_norm": 3.5605969429016113, "learning_rate": 1.9799206250867274e-05, "loss": 1.8152, "step": 28350 }, { "epoch": 0.37, "grad_norm": 4.061454772949219, "learning_rate": 1.979918529874106e-05, "loss": 1.7667, "step": 28351 }, { "epoch": 0.37, "grad_norm": 3.467726469039917, "learning_rate": 1.979916434553285e-05, "loss": 1.7688, "step": 28352 }, { "epoch": 0.37, "grad_norm": 4.237091541290283, "learning_rate": 1.9799143391242647e-05, "loss": 2.2142, "step": 28353 }, { "epoch": 0.37, "grad_norm": 3.765864849090576, "learning_rate": 1.979912243587045e-05, "loss": 2.1385, "step": 28354 }, { "epoch": 0.37, "grad_norm": 3.98176646232605, "learning_rate": 1.9799101479416265e-05, "loss": 2.2498, "step": 28355 }, { "epoch": 0.37, "grad_norm": 5.01624870300293, "learning_rate": 1.9799080521880096e-05, "loss": 2.8658, "step": 28356 }, { "epoch": 0.37, "grad_norm": 3.6135683059692383, "learning_rate": 1.979905956326194e-05, "loss": 1.8304, "step": 28357 }, { "epoch": 0.37, "grad_norm": 3.682629346847534, "learning_rate": 1.97990386035618e-05, "loss": 2.1083, "step": 28358 }, { "epoch": 0.37, "grad_norm": 4.098138332366943, "learning_rate": 1.979901764277968e-05, "loss": 1.9843, "step": 28359 }, { "epoch": 0.37, "grad_norm": 3.297583818435669, "learning_rate": 1.9798996680915584e-05, "loss": 1.6138, "step": 28360 }, { "epoch": 0.37, "grad_norm": 4.180704116821289, "learning_rate": 1.979897571796951e-05, "loss": 2.2359, "step": 28361 }, { "epoch": 0.37, "grad_norm": 3.7950079441070557, "learning_rate": 1.9798954753941467e-05, "loss": 1.8294, "step": 28362 }, { "epoch": 0.37, "grad_norm": 3.3983702659606934, "learning_rate": 1.9798933788831448e-05, "loss": 1.6296, "step": 28363 }, { "epoch": 0.37, "grad_norm": 4.249314308166504, "learning_rate": 1.9798912822639463e-05, "loss": 1.9122, "step": 28364 }, { "epoch": 0.37, "grad_norm": 3.607896566390991, "learning_rate": 1.9798891855365515e-05, "loss": 1.9258, "step": 28365 }, { "epoch": 0.37, "grad_norm": 4.60899543762207, "learning_rate": 1.9798870887009598e-05, "loss": 2.742, "step": 28366 }, { "epoch": 0.37, "grad_norm": 4.343780994415283, "learning_rate": 1.979884991757172e-05, "loss": 2.3388, "step": 28367 }, { "epoch": 0.37, "grad_norm": 3.472576856613159, "learning_rate": 1.9798828947051886e-05, "loss": 2.011, "step": 28368 }, { "epoch": 0.37, "grad_norm": 3.6785144805908203, "learning_rate": 1.9798807975450095e-05, "loss": 1.8506, "step": 28369 }, { "epoch": 0.37, "grad_norm": 4.210968971252441, "learning_rate": 1.9798787002766347e-05, "loss": 2.4866, "step": 28370 }, { "epoch": 0.37, "grad_norm": 3.8954105377197266, "learning_rate": 1.9798766029000648e-05, "loss": 1.7119, "step": 28371 }, { "epoch": 0.37, "grad_norm": 3.834055185317993, "learning_rate": 1.9798745054152995e-05, "loss": 1.8141, "step": 28372 }, { "epoch": 0.37, "grad_norm": 3.7019994258880615, "learning_rate": 1.9798724078223397e-05, "loss": 1.8659, "step": 28373 }, { "epoch": 0.37, "grad_norm": 4.117913722991943, "learning_rate": 1.9798703101211857e-05, "loss": 2.0243, "step": 28374 }, { "epoch": 0.37, "grad_norm": 3.7245383262634277, "learning_rate": 1.979868212311837e-05, "loss": 1.8832, "step": 28375 }, { "epoch": 0.37, "grad_norm": 4.040502548217773, "learning_rate": 1.9798661143942947e-05, "loss": 2.183, "step": 28376 }, { "epoch": 0.37, "grad_norm": 4.684711933135986, "learning_rate": 1.979864016368558e-05, "loss": 2.2861, "step": 28377 }, { "epoch": 0.37, "grad_norm": 5.126986980438232, "learning_rate": 1.9798619182346277e-05, "loss": 2.215, "step": 28378 }, { "epoch": 0.37, "grad_norm": 3.3105199337005615, "learning_rate": 1.9798598199925043e-05, "loss": 1.5852, "step": 28379 }, { "epoch": 0.37, "grad_norm": 4.586096286773682, "learning_rate": 1.9798577216421876e-05, "loss": 2.2315, "step": 28380 }, { "epoch": 0.37, "grad_norm": 4.369915008544922, "learning_rate": 1.979855623183678e-05, "loss": 2.2247, "step": 28381 }, { "epoch": 0.37, "grad_norm": 4.257659435272217, "learning_rate": 1.979853524616976e-05, "loss": 1.9834, "step": 28382 }, { "epoch": 0.37, "grad_norm": 3.8262453079223633, "learning_rate": 1.979851425942081e-05, "loss": 1.7384, "step": 28383 }, { "epoch": 0.37, "grad_norm": 4.028515338897705, "learning_rate": 1.979849327158994e-05, "loss": 2.2973, "step": 28384 }, { "epoch": 0.37, "grad_norm": 3.961291551589966, "learning_rate": 1.9798472282677155e-05, "loss": 2.325, "step": 28385 }, { "epoch": 0.37, "grad_norm": 3.989473819732666, "learning_rate": 1.979845129268245e-05, "loss": 1.8337, "step": 28386 }, { "epoch": 0.37, "grad_norm": 4.104954242706299, "learning_rate": 1.9798430301605826e-05, "loss": 2.1475, "step": 28387 }, { "epoch": 0.37, "grad_norm": 3.797905921936035, "learning_rate": 1.979840930944729e-05, "loss": 1.8464, "step": 28388 }, { "epoch": 0.37, "grad_norm": 3.530484437942505, "learning_rate": 1.9798388316206846e-05, "loss": 1.8893, "step": 28389 }, { "epoch": 0.37, "grad_norm": 3.9525575637817383, "learning_rate": 1.979836732188449e-05, "loss": 1.8172, "step": 28390 }, { "epoch": 0.37, "grad_norm": 3.8861496448516846, "learning_rate": 1.9798346326480232e-05, "loss": 1.9584, "step": 28391 }, { "epoch": 0.37, "grad_norm": 4.188544273376465, "learning_rate": 1.9798325329994067e-05, "loss": 2.1653, "step": 28392 }, { "epoch": 0.37, "grad_norm": 3.57492733001709, "learning_rate": 1.9798304332426002e-05, "loss": 1.9674, "step": 28393 }, { "epoch": 0.37, "grad_norm": 3.5961074829101562, "learning_rate": 1.979828333377604e-05, "loss": 1.7007, "step": 28394 }, { "epoch": 0.37, "grad_norm": 3.745525360107422, "learning_rate": 1.979826233404418e-05, "loss": 2.132, "step": 28395 }, { "epoch": 0.37, "grad_norm": 4.02202033996582, "learning_rate": 1.9798241333230424e-05, "loss": 1.7541, "step": 28396 }, { "epoch": 0.37, "grad_norm": 3.4233126640319824, "learning_rate": 1.979822033133478e-05, "loss": 1.6355, "step": 28397 }, { "epoch": 0.37, "grad_norm": 5.120960235595703, "learning_rate": 1.979819932835724e-05, "loss": 2.8918, "step": 28398 }, { "epoch": 0.37, "grad_norm": 3.8273544311523438, "learning_rate": 1.9798178324297815e-05, "loss": 1.808, "step": 28399 }, { "epoch": 0.37, "grad_norm": 3.6828699111938477, "learning_rate": 1.9798157319156507e-05, "loss": 1.5998, "step": 28400 }, { "epoch": 0.37, "grad_norm": 4.184103012084961, "learning_rate": 1.9798136312933315e-05, "loss": 2.2117, "step": 28401 }, { "epoch": 0.37, "grad_norm": 3.632905960083008, "learning_rate": 1.979811530562824e-05, "loss": 2.0058, "step": 28402 }, { "epoch": 0.37, "grad_norm": 3.8447234630584717, "learning_rate": 1.9798094297241293e-05, "loss": 2.0031, "step": 28403 }, { "epoch": 0.37, "grad_norm": 4.294594764709473, "learning_rate": 1.9798073287772467e-05, "loss": 2.228, "step": 28404 }, { "epoch": 0.37, "grad_norm": 4.475952625274658, "learning_rate": 1.9798052277221766e-05, "loss": 2.2032, "step": 28405 }, { "epoch": 0.37, "grad_norm": 3.46993088722229, "learning_rate": 1.9798031265589194e-05, "loss": 1.713, "step": 28406 }, { "epoch": 0.37, "grad_norm": 3.4827940464019775, "learning_rate": 1.9798010252874754e-05, "loss": 1.8795, "step": 28407 }, { "epoch": 0.37, "grad_norm": 3.9896059036254883, "learning_rate": 1.9797989239078447e-05, "loss": 1.8913, "step": 28408 }, { "epoch": 0.37, "grad_norm": 4.22947359085083, "learning_rate": 1.979796822420028e-05, "loss": 2.0687, "step": 28409 }, { "epoch": 0.37, "grad_norm": 4.180618762969971, "learning_rate": 1.9797947208240242e-05, "loss": 2.2987, "step": 28410 }, { "epoch": 0.37, "grad_norm": 4.28423547744751, "learning_rate": 1.9797926191198352e-05, "loss": 2.6197, "step": 28411 }, { "epoch": 0.37, "grad_norm": 3.523037910461426, "learning_rate": 1.97979051730746e-05, "loss": 1.8225, "step": 28412 }, { "epoch": 0.37, "grad_norm": 3.748659133911133, "learning_rate": 1.9797884153869e-05, "loss": 1.7847, "step": 28413 }, { "epoch": 0.37, "grad_norm": 3.7736313343048096, "learning_rate": 1.9797863133581542e-05, "loss": 1.8666, "step": 28414 }, { "epoch": 0.37, "grad_norm": 3.603177309036255, "learning_rate": 1.979784211221223e-05, "loss": 1.8891, "step": 28415 }, { "epoch": 0.37, "grad_norm": 4.033152103424072, "learning_rate": 1.9797821089761077e-05, "loss": 1.997, "step": 28416 }, { "epoch": 0.37, "grad_norm": 3.55053973197937, "learning_rate": 1.9797800066228075e-05, "loss": 1.8594, "step": 28417 }, { "epoch": 0.37, "grad_norm": 4.296630859375, "learning_rate": 1.9797779041613232e-05, "loss": 2.3433, "step": 28418 }, { "epoch": 0.37, "grad_norm": 3.9755866527557373, "learning_rate": 1.9797758015916546e-05, "loss": 2.0071, "step": 28419 }, { "epoch": 0.37, "grad_norm": 4.265017509460449, "learning_rate": 1.9797736989138022e-05, "loss": 2.2261, "step": 28420 }, { "epoch": 0.37, "grad_norm": 3.2373127937316895, "learning_rate": 1.979771596127766e-05, "loss": 1.7993, "step": 28421 }, { "epoch": 0.37, "grad_norm": 3.814871072769165, "learning_rate": 1.9797694932335466e-05, "loss": 2.1217, "step": 28422 }, { "epoch": 0.37, "grad_norm": 3.355173110961914, "learning_rate": 1.9797673902311437e-05, "loss": 1.5621, "step": 28423 }, { "epoch": 0.37, "grad_norm": 3.850362777709961, "learning_rate": 1.979765287120558e-05, "loss": 1.9764, "step": 28424 }, { "epoch": 0.37, "grad_norm": 3.7660582065582275, "learning_rate": 1.9797631839017898e-05, "loss": 1.9754, "step": 28425 }, { "epoch": 0.37, "grad_norm": 3.8495421409606934, "learning_rate": 1.9797610805748394e-05, "loss": 1.8265, "step": 28426 }, { "epoch": 0.37, "grad_norm": 3.6227035522460938, "learning_rate": 1.979758977139706e-05, "loss": 2.1884, "step": 28427 }, { "epoch": 0.37, "grad_norm": 4.31732702255249, "learning_rate": 1.979756873596391e-05, "loss": 2.5492, "step": 28428 }, { "epoch": 0.37, "grad_norm": 3.28641939163208, "learning_rate": 1.9797547699448942e-05, "loss": 1.6428, "step": 28429 }, { "epoch": 0.37, "grad_norm": 3.7004568576812744, "learning_rate": 1.979752666185216e-05, "loss": 2.1726, "step": 28430 }, { "epoch": 0.37, "grad_norm": 4.163865089416504, "learning_rate": 1.979750562317356e-05, "loss": 2.337, "step": 28431 }, { "epoch": 0.37, "grad_norm": 3.8668453693389893, "learning_rate": 1.9797484583413154e-05, "loss": 1.8735, "step": 28432 }, { "epoch": 0.37, "grad_norm": 3.9737050533294678, "learning_rate": 1.979746354257094e-05, "loss": 2.4035, "step": 28433 }, { "epoch": 0.37, "grad_norm": 3.662233352661133, "learning_rate": 1.9797442500646914e-05, "loss": 1.6879, "step": 28434 }, { "epoch": 0.37, "grad_norm": 3.341684103012085, "learning_rate": 1.979742145764109e-05, "loss": 1.5586, "step": 28435 }, { "epoch": 0.37, "grad_norm": 4.0974507331848145, "learning_rate": 1.979740041355346e-05, "loss": 1.9492, "step": 28436 }, { "epoch": 0.37, "grad_norm": 3.3097164630889893, "learning_rate": 1.9797379368384035e-05, "loss": 1.7865, "step": 28437 }, { "epoch": 0.37, "grad_norm": 3.9206299781799316, "learning_rate": 1.9797358322132813e-05, "loss": 2.1256, "step": 28438 }, { "epoch": 0.37, "grad_norm": 3.8416855335235596, "learning_rate": 1.9797337274799794e-05, "loss": 1.9686, "step": 28439 }, { "epoch": 0.37, "grad_norm": 4.06589937210083, "learning_rate": 1.9797316226384983e-05, "loss": 2.0171, "step": 28440 }, { "epoch": 0.37, "grad_norm": 4.663124084472656, "learning_rate": 1.979729517688838e-05, "loss": 2.4904, "step": 28441 }, { "epoch": 0.37, "grad_norm": 3.344792366027832, "learning_rate": 1.9797274126309993e-05, "loss": 1.7377, "step": 28442 }, { "epoch": 0.37, "grad_norm": 4.445926666259766, "learning_rate": 1.979725307464982e-05, "loss": 2.393, "step": 28443 }, { "epoch": 0.37, "grad_norm": 4.186014175415039, "learning_rate": 1.9797232021907865e-05, "loss": 1.6448, "step": 28444 }, { "epoch": 0.37, "grad_norm": 4.058935165405273, "learning_rate": 1.9797210968084125e-05, "loss": 2.1242, "step": 28445 }, { "epoch": 0.37, "grad_norm": 3.194453001022339, "learning_rate": 1.9797189913178608e-05, "loss": 1.6397, "step": 28446 }, { "epoch": 0.37, "grad_norm": 3.9779810905456543, "learning_rate": 1.979716885719132e-05, "loss": 2.3548, "step": 28447 }, { "epoch": 0.37, "grad_norm": 3.7538342475891113, "learning_rate": 1.9797147800122253e-05, "loss": 2.02, "step": 28448 }, { "epoch": 0.37, "grad_norm": 3.7699854373931885, "learning_rate": 1.9797126741971417e-05, "loss": 2.0227, "step": 28449 }, { "epoch": 0.37, "grad_norm": 3.879855155944824, "learning_rate": 1.9797105682738812e-05, "loss": 2.0867, "step": 28450 }, { "epoch": 0.37, "grad_norm": 3.672257423400879, "learning_rate": 1.979708462242444e-05, "loss": 2.0507, "step": 28451 }, { "epoch": 0.37, "grad_norm": 4.057889461517334, "learning_rate": 1.97970635610283e-05, "loss": 2.3587, "step": 28452 }, { "epoch": 0.37, "grad_norm": 4.40626859664917, "learning_rate": 1.9797042498550405e-05, "loss": 2.2535, "step": 28453 }, { "epoch": 0.37, "grad_norm": 3.743330717086792, "learning_rate": 1.9797021434990746e-05, "loss": 2.1084, "step": 28454 }, { "epoch": 0.37, "grad_norm": 3.466874122619629, "learning_rate": 1.979700037034933e-05, "loss": 2.0301, "step": 28455 }, { "epoch": 0.37, "grad_norm": 4.054919719696045, "learning_rate": 1.979697930462616e-05, "loss": 2.0748, "step": 28456 }, { "epoch": 0.37, "grad_norm": 3.648313283920288, "learning_rate": 1.9796958237821236e-05, "loss": 1.4419, "step": 28457 }, { "epoch": 0.37, "grad_norm": 3.204709768295288, "learning_rate": 1.9796937169934563e-05, "loss": 1.9434, "step": 28458 }, { "epoch": 0.37, "grad_norm": 3.912024974822998, "learning_rate": 1.979691610096614e-05, "loss": 1.7595, "step": 28459 }, { "epoch": 0.37, "grad_norm": 4.672935485839844, "learning_rate": 1.9796895030915974e-05, "loss": 2.1743, "step": 28460 }, { "epoch": 0.37, "grad_norm": 4.065230846405029, "learning_rate": 1.9796873959784064e-05, "loss": 1.9897, "step": 28461 }, { "epoch": 0.37, "grad_norm": 3.7662160396575928, "learning_rate": 1.979685288757041e-05, "loss": 1.6563, "step": 28462 }, { "epoch": 0.37, "grad_norm": 4.335583209991455, "learning_rate": 1.979683181427502e-05, "loss": 2.156, "step": 28463 }, { "epoch": 0.37, "grad_norm": 4.297170162200928, "learning_rate": 1.9796810739897893e-05, "loss": 2.4588, "step": 28464 }, { "epoch": 0.37, "grad_norm": 4.785972595214844, "learning_rate": 1.979678966443903e-05, "loss": 2.269, "step": 28465 }, { "epoch": 0.37, "grad_norm": 3.8556361198425293, "learning_rate": 1.979676858789844e-05, "loss": 1.8739, "step": 28466 }, { "epoch": 0.37, "grad_norm": 3.762974739074707, "learning_rate": 1.9796747510276115e-05, "loss": 1.9867, "step": 28467 }, { "epoch": 0.37, "grad_norm": 4.389749050140381, "learning_rate": 1.9796726431572066e-05, "loss": 2.1785, "step": 28468 }, { "epoch": 0.37, "grad_norm": 3.736387252807617, "learning_rate": 1.979670535178629e-05, "loss": 1.9373, "step": 28469 }, { "epoch": 0.37, "grad_norm": 3.6023619174957275, "learning_rate": 1.9796684270918795e-05, "loss": 2.0357, "step": 28470 }, { "epoch": 0.37, "grad_norm": 3.6161723136901855, "learning_rate": 1.979666318896958e-05, "loss": 1.5459, "step": 28471 }, { "epoch": 0.37, "grad_norm": 4.144042015075684, "learning_rate": 1.9796642105938642e-05, "loss": 2.0469, "step": 28472 }, { "epoch": 0.37, "grad_norm": 3.6794490814208984, "learning_rate": 1.9796621021825993e-05, "loss": 1.9502, "step": 28473 }, { "epoch": 0.37, "grad_norm": 3.5096030235290527, "learning_rate": 1.979659993663163e-05, "loss": 1.7179, "step": 28474 }, { "epoch": 0.37, "grad_norm": 3.9625661373138428, "learning_rate": 1.9796578850355555e-05, "loss": 2.0297, "step": 28475 }, { "epoch": 0.37, "grad_norm": 4.168084621429443, "learning_rate": 1.9796557762997774e-05, "loss": 2.4764, "step": 28476 }, { "epoch": 0.37, "grad_norm": 3.725295305252075, "learning_rate": 1.9796536674558284e-05, "loss": 1.8592, "step": 28477 }, { "epoch": 0.37, "grad_norm": 3.407620429992676, "learning_rate": 1.979651558503709e-05, "loss": 1.9252, "step": 28478 }, { "epoch": 0.37, "grad_norm": 4.19257116317749, "learning_rate": 1.979649449443419e-05, "loss": 2.2211, "step": 28479 }, { "epoch": 0.37, "grad_norm": 4.091010093688965, "learning_rate": 1.97964734027496e-05, "loss": 2.4263, "step": 28480 }, { "epoch": 0.37, "grad_norm": 4.374794960021973, "learning_rate": 1.979645230998331e-05, "loss": 1.8624, "step": 28481 }, { "epoch": 0.37, "grad_norm": 3.738265037536621, "learning_rate": 1.9796431216135324e-05, "loss": 1.6269, "step": 28482 }, { "epoch": 0.37, "grad_norm": 3.699021339416504, "learning_rate": 1.9796410121205647e-05, "loss": 1.8399, "step": 28483 }, { "epoch": 0.37, "grad_norm": 3.7660152912139893, "learning_rate": 1.979638902519428e-05, "loss": 2.1765, "step": 28484 }, { "epoch": 0.37, "grad_norm": 3.728461980819702, "learning_rate": 1.9796367928101226e-05, "loss": 1.7218, "step": 28485 }, { "epoch": 0.37, "grad_norm": 3.452133893966675, "learning_rate": 1.9796346829926488e-05, "loss": 1.7382, "step": 28486 }, { "epoch": 0.37, "grad_norm": 3.398533582687378, "learning_rate": 1.9796325730670062e-05, "loss": 1.8505, "step": 28487 }, { "epoch": 0.37, "grad_norm": 4.276178359985352, "learning_rate": 1.9796304630331958e-05, "loss": 2.3853, "step": 28488 }, { "epoch": 0.37, "grad_norm": 3.688220500946045, "learning_rate": 1.9796283528912176e-05, "loss": 1.6011, "step": 28489 }, { "epoch": 0.37, "grad_norm": 3.9727377891540527, "learning_rate": 1.979626242641072e-05, "loss": 1.9427, "step": 28490 }, { "epoch": 0.37, "grad_norm": 3.7760169506073, "learning_rate": 1.979624132282759e-05, "loss": 2.0834, "step": 28491 }, { "epoch": 0.37, "grad_norm": 4.285569667816162, "learning_rate": 1.979622021816279e-05, "loss": 2.3594, "step": 28492 }, { "epoch": 0.37, "grad_norm": 3.966887950897217, "learning_rate": 1.9796199112416318e-05, "loss": 1.9444, "step": 28493 }, { "epoch": 0.37, "grad_norm": 3.7803704738616943, "learning_rate": 1.979617800558818e-05, "loss": 1.7331, "step": 28494 }, { "epoch": 0.37, "grad_norm": 3.791818618774414, "learning_rate": 1.9796156897678377e-05, "loss": 1.9738, "step": 28495 }, { "epoch": 0.37, "grad_norm": 3.8369174003601074, "learning_rate": 1.9796135788686913e-05, "loss": 2.2184, "step": 28496 }, { "epoch": 0.37, "grad_norm": 3.6062419414520264, "learning_rate": 1.9796114678613788e-05, "loss": 1.7846, "step": 28497 }, { "epoch": 0.37, "grad_norm": 3.9436097145080566, "learning_rate": 1.9796093567459006e-05, "loss": 2.3243, "step": 28498 }, { "epoch": 0.37, "grad_norm": 3.714695930480957, "learning_rate": 1.9796072455222572e-05, "loss": 1.8959, "step": 28499 }, { "epoch": 0.37, "grad_norm": 3.747471570968628, "learning_rate": 1.9796051341904484e-05, "loss": 1.8284, "step": 28500 }, { "epoch": 0.37, "grad_norm": 3.4401566982269287, "learning_rate": 1.9796030227504746e-05, "loss": 1.7442, "step": 28501 }, { "epoch": 0.37, "grad_norm": 3.7942752838134766, "learning_rate": 1.979600911202336e-05, "loss": 1.8958, "step": 28502 }, { "epoch": 0.37, "grad_norm": 3.7148683071136475, "learning_rate": 1.9795987995460326e-05, "loss": 1.9836, "step": 28503 }, { "epoch": 0.37, "grad_norm": 3.5547847747802734, "learning_rate": 1.979596687781565e-05, "loss": 1.6933, "step": 28504 }, { "epoch": 0.37, "grad_norm": 4.120507717132568, "learning_rate": 1.9795945759089336e-05, "loss": 2.3029, "step": 28505 }, { "epoch": 0.37, "grad_norm": 3.583495616912842, "learning_rate": 1.979592463928138e-05, "loss": 1.6676, "step": 28506 }, { "epoch": 0.37, "grad_norm": 3.5153727531433105, "learning_rate": 1.9795903518391787e-05, "loss": 2.0223, "step": 28507 }, { "epoch": 0.37, "grad_norm": 4.7108354568481445, "learning_rate": 1.9795882396420563e-05, "loss": 2.6113, "step": 28508 }, { "epoch": 0.37, "grad_norm": 4.052125453948975, "learning_rate": 1.9795861273367708e-05, "loss": 2.0829, "step": 28509 }, { "epoch": 0.37, "grad_norm": 4.461705684661865, "learning_rate": 1.979584014923322e-05, "loss": 2.2807, "step": 28510 }, { "epoch": 0.37, "grad_norm": 4.080513000488281, "learning_rate": 1.9795819024017107e-05, "loss": 1.969, "step": 28511 }, { "epoch": 0.37, "grad_norm": 4.1413798332214355, "learning_rate": 1.979579789771937e-05, "loss": 2.1935, "step": 28512 }, { "epoch": 0.37, "grad_norm": 4.402431011199951, "learning_rate": 1.9795776770340008e-05, "loss": 2.2414, "step": 28513 }, { "epoch": 0.37, "grad_norm": 4.095357418060303, "learning_rate": 1.979575564187903e-05, "loss": 2.6744, "step": 28514 }, { "epoch": 0.37, "grad_norm": 4.099390029907227, "learning_rate": 1.9795734512336434e-05, "loss": 2.1597, "step": 28515 }, { "epoch": 0.37, "grad_norm": 4.7582011222839355, "learning_rate": 1.979571338171222e-05, "loss": 2.2532, "step": 28516 }, { "epoch": 0.37, "grad_norm": 4.282952308654785, "learning_rate": 1.9795692250006395e-05, "loss": 2.2104, "step": 28517 }, { "epoch": 0.37, "grad_norm": 4.330527305603027, "learning_rate": 1.9795671117218957e-05, "loss": 2.3103, "step": 28518 }, { "epoch": 0.37, "grad_norm": 3.732325315475464, "learning_rate": 1.9795649983349915e-05, "loss": 1.7605, "step": 28519 }, { "epoch": 0.37, "grad_norm": 3.5639729499816895, "learning_rate": 1.9795628848399263e-05, "loss": 1.9248, "step": 28520 }, { "epoch": 0.37, "grad_norm": 3.925572395324707, "learning_rate": 1.979560771236701e-05, "loss": 2.0632, "step": 28521 }, { "epoch": 0.37, "grad_norm": 4.285560607910156, "learning_rate": 1.9795586575253155e-05, "loss": 2.3912, "step": 28522 }, { "epoch": 0.37, "grad_norm": 3.535447835922241, "learning_rate": 1.97955654370577e-05, "loss": 1.6087, "step": 28523 }, { "epoch": 0.37, "grad_norm": 3.6662840843200684, "learning_rate": 1.9795544297780647e-05, "loss": 1.9243, "step": 28524 }, { "epoch": 0.37, "grad_norm": 3.4095404148101807, "learning_rate": 1.9795523157422005e-05, "loss": 2.0965, "step": 28525 }, { "epoch": 0.37, "grad_norm": 4.2116498947143555, "learning_rate": 1.9795502015981766e-05, "loss": 2.1842, "step": 28526 }, { "epoch": 0.37, "grad_norm": 3.9774558544158936, "learning_rate": 1.979548087345994e-05, "loss": 2.1149, "step": 28527 }, { "epoch": 0.37, "grad_norm": 4.499311923980713, "learning_rate": 1.9795459729856525e-05, "loss": 1.9396, "step": 28528 }, { "epoch": 0.37, "grad_norm": 3.7716755867004395, "learning_rate": 1.9795438585171527e-05, "loss": 2.2745, "step": 28529 }, { "epoch": 0.37, "grad_norm": 3.6793038845062256, "learning_rate": 1.9795417439404945e-05, "loss": 2.0102, "step": 28530 }, { "epoch": 0.37, "grad_norm": 3.9795424938201904, "learning_rate": 1.9795396292556783e-05, "loss": 2.0793, "step": 28531 }, { "epoch": 0.37, "grad_norm": 3.9443695545196533, "learning_rate": 1.9795375144627042e-05, "loss": 1.8164, "step": 28532 }, { "epoch": 0.37, "grad_norm": 3.7451515197753906, "learning_rate": 1.9795353995615728e-05, "loss": 1.9534, "step": 28533 }, { "epoch": 0.37, "grad_norm": 4.019316673278809, "learning_rate": 1.979533284552284e-05, "loss": 1.9032, "step": 28534 }, { "epoch": 0.37, "grad_norm": 3.62107515335083, "learning_rate": 1.979531169434838e-05, "loss": 1.677, "step": 28535 }, { "epoch": 0.37, "grad_norm": 4.437732219696045, "learning_rate": 1.9795290542092354e-05, "loss": 2.5499, "step": 28536 }, { "epoch": 0.37, "grad_norm": 3.8400468826293945, "learning_rate": 1.979526938875476e-05, "loss": 2.3017, "step": 28537 }, { "epoch": 0.37, "grad_norm": 3.9854116439819336, "learning_rate": 1.97952482343356e-05, "loss": 2.0366, "step": 28538 }, { "epoch": 0.37, "grad_norm": 4.081534385681152, "learning_rate": 1.979522707883488e-05, "loss": 2.2146, "step": 28539 }, { "epoch": 0.37, "grad_norm": 3.5402448177337646, "learning_rate": 1.97952059222526e-05, "loss": 2.0179, "step": 28540 }, { "epoch": 0.37, "grad_norm": 3.488294839859009, "learning_rate": 1.9795184764588767e-05, "loss": 1.9415, "step": 28541 }, { "epoch": 0.37, "grad_norm": 3.814082384109497, "learning_rate": 1.9795163605843374e-05, "loss": 1.8484, "step": 28542 }, { "epoch": 0.37, "grad_norm": 4.701780796051025, "learning_rate": 1.979514244601643e-05, "loss": 2.5446, "step": 28543 }, { "epoch": 0.37, "grad_norm": 4.337211608886719, "learning_rate": 1.979512128510794e-05, "loss": 2.265, "step": 28544 }, { "epoch": 0.37, "grad_norm": 3.9285895824432373, "learning_rate": 1.97951001231179e-05, "loss": 1.6715, "step": 28545 }, { "epoch": 0.37, "grad_norm": 3.656741142272949, "learning_rate": 1.9795078960046315e-05, "loss": 2.3495, "step": 28546 }, { "epoch": 0.37, "grad_norm": 3.6615381240844727, "learning_rate": 1.9795057795893186e-05, "loss": 1.696, "step": 28547 }, { "epoch": 0.37, "grad_norm": 3.409177303314209, "learning_rate": 1.9795036630658514e-05, "loss": 1.6906, "step": 28548 }, { "epoch": 0.37, "grad_norm": 4.084166049957275, "learning_rate": 1.979501546434231e-05, "loss": 2.4171, "step": 28549 }, { "epoch": 0.37, "grad_norm": 4.618167877197266, "learning_rate": 1.9794994296944564e-05, "loss": 1.7083, "step": 28550 }, { "epoch": 0.37, "grad_norm": 3.793691873550415, "learning_rate": 1.9794973128465288e-05, "loss": 1.8279, "step": 28551 }, { "epoch": 0.37, "grad_norm": 3.6866095066070557, "learning_rate": 1.979495195890448e-05, "loss": 1.6253, "step": 28552 }, { "epoch": 0.37, "grad_norm": 3.451871871948242, "learning_rate": 1.9794930788262143e-05, "loss": 1.9673, "step": 28553 }, { "epoch": 0.37, "grad_norm": 3.5639588832855225, "learning_rate": 1.979490961653828e-05, "loss": 1.7423, "step": 28554 }, { "epoch": 0.37, "grad_norm": 3.759584665298462, "learning_rate": 1.9794888443732893e-05, "loss": 2.1785, "step": 28555 }, { "epoch": 0.37, "grad_norm": 4.166558265686035, "learning_rate": 1.9794867269845987e-05, "loss": 1.7074, "step": 28556 }, { "epoch": 0.37, "grad_norm": 3.817530632019043, "learning_rate": 1.9794846094877554e-05, "loss": 1.6363, "step": 28557 }, { "epoch": 0.37, "grad_norm": 3.754503011703491, "learning_rate": 1.979482491882761e-05, "loss": 1.5779, "step": 28558 }, { "epoch": 0.37, "grad_norm": 3.8126354217529297, "learning_rate": 1.979480374169615e-05, "loss": 2.3081, "step": 28559 }, { "epoch": 0.37, "grad_norm": 4.074035167694092, "learning_rate": 1.9794782563483176e-05, "loss": 2.1426, "step": 28560 }, { "epoch": 0.37, "grad_norm": 3.527212381362915, "learning_rate": 1.9794761384188692e-05, "loss": 2.0099, "step": 28561 }, { "epoch": 0.37, "grad_norm": 3.5106441974639893, "learning_rate": 1.97947402038127e-05, "loss": 1.7436, "step": 28562 }, { "epoch": 0.37, "grad_norm": 4.1450724601745605, "learning_rate": 1.9794719022355206e-05, "loss": 2.0214, "step": 28563 }, { "epoch": 0.37, "grad_norm": 3.907865285873413, "learning_rate": 1.9794697839816203e-05, "loss": 2.0083, "step": 28564 }, { "epoch": 0.37, "grad_norm": 3.925677537918091, "learning_rate": 1.9794676656195702e-05, "loss": 1.8546, "step": 28565 }, { "epoch": 0.37, "grad_norm": 3.5836572647094727, "learning_rate": 1.9794655471493703e-05, "loss": 1.8202, "step": 28566 }, { "epoch": 0.37, "grad_norm": 4.0542802810668945, "learning_rate": 1.9794634285710208e-05, "loss": 2.0402, "step": 28567 }, { "epoch": 0.37, "grad_norm": 3.4516634941101074, "learning_rate": 1.979461309884522e-05, "loss": 1.9326, "step": 28568 }, { "epoch": 0.37, "grad_norm": 3.7356481552124023, "learning_rate": 1.9794591910898737e-05, "loss": 2.0388, "step": 28569 }, { "epoch": 0.37, "grad_norm": 3.449782133102417, "learning_rate": 1.9794570721870768e-05, "loss": 1.7603, "step": 28570 }, { "epoch": 0.37, "grad_norm": 4.504281520843506, "learning_rate": 1.979454953176131e-05, "loss": 2.2215, "step": 28571 }, { "epoch": 0.37, "grad_norm": 4.050880432128906, "learning_rate": 1.979452834057037e-05, "loss": 2.3269, "step": 28572 }, { "epoch": 0.37, "grad_norm": 4.315536975860596, "learning_rate": 1.9794507148297946e-05, "loss": 2.3137, "step": 28573 }, { "epoch": 0.37, "grad_norm": 3.9694602489471436, "learning_rate": 1.9794485954944044e-05, "loss": 2.3117, "step": 28574 }, { "epoch": 0.37, "grad_norm": 3.6961026191711426, "learning_rate": 1.9794464760508664e-05, "loss": 1.7902, "step": 28575 }, { "epoch": 0.37, "grad_norm": 3.8207309246063232, "learning_rate": 1.979444356499181e-05, "loss": 2.0072, "step": 28576 }, { "epoch": 0.37, "grad_norm": 3.998079538345337, "learning_rate": 1.9794422368393482e-05, "loss": 1.9779, "step": 28577 }, { "epoch": 0.37, "grad_norm": 3.691887855529785, "learning_rate": 1.979440117071368e-05, "loss": 2.1782, "step": 28578 }, { "epoch": 0.37, "grad_norm": 3.6928725242614746, "learning_rate": 1.9794379971952415e-05, "loss": 2.3503, "step": 28579 }, { "epoch": 0.37, "grad_norm": 3.5137314796447754, "learning_rate": 1.9794358772109685e-05, "loss": 1.7846, "step": 28580 }, { "epoch": 0.37, "grad_norm": 4.117191791534424, "learning_rate": 1.979433757118549e-05, "loss": 2.196, "step": 28581 }, { "epoch": 0.37, "grad_norm": 3.783752202987671, "learning_rate": 1.9794316369179833e-05, "loss": 1.8757, "step": 28582 }, { "epoch": 0.37, "grad_norm": 3.2925186157226562, "learning_rate": 1.9794295166092716e-05, "loss": 1.3321, "step": 28583 }, { "epoch": 0.37, "grad_norm": 4.031874656677246, "learning_rate": 1.9794273961924145e-05, "loss": 2.0362, "step": 28584 }, { "epoch": 0.37, "grad_norm": 4.3063578605651855, "learning_rate": 1.979425275667412e-05, "loss": 2.1584, "step": 28585 }, { "epoch": 0.37, "grad_norm": 3.999476194381714, "learning_rate": 1.9794231550342645e-05, "loss": 2.0909, "step": 28586 }, { "epoch": 0.37, "grad_norm": 3.582265615463257, "learning_rate": 1.9794210342929717e-05, "loss": 1.9062, "step": 28587 }, { "epoch": 0.37, "grad_norm": 3.7759997844696045, "learning_rate": 1.979418913443534e-05, "loss": 1.8627, "step": 28588 }, { "epoch": 0.37, "grad_norm": 3.341630697250366, "learning_rate": 1.9794167924859524e-05, "loss": 1.8607, "step": 28589 }, { "epoch": 0.37, "grad_norm": 4.111353874206543, "learning_rate": 1.9794146714202264e-05, "loss": 1.9432, "step": 28590 }, { "epoch": 0.37, "grad_norm": 3.712271213531494, "learning_rate": 1.9794125502463562e-05, "loss": 2.0604, "step": 28591 }, { "epoch": 0.37, "grad_norm": 3.7820632457733154, "learning_rate": 1.9794104289643426e-05, "loss": 2.2206, "step": 28592 }, { "epoch": 0.37, "grad_norm": 3.712294578552246, "learning_rate": 1.9794083075741853e-05, "loss": 1.8504, "step": 28593 }, { "epoch": 0.37, "grad_norm": 3.4920403957366943, "learning_rate": 1.9794061860758846e-05, "loss": 1.9071, "step": 28594 }, { "epoch": 0.37, "grad_norm": 3.736780881881714, "learning_rate": 1.9794040644694408e-05, "loss": 1.9576, "step": 28595 }, { "epoch": 0.37, "grad_norm": 3.5722241401672363, "learning_rate": 1.9794019427548543e-05, "loss": 1.8863, "step": 28596 }, { "epoch": 0.37, "grad_norm": 3.7696516513824463, "learning_rate": 1.979399820932125e-05, "loss": 2.1839, "step": 28597 }, { "epoch": 0.37, "grad_norm": 4.177719593048096, "learning_rate": 1.9793976990012537e-05, "loss": 2.0114, "step": 28598 }, { "epoch": 0.37, "grad_norm": 4.821547508239746, "learning_rate": 1.97939557696224e-05, "loss": 2.5004, "step": 28599 }, { "epoch": 0.37, "grad_norm": 4.029935359954834, "learning_rate": 1.9793934548150846e-05, "loss": 1.7885, "step": 28600 }, { "epoch": 0.37, "grad_norm": 4.298653602600098, "learning_rate": 1.9793913325597875e-05, "loss": 2.2287, "step": 28601 }, { "epoch": 0.37, "grad_norm": 4.2118425369262695, "learning_rate": 1.979389210196349e-05, "loss": 2.3484, "step": 28602 }, { "epoch": 0.37, "grad_norm": 4.50749397277832, "learning_rate": 1.9793870877247695e-05, "loss": 2.4283, "step": 28603 }, { "epoch": 0.37, "grad_norm": 3.754823923110962, "learning_rate": 1.9793849651450486e-05, "loss": 1.9639, "step": 28604 }, { "epoch": 0.37, "grad_norm": 4.149132251739502, "learning_rate": 1.979382842457187e-05, "loss": 2.2391, "step": 28605 }, { "epoch": 0.37, "grad_norm": 4.225354194641113, "learning_rate": 1.9793807196611854e-05, "loss": 1.8872, "step": 28606 }, { "epoch": 0.37, "grad_norm": 3.52345871925354, "learning_rate": 1.979378596757043e-05, "loss": 1.8539, "step": 28607 }, { "epoch": 0.37, "grad_norm": 3.700392484664917, "learning_rate": 1.979376473744761e-05, "loss": 2.0353, "step": 28608 }, { "epoch": 0.37, "grad_norm": 4.046914100646973, "learning_rate": 1.979374350624339e-05, "loss": 2.2093, "step": 28609 }, { "epoch": 0.37, "grad_norm": 3.8636364936828613, "learning_rate": 1.9793722273957777e-05, "loss": 1.7065, "step": 28610 }, { "epoch": 0.37, "grad_norm": 4.370857238769531, "learning_rate": 1.979370104059077e-05, "loss": 2.1821, "step": 28611 }, { "epoch": 0.37, "grad_norm": 4.25607967376709, "learning_rate": 1.979367980614237e-05, "loss": 2.268, "step": 28612 }, { "epoch": 0.37, "grad_norm": 3.5848348140716553, "learning_rate": 1.9793658570612583e-05, "loss": 1.8728, "step": 28613 }, { "epoch": 0.37, "grad_norm": 3.6240134239196777, "learning_rate": 1.979363733400141e-05, "loss": 1.6922, "step": 28614 }, { "epoch": 0.37, "grad_norm": 4.001210689544678, "learning_rate": 1.9793616096308856e-05, "loss": 1.9964, "step": 28615 }, { "epoch": 0.37, "grad_norm": 3.6781435012817383, "learning_rate": 1.9793594857534916e-05, "loss": 1.7591, "step": 28616 }, { "epoch": 0.37, "grad_norm": 3.7803919315338135, "learning_rate": 1.97935736176796e-05, "loss": 1.8864, "step": 28617 }, { "epoch": 0.37, "grad_norm": 4.5111403465271, "learning_rate": 1.9793552376742906e-05, "loss": 2.2756, "step": 28618 }, { "epoch": 0.37, "grad_norm": 3.673495292663574, "learning_rate": 1.9793531134724837e-05, "loss": 1.9218, "step": 28619 }, { "epoch": 0.37, "grad_norm": 4.407736301422119, "learning_rate": 1.9793509891625397e-05, "loss": 2.3783, "step": 28620 }, { "epoch": 0.37, "grad_norm": 4.148456573486328, "learning_rate": 1.979348864744459e-05, "loss": 1.9116, "step": 28621 }, { "epoch": 0.37, "grad_norm": 5.13498067855835, "learning_rate": 1.979346740218241e-05, "loss": 2.2476, "step": 28622 }, { "epoch": 0.37, "grad_norm": 3.894848346710205, "learning_rate": 1.979344615583887e-05, "loss": 2.2023, "step": 28623 }, { "epoch": 0.37, "grad_norm": 4.718419075012207, "learning_rate": 1.9793424908413964e-05, "loss": 2.0727, "step": 28624 }, { "epoch": 0.37, "grad_norm": 4.111863136291504, "learning_rate": 1.97934036599077e-05, "loss": 2.0811, "step": 28625 }, { "epoch": 0.37, "grad_norm": 3.8184382915496826, "learning_rate": 1.9793382410320076e-05, "loss": 2.1592, "step": 28626 }, { "epoch": 0.37, "grad_norm": 3.36550235748291, "learning_rate": 1.97933611596511e-05, "loss": 1.9021, "step": 28627 }, { "epoch": 0.37, "grad_norm": 3.5708048343658447, "learning_rate": 1.9793339907900768e-05, "loss": 1.9552, "step": 28628 }, { "epoch": 0.37, "grad_norm": 4.042896747589111, "learning_rate": 1.9793318655069086e-05, "loss": 1.8471, "step": 28629 }, { "epoch": 0.37, "grad_norm": 3.5810277462005615, "learning_rate": 1.979329740115605e-05, "loss": 1.882, "step": 28630 }, { "epoch": 0.37, "grad_norm": 3.976733446121216, "learning_rate": 1.9793276146161678e-05, "loss": 1.896, "step": 28631 }, { "epoch": 0.37, "grad_norm": 4.304017066955566, "learning_rate": 1.9793254890085958e-05, "loss": 2.424, "step": 28632 }, { "epoch": 0.37, "grad_norm": 3.8420474529266357, "learning_rate": 1.9793233632928896e-05, "loss": 2.0062, "step": 28633 }, { "epoch": 0.37, "grad_norm": 4.111639976501465, "learning_rate": 1.979321237469049e-05, "loss": 1.997, "step": 28634 }, { "epoch": 0.37, "grad_norm": 3.5561020374298096, "learning_rate": 1.9793191115370757e-05, "loss": 1.6234, "step": 28635 }, { "epoch": 0.37, "grad_norm": 4.4432525634765625, "learning_rate": 1.979316985496968e-05, "loss": 2.6093, "step": 28636 }, { "epoch": 0.37, "grad_norm": 3.6017844676971436, "learning_rate": 1.979314859348728e-05, "loss": 1.9004, "step": 28637 }, { "epoch": 0.37, "grad_norm": 3.584019660949707, "learning_rate": 1.9793127330923544e-05, "loss": 1.7324, "step": 28638 }, { "epoch": 0.37, "grad_norm": 3.442896604537964, "learning_rate": 1.979310606727848e-05, "loss": 1.8862, "step": 28639 }, { "epoch": 0.37, "grad_norm": 3.84572172164917, "learning_rate": 1.9793084802552094e-05, "loss": 2.0502, "step": 28640 }, { "epoch": 0.37, "grad_norm": 3.821624755859375, "learning_rate": 1.9793063536744383e-05, "loss": 2.0033, "step": 28641 }, { "epoch": 0.37, "grad_norm": 3.7808823585510254, "learning_rate": 1.9793042269855352e-05, "loss": 2.1407, "step": 28642 }, { "epoch": 0.37, "grad_norm": 3.463081121444702, "learning_rate": 1.9793021001885003e-05, "loss": 1.6248, "step": 28643 }, { "epoch": 0.37, "grad_norm": 4.058984279632568, "learning_rate": 1.9792999732833342e-05, "loss": 1.9343, "step": 28644 }, { "epoch": 0.37, "grad_norm": 3.709332227706909, "learning_rate": 1.9792978462700365e-05, "loss": 1.9621, "step": 28645 }, { "epoch": 0.37, "grad_norm": 3.961456060409546, "learning_rate": 1.9792957191486076e-05, "loss": 1.8546, "step": 28646 }, { "epoch": 0.37, "grad_norm": 3.749423027038574, "learning_rate": 1.979293591919048e-05, "loss": 1.8769, "step": 28647 }, { "epoch": 0.37, "grad_norm": 4.206126689910889, "learning_rate": 1.979291464581358e-05, "loss": 2.1924, "step": 28648 }, { "epoch": 0.37, "grad_norm": 3.5521140098571777, "learning_rate": 1.979289337135537e-05, "loss": 1.8493, "step": 28649 }, { "epoch": 0.37, "grad_norm": 4.499714374542236, "learning_rate": 1.9792872095815864e-05, "loss": 2.2441, "step": 28650 }, { "epoch": 0.37, "grad_norm": 4.361083030700684, "learning_rate": 1.9792850819195055e-05, "loss": 2.0274, "step": 28651 }, { "epoch": 0.37, "grad_norm": 3.623579740524292, "learning_rate": 1.979282954149295e-05, "loss": 1.8729, "step": 28652 }, { "epoch": 0.37, "grad_norm": 3.6777827739715576, "learning_rate": 1.9792808262709552e-05, "loss": 1.8495, "step": 28653 }, { "epoch": 0.37, "grad_norm": 3.9656403064727783, "learning_rate": 1.979278698284486e-05, "loss": 2.066, "step": 28654 }, { "epoch": 0.37, "grad_norm": 3.2574639320373535, "learning_rate": 1.979276570189888e-05, "loss": 1.6564, "step": 28655 }, { "epoch": 0.37, "grad_norm": 4.033343315124512, "learning_rate": 1.979274441987161e-05, "loss": 2.1521, "step": 28656 }, { "epoch": 0.37, "grad_norm": 3.740541934967041, "learning_rate": 1.979272313676306e-05, "loss": 1.8766, "step": 28657 }, { "epoch": 0.37, "grad_norm": 4.128506660461426, "learning_rate": 1.979270185257322e-05, "loss": 2.0039, "step": 28658 }, { "epoch": 0.37, "grad_norm": 3.928969621658325, "learning_rate": 1.97926805673021e-05, "loss": 2.5232, "step": 28659 }, { "epoch": 0.37, "grad_norm": 3.7216343879699707, "learning_rate": 1.979265928094971e-05, "loss": 1.9804, "step": 28660 }, { "epoch": 0.37, "grad_norm": 3.7529633045196533, "learning_rate": 1.979263799351604e-05, "loss": 2.0855, "step": 28661 }, { "epoch": 0.37, "grad_norm": 4.118528366088867, "learning_rate": 1.9792616705001095e-05, "loss": 2.2235, "step": 28662 }, { "epoch": 0.37, "grad_norm": 3.6915695667266846, "learning_rate": 1.979259541540488e-05, "loss": 2.0249, "step": 28663 }, { "epoch": 0.37, "grad_norm": 4.372523784637451, "learning_rate": 1.9792574124727397e-05, "loss": 2.5575, "step": 28664 }, { "epoch": 0.37, "grad_norm": 4.092273712158203, "learning_rate": 1.9792552832968646e-05, "loss": 1.8261, "step": 28665 }, { "epoch": 0.37, "grad_norm": 3.9534237384796143, "learning_rate": 1.9792531540128633e-05, "loss": 1.6113, "step": 28666 }, { "epoch": 0.37, "grad_norm": 3.843768835067749, "learning_rate": 1.9792510246207356e-05, "loss": 1.9465, "step": 28667 }, { "epoch": 0.37, "grad_norm": 3.6284873485565186, "learning_rate": 1.979248895120482e-05, "loss": 1.7336, "step": 28668 }, { "epoch": 0.37, "grad_norm": 4.069435119628906, "learning_rate": 1.979246765512103e-05, "loss": 2.1313, "step": 28669 }, { "epoch": 0.37, "grad_norm": 4.17814826965332, "learning_rate": 1.9792446357955984e-05, "loss": 2.2864, "step": 28670 }, { "epoch": 0.37, "grad_norm": 4.219285488128662, "learning_rate": 1.9792425059709686e-05, "loss": 2.0752, "step": 28671 }, { "epoch": 0.37, "grad_norm": 3.7991321086883545, "learning_rate": 1.9792403760382136e-05, "loss": 2.0184, "step": 28672 }, { "epoch": 0.37, "grad_norm": 3.6725268363952637, "learning_rate": 1.9792382459973342e-05, "loss": 1.9717, "step": 28673 }, { "epoch": 0.37, "grad_norm": 4.136007308959961, "learning_rate": 1.97923611584833e-05, "loss": 1.8161, "step": 28674 }, { "epoch": 0.37, "grad_norm": 3.7852988243103027, "learning_rate": 1.9792339855912018e-05, "loss": 2.2066, "step": 28675 }, { "epoch": 0.37, "grad_norm": 5.041709899902344, "learning_rate": 1.9792318552259494e-05, "loss": 2.1957, "step": 28676 }, { "epoch": 0.37, "grad_norm": 4.279016017913818, "learning_rate": 1.979229724752573e-05, "loss": 2.2903, "step": 28677 }, { "epoch": 0.37, "grad_norm": 4.261997699737549, "learning_rate": 1.979227594171073e-05, "loss": 2.0115, "step": 28678 }, { "epoch": 0.37, "grad_norm": 3.209439277648926, "learning_rate": 1.97922546348145e-05, "loss": 1.2798, "step": 28679 }, { "epoch": 0.37, "grad_norm": 4.322474956512451, "learning_rate": 1.979223332683704e-05, "loss": 2.2189, "step": 28680 }, { "epoch": 0.37, "grad_norm": 4.415542125701904, "learning_rate": 1.9792212017778347e-05, "loss": 2.3792, "step": 28681 }, { "epoch": 0.37, "grad_norm": 4.072912216186523, "learning_rate": 1.979219070763843e-05, "loss": 1.9662, "step": 28682 }, { "epoch": 0.37, "grad_norm": 3.975177526473999, "learning_rate": 1.979216939641729e-05, "loss": 2.2301, "step": 28683 }, { "epoch": 0.37, "grad_norm": 3.8184521198272705, "learning_rate": 1.9792148084114923e-05, "loss": 1.8991, "step": 28684 }, { "epoch": 0.37, "grad_norm": 4.09739875793457, "learning_rate": 1.9792126770731343e-05, "loss": 2.0101, "step": 28685 }, { "epoch": 0.37, "grad_norm": 4.138063430786133, "learning_rate": 1.9792105456266545e-05, "loss": 2.3834, "step": 28686 }, { "epoch": 0.37, "grad_norm": 4.211374282836914, "learning_rate": 1.979208414072053e-05, "loss": 2.467, "step": 28687 }, { "epoch": 0.37, "grad_norm": 3.901066541671753, "learning_rate": 1.9792062824093302e-05, "loss": 2.1054, "step": 28688 }, { "epoch": 0.37, "grad_norm": 4.805196762084961, "learning_rate": 1.979204150638487e-05, "loss": 2.3063, "step": 28689 }, { "epoch": 0.37, "grad_norm": 4.166317939758301, "learning_rate": 1.9792020187595224e-05, "loss": 1.9876, "step": 28690 }, { "epoch": 0.37, "grad_norm": 3.6970887184143066, "learning_rate": 1.9791998867724377e-05, "loss": 1.8671, "step": 28691 }, { "epoch": 0.37, "grad_norm": 4.027235984802246, "learning_rate": 1.9791977546772324e-05, "loss": 2.1864, "step": 28692 }, { "epoch": 0.37, "grad_norm": 3.804272174835205, "learning_rate": 1.9791956224739074e-05, "loss": 2.0324, "step": 28693 }, { "epoch": 0.37, "grad_norm": 3.7317235469818115, "learning_rate": 1.9791934901624623e-05, "loss": 1.9033, "step": 28694 }, { "epoch": 0.37, "grad_norm": 3.948796510696411, "learning_rate": 1.979191357742898e-05, "loss": 2.442, "step": 28695 }, { "epoch": 0.37, "grad_norm": 3.8332834243774414, "learning_rate": 1.9791892252152138e-05, "loss": 1.62, "step": 28696 }, { "epoch": 0.37, "grad_norm": 4.216508865356445, "learning_rate": 1.9791870925794107e-05, "loss": 1.9846, "step": 28697 }, { "epoch": 0.37, "grad_norm": 4.145347595214844, "learning_rate": 1.979184959835489e-05, "loss": 1.99, "step": 28698 }, { "epoch": 0.37, "grad_norm": 3.656155586242676, "learning_rate": 1.9791828269834484e-05, "loss": 1.6637, "step": 28699 }, { "epoch": 0.37, "grad_norm": 4.680867671966553, "learning_rate": 1.9791806940232896e-05, "loss": 2.4053, "step": 28700 }, { "epoch": 0.37, "grad_norm": 3.624915599822998, "learning_rate": 1.9791785609550125e-05, "loss": 1.8819, "step": 28701 }, { "epoch": 0.37, "grad_norm": 3.7766544818878174, "learning_rate": 1.9791764277786174e-05, "loss": 1.9362, "step": 28702 }, { "epoch": 0.37, "grad_norm": 3.3446555137634277, "learning_rate": 1.979174294494105e-05, "loss": 1.8874, "step": 28703 }, { "epoch": 0.37, "grad_norm": 3.084340810775757, "learning_rate": 1.9791721611014748e-05, "loss": 1.5666, "step": 28704 }, { "epoch": 0.37, "grad_norm": 3.639116048812866, "learning_rate": 1.9791700276007274e-05, "loss": 1.9072, "step": 28705 }, { "epoch": 0.37, "grad_norm": 3.594636917114258, "learning_rate": 1.979167893991863e-05, "loss": 1.7198, "step": 28706 }, { "epoch": 0.37, "grad_norm": 3.592071771621704, "learning_rate": 1.979165760274882e-05, "loss": 2.0193, "step": 28707 }, { "epoch": 0.37, "grad_norm": 3.6433322429656982, "learning_rate": 1.9791636264497843e-05, "loss": 1.5052, "step": 28708 }, { "epoch": 0.37, "grad_norm": 4.078941822052002, "learning_rate": 1.9791614925165703e-05, "loss": 1.6845, "step": 28709 }, { "epoch": 0.37, "grad_norm": 3.584995746612549, "learning_rate": 1.9791593584752403e-05, "loss": 1.7483, "step": 28710 }, { "epoch": 0.37, "grad_norm": 4.122188568115234, "learning_rate": 1.9791572243257948e-05, "loss": 2.0325, "step": 28711 }, { "epoch": 0.37, "grad_norm": 3.8727493286132812, "learning_rate": 1.9791550900682336e-05, "loss": 2.0141, "step": 28712 }, { "epoch": 0.37, "grad_norm": 3.6496734619140625, "learning_rate": 1.9791529557025567e-05, "loss": 1.7612, "step": 28713 }, { "epoch": 0.37, "grad_norm": 4.430723667144775, "learning_rate": 1.979150821228765e-05, "loss": 2.2174, "step": 28714 }, { "epoch": 0.37, "grad_norm": 4.330470561981201, "learning_rate": 1.9791486866468586e-05, "loss": 2.5951, "step": 28715 }, { "epoch": 0.37, "grad_norm": 3.770087718963623, "learning_rate": 1.9791465519568373e-05, "loss": 2.0927, "step": 28716 }, { "epoch": 0.37, "grad_norm": 3.884359836578369, "learning_rate": 1.9791444171587017e-05, "loss": 2.1464, "step": 28717 }, { "epoch": 0.37, "grad_norm": 3.7673697471618652, "learning_rate": 1.9791422822524518e-05, "loss": 1.9105, "step": 28718 }, { "epoch": 0.37, "grad_norm": 3.4868712425231934, "learning_rate": 1.9791401472380884e-05, "loss": 1.5715, "step": 28719 }, { "epoch": 0.37, "grad_norm": 3.77260684967041, "learning_rate": 1.979138012115611e-05, "loss": 1.6318, "step": 28720 }, { "epoch": 0.37, "grad_norm": 4.096673965454102, "learning_rate": 1.97913587688502e-05, "loss": 2.246, "step": 28721 }, { "epoch": 0.37, "grad_norm": 3.7775702476501465, "learning_rate": 1.979133741546316e-05, "loss": 1.8175, "step": 28722 }, { "epoch": 0.37, "grad_norm": 4.024687767028809, "learning_rate": 1.979131606099499e-05, "loss": 2.0647, "step": 28723 }, { "epoch": 0.37, "grad_norm": 3.5405118465423584, "learning_rate": 1.9791294705445693e-05, "loss": 1.5992, "step": 28724 }, { "epoch": 0.37, "grad_norm": 3.8222479820251465, "learning_rate": 1.9791273348815273e-05, "loss": 1.5005, "step": 28725 }, { "epoch": 0.37, "grad_norm": 3.77050518989563, "learning_rate": 1.9791251991103725e-05, "loss": 2.2068, "step": 28726 }, { "epoch": 0.37, "grad_norm": 4.312683582305908, "learning_rate": 1.9791230632311064e-05, "loss": 2.2324, "step": 28727 }, { "epoch": 0.37, "grad_norm": 4.191326141357422, "learning_rate": 1.9791209272437277e-05, "loss": 2.2596, "step": 28728 }, { "epoch": 0.37, "grad_norm": 3.909317970275879, "learning_rate": 1.979118791148238e-05, "loss": 1.6497, "step": 28729 }, { "epoch": 0.37, "grad_norm": 3.7588424682617188, "learning_rate": 1.9791166549446368e-05, "loss": 1.9963, "step": 28730 }, { "epoch": 0.37, "grad_norm": 4.110306262969971, "learning_rate": 1.9791145186329245e-05, "loss": 1.8902, "step": 28731 }, { "epoch": 0.37, "grad_norm": 3.608257293701172, "learning_rate": 1.979112382213101e-05, "loss": 1.8676, "step": 28732 }, { "epoch": 0.37, "grad_norm": 4.923637390136719, "learning_rate": 1.9791102456851675e-05, "loss": 2.3987, "step": 28733 }, { "epoch": 0.37, "grad_norm": 4.056295871734619, "learning_rate": 1.9791081090491232e-05, "loss": 1.7893, "step": 28734 }, { "epoch": 0.37, "grad_norm": 3.8253612518310547, "learning_rate": 1.9791059723049688e-05, "loss": 2.1164, "step": 28735 }, { "epoch": 0.37, "grad_norm": 3.615386486053467, "learning_rate": 1.9791038354527048e-05, "loss": 1.8057, "step": 28736 }, { "epoch": 0.37, "grad_norm": 3.9205756187438965, "learning_rate": 1.979101698492331e-05, "loss": 1.7949, "step": 28737 }, { "epoch": 0.37, "grad_norm": 3.7457494735717773, "learning_rate": 1.9790995614238475e-05, "loss": 1.8986, "step": 28738 }, { "epoch": 0.37, "grad_norm": 3.9905502796173096, "learning_rate": 1.979097424247255e-05, "loss": 2.0254, "step": 28739 }, { "epoch": 0.37, "grad_norm": 4.498834133148193, "learning_rate": 1.9790952869625535e-05, "loss": 2.0955, "step": 28740 }, { "epoch": 0.37, "grad_norm": 4.578226089477539, "learning_rate": 1.9790931495697432e-05, "loss": 2.1053, "step": 28741 }, { "epoch": 0.37, "grad_norm": 3.6836342811584473, "learning_rate": 1.9790910120688244e-05, "loss": 1.9137, "step": 28742 }, { "epoch": 0.37, "grad_norm": 4.541559219360352, "learning_rate": 1.9790888744597974e-05, "loss": 2.1294, "step": 28743 }, { "epoch": 0.37, "grad_norm": 4.321257591247559, "learning_rate": 1.9790867367426626e-05, "loss": 2.4646, "step": 28744 }, { "epoch": 0.37, "grad_norm": 3.8886101245880127, "learning_rate": 1.9790845989174197e-05, "loss": 2.5355, "step": 28745 }, { "epoch": 0.37, "grad_norm": 3.540510416030884, "learning_rate": 1.9790824609840692e-05, "loss": 1.7004, "step": 28746 }, { "epoch": 0.37, "grad_norm": 4.012284755706787, "learning_rate": 1.9790803229426116e-05, "loss": 1.8048, "step": 28747 }, { "epoch": 0.37, "grad_norm": 3.78760027885437, "learning_rate": 1.9790781847930472e-05, "loss": 1.5108, "step": 28748 }, { "epoch": 0.37, "grad_norm": 4.250114440917969, "learning_rate": 1.9790760465353753e-05, "loss": 1.7425, "step": 28749 }, { "epoch": 0.37, "grad_norm": 4.43075704574585, "learning_rate": 1.9790739081695972e-05, "loss": 2.1288, "step": 28750 }, { "epoch": 0.37, "grad_norm": 3.610100030899048, "learning_rate": 1.9790717696957127e-05, "loss": 1.7035, "step": 28751 }, { "epoch": 0.37, "grad_norm": 3.702085494995117, "learning_rate": 1.979069631113722e-05, "loss": 1.6463, "step": 28752 }, { "epoch": 0.37, "grad_norm": 3.6880855560302734, "learning_rate": 1.9790674924236252e-05, "loss": 1.9687, "step": 28753 }, { "epoch": 0.37, "grad_norm": 3.6466472148895264, "learning_rate": 1.9790653536254233e-05, "loss": 1.6567, "step": 28754 }, { "epoch": 0.37, "grad_norm": 4.326729774475098, "learning_rate": 1.9790632147191156e-05, "loss": 2.1701, "step": 28755 }, { "epoch": 0.37, "grad_norm": 3.833451986312866, "learning_rate": 1.9790610757047027e-05, "loss": 2.0752, "step": 28756 }, { "epoch": 0.37, "grad_norm": 4.228866100311279, "learning_rate": 1.9790589365821847e-05, "loss": 2.2793, "step": 28757 }, { "epoch": 0.37, "grad_norm": 4.139664173126221, "learning_rate": 1.9790567973515623e-05, "loss": 2.1411, "step": 28758 }, { "epoch": 0.37, "grad_norm": 3.891928195953369, "learning_rate": 1.979054658012835e-05, "loss": 2.1676, "step": 28759 }, { "epoch": 0.37, "grad_norm": 3.5533571243286133, "learning_rate": 1.979052518566004e-05, "loss": 1.3056, "step": 28760 }, { "epoch": 0.37, "grad_norm": 3.3317043781280518, "learning_rate": 1.9790503790110688e-05, "loss": 1.4655, "step": 28761 }, { "epoch": 0.37, "grad_norm": 4.226105690002441, "learning_rate": 1.9790482393480296e-05, "loss": 2.1356, "step": 28762 }, { "epoch": 0.37, "grad_norm": 4.330816268920898, "learning_rate": 1.9790460995768874e-05, "loss": 2.4864, "step": 28763 }, { "epoch": 0.37, "grad_norm": 4.093153953552246, "learning_rate": 1.9790439596976413e-05, "loss": 1.9963, "step": 28764 }, { "epoch": 0.37, "grad_norm": 3.8470027446746826, "learning_rate": 1.9790418197102922e-05, "loss": 1.9891, "step": 28765 }, { "epoch": 0.37, "grad_norm": 3.8738396167755127, "learning_rate": 1.9790396796148407e-05, "loss": 2.1228, "step": 28766 }, { "epoch": 0.37, "grad_norm": 4.030194282531738, "learning_rate": 1.9790375394112864e-05, "loss": 2.493, "step": 28767 }, { "epoch": 0.37, "grad_norm": 3.7047317028045654, "learning_rate": 1.9790353990996298e-05, "loss": 1.6262, "step": 28768 }, { "epoch": 0.37, "grad_norm": 4.103978157043457, "learning_rate": 1.979033258679871e-05, "loss": 2.3459, "step": 28769 }, { "epoch": 0.37, "grad_norm": 3.6544852256774902, "learning_rate": 1.9790311181520102e-05, "loss": 1.5183, "step": 28770 }, { "epoch": 0.37, "grad_norm": 3.4046335220336914, "learning_rate": 1.9790289775160476e-05, "loss": 1.8295, "step": 28771 }, { "epoch": 0.37, "grad_norm": 3.923567056655884, "learning_rate": 1.979026836771984e-05, "loss": 2.2337, "step": 28772 }, { "epoch": 0.37, "grad_norm": 3.2460293769836426, "learning_rate": 1.979024695919819e-05, "loss": 1.484, "step": 28773 }, { "epoch": 0.37, "grad_norm": 3.962104082107544, "learning_rate": 1.9790225549595533e-05, "loss": 1.9866, "step": 28774 }, { "epoch": 0.37, "grad_norm": 3.6655378341674805, "learning_rate": 1.9790204138911865e-05, "loss": 1.8086, "step": 28775 }, { "epoch": 0.37, "grad_norm": 4.545395851135254, "learning_rate": 1.9790182727147197e-05, "loss": 2.3224, "step": 28776 }, { "epoch": 0.37, "grad_norm": 3.7950751781463623, "learning_rate": 1.9790161314301525e-05, "loss": 2.1168, "step": 28777 }, { "epoch": 0.37, "grad_norm": 4.009832382202148, "learning_rate": 1.9790139900374853e-05, "loss": 2.2254, "step": 28778 }, { "epoch": 0.37, "grad_norm": 4.2178730964660645, "learning_rate": 1.979011848536718e-05, "loss": 2.2344, "step": 28779 }, { "epoch": 0.37, "grad_norm": 4.376896381378174, "learning_rate": 1.9790097069278517e-05, "loss": 2.393, "step": 28780 }, { "epoch": 0.37, "grad_norm": 4.329484462738037, "learning_rate": 1.9790075652108857e-05, "loss": 2.5089, "step": 28781 }, { "epoch": 0.37, "grad_norm": 3.758915901184082, "learning_rate": 1.979005423385821e-05, "loss": 1.7578, "step": 28782 }, { "epoch": 0.37, "grad_norm": 3.971423625946045, "learning_rate": 1.9790032814526573e-05, "loss": 2.0839, "step": 28783 }, { "epoch": 0.37, "grad_norm": 4.198339939117432, "learning_rate": 1.979001139411395e-05, "loss": 2.1759, "step": 28784 }, { "epoch": 0.37, "grad_norm": 4.122554779052734, "learning_rate": 1.9789989972620346e-05, "loss": 1.7788, "step": 28785 }, { "epoch": 0.37, "grad_norm": 3.71836256980896, "learning_rate": 1.9789968550045758e-05, "loss": 1.9306, "step": 28786 }, { "epoch": 0.37, "grad_norm": 3.991022825241089, "learning_rate": 1.978994712639019e-05, "loss": 1.9256, "step": 28787 }, { "epoch": 0.37, "grad_norm": 3.889714479446411, "learning_rate": 1.978992570165365e-05, "loss": 1.9026, "step": 28788 }, { "epoch": 0.37, "grad_norm": 3.96413516998291, "learning_rate": 1.9789904275836136e-05, "loss": 1.2835, "step": 28789 }, { "epoch": 0.37, "grad_norm": 3.2992117404937744, "learning_rate": 1.978988284893765e-05, "loss": 1.5853, "step": 28790 }, { "epoch": 0.37, "grad_norm": 4.248437881469727, "learning_rate": 1.9789861420958195e-05, "loss": 2.2995, "step": 28791 }, { "epoch": 0.37, "grad_norm": 4.681859493255615, "learning_rate": 1.978983999189777e-05, "loss": 2.8234, "step": 28792 }, { "epoch": 0.37, "grad_norm": 4.254900932312012, "learning_rate": 1.9789818561756385e-05, "loss": 1.8069, "step": 28793 }, { "epoch": 0.37, "grad_norm": 4.090935707092285, "learning_rate": 1.9789797130534035e-05, "loss": 2.2033, "step": 28794 }, { "epoch": 0.37, "grad_norm": 3.7071926593780518, "learning_rate": 1.9789775698230725e-05, "loss": 2.1633, "step": 28795 }, { "epoch": 0.37, "grad_norm": 4.318903923034668, "learning_rate": 1.978975426484646e-05, "loss": 2.3339, "step": 28796 }, { "epoch": 0.37, "grad_norm": 4.573109149932861, "learning_rate": 1.9789732830381237e-05, "loss": 2.6835, "step": 28797 }, { "epoch": 0.37, "grad_norm": 3.4870407581329346, "learning_rate": 1.9789711394835065e-05, "loss": 1.5703, "step": 28798 }, { "epoch": 0.37, "grad_norm": 3.771815776824951, "learning_rate": 1.978968995820794e-05, "loss": 2.0553, "step": 28799 }, { "epoch": 0.37, "grad_norm": 4.0418195724487305, "learning_rate": 1.978966852049987e-05, "loss": 1.7085, "step": 28800 }, { "epoch": 0.37, "grad_norm": 4.437357425689697, "learning_rate": 1.978964708171085e-05, "loss": 2.4942, "step": 28801 }, { "epoch": 0.37, "grad_norm": 4.166556358337402, "learning_rate": 1.978962564184089e-05, "loss": 2.6034, "step": 28802 }, { "epoch": 0.37, "grad_norm": 4.439268112182617, "learning_rate": 1.9789604200889993e-05, "loss": 2.754, "step": 28803 }, { "epoch": 0.37, "grad_norm": 4.137852191925049, "learning_rate": 1.978958275885815e-05, "loss": 2.1467, "step": 28804 }, { "epoch": 0.37, "grad_norm": 3.870537519454956, "learning_rate": 1.9789561315745373e-05, "loss": 1.8175, "step": 28805 }, { "epoch": 0.37, "grad_norm": 3.6232197284698486, "learning_rate": 1.9789539871551665e-05, "loss": 2.1652, "step": 28806 }, { "epoch": 0.37, "grad_norm": 3.4267795085906982, "learning_rate": 1.9789518426277022e-05, "loss": 1.5303, "step": 28807 }, { "epoch": 0.37, "grad_norm": 3.8707969188690186, "learning_rate": 1.9789496979921456e-05, "loss": 1.968, "step": 28808 }, { "epoch": 0.37, "grad_norm": 3.8768460750579834, "learning_rate": 1.9789475532484957e-05, "loss": 1.9897, "step": 28809 }, { "epoch": 0.37, "grad_norm": 3.2762503623962402, "learning_rate": 1.978945408396754e-05, "loss": 1.8638, "step": 28810 }, { "epoch": 0.37, "grad_norm": 3.6439619064331055, "learning_rate": 1.9789432634369196e-05, "loss": 1.7853, "step": 28811 }, { "epoch": 0.37, "grad_norm": 4.389088153839111, "learning_rate": 1.9789411183689934e-05, "loss": 2.1284, "step": 28812 }, { "epoch": 0.37, "grad_norm": 3.9871580600738525, "learning_rate": 1.9789389731929756e-05, "loss": 2.18, "step": 28813 }, { "epoch": 0.37, "grad_norm": 3.32966947555542, "learning_rate": 1.9789368279088662e-05, "loss": 1.8841, "step": 28814 }, { "epoch": 0.37, "grad_norm": 3.575070381164551, "learning_rate": 1.978934682516666e-05, "loss": 1.7916, "step": 28815 }, { "epoch": 0.37, "grad_norm": 4.127628326416016, "learning_rate": 1.978932537016374e-05, "loss": 2.051, "step": 28816 }, { "epoch": 0.37, "grad_norm": 3.729956865310669, "learning_rate": 1.978930391407992e-05, "loss": 1.8996, "step": 28817 }, { "epoch": 0.37, "grad_norm": 3.7579689025878906, "learning_rate": 1.978928245691519e-05, "loss": 1.7884, "step": 28818 }, { "epoch": 0.37, "grad_norm": 3.788823127746582, "learning_rate": 1.978926099866956e-05, "loss": 1.8472, "step": 28819 }, { "epoch": 0.37, "grad_norm": 3.852360963821411, "learning_rate": 1.978923953934303e-05, "loss": 1.9447, "step": 28820 }, { "epoch": 0.37, "grad_norm": 4.087685585021973, "learning_rate": 1.97892180789356e-05, "loss": 2.1838, "step": 28821 }, { "epoch": 0.37, "grad_norm": 4.001791477203369, "learning_rate": 1.978919661744727e-05, "loss": 2.0894, "step": 28822 }, { "epoch": 0.37, "grad_norm": 3.8652117252349854, "learning_rate": 1.9789175154878052e-05, "loss": 1.8786, "step": 28823 }, { "epoch": 0.37, "grad_norm": 3.511601448059082, "learning_rate": 1.9789153691227943e-05, "loss": 2.2201, "step": 28824 }, { "epoch": 0.37, "grad_norm": 4.374152183532715, "learning_rate": 1.9789132226496946e-05, "loss": 2.1317, "step": 28825 }, { "epoch": 0.37, "grad_norm": 3.9474189281463623, "learning_rate": 1.978911076068506e-05, "loss": 2.2434, "step": 28826 }, { "epoch": 0.37, "grad_norm": 3.791541576385498, "learning_rate": 1.978908929379229e-05, "loss": 1.9888, "step": 28827 }, { "epoch": 0.37, "grad_norm": 3.9683327674865723, "learning_rate": 1.978906782581864e-05, "loss": 2.0968, "step": 28828 }, { "epoch": 0.37, "grad_norm": 3.6828925609588623, "learning_rate": 1.9789046356764113e-05, "loss": 1.7816, "step": 28829 }, { "epoch": 0.37, "grad_norm": 3.960397243499756, "learning_rate": 1.9789024886628704e-05, "loss": 1.9035, "step": 28830 }, { "epoch": 0.37, "grad_norm": 4.098737716674805, "learning_rate": 1.9789003415412423e-05, "loss": 2.3025, "step": 28831 }, { "epoch": 0.37, "grad_norm": 3.7273826599121094, "learning_rate": 1.978898194311527e-05, "loss": 1.6961, "step": 28832 }, { "epoch": 0.37, "grad_norm": 3.9895193576812744, "learning_rate": 1.978896046973725e-05, "loss": 2.1356, "step": 28833 }, { "epoch": 0.37, "grad_norm": 3.4775848388671875, "learning_rate": 1.978893899527836e-05, "loss": 1.4243, "step": 28834 }, { "epoch": 0.37, "grad_norm": 4.4151434898376465, "learning_rate": 1.9788917519738606e-05, "loss": 2.2336, "step": 28835 }, { "epoch": 0.37, "grad_norm": 3.975529909133911, "learning_rate": 1.9788896043117987e-05, "loss": 2.3258, "step": 28836 }, { "epoch": 0.37, "grad_norm": 3.5328097343444824, "learning_rate": 1.978887456541651e-05, "loss": 1.9906, "step": 28837 }, { "epoch": 0.37, "grad_norm": 4.399777889251709, "learning_rate": 1.9788853086634175e-05, "loss": 2.4002, "step": 28838 }, { "epoch": 0.37, "grad_norm": 4.898308277130127, "learning_rate": 1.9788831606770985e-05, "loss": 2.7315, "step": 28839 }, { "epoch": 0.37, "grad_norm": 4.06468391418457, "learning_rate": 1.978881012582694e-05, "loss": 2.1194, "step": 28840 }, { "epoch": 0.37, "grad_norm": 4.018423080444336, "learning_rate": 1.9788788643802045e-05, "loss": 1.8488, "step": 28841 }, { "epoch": 0.37, "grad_norm": 4.380136013031006, "learning_rate": 1.97887671606963e-05, "loss": 2.6307, "step": 28842 }, { "epoch": 0.37, "grad_norm": 3.990985155105591, "learning_rate": 1.9788745676509717e-05, "loss": 1.8741, "step": 28843 }, { "epoch": 0.37, "grad_norm": 3.537989377975464, "learning_rate": 1.978872419124228e-05, "loss": 1.6979, "step": 28844 }, { "epoch": 0.37, "grad_norm": 3.441631555557251, "learning_rate": 1.978870270489401e-05, "loss": 1.5588, "step": 28845 }, { "epoch": 0.37, "grad_norm": 3.7912206649780273, "learning_rate": 1.9788681217464895e-05, "loss": 2.0331, "step": 28846 }, { "epoch": 0.37, "grad_norm": 3.707463502883911, "learning_rate": 1.9788659728954945e-05, "loss": 1.5514, "step": 28847 }, { "epoch": 0.37, "grad_norm": 3.6085939407348633, "learning_rate": 1.978863823936416e-05, "loss": 2.2953, "step": 28848 }, { "epoch": 0.37, "grad_norm": 3.952543020248413, "learning_rate": 1.9788616748692547e-05, "loss": 2.5565, "step": 28849 }, { "epoch": 0.37, "grad_norm": 4.265981197357178, "learning_rate": 1.9788595256940104e-05, "loss": 1.8465, "step": 28850 }, { "epoch": 0.37, "grad_norm": 2.9948837757110596, "learning_rate": 1.9788573764106834e-05, "loss": 1.3639, "step": 28851 }, { "epoch": 0.37, "grad_norm": 4.397154808044434, "learning_rate": 1.9788552270192737e-05, "loss": 2.6775, "step": 28852 }, { "epoch": 0.37, "grad_norm": 4.709755897521973, "learning_rate": 1.978853077519782e-05, "loss": 2.3734, "step": 28853 }, { "epoch": 0.37, "grad_norm": 3.8048250675201416, "learning_rate": 1.9788509279122083e-05, "loss": 2.2168, "step": 28854 }, { "epoch": 0.37, "grad_norm": 4.122913837432861, "learning_rate": 1.9788487781965526e-05, "loss": 2.2312, "step": 28855 }, { "epoch": 0.37, "grad_norm": 4.544686317443848, "learning_rate": 1.9788466283728155e-05, "loss": 2.5238, "step": 28856 }, { "epoch": 0.37, "grad_norm": 3.8919949531555176, "learning_rate": 1.9788444784409974e-05, "loss": 2.2645, "step": 28857 }, { "epoch": 0.37, "grad_norm": 4.123494625091553, "learning_rate": 1.978842328401098e-05, "loss": 1.8656, "step": 28858 }, { "epoch": 0.37, "grad_norm": 3.7496089935302734, "learning_rate": 1.9788401782531176e-05, "loss": 1.8384, "step": 28859 }, { "epoch": 0.37, "grad_norm": 3.801865816116333, "learning_rate": 1.978838027997057e-05, "loss": 1.8426, "step": 28860 }, { "epoch": 0.37, "grad_norm": 4.402750015258789, "learning_rate": 1.978835877632916e-05, "loss": 2.2942, "step": 28861 }, { "epoch": 0.37, "grad_norm": 3.4285709857940674, "learning_rate": 1.978833727160695e-05, "loss": 1.8732, "step": 28862 }, { "epoch": 0.37, "grad_norm": 3.8772847652435303, "learning_rate": 1.9788315765803937e-05, "loss": 2.0172, "step": 28863 }, { "epoch": 0.37, "grad_norm": 3.428950548171997, "learning_rate": 1.9788294258920132e-05, "loss": 1.4746, "step": 28864 }, { "epoch": 0.37, "grad_norm": 3.281517505645752, "learning_rate": 1.9788272750955537e-05, "loss": 1.7597, "step": 28865 }, { "epoch": 0.37, "grad_norm": 3.3685622215270996, "learning_rate": 1.978825124191014e-05, "loss": 1.8381, "step": 28866 }, { "epoch": 0.37, "grad_norm": 3.4315385818481445, "learning_rate": 1.9788229731783965e-05, "loss": 1.7444, "step": 28867 }, { "epoch": 0.37, "grad_norm": 4.52919340133667, "learning_rate": 1.9788208220576995e-05, "loss": 2.1491, "step": 28868 }, { "epoch": 0.37, "grad_norm": 3.721346139907837, "learning_rate": 1.9788186708289245e-05, "loss": 1.8865, "step": 28869 }, { "epoch": 0.37, "grad_norm": 3.6875972747802734, "learning_rate": 1.978816519492071e-05, "loss": 2.1487, "step": 28870 }, { "epoch": 0.37, "grad_norm": 3.494426965713501, "learning_rate": 1.9788143680471398e-05, "loss": 2.0185, "step": 28871 }, { "epoch": 0.37, "grad_norm": 3.6524908542633057, "learning_rate": 1.978812216494131e-05, "loss": 1.8446, "step": 28872 }, { "epoch": 0.37, "grad_norm": 4.237405776977539, "learning_rate": 1.9788100648330447e-05, "loss": 2.1623, "step": 28873 }, { "epoch": 0.37, "grad_norm": 3.4205684661865234, "learning_rate": 1.978807913063881e-05, "loss": 1.8, "step": 28874 }, { "epoch": 0.37, "grad_norm": 4.022182464599609, "learning_rate": 1.9788057611866406e-05, "loss": 2.0478, "step": 28875 }, { "epoch": 0.37, "grad_norm": 3.7266783714294434, "learning_rate": 1.978803609201323e-05, "loss": 1.8762, "step": 28876 }, { "epoch": 0.37, "grad_norm": 3.6157896518707275, "learning_rate": 1.978801457107929e-05, "loss": 2.0239, "step": 28877 }, { "epoch": 0.37, "grad_norm": 3.919060468673706, "learning_rate": 1.978799304906459e-05, "loss": 1.6934, "step": 28878 }, { "epoch": 0.37, "grad_norm": 3.385334014892578, "learning_rate": 1.9787971525969125e-05, "loss": 1.5621, "step": 28879 }, { "epoch": 0.37, "grad_norm": 4.240584850311279, "learning_rate": 1.9787950001792905e-05, "loss": 2.0535, "step": 28880 }, { "epoch": 0.37, "grad_norm": 4.286930084228516, "learning_rate": 1.9787928476535926e-05, "loss": 2.3712, "step": 28881 }, { "epoch": 0.37, "grad_norm": 4.5752177238464355, "learning_rate": 1.9787906950198196e-05, "loss": 2.3655, "step": 28882 }, { "epoch": 0.37, "grad_norm": 3.8298592567443848, "learning_rate": 1.9787885422779717e-05, "loss": 1.9751, "step": 28883 }, { "epoch": 0.37, "grad_norm": 4.401597499847412, "learning_rate": 1.978786389428049e-05, "loss": 2.2166, "step": 28884 }, { "epoch": 0.37, "grad_norm": 3.92281436920166, "learning_rate": 1.9787842364700514e-05, "loss": 2.0034, "step": 28885 }, { "epoch": 0.37, "grad_norm": 3.600090980529785, "learning_rate": 1.9787820834039792e-05, "loss": 1.7916, "step": 28886 }, { "epoch": 0.37, "grad_norm": 4.009016036987305, "learning_rate": 1.978779930229833e-05, "loss": 2.2236, "step": 28887 }, { "epoch": 0.37, "grad_norm": 4.646456241607666, "learning_rate": 1.978777776947613e-05, "loss": 2.1418, "step": 28888 }, { "epoch": 0.37, "grad_norm": 4.046404838562012, "learning_rate": 1.9787756235573195e-05, "loss": 2.0012, "step": 28889 }, { "epoch": 0.37, "grad_norm": 3.982297897338867, "learning_rate": 1.9787734700589523e-05, "loss": 1.9939, "step": 28890 }, { "epoch": 0.37, "grad_norm": 4.06293249130249, "learning_rate": 1.978771316452512e-05, "loss": 2.0238, "step": 28891 }, { "epoch": 0.37, "grad_norm": 4.670137405395508, "learning_rate": 1.9787691627379986e-05, "loss": 2.4515, "step": 28892 }, { "epoch": 0.37, "grad_norm": 3.75673770904541, "learning_rate": 1.9787670089154126e-05, "loss": 1.7062, "step": 28893 }, { "epoch": 0.37, "grad_norm": 4.194290637969971, "learning_rate": 1.9787648549847543e-05, "loss": 2.0417, "step": 28894 }, { "epoch": 0.37, "grad_norm": 4.172124862670898, "learning_rate": 1.9787627009460234e-05, "loss": 1.9082, "step": 28895 }, { "epoch": 0.38, "grad_norm": 3.5805580615997314, "learning_rate": 1.978760546799221e-05, "loss": 2.0091, "step": 28896 }, { "epoch": 0.38, "grad_norm": 4.312502861022949, "learning_rate": 1.978758392544346e-05, "loss": 2.5028, "step": 28897 }, { "epoch": 0.38, "grad_norm": 3.956346035003662, "learning_rate": 1.9787562381814002e-05, "loss": 2.3604, "step": 28898 }, { "epoch": 0.38, "grad_norm": 3.865846872329712, "learning_rate": 1.978754083710383e-05, "loss": 2.0646, "step": 28899 }, { "epoch": 0.38, "grad_norm": 4.12108039855957, "learning_rate": 1.9787519291312945e-05, "loss": 2.0818, "step": 28900 }, { "epoch": 0.38, "grad_norm": 3.640458822250366, "learning_rate": 1.9787497744441352e-05, "loss": 1.9568, "step": 28901 }, { "epoch": 0.38, "grad_norm": 4.004305362701416, "learning_rate": 1.9787476196489056e-05, "loss": 2.0773, "step": 28902 }, { "epoch": 0.38, "grad_norm": 4.555481433868408, "learning_rate": 1.9787454647456055e-05, "loss": 1.7927, "step": 28903 }, { "epoch": 0.38, "grad_norm": 4.016014575958252, "learning_rate": 1.978743309734235e-05, "loss": 2.4173, "step": 28904 }, { "epoch": 0.38, "grad_norm": 3.9906318187713623, "learning_rate": 1.9787411546147952e-05, "loss": 2.1005, "step": 28905 }, { "epoch": 0.38, "grad_norm": 4.150264263153076, "learning_rate": 1.9787389993872854e-05, "loss": 1.7141, "step": 28906 }, { "epoch": 0.38, "grad_norm": 3.403290033340454, "learning_rate": 1.9787368440517064e-05, "loss": 1.5692, "step": 28907 }, { "epoch": 0.38, "grad_norm": 3.9945316314697266, "learning_rate": 1.9787346886080582e-05, "loss": 1.9805, "step": 28908 }, { "epoch": 0.38, "grad_norm": 3.977140188217163, "learning_rate": 1.9787325330563408e-05, "loss": 1.9158, "step": 28909 }, { "epoch": 0.38, "grad_norm": 4.343508243560791, "learning_rate": 1.9787303773965552e-05, "loss": 2.6611, "step": 28910 }, { "epoch": 0.38, "grad_norm": 4.104690074920654, "learning_rate": 1.9787282216287007e-05, "loss": 2.1969, "step": 28911 }, { "epoch": 0.38, "grad_norm": 3.8377904891967773, "learning_rate": 1.9787260657527784e-05, "loss": 1.9995, "step": 28912 }, { "epoch": 0.38, "grad_norm": 3.7793056964874268, "learning_rate": 1.978723909768788e-05, "loss": 2.0521, "step": 28913 }, { "epoch": 0.38, "grad_norm": 4.374803066253662, "learning_rate": 1.97872175367673e-05, "loss": 2.4426, "step": 28914 }, { "epoch": 0.38, "grad_norm": 3.8887176513671875, "learning_rate": 1.978719597476604e-05, "loss": 1.9834, "step": 28915 }, { "epoch": 0.38, "grad_norm": 3.890159845352173, "learning_rate": 1.9787174411684113e-05, "loss": 2.1516, "step": 28916 }, { "epoch": 0.38, "grad_norm": 3.905418872833252, "learning_rate": 1.978715284752151e-05, "loss": 2.0097, "step": 28917 }, { "epoch": 0.38, "grad_norm": 3.965845823287964, "learning_rate": 1.9787131282278243e-05, "loss": 2.2342, "step": 28918 }, { "epoch": 0.38, "grad_norm": 4.095028400421143, "learning_rate": 1.9787109715954314e-05, "loss": 2.4962, "step": 28919 }, { "epoch": 0.38, "grad_norm": 3.9375219345092773, "learning_rate": 1.9787088148549717e-05, "loss": 1.9322, "step": 28920 }, { "epoch": 0.38, "grad_norm": 3.774782419204712, "learning_rate": 1.9787066580064462e-05, "loss": 1.9647, "step": 28921 }, { "epoch": 0.38, "grad_norm": 4.2909836769104, "learning_rate": 1.978704501049855e-05, "loss": 2.6582, "step": 28922 }, { "epoch": 0.38, "grad_norm": 3.9119765758514404, "learning_rate": 1.9787023439851977e-05, "loss": 2.2302, "step": 28923 }, { "epoch": 0.38, "grad_norm": 4.07252311706543, "learning_rate": 1.9787001868124757e-05, "loss": 2.2243, "step": 28924 }, { "epoch": 0.38, "grad_norm": 3.916846990585327, "learning_rate": 1.978698029531688e-05, "loss": 1.8786, "step": 28925 }, { "epoch": 0.38, "grad_norm": 4.117733001708984, "learning_rate": 1.9786958721428357e-05, "loss": 1.7958, "step": 28926 }, { "epoch": 0.38, "grad_norm": 4.317379474639893, "learning_rate": 1.978693714645919e-05, "loss": 2.0631, "step": 28927 }, { "epoch": 0.38, "grad_norm": 3.7689716815948486, "learning_rate": 1.9786915570409374e-05, "loss": 1.9886, "step": 28928 }, { "epoch": 0.38, "grad_norm": 4.430126667022705, "learning_rate": 1.978689399327892e-05, "loss": 2.0436, "step": 28929 }, { "epoch": 0.38, "grad_norm": 4.0156989097595215, "learning_rate": 1.9786872415067827e-05, "loss": 1.8138, "step": 28930 }, { "epoch": 0.38, "grad_norm": 4.092166900634766, "learning_rate": 1.9786850835776098e-05, "loss": 2.0336, "step": 28931 }, { "epoch": 0.38, "grad_norm": 4.52848482131958, "learning_rate": 1.978682925540373e-05, "loss": 2.1987, "step": 28932 }, { "epoch": 0.38, "grad_norm": 4.371143817901611, "learning_rate": 1.9786807673950737e-05, "loss": 1.9122, "step": 28933 }, { "epoch": 0.38, "grad_norm": 3.727479934692383, "learning_rate": 1.978678609141711e-05, "loss": 1.7737, "step": 28934 }, { "epoch": 0.38, "grad_norm": 3.6064388751983643, "learning_rate": 1.9786764507802855e-05, "loss": 1.9767, "step": 28935 }, { "epoch": 0.38, "grad_norm": 3.905433177947998, "learning_rate": 1.9786742923107978e-05, "loss": 2.0482, "step": 28936 }, { "epoch": 0.38, "grad_norm": 3.7614285945892334, "learning_rate": 1.9786721337332476e-05, "loss": 1.9358, "step": 28937 }, { "epoch": 0.38, "grad_norm": 4.076787948608398, "learning_rate": 1.9786699750476353e-05, "loss": 2.0341, "step": 28938 }, { "epoch": 0.38, "grad_norm": 3.9361422061920166, "learning_rate": 1.9786678162539617e-05, "loss": 1.8821, "step": 28939 }, { "epoch": 0.38, "grad_norm": 4.153403282165527, "learning_rate": 1.9786656573522262e-05, "loss": 2.2523, "step": 28940 }, { "epoch": 0.38, "grad_norm": 3.18117618560791, "learning_rate": 1.9786634983424294e-05, "loss": 1.5671, "step": 28941 }, { "epoch": 0.38, "grad_norm": 4.184779644012451, "learning_rate": 1.9786613392245718e-05, "loss": 1.9985, "step": 28942 }, { "epoch": 0.38, "grad_norm": 3.5406908988952637, "learning_rate": 1.978659179998653e-05, "loss": 1.6491, "step": 28943 }, { "epoch": 0.38, "grad_norm": 3.5097551345825195, "learning_rate": 1.978657020664674e-05, "loss": 1.9189, "step": 28944 }, { "epoch": 0.38, "grad_norm": 3.6438868045806885, "learning_rate": 1.9786548612226347e-05, "loss": 1.6978, "step": 28945 }, { "epoch": 0.38, "grad_norm": 4.3152594566345215, "learning_rate": 1.978652701672535e-05, "loss": 2.5633, "step": 28946 }, { "epoch": 0.38, "grad_norm": 3.8270251750946045, "learning_rate": 1.9786505420143756e-05, "loss": 2.425, "step": 28947 }, { "epoch": 0.38, "grad_norm": 3.3819801807403564, "learning_rate": 1.9786483822481566e-05, "loss": 1.568, "step": 28948 }, { "epoch": 0.38, "grad_norm": 3.7040109634399414, "learning_rate": 1.978646222373878e-05, "loss": 2.294, "step": 28949 }, { "epoch": 0.38, "grad_norm": 3.6200990676879883, "learning_rate": 1.9786440623915406e-05, "loss": 1.8209, "step": 28950 }, { "epoch": 0.38, "grad_norm": 4.39181661605835, "learning_rate": 1.9786419023011442e-05, "loss": 2.4648, "step": 28951 }, { "epoch": 0.38, "grad_norm": 3.9379961490631104, "learning_rate": 1.9786397421026887e-05, "loss": 1.9757, "step": 28952 }, { "epoch": 0.38, "grad_norm": 3.973224639892578, "learning_rate": 1.9786375817961752e-05, "loss": 2.0912, "step": 28953 }, { "epoch": 0.38, "grad_norm": 4.317152976989746, "learning_rate": 1.9786354213816034e-05, "loss": 2.5445, "step": 28954 }, { "epoch": 0.38, "grad_norm": 4.1586480140686035, "learning_rate": 1.978633260858974e-05, "loss": 1.8966, "step": 28955 }, { "epoch": 0.38, "grad_norm": 3.93585467338562, "learning_rate": 1.9786311002282863e-05, "loss": 2.2301, "step": 28956 }, { "epoch": 0.38, "grad_norm": 3.8239359855651855, "learning_rate": 1.9786289394895414e-05, "loss": 2.1212, "step": 28957 }, { "epoch": 0.38, "grad_norm": 3.889141082763672, "learning_rate": 1.978626778642739e-05, "loss": 2.0299, "step": 28958 }, { "epoch": 0.38, "grad_norm": 3.6458868980407715, "learning_rate": 1.9786246176878802e-05, "loss": 1.9192, "step": 28959 }, { "epoch": 0.38, "grad_norm": 3.906020164489746, "learning_rate": 1.9786224566249643e-05, "loss": 1.9427, "step": 28960 }, { "epoch": 0.38, "grad_norm": 3.4897472858428955, "learning_rate": 1.9786202954539916e-05, "loss": 1.7825, "step": 28961 }, { "epoch": 0.38, "grad_norm": 3.114074230194092, "learning_rate": 1.9786181341749627e-05, "loss": 1.4809, "step": 28962 }, { "epoch": 0.38, "grad_norm": 4.973426818847656, "learning_rate": 1.978615972787878e-05, "loss": 2.6012, "step": 28963 }, { "epoch": 0.38, "grad_norm": 3.2611420154571533, "learning_rate": 1.9786138112927375e-05, "loss": 1.5829, "step": 28964 }, { "epoch": 0.38, "grad_norm": 3.651540517807007, "learning_rate": 1.9786116496895413e-05, "loss": 2.3067, "step": 28965 }, { "epoch": 0.38, "grad_norm": 3.6478335857391357, "learning_rate": 1.97860948797829e-05, "loss": 2.1266, "step": 28966 }, { "epoch": 0.38, "grad_norm": 4.127043724060059, "learning_rate": 1.9786073261589832e-05, "loss": 2.1543, "step": 28967 }, { "epoch": 0.38, "grad_norm": 3.918025255203247, "learning_rate": 1.9786051642316217e-05, "loss": 1.9622, "step": 28968 }, { "epoch": 0.38, "grad_norm": 3.7944929599761963, "learning_rate": 1.978603002196206e-05, "loss": 2.2226, "step": 28969 }, { "epoch": 0.38, "grad_norm": 4.321712017059326, "learning_rate": 1.9786008400527354e-05, "loss": 2.244, "step": 28970 }, { "epoch": 0.38, "grad_norm": 4.604569911956787, "learning_rate": 1.978598677801211e-05, "loss": 1.9209, "step": 28971 }, { "epoch": 0.38, "grad_norm": 3.3436331748962402, "learning_rate": 1.9785965154416323e-05, "loss": 1.5571, "step": 28972 }, { "epoch": 0.38, "grad_norm": 3.915447235107422, "learning_rate": 1.9785943529740003e-05, "loss": 2.0112, "step": 28973 }, { "epoch": 0.38, "grad_norm": 4.003089427947998, "learning_rate": 1.978592190398315e-05, "loss": 1.9939, "step": 28974 }, { "epoch": 0.38, "grad_norm": 3.848008155822754, "learning_rate": 1.978590027714576e-05, "loss": 2.1714, "step": 28975 }, { "epoch": 0.38, "grad_norm": 3.677534341812134, "learning_rate": 1.9785878649227846e-05, "loss": 1.9312, "step": 28976 }, { "epoch": 0.38, "grad_norm": 4.466490745544434, "learning_rate": 1.9785857020229402e-05, "loss": 1.7886, "step": 28977 }, { "epoch": 0.38, "grad_norm": 3.8007583618164062, "learning_rate": 1.9785835390150435e-05, "loss": 1.9845, "step": 28978 }, { "epoch": 0.38, "grad_norm": 4.267416477203369, "learning_rate": 1.9785813758990945e-05, "loss": 2.3807, "step": 28979 }, { "epoch": 0.38, "grad_norm": 4.26718282699585, "learning_rate": 1.9785792126750933e-05, "loss": 2.0828, "step": 28980 }, { "epoch": 0.38, "grad_norm": 4.040567398071289, "learning_rate": 1.9785770493430404e-05, "loss": 2.2677, "step": 28981 }, { "epoch": 0.38, "grad_norm": 4.456022262573242, "learning_rate": 1.9785748859029364e-05, "loss": 2.1075, "step": 28982 }, { "epoch": 0.38, "grad_norm": 3.9209210872650146, "learning_rate": 1.9785727223547807e-05, "loss": 1.9554, "step": 28983 }, { "epoch": 0.38, "grad_norm": 5.191616535186768, "learning_rate": 1.978570558698574e-05, "loss": 2.4007, "step": 28984 }, { "epoch": 0.38, "grad_norm": 3.551457405090332, "learning_rate": 1.978568394934317e-05, "loss": 1.6939, "step": 28985 }, { "epoch": 0.38, "grad_norm": 3.532719373703003, "learning_rate": 1.978566231062009e-05, "loss": 1.6856, "step": 28986 }, { "epoch": 0.38, "grad_norm": 4.177242755889893, "learning_rate": 1.9785640670816506e-05, "loss": 2.4845, "step": 28987 }, { "epoch": 0.38, "grad_norm": 3.8066811561584473, "learning_rate": 1.9785619029932424e-05, "loss": 1.9782, "step": 28988 }, { "epoch": 0.38, "grad_norm": 3.460967779159546, "learning_rate": 1.9785597387967843e-05, "loss": 1.5587, "step": 28989 }, { "epoch": 0.38, "grad_norm": 3.2874724864959717, "learning_rate": 1.9785575744922764e-05, "loss": 1.7924, "step": 28990 }, { "epoch": 0.38, "grad_norm": 4.237700462341309, "learning_rate": 1.9785554100797195e-05, "loss": 2.1861, "step": 28991 }, { "epoch": 0.38, "grad_norm": 3.8286726474761963, "learning_rate": 1.978553245559113e-05, "loss": 1.7106, "step": 28992 }, { "epoch": 0.38, "grad_norm": 4.089054107666016, "learning_rate": 1.978551080930458e-05, "loss": 1.8558, "step": 28993 }, { "epoch": 0.38, "grad_norm": 4.130480766296387, "learning_rate": 1.9785489161937543e-05, "loss": 1.8549, "step": 28994 }, { "epoch": 0.38, "grad_norm": 4.281991958618164, "learning_rate": 1.9785467513490023e-05, "loss": 2.1046, "step": 28995 }, { "epoch": 0.38, "grad_norm": 3.987884283065796, "learning_rate": 1.978544586396202e-05, "loss": 1.969, "step": 28996 }, { "epoch": 0.38, "grad_norm": 3.9843907356262207, "learning_rate": 1.978542421335354e-05, "loss": 1.9007, "step": 28997 }, { "epoch": 0.38, "grad_norm": 4.023611545562744, "learning_rate": 1.978540256166458e-05, "loss": 1.9146, "step": 28998 }, { "epoch": 0.38, "grad_norm": 3.9821836948394775, "learning_rate": 1.9785380908895145e-05, "loss": 2.011, "step": 28999 }, { "epoch": 0.38, "grad_norm": 3.6943507194519043, "learning_rate": 1.978535925504524e-05, "loss": 1.9687, "step": 29000 }, { "epoch": 0.38, "grad_norm": 3.9739022254943848, "learning_rate": 1.9785337600114864e-05, "loss": 2.5599, "step": 29001 }, { "epoch": 0.38, "grad_norm": 3.9557697772979736, "learning_rate": 1.9785315944104024e-05, "loss": 2.033, "step": 29002 }, { "epoch": 0.38, "grad_norm": 4.483046531677246, "learning_rate": 1.9785294287012715e-05, "loss": 2.6142, "step": 29003 }, { "epoch": 0.38, "grad_norm": 4.027750015258789, "learning_rate": 1.9785272628840945e-05, "loss": 2.075, "step": 29004 }, { "epoch": 0.38, "grad_norm": 4.004018783569336, "learning_rate": 1.9785250969588716e-05, "loss": 1.9567, "step": 29005 }, { "epoch": 0.38, "grad_norm": 4.2430243492126465, "learning_rate": 1.978522930925603e-05, "loss": 2.0127, "step": 29006 }, { "epoch": 0.38, "grad_norm": 3.916835069656372, "learning_rate": 1.9785207647842884e-05, "loss": 2.2509, "step": 29007 }, { "epoch": 0.38, "grad_norm": 4.049461841583252, "learning_rate": 1.9785185985349288e-05, "loss": 2.135, "step": 29008 }, { "epoch": 0.38, "grad_norm": 3.8318941593170166, "learning_rate": 1.978516432177524e-05, "loss": 2.1688, "step": 29009 }, { "epoch": 0.38, "grad_norm": 3.7152340412139893, "learning_rate": 1.9785142657120747e-05, "loss": 1.8868, "step": 29010 }, { "epoch": 0.38, "grad_norm": 3.8787362575531006, "learning_rate": 1.9785120991385806e-05, "loss": 2.1282, "step": 29011 }, { "epoch": 0.38, "grad_norm": 3.8128714561462402, "learning_rate": 1.9785099324570424e-05, "loss": 1.7924, "step": 29012 }, { "epoch": 0.38, "grad_norm": 3.647136688232422, "learning_rate": 1.97850776566746e-05, "loss": 2.0032, "step": 29013 }, { "epoch": 0.38, "grad_norm": 4.234766006469727, "learning_rate": 1.9785055987698335e-05, "loss": 2.0654, "step": 29014 }, { "epoch": 0.38, "grad_norm": 3.9624688625335693, "learning_rate": 1.9785034317641635e-05, "loss": 2.3565, "step": 29015 }, { "epoch": 0.38, "grad_norm": 3.8033804893493652, "learning_rate": 1.97850126465045e-05, "loss": 1.9285, "step": 29016 }, { "epoch": 0.38, "grad_norm": 3.4745607376098633, "learning_rate": 1.9784990974286936e-05, "loss": 2.1186, "step": 29017 }, { "epoch": 0.38, "grad_norm": 3.9224119186401367, "learning_rate": 1.9784969300988943e-05, "loss": 2.0227, "step": 29018 }, { "epoch": 0.38, "grad_norm": 4.035572528839111, "learning_rate": 1.9784947626610522e-05, "loss": 1.9497, "step": 29019 }, { "epoch": 0.38, "grad_norm": 4.003934383392334, "learning_rate": 1.978492595115168e-05, "loss": 1.7843, "step": 29020 }, { "epoch": 0.38, "grad_norm": 3.789668321609497, "learning_rate": 1.978490427461241e-05, "loss": 1.8667, "step": 29021 }, { "epoch": 0.38, "grad_norm": 4.065593719482422, "learning_rate": 1.9784882596992726e-05, "loss": 2.3568, "step": 29022 }, { "epoch": 0.38, "grad_norm": 3.4836950302124023, "learning_rate": 1.9784860918292624e-05, "loss": 1.8974, "step": 29023 }, { "epoch": 0.38, "grad_norm": 3.970647096633911, "learning_rate": 1.9784839238512105e-05, "loss": 1.9554, "step": 29024 }, { "epoch": 0.38, "grad_norm": 4.424967288970947, "learning_rate": 1.978481755765118e-05, "loss": 2.1279, "step": 29025 }, { "epoch": 0.38, "grad_norm": 3.9640305042266846, "learning_rate": 1.9784795875709838e-05, "loss": 2.1398, "step": 29026 }, { "epoch": 0.38, "grad_norm": 4.457883834838867, "learning_rate": 1.978477419268809e-05, "loss": 2.407, "step": 29027 }, { "epoch": 0.38, "grad_norm": 4.0098876953125, "learning_rate": 1.9784752508585937e-05, "loss": 1.8389, "step": 29028 }, { "epoch": 0.38, "grad_norm": 4.333818435668945, "learning_rate": 1.9784730823403384e-05, "loss": 2.5357, "step": 29029 }, { "epoch": 0.38, "grad_norm": 3.433758020401001, "learning_rate": 1.9784709137140426e-05, "loss": 1.6193, "step": 29030 }, { "epoch": 0.38, "grad_norm": 4.377997875213623, "learning_rate": 1.9784687449797072e-05, "loss": 2.241, "step": 29031 }, { "epoch": 0.38, "grad_norm": 3.7655885219573975, "learning_rate": 1.9784665761373327e-05, "loss": 1.95, "step": 29032 }, { "epoch": 0.38, "grad_norm": 3.086444139480591, "learning_rate": 1.9784644071869184e-05, "loss": 1.6378, "step": 29033 }, { "epoch": 0.38, "grad_norm": 3.57084584236145, "learning_rate": 1.978462238128465e-05, "loss": 1.6821, "step": 29034 }, { "epoch": 0.38, "grad_norm": 3.9510068893432617, "learning_rate": 1.978460068961973e-05, "loss": 1.8805, "step": 29035 }, { "epoch": 0.38, "grad_norm": 4.095591068267822, "learning_rate": 1.9784578996874428e-05, "loss": 1.8624, "step": 29036 }, { "epoch": 0.38, "grad_norm": 4.060424327850342, "learning_rate": 1.9784557303048733e-05, "loss": 1.9725, "step": 29037 }, { "epoch": 0.38, "grad_norm": 3.5013530254364014, "learning_rate": 1.9784535608142662e-05, "loss": 1.596, "step": 29038 }, { "epoch": 0.38, "grad_norm": 3.705347776412964, "learning_rate": 1.9784513912156214e-05, "loss": 1.7144, "step": 29039 }, { "epoch": 0.38, "grad_norm": 4.626001358032227, "learning_rate": 1.978449221508939e-05, "loss": 2.3984, "step": 29040 }, { "epoch": 0.38, "grad_norm": 3.8878250122070312, "learning_rate": 1.978447051694219e-05, "loss": 2.0509, "step": 29041 }, { "epoch": 0.38, "grad_norm": 4.295711040496826, "learning_rate": 1.9784448817714617e-05, "loss": 2.1455, "step": 29042 }, { "epoch": 0.38, "grad_norm": 3.8677175045013428, "learning_rate": 1.9784427117406675e-05, "loss": 1.7755, "step": 29043 }, { "epoch": 0.38, "grad_norm": 4.3794636726379395, "learning_rate": 1.978440541601837e-05, "loss": 1.916, "step": 29044 }, { "epoch": 0.38, "grad_norm": 4.236027717590332, "learning_rate": 1.9784383713549696e-05, "loss": 2.8069, "step": 29045 }, { "epoch": 0.38, "grad_norm": 3.9637508392333984, "learning_rate": 1.9784362010000663e-05, "loss": 2.1082, "step": 29046 }, { "epoch": 0.38, "grad_norm": 3.8758325576782227, "learning_rate": 1.9784340305371267e-05, "loss": 2.0859, "step": 29047 }, { "epoch": 0.38, "grad_norm": 3.5252115726470947, "learning_rate": 1.9784318599661517e-05, "loss": 1.7063, "step": 29048 }, { "epoch": 0.38, "grad_norm": 3.5650136470794678, "learning_rate": 1.978429689287141e-05, "loss": 1.824, "step": 29049 }, { "epoch": 0.38, "grad_norm": 4.000065803527832, "learning_rate": 1.9784275185000955e-05, "loss": 2.0536, "step": 29050 }, { "epoch": 0.38, "grad_norm": 4.022262096405029, "learning_rate": 1.978425347605015e-05, "loss": 2.3397, "step": 29051 }, { "epoch": 0.38, "grad_norm": 3.5383360385894775, "learning_rate": 1.978423176601899e-05, "loss": 1.688, "step": 29052 }, { "epoch": 0.38, "grad_norm": 4.636419296264648, "learning_rate": 1.978421005490749e-05, "loss": 2.3961, "step": 29053 }, { "epoch": 0.38, "grad_norm": 4.176359176635742, "learning_rate": 1.978418834271565e-05, "loss": 2.2908, "step": 29054 }, { "epoch": 0.38, "grad_norm": 3.8776965141296387, "learning_rate": 1.9784166629443464e-05, "loss": 1.8085, "step": 29055 }, { "epoch": 0.38, "grad_norm": 4.048693656921387, "learning_rate": 1.9784144915090943e-05, "loss": 2.1271, "step": 29056 }, { "epoch": 0.38, "grad_norm": 4.006070137023926, "learning_rate": 1.9784123199658085e-05, "loss": 2.3335, "step": 29057 }, { "epoch": 0.38, "grad_norm": 4.242495536804199, "learning_rate": 1.9784101483144895e-05, "loss": 2.1763, "step": 29058 }, { "epoch": 0.38, "grad_norm": 3.528571367263794, "learning_rate": 1.978407976555137e-05, "loss": 1.8388, "step": 29059 }, { "epoch": 0.38, "grad_norm": 3.3763647079467773, "learning_rate": 1.9784058046877525e-05, "loss": 1.848, "step": 29060 }, { "epoch": 0.38, "grad_norm": 3.8941774368286133, "learning_rate": 1.978403632712335e-05, "loss": 1.9835, "step": 29061 }, { "epoch": 0.38, "grad_norm": 3.888484239578247, "learning_rate": 1.9784014606288847e-05, "loss": 2.131, "step": 29062 }, { "epoch": 0.38, "grad_norm": 4.268805027008057, "learning_rate": 1.978399288437403e-05, "loss": 2.2496, "step": 29063 }, { "epoch": 0.38, "grad_norm": 4.302575588226318, "learning_rate": 1.978397116137889e-05, "loss": 2.1075, "step": 29064 }, { "epoch": 0.38, "grad_norm": 4.183679103851318, "learning_rate": 1.9783949437303432e-05, "loss": 1.8485, "step": 29065 }, { "epoch": 0.38, "grad_norm": 3.7706193923950195, "learning_rate": 1.9783927712147663e-05, "loss": 1.8478, "step": 29066 }, { "epoch": 0.38, "grad_norm": 3.96270489692688, "learning_rate": 1.978390598591158e-05, "loss": 1.9665, "step": 29067 }, { "epoch": 0.38, "grad_norm": 3.4749324321746826, "learning_rate": 1.9783884258595193e-05, "loss": 1.6174, "step": 29068 }, { "epoch": 0.38, "grad_norm": 3.686609983444214, "learning_rate": 1.9783862530198493e-05, "loss": 1.6913, "step": 29069 }, { "epoch": 0.38, "grad_norm": 4.110947608947754, "learning_rate": 1.9783840800721493e-05, "loss": 2.669, "step": 29070 }, { "epoch": 0.38, "grad_norm": 3.4095559120178223, "learning_rate": 1.9783819070164188e-05, "loss": 1.6423, "step": 29071 }, { "epoch": 0.38, "grad_norm": 3.2898242473602295, "learning_rate": 1.9783797338526587e-05, "loss": 1.6898, "step": 29072 }, { "epoch": 0.38, "grad_norm": 3.9909403324127197, "learning_rate": 1.9783775605808687e-05, "loss": 1.7318, "step": 29073 }, { "epoch": 0.38, "grad_norm": 3.9073843955993652, "learning_rate": 1.978375387201049e-05, "loss": 2.1821, "step": 29074 }, { "epoch": 0.38, "grad_norm": 3.7481465339660645, "learning_rate": 1.9783732137132e-05, "loss": 1.9296, "step": 29075 }, { "epoch": 0.38, "grad_norm": 3.494196653366089, "learning_rate": 1.9783710401173227e-05, "loss": 2.0854, "step": 29076 }, { "epoch": 0.38, "grad_norm": 3.7431743144989014, "learning_rate": 1.978368866413416e-05, "loss": 1.6795, "step": 29077 }, { "epoch": 0.38, "grad_norm": 3.5102946758270264, "learning_rate": 1.9783666926014807e-05, "loss": 1.6877, "step": 29078 }, { "epoch": 0.38, "grad_norm": 3.7720656394958496, "learning_rate": 1.9783645186815174e-05, "loss": 2.0624, "step": 29079 }, { "epoch": 0.38, "grad_norm": 3.788499593734741, "learning_rate": 1.978362344653526e-05, "loss": 1.7977, "step": 29080 }, { "epoch": 0.38, "grad_norm": 3.3600194454193115, "learning_rate": 1.978360170517507e-05, "loss": 1.7412, "step": 29081 }, { "epoch": 0.38, "grad_norm": 4.02900505065918, "learning_rate": 1.9783579962734603e-05, "loss": 2.126, "step": 29082 }, { "epoch": 0.38, "grad_norm": 3.794280767440796, "learning_rate": 1.9783558219213865e-05, "loss": 1.9638, "step": 29083 }, { "epoch": 0.38, "grad_norm": 3.401660919189453, "learning_rate": 1.978353647461285e-05, "loss": 1.391, "step": 29084 }, { "epoch": 0.38, "grad_norm": 3.1996304988861084, "learning_rate": 1.9783514728931573e-05, "loss": 1.6652, "step": 29085 }, { "epoch": 0.38, "grad_norm": 3.805696725845337, "learning_rate": 1.9783492982170026e-05, "loss": 1.9358, "step": 29086 }, { "epoch": 0.38, "grad_norm": 4.093649387359619, "learning_rate": 1.9783471234328214e-05, "loss": 2.2349, "step": 29087 }, { "epoch": 0.38, "grad_norm": 3.2349042892456055, "learning_rate": 1.9783449485406144e-05, "loss": 1.5503, "step": 29088 }, { "epoch": 0.38, "grad_norm": 4.322726726531982, "learning_rate": 1.9783427735403815e-05, "loss": 2.4902, "step": 29089 }, { "epoch": 0.38, "grad_norm": 3.6338155269622803, "learning_rate": 1.9783405984321228e-05, "loss": 1.6119, "step": 29090 }, { "epoch": 0.38, "grad_norm": 3.654611349105835, "learning_rate": 1.978338423215839e-05, "loss": 2.0994, "step": 29091 }, { "epoch": 0.38, "grad_norm": 3.8168113231658936, "learning_rate": 1.97833624789153e-05, "loss": 1.7745, "step": 29092 }, { "epoch": 0.38, "grad_norm": 3.7715978622436523, "learning_rate": 1.9783340724591956e-05, "loss": 1.9977, "step": 29093 }, { "epoch": 0.38, "grad_norm": 3.5433619022369385, "learning_rate": 1.978331896918837e-05, "loss": 1.9036, "step": 29094 }, { "epoch": 0.38, "grad_norm": 4.2587504386901855, "learning_rate": 1.9783297212704538e-05, "loss": 2.1138, "step": 29095 }, { "epoch": 0.38, "grad_norm": 3.908118724822998, "learning_rate": 1.9783275455140465e-05, "loss": 1.8897, "step": 29096 }, { "epoch": 0.38, "grad_norm": 4.365614891052246, "learning_rate": 1.978325369649615e-05, "loss": 2.4321, "step": 29097 }, { "epoch": 0.38, "grad_norm": 3.3959455490112305, "learning_rate": 1.97832319367716e-05, "loss": 1.7344, "step": 29098 }, { "epoch": 0.38, "grad_norm": 4.114786624908447, "learning_rate": 1.9783210175966813e-05, "loss": 2.4429, "step": 29099 }, { "epoch": 0.38, "grad_norm": 4.47629976272583, "learning_rate": 1.9783188414081798e-05, "loss": 2.1779, "step": 29100 }, { "epoch": 0.38, "grad_norm": 3.8380446434020996, "learning_rate": 1.978316665111655e-05, "loss": 2.2087, "step": 29101 }, { "epoch": 0.38, "grad_norm": 3.756457805633545, "learning_rate": 1.9783144887071074e-05, "loss": 1.9953, "step": 29102 }, { "epoch": 0.38, "grad_norm": 3.530268669128418, "learning_rate": 1.9783123121945375e-05, "loss": 1.6988, "step": 29103 }, { "epoch": 0.38, "grad_norm": 3.9848833084106445, "learning_rate": 1.978310135573945e-05, "loss": 1.9268, "step": 29104 }, { "epoch": 0.38, "grad_norm": 4.0655927658081055, "learning_rate": 1.9783079588453307e-05, "loss": 1.9337, "step": 29105 }, { "epoch": 0.38, "grad_norm": 3.832267999649048, "learning_rate": 1.9783057820086945e-05, "loss": 1.8399, "step": 29106 }, { "epoch": 0.38, "grad_norm": 3.5658044815063477, "learning_rate": 1.9783036050640368e-05, "loss": 1.8905, "step": 29107 }, { "epoch": 0.38, "grad_norm": 4.18330717086792, "learning_rate": 1.9783014280113577e-05, "loss": 1.9347, "step": 29108 }, { "epoch": 0.38, "grad_norm": 3.954012632369995, "learning_rate": 1.9782992508506575e-05, "loss": 2.27, "step": 29109 }, { "epoch": 0.38, "grad_norm": 4.028080463409424, "learning_rate": 1.9782970735819366e-05, "loss": 1.6847, "step": 29110 }, { "epoch": 0.38, "grad_norm": 3.7060115337371826, "learning_rate": 1.9782948962051952e-05, "loss": 1.8132, "step": 29111 }, { "epoch": 0.38, "grad_norm": 3.9455933570861816, "learning_rate": 1.978292718720433e-05, "loss": 2.1433, "step": 29112 }, { "epoch": 0.38, "grad_norm": 3.9225594997406006, "learning_rate": 1.9782905411276512e-05, "loss": 1.8635, "step": 29113 }, { "epoch": 0.38, "grad_norm": 4.060516357421875, "learning_rate": 1.9782883634268496e-05, "loss": 2.3225, "step": 29114 }, { "epoch": 0.38, "grad_norm": 3.7787158489227295, "learning_rate": 1.978286185618028e-05, "loss": 2.2127, "step": 29115 }, { "epoch": 0.38, "grad_norm": 4.02402400970459, "learning_rate": 1.9782840077011872e-05, "loss": 2.2769, "step": 29116 }, { "epoch": 0.38, "grad_norm": 3.3073508739471436, "learning_rate": 1.978281829676327e-05, "loss": 1.8407, "step": 29117 }, { "epoch": 0.38, "grad_norm": 3.6760752201080322, "learning_rate": 1.9782796515434482e-05, "loss": 1.9994, "step": 29118 }, { "epoch": 0.38, "grad_norm": 3.6857757568359375, "learning_rate": 1.9782774733025506e-05, "loss": 1.5926, "step": 29119 }, { "epoch": 0.38, "grad_norm": 4.08854866027832, "learning_rate": 1.9782752949536346e-05, "loss": 2.4725, "step": 29120 }, { "epoch": 0.38, "grad_norm": 3.7211601734161377, "learning_rate": 1.9782731164967003e-05, "loss": 1.8366, "step": 29121 }, { "epoch": 0.38, "grad_norm": 3.8129334449768066, "learning_rate": 1.978270937931748e-05, "loss": 2.0009, "step": 29122 }, { "epoch": 0.38, "grad_norm": 3.364651918411255, "learning_rate": 1.978268759258778e-05, "loss": 1.6912, "step": 29123 }, { "epoch": 0.38, "grad_norm": 3.744637966156006, "learning_rate": 1.9782665804777906e-05, "loss": 2.3513, "step": 29124 }, { "epoch": 0.38, "grad_norm": 4.161663055419922, "learning_rate": 1.978264401588786e-05, "loss": 2.4165, "step": 29125 }, { "epoch": 0.38, "grad_norm": 4.0381388664245605, "learning_rate": 1.9782622225917646e-05, "loss": 2.5383, "step": 29126 }, { "epoch": 0.38, "grad_norm": 4.2615509033203125, "learning_rate": 1.978260043486726e-05, "loss": 2.5818, "step": 29127 }, { "epoch": 0.38, "grad_norm": 3.4563422203063965, "learning_rate": 1.9782578642736714e-05, "loss": 1.9489, "step": 29128 }, { "epoch": 0.38, "grad_norm": 4.014429569244385, "learning_rate": 1.9782556849526e-05, "loss": 2.2515, "step": 29129 }, { "epoch": 0.38, "grad_norm": 3.8891806602478027, "learning_rate": 1.9782535055235132e-05, "loss": 2.1158, "step": 29130 }, { "epoch": 0.38, "grad_norm": 3.925877094268799, "learning_rate": 1.9782513259864102e-05, "loss": 1.936, "step": 29131 }, { "epoch": 0.38, "grad_norm": 3.80780029296875, "learning_rate": 1.978249146341292e-05, "loss": 2.1259, "step": 29132 }, { "epoch": 0.38, "grad_norm": 3.3864269256591797, "learning_rate": 1.978246966588158e-05, "loss": 1.596, "step": 29133 }, { "epoch": 0.38, "grad_norm": 4.032309055328369, "learning_rate": 1.978244786727009e-05, "loss": 2.1948, "step": 29134 }, { "epoch": 0.38, "grad_norm": 3.8444173336029053, "learning_rate": 1.9782426067578457e-05, "loss": 1.7822, "step": 29135 }, { "epoch": 0.38, "grad_norm": 3.480729818344116, "learning_rate": 1.9782404266806674e-05, "loss": 1.6978, "step": 29136 }, { "epoch": 0.38, "grad_norm": 3.769167900085449, "learning_rate": 1.9782382464954748e-05, "loss": 1.9297, "step": 29137 }, { "epoch": 0.38, "grad_norm": 4.307275295257568, "learning_rate": 1.9782360662022682e-05, "loss": 2.4004, "step": 29138 }, { "epoch": 0.38, "grad_norm": 3.304250955581665, "learning_rate": 1.9782338858010477e-05, "loss": 1.6044, "step": 29139 }, { "epoch": 0.38, "grad_norm": 3.2529830932617188, "learning_rate": 1.9782317052918138e-05, "loss": 1.4565, "step": 29140 }, { "epoch": 0.38, "grad_norm": 3.872878313064575, "learning_rate": 1.978229524674566e-05, "loss": 1.8629, "step": 29141 }, { "epoch": 0.38, "grad_norm": 4.33343505859375, "learning_rate": 1.9782273439493055e-05, "loss": 2.3912, "step": 29142 }, { "epoch": 0.38, "grad_norm": 3.9185783863067627, "learning_rate": 1.9782251631160317e-05, "loss": 2.1958, "step": 29143 }, { "epoch": 0.38, "grad_norm": 4.273622989654541, "learning_rate": 1.978222982174746e-05, "loss": 2.6189, "step": 29144 }, { "epoch": 0.38, "grad_norm": 4.261318206787109, "learning_rate": 1.9782208011254473e-05, "loss": 2.0812, "step": 29145 }, { "epoch": 0.38, "grad_norm": 4.948023796081543, "learning_rate": 1.9782186199681363e-05, "loss": 2.6107, "step": 29146 }, { "epoch": 0.38, "grad_norm": 3.63018536567688, "learning_rate": 1.9782164387028134e-05, "loss": 1.7363, "step": 29147 }, { "epoch": 0.38, "grad_norm": 3.7449605464935303, "learning_rate": 1.9782142573294792e-05, "loss": 2.0158, "step": 29148 }, { "epoch": 0.38, "grad_norm": 3.7239973545074463, "learning_rate": 1.9782120758481334e-05, "loss": 1.9312, "step": 29149 }, { "epoch": 0.38, "grad_norm": 4.273963451385498, "learning_rate": 1.9782098942587763e-05, "loss": 2.2083, "step": 29150 }, { "epoch": 0.38, "grad_norm": 4.231903553009033, "learning_rate": 1.9782077125614083e-05, "loss": 2.294, "step": 29151 }, { "epoch": 0.38, "grad_norm": 4.3295159339904785, "learning_rate": 1.9782055307560294e-05, "loss": 2.4768, "step": 29152 }, { "epoch": 0.38, "grad_norm": 3.764697790145874, "learning_rate": 1.9782033488426406e-05, "loss": 2.052, "step": 29153 }, { "epoch": 0.38, "grad_norm": 3.4343514442443848, "learning_rate": 1.9782011668212408e-05, "loss": 1.8506, "step": 29154 }, { "epoch": 0.38, "grad_norm": 4.203056812286377, "learning_rate": 1.9781989846918314e-05, "loss": 2.2956, "step": 29155 }, { "epoch": 0.38, "grad_norm": 4.578289031982422, "learning_rate": 1.9781968024544122e-05, "loss": 2.2457, "step": 29156 }, { "epoch": 0.38, "grad_norm": 4.047414779663086, "learning_rate": 1.9781946201089833e-05, "loss": 2.0611, "step": 29157 }, { "epoch": 0.38, "grad_norm": 3.7856457233428955, "learning_rate": 1.9781924376555453e-05, "loss": 2.2, "step": 29158 }, { "epoch": 0.38, "grad_norm": 4.002516746520996, "learning_rate": 1.9781902550940983e-05, "loss": 1.9254, "step": 29159 }, { "epoch": 0.38, "grad_norm": 2.979588747024536, "learning_rate": 1.9781880724246424e-05, "loss": 1.468, "step": 29160 }, { "epoch": 0.38, "grad_norm": 3.372413158416748, "learning_rate": 1.978185889647178e-05, "loss": 1.8686, "step": 29161 }, { "epoch": 0.38, "grad_norm": 4.147698402404785, "learning_rate": 1.978183706761705e-05, "loss": 1.9752, "step": 29162 }, { "epoch": 0.38, "grad_norm": 3.355501174926758, "learning_rate": 1.978181523768224e-05, "loss": 1.6447, "step": 29163 }, { "epoch": 0.38, "grad_norm": 4.184317111968994, "learning_rate": 1.9781793406667357e-05, "loss": 2.8049, "step": 29164 }, { "epoch": 0.38, "grad_norm": 3.694014549255371, "learning_rate": 1.9781771574572394e-05, "loss": 1.7853, "step": 29165 }, { "epoch": 0.38, "grad_norm": 4.025670051574707, "learning_rate": 1.978174974139736e-05, "loss": 1.9833, "step": 29166 }, { "epoch": 0.38, "grad_norm": 4.062645435333252, "learning_rate": 1.9781727907142252e-05, "loss": 2.1518, "step": 29167 }, { "epoch": 0.38, "grad_norm": 3.7792656421661377, "learning_rate": 1.9781706071807073e-05, "loss": 1.7883, "step": 29168 }, { "epoch": 0.38, "grad_norm": 3.4795587062835693, "learning_rate": 1.9781684235391832e-05, "loss": 1.7652, "step": 29169 }, { "epoch": 0.38, "grad_norm": 3.946270704269409, "learning_rate": 1.9781662397896526e-05, "loss": 2.0442, "step": 29170 }, { "epoch": 0.38, "grad_norm": 4.7884521484375, "learning_rate": 1.9781640559321158e-05, "loss": 2.1479, "step": 29171 }, { "epoch": 0.38, "grad_norm": 3.9625113010406494, "learning_rate": 1.978161871966573e-05, "loss": 2.1926, "step": 29172 }, { "epoch": 0.38, "grad_norm": 4.037537574768066, "learning_rate": 1.9781596878930246e-05, "loss": 2.4534, "step": 29173 }, { "epoch": 0.38, "grad_norm": 4.223880290985107, "learning_rate": 1.978157503711471e-05, "loss": 1.8428, "step": 29174 }, { "epoch": 0.38, "grad_norm": 3.805187225341797, "learning_rate": 1.978155319421912e-05, "loss": 2.0639, "step": 29175 }, { "epoch": 0.38, "grad_norm": 5.2024736404418945, "learning_rate": 1.978153135024348e-05, "loss": 2.711, "step": 29176 }, { "epoch": 0.38, "grad_norm": 3.680595636367798, "learning_rate": 1.97815095051878e-05, "loss": 1.9419, "step": 29177 }, { "epoch": 0.38, "grad_norm": 3.9743902683258057, "learning_rate": 1.9781487659052065e-05, "loss": 2.0453, "step": 29178 }, { "epoch": 0.38, "grad_norm": 3.9099061489105225, "learning_rate": 1.978146581183629e-05, "loss": 1.99, "step": 29179 }, { "epoch": 0.38, "grad_norm": 3.9028067588806152, "learning_rate": 1.978144396354048e-05, "loss": 1.9275, "step": 29180 }, { "epoch": 0.38, "grad_norm": 3.507035732269287, "learning_rate": 1.978142211416463e-05, "loss": 1.7179, "step": 29181 }, { "epoch": 0.38, "grad_norm": 3.9100241661071777, "learning_rate": 1.9781400263708746e-05, "loss": 1.8049, "step": 29182 }, { "epoch": 0.38, "grad_norm": 3.55841064453125, "learning_rate": 1.978137841217283e-05, "loss": 1.9728, "step": 29183 }, { "epoch": 0.38, "grad_norm": 4.207315444946289, "learning_rate": 1.978135655955688e-05, "loss": 2.1648, "step": 29184 }, { "epoch": 0.38, "grad_norm": 4.111509323120117, "learning_rate": 1.9781334705860903e-05, "loss": 2.4086, "step": 29185 }, { "epoch": 0.38, "grad_norm": 3.9878122806549072, "learning_rate": 1.9781312851084905e-05, "loss": 1.9821, "step": 29186 }, { "epoch": 0.38, "grad_norm": 4.3201727867126465, "learning_rate": 1.9781290995228882e-05, "loss": 2.121, "step": 29187 }, { "epoch": 0.38, "grad_norm": 4.511735439300537, "learning_rate": 1.9781269138292838e-05, "loss": 2.512, "step": 29188 }, { "epoch": 0.38, "grad_norm": 3.8575642108917236, "learning_rate": 1.9781247280276776e-05, "loss": 2.3396, "step": 29189 }, { "epoch": 0.38, "grad_norm": 3.7680301666259766, "learning_rate": 1.97812254211807e-05, "loss": 1.6618, "step": 29190 }, { "epoch": 0.38, "grad_norm": 4.033621788024902, "learning_rate": 1.9781203561004606e-05, "loss": 2.0054, "step": 29191 }, { "epoch": 0.38, "grad_norm": 4.2610392570495605, "learning_rate": 1.9781181699748505e-05, "loss": 2.564, "step": 29192 }, { "epoch": 0.38, "grad_norm": 3.823047637939453, "learning_rate": 1.9781159837412395e-05, "loss": 2.0769, "step": 29193 }, { "epoch": 0.38, "grad_norm": 3.998171091079712, "learning_rate": 1.978113797399628e-05, "loss": 2.1884, "step": 29194 }, { "epoch": 0.38, "grad_norm": 3.7231273651123047, "learning_rate": 1.978111610950016e-05, "loss": 1.9144, "step": 29195 }, { "epoch": 0.38, "grad_norm": 4.039770603179932, "learning_rate": 1.9781094243924038e-05, "loss": 2.3114, "step": 29196 }, { "epoch": 0.38, "grad_norm": 3.9880831241607666, "learning_rate": 1.9781072377267922e-05, "loss": 1.9427, "step": 29197 }, { "epoch": 0.38, "grad_norm": 4.002963066101074, "learning_rate": 1.9781050509531805e-05, "loss": 2.3043, "step": 29198 }, { "epoch": 0.38, "grad_norm": 3.802516222000122, "learning_rate": 1.9781028640715693e-05, "loss": 1.9827, "step": 29199 }, { "epoch": 0.38, "grad_norm": 4.197860240936279, "learning_rate": 1.9781006770819595e-05, "loss": 2.1129, "step": 29200 }, { "epoch": 0.38, "grad_norm": 4.090639114379883, "learning_rate": 1.9780984899843502e-05, "loss": 2.6004, "step": 29201 }, { "epoch": 0.38, "grad_norm": 3.9008491039276123, "learning_rate": 1.9780963027787425e-05, "loss": 1.9923, "step": 29202 }, { "epoch": 0.38, "grad_norm": 4.004594326019287, "learning_rate": 1.9780941154651365e-05, "loss": 1.8622, "step": 29203 }, { "epoch": 0.38, "grad_norm": 3.3609821796417236, "learning_rate": 1.9780919280435324e-05, "loss": 1.6984, "step": 29204 }, { "epoch": 0.38, "grad_norm": 4.253662586212158, "learning_rate": 1.9780897405139302e-05, "loss": 2.5945, "step": 29205 }, { "epoch": 0.38, "grad_norm": 3.3190298080444336, "learning_rate": 1.9780875528763303e-05, "loss": 1.8012, "step": 29206 }, { "epoch": 0.38, "grad_norm": 3.834033966064453, "learning_rate": 1.9780853651307327e-05, "loss": 1.643, "step": 29207 }, { "epoch": 0.38, "grad_norm": 3.7808854579925537, "learning_rate": 1.978083177277138e-05, "loss": 2.2115, "step": 29208 }, { "epoch": 0.38, "grad_norm": 3.7004923820495605, "learning_rate": 1.9780809893155463e-05, "loss": 1.9965, "step": 29209 }, { "epoch": 0.38, "grad_norm": 4.115019798278809, "learning_rate": 1.9780788012459583e-05, "loss": 2.162, "step": 29210 }, { "epoch": 0.38, "grad_norm": 4.04169225692749, "learning_rate": 1.9780766130683732e-05, "loss": 2.2013, "step": 29211 }, { "epoch": 0.38, "grad_norm": 4.0989909172058105, "learning_rate": 1.978074424782792e-05, "loss": 2.1638, "step": 29212 }, { "epoch": 0.38, "grad_norm": 4.295224189758301, "learning_rate": 1.978072236389215e-05, "loss": 2.8531, "step": 29213 }, { "epoch": 0.38, "grad_norm": 5.3962321281433105, "learning_rate": 1.978070047887642e-05, "loss": 3.3452, "step": 29214 }, { "epoch": 0.38, "grad_norm": 3.851776361465454, "learning_rate": 1.978067859278074e-05, "loss": 2.2168, "step": 29215 }, { "epoch": 0.38, "grad_norm": 3.660008192062378, "learning_rate": 1.9780656705605104e-05, "loss": 1.6231, "step": 29216 }, { "epoch": 0.38, "grad_norm": 3.691903591156006, "learning_rate": 1.9780634817349516e-05, "loss": 1.6661, "step": 29217 }, { "epoch": 0.38, "grad_norm": 4.017378330230713, "learning_rate": 1.978061292801398e-05, "loss": 2.2916, "step": 29218 }, { "epoch": 0.38, "grad_norm": 4.8392534255981445, "learning_rate": 1.97805910375985e-05, "loss": 2.0421, "step": 29219 }, { "epoch": 0.38, "grad_norm": 3.935873031616211, "learning_rate": 1.9780569146103077e-05, "loss": 1.8471, "step": 29220 }, { "epoch": 0.38, "grad_norm": 3.9335477352142334, "learning_rate": 1.978054725352771e-05, "loss": 2.2732, "step": 29221 }, { "epoch": 0.38, "grad_norm": 3.944032669067383, "learning_rate": 1.978052535987241e-05, "loss": 1.7719, "step": 29222 }, { "epoch": 0.38, "grad_norm": 3.6986968517303467, "learning_rate": 1.9780503465137172e-05, "loss": 1.6613, "step": 29223 }, { "epoch": 0.38, "grad_norm": 4.120110034942627, "learning_rate": 1.9780481569321997e-05, "loss": 1.9817, "step": 29224 }, { "epoch": 0.38, "grad_norm": 3.7942113876342773, "learning_rate": 1.9780459672426896e-05, "loss": 1.8452, "step": 29225 }, { "epoch": 0.38, "grad_norm": 3.724695920944214, "learning_rate": 1.9780437774451862e-05, "loss": 2.1791, "step": 29226 }, { "epoch": 0.38, "grad_norm": 4.968438625335693, "learning_rate": 1.97804158753969e-05, "loss": 1.8909, "step": 29227 }, { "epoch": 0.38, "grad_norm": 4.457488536834717, "learning_rate": 1.978039397526202e-05, "loss": 2.2056, "step": 29228 }, { "epoch": 0.38, "grad_norm": 3.5708208084106445, "learning_rate": 1.9780372074047215e-05, "loss": 1.9171, "step": 29229 }, { "epoch": 0.38, "grad_norm": 4.363813400268555, "learning_rate": 1.9780350171752492e-05, "loss": 2.1577, "step": 29230 }, { "epoch": 0.38, "grad_norm": 3.7264907360076904, "learning_rate": 1.9780328268377854e-05, "loss": 1.7807, "step": 29231 }, { "epoch": 0.38, "grad_norm": 3.9002485275268555, "learning_rate": 1.9780306363923302e-05, "loss": 1.8813, "step": 29232 }, { "epoch": 0.38, "grad_norm": 3.629950761795044, "learning_rate": 1.9780284458388834e-05, "loss": 2.0943, "step": 29233 }, { "epoch": 0.38, "grad_norm": 5.215505123138428, "learning_rate": 1.9780262551774464e-05, "loss": 2.9173, "step": 29234 }, { "epoch": 0.38, "grad_norm": 4.270963668823242, "learning_rate": 1.978024064408018e-05, "loss": 2.2971, "step": 29235 }, { "epoch": 0.38, "grad_norm": 3.563922643661499, "learning_rate": 1.9780218735305995e-05, "loss": 1.7483, "step": 29236 }, { "epoch": 0.38, "grad_norm": 3.8181746006011963, "learning_rate": 1.9780196825451906e-05, "loss": 2.0329, "step": 29237 }, { "epoch": 0.38, "grad_norm": 4.524304389953613, "learning_rate": 1.978017491451792e-05, "loss": 2.4472, "step": 29238 }, { "epoch": 0.38, "grad_norm": 3.843872547149658, "learning_rate": 1.9780153002504035e-05, "loss": 1.8935, "step": 29239 }, { "epoch": 0.38, "grad_norm": 4.142213821411133, "learning_rate": 1.9780131089410255e-05, "loss": 2.7832, "step": 29240 }, { "epoch": 0.38, "grad_norm": 3.199192762374878, "learning_rate": 1.978010917523658e-05, "loss": 1.5978, "step": 29241 }, { "epoch": 0.38, "grad_norm": 3.863694667816162, "learning_rate": 1.978008725998302e-05, "loss": 2.0865, "step": 29242 }, { "epoch": 0.38, "grad_norm": 3.6325652599334717, "learning_rate": 1.978006534364957e-05, "loss": 1.4472, "step": 29243 }, { "epoch": 0.38, "grad_norm": 5.658394813537598, "learning_rate": 1.9780043426236237e-05, "loss": 2.3891, "step": 29244 }, { "epoch": 0.38, "grad_norm": 4.124986171722412, "learning_rate": 1.9780021507743015e-05, "loss": 2.1119, "step": 29245 }, { "epoch": 0.38, "grad_norm": 3.757812976837158, "learning_rate": 1.977999958816992e-05, "loss": 2.115, "step": 29246 }, { "epoch": 0.38, "grad_norm": 3.8801770210266113, "learning_rate": 1.9779977667516946e-05, "loss": 2.3827, "step": 29247 }, { "epoch": 0.38, "grad_norm": 4.105132579803467, "learning_rate": 1.9779955745784094e-05, "loss": 1.8128, "step": 29248 }, { "epoch": 0.38, "grad_norm": 3.500190496444702, "learning_rate": 1.9779933822971367e-05, "loss": 1.7806, "step": 29249 }, { "epoch": 0.38, "grad_norm": 4.025542736053467, "learning_rate": 1.9779911899078775e-05, "loss": 2.2165, "step": 29250 }, { "epoch": 0.38, "grad_norm": 3.685479164123535, "learning_rate": 1.977988997410631e-05, "loss": 1.8076, "step": 29251 }, { "epoch": 0.38, "grad_norm": 3.927321195602417, "learning_rate": 1.977986804805398e-05, "loss": 2.2349, "step": 29252 }, { "epoch": 0.38, "grad_norm": 4.356815814971924, "learning_rate": 1.977984612092179e-05, "loss": 2.2365, "step": 29253 }, { "epoch": 0.38, "grad_norm": 3.711416721343994, "learning_rate": 1.9779824192709735e-05, "loss": 1.8312, "step": 29254 }, { "epoch": 0.38, "grad_norm": 3.2917697429656982, "learning_rate": 1.9779802263417823e-05, "loss": 1.7766, "step": 29255 }, { "epoch": 0.38, "grad_norm": 3.876207113265991, "learning_rate": 1.9779780333046055e-05, "loss": 1.9326, "step": 29256 }, { "epoch": 0.38, "grad_norm": 4.6911725997924805, "learning_rate": 1.977975840159444e-05, "loss": 2.2868, "step": 29257 }, { "epoch": 0.38, "grad_norm": 3.9900412559509277, "learning_rate": 1.9779736469062965e-05, "loss": 1.8307, "step": 29258 }, { "epoch": 0.38, "grad_norm": 3.5319807529449463, "learning_rate": 1.9779714535451645e-05, "loss": 1.778, "step": 29259 }, { "epoch": 0.38, "grad_norm": 3.525799512863159, "learning_rate": 1.9779692600760474e-05, "loss": 2.0361, "step": 29260 }, { "epoch": 0.38, "grad_norm": 3.6598007678985596, "learning_rate": 1.9779670664989464e-05, "loss": 1.889, "step": 29261 }, { "epoch": 0.38, "grad_norm": 3.7984461784362793, "learning_rate": 1.977964872813861e-05, "loss": 1.8412, "step": 29262 }, { "epoch": 0.38, "grad_norm": 3.865518093109131, "learning_rate": 1.9779626790207916e-05, "loss": 2.0282, "step": 29263 }, { "epoch": 0.38, "grad_norm": 4.381520748138428, "learning_rate": 1.977960485119739e-05, "loss": 2.2864, "step": 29264 }, { "epoch": 0.38, "grad_norm": 4.069152355194092, "learning_rate": 1.9779582911107026e-05, "loss": 1.8259, "step": 29265 }, { "epoch": 0.38, "grad_norm": 4.00429630279541, "learning_rate": 1.9779560969936828e-05, "loss": 1.9202, "step": 29266 }, { "epoch": 0.38, "grad_norm": 3.3293254375457764, "learning_rate": 1.9779539027686806e-05, "loss": 1.7911, "step": 29267 }, { "epoch": 0.38, "grad_norm": 3.5785298347473145, "learning_rate": 1.9779517084356952e-05, "loss": 1.646, "step": 29268 }, { "epoch": 0.38, "grad_norm": 3.9751901626586914, "learning_rate": 1.9779495139947277e-05, "loss": 1.9157, "step": 29269 }, { "epoch": 0.38, "grad_norm": 4.220414161682129, "learning_rate": 1.9779473194457777e-05, "loss": 1.9651, "step": 29270 }, { "epoch": 0.38, "grad_norm": 3.919825315475464, "learning_rate": 1.9779451247888463e-05, "loss": 2.0476, "step": 29271 }, { "epoch": 0.38, "grad_norm": 3.4310357570648193, "learning_rate": 1.9779429300239324e-05, "loss": 2.0674, "step": 29272 }, { "epoch": 0.38, "grad_norm": 4.057346343994141, "learning_rate": 1.9779407351510374e-05, "loss": 2.1342, "step": 29273 }, { "epoch": 0.38, "grad_norm": 4.349934101104736, "learning_rate": 1.9779385401701612e-05, "loss": 2.5317, "step": 29274 }, { "epoch": 0.38, "grad_norm": 4.14315128326416, "learning_rate": 1.977936345081304e-05, "loss": 2.0885, "step": 29275 }, { "epoch": 0.38, "grad_norm": 4.4015703201293945, "learning_rate": 1.9779341498844656e-05, "loss": 1.8736, "step": 29276 }, { "epoch": 0.38, "grad_norm": 3.752124071121216, "learning_rate": 1.977931954579647e-05, "loss": 2.2516, "step": 29277 }, { "epoch": 0.38, "grad_norm": 3.819220542907715, "learning_rate": 1.9779297591668483e-05, "loss": 1.7678, "step": 29278 }, { "epoch": 0.38, "grad_norm": 3.736544609069824, "learning_rate": 1.9779275636460693e-05, "loss": 1.6915, "step": 29279 }, { "epoch": 0.38, "grad_norm": 4.46345329284668, "learning_rate": 1.977925368017311e-05, "loss": 2.2697, "step": 29280 }, { "epoch": 0.38, "grad_norm": 4.170022487640381, "learning_rate": 1.9779231722805723e-05, "loss": 2.4452, "step": 29281 }, { "epoch": 0.38, "grad_norm": 3.316053628921509, "learning_rate": 1.977920976435855e-05, "loss": 1.8477, "step": 29282 }, { "epoch": 0.38, "grad_norm": 4.020639419555664, "learning_rate": 1.977918780483158e-05, "loss": 1.886, "step": 29283 }, { "epoch": 0.38, "grad_norm": 5.225813388824463, "learning_rate": 1.977916584422483e-05, "loss": 2.0982, "step": 29284 }, { "epoch": 0.38, "grad_norm": 3.983379364013672, "learning_rate": 1.9779143882538288e-05, "loss": 1.9251, "step": 29285 }, { "epoch": 0.38, "grad_norm": 3.62573504447937, "learning_rate": 1.9779121919771965e-05, "loss": 1.9295, "step": 29286 }, { "epoch": 0.38, "grad_norm": 3.6301751136779785, "learning_rate": 1.977909995592586e-05, "loss": 1.6285, "step": 29287 }, { "epoch": 0.38, "grad_norm": 3.6802525520324707, "learning_rate": 1.9779077990999975e-05, "loss": 1.5765, "step": 29288 }, { "epoch": 0.38, "grad_norm": 3.969403028488159, "learning_rate": 1.9779056024994317e-05, "loss": 2.4659, "step": 29289 }, { "epoch": 0.38, "grad_norm": 3.389784336090088, "learning_rate": 1.9779034057908882e-05, "loss": 1.6536, "step": 29290 }, { "epoch": 0.38, "grad_norm": 3.710679531097412, "learning_rate": 1.9779012089743677e-05, "loss": 2.071, "step": 29291 }, { "epoch": 0.38, "grad_norm": 3.717677354812622, "learning_rate": 1.9778990120498708e-05, "loss": 2.0125, "step": 29292 }, { "epoch": 0.38, "grad_norm": 3.092647075653076, "learning_rate": 1.977896815017397e-05, "loss": 1.4447, "step": 29293 }, { "epoch": 0.38, "grad_norm": 3.727261543273926, "learning_rate": 1.9778946178769462e-05, "loss": 2.0614, "step": 29294 }, { "epoch": 0.38, "grad_norm": 3.8144278526306152, "learning_rate": 1.9778924206285198e-05, "loss": 1.8794, "step": 29295 }, { "epoch": 0.38, "grad_norm": 3.9769108295440674, "learning_rate": 1.9778902232721174e-05, "loss": 2.3672, "step": 29296 }, { "epoch": 0.38, "grad_norm": 3.745380401611328, "learning_rate": 1.9778880258077393e-05, "loss": 1.9917, "step": 29297 }, { "epoch": 0.38, "grad_norm": 3.990593910217285, "learning_rate": 1.977885828235386e-05, "loss": 2.1234, "step": 29298 }, { "epoch": 0.38, "grad_norm": 3.857567548751831, "learning_rate": 1.977883630555057e-05, "loss": 1.9533, "step": 29299 }, { "epoch": 0.38, "grad_norm": 3.381373882293701, "learning_rate": 1.9778814327667536e-05, "loss": 1.7466, "step": 29300 }, { "epoch": 0.38, "grad_norm": 4.500833511352539, "learning_rate": 1.977879234870475e-05, "loss": 2.6004, "step": 29301 }, { "epoch": 0.38, "grad_norm": 4.06226921081543, "learning_rate": 1.9778770368662223e-05, "loss": 2.0782, "step": 29302 }, { "epoch": 0.38, "grad_norm": 3.776358127593994, "learning_rate": 1.977874838753995e-05, "loss": 2.2126, "step": 29303 }, { "epoch": 0.38, "grad_norm": 3.802020788192749, "learning_rate": 1.9778726405337942e-05, "loss": 1.9626, "step": 29304 }, { "epoch": 0.38, "grad_norm": 3.3574726581573486, "learning_rate": 1.9778704422056197e-05, "loss": 1.9011, "step": 29305 }, { "epoch": 0.38, "grad_norm": 4.190054893493652, "learning_rate": 1.9778682437694712e-05, "loss": 2.0965, "step": 29306 }, { "epoch": 0.38, "grad_norm": 3.5794196128845215, "learning_rate": 1.97786604522535e-05, "loss": 1.8178, "step": 29307 }, { "epoch": 0.38, "grad_norm": 3.483982563018799, "learning_rate": 1.9778638465732553e-05, "loss": 1.7891, "step": 29308 }, { "epoch": 0.38, "grad_norm": 3.932103395462036, "learning_rate": 1.9778616478131882e-05, "loss": 2.0271, "step": 29309 }, { "epoch": 0.38, "grad_norm": 4.455655574798584, "learning_rate": 1.9778594489451484e-05, "loss": 2.5097, "step": 29310 }, { "epoch": 0.38, "grad_norm": 3.9028337001800537, "learning_rate": 1.9778572499691364e-05, "loss": 2.1491, "step": 29311 }, { "epoch": 0.38, "grad_norm": 4.372951030731201, "learning_rate": 1.9778550508851527e-05, "loss": 2.2254, "step": 29312 }, { "epoch": 0.38, "grad_norm": 4.141228199005127, "learning_rate": 1.9778528516931967e-05, "loss": 2.2028, "step": 29313 }, { "epoch": 0.38, "grad_norm": 3.7225358486175537, "learning_rate": 1.9778506523932694e-05, "loss": 2.087, "step": 29314 }, { "epoch": 0.38, "grad_norm": 4.08782434463501, "learning_rate": 1.9778484529853705e-05, "loss": 2.0094, "step": 29315 }, { "epoch": 0.38, "grad_norm": 3.793532133102417, "learning_rate": 1.977846253469501e-05, "loss": 1.8831, "step": 29316 }, { "epoch": 0.38, "grad_norm": 4.220425605773926, "learning_rate": 1.97784405384566e-05, "loss": 2.4637, "step": 29317 }, { "epoch": 0.38, "grad_norm": 3.8573737144470215, "learning_rate": 1.9778418541138494e-05, "loss": 2.4715, "step": 29318 }, { "epoch": 0.38, "grad_norm": 3.4829413890838623, "learning_rate": 1.977839654274068e-05, "loss": 2.1715, "step": 29319 }, { "epoch": 0.38, "grad_norm": 4.230095863342285, "learning_rate": 1.9778374543263164e-05, "loss": 2.0199, "step": 29320 }, { "epoch": 0.38, "grad_norm": 4.269527912139893, "learning_rate": 1.977835254270595e-05, "loss": 2.2625, "step": 29321 }, { "epoch": 0.38, "grad_norm": 3.727358341217041, "learning_rate": 1.977833054106904e-05, "loss": 1.9581, "step": 29322 }, { "epoch": 0.38, "grad_norm": 3.490281105041504, "learning_rate": 1.9778308538352435e-05, "loss": 2.2146, "step": 29323 }, { "epoch": 0.38, "grad_norm": 3.368703603744507, "learning_rate": 1.9778286534556142e-05, "loss": 1.5425, "step": 29324 }, { "epoch": 0.38, "grad_norm": 4.057783126831055, "learning_rate": 1.977826452968016e-05, "loss": 2.5024, "step": 29325 }, { "epoch": 0.38, "grad_norm": 3.9764785766601562, "learning_rate": 1.9778242523724488e-05, "loss": 1.753, "step": 29326 }, { "epoch": 0.38, "grad_norm": 4.132884502410889, "learning_rate": 1.9778220516689133e-05, "loss": 2.0766, "step": 29327 }, { "epoch": 0.38, "grad_norm": 3.7433931827545166, "learning_rate": 1.9778198508574104e-05, "loss": 1.6482, "step": 29328 }, { "epoch": 0.38, "grad_norm": 3.482161045074463, "learning_rate": 1.9778176499379388e-05, "loss": 1.7694, "step": 29329 }, { "epoch": 0.38, "grad_norm": 4.279045104980469, "learning_rate": 1.9778154489104997e-05, "loss": 2.2533, "step": 29330 }, { "epoch": 0.38, "grad_norm": 3.6841282844543457, "learning_rate": 1.9778132477750933e-05, "loss": 2.2022, "step": 29331 }, { "epoch": 0.38, "grad_norm": 4.0927839279174805, "learning_rate": 1.9778110465317194e-05, "loss": 2.3966, "step": 29332 }, { "epoch": 0.38, "grad_norm": 3.457972526550293, "learning_rate": 1.9778088451803793e-05, "loss": 1.6778, "step": 29333 }, { "epoch": 0.38, "grad_norm": 3.8457696437835693, "learning_rate": 1.977806643721072e-05, "loss": 2.2525, "step": 29334 }, { "epoch": 0.38, "grad_norm": 3.770001173019409, "learning_rate": 1.977804442153798e-05, "loss": 1.9869, "step": 29335 }, { "epoch": 0.38, "grad_norm": 3.9286112785339355, "learning_rate": 1.9778022404785583e-05, "loss": 2.0026, "step": 29336 }, { "epoch": 0.38, "grad_norm": 3.8486216068267822, "learning_rate": 1.9778000386953523e-05, "loss": 2.1525, "step": 29337 }, { "epoch": 0.38, "grad_norm": 3.369392156600952, "learning_rate": 1.977797836804181e-05, "loss": 1.6629, "step": 29338 }, { "epoch": 0.38, "grad_norm": 4.486483573913574, "learning_rate": 1.9777956348050438e-05, "loss": 2.6478, "step": 29339 }, { "epoch": 0.38, "grad_norm": 4.300930976867676, "learning_rate": 1.9777934326979416e-05, "loss": 2.2974, "step": 29340 }, { "epoch": 0.38, "grad_norm": 3.8291280269622803, "learning_rate": 1.9777912304828745e-05, "loss": 2.0672, "step": 29341 }, { "epoch": 0.38, "grad_norm": 3.4041035175323486, "learning_rate": 1.9777890281598423e-05, "loss": 1.5388, "step": 29342 }, { "epoch": 0.38, "grad_norm": 4.09357213973999, "learning_rate": 1.9777868257288458e-05, "loss": 1.8593, "step": 29343 }, { "epoch": 0.38, "grad_norm": 3.288562059402466, "learning_rate": 1.9777846231898852e-05, "loss": 1.6482, "step": 29344 }, { "epoch": 0.38, "grad_norm": 4.017317295074463, "learning_rate": 1.9777824205429604e-05, "loss": 1.9125, "step": 29345 }, { "epoch": 0.38, "grad_norm": 4.372862339019775, "learning_rate": 1.9777802177880718e-05, "loss": 2.1233, "step": 29346 }, { "epoch": 0.38, "grad_norm": 3.0531466007232666, "learning_rate": 1.97777801492522e-05, "loss": 1.3762, "step": 29347 }, { "epoch": 0.38, "grad_norm": 3.804929256439209, "learning_rate": 1.9777758119544048e-05, "loss": 1.7289, "step": 29348 }, { "epoch": 0.38, "grad_norm": 4.414991855621338, "learning_rate": 1.9777736088756263e-05, "loss": 2.1605, "step": 29349 }, { "epoch": 0.38, "grad_norm": 3.1315901279449463, "learning_rate": 1.977771405688885e-05, "loss": 1.4711, "step": 29350 }, { "epoch": 0.38, "grad_norm": 3.337578058242798, "learning_rate": 1.9777692023941813e-05, "loss": 1.8197, "step": 29351 }, { "epoch": 0.38, "grad_norm": 4.340143203735352, "learning_rate": 1.9777669989915155e-05, "loss": 2.3267, "step": 29352 }, { "epoch": 0.38, "grad_norm": 3.2257161140441895, "learning_rate": 1.9777647954808874e-05, "loss": 1.7268, "step": 29353 }, { "epoch": 0.38, "grad_norm": 3.8740181922912598, "learning_rate": 1.9777625918622976e-05, "loss": 2.1568, "step": 29354 }, { "epoch": 0.38, "grad_norm": 3.8229269981384277, "learning_rate": 1.977760388135746e-05, "loss": 1.4719, "step": 29355 }, { "epoch": 0.38, "grad_norm": 3.982349395751953, "learning_rate": 1.9777581843012332e-05, "loss": 2.215, "step": 29356 }, { "epoch": 0.38, "grad_norm": 3.9986767768859863, "learning_rate": 1.9777559803587594e-05, "loss": 2.3882, "step": 29357 }, { "epoch": 0.38, "grad_norm": 4.269733428955078, "learning_rate": 1.9777537763083247e-05, "loss": 2.2664, "step": 29358 }, { "epoch": 0.38, "grad_norm": 3.2723872661590576, "learning_rate": 1.9777515721499293e-05, "loss": 1.6089, "step": 29359 }, { "epoch": 0.38, "grad_norm": 3.9405672550201416, "learning_rate": 1.9777493678835737e-05, "loss": 2.2933, "step": 29360 }, { "epoch": 0.38, "grad_norm": 4.169879913330078, "learning_rate": 1.9777471635092577e-05, "loss": 2.068, "step": 29361 }, { "epoch": 0.38, "grad_norm": 4.29029655456543, "learning_rate": 1.9777449590269823e-05, "loss": 2.2025, "step": 29362 }, { "epoch": 0.38, "grad_norm": 3.7449004650115967, "learning_rate": 1.9777427544367467e-05, "loss": 1.6614, "step": 29363 }, { "epoch": 0.38, "grad_norm": 3.8438801765441895, "learning_rate": 1.977740549738552e-05, "loss": 2.2049, "step": 29364 }, { "epoch": 0.38, "grad_norm": 3.652884006500244, "learning_rate": 1.9777383449323983e-05, "loss": 2.0945, "step": 29365 }, { "epoch": 0.38, "grad_norm": 4.3888959884643555, "learning_rate": 1.9777361400182855e-05, "loss": 2.2143, "step": 29366 }, { "epoch": 0.38, "grad_norm": 3.535541534423828, "learning_rate": 1.9777339349962143e-05, "loss": 1.8259, "step": 29367 }, { "epoch": 0.38, "grad_norm": 3.9423365592956543, "learning_rate": 1.9777317298661843e-05, "loss": 1.9269, "step": 29368 }, { "epoch": 0.38, "grad_norm": 3.8109450340270996, "learning_rate": 1.9777295246281962e-05, "loss": 1.9234, "step": 29369 }, { "epoch": 0.38, "grad_norm": 3.8241772651672363, "learning_rate": 1.9777273192822504e-05, "loss": 1.7812, "step": 29370 }, { "epoch": 0.38, "grad_norm": 3.7661948204040527, "learning_rate": 1.977725113828347e-05, "loss": 2.1326, "step": 29371 }, { "epoch": 0.38, "grad_norm": 4.319313049316406, "learning_rate": 1.9777229082664856e-05, "loss": 2.142, "step": 29372 }, { "epoch": 0.38, "grad_norm": 3.6137373447418213, "learning_rate": 1.9777207025966673e-05, "loss": 1.9413, "step": 29373 }, { "epoch": 0.38, "grad_norm": 3.8246631622314453, "learning_rate": 1.9777184968188922e-05, "loss": 1.9088, "step": 29374 }, { "epoch": 0.38, "grad_norm": 4.341457843780518, "learning_rate": 1.9777162909331604e-05, "loss": 2.4984, "step": 29375 }, { "epoch": 0.38, "grad_norm": 3.613309144973755, "learning_rate": 1.977714084939472e-05, "loss": 1.763, "step": 29376 }, { "epoch": 0.38, "grad_norm": 4.472614288330078, "learning_rate": 1.9777118788378274e-05, "loss": 2.4215, "step": 29377 }, { "epoch": 0.38, "grad_norm": 4.417725086212158, "learning_rate": 1.9777096726282268e-05, "loss": 2.0916, "step": 29378 }, { "epoch": 0.38, "grad_norm": 4.000981330871582, "learning_rate": 1.9777074663106705e-05, "loss": 1.9625, "step": 29379 }, { "epoch": 0.38, "grad_norm": 3.68923282623291, "learning_rate": 1.977705259885159e-05, "loss": 1.5788, "step": 29380 }, { "epoch": 0.38, "grad_norm": 4.050331115722656, "learning_rate": 1.9777030533516918e-05, "loss": 2.1444, "step": 29381 }, { "epoch": 0.38, "grad_norm": 4.266487121582031, "learning_rate": 1.9777008467102698e-05, "loss": 2.2353, "step": 29382 }, { "epoch": 0.38, "grad_norm": 3.9204025268554688, "learning_rate": 1.9776986399608927e-05, "loss": 2.0475, "step": 29383 }, { "epoch": 0.38, "grad_norm": 3.6677629947662354, "learning_rate": 1.9776964331035616e-05, "loss": 1.6386, "step": 29384 }, { "epoch": 0.38, "grad_norm": 3.974551200866699, "learning_rate": 1.977694226138276e-05, "loss": 2.4433, "step": 29385 }, { "epoch": 0.38, "grad_norm": 4.163234710693359, "learning_rate": 1.977692019065036e-05, "loss": 2.635, "step": 29386 }, { "epoch": 0.38, "grad_norm": 4.2813005447387695, "learning_rate": 1.977689811883843e-05, "loss": 2.1256, "step": 29387 }, { "epoch": 0.38, "grad_norm": 3.845487117767334, "learning_rate": 1.9776876045946958e-05, "loss": 1.8681, "step": 29388 }, { "epoch": 0.38, "grad_norm": 4.112082004547119, "learning_rate": 1.9776853971975957e-05, "loss": 2.4277, "step": 29389 }, { "epoch": 0.38, "grad_norm": 3.9074819087982178, "learning_rate": 1.9776831896925422e-05, "loss": 1.8556, "step": 29390 }, { "epoch": 0.38, "grad_norm": 3.7029311656951904, "learning_rate": 1.977680982079536e-05, "loss": 2.048, "step": 29391 }, { "epoch": 0.38, "grad_norm": 3.8228259086608887, "learning_rate": 1.9776787743585773e-05, "loss": 1.8026, "step": 29392 }, { "epoch": 0.38, "grad_norm": 4.093358516693115, "learning_rate": 1.9776765665296666e-05, "loss": 1.9925, "step": 29393 }, { "epoch": 0.38, "grad_norm": 3.5950701236724854, "learning_rate": 1.9776743585928032e-05, "loss": 1.7784, "step": 29394 }, { "epoch": 0.38, "grad_norm": 3.706859588623047, "learning_rate": 1.9776721505479885e-05, "loss": 1.9469, "step": 29395 }, { "epoch": 0.38, "grad_norm": 4.093385696411133, "learning_rate": 1.977669942395222e-05, "loss": 2.2536, "step": 29396 }, { "epoch": 0.38, "grad_norm": 3.576430559158325, "learning_rate": 1.977667734134504e-05, "loss": 2.071, "step": 29397 }, { "epoch": 0.38, "grad_norm": 3.8799469470977783, "learning_rate": 1.977665525765835e-05, "loss": 1.9889, "step": 29398 }, { "epoch": 0.38, "grad_norm": 4.1436896324157715, "learning_rate": 1.9776633172892154e-05, "loss": 2.4298, "step": 29399 }, { "epoch": 0.38, "grad_norm": 4.283401012420654, "learning_rate": 1.977661108704645e-05, "loss": 1.7239, "step": 29400 }, { "epoch": 0.38, "grad_norm": 3.8686420917510986, "learning_rate": 1.977658900012124e-05, "loss": 1.5694, "step": 29401 }, { "epoch": 0.38, "grad_norm": 3.6739110946655273, "learning_rate": 1.977656691211653e-05, "loss": 1.8759, "step": 29402 }, { "epoch": 0.38, "grad_norm": 3.882917642593384, "learning_rate": 1.977654482303232e-05, "loss": 1.9347, "step": 29403 }, { "epoch": 0.38, "grad_norm": 4.075038909912109, "learning_rate": 1.9776522732868616e-05, "loss": 2.3828, "step": 29404 }, { "epoch": 0.38, "grad_norm": 3.9297280311584473, "learning_rate": 1.9776500641625417e-05, "loss": 1.876, "step": 29405 }, { "epoch": 0.38, "grad_norm": 3.6821441650390625, "learning_rate": 1.9776478549302726e-05, "loss": 1.9032, "step": 29406 }, { "epoch": 0.38, "grad_norm": 3.347928285598755, "learning_rate": 1.9776456455900548e-05, "loss": 1.6572, "step": 29407 }, { "epoch": 0.38, "grad_norm": 3.145784378051758, "learning_rate": 1.977643436141888e-05, "loss": 1.4878, "step": 29408 }, { "epoch": 0.38, "grad_norm": 3.431166410446167, "learning_rate": 1.9776412265857732e-05, "loss": 1.6834, "step": 29409 }, { "epoch": 0.38, "grad_norm": 4.0393967628479, "learning_rate": 1.97763901692171e-05, "loss": 1.9262, "step": 29410 }, { "epoch": 0.38, "grad_norm": 3.7268927097320557, "learning_rate": 1.977636807149699e-05, "loss": 2.148, "step": 29411 }, { "epoch": 0.38, "grad_norm": 3.6058337688446045, "learning_rate": 1.9776345972697402e-05, "loss": 1.7406, "step": 29412 }, { "epoch": 0.38, "grad_norm": 3.996289014816284, "learning_rate": 1.9776323872818336e-05, "loss": 1.9268, "step": 29413 }, { "epoch": 0.38, "grad_norm": 4.241666793823242, "learning_rate": 1.97763017718598e-05, "loss": 2.0449, "step": 29414 }, { "epoch": 0.38, "grad_norm": 3.732191562652588, "learning_rate": 1.9776279669821798e-05, "loss": 2.0606, "step": 29415 }, { "epoch": 0.38, "grad_norm": 3.8440449237823486, "learning_rate": 1.977625756670433e-05, "loss": 1.9925, "step": 29416 }, { "epoch": 0.38, "grad_norm": 3.762223958969116, "learning_rate": 1.977623546250739e-05, "loss": 2.0079, "step": 29417 }, { "epoch": 0.38, "grad_norm": 4.497656345367432, "learning_rate": 1.9776213357230993e-05, "loss": 1.8412, "step": 29418 }, { "epoch": 0.38, "grad_norm": 3.8685245513916016, "learning_rate": 1.9776191250875137e-05, "loss": 2.0397, "step": 29419 }, { "epoch": 0.38, "grad_norm": 3.85422945022583, "learning_rate": 1.977616914343982e-05, "loss": 1.8817, "step": 29420 }, { "epoch": 0.38, "grad_norm": 3.676285743713379, "learning_rate": 1.977614703492505e-05, "loss": 1.8384, "step": 29421 }, { "epoch": 0.38, "grad_norm": 3.43624210357666, "learning_rate": 1.9776124925330827e-05, "loss": 1.8372, "step": 29422 }, { "epoch": 0.38, "grad_norm": 3.634049415588379, "learning_rate": 1.9776102814657153e-05, "loss": 1.8045, "step": 29423 }, { "epoch": 0.38, "grad_norm": 3.9249703884124756, "learning_rate": 1.9776080702904034e-05, "loss": 2.2968, "step": 29424 }, { "epoch": 0.38, "grad_norm": 3.420645236968994, "learning_rate": 1.9776058590071466e-05, "loss": 1.965, "step": 29425 }, { "epoch": 0.38, "grad_norm": 4.328873157501221, "learning_rate": 1.9776036476159456e-05, "loss": 2.6997, "step": 29426 }, { "epoch": 0.38, "grad_norm": 3.6586594581604004, "learning_rate": 1.977601436116801e-05, "loss": 1.9452, "step": 29427 }, { "epoch": 0.38, "grad_norm": 4.1442060470581055, "learning_rate": 1.977599224509712e-05, "loss": 2.1509, "step": 29428 }, { "epoch": 0.38, "grad_norm": 3.515488386154175, "learning_rate": 1.97759701279468e-05, "loss": 1.6028, "step": 29429 }, { "epoch": 0.38, "grad_norm": 3.6362152099609375, "learning_rate": 1.9775948009717046e-05, "loss": 1.9705, "step": 29430 }, { "epoch": 0.38, "grad_norm": 3.54557466506958, "learning_rate": 1.977592589040786e-05, "loss": 1.7019, "step": 29431 }, { "epoch": 0.38, "grad_norm": 3.2553908824920654, "learning_rate": 1.9775903770019248e-05, "loss": 1.6074, "step": 29432 }, { "epoch": 0.38, "grad_norm": 4.3130388259887695, "learning_rate": 1.9775881648551206e-05, "loss": 2.0884, "step": 29433 }, { "epoch": 0.38, "grad_norm": 3.5311028957366943, "learning_rate": 1.9775859526003743e-05, "loss": 1.7906, "step": 29434 }, { "epoch": 0.38, "grad_norm": 3.4082658290863037, "learning_rate": 1.977583740237686e-05, "loss": 1.6592, "step": 29435 }, { "epoch": 0.38, "grad_norm": 4.127318859100342, "learning_rate": 1.9775815277670562e-05, "loss": 2.1749, "step": 29436 }, { "epoch": 0.38, "grad_norm": 3.603677272796631, "learning_rate": 1.9775793151884844e-05, "loss": 1.8199, "step": 29437 }, { "epoch": 0.38, "grad_norm": 4.0358147621154785, "learning_rate": 1.9775771025019712e-05, "loss": 2.5959, "step": 29438 }, { "epoch": 0.38, "grad_norm": 3.9209556579589844, "learning_rate": 1.977574889707517e-05, "loss": 2.0715, "step": 29439 }, { "epoch": 0.38, "grad_norm": 4.160996913909912, "learning_rate": 1.9775726768051223e-05, "loss": 3.0263, "step": 29440 }, { "epoch": 0.38, "grad_norm": 4.134115219116211, "learning_rate": 1.9775704637947864e-05, "loss": 2.3085, "step": 29441 }, { "epoch": 0.38, "grad_norm": 3.6356208324432373, "learning_rate": 1.9775682506765106e-05, "loss": 1.9299, "step": 29442 }, { "epoch": 0.38, "grad_norm": 3.394338846206665, "learning_rate": 1.9775660374502944e-05, "loss": 1.4712, "step": 29443 }, { "epoch": 0.38, "grad_norm": 3.959010601043701, "learning_rate": 1.9775638241161384e-05, "loss": 2.0003, "step": 29444 }, { "epoch": 0.38, "grad_norm": 3.6533615589141846, "learning_rate": 1.9775616106740427e-05, "loss": 2.1233, "step": 29445 }, { "epoch": 0.38, "grad_norm": 3.8009798526763916, "learning_rate": 1.9775593971240078e-05, "loss": 2.2332, "step": 29446 }, { "epoch": 0.38, "grad_norm": 3.460352659225464, "learning_rate": 1.9775571834660335e-05, "loss": 1.2489, "step": 29447 }, { "epoch": 0.38, "grad_norm": 3.531937599182129, "learning_rate": 1.9775549697001208e-05, "loss": 1.7845, "step": 29448 }, { "epoch": 0.38, "grad_norm": 3.457326650619507, "learning_rate": 1.977552755826269e-05, "loss": 1.8835, "step": 29449 }, { "epoch": 0.38, "grad_norm": 3.237333297729492, "learning_rate": 1.9775505418444787e-05, "loss": 1.498, "step": 29450 }, { "epoch": 0.38, "grad_norm": 3.3239099979400635, "learning_rate": 1.9775483277547503e-05, "loss": 1.734, "step": 29451 }, { "epoch": 0.38, "grad_norm": 4.026188850402832, "learning_rate": 1.9775461135570843e-05, "loss": 2.2547, "step": 29452 }, { "epoch": 0.38, "grad_norm": 3.609041452407837, "learning_rate": 1.9775438992514804e-05, "loss": 1.76, "step": 29453 }, { "epoch": 0.38, "grad_norm": 3.3618593215942383, "learning_rate": 1.9775416848379392e-05, "loss": 1.4564, "step": 29454 }, { "epoch": 0.38, "grad_norm": 3.7366647720336914, "learning_rate": 1.977539470316461e-05, "loss": 2.0421, "step": 29455 }, { "epoch": 0.38, "grad_norm": 3.426683187484741, "learning_rate": 1.9775372556870455e-05, "loss": 1.5565, "step": 29456 }, { "epoch": 0.38, "grad_norm": 3.901993989944458, "learning_rate": 1.977535040949693e-05, "loss": 2.1839, "step": 29457 }, { "epoch": 0.38, "grad_norm": 4.14872407913208, "learning_rate": 1.9775328261044045e-05, "loss": 2.1565, "step": 29458 }, { "epoch": 0.38, "grad_norm": 4.314866542816162, "learning_rate": 1.97753061115118e-05, "loss": 2.4003, "step": 29459 }, { "epoch": 0.38, "grad_norm": 3.7581565380096436, "learning_rate": 1.977528396090019e-05, "loss": 2.021, "step": 29460 }, { "epoch": 0.38, "grad_norm": 3.636181354522705, "learning_rate": 1.9775261809209227e-05, "loss": 1.9603, "step": 29461 }, { "epoch": 0.38, "grad_norm": 3.566514730453491, "learning_rate": 1.9775239656438906e-05, "loss": 1.9028, "step": 29462 }, { "epoch": 0.38, "grad_norm": 3.75848126411438, "learning_rate": 1.9775217502589235e-05, "loss": 1.9029, "step": 29463 }, { "epoch": 0.38, "grad_norm": 3.8674042224884033, "learning_rate": 1.9775195347660214e-05, "loss": 1.7591, "step": 29464 }, { "epoch": 0.38, "grad_norm": 4.001192092895508, "learning_rate": 1.9775173191651843e-05, "loss": 2.0958, "step": 29465 }, { "epoch": 0.38, "grad_norm": 3.842425584793091, "learning_rate": 1.977515103456413e-05, "loss": 1.7647, "step": 29466 }, { "epoch": 0.38, "grad_norm": 3.5032846927642822, "learning_rate": 1.9775128876397075e-05, "loss": 1.5432, "step": 29467 }, { "epoch": 0.38, "grad_norm": 4.0120625495910645, "learning_rate": 1.977510671715068e-05, "loss": 1.7407, "step": 29468 }, { "epoch": 0.38, "grad_norm": 3.2025146484375, "learning_rate": 1.9775084556824944e-05, "loss": 1.5709, "step": 29469 }, { "epoch": 0.38, "grad_norm": 4.060543060302734, "learning_rate": 1.9775062395419875e-05, "loss": 1.8156, "step": 29470 }, { "epoch": 0.38, "grad_norm": 3.8106272220611572, "learning_rate": 1.9775040232935476e-05, "loss": 2.1552, "step": 29471 }, { "epoch": 0.38, "grad_norm": 3.822023630142212, "learning_rate": 1.977501806937174e-05, "loss": 1.5594, "step": 29472 }, { "epoch": 0.38, "grad_norm": 4.134352684020996, "learning_rate": 1.977499590472868e-05, "loss": 2.0964, "step": 29473 }, { "epoch": 0.38, "grad_norm": 3.245521068572998, "learning_rate": 1.9774973739006294e-05, "loss": 1.5547, "step": 29474 }, { "epoch": 0.38, "grad_norm": 3.8225176334381104, "learning_rate": 1.977495157220459e-05, "loss": 1.9668, "step": 29475 }, { "epoch": 0.38, "grad_norm": 4.241228103637695, "learning_rate": 1.977492940432356e-05, "loss": 2.091, "step": 29476 }, { "epoch": 0.38, "grad_norm": 4.1810383796691895, "learning_rate": 1.977490723536321e-05, "loss": 1.8549, "step": 29477 }, { "epoch": 0.38, "grad_norm": 4.268651008605957, "learning_rate": 1.9774885065323552e-05, "loss": 2.2895, "step": 29478 }, { "epoch": 0.38, "grad_norm": 3.592050313949585, "learning_rate": 1.9774862894204573e-05, "loss": 2.0831, "step": 29479 }, { "epoch": 0.38, "grad_norm": 3.6614835262298584, "learning_rate": 1.977484072200629e-05, "loss": 1.9245, "step": 29480 }, { "epoch": 0.38, "grad_norm": 3.804932117462158, "learning_rate": 1.977481854872869e-05, "loss": 1.7157, "step": 29481 }, { "epoch": 0.38, "grad_norm": 3.8664753437042236, "learning_rate": 1.9774796374371793e-05, "loss": 1.869, "step": 29482 }, { "epoch": 0.38, "grad_norm": 3.998387098312378, "learning_rate": 1.9774774198935586e-05, "loss": 2.5017, "step": 29483 }, { "epoch": 0.38, "grad_norm": 4.501736640930176, "learning_rate": 1.9774752022420082e-05, "loss": 2.0435, "step": 29484 }, { "epoch": 0.38, "grad_norm": 3.8515806198120117, "learning_rate": 1.977472984482528e-05, "loss": 2.0151, "step": 29485 }, { "epoch": 0.38, "grad_norm": 3.735412836074829, "learning_rate": 1.977470766615118e-05, "loss": 2.0626, "step": 29486 }, { "epoch": 0.38, "grad_norm": 3.9221649169921875, "learning_rate": 1.977468548639779e-05, "loss": 1.9604, "step": 29487 }, { "epoch": 0.38, "grad_norm": 3.7838737964630127, "learning_rate": 1.9774663305565104e-05, "loss": 2.0908, "step": 29488 }, { "epoch": 0.38, "grad_norm": 3.55501651763916, "learning_rate": 1.977464112365313e-05, "loss": 1.7789, "step": 29489 }, { "epoch": 0.38, "grad_norm": 3.707381248474121, "learning_rate": 1.9774618940661873e-05, "loss": 2.0348, "step": 29490 }, { "epoch": 0.38, "grad_norm": 3.4986953735351562, "learning_rate": 1.977459675659133e-05, "loss": 1.8667, "step": 29491 }, { "epoch": 0.38, "grad_norm": 3.7583258152008057, "learning_rate": 1.9774574571441505e-05, "loss": 2.0762, "step": 29492 }, { "epoch": 0.38, "grad_norm": 4.25651216506958, "learning_rate": 1.9774552385212404e-05, "loss": 2.3661, "step": 29493 }, { "epoch": 0.38, "grad_norm": 3.666137218475342, "learning_rate": 1.9774530197904024e-05, "loss": 1.9673, "step": 29494 }, { "epoch": 0.38, "grad_norm": 3.3209362030029297, "learning_rate": 1.9774508009516375e-05, "loss": 1.6075, "step": 29495 }, { "epoch": 0.38, "grad_norm": 4.082306385040283, "learning_rate": 1.9774485820049446e-05, "loss": 2.2106, "step": 29496 }, { "epoch": 0.38, "grad_norm": 4.041445255279541, "learning_rate": 1.9774463629503256e-05, "loss": 2.5363, "step": 29497 }, { "epoch": 0.38, "grad_norm": 4.292292594909668, "learning_rate": 1.9774441437877792e-05, "loss": 2.2499, "step": 29498 }, { "epoch": 0.38, "grad_norm": 3.808945894241333, "learning_rate": 1.977441924517307e-05, "loss": 1.9, "step": 29499 }, { "epoch": 0.38, "grad_norm": 3.3955957889556885, "learning_rate": 1.9774397051389086e-05, "loss": 1.7667, "step": 29500 }, { "epoch": 0.38, "grad_norm": 3.7711057662963867, "learning_rate": 1.977437485652584e-05, "loss": 1.9515, "step": 29501 }, { "epoch": 0.38, "grad_norm": 3.675507068634033, "learning_rate": 1.9774352660583337e-05, "loss": 1.9405, "step": 29502 }, { "epoch": 0.38, "grad_norm": 4.08511209487915, "learning_rate": 1.9774330463561583e-05, "loss": 2.1101, "step": 29503 }, { "epoch": 0.38, "grad_norm": 3.790847063064575, "learning_rate": 1.977430826546057e-05, "loss": 1.6599, "step": 29504 }, { "epoch": 0.38, "grad_norm": 3.8255786895751953, "learning_rate": 1.9774286066280314e-05, "loss": 2.2186, "step": 29505 }, { "epoch": 0.38, "grad_norm": 3.7228260040283203, "learning_rate": 1.977426386602081e-05, "loss": 1.9905, "step": 29506 }, { "epoch": 0.38, "grad_norm": 3.57843279838562, "learning_rate": 1.977424166468206e-05, "loss": 1.6398, "step": 29507 }, { "epoch": 0.38, "grad_norm": 3.0543315410614014, "learning_rate": 1.9774219462264067e-05, "loss": 1.4757, "step": 29508 }, { "epoch": 0.38, "grad_norm": 4.520242214202881, "learning_rate": 1.9774197258766837e-05, "loss": 2.6417, "step": 29509 }, { "epoch": 0.38, "grad_norm": 4.344214916229248, "learning_rate": 1.977417505419037e-05, "loss": 2.1953, "step": 29510 }, { "epoch": 0.38, "grad_norm": 3.7963461875915527, "learning_rate": 1.9774152848534665e-05, "loss": 1.8004, "step": 29511 }, { "epoch": 0.38, "grad_norm": 3.842039108276367, "learning_rate": 1.977413064179973e-05, "loss": 1.8477, "step": 29512 }, { "epoch": 0.38, "grad_norm": 3.840043067932129, "learning_rate": 1.9774108433985567e-05, "loss": 1.9938, "step": 29513 }, { "epoch": 0.38, "grad_norm": 4.0660576820373535, "learning_rate": 1.977408622509217e-05, "loss": 2.3835, "step": 29514 }, { "epoch": 0.38, "grad_norm": 4.161553859710693, "learning_rate": 1.9774064015119558e-05, "loss": 2.1283, "step": 29515 }, { "epoch": 0.38, "grad_norm": 3.6371755599975586, "learning_rate": 1.9774041804067716e-05, "loss": 1.9182, "step": 29516 }, { "epoch": 0.38, "grad_norm": 3.446420431137085, "learning_rate": 1.9774019591936655e-05, "loss": 2.0282, "step": 29517 }, { "epoch": 0.38, "grad_norm": 4.025312900543213, "learning_rate": 1.977399737872638e-05, "loss": 2.2948, "step": 29518 }, { "epoch": 0.38, "grad_norm": 5.084052562713623, "learning_rate": 1.9773975164436884e-05, "loss": 2.163, "step": 29519 }, { "epoch": 0.38, "grad_norm": 4.037583827972412, "learning_rate": 1.977395294906818e-05, "loss": 2.3026, "step": 29520 }, { "epoch": 0.38, "grad_norm": 3.6946449279785156, "learning_rate": 1.9773930732620266e-05, "loss": 1.9877, "step": 29521 }, { "epoch": 0.38, "grad_norm": 3.719155788421631, "learning_rate": 1.977390851509314e-05, "loss": 2.0673, "step": 29522 }, { "epoch": 0.38, "grad_norm": 3.617125988006592, "learning_rate": 1.9773886296486813e-05, "loss": 2.0828, "step": 29523 }, { "epoch": 0.38, "grad_norm": 4.029096603393555, "learning_rate": 1.9773864076801282e-05, "loss": 1.721, "step": 29524 }, { "epoch": 0.38, "grad_norm": 3.884127378463745, "learning_rate": 1.9773841856036547e-05, "loss": 2.0845, "step": 29525 }, { "epoch": 0.38, "grad_norm": 3.802495002746582, "learning_rate": 1.9773819634192618e-05, "loss": 1.845, "step": 29526 }, { "epoch": 0.38, "grad_norm": 3.6901605129241943, "learning_rate": 1.9773797411269494e-05, "loss": 1.9713, "step": 29527 }, { "epoch": 0.38, "grad_norm": 3.7562975883483887, "learning_rate": 1.9773775187267176e-05, "loss": 1.9106, "step": 29528 }, { "epoch": 0.38, "grad_norm": 4.0398268699646, "learning_rate": 1.9773752962185666e-05, "loss": 2.0958, "step": 29529 }, { "epoch": 0.38, "grad_norm": 4.072375297546387, "learning_rate": 1.977373073602497e-05, "loss": 2.2064, "step": 29530 }, { "epoch": 0.38, "grad_norm": 4.016073703765869, "learning_rate": 1.9773708508785087e-05, "loss": 2.0501, "step": 29531 }, { "epoch": 0.38, "grad_norm": 4.327970027923584, "learning_rate": 1.9773686280466017e-05, "loss": 2.8014, "step": 29532 }, { "epoch": 0.38, "grad_norm": 3.912828207015991, "learning_rate": 1.977366405106777e-05, "loss": 1.9436, "step": 29533 }, { "epoch": 0.38, "grad_norm": 3.890443801879883, "learning_rate": 1.977364182059035e-05, "loss": 2.3036, "step": 29534 }, { "epoch": 0.38, "grad_norm": 3.8115017414093018, "learning_rate": 1.9773619589033747e-05, "loss": 1.9068, "step": 29535 }, { "epoch": 0.38, "grad_norm": 3.4339277744293213, "learning_rate": 1.977359735639797e-05, "loss": 1.8167, "step": 29536 }, { "epoch": 0.38, "grad_norm": 3.65260910987854, "learning_rate": 1.9773575122683027e-05, "loss": 2.0602, "step": 29537 }, { "epoch": 0.38, "grad_norm": 3.6559107303619385, "learning_rate": 1.9773552887888913e-05, "loss": 1.7584, "step": 29538 }, { "epoch": 0.38, "grad_norm": 3.7969651222229004, "learning_rate": 1.977353065201563e-05, "loss": 1.922, "step": 29539 }, { "epoch": 0.38, "grad_norm": 3.9728150367736816, "learning_rate": 1.9773508415063188e-05, "loss": 2.1334, "step": 29540 }, { "epoch": 0.38, "grad_norm": 4.224746227264404, "learning_rate": 1.977348617703158e-05, "loss": 2.146, "step": 29541 }, { "epoch": 0.38, "grad_norm": 3.3367865085601807, "learning_rate": 1.977346393792082e-05, "loss": 1.6288, "step": 29542 }, { "epoch": 0.38, "grad_norm": 3.763033390045166, "learning_rate": 1.97734416977309e-05, "loss": 2.3742, "step": 29543 }, { "epoch": 0.38, "grad_norm": 3.7397148609161377, "learning_rate": 1.9773419456461825e-05, "loss": 1.7864, "step": 29544 }, { "epoch": 0.38, "grad_norm": 3.6820082664489746, "learning_rate": 1.97733972141136e-05, "loss": 2.1004, "step": 29545 }, { "epoch": 0.38, "grad_norm": 3.7882039546966553, "learning_rate": 1.9773374970686227e-05, "loss": 2.1876, "step": 29546 }, { "epoch": 0.38, "grad_norm": 4.139519691467285, "learning_rate": 1.9773352726179707e-05, "loss": 2.2336, "step": 29547 }, { "epoch": 0.38, "grad_norm": 3.741290807723999, "learning_rate": 1.9773330480594044e-05, "loss": 1.8028, "step": 29548 }, { "epoch": 0.38, "grad_norm": 4.427938938140869, "learning_rate": 1.9773308233929237e-05, "loss": 2.6123, "step": 29549 }, { "epoch": 0.38, "grad_norm": 3.9066808223724365, "learning_rate": 1.9773285986185293e-05, "loss": 1.7835, "step": 29550 }, { "epoch": 0.38, "grad_norm": 3.460697650909424, "learning_rate": 1.9773263737362213e-05, "loss": 1.9038, "step": 29551 }, { "epoch": 0.38, "grad_norm": 3.7080588340759277, "learning_rate": 1.9773241487459995e-05, "loss": 1.7876, "step": 29552 }, { "epoch": 0.38, "grad_norm": 3.9023237228393555, "learning_rate": 1.977321923647865e-05, "loss": 2.3371, "step": 29553 }, { "epoch": 0.38, "grad_norm": 3.8003666400909424, "learning_rate": 1.9773196984418176e-05, "loss": 2.1045, "step": 29554 }, { "epoch": 0.38, "grad_norm": 4.208982944488525, "learning_rate": 1.977317473127857e-05, "loss": 2.4156, "step": 29555 }, { "epoch": 0.38, "grad_norm": 3.975294351577759, "learning_rate": 1.9773152477059843e-05, "loss": 1.8467, "step": 29556 }, { "epoch": 0.38, "grad_norm": 4.271078586578369, "learning_rate": 1.9773130221761995e-05, "loss": 2.263, "step": 29557 }, { "epoch": 0.38, "grad_norm": 3.466857433319092, "learning_rate": 1.9773107965385027e-05, "loss": 1.8991, "step": 29558 }, { "epoch": 0.38, "grad_norm": 4.433435916900635, "learning_rate": 1.9773085707928942e-05, "loss": 1.6108, "step": 29559 }, { "epoch": 0.38, "grad_norm": 3.6366634368896484, "learning_rate": 1.977306344939374e-05, "loss": 2.1596, "step": 29560 }, { "epoch": 0.38, "grad_norm": 3.6437530517578125, "learning_rate": 1.977304118977943e-05, "loss": 2.1803, "step": 29561 }, { "epoch": 0.38, "grad_norm": 4.463578701019287, "learning_rate": 1.977301892908601e-05, "loss": 2.3851, "step": 29562 }, { "epoch": 0.38, "grad_norm": 3.6299967765808105, "learning_rate": 1.977299666731348e-05, "loss": 1.7909, "step": 29563 }, { "epoch": 0.38, "grad_norm": 4.117164611816406, "learning_rate": 1.9772974404461848e-05, "loss": 2.096, "step": 29564 }, { "epoch": 0.38, "grad_norm": 3.270933151245117, "learning_rate": 1.977295214053111e-05, "loss": 1.5288, "step": 29565 }, { "epoch": 0.38, "grad_norm": 4.2346906661987305, "learning_rate": 1.9772929875521275e-05, "loss": 2.1417, "step": 29566 }, { "epoch": 0.38, "grad_norm": 3.7815568447113037, "learning_rate": 1.9772907609432343e-05, "loss": 1.7295, "step": 29567 }, { "epoch": 0.38, "grad_norm": 3.8082432746887207, "learning_rate": 1.9772885342264317e-05, "loss": 2.2367, "step": 29568 }, { "epoch": 0.38, "grad_norm": 4.058021068572998, "learning_rate": 1.9772863074017195e-05, "loss": 1.9938, "step": 29569 }, { "epoch": 0.38, "grad_norm": 3.9250035285949707, "learning_rate": 1.9772840804690987e-05, "loss": 1.6673, "step": 29570 }, { "epoch": 0.38, "grad_norm": 3.5776422023773193, "learning_rate": 1.977281853428569e-05, "loss": 1.7645, "step": 29571 }, { "epoch": 0.38, "grad_norm": 3.820608615875244, "learning_rate": 1.977279626280131e-05, "loss": 1.8727, "step": 29572 }, { "epoch": 0.38, "grad_norm": 3.15327787399292, "learning_rate": 1.9772773990237847e-05, "loss": 1.5736, "step": 29573 }, { "epoch": 0.38, "grad_norm": 3.4629530906677246, "learning_rate": 1.9772751716595302e-05, "loss": 1.7979, "step": 29574 }, { "epoch": 0.38, "grad_norm": 4.408496856689453, "learning_rate": 1.977272944187368e-05, "loss": 1.8438, "step": 29575 }, { "epoch": 0.38, "grad_norm": 4.7000908851623535, "learning_rate": 1.9772707166072983e-05, "loss": 2.3668, "step": 29576 }, { "epoch": 0.38, "grad_norm": 4.341405391693115, "learning_rate": 1.9772684889193213e-05, "loss": 2.2663, "step": 29577 }, { "epoch": 0.38, "grad_norm": 4.2639617919921875, "learning_rate": 1.9772662611234374e-05, "loss": 2.0281, "step": 29578 }, { "epoch": 0.38, "grad_norm": 3.130230665206909, "learning_rate": 1.9772640332196466e-05, "loss": 1.6207, "step": 29579 }, { "epoch": 0.38, "grad_norm": 4.45882511138916, "learning_rate": 1.9772618052079492e-05, "loss": 2.5839, "step": 29580 }, { "epoch": 0.38, "grad_norm": 4.037469387054443, "learning_rate": 1.977259577088346e-05, "loss": 2.2933, "step": 29581 }, { "epoch": 0.38, "grad_norm": 4.15317964553833, "learning_rate": 1.977257348860836e-05, "loss": 1.9114, "step": 29582 }, { "epoch": 0.38, "grad_norm": 3.8076794147491455, "learning_rate": 1.977255120525421e-05, "loss": 2.0868, "step": 29583 }, { "epoch": 0.38, "grad_norm": 4.713222026824951, "learning_rate": 1.9772528920821e-05, "loss": 2.3719, "step": 29584 }, { "epoch": 0.38, "grad_norm": 4.0134382247924805, "learning_rate": 1.9772506635308738e-05, "loss": 2.2182, "step": 29585 }, { "epoch": 0.38, "grad_norm": 4.505755424499512, "learning_rate": 1.9772484348717424e-05, "loss": 2.4577, "step": 29586 }, { "epoch": 0.38, "grad_norm": 3.342818260192871, "learning_rate": 1.977246206104706e-05, "loss": 1.5658, "step": 29587 }, { "epoch": 0.38, "grad_norm": 4.126662254333496, "learning_rate": 1.9772439772297655e-05, "loss": 2.1402, "step": 29588 }, { "epoch": 0.38, "grad_norm": 4.371057033538818, "learning_rate": 1.9772417482469205e-05, "loss": 2.1529, "step": 29589 }, { "epoch": 0.38, "grad_norm": 4.03356409072876, "learning_rate": 1.9772395191561716e-05, "loss": 1.8229, "step": 29590 }, { "epoch": 0.38, "grad_norm": 3.5925052165985107, "learning_rate": 1.9772372899575188e-05, "loss": 1.7719, "step": 29591 }, { "epoch": 0.38, "grad_norm": 4.060407638549805, "learning_rate": 1.977235060650962e-05, "loss": 2.3127, "step": 29592 }, { "epoch": 0.38, "grad_norm": 4.010520935058594, "learning_rate": 1.9772328312365026e-05, "loss": 2.1229, "step": 29593 }, { "epoch": 0.38, "grad_norm": 3.848440408706665, "learning_rate": 1.9772306017141396e-05, "loss": 1.9083, "step": 29594 }, { "epoch": 0.38, "grad_norm": 3.762238025665283, "learning_rate": 1.9772283720838737e-05, "loss": 1.6484, "step": 29595 }, { "epoch": 0.38, "grad_norm": 3.5473854541778564, "learning_rate": 1.9772261423457056e-05, "loss": 1.8621, "step": 29596 }, { "epoch": 0.38, "grad_norm": 4.289809703826904, "learning_rate": 1.977223912499635e-05, "loss": 2.2271, "step": 29597 }, { "epoch": 0.38, "grad_norm": 3.571631908416748, "learning_rate": 1.9772216825456623e-05, "loss": 1.7118, "step": 29598 }, { "epoch": 0.38, "grad_norm": 4.429197311401367, "learning_rate": 1.9772194524837878e-05, "loss": 2.5844, "step": 29599 }, { "epoch": 0.38, "grad_norm": 3.9168829917907715, "learning_rate": 1.9772172223140117e-05, "loss": 2.3532, "step": 29600 }, { "epoch": 0.38, "grad_norm": 3.664924383163452, "learning_rate": 1.977214992036334e-05, "loss": 1.8843, "step": 29601 }, { "epoch": 0.38, "grad_norm": 3.887906551361084, "learning_rate": 1.9772127616507554e-05, "loss": 1.9038, "step": 29602 }, { "epoch": 0.38, "grad_norm": 3.8947291374206543, "learning_rate": 1.977210531157276e-05, "loss": 1.7699, "step": 29603 }, { "epoch": 0.38, "grad_norm": 3.650014638900757, "learning_rate": 1.977208300555896e-05, "loss": 2.2507, "step": 29604 }, { "epoch": 0.38, "grad_norm": 4.01423454284668, "learning_rate": 1.977206069846615e-05, "loss": 1.9245, "step": 29605 }, { "epoch": 0.38, "grad_norm": 4.398544788360596, "learning_rate": 1.9772038390294344e-05, "loss": 2.4647, "step": 29606 }, { "epoch": 0.38, "grad_norm": 4.1661882400512695, "learning_rate": 1.977201608104354e-05, "loss": 2.044, "step": 29607 }, { "epoch": 0.38, "grad_norm": 4.166681289672852, "learning_rate": 1.977199377071374e-05, "loss": 1.8634, "step": 29608 }, { "epoch": 0.38, "grad_norm": 4.187302112579346, "learning_rate": 1.9771971459304944e-05, "loss": 2.2429, "step": 29609 }, { "epoch": 0.38, "grad_norm": 4.009012699127197, "learning_rate": 1.9771949146817156e-05, "loss": 2.0296, "step": 29610 }, { "epoch": 0.38, "grad_norm": 3.9162330627441406, "learning_rate": 1.977192683325038e-05, "loss": 2.0925, "step": 29611 }, { "epoch": 0.38, "grad_norm": 3.943873405456543, "learning_rate": 1.977190451860462e-05, "loss": 2.0144, "step": 29612 }, { "epoch": 0.38, "grad_norm": 4.0177106857299805, "learning_rate": 1.977188220287987e-05, "loss": 2.1886, "step": 29613 }, { "epoch": 0.38, "grad_norm": 3.8631820678710938, "learning_rate": 1.9771859886076142e-05, "loss": 2.2552, "step": 29614 }, { "epoch": 0.38, "grad_norm": 3.4113354682922363, "learning_rate": 1.9771837568193435e-05, "loss": 1.6145, "step": 29615 }, { "epoch": 0.38, "grad_norm": 3.9303789138793945, "learning_rate": 1.977181524923175e-05, "loss": 2.4062, "step": 29616 }, { "epoch": 0.38, "grad_norm": 3.371567487716675, "learning_rate": 1.9771792929191095e-05, "loss": 1.6549, "step": 29617 }, { "epoch": 0.38, "grad_norm": 3.4587361812591553, "learning_rate": 1.9771770608071462e-05, "loss": 1.6326, "step": 29618 }, { "epoch": 0.38, "grad_norm": 3.5349385738372803, "learning_rate": 1.9771748285872865e-05, "loss": 1.4869, "step": 29619 }, { "epoch": 0.38, "grad_norm": 3.4245386123657227, "learning_rate": 1.97717259625953e-05, "loss": 1.8704, "step": 29620 }, { "epoch": 0.38, "grad_norm": 3.7226107120513916, "learning_rate": 1.977170363823877e-05, "loss": 1.8208, "step": 29621 }, { "epoch": 0.38, "grad_norm": 3.373792886734009, "learning_rate": 1.9771681312803278e-05, "loss": 1.8191, "step": 29622 }, { "epoch": 0.38, "grad_norm": 3.844536066055298, "learning_rate": 1.9771658986288827e-05, "loss": 1.8538, "step": 29623 }, { "epoch": 0.38, "grad_norm": 3.8383328914642334, "learning_rate": 1.977163665869542e-05, "loss": 2.0779, "step": 29624 }, { "epoch": 0.38, "grad_norm": 4.051039695739746, "learning_rate": 1.9771614330023054e-05, "loss": 2.1617, "step": 29625 }, { "epoch": 0.38, "grad_norm": 3.9565842151641846, "learning_rate": 1.977159200027174e-05, "loss": 2.1469, "step": 29626 }, { "epoch": 0.38, "grad_norm": 3.9557816982269287, "learning_rate": 1.9771569669441476e-05, "loss": 1.9054, "step": 29627 }, { "epoch": 0.38, "grad_norm": 3.301717758178711, "learning_rate": 1.9771547337532266e-05, "loss": 1.648, "step": 29628 }, { "epoch": 0.38, "grad_norm": 4.708829879760742, "learning_rate": 1.977152500454411e-05, "loss": 2.4963, "step": 29629 }, { "epoch": 0.38, "grad_norm": 4.3142876625061035, "learning_rate": 1.977150267047701e-05, "loss": 2.3065, "step": 29630 }, { "epoch": 0.38, "grad_norm": 3.6881251335144043, "learning_rate": 1.9771480335330973e-05, "loss": 2.0087, "step": 29631 }, { "epoch": 0.38, "grad_norm": 3.3488821983337402, "learning_rate": 1.9771457999105998e-05, "loss": 1.9572, "step": 29632 }, { "epoch": 0.38, "grad_norm": 3.5561656951904297, "learning_rate": 1.977143566180209e-05, "loss": 2.0592, "step": 29633 }, { "epoch": 0.38, "grad_norm": 4.110960006713867, "learning_rate": 1.9771413323419247e-05, "loss": 2.3593, "step": 29634 }, { "epoch": 0.38, "grad_norm": 3.3758537769317627, "learning_rate": 1.9771390983957474e-05, "loss": 1.7846, "step": 29635 }, { "epoch": 0.38, "grad_norm": 3.487478494644165, "learning_rate": 1.9771368643416775e-05, "loss": 1.7263, "step": 29636 }, { "epoch": 0.38, "grad_norm": 3.844106674194336, "learning_rate": 1.9771346301797152e-05, "loss": 2.0864, "step": 29637 }, { "epoch": 0.38, "grad_norm": 3.510145425796509, "learning_rate": 1.9771323959098606e-05, "loss": 1.9338, "step": 29638 }, { "epoch": 0.38, "grad_norm": 3.67798113822937, "learning_rate": 1.977130161532114e-05, "loss": 2.1769, "step": 29639 }, { "epoch": 0.38, "grad_norm": 4.16354513168335, "learning_rate": 1.9771279270464753e-05, "loss": 1.9466, "step": 29640 }, { "epoch": 0.38, "grad_norm": 4.160655498504639, "learning_rate": 1.9771256924529453e-05, "loss": 2.5272, "step": 29641 }, { "epoch": 0.38, "grad_norm": 3.889296770095825, "learning_rate": 1.9771234577515243e-05, "loss": 2.0337, "step": 29642 }, { "epoch": 0.38, "grad_norm": 3.4569122791290283, "learning_rate": 1.9771212229422124e-05, "loss": 1.4749, "step": 29643 }, { "epoch": 0.38, "grad_norm": 3.293006181716919, "learning_rate": 1.9771189880250094e-05, "loss": 1.4306, "step": 29644 }, { "epoch": 0.38, "grad_norm": 3.949145793914795, "learning_rate": 1.9771167529999162e-05, "loss": 2.0473, "step": 29645 }, { "epoch": 0.38, "grad_norm": 3.549755334854126, "learning_rate": 1.9771145178669323e-05, "loss": 1.7701, "step": 29646 }, { "epoch": 0.38, "grad_norm": 3.880913019180298, "learning_rate": 1.9771122826260587e-05, "loss": 1.9204, "step": 29647 }, { "epoch": 0.38, "grad_norm": 3.738342046737671, "learning_rate": 1.9771100472772952e-05, "loss": 1.9355, "step": 29648 }, { "epoch": 0.38, "grad_norm": 3.299241542816162, "learning_rate": 1.977107811820642e-05, "loss": 1.9076, "step": 29649 }, { "epoch": 0.38, "grad_norm": 3.9695851802825928, "learning_rate": 1.9771055762561e-05, "loss": 2.2282, "step": 29650 }, { "epoch": 0.38, "grad_norm": 3.9452595710754395, "learning_rate": 1.9771033405836685e-05, "loss": 2.0112, "step": 29651 }, { "epoch": 0.38, "grad_norm": 3.645744800567627, "learning_rate": 1.9771011048033484e-05, "loss": 2.0398, "step": 29652 }, { "epoch": 0.38, "grad_norm": 3.7152631282806396, "learning_rate": 1.9770988689151398e-05, "loss": 1.9363, "step": 29653 }, { "epoch": 0.38, "grad_norm": 3.9601309299468994, "learning_rate": 1.977096632919043e-05, "loss": 1.6839, "step": 29654 }, { "epoch": 0.38, "grad_norm": 4.442783832550049, "learning_rate": 1.9770943968150576e-05, "loss": 2.2747, "step": 29655 }, { "epoch": 0.38, "grad_norm": 3.5089168548583984, "learning_rate": 1.977092160603185e-05, "loss": 2.2882, "step": 29656 }, { "epoch": 0.38, "grad_norm": 4.339521408081055, "learning_rate": 1.9770899242834247e-05, "loss": 2.0504, "step": 29657 }, { "epoch": 0.38, "grad_norm": 3.6623036861419678, "learning_rate": 1.977087687855777e-05, "loss": 2.002, "step": 29658 }, { "epoch": 0.38, "grad_norm": 3.07285213470459, "learning_rate": 1.9770854513202424e-05, "loss": 1.3212, "step": 29659 }, { "epoch": 0.38, "grad_norm": 3.81884503364563, "learning_rate": 1.977083214676821e-05, "loss": 2.0484, "step": 29660 }, { "epoch": 0.38, "grad_norm": 4.177774429321289, "learning_rate": 1.9770809779255128e-05, "loss": 2.0373, "step": 29661 }, { "epoch": 0.38, "grad_norm": 4.339084625244141, "learning_rate": 1.9770787410663184e-05, "loss": 2.079, "step": 29662 }, { "epoch": 0.38, "grad_norm": 3.7368388175964355, "learning_rate": 1.9770765040992378e-05, "loss": 1.9982, "step": 29663 }, { "epoch": 0.38, "grad_norm": 3.935424327850342, "learning_rate": 1.9770742670242716e-05, "loss": 2.1406, "step": 29664 }, { "epoch": 0.38, "grad_norm": 4.7311601638793945, "learning_rate": 1.9770720298414198e-05, "loss": 2.5007, "step": 29665 }, { "epoch": 0.38, "grad_norm": 3.8854258060455322, "learning_rate": 1.977069792550683e-05, "loss": 2.3196, "step": 29666 }, { "epoch": 0.39, "grad_norm": 3.699592351913452, "learning_rate": 1.9770675551520603e-05, "loss": 1.9857, "step": 29667 }, { "epoch": 0.39, "grad_norm": 3.7199230194091797, "learning_rate": 1.9770653176455535e-05, "loss": 1.5211, "step": 29668 }, { "epoch": 0.39, "grad_norm": 3.477847099304199, "learning_rate": 1.9770630800311615e-05, "loss": 1.8619, "step": 29669 }, { "epoch": 0.39, "grad_norm": 4.228627681732178, "learning_rate": 1.9770608423088856e-05, "loss": 2.1234, "step": 29670 }, { "epoch": 0.39, "grad_norm": 4.241711139678955, "learning_rate": 1.9770586044787255e-05, "loss": 2.1934, "step": 29671 }, { "epoch": 0.39, "grad_norm": 3.8193280696868896, "learning_rate": 1.9770563665406815e-05, "loss": 2.084, "step": 29672 }, { "epoch": 0.39, "grad_norm": 4.1946516036987305, "learning_rate": 1.977054128494754e-05, "loss": 2.1504, "step": 29673 }, { "epoch": 0.39, "grad_norm": 3.7785518169403076, "learning_rate": 1.9770518903409433e-05, "loss": 2.3392, "step": 29674 }, { "epoch": 0.39, "grad_norm": 3.857081413269043, "learning_rate": 1.977049652079249e-05, "loss": 2.2793, "step": 29675 }, { "epoch": 0.39, "grad_norm": 3.428379774093628, "learning_rate": 1.9770474137096723e-05, "loss": 1.7983, "step": 29676 }, { "epoch": 0.39, "grad_norm": 3.945638656616211, "learning_rate": 1.9770451752322127e-05, "loss": 2.0428, "step": 29677 }, { "epoch": 0.39, "grad_norm": 4.17795991897583, "learning_rate": 1.977042936646871e-05, "loss": 2.0802, "step": 29678 }, { "epoch": 0.39, "grad_norm": 3.493403434753418, "learning_rate": 1.9770406979536472e-05, "loss": 1.9588, "step": 29679 }, { "epoch": 0.39, "grad_norm": 3.9188084602355957, "learning_rate": 1.9770384591525414e-05, "loss": 2.2179, "step": 29680 }, { "epoch": 0.39, "grad_norm": 3.7905869483947754, "learning_rate": 1.9770362202435537e-05, "loss": 2.2545, "step": 29681 }, { "epoch": 0.39, "grad_norm": 3.7399606704711914, "learning_rate": 1.977033981226685e-05, "loss": 1.7833, "step": 29682 }, { "epoch": 0.39, "grad_norm": 3.9512410163879395, "learning_rate": 1.977031742101935e-05, "loss": 2.1064, "step": 29683 }, { "epoch": 0.39, "grad_norm": 4.339443683624268, "learning_rate": 1.977029502869304e-05, "loss": 2.4502, "step": 29684 }, { "epoch": 0.39, "grad_norm": 4.139860153198242, "learning_rate": 1.9770272635287927e-05, "loss": 2.2585, "step": 29685 }, { "epoch": 0.39, "grad_norm": 3.983125686645508, "learning_rate": 1.977025024080401e-05, "loss": 1.6795, "step": 29686 }, { "epoch": 0.39, "grad_norm": 3.7123537063598633, "learning_rate": 1.977022784524129e-05, "loss": 2.0066, "step": 29687 }, { "epoch": 0.39, "grad_norm": 3.945277452468872, "learning_rate": 1.9770205448599768e-05, "loss": 1.842, "step": 29688 }, { "epoch": 0.39, "grad_norm": 3.4264214038848877, "learning_rate": 1.9770183050879456e-05, "loss": 1.958, "step": 29689 }, { "epoch": 0.39, "grad_norm": 3.8588900566101074, "learning_rate": 1.9770160652080345e-05, "loss": 2.046, "step": 29690 }, { "epoch": 0.39, "grad_norm": 4.2114033699035645, "learning_rate": 1.9770138252202444e-05, "loss": 2.2016, "step": 29691 }, { "epoch": 0.39, "grad_norm": 4.158600330352783, "learning_rate": 1.9770115851245754e-05, "loss": 1.6116, "step": 29692 }, { "epoch": 0.39, "grad_norm": 3.577847957611084, "learning_rate": 1.9770093449210276e-05, "loss": 1.8468, "step": 29693 }, { "epoch": 0.39, "grad_norm": 4.60432243347168, "learning_rate": 1.977007104609602e-05, "loss": 1.9514, "step": 29694 }, { "epoch": 0.39, "grad_norm": 3.7296383380889893, "learning_rate": 1.9770048641902977e-05, "loss": 1.6645, "step": 29695 }, { "epoch": 0.39, "grad_norm": 4.19830322265625, "learning_rate": 1.9770026236631156e-05, "loss": 2.2506, "step": 29696 }, { "epoch": 0.39, "grad_norm": 3.697760820388794, "learning_rate": 1.9770003830280557e-05, "loss": 1.9616, "step": 29697 }, { "epoch": 0.39, "grad_norm": 4.31135368347168, "learning_rate": 1.9769981422851187e-05, "loss": 1.9274, "step": 29698 }, { "epoch": 0.39, "grad_norm": 3.9086852073669434, "learning_rate": 1.9769959014343043e-05, "loss": 1.8538, "step": 29699 }, { "epoch": 0.39, "grad_norm": 3.7696802616119385, "learning_rate": 1.9769936604756127e-05, "loss": 1.7726, "step": 29700 }, { "epoch": 0.39, "grad_norm": 3.6285274028778076, "learning_rate": 1.9769914194090448e-05, "loss": 1.8199, "step": 29701 }, { "epoch": 0.39, "grad_norm": 3.729344367980957, "learning_rate": 1.9769891782346007e-05, "loss": 2.0331, "step": 29702 }, { "epoch": 0.39, "grad_norm": 3.8764381408691406, "learning_rate": 1.97698693695228e-05, "loss": 2.5398, "step": 29703 }, { "epoch": 0.39, "grad_norm": 3.618036985397339, "learning_rate": 1.9769846955620834e-05, "loss": 1.4475, "step": 29704 }, { "epoch": 0.39, "grad_norm": 3.984720230102539, "learning_rate": 1.9769824540640114e-05, "loss": 1.9005, "step": 29705 }, { "epoch": 0.39, "grad_norm": 4.034154415130615, "learning_rate": 1.976980212458064e-05, "loss": 1.92, "step": 29706 }, { "epoch": 0.39, "grad_norm": 3.608421564102173, "learning_rate": 1.976977970744241e-05, "loss": 1.8355, "step": 29707 }, { "epoch": 0.39, "grad_norm": 3.5786681175231934, "learning_rate": 1.976975728922543e-05, "loss": 1.5411, "step": 29708 }, { "epoch": 0.39, "grad_norm": 3.770153284072876, "learning_rate": 1.9769734869929706e-05, "loss": 2.1666, "step": 29709 }, { "epoch": 0.39, "grad_norm": 3.440821409225464, "learning_rate": 1.976971244955524e-05, "loss": 1.8679, "step": 29710 }, { "epoch": 0.39, "grad_norm": 3.5956835746765137, "learning_rate": 1.976969002810203e-05, "loss": 1.7778, "step": 29711 }, { "epoch": 0.39, "grad_norm": 3.4810376167297363, "learning_rate": 1.9769667605570076e-05, "loss": 1.7302, "step": 29712 }, { "epoch": 0.39, "grad_norm": 4.221557140350342, "learning_rate": 1.976964518195939e-05, "loss": 2.3194, "step": 29713 }, { "epoch": 0.39, "grad_norm": 3.9831366539001465, "learning_rate": 1.9769622757269967e-05, "loss": 1.8974, "step": 29714 }, { "epoch": 0.39, "grad_norm": 4.284501552581787, "learning_rate": 1.9769600331501815e-05, "loss": 2.2757, "step": 29715 }, { "epoch": 0.39, "grad_norm": 4.473092555999756, "learning_rate": 1.976957790465493e-05, "loss": 2.4085, "step": 29716 }, { "epoch": 0.39, "grad_norm": 3.263939380645752, "learning_rate": 1.9769555476729318e-05, "loss": 1.5969, "step": 29717 }, { "epoch": 0.39, "grad_norm": 3.939450979232788, "learning_rate": 1.9769533047724985e-05, "loss": 2.2072, "step": 29718 }, { "epoch": 0.39, "grad_norm": 3.325866460800171, "learning_rate": 1.9769510617641926e-05, "loss": 1.7349, "step": 29719 }, { "epoch": 0.39, "grad_norm": 3.8182592391967773, "learning_rate": 1.976948818648015e-05, "loss": 1.9453, "step": 29720 }, { "epoch": 0.39, "grad_norm": 3.5040767192840576, "learning_rate": 1.9769465754239653e-05, "loss": 1.6911, "step": 29721 }, { "epoch": 0.39, "grad_norm": 4.111819267272949, "learning_rate": 1.9769443320920447e-05, "loss": 2.6843, "step": 29722 }, { "epoch": 0.39, "grad_norm": 3.856292247772217, "learning_rate": 1.976942088652252e-05, "loss": 2.1237, "step": 29723 }, { "epoch": 0.39, "grad_norm": 3.435107469558716, "learning_rate": 1.976939845104589e-05, "loss": 1.7599, "step": 29724 }, { "epoch": 0.39, "grad_norm": 4.041045665740967, "learning_rate": 1.976937601449055e-05, "loss": 1.9081, "step": 29725 }, { "epoch": 0.39, "grad_norm": 4.405488014221191, "learning_rate": 1.9769353576856507e-05, "loss": 2.3413, "step": 29726 }, { "epoch": 0.39, "grad_norm": 3.694593667984009, "learning_rate": 1.976933113814376e-05, "loss": 1.8577, "step": 29727 }, { "epoch": 0.39, "grad_norm": 4.851388931274414, "learning_rate": 1.9769308698352318e-05, "loss": 2.4001, "step": 29728 }, { "epoch": 0.39, "grad_norm": 3.874955892562866, "learning_rate": 1.9769286257482175e-05, "loss": 2.0627, "step": 29729 }, { "epoch": 0.39, "grad_norm": 3.316208839416504, "learning_rate": 1.9769263815533336e-05, "loss": 1.7941, "step": 29730 }, { "epoch": 0.39, "grad_norm": 3.392238140106201, "learning_rate": 1.9769241372505804e-05, "loss": 1.9212, "step": 29731 }, { "epoch": 0.39, "grad_norm": 4.285427093505859, "learning_rate": 1.9769218928399582e-05, "loss": 2.1418, "step": 29732 }, { "epoch": 0.39, "grad_norm": 3.6324033737182617, "learning_rate": 1.9769196483214674e-05, "loss": 1.7045, "step": 29733 }, { "epoch": 0.39, "grad_norm": 3.708669900894165, "learning_rate": 1.976917403695108e-05, "loss": 1.8562, "step": 29734 }, { "epoch": 0.39, "grad_norm": 4.270244121551514, "learning_rate": 1.9769151589608806e-05, "loss": 2.361, "step": 29735 }, { "epoch": 0.39, "grad_norm": 3.9388267993927, "learning_rate": 1.976912914118785e-05, "loss": 2.2405, "step": 29736 }, { "epoch": 0.39, "grad_norm": 4.000275611877441, "learning_rate": 1.9769106691688216e-05, "loss": 2.2014, "step": 29737 }, { "epoch": 0.39, "grad_norm": 3.3706047534942627, "learning_rate": 1.9769084241109907e-05, "loss": 1.7688, "step": 29738 }, { "epoch": 0.39, "grad_norm": 4.737175941467285, "learning_rate": 1.9769061789452925e-05, "loss": 2.6357, "step": 29739 }, { "epoch": 0.39, "grad_norm": 3.391603946685791, "learning_rate": 1.9769039336717274e-05, "loss": 1.8381, "step": 29740 }, { "epoch": 0.39, "grad_norm": 4.361406326293945, "learning_rate": 1.9769016882902953e-05, "loss": 1.8907, "step": 29741 }, { "epoch": 0.39, "grad_norm": 3.9402432441711426, "learning_rate": 1.976899442800997e-05, "loss": 2.1834, "step": 29742 }, { "epoch": 0.39, "grad_norm": 4.066978454589844, "learning_rate": 1.976897197203832e-05, "loss": 1.8484, "step": 29743 }, { "epoch": 0.39, "grad_norm": 3.6104702949523926, "learning_rate": 1.9768949514988016e-05, "loss": 1.6399, "step": 29744 }, { "epoch": 0.39, "grad_norm": 4.43980073928833, "learning_rate": 1.9768927056859052e-05, "loss": 1.9881, "step": 29745 }, { "epoch": 0.39, "grad_norm": 3.8552727699279785, "learning_rate": 1.9768904597651432e-05, "loss": 1.5677, "step": 29746 }, { "epoch": 0.39, "grad_norm": 3.4520184993743896, "learning_rate": 1.9768882137365156e-05, "loss": 1.8137, "step": 29747 }, { "epoch": 0.39, "grad_norm": 4.311336517333984, "learning_rate": 1.9768859676000235e-05, "loss": 2.0846, "step": 29748 }, { "epoch": 0.39, "grad_norm": 3.974187135696411, "learning_rate": 1.9768837213556665e-05, "loss": 1.8514, "step": 29749 }, { "epoch": 0.39, "grad_norm": 4.136630535125732, "learning_rate": 1.9768814750034445e-05, "loss": 1.9407, "step": 29750 }, { "epoch": 0.39, "grad_norm": 3.6895132064819336, "learning_rate": 1.9768792285433587e-05, "loss": 1.772, "step": 29751 }, { "epoch": 0.39, "grad_norm": 3.777949810028076, "learning_rate": 1.9768769819754087e-05, "loss": 1.8596, "step": 29752 }, { "epoch": 0.39, "grad_norm": 3.898637056350708, "learning_rate": 1.976874735299595e-05, "loss": 2.233, "step": 29753 }, { "epoch": 0.39, "grad_norm": 3.5471911430358887, "learning_rate": 1.9768724885159172e-05, "loss": 1.901, "step": 29754 }, { "epoch": 0.39, "grad_norm": 4.459401607513428, "learning_rate": 1.976870241624377e-05, "loss": 2.2597, "step": 29755 }, { "epoch": 0.39, "grad_norm": 4.14308500289917, "learning_rate": 1.976867994624973e-05, "loss": 2.176, "step": 29756 }, { "epoch": 0.39, "grad_norm": 3.784571647644043, "learning_rate": 1.9768657475177066e-05, "loss": 2.0239, "step": 29757 }, { "epoch": 0.39, "grad_norm": 3.539928913116455, "learning_rate": 1.976863500302578e-05, "loss": 1.6558, "step": 29758 }, { "epoch": 0.39, "grad_norm": 3.951098680496216, "learning_rate": 1.9768612529795864e-05, "loss": 2.2794, "step": 29759 }, { "epoch": 0.39, "grad_norm": 5.362786769866943, "learning_rate": 1.9768590055487328e-05, "loss": 2.3738, "step": 29760 }, { "epoch": 0.39, "grad_norm": 3.357649326324463, "learning_rate": 1.976856758010018e-05, "loss": 1.8369, "step": 29761 }, { "epoch": 0.39, "grad_norm": 3.8186681270599365, "learning_rate": 1.9768545103634412e-05, "loss": 2.2687, "step": 29762 }, { "epoch": 0.39, "grad_norm": 4.0156683921813965, "learning_rate": 1.9768522626090033e-05, "loss": 2.0689, "step": 29763 }, { "epoch": 0.39, "grad_norm": 3.420051336288452, "learning_rate": 1.976850014746704e-05, "loss": 1.5445, "step": 29764 }, { "epoch": 0.39, "grad_norm": 3.943439483642578, "learning_rate": 1.976847766776544e-05, "loss": 2.104, "step": 29765 }, { "epoch": 0.39, "grad_norm": 3.7661004066467285, "learning_rate": 1.9768455186985233e-05, "loss": 1.789, "step": 29766 }, { "epoch": 0.39, "grad_norm": 3.9353766441345215, "learning_rate": 1.9768432705126427e-05, "loss": 1.9562, "step": 29767 }, { "epoch": 0.39, "grad_norm": 3.6787869930267334, "learning_rate": 1.976841022218902e-05, "loss": 2.1684, "step": 29768 }, { "epoch": 0.39, "grad_norm": 3.9217100143432617, "learning_rate": 1.9768387738173008e-05, "loss": 2.1665, "step": 29769 }, { "epoch": 0.39, "grad_norm": 3.623102903366089, "learning_rate": 1.9768365253078408e-05, "loss": 1.644, "step": 29770 }, { "epoch": 0.39, "grad_norm": 3.9696602821350098, "learning_rate": 1.976834276690521e-05, "loss": 1.8213, "step": 29771 }, { "epoch": 0.39, "grad_norm": 3.9297642707824707, "learning_rate": 1.9768320279653426e-05, "loss": 1.8842, "step": 29772 }, { "epoch": 0.39, "grad_norm": 3.778806209564209, "learning_rate": 1.976829779132305e-05, "loss": 2.0737, "step": 29773 }, { "epoch": 0.39, "grad_norm": 3.533151865005493, "learning_rate": 1.976827530191409e-05, "loss": 1.715, "step": 29774 }, { "epoch": 0.39, "grad_norm": 4.028611183166504, "learning_rate": 1.9768252811426546e-05, "loss": 2.1326, "step": 29775 }, { "epoch": 0.39, "grad_norm": 3.7793614864349365, "learning_rate": 1.976823031986042e-05, "loss": 1.8903, "step": 29776 }, { "epoch": 0.39, "grad_norm": 3.5334925651550293, "learning_rate": 1.9768207827215718e-05, "loss": 2.082, "step": 29777 }, { "epoch": 0.39, "grad_norm": 3.8332526683807373, "learning_rate": 1.9768185333492437e-05, "loss": 2.1409, "step": 29778 }, { "epoch": 0.39, "grad_norm": 4.265674591064453, "learning_rate": 1.9768162838690585e-05, "loss": 2.2718, "step": 29779 }, { "epoch": 0.39, "grad_norm": 3.851301670074463, "learning_rate": 1.976814034281016e-05, "loss": 1.974, "step": 29780 }, { "epoch": 0.39, "grad_norm": 3.7970786094665527, "learning_rate": 1.9768117845851168e-05, "loss": 1.6808, "step": 29781 }, { "epoch": 0.39, "grad_norm": 3.871382713317871, "learning_rate": 1.976809534781361e-05, "loss": 1.7951, "step": 29782 }, { "epoch": 0.39, "grad_norm": 4.286932468414307, "learning_rate": 1.9768072848697487e-05, "loss": 2.5089, "step": 29783 }, { "epoch": 0.39, "grad_norm": 3.4766056537628174, "learning_rate": 1.9768050348502807e-05, "loss": 1.8443, "step": 29784 }, { "epoch": 0.39, "grad_norm": 4.424984455108643, "learning_rate": 1.9768027847229566e-05, "loss": 2.1252, "step": 29785 }, { "epoch": 0.39, "grad_norm": 3.60141921043396, "learning_rate": 1.976800534487777e-05, "loss": 1.5665, "step": 29786 }, { "epoch": 0.39, "grad_norm": 3.583033323287964, "learning_rate": 1.976798284144742e-05, "loss": 1.6143, "step": 29787 }, { "epoch": 0.39, "grad_norm": 3.0670926570892334, "learning_rate": 1.9767960336938517e-05, "loss": 1.4365, "step": 29788 }, { "epoch": 0.39, "grad_norm": 4.182713508605957, "learning_rate": 1.976793783135107e-05, "loss": 1.8742, "step": 29789 }, { "epoch": 0.39, "grad_norm": 3.7672998905181885, "learning_rate": 1.976791532468507e-05, "loss": 2.0966, "step": 29790 }, { "epoch": 0.39, "grad_norm": 3.510035991668701, "learning_rate": 1.9767892816940532e-05, "loss": 1.76, "step": 29791 }, { "epoch": 0.39, "grad_norm": 3.9531261920928955, "learning_rate": 1.9767870308117452e-05, "loss": 2.0514, "step": 29792 }, { "epoch": 0.39, "grad_norm": 4.1657395362854, "learning_rate": 1.9767847798215832e-05, "loss": 2.09, "step": 29793 }, { "epoch": 0.39, "grad_norm": 3.7389886379241943, "learning_rate": 1.9767825287235678e-05, "loss": 2.0524, "step": 29794 }, { "epoch": 0.39, "grad_norm": 4.085977077484131, "learning_rate": 1.976780277517699e-05, "loss": 2.5825, "step": 29795 }, { "epoch": 0.39, "grad_norm": 3.804281234741211, "learning_rate": 1.976778026203977e-05, "loss": 1.7312, "step": 29796 }, { "epoch": 0.39, "grad_norm": 3.792092800140381, "learning_rate": 1.976775774782402e-05, "loss": 2.0932, "step": 29797 }, { "epoch": 0.39, "grad_norm": 3.375318765640259, "learning_rate": 1.9767735232529744e-05, "loss": 1.5053, "step": 29798 }, { "epoch": 0.39, "grad_norm": 3.9940185546875, "learning_rate": 1.9767712716156948e-05, "loss": 2.0025, "step": 29799 }, { "epoch": 0.39, "grad_norm": 3.5987629890441895, "learning_rate": 1.9767690198705628e-05, "loss": 2.0296, "step": 29800 }, { "epoch": 0.39, "grad_norm": 3.2705769538879395, "learning_rate": 1.976766768017579e-05, "loss": 1.7486, "step": 29801 }, { "epoch": 0.39, "grad_norm": 3.8437178134918213, "learning_rate": 1.9767645160567435e-05, "loss": 1.9516, "step": 29802 }, { "epoch": 0.39, "grad_norm": 3.6826868057250977, "learning_rate": 1.9767622639880564e-05, "loss": 1.9059, "step": 29803 }, { "epoch": 0.39, "grad_norm": 3.1445887088775635, "learning_rate": 1.9767600118115187e-05, "loss": 1.522, "step": 29804 }, { "epoch": 0.39, "grad_norm": 3.7541663646698, "learning_rate": 1.97675775952713e-05, "loss": 1.8706, "step": 29805 }, { "epoch": 0.39, "grad_norm": 3.9576196670532227, "learning_rate": 1.9767555071348903e-05, "loss": 1.9279, "step": 29806 }, { "epoch": 0.39, "grad_norm": 4.265993595123291, "learning_rate": 1.9767532546348005e-05, "loss": 2.2474, "step": 29807 }, { "epoch": 0.39, "grad_norm": 3.7513182163238525, "learning_rate": 1.9767510020268605e-05, "loss": 1.8196, "step": 29808 }, { "epoch": 0.39, "grad_norm": 4.176197528839111, "learning_rate": 1.9767487493110708e-05, "loss": 2.5339, "step": 29809 }, { "epoch": 0.39, "grad_norm": 3.7799484729766846, "learning_rate": 1.9767464964874314e-05, "loss": 2.2755, "step": 29810 }, { "epoch": 0.39, "grad_norm": 3.54355788230896, "learning_rate": 1.9767442435559423e-05, "loss": 1.9037, "step": 29811 }, { "epoch": 0.39, "grad_norm": 3.868525266647339, "learning_rate": 1.9767419905166043e-05, "loss": 2.0103, "step": 29812 }, { "epoch": 0.39, "grad_norm": 3.7659521102905273, "learning_rate": 1.9767397373694174e-05, "loss": 2.1929, "step": 29813 }, { "epoch": 0.39, "grad_norm": 3.398653268814087, "learning_rate": 1.9767374841143817e-05, "loss": 1.5204, "step": 29814 }, { "epoch": 0.39, "grad_norm": 3.644632577896118, "learning_rate": 1.9767352307514978e-05, "loss": 1.7678, "step": 29815 }, { "epoch": 0.39, "grad_norm": 4.209159851074219, "learning_rate": 1.976732977280766e-05, "loss": 2.2522, "step": 29816 }, { "epoch": 0.39, "grad_norm": 3.085045337677002, "learning_rate": 1.9767307237021857e-05, "loss": 1.5273, "step": 29817 }, { "epoch": 0.39, "grad_norm": 4.087803840637207, "learning_rate": 1.9767284700157582e-05, "loss": 2.4449, "step": 29818 }, { "epoch": 0.39, "grad_norm": 4.228692054748535, "learning_rate": 1.976726216221483e-05, "loss": 2.0233, "step": 29819 }, { "epoch": 0.39, "grad_norm": 4.237765789031982, "learning_rate": 1.976723962319361e-05, "loss": 2.1969, "step": 29820 }, { "epoch": 0.39, "grad_norm": 4.13804292678833, "learning_rate": 1.976721708309392e-05, "loss": 2.2455, "step": 29821 }, { "epoch": 0.39, "grad_norm": 3.409466505050659, "learning_rate": 1.9767194541915765e-05, "loss": 1.4018, "step": 29822 }, { "epoch": 0.39, "grad_norm": 5.6783528327941895, "learning_rate": 1.9767171999659143e-05, "loss": 2.2295, "step": 29823 }, { "epoch": 0.39, "grad_norm": 3.3271889686584473, "learning_rate": 1.9767149456324062e-05, "loss": 1.7832, "step": 29824 }, { "epoch": 0.39, "grad_norm": 4.029838562011719, "learning_rate": 1.976712691191052e-05, "loss": 2.2667, "step": 29825 }, { "epoch": 0.39, "grad_norm": 3.8033015727996826, "learning_rate": 1.9767104366418525e-05, "loss": 1.9427, "step": 29826 }, { "epoch": 0.39, "grad_norm": 4.0960283279418945, "learning_rate": 1.976708181984807e-05, "loss": 2.1399, "step": 29827 }, { "epoch": 0.39, "grad_norm": 3.8698525428771973, "learning_rate": 1.9767059272199168e-05, "loss": 1.9715, "step": 29828 }, { "epoch": 0.39, "grad_norm": 4.226036071777344, "learning_rate": 1.9767036723471814e-05, "loss": 2.4692, "step": 29829 }, { "epoch": 0.39, "grad_norm": 4.071850776672363, "learning_rate": 1.9767014173666017e-05, "loss": 2.2699, "step": 29830 }, { "epoch": 0.39, "grad_norm": 3.7430124282836914, "learning_rate": 1.9766991622781772e-05, "loss": 2.2809, "step": 29831 }, { "epoch": 0.39, "grad_norm": 4.422825813293457, "learning_rate": 1.9766969070819087e-05, "loss": 1.9797, "step": 29832 }, { "epoch": 0.39, "grad_norm": 4.009889125823975, "learning_rate": 1.9766946517777964e-05, "loss": 2.1188, "step": 29833 }, { "epoch": 0.39, "grad_norm": 4.059898853302002, "learning_rate": 1.9766923963658405e-05, "loss": 2.4134, "step": 29834 }, { "epoch": 0.39, "grad_norm": 3.5123541355133057, "learning_rate": 1.976690140846041e-05, "loss": 1.9579, "step": 29835 }, { "epoch": 0.39, "grad_norm": 3.9327590465545654, "learning_rate": 1.9766878852183984e-05, "loss": 2.1516, "step": 29836 }, { "epoch": 0.39, "grad_norm": 3.2360618114471436, "learning_rate": 1.9766856294829128e-05, "loss": 1.9682, "step": 29837 }, { "epoch": 0.39, "grad_norm": 3.8809337615966797, "learning_rate": 1.9766833736395847e-05, "loss": 1.749, "step": 29838 }, { "epoch": 0.39, "grad_norm": 3.948373794555664, "learning_rate": 1.976681117688414e-05, "loss": 2.2701, "step": 29839 }, { "epoch": 0.39, "grad_norm": 3.8111836910247803, "learning_rate": 1.9766788616294016e-05, "loss": 1.9298, "step": 29840 }, { "epoch": 0.39, "grad_norm": 4.497973918914795, "learning_rate": 1.976676605462547e-05, "loss": 2.4132, "step": 29841 }, { "epoch": 0.39, "grad_norm": 3.893608331680298, "learning_rate": 1.9766743491878503e-05, "loss": 2.1606, "step": 29842 }, { "epoch": 0.39, "grad_norm": 4.034400463104248, "learning_rate": 1.9766720928053126e-05, "loss": 2.0553, "step": 29843 }, { "epoch": 0.39, "grad_norm": 3.88139271736145, "learning_rate": 1.9766698363149336e-05, "loss": 1.935, "step": 29844 }, { "epoch": 0.39, "grad_norm": 3.6190237998962402, "learning_rate": 1.976667579716714e-05, "loss": 1.4939, "step": 29845 }, { "epoch": 0.39, "grad_norm": 4.061666011810303, "learning_rate": 1.9766653230106536e-05, "loss": 2.2229, "step": 29846 }, { "epoch": 0.39, "grad_norm": 3.974705696105957, "learning_rate": 1.9766630661967524e-05, "loss": 1.9454, "step": 29847 }, { "epoch": 0.39, "grad_norm": 4.068801403045654, "learning_rate": 1.9766608092750114e-05, "loss": 2.3085, "step": 29848 }, { "epoch": 0.39, "grad_norm": 4.022548675537109, "learning_rate": 1.9766585522454303e-05, "loss": 1.9075, "step": 29849 }, { "epoch": 0.39, "grad_norm": 3.586425542831421, "learning_rate": 1.9766562951080093e-05, "loss": 1.5805, "step": 29850 }, { "epoch": 0.39, "grad_norm": 3.1761677265167236, "learning_rate": 1.9766540378627496e-05, "loss": 1.6524, "step": 29851 }, { "epoch": 0.39, "grad_norm": 3.611845016479492, "learning_rate": 1.9766517805096503e-05, "loss": 1.8775, "step": 29852 }, { "epoch": 0.39, "grad_norm": 3.8776888847351074, "learning_rate": 1.9766495230487122e-05, "loss": 2.0379, "step": 29853 }, { "epoch": 0.39, "grad_norm": 4.357885360717773, "learning_rate": 1.976647265479935e-05, "loss": 2.6337, "step": 29854 }, { "epoch": 0.39, "grad_norm": 4.0103254318237305, "learning_rate": 1.97664500780332e-05, "loss": 2.4544, "step": 29855 }, { "epoch": 0.39, "grad_norm": 4.101191997528076, "learning_rate": 1.9766427500188664e-05, "loss": 1.8625, "step": 29856 }, { "epoch": 0.39, "grad_norm": 3.630624294281006, "learning_rate": 1.9766404921265748e-05, "loss": 2.19, "step": 29857 }, { "epoch": 0.39, "grad_norm": 4.211100101470947, "learning_rate": 1.9766382341264457e-05, "loss": 2.0961, "step": 29858 }, { "epoch": 0.39, "grad_norm": 4.392692565917969, "learning_rate": 1.976635976018479e-05, "loss": 2.3042, "step": 29859 }, { "epoch": 0.39, "grad_norm": 3.8479182720184326, "learning_rate": 1.9766337178026754e-05, "loss": 1.8449, "step": 29860 }, { "epoch": 0.39, "grad_norm": 4.045540809631348, "learning_rate": 1.9766314594790346e-05, "loss": 2.1491, "step": 29861 }, { "epoch": 0.39, "grad_norm": 3.7766685485839844, "learning_rate": 1.9766292010475574e-05, "loss": 1.9494, "step": 29862 }, { "epoch": 0.39, "grad_norm": 3.741128444671631, "learning_rate": 1.9766269425082437e-05, "loss": 2.0816, "step": 29863 }, { "epoch": 0.39, "grad_norm": 3.745028257369995, "learning_rate": 1.976624683861094e-05, "loss": 1.6379, "step": 29864 }, { "epoch": 0.39, "grad_norm": 3.6778438091278076, "learning_rate": 1.976622425106108e-05, "loss": 1.8106, "step": 29865 }, { "epoch": 0.39, "grad_norm": 3.8161065578460693, "learning_rate": 1.9766201662432864e-05, "loss": 1.9157, "step": 29866 }, { "epoch": 0.39, "grad_norm": 3.619013786315918, "learning_rate": 1.9766179072726293e-05, "loss": 1.7768, "step": 29867 }, { "epoch": 0.39, "grad_norm": 3.9660637378692627, "learning_rate": 1.976615648194137e-05, "loss": 2.0167, "step": 29868 }, { "epoch": 0.39, "grad_norm": 3.7303740978240967, "learning_rate": 1.9766133890078102e-05, "loss": 1.9115, "step": 29869 }, { "epoch": 0.39, "grad_norm": 3.9655020236968994, "learning_rate": 1.9766111297136482e-05, "loss": 1.9449, "step": 29870 }, { "epoch": 0.39, "grad_norm": 3.3454790115356445, "learning_rate": 1.976608870311652e-05, "loss": 1.7403, "step": 29871 }, { "epoch": 0.39, "grad_norm": 3.9044501781463623, "learning_rate": 1.9766066108018216e-05, "loss": 2.3666, "step": 29872 }, { "epoch": 0.39, "grad_norm": 4.06473445892334, "learning_rate": 1.976604351184157e-05, "loss": 1.8316, "step": 29873 }, { "epoch": 0.39, "grad_norm": 3.8571856021881104, "learning_rate": 1.976602091458659e-05, "loss": 2.2613, "step": 29874 }, { "epoch": 0.39, "grad_norm": 3.249337911605835, "learning_rate": 1.9765998316253278e-05, "loss": 1.5196, "step": 29875 }, { "epoch": 0.39, "grad_norm": 3.926502227783203, "learning_rate": 1.9765975716841627e-05, "loss": 1.9716, "step": 29876 }, { "epoch": 0.39, "grad_norm": 3.4548418521881104, "learning_rate": 1.976595311635165e-05, "loss": 1.8082, "step": 29877 }, { "epoch": 0.39, "grad_norm": 3.9553310871124268, "learning_rate": 1.976593051478335e-05, "loss": 2.2345, "step": 29878 }, { "epoch": 0.39, "grad_norm": 2.8007798194885254, "learning_rate": 1.9765907912136722e-05, "loss": 1.3044, "step": 29879 }, { "epoch": 0.39, "grad_norm": 3.639346122741699, "learning_rate": 1.9765885308411773e-05, "loss": 2.0359, "step": 29880 }, { "epoch": 0.39, "grad_norm": 3.8378660678863525, "learning_rate": 1.9765862703608503e-05, "loss": 1.856, "step": 29881 }, { "epoch": 0.39, "grad_norm": 4.350736618041992, "learning_rate": 1.9765840097726917e-05, "loss": 2.023, "step": 29882 }, { "epoch": 0.39, "grad_norm": 3.921461343765259, "learning_rate": 1.9765817490767017e-05, "loss": 2.1963, "step": 29883 }, { "epoch": 0.39, "grad_norm": 3.6412816047668457, "learning_rate": 1.9765794882728803e-05, "loss": 1.9771, "step": 29884 }, { "epoch": 0.39, "grad_norm": 3.304015874862671, "learning_rate": 1.9765772273612282e-05, "loss": 1.7915, "step": 29885 }, { "epoch": 0.39, "grad_norm": 3.5144193172454834, "learning_rate": 1.976574966341745e-05, "loss": 2.0931, "step": 29886 }, { "epoch": 0.39, "grad_norm": 3.9532337188720703, "learning_rate": 1.976572705214432e-05, "loss": 2.2257, "step": 29887 }, { "epoch": 0.39, "grad_norm": 3.6692922115325928, "learning_rate": 1.9765704439792886e-05, "loss": 1.896, "step": 29888 }, { "epoch": 0.39, "grad_norm": 3.5175437927246094, "learning_rate": 1.976568182636315e-05, "loss": 1.7237, "step": 29889 }, { "epoch": 0.39, "grad_norm": 3.36506724357605, "learning_rate": 1.9765659211855118e-05, "loss": 1.804, "step": 29890 }, { "epoch": 0.39, "grad_norm": 4.0118207931518555, "learning_rate": 1.9765636596268794e-05, "loss": 2.2401, "step": 29891 }, { "epoch": 0.39, "grad_norm": 4.320901393890381, "learning_rate": 1.9765613979604176e-05, "loss": 2.1315, "step": 29892 }, { "epoch": 0.39, "grad_norm": 3.638573408126831, "learning_rate": 1.976559136186127e-05, "loss": 1.8156, "step": 29893 }, { "epoch": 0.39, "grad_norm": 4.238798141479492, "learning_rate": 1.9765568743040075e-05, "loss": 2.0619, "step": 29894 }, { "epoch": 0.39, "grad_norm": 3.8542706966400146, "learning_rate": 1.9765546123140594e-05, "loss": 1.7531, "step": 29895 }, { "epoch": 0.39, "grad_norm": 4.085543155670166, "learning_rate": 1.9765523502162834e-05, "loss": 1.9267, "step": 29896 }, { "epoch": 0.39, "grad_norm": 3.6072115898132324, "learning_rate": 1.9765500880106794e-05, "loss": 1.9789, "step": 29897 }, { "epoch": 0.39, "grad_norm": 4.09413480758667, "learning_rate": 1.976547825697248e-05, "loss": 2.1986, "step": 29898 }, { "epoch": 0.39, "grad_norm": 4.310793876647949, "learning_rate": 1.9765455632759886e-05, "loss": 2.1274, "step": 29899 }, { "epoch": 0.39, "grad_norm": 3.427187442779541, "learning_rate": 1.976543300746902e-05, "loss": 1.8463, "step": 29900 }, { "epoch": 0.39, "grad_norm": 3.662821054458618, "learning_rate": 1.976541038109989e-05, "loss": 1.572, "step": 29901 }, { "epoch": 0.39, "grad_norm": 4.102158069610596, "learning_rate": 1.9765387753652488e-05, "loss": 2.0674, "step": 29902 }, { "epoch": 0.39, "grad_norm": 3.5622689723968506, "learning_rate": 1.9765365125126824e-05, "loss": 1.6726, "step": 29903 }, { "epoch": 0.39, "grad_norm": 3.6793384552001953, "learning_rate": 1.9765342495522897e-05, "loss": 1.6031, "step": 29904 }, { "epoch": 0.39, "grad_norm": 3.5130772590637207, "learning_rate": 1.976531986484071e-05, "loss": 2.0794, "step": 29905 }, { "epoch": 0.39, "grad_norm": 3.837427854537964, "learning_rate": 1.976529723308027e-05, "loss": 2.2293, "step": 29906 }, { "epoch": 0.39, "grad_norm": 4.3956708908081055, "learning_rate": 1.976527460024157e-05, "loss": 2.6773, "step": 29907 }, { "epoch": 0.39, "grad_norm": 3.7466723918914795, "learning_rate": 1.976525196632462e-05, "loss": 1.7141, "step": 29908 }, { "epoch": 0.39, "grad_norm": 3.5646979808807373, "learning_rate": 1.9765229331329424e-05, "loss": 1.8602, "step": 29909 }, { "epoch": 0.39, "grad_norm": 3.821747064590454, "learning_rate": 1.9765206695255978e-05, "loss": 1.6957, "step": 29910 }, { "epoch": 0.39, "grad_norm": 3.6508500576019287, "learning_rate": 1.9765184058104286e-05, "loss": 1.6772, "step": 29911 }, { "epoch": 0.39, "grad_norm": 3.3770949840545654, "learning_rate": 1.9765161419874356e-05, "loss": 1.5529, "step": 29912 }, { "epoch": 0.39, "grad_norm": 3.760143756866455, "learning_rate": 1.9765138780566183e-05, "loss": 2.3586, "step": 29913 }, { "epoch": 0.39, "grad_norm": 3.5635664463043213, "learning_rate": 1.9765116140179774e-05, "loss": 1.9691, "step": 29914 }, { "epoch": 0.39, "grad_norm": 3.980705976486206, "learning_rate": 1.976509349871513e-05, "loss": 2.1941, "step": 29915 }, { "epoch": 0.39, "grad_norm": 4.4761576652526855, "learning_rate": 1.9765070856172256e-05, "loss": 2.1323, "step": 29916 }, { "epoch": 0.39, "grad_norm": 4.068312168121338, "learning_rate": 1.976504821255115e-05, "loss": 2.1898, "step": 29917 }, { "epoch": 0.39, "grad_norm": 3.940720558166504, "learning_rate": 1.9765025567851816e-05, "loss": 2.3341, "step": 29918 }, { "epoch": 0.39, "grad_norm": 3.7618796825408936, "learning_rate": 1.9765002922074263e-05, "loss": 2.3071, "step": 29919 }, { "epoch": 0.39, "grad_norm": 3.694373846054077, "learning_rate": 1.9764980275218484e-05, "loss": 2.0082, "step": 29920 }, { "epoch": 0.39, "grad_norm": 3.292022943496704, "learning_rate": 1.9764957627284483e-05, "loss": 1.5676, "step": 29921 }, { "epoch": 0.39, "grad_norm": 3.949470043182373, "learning_rate": 1.9764934978272274e-05, "loss": 2.0014, "step": 29922 }, { "epoch": 0.39, "grad_norm": 3.18001389503479, "learning_rate": 1.9764912328181842e-05, "loss": 1.5359, "step": 29923 }, { "epoch": 0.39, "grad_norm": 4.6216301918029785, "learning_rate": 1.97648896770132e-05, "loss": 2.4602, "step": 29924 }, { "epoch": 0.39, "grad_norm": 3.686927080154419, "learning_rate": 1.976486702476635e-05, "loss": 2.0595, "step": 29925 }, { "epoch": 0.39, "grad_norm": 3.9755618572235107, "learning_rate": 1.9764844371441292e-05, "loss": 2.2063, "step": 29926 }, { "epoch": 0.39, "grad_norm": 4.408156394958496, "learning_rate": 1.976482171703803e-05, "loss": 2.0428, "step": 29927 }, { "epoch": 0.39, "grad_norm": 3.9595632553100586, "learning_rate": 1.9764799061556566e-05, "loss": 2.3906, "step": 29928 }, { "epoch": 0.39, "grad_norm": 3.153249502182007, "learning_rate": 1.9764776404996903e-05, "loss": 1.6703, "step": 29929 }, { "epoch": 0.39, "grad_norm": 4.06993293762207, "learning_rate": 1.976475374735904e-05, "loss": 2.3966, "step": 29930 }, { "epoch": 0.39, "grad_norm": 3.6835126876831055, "learning_rate": 1.9764731088642986e-05, "loss": 1.8918, "step": 29931 }, { "epoch": 0.39, "grad_norm": 3.7136287689208984, "learning_rate": 1.9764708428848742e-05, "loss": 1.8952, "step": 29932 }, { "epoch": 0.39, "grad_norm": 4.25540018081665, "learning_rate": 1.9764685767976302e-05, "loss": 2.3583, "step": 29933 }, { "epoch": 0.39, "grad_norm": 3.4819836616516113, "learning_rate": 1.9764663106025678e-05, "loss": 1.837, "step": 29934 }, { "epoch": 0.39, "grad_norm": 3.950714349746704, "learning_rate": 1.9764640442996873e-05, "loss": 1.9074, "step": 29935 }, { "epoch": 0.39, "grad_norm": 3.7493157386779785, "learning_rate": 1.9764617778889882e-05, "loss": 2.1151, "step": 29936 }, { "epoch": 0.39, "grad_norm": 3.601602554321289, "learning_rate": 1.9764595113704714e-05, "loss": 1.6993, "step": 29937 }, { "epoch": 0.39, "grad_norm": 4.029038906097412, "learning_rate": 1.9764572447441367e-05, "loss": 2.2663, "step": 29938 }, { "epoch": 0.39, "grad_norm": 3.663468837738037, "learning_rate": 1.9764549780099846e-05, "loss": 2.2762, "step": 29939 }, { "epoch": 0.39, "grad_norm": 3.665944814682007, "learning_rate": 1.976452711168015e-05, "loss": 1.9013, "step": 29940 }, { "epoch": 0.39, "grad_norm": 3.9883012771606445, "learning_rate": 1.976450444218229e-05, "loss": 2.014, "step": 29941 }, { "epoch": 0.39, "grad_norm": 3.9100911617279053, "learning_rate": 1.9764481771606257e-05, "loss": 2.1268, "step": 29942 }, { "epoch": 0.39, "grad_norm": 3.4955005645751953, "learning_rate": 1.9764459099952064e-05, "loss": 2.0472, "step": 29943 }, { "epoch": 0.39, "grad_norm": 4.30772590637207, "learning_rate": 1.976443642721971e-05, "loss": 2.0909, "step": 29944 }, { "epoch": 0.39, "grad_norm": 4.388134956359863, "learning_rate": 1.9764413753409195e-05, "loss": 2.1338, "step": 29945 }, { "epoch": 0.39, "grad_norm": 3.9398193359375, "learning_rate": 1.9764391078520526e-05, "loss": 2.159, "step": 29946 }, { "epoch": 0.39, "grad_norm": 3.7176387310028076, "learning_rate": 1.9764368402553698e-05, "loss": 1.7651, "step": 29947 }, { "epoch": 0.39, "grad_norm": 3.835796356201172, "learning_rate": 1.976434572550872e-05, "loss": 2.1668, "step": 29948 }, { "epoch": 0.39, "grad_norm": 3.7993900775909424, "learning_rate": 1.976432304738559e-05, "loss": 1.893, "step": 29949 }, { "epoch": 0.39, "grad_norm": 3.674936056137085, "learning_rate": 1.9764300368184317e-05, "loss": 1.7371, "step": 29950 }, { "epoch": 0.39, "grad_norm": 3.694535493850708, "learning_rate": 1.97642776879049e-05, "loss": 1.6641, "step": 29951 }, { "epoch": 0.39, "grad_norm": 4.468729019165039, "learning_rate": 1.9764255006547338e-05, "loss": 2.0241, "step": 29952 }, { "epoch": 0.39, "grad_norm": 3.3871638774871826, "learning_rate": 1.9764232324111635e-05, "loss": 1.8209, "step": 29953 }, { "epoch": 0.39, "grad_norm": 4.459529399871826, "learning_rate": 1.97642096405978e-05, "loss": 2.4208, "step": 29954 }, { "epoch": 0.39, "grad_norm": 3.8643879890441895, "learning_rate": 1.9764186956005828e-05, "loss": 2.17, "step": 29955 }, { "epoch": 0.39, "grad_norm": 3.329373359680176, "learning_rate": 1.9764164270335723e-05, "loss": 1.8483, "step": 29956 }, { "epoch": 0.39, "grad_norm": 3.2460594177246094, "learning_rate": 1.976414158358749e-05, "loss": 1.513, "step": 29957 }, { "epoch": 0.39, "grad_norm": 4.255428314208984, "learning_rate": 1.9764118895761132e-05, "loss": 2.202, "step": 29958 }, { "epoch": 0.39, "grad_norm": 3.685264825820923, "learning_rate": 1.9764096206856645e-05, "loss": 1.871, "step": 29959 }, { "epoch": 0.39, "grad_norm": 3.936375856399536, "learning_rate": 1.9764073516874042e-05, "loss": 2.0607, "step": 29960 }, { "epoch": 0.39, "grad_norm": 3.5476698875427246, "learning_rate": 1.9764050825813314e-05, "loss": 1.9885, "step": 29961 }, { "epoch": 0.39, "grad_norm": 3.789024591445923, "learning_rate": 1.976402813367447e-05, "loss": 1.9259, "step": 29962 }, { "epoch": 0.39, "grad_norm": 3.7537050247192383, "learning_rate": 1.9764005440457515e-05, "loss": 1.8223, "step": 29963 }, { "epoch": 0.39, "grad_norm": 4.170326232910156, "learning_rate": 1.9763982746162446e-05, "loss": 2.1721, "step": 29964 }, { "epoch": 0.39, "grad_norm": 4.078746795654297, "learning_rate": 1.976396005078927e-05, "loss": 1.9644, "step": 29965 }, { "epoch": 0.39, "grad_norm": 3.85644793510437, "learning_rate": 1.9763937354337982e-05, "loss": 1.7915, "step": 29966 }, { "epoch": 0.39, "grad_norm": 3.9201104640960693, "learning_rate": 1.9763914656808596e-05, "loss": 1.8904, "step": 29967 }, { "epoch": 0.39, "grad_norm": 4.213830471038818, "learning_rate": 1.9763891958201103e-05, "loss": 2.2455, "step": 29968 }, { "epoch": 0.39, "grad_norm": 4.143405437469482, "learning_rate": 1.9763869258515513e-05, "loss": 2.0709, "step": 29969 }, { "epoch": 0.39, "grad_norm": 4.276474475860596, "learning_rate": 1.9763846557751827e-05, "loss": 2.3416, "step": 29970 }, { "epoch": 0.39, "grad_norm": 3.6668965816497803, "learning_rate": 1.9763823855910047e-05, "loss": 1.8693, "step": 29971 }, { "epoch": 0.39, "grad_norm": 3.8698067665100098, "learning_rate": 1.9763801152990174e-05, "loss": 2.2634, "step": 29972 }, { "epoch": 0.39, "grad_norm": 4.122267246246338, "learning_rate": 1.976377844899221e-05, "loss": 2.1075, "step": 29973 }, { "epoch": 0.39, "grad_norm": 3.7644872665405273, "learning_rate": 1.976375574391616e-05, "loss": 2.172, "step": 29974 }, { "epoch": 0.39, "grad_norm": 4.896340847015381, "learning_rate": 1.976373303776203e-05, "loss": 2.6903, "step": 29975 }, { "epoch": 0.39, "grad_norm": 3.749241590499878, "learning_rate": 1.9763710330529813e-05, "loss": 1.8389, "step": 29976 }, { "epoch": 0.39, "grad_norm": 4.637426853179932, "learning_rate": 1.976368762221952e-05, "loss": 2.5964, "step": 29977 }, { "epoch": 0.39, "grad_norm": 3.639514684677124, "learning_rate": 1.9763664912831148e-05, "loss": 1.6952, "step": 29978 }, { "epoch": 0.39, "grad_norm": 3.5206193923950195, "learning_rate": 1.9763642202364702e-05, "loss": 1.6269, "step": 29979 }, { "epoch": 0.39, "grad_norm": 3.2002148628234863, "learning_rate": 1.9763619490820184e-05, "loss": 1.8404, "step": 29980 }, { "epoch": 0.39, "grad_norm": 3.6758296489715576, "learning_rate": 1.97635967781976e-05, "loss": 1.5845, "step": 29981 }, { "epoch": 0.39, "grad_norm": 3.68782377243042, "learning_rate": 1.9763574064496945e-05, "loss": 1.8341, "step": 29982 }, { "epoch": 0.39, "grad_norm": 3.7420477867126465, "learning_rate": 1.9763551349718228e-05, "loss": 2.1731, "step": 29983 }, { "epoch": 0.39, "grad_norm": 3.762993812561035, "learning_rate": 1.9763528633861448e-05, "loss": 2.297, "step": 29984 }, { "epoch": 0.39, "grad_norm": 3.735511541366577, "learning_rate": 1.9763505916926608e-05, "loss": 1.9962, "step": 29985 }, { "epoch": 0.39, "grad_norm": 3.881861686706543, "learning_rate": 1.9763483198913716e-05, "loss": 1.9813, "step": 29986 }, { "epoch": 0.39, "grad_norm": 3.8204128742218018, "learning_rate": 1.9763460479822764e-05, "loss": 1.9044, "step": 29987 }, { "epoch": 0.39, "grad_norm": 3.8837389945983887, "learning_rate": 1.9763437759653766e-05, "loss": 2.1518, "step": 29988 }, { "epoch": 0.39, "grad_norm": 3.8586554527282715, "learning_rate": 1.9763415038406716e-05, "loss": 2.0993, "step": 29989 }, { "epoch": 0.39, "grad_norm": 4.467322826385498, "learning_rate": 1.976339231608162e-05, "loss": 2.051, "step": 29990 }, { "epoch": 0.39, "grad_norm": 3.850198745727539, "learning_rate": 1.9763369592678477e-05, "loss": 2.2082, "step": 29991 }, { "epoch": 0.39, "grad_norm": 3.292283058166504, "learning_rate": 1.9763346868197292e-05, "loss": 1.7645, "step": 29992 }, { "epoch": 0.39, "grad_norm": 4.351789474487305, "learning_rate": 1.976332414263807e-05, "loss": 2.3813, "step": 29993 }, { "epoch": 0.39, "grad_norm": 3.6524739265441895, "learning_rate": 1.9763301416000815e-05, "loss": 1.8126, "step": 29994 }, { "epoch": 0.39, "grad_norm": 3.615602493286133, "learning_rate": 1.9763278688285523e-05, "loss": 1.8662, "step": 29995 }, { "epoch": 0.39, "grad_norm": 3.796074390411377, "learning_rate": 1.97632559594922e-05, "loss": 1.9591, "step": 29996 }, { "epoch": 0.39, "grad_norm": 3.2420272827148438, "learning_rate": 1.9763233229620845e-05, "loss": 1.5695, "step": 29997 }, { "epoch": 0.39, "grad_norm": 3.5713884830474854, "learning_rate": 1.9763210498671466e-05, "loss": 1.7388, "step": 29998 }, { "epoch": 0.39, "grad_norm": 3.9687235355377197, "learning_rate": 1.976318776664406e-05, "loss": 2.1421, "step": 29999 }, { "epoch": 0.39, "grad_norm": 3.566758632659912, "learning_rate": 1.9763165033538637e-05, "loss": 2.0042, "step": 30000 }, { "epoch": 0.39, "grad_norm": 4.179489612579346, "learning_rate": 1.976314229935519e-05, "loss": 1.9061, "step": 30001 }, { "epoch": 0.39, "grad_norm": 3.878668785095215, "learning_rate": 1.976311956409373e-05, "loss": 2.2944, "step": 30002 }, { "epoch": 0.39, "grad_norm": 4.170907497406006, "learning_rate": 1.9763096827754257e-05, "loss": 1.8533, "step": 30003 }, { "epoch": 0.39, "grad_norm": 4.209718227386475, "learning_rate": 1.9763074090336768e-05, "loss": 1.9, "step": 30004 }, { "epoch": 0.39, "grad_norm": 3.914290428161621, "learning_rate": 1.9763051351841274e-05, "loss": 2.2064, "step": 30005 }, { "epoch": 0.39, "grad_norm": 3.936155080795288, "learning_rate": 1.976302861226777e-05, "loss": 2.2812, "step": 30006 }, { "epoch": 0.39, "grad_norm": 4.112549304962158, "learning_rate": 1.9763005871616267e-05, "loss": 2.2763, "step": 30007 }, { "epoch": 0.39, "grad_norm": 3.6373023986816406, "learning_rate": 1.976298312988676e-05, "loss": 1.7247, "step": 30008 }, { "epoch": 0.39, "grad_norm": 4.3150458335876465, "learning_rate": 1.9762960387079252e-05, "loss": 2.4093, "step": 30009 }, { "epoch": 0.39, "grad_norm": 3.8701319694519043, "learning_rate": 1.9762937643193748e-05, "loss": 1.9952, "step": 30010 }, { "epoch": 0.39, "grad_norm": 3.9527435302734375, "learning_rate": 1.9762914898230253e-05, "loss": 1.8663, "step": 30011 }, { "epoch": 0.39, "grad_norm": 3.8547921180725098, "learning_rate": 1.9762892152188763e-05, "loss": 1.992, "step": 30012 }, { "epoch": 0.39, "grad_norm": 3.8100240230560303, "learning_rate": 1.9762869405069287e-05, "loss": 1.8889, "step": 30013 }, { "epoch": 0.39, "grad_norm": 3.5325095653533936, "learning_rate": 1.9762846656871824e-05, "loss": 1.7066, "step": 30014 }, { "epoch": 0.39, "grad_norm": 3.6384894847869873, "learning_rate": 1.9762823907596375e-05, "loss": 1.9861, "step": 30015 }, { "epoch": 0.39, "grad_norm": 3.315609931945801, "learning_rate": 1.9762801157242948e-05, "loss": 1.7501, "step": 30016 }, { "epoch": 0.39, "grad_norm": 4.091471195220947, "learning_rate": 1.9762778405811537e-05, "loss": 2.3143, "step": 30017 }, { "epoch": 0.39, "grad_norm": 3.76005482673645, "learning_rate": 1.9762755653302153e-05, "loss": 2.0396, "step": 30018 }, { "epoch": 0.39, "grad_norm": 4.075504779815674, "learning_rate": 1.97627328997148e-05, "loss": 2.526, "step": 30019 }, { "epoch": 0.39, "grad_norm": 3.699960708618164, "learning_rate": 1.976271014504947e-05, "loss": 2.2842, "step": 30020 }, { "epoch": 0.39, "grad_norm": 4.117228031158447, "learning_rate": 1.976268738930617e-05, "loss": 1.9575, "step": 30021 }, { "epoch": 0.39, "grad_norm": 3.998948335647583, "learning_rate": 1.9762664632484904e-05, "loss": 1.7898, "step": 30022 }, { "epoch": 0.39, "grad_norm": 3.6590847969055176, "learning_rate": 1.9762641874585674e-05, "loss": 1.8282, "step": 30023 }, { "epoch": 0.39, "grad_norm": 4.547015190124512, "learning_rate": 1.976261911560849e-05, "loss": 2.1374, "step": 30024 }, { "epoch": 0.39, "grad_norm": 3.733217716217041, "learning_rate": 1.9762596355553337e-05, "loss": 2.0177, "step": 30025 }, { "epoch": 0.39, "grad_norm": 3.8980517387390137, "learning_rate": 1.9762573594420233e-05, "loss": 2.0651, "step": 30026 }, { "epoch": 0.39, "grad_norm": 3.431631565093994, "learning_rate": 1.9762550832209175e-05, "loss": 1.668, "step": 30027 }, { "epoch": 0.39, "grad_norm": 4.031355857849121, "learning_rate": 1.9762528068920166e-05, "loss": 2.4379, "step": 30028 }, { "epoch": 0.39, "grad_norm": 3.9260525703430176, "learning_rate": 1.9762505304553206e-05, "loss": 2.0462, "step": 30029 }, { "epoch": 0.39, "grad_norm": 4.525421619415283, "learning_rate": 1.9762482539108302e-05, "loss": 2.3684, "step": 30030 }, { "epoch": 0.39, "grad_norm": 3.855520725250244, "learning_rate": 1.9762459772585453e-05, "loss": 1.5845, "step": 30031 }, { "epoch": 0.39, "grad_norm": 4.1749267578125, "learning_rate": 1.9762437004984667e-05, "loss": 2.314, "step": 30032 }, { "epoch": 0.39, "grad_norm": 3.8279428482055664, "learning_rate": 1.9762414236305937e-05, "loss": 2.0047, "step": 30033 }, { "epoch": 0.39, "grad_norm": 4.1703643798828125, "learning_rate": 1.9762391466549272e-05, "loss": 2.3814, "step": 30034 }, { "epoch": 0.39, "grad_norm": 3.6014132499694824, "learning_rate": 1.9762368695714677e-05, "loss": 1.7681, "step": 30035 }, { "epoch": 0.39, "grad_norm": 3.679898262023926, "learning_rate": 1.9762345923802147e-05, "loss": 1.9632, "step": 30036 }, { "epoch": 0.39, "grad_norm": 3.627202033996582, "learning_rate": 1.9762323150811687e-05, "loss": 1.6832, "step": 30037 }, { "epoch": 0.39, "grad_norm": 4.217654228210449, "learning_rate": 1.9762300376743306e-05, "loss": 1.9761, "step": 30038 }, { "epoch": 0.39, "grad_norm": 3.519782781600952, "learning_rate": 1.9762277601596998e-05, "loss": 1.9612, "step": 30039 }, { "epoch": 0.39, "grad_norm": 3.606776475906372, "learning_rate": 1.976225482537277e-05, "loss": 1.6435, "step": 30040 }, { "epoch": 0.39, "grad_norm": 4.264307975769043, "learning_rate": 1.9762232048070624e-05, "loss": 2.8732, "step": 30041 }, { "epoch": 0.39, "grad_norm": 3.7565248012542725, "learning_rate": 1.976220926969056e-05, "loss": 2.0226, "step": 30042 }, { "epoch": 0.39, "grad_norm": 3.4128904342651367, "learning_rate": 1.9762186490232584e-05, "loss": 1.8094, "step": 30043 }, { "epoch": 0.39, "grad_norm": 4.123797416687012, "learning_rate": 1.9762163709696696e-05, "loss": 2.4229, "step": 30044 }, { "epoch": 0.39, "grad_norm": 5.864696025848389, "learning_rate": 1.97621409280829e-05, "loss": 2.1604, "step": 30045 }, { "epoch": 0.39, "grad_norm": 3.6710104942321777, "learning_rate": 1.9762118145391197e-05, "loss": 2.0957, "step": 30046 }, { "epoch": 0.39, "grad_norm": 3.4198548793792725, "learning_rate": 1.9762095361621592e-05, "loss": 1.9953, "step": 30047 }, { "epoch": 0.39, "grad_norm": 3.37713360786438, "learning_rate": 1.9762072576774084e-05, "loss": 1.7623, "step": 30048 }, { "epoch": 0.39, "grad_norm": 3.907038688659668, "learning_rate": 1.976204979084868e-05, "loss": 2.2734, "step": 30049 }, { "epoch": 0.39, "grad_norm": 4.710681438446045, "learning_rate": 1.9762027003845383e-05, "loss": 2.6442, "step": 30050 }, { "epoch": 0.39, "grad_norm": 3.7681145668029785, "learning_rate": 1.9762004215764187e-05, "loss": 2.3828, "step": 30051 }, { "epoch": 0.39, "grad_norm": 3.3742752075195312, "learning_rate": 1.9761981426605104e-05, "loss": 1.7507, "step": 30052 }, { "epoch": 0.39, "grad_norm": 3.5201449394226074, "learning_rate": 1.976195863636813e-05, "loss": 1.7765, "step": 30053 }, { "epoch": 0.39, "grad_norm": 4.5064377784729, "learning_rate": 1.976193584505327e-05, "loss": 2.518, "step": 30054 }, { "epoch": 0.39, "grad_norm": 4.300804138183594, "learning_rate": 1.9761913052660526e-05, "loss": 2.1683, "step": 30055 }, { "epoch": 0.39, "grad_norm": 3.6931803226470947, "learning_rate": 1.9761890259189904e-05, "loss": 1.8437, "step": 30056 }, { "epoch": 0.39, "grad_norm": 3.6877756118774414, "learning_rate": 1.97618674646414e-05, "loss": 2.0866, "step": 30057 }, { "epoch": 0.39, "grad_norm": 4.281777858734131, "learning_rate": 1.9761844669015026e-05, "loss": 2.5592, "step": 30058 }, { "epoch": 0.39, "grad_norm": 4.227419853210449, "learning_rate": 1.9761821872310777e-05, "loss": 2.3175, "step": 30059 }, { "epoch": 0.39, "grad_norm": 3.813528537750244, "learning_rate": 1.9761799074528652e-05, "loss": 1.682, "step": 30060 }, { "epoch": 0.39, "grad_norm": 4.4952473640441895, "learning_rate": 1.9761776275668664e-05, "loss": 2.2079, "step": 30061 }, { "epoch": 0.39, "grad_norm": 3.195796012878418, "learning_rate": 1.9761753475730806e-05, "loss": 1.3899, "step": 30062 }, { "epoch": 0.39, "grad_norm": 3.6315529346466064, "learning_rate": 1.9761730674715088e-05, "loss": 2.0006, "step": 30063 }, { "epoch": 0.39, "grad_norm": 3.799391269683838, "learning_rate": 1.976170787262151e-05, "loss": 1.8863, "step": 30064 }, { "epoch": 0.39, "grad_norm": 3.688412666320801, "learning_rate": 1.9761685069450074e-05, "loss": 1.9428, "step": 30065 }, { "epoch": 0.39, "grad_norm": 3.6235499382019043, "learning_rate": 1.976166226520078e-05, "loss": 1.831, "step": 30066 }, { "epoch": 0.39, "grad_norm": 3.8110599517822266, "learning_rate": 1.9761639459873632e-05, "loss": 2.1817, "step": 30067 }, { "epoch": 0.39, "grad_norm": 3.7291882038116455, "learning_rate": 1.9761616653468637e-05, "loss": 1.8686, "step": 30068 }, { "epoch": 0.39, "grad_norm": 3.6760008335113525, "learning_rate": 1.9761593845985792e-05, "loss": 2.061, "step": 30069 }, { "epoch": 0.39, "grad_norm": 3.790811538696289, "learning_rate": 1.9761571037425102e-05, "loss": 1.76, "step": 30070 }, { "epoch": 0.39, "grad_norm": 3.219383955001831, "learning_rate": 1.9761548227786565e-05, "loss": 1.8583, "step": 30071 }, { "epoch": 0.39, "grad_norm": 3.302011013031006, "learning_rate": 1.9761525417070193e-05, "loss": 1.9513, "step": 30072 }, { "epoch": 0.39, "grad_norm": 3.7154860496520996, "learning_rate": 1.976150260527598e-05, "loss": 2.1037, "step": 30073 }, { "epoch": 0.39, "grad_norm": 4.9803385734558105, "learning_rate": 1.9761479792403936e-05, "loss": 2.3772, "step": 30074 }, { "epoch": 0.39, "grad_norm": 3.851454973220825, "learning_rate": 1.9761456978454053e-05, "loss": 2.038, "step": 30075 }, { "epoch": 0.39, "grad_norm": 3.3982324600219727, "learning_rate": 1.9761434163426344e-05, "loss": 1.5962, "step": 30076 }, { "epoch": 0.39, "grad_norm": 3.777939796447754, "learning_rate": 1.9761411347320805e-05, "loss": 2.5407, "step": 30077 }, { "epoch": 0.39, "grad_norm": 3.7173688411712646, "learning_rate": 1.976138853013744e-05, "loss": 2.0396, "step": 30078 }, { "epoch": 0.39, "grad_norm": 4.487239360809326, "learning_rate": 1.976136571187625e-05, "loss": 2.2747, "step": 30079 }, { "epoch": 0.39, "grad_norm": 3.6509389877319336, "learning_rate": 1.9761342892537246e-05, "loss": 1.8974, "step": 30080 }, { "epoch": 0.39, "grad_norm": 3.823376178741455, "learning_rate": 1.9761320072120422e-05, "loss": 2.2726, "step": 30081 }, { "epoch": 0.39, "grad_norm": 3.512040376663208, "learning_rate": 1.976129725062578e-05, "loss": 1.9366, "step": 30082 }, { "epoch": 0.39, "grad_norm": 3.5096867084503174, "learning_rate": 1.9761274428053327e-05, "loss": 1.8248, "step": 30083 }, { "epoch": 0.39, "grad_norm": 3.6148428916931152, "learning_rate": 1.9761251604403063e-05, "loss": 1.8768, "step": 30084 }, { "epoch": 0.39, "grad_norm": 4.048449993133545, "learning_rate": 1.976122877967499e-05, "loss": 2.0018, "step": 30085 }, { "epoch": 0.39, "grad_norm": 3.6011338233947754, "learning_rate": 1.9761205953869115e-05, "loss": 1.7136, "step": 30086 }, { "epoch": 0.39, "grad_norm": 3.9969351291656494, "learning_rate": 1.976118312698543e-05, "loss": 1.7372, "step": 30087 }, { "epoch": 0.39, "grad_norm": 4.4681477546691895, "learning_rate": 1.9761160299023952e-05, "loss": 1.9888, "step": 30088 }, { "epoch": 0.39, "grad_norm": 3.7008473873138428, "learning_rate": 1.9761137469984678e-05, "loss": 1.9382, "step": 30089 }, { "epoch": 0.39, "grad_norm": 4.019404411315918, "learning_rate": 1.9761114639867602e-05, "loss": 2.2687, "step": 30090 }, { "epoch": 0.39, "grad_norm": 3.5220680236816406, "learning_rate": 1.9761091808672738e-05, "loss": 1.9893, "step": 30091 }, { "epoch": 0.39, "grad_norm": 3.8839364051818848, "learning_rate": 1.9761068976400085e-05, "loss": 1.9522, "step": 30092 }, { "epoch": 0.39, "grad_norm": 3.624368667602539, "learning_rate": 1.9761046143049637e-05, "loss": 1.6727, "step": 30093 }, { "epoch": 0.39, "grad_norm": 4.095809459686279, "learning_rate": 1.976102330862141e-05, "loss": 2.239, "step": 30094 }, { "epoch": 0.39, "grad_norm": 4.136188983917236, "learning_rate": 1.97610004731154e-05, "loss": 2.3007, "step": 30095 }, { "epoch": 0.39, "grad_norm": 3.678652048110962, "learning_rate": 1.9760977636531612e-05, "loss": 1.6871, "step": 30096 }, { "epoch": 0.39, "grad_norm": 4.366180419921875, "learning_rate": 1.976095479887004e-05, "loss": 2.4129, "step": 30097 }, { "epoch": 0.39, "grad_norm": 4.11318826675415, "learning_rate": 1.9760931960130698e-05, "loss": 1.9503, "step": 30098 }, { "epoch": 0.39, "grad_norm": 3.538351058959961, "learning_rate": 1.9760909120313583e-05, "loss": 1.7084, "step": 30099 }, { "epoch": 0.39, "grad_norm": 4.312668800354004, "learning_rate": 1.9760886279418698e-05, "loss": 1.9626, "step": 30100 }, { "epoch": 0.39, "grad_norm": 3.9341683387756348, "learning_rate": 1.9760863437446044e-05, "loss": 2.104, "step": 30101 }, { "epoch": 0.39, "grad_norm": 3.5248661041259766, "learning_rate": 1.9760840594395625e-05, "loss": 1.7468, "step": 30102 }, { "epoch": 0.39, "grad_norm": 3.7339894771575928, "learning_rate": 1.9760817750267445e-05, "loss": 1.8663, "step": 30103 }, { "epoch": 0.39, "grad_norm": 3.7702527046203613, "learning_rate": 1.9760794905061503e-05, "loss": 1.6922, "step": 30104 }, { "epoch": 0.39, "grad_norm": 3.8746321201324463, "learning_rate": 1.9760772058777807e-05, "loss": 2.034, "step": 30105 }, { "epoch": 0.39, "grad_norm": 3.487410068511963, "learning_rate": 1.9760749211416353e-05, "loss": 1.6129, "step": 30106 }, { "epoch": 0.39, "grad_norm": 4.065791606903076, "learning_rate": 1.976072636297715e-05, "loss": 2.0307, "step": 30107 }, { "epoch": 0.39, "grad_norm": 4.762674331665039, "learning_rate": 1.9760703513460192e-05, "loss": 2.1365, "step": 30108 }, { "epoch": 0.39, "grad_norm": 3.63423752784729, "learning_rate": 1.976068066286549e-05, "loss": 1.9563, "step": 30109 }, { "epoch": 0.39, "grad_norm": 3.989213228225708, "learning_rate": 1.9760657811193046e-05, "loss": 2.6159, "step": 30110 }, { "epoch": 0.39, "grad_norm": 3.9987001419067383, "learning_rate": 1.9760634958442857e-05, "loss": 2.0744, "step": 30111 }, { "epoch": 0.39, "grad_norm": 3.8154795169830322, "learning_rate": 1.9760612104614926e-05, "loss": 1.7682, "step": 30112 }, { "epoch": 0.39, "grad_norm": 4.04257869720459, "learning_rate": 1.9760589249709262e-05, "loss": 2.0079, "step": 30113 }, { "epoch": 0.39, "grad_norm": 4.07636833190918, "learning_rate": 1.976056639372586e-05, "loss": 2.1985, "step": 30114 }, { "epoch": 0.39, "grad_norm": 3.763685703277588, "learning_rate": 1.9760543536664726e-05, "loss": 2.3081, "step": 30115 }, { "epoch": 0.39, "grad_norm": 4.068177700042725, "learning_rate": 1.9760520678525866e-05, "loss": 1.7564, "step": 30116 }, { "epoch": 0.39, "grad_norm": 3.9746127128601074, "learning_rate": 1.9760497819309278e-05, "loss": 1.6791, "step": 30117 }, { "epoch": 0.39, "grad_norm": 4.643517017364502, "learning_rate": 1.9760474959014962e-05, "loss": 2.1743, "step": 30118 }, { "epoch": 0.39, "grad_norm": 4.337129592895508, "learning_rate": 1.976045209764293e-05, "loss": 2.1281, "step": 30119 }, { "epoch": 0.39, "grad_norm": 3.990037202835083, "learning_rate": 1.9760429235193173e-05, "loss": 1.9235, "step": 30120 }, { "epoch": 0.39, "grad_norm": 3.967881441116333, "learning_rate": 1.9760406371665702e-05, "loss": 2.1337, "step": 30121 }, { "epoch": 0.39, "grad_norm": 3.6238348484039307, "learning_rate": 1.9760383507060514e-05, "loss": 1.7213, "step": 30122 }, { "epoch": 0.39, "grad_norm": 3.6752266883850098, "learning_rate": 1.9760360641377615e-05, "loss": 1.922, "step": 30123 }, { "epoch": 0.39, "grad_norm": 3.551140785217285, "learning_rate": 1.9760337774617006e-05, "loss": 1.8985, "step": 30124 }, { "epoch": 0.39, "grad_norm": 4.2873005867004395, "learning_rate": 1.976031490677869e-05, "loss": 2.2488, "step": 30125 }, { "epoch": 0.39, "grad_norm": 4.116040229797363, "learning_rate": 1.976029203786267e-05, "loss": 2.1314, "step": 30126 }, { "epoch": 0.39, "grad_norm": 3.944467067718506, "learning_rate": 1.976026916786895e-05, "loss": 2.4197, "step": 30127 }, { "epoch": 0.39, "grad_norm": 4.041078090667725, "learning_rate": 1.9760246296797533e-05, "loss": 2.0737, "step": 30128 }, { "epoch": 0.39, "grad_norm": 3.578573703765869, "learning_rate": 1.9760223424648415e-05, "loss": 1.8379, "step": 30129 }, { "epoch": 0.39, "grad_norm": 3.6992478370666504, "learning_rate": 1.97602005514216e-05, "loss": 1.7371, "step": 30130 }, { "epoch": 0.39, "grad_norm": 4.088551998138428, "learning_rate": 1.97601776771171e-05, "loss": 1.8203, "step": 30131 }, { "epoch": 0.39, "grad_norm": 4.294650077819824, "learning_rate": 1.9760154801734908e-05, "loss": 1.9146, "step": 30132 }, { "epoch": 0.39, "grad_norm": 3.729377269744873, "learning_rate": 1.9760131925275026e-05, "loss": 2.1525, "step": 30133 }, { "epoch": 0.39, "grad_norm": 4.01564359664917, "learning_rate": 1.9760109047737464e-05, "loss": 2.159, "step": 30134 }, { "epoch": 0.39, "grad_norm": 3.541410207748413, "learning_rate": 1.976008616912222e-05, "loss": 1.5983, "step": 30135 }, { "epoch": 0.39, "grad_norm": 3.7533791065216064, "learning_rate": 1.9760063289429294e-05, "loss": 1.601, "step": 30136 }, { "epoch": 0.39, "grad_norm": 3.91148042678833, "learning_rate": 1.9760040408658696e-05, "loss": 1.5868, "step": 30137 }, { "epoch": 0.39, "grad_norm": 4.4117279052734375, "learning_rate": 1.976001752681042e-05, "loss": 2.3515, "step": 30138 }, { "epoch": 0.39, "grad_norm": 3.5069804191589355, "learning_rate": 1.9759994643884473e-05, "loss": 1.6071, "step": 30139 }, { "epoch": 0.39, "grad_norm": 3.466618537902832, "learning_rate": 1.975997175988086e-05, "loss": 1.6943, "step": 30140 }, { "epoch": 0.39, "grad_norm": 4.053817272186279, "learning_rate": 1.9759948874799577e-05, "loss": 2.3253, "step": 30141 }, { "epoch": 0.39, "grad_norm": 3.1732232570648193, "learning_rate": 1.9759925988640632e-05, "loss": 1.5221, "step": 30142 }, { "epoch": 0.39, "grad_norm": 3.4591736793518066, "learning_rate": 1.9759903101404025e-05, "loss": 1.601, "step": 30143 }, { "epoch": 0.39, "grad_norm": 3.575669288635254, "learning_rate": 1.9759880213089758e-05, "loss": 1.7444, "step": 30144 }, { "epoch": 0.39, "grad_norm": 3.466399669647217, "learning_rate": 1.9759857323697838e-05, "loss": 2.0045, "step": 30145 }, { "epoch": 0.39, "grad_norm": 4.159203052520752, "learning_rate": 1.975983443322826e-05, "loss": 2.3087, "step": 30146 }, { "epoch": 0.39, "grad_norm": 4.03035831451416, "learning_rate": 1.9759811541681033e-05, "loss": 2.0677, "step": 30147 }, { "epoch": 0.39, "grad_norm": 3.8842861652374268, "learning_rate": 1.9759788649056157e-05, "loss": 1.8411, "step": 30148 }, { "epoch": 0.39, "grad_norm": 4.0878777503967285, "learning_rate": 1.9759765755353636e-05, "loss": 2.0528, "step": 30149 }, { "epoch": 0.39, "grad_norm": 4.559642314910889, "learning_rate": 1.975974286057347e-05, "loss": 2.3221, "step": 30150 }, { "epoch": 0.39, "grad_norm": 4.235744476318359, "learning_rate": 1.975971996471566e-05, "loss": 2.0744, "step": 30151 }, { "epoch": 0.39, "grad_norm": 3.85012149810791, "learning_rate": 1.975969706778022e-05, "loss": 2.2269, "step": 30152 }, { "epoch": 0.39, "grad_norm": 3.7365355491638184, "learning_rate": 1.9759674169767136e-05, "loss": 1.9447, "step": 30153 }, { "epoch": 0.39, "grad_norm": 4.151151180267334, "learning_rate": 1.9759651270676418e-05, "loss": 1.6471, "step": 30154 }, { "epoch": 0.39, "grad_norm": 4.2392401695251465, "learning_rate": 1.9759628370508074e-05, "loss": 2.5815, "step": 30155 }, { "epoch": 0.39, "grad_norm": 3.797130823135376, "learning_rate": 1.9759605469262097e-05, "loss": 1.9723, "step": 30156 }, { "epoch": 0.39, "grad_norm": 3.513211488723755, "learning_rate": 1.9759582566938498e-05, "loss": 1.6687, "step": 30157 }, { "epoch": 0.39, "grad_norm": 3.942943811416626, "learning_rate": 1.9759559663537273e-05, "loss": 1.9006, "step": 30158 }, { "epoch": 0.39, "grad_norm": 3.9853267669677734, "learning_rate": 1.9759536759058426e-05, "loss": 2.3282, "step": 30159 }, { "epoch": 0.39, "grad_norm": 3.6989665031433105, "learning_rate": 1.975951385350196e-05, "loss": 1.9063, "step": 30160 }, { "epoch": 0.39, "grad_norm": 4.402067184448242, "learning_rate": 1.9759490946867885e-05, "loss": 2.5225, "step": 30161 }, { "epoch": 0.39, "grad_norm": 4.044217586517334, "learning_rate": 1.975946803915619e-05, "loss": 2.3973, "step": 30162 }, { "epoch": 0.39, "grad_norm": 3.8908774852752686, "learning_rate": 1.9759445130366884e-05, "loss": 1.649, "step": 30163 }, { "epoch": 0.39, "grad_norm": 4.478695392608643, "learning_rate": 1.9759422220499972e-05, "loss": 2.5931, "step": 30164 }, { "epoch": 0.39, "grad_norm": 3.4162168502807617, "learning_rate": 1.9759399309555455e-05, "loss": 1.7846, "step": 30165 }, { "epoch": 0.39, "grad_norm": 3.8747518062591553, "learning_rate": 1.975937639753333e-05, "loss": 2.5023, "step": 30166 }, { "epoch": 0.39, "grad_norm": 3.8404502868652344, "learning_rate": 1.975935348443361e-05, "loss": 2.081, "step": 30167 }, { "epoch": 0.39, "grad_norm": 4.464264392852783, "learning_rate": 1.975933057025629e-05, "loss": 1.9209, "step": 30168 }, { "epoch": 0.39, "grad_norm": 3.8995676040649414, "learning_rate": 1.9759307655001374e-05, "loss": 1.9596, "step": 30169 }, { "epoch": 0.39, "grad_norm": 3.46635365486145, "learning_rate": 1.9759284738668863e-05, "loss": 1.6716, "step": 30170 }, { "epoch": 0.39, "grad_norm": 3.6297483444213867, "learning_rate": 1.9759261821258767e-05, "loss": 2.1457, "step": 30171 }, { "epoch": 0.39, "grad_norm": 3.7513649463653564, "learning_rate": 1.9759238902771076e-05, "loss": 2.3901, "step": 30172 }, { "epoch": 0.39, "grad_norm": 3.731863260269165, "learning_rate": 1.9759215983205802e-05, "loss": 2.2048, "step": 30173 }, { "epoch": 0.39, "grad_norm": 4.270209789276123, "learning_rate": 1.9759193062562948e-05, "loss": 2.2807, "step": 30174 }, { "epoch": 0.39, "grad_norm": 4.106373310089111, "learning_rate": 1.975917014084251e-05, "loss": 2.3848, "step": 30175 }, { "epoch": 0.39, "grad_norm": 3.763336181640625, "learning_rate": 1.9759147218044496e-05, "loss": 1.9856, "step": 30176 }, { "epoch": 0.39, "grad_norm": 4.090831756591797, "learning_rate": 1.9759124294168902e-05, "loss": 1.8885, "step": 30177 }, { "epoch": 0.39, "grad_norm": 4.3505754470825195, "learning_rate": 1.975910136921574e-05, "loss": 2.5274, "step": 30178 }, { "epoch": 0.39, "grad_norm": 3.561605215072632, "learning_rate": 1.975907844318501e-05, "loss": 1.8249, "step": 30179 }, { "epoch": 0.39, "grad_norm": 3.9020864963531494, "learning_rate": 1.9759055516076706e-05, "loss": 1.9773, "step": 30180 }, { "epoch": 0.39, "grad_norm": 4.0596184730529785, "learning_rate": 1.975903258789084e-05, "loss": 1.973, "step": 30181 }, { "epoch": 0.39, "grad_norm": 3.801561117172241, "learning_rate": 1.9759009658627412e-05, "loss": 2.3111, "step": 30182 }, { "epoch": 0.39, "grad_norm": 3.661423921585083, "learning_rate": 1.975898672828642e-05, "loss": 2.072, "step": 30183 }, { "epoch": 0.39, "grad_norm": 3.2327678203582764, "learning_rate": 1.9758963796867872e-05, "loss": 1.4993, "step": 30184 }, { "epoch": 0.39, "grad_norm": 3.608891725540161, "learning_rate": 1.975894086437177e-05, "loss": 1.8302, "step": 30185 }, { "epoch": 0.39, "grad_norm": 4.290521621704102, "learning_rate": 1.9758917930798117e-05, "loss": 2.1205, "step": 30186 }, { "epoch": 0.39, "grad_norm": 4.227456092834473, "learning_rate": 1.9758894996146913e-05, "loss": 1.9479, "step": 30187 }, { "epoch": 0.39, "grad_norm": 3.6883437633514404, "learning_rate": 1.9758872060418157e-05, "loss": 1.8347, "step": 30188 }, { "epoch": 0.39, "grad_norm": 3.5890135765075684, "learning_rate": 1.975884912361186e-05, "loss": 1.6408, "step": 30189 }, { "epoch": 0.39, "grad_norm": 3.897399425506592, "learning_rate": 1.975882618572802e-05, "loss": 2.2254, "step": 30190 }, { "epoch": 0.39, "grad_norm": 4.160727500915527, "learning_rate": 1.975880324676664e-05, "loss": 1.9438, "step": 30191 }, { "epoch": 0.39, "grad_norm": 4.3125834465026855, "learning_rate": 1.975878030672772e-05, "loss": 2.5022, "step": 30192 }, { "epoch": 0.39, "grad_norm": 4.029142379760742, "learning_rate": 1.975875736561127e-05, "loss": 1.8665, "step": 30193 }, { "epoch": 0.39, "grad_norm": 4.146238327026367, "learning_rate": 1.9758734423417283e-05, "loss": 2.3099, "step": 30194 }, { "epoch": 0.39, "grad_norm": 3.846365451812744, "learning_rate": 1.975871148014577e-05, "loss": 2.1045, "step": 30195 }, { "epoch": 0.39, "grad_norm": 3.4796371459960938, "learning_rate": 1.9758688535796727e-05, "loss": 2.0189, "step": 30196 }, { "epoch": 0.39, "grad_norm": 3.726560354232788, "learning_rate": 1.975866559037016e-05, "loss": 1.9508, "step": 30197 }, { "epoch": 0.39, "grad_norm": 3.845447063446045, "learning_rate": 1.975864264386607e-05, "loss": 1.8203, "step": 30198 }, { "epoch": 0.39, "grad_norm": 3.8991096019744873, "learning_rate": 1.975861969628446e-05, "loss": 2.2097, "step": 30199 }, { "epoch": 0.39, "grad_norm": 4.024349689483643, "learning_rate": 1.9758596747625333e-05, "loss": 2.4854, "step": 30200 }, { "epoch": 0.39, "grad_norm": 4.7577056884765625, "learning_rate": 1.9758573797888692e-05, "loss": 2.287, "step": 30201 }, { "epoch": 0.39, "grad_norm": 4.203752517700195, "learning_rate": 1.975855084707454e-05, "loss": 2.464, "step": 30202 }, { "epoch": 0.39, "grad_norm": 3.4725964069366455, "learning_rate": 1.975852789518288e-05, "loss": 1.5151, "step": 30203 }, { "epoch": 0.39, "grad_norm": 4.114612102508545, "learning_rate": 1.975850494221371e-05, "loss": 2.2518, "step": 30204 }, { "epoch": 0.39, "grad_norm": 3.8683295249938965, "learning_rate": 1.9758481988167036e-05, "loss": 2.0415, "step": 30205 }, { "epoch": 0.39, "grad_norm": 4.228093147277832, "learning_rate": 1.975845903304286e-05, "loss": 2.3575, "step": 30206 }, { "epoch": 0.39, "grad_norm": 4.072062015533447, "learning_rate": 1.9758436076841185e-05, "loss": 1.8912, "step": 30207 }, { "epoch": 0.39, "grad_norm": 4.3006672859191895, "learning_rate": 1.975841311956201e-05, "loss": 2.1725, "step": 30208 }, { "epoch": 0.39, "grad_norm": 4.226583957672119, "learning_rate": 1.9758390161205345e-05, "loss": 1.9326, "step": 30209 }, { "epoch": 0.39, "grad_norm": 4.131949424743652, "learning_rate": 1.9758367201771187e-05, "loss": 2.219, "step": 30210 }, { "epoch": 0.39, "grad_norm": 3.3190090656280518, "learning_rate": 1.9758344241259538e-05, "loss": 1.6328, "step": 30211 }, { "epoch": 0.39, "grad_norm": 4.018998622894287, "learning_rate": 1.9758321279670402e-05, "loss": 1.9674, "step": 30212 }, { "epoch": 0.39, "grad_norm": 4.307605743408203, "learning_rate": 1.9758298317003786e-05, "loss": 2.1639, "step": 30213 }, { "epoch": 0.39, "grad_norm": 3.8903002738952637, "learning_rate": 1.9758275353259683e-05, "loss": 1.8299, "step": 30214 }, { "epoch": 0.39, "grad_norm": 3.8796355724334717, "learning_rate": 1.9758252388438107e-05, "loss": 1.9786, "step": 30215 }, { "epoch": 0.39, "grad_norm": 3.933389902114868, "learning_rate": 1.9758229422539048e-05, "loss": 2.1822, "step": 30216 }, { "epoch": 0.39, "grad_norm": 3.6621789932250977, "learning_rate": 1.9758206455562518e-05, "loss": 1.9981, "step": 30217 }, { "epoch": 0.39, "grad_norm": 3.4435184001922607, "learning_rate": 1.975818348750852e-05, "loss": 1.5847, "step": 30218 }, { "epoch": 0.39, "grad_norm": 3.6990671157836914, "learning_rate": 1.9758160518377046e-05, "loss": 1.9549, "step": 30219 }, { "epoch": 0.39, "grad_norm": 4.013568878173828, "learning_rate": 1.975813754816811e-05, "loss": 1.8303, "step": 30220 }, { "epoch": 0.39, "grad_norm": 3.4432802200317383, "learning_rate": 1.9758114576881707e-05, "loss": 1.7925, "step": 30221 }, { "epoch": 0.39, "grad_norm": 4.945284366607666, "learning_rate": 1.9758091604517845e-05, "loss": 2.6642, "step": 30222 }, { "epoch": 0.39, "grad_norm": 4.185417175292969, "learning_rate": 1.9758068631076523e-05, "loss": 1.9701, "step": 30223 }, { "epoch": 0.39, "grad_norm": 3.888444423675537, "learning_rate": 1.9758045656557744e-05, "loss": 1.7442, "step": 30224 }, { "epoch": 0.39, "grad_norm": 4.357283115386963, "learning_rate": 1.9758022680961516e-05, "loss": 2.392, "step": 30225 }, { "epoch": 0.39, "grad_norm": 3.9976439476013184, "learning_rate": 1.975799970428783e-05, "loss": 1.9238, "step": 30226 }, { "epoch": 0.39, "grad_norm": 3.2296199798583984, "learning_rate": 1.9757976726536697e-05, "loss": 1.6851, "step": 30227 }, { "epoch": 0.39, "grad_norm": 3.6753830909729004, "learning_rate": 1.975795374770812e-05, "loss": 1.9177, "step": 30228 }, { "epoch": 0.39, "grad_norm": 3.458076238632202, "learning_rate": 1.97579307678021e-05, "loss": 1.9, "step": 30229 }, { "epoch": 0.39, "grad_norm": 3.5317158699035645, "learning_rate": 1.9757907786818637e-05, "loss": 1.856, "step": 30230 }, { "epoch": 0.39, "grad_norm": 3.8763556480407715, "learning_rate": 1.9757884804757735e-05, "loss": 2.1733, "step": 30231 }, { "epoch": 0.39, "grad_norm": 3.4921319484710693, "learning_rate": 1.97578618216194e-05, "loss": 2.0041, "step": 30232 }, { "epoch": 0.39, "grad_norm": 4.037911415100098, "learning_rate": 1.975783883740363e-05, "loss": 2.4697, "step": 30233 }, { "epoch": 0.39, "grad_norm": 4.187621593475342, "learning_rate": 1.975781585211043e-05, "loss": 1.9145, "step": 30234 }, { "epoch": 0.39, "grad_norm": 3.894846200942993, "learning_rate": 1.97577928657398e-05, "loss": 2.3749, "step": 30235 }, { "epoch": 0.39, "grad_norm": 3.8840291500091553, "learning_rate": 1.975776987829174e-05, "loss": 2.4516, "step": 30236 }, { "epoch": 0.39, "grad_norm": 3.9031012058258057, "learning_rate": 1.9757746889766262e-05, "loss": 2.1294, "step": 30237 }, { "epoch": 0.39, "grad_norm": 4.349918842315674, "learning_rate": 1.9757723900163365e-05, "loss": 2.2587, "step": 30238 }, { "epoch": 0.39, "grad_norm": 3.182126760482788, "learning_rate": 1.9757700909483045e-05, "loss": 1.3011, "step": 30239 }, { "epoch": 0.39, "grad_norm": 4.3407883644104, "learning_rate": 1.975767791772531e-05, "loss": 2.2948, "step": 30240 }, { "epoch": 0.39, "grad_norm": 4.092840194702148, "learning_rate": 1.9757654924890162e-05, "loss": 1.9732, "step": 30241 }, { "epoch": 0.39, "grad_norm": 3.412165880203247, "learning_rate": 1.9757631930977606e-05, "loss": 1.8506, "step": 30242 }, { "epoch": 0.39, "grad_norm": 4.240063190460205, "learning_rate": 1.975760893598764e-05, "loss": 1.8904, "step": 30243 }, { "epoch": 0.39, "grad_norm": 4.276608467102051, "learning_rate": 1.975758593992027e-05, "loss": 2.3955, "step": 30244 }, { "epoch": 0.39, "grad_norm": 3.7266013622283936, "learning_rate": 1.975756294277549e-05, "loss": 1.9855, "step": 30245 }, { "epoch": 0.39, "grad_norm": 3.992218017578125, "learning_rate": 1.975753994455332e-05, "loss": 2.2797, "step": 30246 }, { "epoch": 0.39, "grad_norm": 4.109868049621582, "learning_rate": 1.9757516945253745e-05, "loss": 2.1065, "step": 30247 }, { "epoch": 0.39, "grad_norm": 4.247063159942627, "learning_rate": 1.975749394487678e-05, "loss": 2.4237, "step": 30248 }, { "epoch": 0.39, "grad_norm": 4.201418876647949, "learning_rate": 1.9757470943422416e-05, "loss": 2.3491, "step": 30249 }, { "epoch": 0.39, "grad_norm": 3.826841354370117, "learning_rate": 1.9757447940890663e-05, "loss": 2.121, "step": 30250 }, { "epoch": 0.39, "grad_norm": 4.2056074142456055, "learning_rate": 1.9757424937281527e-05, "loss": 2.2809, "step": 30251 }, { "epoch": 0.39, "grad_norm": 3.6702842712402344, "learning_rate": 1.9757401932595002e-05, "loss": 2.0876, "step": 30252 }, { "epoch": 0.39, "grad_norm": 3.5916390419006348, "learning_rate": 1.9757378926831096e-05, "loss": 1.6857, "step": 30253 }, { "epoch": 0.39, "grad_norm": 3.0928261280059814, "learning_rate": 1.9757355919989808e-05, "loss": 1.4604, "step": 30254 }, { "epoch": 0.39, "grad_norm": 4.763901233673096, "learning_rate": 1.975733291207114e-05, "loss": 2.2062, "step": 30255 }, { "epoch": 0.39, "grad_norm": 3.9941916465759277, "learning_rate": 1.9757309903075103e-05, "loss": 1.9992, "step": 30256 }, { "epoch": 0.39, "grad_norm": 4.146546363830566, "learning_rate": 1.975728689300169e-05, "loss": 2.0112, "step": 30257 }, { "epoch": 0.39, "grad_norm": 3.6165401935577393, "learning_rate": 1.9757263881850904e-05, "loss": 1.8953, "step": 30258 }, { "epoch": 0.39, "grad_norm": 4.012388229370117, "learning_rate": 1.9757240869622754e-05, "loss": 1.9929, "step": 30259 }, { "epoch": 0.39, "grad_norm": 3.831116199493408, "learning_rate": 1.975721785631724e-05, "loss": 1.9513, "step": 30260 }, { "epoch": 0.39, "grad_norm": 3.330430746078491, "learning_rate": 1.9757194841934363e-05, "loss": 1.7224, "step": 30261 }, { "epoch": 0.39, "grad_norm": 4.434621334075928, "learning_rate": 1.9757171826474125e-05, "loss": 2.3902, "step": 30262 }, { "epoch": 0.39, "grad_norm": 4.277578353881836, "learning_rate": 1.9757148809936532e-05, "loss": 2.311, "step": 30263 }, { "epoch": 0.39, "grad_norm": 3.5300233364105225, "learning_rate": 1.975712579232158e-05, "loss": 1.88, "step": 30264 }, { "epoch": 0.39, "grad_norm": 4.257258415222168, "learning_rate": 1.975710277362928e-05, "loss": 2.5094, "step": 30265 }, { "epoch": 0.39, "grad_norm": 3.6952314376831055, "learning_rate": 1.9757079753859628e-05, "loss": 1.8182, "step": 30266 }, { "epoch": 0.39, "grad_norm": 3.948233127593994, "learning_rate": 1.975705673301263e-05, "loss": 2.1664, "step": 30267 }, { "epoch": 0.39, "grad_norm": 4.225000858306885, "learning_rate": 1.9757033711088287e-05, "loss": 2.3685, "step": 30268 }, { "epoch": 0.39, "grad_norm": 3.444899797439575, "learning_rate": 1.9757010688086603e-05, "loss": 1.805, "step": 30269 }, { "epoch": 0.39, "grad_norm": 4.112889766693115, "learning_rate": 1.9756987664007578e-05, "loss": 2.3254, "step": 30270 }, { "epoch": 0.39, "grad_norm": 3.57855224609375, "learning_rate": 1.9756964638851218e-05, "loss": 1.3679, "step": 30271 }, { "epoch": 0.39, "grad_norm": 3.712616205215454, "learning_rate": 1.975694161261752e-05, "loss": 2.1242, "step": 30272 }, { "epoch": 0.39, "grad_norm": 3.335405111312866, "learning_rate": 1.9756918585306492e-05, "loss": 1.5369, "step": 30273 }, { "epoch": 0.39, "grad_norm": 3.3877217769622803, "learning_rate": 1.9756895556918136e-05, "loss": 1.5991, "step": 30274 }, { "epoch": 0.39, "grad_norm": 3.933838129043579, "learning_rate": 1.975687252745245e-05, "loss": 2.1152, "step": 30275 }, { "epoch": 0.39, "grad_norm": 3.8654282093048096, "learning_rate": 1.9756849496909443e-05, "loss": 2.2243, "step": 30276 }, { "epoch": 0.39, "grad_norm": 4.019940376281738, "learning_rate": 1.9756826465289114e-05, "loss": 2.1451, "step": 30277 }, { "epoch": 0.39, "grad_norm": 3.2872941493988037, "learning_rate": 1.975680343259146e-05, "loss": 1.8244, "step": 30278 }, { "epoch": 0.39, "grad_norm": 4.045689582824707, "learning_rate": 1.9756780398816497e-05, "loss": 2.3536, "step": 30279 }, { "epoch": 0.39, "grad_norm": 3.95888352394104, "learning_rate": 1.975675736396422e-05, "loss": 1.623, "step": 30280 }, { "epoch": 0.39, "grad_norm": 4.082498073577881, "learning_rate": 1.9756734328034627e-05, "loss": 2.2528, "step": 30281 }, { "epoch": 0.39, "grad_norm": 3.7845137119293213, "learning_rate": 1.9756711291027727e-05, "loss": 1.996, "step": 30282 }, { "epoch": 0.39, "grad_norm": 3.9238152503967285, "learning_rate": 1.975668825294352e-05, "loss": 1.9858, "step": 30283 }, { "epoch": 0.39, "grad_norm": 3.883516788482666, "learning_rate": 1.9756665213782006e-05, "loss": 1.6072, "step": 30284 }, { "epoch": 0.39, "grad_norm": 3.6800308227539062, "learning_rate": 1.9756642173543198e-05, "loss": 1.7105, "step": 30285 }, { "epoch": 0.39, "grad_norm": 3.518221378326416, "learning_rate": 1.9756619132227083e-05, "loss": 1.4158, "step": 30286 }, { "epoch": 0.39, "grad_norm": 3.3616855144500732, "learning_rate": 1.9756596089833677e-05, "loss": 2.0505, "step": 30287 }, { "epoch": 0.39, "grad_norm": 3.624873399734497, "learning_rate": 1.9756573046362978e-05, "loss": 2.1953, "step": 30288 }, { "epoch": 0.39, "grad_norm": 3.306520462036133, "learning_rate": 1.9756550001814988e-05, "loss": 1.6771, "step": 30289 }, { "epoch": 0.39, "grad_norm": 4.163524627685547, "learning_rate": 1.9756526956189704e-05, "loss": 1.9929, "step": 30290 }, { "epoch": 0.39, "grad_norm": 3.5890393257141113, "learning_rate": 1.975650390948714e-05, "loss": 1.951, "step": 30291 }, { "epoch": 0.39, "grad_norm": 3.908095359802246, "learning_rate": 1.9756480861707286e-05, "loss": 1.9028, "step": 30292 }, { "epoch": 0.39, "grad_norm": 4.082591533660889, "learning_rate": 1.9756457812850155e-05, "loss": 2.115, "step": 30293 }, { "epoch": 0.39, "grad_norm": 3.5256521701812744, "learning_rate": 1.9756434762915748e-05, "loss": 1.9423, "step": 30294 }, { "epoch": 0.39, "grad_norm": 3.4712533950805664, "learning_rate": 1.975641171190406e-05, "loss": 1.88, "step": 30295 }, { "epoch": 0.39, "grad_norm": 3.8513643741607666, "learning_rate": 1.97563886598151e-05, "loss": 1.9257, "step": 30296 }, { "epoch": 0.39, "grad_norm": 4.1654887199401855, "learning_rate": 1.975636560664887e-05, "loss": 2.2439, "step": 30297 }, { "epoch": 0.39, "grad_norm": 3.48451566696167, "learning_rate": 1.975634255240537e-05, "loss": 1.9447, "step": 30298 }, { "epoch": 0.39, "grad_norm": 3.739936351776123, "learning_rate": 1.975631949708461e-05, "loss": 1.7338, "step": 30299 }, { "epoch": 0.39, "grad_norm": 3.2019100189208984, "learning_rate": 1.9756296440686582e-05, "loss": 1.6758, "step": 30300 }, { "epoch": 0.39, "grad_norm": 3.8445775508880615, "learning_rate": 1.9756273383211294e-05, "loss": 1.8105, "step": 30301 }, { "epoch": 0.39, "grad_norm": 3.6804184913635254, "learning_rate": 1.975625032465875e-05, "loss": 2.1191, "step": 30302 }, { "epoch": 0.39, "grad_norm": 4.889939785003662, "learning_rate": 1.9756227265028945e-05, "loss": 2.4745, "step": 30303 }, { "epoch": 0.39, "grad_norm": 3.7599518299102783, "learning_rate": 1.975620420432189e-05, "loss": 1.8434, "step": 30304 }, { "epoch": 0.39, "grad_norm": 3.527608871459961, "learning_rate": 1.9756181142537583e-05, "loss": 1.7942, "step": 30305 }, { "epoch": 0.39, "grad_norm": 3.642474889755249, "learning_rate": 1.9756158079676033e-05, "loss": 2.2659, "step": 30306 }, { "epoch": 0.39, "grad_norm": 4.273538112640381, "learning_rate": 1.9756135015737236e-05, "loss": 2.2988, "step": 30307 }, { "epoch": 0.39, "grad_norm": 4.2242631912231445, "learning_rate": 1.9756111950721193e-05, "loss": 1.9726, "step": 30308 }, { "epoch": 0.39, "grad_norm": 3.598834753036499, "learning_rate": 1.9756088884627914e-05, "loss": 2.1847, "step": 30309 }, { "epoch": 0.39, "grad_norm": 4.4399237632751465, "learning_rate": 1.9756065817457392e-05, "loss": 2.5647, "step": 30310 }, { "epoch": 0.39, "grad_norm": 3.896024465560913, "learning_rate": 1.975604274920964e-05, "loss": 2.028, "step": 30311 }, { "epoch": 0.39, "grad_norm": 3.6925861835479736, "learning_rate": 1.9756019679884653e-05, "loss": 1.7755, "step": 30312 }, { "epoch": 0.39, "grad_norm": 3.3910913467407227, "learning_rate": 1.9755996609482436e-05, "loss": 1.8129, "step": 30313 }, { "epoch": 0.39, "grad_norm": 4.084610939025879, "learning_rate": 1.975597353800299e-05, "loss": 2.0315, "step": 30314 }, { "epoch": 0.39, "grad_norm": 4.287798881530762, "learning_rate": 1.975595046544632e-05, "loss": 2.289, "step": 30315 }, { "epoch": 0.39, "grad_norm": 3.7259175777435303, "learning_rate": 1.975592739181243e-05, "loss": 1.7694, "step": 30316 }, { "epoch": 0.39, "grad_norm": 3.6347310543060303, "learning_rate": 1.9755904317101318e-05, "loss": 1.7801, "step": 30317 }, { "epoch": 0.39, "grad_norm": 3.8286495208740234, "learning_rate": 1.9755881241312988e-05, "loss": 2.4056, "step": 30318 }, { "epoch": 0.39, "grad_norm": 3.4300875663757324, "learning_rate": 1.9755858164447445e-05, "loss": 1.5664, "step": 30319 }, { "epoch": 0.39, "grad_norm": 3.659912586212158, "learning_rate": 1.975583508650469e-05, "loss": 1.8088, "step": 30320 }, { "epoch": 0.39, "grad_norm": 3.7563397884368896, "learning_rate": 1.9755812007484723e-05, "loss": 1.7875, "step": 30321 }, { "epoch": 0.39, "grad_norm": 4.288424491882324, "learning_rate": 1.975578892738755e-05, "loss": 2.3421, "step": 30322 }, { "epoch": 0.39, "grad_norm": 4.146366596221924, "learning_rate": 1.975576584621317e-05, "loss": 2.0157, "step": 30323 }, { "epoch": 0.39, "grad_norm": 3.5577070713043213, "learning_rate": 1.9755742763961593e-05, "loss": 1.7974, "step": 30324 }, { "epoch": 0.39, "grad_norm": 4.042534351348877, "learning_rate": 1.9755719680632813e-05, "loss": 2.2703, "step": 30325 }, { "epoch": 0.39, "grad_norm": 3.8718323707580566, "learning_rate": 1.9755696596226834e-05, "loss": 1.9636, "step": 30326 }, { "epoch": 0.39, "grad_norm": 3.627580165863037, "learning_rate": 1.9755673510743666e-05, "loss": 1.9162, "step": 30327 }, { "epoch": 0.39, "grad_norm": 4.15688419342041, "learning_rate": 1.9755650424183303e-05, "loss": 2.0319, "step": 30328 }, { "epoch": 0.39, "grad_norm": 3.6410748958587646, "learning_rate": 1.975562733654575e-05, "loss": 2.0287, "step": 30329 }, { "epoch": 0.39, "grad_norm": 3.814453601837158, "learning_rate": 1.9755604247831014e-05, "loss": 2.3217, "step": 30330 }, { "epoch": 0.39, "grad_norm": 3.5440866947174072, "learning_rate": 1.975558115803909e-05, "loss": 1.712, "step": 30331 }, { "epoch": 0.39, "grad_norm": 4.232326984405518, "learning_rate": 1.9755558067169987e-05, "loss": 2.3472, "step": 30332 }, { "epoch": 0.39, "grad_norm": 3.8239221572875977, "learning_rate": 1.9755534975223703e-05, "loss": 1.9847, "step": 30333 }, { "epoch": 0.39, "grad_norm": 3.7637736797332764, "learning_rate": 1.975551188220024e-05, "loss": 2.3768, "step": 30334 }, { "epoch": 0.39, "grad_norm": 3.9672746658325195, "learning_rate": 1.9755488788099608e-05, "loss": 1.8955, "step": 30335 }, { "epoch": 0.39, "grad_norm": 3.7982161045074463, "learning_rate": 1.9755465692921803e-05, "loss": 1.8607, "step": 30336 }, { "epoch": 0.39, "grad_norm": 3.7535572052001953, "learning_rate": 1.975544259666683e-05, "loss": 1.8223, "step": 30337 }, { "epoch": 0.39, "grad_norm": 3.650501251220703, "learning_rate": 1.9755419499334687e-05, "loss": 1.8679, "step": 30338 }, { "epoch": 0.39, "grad_norm": 4.1391377449035645, "learning_rate": 1.9755396400925383e-05, "loss": 2.0664, "step": 30339 }, { "epoch": 0.39, "grad_norm": 3.4960732460021973, "learning_rate": 1.9755373301438917e-05, "loss": 1.8906, "step": 30340 }, { "epoch": 0.39, "grad_norm": 3.8012235164642334, "learning_rate": 1.975535020087529e-05, "loss": 2.2217, "step": 30341 }, { "epoch": 0.39, "grad_norm": 4.818115711212158, "learning_rate": 1.975532709923451e-05, "loss": 2.4166, "step": 30342 }, { "epoch": 0.39, "grad_norm": 5.172920227050781, "learning_rate": 1.9755303996516575e-05, "loss": 2.7621, "step": 30343 }, { "epoch": 0.39, "grad_norm": 3.662851095199585, "learning_rate": 1.975528089272149e-05, "loss": 1.9391, "step": 30344 }, { "epoch": 0.39, "grad_norm": 3.8569326400756836, "learning_rate": 1.9755257787849254e-05, "loss": 2.0837, "step": 30345 }, { "epoch": 0.39, "grad_norm": 3.7079999446868896, "learning_rate": 1.9755234681899876e-05, "loss": 1.8773, "step": 30346 }, { "epoch": 0.39, "grad_norm": 4.281673431396484, "learning_rate": 1.975521157487335e-05, "loss": 2.0371, "step": 30347 }, { "epoch": 0.39, "grad_norm": 4.070313453674316, "learning_rate": 1.9755188466769687e-05, "loss": 2.201, "step": 30348 }, { "epoch": 0.39, "grad_norm": 4.022700786590576, "learning_rate": 1.9755165357588885e-05, "loss": 1.9974, "step": 30349 }, { "epoch": 0.39, "grad_norm": 3.7813196182250977, "learning_rate": 1.9755142247330946e-05, "loss": 1.6204, "step": 30350 }, { "epoch": 0.39, "grad_norm": 4.007569313049316, "learning_rate": 1.9755119135995875e-05, "loss": 2.1803, "step": 30351 }, { "epoch": 0.39, "grad_norm": 3.7057926654815674, "learning_rate": 1.975509602358367e-05, "loss": 1.5802, "step": 30352 }, { "epoch": 0.39, "grad_norm": 4.188208103179932, "learning_rate": 1.975507291009434e-05, "loss": 1.8481, "step": 30353 }, { "epoch": 0.39, "grad_norm": 4.105578899383545, "learning_rate": 1.9755049795527886e-05, "loss": 2.0961, "step": 30354 }, { "epoch": 0.39, "grad_norm": 3.6097896099090576, "learning_rate": 1.9755026679884305e-05, "loss": 2.0435, "step": 30355 }, { "epoch": 0.39, "grad_norm": 4.033995628356934, "learning_rate": 1.975500356316361e-05, "loss": 2.5772, "step": 30356 }, { "epoch": 0.39, "grad_norm": 3.762639284133911, "learning_rate": 1.975498044536579e-05, "loss": 1.9939, "step": 30357 }, { "epoch": 0.39, "grad_norm": 4.144205570220947, "learning_rate": 1.9754957326490857e-05, "loss": 1.7141, "step": 30358 }, { "epoch": 0.39, "grad_norm": 3.8014814853668213, "learning_rate": 1.9754934206538812e-05, "loss": 1.9405, "step": 30359 }, { "epoch": 0.39, "grad_norm": 3.8422274589538574, "learning_rate": 1.9754911085509656e-05, "loss": 2.0272, "step": 30360 }, { "epoch": 0.39, "grad_norm": 3.42164945602417, "learning_rate": 1.9754887963403396e-05, "loss": 1.8162, "step": 30361 }, { "epoch": 0.39, "grad_norm": 3.8115580081939697, "learning_rate": 1.975486484022003e-05, "loss": 2.0984, "step": 30362 }, { "epoch": 0.39, "grad_norm": 3.3594672679901123, "learning_rate": 1.9754841715959557e-05, "loss": 1.7896, "step": 30363 }, { "epoch": 0.39, "grad_norm": 3.4951813220977783, "learning_rate": 1.975481859062198e-05, "loss": 2.3033, "step": 30364 }, { "epoch": 0.39, "grad_norm": 3.688767910003662, "learning_rate": 1.9754795464207315e-05, "loss": 2.1503, "step": 30365 }, { "epoch": 0.39, "grad_norm": 4.093333721160889, "learning_rate": 1.9754772336715552e-05, "loss": 2.1697, "step": 30366 }, { "epoch": 0.39, "grad_norm": 3.7931482791900635, "learning_rate": 1.9754749208146698e-05, "loss": 1.9759, "step": 30367 }, { "epoch": 0.39, "grad_norm": 3.6882681846618652, "learning_rate": 1.975472607850075e-05, "loss": 2.0979, "step": 30368 }, { "epoch": 0.39, "grad_norm": 4.296243667602539, "learning_rate": 1.975470294777772e-05, "loss": 2.488, "step": 30369 }, { "epoch": 0.39, "grad_norm": 4.119194030761719, "learning_rate": 1.97546798159776e-05, "loss": 2.5346, "step": 30370 }, { "epoch": 0.39, "grad_norm": 3.9254403114318848, "learning_rate": 1.9754656683100402e-05, "loss": 2.0873, "step": 30371 }, { "epoch": 0.39, "grad_norm": 3.2093324661254883, "learning_rate": 1.975463354914612e-05, "loss": 1.5924, "step": 30372 }, { "epoch": 0.39, "grad_norm": 3.578118085861206, "learning_rate": 1.9754610414114767e-05, "loss": 2.3648, "step": 30373 }, { "epoch": 0.39, "grad_norm": 3.790428876876831, "learning_rate": 1.9754587278006334e-05, "loss": 1.9494, "step": 30374 }, { "epoch": 0.39, "grad_norm": 3.8790531158447266, "learning_rate": 1.9754564140820834e-05, "loss": 1.9879, "step": 30375 }, { "epoch": 0.39, "grad_norm": 3.761064052581787, "learning_rate": 1.975454100255826e-05, "loss": 1.8871, "step": 30376 }, { "epoch": 0.39, "grad_norm": 3.9141907691955566, "learning_rate": 1.975451786321862e-05, "loss": 1.9974, "step": 30377 }, { "epoch": 0.39, "grad_norm": 4.258552074432373, "learning_rate": 1.9754494722801917e-05, "loss": 2.6025, "step": 30378 }, { "epoch": 0.39, "grad_norm": 4.055426120758057, "learning_rate": 1.9754471581308154e-05, "loss": 1.8337, "step": 30379 }, { "epoch": 0.39, "grad_norm": 3.5356428623199463, "learning_rate": 1.975444843873733e-05, "loss": 1.7688, "step": 30380 }, { "epoch": 0.39, "grad_norm": 4.427013397216797, "learning_rate": 1.975442529508945e-05, "loss": 2.5828, "step": 30381 }, { "epoch": 0.39, "grad_norm": 3.5347585678100586, "learning_rate": 1.9754402150364514e-05, "loss": 1.7337, "step": 30382 }, { "epoch": 0.39, "grad_norm": 3.583512306213379, "learning_rate": 1.9754379004562527e-05, "loss": 1.6745, "step": 30383 }, { "epoch": 0.39, "grad_norm": 4.415435314178467, "learning_rate": 1.9754355857683494e-05, "loss": 2.6326, "step": 30384 }, { "epoch": 0.39, "grad_norm": 3.7078871726989746, "learning_rate": 1.975433270972741e-05, "loss": 1.9819, "step": 30385 }, { "epoch": 0.39, "grad_norm": 3.5218234062194824, "learning_rate": 1.9754309560694286e-05, "loss": 2.0936, "step": 30386 }, { "epoch": 0.39, "grad_norm": 3.5176053047180176, "learning_rate": 1.9754286410584124e-05, "loss": 1.9221, "step": 30387 }, { "epoch": 0.39, "grad_norm": 3.5860655307769775, "learning_rate": 1.9754263259396915e-05, "loss": 1.6578, "step": 30388 }, { "epoch": 0.39, "grad_norm": 4.340404510498047, "learning_rate": 1.9754240107132677e-05, "loss": 2.2837, "step": 30389 }, { "epoch": 0.39, "grad_norm": 3.4494667053222656, "learning_rate": 1.97542169537914e-05, "loss": 1.8882, "step": 30390 }, { "epoch": 0.39, "grad_norm": 4.040198802947998, "learning_rate": 1.9754193799373092e-05, "loss": 2.0845, "step": 30391 }, { "epoch": 0.39, "grad_norm": 3.7093093395233154, "learning_rate": 1.975417064387776e-05, "loss": 1.9401, "step": 30392 }, { "epoch": 0.39, "grad_norm": 4.041059970855713, "learning_rate": 1.97541474873054e-05, "loss": 2.5986, "step": 30393 }, { "epoch": 0.39, "grad_norm": 3.6952097415924072, "learning_rate": 1.9754124329656018e-05, "loss": 1.7597, "step": 30394 }, { "epoch": 0.39, "grad_norm": 3.321610450744629, "learning_rate": 1.9754101170929613e-05, "loss": 1.8277, "step": 30395 }, { "epoch": 0.39, "grad_norm": 4.603448867797852, "learning_rate": 1.975407801112619e-05, "loss": 2.8676, "step": 30396 }, { "epoch": 0.39, "grad_norm": 3.592921257019043, "learning_rate": 1.975405485024575e-05, "loss": 2.0677, "step": 30397 }, { "epoch": 0.39, "grad_norm": 4.126304626464844, "learning_rate": 1.9754031688288296e-05, "loss": 2.2815, "step": 30398 }, { "epoch": 0.39, "grad_norm": 3.7560956478118896, "learning_rate": 1.9754008525253838e-05, "loss": 2.0887, "step": 30399 }, { "epoch": 0.39, "grad_norm": 3.678659439086914, "learning_rate": 1.9753985361142363e-05, "loss": 1.9547, "step": 30400 }, { "epoch": 0.39, "grad_norm": 4.3654632568359375, "learning_rate": 1.975396219595389e-05, "loss": 2.3001, "step": 30401 }, { "epoch": 0.39, "grad_norm": 3.667886257171631, "learning_rate": 1.975393902968841e-05, "loss": 1.6324, "step": 30402 }, { "epoch": 0.39, "grad_norm": 3.517260789871216, "learning_rate": 1.9753915862345933e-05, "loss": 1.8207, "step": 30403 }, { "epoch": 0.39, "grad_norm": 3.656691789627075, "learning_rate": 1.9753892693926455e-05, "loss": 1.8521, "step": 30404 }, { "epoch": 0.39, "grad_norm": 4.105992794036865, "learning_rate": 1.9753869524429986e-05, "loss": 2.4142, "step": 30405 }, { "epoch": 0.39, "grad_norm": 3.453401565551758, "learning_rate": 1.975384635385652e-05, "loss": 1.8934, "step": 30406 }, { "epoch": 0.39, "grad_norm": 3.29719614982605, "learning_rate": 1.9753823182206067e-05, "loss": 1.6233, "step": 30407 }, { "epoch": 0.39, "grad_norm": 3.468984365463257, "learning_rate": 1.9753800009478624e-05, "loss": 1.7147, "step": 30408 }, { "epoch": 0.39, "grad_norm": 4.280978202819824, "learning_rate": 1.9753776835674196e-05, "loss": 1.85, "step": 30409 }, { "epoch": 0.39, "grad_norm": 3.2884886264801025, "learning_rate": 1.9753753660792786e-05, "loss": 1.3568, "step": 30410 }, { "epoch": 0.39, "grad_norm": 4.00651741027832, "learning_rate": 1.9753730484834397e-05, "loss": 2.0342, "step": 30411 }, { "epoch": 0.39, "grad_norm": 3.641730308532715, "learning_rate": 1.975370730779903e-05, "loss": 1.7957, "step": 30412 }, { "epoch": 0.39, "grad_norm": 3.36149263381958, "learning_rate": 1.9753684129686687e-05, "loss": 1.5488, "step": 30413 }, { "epoch": 0.39, "grad_norm": 3.629755735397339, "learning_rate": 1.9753660950497372e-05, "loss": 2.1529, "step": 30414 }, { "epoch": 0.39, "grad_norm": 4.02178955078125, "learning_rate": 1.9753637770231092e-05, "loss": 2.2509, "step": 30415 }, { "epoch": 0.39, "grad_norm": 3.791287899017334, "learning_rate": 1.975361458888784e-05, "loss": 1.9294, "step": 30416 }, { "epoch": 0.39, "grad_norm": 3.690528154373169, "learning_rate": 1.9753591406467624e-05, "loss": 1.6203, "step": 30417 }, { "epoch": 0.39, "grad_norm": 4.127653121948242, "learning_rate": 1.975356822297045e-05, "loss": 2.0396, "step": 30418 }, { "epoch": 0.39, "grad_norm": 4.207347869873047, "learning_rate": 1.9753545038396315e-05, "loss": 2.2724, "step": 30419 }, { "epoch": 0.39, "grad_norm": 3.822291612625122, "learning_rate": 1.975352185274522e-05, "loss": 1.8963, "step": 30420 }, { "epoch": 0.39, "grad_norm": 4.020008563995361, "learning_rate": 1.9753498666017173e-05, "loss": 1.916, "step": 30421 }, { "epoch": 0.39, "grad_norm": 3.8174428939819336, "learning_rate": 1.975347547821217e-05, "loss": 2.2128, "step": 30422 }, { "epoch": 0.39, "grad_norm": 3.6128792762756348, "learning_rate": 1.9753452289330224e-05, "loss": 1.6803, "step": 30423 }, { "epoch": 0.39, "grad_norm": 3.565516233444214, "learning_rate": 1.975342909937133e-05, "loss": 1.7636, "step": 30424 }, { "epoch": 0.39, "grad_norm": 3.7503771781921387, "learning_rate": 1.975340590833549e-05, "loss": 1.9388, "step": 30425 }, { "epoch": 0.39, "grad_norm": 4.037392616271973, "learning_rate": 1.975338271622271e-05, "loss": 1.8706, "step": 30426 }, { "epoch": 0.39, "grad_norm": 3.658320903778076, "learning_rate": 1.975335952303299e-05, "loss": 2.2296, "step": 30427 }, { "epoch": 0.39, "grad_norm": 3.917137622833252, "learning_rate": 1.9753336328766336e-05, "loss": 1.7069, "step": 30428 }, { "epoch": 0.39, "grad_norm": 3.9152119159698486, "learning_rate": 1.975331313342274e-05, "loss": 1.8226, "step": 30429 }, { "epoch": 0.39, "grad_norm": 3.8076891899108887, "learning_rate": 1.975328993700222e-05, "loss": 1.6758, "step": 30430 }, { "epoch": 0.39, "grad_norm": 4.214661598205566, "learning_rate": 1.9753266739504774e-05, "loss": 2.4549, "step": 30431 }, { "epoch": 0.39, "grad_norm": 3.0137031078338623, "learning_rate": 1.97532435409304e-05, "loss": 1.3667, "step": 30432 }, { "epoch": 0.39, "grad_norm": 3.831834077835083, "learning_rate": 1.97532203412791e-05, "loss": 2.0794, "step": 30433 }, { "epoch": 0.39, "grad_norm": 3.380276679992676, "learning_rate": 1.9753197140550878e-05, "loss": 2.1865, "step": 30434 }, { "epoch": 0.39, "grad_norm": 3.9497692584991455, "learning_rate": 1.975317393874574e-05, "loss": 2.2815, "step": 30435 }, { "epoch": 0.39, "grad_norm": 3.965731620788574, "learning_rate": 1.9753150735863684e-05, "loss": 2.0262, "step": 30436 }, { "epoch": 0.4, "grad_norm": 3.313138961791992, "learning_rate": 1.9753127531904717e-05, "loss": 1.5752, "step": 30437 }, { "epoch": 0.4, "grad_norm": 4.335632801055908, "learning_rate": 1.975310432686884e-05, "loss": 2.3227, "step": 30438 }, { "epoch": 0.4, "grad_norm": 4.140985488891602, "learning_rate": 1.9753081120756053e-05, "loss": 2.0133, "step": 30439 }, { "epoch": 0.4, "grad_norm": 4.354211807250977, "learning_rate": 1.975305791356636e-05, "loss": 2.0242, "step": 30440 }, { "epoch": 0.4, "grad_norm": 3.9508004188537598, "learning_rate": 1.9753034705299763e-05, "loss": 2.1723, "step": 30441 }, { "epoch": 0.4, "grad_norm": 4.463911533355713, "learning_rate": 1.975301149595627e-05, "loss": 2.316, "step": 30442 }, { "epoch": 0.4, "grad_norm": 3.965758800506592, "learning_rate": 1.9752988285535873e-05, "loss": 1.9034, "step": 30443 }, { "epoch": 0.4, "grad_norm": 3.9682419300079346, "learning_rate": 1.9752965074038584e-05, "loss": 2.2941, "step": 30444 }, { "epoch": 0.4, "grad_norm": 4.0844645500183105, "learning_rate": 1.9752941861464404e-05, "loss": 2.0839, "step": 30445 }, { "epoch": 0.4, "grad_norm": 3.7753171920776367, "learning_rate": 1.975291864781333e-05, "loss": 1.6977, "step": 30446 }, { "epoch": 0.4, "grad_norm": 3.9315874576568604, "learning_rate": 1.975289543308537e-05, "loss": 1.9198, "step": 30447 }, { "epoch": 0.4, "grad_norm": 4.030876636505127, "learning_rate": 1.9752872217280524e-05, "loss": 2.0532, "step": 30448 }, { "epoch": 0.4, "grad_norm": 4.15888786315918, "learning_rate": 1.97528490003988e-05, "loss": 2.1364, "step": 30449 }, { "epoch": 0.4, "grad_norm": 3.697803020477295, "learning_rate": 1.975282578244019e-05, "loss": 1.7838, "step": 30450 }, { "epoch": 0.4, "grad_norm": 4.131383419036865, "learning_rate": 1.9752802563404704e-05, "loss": 2.227, "step": 30451 }, { "epoch": 0.4, "grad_norm": 3.51855206489563, "learning_rate": 1.9752779343292344e-05, "loss": 1.8395, "step": 30452 }, { "epoch": 0.4, "grad_norm": 3.97214412689209, "learning_rate": 1.9752756122103114e-05, "loss": 2.3221, "step": 30453 }, { "epoch": 0.4, "grad_norm": 3.7660629749298096, "learning_rate": 1.9752732899837014e-05, "loss": 1.9874, "step": 30454 }, { "epoch": 0.4, "grad_norm": 4.19431209564209, "learning_rate": 1.9752709676494044e-05, "loss": 2.4727, "step": 30455 }, { "epoch": 0.4, "grad_norm": 3.760819911956787, "learning_rate": 1.9752686452074206e-05, "loss": 2.1077, "step": 30456 }, { "epoch": 0.4, "grad_norm": 4.177850246429443, "learning_rate": 1.9752663226577512e-05, "loss": 2.4354, "step": 30457 }, { "epoch": 0.4, "grad_norm": 4.959370136260986, "learning_rate": 1.9752640000003958e-05, "loss": 2.5034, "step": 30458 }, { "epoch": 0.4, "grad_norm": 3.790494918823242, "learning_rate": 1.9752616772353547e-05, "loss": 2.2164, "step": 30459 }, { "epoch": 0.4, "grad_norm": 3.6430952548980713, "learning_rate": 1.975259354362628e-05, "loss": 1.7149, "step": 30460 }, { "epoch": 0.4, "grad_norm": 3.6092851161956787, "learning_rate": 1.9752570313822162e-05, "loss": 1.9131, "step": 30461 }, { "epoch": 0.4, "grad_norm": 3.724076271057129, "learning_rate": 1.9752547082941195e-05, "loss": 2.1588, "step": 30462 }, { "epoch": 0.4, "grad_norm": 3.4462897777557373, "learning_rate": 1.975252385098338e-05, "loss": 1.9738, "step": 30463 }, { "epoch": 0.4, "grad_norm": 4.11160945892334, "learning_rate": 1.975250061794872e-05, "loss": 2.158, "step": 30464 }, { "epoch": 0.4, "grad_norm": 3.6354899406433105, "learning_rate": 1.975247738383722e-05, "loss": 1.5551, "step": 30465 }, { "epoch": 0.4, "grad_norm": 4.770930767059326, "learning_rate": 1.975245414864888e-05, "loss": 2.5631, "step": 30466 }, { "epoch": 0.4, "grad_norm": 3.7674736976623535, "learning_rate": 1.9752430912383707e-05, "loss": 1.9318, "step": 30467 }, { "epoch": 0.4, "grad_norm": 3.9449284076690674, "learning_rate": 1.9752407675041698e-05, "loss": 1.5997, "step": 30468 }, { "epoch": 0.4, "grad_norm": 4.021422863006592, "learning_rate": 1.975238443662286e-05, "loss": 2.3819, "step": 30469 }, { "epoch": 0.4, "grad_norm": 4.3766679763793945, "learning_rate": 1.9752361197127187e-05, "loss": 2.3213, "step": 30470 }, { "epoch": 0.4, "grad_norm": 4.269622325897217, "learning_rate": 1.9752337956554695e-05, "loss": 2.0321, "step": 30471 }, { "epoch": 0.4, "grad_norm": 3.92228627204895, "learning_rate": 1.9752314714905378e-05, "loss": 1.7293, "step": 30472 }, { "epoch": 0.4, "grad_norm": 3.642043352127075, "learning_rate": 1.9752291472179234e-05, "loss": 2.0546, "step": 30473 }, { "epoch": 0.4, "grad_norm": 3.816737413406372, "learning_rate": 1.9752268228376278e-05, "loss": 2.0992, "step": 30474 }, { "epoch": 0.4, "grad_norm": 4.015981197357178, "learning_rate": 1.9752244983496505e-05, "loss": 2.2486, "step": 30475 }, { "epoch": 0.4, "grad_norm": 3.7604317665100098, "learning_rate": 1.975222173753992e-05, "loss": 2.3207, "step": 30476 }, { "epoch": 0.4, "grad_norm": 4.211779594421387, "learning_rate": 1.975219849050652e-05, "loss": 2.4033, "step": 30477 }, { "epoch": 0.4, "grad_norm": 4.139884948730469, "learning_rate": 1.975217524239631e-05, "loss": 2.2549, "step": 30478 }, { "epoch": 0.4, "grad_norm": 4.0614542961120605, "learning_rate": 1.9752151993209302e-05, "loss": 2.3652, "step": 30479 }, { "epoch": 0.4, "grad_norm": 4.106967449188232, "learning_rate": 1.9752128742945487e-05, "loss": 2.3317, "step": 30480 }, { "epoch": 0.4, "grad_norm": 4.392910003662109, "learning_rate": 1.9752105491604872e-05, "loss": 2.3248, "step": 30481 }, { "epoch": 0.4, "grad_norm": 4.461334705352783, "learning_rate": 1.975208223918746e-05, "loss": 2.7925, "step": 30482 }, { "epoch": 0.4, "grad_norm": 3.855962038040161, "learning_rate": 1.9752058985693252e-05, "loss": 1.9313, "step": 30483 }, { "epoch": 0.4, "grad_norm": 3.39017915725708, "learning_rate": 1.975203573112225e-05, "loss": 1.6079, "step": 30484 }, { "epoch": 0.4, "grad_norm": 3.3770322799682617, "learning_rate": 1.9752012475474457e-05, "loss": 2.143, "step": 30485 }, { "epoch": 0.4, "grad_norm": 4.5142822265625, "learning_rate": 1.9751989218749877e-05, "loss": 2.3827, "step": 30486 }, { "epoch": 0.4, "grad_norm": 4.104736804962158, "learning_rate": 1.9751965960948515e-05, "loss": 2.7583, "step": 30487 }, { "epoch": 0.4, "grad_norm": 3.832376480102539, "learning_rate": 1.975194270207037e-05, "loss": 2.073, "step": 30488 }, { "epoch": 0.4, "grad_norm": 3.873530626296997, "learning_rate": 1.9751919442115445e-05, "loss": 1.9641, "step": 30489 }, { "epoch": 0.4, "grad_norm": 4.043654441833496, "learning_rate": 1.9751896181083737e-05, "loss": 1.8401, "step": 30490 }, { "epoch": 0.4, "grad_norm": 3.723926544189453, "learning_rate": 1.975187291897526e-05, "loss": 2.0596, "step": 30491 }, { "epoch": 0.4, "grad_norm": 4.067514896392822, "learning_rate": 1.975184965579001e-05, "loss": 2.4069, "step": 30492 }, { "epoch": 0.4, "grad_norm": 4.205361366271973, "learning_rate": 1.975182639152799e-05, "loss": 2.1566, "step": 30493 }, { "epoch": 0.4, "grad_norm": 4.154428958892822, "learning_rate": 1.97518031261892e-05, "loss": 2.1022, "step": 30494 }, { "epoch": 0.4, "grad_norm": 4.3543500900268555, "learning_rate": 1.975177985977365e-05, "loss": 2.1341, "step": 30495 }, { "epoch": 0.4, "grad_norm": 3.899819850921631, "learning_rate": 1.9751756592281337e-05, "loss": 1.8653, "step": 30496 }, { "epoch": 0.4, "grad_norm": 3.80769681930542, "learning_rate": 1.9751733323712265e-05, "loss": 1.8109, "step": 30497 }, { "epoch": 0.4, "grad_norm": 3.7452969551086426, "learning_rate": 1.975171005406643e-05, "loss": 1.8014, "step": 30498 }, { "epoch": 0.4, "grad_norm": 3.831768751144409, "learning_rate": 1.975168678334385e-05, "loss": 2.2245, "step": 30499 }, { "epoch": 0.4, "grad_norm": 4.140511512756348, "learning_rate": 1.9751663511544513e-05, "loss": 1.9458, "step": 30500 }, { "epoch": 0.4, "grad_norm": 3.726989507675171, "learning_rate": 1.9751640238668428e-05, "loss": 1.6915, "step": 30501 }, { "epoch": 0.4, "grad_norm": 3.579380750656128, "learning_rate": 1.9751616964715598e-05, "loss": 1.9116, "step": 30502 }, { "epoch": 0.4, "grad_norm": 4.366888999938965, "learning_rate": 1.9751593689686023e-05, "loss": 2.0258, "step": 30503 }, { "epoch": 0.4, "grad_norm": 3.6143453121185303, "learning_rate": 1.9751570413579703e-05, "loss": 2.0109, "step": 30504 }, { "epoch": 0.4, "grad_norm": 3.2153658866882324, "learning_rate": 1.9751547136396646e-05, "loss": 1.5529, "step": 30505 }, { "epoch": 0.4, "grad_norm": 4.080399513244629, "learning_rate": 1.9751523858136856e-05, "loss": 2.2022, "step": 30506 }, { "epoch": 0.4, "grad_norm": 3.2587509155273438, "learning_rate": 1.9751500578800333e-05, "loss": 1.4789, "step": 30507 }, { "epoch": 0.4, "grad_norm": 3.3102645874023438, "learning_rate": 1.9751477298387074e-05, "loss": 1.7542, "step": 30508 }, { "epoch": 0.4, "grad_norm": 3.1524598598480225, "learning_rate": 1.9751454016897087e-05, "loss": 1.5329, "step": 30509 }, { "epoch": 0.4, "grad_norm": 3.525521993637085, "learning_rate": 1.9751430734330376e-05, "loss": 1.7092, "step": 30510 }, { "epoch": 0.4, "grad_norm": 4.496497631072998, "learning_rate": 1.975140745068694e-05, "loss": 2.1626, "step": 30511 }, { "epoch": 0.4, "grad_norm": 3.8764841556549072, "learning_rate": 1.9751384165966787e-05, "loss": 2.018, "step": 30512 }, { "epoch": 0.4, "grad_norm": 3.530059814453125, "learning_rate": 1.9751360880169912e-05, "loss": 1.9289, "step": 30513 }, { "epoch": 0.4, "grad_norm": 3.489473342895508, "learning_rate": 1.9751337593296323e-05, "loss": 1.6797, "step": 30514 }, { "epoch": 0.4, "grad_norm": 3.726304769515991, "learning_rate": 1.975131430534602e-05, "loss": 2.4093, "step": 30515 }, { "epoch": 0.4, "grad_norm": 3.9972119331359863, "learning_rate": 1.975129101631901e-05, "loss": 2.3707, "step": 30516 }, { "epoch": 0.4, "grad_norm": 4.07098913192749, "learning_rate": 1.9751267726215286e-05, "loss": 2.8931, "step": 30517 }, { "epoch": 0.4, "grad_norm": 3.8995401859283447, "learning_rate": 1.9751244435034863e-05, "loss": 2.4052, "step": 30518 }, { "epoch": 0.4, "grad_norm": 3.5228590965270996, "learning_rate": 1.9751221142777733e-05, "loss": 1.9359, "step": 30519 }, { "epoch": 0.4, "grad_norm": 3.8640525341033936, "learning_rate": 1.9751197849443905e-05, "loss": 2.4599, "step": 30520 }, { "epoch": 0.4, "grad_norm": 3.7945563793182373, "learning_rate": 1.9751174555033376e-05, "loss": 1.8405, "step": 30521 }, { "epoch": 0.4, "grad_norm": 3.6883046627044678, "learning_rate": 1.9751151259546153e-05, "loss": 1.8401, "step": 30522 }, { "epoch": 0.4, "grad_norm": 3.479581832885742, "learning_rate": 1.9751127962982236e-05, "loss": 1.6102, "step": 30523 }, { "epoch": 0.4, "grad_norm": 4.22259521484375, "learning_rate": 1.9751104665341635e-05, "loss": 2.2385, "step": 30524 }, { "epoch": 0.4, "grad_norm": 3.82049822807312, "learning_rate": 1.975108136662434e-05, "loss": 1.8668, "step": 30525 }, { "epoch": 0.4, "grad_norm": 4.071282386779785, "learning_rate": 1.9751058066830367e-05, "loss": 2.2246, "step": 30526 }, { "epoch": 0.4, "grad_norm": 3.3563754558563232, "learning_rate": 1.9751034765959708e-05, "loss": 1.6116, "step": 30527 }, { "epoch": 0.4, "grad_norm": 3.6442816257476807, "learning_rate": 1.9751011464012368e-05, "loss": 1.7203, "step": 30528 }, { "epoch": 0.4, "grad_norm": 3.835512399673462, "learning_rate": 1.975098816098835e-05, "loss": 2.3077, "step": 30529 }, { "epoch": 0.4, "grad_norm": 3.4017374515533447, "learning_rate": 1.975096485688766e-05, "loss": 1.7485, "step": 30530 }, { "epoch": 0.4, "grad_norm": 4.8318281173706055, "learning_rate": 1.97509415517103e-05, "loss": 2.7765, "step": 30531 }, { "epoch": 0.4, "grad_norm": 4.239443302154541, "learning_rate": 1.9750918245456267e-05, "loss": 2.6554, "step": 30532 }, { "epoch": 0.4, "grad_norm": 4.024260520935059, "learning_rate": 1.975089493812557e-05, "loss": 2.117, "step": 30533 }, { "epoch": 0.4, "grad_norm": 4.11257791519165, "learning_rate": 1.9750871629718203e-05, "loss": 2.1104, "step": 30534 }, { "epoch": 0.4, "grad_norm": 3.824495792388916, "learning_rate": 1.975084832023418e-05, "loss": 1.9488, "step": 30535 }, { "epoch": 0.4, "grad_norm": 3.957681179046631, "learning_rate": 1.9750825009673498e-05, "loss": 2.0996, "step": 30536 }, { "epoch": 0.4, "grad_norm": 3.882359027862549, "learning_rate": 1.975080169803616e-05, "loss": 2.119, "step": 30537 }, { "epoch": 0.4, "grad_norm": 3.692516565322876, "learning_rate": 1.9750778385322166e-05, "loss": 2.0562, "step": 30538 }, { "epoch": 0.4, "grad_norm": 3.994503974914551, "learning_rate": 1.975075507153152e-05, "loss": 1.7819, "step": 30539 }, { "epoch": 0.4, "grad_norm": 3.8575210571289062, "learning_rate": 1.9750731756664224e-05, "loss": 2.1126, "step": 30540 }, { "epoch": 0.4, "grad_norm": 3.682621479034424, "learning_rate": 1.9750708440720284e-05, "loss": 1.786, "step": 30541 }, { "epoch": 0.4, "grad_norm": 4.057621955871582, "learning_rate": 1.97506851236997e-05, "loss": 2.4256, "step": 30542 }, { "epoch": 0.4, "grad_norm": 3.905318260192871, "learning_rate": 1.9750661805602476e-05, "loss": 2.1205, "step": 30543 }, { "epoch": 0.4, "grad_norm": 3.918172836303711, "learning_rate": 1.9750638486428613e-05, "loss": 2.0958, "step": 30544 }, { "epoch": 0.4, "grad_norm": 3.905362367630005, "learning_rate": 1.9750615166178115e-05, "loss": 2.2532, "step": 30545 }, { "epoch": 0.4, "grad_norm": 3.2438313961029053, "learning_rate": 1.975059184485098e-05, "loss": 1.5138, "step": 30546 }, { "epoch": 0.4, "grad_norm": 4.189520835876465, "learning_rate": 1.9750568522447214e-05, "loss": 1.9456, "step": 30547 }, { "epoch": 0.4, "grad_norm": 4.16375732421875, "learning_rate": 1.9750545198966823e-05, "loss": 2.1832, "step": 30548 }, { "epoch": 0.4, "grad_norm": 3.787029981613159, "learning_rate": 1.975052187440981e-05, "loss": 2.103, "step": 30549 }, { "epoch": 0.4, "grad_norm": 4.484210968017578, "learning_rate": 1.9750498548776168e-05, "loss": 2.0037, "step": 30550 }, { "epoch": 0.4, "grad_norm": 3.763187885284424, "learning_rate": 1.9750475222065904e-05, "loss": 2.076, "step": 30551 }, { "epoch": 0.4, "grad_norm": 4.395678520202637, "learning_rate": 1.9750451894279028e-05, "loss": 2.8966, "step": 30552 }, { "epoch": 0.4, "grad_norm": 4.3416852951049805, "learning_rate": 1.9750428565415536e-05, "loss": 2.3769, "step": 30553 }, { "epoch": 0.4, "grad_norm": 3.954547166824341, "learning_rate": 1.9750405235475427e-05, "loss": 2.0581, "step": 30554 }, { "epoch": 0.4, "grad_norm": 3.872015953063965, "learning_rate": 1.975038190445871e-05, "loss": 2.0763, "step": 30555 }, { "epoch": 0.4, "grad_norm": 3.828068733215332, "learning_rate": 1.9750358572365385e-05, "loss": 1.922, "step": 30556 }, { "epoch": 0.4, "grad_norm": 4.123703956604004, "learning_rate": 1.975033523919546e-05, "loss": 1.9962, "step": 30557 }, { "epoch": 0.4, "grad_norm": 4.0302863121032715, "learning_rate": 1.9750311904948923e-05, "loss": 2.0297, "step": 30558 }, { "epoch": 0.4, "grad_norm": 5.0670166015625, "learning_rate": 1.975028856962579e-05, "loss": 1.9988, "step": 30559 }, { "epoch": 0.4, "grad_norm": 4.681705951690674, "learning_rate": 1.9750265233226064e-05, "loss": 2.5705, "step": 30560 }, { "epoch": 0.4, "grad_norm": 4.141386032104492, "learning_rate": 1.975024189574974e-05, "loss": 2.0888, "step": 30561 }, { "epoch": 0.4, "grad_norm": 3.4250431060791016, "learning_rate": 1.9750218557196825e-05, "loss": 1.7785, "step": 30562 }, { "epoch": 0.4, "grad_norm": 5.297362327575684, "learning_rate": 1.975019521756732e-05, "loss": 2.4281, "step": 30563 }, { "epoch": 0.4, "grad_norm": 3.5681796073913574, "learning_rate": 1.9750171876861226e-05, "loss": 1.6348, "step": 30564 }, { "epoch": 0.4, "grad_norm": 4.332681179046631, "learning_rate": 1.975014853507855e-05, "loss": 2.5884, "step": 30565 }, { "epoch": 0.4, "grad_norm": 3.3938779830932617, "learning_rate": 1.975012519221929e-05, "loss": 1.7405, "step": 30566 }, { "epoch": 0.4, "grad_norm": 3.8142120838165283, "learning_rate": 1.9750101848283454e-05, "loss": 2.1017, "step": 30567 }, { "epoch": 0.4, "grad_norm": 4.13226318359375, "learning_rate": 1.9750078503271042e-05, "loss": 1.8396, "step": 30568 }, { "epoch": 0.4, "grad_norm": 3.8815276622772217, "learning_rate": 1.975005515718205e-05, "loss": 2.2333, "step": 30569 }, { "epoch": 0.4, "grad_norm": 4.290497779846191, "learning_rate": 1.9750031810016493e-05, "loss": 2.2467, "step": 30570 }, { "epoch": 0.4, "grad_norm": 3.646632432937622, "learning_rate": 1.9750008461774364e-05, "loss": 1.7489, "step": 30571 }, { "epoch": 0.4, "grad_norm": 4.387386798858643, "learning_rate": 1.9749985112455666e-05, "loss": 2.0931, "step": 30572 }, { "epoch": 0.4, "grad_norm": 4.397806644439697, "learning_rate": 1.974996176206041e-05, "loss": 2.252, "step": 30573 }, { "epoch": 0.4, "grad_norm": 3.7778048515319824, "learning_rate": 1.974993841058859e-05, "loss": 1.585, "step": 30574 }, { "epoch": 0.4, "grad_norm": 4.157570838928223, "learning_rate": 1.974991505804021e-05, "loss": 2.6769, "step": 30575 }, { "epoch": 0.4, "grad_norm": 4.017526626586914, "learning_rate": 1.9749891704415277e-05, "loss": 2.236, "step": 30576 }, { "epoch": 0.4, "grad_norm": 4.006961345672607, "learning_rate": 1.974986834971379e-05, "loss": 1.9961, "step": 30577 }, { "epoch": 0.4, "grad_norm": 4.0891194343566895, "learning_rate": 1.9749844993935747e-05, "loss": 1.8951, "step": 30578 }, { "epoch": 0.4, "grad_norm": 3.9692230224609375, "learning_rate": 1.974982163708116e-05, "loss": 1.9229, "step": 30579 }, { "epoch": 0.4, "grad_norm": 4.0092267990112305, "learning_rate": 1.9749798279150028e-05, "loss": 2.2789, "step": 30580 }, { "epoch": 0.4, "grad_norm": 3.2760212421417236, "learning_rate": 1.974977492014235e-05, "loss": 1.5185, "step": 30581 }, { "epoch": 0.4, "grad_norm": 4.157954692840576, "learning_rate": 1.9749751560058135e-05, "loss": 2.5077, "step": 30582 }, { "epoch": 0.4, "grad_norm": 3.7077839374542236, "learning_rate": 1.9749728198897378e-05, "loss": 1.6668, "step": 30583 }, { "epoch": 0.4, "grad_norm": 4.5326080322265625, "learning_rate": 1.974970483666009e-05, "loss": 2.4513, "step": 30584 }, { "epoch": 0.4, "grad_norm": 4.9338884353637695, "learning_rate": 1.9749681473346266e-05, "loss": 2.7451, "step": 30585 }, { "epoch": 0.4, "grad_norm": 4.078834533691406, "learning_rate": 1.9749658108955915e-05, "loss": 1.8575, "step": 30586 }, { "epoch": 0.4, "grad_norm": 3.892436981201172, "learning_rate": 1.9749634743489032e-05, "loss": 2.2669, "step": 30587 }, { "epoch": 0.4, "grad_norm": 4.0669684410095215, "learning_rate": 1.9749611376945625e-05, "loss": 1.682, "step": 30588 }, { "epoch": 0.4, "grad_norm": 4.139720916748047, "learning_rate": 1.97495880093257e-05, "loss": 2.226, "step": 30589 }, { "epoch": 0.4, "grad_norm": 3.79868745803833, "learning_rate": 1.974956464062925e-05, "loss": 2.1592, "step": 30590 }, { "epoch": 0.4, "grad_norm": 4.2231645584106445, "learning_rate": 1.974954127085628e-05, "loss": 2.2226, "step": 30591 }, { "epoch": 0.4, "grad_norm": 3.9810235500335693, "learning_rate": 1.97495179000068e-05, "loss": 1.7077, "step": 30592 }, { "epoch": 0.4, "grad_norm": 4.136220932006836, "learning_rate": 1.9749494528080812e-05, "loss": 2.2463, "step": 30593 }, { "epoch": 0.4, "grad_norm": 4.066375732421875, "learning_rate": 1.9749471155078307e-05, "loss": 2.0014, "step": 30594 }, { "epoch": 0.4, "grad_norm": 4.4513349533081055, "learning_rate": 1.97494477809993e-05, "loss": 2.2307, "step": 30595 }, { "epoch": 0.4, "grad_norm": 3.749227285385132, "learning_rate": 1.9749424405843785e-05, "loss": 1.7273, "step": 30596 }, { "epoch": 0.4, "grad_norm": 3.372251272201538, "learning_rate": 1.9749401029611767e-05, "loss": 1.8732, "step": 30597 }, { "epoch": 0.4, "grad_norm": 3.818544387817383, "learning_rate": 1.9749377652303252e-05, "loss": 1.8523, "step": 30598 }, { "epoch": 0.4, "grad_norm": 3.9378461837768555, "learning_rate": 1.9749354273918243e-05, "loss": 2.023, "step": 30599 }, { "epoch": 0.4, "grad_norm": 3.5137367248535156, "learning_rate": 1.9749330894456736e-05, "loss": 1.7014, "step": 30600 }, { "epoch": 0.4, "grad_norm": 3.9680979251861572, "learning_rate": 1.9749307513918738e-05, "loss": 2.2429, "step": 30601 }, { "epoch": 0.4, "grad_norm": 4.64681339263916, "learning_rate": 1.9749284132304253e-05, "loss": 2.2717, "step": 30602 }, { "epoch": 0.4, "grad_norm": 4.214014530181885, "learning_rate": 1.974926074961328e-05, "loss": 2.114, "step": 30603 }, { "epoch": 0.4, "grad_norm": 4.231046199798584, "learning_rate": 1.9749237365845824e-05, "loss": 2.0871, "step": 30604 }, { "epoch": 0.4, "grad_norm": 4.408615589141846, "learning_rate": 1.9749213981001887e-05, "loss": 2.4855, "step": 30605 }, { "epoch": 0.4, "grad_norm": 3.9009287357330322, "learning_rate": 1.974919059508147e-05, "loss": 2.1157, "step": 30606 }, { "epoch": 0.4, "grad_norm": 4.027788162231445, "learning_rate": 1.9749167208084577e-05, "loss": 2.105, "step": 30607 }, { "epoch": 0.4, "grad_norm": 5.029284477233887, "learning_rate": 1.9749143820011212e-05, "loss": 2.2833, "step": 30608 }, { "epoch": 0.4, "grad_norm": 3.405545711517334, "learning_rate": 1.9749120430861376e-05, "loss": 1.6687, "step": 30609 }, { "epoch": 0.4, "grad_norm": 3.9921700954437256, "learning_rate": 1.974909704063507e-05, "loss": 2.1619, "step": 30610 }, { "epoch": 0.4, "grad_norm": 3.9822683334350586, "learning_rate": 1.97490736493323e-05, "loss": 2.1998, "step": 30611 }, { "epoch": 0.4, "grad_norm": 3.572009563446045, "learning_rate": 1.9749050256953066e-05, "loss": 2.1019, "step": 30612 }, { "epoch": 0.4, "grad_norm": 3.667422294616699, "learning_rate": 1.974902686349737e-05, "loss": 2.0755, "step": 30613 }, { "epoch": 0.4, "grad_norm": 4.273163795471191, "learning_rate": 1.974900346896522e-05, "loss": 2.1867, "step": 30614 }, { "epoch": 0.4, "grad_norm": 3.9760890007019043, "learning_rate": 1.9748980073356613e-05, "loss": 2.3825, "step": 30615 }, { "epoch": 0.4, "grad_norm": 3.3863320350646973, "learning_rate": 1.974895667667155e-05, "loss": 1.4902, "step": 30616 }, { "epoch": 0.4, "grad_norm": 3.279366970062256, "learning_rate": 1.974893327891004e-05, "loss": 1.6167, "step": 30617 }, { "epoch": 0.4, "grad_norm": 3.837599277496338, "learning_rate": 1.9748909880072082e-05, "loss": 2.1603, "step": 30618 }, { "epoch": 0.4, "grad_norm": 3.581634283065796, "learning_rate": 1.9748886480157677e-05, "loss": 1.8795, "step": 30619 }, { "epoch": 0.4, "grad_norm": 3.3533709049224854, "learning_rate": 1.9748863079166832e-05, "loss": 1.8502, "step": 30620 }, { "epoch": 0.4, "grad_norm": 3.769216299057007, "learning_rate": 1.9748839677099547e-05, "loss": 2.037, "step": 30621 }, { "epoch": 0.4, "grad_norm": 3.965460777282715, "learning_rate": 1.9748816273955826e-05, "loss": 1.8871, "step": 30622 }, { "epoch": 0.4, "grad_norm": 3.800488233566284, "learning_rate": 1.9748792869735668e-05, "loss": 2.0693, "step": 30623 }, { "epoch": 0.4, "grad_norm": 3.668022871017456, "learning_rate": 1.974876946443908e-05, "loss": 1.8421, "step": 30624 }, { "epoch": 0.4, "grad_norm": 3.8125996589660645, "learning_rate": 1.974874605806606e-05, "loss": 1.8678, "step": 30625 }, { "epoch": 0.4, "grad_norm": 3.718552589416504, "learning_rate": 1.974872265061661e-05, "loss": 1.9288, "step": 30626 }, { "epoch": 0.4, "grad_norm": 3.9223861694335938, "learning_rate": 1.9748699242090744e-05, "loss": 2.3, "step": 30627 }, { "epoch": 0.4, "grad_norm": 3.4169697761535645, "learning_rate": 1.974867583248845e-05, "loss": 1.9651, "step": 30628 }, { "epoch": 0.4, "grad_norm": 3.944307804107666, "learning_rate": 1.9748652421809738e-05, "loss": 2.4032, "step": 30629 }, { "epoch": 0.4, "grad_norm": 3.9945950508117676, "learning_rate": 1.974862901005461e-05, "loss": 1.7781, "step": 30630 }, { "epoch": 0.4, "grad_norm": 3.794302225112915, "learning_rate": 1.9748605597223066e-05, "loss": 1.804, "step": 30631 }, { "epoch": 0.4, "grad_norm": 3.563218355178833, "learning_rate": 1.9748582183315114e-05, "loss": 1.5777, "step": 30632 }, { "epoch": 0.4, "grad_norm": 4.080400466918945, "learning_rate": 1.974855876833075e-05, "loss": 2.5644, "step": 30633 }, { "epoch": 0.4, "grad_norm": 4.196247100830078, "learning_rate": 1.9748535352269983e-05, "loss": 2.0892, "step": 30634 }, { "epoch": 0.4, "grad_norm": 4.202188491821289, "learning_rate": 1.974851193513281e-05, "loss": 2.744, "step": 30635 }, { "epoch": 0.4, "grad_norm": 3.8063552379608154, "learning_rate": 1.9748488516919237e-05, "loss": 1.6803, "step": 30636 }, { "epoch": 0.4, "grad_norm": 4.366279125213623, "learning_rate": 1.9748465097629265e-05, "loss": 2.0475, "step": 30637 }, { "epoch": 0.4, "grad_norm": 4.022179126739502, "learning_rate": 1.9748441677262894e-05, "loss": 2.1061, "step": 30638 }, { "epoch": 0.4, "grad_norm": 4.419188499450684, "learning_rate": 1.9748418255820133e-05, "loss": 1.8601, "step": 30639 }, { "epoch": 0.4, "grad_norm": 3.6993935108184814, "learning_rate": 1.974839483330098e-05, "loss": 2.3104, "step": 30640 }, { "epoch": 0.4, "grad_norm": 3.6353909969329834, "learning_rate": 1.974837140970544e-05, "loss": 2.1664, "step": 30641 }, { "epoch": 0.4, "grad_norm": 4.187184810638428, "learning_rate": 1.9748347985033517e-05, "loss": 2.1773, "step": 30642 }, { "epoch": 0.4, "grad_norm": 4.5789008140563965, "learning_rate": 1.9748324559285207e-05, "loss": 1.934, "step": 30643 }, { "epoch": 0.4, "grad_norm": 3.7417116165161133, "learning_rate": 1.9748301132460518e-05, "loss": 1.9024, "step": 30644 }, { "epoch": 0.4, "grad_norm": 3.9709153175354004, "learning_rate": 1.974827770455945e-05, "loss": 2.4265, "step": 30645 }, { "epoch": 0.4, "grad_norm": 4.53959321975708, "learning_rate": 1.974825427558201e-05, "loss": 2.4519, "step": 30646 }, { "epoch": 0.4, "grad_norm": 4.178399085998535, "learning_rate": 1.9748230845528194e-05, "loss": 1.9886, "step": 30647 }, { "epoch": 0.4, "grad_norm": 3.7122697830200195, "learning_rate": 1.974820741439801e-05, "loss": 2.0778, "step": 30648 }, { "epoch": 0.4, "grad_norm": 3.793095827102661, "learning_rate": 1.9748183982191455e-05, "loss": 2.136, "step": 30649 }, { "epoch": 0.4, "grad_norm": 3.832347869873047, "learning_rate": 1.974816054890854e-05, "loss": 2.1701, "step": 30650 }, { "epoch": 0.4, "grad_norm": 3.844270706176758, "learning_rate": 1.974813711454926e-05, "loss": 2.1247, "step": 30651 }, { "epoch": 0.4, "grad_norm": 4.434222221374512, "learning_rate": 1.974811367911362e-05, "loss": 2.1169, "step": 30652 }, { "epoch": 0.4, "grad_norm": 4.283836364746094, "learning_rate": 1.9748090242601624e-05, "loss": 2.2816, "step": 30653 }, { "epoch": 0.4, "grad_norm": 3.6262078285217285, "learning_rate": 1.974806680501327e-05, "loss": 1.7963, "step": 30654 }, { "epoch": 0.4, "grad_norm": 4.032045364379883, "learning_rate": 1.974804336634857e-05, "loss": 2.212, "step": 30655 }, { "epoch": 0.4, "grad_norm": 3.9398787021636963, "learning_rate": 1.9748019926607515e-05, "loss": 1.9901, "step": 30656 }, { "epoch": 0.4, "grad_norm": 3.8640553951263428, "learning_rate": 1.9747996485790116e-05, "loss": 1.6774, "step": 30657 }, { "epoch": 0.4, "grad_norm": 3.3553831577301025, "learning_rate": 1.9747973043896372e-05, "loss": 1.7113, "step": 30658 }, { "epoch": 0.4, "grad_norm": 3.9805548191070557, "learning_rate": 1.974794960092629e-05, "loss": 2.0908, "step": 30659 }, { "epoch": 0.4, "grad_norm": 4.0258917808532715, "learning_rate": 1.9747926156879865e-05, "loss": 2.0093, "step": 30660 }, { "epoch": 0.4, "grad_norm": 3.7421956062316895, "learning_rate": 1.9747902711757103e-05, "loss": 1.9641, "step": 30661 }, { "epoch": 0.4, "grad_norm": 3.5325961112976074, "learning_rate": 1.974787926555801e-05, "loss": 1.5196, "step": 30662 }, { "epoch": 0.4, "grad_norm": 4.013585090637207, "learning_rate": 1.9747855818282584e-05, "loss": 1.9851, "step": 30663 }, { "epoch": 0.4, "grad_norm": 3.964775323867798, "learning_rate": 1.974783236993083e-05, "loss": 2.2728, "step": 30664 }, { "epoch": 0.4, "grad_norm": 3.5264925956726074, "learning_rate": 1.9747808920502748e-05, "loss": 1.7207, "step": 30665 }, { "epoch": 0.4, "grad_norm": 3.85884165763855, "learning_rate": 1.9747785469998345e-05, "loss": 1.7163, "step": 30666 }, { "epoch": 0.4, "grad_norm": 3.6731910705566406, "learning_rate": 1.974776201841762e-05, "loss": 1.8849, "step": 30667 }, { "epoch": 0.4, "grad_norm": 3.8225314617156982, "learning_rate": 1.9747738565760578e-05, "loss": 1.9579, "step": 30668 }, { "epoch": 0.4, "grad_norm": 3.826218366622925, "learning_rate": 1.9747715112027217e-05, "loss": 2.2275, "step": 30669 }, { "epoch": 0.4, "grad_norm": 3.2811477184295654, "learning_rate": 1.9747691657217546e-05, "loss": 1.5781, "step": 30670 }, { "epoch": 0.4, "grad_norm": 3.6136741638183594, "learning_rate": 1.9747668201331566e-05, "loss": 1.7506, "step": 30671 }, { "epoch": 0.4, "grad_norm": 4.664586067199707, "learning_rate": 1.9747644744369275e-05, "loss": 2.5897, "step": 30672 }, { "epoch": 0.4, "grad_norm": 3.86905574798584, "learning_rate": 1.9747621286330677e-05, "loss": 1.6228, "step": 30673 }, { "epoch": 0.4, "grad_norm": 4.573125839233398, "learning_rate": 1.974759782721578e-05, "loss": 2.2557, "step": 30674 }, { "epoch": 0.4, "grad_norm": 3.4960250854492188, "learning_rate": 1.9747574367024583e-05, "loss": 1.7192, "step": 30675 }, { "epoch": 0.4, "grad_norm": 3.7893710136413574, "learning_rate": 1.9747550905757085e-05, "loss": 1.7396, "step": 30676 }, { "epoch": 0.4, "grad_norm": 4.305935859680176, "learning_rate": 1.9747527443413295e-05, "loss": 2.273, "step": 30677 }, { "epoch": 0.4, "grad_norm": 4.606222629547119, "learning_rate": 1.974750397999321e-05, "loss": 2.387, "step": 30678 }, { "epoch": 0.4, "grad_norm": 4.258636951446533, "learning_rate": 1.9747480515496837e-05, "loss": 2.2157, "step": 30679 }, { "epoch": 0.4, "grad_norm": 4.166863918304443, "learning_rate": 1.9747457049924175e-05, "loss": 2.6198, "step": 30680 }, { "epoch": 0.4, "grad_norm": 3.5128512382507324, "learning_rate": 1.974743358327523e-05, "loss": 1.8278, "step": 30681 }, { "epoch": 0.4, "grad_norm": 3.893695116043091, "learning_rate": 1.9747410115550006e-05, "loss": 2.3524, "step": 30682 }, { "epoch": 0.4, "grad_norm": 4.202329635620117, "learning_rate": 1.97473866467485e-05, "loss": 2.4593, "step": 30683 }, { "epoch": 0.4, "grad_norm": 4.005022048950195, "learning_rate": 1.9747363176870714e-05, "loss": 2.0808, "step": 30684 }, { "epoch": 0.4, "grad_norm": 4.163289546966553, "learning_rate": 1.9747339705916655e-05, "loss": 1.9749, "step": 30685 }, { "epoch": 0.4, "grad_norm": 3.5288915634155273, "learning_rate": 1.9747316233886326e-05, "loss": 1.7146, "step": 30686 }, { "epoch": 0.4, "grad_norm": 3.9231436252593994, "learning_rate": 1.974729276077973e-05, "loss": 2.0074, "step": 30687 }, { "epoch": 0.4, "grad_norm": 3.441606283187866, "learning_rate": 1.9747269286596862e-05, "loss": 1.9586, "step": 30688 }, { "epoch": 0.4, "grad_norm": 3.183804988861084, "learning_rate": 1.9747245811337735e-05, "loss": 1.6076, "step": 30689 }, { "epoch": 0.4, "grad_norm": 4.363279342651367, "learning_rate": 1.9747222335002344e-05, "loss": 2.3027, "step": 30690 }, { "epoch": 0.4, "grad_norm": 3.870952606201172, "learning_rate": 1.9747198857590695e-05, "loss": 2.0336, "step": 30691 }, { "epoch": 0.4, "grad_norm": 4.092554092407227, "learning_rate": 1.974717537910279e-05, "loss": 1.8666, "step": 30692 }, { "epoch": 0.4, "grad_norm": 3.852980613708496, "learning_rate": 1.974715189953863e-05, "loss": 2.0789, "step": 30693 }, { "epoch": 0.4, "grad_norm": 4.417069435119629, "learning_rate": 1.9747128418898222e-05, "loss": 2.3697, "step": 30694 }, { "epoch": 0.4, "grad_norm": 3.555755853652954, "learning_rate": 1.9747104937181566e-05, "loss": 1.9506, "step": 30695 }, { "epoch": 0.4, "grad_norm": 3.644385576248169, "learning_rate": 1.974708145438866e-05, "loss": 2.1906, "step": 30696 }, { "epoch": 0.4, "grad_norm": 3.2719764709472656, "learning_rate": 1.9747057970519514e-05, "loss": 1.6242, "step": 30697 }, { "epoch": 0.4, "grad_norm": 3.8322603702545166, "learning_rate": 1.9747034485574124e-05, "loss": 2.1986, "step": 30698 }, { "epoch": 0.4, "grad_norm": 3.8224432468414307, "learning_rate": 1.97470109995525e-05, "loss": 2.2047, "step": 30699 }, { "epoch": 0.4, "grad_norm": 4.204078197479248, "learning_rate": 1.9746987512454642e-05, "loss": 2.5174, "step": 30700 }, { "epoch": 0.4, "grad_norm": 3.9490532875061035, "learning_rate": 1.974696402428055e-05, "loss": 2.5615, "step": 30701 }, { "epoch": 0.4, "grad_norm": 3.619835615158081, "learning_rate": 1.9746940535030223e-05, "loss": 1.669, "step": 30702 }, { "epoch": 0.4, "grad_norm": 3.6886425018310547, "learning_rate": 1.9746917044703674e-05, "loss": 2.1294, "step": 30703 }, { "epoch": 0.4, "grad_norm": 4.087269306182861, "learning_rate": 1.9746893553300898e-05, "loss": 2.6632, "step": 30704 }, { "epoch": 0.4, "grad_norm": 3.8607709407806396, "learning_rate": 1.97468700608219e-05, "loss": 1.9668, "step": 30705 }, { "epoch": 0.4, "grad_norm": 4.041949272155762, "learning_rate": 1.974684656726668e-05, "loss": 2.0907, "step": 30706 }, { "epoch": 0.4, "grad_norm": 3.6409389972686768, "learning_rate": 1.9746823072635245e-05, "loss": 1.975, "step": 30707 }, { "epoch": 0.4, "grad_norm": 4.447726249694824, "learning_rate": 1.9746799576927594e-05, "loss": 2.3507, "step": 30708 }, { "epoch": 0.4, "grad_norm": 3.4131224155426025, "learning_rate": 1.974677608014373e-05, "loss": 1.6218, "step": 30709 }, { "epoch": 0.4, "grad_norm": 4.116422653198242, "learning_rate": 1.974675258228366e-05, "loss": 2.053, "step": 30710 }, { "epoch": 0.4, "grad_norm": 3.8238162994384766, "learning_rate": 1.9746729083347383e-05, "loss": 1.8973, "step": 30711 }, { "epoch": 0.4, "grad_norm": 4.1314167976379395, "learning_rate": 1.9746705583334896e-05, "loss": 2.3145, "step": 30712 }, { "epoch": 0.4, "grad_norm": 3.664015054702759, "learning_rate": 1.9746682082246214e-05, "loss": 1.7442, "step": 30713 }, { "epoch": 0.4, "grad_norm": 4.159815311431885, "learning_rate": 1.974665858008133e-05, "loss": 2.0588, "step": 30714 }, { "epoch": 0.4, "grad_norm": 4.298495769500732, "learning_rate": 1.974663507684025e-05, "loss": 2.2934, "step": 30715 }, { "epoch": 0.4, "grad_norm": 3.911616086959839, "learning_rate": 1.9746611572522974e-05, "loss": 1.9858, "step": 30716 }, { "epoch": 0.4, "grad_norm": 4.29544734954834, "learning_rate": 1.974658806712951e-05, "loss": 2.1229, "step": 30717 }, { "epoch": 0.4, "grad_norm": 4.106906890869141, "learning_rate": 1.9746564560659856e-05, "loss": 2.0801, "step": 30718 }, { "epoch": 0.4, "grad_norm": 4.145507335662842, "learning_rate": 1.9746541053114018e-05, "loss": 1.7853, "step": 30719 }, { "epoch": 0.4, "grad_norm": 3.619316339492798, "learning_rate": 1.9746517544491994e-05, "loss": 1.7891, "step": 30720 }, { "epoch": 0.4, "grad_norm": 4.112852573394775, "learning_rate": 1.9746494034793788e-05, "loss": 2.1203, "step": 30721 }, { "epoch": 0.4, "grad_norm": 3.807600975036621, "learning_rate": 1.974647052401941e-05, "loss": 1.8202, "step": 30722 }, { "epoch": 0.4, "grad_norm": 3.8686506748199463, "learning_rate": 1.9746447012168848e-05, "loss": 2.5497, "step": 30723 }, { "epoch": 0.4, "grad_norm": 4.1529154777526855, "learning_rate": 1.9746423499242118e-05, "loss": 1.8226, "step": 30724 }, { "epoch": 0.4, "grad_norm": 3.693126678466797, "learning_rate": 1.9746399985239215e-05, "loss": 1.9334, "step": 30725 }, { "epoch": 0.4, "grad_norm": 3.678941249847412, "learning_rate": 1.9746376470160148e-05, "loss": 1.6345, "step": 30726 }, { "epoch": 0.4, "grad_norm": 4.31334114074707, "learning_rate": 1.974635295400491e-05, "loss": 2.0511, "step": 30727 }, { "epoch": 0.4, "grad_norm": 3.891582489013672, "learning_rate": 1.9746329436773512e-05, "loss": 2.1083, "step": 30728 }, { "epoch": 0.4, "grad_norm": 4.448800086975098, "learning_rate": 1.9746305918465954e-05, "loss": 2.2346, "step": 30729 }, { "epoch": 0.4, "grad_norm": 3.7049293518066406, "learning_rate": 1.9746282399082238e-05, "loss": 1.5257, "step": 30730 }, { "epoch": 0.4, "grad_norm": 4.576208114624023, "learning_rate": 1.974625887862237e-05, "loss": 2.5966, "step": 30731 }, { "epoch": 0.4, "grad_norm": 3.9160408973693848, "learning_rate": 1.9746235357086346e-05, "loss": 2.1233, "step": 30732 }, { "epoch": 0.4, "grad_norm": 3.7038567066192627, "learning_rate": 1.9746211834474174e-05, "loss": 2.1493, "step": 30733 }, { "epoch": 0.4, "grad_norm": 3.441483974456787, "learning_rate": 1.9746188310785853e-05, "loss": 2.0508, "step": 30734 }, { "epoch": 0.4, "grad_norm": 3.7064437866210938, "learning_rate": 1.974616478602139e-05, "loss": 2.1497, "step": 30735 }, { "epoch": 0.4, "grad_norm": 3.5741357803344727, "learning_rate": 1.9746141260180786e-05, "loss": 1.6459, "step": 30736 }, { "epoch": 0.4, "grad_norm": 3.5531439781188965, "learning_rate": 1.974611773326404e-05, "loss": 1.6959, "step": 30737 }, { "epoch": 0.4, "grad_norm": 3.8275904655456543, "learning_rate": 1.9746094205271154e-05, "loss": 2.0589, "step": 30738 }, { "epoch": 0.4, "grad_norm": 4.14794397354126, "learning_rate": 1.974607067620214e-05, "loss": 1.9468, "step": 30739 }, { "epoch": 0.4, "grad_norm": 3.643019914627075, "learning_rate": 1.9746047146056994e-05, "loss": 1.602, "step": 30740 }, { "epoch": 0.4, "grad_norm": 3.9280619621276855, "learning_rate": 1.9746023614835717e-05, "loss": 2.0864, "step": 30741 }, { "epoch": 0.4, "grad_norm": 4.196467876434326, "learning_rate": 1.9746000082538313e-05, "loss": 2.0073, "step": 30742 }, { "epoch": 0.4, "grad_norm": 3.9197144508361816, "learning_rate": 1.9745976549164787e-05, "loss": 1.9287, "step": 30743 }, { "epoch": 0.4, "grad_norm": 3.7416810989379883, "learning_rate": 1.974595301471514e-05, "loss": 2.1759, "step": 30744 }, { "epoch": 0.4, "grad_norm": 3.5703847408294678, "learning_rate": 1.974592947918937e-05, "loss": 1.8997, "step": 30745 }, { "epoch": 0.4, "grad_norm": 4.076854705810547, "learning_rate": 1.974590594258749e-05, "loss": 2.228, "step": 30746 }, { "epoch": 0.4, "grad_norm": 4.3670196533203125, "learning_rate": 1.9745882404909492e-05, "loss": 2.1789, "step": 30747 }, { "epoch": 0.4, "grad_norm": 3.706666946411133, "learning_rate": 1.974585886615539e-05, "loss": 1.8609, "step": 30748 }, { "epoch": 0.4, "grad_norm": 3.499025583267212, "learning_rate": 1.974583532632517e-05, "loss": 1.7942, "step": 30749 }, { "epoch": 0.4, "grad_norm": 3.1602189540863037, "learning_rate": 1.9745811785418852e-05, "loss": 1.5747, "step": 30750 }, { "epoch": 0.4, "grad_norm": 4.154387474060059, "learning_rate": 1.974578824343643e-05, "loss": 2.2204, "step": 30751 }, { "epoch": 0.4, "grad_norm": 3.554286479949951, "learning_rate": 1.9745764700377905e-05, "loss": 1.976, "step": 30752 }, { "epoch": 0.4, "grad_norm": 3.8214356899261475, "learning_rate": 1.9745741156243283e-05, "loss": 1.706, "step": 30753 }, { "epoch": 0.4, "grad_norm": 4.2377777099609375, "learning_rate": 1.9745717611032566e-05, "loss": 2.4034, "step": 30754 }, { "epoch": 0.4, "grad_norm": 3.710181713104248, "learning_rate": 1.974569406474576e-05, "loss": 1.8769, "step": 30755 }, { "epoch": 0.4, "grad_norm": 4.270326614379883, "learning_rate": 1.9745670517382857e-05, "loss": 2.0448, "step": 30756 }, { "epoch": 0.4, "grad_norm": 3.685821771621704, "learning_rate": 1.9745646968943874e-05, "loss": 1.8071, "step": 30757 }, { "epoch": 0.4, "grad_norm": 3.4456868171691895, "learning_rate": 1.97456234194288e-05, "loss": 1.7172, "step": 30758 }, { "epoch": 0.4, "grad_norm": 3.363037347793579, "learning_rate": 1.9745599868837646e-05, "loss": 1.7827, "step": 30759 }, { "epoch": 0.4, "grad_norm": 3.5220887660980225, "learning_rate": 1.9745576317170414e-05, "loss": 1.834, "step": 30760 }, { "epoch": 0.4, "grad_norm": 3.6368253231048584, "learning_rate": 1.9745552764427104e-05, "loss": 1.6848, "step": 30761 }, { "epoch": 0.4, "grad_norm": 4.013144493103027, "learning_rate": 1.9745529210607717e-05, "loss": 2.3071, "step": 30762 }, { "epoch": 0.4, "grad_norm": 3.8476338386535645, "learning_rate": 1.9745505655712263e-05, "loss": 2.0757, "step": 30763 }, { "epoch": 0.4, "grad_norm": 3.781176805496216, "learning_rate": 1.974548209974074e-05, "loss": 1.8744, "step": 30764 }, { "epoch": 0.4, "grad_norm": 3.838946580886841, "learning_rate": 1.9745458542693146e-05, "loss": 1.7981, "step": 30765 }, { "epoch": 0.4, "grad_norm": 3.5153615474700928, "learning_rate": 1.974543498456949e-05, "loss": 1.7624, "step": 30766 }, { "epoch": 0.4, "grad_norm": 3.924849271774292, "learning_rate": 1.9745411425369774e-05, "loss": 2.1217, "step": 30767 }, { "epoch": 0.4, "grad_norm": 4.170226097106934, "learning_rate": 1.9745387865093997e-05, "loss": 1.8177, "step": 30768 }, { "epoch": 0.4, "grad_norm": 4.557455539703369, "learning_rate": 1.9745364303742167e-05, "loss": 1.9161, "step": 30769 }, { "epoch": 0.4, "grad_norm": 3.962263822555542, "learning_rate": 1.9745340741314282e-05, "loss": 2.3276, "step": 30770 }, { "epoch": 0.4, "grad_norm": 3.6137208938598633, "learning_rate": 1.974531717781034e-05, "loss": 1.7802, "step": 30771 }, { "epoch": 0.4, "grad_norm": 3.925757884979248, "learning_rate": 1.9745293613230356e-05, "loss": 2.3471, "step": 30772 }, { "epoch": 0.4, "grad_norm": 3.9985060691833496, "learning_rate": 1.9745270047574327e-05, "loss": 2.1401, "step": 30773 }, { "epoch": 0.4, "grad_norm": 3.928337335586548, "learning_rate": 1.9745246480842252e-05, "loss": 2.1277, "step": 30774 }, { "epoch": 0.4, "grad_norm": 3.9935142993927, "learning_rate": 1.974522291303414e-05, "loss": 2.25, "step": 30775 }, { "epoch": 0.4, "grad_norm": 3.8445467948913574, "learning_rate": 1.9745199344149987e-05, "loss": 2.1712, "step": 30776 }, { "epoch": 0.4, "grad_norm": 3.642681121826172, "learning_rate": 1.97451757741898e-05, "loss": 1.8146, "step": 30777 }, { "epoch": 0.4, "grad_norm": 3.3486833572387695, "learning_rate": 1.974515220315358e-05, "loss": 1.9548, "step": 30778 }, { "epoch": 0.4, "grad_norm": 3.6972618103027344, "learning_rate": 1.974512863104133e-05, "loss": 1.7062, "step": 30779 }, { "epoch": 0.4, "grad_norm": 3.9085559844970703, "learning_rate": 1.974510505785305e-05, "loss": 1.9393, "step": 30780 }, { "epoch": 0.4, "grad_norm": 3.860212564468384, "learning_rate": 1.9745081483588748e-05, "loss": 1.6685, "step": 30781 }, { "epoch": 0.4, "grad_norm": 3.915539264678955, "learning_rate": 1.9745057908248422e-05, "loss": 1.8607, "step": 30782 }, { "epoch": 0.4, "grad_norm": 3.9398915767669678, "learning_rate": 1.9745034331832077e-05, "loss": 2.1759, "step": 30783 }, { "epoch": 0.4, "grad_norm": 3.4481308460235596, "learning_rate": 1.9745010754339715e-05, "loss": 1.8175, "step": 30784 }, { "epoch": 0.4, "grad_norm": 3.457484006881714, "learning_rate": 1.974498717577134e-05, "loss": 1.8095, "step": 30785 }, { "epoch": 0.4, "grad_norm": 3.6204605102539062, "learning_rate": 1.9744963596126952e-05, "loss": 1.96, "step": 30786 }, { "epoch": 0.4, "grad_norm": 3.713923215866089, "learning_rate": 1.9744940015406554e-05, "loss": 1.6031, "step": 30787 }, { "epoch": 0.4, "grad_norm": 4.072702407836914, "learning_rate": 1.9744916433610147e-05, "loss": 2.2003, "step": 30788 }, { "epoch": 0.4, "grad_norm": 4.100456714630127, "learning_rate": 1.9744892850737737e-05, "loss": 1.8136, "step": 30789 }, { "epoch": 0.4, "grad_norm": 3.455205202102661, "learning_rate": 1.974486926678933e-05, "loss": 1.858, "step": 30790 }, { "epoch": 0.4, "grad_norm": 4.11671257019043, "learning_rate": 1.974484568176492e-05, "loss": 2.1713, "step": 30791 }, { "epoch": 0.4, "grad_norm": 3.533196449279785, "learning_rate": 1.9744822095664516e-05, "loss": 1.8642, "step": 30792 }, { "epoch": 0.4, "grad_norm": 4.1574320793151855, "learning_rate": 1.9744798508488117e-05, "loss": 1.9055, "step": 30793 }, { "epoch": 0.4, "grad_norm": 3.8456900119781494, "learning_rate": 1.9744774920235727e-05, "loss": 1.9194, "step": 30794 }, { "epoch": 0.4, "grad_norm": 3.8161606788635254, "learning_rate": 1.9744751330907346e-05, "loss": 2.1095, "step": 30795 }, { "epoch": 0.4, "grad_norm": 3.9522664546966553, "learning_rate": 1.9744727740502984e-05, "loss": 1.9198, "step": 30796 }, { "epoch": 0.4, "grad_norm": 3.843132495880127, "learning_rate": 1.9744704149022635e-05, "loss": 2.0205, "step": 30797 }, { "epoch": 0.4, "grad_norm": 3.568333864212036, "learning_rate": 1.9744680556466307e-05, "loss": 1.7581, "step": 30798 }, { "epoch": 0.4, "grad_norm": 3.799053907394409, "learning_rate": 1.9744656962834e-05, "loss": 2.152, "step": 30799 }, { "epoch": 0.4, "grad_norm": 3.9975686073303223, "learning_rate": 1.9744633368125717e-05, "loss": 1.9959, "step": 30800 }, { "epoch": 0.4, "grad_norm": 4.188409328460693, "learning_rate": 1.9744609772341462e-05, "loss": 1.9892, "step": 30801 }, { "epoch": 0.4, "grad_norm": 3.6518797874450684, "learning_rate": 1.974458617548124e-05, "loss": 1.8738, "step": 30802 }, { "epoch": 0.4, "grad_norm": 4.291473388671875, "learning_rate": 1.9744562577545044e-05, "loss": 2.2213, "step": 30803 }, { "epoch": 0.4, "grad_norm": 4.012853145599365, "learning_rate": 1.974453897853289e-05, "loss": 2.3966, "step": 30804 }, { "epoch": 0.4, "grad_norm": 3.235015392303467, "learning_rate": 1.9744515378444768e-05, "loss": 1.6164, "step": 30805 }, { "epoch": 0.4, "grad_norm": 4.0715813636779785, "learning_rate": 1.974449177728069e-05, "loss": 2.0391, "step": 30806 }, { "epoch": 0.4, "grad_norm": 3.747274875640869, "learning_rate": 1.9744468175040652e-05, "loss": 1.8457, "step": 30807 }, { "epoch": 0.4, "grad_norm": 4.521087169647217, "learning_rate": 1.974444457172466e-05, "loss": 2.1432, "step": 30808 }, { "epoch": 0.4, "grad_norm": 3.7712855339050293, "learning_rate": 1.9744420967332715e-05, "loss": 2.0635, "step": 30809 }, { "epoch": 0.4, "grad_norm": 3.5849387645721436, "learning_rate": 1.9744397361864823e-05, "loss": 1.6862, "step": 30810 }, { "epoch": 0.4, "grad_norm": 3.6013007164001465, "learning_rate": 1.9744373755320983e-05, "loss": 1.6683, "step": 30811 }, { "epoch": 0.4, "grad_norm": 4.565313816070557, "learning_rate": 1.97443501477012e-05, "loss": 2.1592, "step": 30812 }, { "epoch": 0.4, "grad_norm": 3.5845158100128174, "learning_rate": 1.9744326539005473e-05, "loss": 2.1027, "step": 30813 }, { "epoch": 0.4, "grad_norm": 4.965734958648682, "learning_rate": 1.974430292923381e-05, "loss": 2.556, "step": 30814 }, { "epoch": 0.4, "grad_norm": 3.9596123695373535, "learning_rate": 1.974427931838621e-05, "loss": 2.174, "step": 30815 }, { "epoch": 0.4, "grad_norm": 4.338321208953857, "learning_rate": 1.9744255706462673e-05, "loss": 2.411, "step": 30816 }, { "epoch": 0.4, "grad_norm": 3.600489616394043, "learning_rate": 1.974423209346321e-05, "loss": 1.8854, "step": 30817 }, { "epoch": 0.4, "grad_norm": 3.9989309310913086, "learning_rate": 1.9744208479387815e-05, "loss": 1.978, "step": 30818 }, { "epoch": 0.4, "grad_norm": 4.056002140045166, "learning_rate": 1.9744184864236494e-05, "loss": 2.0345, "step": 30819 }, { "epoch": 0.4, "grad_norm": 4.173705101013184, "learning_rate": 1.974416124800925e-05, "loss": 1.9996, "step": 30820 }, { "epoch": 0.4, "grad_norm": 3.614954710006714, "learning_rate": 1.974413763070609e-05, "loss": 2.0511, "step": 30821 }, { "epoch": 0.4, "grad_norm": 3.909658908843994, "learning_rate": 1.9744114012327004e-05, "loss": 1.9013, "step": 30822 }, { "epoch": 0.4, "grad_norm": 3.4947075843811035, "learning_rate": 1.9744090392872006e-05, "loss": 1.7035, "step": 30823 }, { "epoch": 0.4, "grad_norm": 3.9298946857452393, "learning_rate": 1.9744066772341094e-05, "loss": 1.9561, "step": 30824 }, { "epoch": 0.4, "grad_norm": 3.59175443649292, "learning_rate": 1.9744043150734274e-05, "loss": 1.3415, "step": 30825 }, { "epoch": 0.4, "grad_norm": 4.663124084472656, "learning_rate": 1.9744019528051545e-05, "loss": 2.0781, "step": 30826 }, { "epoch": 0.4, "grad_norm": 4.204959869384766, "learning_rate": 1.9743995904292912e-05, "loss": 1.9866, "step": 30827 }, { "epoch": 0.4, "grad_norm": 3.7680416107177734, "learning_rate": 1.9743972279458378e-05, "loss": 2.126, "step": 30828 }, { "epoch": 0.4, "grad_norm": 4.435091018676758, "learning_rate": 1.974394865354794e-05, "loss": 2.0249, "step": 30829 }, { "epoch": 0.4, "grad_norm": 3.4014506340026855, "learning_rate": 1.974392502656161e-05, "loss": 1.9217, "step": 30830 }, { "epoch": 0.4, "grad_norm": 4.063126564025879, "learning_rate": 1.9743901398499378e-05, "loss": 1.9656, "step": 30831 }, { "epoch": 0.4, "grad_norm": 4.443589687347412, "learning_rate": 1.974387776936126e-05, "loss": 2.3267, "step": 30832 }, { "epoch": 0.4, "grad_norm": 3.0122923851013184, "learning_rate": 1.974385413914725e-05, "loss": 1.5988, "step": 30833 }, { "epoch": 0.4, "grad_norm": 3.473281145095825, "learning_rate": 1.9743830507857354e-05, "loss": 1.7519, "step": 30834 }, { "epoch": 0.4, "grad_norm": 3.8857760429382324, "learning_rate": 1.9743806875491572e-05, "loss": 1.9963, "step": 30835 }, { "epoch": 0.4, "grad_norm": 3.7629382610321045, "learning_rate": 1.9743783242049913e-05, "loss": 2.3334, "step": 30836 }, { "epoch": 0.4, "grad_norm": 3.5950443744659424, "learning_rate": 1.974375960753237e-05, "loss": 1.686, "step": 30837 }, { "epoch": 0.4, "grad_norm": 3.9452738761901855, "learning_rate": 1.9743735971938954e-05, "loss": 2.3681, "step": 30838 }, { "epoch": 0.4, "grad_norm": 4.031048774719238, "learning_rate": 1.974371233526966e-05, "loss": 2.1144, "step": 30839 }, { "epoch": 0.4, "grad_norm": 3.3884613513946533, "learning_rate": 1.97436886975245e-05, "loss": 1.6071, "step": 30840 }, { "epoch": 0.4, "grad_norm": 3.283783197402954, "learning_rate": 1.974366505870347e-05, "loss": 1.6415, "step": 30841 }, { "epoch": 0.4, "grad_norm": 4.107973575592041, "learning_rate": 1.974364141880657e-05, "loss": 2.4647, "step": 30842 }, { "epoch": 0.4, "grad_norm": 3.7351996898651123, "learning_rate": 1.9743617777833813e-05, "loss": 1.8974, "step": 30843 }, { "epoch": 0.4, "grad_norm": 3.7740397453308105, "learning_rate": 1.974359413578519e-05, "loss": 2.0748, "step": 30844 }, { "epoch": 0.4, "grad_norm": 3.4056143760681152, "learning_rate": 1.974357049266071e-05, "loss": 1.5945, "step": 30845 }, { "epoch": 0.4, "grad_norm": 3.7555389404296875, "learning_rate": 1.9743546848460378e-05, "loss": 2.2864, "step": 30846 }, { "epoch": 0.4, "grad_norm": 3.7972445487976074, "learning_rate": 1.974352320318419e-05, "loss": 1.8774, "step": 30847 }, { "epoch": 0.4, "grad_norm": 3.926769733428955, "learning_rate": 1.9743499556832154e-05, "loss": 2.0531, "step": 30848 }, { "epoch": 0.4, "grad_norm": 3.677478075027466, "learning_rate": 1.974347590940427e-05, "loss": 2.1283, "step": 30849 }, { "epoch": 0.4, "grad_norm": 3.9038498401641846, "learning_rate": 1.974345226090054e-05, "loss": 2.1075, "step": 30850 }, { "epoch": 0.4, "grad_norm": 4.334118843078613, "learning_rate": 1.9743428611320966e-05, "loss": 2.0687, "step": 30851 }, { "epoch": 0.4, "grad_norm": 3.664656400680542, "learning_rate": 1.9743404960665554e-05, "loss": 1.7967, "step": 30852 }, { "epoch": 0.4, "grad_norm": 3.739936351776123, "learning_rate": 1.9743381308934305e-05, "loss": 1.7406, "step": 30853 }, { "epoch": 0.4, "grad_norm": 3.7024524211883545, "learning_rate": 1.974335765612722e-05, "loss": 2.0331, "step": 30854 }, { "epoch": 0.4, "grad_norm": 3.5904972553253174, "learning_rate": 1.9743334002244303e-05, "loss": 1.7784, "step": 30855 }, { "epoch": 0.4, "grad_norm": 3.4832167625427246, "learning_rate": 1.9743310347285557e-05, "loss": 1.8484, "step": 30856 }, { "epoch": 0.4, "grad_norm": 3.4911398887634277, "learning_rate": 1.9743286691250984e-05, "loss": 1.7033, "step": 30857 }, { "epoch": 0.4, "grad_norm": 3.8344430923461914, "learning_rate": 1.9743263034140588e-05, "loss": 1.8811, "step": 30858 }, { "epoch": 0.4, "grad_norm": 3.7906088829040527, "learning_rate": 1.9743239375954372e-05, "loss": 1.5335, "step": 30859 }, { "epoch": 0.4, "grad_norm": 4.113249778747559, "learning_rate": 1.9743215716692336e-05, "loss": 2.6562, "step": 30860 }, { "epoch": 0.4, "grad_norm": 4.085521697998047, "learning_rate": 1.974319205635448e-05, "loss": 2.2974, "step": 30861 }, { "epoch": 0.4, "grad_norm": 4.117404460906982, "learning_rate": 1.9743168394940814e-05, "loss": 1.7169, "step": 30862 }, { "epoch": 0.4, "grad_norm": 3.551204204559326, "learning_rate": 1.9743144732451336e-05, "loss": 1.7063, "step": 30863 }, { "epoch": 0.4, "grad_norm": 3.8371059894561768, "learning_rate": 1.974312106888605e-05, "loss": 2.0701, "step": 30864 }, { "epoch": 0.4, "grad_norm": 3.7566075325012207, "learning_rate": 1.974309740424496e-05, "loss": 1.9446, "step": 30865 }, { "epoch": 0.4, "grad_norm": 3.860549211502075, "learning_rate": 1.974307373852806e-05, "loss": 1.8642, "step": 30866 }, { "epoch": 0.4, "grad_norm": 3.385282278060913, "learning_rate": 1.9743050071735367e-05, "loss": 1.8917, "step": 30867 }, { "epoch": 0.4, "grad_norm": 4.428747653961182, "learning_rate": 1.9743026403866873e-05, "loss": 2.2174, "step": 30868 }, { "epoch": 0.4, "grad_norm": 3.8597049713134766, "learning_rate": 1.9743002734922583e-05, "loss": 1.8153, "step": 30869 }, { "epoch": 0.4, "grad_norm": 3.1900556087493896, "learning_rate": 1.97429790649025e-05, "loss": 1.67, "step": 30870 }, { "epoch": 0.4, "grad_norm": 3.9295263290405273, "learning_rate": 1.9742955393806628e-05, "loss": 1.9789, "step": 30871 }, { "epoch": 0.4, "grad_norm": 4.023220539093018, "learning_rate": 1.974293172163497e-05, "loss": 2.0308, "step": 30872 }, { "epoch": 0.4, "grad_norm": 3.53904128074646, "learning_rate": 1.974290804838752e-05, "loss": 1.9296, "step": 30873 }, { "epoch": 0.4, "grad_norm": 4.269465446472168, "learning_rate": 1.9742884374064295e-05, "loss": 2.2231, "step": 30874 }, { "epoch": 0.4, "grad_norm": 3.884329319000244, "learning_rate": 1.974286069866529e-05, "loss": 2.0013, "step": 30875 }, { "epoch": 0.4, "grad_norm": 3.880742311477661, "learning_rate": 1.9742837022190503e-05, "loss": 2.13, "step": 30876 }, { "epoch": 0.4, "grad_norm": 4.021470546722412, "learning_rate": 1.9742813344639945e-05, "loss": 2.3891, "step": 30877 }, { "epoch": 0.4, "grad_norm": 3.7753806114196777, "learning_rate": 1.9742789666013614e-05, "loss": 2.1213, "step": 30878 }, { "epoch": 0.4, "grad_norm": 4.289141654968262, "learning_rate": 1.974276598631151e-05, "loss": 2.56, "step": 30879 }, { "epoch": 0.4, "grad_norm": 3.4753811359405518, "learning_rate": 1.9742742305533646e-05, "loss": 1.8219, "step": 30880 }, { "epoch": 0.4, "grad_norm": 3.6495306491851807, "learning_rate": 1.9742718623680014e-05, "loss": 2.054, "step": 30881 }, { "epoch": 0.4, "grad_norm": 3.9555399417877197, "learning_rate": 1.9742694940750624e-05, "loss": 2.2276, "step": 30882 }, { "epoch": 0.4, "grad_norm": 3.793837547302246, "learning_rate": 1.9742671256745472e-05, "loss": 2.0802, "step": 30883 }, { "epoch": 0.4, "grad_norm": 3.4589691162109375, "learning_rate": 1.9742647571664567e-05, "loss": 1.7648, "step": 30884 }, { "epoch": 0.4, "grad_norm": 4.462553977966309, "learning_rate": 1.9742623885507904e-05, "loss": 2.5001, "step": 30885 }, { "epoch": 0.4, "grad_norm": 3.338263750076294, "learning_rate": 1.974260019827549e-05, "loss": 1.8476, "step": 30886 }, { "epoch": 0.4, "grad_norm": 4.364048004150391, "learning_rate": 1.9742576509967334e-05, "loss": 1.9537, "step": 30887 }, { "epoch": 0.4, "grad_norm": 3.523864507675171, "learning_rate": 1.974255282058343e-05, "loss": 2.075, "step": 30888 }, { "epoch": 0.4, "grad_norm": 3.9216248989105225, "learning_rate": 1.974252913012378e-05, "loss": 2.0395, "step": 30889 }, { "epoch": 0.4, "grad_norm": 3.257200002670288, "learning_rate": 1.974250543858839e-05, "loss": 1.6637, "step": 30890 }, { "epoch": 0.4, "grad_norm": 4.029330253601074, "learning_rate": 1.9742481745977262e-05, "loss": 2.3951, "step": 30891 }, { "epoch": 0.4, "grad_norm": 3.497431993484497, "learning_rate": 1.9742458052290398e-05, "loss": 1.7113, "step": 30892 }, { "epoch": 0.4, "grad_norm": 3.9020440578460693, "learning_rate": 1.9742434357527805e-05, "loss": 1.783, "step": 30893 }, { "epoch": 0.4, "grad_norm": 3.5927255153656006, "learning_rate": 1.974241066168948e-05, "loss": 1.9884, "step": 30894 }, { "epoch": 0.4, "grad_norm": 4.361456394195557, "learning_rate": 1.9742386964775427e-05, "loss": 2.0422, "step": 30895 }, { "epoch": 0.4, "grad_norm": 3.3312137126922607, "learning_rate": 1.974236326678565e-05, "loss": 1.7673, "step": 30896 }, { "epoch": 0.4, "grad_norm": 4.389070987701416, "learning_rate": 1.974233956772015e-05, "loss": 2.3842, "step": 30897 }, { "epoch": 0.4, "grad_norm": 3.5515999794006348, "learning_rate": 1.974231586757893e-05, "loss": 1.792, "step": 30898 }, { "epoch": 0.4, "grad_norm": 3.4067587852478027, "learning_rate": 1.9742292166361994e-05, "loss": 1.4665, "step": 30899 }, { "epoch": 0.4, "grad_norm": 3.705667018890381, "learning_rate": 1.9742268464069343e-05, "loss": 1.6772, "step": 30900 }, { "epoch": 0.4, "grad_norm": 3.6407535076141357, "learning_rate": 1.974224476070098e-05, "loss": 2.1241, "step": 30901 }, { "epoch": 0.4, "grad_norm": 3.6264307498931885, "learning_rate": 1.9742221056256912e-05, "loss": 1.8711, "step": 30902 }, { "epoch": 0.4, "grad_norm": 3.7821919918060303, "learning_rate": 1.9742197350737133e-05, "loss": 2.1336, "step": 30903 }, { "epoch": 0.4, "grad_norm": 3.6848981380462646, "learning_rate": 1.974217364414165e-05, "loss": 1.9615, "step": 30904 }, { "epoch": 0.4, "grad_norm": 3.4399123191833496, "learning_rate": 1.974214993647047e-05, "loss": 1.8055, "step": 30905 }, { "epoch": 0.4, "grad_norm": 3.612476110458374, "learning_rate": 1.9742126227723588e-05, "loss": 1.9137, "step": 30906 }, { "epoch": 0.4, "grad_norm": 3.855121612548828, "learning_rate": 1.974210251790101e-05, "loss": 1.7883, "step": 30907 }, { "epoch": 0.4, "grad_norm": 4.186507701873779, "learning_rate": 1.974207880700274e-05, "loss": 2.0842, "step": 30908 }, { "epoch": 0.4, "grad_norm": 3.625793218612671, "learning_rate": 1.9742055095028778e-05, "loss": 1.9627, "step": 30909 }, { "epoch": 0.4, "grad_norm": 4.153810024261475, "learning_rate": 1.9742031381979126e-05, "loss": 1.9749, "step": 30910 }, { "epoch": 0.4, "grad_norm": 3.964365005493164, "learning_rate": 1.974200766785379e-05, "loss": 2.0345, "step": 30911 }, { "epoch": 0.4, "grad_norm": 3.823498010635376, "learning_rate": 1.9741983952652774e-05, "loss": 2.0771, "step": 30912 }, { "epoch": 0.4, "grad_norm": 3.5052459239959717, "learning_rate": 1.9741960236376076e-05, "loss": 1.8581, "step": 30913 }, { "epoch": 0.4, "grad_norm": 3.2436110973358154, "learning_rate": 1.9741936519023697e-05, "loss": 1.7191, "step": 30914 }, { "epoch": 0.4, "grad_norm": 3.394566297531128, "learning_rate": 1.9741912800595648e-05, "loss": 2.0779, "step": 30915 }, { "epoch": 0.4, "grad_norm": 4.1845197677612305, "learning_rate": 1.9741889081091924e-05, "loss": 2.1564, "step": 30916 }, { "epoch": 0.4, "grad_norm": 3.859370470046997, "learning_rate": 1.974186536051253e-05, "loss": 2.3067, "step": 30917 }, { "epoch": 0.4, "grad_norm": 3.6609787940979004, "learning_rate": 1.974184163885747e-05, "loss": 1.9755, "step": 30918 }, { "epoch": 0.4, "grad_norm": 4.177710056304932, "learning_rate": 1.9741817916126744e-05, "loss": 2.07, "step": 30919 }, { "epoch": 0.4, "grad_norm": 3.3296992778778076, "learning_rate": 1.9741794192320357e-05, "loss": 1.4642, "step": 30920 }, { "epoch": 0.4, "grad_norm": 4.086767673492432, "learning_rate": 1.974177046743831e-05, "loss": 1.9836, "step": 30921 }, { "epoch": 0.4, "grad_norm": 4.171811580657959, "learning_rate": 1.9741746741480608e-05, "loss": 2.6884, "step": 30922 }, { "epoch": 0.4, "grad_norm": 3.435502290725708, "learning_rate": 1.974172301444725e-05, "loss": 1.9011, "step": 30923 }, { "epoch": 0.4, "grad_norm": 3.7757816314697266, "learning_rate": 1.974169928633824e-05, "loss": 2.0226, "step": 30924 }, { "epoch": 0.4, "grad_norm": 3.8956682682037354, "learning_rate": 1.9741675557153583e-05, "loss": 2.1245, "step": 30925 }, { "epoch": 0.4, "grad_norm": 3.75713849067688, "learning_rate": 1.974165182689328e-05, "loss": 2.0844, "step": 30926 }, { "epoch": 0.4, "grad_norm": 3.5738422870635986, "learning_rate": 1.974162809555733e-05, "loss": 2.0172, "step": 30927 }, { "epoch": 0.4, "grad_norm": 4.283600330352783, "learning_rate": 1.974160436314574e-05, "loss": 2.1622, "step": 30928 }, { "epoch": 0.4, "grad_norm": 3.5755629539489746, "learning_rate": 1.9741580629658516e-05, "loss": 1.7988, "step": 30929 }, { "epoch": 0.4, "grad_norm": 4.389249324798584, "learning_rate": 1.974155689509565e-05, "loss": 2.8104, "step": 30930 }, { "epoch": 0.4, "grad_norm": 4.234976291656494, "learning_rate": 1.9741533159457155e-05, "loss": 2.2428, "step": 30931 }, { "epoch": 0.4, "grad_norm": 4.073121547698975, "learning_rate": 1.974150942274303e-05, "loss": 2.2033, "step": 30932 }, { "epoch": 0.4, "grad_norm": 3.6584649085998535, "learning_rate": 1.9741485684953274e-05, "loss": 2.0756, "step": 30933 }, { "epoch": 0.4, "grad_norm": 3.9176294803619385, "learning_rate": 1.9741461946087895e-05, "loss": 2.4507, "step": 30934 }, { "epoch": 0.4, "grad_norm": 4.441303253173828, "learning_rate": 1.974143820614689e-05, "loss": 2.4634, "step": 30935 }, { "epoch": 0.4, "grad_norm": 3.9899988174438477, "learning_rate": 1.974141446513027e-05, "loss": 1.8998, "step": 30936 }, { "epoch": 0.4, "grad_norm": 3.4054508209228516, "learning_rate": 1.9741390723038027e-05, "loss": 1.467, "step": 30937 }, { "epoch": 0.4, "grad_norm": 3.9286110401153564, "learning_rate": 1.9741366979870174e-05, "loss": 2.3032, "step": 30938 }, { "epoch": 0.4, "grad_norm": 3.8125364780426025, "learning_rate": 1.9741343235626705e-05, "loss": 2.2668, "step": 30939 }, { "epoch": 0.4, "grad_norm": 3.485341787338257, "learning_rate": 1.974131949030763e-05, "loss": 1.9383, "step": 30940 }, { "epoch": 0.4, "grad_norm": 3.2893786430358887, "learning_rate": 1.9741295743912947e-05, "loss": 1.7216, "step": 30941 }, { "epoch": 0.4, "grad_norm": 3.6816370487213135, "learning_rate": 1.974127199644266e-05, "loss": 1.7801, "step": 30942 }, { "epoch": 0.4, "grad_norm": 3.9245948791503906, "learning_rate": 1.974124824789677e-05, "loss": 1.6034, "step": 30943 }, { "epoch": 0.4, "grad_norm": 3.6276562213897705, "learning_rate": 1.9741224498275282e-05, "loss": 1.7178, "step": 30944 }, { "epoch": 0.4, "grad_norm": 3.3100218772888184, "learning_rate": 1.9741200747578194e-05, "loss": 1.3878, "step": 30945 }, { "epoch": 0.4, "grad_norm": 4.314154148101807, "learning_rate": 1.9741176995805517e-05, "loss": 2.6332, "step": 30946 }, { "epoch": 0.4, "grad_norm": 4.243099212646484, "learning_rate": 1.9741153242957247e-05, "loss": 1.9238, "step": 30947 }, { "epoch": 0.4, "grad_norm": 3.5129451751708984, "learning_rate": 1.9741129489033387e-05, "loss": 1.6387, "step": 30948 }, { "epoch": 0.4, "grad_norm": 4.303922653198242, "learning_rate": 1.9741105734033944e-05, "loss": 2.0152, "step": 30949 }, { "epoch": 0.4, "grad_norm": 4.133930683135986, "learning_rate": 1.9741081977958916e-05, "loss": 2.3203, "step": 30950 }, { "epoch": 0.4, "grad_norm": 3.7419190406799316, "learning_rate": 1.9741058220808308e-05, "loss": 1.7442, "step": 30951 }, { "epoch": 0.4, "grad_norm": 3.1983892917633057, "learning_rate": 1.974103446258212e-05, "loss": 1.7711, "step": 30952 }, { "epoch": 0.4, "grad_norm": 3.6294562816619873, "learning_rate": 1.9741010703280357e-05, "loss": 1.895, "step": 30953 }, { "epoch": 0.4, "grad_norm": 3.9389281272888184, "learning_rate": 1.9740986942903024e-05, "loss": 1.7731, "step": 30954 }, { "epoch": 0.4, "grad_norm": 3.7894816398620605, "learning_rate": 1.9740963181450116e-05, "loss": 1.8784, "step": 30955 }, { "epoch": 0.4, "grad_norm": 4.284261226654053, "learning_rate": 1.9740939418921644e-05, "loss": 2.4537, "step": 30956 }, { "epoch": 0.4, "grad_norm": 3.745725631713867, "learning_rate": 1.9740915655317608e-05, "loss": 2.0093, "step": 30957 }, { "epoch": 0.4, "grad_norm": 3.8472254276275635, "learning_rate": 1.9740891890638008e-05, "loss": 1.8481, "step": 30958 }, { "epoch": 0.4, "grad_norm": 3.757503032684326, "learning_rate": 1.9740868124882847e-05, "loss": 1.9323, "step": 30959 }, { "epoch": 0.4, "grad_norm": 4.310868740081787, "learning_rate": 1.974084435805213e-05, "loss": 2.0123, "step": 30960 }, { "epoch": 0.4, "grad_norm": 3.6847362518310547, "learning_rate": 1.974082059014586e-05, "loss": 1.9183, "step": 30961 }, { "epoch": 0.4, "grad_norm": 3.8692071437835693, "learning_rate": 1.9740796821164037e-05, "loss": 1.9188, "step": 30962 }, { "epoch": 0.4, "grad_norm": 3.0067245960235596, "learning_rate": 1.9740773051106663e-05, "loss": 1.5029, "step": 30963 }, { "epoch": 0.4, "grad_norm": 4.015400409698486, "learning_rate": 1.9740749279973747e-05, "loss": 1.8821, "step": 30964 }, { "epoch": 0.4, "grad_norm": 3.1539056301116943, "learning_rate": 1.974072550776528e-05, "loss": 1.6793, "step": 30965 }, { "epoch": 0.4, "grad_norm": 4.0501790046691895, "learning_rate": 1.9740701734481278e-05, "loss": 2.0815, "step": 30966 }, { "epoch": 0.4, "grad_norm": 3.826390266418457, "learning_rate": 1.9740677960121735e-05, "loss": 1.8466, "step": 30967 }, { "epoch": 0.4, "grad_norm": 3.782410144805908, "learning_rate": 1.9740654184686654e-05, "loss": 2.0472, "step": 30968 }, { "epoch": 0.4, "grad_norm": 4.1127166748046875, "learning_rate": 1.9740630408176044e-05, "loss": 2.0645, "step": 30969 }, { "epoch": 0.4, "grad_norm": 3.74562668800354, "learning_rate": 1.97406066305899e-05, "loss": 1.907, "step": 30970 }, { "epoch": 0.4, "grad_norm": 3.9619200229644775, "learning_rate": 1.9740582851928228e-05, "loss": 1.7498, "step": 30971 }, { "epoch": 0.4, "grad_norm": 3.6305227279663086, "learning_rate": 1.974055907219103e-05, "loss": 1.7573, "step": 30972 }, { "epoch": 0.4, "grad_norm": 3.575397253036499, "learning_rate": 1.9740535291378308e-05, "loss": 1.6996, "step": 30973 }, { "epoch": 0.4, "grad_norm": 4.270226955413818, "learning_rate": 1.974051150949007e-05, "loss": 2.2875, "step": 30974 }, { "epoch": 0.4, "grad_norm": 3.1372995376586914, "learning_rate": 1.9740487726526308e-05, "loss": 1.466, "step": 30975 }, { "epoch": 0.4, "grad_norm": 3.938784599304199, "learning_rate": 1.9740463942487035e-05, "loss": 2.2176, "step": 30976 }, { "epoch": 0.4, "grad_norm": 4.537806510925293, "learning_rate": 1.9740440157372248e-05, "loss": 2.314, "step": 30977 }, { "epoch": 0.4, "grad_norm": 3.856832265853882, "learning_rate": 1.9740416371181956e-05, "loss": 2.2588, "step": 30978 }, { "epoch": 0.4, "grad_norm": 3.8701772689819336, "learning_rate": 1.974039258391615e-05, "loss": 1.8312, "step": 30979 }, { "epoch": 0.4, "grad_norm": 3.738626003265381, "learning_rate": 1.9740368795574843e-05, "loss": 1.8648, "step": 30980 }, { "epoch": 0.4, "grad_norm": 4.4648518562316895, "learning_rate": 1.9740345006158032e-05, "loss": 2.7045, "step": 30981 }, { "epoch": 0.4, "grad_norm": 4.2835822105407715, "learning_rate": 1.9740321215665724e-05, "loss": 2.3021, "step": 30982 }, { "epoch": 0.4, "grad_norm": 3.5111987590789795, "learning_rate": 1.9740297424097918e-05, "loss": 1.7648, "step": 30983 }, { "epoch": 0.4, "grad_norm": 4.347280979156494, "learning_rate": 1.974027363145462e-05, "loss": 2.087, "step": 30984 }, { "epoch": 0.4, "grad_norm": 3.9789013862609863, "learning_rate": 1.9740249837735827e-05, "loss": 2.5862, "step": 30985 }, { "epoch": 0.4, "grad_norm": 4.225244998931885, "learning_rate": 1.9740226042941547e-05, "loss": 2.3265, "step": 30986 }, { "epoch": 0.4, "grad_norm": 4.1848464012146, "learning_rate": 1.9740202247071782e-05, "loss": 2.2999, "step": 30987 }, { "epoch": 0.4, "grad_norm": 3.8952345848083496, "learning_rate": 1.9740178450126533e-05, "loss": 2.2897, "step": 30988 }, { "epoch": 0.4, "grad_norm": 3.8534650802612305, "learning_rate": 1.97401546521058e-05, "loss": 1.9341, "step": 30989 }, { "epoch": 0.4, "grad_norm": 3.969717502593994, "learning_rate": 1.974013085300959e-05, "loss": 1.9566, "step": 30990 }, { "epoch": 0.4, "grad_norm": 4.374495983123779, "learning_rate": 1.974010705283791e-05, "loss": 2.3541, "step": 30991 }, { "epoch": 0.4, "grad_norm": 3.9806385040283203, "learning_rate": 1.974008325159075e-05, "loss": 2.3886, "step": 30992 }, { "epoch": 0.4, "grad_norm": 4.911936283111572, "learning_rate": 1.974005944926812e-05, "loss": 2.2632, "step": 30993 }, { "epoch": 0.4, "grad_norm": 4.147340774536133, "learning_rate": 1.9740035645870025e-05, "loss": 1.9829, "step": 30994 }, { "epoch": 0.4, "grad_norm": 3.645677328109741, "learning_rate": 1.9740011841396462e-05, "loss": 1.9404, "step": 30995 }, { "epoch": 0.4, "grad_norm": 3.753535032272339, "learning_rate": 1.9739988035847442e-05, "loss": 2.1254, "step": 30996 }, { "epoch": 0.4, "grad_norm": 3.8035130500793457, "learning_rate": 1.9739964229222957e-05, "loss": 1.857, "step": 30997 }, { "epoch": 0.4, "grad_norm": 3.4736814498901367, "learning_rate": 1.9739940421523015e-05, "loss": 1.8512, "step": 30998 }, { "epoch": 0.4, "grad_norm": 3.794062852859497, "learning_rate": 1.973991661274762e-05, "loss": 1.7781, "step": 30999 }, { "epoch": 0.4, "grad_norm": 3.7287750244140625, "learning_rate": 1.9739892802896773e-05, "loss": 2.1431, "step": 31000 }, { "epoch": 0.4, "grad_norm": 3.7823312282562256, "learning_rate": 1.9739868991970476e-05, "loss": 1.866, "step": 31001 }, { "epoch": 0.4, "grad_norm": 3.952860116958618, "learning_rate": 1.973984517996873e-05, "loss": 1.9345, "step": 31002 }, { "epoch": 0.4, "grad_norm": 4.148777484893799, "learning_rate": 1.9739821366891543e-05, "loss": 2.2844, "step": 31003 }, { "epoch": 0.4, "grad_norm": 3.072399616241455, "learning_rate": 1.9739797552738914e-05, "loss": 1.3958, "step": 31004 }, { "epoch": 0.4, "grad_norm": 4.362326622009277, "learning_rate": 1.9739773737510845e-05, "loss": 2.0401, "step": 31005 }, { "epoch": 0.4, "grad_norm": 3.340852737426758, "learning_rate": 1.9739749921207342e-05, "loss": 1.4604, "step": 31006 }, { "epoch": 0.4, "grad_norm": 3.5730152130126953, "learning_rate": 1.9739726103828402e-05, "loss": 2.1342, "step": 31007 }, { "epoch": 0.4, "grad_norm": 3.5262229442596436, "learning_rate": 1.9739702285374035e-05, "loss": 1.7417, "step": 31008 }, { "epoch": 0.4, "grad_norm": 3.571803569793701, "learning_rate": 1.9739678465844238e-05, "loss": 2.0067, "step": 31009 }, { "epoch": 0.4, "grad_norm": 3.4777305126190186, "learning_rate": 1.973965464523901e-05, "loss": 1.7026, "step": 31010 }, { "epoch": 0.4, "grad_norm": 3.5556321144104004, "learning_rate": 1.9739630823558363e-05, "loss": 2.0304, "step": 31011 }, { "epoch": 0.4, "grad_norm": 4.242326736450195, "learning_rate": 1.97396070008023e-05, "loss": 2.2947, "step": 31012 }, { "epoch": 0.4, "grad_norm": 4.080298900604248, "learning_rate": 1.9739583176970814e-05, "loss": 2.2431, "step": 31013 }, { "epoch": 0.4, "grad_norm": 3.8548619747161865, "learning_rate": 1.9739559352063913e-05, "loss": 1.7161, "step": 31014 }, { "epoch": 0.4, "grad_norm": 3.908602714538574, "learning_rate": 1.97395355260816e-05, "loss": 1.9003, "step": 31015 }, { "epoch": 0.4, "grad_norm": 3.3967273235321045, "learning_rate": 1.973951169902388e-05, "loss": 1.675, "step": 31016 }, { "epoch": 0.4, "grad_norm": 4.509886264801025, "learning_rate": 1.973948787089075e-05, "loss": 2.2491, "step": 31017 }, { "epoch": 0.4, "grad_norm": 3.5872397422790527, "learning_rate": 1.9739464041682215e-05, "loss": 1.8088, "step": 31018 }, { "epoch": 0.4, "grad_norm": 3.562779664993286, "learning_rate": 1.9739440211398277e-05, "loss": 1.8233, "step": 31019 }, { "epoch": 0.4, "grad_norm": 3.332709789276123, "learning_rate": 1.973941638003894e-05, "loss": 1.5969, "step": 31020 }, { "epoch": 0.4, "grad_norm": 4.070423126220703, "learning_rate": 1.973939254760421e-05, "loss": 1.9499, "step": 31021 }, { "epoch": 0.4, "grad_norm": 3.929415464401245, "learning_rate": 1.973936871409408e-05, "loss": 2.0679, "step": 31022 }, { "epoch": 0.4, "grad_norm": 3.7930753231048584, "learning_rate": 1.9739344879508563e-05, "loss": 1.8192, "step": 31023 }, { "epoch": 0.4, "grad_norm": 3.1661527156829834, "learning_rate": 1.9739321043847653e-05, "loss": 1.4988, "step": 31024 }, { "epoch": 0.4, "grad_norm": 4.081664562225342, "learning_rate": 1.973929720711136e-05, "loss": 1.8823, "step": 31025 }, { "epoch": 0.4, "grad_norm": 3.8261547088623047, "learning_rate": 1.9739273369299682e-05, "loss": 1.8975, "step": 31026 }, { "epoch": 0.4, "grad_norm": 4.27249813079834, "learning_rate": 1.9739249530412624e-05, "loss": 2.1965, "step": 31027 }, { "epoch": 0.4, "grad_norm": 4.305120468139648, "learning_rate": 1.9739225690450187e-05, "loss": 2.053, "step": 31028 }, { "epoch": 0.4, "grad_norm": 3.575751543045044, "learning_rate": 1.9739201849412376e-05, "loss": 2.0149, "step": 31029 }, { "epoch": 0.4, "grad_norm": 4.121906280517578, "learning_rate": 1.973917800729919e-05, "loss": 2.4983, "step": 31030 }, { "epoch": 0.4, "grad_norm": 3.721774101257324, "learning_rate": 1.9739154164110634e-05, "loss": 1.8371, "step": 31031 }, { "epoch": 0.4, "grad_norm": 4.105207443237305, "learning_rate": 1.973913031984671e-05, "loss": 2.1759, "step": 31032 }, { "epoch": 0.4, "grad_norm": 3.2646074295043945, "learning_rate": 1.9739106474507422e-05, "loss": 1.8642, "step": 31033 }, { "epoch": 0.4, "grad_norm": 3.485259771347046, "learning_rate": 1.973908262809277e-05, "loss": 1.9992, "step": 31034 }, { "epoch": 0.4, "grad_norm": 4.136788845062256, "learning_rate": 1.9739058780602756e-05, "loss": 1.9386, "step": 31035 }, { "epoch": 0.4, "grad_norm": 4.029760837554932, "learning_rate": 1.9739034932037388e-05, "loss": 2.12, "step": 31036 }, { "epoch": 0.4, "grad_norm": 2.9744224548339844, "learning_rate": 1.9739011082396663e-05, "loss": 1.6996, "step": 31037 }, { "epoch": 0.4, "grad_norm": 4.2302751541137695, "learning_rate": 1.9738987231680587e-05, "loss": 1.8887, "step": 31038 }, { "epoch": 0.4, "grad_norm": 4.304289817810059, "learning_rate": 1.9738963379889165e-05, "loss": 2.3285, "step": 31039 }, { "epoch": 0.4, "grad_norm": 3.221282720565796, "learning_rate": 1.973893952702239e-05, "loss": 1.424, "step": 31040 }, { "epoch": 0.4, "grad_norm": 4.258023738861084, "learning_rate": 1.9738915673080274e-05, "loss": 2.4598, "step": 31041 }, { "epoch": 0.4, "grad_norm": 3.749889612197876, "learning_rate": 1.9738891818062818e-05, "loss": 1.914, "step": 31042 }, { "epoch": 0.4, "grad_norm": 3.824221611022949, "learning_rate": 1.973886796197002e-05, "loss": 1.7804, "step": 31043 }, { "epoch": 0.4, "grad_norm": 3.547121524810791, "learning_rate": 1.9738844104801887e-05, "loss": 2.0097, "step": 31044 }, { "epoch": 0.4, "grad_norm": 3.793292284011841, "learning_rate": 1.9738820246558422e-05, "loss": 1.7693, "step": 31045 }, { "epoch": 0.4, "grad_norm": 3.8277671337127686, "learning_rate": 1.9738796387239625e-05, "loss": 2.2759, "step": 31046 }, { "epoch": 0.4, "grad_norm": 4.705078125, "learning_rate": 1.97387725268455e-05, "loss": 2.6613, "step": 31047 }, { "epoch": 0.4, "grad_norm": 3.7174065113067627, "learning_rate": 1.9738748665376045e-05, "loss": 1.9354, "step": 31048 }, { "epoch": 0.4, "grad_norm": 3.489379405975342, "learning_rate": 1.9738724802831272e-05, "loss": 1.7885, "step": 31049 }, { "epoch": 0.4, "grad_norm": 3.447767734527588, "learning_rate": 1.9738700939211177e-05, "loss": 1.5277, "step": 31050 }, { "epoch": 0.4, "grad_norm": 4.226567268371582, "learning_rate": 1.9738677074515764e-05, "loss": 2.434, "step": 31051 }, { "epoch": 0.4, "grad_norm": 3.7712066173553467, "learning_rate": 1.9738653208745035e-05, "loss": 2.2911, "step": 31052 }, { "epoch": 0.4, "grad_norm": 2.934999465942383, "learning_rate": 1.9738629341898995e-05, "loss": 1.4746, "step": 31053 }, { "epoch": 0.4, "grad_norm": 3.551830291748047, "learning_rate": 1.9738605473977646e-05, "loss": 1.801, "step": 31054 }, { "epoch": 0.4, "grad_norm": 3.579993486404419, "learning_rate": 1.973858160498099e-05, "loss": 2.1282, "step": 31055 }, { "epoch": 0.4, "grad_norm": 3.8570735454559326, "learning_rate": 1.9738557734909024e-05, "loss": 2.0599, "step": 31056 }, { "epoch": 0.4, "grad_norm": 3.3408467769622803, "learning_rate": 1.973853386376176e-05, "loss": 1.6024, "step": 31057 }, { "epoch": 0.4, "grad_norm": 3.681334972381592, "learning_rate": 1.97385099915392e-05, "loss": 2.0288, "step": 31058 }, { "epoch": 0.4, "grad_norm": 4.118229389190674, "learning_rate": 1.9738486118241336e-05, "loss": 2.0958, "step": 31059 }, { "epoch": 0.4, "grad_norm": 3.914827346801758, "learning_rate": 1.9738462243868185e-05, "loss": 2.5339, "step": 31060 }, { "epoch": 0.4, "grad_norm": 4.804427146911621, "learning_rate": 1.9738438368419736e-05, "loss": 2.4336, "step": 31061 }, { "epoch": 0.4, "grad_norm": 3.983820915222168, "learning_rate": 1.9738414491896002e-05, "loss": 2.1964, "step": 31062 }, { "epoch": 0.4, "grad_norm": 3.273202419281006, "learning_rate": 1.9738390614296984e-05, "loss": 1.8043, "step": 31063 }, { "epoch": 0.4, "grad_norm": 4.219849586486816, "learning_rate": 1.9738366735622678e-05, "loss": 2.4092, "step": 31064 }, { "epoch": 0.4, "grad_norm": 3.9773764610290527, "learning_rate": 1.9738342855873093e-05, "loss": 1.9963, "step": 31065 }, { "epoch": 0.4, "grad_norm": 3.5626323223114014, "learning_rate": 1.9738318975048228e-05, "loss": 1.7227, "step": 31066 }, { "epoch": 0.4, "grad_norm": 3.8244433403015137, "learning_rate": 1.973829509314809e-05, "loss": 1.8593, "step": 31067 }, { "epoch": 0.4, "grad_norm": 3.3739013671875, "learning_rate": 1.973827121017268e-05, "loss": 1.7635, "step": 31068 }, { "epoch": 0.4, "grad_norm": 3.528686046600342, "learning_rate": 1.9738247326121995e-05, "loss": 1.5718, "step": 31069 }, { "epoch": 0.4, "grad_norm": 4.0795512199401855, "learning_rate": 1.9738223440996045e-05, "loss": 2.095, "step": 31070 }, { "epoch": 0.4, "grad_norm": 3.7654688358306885, "learning_rate": 1.973819955479483e-05, "loss": 2.1279, "step": 31071 }, { "epoch": 0.4, "grad_norm": 3.7551488876342773, "learning_rate": 1.9738175667518353e-05, "loss": 2.0054, "step": 31072 }, { "epoch": 0.4, "grad_norm": 3.779139518737793, "learning_rate": 1.9738151779166614e-05, "loss": 1.9429, "step": 31073 }, { "epoch": 0.4, "grad_norm": 4.372650623321533, "learning_rate": 1.973812788973962e-05, "loss": 2.583, "step": 31074 }, { "epoch": 0.4, "grad_norm": 4.002256393432617, "learning_rate": 1.973810399923737e-05, "loss": 2.2506, "step": 31075 }, { "epoch": 0.4, "grad_norm": 5.142969131469727, "learning_rate": 1.9738080107659868e-05, "loss": 2.1089, "step": 31076 }, { "epoch": 0.4, "grad_norm": 3.4320826530456543, "learning_rate": 1.973805621500712e-05, "loss": 1.6623, "step": 31077 }, { "epoch": 0.4, "grad_norm": 3.36503005027771, "learning_rate": 1.973803232127912e-05, "loss": 1.6801, "step": 31078 }, { "epoch": 0.4, "grad_norm": 3.444230794906616, "learning_rate": 1.973800842647588e-05, "loss": 1.7814, "step": 31079 }, { "epoch": 0.4, "grad_norm": 4.07381010055542, "learning_rate": 1.97379845305974e-05, "loss": 2.1707, "step": 31080 }, { "epoch": 0.4, "grad_norm": 3.71890926361084, "learning_rate": 1.9737960633643678e-05, "loss": 1.8444, "step": 31081 }, { "epoch": 0.4, "grad_norm": 3.6300995349884033, "learning_rate": 1.973793673561472e-05, "loss": 1.701, "step": 31082 }, { "epoch": 0.4, "grad_norm": 4.268181800842285, "learning_rate": 1.973791283651053e-05, "loss": 2.3184, "step": 31083 }, { "epoch": 0.4, "grad_norm": 3.9839627742767334, "learning_rate": 1.9737888936331108e-05, "loss": 2.1615, "step": 31084 }, { "epoch": 0.4, "grad_norm": 4.486781597137451, "learning_rate": 1.973786503507646e-05, "loss": 2.3856, "step": 31085 }, { "epoch": 0.4, "grad_norm": 3.9541220664978027, "learning_rate": 1.9737841132746585e-05, "loss": 2.0456, "step": 31086 }, { "epoch": 0.4, "grad_norm": 4.147110939025879, "learning_rate": 1.9737817229341485e-05, "loss": 2.0229, "step": 31087 }, { "epoch": 0.4, "grad_norm": 4.037132263183594, "learning_rate": 1.973779332486117e-05, "loss": 1.9557, "step": 31088 }, { "epoch": 0.4, "grad_norm": 3.826354503631592, "learning_rate": 1.9737769419305633e-05, "loss": 1.9603, "step": 31089 }, { "epoch": 0.4, "grad_norm": 3.5737853050231934, "learning_rate": 1.9737745512674883e-05, "loss": 1.9642, "step": 31090 }, { "epoch": 0.4, "grad_norm": 3.69014573097229, "learning_rate": 1.973772160496892e-05, "loss": 2.0987, "step": 31091 }, { "epoch": 0.4, "grad_norm": 4.442721843719482, "learning_rate": 1.9737697696187744e-05, "loss": 2.0038, "step": 31092 }, { "epoch": 0.4, "grad_norm": 3.677603244781494, "learning_rate": 1.9737673786331366e-05, "loss": 2.0223, "step": 31093 }, { "epoch": 0.4, "grad_norm": 3.9874417781829834, "learning_rate": 1.9737649875399785e-05, "loss": 2.2315, "step": 31094 }, { "epoch": 0.4, "grad_norm": 3.5605971813201904, "learning_rate": 1.9737625963392998e-05, "loss": 2.0998, "step": 31095 }, { "epoch": 0.4, "grad_norm": 3.950813055038452, "learning_rate": 1.9737602050311014e-05, "loss": 2.1347, "step": 31096 }, { "epoch": 0.4, "grad_norm": 3.727609395980835, "learning_rate": 1.9737578136153833e-05, "loss": 1.9902, "step": 31097 }, { "epoch": 0.4, "grad_norm": 3.963496446609497, "learning_rate": 1.9737554220921456e-05, "loss": 1.8738, "step": 31098 }, { "epoch": 0.4, "grad_norm": 3.587677001953125, "learning_rate": 1.973753030461389e-05, "loss": 1.8503, "step": 31099 }, { "epoch": 0.4, "grad_norm": 3.652085542678833, "learning_rate": 1.9737506387231136e-05, "loss": 1.8765, "step": 31100 }, { "epoch": 0.4, "grad_norm": 3.5767533779144287, "learning_rate": 1.9737482468773196e-05, "loss": 1.7682, "step": 31101 }, { "epoch": 0.4, "grad_norm": 4.311995983123779, "learning_rate": 1.973745854924007e-05, "loss": 2.4545, "step": 31102 }, { "epoch": 0.4, "grad_norm": 3.903320550918579, "learning_rate": 1.9737434628631768e-05, "loss": 1.6746, "step": 31103 }, { "epoch": 0.4, "grad_norm": 4.296374797821045, "learning_rate": 1.9737410706948283e-05, "loss": 2.1896, "step": 31104 }, { "epoch": 0.4, "grad_norm": 4.241146087646484, "learning_rate": 1.9737386784189625e-05, "loss": 2.3406, "step": 31105 }, { "epoch": 0.4, "grad_norm": 3.6252503395080566, "learning_rate": 1.9737362860355794e-05, "loss": 2.0352, "step": 31106 }, { "epoch": 0.4, "grad_norm": 3.2049965858459473, "learning_rate": 1.9737338935446794e-05, "loss": 1.5416, "step": 31107 }, { "epoch": 0.4, "grad_norm": 3.5356295108795166, "learning_rate": 1.9737315009462628e-05, "loss": 1.8127, "step": 31108 }, { "epoch": 0.4, "grad_norm": 4.088132381439209, "learning_rate": 1.9737291082403292e-05, "loss": 1.9825, "step": 31109 }, { "epoch": 0.4, "grad_norm": 3.5695102214813232, "learning_rate": 1.9737267154268797e-05, "loss": 1.5678, "step": 31110 }, { "epoch": 0.4, "grad_norm": 3.8923556804656982, "learning_rate": 1.9737243225059143e-05, "loss": 2.0881, "step": 31111 }, { "epoch": 0.4, "grad_norm": 4.029512882232666, "learning_rate": 1.9737219294774333e-05, "loss": 2.081, "step": 31112 }, { "epoch": 0.4, "grad_norm": 4.013075828552246, "learning_rate": 1.9737195363414368e-05, "loss": 2.1229, "step": 31113 }, { "epoch": 0.4, "grad_norm": 3.4959800243377686, "learning_rate": 1.973717143097925e-05, "loss": 1.8889, "step": 31114 }, { "epoch": 0.4, "grad_norm": 4.160053253173828, "learning_rate": 1.9737147497468982e-05, "loss": 2.2937, "step": 31115 }, { "epoch": 0.4, "grad_norm": 3.5422582626342773, "learning_rate": 1.9737123562883572e-05, "loss": 1.895, "step": 31116 }, { "epoch": 0.4, "grad_norm": 3.6062073707580566, "learning_rate": 1.9737099627223017e-05, "loss": 2.1663, "step": 31117 }, { "epoch": 0.4, "grad_norm": 3.781085729598999, "learning_rate": 1.973707569048732e-05, "loss": 2.1638, "step": 31118 }, { "epoch": 0.4, "grad_norm": 3.488028049468994, "learning_rate": 1.9737051752676483e-05, "loss": 1.974, "step": 31119 }, { "epoch": 0.4, "grad_norm": 4.219764709472656, "learning_rate": 1.9737027813790514e-05, "loss": 2.792, "step": 31120 }, { "epoch": 0.4, "grad_norm": 3.878614902496338, "learning_rate": 1.9737003873829406e-05, "loss": 2.3861, "step": 31121 }, { "epoch": 0.4, "grad_norm": 3.9110093116760254, "learning_rate": 1.9736979932793173e-05, "loss": 2.1591, "step": 31122 }, { "epoch": 0.4, "grad_norm": 3.6694443225860596, "learning_rate": 1.973695599068181e-05, "loss": 1.7953, "step": 31123 }, { "epoch": 0.4, "grad_norm": 3.826317548751831, "learning_rate": 1.9736932047495324e-05, "loss": 1.769, "step": 31124 }, { "epoch": 0.4, "grad_norm": 4.123791694641113, "learning_rate": 1.9736908103233715e-05, "loss": 1.6951, "step": 31125 }, { "epoch": 0.4, "grad_norm": 3.572232484817505, "learning_rate": 1.9736884157896984e-05, "loss": 1.5209, "step": 31126 }, { "epoch": 0.4, "grad_norm": 4.0304059982299805, "learning_rate": 1.9736860211485134e-05, "loss": 2.1383, "step": 31127 }, { "epoch": 0.4, "grad_norm": 3.675058126449585, "learning_rate": 1.9736836263998176e-05, "loss": 2.046, "step": 31128 }, { "epoch": 0.4, "grad_norm": 3.4695041179656982, "learning_rate": 1.9736812315436103e-05, "loss": 1.5861, "step": 31129 }, { "epoch": 0.4, "grad_norm": 3.8842520713806152, "learning_rate": 1.973678836579892e-05, "loss": 1.9081, "step": 31130 }, { "epoch": 0.4, "grad_norm": 3.871483087539673, "learning_rate": 1.9736764415086628e-05, "loss": 2.159, "step": 31131 }, { "epoch": 0.4, "grad_norm": 3.9822895526885986, "learning_rate": 1.973674046329924e-05, "loss": 2.2747, "step": 31132 }, { "epoch": 0.4, "grad_norm": 3.8524892330169678, "learning_rate": 1.9736716510436742e-05, "loss": 2.0009, "step": 31133 }, { "epoch": 0.4, "grad_norm": 3.720478057861328, "learning_rate": 1.9736692556499147e-05, "loss": 1.8788, "step": 31134 }, { "epoch": 0.4, "grad_norm": 3.7245891094207764, "learning_rate": 1.973666860148646e-05, "loss": 2.3523, "step": 31135 }, { "epoch": 0.4, "grad_norm": 3.6421918869018555, "learning_rate": 1.9736644645398676e-05, "loss": 1.968, "step": 31136 }, { "epoch": 0.4, "grad_norm": 3.928863286972046, "learning_rate": 1.9736620688235803e-05, "loss": 1.951, "step": 31137 }, { "epoch": 0.4, "grad_norm": 3.7004809379577637, "learning_rate": 1.973659672999784e-05, "loss": 1.5322, "step": 31138 }, { "epoch": 0.4, "grad_norm": 3.549848794937134, "learning_rate": 1.9736572770684796e-05, "loss": 1.7476, "step": 31139 }, { "epoch": 0.4, "grad_norm": 3.736603260040283, "learning_rate": 1.9736548810296665e-05, "loss": 2.5922, "step": 31140 }, { "epoch": 0.4, "grad_norm": 3.767735004425049, "learning_rate": 1.9736524848833457e-05, "loss": 2.0453, "step": 31141 }, { "epoch": 0.4, "grad_norm": 3.762515068054199, "learning_rate": 1.973650088629517e-05, "loss": 1.8555, "step": 31142 }, { "epoch": 0.4, "grad_norm": 3.7336738109588623, "learning_rate": 1.9736476922681803e-05, "loss": 1.9658, "step": 31143 }, { "epoch": 0.4, "grad_norm": 4.014580726623535, "learning_rate": 1.973645295799337e-05, "loss": 2.0155, "step": 31144 }, { "epoch": 0.4, "grad_norm": 3.9388628005981445, "learning_rate": 1.9736428992229864e-05, "loss": 1.9765, "step": 31145 }, { "epoch": 0.4, "grad_norm": 3.928889036178589, "learning_rate": 1.9736405025391297e-05, "loss": 1.7911, "step": 31146 }, { "epoch": 0.4, "grad_norm": 4.4756951332092285, "learning_rate": 1.973638105747766e-05, "loss": 2.2449, "step": 31147 }, { "epoch": 0.4, "grad_norm": 4.630025863647461, "learning_rate": 1.973635708848896e-05, "loss": 2.1965, "step": 31148 }, { "epoch": 0.4, "grad_norm": 3.7602016925811768, "learning_rate": 1.9736333118425207e-05, "loss": 2.0432, "step": 31149 }, { "epoch": 0.4, "grad_norm": 3.2274491786956787, "learning_rate": 1.9736309147286393e-05, "loss": 1.8463, "step": 31150 }, { "epoch": 0.4, "grad_norm": 4.02023983001709, "learning_rate": 1.9736285175072527e-05, "loss": 2.0882, "step": 31151 }, { "epoch": 0.4, "grad_norm": 3.5595624446868896, "learning_rate": 1.973626120178361e-05, "loss": 2.036, "step": 31152 }, { "epoch": 0.4, "grad_norm": 4.06443977355957, "learning_rate": 1.9736237227419646e-05, "loss": 2.2529, "step": 31153 }, { "epoch": 0.4, "grad_norm": 3.7302141189575195, "learning_rate": 1.9736213251980635e-05, "loss": 2.0761, "step": 31154 }, { "epoch": 0.4, "grad_norm": 3.433006525039673, "learning_rate": 1.973618927546658e-05, "loss": 2.097, "step": 31155 }, { "epoch": 0.4, "grad_norm": 4.13521671295166, "learning_rate": 1.9736165297877483e-05, "loss": 2.3963, "step": 31156 }, { "epoch": 0.4, "grad_norm": 4.252810478210449, "learning_rate": 1.973614131921335e-05, "loss": 2.5219, "step": 31157 }, { "epoch": 0.4, "grad_norm": 3.095726728439331, "learning_rate": 1.9736117339474182e-05, "loss": 1.5696, "step": 31158 }, { "epoch": 0.4, "grad_norm": 3.5434768199920654, "learning_rate": 1.9736093358659984e-05, "loss": 1.8615, "step": 31159 }, { "epoch": 0.4, "grad_norm": 4.022414207458496, "learning_rate": 1.973606937677075e-05, "loss": 1.8962, "step": 31160 }, { "epoch": 0.4, "grad_norm": 4.476419448852539, "learning_rate": 1.9736045393806493e-05, "loss": 2.2922, "step": 31161 }, { "epoch": 0.4, "grad_norm": 4.27616548538208, "learning_rate": 1.973602140976721e-05, "loss": 1.9716, "step": 31162 }, { "epoch": 0.4, "grad_norm": 3.665087938308716, "learning_rate": 1.9735997424652908e-05, "loss": 2.0884, "step": 31163 }, { "epoch": 0.4, "grad_norm": 4.1816253662109375, "learning_rate": 1.9735973438463585e-05, "loss": 2.1347, "step": 31164 }, { "epoch": 0.4, "grad_norm": 3.901869058609009, "learning_rate": 1.9735949451199244e-05, "loss": 2.3222, "step": 31165 }, { "epoch": 0.4, "grad_norm": 4.450690746307373, "learning_rate": 1.973592546285989e-05, "loss": 2.4102, "step": 31166 }, { "epoch": 0.4, "grad_norm": 3.701925277709961, "learning_rate": 1.9735901473445523e-05, "loss": 1.9308, "step": 31167 }, { "epoch": 0.4, "grad_norm": 4.398073673248291, "learning_rate": 1.973587748295615e-05, "loss": 2.1984, "step": 31168 }, { "epoch": 0.4, "grad_norm": 4.250307083129883, "learning_rate": 1.9735853491391768e-05, "loss": 2.3844, "step": 31169 }, { "epoch": 0.4, "grad_norm": 3.705310583114624, "learning_rate": 1.9735829498752387e-05, "loss": 1.9249, "step": 31170 }, { "epoch": 0.4, "grad_norm": 4.113931655883789, "learning_rate": 1.9735805505038003e-05, "loss": 2.0009, "step": 31171 }, { "epoch": 0.4, "grad_norm": 3.471902847290039, "learning_rate": 1.973578151024862e-05, "loss": 1.8795, "step": 31172 }, { "epoch": 0.4, "grad_norm": 4.993468284606934, "learning_rate": 1.973575751438424e-05, "loss": 1.8248, "step": 31173 }, { "epoch": 0.4, "grad_norm": 3.532459259033203, "learning_rate": 1.973573351744487e-05, "loss": 2.3676, "step": 31174 }, { "epoch": 0.4, "grad_norm": 3.4947712421417236, "learning_rate": 1.973570951943051e-05, "loss": 1.8715, "step": 31175 }, { "epoch": 0.4, "grad_norm": 3.918095111846924, "learning_rate": 1.9735685520341163e-05, "loss": 2.3048, "step": 31176 }, { "epoch": 0.4, "grad_norm": 3.4777979850769043, "learning_rate": 1.973566152017683e-05, "loss": 1.8761, "step": 31177 }, { "epoch": 0.4, "grad_norm": 3.477572441101074, "learning_rate": 1.9735637518937513e-05, "loss": 1.9109, "step": 31178 }, { "epoch": 0.4, "grad_norm": 3.966815233230591, "learning_rate": 1.9735613516623216e-05, "loss": 2.2281, "step": 31179 }, { "epoch": 0.4, "grad_norm": 3.6035566329956055, "learning_rate": 1.9735589513233947e-05, "loss": 1.6636, "step": 31180 }, { "epoch": 0.4, "grad_norm": 3.491462469100952, "learning_rate": 1.9735565508769698e-05, "loss": 1.902, "step": 31181 }, { "epoch": 0.4, "grad_norm": 3.885435104370117, "learning_rate": 1.973554150323048e-05, "loss": 2.6476, "step": 31182 }, { "epoch": 0.4, "grad_norm": 3.7534139156341553, "learning_rate": 1.9735517496616295e-05, "loss": 1.7497, "step": 31183 }, { "epoch": 0.4, "grad_norm": 3.7084662914276123, "learning_rate": 1.973549348892714e-05, "loss": 1.9598, "step": 31184 }, { "epoch": 0.4, "grad_norm": 4.047164440155029, "learning_rate": 1.9735469480163025e-05, "loss": 1.6679, "step": 31185 }, { "epoch": 0.4, "grad_norm": 3.8373143672943115, "learning_rate": 1.9735445470323945e-05, "loss": 2.1567, "step": 31186 }, { "epoch": 0.4, "grad_norm": 3.593003988265991, "learning_rate": 1.973542145940991e-05, "loss": 2.094, "step": 31187 }, { "epoch": 0.4, "grad_norm": 3.7354347705841064, "learning_rate": 1.973539744742092e-05, "loss": 2.1643, "step": 31188 }, { "epoch": 0.4, "grad_norm": 3.9232430458068848, "learning_rate": 1.9735373434356974e-05, "loss": 2.4413, "step": 31189 }, { "epoch": 0.4, "grad_norm": 3.7675817012786865, "learning_rate": 1.973534942021808e-05, "loss": 1.8629, "step": 31190 }, { "epoch": 0.4, "grad_norm": 3.9425418376922607, "learning_rate": 1.973532540500424e-05, "loss": 2.2206, "step": 31191 }, { "epoch": 0.4, "grad_norm": 3.697108745574951, "learning_rate": 1.9735301388715447e-05, "loss": 1.9915, "step": 31192 }, { "epoch": 0.4, "grad_norm": 3.353506565093994, "learning_rate": 1.973527737135172e-05, "loss": 1.5398, "step": 31193 }, { "epoch": 0.4, "grad_norm": 3.7396562099456787, "learning_rate": 1.9735253352913047e-05, "loss": 2.2123, "step": 31194 }, { "epoch": 0.4, "grad_norm": 3.961887836456299, "learning_rate": 1.9735229333399442e-05, "loss": 2.0007, "step": 31195 }, { "epoch": 0.4, "grad_norm": 3.923663854598999, "learning_rate": 1.97352053128109e-05, "loss": 1.9663, "step": 31196 }, { "epoch": 0.4, "grad_norm": 3.5464718341827393, "learning_rate": 1.9735181291147425e-05, "loss": 1.7962, "step": 31197 }, { "epoch": 0.4, "grad_norm": 4.207411766052246, "learning_rate": 1.9735157268409023e-05, "loss": 1.6807, "step": 31198 }, { "epoch": 0.4, "grad_norm": 3.674678325653076, "learning_rate": 1.9735133244595695e-05, "loss": 1.592, "step": 31199 }, { "epoch": 0.4, "grad_norm": 3.5505852699279785, "learning_rate": 1.973510921970744e-05, "loss": 1.929, "step": 31200 }, { "epoch": 0.4, "grad_norm": 3.33030366897583, "learning_rate": 1.9735085193744264e-05, "loss": 1.5031, "step": 31201 }, { "epoch": 0.4, "grad_norm": 3.873976945877075, "learning_rate": 1.9735061166706174e-05, "loss": 1.8562, "step": 31202 }, { "epoch": 0.4, "grad_norm": 5.008505344390869, "learning_rate": 1.9735037138593165e-05, "loss": 2.3377, "step": 31203 }, { "epoch": 0.4, "grad_norm": 3.8551435470581055, "learning_rate": 1.9735013109405244e-05, "loss": 1.7937, "step": 31204 }, { "epoch": 0.4, "grad_norm": 3.6200013160705566, "learning_rate": 1.973498907914241e-05, "loss": 1.7855, "step": 31205 }, { "epoch": 0.4, "grad_norm": 3.905951738357544, "learning_rate": 1.9734965047804667e-05, "loss": 2.1236, "step": 31206 }, { "epoch": 0.4, "grad_norm": 3.6029913425445557, "learning_rate": 1.9734941015392024e-05, "loss": 1.9269, "step": 31207 }, { "epoch": 0.41, "grad_norm": 3.4976511001586914, "learning_rate": 1.9734916981904476e-05, "loss": 1.7236, "step": 31208 }, { "epoch": 0.41, "grad_norm": 3.360520839691162, "learning_rate": 1.9734892947342026e-05, "loss": 1.686, "step": 31209 }, { "epoch": 0.41, "grad_norm": 4.01533317565918, "learning_rate": 1.973486891170468e-05, "loss": 2.0867, "step": 31210 }, { "epoch": 0.41, "grad_norm": 4.038010597229004, "learning_rate": 1.973484487499244e-05, "loss": 1.8196, "step": 31211 }, { "epoch": 0.41, "grad_norm": 4.362104892730713, "learning_rate": 1.973482083720531e-05, "loss": 2.5956, "step": 31212 }, { "epoch": 0.41, "grad_norm": 3.6117026805877686, "learning_rate": 1.9734796798343286e-05, "loss": 1.9394, "step": 31213 }, { "epoch": 0.41, "grad_norm": 3.8262500762939453, "learning_rate": 1.9734772758406375e-05, "loss": 1.8202, "step": 31214 }, { "epoch": 0.41, "grad_norm": 3.6556830406188965, "learning_rate": 1.9734748717394586e-05, "loss": 2.2451, "step": 31215 }, { "epoch": 0.41, "grad_norm": 3.6553592681884766, "learning_rate": 1.9734724675307908e-05, "loss": 1.99, "step": 31216 }, { "epoch": 0.41, "grad_norm": 4.148899078369141, "learning_rate": 1.9734700632146356e-05, "loss": 2.2299, "step": 31217 }, { "epoch": 0.41, "grad_norm": 3.6925032138824463, "learning_rate": 1.973467658790993e-05, "loss": 1.686, "step": 31218 }, { "epoch": 0.41, "grad_norm": 3.3819401264190674, "learning_rate": 1.9734652542598625e-05, "loss": 1.8485, "step": 31219 }, { "epoch": 0.41, "grad_norm": 3.605360507965088, "learning_rate": 1.9734628496212452e-05, "loss": 1.7934, "step": 31220 }, { "epoch": 0.41, "grad_norm": 4.865344047546387, "learning_rate": 1.973460444875141e-05, "loss": 2.8275, "step": 31221 }, { "epoch": 0.41, "grad_norm": 4.025907516479492, "learning_rate": 1.9734580400215503e-05, "loss": 1.9724, "step": 31222 }, { "epoch": 0.41, "grad_norm": 3.5043911933898926, "learning_rate": 1.9734556350604733e-05, "loss": 1.5087, "step": 31223 }, { "epoch": 0.41, "grad_norm": 4.003693103790283, "learning_rate": 1.9734532299919103e-05, "loss": 2.3364, "step": 31224 }, { "epoch": 0.41, "grad_norm": 4.036623954772949, "learning_rate": 1.9734508248158618e-05, "loss": 1.9084, "step": 31225 }, { "epoch": 0.41, "grad_norm": 3.904773712158203, "learning_rate": 1.9734484195323275e-05, "loss": 1.9592, "step": 31226 }, { "epoch": 0.41, "grad_norm": 4.130618572235107, "learning_rate": 1.973446014141308e-05, "loss": 2.2503, "step": 31227 }, { "epoch": 0.41, "grad_norm": 3.284925699234009, "learning_rate": 1.9734436086428034e-05, "loss": 1.5016, "step": 31228 }, { "epoch": 0.41, "grad_norm": 3.8519551753997803, "learning_rate": 1.9734412030368145e-05, "loss": 2.0657, "step": 31229 }, { "epoch": 0.41, "grad_norm": 3.713268280029297, "learning_rate": 1.9734387973233412e-05, "loss": 2.084, "step": 31230 }, { "epoch": 0.41, "grad_norm": 3.5931265354156494, "learning_rate": 1.9734363915023832e-05, "loss": 2.041, "step": 31231 }, { "epoch": 0.41, "grad_norm": 4.230827331542969, "learning_rate": 1.9734339855739418e-05, "loss": 2.0057, "step": 31232 }, { "epoch": 0.41, "grad_norm": 3.6043689250946045, "learning_rate": 1.9734315795380166e-05, "loss": 1.8959, "step": 31233 }, { "epoch": 0.41, "grad_norm": 4.198518753051758, "learning_rate": 1.973429173394608e-05, "loss": 2.3128, "step": 31234 }, { "epoch": 0.41, "grad_norm": 3.5355587005615234, "learning_rate": 1.973426767143716e-05, "loss": 1.7542, "step": 31235 }, { "epoch": 0.41, "grad_norm": 3.2858614921569824, "learning_rate": 1.9734243607853417e-05, "loss": 1.6804, "step": 31236 }, { "epoch": 0.41, "grad_norm": 3.3671441078186035, "learning_rate": 1.9734219543194847e-05, "loss": 1.5254, "step": 31237 }, { "epoch": 0.41, "grad_norm": 3.635913372039795, "learning_rate": 1.9734195477461452e-05, "loss": 2.0868, "step": 31238 }, { "epoch": 0.41, "grad_norm": 3.86257266998291, "learning_rate": 1.973417141065324e-05, "loss": 2.2853, "step": 31239 }, { "epoch": 0.41, "grad_norm": 3.5170044898986816, "learning_rate": 1.973414734277021e-05, "loss": 2.0947, "step": 31240 }, { "epoch": 0.41, "grad_norm": 3.935699462890625, "learning_rate": 1.9734123273812363e-05, "loss": 1.9147, "step": 31241 }, { "epoch": 0.41, "grad_norm": 3.7117104530334473, "learning_rate": 1.9734099203779703e-05, "loss": 1.8518, "step": 31242 }, { "epoch": 0.41, "grad_norm": 3.666429042816162, "learning_rate": 1.9734075132672234e-05, "loss": 1.8117, "step": 31243 }, { "epoch": 0.41, "grad_norm": 3.5733699798583984, "learning_rate": 1.973405106048996e-05, "loss": 1.6329, "step": 31244 }, { "epoch": 0.41, "grad_norm": 3.564687967300415, "learning_rate": 1.973402698723288e-05, "loss": 1.9876, "step": 31245 }, { "epoch": 0.41, "grad_norm": 3.6794960498809814, "learning_rate": 1.9734002912901e-05, "loss": 1.5237, "step": 31246 }, { "epoch": 0.41, "grad_norm": 3.6997263431549072, "learning_rate": 1.9733978837494317e-05, "loss": 1.8898, "step": 31247 }, { "epoch": 0.41, "grad_norm": 3.524162769317627, "learning_rate": 1.973395476101284e-05, "loss": 1.8286, "step": 31248 }, { "epoch": 0.41, "grad_norm": 3.601210832595825, "learning_rate": 1.973393068345657e-05, "loss": 1.9388, "step": 31249 }, { "epoch": 0.41, "grad_norm": 3.900650978088379, "learning_rate": 1.9733906604825506e-05, "loss": 1.9763, "step": 31250 }, { "epoch": 0.41, "grad_norm": 4.398015975952148, "learning_rate": 1.973388252511966e-05, "loss": 2.2475, "step": 31251 }, { "epoch": 0.41, "grad_norm": 3.5784189701080322, "learning_rate": 1.9733858444339023e-05, "loss": 1.9696, "step": 31252 }, { "epoch": 0.41, "grad_norm": 4.446610450744629, "learning_rate": 1.9733834362483603e-05, "loss": 2.7698, "step": 31253 }, { "epoch": 0.41, "grad_norm": 3.5612409114837646, "learning_rate": 1.9733810279553403e-05, "loss": 1.7228, "step": 31254 }, { "epoch": 0.41, "grad_norm": 4.9811601638793945, "learning_rate": 1.9733786195548424e-05, "loss": 2.6152, "step": 31255 }, { "epoch": 0.41, "grad_norm": 4.254457950592041, "learning_rate": 1.973376211046867e-05, "loss": 2.6548, "step": 31256 }, { "epoch": 0.41, "grad_norm": 3.784435510635376, "learning_rate": 1.973373802431415e-05, "loss": 2.0754, "step": 31257 }, { "epoch": 0.41, "grad_norm": 4.4138946533203125, "learning_rate": 1.9733713937084855e-05, "loss": 2.6005, "step": 31258 }, { "epoch": 0.41, "grad_norm": 3.499476909637451, "learning_rate": 1.973368984878079e-05, "loss": 1.9984, "step": 31259 }, { "epoch": 0.41, "grad_norm": 3.536461591720581, "learning_rate": 1.9733665759401966e-05, "loss": 1.9685, "step": 31260 }, { "epoch": 0.41, "grad_norm": 3.741478204727173, "learning_rate": 1.973364166894838e-05, "loss": 1.7379, "step": 31261 }, { "epoch": 0.41, "grad_norm": 4.611722469329834, "learning_rate": 1.973361757742003e-05, "loss": 2.0771, "step": 31262 }, { "epoch": 0.41, "grad_norm": 3.7112667560577393, "learning_rate": 1.9733593484816923e-05, "loss": 1.8844, "step": 31263 }, { "epoch": 0.41, "grad_norm": 3.740309238433838, "learning_rate": 1.9733569391139068e-05, "loss": 1.906, "step": 31264 }, { "epoch": 0.41, "grad_norm": 3.7737205028533936, "learning_rate": 1.9733545296386457e-05, "loss": 2.1081, "step": 31265 }, { "epoch": 0.41, "grad_norm": 3.588139295578003, "learning_rate": 1.97335212005591e-05, "loss": 1.9359, "step": 31266 }, { "epoch": 0.41, "grad_norm": 3.3076648712158203, "learning_rate": 1.9733497103656997e-05, "loss": 1.5583, "step": 31267 }, { "epoch": 0.41, "grad_norm": 3.938166856765747, "learning_rate": 1.973347300568015e-05, "loss": 2.3112, "step": 31268 }, { "epoch": 0.41, "grad_norm": 3.3365464210510254, "learning_rate": 1.9733448906628564e-05, "loss": 1.5764, "step": 31269 }, { "epoch": 0.41, "grad_norm": 3.8201169967651367, "learning_rate": 1.973342480650224e-05, "loss": 1.8145, "step": 31270 }, { "epoch": 0.41, "grad_norm": 4.280832767486572, "learning_rate": 1.9733400705301177e-05, "loss": 2.2847, "step": 31271 }, { "epoch": 0.41, "grad_norm": 3.920494794845581, "learning_rate": 1.9733376603025385e-05, "loss": 1.7352, "step": 31272 }, { "epoch": 0.41, "grad_norm": 3.6104912757873535, "learning_rate": 1.973335249967486e-05, "loss": 2.0996, "step": 31273 }, { "epoch": 0.41, "grad_norm": 3.5770723819732666, "learning_rate": 1.9733328395249615e-05, "loss": 1.7793, "step": 31274 }, { "epoch": 0.41, "grad_norm": 3.4566774368286133, "learning_rate": 1.9733304289749636e-05, "loss": 1.7347, "step": 31275 }, { "epoch": 0.41, "grad_norm": 3.6229968070983887, "learning_rate": 1.9733280183174942e-05, "loss": 2.0604, "step": 31276 }, { "epoch": 0.41, "grad_norm": 4.191196918487549, "learning_rate": 1.9733256075525523e-05, "loss": 2.2829, "step": 31277 }, { "epoch": 0.41, "grad_norm": 4.095613479614258, "learning_rate": 1.973323196680139e-05, "loss": 2.2527, "step": 31278 }, { "epoch": 0.41, "grad_norm": 3.620598077774048, "learning_rate": 1.9733207857002544e-05, "loss": 2.0185, "step": 31279 }, { "epoch": 0.41, "grad_norm": 4.163446426391602, "learning_rate": 1.9733183746128988e-05, "loss": 2.2532, "step": 31280 }, { "epoch": 0.41, "grad_norm": 3.6098215579986572, "learning_rate": 1.973315963418072e-05, "loss": 2.2504, "step": 31281 }, { "epoch": 0.41, "grad_norm": 4.094578742980957, "learning_rate": 1.973313552115775e-05, "loss": 2.1715, "step": 31282 }, { "epoch": 0.41, "grad_norm": 4.012980937957764, "learning_rate": 1.9733111407060076e-05, "loss": 1.98, "step": 31283 }, { "epoch": 0.41, "grad_norm": 3.900308847427368, "learning_rate": 1.97330872918877e-05, "loss": 1.9915, "step": 31284 }, { "epoch": 0.41, "grad_norm": 4.889698028564453, "learning_rate": 1.9733063175640625e-05, "loss": 2.4453, "step": 31285 }, { "epoch": 0.41, "grad_norm": 3.6231420040130615, "learning_rate": 1.9733039058318854e-05, "loss": 2.0838, "step": 31286 }, { "epoch": 0.41, "grad_norm": 3.8591244220733643, "learning_rate": 1.9733014939922392e-05, "loss": 1.5099, "step": 31287 }, { "epoch": 0.41, "grad_norm": 3.9445362091064453, "learning_rate": 1.9732990820451242e-05, "loss": 1.8083, "step": 31288 }, { "epoch": 0.41, "grad_norm": 3.553001880645752, "learning_rate": 1.97329666999054e-05, "loss": 1.9739, "step": 31289 }, { "epoch": 0.41, "grad_norm": 3.8561158180236816, "learning_rate": 1.9732942578284877e-05, "loss": 1.5696, "step": 31290 }, { "epoch": 0.41, "grad_norm": 3.473332405090332, "learning_rate": 1.9732918455589672e-05, "loss": 1.7188, "step": 31291 }, { "epoch": 0.41, "grad_norm": 4.131351947784424, "learning_rate": 1.9732894331819786e-05, "loss": 2.2332, "step": 31292 }, { "epoch": 0.41, "grad_norm": 3.398665189743042, "learning_rate": 1.9732870206975225e-05, "loss": 1.8063, "step": 31293 }, { "epoch": 0.41, "grad_norm": 4.3144049644470215, "learning_rate": 1.973284608105599e-05, "loss": 1.9176, "step": 31294 }, { "epoch": 0.41, "grad_norm": 3.4394783973693848, "learning_rate": 1.973282195406208e-05, "loss": 1.6539, "step": 31295 }, { "epoch": 0.41, "grad_norm": 3.4412453174591064, "learning_rate": 1.9732797825993506e-05, "loss": 1.6934, "step": 31296 }, { "epoch": 0.41, "grad_norm": 3.732029676437378, "learning_rate": 1.9732773696850263e-05, "loss": 2.0857, "step": 31297 }, { "epoch": 0.41, "grad_norm": 3.5531647205352783, "learning_rate": 1.973274956663236e-05, "loss": 1.6319, "step": 31298 }, { "epoch": 0.41, "grad_norm": 3.5810232162475586, "learning_rate": 1.9732725435339793e-05, "loss": 1.582, "step": 31299 }, { "epoch": 0.41, "grad_norm": 3.8315110206604004, "learning_rate": 1.973270130297257e-05, "loss": 1.9185, "step": 31300 }, { "epoch": 0.41, "grad_norm": 3.1982247829437256, "learning_rate": 1.973267716953069e-05, "loss": 1.7503, "step": 31301 }, { "epoch": 0.41, "grad_norm": 4.205751895904541, "learning_rate": 1.9732653035014156e-05, "loss": 2.1651, "step": 31302 }, { "epoch": 0.41, "grad_norm": 4.400265216827393, "learning_rate": 1.9732628899422973e-05, "loss": 2.1987, "step": 31303 }, { "epoch": 0.41, "grad_norm": 3.5570223331451416, "learning_rate": 1.9732604762757146e-05, "loss": 1.6575, "step": 31304 }, { "epoch": 0.41, "grad_norm": 3.3969500064849854, "learning_rate": 1.9732580625016674e-05, "loss": 1.9618, "step": 31305 }, { "epoch": 0.41, "grad_norm": 3.8616230487823486, "learning_rate": 1.9732556486201555e-05, "loss": 1.7385, "step": 31306 }, { "epoch": 0.41, "grad_norm": 3.7956886291503906, "learning_rate": 1.97325323463118e-05, "loss": 1.809, "step": 31307 }, { "epoch": 0.41, "grad_norm": 3.404766082763672, "learning_rate": 1.973250820534741e-05, "loss": 1.8556, "step": 31308 }, { "epoch": 0.41, "grad_norm": 3.8625216484069824, "learning_rate": 1.9732484063308385e-05, "loss": 2.1183, "step": 31309 }, { "epoch": 0.41, "grad_norm": 3.68782639503479, "learning_rate": 1.9732459920194726e-05, "loss": 1.8008, "step": 31310 }, { "epoch": 0.41, "grad_norm": 3.7346231937408447, "learning_rate": 1.973243577600644e-05, "loss": 1.7155, "step": 31311 }, { "epoch": 0.41, "grad_norm": 3.806504726409912, "learning_rate": 1.9732411630743526e-05, "loss": 2.0654, "step": 31312 }, { "epoch": 0.41, "grad_norm": 3.9533424377441406, "learning_rate": 1.973238748440599e-05, "loss": 2.1179, "step": 31313 }, { "epoch": 0.41, "grad_norm": 3.7950611114501953, "learning_rate": 1.9732363336993837e-05, "loss": 1.9471, "step": 31314 }, { "epoch": 0.41, "grad_norm": 3.8893277645111084, "learning_rate": 1.973233918850706e-05, "loss": 1.776, "step": 31315 }, { "epoch": 0.41, "grad_norm": 4.462409496307373, "learning_rate": 1.9732315038945672e-05, "loss": 2.3162, "step": 31316 }, { "epoch": 0.41, "grad_norm": 4.129939556121826, "learning_rate": 1.973229088830967e-05, "loss": 2.2051, "step": 31317 }, { "epoch": 0.41, "grad_norm": 4.069444179534912, "learning_rate": 1.9732266736599056e-05, "loss": 1.8982, "step": 31318 }, { "epoch": 0.41, "grad_norm": 4.019513130187988, "learning_rate": 1.973224258381384e-05, "loss": 2.0257, "step": 31319 }, { "epoch": 0.41, "grad_norm": 4.193825721740723, "learning_rate": 1.9732218429954012e-05, "loss": 2.3848, "step": 31320 }, { "epoch": 0.41, "grad_norm": 3.621216297149658, "learning_rate": 1.9732194275019588e-05, "loss": 1.7164, "step": 31321 }, { "epoch": 0.41, "grad_norm": 3.826740264892578, "learning_rate": 1.9732170119010564e-05, "loss": 2.0339, "step": 31322 }, { "epoch": 0.41, "grad_norm": 4.439522743225098, "learning_rate": 1.973214596192694e-05, "loss": 2.3403, "step": 31323 }, { "epoch": 0.41, "grad_norm": 3.651625394821167, "learning_rate": 1.9732121803768725e-05, "loss": 1.8338, "step": 31324 }, { "epoch": 0.41, "grad_norm": 3.677508592605591, "learning_rate": 1.9732097644535913e-05, "loss": 1.9906, "step": 31325 }, { "epoch": 0.41, "grad_norm": 3.618110179901123, "learning_rate": 1.973207348422852e-05, "loss": 1.7712, "step": 31326 }, { "epoch": 0.41, "grad_norm": 3.702346086502075, "learning_rate": 1.9732049322846538e-05, "loss": 1.9094, "step": 31327 }, { "epoch": 0.41, "grad_norm": 4.044275760650635, "learning_rate": 1.9732025160389973e-05, "loss": 2.0459, "step": 31328 }, { "epoch": 0.41, "grad_norm": 3.699666738510132, "learning_rate": 1.9732000996858825e-05, "loss": 2.1565, "step": 31329 }, { "epoch": 0.41, "grad_norm": 3.750797986984253, "learning_rate": 1.97319768322531e-05, "loss": 1.9028, "step": 31330 }, { "epoch": 0.41, "grad_norm": 3.551135778427124, "learning_rate": 1.97319526665728e-05, "loss": 1.7137, "step": 31331 }, { "epoch": 0.41, "grad_norm": 3.5268962383270264, "learning_rate": 1.9731928499817926e-05, "loss": 1.677, "step": 31332 }, { "epoch": 0.41, "grad_norm": 3.6590964794158936, "learning_rate": 1.9731904331988486e-05, "loss": 1.9056, "step": 31333 }, { "epoch": 0.41, "grad_norm": 3.539773464202881, "learning_rate": 1.9731880163084476e-05, "loss": 1.7998, "step": 31334 }, { "epoch": 0.41, "grad_norm": 3.4523262977600098, "learning_rate": 1.97318559931059e-05, "loss": 1.9558, "step": 31335 }, { "epoch": 0.41, "grad_norm": 3.395913600921631, "learning_rate": 1.9731831822052764e-05, "loss": 1.7724, "step": 31336 }, { "epoch": 0.41, "grad_norm": 3.5158400535583496, "learning_rate": 1.973180764992507e-05, "loss": 1.7745, "step": 31337 }, { "epoch": 0.41, "grad_norm": 3.5686275959014893, "learning_rate": 1.9731783476722818e-05, "loss": 1.953, "step": 31338 }, { "epoch": 0.41, "grad_norm": 3.7755837440490723, "learning_rate": 1.973175930244601e-05, "loss": 1.6863, "step": 31339 }, { "epoch": 0.41, "grad_norm": 4.097463607788086, "learning_rate": 1.973173512709465e-05, "loss": 2.0217, "step": 31340 }, { "epoch": 0.41, "grad_norm": 4.195082187652588, "learning_rate": 1.9731710950668743e-05, "loss": 2.3618, "step": 31341 }, { "epoch": 0.41, "grad_norm": 3.6824376583099365, "learning_rate": 1.9731686773168294e-05, "loss": 2.2315, "step": 31342 }, { "epoch": 0.41, "grad_norm": 3.769968032836914, "learning_rate": 1.9731662594593296e-05, "loss": 1.9329, "step": 31343 }, { "epoch": 0.41, "grad_norm": 3.8528783321380615, "learning_rate": 1.973163841494376e-05, "loss": 2.1415, "step": 31344 }, { "epoch": 0.41, "grad_norm": 4.089674472808838, "learning_rate": 1.9731614234219685e-05, "loss": 2.2199, "step": 31345 }, { "epoch": 0.41, "grad_norm": 3.687269926071167, "learning_rate": 1.9731590052421075e-05, "loss": 1.8534, "step": 31346 }, { "epoch": 0.41, "grad_norm": 3.467226505279541, "learning_rate": 1.9731565869547934e-05, "loss": 1.5238, "step": 31347 }, { "epoch": 0.41, "grad_norm": 3.79592227935791, "learning_rate": 1.9731541685600263e-05, "loss": 2.0823, "step": 31348 }, { "epoch": 0.41, "grad_norm": 4.210842132568359, "learning_rate": 1.973151750057806e-05, "loss": 2.1145, "step": 31349 }, { "epoch": 0.41, "grad_norm": 4.079458236694336, "learning_rate": 1.9731493314481337e-05, "loss": 2.054, "step": 31350 }, { "epoch": 0.41, "grad_norm": 4.46162748336792, "learning_rate": 1.973146912731009e-05, "loss": 2.0315, "step": 31351 }, { "epoch": 0.41, "grad_norm": 3.454192876815796, "learning_rate": 1.9731444939064323e-05, "loss": 1.7176, "step": 31352 }, { "epoch": 0.41, "grad_norm": 3.4296324253082275, "learning_rate": 1.9731420749744044e-05, "loss": 1.6053, "step": 31353 }, { "epoch": 0.41, "grad_norm": 3.3767995834350586, "learning_rate": 1.9731396559349247e-05, "loss": 1.5643, "step": 31354 }, { "epoch": 0.41, "grad_norm": 4.317172050476074, "learning_rate": 1.9731372367879937e-05, "loss": 2.2721, "step": 31355 }, { "epoch": 0.41, "grad_norm": 4.552556037902832, "learning_rate": 1.973134817533612e-05, "loss": 1.7561, "step": 31356 }, { "epoch": 0.41, "grad_norm": 4.207409858703613, "learning_rate": 1.97313239817178e-05, "loss": 1.9547, "step": 31357 }, { "epoch": 0.41, "grad_norm": 4.23042106628418, "learning_rate": 1.9731299787024975e-05, "loss": 2.5491, "step": 31358 }, { "epoch": 0.41, "grad_norm": 3.4878227710723877, "learning_rate": 1.9731275591257647e-05, "loss": 1.8464, "step": 31359 }, { "epoch": 0.41, "grad_norm": 4.1608405113220215, "learning_rate": 1.9731251394415825e-05, "loss": 2.5037, "step": 31360 }, { "epoch": 0.41, "grad_norm": 3.637429714202881, "learning_rate": 1.9731227196499506e-05, "loss": 1.7834, "step": 31361 }, { "epoch": 0.41, "grad_norm": 3.994018077850342, "learning_rate": 1.9731202997508693e-05, "loss": 2.1471, "step": 31362 }, { "epoch": 0.41, "grad_norm": 4.077515125274658, "learning_rate": 1.9731178797443395e-05, "loss": 2.0163, "step": 31363 }, { "epoch": 0.41, "grad_norm": 3.536083459854126, "learning_rate": 1.9731154596303605e-05, "loss": 2.0362, "step": 31364 }, { "epoch": 0.41, "grad_norm": 4.832601547241211, "learning_rate": 1.9731130394089333e-05, "loss": 2.6246, "step": 31365 }, { "epoch": 0.41, "grad_norm": 3.587242603302002, "learning_rate": 1.9731106190800576e-05, "loss": 1.625, "step": 31366 }, { "epoch": 0.41, "grad_norm": 3.736865282058716, "learning_rate": 1.9731081986437345e-05, "loss": 2.1463, "step": 31367 }, { "epoch": 0.41, "grad_norm": 3.9956347942352295, "learning_rate": 1.973105778099963e-05, "loss": 2.2244, "step": 31368 }, { "epoch": 0.41, "grad_norm": 3.331714153289795, "learning_rate": 1.9731033574487447e-05, "loss": 1.5915, "step": 31369 }, { "epoch": 0.41, "grad_norm": 4.049557209014893, "learning_rate": 1.973100936690079e-05, "loss": 1.965, "step": 31370 }, { "epoch": 0.41, "grad_norm": 5.347555160522461, "learning_rate": 1.9730985158239666e-05, "loss": 2.4902, "step": 31371 }, { "epoch": 0.41, "grad_norm": 3.442927360534668, "learning_rate": 1.9730960948504078e-05, "loss": 1.8796, "step": 31372 }, { "epoch": 0.41, "grad_norm": 3.8023922443389893, "learning_rate": 1.973093673769402e-05, "loss": 2.0544, "step": 31373 }, { "epoch": 0.41, "grad_norm": 3.565431594848633, "learning_rate": 1.9730912525809506e-05, "loss": 1.7254, "step": 31374 }, { "epoch": 0.41, "grad_norm": 3.8708016872406006, "learning_rate": 1.9730888312850536e-05, "loss": 1.7084, "step": 31375 }, { "epoch": 0.41, "grad_norm": 3.785796642303467, "learning_rate": 1.973086409881711e-05, "loss": 1.6497, "step": 31376 }, { "epoch": 0.41, "grad_norm": 3.7916698455810547, "learning_rate": 1.973083988370923e-05, "loss": 2.1002, "step": 31377 }, { "epoch": 0.41, "grad_norm": 3.8050754070281982, "learning_rate": 1.9730815667526898e-05, "loss": 2.0768, "step": 31378 }, { "epoch": 0.41, "grad_norm": 3.849489212036133, "learning_rate": 1.9730791450270124e-05, "loss": 1.7115, "step": 31379 }, { "epoch": 0.41, "grad_norm": 3.9211714267730713, "learning_rate": 1.9730767231938902e-05, "loss": 2.0289, "step": 31380 }, { "epoch": 0.41, "grad_norm": 3.8377151489257812, "learning_rate": 1.973074301253324e-05, "loss": 2.2458, "step": 31381 }, { "epoch": 0.41, "grad_norm": 3.4625728130340576, "learning_rate": 1.973071879205314e-05, "loss": 1.526, "step": 31382 }, { "epoch": 0.41, "grad_norm": 3.7641549110412598, "learning_rate": 1.9730694570498602e-05, "loss": 1.9613, "step": 31383 }, { "epoch": 0.41, "grad_norm": 3.3036751747131348, "learning_rate": 1.9730670347869627e-05, "loss": 1.6979, "step": 31384 }, { "epoch": 0.41, "grad_norm": 4.498351097106934, "learning_rate": 1.9730646124166224e-05, "loss": 2.4926, "step": 31385 }, { "epoch": 0.41, "grad_norm": 3.2529003620147705, "learning_rate": 1.9730621899388394e-05, "loss": 1.5931, "step": 31386 }, { "epoch": 0.41, "grad_norm": 4.154768943786621, "learning_rate": 1.973059767353614e-05, "loss": 2.4769, "step": 31387 }, { "epoch": 0.41, "grad_norm": 4.30780029296875, "learning_rate": 1.9730573446609455e-05, "loss": 1.8636, "step": 31388 }, { "epoch": 0.41, "grad_norm": 4.0261311531066895, "learning_rate": 1.9730549218608356e-05, "loss": 2.3289, "step": 31389 }, { "epoch": 0.41, "grad_norm": 4.004382610321045, "learning_rate": 1.9730524989532836e-05, "loss": 2.0945, "step": 31390 }, { "epoch": 0.41, "grad_norm": 4.11271858215332, "learning_rate": 1.9730500759382903e-05, "loss": 2.2432, "step": 31391 }, { "epoch": 0.41, "grad_norm": 3.2699108123779297, "learning_rate": 1.9730476528158556e-05, "loss": 1.6992, "step": 31392 }, { "epoch": 0.41, "grad_norm": 4.346978187561035, "learning_rate": 1.97304522958598e-05, "loss": 2.2619, "step": 31393 }, { "epoch": 0.41, "grad_norm": 3.5715725421905518, "learning_rate": 1.973042806248664e-05, "loss": 1.8383, "step": 31394 }, { "epoch": 0.41, "grad_norm": 4.0526628494262695, "learning_rate": 1.9730403828039072e-05, "loss": 2.4604, "step": 31395 }, { "epoch": 0.41, "grad_norm": 3.5461654663085938, "learning_rate": 1.9730379592517103e-05, "loss": 1.8859, "step": 31396 }, { "epoch": 0.41, "grad_norm": 4.432682037353516, "learning_rate": 1.9730355355920737e-05, "loss": 1.916, "step": 31397 }, { "epoch": 0.41, "grad_norm": 3.6250998973846436, "learning_rate": 1.9730331118249975e-05, "loss": 2.2021, "step": 31398 }, { "epoch": 0.41, "grad_norm": 4.005671977996826, "learning_rate": 1.9730306879504815e-05, "loss": 2.0168, "step": 31399 }, { "epoch": 0.41, "grad_norm": 4.17183256149292, "learning_rate": 1.973028263968527e-05, "loss": 1.9356, "step": 31400 }, { "epoch": 0.41, "grad_norm": 3.6024303436279297, "learning_rate": 1.973025839879133e-05, "loss": 1.924, "step": 31401 }, { "epoch": 0.41, "grad_norm": 3.7845897674560547, "learning_rate": 1.9730234156823006e-05, "loss": 1.6478, "step": 31402 }, { "epoch": 0.41, "grad_norm": 4.177957057952881, "learning_rate": 1.9730209913780303e-05, "loss": 2.1113, "step": 31403 }, { "epoch": 0.41, "grad_norm": 4.003178596496582, "learning_rate": 1.9730185669663217e-05, "loss": 2.1886, "step": 31404 }, { "epoch": 0.41, "grad_norm": 4.124040126800537, "learning_rate": 1.9730161424471754e-05, "loss": 2.7082, "step": 31405 }, { "epoch": 0.41, "grad_norm": 4.146371364593506, "learning_rate": 1.9730137178205914e-05, "loss": 2.4267, "step": 31406 }, { "epoch": 0.41, "grad_norm": 4.032662868499756, "learning_rate": 1.9730112930865704e-05, "loss": 1.9282, "step": 31407 }, { "epoch": 0.41, "grad_norm": 4.551279544830322, "learning_rate": 1.9730088682451125e-05, "loss": 1.9908, "step": 31408 }, { "epoch": 0.41, "grad_norm": 3.3382620811462402, "learning_rate": 1.973006443296218e-05, "loss": 1.7181, "step": 31409 }, { "epoch": 0.41, "grad_norm": 3.4086973667144775, "learning_rate": 1.973004018239887e-05, "loss": 1.7495, "step": 31410 }, { "epoch": 0.41, "grad_norm": 3.430048704147339, "learning_rate": 1.97300159307612e-05, "loss": 1.7058, "step": 31411 }, { "epoch": 0.41, "grad_norm": 3.6346356868743896, "learning_rate": 1.9729991678049164e-05, "loss": 1.9117, "step": 31412 }, { "epoch": 0.41, "grad_norm": 3.918020009994507, "learning_rate": 1.9729967424262776e-05, "loss": 2.212, "step": 31413 }, { "epoch": 0.41, "grad_norm": 4.288904190063477, "learning_rate": 1.972994316940204e-05, "loss": 2.364, "step": 31414 }, { "epoch": 0.41, "grad_norm": 3.657824754714966, "learning_rate": 1.972991891346695e-05, "loss": 1.7751, "step": 31415 }, { "epoch": 0.41, "grad_norm": 3.3416993618011475, "learning_rate": 1.9729894656457506e-05, "loss": 2.0438, "step": 31416 }, { "epoch": 0.41, "grad_norm": 3.9993393421173096, "learning_rate": 1.972987039837372e-05, "loss": 2.0233, "step": 31417 }, { "epoch": 0.41, "grad_norm": 3.9254608154296875, "learning_rate": 1.9729846139215595e-05, "loss": 1.9928, "step": 31418 }, { "epoch": 0.41, "grad_norm": 4.259000301361084, "learning_rate": 1.9729821878983128e-05, "loss": 1.9817, "step": 31419 }, { "epoch": 0.41, "grad_norm": 3.9527101516723633, "learning_rate": 1.972979761767632e-05, "loss": 2.1143, "step": 31420 }, { "epoch": 0.41, "grad_norm": 4.209048271179199, "learning_rate": 1.9729773355295178e-05, "loss": 2.3153, "step": 31421 }, { "epoch": 0.41, "grad_norm": 4.4164910316467285, "learning_rate": 1.9729749091839706e-05, "loss": 2.0156, "step": 31422 }, { "epoch": 0.41, "grad_norm": 4.878825664520264, "learning_rate": 1.9729724827309908e-05, "loss": 2.1003, "step": 31423 }, { "epoch": 0.41, "grad_norm": 4.248733997344971, "learning_rate": 1.9729700561705777e-05, "loss": 2.4289, "step": 31424 }, { "epoch": 0.41, "grad_norm": 3.851146697998047, "learning_rate": 1.9729676295027328e-05, "loss": 1.9343, "step": 31425 }, { "epoch": 0.41, "grad_norm": 4.056669235229492, "learning_rate": 1.9729652027274553e-05, "loss": 1.9677, "step": 31426 }, { "epoch": 0.41, "grad_norm": 4.132745265960693, "learning_rate": 1.9729627758447462e-05, "loss": 2.0435, "step": 31427 }, { "epoch": 0.41, "grad_norm": 3.5489535331726074, "learning_rate": 1.9729603488546056e-05, "loss": 1.9035, "step": 31428 }, { "epoch": 0.41, "grad_norm": 3.943992853164673, "learning_rate": 1.972957921757033e-05, "loss": 1.9375, "step": 31429 }, { "epoch": 0.41, "grad_norm": 4.157599449157715, "learning_rate": 1.97295549455203e-05, "loss": 2.0978, "step": 31430 }, { "epoch": 0.41, "grad_norm": 3.810171365737915, "learning_rate": 1.9729530672395957e-05, "loss": 2.166, "step": 31431 }, { "epoch": 0.41, "grad_norm": 4.60612154006958, "learning_rate": 1.9729506398197315e-05, "loss": 2.2667, "step": 31432 }, { "epoch": 0.41, "grad_norm": 3.8593099117279053, "learning_rate": 1.9729482122924364e-05, "loss": 1.5662, "step": 31433 }, { "epoch": 0.41, "grad_norm": 3.734755516052246, "learning_rate": 1.972945784657712e-05, "loss": 2.2299, "step": 31434 }, { "epoch": 0.41, "grad_norm": 4.172074794769287, "learning_rate": 1.9729433569155574e-05, "loss": 2.156, "step": 31435 }, { "epoch": 0.41, "grad_norm": 3.371166706085205, "learning_rate": 1.9729409290659734e-05, "loss": 1.8256, "step": 31436 }, { "epoch": 0.41, "grad_norm": 3.861454486846924, "learning_rate": 1.9729385011089603e-05, "loss": 2.163, "step": 31437 }, { "epoch": 0.41, "grad_norm": 4.074089050292969, "learning_rate": 1.9729360730445183e-05, "loss": 1.9476, "step": 31438 }, { "epoch": 0.41, "grad_norm": 3.976693630218506, "learning_rate": 1.9729336448726478e-05, "loss": 1.9051, "step": 31439 }, { "epoch": 0.41, "grad_norm": 3.9912493228912354, "learning_rate": 1.9729312165933484e-05, "loss": 2.2869, "step": 31440 }, { "epoch": 0.41, "grad_norm": 3.961366891860962, "learning_rate": 1.9729287882066215e-05, "loss": 2.1955, "step": 31441 }, { "epoch": 0.41, "grad_norm": 4.000957489013672, "learning_rate": 1.9729263597124665e-05, "loss": 1.9774, "step": 31442 }, { "epoch": 0.41, "grad_norm": 3.3418021202087402, "learning_rate": 1.9729239311108836e-05, "loss": 1.652, "step": 31443 }, { "epoch": 0.41, "grad_norm": 3.7500996589660645, "learning_rate": 1.972921502401874e-05, "loss": 2.038, "step": 31444 }, { "epoch": 0.41, "grad_norm": 3.9923312664031982, "learning_rate": 1.9729190735854367e-05, "loss": 1.8308, "step": 31445 }, { "epoch": 0.41, "grad_norm": 3.98691725730896, "learning_rate": 1.972916644661573e-05, "loss": 2.001, "step": 31446 }, { "epoch": 0.41, "grad_norm": 3.6814849376678467, "learning_rate": 1.972914215630283e-05, "loss": 1.7934, "step": 31447 }, { "epoch": 0.41, "grad_norm": 3.88437557220459, "learning_rate": 1.9729117864915666e-05, "loss": 1.9021, "step": 31448 }, { "epoch": 0.41, "grad_norm": 3.93563175201416, "learning_rate": 1.9729093572454242e-05, "loss": 1.8271, "step": 31449 }, { "epoch": 0.41, "grad_norm": 3.4210076332092285, "learning_rate": 1.972906927891856e-05, "loss": 1.6295, "step": 31450 }, { "epoch": 0.41, "grad_norm": 3.2759974002838135, "learning_rate": 1.9729044984308623e-05, "loss": 1.6272, "step": 31451 }, { "epoch": 0.41, "grad_norm": 3.60243821144104, "learning_rate": 1.9729020688624435e-05, "loss": 1.8254, "step": 31452 }, { "epoch": 0.41, "grad_norm": 4.395747661590576, "learning_rate": 1.9728996391866e-05, "loss": 2.2117, "step": 31453 }, { "epoch": 0.41, "grad_norm": 3.385577440261841, "learning_rate": 1.972897209403332e-05, "loss": 1.6474, "step": 31454 }, { "epoch": 0.41, "grad_norm": 3.566363573074341, "learning_rate": 1.972894779512639e-05, "loss": 1.4924, "step": 31455 }, { "epoch": 0.41, "grad_norm": 4.05943489074707, "learning_rate": 1.9728923495145225e-05, "loss": 2.2797, "step": 31456 }, { "epoch": 0.41, "grad_norm": 3.5845534801483154, "learning_rate": 1.9728899194089818e-05, "loss": 2.0028, "step": 31457 }, { "epoch": 0.41, "grad_norm": 5.204524993896484, "learning_rate": 1.972887489196018e-05, "loss": 2.4208, "step": 31458 }, { "epoch": 0.41, "grad_norm": 3.8042147159576416, "learning_rate": 1.9728850588756305e-05, "loss": 1.9624, "step": 31459 }, { "epoch": 0.41, "grad_norm": 4.506051063537598, "learning_rate": 1.9728826284478202e-05, "loss": 2.0554, "step": 31460 }, { "epoch": 0.41, "grad_norm": 3.877595901489258, "learning_rate": 1.972880197912587e-05, "loss": 2.0469, "step": 31461 }, { "epoch": 0.41, "grad_norm": 4.192056179046631, "learning_rate": 1.9728777672699317e-05, "loss": 2.3073, "step": 31462 }, { "epoch": 0.41, "grad_norm": 3.669646739959717, "learning_rate": 1.9728753365198538e-05, "loss": 1.7415, "step": 31463 }, { "epoch": 0.41, "grad_norm": 3.5035207271575928, "learning_rate": 1.9728729056623538e-05, "loss": 1.8033, "step": 31464 }, { "epoch": 0.41, "grad_norm": 3.613508939743042, "learning_rate": 1.9728704746974325e-05, "loss": 1.7151, "step": 31465 }, { "epoch": 0.41, "grad_norm": 4.074528217315674, "learning_rate": 1.9728680436250897e-05, "loss": 2.5671, "step": 31466 }, { "epoch": 0.41, "grad_norm": 4.0203938484191895, "learning_rate": 1.972865612445326e-05, "loss": 1.6282, "step": 31467 }, { "epoch": 0.41, "grad_norm": 4.480865955352783, "learning_rate": 1.9728631811581407e-05, "loss": 2.6294, "step": 31468 }, { "epoch": 0.41, "grad_norm": 3.870666265487671, "learning_rate": 1.972860749763536e-05, "loss": 2.3931, "step": 31469 }, { "epoch": 0.41, "grad_norm": 4.115790843963623, "learning_rate": 1.97285831826151e-05, "loss": 2.0631, "step": 31470 }, { "epoch": 0.41, "grad_norm": 4.208883285522461, "learning_rate": 1.972855886652064e-05, "loss": 2.3974, "step": 31471 }, { "epoch": 0.41, "grad_norm": 3.4382729530334473, "learning_rate": 1.9728534549351982e-05, "loss": 1.8476, "step": 31472 }, { "epoch": 0.41, "grad_norm": 3.5869698524475098, "learning_rate": 1.972851023110913e-05, "loss": 1.5841, "step": 31473 }, { "epoch": 0.41, "grad_norm": 4.970728874206543, "learning_rate": 1.9728485911792087e-05, "loss": 2.3924, "step": 31474 }, { "epoch": 0.41, "grad_norm": 3.1455237865448, "learning_rate": 1.9728461591400853e-05, "loss": 1.356, "step": 31475 }, { "epoch": 0.41, "grad_norm": 4.2716264724731445, "learning_rate": 1.9728437269935435e-05, "loss": 2.5843, "step": 31476 }, { "epoch": 0.41, "grad_norm": 3.7500250339508057, "learning_rate": 1.9728412947395826e-05, "loss": 1.4038, "step": 31477 }, { "epoch": 0.41, "grad_norm": 3.3412535190582275, "learning_rate": 1.972838862378204e-05, "loss": 1.5479, "step": 31478 }, { "epoch": 0.41, "grad_norm": 3.5527920722961426, "learning_rate": 1.9728364299094073e-05, "loss": 1.9842, "step": 31479 }, { "epoch": 0.41, "grad_norm": 3.891597032546997, "learning_rate": 1.972833997333193e-05, "loss": 1.6831, "step": 31480 }, { "epoch": 0.41, "grad_norm": 3.827338457107544, "learning_rate": 1.9728315646495613e-05, "loss": 2.0016, "step": 31481 }, { "epoch": 0.41, "grad_norm": 3.5055832862854004, "learning_rate": 1.9728291318585125e-05, "loss": 1.8627, "step": 31482 }, { "epoch": 0.41, "grad_norm": 4.210101127624512, "learning_rate": 1.972826698960047e-05, "loss": 1.8984, "step": 31483 }, { "epoch": 0.41, "grad_norm": 3.4288718700408936, "learning_rate": 1.972824265954165e-05, "loss": 1.7491, "step": 31484 }, { "epoch": 0.41, "grad_norm": 3.8188014030456543, "learning_rate": 1.9728218328408664e-05, "loss": 2.3715, "step": 31485 }, { "epoch": 0.41, "grad_norm": 4.469707489013672, "learning_rate": 1.9728193996201517e-05, "loss": 2.2335, "step": 31486 }, { "epoch": 0.41, "grad_norm": 3.852759838104248, "learning_rate": 1.9728169662920216e-05, "loss": 2.0939, "step": 31487 }, { "epoch": 0.41, "grad_norm": 4.046480178833008, "learning_rate": 1.9728145328564756e-05, "loss": 2.1458, "step": 31488 }, { "epoch": 0.41, "grad_norm": 4.007772445678711, "learning_rate": 1.9728120993135146e-05, "loss": 2.1741, "step": 31489 }, { "epoch": 0.41, "grad_norm": 3.7660574913024902, "learning_rate": 1.9728096656631387e-05, "loss": 2.0829, "step": 31490 }, { "epoch": 0.41, "grad_norm": 4.237102508544922, "learning_rate": 1.972807231905348e-05, "loss": 2.1208, "step": 31491 }, { "epoch": 0.41, "grad_norm": 3.666710615158081, "learning_rate": 1.972804798040143e-05, "loss": 1.6015, "step": 31492 }, { "epoch": 0.41, "grad_norm": 3.2977354526519775, "learning_rate": 1.9728023640675234e-05, "loss": 1.566, "step": 31493 }, { "epoch": 0.41, "grad_norm": 4.2251386642456055, "learning_rate": 1.9727999299874904e-05, "loss": 2.0228, "step": 31494 }, { "epoch": 0.41, "grad_norm": 3.5208537578582764, "learning_rate": 1.9727974958000437e-05, "loss": 1.9633, "step": 31495 }, { "epoch": 0.41, "grad_norm": 3.434866428375244, "learning_rate": 1.9727950615051834e-05, "loss": 1.7383, "step": 31496 }, { "epoch": 0.41, "grad_norm": 3.6369731426239014, "learning_rate": 1.9727926271029102e-05, "loss": 1.7103, "step": 31497 }, { "epoch": 0.41, "grad_norm": 4.602600574493408, "learning_rate": 1.9727901925932244e-05, "loss": 2.7762, "step": 31498 }, { "epoch": 0.41, "grad_norm": 3.6742746829986572, "learning_rate": 1.972787757976126e-05, "loss": 2.0207, "step": 31499 }, { "epoch": 0.41, "grad_norm": 4.055360794067383, "learning_rate": 1.972785323251615e-05, "loss": 1.9747, "step": 31500 }, { "epoch": 0.41, "grad_norm": 3.575437307357788, "learning_rate": 1.9727828884196922e-05, "loss": 1.8271, "step": 31501 }, { "epoch": 0.41, "grad_norm": 3.800870656967163, "learning_rate": 1.9727804534803578e-05, "loss": 1.7421, "step": 31502 }, { "epoch": 0.41, "grad_norm": 3.7530713081359863, "learning_rate": 1.9727780184336117e-05, "loss": 1.714, "step": 31503 }, { "epoch": 0.41, "grad_norm": 3.817784070968628, "learning_rate": 1.9727755832794545e-05, "loss": 2.1853, "step": 31504 }, { "epoch": 0.41, "grad_norm": 4.103691577911377, "learning_rate": 1.9727731480178864e-05, "loss": 2.0783, "step": 31505 }, { "epoch": 0.41, "grad_norm": 3.7815451622009277, "learning_rate": 1.9727707126489077e-05, "loss": 1.9138, "step": 31506 }, { "epoch": 0.41, "grad_norm": 3.5087735652923584, "learning_rate": 1.9727682771725184e-05, "loss": 1.824, "step": 31507 }, { "epoch": 0.41, "grad_norm": 4.017197608947754, "learning_rate": 1.972765841588719e-05, "loss": 1.9995, "step": 31508 }, { "epoch": 0.41, "grad_norm": 4.181258201599121, "learning_rate": 1.97276340589751e-05, "loss": 2.4486, "step": 31509 }, { "epoch": 0.41, "grad_norm": 4.751269817352295, "learning_rate": 1.972760970098891e-05, "loss": 2.2277, "step": 31510 }, { "epoch": 0.41, "grad_norm": 4.0889153480529785, "learning_rate": 1.972758534192863e-05, "loss": 2.1676, "step": 31511 }, { "epoch": 0.41, "grad_norm": 3.657719135284424, "learning_rate": 1.972756098179426e-05, "loss": 1.9173, "step": 31512 }, { "epoch": 0.41, "grad_norm": 3.7217185497283936, "learning_rate": 1.97275366205858e-05, "loss": 1.8597, "step": 31513 }, { "epoch": 0.41, "grad_norm": 4.106490612030029, "learning_rate": 1.972751225830326e-05, "loss": 2.2994, "step": 31514 }, { "epoch": 0.41, "grad_norm": 3.125467300415039, "learning_rate": 1.972748789494663e-05, "loss": 1.4901, "step": 31515 }, { "epoch": 0.41, "grad_norm": 3.242288589477539, "learning_rate": 1.9727463530515924e-05, "loss": 1.7787, "step": 31516 }, { "epoch": 0.41, "grad_norm": 3.6946682929992676, "learning_rate": 1.9727439165011143e-05, "loss": 1.834, "step": 31517 }, { "epoch": 0.41, "grad_norm": 4.588193416595459, "learning_rate": 1.9727414798432286e-05, "loss": 2.3456, "step": 31518 }, { "epoch": 0.41, "grad_norm": 3.565941095352173, "learning_rate": 1.9727390430779357e-05, "loss": 1.7333, "step": 31519 }, { "epoch": 0.41, "grad_norm": 3.379532814025879, "learning_rate": 1.9727366062052357e-05, "loss": 1.7941, "step": 31520 }, { "epoch": 0.41, "grad_norm": 3.641467332839966, "learning_rate": 1.9727341692251295e-05, "loss": 1.7925, "step": 31521 }, { "epoch": 0.41, "grad_norm": 4.000242710113525, "learning_rate": 1.972731732137617e-05, "loss": 1.8078, "step": 31522 }, { "epoch": 0.41, "grad_norm": 3.6556308269500732, "learning_rate": 1.9727292949426977e-05, "loss": 1.6477, "step": 31523 }, { "epoch": 0.41, "grad_norm": 3.323119878768921, "learning_rate": 1.9727268576403734e-05, "loss": 1.7412, "step": 31524 }, { "epoch": 0.41, "grad_norm": 4.192424297332764, "learning_rate": 1.9727244202306427e-05, "loss": 2.5969, "step": 31525 }, { "epoch": 0.41, "grad_norm": 4.075475692749023, "learning_rate": 1.9727219827135074e-05, "loss": 2.0838, "step": 31526 }, { "epoch": 0.41, "grad_norm": 4.3127946853637695, "learning_rate": 1.972719545088967e-05, "loss": 2.1595, "step": 31527 }, { "epoch": 0.41, "grad_norm": 3.7075812816619873, "learning_rate": 1.9727171073570216e-05, "loss": 1.8481, "step": 31528 }, { "epoch": 0.41, "grad_norm": 3.782097339630127, "learning_rate": 1.972714669517672e-05, "loss": 1.7969, "step": 31529 }, { "epoch": 0.41, "grad_norm": 3.820733070373535, "learning_rate": 1.9727122315709183e-05, "loss": 1.8583, "step": 31530 }, { "epoch": 0.41, "grad_norm": 3.7708194255828857, "learning_rate": 1.9727097935167604e-05, "loss": 1.9768, "step": 31531 }, { "epoch": 0.41, "grad_norm": 3.74302339553833, "learning_rate": 1.9727073553551988e-05, "loss": 1.6415, "step": 31532 }, { "epoch": 0.41, "grad_norm": 3.5377161502838135, "learning_rate": 1.972704917086234e-05, "loss": 1.7792, "step": 31533 }, { "epoch": 0.41, "grad_norm": 3.9091227054595947, "learning_rate": 1.972702478709866e-05, "loss": 1.9686, "step": 31534 }, { "epoch": 0.41, "grad_norm": 4.073080539703369, "learning_rate": 1.972700040226095e-05, "loss": 1.5917, "step": 31535 }, { "epoch": 0.41, "grad_norm": 4.198650360107422, "learning_rate": 1.9726976016349217e-05, "loss": 2.4306, "step": 31536 }, { "epoch": 0.41, "grad_norm": 3.315518856048584, "learning_rate": 1.972695162936346e-05, "loss": 1.4583, "step": 31537 }, { "epoch": 0.41, "grad_norm": 3.9908370971679688, "learning_rate": 1.9726927241303677e-05, "loss": 2.268, "step": 31538 }, { "epoch": 0.41, "grad_norm": 4.232915878295898, "learning_rate": 1.972690285216988e-05, "loss": 2.1355, "step": 31539 }, { "epoch": 0.41, "grad_norm": 3.681147575378418, "learning_rate": 1.972687846196207e-05, "loss": 2.1538, "step": 31540 }, { "epoch": 0.41, "grad_norm": 4.227805137634277, "learning_rate": 1.9726854070680248e-05, "loss": 2.6832, "step": 31541 }, { "epoch": 0.41, "grad_norm": 4.648185729980469, "learning_rate": 1.9726829678324416e-05, "loss": 2.617, "step": 31542 }, { "epoch": 0.41, "grad_norm": 4.506274223327637, "learning_rate": 1.9726805284894574e-05, "loss": 2.4431, "step": 31543 }, { "epoch": 0.41, "grad_norm": 3.8294966220855713, "learning_rate": 1.972678089039073e-05, "loss": 2.0863, "step": 31544 }, { "epoch": 0.41, "grad_norm": 3.809201955795288, "learning_rate": 1.9726756494812882e-05, "loss": 1.5568, "step": 31545 }, { "epoch": 0.41, "grad_norm": 3.9115498065948486, "learning_rate": 1.9726732098161035e-05, "loss": 2.2865, "step": 31546 }, { "epoch": 0.41, "grad_norm": 4.1017584800720215, "learning_rate": 1.9726707700435198e-05, "loss": 2.0659, "step": 31547 }, { "epoch": 0.41, "grad_norm": 3.5980231761932373, "learning_rate": 1.9726683301635363e-05, "loss": 1.655, "step": 31548 }, { "epoch": 0.41, "grad_norm": 4.202085494995117, "learning_rate": 1.9726658901761535e-05, "loss": 2.167, "step": 31549 }, { "epoch": 0.41, "grad_norm": 4.513625621795654, "learning_rate": 1.972663450081372e-05, "loss": 2.6111, "step": 31550 }, { "epoch": 0.41, "grad_norm": 4.0918660163879395, "learning_rate": 1.9726610098791922e-05, "loss": 2.0307, "step": 31551 }, { "epoch": 0.41, "grad_norm": 3.4251160621643066, "learning_rate": 1.972658569569614e-05, "loss": 2.0637, "step": 31552 }, { "epoch": 0.41, "grad_norm": 4.258989334106445, "learning_rate": 1.9726561291526377e-05, "loss": 2.3258, "step": 31553 }, { "epoch": 0.41, "grad_norm": 4.638956069946289, "learning_rate": 1.9726536886282638e-05, "loss": 2.4239, "step": 31554 }, { "epoch": 0.41, "grad_norm": 4.130765914916992, "learning_rate": 1.9726512479964922e-05, "loss": 2.0986, "step": 31555 }, { "epoch": 0.41, "grad_norm": 3.0652074813842773, "learning_rate": 1.9726488072573236e-05, "loss": 1.4956, "step": 31556 }, { "epoch": 0.41, "grad_norm": 4.836496829986572, "learning_rate": 1.972646366410758e-05, "loss": 2.5984, "step": 31557 }, { "epoch": 0.41, "grad_norm": 4.118898868560791, "learning_rate": 1.972643925456796e-05, "loss": 1.9085, "step": 31558 }, { "epoch": 0.41, "grad_norm": 3.788902521133423, "learning_rate": 1.9726414843954372e-05, "loss": 1.9945, "step": 31559 }, { "epoch": 0.41, "grad_norm": 3.697321891784668, "learning_rate": 1.9726390432266823e-05, "loss": 2.2285, "step": 31560 }, { "epoch": 0.41, "grad_norm": 3.70068621635437, "learning_rate": 1.9726366019505317e-05, "loss": 1.8032, "step": 31561 }, { "epoch": 0.41, "grad_norm": 4.064294815063477, "learning_rate": 1.972634160566986e-05, "loss": 1.7658, "step": 31562 }, { "epoch": 0.41, "grad_norm": 3.7184793949127197, "learning_rate": 1.9726317190760444e-05, "loss": 1.842, "step": 31563 }, { "epoch": 0.41, "grad_norm": 3.5330026149749756, "learning_rate": 1.972629277477708e-05, "loss": 1.9821, "step": 31564 }, { "epoch": 0.41, "grad_norm": 3.6350231170654297, "learning_rate": 1.9726268357719768e-05, "loss": 1.7317, "step": 31565 }, { "epoch": 0.41, "grad_norm": 3.7317612171173096, "learning_rate": 1.9726243939588507e-05, "loss": 1.9981, "step": 31566 }, { "epoch": 0.41, "grad_norm": 3.6637392044067383, "learning_rate": 1.9726219520383307e-05, "loss": 1.8833, "step": 31567 }, { "epoch": 0.41, "grad_norm": 4.4167094230651855, "learning_rate": 1.9726195100104167e-05, "loss": 2.2436, "step": 31568 }, { "epoch": 0.41, "grad_norm": 4.942056655883789, "learning_rate": 1.9726170678751088e-05, "loss": 2.3416, "step": 31569 }, { "epoch": 0.41, "grad_norm": 3.477665424346924, "learning_rate": 1.972614625632408e-05, "loss": 1.6817, "step": 31570 }, { "epoch": 0.41, "grad_norm": 3.410198450088501, "learning_rate": 1.9726121832823134e-05, "loss": 1.6994, "step": 31571 }, { "epoch": 0.41, "grad_norm": 3.821075201034546, "learning_rate": 1.9726097408248263e-05, "loss": 2.0171, "step": 31572 }, { "epoch": 0.41, "grad_norm": 3.5946807861328125, "learning_rate": 1.9726072982599467e-05, "loss": 2.0725, "step": 31573 }, { "epoch": 0.41, "grad_norm": 3.832850933074951, "learning_rate": 1.9726048555876744e-05, "loss": 1.977, "step": 31574 }, { "epoch": 0.41, "grad_norm": 4.4148335456848145, "learning_rate": 1.9726024128080102e-05, "loss": 2.0543, "step": 31575 }, { "epoch": 0.41, "grad_norm": 4.161741733551025, "learning_rate": 1.972599969920954e-05, "loss": 2.1426, "step": 31576 }, { "epoch": 0.41, "grad_norm": 3.256476402282715, "learning_rate": 1.9725975269265068e-05, "loss": 1.692, "step": 31577 }, { "epoch": 0.41, "grad_norm": 3.6967241764068604, "learning_rate": 1.9725950838246678e-05, "loss": 2.1124, "step": 31578 }, { "epoch": 0.41, "grad_norm": 4.233647346496582, "learning_rate": 1.972592640615438e-05, "loss": 2.417, "step": 31579 }, { "epoch": 0.41, "grad_norm": 3.843625068664551, "learning_rate": 1.9725901972988176e-05, "loss": 1.9638, "step": 31580 }, { "epoch": 0.41, "grad_norm": 4.232038497924805, "learning_rate": 1.9725877538748066e-05, "loss": 2.5571, "step": 31581 }, { "epoch": 0.41, "grad_norm": 3.1352787017822266, "learning_rate": 1.9725853103434055e-05, "loss": 1.6164, "step": 31582 }, { "epoch": 0.41, "grad_norm": 3.79972243309021, "learning_rate": 1.9725828667046144e-05, "loss": 1.8174, "step": 31583 }, { "epoch": 0.41, "grad_norm": 3.6647818088531494, "learning_rate": 1.9725804229584334e-05, "loss": 1.8736, "step": 31584 }, { "epoch": 0.41, "grad_norm": 3.8484787940979004, "learning_rate": 1.9725779791048633e-05, "loss": 2.0401, "step": 31585 }, { "epoch": 0.41, "grad_norm": 3.7576985359191895, "learning_rate": 1.9725755351439042e-05, "loss": 2.2513, "step": 31586 }, { "epoch": 0.41, "grad_norm": 3.5316710472106934, "learning_rate": 1.972573091075556e-05, "loss": 1.8687, "step": 31587 }, { "epoch": 0.41, "grad_norm": 4.027027606964111, "learning_rate": 1.9725706468998192e-05, "loss": 2.2044, "step": 31588 }, { "epoch": 0.41, "grad_norm": 3.596647024154663, "learning_rate": 1.9725682026166946e-05, "loss": 1.9626, "step": 31589 }, { "epoch": 0.41, "grad_norm": 3.6113007068634033, "learning_rate": 1.9725657582261814e-05, "loss": 1.8042, "step": 31590 }, { "epoch": 0.41, "grad_norm": 3.577802896499634, "learning_rate": 1.972563313728281e-05, "loss": 1.4687, "step": 31591 }, { "epoch": 0.41, "grad_norm": 3.399164915084839, "learning_rate": 1.9725608691229925e-05, "loss": 1.6535, "step": 31592 }, { "epoch": 0.41, "grad_norm": 3.3201444149017334, "learning_rate": 1.972558424410317e-05, "loss": 1.8309, "step": 31593 }, { "epoch": 0.41, "grad_norm": 4.115175724029541, "learning_rate": 1.9725559795902546e-05, "loss": 1.9913, "step": 31594 }, { "epoch": 0.41, "grad_norm": 3.896658182144165, "learning_rate": 1.9725535346628056e-05, "loss": 2.2068, "step": 31595 }, { "epoch": 0.41, "grad_norm": 3.581129312515259, "learning_rate": 1.9725510896279704e-05, "loss": 1.9232, "step": 31596 }, { "epoch": 0.41, "grad_norm": 3.879470109939575, "learning_rate": 1.9725486444857487e-05, "loss": 1.9485, "step": 31597 }, { "epoch": 0.41, "grad_norm": 4.492889881134033, "learning_rate": 1.9725461992361408e-05, "loss": 2.1726, "step": 31598 }, { "epoch": 0.41, "grad_norm": 3.9995100498199463, "learning_rate": 1.972543753879148e-05, "loss": 1.9983, "step": 31599 }, { "epoch": 0.41, "grad_norm": 3.903757095336914, "learning_rate": 1.9725413084147697e-05, "loss": 1.9751, "step": 31600 }, { "epoch": 0.41, "grad_norm": 3.9958019256591797, "learning_rate": 1.972538862843006e-05, "loss": 2.0272, "step": 31601 }, { "epoch": 0.41, "grad_norm": 3.8175389766693115, "learning_rate": 1.9725364171638576e-05, "loss": 1.9273, "step": 31602 }, { "epoch": 0.41, "grad_norm": 3.4589121341705322, "learning_rate": 1.9725339713773247e-05, "loss": 1.9164, "step": 31603 }, { "epoch": 0.41, "grad_norm": 3.8483095169067383, "learning_rate": 1.972531525483408e-05, "loss": 1.8174, "step": 31604 }, { "epoch": 0.41, "grad_norm": 3.3532216548919678, "learning_rate": 1.972529079482107e-05, "loss": 1.5143, "step": 31605 }, { "epoch": 0.41, "grad_norm": 4.278434753417969, "learning_rate": 1.972526633373422e-05, "loss": 2.2565, "step": 31606 }, { "epoch": 0.41, "grad_norm": 3.909719467163086, "learning_rate": 1.9725241871573543e-05, "loss": 2.3986, "step": 31607 }, { "epoch": 0.41, "grad_norm": 3.501814126968384, "learning_rate": 1.9725217408339028e-05, "loss": 2.0628, "step": 31608 }, { "epoch": 0.41, "grad_norm": 3.935042142868042, "learning_rate": 1.9725192944030685e-05, "loss": 1.9949, "step": 31609 }, { "epoch": 0.41, "grad_norm": 2.9174537658691406, "learning_rate": 1.9725168478648515e-05, "loss": 1.2958, "step": 31610 }, { "epoch": 0.41, "grad_norm": 3.8439977169036865, "learning_rate": 1.9725144012192523e-05, "loss": 2.0482, "step": 31611 }, { "epoch": 0.41, "grad_norm": 3.7597429752349854, "learning_rate": 1.972511954466271e-05, "loss": 1.82, "step": 31612 }, { "epoch": 0.41, "grad_norm": 2.9464221000671387, "learning_rate": 1.9725095076059078e-05, "loss": 1.373, "step": 31613 }, { "epoch": 0.41, "grad_norm": 3.4440388679504395, "learning_rate": 1.9725070606381634e-05, "loss": 1.7234, "step": 31614 }, { "epoch": 0.41, "grad_norm": 4.198777675628662, "learning_rate": 1.9725046135630372e-05, "loss": 1.9015, "step": 31615 }, { "epoch": 0.41, "grad_norm": 4.039234638214111, "learning_rate": 1.9725021663805303e-05, "loss": 2.2082, "step": 31616 }, { "epoch": 0.41, "grad_norm": 3.9697985649108887, "learning_rate": 1.9724997190906424e-05, "loss": 2.102, "step": 31617 }, { "epoch": 0.41, "grad_norm": 4.002845764160156, "learning_rate": 1.9724972716933743e-05, "loss": 1.9245, "step": 31618 }, { "epoch": 0.41, "grad_norm": 3.79815411567688, "learning_rate": 1.972494824188726e-05, "loss": 2.2857, "step": 31619 }, { "epoch": 0.41, "grad_norm": 3.6155552864074707, "learning_rate": 1.9724923765766978e-05, "loss": 1.6939, "step": 31620 }, { "epoch": 0.41, "grad_norm": 4.022559642791748, "learning_rate": 1.97248992885729e-05, "loss": 2.1595, "step": 31621 }, { "epoch": 0.41, "grad_norm": 3.8490488529205322, "learning_rate": 1.9724874810305023e-05, "loss": 1.8119, "step": 31622 }, { "epoch": 0.41, "grad_norm": 4.402824401855469, "learning_rate": 1.972485033096336e-05, "loss": 2.6272, "step": 31623 }, { "epoch": 0.41, "grad_norm": 3.528993606567383, "learning_rate": 1.9724825850547907e-05, "loss": 1.7509, "step": 31624 }, { "epoch": 0.41, "grad_norm": 4.148133277893066, "learning_rate": 1.972480136905867e-05, "loss": 2.308, "step": 31625 }, { "epoch": 0.41, "grad_norm": 3.673625946044922, "learning_rate": 1.9724776886495645e-05, "loss": 2.1625, "step": 31626 }, { "epoch": 0.41, "grad_norm": 4.295393943786621, "learning_rate": 1.9724752402858844e-05, "loss": 2.1524, "step": 31627 }, { "epoch": 0.41, "grad_norm": 4.02925443649292, "learning_rate": 1.9724727918148263e-05, "loss": 1.7268, "step": 31628 }, { "epoch": 0.41, "grad_norm": 4.284609317779541, "learning_rate": 1.9724703432363908e-05, "loss": 2.0317, "step": 31629 }, { "epoch": 0.41, "grad_norm": 3.9557008743286133, "learning_rate": 1.9724678945505783e-05, "loss": 1.9728, "step": 31630 }, { "epoch": 0.41, "grad_norm": 3.9019131660461426, "learning_rate": 1.9724654457573885e-05, "loss": 1.8301, "step": 31631 }, { "epoch": 0.41, "grad_norm": 3.5885684490203857, "learning_rate": 1.972462996856822e-05, "loss": 2.0633, "step": 31632 }, { "epoch": 0.41, "grad_norm": 4.172273635864258, "learning_rate": 1.972460547848879e-05, "loss": 2.3472, "step": 31633 }, { "epoch": 0.41, "grad_norm": 3.649101734161377, "learning_rate": 1.9724580987335603e-05, "loss": 1.7707, "step": 31634 }, { "epoch": 0.41, "grad_norm": 4.6217451095581055, "learning_rate": 1.9724556495108658e-05, "loss": 1.9148, "step": 31635 }, { "epoch": 0.41, "grad_norm": 3.7388999462127686, "learning_rate": 1.9724532001807956e-05, "loss": 1.9035, "step": 31636 }, { "epoch": 0.41, "grad_norm": 3.4866065979003906, "learning_rate": 1.9724507507433498e-05, "loss": 1.8181, "step": 31637 }, { "epoch": 0.41, "grad_norm": 3.734633684158325, "learning_rate": 1.972448301198529e-05, "loss": 1.7797, "step": 31638 }, { "epoch": 0.41, "grad_norm": 3.9148619174957275, "learning_rate": 1.9724458515463332e-05, "loss": 2.0105, "step": 31639 }, { "epoch": 0.41, "grad_norm": 4.385073184967041, "learning_rate": 1.972443401786763e-05, "loss": 2.0494, "step": 31640 }, { "epoch": 0.41, "grad_norm": 3.892674207687378, "learning_rate": 1.9724409519198188e-05, "loss": 1.7783, "step": 31641 }, { "epoch": 0.41, "grad_norm": 3.8714120388031006, "learning_rate": 1.9724385019455004e-05, "loss": 2.2359, "step": 31642 }, { "epoch": 0.41, "grad_norm": 3.612454652786255, "learning_rate": 1.9724360518638085e-05, "loss": 2.0664, "step": 31643 }, { "epoch": 0.41, "grad_norm": 5.046958923339844, "learning_rate": 1.9724336016747432e-05, "loss": 2.4229, "step": 31644 }, { "epoch": 0.41, "grad_norm": 4.158507347106934, "learning_rate": 1.9724311513783044e-05, "loss": 1.7631, "step": 31645 }, { "epoch": 0.41, "grad_norm": 3.8279755115509033, "learning_rate": 1.972428700974493e-05, "loss": 2.4785, "step": 31646 }, { "epoch": 0.41, "grad_norm": 4.0568976402282715, "learning_rate": 1.9724262504633088e-05, "loss": 2.1733, "step": 31647 }, { "epoch": 0.41, "grad_norm": 3.5996556282043457, "learning_rate": 1.9724237998447525e-05, "loss": 1.7941, "step": 31648 }, { "epoch": 0.41, "grad_norm": 3.8415894508361816, "learning_rate": 1.972421349118824e-05, "loss": 1.9497, "step": 31649 }, { "epoch": 0.41, "grad_norm": 3.2692439556121826, "learning_rate": 1.9724188982855233e-05, "loss": 1.472, "step": 31650 }, { "epoch": 0.41, "grad_norm": 3.4754579067230225, "learning_rate": 1.9724164473448515e-05, "loss": 1.43, "step": 31651 }, { "epoch": 0.41, "grad_norm": 3.852374315261841, "learning_rate": 1.9724139962968085e-05, "loss": 1.6376, "step": 31652 }, { "epoch": 0.41, "grad_norm": 3.7568578720092773, "learning_rate": 1.972411545141394e-05, "loss": 1.5826, "step": 31653 }, { "epoch": 0.41, "grad_norm": 4.242283821105957, "learning_rate": 1.972409093878609e-05, "loss": 2.2613, "step": 31654 }, { "epoch": 0.41, "grad_norm": 4.030032157897949, "learning_rate": 1.9724066425084535e-05, "loss": 1.7756, "step": 31655 }, { "epoch": 0.41, "grad_norm": 3.874556541442871, "learning_rate": 1.972404191030928e-05, "loss": 1.9826, "step": 31656 }, { "epoch": 0.41, "grad_norm": 4.124618053436279, "learning_rate": 1.9724017394460326e-05, "loss": 2.1494, "step": 31657 }, { "epoch": 0.41, "grad_norm": 3.4133529663085938, "learning_rate": 1.9723992877537674e-05, "loss": 1.5865, "step": 31658 }, { "epoch": 0.41, "grad_norm": 4.329033374786377, "learning_rate": 1.9723968359541327e-05, "loss": 2.3398, "step": 31659 }, { "epoch": 0.41, "grad_norm": 3.5848710536956787, "learning_rate": 1.972394384047129e-05, "loss": 2.0042, "step": 31660 }, { "epoch": 0.41, "grad_norm": 3.6163957118988037, "learning_rate": 1.9723919320327567e-05, "loss": 1.7354, "step": 31661 }, { "epoch": 0.41, "grad_norm": 3.277203321456909, "learning_rate": 1.9723894799110157e-05, "loss": 1.7903, "step": 31662 }, { "epoch": 0.41, "grad_norm": 3.775456428527832, "learning_rate": 1.9723870276819062e-05, "loss": 1.926, "step": 31663 }, { "epoch": 0.41, "grad_norm": 4.926645278930664, "learning_rate": 1.9723845753454288e-05, "loss": 3.002, "step": 31664 }, { "epoch": 0.41, "grad_norm": 3.716726541519165, "learning_rate": 1.972382122901584e-05, "loss": 1.8943, "step": 31665 }, { "epoch": 0.41, "grad_norm": 3.714343547821045, "learning_rate": 1.972379670350371e-05, "loss": 1.7468, "step": 31666 }, { "epoch": 0.41, "grad_norm": 4.05881404876709, "learning_rate": 1.972377217691791e-05, "loss": 2.1637, "step": 31667 }, { "epoch": 0.41, "grad_norm": 3.748995065689087, "learning_rate": 1.9723747649258446e-05, "loss": 2.0961, "step": 31668 }, { "epoch": 0.41, "grad_norm": 3.5855798721313477, "learning_rate": 1.9723723120525313e-05, "loss": 1.7298, "step": 31669 }, { "epoch": 0.41, "grad_norm": 4.132638454437256, "learning_rate": 1.972369859071851e-05, "loss": 2.0692, "step": 31670 }, { "epoch": 0.41, "grad_norm": 3.6767280101776123, "learning_rate": 1.972367405983805e-05, "loss": 2.0155, "step": 31671 }, { "epoch": 0.41, "grad_norm": 3.6218953132629395, "learning_rate": 1.9723649527883936e-05, "loss": 1.9786, "step": 31672 }, { "epoch": 0.41, "grad_norm": 3.7319538593292236, "learning_rate": 1.9723624994856162e-05, "loss": 2.1753, "step": 31673 }, { "epoch": 0.41, "grad_norm": 3.772023916244507, "learning_rate": 1.9723600460754734e-05, "loss": 1.8826, "step": 31674 }, { "epoch": 0.41, "grad_norm": 3.1773769855499268, "learning_rate": 1.9723575925579658e-05, "loss": 1.5957, "step": 31675 }, { "epoch": 0.41, "grad_norm": 4.4182515144348145, "learning_rate": 1.972355138933093e-05, "loss": 2.0874, "step": 31676 }, { "epoch": 0.41, "grad_norm": 3.531475305557251, "learning_rate": 1.972352685200856e-05, "loss": 1.7609, "step": 31677 }, { "epoch": 0.41, "grad_norm": 3.4266998767852783, "learning_rate": 1.9723502313612547e-05, "loss": 1.8783, "step": 31678 }, { "epoch": 0.41, "grad_norm": 3.7516376972198486, "learning_rate": 1.9723477774142895e-05, "loss": 2.0139, "step": 31679 }, { "epoch": 0.41, "grad_norm": 4.2652907371521, "learning_rate": 1.9723453233599605e-05, "loss": 2.2573, "step": 31680 }, { "epoch": 0.41, "grad_norm": 3.922138214111328, "learning_rate": 1.972342869198268e-05, "loss": 1.9139, "step": 31681 }, { "epoch": 0.41, "grad_norm": 3.7718634605407715, "learning_rate": 1.9723404149292124e-05, "loss": 2.2106, "step": 31682 }, { "epoch": 0.41, "grad_norm": 4.1384453773498535, "learning_rate": 1.9723379605527944e-05, "loss": 2.4676, "step": 31683 }, { "epoch": 0.41, "grad_norm": 4.479841709136963, "learning_rate": 1.9723355060690133e-05, "loss": 2.3064, "step": 31684 }, { "epoch": 0.41, "grad_norm": 3.649251699447632, "learning_rate": 1.97233305147787e-05, "loss": 1.7217, "step": 31685 }, { "epoch": 0.41, "grad_norm": 3.360856056213379, "learning_rate": 1.9723305967793647e-05, "loss": 1.5296, "step": 31686 }, { "epoch": 0.41, "grad_norm": 3.612848997116089, "learning_rate": 1.9723281419734972e-05, "loss": 1.5354, "step": 31687 }, { "epoch": 0.41, "grad_norm": 3.2711174488067627, "learning_rate": 1.9723256870602684e-05, "loss": 1.4399, "step": 31688 }, { "epoch": 0.41, "grad_norm": 3.9209136962890625, "learning_rate": 1.9723232320396787e-05, "loss": 1.9584, "step": 31689 }, { "epoch": 0.41, "grad_norm": 3.9807050228118896, "learning_rate": 1.972320776911728e-05, "loss": 2.1908, "step": 31690 }, { "epoch": 0.41, "grad_norm": 3.303363561630249, "learning_rate": 1.972318321676416e-05, "loss": 1.7507, "step": 31691 }, { "epoch": 0.41, "grad_norm": 4.333642959594727, "learning_rate": 1.972315866333744e-05, "loss": 1.9331, "step": 31692 }, { "epoch": 0.41, "grad_norm": 4.009429454803467, "learning_rate": 1.9723134108837117e-05, "loss": 1.9108, "step": 31693 }, { "epoch": 0.41, "grad_norm": 3.7728967666625977, "learning_rate": 1.9723109553263197e-05, "loss": 1.9321, "step": 31694 }, { "epoch": 0.41, "grad_norm": 3.606137275695801, "learning_rate": 1.9723084996615676e-05, "loss": 2.0662, "step": 31695 }, { "epoch": 0.41, "grad_norm": 4.070281028747559, "learning_rate": 1.972306043889457e-05, "loss": 2.6286, "step": 31696 }, { "epoch": 0.41, "grad_norm": 3.645224094390869, "learning_rate": 1.9723035880099863e-05, "loss": 1.8382, "step": 31697 }, { "epoch": 0.41, "grad_norm": 3.814753770828247, "learning_rate": 1.9723011320231574e-05, "loss": 1.904, "step": 31698 }, { "epoch": 0.41, "grad_norm": 3.6910150051116943, "learning_rate": 1.9722986759289698e-05, "loss": 1.7967, "step": 31699 }, { "epoch": 0.41, "grad_norm": 4.330507278442383, "learning_rate": 1.972296219727424e-05, "loss": 2.4336, "step": 31700 }, { "epoch": 0.41, "grad_norm": 4.595173358917236, "learning_rate": 1.97229376341852e-05, "loss": 1.8732, "step": 31701 }, { "epoch": 0.41, "grad_norm": 4.1745452880859375, "learning_rate": 1.9722913070022587e-05, "loss": 1.9381, "step": 31702 }, { "epoch": 0.41, "grad_norm": 3.774059772491455, "learning_rate": 1.9722888504786396e-05, "loss": 2.1621, "step": 31703 }, { "epoch": 0.41, "grad_norm": 4.62327766418457, "learning_rate": 1.9722863938476635e-05, "loss": 2.1478, "step": 31704 }, { "epoch": 0.41, "grad_norm": 3.8827853202819824, "learning_rate": 1.9722839371093303e-05, "loss": 2.0975, "step": 31705 }, { "epoch": 0.41, "grad_norm": 3.8170409202575684, "learning_rate": 1.9722814802636405e-05, "loss": 1.7684, "step": 31706 }, { "epoch": 0.41, "grad_norm": 3.5960981845855713, "learning_rate": 1.9722790233105943e-05, "loss": 1.8994, "step": 31707 }, { "epoch": 0.41, "grad_norm": 3.6768970489501953, "learning_rate": 1.972276566250192e-05, "loss": 1.9444, "step": 31708 }, { "epoch": 0.41, "grad_norm": 3.7970762252807617, "learning_rate": 1.972274109082434e-05, "loss": 2.2321, "step": 31709 }, { "epoch": 0.41, "grad_norm": 3.728466272354126, "learning_rate": 1.9722716518073205e-05, "loss": 1.7503, "step": 31710 }, { "epoch": 0.41, "grad_norm": 3.815077066421509, "learning_rate": 1.9722691944248517e-05, "loss": 1.6322, "step": 31711 }, { "epoch": 0.41, "grad_norm": 3.2436962127685547, "learning_rate": 1.972266736935028e-05, "loss": 1.7203, "step": 31712 }, { "epoch": 0.41, "grad_norm": 3.5372390747070312, "learning_rate": 1.972264279337849e-05, "loss": 1.4925, "step": 31713 }, { "epoch": 0.41, "grad_norm": 3.9235146045684814, "learning_rate": 1.972261821633316e-05, "loss": 2.0849, "step": 31714 }, { "epoch": 0.41, "grad_norm": 3.8213436603546143, "learning_rate": 1.9722593638214288e-05, "loss": 1.8625, "step": 31715 }, { "epoch": 0.41, "grad_norm": 4.459882736206055, "learning_rate": 1.9722569059021874e-05, "loss": 2.6166, "step": 31716 }, { "epoch": 0.41, "grad_norm": 3.444179058074951, "learning_rate": 1.9722544478755924e-05, "loss": 1.7726, "step": 31717 }, { "epoch": 0.41, "grad_norm": 4.046647548675537, "learning_rate": 1.9722519897416444e-05, "loss": 2.308, "step": 31718 }, { "epoch": 0.41, "grad_norm": 3.577242612838745, "learning_rate": 1.9722495315003428e-05, "loss": 1.8951, "step": 31719 }, { "epoch": 0.41, "grad_norm": 4.0629377365112305, "learning_rate": 1.972247073151689e-05, "loss": 1.7104, "step": 31720 }, { "epoch": 0.41, "grad_norm": 3.5344650745391846, "learning_rate": 1.9722446146956817e-05, "loss": 1.9694, "step": 31721 }, { "epoch": 0.41, "grad_norm": 3.4014575481414795, "learning_rate": 1.9722421561323225e-05, "loss": 1.677, "step": 31722 }, { "epoch": 0.41, "grad_norm": 3.8052172660827637, "learning_rate": 1.9722396974616114e-05, "loss": 1.8881, "step": 31723 }, { "epoch": 0.41, "grad_norm": 3.8100411891937256, "learning_rate": 1.9722372386835484e-05, "loss": 2.2317, "step": 31724 }, { "epoch": 0.41, "grad_norm": 3.7558486461639404, "learning_rate": 1.972234779798134e-05, "loss": 2.2429, "step": 31725 }, { "epoch": 0.41, "grad_norm": 3.7576913833618164, "learning_rate": 1.9722323208053686e-05, "loss": 1.7972, "step": 31726 }, { "epoch": 0.41, "grad_norm": 3.807624340057373, "learning_rate": 1.972229861705252e-05, "loss": 2.0236, "step": 31727 }, { "epoch": 0.41, "grad_norm": 3.9311208724975586, "learning_rate": 1.9722274024977848e-05, "loss": 2.0638, "step": 31728 }, { "epoch": 0.41, "grad_norm": 3.1689414978027344, "learning_rate": 1.972224943182967e-05, "loss": 1.8108, "step": 31729 }, { "epoch": 0.41, "grad_norm": 4.046277046203613, "learning_rate": 1.9722224837607996e-05, "loss": 2.2587, "step": 31730 }, { "epoch": 0.41, "grad_norm": 3.587566614151001, "learning_rate": 1.9722200242312818e-05, "loss": 2.0319, "step": 31731 }, { "epoch": 0.41, "grad_norm": 4.690110683441162, "learning_rate": 1.9722175645944145e-05, "loss": 2.2466, "step": 31732 }, { "epoch": 0.41, "grad_norm": 3.9139957427978516, "learning_rate": 1.9722151048501982e-05, "loss": 2.0416, "step": 31733 }, { "epoch": 0.41, "grad_norm": 4.210758209228516, "learning_rate": 1.9722126449986328e-05, "loss": 2.0193, "step": 31734 }, { "epoch": 0.41, "grad_norm": 4.330375671386719, "learning_rate": 1.9722101850397184e-05, "loss": 2.7261, "step": 31735 }, { "epoch": 0.41, "grad_norm": 3.5482892990112305, "learning_rate": 1.9722077249734555e-05, "loss": 1.7201, "step": 31736 }, { "epoch": 0.41, "grad_norm": 3.7299227714538574, "learning_rate": 1.9722052647998447e-05, "loss": 1.7332, "step": 31737 }, { "epoch": 0.41, "grad_norm": 3.6584408283233643, "learning_rate": 1.9722028045188854e-05, "loss": 1.9809, "step": 31738 }, { "epoch": 0.41, "grad_norm": 3.962414026260376, "learning_rate": 1.972200344130579e-05, "loss": 2.2002, "step": 31739 }, { "epoch": 0.41, "grad_norm": 4.274563312530518, "learning_rate": 1.9721978836349248e-05, "loss": 2.0876, "step": 31740 }, { "epoch": 0.41, "grad_norm": 3.389733076095581, "learning_rate": 1.9721954230319235e-05, "loss": 1.886, "step": 31741 }, { "epoch": 0.41, "grad_norm": 4.017111301422119, "learning_rate": 1.9721929623215754e-05, "loss": 2.2433, "step": 31742 }, { "epoch": 0.41, "grad_norm": 4.475368499755859, "learning_rate": 1.9721905015038808e-05, "loss": 2.0248, "step": 31743 }, { "epoch": 0.41, "grad_norm": 3.579123020172119, "learning_rate": 1.9721880405788396e-05, "loss": 2.0351, "step": 31744 }, { "epoch": 0.41, "grad_norm": 3.9061384201049805, "learning_rate": 1.9721855795464526e-05, "loss": 1.5513, "step": 31745 }, { "epoch": 0.41, "grad_norm": 3.490147829055786, "learning_rate": 1.9721831184067198e-05, "loss": 1.8741, "step": 31746 }, { "epoch": 0.41, "grad_norm": 3.9111080169677734, "learning_rate": 1.9721806571596415e-05, "loss": 1.7709, "step": 31747 }, { "epoch": 0.41, "grad_norm": 3.4645822048187256, "learning_rate": 1.972178195805218e-05, "loss": 1.8942, "step": 31748 }, { "epoch": 0.41, "grad_norm": 4.0849928855896, "learning_rate": 1.9721757343434493e-05, "loss": 1.8897, "step": 31749 }, { "epoch": 0.41, "grad_norm": 3.510413646697998, "learning_rate": 1.9721732727743358e-05, "loss": 1.7442, "step": 31750 }, { "epoch": 0.41, "grad_norm": 3.372178554534912, "learning_rate": 1.9721708110978782e-05, "loss": 1.5757, "step": 31751 }, { "epoch": 0.41, "grad_norm": 3.7621541023254395, "learning_rate": 1.9721683493140764e-05, "loss": 2.0325, "step": 31752 }, { "epoch": 0.41, "grad_norm": 4.15557336807251, "learning_rate": 1.9721658874229308e-05, "loss": 1.9792, "step": 31753 }, { "epoch": 0.41, "grad_norm": 3.404102325439453, "learning_rate": 1.9721634254244414e-05, "loss": 1.717, "step": 31754 }, { "epoch": 0.41, "grad_norm": 4.03547477722168, "learning_rate": 1.972160963318609e-05, "loss": 2.077, "step": 31755 }, { "epoch": 0.41, "grad_norm": 3.8091156482696533, "learning_rate": 1.9721585011054332e-05, "loss": 2.0297, "step": 31756 }, { "epoch": 0.41, "grad_norm": 3.537202835083008, "learning_rate": 1.9721560387849144e-05, "loss": 1.7186, "step": 31757 }, { "epoch": 0.41, "grad_norm": 3.808096408843994, "learning_rate": 1.9721535763570535e-05, "loss": 1.7465, "step": 31758 }, { "epoch": 0.41, "grad_norm": 3.367295026779175, "learning_rate": 1.9721511138218502e-05, "loss": 1.643, "step": 31759 }, { "epoch": 0.41, "grad_norm": 4.000125885009766, "learning_rate": 1.972148651179305e-05, "loss": 1.8017, "step": 31760 }, { "epoch": 0.41, "grad_norm": 3.4028570652008057, "learning_rate": 1.972146188429418e-05, "loss": 1.8694, "step": 31761 }, { "epoch": 0.41, "grad_norm": 3.356139898300171, "learning_rate": 1.9721437255721895e-05, "loss": 1.8512, "step": 31762 }, { "epoch": 0.41, "grad_norm": 4.020995616912842, "learning_rate": 1.97214126260762e-05, "loss": 2.2077, "step": 31763 }, { "epoch": 0.41, "grad_norm": 3.491527557373047, "learning_rate": 1.97213879953571e-05, "loss": 1.6682, "step": 31764 }, { "epoch": 0.41, "grad_norm": 4.0353312492370605, "learning_rate": 1.972136336356459e-05, "loss": 2.397, "step": 31765 }, { "epoch": 0.41, "grad_norm": 3.944012403488159, "learning_rate": 1.9721338730698675e-05, "loss": 2.2537, "step": 31766 }, { "epoch": 0.41, "grad_norm": 4.323127269744873, "learning_rate": 1.9721314096759357e-05, "loss": 2.1943, "step": 31767 }, { "epoch": 0.41, "grad_norm": 4.101685047149658, "learning_rate": 1.9721289461746645e-05, "loss": 2.4773, "step": 31768 }, { "epoch": 0.41, "grad_norm": 3.608212947845459, "learning_rate": 1.972126482566054e-05, "loss": 2.118, "step": 31769 }, { "epoch": 0.41, "grad_norm": 3.7608773708343506, "learning_rate": 1.972124018850104e-05, "loss": 2.0264, "step": 31770 }, { "epoch": 0.41, "grad_norm": 3.4774796962738037, "learning_rate": 1.9721215550268145e-05, "loss": 1.7058, "step": 31771 }, { "epoch": 0.41, "grad_norm": 3.141043186187744, "learning_rate": 1.972119091096187e-05, "loss": 1.8444, "step": 31772 }, { "epoch": 0.41, "grad_norm": 3.5670440196990967, "learning_rate": 1.972116627058221e-05, "loss": 1.5771, "step": 31773 }, { "epoch": 0.41, "grad_norm": 3.242013454437256, "learning_rate": 1.9721141629129167e-05, "loss": 1.5182, "step": 31774 }, { "epoch": 0.41, "grad_norm": 3.956850290298462, "learning_rate": 1.972111698660274e-05, "loss": 1.9374, "step": 31775 }, { "epoch": 0.41, "grad_norm": 3.6708991527557373, "learning_rate": 1.9721092343002944e-05, "loss": 1.665, "step": 31776 }, { "epoch": 0.41, "grad_norm": 3.5803840160369873, "learning_rate": 1.972106769832977e-05, "loss": 1.5954, "step": 31777 }, { "epoch": 0.41, "grad_norm": 3.4035871028900146, "learning_rate": 1.9721043052583227e-05, "loss": 1.449, "step": 31778 }, { "epoch": 0.41, "grad_norm": 3.858193874359131, "learning_rate": 1.9721018405763316e-05, "loss": 2.3792, "step": 31779 }, { "epoch": 0.41, "grad_norm": 4.150087833404541, "learning_rate": 1.9720993757870038e-05, "loss": 2.2358, "step": 31780 }, { "epoch": 0.41, "grad_norm": 3.602018356323242, "learning_rate": 1.9720969108903404e-05, "loss": 1.8733, "step": 31781 }, { "epoch": 0.41, "grad_norm": 4.027482509613037, "learning_rate": 1.9720944458863402e-05, "loss": 2.2509, "step": 31782 }, { "epoch": 0.41, "grad_norm": 3.992393732070923, "learning_rate": 1.9720919807750048e-05, "loss": 1.9159, "step": 31783 }, { "epoch": 0.41, "grad_norm": 4.1643757820129395, "learning_rate": 1.9720895155563336e-05, "loss": 1.8396, "step": 31784 }, { "epoch": 0.41, "grad_norm": 3.78554630279541, "learning_rate": 1.9720870502303275e-05, "loss": 1.713, "step": 31785 }, { "epoch": 0.41, "grad_norm": 3.8838298320770264, "learning_rate": 1.9720845847969863e-05, "loss": 2.3233, "step": 31786 }, { "epoch": 0.41, "grad_norm": 4.40275764465332, "learning_rate": 1.9720821192563105e-05, "loss": 2.1891, "step": 31787 }, { "epoch": 0.41, "grad_norm": 4.3195695877075195, "learning_rate": 1.9720796536083004e-05, "loss": 2.0896, "step": 31788 }, { "epoch": 0.41, "grad_norm": 3.5037214756011963, "learning_rate": 1.9720771878529562e-05, "loss": 1.5326, "step": 31789 }, { "epoch": 0.41, "grad_norm": 3.4415688514709473, "learning_rate": 1.972074721990278e-05, "loss": 1.8206, "step": 31790 }, { "epoch": 0.41, "grad_norm": 3.8910763263702393, "learning_rate": 1.9720722560202664e-05, "loss": 1.8385, "step": 31791 }, { "epoch": 0.41, "grad_norm": 3.660903215408325, "learning_rate": 1.9720697899429217e-05, "loss": 1.6178, "step": 31792 }, { "epoch": 0.41, "grad_norm": 4.355318069458008, "learning_rate": 1.972067323758244e-05, "loss": 2.2401, "step": 31793 }, { "epoch": 0.41, "grad_norm": 3.6840195655822754, "learning_rate": 1.9720648574662334e-05, "loss": 1.9941, "step": 31794 }, { "epoch": 0.41, "grad_norm": 3.8901073932647705, "learning_rate": 1.9720623910668905e-05, "loss": 1.8571, "step": 31795 }, { "epoch": 0.41, "grad_norm": 3.2382657527923584, "learning_rate": 1.972059924560215e-05, "loss": 1.6918, "step": 31796 }, { "epoch": 0.41, "grad_norm": 3.7505252361297607, "learning_rate": 1.972057457946208e-05, "loss": 2.3643, "step": 31797 }, { "epoch": 0.41, "grad_norm": 3.7027170658111572, "learning_rate": 1.9720549912248693e-05, "loss": 2.1107, "step": 31798 }, { "epoch": 0.41, "grad_norm": 3.888925313949585, "learning_rate": 1.9720525243961993e-05, "loss": 1.7887, "step": 31799 }, { "epoch": 0.41, "grad_norm": 3.6383049488067627, "learning_rate": 1.972050057460198e-05, "loss": 1.9508, "step": 31800 }, { "epoch": 0.41, "grad_norm": 3.9700653553009033, "learning_rate": 1.972047590416866e-05, "loss": 2.4988, "step": 31801 }, { "epoch": 0.41, "grad_norm": 4.306210994720459, "learning_rate": 1.9720451232662035e-05, "loss": 2.3038, "step": 31802 }, { "epoch": 0.41, "grad_norm": 3.732374906539917, "learning_rate": 1.9720426560082108e-05, "loss": 1.6274, "step": 31803 }, { "epoch": 0.41, "grad_norm": 3.8506991863250732, "learning_rate": 1.9720401886428878e-05, "loss": 2.0658, "step": 31804 }, { "epoch": 0.41, "grad_norm": 4.001312732696533, "learning_rate": 1.9720377211702352e-05, "loss": 1.8948, "step": 31805 }, { "epoch": 0.41, "grad_norm": 3.701964855194092, "learning_rate": 1.9720352535902533e-05, "loss": 1.9196, "step": 31806 }, { "epoch": 0.41, "grad_norm": 3.2035036087036133, "learning_rate": 1.972032785902942e-05, "loss": 1.397, "step": 31807 }, { "epoch": 0.41, "grad_norm": 3.7567715644836426, "learning_rate": 1.972030318108302e-05, "loss": 2.3038, "step": 31808 }, { "epoch": 0.41, "grad_norm": 3.6252894401550293, "learning_rate": 1.972027850206333e-05, "loss": 1.9588, "step": 31809 }, { "epoch": 0.41, "grad_norm": 3.8916356563568115, "learning_rate": 1.972025382197036e-05, "loss": 1.8576, "step": 31810 }, { "epoch": 0.41, "grad_norm": 3.583354949951172, "learning_rate": 1.972022914080411e-05, "loss": 1.5175, "step": 31811 }, { "epoch": 0.41, "grad_norm": 3.7286384105682373, "learning_rate": 1.972020445856458e-05, "loss": 2.0348, "step": 31812 }, { "epoch": 0.41, "grad_norm": 3.8405532836914062, "learning_rate": 1.9720179775251774e-05, "loss": 1.8667, "step": 31813 }, { "epoch": 0.41, "grad_norm": 3.2778170108795166, "learning_rate": 1.9720155090865695e-05, "loss": 1.3696, "step": 31814 }, { "epoch": 0.41, "grad_norm": 3.9713165760040283, "learning_rate": 1.9720130405406347e-05, "loss": 1.8197, "step": 31815 }, { "epoch": 0.41, "grad_norm": 3.83257794380188, "learning_rate": 1.9720105718873733e-05, "loss": 2.0936, "step": 31816 }, { "epoch": 0.41, "grad_norm": 3.4665613174438477, "learning_rate": 1.972008103126785e-05, "loss": 1.7668, "step": 31817 }, { "epoch": 0.41, "grad_norm": 4.254570484161377, "learning_rate": 1.972005634258871e-05, "loss": 2.0769, "step": 31818 }, { "epoch": 0.41, "grad_norm": 3.3378567695617676, "learning_rate": 1.9720031652836308e-05, "loss": 1.8824, "step": 31819 }, { "epoch": 0.41, "grad_norm": 3.837590217590332, "learning_rate": 1.972000696201065e-05, "loss": 1.9376, "step": 31820 }, { "epoch": 0.41, "grad_norm": 3.561828851699829, "learning_rate": 1.971998227011174e-05, "loss": 1.8821, "step": 31821 }, { "epoch": 0.41, "grad_norm": 4.002020359039307, "learning_rate": 1.9719957577139576e-05, "loss": 2.2815, "step": 31822 }, { "epoch": 0.41, "grad_norm": 4.101284980773926, "learning_rate": 1.9719932883094167e-05, "loss": 1.7952, "step": 31823 }, { "epoch": 0.41, "grad_norm": 4.511807441711426, "learning_rate": 1.9719908187975513e-05, "loss": 2.3544, "step": 31824 }, { "epoch": 0.41, "grad_norm": 4.064597129821777, "learning_rate": 1.971988349178361e-05, "loss": 2.0446, "step": 31825 }, { "epoch": 0.41, "grad_norm": 3.7482337951660156, "learning_rate": 1.9719858794518472e-05, "loss": 1.896, "step": 31826 }, { "epoch": 0.41, "grad_norm": 4.330813884735107, "learning_rate": 1.9719834096180097e-05, "loss": 1.8112, "step": 31827 }, { "epoch": 0.41, "grad_norm": 4.440425872802734, "learning_rate": 1.9719809396768486e-05, "loss": 2.0987, "step": 31828 }, { "epoch": 0.41, "grad_norm": 4.063787937164307, "learning_rate": 1.9719784696283644e-05, "loss": 1.9198, "step": 31829 }, { "epoch": 0.41, "grad_norm": 3.9208953380584717, "learning_rate": 1.9719759994725574e-05, "loss": 2.1958, "step": 31830 }, { "epoch": 0.41, "grad_norm": 3.662975311279297, "learning_rate": 1.9719735292094273e-05, "loss": 2.1849, "step": 31831 }, { "epoch": 0.41, "grad_norm": 3.562248468399048, "learning_rate": 1.9719710588389754e-05, "loss": 1.9405, "step": 31832 }, { "epoch": 0.41, "grad_norm": 3.3639988899230957, "learning_rate": 1.971968588361201e-05, "loss": 1.9284, "step": 31833 }, { "epoch": 0.41, "grad_norm": 3.610797882080078, "learning_rate": 1.971966117776105e-05, "loss": 1.7043, "step": 31834 }, { "epoch": 0.41, "grad_norm": 3.710083484649658, "learning_rate": 1.9719636470836872e-05, "loss": 1.6004, "step": 31835 }, { "epoch": 0.41, "grad_norm": 3.928529977798462, "learning_rate": 1.9719611762839482e-05, "loss": 1.8021, "step": 31836 }, { "epoch": 0.41, "grad_norm": 3.864392042160034, "learning_rate": 1.9719587053768884e-05, "loss": 1.7962, "step": 31837 }, { "epoch": 0.41, "grad_norm": 3.8910422325134277, "learning_rate": 1.9719562343625078e-05, "loss": 1.8801, "step": 31838 }, { "epoch": 0.41, "grad_norm": 3.28818416595459, "learning_rate": 1.9719537632408067e-05, "loss": 1.7085, "step": 31839 }, { "epoch": 0.41, "grad_norm": 3.5616207122802734, "learning_rate": 1.9719512920117853e-05, "loss": 1.8589, "step": 31840 }, { "epoch": 0.41, "grad_norm": 4.4267048835754395, "learning_rate": 1.971948820675444e-05, "loss": 2.1318, "step": 31841 }, { "epoch": 0.41, "grad_norm": 3.890136957168579, "learning_rate": 1.9719463492317835e-05, "loss": 2.1501, "step": 31842 }, { "epoch": 0.41, "grad_norm": 4.154788970947266, "learning_rate": 1.9719438776808034e-05, "loss": 2.1124, "step": 31843 }, { "epoch": 0.41, "grad_norm": 3.70780086517334, "learning_rate": 1.971941406022504e-05, "loss": 1.7397, "step": 31844 }, { "epoch": 0.41, "grad_norm": 3.350081443786621, "learning_rate": 1.9719389342568858e-05, "loss": 1.7531, "step": 31845 }, { "epoch": 0.41, "grad_norm": 3.85970401763916, "learning_rate": 1.971936462383949e-05, "loss": 2.2391, "step": 31846 }, { "epoch": 0.41, "grad_norm": 3.6606245040893555, "learning_rate": 1.9719339904036943e-05, "loss": 1.9312, "step": 31847 }, { "epoch": 0.41, "grad_norm": 3.8644096851348877, "learning_rate": 1.9719315183161212e-05, "loss": 2.0931, "step": 31848 }, { "epoch": 0.41, "grad_norm": 3.5687966346740723, "learning_rate": 1.9719290461212305e-05, "loss": 2.1784, "step": 31849 }, { "epoch": 0.41, "grad_norm": 3.591829776763916, "learning_rate": 1.9719265738190227e-05, "loss": 1.877, "step": 31850 }, { "epoch": 0.41, "grad_norm": 4.147506237030029, "learning_rate": 1.971924101409497e-05, "loss": 2.0574, "step": 31851 }, { "epoch": 0.41, "grad_norm": 3.771099090576172, "learning_rate": 1.971921628892655e-05, "loss": 1.9121, "step": 31852 }, { "epoch": 0.41, "grad_norm": 4.248511791229248, "learning_rate": 1.971919156268496e-05, "loss": 2.0112, "step": 31853 }, { "epoch": 0.41, "grad_norm": 4.048402309417725, "learning_rate": 1.9719166835370208e-05, "loss": 2.0915, "step": 31854 }, { "epoch": 0.41, "grad_norm": 3.947348117828369, "learning_rate": 1.9719142106982298e-05, "loss": 1.9757, "step": 31855 }, { "epoch": 0.41, "grad_norm": 4.107311725616455, "learning_rate": 1.9719117377521225e-05, "loss": 2.1775, "step": 31856 }, { "epoch": 0.41, "grad_norm": 3.397261142730713, "learning_rate": 1.9719092646986998e-05, "loss": 1.5275, "step": 31857 }, { "epoch": 0.41, "grad_norm": 3.9769582748413086, "learning_rate": 1.9719067915379618e-05, "loss": 2.1599, "step": 31858 }, { "epoch": 0.41, "grad_norm": 4.25228214263916, "learning_rate": 1.9719043182699087e-05, "loss": 2.396, "step": 31859 }, { "epoch": 0.41, "grad_norm": 3.701706647872925, "learning_rate": 1.971901844894541e-05, "loss": 1.7885, "step": 31860 }, { "epoch": 0.41, "grad_norm": 4.430112838745117, "learning_rate": 1.971899371411859e-05, "loss": 1.9878, "step": 31861 }, { "epoch": 0.41, "grad_norm": 3.9717774391174316, "learning_rate": 1.971896897821863e-05, "loss": 1.8137, "step": 31862 }, { "epoch": 0.41, "grad_norm": 3.207771062850952, "learning_rate": 1.9718944241245526e-05, "loss": 1.7682, "step": 31863 }, { "epoch": 0.41, "grad_norm": 3.87680721282959, "learning_rate": 1.9718919503199286e-05, "loss": 2.583, "step": 31864 }, { "epoch": 0.41, "grad_norm": 3.8396105766296387, "learning_rate": 1.9718894764079913e-05, "loss": 1.9932, "step": 31865 }, { "epoch": 0.41, "grad_norm": 3.907759189605713, "learning_rate": 1.971887002388741e-05, "loss": 2.3401, "step": 31866 }, { "epoch": 0.41, "grad_norm": 5.115632057189941, "learning_rate": 1.971884528262178e-05, "loss": 2.5971, "step": 31867 }, { "epoch": 0.41, "grad_norm": 3.660675287246704, "learning_rate": 1.9718820540283025e-05, "loss": 1.9866, "step": 31868 }, { "epoch": 0.41, "grad_norm": 3.763916015625, "learning_rate": 1.9718795796871146e-05, "loss": 2.0432, "step": 31869 }, { "epoch": 0.41, "grad_norm": 4.322510242462158, "learning_rate": 1.9718771052386144e-05, "loss": 2.4688, "step": 31870 }, { "epoch": 0.41, "grad_norm": 3.342864751815796, "learning_rate": 1.9718746306828033e-05, "loss": 1.999, "step": 31871 }, { "epoch": 0.41, "grad_norm": 3.829655647277832, "learning_rate": 1.97187215601968e-05, "loss": 1.7862, "step": 31872 }, { "epoch": 0.41, "grad_norm": 3.2656326293945312, "learning_rate": 1.971869681249246e-05, "loss": 1.8852, "step": 31873 }, { "epoch": 0.41, "grad_norm": 4.004089832305908, "learning_rate": 1.9718672063715006e-05, "loss": 2.3167, "step": 31874 }, { "epoch": 0.41, "grad_norm": 3.606322765350342, "learning_rate": 1.971864731386445e-05, "loss": 2.1992, "step": 31875 }, { "epoch": 0.41, "grad_norm": 3.593904733657837, "learning_rate": 1.971862256294079e-05, "loss": 1.6605, "step": 31876 }, { "epoch": 0.41, "grad_norm": 3.580589532852173, "learning_rate": 1.9718597810944024e-05, "loss": 2.0065, "step": 31877 }, { "epoch": 0.41, "grad_norm": 4.565146446228027, "learning_rate": 1.9718573057874164e-05, "loss": 2.7547, "step": 31878 }, { "epoch": 0.41, "grad_norm": 3.6196436882019043, "learning_rate": 1.971854830373121e-05, "loss": 1.7609, "step": 31879 }, { "epoch": 0.41, "grad_norm": 3.2205867767333984, "learning_rate": 1.971852354851516e-05, "loss": 1.4697, "step": 31880 }, { "epoch": 0.41, "grad_norm": 3.5622522830963135, "learning_rate": 1.9718498792226025e-05, "loss": 1.72, "step": 31881 }, { "epoch": 0.41, "grad_norm": 3.450068473815918, "learning_rate": 1.9718474034863798e-05, "loss": 1.6169, "step": 31882 }, { "epoch": 0.41, "grad_norm": 3.692629814147949, "learning_rate": 1.971844927642849e-05, "loss": 1.9835, "step": 31883 }, { "epoch": 0.41, "grad_norm": 4.489560604095459, "learning_rate": 1.9718424516920098e-05, "loss": 1.8907, "step": 31884 }, { "epoch": 0.41, "grad_norm": 4.18833065032959, "learning_rate": 1.9718399756338627e-05, "loss": 2.346, "step": 31885 }, { "epoch": 0.41, "grad_norm": 3.6011159420013428, "learning_rate": 1.971837499468408e-05, "loss": 1.7555, "step": 31886 }, { "epoch": 0.41, "grad_norm": 4.1947245597839355, "learning_rate": 1.971835023195646e-05, "loss": 2.3533, "step": 31887 }, { "epoch": 0.41, "grad_norm": 4.66467809677124, "learning_rate": 1.971832546815577e-05, "loss": 2.3284, "step": 31888 }, { "epoch": 0.41, "grad_norm": 4.097939968109131, "learning_rate": 1.971830070328201e-05, "loss": 2.0828, "step": 31889 }, { "epoch": 0.41, "grad_norm": 3.5092039108276367, "learning_rate": 1.9718275937335186e-05, "loss": 1.7481, "step": 31890 }, { "epoch": 0.41, "grad_norm": 3.9354395866394043, "learning_rate": 1.9718251170315303e-05, "loss": 2.3652, "step": 31891 }, { "epoch": 0.41, "grad_norm": 3.570617198944092, "learning_rate": 1.9718226402222355e-05, "loss": 2.0305, "step": 31892 }, { "epoch": 0.41, "grad_norm": 4.0948710441589355, "learning_rate": 1.9718201633056355e-05, "loss": 1.8461, "step": 31893 }, { "epoch": 0.41, "grad_norm": 3.9473369121551514, "learning_rate": 1.9718176862817296e-05, "loss": 2.2575, "step": 31894 }, { "epoch": 0.41, "grad_norm": 4.047644138336182, "learning_rate": 1.971815209150519e-05, "loss": 2.3582, "step": 31895 }, { "epoch": 0.41, "grad_norm": 3.6971421241760254, "learning_rate": 1.971812731912003e-05, "loss": 2.32, "step": 31896 }, { "epoch": 0.41, "grad_norm": 3.739611864089966, "learning_rate": 1.9718102545661822e-05, "loss": 2.1749, "step": 31897 }, { "epoch": 0.41, "grad_norm": 3.3352668285369873, "learning_rate": 1.9718077771130576e-05, "loss": 1.6575, "step": 31898 }, { "epoch": 0.41, "grad_norm": 4.034961700439453, "learning_rate": 1.971805299552629e-05, "loss": 1.9178, "step": 31899 }, { "epoch": 0.41, "grad_norm": 3.920175552368164, "learning_rate": 1.9718028218848963e-05, "loss": 2.2885, "step": 31900 }, { "epoch": 0.41, "grad_norm": 3.874840259552002, "learning_rate": 1.9718003441098602e-05, "loss": 2.0907, "step": 31901 }, { "epoch": 0.41, "grad_norm": 3.224606513977051, "learning_rate": 1.9717978662275207e-05, "loss": 1.5763, "step": 31902 }, { "epoch": 0.41, "grad_norm": 3.9931745529174805, "learning_rate": 1.9717953882378783e-05, "loss": 1.9841, "step": 31903 }, { "epoch": 0.41, "grad_norm": 3.4872331619262695, "learning_rate": 1.971792910140933e-05, "loss": 1.6809, "step": 31904 }, { "epoch": 0.41, "grad_norm": 3.5294058322906494, "learning_rate": 1.9717904319366858e-05, "loss": 1.6582, "step": 31905 }, { "epoch": 0.41, "grad_norm": 3.595762252807617, "learning_rate": 1.9717879536251364e-05, "loss": 1.7464, "step": 31906 }, { "epoch": 0.41, "grad_norm": 4.2938385009765625, "learning_rate": 1.9717854752062848e-05, "loss": 2.3531, "step": 31907 }, { "epoch": 0.41, "grad_norm": 3.925408124923706, "learning_rate": 1.9717829966801318e-05, "loss": 1.8759, "step": 31908 }, { "epoch": 0.41, "grad_norm": 4.146952152252197, "learning_rate": 1.9717805180466773e-05, "loss": 2.2477, "step": 31909 }, { "epoch": 0.41, "grad_norm": 3.2295660972595215, "learning_rate": 1.9717780393059217e-05, "loss": 1.3791, "step": 31910 }, { "epoch": 0.41, "grad_norm": 4.0682268142700195, "learning_rate": 1.9717755604578653e-05, "loss": 1.9772, "step": 31911 }, { "epoch": 0.41, "grad_norm": 3.8261401653289795, "learning_rate": 1.9717730815025088e-05, "loss": 1.9762, "step": 31912 }, { "epoch": 0.41, "grad_norm": 3.680281162261963, "learning_rate": 1.971770602439852e-05, "loss": 1.9958, "step": 31913 }, { "epoch": 0.41, "grad_norm": 3.6791305541992188, "learning_rate": 1.9717681232698945e-05, "loss": 2.0943, "step": 31914 }, { "epoch": 0.41, "grad_norm": 4.028566837310791, "learning_rate": 1.971765643992638e-05, "loss": 2.4769, "step": 31915 }, { "epoch": 0.41, "grad_norm": 3.668980836868286, "learning_rate": 1.971763164608082e-05, "loss": 2.2388, "step": 31916 }, { "epoch": 0.41, "grad_norm": 3.8255887031555176, "learning_rate": 1.971760685116227e-05, "loss": 2.0056, "step": 31917 }, { "epoch": 0.41, "grad_norm": 3.738558769226074, "learning_rate": 1.971758205517073e-05, "loss": 1.8579, "step": 31918 }, { "epoch": 0.41, "grad_norm": 4.302104949951172, "learning_rate": 1.97175572581062e-05, "loss": 2.0794, "step": 31919 }, { "epoch": 0.41, "grad_norm": 4.2267069816589355, "learning_rate": 1.971753245996869e-05, "loss": 2.0552, "step": 31920 }, { "epoch": 0.41, "grad_norm": 3.4033403396606445, "learning_rate": 1.97175076607582e-05, "loss": 1.991, "step": 31921 }, { "epoch": 0.41, "grad_norm": 3.564809560775757, "learning_rate": 1.9717482860474733e-05, "loss": 1.9255, "step": 31922 }, { "epoch": 0.41, "grad_norm": 3.5054235458374023, "learning_rate": 1.971745805911829e-05, "loss": 1.7021, "step": 31923 }, { "epoch": 0.41, "grad_norm": 3.1521973609924316, "learning_rate": 1.9717433256688875e-05, "loss": 1.6277, "step": 31924 }, { "epoch": 0.41, "grad_norm": 2.9905123710632324, "learning_rate": 1.971740845318649e-05, "loss": 1.3655, "step": 31925 }, { "epoch": 0.41, "grad_norm": 4.202216148376465, "learning_rate": 1.9717383648611138e-05, "loss": 2.4063, "step": 31926 }, { "epoch": 0.41, "grad_norm": 3.394606351852417, "learning_rate": 1.9717358842962822e-05, "loss": 1.7472, "step": 31927 }, { "epoch": 0.41, "grad_norm": 3.367197036743164, "learning_rate": 1.9717334036241546e-05, "loss": 1.6179, "step": 31928 }, { "epoch": 0.41, "grad_norm": 3.6293745040893555, "learning_rate": 1.971730922844731e-05, "loss": 1.7668, "step": 31929 }, { "epoch": 0.41, "grad_norm": 3.851198196411133, "learning_rate": 1.971728441958012e-05, "loss": 2.2066, "step": 31930 }, { "epoch": 0.41, "grad_norm": 3.123305559158325, "learning_rate": 1.9717259609639976e-05, "loss": 1.2845, "step": 31931 }, { "epoch": 0.41, "grad_norm": 3.9118435382843018, "learning_rate": 1.9717234798626882e-05, "loss": 1.9277, "step": 31932 }, { "epoch": 0.41, "grad_norm": 3.745307207107544, "learning_rate": 1.971720998654084e-05, "loss": 1.8521, "step": 31933 }, { "epoch": 0.41, "grad_norm": 3.8030335903167725, "learning_rate": 1.971718517338185e-05, "loss": 2.1036, "step": 31934 }, { "epoch": 0.41, "grad_norm": 3.566173791885376, "learning_rate": 1.9717160359149924e-05, "loss": 1.8299, "step": 31935 }, { "epoch": 0.41, "grad_norm": 4.433431148529053, "learning_rate": 1.9717135543845053e-05, "loss": 2.0943, "step": 31936 }, { "epoch": 0.41, "grad_norm": 3.686967372894287, "learning_rate": 1.971711072746725e-05, "loss": 1.7122, "step": 31937 }, { "epoch": 0.41, "grad_norm": 3.953347682952881, "learning_rate": 1.971708591001651e-05, "loss": 2.0222, "step": 31938 }, { "epoch": 0.41, "grad_norm": 3.875910758972168, "learning_rate": 1.971706109149284e-05, "loss": 2.0134, "step": 31939 }, { "epoch": 0.41, "grad_norm": 3.7308661937713623, "learning_rate": 1.971703627189624e-05, "loss": 1.8428, "step": 31940 }, { "epoch": 0.41, "grad_norm": 4.038645267486572, "learning_rate": 1.9717011451226717e-05, "loss": 2.467, "step": 31941 }, { "epoch": 0.41, "grad_norm": 4.048806667327881, "learning_rate": 1.971698662948427e-05, "loss": 2.1663, "step": 31942 }, { "epoch": 0.41, "grad_norm": 3.993783950805664, "learning_rate": 1.9716961806668904e-05, "loss": 2.2833, "step": 31943 }, { "epoch": 0.41, "grad_norm": 3.399059772491455, "learning_rate": 1.971693698278062e-05, "loss": 1.5893, "step": 31944 }, { "epoch": 0.41, "grad_norm": 4.128622055053711, "learning_rate": 1.971691215781942e-05, "loss": 2.017, "step": 31945 }, { "epoch": 0.41, "grad_norm": 3.9032130241394043, "learning_rate": 1.9716887331785308e-05, "loss": 2.3292, "step": 31946 }, { "epoch": 0.41, "grad_norm": 4.193352222442627, "learning_rate": 1.971686250467829e-05, "loss": 2.4582, "step": 31947 }, { "epoch": 0.41, "grad_norm": 3.6779792308807373, "learning_rate": 1.9716837676498363e-05, "loss": 1.8507, "step": 31948 }, { "epoch": 0.41, "grad_norm": 4.123407363891602, "learning_rate": 1.9716812847245532e-05, "loss": 1.7165, "step": 31949 }, { "epoch": 0.41, "grad_norm": 3.040741443634033, "learning_rate": 1.97167880169198e-05, "loss": 1.5374, "step": 31950 }, { "epoch": 0.41, "grad_norm": 3.411670207977295, "learning_rate": 1.971676318552117e-05, "loss": 1.9323, "step": 31951 }, { "epoch": 0.41, "grad_norm": 3.7111732959747314, "learning_rate": 1.9716738353049646e-05, "loss": 1.9542, "step": 31952 }, { "epoch": 0.41, "grad_norm": 3.7498745918273926, "learning_rate": 1.9716713519505228e-05, "loss": 1.589, "step": 31953 }, { "epoch": 0.41, "grad_norm": 4.053786277770996, "learning_rate": 1.9716688684887918e-05, "loss": 2.047, "step": 31954 }, { "epoch": 0.41, "grad_norm": 3.8349099159240723, "learning_rate": 1.9716663849197722e-05, "loss": 2.0119, "step": 31955 }, { "epoch": 0.41, "grad_norm": 3.7949106693267822, "learning_rate": 1.9716639012434644e-05, "loss": 1.8834, "step": 31956 }, { "epoch": 0.41, "grad_norm": 3.603144884109497, "learning_rate": 1.9716614174598683e-05, "loss": 2.2014, "step": 31957 }, { "epoch": 0.41, "grad_norm": 3.904254913330078, "learning_rate": 1.9716589335689843e-05, "loss": 2.2448, "step": 31958 }, { "epoch": 0.41, "grad_norm": 3.7235047817230225, "learning_rate": 1.9716564495708125e-05, "loss": 2.1135, "step": 31959 }, { "epoch": 0.41, "grad_norm": 3.7017154693603516, "learning_rate": 1.9716539654653533e-05, "loss": 1.5439, "step": 31960 }, { "epoch": 0.41, "grad_norm": 4.2194342613220215, "learning_rate": 1.9716514812526074e-05, "loss": 2.2989, "step": 31961 }, { "epoch": 0.41, "grad_norm": 3.71323823928833, "learning_rate": 1.971648996932574e-05, "loss": 1.7192, "step": 31962 }, { "epoch": 0.41, "grad_norm": 3.606431484222412, "learning_rate": 1.9716465125052547e-05, "loss": 2.0168, "step": 31963 }, { "epoch": 0.41, "grad_norm": 4.5053277015686035, "learning_rate": 1.971644027970649e-05, "loss": 2.6096, "step": 31964 }, { "epoch": 0.41, "grad_norm": 3.4064836502075195, "learning_rate": 1.9716415433287572e-05, "loss": 1.7472, "step": 31965 }, { "epoch": 0.41, "grad_norm": 3.9152987003326416, "learning_rate": 1.97163905857958e-05, "loss": 2.2061, "step": 31966 }, { "epoch": 0.41, "grad_norm": 4.354150295257568, "learning_rate": 1.971636573723117e-05, "loss": 2.203, "step": 31967 }, { "epoch": 0.41, "grad_norm": 4.050266742706299, "learning_rate": 1.971634088759369e-05, "loss": 1.9184, "step": 31968 }, { "epoch": 0.41, "grad_norm": 3.8136327266693115, "learning_rate": 1.9716316036883362e-05, "loss": 1.7397, "step": 31969 }, { "epoch": 0.41, "grad_norm": 3.5571610927581787, "learning_rate": 1.9716291185100184e-05, "loss": 1.9144, "step": 31970 }, { "epoch": 0.41, "grad_norm": 4.141342639923096, "learning_rate": 1.9716266332244167e-05, "loss": 1.9671, "step": 31971 }, { "epoch": 0.41, "grad_norm": 4.229555130004883, "learning_rate": 1.9716241478315306e-05, "loss": 2.1785, "step": 31972 }, { "epoch": 0.41, "grad_norm": 3.8377678394317627, "learning_rate": 1.971621662331361e-05, "loss": 1.8892, "step": 31973 }, { "epoch": 0.41, "grad_norm": 3.237973690032959, "learning_rate": 1.9716191767239077e-05, "loss": 1.534, "step": 31974 }, { "epoch": 0.41, "grad_norm": 4.2836761474609375, "learning_rate": 1.971616691009171e-05, "loss": 2.4454, "step": 31975 }, { "epoch": 0.41, "grad_norm": 3.6969964504241943, "learning_rate": 1.9716142051871516e-05, "loss": 1.8537, "step": 31976 }, { "epoch": 0.41, "grad_norm": 3.5734593868255615, "learning_rate": 1.9716117192578496e-05, "loss": 1.584, "step": 31977 }, { "epoch": 0.42, "grad_norm": 4.48444938659668, "learning_rate": 1.971609233221265e-05, "loss": 2.2874, "step": 31978 }, { "epoch": 0.42, "grad_norm": 4.105861663818359, "learning_rate": 1.9716067470773982e-05, "loss": 2.2607, "step": 31979 }, { "epoch": 0.42, "grad_norm": 3.892226219177246, "learning_rate": 1.9716042608262498e-05, "loss": 1.9761, "step": 31980 }, { "epoch": 0.42, "grad_norm": 3.5286624431610107, "learning_rate": 1.9716017744678196e-05, "loss": 1.7643, "step": 31981 }, { "epoch": 0.42, "grad_norm": 3.568704605102539, "learning_rate": 1.971599288002108e-05, "loss": 1.9895, "step": 31982 }, { "epoch": 0.42, "grad_norm": 3.58685302734375, "learning_rate": 1.9715968014291155e-05, "loss": 1.8075, "step": 31983 }, { "epoch": 0.42, "grad_norm": 3.448195695877075, "learning_rate": 1.9715943147488422e-05, "loss": 1.8207, "step": 31984 }, { "epoch": 0.42, "grad_norm": 4.06796407699585, "learning_rate": 1.9715918279612885e-05, "loss": 2.424, "step": 31985 }, { "epoch": 0.42, "grad_norm": 3.9542529582977295, "learning_rate": 1.9715893410664547e-05, "loss": 1.6218, "step": 31986 }, { "epoch": 0.42, "grad_norm": 3.5045857429504395, "learning_rate": 1.9715868540643405e-05, "loss": 1.9671, "step": 31987 }, { "epoch": 0.42, "grad_norm": 3.730740547180176, "learning_rate": 1.9715843669549468e-05, "loss": 1.7644, "step": 31988 }, { "epoch": 0.42, "grad_norm": 3.4826459884643555, "learning_rate": 1.971581879738274e-05, "loss": 2.0807, "step": 31989 }, { "epoch": 0.42, "grad_norm": 3.812163829803467, "learning_rate": 1.9715793924143218e-05, "loss": 2.08, "step": 31990 }, { "epoch": 0.42, "grad_norm": 3.6547086238861084, "learning_rate": 1.9715769049830905e-05, "loss": 2.1419, "step": 31991 }, { "epoch": 0.42, "grad_norm": 3.775264024734497, "learning_rate": 1.971574417444581e-05, "loss": 1.9739, "step": 31992 }, { "epoch": 0.42, "grad_norm": 4.113462924957275, "learning_rate": 1.9715719297987933e-05, "loss": 2.1974, "step": 31993 }, { "epoch": 0.42, "grad_norm": 4.338466167449951, "learning_rate": 1.9715694420457274e-05, "loss": 1.8409, "step": 31994 }, { "epoch": 0.42, "grad_norm": 4.141585350036621, "learning_rate": 1.9715669541853837e-05, "loss": 2.21, "step": 31995 }, { "epoch": 0.42, "grad_norm": 4.093122959136963, "learning_rate": 1.9715644662177627e-05, "loss": 2.2386, "step": 31996 }, { "epoch": 0.42, "grad_norm": 3.851637363433838, "learning_rate": 1.971561978142864e-05, "loss": 1.9183, "step": 31997 }, { "epoch": 0.42, "grad_norm": 3.843430519104004, "learning_rate": 1.9715594899606888e-05, "loss": 1.7652, "step": 31998 }, { "epoch": 0.42, "grad_norm": 3.3795790672302246, "learning_rate": 1.971557001671237e-05, "loss": 1.7997, "step": 31999 }, { "epoch": 0.42, "grad_norm": 3.7565786838531494, "learning_rate": 1.9715545132745087e-05, "loss": 2.1076, "step": 32000 }, { "epoch": 0.42, "grad_norm": 3.811704158782959, "learning_rate": 1.9715520247705044e-05, "loss": 2.0395, "step": 32001 }, { "epoch": 0.42, "grad_norm": 3.9278032779693604, "learning_rate": 1.9715495361592244e-05, "loss": 1.8846, "step": 32002 }, { "epoch": 0.42, "grad_norm": 3.6516549587249756, "learning_rate": 1.9715470474406683e-05, "loss": 1.9422, "step": 32003 }, { "epoch": 0.42, "grad_norm": 3.813056468963623, "learning_rate": 1.9715445586148372e-05, "loss": 2.0085, "step": 32004 }, { "epoch": 0.42, "grad_norm": 3.9265081882476807, "learning_rate": 1.9715420696817315e-05, "loss": 1.9811, "step": 32005 }, { "epoch": 0.42, "grad_norm": 3.3926525115966797, "learning_rate": 1.9715395806413507e-05, "loss": 1.6533, "step": 32006 }, { "epoch": 0.42, "grad_norm": 3.931490898132324, "learning_rate": 1.9715370914936952e-05, "loss": 1.9872, "step": 32007 }, { "epoch": 0.42, "grad_norm": 3.9574108123779297, "learning_rate": 1.971534602238766e-05, "loss": 2.2188, "step": 32008 }, { "epoch": 0.42, "grad_norm": 3.8087081909179688, "learning_rate": 1.971532112876563e-05, "loss": 2.2856, "step": 32009 }, { "epoch": 0.42, "grad_norm": 4.2056074142456055, "learning_rate": 1.9715296234070856e-05, "loss": 2.3095, "step": 32010 }, { "epoch": 0.42, "grad_norm": 3.956871747970581, "learning_rate": 1.9715271338303355e-05, "loss": 1.7785, "step": 32011 }, { "epoch": 0.42, "grad_norm": 4.156259059906006, "learning_rate": 1.971524644146312e-05, "loss": 1.8896, "step": 32012 }, { "epoch": 0.42, "grad_norm": 3.724622964859009, "learning_rate": 1.971522154355016e-05, "loss": 1.6018, "step": 32013 }, { "epoch": 0.42, "grad_norm": 3.512291669845581, "learning_rate": 1.9715196644564473e-05, "loss": 1.7043, "step": 32014 }, { "epoch": 0.42, "grad_norm": 3.4511897563934326, "learning_rate": 1.9715171744506065e-05, "loss": 1.9905, "step": 32015 }, { "epoch": 0.42, "grad_norm": 3.6075046062469482, "learning_rate": 1.9715146843374933e-05, "loss": 1.6756, "step": 32016 }, { "epoch": 0.42, "grad_norm": 4.118983745574951, "learning_rate": 1.971512194117109e-05, "loss": 2.4668, "step": 32017 }, { "epoch": 0.42, "grad_norm": 3.8610517978668213, "learning_rate": 1.9715097037894528e-05, "loss": 2.0631, "step": 32018 }, { "epoch": 0.42, "grad_norm": 3.665250539779663, "learning_rate": 1.9715072133545258e-05, "loss": 1.895, "step": 32019 }, { "epoch": 0.42, "grad_norm": 3.754967212677002, "learning_rate": 1.9715047228123275e-05, "loss": 1.813, "step": 32020 }, { "epoch": 0.42, "grad_norm": 3.530153274536133, "learning_rate": 1.971502232162859e-05, "loss": 1.7494, "step": 32021 }, { "epoch": 0.42, "grad_norm": 3.6394827365875244, "learning_rate": 1.97149974140612e-05, "loss": 1.8774, "step": 32022 }, { "epoch": 0.42, "grad_norm": 3.6620171070098877, "learning_rate": 1.9714972505421107e-05, "loss": 2.0527, "step": 32023 }, { "epoch": 0.42, "grad_norm": 4.424489974975586, "learning_rate": 1.971494759570832e-05, "loss": 2.479, "step": 32024 }, { "epoch": 0.42, "grad_norm": 3.9246444702148438, "learning_rate": 1.971492268492284e-05, "loss": 1.9438, "step": 32025 }, { "epoch": 0.42, "grad_norm": 3.6562838554382324, "learning_rate": 1.9714897773064662e-05, "loss": 2.0398, "step": 32026 }, { "epoch": 0.42, "grad_norm": 4.298302173614502, "learning_rate": 1.9714872860133796e-05, "loss": 2.0816, "step": 32027 }, { "epoch": 0.42, "grad_norm": 4.194134712219238, "learning_rate": 1.9714847946130244e-05, "loss": 2.4494, "step": 32028 }, { "epoch": 0.42, "grad_norm": 3.9764277935028076, "learning_rate": 1.9714823031054006e-05, "loss": 2.312, "step": 32029 }, { "epoch": 0.42, "grad_norm": 4.018149375915527, "learning_rate": 1.971479811490509e-05, "loss": 2.2168, "step": 32030 }, { "epoch": 0.42, "grad_norm": 4.013644695281982, "learning_rate": 1.9714773197683496e-05, "loss": 2.0321, "step": 32031 }, { "epoch": 0.42, "grad_norm": 3.2892982959747314, "learning_rate": 1.971474827938922e-05, "loss": 1.8037, "step": 32032 }, { "epoch": 0.42, "grad_norm": 3.8806581497192383, "learning_rate": 1.971472336002228e-05, "loss": 1.6237, "step": 32033 }, { "epoch": 0.42, "grad_norm": 4.184743404388428, "learning_rate": 1.9714698439582663e-05, "loss": 1.4066, "step": 32034 }, { "epoch": 0.42, "grad_norm": 4.056232452392578, "learning_rate": 1.971467351807038e-05, "loss": 1.8143, "step": 32035 }, { "epoch": 0.42, "grad_norm": 3.838336229324341, "learning_rate": 1.9714648595485433e-05, "loss": 2.0612, "step": 32036 }, { "epoch": 0.42, "grad_norm": 3.8079872131347656, "learning_rate": 1.9714623671827823e-05, "loss": 1.6862, "step": 32037 }, { "epoch": 0.42, "grad_norm": 4.0211591720581055, "learning_rate": 1.9714598747097554e-05, "loss": 2.2405, "step": 32038 }, { "epoch": 0.42, "grad_norm": 4.19336462020874, "learning_rate": 1.971457382129463e-05, "loss": 2.7247, "step": 32039 }, { "epoch": 0.42, "grad_norm": 3.89077091217041, "learning_rate": 1.971454889441905e-05, "loss": 2.0762, "step": 32040 }, { "epoch": 0.42, "grad_norm": 3.1893177032470703, "learning_rate": 1.9714523966470822e-05, "loss": 1.6072, "step": 32041 }, { "epoch": 0.42, "grad_norm": 3.5689778327941895, "learning_rate": 1.9714499037449942e-05, "loss": 1.7077, "step": 32042 }, { "epoch": 0.42, "grad_norm": 3.8174569606781006, "learning_rate": 1.971447410735642e-05, "loss": 2.086, "step": 32043 }, { "epoch": 0.42, "grad_norm": 3.3444418907165527, "learning_rate": 1.9714449176190252e-05, "loss": 1.7256, "step": 32044 }, { "epoch": 0.42, "grad_norm": 3.7633750438690186, "learning_rate": 1.9714424243951446e-05, "loss": 1.8997, "step": 32045 }, { "epoch": 0.42, "grad_norm": 3.8886330127716064, "learning_rate": 1.9714399310640005e-05, "loss": 2.1877, "step": 32046 }, { "epoch": 0.42, "grad_norm": 3.476447820663452, "learning_rate": 1.9714374376255926e-05, "loss": 1.6703, "step": 32047 }, { "epoch": 0.42, "grad_norm": 3.4770750999450684, "learning_rate": 1.9714349440799218e-05, "loss": 1.7809, "step": 32048 }, { "epoch": 0.42, "grad_norm": 4.04121208190918, "learning_rate": 1.971432450426988e-05, "loss": 1.8292, "step": 32049 }, { "epoch": 0.42, "grad_norm": 3.8961269855499268, "learning_rate": 1.9714299566667914e-05, "loss": 2.1189, "step": 32050 }, { "epoch": 0.42, "grad_norm": 3.7842206954956055, "learning_rate": 1.9714274627993325e-05, "loss": 1.9096, "step": 32051 }, { "epoch": 0.42, "grad_norm": 4.767352104187012, "learning_rate": 1.9714249688246117e-05, "loss": 2.6044, "step": 32052 }, { "epoch": 0.42, "grad_norm": 3.491872549057007, "learning_rate": 1.9714224747426288e-05, "loss": 2.2528, "step": 32053 }, { "epoch": 0.42, "grad_norm": 3.7445573806762695, "learning_rate": 1.9714199805533844e-05, "loss": 2.0521, "step": 32054 }, { "epoch": 0.42, "grad_norm": 4.246824264526367, "learning_rate": 1.9714174862568793e-05, "loss": 1.9413, "step": 32055 }, { "epoch": 0.42, "grad_norm": 3.6435840129852295, "learning_rate": 1.9714149918531126e-05, "loss": 2.1394, "step": 32056 }, { "epoch": 0.42, "grad_norm": 4.285229682922363, "learning_rate": 1.9714124973420855e-05, "loss": 2.3498, "step": 32057 }, { "epoch": 0.42, "grad_norm": 4.47321891784668, "learning_rate": 1.971410002723798e-05, "loss": 2.6958, "step": 32058 }, { "epoch": 0.42, "grad_norm": 3.3805313110351562, "learning_rate": 1.9714075079982502e-05, "loss": 1.9633, "step": 32059 }, { "epoch": 0.42, "grad_norm": 3.648375988006592, "learning_rate": 1.9714050131654424e-05, "loss": 1.5178, "step": 32060 }, { "epoch": 0.42, "grad_norm": 3.818249464035034, "learning_rate": 1.9714025182253755e-05, "loss": 2.0333, "step": 32061 }, { "epoch": 0.42, "grad_norm": 3.8429248332977295, "learning_rate": 1.971400023178049e-05, "loss": 2.1403, "step": 32062 }, { "epoch": 0.42, "grad_norm": 3.4328696727752686, "learning_rate": 1.9713975280234633e-05, "loss": 1.6417, "step": 32063 }, { "epoch": 0.42, "grad_norm": 4.4480695724487305, "learning_rate": 1.971395032761619e-05, "loss": 2.1896, "step": 32064 }, { "epoch": 0.42, "grad_norm": 3.85341739654541, "learning_rate": 1.9713925373925164e-05, "loss": 2.0381, "step": 32065 }, { "epoch": 0.42, "grad_norm": 3.34352445602417, "learning_rate": 1.971390041916155e-05, "loss": 1.8261, "step": 32066 }, { "epoch": 0.42, "grad_norm": 3.94559645652771, "learning_rate": 1.971387546332536e-05, "loss": 1.941, "step": 32067 }, { "epoch": 0.42, "grad_norm": 4.2192182540893555, "learning_rate": 1.9713850506416595e-05, "loss": 1.9902, "step": 32068 }, { "epoch": 0.42, "grad_norm": 3.6133391857147217, "learning_rate": 1.9713825548435255e-05, "loss": 1.8856, "step": 32069 }, { "epoch": 0.42, "grad_norm": 3.921470880508423, "learning_rate": 1.9713800589381345e-05, "loss": 2.0913, "step": 32070 }, { "epoch": 0.42, "grad_norm": 3.6809678077697754, "learning_rate": 1.9713775629254867e-05, "loss": 2.0943, "step": 32071 }, { "epoch": 0.42, "grad_norm": 4.315109729766846, "learning_rate": 1.971375066805582e-05, "loss": 2.2464, "step": 32072 }, { "epoch": 0.42, "grad_norm": 3.946763515472412, "learning_rate": 1.9713725705784212e-05, "loss": 2.0253, "step": 32073 }, { "epoch": 0.42, "grad_norm": 4.304515361785889, "learning_rate": 1.9713700742440046e-05, "loss": 2.12, "step": 32074 }, { "epoch": 0.42, "grad_norm": 3.753966808319092, "learning_rate": 1.971367577802332e-05, "loss": 1.8131, "step": 32075 }, { "epoch": 0.42, "grad_norm": 4.169070243835449, "learning_rate": 1.971365081253404e-05, "loss": 2.4207, "step": 32076 }, { "epoch": 0.42, "grad_norm": 3.6725897789001465, "learning_rate": 1.9713625845972207e-05, "loss": 2.0046, "step": 32077 }, { "epoch": 0.42, "grad_norm": 4.126211166381836, "learning_rate": 1.9713600878337825e-05, "loss": 2.5009, "step": 32078 }, { "epoch": 0.42, "grad_norm": 4.15553092956543, "learning_rate": 1.97135759096309e-05, "loss": 1.7648, "step": 32079 }, { "epoch": 0.42, "grad_norm": 3.6695704460144043, "learning_rate": 1.9713550939851428e-05, "loss": 1.8955, "step": 32080 }, { "epoch": 0.42, "grad_norm": 3.625436544418335, "learning_rate": 1.9713525968999415e-05, "loss": 1.4933, "step": 32081 }, { "epoch": 0.42, "grad_norm": 3.7212493419647217, "learning_rate": 1.9713500997074866e-05, "loss": 2.1596, "step": 32082 }, { "epoch": 0.42, "grad_norm": 4.137294769287109, "learning_rate": 1.9713476024077784e-05, "loss": 2.4847, "step": 32083 }, { "epoch": 0.42, "grad_norm": 4.847923755645752, "learning_rate": 1.9713451050008164e-05, "loss": 2.9298, "step": 32084 }, { "epoch": 0.42, "grad_norm": 4.208766460418701, "learning_rate": 1.9713426074866014e-05, "loss": 2.1982, "step": 32085 }, { "epoch": 0.42, "grad_norm": 4.109001159667969, "learning_rate": 1.971340109865134e-05, "loss": 1.9261, "step": 32086 }, { "epoch": 0.42, "grad_norm": 3.849830389022827, "learning_rate": 1.9713376121364142e-05, "loss": 1.6291, "step": 32087 }, { "epoch": 0.42, "grad_norm": 3.803104877471924, "learning_rate": 1.9713351143004423e-05, "loss": 2.2248, "step": 32088 }, { "epoch": 0.42, "grad_norm": 4.148102283477783, "learning_rate": 1.9713326163572183e-05, "loss": 1.9743, "step": 32089 }, { "epoch": 0.42, "grad_norm": 3.682339668273926, "learning_rate": 1.9713301183067428e-05, "loss": 1.6425, "step": 32090 }, { "epoch": 0.42, "grad_norm": 3.856462001800537, "learning_rate": 1.971327620149016e-05, "loss": 2.1578, "step": 32091 }, { "epoch": 0.42, "grad_norm": 4.981653690338135, "learning_rate": 1.971325121884038e-05, "loss": 2.4884, "step": 32092 }, { "epoch": 0.42, "grad_norm": 3.860870838165283, "learning_rate": 1.9713226235118095e-05, "loss": 2.0631, "step": 32093 }, { "epoch": 0.42, "grad_norm": 4.067741870880127, "learning_rate": 1.9713201250323304e-05, "loss": 1.9525, "step": 32094 }, { "epoch": 0.42, "grad_norm": 4.069244384765625, "learning_rate": 1.971317626445601e-05, "loss": 2.2164, "step": 32095 }, { "epoch": 0.42, "grad_norm": 3.7226338386535645, "learning_rate": 1.9713151277516216e-05, "loss": 2.0315, "step": 32096 }, { "epoch": 0.42, "grad_norm": 3.6518490314483643, "learning_rate": 1.9713126289503925e-05, "loss": 2.2255, "step": 32097 }, { "epoch": 0.42, "grad_norm": 4.048830986022949, "learning_rate": 1.9713101300419143e-05, "loss": 1.8271, "step": 32098 }, { "epoch": 0.42, "grad_norm": 3.7599282264709473, "learning_rate": 1.9713076310261867e-05, "loss": 1.9666, "step": 32099 }, { "epoch": 0.42, "grad_norm": 3.984769344329834, "learning_rate": 1.9713051319032105e-05, "loss": 2.1466, "step": 32100 }, { "epoch": 0.42, "grad_norm": 4.078313827514648, "learning_rate": 1.9713026326729854e-05, "loss": 2.08, "step": 32101 }, { "epoch": 0.42, "grad_norm": 3.522158145904541, "learning_rate": 1.9713001333355124e-05, "loss": 2.0341, "step": 32102 }, { "epoch": 0.42, "grad_norm": 3.809880018234253, "learning_rate": 1.971297633890791e-05, "loss": 1.8327, "step": 32103 }, { "epoch": 0.42, "grad_norm": 3.566274881362915, "learning_rate": 1.9712951343388222e-05, "loss": 1.8718, "step": 32104 }, { "epoch": 0.42, "grad_norm": 3.6744823455810547, "learning_rate": 1.971292634679606e-05, "loss": 2.0806, "step": 32105 }, { "epoch": 0.42, "grad_norm": 3.8466293811798096, "learning_rate": 1.9712901349131424e-05, "loss": 1.9878, "step": 32106 }, { "epoch": 0.42, "grad_norm": 3.8445613384246826, "learning_rate": 1.9712876350394318e-05, "loss": 2.0629, "step": 32107 }, { "epoch": 0.42, "grad_norm": 3.3921923637390137, "learning_rate": 1.9712851350584746e-05, "loss": 1.7412, "step": 32108 }, { "epoch": 0.42, "grad_norm": 3.76835560798645, "learning_rate": 1.9712826349702712e-05, "loss": 1.9735, "step": 32109 }, { "epoch": 0.42, "grad_norm": 3.951223134994507, "learning_rate": 1.9712801347748216e-05, "loss": 2.3981, "step": 32110 }, { "epoch": 0.42, "grad_norm": 3.74215030670166, "learning_rate": 1.9712776344721264e-05, "loss": 2.1289, "step": 32111 }, { "epoch": 0.42, "grad_norm": 4.344244956970215, "learning_rate": 1.9712751340621853e-05, "loss": 2.3266, "step": 32112 }, { "epoch": 0.42, "grad_norm": 3.8898417949676514, "learning_rate": 1.9712726335449994e-05, "loss": 1.9061, "step": 32113 }, { "epoch": 0.42, "grad_norm": 4.031326770782471, "learning_rate": 1.9712701329205682e-05, "loss": 2.2002, "step": 32114 }, { "epoch": 0.42, "grad_norm": 4.129400253295898, "learning_rate": 1.971267632188892e-05, "loss": 2.0875, "step": 32115 }, { "epoch": 0.42, "grad_norm": 3.4557411670684814, "learning_rate": 1.971265131349972e-05, "loss": 1.9708, "step": 32116 }, { "epoch": 0.42, "grad_norm": 3.7930335998535156, "learning_rate": 1.9712626304038075e-05, "loss": 2.0313, "step": 32117 }, { "epoch": 0.42, "grad_norm": 4.496776103973389, "learning_rate": 1.9712601293503995e-05, "loss": 2.3713, "step": 32118 }, { "epoch": 0.42, "grad_norm": 3.3910107612609863, "learning_rate": 1.9712576281897477e-05, "loss": 1.681, "step": 32119 }, { "epoch": 0.42, "grad_norm": 4.154165267944336, "learning_rate": 1.9712551269218524e-05, "loss": 2.8327, "step": 32120 }, { "epoch": 0.42, "grad_norm": 3.8229634761810303, "learning_rate": 1.971252625546714e-05, "loss": 1.9807, "step": 32121 }, { "epoch": 0.42, "grad_norm": 4.302330017089844, "learning_rate": 1.971250124064333e-05, "loss": 2.3648, "step": 32122 }, { "epoch": 0.42, "grad_norm": 3.6318154335021973, "learning_rate": 1.9712476224747096e-05, "loss": 1.5919, "step": 32123 }, { "epoch": 0.42, "grad_norm": 3.588008165359497, "learning_rate": 1.971245120777844e-05, "loss": 2.1461, "step": 32124 }, { "epoch": 0.42, "grad_norm": 3.6403920650482178, "learning_rate": 1.971242618973736e-05, "loss": 2.1271, "step": 32125 }, { "epoch": 0.42, "grad_norm": 3.556006908416748, "learning_rate": 1.9712401170623868e-05, "loss": 1.8942, "step": 32126 }, { "epoch": 0.42, "grad_norm": 3.3968002796173096, "learning_rate": 1.9712376150437963e-05, "loss": 1.6748, "step": 32127 }, { "epoch": 0.42, "grad_norm": 3.2613110542297363, "learning_rate": 1.9712351129179647e-05, "loss": 1.8102, "step": 32128 }, { "epoch": 0.42, "grad_norm": 4.084777355194092, "learning_rate": 1.971232610684892e-05, "loss": 1.773, "step": 32129 }, { "epoch": 0.42, "grad_norm": 4.571263313293457, "learning_rate": 1.9712301083445787e-05, "loss": 2.5869, "step": 32130 }, { "epoch": 0.42, "grad_norm": 3.7307119369506836, "learning_rate": 1.971227605897025e-05, "loss": 1.9902, "step": 32131 }, { "epoch": 0.42, "grad_norm": 3.8227343559265137, "learning_rate": 1.9712251033422314e-05, "loss": 2.1028, "step": 32132 }, { "epoch": 0.42, "grad_norm": 3.68076491355896, "learning_rate": 1.9712226006801985e-05, "loss": 1.9544, "step": 32133 }, { "epoch": 0.42, "grad_norm": 3.3551504611968994, "learning_rate": 1.9712200979109257e-05, "loss": 1.8926, "step": 32134 }, { "epoch": 0.42, "grad_norm": 4.0023512840271, "learning_rate": 1.971217595034414e-05, "loss": 2.0906, "step": 32135 }, { "epoch": 0.42, "grad_norm": 3.918025016784668, "learning_rate": 1.9712150920506632e-05, "loss": 1.9545, "step": 32136 }, { "epoch": 0.42, "grad_norm": 3.5280182361602783, "learning_rate": 1.9712125889596738e-05, "loss": 1.6052, "step": 32137 }, { "epoch": 0.42, "grad_norm": 3.4048640727996826, "learning_rate": 1.9712100857614463e-05, "loss": 2.0773, "step": 32138 }, { "epoch": 0.42, "grad_norm": 3.918292284011841, "learning_rate": 1.9712075824559803e-05, "loss": 2.3213, "step": 32139 }, { "epoch": 0.42, "grad_norm": 4.182982444763184, "learning_rate": 1.971205079043277e-05, "loss": 2.3079, "step": 32140 }, { "epoch": 0.42, "grad_norm": 4.484996795654297, "learning_rate": 1.9712025755233358e-05, "loss": 2.3314, "step": 32141 }, { "epoch": 0.42, "grad_norm": 3.179335355758667, "learning_rate": 1.9712000718961573e-05, "loss": 1.3101, "step": 32142 }, { "epoch": 0.42, "grad_norm": 4.406908988952637, "learning_rate": 1.971197568161742e-05, "loss": 2.246, "step": 32143 }, { "epoch": 0.42, "grad_norm": 3.663647174835205, "learning_rate": 1.9711950643200903e-05, "loss": 1.6504, "step": 32144 }, { "epoch": 0.42, "grad_norm": 3.7537386417388916, "learning_rate": 1.9711925603712015e-05, "loss": 2.1565, "step": 32145 }, { "epoch": 0.42, "grad_norm": 4.084019660949707, "learning_rate": 1.9711900563150773e-05, "loss": 2.3303, "step": 32146 }, { "epoch": 0.42, "grad_norm": 4.333469390869141, "learning_rate": 1.9711875521517168e-05, "loss": 2.0339, "step": 32147 }, { "epoch": 0.42, "grad_norm": 3.602644443511963, "learning_rate": 1.9711850478811208e-05, "loss": 1.5933, "step": 32148 }, { "epoch": 0.42, "grad_norm": 3.3924739360809326, "learning_rate": 1.9711825435032894e-05, "loss": 1.6263, "step": 32149 }, { "epoch": 0.42, "grad_norm": 3.8749287128448486, "learning_rate": 1.971180039018223e-05, "loss": 2.4139, "step": 32150 }, { "epoch": 0.42, "grad_norm": 3.611260414123535, "learning_rate": 1.9711775344259223e-05, "loss": 1.6969, "step": 32151 }, { "epoch": 0.42, "grad_norm": 3.7472405433654785, "learning_rate": 1.9711750297263865e-05, "loss": 2.0511, "step": 32152 }, { "epoch": 0.42, "grad_norm": 3.748380184173584, "learning_rate": 1.971172524919617e-05, "loss": 2.1432, "step": 32153 }, { "epoch": 0.42, "grad_norm": 3.7389938831329346, "learning_rate": 1.971170020005613e-05, "loss": 2.1479, "step": 32154 }, { "epoch": 0.42, "grad_norm": 3.9673593044281006, "learning_rate": 1.9711675149843762e-05, "loss": 2.1595, "step": 32155 }, { "epoch": 0.42, "grad_norm": 3.5337271690368652, "learning_rate": 1.9711650098559052e-05, "loss": 1.7694, "step": 32156 }, { "epoch": 0.42, "grad_norm": 3.2427456378936768, "learning_rate": 1.9711625046202013e-05, "loss": 1.296, "step": 32157 }, { "epoch": 0.42, "grad_norm": 3.944631576538086, "learning_rate": 1.971159999277265e-05, "loss": 2.2062, "step": 32158 }, { "epoch": 0.42, "grad_norm": 3.860633611679077, "learning_rate": 1.9711574938270957e-05, "loss": 1.8994, "step": 32159 }, { "epoch": 0.42, "grad_norm": 3.3780667781829834, "learning_rate": 1.9711549882696944e-05, "loss": 1.2605, "step": 32160 }, { "epoch": 0.42, "grad_norm": 3.6455626487731934, "learning_rate": 1.971152482605061e-05, "loss": 1.5482, "step": 32161 }, { "epoch": 0.42, "grad_norm": 4.131718158721924, "learning_rate": 1.9711499768331958e-05, "loss": 1.842, "step": 32162 }, { "epoch": 0.42, "grad_norm": 2.9792871475219727, "learning_rate": 1.9711474709540995e-05, "loss": 1.3416, "step": 32163 }, { "epoch": 0.42, "grad_norm": 3.8375463485717773, "learning_rate": 1.971144964967772e-05, "loss": 1.8771, "step": 32164 }, { "epoch": 0.42, "grad_norm": 3.4666237831115723, "learning_rate": 1.9711424588742134e-05, "loss": 1.823, "step": 32165 }, { "epoch": 0.42, "grad_norm": 4.8390374183654785, "learning_rate": 1.971139952673424e-05, "loss": 3.0153, "step": 32166 }, { "epoch": 0.42, "grad_norm": 3.597973585128784, "learning_rate": 1.9711374463654046e-05, "loss": 2.0598, "step": 32167 }, { "epoch": 0.42, "grad_norm": 3.4497435092926025, "learning_rate": 1.971134939950155e-05, "loss": 1.9375, "step": 32168 }, { "epoch": 0.42, "grad_norm": 3.4536421298980713, "learning_rate": 1.971132433427676e-05, "loss": 1.4865, "step": 32169 }, { "epoch": 0.42, "grad_norm": 4.436011791229248, "learning_rate": 1.971129926797967e-05, "loss": 1.8645, "step": 32170 }, { "epoch": 0.42, "grad_norm": 3.9634478092193604, "learning_rate": 1.9711274200610288e-05, "loss": 2.076, "step": 32171 }, { "epoch": 0.42, "grad_norm": 3.9545254707336426, "learning_rate": 1.9711249132168618e-05, "loss": 2.1112, "step": 32172 }, { "epoch": 0.42, "grad_norm": 4.047921180725098, "learning_rate": 1.9711224062654662e-05, "loss": 1.956, "step": 32173 }, { "epoch": 0.42, "grad_norm": 3.36374831199646, "learning_rate": 1.9711198992068423e-05, "loss": 1.3171, "step": 32174 }, { "epoch": 0.42, "grad_norm": 3.8288092613220215, "learning_rate": 1.9711173920409902e-05, "loss": 1.5976, "step": 32175 }, { "epoch": 0.42, "grad_norm": 3.536870241165161, "learning_rate": 1.97111488476791e-05, "loss": 1.7967, "step": 32176 }, { "epoch": 0.42, "grad_norm": 3.571683645248413, "learning_rate": 1.9711123773876028e-05, "loss": 1.7247, "step": 32177 }, { "epoch": 0.42, "grad_norm": 3.872253656387329, "learning_rate": 1.9711098699000678e-05, "loss": 2.132, "step": 32178 }, { "epoch": 0.42, "grad_norm": 3.6651535034179688, "learning_rate": 1.9711073623053063e-05, "loss": 2.0005, "step": 32179 }, { "epoch": 0.42, "grad_norm": 3.5694656372070312, "learning_rate": 1.9711048546033175e-05, "loss": 2.0858, "step": 32180 }, { "epoch": 0.42, "grad_norm": 3.67140531539917, "learning_rate": 1.9711023467941025e-05, "loss": 1.9422, "step": 32181 }, { "epoch": 0.42, "grad_norm": 4.19663143157959, "learning_rate": 1.9710998388776613e-05, "loss": 1.9751, "step": 32182 }, { "epoch": 0.42, "grad_norm": 3.6280524730682373, "learning_rate": 1.9710973308539945e-05, "loss": 1.9234, "step": 32183 }, { "epoch": 0.42, "grad_norm": 4.250082492828369, "learning_rate": 1.9710948227231017e-05, "loss": 2.4364, "step": 32184 }, { "epoch": 0.42, "grad_norm": 3.5477592945098877, "learning_rate": 1.9710923144849835e-05, "loss": 1.9121, "step": 32185 }, { "epoch": 0.42, "grad_norm": 4.305112838745117, "learning_rate": 1.9710898061396406e-05, "loss": 2.1935, "step": 32186 }, { "epoch": 0.42, "grad_norm": 4.029879570007324, "learning_rate": 1.9710872976870726e-05, "loss": 2.0736, "step": 32187 }, { "epoch": 0.42, "grad_norm": 3.8211753368377686, "learning_rate": 1.9710847891272803e-05, "loss": 1.8993, "step": 32188 }, { "epoch": 0.42, "grad_norm": 4.018466472625732, "learning_rate": 1.9710822804602636e-05, "loss": 2.3235, "step": 32189 }, { "epoch": 0.42, "grad_norm": 3.9158332347869873, "learning_rate": 1.9710797716860232e-05, "loss": 2.2929, "step": 32190 }, { "epoch": 0.42, "grad_norm": 4.608891010284424, "learning_rate": 1.971077262804559e-05, "loss": 2.3063, "step": 32191 }, { "epoch": 0.42, "grad_norm": 3.8301119804382324, "learning_rate": 1.9710747538158713e-05, "loss": 2.0811, "step": 32192 }, { "epoch": 0.42, "grad_norm": 3.835223913192749, "learning_rate": 1.9710722447199604e-05, "loss": 1.6541, "step": 32193 }, { "epoch": 0.42, "grad_norm": 3.8814094066619873, "learning_rate": 1.9710697355168266e-05, "loss": 1.7821, "step": 32194 }, { "epoch": 0.42, "grad_norm": 3.636571168899536, "learning_rate": 1.9710672262064704e-05, "loss": 1.6155, "step": 32195 }, { "epoch": 0.42, "grad_norm": 4.02271032333374, "learning_rate": 1.9710647167888922e-05, "loss": 2.0543, "step": 32196 }, { "epoch": 0.42, "grad_norm": 3.7423834800720215, "learning_rate": 1.9710622072640916e-05, "loss": 2.0781, "step": 32197 }, { "epoch": 0.42, "grad_norm": 3.2942440509796143, "learning_rate": 1.971059697632069e-05, "loss": 1.7005, "step": 32198 }, { "epoch": 0.42, "grad_norm": 3.788348913192749, "learning_rate": 1.9710571878928255e-05, "loss": 2.0532, "step": 32199 }, { "epoch": 0.42, "grad_norm": 3.6028285026550293, "learning_rate": 1.9710546780463608e-05, "loss": 2.2586, "step": 32200 }, { "epoch": 0.42, "grad_norm": 3.984398126602173, "learning_rate": 1.971052168092675e-05, "loss": 1.6839, "step": 32201 }, { "epoch": 0.42, "grad_norm": 3.831355571746826, "learning_rate": 1.971049658031768e-05, "loss": 2.0426, "step": 32202 }, { "epoch": 0.42, "grad_norm": 3.458625078201294, "learning_rate": 1.9710471478636415e-05, "loss": 1.5661, "step": 32203 }, { "epoch": 0.42, "grad_norm": 3.3825433254241943, "learning_rate": 1.9710446375882945e-05, "loss": 1.7308, "step": 32204 }, { "epoch": 0.42, "grad_norm": 3.9976072311401367, "learning_rate": 1.9710421272057277e-05, "loss": 2.4085, "step": 32205 }, { "epoch": 0.42, "grad_norm": 3.389066457748413, "learning_rate": 1.9710396167159417e-05, "loss": 1.6998, "step": 32206 }, { "epoch": 0.42, "grad_norm": 4.248044490814209, "learning_rate": 1.971037106118936e-05, "loss": 2.1052, "step": 32207 }, { "epoch": 0.42, "grad_norm": 3.8044373989105225, "learning_rate": 1.9710345954147117e-05, "loss": 2.0739, "step": 32208 }, { "epoch": 0.42, "grad_norm": 4.102890491485596, "learning_rate": 1.9710320846032684e-05, "loss": 2.2646, "step": 32209 }, { "epoch": 0.42, "grad_norm": 3.6134819984436035, "learning_rate": 1.9710295736846066e-05, "loss": 1.5543, "step": 32210 }, { "epoch": 0.42, "grad_norm": 3.618234634399414, "learning_rate": 1.971027062658727e-05, "loss": 1.8654, "step": 32211 }, { "epoch": 0.42, "grad_norm": 3.828533411026001, "learning_rate": 1.9710245515256298e-05, "loss": 2.1539, "step": 32212 }, { "epoch": 0.42, "grad_norm": 3.6398417949676514, "learning_rate": 1.9710220402853143e-05, "loss": 1.9124, "step": 32213 }, { "epoch": 0.42, "grad_norm": 3.619466543197632, "learning_rate": 1.971019528937782e-05, "loss": 1.9866, "step": 32214 }, { "epoch": 0.42, "grad_norm": 4.295699119567871, "learning_rate": 1.9710170174830325e-05, "loss": 2.1941, "step": 32215 }, { "epoch": 0.42, "grad_norm": 3.7639427185058594, "learning_rate": 1.9710145059210665e-05, "loss": 1.9853, "step": 32216 }, { "epoch": 0.42, "grad_norm": 3.872080087661743, "learning_rate": 1.9710119942518834e-05, "loss": 1.6645, "step": 32217 }, { "epoch": 0.42, "grad_norm": 4.0673604011535645, "learning_rate": 1.9710094824754846e-05, "loss": 2.2719, "step": 32218 }, { "epoch": 0.42, "grad_norm": 4.583298206329346, "learning_rate": 1.9710069705918696e-05, "loss": 2.2023, "step": 32219 }, { "epoch": 0.42, "grad_norm": 4.1259989738464355, "learning_rate": 1.9710044586010393e-05, "loss": 2.2708, "step": 32220 }, { "epoch": 0.42, "grad_norm": 3.4409773349761963, "learning_rate": 1.9710019465029936e-05, "loss": 1.4847, "step": 32221 }, { "epoch": 0.42, "grad_norm": 3.5157134532928467, "learning_rate": 1.9709994342977325e-05, "loss": 1.9205, "step": 32222 }, { "epoch": 0.42, "grad_norm": 3.9417874813079834, "learning_rate": 1.9709969219852566e-05, "loss": 2.0376, "step": 32223 }, { "epoch": 0.42, "grad_norm": 3.480238676071167, "learning_rate": 1.9709944095655667e-05, "loss": 1.7988, "step": 32224 }, { "epoch": 0.42, "grad_norm": 3.8250584602355957, "learning_rate": 1.970991897038662e-05, "loss": 2.2854, "step": 32225 }, { "epoch": 0.42, "grad_norm": 3.902749538421631, "learning_rate": 1.9709893844045435e-05, "loss": 2.1942, "step": 32226 }, { "epoch": 0.42, "grad_norm": 3.692117691040039, "learning_rate": 1.9709868716632115e-05, "loss": 1.9366, "step": 32227 }, { "epoch": 0.42, "grad_norm": 3.5399298667907715, "learning_rate": 1.9709843588146658e-05, "loss": 1.7907, "step": 32228 }, { "epoch": 0.42, "grad_norm": 3.5944509506225586, "learning_rate": 1.970981845858907e-05, "loss": 1.7056, "step": 32229 }, { "epoch": 0.42, "grad_norm": 3.7082579135894775, "learning_rate": 1.9709793327959353e-05, "loss": 2.0649, "step": 32230 }, { "epoch": 0.42, "grad_norm": 3.4979500770568848, "learning_rate": 1.970976819625751e-05, "loss": 1.5863, "step": 32231 }, { "epoch": 0.42, "grad_norm": 5.023755073547363, "learning_rate": 1.9709743063483544e-05, "loss": 2.9706, "step": 32232 }, { "epoch": 0.42, "grad_norm": 2.7625784873962402, "learning_rate": 1.970971792963746e-05, "loss": 1.3211, "step": 32233 }, { "epoch": 0.42, "grad_norm": 3.2858927249908447, "learning_rate": 1.9709692794719254e-05, "loss": 1.6557, "step": 32234 }, { "epoch": 0.42, "grad_norm": 3.632692575454712, "learning_rate": 1.970966765872894e-05, "loss": 1.9212, "step": 32235 }, { "epoch": 0.42, "grad_norm": 3.6084980964660645, "learning_rate": 1.9709642521666508e-05, "loss": 2.1642, "step": 32236 }, { "epoch": 0.42, "grad_norm": 3.888244152069092, "learning_rate": 1.970961738353197e-05, "loss": 2.1257, "step": 32237 }, { "epoch": 0.42, "grad_norm": 4.330214023590088, "learning_rate": 1.9709592244325322e-05, "loss": 1.9968, "step": 32238 }, { "epoch": 0.42, "grad_norm": 3.252455472946167, "learning_rate": 1.970956710404657e-05, "loss": 1.5744, "step": 32239 }, { "epoch": 0.42, "grad_norm": 3.4410057067871094, "learning_rate": 1.9709541962695724e-05, "loss": 1.6924, "step": 32240 }, { "epoch": 0.42, "grad_norm": 4.043272972106934, "learning_rate": 1.9709516820272773e-05, "loss": 2.0259, "step": 32241 }, { "epoch": 0.42, "grad_norm": 3.98671555519104, "learning_rate": 1.970949167677773e-05, "loss": 2.1388, "step": 32242 }, { "epoch": 0.42, "grad_norm": 2.964470386505127, "learning_rate": 1.9709466532210593e-05, "loss": 1.3967, "step": 32243 }, { "epoch": 0.42, "grad_norm": 3.0826120376586914, "learning_rate": 1.9709441386571363e-05, "loss": 1.417, "step": 32244 }, { "epoch": 0.42, "grad_norm": 4.483813285827637, "learning_rate": 1.970941623986005e-05, "loss": 2.8997, "step": 32245 }, { "epoch": 0.42, "grad_norm": 3.6038286685943604, "learning_rate": 1.9709391092076652e-05, "loss": 1.777, "step": 32246 }, { "epoch": 0.42, "grad_norm": 4.138926029205322, "learning_rate": 1.9709365943221174e-05, "loss": 1.7716, "step": 32247 }, { "epoch": 0.42, "grad_norm": 3.7978439331054688, "learning_rate": 1.9709340793293617e-05, "loss": 1.8906, "step": 32248 }, { "epoch": 0.42, "grad_norm": 3.8261139392852783, "learning_rate": 1.9709315642293983e-05, "loss": 2.337, "step": 32249 }, { "epoch": 0.42, "grad_norm": 3.9942309856414795, "learning_rate": 1.9709290490222274e-05, "loss": 2.3403, "step": 32250 }, { "epoch": 0.42, "grad_norm": 4.206343650817871, "learning_rate": 1.9709265337078495e-05, "loss": 2.2364, "step": 32251 }, { "epoch": 0.42, "grad_norm": 4.233284950256348, "learning_rate": 1.9709240182862653e-05, "loss": 2.2842, "step": 32252 }, { "epoch": 0.42, "grad_norm": 3.6866259574890137, "learning_rate": 1.9709215027574743e-05, "loss": 1.9459, "step": 32253 }, { "epoch": 0.42, "grad_norm": 3.944972276687622, "learning_rate": 1.970918987121477e-05, "loss": 2.0988, "step": 32254 }, { "epoch": 0.42, "grad_norm": 3.990281105041504, "learning_rate": 1.9709164713782738e-05, "loss": 1.9132, "step": 32255 }, { "epoch": 0.42, "grad_norm": 3.9944186210632324, "learning_rate": 1.970913955527865e-05, "loss": 2.2616, "step": 32256 }, { "epoch": 0.42, "grad_norm": 3.89615535736084, "learning_rate": 1.9709114395702506e-05, "loss": 2.1338, "step": 32257 }, { "epoch": 0.42, "grad_norm": 3.5010547637939453, "learning_rate": 1.9709089235054313e-05, "loss": 1.4726, "step": 32258 }, { "epoch": 0.42, "grad_norm": 3.430532217025757, "learning_rate": 1.9709064073334075e-05, "loss": 1.6036, "step": 32259 }, { "epoch": 0.42, "grad_norm": 3.7172539234161377, "learning_rate": 1.9709038910541788e-05, "loss": 2.1419, "step": 32260 }, { "epoch": 0.42, "grad_norm": 3.246925115585327, "learning_rate": 1.970901374667746e-05, "loss": 1.2821, "step": 32261 }, { "epoch": 0.42, "grad_norm": 5.42788553237915, "learning_rate": 1.9708988581741087e-05, "loss": 2.4615, "step": 32262 }, { "epoch": 0.42, "grad_norm": 4.1652655601501465, "learning_rate": 1.9708963415732684e-05, "loss": 2.3144, "step": 32263 }, { "epoch": 0.42, "grad_norm": 3.95184326171875, "learning_rate": 1.9708938248652245e-05, "loss": 2.2053, "step": 32264 }, { "epoch": 0.42, "grad_norm": 3.9927382469177246, "learning_rate": 1.970891308049977e-05, "loss": 1.9469, "step": 32265 }, { "epoch": 0.42, "grad_norm": 3.747678279876709, "learning_rate": 1.970888791127527e-05, "loss": 1.979, "step": 32266 }, { "epoch": 0.42, "grad_norm": 3.631699323654175, "learning_rate": 1.9708862740978743e-05, "loss": 1.8147, "step": 32267 }, { "epoch": 0.42, "grad_norm": 3.767169237136841, "learning_rate": 1.9708837569610197e-05, "loss": 1.6923, "step": 32268 }, { "epoch": 0.42, "grad_norm": 4.1088714599609375, "learning_rate": 1.970881239716963e-05, "loss": 2.2404, "step": 32269 }, { "epoch": 0.42, "grad_norm": 3.4631223678588867, "learning_rate": 1.970878722365704e-05, "loss": 1.6726, "step": 32270 }, { "epoch": 0.42, "grad_norm": 3.226091146469116, "learning_rate": 1.970876204907244e-05, "loss": 1.5955, "step": 32271 }, { "epoch": 0.42, "grad_norm": 3.791633129119873, "learning_rate": 1.9708736873415823e-05, "loss": 2.0705, "step": 32272 }, { "epoch": 0.42, "grad_norm": 3.944779872894287, "learning_rate": 1.9708711696687198e-05, "loss": 2.042, "step": 32273 }, { "epoch": 0.42, "grad_norm": 3.9232583045959473, "learning_rate": 1.9708686518886568e-05, "loss": 2.1959, "step": 32274 }, { "epoch": 0.42, "grad_norm": 3.927934169769287, "learning_rate": 1.9708661340013936e-05, "loss": 2.2281, "step": 32275 }, { "epoch": 0.42, "grad_norm": 3.9363832473754883, "learning_rate": 1.97086361600693e-05, "loss": 2.1462, "step": 32276 }, { "epoch": 0.42, "grad_norm": 3.2441940307617188, "learning_rate": 1.9708610979052668e-05, "loss": 1.8804, "step": 32277 }, { "epoch": 0.42, "grad_norm": 3.3313543796539307, "learning_rate": 1.9708585796964042e-05, "loss": 1.5881, "step": 32278 }, { "epoch": 0.42, "grad_norm": 4.3845086097717285, "learning_rate": 1.970856061380342e-05, "loss": 2.1965, "step": 32279 }, { "epoch": 0.42, "grad_norm": 3.357192277908325, "learning_rate": 1.970853542957081e-05, "loss": 1.442, "step": 32280 }, { "epoch": 0.42, "grad_norm": 3.159064769744873, "learning_rate": 1.9708510244266213e-05, "loss": 1.433, "step": 32281 }, { "epoch": 0.42, "grad_norm": 4.086992263793945, "learning_rate": 1.970848505788963e-05, "loss": 2.1194, "step": 32282 }, { "epoch": 0.42, "grad_norm": 3.7748987674713135, "learning_rate": 1.970845987044107e-05, "loss": 1.9601, "step": 32283 }, { "epoch": 0.42, "grad_norm": 3.3506720066070557, "learning_rate": 1.9708434681920526e-05, "loss": 1.4747, "step": 32284 }, { "epoch": 0.42, "grad_norm": 4.018404483795166, "learning_rate": 1.9708409492328008e-05, "loss": 2.2325, "step": 32285 }, { "epoch": 0.42, "grad_norm": 4.423387050628662, "learning_rate": 1.9708384301663515e-05, "loss": 2.2843, "step": 32286 }, { "epoch": 0.42, "grad_norm": 4.101259708404541, "learning_rate": 1.9708359109927055e-05, "loss": 2.1395, "step": 32287 }, { "epoch": 0.42, "grad_norm": 3.7892372608184814, "learning_rate": 1.9708333917118628e-05, "loss": 1.7679, "step": 32288 }, { "epoch": 0.42, "grad_norm": 3.593197822570801, "learning_rate": 1.9708308723238232e-05, "loss": 2.176, "step": 32289 }, { "epoch": 0.42, "grad_norm": 3.6728177070617676, "learning_rate": 1.970828352828588e-05, "loss": 1.5429, "step": 32290 }, { "epoch": 0.42, "grad_norm": 3.5403494834899902, "learning_rate": 1.9708258332261563e-05, "loss": 1.5502, "step": 32291 }, { "epoch": 0.42, "grad_norm": 4.474816799163818, "learning_rate": 1.9708233135165292e-05, "loss": 2.5854, "step": 32292 }, { "epoch": 0.42, "grad_norm": 3.7976508140563965, "learning_rate": 1.9708207936997067e-05, "loss": 1.8258, "step": 32293 }, { "epoch": 0.42, "grad_norm": 4.152596473693848, "learning_rate": 1.9708182737756894e-05, "loss": 2.0735, "step": 32294 }, { "epoch": 0.42, "grad_norm": 3.7795779705047607, "learning_rate": 1.9708157537444768e-05, "loss": 1.9601, "step": 32295 }, { "epoch": 0.42, "grad_norm": 3.737574577331543, "learning_rate": 1.97081323360607e-05, "loss": 1.9139, "step": 32296 }, { "epoch": 0.42, "grad_norm": 3.4582979679107666, "learning_rate": 1.9708107133604686e-05, "loss": 1.6795, "step": 32297 }, { "epoch": 0.42, "grad_norm": 3.674283981323242, "learning_rate": 1.970808193007674e-05, "loss": 1.8795, "step": 32298 }, { "epoch": 0.42, "grad_norm": 3.8519372940063477, "learning_rate": 1.9708056725476846e-05, "loss": 2.1902, "step": 32299 }, { "epoch": 0.42, "grad_norm": 3.9519686698913574, "learning_rate": 1.9708031519805027e-05, "loss": 2.1013, "step": 32300 }, { "epoch": 0.42, "grad_norm": 3.9013216495513916, "learning_rate": 1.970800631306127e-05, "loss": 2.0475, "step": 32301 }, { "epoch": 0.42, "grad_norm": 3.8834192752838135, "learning_rate": 1.970798110524559e-05, "loss": 1.5113, "step": 32302 }, { "epoch": 0.42, "grad_norm": 3.827512741088867, "learning_rate": 1.970795589635798e-05, "loss": 1.9546, "step": 32303 }, { "epoch": 0.42, "grad_norm": 4.262417316436768, "learning_rate": 1.970793068639845e-05, "loss": 2.2127, "step": 32304 }, { "epoch": 0.42, "grad_norm": 3.1767313480377197, "learning_rate": 1.9707905475367e-05, "loss": 1.4105, "step": 32305 }, { "epoch": 0.42, "grad_norm": 3.734663724899292, "learning_rate": 1.9707880263263634e-05, "loss": 1.8524, "step": 32306 }, { "epoch": 0.42, "grad_norm": 3.7298154830932617, "learning_rate": 1.9707855050088346e-05, "loss": 1.9249, "step": 32307 }, { "epoch": 0.42, "grad_norm": 3.6674487590789795, "learning_rate": 1.970782983584115e-05, "loss": 1.894, "step": 32308 }, { "epoch": 0.42, "grad_norm": 3.8146655559539795, "learning_rate": 1.9707804620522045e-05, "loss": 1.8049, "step": 32309 }, { "epoch": 0.42, "grad_norm": 4.326697826385498, "learning_rate": 1.9707779404131034e-05, "loss": 2.3182, "step": 32310 }, { "epoch": 0.42, "grad_norm": 3.318859338760376, "learning_rate": 1.9707754186668124e-05, "loss": 1.5127, "step": 32311 }, { "epoch": 0.42, "grad_norm": 4.54253625869751, "learning_rate": 1.9707728968133306e-05, "loss": 2.496, "step": 32312 }, { "epoch": 0.42, "grad_norm": 4.010074615478516, "learning_rate": 1.9707703748526593e-05, "loss": 2.0568, "step": 32313 }, { "epoch": 0.42, "grad_norm": 4.467801570892334, "learning_rate": 1.9707678527847986e-05, "loss": 2.2369, "step": 32314 }, { "epoch": 0.42, "grad_norm": 3.5981063842773438, "learning_rate": 1.9707653306097486e-05, "loss": 1.8786, "step": 32315 }, { "epoch": 0.42, "grad_norm": 4.362180233001709, "learning_rate": 1.9707628083275093e-05, "loss": 1.847, "step": 32316 }, { "epoch": 0.42, "grad_norm": 3.681823492050171, "learning_rate": 1.9707602859380817e-05, "loss": 1.7295, "step": 32317 }, { "epoch": 0.42, "grad_norm": 4.03900146484375, "learning_rate": 1.9707577634414655e-05, "loss": 2.2816, "step": 32318 }, { "epoch": 0.42, "grad_norm": 3.4374990463256836, "learning_rate": 1.9707552408376613e-05, "loss": 1.4394, "step": 32319 }, { "epoch": 0.42, "grad_norm": 3.6390233039855957, "learning_rate": 1.9707527181266692e-05, "loss": 1.8842, "step": 32320 }, { "epoch": 0.42, "grad_norm": 4.139647960662842, "learning_rate": 1.9707501953084894e-05, "loss": 2.3918, "step": 32321 }, { "epoch": 0.42, "grad_norm": 4.341678142547607, "learning_rate": 1.9707476723831224e-05, "loss": 2.1352, "step": 32322 }, { "epoch": 0.42, "grad_norm": 3.776992082595825, "learning_rate": 1.9707451493505688e-05, "loss": 1.9919, "step": 32323 }, { "epoch": 0.42, "grad_norm": 3.808074474334717, "learning_rate": 1.970742626210828e-05, "loss": 1.9901, "step": 32324 }, { "epoch": 0.42, "grad_norm": 4.62324857711792, "learning_rate": 1.9707401029639007e-05, "loss": 2.5384, "step": 32325 }, { "epoch": 0.42, "grad_norm": 3.85567045211792, "learning_rate": 1.9707375796097877e-05, "loss": 2.1315, "step": 32326 }, { "epoch": 0.42, "grad_norm": 3.54775071144104, "learning_rate": 1.970735056148488e-05, "loss": 1.6516, "step": 32327 }, { "epoch": 0.42, "grad_norm": 3.4139697551727295, "learning_rate": 1.9707325325800034e-05, "loss": 1.7512, "step": 32328 }, { "epoch": 0.42, "grad_norm": 4.388931751251221, "learning_rate": 1.9707300089043332e-05, "loss": 2.1486, "step": 32329 }, { "epoch": 0.42, "grad_norm": 3.7645132541656494, "learning_rate": 1.970727485121478e-05, "loss": 1.9563, "step": 32330 }, { "epoch": 0.42, "grad_norm": 3.685779094696045, "learning_rate": 1.970724961231438e-05, "loss": 1.9871, "step": 32331 }, { "epoch": 0.42, "grad_norm": 3.505631446838379, "learning_rate": 1.9707224372342133e-05, "loss": 1.919, "step": 32332 }, { "epoch": 0.42, "grad_norm": 3.507399797439575, "learning_rate": 1.9707199131298046e-05, "loss": 1.7565, "step": 32333 }, { "epoch": 0.42, "grad_norm": 4.0755815505981445, "learning_rate": 1.970717388918212e-05, "loss": 2.7275, "step": 32334 }, { "epoch": 0.42, "grad_norm": 3.9624574184417725, "learning_rate": 1.9707148645994354e-05, "loss": 2.0721, "step": 32335 }, { "epoch": 0.42, "grad_norm": 4.261584758758545, "learning_rate": 1.9707123401734756e-05, "loss": 2.4745, "step": 32336 }, { "epoch": 0.42, "grad_norm": 3.824509859085083, "learning_rate": 1.970709815640333e-05, "loss": 1.8901, "step": 32337 }, { "epoch": 0.42, "grad_norm": 3.5318918228149414, "learning_rate": 1.9707072910000072e-05, "loss": 1.8545, "step": 32338 }, { "epoch": 0.42, "grad_norm": 3.654005289077759, "learning_rate": 1.970704766252499e-05, "loss": 1.8106, "step": 32339 }, { "epoch": 0.42, "grad_norm": 4.257267951965332, "learning_rate": 1.9707022413978083e-05, "loss": 2.5083, "step": 32340 }, { "epoch": 0.42, "grad_norm": 3.4905190467834473, "learning_rate": 1.970699716435936e-05, "loss": 1.6679, "step": 32341 }, { "epoch": 0.42, "grad_norm": 4.124467372894287, "learning_rate": 1.9706971913668816e-05, "loss": 1.875, "step": 32342 }, { "epoch": 0.42, "grad_norm": 3.59771466255188, "learning_rate": 1.9706946661906458e-05, "loss": 1.9132, "step": 32343 }, { "epoch": 0.42, "grad_norm": 3.291382074356079, "learning_rate": 1.9706921409072292e-05, "loss": 1.7749, "step": 32344 }, { "epoch": 0.42, "grad_norm": 3.6339364051818848, "learning_rate": 1.9706896155166314e-05, "loss": 1.5492, "step": 32345 }, { "epoch": 0.42, "grad_norm": 3.901364803314209, "learning_rate": 1.9706870900188528e-05, "loss": 2.1386, "step": 32346 }, { "epoch": 0.42, "grad_norm": 3.786639928817749, "learning_rate": 1.9706845644138943e-05, "loss": 2.09, "step": 32347 }, { "epoch": 0.42, "grad_norm": 3.793208599090576, "learning_rate": 1.9706820387017557e-05, "loss": 1.9917, "step": 32348 }, { "epoch": 0.42, "grad_norm": 4.0956573486328125, "learning_rate": 1.9706795128824372e-05, "loss": 2.0514, "step": 32349 }, { "epoch": 0.42, "grad_norm": 3.957491159439087, "learning_rate": 1.9706769869559393e-05, "loss": 2.0454, "step": 32350 }, { "epoch": 0.42, "grad_norm": 3.8778343200683594, "learning_rate": 1.9706744609222622e-05, "loss": 2.3233, "step": 32351 }, { "epoch": 0.42, "grad_norm": 3.932448148727417, "learning_rate": 1.970671934781406e-05, "loss": 1.7063, "step": 32352 }, { "epoch": 0.42, "grad_norm": 3.9728152751922607, "learning_rate": 1.9706694085333713e-05, "loss": 2.1668, "step": 32353 }, { "epoch": 0.42, "grad_norm": 3.7620508670806885, "learning_rate": 1.9706668821781582e-05, "loss": 1.7685, "step": 32354 }, { "epoch": 0.42, "grad_norm": 4.180646896362305, "learning_rate": 1.970664355715767e-05, "loss": 1.9365, "step": 32355 }, { "epoch": 0.42, "grad_norm": 3.417999267578125, "learning_rate": 1.970661829146198e-05, "loss": 1.7044, "step": 32356 }, { "epoch": 0.42, "grad_norm": 3.1947925090789795, "learning_rate": 1.9706593024694513e-05, "loss": 1.5897, "step": 32357 }, { "epoch": 0.42, "grad_norm": 3.9858860969543457, "learning_rate": 1.9706567756855274e-05, "loss": 1.8448, "step": 32358 }, { "epoch": 0.42, "grad_norm": 3.583862543106079, "learning_rate": 1.9706542487944266e-05, "loss": 1.804, "step": 32359 }, { "epoch": 0.42, "grad_norm": 4.124274253845215, "learning_rate": 1.9706517217961494e-05, "loss": 1.9642, "step": 32360 }, { "epoch": 0.42, "grad_norm": 3.5287559032440186, "learning_rate": 1.9706491946906955e-05, "loss": 1.8657, "step": 32361 }, { "epoch": 0.42, "grad_norm": 3.8516438007354736, "learning_rate": 1.9706466674780655e-05, "loss": 1.938, "step": 32362 }, { "epoch": 0.42, "grad_norm": 4.05942440032959, "learning_rate": 1.9706441401582593e-05, "loss": 2.0811, "step": 32363 }, { "epoch": 0.42, "grad_norm": 4.136768341064453, "learning_rate": 1.970641612731278e-05, "loss": 2.3924, "step": 32364 }, { "epoch": 0.42, "grad_norm": 3.430232286453247, "learning_rate": 1.9706390851971215e-05, "loss": 1.6319, "step": 32365 }, { "epoch": 0.42, "grad_norm": 3.973245620727539, "learning_rate": 1.9706365575557894e-05, "loss": 2.5324, "step": 32366 }, { "epoch": 0.42, "grad_norm": 3.5818207263946533, "learning_rate": 1.970634029807283e-05, "loss": 2.1116, "step": 32367 }, { "epoch": 0.42, "grad_norm": 3.5114567279815674, "learning_rate": 1.970631501951602e-05, "loss": 2.1495, "step": 32368 }, { "epoch": 0.42, "grad_norm": 3.5331649780273438, "learning_rate": 1.970628973988747e-05, "loss": 2.0226, "step": 32369 }, { "epoch": 0.42, "grad_norm": 3.8334784507751465, "learning_rate": 1.970626445918718e-05, "loss": 1.9565, "step": 32370 }, { "epoch": 0.42, "grad_norm": 3.337409257888794, "learning_rate": 1.970623917741515e-05, "loss": 1.535, "step": 32371 }, { "epoch": 0.42, "grad_norm": 3.63458514213562, "learning_rate": 1.970621389457139e-05, "loss": 1.9879, "step": 32372 }, { "epoch": 0.42, "grad_norm": 3.7988312244415283, "learning_rate": 1.97061886106559e-05, "loss": 2.0827, "step": 32373 }, { "epoch": 0.42, "grad_norm": 3.3064517974853516, "learning_rate": 1.970616332566868e-05, "loss": 1.579, "step": 32374 }, { "epoch": 0.42, "grad_norm": 4.457386016845703, "learning_rate": 1.9706138039609736e-05, "loss": 2.4027, "step": 32375 }, { "epoch": 0.42, "grad_norm": 3.797572135925293, "learning_rate": 1.9706112752479066e-05, "loss": 1.8143, "step": 32376 }, { "epoch": 0.42, "grad_norm": 3.9182772636413574, "learning_rate": 1.970608746427668e-05, "loss": 2.0634, "step": 32377 }, { "epoch": 0.42, "grad_norm": 3.364993095397949, "learning_rate": 1.970606217500258e-05, "loss": 1.6795, "step": 32378 }, { "epoch": 0.42, "grad_norm": 3.659888744354248, "learning_rate": 1.9706036884656764e-05, "loss": 1.7884, "step": 32379 }, { "epoch": 0.42, "grad_norm": 3.9346094131469727, "learning_rate": 1.9706011593239238e-05, "loss": 2.1168, "step": 32380 }, { "epoch": 0.42, "grad_norm": 4.2050580978393555, "learning_rate": 1.970598630075e-05, "loss": 1.871, "step": 32381 }, { "epoch": 0.42, "grad_norm": 3.5792829990386963, "learning_rate": 1.9705961007189057e-05, "loss": 1.9979, "step": 32382 }, { "epoch": 0.42, "grad_norm": 4.075333118438721, "learning_rate": 1.970593571255641e-05, "loss": 2.2693, "step": 32383 }, { "epoch": 0.42, "grad_norm": 3.8551318645477295, "learning_rate": 1.9705910416852067e-05, "loss": 2.0055, "step": 32384 }, { "epoch": 0.42, "grad_norm": 3.4725680351257324, "learning_rate": 1.9705885120076027e-05, "loss": 1.634, "step": 32385 }, { "epoch": 0.42, "grad_norm": 3.70023775100708, "learning_rate": 1.970585982222829e-05, "loss": 1.8225, "step": 32386 }, { "epoch": 0.42, "grad_norm": 3.7738893032073975, "learning_rate": 1.970583452330886e-05, "loss": 1.579, "step": 32387 }, { "epoch": 0.42, "grad_norm": 4.7751264572143555, "learning_rate": 1.9705809223317748e-05, "loss": 2.5205, "step": 32388 }, { "epoch": 0.42, "grad_norm": 3.141334056854248, "learning_rate": 1.9705783922254945e-05, "loss": 1.7446, "step": 32389 }, { "epoch": 0.42, "grad_norm": 3.4015815258026123, "learning_rate": 1.970575862012046e-05, "loss": 2.0644, "step": 32390 }, { "epoch": 0.42, "grad_norm": 4.276238441467285, "learning_rate": 1.9705733316914294e-05, "loss": 2.3107, "step": 32391 }, { "epoch": 0.42, "grad_norm": 3.99277925491333, "learning_rate": 1.970570801263645e-05, "loss": 2.5811, "step": 32392 }, { "epoch": 0.42, "grad_norm": 3.703087568283081, "learning_rate": 1.970568270728693e-05, "loss": 2.1451, "step": 32393 }, { "epoch": 0.42, "grad_norm": 3.9052793979644775, "learning_rate": 1.970565740086574e-05, "loss": 2.1424, "step": 32394 }, { "epoch": 0.42, "grad_norm": 3.9076340198516846, "learning_rate": 1.970563209337288e-05, "loss": 2.1019, "step": 32395 }, { "epoch": 0.42, "grad_norm": 3.7949886322021484, "learning_rate": 1.9705606784808355e-05, "loss": 1.9385, "step": 32396 }, { "epoch": 0.42, "grad_norm": 4.044395446777344, "learning_rate": 1.9705581475172163e-05, "loss": 2.2986, "step": 32397 }, { "epoch": 0.42, "grad_norm": 3.5604827404022217, "learning_rate": 1.9705556164464314e-05, "loss": 2.2419, "step": 32398 }, { "epoch": 0.42, "grad_norm": 4.770186901092529, "learning_rate": 1.9705530852684803e-05, "loss": 2.0573, "step": 32399 }, { "epoch": 0.42, "grad_norm": 4.027491092681885, "learning_rate": 1.970550553983364e-05, "loss": 2.1511, "step": 32400 }, { "epoch": 0.42, "grad_norm": 4.389815807342529, "learning_rate": 1.9705480225910824e-05, "loss": 2.502, "step": 32401 }, { "epoch": 0.42, "grad_norm": 3.733464479446411, "learning_rate": 1.970545491091636e-05, "loss": 1.8642, "step": 32402 }, { "epoch": 0.42, "grad_norm": 3.632502794265747, "learning_rate": 1.9705429594850245e-05, "loss": 2.1842, "step": 32403 }, { "epoch": 0.42, "grad_norm": 4.471118450164795, "learning_rate": 1.9705404277712488e-05, "loss": 2.345, "step": 32404 }, { "epoch": 0.42, "grad_norm": 3.4905548095703125, "learning_rate": 1.970537895950309e-05, "loss": 1.4651, "step": 32405 }, { "epoch": 0.42, "grad_norm": 4.089916229248047, "learning_rate": 1.9705353640222055e-05, "loss": 2.1847, "step": 32406 }, { "epoch": 0.42, "grad_norm": 3.569014310836792, "learning_rate": 1.970532831986938e-05, "loss": 2.3444, "step": 32407 }, { "epoch": 0.42, "grad_norm": 3.8724372386932373, "learning_rate": 1.9705302998445075e-05, "loss": 2.004, "step": 32408 }, { "epoch": 0.42, "grad_norm": 3.8571906089782715, "learning_rate": 1.970527767594914e-05, "loss": 1.8406, "step": 32409 }, { "epoch": 0.42, "grad_norm": 3.772244691848755, "learning_rate": 1.9705252352381577e-05, "loss": 1.9894, "step": 32410 }, { "epoch": 0.42, "grad_norm": 3.6471810340881348, "learning_rate": 1.970522702774239e-05, "loss": 1.9031, "step": 32411 }, { "epoch": 0.42, "grad_norm": 3.382000207901001, "learning_rate": 1.970520170203158e-05, "loss": 1.8299, "step": 32412 }, { "epoch": 0.42, "grad_norm": 3.9001624584198, "learning_rate": 1.970517637524915e-05, "loss": 2.2561, "step": 32413 }, { "epoch": 0.42, "grad_norm": 4.4572553634643555, "learning_rate": 1.9705151047395107e-05, "loss": 2.6155, "step": 32414 }, { "epoch": 0.42, "grad_norm": 3.9638609886169434, "learning_rate": 1.970512571846945e-05, "loss": 2.3823, "step": 32415 }, { "epoch": 0.42, "grad_norm": 3.9297640323638916, "learning_rate": 1.970510038847218e-05, "loss": 2.3527, "step": 32416 }, { "epoch": 0.42, "grad_norm": 3.3865039348602295, "learning_rate": 1.9705075057403308e-05, "loss": 1.8799, "step": 32417 }, { "epoch": 0.42, "grad_norm": 3.6289045810699463, "learning_rate": 1.9705049725262826e-05, "loss": 1.6979, "step": 32418 }, { "epoch": 0.42, "grad_norm": 3.728581190109253, "learning_rate": 1.9705024392050742e-05, "loss": 2.1266, "step": 32419 }, { "epoch": 0.42, "grad_norm": 4.060035705566406, "learning_rate": 1.970499905776706e-05, "loss": 1.8873, "step": 32420 }, { "epoch": 0.42, "grad_norm": 4.199502944946289, "learning_rate": 1.9704973722411783e-05, "loss": 2.0129, "step": 32421 }, { "epoch": 0.42, "grad_norm": 3.5785787105560303, "learning_rate": 1.9704948385984908e-05, "loss": 1.7293, "step": 32422 }, { "epoch": 0.42, "grad_norm": 3.846766233444214, "learning_rate": 1.9704923048486447e-05, "loss": 2.1385, "step": 32423 }, { "epoch": 0.42, "grad_norm": 3.797213554382324, "learning_rate": 1.9704897709916392e-05, "loss": 2.0652, "step": 32424 }, { "epoch": 0.42, "grad_norm": 3.93912935256958, "learning_rate": 1.9704872370274756e-05, "loss": 2.3447, "step": 32425 }, { "epoch": 0.42, "grad_norm": 3.968275547027588, "learning_rate": 1.9704847029561538e-05, "loss": 1.6819, "step": 32426 }, { "epoch": 0.42, "grad_norm": 3.802793025970459, "learning_rate": 1.970482168777674e-05, "loss": 2.3204, "step": 32427 }, { "epoch": 0.42, "grad_norm": 3.8261351585388184, "learning_rate": 1.970479634492036e-05, "loss": 2.1207, "step": 32428 }, { "epoch": 0.42, "grad_norm": 3.5255653858184814, "learning_rate": 1.9704771000992413e-05, "loss": 1.7434, "step": 32429 }, { "epoch": 0.42, "grad_norm": 3.4294698238372803, "learning_rate": 1.9704745655992893e-05, "loss": 1.9105, "step": 32430 }, { "epoch": 0.42, "grad_norm": 3.5091769695281982, "learning_rate": 1.9704720309921803e-05, "loss": 1.5751, "step": 32431 }, { "epoch": 0.42, "grad_norm": 4.317772388458252, "learning_rate": 1.9704694962779144e-05, "loss": 2.0847, "step": 32432 }, { "epoch": 0.42, "grad_norm": 3.1572697162628174, "learning_rate": 1.9704669614564924e-05, "loss": 1.7321, "step": 32433 }, { "epoch": 0.42, "grad_norm": 3.497730255126953, "learning_rate": 1.9704644265279147e-05, "loss": 2.1096, "step": 32434 }, { "epoch": 0.42, "grad_norm": 3.677234649658203, "learning_rate": 1.9704618914921808e-05, "loss": 1.8791, "step": 32435 }, { "epoch": 0.42, "grad_norm": 3.8047897815704346, "learning_rate": 1.970459356349292e-05, "loss": 1.9889, "step": 32436 }, { "epoch": 0.42, "grad_norm": 3.7760634422302246, "learning_rate": 1.9704568210992475e-05, "loss": 2.0036, "step": 32437 }, { "epoch": 0.42, "grad_norm": 3.542968988418579, "learning_rate": 1.9704542857420484e-05, "loss": 1.9461, "step": 32438 }, { "epoch": 0.42, "grad_norm": 4.0836992263793945, "learning_rate": 1.9704517502776945e-05, "loss": 1.843, "step": 32439 }, { "epoch": 0.42, "grad_norm": 3.881981611251831, "learning_rate": 1.9704492147061866e-05, "loss": 2.2025, "step": 32440 }, { "epoch": 0.42, "grad_norm": 4.273406028747559, "learning_rate": 1.970446679027524e-05, "loss": 2.3789, "step": 32441 }, { "epoch": 0.42, "grad_norm": 3.1859500408172607, "learning_rate": 1.970444143241708e-05, "loss": 1.5586, "step": 32442 }, { "epoch": 0.42, "grad_norm": 3.8810508251190186, "learning_rate": 1.9704416073487387e-05, "loss": 2.3374, "step": 32443 }, { "epoch": 0.42, "grad_norm": 3.932746171951294, "learning_rate": 1.970439071348616e-05, "loss": 2.1447, "step": 32444 }, { "epoch": 0.42, "grad_norm": 3.71409010887146, "learning_rate": 1.9704365352413406e-05, "loss": 1.6086, "step": 32445 }, { "epoch": 0.42, "grad_norm": 3.9573612213134766, "learning_rate": 1.9704339990269123e-05, "loss": 2.1378, "step": 32446 }, { "epoch": 0.42, "grad_norm": 4.0126214027404785, "learning_rate": 1.9704314627053316e-05, "loss": 2.0879, "step": 32447 }, { "epoch": 0.42, "grad_norm": 3.9726014137268066, "learning_rate": 1.970428926276599e-05, "loss": 2.379, "step": 32448 }, { "epoch": 0.42, "grad_norm": 3.7969610691070557, "learning_rate": 1.9704263897407144e-05, "loss": 2.1399, "step": 32449 }, { "epoch": 0.42, "grad_norm": 4.442135810852051, "learning_rate": 1.970423853097678e-05, "loss": 2.5245, "step": 32450 }, { "epoch": 0.42, "grad_norm": 3.5650153160095215, "learning_rate": 1.9704213163474905e-05, "loss": 1.9523, "step": 32451 }, { "epoch": 0.42, "grad_norm": 3.7443878650665283, "learning_rate": 1.9704187794901523e-05, "loss": 1.823, "step": 32452 }, { "epoch": 0.42, "grad_norm": 3.846006155014038, "learning_rate": 1.9704162425256635e-05, "loss": 1.9323, "step": 32453 }, { "epoch": 0.42, "grad_norm": 3.8459904193878174, "learning_rate": 1.970413705454024e-05, "loss": 2.119, "step": 32454 }, { "epoch": 0.42, "grad_norm": 3.7797794342041016, "learning_rate": 1.9704111682752344e-05, "loss": 1.7672, "step": 32455 }, { "epoch": 0.42, "grad_norm": 3.834338426589966, "learning_rate": 1.9704086309892948e-05, "loss": 2.2536, "step": 32456 }, { "epoch": 0.42, "grad_norm": 3.5097086429595947, "learning_rate": 1.9704060935962056e-05, "loss": 1.5023, "step": 32457 }, { "epoch": 0.42, "grad_norm": 3.697138786315918, "learning_rate": 1.9704035560959674e-05, "loss": 1.9314, "step": 32458 }, { "epoch": 0.42, "grad_norm": 3.7538704872131348, "learning_rate": 1.97040101848858e-05, "loss": 1.7227, "step": 32459 }, { "epoch": 0.42, "grad_norm": 3.7164151668548584, "learning_rate": 1.970398480774044e-05, "loss": 1.7585, "step": 32460 }, { "epoch": 0.42, "grad_norm": 3.9015958309173584, "learning_rate": 1.9703959429523595e-05, "loss": 2.32, "step": 32461 }, { "epoch": 0.42, "grad_norm": 3.206939697265625, "learning_rate": 1.970393405023527e-05, "loss": 1.6675, "step": 32462 }, { "epoch": 0.42, "grad_norm": 3.5497817993164062, "learning_rate": 1.9703908669875462e-05, "loss": 1.8742, "step": 32463 }, { "epoch": 0.42, "grad_norm": 3.2384109497070312, "learning_rate": 1.9703883288444178e-05, "loss": 1.4926, "step": 32464 }, { "epoch": 0.42, "grad_norm": 4.335979461669922, "learning_rate": 1.9703857905941424e-05, "loss": 2.5157, "step": 32465 }, { "epoch": 0.42, "grad_norm": 3.7522599697113037, "learning_rate": 1.9703832522367197e-05, "loss": 2.1224, "step": 32466 }, { "epoch": 0.42, "grad_norm": 3.1158478260040283, "learning_rate": 1.9703807137721504e-05, "loss": 1.3063, "step": 32467 }, { "epoch": 0.42, "grad_norm": 3.9304745197296143, "learning_rate": 1.9703781752004345e-05, "loss": 2.083, "step": 32468 }, { "epoch": 0.42, "grad_norm": 4.335689544677734, "learning_rate": 1.9703756365215723e-05, "loss": 2.4287, "step": 32469 }, { "epoch": 0.42, "grad_norm": 4.1518120765686035, "learning_rate": 1.9703730977355645e-05, "loss": 2.0485, "step": 32470 }, { "epoch": 0.42, "grad_norm": 3.516110897064209, "learning_rate": 1.9703705588424108e-05, "loss": 1.7175, "step": 32471 }, { "epoch": 0.42, "grad_norm": 3.307925224304199, "learning_rate": 1.9703680198421118e-05, "loss": 1.3089, "step": 32472 }, { "epoch": 0.42, "grad_norm": 3.674396276473999, "learning_rate": 1.9703654807346675e-05, "loss": 1.6783, "step": 32473 }, { "epoch": 0.42, "grad_norm": 3.784282684326172, "learning_rate": 1.9703629415200787e-05, "loss": 2.1115, "step": 32474 }, { "epoch": 0.42, "grad_norm": 4.05416202545166, "learning_rate": 1.9703604021983452e-05, "loss": 2.3724, "step": 32475 }, { "epoch": 0.42, "grad_norm": 3.907355308532715, "learning_rate": 1.9703578627694676e-05, "loss": 2.0288, "step": 32476 }, { "epoch": 0.42, "grad_norm": 4.612982273101807, "learning_rate": 1.970355323233446e-05, "loss": 2.5133, "step": 32477 }, { "epoch": 0.42, "grad_norm": 4.324306964874268, "learning_rate": 1.9703527835902805e-05, "loss": 1.9953, "step": 32478 }, { "epoch": 0.42, "grad_norm": 3.609220504760742, "learning_rate": 1.9703502438399714e-05, "loss": 1.591, "step": 32479 }, { "epoch": 0.42, "grad_norm": 4.008652687072754, "learning_rate": 1.9703477039825198e-05, "loss": 2.5736, "step": 32480 }, { "epoch": 0.42, "grad_norm": 4.085049152374268, "learning_rate": 1.9703451640179246e-05, "loss": 1.9599, "step": 32481 }, { "epoch": 0.42, "grad_norm": 4.411098957061768, "learning_rate": 1.9703426239461872e-05, "loss": 2.5945, "step": 32482 }, { "epoch": 0.42, "grad_norm": 4.422637939453125, "learning_rate": 1.970340083767308e-05, "loss": 2.4503, "step": 32483 }, { "epoch": 0.42, "grad_norm": 3.858156442642212, "learning_rate": 1.970337543481286e-05, "loss": 1.7941, "step": 32484 }, { "epoch": 0.42, "grad_norm": 4.423351764678955, "learning_rate": 1.9703350030881227e-05, "loss": 2.4647, "step": 32485 }, { "epoch": 0.42, "grad_norm": 3.4592435359954834, "learning_rate": 1.9703324625878178e-05, "loss": 1.6631, "step": 32486 }, { "epoch": 0.42, "grad_norm": 4.058151721954346, "learning_rate": 1.970329921980372e-05, "loss": 2.2835, "step": 32487 }, { "epoch": 0.42, "grad_norm": 3.810277223587036, "learning_rate": 1.9703273812657854e-05, "loss": 1.8416, "step": 32488 }, { "epoch": 0.42, "grad_norm": 3.394421100616455, "learning_rate": 1.970324840444058e-05, "loss": 1.5275, "step": 32489 }, { "epoch": 0.42, "grad_norm": 3.591041326522827, "learning_rate": 1.97032229951519e-05, "loss": 1.9312, "step": 32490 }, { "epoch": 0.42, "grad_norm": 4.1979241371154785, "learning_rate": 1.970319758479182e-05, "loss": 2.2681, "step": 32491 }, { "epoch": 0.42, "grad_norm": 3.4697208404541016, "learning_rate": 1.9703172173360344e-05, "loss": 1.4885, "step": 32492 }, { "epoch": 0.42, "grad_norm": 3.6542177200317383, "learning_rate": 1.9703146760857476e-05, "loss": 1.7787, "step": 32493 }, { "epoch": 0.42, "grad_norm": 4.24821662902832, "learning_rate": 1.9703121347283212e-05, "loss": 2.2909, "step": 32494 }, { "epoch": 0.42, "grad_norm": 3.894491672515869, "learning_rate": 1.970309593263756e-05, "loss": 2.4687, "step": 32495 }, { "epoch": 0.42, "grad_norm": 3.9232068061828613, "learning_rate": 1.970307051692052e-05, "loss": 1.7844, "step": 32496 }, { "epoch": 0.42, "grad_norm": 3.5966498851776123, "learning_rate": 1.97030451001321e-05, "loss": 1.8822, "step": 32497 }, { "epoch": 0.42, "grad_norm": 3.492546558380127, "learning_rate": 1.9703019682272296e-05, "loss": 1.7122, "step": 32498 }, { "epoch": 0.42, "grad_norm": 3.4498209953308105, "learning_rate": 1.9702994263341115e-05, "loss": 1.9296, "step": 32499 }, { "epoch": 0.42, "grad_norm": 4.135907173156738, "learning_rate": 1.970296884333856e-05, "loss": 2.4506, "step": 32500 }, { "epoch": 0.42, "grad_norm": 3.953573703765869, "learning_rate": 1.9702943422264632e-05, "loss": 2.492, "step": 32501 }, { "epoch": 0.42, "grad_norm": 3.7314531803131104, "learning_rate": 1.9702918000119333e-05, "loss": 1.8261, "step": 32502 }, { "epoch": 0.42, "grad_norm": 4.170208930969238, "learning_rate": 1.9702892576902667e-05, "loss": 2.5854, "step": 32503 }, { "epoch": 0.42, "grad_norm": 3.4313862323760986, "learning_rate": 1.970286715261464e-05, "loss": 1.6855, "step": 32504 }, { "epoch": 0.42, "grad_norm": 3.9401638507843018, "learning_rate": 1.970284172725525e-05, "loss": 1.8488, "step": 32505 }, { "epoch": 0.42, "grad_norm": 3.9398138523101807, "learning_rate": 1.9702816300824504e-05, "loss": 2.1471, "step": 32506 }, { "epoch": 0.42, "grad_norm": 4.036670207977295, "learning_rate": 1.97027908733224e-05, "loss": 2.0859, "step": 32507 }, { "epoch": 0.42, "grad_norm": 4.42454195022583, "learning_rate": 1.970276544474894e-05, "loss": 2.1662, "step": 32508 }, { "epoch": 0.42, "grad_norm": 4.123112678527832, "learning_rate": 1.9702740015104135e-05, "loss": 2.0931, "step": 32509 }, { "epoch": 0.42, "grad_norm": 4.447570323944092, "learning_rate": 1.970271458438798e-05, "loss": 2.2665, "step": 32510 }, { "epoch": 0.42, "grad_norm": 3.955167055130005, "learning_rate": 1.9702689152600484e-05, "loss": 1.7035, "step": 32511 }, { "epoch": 0.42, "grad_norm": 4.295571327209473, "learning_rate": 1.9702663719741645e-05, "loss": 1.9986, "step": 32512 }, { "epoch": 0.42, "grad_norm": 3.717116117477417, "learning_rate": 1.9702638285811465e-05, "loss": 1.7826, "step": 32513 }, { "epoch": 0.42, "grad_norm": 4.9331512451171875, "learning_rate": 1.970261285080995e-05, "loss": 2.34, "step": 32514 }, { "epoch": 0.42, "grad_norm": 4.154804229736328, "learning_rate": 1.9702587414737107e-05, "loss": 2.4982, "step": 32515 }, { "epoch": 0.42, "grad_norm": 3.63669490814209, "learning_rate": 1.970256197759293e-05, "loss": 1.5751, "step": 32516 }, { "epoch": 0.42, "grad_norm": 3.923689126968384, "learning_rate": 1.9702536539377423e-05, "loss": 2.1111, "step": 32517 }, { "epoch": 0.42, "grad_norm": 3.3041157722473145, "learning_rate": 1.9702511100090597e-05, "loss": 1.7463, "step": 32518 }, { "epoch": 0.42, "grad_norm": 3.3052172660827637, "learning_rate": 1.9702485659732443e-05, "loss": 1.7524, "step": 32519 }, { "epoch": 0.42, "grad_norm": 4.012461185455322, "learning_rate": 1.9702460218302973e-05, "loss": 2.0679, "step": 32520 }, { "epoch": 0.42, "grad_norm": 3.8972833156585693, "learning_rate": 1.970243477580219e-05, "loss": 1.813, "step": 32521 }, { "epoch": 0.42, "grad_norm": 4.415325164794922, "learning_rate": 1.9702409332230087e-05, "loss": 1.9181, "step": 32522 }, { "epoch": 0.42, "grad_norm": 3.5421602725982666, "learning_rate": 1.970238388758668e-05, "loss": 1.7587, "step": 32523 }, { "epoch": 0.42, "grad_norm": 3.4430971145629883, "learning_rate": 1.970235844187196e-05, "loss": 1.6168, "step": 32524 }, { "epoch": 0.42, "grad_norm": 3.6368136405944824, "learning_rate": 1.9702332995085935e-05, "loss": 1.9631, "step": 32525 }, { "epoch": 0.42, "grad_norm": 3.725658416748047, "learning_rate": 1.9702307547228612e-05, "loss": 1.9623, "step": 32526 }, { "epoch": 0.42, "grad_norm": 3.668494701385498, "learning_rate": 1.9702282098299986e-05, "loss": 1.8745, "step": 32527 }, { "epoch": 0.42, "grad_norm": 3.464137554168701, "learning_rate": 1.9702256648300067e-05, "loss": 1.8306, "step": 32528 }, { "epoch": 0.42, "grad_norm": 4.252859115600586, "learning_rate": 1.970223119722885e-05, "loss": 2.2241, "step": 32529 }, { "epoch": 0.42, "grad_norm": 4.001537322998047, "learning_rate": 1.9702205745086345e-05, "loss": 1.8583, "step": 32530 }, { "epoch": 0.42, "grad_norm": 4.993827819824219, "learning_rate": 1.970218029187255e-05, "loss": 2.1271, "step": 32531 }, { "epoch": 0.42, "grad_norm": 3.8744089603424072, "learning_rate": 1.9702154837587473e-05, "loss": 2.0344, "step": 32532 }, { "epoch": 0.42, "grad_norm": 4.4006757736206055, "learning_rate": 1.970212938223111e-05, "loss": 2.2802, "step": 32533 }, { "epoch": 0.42, "grad_norm": 3.606677532196045, "learning_rate": 1.9702103925803468e-05, "loss": 2.1083, "step": 32534 }, { "epoch": 0.42, "grad_norm": 4.052100658416748, "learning_rate": 1.970207846830455e-05, "loss": 2.2319, "step": 32535 }, { "epoch": 0.42, "grad_norm": 3.9681649208068848, "learning_rate": 1.9702053009734357e-05, "loss": 1.6253, "step": 32536 }, { "epoch": 0.42, "grad_norm": 3.473978281021118, "learning_rate": 1.9702027550092896e-05, "loss": 1.5545, "step": 32537 }, { "epoch": 0.42, "grad_norm": 4.176884174346924, "learning_rate": 1.9702002089380164e-05, "loss": 2.2672, "step": 32538 }, { "epoch": 0.42, "grad_norm": 3.99265456199646, "learning_rate": 1.9701976627596167e-05, "loss": 2.001, "step": 32539 }, { "epoch": 0.42, "grad_norm": 3.90883469581604, "learning_rate": 1.9701951164740903e-05, "loss": 1.8902, "step": 32540 }, { "epoch": 0.42, "grad_norm": 3.7143468856811523, "learning_rate": 1.9701925700814386e-05, "loss": 2.0629, "step": 32541 }, { "epoch": 0.42, "grad_norm": 4.238748550415039, "learning_rate": 1.970190023581661e-05, "loss": 2.5193, "step": 32542 }, { "epoch": 0.42, "grad_norm": 3.70278000831604, "learning_rate": 1.9701874769747575e-05, "loss": 2.0154, "step": 32543 }, { "epoch": 0.42, "grad_norm": 4.335141658782959, "learning_rate": 1.9701849302607293e-05, "loss": 2.1107, "step": 32544 }, { "epoch": 0.42, "grad_norm": 4.378794193267822, "learning_rate": 1.9701823834395762e-05, "loss": 1.9525, "step": 32545 }, { "epoch": 0.42, "grad_norm": 3.833923578262329, "learning_rate": 1.9701798365112982e-05, "loss": 2.0149, "step": 32546 }, { "epoch": 0.42, "grad_norm": 3.2992842197418213, "learning_rate": 1.970177289475896e-05, "loss": 1.717, "step": 32547 }, { "epoch": 0.42, "grad_norm": 3.1411874294281006, "learning_rate": 1.9701747423333697e-05, "loss": 1.5879, "step": 32548 }, { "epoch": 0.42, "grad_norm": 3.64166259765625, "learning_rate": 1.97017219508372e-05, "loss": 2.1805, "step": 32549 }, { "epoch": 0.42, "grad_norm": 3.8001813888549805, "learning_rate": 1.9701696477269466e-05, "loss": 2.263, "step": 32550 }, { "epoch": 0.42, "grad_norm": 4.177878379821777, "learning_rate": 1.97016710026305e-05, "loss": 2.2049, "step": 32551 }, { "epoch": 0.42, "grad_norm": 4.047025680541992, "learning_rate": 1.9701645526920305e-05, "loss": 1.8036, "step": 32552 }, { "epoch": 0.42, "grad_norm": 3.869302988052368, "learning_rate": 1.9701620050138885e-05, "loss": 2.0177, "step": 32553 }, { "epoch": 0.42, "grad_norm": 3.803133010864258, "learning_rate": 1.970159457228624e-05, "loss": 1.8027, "step": 32554 }, { "epoch": 0.42, "grad_norm": 4.048230171203613, "learning_rate": 1.9701569093362373e-05, "loss": 1.9415, "step": 32555 }, { "epoch": 0.42, "grad_norm": 4.026520729064941, "learning_rate": 1.970154361336729e-05, "loss": 1.7286, "step": 32556 }, { "epoch": 0.42, "grad_norm": 3.8904669284820557, "learning_rate": 1.9701518132300993e-05, "loss": 2.4215, "step": 32557 }, { "epoch": 0.42, "grad_norm": 3.6214444637298584, "learning_rate": 1.9701492650163482e-05, "loss": 2.0642, "step": 32558 }, { "epoch": 0.42, "grad_norm": 3.9812135696411133, "learning_rate": 1.9701467166954762e-05, "loss": 2.2031, "step": 32559 }, { "epoch": 0.42, "grad_norm": 3.6027908325195312, "learning_rate": 1.9701441682674836e-05, "loss": 1.8535, "step": 32560 }, { "epoch": 0.42, "grad_norm": 3.6173789501190186, "learning_rate": 1.9701416197323706e-05, "loss": 1.7502, "step": 32561 }, { "epoch": 0.42, "grad_norm": 4.175073623657227, "learning_rate": 1.9701390710901376e-05, "loss": 2.1532, "step": 32562 }, { "epoch": 0.42, "grad_norm": 4.019072532653809, "learning_rate": 1.9701365223407845e-05, "loss": 2.0713, "step": 32563 }, { "epoch": 0.42, "grad_norm": 3.9688024520874023, "learning_rate": 1.9701339734843122e-05, "loss": 2.2269, "step": 32564 }, { "epoch": 0.42, "grad_norm": 3.4585533142089844, "learning_rate": 1.9701314245207208e-05, "loss": 2.0362, "step": 32565 }, { "epoch": 0.42, "grad_norm": 4.013200759887695, "learning_rate": 1.9701288754500098e-05, "loss": 2.2166, "step": 32566 }, { "epoch": 0.42, "grad_norm": 3.8358700275421143, "learning_rate": 1.9701263262721806e-05, "loss": 1.875, "step": 32567 }, { "epoch": 0.42, "grad_norm": 3.508817672729492, "learning_rate": 1.970123776987233e-05, "loss": 1.8686, "step": 32568 }, { "epoch": 0.42, "grad_norm": 3.5984933376312256, "learning_rate": 1.9701212275951672e-05, "loss": 1.8944, "step": 32569 }, { "epoch": 0.42, "grad_norm": 3.3432395458221436, "learning_rate": 1.9701186780959833e-05, "loss": 1.8945, "step": 32570 }, { "epoch": 0.42, "grad_norm": 3.859562397003174, "learning_rate": 1.970116128489682e-05, "loss": 2.3275, "step": 32571 }, { "epoch": 0.42, "grad_norm": 3.8409817218780518, "learning_rate": 1.9701135787762636e-05, "loss": 2.1329, "step": 32572 }, { "epoch": 0.42, "grad_norm": 3.7167422771453857, "learning_rate": 1.970111028955728e-05, "loss": 2.2428, "step": 32573 }, { "epoch": 0.42, "grad_norm": 3.9844353199005127, "learning_rate": 1.9701084790280757e-05, "loss": 2.0172, "step": 32574 }, { "epoch": 0.42, "grad_norm": 4.074138641357422, "learning_rate": 1.970105928993307e-05, "loss": 2.2149, "step": 32575 }, { "epoch": 0.42, "grad_norm": 4.086474418640137, "learning_rate": 1.9701033788514223e-05, "loss": 2.3153, "step": 32576 }, { "epoch": 0.42, "grad_norm": 3.686506509780884, "learning_rate": 1.9701008286024214e-05, "loss": 1.8328, "step": 32577 }, { "epoch": 0.42, "grad_norm": 3.655590534210205, "learning_rate": 1.970098278246305e-05, "loss": 1.7949, "step": 32578 }, { "epoch": 0.42, "grad_norm": 4.071527004241943, "learning_rate": 1.9700957277830736e-05, "loss": 2.1471, "step": 32579 }, { "epoch": 0.42, "grad_norm": 4.500249862670898, "learning_rate": 1.970093177212727e-05, "loss": 2.0767, "step": 32580 }, { "epoch": 0.42, "grad_norm": 3.1661667823791504, "learning_rate": 1.9700906265352654e-05, "loss": 1.5115, "step": 32581 }, { "epoch": 0.42, "grad_norm": 3.509964942932129, "learning_rate": 1.9700880757506896e-05, "loss": 1.9185, "step": 32582 }, { "epoch": 0.42, "grad_norm": 3.729332447052002, "learning_rate": 1.9700855248589997e-05, "loss": 1.9614, "step": 32583 }, { "epoch": 0.42, "grad_norm": 4.122856140136719, "learning_rate": 1.9700829738601958e-05, "loss": 2.0335, "step": 32584 }, { "epoch": 0.42, "grad_norm": 3.867311716079712, "learning_rate": 1.970080422754278e-05, "loss": 1.9996, "step": 32585 }, { "epoch": 0.42, "grad_norm": 4.159591197967529, "learning_rate": 1.9700778715412473e-05, "loss": 1.8445, "step": 32586 }, { "epoch": 0.42, "grad_norm": 3.3074166774749756, "learning_rate": 1.9700753202211035e-05, "loss": 1.5922, "step": 32587 }, { "epoch": 0.42, "grad_norm": 4.152033805847168, "learning_rate": 1.9700727687938465e-05, "loss": 2.1688, "step": 32588 }, { "epoch": 0.42, "grad_norm": 4.160877704620361, "learning_rate": 1.9700702172594775e-05, "loss": 2.0142, "step": 32589 }, { "epoch": 0.42, "grad_norm": 4.800177574157715, "learning_rate": 1.970067665617996e-05, "loss": 2.2395, "step": 32590 }, { "epoch": 0.42, "grad_norm": 3.7214603424072266, "learning_rate": 1.970065113869403e-05, "loss": 1.7152, "step": 32591 }, { "epoch": 0.42, "grad_norm": 3.4878110885620117, "learning_rate": 1.970062562013698e-05, "loss": 1.7783, "step": 32592 }, { "epoch": 0.42, "grad_norm": 4.228687286376953, "learning_rate": 1.9700600100508813e-05, "loss": 2.554, "step": 32593 }, { "epoch": 0.42, "grad_norm": 3.845048427581787, "learning_rate": 1.970057457980954e-05, "loss": 2.302, "step": 32594 }, { "epoch": 0.42, "grad_norm": 3.977843761444092, "learning_rate": 1.9700549058039158e-05, "loss": 2.0645, "step": 32595 }, { "epoch": 0.42, "grad_norm": 3.796525716781616, "learning_rate": 1.970052353519767e-05, "loss": 2.1696, "step": 32596 }, { "epoch": 0.42, "grad_norm": 3.7372701168060303, "learning_rate": 1.970049801128508e-05, "loss": 1.7629, "step": 32597 }, { "epoch": 0.42, "grad_norm": 4.36132287979126, "learning_rate": 1.970047248630139e-05, "loss": 2.0343, "step": 32598 }, { "epoch": 0.42, "grad_norm": 4.177600860595703, "learning_rate": 1.9700446960246603e-05, "loss": 2.208, "step": 32599 }, { "epoch": 0.42, "grad_norm": 5.0088653564453125, "learning_rate": 1.9700421433120726e-05, "loss": 2.3353, "step": 32600 }, { "epoch": 0.42, "grad_norm": 3.8277406692504883, "learning_rate": 1.9700395904923754e-05, "loss": 2.2101, "step": 32601 }, { "epoch": 0.42, "grad_norm": 4.48044490814209, "learning_rate": 1.9700370375655697e-05, "loss": 2.4565, "step": 32602 }, { "epoch": 0.42, "grad_norm": 3.822908401489258, "learning_rate": 1.970034484531655e-05, "loss": 2.1996, "step": 32603 }, { "epoch": 0.42, "grad_norm": 3.7331347465515137, "learning_rate": 1.970031931390632e-05, "loss": 2.3273, "step": 32604 }, { "epoch": 0.42, "grad_norm": 3.677551031112671, "learning_rate": 1.9700293781425013e-05, "loss": 1.9861, "step": 32605 }, { "epoch": 0.42, "grad_norm": 3.3149752616882324, "learning_rate": 1.970026824787263e-05, "loss": 1.5909, "step": 32606 }, { "epoch": 0.42, "grad_norm": 3.8209550380706787, "learning_rate": 1.9700242713249172e-05, "loss": 1.8772, "step": 32607 }, { "epoch": 0.42, "grad_norm": 3.7372913360595703, "learning_rate": 1.970021717755464e-05, "loss": 1.8987, "step": 32608 }, { "epoch": 0.42, "grad_norm": 3.6857218742370605, "learning_rate": 1.9700191640789044e-05, "loss": 1.7588, "step": 32609 }, { "epoch": 0.42, "grad_norm": 3.5591988563537598, "learning_rate": 1.9700166102952378e-05, "loss": 1.7409, "step": 32610 }, { "epoch": 0.42, "grad_norm": 3.6799612045288086, "learning_rate": 1.970014056404465e-05, "loss": 2.0381, "step": 32611 }, { "epoch": 0.42, "grad_norm": 4.484790325164795, "learning_rate": 1.9700115024065865e-05, "loss": 2.5482, "step": 32612 }, { "epoch": 0.42, "grad_norm": 3.7766969203948975, "learning_rate": 1.970008948301602e-05, "loss": 2.0298, "step": 32613 }, { "epoch": 0.42, "grad_norm": 3.028388261795044, "learning_rate": 1.970006394089512e-05, "loss": 1.6413, "step": 32614 }, { "epoch": 0.42, "grad_norm": 3.974266529083252, "learning_rate": 1.9700038397703173e-05, "loss": 2.5081, "step": 32615 }, { "epoch": 0.42, "grad_norm": 4.118634223937988, "learning_rate": 1.9700012853440173e-05, "loss": 1.9072, "step": 32616 }, { "epoch": 0.42, "grad_norm": 4.606832981109619, "learning_rate": 1.9699987308106126e-05, "loss": 2.3607, "step": 32617 }, { "epoch": 0.42, "grad_norm": 3.7715566158294678, "learning_rate": 1.969996176170104e-05, "loss": 1.8901, "step": 32618 }, { "epoch": 0.42, "grad_norm": 3.2381598949432373, "learning_rate": 1.969993621422491e-05, "loss": 1.6324, "step": 32619 }, { "epoch": 0.42, "grad_norm": 4.1469268798828125, "learning_rate": 1.9699910665677743e-05, "loss": 2.0499, "step": 32620 }, { "epoch": 0.42, "grad_norm": 3.766139030456543, "learning_rate": 1.9699885116059543e-05, "loss": 1.8351, "step": 32621 }, { "epoch": 0.42, "grad_norm": 3.8749942779541016, "learning_rate": 1.969985956537031e-05, "loss": 2.0487, "step": 32622 }, { "epoch": 0.42, "grad_norm": 3.1563117504119873, "learning_rate": 1.9699834013610048e-05, "loss": 1.6861, "step": 32623 }, { "epoch": 0.42, "grad_norm": 3.6745710372924805, "learning_rate": 1.969980846077876e-05, "loss": 2.0725, "step": 32624 }, { "epoch": 0.42, "grad_norm": 3.546475648880005, "learning_rate": 1.9699782906876446e-05, "loss": 1.8253, "step": 32625 }, { "epoch": 0.42, "grad_norm": 4.548705577850342, "learning_rate": 1.9699757351903117e-05, "loss": 2.3059, "step": 32626 }, { "epoch": 0.42, "grad_norm": 4.075636863708496, "learning_rate": 1.9699731795858766e-05, "loss": 1.8006, "step": 32627 }, { "epoch": 0.42, "grad_norm": 4.1453776359558105, "learning_rate": 1.9699706238743402e-05, "loss": 1.9992, "step": 32628 }, { "epoch": 0.42, "grad_norm": 3.7212681770324707, "learning_rate": 1.9699680680557023e-05, "loss": 2.0516, "step": 32629 }, { "epoch": 0.42, "grad_norm": 4.1260986328125, "learning_rate": 1.9699655121299635e-05, "loss": 2.3377, "step": 32630 }, { "epoch": 0.42, "grad_norm": 3.4648609161376953, "learning_rate": 1.9699629560971245e-05, "loss": 1.8175, "step": 32631 }, { "epoch": 0.42, "grad_norm": 3.5253686904907227, "learning_rate": 1.9699603999571846e-05, "loss": 1.9014, "step": 32632 }, { "epoch": 0.42, "grad_norm": 3.7540442943573, "learning_rate": 1.969957843710145e-05, "loss": 1.7724, "step": 32633 }, { "epoch": 0.42, "grad_norm": 3.9219110012054443, "learning_rate": 1.9699552873560056e-05, "loss": 1.9119, "step": 32634 }, { "epoch": 0.42, "grad_norm": 4.038280963897705, "learning_rate": 1.9699527308947665e-05, "loss": 2.574, "step": 32635 }, { "epoch": 0.42, "grad_norm": 4.254209518432617, "learning_rate": 1.9699501743264285e-05, "loss": 1.9877, "step": 32636 }, { "epoch": 0.42, "grad_norm": 3.5265607833862305, "learning_rate": 1.969947617650991e-05, "loss": 1.8164, "step": 32637 }, { "epoch": 0.42, "grad_norm": 4.000479221343994, "learning_rate": 1.969945060868455e-05, "loss": 2.0162, "step": 32638 }, { "epoch": 0.42, "grad_norm": 3.7232401371002197, "learning_rate": 1.9699425039788205e-05, "loss": 2.2418, "step": 32639 }, { "epoch": 0.42, "grad_norm": 4.077902793884277, "learning_rate": 1.9699399469820885e-05, "loss": 1.9489, "step": 32640 }, { "epoch": 0.42, "grad_norm": 3.3562817573547363, "learning_rate": 1.969937389878258e-05, "loss": 1.5587, "step": 32641 }, { "epoch": 0.42, "grad_norm": 4.942641735076904, "learning_rate": 1.9699348326673304e-05, "loss": 2.7229, "step": 32642 }, { "epoch": 0.42, "grad_norm": 4.284877777099609, "learning_rate": 1.9699322753493052e-05, "loss": 1.8562, "step": 32643 }, { "epoch": 0.42, "grad_norm": 4.054302215576172, "learning_rate": 1.9699297179241833e-05, "loss": 2.0943, "step": 32644 }, { "epoch": 0.42, "grad_norm": 3.975141763687134, "learning_rate": 1.9699271603919645e-05, "loss": 1.9818, "step": 32645 }, { "epoch": 0.42, "grad_norm": 4.210847854614258, "learning_rate": 1.9699246027526496e-05, "loss": 2.2543, "step": 32646 }, { "epoch": 0.42, "grad_norm": 3.4753870964050293, "learning_rate": 1.9699220450062382e-05, "loss": 1.9356, "step": 32647 }, { "epoch": 0.42, "grad_norm": 4.458176612854004, "learning_rate": 1.969919487152731e-05, "loss": 2.3331, "step": 32648 }, { "epoch": 0.42, "grad_norm": 3.2167603969573975, "learning_rate": 1.9699169291921284e-05, "loss": 1.9564, "step": 32649 }, { "epoch": 0.42, "grad_norm": 3.632258176803589, "learning_rate": 1.9699143711244306e-05, "loss": 1.99, "step": 32650 }, { "epoch": 0.42, "grad_norm": 3.4682490825653076, "learning_rate": 1.9699118129496374e-05, "loss": 1.4848, "step": 32651 }, { "epoch": 0.42, "grad_norm": 3.9553916454315186, "learning_rate": 1.9699092546677497e-05, "loss": 2.2297, "step": 32652 }, { "epoch": 0.42, "grad_norm": 3.9792797565460205, "learning_rate": 1.9699066962787676e-05, "loss": 1.8843, "step": 32653 }, { "epoch": 0.42, "grad_norm": 3.496061325073242, "learning_rate": 1.9699041377826914e-05, "loss": 1.8855, "step": 32654 }, { "epoch": 0.42, "grad_norm": 3.771397113800049, "learning_rate": 1.969901579179521e-05, "loss": 1.9354, "step": 32655 }, { "epoch": 0.42, "grad_norm": 3.723393678665161, "learning_rate": 1.9698990204692578e-05, "loss": 2.0975, "step": 32656 }, { "epoch": 0.42, "grad_norm": 4.195209980010986, "learning_rate": 1.9698964616519006e-05, "loss": 1.9283, "step": 32657 }, { "epoch": 0.42, "grad_norm": 4.122395038604736, "learning_rate": 1.9698939027274507e-05, "loss": 2.1316, "step": 32658 }, { "epoch": 0.42, "grad_norm": 3.7871756553649902, "learning_rate": 1.9698913436959077e-05, "loss": 1.9586, "step": 32659 }, { "epoch": 0.42, "grad_norm": 3.6997642517089844, "learning_rate": 1.9698887845572723e-05, "loss": 1.8057, "step": 32660 }, { "epoch": 0.42, "grad_norm": 4.091947555541992, "learning_rate": 1.969886225311545e-05, "loss": 1.6795, "step": 32661 }, { "epoch": 0.42, "grad_norm": 4.208561420440674, "learning_rate": 1.9698836659587257e-05, "loss": 2.2971, "step": 32662 }, { "epoch": 0.42, "grad_norm": 3.6913108825683594, "learning_rate": 1.9698811064988147e-05, "loss": 1.9177, "step": 32663 }, { "epoch": 0.42, "grad_norm": 3.9897773265838623, "learning_rate": 1.9698785469318124e-05, "loss": 1.9415, "step": 32664 }, { "epoch": 0.42, "grad_norm": 3.4649198055267334, "learning_rate": 1.969875987257719e-05, "loss": 1.9063, "step": 32665 }, { "epoch": 0.42, "grad_norm": 3.9661672115325928, "learning_rate": 1.969873427476535e-05, "loss": 2.1609, "step": 32666 }, { "epoch": 0.42, "grad_norm": 4.423354625701904, "learning_rate": 1.9698708675882605e-05, "loss": 2.3271, "step": 32667 }, { "epoch": 0.42, "grad_norm": 3.681750535964966, "learning_rate": 1.9698683075928957e-05, "loss": 1.9713, "step": 32668 }, { "epoch": 0.42, "grad_norm": 3.5616745948791504, "learning_rate": 1.969865747490441e-05, "loss": 1.8726, "step": 32669 }, { "epoch": 0.42, "grad_norm": 3.704807758331299, "learning_rate": 1.969863187280897e-05, "loss": 1.3928, "step": 32670 }, { "epoch": 0.42, "grad_norm": 3.6434364318847656, "learning_rate": 1.969860626964263e-05, "loss": 1.762, "step": 32671 }, { "epoch": 0.42, "grad_norm": 3.701688051223755, "learning_rate": 1.9698580665405404e-05, "loss": 1.8487, "step": 32672 }, { "epoch": 0.42, "grad_norm": 4.629573822021484, "learning_rate": 1.9698555060097287e-05, "loss": 2.4467, "step": 32673 }, { "epoch": 0.42, "grad_norm": 4.034910678863525, "learning_rate": 1.9698529453718288e-05, "loss": 1.9926, "step": 32674 }, { "epoch": 0.42, "grad_norm": 3.7497775554656982, "learning_rate": 1.9698503846268408e-05, "loss": 2.076, "step": 32675 }, { "epoch": 0.42, "grad_norm": 3.8877179622650146, "learning_rate": 1.9698478237747645e-05, "loss": 2.0649, "step": 32676 }, { "epoch": 0.42, "grad_norm": 4.404630661010742, "learning_rate": 1.9698452628156005e-05, "loss": 2.3062, "step": 32677 }, { "epoch": 0.42, "grad_norm": 3.955796480178833, "learning_rate": 1.9698427017493492e-05, "loss": 1.9614, "step": 32678 }, { "epoch": 0.42, "grad_norm": 2.9757561683654785, "learning_rate": 1.9698401405760112e-05, "loss": 1.4324, "step": 32679 }, { "epoch": 0.42, "grad_norm": 3.6838693618774414, "learning_rate": 1.9698375792955863e-05, "loss": 1.8072, "step": 32680 }, { "epoch": 0.42, "grad_norm": 4.048457622528076, "learning_rate": 1.9698350179080747e-05, "loss": 2.5112, "step": 32681 }, { "epoch": 0.42, "grad_norm": 4.204790115356445, "learning_rate": 1.9698324564134768e-05, "loss": 2.3766, "step": 32682 }, { "epoch": 0.42, "grad_norm": 4.228847503662109, "learning_rate": 1.969829894811793e-05, "loss": 2.362, "step": 32683 }, { "epoch": 0.42, "grad_norm": 3.410963296890259, "learning_rate": 1.9698273331030234e-05, "loss": 1.8006, "step": 32684 }, { "epoch": 0.42, "grad_norm": 3.502074718475342, "learning_rate": 1.9698247712871686e-05, "loss": 1.85, "step": 32685 }, { "epoch": 0.42, "grad_norm": 3.978618860244751, "learning_rate": 1.9698222093642285e-05, "loss": 2.1585, "step": 32686 }, { "epoch": 0.42, "grad_norm": 3.9736452102661133, "learning_rate": 1.9698196473342038e-05, "loss": 1.6531, "step": 32687 }, { "epoch": 0.42, "grad_norm": 3.562626838684082, "learning_rate": 1.9698170851970945e-05, "loss": 1.9642, "step": 32688 }, { "epoch": 0.42, "grad_norm": 3.757766008377075, "learning_rate": 1.969814522952901e-05, "loss": 1.991, "step": 32689 }, { "epoch": 0.42, "grad_norm": 4.06567907333374, "learning_rate": 1.9698119606016233e-05, "loss": 2.2774, "step": 32690 }, { "epoch": 0.42, "grad_norm": 3.775675058364868, "learning_rate": 1.969809398143262e-05, "loss": 1.9501, "step": 32691 }, { "epoch": 0.42, "grad_norm": 3.578256607055664, "learning_rate": 1.9698068355778174e-05, "loss": 1.9972, "step": 32692 }, { "epoch": 0.42, "grad_norm": 4.06247091293335, "learning_rate": 1.9698042729052894e-05, "loss": 2.5479, "step": 32693 }, { "epoch": 0.42, "grad_norm": 4.375565528869629, "learning_rate": 1.969801710125679e-05, "loss": 1.9872, "step": 32694 }, { "epoch": 0.42, "grad_norm": 3.745725631713867, "learning_rate": 1.9697991472389857e-05, "loss": 2.097, "step": 32695 }, { "epoch": 0.42, "grad_norm": 4.014245510101318, "learning_rate": 1.9697965842452103e-05, "loss": 1.9353, "step": 32696 }, { "epoch": 0.42, "grad_norm": 3.7603890895843506, "learning_rate": 1.9697940211443526e-05, "loss": 2.2884, "step": 32697 }, { "epoch": 0.42, "grad_norm": 4.261918067932129, "learning_rate": 1.9697914579364134e-05, "loss": 2.0835, "step": 32698 }, { "epoch": 0.42, "grad_norm": 3.520890712738037, "learning_rate": 1.969788894621393e-05, "loss": 1.7297, "step": 32699 }, { "epoch": 0.42, "grad_norm": 3.657391309738159, "learning_rate": 1.969786331199291e-05, "loss": 2.1955, "step": 32700 }, { "epoch": 0.42, "grad_norm": 4.793084144592285, "learning_rate": 1.9697837676701084e-05, "loss": 2.2567, "step": 32701 }, { "epoch": 0.42, "grad_norm": 4.264240264892578, "learning_rate": 1.9697812040338453e-05, "loss": 2.4045, "step": 32702 }, { "epoch": 0.42, "grad_norm": 3.3745646476745605, "learning_rate": 1.9697786402905016e-05, "loss": 2.0208, "step": 32703 }, { "epoch": 0.42, "grad_norm": 3.3368260860443115, "learning_rate": 1.969776076440078e-05, "loss": 1.6981, "step": 32704 }, { "epoch": 0.42, "grad_norm": 4.351517200469971, "learning_rate": 1.9697735124825746e-05, "loss": 2.0576, "step": 32705 }, { "epoch": 0.42, "grad_norm": 3.440095901489258, "learning_rate": 1.969770948417992e-05, "loss": 1.6527, "step": 32706 }, { "epoch": 0.42, "grad_norm": 3.5000193119049072, "learning_rate": 1.9697683842463302e-05, "loss": 1.6444, "step": 32707 }, { "epoch": 0.42, "grad_norm": 3.7983205318450928, "learning_rate": 1.9697658199675896e-05, "loss": 2.3627, "step": 32708 }, { "epoch": 0.42, "grad_norm": 3.8032119274139404, "learning_rate": 1.96976325558177e-05, "loss": 2.1642, "step": 32709 }, { "epoch": 0.42, "grad_norm": 4.017945766448975, "learning_rate": 1.9697606910888724e-05, "loss": 2.1272, "step": 32710 }, { "epoch": 0.42, "grad_norm": 3.7185802459716797, "learning_rate": 1.969758126488897e-05, "loss": 2.2503, "step": 32711 }, { "epoch": 0.42, "grad_norm": 3.6495373249053955, "learning_rate": 1.9697555617818435e-05, "loss": 1.8663, "step": 32712 }, { "epoch": 0.42, "grad_norm": 3.8708558082580566, "learning_rate": 1.9697529969677123e-05, "loss": 1.8494, "step": 32713 }, { "epoch": 0.42, "grad_norm": 3.4599199295043945, "learning_rate": 1.9697504320465043e-05, "loss": 1.6602, "step": 32714 }, { "epoch": 0.42, "grad_norm": 4.328749179840088, "learning_rate": 1.9697478670182194e-05, "loss": 2.4365, "step": 32715 }, { "epoch": 0.42, "grad_norm": 3.777127742767334, "learning_rate": 1.9697453018828577e-05, "loss": 2.3822, "step": 32716 }, { "epoch": 0.42, "grad_norm": 4.939909934997559, "learning_rate": 1.9697427366404196e-05, "loss": 2.3753, "step": 32717 }, { "epoch": 0.42, "grad_norm": 11.401382446289062, "learning_rate": 1.9697401712909056e-05, "loss": 2.5459, "step": 32718 }, { "epoch": 0.42, "grad_norm": 4.5761942863464355, "learning_rate": 1.969737605834316e-05, "loss": 2.1999, "step": 32719 }, { "epoch": 0.42, "grad_norm": 4.5412516593933105, "learning_rate": 1.9697350402706506e-05, "loss": 2.6722, "step": 32720 }, { "epoch": 0.42, "grad_norm": 3.272066831588745, "learning_rate": 1.9697324745999102e-05, "loss": 1.5206, "step": 32721 }, { "epoch": 0.42, "grad_norm": 4.202125072479248, "learning_rate": 1.9697299088220947e-05, "loss": 1.9723, "step": 32722 }, { "epoch": 0.42, "grad_norm": 4.022300720214844, "learning_rate": 1.9697273429372048e-05, "loss": 2.2374, "step": 32723 }, { "epoch": 0.42, "grad_norm": 4.047728061676025, "learning_rate": 1.9697247769452407e-05, "loss": 2.4278, "step": 32724 }, { "epoch": 0.42, "grad_norm": 5.123000621795654, "learning_rate": 1.969722210846202e-05, "loss": 2.3759, "step": 32725 }, { "epoch": 0.42, "grad_norm": 3.6651391983032227, "learning_rate": 1.96971964464009e-05, "loss": 1.993, "step": 32726 }, { "epoch": 0.42, "grad_norm": 3.9680159091949463, "learning_rate": 1.969717078326904e-05, "loss": 2.1656, "step": 32727 }, { "epoch": 0.42, "grad_norm": 3.333721160888672, "learning_rate": 1.969714511906645e-05, "loss": 1.6629, "step": 32728 }, { "epoch": 0.42, "grad_norm": 3.6564958095550537, "learning_rate": 1.9697119453793135e-05, "loss": 1.8728, "step": 32729 }, { "epoch": 0.42, "grad_norm": 3.4548251628875732, "learning_rate": 1.9697093787449088e-05, "loss": 1.8453, "step": 32730 }, { "epoch": 0.42, "grad_norm": 3.34962797164917, "learning_rate": 1.969706812003432e-05, "loss": 1.7419, "step": 32731 }, { "epoch": 0.42, "grad_norm": 4.032534122467041, "learning_rate": 1.969704245154883e-05, "loss": 2.1276, "step": 32732 }, { "epoch": 0.42, "grad_norm": 3.6787378787994385, "learning_rate": 1.9697016781992623e-05, "loss": 1.7016, "step": 32733 }, { "epoch": 0.42, "grad_norm": 3.572638511657715, "learning_rate": 1.96969911113657e-05, "loss": 1.6884, "step": 32734 }, { "epoch": 0.42, "grad_norm": 3.6324493885040283, "learning_rate": 1.9696965439668063e-05, "loss": 1.9098, "step": 32735 }, { "epoch": 0.42, "grad_norm": 3.901460886001587, "learning_rate": 1.969693976689972e-05, "loss": 2.3158, "step": 32736 }, { "epoch": 0.42, "grad_norm": 3.8154983520507812, "learning_rate": 1.969691409306067e-05, "loss": 2.0641, "step": 32737 }, { "epoch": 0.42, "grad_norm": 3.3431591987609863, "learning_rate": 1.969688841815091e-05, "loss": 1.6268, "step": 32738 }, { "epoch": 0.42, "grad_norm": 3.948984384536743, "learning_rate": 1.969686274217046e-05, "loss": 1.6203, "step": 32739 }, { "epoch": 0.42, "grad_norm": 3.592355966567993, "learning_rate": 1.9696837065119302e-05, "loss": 1.7575, "step": 32740 }, { "epoch": 0.42, "grad_norm": 4.854549884796143, "learning_rate": 1.9696811386997452e-05, "loss": 2.4582, "step": 32741 }, { "epoch": 0.42, "grad_norm": 4.556906700134277, "learning_rate": 1.969678570780491e-05, "loss": 2.3647, "step": 32742 }, { "epoch": 0.42, "grad_norm": 3.4598395824432373, "learning_rate": 1.969676002754168e-05, "loss": 1.5946, "step": 32743 }, { "epoch": 0.42, "grad_norm": 3.185941457748413, "learning_rate": 1.9696734346207763e-05, "loss": 1.4515, "step": 32744 }, { "epoch": 0.42, "grad_norm": 4.0192389488220215, "learning_rate": 1.969670866380316e-05, "loss": 2.4886, "step": 32745 }, { "epoch": 0.42, "grad_norm": 3.349790573120117, "learning_rate": 1.9696682980327874e-05, "loss": 1.7655, "step": 32746 }, { "epoch": 0.42, "grad_norm": 3.622453451156616, "learning_rate": 1.9696657295781913e-05, "loss": 1.886, "step": 32747 }, { "epoch": 0.42, "grad_norm": 3.1620290279388428, "learning_rate": 1.9696631610165274e-05, "loss": 1.6632, "step": 32748 }, { "epoch": 0.43, "grad_norm": 3.431487560272217, "learning_rate": 1.9696605923477966e-05, "loss": 1.7908, "step": 32749 }, { "epoch": 0.43, "grad_norm": 4.456902503967285, "learning_rate": 1.9696580235719985e-05, "loss": 2.2841, "step": 32750 }, { "epoch": 0.43, "grad_norm": 3.5719432830810547, "learning_rate": 1.969655454689134e-05, "loss": 1.584, "step": 32751 }, { "epoch": 0.43, "grad_norm": 3.9587440490722656, "learning_rate": 1.969652885699203e-05, "loss": 2.3655, "step": 32752 }, { "epoch": 0.43, "grad_norm": 3.667105197906494, "learning_rate": 1.9696503166022058e-05, "loss": 2.1393, "step": 32753 }, { "epoch": 0.43, "grad_norm": 4.195324897766113, "learning_rate": 1.9696477473981427e-05, "loss": 2.1389, "step": 32754 }, { "epoch": 0.43, "grad_norm": 4.211763381958008, "learning_rate": 1.969645178087014e-05, "loss": 2.4498, "step": 32755 }, { "epoch": 0.43, "grad_norm": 4.026069641113281, "learning_rate": 1.96964260866882e-05, "loss": 2.0296, "step": 32756 }, { "epoch": 0.43, "grad_norm": 4.1286420822143555, "learning_rate": 1.969640039143561e-05, "loss": 2.2542, "step": 32757 }, { "epoch": 0.43, "grad_norm": 3.3335347175598145, "learning_rate": 1.9696374695112376e-05, "loss": 1.568, "step": 32758 }, { "epoch": 0.43, "grad_norm": 4.033974647521973, "learning_rate": 1.9696348997718494e-05, "loss": 1.9758, "step": 32759 }, { "epoch": 0.43, "grad_norm": 3.449965238571167, "learning_rate": 1.969632329925397e-05, "loss": 1.5543, "step": 32760 }, { "epoch": 0.43, "grad_norm": 3.7142910957336426, "learning_rate": 1.9696297599718813e-05, "loss": 2.5253, "step": 32761 }, { "epoch": 0.43, "grad_norm": 4.271370887756348, "learning_rate": 1.9696271899113016e-05, "loss": 2.0636, "step": 32762 }, { "epoch": 0.43, "grad_norm": 3.8955795764923096, "learning_rate": 1.969624619743659e-05, "loss": 1.9259, "step": 32763 }, { "epoch": 0.43, "grad_norm": 3.8992512226104736, "learning_rate": 1.969622049468953e-05, "loss": 2.0277, "step": 32764 }, { "epoch": 0.43, "grad_norm": 3.5727360248565674, "learning_rate": 1.969619479087184e-05, "loss": 1.8473, "step": 32765 }, { "epoch": 0.43, "grad_norm": 3.6384167671203613, "learning_rate": 1.969616908598353e-05, "loss": 2.1208, "step": 32766 }, { "epoch": 0.43, "grad_norm": 3.503164052963257, "learning_rate": 1.9696143380024598e-05, "loss": 1.7583, "step": 32767 }, { "epoch": 0.43, "grad_norm": 3.6840803623199463, "learning_rate": 1.9696117672995045e-05, "loss": 1.8528, "step": 32768 }, { "epoch": 0.43, "grad_norm": 4.087560653686523, "learning_rate": 1.969609196489488e-05, "loss": 2.6067, "step": 32769 }, { "epoch": 0.43, "grad_norm": 3.833277702331543, "learning_rate": 1.96960662557241e-05, "loss": 1.7861, "step": 32770 }, { "epoch": 0.43, "grad_norm": 4.281924247741699, "learning_rate": 1.9696040545482708e-05, "loss": 2.3178, "step": 32771 }, { "epoch": 0.43, "grad_norm": 3.885115623474121, "learning_rate": 1.9696014834170712e-05, "loss": 2.5959, "step": 32772 }, { "epoch": 0.43, "grad_norm": 3.3562421798706055, "learning_rate": 1.969598912178811e-05, "loss": 1.76, "step": 32773 }, { "epoch": 0.43, "grad_norm": 3.1280479431152344, "learning_rate": 1.9695963408334907e-05, "loss": 1.3712, "step": 32774 }, { "epoch": 0.43, "grad_norm": 4.1296162605285645, "learning_rate": 1.96959376938111e-05, "loss": 2.2566, "step": 32775 }, { "epoch": 0.43, "grad_norm": 3.64975905418396, "learning_rate": 1.9695911978216706e-05, "loss": 1.6273, "step": 32776 }, { "epoch": 0.43, "grad_norm": 3.6858718395233154, "learning_rate": 1.9695886261551713e-05, "loss": 1.8413, "step": 32777 }, { "epoch": 0.43, "grad_norm": 3.3309993743896484, "learning_rate": 1.969586054381613e-05, "loss": 1.4209, "step": 32778 }, { "epoch": 0.43, "grad_norm": 3.943918466567993, "learning_rate": 1.969583482500996e-05, "loss": 2.2418, "step": 32779 }, { "epoch": 0.43, "grad_norm": 4.244044303894043, "learning_rate": 1.9695809105133204e-05, "loss": 2.0369, "step": 32780 }, { "epoch": 0.43, "grad_norm": 3.843461513519287, "learning_rate": 1.969578338418587e-05, "loss": 2.3698, "step": 32781 }, { "epoch": 0.43, "grad_norm": 4.027947425842285, "learning_rate": 1.9695757662167953e-05, "loss": 1.8788, "step": 32782 }, { "epoch": 0.43, "grad_norm": 3.9093101024627686, "learning_rate": 1.9695731939079462e-05, "loss": 1.9564, "step": 32783 }, { "epoch": 0.43, "grad_norm": 3.9716074466705322, "learning_rate": 1.96957062149204e-05, "loss": 2.2167, "step": 32784 }, { "epoch": 0.43, "grad_norm": 4.174349308013916, "learning_rate": 1.9695680489690764e-05, "loss": 2.3777, "step": 32785 }, { "epoch": 0.43, "grad_norm": 3.8629400730133057, "learning_rate": 1.969565476339056e-05, "loss": 2.111, "step": 32786 }, { "epoch": 0.43, "grad_norm": 3.808608293533325, "learning_rate": 1.9695629036019796e-05, "loss": 1.8501, "step": 32787 }, { "epoch": 0.43, "grad_norm": 3.583247184753418, "learning_rate": 1.9695603307578466e-05, "loss": 1.6879, "step": 32788 }, { "epoch": 0.43, "grad_norm": 3.9049508571624756, "learning_rate": 1.9695577578066575e-05, "loss": 1.9455, "step": 32789 }, { "epoch": 0.43, "grad_norm": 3.8692219257354736, "learning_rate": 1.9695551847484132e-05, "loss": 2.132, "step": 32790 }, { "epoch": 0.43, "grad_norm": 3.841658115386963, "learning_rate": 1.9695526115831135e-05, "loss": 2.0549, "step": 32791 }, { "epoch": 0.43, "grad_norm": 3.87390398979187, "learning_rate": 1.9695500383107586e-05, "loss": 1.9912, "step": 32792 }, { "epoch": 0.43, "grad_norm": 3.4292540550231934, "learning_rate": 1.969547464931349e-05, "loss": 1.6967, "step": 32793 }, { "epoch": 0.43, "grad_norm": 3.823303699493408, "learning_rate": 1.9695448914448848e-05, "loss": 2.002, "step": 32794 }, { "epoch": 0.43, "grad_norm": 3.619638442993164, "learning_rate": 1.9695423178513665e-05, "loss": 1.7036, "step": 32795 }, { "epoch": 0.43, "grad_norm": 3.7403652667999268, "learning_rate": 1.969539744150794e-05, "loss": 2.3644, "step": 32796 }, { "epoch": 0.43, "grad_norm": 3.6493937969207764, "learning_rate": 1.969537170343168e-05, "loss": 1.8464, "step": 32797 }, { "epoch": 0.43, "grad_norm": 3.453310012817383, "learning_rate": 1.969534596428489e-05, "loss": 1.9711, "step": 32798 }, { "epoch": 0.43, "grad_norm": 4.41790771484375, "learning_rate": 1.9695320224067566e-05, "loss": 2.1292, "step": 32799 }, { "epoch": 0.43, "grad_norm": 3.7304043769836426, "learning_rate": 1.9695294482779714e-05, "loss": 2.026, "step": 32800 }, { "epoch": 0.43, "grad_norm": 4.006753921508789, "learning_rate": 1.9695268740421342e-05, "loss": 2.0489, "step": 32801 }, { "epoch": 0.43, "grad_norm": 3.896991014480591, "learning_rate": 1.9695242996992442e-05, "loss": 2.2641, "step": 32802 }, { "epoch": 0.43, "grad_norm": 3.280327081680298, "learning_rate": 1.9695217252493024e-05, "loss": 1.5162, "step": 32803 }, { "epoch": 0.43, "grad_norm": 3.8711369037628174, "learning_rate": 1.969519150692309e-05, "loss": 1.9069, "step": 32804 }, { "epoch": 0.43, "grad_norm": 3.2445948123931885, "learning_rate": 1.9695165760282643e-05, "loss": 1.586, "step": 32805 }, { "epoch": 0.43, "grad_norm": 3.5781424045562744, "learning_rate": 1.9695140012571687e-05, "loss": 1.6386, "step": 32806 }, { "epoch": 0.43, "grad_norm": 3.395636796951294, "learning_rate": 1.969511426379022e-05, "loss": 1.7818, "step": 32807 }, { "epoch": 0.43, "grad_norm": 4.0868988037109375, "learning_rate": 1.9695088513938248e-05, "loss": 2.017, "step": 32808 }, { "epoch": 0.43, "grad_norm": 3.63136625289917, "learning_rate": 1.9695062763015773e-05, "loss": 2.1085, "step": 32809 }, { "epoch": 0.43, "grad_norm": 3.4628539085388184, "learning_rate": 1.96950370110228e-05, "loss": 1.7719, "step": 32810 }, { "epoch": 0.43, "grad_norm": 3.2682883739471436, "learning_rate": 1.969501125795933e-05, "loss": 1.7179, "step": 32811 }, { "epoch": 0.43, "grad_norm": 4.325057506561279, "learning_rate": 1.9694985503825367e-05, "loss": 2.4788, "step": 32812 }, { "epoch": 0.43, "grad_norm": 4.123684406280518, "learning_rate": 1.9694959748620914e-05, "loss": 2.3095, "step": 32813 }, { "epoch": 0.43, "grad_norm": 3.0956332683563232, "learning_rate": 1.969493399234597e-05, "loss": 1.5512, "step": 32814 }, { "epoch": 0.43, "grad_norm": 3.6275041103363037, "learning_rate": 1.969490823500054e-05, "loss": 2.2879, "step": 32815 }, { "epoch": 0.43, "grad_norm": 3.9493134021759033, "learning_rate": 1.9694882476584634e-05, "loss": 2.1749, "step": 32816 }, { "epoch": 0.43, "grad_norm": 3.6212832927703857, "learning_rate": 1.9694856717098245e-05, "loss": 1.6347, "step": 32817 }, { "epoch": 0.43, "grad_norm": 3.9291470050811768, "learning_rate": 1.9694830956541377e-05, "loss": 2.14, "step": 32818 }, { "epoch": 0.43, "grad_norm": 3.641038656234741, "learning_rate": 1.9694805194914038e-05, "loss": 1.9432, "step": 32819 }, { "epoch": 0.43, "grad_norm": 3.329345941543579, "learning_rate": 1.9694779432216226e-05, "loss": 1.6041, "step": 32820 }, { "epoch": 0.43, "grad_norm": 3.4141440391540527, "learning_rate": 1.9694753668447947e-05, "loss": 1.6955, "step": 32821 }, { "epoch": 0.43, "grad_norm": 3.9218175411224365, "learning_rate": 1.9694727903609205e-05, "loss": 1.6693, "step": 32822 }, { "epoch": 0.43, "grad_norm": 4.838987827301025, "learning_rate": 1.96947021377e-05, "loss": 2.503, "step": 32823 }, { "epoch": 0.43, "grad_norm": 3.0914506912231445, "learning_rate": 1.969467637072033e-05, "loss": 1.4564, "step": 32824 }, { "epoch": 0.43, "grad_norm": 3.3181092739105225, "learning_rate": 1.969465060267021e-05, "loss": 1.4402, "step": 32825 }, { "epoch": 0.43, "grad_norm": 4.108692646026611, "learning_rate": 1.9694624833549635e-05, "loss": 2.3677, "step": 32826 }, { "epoch": 0.43, "grad_norm": 3.512132167816162, "learning_rate": 1.9694599063358603e-05, "loss": 1.7653, "step": 32827 }, { "epoch": 0.43, "grad_norm": 3.846667528152466, "learning_rate": 1.969457329209713e-05, "loss": 1.6483, "step": 32828 }, { "epoch": 0.43, "grad_norm": 4.138974189758301, "learning_rate": 1.9694547519765205e-05, "loss": 2.0653, "step": 32829 }, { "epoch": 0.43, "grad_norm": 4.336065292358398, "learning_rate": 1.969452174636284e-05, "loss": 2.4333, "step": 32830 }, { "epoch": 0.43, "grad_norm": 4.027714729309082, "learning_rate": 1.969449597189004e-05, "loss": 2.1957, "step": 32831 }, { "epoch": 0.43, "grad_norm": 3.698378562927246, "learning_rate": 1.96944701963468e-05, "loss": 1.6693, "step": 32832 }, { "epoch": 0.43, "grad_norm": 3.4711785316467285, "learning_rate": 1.9694444419733126e-05, "loss": 1.6881, "step": 32833 }, { "epoch": 0.43, "grad_norm": 3.9674787521362305, "learning_rate": 1.9694418642049017e-05, "loss": 1.9448, "step": 32834 }, { "epoch": 0.43, "grad_norm": 3.973201274871826, "learning_rate": 1.9694392863294486e-05, "loss": 2.0362, "step": 32835 }, { "epoch": 0.43, "grad_norm": 4.345543384552002, "learning_rate": 1.969436708346953e-05, "loss": 2.021, "step": 32836 }, { "epoch": 0.43, "grad_norm": 3.6465117931365967, "learning_rate": 1.9694341302574146e-05, "loss": 1.8339, "step": 32837 }, { "epoch": 0.43, "grad_norm": 3.9625051021575928, "learning_rate": 1.9694315520608343e-05, "loss": 1.9129, "step": 32838 }, { "epoch": 0.43, "grad_norm": 3.89732027053833, "learning_rate": 1.9694289737572125e-05, "loss": 1.7687, "step": 32839 }, { "epoch": 0.43, "grad_norm": 3.4054417610168457, "learning_rate": 1.9694263953465494e-05, "loss": 1.5175, "step": 32840 }, { "epoch": 0.43, "grad_norm": 3.6447792053222656, "learning_rate": 1.9694238168288448e-05, "loss": 2.013, "step": 32841 }, { "epoch": 0.43, "grad_norm": 4.880517482757568, "learning_rate": 1.9694212382040995e-05, "loss": 2.0047, "step": 32842 }, { "epoch": 0.43, "grad_norm": 3.86220121383667, "learning_rate": 1.969418659472314e-05, "loss": 2.1218, "step": 32843 }, { "epoch": 0.43, "grad_norm": 4.139150142669678, "learning_rate": 1.969416080633488e-05, "loss": 2.1568, "step": 32844 }, { "epoch": 0.43, "grad_norm": 4.110758304595947, "learning_rate": 1.9694135016876217e-05, "loss": 1.8303, "step": 32845 }, { "epoch": 0.43, "grad_norm": 3.9509501457214355, "learning_rate": 1.9694109226347163e-05, "loss": 2.25, "step": 32846 }, { "epoch": 0.43, "grad_norm": 3.207906484603882, "learning_rate": 1.969408343474771e-05, "loss": 1.7822, "step": 32847 }, { "epoch": 0.43, "grad_norm": 3.8175952434539795, "learning_rate": 1.9694057642077866e-05, "loss": 2.1236, "step": 32848 }, { "epoch": 0.43, "grad_norm": 4.976937294006348, "learning_rate": 1.969403184833764e-05, "loss": 2.1649, "step": 32849 }, { "epoch": 0.43, "grad_norm": 4.275993824005127, "learning_rate": 1.9694006053527022e-05, "loss": 2.445, "step": 32850 }, { "epoch": 0.43, "grad_norm": 4.027100086212158, "learning_rate": 1.969398025764602e-05, "loss": 2.2771, "step": 32851 }, { "epoch": 0.43, "grad_norm": 3.575547695159912, "learning_rate": 1.9693954460694643e-05, "loss": 1.6716, "step": 32852 }, { "epoch": 0.43, "grad_norm": 3.692896842956543, "learning_rate": 1.9693928662672887e-05, "loss": 2.1493, "step": 32853 }, { "epoch": 0.43, "grad_norm": 4.386431694030762, "learning_rate": 1.9693902863580752e-05, "loss": 2.2117, "step": 32854 }, { "epoch": 0.43, "grad_norm": 4.235091209411621, "learning_rate": 1.9693877063418253e-05, "loss": 2.0732, "step": 32855 }, { "epoch": 0.43, "grad_norm": 4.0838398933410645, "learning_rate": 1.9693851262185385e-05, "loss": 2.5286, "step": 32856 }, { "epoch": 0.43, "grad_norm": 3.812624931335449, "learning_rate": 1.969382545988215e-05, "loss": 1.6814, "step": 32857 }, { "epoch": 0.43, "grad_norm": 3.799668312072754, "learning_rate": 1.969379965650855e-05, "loss": 1.7564, "step": 32858 }, { "epoch": 0.43, "grad_norm": 3.981576442718506, "learning_rate": 1.969377385206459e-05, "loss": 1.6905, "step": 32859 }, { "epoch": 0.43, "grad_norm": 3.7269530296325684, "learning_rate": 1.9693748046550273e-05, "loss": 1.8588, "step": 32860 }, { "epoch": 0.43, "grad_norm": 3.7607789039611816, "learning_rate": 1.9693722239965604e-05, "loss": 2.1257, "step": 32861 }, { "epoch": 0.43, "grad_norm": 4.023369312286377, "learning_rate": 1.9693696432310583e-05, "loss": 2.1911, "step": 32862 }, { "epoch": 0.43, "grad_norm": 3.7977285385131836, "learning_rate": 1.9693670623585214e-05, "loss": 1.8908, "step": 32863 }, { "epoch": 0.43, "grad_norm": 4.230596542358398, "learning_rate": 1.9693644813789497e-05, "loss": 2.0107, "step": 32864 }, { "epoch": 0.43, "grad_norm": 3.5075125694274902, "learning_rate": 1.9693619002923442e-05, "loss": 1.878, "step": 32865 }, { "epoch": 0.43, "grad_norm": 3.907285690307617, "learning_rate": 1.9693593190987042e-05, "loss": 1.9126, "step": 32866 }, { "epoch": 0.43, "grad_norm": 3.550445079803467, "learning_rate": 1.9693567377980308e-05, "loss": 1.855, "step": 32867 }, { "epoch": 0.43, "grad_norm": 4.311191558837891, "learning_rate": 1.969354156390324e-05, "loss": 2.3093, "step": 32868 }, { "epoch": 0.43, "grad_norm": 3.6218385696411133, "learning_rate": 1.9693515748755836e-05, "loss": 1.8016, "step": 32869 }, { "epoch": 0.43, "grad_norm": 4.173950672149658, "learning_rate": 1.9693489932538105e-05, "loss": 2.1315, "step": 32870 }, { "epoch": 0.43, "grad_norm": 3.6046502590179443, "learning_rate": 1.9693464115250053e-05, "loss": 1.7494, "step": 32871 }, { "epoch": 0.43, "grad_norm": 3.9663238525390625, "learning_rate": 1.9693438296891673e-05, "loss": 2.1326, "step": 32872 }, { "epoch": 0.43, "grad_norm": 3.713336944580078, "learning_rate": 1.9693412477462975e-05, "loss": 1.7169, "step": 32873 }, { "epoch": 0.43, "grad_norm": 4.053690433502197, "learning_rate": 1.969338665696396e-05, "loss": 1.6001, "step": 32874 }, { "epoch": 0.43, "grad_norm": 4.185001373291016, "learning_rate": 1.9693360835394627e-05, "loss": 2.3433, "step": 32875 }, { "epoch": 0.43, "grad_norm": 3.593191146850586, "learning_rate": 1.9693335012754987e-05, "loss": 1.9871, "step": 32876 }, { "epoch": 0.43, "grad_norm": 4.111794948577881, "learning_rate": 1.9693309189045036e-05, "loss": 2.2014, "step": 32877 }, { "epoch": 0.43, "grad_norm": 3.789522171020508, "learning_rate": 1.969328336426478e-05, "loss": 1.8184, "step": 32878 }, { "epoch": 0.43, "grad_norm": 3.7664451599121094, "learning_rate": 1.9693257538414222e-05, "loss": 1.907, "step": 32879 }, { "epoch": 0.43, "grad_norm": 3.947430372238159, "learning_rate": 1.9693231711493362e-05, "loss": 1.8786, "step": 32880 }, { "epoch": 0.43, "grad_norm": 4.317156791687012, "learning_rate": 1.969320588350221e-05, "loss": 2.0533, "step": 32881 }, { "epoch": 0.43, "grad_norm": 4.144556999206543, "learning_rate": 1.9693180054440757e-05, "loss": 1.9431, "step": 32882 }, { "epoch": 0.43, "grad_norm": 3.224552869796753, "learning_rate": 1.9693154224309016e-05, "loss": 1.4156, "step": 32883 }, { "epoch": 0.43, "grad_norm": 3.7140700817108154, "learning_rate": 1.9693128393106984e-05, "loss": 2.1679, "step": 32884 }, { "epoch": 0.43, "grad_norm": 4.001245498657227, "learning_rate": 1.969310256083467e-05, "loss": 2.4284, "step": 32885 }, { "epoch": 0.43, "grad_norm": 3.573760509490967, "learning_rate": 1.969307672749207e-05, "loss": 1.788, "step": 32886 }, { "epoch": 0.43, "grad_norm": 3.9109439849853516, "learning_rate": 1.969305089307919e-05, "loss": 2.0188, "step": 32887 }, { "epoch": 0.43, "grad_norm": 3.8983049392700195, "learning_rate": 1.9693025057596032e-05, "loss": 1.8295, "step": 32888 }, { "epoch": 0.43, "grad_norm": 3.6220719814300537, "learning_rate": 1.9692999221042602e-05, "loss": 1.9013, "step": 32889 }, { "epoch": 0.43, "grad_norm": 3.8254899978637695, "learning_rate": 1.96929733834189e-05, "loss": 2.0493, "step": 32890 }, { "epoch": 0.43, "grad_norm": 3.6409435272216797, "learning_rate": 1.9692947544724928e-05, "loss": 2.0951, "step": 32891 }, { "epoch": 0.43, "grad_norm": 4.134315490722656, "learning_rate": 1.9692921704960687e-05, "loss": 2.0497, "step": 32892 }, { "epoch": 0.43, "grad_norm": 3.797485113143921, "learning_rate": 1.969289586412619e-05, "loss": 1.9581, "step": 32893 }, { "epoch": 0.43, "grad_norm": 4.114743709564209, "learning_rate": 1.969287002222143e-05, "loss": 2.0509, "step": 32894 }, { "epoch": 0.43, "grad_norm": 3.4496448040008545, "learning_rate": 1.969284417924641e-05, "loss": 1.7214, "step": 32895 }, { "epoch": 0.43, "grad_norm": 3.8412156105041504, "learning_rate": 1.9692818335201137e-05, "loss": 1.9415, "step": 32896 }, { "epoch": 0.43, "grad_norm": 3.7736384868621826, "learning_rate": 1.9692792490085613e-05, "loss": 1.8448, "step": 32897 }, { "epoch": 0.43, "grad_norm": 3.6080267429351807, "learning_rate": 1.9692766643899843e-05, "loss": 1.7016, "step": 32898 }, { "epoch": 0.43, "grad_norm": 4.106566429138184, "learning_rate": 1.9692740796643823e-05, "loss": 1.8146, "step": 32899 }, { "epoch": 0.43, "grad_norm": 3.9862003326416016, "learning_rate": 1.9692714948317563e-05, "loss": 1.7145, "step": 32900 }, { "epoch": 0.43, "grad_norm": 3.840977907180786, "learning_rate": 1.969268909892106e-05, "loss": 2.1776, "step": 32901 }, { "epoch": 0.43, "grad_norm": 4.279897689819336, "learning_rate": 1.9692663248454324e-05, "loss": 2.2402, "step": 32902 }, { "epoch": 0.43, "grad_norm": 3.9217028617858887, "learning_rate": 1.969263739691735e-05, "loss": 2.1733, "step": 32903 }, { "epoch": 0.43, "grad_norm": 3.780515670776367, "learning_rate": 1.9692611544310143e-05, "loss": 1.918, "step": 32904 }, { "epoch": 0.43, "grad_norm": 4.145126819610596, "learning_rate": 1.969258569063271e-05, "loss": 2.0586, "step": 32905 }, { "epoch": 0.43, "grad_norm": 3.5829973220825195, "learning_rate": 1.969255983588505e-05, "loss": 1.6772, "step": 32906 }, { "epoch": 0.43, "grad_norm": 3.647184133529663, "learning_rate": 1.9692533980067168e-05, "loss": 1.899, "step": 32907 }, { "epoch": 0.43, "grad_norm": 4.332564353942871, "learning_rate": 1.9692508123179065e-05, "loss": 2.5453, "step": 32908 }, { "epoch": 0.43, "grad_norm": 3.9645252227783203, "learning_rate": 1.9692482265220746e-05, "loss": 1.9658, "step": 32909 }, { "epoch": 0.43, "grad_norm": 3.592700719833374, "learning_rate": 1.9692456406192208e-05, "loss": 1.6242, "step": 32910 }, { "epoch": 0.43, "grad_norm": 3.841876745223999, "learning_rate": 1.969243054609346e-05, "loss": 2.0648, "step": 32911 }, { "epoch": 0.43, "grad_norm": 3.833791971206665, "learning_rate": 1.9692404684924508e-05, "loss": 2.1426, "step": 32912 }, { "epoch": 0.43, "grad_norm": 4.138725757598877, "learning_rate": 1.9692378822685346e-05, "loss": 1.8258, "step": 32913 }, { "epoch": 0.43, "grad_norm": 4.073907852172852, "learning_rate": 1.969235295937598e-05, "loss": 2.4121, "step": 32914 }, { "epoch": 0.43, "grad_norm": 4.466416835784912, "learning_rate": 1.9692327094996417e-05, "loss": 2.2386, "step": 32915 }, { "epoch": 0.43, "grad_norm": 3.7203617095947266, "learning_rate": 1.9692301229546658e-05, "loss": 1.9924, "step": 32916 }, { "epoch": 0.43, "grad_norm": 4.117077350616455, "learning_rate": 1.96922753630267e-05, "loss": 1.904, "step": 32917 }, { "epoch": 0.43, "grad_norm": 3.580441951751709, "learning_rate": 1.9692249495436552e-05, "loss": 1.884, "step": 32918 }, { "epoch": 0.43, "grad_norm": 3.1465091705322266, "learning_rate": 1.9692223626776216e-05, "loss": 1.5218, "step": 32919 }, { "epoch": 0.43, "grad_norm": 3.307631731033325, "learning_rate": 1.9692197757045694e-05, "loss": 1.4804, "step": 32920 }, { "epoch": 0.43, "grad_norm": 3.71199631690979, "learning_rate": 1.9692171886244987e-05, "loss": 2.0027, "step": 32921 }, { "epoch": 0.43, "grad_norm": 4.0934648513793945, "learning_rate": 1.96921460143741e-05, "loss": 2.1729, "step": 32922 }, { "epoch": 0.43, "grad_norm": 4.207154273986816, "learning_rate": 1.969212014143304e-05, "loss": 1.8027, "step": 32923 }, { "epoch": 0.43, "grad_norm": 3.5887131690979004, "learning_rate": 1.96920942674218e-05, "loss": 1.9204, "step": 32924 }, { "epoch": 0.43, "grad_norm": 4.522383689880371, "learning_rate": 1.9692068392340392e-05, "loss": 2.1583, "step": 32925 }, { "epoch": 0.43, "grad_norm": 3.745729446411133, "learning_rate": 1.9692042516188816e-05, "loss": 1.9195, "step": 32926 }, { "epoch": 0.43, "grad_norm": 3.9830474853515625, "learning_rate": 1.969201663896707e-05, "loss": 1.7287, "step": 32927 }, { "epoch": 0.43, "grad_norm": 4.099679946899414, "learning_rate": 1.9691990760675163e-05, "loss": 2.3062, "step": 32928 }, { "epoch": 0.43, "grad_norm": 3.6377079486846924, "learning_rate": 1.9691964881313098e-05, "loss": 1.8148, "step": 32929 }, { "epoch": 0.43, "grad_norm": 3.5992493629455566, "learning_rate": 1.9691939000880874e-05, "loss": 1.7904, "step": 32930 }, { "epoch": 0.43, "grad_norm": 3.7854163646698, "learning_rate": 1.9691913119378495e-05, "loss": 1.8761, "step": 32931 }, { "epoch": 0.43, "grad_norm": 4.440771102905273, "learning_rate": 1.9691887236805965e-05, "loss": 2.5297, "step": 32932 }, { "epoch": 0.43, "grad_norm": 3.0391597747802734, "learning_rate": 1.9691861353163283e-05, "loss": 1.7215, "step": 32933 }, { "epoch": 0.43, "grad_norm": 4.046788692474365, "learning_rate": 1.9691835468450456e-05, "loss": 2.0381, "step": 32934 }, { "epoch": 0.43, "grad_norm": 3.376375436782837, "learning_rate": 1.9691809582667488e-05, "loss": 1.7254, "step": 32935 }, { "epoch": 0.43, "grad_norm": 4.139483451843262, "learning_rate": 1.969178369581438e-05, "loss": 1.7219, "step": 32936 }, { "epoch": 0.43, "grad_norm": 3.817754030227661, "learning_rate": 1.9691757807891134e-05, "loss": 1.7644, "step": 32937 }, { "epoch": 0.43, "grad_norm": 3.9969353675842285, "learning_rate": 1.9691731918897755e-05, "loss": 1.9359, "step": 32938 }, { "epoch": 0.43, "grad_norm": 3.936980962753296, "learning_rate": 1.9691706028834245e-05, "loss": 1.8656, "step": 32939 }, { "epoch": 0.43, "grad_norm": 3.8401708602905273, "learning_rate": 1.9691680137700603e-05, "loss": 2.112, "step": 32940 }, { "epoch": 0.43, "grad_norm": 4.037663459777832, "learning_rate": 1.9691654245496834e-05, "loss": 2.0086, "step": 32941 }, { "epoch": 0.43, "grad_norm": 3.941710948944092, "learning_rate": 1.9691628352222943e-05, "loss": 1.7285, "step": 32942 }, { "epoch": 0.43, "grad_norm": 4.373408794403076, "learning_rate": 1.9691602457878935e-05, "loss": 1.777, "step": 32943 }, { "epoch": 0.43, "grad_norm": 4.01493501663208, "learning_rate": 1.969157656246481e-05, "loss": 2.3379, "step": 32944 }, { "epoch": 0.43, "grad_norm": 4.385517120361328, "learning_rate": 1.969155066598057e-05, "loss": 2.361, "step": 32945 }, { "epoch": 0.43, "grad_norm": 3.7428359985351562, "learning_rate": 1.9691524768426215e-05, "loss": 1.9448, "step": 32946 }, { "epoch": 0.43, "grad_norm": 4.119072914123535, "learning_rate": 1.969149886980175e-05, "loss": 1.9784, "step": 32947 }, { "epoch": 0.43, "grad_norm": 3.6237974166870117, "learning_rate": 1.9691472970107183e-05, "loss": 2.0659, "step": 32948 }, { "epoch": 0.43, "grad_norm": 3.5790998935699463, "learning_rate": 1.9691447069342513e-05, "loss": 1.8921, "step": 32949 }, { "epoch": 0.43, "grad_norm": 4.074962139129639, "learning_rate": 1.969142116750774e-05, "loss": 1.9983, "step": 32950 }, { "epoch": 0.43, "grad_norm": 3.9625749588012695, "learning_rate": 1.9691395264602873e-05, "loss": 2.2878, "step": 32951 }, { "epoch": 0.43, "grad_norm": 3.6148617267608643, "learning_rate": 1.9691369360627907e-05, "loss": 2.0715, "step": 32952 }, { "epoch": 0.43, "grad_norm": 4.485970497131348, "learning_rate": 1.9691343455582854e-05, "loss": 2.0943, "step": 32953 }, { "epoch": 0.43, "grad_norm": 4.164430618286133, "learning_rate": 1.969131754946771e-05, "loss": 1.9308, "step": 32954 }, { "epoch": 0.43, "grad_norm": 3.665595769882202, "learning_rate": 1.9691291642282483e-05, "loss": 1.8559, "step": 32955 }, { "epoch": 0.43, "grad_norm": 4.284121513366699, "learning_rate": 1.9691265734027168e-05, "loss": 2.3339, "step": 32956 }, { "epoch": 0.43, "grad_norm": 3.9005589485168457, "learning_rate": 1.9691239824701776e-05, "loss": 2.0202, "step": 32957 }, { "epoch": 0.43, "grad_norm": 3.6342334747314453, "learning_rate": 1.9691213914306307e-05, "loss": 2.0332, "step": 32958 }, { "epoch": 0.43, "grad_norm": 3.3438403606414795, "learning_rate": 1.9691188002840765e-05, "loss": 1.6747, "step": 32959 }, { "epoch": 0.43, "grad_norm": 3.4084644317626953, "learning_rate": 1.969116209030515e-05, "loss": 1.8366, "step": 32960 }, { "epoch": 0.43, "grad_norm": 3.5705246925354004, "learning_rate": 1.9691136176699462e-05, "loss": 1.7086, "step": 32961 }, { "epoch": 0.43, "grad_norm": 3.7067527770996094, "learning_rate": 1.9691110262023712e-05, "loss": 1.4767, "step": 32962 }, { "epoch": 0.43, "grad_norm": 3.7228198051452637, "learning_rate": 1.96910843462779e-05, "loss": 1.7143, "step": 32963 }, { "epoch": 0.43, "grad_norm": 4.450734615325928, "learning_rate": 1.9691058429462025e-05, "loss": 2.1688, "step": 32964 }, { "epoch": 0.43, "grad_norm": 4.449418067932129, "learning_rate": 1.9691032511576095e-05, "loss": 2.6216, "step": 32965 }, { "epoch": 0.43, "grad_norm": 3.9008047580718994, "learning_rate": 1.969100659262011e-05, "loss": 1.9005, "step": 32966 }, { "epoch": 0.43, "grad_norm": 3.659052848815918, "learning_rate": 1.9690980672594074e-05, "loss": 2.0366, "step": 32967 }, { "epoch": 0.43, "grad_norm": 3.309558868408203, "learning_rate": 1.969095475149799e-05, "loss": 1.4877, "step": 32968 }, { "epoch": 0.43, "grad_norm": 4.033721446990967, "learning_rate": 1.9690928829331857e-05, "loss": 2.4269, "step": 32969 }, { "epoch": 0.43, "grad_norm": 3.691817045211792, "learning_rate": 1.9690902906095684e-05, "loss": 1.8173, "step": 32970 }, { "epoch": 0.43, "grad_norm": 4.159997463226318, "learning_rate": 1.969087698178947e-05, "loss": 2.2731, "step": 32971 }, { "epoch": 0.43, "grad_norm": 3.483566999435425, "learning_rate": 1.969085105641322e-05, "loss": 1.9061, "step": 32972 }, { "epoch": 0.43, "grad_norm": 4.194351673126221, "learning_rate": 1.9690825129966936e-05, "loss": 2.4684, "step": 32973 }, { "epoch": 0.43, "grad_norm": 4.395063877105713, "learning_rate": 1.9690799202450617e-05, "loss": 1.9176, "step": 32974 }, { "epoch": 0.43, "grad_norm": 3.826939344406128, "learning_rate": 1.969077327386427e-05, "loss": 1.8869, "step": 32975 }, { "epoch": 0.43, "grad_norm": 3.8079586029052734, "learning_rate": 1.9690747344207897e-05, "loss": 2.0474, "step": 32976 }, { "epoch": 0.43, "grad_norm": 3.7876136302948, "learning_rate": 1.9690721413481503e-05, "loss": 1.8006, "step": 32977 }, { "epoch": 0.43, "grad_norm": 4.107898712158203, "learning_rate": 1.969069548168509e-05, "loss": 2.0649, "step": 32978 }, { "epoch": 0.43, "grad_norm": 4.065372467041016, "learning_rate": 1.969066954881866e-05, "loss": 2.4, "step": 32979 }, { "epoch": 0.43, "grad_norm": 3.6814157962799072, "learning_rate": 1.969064361488221e-05, "loss": 2.0303, "step": 32980 }, { "epoch": 0.43, "grad_norm": 3.7510862350463867, "learning_rate": 1.9690617679875753e-05, "loss": 1.8331, "step": 32981 }, { "epoch": 0.43, "grad_norm": 3.5805423259735107, "learning_rate": 1.969059174379929e-05, "loss": 1.7267, "step": 32982 }, { "epoch": 0.43, "grad_norm": 4.1328277587890625, "learning_rate": 1.9690565806652818e-05, "loss": 2.4596, "step": 32983 }, { "epoch": 0.43, "grad_norm": 3.684454917907715, "learning_rate": 1.9690539868436342e-05, "loss": 1.5579, "step": 32984 }, { "epoch": 0.43, "grad_norm": 3.572200298309326, "learning_rate": 1.9690513929149867e-05, "loss": 2.0614, "step": 32985 }, { "epoch": 0.43, "grad_norm": 4.481389045715332, "learning_rate": 1.9690487988793396e-05, "loss": 2.3468, "step": 32986 }, { "epoch": 0.43, "grad_norm": 3.7815239429473877, "learning_rate": 1.969046204736693e-05, "loss": 1.8655, "step": 32987 }, { "epoch": 0.43, "grad_norm": 4.034816265106201, "learning_rate": 1.969043610487047e-05, "loss": 2.2221, "step": 32988 }, { "epoch": 0.43, "grad_norm": 3.8085429668426514, "learning_rate": 1.969041016130403e-05, "loss": 2.1726, "step": 32989 }, { "epoch": 0.43, "grad_norm": 4.525036811828613, "learning_rate": 1.9690384216667592e-05, "loss": 2.3492, "step": 32990 }, { "epoch": 0.43, "grad_norm": 3.9865119457244873, "learning_rate": 1.969035827096118e-05, "loss": 2.4538, "step": 32991 }, { "epoch": 0.43, "grad_norm": 3.970682382583618, "learning_rate": 1.9690332324184785e-05, "loss": 1.7273, "step": 32992 }, { "epoch": 0.43, "grad_norm": 3.6282567977905273, "learning_rate": 1.9690306376338416e-05, "loss": 1.5591, "step": 32993 }, { "epoch": 0.43, "grad_norm": 3.8402717113494873, "learning_rate": 1.9690280427422067e-05, "loss": 2.148, "step": 32994 }, { "epoch": 0.43, "grad_norm": 4.087228298187256, "learning_rate": 1.969025447743575e-05, "loss": 2.3484, "step": 32995 }, { "epoch": 0.43, "grad_norm": 3.805112361907959, "learning_rate": 1.9690228526379463e-05, "loss": 2.0255, "step": 32996 }, { "epoch": 0.43, "grad_norm": 4.124937057495117, "learning_rate": 1.9690202574253212e-05, "loss": 2.0565, "step": 32997 }, { "epoch": 0.43, "grad_norm": 3.7142105102539062, "learning_rate": 1.9690176621057e-05, "loss": 2.0649, "step": 32998 }, { "epoch": 0.43, "grad_norm": 3.999418020248413, "learning_rate": 1.9690150666790828e-05, "loss": 1.7677, "step": 32999 }, { "epoch": 0.43, "grad_norm": 3.6582999229431152, "learning_rate": 1.9690124711454694e-05, "loss": 1.9343, "step": 33000 }, { "epoch": 0.43, "grad_norm": 4.183663368225098, "learning_rate": 1.969009875504861e-05, "loss": 2.1755, "step": 33001 }, { "epoch": 0.43, "grad_norm": 3.511718511581421, "learning_rate": 1.9690072797572572e-05, "loss": 1.6964, "step": 33002 }, { "epoch": 0.43, "grad_norm": 3.819934368133545, "learning_rate": 1.9690046839026587e-05, "loss": 2.1644, "step": 33003 }, { "epoch": 0.43, "grad_norm": 4.006134986877441, "learning_rate": 1.9690020879410657e-05, "loss": 2.1023, "step": 33004 }, { "epoch": 0.43, "grad_norm": 3.373901844024658, "learning_rate": 1.9689994918724786e-05, "loss": 1.5725, "step": 33005 }, { "epoch": 0.43, "grad_norm": 3.4714200496673584, "learning_rate": 1.968996895696897e-05, "loss": 1.7152, "step": 33006 }, { "epoch": 0.43, "grad_norm": 3.5827977657318115, "learning_rate": 1.968994299414322e-05, "loss": 2.0502, "step": 33007 }, { "epoch": 0.43, "grad_norm": 3.7253291606903076, "learning_rate": 1.9689917030247536e-05, "loss": 1.7138, "step": 33008 }, { "epoch": 0.43, "grad_norm": 4.002143859863281, "learning_rate": 1.968989106528192e-05, "loss": 1.4792, "step": 33009 }, { "epoch": 0.43, "grad_norm": 3.3514468669891357, "learning_rate": 1.968986509924638e-05, "loss": 1.5183, "step": 33010 }, { "epoch": 0.43, "grad_norm": 3.9881813526153564, "learning_rate": 1.968983913214091e-05, "loss": 2.252, "step": 33011 }, { "epoch": 0.43, "grad_norm": 3.1651978492736816, "learning_rate": 1.9689813163965515e-05, "loss": 1.51, "step": 33012 }, { "epoch": 0.43, "grad_norm": 3.364863872528076, "learning_rate": 1.9689787194720207e-05, "loss": 1.7834, "step": 33013 }, { "epoch": 0.43, "grad_norm": 3.586824417114258, "learning_rate": 1.9689761224404976e-05, "loss": 1.7027, "step": 33014 }, { "epoch": 0.43, "grad_norm": 4.109052658081055, "learning_rate": 1.9689735253019835e-05, "loss": 2.0321, "step": 33015 }, { "epoch": 0.43, "grad_norm": 3.9360361099243164, "learning_rate": 1.9689709280564783e-05, "loss": 2.4439, "step": 33016 }, { "epoch": 0.43, "grad_norm": 3.8499488830566406, "learning_rate": 1.968968330703982e-05, "loss": 2.2124, "step": 33017 }, { "epoch": 0.43, "grad_norm": 4.652109622955322, "learning_rate": 1.9689657332444953e-05, "loss": 2.379, "step": 33018 }, { "epoch": 0.43, "grad_norm": 4.615617275238037, "learning_rate": 1.9689631356780184e-05, "loss": 2.5929, "step": 33019 }, { "epoch": 0.43, "grad_norm": 3.7801523208618164, "learning_rate": 1.9689605380045512e-05, "loss": 1.9508, "step": 33020 }, { "epoch": 0.43, "grad_norm": 3.6696341037750244, "learning_rate": 1.9689579402240945e-05, "loss": 2.0036, "step": 33021 }, { "epoch": 0.43, "grad_norm": 3.798396110534668, "learning_rate": 1.9689553423366488e-05, "loss": 1.6283, "step": 33022 }, { "epoch": 0.43, "grad_norm": 3.842137575149536, "learning_rate": 1.9689527443422136e-05, "loss": 1.984, "step": 33023 }, { "epoch": 0.43, "grad_norm": 4.184846878051758, "learning_rate": 1.9689501462407894e-05, "loss": 2.5382, "step": 33024 }, { "epoch": 0.43, "grad_norm": 3.617037534713745, "learning_rate": 1.9689475480323768e-05, "loss": 1.8139, "step": 33025 }, { "epoch": 0.43, "grad_norm": 3.5510401725769043, "learning_rate": 1.9689449497169765e-05, "loss": 1.7022, "step": 33026 }, { "epoch": 0.43, "grad_norm": 4.010308742523193, "learning_rate": 1.9689423512945875e-05, "loss": 1.829, "step": 33027 }, { "epoch": 0.43, "grad_norm": 4.404575824737549, "learning_rate": 1.9689397527652114e-05, "loss": 1.9168, "step": 33028 }, { "epoch": 0.43, "grad_norm": 4.015557765960693, "learning_rate": 1.9689371541288472e-05, "loss": 1.6684, "step": 33029 }, { "epoch": 0.43, "grad_norm": 3.824939727783203, "learning_rate": 1.9689345553854964e-05, "loss": 1.7793, "step": 33030 }, { "epoch": 0.43, "grad_norm": 4.16885232925415, "learning_rate": 1.968931956535159e-05, "loss": 2.5443, "step": 33031 }, { "epoch": 0.43, "grad_norm": 3.6956050395965576, "learning_rate": 1.9689293575778346e-05, "loss": 2.1234, "step": 33032 }, { "epoch": 0.43, "grad_norm": 4.061933994293213, "learning_rate": 1.9689267585135243e-05, "loss": 2.4265, "step": 33033 }, { "epoch": 0.43, "grad_norm": 3.322969913482666, "learning_rate": 1.968924159342228e-05, "loss": 1.5231, "step": 33034 }, { "epoch": 0.43, "grad_norm": 4.176459312438965, "learning_rate": 1.9689215600639458e-05, "loss": 2.1473, "step": 33035 }, { "epoch": 0.43, "grad_norm": 3.824974298477173, "learning_rate": 1.968918960678678e-05, "loss": 1.9718, "step": 33036 }, { "epoch": 0.43, "grad_norm": 4.138083457946777, "learning_rate": 1.9689163611864258e-05, "loss": 1.8933, "step": 33037 }, { "epoch": 0.43, "grad_norm": 4.150606155395508, "learning_rate": 1.968913761587188e-05, "loss": 1.9347, "step": 33038 }, { "epoch": 0.43, "grad_norm": 3.9088895320892334, "learning_rate": 1.9689111618809665e-05, "loss": 1.846, "step": 33039 }, { "epoch": 0.43, "grad_norm": 4.069431304931641, "learning_rate": 1.9689085620677605e-05, "loss": 2.2437, "step": 33040 }, { "epoch": 0.43, "grad_norm": 4.595574378967285, "learning_rate": 1.9689059621475702e-05, "loss": 2.4488, "step": 33041 }, { "epoch": 0.43, "grad_norm": 4.295609474182129, "learning_rate": 1.9689033621203966e-05, "loss": 2.2687, "step": 33042 }, { "epoch": 0.43, "grad_norm": 3.7241952419281006, "learning_rate": 1.9689007619862397e-05, "loss": 1.8282, "step": 33043 }, { "epoch": 0.43, "grad_norm": 4.074845314025879, "learning_rate": 1.9688981617450995e-05, "loss": 2.5446, "step": 33044 }, { "epoch": 0.43, "grad_norm": 3.6704556941986084, "learning_rate": 1.9688955613969766e-05, "loss": 1.8195, "step": 33045 }, { "epoch": 0.43, "grad_norm": 3.81103253364563, "learning_rate": 1.968892960941871e-05, "loss": 1.9063, "step": 33046 }, { "epoch": 0.43, "grad_norm": 3.8840866088867188, "learning_rate": 1.9688903603797837e-05, "loss": 2.1937, "step": 33047 }, { "epoch": 0.43, "grad_norm": 4.240818023681641, "learning_rate": 1.968887759710714e-05, "loss": 2.6605, "step": 33048 }, { "epoch": 0.43, "grad_norm": 3.462247133255005, "learning_rate": 1.9688851589346625e-05, "loss": 1.576, "step": 33049 }, { "epoch": 0.43, "grad_norm": 3.6722466945648193, "learning_rate": 1.9688825580516302e-05, "loss": 1.8196, "step": 33050 }, { "epoch": 0.43, "grad_norm": 3.4572927951812744, "learning_rate": 1.9688799570616163e-05, "loss": 1.7181, "step": 33051 }, { "epoch": 0.43, "grad_norm": 4.1529340744018555, "learning_rate": 1.9688773559646218e-05, "loss": 2.1257, "step": 33052 }, { "epoch": 0.43, "grad_norm": 3.223289728164673, "learning_rate": 1.968874754760647e-05, "loss": 1.3982, "step": 33053 }, { "epoch": 0.43, "grad_norm": 4.148755073547363, "learning_rate": 1.968872153449692e-05, "loss": 1.8662, "step": 33054 }, { "epoch": 0.43, "grad_norm": 4.589961528778076, "learning_rate": 1.9688695520317567e-05, "loss": 1.9914, "step": 33055 }, { "epoch": 0.43, "grad_norm": 3.7019336223602295, "learning_rate": 1.968866950506842e-05, "loss": 2.1379, "step": 33056 }, { "epoch": 0.43, "grad_norm": 3.967008352279663, "learning_rate": 1.968864348874948e-05, "loss": 2.0674, "step": 33057 }, { "epoch": 0.43, "grad_norm": 4.096536636352539, "learning_rate": 1.968861747136075e-05, "loss": 2.033, "step": 33058 }, { "epoch": 0.43, "grad_norm": 3.628995895385742, "learning_rate": 1.9688591452902227e-05, "loss": 2.0048, "step": 33059 }, { "epoch": 0.43, "grad_norm": 3.687938690185547, "learning_rate": 1.9688565433373925e-05, "loss": 2.0344, "step": 33060 }, { "epoch": 0.43, "grad_norm": 3.7545888423919678, "learning_rate": 1.9688539412775838e-05, "loss": 1.92, "step": 33061 }, { "epoch": 0.43, "grad_norm": 4.464089870452881, "learning_rate": 1.9688513391107972e-05, "loss": 2.4125, "step": 33062 }, { "epoch": 0.43, "grad_norm": 4.032284736633301, "learning_rate": 1.9688487368370333e-05, "loss": 1.775, "step": 33063 }, { "epoch": 0.43, "grad_norm": 3.5867726802825928, "learning_rate": 1.9688461344562916e-05, "loss": 2.0919, "step": 33064 }, { "epoch": 0.43, "grad_norm": 4.540075778961182, "learning_rate": 1.9688435319685733e-05, "loss": 2.4451, "step": 33065 }, { "epoch": 0.43, "grad_norm": 4.210480690002441, "learning_rate": 1.9688409293738782e-05, "loss": 2.1573, "step": 33066 }, { "epoch": 0.43, "grad_norm": 3.8494222164154053, "learning_rate": 1.9688383266722062e-05, "loss": 2.2172, "step": 33067 }, { "epoch": 0.43, "grad_norm": 3.7762670516967773, "learning_rate": 1.9688357238635583e-05, "loss": 1.7414, "step": 33068 }, { "epoch": 0.43, "grad_norm": 4.094486236572266, "learning_rate": 1.9688331209479346e-05, "loss": 2.6526, "step": 33069 }, { "epoch": 0.43, "grad_norm": 3.223947525024414, "learning_rate": 1.968830517925335e-05, "loss": 1.4625, "step": 33070 }, { "epoch": 0.43, "grad_norm": 4.258052825927734, "learning_rate": 1.9688279147957603e-05, "loss": 2.0385, "step": 33071 }, { "epoch": 0.43, "grad_norm": 3.816865921020508, "learning_rate": 1.9688253115592103e-05, "loss": 1.7491, "step": 33072 }, { "epoch": 0.43, "grad_norm": 3.4075894355773926, "learning_rate": 1.968822708215686e-05, "loss": 1.4861, "step": 33073 }, { "epoch": 0.43, "grad_norm": 3.9001617431640625, "learning_rate": 1.968820104765187e-05, "loss": 2.1843, "step": 33074 }, { "epoch": 0.43, "grad_norm": 3.702139139175415, "learning_rate": 1.968817501207714e-05, "loss": 1.9738, "step": 33075 }, { "epoch": 0.43, "grad_norm": 4.110004425048828, "learning_rate": 1.9688148975432666e-05, "loss": 2.1687, "step": 33076 }, { "epoch": 0.43, "grad_norm": 3.6840991973876953, "learning_rate": 1.968812293771846e-05, "loss": 2.0764, "step": 33077 }, { "epoch": 0.43, "grad_norm": 4.002906322479248, "learning_rate": 1.968809689893452e-05, "loss": 1.7114, "step": 33078 }, { "epoch": 0.43, "grad_norm": 3.3303112983703613, "learning_rate": 1.968807085908085e-05, "loss": 1.8566, "step": 33079 }, { "epoch": 0.43, "grad_norm": 3.971402406692505, "learning_rate": 1.968804481815745e-05, "loss": 1.8745, "step": 33080 }, { "epoch": 0.43, "grad_norm": 3.7081398963928223, "learning_rate": 1.9688018776164327e-05, "loss": 2.1882, "step": 33081 }, { "epoch": 0.43, "grad_norm": 3.772031307220459, "learning_rate": 1.9687992733101483e-05, "loss": 1.7503, "step": 33082 }, { "epoch": 0.43, "grad_norm": 3.358152389526367, "learning_rate": 1.968796668896892e-05, "loss": 1.7523, "step": 33083 }, { "epoch": 0.43, "grad_norm": 3.454195737838745, "learning_rate": 1.9687940643766644e-05, "loss": 1.7667, "step": 33084 }, { "epoch": 0.43, "grad_norm": 3.775578022003174, "learning_rate": 1.9687914597494652e-05, "loss": 1.6993, "step": 33085 }, { "epoch": 0.43, "grad_norm": 4.033432483673096, "learning_rate": 1.968788855015295e-05, "loss": 2.1951, "step": 33086 }, { "epoch": 0.43, "grad_norm": 3.8342347145080566, "learning_rate": 1.9687862501741543e-05, "loss": 2.1424, "step": 33087 }, { "epoch": 0.43, "grad_norm": 4.251084804534912, "learning_rate": 1.9687836452260428e-05, "loss": 2.001, "step": 33088 }, { "epoch": 0.43, "grad_norm": 3.704685926437378, "learning_rate": 1.9687810401709613e-05, "loss": 1.9927, "step": 33089 }, { "epoch": 0.43, "grad_norm": 3.6885342597961426, "learning_rate": 1.96877843500891e-05, "loss": 2.249, "step": 33090 }, { "epoch": 0.43, "grad_norm": 3.5477826595306396, "learning_rate": 1.968775829739889e-05, "loss": 1.6605, "step": 33091 }, { "epoch": 0.43, "grad_norm": 3.93294620513916, "learning_rate": 1.9687732243638987e-05, "loss": 2.5468, "step": 33092 }, { "epoch": 0.43, "grad_norm": 3.797518253326416, "learning_rate": 1.9687706188809398e-05, "loss": 2.3111, "step": 33093 }, { "epoch": 0.43, "grad_norm": 4.0786871910095215, "learning_rate": 1.9687680132910118e-05, "loss": 2.5509, "step": 33094 }, { "epoch": 0.43, "grad_norm": 3.69687557220459, "learning_rate": 1.9687654075941154e-05, "loss": 1.9341, "step": 33095 }, { "epoch": 0.43, "grad_norm": 4.817414283752441, "learning_rate": 1.968762801790251e-05, "loss": 2.5036, "step": 33096 }, { "epoch": 0.43, "grad_norm": 3.8100154399871826, "learning_rate": 1.9687601958794188e-05, "loss": 1.9497, "step": 33097 }, { "epoch": 0.43, "grad_norm": 4.105313777923584, "learning_rate": 1.9687575898616186e-05, "loss": 2.0822, "step": 33098 }, { "epoch": 0.43, "grad_norm": 4.440059185028076, "learning_rate": 1.9687549837368513e-05, "loss": 2.2574, "step": 33099 }, { "epoch": 0.43, "grad_norm": 3.626594305038452, "learning_rate": 1.9687523775051173e-05, "loss": 2.0056, "step": 33100 }, { "epoch": 0.43, "grad_norm": 4.072243690490723, "learning_rate": 1.9687497711664166e-05, "loss": 2.3151, "step": 33101 }, { "epoch": 0.43, "grad_norm": 3.1156868934631348, "learning_rate": 1.9687471647207492e-05, "loss": 1.3679, "step": 33102 }, { "epoch": 0.43, "grad_norm": 4.444886207580566, "learning_rate": 1.968744558168116e-05, "loss": 2.1577, "step": 33103 }, { "epoch": 0.43, "grad_norm": 3.8284096717834473, "learning_rate": 1.9687419515085166e-05, "loss": 1.9262, "step": 33104 }, { "epoch": 0.43, "grad_norm": 4.093874931335449, "learning_rate": 1.9687393447419518e-05, "loss": 2.048, "step": 33105 }, { "epoch": 0.43, "grad_norm": 3.9738857746124268, "learning_rate": 1.968736737868422e-05, "loss": 2.308, "step": 33106 }, { "epoch": 0.43, "grad_norm": 4.0228400230407715, "learning_rate": 1.9687341308879268e-05, "loss": 1.6843, "step": 33107 }, { "epoch": 0.43, "grad_norm": 3.5167174339294434, "learning_rate": 1.9687315238004672e-05, "loss": 1.6735, "step": 33108 }, { "epoch": 0.43, "grad_norm": 3.6824793815612793, "learning_rate": 1.9687289166060434e-05, "loss": 1.7568, "step": 33109 }, { "epoch": 0.43, "grad_norm": 3.4219727516174316, "learning_rate": 1.968726309304655e-05, "loss": 2.0101, "step": 33110 }, { "epoch": 0.43, "grad_norm": 3.386469602584839, "learning_rate": 1.968723701896303e-05, "loss": 1.7852, "step": 33111 }, { "epoch": 0.43, "grad_norm": 3.7194933891296387, "learning_rate": 1.9687210943809874e-05, "loss": 1.8805, "step": 33112 }, { "epoch": 0.43, "grad_norm": 3.978222370147705, "learning_rate": 1.968718486758709e-05, "loss": 1.7767, "step": 33113 }, { "epoch": 0.43, "grad_norm": 3.619767189025879, "learning_rate": 1.968715879029467e-05, "loss": 1.6503, "step": 33114 }, { "epoch": 0.43, "grad_norm": 4.288691520690918, "learning_rate": 1.9687132711932626e-05, "loss": 2.4048, "step": 33115 }, { "epoch": 0.43, "grad_norm": 3.818572759628296, "learning_rate": 1.968710663250096e-05, "loss": 2.2105, "step": 33116 }, { "epoch": 0.43, "grad_norm": 4.24393367767334, "learning_rate": 1.9687080551999668e-05, "loss": 2.3679, "step": 33117 }, { "epoch": 0.43, "grad_norm": 4.262157917022705, "learning_rate": 1.9687054470428764e-05, "loss": 2.2954, "step": 33118 }, { "epoch": 0.43, "grad_norm": 3.5943806171417236, "learning_rate": 1.9687028387788244e-05, "loss": 1.8123, "step": 33119 }, { "epoch": 0.43, "grad_norm": 3.54827618598938, "learning_rate": 1.9687002304078108e-05, "loss": 1.8638, "step": 33120 }, { "epoch": 0.43, "grad_norm": 3.1848888397216797, "learning_rate": 1.9686976219298362e-05, "loss": 1.7187, "step": 33121 }, { "epoch": 0.43, "grad_norm": 3.776702404022217, "learning_rate": 1.9686950133449014e-05, "loss": 1.9135, "step": 33122 }, { "epoch": 0.43, "grad_norm": 3.9474563598632812, "learning_rate": 1.9686924046530063e-05, "loss": 2.105, "step": 33123 }, { "epoch": 0.43, "grad_norm": 3.8983986377716064, "learning_rate": 1.9686897958541506e-05, "loss": 2.3365, "step": 33124 }, { "epoch": 0.43, "grad_norm": 3.3381924629211426, "learning_rate": 1.9686871869483356e-05, "loss": 1.5551, "step": 33125 }, { "epoch": 0.43, "grad_norm": 3.902350425720215, "learning_rate": 1.9686845779355607e-05, "loss": 2.2305, "step": 33126 }, { "epoch": 0.43, "grad_norm": 3.621758222579956, "learning_rate": 1.968681968815827e-05, "loss": 1.9444, "step": 33127 }, { "epoch": 0.43, "grad_norm": 3.724881172180176, "learning_rate": 1.968679359589134e-05, "loss": 1.8059, "step": 33128 }, { "epoch": 0.43, "grad_norm": 3.647250175476074, "learning_rate": 1.9686767502554825e-05, "loss": 1.8458, "step": 33129 }, { "epoch": 0.43, "grad_norm": 3.6920177936553955, "learning_rate": 1.9686741408148726e-05, "loss": 1.8196, "step": 33130 }, { "epoch": 0.43, "grad_norm": 3.7058002948760986, "learning_rate": 1.9686715312673044e-05, "loss": 1.6143, "step": 33131 }, { "epoch": 0.43, "grad_norm": 4.790688991546631, "learning_rate": 1.968668921612779e-05, "loss": 2.4081, "step": 33132 }, { "epoch": 0.43, "grad_norm": 3.2582995891571045, "learning_rate": 1.9686663118512958e-05, "loss": 1.6531, "step": 33133 }, { "epoch": 0.43, "grad_norm": 3.7018184661865234, "learning_rate": 1.9686637019828553e-05, "loss": 1.883, "step": 33134 }, { "epoch": 0.43, "grad_norm": 4.785104274749756, "learning_rate": 1.9686610920074583e-05, "loss": 2.3744, "step": 33135 }, { "epoch": 0.43, "grad_norm": 4.191288948059082, "learning_rate": 1.9686584819251044e-05, "loss": 2.1608, "step": 33136 }, { "epoch": 0.43, "grad_norm": 4.3223042488098145, "learning_rate": 1.9686558717357943e-05, "loss": 2.2861, "step": 33137 }, { "epoch": 0.43, "grad_norm": 4.239264011383057, "learning_rate": 1.968653261439528e-05, "loss": 1.8379, "step": 33138 }, { "epoch": 0.43, "grad_norm": 4.213879585266113, "learning_rate": 1.968650651036306e-05, "loss": 1.954, "step": 33139 }, { "epoch": 0.43, "grad_norm": 4.174995422363281, "learning_rate": 1.9686480405261285e-05, "loss": 1.9116, "step": 33140 }, { "epoch": 0.43, "grad_norm": 4.215976238250732, "learning_rate": 1.968645429908996e-05, "loss": 2.4024, "step": 33141 }, { "epoch": 0.43, "grad_norm": 3.6383886337280273, "learning_rate": 1.9686428191849085e-05, "loss": 1.8352, "step": 33142 }, { "epoch": 0.43, "grad_norm": 3.6834771633148193, "learning_rate": 1.9686402083538662e-05, "loss": 2.2241, "step": 33143 }, { "epoch": 0.43, "grad_norm": 4.009824275970459, "learning_rate": 1.9686375974158698e-05, "loss": 2.156, "step": 33144 }, { "epoch": 0.43, "grad_norm": 3.901958465576172, "learning_rate": 1.9686349863709192e-05, "loss": 2.11, "step": 33145 }, { "epoch": 0.43, "grad_norm": 4.722837924957275, "learning_rate": 1.968632375219015e-05, "loss": 2.4768, "step": 33146 }, { "epoch": 0.43, "grad_norm": 3.75303316116333, "learning_rate": 1.9686297639601575e-05, "loss": 2.0271, "step": 33147 }, { "epoch": 0.43, "grad_norm": 4.179112911224365, "learning_rate": 1.9686271525943467e-05, "loss": 2.1007, "step": 33148 }, { "epoch": 0.43, "grad_norm": 3.833815336227417, "learning_rate": 1.968624541121583e-05, "loss": 1.9769, "step": 33149 }, { "epoch": 0.43, "grad_norm": 3.991802453994751, "learning_rate": 1.9686219295418668e-05, "loss": 2.2223, "step": 33150 }, { "epoch": 0.43, "grad_norm": 3.6933155059814453, "learning_rate": 1.9686193178551983e-05, "loss": 2.1226, "step": 33151 }, { "epoch": 0.43, "grad_norm": 3.9607903957366943, "learning_rate": 1.9686167060615776e-05, "loss": 2.1057, "step": 33152 }, { "epoch": 0.43, "grad_norm": 3.94297194480896, "learning_rate": 1.9686140941610055e-05, "loss": 2.0728, "step": 33153 }, { "epoch": 0.43, "grad_norm": 3.945974588394165, "learning_rate": 1.9686114821534815e-05, "loss": 1.8521, "step": 33154 }, { "epoch": 0.43, "grad_norm": 3.384242296218872, "learning_rate": 1.968608870039007e-05, "loss": 1.6202, "step": 33155 }, { "epoch": 0.43, "grad_norm": 3.383267879486084, "learning_rate": 1.968606257817581e-05, "loss": 1.5625, "step": 33156 }, { "epoch": 0.43, "grad_norm": 4.239558696746826, "learning_rate": 1.9686036454892047e-05, "loss": 2.2345, "step": 33157 }, { "epoch": 0.43, "grad_norm": 3.7528932094573975, "learning_rate": 1.9686010330538783e-05, "loss": 1.8131, "step": 33158 }, { "epoch": 0.43, "grad_norm": 4.317840576171875, "learning_rate": 1.968598420511602e-05, "loss": 2.1369, "step": 33159 }, { "epoch": 0.43, "grad_norm": 4.248409271240234, "learning_rate": 1.9685958078623756e-05, "loss": 2.2062, "step": 33160 }, { "epoch": 0.43, "grad_norm": 3.740940570831299, "learning_rate": 1.9685931951062002e-05, "loss": 1.8487, "step": 33161 }, { "epoch": 0.43, "grad_norm": 3.35929799079895, "learning_rate": 1.968590582243075e-05, "loss": 1.7828, "step": 33162 }, { "epoch": 0.43, "grad_norm": 3.6012206077575684, "learning_rate": 1.9685879692730016e-05, "loss": 1.9658, "step": 33163 }, { "epoch": 0.43, "grad_norm": 4.061460018157959, "learning_rate": 1.9685853561959796e-05, "loss": 2.1164, "step": 33164 }, { "epoch": 0.43, "grad_norm": 3.9976511001586914, "learning_rate": 1.968582743012009e-05, "loss": 2.0123, "step": 33165 }, { "epoch": 0.43, "grad_norm": 3.836299180984497, "learning_rate": 1.9685801297210908e-05, "loss": 2.2601, "step": 33166 }, { "epoch": 0.43, "grad_norm": 4.309260845184326, "learning_rate": 1.9685775163232247e-05, "loss": 2.4659, "step": 33167 }, { "epoch": 0.43, "grad_norm": 3.5395607948303223, "learning_rate": 1.9685749028184114e-05, "loss": 1.7679, "step": 33168 }, { "epoch": 0.43, "grad_norm": 3.752511739730835, "learning_rate": 1.9685722892066508e-05, "loss": 2.163, "step": 33169 }, { "epoch": 0.43, "grad_norm": 3.776538610458374, "learning_rate": 1.9685696754879435e-05, "loss": 1.6206, "step": 33170 }, { "epoch": 0.43, "grad_norm": 3.8913471698760986, "learning_rate": 1.9685670616622897e-05, "loss": 2.0714, "step": 33171 }, { "epoch": 0.43, "grad_norm": 4.552252769470215, "learning_rate": 1.9685644477296893e-05, "loss": 2.0997, "step": 33172 }, { "epoch": 0.43, "grad_norm": 3.7261087894439697, "learning_rate": 1.9685618336901435e-05, "loss": 2.0859, "step": 33173 }, { "epoch": 0.43, "grad_norm": 3.826101779937744, "learning_rate": 1.9685592195436517e-05, "loss": 2.4205, "step": 33174 }, { "epoch": 0.43, "grad_norm": 3.9174656867980957, "learning_rate": 1.9685566052902148e-05, "loss": 1.7743, "step": 33175 }, { "epoch": 0.43, "grad_norm": 3.1337320804595947, "learning_rate": 1.9685539909298325e-05, "loss": 1.6885, "step": 33176 }, { "epoch": 0.43, "grad_norm": 3.680433511734009, "learning_rate": 1.9685513764625056e-05, "loss": 1.7667, "step": 33177 }, { "epoch": 0.43, "grad_norm": 3.868208646774292, "learning_rate": 1.968548761888234e-05, "loss": 1.8126, "step": 33178 }, { "epoch": 0.43, "grad_norm": 3.702941656112671, "learning_rate": 1.9685461472070188e-05, "loss": 1.7924, "step": 33179 }, { "epoch": 0.43, "grad_norm": 3.592987537384033, "learning_rate": 1.9685435324188588e-05, "loss": 1.7654, "step": 33180 }, { "epoch": 0.43, "grad_norm": 3.856613874435425, "learning_rate": 1.968540917523756e-05, "loss": 2.2715, "step": 33181 }, { "epoch": 0.43, "grad_norm": 3.359807252883911, "learning_rate": 1.9685383025217092e-05, "loss": 1.7997, "step": 33182 }, { "epoch": 0.43, "grad_norm": 3.2923567295074463, "learning_rate": 1.9685356874127196e-05, "loss": 1.6097, "step": 33183 }, { "epoch": 0.43, "grad_norm": 3.9048962593078613, "learning_rate": 1.9685330721967873e-05, "loss": 2.2332, "step": 33184 }, { "epoch": 0.43, "grad_norm": 3.3414711952209473, "learning_rate": 1.968530456873912e-05, "loss": 1.601, "step": 33185 }, { "epoch": 0.43, "grad_norm": 3.9576261043548584, "learning_rate": 1.9685278414440952e-05, "loss": 2.1321, "step": 33186 }, { "epoch": 0.43, "grad_norm": 3.5292904376983643, "learning_rate": 1.9685252259073363e-05, "loss": 1.8991, "step": 33187 }, { "epoch": 0.43, "grad_norm": 4.242511749267578, "learning_rate": 1.9685226102636355e-05, "loss": 2.5941, "step": 33188 }, { "epoch": 0.43, "grad_norm": 4.559744834899902, "learning_rate": 1.9685199945129937e-05, "loss": 2.2701, "step": 33189 }, { "epoch": 0.43, "grad_norm": 4.2347283363342285, "learning_rate": 1.9685173786554106e-05, "loss": 2.2153, "step": 33190 }, { "epoch": 0.43, "grad_norm": 3.1982169151306152, "learning_rate": 1.968514762690887e-05, "loss": 1.4982, "step": 33191 }, { "epoch": 0.43, "grad_norm": 3.939661979675293, "learning_rate": 1.9685121466194225e-05, "loss": 1.7578, "step": 33192 }, { "epoch": 0.43, "grad_norm": 3.902454376220703, "learning_rate": 1.9685095304410186e-05, "loss": 1.9135, "step": 33193 }, { "epoch": 0.43, "grad_norm": 3.978851318359375, "learning_rate": 1.968506914155674e-05, "loss": 1.815, "step": 33194 }, { "epoch": 0.43, "grad_norm": 4.0043487548828125, "learning_rate": 1.9685042977633905e-05, "loss": 2.2164, "step": 33195 }, { "epoch": 0.43, "grad_norm": 4.469789028167725, "learning_rate": 1.9685016812641674e-05, "loss": 2.6615, "step": 33196 }, { "epoch": 0.43, "grad_norm": 4.177393913269043, "learning_rate": 1.968499064658005e-05, "loss": 2.3656, "step": 33197 }, { "epoch": 0.43, "grad_norm": 4.154656887054443, "learning_rate": 1.968496447944904e-05, "loss": 2.3399, "step": 33198 }, { "epoch": 0.43, "grad_norm": 3.7663419246673584, "learning_rate": 1.9684938311248646e-05, "loss": 1.8009, "step": 33199 }, { "epoch": 0.43, "grad_norm": 3.6492080688476562, "learning_rate": 1.9684912141978875e-05, "loss": 1.7242, "step": 33200 }, { "epoch": 0.43, "grad_norm": 3.622302293777466, "learning_rate": 1.968488597163972e-05, "loss": 2.0361, "step": 33201 }, { "epoch": 0.43, "grad_norm": 3.720627784729004, "learning_rate": 1.9684859800231192e-05, "loss": 1.8197, "step": 33202 }, { "epoch": 0.43, "grad_norm": 3.5181124210357666, "learning_rate": 1.968483362775329e-05, "loss": 1.7623, "step": 33203 }, { "epoch": 0.43, "grad_norm": 3.4841408729553223, "learning_rate": 1.968480745420602e-05, "loss": 1.7782, "step": 33204 }, { "epoch": 0.43, "grad_norm": 3.5625498294830322, "learning_rate": 1.968478127958938e-05, "loss": 1.598, "step": 33205 }, { "epoch": 0.43, "grad_norm": 3.7937023639678955, "learning_rate": 1.9684755103903375e-05, "loss": 1.9734, "step": 33206 }, { "epoch": 0.43, "grad_norm": 3.664167881011963, "learning_rate": 1.968472892714801e-05, "loss": 1.7623, "step": 33207 }, { "epoch": 0.43, "grad_norm": 4.125937461853027, "learning_rate": 1.968470274932329e-05, "loss": 2.4427, "step": 33208 }, { "epoch": 0.43, "grad_norm": 3.6492857933044434, "learning_rate": 1.968467657042921e-05, "loss": 1.8821, "step": 33209 }, { "epoch": 0.43, "grad_norm": 3.9819533824920654, "learning_rate": 1.968465039046578e-05, "loss": 2.0962, "step": 33210 }, { "epoch": 0.43, "grad_norm": 4.08468770980835, "learning_rate": 1.9684624209433e-05, "loss": 2.3709, "step": 33211 }, { "epoch": 0.43, "grad_norm": 4.025464057922363, "learning_rate": 1.9684598027330876e-05, "loss": 2.2244, "step": 33212 }, { "epoch": 0.43, "grad_norm": 3.7383859157562256, "learning_rate": 1.9684571844159403e-05, "loss": 1.7866, "step": 33213 }, { "epoch": 0.43, "grad_norm": 4.158905506134033, "learning_rate": 1.968454565991859e-05, "loss": 2.2308, "step": 33214 }, { "epoch": 0.43, "grad_norm": 4.4315104484558105, "learning_rate": 1.968451947460844e-05, "loss": 2.5141, "step": 33215 }, { "epoch": 0.43, "grad_norm": 4.07938814163208, "learning_rate": 1.9684493288228953e-05, "loss": 1.9052, "step": 33216 }, { "epoch": 0.43, "grad_norm": 4.623773097991943, "learning_rate": 1.9684467100780137e-05, "loss": 2.4095, "step": 33217 }, { "epoch": 0.43, "grad_norm": 3.993748426437378, "learning_rate": 1.968444091226199e-05, "loss": 2.0788, "step": 33218 }, { "epoch": 0.43, "grad_norm": 3.4955861568450928, "learning_rate": 1.9684414722674513e-05, "loss": 2.0604, "step": 33219 }, { "epoch": 0.43, "grad_norm": 3.8147799968719482, "learning_rate": 1.9684388532017716e-05, "loss": 1.8345, "step": 33220 }, { "epoch": 0.43, "grad_norm": 3.992917537689209, "learning_rate": 1.9684362340291597e-05, "loss": 2.0956, "step": 33221 }, { "epoch": 0.43, "grad_norm": 4.053461074829102, "learning_rate": 1.9684336147496164e-05, "loss": 2.1055, "step": 33222 }, { "epoch": 0.43, "grad_norm": 3.7670819759368896, "learning_rate": 1.9684309953631412e-05, "loss": 2.2343, "step": 33223 }, { "epoch": 0.43, "grad_norm": 3.594677686691284, "learning_rate": 1.9684283758697346e-05, "loss": 1.8946, "step": 33224 }, { "epoch": 0.43, "grad_norm": 3.4162776470184326, "learning_rate": 1.9684257562693975e-05, "loss": 1.4232, "step": 33225 }, { "epoch": 0.43, "grad_norm": 3.9286539554595947, "learning_rate": 1.9684231365621296e-05, "loss": 1.7715, "step": 33226 }, { "epoch": 0.43, "grad_norm": 4.164831161499023, "learning_rate": 1.9684205167479312e-05, "loss": 2.1614, "step": 33227 }, { "epoch": 0.43, "grad_norm": 3.8411548137664795, "learning_rate": 1.968417896826803e-05, "loss": 2.0622, "step": 33228 }, { "epoch": 0.43, "grad_norm": 3.7468528747558594, "learning_rate": 1.968415276798745e-05, "loss": 2.0803, "step": 33229 }, { "epoch": 0.43, "grad_norm": 3.401203155517578, "learning_rate": 1.9684126566637572e-05, "loss": 1.6817, "step": 33230 }, { "epoch": 0.43, "grad_norm": 3.6030242443084717, "learning_rate": 1.9684100364218408e-05, "loss": 1.8293, "step": 33231 }, { "epoch": 0.43, "grad_norm": 4.103472709655762, "learning_rate": 1.968407416072995e-05, "loss": 2.216, "step": 33232 }, { "epoch": 0.43, "grad_norm": 4.114969730377197, "learning_rate": 1.9684047956172206e-05, "loss": 2.2439, "step": 33233 }, { "epoch": 0.43, "grad_norm": 4.0502190589904785, "learning_rate": 1.9684021750545182e-05, "loss": 1.9576, "step": 33234 }, { "epoch": 0.43, "grad_norm": 3.8567593097686768, "learning_rate": 1.9683995543848877e-05, "loss": 2.1756, "step": 33235 }, { "epoch": 0.43, "grad_norm": 4.20971155166626, "learning_rate": 1.9683969336083292e-05, "loss": 2.1979, "step": 33236 }, { "epoch": 0.43, "grad_norm": 3.632030963897705, "learning_rate": 1.9683943127248436e-05, "loss": 1.8907, "step": 33237 }, { "epoch": 0.43, "grad_norm": 3.7594432830810547, "learning_rate": 1.968391691734431e-05, "loss": 1.7954, "step": 33238 }, { "epoch": 0.43, "grad_norm": 4.03642463684082, "learning_rate": 1.9683890706370908e-05, "loss": 1.8393, "step": 33239 }, { "epoch": 0.43, "grad_norm": 3.761101245880127, "learning_rate": 1.9683864494328243e-05, "loss": 2.004, "step": 33240 }, { "epoch": 0.43, "grad_norm": 4.033276081085205, "learning_rate": 1.9683838281216318e-05, "loss": 2.0378, "step": 33241 }, { "epoch": 0.43, "grad_norm": 4.603353023529053, "learning_rate": 1.9683812067035132e-05, "loss": 2.137, "step": 33242 }, { "epoch": 0.43, "grad_norm": 3.384828805923462, "learning_rate": 1.9683785851784685e-05, "loss": 1.5873, "step": 33243 }, { "epoch": 0.43, "grad_norm": 3.4088375568389893, "learning_rate": 1.968375963546499e-05, "loss": 1.8159, "step": 33244 }, { "epoch": 0.43, "grad_norm": 3.95670485496521, "learning_rate": 1.9683733418076038e-05, "loss": 2.0651, "step": 33245 }, { "epoch": 0.43, "grad_norm": 4.176616191864014, "learning_rate": 1.968370719961784e-05, "loss": 2.379, "step": 33246 }, { "epoch": 0.43, "grad_norm": 4.374295234680176, "learning_rate": 1.9683680980090396e-05, "loss": 1.8951, "step": 33247 }, { "epoch": 0.43, "grad_norm": 3.7248141765594482, "learning_rate": 1.9683654759493712e-05, "loss": 2.4261, "step": 33248 }, { "epoch": 0.43, "grad_norm": 3.6006288528442383, "learning_rate": 1.9683628537827784e-05, "loss": 1.6679, "step": 33249 }, { "epoch": 0.43, "grad_norm": 3.703927516937256, "learning_rate": 1.9683602315092622e-05, "loss": 1.639, "step": 33250 }, { "epoch": 0.43, "grad_norm": 4.133418083190918, "learning_rate": 1.9683576091288227e-05, "loss": 1.9427, "step": 33251 }, { "epoch": 0.43, "grad_norm": 4.653965473175049, "learning_rate": 1.96835498664146e-05, "loss": 1.9687, "step": 33252 }, { "epoch": 0.43, "grad_norm": 4.0853962898254395, "learning_rate": 1.9683523640471744e-05, "loss": 1.7032, "step": 33253 }, { "epoch": 0.43, "grad_norm": 3.7998268604278564, "learning_rate": 1.968349741345966e-05, "loss": 2.1145, "step": 33254 }, { "epoch": 0.43, "grad_norm": 3.3715403079986572, "learning_rate": 1.9683471185378354e-05, "loss": 1.6776, "step": 33255 }, { "epoch": 0.43, "grad_norm": 3.7686095237731934, "learning_rate": 1.9683444956227833e-05, "loss": 1.7263, "step": 33256 }, { "epoch": 0.43, "grad_norm": 3.606889009475708, "learning_rate": 1.968341872600809e-05, "loss": 1.9992, "step": 33257 }, { "epoch": 0.43, "grad_norm": 3.3554630279541016, "learning_rate": 1.9683392494719137e-05, "loss": 1.6419, "step": 33258 }, { "epoch": 0.43, "grad_norm": 3.2660434246063232, "learning_rate": 1.9683366262360973e-05, "loss": 1.6227, "step": 33259 }, { "epoch": 0.43, "grad_norm": 3.8915791511535645, "learning_rate": 1.9683340028933603e-05, "loss": 1.9039, "step": 33260 }, { "epoch": 0.43, "grad_norm": 3.581332206726074, "learning_rate": 1.9683313794437023e-05, "loss": 2.108, "step": 33261 }, { "epoch": 0.43, "grad_norm": 4.164453029632568, "learning_rate": 1.9683287558871244e-05, "loss": 2.0516, "step": 33262 }, { "epoch": 0.43, "grad_norm": 3.853872537612915, "learning_rate": 1.9683261322236268e-05, "loss": 2.0656, "step": 33263 }, { "epoch": 0.43, "grad_norm": 3.9834561347961426, "learning_rate": 1.968323508453209e-05, "loss": 2.1217, "step": 33264 }, { "epoch": 0.43, "grad_norm": 4.088652610778809, "learning_rate": 1.9683208845758722e-05, "loss": 2.0377, "step": 33265 }, { "epoch": 0.43, "grad_norm": 3.1062512397766113, "learning_rate": 1.9683182605916165e-05, "loss": 1.4962, "step": 33266 }, { "epoch": 0.43, "grad_norm": 4.508806228637695, "learning_rate": 1.9683156365004418e-05, "loss": 1.95, "step": 33267 }, { "epoch": 0.43, "grad_norm": 4.2079644203186035, "learning_rate": 1.9683130123023485e-05, "loss": 2.31, "step": 33268 }, { "epoch": 0.43, "grad_norm": 3.7392709255218506, "learning_rate": 1.9683103879973374e-05, "loss": 2.204, "step": 33269 }, { "epoch": 0.43, "grad_norm": 3.6812422275543213, "learning_rate": 1.968307763585408e-05, "loss": 1.6081, "step": 33270 }, { "epoch": 0.43, "grad_norm": 3.881477117538452, "learning_rate": 1.968305139066561e-05, "loss": 2.3097, "step": 33271 }, { "epoch": 0.43, "grad_norm": 4.024021625518799, "learning_rate": 1.968302514440797e-05, "loss": 1.9942, "step": 33272 }, { "epoch": 0.43, "grad_norm": 4.176015377044678, "learning_rate": 1.968299889708116e-05, "loss": 2.2542, "step": 33273 }, { "epoch": 0.43, "grad_norm": 3.551844358444214, "learning_rate": 1.9682972648685178e-05, "loss": 1.772, "step": 33274 }, { "epoch": 0.43, "grad_norm": 3.950737476348877, "learning_rate": 1.968294639922004e-05, "loss": 2.2332, "step": 33275 }, { "epoch": 0.43, "grad_norm": 3.6980977058410645, "learning_rate": 1.968292014868573e-05, "loss": 2.1512, "step": 33276 }, { "epoch": 0.43, "grad_norm": 3.705219268798828, "learning_rate": 1.9682893897082266e-05, "loss": 2.1387, "step": 33277 }, { "epoch": 0.43, "grad_norm": 3.4121246337890625, "learning_rate": 1.968286764440965e-05, "loss": 1.8219, "step": 33278 }, { "epoch": 0.43, "grad_norm": 4.19884729385376, "learning_rate": 1.9682841390667875e-05, "loss": 2.1329, "step": 33279 }, { "epoch": 0.43, "grad_norm": 4.075961589813232, "learning_rate": 1.968281513585695e-05, "loss": 2.1546, "step": 33280 }, { "epoch": 0.43, "grad_norm": 4.15814733505249, "learning_rate": 1.9682788879976885e-05, "loss": 1.8747, "step": 33281 }, { "epoch": 0.43, "grad_norm": 3.6442034244537354, "learning_rate": 1.968276262302767e-05, "loss": 2.185, "step": 33282 }, { "epoch": 0.43, "grad_norm": 4.455561637878418, "learning_rate": 1.9682736365009315e-05, "loss": 1.9032, "step": 33283 }, { "epoch": 0.43, "grad_norm": 4.675229072570801, "learning_rate": 1.9682710105921824e-05, "loss": 2.374, "step": 33284 }, { "epoch": 0.43, "grad_norm": 3.9711265563964844, "learning_rate": 1.9682683845765194e-05, "loss": 2.3739, "step": 33285 }, { "epoch": 0.43, "grad_norm": 3.617940664291382, "learning_rate": 1.9682657584539432e-05, "loss": 2.146, "step": 33286 }, { "epoch": 0.43, "grad_norm": 4.313282012939453, "learning_rate": 1.9682631322244542e-05, "loss": 1.9917, "step": 33287 }, { "epoch": 0.43, "grad_norm": 4.025363445281982, "learning_rate": 1.9682605058880524e-05, "loss": 1.8885, "step": 33288 }, { "epoch": 0.43, "grad_norm": 3.2521936893463135, "learning_rate": 1.9682578794447384e-05, "loss": 1.9339, "step": 33289 }, { "epoch": 0.43, "grad_norm": 4.182654857635498, "learning_rate": 1.9682552528945122e-05, "loss": 2.3553, "step": 33290 }, { "epoch": 0.43, "grad_norm": 3.3855626583099365, "learning_rate": 1.968252626237374e-05, "loss": 1.8054, "step": 33291 }, { "epoch": 0.43, "grad_norm": 3.7717316150665283, "learning_rate": 1.9682499994733244e-05, "loss": 2.0103, "step": 33292 }, { "epoch": 0.43, "grad_norm": 3.603717565536499, "learning_rate": 1.9682473726023638e-05, "loss": 2.0542, "step": 33293 }, { "epoch": 0.43, "grad_norm": 3.6401712894439697, "learning_rate": 1.968244745624492e-05, "loss": 1.8002, "step": 33294 }, { "epoch": 0.43, "grad_norm": 3.4782211780548096, "learning_rate": 1.9682421185397095e-05, "loss": 1.9823, "step": 33295 }, { "epoch": 0.43, "grad_norm": 3.2024199962615967, "learning_rate": 1.9682394913480168e-05, "loss": 1.5385, "step": 33296 }, { "epoch": 0.43, "grad_norm": 3.8582396507263184, "learning_rate": 1.968236864049414e-05, "loss": 1.942, "step": 33297 }, { "epoch": 0.43, "grad_norm": 3.6245217323303223, "learning_rate": 1.9682342366439014e-05, "loss": 1.7602, "step": 33298 }, { "epoch": 0.43, "grad_norm": 3.615549087524414, "learning_rate": 1.9682316091314793e-05, "loss": 2.0161, "step": 33299 }, { "epoch": 0.43, "grad_norm": 3.506274700164795, "learning_rate": 1.9682289815121482e-05, "loss": 1.6879, "step": 33300 }, { "epoch": 0.43, "grad_norm": 3.9373202323913574, "learning_rate": 1.968226353785908e-05, "loss": 1.8889, "step": 33301 }, { "epoch": 0.43, "grad_norm": 3.461702585220337, "learning_rate": 1.968223725952759e-05, "loss": 1.5823, "step": 33302 }, { "epoch": 0.43, "grad_norm": 4.1976704597473145, "learning_rate": 1.9682210980127017e-05, "loss": 2.0675, "step": 33303 }, { "epoch": 0.43, "grad_norm": 3.190786361694336, "learning_rate": 1.968218469965737e-05, "loss": 1.7208, "step": 33304 }, { "epoch": 0.43, "grad_norm": 3.535618543624878, "learning_rate": 1.968215841811864e-05, "loss": 2.0172, "step": 33305 }, { "epoch": 0.43, "grad_norm": 3.746455669403076, "learning_rate": 1.9682132135510835e-05, "loss": 1.7036, "step": 33306 }, { "epoch": 0.43, "grad_norm": 3.826460123062134, "learning_rate": 1.968210585183396e-05, "loss": 2.3739, "step": 33307 }, { "epoch": 0.43, "grad_norm": 3.7563745975494385, "learning_rate": 1.9682079567088018e-05, "loss": 1.9941, "step": 33308 }, { "epoch": 0.43, "grad_norm": 3.5381383895874023, "learning_rate": 1.9682053281273008e-05, "loss": 1.8059, "step": 33309 }, { "epoch": 0.43, "grad_norm": 3.8608126640319824, "learning_rate": 1.9682026994388935e-05, "loss": 2.1867, "step": 33310 }, { "epoch": 0.43, "grad_norm": 3.8739171028137207, "learning_rate": 1.9682000706435804e-05, "loss": 2.4236, "step": 33311 }, { "epoch": 0.43, "grad_norm": 4.1856536865234375, "learning_rate": 1.9681974417413613e-05, "loss": 2.2059, "step": 33312 }, { "epoch": 0.43, "grad_norm": 3.6612608432769775, "learning_rate": 1.9681948127322367e-05, "loss": 1.8036, "step": 33313 }, { "epoch": 0.43, "grad_norm": 4.3867058753967285, "learning_rate": 1.9681921836162075e-05, "loss": 2.0723, "step": 33314 }, { "epoch": 0.43, "grad_norm": 3.9288368225097656, "learning_rate": 1.968189554393273e-05, "loss": 2.4245, "step": 33315 }, { "epoch": 0.43, "grad_norm": 3.7216203212738037, "learning_rate": 1.9681869250634343e-05, "loss": 1.9755, "step": 33316 }, { "epoch": 0.43, "grad_norm": 3.6448769569396973, "learning_rate": 1.968184295626691e-05, "loss": 1.8914, "step": 33317 }, { "epoch": 0.43, "grad_norm": 3.296053409576416, "learning_rate": 1.968181666083044e-05, "loss": 1.314, "step": 33318 }, { "epoch": 0.43, "grad_norm": 3.1694931983947754, "learning_rate": 1.9681790364324928e-05, "loss": 1.3547, "step": 33319 }, { "epoch": 0.43, "grad_norm": 3.5498034954071045, "learning_rate": 1.9681764066750387e-05, "loss": 1.9594, "step": 33320 }, { "epoch": 0.43, "grad_norm": 3.6470367908477783, "learning_rate": 1.9681737768106813e-05, "loss": 2.043, "step": 33321 }, { "epoch": 0.43, "grad_norm": 3.667142391204834, "learning_rate": 1.9681711468394212e-05, "loss": 1.8289, "step": 33322 }, { "epoch": 0.43, "grad_norm": 3.9986701011657715, "learning_rate": 1.9681685167612584e-05, "loss": 2.1954, "step": 33323 }, { "epoch": 0.43, "grad_norm": 3.5176053047180176, "learning_rate": 1.9681658865761937e-05, "loss": 1.7403, "step": 33324 }, { "epoch": 0.43, "grad_norm": 3.981735944747925, "learning_rate": 1.968163256284227e-05, "loss": 2.013, "step": 33325 }, { "epoch": 0.43, "grad_norm": 3.444985866546631, "learning_rate": 1.968160625885358e-05, "loss": 1.412, "step": 33326 }, { "epoch": 0.43, "grad_norm": 3.5555200576782227, "learning_rate": 1.9681579953795884e-05, "loss": 1.9498, "step": 33327 }, { "epoch": 0.43, "grad_norm": 3.795409917831421, "learning_rate": 1.9681553647669177e-05, "loss": 1.8259, "step": 33328 }, { "epoch": 0.43, "grad_norm": 3.249959707260132, "learning_rate": 1.968152734047346e-05, "loss": 1.561, "step": 33329 }, { "epoch": 0.43, "grad_norm": 3.5516343116760254, "learning_rate": 1.968150103220874e-05, "loss": 1.7296, "step": 33330 }, { "epoch": 0.43, "grad_norm": 4.024895668029785, "learning_rate": 1.9681474722875014e-05, "loss": 2.0054, "step": 33331 }, { "epoch": 0.43, "grad_norm": 3.9870054721832275, "learning_rate": 1.968144841247229e-05, "loss": 2.3644, "step": 33332 }, { "epoch": 0.43, "grad_norm": 3.805634021759033, "learning_rate": 1.9681422101000572e-05, "loss": 1.7028, "step": 33333 }, { "epoch": 0.43, "grad_norm": 3.60775089263916, "learning_rate": 1.9681395788459857e-05, "loss": 1.8927, "step": 33334 }, { "epoch": 0.43, "grad_norm": 4.108503818511963, "learning_rate": 1.968136947485016e-05, "loss": 2.1353, "step": 33335 }, { "epoch": 0.43, "grad_norm": 3.6550519466400146, "learning_rate": 1.968134316017147e-05, "loss": 1.5504, "step": 33336 }, { "epoch": 0.43, "grad_norm": 4.000166893005371, "learning_rate": 1.9681316844423792e-05, "loss": 1.9197, "step": 33337 }, { "epoch": 0.43, "grad_norm": 3.7583227157592773, "learning_rate": 1.9681290527607136e-05, "loss": 1.8994, "step": 33338 }, { "epoch": 0.43, "grad_norm": 4.255524158477783, "learning_rate": 1.96812642097215e-05, "loss": 2.1936, "step": 33339 }, { "epoch": 0.43, "grad_norm": 3.6585471630096436, "learning_rate": 1.968123789076689e-05, "loss": 1.7189, "step": 33340 }, { "epoch": 0.43, "grad_norm": 4.244297504425049, "learning_rate": 1.968121157074331e-05, "loss": 2.4262, "step": 33341 }, { "epoch": 0.43, "grad_norm": 3.6587109565734863, "learning_rate": 1.9681185249650753e-05, "loss": 1.9337, "step": 33342 }, { "epoch": 0.43, "grad_norm": 3.617476224899292, "learning_rate": 1.9681158927489235e-05, "loss": 2.0775, "step": 33343 }, { "epoch": 0.43, "grad_norm": 3.4105064868927, "learning_rate": 1.968113260425875e-05, "loss": 1.7071, "step": 33344 }, { "epoch": 0.43, "grad_norm": 3.5472800731658936, "learning_rate": 1.9681106279959304e-05, "loss": 1.8578, "step": 33345 }, { "epoch": 0.43, "grad_norm": 4.090305328369141, "learning_rate": 1.9681079954590898e-05, "loss": 1.9814, "step": 33346 }, { "epoch": 0.43, "grad_norm": 4.48681116104126, "learning_rate": 1.968105362815354e-05, "loss": 2.3609, "step": 33347 }, { "epoch": 0.43, "grad_norm": 3.3429369926452637, "learning_rate": 1.9681027300647227e-05, "loss": 1.6124, "step": 33348 }, { "epoch": 0.43, "grad_norm": 4.312967300415039, "learning_rate": 1.9681000972071965e-05, "loss": 2.249, "step": 33349 }, { "epoch": 0.43, "grad_norm": 4.094560623168945, "learning_rate": 1.9680974642427754e-05, "loss": 2.4567, "step": 33350 }, { "epoch": 0.43, "grad_norm": 3.8187427520751953, "learning_rate": 1.9680948311714604e-05, "loss": 2.1906, "step": 33351 }, { "epoch": 0.43, "grad_norm": 3.614469051361084, "learning_rate": 1.968092197993251e-05, "loss": 2.2685, "step": 33352 }, { "epoch": 0.43, "grad_norm": 3.3970673084259033, "learning_rate": 1.968089564708148e-05, "loss": 1.443, "step": 33353 }, { "epoch": 0.43, "grad_norm": 3.533771514892578, "learning_rate": 1.968086931316151e-05, "loss": 1.9938, "step": 33354 }, { "epoch": 0.43, "grad_norm": 3.526054620742798, "learning_rate": 1.968084297817261e-05, "loss": 1.7348, "step": 33355 }, { "epoch": 0.43, "grad_norm": 3.800261974334717, "learning_rate": 1.9680816642114783e-05, "loss": 1.6413, "step": 33356 }, { "epoch": 0.43, "grad_norm": 3.8095743656158447, "learning_rate": 1.9680790304988025e-05, "loss": 2.0402, "step": 33357 }, { "epoch": 0.43, "grad_norm": 3.960864543914795, "learning_rate": 1.968076396679235e-05, "loss": 1.9929, "step": 33358 }, { "epoch": 0.43, "grad_norm": 3.794302225112915, "learning_rate": 1.968073762752775e-05, "loss": 1.9632, "step": 33359 }, { "epoch": 0.43, "grad_norm": 4.195112705230713, "learning_rate": 1.9680711287194235e-05, "loss": 1.9041, "step": 33360 }, { "epoch": 0.43, "grad_norm": 3.6024105548858643, "learning_rate": 1.9680684945791804e-05, "loss": 1.8914, "step": 33361 }, { "epoch": 0.43, "grad_norm": 3.8082756996154785, "learning_rate": 1.9680658603320458e-05, "loss": 1.9901, "step": 33362 }, { "epoch": 0.43, "grad_norm": 4.568240165710449, "learning_rate": 1.9680632259780207e-05, "loss": 2.5635, "step": 33363 }, { "epoch": 0.43, "grad_norm": 4.035417079925537, "learning_rate": 1.9680605915171052e-05, "loss": 1.9426, "step": 33364 }, { "epoch": 0.43, "grad_norm": 3.759383201599121, "learning_rate": 1.9680579569492992e-05, "loss": 1.5676, "step": 33365 }, { "epoch": 0.43, "grad_norm": 3.394749164581299, "learning_rate": 1.9680553222746027e-05, "loss": 1.6331, "step": 33366 }, { "epoch": 0.43, "grad_norm": 3.781723976135254, "learning_rate": 1.968052687493017e-05, "loss": 2.1314, "step": 33367 }, { "epoch": 0.43, "grad_norm": 3.5789666175842285, "learning_rate": 1.9680500526045417e-05, "loss": 1.8189, "step": 33368 }, { "epoch": 0.43, "grad_norm": 4.140115261077881, "learning_rate": 1.968047417609177e-05, "loss": 2.0337, "step": 33369 }, { "epoch": 0.43, "grad_norm": 3.606811761856079, "learning_rate": 1.9680447825069238e-05, "loss": 2.0345, "step": 33370 }, { "epoch": 0.43, "grad_norm": 3.5144622325897217, "learning_rate": 1.9680421472977817e-05, "loss": 1.9841, "step": 33371 }, { "epoch": 0.43, "grad_norm": 3.8593928813934326, "learning_rate": 1.968039511981752e-05, "loss": 1.9194, "step": 33372 }, { "epoch": 0.43, "grad_norm": 3.5408132076263428, "learning_rate": 1.9680368765588335e-05, "loss": 1.7718, "step": 33373 }, { "epoch": 0.43, "grad_norm": 3.750208616256714, "learning_rate": 1.9680342410290278e-05, "loss": 1.9246, "step": 33374 }, { "epoch": 0.43, "grad_norm": 3.548491954803467, "learning_rate": 1.9680316053923346e-05, "loss": 1.8816, "step": 33375 }, { "epoch": 0.43, "grad_norm": 3.6116597652435303, "learning_rate": 1.9680289696487544e-05, "loss": 1.9003, "step": 33376 }, { "epoch": 0.43, "grad_norm": 3.750995635986328, "learning_rate": 1.968026333798287e-05, "loss": 2.2811, "step": 33377 }, { "epoch": 0.43, "grad_norm": 3.9524900913238525, "learning_rate": 1.9680236978409333e-05, "loss": 2.0602, "step": 33378 }, { "epoch": 0.43, "grad_norm": 4.009999752044678, "learning_rate": 1.968021061776693e-05, "loss": 1.8672, "step": 33379 }, { "epoch": 0.43, "grad_norm": 4.138031005859375, "learning_rate": 1.9680184256055676e-05, "loss": 2.3701, "step": 33380 }, { "epoch": 0.43, "grad_norm": 3.9426515102386475, "learning_rate": 1.9680157893275556e-05, "loss": 1.7597, "step": 33381 }, { "epoch": 0.43, "grad_norm": 4.008535385131836, "learning_rate": 1.968013152942659e-05, "loss": 1.8628, "step": 33382 }, { "epoch": 0.43, "grad_norm": 4.000117301940918, "learning_rate": 1.968010516450877e-05, "loss": 1.8115, "step": 33383 }, { "epoch": 0.43, "grad_norm": 3.792379140853882, "learning_rate": 1.96800787985221e-05, "loss": 1.6093, "step": 33384 }, { "epoch": 0.43, "grad_norm": 4.018857002258301, "learning_rate": 1.968005243146659e-05, "loss": 1.7126, "step": 33385 }, { "epoch": 0.43, "grad_norm": 3.6625137329101562, "learning_rate": 1.9680026063342234e-05, "loss": 1.8971, "step": 33386 }, { "epoch": 0.43, "grad_norm": 3.4025473594665527, "learning_rate": 1.9679999694149042e-05, "loss": 2.1205, "step": 33387 }, { "epoch": 0.43, "grad_norm": 3.6202142238616943, "learning_rate": 1.9679973323887012e-05, "loss": 1.9113, "step": 33388 }, { "epoch": 0.43, "grad_norm": 3.9628312587738037, "learning_rate": 1.9679946952556146e-05, "loss": 2.402, "step": 33389 }, { "epoch": 0.43, "grad_norm": 3.717719078063965, "learning_rate": 1.9679920580156456e-05, "loss": 1.7046, "step": 33390 }, { "epoch": 0.43, "grad_norm": 4.119898319244385, "learning_rate": 1.9679894206687933e-05, "loss": 1.8434, "step": 33391 }, { "epoch": 0.43, "grad_norm": 3.9249179363250732, "learning_rate": 1.9679867832150586e-05, "loss": 1.7986, "step": 33392 }, { "epoch": 0.43, "grad_norm": 3.461115598678589, "learning_rate": 1.967984145654442e-05, "loss": 1.7803, "step": 33393 }, { "epoch": 0.43, "grad_norm": 3.9680709838867188, "learning_rate": 1.9679815079869432e-05, "loss": 1.8744, "step": 33394 }, { "epoch": 0.43, "grad_norm": 4.25193977355957, "learning_rate": 1.9679788702125632e-05, "loss": 1.6504, "step": 33395 }, { "epoch": 0.43, "grad_norm": 3.6719937324523926, "learning_rate": 1.9679762323313015e-05, "loss": 1.6236, "step": 33396 }, { "epoch": 0.43, "grad_norm": 3.718963861465454, "learning_rate": 1.967973594343159e-05, "loss": 1.6264, "step": 33397 }, { "epoch": 0.43, "grad_norm": 4.1406073570251465, "learning_rate": 1.967970956248136e-05, "loss": 2.1418, "step": 33398 }, { "epoch": 0.43, "grad_norm": 3.79467511177063, "learning_rate": 1.9679683180462325e-05, "loss": 2.0145, "step": 33399 }, { "epoch": 0.43, "grad_norm": 3.9839866161346436, "learning_rate": 1.9679656797374486e-05, "loss": 1.9318, "step": 33400 }, { "epoch": 0.43, "grad_norm": 3.2976646423339844, "learning_rate": 1.9679630413217853e-05, "loss": 1.4531, "step": 33401 }, { "epoch": 0.43, "grad_norm": 3.496769905090332, "learning_rate": 1.9679604027992424e-05, "loss": 1.7745, "step": 33402 }, { "epoch": 0.43, "grad_norm": 3.268594264984131, "learning_rate": 1.96795776416982e-05, "loss": 1.6542, "step": 33403 }, { "epoch": 0.43, "grad_norm": 3.3308780193328857, "learning_rate": 1.9679551254335184e-05, "loss": 1.8095, "step": 33404 }, { "epoch": 0.43, "grad_norm": 3.776301383972168, "learning_rate": 1.9679524865903387e-05, "loss": 2.128, "step": 33405 }, { "epoch": 0.43, "grad_norm": 4.694023609161377, "learning_rate": 1.9679498476402803e-05, "loss": 2.1697, "step": 33406 }, { "epoch": 0.43, "grad_norm": 3.6336987018585205, "learning_rate": 1.9679472085833437e-05, "loss": 1.4686, "step": 33407 }, { "epoch": 0.43, "grad_norm": 3.457928419113159, "learning_rate": 1.9679445694195294e-05, "loss": 1.599, "step": 33408 }, { "epoch": 0.43, "grad_norm": 3.6907074451446533, "learning_rate": 1.967941930148838e-05, "loss": 1.9785, "step": 33409 }, { "epoch": 0.43, "grad_norm": 3.9102463722229004, "learning_rate": 1.9679392907712688e-05, "loss": 2.1623, "step": 33410 }, { "epoch": 0.43, "grad_norm": 3.6960084438323975, "learning_rate": 1.967936651286823e-05, "loss": 1.9346, "step": 33411 }, { "epoch": 0.43, "grad_norm": 3.6536333560943604, "learning_rate": 1.9679340116955005e-05, "loss": 1.8761, "step": 33412 }, { "epoch": 0.43, "grad_norm": 4.61433744430542, "learning_rate": 1.9679313719973016e-05, "loss": 2.4829, "step": 33413 }, { "epoch": 0.43, "grad_norm": 4.064563274383545, "learning_rate": 1.9679287321922272e-05, "loss": 1.9626, "step": 33414 }, { "epoch": 0.43, "grad_norm": 3.5855729579925537, "learning_rate": 1.9679260922802762e-05, "loss": 1.6043, "step": 33415 }, { "epoch": 0.43, "grad_norm": 5.328543663024902, "learning_rate": 1.9679234522614503e-05, "loss": 2.4711, "step": 33416 }, { "epoch": 0.43, "grad_norm": 3.8867027759552, "learning_rate": 1.9679208121357488e-05, "loss": 1.9663, "step": 33417 }, { "epoch": 0.43, "grad_norm": 4.395167350769043, "learning_rate": 1.9679181719031728e-05, "loss": 2.2172, "step": 33418 }, { "epoch": 0.43, "grad_norm": 4.104914665222168, "learning_rate": 1.9679155315637222e-05, "loss": 2.0646, "step": 33419 }, { "epoch": 0.43, "grad_norm": 4.014377117156982, "learning_rate": 1.967912891117397e-05, "loss": 2.194, "step": 33420 }, { "epoch": 0.43, "grad_norm": 3.5795116424560547, "learning_rate": 1.9679102505641984e-05, "loss": 1.8437, "step": 33421 }, { "epoch": 0.43, "grad_norm": 4.2541728019714355, "learning_rate": 1.9679076099041256e-05, "loss": 2.2249, "step": 33422 }, { "epoch": 0.43, "grad_norm": 4.3992919921875, "learning_rate": 1.9679049691371795e-05, "loss": 1.64, "step": 33423 }, { "epoch": 0.43, "grad_norm": 4.166976451873779, "learning_rate": 1.96790232826336e-05, "loss": 2.207, "step": 33424 }, { "epoch": 0.43, "grad_norm": 3.752746105194092, "learning_rate": 1.9678996872826678e-05, "loss": 2.3505, "step": 33425 }, { "epoch": 0.43, "grad_norm": 4.069957256317139, "learning_rate": 1.967897046195103e-05, "loss": 2.6893, "step": 33426 }, { "epoch": 0.43, "grad_norm": 3.765782117843628, "learning_rate": 1.9678944050006663e-05, "loss": 2.4336, "step": 33427 }, { "epoch": 0.43, "grad_norm": 3.4504547119140625, "learning_rate": 1.9678917636993576e-05, "loss": 1.8361, "step": 33428 }, { "epoch": 0.43, "grad_norm": 3.7576045989990234, "learning_rate": 1.9678891222911768e-05, "loss": 2.3828, "step": 33429 }, { "epoch": 0.43, "grad_norm": 4.322455406188965, "learning_rate": 1.967886480776125e-05, "loss": 1.9635, "step": 33430 }, { "epoch": 0.43, "grad_norm": 3.904984951019287, "learning_rate": 1.967883839154202e-05, "loss": 2.1434, "step": 33431 }, { "epoch": 0.43, "grad_norm": 3.8588781356811523, "learning_rate": 1.9678811974254083e-05, "loss": 1.8864, "step": 33432 }, { "epoch": 0.43, "grad_norm": 3.54655385017395, "learning_rate": 1.9678785555897438e-05, "loss": 1.6969, "step": 33433 }, { "epoch": 0.43, "grad_norm": 4.235533237457275, "learning_rate": 1.9678759136472095e-05, "loss": 2.3493, "step": 33434 }, { "epoch": 0.43, "grad_norm": 3.8834502696990967, "learning_rate": 1.9678732715978047e-05, "loss": 2.0083, "step": 33435 }, { "epoch": 0.43, "grad_norm": 4.242798805236816, "learning_rate": 1.9678706294415308e-05, "loss": 1.9053, "step": 33436 }, { "epoch": 0.43, "grad_norm": 4.136296272277832, "learning_rate": 1.9678679871783874e-05, "loss": 2.2144, "step": 33437 }, { "epoch": 0.43, "grad_norm": 3.892655611038208, "learning_rate": 1.967865344808375e-05, "loss": 2.0528, "step": 33438 }, { "epoch": 0.43, "grad_norm": 4.51296854019165, "learning_rate": 1.9678627023314936e-05, "loss": 1.8024, "step": 33439 }, { "epoch": 0.43, "grad_norm": 4.205275058746338, "learning_rate": 1.9678600597477442e-05, "loss": 2.2424, "step": 33440 }, { "epoch": 0.43, "grad_norm": 3.456819534301758, "learning_rate": 1.9678574170571264e-05, "loss": 1.7065, "step": 33441 }, { "epoch": 0.43, "grad_norm": 3.8911292552948, "learning_rate": 1.9678547742596407e-05, "loss": 1.8346, "step": 33442 }, { "epoch": 0.43, "grad_norm": 4.176663398742676, "learning_rate": 1.9678521313552873e-05, "loss": 2.6664, "step": 33443 }, { "epoch": 0.43, "grad_norm": 3.2933859825134277, "learning_rate": 1.9678494883440668e-05, "loss": 1.8915, "step": 33444 }, { "epoch": 0.43, "grad_norm": 3.4520435333251953, "learning_rate": 1.967846845225979e-05, "loss": 1.8513, "step": 33445 }, { "epoch": 0.43, "grad_norm": 3.6350457668304443, "learning_rate": 1.9678442020010248e-05, "loss": 1.7088, "step": 33446 }, { "epoch": 0.43, "grad_norm": 3.665693759918213, "learning_rate": 1.967841558669204e-05, "loss": 1.9253, "step": 33447 }, { "epoch": 0.43, "grad_norm": 3.425832509994507, "learning_rate": 1.967838915230517e-05, "loss": 1.5926, "step": 33448 }, { "epoch": 0.43, "grad_norm": 3.56058931350708, "learning_rate": 1.9678362716849645e-05, "loss": 1.9448, "step": 33449 }, { "epoch": 0.43, "grad_norm": 3.222031593322754, "learning_rate": 1.9678336280325462e-05, "loss": 1.7246, "step": 33450 }, { "epoch": 0.43, "grad_norm": 3.2900097370147705, "learning_rate": 1.9678309842732628e-05, "loss": 1.6507, "step": 33451 }, { "epoch": 0.43, "grad_norm": 3.7169601917266846, "learning_rate": 1.9678283404071143e-05, "loss": 1.9099, "step": 33452 }, { "epoch": 0.43, "grad_norm": 4.003300666809082, "learning_rate": 1.967825696434101e-05, "loss": 2.2425, "step": 33453 }, { "epoch": 0.43, "grad_norm": 4.170504570007324, "learning_rate": 1.9678230523542235e-05, "loss": 2.4674, "step": 33454 }, { "epoch": 0.43, "grad_norm": 4.361946105957031, "learning_rate": 1.9678204081674818e-05, "loss": 2.062, "step": 33455 }, { "epoch": 0.43, "grad_norm": 3.8814682960510254, "learning_rate": 1.9678177638738765e-05, "loss": 2.1917, "step": 33456 }, { "epoch": 0.43, "grad_norm": 3.2124829292297363, "learning_rate": 1.9678151194734077e-05, "loss": 1.8955, "step": 33457 }, { "epoch": 0.43, "grad_norm": 4.3570556640625, "learning_rate": 1.9678124749660756e-05, "loss": 2.3413, "step": 33458 }, { "epoch": 0.43, "grad_norm": 4.062800407409668, "learning_rate": 1.9678098303518805e-05, "loss": 2.1121, "step": 33459 }, { "epoch": 0.43, "grad_norm": 3.629316806793213, "learning_rate": 1.967807185630823e-05, "loss": 2.1844, "step": 33460 }, { "epoch": 0.43, "grad_norm": 3.6590030193328857, "learning_rate": 1.967804540802903e-05, "loss": 1.8991, "step": 33461 }, { "epoch": 0.43, "grad_norm": 4.629344940185547, "learning_rate": 1.967801895868121e-05, "loss": 1.963, "step": 33462 }, { "epoch": 0.43, "grad_norm": 3.8769328594207764, "learning_rate": 1.9677992508264775e-05, "loss": 2.0454, "step": 33463 }, { "epoch": 0.43, "grad_norm": 3.91729736328125, "learning_rate": 1.967796605677972e-05, "loss": 2.1078, "step": 33464 }, { "epoch": 0.43, "grad_norm": 3.5381553173065186, "learning_rate": 1.9677939604226054e-05, "loss": 2.1078, "step": 33465 }, { "epoch": 0.43, "grad_norm": 3.6764023303985596, "learning_rate": 1.9677913150603785e-05, "loss": 1.9437, "step": 33466 }, { "epoch": 0.43, "grad_norm": 4.293925762176514, "learning_rate": 1.9677886695912906e-05, "loss": 2.2937, "step": 33467 }, { "epoch": 0.43, "grad_norm": 3.677823305130005, "learning_rate": 1.9677860240153425e-05, "loss": 1.8682, "step": 33468 }, { "epoch": 0.43, "grad_norm": 3.5739102363586426, "learning_rate": 1.9677833783325343e-05, "loss": 1.618, "step": 33469 }, { "epoch": 0.43, "grad_norm": 3.6071746349334717, "learning_rate": 1.9677807325428665e-05, "loss": 1.8017, "step": 33470 }, { "epoch": 0.43, "grad_norm": 3.692819356918335, "learning_rate": 1.967778086646339e-05, "loss": 2.2507, "step": 33471 }, { "epoch": 0.43, "grad_norm": 4.700669765472412, "learning_rate": 1.967775440642953e-05, "loss": 2.1736, "step": 33472 }, { "epoch": 0.43, "grad_norm": 3.834394693374634, "learning_rate": 1.967772794532708e-05, "loss": 2.0277, "step": 33473 }, { "epoch": 0.43, "grad_norm": 3.9457054138183594, "learning_rate": 1.9677701483156042e-05, "loss": 1.8955, "step": 33474 }, { "epoch": 0.43, "grad_norm": 3.8156678676605225, "learning_rate": 1.967767501991642e-05, "loss": 1.941, "step": 33475 }, { "epoch": 0.43, "grad_norm": 3.434476375579834, "learning_rate": 1.9677648555608223e-05, "loss": 1.7403, "step": 33476 }, { "epoch": 0.43, "grad_norm": 3.7172486782073975, "learning_rate": 1.9677622090231448e-05, "loss": 2.0543, "step": 33477 }, { "epoch": 0.43, "grad_norm": 3.9673194885253906, "learning_rate": 1.9677595623786097e-05, "loss": 2.3177, "step": 33478 }, { "epoch": 0.43, "grad_norm": 3.55301833152771, "learning_rate": 1.967756915627218e-05, "loss": 1.5333, "step": 33479 }, { "epoch": 0.43, "grad_norm": 4.148653507232666, "learning_rate": 1.967754268768969e-05, "loss": 2.3592, "step": 33480 }, { "epoch": 0.43, "grad_norm": 3.1721224784851074, "learning_rate": 1.9677516218038637e-05, "loss": 1.6367, "step": 33481 }, { "epoch": 0.43, "grad_norm": 3.3270092010498047, "learning_rate": 1.967748974731902e-05, "loss": 1.7647, "step": 33482 }, { "epoch": 0.43, "grad_norm": 3.8260068893432617, "learning_rate": 1.9677463275530847e-05, "loss": 1.8581, "step": 33483 }, { "epoch": 0.43, "grad_norm": 3.4374916553497314, "learning_rate": 1.967743680267412e-05, "loss": 2.249, "step": 33484 }, { "epoch": 0.43, "grad_norm": 3.9094104766845703, "learning_rate": 1.9677410328748835e-05, "loss": 1.7658, "step": 33485 }, { "epoch": 0.43, "grad_norm": 3.9688212871551514, "learning_rate": 1.9677383853755003e-05, "loss": 1.9963, "step": 33486 }, { "epoch": 0.43, "grad_norm": 3.659684181213379, "learning_rate": 1.967735737769262e-05, "loss": 1.819, "step": 33487 }, { "epoch": 0.43, "grad_norm": 4.010455131530762, "learning_rate": 1.9677330900561697e-05, "loss": 2.2752, "step": 33488 }, { "epoch": 0.43, "grad_norm": 4.005114555358887, "learning_rate": 1.967730442236223e-05, "loss": 2.2195, "step": 33489 }, { "epoch": 0.43, "grad_norm": 3.6071977615356445, "learning_rate": 1.9677277943094225e-05, "loss": 2.2324, "step": 33490 }, { "epoch": 0.43, "grad_norm": 3.6745474338531494, "learning_rate": 1.9677251462757682e-05, "loss": 2.0427, "step": 33491 }, { "epoch": 0.43, "grad_norm": 3.8936665058135986, "learning_rate": 1.967722498135261e-05, "loss": 1.9125, "step": 33492 }, { "epoch": 0.43, "grad_norm": 3.6395280361175537, "learning_rate": 1.9677198498879007e-05, "loss": 2.0853, "step": 33493 }, { "epoch": 0.43, "grad_norm": 3.9417481422424316, "learning_rate": 1.9677172015336875e-05, "loss": 2.2147, "step": 33494 }, { "epoch": 0.43, "grad_norm": 3.5048086643218994, "learning_rate": 1.967714553072622e-05, "loss": 1.9053, "step": 33495 }, { "epoch": 0.43, "grad_norm": 3.7340614795684814, "learning_rate": 1.9677119045047045e-05, "loss": 1.8303, "step": 33496 }, { "epoch": 0.43, "grad_norm": 3.5974807739257812, "learning_rate": 1.9677092558299355e-05, "loss": 1.8235, "step": 33497 }, { "epoch": 0.43, "grad_norm": 3.7251877784729004, "learning_rate": 1.9677066070483145e-05, "loss": 2.0187, "step": 33498 }, { "epoch": 0.43, "grad_norm": 3.193876028060913, "learning_rate": 1.9677039581598424e-05, "loss": 1.5969, "step": 33499 }, { "epoch": 0.43, "grad_norm": 4.126095294952393, "learning_rate": 1.9677013091645194e-05, "loss": 2.2963, "step": 33500 }, { "epoch": 0.43, "grad_norm": 3.9681859016418457, "learning_rate": 1.9676986600623456e-05, "loss": 1.8688, "step": 33501 }, { "epoch": 0.43, "grad_norm": 3.7714109420776367, "learning_rate": 1.9676960108533217e-05, "loss": 1.8861, "step": 33502 }, { "epoch": 0.43, "grad_norm": 3.3915085792541504, "learning_rate": 1.9676933615374476e-05, "loss": 1.6558, "step": 33503 }, { "epoch": 0.43, "grad_norm": 3.9268605709075928, "learning_rate": 1.967690712114724e-05, "loss": 1.7232, "step": 33504 }, { "epoch": 0.43, "grad_norm": 3.5260961055755615, "learning_rate": 1.9676880625851506e-05, "loss": 1.4941, "step": 33505 }, { "epoch": 0.43, "grad_norm": 4.075114727020264, "learning_rate": 1.967685412948728e-05, "loss": 2.0583, "step": 33506 }, { "epoch": 0.43, "grad_norm": 3.975013494491577, "learning_rate": 1.9676827632054567e-05, "loss": 1.9121, "step": 33507 }, { "epoch": 0.43, "grad_norm": 4.055981636047363, "learning_rate": 1.9676801133553366e-05, "loss": 1.8863, "step": 33508 }, { "epoch": 0.43, "grad_norm": 3.6212470531463623, "learning_rate": 1.9676774633983683e-05, "loss": 1.7605, "step": 33509 }, { "epoch": 0.43, "grad_norm": 3.906397581100464, "learning_rate": 1.967674813334552e-05, "loss": 1.7546, "step": 33510 }, { "epoch": 0.43, "grad_norm": 4.068646430969238, "learning_rate": 1.967672163163888e-05, "loss": 1.6841, "step": 33511 }, { "epoch": 0.43, "grad_norm": 3.5009377002716064, "learning_rate": 1.9676695128863765e-05, "loss": 1.973, "step": 33512 }, { "epoch": 0.43, "grad_norm": 4.338308811187744, "learning_rate": 1.967666862502018e-05, "loss": 2.4651, "step": 33513 }, { "epoch": 0.43, "grad_norm": 4.125192165374756, "learning_rate": 1.9676642120108126e-05, "loss": 1.9409, "step": 33514 }, { "epoch": 0.43, "grad_norm": 3.693748712539673, "learning_rate": 1.9676615614127606e-05, "loss": 1.8841, "step": 33515 }, { "epoch": 0.43, "grad_norm": 3.844231605529785, "learning_rate": 1.9676589107078626e-05, "loss": 2.1855, "step": 33516 }, { "epoch": 0.43, "grad_norm": 3.512176036834717, "learning_rate": 1.9676562598961185e-05, "loss": 2.2832, "step": 33517 }, { "epoch": 0.43, "grad_norm": 4.240533351898193, "learning_rate": 1.9676536089775286e-05, "loss": 1.9604, "step": 33518 }, { "epoch": 0.44, "grad_norm": 3.8314321041107178, "learning_rate": 1.9676509579520933e-05, "loss": 1.9726, "step": 33519 }, { "epoch": 0.44, "grad_norm": 3.5956013202667236, "learning_rate": 1.9676483068198133e-05, "loss": 1.8429, "step": 33520 }, { "epoch": 0.44, "grad_norm": 3.9391350746154785, "learning_rate": 1.9676456555806882e-05, "loss": 1.8831, "step": 33521 }, { "epoch": 0.44, "grad_norm": 3.1704416275024414, "learning_rate": 1.9676430042347184e-05, "loss": 1.4983, "step": 33522 }, { "epoch": 0.44, "grad_norm": 3.678256034851074, "learning_rate": 1.967640352781905e-05, "loss": 2.0122, "step": 33523 }, { "epoch": 0.44, "grad_norm": 4.410048484802246, "learning_rate": 1.9676377012222473e-05, "loss": 1.6115, "step": 33524 }, { "epoch": 0.44, "grad_norm": 4.21992301940918, "learning_rate": 1.967635049555746e-05, "loss": 2.008, "step": 33525 }, { "epoch": 0.44, "grad_norm": 4.11856746673584, "learning_rate": 1.9676323977824012e-05, "loss": 2.2526, "step": 33526 }, { "epoch": 0.44, "grad_norm": 4.284548759460449, "learning_rate": 1.9676297459022135e-05, "loss": 1.8926, "step": 33527 }, { "epoch": 0.44, "grad_norm": 3.9080188274383545, "learning_rate": 1.967627093915183e-05, "loss": 2.2078, "step": 33528 }, { "epoch": 0.44, "grad_norm": 4.423901557922363, "learning_rate": 1.9676244418213102e-05, "loss": 2.4224, "step": 33529 }, { "epoch": 0.44, "grad_norm": 3.9002599716186523, "learning_rate": 1.9676217896205954e-05, "loss": 2.0496, "step": 33530 }, { "epoch": 0.44, "grad_norm": 4.726027488708496, "learning_rate": 1.9676191373130385e-05, "loss": 1.8012, "step": 33531 }, { "epoch": 0.44, "grad_norm": 3.623713970184326, "learning_rate": 1.96761648489864e-05, "loss": 1.7727, "step": 33532 }, { "epoch": 0.44, "grad_norm": 3.60937762260437, "learning_rate": 1.9676138323774004e-05, "loss": 1.8493, "step": 33533 }, { "epoch": 0.44, "grad_norm": 3.948167324066162, "learning_rate": 1.96761117974932e-05, "loss": 2.1581, "step": 33534 }, { "epoch": 0.44, "grad_norm": 3.347207546234131, "learning_rate": 1.9676085270143983e-05, "loss": 1.7215, "step": 33535 }, { "epoch": 0.44, "grad_norm": 3.5067687034606934, "learning_rate": 1.9676058741726363e-05, "loss": 1.8224, "step": 33536 }, { "epoch": 0.44, "grad_norm": 3.966662645339966, "learning_rate": 1.9676032212240348e-05, "loss": 2.0942, "step": 33537 }, { "epoch": 0.44, "grad_norm": 3.7528090476989746, "learning_rate": 1.967600568168593e-05, "loss": 2.1042, "step": 33538 }, { "epoch": 0.44, "grad_norm": 3.7381300926208496, "learning_rate": 1.9675979150063117e-05, "loss": 2.3379, "step": 33539 }, { "epoch": 0.44, "grad_norm": 4.023711681365967, "learning_rate": 1.9675952617371915e-05, "loss": 1.8606, "step": 33540 }, { "epoch": 0.44, "grad_norm": 5.060849189758301, "learning_rate": 1.967592608361232e-05, "loss": 2.3842, "step": 33541 }, { "epoch": 0.44, "grad_norm": 3.9439947605133057, "learning_rate": 1.9675899548784342e-05, "loss": 1.9924, "step": 33542 }, { "epoch": 0.44, "grad_norm": 4.112315654754639, "learning_rate": 1.9675873012887978e-05, "loss": 1.9887, "step": 33543 }, { "epoch": 0.44, "grad_norm": 3.281599283218384, "learning_rate": 1.9675846475923234e-05, "loss": 1.5045, "step": 33544 }, { "epoch": 0.44, "grad_norm": 4.582265377044678, "learning_rate": 1.967581993789011e-05, "loss": 2.5066, "step": 33545 }, { "epoch": 0.44, "grad_norm": 3.350191354751587, "learning_rate": 1.9675793398788614e-05, "loss": 1.6736, "step": 33546 }, { "epoch": 0.44, "grad_norm": 3.5371246337890625, "learning_rate": 1.9675766858618748e-05, "loss": 1.8172, "step": 33547 }, { "epoch": 0.44, "grad_norm": 4.138238906860352, "learning_rate": 1.967574031738051e-05, "loss": 1.9921, "step": 33548 }, { "epoch": 0.44, "grad_norm": 4.07411003112793, "learning_rate": 1.9675713775073908e-05, "loss": 1.7758, "step": 33549 }, { "epoch": 0.44, "grad_norm": 4.1784586906433105, "learning_rate": 1.967568723169894e-05, "loss": 2.0648, "step": 33550 }, { "epoch": 0.44, "grad_norm": 3.4038829803466797, "learning_rate": 1.9675660687255614e-05, "loss": 1.717, "step": 33551 }, { "epoch": 0.44, "grad_norm": 3.996488332748413, "learning_rate": 1.9675634141743935e-05, "loss": 2.5301, "step": 33552 }, { "epoch": 0.44, "grad_norm": 3.7367866039276123, "learning_rate": 1.9675607595163896e-05, "loss": 1.7493, "step": 33553 }, { "epoch": 0.44, "grad_norm": 4.003934383392334, "learning_rate": 1.9675581047515506e-05, "loss": 2.1816, "step": 33554 }, { "epoch": 0.44, "grad_norm": 3.455906391143799, "learning_rate": 1.967555449879877e-05, "loss": 1.8559, "step": 33555 }, { "epoch": 0.44, "grad_norm": 4.279994964599609, "learning_rate": 1.967552794901369e-05, "loss": 2.592, "step": 33556 }, { "epoch": 0.44, "grad_norm": 3.8056726455688477, "learning_rate": 1.9675501398160266e-05, "loss": 1.8803, "step": 33557 }, { "epoch": 0.44, "grad_norm": 4.074883937835693, "learning_rate": 1.9675474846238502e-05, "loss": 2.2747, "step": 33558 }, { "epoch": 0.44, "grad_norm": 4.474178791046143, "learning_rate": 1.9675448293248403e-05, "loss": 2.496, "step": 33559 }, { "epoch": 0.44, "grad_norm": 3.575951099395752, "learning_rate": 1.9675421739189965e-05, "loss": 1.6408, "step": 33560 }, { "epoch": 0.44, "grad_norm": 3.720088005065918, "learning_rate": 1.96753951840632e-05, "loss": 2.0827, "step": 33561 }, { "epoch": 0.44, "grad_norm": 4.082957744598389, "learning_rate": 1.967536862786811e-05, "loss": 1.7249, "step": 33562 }, { "epoch": 0.44, "grad_norm": 3.712675094604492, "learning_rate": 1.967534207060469e-05, "loss": 1.9692, "step": 33563 }, { "epoch": 0.44, "grad_norm": 4.16190767288208, "learning_rate": 1.9675315512272952e-05, "loss": 2.1804, "step": 33564 }, { "epoch": 0.44, "grad_norm": 3.2586288452148438, "learning_rate": 1.9675288952872894e-05, "loss": 1.9534, "step": 33565 }, { "epoch": 0.44, "grad_norm": 3.715470790863037, "learning_rate": 1.967526239240452e-05, "loss": 2.0746, "step": 33566 }, { "epoch": 0.44, "grad_norm": 4.452670097351074, "learning_rate": 1.9675235830867835e-05, "loss": 2.6141, "step": 33567 }, { "epoch": 0.44, "grad_norm": 4.584110260009766, "learning_rate": 1.9675209268262835e-05, "loss": 1.9896, "step": 33568 }, { "epoch": 0.44, "grad_norm": 3.6513051986694336, "learning_rate": 1.9675182704589532e-05, "loss": 1.7633, "step": 33569 }, { "epoch": 0.44, "grad_norm": 4.053335666656494, "learning_rate": 1.9675156139847922e-05, "loss": 2.1897, "step": 33570 }, { "epoch": 0.44, "grad_norm": 3.5712532997131348, "learning_rate": 1.9675129574038013e-05, "loss": 2.2344, "step": 33571 }, { "epoch": 0.44, "grad_norm": 3.715980291366577, "learning_rate": 1.9675103007159802e-05, "loss": 2.0626, "step": 33572 }, { "epoch": 0.44, "grad_norm": 3.9159982204437256, "learning_rate": 1.96750764392133e-05, "loss": 2.4039, "step": 33573 }, { "epoch": 0.44, "grad_norm": 4.153082370758057, "learning_rate": 1.96750498701985e-05, "loss": 1.9134, "step": 33574 }, { "epoch": 0.44, "grad_norm": 3.9179234504699707, "learning_rate": 1.9675023300115415e-05, "loss": 2.1394, "step": 33575 }, { "epoch": 0.44, "grad_norm": 3.555927276611328, "learning_rate": 1.9674996728964042e-05, "loss": 1.7306, "step": 33576 }, { "epoch": 0.44, "grad_norm": 3.3855795860290527, "learning_rate": 1.9674970156744384e-05, "loss": 1.6043, "step": 33577 }, { "epoch": 0.44, "grad_norm": 3.908773899078369, "learning_rate": 1.9674943583456448e-05, "loss": 2.4271, "step": 33578 }, { "epoch": 0.44, "grad_norm": 3.7887051105499268, "learning_rate": 1.967491700910023e-05, "loss": 2.0902, "step": 33579 }, { "epoch": 0.44, "grad_norm": 4.099306106567383, "learning_rate": 1.967489043367574e-05, "loss": 2.1802, "step": 33580 }, { "epoch": 0.44, "grad_norm": 3.547846555709839, "learning_rate": 1.9674863857182976e-05, "loss": 2.0334, "step": 33581 }, { "epoch": 0.44, "grad_norm": 3.3923730850219727, "learning_rate": 1.9674837279621942e-05, "loss": 1.781, "step": 33582 }, { "epoch": 0.44, "grad_norm": 3.521360158920288, "learning_rate": 1.9674810700992647e-05, "loss": 1.8395, "step": 33583 }, { "epoch": 0.44, "grad_norm": 3.556199312210083, "learning_rate": 1.9674784121295084e-05, "loss": 2.0302, "step": 33584 }, { "epoch": 0.44, "grad_norm": 4.083340167999268, "learning_rate": 1.9674757540529263e-05, "loss": 1.9517, "step": 33585 }, { "epoch": 0.44, "grad_norm": 3.542487859725952, "learning_rate": 1.967473095869518e-05, "loss": 1.8939, "step": 33586 }, { "epoch": 0.44, "grad_norm": 4.3877363204956055, "learning_rate": 1.967470437579285e-05, "loss": 2.3945, "step": 33587 }, { "epoch": 0.44, "grad_norm": 3.9827358722686768, "learning_rate": 1.9674677791822266e-05, "loss": 1.9514, "step": 33588 }, { "epoch": 0.44, "grad_norm": 3.717080593109131, "learning_rate": 1.9674651206783432e-05, "loss": 2.1261, "step": 33589 }, { "epoch": 0.44, "grad_norm": 3.516686201095581, "learning_rate": 1.9674624620676354e-05, "loss": 2.3269, "step": 33590 }, { "epoch": 0.44, "grad_norm": 4.173124313354492, "learning_rate": 1.967459803350103e-05, "loss": 2.0709, "step": 33591 }, { "epoch": 0.44, "grad_norm": 3.671128988265991, "learning_rate": 1.967457144525747e-05, "loss": 1.5947, "step": 33592 }, { "epoch": 0.44, "grad_norm": 3.6377227306365967, "learning_rate": 1.9674544855945668e-05, "loss": 2.0946, "step": 33593 }, { "epoch": 0.44, "grad_norm": 3.6422784328460693, "learning_rate": 1.9674518265565638e-05, "loss": 2.2122, "step": 33594 }, { "epoch": 0.44, "grad_norm": 3.759025812149048, "learning_rate": 1.9674491674117376e-05, "loss": 1.9614, "step": 33595 }, { "epoch": 0.44, "grad_norm": 3.7967653274536133, "learning_rate": 1.9674465081600884e-05, "loss": 2.0466, "step": 33596 }, { "epoch": 0.44, "grad_norm": 3.4843051433563232, "learning_rate": 1.967443848801617e-05, "loss": 1.6421, "step": 33597 }, { "epoch": 0.44, "grad_norm": 3.1327483654022217, "learning_rate": 1.9674411893363226e-05, "loss": 1.4974, "step": 33598 }, { "epoch": 0.44, "grad_norm": 4.221735954284668, "learning_rate": 1.967438529764207e-05, "loss": 1.9941, "step": 33599 }, { "epoch": 0.44, "grad_norm": 3.558483362197876, "learning_rate": 1.9674358700852697e-05, "loss": 1.7424, "step": 33600 }, { "epoch": 0.44, "grad_norm": 4.008981227874756, "learning_rate": 1.967433210299511e-05, "loss": 1.9212, "step": 33601 }, { "epoch": 0.44, "grad_norm": 3.6393721103668213, "learning_rate": 1.967430550406931e-05, "loss": 1.793, "step": 33602 }, { "epoch": 0.44, "grad_norm": 3.6379053592681885, "learning_rate": 1.967427890407531e-05, "loss": 2.0558, "step": 33603 }, { "epoch": 0.44, "grad_norm": 3.7810635566711426, "learning_rate": 1.96742523030131e-05, "loss": 2.1132, "step": 33604 }, { "epoch": 0.44, "grad_norm": 3.8864076137542725, "learning_rate": 1.9674225700882688e-05, "loss": 1.9439, "step": 33605 }, { "epoch": 0.44, "grad_norm": 3.31123423576355, "learning_rate": 1.9674199097684074e-05, "loss": 1.607, "step": 33606 }, { "epoch": 0.44, "grad_norm": 3.6735620498657227, "learning_rate": 1.967417249341727e-05, "loss": 1.8616, "step": 33607 }, { "epoch": 0.44, "grad_norm": 3.993818759918213, "learning_rate": 1.9674145888082273e-05, "loss": 1.9363, "step": 33608 }, { "epoch": 0.44, "grad_norm": 3.8417000770568848, "learning_rate": 1.9674119281679083e-05, "loss": 2.0583, "step": 33609 }, { "epoch": 0.44, "grad_norm": 4.025282859802246, "learning_rate": 1.967409267420771e-05, "loss": 2.2984, "step": 33610 }, { "epoch": 0.44, "grad_norm": 3.6907174587249756, "learning_rate": 1.967406606566815e-05, "loss": 1.8313, "step": 33611 }, { "epoch": 0.44, "grad_norm": 3.4703025817871094, "learning_rate": 1.967403945606041e-05, "loss": 1.746, "step": 33612 }, { "epoch": 0.44, "grad_norm": 3.5905911922454834, "learning_rate": 1.967401284538449e-05, "loss": 1.8407, "step": 33613 }, { "epoch": 0.44, "grad_norm": 3.762458086013794, "learning_rate": 1.9673986233640398e-05, "loss": 1.5755, "step": 33614 }, { "epoch": 0.44, "grad_norm": 4.014656066894531, "learning_rate": 1.9673959620828133e-05, "loss": 2.244, "step": 33615 }, { "epoch": 0.44, "grad_norm": 3.764625072479248, "learning_rate": 1.9673933006947698e-05, "loss": 1.8193, "step": 33616 }, { "epoch": 0.44, "grad_norm": 4.053310394287109, "learning_rate": 1.9673906391999097e-05, "loss": 1.8204, "step": 33617 }, { "epoch": 0.44, "grad_norm": 3.2014150619506836, "learning_rate": 1.9673879775982334e-05, "loss": 1.966, "step": 33618 }, { "epoch": 0.44, "grad_norm": 3.695946216583252, "learning_rate": 1.9673853158897407e-05, "loss": 1.7466, "step": 33619 }, { "epoch": 0.44, "grad_norm": 4.510859966278076, "learning_rate": 1.9673826540744325e-05, "loss": 2.0048, "step": 33620 }, { "epoch": 0.44, "grad_norm": 3.9961228370666504, "learning_rate": 1.9673799921523086e-05, "loss": 1.643, "step": 33621 }, { "epoch": 0.44, "grad_norm": 3.774574041366577, "learning_rate": 1.9673773301233702e-05, "loss": 1.9791, "step": 33622 }, { "epoch": 0.44, "grad_norm": 3.1780433654785156, "learning_rate": 1.9673746679876162e-05, "loss": 1.5998, "step": 33623 }, { "epoch": 0.44, "grad_norm": 3.851283550262451, "learning_rate": 1.967372005745048e-05, "loss": 1.8925, "step": 33624 }, { "epoch": 0.44, "grad_norm": 3.4253551959991455, "learning_rate": 1.9673693433956655e-05, "loss": 2.0076, "step": 33625 }, { "epoch": 0.44, "grad_norm": 4.59307336807251, "learning_rate": 1.9673666809394687e-05, "loss": 2.7379, "step": 33626 }, { "epoch": 0.44, "grad_norm": 3.8548121452331543, "learning_rate": 1.9673640183764588e-05, "loss": 2.0338, "step": 33627 }, { "epoch": 0.44, "grad_norm": 3.363912582397461, "learning_rate": 1.967361355706635e-05, "loss": 1.7638, "step": 33628 }, { "epoch": 0.44, "grad_norm": 3.8360068798065186, "learning_rate": 1.9673586929299978e-05, "loss": 1.7507, "step": 33629 }, { "epoch": 0.44, "grad_norm": 3.8777503967285156, "learning_rate": 1.9673560300465485e-05, "loss": 2.273, "step": 33630 }, { "epoch": 0.44, "grad_norm": 3.477436065673828, "learning_rate": 1.9673533670562863e-05, "loss": 1.5037, "step": 33631 }, { "epoch": 0.44, "grad_norm": 3.8724100589752197, "learning_rate": 1.9673507039592122e-05, "loss": 1.8518, "step": 33632 }, { "epoch": 0.44, "grad_norm": 3.907870054244995, "learning_rate": 1.967348040755326e-05, "loss": 2.1926, "step": 33633 }, { "epoch": 0.44, "grad_norm": 4.1752190589904785, "learning_rate": 1.9673453774446278e-05, "loss": 1.9552, "step": 33634 }, { "epoch": 0.44, "grad_norm": 4.353185176849365, "learning_rate": 1.9673427140271188e-05, "loss": 2.3555, "step": 33635 }, { "epoch": 0.44, "grad_norm": 4.254080772399902, "learning_rate": 1.9673400505027983e-05, "loss": 2.0575, "step": 33636 }, { "epoch": 0.44, "grad_norm": 3.9171953201293945, "learning_rate": 1.9673373868716675e-05, "loss": 2.2252, "step": 33637 }, { "epoch": 0.44, "grad_norm": 4.420613765716553, "learning_rate": 1.967334723133726e-05, "loss": 2.3459, "step": 33638 }, { "epoch": 0.44, "grad_norm": 3.9075872898101807, "learning_rate": 1.9673320592889742e-05, "loss": 1.9418, "step": 33639 }, { "epoch": 0.44, "grad_norm": 3.3644940853118896, "learning_rate": 1.9673293953374127e-05, "loss": 1.9206, "step": 33640 }, { "epoch": 0.44, "grad_norm": 3.9822280406951904, "learning_rate": 1.9673267312790416e-05, "loss": 2.1414, "step": 33641 }, { "epoch": 0.44, "grad_norm": 3.5249249935150146, "learning_rate": 1.967324067113861e-05, "loss": 1.8294, "step": 33642 }, { "epoch": 0.44, "grad_norm": 3.7061946392059326, "learning_rate": 1.9673214028418716e-05, "loss": 1.7669, "step": 33643 }, { "epoch": 0.44, "grad_norm": 4.7471537590026855, "learning_rate": 1.9673187384630737e-05, "loss": 2.62, "step": 33644 }, { "epoch": 0.44, "grad_norm": 3.7015366554260254, "learning_rate": 1.9673160739774673e-05, "loss": 1.9814, "step": 33645 }, { "epoch": 0.44, "grad_norm": 3.790815591812134, "learning_rate": 1.9673134093850525e-05, "loss": 1.917, "step": 33646 }, { "epoch": 0.44, "grad_norm": 3.6237871646881104, "learning_rate": 1.9673107446858305e-05, "loss": 2.0756, "step": 33647 }, { "epoch": 0.44, "grad_norm": 3.635143995285034, "learning_rate": 1.9673080798798004e-05, "loss": 1.7873, "step": 33648 }, { "epoch": 0.44, "grad_norm": 3.8347179889678955, "learning_rate": 1.967305414966963e-05, "loss": 1.969, "step": 33649 }, { "epoch": 0.44, "grad_norm": 3.9087584018707275, "learning_rate": 1.967302749947319e-05, "loss": 2.0229, "step": 33650 }, { "epoch": 0.44, "grad_norm": 3.3988523483276367, "learning_rate": 1.9673000848208686e-05, "loss": 1.7728, "step": 33651 }, { "epoch": 0.44, "grad_norm": 3.8283700942993164, "learning_rate": 1.9672974195876117e-05, "loss": 1.9275, "step": 33652 }, { "epoch": 0.44, "grad_norm": 3.846698522567749, "learning_rate": 1.9672947542475486e-05, "loss": 2.2715, "step": 33653 }, { "epoch": 0.44, "grad_norm": 3.494101047515869, "learning_rate": 1.9672920888006798e-05, "loss": 1.5286, "step": 33654 }, { "epoch": 0.44, "grad_norm": 3.8759865760803223, "learning_rate": 1.9672894232470055e-05, "loss": 1.9716, "step": 33655 }, { "epoch": 0.44, "grad_norm": 3.7863025665283203, "learning_rate": 1.967286757586526e-05, "loss": 1.945, "step": 33656 }, { "epoch": 0.44, "grad_norm": 4.110017776489258, "learning_rate": 1.9672840918192414e-05, "loss": 1.8162, "step": 33657 }, { "epoch": 0.44, "grad_norm": 4.201676845550537, "learning_rate": 1.967281425945153e-05, "loss": 2.0149, "step": 33658 }, { "epoch": 0.44, "grad_norm": 4.311280727386475, "learning_rate": 1.9672787599642596e-05, "loss": 2.6955, "step": 33659 }, { "epoch": 0.44, "grad_norm": 3.679192304611206, "learning_rate": 1.9672760938765626e-05, "loss": 2.2374, "step": 33660 }, { "epoch": 0.44, "grad_norm": 3.5315792560577393, "learning_rate": 1.9672734276820615e-05, "loss": 1.8585, "step": 33661 }, { "epoch": 0.44, "grad_norm": 4.160552978515625, "learning_rate": 1.9672707613807573e-05, "loss": 2.2087, "step": 33662 }, { "epoch": 0.44, "grad_norm": 3.464303731918335, "learning_rate": 1.96726809497265e-05, "loss": 1.6754, "step": 33663 }, { "epoch": 0.44, "grad_norm": 4.245706558227539, "learning_rate": 1.9672654284577398e-05, "loss": 2.2038, "step": 33664 }, { "epoch": 0.44, "grad_norm": 4.642350673675537, "learning_rate": 1.967262761836027e-05, "loss": 2.1156, "step": 33665 }, { "epoch": 0.44, "grad_norm": 3.6190145015716553, "learning_rate": 1.967260095107512e-05, "loss": 1.9349, "step": 33666 }, { "epoch": 0.44, "grad_norm": 4.1969313621521, "learning_rate": 1.9672574282721957e-05, "loss": 2.1125, "step": 33667 }, { "epoch": 0.44, "grad_norm": 4.14755916595459, "learning_rate": 1.9672547613300772e-05, "loss": 2.2822, "step": 33668 }, { "epoch": 0.44, "grad_norm": 3.7650623321533203, "learning_rate": 1.9672520942811574e-05, "loss": 2.4037, "step": 33669 }, { "epoch": 0.44, "grad_norm": 4.230877876281738, "learning_rate": 1.967249427125437e-05, "loss": 2.1725, "step": 33670 }, { "epoch": 0.44, "grad_norm": 3.2952075004577637, "learning_rate": 1.967246759862915e-05, "loss": 1.5451, "step": 33671 }, { "epoch": 0.44, "grad_norm": 4.086572647094727, "learning_rate": 1.967244092493593e-05, "loss": 1.8133, "step": 33672 }, { "epoch": 0.44, "grad_norm": 3.5974748134613037, "learning_rate": 1.9672414250174708e-05, "loss": 1.7879, "step": 33673 }, { "epoch": 0.44, "grad_norm": 3.9741666316986084, "learning_rate": 1.967238757434549e-05, "loss": 1.6194, "step": 33674 }, { "epoch": 0.44, "grad_norm": 3.466942548751831, "learning_rate": 1.9672360897448273e-05, "loss": 2.027, "step": 33675 }, { "epoch": 0.44, "grad_norm": 4.026068210601807, "learning_rate": 1.9672334219483063e-05, "loss": 2.0713, "step": 33676 }, { "epoch": 0.44, "grad_norm": 3.0562984943389893, "learning_rate": 1.9672307540449864e-05, "loss": 1.3728, "step": 33677 }, { "epoch": 0.44, "grad_norm": 4.028913497924805, "learning_rate": 1.967228086034868e-05, "loss": 2.1408, "step": 33678 }, { "epoch": 0.44, "grad_norm": 3.8894827365875244, "learning_rate": 1.9672254179179512e-05, "loss": 1.8075, "step": 33679 }, { "epoch": 0.44, "grad_norm": 3.8641743659973145, "learning_rate": 1.9672227496942363e-05, "loss": 2.1079, "step": 33680 }, { "epoch": 0.44, "grad_norm": 3.674572229385376, "learning_rate": 1.967220081363723e-05, "loss": 1.7788, "step": 33681 }, { "epoch": 0.44, "grad_norm": 3.6998350620269775, "learning_rate": 1.967217412926413e-05, "loss": 1.6292, "step": 33682 }, { "epoch": 0.44, "grad_norm": 4.22307825088501, "learning_rate": 1.9672147443823055e-05, "loss": 1.8921, "step": 33683 }, { "epoch": 0.44, "grad_norm": 3.8986072540283203, "learning_rate": 1.967212075731401e-05, "loss": 1.9377, "step": 33684 }, { "epoch": 0.44, "grad_norm": 4.125707626342773, "learning_rate": 1.9672094069736996e-05, "loss": 1.8497, "step": 33685 }, { "epoch": 0.44, "grad_norm": 4.550518035888672, "learning_rate": 1.9672067381092023e-05, "loss": 2.0398, "step": 33686 }, { "epoch": 0.44, "grad_norm": 3.892416000366211, "learning_rate": 1.967204069137909e-05, "loss": 2.0382, "step": 33687 }, { "epoch": 0.44, "grad_norm": 3.996366024017334, "learning_rate": 1.9672014000598197e-05, "loss": 1.9737, "step": 33688 }, { "epoch": 0.44, "grad_norm": 3.5245532989501953, "learning_rate": 1.9671987308749352e-05, "loss": 1.9359, "step": 33689 }, { "epoch": 0.44, "grad_norm": 3.9126620292663574, "learning_rate": 1.9671960615832554e-05, "loss": 1.9871, "step": 33690 }, { "epoch": 0.44, "grad_norm": 3.386834144592285, "learning_rate": 1.9671933921847807e-05, "loss": 1.6828, "step": 33691 }, { "epoch": 0.44, "grad_norm": 3.7503509521484375, "learning_rate": 1.9671907226795118e-05, "loss": 2.3147, "step": 33692 }, { "epoch": 0.44, "grad_norm": 3.8788001537323, "learning_rate": 1.9671880530674482e-05, "loss": 2.4773, "step": 33693 }, { "epoch": 0.44, "grad_norm": 4.035086631774902, "learning_rate": 1.967185383348591e-05, "loss": 2.6121, "step": 33694 }, { "epoch": 0.44, "grad_norm": 3.7703444957733154, "learning_rate": 1.9671827135229397e-05, "loss": 2.0397, "step": 33695 }, { "epoch": 0.44, "grad_norm": 3.5542125701904297, "learning_rate": 1.9671800435904954e-05, "loss": 1.9356, "step": 33696 }, { "epoch": 0.44, "grad_norm": 3.6854262351989746, "learning_rate": 1.967177373551258e-05, "loss": 2.2971, "step": 33697 }, { "epoch": 0.44, "grad_norm": 3.5369670391082764, "learning_rate": 1.9671747034052275e-05, "loss": 1.677, "step": 33698 }, { "epoch": 0.44, "grad_norm": 3.557093858718872, "learning_rate": 1.967172033152405e-05, "loss": 1.5971, "step": 33699 }, { "epoch": 0.44, "grad_norm": 4.136244773864746, "learning_rate": 1.9671693627927893e-05, "loss": 2.1256, "step": 33700 }, { "epoch": 0.44, "grad_norm": 4.20285701751709, "learning_rate": 1.967166692326383e-05, "loss": 2.5197, "step": 33701 }, { "epoch": 0.44, "grad_norm": 3.7958455085754395, "learning_rate": 1.9671640217531843e-05, "loss": 2.0514, "step": 33702 }, { "epoch": 0.44, "grad_norm": 3.09597110748291, "learning_rate": 1.967161351073194e-05, "loss": 1.412, "step": 33703 }, { "epoch": 0.44, "grad_norm": 3.8721675872802734, "learning_rate": 1.9671586802864135e-05, "loss": 1.9638, "step": 33704 }, { "epoch": 0.44, "grad_norm": 3.7780017852783203, "learning_rate": 1.9671560093928416e-05, "loss": 2.0619, "step": 33705 }, { "epoch": 0.44, "grad_norm": 3.3376591205596924, "learning_rate": 1.96715333839248e-05, "loss": 1.667, "step": 33706 }, { "epoch": 0.44, "grad_norm": 3.7580080032348633, "learning_rate": 1.9671506672853272e-05, "loss": 1.7133, "step": 33707 }, { "epoch": 0.44, "grad_norm": 3.5955684185028076, "learning_rate": 1.9671479960713855e-05, "loss": 1.6799, "step": 33708 }, { "epoch": 0.44, "grad_norm": 3.743645668029785, "learning_rate": 1.9671453247506542e-05, "loss": 2.1796, "step": 33709 }, { "epoch": 0.44, "grad_norm": 4.081711292266846, "learning_rate": 1.967142653323133e-05, "loss": 2.0329, "step": 33710 }, { "epoch": 0.44, "grad_norm": 3.514927387237549, "learning_rate": 1.9671399817888234e-05, "loss": 1.8689, "step": 33711 }, { "epoch": 0.44, "grad_norm": 4.282112121582031, "learning_rate": 1.9671373101477246e-05, "loss": 2.016, "step": 33712 }, { "epoch": 0.44, "grad_norm": 3.8439419269561768, "learning_rate": 1.967134638399838e-05, "loss": 2.1479, "step": 33713 }, { "epoch": 0.44, "grad_norm": 3.5849056243896484, "learning_rate": 1.9671319665451633e-05, "loss": 1.7576, "step": 33714 }, { "epoch": 0.44, "grad_norm": 3.8598263263702393, "learning_rate": 1.9671292945837007e-05, "loss": 2.0868, "step": 33715 }, { "epoch": 0.44, "grad_norm": 4.002967834472656, "learning_rate": 1.9671266225154503e-05, "loss": 2.1014, "step": 33716 }, { "epoch": 0.44, "grad_norm": 3.855587959289551, "learning_rate": 1.967123950340413e-05, "loss": 1.8294, "step": 33717 }, { "epoch": 0.44, "grad_norm": 3.599313497543335, "learning_rate": 1.9671212780585888e-05, "loss": 1.7678, "step": 33718 }, { "epoch": 0.44, "grad_norm": 3.626385450363159, "learning_rate": 1.967118605669978e-05, "loss": 1.9739, "step": 33719 }, { "epoch": 0.44, "grad_norm": 4.106019020080566, "learning_rate": 1.9671159331745807e-05, "loss": 1.9549, "step": 33720 }, { "epoch": 0.44, "grad_norm": 3.968370199203491, "learning_rate": 1.9671132605723977e-05, "loss": 1.7655, "step": 33721 }, { "epoch": 0.44, "grad_norm": 3.393385171890259, "learning_rate": 1.967110587863429e-05, "loss": 1.5698, "step": 33722 }, { "epoch": 0.44, "grad_norm": 4.180058479309082, "learning_rate": 1.9671079150476746e-05, "loss": 2.1262, "step": 33723 }, { "epoch": 0.44, "grad_norm": 3.5040462017059326, "learning_rate": 1.9671052421251352e-05, "loss": 1.648, "step": 33724 }, { "epoch": 0.44, "grad_norm": 3.6607167720794678, "learning_rate": 1.967102569095811e-05, "loss": 1.9007, "step": 33725 }, { "epoch": 0.44, "grad_norm": 3.501192092895508, "learning_rate": 1.9670998959597022e-05, "loss": 1.7914, "step": 33726 }, { "epoch": 0.44, "grad_norm": 3.449601173400879, "learning_rate": 1.9670972227168096e-05, "loss": 1.7467, "step": 33727 }, { "epoch": 0.44, "grad_norm": 3.334212303161621, "learning_rate": 1.967094549367132e-05, "loss": 1.6561, "step": 33728 }, { "epoch": 0.44, "grad_norm": 3.575888156890869, "learning_rate": 1.967091875910672e-05, "loss": 1.9174, "step": 33729 }, { "epoch": 0.44, "grad_norm": 3.2979588508605957, "learning_rate": 1.9670892023474278e-05, "loss": 1.8351, "step": 33730 }, { "epoch": 0.44, "grad_norm": 3.609797716140747, "learning_rate": 1.967086528677401e-05, "loss": 1.801, "step": 33731 }, { "epoch": 0.44, "grad_norm": 3.8367984294891357, "learning_rate": 1.967083854900591e-05, "loss": 2.1386, "step": 33732 }, { "epoch": 0.44, "grad_norm": 3.261833429336548, "learning_rate": 1.967081181016999e-05, "loss": 1.575, "step": 33733 }, { "epoch": 0.44, "grad_norm": 4.074458599090576, "learning_rate": 1.967078507026625e-05, "loss": 2.1314, "step": 33734 }, { "epoch": 0.44, "grad_norm": 3.898167848587036, "learning_rate": 1.9670758329294687e-05, "loss": 2.0497, "step": 33735 }, { "epoch": 0.44, "grad_norm": 3.802830696105957, "learning_rate": 1.9670731587255307e-05, "loss": 2.11, "step": 33736 }, { "epoch": 0.44, "grad_norm": 3.5080533027648926, "learning_rate": 1.9670704844148118e-05, "loss": 1.5703, "step": 33737 }, { "epoch": 0.44, "grad_norm": 4.6615376472473145, "learning_rate": 1.9670678099973118e-05, "loss": 2.4278, "step": 33738 }, { "epoch": 0.44, "grad_norm": 3.0919907093048096, "learning_rate": 1.9670651354730307e-05, "loss": 1.5152, "step": 33739 }, { "epoch": 0.44, "grad_norm": 3.8891563415527344, "learning_rate": 1.9670624608419697e-05, "loss": 2.1671, "step": 33740 }, { "epoch": 0.44, "grad_norm": 4.42343807220459, "learning_rate": 1.9670597861041286e-05, "loss": 2.1953, "step": 33741 }, { "epoch": 0.44, "grad_norm": 4.036560535430908, "learning_rate": 1.9670571112595075e-05, "loss": 1.6254, "step": 33742 }, { "epoch": 0.44, "grad_norm": 3.40451979637146, "learning_rate": 1.9670544363081067e-05, "loss": 1.5578, "step": 33743 }, { "epoch": 0.44, "grad_norm": 3.6434361934661865, "learning_rate": 1.967051761249927e-05, "loss": 2.2341, "step": 33744 }, { "epoch": 0.44, "grad_norm": 3.8379158973693848, "learning_rate": 1.967049086084968e-05, "loss": 1.997, "step": 33745 }, { "epoch": 0.44, "grad_norm": 3.233386754989624, "learning_rate": 1.967046410813231e-05, "loss": 1.5881, "step": 33746 }, { "epoch": 0.44, "grad_norm": 3.5347695350646973, "learning_rate": 1.9670437354347152e-05, "loss": 1.6775, "step": 33747 }, { "epoch": 0.44, "grad_norm": 3.220858097076416, "learning_rate": 1.9670410599494217e-05, "loss": 1.7415, "step": 33748 }, { "epoch": 0.44, "grad_norm": 4.193252086639404, "learning_rate": 1.9670383843573503e-05, "loss": 2.0718, "step": 33749 }, { "epoch": 0.44, "grad_norm": 4.211179733276367, "learning_rate": 1.967035708658501e-05, "loss": 2.3325, "step": 33750 }, { "epoch": 0.44, "grad_norm": 4.440272331237793, "learning_rate": 1.967033032852875e-05, "loss": 2.1592, "step": 33751 }, { "epoch": 0.44, "grad_norm": 3.5423078536987305, "learning_rate": 1.9670303569404724e-05, "loss": 1.9124, "step": 33752 }, { "epoch": 0.44, "grad_norm": 3.774449348449707, "learning_rate": 1.9670276809212928e-05, "loss": 1.9018, "step": 33753 }, { "epoch": 0.44, "grad_norm": 3.6916871070861816, "learning_rate": 1.967025004795337e-05, "loss": 1.9613, "step": 33754 }, { "epoch": 0.44, "grad_norm": 3.858882188796997, "learning_rate": 1.9670223285626052e-05, "loss": 1.9255, "step": 33755 }, { "epoch": 0.44, "grad_norm": 4.579281806945801, "learning_rate": 1.967019652223098e-05, "loss": 2.4528, "step": 33756 }, { "epoch": 0.44, "grad_norm": 4.348625183105469, "learning_rate": 1.9670169757768152e-05, "loss": 2.1395, "step": 33757 }, { "epoch": 0.44, "grad_norm": 3.2021327018737793, "learning_rate": 1.9670142992237574e-05, "loss": 1.9347, "step": 33758 }, { "epoch": 0.44, "grad_norm": 4.049550533294678, "learning_rate": 1.9670116225639247e-05, "loss": 2.0522, "step": 33759 }, { "epoch": 0.44, "grad_norm": 4.040511608123779, "learning_rate": 1.9670089457973177e-05, "loss": 2.5026, "step": 33760 }, { "epoch": 0.44, "grad_norm": 3.6293623447418213, "learning_rate": 1.9670062689239365e-05, "loss": 2.2367, "step": 33761 }, { "epoch": 0.44, "grad_norm": 3.7300617694854736, "learning_rate": 1.9670035919437814e-05, "loss": 1.526, "step": 33762 }, { "epoch": 0.44, "grad_norm": 3.9652340412139893, "learning_rate": 1.9670009148568526e-05, "loss": 2.1105, "step": 33763 }, { "epoch": 0.44, "grad_norm": 3.4911625385284424, "learning_rate": 1.9669982376631503e-05, "loss": 1.7604, "step": 33764 }, { "epoch": 0.44, "grad_norm": 3.834291458129883, "learning_rate": 1.9669955603626754e-05, "loss": 1.6252, "step": 33765 }, { "epoch": 0.44, "grad_norm": 3.748178243637085, "learning_rate": 1.9669928829554276e-05, "loss": 1.8338, "step": 33766 }, { "epoch": 0.44, "grad_norm": 4.137472152709961, "learning_rate": 1.9669902054414072e-05, "loss": 2.2056, "step": 33767 }, { "epoch": 0.44, "grad_norm": 4.34511661529541, "learning_rate": 1.966987527820615e-05, "loss": 2.1804, "step": 33768 }, { "epoch": 0.44, "grad_norm": 4.044431686401367, "learning_rate": 1.966984850093051e-05, "loss": 2.0939, "step": 33769 }, { "epoch": 0.44, "grad_norm": 3.7432782649993896, "learning_rate": 1.966982172258715e-05, "loss": 1.9726, "step": 33770 }, { "epoch": 0.44, "grad_norm": 3.9745988845825195, "learning_rate": 1.9669794943176082e-05, "loss": 2.3211, "step": 33771 }, { "epoch": 0.44, "grad_norm": 3.594937801361084, "learning_rate": 1.9669768162697304e-05, "loss": 1.7558, "step": 33772 }, { "epoch": 0.44, "grad_norm": 3.8845818042755127, "learning_rate": 1.9669741381150818e-05, "loss": 2.1612, "step": 33773 }, { "epoch": 0.44, "grad_norm": 3.8013625144958496, "learning_rate": 1.966971459853663e-05, "loss": 1.958, "step": 33774 }, { "epoch": 0.44, "grad_norm": 3.653029441833496, "learning_rate": 1.966968781485474e-05, "loss": 1.95, "step": 33775 }, { "epoch": 0.44, "grad_norm": 3.5666444301605225, "learning_rate": 1.9669661030105153e-05, "loss": 1.8776, "step": 33776 }, { "epoch": 0.44, "grad_norm": 3.609074115753174, "learning_rate": 1.966963424428787e-05, "loss": 1.8356, "step": 33777 }, { "epoch": 0.44, "grad_norm": 4.002727508544922, "learning_rate": 1.96696074574029e-05, "loss": 2.276, "step": 33778 }, { "epoch": 0.44, "grad_norm": 4.036013126373291, "learning_rate": 1.9669580669450234e-05, "loss": 2.0535, "step": 33779 }, { "epoch": 0.44, "grad_norm": 4.041261672973633, "learning_rate": 1.9669553880429888e-05, "loss": 1.9081, "step": 33780 }, { "epoch": 0.44, "grad_norm": 4.345526695251465, "learning_rate": 1.9669527090341857e-05, "loss": 2.1063, "step": 33781 }, { "epoch": 0.44, "grad_norm": 3.5594165325164795, "learning_rate": 1.9669500299186147e-05, "loss": 2.0095, "step": 33782 }, { "epoch": 0.44, "grad_norm": 3.848815679550171, "learning_rate": 1.966947350696276e-05, "loss": 1.919, "step": 33783 }, { "epoch": 0.44, "grad_norm": 3.994870901107788, "learning_rate": 1.9669446713671697e-05, "loss": 1.6744, "step": 33784 }, { "epoch": 0.44, "grad_norm": 3.588392734527588, "learning_rate": 1.9669419919312966e-05, "loss": 1.687, "step": 33785 }, { "epoch": 0.44, "grad_norm": 3.768237590789795, "learning_rate": 1.9669393123886567e-05, "loss": 1.9549, "step": 33786 }, { "epoch": 0.44, "grad_norm": 3.3572895526885986, "learning_rate": 1.96693663273925e-05, "loss": 1.5697, "step": 33787 }, { "epoch": 0.44, "grad_norm": 3.8018646240234375, "learning_rate": 1.9669339529830775e-05, "loss": 2.2135, "step": 33788 }, { "epoch": 0.44, "grad_norm": 3.613776206970215, "learning_rate": 1.966931273120139e-05, "loss": 1.6009, "step": 33789 }, { "epoch": 0.44, "grad_norm": 3.974508762359619, "learning_rate": 1.9669285931504345e-05, "loss": 2.2366, "step": 33790 }, { "epoch": 0.44, "grad_norm": 3.779917001724243, "learning_rate": 1.966925913073965e-05, "loss": 2.4463, "step": 33791 }, { "epoch": 0.44, "grad_norm": 3.7186338901519775, "learning_rate": 1.9669232328907304e-05, "loss": 2.23, "step": 33792 }, { "epoch": 0.44, "grad_norm": 3.4733879566192627, "learning_rate": 1.966920552600731e-05, "loss": 2.0188, "step": 33793 }, { "epoch": 0.44, "grad_norm": 3.901583671569824, "learning_rate": 1.9669178722039674e-05, "loss": 2.0518, "step": 33794 }, { "epoch": 0.44, "grad_norm": 3.7842724323272705, "learning_rate": 1.9669151917004395e-05, "loss": 1.6748, "step": 33795 }, { "epoch": 0.44, "grad_norm": 4.14000129699707, "learning_rate": 1.9669125110901478e-05, "loss": 2.5076, "step": 33796 }, { "epoch": 0.44, "grad_norm": 3.901546001434326, "learning_rate": 1.9669098303730923e-05, "loss": 1.9875, "step": 33797 }, { "epoch": 0.44, "grad_norm": 3.693915367126465, "learning_rate": 1.966907149549274e-05, "loss": 1.7046, "step": 33798 }, { "epoch": 0.44, "grad_norm": 3.720226526260376, "learning_rate": 1.9669044686186925e-05, "loss": 2.2165, "step": 33799 }, { "epoch": 0.44, "grad_norm": 4.271768569946289, "learning_rate": 1.9669017875813484e-05, "loss": 2.1209, "step": 33800 }, { "epoch": 0.44, "grad_norm": 4.0243659019470215, "learning_rate": 1.9668991064372417e-05, "loss": 2.2635, "step": 33801 }, { "epoch": 0.44, "grad_norm": 3.1605191230773926, "learning_rate": 1.966896425186373e-05, "loss": 1.7156, "step": 33802 }, { "epoch": 0.44, "grad_norm": 3.7654731273651123, "learning_rate": 1.966893743828743e-05, "loss": 2.2945, "step": 33803 }, { "epoch": 0.44, "grad_norm": 3.266791820526123, "learning_rate": 1.9668910623643513e-05, "loss": 1.4227, "step": 33804 }, { "epoch": 0.44, "grad_norm": 3.8889405727386475, "learning_rate": 1.966888380793198e-05, "loss": 1.9158, "step": 33805 }, { "epoch": 0.44, "grad_norm": 3.6480915546417236, "learning_rate": 1.966885699115284e-05, "loss": 2.1409, "step": 33806 }, { "epoch": 0.44, "grad_norm": 3.4200990200042725, "learning_rate": 1.9668830173306098e-05, "loss": 1.6708, "step": 33807 }, { "epoch": 0.44, "grad_norm": 3.5124847888946533, "learning_rate": 1.966880335439175e-05, "loss": 1.7641, "step": 33808 }, { "epoch": 0.44, "grad_norm": 4.231894016265869, "learning_rate": 1.96687765344098e-05, "loss": 2.1098, "step": 33809 }, { "epoch": 0.44, "grad_norm": 4.0538554191589355, "learning_rate": 1.9668749713360256e-05, "loss": 1.8136, "step": 33810 }, { "epoch": 0.44, "grad_norm": 4.286553859710693, "learning_rate": 1.966872289124312e-05, "loss": 2.1889, "step": 33811 }, { "epoch": 0.44, "grad_norm": 3.6836161613464355, "learning_rate": 1.9668696068058392e-05, "loss": 2.0975, "step": 33812 }, { "epoch": 0.44, "grad_norm": 3.239169120788574, "learning_rate": 1.9668669243806073e-05, "loss": 1.4473, "step": 33813 }, { "epoch": 0.44, "grad_norm": 3.9995272159576416, "learning_rate": 1.966864241848617e-05, "loss": 2.207, "step": 33814 }, { "epoch": 0.44, "grad_norm": 4.057252883911133, "learning_rate": 1.9668615592098684e-05, "loss": 2.1088, "step": 33815 }, { "epoch": 0.44, "grad_norm": 3.8854877948760986, "learning_rate": 1.9668588764643622e-05, "loss": 1.7066, "step": 33816 }, { "epoch": 0.44, "grad_norm": 3.648055076599121, "learning_rate": 1.966856193612098e-05, "loss": 1.7559, "step": 33817 }, { "epoch": 0.44, "grad_norm": 3.763368606567383, "learning_rate": 1.9668535106530767e-05, "loss": 1.6664, "step": 33818 }, { "epoch": 0.44, "grad_norm": 3.9318594932556152, "learning_rate": 1.9668508275872985e-05, "loss": 2.2048, "step": 33819 }, { "epoch": 0.44, "grad_norm": 4.0084452629089355, "learning_rate": 1.9668481444147633e-05, "loss": 2.1021, "step": 33820 }, { "epoch": 0.44, "grad_norm": 3.6795175075531006, "learning_rate": 1.9668454611354718e-05, "loss": 1.9306, "step": 33821 }, { "epoch": 0.44, "grad_norm": 3.4633517265319824, "learning_rate": 1.9668427777494243e-05, "loss": 1.9595, "step": 33822 }, { "epoch": 0.44, "grad_norm": 3.7371103763580322, "learning_rate": 1.9668400942566205e-05, "loss": 1.8941, "step": 33823 }, { "epoch": 0.44, "grad_norm": 3.3567450046539307, "learning_rate": 1.9668374106570614e-05, "loss": 1.6083, "step": 33824 }, { "epoch": 0.44, "grad_norm": 4.218315601348877, "learning_rate": 1.9668347269507474e-05, "loss": 1.8732, "step": 33825 }, { "epoch": 0.44, "grad_norm": 3.6970012187957764, "learning_rate": 1.9668320431376778e-05, "loss": 1.7766, "step": 33826 }, { "epoch": 0.44, "grad_norm": 3.4581689834594727, "learning_rate": 1.966829359217854e-05, "loss": 1.8906, "step": 33827 }, { "epoch": 0.44, "grad_norm": 4.415099620819092, "learning_rate": 1.9668266751912756e-05, "loss": 2.4752, "step": 33828 }, { "epoch": 0.44, "grad_norm": 4.044066905975342, "learning_rate": 1.966823991057943e-05, "loss": 1.9482, "step": 33829 }, { "epoch": 0.44, "grad_norm": 3.853452682495117, "learning_rate": 1.966821306817857e-05, "loss": 1.8053, "step": 33830 }, { "epoch": 0.44, "grad_norm": 3.828557014465332, "learning_rate": 1.9668186224710174e-05, "loss": 2.0672, "step": 33831 }, { "epoch": 0.44, "grad_norm": 4.253635406494141, "learning_rate": 1.966815938017425e-05, "loss": 1.9289, "step": 33832 }, { "epoch": 0.44, "grad_norm": 3.8647093772888184, "learning_rate": 1.966813253457079e-05, "loss": 2.0189, "step": 33833 }, { "epoch": 0.44, "grad_norm": 3.2711315155029297, "learning_rate": 1.9668105687899806e-05, "loss": 1.9585, "step": 33834 }, { "epoch": 0.44, "grad_norm": 3.0240838527679443, "learning_rate": 1.96680788401613e-05, "loss": 1.4034, "step": 33835 }, { "epoch": 0.44, "grad_norm": 3.4884450435638428, "learning_rate": 1.9668051991355278e-05, "loss": 1.6207, "step": 33836 }, { "epoch": 0.44, "grad_norm": 3.3850197792053223, "learning_rate": 1.9668025141481733e-05, "loss": 1.8082, "step": 33837 }, { "epoch": 0.44, "grad_norm": 3.273188591003418, "learning_rate": 1.9667998290540676e-05, "loss": 1.7884, "step": 33838 }, { "epoch": 0.44, "grad_norm": 3.710399866104126, "learning_rate": 1.966797143853211e-05, "loss": 1.6763, "step": 33839 }, { "epoch": 0.44, "grad_norm": 3.5525503158569336, "learning_rate": 1.9667944585456033e-05, "loss": 1.8527, "step": 33840 }, { "epoch": 0.44, "grad_norm": 4.179737091064453, "learning_rate": 1.9667917731312453e-05, "loss": 1.9828, "step": 33841 }, { "epoch": 0.44, "grad_norm": 3.4952456951141357, "learning_rate": 1.966789087610137e-05, "loss": 2.0371, "step": 33842 }, { "epoch": 0.44, "grad_norm": 3.754103660583496, "learning_rate": 1.9667864019822787e-05, "loss": 1.9442, "step": 33843 }, { "epoch": 0.44, "grad_norm": 3.4884753227233887, "learning_rate": 1.966783716247671e-05, "loss": 1.7678, "step": 33844 }, { "epoch": 0.44, "grad_norm": 3.5895378589630127, "learning_rate": 1.9667810304063136e-05, "loss": 1.8377, "step": 33845 }, { "epoch": 0.44, "grad_norm": 3.6197874546051025, "learning_rate": 1.9667783444582076e-05, "loss": 1.5477, "step": 33846 }, { "epoch": 0.44, "grad_norm": 3.8953146934509277, "learning_rate": 1.9667756584033525e-05, "loss": 2.0393, "step": 33847 }, { "epoch": 0.44, "grad_norm": 3.2827956676483154, "learning_rate": 1.9667729722417495e-05, "loss": 1.6064, "step": 33848 }, { "epoch": 0.44, "grad_norm": 3.953645944595337, "learning_rate": 1.966770285973398e-05, "loss": 2.2375, "step": 33849 }, { "epoch": 0.44, "grad_norm": 3.4967825412750244, "learning_rate": 1.9667675995982986e-05, "loss": 1.7309, "step": 33850 }, { "epoch": 0.44, "grad_norm": 3.418916702270508, "learning_rate": 1.9667649131164517e-05, "loss": 2.0763, "step": 33851 }, { "epoch": 0.44, "grad_norm": 4.001962184906006, "learning_rate": 1.9667622265278577e-05, "loss": 2.3726, "step": 33852 }, { "epoch": 0.44, "grad_norm": 3.788585901260376, "learning_rate": 1.9667595398325166e-05, "loss": 2.0696, "step": 33853 }, { "epoch": 0.44, "grad_norm": 4.1456403732299805, "learning_rate": 1.966756853030429e-05, "loss": 2.3216, "step": 33854 }, { "epoch": 0.44, "grad_norm": 3.636221408843994, "learning_rate": 1.9667541661215948e-05, "loss": 2.0323, "step": 33855 }, { "epoch": 0.44, "grad_norm": 4.030869483947754, "learning_rate": 1.966751479106015e-05, "loss": 2.0759, "step": 33856 }, { "epoch": 0.44, "grad_norm": 4.039989471435547, "learning_rate": 1.9667487919836888e-05, "loss": 2.3581, "step": 33857 }, { "epoch": 0.44, "grad_norm": 3.6688878536224365, "learning_rate": 1.9667461047546176e-05, "loss": 1.7653, "step": 33858 }, { "epoch": 0.44, "grad_norm": 3.8592898845672607, "learning_rate": 1.966743417418801e-05, "loss": 2.2939, "step": 33859 }, { "epoch": 0.44, "grad_norm": 3.7484259605407715, "learning_rate": 1.9667407299762392e-05, "loss": 1.9116, "step": 33860 }, { "epoch": 0.44, "grad_norm": 3.8784255981445312, "learning_rate": 1.9667380424269336e-05, "loss": 1.8328, "step": 33861 }, { "epoch": 0.44, "grad_norm": 4.279208183288574, "learning_rate": 1.9667353547708833e-05, "loss": 1.7517, "step": 33862 }, { "epoch": 0.44, "grad_norm": 3.58247971534729, "learning_rate": 1.966732667008089e-05, "loss": 2.12, "step": 33863 }, { "epoch": 0.44, "grad_norm": 3.955157518386841, "learning_rate": 1.966729979138551e-05, "loss": 2.0383, "step": 33864 }, { "epoch": 0.44, "grad_norm": 3.4743034839630127, "learning_rate": 1.9667272911622697e-05, "loss": 1.6387, "step": 33865 }, { "epoch": 0.44, "grad_norm": 3.8952078819274902, "learning_rate": 1.9667246030792453e-05, "loss": 2.212, "step": 33866 }, { "epoch": 0.44, "grad_norm": 3.590311288833618, "learning_rate": 1.966721914889478e-05, "loss": 1.5701, "step": 33867 }, { "epoch": 0.44, "grad_norm": 3.3931288719177246, "learning_rate": 1.9667192265929685e-05, "loss": 1.6549, "step": 33868 }, { "epoch": 0.44, "grad_norm": 3.724285840988159, "learning_rate": 1.9667165381897165e-05, "loss": 1.7453, "step": 33869 }, { "epoch": 0.44, "grad_norm": 4.014721393585205, "learning_rate": 1.9667138496797228e-05, "loss": 2.1528, "step": 33870 }, { "epoch": 0.44, "grad_norm": 3.653153657913208, "learning_rate": 1.9667111610629874e-05, "loss": 1.8338, "step": 33871 }, { "epoch": 0.44, "grad_norm": 3.7098381519317627, "learning_rate": 1.9667084723395105e-05, "loss": 1.8013, "step": 33872 }, { "epoch": 0.44, "grad_norm": 4.176712512969971, "learning_rate": 1.9667057835092926e-05, "loss": 2.1933, "step": 33873 }, { "epoch": 0.44, "grad_norm": 4.177290439605713, "learning_rate": 1.966703094572334e-05, "loss": 2.2956, "step": 33874 }, { "epoch": 0.44, "grad_norm": 4.893863677978516, "learning_rate": 1.9667004055286353e-05, "loss": 2.1613, "step": 33875 }, { "epoch": 0.44, "grad_norm": 3.5703282356262207, "learning_rate": 1.9666977163781962e-05, "loss": 1.797, "step": 33876 }, { "epoch": 0.44, "grad_norm": 4.0529632568359375, "learning_rate": 1.966695027121017e-05, "loss": 2.4188, "step": 33877 }, { "epoch": 0.44, "grad_norm": 3.9049887657165527, "learning_rate": 1.966692337757099e-05, "loss": 1.9891, "step": 33878 }, { "epoch": 0.44, "grad_norm": 3.580659866333008, "learning_rate": 1.9666896482864412e-05, "loss": 1.6796, "step": 33879 }, { "epoch": 0.44, "grad_norm": 3.959272623062134, "learning_rate": 1.9666869587090447e-05, "loss": 2.1568, "step": 33880 }, { "epoch": 0.44, "grad_norm": 3.176739454269409, "learning_rate": 1.9666842690249095e-05, "loss": 1.5896, "step": 33881 }, { "epoch": 0.44, "grad_norm": 3.2203238010406494, "learning_rate": 1.966681579234036e-05, "loss": 1.7407, "step": 33882 }, { "epoch": 0.44, "grad_norm": 3.8533129692077637, "learning_rate": 1.9666788893364243e-05, "loss": 1.8445, "step": 33883 }, { "epoch": 0.44, "grad_norm": 3.6982016563415527, "learning_rate": 1.966676199332075e-05, "loss": 2.0961, "step": 33884 }, { "epoch": 0.44, "grad_norm": 3.7118892669677734, "learning_rate": 1.9666735092209882e-05, "loss": 1.8964, "step": 33885 }, { "epoch": 0.44, "grad_norm": 3.7494640350341797, "learning_rate": 1.9666708190031643e-05, "loss": 1.761, "step": 33886 }, { "epoch": 0.44, "grad_norm": 3.3453333377838135, "learning_rate": 1.9666681286786034e-05, "loss": 1.6186, "step": 33887 }, { "epoch": 0.44, "grad_norm": 3.428321599960327, "learning_rate": 1.9666654382473062e-05, "loss": 1.9363, "step": 33888 }, { "epoch": 0.44, "grad_norm": 4.272429466247559, "learning_rate": 1.9666627477092723e-05, "loss": 2.108, "step": 33889 }, { "epoch": 0.44, "grad_norm": 3.8180453777313232, "learning_rate": 1.966660057064503e-05, "loss": 2.0302, "step": 33890 }, { "epoch": 0.44, "grad_norm": 3.765098810195923, "learning_rate": 1.9666573663129977e-05, "loss": 2.0274, "step": 33891 }, { "epoch": 0.44, "grad_norm": 3.507242441177368, "learning_rate": 1.966654675454757e-05, "loss": 1.8076, "step": 33892 }, { "epoch": 0.44, "grad_norm": 3.4104864597320557, "learning_rate": 1.9666519844897812e-05, "loss": 1.8877, "step": 33893 }, { "epoch": 0.44, "grad_norm": 3.486093759536743, "learning_rate": 1.9666492934180708e-05, "loss": 1.9417, "step": 33894 }, { "epoch": 0.44, "grad_norm": 3.8952293395996094, "learning_rate": 1.966646602239626e-05, "loss": 2.1856, "step": 33895 }, { "epoch": 0.44, "grad_norm": 3.535330057144165, "learning_rate": 1.9666439109544466e-05, "loss": 1.8743, "step": 33896 }, { "epoch": 0.44, "grad_norm": 4.069011211395264, "learning_rate": 1.9666412195625334e-05, "loss": 1.7778, "step": 33897 }, { "epoch": 0.44, "grad_norm": 3.5639374256134033, "learning_rate": 1.966638528063887e-05, "loss": 1.7837, "step": 33898 }, { "epoch": 0.44, "grad_norm": 3.9681084156036377, "learning_rate": 1.9666358364585068e-05, "loss": 1.7981, "step": 33899 }, { "epoch": 0.44, "grad_norm": 3.6836602687835693, "learning_rate": 1.966633144746394e-05, "loss": 1.8975, "step": 33900 }, { "epoch": 0.44, "grad_norm": 3.870365619659424, "learning_rate": 1.9666304529275485e-05, "loss": 2.0968, "step": 33901 }, { "epoch": 0.44, "grad_norm": 3.7369420528411865, "learning_rate": 1.9666277610019704e-05, "loss": 1.9479, "step": 33902 }, { "epoch": 0.44, "grad_norm": 3.8792054653167725, "learning_rate": 1.9666250689696603e-05, "loss": 2.0393, "step": 33903 }, { "epoch": 0.44, "grad_norm": 4.096947193145752, "learning_rate": 1.966622376830618e-05, "loss": 2.0324, "step": 33904 }, { "epoch": 0.44, "grad_norm": 3.9009461402893066, "learning_rate": 1.9666196845848447e-05, "loss": 1.9132, "step": 33905 }, { "epoch": 0.44, "grad_norm": 3.7898809909820557, "learning_rate": 1.9666169922323402e-05, "loss": 2.2435, "step": 33906 }, { "epoch": 0.44, "grad_norm": 3.7746102809906006, "learning_rate": 1.9666142997731045e-05, "loss": 1.7786, "step": 33907 }, { "epoch": 0.44, "grad_norm": 3.855924606323242, "learning_rate": 1.9666116072071382e-05, "loss": 2.1374, "step": 33908 }, { "epoch": 0.44, "grad_norm": 4.547669410705566, "learning_rate": 1.9666089145344418e-05, "loss": 2.5269, "step": 33909 }, { "epoch": 0.44, "grad_norm": 4.333211898803711, "learning_rate": 1.9666062217550154e-05, "loss": 1.9292, "step": 33910 }, { "epoch": 0.44, "grad_norm": 4.444014549255371, "learning_rate": 1.966603528868859e-05, "loss": 2.4003, "step": 33911 }, { "epoch": 0.44, "grad_norm": 3.933177947998047, "learning_rate": 1.966600835875973e-05, "loss": 2.146, "step": 33912 }, { "epoch": 0.44, "grad_norm": 4.160025119781494, "learning_rate": 1.9665981427763584e-05, "loss": 2.0878, "step": 33913 }, { "epoch": 0.44, "grad_norm": 4.0560994148254395, "learning_rate": 1.966595449570015e-05, "loss": 1.8129, "step": 33914 }, { "epoch": 0.44, "grad_norm": 3.9035849571228027, "learning_rate": 1.9665927562569425e-05, "loss": 2.1652, "step": 33915 }, { "epoch": 0.44, "grad_norm": 3.470956563949585, "learning_rate": 1.9665900628371423e-05, "loss": 1.849, "step": 33916 }, { "epoch": 0.44, "grad_norm": 4.0299553871154785, "learning_rate": 1.966587369310614e-05, "loss": 1.9228, "step": 33917 }, { "epoch": 0.44, "grad_norm": 3.5244834423065186, "learning_rate": 1.966584675677358e-05, "loss": 2.0413, "step": 33918 }, { "epoch": 0.44, "grad_norm": 3.8357627391815186, "learning_rate": 1.9665819819373743e-05, "loss": 2.1962, "step": 33919 }, { "epoch": 0.44, "grad_norm": 4.510039329528809, "learning_rate": 1.966579288090664e-05, "loss": 2.1244, "step": 33920 }, { "epoch": 0.44, "grad_norm": 3.640385627746582, "learning_rate": 1.966576594137227e-05, "loss": 1.9337, "step": 33921 }, { "epoch": 0.44, "grad_norm": 3.6941452026367188, "learning_rate": 1.966573900077063e-05, "loss": 1.8404, "step": 33922 }, { "epoch": 0.44, "grad_norm": 4.064063549041748, "learning_rate": 1.9665712059101737e-05, "loss": 2.208, "step": 33923 }, { "epoch": 0.44, "grad_norm": 3.671417236328125, "learning_rate": 1.966568511636558e-05, "loss": 1.9704, "step": 33924 }, { "epoch": 0.44, "grad_norm": 4.253903388977051, "learning_rate": 1.966565817256217e-05, "loss": 2.1104, "step": 33925 }, { "epoch": 0.44, "grad_norm": 3.77701473236084, "learning_rate": 1.96656312276915e-05, "loss": 2.1263, "step": 33926 }, { "epoch": 0.44, "grad_norm": 3.349754810333252, "learning_rate": 1.9665604281753588e-05, "loss": 1.594, "step": 33927 }, { "epoch": 0.44, "grad_norm": 3.439985752105713, "learning_rate": 1.9665577334748428e-05, "loss": 1.6554, "step": 33928 }, { "epoch": 0.44, "grad_norm": 4.132015705108643, "learning_rate": 1.9665550386676023e-05, "loss": 2.3872, "step": 33929 }, { "epoch": 0.44, "grad_norm": 4.345216274261475, "learning_rate": 1.966552343753638e-05, "loss": 2.1142, "step": 33930 }, { "epoch": 0.44, "grad_norm": 4.001223564147949, "learning_rate": 1.9665496487329495e-05, "loss": 1.9753, "step": 33931 }, { "epoch": 0.44, "grad_norm": 3.7643113136291504, "learning_rate": 1.9665469536055374e-05, "loss": 1.8839, "step": 33932 }, { "epoch": 0.44, "grad_norm": 4.212187767028809, "learning_rate": 1.9665442583714024e-05, "loss": 1.9817, "step": 33933 }, { "epoch": 0.44, "grad_norm": 3.13567852973938, "learning_rate": 1.9665415630305447e-05, "loss": 1.5534, "step": 33934 }, { "epoch": 0.44, "grad_norm": 3.510179042816162, "learning_rate": 1.9665388675829642e-05, "loss": 1.5652, "step": 33935 }, { "epoch": 0.44, "grad_norm": 3.9762167930603027, "learning_rate": 1.9665361720286616e-05, "loss": 2.346, "step": 33936 }, { "epoch": 0.44, "grad_norm": 4.324634075164795, "learning_rate": 1.9665334763676366e-05, "loss": 2.385, "step": 33937 }, { "epoch": 0.44, "grad_norm": 3.9480600357055664, "learning_rate": 1.9665307805998902e-05, "loss": 1.9731, "step": 33938 }, { "epoch": 0.44, "grad_norm": 4.231618404388428, "learning_rate": 1.9665280847254228e-05, "loss": 2.0238, "step": 33939 }, { "epoch": 0.44, "grad_norm": 3.87886905670166, "learning_rate": 1.9665253887442336e-05, "loss": 2.0938, "step": 33940 }, { "epoch": 0.44, "grad_norm": 3.9023892879486084, "learning_rate": 1.966522692656324e-05, "loss": 2.1532, "step": 33941 }, { "epoch": 0.44, "grad_norm": 3.1798534393310547, "learning_rate": 1.9665199964616936e-05, "loss": 1.5483, "step": 33942 }, { "epoch": 0.44, "grad_norm": 3.273470640182495, "learning_rate": 1.9665173001603432e-05, "loss": 1.4783, "step": 33943 }, { "epoch": 0.44, "grad_norm": 3.650944948196411, "learning_rate": 1.9665146037522728e-05, "loss": 2.0468, "step": 33944 }, { "epoch": 0.44, "grad_norm": 3.6700003147125244, "learning_rate": 1.966511907237483e-05, "loss": 1.9713, "step": 33945 }, { "epoch": 0.44, "grad_norm": 3.576232433319092, "learning_rate": 1.9665092106159734e-05, "loss": 1.698, "step": 33946 }, { "epoch": 0.44, "grad_norm": 3.9802944660186768, "learning_rate": 1.9665065138877455e-05, "loss": 1.8753, "step": 33947 }, { "epoch": 0.44, "grad_norm": 3.65309476852417, "learning_rate": 1.9665038170527983e-05, "loss": 1.6012, "step": 33948 }, { "epoch": 0.44, "grad_norm": 3.641339063644409, "learning_rate": 1.966501120111133e-05, "loss": 1.9797, "step": 33949 }, { "epoch": 0.44, "grad_norm": 4.097468376159668, "learning_rate": 1.9664984230627497e-05, "loss": 2.4374, "step": 33950 }, { "epoch": 0.44, "grad_norm": 3.721925735473633, "learning_rate": 1.966495725907648e-05, "loss": 2.1197, "step": 33951 }, { "epoch": 0.44, "grad_norm": 3.96065354347229, "learning_rate": 1.9664930286458293e-05, "loss": 2.0773, "step": 33952 }, { "epoch": 0.44, "grad_norm": 3.818125009536743, "learning_rate": 1.966490331277293e-05, "loss": 1.9009, "step": 33953 }, { "epoch": 0.44, "grad_norm": 4.257022857666016, "learning_rate": 1.9664876338020403e-05, "loss": 2.0869, "step": 33954 }, { "epoch": 0.44, "grad_norm": 4.112329006195068, "learning_rate": 1.9664849362200703e-05, "loss": 2.5221, "step": 33955 }, { "epoch": 0.44, "grad_norm": 3.639019250869751, "learning_rate": 1.9664822385313846e-05, "loss": 1.913, "step": 33956 }, { "epoch": 0.44, "grad_norm": 3.9481515884399414, "learning_rate": 1.9664795407359823e-05, "loss": 1.8439, "step": 33957 }, { "epoch": 0.44, "grad_norm": 3.5946452617645264, "learning_rate": 1.9664768428338647e-05, "loss": 2.0887, "step": 33958 }, { "epoch": 0.44, "grad_norm": 3.3872005939483643, "learning_rate": 1.9664741448250314e-05, "loss": 1.6032, "step": 33959 }, { "epoch": 0.44, "grad_norm": 3.6858267784118652, "learning_rate": 1.966471446709483e-05, "loss": 1.8188, "step": 33960 }, { "epoch": 0.44, "grad_norm": 3.767268180847168, "learning_rate": 1.96646874848722e-05, "loss": 2.1928, "step": 33961 }, { "epoch": 0.44, "grad_norm": 4.080359935760498, "learning_rate": 1.9664660501582424e-05, "loss": 2.0821, "step": 33962 }, { "epoch": 0.44, "grad_norm": 3.3792412281036377, "learning_rate": 1.9664633517225506e-05, "loss": 1.8547, "step": 33963 }, { "epoch": 0.44, "grad_norm": 4.611903667449951, "learning_rate": 1.9664606531801445e-05, "loss": 2.3881, "step": 33964 }, { "epoch": 0.44, "grad_norm": 3.9513063430786133, "learning_rate": 1.9664579545310247e-05, "loss": 1.9501, "step": 33965 }, { "epoch": 0.44, "grad_norm": 3.2996628284454346, "learning_rate": 1.9664552557751917e-05, "loss": 2.0021, "step": 33966 }, { "epoch": 0.44, "grad_norm": 3.707169771194458, "learning_rate": 1.9664525569126458e-05, "loss": 2.3295, "step": 33967 }, { "epoch": 0.44, "grad_norm": 3.847200632095337, "learning_rate": 1.966449857943387e-05, "loss": 2.1477, "step": 33968 }, { "epoch": 0.44, "grad_norm": 4.12166166305542, "learning_rate": 1.9664471588674158e-05, "loss": 2.4682, "step": 33969 }, { "epoch": 0.44, "grad_norm": 3.9872639179229736, "learning_rate": 1.9664444596847324e-05, "loss": 2.1258, "step": 33970 }, { "epoch": 0.44, "grad_norm": 3.355217456817627, "learning_rate": 1.966441760395337e-05, "loss": 1.7068, "step": 33971 }, { "epoch": 0.44, "grad_norm": 3.463416814804077, "learning_rate": 1.9664390609992305e-05, "loss": 1.586, "step": 33972 }, { "epoch": 0.44, "grad_norm": 4.483565807342529, "learning_rate": 1.9664363614964124e-05, "loss": 2.4201, "step": 33973 }, { "epoch": 0.44, "grad_norm": 3.702483892440796, "learning_rate": 1.966433661886883e-05, "loss": 2.1704, "step": 33974 }, { "epoch": 0.44, "grad_norm": 4.262685775756836, "learning_rate": 1.9664309621706437e-05, "loss": 2.0475, "step": 33975 }, { "epoch": 0.44, "grad_norm": 3.7472221851348877, "learning_rate": 1.9664282623476936e-05, "loss": 1.9621, "step": 33976 }, { "epoch": 0.44, "grad_norm": 4.602569103240967, "learning_rate": 1.966425562418033e-05, "loss": 2.4744, "step": 33977 }, { "epoch": 0.44, "grad_norm": 3.3105554580688477, "learning_rate": 1.9664228623816632e-05, "loss": 1.652, "step": 33978 }, { "epoch": 0.44, "grad_norm": 3.992377996444702, "learning_rate": 1.966420162238584e-05, "loss": 1.9713, "step": 33979 }, { "epoch": 0.44, "grad_norm": 3.1580915451049805, "learning_rate": 1.9664174619887953e-05, "loss": 1.3672, "step": 33980 }, { "epoch": 0.44, "grad_norm": 4.02425479888916, "learning_rate": 1.9664147616322977e-05, "loss": 1.8404, "step": 33981 }, { "epoch": 0.44, "grad_norm": 3.7885327339172363, "learning_rate": 1.9664120611690916e-05, "loss": 2.1538, "step": 33982 }, { "epoch": 0.44, "grad_norm": 4.080298900604248, "learning_rate": 1.9664093605991773e-05, "loss": 2.0466, "step": 33983 }, { "epoch": 0.44, "grad_norm": 4.214791774749756, "learning_rate": 1.966406659922555e-05, "loss": 2.4631, "step": 33984 }, { "epoch": 0.44, "grad_norm": 4.032804012298584, "learning_rate": 1.9664039591392248e-05, "loss": 2.2707, "step": 33985 }, { "epoch": 0.44, "grad_norm": 3.679589033126831, "learning_rate": 1.9664012582491872e-05, "loss": 1.8127, "step": 33986 }, { "epoch": 0.44, "grad_norm": 3.496830940246582, "learning_rate": 1.966398557252443e-05, "loss": 1.9913, "step": 33987 }, { "epoch": 0.44, "grad_norm": 3.9263715744018555, "learning_rate": 1.9663958561489916e-05, "loss": 2.1174, "step": 33988 }, { "epoch": 0.44, "grad_norm": 4.04350471496582, "learning_rate": 1.966393154938834e-05, "loss": 2.1368, "step": 33989 }, { "epoch": 0.44, "grad_norm": 4.578031063079834, "learning_rate": 1.9663904536219697e-05, "loss": 2.0144, "step": 33990 }, { "epoch": 0.44, "grad_norm": 3.861239194869995, "learning_rate": 1.9663877521984e-05, "loss": 2.1006, "step": 33991 }, { "epoch": 0.44, "grad_norm": 3.7923521995544434, "learning_rate": 1.9663850506681245e-05, "loss": 1.978, "step": 33992 }, { "epoch": 0.44, "grad_norm": 4.011803150177002, "learning_rate": 1.9663823490311434e-05, "loss": 2.3356, "step": 33993 }, { "epoch": 0.44, "grad_norm": 3.8485217094421387, "learning_rate": 1.9663796472874576e-05, "loss": 1.6855, "step": 33994 }, { "epoch": 0.44, "grad_norm": 3.952252149581909, "learning_rate": 1.966376945437067e-05, "loss": 1.8998, "step": 33995 }, { "epoch": 0.44, "grad_norm": 3.9174163341522217, "learning_rate": 1.9663742434799723e-05, "loss": 2.2651, "step": 33996 }, { "epoch": 0.44, "grad_norm": 3.813087224960327, "learning_rate": 1.966371541416173e-05, "loss": 2.1413, "step": 33997 }, { "epoch": 0.44, "grad_norm": 4.306046009063721, "learning_rate": 1.96636883924567e-05, "loss": 2.3, "step": 33998 }, { "epoch": 0.44, "grad_norm": 3.6170122623443604, "learning_rate": 1.9663661369684636e-05, "loss": 1.9699, "step": 33999 }, { "epoch": 0.44, "grad_norm": 4.189189910888672, "learning_rate": 1.966363434584554e-05, "loss": 1.9493, "step": 34000 }, { "epoch": 0.44, "grad_norm": 3.939793825149536, "learning_rate": 1.9663607320939417e-05, "loss": 2.0704, "step": 34001 }, { "epoch": 0.44, "grad_norm": 3.2291266918182373, "learning_rate": 1.9663580294966265e-05, "loss": 1.7775, "step": 34002 }, { "epoch": 0.44, "grad_norm": 3.5700080394744873, "learning_rate": 1.966355326792609e-05, "loss": 1.6586, "step": 34003 }, { "epoch": 0.44, "grad_norm": 4.283638954162598, "learning_rate": 1.9663526239818897e-05, "loss": 2.3611, "step": 34004 }, { "epoch": 0.44, "grad_norm": 3.505650281906128, "learning_rate": 1.9663499210644683e-05, "loss": 1.6762, "step": 34005 }, { "epoch": 0.44, "grad_norm": 3.2127866744995117, "learning_rate": 1.9663472180403458e-05, "loss": 1.6263, "step": 34006 }, { "epoch": 0.44, "grad_norm": 3.486480712890625, "learning_rate": 1.9663445149095216e-05, "loss": 2.1067, "step": 34007 }, { "epoch": 0.44, "grad_norm": 3.8304479122161865, "learning_rate": 1.9663418116719973e-05, "loss": 1.8386, "step": 34008 }, { "epoch": 0.44, "grad_norm": 3.520613670349121, "learning_rate": 1.9663391083277722e-05, "loss": 1.9236, "step": 34009 }, { "epoch": 0.44, "grad_norm": 3.6505117416381836, "learning_rate": 1.9663364048768468e-05, "loss": 1.9086, "step": 34010 }, { "epoch": 0.44, "grad_norm": 4.019333362579346, "learning_rate": 1.9663337013192216e-05, "loss": 1.9714, "step": 34011 }, { "epoch": 0.44, "grad_norm": 5.103940486907959, "learning_rate": 1.9663309976548964e-05, "loss": 2.3958, "step": 34012 }, { "epoch": 0.44, "grad_norm": 3.3762564659118652, "learning_rate": 1.9663282938838724e-05, "loss": 1.5399, "step": 34013 }, { "epoch": 0.44, "grad_norm": 3.4511001110076904, "learning_rate": 1.9663255900061488e-05, "loss": 1.777, "step": 34014 }, { "epoch": 0.44, "grad_norm": 3.7049143314361572, "learning_rate": 1.966322886021727e-05, "loss": 2.1828, "step": 34015 }, { "epoch": 0.44, "grad_norm": 3.7248106002807617, "learning_rate": 1.9663201819306063e-05, "loss": 2.093, "step": 34016 }, { "epoch": 0.44, "grad_norm": 4.092887878417969, "learning_rate": 1.966317477732788e-05, "loss": 2.0754, "step": 34017 }, { "epoch": 0.44, "grad_norm": 3.5349373817443848, "learning_rate": 1.9663147734282715e-05, "loss": 1.3926, "step": 34018 }, { "epoch": 0.44, "grad_norm": 3.3447036743164062, "learning_rate": 1.9663120690170573e-05, "loss": 1.6369, "step": 34019 }, { "epoch": 0.44, "grad_norm": 4.1323676109313965, "learning_rate": 1.966309364499146e-05, "loss": 2.0806, "step": 34020 }, { "epoch": 0.44, "grad_norm": 4.014819622039795, "learning_rate": 1.9663066598745376e-05, "loss": 2.0558, "step": 34021 }, { "epoch": 0.44, "grad_norm": 3.701523780822754, "learning_rate": 1.966303955143233e-05, "loss": 1.8198, "step": 34022 }, { "epoch": 0.44, "grad_norm": 3.784187078475952, "learning_rate": 1.966301250305232e-05, "loss": 2.1228, "step": 34023 }, { "epoch": 0.44, "grad_norm": 3.7959671020507812, "learning_rate": 1.9662985453605344e-05, "loss": 1.7699, "step": 34024 }, { "epoch": 0.44, "grad_norm": 3.8902461528778076, "learning_rate": 1.9662958403091416e-05, "loss": 2.1488, "step": 34025 }, { "epoch": 0.44, "grad_norm": 3.656554698944092, "learning_rate": 1.966293135151053e-05, "loss": 1.6758, "step": 34026 }, { "epoch": 0.44, "grad_norm": 3.2162859439849854, "learning_rate": 1.9662904298862696e-05, "loss": 1.547, "step": 34027 }, { "epoch": 0.44, "grad_norm": 3.64607834815979, "learning_rate": 1.9662877245147908e-05, "loss": 2.0361, "step": 34028 }, { "epoch": 0.44, "grad_norm": 3.881624460220337, "learning_rate": 1.9662850190366177e-05, "loss": 1.697, "step": 34029 }, { "epoch": 0.44, "grad_norm": 4.706538200378418, "learning_rate": 1.9662823134517506e-05, "loss": 2.7325, "step": 34030 }, { "epoch": 0.44, "grad_norm": 4.522537708282471, "learning_rate": 1.9662796077601896e-05, "loss": 2.4724, "step": 34031 }, { "epoch": 0.44, "grad_norm": 4.16804313659668, "learning_rate": 1.9662769019619346e-05, "loss": 2.0886, "step": 34032 }, { "epoch": 0.44, "grad_norm": 3.8931493759155273, "learning_rate": 1.9662741960569863e-05, "loss": 1.7414, "step": 34033 }, { "epoch": 0.44, "grad_norm": 3.4778754711151123, "learning_rate": 1.9662714900453447e-05, "loss": 1.8712, "step": 34034 }, { "epoch": 0.44, "grad_norm": 3.8251900672912598, "learning_rate": 1.9662687839270106e-05, "loss": 2.2539, "step": 34035 }, { "epoch": 0.44, "grad_norm": 3.5171351432800293, "learning_rate": 1.966266077701984e-05, "loss": 1.9213, "step": 34036 }, { "epoch": 0.44, "grad_norm": 4.0014967918396, "learning_rate": 1.9662633713702654e-05, "loss": 2.3963, "step": 34037 }, { "epoch": 0.44, "grad_norm": 3.5558879375457764, "learning_rate": 1.9662606649318547e-05, "loss": 1.9741, "step": 34038 }, { "epoch": 0.44, "grad_norm": 3.243479013442993, "learning_rate": 1.9662579583867525e-05, "loss": 1.4942, "step": 34039 }, { "epoch": 0.44, "grad_norm": 3.897054433822632, "learning_rate": 1.966255251734959e-05, "loss": 2.2002, "step": 34040 }, { "epoch": 0.44, "grad_norm": 3.752570152282715, "learning_rate": 1.9662525449764746e-05, "loss": 2.6382, "step": 34041 }, { "epoch": 0.44, "grad_norm": 3.695068359375, "learning_rate": 1.9662498381112996e-05, "loss": 1.7816, "step": 34042 }, { "epoch": 0.44, "grad_norm": 3.7382304668426514, "learning_rate": 1.9662471311394344e-05, "loss": 1.8934, "step": 34043 }, { "epoch": 0.44, "grad_norm": 3.750396251678467, "learning_rate": 1.9662444240608786e-05, "loss": 1.6778, "step": 34044 }, { "epoch": 0.44, "grad_norm": 3.9305832386016846, "learning_rate": 1.9662417168756332e-05, "loss": 2.2263, "step": 34045 }, { "epoch": 0.44, "grad_norm": 3.765770196914673, "learning_rate": 1.9662390095836986e-05, "loss": 1.9787, "step": 34046 }, { "epoch": 0.44, "grad_norm": 3.548737049102783, "learning_rate": 1.966236302185074e-05, "loss": 1.8684, "step": 34047 }, { "epoch": 0.44, "grad_norm": 4.051382541656494, "learning_rate": 1.9662335946797615e-05, "loss": 1.7237, "step": 34048 }, { "epoch": 0.44, "grad_norm": 3.444101095199585, "learning_rate": 1.9662308870677603e-05, "loss": 1.7417, "step": 34049 }, { "epoch": 0.44, "grad_norm": 3.459507703781128, "learning_rate": 1.9662281793490705e-05, "loss": 1.7009, "step": 34050 }, { "epoch": 0.44, "grad_norm": 3.7717959880828857, "learning_rate": 1.966225471523693e-05, "loss": 2.2118, "step": 34051 }, { "epoch": 0.44, "grad_norm": 4.044470310211182, "learning_rate": 1.9662227635916274e-05, "loss": 2.1242, "step": 34052 }, { "epoch": 0.44, "grad_norm": 3.9091007709503174, "learning_rate": 1.9662200555528747e-05, "loss": 1.674, "step": 34053 }, { "epoch": 0.44, "grad_norm": 4.002331256866455, "learning_rate": 1.9662173474074348e-05, "loss": 2.1206, "step": 34054 }, { "epoch": 0.44, "grad_norm": 4.246433734893799, "learning_rate": 1.9662146391553077e-05, "loss": 1.9138, "step": 34055 }, { "epoch": 0.44, "grad_norm": 3.6204795837402344, "learning_rate": 1.966211930796495e-05, "loss": 1.9291, "step": 34056 }, { "epoch": 0.44, "grad_norm": 3.573514461517334, "learning_rate": 1.9662092223309958e-05, "loss": 1.8204, "step": 34057 }, { "epoch": 0.44, "grad_norm": 4.135148525238037, "learning_rate": 1.9662065137588105e-05, "loss": 2.0512, "step": 34058 }, { "epoch": 0.44, "grad_norm": 3.4936633110046387, "learning_rate": 1.9662038050799395e-05, "loss": 1.7139, "step": 34059 }, { "epoch": 0.44, "grad_norm": 4.042924880981445, "learning_rate": 1.9662010962943832e-05, "loss": 1.9706, "step": 34060 }, { "epoch": 0.44, "grad_norm": 3.938511610031128, "learning_rate": 1.9661983874021422e-05, "loss": 2.2945, "step": 34061 }, { "epoch": 0.44, "grad_norm": 3.690277099609375, "learning_rate": 1.9661956784032163e-05, "loss": 2.0172, "step": 34062 }, { "epoch": 0.44, "grad_norm": 3.2660610675811768, "learning_rate": 1.9661929692976063e-05, "loss": 1.3955, "step": 34063 }, { "epoch": 0.44, "grad_norm": 3.647381067276001, "learning_rate": 1.966190260085312e-05, "loss": 1.9224, "step": 34064 }, { "epoch": 0.44, "grad_norm": 4.670984268188477, "learning_rate": 1.966187550766334e-05, "loss": 3.0098, "step": 34065 }, { "epoch": 0.44, "grad_norm": 4.1430439949035645, "learning_rate": 1.9661848413406722e-05, "loss": 1.9116, "step": 34066 }, { "epoch": 0.44, "grad_norm": 3.8521153926849365, "learning_rate": 1.9661821318083273e-05, "loss": 2.0541, "step": 34067 }, { "epoch": 0.44, "grad_norm": 3.708688497543335, "learning_rate": 1.9661794221693e-05, "loss": 1.8484, "step": 34068 }, { "epoch": 0.44, "grad_norm": 3.5645809173583984, "learning_rate": 1.9661767124235896e-05, "loss": 1.8739, "step": 34069 }, { "epoch": 0.44, "grad_norm": 3.977407693862915, "learning_rate": 1.966174002571197e-05, "loss": 2.3048, "step": 34070 }, { "epoch": 0.44, "grad_norm": 4.027160167694092, "learning_rate": 1.9661712926121225e-05, "loss": 2.1353, "step": 34071 }, { "epoch": 0.44, "grad_norm": 3.763821601867676, "learning_rate": 1.966168582546366e-05, "loss": 2.0236, "step": 34072 }, { "epoch": 0.44, "grad_norm": 4.751775741577148, "learning_rate": 1.9661658723739286e-05, "loss": 2.5783, "step": 34073 }, { "epoch": 0.44, "grad_norm": 3.5216963291168213, "learning_rate": 1.9661631620948097e-05, "loss": 1.7832, "step": 34074 }, { "epoch": 0.44, "grad_norm": 3.501579523086548, "learning_rate": 1.96616045170901e-05, "loss": 2.0007, "step": 34075 }, { "epoch": 0.44, "grad_norm": 3.589327573776245, "learning_rate": 1.96615774121653e-05, "loss": 2.049, "step": 34076 }, { "epoch": 0.44, "grad_norm": 3.889392852783203, "learning_rate": 1.9661550306173698e-05, "loss": 2.0627, "step": 34077 }, { "epoch": 0.44, "grad_norm": 3.5038421154022217, "learning_rate": 1.9661523199115295e-05, "loss": 1.8059, "step": 34078 }, { "epoch": 0.44, "grad_norm": 3.824993133544922, "learning_rate": 1.96614960909901e-05, "loss": 1.9748, "step": 34079 }, { "epoch": 0.44, "grad_norm": 4.011059761047363, "learning_rate": 1.9661468981798106e-05, "loss": 2.0077, "step": 34080 }, { "epoch": 0.44, "grad_norm": 3.7362725734710693, "learning_rate": 1.9661441871539325e-05, "loss": 1.9062, "step": 34081 }, { "epoch": 0.44, "grad_norm": 4.237605571746826, "learning_rate": 1.9661414760213757e-05, "loss": 1.9235, "step": 34082 }, { "epoch": 0.44, "grad_norm": 3.908320188522339, "learning_rate": 1.9661387647821405e-05, "loss": 1.9954, "step": 34083 }, { "epoch": 0.44, "grad_norm": 4.829512596130371, "learning_rate": 1.966136053436227e-05, "loss": 2.3438, "step": 34084 }, { "epoch": 0.44, "grad_norm": 3.364250898361206, "learning_rate": 1.966133341983636e-05, "loss": 1.7594, "step": 34085 }, { "epoch": 0.44, "grad_norm": 3.5207247734069824, "learning_rate": 1.966130630424367e-05, "loss": 1.9477, "step": 34086 }, { "epoch": 0.44, "grad_norm": 3.3049216270446777, "learning_rate": 1.966127918758421e-05, "loss": 1.7298, "step": 34087 }, { "epoch": 0.44, "grad_norm": 4.072080612182617, "learning_rate": 1.9661252069857985e-05, "loss": 2.358, "step": 34088 }, { "epoch": 0.44, "grad_norm": 3.64693284034729, "learning_rate": 1.966122495106499e-05, "loss": 1.7059, "step": 34089 }, { "epoch": 0.44, "grad_norm": 3.7120726108551025, "learning_rate": 1.966119783120523e-05, "loss": 1.781, "step": 34090 }, { "epoch": 0.44, "grad_norm": 3.045536518096924, "learning_rate": 1.9661170710278714e-05, "loss": 1.5098, "step": 34091 }, { "epoch": 0.44, "grad_norm": 3.372004747390747, "learning_rate": 1.966114358828544e-05, "loss": 1.7937, "step": 34092 }, { "epoch": 0.44, "grad_norm": 3.8379855155944824, "learning_rate": 1.966111646522541e-05, "loss": 2.1142, "step": 34093 }, { "epoch": 0.44, "grad_norm": 3.655435800552368, "learning_rate": 1.966108934109863e-05, "loss": 2.2638, "step": 34094 }, { "epoch": 0.44, "grad_norm": 3.2065927982330322, "learning_rate": 1.9661062215905103e-05, "loss": 1.7546, "step": 34095 }, { "epoch": 0.44, "grad_norm": 3.4149417877197266, "learning_rate": 1.966103508964483e-05, "loss": 1.6205, "step": 34096 }, { "epoch": 0.44, "grad_norm": 4.098153114318848, "learning_rate": 1.9661007962317814e-05, "loss": 2.3673, "step": 34097 }, { "epoch": 0.44, "grad_norm": 4.053139686584473, "learning_rate": 1.966098083392406e-05, "loss": 2.082, "step": 34098 }, { "epoch": 0.44, "grad_norm": 3.408696174621582, "learning_rate": 1.966095370446357e-05, "loss": 1.9647, "step": 34099 }, { "epoch": 0.44, "grad_norm": 3.759404420852661, "learning_rate": 1.9660926573936346e-05, "loss": 1.8503, "step": 34100 }, { "epoch": 0.44, "grad_norm": 3.9828989505767822, "learning_rate": 1.966089944234239e-05, "loss": 2.14, "step": 34101 }, { "epoch": 0.44, "grad_norm": 3.3080363273620605, "learning_rate": 1.966087230968171e-05, "loss": 1.594, "step": 34102 }, { "epoch": 0.44, "grad_norm": 3.585686683654785, "learning_rate": 1.9660845175954303e-05, "loss": 1.7442, "step": 34103 }, { "epoch": 0.44, "grad_norm": 3.5837202072143555, "learning_rate": 1.9660818041160176e-05, "loss": 1.6497, "step": 34104 }, { "epoch": 0.44, "grad_norm": 3.9547393321990967, "learning_rate": 1.9660790905299333e-05, "loss": 1.9836, "step": 34105 }, { "epoch": 0.44, "grad_norm": 3.8203206062316895, "learning_rate": 1.966076376837177e-05, "loss": 1.8424, "step": 34106 }, { "epoch": 0.44, "grad_norm": 3.807901382446289, "learning_rate": 1.96607366303775e-05, "loss": 1.8682, "step": 34107 }, { "epoch": 0.44, "grad_norm": 3.908644199371338, "learning_rate": 1.966070949131652e-05, "loss": 2.241, "step": 34108 }, { "epoch": 0.44, "grad_norm": 3.5301907062530518, "learning_rate": 1.966068235118883e-05, "loss": 2.0446, "step": 34109 }, { "epoch": 0.44, "grad_norm": 3.8284497261047363, "learning_rate": 1.9660655209994436e-05, "loss": 2.113, "step": 34110 }, { "epoch": 0.44, "grad_norm": 3.664438486099243, "learning_rate": 1.9660628067733348e-05, "loss": 1.882, "step": 34111 }, { "epoch": 0.44, "grad_norm": 4.191499710083008, "learning_rate": 1.966060092440556e-05, "loss": 2.1342, "step": 34112 }, { "epoch": 0.44, "grad_norm": 4.232619285583496, "learning_rate": 1.9660573780011078e-05, "loss": 2.3432, "step": 34113 }, { "epoch": 0.44, "grad_norm": 4.202122688293457, "learning_rate": 1.96605466345499e-05, "loss": 2.2371, "step": 34114 }, { "epoch": 0.44, "grad_norm": 4.274406909942627, "learning_rate": 1.966051948802204e-05, "loss": 2.2621, "step": 34115 }, { "epoch": 0.44, "grad_norm": 3.812765121459961, "learning_rate": 1.966049234042749e-05, "loss": 1.9088, "step": 34116 }, { "epoch": 0.44, "grad_norm": 4.258872985839844, "learning_rate": 1.9660465191766264e-05, "loss": 1.8769, "step": 34117 }, { "epoch": 0.44, "grad_norm": 4.228670120239258, "learning_rate": 1.9660438042038353e-05, "loss": 2.1768, "step": 34118 }, { "epoch": 0.44, "grad_norm": 3.877856492996216, "learning_rate": 1.9660410891243767e-05, "loss": 1.8413, "step": 34119 }, { "epoch": 0.44, "grad_norm": 3.7464849948883057, "learning_rate": 1.966038373938251e-05, "loss": 2.1872, "step": 34120 }, { "epoch": 0.44, "grad_norm": 4.2835540771484375, "learning_rate": 1.966035658645458e-05, "loss": 2.4941, "step": 34121 }, { "epoch": 0.44, "grad_norm": 4.235208988189697, "learning_rate": 1.9660329432459984e-05, "loss": 2.3734, "step": 34122 }, { "epoch": 0.44, "grad_norm": 4.04168701171875, "learning_rate": 1.9660302277398724e-05, "loss": 1.9178, "step": 34123 }, { "epoch": 0.44, "grad_norm": 4.030784606933594, "learning_rate": 1.96602751212708e-05, "loss": 1.9516, "step": 34124 }, { "epoch": 0.44, "grad_norm": 3.66120982170105, "learning_rate": 1.966024796407622e-05, "loss": 1.8141, "step": 34125 }, { "epoch": 0.44, "grad_norm": 4.314549922943115, "learning_rate": 1.9660220805814986e-05, "loss": 2.5734, "step": 34126 }, { "epoch": 0.44, "grad_norm": 3.391298770904541, "learning_rate": 1.96601936464871e-05, "loss": 1.779, "step": 34127 }, { "epoch": 0.44, "grad_norm": 3.969489812850952, "learning_rate": 1.9660166486092563e-05, "loss": 2.0569, "step": 34128 }, { "epoch": 0.44, "grad_norm": 3.650895833969116, "learning_rate": 1.966013932463138e-05, "loss": 1.8575, "step": 34129 }, { "epoch": 0.44, "grad_norm": 3.8194773197174072, "learning_rate": 1.9660112162103554e-05, "loss": 1.8402, "step": 34130 }, { "epoch": 0.44, "grad_norm": 3.5392649173736572, "learning_rate": 1.9660084998509087e-05, "loss": 1.7841, "step": 34131 }, { "epoch": 0.44, "grad_norm": 3.724409580230713, "learning_rate": 1.9660057833847982e-05, "loss": 2.1406, "step": 34132 }, { "epoch": 0.44, "grad_norm": 3.593173027038574, "learning_rate": 1.9660030668120246e-05, "loss": 1.6233, "step": 34133 }, { "epoch": 0.44, "grad_norm": 3.23811674118042, "learning_rate": 1.9660003501325875e-05, "loss": 1.4296, "step": 34134 }, { "epoch": 0.44, "grad_norm": 4.175800800323486, "learning_rate": 1.9659976333464876e-05, "loss": 2.3076, "step": 34135 }, { "epoch": 0.44, "grad_norm": 3.744354724884033, "learning_rate": 1.9659949164537257e-05, "loss": 1.7324, "step": 34136 }, { "epoch": 0.44, "grad_norm": 3.9312829971313477, "learning_rate": 1.965992199454301e-05, "loss": 2.167, "step": 34137 }, { "epoch": 0.44, "grad_norm": 3.7439892292022705, "learning_rate": 1.9659894823482143e-05, "loss": 2.0848, "step": 34138 }, { "epoch": 0.44, "grad_norm": 3.6699397563934326, "learning_rate": 1.9659867651354666e-05, "loss": 1.5528, "step": 34139 }, { "epoch": 0.44, "grad_norm": 3.8270421028137207, "learning_rate": 1.9659840478160572e-05, "loss": 2.2501, "step": 34140 }, { "epoch": 0.44, "grad_norm": 3.2709803581237793, "learning_rate": 1.9659813303899867e-05, "loss": 1.8339, "step": 34141 }, { "epoch": 0.44, "grad_norm": 3.780410051345825, "learning_rate": 1.9659786128572557e-05, "loss": 2.0202, "step": 34142 }, { "epoch": 0.44, "grad_norm": 3.917383909225464, "learning_rate": 1.965975895217864e-05, "loss": 2.1126, "step": 34143 }, { "epoch": 0.44, "grad_norm": 3.63059663772583, "learning_rate": 1.965973177471812e-05, "loss": 1.7236, "step": 34144 }, { "epoch": 0.44, "grad_norm": 3.996253490447998, "learning_rate": 1.9659704596191006e-05, "loss": 2.3626, "step": 34145 }, { "epoch": 0.44, "grad_norm": 3.53572416305542, "learning_rate": 1.9659677416597297e-05, "loss": 2.0376, "step": 34146 }, { "epoch": 0.44, "grad_norm": 3.841718912124634, "learning_rate": 1.9659650235936993e-05, "loss": 1.8748, "step": 34147 }, { "epoch": 0.44, "grad_norm": 3.220170497894287, "learning_rate": 1.9659623054210102e-05, "loss": 1.4438, "step": 34148 }, { "epoch": 0.44, "grad_norm": 3.551288366317749, "learning_rate": 1.9659595871416624e-05, "loss": 2.0477, "step": 34149 }, { "epoch": 0.44, "grad_norm": 3.2769408226013184, "learning_rate": 1.9659568687556562e-05, "loss": 1.7385, "step": 34150 }, { "epoch": 0.44, "grad_norm": 3.918936252593994, "learning_rate": 1.965954150262992e-05, "loss": 1.6393, "step": 34151 }, { "epoch": 0.44, "grad_norm": 3.3134329319000244, "learning_rate": 1.96595143166367e-05, "loss": 1.6686, "step": 34152 }, { "epoch": 0.44, "grad_norm": 3.483959197998047, "learning_rate": 1.965948712957691e-05, "loss": 1.8351, "step": 34153 }, { "epoch": 0.44, "grad_norm": 3.722677707672119, "learning_rate": 1.9659459941450543e-05, "loss": 1.8023, "step": 34154 }, { "epoch": 0.44, "grad_norm": 3.771172285079956, "learning_rate": 1.965943275225761e-05, "loss": 2.2125, "step": 34155 }, { "epoch": 0.44, "grad_norm": 3.4623868465423584, "learning_rate": 1.965940556199811e-05, "loss": 1.9154, "step": 34156 }, { "epoch": 0.44, "grad_norm": 4.066892623901367, "learning_rate": 1.965937837067205e-05, "loss": 2.6537, "step": 34157 }, { "epoch": 0.44, "grad_norm": 3.9014475345611572, "learning_rate": 1.9659351178279432e-05, "loss": 1.9873, "step": 34158 }, { "epoch": 0.44, "grad_norm": 4.657735347747803, "learning_rate": 1.9659323984820257e-05, "loss": 2.073, "step": 34159 }, { "epoch": 0.44, "grad_norm": 3.6997663974761963, "learning_rate": 1.9659296790294526e-05, "loss": 1.8741, "step": 34160 }, { "epoch": 0.44, "grad_norm": 3.407259225845337, "learning_rate": 1.9659269594702247e-05, "loss": 1.6069, "step": 34161 }, { "epoch": 0.44, "grad_norm": 4.369166851043701, "learning_rate": 1.9659242398043423e-05, "loss": 2.4025, "step": 34162 }, { "epoch": 0.44, "grad_norm": 4.4921040534973145, "learning_rate": 1.965921520031805e-05, "loss": 2.3194, "step": 34163 }, { "epoch": 0.44, "grad_norm": 4.231419563293457, "learning_rate": 1.965918800152614e-05, "loss": 2.1287, "step": 34164 }, { "epoch": 0.44, "grad_norm": 3.4303979873657227, "learning_rate": 1.9659160801667686e-05, "loss": 1.726, "step": 34165 }, { "epoch": 0.44, "grad_norm": 3.898948907852173, "learning_rate": 1.96591336007427e-05, "loss": 1.8085, "step": 34166 }, { "epoch": 0.44, "grad_norm": 3.8532190322875977, "learning_rate": 1.9659106398751184e-05, "loss": 2.1592, "step": 34167 }, { "epoch": 0.44, "grad_norm": 3.868135452270508, "learning_rate": 1.9659079195693138e-05, "loss": 1.6779, "step": 34168 }, { "epoch": 0.44, "grad_norm": 3.1448276042938232, "learning_rate": 1.9659051991568564e-05, "loss": 1.4923, "step": 34169 }, { "epoch": 0.44, "grad_norm": 3.7565689086914062, "learning_rate": 1.965902478637747e-05, "loss": 1.6891, "step": 34170 }, { "epoch": 0.44, "grad_norm": 3.9252445697784424, "learning_rate": 1.9658997580119853e-05, "loss": 2.0542, "step": 34171 }, { "epoch": 0.44, "grad_norm": 3.8705191612243652, "learning_rate": 1.9658970372795714e-05, "loss": 1.9278, "step": 34172 }, { "epoch": 0.44, "grad_norm": 3.693636417388916, "learning_rate": 1.965894316440507e-05, "loss": 1.9726, "step": 34173 }, { "epoch": 0.44, "grad_norm": 3.9893763065338135, "learning_rate": 1.965891595494791e-05, "loss": 2.523, "step": 34174 }, { "epoch": 0.44, "grad_norm": 3.935772657394409, "learning_rate": 1.9658888744424243e-05, "loss": 1.7595, "step": 34175 }, { "epoch": 0.44, "grad_norm": 4.188382625579834, "learning_rate": 1.965886153283407e-05, "loss": 2.7267, "step": 34176 }, { "epoch": 0.44, "grad_norm": 3.709285020828247, "learning_rate": 1.9658834320177396e-05, "loss": 1.7692, "step": 34177 }, { "epoch": 0.44, "grad_norm": 3.6288812160491943, "learning_rate": 1.965880710645422e-05, "loss": 1.8446, "step": 34178 }, { "epoch": 0.44, "grad_norm": 3.6319284439086914, "learning_rate": 1.965877989166455e-05, "loss": 1.778, "step": 34179 }, { "epoch": 0.44, "grad_norm": 3.699115037918091, "learning_rate": 1.965875267580839e-05, "loss": 1.7549, "step": 34180 }, { "epoch": 0.44, "grad_norm": 3.603947401046753, "learning_rate": 1.9658725458885737e-05, "loss": 1.9917, "step": 34181 }, { "epoch": 0.44, "grad_norm": 3.9605932235717773, "learning_rate": 1.9658698240896594e-05, "loss": 2.232, "step": 34182 }, { "epoch": 0.44, "grad_norm": 3.741567611694336, "learning_rate": 1.965867102184097e-05, "loss": 1.7959, "step": 34183 }, { "epoch": 0.44, "grad_norm": 3.797964572906494, "learning_rate": 1.9658643801718863e-05, "loss": 1.8866, "step": 34184 }, { "epoch": 0.44, "grad_norm": 3.8019795417785645, "learning_rate": 1.9658616580530282e-05, "loss": 1.8291, "step": 34185 }, { "epoch": 0.44, "grad_norm": 4.110054969787598, "learning_rate": 1.9658589358275223e-05, "loss": 2.1266, "step": 34186 }, { "epoch": 0.44, "grad_norm": 3.875717878341675, "learning_rate": 1.965856213495369e-05, "loss": 2.0061, "step": 34187 }, { "epoch": 0.44, "grad_norm": 3.670941114425659, "learning_rate": 1.9658534910565688e-05, "loss": 1.9509, "step": 34188 }, { "epoch": 0.44, "grad_norm": 3.6006855964660645, "learning_rate": 1.9658507685111223e-05, "loss": 2.0638, "step": 34189 }, { "epoch": 0.44, "grad_norm": 3.6635642051696777, "learning_rate": 1.9658480458590296e-05, "loss": 2.0129, "step": 34190 }, { "epoch": 0.44, "grad_norm": 3.8003227710723877, "learning_rate": 1.9658453231002906e-05, "loss": 2.3055, "step": 34191 }, { "epoch": 0.44, "grad_norm": 4.535313129425049, "learning_rate": 1.965842600234906e-05, "loss": 2.2036, "step": 34192 }, { "epoch": 0.44, "grad_norm": 4.2736358642578125, "learning_rate": 1.9658398772628756e-05, "loss": 2.1129, "step": 34193 }, { "epoch": 0.44, "grad_norm": 4.113483905792236, "learning_rate": 1.9658371541842005e-05, "loss": 2.3104, "step": 34194 }, { "epoch": 0.44, "grad_norm": 3.5034072399139404, "learning_rate": 1.9658344309988805e-05, "loss": 1.6884, "step": 34195 }, { "epoch": 0.44, "grad_norm": 3.7363641262054443, "learning_rate": 1.965831707706916e-05, "loss": 2.073, "step": 34196 }, { "epoch": 0.44, "grad_norm": 4.498153209686279, "learning_rate": 1.965828984308307e-05, "loss": 2.2815, "step": 34197 }, { "epoch": 0.44, "grad_norm": 3.578925848007202, "learning_rate": 1.9658262608030547e-05, "loss": 1.8809, "step": 34198 }, { "epoch": 0.44, "grad_norm": 3.7588160037994385, "learning_rate": 1.9658235371911584e-05, "loss": 1.9376, "step": 34199 }, { "epoch": 0.44, "grad_norm": 3.7548396587371826, "learning_rate": 1.965820813472619e-05, "loss": 1.6451, "step": 34200 }, { "epoch": 0.44, "grad_norm": 4.0244140625, "learning_rate": 1.9658180896474365e-05, "loss": 2.1365, "step": 34201 }, { "epoch": 0.44, "grad_norm": 3.96478533744812, "learning_rate": 1.965815365715611e-05, "loss": 1.7361, "step": 34202 }, { "epoch": 0.44, "grad_norm": 3.666173219680786, "learning_rate": 1.9658126416771434e-05, "loss": 2.1908, "step": 34203 }, { "epoch": 0.44, "grad_norm": 3.670856475830078, "learning_rate": 1.9658099175320338e-05, "loss": 1.9237, "step": 34204 }, { "epoch": 0.44, "grad_norm": 3.927213191986084, "learning_rate": 1.9658071932802824e-05, "loss": 2.2009, "step": 34205 }, { "epoch": 0.44, "grad_norm": 4.404821872711182, "learning_rate": 1.9658044689218892e-05, "loss": 1.9203, "step": 34206 }, { "epoch": 0.44, "grad_norm": 3.467384099960327, "learning_rate": 1.965801744456855e-05, "loss": 1.5877, "step": 34207 }, { "epoch": 0.44, "grad_norm": 4.6169915199279785, "learning_rate": 1.96579901988518e-05, "loss": 2.178, "step": 34208 }, { "epoch": 0.44, "grad_norm": 3.3860719203948975, "learning_rate": 1.965796295206864e-05, "loss": 1.9021, "step": 34209 }, { "epoch": 0.44, "grad_norm": 3.8456900119781494, "learning_rate": 1.9657935704219082e-05, "loss": 2.0464, "step": 34210 }, { "epoch": 0.44, "grad_norm": 3.9589803218841553, "learning_rate": 1.9657908455303122e-05, "loss": 1.6908, "step": 34211 }, { "epoch": 0.44, "grad_norm": 3.5009002685546875, "learning_rate": 1.9657881205320764e-05, "loss": 2.0263, "step": 34212 }, { "epoch": 0.44, "grad_norm": 4.260560512542725, "learning_rate": 1.9657853954272013e-05, "loss": 2.197, "step": 34213 }, { "epoch": 0.44, "grad_norm": 3.3374524116516113, "learning_rate": 1.965782670215687e-05, "loss": 1.5314, "step": 34214 }, { "epoch": 0.44, "grad_norm": 5.268735408782959, "learning_rate": 1.965779944897534e-05, "loss": 2.5075, "step": 34215 }, { "epoch": 0.44, "grad_norm": 3.3313446044921875, "learning_rate": 1.9657772194727423e-05, "loss": 1.4822, "step": 34216 }, { "epoch": 0.44, "grad_norm": 3.7780723571777344, "learning_rate": 1.965774493941313e-05, "loss": 2.067, "step": 34217 }, { "epoch": 0.44, "grad_norm": 3.9990644454956055, "learning_rate": 1.9657717683032454e-05, "loss": 2.203, "step": 34218 }, { "epoch": 0.44, "grad_norm": 4.2949137687683105, "learning_rate": 1.96576904255854e-05, "loss": 1.9671, "step": 34219 }, { "epoch": 0.44, "grad_norm": 4.873776912689209, "learning_rate": 1.965766316707198e-05, "loss": 2.1096, "step": 34220 }, { "epoch": 0.44, "grad_norm": 4.033724308013916, "learning_rate": 1.9657635907492186e-05, "loss": 2.0231, "step": 34221 }, { "epoch": 0.44, "grad_norm": 4.395633220672607, "learning_rate": 1.9657608646846022e-05, "loss": 2.254, "step": 34222 }, { "epoch": 0.44, "grad_norm": 3.564199447631836, "learning_rate": 1.96575813851335e-05, "loss": 1.7708, "step": 34223 }, { "epoch": 0.44, "grad_norm": 3.5078980922698975, "learning_rate": 1.965755412235461e-05, "loss": 1.92, "step": 34224 }, { "epoch": 0.44, "grad_norm": 4.177478790283203, "learning_rate": 1.9657526858509367e-05, "loss": 2.1446, "step": 34225 }, { "epoch": 0.44, "grad_norm": 4.546233177185059, "learning_rate": 1.965749959359777e-05, "loss": 2.6589, "step": 34226 }, { "epoch": 0.44, "grad_norm": 3.6223857402801514, "learning_rate": 1.965747232761982e-05, "loss": 1.6488, "step": 34227 }, { "epoch": 0.44, "grad_norm": 4.012056827545166, "learning_rate": 1.965744506057552e-05, "loss": 2.1705, "step": 34228 }, { "epoch": 0.44, "grad_norm": 3.898393154144287, "learning_rate": 1.9657417792464875e-05, "loss": 1.7999, "step": 34229 }, { "epoch": 0.44, "grad_norm": 4.006894588470459, "learning_rate": 1.965739052328789e-05, "loss": 1.9018, "step": 34230 }, { "epoch": 0.44, "grad_norm": 3.187731981277466, "learning_rate": 1.9657363253044562e-05, "loss": 1.513, "step": 34231 }, { "epoch": 0.44, "grad_norm": 3.754589080810547, "learning_rate": 1.9657335981734897e-05, "loss": 2.3405, "step": 34232 }, { "epoch": 0.44, "grad_norm": 3.8716869354248047, "learning_rate": 1.96573087093589e-05, "loss": 1.9746, "step": 34233 }, { "epoch": 0.44, "grad_norm": 3.9754478931427, "learning_rate": 1.9657281435916572e-05, "loss": 1.9457, "step": 34234 }, { "epoch": 0.44, "grad_norm": 3.5863702297210693, "learning_rate": 1.9657254161407918e-05, "loss": 1.5949, "step": 34235 }, { "epoch": 0.44, "grad_norm": 4.016185283660889, "learning_rate": 1.9657226885832935e-05, "loss": 2.2476, "step": 34236 }, { "epoch": 0.44, "grad_norm": 3.687067747116089, "learning_rate": 1.965719960919163e-05, "loss": 2.0214, "step": 34237 }, { "epoch": 0.44, "grad_norm": 4.000511169433594, "learning_rate": 1.9657172331484012e-05, "loss": 1.7671, "step": 34238 }, { "epoch": 0.44, "grad_norm": 4.147218227386475, "learning_rate": 1.9657145052710073e-05, "loss": 1.9372, "step": 34239 }, { "epoch": 0.44, "grad_norm": 3.646146297454834, "learning_rate": 1.9657117772869824e-05, "loss": 1.6409, "step": 34240 }, { "epoch": 0.44, "grad_norm": 4.238357067108154, "learning_rate": 1.9657090491963266e-05, "loss": 2.0999, "step": 34241 }, { "epoch": 0.44, "grad_norm": 3.9565200805664062, "learning_rate": 1.96570632099904e-05, "loss": 2.6687, "step": 34242 }, { "epoch": 0.44, "grad_norm": 3.7974178791046143, "learning_rate": 1.9657035926951228e-05, "loss": 1.7041, "step": 34243 }, { "epoch": 0.44, "grad_norm": 3.6111435890197754, "learning_rate": 1.9657008642845762e-05, "loss": 1.8612, "step": 34244 }, { "epoch": 0.44, "grad_norm": 3.482480525970459, "learning_rate": 1.9656981357673993e-05, "loss": 2.0079, "step": 34245 }, { "epoch": 0.44, "grad_norm": 3.7275166511535645, "learning_rate": 1.965695407143593e-05, "loss": 2.2905, "step": 34246 }, { "epoch": 0.44, "grad_norm": 4.096363067626953, "learning_rate": 1.9656926784131575e-05, "loss": 2.4134, "step": 34247 }, { "epoch": 0.44, "grad_norm": 4.815068244934082, "learning_rate": 1.9656899495760936e-05, "loss": 2.4257, "step": 34248 }, { "epoch": 0.44, "grad_norm": 3.074436664581299, "learning_rate": 1.9656872206324007e-05, "loss": 1.5273, "step": 34249 }, { "epoch": 0.44, "grad_norm": 4.0189971923828125, "learning_rate": 1.9656844915820796e-05, "loss": 1.8659, "step": 34250 }, { "epoch": 0.44, "grad_norm": 3.561668634414673, "learning_rate": 1.9656817624251306e-05, "loss": 1.9678, "step": 34251 }, { "epoch": 0.44, "grad_norm": 3.654390335083008, "learning_rate": 1.965679033161554e-05, "loss": 1.8514, "step": 34252 }, { "epoch": 0.44, "grad_norm": 4.384523391723633, "learning_rate": 1.9656763037913503e-05, "loss": 2.2272, "step": 34253 }, { "epoch": 0.44, "grad_norm": 3.9932379722595215, "learning_rate": 1.965673574314519e-05, "loss": 2.0292, "step": 34254 }, { "epoch": 0.44, "grad_norm": 4.03041410446167, "learning_rate": 1.965670844731061e-05, "loss": 2.1176, "step": 34255 }, { "epoch": 0.44, "grad_norm": 3.6000938415527344, "learning_rate": 1.965668115040977e-05, "loss": 1.7087, "step": 34256 }, { "epoch": 0.44, "grad_norm": 3.762559652328491, "learning_rate": 1.9656653852442668e-05, "loss": 1.7676, "step": 34257 }, { "epoch": 0.44, "grad_norm": 3.6091325283050537, "learning_rate": 1.9656626553409307e-05, "loss": 2.0715, "step": 34258 }, { "epoch": 0.44, "grad_norm": 4.782464981079102, "learning_rate": 1.965659925330969e-05, "loss": 2.3573, "step": 34259 }, { "epoch": 0.44, "grad_norm": 3.9043569564819336, "learning_rate": 1.965657195214382e-05, "loss": 2.3496, "step": 34260 }, { "epoch": 0.44, "grad_norm": 3.91620135307312, "learning_rate": 1.9656544649911698e-05, "loss": 2.0865, "step": 34261 }, { "epoch": 0.44, "grad_norm": 4.202895641326904, "learning_rate": 1.9656517346613333e-05, "loss": 2.1452, "step": 34262 }, { "epoch": 0.44, "grad_norm": 4.229733943939209, "learning_rate": 1.9656490042248726e-05, "loss": 1.9607, "step": 34263 }, { "epoch": 0.44, "grad_norm": 3.9810547828674316, "learning_rate": 1.9656462736817877e-05, "loss": 1.8238, "step": 34264 }, { "epoch": 0.44, "grad_norm": 3.8832833766937256, "learning_rate": 1.9656435430320787e-05, "loss": 2.0784, "step": 34265 }, { "epoch": 0.44, "grad_norm": 4.165647029876709, "learning_rate": 1.965640812275747e-05, "loss": 2.4016, "step": 34266 }, { "epoch": 0.44, "grad_norm": 3.68764328956604, "learning_rate": 1.9656380814127915e-05, "loss": 1.7963, "step": 34267 }, { "epoch": 0.44, "grad_norm": 3.7753729820251465, "learning_rate": 1.9656353504432134e-05, "loss": 2.2575, "step": 34268 }, { "epoch": 0.44, "grad_norm": 3.6707751750946045, "learning_rate": 1.965632619367013e-05, "loss": 1.8059, "step": 34269 }, { "epoch": 0.44, "grad_norm": 4.646683216094971, "learning_rate": 1.9656298881841903e-05, "loss": 2.6745, "step": 34270 }, { "epoch": 0.44, "grad_norm": 3.578399658203125, "learning_rate": 1.9656271568947454e-05, "loss": 2.0261, "step": 34271 }, { "epoch": 0.44, "grad_norm": 4.034815788269043, "learning_rate": 1.965624425498679e-05, "loss": 1.9613, "step": 34272 }, { "epoch": 0.44, "grad_norm": 3.9449377059936523, "learning_rate": 1.9656216939959916e-05, "loss": 1.9798, "step": 34273 }, { "epoch": 0.44, "grad_norm": 3.1264188289642334, "learning_rate": 1.965618962386683e-05, "loss": 1.5536, "step": 34274 }, { "epoch": 0.44, "grad_norm": 3.961379289627075, "learning_rate": 1.9656162306707536e-05, "loss": 1.9311, "step": 34275 }, { "epoch": 0.44, "grad_norm": 3.488180160522461, "learning_rate": 1.9656134988482035e-05, "loss": 1.6164, "step": 34276 }, { "epoch": 0.44, "grad_norm": 4.298099040985107, "learning_rate": 1.965610766919034e-05, "loss": 2.1111, "step": 34277 }, { "epoch": 0.44, "grad_norm": 3.950507879257202, "learning_rate": 1.9656080348832442e-05, "loss": 2.1148, "step": 34278 }, { "epoch": 0.44, "grad_norm": 4.134721279144287, "learning_rate": 1.9656053027408348e-05, "loss": 2.2447, "step": 34279 }, { "epoch": 0.44, "grad_norm": 4.529580593109131, "learning_rate": 1.9656025704918066e-05, "loss": 2.0898, "step": 34280 }, { "epoch": 0.44, "grad_norm": 3.8380887508392334, "learning_rate": 1.9655998381361593e-05, "loss": 1.6996, "step": 34281 }, { "epoch": 0.44, "grad_norm": 3.789199113845825, "learning_rate": 1.9655971056738932e-05, "loss": 1.9831, "step": 34282 }, { "epoch": 0.44, "grad_norm": 3.4683310985565186, "learning_rate": 1.965594373105009e-05, "loss": 1.653, "step": 34283 }, { "epoch": 0.44, "grad_norm": 4.269297122955322, "learning_rate": 1.9655916404295066e-05, "loss": 2.2299, "step": 34284 }, { "epoch": 0.44, "grad_norm": 4.090580463409424, "learning_rate": 1.9655889076473867e-05, "loss": 2.0872, "step": 34285 }, { "epoch": 0.44, "grad_norm": 4.005478382110596, "learning_rate": 1.9655861747586494e-05, "loss": 2.1946, "step": 34286 }, { "epoch": 0.44, "grad_norm": 4.30035924911499, "learning_rate": 1.965583441763295e-05, "loss": 2.0653, "step": 34287 }, { "epoch": 0.44, "grad_norm": 3.732724666595459, "learning_rate": 1.9655807086613237e-05, "loss": 2.0408, "step": 34288 }, { "epoch": 0.44, "grad_norm": 3.7624456882476807, "learning_rate": 1.9655779754527363e-05, "loss": 1.8994, "step": 34289 }, { "epoch": 0.45, "grad_norm": 3.537623643875122, "learning_rate": 1.965575242137532e-05, "loss": 1.8449, "step": 34290 }, { "epoch": 0.45, "grad_norm": 3.4027960300445557, "learning_rate": 1.9655725087157126e-05, "loss": 1.7721, "step": 34291 }, { "epoch": 0.45, "grad_norm": 4.362100601196289, "learning_rate": 1.965569775187277e-05, "loss": 1.8657, "step": 34292 }, { "epoch": 0.45, "grad_norm": 3.5379700660705566, "learning_rate": 1.9655670415522265e-05, "loss": 2.0157, "step": 34293 }, { "epoch": 0.45, "grad_norm": 3.3858156204223633, "learning_rate": 1.965564307810561e-05, "loss": 1.8519, "step": 34294 }, { "epoch": 0.45, "grad_norm": 3.7045907974243164, "learning_rate": 1.9655615739622803e-05, "loss": 1.8871, "step": 34295 }, { "epoch": 0.45, "grad_norm": 3.7218849658966064, "learning_rate": 1.9655588400073857e-05, "loss": 2.2698, "step": 34296 }, { "epoch": 0.45, "grad_norm": 3.6236867904663086, "learning_rate": 1.965556105945877e-05, "loss": 1.6808, "step": 34297 }, { "epoch": 0.45, "grad_norm": 4.262351036071777, "learning_rate": 1.9655533717777544e-05, "loss": 2.2549, "step": 34298 }, { "epoch": 0.45, "grad_norm": 3.976654291152954, "learning_rate": 1.9655506375030185e-05, "loss": 2.3263, "step": 34299 }, { "epoch": 0.45, "grad_norm": 3.3895888328552246, "learning_rate": 1.9655479031216692e-05, "loss": 1.8023, "step": 34300 }, { "epoch": 0.45, "grad_norm": 3.6725823879241943, "learning_rate": 1.965545168633707e-05, "loss": 1.9407, "step": 34301 }, { "epoch": 0.45, "grad_norm": 3.869431734085083, "learning_rate": 1.9655424340391324e-05, "loss": 1.9516, "step": 34302 }, { "epoch": 0.45, "grad_norm": 3.811336040496826, "learning_rate": 1.9655396993379455e-05, "loss": 2.2738, "step": 34303 }, { "epoch": 0.45, "grad_norm": 4.138580799102783, "learning_rate": 1.9655369645301466e-05, "loss": 2.4185, "step": 34304 }, { "epoch": 0.45, "grad_norm": 4.176144123077393, "learning_rate": 1.965534229615736e-05, "loss": 2.7427, "step": 34305 }, { "epoch": 0.45, "grad_norm": 4.364468097686768, "learning_rate": 1.965531494594714e-05, "loss": 2.1603, "step": 34306 }, { "epoch": 0.45, "grad_norm": 3.7851905822753906, "learning_rate": 1.9655287594670813e-05, "loss": 2.0407, "step": 34307 }, { "epoch": 0.45, "grad_norm": 3.938934803009033, "learning_rate": 1.9655260242328376e-05, "loss": 2.2131, "step": 34308 }, { "epoch": 0.45, "grad_norm": 3.3510022163391113, "learning_rate": 1.9655232888919836e-05, "loss": 2.1417, "step": 34309 }, { "epoch": 0.45, "grad_norm": 3.8618736267089844, "learning_rate": 1.965520553444519e-05, "loss": 2.3037, "step": 34310 }, { "epoch": 0.45, "grad_norm": 3.4779062271118164, "learning_rate": 1.965517817890445e-05, "loss": 1.823, "step": 34311 }, { "epoch": 0.45, "grad_norm": 3.7493176460266113, "learning_rate": 1.9655150822297614e-05, "loss": 2.0034, "step": 34312 }, { "epoch": 0.45, "grad_norm": 3.7973482608795166, "learning_rate": 1.9655123464624684e-05, "loss": 2.0438, "step": 34313 }, { "epoch": 0.45, "grad_norm": 3.384078025817871, "learning_rate": 1.9655096105885664e-05, "loss": 1.7727, "step": 34314 }, { "epoch": 0.45, "grad_norm": 3.4807801246643066, "learning_rate": 1.965506874608056e-05, "loss": 1.6784, "step": 34315 }, { "epoch": 0.45, "grad_norm": 3.445641040802002, "learning_rate": 1.965504138520937e-05, "loss": 1.9017, "step": 34316 }, { "epoch": 0.45, "grad_norm": 3.8068222999572754, "learning_rate": 1.96550140232721e-05, "loss": 2.0666, "step": 34317 }, { "epoch": 0.45, "grad_norm": 3.759878635406494, "learning_rate": 1.9654986660268754e-05, "loss": 1.672, "step": 34318 }, { "epoch": 0.45, "grad_norm": 3.865971803665161, "learning_rate": 1.9654959296199333e-05, "loss": 1.7916, "step": 34319 }, { "epoch": 0.45, "grad_norm": 3.757146120071411, "learning_rate": 1.9654931931063842e-05, "loss": 2.0209, "step": 34320 }, { "epoch": 0.45, "grad_norm": 3.9901015758514404, "learning_rate": 1.965490456486228e-05, "loss": 1.8896, "step": 34321 }, { "epoch": 0.45, "grad_norm": 3.5102388858795166, "learning_rate": 1.9654877197594654e-05, "loss": 1.7745, "step": 34322 }, { "epoch": 0.45, "grad_norm": 3.401337146759033, "learning_rate": 1.965484982926097e-05, "loss": 1.8707, "step": 34323 }, { "epoch": 0.45, "grad_norm": 3.9996612071990967, "learning_rate": 1.965482245986122e-05, "loss": 1.7937, "step": 34324 }, { "epoch": 0.45, "grad_norm": 3.0875208377838135, "learning_rate": 1.965479508939542e-05, "loss": 1.7539, "step": 34325 }, { "epoch": 0.45, "grad_norm": 3.7362749576568604, "learning_rate": 1.965476771786356e-05, "loss": 1.8446, "step": 34326 }, { "epoch": 0.45, "grad_norm": 3.997225046157837, "learning_rate": 1.9654740345265655e-05, "loss": 1.9439, "step": 34327 }, { "epoch": 0.45, "grad_norm": 3.575448751449585, "learning_rate": 1.96547129716017e-05, "loss": 2.1349, "step": 34328 }, { "epoch": 0.45, "grad_norm": 3.5821774005889893, "learning_rate": 1.9654685596871706e-05, "loss": 1.7909, "step": 34329 }, { "epoch": 0.45, "grad_norm": 3.7446231842041016, "learning_rate": 1.9654658221075665e-05, "loss": 1.8815, "step": 34330 }, { "epoch": 0.45, "grad_norm": 3.4673852920532227, "learning_rate": 1.9654630844213586e-05, "loss": 1.5106, "step": 34331 }, { "epoch": 0.45, "grad_norm": 3.9117698669433594, "learning_rate": 1.9654603466285476e-05, "loss": 1.8631, "step": 34332 }, { "epoch": 0.45, "grad_norm": 3.133244037628174, "learning_rate": 1.9654576087291334e-05, "loss": 1.3673, "step": 34333 }, { "epoch": 0.45, "grad_norm": 3.1425483226776123, "learning_rate": 1.9654548707231156e-05, "loss": 1.6452, "step": 34334 }, { "epoch": 0.45, "grad_norm": 4.238378524780273, "learning_rate": 1.965452132610496e-05, "loss": 2.1119, "step": 34335 }, { "epoch": 0.45, "grad_norm": 3.863779306411743, "learning_rate": 1.9654493943912737e-05, "loss": 2.1425, "step": 34336 }, { "epoch": 0.45, "grad_norm": 4.341439723968506, "learning_rate": 1.9654466560654496e-05, "loss": 2.2834, "step": 34337 }, { "epoch": 0.45, "grad_norm": 3.5665743350982666, "learning_rate": 1.9654439176330236e-05, "loss": 1.7908, "step": 34338 }, { "epoch": 0.45, "grad_norm": 3.7140719890594482, "learning_rate": 1.9654411790939965e-05, "loss": 1.938, "step": 34339 }, { "epoch": 0.45, "grad_norm": 4.570703506469727, "learning_rate": 1.9654384404483683e-05, "loss": 2.1116, "step": 34340 }, { "epoch": 0.45, "grad_norm": 3.2401130199432373, "learning_rate": 1.965435701696139e-05, "loss": 1.8848, "step": 34341 }, { "epoch": 0.45, "grad_norm": 3.6600685119628906, "learning_rate": 1.9654329628373095e-05, "loss": 1.6534, "step": 34342 }, { "epoch": 0.45, "grad_norm": 3.800837516784668, "learning_rate": 1.9654302238718795e-05, "loss": 2.2986, "step": 34343 }, { "epoch": 0.45, "grad_norm": 3.380683660507202, "learning_rate": 1.96542748479985e-05, "loss": 1.4326, "step": 34344 }, { "epoch": 0.45, "grad_norm": 4.018675327301025, "learning_rate": 1.9654247456212204e-05, "loss": 2.0121, "step": 34345 }, { "epoch": 0.45, "grad_norm": 3.4759035110473633, "learning_rate": 1.965422006335992e-05, "loss": 2.1243, "step": 34346 }, { "epoch": 0.45, "grad_norm": 3.224778175354004, "learning_rate": 1.9654192669441648e-05, "loss": 1.67, "step": 34347 }, { "epoch": 0.45, "grad_norm": 3.7646138668060303, "learning_rate": 1.9654165274457387e-05, "loss": 1.7336, "step": 34348 }, { "epoch": 0.45, "grad_norm": 3.734941005706787, "learning_rate": 1.965413787840714e-05, "loss": 1.9535, "step": 34349 }, { "epoch": 0.45, "grad_norm": 4.242488384246826, "learning_rate": 1.9654110481290917e-05, "loss": 2.4384, "step": 34350 }, { "epoch": 0.45, "grad_norm": 3.885636329650879, "learning_rate": 1.9654083083108713e-05, "loss": 1.9783, "step": 34351 }, { "epoch": 0.45, "grad_norm": 3.87931227684021, "learning_rate": 1.9654055683860536e-05, "loss": 2.0559, "step": 34352 }, { "epoch": 0.45, "grad_norm": 3.903191328048706, "learning_rate": 1.9654028283546385e-05, "loss": 2.0057, "step": 34353 }, { "epoch": 0.45, "grad_norm": 3.4823904037475586, "learning_rate": 1.9654000882166266e-05, "loss": 1.7955, "step": 34354 }, { "epoch": 0.45, "grad_norm": 3.957911729812622, "learning_rate": 1.9653973479720184e-05, "loss": 2.2457, "step": 34355 }, { "epoch": 0.45, "grad_norm": 3.3002851009368896, "learning_rate": 1.9653946076208138e-05, "loss": 1.5363, "step": 34356 }, { "epoch": 0.45, "grad_norm": 3.792746067047119, "learning_rate": 1.9653918671630135e-05, "loss": 1.8855, "step": 34357 }, { "epoch": 0.45, "grad_norm": 3.2884836196899414, "learning_rate": 1.965389126598617e-05, "loss": 1.6391, "step": 34358 }, { "epoch": 0.45, "grad_norm": 4.215783596038818, "learning_rate": 1.9653863859276257e-05, "loss": 1.9066, "step": 34359 }, { "epoch": 0.45, "grad_norm": 4.311851501464844, "learning_rate": 1.965383645150039e-05, "loss": 2.2109, "step": 34360 }, { "epoch": 0.45, "grad_norm": 3.222693681716919, "learning_rate": 1.9653809042658578e-05, "loss": 1.4103, "step": 34361 }, { "epoch": 0.45, "grad_norm": 3.3658523559570312, "learning_rate": 1.9653781632750823e-05, "loss": 1.7047, "step": 34362 }, { "epoch": 0.45, "grad_norm": 3.8464977741241455, "learning_rate": 1.9653754221777124e-05, "loss": 1.9202, "step": 34363 }, { "epoch": 0.45, "grad_norm": 3.973210096359253, "learning_rate": 1.9653726809737486e-05, "loss": 2.0417, "step": 34364 }, { "epoch": 0.45, "grad_norm": 4.029274940490723, "learning_rate": 1.9653699396631914e-05, "loss": 1.9541, "step": 34365 }, { "epoch": 0.45, "grad_norm": 3.5263593196868896, "learning_rate": 1.9653671982460412e-05, "loss": 1.6757, "step": 34366 }, { "epoch": 0.45, "grad_norm": 4.154610633850098, "learning_rate": 1.9653644567222977e-05, "loss": 2.2454, "step": 34367 }, { "epoch": 0.45, "grad_norm": 3.8347740173339844, "learning_rate": 1.965361715091962e-05, "loss": 1.8754, "step": 34368 }, { "epoch": 0.45, "grad_norm": 3.896613597869873, "learning_rate": 1.9653589733550337e-05, "loss": 1.8728, "step": 34369 }, { "epoch": 0.45, "grad_norm": 3.7694637775421143, "learning_rate": 1.9653562315115135e-05, "loss": 2.07, "step": 34370 }, { "epoch": 0.45, "grad_norm": 3.338519334793091, "learning_rate": 1.9653534895614014e-05, "loss": 1.9553, "step": 34371 }, { "epoch": 0.45, "grad_norm": 4.5162129402160645, "learning_rate": 1.9653507475046983e-05, "loss": 2.2732, "step": 34372 }, { "epoch": 0.45, "grad_norm": 3.9785525798797607, "learning_rate": 1.9653480053414036e-05, "loss": 1.9273, "step": 34373 }, { "epoch": 0.45, "grad_norm": 3.465679883956909, "learning_rate": 1.9653452630715182e-05, "loss": 1.5622, "step": 34374 }, { "epoch": 0.45, "grad_norm": 4.483104705810547, "learning_rate": 1.9653425206950426e-05, "loss": 2.5575, "step": 34375 }, { "epoch": 0.45, "grad_norm": 3.8804330825805664, "learning_rate": 1.9653397782119767e-05, "loss": 1.7825, "step": 34376 }, { "epoch": 0.45, "grad_norm": 3.386277675628662, "learning_rate": 1.9653370356223208e-05, "loss": 1.8319, "step": 34377 }, { "epoch": 0.45, "grad_norm": 3.9024229049682617, "learning_rate": 1.9653342929260756e-05, "loss": 2.1309, "step": 34378 }, { "epoch": 0.45, "grad_norm": 3.6711370944976807, "learning_rate": 1.965331550123241e-05, "loss": 2.1098, "step": 34379 }, { "epoch": 0.45, "grad_norm": 3.3775699138641357, "learning_rate": 1.9653288072138172e-05, "loss": 1.7225, "step": 34380 }, { "epoch": 0.45, "grad_norm": 3.5654382705688477, "learning_rate": 1.965326064197805e-05, "loss": 1.7956, "step": 34381 }, { "epoch": 0.45, "grad_norm": 3.927368402481079, "learning_rate": 1.965323321075204e-05, "loss": 1.6692, "step": 34382 }, { "epoch": 0.45, "grad_norm": 3.616061210632324, "learning_rate": 1.9653205778460155e-05, "loss": 1.7519, "step": 34383 }, { "epoch": 0.45, "grad_norm": 4.26647424697876, "learning_rate": 1.965317834510239e-05, "loss": 2.2363, "step": 34384 }, { "epoch": 0.45, "grad_norm": 3.6549203395843506, "learning_rate": 1.9653150910678745e-05, "loss": 2.1348, "step": 34385 }, { "epoch": 0.45, "grad_norm": 3.365056037902832, "learning_rate": 1.9653123475189234e-05, "loss": 1.8149, "step": 34386 }, { "epoch": 0.45, "grad_norm": 3.805745840072632, "learning_rate": 1.9653096038633856e-05, "loss": 1.9169, "step": 34387 }, { "epoch": 0.45, "grad_norm": 4.07989501953125, "learning_rate": 1.965306860101261e-05, "loss": 2.2561, "step": 34388 }, { "epoch": 0.45, "grad_norm": 4.022636890411377, "learning_rate": 1.96530411623255e-05, "loss": 2.2027, "step": 34389 }, { "epoch": 0.45, "grad_norm": 3.667915105819702, "learning_rate": 1.9653013722572533e-05, "loss": 2.0879, "step": 34390 }, { "epoch": 0.45, "grad_norm": 3.292858123779297, "learning_rate": 1.9652986281753707e-05, "loss": 1.7755, "step": 34391 }, { "epoch": 0.45, "grad_norm": 3.798231363296509, "learning_rate": 1.9652958839869032e-05, "loss": 1.9199, "step": 34392 }, { "epoch": 0.45, "grad_norm": 3.8277549743652344, "learning_rate": 1.96529313969185e-05, "loss": 2.0815, "step": 34393 }, { "epoch": 0.45, "grad_norm": 3.7881462574005127, "learning_rate": 1.9652903952902126e-05, "loss": 1.6405, "step": 34394 }, { "epoch": 0.45, "grad_norm": 3.226048707962036, "learning_rate": 1.965287650781991e-05, "loss": 1.5615, "step": 34395 }, { "epoch": 0.45, "grad_norm": 3.47867488861084, "learning_rate": 1.9652849061671845e-05, "loss": 1.5787, "step": 34396 }, { "epoch": 0.45, "grad_norm": 3.874764919281006, "learning_rate": 1.9652821614457944e-05, "loss": 1.9775, "step": 34397 }, { "epoch": 0.45, "grad_norm": 3.2492120265960693, "learning_rate": 1.965279416617821e-05, "loss": 1.7714, "step": 34398 }, { "epoch": 0.45, "grad_norm": 3.4530415534973145, "learning_rate": 1.965276671683264e-05, "loss": 1.5961, "step": 34399 }, { "epoch": 0.45, "grad_norm": 3.43168568611145, "learning_rate": 1.9652739266421246e-05, "loss": 1.9667, "step": 34400 }, { "epoch": 0.45, "grad_norm": 3.920454740524292, "learning_rate": 1.9652711814944025e-05, "loss": 2.2137, "step": 34401 }, { "epoch": 0.45, "grad_norm": 3.5930020809173584, "learning_rate": 1.9652684362400975e-05, "loss": 2.0576, "step": 34402 }, { "epoch": 0.45, "grad_norm": 4.379016876220703, "learning_rate": 1.965265690879211e-05, "loss": 2.3265, "step": 34403 }, { "epoch": 0.45, "grad_norm": 3.4509336948394775, "learning_rate": 1.9652629454117428e-05, "loss": 1.7704, "step": 34404 }, { "epoch": 0.45, "grad_norm": 3.7390084266662598, "learning_rate": 1.965260199837693e-05, "loss": 1.5423, "step": 34405 }, { "epoch": 0.45, "grad_norm": 4.494701862335205, "learning_rate": 1.965257454157062e-05, "loss": 2.6221, "step": 34406 }, { "epoch": 0.45, "grad_norm": 4.190108299255371, "learning_rate": 1.9652547083698508e-05, "loss": 1.9529, "step": 34407 }, { "epoch": 0.45, "grad_norm": 3.8914148807525635, "learning_rate": 1.9652519624760586e-05, "loss": 2.3614, "step": 34408 }, { "epoch": 0.45, "grad_norm": 4.075996398925781, "learning_rate": 1.9652492164756863e-05, "loss": 2.0471, "step": 34409 }, { "epoch": 0.45, "grad_norm": 3.2046990394592285, "learning_rate": 1.965246470368734e-05, "loss": 1.4701, "step": 34410 }, { "epoch": 0.45, "grad_norm": 4.223062515258789, "learning_rate": 1.9652437241552024e-05, "loss": 2.1896, "step": 34411 }, { "epoch": 0.45, "grad_norm": 3.9435513019561768, "learning_rate": 1.9652409778350915e-05, "loss": 2.11, "step": 34412 }, { "epoch": 0.45, "grad_norm": 3.4739902019500732, "learning_rate": 1.9652382314084014e-05, "loss": 1.8075, "step": 34413 }, { "epoch": 0.45, "grad_norm": 4.344465255737305, "learning_rate": 1.9652354848751326e-05, "loss": 2.0625, "step": 34414 }, { "epoch": 0.45, "grad_norm": 3.8583388328552246, "learning_rate": 1.9652327382352853e-05, "loss": 2.2412, "step": 34415 }, { "epoch": 0.45, "grad_norm": 4.095873832702637, "learning_rate": 1.9652299914888603e-05, "loss": 2.2574, "step": 34416 }, { "epoch": 0.45, "grad_norm": 3.4338300228118896, "learning_rate": 1.9652272446358572e-05, "loss": 1.9609, "step": 34417 }, { "epoch": 0.45, "grad_norm": 3.943110466003418, "learning_rate": 1.965224497676277e-05, "loss": 2.2435, "step": 34418 }, { "epoch": 0.45, "grad_norm": 4.046151161193848, "learning_rate": 1.9652217506101194e-05, "loss": 2.0902, "step": 34419 }, { "epoch": 0.45, "grad_norm": 4.260056972503662, "learning_rate": 1.965219003437385e-05, "loss": 1.9827, "step": 34420 }, { "epoch": 0.45, "grad_norm": 3.8837692737579346, "learning_rate": 1.9652162561580737e-05, "loss": 2.193, "step": 34421 }, { "epoch": 0.45, "grad_norm": 3.9055967330932617, "learning_rate": 1.965213508772187e-05, "loss": 2.4737, "step": 34422 }, { "epoch": 0.45, "grad_norm": 3.848511219024658, "learning_rate": 1.9652107612797238e-05, "loss": 1.9931, "step": 34423 }, { "epoch": 0.45, "grad_norm": 3.717646360397339, "learning_rate": 1.965208013680685e-05, "loss": 2.4418, "step": 34424 }, { "epoch": 0.45, "grad_norm": 4.0409016609191895, "learning_rate": 1.9652052659750706e-05, "loss": 2.4129, "step": 34425 }, { "epoch": 0.45, "grad_norm": 3.815363645553589, "learning_rate": 1.9652025181628817e-05, "loss": 2.4591, "step": 34426 }, { "epoch": 0.45, "grad_norm": 3.5681653022766113, "learning_rate": 1.9651997702441177e-05, "loss": 2.1402, "step": 34427 }, { "epoch": 0.45, "grad_norm": 3.422912120819092, "learning_rate": 1.9651970222187793e-05, "loss": 1.6782, "step": 34428 }, { "epoch": 0.45, "grad_norm": 3.699883460998535, "learning_rate": 1.965194274086867e-05, "loss": 2.0604, "step": 34429 }, { "epoch": 0.45, "grad_norm": 3.601975679397583, "learning_rate": 1.9651915258483805e-05, "loss": 1.9047, "step": 34430 }, { "epoch": 0.45, "grad_norm": 4.3562397956848145, "learning_rate": 1.965188777503321e-05, "loss": 1.8187, "step": 34431 }, { "epoch": 0.45, "grad_norm": 4.104470252990723, "learning_rate": 1.9651860290516876e-05, "loss": 2.5159, "step": 34432 }, { "epoch": 0.45, "grad_norm": 4.022911071777344, "learning_rate": 1.9651832804934818e-05, "loss": 2.1196, "step": 34433 }, { "epoch": 0.45, "grad_norm": 3.5082168579101562, "learning_rate": 1.965180531828703e-05, "loss": 1.885, "step": 34434 }, { "epoch": 0.45, "grad_norm": 3.264007806777954, "learning_rate": 1.9651777830573526e-05, "loss": 1.8543, "step": 34435 }, { "epoch": 0.45, "grad_norm": 3.6746230125427246, "learning_rate": 1.9651750341794295e-05, "loss": 1.7939, "step": 34436 }, { "epoch": 0.45, "grad_norm": 3.5539143085479736, "learning_rate": 1.965172285194935e-05, "loss": 1.8327, "step": 34437 }, { "epoch": 0.45, "grad_norm": 4.361173629760742, "learning_rate": 1.9651695361038693e-05, "loss": 2.5167, "step": 34438 }, { "epoch": 0.45, "grad_norm": 3.8621578216552734, "learning_rate": 1.965166786906232e-05, "loss": 1.886, "step": 34439 }, { "epoch": 0.45, "grad_norm": 3.978829860687256, "learning_rate": 1.9651640376020246e-05, "loss": 2.3485, "step": 34440 }, { "epoch": 0.45, "grad_norm": 3.8590495586395264, "learning_rate": 1.965161288191246e-05, "loss": 2.1028, "step": 34441 }, { "epoch": 0.45, "grad_norm": 3.5438058376312256, "learning_rate": 1.9651585386738973e-05, "loss": 1.7628, "step": 34442 }, { "epoch": 0.45, "grad_norm": 4.332901954650879, "learning_rate": 1.965155789049979e-05, "loss": 1.973, "step": 34443 }, { "epoch": 0.45, "grad_norm": 3.7793052196502686, "learning_rate": 1.9651530393194912e-05, "loss": 1.7676, "step": 34444 }, { "epoch": 0.45, "grad_norm": 3.395192861557007, "learning_rate": 1.965150289482434e-05, "loss": 1.7374, "step": 34445 }, { "epoch": 0.45, "grad_norm": 3.712782144546509, "learning_rate": 1.965147539538808e-05, "loss": 1.665, "step": 34446 }, { "epoch": 0.45, "grad_norm": 3.748962640762329, "learning_rate": 1.965144789488613e-05, "loss": 2.5754, "step": 34447 }, { "epoch": 0.45, "grad_norm": 3.7566628456115723, "learning_rate": 1.96514203933185e-05, "loss": 1.8236, "step": 34448 }, { "epoch": 0.45, "grad_norm": 3.4981260299682617, "learning_rate": 1.9651392890685184e-05, "loss": 1.8091, "step": 34449 }, { "epoch": 0.45, "grad_norm": 3.409203052520752, "learning_rate": 1.9651365386986196e-05, "loss": 1.8638, "step": 34450 }, { "epoch": 0.45, "grad_norm": 3.9144718647003174, "learning_rate": 1.9651337882221532e-05, "loss": 1.8371, "step": 34451 }, { "epoch": 0.45, "grad_norm": 4.827498435974121, "learning_rate": 1.9651310376391196e-05, "loss": 2.4267, "step": 34452 }, { "epoch": 0.45, "grad_norm": 3.811605453491211, "learning_rate": 1.965128286949519e-05, "loss": 1.633, "step": 34453 }, { "epoch": 0.45, "grad_norm": 4.132449150085449, "learning_rate": 1.9651255361533523e-05, "loss": 2.521, "step": 34454 }, { "epoch": 0.45, "grad_norm": 3.9863932132720947, "learning_rate": 1.965122785250619e-05, "loss": 2.4108, "step": 34455 }, { "epoch": 0.45, "grad_norm": 3.828094005584717, "learning_rate": 1.96512003424132e-05, "loss": 1.678, "step": 34456 }, { "epoch": 0.45, "grad_norm": 3.8739941120147705, "learning_rate": 1.965117283125455e-05, "loss": 2.2158, "step": 34457 }, { "epoch": 0.45, "grad_norm": 4.039636611938477, "learning_rate": 1.9651145319030252e-05, "loss": 2.0986, "step": 34458 }, { "epoch": 0.45, "grad_norm": 4.3224358558654785, "learning_rate": 1.96511178057403e-05, "loss": 2.1045, "step": 34459 }, { "epoch": 0.45, "grad_norm": 3.3755240440368652, "learning_rate": 1.96510902913847e-05, "loss": 1.7838, "step": 34460 }, { "epoch": 0.45, "grad_norm": 3.7836999893188477, "learning_rate": 1.965106277596346e-05, "loss": 2.1605, "step": 34461 }, { "epoch": 0.45, "grad_norm": 3.7570323944091797, "learning_rate": 1.9651035259476578e-05, "loss": 2.3095, "step": 34462 }, { "epoch": 0.45, "grad_norm": 3.7033298015594482, "learning_rate": 1.9651007741924054e-05, "loss": 2.3833, "step": 34463 }, { "epoch": 0.45, "grad_norm": 4.325511932373047, "learning_rate": 1.96509802233059e-05, "loss": 2.3027, "step": 34464 }, { "epoch": 0.45, "grad_norm": 4.150051116943359, "learning_rate": 1.965095270362211e-05, "loss": 2.615, "step": 34465 }, { "epoch": 0.45, "grad_norm": 4.175577163696289, "learning_rate": 1.9650925182872692e-05, "loss": 1.7679, "step": 34466 }, { "epoch": 0.45, "grad_norm": 3.581608533859253, "learning_rate": 1.965089766105765e-05, "loss": 1.6778, "step": 34467 }, { "epoch": 0.45, "grad_norm": 3.9042322635650635, "learning_rate": 1.9650870138176986e-05, "loss": 2.0713, "step": 34468 }, { "epoch": 0.45, "grad_norm": 3.883755683898926, "learning_rate": 1.9650842614230702e-05, "loss": 2.0251, "step": 34469 }, { "epoch": 0.45, "grad_norm": 3.6961636543273926, "learning_rate": 1.9650815089218796e-05, "loss": 1.8684, "step": 34470 }, { "epoch": 0.45, "grad_norm": 4.0886406898498535, "learning_rate": 1.965078756314128e-05, "loss": 2.0797, "step": 34471 }, { "epoch": 0.45, "grad_norm": 3.428420066833496, "learning_rate": 1.9650760035998154e-05, "loss": 1.9581, "step": 34472 }, { "epoch": 0.45, "grad_norm": 4.040868282318115, "learning_rate": 1.965073250778942e-05, "loss": 2.3116, "step": 34473 }, { "epoch": 0.45, "grad_norm": 3.768503427505493, "learning_rate": 1.965070497851508e-05, "loss": 2.311, "step": 34474 }, { "epoch": 0.45, "grad_norm": 4.03316593170166, "learning_rate": 1.965067744817514e-05, "loss": 2.0527, "step": 34475 }, { "epoch": 0.45, "grad_norm": 3.2899012565612793, "learning_rate": 1.9650649916769602e-05, "loss": 1.5262, "step": 34476 }, { "epoch": 0.45, "grad_norm": 3.2992711067199707, "learning_rate": 1.9650622384298467e-05, "loss": 1.5147, "step": 34477 }, { "epoch": 0.45, "grad_norm": 4.3777756690979, "learning_rate": 1.965059485076174e-05, "loss": 2.4217, "step": 34478 }, { "epoch": 0.45, "grad_norm": 4.161360263824463, "learning_rate": 1.9650567316159423e-05, "loss": 1.9241, "step": 34479 }, { "epoch": 0.45, "grad_norm": 3.633735418319702, "learning_rate": 1.965053978049152e-05, "loss": 1.9998, "step": 34480 }, { "epoch": 0.45, "grad_norm": 4.4094767570495605, "learning_rate": 1.9650512243758034e-05, "loss": 1.7866, "step": 34481 }, { "epoch": 0.45, "grad_norm": 3.4975011348724365, "learning_rate": 1.965048470595897e-05, "loss": 1.6858, "step": 34482 }, { "epoch": 0.45, "grad_norm": 4.171503067016602, "learning_rate": 1.9650457167094325e-05, "loss": 2.0227, "step": 34483 }, { "epoch": 0.45, "grad_norm": 3.7260303497314453, "learning_rate": 1.9650429627164106e-05, "loss": 1.9992, "step": 34484 }, { "epoch": 0.45, "grad_norm": 3.8859405517578125, "learning_rate": 1.965040208616832e-05, "loss": 2.261, "step": 34485 }, { "epoch": 0.45, "grad_norm": 3.7443432807922363, "learning_rate": 1.965037454410696e-05, "loss": 2.0761, "step": 34486 }, { "epoch": 0.45, "grad_norm": 3.7378809452056885, "learning_rate": 1.965034700098004e-05, "loss": 1.7865, "step": 34487 }, { "epoch": 0.45, "grad_norm": 4.110134601593018, "learning_rate": 1.9650319456787558e-05, "loss": 2.3706, "step": 34488 }, { "epoch": 0.45, "grad_norm": 3.5334224700927734, "learning_rate": 1.9650291911529515e-05, "loss": 1.5555, "step": 34489 }, { "epoch": 0.45, "grad_norm": 3.4368245601654053, "learning_rate": 1.9650264365205914e-05, "loss": 1.809, "step": 34490 }, { "epoch": 0.45, "grad_norm": 3.3201773166656494, "learning_rate": 1.965023681781676e-05, "loss": 1.8076, "step": 34491 }, { "epoch": 0.45, "grad_norm": 3.739823341369629, "learning_rate": 1.9650209269362063e-05, "loss": 1.744, "step": 34492 }, { "epoch": 0.45, "grad_norm": 3.5420680046081543, "learning_rate": 1.965018171984181e-05, "loss": 1.8652, "step": 34493 }, { "epoch": 0.45, "grad_norm": 3.608273983001709, "learning_rate": 1.965015416925602e-05, "loss": 2.2219, "step": 34494 }, { "epoch": 0.45, "grad_norm": 4.518725395202637, "learning_rate": 1.9650126617604686e-05, "loss": 2.5629, "step": 34495 }, { "epoch": 0.45, "grad_norm": 3.8676810264587402, "learning_rate": 1.9650099064887817e-05, "loss": 1.8024, "step": 34496 }, { "epoch": 0.45, "grad_norm": 4.029550552368164, "learning_rate": 1.965007151110541e-05, "loss": 1.8767, "step": 34497 }, { "epoch": 0.45, "grad_norm": 3.991898536682129, "learning_rate": 1.9650043956257474e-05, "loss": 2.029, "step": 34498 }, { "epoch": 0.45, "grad_norm": 3.1897358894348145, "learning_rate": 1.9650016400344007e-05, "loss": 1.5568, "step": 34499 }, { "epoch": 0.45, "grad_norm": 4.107812881469727, "learning_rate": 1.9649988843365014e-05, "loss": 2.3648, "step": 34500 }, { "epoch": 0.45, "grad_norm": 3.7035727500915527, "learning_rate": 1.9649961285320504e-05, "loss": 1.8785, "step": 34501 }, { "epoch": 0.45, "grad_norm": 4.0359673500061035, "learning_rate": 1.9649933726210467e-05, "loss": 2.4531, "step": 34502 }, { "epoch": 0.45, "grad_norm": 3.746974229812622, "learning_rate": 1.964990616603492e-05, "loss": 1.9861, "step": 34503 }, { "epoch": 0.45, "grad_norm": 3.4866514205932617, "learning_rate": 1.9649878604793857e-05, "loss": 1.9258, "step": 34504 }, { "epoch": 0.45, "grad_norm": 4.39779806137085, "learning_rate": 1.9649851042487283e-05, "loss": 2.3542, "step": 34505 }, { "epoch": 0.45, "grad_norm": 3.8676934242248535, "learning_rate": 1.9649823479115204e-05, "loss": 2.0856, "step": 34506 }, { "epoch": 0.45, "grad_norm": 3.8604941368103027, "learning_rate": 1.9649795914677618e-05, "loss": 2.0714, "step": 34507 }, { "epoch": 0.45, "grad_norm": 3.950031280517578, "learning_rate": 1.9649768349174532e-05, "loss": 2.1925, "step": 34508 }, { "epoch": 0.45, "grad_norm": 3.8409383296966553, "learning_rate": 1.964974078260595e-05, "loss": 2.0371, "step": 34509 }, { "epoch": 0.45, "grad_norm": 3.935208320617676, "learning_rate": 1.964971321497187e-05, "loss": 2.1317, "step": 34510 }, { "epoch": 0.45, "grad_norm": 3.4989781379699707, "learning_rate": 1.96496856462723e-05, "loss": 1.5438, "step": 34511 }, { "epoch": 0.45, "grad_norm": 3.466151237487793, "learning_rate": 1.964965807650724e-05, "loss": 1.8643, "step": 34512 }, { "epoch": 0.45, "grad_norm": 4.2091965675354, "learning_rate": 1.9649630505676692e-05, "loss": 2.2311, "step": 34513 }, { "epoch": 0.45, "grad_norm": 3.5917251110076904, "learning_rate": 1.9649602933780666e-05, "loss": 1.5818, "step": 34514 }, { "epoch": 0.45, "grad_norm": 3.2252628803253174, "learning_rate": 1.9649575360819157e-05, "loss": 1.4859, "step": 34515 }, { "epoch": 0.45, "grad_norm": 4.141012668609619, "learning_rate": 1.9649547786792168e-05, "loss": 2.4486, "step": 34516 }, { "epoch": 0.45, "grad_norm": 3.733405351638794, "learning_rate": 1.964952021169971e-05, "loss": 2.0881, "step": 34517 }, { "epoch": 0.45, "grad_norm": 3.576159954071045, "learning_rate": 1.964949263554178e-05, "loss": 1.8431, "step": 34518 }, { "epoch": 0.45, "grad_norm": 3.418846845626831, "learning_rate": 1.964946505831838e-05, "loss": 1.6406, "step": 34519 }, { "epoch": 0.45, "grad_norm": 4.484021186828613, "learning_rate": 1.9649437480029522e-05, "loss": 2.6046, "step": 34520 }, { "epoch": 0.45, "grad_norm": 3.3221845626831055, "learning_rate": 1.9649409900675195e-05, "loss": 1.6947, "step": 34521 }, { "epoch": 0.45, "grad_norm": 3.7151660919189453, "learning_rate": 1.964938232025541e-05, "loss": 1.9078, "step": 34522 }, { "epoch": 0.45, "grad_norm": 3.867349863052368, "learning_rate": 1.9649354738770176e-05, "loss": 1.854, "step": 34523 }, { "epoch": 0.45, "grad_norm": 4.363142490386963, "learning_rate": 1.964932715621948e-05, "loss": 2.3849, "step": 34524 }, { "epoch": 0.45, "grad_norm": 3.671391725540161, "learning_rate": 1.9649299572603345e-05, "loss": 1.8543, "step": 34525 }, { "epoch": 0.45, "grad_norm": 3.74627947807312, "learning_rate": 1.9649271987921756e-05, "loss": 1.9826, "step": 34526 }, { "epoch": 0.45, "grad_norm": 4.660738945007324, "learning_rate": 1.9649244402174728e-05, "loss": 2.6773, "step": 34527 }, { "epoch": 0.45, "grad_norm": 3.4066359996795654, "learning_rate": 1.964921681536226e-05, "loss": 1.7961, "step": 34528 }, { "epoch": 0.45, "grad_norm": 3.9224236011505127, "learning_rate": 1.964918922748435e-05, "loss": 1.8419, "step": 34529 }, { "epoch": 0.45, "grad_norm": 4.107235908508301, "learning_rate": 1.9649161638541007e-05, "loss": 2.0933, "step": 34530 }, { "epoch": 0.45, "grad_norm": 4.012345790863037, "learning_rate": 1.9649134048532238e-05, "loss": 2.0046, "step": 34531 }, { "epoch": 0.45, "grad_norm": 4.048259735107422, "learning_rate": 1.9649106457458034e-05, "loss": 2.3616, "step": 34532 }, { "epoch": 0.45, "grad_norm": 3.867879867553711, "learning_rate": 1.964907886531841e-05, "loss": 2.278, "step": 34533 }, { "epoch": 0.45, "grad_norm": 3.8106331825256348, "learning_rate": 1.9649051272113363e-05, "loss": 2.0553, "step": 34534 }, { "epoch": 0.45, "grad_norm": 4.252024173736572, "learning_rate": 1.9649023677842894e-05, "loss": 2.0834, "step": 34535 }, { "epoch": 0.45, "grad_norm": 4.385288238525391, "learning_rate": 1.964899608250701e-05, "loss": 2.2254, "step": 34536 }, { "epoch": 0.45, "grad_norm": 3.636815071105957, "learning_rate": 1.9648968486105717e-05, "loss": 2.4142, "step": 34537 }, { "epoch": 0.45, "grad_norm": 4.0013885498046875, "learning_rate": 1.964894088863901e-05, "loss": 1.969, "step": 34538 }, { "epoch": 0.45, "grad_norm": 3.528445243835449, "learning_rate": 1.96489132901069e-05, "loss": 1.7827, "step": 34539 }, { "epoch": 0.45, "grad_norm": 3.896268129348755, "learning_rate": 1.9648885690509383e-05, "loss": 2.2073, "step": 34540 }, { "epoch": 0.45, "grad_norm": 3.6822071075439453, "learning_rate": 1.9648858089846468e-05, "loss": 1.8297, "step": 34541 }, { "epoch": 0.45, "grad_norm": 3.6238811016082764, "learning_rate": 1.964883048811815e-05, "loss": 2.1147, "step": 34542 }, { "epoch": 0.45, "grad_norm": 3.9061572551727295, "learning_rate": 1.9648802885324443e-05, "loss": 1.7764, "step": 34543 }, { "epoch": 0.45, "grad_norm": 4.245691776275635, "learning_rate": 1.964877528146534e-05, "loss": 1.9274, "step": 34544 }, { "epoch": 0.45, "grad_norm": 3.5123298168182373, "learning_rate": 1.9648747676540854e-05, "loss": 1.7094, "step": 34545 }, { "epoch": 0.45, "grad_norm": 3.5521116256713867, "learning_rate": 1.964872007055098e-05, "loss": 2.0911, "step": 34546 }, { "epoch": 0.45, "grad_norm": 3.4820969104766846, "learning_rate": 1.9648692463495723e-05, "loss": 1.6426, "step": 34547 }, { "epoch": 0.45, "grad_norm": 3.941549062728882, "learning_rate": 1.964866485537509e-05, "loss": 1.7269, "step": 34548 }, { "epoch": 0.45, "grad_norm": 3.8214125633239746, "learning_rate": 1.9648637246189074e-05, "loss": 2.4327, "step": 34549 }, { "epoch": 0.45, "grad_norm": 3.8556196689605713, "learning_rate": 1.964860963593769e-05, "loss": 2.1306, "step": 34550 }, { "epoch": 0.45, "grad_norm": 4.231734275817871, "learning_rate": 1.9648582024620935e-05, "loss": 1.9469, "step": 34551 }, { "epoch": 0.45, "grad_norm": 3.3699052333831787, "learning_rate": 1.964855441223881e-05, "loss": 1.7809, "step": 34552 }, { "epoch": 0.45, "grad_norm": 3.7173030376434326, "learning_rate": 1.9648526798791322e-05, "loss": 1.9094, "step": 34553 }, { "epoch": 0.45, "grad_norm": 3.115168571472168, "learning_rate": 1.964849918427848e-05, "loss": 1.5489, "step": 34554 }, { "epoch": 0.45, "grad_norm": 4.2237629890441895, "learning_rate": 1.9648471568700273e-05, "loss": 2.3937, "step": 34555 }, { "epoch": 0.45, "grad_norm": 3.5707907676696777, "learning_rate": 1.964844395205671e-05, "loss": 1.9139, "step": 34556 }, { "epoch": 0.45, "grad_norm": 3.19372296333313, "learning_rate": 1.9648416334347798e-05, "loss": 1.6843, "step": 34557 }, { "epoch": 0.45, "grad_norm": 3.7204604148864746, "learning_rate": 1.9648388715573536e-05, "loss": 2.0078, "step": 34558 }, { "epoch": 0.45, "grad_norm": 3.3556222915649414, "learning_rate": 1.9648361095733927e-05, "loss": 1.7117, "step": 34559 }, { "epoch": 0.45, "grad_norm": 3.7277414798736572, "learning_rate": 1.9648333474828977e-05, "loss": 2.328, "step": 34560 }, { "epoch": 0.45, "grad_norm": 3.6585917472839355, "learning_rate": 1.9648305852858687e-05, "loss": 1.7572, "step": 34561 }, { "epoch": 0.45, "grad_norm": 3.7608113288879395, "learning_rate": 1.964827822982306e-05, "loss": 2.0604, "step": 34562 }, { "epoch": 0.45, "grad_norm": 3.3955438137054443, "learning_rate": 1.96482506057221e-05, "loss": 1.5464, "step": 34563 }, { "epoch": 0.45, "grad_norm": 4.006420135498047, "learning_rate": 1.9648222980555812e-05, "loss": 1.9481, "step": 34564 }, { "epoch": 0.45, "grad_norm": 3.966813802719116, "learning_rate": 1.964819535432419e-05, "loss": 1.7722, "step": 34565 }, { "epoch": 0.45, "grad_norm": 3.5714123249053955, "learning_rate": 1.964816772702725e-05, "loss": 1.7174, "step": 34566 }, { "epoch": 0.45, "grad_norm": 3.6566669940948486, "learning_rate": 1.9648140098664985e-05, "loss": 2.1738, "step": 34567 }, { "epoch": 0.45, "grad_norm": 3.8018226623535156, "learning_rate": 1.9648112469237403e-05, "loss": 2.1495, "step": 34568 }, { "epoch": 0.45, "grad_norm": 4.6837944984436035, "learning_rate": 1.9648084838744505e-05, "loss": 2.3504, "step": 34569 }, { "epoch": 0.45, "grad_norm": 3.764986276626587, "learning_rate": 1.9648057207186297e-05, "loss": 2.222, "step": 34570 }, { "epoch": 0.45, "grad_norm": 4.001017093658447, "learning_rate": 1.964802957456278e-05, "loss": 1.5107, "step": 34571 }, { "epoch": 0.45, "grad_norm": 3.482722520828247, "learning_rate": 1.964800194087395e-05, "loss": 2.1204, "step": 34572 }, { "epoch": 0.45, "grad_norm": 3.6669528484344482, "learning_rate": 1.9647974306119822e-05, "loss": 1.9601, "step": 34573 }, { "epoch": 0.45, "grad_norm": 3.608978271484375, "learning_rate": 1.964794667030039e-05, "loss": 1.316, "step": 34574 }, { "epoch": 0.45, "grad_norm": 4.462009429931641, "learning_rate": 1.9647919033415667e-05, "loss": 2.8145, "step": 34575 }, { "epoch": 0.45, "grad_norm": 3.5171077251434326, "learning_rate": 1.9647891395465647e-05, "loss": 2.27, "step": 34576 }, { "epoch": 0.45, "grad_norm": 3.999211072921753, "learning_rate": 1.9647863756450337e-05, "loss": 2.2455, "step": 34577 }, { "epoch": 0.45, "grad_norm": 3.9970977306365967, "learning_rate": 1.9647836116369737e-05, "loss": 2.4397, "step": 34578 }, { "epoch": 0.45, "grad_norm": 3.877642869949341, "learning_rate": 1.9647808475223858e-05, "loss": 1.8031, "step": 34579 }, { "epoch": 0.45, "grad_norm": 4.25145959854126, "learning_rate": 1.9647780833012692e-05, "loss": 2.1171, "step": 34580 }, { "epoch": 0.45, "grad_norm": 3.574964761734009, "learning_rate": 1.964775318973625e-05, "loss": 1.9839, "step": 34581 }, { "epoch": 0.45, "grad_norm": 3.7083709239959717, "learning_rate": 1.964772554539453e-05, "loss": 1.9263, "step": 34582 }, { "epoch": 0.45, "grad_norm": 3.6109519004821777, "learning_rate": 1.964769789998754e-05, "loss": 1.9877, "step": 34583 }, { "epoch": 0.45, "grad_norm": 4.324432849884033, "learning_rate": 1.964767025351528e-05, "loss": 2.3031, "step": 34584 }, { "epoch": 0.45, "grad_norm": 3.8320257663726807, "learning_rate": 1.964764260597775e-05, "loss": 1.6479, "step": 34585 }, { "epoch": 0.45, "grad_norm": 4.210712432861328, "learning_rate": 1.964761495737496e-05, "loss": 2.1491, "step": 34586 }, { "epoch": 0.45, "grad_norm": 4.316822528839111, "learning_rate": 1.964758730770691e-05, "loss": 1.9334, "step": 34587 }, { "epoch": 0.45, "grad_norm": 4.669079303741455, "learning_rate": 1.96475596569736e-05, "loss": 2.147, "step": 34588 }, { "epoch": 0.45, "grad_norm": 4.003279209136963, "learning_rate": 1.9647532005175035e-05, "loss": 1.7963, "step": 34589 }, { "epoch": 0.45, "grad_norm": 3.435123920440674, "learning_rate": 1.964750435231122e-05, "loss": 1.7516, "step": 34590 }, { "epoch": 0.45, "grad_norm": 4.5801167488098145, "learning_rate": 1.964747669838216e-05, "loss": 2.5107, "step": 34591 }, { "epoch": 0.45, "grad_norm": 3.8224997520446777, "learning_rate": 1.9647449043387855e-05, "loss": 2.0113, "step": 34592 }, { "epoch": 0.45, "grad_norm": 3.5516159534454346, "learning_rate": 1.9647421387328304e-05, "loss": 1.9453, "step": 34593 }, { "epoch": 0.45, "grad_norm": 3.7994768619537354, "learning_rate": 1.9647393730203514e-05, "loss": 1.8153, "step": 34594 }, { "epoch": 0.45, "grad_norm": 3.4978833198547363, "learning_rate": 1.9647366072013492e-05, "loss": 1.7754, "step": 34595 }, { "epoch": 0.45, "grad_norm": 3.8405494689941406, "learning_rate": 1.964733841275823e-05, "loss": 1.9545, "step": 34596 }, { "epoch": 0.45, "grad_norm": 3.488555669784546, "learning_rate": 1.9647310752437746e-05, "loss": 1.7988, "step": 34597 }, { "epoch": 0.45, "grad_norm": 4.207651615142822, "learning_rate": 1.964728309105203e-05, "loss": 1.9777, "step": 34598 }, { "epoch": 0.45, "grad_norm": 3.914438009262085, "learning_rate": 1.9647255428601095e-05, "loss": 1.9241, "step": 34599 }, { "epoch": 0.45, "grad_norm": 3.160022497177124, "learning_rate": 1.9647227765084937e-05, "loss": 1.6455, "step": 34600 }, { "epoch": 0.45, "grad_norm": 4.03400993347168, "learning_rate": 1.964720010050356e-05, "loss": 1.9465, "step": 34601 }, { "epoch": 0.45, "grad_norm": 3.7075390815734863, "learning_rate": 1.9647172434856968e-05, "loss": 2.0465, "step": 34602 }, { "epoch": 0.45, "grad_norm": 3.6131958961486816, "learning_rate": 1.9647144768145168e-05, "loss": 1.9311, "step": 34603 }, { "epoch": 0.45, "grad_norm": 3.8249549865722656, "learning_rate": 1.964711710036816e-05, "loss": 2.1562, "step": 34604 }, { "epoch": 0.45, "grad_norm": 3.552123546600342, "learning_rate": 1.9647089431525943e-05, "loss": 1.7488, "step": 34605 }, { "epoch": 0.45, "grad_norm": 4.929903030395508, "learning_rate": 1.9647061761618525e-05, "loss": 2.4416, "step": 34606 }, { "epoch": 0.45, "grad_norm": 3.8527157306671143, "learning_rate": 1.9647034090645908e-05, "loss": 1.8772, "step": 34607 }, { "epoch": 0.45, "grad_norm": 3.7480194568634033, "learning_rate": 1.9647006418608094e-05, "loss": 1.8894, "step": 34608 }, { "epoch": 0.45, "grad_norm": 4.775260925292969, "learning_rate": 1.9646978745505088e-05, "loss": 2.8142, "step": 34609 }, { "epoch": 0.45, "grad_norm": 3.7560298442840576, "learning_rate": 1.964695107133689e-05, "loss": 1.7198, "step": 34610 }, { "epoch": 0.45, "grad_norm": 3.5630178451538086, "learning_rate": 1.9646923396103505e-05, "loss": 1.4737, "step": 34611 }, { "epoch": 0.45, "grad_norm": 4.202054023742676, "learning_rate": 1.9646895719804936e-05, "loss": 2.3298, "step": 34612 }, { "epoch": 0.45, "grad_norm": 3.767136573791504, "learning_rate": 1.9646868042441188e-05, "loss": 1.9489, "step": 34613 }, { "epoch": 0.45, "grad_norm": 4.273629665374756, "learning_rate": 1.9646840364012262e-05, "loss": 2.1324, "step": 34614 }, { "epoch": 0.45, "grad_norm": 4.00416374206543, "learning_rate": 1.9646812684518163e-05, "loss": 2.0594, "step": 34615 }, { "epoch": 0.45, "grad_norm": 3.697252035140991, "learning_rate": 1.9646785003958885e-05, "loss": 1.9059, "step": 34616 }, { "epoch": 0.45, "grad_norm": 4.302260875701904, "learning_rate": 1.9646757322334443e-05, "loss": 2.4876, "step": 34617 }, { "epoch": 0.45, "grad_norm": 4.048801898956299, "learning_rate": 1.9646729639644837e-05, "loss": 1.9883, "step": 34618 }, { "epoch": 0.45, "grad_norm": 3.9018259048461914, "learning_rate": 1.964670195589007e-05, "loss": 2.0908, "step": 34619 }, { "epoch": 0.45, "grad_norm": 3.5409412384033203, "learning_rate": 1.964667427107014e-05, "loss": 2.2114, "step": 34620 }, { "epoch": 0.45, "grad_norm": 3.9448423385620117, "learning_rate": 1.9646646585185053e-05, "loss": 1.9955, "step": 34621 }, { "epoch": 0.45, "grad_norm": 3.5515003204345703, "learning_rate": 1.964661889823481e-05, "loss": 1.803, "step": 34622 }, { "epoch": 0.45, "grad_norm": 3.9158895015716553, "learning_rate": 1.964659121021942e-05, "loss": 1.9531, "step": 34623 }, { "epoch": 0.45, "grad_norm": 4.071302890777588, "learning_rate": 1.9646563521138882e-05, "loss": 2.2942, "step": 34624 }, { "epoch": 0.45, "grad_norm": 3.5465691089630127, "learning_rate": 1.96465358309932e-05, "loss": 1.9506, "step": 34625 }, { "epoch": 0.45, "grad_norm": 3.9954733848571777, "learning_rate": 1.9646508139782375e-05, "loss": 2.0179, "step": 34626 }, { "epoch": 0.45, "grad_norm": 3.8525333404541016, "learning_rate": 1.9646480447506416e-05, "loss": 1.6351, "step": 34627 }, { "epoch": 0.45, "grad_norm": 3.483332633972168, "learning_rate": 1.964645275416532e-05, "loss": 1.8881, "step": 34628 }, { "epoch": 0.45, "grad_norm": 3.8204827308654785, "learning_rate": 1.9646425059759093e-05, "loss": 2.2909, "step": 34629 }, { "epoch": 0.45, "grad_norm": 3.6972696781158447, "learning_rate": 1.9646397364287736e-05, "loss": 1.5975, "step": 34630 }, { "epoch": 0.45, "grad_norm": 3.955383062362671, "learning_rate": 1.9646369667751252e-05, "loss": 2.1887, "step": 34631 }, { "epoch": 0.45, "grad_norm": 3.5933525562286377, "learning_rate": 1.9646341970149644e-05, "loss": 1.8216, "step": 34632 }, { "epoch": 0.45, "grad_norm": 3.8952088356018066, "learning_rate": 1.9646314271482916e-05, "loss": 1.9431, "step": 34633 }, { "epoch": 0.45, "grad_norm": 4.067311763763428, "learning_rate": 1.9646286571751075e-05, "loss": 2.1808, "step": 34634 }, { "epoch": 0.45, "grad_norm": 3.8444724082946777, "learning_rate": 1.964625887095412e-05, "loss": 2.1043, "step": 34635 }, { "epoch": 0.45, "grad_norm": 4.1422953605651855, "learning_rate": 1.9646231169092052e-05, "loss": 2.1906, "step": 34636 }, { "epoch": 0.45, "grad_norm": 4.4354424476623535, "learning_rate": 1.9646203466164877e-05, "loss": 2.5233, "step": 34637 }, { "epoch": 0.45, "grad_norm": 3.66611909866333, "learning_rate": 1.96461757621726e-05, "loss": 1.8427, "step": 34638 }, { "epoch": 0.45, "grad_norm": 3.3085970878601074, "learning_rate": 1.9646148057115218e-05, "loss": 1.8263, "step": 34639 }, { "epoch": 0.45, "grad_norm": 3.965407609939575, "learning_rate": 1.964612035099274e-05, "loss": 2.0293, "step": 34640 }, { "epoch": 0.45, "grad_norm": 3.488703966140747, "learning_rate": 1.9646092643805165e-05, "loss": 1.6199, "step": 34641 }, { "epoch": 0.45, "grad_norm": 3.969336986541748, "learning_rate": 1.96460649355525e-05, "loss": 2.0663, "step": 34642 }, { "epoch": 0.45, "grad_norm": 3.373528003692627, "learning_rate": 1.9646037226234744e-05, "loss": 1.62, "step": 34643 }, { "epoch": 0.45, "grad_norm": 3.5037267208099365, "learning_rate": 1.9646009515851904e-05, "loss": 1.5482, "step": 34644 }, { "epoch": 0.45, "grad_norm": 3.8570473194122314, "learning_rate": 1.9645981804403974e-05, "loss": 2.0327, "step": 34645 }, { "epoch": 0.45, "grad_norm": 3.7968647480010986, "learning_rate": 1.964595409189097e-05, "loss": 2.2511, "step": 34646 }, { "epoch": 0.45, "grad_norm": 3.6336281299591064, "learning_rate": 1.964592637831289e-05, "loss": 2.1894, "step": 34647 }, { "epoch": 0.45, "grad_norm": 3.965433359146118, "learning_rate": 1.9645898663669734e-05, "loss": 2.1416, "step": 34648 }, { "epoch": 0.45, "grad_norm": 3.9211628437042236, "learning_rate": 1.9645870947961506e-05, "loss": 1.8639, "step": 34649 }, { "epoch": 0.45, "grad_norm": 3.9960057735443115, "learning_rate": 1.9645843231188212e-05, "loss": 2.1766, "step": 34650 }, { "epoch": 0.45, "grad_norm": 3.6564700603485107, "learning_rate": 1.9645815513349852e-05, "loss": 1.6738, "step": 34651 }, { "epoch": 0.45, "grad_norm": 3.5369114875793457, "learning_rate": 1.964578779444643e-05, "loss": 1.7652, "step": 34652 }, { "epoch": 0.45, "grad_norm": 4.231290817260742, "learning_rate": 1.9645760074477952e-05, "loss": 2.2924, "step": 34653 }, { "epoch": 0.45, "grad_norm": 3.6757373809814453, "learning_rate": 1.9645732353444414e-05, "loss": 2.0059, "step": 34654 }, { "epoch": 0.45, "grad_norm": 4.385603904724121, "learning_rate": 1.964570463134583e-05, "loss": 2.5051, "step": 34655 }, { "epoch": 0.45, "grad_norm": 4.310800552368164, "learning_rate": 1.9645676908182192e-05, "loss": 2.1424, "step": 34656 }, { "epoch": 0.45, "grad_norm": 3.7555723190307617, "learning_rate": 1.9645649183953507e-05, "loss": 1.835, "step": 34657 }, { "epoch": 0.45, "grad_norm": 3.909409761428833, "learning_rate": 1.964562145865978e-05, "loss": 2.2226, "step": 34658 }, { "epoch": 0.45, "grad_norm": 4.334295272827148, "learning_rate": 1.9645593732301014e-05, "loss": 1.8985, "step": 34659 }, { "epoch": 0.45, "grad_norm": 3.1311535835266113, "learning_rate": 1.964556600487721e-05, "loss": 1.6442, "step": 34660 }, { "epoch": 0.45, "grad_norm": 3.7837796211242676, "learning_rate": 1.9645538276388373e-05, "loss": 2.0132, "step": 34661 }, { "epoch": 0.45, "grad_norm": 3.705784320831299, "learning_rate": 1.96455105468345e-05, "loss": 1.9636, "step": 34662 }, { "epoch": 0.45, "grad_norm": 3.7645108699798584, "learning_rate": 1.9645482816215603e-05, "loss": 2.3966, "step": 34663 }, { "epoch": 0.45, "grad_norm": 3.4962093830108643, "learning_rate": 1.9645455084531683e-05, "loss": 1.7072, "step": 34664 }, { "epoch": 0.45, "grad_norm": 3.8094632625579834, "learning_rate": 1.9645427351782738e-05, "loss": 2.165, "step": 34665 }, { "epoch": 0.45, "grad_norm": 4.130315780639648, "learning_rate": 1.9645399617968774e-05, "loss": 2.1302, "step": 34666 }, { "epoch": 0.45, "grad_norm": 3.742450714111328, "learning_rate": 1.9645371883089795e-05, "loss": 1.9378, "step": 34667 }, { "epoch": 0.45, "grad_norm": 3.613269805908203, "learning_rate": 1.9645344147145805e-05, "loss": 2.0924, "step": 34668 }, { "epoch": 0.45, "grad_norm": 3.562267541885376, "learning_rate": 1.9645316410136802e-05, "loss": 1.7528, "step": 34669 }, { "epoch": 0.45, "grad_norm": 3.760833501815796, "learning_rate": 1.9645288672062795e-05, "loss": 1.9732, "step": 34670 }, { "epoch": 0.45, "grad_norm": 3.6554722785949707, "learning_rate": 1.9645260932923786e-05, "loss": 1.8411, "step": 34671 }, { "epoch": 0.45, "grad_norm": 3.2945823669433594, "learning_rate": 1.9645233192719772e-05, "loss": 1.8141, "step": 34672 }, { "epoch": 0.45, "grad_norm": 3.4947240352630615, "learning_rate": 1.9645205451450763e-05, "loss": 1.7252, "step": 34673 }, { "epoch": 0.45, "grad_norm": 3.7428500652313232, "learning_rate": 1.964517770911676e-05, "loss": 2.138, "step": 34674 }, { "epoch": 0.45, "grad_norm": 4.344574928283691, "learning_rate": 1.9645149965717767e-05, "loss": 2.0323, "step": 34675 }, { "epoch": 0.45, "grad_norm": 3.5316872596740723, "learning_rate": 1.9645122221253784e-05, "loss": 1.585, "step": 34676 }, { "epoch": 0.45, "grad_norm": 4.011481761932373, "learning_rate": 1.9645094475724816e-05, "loss": 2.2672, "step": 34677 }, { "epoch": 0.45, "grad_norm": 3.3551459312438965, "learning_rate": 1.9645066729130863e-05, "loss": 1.6435, "step": 34678 }, { "epoch": 0.45, "grad_norm": 3.703159809112549, "learning_rate": 1.9645038981471936e-05, "loss": 1.6187, "step": 34679 }, { "epoch": 0.45, "grad_norm": 4.502884864807129, "learning_rate": 1.964501123274803e-05, "loss": 2.4657, "step": 34680 }, { "epoch": 0.45, "grad_norm": 3.7324063777923584, "learning_rate": 1.964498348295915e-05, "loss": 2.363, "step": 34681 }, { "epoch": 0.45, "grad_norm": 3.4477128982543945, "learning_rate": 1.9644955732105307e-05, "loss": 1.8891, "step": 34682 }, { "epoch": 0.45, "grad_norm": 3.5766146183013916, "learning_rate": 1.964492798018649e-05, "loss": 1.7411, "step": 34683 }, { "epoch": 0.45, "grad_norm": 3.5297904014587402, "learning_rate": 1.9644900227202713e-05, "loss": 1.5749, "step": 34684 }, { "epoch": 0.45, "grad_norm": 3.352215051651001, "learning_rate": 1.9644872473153972e-05, "loss": 2.0178, "step": 34685 }, { "epoch": 0.45, "grad_norm": 4.070165634155273, "learning_rate": 1.9644844718040277e-05, "loss": 2.0199, "step": 34686 }, { "epoch": 0.45, "grad_norm": 3.4863064289093018, "learning_rate": 1.964481696186163e-05, "loss": 2.0395, "step": 34687 }, { "epoch": 0.45, "grad_norm": 4.324067115783691, "learning_rate": 1.9644789204618025e-05, "loss": 2.183, "step": 34688 }, { "epoch": 0.45, "grad_norm": 3.3373117446899414, "learning_rate": 1.9644761446309475e-05, "loss": 1.6415, "step": 34689 }, { "epoch": 0.45, "grad_norm": 3.490370750427246, "learning_rate": 1.964473368693598e-05, "loss": 1.644, "step": 34690 }, { "epoch": 0.45, "grad_norm": 3.8586182594299316, "learning_rate": 1.9644705926497542e-05, "loss": 1.7795, "step": 34691 }, { "epoch": 0.45, "grad_norm": 3.534686326980591, "learning_rate": 1.9644678164994163e-05, "loss": 1.8148, "step": 34692 }, { "epoch": 0.45, "grad_norm": 3.4432835578918457, "learning_rate": 1.964465040242585e-05, "loss": 1.7128, "step": 34693 }, { "epoch": 0.45, "grad_norm": 3.5078132152557373, "learning_rate": 1.9644622638792606e-05, "loss": 1.6194, "step": 34694 }, { "epoch": 0.45, "grad_norm": 4.24022912979126, "learning_rate": 1.964459487409443e-05, "loss": 2.3087, "step": 34695 }, { "epoch": 0.45, "grad_norm": 4.0349555015563965, "learning_rate": 1.9644567108331324e-05, "loss": 2.3326, "step": 34696 }, { "epoch": 0.45, "grad_norm": 4.192455768585205, "learning_rate": 1.96445393415033e-05, "loss": 2.527, "step": 34697 }, { "epoch": 0.45, "grad_norm": 3.6744441986083984, "learning_rate": 1.964451157361035e-05, "loss": 1.6862, "step": 34698 }, { "epoch": 0.45, "grad_norm": 4.243299961090088, "learning_rate": 1.9644483804652485e-05, "loss": 1.9937, "step": 34699 }, { "epoch": 0.45, "grad_norm": 3.4873239994049072, "learning_rate": 1.9644456034629703e-05, "loss": 1.9623, "step": 34700 }, { "epoch": 0.45, "grad_norm": 3.7740116119384766, "learning_rate": 1.9644428263542014e-05, "loss": 2.0085, "step": 34701 }, { "epoch": 0.45, "grad_norm": 3.4091198444366455, "learning_rate": 1.964440049138941e-05, "loss": 1.7989, "step": 34702 }, { "epoch": 0.45, "grad_norm": 3.8910117149353027, "learning_rate": 1.964437271817191e-05, "loss": 1.9179, "step": 34703 }, { "epoch": 0.45, "grad_norm": 3.9600670337677, "learning_rate": 1.9644344943889503e-05, "loss": 2.1832, "step": 34704 }, { "epoch": 0.45, "grad_norm": 3.4763638973236084, "learning_rate": 1.9644317168542193e-05, "loss": 1.7767, "step": 34705 }, { "epoch": 0.45, "grad_norm": 3.7125349044799805, "learning_rate": 1.964428939212999e-05, "loss": 2.2176, "step": 34706 }, { "epoch": 0.45, "grad_norm": 3.9834511280059814, "learning_rate": 1.9644261614652895e-05, "loss": 1.6905, "step": 34707 }, { "epoch": 0.45, "grad_norm": 3.595155715942383, "learning_rate": 1.9644233836110906e-05, "loss": 1.7101, "step": 34708 }, { "epoch": 0.45, "grad_norm": 3.7505829334259033, "learning_rate": 1.9644206056504034e-05, "loss": 1.7324, "step": 34709 }, { "epoch": 0.45, "grad_norm": 4.416331768035889, "learning_rate": 1.9644178275832275e-05, "loss": 2.2524, "step": 34710 }, { "epoch": 0.45, "grad_norm": 3.73203706741333, "learning_rate": 1.9644150494095634e-05, "loss": 2.1463, "step": 34711 }, { "epoch": 0.45, "grad_norm": 3.5988876819610596, "learning_rate": 1.964412271129412e-05, "loss": 1.8757, "step": 34712 }, { "epoch": 0.45, "grad_norm": 3.8154351711273193, "learning_rate": 1.964409492742773e-05, "loss": 2.0091, "step": 34713 }, { "epoch": 0.45, "grad_norm": 3.7498557567596436, "learning_rate": 1.9644067142496465e-05, "loss": 1.7743, "step": 34714 }, { "epoch": 0.45, "grad_norm": 4.679416179656982, "learning_rate": 1.9644039356500335e-05, "loss": 1.8109, "step": 34715 }, { "epoch": 0.45, "grad_norm": 4.522657871246338, "learning_rate": 1.9644011569439336e-05, "loss": 2.0425, "step": 34716 }, { "epoch": 0.45, "grad_norm": 3.2759628295898438, "learning_rate": 1.9643983781313477e-05, "loss": 1.5747, "step": 34717 }, { "epoch": 0.45, "grad_norm": 4.331827163696289, "learning_rate": 1.964395599212276e-05, "loss": 2.3396, "step": 34718 }, { "epoch": 0.45, "grad_norm": 4.252993583679199, "learning_rate": 1.9643928201867182e-05, "loss": 2.1097, "step": 34719 }, { "epoch": 0.45, "grad_norm": 3.553175449371338, "learning_rate": 1.9643900410546756e-05, "loss": 1.8916, "step": 34720 }, { "epoch": 0.45, "grad_norm": 3.5159246921539307, "learning_rate": 1.9643872618161474e-05, "loss": 1.9443, "step": 34721 }, { "epoch": 0.45, "grad_norm": 3.8970375061035156, "learning_rate": 1.9643844824711352e-05, "loss": 2.2326, "step": 34722 }, { "epoch": 0.45, "grad_norm": 4.042725086212158, "learning_rate": 1.964381703019638e-05, "loss": 2.2413, "step": 34723 }, { "epoch": 0.45, "grad_norm": 3.9351580142974854, "learning_rate": 1.964378923461657e-05, "loss": 2.3479, "step": 34724 }, { "epoch": 0.45, "grad_norm": 3.5960028171539307, "learning_rate": 1.9643761437971917e-05, "loss": 1.7979, "step": 34725 }, { "epoch": 0.45, "grad_norm": 3.6019771099090576, "learning_rate": 1.9643733640262437e-05, "loss": 2.173, "step": 34726 }, { "epoch": 0.45, "grad_norm": 4.0920891761779785, "learning_rate": 1.9643705841488117e-05, "loss": 2.5091, "step": 34727 }, { "epoch": 0.45, "grad_norm": 3.2462425231933594, "learning_rate": 1.9643678041648975e-05, "loss": 1.7648, "step": 34728 }, { "epoch": 0.45, "grad_norm": 4.035626411437988, "learning_rate": 1.9643650240745005e-05, "loss": 2.2174, "step": 34729 }, { "epoch": 0.45, "grad_norm": 3.909268617630005, "learning_rate": 1.9643622438776212e-05, "loss": 1.885, "step": 34730 }, { "epoch": 0.45, "grad_norm": 3.593320846557617, "learning_rate": 1.96435946357426e-05, "loss": 1.8463, "step": 34731 }, { "epoch": 0.45, "grad_norm": 3.629276990890503, "learning_rate": 1.964356683164417e-05, "loss": 1.8224, "step": 34732 }, { "epoch": 0.45, "grad_norm": 3.3269448280334473, "learning_rate": 1.964353902648093e-05, "loss": 1.6683, "step": 34733 }, { "epoch": 0.45, "grad_norm": 3.984551429748535, "learning_rate": 1.9643511220252875e-05, "loss": 2.0597, "step": 34734 }, { "epoch": 0.45, "grad_norm": 3.0257387161254883, "learning_rate": 1.9643483412960017e-05, "loss": 1.696, "step": 34735 }, { "epoch": 0.45, "grad_norm": 4.208573341369629, "learning_rate": 1.9643455604602356e-05, "loss": 1.998, "step": 34736 }, { "epoch": 0.45, "grad_norm": 4.122462272644043, "learning_rate": 1.9643427795179887e-05, "loss": 2.852, "step": 34737 }, { "epoch": 0.45, "grad_norm": 3.5163354873657227, "learning_rate": 1.9643399984692626e-05, "loss": 1.7211, "step": 34738 }, { "epoch": 0.45, "grad_norm": 3.4829673767089844, "learning_rate": 1.9643372173140567e-05, "loss": 1.8995, "step": 34739 }, { "epoch": 0.45, "grad_norm": 3.9282784461975098, "learning_rate": 1.9643344360523717e-05, "loss": 2.1705, "step": 34740 }, { "epoch": 0.45, "grad_norm": 3.88136887550354, "learning_rate": 1.9643316546842078e-05, "loss": 2.2012, "step": 34741 }, { "epoch": 0.45, "grad_norm": 3.486062526702881, "learning_rate": 1.9643288732095654e-05, "loss": 1.6624, "step": 34742 }, { "epoch": 0.45, "grad_norm": 4.1993327140808105, "learning_rate": 1.964326091628445e-05, "loss": 2.4446, "step": 34743 }, { "epoch": 0.45, "grad_norm": 3.8900866508483887, "learning_rate": 1.9643233099408463e-05, "loss": 1.7927, "step": 34744 }, { "epoch": 0.45, "grad_norm": 3.4899439811706543, "learning_rate": 1.96432052814677e-05, "loss": 1.9282, "step": 34745 }, { "epoch": 0.45, "grad_norm": 4.108998775482178, "learning_rate": 1.9643177462462163e-05, "loss": 2.0783, "step": 34746 }, { "epoch": 0.45, "grad_norm": 3.433579921722412, "learning_rate": 1.9643149642391853e-05, "loss": 1.543, "step": 34747 }, { "epoch": 0.45, "grad_norm": 4.077713489532471, "learning_rate": 1.9643121821256782e-05, "loss": 2.3377, "step": 34748 }, { "epoch": 0.45, "grad_norm": 4.064518451690674, "learning_rate": 1.9643093999056943e-05, "loss": 2.0442, "step": 34749 }, { "epoch": 0.45, "grad_norm": 3.3041131496429443, "learning_rate": 1.9643066175792342e-05, "loss": 1.8339, "step": 34750 }, { "epoch": 0.45, "grad_norm": 3.673863649368286, "learning_rate": 1.9643038351462985e-05, "loss": 1.7785, "step": 34751 }, { "epoch": 0.45, "grad_norm": 3.6641924381256104, "learning_rate": 1.9643010526068872e-05, "loss": 2.3868, "step": 34752 }, { "epoch": 0.45, "grad_norm": 3.5075671672821045, "learning_rate": 1.9642982699610006e-05, "loss": 2.1025, "step": 34753 }, { "epoch": 0.45, "grad_norm": 3.665929079055786, "learning_rate": 1.9642954872086392e-05, "loss": 2.113, "step": 34754 }, { "epoch": 0.45, "grad_norm": 3.8632137775421143, "learning_rate": 1.9642927043498034e-05, "loss": 1.8409, "step": 34755 }, { "epoch": 0.45, "grad_norm": 3.7466471195220947, "learning_rate": 1.964289921384493e-05, "loss": 2.0636, "step": 34756 }, { "epoch": 0.45, "grad_norm": 3.808932304382324, "learning_rate": 1.964287138312709e-05, "loss": 1.8634, "step": 34757 }, { "epoch": 0.45, "grad_norm": 3.2033655643463135, "learning_rate": 1.964284355134451e-05, "loss": 1.3842, "step": 34758 }, { "epoch": 0.45, "grad_norm": 3.7437057495117188, "learning_rate": 1.96428157184972e-05, "loss": 1.9198, "step": 34759 }, { "epoch": 0.45, "grad_norm": 4.1583123207092285, "learning_rate": 1.9642787884585156e-05, "loss": 1.9094, "step": 34760 }, { "epoch": 0.45, "grad_norm": 4.536787986755371, "learning_rate": 1.9642760049608384e-05, "loss": 2.428, "step": 34761 }, { "epoch": 0.45, "grad_norm": 4.157700061798096, "learning_rate": 1.964273221356689e-05, "loss": 2.7224, "step": 34762 }, { "epoch": 0.45, "grad_norm": 3.7231783866882324, "learning_rate": 1.9642704376460675e-05, "loss": 1.8921, "step": 34763 }, { "epoch": 0.45, "grad_norm": 4.211385726928711, "learning_rate": 1.964267653828974e-05, "loss": 2.1995, "step": 34764 }, { "epoch": 0.45, "grad_norm": 2.9870445728302, "learning_rate": 1.9642648699054094e-05, "loss": 1.3948, "step": 34765 }, { "epoch": 0.45, "grad_norm": 3.848435640335083, "learning_rate": 1.9642620858753734e-05, "loss": 2.144, "step": 34766 }, { "epoch": 0.45, "grad_norm": 4.00641393661499, "learning_rate": 1.9642593017388663e-05, "loss": 1.7935, "step": 34767 }, { "epoch": 0.45, "grad_norm": 3.5950307846069336, "learning_rate": 1.964256517495889e-05, "loss": 1.9266, "step": 34768 }, { "epoch": 0.45, "grad_norm": 3.675342321395874, "learning_rate": 1.964253733146441e-05, "loss": 1.7226, "step": 34769 }, { "epoch": 0.45, "grad_norm": 3.990842580795288, "learning_rate": 1.9642509486905233e-05, "loss": 1.8571, "step": 34770 }, { "epoch": 0.45, "grad_norm": 3.607064962387085, "learning_rate": 1.9642481641281358e-05, "loss": 2.0742, "step": 34771 }, { "epoch": 0.45, "grad_norm": 3.777141571044922, "learning_rate": 1.9642453794592787e-05, "loss": 2.1072, "step": 34772 }, { "epoch": 0.45, "grad_norm": 3.301795721054077, "learning_rate": 1.964242594683953e-05, "loss": 2.0673, "step": 34773 }, { "epoch": 0.45, "grad_norm": 3.900786876678467, "learning_rate": 1.9642398098021585e-05, "loss": 1.8577, "step": 34774 }, { "epoch": 0.45, "grad_norm": 3.8780086040496826, "learning_rate": 1.9642370248138954e-05, "loss": 1.9379, "step": 34775 }, { "epoch": 0.45, "grad_norm": 3.938506603240967, "learning_rate": 1.9642342397191643e-05, "loss": 2.5092, "step": 34776 }, { "epoch": 0.45, "grad_norm": 4.365771770477295, "learning_rate": 1.9642314545179656e-05, "loss": 1.8986, "step": 34777 }, { "epoch": 0.45, "grad_norm": 3.9190616607666016, "learning_rate": 1.9642286692102986e-05, "loss": 1.8452, "step": 34778 }, { "epoch": 0.45, "grad_norm": 3.7219204902648926, "learning_rate": 1.9642258837961653e-05, "loss": 1.9409, "step": 34779 }, { "epoch": 0.45, "grad_norm": 4.3063836097717285, "learning_rate": 1.9642230982755647e-05, "loss": 2.0469, "step": 34780 }, { "epoch": 0.45, "grad_norm": 4.160109519958496, "learning_rate": 1.9642203126484975e-05, "loss": 2.0361, "step": 34781 }, { "epoch": 0.45, "grad_norm": 3.5142087936401367, "learning_rate": 1.964217526914964e-05, "loss": 1.9054, "step": 34782 }, { "epoch": 0.45, "grad_norm": 4.179999351501465, "learning_rate": 1.9642147410749642e-05, "loss": 1.762, "step": 34783 }, { "epoch": 0.45, "grad_norm": 4.151983737945557, "learning_rate": 1.964211955128499e-05, "loss": 2.1838, "step": 34784 }, { "epoch": 0.45, "grad_norm": 3.0882022380828857, "learning_rate": 1.964209169075569e-05, "loss": 1.6857, "step": 34785 }, { "epoch": 0.45, "grad_norm": 3.369877338409424, "learning_rate": 1.9642063829161736e-05, "loss": 1.8417, "step": 34786 }, { "epoch": 0.45, "grad_norm": 4.070377349853516, "learning_rate": 1.964203596650313e-05, "loss": 2.2198, "step": 34787 }, { "epoch": 0.45, "grad_norm": 3.630033254623413, "learning_rate": 1.9642008102779884e-05, "loss": 2.1443, "step": 34788 }, { "epoch": 0.45, "grad_norm": 4.166064739227295, "learning_rate": 1.9641980237991997e-05, "loss": 2.3648, "step": 34789 }, { "epoch": 0.45, "grad_norm": 3.7474493980407715, "learning_rate": 1.964195237213947e-05, "loss": 1.7952, "step": 34790 }, { "epoch": 0.45, "grad_norm": 3.311293363571167, "learning_rate": 1.964192450522231e-05, "loss": 1.5075, "step": 34791 }, { "epoch": 0.45, "grad_norm": 3.6157848834991455, "learning_rate": 1.9641896637240516e-05, "loss": 2.4835, "step": 34792 }, { "epoch": 0.45, "grad_norm": 3.6902143955230713, "learning_rate": 1.9641868768194094e-05, "loss": 1.8471, "step": 34793 }, { "epoch": 0.45, "grad_norm": 4.017614841461182, "learning_rate": 1.9641840898083045e-05, "loss": 2.1306, "step": 34794 }, { "epoch": 0.45, "grad_norm": 3.6754274368286133, "learning_rate": 1.964181302690737e-05, "loss": 1.7763, "step": 34795 }, { "epoch": 0.45, "grad_norm": 3.402047634124756, "learning_rate": 1.9641785154667082e-05, "loss": 1.6388, "step": 34796 }, { "epoch": 0.45, "grad_norm": 3.1769063472747803, "learning_rate": 1.9641757281362174e-05, "loss": 1.6727, "step": 34797 }, { "epoch": 0.45, "grad_norm": 3.2133944034576416, "learning_rate": 1.9641729406992655e-05, "loss": 1.6681, "step": 34798 }, { "epoch": 0.45, "grad_norm": 3.6136510372161865, "learning_rate": 1.9641701531558523e-05, "loss": 2.1514, "step": 34799 }, { "epoch": 0.45, "grad_norm": 3.843501329421997, "learning_rate": 1.9641673655059782e-05, "loss": 2.436, "step": 34800 }, { "epoch": 0.45, "grad_norm": 3.8598456382751465, "learning_rate": 1.964164577749644e-05, "loss": 2.0133, "step": 34801 }, { "epoch": 0.45, "grad_norm": 3.873325824737549, "learning_rate": 1.9641617898868496e-05, "loss": 1.9134, "step": 34802 }, { "epoch": 0.45, "grad_norm": 3.512439012527466, "learning_rate": 1.964159001917595e-05, "loss": 1.5953, "step": 34803 }, { "epoch": 0.45, "grad_norm": 4.027791500091553, "learning_rate": 1.9641562138418812e-05, "loss": 1.8922, "step": 34804 }, { "epoch": 0.45, "grad_norm": 4.112948894500732, "learning_rate": 1.9641534256597082e-05, "loss": 2.3316, "step": 34805 }, { "epoch": 0.45, "grad_norm": 4.002927303314209, "learning_rate": 1.9641506373710765e-05, "loss": 2.0078, "step": 34806 }, { "epoch": 0.45, "grad_norm": 3.0724267959594727, "learning_rate": 1.964147848975986e-05, "loss": 1.4628, "step": 34807 }, { "epoch": 0.45, "grad_norm": 4.137391567230225, "learning_rate": 1.964145060474437e-05, "loss": 2.2165, "step": 34808 }, { "epoch": 0.45, "grad_norm": 3.8509199619293213, "learning_rate": 1.9641422718664306e-05, "loss": 2.3038, "step": 34809 }, { "epoch": 0.45, "grad_norm": 4.1648688316345215, "learning_rate": 1.964139483151966e-05, "loss": 2.1777, "step": 34810 }, { "epoch": 0.45, "grad_norm": 4.080211639404297, "learning_rate": 1.9641366943310444e-05, "loss": 1.4203, "step": 34811 }, { "epoch": 0.45, "grad_norm": 3.0719354152679443, "learning_rate": 1.9641339054036654e-05, "loss": 1.5218, "step": 34812 }, { "epoch": 0.45, "grad_norm": 3.4395928382873535, "learning_rate": 1.96413111636983e-05, "loss": 1.9534, "step": 34813 }, { "epoch": 0.45, "grad_norm": 4.136800765991211, "learning_rate": 1.9641283272295378e-05, "loss": 2.1377, "step": 34814 }, { "epoch": 0.45, "grad_norm": 4.1928205490112305, "learning_rate": 1.9641255379827898e-05, "loss": 2.4855, "step": 34815 }, { "epoch": 0.45, "grad_norm": 3.6515679359436035, "learning_rate": 1.964122748629586e-05, "loss": 2.1647, "step": 34816 }, { "epoch": 0.45, "grad_norm": 3.425863265991211, "learning_rate": 1.9641199591699265e-05, "loss": 1.5959, "step": 34817 }, { "epoch": 0.45, "grad_norm": 3.604732036590576, "learning_rate": 1.9641171696038118e-05, "loss": 1.8665, "step": 34818 }, { "epoch": 0.45, "grad_norm": 4.325666427612305, "learning_rate": 1.9641143799312424e-05, "loss": 2.3054, "step": 34819 }, { "epoch": 0.45, "grad_norm": 3.9195196628570557, "learning_rate": 1.9641115901522182e-05, "loss": 2.0254, "step": 34820 }, { "epoch": 0.45, "grad_norm": 3.543097496032715, "learning_rate": 1.96410880026674e-05, "loss": 2.1144, "step": 34821 }, { "epoch": 0.45, "grad_norm": 3.780510425567627, "learning_rate": 1.9641060102748076e-05, "loss": 2.0549, "step": 34822 }, { "epoch": 0.45, "grad_norm": 3.990265130996704, "learning_rate": 1.9641032201764215e-05, "loss": 2.1875, "step": 34823 }, { "epoch": 0.45, "grad_norm": 3.38388729095459, "learning_rate": 1.9641004299715823e-05, "loss": 1.7176, "step": 34824 }, { "epoch": 0.45, "grad_norm": 3.7703659534454346, "learning_rate": 1.96409763966029e-05, "loss": 2.0629, "step": 34825 }, { "epoch": 0.45, "grad_norm": 3.538548469543457, "learning_rate": 1.9640948492425448e-05, "loss": 2.0668, "step": 34826 }, { "epoch": 0.45, "grad_norm": 4.150572299957275, "learning_rate": 1.964092058718347e-05, "loss": 2.6047, "step": 34827 }, { "epoch": 0.45, "grad_norm": 4.15119743347168, "learning_rate": 1.9640892680876974e-05, "loss": 1.9687, "step": 34828 }, { "epoch": 0.45, "grad_norm": 3.084775924682617, "learning_rate": 1.964086477350596e-05, "loss": 1.5469, "step": 34829 }, { "epoch": 0.45, "grad_norm": 3.695465564727783, "learning_rate": 1.964083686507043e-05, "loss": 2.0916, "step": 34830 }, { "epoch": 0.45, "grad_norm": 3.8304824829101562, "learning_rate": 1.964080895557039e-05, "loss": 1.8678, "step": 34831 }, { "epoch": 0.45, "grad_norm": 3.69388484954834, "learning_rate": 1.9640781045005837e-05, "loss": 2.061, "step": 34832 }, { "epoch": 0.45, "grad_norm": 4.307373046875, "learning_rate": 1.964075313337678e-05, "loss": 2.1304, "step": 34833 }, { "epoch": 0.45, "grad_norm": 3.371514320373535, "learning_rate": 1.9640725220683225e-05, "loss": 1.5726, "step": 34834 }, { "epoch": 0.45, "grad_norm": 3.641339063644409, "learning_rate": 1.964069730692516e-05, "loss": 2.3057, "step": 34835 }, { "epoch": 0.45, "grad_norm": 3.5802392959594727, "learning_rate": 1.964066939210261e-05, "loss": 1.9038, "step": 34836 }, { "epoch": 0.45, "grad_norm": 3.560021162033081, "learning_rate": 1.964064147621556e-05, "loss": 1.7925, "step": 34837 }, { "epoch": 0.45, "grad_norm": 4.213634967803955, "learning_rate": 1.964061355926402e-05, "loss": 1.9415, "step": 34838 }, { "epoch": 0.45, "grad_norm": 3.8432164192199707, "learning_rate": 1.9640585641247992e-05, "loss": 2.2925, "step": 34839 }, { "epoch": 0.45, "grad_norm": 4.18299674987793, "learning_rate": 1.9640557722167482e-05, "loss": 2.0467, "step": 34840 }, { "epoch": 0.45, "grad_norm": 3.779888868331909, "learning_rate": 1.964052980202249e-05, "loss": 1.9674, "step": 34841 }, { "epoch": 0.45, "grad_norm": 4.056015491485596, "learning_rate": 1.964050188081302e-05, "loss": 2.2466, "step": 34842 }, { "epoch": 0.45, "grad_norm": 4.578512191772461, "learning_rate": 1.9640473958539074e-05, "loss": 2.3826, "step": 34843 }, { "epoch": 0.45, "grad_norm": 3.634690761566162, "learning_rate": 1.964044603520066e-05, "loss": 2.1269, "step": 34844 }, { "epoch": 0.45, "grad_norm": 3.8785457611083984, "learning_rate": 1.9640418110797773e-05, "loss": 2.0501, "step": 34845 }, { "epoch": 0.45, "grad_norm": 3.9411733150482178, "learning_rate": 1.964039018533042e-05, "loss": 1.8703, "step": 34846 }, { "epoch": 0.45, "grad_norm": 3.3321335315704346, "learning_rate": 1.964036225879861e-05, "loss": 1.5129, "step": 34847 }, { "epoch": 0.45, "grad_norm": 4.092733383178711, "learning_rate": 1.9640334331202332e-05, "loss": 1.8719, "step": 34848 }, { "epoch": 0.45, "grad_norm": 3.4099013805389404, "learning_rate": 1.9640306402541606e-05, "loss": 1.8602, "step": 34849 }, { "epoch": 0.45, "grad_norm": 4.020157814025879, "learning_rate": 1.9640278472816423e-05, "loss": 2.4445, "step": 34850 }, { "epoch": 0.45, "grad_norm": 3.6364314556121826, "learning_rate": 1.964025054202679e-05, "loss": 1.761, "step": 34851 }, { "epoch": 0.45, "grad_norm": 3.467552900314331, "learning_rate": 1.9640222610172705e-05, "loss": 1.6342, "step": 34852 }, { "epoch": 0.45, "grad_norm": 3.405203104019165, "learning_rate": 1.964019467725418e-05, "loss": 1.9556, "step": 34853 }, { "epoch": 0.45, "grad_norm": 3.839029312133789, "learning_rate": 1.9640166743271217e-05, "loss": 1.985, "step": 34854 }, { "epoch": 0.45, "grad_norm": 3.7511019706726074, "learning_rate": 1.964013880822381e-05, "loss": 2.0879, "step": 34855 }, { "epoch": 0.45, "grad_norm": 4.342854022979736, "learning_rate": 1.9640110872111973e-05, "loss": 2.1497, "step": 34856 }, { "epoch": 0.45, "grad_norm": 3.375903606414795, "learning_rate": 1.96400829349357e-05, "loss": 1.8319, "step": 34857 }, { "epoch": 0.45, "grad_norm": 3.392591714859009, "learning_rate": 1.9640054996695005e-05, "loss": 1.7429, "step": 34858 }, { "epoch": 0.45, "grad_norm": 3.5577785968780518, "learning_rate": 1.964002705738988e-05, "loss": 2.1897, "step": 34859 }, { "epoch": 0.45, "grad_norm": 4.068310260772705, "learning_rate": 1.963999911702033e-05, "loss": 2.4079, "step": 34860 }, { "epoch": 0.45, "grad_norm": 3.8263754844665527, "learning_rate": 1.9639971175586365e-05, "loss": 1.5829, "step": 34861 }, { "epoch": 0.45, "grad_norm": 3.482780694961548, "learning_rate": 1.9639943233087982e-05, "loss": 1.7906, "step": 34862 }, { "epoch": 0.45, "grad_norm": 4.02440071105957, "learning_rate": 1.9639915289525185e-05, "loss": 1.8097, "step": 34863 }, { "epoch": 0.45, "grad_norm": 4.1459736824035645, "learning_rate": 1.9639887344897978e-05, "loss": 2.1765, "step": 34864 }, { "epoch": 0.45, "grad_norm": 3.509040117263794, "learning_rate": 1.9639859399206363e-05, "loss": 1.6972, "step": 34865 }, { "epoch": 0.45, "grad_norm": 3.569183588027954, "learning_rate": 1.9639831452450346e-05, "loss": 1.6932, "step": 34866 }, { "epoch": 0.45, "grad_norm": 4.575451374053955, "learning_rate": 1.9639803504629927e-05, "loss": 2.4632, "step": 34867 }, { "epoch": 0.45, "grad_norm": 3.7994418144226074, "learning_rate": 1.963977555574511e-05, "loss": 1.8277, "step": 34868 }, { "epoch": 0.45, "grad_norm": 4.054840087890625, "learning_rate": 1.96397476057959e-05, "loss": 2.4798, "step": 34869 }, { "epoch": 0.45, "grad_norm": 3.531362533569336, "learning_rate": 1.9639719654782296e-05, "loss": 1.9834, "step": 34870 }, { "epoch": 0.45, "grad_norm": 3.685270071029663, "learning_rate": 1.9639691702704303e-05, "loss": 1.7438, "step": 34871 }, { "epoch": 0.45, "grad_norm": 3.8777995109558105, "learning_rate": 1.963966374956193e-05, "loss": 1.9722, "step": 34872 }, { "epoch": 0.45, "grad_norm": 4.240773677825928, "learning_rate": 1.963963579535517e-05, "loss": 2.5625, "step": 34873 }, { "epoch": 0.45, "grad_norm": 4.0989298820495605, "learning_rate": 1.963960784008403e-05, "loss": 2.2219, "step": 34874 }, { "epoch": 0.45, "grad_norm": 3.763429641723633, "learning_rate": 1.9639579883748515e-05, "loss": 2.2017, "step": 34875 }, { "epoch": 0.45, "grad_norm": 3.604644775390625, "learning_rate": 1.9639551926348625e-05, "loss": 2.3515, "step": 34876 }, { "epoch": 0.45, "grad_norm": 3.558178186416626, "learning_rate": 1.963952396788437e-05, "loss": 1.4994, "step": 34877 }, { "epoch": 0.45, "grad_norm": 3.554661989212036, "learning_rate": 1.9639496008355742e-05, "loss": 1.9708, "step": 34878 }, { "epoch": 0.45, "grad_norm": 3.6426761150360107, "learning_rate": 1.9639468047762754e-05, "loss": 1.9795, "step": 34879 }, { "epoch": 0.45, "grad_norm": 3.731126070022583, "learning_rate": 1.9639440086105405e-05, "loss": 2.1588, "step": 34880 }, { "epoch": 0.45, "grad_norm": 4.4496073722839355, "learning_rate": 1.9639412123383694e-05, "loss": 2.1536, "step": 34881 }, { "epoch": 0.45, "grad_norm": 3.3063340187072754, "learning_rate": 1.9639384159597633e-05, "loss": 1.9363, "step": 34882 }, { "epoch": 0.45, "grad_norm": 3.8483963012695312, "learning_rate": 1.963935619474722e-05, "loss": 1.6477, "step": 34883 }, { "epoch": 0.45, "grad_norm": 3.887298822402954, "learning_rate": 1.9639328228832456e-05, "loss": 1.6866, "step": 34884 }, { "epoch": 0.45, "grad_norm": 3.8917741775512695, "learning_rate": 1.9639300261853348e-05, "loss": 1.6431, "step": 34885 }, { "epoch": 0.45, "grad_norm": 4.059174060821533, "learning_rate": 1.96392722938099e-05, "loss": 2.3445, "step": 34886 }, { "epoch": 0.45, "grad_norm": 3.8051583766937256, "learning_rate": 1.963924432470211e-05, "loss": 1.928, "step": 34887 }, { "epoch": 0.45, "grad_norm": 3.9947941303253174, "learning_rate": 1.9639216354529984e-05, "loss": 2.3279, "step": 34888 }, { "epoch": 0.45, "grad_norm": 3.578996181488037, "learning_rate": 1.9639188383293526e-05, "loss": 2.1075, "step": 34889 }, { "epoch": 0.45, "grad_norm": 4.0175604820251465, "learning_rate": 1.9639160410992736e-05, "loss": 2.2915, "step": 34890 }, { "epoch": 0.45, "grad_norm": 4.191603660583496, "learning_rate": 1.9639132437627623e-05, "loss": 1.8538, "step": 34891 }, { "epoch": 0.45, "grad_norm": 3.5308592319488525, "learning_rate": 1.9639104463198186e-05, "loss": 1.9127, "step": 34892 }, { "epoch": 0.45, "grad_norm": 3.798041343688965, "learning_rate": 1.963907648770442e-05, "loss": 1.9694, "step": 34893 }, { "epoch": 0.45, "grad_norm": 4.495546340942383, "learning_rate": 1.963904851114635e-05, "loss": 2.5538, "step": 34894 }, { "epoch": 0.45, "grad_norm": 3.4723896980285645, "learning_rate": 1.9639020533523957e-05, "loss": 2.1353, "step": 34895 }, { "epoch": 0.45, "grad_norm": 3.3267741203308105, "learning_rate": 1.963899255483725e-05, "loss": 1.6337, "step": 34896 }, { "epoch": 0.45, "grad_norm": 4.023932456970215, "learning_rate": 1.963896457508624e-05, "loss": 2.0024, "step": 34897 }, { "epoch": 0.45, "grad_norm": 3.4324777126312256, "learning_rate": 1.9638936594270924e-05, "loss": 1.8436, "step": 34898 }, { "epoch": 0.45, "grad_norm": 4.080370903015137, "learning_rate": 1.9638908612391307e-05, "loss": 1.8716, "step": 34899 }, { "epoch": 0.45, "grad_norm": 4.308394432067871, "learning_rate": 1.963888062944739e-05, "loss": 1.995, "step": 34900 }, { "epoch": 0.45, "grad_norm": 3.768352746963501, "learning_rate": 1.9638852645439175e-05, "loss": 1.8705, "step": 34901 }, { "epoch": 0.45, "grad_norm": 3.7562899589538574, "learning_rate": 1.9638824660366667e-05, "loss": 1.7068, "step": 34902 }, { "epoch": 0.45, "grad_norm": 4.60871696472168, "learning_rate": 1.9638796674229872e-05, "loss": 1.9384, "step": 34903 }, { "epoch": 0.45, "grad_norm": 4.0131402015686035, "learning_rate": 1.9638768687028788e-05, "loss": 1.8609, "step": 34904 }, { "epoch": 0.45, "grad_norm": 4.079196929931641, "learning_rate": 1.9638740698763423e-05, "loss": 2.1638, "step": 34905 }, { "epoch": 0.45, "grad_norm": 3.4323782920837402, "learning_rate": 1.9638712709433775e-05, "loss": 1.7105, "step": 34906 }, { "epoch": 0.45, "grad_norm": 3.366847276687622, "learning_rate": 1.963868471903985e-05, "loss": 1.6142, "step": 34907 }, { "epoch": 0.45, "grad_norm": 3.7538816928863525, "learning_rate": 1.9638656727581652e-05, "loss": 1.9169, "step": 34908 }, { "epoch": 0.45, "grad_norm": 3.299246072769165, "learning_rate": 1.963862873505918e-05, "loss": 1.5972, "step": 34909 }, { "epoch": 0.45, "grad_norm": 4.241365432739258, "learning_rate": 1.9638600741472444e-05, "loss": 2.2613, "step": 34910 }, { "epoch": 0.45, "grad_norm": 3.818035840988159, "learning_rate": 1.963857274682144e-05, "loss": 1.7512, "step": 34911 }, { "epoch": 0.45, "grad_norm": 3.747375726699829, "learning_rate": 1.9638544751106177e-05, "loss": 2.2364, "step": 34912 }, { "epoch": 0.45, "grad_norm": 3.833125114440918, "learning_rate": 1.963851675432665e-05, "loss": 2.0592, "step": 34913 }, { "epoch": 0.45, "grad_norm": 4.326345443725586, "learning_rate": 1.963848875648287e-05, "loss": 2.3089, "step": 34914 }, { "epoch": 0.45, "grad_norm": 3.333822011947632, "learning_rate": 1.963846075757484e-05, "loss": 1.9485, "step": 34915 }, { "epoch": 0.45, "grad_norm": 3.7032599449157715, "learning_rate": 1.9638432757602557e-05, "loss": 1.8475, "step": 34916 }, { "epoch": 0.45, "grad_norm": 3.2902884483337402, "learning_rate": 1.963840475656603e-05, "loss": 1.6345, "step": 34917 }, { "epoch": 0.45, "grad_norm": 4.273326396942139, "learning_rate": 1.963837675446526e-05, "loss": 2.3022, "step": 34918 }, { "epoch": 0.45, "grad_norm": 3.0951223373413086, "learning_rate": 1.9638348751300246e-05, "loss": 1.5911, "step": 34919 }, { "epoch": 0.45, "grad_norm": 4.148138523101807, "learning_rate": 1.9638320747070994e-05, "loss": 2.461, "step": 34920 }, { "epoch": 0.45, "grad_norm": 3.963266372680664, "learning_rate": 1.9638292741777512e-05, "loss": 1.7156, "step": 34921 }, { "epoch": 0.45, "grad_norm": 3.758478879928589, "learning_rate": 1.9638264735419795e-05, "loss": 2.3153, "step": 34922 }, { "epoch": 0.45, "grad_norm": 4.394705295562744, "learning_rate": 1.9638236727997855e-05, "loss": 2.5199, "step": 34923 }, { "epoch": 0.45, "grad_norm": 4.207476615905762, "learning_rate": 1.9638208719511686e-05, "loss": 2.4725, "step": 34924 }, { "epoch": 0.45, "grad_norm": 4.008540153503418, "learning_rate": 1.9638180709961298e-05, "loss": 1.9347, "step": 34925 }, { "epoch": 0.45, "grad_norm": 3.877671241760254, "learning_rate": 1.9638152699346688e-05, "loss": 2.2031, "step": 34926 }, { "epoch": 0.45, "grad_norm": 3.587008237838745, "learning_rate": 1.963812468766787e-05, "loss": 1.7464, "step": 34927 }, { "epoch": 0.45, "grad_norm": 4.0936079025268555, "learning_rate": 1.963809667492483e-05, "loss": 2.3425, "step": 34928 }, { "epoch": 0.45, "grad_norm": 3.97558856010437, "learning_rate": 1.9638068661117584e-05, "loss": 2.0395, "step": 34929 }, { "epoch": 0.45, "grad_norm": 3.7011213302612305, "learning_rate": 1.9638040646246135e-05, "loss": 1.8841, "step": 34930 }, { "epoch": 0.45, "grad_norm": 4.580479145050049, "learning_rate": 1.9638012630310478e-05, "loss": 2.1474, "step": 34931 }, { "epoch": 0.45, "grad_norm": 3.9999139308929443, "learning_rate": 1.9637984613310625e-05, "loss": 2.0245, "step": 34932 }, { "epoch": 0.45, "grad_norm": 3.469939947128296, "learning_rate": 1.9637956595246573e-05, "loss": 1.8186, "step": 34933 }, { "epoch": 0.45, "grad_norm": 3.3729476928710938, "learning_rate": 1.9637928576118326e-05, "loss": 1.9005, "step": 34934 }, { "epoch": 0.45, "grad_norm": 4.176671981811523, "learning_rate": 1.9637900555925888e-05, "loss": 2.4122, "step": 34935 }, { "epoch": 0.45, "grad_norm": 3.2786166667938232, "learning_rate": 1.9637872534669267e-05, "loss": 1.481, "step": 34936 }, { "epoch": 0.45, "grad_norm": 3.8775997161865234, "learning_rate": 1.9637844512348455e-05, "loss": 2.343, "step": 34937 }, { "epoch": 0.45, "grad_norm": 3.819650888442993, "learning_rate": 1.9637816488963464e-05, "loss": 1.8686, "step": 34938 }, { "epoch": 0.45, "grad_norm": 3.9144208431243896, "learning_rate": 1.9637788464514295e-05, "loss": 2.089, "step": 34939 }, { "epoch": 0.45, "grad_norm": 3.968309164047241, "learning_rate": 1.963776043900095e-05, "loss": 1.703, "step": 34940 }, { "epoch": 0.45, "grad_norm": 3.5785837173461914, "learning_rate": 1.9637732412423436e-05, "loss": 1.6175, "step": 34941 }, { "epoch": 0.45, "grad_norm": 3.4251902103424072, "learning_rate": 1.963770438478175e-05, "loss": 1.7332, "step": 34942 }, { "epoch": 0.45, "grad_norm": 3.9675493240356445, "learning_rate": 1.9637676356075898e-05, "loss": 1.7359, "step": 34943 }, { "epoch": 0.45, "grad_norm": 3.622358560562134, "learning_rate": 1.9637648326305885e-05, "loss": 1.8079, "step": 34944 }, { "epoch": 0.45, "grad_norm": 4.155326843261719, "learning_rate": 1.963762029547171e-05, "loss": 2.4213, "step": 34945 }, { "epoch": 0.45, "grad_norm": 3.9303171634674072, "learning_rate": 1.9637592263573378e-05, "loss": 1.9013, "step": 34946 }, { "epoch": 0.45, "grad_norm": 3.7619104385375977, "learning_rate": 1.963756423061089e-05, "loss": 2.3869, "step": 34947 }, { "epoch": 0.45, "grad_norm": 4.0320000648498535, "learning_rate": 1.9637536196584256e-05, "loss": 2.33, "step": 34948 }, { "epoch": 0.45, "grad_norm": 3.4774043560028076, "learning_rate": 1.9637508161493474e-05, "loss": 1.6754, "step": 34949 }, { "epoch": 0.45, "grad_norm": 3.799734354019165, "learning_rate": 1.9637480125338547e-05, "loss": 2.116, "step": 34950 }, { "epoch": 0.45, "grad_norm": 4.385390758514404, "learning_rate": 1.963745208811948e-05, "loss": 2.0248, "step": 34951 }, { "epoch": 0.45, "grad_norm": 3.7869269847869873, "learning_rate": 1.9637424049836273e-05, "loss": 2.202, "step": 34952 }, { "epoch": 0.45, "grad_norm": 4.253254413604736, "learning_rate": 1.9637396010488926e-05, "loss": 2.2055, "step": 34953 }, { "epoch": 0.45, "grad_norm": 3.272339344024658, "learning_rate": 1.9637367970077452e-05, "loss": 1.5174, "step": 34954 }, { "epoch": 0.45, "grad_norm": 4.109159469604492, "learning_rate": 1.963733992860185e-05, "loss": 2.0296, "step": 34955 }, { "epoch": 0.45, "grad_norm": 3.3650052547454834, "learning_rate": 1.9637311886062124e-05, "loss": 1.7802, "step": 34956 }, { "epoch": 0.45, "grad_norm": 3.6325693130493164, "learning_rate": 1.963728384245827e-05, "loss": 1.8446, "step": 34957 }, { "epoch": 0.45, "grad_norm": 3.41451096534729, "learning_rate": 1.9637255797790297e-05, "loss": 1.8144, "step": 34958 }, { "epoch": 0.45, "grad_norm": 3.442450523376465, "learning_rate": 1.963722775205821e-05, "loss": 2.097, "step": 34959 }, { "epoch": 0.45, "grad_norm": 3.5887210369110107, "learning_rate": 1.963719970526201e-05, "loss": 1.8194, "step": 34960 }, { "epoch": 0.45, "grad_norm": 3.1846587657928467, "learning_rate": 1.9637171657401698e-05, "loss": 1.4596, "step": 34961 }, { "epoch": 0.45, "grad_norm": 4.135603427886963, "learning_rate": 1.9637143608477278e-05, "loss": 2.2395, "step": 34962 }, { "epoch": 0.45, "grad_norm": 4.1133036613464355, "learning_rate": 1.9637115558488754e-05, "loss": 2.2607, "step": 34963 }, { "epoch": 0.45, "grad_norm": 3.756608009338379, "learning_rate": 1.963708750743613e-05, "loss": 2.1476, "step": 34964 }, { "epoch": 0.45, "grad_norm": 3.750656843185425, "learning_rate": 1.963705945531941e-05, "loss": 1.8328, "step": 34965 }, { "epoch": 0.45, "grad_norm": 4.07750129699707, "learning_rate": 1.9637031402138592e-05, "loss": 2.2924, "step": 34966 }, { "epoch": 0.45, "grad_norm": 4.164437294006348, "learning_rate": 1.963700334789368e-05, "loss": 2.2819, "step": 34967 }, { "epoch": 0.45, "grad_norm": 3.834853172302246, "learning_rate": 1.9636975292584686e-05, "loss": 2.0774, "step": 34968 }, { "epoch": 0.45, "grad_norm": 3.412623405456543, "learning_rate": 1.9636947236211604e-05, "loss": 1.7079, "step": 34969 }, { "epoch": 0.45, "grad_norm": 3.806225299835205, "learning_rate": 1.9636919178774435e-05, "loss": 1.9864, "step": 34970 }, { "epoch": 0.45, "grad_norm": 4.370128631591797, "learning_rate": 1.963689112027319e-05, "loss": 2.1592, "step": 34971 }, { "epoch": 0.45, "grad_norm": 5.111837863922119, "learning_rate": 1.963686306070787e-05, "loss": 2.3816, "step": 34972 }, { "epoch": 0.45, "grad_norm": 3.7025251388549805, "learning_rate": 1.9636835000078476e-05, "loss": 1.9645, "step": 34973 }, { "epoch": 0.45, "grad_norm": 3.860374927520752, "learning_rate": 1.963680693838501e-05, "loss": 2.3103, "step": 34974 }, { "epoch": 0.45, "grad_norm": 4.130274772644043, "learning_rate": 1.963677887562748e-05, "loss": 2.3974, "step": 34975 }, { "epoch": 0.45, "grad_norm": 4.009945392608643, "learning_rate": 1.9636750811805885e-05, "loss": 1.9967, "step": 34976 }, { "epoch": 0.45, "grad_norm": 3.835084915161133, "learning_rate": 1.963672274692023e-05, "loss": 1.8698, "step": 34977 }, { "epoch": 0.45, "grad_norm": 3.301762819290161, "learning_rate": 1.9636694680970513e-05, "loss": 1.9695, "step": 34978 }, { "epoch": 0.45, "grad_norm": 3.9521255493164062, "learning_rate": 1.9636666613956746e-05, "loss": 1.8204, "step": 34979 }, { "epoch": 0.45, "grad_norm": 3.7828152179718018, "learning_rate": 1.963663854587893e-05, "loss": 1.7369, "step": 34980 }, { "epoch": 0.45, "grad_norm": 3.5508246421813965, "learning_rate": 1.963661047673706e-05, "loss": 1.6353, "step": 34981 }, { "epoch": 0.45, "grad_norm": 3.8908865451812744, "learning_rate": 1.9636582406531147e-05, "loss": 2.524, "step": 34982 }, { "epoch": 0.45, "grad_norm": 3.8647263050079346, "learning_rate": 1.963655433526119e-05, "loss": 2.2114, "step": 34983 }, { "epoch": 0.45, "grad_norm": 3.4579248428344727, "learning_rate": 1.9636526262927194e-05, "loss": 2.0545, "step": 34984 }, { "epoch": 0.45, "grad_norm": 3.9449732303619385, "learning_rate": 1.9636498189529164e-05, "loss": 1.9693, "step": 34985 }, { "epoch": 0.45, "grad_norm": 3.8371875286102295, "learning_rate": 1.96364701150671e-05, "loss": 1.7406, "step": 34986 }, { "epoch": 0.45, "grad_norm": 3.941354274749756, "learning_rate": 1.9636442039541008e-05, "loss": 2.3338, "step": 34987 }, { "epoch": 0.45, "grad_norm": 3.3125717639923096, "learning_rate": 1.963641396295089e-05, "loss": 1.8718, "step": 34988 }, { "epoch": 0.45, "grad_norm": 3.6404178142547607, "learning_rate": 1.9636385885296745e-05, "loss": 1.8624, "step": 34989 }, { "epoch": 0.45, "grad_norm": 4.113218784332275, "learning_rate": 1.9636357806578582e-05, "loss": 2.0741, "step": 34990 }, { "epoch": 0.45, "grad_norm": 3.2767109870910645, "learning_rate": 1.9636329726796396e-05, "loss": 1.7532, "step": 34991 }, { "epoch": 0.45, "grad_norm": 3.7787609100341797, "learning_rate": 1.9636301645950202e-05, "loss": 2.2372, "step": 34992 }, { "epoch": 0.45, "grad_norm": 3.55190372467041, "learning_rate": 1.9636273564039995e-05, "loss": 1.884, "step": 34993 }, { "epoch": 0.45, "grad_norm": 3.903712272644043, "learning_rate": 1.9636245481065783e-05, "loss": 2.4733, "step": 34994 }, { "epoch": 0.45, "grad_norm": 4.07544469833374, "learning_rate": 1.9636217397027558e-05, "loss": 2.5133, "step": 34995 }, { "epoch": 0.45, "grad_norm": 4.048316478729248, "learning_rate": 1.9636189311925338e-05, "loss": 2.3992, "step": 34996 }, { "epoch": 0.45, "grad_norm": 4.446933746337891, "learning_rate": 1.963616122575912e-05, "loss": 2.6873, "step": 34997 }, { "epoch": 0.45, "grad_norm": 3.268066167831421, "learning_rate": 1.96361331385289e-05, "loss": 1.8443, "step": 34998 }, { "epoch": 0.45, "grad_norm": 3.6861331462860107, "learning_rate": 1.9636105050234692e-05, "loss": 2.1309, "step": 34999 }, { "epoch": 0.45, "grad_norm": 3.6115059852600098, "learning_rate": 1.9636076960876497e-05, "loss": 1.6377, "step": 35000 }, { "epoch": 0.45, "grad_norm": 3.572744607925415, "learning_rate": 1.963604887045431e-05, "loss": 1.8734, "step": 35001 }, { "epoch": 0.45, "grad_norm": 3.843599319458008, "learning_rate": 1.9636020778968142e-05, "loss": 2.2209, "step": 35002 }, { "epoch": 0.45, "grad_norm": 3.5222926139831543, "learning_rate": 1.9635992686417992e-05, "loss": 1.7645, "step": 35003 }, { "epoch": 0.45, "grad_norm": 3.4781606197357178, "learning_rate": 1.9635964592803866e-05, "loss": 1.7433, "step": 35004 }, { "epoch": 0.45, "grad_norm": 3.5385639667510986, "learning_rate": 1.9635936498125766e-05, "loss": 1.7324, "step": 35005 }, { "epoch": 0.45, "grad_norm": 3.736427068710327, "learning_rate": 1.9635908402383698e-05, "loss": 2.0613, "step": 35006 }, { "epoch": 0.45, "grad_norm": 4.234447956085205, "learning_rate": 1.963588030557766e-05, "loss": 2.1891, "step": 35007 }, { "epoch": 0.45, "grad_norm": 3.175441026687622, "learning_rate": 1.963585220770766e-05, "loss": 1.6667, "step": 35008 }, { "epoch": 0.45, "grad_norm": 3.4298670291900635, "learning_rate": 1.9635824108773692e-05, "loss": 1.825, "step": 35009 }, { "epoch": 0.45, "grad_norm": 3.8831934928894043, "learning_rate": 1.963579600877577e-05, "loss": 2.3145, "step": 35010 }, { "epoch": 0.45, "grad_norm": 3.512585401535034, "learning_rate": 1.9635767907713892e-05, "loss": 1.6925, "step": 35011 }, { "epoch": 0.45, "grad_norm": 3.9999969005584717, "learning_rate": 1.9635739805588065e-05, "loss": 2.3902, "step": 35012 }, { "epoch": 0.45, "grad_norm": 3.7651708126068115, "learning_rate": 1.9635711702398283e-05, "loss": 1.8949, "step": 35013 }, { "epoch": 0.45, "grad_norm": 3.652555227279663, "learning_rate": 1.9635683598144556e-05, "loss": 1.909, "step": 35014 }, { "epoch": 0.45, "grad_norm": 4.101337909698486, "learning_rate": 1.9635655492826892e-05, "loss": 2.3671, "step": 35015 }, { "epoch": 0.45, "grad_norm": 3.703087329864502, "learning_rate": 1.9635627386445283e-05, "loss": 1.8657, "step": 35016 }, { "epoch": 0.45, "grad_norm": 4.060856819152832, "learning_rate": 1.9635599278999737e-05, "loss": 2.0643, "step": 35017 }, { "epoch": 0.45, "grad_norm": 4.5839924812316895, "learning_rate": 1.963557117049026e-05, "loss": 2.385, "step": 35018 }, { "epoch": 0.45, "grad_norm": 3.1695315837860107, "learning_rate": 1.9635543060916847e-05, "loss": 1.5257, "step": 35019 }, { "epoch": 0.45, "grad_norm": 3.7924129962921143, "learning_rate": 1.963551495027951e-05, "loss": 1.9343, "step": 35020 }, { "epoch": 0.45, "grad_norm": 4.070979118347168, "learning_rate": 1.963548683857825e-05, "loss": 2.2958, "step": 35021 }, { "epoch": 0.45, "grad_norm": 3.731353521347046, "learning_rate": 1.963545872581307e-05, "loss": 1.925, "step": 35022 }, { "epoch": 0.45, "grad_norm": 4.388891696929932, "learning_rate": 1.963543061198397e-05, "loss": 1.8644, "step": 35023 }, { "epoch": 0.45, "grad_norm": 3.6994857788085938, "learning_rate": 1.9635402497090955e-05, "loss": 2.0133, "step": 35024 }, { "epoch": 0.45, "grad_norm": 3.968010187149048, "learning_rate": 1.9635374381134025e-05, "loss": 1.8808, "step": 35025 }, { "epoch": 0.45, "grad_norm": 4.036293983459473, "learning_rate": 1.963534626411319e-05, "loss": 2.0958, "step": 35026 }, { "epoch": 0.45, "grad_norm": 3.803631067276001, "learning_rate": 1.963531814602845e-05, "loss": 2.1184, "step": 35027 }, { "epoch": 0.45, "grad_norm": 3.3748977184295654, "learning_rate": 1.9635290026879805e-05, "loss": 1.6012, "step": 35028 }, { "epoch": 0.45, "grad_norm": 3.5866403579711914, "learning_rate": 1.9635261906667263e-05, "loss": 1.7971, "step": 35029 }, { "epoch": 0.45, "grad_norm": 3.798574447631836, "learning_rate": 1.9635233785390824e-05, "loss": 1.7933, "step": 35030 }, { "epoch": 0.45, "grad_norm": 3.6384470462799072, "learning_rate": 1.9635205663050488e-05, "loss": 2.0232, "step": 35031 }, { "epoch": 0.45, "grad_norm": 3.4657511711120605, "learning_rate": 1.9635177539646268e-05, "loss": 1.7135, "step": 35032 }, { "epoch": 0.45, "grad_norm": 3.7422163486480713, "learning_rate": 1.9635149415178154e-05, "loss": 1.8309, "step": 35033 }, { "epoch": 0.45, "grad_norm": 3.686164379119873, "learning_rate": 1.963512128964616e-05, "loss": 2.1697, "step": 35034 }, { "epoch": 0.45, "grad_norm": 3.8704371452331543, "learning_rate": 1.9635093163050287e-05, "loss": 1.8166, "step": 35035 }, { "epoch": 0.45, "grad_norm": 4.507701873779297, "learning_rate": 1.9635065035390533e-05, "loss": 2.6583, "step": 35036 }, { "epoch": 0.45, "grad_norm": 3.2747766971588135, "learning_rate": 1.9635036906666905e-05, "loss": 1.5855, "step": 35037 }, { "epoch": 0.45, "grad_norm": 3.6231775283813477, "learning_rate": 1.9635008776879405e-05, "loss": 1.657, "step": 35038 }, { "epoch": 0.45, "grad_norm": 3.324960470199585, "learning_rate": 1.9634980646028037e-05, "loss": 1.8978, "step": 35039 }, { "epoch": 0.45, "grad_norm": 3.572955846786499, "learning_rate": 1.9634952514112803e-05, "loss": 1.8668, "step": 35040 }, { "epoch": 0.45, "grad_norm": 3.8550350666046143, "learning_rate": 1.963492438113371e-05, "loss": 2.0949, "step": 35041 }, { "epoch": 0.45, "grad_norm": 4.253576755523682, "learning_rate": 1.9634896247090756e-05, "loss": 2.2508, "step": 35042 }, { "epoch": 0.45, "grad_norm": 3.7620065212249756, "learning_rate": 1.9634868111983946e-05, "loss": 2.2965, "step": 35043 }, { "epoch": 0.45, "grad_norm": 3.742095470428467, "learning_rate": 1.9634839975813282e-05, "loss": 1.8119, "step": 35044 }, { "epoch": 0.45, "grad_norm": 4.032222747802734, "learning_rate": 1.963481183857877e-05, "loss": 2.1124, "step": 35045 }, { "epoch": 0.45, "grad_norm": 3.432307243347168, "learning_rate": 1.963478370028041e-05, "loss": 1.785, "step": 35046 }, { "epoch": 0.45, "grad_norm": 3.5849664211273193, "learning_rate": 1.9634755560918205e-05, "loss": 1.9595, "step": 35047 }, { "epoch": 0.45, "grad_norm": 3.240504026412964, "learning_rate": 1.9634727420492164e-05, "loss": 1.6391, "step": 35048 }, { "epoch": 0.45, "grad_norm": 3.705315113067627, "learning_rate": 1.9634699279002283e-05, "loss": 2.0768, "step": 35049 }, { "epoch": 0.45, "grad_norm": 3.6953110694885254, "learning_rate": 1.9634671136448565e-05, "loss": 2.0057, "step": 35050 }, { "epoch": 0.45, "grad_norm": 4.38930082321167, "learning_rate": 1.963464299283102e-05, "loss": 2.5306, "step": 35051 }, { "epoch": 0.45, "grad_norm": 3.978933334350586, "learning_rate": 1.9634614848149643e-05, "loss": 1.9527, "step": 35052 }, { "epoch": 0.45, "grad_norm": 4.150272369384766, "learning_rate": 1.9634586702404445e-05, "loss": 2.5632, "step": 35053 }, { "epoch": 0.45, "grad_norm": 3.3416035175323486, "learning_rate": 1.9634558555595424e-05, "loss": 1.5682, "step": 35054 }, { "epoch": 0.45, "grad_norm": 4.194539546966553, "learning_rate": 1.9634530407722584e-05, "loss": 2.5147, "step": 35055 }, { "epoch": 0.45, "grad_norm": 3.8744516372680664, "learning_rate": 1.9634502258785928e-05, "loss": 2.1615, "step": 35056 }, { "epoch": 0.45, "grad_norm": 3.8834781646728516, "learning_rate": 1.963447410878546e-05, "loss": 1.8499, "step": 35057 }, { "epoch": 0.45, "grad_norm": 3.5207631587982178, "learning_rate": 1.9634445957721183e-05, "loss": 1.8879, "step": 35058 }, { "epoch": 0.45, "grad_norm": 3.80625319480896, "learning_rate": 1.9634417805593096e-05, "loss": 1.6903, "step": 35059 }, { "epoch": 0.45, "grad_norm": 3.990870237350464, "learning_rate": 1.9634389652401215e-05, "loss": 1.987, "step": 35060 }, { "epoch": 0.46, "grad_norm": 3.857419967651367, "learning_rate": 1.9634361498145524e-05, "loss": 2.22, "step": 35061 }, { "epoch": 0.46, "grad_norm": 3.234323024749756, "learning_rate": 1.963433334282604e-05, "loss": 1.4717, "step": 35062 }, { "epoch": 0.46, "grad_norm": 3.618302345275879, "learning_rate": 1.9634305186442763e-05, "loss": 1.5095, "step": 35063 }, { "epoch": 0.46, "grad_norm": 3.8644726276397705, "learning_rate": 1.9634277028995693e-05, "loss": 1.6657, "step": 35064 }, { "epoch": 0.46, "grad_norm": 3.3114657402038574, "learning_rate": 1.9634248870484837e-05, "loss": 1.552, "step": 35065 }, { "epoch": 0.46, "grad_norm": 3.720235586166382, "learning_rate": 1.96342207109102e-05, "loss": 1.8657, "step": 35066 }, { "epoch": 0.46, "grad_norm": 4.645793437957764, "learning_rate": 1.9634192550271777e-05, "loss": 2.1458, "step": 35067 }, { "epoch": 0.46, "grad_norm": 3.712911367416382, "learning_rate": 1.9634164388569575e-05, "loss": 1.8683, "step": 35068 }, { "epoch": 0.46, "grad_norm": 3.598463296890259, "learning_rate": 1.96341362258036e-05, "loss": 1.9945, "step": 35069 }, { "epoch": 0.46, "grad_norm": 3.3656704425811768, "learning_rate": 1.963410806197385e-05, "loss": 1.4064, "step": 35070 }, { "epoch": 0.46, "grad_norm": 3.6319100856781006, "learning_rate": 1.9634079897080334e-05, "loss": 1.7504, "step": 35071 }, { "epoch": 0.46, "grad_norm": 4.089890956878662, "learning_rate": 1.9634051731123053e-05, "loss": 2.2726, "step": 35072 }, { "epoch": 0.46, "grad_norm": 4.876369953155518, "learning_rate": 1.9634023564102006e-05, "loss": 2.7661, "step": 35073 }, { "epoch": 0.46, "grad_norm": 3.947470188140869, "learning_rate": 1.9633995396017202e-05, "loss": 2.2676, "step": 35074 }, { "epoch": 0.46, "grad_norm": 4.593379974365234, "learning_rate": 1.963396722686864e-05, "loss": 2.5622, "step": 35075 }, { "epoch": 0.46, "grad_norm": 3.299340009689331, "learning_rate": 1.9633939056656325e-05, "loss": 1.6313, "step": 35076 }, { "epoch": 0.46, "grad_norm": 3.492936849594116, "learning_rate": 1.963391088538026e-05, "loss": 1.8363, "step": 35077 }, { "epoch": 0.46, "grad_norm": 3.6330859661102295, "learning_rate": 1.963388271304045e-05, "loss": 1.7895, "step": 35078 }, { "epoch": 0.46, "grad_norm": 3.650675058364868, "learning_rate": 1.963385453963689e-05, "loss": 2.0909, "step": 35079 }, { "epoch": 0.46, "grad_norm": 2.957862615585327, "learning_rate": 1.9633826365169594e-05, "loss": 1.3324, "step": 35080 }, { "epoch": 0.46, "grad_norm": 4.3821210861206055, "learning_rate": 1.9633798189638557e-05, "loss": 2.3316, "step": 35081 }, { "epoch": 0.46, "grad_norm": 3.4381184577941895, "learning_rate": 1.963377001304379e-05, "loss": 1.7106, "step": 35082 }, { "epoch": 0.46, "grad_norm": 3.794097900390625, "learning_rate": 1.9633741835385286e-05, "loss": 2.1149, "step": 35083 }, { "epoch": 0.46, "grad_norm": 4.377981185913086, "learning_rate": 1.9633713656663055e-05, "loss": 2.1856, "step": 35084 }, { "epoch": 0.46, "grad_norm": 3.8032753467559814, "learning_rate": 1.96336854768771e-05, "loss": 2.14, "step": 35085 }, { "epoch": 0.46, "grad_norm": 4.172792911529541, "learning_rate": 1.963365729602742e-05, "loss": 1.7323, "step": 35086 }, { "epoch": 0.46, "grad_norm": 3.9202880859375, "learning_rate": 1.9633629114114024e-05, "loss": 2.1518, "step": 35087 }, { "epoch": 0.46, "grad_norm": 4.1576690673828125, "learning_rate": 1.963360093113691e-05, "loss": 1.7696, "step": 35088 }, { "epoch": 0.46, "grad_norm": 3.6232426166534424, "learning_rate": 1.9633572747096085e-05, "loss": 1.6039, "step": 35089 }, { "epoch": 0.46, "grad_norm": 3.768252372741699, "learning_rate": 1.963354456199155e-05, "loss": 1.8573, "step": 35090 }, { "epoch": 0.46, "grad_norm": 3.1678292751312256, "learning_rate": 1.9633516375823306e-05, "loss": 1.7034, "step": 35091 }, { "epoch": 0.46, "grad_norm": 3.8964571952819824, "learning_rate": 1.963348818859136e-05, "loss": 1.8255, "step": 35092 }, { "epoch": 0.46, "grad_norm": 3.61057186126709, "learning_rate": 1.963346000029571e-05, "loss": 1.9337, "step": 35093 }, { "epoch": 0.46, "grad_norm": 4.777312755584717, "learning_rate": 1.9633431810936366e-05, "loss": 2.0204, "step": 35094 }, { "epoch": 0.46, "grad_norm": 3.9160544872283936, "learning_rate": 1.9633403620513327e-05, "loss": 2.229, "step": 35095 }, { "epoch": 0.46, "grad_norm": 3.622415542602539, "learning_rate": 1.9633375429026598e-05, "loss": 1.8561, "step": 35096 }, { "epoch": 0.46, "grad_norm": 3.7653019428253174, "learning_rate": 1.963334723647618e-05, "loss": 2.2014, "step": 35097 }, { "epoch": 0.46, "grad_norm": 4.17146635055542, "learning_rate": 1.9633319042862075e-05, "loss": 2.1191, "step": 35098 }, { "epoch": 0.46, "grad_norm": 3.090275287628174, "learning_rate": 1.9633290848184292e-05, "loss": 1.4827, "step": 35099 }, { "epoch": 0.46, "grad_norm": 3.342947006225586, "learning_rate": 1.9633262652442825e-05, "loss": 1.5123, "step": 35100 }, { "epoch": 0.46, "grad_norm": 3.0964787006378174, "learning_rate": 1.9633234455637688e-05, "loss": 1.427, "step": 35101 }, { "epoch": 0.46, "grad_norm": 3.798513889312744, "learning_rate": 1.9633206257768877e-05, "loss": 2.0765, "step": 35102 }, { "epoch": 0.46, "grad_norm": 3.8049938678741455, "learning_rate": 1.9633178058836396e-05, "loss": 1.9058, "step": 35103 }, { "epoch": 0.46, "grad_norm": 4.157611846923828, "learning_rate": 1.963314985884025e-05, "loss": 2.5225, "step": 35104 }, { "epoch": 0.46, "grad_norm": 3.875234365463257, "learning_rate": 1.9633121657780438e-05, "loss": 2.2071, "step": 35105 }, { "epoch": 0.46, "grad_norm": 3.749441385269165, "learning_rate": 1.9633093455656967e-05, "loss": 1.6389, "step": 35106 }, { "epoch": 0.46, "grad_norm": 3.7363414764404297, "learning_rate": 1.963306525246984e-05, "loss": 1.8773, "step": 35107 }, { "epoch": 0.46, "grad_norm": 3.884859085083008, "learning_rate": 1.9633037048219056e-05, "loss": 1.8538, "step": 35108 }, { "epoch": 0.46, "grad_norm": 4.3533453941345215, "learning_rate": 1.9633008842904622e-05, "loss": 2.1203, "step": 35109 }, { "epoch": 0.46, "grad_norm": 3.93638277053833, "learning_rate": 1.9632980636526546e-05, "loss": 2.3382, "step": 35110 }, { "epoch": 0.46, "grad_norm": 4.063363552093506, "learning_rate": 1.963295242908482e-05, "loss": 2.3881, "step": 35111 }, { "epoch": 0.46, "grad_norm": 3.740978240966797, "learning_rate": 1.9632924220579453e-05, "loss": 2.122, "step": 35112 }, { "epoch": 0.46, "grad_norm": 3.560446262359619, "learning_rate": 1.963289601101045e-05, "loss": 1.708, "step": 35113 }, { "epoch": 0.46, "grad_norm": 3.3524320125579834, "learning_rate": 1.963286780037781e-05, "loss": 1.9087, "step": 35114 }, { "epoch": 0.46, "grad_norm": 3.6494088172912598, "learning_rate": 1.963283958868154e-05, "loss": 1.6641, "step": 35115 }, { "epoch": 0.46, "grad_norm": 3.736393690109253, "learning_rate": 1.9632811375921638e-05, "loss": 1.9786, "step": 35116 }, { "epoch": 0.46, "grad_norm": 3.843777894973755, "learning_rate": 1.963278316209811e-05, "loss": 1.6237, "step": 35117 }, { "epoch": 0.46, "grad_norm": 3.9857687950134277, "learning_rate": 1.9632754947210965e-05, "loss": 1.8068, "step": 35118 }, { "epoch": 0.46, "grad_norm": 3.854579210281372, "learning_rate": 1.9632726731260196e-05, "loss": 1.7711, "step": 35119 }, { "epoch": 0.46, "grad_norm": 3.8015713691711426, "learning_rate": 1.9632698514245812e-05, "loss": 1.9156, "step": 35120 }, { "epoch": 0.46, "grad_norm": 3.7387142181396484, "learning_rate": 1.9632670296167815e-05, "loss": 1.9821, "step": 35121 }, { "epoch": 0.46, "grad_norm": 3.8992185592651367, "learning_rate": 1.963264207702621e-05, "loss": 2.4281, "step": 35122 }, { "epoch": 0.46, "grad_norm": 4.035781383514404, "learning_rate": 1.963261385682099e-05, "loss": 2.4729, "step": 35123 }, { "epoch": 0.46, "grad_norm": 4.3139967918396, "learning_rate": 1.963258563555217e-05, "loss": 2.3529, "step": 35124 }, { "epoch": 0.46, "grad_norm": 3.436450242996216, "learning_rate": 1.9632557413219753e-05, "loss": 1.7455, "step": 35125 }, { "epoch": 0.46, "grad_norm": 3.8350508213043213, "learning_rate": 1.9632529189823734e-05, "loss": 1.8688, "step": 35126 }, { "epoch": 0.46, "grad_norm": 3.734891653060913, "learning_rate": 1.963250096536412e-05, "loss": 1.7452, "step": 35127 }, { "epoch": 0.46, "grad_norm": 3.667667865753174, "learning_rate": 1.9632472739840916e-05, "loss": 1.5183, "step": 35128 }, { "epoch": 0.46, "grad_norm": 4.060048580169678, "learning_rate": 1.9632444513254125e-05, "loss": 1.9308, "step": 35129 }, { "epoch": 0.46, "grad_norm": 5.000595569610596, "learning_rate": 1.9632416285603747e-05, "loss": 1.7216, "step": 35130 }, { "epoch": 0.46, "grad_norm": 3.5388548374176025, "learning_rate": 1.9632388056889788e-05, "loss": 2.0101, "step": 35131 }, { "epoch": 0.46, "grad_norm": 4.028585910797119, "learning_rate": 1.963235982711225e-05, "loss": 2.0208, "step": 35132 }, { "epoch": 0.46, "grad_norm": 4.0080180168151855, "learning_rate": 1.9632331596271133e-05, "loss": 2.3093, "step": 35133 }, { "epoch": 0.46, "grad_norm": 4.483979225158691, "learning_rate": 1.9632303364366445e-05, "loss": 2.1771, "step": 35134 }, { "epoch": 0.46, "grad_norm": 3.496330976486206, "learning_rate": 1.9632275131398185e-05, "loss": 1.8364, "step": 35135 }, { "epoch": 0.46, "grad_norm": 3.6803455352783203, "learning_rate": 1.9632246897366363e-05, "loss": 1.871, "step": 35136 }, { "epoch": 0.46, "grad_norm": 4.0737199783325195, "learning_rate": 1.9632218662270976e-05, "loss": 2.1636, "step": 35137 }, { "epoch": 0.46, "grad_norm": 4.488694667816162, "learning_rate": 1.9632190426112027e-05, "loss": 2.1447, "step": 35138 }, { "epoch": 0.46, "grad_norm": 3.875875949859619, "learning_rate": 1.9632162188889524e-05, "loss": 2.1758, "step": 35139 }, { "epoch": 0.46, "grad_norm": 3.5823495388031006, "learning_rate": 1.9632133950603465e-05, "loss": 1.5907, "step": 35140 }, { "epoch": 0.46, "grad_norm": 3.604175090789795, "learning_rate": 1.9632105711253855e-05, "loss": 1.9384, "step": 35141 }, { "epoch": 0.46, "grad_norm": 3.6146342754364014, "learning_rate": 1.9632077470840697e-05, "loss": 2.0186, "step": 35142 }, { "epoch": 0.46, "grad_norm": 3.645256996154785, "learning_rate": 1.963204922936399e-05, "loss": 2.0858, "step": 35143 }, { "epoch": 0.46, "grad_norm": 3.733490467071533, "learning_rate": 1.9632020986823747e-05, "loss": 1.7482, "step": 35144 }, { "epoch": 0.46, "grad_norm": 3.9562666416168213, "learning_rate": 1.9631992743219965e-05, "loss": 1.9586, "step": 35145 }, { "epoch": 0.46, "grad_norm": 4.845175266265869, "learning_rate": 1.963196449855265e-05, "loss": 2.4431, "step": 35146 }, { "epoch": 0.46, "grad_norm": 4.142621040344238, "learning_rate": 1.9631936252821794e-05, "loss": 2.065, "step": 35147 }, { "epoch": 0.46, "grad_norm": 3.4956488609313965, "learning_rate": 1.9631908006027415e-05, "loss": 1.8859, "step": 35148 }, { "epoch": 0.46, "grad_norm": 3.304542303085327, "learning_rate": 1.963187975816951e-05, "loss": 1.6873, "step": 35149 }, { "epoch": 0.46, "grad_norm": 4.207472801208496, "learning_rate": 1.963185150924808e-05, "loss": 2.0693, "step": 35150 }, { "epoch": 0.46, "grad_norm": 3.876680850982666, "learning_rate": 1.963182325926313e-05, "loss": 1.7928, "step": 35151 }, { "epoch": 0.46, "grad_norm": 4.058037757873535, "learning_rate": 1.9631795008214668e-05, "loss": 2.3114, "step": 35152 }, { "epoch": 0.46, "grad_norm": 4.243452548980713, "learning_rate": 1.9631766756102687e-05, "loss": 2.2065, "step": 35153 }, { "epoch": 0.46, "grad_norm": 3.746846914291382, "learning_rate": 1.96317385029272e-05, "loss": 1.9179, "step": 35154 }, { "epoch": 0.46, "grad_norm": 3.6842916011810303, "learning_rate": 1.96317102486882e-05, "loss": 1.9253, "step": 35155 }, { "epoch": 0.46, "grad_norm": 4.05832052230835, "learning_rate": 1.96316819933857e-05, "loss": 2.1985, "step": 35156 }, { "epoch": 0.46, "grad_norm": 3.4575746059417725, "learning_rate": 1.96316537370197e-05, "loss": 1.9174, "step": 35157 }, { "epoch": 0.46, "grad_norm": 3.7777647972106934, "learning_rate": 1.9631625479590196e-05, "loss": 1.8677, "step": 35158 }, { "epoch": 0.46, "grad_norm": 3.763282060623169, "learning_rate": 1.9631597221097204e-05, "loss": 1.9298, "step": 35159 }, { "epoch": 0.46, "grad_norm": 4.124159812927246, "learning_rate": 1.963156896154072e-05, "loss": 1.7119, "step": 35160 }, { "epoch": 0.46, "grad_norm": 4.424278259277344, "learning_rate": 1.9631540700920742e-05, "loss": 2.6436, "step": 35161 }, { "epoch": 0.46, "grad_norm": 3.9232990741729736, "learning_rate": 1.9631512439237283e-05, "loss": 2.2158, "step": 35162 }, { "epoch": 0.46, "grad_norm": 3.832484245300293, "learning_rate": 1.9631484176490336e-05, "loss": 2.0644, "step": 35163 }, { "epoch": 0.46, "grad_norm": 5.5833306312561035, "learning_rate": 1.9631455912679915e-05, "loss": 2.5328, "step": 35164 }, { "epoch": 0.46, "grad_norm": 3.646444082260132, "learning_rate": 1.9631427647806018e-05, "loss": 1.972, "step": 35165 }, { "epoch": 0.46, "grad_norm": 3.7083899974823, "learning_rate": 1.9631399381868647e-05, "loss": 2.077, "step": 35166 }, { "epoch": 0.46, "grad_norm": 3.864107370376587, "learning_rate": 1.9631371114867802e-05, "loss": 2.1301, "step": 35167 }, { "epoch": 0.46, "grad_norm": 3.782865524291992, "learning_rate": 1.9631342846803497e-05, "loss": 2.3668, "step": 35168 }, { "epoch": 0.46, "grad_norm": 4.0391411781311035, "learning_rate": 1.9631314577675722e-05, "loss": 2.1221, "step": 35169 }, { "epoch": 0.46, "grad_norm": 4.028814315795898, "learning_rate": 1.963128630748449e-05, "loss": 2.3645, "step": 35170 }, { "epoch": 0.46, "grad_norm": 3.9996554851531982, "learning_rate": 1.96312580362298e-05, "loss": 2.3263, "step": 35171 }, { "epoch": 0.46, "grad_norm": 3.220745801925659, "learning_rate": 1.9631229763911658e-05, "loss": 1.4833, "step": 35172 }, { "epoch": 0.46, "grad_norm": 3.781308174133301, "learning_rate": 1.963120149053006e-05, "loss": 1.898, "step": 35173 }, { "epoch": 0.46, "grad_norm": 3.8440871238708496, "learning_rate": 1.963117321608502e-05, "loss": 2.0996, "step": 35174 }, { "epoch": 0.46, "grad_norm": 3.632225513458252, "learning_rate": 1.963114494057653e-05, "loss": 2.0157, "step": 35175 }, { "epoch": 0.46, "grad_norm": 3.478928327560425, "learning_rate": 1.96311166640046e-05, "loss": 2.1161, "step": 35176 }, { "epoch": 0.46, "grad_norm": 3.8753397464752197, "learning_rate": 1.9631088386369233e-05, "loss": 2.3525, "step": 35177 }, { "epoch": 0.46, "grad_norm": 3.6947021484375, "learning_rate": 1.9631060107670427e-05, "loss": 2.5707, "step": 35178 }, { "epoch": 0.46, "grad_norm": 2.9385218620300293, "learning_rate": 1.963103182790819e-05, "loss": 1.3447, "step": 35179 }, { "epoch": 0.46, "grad_norm": 3.2829301357269287, "learning_rate": 1.9631003547082525e-05, "loss": 1.8158, "step": 35180 }, { "epoch": 0.46, "grad_norm": 4.328773498535156, "learning_rate": 1.9630975265193433e-05, "loss": 2.4765, "step": 35181 }, { "epoch": 0.46, "grad_norm": 3.7664849758148193, "learning_rate": 1.9630946982240918e-05, "loss": 2.0385, "step": 35182 }, { "epoch": 0.46, "grad_norm": 3.5452640056610107, "learning_rate": 1.9630918698224984e-05, "loss": 1.9153, "step": 35183 }, { "epoch": 0.46, "grad_norm": 3.884006977081299, "learning_rate": 1.963089041314563e-05, "loss": 2.0457, "step": 35184 }, { "epoch": 0.46, "grad_norm": 3.518310546875, "learning_rate": 1.9630862127002865e-05, "loss": 1.8906, "step": 35185 }, { "epoch": 0.46, "grad_norm": 4.765768527984619, "learning_rate": 1.963083383979669e-05, "loss": 2.3175, "step": 35186 }, { "epoch": 0.46, "grad_norm": 4.3758931159973145, "learning_rate": 1.9630805551527105e-05, "loss": 2.1649, "step": 35187 }, { "epoch": 0.46, "grad_norm": 3.4122884273529053, "learning_rate": 1.9630777262194115e-05, "loss": 1.6869, "step": 35188 }, { "epoch": 0.46, "grad_norm": 4.140303611755371, "learning_rate": 1.963074897179773e-05, "loss": 2.2181, "step": 35189 }, { "epoch": 0.46, "grad_norm": 4.051494598388672, "learning_rate": 1.9630720680337944e-05, "loss": 1.9209, "step": 35190 }, { "epoch": 0.46, "grad_norm": 4.031971454620361, "learning_rate": 1.963069238781476e-05, "loss": 2.1381, "step": 35191 }, { "epoch": 0.46, "grad_norm": 4.189336776733398, "learning_rate": 1.9630664094228183e-05, "loss": 2.1402, "step": 35192 }, { "epoch": 0.46, "grad_norm": 3.4943554401397705, "learning_rate": 1.963063579957822e-05, "loss": 1.5642, "step": 35193 }, { "epoch": 0.46, "grad_norm": 4.238768100738525, "learning_rate": 1.9630607503864874e-05, "loss": 1.9666, "step": 35194 }, { "epoch": 0.46, "grad_norm": 3.844552993774414, "learning_rate": 1.963057920708814e-05, "loss": 1.956, "step": 35195 }, { "epoch": 0.46, "grad_norm": 3.7855260372161865, "learning_rate": 1.963055090924803e-05, "loss": 1.6746, "step": 35196 }, { "epoch": 0.46, "grad_norm": 3.770352840423584, "learning_rate": 1.9630522610344547e-05, "loss": 1.6845, "step": 35197 }, { "epoch": 0.46, "grad_norm": 3.781026601791382, "learning_rate": 1.9630494310377685e-05, "loss": 1.9441, "step": 35198 }, { "epoch": 0.46, "grad_norm": 3.6264729499816895, "learning_rate": 1.9630466009347457e-05, "loss": 1.5757, "step": 35199 }, { "epoch": 0.46, "grad_norm": 4.288456916809082, "learning_rate": 1.963043770725386e-05, "loss": 2.5973, "step": 35200 }, { "epoch": 0.46, "grad_norm": 3.557934284210205, "learning_rate": 1.96304094040969e-05, "loss": 1.7525, "step": 35201 }, { "epoch": 0.46, "grad_norm": 3.9671061038970947, "learning_rate": 1.963038109987658e-05, "loss": 2.0672, "step": 35202 }, { "epoch": 0.46, "grad_norm": 3.516545534133911, "learning_rate": 1.9630352794592903e-05, "loss": 1.7388, "step": 35203 }, { "epoch": 0.46, "grad_norm": 4.403687000274658, "learning_rate": 1.963032448824587e-05, "loss": 2.4407, "step": 35204 }, { "epoch": 0.46, "grad_norm": 3.9892146587371826, "learning_rate": 1.9630296180835486e-05, "loss": 2.0041, "step": 35205 }, { "epoch": 0.46, "grad_norm": 3.701937198638916, "learning_rate": 1.9630267872361757e-05, "loss": 1.7435, "step": 35206 }, { "epoch": 0.46, "grad_norm": 3.22124981880188, "learning_rate": 1.963023956282468e-05, "loss": 1.4811, "step": 35207 }, { "epoch": 0.46, "grad_norm": 4.243975639343262, "learning_rate": 1.9630211252224264e-05, "loss": 2.6921, "step": 35208 }, { "epoch": 0.46, "grad_norm": 3.606743097305298, "learning_rate": 1.9630182940560504e-05, "loss": 1.9124, "step": 35209 }, { "epoch": 0.46, "grad_norm": 3.9770119190216064, "learning_rate": 1.9630154627833412e-05, "loss": 1.4918, "step": 35210 }, { "epoch": 0.46, "grad_norm": 3.6071770191192627, "learning_rate": 1.963012631404299e-05, "loss": 1.5857, "step": 35211 }, { "epoch": 0.46, "grad_norm": 3.940688133239746, "learning_rate": 1.9630097999189232e-05, "loss": 2.1162, "step": 35212 }, { "epoch": 0.46, "grad_norm": 4.117578029632568, "learning_rate": 1.963006968327215e-05, "loss": 2.3077, "step": 35213 }, { "epoch": 0.46, "grad_norm": 3.953112840652466, "learning_rate": 1.9630041366291747e-05, "loss": 1.9733, "step": 35214 }, { "epoch": 0.46, "grad_norm": 3.447021245956421, "learning_rate": 1.9630013048248026e-05, "loss": 1.9157, "step": 35215 }, { "epoch": 0.46, "grad_norm": 3.4394211769104004, "learning_rate": 1.9629984729140984e-05, "loss": 1.8777, "step": 35216 }, { "epoch": 0.46, "grad_norm": 3.8824758529663086, "learning_rate": 1.962995640897063e-05, "loss": 2.3229, "step": 35217 }, { "epoch": 0.46, "grad_norm": 3.587433099746704, "learning_rate": 1.9629928087736963e-05, "loss": 1.8323, "step": 35218 }, { "epoch": 0.46, "grad_norm": 4.239350318908691, "learning_rate": 1.9629899765439995e-05, "loss": 2.5543, "step": 35219 }, { "epoch": 0.46, "grad_norm": 3.2045047283172607, "learning_rate": 1.9629871442079716e-05, "loss": 1.6238, "step": 35220 }, { "epoch": 0.46, "grad_norm": 4.288681507110596, "learning_rate": 1.962984311765614e-05, "loss": 1.8505, "step": 35221 }, { "epoch": 0.46, "grad_norm": 3.693636178970337, "learning_rate": 1.9629814792169263e-05, "loss": 1.6956, "step": 35222 }, { "epoch": 0.46, "grad_norm": 3.716824769973755, "learning_rate": 1.962978646561909e-05, "loss": 1.8639, "step": 35223 }, { "epoch": 0.46, "grad_norm": 4.4162516593933105, "learning_rate": 1.9629758138005633e-05, "loss": 2.6696, "step": 35224 }, { "epoch": 0.46, "grad_norm": 3.9775094985961914, "learning_rate": 1.962972980932888e-05, "loss": 1.7934, "step": 35225 }, { "epoch": 0.46, "grad_norm": 4.166105270385742, "learning_rate": 1.9629701479588844e-05, "loss": 2.0446, "step": 35226 }, { "epoch": 0.46, "grad_norm": 4.644060134887695, "learning_rate": 1.9629673148785524e-05, "loss": 1.8956, "step": 35227 }, { "epoch": 0.46, "grad_norm": 3.83990216255188, "learning_rate": 1.9629644816918927e-05, "loss": 1.9967, "step": 35228 }, { "epoch": 0.46, "grad_norm": 2.9807252883911133, "learning_rate": 1.9629616483989052e-05, "loss": 1.4445, "step": 35229 }, { "epoch": 0.46, "grad_norm": 3.5219357013702393, "learning_rate": 1.9629588149995904e-05, "loss": 1.5001, "step": 35230 }, { "epoch": 0.46, "grad_norm": 3.530789852142334, "learning_rate": 1.9629559814939486e-05, "loss": 1.598, "step": 35231 }, { "epoch": 0.46, "grad_norm": 3.496723175048828, "learning_rate": 1.9629531478819804e-05, "loss": 1.9479, "step": 35232 }, { "epoch": 0.46, "grad_norm": 3.597315549850464, "learning_rate": 1.9629503141636855e-05, "loss": 1.7825, "step": 35233 }, { "epoch": 0.46, "grad_norm": 3.3714423179626465, "learning_rate": 1.9629474803390647e-05, "loss": 1.6892, "step": 35234 }, { "epoch": 0.46, "grad_norm": 3.6793901920318604, "learning_rate": 1.962944646408118e-05, "loss": 1.8995, "step": 35235 }, { "epoch": 0.46, "grad_norm": 3.737548351287842, "learning_rate": 1.9629418123708462e-05, "loss": 1.8067, "step": 35236 }, { "epoch": 0.46, "grad_norm": 3.4606215953826904, "learning_rate": 1.962938978227249e-05, "loss": 2.101, "step": 35237 }, { "epoch": 0.46, "grad_norm": 4.127440452575684, "learning_rate": 1.962936143977327e-05, "loss": 2.2681, "step": 35238 }, { "epoch": 0.46, "grad_norm": 4.739435195922852, "learning_rate": 1.962933309621081e-05, "loss": 2.1019, "step": 35239 }, { "epoch": 0.46, "grad_norm": 3.4388678073883057, "learning_rate": 1.96293047515851e-05, "loss": 1.7245, "step": 35240 }, { "epoch": 0.46, "grad_norm": 3.9350955486297607, "learning_rate": 1.962927640589616e-05, "loss": 2.1318, "step": 35241 }, { "epoch": 0.46, "grad_norm": 3.546908140182495, "learning_rate": 1.9629248059143977e-05, "loss": 1.8456, "step": 35242 }, { "epoch": 0.46, "grad_norm": 3.8603436946868896, "learning_rate": 1.9629219711328566e-05, "loss": 1.9252, "step": 35243 }, { "epoch": 0.46, "grad_norm": 3.3055880069732666, "learning_rate": 1.9629191362449925e-05, "loss": 1.799, "step": 35244 }, { "epoch": 0.46, "grad_norm": 3.828115701675415, "learning_rate": 1.9629163012508058e-05, "loss": 2.1618, "step": 35245 }, { "epoch": 0.46, "grad_norm": 3.3832013607025146, "learning_rate": 1.9629134661502965e-05, "loss": 1.6095, "step": 35246 }, { "epoch": 0.46, "grad_norm": 3.8508121967315674, "learning_rate": 1.9629106309434656e-05, "loss": 2.0255, "step": 35247 }, { "epoch": 0.46, "grad_norm": 3.5542800426483154, "learning_rate": 1.962907795630313e-05, "loss": 1.7007, "step": 35248 }, { "epoch": 0.46, "grad_norm": 3.819133758544922, "learning_rate": 1.962904960210839e-05, "loss": 2.0373, "step": 35249 }, { "epoch": 0.46, "grad_norm": 3.0876898765563965, "learning_rate": 1.9629021246850438e-05, "loss": 1.6056, "step": 35250 }, { "epoch": 0.46, "grad_norm": 3.4186253547668457, "learning_rate": 1.962899289052928e-05, "loss": 1.5463, "step": 35251 }, { "epoch": 0.46, "grad_norm": 3.7037854194641113, "learning_rate": 1.9628964533144915e-05, "loss": 1.9528, "step": 35252 }, { "epoch": 0.46, "grad_norm": 3.556062936782837, "learning_rate": 1.9628936174697353e-05, "loss": 1.9186, "step": 35253 }, { "epoch": 0.46, "grad_norm": 3.946000099182129, "learning_rate": 1.9628907815186592e-05, "loss": 2.1613, "step": 35254 }, { "epoch": 0.46, "grad_norm": 3.8097779750823975, "learning_rate": 1.9628879454612636e-05, "loss": 1.769, "step": 35255 }, { "epoch": 0.46, "grad_norm": 3.9719491004943848, "learning_rate": 1.962885109297549e-05, "loss": 1.6339, "step": 35256 }, { "epoch": 0.46, "grad_norm": 3.271116256713867, "learning_rate": 1.9628822730275155e-05, "loss": 1.4933, "step": 35257 }, { "epoch": 0.46, "grad_norm": 3.5687272548675537, "learning_rate": 1.962879436651163e-05, "loss": 2.1319, "step": 35258 }, { "epoch": 0.46, "grad_norm": 3.858177423477173, "learning_rate": 1.962876600168493e-05, "loss": 1.6166, "step": 35259 }, { "epoch": 0.46, "grad_norm": 3.5592572689056396, "learning_rate": 1.9628737635795045e-05, "loss": 2.1633, "step": 35260 }, { "epoch": 0.46, "grad_norm": 4.195516586303711, "learning_rate": 1.9628709268841985e-05, "loss": 2.3764, "step": 35261 }, { "epoch": 0.46, "grad_norm": 3.895963430404663, "learning_rate": 1.9628680900825752e-05, "loss": 1.9964, "step": 35262 }, { "epoch": 0.46, "grad_norm": 4.399892330169678, "learning_rate": 1.9628652531746352e-05, "loss": 2.2425, "step": 35263 }, { "epoch": 0.46, "grad_norm": 3.8049139976501465, "learning_rate": 1.9628624161603786e-05, "loss": 1.8754, "step": 35264 }, { "epoch": 0.46, "grad_norm": 3.72831654548645, "learning_rate": 1.9628595790398054e-05, "loss": 1.7386, "step": 35265 }, { "epoch": 0.46, "grad_norm": 4.186367988586426, "learning_rate": 1.962856741812916e-05, "loss": 2.154, "step": 35266 }, { "epoch": 0.46, "grad_norm": 4.429605960845947, "learning_rate": 1.9628539044797113e-05, "loss": 2.0583, "step": 35267 }, { "epoch": 0.46, "grad_norm": 3.770671844482422, "learning_rate": 1.9628510670401906e-05, "loss": 2.1077, "step": 35268 }, { "epoch": 0.46, "grad_norm": 3.823828935623169, "learning_rate": 1.9628482294943555e-05, "loss": 2.1242, "step": 35269 }, { "epoch": 0.46, "grad_norm": 3.8388986587524414, "learning_rate": 1.962845391842205e-05, "loss": 1.7739, "step": 35270 }, { "epoch": 0.46, "grad_norm": 3.5396153926849365, "learning_rate": 1.9628425540837406e-05, "loss": 1.7631, "step": 35271 }, { "epoch": 0.46, "grad_norm": 3.9946887493133545, "learning_rate": 1.962839716218962e-05, "loss": 1.9366, "step": 35272 }, { "epoch": 0.46, "grad_norm": 4.292941570281982, "learning_rate": 1.9628368782478692e-05, "loss": 1.8993, "step": 35273 }, { "epoch": 0.46, "grad_norm": 3.1265411376953125, "learning_rate": 1.962834040170463e-05, "loss": 1.6566, "step": 35274 }, { "epoch": 0.46, "grad_norm": 3.7898638248443604, "learning_rate": 1.9628312019867435e-05, "loss": 2.0879, "step": 35275 }, { "epoch": 0.46, "grad_norm": 3.77779221534729, "learning_rate": 1.9628283636967113e-05, "loss": 2.1901, "step": 35276 }, { "epoch": 0.46, "grad_norm": 3.767056703567505, "learning_rate": 1.9628255253003663e-05, "loss": 1.6486, "step": 35277 }, { "epoch": 0.46, "grad_norm": 3.8075170516967773, "learning_rate": 1.962822686797709e-05, "loss": 2.1013, "step": 35278 }, { "epoch": 0.46, "grad_norm": 3.795347213745117, "learning_rate": 1.96281984818874e-05, "loss": 2.3297, "step": 35279 }, { "epoch": 0.46, "grad_norm": 3.7636332511901855, "learning_rate": 1.962817009473459e-05, "loss": 2.4735, "step": 35280 }, { "epoch": 0.46, "grad_norm": 3.773068428039551, "learning_rate": 1.9628141706518673e-05, "loss": 1.9152, "step": 35281 }, { "epoch": 0.46, "grad_norm": 3.8261098861694336, "learning_rate": 1.962811331723964e-05, "loss": 2.0713, "step": 35282 }, { "epoch": 0.46, "grad_norm": 3.7838351726531982, "learning_rate": 1.9628084926897504e-05, "loss": 1.9035, "step": 35283 }, { "epoch": 0.46, "grad_norm": 3.810631036758423, "learning_rate": 1.9628056535492263e-05, "loss": 2.0931, "step": 35284 }, { "epoch": 0.46, "grad_norm": 3.6315698623657227, "learning_rate": 1.9628028143023918e-05, "loss": 2.1768, "step": 35285 }, { "epoch": 0.46, "grad_norm": 3.4613559246063232, "learning_rate": 1.9627999749492478e-05, "loss": 2.0432, "step": 35286 }, { "epoch": 0.46, "grad_norm": 3.476170778274536, "learning_rate": 1.9627971354897945e-05, "loss": 1.704, "step": 35287 }, { "epoch": 0.46, "grad_norm": 3.9539897441864014, "learning_rate": 1.9627942959240317e-05, "loss": 2.3163, "step": 35288 }, { "epoch": 0.46, "grad_norm": 4.075846195220947, "learning_rate": 1.9627914562519602e-05, "loss": 2.3724, "step": 35289 }, { "epoch": 0.46, "grad_norm": 3.1693389415740967, "learning_rate": 1.96278861647358e-05, "loss": 1.7153, "step": 35290 }, { "epoch": 0.46, "grad_norm": 3.6484274864196777, "learning_rate": 1.962785776588892e-05, "loss": 2.297, "step": 35291 }, { "epoch": 0.46, "grad_norm": 3.2605533599853516, "learning_rate": 1.962782936597896e-05, "loss": 1.8144, "step": 35292 }, { "epoch": 0.46, "grad_norm": 3.8991620540618896, "learning_rate": 1.9627800965005926e-05, "loss": 2.0411, "step": 35293 }, { "epoch": 0.46, "grad_norm": 3.7003185749053955, "learning_rate": 1.9627772562969815e-05, "loss": 1.8975, "step": 35294 }, { "epoch": 0.46, "grad_norm": 4.127624034881592, "learning_rate": 1.9627744159870637e-05, "loss": 2.3105, "step": 35295 }, { "epoch": 0.46, "grad_norm": 3.5444157123565674, "learning_rate": 1.9627715755708392e-05, "loss": 1.8893, "step": 35296 }, { "epoch": 0.46, "grad_norm": 3.5765841007232666, "learning_rate": 1.9627687350483083e-05, "loss": 1.8288, "step": 35297 }, { "epoch": 0.46, "grad_norm": 3.6473708152770996, "learning_rate": 1.9627658944194718e-05, "loss": 1.8914, "step": 35298 }, { "epoch": 0.46, "grad_norm": 3.8537333011627197, "learning_rate": 1.9627630536843296e-05, "loss": 2.0609, "step": 35299 }, { "epoch": 0.46, "grad_norm": 4.087678909301758, "learning_rate": 1.9627602128428816e-05, "loss": 2.366, "step": 35300 }, { "epoch": 0.46, "grad_norm": 4.152638912200928, "learning_rate": 1.9627573718951284e-05, "loss": 2.2007, "step": 35301 }, { "epoch": 0.46, "grad_norm": 5.755977153778076, "learning_rate": 1.9627545308410708e-05, "loss": 1.8721, "step": 35302 }, { "epoch": 0.46, "grad_norm": 3.4047727584838867, "learning_rate": 1.962751689680709e-05, "loss": 1.46, "step": 35303 }, { "epoch": 0.46, "grad_norm": 3.9020819664001465, "learning_rate": 1.9627488484140426e-05, "loss": 1.8837, "step": 35304 }, { "epoch": 0.46, "grad_norm": 3.4770119190216064, "learning_rate": 1.9627460070410727e-05, "loss": 1.7821, "step": 35305 }, { "epoch": 0.46, "grad_norm": 3.6327226161956787, "learning_rate": 1.962743165561799e-05, "loss": 2.0483, "step": 35306 }, { "epoch": 0.46, "grad_norm": 4.237329006195068, "learning_rate": 1.9627403239762226e-05, "loss": 1.9261, "step": 35307 }, { "epoch": 0.46, "grad_norm": 3.2341091632843018, "learning_rate": 1.962737482284343e-05, "loss": 1.578, "step": 35308 }, { "epoch": 0.46, "grad_norm": 3.53739595413208, "learning_rate": 1.962734640486161e-05, "loss": 1.8644, "step": 35309 }, { "epoch": 0.46, "grad_norm": 3.800211191177368, "learning_rate": 1.9627317985816766e-05, "loss": 2.3617, "step": 35310 }, { "epoch": 0.46, "grad_norm": 3.893173933029175, "learning_rate": 1.96272895657089e-05, "loss": 1.8241, "step": 35311 }, { "epoch": 0.46, "grad_norm": 3.912858486175537, "learning_rate": 1.9627261144538025e-05, "loss": 1.9938, "step": 35312 }, { "epoch": 0.46, "grad_norm": 4.070573329925537, "learning_rate": 1.9627232722304132e-05, "loss": 2.2022, "step": 35313 }, { "epoch": 0.46, "grad_norm": 3.6279618740081787, "learning_rate": 1.9627204299007233e-05, "loss": 2.0211, "step": 35314 }, { "epoch": 0.46, "grad_norm": 3.298424243927002, "learning_rate": 1.9627175874647325e-05, "loss": 1.7518, "step": 35315 }, { "epoch": 0.46, "grad_norm": 3.546379327774048, "learning_rate": 1.9627147449224414e-05, "loss": 1.8262, "step": 35316 }, { "epoch": 0.46, "grad_norm": 4.3855977058410645, "learning_rate": 1.96271190227385e-05, "loss": 2.146, "step": 35317 }, { "epoch": 0.46, "grad_norm": 3.623843193054199, "learning_rate": 1.962709059518959e-05, "loss": 1.7435, "step": 35318 }, { "epoch": 0.46, "grad_norm": 4.112883567810059, "learning_rate": 1.9627062166577686e-05, "loss": 1.8147, "step": 35319 }, { "epoch": 0.46, "grad_norm": 3.3916738033294678, "learning_rate": 1.962703373690279e-05, "loss": 1.7778, "step": 35320 }, { "epoch": 0.46, "grad_norm": 3.722581386566162, "learning_rate": 1.962700530616491e-05, "loss": 2.1329, "step": 35321 }, { "epoch": 0.46, "grad_norm": 3.8074984550476074, "learning_rate": 1.9626976874364042e-05, "loss": 1.816, "step": 35322 }, { "epoch": 0.46, "grad_norm": 3.642922878265381, "learning_rate": 1.9626948441500194e-05, "loss": 2.1791, "step": 35323 }, { "epoch": 0.46, "grad_norm": 4.091037750244141, "learning_rate": 1.962692000757337e-05, "loss": 1.8189, "step": 35324 }, { "epoch": 0.46, "grad_norm": 4.003404140472412, "learning_rate": 1.9626891572583565e-05, "loss": 2.2636, "step": 35325 }, { "epoch": 0.46, "grad_norm": 3.3143110275268555, "learning_rate": 1.962686313653079e-05, "loss": 1.4536, "step": 35326 }, { "epoch": 0.46, "grad_norm": 4.09091854095459, "learning_rate": 1.9626834699415047e-05, "loss": 2.1269, "step": 35327 }, { "epoch": 0.46, "grad_norm": 4.724449634552002, "learning_rate": 1.962680626123634e-05, "loss": 2.2323, "step": 35328 }, { "epoch": 0.46, "grad_norm": 3.2691848278045654, "learning_rate": 1.9626777821994667e-05, "loss": 1.8348, "step": 35329 }, { "epoch": 0.46, "grad_norm": 3.977315664291382, "learning_rate": 1.9626749381690034e-05, "loss": 2.1456, "step": 35330 }, { "epoch": 0.46, "grad_norm": 4.028914928436279, "learning_rate": 1.9626720940322447e-05, "loss": 1.8004, "step": 35331 }, { "epoch": 0.46, "grad_norm": 3.4794650077819824, "learning_rate": 1.9626692497891907e-05, "loss": 1.9096, "step": 35332 }, { "epoch": 0.46, "grad_norm": 3.4585366249084473, "learning_rate": 1.9626664054398415e-05, "loss": 1.5518, "step": 35333 }, { "epoch": 0.46, "grad_norm": 3.3846161365509033, "learning_rate": 1.9626635609841975e-05, "loss": 1.7672, "step": 35334 }, { "epoch": 0.46, "grad_norm": 4.004365921020508, "learning_rate": 1.9626607164222594e-05, "loss": 1.5457, "step": 35335 }, { "epoch": 0.46, "grad_norm": 3.6868855953216553, "learning_rate": 1.962657871754027e-05, "loss": 2.1123, "step": 35336 }, { "epoch": 0.46, "grad_norm": 3.354494333267212, "learning_rate": 1.962655026979501e-05, "loss": 1.5082, "step": 35337 }, { "epoch": 0.46, "grad_norm": 4.159939765930176, "learning_rate": 1.9626521820986816e-05, "loss": 2.0231, "step": 35338 }, { "epoch": 0.46, "grad_norm": 3.733482837677002, "learning_rate": 1.962649337111569e-05, "loss": 1.7462, "step": 35339 }, { "epoch": 0.46, "grad_norm": 4.384616851806641, "learning_rate": 1.9626464920181633e-05, "loss": 2.3529, "step": 35340 }, { "epoch": 0.46, "grad_norm": 3.9468889236450195, "learning_rate": 1.9626436468184652e-05, "loss": 2.276, "step": 35341 }, { "epoch": 0.46, "grad_norm": 3.7133188247680664, "learning_rate": 1.962640801512475e-05, "loss": 2.043, "step": 35342 }, { "epoch": 0.46, "grad_norm": 4.257806777954102, "learning_rate": 1.962637956100193e-05, "loss": 2.7185, "step": 35343 }, { "epoch": 0.46, "grad_norm": 4.162090301513672, "learning_rate": 1.9626351105816196e-05, "loss": 2.3549, "step": 35344 }, { "epoch": 0.46, "grad_norm": 3.866903305053711, "learning_rate": 1.9626322649567547e-05, "loss": 2.1733, "step": 35345 }, { "epoch": 0.46, "grad_norm": 3.427628993988037, "learning_rate": 1.962629419225599e-05, "loss": 1.6178, "step": 35346 }, { "epoch": 0.46, "grad_norm": 4.322795391082764, "learning_rate": 1.9626265733881526e-05, "loss": 2.086, "step": 35347 }, { "epoch": 0.46, "grad_norm": 3.4782631397247314, "learning_rate": 1.9626237274444157e-05, "loss": 1.9733, "step": 35348 }, { "epoch": 0.46, "grad_norm": 4.138693332672119, "learning_rate": 1.962620881394389e-05, "loss": 2.1171, "step": 35349 }, { "epoch": 0.46, "grad_norm": 4.082200527191162, "learning_rate": 1.9626180352380726e-05, "loss": 2.2714, "step": 35350 }, { "epoch": 0.46, "grad_norm": 3.6850461959838867, "learning_rate": 1.962615188975467e-05, "loss": 1.8657, "step": 35351 }, { "epoch": 0.46, "grad_norm": 3.584461212158203, "learning_rate": 1.9626123426065722e-05, "loss": 1.9404, "step": 35352 }, { "epoch": 0.46, "grad_norm": 3.6834499835968018, "learning_rate": 1.9626094961313886e-05, "loss": 1.595, "step": 35353 }, { "epoch": 0.46, "grad_norm": 4.26446533203125, "learning_rate": 1.962606649549917e-05, "loss": 1.9765, "step": 35354 }, { "epoch": 0.46, "grad_norm": 4.0361809730529785, "learning_rate": 1.9626038028621567e-05, "loss": 2.2671, "step": 35355 }, { "epoch": 0.46, "grad_norm": 3.7264740467071533, "learning_rate": 1.962600956068109e-05, "loss": 2.3253, "step": 35356 }, { "epoch": 0.46, "grad_norm": 3.644843578338623, "learning_rate": 1.9625981091677736e-05, "loss": 2.0813, "step": 35357 }, { "epoch": 0.46, "grad_norm": 3.469202995300293, "learning_rate": 1.962595262161151e-05, "loss": 1.8441, "step": 35358 }, { "epoch": 0.46, "grad_norm": 3.643998384475708, "learning_rate": 1.9625924150482417e-05, "loss": 2.0832, "step": 35359 }, { "epoch": 0.46, "grad_norm": 4.367903232574463, "learning_rate": 1.962589567829046e-05, "loss": 2.3184, "step": 35360 }, { "epoch": 0.46, "grad_norm": 3.682687759399414, "learning_rate": 1.9625867205035635e-05, "loss": 1.8009, "step": 35361 }, { "epoch": 0.46, "grad_norm": 3.7903406620025635, "learning_rate": 1.9625838730717958e-05, "loss": 1.9152, "step": 35362 }, { "epoch": 0.46, "grad_norm": 3.8459224700927734, "learning_rate": 1.962581025533742e-05, "loss": 1.9687, "step": 35363 }, { "epoch": 0.46, "grad_norm": 4.112179756164551, "learning_rate": 1.962578177889403e-05, "loss": 2.1783, "step": 35364 }, { "epoch": 0.46, "grad_norm": 3.8516318798065186, "learning_rate": 1.9625753301387796e-05, "loss": 2.1127, "step": 35365 }, { "epoch": 0.46, "grad_norm": 3.975804567337036, "learning_rate": 1.962572482281871e-05, "loss": 2.2039, "step": 35366 }, { "epoch": 0.46, "grad_norm": 3.944680690765381, "learning_rate": 1.9625696343186783e-05, "loss": 2.1713, "step": 35367 }, { "epoch": 0.46, "grad_norm": 2.8610000610351562, "learning_rate": 1.9625667862492014e-05, "loss": 1.358, "step": 35368 }, { "epoch": 0.46, "grad_norm": 3.2949256896972656, "learning_rate": 1.962563938073441e-05, "loss": 1.3428, "step": 35369 }, { "epoch": 0.46, "grad_norm": 4.104162216186523, "learning_rate": 1.962561089791397e-05, "loss": 1.8099, "step": 35370 }, { "epoch": 0.46, "grad_norm": 3.3727357387542725, "learning_rate": 1.9625582414030702e-05, "loss": 1.7775, "step": 35371 }, { "epoch": 0.46, "grad_norm": 3.7318272590637207, "learning_rate": 1.96255539290846e-05, "loss": 1.833, "step": 35372 }, { "epoch": 0.46, "grad_norm": 3.669163703918457, "learning_rate": 1.9625525443075683e-05, "loss": 2.105, "step": 35373 }, { "epoch": 0.46, "grad_norm": 3.43587064743042, "learning_rate": 1.962549695600394e-05, "loss": 1.7926, "step": 35374 }, { "epoch": 0.46, "grad_norm": 3.6126492023468018, "learning_rate": 1.9625468467869378e-05, "loss": 1.8928, "step": 35375 }, { "epoch": 0.46, "grad_norm": 4.263759136199951, "learning_rate": 1.9625439978672e-05, "loss": 2.5426, "step": 35376 }, { "epoch": 0.46, "grad_norm": 4.102530002593994, "learning_rate": 1.962541148841181e-05, "loss": 2.028, "step": 35377 }, { "epoch": 0.46, "grad_norm": 3.8064935207366943, "learning_rate": 1.962538299708882e-05, "loss": 1.8991, "step": 35378 }, { "epoch": 0.46, "grad_norm": 3.713993549346924, "learning_rate": 1.9625354504703014e-05, "loss": 2.0815, "step": 35379 }, { "epoch": 0.46, "grad_norm": 3.696281671524048, "learning_rate": 1.962532601125441e-05, "loss": 1.9861, "step": 35380 }, { "epoch": 0.46, "grad_norm": 3.4698710441589355, "learning_rate": 1.9625297516743005e-05, "loss": 1.9002, "step": 35381 }, { "epoch": 0.46, "grad_norm": 3.3432540893554688, "learning_rate": 1.9625269021168805e-05, "loss": 1.8489, "step": 35382 }, { "epoch": 0.46, "grad_norm": 3.5933144092559814, "learning_rate": 1.9625240524531813e-05, "loss": 1.8882, "step": 35383 }, { "epoch": 0.46, "grad_norm": 4.122488021850586, "learning_rate": 1.9625212026832028e-05, "loss": 2.3188, "step": 35384 }, { "epoch": 0.46, "grad_norm": 3.3115224838256836, "learning_rate": 1.9625183528069457e-05, "loss": 1.6074, "step": 35385 }, { "epoch": 0.46, "grad_norm": 3.9869091510772705, "learning_rate": 1.9625155028244104e-05, "loss": 1.9481, "step": 35386 }, { "epoch": 0.46, "grad_norm": 3.428779125213623, "learning_rate": 1.962512652735597e-05, "loss": 1.6795, "step": 35387 }, { "epoch": 0.46, "grad_norm": 3.3007795810699463, "learning_rate": 1.962509802540506e-05, "loss": 1.739, "step": 35388 }, { "epoch": 0.46, "grad_norm": 3.9680540561676025, "learning_rate": 1.9625069522391373e-05, "loss": 1.9458, "step": 35389 }, { "epoch": 0.46, "grad_norm": 3.789605140686035, "learning_rate": 1.9625041018314917e-05, "loss": 2.1463, "step": 35390 }, { "epoch": 0.46, "grad_norm": 4.10469388961792, "learning_rate": 1.9625012513175693e-05, "loss": 1.9231, "step": 35391 }, { "epoch": 0.46, "grad_norm": 4.014516830444336, "learning_rate": 1.9624984006973706e-05, "loss": 2.5776, "step": 35392 }, { "epoch": 0.46, "grad_norm": 3.1986804008483887, "learning_rate": 1.9624955499708954e-05, "loss": 1.5812, "step": 35393 }, { "epoch": 0.46, "grad_norm": 3.7312867641448975, "learning_rate": 1.9624926991381446e-05, "loss": 1.88, "step": 35394 }, { "epoch": 0.46, "grad_norm": 3.821838855743408, "learning_rate": 1.9624898481991183e-05, "loss": 2.0389, "step": 35395 }, { "epoch": 0.46, "grad_norm": 3.7672924995422363, "learning_rate": 1.962486997153817e-05, "loss": 2.1394, "step": 35396 }, { "epoch": 0.46, "grad_norm": 3.3981690406799316, "learning_rate": 1.9624841460022402e-05, "loss": 1.5271, "step": 35397 }, { "epoch": 0.46, "grad_norm": 3.8904407024383545, "learning_rate": 1.962481294744389e-05, "loss": 1.929, "step": 35398 }, { "epoch": 0.46, "grad_norm": 3.3708348274230957, "learning_rate": 1.9624784433802637e-05, "loss": 1.5228, "step": 35399 }, { "epoch": 0.46, "grad_norm": 4.523777008056641, "learning_rate": 1.9624755919098645e-05, "loss": 2.4679, "step": 35400 }, { "epoch": 0.46, "grad_norm": 3.2201342582702637, "learning_rate": 1.9624727403331918e-05, "loss": 1.5605, "step": 35401 }, { "epoch": 0.46, "grad_norm": 4.981506824493408, "learning_rate": 1.9624698886502456e-05, "loss": 2.7751, "step": 35402 }, { "epoch": 0.46, "grad_norm": 4.159149169921875, "learning_rate": 1.962467036861026e-05, "loss": 1.9925, "step": 35403 }, { "epoch": 0.46, "grad_norm": 3.8053152561187744, "learning_rate": 1.9624641849655345e-05, "loss": 2.5542, "step": 35404 }, { "epoch": 0.46, "grad_norm": 4.086404800415039, "learning_rate": 1.9624613329637702e-05, "loss": 1.9029, "step": 35405 }, { "epoch": 0.46, "grad_norm": 3.199646472930908, "learning_rate": 1.9624584808557338e-05, "loss": 1.7874, "step": 35406 }, { "epoch": 0.46, "grad_norm": 4.384512901306152, "learning_rate": 1.9624556286414255e-05, "loss": 2.0291, "step": 35407 }, { "epoch": 0.46, "grad_norm": 3.669771432876587, "learning_rate": 1.962452776320846e-05, "loss": 2.0071, "step": 35408 }, { "epoch": 0.46, "grad_norm": 3.8608596324920654, "learning_rate": 1.9624499238939957e-05, "loss": 2.0536, "step": 35409 }, { "epoch": 0.46, "grad_norm": 4.242034912109375, "learning_rate": 1.9624470713608737e-05, "loss": 2.7698, "step": 35410 }, { "epoch": 0.46, "grad_norm": 3.7992875576019287, "learning_rate": 1.962444218721482e-05, "loss": 2.1835, "step": 35411 }, { "epoch": 0.46, "grad_norm": 4.1571455001831055, "learning_rate": 1.9624413659758198e-05, "loss": 2.12, "step": 35412 }, { "epoch": 0.46, "grad_norm": 3.785046100616455, "learning_rate": 1.9624385131238882e-05, "loss": 2.2299, "step": 35413 }, { "epoch": 0.46, "grad_norm": 3.243983507156372, "learning_rate": 1.9624356601656865e-05, "loss": 1.5079, "step": 35414 }, { "epoch": 0.46, "grad_norm": 4.2429351806640625, "learning_rate": 1.9624328071012157e-05, "loss": 1.881, "step": 35415 }, { "epoch": 0.46, "grad_norm": 2.9458014965057373, "learning_rate": 1.9624299539304765e-05, "loss": 1.5569, "step": 35416 }, { "epoch": 0.46, "grad_norm": 3.688966989517212, "learning_rate": 1.9624271006534678e-05, "loss": 2.059, "step": 35417 }, { "epoch": 0.46, "grad_norm": 3.5917842388153076, "learning_rate": 1.9624242472701914e-05, "loss": 1.9475, "step": 35418 }, { "epoch": 0.46, "grad_norm": 3.48587965965271, "learning_rate": 1.962421393780647e-05, "loss": 1.8329, "step": 35419 }, { "epoch": 0.46, "grad_norm": 3.66955828666687, "learning_rate": 1.9624185401848348e-05, "loss": 1.9132, "step": 35420 }, { "epoch": 0.46, "grad_norm": 3.999974250793457, "learning_rate": 1.962415686482755e-05, "loss": 2.14, "step": 35421 }, { "epoch": 0.46, "grad_norm": 3.841073513031006, "learning_rate": 1.9624128326744088e-05, "loss": 1.8198, "step": 35422 }, { "epoch": 0.46, "grad_norm": 3.7013919353485107, "learning_rate": 1.9624099787597958e-05, "loss": 1.8802, "step": 35423 }, { "epoch": 0.46, "grad_norm": 3.7975423336029053, "learning_rate": 1.9624071247389164e-05, "loss": 1.9836, "step": 35424 }, { "epoch": 0.46, "grad_norm": 3.834296703338623, "learning_rate": 1.9624042706117706e-05, "loss": 2.0498, "step": 35425 }, { "epoch": 0.46, "grad_norm": 4.528853893280029, "learning_rate": 1.9624014163783594e-05, "loss": 2.4915, "step": 35426 }, { "epoch": 0.46, "grad_norm": 4.037942886352539, "learning_rate": 1.9623985620386822e-05, "loss": 2.1364, "step": 35427 }, { "epoch": 0.46, "grad_norm": 4.0576863288879395, "learning_rate": 1.9623957075927406e-05, "loss": 2.1308, "step": 35428 }, { "epoch": 0.46, "grad_norm": 4.3409743309021, "learning_rate": 1.9623928530405337e-05, "loss": 2.0386, "step": 35429 }, { "epoch": 0.46, "grad_norm": 3.482576847076416, "learning_rate": 1.9623899983820624e-05, "loss": 1.6849, "step": 35430 }, { "epoch": 0.46, "grad_norm": 3.731318712234497, "learning_rate": 1.9623871436173268e-05, "loss": 2.1955, "step": 35431 }, { "epoch": 0.46, "grad_norm": 4.133090019226074, "learning_rate": 1.9623842887463275e-05, "loss": 2.2793, "step": 35432 }, { "epoch": 0.46, "grad_norm": 4.196828365325928, "learning_rate": 1.9623814337690645e-05, "loss": 2.3822, "step": 35433 }, { "epoch": 0.46, "grad_norm": 4.026767730712891, "learning_rate": 1.9623785786855385e-05, "loss": 2.2618, "step": 35434 }, { "epoch": 0.46, "grad_norm": 3.832202196121216, "learning_rate": 1.9623757234957496e-05, "loss": 2.0297, "step": 35435 }, { "epoch": 0.46, "grad_norm": 4.581133842468262, "learning_rate": 1.962372868199698e-05, "loss": 1.9985, "step": 35436 }, { "epoch": 0.46, "grad_norm": 3.6505234241485596, "learning_rate": 1.962370012797384e-05, "loss": 1.9457, "step": 35437 }, { "epoch": 0.46, "grad_norm": 3.6582987308502197, "learning_rate": 1.9623671572888078e-05, "loss": 1.9738, "step": 35438 }, { "epoch": 0.46, "grad_norm": 3.136223077774048, "learning_rate": 1.9623643016739703e-05, "loss": 1.4737, "step": 35439 }, { "epoch": 0.46, "grad_norm": 4.35699987411499, "learning_rate": 1.9623614459528714e-05, "loss": 2.5216, "step": 35440 }, { "epoch": 0.46, "grad_norm": 4.1274943351745605, "learning_rate": 1.9623585901255116e-05, "loss": 2.0308, "step": 35441 }, { "epoch": 0.46, "grad_norm": 3.3477492332458496, "learning_rate": 1.962355734191891e-05, "loss": 1.6549, "step": 35442 }, { "epoch": 0.46, "grad_norm": 3.793084144592285, "learning_rate": 1.9623528781520098e-05, "loss": 1.945, "step": 35443 }, { "epoch": 0.46, "grad_norm": 3.861687421798706, "learning_rate": 1.9623500220058688e-05, "loss": 1.9931, "step": 35444 }, { "epoch": 0.46, "grad_norm": 4.010940074920654, "learning_rate": 1.962347165753468e-05, "loss": 1.7711, "step": 35445 }, { "epoch": 0.46, "grad_norm": 3.847606658935547, "learning_rate": 1.9623443093948073e-05, "loss": 2.1235, "step": 35446 }, { "epoch": 0.46, "grad_norm": 4.008980751037598, "learning_rate": 1.962341452929888e-05, "loss": 2.1905, "step": 35447 }, { "epoch": 0.46, "grad_norm": 3.867056131362915, "learning_rate": 1.9623385963587095e-05, "loss": 1.9703, "step": 35448 }, { "epoch": 0.46, "grad_norm": 4.240579605102539, "learning_rate": 1.9623357396812728e-05, "loss": 2.3359, "step": 35449 }, { "epoch": 0.46, "grad_norm": 3.639209032058716, "learning_rate": 1.9623328828975776e-05, "loss": 1.8791, "step": 35450 }, { "epoch": 0.46, "grad_norm": 3.270651340484619, "learning_rate": 1.9623300260076248e-05, "loss": 1.6858, "step": 35451 }, { "epoch": 0.46, "grad_norm": 3.831712007522583, "learning_rate": 1.962327169011414e-05, "loss": 1.8298, "step": 35452 }, { "epoch": 0.46, "grad_norm": 4.158888816833496, "learning_rate": 1.9623243119089464e-05, "loss": 2.1115, "step": 35453 }, { "epoch": 0.46, "grad_norm": 3.7808876037597656, "learning_rate": 1.962321454700222e-05, "loss": 1.7694, "step": 35454 }, { "epoch": 0.46, "grad_norm": 3.782219886779785, "learning_rate": 1.9623185973852405e-05, "loss": 1.9178, "step": 35455 }, { "epoch": 0.46, "grad_norm": 5.337497711181641, "learning_rate": 1.962315739964003e-05, "loss": 2.2819, "step": 35456 }, { "epoch": 0.46, "grad_norm": 3.897256851196289, "learning_rate": 1.9623128824365094e-05, "loss": 1.8156, "step": 35457 }, { "epoch": 0.46, "grad_norm": 3.3911168575286865, "learning_rate": 1.9623100248027603e-05, "loss": 1.619, "step": 35458 }, { "epoch": 0.46, "grad_norm": 3.78398060798645, "learning_rate": 1.9623071670627558e-05, "loss": 1.9039, "step": 35459 }, { "epoch": 0.46, "grad_norm": 3.7953412532806396, "learning_rate": 1.9623043092164963e-05, "loss": 1.8472, "step": 35460 }, { "epoch": 0.46, "grad_norm": 4.011004447937012, "learning_rate": 1.9623014512639817e-05, "loss": 2.5048, "step": 35461 }, { "epoch": 0.46, "grad_norm": 4.390345096588135, "learning_rate": 1.962298593205213e-05, "loss": 2.1959, "step": 35462 }, { "epoch": 0.46, "grad_norm": 3.543454647064209, "learning_rate": 1.9622957350401905e-05, "loss": 1.9797, "step": 35463 }, { "epoch": 0.46, "grad_norm": 4.225953102111816, "learning_rate": 1.9622928767689136e-05, "loss": 1.9249, "step": 35464 }, { "epoch": 0.46, "grad_norm": 3.7546470165252686, "learning_rate": 1.9622900183913836e-05, "loss": 1.8217, "step": 35465 }, { "epoch": 0.46, "grad_norm": 3.343696117401123, "learning_rate": 1.9622871599076004e-05, "loss": 1.5835, "step": 35466 }, { "epoch": 0.46, "grad_norm": 3.5441107749938965, "learning_rate": 1.9622843013175644e-05, "loss": 2.2269, "step": 35467 }, { "epoch": 0.46, "grad_norm": 4.894674301147461, "learning_rate": 1.962281442621276e-05, "loss": 2.2363, "step": 35468 }, { "epoch": 0.46, "grad_norm": 4.049095630645752, "learning_rate": 1.9622785838187352e-05, "loss": 2.1175, "step": 35469 }, { "epoch": 0.46, "grad_norm": 3.5824077129364014, "learning_rate": 1.9622757249099424e-05, "loss": 2.0113, "step": 35470 }, { "epoch": 0.46, "grad_norm": 3.8185360431671143, "learning_rate": 1.962272865894898e-05, "loss": 2.1117, "step": 35471 }, { "epoch": 0.46, "grad_norm": 4.090023517608643, "learning_rate": 1.962270006773603e-05, "loss": 2.5013, "step": 35472 }, { "epoch": 0.46, "grad_norm": 3.769448757171631, "learning_rate": 1.9622671475460564e-05, "loss": 2.0581, "step": 35473 }, { "epoch": 0.46, "grad_norm": 3.5940380096435547, "learning_rate": 1.9622642882122594e-05, "loss": 1.8575, "step": 35474 }, { "epoch": 0.46, "grad_norm": 4.470620155334473, "learning_rate": 1.9622614287722122e-05, "loss": 2.0838, "step": 35475 }, { "epoch": 0.46, "grad_norm": 3.684643030166626, "learning_rate": 1.962258569225915e-05, "loss": 1.9447, "step": 35476 }, { "epoch": 0.46, "grad_norm": 4.479037284851074, "learning_rate": 1.962255709573368e-05, "loss": 2.1352, "step": 35477 }, { "epoch": 0.46, "grad_norm": 3.7058465480804443, "learning_rate": 1.9622528498145718e-05, "loss": 1.831, "step": 35478 }, { "epoch": 0.46, "grad_norm": 3.8535943031311035, "learning_rate": 1.9622499899495266e-05, "loss": 2.0507, "step": 35479 }, { "epoch": 0.46, "grad_norm": 4.872381210327148, "learning_rate": 1.9622471299782325e-05, "loss": 2.2962, "step": 35480 }, { "epoch": 0.46, "grad_norm": 3.5832529067993164, "learning_rate": 1.9622442699006898e-05, "loss": 1.7631, "step": 35481 }, { "epoch": 0.46, "grad_norm": 2.9675862789154053, "learning_rate": 1.9622414097168993e-05, "loss": 1.3519, "step": 35482 }, { "epoch": 0.46, "grad_norm": 3.5850670337677, "learning_rate": 1.9622385494268606e-05, "loss": 1.799, "step": 35483 }, { "epoch": 0.46, "grad_norm": 3.3572816848754883, "learning_rate": 1.9622356890305753e-05, "loss": 1.8872, "step": 35484 }, { "epoch": 0.46, "grad_norm": 3.995840549468994, "learning_rate": 1.9622328285280422e-05, "loss": 2.0767, "step": 35485 }, { "epoch": 0.46, "grad_norm": 3.9685842990875244, "learning_rate": 1.9622299679192624e-05, "loss": 1.8773, "step": 35486 }, { "epoch": 0.46, "grad_norm": 4.045562267303467, "learning_rate": 1.9622271072042362e-05, "loss": 1.9619, "step": 35487 }, { "epoch": 0.46, "grad_norm": 3.434809684753418, "learning_rate": 1.9622242463829636e-05, "loss": 1.4569, "step": 35488 }, { "epoch": 0.46, "grad_norm": 4.401946067810059, "learning_rate": 1.9622213854554453e-05, "loss": 2.4667, "step": 35489 }, { "epoch": 0.46, "grad_norm": 3.3988709449768066, "learning_rate": 1.9622185244216813e-05, "loss": 1.5347, "step": 35490 }, { "epoch": 0.46, "grad_norm": 3.869668960571289, "learning_rate": 1.962215663281672e-05, "loss": 2.0717, "step": 35491 }, { "epoch": 0.46, "grad_norm": 3.8497719764709473, "learning_rate": 1.9622128020354178e-05, "loss": 1.9231, "step": 35492 }, { "epoch": 0.46, "grad_norm": 4.210883140563965, "learning_rate": 1.9622099406829193e-05, "loss": 2.4062, "step": 35493 }, { "epoch": 0.46, "grad_norm": 3.999967098236084, "learning_rate": 1.962207079224176e-05, "loss": 1.6837, "step": 35494 }, { "epoch": 0.46, "grad_norm": 3.3858909606933594, "learning_rate": 1.9622042176591893e-05, "loss": 1.8408, "step": 35495 }, { "epoch": 0.46, "grad_norm": 4.008309841156006, "learning_rate": 1.9622013559879588e-05, "loss": 1.6634, "step": 35496 }, { "epoch": 0.46, "grad_norm": 3.9825315475463867, "learning_rate": 1.9621984942104846e-05, "loss": 2.0753, "step": 35497 }, { "epoch": 0.46, "grad_norm": 3.9194583892822266, "learning_rate": 1.9621956323267674e-05, "loss": 2.1014, "step": 35498 }, { "epoch": 0.46, "grad_norm": 3.349630355834961, "learning_rate": 1.962192770336808e-05, "loss": 1.8429, "step": 35499 }, { "epoch": 0.46, "grad_norm": 4.061105251312256, "learning_rate": 1.9621899082406057e-05, "loss": 1.6844, "step": 35500 }, { "epoch": 0.46, "grad_norm": 4.0287089347839355, "learning_rate": 1.962187046038161e-05, "loss": 1.9516, "step": 35501 }, { "epoch": 0.46, "grad_norm": 3.699751615524292, "learning_rate": 1.962184183729475e-05, "loss": 1.988, "step": 35502 }, { "epoch": 0.46, "grad_norm": 3.9326374530792236, "learning_rate": 1.9621813213145475e-05, "loss": 1.5775, "step": 35503 }, { "epoch": 0.46, "grad_norm": 3.9933102130889893, "learning_rate": 1.9621784587933787e-05, "loss": 1.8661, "step": 35504 }, { "epoch": 0.46, "grad_norm": 3.2023353576660156, "learning_rate": 1.9621755961659693e-05, "loss": 1.6331, "step": 35505 }, { "epoch": 0.46, "grad_norm": 3.5931639671325684, "learning_rate": 1.9621727334323192e-05, "loss": 1.8881, "step": 35506 }, { "epoch": 0.46, "grad_norm": 4.0479350090026855, "learning_rate": 1.9621698705924292e-05, "loss": 2.0731, "step": 35507 }, { "epoch": 0.46, "grad_norm": 4.381712436676025, "learning_rate": 1.9621670076462992e-05, "loss": 2.1553, "step": 35508 }, { "epoch": 0.46, "grad_norm": 3.391303777694702, "learning_rate": 1.9621641445939293e-05, "loss": 1.6673, "step": 35509 }, { "epoch": 0.46, "grad_norm": 3.691352367401123, "learning_rate": 1.9621612814353205e-05, "loss": 2.2083, "step": 35510 }, { "epoch": 0.46, "grad_norm": 4.482997417449951, "learning_rate": 1.9621584181704727e-05, "loss": 2.1527, "step": 35511 }, { "epoch": 0.46, "grad_norm": 3.84348726272583, "learning_rate": 1.9621555547993863e-05, "loss": 2.2201, "step": 35512 }, { "epoch": 0.46, "grad_norm": 3.8817503452301025, "learning_rate": 1.9621526913220616e-05, "loss": 2.0307, "step": 35513 }, { "epoch": 0.46, "grad_norm": 3.82420015335083, "learning_rate": 1.962149827738499e-05, "loss": 2.0033, "step": 35514 }, { "epoch": 0.46, "grad_norm": 4.0154852867126465, "learning_rate": 1.9621469640486986e-05, "loss": 1.9198, "step": 35515 }, { "epoch": 0.46, "grad_norm": 3.365722894668579, "learning_rate": 1.9621441002526608e-05, "loss": 1.6654, "step": 35516 }, { "epoch": 0.46, "grad_norm": 3.784140110015869, "learning_rate": 1.9621412363503862e-05, "loss": 1.833, "step": 35517 }, { "epoch": 0.46, "grad_norm": 3.777190923690796, "learning_rate": 1.9621383723418746e-05, "loss": 1.8092, "step": 35518 }, { "epoch": 0.46, "grad_norm": 4.108707904815674, "learning_rate": 1.962135508227127e-05, "loss": 2.2885, "step": 35519 }, { "epoch": 0.46, "grad_norm": 3.607766628265381, "learning_rate": 1.962132644006143e-05, "loss": 1.8466, "step": 35520 }, { "epoch": 0.46, "grad_norm": 4.03093147277832, "learning_rate": 1.9621297796789233e-05, "loss": 1.999, "step": 35521 }, { "epoch": 0.46, "grad_norm": 3.5299324989318848, "learning_rate": 1.9621269152454682e-05, "loss": 1.658, "step": 35522 }, { "epoch": 0.46, "grad_norm": 3.6963846683502197, "learning_rate": 1.962124050705778e-05, "loss": 2.1684, "step": 35523 }, { "epoch": 0.46, "grad_norm": 3.536569595336914, "learning_rate": 1.9621211860598527e-05, "loss": 1.8918, "step": 35524 }, { "epoch": 0.46, "grad_norm": 3.3154776096343994, "learning_rate": 1.962118321307693e-05, "loss": 1.6365, "step": 35525 }, { "epoch": 0.46, "grad_norm": 3.9074676036834717, "learning_rate": 1.9621154564492994e-05, "loss": 2.2293, "step": 35526 }, { "epoch": 0.46, "grad_norm": 3.7959792613983154, "learning_rate": 1.9621125914846716e-05, "loss": 1.6673, "step": 35527 }, { "epoch": 0.46, "grad_norm": 3.104788064956665, "learning_rate": 1.9621097264138103e-05, "loss": 1.404, "step": 35528 }, { "epoch": 0.46, "grad_norm": 3.7705013751983643, "learning_rate": 1.962106861236716e-05, "loss": 2.0067, "step": 35529 }, { "epoch": 0.46, "grad_norm": 4.125571250915527, "learning_rate": 1.9621039959533885e-05, "loss": 2.0555, "step": 35530 }, { "epoch": 0.46, "grad_norm": 4.186951160430908, "learning_rate": 1.9621011305638284e-05, "loss": 2.4544, "step": 35531 }, { "epoch": 0.46, "grad_norm": 3.497114419937134, "learning_rate": 1.962098265068036e-05, "loss": 2.0758, "step": 35532 }, { "epoch": 0.46, "grad_norm": 3.7680888175964355, "learning_rate": 1.9620953994660123e-05, "loss": 2.0355, "step": 35533 }, { "epoch": 0.46, "grad_norm": 4.089590072631836, "learning_rate": 1.962092533757756e-05, "loss": 2.1783, "step": 35534 }, { "epoch": 0.46, "grad_norm": 3.424614191055298, "learning_rate": 1.962089667943269e-05, "loss": 1.6571, "step": 35535 }, { "epoch": 0.46, "grad_norm": 3.3427765369415283, "learning_rate": 1.9620868020225507e-05, "loss": 1.855, "step": 35536 }, { "epoch": 0.46, "grad_norm": 3.2867534160614014, "learning_rate": 1.9620839359956016e-05, "loss": 1.6773, "step": 35537 }, { "epoch": 0.46, "grad_norm": 4.317131042480469, "learning_rate": 1.962081069862422e-05, "loss": 2.2714, "step": 35538 }, { "epoch": 0.46, "grad_norm": 4.6885294914245605, "learning_rate": 1.9620782036230128e-05, "loss": 1.9628, "step": 35539 }, { "epoch": 0.46, "grad_norm": 3.895697832107544, "learning_rate": 1.9620753372773737e-05, "loss": 1.7558, "step": 35540 }, { "epoch": 0.46, "grad_norm": 4.4075236320495605, "learning_rate": 1.962072470825505e-05, "loss": 2.3531, "step": 35541 }, { "epoch": 0.46, "grad_norm": 4.303521156311035, "learning_rate": 1.9620696042674075e-05, "loss": 2.7126, "step": 35542 }, { "epoch": 0.46, "grad_norm": 4.284027099609375, "learning_rate": 1.962066737603081e-05, "loss": 2.3087, "step": 35543 }, { "epoch": 0.46, "grad_norm": 3.806227207183838, "learning_rate": 1.9620638708325258e-05, "loss": 1.9908, "step": 35544 }, { "epoch": 0.46, "grad_norm": 3.8337063789367676, "learning_rate": 1.9620610039557425e-05, "loss": 2.1523, "step": 35545 }, { "epoch": 0.46, "grad_norm": 4.050156116485596, "learning_rate": 1.9620581369727318e-05, "loss": 2.3644, "step": 35546 }, { "epoch": 0.46, "grad_norm": 4.624126434326172, "learning_rate": 1.962055269883493e-05, "loss": 1.9029, "step": 35547 }, { "epoch": 0.46, "grad_norm": 3.6671316623687744, "learning_rate": 1.962052402688027e-05, "loss": 1.6871, "step": 35548 }, { "epoch": 0.46, "grad_norm": 3.325486183166504, "learning_rate": 1.9620495353863344e-05, "loss": 1.5451, "step": 35549 }, { "epoch": 0.46, "grad_norm": 3.7704484462738037, "learning_rate": 1.962046667978415e-05, "loss": 2.4693, "step": 35550 }, { "epoch": 0.46, "grad_norm": 3.9692556858062744, "learning_rate": 1.9620438004642696e-05, "loss": 2.0259, "step": 35551 }, { "epoch": 0.46, "grad_norm": 3.892632246017456, "learning_rate": 1.9620409328438978e-05, "loss": 2.2694, "step": 35552 }, { "epoch": 0.46, "grad_norm": 3.991929769515991, "learning_rate": 1.962038065117301e-05, "loss": 2.1137, "step": 35553 }, { "epoch": 0.46, "grad_norm": 3.0735933780670166, "learning_rate": 1.9620351972844786e-05, "loss": 1.4797, "step": 35554 }, { "epoch": 0.46, "grad_norm": 3.213656187057495, "learning_rate": 1.962032329345431e-05, "loss": 1.4388, "step": 35555 }, { "epoch": 0.46, "grad_norm": 3.9553980827331543, "learning_rate": 1.9620294613001585e-05, "loss": 2.1757, "step": 35556 }, { "epoch": 0.46, "grad_norm": 3.692023992538452, "learning_rate": 1.9620265931486618e-05, "loss": 1.7482, "step": 35557 }, { "epoch": 0.46, "grad_norm": 3.5848021507263184, "learning_rate": 1.9620237248909414e-05, "loss": 2.0862, "step": 35558 }, { "epoch": 0.46, "grad_norm": 3.696589708328247, "learning_rate": 1.962020856526997e-05, "loss": 2.2408, "step": 35559 }, { "epoch": 0.46, "grad_norm": 3.9559104442596436, "learning_rate": 1.9620179880568294e-05, "loss": 1.9791, "step": 35560 }, { "epoch": 0.46, "grad_norm": 4.006595611572266, "learning_rate": 1.9620151194804383e-05, "loss": 2.3082, "step": 35561 }, { "epoch": 0.46, "grad_norm": 3.601148843765259, "learning_rate": 1.9620122507978244e-05, "loss": 1.9746, "step": 35562 }, { "epoch": 0.46, "grad_norm": 3.4223010540008545, "learning_rate": 1.9620093820089882e-05, "loss": 1.6679, "step": 35563 }, { "epoch": 0.46, "grad_norm": 3.9840524196624756, "learning_rate": 1.96200651311393e-05, "loss": 2.4851, "step": 35564 }, { "epoch": 0.46, "grad_norm": 4.270450592041016, "learning_rate": 1.96200364411265e-05, "loss": 2.1051, "step": 35565 }, { "epoch": 0.46, "grad_norm": 3.51515793800354, "learning_rate": 1.962000775005148e-05, "loss": 1.9882, "step": 35566 }, { "epoch": 0.46, "grad_norm": 3.8445541858673096, "learning_rate": 1.9619979057914253e-05, "loss": 2.2624, "step": 35567 }, { "epoch": 0.46, "grad_norm": 3.5317087173461914, "learning_rate": 1.9619950364714815e-05, "loss": 2.0599, "step": 35568 }, { "epoch": 0.46, "grad_norm": 3.4082279205322266, "learning_rate": 1.9619921670453174e-05, "loss": 1.7801, "step": 35569 }, { "epoch": 0.46, "grad_norm": 3.743661642074585, "learning_rate": 1.9619892975129326e-05, "loss": 1.9819, "step": 35570 }, { "epoch": 0.46, "grad_norm": 3.749772071838379, "learning_rate": 1.961986427874328e-05, "loss": 1.8758, "step": 35571 }, { "epoch": 0.46, "grad_norm": 3.611258029937744, "learning_rate": 1.9619835581295036e-05, "loss": 1.5302, "step": 35572 }, { "epoch": 0.46, "grad_norm": 3.519326686859131, "learning_rate": 1.9619806882784603e-05, "loss": 1.2765, "step": 35573 }, { "epoch": 0.46, "grad_norm": 3.8228657245635986, "learning_rate": 1.9619778183211978e-05, "loss": 1.6671, "step": 35574 }, { "epoch": 0.46, "grad_norm": 3.7999155521392822, "learning_rate": 1.9619749482577166e-05, "loss": 2.1163, "step": 35575 }, { "epoch": 0.46, "grad_norm": 4.172285556793213, "learning_rate": 1.961972078088017e-05, "loss": 2.6408, "step": 35576 }, { "epoch": 0.46, "grad_norm": 3.7895355224609375, "learning_rate": 1.9619692078120994e-05, "loss": 1.9543, "step": 35577 }, { "epoch": 0.46, "grad_norm": 3.762155294418335, "learning_rate": 1.9619663374299645e-05, "loss": 1.9191, "step": 35578 }, { "epoch": 0.46, "grad_norm": 2.9756853580474854, "learning_rate": 1.9619634669416116e-05, "loss": 1.6299, "step": 35579 }, { "epoch": 0.46, "grad_norm": 3.975900888442993, "learning_rate": 1.961960596347042e-05, "loss": 2.1708, "step": 35580 }, { "epoch": 0.46, "grad_norm": 3.6015522480010986, "learning_rate": 1.9619577256462557e-05, "loss": 2.2055, "step": 35581 }, { "epoch": 0.46, "grad_norm": 3.5709969997406006, "learning_rate": 1.9619548548392524e-05, "loss": 2.0239, "step": 35582 }, { "epoch": 0.46, "grad_norm": 3.737311363220215, "learning_rate": 1.9619519839260332e-05, "loss": 1.9586, "step": 35583 }, { "epoch": 0.46, "grad_norm": 4.189745903015137, "learning_rate": 1.9619491129065988e-05, "loss": 2.552, "step": 35584 }, { "epoch": 0.46, "grad_norm": 4.282276153564453, "learning_rate": 1.961946241780948e-05, "loss": 2.1101, "step": 35585 }, { "epoch": 0.46, "grad_norm": 4.403858661651611, "learning_rate": 1.9619433705490825e-05, "loss": 2.4757, "step": 35586 }, { "epoch": 0.46, "grad_norm": 3.851350784301758, "learning_rate": 1.9619404992110018e-05, "loss": 1.5337, "step": 35587 }, { "epoch": 0.46, "grad_norm": 4.336614608764648, "learning_rate": 1.961937627766707e-05, "loss": 2.1457, "step": 35588 }, { "epoch": 0.46, "grad_norm": 3.147521734237671, "learning_rate": 1.9619347562161976e-05, "loss": 1.6474, "step": 35589 }, { "epoch": 0.46, "grad_norm": 4.067838668823242, "learning_rate": 1.9619318845594746e-05, "loss": 1.9471, "step": 35590 }, { "epoch": 0.46, "grad_norm": 3.4518914222717285, "learning_rate": 1.9619290127965377e-05, "loss": 1.9108, "step": 35591 }, { "epoch": 0.46, "grad_norm": 4.06999397277832, "learning_rate": 1.961926140927388e-05, "loss": 2.3297, "step": 35592 }, { "epoch": 0.46, "grad_norm": 3.726121187210083, "learning_rate": 1.9619232689520246e-05, "loss": 1.8583, "step": 35593 }, { "epoch": 0.46, "grad_norm": 4.049829959869385, "learning_rate": 1.961920396870449e-05, "loss": 2.0708, "step": 35594 }, { "epoch": 0.46, "grad_norm": 3.7259793281555176, "learning_rate": 1.961917524682661e-05, "loss": 1.6918, "step": 35595 }, { "epoch": 0.46, "grad_norm": 3.5478837490081787, "learning_rate": 1.961914652388661e-05, "loss": 2.0372, "step": 35596 }, { "epoch": 0.46, "grad_norm": 3.6414926052093506, "learning_rate": 1.9619117799884494e-05, "loss": 1.9921, "step": 35597 }, { "epoch": 0.46, "grad_norm": 4.1008381843566895, "learning_rate": 1.961908907482026e-05, "loss": 2.3687, "step": 35598 }, { "epoch": 0.46, "grad_norm": 3.8642234802246094, "learning_rate": 1.961906034869392e-05, "loss": 1.7004, "step": 35599 }, { "epoch": 0.46, "grad_norm": 3.877047061920166, "learning_rate": 1.961903162150547e-05, "loss": 2.3094, "step": 35600 }, { "epoch": 0.46, "grad_norm": 3.7409467697143555, "learning_rate": 1.9619002893254917e-05, "loss": 2.0758, "step": 35601 }, { "epoch": 0.46, "grad_norm": 4.151892185211182, "learning_rate": 1.9618974163942263e-05, "loss": 2.3307, "step": 35602 }, { "epoch": 0.46, "grad_norm": 3.3885600566864014, "learning_rate": 1.961894543356751e-05, "loss": 1.5027, "step": 35603 }, { "epoch": 0.46, "grad_norm": 3.7223103046417236, "learning_rate": 1.9618916702130665e-05, "loss": 2.1012, "step": 35604 }, { "epoch": 0.46, "grad_norm": 3.8232545852661133, "learning_rate": 1.9618887969631724e-05, "loss": 2.5901, "step": 35605 }, { "epoch": 0.46, "grad_norm": 3.6647212505340576, "learning_rate": 1.96188592360707e-05, "loss": 2.0999, "step": 35606 }, { "epoch": 0.46, "grad_norm": 3.838465929031372, "learning_rate": 1.9618830501447585e-05, "loss": 1.9871, "step": 35607 }, { "epoch": 0.46, "grad_norm": 3.08095121383667, "learning_rate": 1.961880176576239e-05, "loss": 1.5542, "step": 35608 }, { "epoch": 0.46, "grad_norm": 3.693446159362793, "learning_rate": 1.961877302901512e-05, "loss": 2.0251, "step": 35609 }, { "epoch": 0.46, "grad_norm": 3.6584129333496094, "learning_rate": 1.961874429120577e-05, "loss": 1.9432, "step": 35610 }, { "epoch": 0.46, "grad_norm": 3.866847276687622, "learning_rate": 1.961871555233435e-05, "loss": 2.1218, "step": 35611 }, { "epoch": 0.46, "grad_norm": 3.776845693588257, "learning_rate": 1.9618686812400854e-05, "loss": 2.5093, "step": 35612 }, { "epoch": 0.46, "grad_norm": 3.3048553466796875, "learning_rate": 1.96186580714053e-05, "loss": 1.6172, "step": 35613 }, { "epoch": 0.46, "grad_norm": 3.9316246509552, "learning_rate": 1.961862932934768e-05, "loss": 1.8706, "step": 35614 }, { "epoch": 0.46, "grad_norm": 3.995884418487549, "learning_rate": 1.9618600586227998e-05, "loss": 2.2488, "step": 35615 }, { "epoch": 0.46, "grad_norm": 3.3198769092559814, "learning_rate": 1.961857184204626e-05, "loss": 1.7192, "step": 35616 }, { "epoch": 0.46, "grad_norm": 4.1362128257751465, "learning_rate": 1.9618543096802472e-05, "loss": 2.4313, "step": 35617 }, { "epoch": 0.46, "grad_norm": 3.626964569091797, "learning_rate": 1.961851435049663e-05, "loss": 1.8082, "step": 35618 }, { "epoch": 0.46, "grad_norm": 4.411028861999512, "learning_rate": 1.9618485603128744e-05, "loss": 2.316, "step": 35619 }, { "epoch": 0.46, "grad_norm": 3.5153768062591553, "learning_rate": 1.961845685469881e-05, "loss": 1.7712, "step": 35620 }, { "epoch": 0.46, "grad_norm": 3.987666130065918, "learning_rate": 1.961842810520684e-05, "loss": 2.3133, "step": 35621 }, { "epoch": 0.46, "grad_norm": 3.897235870361328, "learning_rate": 1.961839935465283e-05, "loss": 1.9925, "step": 35622 }, { "epoch": 0.46, "grad_norm": 3.829318046569824, "learning_rate": 1.9618370603036782e-05, "loss": 1.9913, "step": 35623 }, { "epoch": 0.46, "grad_norm": 3.5155646800994873, "learning_rate": 1.9618341850358707e-05, "loss": 1.6582, "step": 35624 }, { "epoch": 0.46, "grad_norm": 3.3315916061401367, "learning_rate": 1.9618313096618602e-05, "loss": 1.6878, "step": 35625 }, { "epoch": 0.46, "grad_norm": 3.5502829551696777, "learning_rate": 1.9618284341816474e-05, "loss": 2.0004, "step": 35626 }, { "epoch": 0.46, "grad_norm": 3.4926607608795166, "learning_rate": 1.9618255585952323e-05, "loss": 1.5393, "step": 35627 }, { "epoch": 0.46, "grad_norm": 3.6266140937805176, "learning_rate": 1.961822682902615e-05, "loss": 2.0648, "step": 35628 }, { "epoch": 0.46, "grad_norm": 3.8230488300323486, "learning_rate": 1.961819807103797e-05, "loss": 1.759, "step": 35629 }, { "epoch": 0.46, "grad_norm": 3.689380407333374, "learning_rate": 1.961816931198777e-05, "loss": 1.838, "step": 35630 }, { "epoch": 0.46, "grad_norm": 4.548632621765137, "learning_rate": 1.9618140551875566e-05, "loss": 2.0829, "step": 35631 }, { "epoch": 0.46, "grad_norm": 4.210343837738037, "learning_rate": 1.9618111790701354e-05, "loss": 2.3204, "step": 35632 }, { "epoch": 0.46, "grad_norm": 3.952775716781616, "learning_rate": 1.9618083028465136e-05, "loss": 1.8756, "step": 35633 }, { "epoch": 0.46, "grad_norm": 3.916250467300415, "learning_rate": 1.9618054265166922e-05, "loss": 1.912, "step": 35634 }, { "epoch": 0.46, "grad_norm": 3.6667392253875732, "learning_rate": 1.9618025500806715e-05, "loss": 1.7044, "step": 35635 }, { "epoch": 0.46, "grad_norm": 3.8467724323272705, "learning_rate": 1.961799673538451e-05, "loss": 2.029, "step": 35636 }, { "epoch": 0.46, "grad_norm": 4.242246627807617, "learning_rate": 1.9617967968900315e-05, "loss": 2.353, "step": 35637 }, { "epoch": 0.46, "grad_norm": 4.616479396820068, "learning_rate": 1.9617939201354135e-05, "loss": 2.1946, "step": 35638 }, { "epoch": 0.46, "grad_norm": 3.4918360710144043, "learning_rate": 1.9617910432745973e-05, "loss": 1.8572, "step": 35639 }, { "epoch": 0.46, "grad_norm": 4.10760498046875, "learning_rate": 1.9617881663075825e-05, "loss": 2.0901, "step": 35640 }, { "epoch": 0.46, "grad_norm": 4.378908634185791, "learning_rate": 1.9617852892343704e-05, "loss": 2.298, "step": 35641 }, { "epoch": 0.46, "grad_norm": 3.1780004501342773, "learning_rate": 1.961782412054961e-05, "loss": 1.4549, "step": 35642 }, { "epoch": 0.46, "grad_norm": 3.2820773124694824, "learning_rate": 1.9617795347693544e-05, "loss": 1.5277, "step": 35643 }, { "epoch": 0.46, "grad_norm": 4.193106174468994, "learning_rate": 1.9617766573775505e-05, "loss": 2.0827, "step": 35644 }, { "epoch": 0.46, "grad_norm": 3.963928461074829, "learning_rate": 1.9617737798795507e-05, "loss": 2.3375, "step": 35645 }, { "epoch": 0.46, "grad_norm": 4.22900390625, "learning_rate": 1.9617709022753546e-05, "loss": 1.7577, "step": 35646 }, { "epoch": 0.46, "grad_norm": 3.719505786895752, "learning_rate": 1.9617680245649626e-05, "loss": 1.5823, "step": 35647 }, { "epoch": 0.46, "grad_norm": 3.5469765663146973, "learning_rate": 1.961765146748375e-05, "loss": 1.9892, "step": 35648 }, { "epoch": 0.46, "grad_norm": 3.1633145809173584, "learning_rate": 1.9617622688255924e-05, "loss": 1.5624, "step": 35649 }, { "epoch": 0.46, "grad_norm": 3.9961116313934326, "learning_rate": 1.961759390796615e-05, "loss": 1.8672, "step": 35650 }, { "epoch": 0.46, "grad_norm": 4.087891578674316, "learning_rate": 1.9617565126614432e-05, "loss": 1.7735, "step": 35651 }, { "epoch": 0.46, "grad_norm": 3.9217047691345215, "learning_rate": 1.9617536344200767e-05, "loss": 2.1938, "step": 35652 }, { "epoch": 0.46, "grad_norm": 3.662893772125244, "learning_rate": 1.9617507560725168e-05, "loss": 1.8176, "step": 35653 }, { "epoch": 0.46, "grad_norm": 4.310794830322266, "learning_rate": 1.9617478776187626e-05, "loss": 2.3377, "step": 35654 }, { "epoch": 0.46, "grad_norm": 3.6122992038726807, "learning_rate": 1.961744999058816e-05, "loss": 2.178, "step": 35655 }, { "epoch": 0.46, "grad_norm": 3.680312395095825, "learning_rate": 1.9617421203926757e-05, "loss": 1.7813, "step": 35656 }, { "epoch": 0.46, "grad_norm": 4.053694725036621, "learning_rate": 1.961739241620343e-05, "loss": 2.3026, "step": 35657 }, { "epoch": 0.46, "grad_norm": 4.540488243103027, "learning_rate": 1.9617363627418177e-05, "loss": 2.3727, "step": 35658 }, { "epoch": 0.46, "grad_norm": 3.7320733070373535, "learning_rate": 1.961733483757101e-05, "loss": 2.258, "step": 35659 }, { "epoch": 0.46, "grad_norm": 4.0030517578125, "learning_rate": 1.9617306046661923e-05, "loss": 2.3065, "step": 35660 }, { "epoch": 0.46, "grad_norm": 3.545862913131714, "learning_rate": 1.9617277254690918e-05, "loss": 2.0295, "step": 35661 }, { "epoch": 0.46, "grad_norm": 3.738570213317871, "learning_rate": 1.961724846165801e-05, "loss": 2.0164, "step": 35662 }, { "epoch": 0.46, "grad_norm": 4.455596923828125, "learning_rate": 1.961721966756319e-05, "loss": 2.1769, "step": 35663 }, { "epoch": 0.46, "grad_norm": 3.7916414737701416, "learning_rate": 1.9617190872406465e-05, "loss": 2.0435, "step": 35664 }, { "epoch": 0.46, "grad_norm": 3.571262836456299, "learning_rate": 1.961716207618784e-05, "loss": 1.9274, "step": 35665 }, { "epoch": 0.46, "grad_norm": 4.096643924713135, "learning_rate": 1.9617133278907322e-05, "loss": 2.1556, "step": 35666 }, { "epoch": 0.46, "grad_norm": 4.569727420806885, "learning_rate": 1.9617104480564905e-05, "loss": 2.147, "step": 35667 }, { "epoch": 0.46, "grad_norm": 3.996554136276245, "learning_rate": 1.9617075681160597e-05, "loss": 2.0385, "step": 35668 }, { "epoch": 0.46, "grad_norm": 4.340679168701172, "learning_rate": 1.96170468806944e-05, "loss": 2.0811, "step": 35669 }, { "epoch": 0.46, "grad_norm": 4.044387340545654, "learning_rate": 1.9617018079166316e-05, "loss": 2.1498, "step": 35670 }, { "epoch": 0.46, "grad_norm": 3.7533915042877197, "learning_rate": 1.9616989276576354e-05, "loss": 1.846, "step": 35671 }, { "epoch": 0.46, "grad_norm": 3.7463908195495605, "learning_rate": 1.961696047292451e-05, "loss": 2.0814, "step": 35672 }, { "epoch": 0.46, "grad_norm": 3.8361287117004395, "learning_rate": 1.9616931668210794e-05, "loss": 1.6713, "step": 35673 }, { "epoch": 0.46, "grad_norm": 3.6003611087799072, "learning_rate": 1.9616902862435202e-05, "loss": 2.0841, "step": 35674 }, { "epoch": 0.46, "grad_norm": 3.3405494689941406, "learning_rate": 1.9616874055597742e-05, "loss": 1.8051, "step": 35675 }, { "epoch": 0.46, "grad_norm": 3.3941826820373535, "learning_rate": 1.961684524769842e-05, "loss": 1.5682, "step": 35676 }, { "epoch": 0.46, "grad_norm": 4.056097030639648, "learning_rate": 1.961681643873723e-05, "loss": 1.9738, "step": 35677 }, { "epoch": 0.46, "grad_norm": 3.347559690475464, "learning_rate": 1.961678762871418e-05, "loss": 1.5578, "step": 35678 }, { "epoch": 0.46, "grad_norm": 3.704908609390259, "learning_rate": 1.9616758817629276e-05, "loss": 1.9675, "step": 35679 }, { "epoch": 0.46, "grad_norm": 3.401801824569702, "learning_rate": 1.961673000548252e-05, "loss": 1.8043, "step": 35680 }, { "epoch": 0.46, "grad_norm": 5.057584762573242, "learning_rate": 1.961670119227391e-05, "loss": 2.9119, "step": 35681 }, { "epoch": 0.46, "grad_norm": 4.149909019470215, "learning_rate": 1.9616672378003453e-05, "loss": 2.4346, "step": 35682 }, { "epoch": 0.46, "grad_norm": 3.8912434577941895, "learning_rate": 1.9616643562671155e-05, "loss": 1.9553, "step": 35683 }, { "epoch": 0.46, "grad_norm": 3.519456386566162, "learning_rate": 1.9616614746277016e-05, "loss": 1.7436, "step": 35684 }, { "epoch": 0.46, "grad_norm": 3.543881893157959, "learning_rate": 1.961658592882104e-05, "loss": 1.6995, "step": 35685 }, { "epoch": 0.46, "grad_norm": 4.442521572113037, "learning_rate": 1.9616557110303226e-05, "loss": 2.7047, "step": 35686 }, { "epoch": 0.46, "grad_norm": 4.156079292297363, "learning_rate": 1.9616528290723586e-05, "loss": 2.1553, "step": 35687 }, { "epoch": 0.46, "grad_norm": 3.544910192489624, "learning_rate": 1.9616499470082115e-05, "loss": 1.8617, "step": 35688 }, { "epoch": 0.46, "grad_norm": 4.298262119293213, "learning_rate": 1.961647064837882e-05, "loss": 2.4807, "step": 35689 }, { "epoch": 0.46, "grad_norm": 3.566838264465332, "learning_rate": 1.9616441825613703e-05, "loss": 1.859, "step": 35690 }, { "epoch": 0.46, "grad_norm": 3.984062671661377, "learning_rate": 1.9616413001786768e-05, "loss": 2.2538, "step": 35691 }, { "epoch": 0.46, "grad_norm": 3.5668396949768066, "learning_rate": 1.961638417689802e-05, "loss": 1.8097, "step": 35692 }, { "epoch": 0.46, "grad_norm": 3.609459638595581, "learning_rate": 1.961635535094746e-05, "loss": 2.0819, "step": 35693 }, { "epoch": 0.46, "grad_norm": 3.8884501457214355, "learning_rate": 1.961632652393509e-05, "loss": 2.0903, "step": 35694 }, { "epoch": 0.46, "grad_norm": 3.5578010082244873, "learning_rate": 1.9616297695860913e-05, "loss": 1.7047, "step": 35695 }, { "epoch": 0.46, "grad_norm": 3.4234771728515625, "learning_rate": 1.961626886672493e-05, "loss": 1.9376, "step": 35696 }, { "epoch": 0.46, "grad_norm": 3.721156358718872, "learning_rate": 1.9616240036527157e-05, "loss": 1.6825, "step": 35697 }, { "epoch": 0.46, "grad_norm": 4.5785231590271, "learning_rate": 1.961621120526758e-05, "loss": 2.4054, "step": 35698 }, { "epoch": 0.46, "grad_norm": 3.6657447814941406, "learning_rate": 1.9616182372946216e-05, "loss": 1.7293, "step": 35699 }, { "epoch": 0.46, "grad_norm": 3.354848623275757, "learning_rate": 1.961615353956306e-05, "loss": 1.5091, "step": 35700 }, { "epoch": 0.46, "grad_norm": 3.0288424491882324, "learning_rate": 1.9616124705118116e-05, "loss": 1.7538, "step": 35701 }, { "epoch": 0.46, "grad_norm": 4.550405502319336, "learning_rate": 1.9616095869611393e-05, "loss": 1.9688, "step": 35702 }, { "epoch": 0.46, "grad_norm": 3.694862127304077, "learning_rate": 1.9616067033042884e-05, "loss": 2.2572, "step": 35703 }, { "epoch": 0.46, "grad_norm": 3.53068470954895, "learning_rate": 1.9616038195412602e-05, "loss": 1.8823, "step": 35704 }, { "epoch": 0.46, "grad_norm": 4.1883111000061035, "learning_rate": 1.9616009356720544e-05, "loss": 2.3446, "step": 35705 }, { "epoch": 0.46, "grad_norm": 4.209099292755127, "learning_rate": 1.9615980516966714e-05, "loss": 2.559, "step": 35706 }, { "epoch": 0.46, "grad_norm": 3.6111338138580322, "learning_rate": 1.961595167615112e-05, "loss": 2.1299, "step": 35707 }, { "epoch": 0.46, "grad_norm": 4.256858825683594, "learning_rate": 1.9615922834273765e-05, "loss": 1.8906, "step": 35708 }, { "epoch": 0.46, "grad_norm": 4.096595287322998, "learning_rate": 1.9615893991334643e-05, "loss": 2.1517, "step": 35709 }, { "epoch": 0.46, "grad_norm": 3.2201802730560303, "learning_rate": 1.9615865147333765e-05, "loss": 1.7315, "step": 35710 }, { "epoch": 0.46, "grad_norm": 3.852571725845337, "learning_rate": 1.9615836302271132e-05, "loss": 1.8995, "step": 35711 }, { "epoch": 0.46, "grad_norm": 4.065303802490234, "learning_rate": 1.9615807456146746e-05, "loss": 2.0892, "step": 35712 }, { "epoch": 0.46, "grad_norm": 3.738571882247925, "learning_rate": 1.9615778608960615e-05, "loss": 1.9696, "step": 35713 }, { "epoch": 0.46, "grad_norm": 3.637587070465088, "learning_rate": 1.9615749760712738e-05, "loss": 1.8817, "step": 35714 }, { "epoch": 0.46, "grad_norm": 3.972651720046997, "learning_rate": 1.961572091140312e-05, "loss": 2.2134, "step": 35715 }, { "epoch": 0.46, "grad_norm": 3.1539714336395264, "learning_rate": 1.961569206103176e-05, "loss": 1.7199, "step": 35716 }, { "epoch": 0.46, "grad_norm": 3.6334056854248047, "learning_rate": 1.9615663209598667e-05, "loss": 1.6495, "step": 35717 }, { "epoch": 0.46, "grad_norm": 3.331341028213501, "learning_rate": 1.961563435710384e-05, "loss": 1.7348, "step": 35718 }, { "epoch": 0.46, "grad_norm": 4.039666652679443, "learning_rate": 1.9615605503547287e-05, "loss": 1.661, "step": 35719 }, { "epoch": 0.46, "grad_norm": 3.713813066482544, "learning_rate": 1.9615576648929004e-05, "loss": 1.6771, "step": 35720 }, { "epoch": 0.46, "grad_norm": 3.747591733932495, "learning_rate": 1.9615547793249002e-05, "loss": 2.1299, "step": 35721 }, { "epoch": 0.46, "grad_norm": 3.684121608734131, "learning_rate": 1.961551893650728e-05, "loss": 1.7885, "step": 35722 }, { "epoch": 0.46, "grad_norm": 3.222801685333252, "learning_rate": 1.961549007870384e-05, "loss": 1.5462, "step": 35723 }, { "epoch": 0.46, "grad_norm": 4.467173099517822, "learning_rate": 1.9615461219838686e-05, "loss": 2.0828, "step": 35724 }, { "epoch": 0.46, "grad_norm": 4.064949035644531, "learning_rate": 1.961543235991182e-05, "loss": 1.9808, "step": 35725 }, { "epoch": 0.46, "grad_norm": 4.113070011138916, "learning_rate": 1.9615403498923253e-05, "loss": 2.3364, "step": 35726 }, { "epoch": 0.46, "grad_norm": 3.206871271133423, "learning_rate": 1.9615374636872978e-05, "loss": 1.4624, "step": 35727 }, { "epoch": 0.46, "grad_norm": 4.220437526702881, "learning_rate": 1.9615345773761004e-05, "loss": 2.1451, "step": 35728 }, { "epoch": 0.46, "grad_norm": 3.762361526489258, "learning_rate": 1.9615316909587332e-05, "loss": 2.0791, "step": 35729 }, { "epoch": 0.46, "grad_norm": 3.4864649772644043, "learning_rate": 1.961528804435197e-05, "loss": 1.8426, "step": 35730 }, { "epoch": 0.46, "grad_norm": 3.972055435180664, "learning_rate": 1.961525917805491e-05, "loss": 1.8274, "step": 35731 }, { "epoch": 0.46, "grad_norm": 3.908949613571167, "learning_rate": 1.9615230310696164e-05, "loss": 2.2475, "step": 35732 }, { "epoch": 0.46, "grad_norm": 3.809072494506836, "learning_rate": 1.9615201442275737e-05, "loss": 2.224, "step": 35733 }, { "epoch": 0.46, "grad_norm": 4.214376449584961, "learning_rate": 1.9615172572793625e-05, "loss": 2.5182, "step": 35734 }, { "epoch": 0.46, "grad_norm": 3.8312430381774902, "learning_rate": 1.9615143702249838e-05, "loss": 1.7804, "step": 35735 }, { "epoch": 0.46, "grad_norm": 3.6124727725982666, "learning_rate": 1.9615114830644374e-05, "loss": 2.2225, "step": 35736 }, { "epoch": 0.46, "grad_norm": 4.009119033813477, "learning_rate": 1.961508595797724e-05, "loss": 1.8624, "step": 35737 }, { "epoch": 0.46, "grad_norm": 3.3755905628204346, "learning_rate": 1.961505708424844e-05, "loss": 1.9284, "step": 35738 }, { "epoch": 0.46, "grad_norm": 3.7867259979248047, "learning_rate": 1.9615028209457967e-05, "loss": 1.8893, "step": 35739 }, { "epoch": 0.46, "grad_norm": 4.024168968200684, "learning_rate": 1.9614999333605835e-05, "loss": 1.9868, "step": 35740 }, { "epoch": 0.46, "grad_norm": 4.310179710388184, "learning_rate": 1.9614970456692046e-05, "loss": 2.0607, "step": 35741 }, { "epoch": 0.46, "grad_norm": 3.0956547260284424, "learning_rate": 1.96149415787166e-05, "loss": 1.5466, "step": 35742 }, { "epoch": 0.46, "grad_norm": 3.6787819862365723, "learning_rate": 1.96149126996795e-05, "loss": 1.8105, "step": 35743 }, { "epoch": 0.46, "grad_norm": 3.768256425857544, "learning_rate": 1.9614883819580753e-05, "loss": 2.0774, "step": 35744 }, { "epoch": 0.46, "grad_norm": 3.493711471557617, "learning_rate": 1.9614854938420357e-05, "loss": 1.6987, "step": 35745 }, { "epoch": 0.46, "grad_norm": 4.749098300933838, "learning_rate": 1.961482605619832e-05, "loss": 2.7505, "step": 35746 }, { "epoch": 0.46, "grad_norm": 4.306131839752197, "learning_rate": 1.961479717291464e-05, "loss": 1.8629, "step": 35747 }, { "epoch": 0.46, "grad_norm": 3.229506015777588, "learning_rate": 1.961476828856933e-05, "loss": 1.496, "step": 35748 }, { "epoch": 0.46, "grad_norm": 3.1909637451171875, "learning_rate": 1.9614739403162377e-05, "loss": 1.5086, "step": 35749 }, { "epoch": 0.46, "grad_norm": 4.358913421630859, "learning_rate": 1.96147105166938e-05, "loss": 2.283, "step": 35750 }, { "epoch": 0.46, "grad_norm": 4.305752277374268, "learning_rate": 1.9614681629163593e-05, "loss": 2.8016, "step": 35751 }, { "epoch": 0.46, "grad_norm": 3.5934081077575684, "learning_rate": 1.9614652740571767e-05, "loss": 1.918, "step": 35752 }, { "epoch": 0.46, "grad_norm": 3.516296863555908, "learning_rate": 1.9614623850918314e-05, "loss": 1.9407, "step": 35753 }, { "epoch": 0.46, "grad_norm": 4.551424503326416, "learning_rate": 1.9614594960203244e-05, "loss": 2.46, "step": 35754 }, { "epoch": 0.46, "grad_norm": 3.0752360820770264, "learning_rate": 1.9614566068426564e-05, "loss": 1.4056, "step": 35755 }, { "epoch": 0.46, "grad_norm": 3.798351526260376, "learning_rate": 1.9614537175588274e-05, "loss": 2.017, "step": 35756 }, { "epoch": 0.46, "grad_norm": 3.718989849090576, "learning_rate": 1.961450828168837e-05, "loss": 1.8701, "step": 35757 }, { "epoch": 0.46, "grad_norm": 3.005798816680908, "learning_rate": 1.9614479386726864e-05, "loss": 1.3154, "step": 35758 }, { "epoch": 0.46, "grad_norm": 3.6016108989715576, "learning_rate": 1.9614450490703757e-05, "loss": 1.8248, "step": 35759 }, { "epoch": 0.46, "grad_norm": 4.227503299713135, "learning_rate": 1.961442159361905e-05, "loss": 2.3126, "step": 35760 }, { "epoch": 0.46, "grad_norm": 4.0537872314453125, "learning_rate": 1.9614392695472747e-05, "loss": 2.4064, "step": 35761 }, { "epoch": 0.46, "grad_norm": 3.830378532409668, "learning_rate": 1.9614363796264854e-05, "loss": 2.2155, "step": 35762 }, { "epoch": 0.46, "grad_norm": 4.088025093078613, "learning_rate": 1.961433489599537e-05, "loss": 1.9907, "step": 35763 }, { "epoch": 0.46, "grad_norm": 4.1482744216918945, "learning_rate": 1.9614305994664305e-05, "loss": 1.9843, "step": 35764 }, { "epoch": 0.46, "grad_norm": 3.7875349521636963, "learning_rate": 1.9614277092271653e-05, "loss": 2.1528, "step": 35765 }, { "epoch": 0.46, "grad_norm": 4.0475029945373535, "learning_rate": 1.961424818881742e-05, "loss": 1.8812, "step": 35766 }, { "epoch": 0.46, "grad_norm": 4.194705963134766, "learning_rate": 1.9614219284301616e-05, "loss": 1.9306, "step": 35767 }, { "epoch": 0.46, "grad_norm": 4.020447254180908, "learning_rate": 1.961419037872424e-05, "loss": 2.3562, "step": 35768 }, { "epoch": 0.46, "grad_norm": 4.0710954666137695, "learning_rate": 1.9614161472085288e-05, "loss": 2.2674, "step": 35769 }, { "epoch": 0.46, "grad_norm": 3.4505465030670166, "learning_rate": 1.961413256438477e-05, "loss": 1.9201, "step": 35770 }, { "epoch": 0.46, "grad_norm": 4.179751396179199, "learning_rate": 1.9614103655622692e-05, "loss": 2.1051, "step": 35771 }, { "epoch": 0.46, "grad_norm": 3.4119150638580322, "learning_rate": 1.9614074745799054e-05, "loss": 2.1364, "step": 35772 }, { "epoch": 0.46, "grad_norm": 3.9185571670532227, "learning_rate": 1.961404583491386e-05, "loss": 2.1494, "step": 35773 }, { "epoch": 0.46, "grad_norm": 3.574030876159668, "learning_rate": 1.961401692296711e-05, "loss": 1.9693, "step": 35774 }, { "epoch": 0.46, "grad_norm": 4.232822895050049, "learning_rate": 1.961398800995881e-05, "loss": 1.9753, "step": 35775 }, { "epoch": 0.46, "grad_norm": 3.498178720474243, "learning_rate": 1.961395909588896e-05, "loss": 1.8722, "step": 35776 }, { "epoch": 0.46, "grad_norm": 3.806208372116089, "learning_rate": 1.961393018075757e-05, "loss": 2.1866, "step": 35777 }, { "epoch": 0.46, "grad_norm": 3.8575901985168457, "learning_rate": 1.9613901264564637e-05, "loss": 2.1108, "step": 35778 }, { "epoch": 0.46, "grad_norm": 3.478998899459839, "learning_rate": 1.961387234731017e-05, "loss": 2.2246, "step": 35779 }, { "epoch": 0.46, "grad_norm": 4.3762030601501465, "learning_rate": 1.961384342899416e-05, "loss": 2.1856, "step": 35780 }, { "epoch": 0.46, "grad_norm": 3.7954182624816895, "learning_rate": 1.9613814509616627e-05, "loss": 2.1863, "step": 35781 }, { "epoch": 0.46, "grad_norm": 3.078378677368164, "learning_rate": 1.9613785589177562e-05, "loss": 1.3862, "step": 35782 }, { "epoch": 0.46, "grad_norm": 3.66292667388916, "learning_rate": 1.961375666767697e-05, "loss": 2.0625, "step": 35783 }, { "epoch": 0.46, "grad_norm": 3.9301254749298096, "learning_rate": 1.961372774511486e-05, "loss": 1.9281, "step": 35784 }, { "epoch": 0.46, "grad_norm": 3.3540735244750977, "learning_rate": 1.9613698821491227e-05, "loss": 1.4617, "step": 35785 }, { "epoch": 0.46, "grad_norm": 3.6322898864746094, "learning_rate": 1.9613669896806084e-05, "loss": 2.0018, "step": 35786 }, { "epoch": 0.46, "grad_norm": 3.3692405223846436, "learning_rate": 1.9613640971059425e-05, "loss": 1.5834, "step": 35787 }, { "epoch": 0.46, "grad_norm": 5.2091965675354, "learning_rate": 1.961361204425126e-05, "loss": 2.6087, "step": 35788 }, { "epoch": 0.46, "grad_norm": 3.233816146850586, "learning_rate": 1.9613583116381587e-05, "loss": 1.591, "step": 35789 }, { "epoch": 0.46, "grad_norm": 3.856346607208252, "learning_rate": 1.961355418745041e-05, "loss": 2.0115, "step": 35790 }, { "epoch": 0.46, "grad_norm": 4.764023303985596, "learning_rate": 1.9613525257457734e-05, "loss": 2.2531, "step": 35791 }, { "epoch": 0.46, "grad_norm": 3.4647767543792725, "learning_rate": 1.9613496326403564e-05, "loss": 1.7814, "step": 35792 }, { "epoch": 0.46, "grad_norm": 4.274310111999512, "learning_rate": 1.96134673942879e-05, "loss": 1.7961, "step": 35793 }, { "epoch": 0.46, "grad_norm": 4.020575046539307, "learning_rate": 1.9613438461110745e-05, "loss": 2.0854, "step": 35794 }, { "epoch": 0.46, "grad_norm": 3.842878580093384, "learning_rate": 1.9613409526872104e-05, "loss": 2.1429, "step": 35795 }, { "epoch": 0.46, "grad_norm": 3.4834933280944824, "learning_rate": 1.961338059157198e-05, "loss": 1.9837, "step": 35796 }, { "epoch": 0.46, "grad_norm": 3.8407979011535645, "learning_rate": 1.9613351655210376e-05, "loss": 1.9068, "step": 35797 }, { "epoch": 0.46, "grad_norm": 4.276134014129639, "learning_rate": 1.961332271778729e-05, "loss": 2.6704, "step": 35798 }, { "epoch": 0.46, "grad_norm": 4.10916805267334, "learning_rate": 1.9613293779302734e-05, "loss": 2.1055, "step": 35799 }, { "epoch": 0.46, "grad_norm": 3.5057413578033447, "learning_rate": 1.961326483975671e-05, "loss": 1.9084, "step": 35800 }, { "epoch": 0.46, "grad_norm": 3.926769256591797, "learning_rate": 1.9613235899149217e-05, "loss": 2.1477, "step": 35801 }, { "epoch": 0.46, "grad_norm": 3.5448434352874756, "learning_rate": 1.9613206957480257e-05, "loss": 1.8766, "step": 35802 }, { "epoch": 0.46, "grad_norm": 4.402919292449951, "learning_rate": 1.9613178014749836e-05, "loss": 2.0828, "step": 35803 }, { "epoch": 0.46, "grad_norm": 3.724220037460327, "learning_rate": 1.961314907095796e-05, "loss": 2.3779, "step": 35804 }, { "epoch": 0.46, "grad_norm": 4.322220325469971, "learning_rate": 1.9613120126104627e-05, "loss": 1.8707, "step": 35805 }, { "epoch": 0.46, "grad_norm": 4.073238372802734, "learning_rate": 1.9613091180189846e-05, "loss": 1.7232, "step": 35806 }, { "epoch": 0.46, "grad_norm": 4.310975074768066, "learning_rate": 1.961306223321361e-05, "loss": 2.3196, "step": 35807 }, { "epoch": 0.46, "grad_norm": 4.011531829833984, "learning_rate": 1.9613033285175933e-05, "loss": 2.2522, "step": 35808 }, { "epoch": 0.46, "grad_norm": 3.3693923950195312, "learning_rate": 1.9613004336076816e-05, "loss": 1.8162, "step": 35809 }, { "epoch": 0.46, "grad_norm": 3.6760332584381104, "learning_rate": 1.9612975385916256e-05, "loss": 1.8436, "step": 35810 }, { "epoch": 0.46, "grad_norm": 3.955070972442627, "learning_rate": 1.9612946434694265e-05, "loss": 2.1853, "step": 35811 }, { "epoch": 0.46, "grad_norm": 3.8864073753356934, "learning_rate": 1.9612917482410837e-05, "loss": 2.0551, "step": 35812 }, { "epoch": 0.46, "grad_norm": 3.3905725479125977, "learning_rate": 1.961288852906598e-05, "loss": 1.5748, "step": 35813 }, { "epoch": 0.46, "grad_norm": 4.199127674102783, "learning_rate": 1.96128595746597e-05, "loss": 2.1161, "step": 35814 }, { "epoch": 0.46, "grad_norm": 4.073965072631836, "learning_rate": 1.9612830619191996e-05, "loss": 1.8468, "step": 35815 }, { "epoch": 0.46, "grad_norm": 4.261231899261475, "learning_rate": 1.9612801662662873e-05, "loss": 2.4341, "step": 35816 }, { "epoch": 0.46, "grad_norm": 3.360924243927002, "learning_rate": 1.9612772705072332e-05, "loss": 1.7587, "step": 35817 }, { "epoch": 0.46, "grad_norm": 4.634340286254883, "learning_rate": 1.961274374642038e-05, "loss": 2.0237, "step": 35818 }, { "epoch": 0.46, "grad_norm": 3.559799909591675, "learning_rate": 1.9612714786707018e-05, "loss": 1.8874, "step": 35819 }, { "epoch": 0.46, "grad_norm": 4.257250785827637, "learning_rate": 1.9612685825932247e-05, "loss": 2.2403, "step": 35820 }, { "epoch": 0.46, "grad_norm": 3.336456537246704, "learning_rate": 1.9612656864096073e-05, "loss": 1.6804, "step": 35821 }, { "epoch": 0.46, "grad_norm": 4.053780555725098, "learning_rate": 1.9612627901198498e-05, "loss": 2.2518, "step": 35822 }, { "epoch": 0.46, "grad_norm": 3.6936731338500977, "learning_rate": 1.9612598937239527e-05, "loss": 1.9927, "step": 35823 }, { "epoch": 0.46, "grad_norm": 3.6391401290893555, "learning_rate": 1.9612569972219163e-05, "loss": 1.8159, "step": 35824 }, { "epoch": 0.46, "grad_norm": 3.4749302864074707, "learning_rate": 1.9612541006137405e-05, "loss": 1.8093, "step": 35825 }, { "epoch": 0.46, "grad_norm": 3.8093528747558594, "learning_rate": 1.961251203899426e-05, "loss": 2.2248, "step": 35826 }, { "epoch": 0.46, "grad_norm": 3.7839243412017822, "learning_rate": 1.9612483070789734e-05, "loss": 1.5887, "step": 35827 }, { "epoch": 0.46, "grad_norm": 3.761359214782715, "learning_rate": 1.9612454101523827e-05, "loss": 1.8374, "step": 35828 }, { "epoch": 0.46, "grad_norm": 3.973737955093384, "learning_rate": 1.961242513119654e-05, "loss": 1.9885, "step": 35829 }, { "epoch": 0.46, "grad_norm": 4.330497741699219, "learning_rate": 1.9612396159807878e-05, "loss": 1.8227, "step": 35830 }, { "epoch": 0.47, "grad_norm": 3.8737823963165283, "learning_rate": 1.9612367187357845e-05, "loss": 2.0899, "step": 35831 }, { "epoch": 0.47, "grad_norm": 4.390783786773682, "learning_rate": 1.961233821384644e-05, "loss": 2.0318, "step": 35832 }, { "epoch": 0.47, "grad_norm": 3.4161903858184814, "learning_rate": 1.9612309239273675e-05, "loss": 1.6304, "step": 35833 }, { "epoch": 0.47, "grad_norm": 4.194190979003906, "learning_rate": 1.9612280263639544e-05, "loss": 2.2942, "step": 35834 }, { "epoch": 0.47, "grad_norm": 3.7921302318573, "learning_rate": 1.9612251286944057e-05, "loss": 1.9154, "step": 35835 }, { "epoch": 0.47, "grad_norm": 3.8413596153259277, "learning_rate": 1.9612222309187214e-05, "loss": 2.381, "step": 35836 }, { "epoch": 0.47, "grad_norm": 3.726990222930908, "learning_rate": 1.9612193330369015e-05, "loss": 2.0348, "step": 35837 }, { "epoch": 0.47, "grad_norm": 3.391970157623291, "learning_rate": 1.961216435048947e-05, "loss": 1.7931, "step": 35838 }, { "epoch": 0.47, "grad_norm": 3.2679226398468018, "learning_rate": 1.961213536954858e-05, "loss": 1.7141, "step": 35839 }, { "epoch": 0.47, "grad_norm": 3.6349565982818604, "learning_rate": 1.9612106387546347e-05, "loss": 1.875, "step": 35840 }, { "epoch": 0.47, "grad_norm": 4.094570636749268, "learning_rate": 1.9612077404482774e-05, "loss": 2.4955, "step": 35841 }, { "epoch": 0.47, "grad_norm": 3.8068225383758545, "learning_rate": 1.9612048420357863e-05, "loss": 2.0514, "step": 35842 }, { "epoch": 0.47, "grad_norm": 3.0632054805755615, "learning_rate": 1.961201943517162e-05, "loss": 1.3457, "step": 35843 }, { "epoch": 0.47, "grad_norm": 3.498594284057617, "learning_rate": 1.9611990448924047e-05, "loss": 2.0397, "step": 35844 }, { "epoch": 0.47, "grad_norm": 3.353649377822876, "learning_rate": 1.9611961461615146e-05, "loss": 1.6082, "step": 35845 }, { "epoch": 0.47, "grad_norm": 3.798854351043701, "learning_rate": 1.9611932473244923e-05, "loss": 1.7854, "step": 35846 }, { "epoch": 0.47, "grad_norm": 4.342641830444336, "learning_rate": 1.961190348381338e-05, "loss": 1.943, "step": 35847 }, { "epoch": 0.47, "grad_norm": 3.3533639907836914, "learning_rate": 1.9611874493320516e-05, "loss": 2.1398, "step": 35848 }, { "epoch": 0.47, "grad_norm": 3.348841667175293, "learning_rate": 1.961184550176634e-05, "loss": 1.5777, "step": 35849 }, { "epoch": 0.47, "grad_norm": 3.956883430480957, "learning_rate": 1.9611816509150855e-05, "loss": 1.8674, "step": 35850 }, { "epoch": 0.47, "grad_norm": 4.144311904907227, "learning_rate": 1.9611787515474062e-05, "loss": 2.1204, "step": 35851 }, { "epoch": 0.47, "grad_norm": 3.3538050651550293, "learning_rate": 1.961175852073596e-05, "loss": 1.6221, "step": 35852 }, { "epoch": 0.47, "grad_norm": 3.753676414489746, "learning_rate": 1.9611729524936563e-05, "loss": 2.1744, "step": 35853 }, { "epoch": 0.47, "grad_norm": 3.6922216415405273, "learning_rate": 1.9611700528075865e-05, "loss": 1.9234, "step": 35854 }, { "epoch": 0.47, "grad_norm": 4.235847473144531, "learning_rate": 1.9611671530153874e-05, "loss": 2.0957, "step": 35855 }, { "epoch": 0.47, "grad_norm": 3.1630358695983887, "learning_rate": 1.961164253117059e-05, "loss": 1.5873, "step": 35856 }, { "epoch": 0.47, "grad_norm": 3.9350736141204834, "learning_rate": 1.961161353112602e-05, "loss": 2.2848, "step": 35857 }, { "epoch": 0.47, "grad_norm": 3.682037830352783, "learning_rate": 1.9611584530020157e-05, "loss": 2.1223, "step": 35858 }, { "epoch": 0.47, "grad_norm": 3.486046314239502, "learning_rate": 1.961155552785302e-05, "loss": 1.8108, "step": 35859 }, { "epoch": 0.47, "grad_norm": 4.017447471618652, "learning_rate": 1.96115265246246e-05, "loss": 2.1923, "step": 35860 }, { "epoch": 0.47, "grad_norm": 3.8832595348358154, "learning_rate": 1.9611497520334906e-05, "loss": 1.9186, "step": 35861 }, { "epoch": 0.47, "grad_norm": 3.8802154064178467, "learning_rate": 1.961146851498394e-05, "loss": 2.1047, "step": 35862 }, { "epoch": 0.47, "grad_norm": 4.0136919021606445, "learning_rate": 1.9611439508571707e-05, "loss": 2.0391, "step": 35863 }, { "epoch": 0.47, "grad_norm": 3.5318500995635986, "learning_rate": 1.9611410501098204e-05, "loss": 1.8035, "step": 35864 }, { "epoch": 0.47, "grad_norm": 3.574854612350464, "learning_rate": 1.961138149256344e-05, "loss": 1.9048, "step": 35865 }, { "epoch": 0.47, "grad_norm": 4.120652198791504, "learning_rate": 1.9611352482967418e-05, "loss": 2.1452, "step": 35866 }, { "epoch": 0.47, "grad_norm": 3.8372862339019775, "learning_rate": 1.9611323472310135e-05, "loss": 2.2848, "step": 35867 }, { "epoch": 0.47, "grad_norm": 3.7597382068634033, "learning_rate": 1.96112944605916e-05, "loss": 2.3797, "step": 35868 }, { "epoch": 0.47, "grad_norm": 3.0453360080718994, "learning_rate": 1.961126544781182e-05, "loss": 1.5255, "step": 35869 }, { "epoch": 0.47, "grad_norm": 3.7377917766571045, "learning_rate": 1.961123643397079e-05, "loss": 2.1967, "step": 35870 }, { "epoch": 0.47, "grad_norm": 3.5301716327667236, "learning_rate": 1.9611207419068513e-05, "loss": 1.9098, "step": 35871 }, { "epoch": 0.47, "grad_norm": 3.8262040615081787, "learning_rate": 1.9611178403105e-05, "loss": 2.2643, "step": 35872 }, { "epoch": 0.47, "grad_norm": 3.574422836303711, "learning_rate": 1.961114938608025e-05, "loss": 1.8511, "step": 35873 }, { "epoch": 0.47, "grad_norm": 4.1964240074157715, "learning_rate": 1.9611120367994264e-05, "loss": 2.3611, "step": 35874 }, { "epoch": 0.47, "grad_norm": 3.375544548034668, "learning_rate": 1.9611091348847048e-05, "loss": 1.6902, "step": 35875 }, { "epoch": 0.47, "grad_norm": 4.265291690826416, "learning_rate": 1.9611062328638604e-05, "loss": 2.2064, "step": 35876 }, { "epoch": 0.47, "grad_norm": 3.8673830032348633, "learning_rate": 1.961103330736894e-05, "loss": 1.9439, "step": 35877 }, { "epoch": 0.47, "grad_norm": 4.0746564865112305, "learning_rate": 1.9611004285038052e-05, "loss": 1.9367, "step": 35878 }, { "epoch": 0.47, "grad_norm": 3.534949541091919, "learning_rate": 1.9610975261645944e-05, "loss": 1.9297, "step": 35879 }, { "epoch": 0.47, "grad_norm": 3.7704012393951416, "learning_rate": 1.9610946237192622e-05, "loss": 1.8963, "step": 35880 }, { "epoch": 0.47, "grad_norm": 4.180037021636963, "learning_rate": 1.961091721167809e-05, "loss": 1.7326, "step": 35881 }, { "epoch": 0.47, "grad_norm": 4.488340377807617, "learning_rate": 1.9610888185102347e-05, "loss": 2.008, "step": 35882 }, { "epoch": 0.47, "grad_norm": 3.6705410480499268, "learning_rate": 1.96108591574654e-05, "loss": 2.0895, "step": 35883 }, { "epoch": 0.47, "grad_norm": 3.870211124420166, "learning_rate": 1.961083012876725e-05, "loss": 2.0473, "step": 35884 }, { "epoch": 0.47, "grad_norm": 4.336762428283691, "learning_rate": 1.9610801099007907e-05, "loss": 1.8553, "step": 35885 }, { "epoch": 0.47, "grad_norm": 3.5716965198516846, "learning_rate": 1.9610772068187364e-05, "loss": 1.9009, "step": 35886 }, { "epoch": 0.47, "grad_norm": 3.9862446784973145, "learning_rate": 1.9610743036305627e-05, "loss": 2.1889, "step": 35887 }, { "epoch": 0.47, "grad_norm": 3.6614785194396973, "learning_rate": 1.9610714003362704e-05, "loss": 1.8567, "step": 35888 }, { "epoch": 0.47, "grad_norm": 3.363513708114624, "learning_rate": 1.9610684969358594e-05, "loss": 1.5662, "step": 35889 }, { "epoch": 0.47, "grad_norm": 3.876488208770752, "learning_rate": 1.9610655934293304e-05, "loss": 1.9411, "step": 35890 }, { "epoch": 0.47, "grad_norm": 3.7875962257385254, "learning_rate": 1.961062689816683e-05, "loss": 2.2747, "step": 35891 }, { "epoch": 0.47, "grad_norm": 3.9291958808898926, "learning_rate": 1.9610597860979182e-05, "loss": 2.0857, "step": 35892 }, { "epoch": 0.47, "grad_norm": 3.842510938644409, "learning_rate": 1.961056882273036e-05, "loss": 1.9776, "step": 35893 }, { "epoch": 0.47, "grad_norm": 4.377395153045654, "learning_rate": 1.961053978342037e-05, "loss": 2.2651, "step": 35894 }, { "epoch": 0.47, "grad_norm": 4.300539016723633, "learning_rate": 1.9610510743049213e-05, "loss": 2.061, "step": 35895 }, { "epoch": 0.47, "grad_norm": 4.630279064178467, "learning_rate": 1.961048170161689e-05, "loss": 2.6414, "step": 35896 }, { "epoch": 0.47, "grad_norm": 3.9747846126556396, "learning_rate": 1.9610452659123408e-05, "loss": 1.7225, "step": 35897 }, { "epoch": 0.47, "grad_norm": 3.7016024589538574, "learning_rate": 1.961042361556877e-05, "loss": 1.8156, "step": 35898 }, { "epoch": 0.47, "grad_norm": 4.177595138549805, "learning_rate": 1.9610394570952978e-05, "loss": 2.5427, "step": 35899 }, { "epoch": 0.47, "grad_norm": 4.307560443878174, "learning_rate": 1.9610365525276036e-05, "loss": 2.656, "step": 35900 }, { "epoch": 0.47, "grad_norm": 4.057137966156006, "learning_rate": 1.9610336478537944e-05, "loss": 2.208, "step": 35901 }, { "epoch": 0.47, "grad_norm": 3.7179598808288574, "learning_rate": 1.961030743073871e-05, "loss": 1.6146, "step": 35902 }, { "epoch": 0.47, "grad_norm": 4.124273300170898, "learning_rate": 1.9610278381878337e-05, "loss": 2.1751, "step": 35903 }, { "epoch": 0.47, "grad_norm": 3.5398902893066406, "learning_rate": 1.9610249331956825e-05, "loss": 1.8555, "step": 35904 }, { "epoch": 0.47, "grad_norm": 3.779968738555908, "learning_rate": 1.9610220280974177e-05, "loss": 1.9948, "step": 35905 }, { "epoch": 0.47, "grad_norm": 3.540261745452881, "learning_rate": 1.9610191228930396e-05, "loss": 1.7075, "step": 35906 }, { "epoch": 0.47, "grad_norm": 3.870725631713867, "learning_rate": 1.961016217582549e-05, "loss": 1.9653, "step": 35907 }, { "epoch": 0.47, "grad_norm": 4.082038879394531, "learning_rate": 1.9610133121659455e-05, "loss": 2.1991, "step": 35908 }, { "epoch": 0.47, "grad_norm": 3.8186700344085693, "learning_rate": 1.9610104066432304e-05, "loss": 2.2212, "step": 35909 }, { "epoch": 0.47, "grad_norm": 4.016927719116211, "learning_rate": 1.9610075010144028e-05, "loss": 2.1743, "step": 35910 }, { "epoch": 0.47, "grad_norm": 4.584275245666504, "learning_rate": 1.9610045952794643e-05, "loss": 2.418, "step": 35911 }, { "epoch": 0.47, "grad_norm": 3.443256378173828, "learning_rate": 1.9610016894384143e-05, "loss": 2.0607, "step": 35912 }, { "epoch": 0.47, "grad_norm": 3.631984233856201, "learning_rate": 1.9609987834912534e-05, "loss": 1.9524, "step": 35913 }, { "epoch": 0.47, "grad_norm": 3.539207696914673, "learning_rate": 1.960995877437982e-05, "loss": 1.8884, "step": 35914 }, { "epoch": 0.47, "grad_norm": 3.732700824737549, "learning_rate": 1.9609929712786004e-05, "loss": 1.9822, "step": 35915 }, { "epoch": 0.47, "grad_norm": 4.011186122894287, "learning_rate": 1.9609900650131086e-05, "loss": 1.8048, "step": 35916 }, { "epoch": 0.47, "grad_norm": 3.822686195373535, "learning_rate": 1.9609871586415076e-05, "loss": 1.6612, "step": 35917 }, { "epoch": 0.47, "grad_norm": 3.6858584880828857, "learning_rate": 1.960984252163797e-05, "loss": 1.847, "step": 35918 }, { "epoch": 0.47, "grad_norm": 4.139118194580078, "learning_rate": 1.9609813455799774e-05, "loss": 1.8891, "step": 35919 }, { "epoch": 0.47, "grad_norm": 3.759944200515747, "learning_rate": 1.9609784388900493e-05, "loss": 1.8102, "step": 35920 }, { "epoch": 0.47, "grad_norm": 3.6167397499084473, "learning_rate": 1.960975532094013e-05, "loss": 1.8874, "step": 35921 }, { "epoch": 0.47, "grad_norm": 3.841423273086548, "learning_rate": 1.9609726251918685e-05, "loss": 2.0548, "step": 35922 }, { "epoch": 0.47, "grad_norm": 4.113776206970215, "learning_rate": 1.9609697181836162e-05, "loss": 2.1497, "step": 35923 }, { "epoch": 0.47, "grad_norm": 3.416419267654419, "learning_rate": 1.9609668110692568e-05, "loss": 2.2427, "step": 35924 }, { "epoch": 0.47, "grad_norm": 3.4603235721588135, "learning_rate": 1.9609639038487903e-05, "loss": 1.8156, "step": 35925 }, { "epoch": 0.47, "grad_norm": 4.015516757965088, "learning_rate": 1.9609609965222172e-05, "loss": 1.6773, "step": 35926 }, { "epoch": 0.47, "grad_norm": 4.013469219207764, "learning_rate": 1.9609580890895374e-05, "loss": 1.9803, "step": 35927 }, { "epoch": 0.47, "grad_norm": 3.806891679763794, "learning_rate": 1.9609551815507518e-05, "loss": 1.6089, "step": 35928 }, { "epoch": 0.47, "grad_norm": 4.426707744598389, "learning_rate": 1.96095227390586e-05, "loss": 2.0738, "step": 35929 }, { "epoch": 0.47, "grad_norm": 3.7856948375701904, "learning_rate": 1.9609493661548632e-05, "loss": 2.0002, "step": 35930 }, { "epoch": 0.47, "grad_norm": 4.235923767089844, "learning_rate": 1.9609464582977616e-05, "loss": 2.1692, "step": 35931 }, { "epoch": 0.47, "grad_norm": 3.7331202030181885, "learning_rate": 1.9609435503345546e-05, "loss": 2.0027, "step": 35932 }, { "epoch": 0.47, "grad_norm": 3.26588773727417, "learning_rate": 1.9609406422652434e-05, "loss": 1.736, "step": 35933 }, { "epoch": 0.47, "grad_norm": 3.7686660289764404, "learning_rate": 1.960937734089828e-05, "loss": 1.7995, "step": 35934 }, { "epoch": 0.47, "grad_norm": 3.551445960998535, "learning_rate": 1.9609348258083085e-05, "loss": 1.6865, "step": 35935 }, { "epoch": 0.47, "grad_norm": 3.557527542114258, "learning_rate": 1.9609319174206858e-05, "loss": 1.9158, "step": 35936 }, { "epoch": 0.47, "grad_norm": 3.9359683990478516, "learning_rate": 1.96092900892696e-05, "loss": 2.3304, "step": 35937 }, { "epoch": 0.47, "grad_norm": 4.400594234466553, "learning_rate": 1.9609261003271308e-05, "loss": 2.5142, "step": 35938 }, { "epoch": 0.47, "grad_norm": 3.8878626823425293, "learning_rate": 1.9609231916212e-05, "loss": 2.0528, "step": 35939 }, { "epoch": 0.47, "grad_norm": 3.5066075325012207, "learning_rate": 1.960920282809166e-05, "loss": 1.9917, "step": 35940 }, { "epoch": 0.47, "grad_norm": 3.879047155380249, "learning_rate": 1.9609173738910304e-05, "loss": 2.262, "step": 35941 }, { "epoch": 0.47, "grad_norm": 3.391916513442993, "learning_rate": 1.9609144648667936e-05, "loss": 1.754, "step": 35942 }, { "epoch": 0.47, "grad_norm": 4.36173152923584, "learning_rate": 1.960911555736455e-05, "loss": 2.3263, "step": 35943 }, { "epoch": 0.47, "grad_norm": 3.6509976387023926, "learning_rate": 1.960908646500016e-05, "loss": 2.2291, "step": 35944 }, { "epoch": 0.47, "grad_norm": 3.299410820007324, "learning_rate": 1.960905737157476e-05, "loss": 1.5986, "step": 35945 }, { "epoch": 0.47, "grad_norm": 3.881605386734009, "learning_rate": 1.960902827708836e-05, "loss": 1.8473, "step": 35946 }, { "epoch": 0.47, "grad_norm": 4.15136194229126, "learning_rate": 1.960899918154096e-05, "loss": 2.4778, "step": 35947 }, { "epoch": 0.47, "grad_norm": 4.008930683135986, "learning_rate": 1.9608970084932562e-05, "loss": 2.312, "step": 35948 }, { "epoch": 0.47, "grad_norm": 4.4032721519470215, "learning_rate": 1.960894098726317e-05, "loss": 2.1062, "step": 35949 }, { "epoch": 0.47, "grad_norm": 3.878016948699951, "learning_rate": 1.9608911888532786e-05, "loss": 2.0609, "step": 35950 }, { "epoch": 0.47, "grad_norm": 4.546926498413086, "learning_rate": 1.960888278874142e-05, "loss": 2.3691, "step": 35951 }, { "epoch": 0.47, "grad_norm": 3.463674545288086, "learning_rate": 1.9608853687889066e-05, "loss": 1.4207, "step": 35952 }, { "epoch": 0.47, "grad_norm": 3.3440959453582764, "learning_rate": 1.9608824585975733e-05, "loss": 1.6208, "step": 35953 }, { "epoch": 0.47, "grad_norm": 3.9144201278686523, "learning_rate": 1.9608795483001426e-05, "loss": 2.3134, "step": 35954 }, { "epoch": 0.47, "grad_norm": 3.8054773807525635, "learning_rate": 1.9608766378966144e-05, "loss": 1.9669, "step": 35955 }, { "epoch": 0.47, "grad_norm": 3.6991331577301025, "learning_rate": 1.9608737273869888e-05, "loss": 2.3288, "step": 35956 }, { "epoch": 0.47, "grad_norm": 4.2555742263793945, "learning_rate": 1.9608708167712665e-05, "loss": 2.1177, "step": 35957 }, { "epoch": 0.47, "grad_norm": 4.171658992767334, "learning_rate": 1.9608679060494476e-05, "loss": 2.4141, "step": 35958 }, { "epoch": 0.47, "grad_norm": 3.466395854949951, "learning_rate": 1.960864995221533e-05, "loss": 1.759, "step": 35959 }, { "epoch": 0.47, "grad_norm": 3.42885684967041, "learning_rate": 1.9608620842875227e-05, "loss": 1.717, "step": 35960 }, { "epoch": 0.47, "grad_norm": 4.032996654510498, "learning_rate": 1.9608591732474163e-05, "loss": 2.427, "step": 35961 }, { "epoch": 0.47, "grad_norm": 4.389065265655518, "learning_rate": 1.9608562621012153e-05, "loss": 2.1658, "step": 35962 }, { "epoch": 0.47, "grad_norm": 3.457719564437866, "learning_rate": 1.960853350848919e-05, "loss": 2.1104, "step": 35963 }, { "epoch": 0.47, "grad_norm": 3.2877018451690674, "learning_rate": 1.9608504394905288e-05, "loss": 1.5451, "step": 35964 }, { "epoch": 0.47, "grad_norm": 3.833395481109619, "learning_rate": 1.960847528026044e-05, "loss": 2.0167, "step": 35965 }, { "epoch": 0.47, "grad_norm": 3.56085467338562, "learning_rate": 1.9608446164554652e-05, "loss": 1.5385, "step": 35966 }, { "epoch": 0.47, "grad_norm": 3.4778544902801514, "learning_rate": 1.9608417047787932e-05, "loss": 1.6627, "step": 35967 }, { "epoch": 0.47, "grad_norm": 4.229084014892578, "learning_rate": 1.9608387929960276e-05, "loss": 2.0837, "step": 35968 }, { "epoch": 0.47, "grad_norm": 4.14284610748291, "learning_rate": 1.9608358811071695e-05, "loss": 2.0041, "step": 35969 }, { "epoch": 0.47, "grad_norm": 3.740180015563965, "learning_rate": 1.9608329691122184e-05, "loss": 2.0157, "step": 35970 }, { "epoch": 0.47, "grad_norm": 3.6512186527252197, "learning_rate": 1.9608300570111754e-05, "loss": 1.8734, "step": 35971 }, { "epoch": 0.47, "grad_norm": 3.638169527053833, "learning_rate": 1.9608271448040406e-05, "loss": 1.7738, "step": 35972 }, { "epoch": 0.47, "grad_norm": 4.2361016273498535, "learning_rate": 1.9608242324908138e-05, "loss": 2.1534, "step": 35973 }, { "epoch": 0.47, "grad_norm": 4.343743801116943, "learning_rate": 1.960821320071496e-05, "loss": 2.7705, "step": 35974 }, { "epoch": 0.47, "grad_norm": 3.7544009685516357, "learning_rate": 1.960818407546087e-05, "loss": 1.8956, "step": 35975 }, { "epoch": 0.47, "grad_norm": 3.7421910762786865, "learning_rate": 1.9608154949145873e-05, "loss": 2.3455, "step": 35976 }, { "epoch": 0.47, "grad_norm": 4.0771484375, "learning_rate": 1.960812582176997e-05, "loss": 1.8239, "step": 35977 }, { "epoch": 0.47, "grad_norm": 3.7496750354766846, "learning_rate": 1.9608096693333173e-05, "loss": 1.6087, "step": 35978 }, { "epoch": 0.47, "grad_norm": 4.068033218383789, "learning_rate": 1.9608067563835477e-05, "loss": 2.4224, "step": 35979 }, { "epoch": 0.47, "grad_norm": 4.092108726501465, "learning_rate": 1.960803843327689e-05, "loss": 1.709, "step": 35980 }, { "epoch": 0.47, "grad_norm": 3.713514804840088, "learning_rate": 1.960800930165741e-05, "loss": 1.8846, "step": 35981 }, { "epoch": 0.47, "grad_norm": 3.9624454975128174, "learning_rate": 1.960798016897704e-05, "loss": 2.3543, "step": 35982 }, { "epoch": 0.47, "grad_norm": 3.8613080978393555, "learning_rate": 1.960795103523579e-05, "loss": 1.8074, "step": 35983 }, { "epoch": 0.47, "grad_norm": 4.291143417358398, "learning_rate": 1.9607921900433655e-05, "loss": 2.3974, "step": 35984 }, { "epoch": 0.47, "grad_norm": 3.5896759033203125, "learning_rate": 1.9607892764570645e-05, "loss": 1.8426, "step": 35985 }, { "epoch": 0.47, "grad_norm": 3.573273181915283, "learning_rate": 1.9607863627646763e-05, "loss": 1.7141, "step": 35986 }, { "epoch": 0.47, "grad_norm": 3.809664249420166, "learning_rate": 1.9607834489662007e-05, "loss": 2.5434, "step": 35987 }, { "epoch": 0.47, "grad_norm": 3.2354414463043213, "learning_rate": 1.9607805350616382e-05, "loss": 1.5873, "step": 35988 }, { "epoch": 0.47, "grad_norm": 3.855211019515991, "learning_rate": 1.9607776210509893e-05, "loss": 2.1388, "step": 35989 }, { "epoch": 0.47, "grad_norm": 4.837242126464844, "learning_rate": 1.9607747069342546e-05, "loss": 2.6322, "step": 35990 }, { "epoch": 0.47, "grad_norm": 4.3423895835876465, "learning_rate": 1.9607717927114337e-05, "loss": 2.1501, "step": 35991 }, { "epoch": 0.47, "grad_norm": 3.8361213207244873, "learning_rate": 1.9607688783825274e-05, "loss": 1.9394, "step": 35992 }, { "epoch": 0.47, "grad_norm": 3.915208578109741, "learning_rate": 1.960765963947536e-05, "loss": 2.1536, "step": 35993 }, { "epoch": 0.47, "grad_norm": 3.7709500789642334, "learning_rate": 1.9607630494064595e-05, "loss": 2.145, "step": 35994 }, { "epoch": 0.47, "grad_norm": 3.5459914207458496, "learning_rate": 1.9607601347592985e-05, "loss": 1.6916, "step": 35995 }, { "epoch": 0.47, "grad_norm": 3.4066648483276367, "learning_rate": 1.9607572200060538e-05, "loss": 1.8713, "step": 35996 }, { "epoch": 0.47, "grad_norm": 3.450580358505249, "learning_rate": 1.9607543051467246e-05, "loss": 1.7617, "step": 35997 }, { "epoch": 0.47, "grad_norm": 3.563185453414917, "learning_rate": 1.960751390181312e-05, "loss": 1.7166, "step": 35998 }, { "epoch": 0.47, "grad_norm": 3.376549005508423, "learning_rate": 1.9607484751098163e-05, "loss": 1.5094, "step": 35999 }, { "epoch": 0.47, "grad_norm": 4.413177013397217, "learning_rate": 1.9607455599322375e-05, "loss": 2.5266, "step": 36000 }, { "epoch": 0.47, "grad_norm": 4.113051891326904, "learning_rate": 1.960742644648576e-05, "loss": 1.984, "step": 36001 }, { "epoch": 0.47, "grad_norm": 3.841181516647339, "learning_rate": 1.9607397292588324e-05, "loss": 2.0703, "step": 36002 }, { "epoch": 0.47, "grad_norm": 3.691298246383667, "learning_rate": 1.9607368137630068e-05, "loss": 2.0604, "step": 36003 }, { "epoch": 0.47, "grad_norm": 3.615355968475342, "learning_rate": 1.9607338981610994e-05, "loss": 1.9568, "step": 36004 }, { "epoch": 0.47, "grad_norm": 3.3243165016174316, "learning_rate": 1.960730982453111e-05, "loss": 1.5117, "step": 36005 }, { "epoch": 0.47, "grad_norm": 3.6204938888549805, "learning_rate": 1.9607280666390415e-05, "loss": 2.2825, "step": 36006 }, { "epoch": 0.47, "grad_norm": 4.063783645629883, "learning_rate": 1.960725150718891e-05, "loss": 2.4944, "step": 36007 }, { "epoch": 0.47, "grad_norm": 3.4266679286956787, "learning_rate": 1.9607222346926604e-05, "loss": 2.1382, "step": 36008 }, { "epoch": 0.47, "grad_norm": 4.031757831573486, "learning_rate": 1.9607193185603498e-05, "loss": 2.4036, "step": 36009 }, { "epoch": 0.47, "grad_norm": 3.676619529724121, "learning_rate": 1.9607164023219595e-05, "loss": 1.9642, "step": 36010 }, { "epoch": 0.47, "grad_norm": 4.051508903503418, "learning_rate": 1.9607134859774898e-05, "loss": 2.0638, "step": 36011 }, { "epoch": 0.47, "grad_norm": 4.015063762664795, "learning_rate": 1.9607105695269408e-05, "loss": 1.9267, "step": 36012 }, { "epoch": 0.47, "grad_norm": 3.8526382446289062, "learning_rate": 1.9607076529703134e-05, "loss": 2.4545, "step": 36013 }, { "epoch": 0.47, "grad_norm": 4.321966171264648, "learning_rate": 1.9607047363076074e-05, "loss": 2.3009, "step": 36014 }, { "epoch": 0.47, "grad_norm": 3.4470877647399902, "learning_rate": 1.960701819538823e-05, "loss": 1.9589, "step": 36015 }, { "epoch": 0.47, "grad_norm": 3.356564521789551, "learning_rate": 1.9606989026639613e-05, "loss": 1.838, "step": 36016 }, { "epoch": 0.47, "grad_norm": 3.3286163806915283, "learning_rate": 1.960695985683022e-05, "loss": 1.6689, "step": 36017 }, { "epoch": 0.47, "grad_norm": 3.0050158500671387, "learning_rate": 1.9606930685960056e-05, "loss": 1.4417, "step": 36018 }, { "epoch": 0.47, "grad_norm": 4.262339115142822, "learning_rate": 1.9606901514029123e-05, "loss": 2.2213, "step": 36019 }, { "epoch": 0.47, "grad_norm": 3.9806909561157227, "learning_rate": 1.9606872341037423e-05, "loss": 1.8487, "step": 36020 }, { "epoch": 0.47, "grad_norm": 3.841078281402588, "learning_rate": 1.9606843166984967e-05, "loss": 1.9647, "step": 36021 }, { "epoch": 0.47, "grad_norm": 3.7224063873291016, "learning_rate": 1.9606813991871748e-05, "loss": 2.1002, "step": 36022 }, { "epoch": 0.47, "grad_norm": 3.4682769775390625, "learning_rate": 1.9606784815697776e-05, "loss": 1.641, "step": 36023 }, { "epoch": 0.47, "grad_norm": 4.0808234214782715, "learning_rate": 1.960675563846305e-05, "loss": 2.1372, "step": 36024 }, { "epoch": 0.47, "grad_norm": 4.342701435089111, "learning_rate": 1.9606726460167577e-05, "loss": 1.7904, "step": 36025 }, { "epoch": 0.47, "grad_norm": 4.5934062004089355, "learning_rate": 1.9606697280811356e-05, "loss": 2.4519, "step": 36026 }, { "epoch": 0.47, "grad_norm": 4.18556547164917, "learning_rate": 1.9606668100394396e-05, "loss": 2.3706, "step": 36027 }, { "epoch": 0.47, "grad_norm": 3.607067823410034, "learning_rate": 1.9606638918916693e-05, "loss": 1.9469, "step": 36028 }, { "epoch": 0.47, "grad_norm": 3.7402243614196777, "learning_rate": 1.9606609736378258e-05, "loss": 1.6805, "step": 36029 }, { "epoch": 0.47, "grad_norm": 3.045821189880371, "learning_rate": 1.9606580552779087e-05, "loss": 1.4964, "step": 36030 }, { "epoch": 0.47, "grad_norm": 4.7348504066467285, "learning_rate": 1.960655136811919e-05, "loss": 2.346, "step": 36031 }, { "epoch": 0.47, "grad_norm": 3.783709764480591, "learning_rate": 1.9606522182398563e-05, "loss": 1.827, "step": 36032 }, { "epoch": 0.47, "grad_norm": 3.203570604324341, "learning_rate": 1.9606492995617215e-05, "loss": 1.4563, "step": 36033 }, { "epoch": 0.47, "grad_norm": 3.7752270698547363, "learning_rate": 1.9606463807775147e-05, "loss": 2.0733, "step": 36034 }, { "epoch": 0.47, "grad_norm": 4.133729934692383, "learning_rate": 1.9606434618872363e-05, "loss": 2.0477, "step": 36035 }, { "epoch": 0.47, "grad_norm": 4.046801567077637, "learning_rate": 1.9606405428908864e-05, "loss": 1.9771, "step": 36036 }, { "epoch": 0.47, "grad_norm": 3.956580638885498, "learning_rate": 1.9606376237884657e-05, "loss": 2.0055, "step": 36037 }, { "epoch": 0.47, "grad_norm": 3.686939001083374, "learning_rate": 1.960634704579974e-05, "loss": 2.0162, "step": 36038 }, { "epoch": 0.47, "grad_norm": 4.401146411895752, "learning_rate": 1.960631785265412e-05, "loss": 2.058, "step": 36039 }, { "epoch": 0.47, "grad_norm": 4.200722694396973, "learning_rate": 1.9606288658447803e-05, "loss": 2.5049, "step": 36040 }, { "epoch": 0.47, "grad_norm": 4.455572605133057, "learning_rate": 1.9606259463180787e-05, "loss": 2.3076, "step": 36041 }, { "epoch": 0.47, "grad_norm": 3.645885944366455, "learning_rate": 1.9606230266853076e-05, "loss": 2.0646, "step": 36042 }, { "epoch": 0.47, "grad_norm": 3.881436824798584, "learning_rate": 1.9606201069464675e-05, "loss": 2.1585, "step": 36043 }, { "epoch": 0.47, "grad_norm": 3.7625555992126465, "learning_rate": 1.9606171871015583e-05, "loss": 1.9684, "step": 36044 }, { "epoch": 0.47, "grad_norm": 3.7343051433563232, "learning_rate": 1.960614267150581e-05, "loss": 1.9768, "step": 36045 }, { "epoch": 0.47, "grad_norm": 3.8858065605163574, "learning_rate": 1.9606113470935357e-05, "loss": 1.9022, "step": 36046 }, { "epoch": 0.47, "grad_norm": 3.666992425918579, "learning_rate": 1.9606084269304224e-05, "loss": 2.1999, "step": 36047 }, { "epoch": 0.47, "grad_norm": 4.02605676651001, "learning_rate": 1.9606055066612418e-05, "loss": 2.2075, "step": 36048 }, { "epoch": 0.47, "grad_norm": 3.7476606369018555, "learning_rate": 1.9606025862859937e-05, "loss": 2.0895, "step": 36049 }, { "epoch": 0.47, "grad_norm": 5.32021427154541, "learning_rate": 1.9605996658046792e-05, "loss": 2.1068, "step": 36050 }, { "epoch": 0.47, "grad_norm": 4.356914520263672, "learning_rate": 1.9605967452172982e-05, "loss": 1.9411, "step": 36051 }, { "epoch": 0.47, "grad_norm": 4.349283218383789, "learning_rate": 1.960593824523851e-05, "loss": 1.9652, "step": 36052 }, { "epoch": 0.47, "grad_norm": 3.3010003566741943, "learning_rate": 1.9605909037243377e-05, "loss": 1.7414, "step": 36053 }, { "epoch": 0.47, "grad_norm": 4.352038383483887, "learning_rate": 1.960587982818759e-05, "loss": 2.2094, "step": 36054 }, { "epoch": 0.47, "grad_norm": 4.07511568069458, "learning_rate": 1.960585061807115e-05, "loss": 2.0552, "step": 36055 }, { "epoch": 0.47, "grad_norm": 3.745195150375366, "learning_rate": 1.960582140689406e-05, "loss": 1.683, "step": 36056 }, { "epoch": 0.47, "grad_norm": 4.118250846862793, "learning_rate": 1.960579219465633e-05, "loss": 1.8655, "step": 36057 }, { "epoch": 0.47, "grad_norm": 3.584336996078491, "learning_rate": 1.960576298135795e-05, "loss": 1.997, "step": 36058 }, { "epoch": 0.47, "grad_norm": 4.02562141418457, "learning_rate": 1.9605733766998937e-05, "loss": 1.8043, "step": 36059 }, { "epoch": 0.47, "grad_norm": 3.8597896099090576, "learning_rate": 1.9605704551579285e-05, "loss": 2.0887, "step": 36060 }, { "epoch": 0.47, "grad_norm": 4.242120742797852, "learning_rate": 1.9605675335099003e-05, "loss": 2.0226, "step": 36061 }, { "epoch": 0.47, "grad_norm": 4.106259346008301, "learning_rate": 1.9605646117558087e-05, "loss": 2.3425, "step": 36062 }, { "epoch": 0.47, "grad_norm": 3.6266860961914062, "learning_rate": 1.960561689895655e-05, "loss": 2.0354, "step": 36063 }, { "epoch": 0.47, "grad_norm": 3.8659825325012207, "learning_rate": 1.960558767929438e-05, "loss": 1.8603, "step": 36064 }, { "epoch": 0.47, "grad_norm": 4.444900989532471, "learning_rate": 1.96055584585716e-05, "loss": 2.1063, "step": 36065 }, { "epoch": 0.47, "grad_norm": 3.873119831085205, "learning_rate": 1.9605529236788203e-05, "loss": 1.9023, "step": 36066 }, { "epoch": 0.47, "grad_norm": 4.0942583084106445, "learning_rate": 1.960550001394419e-05, "loss": 2.6134, "step": 36067 }, { "epoch": 0.47, "grad_norm": 4.178889751434326, "learning_rate": 1.9605470790039565e-05, "loss": 1.6976, "step": 36068 }, { "epoch": 0.47, "grad_norm": 3.610018014907837, "learning_rate": 1.9605441565074333e-05, "loss": 1.9075, "step": 36069 }, { "epoch": 0.47, "grad_norm": 3.864938259124756, "learning_rate": 1.9605412339048498e-05, "loss": 2.0927, "step": 36070 }, { "epoch": 0.47, "grad_norm": 3.5080857276916504, "learning_rate": 1.9605383111962066e-05, "loss": 1.8118, "step": 36071 }, { "epoch": 0.47, "grad_norm": 3.463609457015991, "learning_rate": 1.9605353883815036e-05, "loss": 1.7679, "step": 36072 }, { "epoch": 0.47, "grad_norm": 3.2573940753936768, "learning_rate": 1.960532465460741e-05, "loss": 1.6904, "step": 36073 }, { "epoch": 0.47, "grad_norm": 3.3698570728302, "learning_rate": 1.9605295424339193e-05, "loss": 1.6676, "step": 36074 }, { "epoch": 0.47, "grad_norm": 3.7060048580169678, "learning_rate": 1.960526619301039e-05, "loss": 1.7705, "step": 36075 }, { "epoch": 0.47, "grad_norm": 4.068846702575684, "learning_rate": 1.9605236960621002e-05, "loss": 2.0737, "step": 36076 }, { "epoch": 0.47, "grad_norm": 3.9202511310577393, "learning_rate": 1.9605207727171032e-05, "loss": 2.209, "step": 36077 }, { "epoch": 0.47, "grad_norm": 4.369419097900391, "learning_rate": 1.9605178492660484e-05, "loss": 2.357, "step": 36078 }, { "epoch": 0.47, "grad_norm": 3.8594112396240234, "learning_rate": 1.9605149257089364e-05, "loss": 2.1211, "step": 36079 }, { "epoch": 0.47, "grad_norm": 3.9540538787841797, "learning_rate": 1.960512002045767e-05, "loss": 1.9335, "step": 36080 }, { "epoch": 0.47, "grad_norm": 3.8338770866394043, "learning_rate": 1.960509078276541e-05, "loss": 1.854, "step": 36081 }, { "epoch": 0.47, "grad_norm": 3.773218870162964, "learning_rate": 1.9605061544012582e-05, "loss": 2.1325, "step": 36082 }, { "epoch": 0.47, "grad_norm": 4.191904067993164, "learning_rate": 1.9605032304199195e-05, "loss": 2.3091, "step": 36083 }, { "epoch": 0.47, "grad_norm": 4.337653160095215, "learning_rate": 1.9605003063325245e-05, "loss": 2.2502, "step": 36084 }, { "epoch": 0.47, "grad_norm": 4.244861125946045, "learning_rate": 1.9604973821390745e-05, "loss": 2.3397, "step": 36085 }, { "epoch": 0.47, "grad_norm": 4.137452602386475, "learning_rate": 1.9604944578395692e-05, "loss": 1.9988, "step": 36086 }, { "epoch": 0.47, "grad_norm": 3.726059913635254, "learning_rate": 1.960491533434009e-05, "loss": 1.8149, "step": 36087 }, { "epoch": 0.47, "grad_norm": 3.503920793533325, "learning_rate": 1.9604886089223938e-05, "loss": 2.0205, "step": 36088 }, { "epoch": 0.47, "grad_norm": 4.1788740158081055, "learning_rate": 1.960485684304725e-05, "loss": 1.8919, "step": 36089 }, { "epoch": 0.47, "grad_norm": 3.5613088607788086, "learning_rate": 1.9604827595810015e-05, "loss": 1.7462, "step": 36090 }, { "epoch": 0.47, "grad_norm": 3.683645486831665, "learning_rate": 1.960479834751225e-05, "loss": 2.0787, "step": 36091 }, { "epoch": 0.47, "grad_norm": 4.095528602600098, "learning_rate": 1.9604769098153952e-05, "loss": 2.1419, "step": 36092 }, { "epoch": 0.47, "grad_norm": 4.301610469818115, "learning_rate": 1.9604739847735126e-05, "loss": 1.8387, "step": 36093 }, { "epoch": 0.47, "grad_norm": 3.728153944015503, "learning_rate": 1.960471059625577e-05, "loss": 2.2265, "step": 36094 }, { "epoch": 0.47, "grad_norm": 4.220214366912842, "learning_rate": 1.9604681343715894e-05, "loss": 2.2456, "step": 36095 }, { "epoch": 0.47, "grad_norm": 3.4910359382629395, "learning_rate": 1.9604652090115497e-05, "loss": 1.9428, "step": 36096 }, { "epoch": 0.47, "grad_norm": 3.242196559906006, "learning_rate": 1.960462283545458e-05, "loss": 1.5749, "step": 36097 }, { "epoch": 0.47, "grad_norm": 3.5965752601623535, "learning_rate": 1.9604593579733157e-05, "loss": 1.9512, "step": 36098 }, { "epoch": 0.47, "grad_norm": 3.3446085453033447, "learning_rate": 1.960456432295122e-05, "loss": 1.6318, "step": 36099 }, { "epoch": 0.47, "grad_norm": 4.2433061599731445, "learning_rate": 1.9604535065108777e-05, "loss": 2.1878, "step": 36100 }, { "epoch": 0.47, "grad_norm": 4.060977935791016, "learning_rate": 1.9604505806205826e-05, "loss": 2.456, "step": 36101 }, { "epoch": 0.47, "grad_norm": 3.479524612426758, "learning_rate": 1.960447654624238e-05, "loss": 1.7586, "step": 36102 }, { "epoch": 0.47, "grad_norm": 3.5969247817993164, "learning_rate": 1.9604447285218433e-05, "loss": 2.1438, "step": 36103 }, { "epoch": 0.47, "grad_norm": 4.061666488647461, "learning_rate": 1.9604418023133995e-05, "loss": 1.8641, "step": 36104 }, { "epoch": 0.47, "grad_norm": 3.8921546936035156, "learning_rate": 1.9604388759989065e-05, "loss": 2.5858, "step": 36105 }, { "epoch": 0.47, "grad_norm": 3.7553906440734863, "learning_rate": 1.960435949578365e-05, "loss": 2.1721, "step": 36106 }, { "epoch": 0.47, "grad_norm": 3.7219841480255127, "learning_rate": 1.9604330230517747e-05, "loss": 2.1527, "step": 36107 }, { "epoch": 0.47, "grad_norm": 3.7428689002990723, "learning_rate": 1.9604300964191364e-05, "loss": 1.9804, "step": 36108 }, { "epoch": 0.47, "grad_norm": 3.454108476638794, "learning_rate": 1.9604271696804504e-05, "loss": 1.7594, "step": 36109 }, { "epoch": 0.47, "grad_norm": 3.3921360969543457, "learning_rate": 1.960424242835717e-05, "loss": 1.8384, "step": 36110 }, { "epoch": 0.47, "grad_norm": 3.2949764728546143, "learning_rate": 1.9604213158849364e-05, "loss": 1.6129, "step": 36111 }, { "epoch": 0.47, "grad_norm": 4.106812000274658, "learning_rate": 1.9604183888281088e-05, "loss": 2.195, "step": 36112 }, { "epoch": 0.47, "grad_norm": 3.8886897563934326, "learning_rate": 1.960415461665235e-05, "loss": 2.3735, "step": 36113 }, { "epoch": 0.47, "grad_norm": 3.8016765117645264, "learning_rate": 1.9604125343963152e-05, "loss": 2.2064, "step": 36114 }, { "epoch": 0.47, "grad_norm": 3.6018660068511963, "learning_rate": 1.960409607021349e-05, "loss": 1.817, "step": 36115 }, { "epoch": 0.47, "grad_norm": 4.181703567504883, "learning_rate": 1.9604066795403377e-05, "loss": 2.352, "step": 36116 }, { "epoch": 0.47, "grad_norm": 3.967670202255249, "learning_rate": 1.9604037519532813e-05, "loss": 2.0387, "step": 36117 }, { "epoch": 0.47, "grad_norm": 4.082797527313232, "learning_rate": 1.96040082426018e-05, "loss": 2.2376, "step": 36118 }, { "epoch": 0.47, "grad_norm": 4.4164910316467285, "learning_rate": 1.9603978964610337e-05, "loss": 2.1735, "step": 36119 }, { "epoch": 0.47, "grad_norm": 3.151015281677246, "learning_rate": 1.9603949685558436e-05, "loss": 1.7031, "step": 36120 }, { "epoch": 0.47, "grad_norm": 3.4916980266571045, "learning_rate": 1.960392040544609e-05, "loss": 2.0238, "step": 36121 }, { "epoch": 0.47, "grad_norm": 3.8979220390319824, "learning_rate": 1.960389112427332e-05, "loss": 1.9856, "step": 36122 }, { "epoch": 0.47, "grad_norm": 3.692528009414673, "learning_rate": 1.960386184204011e-05, "loss": 1.7866, "step": 36123 }, { "epoch": 0.47, "grad_norm": 3.7045438289642334, "learning_rate": 1.960383255874647e-05, "loss": 1.7641, "step": 36124 }, { "epoch": 0.47, "grad_norm": 3.671495199203491, "learning_rate": 1.9603803274392405e-05, "loss": 2.3151, "step": 36125 }, { "epoch": 0.47, "grad_norm": 4.572503089904785, "learning_rate": 1.9603773988977915e-05, "loss": 2.4017, "step": 36126 }, { "epoch": 0.47, "grad_norm": 3.8848533630371094, "learning_rate": 1.9603744702503008e-05, "loss": 2.1653, "step": 36127 }, { "epoch": 0.47, "grad_norm": 3.7851343154907227, "learning_rate": 1.9603715414967686e-05, "loss": 1.7326, "step": 36128 }, { "epoch": 0.47, "grad_norm": 4.619533061981201, "learning_rate": 1.9603686126371947e-05, "loss": 2.4659, "step": 36129 }, { "epoch": 0.47, "grad_norm": 3.94183087348938, "learning_rate": 1.9603656836715804e-05, "loss": 2.2273, "step": 36130 }, { "epoch": 0.47, "grad_norm": 3.609682083129883, "learning_rate": 1.9603627545999247e-05, "loss": 1.7048, "step": 36131 }, { "epoch": 0.47, "grad_norm": 4.355231761932373, "learning_rate": 1.9603598254222293e-05, "loss": 2.1258, "step": 36132 }, { "epoch": 0.47, "grad_norm": 3.5341718196868896, "learning_rate": 1.960356896138494e-05, "loss": 1.8692, "step": 36133 }, { "epoch": 0.47, "grad_norm": 3.118023157119751, "learning_rate": 1.960353966748718e-05, "loss": 1.4758, "step": 36134 }, { "epoch": 0.47, "grad_norm": 3.5670135021209717, "learning_rate": 1.9603510372529034e-05, "loss": 1.7544, "step": 36135 }, { "epoch": 0.47, "grad_norm": 4.516571998596191, "learning_rate": 1.9603481076510498e-05, "loss": 2.1046, "step": 36136 }, { "epoch": 0.47, "grad_norm": 4.069515228271484, "learning_rate": 1.9603451779431572e-05, "loss": 2.6373, "step": 36137 }, { "epoch": 0.47, "grad_norm": 3.6945250034332275, "learning_rate": 1.9603422481292265e-05, "loss": 1.7444, "step": 36138 }, { "epoch": 0.47, "grad_norm": 3.8015329837799072, "learning_rate": 1.9603393182092575e-05, "loss": 1.737, "step": 36139 }, { "epoch": 0.47, "grad_norm": 3.502035617828369, "learning_rate": 1.9603363881832508e-05, "loss": 1.6404, "step": 36140 }, { "epoch": 0.47, "grad_norm": 3.5798752307891846, "learning_rate": 1.9603334580512064e-05, "loss": 2.1777, "step": 36141 }, { "epoch": 0.47, "grad_norm": 3.593794822692871, "learning_rate": 1.9603305278131254e-05, "loss": 1.483, "step": 36142 }, { "epoch": 0.47, "grad_norm": 3.5834548473358154, "learning_rate": 1.9603275974690073e-05, "loss": 1.7675, "step": 36143 }, { "epoch": 0.47, "grad_norm": 4.154781341552734, "learning_rate": 1.9603246670188526e-05, "loss": 2.1919, "step": 36144 }, { "epoch": 0.47, "grad_norm": 4.152864456176758, "learning_rate": 1.9603217364626622e-05, "loss": 1.999, "step": 36145 }, { "epoch": 0.47, "grad_norm": 3.928629159927368, "learning_rate": 1.9603188058004355e-05, "loss": 1.9495, "step": 36146 }, { "epoch": 0.47, "grad_norm": 3.9277501106262207, "learning_rate": 1.9603158750321735e-05, "loss": 1.7594, "step": 36147 }, { "epoch": 0.47, "grad_norm": 3.8128745555877686, "learning_rate": 1.9603129441578765e-05, "loss": 1.8493, "step": 36148 }, { "epoch": 0.47, "grad_norm": 4.1183576583862305, "learning_rate": 1.9603100131775445e-05, "loss": 1.9889, "step": 36149 }, { "epoch": 0.47, "grad_norm": 3.5154221057891846, "learning_rate": 1.960307082091178e-05, "loss": 1.7774, "step": 36150 }, { "epoch": 0.47, "grad_norm": 3.8283276557922363, "learning_rate": 1.9603041508987774e-05, "loss": 2.2307, "step": 36151 }, { "epoch": 0.47, "grad_norm": 3.563265085220337, "learning_rate": 1.960301219600343e-05, "loss": 1.7725, "step": 36152 }, { "epoch": 0.47, "grad_norm": 4.116515636444092, "learning_rate": 1.9602982881958748e-05, "loss": 2.2217, "step": 36153 }, { "epoch": 0.47, "grad_norm": 4.070427894592285, "learning_rate": 1.9602953566853733e-05, "loss": 1.8797, "step": 36154 }, { "epoch": 0.47, "grad_norm": 3.6449477672576904, "learning_rate": 1.9602924250688393e-05, "loss": 1.5669, "step": 36155 }, { "epoch": 0.47, "grad_norm": 3.7355220317840576, "learning_rate": 1.9602894933462724e-05, "loss": 1.7083, "step": 36156 }, { "epoch": 0.47, "grad_norm": 3.4274516105651855, "learning_rate": 1.9602865615176735e-05, "loss": 1.7171, "step": 36157 }, { "epoch": 0.47, "grad_norm": 3.3224289417266846, "learning_rate": 1.9602836295830424e-05, "loss": 1.4621, "step": 36158 }, { "epoch": 0.47, "grad_norm": 4.1660356521606445, "learning_rate": 1.96028069754238e-05, "loss": 2.2877, "step": 36159 }, { "epoch": 0.47, "grad_norm": 3.441279411315918, "learning_rate": 1.960277765395686e-05, "loss": 1.6458, "step": 36160 }, { "epoch": 0.47, "grad_norm": 4.135210990905762, "learning_rate": 1.9602748331429612e-05, "loss": 1.6956, "step": 36161 }, { "epoch": 0.47, "grad_norm": 3.8698365688323975, "learning_rate": 1.9602719007842058e-05, "loss": 1.8711, "step": 36162 }, { "epoch": 0.47, "grad_norm": 3.9189465045928955, "learning_rate": 1.96026896831942e-05, "loss": 1.8019, "step": 36163 }, { "epoch": 0.47, "grad_norm": 3.8280246257781982, "learning_rate": 1.9602660357486042e-05, "loss": 1.8759, "step": 36164 }, { "epoch": 0.47, "grad_norm": 3.9980344772338867, "learning_rate": 1.9602631030717588e-05, "loss": 2.6051, "step": 36165 }, { "epoch": 0.47, "grad_norm": 3.6557626724243164, "learning_rate": 1.960260170288884e-05, "loss": 1.8811, "step": 36166 }, { "epoch": 0.47, "grad_norm": 4.077219009399414, "learning_rate": 1.9602572373999803e-05, "loss": 2.2267, "step": 36167 }, { "epoch": 0.47, "grad_norm": 3.5581817626953125, "learning_rate": 1.960254304405048e-05, "loss": 1.6595, "step": 36168 }, { "epoch": 0.47, "grad_norm": 3.934462070465088, "learning_rate": 1.9602513713040873e-05, "loss": 2.0461, "step": 36169 }, { "epoch": 0.47, "grad_norm": 3.5092995166778564, "learning_rate": 1.960248438097098e-05, "loss": 1.7198, "step": 36170 }, { "epoch": 0.47, "grad_norm": 3.766510486602783, "learning_rate": 1.9602455047840818e-05, "loss": 1.8103, "step": 36171 }, { "epoch": 0.47, "grad_norm": 3.887639284133911, "learning_rate": 1.960242571365038e-05, "loss": 2.1513, "step": 36172 }, { "epoch": 0.47, "grad_norm": 3.524519205093384, "learning_rate": 1.960239637839967e-05, "loss": 1.8899, "step": 36173 }, { "epoch": 0.47, "grad_norm": 4.522843360900879, "learning_rate": 1.960236704208869e-05, "loss": 2.1518, "step": 36174 }, { "epoch": 0.47, "grad_norm": 3.678894519805908, "learning_rate": 1.9602337704717448e-05, "loss": 1.7547, "step": 36175 }, { "epoch": 0.47, "grad_norm": 3.792935371398926, "learning_rate": 1.9602308366285946e-05, "loss": 2.0493, "step": 36176 }, { "epoch": 0.47, "grad_norm": 4.160677433013916, "learning_rate": 1.9602279026794186e-05, "loss": 1.7732, "step": 36177 }, { "epoch": 0.47, "grad_norm": 4.417356014251709, "learning_rate": 1.960224968624217e-05, "loss": 2.1345, "step": 36178 }, { "epoch": 0.47, "grad_norm": 3.3014461994171143, "learning_rate": 1.9602220344629905e-05, "loss": 1.4706, "step": 36179 }, { "epoch": 0.47, "grad_norm": 3.977132558822632, "learning_rate": 1.960219100195739e-05, "loss": 1.9215, "step": 36180 }, { "epoch": 0.47, "grad_norm": 3.898022413253784, "learning_rate": 1.960216165822463e-05, "loss": 2.1839, "step": 36181 }, { "epoch": 0.47, "grad_norm": 3.7944116592407227, "learning_rate": 1.960213231343163e-05, "loss": 1.7137, "step": 36182 }, { "epoch": 0.47, "grad_norm": 4.54636287689209, "learning_rate": 1.960210296757839e-05, "loss": 2.1696, "step": 36183 }, { "epoch": 0.47, "grad_norm": 3.577451705932617, "learning_rate": 1.9602073620664916e-05, "loss": 1.8326, "step": 36184 }, { "epoch": 0.47, "grad_norm": 3.756639003753662, "learning_rate": 1.960204427269121e-05, "loss": 2.1411, "step": 36185 }, { "epoch": 0.47, "grad_norm": 3.4428839683532715, "learning_rate": 1.9602014923657277e-05, "loss": 1.7488, "step": 36186 }, { "epoch": 0.47, "grad_norm": 3.8519575595855713, "learning_rate": 1.9601985573563117e-05, "loss": 1.9517, "step": 36187 }, { "epoch": 0.47, "grad_norm": 4.186191558837891, "learning_rate": 1.9601956222408734e-05, "loss": 2.2843, "step": 36188 }, { "epoch": 0.47, "grad_norm": 4.707309246063232, "learning_rate": 1.9601926870194132e-05, "loss": 2.1361, "step": 36189 }, { "epoch": 0.47, "grad_norm": 4.42277717590332, "learning_rate": 1.9601897516919316e-05, "loss": 2.639, "step": 36190 }, { "epoch": 0.47, "grad_norm": 4.279407978057861, "learning_rate": 1.9601868162584287e-05, "loss": 2.0063, "step": 36191 }, { "epoch": 0.47, "grad_norm": 4.4170026779174805, "learning_rate": 1.960183880718905e-05, "loss": 2.1759, "step": 36192 }, { "epoch": 0.47, "grad_norm": 4.2546067237854, "learning_rate": 1.9601809450733605e-05, "loss": 2.4824, "step": 36193 }, { "epoch": 0.47, "grad_norm": 3.584998369216919, "learning_rate": 1.9601780093217956e-05, "loss": 1.8916, "step": 36194 }, { "epoch": 0.47, "grad_norm": 3.474703311920166, "learning_rate": 1.960175073464211e-05, "loss": 1.8552, "step": 36195 }, { "epoch": 0.47, "grad_norm": 3.5222411155700684, "learning_rate": 1.960172137500607e-05, "loss": 1.6045, "step": 36196 }, { "epoch": 0.47, "grad_norm": 3.5528171062469482, "learning_rate": 1.9601692014309833e-05, "loss": 1.7617, "step": 36197 }, { "epoch": 0.47, "grad_norm": 3.8634536266326904, "learning_rate": 1.9601662652553412e-05, "loss": 1.9044, "step": 36198 }, { "epoch": 0.47, "grad_norm": 4.350362777709961, "learning_rate": 1.9601633289736797e-05, "loss": 2.6222, "step": 36199 }, { "epoch": 0.47, "grad_norm": 3.3872385025024414, "learning_rate": 1.9601603925860005e-05, "loss": 1.6896, "step": 36200 }, { "epoch": 0.47, "grad_norm": 4.050978660583496, "learning_rate": 1.960157456092303e-05, "loss": 2.817, "step": 36201 }, { "epoch": 0.47, "grad_norm": 3.4072422981262207, "learning_rate": 1.9601545194925878e-05, "loss": 1.7473, "step": 36202 }, { "epoch": 0.47, "grad_norm": 3.974332094192505, "learning_rate": 1.9601515827868552e-05, "loss": 2.0863, "step": 36203 }, { "epoch": 0.47, "grad_norm": 4.45172643661499, "learning_rate": 1.960148645975106e-05, "loss": 2.2239, "step": 36204 }, { "epoch": 0.47, "grad_norm": 4.605374336242676, "learning_rate": 1.9601457090573393e-05, "loss": 2.6716, "step": 36205 }, { "epoch": 0.47, "grad_norm": 3.5172102451324463, "learning_rate": 1.960142772033557e-05, "loss": 1.8459, "step": 36206 }, { "epoch": 0.47, "grad_norm": 3.496070384979248, "learning_rate": 1.9601398349037584e-05, "loss": 1.4414, "step": 36207 }, { "epoch": 0.47, "grad_norm": 3.64184308052063, "learning_rate": 1.9601368976679442e-05, "loss": 1.7923, "step": 36208 }, { "epoch": 0.47, "grad_norm": 3.6344501972198486, "learning_rate": 1.9601339603261142e-05, "loss": 1.9077, "step": 36209 }, { "epoch": 0.47, "grad_norm": 3.5008487701416016, "learning_rate": 1.9601310228782693e-05, "loss": 1.7254, "step": 36210 }, { "epoch": 0.47, "grad_norm": 3.5061559677124023, "learning_rate": 1.96012808532441e-05, "loss": 2.1556, "step": 36211 }, { "epoch": 0.47, "grad_norm": 3.4569098949432373, "learning_rate": 1.960125147664536e-05, "loss": 1.8838, "step": 36212 }, { "epoch": 0.47, "grad_norm": 3.887641191482544, "learning_rate": 1.9601222098986477e-05, "loss": 1.96, "step": 36213 }, { "epoch": 0.47, "grad_norm": 4.175578594207764, "learning_rate": 1.960119272026746e-05, "loss": 2.4474, "step": 36214 }, { "epoch": 0.47, "grad_norm": 3.6760454177856445, "learning_rate": 1.9601163340488305e-05, "loss": 2.1552, "step": 36215 }, { "epoch": 0.47, "grad_norm": 3.7534477710723877, "learning_rate": 1.960113395964902e-05, "loss": 1.997, "step": 36216 }, { "epoch": 0.47, "grad_norm": 3.7588813304901123, "learning_rate": 1.9601104577749605e-05, "loss": 1.7365, "step": 36217 }, { "epoch": 0.47, "grad_norm": 3.6636664867401123, "learning_rate": 1.960107519479007e-05, "loss": 1.5868, "step": 36218 }, { "epoch": 0.47, "grad_norm": 3.563946008682251, "learning_rate": 1.960104581077041e-05, "loss": 1.7741, "step": 36219 }, { "epoch": 0.47, "grad_norm": 4.0608673095703125, "learning_rate": 1.9601016425690632e-05, "loss": 2.2726, "step": 36220 }, { "epoch": 0.47, "grad_norm": 3.0220320224761963, "learning_rate": 1.960098703955074e-05, "loss": 1.3097, "step": 36221 }, { "epoch": 0.47, "grad_norm": 4.257571697235107, "learning_rate": 1.9600957652350735e-05, "loss": 2.0938, "step": 36222 }, { "epoch": 0.47, "grad_norm": 3.886312961578369, "learning_rate": 1.9600928264090623e-05, "loss": 2.1324, "step": 36223 }, { "epoch": 0.47, "grad_norm": 3.6448733806610107, "learning_rate": 1.96008988747704e-05, "loss": 2.1172, "step": 36224 }, { "epoch": 0.47, "grad_norm": 4.040841579437256, "learning_rate": 1.960086948439008e-05, "loss": 1.7456, "step": 36225 }, { "epoch": 0.47, "grad_norm": 3.806483745574951, "learning_rate": 1.9600840092949662e-05, "loss": 2.0138, "step": 36226 }, { "epoch": 0.47, "grad_norm": 3.54978609085083, "learning_rate": 1.9600810700449147e-05, "loss": 1.8929, "step": 36227 }, { "epoch": 0.47, "grad_norm": 3.2860376834869385, "learning_rate": 1.960078130688854e-05, "loss": 1.5692, "step": 36228 }, { "epoch": 0.47, "grad_norm": 3.4143974781036377, "learning_rate": 1.960075191226784e-05, "loss": 2.1224, "step": 36229 }, { "epoch": 0.47, "grad_norm": 3.805616855621338, "learning_rate": 1.960072251658706e-05, "loss": 1.7044, "step": 36230 }, { "epoch": 0.47, "grad_norm": 3.927773952484131, "learning_rate": 1.9600693119846195e-05, "loss": 2.046, "step": 36231 }, { "epoch": 0.47, "grad_norm": 3.584796905517578, "learning_rate": 1.960066372204525e-05, "loss": 1.6931, "step": 36232 }, { "epoch": 0.47, "grad_norm": 4.045233726501465, "learning_rate": 1.9600634323184228e-05, "loss": 2.0829, "step": 36233 }, { "epoch": 0.47, "grad_norm": 3.4698898792266846, "learning_rate": 1.9600604923263135e-05, "loss": 1.9248, "step": 36234 }, { "epoch": 0.47, "grad_norm": 3.8352420330047607, "learning_rate": 1.960057552228197e-05, "loss": 2.1728, "step": 36235 }, { "epoch": 0.47, "grad_norm": 3.149449110031128, "learning_rate": 1.960054612024074e-05, "loss": 1.5552, "step": 36236 }, { "epoch": 0.47, "grad_norm": 3.5922038555145264, "learning_rate": 1.960051671713945e-05, "loss": 1.5974, "step": 36237 }, { "epoch": 0.47, "grad_norm": 5.0363569259643555, "learning_rate": 1.9600487312978096e-05, "loss": 2.5781, "step": 36238 }, { "epoch": 0.47, "grad_norm": 3.984470844268799, "learning_rate": 1.9600457907756687e-05, "loss": 2.0563, "step": 36239 }, { "epoch": 0.47, "grad_norm": 3.6332364082336426, "learning_rate": 1.9600428501475224e-05, "loss": 1.7481, "step": 36240 }, { "epoch": 0.47, "grad_norm": 4.320323944091797, "learning_rate": 1.960039909413371e-05, "loss": 2.2031, "step": 36241 }, { "epoch": 0.47, "grad_norm": 4.102491855621338, "learning_rate": 1.960036968573215e-05, "loss": 2.3467, "step": 36242 }, { "epoch": 0.47, "grad_norm": 3.9253480434417725, "learning_rate": 1.9600340276270545e-05, "loss": 2.1237, "step": 36243 }, { "epoch": 0.47, "grad_norm": 3.7846593856811523, "learning_rate": 1.9600310865748903e-05, "loss": 2.2298, "step": 36244 }, { "epoch": 0.47, "grad_norm": 4.114578723907471, "learning_rate": 1.960028145416722e-05, "loss": 1.9369, "step": 36245 }, { "epoch": 0.47, "grad_norm": 3.563884973526001, "learning_rate": 1.9600252041525504e-05, "loss": 1.8285, "step": 36246 }, { "epoch": 0.47, "grad_norm": 4.217981338500977, "learning_rate": 1.960022262782376e-05, "loss": 2.1427, "step": 36247 }, { "epoch": 0.47, "grad_norm": 3.9473698139190674, "learning_rate": 1.9600193213061987e-05, "loss": 2.074, "step": 36248 }, { "epoch": 0.47, "grad_norm": 3.979144334793091, "learning_rate": 1.9600163797240184e-05, "loss": 2.1469, "step": 36249 }, { "epoch": 0.47, "grad_norm": 3.8871400356292725, "learning_rate": 1.9600134380358368e-05, "loss": 2.0828, "step": 36250 }, { "epoch": 0.47, "grad_norm": 3.5512547492980957, "learning_rate": 1.9600104962416534e-05, "loss": 1.8706, "step": 36251 }, { "epoch": 0.47, "grad_norm": 3.488823413848877, "learning_rate": 1.960007554341468e-05, "loss": 1.8107, "step": 36252 }, { "epoch": 0.47, "grad_norm": 3.8855836391448975, "learning_rate": 1.960004612335282e-05, "loss": 2.0378, "step": 36253 }, { "epoch": 0.47, "grad_norm": 3.3821799755096436, "learning_rate": 1.960001670223095e-05, "loss": 1.5969, "step": 36254 }, { "epoch": 0.47, "grad_norm": 4.882460594177246, "learning_rate": 1.9599987280049072e-05, "loss": 2.1016, "step": 36255 }, { "epoch": 0.47, "grad_norm": 3.331880807876587, "learning_rate": 1.95999578568072e-05, "loss": 2.1196, "step": 36256 }, { "epoch": 0.47, "grad_norm": 3.4646668434143066, "learning_rate": 1.9599928432505325e-05, "loss": 1.8322, "step": 36257 }, { "epoch": 0.47, "grad_norm": 3.584806203842163, "learning_rate": 1.9599899007143455e-05, "loss": 1.7692, "step": 36258 }, { "epoch": 0.47, "grad_norm": 3.6770832538604736, "learning_rate": 1.9599869580721594e-05, "loss": 2.1233, "step": 36259 }, { "epoch": 0.47, "grad_norm": 3.7839696407318115, "learning_rate": 1.9599840153239744e-05, "loss": 1.7421, "step": 36260 }, { "epoch": 0.47, "grad_norm": 4.091951370239258, "learning_rate": 1.959981072469791e-05, "loss": 1.9829, "step": 36261 }, { "epoch": 0.47, "grad_norm": 3.97149395942688, "learning_rate": 1.959978129509609e-05, "loss": 2.0134, "step": 36262 }, { "epoch": 0.47, "grad_norm": 4.131776332855225, "learning_rate": 1.9599751864434298e-05, "loss": 1.8336, "step": 36263 }, { "epoch": 0.47, "grad_norm": 3.738039970397949, "learning_rate": 1.9599722432712526e-05, "loss": 1.7814, "step": 36264 }, { "epoch": 0.47, "grad_norm": 3.3873071670532227, "learning_rate": 1.9599692999930783e-05, "loss": 1.7678, "step": 36265 }, { "epoch": 0.47, "grad_norm": 3.6700470447540283, "learning_rate": 1.959966356608907e-05, "loss": 1.9001, "step": 36266 }, { "epoch": 0.47, "grad_norm": 3.8762266635894775, "learning_rate": 1.9599634131187393e-05, "loss": 1.7589, "step": 36267 }, { "epoch": 0.47, "grad_norm": 3.9322516918182373, "learning_rate": 1.9599604695225753e-05, "loss": 2.1932, "step": 36268 }, { "epoch": 0.47, "grad_norm": 3.797295331954956, "learning_rate": 1.9599575258204153e-05, "loss": 1.7035, "step": 36269 }, { "epoch": 0.47, "grad_norm": 3.612061023712158, "learning_rate": 1.9599545820122596e-05, "loss": 2.0181, "step": 36270 }, { "epoch": 0.47, "grad_norm": 3.718085527420044, "learning_rate": 1.959951638098109e-05, "loss": 1.9224, "step": 36271 }, { "epoch": 0.47, "grad_norm": 3.494330644607544, "learning_rate": 1.959948694077963e-05, "loss": 1.8202, "step": 36272 }, { "epoch": 0.47, "grad_norm": 3.9439849853515625, "learning_rate": 1.959945749951823e-05, "loss": 2.2934, "step": 36273 }, { "epoch": 0.47, "grad_norm": 2.8736679553985596, "learning_rate": 1.959942805719688e-05, "loss": 1.3296, "step": 36274 }, { "epoch": 0.47, "grad_norm": 3.8151257038116455, "learning_rate": 1.9599398613815593e-05, "loss": 2.0175, "step": 36275 }, { "epoch": 0.47, "grad_norm": 3.6988022327423096, "learning_rate": 1.959936916937437e-05, "loss": 1.607, "step": 36276 }, { "epoch": 0.47, "grad_norm": 3.3840858936309814, "learning_rate": 1.9599339723873215e-05, "loss": 1.5628, "step": 36277 }, { "epoch": 0.47, "grad_norm": 8.411723136901855, "learning_rate": 1.959931027731213e-05, "loss": 2.4147, "step": 36278 }, { "epoch": 0.47, "grad_norm": 3.7728958129882812, "learning_rate": 1.9599280829691116e-05, "loss": 1.8894, "step": 36279 }, { "epoch": 0.47, "grad_norm": 4.781825542449951, "learning_rate": 1.959925138101018e-05, "loss": 2.1203, "step": 36280 }, { "epoch": 0.47, "grad_norm": 3.6729815006256104, "learning_rate": 1.9599221931269325e-05, "loss": 2.1115, "step": 36281 }, { "epoch": 0.47, "grad_norm": 3.3812673091888428, "learning_rate": 1.9599192480468548e-05, "loss": 1.7073, "step": 36282 }, { "epoch": 0.47, "grad_norm": 4.164393424987793, "learning_rate": 1.9599163028607863e-05, "loss": 1.9619, "step": 36283 }, { "epoch": 0.47, "grad_norm": 3.2419376373291016, "learning_rate": 1.9599133575687264e-05, "loss": 1.8574, "step": 36284 }, { "epoch": 0.47, "grad_norm": 3.744697332382202, "learning_rate": 1.9599104121706757e-05, "loss": 1.9476, "step": 36285 }, { "epoch": 0.47, "grad_norm": 3.4799299240112305, "learning_rate": 1.959907466666635e-05, "loss": 1.8968, "step": 36286 }, { "epoch": 0.47, "grad_norm": 4.242343425750732, "learning_rate": 1.959904521056604e-05, "loss": 2.5825, "step": 36287 }, { "epoch": 0.47, "grad_norm": 3.3778390884399414, "learning_rate": 1.9599015753405836e-05, "loss": 1.958, "step": 36288 }, { "epoch": 0.47, "grad_norm": 3.6524581909179688, "learning_rate": 1.9598986295185732e-05, "loss": 1.9913, "step": 36289 }, { "epoch": 0.47, "grad_norm": 4.157925128936768, "learning_rate": 1.959895683590574e-05, "loss": 1.9048, "step": 36290 }, { "epoch": 0.47, "grad_norm": 3.705247402191162, "learning_rate": 1.959892737556586e-05, "loss": 1.7389, "step": 36291 }, { "epoch": 0.47, "grad_norm": 4.0759758949279785, "learning_rate": 1.9598897914166097e-05, "loss": 1.7877, "step": 36292 }, { "epoch": 0.47, "grad_norm": 3.500455379486084, "learning_rate": 1.959886845170645e-05, "loss": 1.4607, "step": 36293 }, { "epoch": 0.47, "grad_norm": 3.832550287246704, "learning_rate": 1.959883898818693e-05, "loss": 1.6364, "step": 36294 }, { "epoch": 0.47, "grad_norm": 3.923304557800293, "learning_rate": 1.9598809523607532e-05, "loss": 1.6875, "step": 36295 }, { "epoch": 0.47, "grad_norm": 3.538378953933716, "learning_rate": 1.959878005796826e-05, "loss": 1.965, "step": 36296 }, { "epoch": 0.47, "grad_norm": 3.7698652744293213, "learning_rate": 1.9598750591269122e-05, "loss": 2.2856, "step": 36297 }, { "epoch": 0.47, "grad_norm": 3.9195609092712402, "learning_rate": 1.959872112351012e-05, "loss": 2.3843, "step": 36298 }, { "epoch": 0.47, "grad_norm": 3.717996120452881, "learning_rate": 1.9598691654691255e-05, "loss": 1.9338, "step": 36299 }, { "epoch": 0.47, "grad_norm": 4.0699591636657715, "learning_rate": 1.9598662184812535e-05, "loss": 2.0869, "step": 36300 }, { "epoch": 0.47, "grad_norm": 3.394984483718872, "learning_rate": 1.9598632713873957e-05, "loss": 1.5623, "step": 36301 }, { "epoch": 0.47, "grad_norm": 3.4560952186584473, "learning_rate": 1.9598603241875527e-05, "loss": 1.4272, "step": 36302 }, { "epoch": 0.47, "grad_norm": 3.7146220207214355, "learning_rate": 1.959857376881725e-05, "loss": 1.7429, "step": 36303 }, { "epoch": 0.47, "grad_norm": 3.5724034309387207, "learning_rate": 1.9598544294699123e-05, "loss": 1.9086, "step": 36304 }, { "epoch": 0.47, "grad_norm": 3.794663667678833, "learning_rate": 1.959851481952116e-05, "loss": 2.1362, "step": 36305 }, { "epoch": 0.47, "grad_norm": 3.5497424602508545, "learning_rate": 1.9598485343283356e-05, "loss": 1.9845, "step": 36306 }, { "epoch": 0.47, "grad_norm": 3.9530515670776367, "learning_rate": 1.959845586598571e-05, "loss": 2.3326, "step": 36307 }, { "epoch": 0.47, "grad_norm": 3.482409715652466, "learning_rate": 1.9598426387628243e-05, "loss": 1.6699, "step": 36308 }, { "epoch": 0.47, "grad_norm": 3.919928789138794, "learning_rate": 1.959839690821094e-05, "loss": 1.707, "step": 36309 }, { "epoch": 0.47, "grad_norm": 3.7765042781829834, "learning_rate": 1.9598367427733812e-05, "loss": 2.0074, "step": 36310 }, { "epoch": 0.47, "grad_norm": 3.705094814300537, "learning_rate": 1.9598337946196863e-05, "loss": 1.4715, "step": 36311 }, { "epoch": 0.47, "grad_norm": 3.9405460357666016, "learning_rate": 1.9598308463600092e-05, "loss": 1.8004, "step": 36312 }, { "epoch": 0.47, "grad_norm": 3.939453363418579, "learning_rate": 1.9598278979943508e-05, "loss": 2.2968, "step": 36313 }, { "epoch": 0.47, "grad_norm": 3.8566057682037354, "learning_rate": 1.959824949522711e-05, "loss": 1.6987, "step": 36314 }, { "epoch": 0.47, "grad_norm": 4.080564022064209, "learning_rate": 1.95982200094509e-05, "loss": 2.2373, "step": 36315 }, { "epoch": 0.47, "grad_norm": 3.4267027378082275, "learning_rate": 1.9598190522614887e-05, "loss": 1.7938, "step": 36316 }, { "epoch": 0.47, "grad_norm": 3.2886383533477783, "learning_rate": 1.959816103471907e-05, "loss": 1.4606, "step": 36317 }, { "epoch": 0.47, "grad_norm": 3.3928420543670654, "learning_rate": 1.9598131545763453e-05, "loss": 1.7466, "step": 36318 }, { "epoch": 0.47, "grad_norm": 3.8306610584259033, "learning_rate": 1.9598102055748036e-05, "loss": 2.2188, "step": 36319 }, { "epoch": 0.47, "grad_norm": 3.6438229084014893, "learning_rate": 1.9598072564672832e-05, "loss": 1.5657, "step": 36320 }, { "epoch": 0.47, "grad_norm": 4.131410598754883, "learning_rate": 1.9598043072537834e-05, "loss": 1.6699, "step": 36321 }, { "epoch": 0.47, "grad_norm": 4.060050964355469, "learning_rate": 1.9598013579343048e-05, "loss": 2.1929, "step": 36322 }, { "epoch": 0.47, "grad_norm": 3.65718150138855, "learning_rate": 1.9597984085088482e-05, "loss": 1.7502, "step": 36323 }, { "epoch": 0.47, "grad_norm": 3.5913848876953125, "learning_rate": 1.9597954589774132e-05, "loss": 1.9713, "step": 36324 }, { "epoch": 0.47, "grad_norm": 3.894773483276367, "learning_rate": 1.9597925093400008e-05, "loss": 1.8237, "step": 36325 }, { "epoch": 0.47, "grad_norm": 4.318118095397949, "learning_rate": 1.959789559596611e-05, "loss": 2.1783, "step": 36326 }, { "epoch": 0.47, "grad_norm": 4.751468181610107, "learning_rate": 1.9597866097472443e-05, "loss": 1.7968, "step": 36327 }, { "epoch": 0.47, "grad_norm": 3.7046315670013428, "learning_rate": 1.9597836597919008e-05, "loss": 1.7712, "step": 36328 }, { "epoch": 0.47, "grad_norm": 3.7286548614501953, "learning_rate": 1.9597807097305806e-05, "loss": 1.7788, "step": 36329 }, { "epoch": 0.47, "grad_norm": 3.5944883823394775, "learning_rate": 1.9597777595632845e-05, "loss": 2.0113, "step": 36330 }, { "epoch": 0.47, "grad_norm": 3.6749672889709473, "learning_rate": 1.9597748092900122e-05, "loss": 1.813, "step": 36331 }, { "epoch": 0.47, "grad_norm": 3.620899200439453, "learning_rate": 1.9597718589107654e-05, "loss": 1.6611, "step": 36332 }, { "epoch": 0.47, "grad_norm": 4.173264980316162, "learning_rate": 1.9597689084255428e-05, "loss": 2.0849, "step": 36333 }, { "epoch": 0.47, "grad_norm": 3.5894572734832764, "learning_rate": 1.9597659578343456e-05, "loss": 2.1215, "step": 36334 }, { "epoch": 0.47, "grad_norm": 3.651923894882202, "learning_rate": 1.9597630071371744e-05, "loss": 1.7813, "step": 36335 }, { "epoch": 0.47, "grad_norm": 3.7439351081848145, "learning_rate": 1.9597600563340285e-05, "loss": 1.8217, "step": 36336 }, { "epoch": 0.47, "grad_norm": 4.203578472137451, "learning_rate": 1.959757105424909e-05, "loss": 2.0989, "step": 36337 }, { "epoch": 0.47, "grad_norm": 3.922302722930908, "learning_rate": 1.959754154409816e-05, "loss": 2.1057, "step": 36338 }, { "epoch": 0.47, "grad_norm": 4.204739570617676, "learning_rate": 1.95975120328875e-05, "loss": 2.1767, "step": 36339 }, { "epoch": 0.47, "grad_norm": 3.928008556365967, "learning_rate": 1.959748252061711e-05, "loss": 2.0952, "step": 36340 }, { "epoch": 0.47, "grad_norm": 4.433457374572754, "learning_rate": 1.9597453007287e-05, "loss": 2.0449, "step": 36341 }, { "epoch": 0.47, "grad_norm": 3.3465206623077393, "learning_rate": 1.9597423492897162e-05, "loss": 1.7198, "step": 36342 }, { "epoch": 0.47, "grad_norm": 3.323687791824341, "learning_rate": 1.959739397744761e-05, "loss": 1.5116, "step": 36343 }, { "epoch": 0.47, "grad_norm": 3.638625144958496, "learning_rate": 1.9597364460938336e-05, "loss": 1.7561, "step": 36344 }, { "epoch": 0.47, "grad_norm": 3.9971277713775635, "learning_rate": 1.9597334943369354e-05, "loss": 1.7043, "step": 36345 }, { "epoch": 0.47, "grad_norm": 3.398045301437378, "learning_rate": 1.9597305424740667e-05, "loss": 1.621, "step": 36346 }, { "epoch": 0.47, "grad_norm": 4.307990550994873, "learning_rate": 1.9597275905052273e-05, "loss": 1.8981, "step": 36347 }, { "epoch": 0.47, "grad_norm": 3.959036111831665, "learning_rate": 1.9597246384304176e-05, "loss": 2.0902, "step": 36348 }, { "epoch": 0.47, "grad_norm": 4.475247383117676, "learning_rate": 1.9597216862496377e-05, "loss": 1.8758, "step": 36349 }, { "epoch": 0.47, "grad_norm": 4.103571891784668, "learning_rate": 1.9597187339628886e-05, "loss": 2.0187, "step": 36350 }, { "epoch": 0.47, "grad_norm": 4.386077880859375, "learning_rate": 1.9597157815701705e-05, "loss": 2.634, "step": 36351 }, { "epoch": 0.47, "grad_norm": 3.57205867767334, "learning_rate": 1.9597128290714832e-05, "loss": 1.6842, "step": 36352 }, { "epoch": 0.47, "grad_norm": 3.925356149673462, "learning_rate": 1.9597098764668274e-05, "loss": 1.8995, "step": 36353 }, { "epoch": 0.47, "grad_norm": 3.646143674850464, "learning_rate": 1.959706923756203e-05, "loss": 1.9726, "step": 36354 }, { "epoch": 0.47, "grad_norm": 4.101394176483154, "learning_rate": 1.959703970939611e-05, "loss": 1.8787, "step": 36355 }, { "epoch": 0.47, "grad_norm": 3.4482429027557373, "learning_rate": 1.9597010180170514e-05, "loss": 1.6618, "step": 36356 }, { "epoch": 0.47, "grad_norm": 3.5737528800964355, "learning_rate": 1.9596980649885244e-05, "loss": 1.9172, "step": 36357 }, { "epoch": 0.47, "grad_norm": 4.378302574157715, "learning_rate": 1.9596951118540308e-05, "loss": 2.8337, "step": 36358 }, { "epoch": 0.47, "grad_norm": 3.7003982067108154, "learning_rate": 1.9596921586135703e-05, "loss": 1.631, "step": 36359 }, { "epoch": 0.47, "grad_norm": 3.616877794265747, "learning_rate": 1.9596892052671436e-05, "loss": 1.9408, "step": 36360 }, { "epoch": 0.47, "grad_norm": 3.971269369125366, "learning_rate": 1.9596862518147508e-05, "loss": 2.2198, "step": 36361 }, { "epoch": 0.47, "grad_norm": 3.6448209285736084, "learning_rate": 1.9596832982563924e-05, "loss": 2.0366, "step": 36362 }, { "epoch": 0.47, "grad_norm": 3.936422348022461, "learning_rate": 1.9596803445920685e-05, "loss": 1.7155, "step": 36363 }, { "epoch": 0.47, "grad_norm": 3.696335554122925, "learning_rate": 1.9596773908217798e-05, "loss": 2.1073, "step": 36364 }, { "epoch": 0.47, "grad_norm": 4.468804836273193, "learning_rate": 1.9596744369455265e-05, "loss": 2.3965, "step": 36365 }, { "epoch": 0.47, "grad_norm": 3.9133851528167725, "learning_rate": 1.9596714829633088e-05, "loss": 2.1443, "step": 36366 }, { "epoch": 0.47, "grad_norm": 3.3793153762817383, "learning_rate": 1.9596685288751272e-05, "loss": 1.8209, "step": 36367 }, { "epoch": 0.47, "grad_norm": 4.518923282623291, "learning_rate": 1.959665574680982e-05, "loss": 2.3306, "step": 36368 }, { "epoch": 0.47, "grad_norm": 4.009531021118164, "learning_rate": 1.959662620380873e-05, "loss": 2.0572, "step": 36369 }, { "epoch": 0.47, "grad_norm": 3.8772003650665283, "learning_rate": 1.959659665974801e-05, "loss": 1.9403, "step": 36370 }, { "epoch": 0.47, "grad_norm": 3.3966660499572754, "learning_rate": 1.9596567114627665e-05, "loss": 1.6272, "step": 36371 }, { "epoch": 0.47, "grad_norm": 3.7796106338500977, "learning_rate": 1.9596537568447696e-05, "loss": 1.8497, "step": 36372 }, { "epoch": 0.47, "grad_norm": 4.00617790222168, "learning_rate": 1.9596508021208106e-05, "loss": 2.3213, "step": 36373 }, { "epoch": 0.47, "grad_norm": 3.529646873474121, "learning_rate": 1.95964784729089e-05, "loss": 1.5928, "step": 36374 }, { "epoch": 0.47, "grad_norm": 3.328317880630493, "learning_rate": 1.959644892355008e-05, "loss": 1.5933, "step": 36375 }, { "epoch": 0.47, "grad_norm": 3.3924527168273926, "learning_rate": 1.959641937313165e-05, "loss": 1.7538, "step": 36376 }, { "epoch": 0.47, "grad_norm": 4.324816703796387, "learning_rate": 1.9596389821653608e-05, "loss": 2.2055, "step": 36377 }, { "epoch": 0.47, "grad_norm": 3.0033581256866455, "learning_rate": 1.959636026911597e-05, "loss": 1.4884, "step": 36378 }, { "epoch": 0.47, "grad_norm": 3.9668047428131104, "learning_rate": 1.9596330715518722e-05, "loss": 2.2889, "step": 36379 }, { "epoch": 0.47, "grad_norm": 4.024691581726074, "learning_rate": 1.959630116086188e-05, "loss": 2.0255, "step": 36380 }, { "epoch": 0.47, "grad_norm": 4.060957431793213, "learning_rate": 1.9596271605145445e-05, "loss": 2.3328, "step": 36381 }, { "epoch": 0.47, "grad_norm": 3.985797166824341, "learning_rate": 1.9596242048369415e-05, "loss": 1.8351, "step": 36382 }, { "epoch": 0.47, "grad_norm": 4.271602153778076, "learning_rate": 1.9596212490533802e-05, "loss": 1.9371, "step": 36383 }, { "epoch": 0.47, "grad_norm": 3.8712120056152344, "learning_rate": 1.95961829316386e-05, "loss": 2.4716, "step": 36384 }, { "epoch": 0.47, "grad_norm": 3.9863898754119873, "learning_rate": 1.959615337168382e-05, "loss": 1.9875, "step": 36385 }, { "epoch": 0.47, "grad_norm": 3.2745492458343506, "learning_rate": 1.959612381066946e-05, "loss": 1.5583, "step": 36386 }, { "epoch": 0.47, "grad_norm": 3.748852252960205, "learning_rate": 1.9596094248595524e-05, "loss": 2.0682, "step": 36387 }, { "epoch": 0.47, "grad_norm": 3.5442521572113037, "learning_rate": 1.959606468546202e-05, "loss": 1.9684, "step": 36388 }, { "epoch": 0.47, "grad_norm": 3.8260419368743896, "learning_rate": 1.9596035121268944e-05, "loss": 1.8489, "step": 36389 }, { "epoch": 0.47, "grad_norm": 3.895202875137329, "learning_rate": 1.9596005556016307e-05, "loss": 2.0353, "step": 36390 }, { "epoch": 0.47, "grad_norm": 3.698209285736084, "learning_rate": 1.9595975989704102e-05, "loss": 1.8705, "step": 36391 }, { "epoch": 0.47, "grad_norm": 3.626868963241577, "learning_rate": 1.9595946422332344e-05, "loss": 1.9863, "step": 36392 }, { "epoch": 0.47, "grad_norm": 3.582143545150757, "learning_rate": 1.9595916853901026e-05, "loss": 1.7191, "step": 36393 }, { "epoch": 0.47, "grad_norm": 3.339265823364258, "learning_rate": 1.959588728441016e-05, "loss": 1.8362, "step": 36394 }, { "epoch": 0.47, "grad_norm": 3.678269147872925, "learning_rate": 1.9595857713859746e-05, "loss": 2.1856, "step": 36395 }, { "epoch": 0.47, "grad_norm": 3.7971630096435547, "learning_rate": 1.9595828142249784e-05, "loss": 1.5975, "step": 36396 }, { "epoch": 0.47, "grad_norm": 3.8999829292297363, "learning_rate": 1.959579856958028e-05, "loss": 2.0467, "step": 36397 }, { "epoch": 0.47, "grad_norm": 3.8488235473632812, "learning_rate": 1.9595768995851238e-05, "loss": 2.281, "step": 36398 }, { "epoch": 0.47, "grad_norm": 3.4827849864959717, "learning_rate": 1.959573942106266e-05, "loss": 1.8357, "step": 36399 }, { "epoch": 0.47, "grad_norm": 3.4841113090515137, "learning_rate": 1.9595709845214548e-05, "loss": 2.1208, "step": 36400 }, { "epoch": 0.47, "grad_norm": 3.966355562210083, "learning_rate": 1.9595680268306907e-05, "loss": 2.4267, "step": 36401 }, { "epoch": 0.47, "grad_norm": 3.6671242713928223, "learning_rate": 1.9595650690339742e-05, "loss": 1.6892, "step": 36402 }, { "epoch": 0.47, "grad_norm": 3.998903274536133, "learning_rate": 1.959562111131305e-05, "loss": 2.3499, "step": 36403 }, { "epoch": 0.47, "grad_norm": 3.3257713317871094, "learning_rate": 1.9595591531226845e-05, "loss": 1.6832, "step": 36404 }, { "epoch": 0.47, "grad_norm": 3.415670871734619, "learning_rate": 1.959556195008112e-05, "loss": 1.5527, "step": 36405 }, { "epoch": 0.47, "grad_norm": 3.5767433643341064, "learning_rate": 1.9595532367875885e-05, "loss": 1.8676, "step": 36406 }, { "epoch": 0.47, "grad_norm": 3.7271006107330322, "learning_rate": 1.959550278461114e-05, "loss": 1.8983, "step": 36407 }, { "epoch": 0.47, "grad_norm": 3.9133453369140625, "learning_rate": 1.959547320028689e-05, "loss": 1.8692, "step": 36408 }, { "epoch": 0.47, "grad_norm": 4.404720306396484, "learning_rate": 1.959544361490313e-05, "loss": 2.2004, "step": 36409 }, { "epoch": 0.47, "grad_norm": 4.675082206726074, "learning_rate": 1.9595414028459877e-05, "loss": 2.253, "step": 36410 }, { "epoch": 0.47, "grad_norm": 3.851241111755371, "learning_rate": 1.9595384440957124e-05, "loss": 2.1267, "step": 36411 }, { "epoch": 0.47, "grad_norm": 3.7802743911743164, "learning_rate": 1.9595354852394875e-05, "loss": 1.906, "step": 36412 }, { "epoch": 0.47, "grad_norm": 4.177283763885498, "learning_rate": 1.959532526277314e-05, "loss": 2.2624, "step": 36413 }, { "epoch": 0.47, "grad_norm": 3.3606369495391846, "learning_rate": 1.959529567209192e-05, "loss": 1.4325, "step": 36414 }, { "epoch": 0.47, "grad_norm": 4.221406936645508, "learning_rate": 1.9595266080351215e-05, "loss": 2.1873, "step": 36415 }, { "epoch": 0.47, "grad_norm": 3.3588955402374268, "learning_rate": 1.959523648755103e-05, "loss": 1.8325, "step": 36416 }, { "epoch": 0.47, "grad_norm": 2.9776878356933594, "learning_rate": 1.959520689369137e-05, "loss": 1.4025, "step": 36417 }, { "epoch": 0.47, "grad_norm": 3.4110212326049805, "learning_rate": 1.9595177298772233e-05, "loss": 1.6765, "step": 36418 }, { "epoch": 0.47, "grad_norm": 3.7892661094665527, "learning_rate": 1.9595147702793628e-05, "loss": 2.074, "step": 36419 }, { "epoch": 0.47, "grad_norm": 4.315422058105469, "learning_rate": 1.9595118105755554e-05, "loss": 2.0186, "step": 36420 }, { "epoch": 0.47, "grad_norm": 4.099321365356445, "learning_rate": 1.9595088507658015e-05, "loss": 2.7107, "step": 36421 }, { "epoch": 0.47, "grad_norm": 3.7112245559692383, "learning_rate": 1.959505890850102e-05, "loss": 2.0545, "step": 36422 }, { "epoch": 0.47, "grad_norm": 4.104109764099121, "learning_rate": 1.9595029308284564e-05, "loss": 2.1095, "step": 36423 }, { "epoch": 0.47, "grad_norm": 3.4659674167633057, "learning_rate": 1.9594999707008652e-05, "loss": 1.6603, "step": 36424 }, { "epoch": 0.47, "grad_norm": 4.058965682983398, "learning_rate": 1.9594970104673295e-05, "loss": 2.0146, "step": 36425 }, { "epoch": 0.47, "grad_norm": 3.6072144508361816, "learning_rate": 1.9594940501278486e-05, "loss": 1.6739, "step": 36426 }, { "epoch": 0.47, "grad_norm": 3.9942920207977295, "learning_rate": 1.9594910896824236e-05, "loss": 2.1498, "step": 36427 }, { "epoch": 0.47, "grad_norm": 3.9203274250030518, "learning_rate": 1.9594881291310544e-05, "loss": 1.8624, "step": 36428 }, { "epoch": 0.47, "grad_norm": 4.593674659729004, "learning_rate": 1.9594851684737413e-05, "loss": 2.2492, "step": 36429 }, { "epoch": 0.47, "grad_norm": 3.9465346336364746, "learning_rate": 1.9594822077104848e-05, "loss": 2.329, "step": 36430 }, { "epoch": 0.47, "grad_norm": 4.106144905090332, "learning_rate": 1.9594792468412854e-05, "loss": 2.3393, "step": 36431 }, { "epoch": 0.47, "grad_norm": 4.023727893829346, "learning_rate": 1.959476285866143e-05, "loss": 2.0247, "step": 36432 }, { "epoch": 0.47, "grad_norm": 3.359290361404419, "learning_rate": 1.9594733247850583e-05, "loss": 1.6359, "step": 36433 }, { "epoch": 0.47, "grad_norm": 4.0040602684021, "learning_rate": 1.959470363598031e-05, "loss": 2.1409, "step": 36434 }, { "epoch": 0.47, "grad_norm": 3.4646377563476562, "learning_rate": 1.9594674023050622e-05, "loss": 1.7458, "step": 36435 }, { "epoch": 0.47, "grad_norm": 3.3425142765045166, "learning_rate": 1.9594644409061522e-05, "loss": 1.7364, "step": 36436 }, { "epoch": 0.47, "grad_norm": 4.455531597137451, "learning_rate": 1.9594614794013007e-05, "loss": 2.2858, "step": 36437 }, { "epoch": 0.47, "grad_norm": 3.6159744262695312, "learning_rate": 1.9594585177905087e-05, "loss": 1.9427, "step": 36438 }, { "epoch": 0.47, "grad_norm": 3.4164090156555176, "learning_rate": 1.9594555560737757e-05, "loss": 1.6443, "step": 36439 }, { "epoch": 0.47, "grad_norm": 3.467543601989746, "learning_rate": 1.9594525942511028e-05, "loss": 1.5791, "step": 36440 }, { "epoch": 0.47, "grad_norm": 3.2876620292663574, "learning_rate": 1.9594496323224902e-05, "loss": 1.9728, "step": 36441 }, { "epoch": 0.47, "grad_norm": 3.830777406692505, "learning_rate": 1.959446670287938e-05, "loss": 1.7368, "step": 36442 }, { "epoch": 0.47, "grad_norm": 3.9064853191375732, "learning_rate": 1.9594437081474467e-05, "loss": 1.8934, "step": 36443 }, { "epoch": 0.47, "grad_norm": 3.597994804382324, "learning_rate": 1.9594407459010162e-05, "loss": 1.8636, "step": 36444 }, { "epoch": 0.47, "grad_norm": 4.202678680419922, "learning_rate": 1.9594377835486476e-05, "loss": 2.1493, "step": 36445 }, { "epoch": 0.47, "grad_norm": 3.8378968238830566, "learning_rate": 1.9594348210903405e-05, "loss": 2.0864, "step": 36446 }, { "epoch": 0.47, "grad_norm": 3.4515318870544434, "learning_rate": 1.9594318585260958e-05, "loss": 1.6552, "step": 36447 }, { "epoch": 0.47, "grad_norm": 3.316847562789917, "learning_rate": 1.959428895855913e-05, "loss": 1.6304, "step": 36448 }, { "epoch": 0.47, "grad_norm": 3.681647777557373, "learning_rate": 1.9594259330797934e-05, "loss": 1.7854, "step": 36449 }, { "epoch": 0.47, "grad_norm": 3.5867857933044434, "learning_rate": 1.959422970197737e-05, "loss": 1.8723, "step": 36450 }, { "epoch": 0.47, "grad_norm": 4.043596267700195, "learning_rate": 1.9594200072097438e-05, "loss": 2.0308, "step": 36451 }, { "epoch": 0.47, "grad_norm": 4.679195880889893, "learning_rate": 1.959417044115814e-05, "loss": 2.2681, "step": 36452 }, { "epoch": 0.47, "grad_norm": 4.008316516876221, "learning_rate": 1.959414080915949e-05, "loss": 2.1271, "step": 36453 }, { "epoch": 0.47, "grad_norm": 3.861576557159424, "learning_rate": 1.959411117610148e-05, "loss": 2.1762, "step": 36454 }, { "epoch": 0.47, "grad_norm": 3.329408884048462, "learning_rate": 1.959408154198412e-05, "loss": 1.7729, "step": 36455 }, { "epoch": 0.47, "grad_norm": 4.1976399421691895, "learning_rate": 1.9594051906807404e-05, "loss": 2.1294, "step": 36456 }, { "epoch": 0.47, "grad_norm": 3.6617798805236816, "learning_rate": 1.959402227057135e-05, "loss": 1.5618, "step": 36457 }, { "epoch": 0.47, "grad_norm": 3.8118982315063477, "learning_rate": 1.959399263327595e-05, "loss": 1.8172, "step": 36458 }, { "epoch": 0.47, "grad_norm": 3.5682120323181152, "learning_rate": 1.959396299492121e-05, "loss": 1.6127, "step": 36459 }, { "epoch": 0.47, "grad_norm": 3.9675536155700684, "learning_rate": 1.9593933355507134e-05, "loss": 2.0476, "step": 36460 }, { "epoch": 0.47, "grad_norm": 3.23755145072937, "learning_rate": 1.9593903715033725e-05, "loss": 1.4956, "step": 36461 }, { "epoch": 0.47, "grad_norm": 3.43251371383667, "learning_rate": 1.9593874073500987e-05, "loss": 1.6932, "step": 36462 }, { "epoch": 0.47, "grad_norm": 4.015891075134277, "learning_rate": 1.9593844430908922e-05, "loss": 1.9942, "step": 36463 }, { "epoch": 0.47, "grad_norm": 3.6432430744171143, "learning_rate": 1.959381478725753e-05, "loss": 1.9202, "step": 36464 }, { "epoch": 0.47, "grad_norm": 3.801307439804077, "learning_rate": 1.9593785142546823e-05, "loss": 1.9739, "step": 36465 }, { "epoch": 0.47, "grad_norm": 4.170202732086182, "learning_rate": 1.95937554967768e-05, "loss": 2.012, "step": 36466 }, { "epoch": 0.47, "grad_norm": 3.7301993370056152, "learning_rate": 1.959372584994746e-05, "loss": 1.9325, "step": 36467 }, { "epoch": 0.47, "grad_norm": 3.6788675785064697, "learning_rate": 1.9593696202058813e-05, "loss": 1.6757, "step": 36468 }, { "epoch": 0.47, "grad_norm": 3.4069266319274902, "learning_rate": 1.9593666553110857e-05, "loss": 1.679, "step": 36469 }, { "epoch": 0.47, "grad_norm": 4.26453161239624, "learning_rate": 1.9593636903103598e-05, "loss": 2.3556, "step": 36470 }, { "epoch": 0.47, "grad_norm": 4.268121242523193, "learning_rate": 1.959360725203704e-05, "loss": 2.1021, "step": 36471 }, { "epoch": 0.47, "grad_norm": 3.427060127258301, "learning_rate": 1.9593577599911185e-05, "loss": 1.9562, "step": 36472 }, { "epoch": 0.47, "grad_norm": 3.7159972190856934, "learning_rate": 1.959354794672603e-05, "loss": 1.9354, "step": 36473 }, { "epoch": 0.47, "grad_norm": 4.5839972496032715, "learning_rate": 1.9593518292481595e-05, "loss": 2.0029, "step": 36474 }, { "epoch": 0.47, "grad_norm": 3.579336404800415, "learning_rate": 1.9593488637177867e-05, "loss": 1.7447, "step": 36475 }, { "epoch": 0.47, "grad_norm": 3.3152356147766113, "learning_rate": 1.9593458980814855e-05, "loss": 1.8563, "step": 36476 }, { "epoch": 0.47, "grad_norm": 4.117732524871826, "learning_rate": 1.9593429323392562e-05, "loss": 1.8682, "step": 36477 }, { "epoch": 0.47, "grad_norm": 3.6729896068573, "learning_rate": 1.9593399664910992e-05, "loss": 1.9829, "step": 36478 }, { "epoch": 0.47, "grad_norm": 3.8311948776245117, "learning_rate": 1.959337000537015e-05, "loss": 2.0153, "step": 36479 }, { "epoch": 0.47, "grad_norm": 3.9669253826141357, "learning_rate": 1.9593340344770037e-05, "loss": 1.7429, "step": 36480 }, { "epoch": 0.47, "grad_norm": 3.874255895614624, "learning_rate": 1.9593310683110657e-05, "loss": 1.6957, "step": 36481 }, { "epoch": 0.47, "grad_norm": 3.5227677822113037, "learning_rate": 1.959328102039201e-05, "loss": 1.7734, "step": 36482 }, { "epoch": 0.47, "grad_norm": 3.413574695587158, "learning_rate": 1.95932513566141e-05, "loss": 1.8906, "step": 36483 }, { "epoch": 0.47, "grad_norm": 3.50900936126709, "learning_rate": 1.9593221691776936e-05, "loss": 1.6506, "step": 36484 }, { "epoch": 0.47, "grad_norm": 3.679354667663574, "learning_rate": 1.9593192025880518e-05, "loss": 1.7176, "step": 36485 }, { "epoch": 0.47, "grad_norm": 3.628741979598999, "learning_rate": 1.9593162358924848e-05, "loss": 1.9579, "step": 36486 }, { "epoch": 0.47, "grad_norm": 3.941045045852661, "learning_rate": 1.959313269090993e-05, "loss": 2.3873, "step": 36487 }, { "epoch": 0.47, "grad_norm": 4.587984085083008, "learning_rate": 1.959310302183577e-05, "loss": 2.3072, "step": 36488 }, { "epoch": 0.47, "grad_norm": 3.478889226913452, "learning_rate": 1.9593073351702364e-05, "loss": 1.8507, "step": 36489 }, { "epoch": 0.47, "grad_norm": 3.4819650650024414, "learning_rate": 1.959304368050972e-05, "loss": 1.6913, "step": 36490 }, { "epoch": 0.47, "grad_norm": 4.1570658683776855, "learning_rate": 1.9593014008257845e-05, "loss": 1.9768, "step": 36491 }, { "epoch": 0.47, "grad_norm": 3.578052520751953, "learning_rate": 1.9592984334946738e-05, "loss": 1.773, "step": 36492 }, { "epoch": 0.47, "grad_norm": 3.602776050567627, "learning_rate": 1.95929546605764e-05, "loss": 1.5836, "step": 36493 }, { "epoch": 0.47, "grad_norm": 3.8802480697631836, "learning_rate": 1.959292498514684e-05, "loss": 1.7558, "step": 36494 }, { "epoch": 0.47, "grad_norm": 3.6627135276794434, "learning_rate": 1.9592895308658057e-05, "loss": 1.8847, "step": 36495 }, { "epoch": 0.47, "grad_norm": 3.883646011352539, "learning_rate": 1.9592865631110054e-05, "loss": 2.2607, "step": 36496 }, { "epoch": 0.47, "grad_norm": 3.6634624004364014, "learning_rate": 1.9592835952502836e-05, "loss": 1.6668, "step": 36497 }, { "epoch": 0.47, "grad_norm": 3.7224786281585693, "learning_rate": 1.959280627283641e-05, "loss": 1.9384, "step": 36498 }, { "epoch": 0.47, "grad_norm": 4.310727596282959, "learning_rate": 1.9592776592110773e-05, "loss": 1.9244, "step": 36499 }, { "epoch": 0.47, "grad_norm": 4.110934257507324, "learning_rate": 1.9592746910325928e-05, "loss": 1.7586, "step": 36500 }, { "epoch": 0.47, "grad_norm": 4.134061336517334, "learning_rate": 1.9592717227481885e-05, "loss": 2.2406, "step": 36501 }, { "epoch": 0.47, "grad_norm": 4.063593864440918, "learning_rate": 1.959268754357864e-05, "loss": 2.2721, "step": 36502 }, { "epoch": 0.47, "grad_norm": 4.167123794555664, "learning_rate": 1.9592657858616203e-05, "loss": 2.1759, "step": 36503 }, { "epoch": 0.47, "grad_norm": 3.8883209228515625, "learning_rate": 1.9592628172594573e-05, "loss": 2.0437, "step": 36504 }, { "epoch": 0.47, "grad_norm": 4.16206693649292, "learning_rate": 1.9592598485513753e-05, "loss": 2.135, "step": 36505 }, { "epoch": 0.47, "grad_norm": 3.9131016731262207, "learning_rate": 1.959256879737375e-05, "loss": 2.2287, "step": 36506 }, { "epoch": 0.47, "grad_norm": 3.5361971855163574, "learning_rate": 1.9592539108174563e-05, "loss": 1.792, "step": 36507 }, { "epoch": 0.47, "grad_norm": 3.531094551086426, "learning_rate": 1.9592509417916194e-05, "loss": 1.4708, "step": 36508 }, { "epoch": 0.47, "grad_norm": 3.2245757579803467, "learning_rate": 1.9592479726598654e-05, "loss": 1.294, "step": 36509 }, { "epoch": 0.47, "grad_norm": 3.8153021335601807, "learning_rate": 1.9592450034221936e-05, "loss": 1.94, "step": 36510 }, { "epoch": 0.47, "grad_norm": 3.8354787826538086, "learning_rate": 1.9592420340786055e-05, "loss": 2.0246, "step": 36511 }, { "epoch": 0.47, "grad_norm": 3.9268057346343994, "learning_rate": 1.9592390646291002e-05, "loss": 2.4872, "step": 36512 }, { "epoch": 0.47, "grad_norm": 3.6357262134552, "learning_rate": 1.959236095073679e-05, "loss": 1.8772, "step": 36513 }, { "epoch": 0.47, "grad_norm": 3.60029935836792, "learning_rate": 1.9592331254123417e-05, "loss": 1.8506, "step": 36514 }, { "epoch": 0.47, "grad_norm": 3.707819938659668, "learning_rate": 1.959230155645089e-05, "loss": 2.0367, "step": 36515 }, { "epoch": 0.47, "grad_norm": 3.7499518394470215, "learning_rate": 1.959227185771921e-05, "loss": 1.9191, "step": 36516 }, { "epoch": 0.47, "grad_norm": 3.747065544128418, "learning_rate": 1.959224215792838e-05, "loss": 1.9745, "step": 36517 }, { "epoch": 0.47, "grad_norm": 3.7185306549072266, "learning_rate": 1.9592212457078404e-05, "loss": 2.1974, "step": 36518 }, { "epoch": 0.47, "grad_norm": 4.33158540725708, "learning_rate": 1.9592182755169283e-05, "loss": 2.6658, "step": 36519 }, { "epoch": 0.47, "grad_norm": 3.6032352447509766, "learning_rate": 1.959215305220102e-05, "loss": 1.9846, "step": 36520 }, { "epoch": 0.47, "grad_norm": 3.614330530166626, "learning_rate": 1.959212334817363e-05, "loss": 1.9634, "step": 36521 }, { "epoch": 0.47, "grad_norm": 4.044718265533447, "learning_rate": 1.9592093643087098e-05, "loss": 2.2063, "step": 36522 }, { "epoch": 0.47, "grad_norm": 4.330575942993164, "learning_rate": 1.959206393694144e-05, "loss": 2.272, "step": 36523 }, { "epoch": 0.47, "grad_norm": 4.238413333892822, "learning_rate": 1.9592034229736655e-05, "loss": 2.2067, "step": 36524 }, { "epoch": 0.47, "grad_norm": 4.089305877685547, "learning_rate": 1.9592004521472746e-05, "loss": 2.4277, "step": 36525 }, { "epoch": 0.47, "grad_norm": 4.306235313415527, "learning_rate": 1.9591974812149715e-05, "loss": 1.9972, "step": 36526 }, { "epoch": 0.47, "grad_norm": 3.686150550842285, "learning_rate": 1.959194510176757e-05, "loss": 1.7807, "step": 36527 }, { "epoch": 0.47, "grad_norm": 3.728588104248047, "learning_rate": 1.9591915390326313e-05, "loss": 1.821, "step": 36528 }, { "epoch": 0.47, "grad_norm": 3.4916553497314453, "learning_rate": 1.9591885677825943e-05, "loss": 1.7405, "step": 36529 }, { "epoch": 0.47, "grad_norm": 4.442984580993652, "learning_rate": 1.9591855964266466e-05, "loss": 2.7614, "step": 36530 }, { "epoch": 0.47, "grad_norm": 4.108968734741211, "learning_rate": 1.9591826249647886e-05, "loss": 1.7831, "step": 36531 }, { "epoch": 0.47, "grad_norm": 4.098727703094482, "learning_rate": 1.9591796533970204e-05, "loss": 2.2493, "step": 36532 }, { "epoch": 0.47, "grad_norm": 3.53354549407959, "learning_rate": 1.9591766817233428e-05, "loss": 1.9335, "step": 36533 }, { "epoch": 0.47, "grad_norm": 3.864508628845215, "learning_rate": 1.9591737099437557e-05, "loss": 2.0342, "step": 36534 }, { "epoch": 0.47, "grad_norm": 3.9135732650756836, "learning_rate": 1.9591707380582596e-05, "loss": 1.9677, "step": 36535 }, { "epoch": 0.47, "grad_norm": 3.746284246444702, "learning_rate": 1.959167766066855e-05, "loss": 2.1036, "step": 36536 }, { "epoch": 0.47, "grad_norm": 3.684178113937378, "learning_rate": 1.9591647939695417e-05, "loss": 2.1478, "step": 36537 }, { "epoch": 0.47, "grad_norm": 4.097774982452393, "learning_rate": 1.95916182176632e-05, "loss": 2.1175, "step": 36538 }, { "epoch": 0.47, "grad_norm": 4.146090507507324, "learning_rate": 1.959158849457191e-05, "loss": 2.1503, "step": 36539 }, { "epoch": 0.47, "grad_norm": 3.567112922668457, "learning_rate": 1.9591558770421547e-05, "loss": 1.7352, "step": 36540 }, { "epoch": 0.47, "grad_norm": 4.621391773223877, "learning_rate": 1.959152904521211e-05, "loss": 2.0321, "step": 36541 }, { "epoch": 0.47, "grad_norm": 3.215718984603882, "learning_rate": 1.9591499318943606e-05, "loss": 1.7404, "step": 36542 }, { "epoch": 0.47, "grad_norm": 4.024074077606201, "learning_rate": 1.9591469591616043e-05, "loss": 2.1896, "step": 36543 }, { "epoch": 0.47, "grad_norm": 3.617044687271118, "learning_rate": 1.959143986322941e-05, "loss": 2.0516, "step": 36544 }, { "epoch": 0.47, "grad_norm": 4.217667579650879, "learning_rate": 1.9591410133783725e-05, "loss": 2.0604, "step": 36545 }, { "epoch": 0.47, "grad_norm": 3.391223192214966, "learning_rate": 1.9591380403278985e-05, "loss": 1.6746, "step": 36546 }, { "epoch": 0.47, "grad_norm": 3.7901902198791504, "learning_rate": 1.9591350671715196e-05, "loss": 2.462, "step": 36547 }, { "epoch": 0.47, "grad_norm": 3.283646583557129, "learning_rate": 1.9591320939092355e-05, "loss": 1.9868, "step": 36548 }, { "epoch": 0.47, "grad_norm": 4.014934539794922, "learning_rate": 1.959129120541047e-05, "loss": 1.4851, "step": 36549 }, { "epoch": 0.47, "grad_norm": 3.3742032051086426, "learning_rate": 1.9591261470669544e-05, "loss": 1.7209, "step": 36550 }, { "epoch": 0.47, "grad_norm": 3.9949190616607666, "learning_rate": 1.959123173486958e-05, "loss": 2.2688, "step": 36551 }, { "epoch": 0.47, "grad_norm": 3.809985876083374, "learning_rate": 1.9591201998010586e-05, "loss": 1.9216, "step": 36552 }, { "epoch": 0.47, "grad_norm": 4.029268741607666, "learning_rate": 1.9591172260092556e-05, "loss": 2.2144, "step": 36553 }, { "epoch": 0.47, "grad_norm": 4.1656107902526855, "learning_rate": 1.9591142521115497e-05, "loss": 2.2448, "step": 36554 }, { "epoch": 0.47, "grad_norm": 4.2706074714660645, "learning_rate": 1.9591112781079417e-05, "loss": 2.2582, "step": 36555 }, { "epoch": 0.47, "grad_norm": 3.5682873725891113, "learning_rate": 1.9591083039984312e-05, "loss": 1.7485, "step": 36556 }, { "epoch": 0.47, "grad_norm": 3.2943553924560547, "learning_rate": 1.959105329783019e-05, "loss": 1.7894, "step": 36557 }, { "epoch": 0.47, "grad_norm": 3.7040719985961914, "learning_rate": 1.9591023554617054e-05, "loss": 1.8784, "step": 36558 }, { "epoch": 0.47, "grad_norm": 4.0897908210754395, "learning_rate": 1.95909938103449e-05, "loss": 1.9937, "step": 36559 }, { "epoch": 0.47, "grad_norm": 3.9728808403015137, "learning_rate": 1.9590964065013747e-05, "loss": 1.963, "step": 36560 }, { "epoch": 0.47, "grad_norm": 3.1350579261779785, "learning_rate": 1.959093431862358e-05, "loss": 1.5489, "step": 36561 }, { "epoch": 0.47, "grad_norm": 3.902846574783325, "learning_rate": 1.959090457117442e-05, "loss": 2.1568, "step": 36562 }, { "epoch": 0.47, "grad_norm": 3.847078561782837, "learning_rate": 1.9590874822666257e-05, "loss": 1.7736, "step": 36563 }, { "epoch": 0.47, "grad_norm": 3.6623077392578125, "learning_rate": 1.9590845073099098e-05, "loss": 1.6901, "step": 36564 }, { "epoch": 0.47, "grad_norm": 4.197078227996826, "learning_rate": 1.9590815322472948e-05, "loss": 2.3852, "step": 36565 }, { "epoch": 0.47, "grad_norm": 3.6228771209716797, "learning_rate": 1.959078557078781e-05, "loss": 1.763, "step": 36566 }, { "epoch": 0.47, "grad_norm": 3.46905517578125, "learning_rate": 1.9590755818043684e-05, "loss": 2.0275, "step": 36567 }, { "epoch": 0.47, "grad_norm": 4.244638442993164, "learning_rate": 1.9590726064240578e-05, "loss": 1.7122, "step": 36568 }, { "epoch": 0.47, "grad_norm": 4.111752986907959, "learning_rate": 1.959069630937849e-05, "loss": 2.0907, "step": 36569 }, { "epoch": 0.47, "grad_norm": 4.222322463989258, "learning_rate": 1.9590666553457432e-05, "loss": 2.1902, "step": 36570 }, { "epoch": 0.47, "grad_norm": 3.5877389907836914, "learning_rate": 1.95906367964774e-05, "loss": 1.3922, "step": 36571 }, { "epoch": 0.47, "grad_norm": 3.826889753341675, "learning_rate": 1.9590607038438397e-05, "loss": 1.8624, "step": 36572 }, { "epoch": 0.47, "grad_norm": 3.7165863513946533, "learning_rate": 1.959057727934043e-05, "loss": 1.8627, "step": 36573 }, { "epoch": 0.47, "grad_norm": 4.111409664154053, "learning_rate": 1.95905475191835e-05, "loss": 2.3679, "step": 36574 }, { "epoch": 0.47, "grad_norm": 3.0700039863586426, "learning_rate": 1.9590517757967607e-05, "loss": 1.456, "step": 36575 }, { "epoch": 0.47, "grad_norm": 3.7650439739227295, "learning_rate": 1.9590487995692765e-05, "loss": 2.0419, "step": 36576 }, { "epoch": 0.47, "grad_norm": 4.222440242767334, "learning_rate": 1.9590458232358966e-05, "loss": 2.4712, "step": 36577 }, { "epoch": 0.47, "grad_norm": 3.3288800716400146, "learning_rate": 1.959042846796622e-05, "loss": 1.6676, "step": 36578 }, { "epoch": 0.47, "grad_norm": 3.7436139583587646, "learning_rate": 1.9590398702514526e-05, "loss": 2.098, "step": 36579 }, { "epoch": 0.47, "grad_norm": 3.5653774738311768, "learning_rate": 1.9590368936003886e-05, "loss": 2.0735, "step": 36580 }, { "epoch": 0.47, "grad_norm": 3.7962663173675537, "learning_rate": 1.9590339168434312e-05, "loss": 1.9703, "step": 36581 }, { "epoch": 0.47, "grad_norm": 4.100880146026611, "learning_rate": 1.95903093998058e-05, "loss": 1.9453, "step": 36582 }, { "epoch": 0.47, "grad_norm": 4.178649425506592, "learning_rate": 1.9590279630118357e-05, "loss": 2.3798, "step": 36583 }, { "epoch": 0.47, "grad_norm": 3.8306310176849365, "learning_rate": 1.9590249859371983e-05, "loss": 2.2811, "step": 36584 }, { "epoch": 0.47, "grad_norm": 4.059383869171143, "learning_rate": 1.9590220087566682e-05, "loss": 2.4413, "step": 36585 }, { "epoch": 0.47, "grad_norm": 3.4636991024017334, "learning_rate": 1.959019031470246e-05, "loss": 1.9223, "step": 36586 }, { "epoch": 0.47, "grad_norm": 3.9755823612213135, "learning_rate": 1.9590160540779317e-05, "loss": 2.2541, "step": 36587 }, { "epoch": 0.47, "grad_norm": 3.74153470993042, "learning_rate": 1.9590130765797256e-05, "loss": 1.9252, "step": 36588 }, { "epoch": 0.47, "grad_norm": 3.9184393882751465, "learning_rate": 1.9590100989756285e-05, "loss": 2.0575, "step": 36589 }, { "epoch": 0.47, "grad_norm": 4.249166965484619, "learning_rate": 1.95900712126564e-05, "loss": 1.8155, "step": 36590 }, { "epoch": 0.47, "grad_norm": 3.571911334991455, "learning_rate": 1.9590041434497615e-05, "loss": 1.6559, "step": 36591 }, { "epoch": 0.47, "grad_norm": 3.550469160079956, "learning_rate": 1.959001165527992e-05, "loss": 1.9412, "step": 36592 }, { "epoch": 0.47, "grad_norm": 4.217790126800537, "learning_rate": 1.958998187500333e-05, "loss": 1.899, "step": 36593 }, { "epoch": 0.47, "grad_norm": 3.347729444503784, "learning_rate": 1.958995209366784e-05, "loss": 1.8636, "step": 36594 }, { "epoch": 0.47, "grad_norm": 3.6576731204986572, "learning_rate": 1.9589922311273458e-05, "loss": 2.0059, "step": 36595 }, { "epoch": 0.47, "grad_norm": 3.9847447872161865, "learning_rate": 1.9589892527820184e-05, "loss": 1.7112, "step": 36596 }, { "epoch": 0.47, "grad_norm": 3.421419382095337, "learning_rate": 1.9589862743308026e-05, "loss": 1.7566, "step": 36597 }, { "epoch": 0.47, "grad_norm": 4.35759973526001, "learning_rate": 1.9589832957736983e-05, "loss": 2.0575, "step": 36598 }, { "epoch": 0.47, "grad_norm": 3.8481099605560303, "learning_rate": 1.958980317110706e-05, "loss": 1.9539, "step": 36599 }, { "epoch": 0.47, "grad_norm": 3.6640162467956543, "learning_rate": 1.958977338341826e-05, "loss": 1.9598, "step": 36600 }, { "epoch": 0.47, "grad_norm": 3.8216421604156494, "learning_rate": 1.9589743594670584e-05, "loss": 2.016, "step": 36601 }, { "epoch": 0.48, "grad_norm": 3.5738675594329834, "learning_rate": 1.9589713804864044e-05, "loss": 1.8033, "step": 36602 }, { "epoch": 0.48, "grad_norm": 3.815244197845459, "learning_rate": 1.958968401399863e-05, "loss": 1.9259, "step": 36603 }, { "epoch": 0.48, "grad_norm": 3.2352399826049805, "learning_rate": 1.9589654222074353e-05, "loss": 1.5937, "step": 36604 }, { "epoch": 0.48, "grad_norm": 3.610689163208008, "learning_rate": 1.958962442909122e-05, "loss": 1.6901, "step": 36605 }, { "epoch": 0.48, "grad_norm": 4.006771087646484, "learning_rate": 1.9589594635049227e-05, "loss": 2.177, "step": 36606 }, { "epoch": 0.48, "grad_norm": 4.229612827301025, "learning_rate": 1.9589564839948382e-05, "loss": 1.8623, "step": 36607 }, { "epoch": 0.48, "grad_norm": 3.6983957290649414, "learning_rate": 1.9589535043788685e-05, "loss": 2.1886, "step": 36608 }, { "epoch": 0.48, "grad_norm": 4.288323402404785, "learning_rate": 1.958950524657014e-05, "loss": 2.0425, "step": 36609 }, { "epoch": 0.48, "grad_norm": 4.158098220825195, "learning_rate": 1.958947544829275e-05, "loss": 1.5476, "step": 36610 }, { "epoch": 0.48, "grad_norm": 4.046012878417969, "learning_rate": 1.958944564895652e-05, "loss": 2.0, "step": 36611 }, { "epoch": 0.48, "grad_norm": 3.910499095916748, "learning_rate": 1.9589415848561452e-05, "loss": 2.1049, "step": 36612 }, { "epoch": 0.48, "grad_norm": 3.954503059387207, "learning_rate": 1.9589386047107552e-05, "loss": 2.1443, "step": 36613 }, { "epoch": 0.48, "grad_norm": 3.581820011138916, "learning_rate": 1.958935624459482e-05, "loss": 2.1139, "step": 36614 }, { "epoch": 0.48, "grad_norm": 4.225427150726318, "learning_rate": 1.958932644102326e-05, "loss": 2.5239, "step": 36615 }, { "epoch": 0.48, "grad_norm": 3.7632272243499756, "learning_rate": 1.9589296636392877e-05, "loss": 1.8598, "step": 36616 }, { "epoch": 0.48, "grad_norm": 4.054398059844971, "learning_rate": 1.958926683070367e-05, "loss": 2.1211, "step": 36617 }, { "epoch": 0.48, "grad_norm": 4.1825480461120605, "learning_rate": 1.958923702395565e-05, "loss": 2.1103, "step": 36618 }, { "epoch": 0.48, "grad_norm": 3.5690181255340576, "learning_rate": 1.958920721614881e-05, "loss": 2.0523, "step": 36619 }, { "epoch": 0.48, "grad_norm": 3.459286689758301, "learning_rate": 1.958917740728316e-05, "loss": 1.6446, "step": 36620 }, { "epoch": 0.48, "grad_norm": 3.8003013134002686, "learning_rate": 1.9589147597358707e-05, "loss": 1.8529, "step": 36621 }, { "epoch": 0.48, "grad_norm": 3.808375358581543, "learning_rate": 1.9589117786375446e-05, "loss": 1.8084, "step": 36622 }, { "epoch": 0.48, "grad_norm": 3.5088324546813965, "learning_rate": 1.9589087974333383e-05, "loss": 1.6103, "step": 36623 }, { "epoch": 0.48, "grad_norm": 4.336187839508057, "learning_rate": 1.9589058161232525e-05, "loss": 2.1223, "step": 36624 }, { "epoch": 0.48, "grad_norm": 3.5896785259246826, "learning_rate": 1.9589028347072866e-05, "loss": 1.5405, "step": 36625 }, { "epoch": 0.48, "grad_norm": 4.232399940490723, "learning_rate": 1.9588998531854423e-05, "loss": 2.1195, "step": 36626 }, { "epoch": 0.48, "grad_norm": 3.576035499572754, "learning_rate": 1.958896871557719e-05, "loss": 1.7931, "step": 36627 }, { "epoch": 0.48, "grad_norm": 3.807490825653076, "learning_rate": 1.9588938898241166e-05, "loss": 2.0074, "step": 36628 }, { "epoch": 0.48, "grad_norm": 4.083332061767578, "learning_rate": 1.9588909079846367e-05, "loss": 2.1409, "step": 36629 }, { "epoch": 0.48, "grad_norm": 3.5977671146392822, "learning_rate": 1.958887926039279e-05, "loss": 1.7913, "step": 36630 }, { "epoch": 0.48, "grad_norm": 4.063975811004639, "learning_rate": 1.9588849439880434e-05, "loss": 2.3557, "step": 36631 }, { "epoch": 0.48, "grad_norm": 4.17555570602417, "learning_rate": 1.9588819618309308e-05, "loss": 2.227, "step": 36632 }, { "epoch": 0.48, "grad_norm": 3.263167381286621, "learning_rate": 1.9588789795679414e-05, "loss": 1.4504, "step": 36633 }, { "epoch": 0.48, "grad_norm": 3.9826462268829346, "learning_rate": 1.9588759971990753e-05, "loss": 1.869, "step": 36634 }, { "epoch": 0.48, "grad_norm": 4.000307083129883, "learning_rate": 1.9588730147243335e-05, "loss": 1.9762, "step": 36635 }, { "epoch": 0.48, "grad_norm": 3.868853807449341, "learning_rate": 1.9588700321437156e-05, "loss": 1.8501, "step": 36636 }, { "epoch": 0.48, "grad_norm": 3.598555326461792, "learning_rate": 1.9588670494572223e-05, "loss": 1.6915, "step": 36637 }, { "epoch": 0.48, "grad_norm": 3.7453153133392334, "learning_rate": 1.9588640666648537e-05, "loss": 2.1062, "step": 36638 }, { "epoch": 0.48, "grad_norm": 3.393256187438965, "learning_rate": 1.95886108376661e-05, "loss": 1.7513, "step": 36639 }, { "epoch": 0.48, "grad_norm": 4.050259590148926, "learning_rate": 1.958858100762492e-05, "loss": 2.3787, "step": 36640 }, { "epoch": 0.48, "grad_norm": 3.811892032623291, "learning_rate": 1.9588551176524997e-05, "loss": 2.1585, "step": 36641 }, { "epoch": 0.48, "grad_norm": 3.7931437492370605, "learning_rate": 1.9588521344366335e-05, "loss": 2.0202, "step": 36642 }, { "epoch": 0.48, "grad_norm": 3.5432794094085693, "learning_rate": 1.958849151114894e-05, "loss": 1.6664, "step": 36643 }, { "epoch": 0.48, "grad_norm": 4.004104137420654, "learning_rate": 1.958846167687281e-05, "loss": 1.9719, "step": 36644 }, { "epoch": 0.48, "grad_norm": 3.3371808528900146, "learning_rate": 1.9588431841537955e-05, "loss": 1.6129, "step": 36645 }, { "epoch": 0.48, "grad_norm": 4.35895299911499, "learning_rate": 1.9588402005144372e-05, "loss": 2.281, "step": 36646 }, { "epoch": 0.48, "grad_norm": 3.9453821182250977, "learning_rate": 1.9588372167692063e-05, "loss": 2.3122, "step": 36647 }, { "epoch": 0.48, "grad_norm": 4.002640724182129, "learning_rate": 1.958834232918104e-05, "loss": 1.8756, "step": 36648 }, { "epoch": 0.48, "grad_norm": 3.693922519683838, "learning_rate": 1.95883124896113e-05, "loss": 1.616, "step": 36649 }, { "epoch": 0.48, "grad_norm": 3.1999709606170654, "learning_rate": 1.958828264898285e-05, "loss": 1.8101, "step": 36650 }, { "epoch": 0.48, "grad_norm": 3.513378620147705, "learning_rate": 1.9588252807295686e-05, "loss": 1.6848, "step": 36651 }, { "epoch": 0.48, "grad_norm": 3.377311944961548, "learning_rate": 1.958822296454982e-05, "loss": 1.7532, "step": 36652 }, { "epoch": 0.48, "grad_norm": 3.5600788593292236, "learning_rate": 1.9588193120745252e-05, "loss": 1.9019, "step": 36653 }, { "epoch": 0.48, "grad_norm": 4.157422065734863, "learning_rate": 1.958816327588198e-05, "loss": 2.4892, "step": 36654 }, { "epoch": 0.48, "grad_norm": 3.609469413757324, "learning_rate": 1.9588133429960016e-05, "loss": 1.7703, "step": 36655 }, { "epoch": 0.48, "grad_norm": 3.9102351665496826, "learning_rate": 1.9588103582979362e-05, "loss": 1.9235, "step": 36656 }, { "epoch": 0.48, "grad_norm": 3.583595037460327, "learning_rate": 1.9588073734940015e-05, "loss": 1.691, "step": 36657 }, { "epoch": 0.48, "grad_norm": 4.367842197418213, "learning_rate": 1.958804388584198e-05, "loss": 2.3483, "step": 36658 }, { "epoch": 0.48, "grad_norm": 3.395998239517212, "learning_rate": 1.9588014035685265e-05, "loss": 1.8485, "step": 36659 }, { "epoch": 0.48, "grad_norm": 3.904801368713379, "learning_rate": 1.958798418446987e-05, "loss": 2.1366, "step": 36660 }, { "epoch": 0.48, "grad_norm": 3.5043883323669434, "learning_rate": 1.95879543321958e-05, "loss": 2.0564, "step": 36661 }, { "epoch": 0.48, "grad_norm": 3.369701385498047, "learning_rate": 1.9587924478863055e-05, "loss": 1.5574, "step": 36662 }, { "epoch": 0.48, "grad_norm": 3.854257106781006, "learning_rate": 1.958789462447164e-05, "loss": 2.0531, "step": 36663 }, { "epoch": 0.48, "grad_norm": 3.9649596214294434, "learning_rate": 1.9587864769021566e-05, "loss": 1.5993, "step": 36664 }, { "epoch": 0.48, "grad_norm": 4.01115083694458, "learning_rate": 1.958783491251282e-05, "loss": 2.5282, "step": 36665 }, { "epoch": 0.48, "grad_norm": 4.0413970947265625, "learning_rate": 1.958780505494542e-05, "loss": 1.8142, "step": 36666 }, { "epoch": 0.48, "grad_norm": 3.6631288528442383, "learning_rate": 1.9587775196319365e-05, "loss": 2.0203, "step": 36667 }, { "epoch": 0.48, "grad_norm": 3.6425724029541016, "learning_rate": 1.9587745336634653e-05, "loss": 1.7494, "step": 36668 }, { "epoch": 0.48, "grad_norm": 3.780294418334961, "learning_rate": 1.958771547589129e-05, "loss": 1.8703, "step": 36669 }, { "epoch": 0.48, "grad_norm": 3.594517230987549, "learning_rate": 1.958768561408928e-05, "loss": 1.7837, "step": 36670 }, { "epoch": 0.48, "grad_norm": 3.712550163269043, "learning_rate": 1.958765575122863e-05, "loss": 2.1986, "step": 36671 }, { "epoch": 0.48, "grad_norm": 3.1471428871154785, "learning_rate": 1.958762588730934e-05, "loss": 1.528, "step": 36672 }, { "epoch": 0.48, "grad_norm": 4.480238437652588, "learning_rate": 1.9587596022331415e-05, "loss": 2.3442, "step": 36673 }, { "epoch": 0.48, "grad_norm": 3.843613624572754, "learning_rate": 1.9587566156294854e-05, "loss": 1.8173, "step": 36674 }, { "epoch": 0.48, "grad_norm": 3.4294285774230957, "learning_rate": 1.958753628919966e-05, "loss": 1.508, "step": 36675 }, { "epoch": 0.48, "grad_norm": 3.786219358444214, "learning_rate": 1.9587506421045846e-05, "loss": 2.0495, "step": 36676 }, { "epoch": 0.48, "grad_norm": 3.845580577850342, "learning_rate": 1.9587476551833403e-05, "loss": 2.1021, "step": 36677 }, { "epoch": 0.48, "grad_norm": 3.427684783935547, "learning_rate": 1.9587446681562344e-05, "loss": 1.5477, "step": 36678 }, { "epoch": 0.48, "grad_norm": 3.4382219314575195, "learning_rate": 1.9587416810232668e-05, "loss": 1.6233, "step": 36679 }, { "epoch": 0.48, "grad_norm": 4.180305480957031, "learning_rate": 1.9587386937844376e-05, "loss": 2.4736, "step": 36680 }, { "epoch": 0.48, "grad_norm": 3.734849214553833, "learning_rate": 1.9587357064397476e-05, "loss": 2.0443, "step": 36681 }, { "epoch": 0.48, "grad_norm": 4.159308910369873, "learning_rate": 1.9587327189891968e-05, "loss": 2.0983, "step": 36682 }, { "epoch": 0.48, "grad_norm": 3.965822458267212, "learning_rate": 1.958729731432786e-05, "loss": 2.3527, "step": 36683 }, { "epoch": 0.48, "grad_norm": 4.161802768707275, "learning_rate": 1.9587267437705147e-05, "loss": 2.2361, "step": 36684 }, { "epoch": 0.48, "grad_norm": 3.5784995555877686, "learning_rate": 1.9587237560023838e-05, "loss": 1.9721, "step": 36685 }, { "epoch": 0.48, "grad_norm": 3.8022866249084473, "learning_rate": 1.9587207681283934e-05, "loss": 2.0582, "step": 36686 }, { "epoch": 0.48, "grad_norm": 3.750894784927368, "learning_rate": 1.9587177801485443e-05, "loss": 1.6107, "step": 36687 }, { "epoch": 0.48, "grad_norm": 3.66943359375, "learning_rate": 1.9587147920628363e-05, "loss": 2.0348, "step": 36688 }, { "epoch": 0.48, "grad_norm": 3.908051013946533, "learning_rate": 1.95871180387127e-05, "loss": 2.1913, "step": 36689 }, { "epoch": 0.48, "grad_norm": 4.296688556671143, "learning_rate": 1.9587088155738457e-05, "loss": 2.08, "step": 36690 }, { "epoch": 0.48, "grad_norm": 4.073658466339111, "learning_rate": 1.9587058271705634e-05, "loss": 1.8045, "step": 36691 }, { "epoch": 0.48, "grad_norm": 4.228659629821777, "learning_rate": 1.958702838661424e-05, "loss": 2.0728, "step": 36692 }, { "epoch": 0.48, "grad_norm": 3.038552761077881, "learning_rate": 1.9586998500464272e-05, "loss": 1.3329, "step": 36693 }, { "epoch": 0.48, "grad_norm": 3.461404323577881, "learning_rate": 1.9586968613255743e-05, "loss": 1.7367, "step": 36694 }, { "epoch": 0.48, "grad_norm": 3.6645448207855225, "learning_rate": 1.9586938724988647e-05, "loss": 1.877, "step": 36695 }, { "epoch": 0.48, "grad_norm": 3.5726003646850586, "learning_rate": 1.958690883566299e-05, "loss": 1.6049, "step": 36696 }, { "epoch": 0.48, "grad_norm": 3.9224729537963867, "learning_rate": 1.9586878945278774e-05, "loss": 2.316, "step": 36697 }, { "epoch": 0.48, "grad_norm": 3.4820969104766846, "learning_rate": 1.9586849053836006e-05, "loss": 2.0203, "step": 36698 }, { "epoch": 0.48, "grad_norm": 3.2236266136169434, "learning_rate": 1.9586819161334686e-05, "loss": 1.8558, "step": 36699 }, { "epoch": 0.48, "grad_norm": 3.8981845378875732, "learning_rate": 1.9586789267774816e-05, "loss": 2.3346, "step": 36700 }, { "epoch": 0.48, "grad_norm": 3.8208985328674316, "learning_rate": 1.958675937315641e-05, "loss": 1.871, "step": 36701 }, { "epoch": 0.48, "grad_norm": 3.370954751968384, "learning_rate": 1.9586729477479455e-05, "loss": 1.6255, "step": 36702 }, { "epoch": 0.48, "grad_norm": 3.8905751705169678, "learning_rate": 1.9586699580743963e-05, "loss": 1.9489, "step": 36703 }, { "epoch": 0.48, "grad_norm": 3.684169292449951, "learning_rate": 1.958666968294994e-05, "loss": 1.522, "step": 36704 }, { "epoch": 0.48, "grad_norm": 3.850353717803955, "learning_rate": 1.9586639784097383e-05, "loss": 2.2192, "step": 36705 }, { "epoch": 0.48, "grad_norm": 3.684187650680542, "learning_rate": 1.9586609884186304e-05, "loss": 1.9905, "step": 36706 }, { "epoch": 0.48, "grad_norm": 4.428079128265381, "learning_rate": 1.9586579983216694e-05, "loss": 2.1211, "step": 36707 }, { "epoch": 0.48, "grad_norm": 4.3849334716796875, "learning_rate": 1.9586550081188565e-05, "loss": 2.0099, "step": 36708 }, { "epoch": 0.48, "grad_norm": 3.5501632690429688, "learning_rate": 1.958652017810192e-05, "loss": 1.9854, "step": 36709 }, { "epoch": 0.48, "grad_norm": 3.4404165744781494, "learning_rate": 1.958649027395676e-05, "loss": 1.5146, "step": 36710 }, { "epoch": 0.48, "grad_norm": 3.3556363582611084, "learning_rate": 1.958646036875309e-05, "loss": 1.6447, "step": 36711 }, { "epoch": 0.48, "grad_norm": 3.444589376449585, "learning_rate": 1.9586430462490907e-05, "loss": 1.7112, "step": 36712 }, { "epoch": 0.48, "grad_norm": 3.4623396396636963, "learning_rate": 1.9586400555170228e-05, "loss": 1.7869, "step": 36713 }, { "epoch": 0.48, "grad_norm": 3.4088551998138428, "learning_rate": 1.958637064679104e-05, "loss": 1.92, "step": 36714 }, { "epoch": 0.48, "grad_norm": 3.8589274883270264, "learning_rate": 1.9586340737353357e-05, "loss": 2.0778, "step": 36715 }, { "epoch": 0.48, "grad_norm": 3.588265895843506, "learning_rate": 1.958631082685718e-05, "loss": 1.4522, "step": 36716 }, { "epoch": 0.48, "grad_norm": 4.064711570739746, "learning_rate": 1.958628091530251e-05, "loss": 2.1502, "step": 36717 }, { "epoch": 0.48, "grad_norm": 3.8712046146392822, "learning_rate": 1.9586251002689352e-05, "loss": 2.0413, "step": 36718 }, { "epoch": 0.48, "grad_norm": 4.107468605041504, "learning_rate": 1.958622108901771e-05, "loss": 2.0053, "step": 36719 }, { "epoch": 0.48, "grad_norm": 4.120575904846191, "learning_rate": 1.9586191174287587e-05, "loss": 1.9961, "step": 36720 }, { "epoch": 0.48, "grad_norm": 3.4428014755249023, "learning_rate": 1.9586161258498984e-05, "loss": 1.8431, "step": 36721 }, { "epoch": 0.48, "grad_norm": 3.389460563659668, "learning_rate": 1.958613134165191e-05, "loss": 1.8292, "step": 36722 }, { "epoch": 0.48, "grad_norm": 3.4458377361297607, "learning_rate": 1.9586101423746363e-05, "loss": 1.7472, "step": 36723 }, { "epoch": 0.48, "grad_norm": 3.8823211193084717, "learning_rate": 1.9586071504782345e-05, "loss": 1.9193, "step": 36724 }, { "epoch": 0.48, "grad_norm": 3.8881995677948, "learning_rate": 1.9586041584759865e-05, "loss": 1.73, "step": 36725 }, { "epoch": 0.48, "grad_norm": 3.649200916290283, "learning_rate": 1.9586011663678922e-05, "loss": 2.0344, "step": 36726 }, { "epoch": 0.48, "grad_norm": 3.5698187351226807, "learning_rate": 1.958598174153952e-05, "loss": 1.7963, "step": 36727 }, { "epoch": 0.48, "grad_norm": 4.072647571563721, "learning_rate": 1.9585951818341666e-05, "loss": 2.5468, "step": 36728 }, { "epoch": 0.48, "grad_norm": 3.8292012214660645, "learning_rate": 1.9585921894085355e-05, "loss": 2.182, "step": 36729 }, { "epoch": 0.48, "grad_norm": 3.9656057357788086, "learning_rate": 1.95858919687706e-05, "loss": 2.2152, "step": 36730 }, { "epoch": 0.48, "grad_norm": 3.8256993293762207, "learning_rate": 1.95858620423974e-05, "loss": 1.9669, "step": 36731 }, { "epoch": 0.48, "grad_norm": 3.529897689819336, "learning_rate": 1.9585832114965758e-05, "loss": 1.8053, "step": 36732 }, { "epoch": 0.48, "grad_norm": 3.3250882625579834, "learning_rate": 1.9585802186475676e-05, "loss": 1.8167, "step": 36733 }, { "epoch": 0.48, "grad_norm": 3.975916624069214, "learning_rate": 1.9585772256927158e-05, "loss": 2.1887, "step": 36734 }, { "epoch": 0.48, "grad_norm": 3.33329701423645, "learning_rate": 1.9585742326320208e-05, "loss": 1.5572, "step": 36735 }, { "epoch": 0.48, "grad_norm": 3.8356785774230957, "learning_rate": 1.9585712394654833e-05, "loss": 2.1069, "step": 36736 }, { "epoch": 0.48, "grad_norm": 4.306797027587891, "learning_rate": 1.958568246193103e-05, "loss": 1.8666, "step": 36737 }, { "epoch": 0.48, "grad_norm": 3.940847873687744, "learning_rate": 1.9585652528148807e-05, "loss": 2.3406, "step": 36738 }, { "epoch": 0.48, "grad_norm": 3.6003527641296387, "learning_rate": 1.9585622593308163e-05, "loss": 1.9072, "step": 36739 }, { "epoch": 0.48, "grad_norm": 3.3924150466918945, "learning_rate": 1.9585592657409103e-05, "loss": 1.6834, "step": 36740 }, { "epoch": 0.48, "grad_norm": 4.12816858291626, "learning_rate": 1.9585562720451632e-05, "loss": 2.2842, "step": 36741 }, { "epoch": 0.48, "grad_norm": 3.5037198066711426, "learning_rate": 1.9585532782435753e-05, "loss": 1.8702, "step": 36742 }, { "epoch": 0.48, "grad_norm": 3.5166738033294678, "learning_rate": 1.958550284336147e-05, "loss": 1.9518, "step": 36743 }, { "epoch": 0.48, "grad_norm": 3.912046432495117, "learning_rate": 1.9585472903228783e-05, "loss": 1.817, "step": 36744 }, { "epoch": 0.48, "grad_norm": 3.857546329498291, "learning_rate": 1.9585442962037696e-05, "loss": 2.1589, "step": 36745 }, { "epoch": 0.48, "grad_norm": 3.626582622528076, "learning_rate": 1.9585413019788214e-05, "loss": 2.3293, "step": 36746 }, { "epoch": 0.48, "grad_norm": 4.636626243591309, "learning_rate": 1.9585383076480344e-05, "loss": 2.2827, "step": 36747 }, { "epoch": 0.48, "grad_norm": 3.6223158836364746, "learning_rate": 1.958535313211408e-05, "loss": 1.7632, "step": 36748 }, { "epoch": 0.48, "grad_norm": 3.5151357650756836, "learning_rate": 1.9585323186689433e-05, "loss": 1.4796, "step": 36749 }, { "epoch": 0.48, "grad_norm": 3.7713840007781982, "learning_rate": 1.9585293240206402e-05, "loss": 1.7328, "step": 36750 }, { "epoch": 0.48, "grad_norm": 3.130789279937744, "learning_rate": 1.9585263292664993e-05, "loss": 1.7159, "step": 36751 }, { "epoch": 0.48, "grad_norm": 3.5452589988708496, "learning_rate": 1.9585233344065207e-05, "loss": 2.0477, "step": 36752 }, { "epoch": 0.48, "grad_norm": 3.3407421112060547, "learning_rate": 1.958520339440705e-05, "loss": 1.8568, "step": 36753 }, { "epoch": 0.48, "grad_norm": 3.6068313121795654, "learning_rate": 1.9585173443690525e-05, "loss": 1.8972, "step": 36754 }, { "epoch": 0.48, "grad_norm": 4.335608959197998, "learning_rate": 1.9585143491915635e-05, "loss": 2.3554, "step": 36755 }, { "epoch": 0.48, "grad_norm": 3.561007022857666, "learning_rate": 1.9585113539082378e-05, "loss": 1.7075, "step": 36756 }, { "epoch": 0.48, "grad_norm": 3.1692702770233154, "learning_rate": 1.9585083585190764e-05, "loss": 1.6021, "step": 36757 }, { "epoch": 0.48, "grad_norm": 3.691770076751709, "learning_rate": 1.9585053630240795e-05, "loss": 1.9727, "step": 36758 }, { "epoch": 0.48, "grad_norm": 3.973618745803833, "learning_rate": 1.9585023674232472e-05, "loss": 2.056, "step": 36759 }, { "epoch": 0.48, "grad_norm": 4.751644611358643, "learning_rate": 1.9584993717165802e-05, "loss": 2.4467, "step": 36760 }, { "epoch": 0.48, "grad_norm": 3.97770357131958, "learning_rate": 1.9584963759040785e-05, "loss": 2.081, "step": 36761 }, { "epoch": 0.48, "grad_norm": 3.504791498184204, "learning_rate": 1.9584933799857423e-05, "loss": 1.7322, "step": 36762 }, { "epoch": 0.48, "grad_norm": 3.776362180709839, "learning_rate": 1.9584903839615728e-05, "loss": 2.0845, "step": 36763 }, { "epoch": 0.48, "grad_norm": 3.7676925659179688, "learning_rate": 1.9584873878315692e-05, "loss": 1.9185, "step": 36764 }, { "epoch": 0.48, "grad_norm": 4.193266868591309, "learning_rate": 1.9584843915957323e-05, "loss": 2.138, "step": 36765 }, { "epoch": 0.48, "grad_norm": 3.825805902481079, "learning_rate": 1.9584813952540623e-05, "loss": 2.2819, "step": 36766 }, { "epoch": 0.48, "grad_norm": 4.49459171295166, "learning_rate": 1.9584783988065603e-05, "loss": 1.712, "step": 36767 }, { "epoch": 0.48, "grad_norm": 3.659940719604492, "learning_rate": 1.9584754022532256e-05, "loss": 2.106, "step": 36768 }, { "epoch": 0.48, "grad_norm": 3.4500324726104736, "learning_rate": 1.958472405594059e-05, "loss": 1.7262, "step": 36769 }, { "epoch": 0.48, "grad_norm": 3.6200947761535645, "learning_rate": 1.958469408829061e-05, "loss": 1.9296, "step": 36770 }, { "epoch": 0.48, "grad_norm": 3.6792314052581787, "learning_rate": 1.9584664119582316e-05, "loss": 1.9389, "step": 36771 }, { "epoch": 0.48, "grad_norm": 4.022087097167969, "learning_rate": 1.9584634149815712e-05, "loss": 2.0934, "step": 36772 }, { "epoch": 0.48, "grad_norm": 4.439622402191162, "learning_rate": 1.95846041789908e-05, "loss": 2.4061, "step": 36773 }, { "epoch": 0.48, "grad_norm": 3.951489210128784, "learning_rate": 1.958457420710759e-05, "loss": 2.1588, "step": 36774 }, { "epoch": 0.48, "grad_norm": 3.8507165908813477, "learning_rate": 1.9584544234166076e-05, "loss": 1.9546, "step": 36775 }, { "epoch": 0.48, "grad_norm": 3.939985513687134, "learning_rate": 1.9584514260166268e-05, "loss": 2.1156, "step": 36776 }, { "epoch": 0.48, "grad_norm": 3.7554726600646973, "learning_rate": 1.9584484285108164e-05, "loss": 2.1959, "step": 36777 }, { "epoch": 0.48, "grad_norm": 3.964668035507202, "learning_rate": 1.958445430899177e-05, "loss": 2.136, "step": 36778 }, { "epoch": 0.48, "grad_norm": 3.756676435470581, "learning_rate": 1.9584424331817094e-05, "loss": 1.8388, "step": 36779 }, { "epoch": 0.48, "grad_norm": 4.082061767578125, "learning_rate": 1.9584394353584134e-05, "loss": 1.9634, "step": 36780 }, { "epoch": 0.48, "grad_norm": 3.981135606765747, "learning_rate": 1.9584364374292892e-05, "loss": 1.9757, "step": 36781 }, { "epoch": 0.48, "grad_norm": 3.8190932273864746, "learning_rate": 1.9584334393943377e-05, "loss": 2.0015, "step": 36782 }, { "epoch": 0.48, "grad_norm": 3.894956111907959, "learning_rate": 1.9584304412535585e-05, "loss": 1.9766, "step": 36783 }, { "epoch": 0.48, "grad_norm": 4.008347034454346, "learning_rate": 1.9584274430069524e-05, "loss": 1.8911, "step": 36784 }, { "epoch": 0.48, "grad_norm": 3.8376691341400146, "learning_rate": 1.95842444465452e-05, "loss": 2.128, "step": 36785 }, { "epoch": 0.48, "grad_norm": 4.512409210205078, "learning_rate": 1.958421446196261e-05, "loss": 2.5132, "step": 36786 }, { "epoch": 0.48, "grad_norm": 3.8506646156311035, "learning_rate": 1.958418447632176e-05, "loss": 1.7939, "step": 36787 }, { "epoch": 0.48, "grad_norm": 3.82391357421875, "learning_rate": 1.9584154489622654e-05, "loss": 2.3294, "step": 36788 }, { "epoch": 0.48, "grad_norm": 3.864365339279175, "learning_rate": 1.9584124501865293e-05, "loss": 1.7715, "step": 36789 }, { "epoch": 0.48, "grad_norm": 3.677367687225342, "learning_rate": 1.9584094513049684e-05, "loss": 1.8797, "step": 36790 }, { "epoch": 0.48, "grad_norm": 4.052508354187012, "learning_rate": 1.958406452317583e-05, "loss": 2.2939, "step": 36791 }, { "epoch": 0.48, "grad_norm": 3.71230411529541, "learning_rate": 1.958403453224373e-05, "loss": 2.0589, "step": 36792 }, { "epoch": 0.48, "grad_norm": 4.025221347808838, "learning_rate": 1.958400454025339e-05, "loss": 2.0626, "step": 36793 }, { "epoch": 0.48, "grad_norm": 4.101161956787109, "learning_rate": 1.9583974547204815e-05, "loss": 2.069, "step": 36794 }, { "epoch": 0.48, "grad_norm": 3.8665378093719482, "learning_rate": 1.9583944553098004e-05, "loss": 1.7825, "step": 36795 }, { "epoch": 0.48, "grad_norm": 3.8497610092163086, "learning_rate": 1.9583914557932964e-05, "loss": 2.4236, "step": 36796 }, { "epoch": 0.48, "grad_norm": 3.5000174045562744, "learning_rate": 1.95838845617097e-05, "loss": 1.7968, "step": 36797 }, { "epoch": 0.48, "grad_norm": 4.145745277404785, "learning_rate": 1.958385456442821e-05, "loss": 2.1958, "step": 36798 }, { "epoch": 0.48, "grad_norm": 3.772512674331665, "learning_rate": 1.95838245660885e-05, "loss": 1.8976, "step": 36799 }, { "epoch": 0.48, "grad_norm": 4.032073497772217, "learning_rate": 1.9583794566690572e-05, "loss": 1.8577, "step": 36800 }, { "epoch": 0.48, "grad_norm": 4.1235151290893555, "learning_rate": 1.9583764566234432e-05, "loss": 2.1468, "step": 36801 }, { "epoch": 0.48, "grad_norm": 3.561750650405884, "learning_rate": 1.958373456472008e-05, "loss": 1.8956, "step": 36802 }, { "epoch": 0.48, "grad_norm": 3.245074510574341, "learning_rate": 1.958370456214752e-05, "loss": 1.4325, "step": 36803 }, { "epoch": 0.48, "grad_norm": 3.744785785675049, "learning_rate": 1.9583674558516763e-05, "loss": 1.8179, "step": 36804 }, { "epoch": 0.48, "grad_norm": 4.461212158203125, "learning_rate": 1.95836445538278e-05, "loss": 1.93, "step": 36805 }, { "epoch": 0.48, "grad_norm": 3.571195602416992, "learning_rate": 1.9583614548080642e-05, "loss": 1.8054, "step": 36806 }, { "epoch": 0.48, "grad_norm": 3.544342041015625, "learning_rate": 1.958358454127529e-05, "loss": 1.7006, "step": 36807 }, { "epoch": 0.48, "grad_norm": 4.01534366607666, "learning_rate": 1.9583554533411752e-05, "loss": 2.4158, "step": 36808 }, { "epoch": 0.48, "grad_norm": 3.355167865753174, "learning_rate": 1.958352452449002e-05, "loss": 1.5351, "step": 36809 }, { "epoch": 0.48, "grad_norm": 3.8037807941436768, "learning_rate": 1.958349451451011e-05, "loss": 1.6979, "step": 36810 }, { "epoch": 0.48, "grad_norm": 3.9436402320861816, "learning_rate": 1.9583464503472014e-05, "loss": 2.0482, "step": 36811 }, { "epoch": 0.48, "grad_norm": 3.5047621726989746, "learning_rate": 1.9583434491375745e-05, "loss": 1.9307, "step": 36812 }, { "epoch": 0.48, "grad_norm": 3.498868942260742, "learning_rate": 1.95834044782213e-05, "loss": 1.6583, "step": 36813 }, { "epoch": 0.48, "grad_norm": 3.0896871089935303, "learning_rate": 1.9583374464008686e-05, "loss": 1.5942, "step": 36814 }, { "epoch": 0.48, "grad_norm": 3.6221508979797363, "learning_rate": 1.9583344448737906e-05, "loss": 1.9123, "step": 36815 }, { "epoch": 0.48, "grad_norm": 3.781503677368164, "learning_rate": 1.958331443240896e-05, "loss": 2.3057, "step": 36816 }, { "epoch": 0.48, "grad_norm": 4.10883092880249, "learning_rate": 1.9583284415021854e-05, "loss": 1.8367, "step": 36817 }, { "epoch": 0.48, "grad_norm": 3.752450466156006, "learning_rate": 1.958325439657659e-05, "loss": 1.6958, "step": 36818 }, { "epoch": 0.48, "grad_norm": 3.8993520736694336, "learning_rate": 1.9583224377073174e-05, "loss": 1.878, "step": 36819 }, { "epoch": 0.48, "grad_norm": 3.7294318675994873, "learning_rate": 1.9583194356511606e-05, "loss": 2.1633, "step": 36820 }, { "epoch": 0.48, "grad_norm": 3.979964256286621, "learning_rate": 1.958316433489189e-05, "loss": 2.1419, "step": 36821 }, { "epoch": 0.48, "grad_norm": 3.5901522636413574, "learning_rate": 1.9583134312214032e-05, "loss": 1.8574, "step": 36822 }, { "epoch": 0.48, "grad_norm": 3.4725899696350098, "learning_rate": 1.9583104288478036e-05, "loss": 2.004, "step": 36823 }, { "epoch": 0.48, "grad_norm": 3.3968470096588135, "learning_rate": 1.9583074263683895e-05, "loss": 1.6132, "step": 36824 }, { "epoch": 0.48, "grad_norm": 3.806675910949707, "learning_rate": 1.9583044237831628e-05, "loss": 2.0654, "step": 36825 }, { "epoch": 0.48, "grad_norm": 3.8731822967529297, "learning_rate": 1.9583014210921226e-05, "loss": 1.8696, "step": 36826 }, { "epoch": 0.48, "grad_norm": 3.7428665161132812, "learning_rate": 1.9582984182952698e-05, "loss": 1.9469, "step": 36827 }, { "epoch": 0.48, "grad_norm": 3.6331329345703125, "learning_rate": 1.9582954153926045e-05, "loss": 1.6262, "step": 36828 }, { "epoch": 0.48, "grad_norm": 3.5091819763183594, "learning_rate": 1.9582924123841273e-05, "loss": 1.719, "step": 36829 }, { "epoch": 0.48, "grad_norm": 3.5691287517547607, "learning_rate": 1.958289409269838e-05, "loss": 1.8071, "step": 36830 }, { "epoch": 0.48, "grad_norm": 3.470259428024292, "learning_rate": 1.9582864060497377e-05, "loss": 1.8082, "step": 36831 }, { "epoch": 0.48, "grad_norm": 3.7541022300720215, "learning_rate": 1.9582834027238263e-05, "loss": 1.8185, "step": 36832 }, { "epoch": 0.48, "grad_norm": 3.6431732177734375, "learning_rate": 1.9582803992921037e-05, "loss": 2.1268, "step": 36833 }, { "epoch": 0.48, "grad_norm": 3.7662734985351562, "learning_rate": 1.958277395754571e-05, "loss": 2.1996, "step": 36834 }, { "epoch": 0.48, "grad_norm": 3.4993345737457275, "learning_rate": 1.9582743921112283e-05, "loss": 2.0908, "step": 36835 }, { "epoch": 0.48, "grad_norm": 3.911306858062744, "learning_rate": 1.958271388362076e-05, "loss": 2.0193, "step": 36836 }, { "epoch": 0.48, "grad_norm": 3.801621913909912, "learning_rate": 1.958268384507114e-05, "loss": 1.8749, "step": 36837 }, { "epoch": 0.48, "grad_norm": 3.7085824012756348, "learning_rate": 1.9582653805463426e-05, "loss": 1.7901, "step": 36838 }, { "epoch": 0.48, "grad_norm": 3.8801217079162598, "learning_rate": 1.958262376479763e-05, "loss": 2.1107, "step": 36839 }, { "epoch": 0.48, "grad_norm": 4.553011894226074, "learning_rate": 1.958259372307375e-05, "loss": 2.0093, "step": 36840 }, { "epoch": 0.48, "grad_norm": 3.80143404006958, "learning_rate": 1.9582563680291784e-05, "loss": 1.9682, "step": 36841 }, { "epoch": 0.48, "grad_norm": 3.9247019290924072, "learning_rate": 1.9582533636451745e-05, "loss": 2.1939, "step": 36842 }, { "epoch": 0.48, "grad_norm": 3.6032612323760986, "learning_rate": 1.958250359155363e-05, "loss": 1.6778, "step": 36843 }, { "epoch": 0.48, "grad_norm": 3.362125873565674, "learning_rate": 1.9582473545597442e-05, "loss": 1.8444, "step": 36844 }, { "epoch": 0.48, "grad_norm": 3.924837827682495, "learning_rate": 1.958244349858319e-05, "loss": 2.2541, "step": 36845 }, { "epoch": 0.48, "grad_norm": 3.535515546798706, "learning_rate": 1.958241345051087e-05, "loss": 1.8637, "step": 36846 }, { "epoch": 0.48, "grad_norm": 4.453630447387695, "learning_rate": 1.958238340138049e-05, "loss": 2.1798, "step": 36847 }, { "epoch": 0.48, "grad_norm": 3.608883857727051, "learning_rate": 1.9582353351192056e-05, "loss": 1.5755, "step": 36848 }, { "epoch": 0.48, "grad_norm": 3.2124881744384766, "learning_rate": 1.9582323299945565e-05, "loss": 1.6251, "step": 36849 }, { "epoch": 0.48, "grad_norm": 3.772528886795044, "learning_rate": 1.9582293247641024e-05, "loss": 2.157, "step": 36850 }, { "epoch": 0.48, "grad_norm": 3.7160463333129883, "learning_rate": 1.9582263194278434e-05, "loss": 1.9608, "step": 36851 }, { "epoch": 0.48, "grad_norm": 3.6289844512939453, "learning_rate": 1.9582233139857798e-05, "loss": 2.1072, "step": 36852 }, { "epoch": 0.48, "grad_norm": 3.4809319972991943, "learning_rate": 1.958220308437912e-05, "loss": 1.518, "step": 36853 }, { "epoch": 0.48, "grad_norm": 4.054333209991455, "learning_rate": 1.958217302784241e-05, "loss": 2.1702, "step": 36854 }, { "epoch": 0.48, "grad_norm": 3.6089611053466797, "learning_rate": 1.958214297024766e-05, "loss": 2.1675, "step": 36855 }, { "epoch": 0.48, "grad_norm": 4.503374099731445, "learning_rate": 1.9582112911594882e-05, "loss": 2.0702, "step": 36856 }, { "epoch": 0.48, "grad_norm": 4.168873310089111, "learning_rate": 1.9582082851884075e-05, "loss": 2.2004, "step": 36857 }, { "epoch": 0.48, "grad_norm": 3.89437198638916, "learning_rate": 1.9582052791115246e-05, "loss": 2.0322, "step": 36858 }, { "epoch": 0.48, "grad_norm": 3.206969976425171, "learning_rate": 1.958202272928839e-05, "loss": 1.7087, "step": 36859 }, { "epoch": 0.48, "grad_norm": 3.6302971839904785, "learning_rate": 1.958199266640352e-05, "loss": 1.7377, "step": 36860 }, { "epoch": 0.48, "grad_norm": 3.542847156524658, "learning_rate": 1.9581962602460636e-05, "loss": 1.7303, "step": 36861 }, { "epoch": 0.48, "grad_norm": 4.2765703201293945, "learning_rate": 1.9581932537459743e-05, "loss": 2.2625, "step": 36862 }, { "epoch": 0.48, "grad_norm": 3.708151340484619, "learning_rate": 1.9581902471400837e-05, "loss": 1.7694, "step": 36863 }, { "epoch": 0.48, "grad_norm": 3.9413344860076904, "learning_rate": 1.958187240428393e-05, "loss": 2.0576, "step": 36864 }, { "epoch": 0.48, "grad_norm": 3.294994592666626, "learning_rate": 1.958184233610902e-05, "loss": 1.5983, "step": 36865 }, { "epoch": 0.48, "grad_norm": 3.3547213077545166, "learning_rate": 1.9581812266876113e-05, "loss": 1.7043, "step": 36866 }, { "epoch": 0.48, "grad_norm": 4.208735942840576, "learning_rate": 1.958178219658521e-05, "loss": 2.1654, "step": 36867 }, { "epoch": 0.48, "grad_norm": 3.6276793479919434, "learning_rate": 1.9581752125236316e-05, "loss": 1.9801, "step": 36868 }, { "epoch": 0.48, "grad_norm": 3.716167688369751, "learning_rate": 1.9581722052829434e-05, "loss": 1.8368, "step": 36869 }, { "epoch": 0.48, "grad_norm": 3.8030154705047607, "learning_rate": 1.9581691979364567e-05, "loss": 1.837, "step": 36870 }, { "epoch": 0.48, "grad_norm": 3.5356945991516113, "learning_rate": 1.958166190484172e-05, "loss": 1.839, "step": 36871 }, { "epoch": 0.48, "grad_norm": 3.803051471710205, "learning_rate": 1.9581631829260895e-05, "loss": 2.2522, "step": 36872 }, { "epoch": 0.48, "grad_norm": 3.6311185359954834, "learning_rate": 1.958160175262209e-05, "loss": 1.7941, "step": 36873 }, { "epoch": 0.48, "grad_norm": 3.543750524520874, "learning_rate": 1.9581571674925325e-05, "loss": 1.7241, "step": 36874 }, { "epoch": 0.48, "grad_norm": 3.712212085723877, "learning_rate": 1.9581541596170585e-05, "loss": 2.1879, "step": 36875 }, { "epoch": 0.48, "grad_norm": 3.389812707901001, "learning_rate": 1.9581511516357877e-05, "loss": 1.786, "step": 36876 }, { "epoch": 0.48, "grad_norm": 3.9792356491088867, "learning_rate": 1.958148143548721e-05, "loss": 1.8961, "step": 36877 }, { "epoch": 0.48, "grad_norm": 3.2910244464874268, "learning_rate": 1.9581451353558587e-05, "loss": 1.5257, "step": 36878 }, { "epoch": 0.48, "grad_norm": 3.675088882446289, "learning_rate": 1.958142127057201e-05, "loss": 1.6756, "step": 36879 }, { "epoch": 0.48, "grad_norm": 4.108511924743652, "learning_rate": 1.9581391186527477e-05, "loss": 2.3049, "step": 36880 }, { "epoch": 0.48, "grad_norm": 3.585245132446289, "learning_rate": 1.9581361101425e-05, "loss": 1.9697, "step": 36881 }, { "epoch": 0.48, "grad_norm": 3.896771192550659, "learning_rate": 1.9581331015264577e-05, "loss": 2.2937, "step": 36882 }, { "epoch": 0.48, "grad_norm": 4.09125280380249, "learning_rate": 1.9581300928046212e-05, "loss": 2.0649, "step": 36883 }, { "epoch": 0.48, "grad_norm": 3.343787431716919, "learning_rate": 1.958127083976991e-05, "loss": 1.6067, "step": 36884 }, { "epoch": 0.48, "grad_norm": 3.8937222957611084, "learning_rate": 1.958124075043567e-05, "loss": 1.8801, "step": 36885 }, { "epoch": 0.48, "grad_norm": 3.8582499027252197, "learning_rate": 1.95812106600435e-05, "loss": 1.7925, "step": 36886 }, { "epoch": 0.48, "grad_norm": 3.492845058441162, "learning_rate": 1.9581180568593404e-05, "loss": 1.8488, "step": 36887 }, { "epoch": 0.48, "grad_norm": 3.7331793308258057, "learning_rate": 1.9581150476085383e-05, "loss": 2.123, "step": 36888 }, { "epoch": 0.48, "grad_norm": 3.7265753746032715, "learning_rate": 1.9581120382519438e-05, "loss": 1.7059, "step": 36889 }, { "epoch": 0.48, "grad_norm": 4.00266695022583, "learning_rate": 1.9581090287895576e-05, "loss": 2.1498, "step": 36890 }, { "epoch": 0.48, "grad_norm": 3.1246023178100586, "learning_rate": 1.9581060192213797e-05, "loss": 1.5183, "step": 36891 }, { "epoch": 0.48, "grad_norm": 3.598524570465088, "learning_rate": 1.9581030095474108e-05, "loss": 1.9298, "step": 36892 }, { "epoch": 0.48, "grad_norm": 3.6471855640411377, "learning_rate": 1.9580999997676512e-05, "loss": 1.6486, "step": 36893 }, { "epoch": 0.48, "grad_norm": 4.129749298095703, "learning_rate": 1.9580969898821012e-05, "loss": 2.17, "step": 36894 }, { "epoch": 0.48, "grad_norm": 4.492968559265137, "learning_rate": 1.9580939798907606e-05, "loss": 2.38, "step": 36895 }, { "epoch": 0.48, "grad_norm": 3.378763198852539, "learning_rate": 1.9580909697936303e-05, "loss": 1.5819, "step": 36896 }, { "epoch": 0.48, "grad_norm": 3.4023118019104004, "learning_rate": 1.958087959590711e-05, "loss": 1.6172, "step": 36897 }, { "epoch": 0.48, "grad_norm": 3.3472814559936523, "learning_rate": 1.958084949282002e-05, "loss": 1.7572, "step": 36898 }, { "epoch": 0.48, "grad_norm": 3.3638055324554443, "learning_rate": 1.9580819388675043e-05, "loss": 1.887, "step": 36899 }, { "epoch": 0.48, "grad_norm": 4.24832010269165, "learning_rate": 1.958078928347218e-05, "loss": 2.6936, "step": 36900 }, { "epoch": 0.48, "grad_norm": 3.035564422607422, "learning_rate": 1.9580759177211437e-05, "loss": 1.7923, "step": 36901 }, { "epoch": 0.48, "grad_norm": 3.70115327835083, "learning_rate": 1.9580729069892815e-05, "loss": 2.1358, "step": 36902 }, { "epoch": 0.48, "grad_norm": 3.6637661457061768, "learning_rate": 1.958069896151632e-05, "loss": 1.9463, "step": 36903 }, { "epoch": 0.48, "grad_norm": 3.706787586212158, "learning_rate": 1.958066885208195e-05, "loss": 1.9224, "step": 36904 }, { "epoch": 0.48, "grad_norm": 3.198472023010254, "learning_rate": 1.9580638741589713e-05, "loss": 1.5093, "step": 36905 }, { "epoch": 0.48, "grad_norm": 3.668281078338623, "learning_rate": 1.9580608630039612e-05, "loss": 2.2591, "step": 36906 }, { "epoch": 0.48, "grad_norm": 3.398630142211914, "learning_rate": 1.9580578517431648e-05, "loss": 1.8109, "step": 36907 }, { "epoch": 0.48, "grad_norm": 3.813997268676758, "learning_rate": 1.9580548403765825e-05, "loss": 1.8458, "step": 36908 }, { "epoch": 0.48, "grad_norm": 4.679930210113525, "learning_rate": 1.958051828904215e-05, "loss": 2.5697, "step": 36909 }, { "epoch": 0.48, "grad_norm": 4.020630836486816, "learning_rate": 1.958048817326062e-05, "loss": 2.2467, "step": 36910 }, { "epoch": 0.48, "grad_norm": 3.5580062866210938, "learning_rate": 1.9580458056421242e-05, "loss": 1.6618, "step": 36911 }, { "epoch": 0.48, "grad_norm": 3.692836284637451, "learning_rate": 1.958042793852402e-05, "loss": 2.1973, "step": 36912 }, { "epoch": 0.48, "grad_norm": 3.8699569702148438, "learning_rate": 1.9580397819568956e-05, "loss": 1.9729, "step": 36913 }, { "epoch": 0.48, "grad_norm": 3.728205919265747, "learning_rate": 1.958036769955605e-05, "loss": 2.1293, "step": 36914 }, { "epoch": 0.48, "grad_norm": 3.4427907466888428, "learning_rate": 1.9580337578485313e-05, "loss": 1.9316, "step": 36915 }, { "epoch": 0.48, "grad_norm": 3.7447726726531982, "learning_rate": 1.9580307456356743e-05, "loss": 1.8235, "step": 36916 }, { "epoch": 0.48, "grad_norm": 4.09876823425293, "learning_rate": 1.9580277333170347e-05, "loss": 2.1411, "step": 36917 }, { "epoch": 0.48, "grad_norm": 3.7066030502319336, "learning_rate": 1.9580247208926126e-05, "loss": 1.9213, "step": 36918 }, { "epoch": 0.48, "grad_norm": 4.229830265045166, "learning_rate": 1.958021708362408e-05, "loss": 2.1656, "step": 36919 }, { "epoch": 0.48, "grad_norm": 3.5205929279327393, "learning_rate": 1.9580186957264213e-05, "loss": 1.7371, "step": 36920 }, { "epoch": 0.48, "grad_norm": 3.422396183013916, "learning_rate": 1.9580156829846536e-05, "loss": 1.7451, "step": 36921 }, { "epoch": 0.48, "grad_norm": 4.296231269836426, "learning_rate": 1.9580126701371043e-05, "loss": 2.0463, "step": 36922 }, { "epoch": 0.48, "grad_norm": 3.959968328475952, "learning_rate": 1.9580096571837745e-05, "loss": 2.2289, "step": 36923 }, { "epoch": 0.48, "grad_norm": 3.625364303588867, "learning_rate": 1.958006644124664e-05, "loss": 1.5178, "step": 36924 }, { "epoch": 0.48, "grad_norm": 3.3590822219848633, "learning_rate": 1.9580036309597733e-05, "loss": 1.8868, "step": 36925 }, { "epoch": 0.48, "grad_norm": 3.524841547012329, "learning_rate": 1.958000617689103e-05, "loss": 1.7316, "step": 36926 }, { "epoch": 0.48, "grad_norm": 3.5054502487182617, "learning_rate": 1.957997604312653e-05, "loss": 1.9825, "step": 36927 }, { "epoch": 0.48, "grad_norm": 3.8748533725738525, "learning_rate": 1.9579945908304236e-05, "loss": 2.0168, "step": 36928 }, { "epoch": 0.48, "grad_norm": 3.6317083835601807, "learning_rate": 1.9579915772424157e-05, "loss": 1.8618, "step": 36929 }, { "epoch": 0.48, "grad_norm": 3.876124858856201, "learning_rate": 1.957988563548629e-05, "loss": 1.8667, "step": 36930 }, { "epoch": 0.48, "grad_norm": 3.7997758388519287, "learning_rate": 1.9579855497490645e-05, "loss": 2.1112, "step": 36931 }, { "epoch": 0.48, "grad_norm": 3.5863616466522217, "learning_rate": 1.9579825358437218e-05, "loss": 2.0471, "step": 36932 }, { "epoch": 0.48, "grad_norm": 3.526500940322876, "learning_rate": 1.9579795218326017e-05, "loss": 1.7003, "step": 36933 }, { "epoch": 0.48, "grad_norm": 3.6873891353607178, "learning_rate": 1.9579765077157044e-05, "loss": 1.8864, "step": 36934 }, { "epoch": 0.48, "grad_norm": 3.7839195728302, "learning_rate": 1.9579734934930303e-05, "loss": 1.6662, "step": 36935 }, { "epoch": 0.48, "grad_norm": 4.807783126831055, "learning_rate": 1.9579704791645794e-05, "loss": 2.49, "step": 36936 }, { "epoch": 0.48, "grad_norm": 3.8143651485443115, "learning_rate": 1.9579674647303527e-05, "loss": 1.8596, "step": 36937 }, { "epoch": 0.48, "grad_norm": 3.5374577045440674, "learning_rate": 1.9579644501903496e-05, "loss": 1.902, "step": 36938 }, { "epoch": 0.48, "grad_norm": 3.742734909057617, "learning_rate": 1.9579614355445714e-05, "loss": 2.1075, "step": 36939 }, { "epoch": 0.48, "grad_norm": 3.580364942550659, "learning_rate": 1.957958420793018e-05, "loss": 2.2131, "step": 36940 }, { "epoch": 0.48, "grad_norm": 3.6591107845306396, "learning_rate": 1.9579554059356897e-05, "loss": 2.0977, "step": 36941 }, { "epoch": 0.48, "grad_norm": 4.365866661071777, "learning_rate": 1.9579523909725865e-05, "loss": 2.1964, "step": 36942 }, { "epoch": 0.48, "grad_norm": 3.88960337638855, "learning_rate": 1.95794937590371e-05, "loss": 2.0458, "step": 36943 }, { "epoch": 0.48, "grad_norm": 3.5041747093200684, "learning_rate": 1.957946360729059e-05, "loss": 1.9432, "step": 36944 }, { "epoch": 0.48, "grad_norm": 3.7226336002349854, "learning_rate": 1.9579433454486344e-05, "loss": 1.8646, "step": 36945 }, { "epoch": 0.48, "grad_norm": 3.5769903659820557, "learning_rate": 1.9579403300624366e-05, "loss": 2.0301, "step": 36946 }, { "epoch": 0.48, "grad_norm": 3.2753515243530273, "learning_rate": 1.957937314570466e-05, "loss": 1.7455, "step": 36947 }, { "epoch": 0.48, "grad_norm": 4.331925868988037, "learning_rate": 1.957934298972723e-05, "loss": 2.0918, "step": 36948 }, { "epoch": 0.48, "grad_norm": 3.278672695159912, "learning_rate": 1.9579312832692077e-05, "loss": 1.4466, "step": 36949 }, { "epoch": 0.48, "grad_norm": 3.572415590286255, "learning_rate": 1.9579282674599206e-05, "loss": 2.0866, "step": 36950 }, { "epoch": 0.48, "grad_norm": 3.4216983318328857, "learning_rate": 1.9579252515448618e-05, "loss": 2.3373, "step": 36951 }, { "epoch": 0.48, "grad_norm": 3.570732355117798, "learning_rate": 1.9579222355240323e-05, "loss": 1.9111, "step": 36952 }, { "epoch": 0.48, "grad_norm": 4.109959125518799, "learning_rate": 1.9579192193974315e-05, "loss": 1.9691, "step": 36953 }, { "epoch": 0.48, "grad_norm": 3.943169116973877, "learning_rate": 1.95791620316506e-05, "loss": 2.0194, "step": 36954 }, { "epoch": 0.48, "grad_norm": 4.079516410827637, "learning_rate": 1.9579131868269187e-05, "loss": 1.958, "step": 36955 }, { "epoch": 0.48, "grad_norm": 3.3685009479522705, "learning_rate": 1.9579101703830074e-05, "loss": 1.5451, "step": 36956 }, { "epoch": 0.48, "grad_norm": 4.1433587074279785, "learning_rate": 1.9579071538333264e-05, "loss": 1.9418, "step": 36957 }, { "epoch": 0.48, "grad_norm": 3.286799430847168, "learning_rate": 1.9579041371778765e-05, "loss": 1.6647, "step": 36958 }, { "epoch": 0.48, "grad_norm": 3.4565296173095703, "learning_rate": 1.9579011204166575e-05, "loss": 1.6996, "step": 36959 }, { "epoch": 0.48, "grad_norm": 3.397979736328125, "learning_rate": 1.95789810354967e-05, "loss": 1.7596, "step": 36960 }, { "epoch": 0.48, "grad_norm": 3.846754550933838, "learning_rate": 1.9578950865769143e-05, "loss": 2.1655, "step": 36961 }, { "epoch": 0.48, "grad_norm": 3.404311418533325, "learning_rate": 1.957892069498391e-05, "loss": 2.046, "step": 36962 }, { "epoch": 0.48, "grad_norm": 3.2215588092803955, "learning_rate": 1.9578890523140998e-05, "loss": 1.6856, "step": 36963 }, { "epoch": 0.48, "grad_norm": 4.212491512298584, "learning_rate": 1.9578860350240416e-05, "loss": 2.1879, "step": 36964 }, { "epoch": 0.48, "grad_norm": 4.168934345245361, "learning_rate": 1.9578830176282164e-05, "loss": 1.8691, "step": 36965 }, { "epoch": 0.48, "grad_norm": 3.7389683723449707, "learning_rate": 1.9578800001266246e-05, "loss": 2.1138, "step": 36966 }, { "epoch": 0.48, "grad_norm": 3.446956157684326, "learning_rate": 1.957876982519267e-05, "loss": 1.849, "step": 36967 }, { "epoch": 0.48, "grad_norm": 4.056763172149658, "learning_rate": 1.957873964806143e-05, "loss": 2.1672, "step": 36968 }, { "epoch": 0.48, "grad_norm": 4.276369571685791, "learning_rate": 1.9578709469872538e-05, "loss": 2.2214, "step": 36969 }, { "epoch": 0.48, "grad_norm": 4.3777289390563965, "learning_rate": 1.957867929062599e-05, "loss": 2.4247, "step": 36970 }, { "epoch": 0.48, "grad_norm": 4.066874980926514, "learning_rate": 1.95786491103218e-05, "loss": 2.472, "step": 36971 }, { "epoch": 0.48, "grad_norm": 3.510469675064087, "learning_rate": 1.957861892895996e-05, "loss": 1.685, "step": 36972 }, { "epoch": 0.48, "grad_norm": 3.7546041011810303, "learning_rate": 1.957858874654048e-05, "loss": 2.1623, "step": 36973 }, { "epoch": 0.48, "grad_norm": 4.492493152618408, "learning_rate": 1.957855856306336e-05, "loss": 2.0636, "step": 36974 }, { "epoch": 0.48, "grad_norm": 3.84921932220459, "learning_rate": 1.9578528378528605e-05, "loss": 2.1331, "step": 36975 }, { "epoch": 0.48, "grad_norm": 3.450165033340454, "learning_rate": 1.957849819293622e-05, "loss": 1.7632, "step": 36976 }, { "epoch": 0.48, "grad_norm": 3.6153388023376465, "learning_rate": 1.9578468006286206e-05, "loss": 2.1169, "step": 36977 }, { "epoch": 0.48, "grad_norm": 3.1794116497039795, "learning_rate": 1.9578437818578563e-05, "loss": 1.4048, "step": 36978 }, { "epoch": 0.48, "grad_norm": 4.378085613250732, "learning_rate": 1.9578407629813297e-05, "loss": 2.1303, "step": 36979 }, { "epoch": 0.48, "grad_norm": 3.759673833847046, "learning_rate": 1.9578377439990417e-05, "loss": 2.0382, "step": 36980 }, { "epoch": 0.48, "grad_norm": 4.13883113861084, "learning_rate": 1.957834724910992e-05, "loss": 1.9901, "step": 36981 }, { "epoch": 0.48, "grad_norm": 2.976512908935547, "learning_rate": 1.957831705717181e-05, "loss": 1.5507, "step": 36982 }, { "epoch": 0.48, "grad_norm": 4.831733226776123, "learning_rate": 1.957828686417609e-05, "loss": 2.5713, "step": 36983 }, { "epoch": 0.48, "grad_norm": 3.8692212104797363, "learning_rate": 1.957825667012277e-05, "loss": 2.1637, "step": 36984 }, { "epoch": 0.48, "grad_norm": 3.7435262203216553, "learning_rate": 1.9578226475011845e-05, "loss": 1.5989, "step": 36985 }, { "epoch": 0.48, "grad_norm": 3.7046329975128174, "learning_rate": 1.957819627884332e-05, "loss": 1.9197, "step": 36986 }, { "epoch": 0.48, "grad_norm": 3.674494743347168, "learning_rate": 1.95781660816172e-05, "loss": 1.8724, "step": 36987 }, { "epoch": 0.48, "grad_norm": 3.3871254920959473, "learning_rate": 1.957813588333349e-05, "loss": 1.8963, "step": 36988 }, { "epoch": 0.48, "grad_norm": 3.167532444000244, "learning_rate": 1.957810568399219e-05, "loss": 1.8227, "step": 36989 }, { "epoch": 0.48, "grad_norm": 3.719907522201538, "learning_rate": 1.9578075483593305e-05, "loss": 1.9064, "step": 36990 }, { "epoch": 0.48, "grad_norm": 4.421494483947754, "learning_rate": 1.9578045282136837e-05, "loss": 2.0595, "step": 36991 }, { "epoch": 0.48, "grad_norm": 3.5824029445648193, "learning_rate": 1.957801507962279e-05, "loss": 1.8709, "step": 36992 }, { "epoch": 0.48, "grad_norm": 4.249423980712891, "learning_rate": 1.957798487605117e-05, "loss": 2.0122, "step": 36993 }, { "epoch": 0.48, "grad_norm": 4.554927349090576, "learning_rate": 1.9577954671421976e-05, "loss": 2.1867, "step": 36994 }, { "epoch": 0.48, "grad_norm": 4.509446620941162, "learning_rate": 1.957792446573521e-05, "loss": 2.5436, "step": 36995 }, { "epoch": 0.48, "grad_norm": 4.004105091094971, "learning_rate": 1.9577894258990882e-05, "loss": 2.4296, "step": 36996 }, { "epoch": 0.48, "grad_norm": 3.6941912174224854, "learning_rate": 1.9577864051188995e-05, "loss": 2.0631, "step": 36997 }, { "epoch": 0.48, "grad_norm": 4.156299591064453, "learning_rate": 1.9577833842329547e-05, "loss": 2.1239, "step": 36998 }, { "epoch": 0.48, "grad_norm": 4.509455680847168, "learning_rate": 1.957780363241254e-05, "loss": 1.9961, "step": 36999 }, { "epoch": 0.48, "grad_norm": 3.5731050968170166, "learning_rate": 1.9577773421437987e-05, "loss": 1.7321, "step": 37000 }, { "epoch": 0.48, "grad_norm": 4.189316749572754, "learning_rate": 1.9577743209405882e-05, "loss": 2.2725, "step": 37001 }, { "epoch": 0.48, "grad_norm": 4.325740814208984, "learning_rate": 1.9577712996316233e-05, "loss": 1.822, "step": 37002 }, { "epoch": 0.48, "grad_norm": 3.8921937942504883, "learning_rate": 1.957768278216904e-05, "loss": 2.0314, "step": 37003 }, { "epoch": 0.48, "grad_norm": 4.286832332611084, "learning_rate": 1.957765256696431e-05, "loss": 2.3203, "step": 37004 }, { "epoch": 0.48, "grad_norm": 3.431274890899658, "learning_rate": 1.9577622350702046e-05, "loss": 1.5471, "step": 37005 }, { "epoch": 0.48, "grad_norm": 3.9037363529205322, "learning_rate": 1.957759213338225e-05, "loss": 2.409, "step": 37006 }, { "epoch": 0.48, "grad_norm": 3.3098721504211426, "learning_rate": 1.957756191500492e-05, "loss": 1.4385, "step": 37007 }, { "epoch": 0.48, "grad_norm": 3.92585825920105, "learning_rate": 1.9577531695570068e-05, "loss": 2.3, "step": 37008 }, { "epoch": 0.48, "grad_norm": 3.7460434436798096, "learning_rate": 1.9577501475077696e-05, "loss": 2.1224, "step": 37009 }, { "epoch": 0.48, "grad_norm": 4.348117828369141, "learning_rate": 1.95774712535278e-05, "loss": 2.2999, "step": 37010 }, { "epoch": 0.48, "grad_norm": 3.737276315689087, "learning_rate": 1.9577441030920394e-05, "loss": 2.0456, "step": 37011 }, { "epoch": 0.48, "grad_norm": 3.2820518016815186, "learning_rate": 1.9577410807255478e-05, "loss": 1.5266, "step": 37012 }, { "epoch": 0.48, "grad_norm": 3.734605312347412, "learning_rate": 1.9577380582533047e-05, "loss": 2.1248, "step": 37013 }, { "epoch": 0.48, "grad_norm": 4.27979850769043, "learning_rate": 1.9577350356753113e-05, "loss": 2.0809, "step": 37014 }, { "epoch": 0.48, "grad_norm": 3.982088327407837, "learning_rate": 1.957732012991568e-05, "loss": 1.72, "step": 37015 }, { "epoch": 0.48, "grad_norm": 3.786658525466919, "learning_rate": 1.9577289902020744e-05, "loss": 2.1367, "step": 37016 }, { "epoch": 0.48, "grad_norm": 3.669466733932495, "learning_rate": 1.9577259673068315e-05, "loss": 1.985, "step": 37017 }, { "epoch": 0.48, "grad_norm": 3.7693052291870117, "learning_rate": 1.9577229443058393e-05, "loss": 2.13, "step": 37018 }, { "epoch": 0.48, "grad_norm": 3.644439220428467, "learning_rate": 1.9577199211990985e-05, "loss": 1.7061, "step": 37019 }, { "epoch": 0.48, "grad_norm": 3.8451695442199707, "learning_rate": 1.957716897986609e-05, "loss": 1.9949, "step": 37020 }, { "epoch": 0.48, "grad_norm": 4.057304382324219, "learning_rate": 1.957713874668371e-05, "loss": 2.1911, "step": 37021 }, { "epoch": 0.48, "grad_norm": 4.077762126922607, "learning_rate": 1.9577108512443855e-05, "loss": 1.9932, "step": 37022 }, { "epoch": 0.48, "grad_norm": 3.386718511581421, "learning_rate": 1.9577078277146523e-05, "loss": 1.6238, "step": 37023 }, { "epoch": 0.48, "grad_norm": 4.210178375244141, "learning_rate": 1.9577048040791718e-05, "loss": 2.3224, "step": 37024 }, { "epoch": 0.48, "grad_norm": 3.346656084060669, "learning_rate": 1.9577017803379446e-05, "loss": 1.4998, "step": 37025 }, { "epoch": 0.48, "grad_norm": 3.454338788986206, "learning_rate": 1.9576987564909712e-05, "loss": 2.038, "step": 37026 }, { "epoch": 0.48, "grad_norm": 3.1157684326171875, "learning_rate": 1.9576957325382514e-05, "loss": 1.5122, "step": 37027 }, { "epoch": 0.48, "grad_norm": 3.740769863128662, "learning_rate": 1.9576927084797857e-05, "loss": 1.7241, "step": 37028 }, { "epoch": 0.48, "grad_norm": 3.6701977252960205, "learning_rate": 1.9576896843155743e-05, "loss": 2.0014, "step": 37029 }, { "epoch": 0.48, "grad_norm": 3.418560028076172, "learning_rate": 1.957686660045618e-05, "loss": 1.8159, "step": 37030 }, { "epoch": 0.48, "grad_norm": 3.6102097034454346, "learning_rate": 1.9576836356699168e-05, "loss": 2.1207, "step": 37031 }, { "epoch": 0.48, "grad_norm": 3.4767940044403076, "learning_rate": 1.957680611188471e-05, "loss": 1.6192, "step": 37032 }, { "epoch": 0.48, "grad_norm": 3.6514947414398193, "learning_rate": 1.9576775866012808e-05, "loss": 2.1716, "step": 37033 }, { "epoch": 0.48, "grad_norm": 3.9406800270080566, "learning_rate": 1.957674561908347e-05, "loss": 1.8226, "step": 37034 }, { "epoch": 0.48, "grad_norm": 3.984163284301758, "learning_rate": 1.9576715371096697e-05, "loss": 2.1768, "step": 37035 }, { "epoch": 0.48, "grad_norm": 4.030350685119629, "learning_rate": 1.957668512205249e-05, "loss": 2.012, "step": 37036 }, { "epoch": 0.48, "grad_norm": 3.606391191482544, "learning_rate": 1.9576654871950857e-05, "loss": 2.1884, "step": 37037 }, { "epoch": 0.48, "grad_norm": 3.469543933868408, "learning_rate": 1.9576624620791796e-05, "loss": 2.0638, "step": 37038 }, { "epoch": 0.48, "grad_norm": 3.365619421005249, "learning_rate": 1.9576594368575316e-05, "loss": 1.706, "step": 37039 }, { "epoch": 0.48, "grad_norm": 3.9861316680908203, "learning_rate": 1.9576564115301418e-05, "loss": 2.0297, "step": 37040 }, { "epoch": 0.48, "grad_norm": 3.909417152404785, "learning_rate": 1.95765338609701e-05, "loss": 2.2624, "step": 37041 }, { "epoch": 0.48, "grad_norm": 3.7032971382141113, "learning_rate": 1.9576503605581377e-05, "loss": 2.0386, "step": 37042 }, { "epoch": 0.48, "grad_norm": 3.7764530181884766, "learning_rate": 1.9576473349135242e-05, "loss": 1.9491, "step": 37043 }, { "epoch": 0.48, "grad_norm": 3.7542521953582764, "learning_rate": 1.95764430916317e-05, "loss": 2.1197, "step": 37044 }, { "epoch": 0.48, "grad_norm": 4.577105522155762, "learning_rate": 1.957641283307076e-05, "loss": 2.7187, "step": 37045 }, { "epoch": 0.48, "grad_norm": 3.2729275226593018, "learning_rate": 1.957638257345242e-05, "loss": 1.5894, "step": 37046 }, { "epoch": 0.48, "grad_norm": 3.823206663131714, "learning_rate": 1.9576352312776684e-05, "loss": 1.9505, "step": 37047 }, { "epoch": 0.48, "grad_norm": 4.033883571624756, "learning_rate": 1.957632205104356e-05, "loss": 2.5527, "step": 37048 }, { "epoch": 0.48, "grad_norm": 4.230968475341797, "learning_rate": 1.9576291788253038e-05, "loss": 2.1677, "step": 37049 }, { "epoch": 0.48, "grad_norm": 3.5045459270477295, "learning_rate": 1.957626152440514e-05, "loss": 1.8445, "step": 37050 }, { "epoch": 0.48, "grad_norm": 3.9476051330566406, "learning_rate": 1.9576231259499855e-05, "loss": 2.5249, "step": 37051 }, { "epoch": 0.48, "grad_norm": 3.402102470397949, "learning_rate": 1.9576200993537196e-05, "loss": 1.6063, "step": 37052 }, { "epoch": 0.48, "grad_norm": 3.887132406234741, "learning_rate": 1.957617072651716e-05, "loss": 1.8067, "step": 37053 }, { "epoch": 0.48, "grad_norm": 4.1208415031433105, "learning_rate": 1.9576140458439754e-05, "loss": 2.0551, "step": 37054 }, { "epoch": 0.48, "grad_norm": 3.914125442504883, "learning_rate": 1.9576110189304977e-05, "loss": 2.0193, "step": 37055 }, { "epoch": 0.48, "grad_norm": 4.7311601638793945, "learning_rate": 1.9576079919112835e-05, "loss": 2.4612, "step": 37056 }, { "epoch": 0.48, "grad_norm": 3.446685791015625, "learning_rate": 1.9576049647863332e-05, "loss": 1.6349, "step": 37057 }, { "epoch": 0.48, "grad_norm": 4.16606330871582, "learning_rate": 1.957601937555647e-05, "loss": 1.9643, "step": 37058 }, { "epoch": 0.48, "grad_norm": 3.7519383430480957, "learning_rate": 1.9575989102192255e-05, "loss": 2.0329, "step": 37059 }, { "epoch": 0.48, "grad_norm": 3.935368299484253, "learning_rate": 1.9575958827770688e-05, "loss": 2.3182, "step": 37060 }, { "epoch": 0.48, "grad_norm": 3.422515392303467, "learning_rate": 1.9575928552291773e-05, "loss": 1.5973, "step": 37061 }, { "epoch": 0.48, "grad_norm": 3.9306411743164062, "learning_rate": 1.957589827575551e-05, "loss": 1.8872, "step": 37062 }, { "epoch": 0.48, "grad_norm": 3.9478111267089844, "learning_rate": 1.9575867998161907e-05, "loss": 2.3234, "step": 37063 }, { "epoch": 0.48, "grad_norm": 4.0123443603515625, "learning_rate": 1.957583771951097e-05, "loss": 2.2068, "step": 37064 }, { "epoch": 0.48, "grad_norm": 3.6492295265197754, "learning_rate": 1.9575807439802694e-05, "loss": 1.8432, "step": 37065 }, { "epoch": 0.48, "grad_norm": 3.2543108463287354, "learning_rate": 1.9575777159037087e-05, "loss": 1.5338, "step": 37066 }, { "epoch": 0.48, "grad_norm": 3.6263978481292725, "learning_rate": 1.957574687721415e-05, "loss": 1.7125, "step": 37067 }, { "epoch": 0.48, "grad_norm": 3.8976869583129883, "learning_rate": 1.9575716594333888e-05, "loss": 1.9536, "step": 37068 }, { "epoch": 0.48, "grad_norm": 3.332388162612915, "learning_rate": 1.9575686310396306e-05, "loss": 1.714, "step": 37069 }, { "epoch": 0.48, "grad_norm": 4.041515350341797, "learning_rate": 1.9575656025401406e-05, "loss": 2.2949, "step": 37070 }, { "epoch": 0.48, "grad_norm": 3.3186306953430176, "learning_rate": 1.9575625739349192e-05, "loss": 1.4096, "step": 37071 }, { "epoch": 0.48, "grad_norm": 3.6370646953582764, "learning_rate": 1.9575595452239665e-05, "loss": 1.7679, "step": 37072 }, { "epoch": 0.48, "grad_norm": 2.9694294929504395, "learning_rate": 1.957556516407283e-05, "loss": 1.4358, "step": 37073 }, { "epoch": 0.48, "grad_norm": 3.6923303604125977, "learning_rate": 1.9575534874848688e-05, "loss": 1.9199, "step": 37074 }, { "epoch": 0.48, "grad_norm": 3.7771713733673096, "learning_rate": 1.957550458456725e-05, "loss": 1.4953, "step": 37075 }, { "epoch": 0.48, "grad_norm": 3.9834907054901123, "learning_rate": 1.957547429322851e-05, "loss": 1.9268, "step": 37076 }, { "epoch": 0.48, "grad_norm": 4.516915321350098, "learning_rate": 1.9575444000832477e-05, "loss": 2.7287, "step": 37077 }, { "epoch": 0.48, "grad_norm": 3.7657406330108643, "learning_rate": 1.9575413707379147e-05, "loss": 1.9694, "step": 37078 }, { "epoch": 0.48, "grad_norm": 3.6027185916900635, "learning_rate": 1.9575383412868537e-05, "loss": 2.0423, "step": 37079 }, { "epoch": 0.48, "grad_norm": 3.2673726081848145, "learning_rate": 1.9575353117300636e-05, "loss": 1.678, "step": 37080 }, { "epoch": 0.48, "grad_norm": 3.907163619995117, "learning_rate": 1.9575322820675456e-05, "loss": 1.9766, "step": 37081 }, { "epoch": 0.48, "grad_norm": 3.971343755722046, "learning_rate": 1.9575292522993e-05, "loss": 2.094, "step": 37082 }, { "epoch": 0.48, "grad_norm": 4.349240779876709, "learning_rate": 1.9575262224253265e-05, "loss": 2.3938, "step": 37083 }, { "epoch": 0.48, "grad_norm": 3.437993288040161, "learning_rate": 1.9575231924456258e-05, "loss": 1.6139, "step": 37084 }, { "epoch": 0.48, "grad_norm": 3.4909324645996094, "learning_rate": 1.9575201623601988e-05, "loss": 1.7066, "step": 37085 }, { "epoch": 0.48, "grad_norm": 3.6641275882720947, "learning_rate": 1.957517132169045e-05, "loss": 1.9956, "step": 37086 }, { "epoch": 0.48, "grad_norm": 3.5849452018737793, "learning_rate": 1.957514101872165e-05, "loss": 1.7264, "step": 37087 }, { "epoch": 0.48, "grad_norm": 3.6900413036346436, "learning_rate": 1.9575110714695595e-05, "loss": 2.0771, "step": 37088 }, { "epoch": 0.48, "grad_norm": 4.227612495422363, "learning_rate": 1.9575080409612286e-05, "loss": 2.199, "step": 37089 }, { "epoch": 0.48, "grad_norm": 3.83870267868042, "learning_rate": 1.9575050103471724e-05, "loss": 2.0194, "step": 37090 }, { "epoch": 0.48, "grad_norm": 3.605945587158203, "learning_rate": 1.9575019796273912e-05, "loss": 1.8215, "step": 37091 }, { "epoch": 0.48, "grad_norm": 4.089491844177246, "learning_rate": 1.9574989488018857e-05, "loss": 2.2173, "step": 37092 }, { "epoch": 0.48, "grad_norm": 3.4699456691741943, "learning_rate": 1.957495917870656e-05, "loss": 1.8828, "step": 37093 }, { "epoch": 0.48, "grad_norm": 3.623304843902588, "learning_rate": 1.957492886833703e-05, "loss": 1.9987, "step": 37094 }, { "epoch": 0.48, "grad_norm": 3.608445167541504, "learning_rate": 1.9574898556910257e-05, "loss": 1.786, "step": 37095 }, { "epoch": 0.48, "grad_norm": 3.759230852127075, "learning_rate": 1.9574868244426258e-05, "loss": 2.0838, "step": 37096 }, { "epoch": 0.48, "grad_norm": 3.6836745738983154, "learning_rate": 1.957483793088503e-05, "loss": 1.9827, "step": 37097 }, { "epoch": 0.48, "grad_norm": 4.150524616241455, "learning_rate": 1.9574807616286577e-05, "loss": 2.2564, "step": 37098 }, { "epoch": 0.48, "grad_norm": 3.8171305656433105, "learning_rate": 1.9574777300630904e-05, "loss": 2.2082, "step": 37099 }, { "epoch": 0.48, "grad_norm": 3.318957805633545, "learning_rate": 1.9574746983918015e-05, "loss": 1.5349, "step": 37100 }, { "epoch": 0.48, "grad_norm": 3.4271795749664307, "learning_rate": 1.9574716666147907e-05, "loss": 1.9249, "step": 37101 }, { "epoch": 0.48, "grad_norm": 3.2565605640411377, "learning_rate": 1.957468634732059e-05, "loss": 1.7467, "step": 37102 }, { "epoch": 0.48, "grad_norm": 4.00339937210083, "learning_rate": 1.957465602743606e-05, "loss": 2.367, "step": 37103 }, { "epoch": 0.48, "grad_norm": 4.325777053833008, "learning_rate": 1.9574625706494334e-05, "loss": 2.1519, "step": 37104 }, { "epoch": 0.48, "grad_norm": 4.023020267486572, "learning_rate": 1.9574595384495404e-05, "loss": 2.238, "step": 37105 }, { "epoch": 0.48, "grad_norm": 4.0477423667907715, "learning_rate": 1.9574565061439275e-05, "loss": 2.1863, "step": 37106 }, { "epoch": 0.48, "grad_norm": 4.123265266418457, "learning_rate": 1.957453473732595e-05, "loss": 1.8624, "step": 37107 }, { "epoch": 0.48, "grad_norm": 4.1296515464782715, "learning_rate": 1.957450441215544e-05, "loss": 2.176, "step": 37108 }, { "epoch": 0.48, "grad_norm": 3.444775342941284, "learning_rate": 1.9574474085927738e-05, "loss": 1.9413, "step": 37109 }, { "epoch": 0.48, "grad_norm": 3.497119665145874, "learning_rate": 1.957444375864285e-05, "loss": 1.6424, "step": 37110 }, { "epoch": 0.48, "grad_norm": 3.5057435035705566, "learning_rate": 1.9574413430300786e-05, "loss": 1.8038, "step": 37111 }, { "epoch": 0.48, "grad_norm": 3.414396286010742, "learning_rate": 1.957438310090154e-05, "loss": 1.9007, "step": 37112 }, { "epoch": 0.48, "grad_norm": 3.793139934539795, "learning_rate": 1.957435277044512e-05, "loss": 1.6407, "step": 37113 }, { "epoch": 0.48, "grad_norm": 3.7864160537719727, "learning_rate": 1.957432243893153e-05, "loss": 2.0862, "step": 37114 }, { "epoch": 0.48, "grad_norm": 3.5899367332458496, "learning_rate": 1.9574292106360773e-05, "loss": 1.7985, "step": 37115 }, { "epoch": 0.48, "grad_norm": 3.9616355895996094, "learning_rate": 1.957426177273285e-05, "loss": 1.9305, "step": 37116 }, { "epoch": 0.48, "grad_norm": 3.334462881088257, "learning_rate": 1.957423143804777e-05, "loss": 1.8215, "step": 37117 }, { "epoch": 0.48, "grad_norm": 3.3223776817321777, "learning_rate": 1.9574201102305526e-05, "loss": 1.6959, "step": 37118 }, { "epoch": 0.48, "grad_norm": 3.780203104019165, "learning_rate": 1.9574170765506132e-05, "loss": 2.0768, "step": 37119 }, { "epoch": 0.48, "grad_norm": 3.450462818145752, "learning_rate": 1.9574140427649587e-05, "loss": 1.8269, "step": 37120 }, { "epoch": 0.48, "grad_norm": 3.4746789932250977, "learning_rate": 1.9574110088735897e-05, "loss": 1.4859, "step": 37121 }, { "epoch": 0.48, "grad_norm": 3.470684051513672, "learning_rate": 1.957407974876506e-05, "loss": 1.6156, "step": 37122 }, { "epoch": 0.48, "grad_norm": 3.2478349208831787, "learning_rate": 1.957404940773708e-05, "loss": 1.3614, "step": 37123 }, { "epoch": 0.48, "grad_norm": 4.0542802810668945, "learning_rate": 1.9574019065651963e-05, "loss": 1.9624, "step": 37124 }, { "epoch": 0.48, "grad_norm": 3.714256525039673, "learning_rate": 1.9573988722509715e-05, "loss": 1.8034, "step": 37125 }, { "epoch": 0.48, "grad_norm": 3.328433036804199, "learning_rate": 1.9573958378310333e-05, "loss": 1.6527, "step": 37126 }, { "epoch": 0.48, "grad_norm": 3.938377857208252, "learning_rate": 1.9573928033053827e-05, "loss": 1.6951, "step": 37127 }, { "epoch": 0.48, "grad_norm": 3.7939233779907227, "learning_rate": 1.9573897686740193e-05, "loss": 1.8602, "step": 37128 }, { "epoch": 0.48, "grad_norm": 3.3879878520965576, "learning_rate": 1.957386733936944e-05, "loss": 1.5954, "step": 37129 }, { "epoch": 0.48, "grad_norm": 3.9167022705078125, "learning_rate": 1.957383699094157e-05, "loss": 2.0234, "step": 37130 }, { "epoch": 0.48, "grad_norm": 3.6568474769592285, "learning_rate": 1.9573806641456586e-05, "loss": 1.952, "step": 37131 }, { "epoch": 0.48, "grad_norm": 3.777986764907837, "learning_rate": 1.9573776290914492e-05, "loss": 1.955, "step": 37132 }, { "epoch": 0.48, "grad_norm": 4.206037521362305, "learning_rate": 1.957374593931529e-05, "loss": 1.9281, "step": 37133 }, { "epoch": 0.48, "grad_norm": 3.386809825897217, "learning_rate": 1.9573715586658983e-05, "loss": 1.6026, "step": 37134 }, { "epoch": 0.48, "grad_norm": 3.428227663040161, "learning_rate": 1.9573685232945578e-05, "loss": 1.5088, "step": 37135 }, { "epoch": 0.48, "grad_norm": 3.4449803829193115, "learning_rate": 1.9573654878175075e-05, "loss": 1.7915, "step": 37136 }, { "epoch": 0.48, "grad_norm": 3.9820337295532227, "learning_rate": 1.9573624522347476e-05, "loss": 2.1254, "step": 37137 }, { "epoch": 0.48, "grad_norm": 3.700763702392578, "learning_rate": 1.957359416546279e-05, "loss": 1.6153, "step": 37138 }, { "epoch": 0.48, "grad_norm": 3.795435905456543, "learning_rate": 1.9573563807521012e-05, "loss": 1.6805, "step": 37139 }, { "epoch": 0.48, "grad_norm": 3.7557594776153564, "learning_rate": 1.9573533448522152e-05, "loss": 1.9511, "step": 37140 }, { "epoch": 0.48, "grad_norm": 4.069263935089111, "learning_rate": 1.957350308846621e-05, "loss": 2.0704, "step": 37141 }, { "epoch": 0.48, "grad_norm": 4.39381742477417, "learning_rate": 1.9573472727353194e-05, "loss": 2.1998, "step": 37142 }, { "epoch": 0.48, "grad_norm": 4.3736724853515625, "learning_rate": 1.9573442365183103e-05, "loss": 2.253, "step": 37143 }, { "epoch": 0.48, "grad_norm": 3.674358367919922, "learning_rate": 1.9573412001955946e-05, "loss": 2.0596, "step": 37144 }, { "epoch": 0.48, "grad_norm": 3.6565773487091064, "learning_rate": 1.9573381637671715e-05, "loss": 1.7341, "step": 37145 }, { "epoch": 0.48, "grad_norm": 4.504712104797363, "learning_rate": 1.957335127233042e-05, "loss": 1.9435, "step": 37146 }, { "epoch": 0.48, "grad_norm": 3.5460565090179443, "learning_rate": 1.957332090593207e-05, "loss": 1.7602, "step": 37147 }, { "epoch": 0.48, "grad_norm": 3.725491523742676, "learning_rate": 1.957329053847666e-05, "loss": 1.9078, "step": 37148 }, { "epoch": 0.48, "grad_norm": 3.5205814838409424, "learning_rate": 1.9573260169964196e-05, "loss": 1.7571, "step": 37149 }, { "epoch": 0.48, "grad_norm": 3.635713577270508, "learning_rate": 1.9573229800394683e-05, "loss": 2.4691, "step": 37150 }, { "epoch": 0.48, "grad_norm": 4.234517574310303, "learning_rate": 1.9573199429768124e-05, "loss": 2.388, "step": 37151 }, { "epoch": 0.48, "grad_norm": 3.8365049362182617, "learning_rate": 1.9573169058084518e-05, "loss": 1.7818, "step": 37152 }, { "epoch": 0.48, "grad_norm": 3.5562641620635986, "learning_rate": 1.9573138685343873e-05, "loss": 1.9441, "step": 37153 }, { "epoch": 0.48, "grad_norm": 3.4000442028045654, "learning_rate": 1.957310831154619e-05, "loss": 1.8767, "step": 37154 }, { "epoch": 0.48, "grad_norm": 3.71818208694458, "learning_rate": 1.9573077936691477e-05, "loss": 1.8847, "step": 37155 }, { "epoch": 0.48, "grad_norm": 3.6773521900177, "learning_rate": 1.957304756077973e-05, "loss": 2.0141, "step": 37156 }, { "epoch": 0.48, "grad_norm": 3.4927072525024414, "learning_rate": 1.9573017183810958e-05, "loss": 1.7046, "step": 37157 }, { "epoch": 0.48, "grad_norm": 3.8737082481384277, "learning_rate": 1.9572986805785162e-05, "loss": 1.7117, "step": 37158 }, { "epoch": 0.48, "grad_norm": 3.597315549850464, "learning_rate": 1.9572956426702348e-05, "loss": 1.8561, "step": 37159 }, { "epoch": 0.48, "grad_norm": 3.994636297225952, "learning_rate": 1.9572926046562514e-05, "loss": 2.2002, "step": 37160 }, { "epoch": 0.48, "grad_norm": 4.0658278465271, "learning_rate": 1.957289566536567e-05, "loss": 2.0932, "step": 37161 }, { "epoch": 0.48, "grad_norm": 4.129459857940674, "learning_rate": 1.9572865283111812e-05, "loss": 2.1962, "step": 37162 }, { "epoch": 0.48, "grad_norm": 4.11703634262085, "learning_rate": 1.957283489980095e-05, "loss": 2.0893, "step": 37163 }, { "epoch": 0.48, "grad_norm": 4.549912452697754, "learning_rate": 1.9572804515433085e-05, "loss": 1.8679, "step": 37164 }, { "epoch": 0.48, "grad_norm": 3.555542230606079, "learning_rate": 1.9572774130008216e-05, "loss": 1.81, "step": 37165 }, { "epoch": 0.48, "grad_norm": 3.4387497901916504, "learning_rate": 1.9572743743526355e-05, "loss": 1.839, "step": 37166 }, { "epoch": 0.48, "grad_norm": 3.628739833831787, "learning_rate": 1.95727133559875e-05, "loss": 2.1945, "step": 37167 }, { "epoch": 0.48, "grad_norm": 4.222816467285156, "learning_rate": 1.957268296739165e-05, "loss": 2.1218, "step": 37168 }, { "epoch": 0.48, "grad_norm": 3.1993701457977295, "learning_rate": 1.957265257773882e-05, "loss": 1.7339, "step": 37169 }, { "epoch": 0.48, "grad_norm": 3.8311734199523926, "learning_rate": 1.9572622187029e-05, "loss": 2.015, "step": 37170 }, { "epoch": 0.48, "grad_norm": 3.910926580429077, "learning_rate": 1.9572591795262207e-05, "loss": 2.0862, "step": 37171 }, { "epoch": 0.48, "grad_norm": 3.649141788482666, "learning_rate": 1.9572561402438434e-05, "loss": 2.0231, "step": 37172 }, { "epoch": 0.48, "grad_norm": 3.68973970413208, "learning_rate": 1.957253100855769e-05, "loss": 1.9986, "step": 37173 }, { "epoch": 0.48, "grad_norm": 3.585451602935791, "learning_rate": 1.957250061361997e-05, "loss": 1.881, "step": 37174 }, { "epoch": 0.48, "grad_norm": 3.6218159198760986, "learning_rate": 1.957247021762529e-05, "loss": 1.9706, "step": 37175 }, { "epoch": 0.48, "grad_norm": 3.807844877243042, "learning_rate": 1.9572439820573646e-05, "loss": 2.3019, "step": 37176 }, { "epoch": 0.48, "grad_norm": 3.9671196937561035, "learning_rate": 1.9572409422465042e-05, "loss": 1.9027, "step": 37177 }, { "epoch": 0.48, "grad_norm": 3.647150754928589, "learning_rate": 1.957237902329948e-05, "loss": 2.3961, "step": 37178 }, { "epoch": 0.48, "grad_norm": 3.5139620304107666, "learning_rate": 1.9572348623076965e-05, "loss": 1.6761, "step": 37179 }, { "epoch": 0.48, "grad_norm": 3.399885892868042, "learning_rate": 1.9572318221797503e-05, "loss": 1.6659, "step": 37180 }, { "epoch": 0.48, "grad_norm": 3.552767276763916, "learning_rate": 1.957228781946109e-05, "loss": 1.793, "step": 37181 }, { "epoch": 0.48, "grad_norm": 3.823962926864624, "learning_rate": 1.957225741606774e-05, "loss": 1.7631, "step": 37182 }, { "epoch": 0.48, "grad_norm": 3.5060956478118896, "learning_rate": 1.9572227011617446e-05, "loss": 1.6961, "step": 37183 }, { "epoch": 0.48, "grad_norm": 4.217141151428223, "learning_rate": 1.9572196606110215e-05, "loss": 2.3343, "step": 37184 }, { "epoch": 0.48, "grad_norm": 4.371210098266602, "learning_rate": 1.9572166199546054e-05, "loss": 2.2987, "step": 37185 }, { "epoch": 0.48, "grad_norm": 4.135379314422607, "learning_rate": 1.957213579192496e-05, "loss": 2.3099, "step": 37186 }, { "epoch": 0.48, "grad_norm": 4.265889644622803, "learning_rate": 1.9572105383246944e-05, "loss": 2.0195, "step": 37187 }, { "epoch": 0.48, "grad_norm": 4.248585224151611, "learning_rate": 1.9572074973512003e-05, "loss": 2.3303, "step": 37188 }, { "epoch": 0.48, "grad_norm": 3.669764280319214, "learning_rate": 1.9572044562720144e-05, "loss": 1.591, "step": 37189 }, { "epoch": 0.48, "grad_norm": 4.184454917907715, "learning_rate": 1.9572014150871367e-05, "loss": 1.8905, "step": 37190 }, { "epoch": 0.48, "grad_norm": 3.789565324783325, "learning_rate": 1.957198373796568e-05, "loss": 1.9281, "step": 37191 }, { "epoch": 0.48, "grad_norm": 4.650448799133301, "learning_rate": 1.957195332400308e-05, "loss": 2.5759, "step": 37192 }, { "epoch": 0.48, "grad_norm": 4.160421371459961, "learning_rate": 1.9571922908983577e-05, "loss": 1.8643, "step": 37193 }, { "epoch": 0.48, "grad_norm": 4.537552356719971, "learning_rate": 1.9571892492907167e-05, "loss": 2.981, "step": 37194 }, { "epoch": 0.48, "grad_norm": 3.7672083377838135, "learning_rate": 1.957186207577386e-05, "loss": 2.1269, "step": 37195 }, { "epoch": 0.48, "grad_norm": 3.9058868885040283, "learning_rate": 1.957183165758366e-05, "loss": 1.9647, "step": 37196 }, { "epoch": 0.48, "grad_norm": 3.311776876449585, "learning_rate": 1.9571801238336566e-05, "loss": 2.0473, "step": 37197 }, { "epoch": 0.48, "grad_norm": 3.5350534915924072, "learning_rate": 1.957177081803258e-05, "loss": 1.6471, "step": 37198 }, { "epoch": 0.48, "grad_norm": 3.812901735305786, "learning_rate": 1.9571740396671713e-05, "loss": 1.8967, "step": 37199 }, { "epoch": 0.48, "grad_norm": 4.1666460037231445, "learning_rate": 1.9571709974253958e-05, "loss": 2.1526, "step": 37200 }, { "epoch": 0.48, "grad_norm": 3.9402413368225098, "learning_rate": 1.9571679550779325e-05, "loss": 2.0238, "step": 37201 }, { "epoch": 0.48, "grad_norm": 3.328406572341919, "learning_rate": 1.957164912624782e-05, "loss": 1.4664, "step": 37202 }, { "epoch": 0.48, "grad_norm": 3.6767590045928955, "learning_rate": 1.9571618700659437e-05, "loss": 1.6855, "step": 37203 }, { "epoch": 0.48, "grad_norm": 3.7756547927856445, "learning_rate": 1.9571588274014187e-05, "loss": 2.1683, "step": 37204 }, { "epoch": 0.48, "grad_norm": 3.2031662464141846, "learning_rate": 1.9571557846312075e-05, "loss": 1.6441, "step": 37205 }, { "epoch": 0.48, "grad_norm": 3.40724515914917, "learning_rate": 1.95715274175531e-05, "loss": 1.6274, "step": 37206 }, { "epoch": 0.48, "grad_norm": 3.755040168762207, "learning_rate": 1.957149698773726e-05, "loss": 1.8938, "step": 37207 }, { "epoch": 0.48, "grad_norm": 4.081896781921387, "learning_rate": 1.957146655686457e-05, "loss": 1.9281, "step": 37208 }, { "epoch": 0.48, "grad_norm": 3.845996856689453, "learning_rate": 1.9571436124935027e-05, "loss": 1.7693, "step": 37209 }, { "epoch": 0.48, "grad_norm": 4.0934600830078125, "learning_rate": 1.9571405691948632e-05, "loss": 2.1374, "step": 37210 }, { "epoch": 0.48, "grad_norm": 3.636281967163086, "learning_rate": 1.9571375257905393e-05, "loss": 1.7078, "step": 37211 }, { "epoch": 0.48, "grad_norm": 4.034278392791748, "learning_rate": 1.9571344822805312e-05, "loss": 2.1759, "step": 37212 }, { "epoch": 0.48, "grad_norm": 3.650331735610962, "learning_rate": 1.9571314386648394e-05, "loss": 1.9812, "step": 37213 }, { "epoch": 0.48, "grad_norm": 3.854433059692383, "learning_rate": 1.957128394943464e-05, "loss": 1.798, "step": 37214 }, { "epoch": 0.48, "grad_norm": 4.028162479400635, "learning_rate": 1.957125351116405e-05, "loss": 2.0753, "step": 37215 }, { "epoch": 0.48, "grad_norm": 3.1743884086608887, "learning_rate": 1.9571223071836633e-05, "loss": 1.6234, "step": 37216 }, { "epoch": 0.48, "grad_norm": 3.941863775253296, "learning_rate": 1.957119263145239e-05, "loss": 2.1329, "step": 37217 }, { "epoch": 0.48, "grad_norm": 3.9686756134033203, "learning_rate": 1.957116219001133e-05, "loss": 1.8565, "step": 37218 }, { "epoch": 0.48, "grad_norm": 3.9244494438171387, "learning_rate": 1.9571131747513448e-05, "loss": 1.9998, "step": 37219 }, { "epoch": 0.48, "grad_norm": 3.2168304920196533, "learning_rate": 1.9571101303958748e-05, "loss": 1.6169, "step": 37220 }, { "epoch": 0.48, "grad_norm": 4.185761451721191, "learning_rate": 1.9571070859347242e-05, "loss": 2.2427, "step": 37221 }, { "epoch": 0.48, "grad_norm": 3.228492498397827, "learning_rate": 1.957104041367892e-05, "loss": 1.5182, "step": 37222 }, { "epoch": 0.48, "grad_norm": 3.816314458847046, "learning_rate": 1.95710099669538e-05, "loss": 1.8783, "step": 37223 }, { "epoch": 0.48, "grad_norm": 4.133774280548096, "learning_rate": 1.9570979519171873e-05, "loss": 2.1042, "step": 37224 }, { "epoch": 0.48, "grad_norm": 3.5861291885375977, "learning_rate": 1.957094907033315e-05, "loss": 2.0286, "step": 37225 }, { "epoch": 0.48, "grad_norm": 3.5142018795013428, "learning_rate": 1.957091862043763e-05, "loss": 2.0461, "step": 37226 }, { "epoch": 0.48, "grad_norm": 4.449660778045654, "learning_rate": 1.957088816948532e-05, "loss": 2.1891, "step": 37227 }, { "epoch": 0.48, "grad_norm": 3.6983327865600586, "learning_rate": 1.957085771747622e-05, "loss": 1.77, "step": 37228 }, { "epoch": 0.48, "grad_norm": 3.715087890625, "learning_rate": 1.9570827264410336e-05, "loss": 1.9935, "step": 37229 }, { "epoch": 0.48, "grad_norm": 3.7330117225646973, "learning_rate": 1.957079681028767e-05, "loss": 1.9024, "step": 37230 }, { "epoch": 0.48, "grad_norm": 4.195061683654785, "learning_rate": 1.9570766355108225e-05, "loss": 1.8521, "step": 37231 }, { "epoch": 0.48, "grad_norm": 4.17610502243042, "learning_rate": 1.9570735898872005e-05, "loss": 2.4453, "step": 37232 }, { "epoch": 0.48, "grad_norm": 4.269676685333252, "learning_rate": 1.9570705441579013e-05, "loss": 2.0647, "step": 37233 }, { "epoch": 0.48, "grad_norm": 3.8486270904541016, "learning_rate": 1.9570674983229256e-05, "loss": 1.9701, "step": 37234 }, { "epoch": 0.48, "grad_norm": 3.8378853797912598, "learning_rate": 1.9570644523822734e-05, "loss": 1.8099, "step": 37235 }, { "epoch": 0.48, "grad_norm": 3.3448355197906494, "learning_rate": 1.9570614063359447e-05, "loss": 1.8972, "step": 37236 }, { "epoch": 0.48, "grad_norm": 3.809147357940674, "learning_rate": 1.9570583601839405e-05, "loss": 2.1145, "step": 37237 }, { "epoch": 0.48, "grad_norm": 3.559641122817993, "learning_rate": 1.9570553139262608e-05, "loss": 2.0079, "step": 37238 }, { "epoch": 0.48, "grad_norm": 3.4552040100097656, "learning_rate": 1.9570522675629056e-05, "loss": 1.7718, "step": 37239 }, { "epoch": 0.48, "grad_norm": 3.766860246658325, "learning_rate": 1.957049221093876e-05, "loss": 1.9386, "step": 37240 }, { "epoch": 0.48, "grad_norm": 3.544916868209839, "learning_rate": 1.9570461745191715e-05, "loss": 2.2176, "step": 37241 }, { "epoch": 0.48, "grad_norm": 3.9200527667999268, "learning_rate": 1.9570431278387932e-05, "loss": 1.8, "step": 37242 }, { "epoch": 0.48, "grad_norm": 3.458865165710449, "learning_rate": 1.9570400810527408e-05, "loss": 1.6441, "step": 37243 }, { "epoch": 0.48, "grad_norm": 3.5262138843536377, "learning_rate": 1.9570370341610152e-05, "loss": 1.8352, "step": 37244 }, { "epoch": 0.48, "grad_norm": 3.7467944622039795, "learning_rate": 1.9570339871636162e-05, "loss": 2.1973, "step": 37245 }, { "epoch": 0.48, "grad_norm": 3.2437632083892822, "learning_rate": 1.9570309400605448e-05, "loss": 1.4401, "step": 37246 }, { "epoch": 0.48, "grad_norm": 3.59590220451355, "learning_rate": 1.9570278928518006e-05, "loss": 1.7092, "step": 37247 }, { "epoch": 0.48, "grad_norm": 3.5612170696258545, "learning_rate": 1.957024845537385e-05, "loss": 1.9423, "step": 37248 }, { "epoch": 0.48, "grad_norm": 3.7571024894714355, "learning_rate": 1.957021798117297e-05, "loss": 2.0674, "step": 37249 }, { "epoch": 0.48, "grad_norm": 3.765882730484009, "learning_rate": 1.9570187505915373e-05, "loss": 1.6737, "step": 37250 }, { "epoch": 0.48, "grad_norm": 4.125181198120117, "learning_rate": 1.9570157029601067e-05, "loss": 2.2696, "step": 37251 }, { "epoch": 0.48, "grad_norm": 3.9775218963623047, "learning_rate": 1.9570126552230057e-05, "loss": 1.7219, "step": 37252 }, { "epoch": 0.48, "grad_norm": 3.9147238731384277, "learning_rate": 1.957009607380234e-05, "loss": 2.5861, "step": 37253 }, { "epoch": 0.48, "grad_norm": 4.589350700378418, "learning_rate": 1.9570065594317925e-05, "loss": 2.4755, "step": 37254 }, { "epoch": 0.48, "grad_norm": 3.556849479675293, "learning_rate": 1.9570035113776806e-05, "loss": 1.6481, "step": 37255 }, { "epoch": 0.48, "grad_norm": 3.3300929069519043, "learning_rate": 1.9570004632179e-05, "loss": 1.7358, "step": 37256 }, { "epoch": 0.48, "grad_norm": 3.5797135829925537, "learning_rate": 1.9569974149524496e-05, "loss": 1.8009, "step": 37257 }, { "epoch": 0.48, "grad_norm": 4.17135763168335, "learning_rate": 1.9569943665813312e-05, "loss": 2.3813, "step": 37258 }, { "epoch": 0.48, "grad_norm": 3.655827522277832, "learning_rate": 1.9569913181045438e-05, "loss": 1.8853, "step": 37259 }, { "epoch": 0.48, "grad_norm": 3.706876277923584, "learning_rate": 1.9569882695220887e-05, "loss": 1.7835, "step": 37260 }, { "epoch": 0.48, "grad_norm": 4.031637668609619, "learning_rate": 1.9569852208339655e-05, "loss": 2.0973, "step": 37261 }, { "epoch": 0.48, "grad_norm": 3.675945520401001, "learning_rate": 1.9569821720401753e-05, "loss": 2.1588, "step": 37262 }, { "epoch": 0.48, "grad_norm": 3.6211631298065186, "learning_rate": 1.9569791231407178e-05, "loss": 1.9186, "step": 37263 }, { "epoch": 0.48, "grad_norm": 3.5873067378997803, "learning_rate": 1.9569760741355935e-05, "loss": 2.0358, "step": 37264 }, { "epoch": 0.48, "grad_norm": 3.595020055770874, "learning_rate": 1.9569730250248026e-05, "loss": 1.7942, "step": 37265 }, { "epoch": 0.48, "grad_norm": 3.5411489009857178, "learning_rate": 1.956969975808346e-05, "loss": 1.78, "step": 37266 }, { "epoch": 0.48, "grad_norm": 3.853369951248169, "learning_rate": 1.9569669264862238e-05, "loss": 1.8912, "step": 37267 }, { "epoch": 0.48, "grad_norm": 4.223182201385498, "learning_rate": 1.956963877058436e-05, "loss": 1.7554, "step": 37268 }, { "epoch": 0.48, "grad_norm": 3.8795790672302246, "learning_rate": 1.956960827524983e-05, "loss": 2.1476, "step": 37269 }, { "epoch": 0.48, "grad_norm": 4.7841572761535645, "learning_rate": 1.9569577778858654e-05, "loss": 1.9095, "step": 37270 }, { "epoch": 0.48, "grad_norm": 4.011634349822998, "learning_rate": 1.956954728141084e-05, "loss": 2.1494, "step": 37271 }, { "epoch": 0.48, "grad_norm": 3.542696952819824, "learning_rate": 1.9569516782906377e-05, "loss": 1.7421, "step": 37272 }, { "epoch": 0.48, "grad_norm": 3.5239312648773193, "learning_rate": 1.9569486283345282e-05, "loss": 1.6241, "step": 37273 }, { "epoch": 0.48, "grad_norm": 4.555701732635498, "learning_rate": 1.9569455782727547e-05, "loss": 2.3652, "step": 37274 }, { "epoch": 0.48, "grad_norm": 3.691645622253418, "learning_rate": 1.956942528105319e-05, "loss": 1.943, "step": 37275 }, { "epoch": 0.48, "grad_norm": 3.2413060665130615, "learning_rate": 1.9569394778322203e-05, "loss": 1.9075, "step": 37276 }, { "epoch": 0.48, "grad_norm": 3.5257716178894043, "learning_rate": 1.9569364274534594e-05, "loss": 1.782, "step": 37277 }, { "epoch": 0.48, "grad_norm": 3.6572327613830566, "learning_rate": 1.9569333769690358e-05, "loss": 1.7697, "step": 37278 }, { "epoch": 0.48, "grad_norm": 3.9562032222747803, "learning_rate": 1.9569303263789513e-05, "loss": 2.0033, "step": 37279 }, { "epoch": 0.48, "grad_norm": 3.985590696334839, "learning_rate": 1.9569272756832052e-05, "loss": 1.9335, "step": 37280 }, { "epoch": 0.48, "grad_norm": 4.300066947937012, "learning_rate": 1.9569242248817982e-05, "loss": 2.3433, "step": 37281 }, { "epoch": 0.48, "grad_norm": 4.45475959777832, "learning_rate": 1.9569211739747303e-05, "loss": 2.089, "step": 37282 }, { "epoch": 0.48, "grad_norm": 4.348762512207031, "learning_rate": 1.956918122962002e-05, "loss": 2.1024, "step": 37283 }, { "epoch": 0.48, "grad_norm": 4.060887813568115, "learning_rate": 1.956915071843614e-05, "loss": 1.932, "step": 37284 }, { "epoch": 0.48, "grad_norm": 4.144294261932373, "learning_rate": 1.9569120206195663e-05, "loss": 2.0405, "step": 37285 }, { "epoch": 0.48, "grad_norm": 3.737358331680298, "learning_rate": 1.956908969289859e-05, "loss": 1.984, "step": 37286 }, { "epoch": 0.48, "grad_norm": 4.193709373474121, "learning_rate": 1.956905917854493e-05, "loss": 2.1164, "step": 37287 }, { "epoch": 0.48, "grad_norm": 3.1851823329925537, "learning_rate": 1.9569028663134683e-05, "loss": 1.5596, "step": 37288 }, { "epoch": 0.48, "grad_norm": 3.825659990310669, "learning_rate": 1.9568998146667854e-05, "loss": 1.7154, "step": 37289 }, { "epoch": 0.48, "grad_norm": 3.082292079925537, "learning_rate": 1.9568967629144443e-05, "loss": 1.3498, "step": 37290 }, { "epoch": 0.48, "grad_norm": 3.4907031059265137, "learning_rate": 1.9568937110564457e-05, "loss": 1.7737, "step": 37291 }, { "epoch": 0.48, "grad_norm": 4.2845377922058105, "learning_rate": 1.9568906590927895e-05, "loss": 2.443, "step": 37292 }, { "epoch": 0.48, "grad_norm": 3.738795280456543, "learning_rate": 1.956887607023477e-05, "loss": 2.1149, "step": 37293 }, { "epoch": 0.48, "grad_norm": 4.556464195251465, "learning_rate": 1.9568845548485073e-05, "loss": 2.0864, "step": 37294 }, { "epoch": 0.48, "grad_norm": 3.4559829235076904, "learning_rate": 1.9568815025678813e-05, "loss": 1.6726, "step": 37295 }, { "epoch": 0.48, "grad_norm": 4.864611625671387, "learning_rate": 1.9568784501815997e-05, "loss": 2.0929, "step": 37296 }, { "epoch": 0.48, "grad_norm": 3.6626973152160645, "learning_rate": 1.9568753976896623e-05, "loss": 2.0242, "step": 37297 }, { "epoch": 0.48, "grad_norm": 3.905303955078125, "learning_rate": 1.9568723450920694e-05, "loss": 1.8672, "step": 37298 }, { "epoch": 0.48, "grad_norm": 3.246840715408325, "learning_rate": 1.956869292388822e-05, "loss": 1.682, "step": 37299 }, { "epoch": 0.48, "grad_norm": 4.210121154785156, "learning_rate": 1.9568662395799198e-05, "loss": 2.0905, "step": 37300 }, { "epoch": 0.48, "grad_norm": 3.210832357406616, "learning_rate": 1.956863186665363e-05, "loss": 1.6721, "step": 37301 }, { "epoch": 0.48, "grad_norm": 3.8643910884857178, "learning_rate": 1.9568601336451526e-05, "loss": 1.9011, "step": 37302 }, { "epoch": 0.48, "grad_norm": 3.676180362701416, "learning_rate": 1.9568570805192886e-05, "loss": 2.1327, "step": 37303 }, { "epoch": 0.48, "grad_norm": 4.0978102684021, "learning_rate": 1.9568540272877715e-05, "loss": 1.7471, "step": 37304 }, { "epoch": 0.48, "grad_norm": 3.6125059127807617, "learning_rate": 1.9568509739506013e-05, "loss": 1.8424, "step": 37305 }, { "epoch": 0.48, "grad_norm": 3.5868802070617676, "learning_rate": 1.9568479205077786e-05, "loss": 1.8068, "step": 37306 }, { "epoch": 0.48, "grad_norm": 3.5603384971618652, "learning_rate": 1.9568448669593035e-05, "loss": 1.7819, "step": 37307 }, { "epoch": 0.48, "grad_norm": 3.9326021671295166, "learning_rate": 1.956841813305177e-05, "loss": 1.775, "step": 37308 }, { "epoch": 0.48, "grad_norm": 4.14602518081665, "learning_rate": 1.9568387595453982e-05, "loss": 2.5434, "step": 37309 }, { "epoch": 0.48, "grad_norm": 3.557807445526123, "learning_rate": 1.9568357056799684e-05, "loss": 1.6735, "step": 37310 }, { "epoch": 0.48, "grad_norm": 4.542590618133545, "learning_rate": 1.956832651708888e-05, "loss": 2.2976, "step": 37311 }, { "epoch": 0.48, "grad_norm": 3.7311792373657227, "learning_rate": 1.9568295976321566e-05, "loss": 1.9517, "step": 37312 }, { "epoch": 0.48, "grad_norm": 3.981382369995117, "learning_rate": 1.9568265434497756e-05, "loss": 2.0923, "step": 37313 }, { "epoch": 0.48, "grad_norm": 3.6604971885681152, "learning_rate": 1.956823489161744e-05, "loss": 1.7732, "step": 37314 }, { "epoch": 0.48, "grad_norm": 4.4130730628967285, "learning_rate": 1.9568204347680636e-05, "loss": 2.3084, "step": 37315 }, { "epoch": 0.48, "grad_norm": 3.895453929901123, "learning_rate": 1.9568173802687333e-05, "loss": 2.237, "step": 37316 }, { "epoch": 0.48, "grad_norm": 4.404361724853516, "learning_rate": 1.9568143256637543e-05, "loss": 2.0798, "step": 37317 }, { "epoch": 0.48, "grad_norm": 3.5685653686523438, "learning_rate": 1.956811270953127e-05, "loss": 2.0663, "step": 37318 }, { "epoch": 0.48, "grad_norm": 3.9410674571990967, "learning_rate": 1.9568082161368515e-05, "loss": 2.0036, "step": 37319 }, { "epoch": 0.48, "grad_norm": 3.808791399002075, "learning_rate": 1.956805161214928e-05, "loss": 2.2721, "step": 37320 }, { "epoch": 0.48, "grad_norm": 3.5096545219421387, "learning_rate": 1.956802106187357e-05, "loss": 2.0671, "step": 37321 }, { "epoch": 0.48, "grad_norm": 3.7744054794311523, "learning_rate": 1.9567990510541386e-05, "loss": 1.9324, "step": 37322 }, { "epoch": 0.48, "grad_norm": 3.8252265453338623, "learning_rate": 1.9567959958152737e-05, "loss": 1.8252, "step": 37323 }, { "epoch": 0.48, "grad_norm": 4.285264492034912, "learning_rate": 1.9567929404707618e-05, "loss": 2.3277, "step": 37324 }, { "epoch": 0.48, "grad_norm": 4.392050743103027, "learning_rate": 1.9567898850206045e-05, "loss": 2.466, "step": 37325 }, { "epoch": 0.48, "grad_norm": 3.801828384399414, "learning_rate": 1.9567868294648005e-05, "loss": 2.1059, "step": 37326 }, { "epoch": 0.48, "grad_norm": 3.7152626514434814, "learning_rate": 1.956783773803352e-05, "loss": 2.0875, "step": 37327 }, { "epoch": 0.48, "grad_norm": 3.66283917427063, "learning_rate": 1.9567807180362576e-05, "loss": 2.1035, "step": 37328 }, { "epoch": 0.48, "grad_norm": 4.205926418304443, "learning_rate": 1.9567776621635186e-05, "loss": 2.5708, "step": 37329 }, { "epoch": 0.48, "grad_norm": 3.831782341003418, "learning_rate": 1.956774606185135e-05, "loss": 1.8299, "step": 37330 }, { "epoch": 0.48, "grad_norm": 4.436915874481201, "learning_rate": 1.9567715501011074e-05, "loss": 2.3183, "step": 37331 }, { "epoch": 0.48, "grad_norm": 3.956068754196167, "learning_rate": 1.956768493911436e-05, "loss": 2.4884, "step": 37332 }, { "epoch": 0.48, "grad_norm": 4.037280559539795, "learning_rate": 1.956765437616121e-05, "loss": 1.8238, "step": 37333 }, { "epoch": 0.48, "grad_norm": 3.465391159057617, "learning_rate": 1.9567623812151627e-05, "loss": 1.4912, "step": 37334 }, { "epoch": 0.48, "grad_norm": 3.905014991760254, "learning_rate": 1.956759324708562e-05, "loss": 1.9437, "step": 37335 }, { "epoch": 0.48, "grad_norm": 3.6707189083099365, "learning_rate": 1.9567562680963185e-05, "loss": 1.9743, "step": 37336 }, { "epoch": 0.48, "grad_norm": 3.1995625495910645, "learning_rate": 1.956753211378433e-05, "loss": 1.6742, "step": 37337 }, { "epoch": 0.48, "grad_norm": 3.4531445503234863, "learning_rate": 1.956750154554906e-05, "loss": 1.9281, "step": 37338 }, { "epoch": 0.48, "grad_norm": 3.8270862102508545, "learning_rate": 1.956747097625737e-05, "loss": 1.9036, "step": 37339 }, { "epoch": 0.48, "grad_norm": 4.227065563201904, "learning_rate": 1.9567440405909272e-05, "loss": 2.128, "step": 37340 }, { "epoch": 0.48, "grad_norm": 3.9920220375061035, "learning_rate": 1.9567409834504767e-05, "loss": 1.9702, "step": 37341 }, { "epoch": 0.48, "grad_norm": 3.2909348011016846, "learning_rate": 1.9567379262043856e-05, "loss": 1.7071, "step": 37342 }, { "epoch": 0.48, "grad_norm": 3.6059560775756836, "learning_rate": 1.9567348688526543e-05, "loss": 1.7213, "step": 37343 }, { "epoch": 0.48, "grad_norm": 3.7283871173858643, "learning_rate": 1.9567318113952837e-05, "loss": 2.0509, "step": 37344 }, { "epoch": 0.48, "grad_norm": 3.6410064697265625, "learning_rate": 1.9567287538322732e-05, "loss": 2.1152, "step": 37345 }, { "epoch": 0.48, "grad_norm": 3.6566872596740723, "learning_rate": 1.956725696163624e-05, "loss": 1.902, "step": 37346 }, { "epoch": 0.48, "grad_norm": 3.8953659534454346, "learning_rate": 1.956722638389336e-05, "loss": 2.0258, "step": 37347 }, { "epoch": 0.48, "grad_norm": 3.2003583908081055, "learning_rate": 1.9567195805094094e-05, "loss": 1.6254, "step": 37348 }, { "epoch": 0.48, "grad_norm": 3.0417163372039795, "learning_rate": 1.9567165225238443e-05, "loss": 1.2101, "step": 37349 }, { "epoch": 0.48, "grad_norm": 3.6790220737457275, "learning_rate": 1.9567134644326424e-05, "loss": 1.5023, "step": 37350 }, { "epoch": 0.48, "grad_norm": 3.439544439315796, "learning_rate": 1.9567104062358026e-05, "loss": 1.6308, "step": 37351 }, { "epoch": 0.48, "grad_norm": 3.20400071144104, "learning_rate": 1.9567073479333255e-05, "loss": 1.5965, "step": 37352 }, { "epoch": 0.48, "grad_norm": 3.425004243850708, "learning_rate": 1.9567042895252123e-05, "loss": 1.8905, "step": 37353 }, { "epoch": 0.48, "grad_norm": 3.9899425506591797, "learning_rate": 1.9567012310114622e-05, "loss": 2.3226, "step": 37354 }, { "epoch": 0.48, "grad_norm": 4.539915561676025, "learning_rate": 1.9566981723920763e-05, "loss": 1.8621, "step": 37355 }, { "epoch": 0.48, "grad_norm": 4.261650085449219, "learning_rate": 1.9566951136670545e-05, "loss": 1.9303, "step": 37356 }, { "epoch": 0.48, "grad_norm": 4.202428817749023, "learning_rate": 1.9566920548363975e-05, "loss": 1.9936, "step": 37357 }, { "epoch": 0.48, "grad_norm": 3.8125433921813965, "learning_rate": 1.9566889959001054e-05, "loss": 1.9384, "step": 37358 }, { "epoch": 0.48, "grad_norm": 4.293458461761475, "learning_rate": 1.9566859368581784e-05, "loss": 2.4595, "step": 37359 }, { "epoch": 0.48, "grad_norm": 4.024687767028809, "learning_rate": 1.9566828777106177e-05, "loss": 2.0199, "step": 37360 }, { "epoch": 0.48, "grad_norm": 4.0681681632995605, "learning_rate": 1.9566798184574224e-05, "loss": 2.0428, "step": 37361 }, { "epoch": 0.48, "grad_norm": 3.517972230911255, "learning_rate": 1.9566767590985933e-05, "loss": 1.6247, "step": 37362 }, { "epoch": 0.48, "grad_norm": 4.22420072555542, "learning_rate": 1.956673699634131e-05, "loss": 2.1652, "step": 37363 }, { "epoch": 0.48, "grad_norm": 3.6312508583068848, "learning_rate": 1.956670640064036e-05, "loss": 1.8085, "step": 37364 }, { "epoch": 0.48, "grad_norm": 4.205616474151611, "learning_rate": 1.956667580388308e-05, "loss": 2.4967, "step": 37365 }, { "epoch": 0.48, "grad_norm": 3.89546537399292, "learning_rate": 1.956664520606948e-05, "loss": 1.9904, "step": 37366 }, { "epoch": 0.48, "grad_norm": 3.727076768875122, "learning_rate": 1.9566614607199558e-05, "loss": 2.3178, "step": 37367 }, { "epoch": 0.48, "grad_norm": 3.2582743167877197, "learning_rate": 1.956658400727332e-05, "loss": 2.025, "step": 37368 }, { "epoch": 0.48, "grad_norm": 4.330254077911377, "learning_rate": 1.9566553406290766e-05, "loss": 2.2014, "step": 37369 }, { "epoch": 0.48, "grad_norm": 4.011606216430664, "learning_rate": 1.9566522804251905e-05, "loss": 2.0841, "step": 37370 }, { "epoch": 0.48, "grad_norm": 4.133498191833496, "learning_rate": 1.956649220115674e-05, "loss": 2.0466, "step": 37371 }, { "epoch": 0.49, "grad_norm": 3.697474241256714, "learning_rate": 1.9566461597005267e-05, "loss": 1.9806, "step": 37372 }, { "epoch": 0.49, "grad_norm": 4.73380184173584, "learning_rate": 1.9566430991797497e-05, "loss": 2.3012, "step": 37373 }, { "epoch": 0.49, "grad_norm": 3.8794236183166504, "learning_rate": 1.9566400385533432e-05, "loss": 2.2052, "step": 37374 }, { "epoch": 0.49, "grad_norm": 3.4241271018981934, "learning_rate": 1.9566369778213074e-05, "loss": 1.6942, "step": 37375 }, { "epoch": 0.49, "grad_norm": 3.2305479049682617, "learning_rate": 1.956633916983642e-05, "loss": 1.579, "step": 37376 }, { "epoch": 0.49, "grad_norm": 3.1819427013397217, "learning_rate": 1.9566308560403487e-05, "loss": 1.5956, "step": 37377 }, { "epoch": 0.49, "grad_norm": 4.1677141189575195, "learning_rate": 1.9566277949914267e-05, "loss": 2.2645, "step": 37378 }, { "epoch": 0.49, "grad_norm": 3.4937679767608643, "learning_rate": 1.9566247338368772e-05, "loss": 1.661, "step": 37379 }, { "epoch": 0.49, "grad_norm": 4.232682228088379, "learning_rate": 1.9566216725766997e-05, "loss": 2.0064, "step": 37380 }, { "epoch": 0.49, "grad_norm": 4.2359724044799805, "learning_rate": 1.9566186112108952e-05, "loss": 2.044, "step": 37381 }, { "epoch": 0.49, "grad_norm": 3.9673144817352295, "learning_rate": 1.9566155497394636e-05, "loss": 1.9669, "step": 37382 }, { "epoch": 0.49, "grad_norm": 3.197883129119873, "learning_rate": 1.9566124881624056e-05, "loss": 1.5586, "step": 37383 }, { "epoch": 0.49, "grad_norm": 3.444758176803589, "learning_rate": 1.956609426479721e-05, "loss": 1.9249, "step": 37384 }, { "epoch": 0.49, "grad_norm": 4.072040557861328, "learning_rate": 1.956606364691411e-05, "loss": 2.0585, "step": 37385 }, { "epoch": 0.49, "grad_norm": 3.533567428588867, "learning_rate": 1.956603302797475e-05, "loss": 1.7135, "step": 37386 }, { "epoch": 0.49, "grad_norm": 3.851454496383667, "learning_rate": 1.956600240797914e-05, "loss": 1.8828, "step": 37387 }, { "epoch": 0.49, "grad_norm": 3.327548027038574, "learning_rate": 1.956597178692728e-05, "loss": 1.9668, "step": 37388 }, { "epoch": 0.49, "grad_norm": 3.7919797897338867, "learning_rate": 1.9565941164819175e-05, "loss": 1.717, "step": 37389 }, { "epoch": 0.49, "grad_norm": 3.8228213787078857, "learning_rate": 1.9565910541654824e-05, "loss": 2.0227, "step": 37390 }, { "epoch": 0.49, "grad_norm": 3.877628803253174, "learning_rate": 1.9565879917434237e-05, "loss": 2.0177, "step": 37391 }, { "epoch": 0.49, "grad_norm": 3.7099320888519287, "learning_rate": 1.9565849292157417e-05, "loss": 1.767, "step": 37392 }, { "epoch": 0.49, "grad_norm": 3.6097733974456787, "learning_rate": 1.9565818665824364e-05, "loss": 1.9993, "step": 37393 }, { "epoch": 0.49, "grad_norm": 4.0565690994262695, "learning_rate": 1.9565788038435084e-05, "loss": 2.1016, "step": 37394 }, { "epoch": 0.49, "grad_norm": 3.8199093341827393, "learning_rate": 1.9565757409989575e-05, "loss": 2.09, "step": 37395 }, { "epoch": 0.49, "grad_norm": 4.056438446044922, "learning_rate": 1.9565726780487843e-05, "loss": 2.4088, "step": 37396 }, { "epoch": 0.49, "grad_norm": 3.8914618492126465, "learning_rate": 1.9565696149929897e-05, "loss": 1.8659, "step": 37397 }, { "epoch": 0.49, "grad_norm": 3.805262565612793, "learning_rate": 1.956566551831573e-05, "loss": 1.8327, "step": 37398 }, { "epoch": 0.49, "grad_norm": 4.2139763832092285, "learning_rate": 1.956563488564536e-05, "loss": 2.4792, "step": 37399 }, { "epoch": 0.49, "grad_norm": 3.6135857105255127, "learning_rate": 1.9565604251918775e-05, "loss": 1.7862, "step": 37400 }, { "epoch": 0.49, "grad_norm": 3.7663912773132324, "learning_rate": 1.9565573617135985e-05, "loss": 2.4647, "step": 37401 }, { "epoch": 0.49, "grad_norm": 3.8089797496795654, "learning_rate": 1.9565542981296993e-05, "loss": 2.2562, "step": 37402 }, { "epoch": 0.49, "grad_norm": 4.126835346221924, "learning_rate": 1.9565512344401808e-05, "loss": 2.3876, "step": 37403 }, { "epoch": 0.49, "grad_norm": 3.6976091861724854, "learning_rate": 1.9565481706450424e-05, "loss": 1.9213, "step": 37404 }, { "epoch": 0.49, "grad_norm": 4.029536724090576, "learning_rate": 1.9565451067442847e-05, "loss": 2.3627, "step": 37405 }, { "epoch": 0.49, "grad_norm": 3.7933170795440674, "learning_rate": 1.956542042737909e-05, "loss": 2.1924, "step": 37406 }, { "epoch": 0.49, "grad_norm": 4.048337459564209, "learning_rate": 1.956538978625914e-05, "loss": 2.0024, "step": 37407 }, { "epoch": 0.49, "grad_norm": 3.9216468334198, "learning_rate": 1.956535914408301e-05, "loss": 1.7681, "step": 37408 }, { "epoch": 0.49, "grad_norm": 3.4889044761657715, "learning_rate": 1.95653285008507e-05, "loss": 1.9075, "step": 37409 }, { "epoch": 0.49, "grad_norm": 3.7310433387756348, "learning_rate": 1.9565297856562223e-05, "loss": 2.2201, "step": 37410 }, { "epoch": 0.49, "grad_norm": 3.675294876098633, "learning_rate": 1.956526721121757e-05, "loss": 2.1466, "step": 37411 }, { "epoch": 0.49, "grad_norm": 3.5696911811828613, "learning_rate": 1.956523656481675e-05, "loss": 1.8884, "step": 37412 }, { "epoch": 0.49, "grad_norm": 3.3140242099761963, "learning_rate": 1.9565205917359766e-05, "loss": 1.7383, "step": 37413 }, { "epoch": 0.49, "grad_norm": 3.5586652755737305, "learning_rate": 1.956517526884662e-05, "loss": 1.7324, "step": 37414 }, { "epoch": 0.49, "grad_norm": 3.506782293319702, "learning_rate": 1.9565144619277318e-05, "loss": 1.7134, "step": 37415 }, { "epoch": 0.49, "grad_norm": 4.543097496032715, "learning_rate": 1.9565113968651858e-05, "loss": 2.0279, "step": 37416 }, { "epoch": 0.49, "grad_norm": 3.627516031265259, "learning_rate": 1.956508331697025e-05, "loss": 1.8316, "step": 37417 }, { "epoch": 0.49, "grad_norm": 3.9507720470428467, "learning_rate": 1.9565052664232492e-05, "loss": 1.8631, "step": 37418 }, { "epoch": 0.49, "grad_norm": 4.3988847732543945, "learning_rate": 1.9565022010438594e-05, "loss": 1.9418, "step": 37419 }, { "epoch": 0.49, "grad_norm": 3.8557770252227783, "learning_rate": 1.9564991355588553e-05, "loss": 2.0467, "step": 37420 }, { "epoch": 0.49, "grad_norm": 4.256984233856201, "learning_rate": 1.9564960699682375e-05, "loss": 2.4488, "step": 37421 }, { "epoch": 0.49, "grad_norm": 3.7224621772766113, "learning_rate": 1.9564930042720064e-05, "loss": 1.7926, "step": 37422 }, { "epoch": 0.49, "grad_norm": 2.923011541366577, "learning_rate": 1.9564899384701623e-05, "loss": 1.4248, "step": 37423 }, { "epoch": 0.49, "grad_norm": 3.4224419593811035, "learning_rate": 1.9564868725627053e-05, "loss": 1.8572, "step": 37424 }, { "epoch": 0.49, "grad_norm": 4.29957914352417, "learning_rate": 1.9564838065496362e-05, "loss": 2.1531, "step": 37425 }, { "epoch": 0.49, "grad_norm": 4.085874557495117, "learning_rate": 1.9564807404309546e-05, "loss": 2.0935, "step": 37426 }, { "epoch": 0.49, "grad_norm": 3.481684446334839, "learning_rate": 1.9564776742066616e-05, "loss": 1.7963, "step": 37427 }, { "epoch": 0.49, "grad_norm": 3.5672059059143066, "learning_rate": 1.956474607876757e-05, "loss": 1.6558, "step": 37428 }, { "epoch": 0.49, "grad_norm": 3.7678189277648926, "learning_rate": 1.9564715414412415e-05, "loss": 2.0534, "step": 37429 }, { "epoch": 0.49, "grad_norm": 3.7055747509002686, "learning_rate": 1.9564684749001154e-05, "loss": 2.2377, "step": 37430 }, { "epoch": 0.49, "grad_norm": 3.6560447216033936, "learning_rate": 1.956465408253379e-05, "loss": 1.7758, "step": 37431 }, { "epoch": 0.49, "grad_norm": 4.286385536193848, "learning_rate": 1.9564623415010327e-05, "loss": 1.8704, "step": 37432 }, { "epoch": 0.49, "grad_norm": 3.677790880203247, "learning_rate": 1.9564592746430763e-05, "loss": 2.0469, "step": 37433 }, { "epoch": 0.49, "grad_norm": 3.7187204360961914, "learning_rate": 1.956456207679511e-05, "loss": 2.0538, "step": 37434 }, { "epoch": 0.49, "grad_norm": 3.8175361156463623, "learning_rate": 1.9564531406103364e-05, "loss": 2.1817, "step": 37435 }, { "epoch": 0.49, "grad_norm": 3.477189779281616, "learning_rate": 1.956450073435553e-05, "loss": 1.7817, "step": 37436 }, { "epoch": 0.49, "grad_norm": 3.837637424468994, "learning_rate": 1.9564470061551618e-05, "loss": 2.2804, "step": 37437 }, { "epoch": 0.49, "grad_norm": 3.7656028270721436, "learning_rate": 1.9564439387691624e-05, "loss": 1.9797, "step": 37438 }, { "epoch": 0.49, "grad_norm": 3.723068952560425, "learning_rate": 1.9564408712775552e-05, "loss": 2.0389, "step": 37439 }, { "epoch": 0.49, "grad_norm": 3.365652322769165, "learning_rate": 1.9564378036803407e-05, "loss": 1.5726, "step": 37440 }, { "epoch": 0.49, "grad_norm": 3.9446544647216797, "learning_rate": 1.9564347359775198e-05, "loss": 2.1963, "step": 37441 }, { "epoch": 0.49, "grad_norm": 3.7019169330596924, "learning_rate": 1.9564316681690915e-05, "loss": 1.8481, "step": 37442 }, { "epoch": 0.49, "grad_norm": 3.7379424571990967, "learning_rate": 1.9564286002550576e-05, "loss": 2.1101, "step": 37443 }, { "epoch": 0.49, "grad_norm": 3.8361384868621826, "learning_rate": 1.9564255322354172e-05, "loss": 2.034, "step": 37444 }, { "epoch": 0.49, "grad_norm": 3.680279493331909, "learning_rate": 1.9564224641101715e-05, "loss": 1.6766, "step": 37445 }, { "epoch": 0.49, "grad_norm": 4.148568630218506, "learning_rate": 1.9564193958793204e-05, "loss": 2.0642, "step": 37446 }, { "epoch": 0.49, "grad_norm": 3.728025436401367, "learning_rate": 1.9564163275428643e-05, "loss": 1.9348, "step": 37447 }, { "epoch": 0.49, "grad_norm": 3.9342713356018066, "learning_rate": 1.956413259100804e-05, "loss": 1.9091, "step": 37448 }, { "epoch": 0.49, "grad_norm": 4.300806522369385, "learning_rate": 1.956410190553139e-05, "loss": 2.3291, "step": 37449 }, { "epoch": 0.49, "grad_norm": 3.6741719245910645, "learning_rate": 1.9564071218998704e-05, "loss": 1.8469, "step": 37450 }, { "epoch": 0.49, "grad_norm": 3.7085015773773193, "learning_rate": 1.956404053140998e-05, "loss": 1.8078, "step": 37451 }, { "epoch": 0.49, "grad_norm": 4.235742092132568, "learning_rate": 1.9564009842765225e-05, "loss": 2.1434, "step": 37452 }, { "epoch": 0.49, "grad_norm": 3.4408912658691406, "learning_rate": 1.956397915306444e-05, "loss": 1.5864, "step": 37453 }, { "epoch": 0.49, "grad_norm": 3.514055013656616, "learning_rate": 1.956394846230763e-05, "loss": 1.7673, "step": 37454 }, { "epoch": 0.49, "grad_norm": 4.064133167266846, "learning_rate": 1.95639177704948e-05, "loss": 2.1006, "step": 37455 }, { "epoch": 0.49, "grad_norm": 3.554422616958618, "learning_rate": 1.956388707762595e-05, "loss": 2.0125, "step": 37456 }, { "epoch": 0.49, "grad_norm": 3.849151849746704, "learning_rate": 1.9563856383701084e-05, "loss": 1.6847, "step": 37457 }, { "epoch": 0.49, "grad_norm": 3.3687126636505127, "learning_rate": 1.9563825688720205e-05, "loss": 1.8299, "step": 37458 }, { "epoch": 0.49, "grad_norm": 3.792551040649414, "learning_rate": 1.956379499268332e-05, "loss": 1.9341, "step": 37459 }, { "epoch": 0.49, "grad_norm": 3.6862871646881104, "learning_rate": 1.9563764295590428e-05, "loss": 1.5989, "step": 37460 }, { "epoch": 0.49, "grad_norm": 3.71695613861084, "learning_rate": 1.9563733597441534e-05, "loss": 2.1548, "step": 37461 }, { "epoch": 0.49, "grad_norm": 4.596142292022705, "learning_rate": 1.956370289823664e-05, "loss": 2.2778, "step": 37462 }, { "epoch": 0.49, "grad_norm": 3.5069496631622314, "learning_rate": 1.9563672197975754e-05, "loss": 1.9599, "step": 37463 }, { "epoch": 0.49, "grad_norm": 3.2817375659942627, "learning_rate": 1.9563641496658876e-05, "loss": 1.5423, "step": 37464 }, { "epoch": 0.49, "grad_norm": 4.205321788787842, "learning_rate": 1.9563610794286008e-05, "loss": 1.8822, "step": 37465 }, { "epoch": 0.49, "grad_norm": 3.7374706268310547, "learning_rate": 1.9563580090857157e-05, "loss": 2.0738, "step": 37466 }, { "epoch": 0.49, "grad_norm": 3.2556142807006836, "learning_rate": 1.956354938637232e-05, "loss": 1.5886, "step": 37467 }, { "epoch": 0.49, "grad_norm": 4.031911849975586, "learning_rate": 1.956351868083151e-05, "loss": 1.6793, "step": 37468 }, { "epoch": 0.49, "grad_norm": 3.672041177749634, "learning_rate": 1.9563487974234725e-05, "loss": 1.6302, "step": 37469 }, { "epoch": 0.49, "grad_norm": 3.5109076499938965, "learning_rate": 1.9563457266581966e-05, "loss": 1.3757, "step": 37470 }, { "epoch": 0.49, "grad_norm": 3.7490954399108887, "learning_rate": 1.956342655787324e-05, "loss": 2.2017, "step": 37471 }, { "epoch": 0.49, "grad_norm": 3.3516924381256104, "learning_rate": 1.956339584810855e-05, "loss": 1.546, "step": 37472 }, { "epoch": 0.49, "grad_norm": 4.221719264984131, "learning_rate": 1.95633651372879e-05, "loss": 2.1259, "step": 37473 }, { "epoch": 0.49, "grad_norm": 3.9651284217834473, "learning_rate": 1.9563334425411292e-05, "loss": 1.8522, "step": 37474 }, { "epoch": 0.49, "grad_norm": 4.490350723266602, "learning_rate": 1.956330371247873e-05, "loss": 2.6871, "step": 37475 }, { "epoch": 0.49, "grad_norm": 3.4703269004821777, "learning_rate": 1.9563272998490214e-05, "loss": 1.9106, "step": 37476 }, { "epoch": 0.49, "grad_norm": 4.411373615264893, "learning_rate": 1.956324228344575e-05, "loss": 2.1671, "step": 37477 }, { "epoch": 0.49, "grad_norm": 3.374872922897339, "learning_rate": 1.9563211567345344e-05, "loss": 1.7333, "step": 37478 }, { "epoch": 0.49, "grad_norm": 4.25029993057251, "learning_rate": 1.9563180850189e-05, "loss": 2.4703, "step": 37479 }, { "epoch": 0.49, "grad_norm": 3.862734317779541, "learning_rate": 1.9563150131976714e-05, "loss": 2.1109, "step": 37480 }, { "epoch": 0.49, "grad_norm": 3.854959726333618, "learning_rate": 1.9563119412708497e-05, "loss": 1.7671, "step": 37481 }, { "epoch": 0.49, "grad_norm": 4.30239725112915, "learning_rate": 1.9563088692384348e-05, "loss": 2.7749, "step": 37482 }, { "epoch": 0.49, "grad_norm": 3.5806217193603516, "learning_rate": 1.956305797100427e-05, "loss": 1.687, "step": 37483 }, { "epoch": 0.49, "grad_norm": 4.7044477462768555, "learning_rate": 1.9563027248568272e-05, "loss": 1.971, "step": 37484 }, { "epoch": 0.49, "grad_norm": 3.7966372966766357, "learning_rate": 1.956299652507635e-05, "loss": 1.8707, "step": 37485 }, { "epoch": 0.49, "grad_norm": 3.6627397537231445, "learning_rate": 1.9562965800528514e-05, "loss": 1.7015, "step": 37486 }, { "epoch": 0.49, "grad_norm": 3.9749999046325684, "learning_rate": 1.956293507492476e-05, "loss": 1.9128, "step": 37487 }, { "epoch": 0.49, "grad_norm": 3.651134967803955, "learning_rate": 1.95629043482651e-05, "loss": 1.6545, "step": 37488 }, { "epoch": 0.49, "grad_norm": 3.5770907402038574, "learning_rate": 1.9562873620549532e-05, "loss": 2.0104, "step": 37489 }, { "epoch": 0.49, "grad_norm": 3.4331727027893066, "learning_rate": 1.956284289177806e-05, "loss": 1.6472, "step": 37490 }, { "epoch": 0.49, "grad_norm": 3.9981935024261475, "learning_rate": 1.9562812161950688e-05, "loss": 2.328, "step": 37491 }, { "epoch": 0.49, "grad_norm": 3.2089028358459473, "learning_rate": 1.956278143106742e-05, "loss": 1.5125, "step": 37492 }, { "epoch": 0.49, "grad_norm": 3.6815531253814697, "learning_rate": 1.9562750699128257e-05, "loss": 2.0771, "step": 37493 }, { "epoch": 0.49, "grad_norm": 3.793696880340576, "learning_rate": 1.9562719966133207e-05, "loss": 1.891, "step": 37494 }, { "epoch": 0.49, "grad_norm": 3.570063352584839, "learning_rate": 1.9562689232082265e-05, "loss": 1.7053, "step": 37495 }, { "epoch": 0.49, "grad_norm": 3.222505807876587, "learning_rate": 1.9562658496975447e-05, "loss": 1.4739, "step": 37496 }, { "epoch": 0.49, "grad_norm": 3.59012508392334, "learning_rate": 1.9562627760812744e-05, "loss": 1.923, "step": 37497 }, { "epoch": 0.49, "grad_norm": 3.8812222480773926, "learning_rate": 1.9562597023594167e-05, "loss": 2.2335, "step": 37498 }, { "epoch": 0.49, "grad_norm": 3.5893874168395996, "learning_rate": 1.9562566285319716e-05, "loss": 1.8406, "step": 37499 }, { "epoch": 0.49, "grad_norm": 3.667849063873291, "learning_rate": 1.9562535545989395e-05, "loss": 1.7894, "step": 37500 }, { "epoch": 0.49, "grad_norm": 3.4134137630462646, "learning_rate": 1.956250480560321e-05, "loss": 1.9697, "step": 37501 }, { "epoch": 0.49, "grad_norm": 3.761796474456787, "learning_rate": 1.956247406416116e-05, "loss": 1.8907, "step": 37502 }, { "epoch": 0.49, "grad_norm": 3.3689634799957275, "learning_rate": 1.956244332166325e-05, "loss": 1.5612, "step": 37503 }, { "epoch": 0.49, "grad_norm": 3.5445473194122314, "learning_rate": 1.9562412578109487e-05, "loss": 1.8097, "step": 37504 }, { "epoch": 0.49, "grad_norm": 3.54343843460083, "learning_rate": 1.9562381833499874e-05, "loss": 2.0478, "step": 37505 }, { "epoch": 0.49, "grad_norm": 4.207189559936523, "learning_rate": 1.9562351087834406e-05, "loss": 2.4881, "step": 37506 }, { "epoch": 0.49, "grad_norm": 3.543583631515503, "learning_rate": 1.9562320341113096e-05, "loss": 1.8974, "step": 37507 }, { "epoch": 0.49, "grad_norm": 3.2165451049804688, "learning_rate": 1.9562289593335938e-05, "loss": 1.7415, "step": 37508 }, { "epoch": 0.49, "grad_norm": 3.8384792804718018, "learning_rate": 1.9562258844502947e-05, "loss": 2.2595, "step": 37509 }, { "epoch": 0.49, "grad_norm": 3.9552018642425537, "learning_rate": 1.9562228094614116e-05, "loss": 2.7838, "step": 37510 }, { "epoch": 0.49, "grad_norm": 3.6417152881622314, "learning_rate": 1.9562197343669455e-05, "loss": 1.7727, "step": 37511 }, { "epoch": 0.49, "grad_norm": 3.5973498821258545, "learning_rate": 1.9562166591668967e-05, "loss": 1.9908, "step": 37512 }, { "epoch": 0.49, "grad_norm": 3.9998645782470703, "learning_rate": 1.9562135838612653e-05, "loss": 2.6535, "step": 37513 }, { "epoch": 0.49, "grad_norm": 3.7815988063812256, "learning_rate": 1.9562105084500512e-05, "loss": 2.0915, "step": 37514 }, { "epoch": 0.49, "grad_norm": 4.414064407348633, "learning_rate": 1.9562074329332555e-05, "loss": 2.242, "step": 37515 }, { "epoch": 0.49, "grad_norm": 3.2146403789520264, "learning_rate": 1.9562043573108785e-05, "loss": 1.4947, "step": 37516 }, { "epoch": 0.49, "grad_norm": 3.7693092823028564, "learning_rate": 1.9562012815829202e-05, "loss": 1.834, "step": 37517 }, { "epoch": 0.49, "grad_norm": 3.338440179824829, "learning_rate": 1.9561982057493812e-05, "loss": 1.6002, "step": 37518 }, { "epoch": 0.49, "grad_norm": 3.987386465072632, "learning_rate": 1.9561951298102617e-05, "loss": 1.8627, "step": 37519 }, { "epoch": 0.49, "grad_norm": 3.789721965789795, "learning_rate": 1.9561920537655618e-05, "loss": 2.0197, "step": 37520 }, { "epoch": 0.49, "grad_norm": 3.962815284729004, "learning_rate": 1.9561889776152823e-05, "loss": 2.2537, "step": 37521 }, { "epoch": 0.49, "grad_norm": 3.803964853286743, "learning_rate": 1.956185901359423e-05, "loss": 2.2662, "step": 37522 }, { "epoch": 0.49, "grad_norm": 3.5139410495758057, "learning_rate": 1.956182824997985e-05, "loss": 2.0904, "step": 37523 }, { "epoch": 0.49, "grad_norm": 3.861654758453369, "learning_rate": 1.9561797485309677e-05, "loss": 2.1573, "step": 37524 }, { "epoch": 0.49, "grad_norm": 3.3774681091308594, "learning_rate": 1.956176671958372e-05, "loss": 1.7626, "step": 37525 }, { "epoch": 0.49, "grad_norm": 3.2363617420196533, "learning_rate": 1.9561735952801984e-05, "loss": 1.6879, "step": 37526 }, { "epoch": 0.49, "grad_norm": 3.3667356967926025, "learning_rate": 1.956170518496447e-05, "loss": 1.5729, "step": 37527 }, { "epoch": 0.49, "grad_norm": 3.4475791454315186, "learning_rate": 1.956167441607118e-05, "loss": 1.6039, "step": 37528 }, { "epoch": 0.49, "grad_norm": 3.6622464656829834, "learning_rate": 1.9561643646122123e-05, "loss": 2.051, "step": 37529 }, { "epoch": 0.49, "grad_norm": 4.055080413818359, "learning_rate": 1.9561612875117292e-05, "loss": 1.8803, "step": 37530 }, { "epoch": 0.49, "grad_norm": 3.7988622188568115, "learning_rate": 1.9561582103056703e-05, "loss": 2.3834, "step": 37531 }, { "epoch": 0.49, "grad_norm": 4.178398609161377, "learning_rate": 1.9561551329940348e-05, "loss": 1.7837, "step": 37532 }, { "epoch": 0.49, "grad_norm": 3.9498302936553955, "learning_rate": 1.9561520555768238e-05, "loss": 2.0837, "step": 37533 }, { "epoch": 0.49, "grad_norm": 3.527219533920288, "learning_rate": 1.9561489780540372e-05, "loss": 1.8338, "step": 37534 }, { "epoch": 0.49, "grad_norm": 3.70947527885437, "learning_rate": 1.9561459004256757e-05, "loss": 2.414, "step": 37535 }, { "epoch": 0.49, "grad_norm": 3.701131582260132, "learning_rate": 1.9561428226917394e-05, "loss": 1.9772, "step": 37536 }, { "epoch": 0.49, "grad_norm": 3.222956418991089, "learning_rate": 1.956139744852229e-05, "loss": 1.9933, "step": 37537 }, { "epoch": 0.49, "grad_norm": 4.78408670425415, "learning_rate": 1.9561366669071443e-05, "loss": 2.5702, "step": 37538 }, { "epoch": 0.49, "grad_norm": 3.900759696960449, "learning_rate": 1.9561335888564857e-05, "loss": 1.7256, "step": 37539 }, { "epoch": 0.49, "grad_norm": 3.7659683227539062, "learning_rate": 1.956130510700254e-05, "loss": 1.895, "step": 37540 }, { "epoch": 0.49, "grad_norm": 3.495090961456299, "learning_rate": 1.9561274324384495e-05, "loss": 1.8317, "step": 37541 }, { "epoch": 0.49, "grad_norm": 4.156306743621826, "learning_rate": 1.9561243540710718e-05, "loss": 2.2111, "step": 37542 }, { "epoch": 0.49, "grad_norm": 3.293368339538574, "learning_rate": 1.956121275598122e-05, "loss": 1.6989, "step": 37543 }, { "epoch": 0.49, "grad_norm": 3.8334333896636963, "learning_rate": 1.9561181970196003e-05, "loss": 1.9255, "step": 37544 }, { "epoch": 0.49, "grad_norm": 3.2240421772003174, "learning_rate": 1.9561151183355068e-05, "loss": 1.7141, "step": 37545 }, { "epoch": 0.49, "grad_norm": 4.050851821899414, "learning_rate": 1.956112039545842e-05, "loss": 2.2402, "step": 37546 }, { "epoch": 0.49, "grad_norm": 3.456223487854004, "learning_rate": 1.956108960650606e-05, "loss": 1.7641, "step": 37547 }, { "epoch": 0.49, "grad_norm": 4.112520217895508, "learning_rate": 1.9561058816497996e-05, "loss": 2.0569, "step": 37548 }, { "epoch": 0.49, "grad_norm": 3.6965208053588867, "learning_rate": 1.956102802543423e-05, "loss": 2.0673, "step": 37549 }, { "epoch": 0.49, "grad_norm": 3.8580539226531982, "learning_rate": 1.956099723331476e-05, "loss": 1.5777, "step": 37550 }, { "epoch": 0.49, "grad_norm": 3.9940109252929688, "learning_rate": 1.95609664401396e-05, "loss": 2.4108, "step": 37551 }, { "epoch": 0.49, "grad_norm": 3.8150315284729004, "learning_rate": 1.9560935645908738e-05, "loss": 2.0323, "step": 37552 }, { "epoch": 0.49, "grad_norm": 3.6022629737854004, "learning_rate": 1.9560904850622194e-05, "loss": 1.7818, "step": 37553 }, { "epoch": 0.49, "grad_norm": 4.910616874694824, "learning_rate": 1.956087405427996e-05, "loss": 2.4255, "step": 37554 }, { "epoch": 0.49, "grad_norm": 3.3620827198028564, "learning_rate": 1.956084325688205e-05, "loss": 1.6508, "step": 37555 }, { "epoch": 0.49, "grad_norm": 4.021280765533447, "learning_rate": 1.956081245842845e-05, "loss": 2.2929, "step": 37556 }, { "epoch": 0.49, "grad_norm": 3.717024564743042, "learning_rate": 1.956078165891918e-05, "loss": 1.7673, "step": 37557 }, { "epoch": 0.49, "grad_norm": 3.3004541397094727, "learning_rate": 1.9560750858354238e-05, "loss": 2.003, "step": 37558 }, { "epoch": 0.49, "grad_norm": 3.5722954273223877, "learning_rate": 1.9560720056733625e-05, "loss": 1.7216, "step": 37559 }, { "epoch": 0.49, "grad_norm": 3.3075666427612305, "learning_rate": 1.9560689254057352e-05, "loss": 1.5299, "step": 37560 }, { "epoch": 0.49, "grad_norm": 3.6783366203308105, "learning_rate": 1.956065845032541e-05, "loss": 1.8997, "step": 37561 }, { "epoch": 0.49, "grad_norm": 3.4563586711883545, "learning_rate": 1.956062764553781e-05, "loss": 1.9553, "step": 37562 }, { "epoch": 0.49, "grad_norm": 3.5849223136901855, "learning_rate": 1.9560596839694558e-05, "loss": 1.6946, "step": 37563 }, { "epoch": 0.49, "grad_norm": 3.9372715950012207, "learning_rate": 1.956056603279565e-05, "loss": 1.881, "step": 37564 }, { "epoch": 0.49, "grad_norm": 3.3146450519561768, "learning_rate": 1.9560535224841094e-05, "loss": 1.6002, "step": 37565 }, { "epoch": 0.49, "grad_norm": 3.384777784347534, "learning_rate": 1.9560504415830894e-05, "loss": 1.7176, "step": 37566 }, { "epoch": 0.49, "grad_norm": 3.6804871559143066, "learning_rate": 1.956047360576505e-05, "loss": 1.9653, "step": 37567 }, { "epoch": 0.49, "grad_norm": 3.8569819927215576, "learning_rate": 1.956044279464357e-05, "loss": 2.0433, "step": 37568 }, { "epoch": 0.49, "grad_norm": 4.340863227844238, "learning_rate": 1.9560411982466455e-05, "loss": 2.2916, "step": 37569 }, { "epoch": 0.49, "grad_norm": 3.939391613006592, "learning_rate": 1.9560381169233708e-05, "loss": 1.9278, "step": 37570 }, { "epoch": 0.49, "grad_norm": 3.6423497200012207, "learning_rate": 1.956035035494533e-05, "loss": 2.1999, "step": 37571 }, { "epoch": 0.49, "grad_norm": 3.221053123474121, "learning_rate": 1.956031953960133e-05, "loss": 1.6283, "step": 37572 }, { "epoch": 0.49, "grad_norm": 3.532825231552124, "learning_rate": 1.956028872320171e-05, "loss": 2.0659, "step": 37573 }, { "epoch": 0.49, "grad_norm": 3.9607038497924805, "learning_rate": 1.956025790574647e-05, "loss": 2.1614, "step": 37574 }, { "epoch": 0.49, "grad_norm": 3.5485072135925293, "learning_rate": 1.956022708723562e-05, "loss": 1.9861, "step": 37575 }, { "epoch": 0.49, "grad_norm": 2.988237142562866, "learning_rate": 1.956019626766915e-05, "loss": 1.6377, "step": 37576 }, { "epoch": 0.49, "grad_norm": 4.246021747589111, "learning_rate": 1.9560165447047076e-05, "loss": 2.2635, "step": 37577 }, { "epoch": 0.49, "grad_norm": 4.09680700302124, "learning_rate": 1.9560134625369398e-05, "loss": 2.0348, "step": 37578 }, { "epoch": 0.49, "grad_norm": 3.4455759525299072, "learning_rate": 1.956010380263612e-05, "loss": 2.1645, "step": 37579 }, { "epoch": 0.49, "grad_norm": 3.797112464904785, "learning_rate": 1.956007297884724e-05, "loss": 1.9751, "step": 37580 }, { "epoch": 0.49, "grad_norm": 3.4905078411102295, "learning_rate": 1.956004215400277e-05, "loss": 1.7561, "step": 37581 }, { "epoch": 0.49, "grad_norm": 4.1192498207092285, "learning_rate": 1.9560011328102707e-05, "loss": 2.4905, "step": 37582 }, { "epoch": 0.49, "grad_norm": 3.7533745765686035, "learning_rate": 1.9559980501147058e-05, "loss": 2.0977, "step": 37583 }, { "epoch": 0.49, "grad_norm": 4.238340854644775, "learning_rate": 1.9559949673135826e-05, "loss": 2.114, "step": 37584 }, { "epoch": 0.49, "grad_norm": 3.231574058532715, "learning_rate": 1.955991884406901e-05, "loss": 1.4427, "step": 37585 }, { "epoch": 0.49, "grad_norm": 3.8292176723480225, "learning_rate": 1.955988801394662e-05, "loss": 1.8909, "step": 37586 }, { "epoch": 0.49, "grad_norm": 3.8275539875030518, "learning_rate": 1.9559857182768654e-05, "loss": 1.7118, "step": 37587 }, { "epoch": 0.49, "grad_norm": 3.6397435665130615, "learning_rate": 1.955982635053512e-05, "loss": 1.955, "step": 37588 }, { "epoch": 0.49, "grad_norm": 3.928640842437744, "learning_rate": 1.9559795517246014e-05, "loss": 2.1303, "step": 37589 }, { "epoch": 0.49, "grad_norm": 4.223321914672852, "learning_rate": 1.955976468290135e-05, "loss": 2.0925, "step": 37590 }, { "epoch": 0.49, "grad_norm": 3.3114013671875, "learning_rate": 1.955973384750112e-05, "loss": 1.8006, "step": 37591 }, { "epoch": 0.49, "grad_norm": 3.4418976306915283, "learning_rate": 1.955970301104534e-05, "loss": 1.5912, "step": 37592 }, { "epoch": 0.49, "grad_norm": 3.548611879348755, "learning_rate": 1.9559672173534e-05, "loss": 1.8626, "step": 37593 }, { "epoch": 0.49, "grad_norm": 3.674290180206299, "learning_rate": 1.9559641334967114e-05, "loss": 1.7158, "step": 37594 }, { "epoch": 0.49, "grad_norm": 4.21686315536499, "learning_rate": 1.9559610495344683e-05, "loss": 1.7356, "step": 37595 }, { "epoch": 0.49, "grad_norm": 3.515801429748535, "learning_rate": 1.9559579654666704e-05, "loss": 1.87, "step": 37596 }, { "epoch": 0.49, "grad_norm": 4.056633472442627, "learning_rate": 1.9559548812933192e-05, "loss": 2.2103, "step": 37597 }, { "epoch": 0.49, "grad_norm": 4.173289775848389, "learning_rate": 1.955951797014414e-05, "loss": 2.2046, "step": 37598 }, { "epoch": 0.49, "grad_norm": 4.1031646728515625, "learning_rate": 1.955948712629955e-05, "loss": 1.7476, "step": 37599 }, { "epoch": 0.49, "grad_norm": 4.40308952331543, "learning_rate": 1.9559456281399437e-05, "loss": 2.5172, "step": 37600 }, { "epoch": 0.49, "grad_norm": 3.823499917984009, "learning_rate": 1.9559425435443795e-05, "loss": 2.0104, "step": 37601 }, { "epoch": 0.49, "grad_norm": 3.846163272857666, "learning_rate": 1.955939458843263e-05, "loss": 1.7788, "step": 37602 }, { "epoch": 0.49, "grad_norm": 4.123045921325684, "learning_rate": 1.9559363740365946e-05, "loss": 2.1491, "step": 37603 }, { "epoch": 0.49, "grad_norm": 3.5278239250183105, "learning_rate": 1.955933289124375e-05, "loss": 1.9177, "step": 37604 }, { "epoch": 0.49, "grad_norm": 3.3250303268432617, "learning_rate": 1.9559302041066037e-05, "loss": 1.3492, "step": 37605 }, { "epoch": 0.49, "grad_norm": 3.6655359268188477, "learning_rate": 1.9559271189832814e-05, "loss": 2.0786, "step": 37606 }, { "epoch": 0.49, "grad_norm": 3.956984043121338, "learning_rate": 1.9559240337544086e-05, "loss": 2.139, "step": 37607 }, { "epoch": 0.49, "grad_norm": 3.5528225898742676, "learning_rate": 1.955920948419986e-05, "loss": 2.0993, "step": 37608 }, { "epoch": 0.49, "grad_norm": 3.1034278869628906, "learning_rate": 1.9559178629800134e-05, "loss": 1.3766, "step": 37609 }, { "epoch": 0.49, "grad_norm": 3.788364887237549, "learning_rate": 1.955914777434491e-05, "loss": 2.1013, "step": 37610 }, { "epoch": 0.49, "grad_norm": 3.5398385524749756, "learning_rate": 1.9559116917834193e-05, "loss": 1.9783, "step": 37611 }, { "epoch": 0.49, "grad_norm": 3.172100305557251, "learning_rate": 1.955908606026799e-05, "loss": 1.3549, "step": 37612 }, { "epoch": 0.49, "grad_norm": 3.7941253185272217, "learning_rate": 1.95590552016463e-05, "loss": 1.6524, "step": 37613 }, { "epoch": 0.49, "grad_norm": 3.910303831100464, "learning_rate": 1.9559024341969128e-05, "loss": 2.281, "step": 37614 }, { "epoch": 0.49, "grad_norm": 3.6779279708862305, "learning_rate": 1.9558993481236478e-05, "loss": 1.7139, "step": 37615 }, { "epoch": 0.49, "grad_norm": 3.7424628734588623, "learning_rate": 1.9558962619448353e-05, "loss": 2.1423, "step": 37616 }, { "epoch": 0.49, "grad_norm": 3.4050567150115967, "learning_rate": 1.9558931756604758e-05, "loss": 1.7525, "step": 37617 }, { "epoch": 0.49, "grad_norm": 3.629103899002075, "learning_rate": 1.9558900892705692e-05, "loss": 1.8379, "step": 37618 }, { "epoch": 0.49, "grad_norm": 3.9041943550109863, "learning_rate": 1.9558870027751166e-05, "loss": 2.1043, "step": 37619 }, { "epoch": 0.49, "grad_norm": 3.5664710998535156, "learning_rate": 1.955883916174117e-05, "loss": 2.081, "step": 37620 }, { "epoch": 0.49, "grad_norm": 4.088862895965576, "learning_rate": 1.955880829467572e-05, "loss": 2.711, "step": 37621 }, { "epoch": 0.49, "grad_norm": 2.985729217529297, "learning_rate": 1.955877742655482e-05, "loss": 1.5081, "step": 37622 }, { "epoch": 0.49, "grad_norm": 3.5795981884002686, "learning_rate": 1.9558746557378466e-05, "loss": 1.8875, "step": 37623 }, { "epoch": 0.49, "grad_norm": 3.386550188064575, "learning_rate": 1.9558715687146663e-05, "loss": 1.5285, "step": 37624 }, { "epoch": 0.49, "grad_norm": 3.50113844871521, "learning_rate": 1.9558684815859417e-05, "loss": 2.0409, "step": 37625 }, { "epoch": 0.49, "grad_norm": 3.7281179428100586, "learning_rate": 1.9558653943516726e-05, "loss": 1.5532, "step": 37626 }, { "epoch": 0.49, "grad_norm": 3.6451849937438965, "learning_rate": 1.9558623070118603e-05, "loss": 1.6306, "step": 37627 }, { "epoch": 0.49, "grad_norm": 3.439455509185791, "learning_rate": 1.9558592195665042e-05, "loss": 1.8151, "step": 37628 }, { "epoch": 0.49, "grad_norm": 3.9661099910736084, "learning_rate": 1.955856132015605e-05, "loss": 1.8218, "step": 37629 }, { "epoch": 0.49, "grad_norm": 4.389468669891357, "learning_rate": 1.9558530443591633e-05, "loss": 2.1955, "step": 37630 }, { "epoch": 0.49, "grad_norm": 4.01531982421875, "learning_rate": 1.9558499565971792e-05, "loss": 2.08, "step": 37631 }, { "epoch": 0.49, "grad_norm": 3.1105124950408936, "learning_rate": 1.955846868729653e-05, "loss": 1.284, "step": 37632 }, { "epoch": 0.49, "grad_norm": 3.752948045730591, "learning_rate": 1.955843780756585e-05, "loss": 1.7694, "step": 37633 }, { "epoch": 0.49, "grad_norm": 3.9328019618988037, "learning_rate": 1.955840692677976e-05, "loss": 1.7382, "step": 37634 }, { "epoch": 0.49, "grad_norm": 3.660367727279663, "learning_rate": 1.9558376044938256e-05, "loss": 1.873, "step": 37635 }, { "epoch": 0.49, "grad_norm": 3.386237144470215, "learning_rate": 1.9558345162041345e-05, "loss": 1.8703, "step": 37636 }, { "epoch": 0.49, "grad_norm": 4.036646842956543, "learning_rate": 1.955831427808903e-05, "loss": 2.2536, "step": 37637 }, { "epoch": 0.49, "grad_norm": 4.059257507324219, "learning_rate": 1.955828339308132e-05, "loss": 2.0353, "step": 37638 }, { "epoch": 0.49, "grad_norm": 3.8130924701690674, "learning_rate": 1.955825250701821e-05, "loss": 1.854, "step": 37639 }, { "epoch": 0.49, "grad_norm": 3.764772891998291, "learning_rate": 1.9558221619899703e-05, "loss": 1.8375, "step": 37640 }, { "epoch": 0.49, "grad_norm": 4.288044452667236, "learning_rate": 1.955819073172581e-05, "loss": 2.1503, "step": 37641 }, { "epoch": 0.49, "grad_norm": 3.5593552589416504, "learning_rate": 1.955815984249653e-05, "loss": 1.5065, "step": 37642 }, { "epoch": 0.49, "grad_norm": 3.4275481700897217, "learning_rate": 1.955812895221187e-05, "loss": 2.0577, "step": 37643 }, { "epoch": 0.49, "grad_norm": 3.603209972381592, "learning_rate": 1.9558098060871826e-05, "loss": 1.6902, "step": 37644 }, { "epoch": 0.49, "grad_norm": 4.1918463706970215, "learning_rate": 1.955806716847641e-05, "loss": 1.9102, "step": 37645 }, { "epoch": 0.49, "grad_norm": 3.594205379486084, "learning_rate": 1.9558036275025614e-05, "loss": 1.8212, "step": 37646 }, { "epoch": 0.49, "grad_norm": 3.5213510990142822, "learning_rate": 1.9558005380519457e-05, "loss": 1.8322, "step": 37647 }, { "epoch": 0.49, "grad_norm": 4.032862663269043, "learning_rate": 1.955797448495793e-05, "loss": 1.9729, "step": 37648 }, { "epoch": 0.49, "grad_norm": 4.193391799926758, "learning_rate": 1.955794358834104e-05, "loss": 1.7956, "step": 37649 }, { "epoch": 0.49, "grad_norm": 3.318204879760742, "learning_rate": 1.9557912690668792e-05, "loss": 1.6453, "step": 37650 }, { "epoch": 0.49, "grad_norm": 3.255679130554199, "learning_rate": 1.955788179194119e-05, "loss": 1.7983, "step": 37651 }, { "epoch": 0.49, "grad_norm": 3.713963270187378, "learning_rate": 1.9557850892158232e-05, "loss": 1.9849, "step": 37652 }, { "epoch": 0.49, "grad_norm": 3.4857475757598877, "learning_rate": 1.9557819991319927e-05, "loss": 2.0821, "step": 37653 }, { "epoch": 0.49, "grad_norm": 3.6461637020111084, "learning_rate": 1.9557789089426277e-05, "loss": 2.1199, "step": 37654 }, { "epoch": 0.49, "grad_norm": 3.8389716148376465, "learning_rate": 1.9557758186477284e-05, "loss": 2.1594, "step": 37655 }, { "epoch": 0.49, "grad_norm": 4.331320285797119, "learning_rate": 1.955772728247295e-05, "loss": 2.4048, "step": 37656 }, { "epoch": 0.49, "grad_norm": 3.1109888553619385, "learning_rate": 1.9557696377413283e-05, "loss": 1.7383, "step": 37657 }, { "epoch": 0.49, "grad_norm": 3.5231316089630127, "learning_rate": 1.955766547129829e-05, "loss": 1.8456, "step": 37658 }, { "epoch": 0.49, "grad_norm": 4.237053394317627, "learning_rate": 1.955763456412796e-05, "loss": 2.125, "step": 37659 }, { "epoch": 0.49, "grad_norm": 3.8946189880371094, "learning_rate": 1.955760365590231e-05, "loss": 2.209, "step": 37660 }, { "epoch": 0.49, "grad_norm": 4.6312432289123535, "learning_rate": 1.9557572746621332e-05, "loss": 2.6741, "step": 37661 }, { "epoch": 0.49, "grad_norm": 3.801187753677368, "learning_rate": 1.9557541836285044e-05, "loss": 2.0207, "step": 37662 }, { "epoch": 0.49, "grad_norm": 3.6038219928741455, "learning_rate": 1.9557510924893437e-05, "loss": 1.7435, "step": 37663 }, { "epoch": 0.49, "grad_norm": 3.9396471977233887, "learning_rate": 1.9557480012446518e-05, "loss": 2.0572, "step": 37664 }, { "epoch": 0.49, "grad_norm": 3.3347113132476807, "learning_rate": 1.9557449098944295e-05, "loss": 1.6029, "step": 37665 }, { "epoch": 0.49, "grad_norm": 3.713648557662964, "learning_rate": 1.9557418184386763e-05, "loss": 2.0446, "step": 37666 }, { "epoch": 0.49, "grad_norm": 3.1618616580963135, "learning_rate": 1.9557387268773933e-05, "loss": 1.3959, "step": 37667 }, { "epoch": 0.49, "grad_norm": 3.82576322555542, "learning_rate": 1.9557356352105806e-05, "loss": 1.9501, "step": 37668 }, { "epoch": 0.49, "grad_norm": 3.727665662765503, "learning_rate": 1.955732543438238e-05, "loss": 1.7523, "step": 37669 }, { "epoch": 0.49, "grad_norm": 3.6296026706695557, "learning_rate": 1.9557294515603666e-05, "loss": 1.8372, "step": 37670 }, { "epoch": 0.49, "grad_norm": 3.465264320373535, "learning_rate": 1.9557263595769665e-05, "loss": 1.8015, "step": 37671 }, { "epoch": 0.49, "grad_norm": 3.548758029937744, "learning_rate": 1.955723267488038e-05, "loss": 1.7838, "step": 37672 }, { "epoch": 0.49, "grad_norm": 3.99436354637146, "learning_rate": 1.9557201752935812e-05, "loss": 2.2159, "step": 37673 }, { "epoch": 0.49, "grad_norm": 4.343532562255859, "learning_rate": 1.955717082993597e-05, "loss": 2.4906, "step": 37674 }, { "epoch": 0.49, "grad_norm": 3.738281726837158, "learning_rate": 1.9557139905880852e-05, "loss": 2.053, "step": 37675 }, { "epoch": 0.49, "grad_norm": 4.11187744140625, "learning_rate": 1.9557108980770466e-05, "loss": 2.0107, "step": 37676 }, { "epoch": 0.49, "grad_norm": 3.6512722969055176, "learning_rate": 1.955707805460481e-05, "loss": 1.7599, "step": 37677 }, { "epoch": 0.49, "grad_norm": 4.283539295196533, "learning_rate": 1.9557047127383894e-05, "loss": 2.0423, "step": 37678 }, { "epoch": 0.49, "grad_norm": 3.8565502166748047, "learning_rate": 1.955701619910771e-05, "loss": 1.6899, "step": 37679 }, { "epoch": 0.49, "grad_norm": 4.146547794342041, "learning_rate": 1.955698526977628e-05, "loss": 1.9764, "step": 37680 }, { "epoch": 0.49, "grad_norm": 3.7066731452941895, "learning_rate": 1.9556954339389592e-05, "loss": 1.8344, "step": 37681 }, { "epoch": 0.49, "grad_norm": 3.764275074005127, "learning_rate": 1.955692340794765e-05, "loss": 2.3995, "step": 37682 }, { "epoch": 0.49, "grad_norm": 3.6424505710601807, "learning_rate": 1.9556892475450466e-05, "loss": 1.742, "step": 37683 }, { "epoch": 0.49, "grad_norm": 3.840877056121826, "learning_rate": 1.9556861541898038e-05, "loss": 1.9161, "step": 37684 }, { "epoch": 0.49, "grad_norm": 3.8327343463897705, "learning_rate": 1.9556830607290372e-05, "loss": 2.3321, "step": 37685 }, { "epoch": 0.49, "grad_norm": 3.8977551460266113, "learning_rate": 1.955679967162747e-05, "loss": 2.3425, "step": 37686 }, { "epoch": 0.49, "grad_norm": 4.089702606201172, "learning_rate": 1.955676873490933e-05, "loss": 1.9585, "step": 37687 }, { "epoch": 0.49, "grad_norm": 3.6787502765655518, "learning_rate": 1.9556737797135964e-05, "loss": 1.9254, "step": 37688 }, { "epoch": 0.49, "grad_norm": 3.4781877994537354, "learning_rate": 1.9556706858307374e-05, "loss": 1.9507, "step": 37689 }, { "epoch": 0.49, "grad_norm": 3.3165438175201416, "learning_rate": 1.9556675918423558e-05, "loss": 1.9789, "step": 37690 }, { "epoch": 0.49, "grad_norm": 3.638218641281128, "learning_rate": 1.9556644977484524e-05, "loss": 2.4962, "step": 37691 }, { "epoch": 0.49, "grad_norm": 3.5478382110595703, "learning_rate": 1.9556614035490277e-05, "loss": 1.9557, "step": 37692 }, { "epoch": 0.49, "grad_norm": 4.542773723602295, "learning_rate": 1.9556583092440814e-05, "loss": 1.9315, "step": 37693 }, { "epoch": 0.49, "grad_norm": 3.6368086338043213, "learning_rate": 1.9556552148336144e-05, "loss": 2.0515, "step": 37694 }, { "epoch": 0.49, "grad_norm": 3.402616024017334, "learning_rate": 1.9556521203176267e-05, "loss": 2.09, "step": 37695 }, { "epoch": 0.49, "grad_norm": 4.042821407318115, "learning_rate": 1.9556490256961188e-05, "loss": 2.5022, "step": 37696 }, { "epoch": 0.49, "grad_norm": 3.5557773113250732, "learning_rate": 1.9556459309690915e-05, "loss": 1.7842, "step": 37697 }, { "epoch": 0.49, "grad_norm": 3.991330623626709, "learning_rate": 1.955642836136544e-05, "loss": 2.3423, "step": 37698 }, { "epoch": 0.49, "grad_norm": 4.336196422576904, "learning_rate": 1.9556397411984777e-05, "loss": 2.1239, "step": 37699 }, { "epoch": 0.49, "grad_norm": 3.7097461223602295, "learning_rate": 1.9556366461548925e-05, "loss": 2.039, "step": 37700 }, { "epoch": 0.49, "grad_norm": 3.5255982875823975, "learning_rate": 1.9556335510057888e-05, "loss": 1.9998, "step": 37701 }, { "epoch": 0.49, "grad_norm": 3.2878355979919434, "learning_rate": 1.955630455751167e-05, "loss": 1.9413, "step": 37702 }, { "epoch": 0.49, "grad_norm": 3.713169813156128, "learning_rate": 1.955627360391027e-05, "loss": 1.8047, "step": 37703 }, { "epoch": 0.49, "grad_norm": 3.7618906497955322, "learning_rate": 1.95562426492537e-05, "loss": 2.0284, "step": 37704 }, { "epoch": 0.49, "grad_norm": 3.7752914428710938, "learning_rate": 1.955621169354196e-05, "loss": 2.2658, "step": 37705 }, { "epoch": 0.49, "grad_norm": 3.163891315460205, "learning_rate": 1.9556180736775044e-05, "loss": 1.8838, "step": 37706 }, { "epoch": 0.49, "grad_norm": 3.9049880504608154, "learning_rate": 1.955614977895297e-05, "loss": 2.3534, "step": 37707 }, { "epoch": 0.49, "grad_norm": 3.704542875289917, "learning_rate": 1.9556118820075737e-05, "loss": 2.1624, "step": 37708 }, { "epoch": 0.49, "grad_norm": 3.2259573936462402, "learning_rate": 1.955608786014334e-05, "loss": 1.6926, "step": 37709 }, { "epoch": 0.49, "grad_norm": 3.9441428184509277, "learning_rate": 1.9556056899155794e-05, "loss": 2.0542, "step": 37710 }, { "epoch": 0.49, "grad_norm": 3.2026660442352295, "learning_rate": 1.9556025937113094e-05, "loss": 1.5127, "step": 37711 }, { "epoch": 0.49, "grad_norm": 3.2805752754211426, "learning_rate": 1.9555994974015245e-05, "loss": 1.6517, "step": 37712 }, { "epoch": 0.49, "grad_norm": 3.605664014816284, "learning_rate": 1.9555964009862257e-05, "loss": 2.1518, "step": 37713 }, { "epoch": 0.49, "grad_norm": 3.6874141693115234, "learning_rate": 1.9555933044654124e-05, "loss": 1.9031, "step": 37714 }, { "epoch": 0.49, "grad_norm": 3.4441375732421875, "learning_rate": 1.9555902078390855e-05, "loss": 1.7895, "step": 37715 }, { "epoch": 0.49, "grad_norm": 3.9956822395324707, "learning_rate": 1.9555871111072455e-05, "loss": 2.1764, "step": 37716 }, { "epoch": 0.49, "grad_norm": 3.7882962226867676, "learning_rate": 1.9555840142698922e-05, "loss": 2.2789, "step": 37717 }, { "epoch": 0.49, "grad_norm": 4.067742347717285, "learning_rate": 1.9555809173270264e-05, "loss": 2.0382, "step": 37718 }, { "epoch": 0.49, "grad_norm": 3.74585223197937, "learning_rate": 1.955577820278648e-05, "loss": 1.8659, "step": 37719 }, { "epoch": 0.49, "grad_norm": 3.371377944946289, "learning_rate": 1.955574723124758e-05, "loss": 1.7313, "step": 37720 }, { "epoch": 0.49, "grad_norm": 3.5684456825256348, "learning_rate": 1.9555716258653562e-05, "loss": 1.8294, "step": 37721 }, { "epoch": 0.49, "grad_norm": 3.113022565841675, "learning_rate": 1.9555685285004427e-05, "loss": 1.7252, "step": 37722 }, { "epoch": 0.49, "grad_norm": 3.328293800354004, "learning_rate": 1.9555654310300184e-05, "loss": 1.7691, "step": 37723 }, { "epoch": 0.49, "grad_norm": 4.290313720703125, "learning_rate": 1.9555623334540835e-05, "loss": 1.8934, "step": 37724 }, { "epoch": 0.49, "grad_norm": 3.581251382827759, "learning_rate": 1.9555592357726386e-05, "loss": 1.9493, "step": 37725 }, { "epoch": 0.49, "grad_norm": 3.777433395385742, "learning_rate": 1.9555561379856834e-05, "loss": 1.7363, "step": 37726 }, { "epoch": 0.49, "grad_norm": 3.8189117908477783, "learning_rate": 1.9555530400932188e-05, "loss": 1.4613, "step": 37727 }, { "epoch": 0.49, "grad_norm": 4.016961574554443, "learning_rate": 1.955549942095245e-05, "loss": 1.9753, "step": 37728 }, { "epoch": 0.49, "grad_norm": 3.6476571559906006, "learning_rate": 1.955546843991762e-05, "loss": 1.4711, "step": 37729 }, { "epoch": 0.49, "grad_norm": 3.6035873889923096, "learning_rate": 1.9555437457827705e-05, "loss": 2.2206, "step": 37730 }, { "epoch": 0.49, "grad_norm": 3.964138984680176, "learning_rate": 1.9555406474682707e-05, "loss": 2.1807, "step": 37731 }, { "epoch": 0.49, "grad_norm": 3.9563851356506348, "learning_rate": 1.955537549048263e-05, "loss": 2.0106, "step": 37732 }, { "epoch": 0.49, "grad_norm": 4.096312999725342, "learning_rate": 1.955534450522748e-05, "loss": 2.2967, "step": 37733 }, { "epoch": 0.49, "grad_norm": 3.499177932739258, "learning_rate": 1.9555313518917257e-05, "loss": 2.0435, "step": 37734 }, { "epoch": 0.49, "grad_norm": 4.380636215209961, "learning_rate": 1.9555282531551968e-05, "loss": 2.1547, "step": 37735 }, { "epoch": 0.49, "grad_norm": 4.095756530761719, "learning_rate": 1.955525154313161e-05, "loss": 2.0146, "step": 37736 }, { "epoch": 0.49, "grad_norm": 3.748422622680664, "learning_rate": 1.9555220553656188e-05, "loss": 1.9865, "step": 37737 }, { "epoch": 0.49, "grad_norm": 3.6513607501983643, "learning_rate": 1.955518956312571e-05, "loss": 1.8651, "step": 37738 }, { "epoch": 0.49, "grad_norm": 4.019949436187744, "learning_rate": 1.9555158571540178e-05, "loss": 1.7526, "step": 37739 }, { "epoch": 0.49, "grad_norm": 3.49796462059021, "learning_rate": 1.955512757889959e-05, "loss": 1.9999, "step": 37740 }, { "epoch": 0.49, "grad_norm": 4.087944030761719, "learning_rate": 1.9555096585203958e-05, "loss": 1.8151, "step": 37741 }, { "epoch": 0.49, "grad_norm": 3.2750508785247803, "learning_rate": 1.955506559045328e-05, "loss": 1.654, "step": 37742 }, { "epoch": 0.49, "grad_norm": 3.876093864440918, "learning_rate": 1.9555034594647563e-05, "loss": 2.2296, "step": 37743 }, { "epoch": 0.49, "grad_norm": 4.130187034606934, "learning_rate": 1.9555003597786807e-05, "loss": 2.2031, "step": 37744 }, { "epoch": 0.49, "grad_norm": 3.1188135147094727, "learning_rate": 1.9554972599871015e-05, "loss": 1.6616, "step": 37745 }, { "epoch": 0.49, "grad_norm": 4.047644138336182, "learning_rate": 1.955494160090019e-05, "loss": 1.8419, "step": 37746 }, { "epoch": 0.49, "grad_norm": 3.5584421157836914, "learning_rate": 1.9554910600874343e-05, "loss": 1.8311, "step": 37747 }, { "epoch": 0.49, "grad_norm": 3.1244537830352783, "learning_rate": 1.9554879599793467e-05, "loss": 1.4007, "step": 37748 }, { "epoch": 0.49, "grad_norm": 3.833319664001465, "learning_rate": 1.9554848597657573e-05, "loss": 1.9471, "step": 37749 }, { "epoch": 0.49, "grad_norm": 3.730456829071045, "learning_rate": 1.955481759446666e-05, "loss": 1.9602, "step": 37750 }, { "epoch": 0.49, "grad_norm": 3.7139034271240234, "learning_rate": 1.9554786590220733e-05, "loss": 1.7009, "step": 37751 }, { "epoch": 0.49, "grad_norm": 3.492985963821411, "learning_rate": 1.9554755584919792e-05, "loss": 1.7437, "step": 37752 }, { "epoch": 0.49, "grad_norm": 3.3593924045562744, "learning_rate": 1.9554724578563852e-05, "loss": 1.6002, "step": 37753 }, { "epoch": 0.49, "grad_norm": 4.182690143585205, "learning_rate": 1.9554693571152902e-05, "loss": 2.3176, "step": 37754 }, { "epoch": 0.49, "grad_norm": 3.8068759441375732, "learning_rate": 1.9554662562686955e-05, "loss": 1.9633, "step": 37755 }, { "epoch": 0.49, "grad_norm": 3.764549732208252, "learning_rate": 1.955463155316601e-05, "loss": 1.8958, "step": 37756 }, { "epoch": 0.49, "grad_norm": 4.117344856262207, "learning_rate": 1.9554600542590072e-05, "loss": 1.9566, "step": 37757 }, { "epoch": 0.49, "grad_norm": 4.03916597366333, "learning_rate": 1.9554569530959143e-05, "loss": 1.9944, "step": 37758 }, { "epoch": 0.49, "grad_norm": 3.2139172554016113, "learning_rate": 1.9554538518273227e-05, "loss": 1.6064, "step": 37759 }, { "epoch": 0.49, "grad_norm": 3.604597806930542, "learning_rate": 1.955450750453233e-05, "loss": 1.853, "step": 37760 }, { "epoch": 0.49, "grad_norm": 3.788625717163086, "learning_rate": 1.955447648973645e-05, "loss": 2.0344, "step": 37761 }, { "epoch": 0.49, "grad_norm": 4.441600799560547, "learning_rate": 1.9554445473885596e-05, "loss": 2.4057, "step": 37762 }, { "epoch": 0.49, "grad_norm": 3.943847894668579, "learning_rate": 1.955441445697977e-05, "loss": 1.9365, "step": 37763 }, { "epoch": 0.49, "grad_norm": 3.4063804149627686, "learning_rate": 1.9554383439018973e-05, "loss": 1.9423, "step": 37764 }, { "epoch": 0.49, "grad_norm": 3.874016046524048, "learning_rate": 1.955435242000321e-05, "loss": 2.0799, "step": 37765 }, { "epoch": 0.49, "grad_norm": 3.7768332958221436, "learning_rate": 1.9554321399932485e-05, "loss": 2.1669, "step": 37766 }, { "epoch": 0.49, "grad_norm": 3.459599733352661, "learning_rate": 1.95542903788068e-05, "loss": 1.6743, "step": 37767 }, { "epoch": 0.49, "grad_norm": 3.678304433822632, "learning_rate": 1.955425935662616e-05, "loss": 1.9825, "step": 37768 }, { "epoch": 0.49, "grad_norm": 3.8936595916748047, "learning_rate": 1.955422833339057e-05, "loss": 1.8856, "step": 37769 }, { "epoch": 0.49, "grad_norm": 4.200923919677734, "learning_rate": 1.955419730910003e-05, "loss": 1.9861, "step": 37770 }, { "epoch": 0.49, "grad_norm": 4.012982368469238, "learning_rate": 1.955416628375454e-05, "loss": 2.0341, "step": 37771 }, { "epoch": 0.49, "grad_norm": 4.311893463134766, "learning_rate": 1.955413525735411e-05, "loss": 2.2549, "step": 37772 }, { "epoch": 0.49, "grad_norm": 3.8328158855438232, "learning_rate": 1.9554104229898746e-05, "loss": 1.7716, "step": 37773 }, { "epoch": 0.49, "grad_norm": 3.9321823120117188, "learning_rate": 1.9554073201388442e-05, "loss": 2.0023, "step": 37774 }, { "epoch": 0.49, "grad_norm": 3.719428539276123, "learning_rate": 1.955404217182321e-05, "loss": 2.0785, "step": 37775 }, { "epoch": 0.49, "grad_norm": 3.616001844406128, "learning_rate": 1.9554011141203045e-05, "loss": 2.1316, "step": 37776 }, { "epoch": 0.49, "grad_norm": 3.676121473312378, "learning_rate": 1.9553980109527958e-05, "loss": 2.1694, "step": 37777 }, { "epoch": 0.49, "grad_norm": 3.690812110900879, "learning_rate": 1.9553949076797946e-05, "loss": 1.661, "step": 37778 }, { "epoch": 0.49, "grad_norm": 3.9295589923858643, "learning_rate": 1.955391804301302e-05, "loss": 1.9849, "step": 37779 }, { "epoch": 0.49, "grad_norm": 3.5103354454040527, "learning_rate": 1.9553887008173177e-05, "loss": 2.0337, "step": 37780 }, { "epoch": 0.49, "grad_norm": 3.676619052886963, "learning_rate": 1.9553855972278427e-05, "loss": 2.2045, "step": 37781 }, { "epoch": 0.49, "grad_norm": 3.861323833465576, "learning_rate": 1.9553824935328768e-05, "loss": 1.9178, "step": 37782 }, { "epoch": 0.49, "grad_norm": 3.3685030937194824, "learning_rate": 1.95537938973242e-05, "loss": 1.6112, "step": 37783 }, { "epoch": 0.49, "grad_norm": 3.397919178009033, "learning_rate": 1.9553762858264735e-05, "loss": 1.8428, "step": 37784 }, { "epoch": 0.49, "grad_norm": 4.424202919006348, "learning_rate": 1.955373181815037e-05, "loss": 1.8291, "step": 37785 }, { "epoch": 0.49, "grad_norm": 4.45751953125, "learning_rate": 1.9553700776981112e-05, "loss": 2.1216, "step": 37786 }, { "epoch": 0.49, "grad_norm": 3.738612651824951, "learning_rate": 1.9553669734756962e-05, "loss": 1.714, "step": 37787 }, { "epoch": 0.49, "grad_norm": 3.751821756362915, "learning_rate": 1.9553638691477927e-05, "loss": 1.932, "step": 37788 }, { "epoch": 0.49, "grad_norm": 3.595655918121338, "learning_rate": 1.955360764714401e-05, "loss": 2.1154, "step": 37789 }, { "epoch": 0.49, "grad_norm": 3.761406421661377, "learning_rate": 1.9553576601755207e-05, "loss": 1.9894, "step": 37790 }, { "epoch": 0.49, "grad_norm": 3.7053513526916504, "learning_rate": 1.9553545555311532e-05, "loss": 2.104, "step": 37791 }, { "epoch": 0.49, "grad_norm": 3.384780168533325, "learning_rate": 1.9553514507812983e-05, "loss": 1.9037, "step": 37792 }, { "epoch": 0.49, "grad_norm": 3.811500310897827, "learning_rate": 1.9553483459259565e-05, "loss": 2.2157, "step": 37793 }, { "epoch": 0.49, "grad_norm": 3.5954396724700928, "learning_rate": 1.9553452409651273e-05, "loss": 2.0888, "step": 37794 }, { "epoch": 0.49, "grad_norm": 4.091360569000244, "learning_rate": 1.9553421358988124e-05, "loss": 2.2256, "step": 37795 }, { "epoch": 0.49, "grad_norm": 3.180595874786377, "learning_rate": 1.9553390307270117e-05, "loss": 1.5456, "step": 37796 }, { "epoch": 0.49, "grad_norm": 3.9206626415252686, "learning_rate": 1.955335925449725e-05, "loss": 2.0777, "step": 37797 }, { "epoch": 0.49, "grad_norm": 4.418194770812988, "learning_rate": 1.955332820066953e-05, "loss": 2.0826, "step": 37798 }, { "epoch": 0.49, "grad_norm": 3.5018417835235596, "learning_rate": 1.9553297145786964e-05, "loss": 1.5272, "step": 37799 }, { "epoch": 0.49, "grad_norm": 4.054659366607666, "learning_rate": 1.9553266089849547e-05, "loss": 2.1147, "step": 37800 }, { "epoch": 0.49, "grad_norm": 3.6434431076049805, "learning_rate": 1.955323503285729e-05, "loss": 1.9754, "step": 37801 }, { "epoch": 0.49, "grad_norm": 3.454000949859619, "learning_rate": 1.9553203974810194e-05, "loss": 1.9086, "step": 37802 }, { "epoch": 0.49, "grad_norm": 3.2561581134796143, "learning_rate": 1.955317291570826e-05, "loss": 1.7084, "step": 37803 }, { "epoch": 0.49, "grad_norm": 3.7000155448913574, "learning_rate": 1.9553141855551496e-05, "loss": 2.0698, "step": 37804 }, { "epoch": 0.49, "grad_norm": 3.5899293422698975, "learning_rate": 1.9553110794339903e-05, "loss": 1.9646, "step": 37805 }, { "epoch": 0.49, "grad_norm": 3.947803020477295, "learning_rate": 1.955307973207348e-05, "loss": 2.4419, "step": 37806 }, { "epoch": 0.49, "grad_norm": 3.6611149311065674, "learning_rate": 1.9553048668752243e-05, "loss": 1.6274, "step": 37807 }, { "epoch": 0.49, "grad_norm": 3.9225645065307617, "learning_rate": 1.9553017604376182e-05, "loss": 2.3568, "step": 37808 }, { "epoch": 0.49, "grad_norm": 3.609941005706787, "learning_rate": 1.9552986538945308e-05, "loss": 1.948, "step": 37809 }, { "epoch": 0.49, "grad_norm": 3.1808671951293945, "learning_rate": 1.9552955472459617e-05, "loss": 1.6983, "step": 37810 }, { "epoch": 0.49, "grad_norm": 4.612163066864014, "learning_rate": 1.9552924404919126e-05, "loss": 2.1888, "step": 37811 }, { "epoch": 0.49, "grad_norm": 3.787210702896118, "learning_rate": 1.9552893336323825e-05, "loss": 1.7014, "step": 37812 }, { "epoch": 0.49, "grad_norm": 3.38472843170166, "learning_rate": 1.9552862266673724e-05, "loss": 1.7461, "step": 37813 }, { "epoch": 0.49, "grad_norm": 3.936002016067505, "learning_rate": 1.955283119596882e-05, "loss": 2.1065, "step": 37814 }, { "epoch": 0.49, "grad_norm": 4.011819362640381, "learning_rate": 1.9552800124209125e-05, "loss": 1.9994, "step": 37815 }, { "epoch": 0.49, "grad_norm": 4.5645365715026855, "learning_rate": 1.955276905139464e-05, "loss": 2.7114, "step": 37816 }, { "epoch": 0.49, "grad_norm": 3.328652858734131, "learning_rate": 1.9552737977525367e-05, "loss": 1.6029, "step": 37817 }, { "epoch": 0.49, "grad_norm": 3.951425075531006, "learning_rate": 1.955270690260131e-05, "loss": 1.8999, "step": 37818 }, { "epoch": 0.49, "grad_norm": 3.423668146133423, "learning_rate": 1.955267582662247e-05, "loss": 1.5481, "step": 37819 }, { "epoch": 0.49, "grad_norm": 3.4676339626312256, "learning_rate": 1.9552644749588854e-05, "loss": 1.6992, "step": 37820 }, { "epoch": 0.49, "grad_norm": 3.7170000076293945, "learning_rate": 1.9552613671500465e-05, "loss": 2.0124, "step": 37821 }, { "epoch": 0.49, "grad_norm": 4.048914432525635, "learning_rate": 1.9552582592357307e-05, "loss": 2.0117, "step": 37822 }, { "epoch": 0.49, "grad_norm": 3.2589657306671143, "learning_rate": 1.9552551512159375e-05, "loss": 1.725, "step": 37823 }, { "epoch": 0.49, "grad_norm": 3.7756354808807373, "learning_rate": 1.9552520430906685e-05, "loss": 2.1245, "step": 37824 }, { "epoch": 0.49, "grad_norm": 3.7665700912475586, "learning_rate": 1.955248934859923e-05, "loss": 1.9276, "step": 37825 }, { "epoch": 0.49, "grad_norm": 3.9335100650787354, "learning_rate": 1.9552458265237025e-05, "loss": 2.3213, "step": 37826 }, { "epoch": 0.49, "grad_norm": 3.3276901245117188, "learning_rate": 1.955242718082006e-05, "loss": 1.7943, "step": 37827 }, { "epoch": 0.49, "grad_norm": 3.935753583908081, "learning_rate": 1.955239609534835e-05, "loss": 1.7, "step": 37828 }, { "epoch": 0.49, "grad_norm": 3.865382671356201, "learning_rate": 1.955236500882189e-05, "loss": 2.3185, "step": 37829 }, { "epoch": 0.49, "grad_norm": 4.094791889190674, "learning_rate": 1.9552333921240687e-05, "loss": 1.9607, "step": 37830 }, { "epoch": 0.49, "grad_norm": 3.829692840576172, "learning_rate": 1.9552302832604745e-05, "loss": 2.0086, "step": 37831 }, { "epoch": 0.49, "grad_norm": 3.8162319660186768, "learning_rate": 1.9552271742914065e-05, "loss": 1.9999, "step": 37832 }, { "epoch": 0.49, "grad_norm": 3.7935149669647217, "learning_rate": 1.9552240652168656e-05, "loss": 2.0575, "step": 37833 }, { "epoch": 0.49, "grad_norm": 3.380350112915039, "learning_rate": 1.9552209560368517e-05, "loss": 1.8837, "step": 37834 }, { "epoch": 0.49, "grad_norm": 3.8035926818847656, "learning_rate": 1.955217846751365e-05, "loss": 1.9242, "step": 37835 }, { "epoch": 0.49, "grad_norm": 3.7456603050231934, "learning_rate": 1.9552147373604062e-05, "loss": 2.1615, "step": 37836 }, { "epoch": 0.49, "grad_norm": 4.4898481369018555, "learning_rate": 1.9552116278639754e-05, "loss": 2.1942, "step": 37837 }, { "epoch": 0.49, "grad_norm": 3.462247848510742, "learning_rate": 1.9552085182620728e-05, "loss": 1.9053, "step": 37838 }, { "epoch": 0.49, "grad_norm": 3.646947145462036, "learning_rate": 1.9552054085546993e-05, "loss": 1.8, "step": 37839 }, { "epoch": 0.49, "grad_norm": 3.6386303901672363, "learning_rate": 1.955202298741855e-05, "loss": 1.6649, "step": 37840 }, { "epoch": 0.49, "grad_norm": 3.5604987144470215, "learning_rate": 1.95519918882354e-05, "loss": 1.3943, "step": 37841 }, { "epoch": 0.49, "grad_norm": 3.846219062805176, "learning_rate": 1.9551960787997545e-05, "loss": 1.7779, "step": 37842 }, { "epoch": 0.49, "grad_norm": 4.059401512145996, "learning_rate": 1.9551929686704997e-05, "loss": 2.3557, "step": 37843 }, { "epoch": 0.49, "grad_norm": 3.593076467514038, "learning_rate": 1.955189858435775e-05, "loss": 1.9729, "step": 37844 }, { "epoch": 0.49, "grad_norm": 3.4727790355682373, "learning_rate": 1.9551867480955812e-05, "loss": 1.7674, "step": 37845 }, { "epoch": 0.49, "grad_norm": 4.360080718994141, "learning_rate": 1.9551836376499186e-05, "loss": 2.1137, "step": 37846 }, { "epoch": 0.49, "grad_norm": 3.4479820728302, "learning_rate": 1.9551805270987875e-05, "loss": 1.7308, "step": 37847 }, { "epoch": 0.49, "grad_norm": 3.42154860496521, "learning_rate": 1.9551774164421886e-05, "loss": 1.6904, "step": 37848 }, { "epoch": 0.49, "grad_norm": 3.5503528118133545, "learning_rate": 1.9551743056801215e-05, "loss": 1.6572, "step": 37849 }, { "epoch": 0.49, "grad_norm": 3.269049882888794, "learning_rate": 1.955171194812587e-05, "loss": 1.7635, "step": 37850 }, { "epoch": 0.49, "grad_norm": 4.33789587020874, "learning_rate": 1.9551680838395854e-05, "loss": 2.0569, "step": 37851 }, { "epoch": 0.49, "grad_norm": 3.8458282947540283, "learning_rate": 1.955164972761117e-05, "loss": 2.1983, "step": 37852 }, { "epoch": 0.49, "grad_norm": 3.6577072143554688, "learning_rate": 1.9551618615771824e-05, "loss": 1.9335, "step": 37853 }, { "epoch": 0.49, "grad_norm": 3.9222476482391357, "learning_rate": 1.9551587502877818e-05, "loss": 2.0849, "step": 37854 }, { "epoch": 0.49, "grad_norm": 3.8450424671173096, "learning_rate": 1.955155638892915e-05, "loss": 1.8231, "step": 37855 }, { "epoch": 0.49, "grad_norm": 3.8019962310791016, "learning_rate": 1.9551525273925833e-05, "loss": 1.911, "step": 37856 }, { "epoch": 0.49, "grad_norm": 3.853729009628296, "learning_rate": 1.9551494157867864e-05, "loss": 2.6183, "step": 37857 }, { "epoch": 0.49, "grad_norm": 3.545536518096924, "learning_rate": 1.9551463040755244e-05, "loss": 1.5496, "step": 37858 }, { "epoch": 0.49, "grad_norm": 3.924501657485962, "learning_rate": 1.9551431922587987e-05, "loss": 2.0749, "step": 37859 }, { "epoch": 0.49, "grad_norm": 3.775500535964966, "learning_rate": 1.955140080336609e-05, "loss": 2.0408, "step": 37860 }, { "epoch": 0.49, "grad_norm": 3.052656888961792, "learning_rate": 1.955136968308955e-05, "loss": 1.4926, "step": 37861 }, { "epoch": 0.49, "grad_norm": 3.3215718269348145, "learning_rate": 1.955133856175838e-05, "loss": 1.7888, "step": 37862 }, { "epoch": 0.49, "grad_norm": 3.7419862747192383, "learning_rate": 1.955130743937258e-05, "loss": 1.8768, "step": 37863 }, { "epoch": 0.49, "grad_norm": 3.7402074337005615, "learning_rate": 1.9551276315932154e-05, "loss": 1.7325, "step": 37864 }, { "epoch": 0.49, "grad_norm": 3.0621514320373535, "learning_rate": 1.9551245191437107e-05, "loss": 1.5576, "step": 37865 }, { "epoch": 0.49, "grad_norm": 3.7159903049468994, "learning_rate": 1.9551214065887435e-05, "loss": 2.0831, "step": 37866 }, { "epoch": 0.49, "grad_norm": 3.799281597137451, "learning_rate": 1.955118293928315e-05, "loss": 2.2636, "step": 37867 }, { "epoch": 0.49, "grad_norm": 3.6780776977539062, "learning_rate": 1.9551151811624252e-05, "loss": 2.0055, "step": 37868 }, { "epoch": 0.49, "grad_norm": 3.7247073650360107, "learning_rate": 1.9551120682910747e-05, "loss": 1.6005, "step": 37869 }, { "epoch": 0.49, "grad_norm": 3.904531717300415, "learning_rate": 1.9551089553142635e-05, "loss": 2.518, "step": 37870 }, { "epoch": 0.49, "grad_norm": 3.443073034286499, "learning_rate": 1.955105842231992e-05, "loss": 1.8061, "step": 37871 }, { "epoch": 0.49, "grad_norm": 3.1926960945129395, "learning_rate": 1.9551027290442607e-05, "loss": 1.5989, "step": 37872 }, { "epoch": 0.49, "grad_norm": 3.6778993606567383, "learning_rate": 1.9550996157510698e-05, "loss": 1.8578, "step": 37873 }, { "epoch": 0.49, "grad_norm": 3.9866371154785156, "learning_rate": 1.9550965023524196e-05, "loss": 2.3028, "step": 37874 }, { "epoch": 0.49, "grad_norm": 3.5092499256134033, "learning_rate": 1.9550933888483107e-05, "loss": 1.5845, "step": 37875 }, { "epoch": 0.49, "grad_norm": 3.8767075538635254, "learning_rate": 1.9550902752387432e-05, "loss": 2.0617, "step": 37876 }, { "epoch": 0.49, "grad_norm": 3.3017261028289795, "learning_rate": 1.9550871615237174e-05, "loss": 1.6825, "step": 37877 }, { "epoch": 0.49, "grad_norm": 3.3986685276031494, "learning_rate": 1.955084047703234e-05, "loss": 2.027, "step": 37878 }, { "epoch": 0.49, "grad_norm": 3.7012219429016113, "learning_rate": 1.955080933777293e-05, "loss": 2.1001, "step": 37879 }, { "epoch": 0.49, "grad_norm": 3.633582353591919, "learning_rate": 1.955077819745895e-05, "loss": 1.5888, "step": 37880 }, { "epoch": 0.49, "grad_norm": 4.045535564422607, "learning_rate": 1.95507470560904e-05, "loss": 1.9802, "step": 37881 }, { "epoch": 0.49, "grad_norm": 3.3365015983581543, "learning_rate": 1.9550715913667288e-05, "loss": 1.4191, "step": 37882 }, { "epoch": 0.49, "grad_norm": 4.199318885803223, "learning_rate": 1.9550684770189612e-05, "loss": 2.4368, "step": 37883 }, { "epoch": 0.49, "grad_norm": 3.689378261566162, "learning_rate": 1.9550653625657384e-05, "loss": 2.2995, "step": 37884 }, { "epoch": 0.49, "grad_norm": 4.131054401397705, "learning_rate": 1.9550622480070592e-05, "loss": 2.4242, "step": 37885 }, { "epoch": 0.49, "grad_norm": 3.194758415222168, "learning_rate": 1.955059133342926e-05, "loss": 1.8313, "step": 37886 }, { "epoch": 0.49, "grad_norm": 3.5914039611816406, "learning_rate": 1.9550560185733373e-05, "loss": 1.8321, "step": 37887 }, { "epoch": 0.49, "grad_norm": 3.6982579231262207, "learning_rate": 1.9550529036982947e-05, "loss": 2.2303, "step": 37888 }, { "epoch": 0.49, "grad_norm": 3.749173402786255, "learning_rate": 1.9550497887177976e-05, "loss": 1.8683, "step": 37889 }, { "epoch": 0.49, "grad_norm": 3.3125529289245605, "learning_rate": 1.9550466736318472e-05, "loss": 1.5227, "step": 37890 }, { "epoch": 0.49, "grad_norm": 3.9960711002349854, "learning_rate": 1.9550435584404433e-05, "loss": 1.8795, "step": 37891 }, { "epoch": 0.49, "grad_norm": 3.460831880569458, "learning_rate": 1.955040443143586e-05, "loss": 1.6615, "step": 37892 }, { "epoch": 0.49, "grad_norm": 3.376554012298584, "learning_rate": 1.9550373277412768e-05, "loss": 2.0763, "step": 37893 }, { "epoch": 0.49, "grad_norm": 3.937723159790039, "learning_rate": 1.955034212233515e-05, "loss": 2.1235, "step": 37894 }, { "epoch": 0.49, "grad_norm": 3.7651898860931396, "learning_rate": 1.9550310966203008e-05, "loss": 1.5409, "step": 37895 }, { "epoch": 0.49, "grad_norm": 3.469974994659424, "learning_rate": 1.9550279809016354e-05, "loss": 1.7627, "step": 37896 }, { "epoch": 0.49, "grad_norm": 3.7740259170532227, "learning_rate": 1.9550248650775186e-05, "loss": 1.8953, "step": 37897 }, { "epoch": 0.49, "grad_norm": 3.7197256088256836, "learning_rate": 1.955021749147951e-05, "loss": 2.1998, "step": 37898 }, { "epoch": 0.49, "grad_norm": 3.8144290447235107, "learning_rate": 1.9550186331129323e-05, "loss": 2.105, "step": 37899 }, { "epoch": 0.49, "grad_norm": 3.0637378692626953, "learning_rate": 1.9550155169724635e-05, "loss": 1.4286, "step": 37900 }, { "epoch": 0.49, "grad_norm": 3.8032519817352295, "learning_rate": 1.9550124007265452e-05, "loss": 2.1971, "step": 37901 }, { "epoch": 0.49, "grad_norm": 3.893611431121826, "learning_rate": 1.9550092843751768e-05, "loss": 2.0523, "step": 37902 }, { "epoch": 0.49, "grad_norm": 3.5041465759277344, "learning_rate": 1.95500616791836e-05, "loss": 1.5364, "step": 37903 }, { "epoch": 0.49, "grad_norm": 3.5801262855529785, "learning_rate": 1.9550030513560934e-05, "loss": 1.9833, "step": 37904 }, { "epoch": 0.49, "grad_norm": 3.939682722091675, "learning_rate": 1.9549999346883788e-05, "loss": 2.2789, "step": 37905 }, { "epoch": 0.49, "grad_norm": 4.2635979652404785, "learning_rate": 1.9549968179152157e-05, "loss": 2.7225, "step": 37906 }, { "epoch": 0.49, "grad_norm": 3.4347007274627686, "learning_rate": 1.954993701036605e-05, "loss": 1.7513, "step": 37907 }, { "epoch": 0.49, "grad_norm": 3.7058279514312744, "learning_rate": 1.9549905840525465e-05, "loss": 1.7488, "step": 37908 }, { "epoch": 0.49, "grad_norm": 3.744379997253418, "learning_rate": 1.954987466963041e-05, "loss": 1.9847, "step": 37909 }, { "epoch": 0.49, "grad_norm": 4.40651273727417, "learning_rate": 1.954984349768089e-05, "loss": 2.6706, "step": 37910 }, { "epoch": 0.49, "grad_norm": 3.1234612464904785, "learning_rate": 1.95498123246769e-05, "loss": 1.5127, "step": 37911 }, { "epoch": 0.49, "grad_norm": 3.5415663719177246, "learning_rate": 1.9549781150618452e-05, "loss": 2.0129, "step": 37912 }, { "epoch": 0.49, "grad_norm": 3.974604845046997, "learning_rate": 1.9549749975505542e-05, "loss": 2.3036, "step": 37913 }, { "epoch": 0.49, "grad_norm": 3.6504342555999756, "learning_rate": 1.9549718799338185e-05, "loss": 1.9007, "step": 37914 }, { "epoch": 0.49, "grad_norm": 3.6842024326324463, "learning_rate": 1.954968762211637e-05, "loss": 2.0275, "step": 37915 }, { "epoch": 0.49, "grad_norm": 3.738973379135132, "learning_rate": 1.9549656443840114e-05, "loss": 1.9545, "step": 37916 }, { "epoch": 0.49, "grad_norm": 3.5431861877441406, "learning_rate": 1.9549625264509407e-05, "loss": 1.6088, "step": 37917 }, { "epoch": 0.49, "grad_norm": 3.464989423751831, "learning_rate": 1.9549594084124263e-05, "loss": 1.7435, "step": 37918 }, { "epoch": 0.49, "grad_norm": 3.746699333190918, "learning_rate": 1.954956290268468e-05, "loss": 1.7204, "step": 37919 }, { "epoch": 0.49, "grad_norm": 4.083889484405518, "learning_rate": 1.9549531720190666e-05, "loss": 2.4021, "step": 37920 }, { "epoch": 0.49, "grad_norm": 3.897045850753784, "learning_rate": 1.954950053664222e-05, "loss": 1.6799, "step": 37921 }, { "epoch": 0.49, "grad_norm": 3.700791358947754, "learning_rate": 1.9549469352039346e-05, "loss": 2.0517, "step": 37922 }, { "epoch": 0.49, "grad_norm": 3.506840944290161, "learning_rate": 1.9549438166382052e-05, "loss": 1.8675, "step": 37923 }, { "epoch": 0.49, "grad_norm": 4.192784786224365, "learning_rate": 1.9549406979670334e-05, "loss": 2.3125, "step": 37924 }, { "epoch": 0.49, "grad_norm": 4.0978779792785645, "learning_rate": 1.9549375791904202e-05, "loss": 2.1829, "step": 37925 }, { "epoch": 0.49, "grad_norm": 3.8368773460388184, "learning_rate": 1.9549344603083657e-05, "loss": 1.7477, "step": 37926 }, { "epoch": 0.49, "grad_norm": 3.3414855003356934, "learning_rate": 1.95493134132087e-05, "loss": 1.714, "step": 37927 }, { "epoch": 0.49, "grad_norm": 4.313924789428711, "learning_rate": 1.954928222227934e-05, "loss": 2.2512, "step": 37928 }, { "epoch": 0.49, "grad_norm": 4.179986476898193, "learning_rate": 1.9549251030295575e-05, "loss": 2.3953, "step": 37929 }, { "epoch": 0.49, "grad_norm": 3.7588393688201904, "learning_rate": 1.9549219837257412e-05, "loss": 2.1111, "step": 37930 }, { "epoch": 0.49, "grad_norm": 3.1681289672851562, "learning_rate": 1.9549188643164852e-05, "loss": 1.6154, "step": 37931 }, { "epoch": 0.49, "grad_norm": 3.898027181625366, "learning_rate": 1.95491574480179e-05, "loss": 1.9643, "step": 37932 }, { "epoch": 0.49, "grad_norm": 3.5779571533203125, "learning_rate": 1.954912625181656e-05, "loss": 2.0076, "step": 37933 }, { "epoch": 0.49, "grad_norm": 3.3626534938812256, "learning_rate": 1.9549095054560833e-05, "loss": 1.583, "step": 37934 }, { "epoch": 0.49, "grad_norm": 4.03540563583374, "learning_rate": 1.9549063856250723e-05, "loss": 1.8797, "step": 37935 }, { "epoch": 0.49, "grad_norm": 3.7077035903930664, "learning_rate": 1.9549032656886234e-05, "loss": 1.8731, "step": 37936 }, { "epoch": 0.49, "grad_norm": 3.7539474964141846, "learning_rate": 1.9549001456467375e-05, "loss": 1.8196, "step": 37937 }, { "epoch": 0.49, "grad_norm": 4.056121349334717, "learning_rate": 1.954897025499414e-05, "loss": 1.9711, "step": 37938 }, { "epoch": 0.49, "grad_norm": 3.664557456970215, "learning_rate": 1.9548939052466537e-05, "loss": 1.9761, "step": 37939 }, { "epoch": 0.49, "grad_norm": 3.9764835834503174, "learning_rate": 1.954890784888457e-05, "loss": 2.0434, "step": 37940 }, { "epoch": 0.49, "grad_norm": 3.345839500427246, "learning_rate": 1.954887664424824e-05, "loss": 1.6651, "step": 37941 }, { "epoch": 0.49, "grad_norm": 3.352458953857422, "learning_rate": 1.9548845438557554e-05, "loss": 1.9191, "step": 37942 }, { "epoch": 0.49, "grad_norm": 3.7382631301879883, "learning_rate": 1.9548814231812514e-05, "loss": 1.9471, "step": 37943 }, { "epoch": 0.49, "grad_norm": 4.220795631408691, "learning_rate": 1.954878302401312e-05, "loss": 2.5553, "step": 37944 }, { "epoch": 0.49, "grad_norm": 4.788362979888916, "learning_rate": 1.954875181515938e-05, "loss": 1.9764, "step": 37945 }, { "epoch": 0.49, "grad_norm": 4.319793224334717, "learning_rate": 1.95487206052513e-05, "loss": 2.1733, "step": 37946 }, { "epoch": 0.49, "grad_norm": 3.9157447814941406, "learning_rate": 1.954868939428887e-05, "loss": 2.0916, "step": 37947 }, { "epoch": 0.49, "grad_norm": 3.4548468589782715, "learning_rate": 1.954865818227211e-05, "loss": 1.7682, "step": 37948 }, { "epoch": 0.49, "grad_norm": 4.259450912475586, "learning_rate": 1.9548626969201013e-05, "loss": 1.7474, "step": 37949 }, { "epoch": 0.49, "grad_norm": 3.8404407501220703, "learning_rate": 1.9548595755075585e-05, "loss": 1.9945, "step": 37950 }, { "epoch": 0.49, "grad_norm": 3.568991184234619, "learning_rate": 1.9548564539895832e-05, "loss": 2.0616, "step": 37951 }, { "epoch": 0.49, "grad_norm": 3.494748592376709, "learning_rate": 1.9548533323661756e-05, "loss": 1.6081, "step": 37952 }, { "epoch": 0.49, "grad_norm": 3.843109369277954, "learning_rate": 1.954850210637336e-05, "loss": 1.9249, "step": 37953 }, { "epoch": 0.49, "grad_norm": 3.981501579284668, "learning_rate": 1.9548470888030645e-05, "loss": 2.2065, "step": 37954 }, { "epoch": 0.49, "grad_norm": 3.9998626708984375, "learning_rate": 1.954843966863362e-05, "loss": 2.2173, "step": 37955 }, { "epoch": 0.49, "grad_norm": 3.5866169929504395, "learning_rate": 1.9548408448182282e-05, "loss": 2.1336, "step": 37956 }, { "epoch": 0.49, "grad_norm": 3.1358249187469482, "learning_rate": 1.954837722667664e-05, "loss": 1.5085, "step": 37957 }, { "epoch": 0.49, "grad_norm": 3.21244215965271, "learning_rate": 1.954834600411669e-05, "loss": 1.729, "step": 37958 }, { "epoch": 0.49, "grad_norm": 3.3962066173553467, "learning_rate": 1.9548314780502445e-05, "loss": 1.6487, "step": 37959 }, { "epoch": 0.49, "grad_norm": 3.7835538387298584, "learning_rate": 1.9548283555833903e-05, "loss": 1.921, "step": 37960 }, { "epoch": 0.49, "grad_norm": 3.641869068145752, "learning_rate": 1.954825233011107e-05, "loss": 2.0074, "step": 37961 }, { "epoch": 0.49, "grad_norm": 3.710332155227661, "learning_rate": 1.954822110333395e-05, "loss": 2.318, "step": 37962 }, { "epoch": 0.49, "grad_norm": 4.457291603088379, "learning_rate": 1.954818987550254e-05, "loss": 1.6184, "step": 37963 }, { "epoch": 0.49, "grad_norm": 3.105976104736328, "learning_rate": 1.954815864661685e-05, "loss": 1.4279, "step": 37964 }, { "epoch": 0.49, "grad_norm": 4.137258052825928, "learning_rate": 1.9548127416676883e-05, "loss": 2.6618, "step": 37965 }, { "epoch": 0.49, "grad_norm": 3.8527824878692627, "learning_rate": 1.954809618568264e-05, "loss": 2.1328, "step": 37966 }, { "epoch": 0.49, "grad_norm": 3.2940714359283447, "learning_rate": 1.954806495363412e-05, "loss": 1.7844, "step": 37967 }, { "epoch": 0.49, "grad_norm": 4.055950164794922, "learning_rate": 1.9548033720531338e-05, "loss": 1.866, "step": 37968 }, { "epoch": 0.49, "grad_norm": 3.8952362537384033, "learning_rate": 1.9548002486374286e-05, "loss": 2.2939, "step": 37969 }, { "epoch": 0.49, "grad_norm": 3.576652765274048, "learning_rate": 1.9547971251162976e-05, "loss": 2.0996, "step": 37970 }, { "epoch": 0.49, "grad_norm": 3.280994415283203, "learning_rate": 1.9547940014897408e-05, "loss": 1.6296, "step": 37971 }, { "epoch": 0.49, "grad_norm": 3.8156325817108154, "learning_rate": 1.9547908777577582e-05, "loss": 1.944, "step": 37972 }, { "epoch": 0.49, "grad_norm": 4.478057861328125, "learning_rate": 1.954787753920351e-05, "loss": 2.5755, "step": 37973 }, { "epoch": 0.49, "grad_norm": 4.099091053009033, "learning_rate": 1.9547846299775187e-05, "loss": 1.9429, "step": 37974 }, { "epoch": 0.49, "grad_norm": 3.510784864425659, "learning_rate": 1.954781505929262e-05, "loss": 1.9085, "step": 37975 }, { "epoch": 0.49, "grad_norm": 3.57966685295105, "learning_rate": 1.9547783817755812e-05, "loss": 2.0308, "step": 37976 }, { "epoch": 0.49, "grad_norm": 3.5254461765289307, "learning_rate": 1.9547752575164768e-05, "loss": 1.6044, "step": 37977 }, { "epoch": 0.49, "grad_norm": 3.590500593185425, "learning_rate": 1.954772133151949e-05, "loss": 1.9674, "step": 37978 }, { "epoch": 0.49, "grad_norm": 3.3850178718566895, "learning_rate": 1.954769008681998e-05, "loss": 1.7167, "step": 37979 }, { "epoch": 0.49, "grad_norm": 4.341045379638672, "learning_rate": 1.9547658841066245e-05, "loss": 2.2939, "step": 37980 }, { "epoch": 0.49, "grad_norm": 3.761549949645996, "learning_rate": 1.9547627594258288e-05, "loss": 2.2007, "step": 37981 }, { "epoch": 0.49, "grad_norm": 3.666994333267212, "learning_rate": 1.9547596346396104e-05, "loss": 2.0597, "step": 37982 }, { "epoch": 0.49, "grad_norm": 4.244564533233643, "learning_rate": 1.954756509747971e-05, "loss": 1.836, "step": 37983 }, { "epoch": 0.49, "grad_norm": 3.7606353759765625, "learning_rate": 1.95475338475091e-05, "loss": 1.9163, "step": 37984 }, { "epoch": 0.49, "grad_norm": 3.75935959815979, "learning_rate": 1.9547502596484282e-05, "loss": 2.0026, "step": 37985 }, { "epoch": 0.49, "grad_norm": 3.5574495792388916, "learning_rate": 1.954747134440526e-05, "loss": 1.6236, "step": 37986 }, { "epoch": 0.49, "grad_norm": 4.0898966789245605, "learning_rate": 1.954744009127203e-05, "loss": 2.1092, "step": 37987 }, { "epoch": 0.49, "grad_norm": 3.9153778553009033, "learning_rate": 1.95474088370846e-05, "loss": 1.9265, "step": 37988 }, { "epoch": 0.49, "grad_norm": 3.626978635787964, "learning_rate": 1.954737758184298e-05, "loss": 2.0253, "step": 37989 }, { "epoch": 0.49, "grad_norm": 3.862436532974243, "learning_rate": 1.9547346325547163e-05, "loss": 2.0357, "step": 37990 }, { "epoch": 0.49, "grad_norm": 3.857785701751709, "learning_rate": 1.954731506819716e-05, "loss": 2.066, "step": 37991 }, { "epoch": 0.49, "grad_norm": 3.8279616832733154, "learning_rate": 1.954728380979297e-05, "loss": 2.1238, "step": 37992 }, { "epoch": 0.49, "grad_norm": 3.7990479469299316, "learning_rate": 1.9547252550334596e-05, "loss": 2.1255, "step": 37993 }, { "epoch": 0.49, "grad_norm": 3.7911994457244873, "learning_rate": 1.9547221289822046e-05, "loss": 2.0893, "step": 37994 }, { "epoch": 0.49, "grad_norm": 2.9650673866271973, "learning_rate": 1.954719002825532e-05, "loss": 1.2684, "step": 37995 }, { "epoch": 0.49, "grad_norm": 3.9227869510650635, "learning_rate": 1.9547158765634423e-05, "loss": 2.18, "step": 37996 }, { "epoch": 0.49, "grad_norm": 4.000967979431152, "learning_rate": 1.9547127501959356e-05, "loss": 2.02, "step": 37997 }, { "epoch": 0.49, "grad_norm": 4.1088409423828125, "learning_rate": 1.9547096237230124e-05, "loss": 1.9289, "step": 37998 }, { "epoch": 0.49, "grad_norm": 3.5793697834014893, "learning_rate": 1.9547064971446728e-05, "loss": 1.9628, "step": 37999 }, { "epoch": 0.49, "grad_norm": 4.036800384521484, "learning_rate": 1.954703370460918e-05, "loss": 1.9617, "step": 38000 }, { "epoch": 0.49, "grad_norm": 4.234034061431885, "learning_rate": 1.954700243671748e-05, "loss": 2.0487, "step": 38001 }, { "epoch": 0.49, "grad_norm": 3.9878182411193848, "learning_rate": 1.954697116777162e-05, "loss": 2.4684, "step": 38002 }, { "epoch": 0.49, "grad_norm": 4.035272121429443, "learning_rate": 1.9546939897771614e-05, "loss": 2.1858, "step": 38003 }, { "epoch": 0.49, "grad_norm": 3.776263952255249, "learning_rate": 1.9546908626717468e-05, "loss": 1.9452, "step": 38004 }, { "epoch": 0.49, "grad_norm": 3.76277232170105, "learning_rate": 1.954687735460918e-05, "loss": 1.7775, "step": 38005 }, { "epoch": 0.49, "grad_norm": 4.175325393676758, "learning_rate": 1.9546846081446754e-05, "loss": 2.4056, "step": 38006 }, { "epoch": 0.49, "grad_norm": 3.8701364994049072, "learning_rate": 1.9546814807230197e-05, "loss": 1.8711, "step": 38007 }, { "epoch": 0.49, "grad_norm": 3.654069423675537, "learning_rate": 1.9546783531959504e-05, "loss": 2.0999, "step": 38008 }, { "epoch": 0.49, "grad_norm": 4.42589807510376, "learning_rate": 1.954675225563469e-05, "loss": 2.1014, "step": 38009 }, { "epoch": 0.49, "grad_norm": 3.473803758621216, "learning_rate": 1.9546720978255747e-05, "loss": 1.779, "step": 38010 }, { "epoch": 0.49, "grad_norm": 3.0528557300567627, "learning_rate": 1.954668969982269e-05, "loss": 1.3913, "step": 38011 }, { "epoch": 0.49, "grad_norm": 3.601167678833008, "learning_rate": 1.9546658420335515e-05, "loss": 1.7559, "step": 38012 }, { "epoch": 0.49, "grad_norm": 3.7200469970703125, "learning_rate": 1.9546627139794223e-05, "loss": 1.7345, "step": 38013 }, { "epoch": 0.49, "grad_norm": 3.2861058712005615, "learning_rate": 1.9546595858198823e-05, "loss": 1.4512, "step": 38014 }, { "epoch": 0.49, "grad_norm": 2.9316282272338867, "learning_rate": 1.9546564575549318e-05, "loss": 1.4817, "step": 38015 }, { "epoch": 0.49, "grad_norm": 3.681785821914673, "learning_rate": 1.954653329184571e-05, "loss": 1.8052, "step": 38016 }, { "epoch": 0.49, "grad_norm": 3.6590137481689453, "learning_rate": 1.9546502007088004e-05, "loss": 2.0507, "step": 38017 }, { "epoch": 0.49, "grad_norm": 4.5088677406311035, "learning_rate": 1.95464707212762e-05, "loss": 2.6055, "step": 38018 }, { "epoch": 0.49, "grad_norm": 3.7040109634399414, "learning_rate": 1.9546439434410306e-05, "loss": 1.8063, "step": 38019 }, { "epoch": 0.49, "grad_norm": 3.5294487476348877, "learning_rate": 1.9546408146490325e-05, "loss": 1.9896, "step": 38020 }, { "epoch": 0.49, "grad_norm": 3.825364351272583, "learning_rate": 1.9546376857516255e-05, "loss": 1.626, "step": 38021 }, { "epoch": 0.49, "grad_norm": 4.521235942840576, "learning_rate": 1.95463455674881e-05, "loss": 2.5971, "step": 38022 }, { "epoch": 0.49, "grad_norm": 3.9562079906463623, "learning_rate": 1.954631427640587e-05, "loss": 1.8379, "step": 38023 }, { "epoch": 0.49, "grad_norm": 4.241113185882568, "learning_rate": 1.9546282984269568e-05, "loss": 2.0345, "step": 38024 }, { "epoch": 0.49, "grad_norm": 4.327300548553467, "learning_rate": 1.954625169107919e-05, "loss": 2.284, "step": 38025 }, { "epoch": 0.49, "grad_norm": 3.859797716140747, "learning_rate": 1.9546220396834747e-05, "loss": 1.928, "step": 38026 }, { "epoch": 0.49, "grad_norm": 3.667191505432129, "learning_rate": 1.9546189101536234e-05, "loss": 1.7593, "step": 38027 }, { "epoch": 0.49, "grad_norm": 4.44290018081665, "learning_rate": 1.9546157805183667e-05, "loss": 1.9701, "step": 38028 }, { "epoch": 0.49, "grad_norm": 3.6679699420928955, "learning_rate": 1.9546126507777035e-05, "loss": 2.0851, "step": 38029 }, { "epoch": 0.49, "grad_norm": 3.657169818878174, "learning_rate": 1.9546095209316355e-05, "loss": 1.971, "step": 38030 }, { "epoch": 0.49, "grad_norm": 4.215143203735352, "learning_rate": 1.9546063909801617e-05, "loss": 2.3634, "step": 38031 }, { "epoch": 0.49, "grad_norm": 3.619765043258667, "learning_rate": 1.9546032609232838e-05, "loss": 1.8864, "step": 38032 }, { "epoch": 0.49, "grad_norm": 3.6474344730377197, "learning_rate": 1.9546001307610015e-05, "loss": 1.9519, "step": 38033 }, { "epoch": 0.49, "grad_norm": 3.91384220123291, "learning_rate": 1.9545970004933147e-05, "loss": 1.8585, "step": 38034 }, { "epoch": 0.49, "grad_norm": 3.73394775390625, "learning_rate": 1.9545938701202245e-05, "loss": 2.0804, "step": 38035 }, { "epoch": 0.49, "grad_norm": 3.8628506660461426, "learning_rate": 1.954590739641731e-05, "loss": 2.1745, "step": 38036 }, { "epoch": 0.49, "grad_norm": 3.886765480041504, "learning_rate": 1.9545876090578342e-05, "loss": 2.2985, "step": 38037 }, { "epoch": 0.49, "grad_norm": 3.7793426513671875, "learning_rate": 1.9545844783685348e-05, "loss": 2.1122, "step": 38038 }, { "epoch": 0.49, "grad_norm": 3.7526910305023193, "learning_rate": 1.9545813475738332e-05, "loss": 1.8993, "step": 38039 }, { "epoch": 0.49, "grad_norm": 3.8132951259613037, "learning_rate": 1.95457821667373e-05, "loss": 2.0923, "step": 38040 }, { "epoch": 0.49, "grad_norm": 3.1540145874023438, "learning_rate": 1.9545750856682243e-05, "loss": 1.314, "step": 38041 }, { "epoch": 0.49, "grad_norm": 3.6631698608398438, "learning_rate": 1.954571954557318e-05, "loss": 1.7312, "step": 38042 }, { "epoch": 0.49, "grad_norm": 4.520535469055176, "learning_rate": 1.954568823341011e-05, "loss": 2.5452, "step": 38043 }, { "epoch": 0.49, "grad_norm": 4.380038738250732, "learning_rate": 1.9545656920193027e-05, "loss": 2.4016, "step": 38044 }, { "epoch": 0.49, "grad_norm": 3.602202892303467, "learning_rate": 1.9545625605921942e-05, "loss": 1.8712, "step": 38045 }, { "epoch": 0.49, "grad_norm": 3.7225265502929688, "learning_rate": 1.954559429059686e-05, "loss": 1.9104, "step": 38046 }, { "epoch": 0.49, "grad_norm": 3.552525281906128, "learning_rate": 1.9545562974217784e-05, "loss": 1.8929, "step": 38047 }, { "epoch": 0.49, "grad_norm": 3.497215986251831, "learning_rate": 1.954553165678471e-05, "loss": 2.0146, "step": 38048 }, { "epoch": 0.49, "grad_norm": 3.90305233001709, "learning_rate": 1.9545500338297655e-05, "loss": 2.0615, "step": 38049 }, { "epoch": 0.49, "grad_norm": 3.384160280227661, "learning_rate": 1.9545469018756613e-05, "loss": 1.6252, "step": 38050 }, { "epoch": 0.49, "grad_norm": 3.8520236015319824, "learning_rate": 1.9545437698161586e-05, "loss": 1.9362, "step": 38051 }, { "epoch": 0.49, "grad_norm": 4.089322566986084, "learning_rate": 1.9545406376512583e-05, "loss": 1.9902, "step": 38052 }, { "epoch": 0.49, "grad_norm": 3.2876999378204346, "learning_rate": 1.9545375053809607e-05, "loss": 1.7887, "step": 38053 }, { "epoch": 0.49, "grad_norm": 3.9226160049438477, "learning_rate": 1.9545343730052657e-05, "loss": 2.0565, "step": 38054 }, { "epoch": 0.49, "grad_norm": 3.8540525436401367, "learning_rate": 1.9545312405241738e-05, "loss": 2.0966, "step": 38055 }, { "epoch": 0.49, "grad_norm": 3.4297194480895996, "learning_rate": 1.954528107937686e-05, "loss": 1.6671, "step": 38056 }, { "epoch": 0.49, "grad_norm": 3.6344985961914062, "learning_rate": 1.9545249752458017e-05, "loss": 1.6737, "step": 38057 }, { "epoch": 0.49, "grad_norm": 3.6476383209228516, "learning_rate": 1.9545218424485215e-05, "loss": 1.8116, "step": 38058 }, { "epoch": 0.49, "grad_norm": 3.7060868740081787, "learning_rate": 1.954518709545846e-05, "loss": 2.0553, "step": 38059 }, { "epoch": 0.49, "grad_norm": 3.4879634380340576, "learning_rate": 1.9545155765377756e-05, "loss": 1.8186, "step": 38060 }, { "epoch": 0.49, "grad_norm": 3.925687074661255, "learning_rate": 1.9545124434243105e-05, "loss": 2.0809, "step": 38061 }, { "epoch": 0.49, "grad_norm": 3.7380974292755127, "learning_rate": 1.9545093102054508e-05, "loss": 2.0435, "step": 38062 }, { "epoch": 0.49, "grad_norm": 3.743457794189453, "learning_rate": 1.9545061768811972e-05, "loss": 2.0479, "step": 38063 }, { "epoch": 0.49, "grad_norm": 3.7686712741851807, "learning_rate": 1.95450304345155e-05, "loss": 1.9288, "step": 38064 }, { "epoch": 0.49, "grad_norm": 4.060853958129883, "learning_rate": 1.9544999099165095e-05, "loss": 2.2084, "step": 38065 }, { "epoch": 0.49, "grad_norm": 3.8338205814361572, "learning_rate": 1.9544967762760758e-05, "loss": 1.6721, "step": 38066 }, { "epoch": 0.49, "grad_norm": 3.980775833129883, "learning_rate": 1.9544936425302495e-05, "loss": 2.1427, "step": 38067 }, { "epoch": 0.49, "grad_norm": 3.912529468536377, "learning_rate": 1.954490508679031e-05, "loss": 2.392, "step": 38068 }, { "epoch": 0.49, "grad_norm": 4.525984287261963, "learning_rate": 1.954487374722421e-05, "loss": 2.3781, "step": 38069 }, { "epoch": 0.49, "grad_norm": 3.409147262573242, "learning_rate": 1.9544842406604188e-05, "loss": 1.648, "step": 38070 }, { "epoch": 0.49, "grad_norm": 3.8971681594848633, "learning_rate": 1.9544811064930252e-05, "loss": 1.9976, "step": 38071 }, { "epoch": 0.49, "grad_norm": 3.8354742527008057, "learning_rate": 1.954477972220241e-05, "loss": 2.249, "step": 38072 }, { "epoch": 0.49, "grad_norm": 4.2735595703125, "learning_rate": 1.9544748378420664e-05, "loss": 2.242, "step": 38073 }, { "epoch": 0.49, "grad_norm": 3.928635835647583, "learning_rate": 1.954471703358501e-05, "loss": 2.152, "step": 38074 }, { "epoch": 0.49, "grad_norm": 3.7426836490631104, "learning_rate": 1.9544685687695467e-05, "loss": 1.7535, "step": 38075 }, { "epoch": 0.49, "grad_norm": 3.039466381072998, "learning_rate": 1.954465434075202e-05, "loss": 1.4877, "step": 38076 }, { "epoch": 0.49, "grad_norm": 3.945917844772339, "learning_rate": 1.9544622992754686e-05, "loss": 1.9528, "step": 38077 }, { "epoch": 0.49, "grad_norm": 3.7715723514556885, "learning_rate": 1.9544591643703462e-05, "loss": 2.1082, "step": 38078 }, { "epoch": 0.49, "grad_norm": 3.295750379562378, "learning_rate": 1.954456029359835e-05, "loss": 1.5154, "step": 38079 }, { "epoch": 0.49, "grad_norm": 3.4222118854522705, "learning_rate": 1.9544528942439363e-05, "loss": 2.0113, "step": 38080 }, { "epoch": 0.49, "grad_norm": 4.5343732833862305, "learning_rate": 1.9544497590226497e-05, "loss": 2.0022, "step": 38081 }, { "epoch": 0.49, "grad_norm": 3.6234230995178223, "learning_rate": 1.9544466236959753e-05, "loss": 1.7286, "step": 38082 }, { "epoch": 0.49, "grad_norm": 3.5633842945098877, "learning_rate": 1.9544434882639138e-05, "loss": 1.72, "step": 38083 }, { "epoch": 0.49, "grad_norm": 3.6246237754821777, "learning_rate": 1.954440352726466e-05, "loss": 1.6318, "step": 38084 }, { "epoch": 0.49, "grad_norm": 4.165475845336914, "learning_rate": 1.9544372170836312e-05, "loss": 1.8245, "step": 38085 }, { "epoch": 0.49, "grad_norm": 3.4276328086853027, "learning_rate": 1.9544340813354105e-05, "loss": 1.7123, "step": 38086 }, { "epoch": 0.49, "grad_norm": 3.7533299922943115, "learning_rate": 1.9544309454818044e-05, "loss": 1.9746, "step": 38087 }, { "epoch": 0.49, "grad_norm": 3.4467484951019287, "learning_rate": 1.9544278095228128e-05, "loss": 1.6041, "step": 38088 }, { "epoch": 0.49, "grad_norm": 3.4148542881011963, "learning_rate": 1.954424673458436e-05, "loss": 1.6415, "step": 38089 }, { "epoch": 0.49, "grad_norm": 3.5101194381713867, "learning_rate": 1.954421537288675e-05, "loss": 1.8111, "step": 38090 }, { "epoch": 0.49, "grad_norm": 3.9057726860046387, "learning_rate": 1.9544184010135293e-05, "loss": 2.5154, "step": 38091 }, { "epoch": 0.49, "grad_norm": 3.919638156890869, "learning_rate": 1.9544152646329995e-05, "loss": 2.2383, "step": 38092 }, { "epoch": 0.49, "grad_norm": 3.5714166164398193, "learning_rate": 1.954412128147086e-05, "loss": 1.5887, "step": 38093 }, { "epoch": 0.49, "grad_norm": 3.4436697959899902, "learning_rate": 1.9544089915557897e-05, "loss": 1.7774, "step": 38094 }, { "epoch": 0.49, "grad_norm": 4.489534854888916, "learning_rate": 1.95440585485911e-05, "loss": 2.2223, "step": 38095 }, { "epoch": 0.49, "grad_norm": 3.7151496410369873, "learning_rate": 1.954402718057048e-05, "loss": 1.8323, "step": 38096 }, { "epoch": 0.49, "grad_norm": 4.237591743469238, "learning_rate": 1.9543995811496033e-05, "loss": 1.9351, "step": 38097 }, { "epoch": 0.49, "grad_norm": 3.947608470916748, "learning_rate": 1.9543964441367774e-05, "loss": 2.1975, "step": 38098 }, { "epoch": 0.49, "grad_norm": 4.807180881500244, "learning_rate": 1.9543933070185693e-05, "loss": 2.2646, "step": 38099 }, { "epoch": 0.49, "grad_norm": 3.4846086502075195, "learning_rate": 1.9543901697949802e-05, "loss": 1.7817, "step": 38100 }, { "epoch": 0.49, "grad_norm": 4.225709915161133, "learning_rate": 1.9543870324660104e-05, "loss": 2.3656, "step": 38101 }, { "epoch": 0.49, "grad_norm": 3.6387712955474854, "learning_rate": 1.95438389503166e-05, "loss": 2.1416, "step": 38102 }, { "epoch": 0.49, "grad_norm": 3.803441047668457, "learning_rate": 1.9543807574919293e-05, "loss": 2.4132, "step": 38103 }, { "epoch": 0.49, "grad_norm": 3.635946750640869, "learning_rate": 1.954377619846819e-05, "loss": 1.958, "step": 38104 }, { "epoch": 0.49, "grad_norm": 4.096179008483887, "learning_rate": 1.954374482096329e-05, "loss": 1.8809, "step": 38105 }, { "epoch": 0.49, "grad_norm": 3.598022699356079, "learning_rate": 1.95437134424046e-05, "loss": 1.8775, "step": 38106 }, { "epoch": 0.49, "grad_norm": 3.812635898590088, "learning_rate": 1.954368206279212e-05, "loss": 1.932, "step": 38107 }, { "epoch": 0.49, "grad_norm": 3.9125874042510986, "learning_rate": 1.9543650682125855e-05, "loss": 1.72, "step": 38108 }, { "epoch": 0.49, "grad_norm": 4.091371536254883, "learning_rate": 1.9543619300405814e-05, "loss": 2.2823, "step": 38109 }, { "epoch": 0.49, "grad_norm": 4.168206214904785, "learning_rate": 1.9543587917631993e-05, "loss": 1.8511, "step": 38110 }, { "epoch": 0.49, "grad_norm": 3.9782190322875977, "learning_rate": 1.95435565338044e-05, "loss": 2.1823, "step": 38111 }, { "epoch": 0.49, "grad_norm": 3.8145315647125244, "learning_rate": 1.954352514892303e-05, "loss": 1.8305, "step": 38112 }, { "epoch": 0.49, "grad_norm": 3.658660888671875, "learning_rate": 1.9543493762987896e-05, "loss": 1.907, "step": 38113 }, { "epoch": 0.49, "grad_norm": 3.6154468059539795, "learning_rate": 1.9543462375999e-05, "loss": 1.8673, "step": 38114 }, { "epoch": 0.49, "grad_norm": 3.4457762241363525, "learning_rate": 1.9543430987956345e-05, "loss": 1.6594, "step": 38115 }, { "epoch": 0.49, "grad_norm": 3.2073748111724854, "learning_rate": 1.954339959885993e-05, "loss": 1.7738, "step": 38116 }, { "epoch": 0.49, "grad_norm": 4.321704864501953, "learning_rate": 1.9543368208709764e-05, "loss": 2.3584, "step": 38117 }, { "epoch": 0.49, "grad_norm": 3.7864878177642822, "learning_rate": 1.9543336817505848e-05, "loss": 2.2117, "step": 38118 }, { "epoch": 0.49, "grad_norm": 3.5965676307678223, "learning_rate": 1.9543305425248185e-05, "loss": 2.0115, "step": 38119 }, { "epoch": 0.49, "grad_norm": 4.262932300567627, "learning_rate": 1.954327403193678e-05, "loss": 1.868, "step": 38120 }, { "epoch": 0.49, "grad_norm": 3.880479097366333, "learning_rate": 1.9543242637571634e-05, "loss": 2.2661, "step": 38121 }, { "epoch": 0.49, "grad_norm": 3.935044050216675, "learning_rate": 1.9543211242152753e-05, "loss": 2.1294, "step": 38122 }, { "epoch": 0.49, "grad_norm": 3.9557301998138428, "learning_rate": 1.954317984568014e-05, "loss": 1.9954, "step": 38123 }, { "epoch": 0.49, "grad_norm": 3.8225510120391846, "learning_rate": 1.9543148448153796e-05, "loss": 1.9131, "step": 38124 }, { "epoch": 0.49, "grad_norm": 3.7934396266937256, "learning_rate": 1.954311704957373e-05, "loss": 1.6045, "step": 38125 }, { "epoch": 0.49, "grad_norm": 4.031791687011719, "learning_rate": 1.9543085649939942e-05, "loss": 2.3388, "step": 38126 }, { "epoch": 0.49, "grad_norm": 3.151186466217041, "learning_rate": 1.9543054249252435e-05, "loss": 1.253, "step": 38127 }, { "epoch": 0.49, "grad_norm": 3.03303861618042, "learning_rate": 1.954302284751121e-05, "loss": 1.4936, "step": 38128 }, { "epoch": 0.49, "grad_norm": 3.3327949047088623, "learning_rate": 1.9542991444716276e-05, "loss": 1.5696, "step": 38129 }, { "epoch": 0.49, "grad_norm": 3.623060941696167, "learning_rate": 1.9542960040867632e-05, "loss": 1.7309, "step": 38130 }, { "epoch": 0.49, "grad_norm": 3.7755706310272217, "learning_rate": 1.9542928635965285e-05, "loss": 1.9812, "step": 38131 }, { "epoch": 0.49, "grad_norm": 4.225223541259766, "learning_rate": 1.9542897230009236e-05, "loss": 2.1985, "step": 38132 }, { "epoch": 0.49, "grad_norm": 4.327220439910889, "learning_rate": 1.9542865822999488e-05, "loss": 2.463, "step": 38133 }, { "epoch": 0.49, "grad_norm": 3.5800368785858154, "learning_rate": 1.954283441493605e-05, "loss": 1.5862, "step": 38134 }, { "epoch": 0.49, "grad_norm": 4.438088893890381, "learning_rate": 1.9542803005818913e-05, "loss": 2.413, "step": 38135 }, { "epoch": 0.49, "grad_norm": 3.677086591720581, "learning_rate": 1.9542771595648096e-05, "loss": 1.8135, "step": 38136 }, { "epoch": 0.49, "grad_norm": 3.5054471492767334, "learning_rate": 1.954274018442359e-05, "loss": 1.7023, "step": 38137 }, { "epoch": 0.49, "grad_norm": 4.11237096786499, "learning_rate": 1.954270877214541e-05, "loss": 2.2345, "step": 38138 }, { "epoch": 0.49, "grad_norm": 3.551193952560425, "learning_rate": 1.954267735881355e-05, "loss": 1.7042, "step": 38139 }, { "epoch": 0.49, "grad_norm": 3.778272867202759, "learning_rate": 1.9542645944428016e-05, "loss": 1.9874, "step": 38140 }, { "epoch": 0.49, "grad_norm": 3.7915759086608887, "learning_rate": 1.954261452898881e-05, "loss": 1.9242, "step": 38141 }, { "epoch": 0.49, "grad_norm": 3.759401798248291, "learning_rate": 1.954258311249594e-05, "loss": 1.7431, "step": 38142 }, { "epoch": 0.5, "grad_norm": 3.6170129776000977, "learning_rate": 1.954255169494941e-05, "loss": 1.4804, "step": 38143 }, { "epoch": 0.5, "grad_norm": 3.248680591583252, "learning_rate": 1.9542520276349215e-05, "loss": 1.6303, "step": 38144 }, { "epoch": 0.5, "grad_norm": 3.828474283218384, "learning_rate": 1.9542488856695362e-05, "loss": 1.9948, "step": 38145 }, { "epoch": 0.5, "grad_norm": 4.198757171630859, "learning_rate": 1.9542457435987862e-05, "loss": 2.2872, "step": 38146 }, { "epoch": 0.5, "grad_norm": 3.4394309520721436, "learning_rate": 1.9542426014226713e-05, "loss": 1.89, "step": 38147 }, { "epoch": 0.5, "grad_norm": 3.549492835998535, "learning_rate": 1.9542394591411914e-05, "loss": 2.0283, "step": 38148 }, { "epoch": 0.5, "grad_norm": 3.99507999420166, "learning_rate": 1.9542363167543476e-05, "loss": 2.3696, "step": 38149 }, { "epoch": 0.5, "grad_norm": 3.9922780990600586, "learning_rate": 1.9542331742621397e-05, "loss": 2.1841, "step": 38150 }, { "epoch": 0.5, "grad_norm": 3.1006100177764893, "learning_rate": 1.9542300316645684e-05, "loss": 1.5871, "step": 38151 }, { "epoch": 0.5, "grad_norm": 3.364793062210083, "learning_rate": 1.954226888961634e-05, "loss": 1.637, "step": 38152 }, { "epoch": 0.5, "grad_norm": 3.6311991214752197, "learning_rate": 1.9542237461533367e-05, "loss": 1.8962, "step": 38153 }, { "epoch": 0.5, "grad_norm": 4.043291091918945, "learning_rate": 1.9542206032396768e-05, "loss": 2.0596, "step": 38154 }, { "epoch": 0.5, "grad_norm": 4.054511547088623, "learning_rate": 1.954217460220655e-05, "loss": 2.1356, "step": 38155 }, { "epoch": 0.5, "grad_norm": 3.4447953701019287, "learning_rate": 1.9542143170962712e-05, "loss": 1.7564, "step": 38156 }, { "epoch": 0.5, "grad_norm": 3.8576483726501465, "learning_rate": 1.9542111738665257e-05, "loss": 1.8282, "step": 38157 }, { "epoch": 0.5, "grad_norm": 3.2669711112976074, "learning_rate": 1.9542080305314194e-05, "loss": 1.8969, "step": 38158 }, { "epoch": 0.5, "grad_norm": 3.8230719566345215, "learning_rate": 1.9542048870909524e-05, "loss": 2.1389, "step": 38159 }, { "epoch": 0.5, "grad_norm": 3.987774133682251, "learning_rate": 1.954201743545125e-05, "loss": 2.351, "step": 38160 }, { "epoch": 0.5, "grad_norm": 3.293161392211914, "learning_rate": 1.954198599893937e-05, "loss": 1.7787, "step": 38161 }, { "epoch": 0.5, "grad_norm": 3.76485538482666, "learning_rate": 1.9541954561373902e-05, "loss": 1.8448, "step": 38162 }, { "epoch": 0.5, "grad_norm": 3.854501247406006, "learning_rate": 1.9541923122754833e-05, "loss": 1.692, "step": 38163 }, { "epoch": 0.5, "grad_norm": 2.9902796745300293, "learning_rate": 1.9541891683082177e-05, "loss": 1.4978, "step": 38164 }, { "epoch": 0.5, "grad_norm": 3.5292856693267822, "learning_rate": 1.9541860242355937e-05, "loss": 1.8992, "step": 38165 }, { "epoch": 0.5, "grad_norm": 4.207742214202881, "learning_rate": 1.9541828800576107e-05, "loss": 2.4041, "step": 38166 }, { "epoch": 0.5, "grad_norm": 3.3851048946380615, "learning_rate": 1.9541797357742703e-05, "loss": 1.6109, "step": 38167 }, { "epoch": 0.5, "grad_norm": 4.050817489624023, "learning_rate": 1.9541765913855723e-05, "loss": 2.2959, "step": 38168 }, { "epoch": 0.5, "grad_norm": 3.9084420204162598, "learning_rate": 1.9541734468915162e-05, "loss": 1.997, "step": 38169 }, { "epoch": 0.5, "grad_norm": 4.001675128936768, "learning_rate": 1.9541703022921038e-05, "loss": 2.3145, "step": 38170 }, { "epoch": 0.5, "grad_norm": 3.35493540763855, "learning_rate": 1.9541671575873347e-05, "loss": 1.5346, "step": 38171 }, { "epoch": 0.5, "grad_norm": 3.467832565307617, "learning_rate": 1.9541640127772097e-05, "loss": 1.4368, "step": 38172 }, { "epoch": 0.5, "grad_norm": 3.5321152210235596, "learning_rate": 1.9541608678617283e-05, "loss": 1.5941, "step": 38173 }, { "epoch": 0.5, "grad_norm": 3.48047137260437, "learning_rate": 1.9541577228408916e-05, "loss": 1.6724, "step": 38174 }, { "epoch": 0.5, "grad_norm": 4.072577953338623, "learning_rate": 1.9541545777147e-05, "loss": 2.0582, "step": 38175 }, { "epoch": 0.5, "grad_norm": 4.207321643829346, "learning_rate": 1.954151432483153e-05, "loss": 2.6014, "step": 38176 }, { "epoch": 0.5, "grad_norm": 3.6720621585845947, "learning_rate": 1.954148287146252e-05, "loss": 2.179, "step": 38177 }, { "epoch": 0.5, "grad_norm": 3.885119915008545, "learning_rate": 1.9541451417039963e-05, "loss": 2.4244, "step": 38178 }, { "epoch": 0.5, "grad_norm": 3.489802598953247, "learning_rate": 1.9541419961563868e-05, "loss": 1.8531, "step": 38179 }, { "epoch": 0.5, "grad_norm": 3.143531560897827, "learning_rate": 1.9541388505034243e-05, "loss": 1.7727, "step": 38180 }, { "epoch": 0.5, "grad_norm": 4.035123825073242, "learning_rate": 1.9541357047451085e-05, "loss": 1.9127, "step": 38181 }, { "epoch": 0.5, "grad_norm": 3.6728672981262207, "learning_rate": 1.9541325588814398e-05, "loss": 2.0087, "step": 38182 }, { "epoch": 0.5, "grad_norm": 3.451538324356079, "learning_rate": 1.9541294129124185e-05, "loss": 1.6402, "step": 38183 }, { "epoch": 0.5, "grad_norm": 3.1112475395202637, "learning_rate": 1.9541262668380456e-05, "loss": 1.7925, "step": 38184 }, { "epoch": 0.5, "grad_norm": 3.817728042602539, "learning_rate": 1.9541231206583208e-05, "loss": 1.7164, "step": 38185 }, { "epoch": 0.5, "grad_norm": 3.739478588104248, "learning_rate": 1.9541199743732444e-05, "loss": 1.9276, "step": 38186 }, { "epoch": 0.5, "grad_norm": 3.7995026111602783, "learning_rate": 1.954116827982817e-05, "loss": 1.6176, "step": 38187 }, { "epoch": 0.5, "grad_norm": 3.3677384853363037, "learning_rate": 1.9541136814870395e-05, "loss": 1.621, "step": 38188 }, { "epoch": 0.5, "grad_norm": 4.008902549743652, "learning_rate": 1.954110534885911e-05, "loss": 2.2079, "step": 38189 }, { "epoch": 0.5, "grad_norm": 4.264477729797363, "learning_rate": 1.9541073881794327e-05, "loss": 2.0478, "step": 38190 }, { "epoch": 0.5, "grad_norm": 3.7444474697113037, "learning_rate": 1.9541042413676048e-05, "loss": 1.9012, "step": 38191 }, { "epoch": 0.5, "grad_norm": 4.005311489105225, "learning_rate": 1.9541010944504276e-05, "loss": 2.032, "step": 38192 }, { "epoch": 0.5, "grad_norm": 4.191834449768066, "learning_rate": 1.9540979474279017e-05, "loss": 2.3467, "step": 38193 }, { "epoch": 0.5, "grad_norm": 3.926661729812622, "learning_rate": 1.9540948003000268e-05, "loss": 1.9298, "step": 38194 }, { "epoch": 0.5, "grad_norm": 3.7313313484191895, "learning_rate": 1.9540916530668037e-05, "loss": 1.9509, "step": 38195 }, { "epoch": 0.5, "grad_norm": 3.6971957683563232, "learning_rate": 1.9540885057282328e-05, "loss": 1.875, "step": 38196 }, { "epoch": 0.5, "grad_norm": 3.7684848308563232, "learning_rate": 1.9540853582843143e-05, "loss": 1.8637, "step": 38197 }, { "epoch": 0.5, "grad_norm": 3.496495485305786, "learning_rate": 1.9540822107350487e-05, "loss": 1.7028, "step": 38198 }, { "epoch": 0.5, "grad_norm": 4.2203545570373535, "learning_rate": 1.9540790630804362e-05, "loss": 2.1652, "step": 38199 }, { "epoch": 0.5, "grad_norm": 3.396970510482788, "learning_rate": 1.9540759153204772e-05, "loss": 1.6472, "step": 38200 }, { "epoch": 0.5, "grad_norm": 3.4549052715301514, "learning_rate": 1.9540727674551718e-05, "loss": 1.9502, "step": 38201 }, { "epoch": 0.5, "grad_norm": 3.9257853031158447, "learning_rate": 1.9540696194845208e-05, "loss": 2.1782, "step": 38202 }, { "epoch": 0.5, "grad_norm": 3.727823257446289, "learning_rate": 1.9540664714085243e-05, "loss": 1.9408, "step": 38203 }, { "epoch": 0.5, "grad_norm": 3.4901890754699707, "learning_rate": 1.9540633232271827e-05, "loss": 1.9642, "step": 38204 }, { "epoch": 0.5, "grad_norm": 3.2705318927764893, "learning_rate": 1.9540601749404963e-05, "loss": 1.8085, "step": 38205 }, { "epoch": 0.5, "grad_norm": 3.7597169876098633, "learning_rate": 1.9540570265484654e-05, "loss": 2.0212, "step": 38206 }, { "epoch": 0.5, "grad_norm": 3.835904359817505, "learning_rate": 1.9540538780510904e-05, "loss": 1.7847, "step": 38207 }, { "epoch": 0.5, "grad_norm": 3.438394546508789, "learning_rate": 1.9540507294483716e-05, "loss": 2.0659, "step": 38208 }, { "epoch": 0.5, "grad_norm": 3.2464888095855713, "learning_rate": 1.95404758074031e-05, "loss": 1.5227, "step": 38209 }, { "epoch": 0.5, "grad_norm": 3.8061795234680176, "learning_rate": 1.9540444319269046e-05, "loss": 1.7931, "step": 38210 }, { "epoch": 0.5, "grad_norm": 3.6803030967712402, "learning_rate": 1.954041283008157e-05, "loss": 1.8886, "step": 38211 }, { "epoch": 0.5, "grad_norm": 3.443843364715576, "learning_rate": 1.954038133984067e-05, "loss": 1.6048, "step": 38212 }, { "epoch": 0.5, "grad_norm": 3.299542188644409, "learning_rate": 1.954034984854635e-05, "loss": 1.7067, "step": 38213 }, { "epoch": 0.5, "grad_norm": 3.589099407196045, "learning_rate": 1.954031835619861e-05, "loss": 2.0786, "step": 38214 }, { "epoch": 0.5, "grad_norm": 3.599442720413208, "learning_rate": 1.954028686279746e-05, "loss": 1.7396, "step": 38215 }, { "epoch": 0.5, "grad_norm": 3.869781970977783, "learning_rate": 1.95402553683429e-05, "loss": 2.243, "step": 38216 }, { "epoch": 0.5, "grad_norm": 4.886020660400391, "learning_rate": 1.9540223872834932e-05, "loss": 2.913, "step": 38217 }, { "epoch": 0.5, "grad_norm": 3.5445556640625, "learning_rate": 1.9540192376273565e-05, "loss": 1.8024, "step": 38218 }, { "epoch": 0.5, "grad_norm": 3.910327434539795, "learning_rate": 1.9540160878658796e-05, "loss": 2.59, "step": 38219 }, { "epoch": 0.5, "grad_norm": 4.078869342803955, "learning_rate": 1.954012937999063e-05, "loss": 1.8517, "step": 38220 }, { "epoch": 0.5, "grad_norm": 4.2487311363220215, "learning_rate": 1.9540097880269075e-05, "loss": 2.5682, "step": 38221 }, { "epoch": 0.5, "grad_norm": 4.020954608917236, "learning_rate": 1.954006637949413e-05, "loss": 2.2884, "step": 38222 }, { "epoch": 0.5, "grad_norm": 4.144458770751953, "learning_rate": 1.9540034877665805e-05, "loss": 2.059, "step": 38223 }, { "epoch": 0.5, "grad_norm": 3.9442338943481445, "learning_rate": 1.954000337478409e-05, "loss": 1.9628, "step": 38224 }, { "epoch": 0.5, "grad_norm": 3.5982577800750732, "learning_rate": 1.9539971870849003e-05, "loss": 2.0517, "step": 38225 }, { "epoch": 0.5, "grad_norm": 3.7145583629608154, "learning_rate": 1.953994036586054e-05, "loss": 1.788, "step": 38226 }, { "epoch": 0.5, "grad_norm": 4.0375776290893555, "learning_rate": 1.9539908859818704e-05, "loss": 2.4323, "step": 38227 }, { "epoch": 0.5, "grad_norm": 3.7093288898468018, "learning_rate": 1.95398773527235e-05, "loss": 2.1348, "step": 38228 }, { "epoch": 0.5, "grad_norm": 3.9269275665283203, "learning_rate": 1.9539845844574934e-05, "loss": 2.2452, "step": 38229 }, { "epoch": 0.5, "grad_norm": 4.082281112670898, "learning_rate": 1.9539814335373006e-05, "loss": 1.7699, "step": 38230 }, { "epoch": 0.5, "grad_norm": 4.442002296447754, "learning_rate": 1.9539782825117718e-05, "loss": 1.9797, "step": 38231 }, { "epoch": 0.5, "grad_norm": 3.8487963676452637, "learning_rate": 1.953975131380908e-05, "loss": 2.0135, "step": 38232 }, { "epoch": 0.5, "grad_norm": 3.8913846015930176, "learning_rate": 1.953971980144709e-05, "loss": 2.2609, "step": 38233 }, { "epoch": 0.5, "grad_norm": 4.445353984832764, "learning_rate": 1.9539688288031754e-05, "loss": 1.9755, "step": 38234 }, { "epoch": 0.5, "grad_norm": 3.712197780609131, "learning_rate": 1.9539656773563073e-05, "loss": 1.8948, "step": 38235 }, { "epoch": 0.5, "grad_norm": 3.28653883934021, "learning_rate": 1.9539625258041052e-05, "loss": 1.2421, "step": 38236 }, { "epoch": 0.5, "grad_norm": 3.755920648574829, "learning_rate": 1.9539593741465695e-05, "loss": 1.9997, "step": 38237 }, { "epoch": 0.5, "grad_norm": 3.577145576477051, "learning_rate": 1.953956222383701e-05, "loss": 1.8872, "step": 38238 }, { "epoch": 0.5, "grad_norm": 4.078617095947266, "learning_rate": 1.9539530705154986e-05, "loss": 1.8953, "step": 38239 }, { "epoch": 0.5, "grad_norm": 4.243277072906494, "learning_rate": 1.953949918541964e-05, "loss": 2.2844, "step": 38240 }, { "epoch": 0.5, "grad_norm": 3.2242720127105713, "learning_rate": 1.9539467664630973e-05, "loss": 1.8105, "step": 38241 }, { "epoch": 0.5, "grad_norm": 3.933849334716797, "learning_rate": 1.953943614278899e-05, "loss": 2.2073, "step": 38242 }, { "epoch": 0.5, "grad_norm": 3.6559269428253174, "learning_rate": 1.9539404619893685e-05, "loss": 1.9015, "step": 38243 }, { "epoch": 0.5, "grad_norm": 3.8545191287994385, "learning_rate": 1.9539373095945073e-05, "loss": 2.1246, "step": 38244 }, { "epoch": 0.5, "grad_norm": 3.1249890327453613, "learning_rate": 1.9539341570943148e-05, "loss": 1.5309, "step": 38245 }, { "epoch": 0.5, "grad_norm": 3.5633277893066406, "learning_rate": 1.953931004488792e-05, "loss": 1.6617, "step": 38246 }, { "epoch": 0.5, "grad_norm": 3.889583110809326, "learning_rate": 1.953927851777939e-05, "loss": 2.4218, "step": 38247 }, { "epoch": 0.5, "grad_norm": 3.376404047012329, "learning_rate": 1.953924698961756e-05, "loss": 1.8569, "step": 38248 }, { "epoch": 0.5, "grad_norm": 3.780630111694336, "learning_rate": 1.953921546040244e-05, "loss": 2.229, "step": 38249 }, { "epoch": 0.5, "grad_norm": 4.001833438873291, "learning_rate": 1.9539183930134026e-05, "loss": 1.8936, "step": 38250 }, { "epoch": 0.5, "grad_norm": 4.334625720977783, "learning_rate": 1.9539152398812324e-05, "loss": 2.4203, "step": 38251 }, { "epoch": 0.5, "grad_norm": 3.684478759765625, "learning_rate": 1.9539120866437336e-05, "loss": 1.8232, "step": 38252 }, { "epoch": 0.5, "grad_norm": 4.2618794441223145, "learning_rate": 1.9539089333009073e-05, "loss": 2.3938, "step": 38253 }, { "epoch": 0.5, "grad_norm": 3.0732929706573486, "learning_rate": 1.9539057798527524e-05, "loss": 1.6093, "step": 38254 }, { "epoch": 0.5, "grad_norm": 3.1924054622650146, "learning_rate": 1.9539026262992707e-05, "loss": 1.7241, "step": 38255 }, { "epoch": 0.5, "grad_norm": 3.9332354068756104, "learning_rate": 1.9538994726404618e-05, "loss": 1.9598, "step": 38256 }, { "epoch": 0.5, "grad_norm": 3.5162572860717773, "learning_rate": 1.9538963188763263e-05, "loss": 1.8277, "step": 38257 }, { "epoch": 0.5, "grad_norm": 4.159862518310547, "learning_rate": 1.9538931650068647e-05, "loss": 2.3212, "step": 38258 }, { "epoch": 0.5, "grad_norm": 3.1062405109405518, "learning_rate": 1.9538900110320766e-05, "loss": 1.5672, "step": 38259 }, { "epoch": 0.5, "grad_norm": 3.7932755947113037, "learning_rate": 1.9538868569519634e-05, "loss": 1.7034, "step": 38260 }, { "epoch": 0.5, "grad_norm": 4.211534023284912, "learning_rate": 1.9538837027665246e-05, "loss": 2.2235, "step": 38261 }, { "epoch": 0.5, "grad_norm": 3.74845814704895, "learning_rate": 1.9538805484757607e-05, "loss": 1.9621, "step": 38262 }, { "epoch": 0.5, "grad_norm": 3.4912397861480713, "learning_rate": 1.9538773940796726e-05, "loss": 1.6421, "step": 38263 }, { "epoch": 0.5, "grad_norm": 3.2507975101470947, "learning_rate": 1.95387423957826e-05, "loss": 1.4721, "step": 38264 }, { "epoch": 0.5, "grad_norm": 3.702935218811035, "learning_rate": 1.9538710849715237e-05, "loss": 1.9764, "step": 38265 }, { "epoch": 0.5, "grad_norm": 3.7979915142059326, "learning_rate": 1.9538679302594635e-05, "loss": 2.0888, "step": 38266 }, { "epoch": 0.5, "grad_norm": 3.6992499828338623, "learning_rate": 1.9538647754420802e-05, "loss": 1.6777, "step": 38267 }, { "epoch": 0.5, "grad_norm": 3.8693366050720215, "learning_rate": 1.953861620519374e-05, "loss": 2.0686, "step": 38268 }, { "epoch": 0.5, "grad_norm": 4.67280387878418, "learning_rate": 1.9538584654913456e-05, "loss": 2.1818, "step": 38269 }, { "epoch": 0.5, "grad_norm": 4.055121421813965, "learning_rate": 1.953855310357995e-05, "loss": 2.1225, "step": 38270 }, { "epoch": 0.5, "grad_norm": 3.4206647872924805, "learning_rate": 1.9538521551193223e-05, "loss": 1.7321, "step": 38271 }, { "epoch": 0.5, "grad_norm": 3.5285751819610596, "learning_rate": 1.953848999775328e-05, "loss": 2.13, "step": 38272 }, { "epoch": 0.5, "grad_norm": 2.905736207962036, "learning_rate": 1.9538458443260132e-05, "loss": 1.292, "step": 38273 }, { "epoch": 0.5, "grad_norm": 3.2348828315734863, "learning_rate": 1.9538426887713775e-05, "loss": 1.6395, "step": 38274 }, { "epoch": 0.5, "grad_norm": 4.542990684509277, "learning_rate": 1.9538395331114214e-05, "loss": 2.3943, "step": 38275 }, { "epoch": 0.5, "grad_norm": 3.284477710723877, "learning_rate": 1.953836377346145e-05, "loss": 1.5062, "step": 38276 }, { "epoch": 0.5, "grad_norm": 3.3130385875701904, "learning_rate": 1.9538332214755486e-05, "loss": 1.7969, "step": 38277 }, { "epoch": 0.5, "grad_norm": 3.6668496131896973, "learning_rate": 1.9538300654996333e-05, "loss": 1.8005, "step": 38278 }, { "epoch": 0.5, "grad_norm": 3.7580671310424805, "learning_rate": 1.953826909418399e-05, "loss": 1.9775, "step": 38279 }, { "epoch": 0.5, "grad_norm": 3.775933027267456, "learning_rate": 1.953823753231846e-05, "loss": 1.8597, "step": 38280 }, { "epoch": 0.5, "grad_norm": 4.659993648529053, "learning_rate": 1.9538205969399744e-05, "loss": 2.1541, "step": 38281 }, { "epoch": 0.5, "grad_norm": 3.8489208221435547, "learning_rate": 1.953817440542785e-05, "loss": 1.8267, "step": 38282 }, { "epoch": 0.5, "grad_norm": 3.8727424144744873, "learning_rate": 1.953814284040278e-05, "loss": 2.093, "step": 38283 }, { "epoch": 0.5, "grad_norm": 4.1441168785095215, "learning_rate": 1.9538111274324537e-05, "loss": 2.2676, "step": 38284 }, { "epoch": 0.5, "grad_norm": 3.132030487060547, "learning_rate": 1.953807970719312e-05, "loss": 1.3775, "step": 38285 }, { "epoch": 0.5, "grad_norm": 3.98366641998291, "learning_rate": 1.953804813900854e-05, "loss": 1.9534, "step": 38286 }, { "epoch": 0.5, "grad_norm": 3.7087481021881104, "learning_rate": 1.9538016569770802e-05, "loss": 1.8533, "step": 38287 }, { "epoch": 0.5, "grad_norm": 3.767777919769287, "learning_rate": 1.9537984999479902e-05, "loss": 1.9298, "step": 38288 }, { "epoch": 0.5, "grad_norm": 3.340608596801758, "learning_rate": 1.953795342813585e-05, "loss": 1.6013, "step": 38289 }, { "epoch": 0.5, "grad_norm": 3.3210530281066895, "learning_rate": 1.953792185573864e-05, "loss": 1.7983, "step": 38290 }, { "epoch": 0.5, "grad_norm": 3.8191514015197754, "learning_rate": 1.953789028228829e-05, "loss": 2.0012, "step": 38291 }, { "epoch": 0.5, "grad_norm": 3.037051200866699, "learning_rate": 1.9537858707784786e-05, "loss": 1.4013, "step": 38292 }, { "epoch": 0.5, "grad_norm": 3.826735496520996, "learning_rate": 1.9537827132228143e-05, "loss": 2.1347, "step": 38293 }, { "epoch": 0.5, "grad_norm": 3.46346378326416, "learning_rate": 1.9537795555618368e-05, "loss": 1.5225, "step": 38294 }, { "epoch": 0.5, "grad_norm": 4.015657901763916, "learning_rate": 1.9537763977955452e-05, "loss": 2.3311, "step": 38295 }, { "epoch": 0.5, "grad_norm": 3.386833667755127, "learning_rate": 1.9537732399239407e-05, "loss": 1.7818, "step": 38296 }, { "epoch": 0.5, "grad_norm": 3.76383638381958, "learning_rate": 1.9537700819470232e-05, "loss": 2.1339, "step": 38297 }, { "epoch": 0.5, "grad_norm": 3.769989252090454, "learning_rate": 1.9537669238647938e-05, "loss": 2.1172, "step": 38298 }, { "epoch": 0.5, "grad_norm": 4.096639156341553, "learning_rate": 1.953763765677252e-05, "loss": 1.9055, "step": 38299 }, { "epoch": 0.5, "grad_norm": 3.5694122314453125, "learning_rate": 1.9537606073843987e-05, "loss": 1.7428, "step": 38300 }, { "epoch": 0.5, "grad_norm": 4.337947845458984, "learning_rate": 1.9537574489862338e-05, "loss": 2.0634, "step": 38301 }, { "epoch": 0.5, "grad_norm": 3.4448044300079346, "learning_rate": 1.9537542904827583e-05, "loss": 1.9663, "step": 38302 }, { "epoch": 0.5, "grad_norm": 3.949190139770508, "learning_rate": 1.953751131873972e-05, "loss": 2.1694, "step": 38303 }, { "epoch": 0.5, "grad_norm": 3.7649033069610596, "learning_rate": 1.9537479731598748e-05, "loss": 2.565, "step": 38304 }, { "epoch": 0.5, "grad_norm": 4.3000078201293945, "learning_rate": 1.9537448143404685e-05, "loss": 2.0488, "step": 38305 }, { "epoch": 0.5, "grad_norm": 4.424418926239014, "learning_rate": 1.9537416554157524e-05, "loss": 2.0981, "step": 38306 }, { "epoch": 0.5, "grad_norm": 4.1162800788879395, "learning_rate": 1.9537384963857266e-05, "loss": 2.1779, "step": 38307 }, { "epoch": 0.5, "grad_norm": 4.0055766105651855, "learning_rate": 1.9537353372503923e-05, "loss": 1.7799, "step": 38308 }, { "epoch": 0.5, "grad_norm": 3.4222002029418945, "learning_rate": 1.9537321780097494e-05, "loss": 1.7367, "step": 38309 }, { "epoch": 0.5, "grad_norm": 4.198781490325928, "learning_rate": 1.953729018663798e-05, "loss": 2.4273, "step": 38310 }, { "epoch": 0.5, "grad_norm": 4.222722053527832, "learning_rate": 1.953725859212539e-05, "loss": 2.1075, "step": 38311 }, { "epoch": 0.5, "grad_norm": 3.632347822189331, "learning_rate": 1.9537226996559725e-05, "loss": 2.0596, "step": 38312 }, { "epoch": 0.5, "grad_norm": 3.960951805114746, "learning_rate": 1.9537195399940985e-05, "loss": 2.3001, "step": 38313 }, { "epoch": 0.5, "grad_norm": 3.5918917655944824, "learning_rate": 1.9537163802269183e-05, "loss": 1.8451, "step": 38314 }, { "epoch": 0.5, "grad_norm": 3.442103862762451, "learning_rate": 1.9537132203544308e-05, "loss": 1.7395, "step": 38315 }, { "epoch": 0.5, "grad_norm": 3.4779725074768066, "learning_rate": 1.9537100603766375e-05, "loss": 1.842, "step": 38316 }, { "epoch": 0.5, "grad_norm": 3.700056791305542, "learning_rate": 1.953706900293539e-05, "loss": 2.1798, "step": 38317 }, { "epoch": 0.5, "grad_norm": 4.1872453689575195, "learning_rate": 1.9537037401051348e-05, "loss": 2.2179, "step": 38318 }, { "epoch": 0.5, "grad_norm": 3.9733312129974365, "learning_rate": 1.9537005798114253e-05, "loss": 2.3577, "step": 38319 }, { "epoch": 0.5, "grad_norm": 3.8424274921417236, "learning_rate": 1.953697419412411e-05, "loss": 1.8257, "step": 38320 }, { "epoch": 0.5, "grad_norm": 4.5032806396484375, "learning_rate": 1.9536942589080926e-05, "loss": 2.3116, "step": 38321 }, { "epoch": 0.5, "grad_norm": 3.9736711978912354, "learning_rate": 1.95369109829847e-05, "loss": 2.2012, "step": 38322 }, { "epoch": 0.5, "grad_norm": 3.9003608226776123, "learning_rate": 1.953687937583544e-05, "loss": 2.3161, "step": 38323 }, { "epoch": 0.5, "grad_norm": 4.194135665893555, "learning_rate": 1.9536847767633144e-05, "loss": 2.3168, "step": 38324 }, { "epoch": 0.5, "grad_norm": 4.1297125816345215, "learning_rate": 1.9536816158377817e-05, "loss": 1.5982, "step": 38325 }, { "epoch": 0.5, "grad_norm": 3.4931650161743164, "learning_rate": 1.9536784548069465e-05, "loss": 1.8972, "step": 38326 }, { "epoch": 0.5, "grad_norm": 3.8529608249664307, "learning_rate": 1.953675293670809e-05, "loss": 2.2841, "step": 38327 }, { "epoch": 0.5, "grad_norm": 3.370924234390259, "learning_rate": 1.9536721324293695e-05, "loss": 1.4609, "step": 38328 }, { "epoch": 0.5, "grad_norm": 3.381901979446411, "learning_rate": 1.9536689710826287e-05, "loss": 1.6268, "step": 38329 }, { "epoch": 0.5, "grad_norm": 4.091052532196045, "learning_rate": 1.9536658096305865e-05, "loss": 2.0568, "step": 38330 }, { "epoch": 0.5, "grad_norm": 3.889582872390747, "learning_rate": 1.9536626480732434e-05, "loss": 1.8015, "step": 38331 }, { "epoch": 0.5, "grad_norm": 4.300773620605469, "learning_rate": 1.9536594864105996e-05, "loss": 1.8851, "step": 38332 }, { "epoch": 0.5, "grad_norm": 3.9898009300231934, "learning_rate": 1.953656324642656e-05, "loss": 1.9007, "step": 38333 }, { "epoch": 0.5, "grad_norm": 4.348517417907715, "learning_rate": 1.9536531627694125e-05, "loss": 1.8336, "step": 38334 }, { "epoch": 0.5, "grad_norm": 3.7410945892333984, "learning_rate": 1.953650000790869e-05, "loss": 2.1941, "step": 38335 }, { "epoch": 0.5, "grad_norm": 3.3511910438537598, "learning_rate": 1.953646838707027e-05, "loss": 2.135, "step": 38336 }, { "epoch": 0.5, "grad_norm": 3.9432075023651123, "learning_rate": 1.9536436765178858e-05, "loss": 1.9962, "step": 38337 }, { "epoch": 0.5, "grad_norm": 4.3314313888549805, "learning_rate": 1.9536405142234466e-05, "loss": 2.2152, "step": 38338 }, { "epoch": 0.5, "grad_norm": 4.002969741821289, "learning_rate": 1.953637351823709e-05, "loss": 1.9465, "step": 38339 }, { "epoch": 0.5, "grad_norm": 3.586803436279297, "learning_rate": 1.9536341893186736e-05, "loss": 1.7662, "step": 38340 }, { "epoch": 0.5, "grad_norm": 3.6385486125946045, "learning_rate": 1.953631026708341e-05, "loss": 1.9229, "step": 38341 }, { "epoch": 0.5, "grad_norm": 3.5896975994110107, "learning_rate": 1.953627863992711e-05, "loss": 1.7666, "step": 38342 }, { "epoch": 0.5, "grad_norm": 4.005734443664551, "learning_rate": 1.9536247011717847e-05, "loss": 2.2025, "step": 38343 }, { "epoch": 0.5, "grad_norm": 4.0701422691345215, "learning_rate": 1.9536215382455615e-05, "loss": 2.3731, "step": 38344 }, { "epoch": 0.5, "grad_norm": 3.5015861988067627, "learning_rate": 1.953618375214043e-05, "loss": 2.2984, "step": 38345 }, { "epoch": 0.5, "grad_norm": 3.5854036808013916, "learning_rate": 1.9536152120772285e-05, "loss": 1.803, "step": 38346 }, { "epoch": 0.5, "grad_norm": 3.6591033935546875, "learning_rate": 1.953612048835119e-05, "loss": 1.9135, "step": 38347 }, { "epoch": 0.5, "grad_norm": 3.765658378601074, "learning_rate": 1.953608885487714e-05, "loss": 1.5705, "step": 38348 }, { "epoch": 0.5, "grad_norm": 3.8192248344421387, "learning_rate": 1.9536057220350146e-05, "loss": 2.1895, "step": 38349 }, { "epoch": 0.5, "grad_norm": 3.908606767654419, "learning_rate": 1.9536025584770214e-05, "loss": 2.0059, "step": 38350 }, { "epoch": 0.5, "grad_norm": 3.651883840560913, "learning_rate": 1.953599394813734e-05, "loss": 1.8804, "step": 38351 }, { "epoch": 0.5, "grad_norm": 3.6650230884552, "learning_rate": 1.9535962310451528e-05, "loss": 2.1943, "step": 38352 }, { "epoch": 0.5, "grad_norm": 3.994408369064331, "learning_rate": 1.9535930671712786e-05, "loss": 2.3616, "step": 38353 }, { "epoch": 0.5, "grad_norm": 3.5876736640930176, "learning_rate": 1.9535899031921115e-05, "loss": 2.0914, "step": 38354 }, { "epoch": 0.5, "grad_norm": 3.430087089538574, "learning_rate": 1.953586739107652e-05, "loss": 1.4348, "step": 38355 }, { "epoch": 0.5, "grad_norm": 3.7353179454803467, "learning_rate": 1.9535835749179e-05, "loss": 1.9342, "step": 38356 }, { "epoch": 0.5, "grad_norm": 3.4376370906829834, "learning_rate": 1.9535804106228568e-05, "loss": 1.5257, "step": 38357 }, { "epoch": 0.5, "grad_norm": 3.649717330932617, "learning_rate": 1.953577246222522e-05, "loss": 1.9914, "step": 38358 }, { "epoch": 0.5, "grad_norm": 3.426056385040283, "learning_rate": 1.9535740817168957e-05, "loss": 1.7602, "step": 38359 }, { "epoch": 0.5, "grad_norm": 3.9403862953186035, "learning_rate": 1.953570917105979e-05, "loss": 2.056, "step": 38360 }, { "epoch": 0.5, "grad_norm": 3.611356019973755, "learning_rate": 1.9535677523897715e-05, "loss": 2.302, "step": 38361 }, { "epoch": 0.5, "grad_norm": 3.9142041206359863, "learning_rate": 1.9535645875682742e-05, "loss": 2.4134, "step": 38362 }, { "epoch": 0.5, "grad_norm": 3.5168051719665527, "learning_rate": 1.9535614226414873e-05, "loss": 1.9023, "step": 38363 }, { "epoch": 0.5, "grad_norm": 2.960254192352295, "learning_rate": 1.953558257609411e-05, "loss": 1.5057, "step": 38364 }, { "epoch": 0.5, "grad_norm": 3.8033831119537354, "learning_rate": 1.9535550924720457e-05, "loss": 2.0133, "step": 38365 }, { "epoch": 0.5, "grad_norm": 3.805129051208496, "learning_rate": 1.9535519272293918e-05, "loss": 2.3001, "step": 38366 }, { "epoch": 0.5, "grad_norm": 3.906195640563965, "learning_rate": 1.9535487618814493e-05, "loss": 2.0309, "step": 38367 }, { "epoch": 0.5, "grad_norm": 3.7278969287872314, "learning_rate": 1.9535455964282192e-05, "loss": 2.1542, "step": 38368 }, { "epoch": 0.5, "grad_norm": 3.9005110263824463, "learning_rate": 1.9535424308697012e-05, "loss": 2.1776, "step": 38369 }, { "epoch": 0.5, "grad_norm": 4.607151985168457, "learning_rate": 1.953539265205896e-05, "loss": 2.2642, "step": 38370 }, { "epoch": 0.5, "grad_norm": 3.3344802856445312, "learning_rate": 1.953536099436804e-05, "loss": 1.7627, "step": 38371 }, { "epoch": 0.5, "grad_norm": 4.091455936431885, "learning_rate": 1.9535329335624254e-05, "loss": 1.9026, "step": 38372 }, { "epoch": 0.5, "grad_norm": 4.2711992263793945, "learning_rate": 1.9535297675827603e-05, "loss": 2.331, "step": 38373 }, { "epoch": 0.5, "grad_norm": 3.888800859451294, "learning_rate": 1.9535266014978097e-05, "loss": 1.7488, "step": 38374 }, { "epoch": 0.5, "grad_norm": 3.524637222290039, "learning_rate": 1.9535234353075735e-05, "loss": 1.7318, "step": 38375 }, { "epoch": 0.5, "grad_norm": 3.6536941528320312, "learning_rate": 1.9535202690120522e-05, "loss": 1.6221, "step": 38376 }, { "epoch": 0.5, "grad_norm": 3.7458205223083496, "learning_rate": 1.9535171026112457e-05, "loss": 2.2135, "step": 38377 }, { "epoch": 0.5, "grad_norm": 3.461395263671875, "learning_rate": 1.9535139361051553e-05, "loss": 1.4974, "step": 38378 }, { "epoch": 0.5, "grad_norm": 3.5883021354675293, "learning_rate": 1.9535107694937805e-05, "loss": 1.9542, "step": 38379 }, { "epoch": 0.5, "grad_norm": 3.8856825828552246, "learning_rate": 1.953507602777122e-05, "loss": 1.9469, "step": 38380 }, { "epoch": 0.5, "grad_norm": 3.714925765991211, "learning_rate": 1.9535044359551797e-05, "loss": 1.9905, "step": 38381 }, { "epoch": 0.5, "grad_norm": 4.403493881225586, "learning_rate": 1.9535012690279547e-05, "loss": 2.3502, "step": 38382 }, { "epoch": 0.5, "grad_norm": 3.969665288925171, "learning_rate": 1.953498101995447e-05, "loss": 1.9696, "step": 38383 }, { "epoch": 0.5, "grad_norm": 4.09016752243042, "learning_rate": 1.9534949348576568e-05, "loss": 2.1853, "step": 38384 }, { "epoch": 0.5, "grad_norm": 3.5612432956695557, "learning_rate": 1.9534917676145847e-05, "loss": 2.0395, "step": 38385 }, { "epoch": 0.5, "grad_norm": 3.9487946033477783, "learning_rate": 1.953488600266231e-05, "loss": 2.2375, "step": 38386 }, { "epoch": 0.5, "grad_norm": 4.003391742706299, "learning_rate": 1.953485432812596e-05, "loss": 1.989, "step": 38387 }, { "epoch": 0.5, "grad_norm": 3.896571397781372, "learning_rate": 1.9534822652536796e-05, "loss": 2.1622, "step": 38388 }, { "epoch": 0.5, "grad_norm": 3.465578079223633, "learning_rate": 1.953479097589483e-05, "loss": 1.8737, "step": 38389 }, { "epoch": 0.5, "grad_norm": 3.8797011375427246, "learning_rate": 1.9534759298200062e-05, "loss": 2.2935, "step": 38390 }, { "epoch": 0.5, "grad_norm": 3.8764402866363525, "learning_rate": 1.9534727619452492e-05, "loss": 1.8733, "step": 38391 }, { "epoch": 0.5, "grad_norm": 3.978057384490967, "learning_rate": 1.9534695939652125e-05, "loss": 2.0252, "step": 38392 }, { "epoch": 0.5, "grad_norm": 3.86163592338562, "learning_rate": 1.953466425879897e-05, "loss": 2.1222, "step": 38393 }, { "epoch": 0.5, "grad_norm": 3.9876911640167236, "learning_rate": 1.9534632576893024e-05, "loss": 2.0998, "step": 38394 }, { "epoch": 0.5, "grad_norm": 3.5348472595214844, "learning_rate": 1.9534600893934294e-05, "loss": 1.7585, "step": 38395 }, { "epoch": 0.5, "grad_norm": 3.7036776542663574, "learning_rate": 1.9534569209922784e-05, "loss": 1.8556, "step": 38396 }, { "epoch": 0.5, "grad_norm": 4.357590675354004, "learning_rate": 1.953453752485849e-05, "loss": 2.1784, "step": 38397 }, { "epoch": 0.5, "grad_norm": 3.405320882797241, "learning_rate": 1.9534505838741428e-05, "loss": 1.8712, "step": 38398 }, { "epoch": 0.5, "grad_norm": 3.632683753967285, "learning_rate": 1.953447415157159e-05, "loss": 2.1752, "step": 38399 }, { "epoch": 0.5, "grad_norm": 3.829749584197998, "learning_rate": 1.9534442463348986e-05, "loss": 1.7481, "step": 38400 }, { "epoch": 0.5, "grad_norm": 4.413051128387451, "learning_rate": 1.9534410774073617e-05, "loss": 3.048, "step": 38401 }, { "epoch": 0.5, "grad_norm": 3.7837283611297607, "learning_rate": 1.9534379083745487e-05, "loss": 2.2173, "step": 38402 }, { "epoch": 0.5, "grad_norm": 4.161377906799316, "learning_rate": 1.95343473923646e-05, "loss": 2.0444, "step": 38403 }, { "epoch": 0.5, "grad_norm": 3.6325929164886475, "learning_rate": 1.9534315699930963e-05, "loss": 1.9708, "step": 38404 }, { "epoch": 0.5, "grad_norm": 4.379758358001709, "learning_rate": 1.9534284006444572e-05, "loss": 2.3447, "step": 38405 }, { "epoch": 0.5, "grad_norm": 3.3866827487945557, "learning_rate": 1.953425231190543e-05, "loss": 1.6299, "step": 38406 }, { "epoch": 0.5, "grad_norm": 3.9689953327178955, "learning_rate": 1.9534220616313554e-05, "loss": 1.9439, "step": 38407 }, { "epoch": 0.5, "grad_norm": 4.236667156219482, "learning_rate": 1.9534188919668934e-05, "loss": 2.1918, "step": 38408 }, { "epoch": 0.5, "grad_norm": 4.058906555175781, "learning_rate": 1.9534157221971576e-05, "loss": 2.3717, "step": 38409 }, { "epoch": 0.5, "grad_norm": 3.810631513595581, "learning_rate": 1.953412552322149e-05, "loss": 2.0018, "step": 38410 }, { "epoch": 0.5, "grad_norm": 3.259117841720581, "learning_rate": 1.953409382341867e-05, "loss": 1.7326, "step": 38411 }, { "epoch": 0.5, "grad_norm": 3.525273323059082, "learning_rate": 1.9534062122563126e-05, "loss": 1.721, "step": 38412 }, { "epoch": 0.5, "grad_norm": 4.43902587890625, "learning_rate": 1.9534030420654863e-05, "loss": 2.0279, "step": 38413 }, { "epoch": 0.5, "grad_norm": 3.734673261642456, "learning_rate": 1.9533998717693877e-05, "loss": 2.0542, "step": 38414 }, { "epoch": 0.5, "grad_norm": 3.2907540798187256, "learning_rate": 1.9533967013680178e-05, "loss": 1.6794, "step": 38415 }, { "epoch": 0.5, "grad_norm": 3.70350980758667, "learning_rate": 1.9533935308613766e-05, "loss": 1.4737, "step": 38416 }, { "epoch": 0.5, "grad_norm": 4.008727550506592, "learning_rate": 1.9533903602494645e-05, "loss": 2.3352, "step": 38417 }, { "epoch": 0.5, "grad_norm": 3.1420369148254395, "learning_rate": 1.953387189532282e-05, "loss": 1.5398, "step": 38418 }, { "epoch": 0.5, "grad_norm": 3.822381019592285, "learning_rate": 1.9533840187098298e-05, "loss": 2.0961, "step": 38419 }, { "epoch": 0.5, "grad_norm": 4.005392074584961, "learning_rate": 1.953380847782107e-05, "loss": 2.2791, "step": 38420 }, { "epoch": 0.5, "grad_norm": 3.673828601837158, "learning_rate": 1.9533776767491153e-05, "loss": 1.9704, "step": 38421 }, { "epoch": 0.5, "grad_norm": 4.117887020111084, "learning_rate": 1.9533745056108545e-05, "loss": 1.8314, "step": 38422 }, { "epoch": 0.5, "grad_norm": 3.906834363937378, "learning_rate": 1.9533713343673252e-05, "loss": 2.0143, "step": 38423 }, { "epoch": 0.5, "grad_norm": 3.5714614391326904, "learning_rate": 1.953368163018527e-05, "loss": 1.861, "step": 38424 }, { "epoch": 0.5, "grad_norm": 3.8768856525421143, "learning_rate": 1.953364991564461e-05, "loss": 2.0172, "step": 38425 }, { "epoch": 0.5, "grad_norm": 3.1174209117889404, "learning_rate": 1.9533618200051277e-05, "loss": 1.334, "step": 38426 }, { "epoch": 0.5, "grad_norm": 3.653730869293213, "learning_rate": 1.9533586483405268e-05, "loss": 1.6869, "step": 38427 }, { "epoch": 0.5, "grad_norm": 4.033546447753906, "learning_rate": 1.953355476570659e-05, "loss": 2.0368, "step": 38428 }, { "epoch": 0.5, "grad_norm": 3.710343360900879, "learning_rate": 1.9533523046955242e-05, "loss": 1.6508, "step": 38429 }, { "epoch": 0.5, "grad_norm": 4.092155933380127, "learning_rate": 1.9533491327151233e-05, "loss": 2.3221, "step": 38430 }, { "epoch": 0.5, "grad_norm": 4.152965545654297, "learning_rate": 1.9533459606294566e-05, "loss": 1.8954, "step": 38431 }, { "epoch": 0.5, "grad_norm": 3.033655881881714, "learning_rate": 1.9533427884385246e-05, "loss": 1.6172, "step": 38432 }, { "epoch": 0.5, "grad_norm": 3.6252825260162354, "learning_rate": 1.953339616142327e-05, "loss": 2.1772, "step": 38433 }, { "epoch": 0.5, "grad_norm": 3.9766855239868164, "learning_rate": 1.9533364437408647e-05, "loss": 2.1578, "step": 38434 }, { "epoch": 0.5, "grad_norm": 3.9000461101531982, "learning_rate": 1.953333271234138e-05, "loss": 2.228, "step": 38435 }, { "epoch": 0.5, "grad_norm": 4.105964183807373, "learning_rate": 1.953330098622147e-05, "loss": 2.2177, "step": 38436 }, { "epoch": 0.5, "grad_norm": 3.734283685684204, "learning_rate": 1.953326925904892e-05, "loss": 1.8978, "step": 38437 }, { "epoch": 0.5, "grad_norm": 3.879502534866333, "learning_rate": 1.9533237530823736e-05, "loss": 1.8438, "step": 38438 }, { "epoch": 0.5, "grad_norm": 3.775554895401001, "learning_rate": 1.953320580154592e-05, "loss": 2.403, "step": 38439 }, { "epoch": 0.5, "grad_norm": 3.3198211193084717, "learning_rate": 1.953317407121548e-05, "loss": 1.8144, "step": 38440 }, { "epoch": 0.5, "grad_norm": 3.55412220954895, "learning_rate": 1.9533142339832413e-05, "loss": 1.8062, "step": 38441 }, { "epoch": 0.5, "grad_norm": 3.4326536655426025, "learning_rate": 1.9533110607396727e-05, "loss": 1.3398, "step": 38442 }, { "epoch": 0.5, "grad_norm": 3.8935935497283936, "learning_rate": 1.9533078873908423e-05, "loss": 2.0069, "step": 38443 }, { "epoch": 0.5, "grad_norm": 3.4006028175354004, "learning_rate": 1.9533047139367504e-05, "loss": 1.4494, "step": 38444 }, { "epoch": 0.5, "grad_norm": 3.402656316757202, "learning_rate": 1.9533015403773977e-05, "loss": 1.7856, "step": 38445 }, { "epoch": 0.5, "grad_norm": 4.114616870880127, "learning_rate": 1.9532983667127846e-05, "loss": 2.5989, "step": 38446 }, { "epoch": 0.5, "grad_norm": 3.802676200866699, "learning_rate": 1.9532951929429107e-05, "loss": 1.7696, "step": 38447 }, { "epoch": 0.5, "grad_norm": 3.6063740253448486, "learning_rate": 1.953292019067777e-05, "loss": 1.6471, "step": 38448 }, { "epoch": 0.5, "grad_norm": 3.5461862087249756, "learning_rate": 1.9532888450873833e-05, "loss": 1.7156, "step": 38449 }, { "epoch": 0.5, "grad_norm": 4.7699480056762695, "learning_rate": 1.9532856710017308e-05, "loss": 2.2708, "step": 38450 }, { "epoch": 0.5, "grad_norm": 3.6267361640930176, "learning_rate": 1.9532824968108195e-05, "loss": 1.8038, "step": 38451 }, { "epoch": 0.5, "grad_norm": 4.446469783782959, "learning_rate": 1.9532793225146495e-05, "loss": 2.5217, "step": 38452 }, { "epoch": 0.5, "grad_norm": 3.737732410430908, "learning_rate": 1.953276148113221e-05, "loss": 1.736, "step": 38453 }, { "epoch": 0.5, "grad_norm": 3.8180246353149414, "learning_rate": 1.9532729736065352e-05, "loss": 1.9752, "step": 38454 }, { "epoch": 0.5, "grad_norm": 3.7679455280303955, "learning_rate": 1.9532697989945917e-05, "loss": 1.6942, "step": 38455 }, { "epoch": 0.5, "grad_norm": 3.4922423362731934, "learning_rate": 1.9532666242773907e-05, "loss": 1.4995, "step": 38456 }, { "epoch": 0.5, "grad_norm": 3.7460765838623047, "learning_rate": 1.953263449454933e-05, "loss": 1.946, "step": 38457 }, { "epoch": 0.5, "grad_norm": 2.9211182594299316, "learning_rate": 1.953260274527219e-05, "loss": 1.3448, "step": 38458 }, { "epoch": 0.5, "grad_norm": 4.5698676109313965, "learning_rate": 1.953257099494249e-05, "loss": 1.9426, "step": 38459 }, { "epoch": 0.5, "grad_norm": 3.749329090118408, "learning_rate": 1.953253924356023e-05, "loss": 1.9655, "step": 38460 }, { "epoch": 0.5, "grad_norm": 3.922956943511963, "learning_rate": 1.9532507491125416e-05, "loss": 1.8868, "step": 38461 }, { "epoch": 0.5, "grad_norm": 3.370591402053833, "learning_rate": 1.9532475737638054e-05, "loss": 1.6705, "step": 38462 }, { "epoch": 0.5, "grad_norm": 3.827212333679199, "learning_rate": 1.9532443983098145e-05, "loss": 2.2743, "step": 38463 }, { "epoch": 0.5, "grad_norm": 4.02623176574707, "learning_rate": 1.953241222750569e-05, "loss": 2.2325, "step": 38464 }, { "epoch": 0.5, "grad_norm": 4.2200541496276855, "learning_rate": 1.9532380470860694e-05, "loss": 1.7503, "step": 38465 }, { "epoch": 0.5, "grad_norm": 4.029766082763672, "learning_rate": 1.953234871316316e-05, "loss": 1.6541, "step": 38466 }, { "epoch": 0.5, "grad_norm": 4.1662445068359375, "learning_rate": 1.95323169544131e-05, "loss": 2.0933, "step": 38467 }, { "epoch": 0.5, "grad_norm": 3.6284894943237305, "learning_rate": 1.9532285194610504e-05, "loss": 1.7606, "step": 38468 }, { "epoch": 0.5, "grad_norm": 3.6760671138763428, "learning_rate": 1.9532253433755385e-05, "loss": 1.8512, "step": 38469 }, { "epoch": 0.5, "grad_norm": 3.644535779953003, "learning_rate": 1.9532221671847743e-05, "loss": 2.0293, "step": 38470 }, { "epoch": 0.5, "grad_norm": 3.985395908355713, "learning_rate": 1.9532189908887585e-05, "loss": 2.2386, "step": 38471 }, { "epoch": 0.5, "grad_norm": 3.488416910171509, "learning_rate": 1.9532158144874907e-05, "loss": 1.5955, "step": 38472 }, { "epoch": 0.5, "grad_norm": 3.620922327041626, "learning_rate": 1.953212637980972e-05, "loss": 1.8015, "step": 38473 }, { "epoch": 0.5, "grad_norm": 4.08237886428833, "learning_rate": 1.9532094613692022e-05, "loss": 2.1523, "step": 38474 }, { "epoch": 0.5, "grad_norm": 3.6209588050842285, "learning_rate": 1.9532062846521822e-05, "loss": 2.0363, "step": 38475 }, { "epoch": 0.5, "grad_norm": 4.104072093963623, "learning_rate": 1.9532031078299116e-05, "loss": 1.8904, "step": 38476 }, { "epoch": 0.5, "grad_norm": 4.262901306152344, "learning_rate": 1.9531999309023914e-05, "loss": 2.3466, "step": 38477 }, { "epoch": 0.5, "grad_norm": 4.471652507781982, "learning_rate": 1.9531967538696222e-05, "loss": 2.4005, "step": 38478 }, { "epoch": 0.5, "grad_norm": 3.8156189918518066, "learning_rate": 1.953193576731603e-05, "loss": 2.168, "step": 38479 }, { "epoch": 0.5, "grad_norm": 3.829712390899658, "learning_rate": 1.953190399488336e-05, "loss": 1.8784, "step": 38480 }, { "epoch": 0.5, "grad_norm": 4.362701416015625, "learning_rate": 1.9531872221398202e-05, "loss": 2.7578, "step": 38481 }, { "epoch": 0.5, "grad_norm": 3.7002756595611572, "learning_rate": 1.9531840446860564e-05, "loss": 2.1506, "step": 38482 }, { "epoch": 0.5, "grad_norm": 3.6344871520996094, "learning_rate": 1.953180867127045e-05, "loss": 1.6822, "step": 38483 }, { "epoch": 0.5, "grad_norm": 3.676330804824829, "learning_rate": 1.953177689462786e-05, "loss": 2.1798, "step": 38484 }, { "epoch": 0.5, "grad_norm": 4.151325702667236, "learning_rate": 1.95317451169328e-05, "loss": 2.6028, "step": 38485 }, { "epoch": 0.5, "grad_norm": 3.7618467807769775, "learning_rate": 1.9531713338185277e-05, "loss": 1.9097, "step": 38486 }, { "epoch": 0.5, "grad_norm": 3.308321475982666, "learning_rate": 1.953168155838529e-05, "loss": 1.6227, "step": 38487 }, { "epoch": 0.5, "grad_norm": 3.162921667098999, "learning_rate": 1.9531649777532846e-05, "loss": 1.6469, "step": 38488 }, { "epoch": 0.5, "grad_norm": 3.379396677017212, "learning_rate": 1.953161799562794e-05, "loss": 1.8145, "step": 38489 }, { "epoch": 0.5, "grad_norm": 4.419971942901611, "learning_rate": 1.9531586212670586e-05, "loss": 2.421, "step": 38490 }, { "epoch": 0.5, "grad_norm": 3.924123764038086, "learning_rate": 1.9531554428660778e-05, "loss": 1.7883, "step": 38491 }, { "epoch": 0.5, "grad_norm": 3.1712005138397217, "learning_rate": 1.953152264359853e-05, "loss": 1.5434, "step": 38492 }, { "epoch": 0.5, "grad_norm": 3.9926445484161377, "learning_rate": 1.953149085748384e-05, "loss": 2.2354, "step": 38493 }, { "epoch": 0.5, "grad_norm": 3.89452862739563, "learning_rate": 1.953145907031671e-05, "loss": 2.2106, "step": 38494 }, { "epoch": 0.5, "grad_norm": 4.0644378662109375, "learning_rate": 1.9531427282097145e-05, "loss": 2.373, "step": 38495 }, { "epoch": 0.5, "grad_norm": 3.767749547958374, "learning_rate": 1.953139549282515e-05, "loss": 2.2537, "step": 38496 }, { "epoch": 0.5, "grad_norm": 4.302185535430908, "learning_rate": 1.9531363702500727e-05, "loss": 2.4185, "step": 38497 }, { "epoch": 0.5, "grad_norm": 3.8441202640533447, "learning_rate": 1.953133191112388e-05, "loss": 1.9238, "step": 38498 }, { "epoch": 0.5, "grad_norm": 4.102428913116455, "learning_rate": 1.9531300118694612e-05, "loss": 1.7707, "step": 38499 }, { "epoch": 0.5, "grad_norm": 4.504157066345215, "learning_rate": 1.9531268325212926e-05, "loss": 1.7717, "step": 38500 }, { "epoch": 0.5, "grad_norm": 3.442544937133789, "learning_rate": 1.953123653067883e-05, "loss": 1.884, "step": 38501 }, { "epoch": 0.5, "grad_norm": 3.870868682861328, "learning_rate": 1.9531204735092317e-05, "loss": 2.1388, "step": 38502 }, { "epoch": 0.5, "grad_norm": 4.023581504821777, "learning_rate": 1.9531172938453403e-05, "loss": 2.2167, "step": 38503 }, { "epoch": 0.5, "grad_norm": 3.365598440170288, "learning_rate": 1.9531141140762082e-05, "loss": 1.9405, "step": 38504 }, { "epoch": 0.5, "grad_norm": 4.1139984130859375, "learning_rate": 1.9531109342018363e-05, "loss": 2.1256, "step": 38505 }, { "epoch": 0.5, "grad_norm": 3.9576785564422607, "learning_rate": 1.953107754222225e-05, "loss": 2.1093, "step": 38506 }, { "epoch": 0.5, "grad_norm": 4.231406211853027, "learning_rate": 1.953104574137374e-05, "loss": 2.6573, "step": 38507 }, { "epoch": 0.5, "grad_norm": 3.0909438133239746, "learning_rate": 1.9531013939472847e-05, "loss": 1.6251, "step": 38508 }, { "epoch": 0.5, "grad_norm": 3.4042437076568604, "learning_rate": 1.953098213651956e-05, "loss": 1.7603, "step": 38509 }, { "epoch": 0.5, "grad_norm": 3.612938165664673, "learning_rate": 1.9530950332513896e-05, "loss": 1.9798, "step": 38510 }, { "epoch": 0.5, "grad_norm": 4.266641616821289, "learning_rate": 1.953091852745585e-05, "loss": 1.913, "step": 38511 }, { "epoch": 0.5, "grad_norm": 3.555065631866455, "learning_rate": 1.9530886721345436e-05, "loss": 1.8611, "step": 38512 }, { "epoch": 0.5, "grad_norm": 3.5516750812530518, "learning_rate": 1.9530854914182643e-05, "loss": 1.8833, "step": 38513 }, { "epoch": 0.5, "grad_norm": 3.419759750366211, "learning_rate": 1.9530823105967487e-05, "loss": 1.7331, "step": 38514 }, { "epoch": 0.5, "grad_norm": 3.4173829555511475, "learning_rate": 1.9530791296699966e-05, "loss": 1.6606, "step": 38515 }, { "epoch": 0.5, "grad_norm": 3.797837972640991, "learning_rate": 1.9530759486380078e-05, "loss": 2.0619, "step": 38516 }, { "epoch": 0.5, "grad_norm": 4.392108917236328, "learning_rate": 1.953072767500784e-05, "loss": 1.9686, "step": 38517 }, { "epoch": 0.5, "grad_norm": 4.454631805419922, "learning_rate": 1.9530695862583245e-05, "loss": 2.109, "step": 38518 }, { "epoch": 0.5, "grad_norm": 4.041829586029053, "learning_rate": 1.95306640491063e-05, "loss": 1.9835, "step": 38519 }, { "epoch": 0.5, "grad_norm": 4.832575798034668, "learning_rate": 1.9530632234577006e-05, "loss": 2.5011, "step": 38520 }, { "epoch": 0.5, "grad_norm": 3.549760103225708, "learning_rate": 1.953060041899537e-05, "loss": 1.8895, "step": 38521 }, { "epoch": 0.5, "grad_norm": 3.781721591949463, "learning_rate": 1.9530568602361393e-05, "loss": 2.1371, "step": 38522 }, { "epoch": 0.5, "grad_norm": 3.461838960647583, "learning_rate": 1.9530536784675077e-05, "loss": 1.8284, "step": 38523 }, { "epoch": 0.5, "grad_norm": 3.5057458877563477, "learning_rate": 1.9530504965936434e-05, "loss": 1.4902, "step": 38524 }, { "epoch": 0.5, "grad_norm": 3.897338628768921, "learning_rate": 1.953047314614546e-05, "loss": 1.9413, "step": 38525 }, { "epoch": 0.5, "grad_norm": 3.2651565074920654, "learning_rate": 1.9530441325302157e-05, "loss": 1.6166, "step": 38526 }, { "epoch": 0.5, "grad_norm": 3.8876914978027344, "learning_rate": 1.9530409503406534e-05, "loss": 1.9599, "step": 38527 }, { "epoch": 0.5, "grad_norm": 3.9428060054779053, "learning_rate": 1.953037768045859e-05, "loss": 1.7618, "step": 38528 }, { "epoch": 0.5, "grad_norm": 3.8220231533050537, "learning_rate": 1.9530345856458335e-05, "loss": 2.3888, "step": 38529 }, { "epoch": 0.5, "grad_norm": 4.202641487121582, "learning_rate": 1.9530314031405762e-05, "loss": 2.0246, "step": 38530 }, { "epoch": 0.5, "grad_norm": 3.5503933429718018, "learning_rate": 1.9530282205300887e-05, "loss": 1.811, "step": 38531 }, { "epoch": 0.5, "grad_norm": 3.887909412384033, "learning_rate": 1.9530250378143705e-05, "loss": 2.1396, "step": 38532 }, { "epoch": 0.5, "grad_norm": 3.3637657165527344, "learning_rate": 1.953021854993422e-05, "loss": 1.518, "step": 38533 }, { "epoch": 0.5, "grad_norm": 3.302957773208618, "learning_rate": 1.953018672067244e-05, "loss": 1.7204, "step": 38534 }, { "epoch": 0.5, "grad_norm": 4.242741107940674, "learning_rate": 1.9530154890358363e-05, "loss": 2.3547, "step": 38535 }, { "epoch": 0.5, "grad_norm": 4.12303352355957, "learning_rate": 1.9530123058991997e-05, "loss": 2.4238, "step": 38536 }, { "epoch": 0.5, "grad_norm": 3.6943037509918213, "learning_rate": 1.953009122657334e-05, "loss": 2.1149, "step": 38537 }, { "epoch": 0.5, "grad_norm": 3.8188350200653076, "learning_rate": 1.9530059393102403e-05, "loss": 1.9607, "step": 38538 }, { "epoch": 0.5, "grad_norm": 3.6941120624542236, "learning_rate": 1.9530027558579182e-05, "loss": 1.9551, "step": 38539 }, { "epoch": 0.5, "grad_norm": 3.377598524093628, "learning_rate": 1.952999572300369e-05, "loss": 1.4833, "step": 38540 }, { "epoch": 0.5, "grad_norm": 3.89726185798645, "learning_rate": 1.9529963886375923e-05, "loss": 1.9991, "step": 38541 }, { "epoch": 0.5, "grad_norm": 3.730623722076416, "learning_rate": 1.9529932048695882e-05, "loss": 2.0847, "step": 38542 }, { "epoch": 0.5, "grad_norm": 4.3848042488098145, "learning_rate": 1.9529900209963578e-05, "loss": 1.9096, "step": 38543 }, { "epoch": 0.5, "grad_norm": 3.8640079498291016, "learning_rate": 1.9529868370179012e-05, "loss": 1.8891, "step": 38544 }, { "epoch": 0.5, "grad_norm": 3.32419490814209, "learning_rate": 1.9529836529342184e-05, "loss": 1.5428, "step": 38545 }, { "epoch": 0.5, "grad_norm": 3.843827724456787, "learning_rate": 1.9529804687453104e-05, "loss": 1.8456, "step": 38546 }, { "epoch": 0.5, "grad_norm": 3.5107014179229736, "learning_rate": 1.952977284451177e-05, "loss": 1.9458, "step": 38547 }, { "epoch": 0.5, "grad_norm": 3.5341851711273193, "learning_rate": 1.952974100051819e-05, "loss": 1.8919, "step": 38548 }, { "epoch": 0.5, "grad_norm": 4.045388221740723, "learning_rate": 1.9529709155472364e-05, "loss": 2.2493, "step": 38549 }, { "epoch": 0.5, "grad_norm": 3.941943407058716, "learning_rate": 1.9529677309374296e-05, "loss": 2.066, "step": 38550 }, { "epoch": 0.5, "grad_norm": 4.138535499572754, "learning_rate": 1.952964546222399e-05, "loss": 2.405, "step": 38551 }, { "epoch": 0.5, "grad_norm": 3.6329188346862793, "learning_rate": 1.9529613614021447e-05, "loss": 1.9907, "step": 38552 }, { "epoch": 0.5, "grad_norm": 4.275214195251465, "learning_rate": 1.9529581764766673e-05, "loss": 2.0994, "step": 38553 }, { "epoch": 0.5, "grad_norm": 3.983786106109619, "learning_rate": 1.9529549914459674e-05, "loss": 1.8824, "step": 38554 }, { "epoch": 0.5, "grad_norm": 3.577476978302002, "learning_rate": 1.9529518063100447e-05, "loss": 1.8794, "step": 38555 }, { "epoch": 0.5, "grad_norm": 3.784968137741089, "learning_rate": 1.9529486210689005e-05, "loss": 2.1609, "step": 38556 }, { "epoch": 0.5, "grad_norm": 3.574263334274292, "learning_rate": 1.9529454357225345e-05, "loss": 1.8903, "step": 38557 }, { "epoch": 0.5, "grad_norm": 3.6281070709228516, "learning_rate": 1.952942250270947e-05, "loss": 1.6739, "step": 38558 }, { "epoch": 0.5, "grad_norm": 4.120838165283203, "learning_rate": 1.9529390647141384e-05, "loss": 1.9039, "step": 38559 }, { "epoch": 0.5, "grad_norm": 3.041738510131836, "learning_rate": 1.9529358790521093e-05, "loss": 1.4177, "step": 38560 }, { "epoch": 0.5, "grad_norm": 4.008296012878418, "learning_rate": 1.95293269328486e-05, "loss": 2.2871, "step": 38561 }, { "epoch": 0.5, "grad_norm": 4.191689968109131, "learning_rate": 1.9529295074123907e-05, "loss": 2.4737, "step": 38562 }, { "epoch": 0.5, "grad_norm": 4.027674674987793, "learning_rate": 1.9529263214347015e-05, "loss": 2.41, "step": 38563 }, { "epoch": 0.5, "grad_norm": 3.6950807571411133, "learning_rate": 1.9529231353517936e-05, "loss": 2.0996, "step": 38564 }, { "epoch": 0.5, "grad_norm": 3.65022349357605, "learning_rate": 1.9529199491636666e-05, "loss": 1.7728, "step": 38565 }, { "epoch": 0.5, "grad_norm": 3.560915470123291, "learning_rate": 1.952916762870321e-05, "loss": 1.8385, "step": 38566 }, { "epoch": 0.5, "grad_norm": 5.029085636138916, "learning_rate": 1.9529135764717572e-05, "loss": 2.4776, "step": 38567 }, { "epoch": 0.5, "grad_norm": 3.376448154449463, "learning_rate": 1.9529103899679756e-05, "loss": 1.6831, "step": 38568 }, { "epoch": 0.5, "grad_norm": 3.4489095211029053, "learning_rate": 1.9529072033589765e-05, "loss": 1.7421, "step": 38569 }, { "epoch": 0.5, "grad_norm": 3.5831801891326904, "learning_rate": 1.9529040166447607e-05, "loss": 1.7637, "step": 38570 }, { "epoch": 0.5, "grad_norm": 4.063827991485596, "learning_rate": 1.9529008298253275e-05, "loss": 2.0564, "step": 38571 }, { "epoch": 0.5, "grad_norm": 3.828998327255249, "learning_rate": 1.9528976429006782e-05, "loss": 1.9222, "step": 38572 }, { "epoch": 0.5, "grad_norm": 3.543475389480591, "learning_rate": 1.952894455870813e-05, "loss": 2.1192, "step": 38573 }, { "epoch": 0.5, "grad_norm": 3.795090913772583, "learning_rate": 1.9528912687357318e-05, "loss": 1.8673, "step": 38574 }, { "epoch": 0.5, "grad_norm": 3.615807056427002, "learning_rate": 1.9528880814954353e-05, "loss": 1.9796, "step": 38575 }, { "epoch": 0.5, "grad_norm": 3.5405631065368652, "learning_rate": 1.9528848941499235e-05, "loss": 2.0568, "step": 38576 }, { "epoch": 0.5, "grad_norm": 3.341233730316162, "learning_rate": 1.9528817066991976e-05, "loss": 1.6303, "step": 38577 }, { "epoch": 0.5, "grad_norm": 4.528971195220947, "learning_rate": 1.952878519143257e-05, "loss": 2.1254, "step": 38578 }, { "epoch": 0.5, "grad_norm": 3.4951608180999756, "learning_rate": 1.9528753314821025e-05, "loss": 1.9734, "step": 38579 }, { "epoch": 0.5, "grad_norm": 4.207263946533203, "learning_rate": 1.952872143715735e-05, "loss": 2.2596, "step": 38580 }, { "epoch": 0.5, "grad_norm": 3.9444799423217773, "learning_rate": 1.9528689558441532e-05, "loss": 1.7435, "step": 38581 }, { "epoch": 0.5, "grad_norm": 3.4726107120513916, "learning_rate": 1.9528657678673593e-05, "loss": 1.9116, "step": 38582 }, { "epoch": 0.5, "grad_norm": 3.5459072589874268, "learning_rate": 1.9528625797853523e-05, "loss": 1.9934, "step": 38583 }, { "epoch": 0.5, "grad_norm": 4.111974716186523, "learning_rate": 1.9528593915981337e-05, "loss": 2.0466, "step": 38584 }, { "epoch": 0.5, "grad_norm": 3.4922242164611816, "learning_rate": 1.9528562033057027e-05, "loss": 1.9899, "step": 38585 }, { "epoch": 0.5, "grad_norm": 3.943828582763672, "learning_rate": 1.9528530149080604e-05, "loss": 2.1793, "step": 38586 }, { "epoch": 0.5, "grad_norm": 3.211900472640991, "learning_rate": 1.952849826405207e-05, "loss": 1.675, "step": 38587 }, { "epoch": 0.5, "grad_norm": 4.884523391723633, "learning_rate": 1.9528466377971432e-05, "loss": 2.404, "step": 38588 }, { "epoch": 0.5, "grad_norm": 3.5692269802093506, "learning_rate": 1.9528434490838687e-05, "loss": 2.1262, "step": 38589 }, { "epoch": 0.5, "grad_norm": 4.110855579376221, "learning_rate": 1.9528402602653838e-05, "loss": 2.2354, "step": 38590 }, { "epoch": 0.5, "grad_norm": 3.382422924041748, "learning_rate": 1.9528370713416894e-05, "loss": 1.4286, "step": 38591 }, { "epoch": 0.5, "grad_norm": 4.0957207679748535, "learning_rate": 1.9528338823127856e-05, "loss": 1.8162, "step": 38592 }, { "epoch": 0.5, "grad_norm": 4.371449947357178, "learning_rate": 1.952830693178673e-05, "loss": 2.0201, "step": 38593 }, { "epoch": 0.5, "grad_norm": 4.339852333068848, "learning_rate": 1.9528275039393513e-05, "loss": 1.953, "step": 38594 }, { "epoch": 0.5, "grad_norm": 3.4786453247070312, "learning_rate": 1.9528243145948217e-05, "loss": 1.8387, "step": 38595 }, { "epoch": 0.5, "grad_norm": 4.335960388183594, "learning_rate": 1.9528211251450838e-05, "loss": 2.37, "step": 38596 }, { "epoch": 0.5, "grad_norm": 3.9321205615997314, "learning_rate": 1.9528179355901384e-05, "loss": 2.2902, "step": 38597 }, { "epoch": 0.5, "grad_norm": 3.903989315032959, "learning_rate": 1.9528147459299857e-05, "loss": 1.9065, "step": 38598 }, { "epoch": 0.5, "grad_norm": 3.497770309448242, "learning_rate": 1.952811556164626e-05, "loss": 1.8626, "step": 38599 }, { "epoch": 0.5, "grad_norm": 3.19502329826355, "learning_rate": 1.95280836629406e-05, "loss": 1.5327, "step": 38600 }, { "epoch": 0.5, "grad_norm": 3.9132015705108643, "learning_rate": 1.9528051763182875e-05, "loss": 1.866, "step": 38601 }, { "epoch": 0.5, "grad_norm": 4.100709438323975, "learning_rate": 1.9528019862373092e-05, "loss": 2.1869, "step": 38602 }, { "epoch": 0.5, "grad_norm": 4.227596759796143, "learning_rate": 1.9527987960511257e-05, "loss": 2.6766, "step": 38603 }, { "epoch": 0.5, "grad_norm": 3.8338124752044678, "learning_rate": 1.952795605759737e-05, "loss": 2.0323, "step": 38604 }, { "epoch": 0.5, "grad_norm": 3.738236665725708, "learning_rate": 1.9527924153631433e-05, "loss": 2.1903, "step": 38605 }, { "epoch": 0.5, "grad_norm": 3.738342761993408, "learning_rate": 1.952789224861345e-05, "loss": 2.1283, "step": 38606 }, { "epoch": 0.5, "grad_norm": 3.535921096801758, "learning_rate": 1.952786034254343e-05, "loss": 1.9315, "step": 38607 }, { "epoch": 0.5, "grad_norm": 4.035961151123047, "learning_rate": 1.952782843542137e-05, "loss": 1.789, "step": 38608 }, { "epoch": 0.5, "grad_norm": 3.7567293643951416, "learning_rate": 1.9527796527247278e-05, "loss": 2.075, "step": 38609 }, { "epoch": 0.5, "grad_norm": 3.989900827407837, "learning_rate": 1.9527764618021155e-05, "loss": 2.067, "step": 38610 }, { "epoch": 0.5, "grad_norm": 3.8850135803222656, "learning_rate": 1.9527732707743e-05, "loss": 2.0043, "step": 38611 }, { "epoch": 0.5, "grad_norm": 4.072002410888672, "learning_rate": 1.9527700796412828e-05, "loss": 1.9614, "step": 38612 }, { "epoch": 0.5, "grad_norm": 3.694380760192871, "learning_rate": 1.9527668884030633e-05, "loss": 1.7671, "step": 38613 }, { "epoch": 0.5, "grad_norm": 3.588406562805176, "learning_rate": 1.9527636970596427e-05, "loss": 2.0048, "step": 38614 }, { "epoch": 0.5, "grad_norm": 4.682444095611572, "learning_rate": 1.9527605056110203e-05, "loss": 2.0906, "step": 38615 }, { "epoch": 0.5, "grad_norm": 3.8331823348999023, "learning_rate": 1.952757314057197e-05, "loss": 1.7134, "step": 38616 }, { "epoch": 0.5, "grad_norm": 3.2895259857177734, "learning_rate": 1.9527541223981733e-05, "loss": 1.7473, "step": 38617 }, { "epoch": 0.5, "grad_norm": 3.9451165199279785, "learning_rate": 1.9527509306339495e-05, "loss": 1.6661, "step": 38618 }, { "epoch": 0.5, "grad_norm": 4.133915901184082, "learning_rate": 1.9527477387645254e-05, "loss": 2.1102, "step": 38619 }, { "epoch": 0.5, "grad_norm": 4.161876678466797, "learning_rate": 1.952744546789902e-05, "loss": 1.8756, "step": 38620 }, { "epoch": 0.5, "grad_norm": 3.9287734031677246, "learning_rate": 1.9527413547100797e-05, "loss": 2.1602, "step": 38621 }, { "epoch": 0.5, "grad_norm": 4.501206398010254, "learning_rate": 1.9527381625250584e-05, "loss": 2.4596, "step": 38622 }, { "epoch": 0.5, "grad_norm": 4.169622421264648, "learning_rate": 1.9527349702348382e-05, "loss": 2.0996, "step": 38623 }, { "epoch": 0.5, "grad_norm": 3.6980648040771484, "learning_rate": 1.9527317778394207e-05, "loss": 1.9546, "step": 38624 }, { "epoch": 0.5, "grad_norm": 3.8646891117095947, "learning_rate": 1.952728585338805e-05, "loss": 2.0381, "step": 38625 }, { "epoch": 0.5, "grad_norm": 4.036166191101074, "learning_rate": 1.952725392732992e-05, "loss": 1.9093, "step": 38626 }, { "epoch": 0.5, "grad_norm": 3.2416696548461914, "learning_rate": 1.9527222000219817e-05, "loss": 1.6613, "step": 38627 }, { "epoch": 0.5, "grad_norm": 4.015298366546631, "learning_rate": 1.9527190072057748e-05, "loss": 2.2781, "step": 38628 }, { "epoch": 0.5, "grad_norm": 3.3573594093322754, "learning_rate": 1.952715814284372e-05, "loss": 1.7512, "step": 38629 }, { "epoch": 0.5, "grad_norm": 3.67728853225708, "learning_rate": 1.952712621257773e-05, "loss": 1.9017, "step": 38630 }, { "epoch": 0.5, "grad_norm": 3.3326990604400635, "learning_rate": 1.952709428125978e-05, "loss": 1.5156, "step": 38631 }, { "epoch": 0.5, "grad_norm": 3.800487756729126, "learning_rate": 1.952706234888988e-05, "loss": 1.8655, "step": 38632 }, { "epoch": 0.5, "grad_norm": 3.973355770111084, "learning_rate": 1.952703041546803e-05, "loss": 2.2285, "step": 38633 }, { "epoch": 0.5, "grad_norm": 3.514509677886963, "learning_rate": 1.9526998480994235e-05, "loss": 1.7829, "step": 38634 }, { "epoch": 0.5, "grad_norm": 4.0032525062561035, "learning_rate": 1.9526966545468498e-05, "loss": 2.2051, "step": 38635 }, { "epoch": 0.5, "grad_norm": 3.7910995483398438, "learning_rate": 1.952693460889082e-05, "loss": 2.2553, "step": 38636 }, { "epoch": 0.5, "grad_norm": 3.6871161460876465, "learning_rate": 1.952690267126121e-05, "loss": 2.0352, "step": 38637 }, { "epoch": 0.5, "grad_norm": 3.7131154537200928, "learning_rate": 1.9526870732579666e-05, "loss": 1.7388, "step": 38638 }, { "epoch": 0.5, "grad_norm": 3.094855785369873, "learning_rate": 1.9526838792846194e-05, "loss": 1.4778, "step": 38639 }, { "epoch": 0.5, "grad_norm": 3.478335380554199, "learning_rate": 1.95268068520608e-05, "loss": 1.782, "step": 38640 }, { "epoch": 0.5, "grad_norm": 4.414640426635742, "learning_rate": 1.9526774910223482e-05, "loss": 2.6029, "step": 38641 }, { "epoch": 0.5, "grad_norm": 3.4392683506011963, "learning_rate": 1.9526742967334247e-05, "loss": 1.697, "step": 38642 }, { "epoch": 0.5, "grad_norm": 3.312729597091675, "learning_rate": 1.9526711023393096e-05, "loss": 1.8156, "step": 38643 }, { "epoch": 0.5, "grad_norm": 3.4348104000091553, "learning_rate": 1.9526679078400038e-05, "loss": 1.9068, "step": 38644 }, { "epoch": 0.5, "grad_norm": 4.171570301055908, "learning_rate": 1.9526647132355073e-05, "loss": 2.4572, "step": 38645 }, { "epoch": 0.5, "grad_norm": 4.0450663566589355, "learning_rate": 1.9526615185258203e-05, "loss": 2.256, "step": 38646 }, { "epoch": 0.5, "grad_norm": 3.057882308959961, "learning_rate": 1.9526583237109432e-05, "loss": 1.55, "step": 38647 }, { "epoch": 0.5, "grad_norm": 3.713160276412964, "learning_rate": 1.9526551287908765e-05, "loss": 2.2176, "step": 38648 }, { "epoch": 0.5, "grad_norm": 3.4590089321136475, "learning_rate": 1.9526519337656205e-05, "loss": 1.7456, "step": 38649 }, { "epoch": 0.5, "grad_norm": 3.880913019180298, "learning_rate": 1.952648738635176e-05, "loss": 2.2383, "step": 38650 }, { "epoch": 0.5, "grad_norm": 4.190412521362305, "learning_rate": 1.9526455433995424e-05, "loss": 2.4491, "step": 38651 }, { "epoch": 0.5, "grad_norm": 3.636960983276367, "learning_rate": 1.9526423480587205e-05, "loss": 2.1377, "step": 38652 }, { "epoch": 0.5, "grad_norm": 3.7844889163970947, "learning_rate": 1.9526391526127114e-05, "loss": 1.9458, "step": 38653 }, { "epoch": 0.5, "grad_norm": 3.387033462524414, "learning_rate": 1.952635957061514e-05, "loss": 1.9087, "step": 38654 }, { "epoch": 0.5, "grad_norm": 3.8590500354766846, "learning_rate": 1.95263276140513e-05, "loss": 1.8468, "step": 38655 }, { "epoch": 0.5, "grad_norm": 3.885406970977783, "learning_rate": 1.952629565643559e-05, "loss": 1.8673, "step": 38656 }, { "epoch": 0.5, "grad_norm": 4.397302150726318, "learning_rate": 1.9526263697768014e-05, "loss": 1.9775, "step": 38657 }, { "epoch": 0.5, "grad_norm": 3.790435791015625, "learning_rate": 1.9526231738048577e-05, "loss": 2.2913, "step": 38658 }, { "epoch": 0.5, "grad_norm": 3.1187891960144043, "learning_rate": 1.9526199777277287e-05, "loss": 1.8436, "step": 38659 }, { "epoch": 0.5, "grad_norm": 3.4568638801574707, "learning_rate": 1.952616781545414e-05, "loss": 2.0375, "step": 38660 }, { "epoch": 0.5, "grad_norm": 3.9651131629943848, "learning_rate": 1.9526135852579138e-05, "loss": 2.1779, "step": 38661 }, { "epoch": 0.5, "grad_norm": 3.3203604221343994, "learning_rate": 1.952610388865229e-05, "loss": 1.9561, "step": 38662 }, { "epoch": 0.5, "grad_norm": 3.9729583263397217, "learning_rate": 1.9526071923673603e-05, "loss": 2.3412, "step": 38663 }, { "epoch": 0.5, "grad_norm": 3.5436344146728516, "learning_rate": 1.9526039957643076e-05, "loss": 1.7251, "step": 38664 }, { "epoch": 0.5, "grad_norm": 3.961467981338501, "learning_rate": 1.952600799056071e-05, "loss": 2.1987, "step": 38665 }, { "epoch": 0.5, "grad_norm": 3.3185486793518066, "learning_rate": 1.9525976022426512e-05, "loss": 1.8396, "step": 38666 }, { "epoch": 0.5, "grad_norm": 3.8445637226104736, "learning_rate": 1.9525944053240486e-05, "loss": 1.912, "step": 38667 }, { "epoch": 0.5, "grad_norm": 4.0238118171691895, "learning_rate": 1.952591208300263e-05, "loss": 2.0185, "step": 38668 }, { "epoch": 0.5, "grad_norm": 3.561479091644287, "learning_rate": 1.9525880111712957e-05, "loss": 1.9893, "step": 38669 }, { "epoch": 0.5, "grad_norm": 3.6419553756713867, "learning_rate": 1.9525848139371462e-05, "loss": 1.7774, "step": 38670 }, { "epoch": 0.5, "grad_norm": 3.3852531909942627, "learning_rate": 1.952581616597815e-05, "loss": 1.8605, "step": 38671 }, { "epoch": 0.5, "grad_norm": 2.988487482070923, "learning_rate": 1.952578419153303e-05, "loss": 1.495, "step": 38672 }, { "epoch": 0.5, "grad_norm": 3.5201659202575684, "learning_rate": 1.95257522160361e-05, "loss": 1.7602, "step": 38673 }, { "epoch": 0.5, "grad_norm": 3.832930326461792, "learning_rate": 1.9525720239487364e-05, "loss": 2.1857, "step": 38674 }, { "epoch": 0.5, "grad_norm": 3.3620851039886475, "learning_rate": 1.952568826188683e-05, "loss": 1.8711, "step": 38675 }, { "epoch": 0.5, "grad_norm": 3.775240421295166, "learning_rate": 1.9525656283234497e-05, "loss": 1.7962, "step": 38676 }, { "epoch": 0.5, "grad_norm": 3.8828818798065186, "learning_rate": 1.952562430353037e-05, "loss": 1.9895, "step": 38677 }, { "epoch": 0.5, "grad_norm": 3.773944854736328, "learning_rate": 1.9525592322774454e-05, "loss": 1.7053, "step": 38678 }, { "epoch": 0.5, "grad_norm": 3.3729753494262695, "learning_rate": 1.952556034096675e-05, "loss": 1.6745, "step": 38679 }, { "epoch": 0.5, "grad_norm": 3.77278733253479, "learning_rate": 1.952552835810726e-05, "loss": 1.7111, "step": 38680 }, { "epoch": 0.5, "grad_norm": 3.3290045261383057, "learning_rate": 1.9525496374195995e-05, "loss": 1.6944, "step": 38681 }, { "epoch": 0.5, "grad_norm": 3.776463747024536, "learning_rate": 1.9525464389232947e-05, "loss": 1.7092, "step": 38682 }, { "epoch": 0.5, "grad_norm": 3.5801968574523926, "learning_rate": 1.952543240321813e-05, "loss": 1.8479, "step": 38683 }, { "epoch": 0.5, "grad_norm": 3.998704671859741, "learning_rate": 1.9525400416151547e-05, "loss": 2.0534, "step": 38684 }, { "epoch": 0.5, "grad_norm": 3.635164499282837, "learning_rate": 1.9525368428033192e-05, "loss": 1.867, "step": 38685 }, { "epoch": 0.5, "grad_norm": 3.9314868450164795, "learning_rate": 1.9525336438863077e-05, "loss": 2.1363, "step": 38686 }, { "epoch": 0.5, "grad_norm": 3.514718770980835, "learning_rate": 1.9525304448641204e-05, "loss": 1.8716, "step": 38687 }, { "epoch": 0.5, "grad_norm": 3.5279476642608643, "learning_rate": 1.9525272457367574e-05, "loss": 1.7364, "step": 38688 }, { "epoch": 0.5, "grad_norm": 4.275222301483154, "learning_rate": 1.9525240465042193e-05, "loss": 2.2045, "step": 38689 }, { "epoch": 0.5, "grad_norm": 3.7849721908569336, "learning_rate": 1.9525208471665068e-05, "loss": 2.1313, "step": 38690 }, { "epoch": 0.5, "grad_norm": 3.386075258255005, "learning_rate": 1.9525176477236193e-05, "loss": 1.5258, "step": 38691 }, { "epoch": 0.5, "grad_norm": 4.20997428894043, "learning_rate": 1.952514448175558e-05, "loss": 2.5736, "step": 38692 }, { "epoch": 0.5, "grad_norm": 3.57456636428833, "learning_rate": 1.9525112485223227e-05, "loss": 2.0407, "step": 38693 }, { "epoch": 0.5, "grad_norm": 3.935669183731079, "learning_rate": 1.9525080487639144e-05, "loss": 2.1055, "step": 38694 }, { "epoch": 0.5, "grad_norm": 3.7823052406311035, "learning_rate": 1.9525048489003327e-05, "loss": 2.1323, "step": 38695 }, { "epoch": 0.5, "grad_norm": 3.7917211055755615, "learning_rate": 1.9525016489315783e-05, "loss": 1.8928, "step": 38696 }, { "epoch": 0.5, "grad_norm": 4.030500411987305, "learning_rate": 1.9524984488576515e-05, "loss": 2.1233, "step": 38697 }, { "epoch": 0.5, "grad_norm": 3.852346181869507, "learning_rate": 1.9524952486785527e-05, "loss": 2.2037, "step": 38698 }, { "epoch": 0.5, "grad_norm": 3.4846673011779785, "learning_rate": 1.9524920483942826e-05, "loss": 1.501, "step": 38699 }, { "epoch": 0.5, "grad_norm": 3.7743160724639893, "learning_rate": 1.952488848004841e-05, "loss": 2.0244, "step": 38700 }, { "epoch": 0.5, "grad_norm": 3.811392307281494, "learning_rate": 1.9524856475102284e-05, "loss": 1.7207, "step": 38701 }, { "epoch": 0.5, "grad_norm": 3.9756736755371094, "learning_rate": 1.9524824469104456e-05, "loss": 1.9611, "step": 38702 }, { "epoch": 0.5, "grad_norm": 3.5499606132507324, "learning_rate": 1.9524792462054926e-05, "loss": 2.0523, "step": 38703 }, { "epoch": 0.5, "grad_norm": 3.4150257110595703, "learning_rate": 1.952476045395369e-05, "loss": 1.6091, "step": 38704 }, { "epoch": 0.5, "grad_norm": 4.066861629486084, "learning_rate": 1.9524728444800764e-05, "loss": 2.3999, "step": 38705 }, { "epoch": 0.5, "grad_norm": 3.9739370346069336, "learning_rate": 1.9524696434596148e-05, "loss": 2.0488, "step": 38706 }, { "epoch": 0.5, "grad_norm": 3.629115343093872, "learning_rate": 1.952466442333984e-05, "loss": 2.2138, "step": 38707 }, { "epoch": 0.5, "grad_norm": 3.8940961360931396, "learning_rate": 1.952463241103185e-05, "loss": 2.0039, "step": 38708 }, { "epoch": 0.5, "grad_norm": 4.39213752746582, "learning_rate": 1.9524600397672176e-05, "loss": 2.4185, "step": 38709 }, { "epoch": 0.5, "grad_norm": 4.068258285522461, "learning_rate": 1.9524568383260828e-05, "loss": 1.6943, "step": 38710 }, { "epoch": 0.5, "grad_norm": 3.8657045364379883, "learning_rate": 1.9524536367797804e-05, "loss": 1.9675, "step": 38711 }, { "epoch": 0.5, "grad_norm": 3.8159701824188232, "learning_rate": 1.952450435128311e-05, "loss": 1.7808, "step": 38712 }, { "epoch": 0.5, "grad_norm": 3.0809030532836914, "learning_rate": 1.952447233371675e-05, "loss": 1.5965, "step": 38713 }, { "epoch": 0.5, "grad_norm": 3.42824649810791, "learning_rate": 1.9524440315098728e-05, "loss": 1.7885, "step": 38714 }, { "epoch": 0.5, "grad_norm": 3.6378867626190186, "learning_rate": 1.9524408295429043e-05, "loss": 1.9606, "step": 38715 }, { "epoch": 0.5, "grad_norm": 3.7884294986724854, "learning_rate": 1.9524376274707703e-05, "loss": 1.8524, "step": 38716 }, { "epoch": 0.5, "grad_norm": 3.7352845668792725, "learning_rate": 1.952434425293471e-05, "loss": 1.7509, "step": 38717 }, { "epoch": 0.5, "grad_norm": 4.112152576446533, "learning_rate": 1.952431223011007e-05, "loss": 2.3443, "step": 38718 }, { "epoch": 0.5, "grad_norm": 4.265243053436279, "learning_rate": 1.952428020623378e-05, "loss": 2.0124, "step": 38719 }, { "epoch": 0.5, "grad_norm": 4.069430351257324, "learning_rate": 1.952424818130585e-05, "loss": 2.0375, "step": 38720 }, { "epoch": 0.5, "grad_norm": 3.878101348876953, "learning_rate": 1.9524216155326282e-05, "loss": 2.1235, "step": 38721 }, { "epoch": 0.5, "grad_norm": 4.378365516662598, "learning_rate": 1.952418412829508e-05, "loss": 2.591, "step": 38722 }, { "epoch": 0.5, "grad_norm": 3.706225633621216, "learning_rate": 1.9524152100212245e-05, "loss": 2.1152, "step": 38723 }, { "epoch": 0.5, "grad_norm": 3.458660364151001, "learning_rate": 1.9524120071077784e-05, "loss": 1.5992, "step": 38724 }, { "epoch": 0.5, "grad_norm": 3.605891227722168, "learning_rate": 1.9524088040891697e-05, "loss": 1.6184, "step": 38725 }, { "epoch": 0.5, "grad_norm": 4.105070114135742, "learning_rate": 1.9524056009653988e-05, "loss": 2.173, "step": 38726 }, { "epoch": 0.5, "grad_norm": 4.362476825714111, "learning_rate": 1.9524023977364664e-05, "loss": 2.1786, "step": 38727 }, { "epoch": 0.5, "grad_norm": 3.522381544113159, "learning_rate": 1.9523991944023722e-05, "loss": 2.0483, "step": 38728 }, { "epoch": 0.5, "grad_norm": 3.5201547145843506, "learning_rate": 1.9523959909631175e-05, "loss": 2.1215, "step": 38729 }, { "epoch": 0.5, "grad_norm": 3.4507267475128174, "learning_rate": 1.9523927874187016e-05, "loss": 1.5267, "step": 38730 }, { "epoch": 0.5, "grad_norm": 3.5846073627471924, "learning_rate": 1.952389583769126e-05, "loss": 2.1341, "step": 38731 }, { "epoch": 0.5, "grad_norm": 3.386247158050537, "learning_rate": 1.9523863800143897e-05, "loss": 1.39, "step": 38732 }, { "epoch": 0.5, "grad_norm": 3.7839629650115967, "learning_rate": 1.9523831761544943e-05, "loss": 1.8938, "step": 38733 }, { "epoch": 0.5, "grad_norm": 3.448737144470215, "learning_rate": 1.9523799721894395e-05, "loss": 1.5665, "step": 38734 }, { "epoch": 0.5, "grad_norm": 3.5719823837280273, "learning_rate": 1.952376768119226e-05, "loss": 1.9278, "step": 38735 }, { "epoch": 0.5, "grad_norm": 3.8091747760772705, "learning_rate": 1.9523735639438538e-05, "loss": 1.8265, "step": 38736 }, { "epoch": 0.5, "grad_norm": 3.3055660724639893, "learning_rate": 1.952370359663323e-05, "loss": 1.6993, "step": 38737 }, { "epoch": 0.5, "grad_norm": 4.140239715576172, "learning_rate": 1.952367155277635e-05, "loss": 2.0667, "step": 38738 }, { "epoch": 0.5, "grad_norm": 4.016242980957031, "learning_rate": 1.952363950786789e-05, "loss": 2.0144, "step": 38739 }, { "epoch": 0.5, "grad_norm": 3.4353702068328857, "learning_rate": 1.9523607461907862e-05, "loss": 1.6994, "step": 38740 }, { "epoch": 0.5, "grad_norm": 3.188756227493286, "learning_rate": 1.9523575414896262e-05, "loss": 1.6229, "step": 38741 }, { "epoch": 0.5, "grad_norm": 4.098453521728516, "learning_rate": 1.9523543366833105e-05, "loss": 1.7064, "step": 38742 }, { "epoch": 0.5, "grad_norm": 3.6959774494171143, "learning_rate": 1.9523511317718383e-05, "loss": 2.1929, "step": 38743 }, { "epoch": 0.5, "grad_norm": 3.925368070602417, "learning_rate": 1.95234792675521e-05, "loss": 2.1454, "step": 38744 }, { "epoch": 0.5, "grad_norm": 4.371700286865234, "learning_rate": 1.9523447216334267e-05, "loss": 2.1786, "step": 38745 }, { "epoch": 0.5, "grad_norm": 3.321841239929199, "learning_rate": 1.9523415164064886e-05, "loss": 1.7654, "step": 38746 }, { "epoch": 0.5, "grad_norm": 3.439241647720337, "learning_rate": 1.9523383110743954e-05, "loss": 1.6635, "step": 38747 }, { "epoch": 0.5, "grad_norm": 3.3240184783935547, "learning_rate": 1.9523351056371482e-05, "loss": 1.6906, "step": 38748 }, { "epoch": 0.5, "grad_norm": 3.9830989837646484, "learning_rate": 1.952331900094747e-05, "loss": 2.0425, "step": 38749 }, { "epoch": 0.5, "grad_norm": 3.768251419067383, "learning_rate": 1.9523286944471923e-05, "loss": 1.8027, "step": 38750 }, { "epoch": 0.5, "grad_norm": 3.553361177444458, "learning_rate": 1.9523254886944842e-05, "loss": 2.039, "step": 38751 }, { "epoch": 0.5, "grad_norm": 3.9929287433624268, "learning_rate": 1.952322282836623e-05, "loss": 1.8885, "step": 38752 }, { "epoch": 0.5, "grad_norm": 3.7448859214782715, "learning_rate": 1.9523190768736097e-05, "loss": 2.0182, "step": 38753 }, { "epoch": 0.5, "grad_norm": 3.6808559894561768, "learning_rate": 1.952315870805444e-05, "loss": 2.1216, "step": 38754 }, { "epoch": 0.5, "grad_norm": 3.4898793697357178, "learning_rate": 1.9523126646321264e-05, "loss": 2.041, "step": 38755 }, { "epoch": 0.5, "grad_norm": 3.482039451599121, "learning_rate": 1.9523094583536576e-05, "loss": 2.0978, "step": 38756 }, { "epoch": 0.5, "grad_norm": 3.2873740196228027, "learning_rate": 1.9523062519700374e-05, "loss": 1.3469, "step": 38757 }, { "epoch": 0.5, "grad_norm": 3.9816248416900635, "learning_rate": 1.9523030454812665e-05, "loss": 2.0018, "step": 38758 }, { "epoch": 0.5, "grad_norm": 4.126199722290039, "learning_rate": 1.952299838887345e-05, "loss": 1.8023, "step": 38759 }, { "epoch": 0.5, "grad_norm": 3.9523367881774902, "learning_rate": 1.9522966321882738e-05, "loss": 1.9088, "step": 38760 }, { "epoch": 0.5, "grad_norm": 3.500074625015259, "learning_rate": 1.952293425384053e-05, "loss": 1.7194, "step": 38761 }, { "epoch": 0.5, "grad_norm": 4.250776290893555, "learning_rate": 1.9522902184746825e-05, "loss": 2.2012, "step": 38762 }, { "epoch": 0.5, "grad_norm": 4.1309003829956055, "learning_rate": 1.952287011460163e-05, "loss": 1.8397, "step": 38763 }, { "epoch": 0.5, "grad_norm": 3.9813313484191895, "learning_rate": 1.952283804340495e-05, "loss": 1.6992, "step": 38764 }, { "epoch": 0.5, "grad_norm": 3.424701452255249, "learning_rate": 1.9522805971156785e-05, "loss": 2.0138, "step": 38765 }, { "epoch": 0.5, "grad_norm": 3.6930794715881348, "learning_rate": 1.9522773897857145e-05, "loss": 1.8716, "step": 38766 }, { "epoch": 0.5, "grad_norm": 4.310395240783691, "learning_rate": 1.9522741823506025e-05, "loss": 2.2639, "step": 38767 }, { "epoch": 0.5, "grad_norm": 3.5324184894561768, "learning_rate": 1.9522709748103436e-05, "loss": 1.7767, "step": 38768 }, { "epoch": 0.5, "grad_norm": 3.9060773849487305, "learning_rate": 1.9522677671649374e-05, "loss": 2.2547, "step": 38769 }, { "epoch": 0.5, "grad_norm": 3.3535680770874023, "learning_rate": 1.9522645594143853e-05, "loss": 1.6471, "step": 38770 }, { "epoch": 0.5, "grad_norm": 3.6887009143829346, "learning_rate": 1.9522613515586865e-05, "loss": 1.8503, "step": 38771 }, { "epoch": 0.5, "grad_norm": 3.9044880867004395, "learning_rate": 1.9522581435978422e-05, "loss": 1.8658, "step": 38772 }, { "epoch": 0.5, "grad_norm": 4.135928630828857, "learning_rate": 1.952254935531852e-05, "loss": 2.5624, "step": 38773 }, { "epoch": 0.5, "grad_norm": 3.912269115447998, "learning_rate": 1.952251727360717e-05, "loss": 2.201, "step": 38774 }, { "epoch": 0.5, "grad_norm": 3.523120880126953, "learning_rate": 1.9522485190844377e-05, "loss": 2.0993, "step": 38775 }, { "epoch": 0.5, "grad_norm": 3.40063214302063, "learning_rate": 1.9522453107030134e-05, "loss": 1.6657, "step": 38776 }, { "epoch": 0.5, "grad_norm": 3.998344898223877, "learning_rate": 1.9522421022164452e-05, "loss": 2.1202, "step": 38777 }, { "epoch": 0.5, "grad_norm": 4.342339992523193, "learning_rate": 1.9522388936247335e-05, "loss": 2.3266, "step": 38778 }, { "epoch": 0.5, "grad_norm": 4.221534729003906, "learning_rate": 1.952235684927878e-05, "loss": 2.2613, "step": 38779 }, { "epoch": 0.5, "grad_norm": 3.981193780899048, "learning_rate": 1.9522324761258803e-05, "loss": 1.9567, "step": 38780 }, { "epoch": 0.5, "grad_norm": 3.768697738647461, "learning_rate": 1.9522292672187392e-05, "loss": 2.0466, "step": 38781 }, { "epoch": 0.5, "grad_norm": 4.008800983428955, "learning_rate": 1.9522260582064563e-05, "loss": 2.0833, "step": 38782 }, { "epoch": 0.5, "grad_norm": 3.8095383644104004, "learning_rate": 1.9522228490890315e-05, "loss": 2.3386, "step": 38783 }, { "epoch": 0.5, "grad_norm": 4.185743808746338, "learning_rate": 1.952219639866465e-05, "loss": 2.1017, "step": 38784 }, { "epoch": 0.5, "grad_norm": 3.826931953430176, "learning_rate": 1.952216430538757e-05, "loss": 1.8535, "step": 38785 }, { "epoch": 0.5, "grad_norm": 3.9181947708129883, "learning_rate": 1.9522132211059087e-05, "loss": 2.4114, "step": 38786 }, { "epoch": 0.5, "grad_norm": 3.9769816398620605, "learning_rate": 1.9522100115679195e-05, "loss": 1.7072, "step": 38787 }, { "epoch": 0.5, "grad_norm": 3.5029420852661133, "learning_rate": 1.9522068019247905e-05, "loss": 1.9336, "step": 38788 }, { "epoch": 0.5, "grad_norm": 3.3686892986297607, "learning_rate": 1.9522035921765214e-05, "loss": 1.9554, "step": 38789 }, { "epoch": 0.5, "grad_norm": 3.5579843521118164, "learning_rate": 1.952200382323113e-05, "loss": 1.6756, "step": 38790 }, { "epoch": 0.5, "grad_norm": 3.633073329925537, "learning_rate": 1.9521971723645656e-05, "loss": 1.7357, "step": 38791 }, { "epoch": 0.5, "grad_norm": 3.556062936782837, "learning_rate": 1.9521939623008793e-05, "loss": 1.875, "step": 38792 }, { "epoch": 0.5, "grad_norm": 3.6210083961486816, "learning_rate": 1.952190752132055e-05, "loss": 1.9001, "step": 38793 }, { "epoch": 0.5, "grad_norm": 3.8133628368377686, "learning_rate": 1.9521875418580923e-05, "loss": 2.0109, "step": 38794 }, { "epoch": 0.5, "grad_norm": 3.1885478496551514, "learning_rate": 1.952184331478992e-05, "loss": 1.627, "step": 38795 }, { "epoch": 0.5, "grad_norm": 3.4298148155212402, "learning_rate": 1.9521811209947545e-05, "loss": 1.7039, "step": 38796 }, { "epoch": 0.5, "grad_norm": 4.438690185546875, "learning_rate": 1.95217791040538e-05, "loss": 2.3311, "step": 38797 }, { "epoch": 0.5, "grad_norm": 3.4962515830993652, "learning_rate": 1.952174699710869e-05, "loss": 2.0281, "step": 38798 }, { "epoch": 0.5, "grad_norm": 3.9817519187927246, "learning_rate": 1.9521714889112217e-05, "loss": 2.3303, "step": 38799 }, { "epoch": 0.5, "grad_norm": 4.060532569885254, "learning_rate": 1.9521682780064385e-05, "loss": 2.0457, "step": 38800 }, { "epoch": 0.5, "grad_norm": 2.803614616394043, "learning_rate": 1.9521650669965195e-05, "loss": 1.2996, "step": 38801 }, { "epoch": 0.5, "grad_norm": 3.609354257583618, "learning_rate": 1.9521618558814658e-05, "loss": 2.1284, "step": 38802 }, { "epoch": 0.5, "grad_norm": 3.8451545238494873, "learning_rate": 1.952158644661277e-05, "loss": 1.9103, "step": 38803 }, { "epoch": 0.5, "grad_norm": 4.022100925445557, "learning_rate": 1.9521554333359537e-05, "loss": 1.8949, "step": 38804 }, { "epoch": 0.5, "grad_norm": 3.7992260456085205, "learning_rate": 1.9521522219054964e-05, "loss": 1.9569, "step": 38805 }, { "epoch": 0.5, "grad_norm": 3.035552501678467, "learning_rate": 1.9521490103699054e-05, "loss": 1.4608, "step": 38806 }, { "epoch": 0.5, "grad_norm": 3.534780740737915, "learning_rate": 1.952145798729181e-05, "loss": 2.061, "step": 38807 }, { "epoch": 0.5, "grad_norm": 3.8722944259643555, "learning_rate": 1.9521425869833232e-05, "loss": 1.9557, "step": 38808 }, { "epoch": 0.5, "grad_norm": 3.525522470474243, "learning_rate": 1.9521393751323334e-05, "loss": 2.1172, "step": 38809 }, { "epoch": 0.5, "grad_norm": 3.3737175464630127, "learning_rate": 1.9521361631762105e-05, "loss": 1.8158, "step": 38810 }, { "epoch": 0.5, "grad_norm": 3.8475492000579834, "learning_rate": 1.9521329511149562e-05, "loss": 2.1187, "step": 38811 }, { "epoch": 0.5, "grad_norm": 3.7059664726257324, "learning_rate": 1.95212973894857e-05, "loss": 1.7035, "step": 38812 }, { "epoch": 0.5, "grad_norm": 3.600100040435791, "learning_rate": 1.9521265266770523e-05, "loss": 1.9151, "step": 38813 }, { "epoch": 0.5, "grad_norm": 3.9054861068725586, "learning_rate": 1.952123314300404e-05, "loss": 2.2522, "step": 38814 }, { "epoch": 0.5, "grad_norm": 3.7494313716888428, "learning_rate": 1.952120101818625e-05, "loss": 1.8748, "step": 38815 }, { "epoch": 0.5, "grad_norm": 4.128312110900879, "learning_rate": 1.952116889231716e-05, "loss": 2.0236, "step": 38816 }, { "epoch": 0.5, "grad_norm": 3.6647896766662598, "learning_rate": 1.952113676539677e-05, "loss": 2.1608, "step": 38817 }, { "epoch": 0.5, "grad_norm": 3.6391818523406982, "learning_rate": 1.9521104637425083e-05, "loss": 1.945, "step": 38818 }, { "epoch": 0.5, "grad_norm": 3.319453716278076, "learning_rate": 1.952107250840211e-05, "loss": 1.5182, "step": 38819 }, { "epoch": 0.5, "grad_norm": 4.236364364624023, "learning_rate": 1.9521040378327843e-05, "loss": 2.2219, "step": 38820 }, { "epoch": 0.5, "grad_norm": 3.9077401161193848, "learning_rate": 1.9521008247202293e-05, "loss": 2.1104, "step": 38821 }, { "epoch": 0.5, "grad_norm": 3.297607660293579, "learning_rate": 1.9520976115025464e-05, "loss": 1.7733, "step": 38822 }, { "epoch": 0.5, "grad_norm": 3.1352853775024414, "learning_rate": 1.9520943981797355e-05, "loss": 1.4973, "step": 38823 }, { "epoch": 0.5, "grad_norm": 3.7806479930877686, "learning_rate": 1.9520911847517973e-05, "loss": 1.7863, "step": 38824 }, { "epoch": 0.5, "grad_norm": 3.1016738414764404, "learning_rate": 1.9520879712187325e-05, "loss": 1.7161, "step": 38825 }, { "epoch": 0.5, "grad_norm": 3.8118786811828613, "learning_rate": 1.9520847575805404e-05, "loss": 2.1994, "step": 38826 }, { "epoch": 0.5, "grad_norm": 3.3282370567321777, "learning_rate": 1.9520815438372224e-05, "loss": 1.7512, "step": 38827 }, { "epoch": 0.5, "grad_norm": 4.30829381942749, "learning_rate": 1.9520783299887785e-05, "loss": 2.5506, "step": 38828 }, { "epoch": 0.5, "grad_norm": 3.8687431812286377, "learning_rate": 1.952075116035209e-05, "loss": 2.0057, "step": 38829 }, { "epoch": 0.5, "grad_norm": 3.749950647354126, "learning_rate": 1.952071901976514e-05, "loss": 2.0238, "step": 38830 }, { "epoch": 0.5, "grad_norm": 3.2457385063171387, "learning_rate": 1.9520686878126943e-05, "loss": 1.6489, "step": 38831 }, { "epoch": 0.5, "grad_norm": 3.8989927768707275, "learning_rate": 1.95206547354375e-05, "loss": 2.2694, "step": 38832 }, { "epoch": 0.5, "grad_norm": 4.269765377044678, "learning_rate": 1.9520622591696814e-05, "loss": 2.0709, "step": 38833 }, { "epoch": 0.5, "grad_norm": 3.474468946456909, "learning_rate": 1.952059044690489e-05, "loss": 1.8133, "step": 38834 }, { "epoch": 0.5, "grad_norm": 4.102993488311768, "learning_rate": 1.9520558301061733e-05, "loss": 2.0944, "step": 38835 }, { "epoch": 0.5, "grad_norm": 3.700824499130249, "learning_rate": 1.9520526154167344e-05, "loss": 2.0709, "step": 38836 }, { "epoch": 0.5, "grad_norm": 3.6815688610076904, "learning_rate": 1.9520494006221725e-05, "loss": 1.7092, "step": 38837 }, { "epoch": 0.5, "grad_norm": 3.4789421558380127, "learning_rate": 1.952046185722489e-05, "loss": 1.8242, "step": 38838 }, { "epoch": 0.5, "grad_norm": 3.694708824157715, "learning_rate": 1.9520429707176823e-05, "loss": 2.1113, "step": 38839 }, { "epoch": 0.5, "grad_norm": 3.4224259853363037, "learning_rate": 1.952039755607755e-05, "loss": 1.6637, "step": 38840 }, { "epoch": 0.5, "grad_norm": 3.8067891597747803, "learning_rate": 1.9520365403927056e-05, "loss": 2.3658, "step": 38841 }, { "epoch": 0.5, "grad_norm": 3.4092888832092285, "learning_rate": 1.9520333250725354e-05, "loss": 1.5246, "step": 38842 }, { "epoch": 0.5, "grad_norm": 4.214475631713867, "learning_rate": 1.9520301096472448e-05, "loss": 2.1867, "step": 38843 }, { "epoch": 0.5, "grad_norm": 4.497951984405518, "learning_rate": 1.9520268941168336e-05, "loss": 2.0894, "step": 38844 }, { "epoch": 0.5, "grad_norm": 3.8558237552642822, "learning_rate": 1.9520236784813026e-05, "loss": 1.7745, "step": 38845 }, { "epoch": 0.5, "grad_norm": 3.992811441421509, "learning_rate": 1.9520204627406524e-05, "loss": 2.1192, "step": 38846 }, { "epoch": 0.5, "grad_norm": 3.897141218185425, "learning_rate": 1.9520172468948824e-05, "loss": 2.4461, "step": 38847 }, { "epoch": 0.5, "grad_norm": 3.9702165126800537, "learning_rate": 1.952014030943994e-05, "loss": 1.7798, "step": 38848 }, { "epoch": 0.5, "grad_norm": 3.358719825744629, "learning_rate": 1.9520108148879866e-05, "loss": 1.6552, "step": 38849 }, { "epoch": 0.5, "grad_norm": 3.8346197605133057, "learning_rate": 1.9520075987268614e-05, "loss": 2.193, "step": 38850 }, { "epoch": 0.5, "grad_norm": 3.81733775138855, "learning_rate": 1.9520043824606185e-05, "loss": 1.9212, "step": 38851 }, { "epoch": 0.5, "grad_norm": 3.900334596633911, "learning_rate": 1.9520011660892578e-05, "loss": 1.9103, "step": 38852 }, { "epoch": 0.5, "grad_norm": 3.6696617603302, "learning_rate": 1.9519979496127806e-05, "loss": 2.1208, "step": 38853 }, { "epoch": 0.5, "grad_norm": 3.84151554107666, "learning_rate": 1.9519947330311863e-05, "loss": 1.9954, "step": 38854 }, { "epoch": 0.5, "grad_norm": 4.071751594543457, "learning_rate": 1.9519915163444755e-05, "loss": 3.0168, "step": 38855 }, { "epoch": 0.5, "grad_norm": 3.958482265472412, "learning_rate": 1.9519882995526486e-05, "loss": 2.1768, "step": 38856 }, { "epoch": 0.5, "grad_norm": 3.9993679523468018, "learning_rate": 1.9519850826557067e-05, "loss": 1.7275, "step": 38857 }, { "epoch": 0.5, "grad_norm": 3.6061978340148926, "learning_rate": 1.951981865653649e-05, "loss": 1.8079, "step": 38858 }, { "epoch": 0.5, "grad_norm": 3.311326503753662, "learning_rate": 1.951978648546476e-05, "loss": 1.4868, "step": 38859 }, { "epoch": 0.5, "grad_norm": 3.7801601886749268, "learning_rate": 1.9519754313341892e-05, "loss": 1.7959, "step": 38860 }, { "epoch": 0.5, "grad_norm": 3.2571895122528076, "learning_rate": 1.9519722140167875e-05, "loss": 1.6901, "step": 38861 }, { "epoch": 0.5, "grad_norm": 3.5925133228302, "learning_rate": 1.9519689965942724e-05, "loss": 2.1658, "step": 38862 }, { "epoch": 0.5, "grad_norm": 3.316883087158203, "learning_rate": 1.9519657790666436e-05, "loss": 1.7493, "step": 38863 }, { "epoch": 0.5, "grad_norm": 4.156349182128906, "learning_rate": 1.9519625614339018e-05, "loss": 1.959, "step": 38864 }, { "epoch": 0.5, "grad_norm": 3.708414316177368, "learning_rate": 1.9519593436960468e-05, "loss": 1.9078, "step": 38865 }, { "epoch": 0.5, "grad_norm": 4.456984996795654, "learning_rate": 1.95195612585308e-05, "loss": 2.2155, "step": 38866 }, { "epoch": 0.5, "grad_norm": 3.2773983478546143, "learning_rate": 1.951952907905e-05, "loss": 1.9798, "step": 38867 }, { "epoch": 0.5, "grad_norm": 3.7904813289642334, "learning_rate": 1.951949689851809e-05, "loss": 2.2817, "step": 38868 }, { "epoch": 0.5, "grad_norm": 3.8708605766296387, "learning_rate": 1.9519464716935066e-05, "loss": 1.9784, "step": 38869 }, { "epoch": 0.5, "grad_norm": 3.6846213340759277, "learning_rate": 1.951943253430093e-05, "loss": 2.1281, "step": 38870 }, { "epoch": 0.5, "grad_norm": 3.4647717475891113, "learning_rate": 1.951940035061569e-05, "loss": 2.2012, "step": 38871 }, { "epoch": 0.5, "grad_norm": 4.3759331703186035, "learning_rate": 1.951936816587934e-05, "loss": 1.826, "step": 38872 }, { "epoch": 0.5, "grad_norm": 3.475696563720703, "learning_rate": 1.9519335980091896e-05, "loss": 1.8199, "step": 38873 }, { "epoch": 0.5, "grad_norm": 4.068084716796875, "learning_rate": 1.9519303793253358e-05, "loss": 2.1897, "step": 38874 }, { "epoch": 0.5, "grad_norm": 3.1396539211273193, "learning_rate": 1.9519271605363723e-05, "loss": 1.6029, "step": 38875 }, { "epoch": 0.5, "grad_norm": 3.4874050617218018, "learning_rate": 1.9519239416422997e-05, "loss": 1.8325, "step": 38876 }, { "epoch": 0.5, "grad_norm": 3.0494022369384766, "learning_rate": 1.951920722643119e-05, "loss": 1.5028, "step": 38877 }, { "epoch": 0.5, "grad_norm": 4.504892349243164, "learning_rate": 1.9519175035388297e-05, "loss": 2.6388, "step": 38878 }, { "epoch": 0.5, "grad_norm": 3.408766746520996, "learning_rate": 1.951914284329433e-05, "loss": 1.5296, "step": 38879 }, { "epoch": 0.5, "grad_norm": 3.4846739768981934, "learning_rate": 1.9519110650149285e-05, "loss": 1.7296, "step": 38880 }, { "epoch": 0.5, "grad_norm": 3.439013957977295, "learning_rate": 1.9519078455953167e-05, "loss": 1.9179, "step": 38881 }, { "epoch": 0.5, "grad_norm": 3.9387447834014893, "learning_rate": 1.9519046260705984e-05, "loss": 2.2028, "step": 38882 }, { "epoch": 0.5, "grad_norm": 3.8759772777557373, "learning_rate": 1.951901406440774e-05, "loss": 2.3144, "step": 38883 }, { "epoch": 0.5, "grad_norm": 3.409160614013672, "learning_rate": 1.951898186705843e-05, "loss": 1.8563, "step": 38884 }, { "epoch": 0.5, "grad_norm": 4.018937587738037, "learning_rate": 1.9518949668658066e-05, "loss": 2.0471, "step": 38885 }, { "epoch": 0.5, "grad_norm": 3.4203834533691406, "learning_rate": 1.9518917469206647e-05, "loss": 1.5987, "step": 38886 }, { "epoch": 0.5, "grad_norm": 3.492657423019409, "learning_rate": 1.951888526870418e-05, "loss": 1.6095, "step": 38887 }, { "epoch": 0.5, "grad_norm": 3.6089301109313965, "learning_rate": 1.9518853067150663e-05, "loss": 1.8956, "step": 38888 }, { "epoch": 0.5, "grad_norm": 3.0869829654693604, "learning_rate": 1.9518820864546107e-05, "loss": 1.3647, "step": 38889 }, { "epoch": 0.5, "grad_norm": 3.7240424156188965, "learning_rate": 1.9518788660890505e-05, "loss": 2.3744, "step": 38890 }, { "epoch": 0.5, "grad_norm": 3.7500646114349365, "learning_rate": 1.9518756456183875e-05, "loss": 1.9718, "step": 38891 }, { "epoch": 0.5, "grad_norm": 3.5847866535186768, "learning_rate": 1.951872425042621e-05, "loss": 2.1753, "step": 38892 }, { "epoch": 0.5, "grad_norm": 3.574512481689453, "learning_rate": 1.9518692043617516e-05, "loss": 2.1228, "step": 38893 }, { "epoch": 0.5, "grad_norm": 3.2621493339538574, "learning_rate": 1.9518659835757798e-05, "loss": 1.46, "step": 38894 }, { "epoch": 0.5, "grad_norm": 3.738901138305664, "learning_rate": 1.9518627626847055e-05, "loss": 1.7709, "step": 38895 }, { "epoch": 0.5, "grad_norm": 3.6351001262664795, "learning_rate": 1.9518595416885296e-05, "loss": 1.8522, "step": 38896 }, { "epoch": 0.5, "grad_norm": 3.493807792663574, "learning_rate": 1.9518563205872525e-05, "loss": 1.7512, "step": 38897 }, { "epoch": 0.5, "grad_norm": 3.3911309242248535, "learning_rate": 1.9518530993808742e-05, "loss": 1.6743, "step": 38898 }, { "epoch": 0.5, "grad_norm": 4.159875392913818, "learning_rate": 1.951849878069395e-05, "loss": 2.2784, "step": 38899 }, { "epoch": 0.5, "grad_norm": 3.3219172954559326, "learning_rate": 1.9518466566528154e-05, "loss": 1.6986, "step": 38900 }, { "epoch": 0.5, "grad_norm": 4.11416482925415, "learning_rate": 1.951843435131136e-05, "loss": 2.5099, "step": 38901 }, { "epoch": 0.5, "grad_norm": 4.041111946105957, "learning_rate": 1.951840213504357e-05, "loss": 2.175, "step": 38902 }, { "epoch": 0.5, "grad_norm": 3.846202850341797, "learning_rate": 1.9518369917724783e-05, "loss": 2.1175, "step": 38903 }, { "epoch": 0.5, "grad_norm": 3.7946367263793945, "learning_rate": 1.951833769935501e-05, "loss": 2.0919, "step": 38904 }, { "epoch": 0.5, "grad_norm": 3.50333309173584, "learning_rate": 1.951830547993425e-05, "loss": 1.8431, "step": 38905 }, { "epoch": 0.5, "grad_norm": 3.3451380729675293, "learning_rate": 1.9518273259462508e-05, "loss": 1.5076, "step": 38906 }, { "epoch": 0.5, "grad_norm": 3.145799160003662, "learning_rate": 1.9518241037939786e-05, "loss": 1.4143, "step": 38907 }, { "epoch": 0.5, "grad_norm": 4.566445827484131, "learning_rate": 1.951820881536609e-05, "loss": 2.7327, "step": 38908 }, { "epoch": 0.5, "grad_norm": 3.552759885787964, "learning_rate": 1.951817659174142e-05, "loss": 1.7579, "step": 38909 }, { "epoch": 0.5, "grad_norm": 3.5080373287200928, "learning_rate": 1.9518144367065782e-05, "loss": 1.9345, "step": 38910 }, { "epoch": 0.5, "grad_norm": 3.5539984703063965, "learning_rate": 1.9518112141339182e-05, "loss": 1.6083, "step": 38911 }, { "epoch": 0.5, "grad_norm": 3.018624782562256, "learning_rate": 1.951807991456162e-05, "loss": 1.4565, "step": 38912 }, { "epoch": 0.51, "grad_norm": 4.140029430389404, "learning_rate": 1.95180476867331e-05, "loss": 2.0533, "step": 38913 }, { "epoch": 0.51, "grad_norm": 3.9003701210021973, "learning_rate": 1.9518015457853627e-05, "loss": 1.9702, "step": 38914 }, { "epoch": 0.51, "grad_norm": 4.383420467376709, "learning_rate": 1.9517983227923202e-05, "loss": 2.1637, "step": 38915 }, { "epoch": 0.51, "grad_norm": 3.62886118888855, "learning_rate": 1.951795099694183e-05, "loss": 1.9126, "step": 38916 }, { "epoch": 0.51, "grad_norm": 3.7998158931732178, "learning_rate": 1.951791876490952e-05, "loss": 1.8749, "step": 38917 }, { "epoch": 0.51, "grad_norm": 3.048288583755493, "learning_rate": 1.9517886531826265e-05, "loss": 1.3524, "step": 38918 }, { "epoch": 0.51, "grad_norm": 3.490647792816162, "learning_rate": 1.9517854297692073e-05, "loss": 1.6611, "step": 38919 }, { "epoch": 0.51, "grad_norm": 3.5806221961975098, "learning_rate": 1.951782206250695e-05, "loss": 1.8402, "step": 38920 }, { "epoch": 0.51, "grad_norm": 3.454929828643799, "learning_rate": 1.9517789826270897e-05, "loss": 2.0147, "step": 38921 }, { "epoch": 0.51, "grad_norm": 3.4396557807922363, "learning_rate": 1.9517757588983923e-05, "loss": 1.9648, "step": 38922 }, { "epoch": 0.51, "grad_norm": 3.659423828125, "learning_rate": 1.9517725350646022e-05, "loss": 1.9018, "step": 38923 }, { "epoch": 0.51, "grad_norm": 3.476926803588867, "learning_rate": 1.9517693111257207e-05, "loss": 1.8189, "step": 38924 }, { "epoch": 0.51, "grad_norm": 4.035873889923096, "learning_rate": 1.9517660870817475e-05, "loss": 2.2832, "step": 38925 }, { "epoch": 0.51, "grad_norm": 3.745950222015381, "learning_rate": 1.951762862932683e-05, "loss": 2.0225, "step": 38926 }, { "epoch": 0.51, "grad_norm": 3.9401562213897705, "learning_rate": 1.951759638678528e-05, "loss": 2.123, "step": 38927 }, { "epoch": 0.51, "grad_norm": 3.9541187286376953, "learning_rate": 1.9517564143192823e-05, "loss": 1.9582, "step": 38928 }, { "epoch": 0.51, "grad_norm": 4.044402599334717, "learning_rate": 1.9517531898549466e-05, "loss": 1.9288, "step": 38929 }, { "epoch": 0.51, "grad_norm": 4.076931953430176, "learning_rate": 1.9517499652855213e-05, "loss": 2.3033, "step": 38930 }, { "epoch": 0.51, "grad_norm": 3.4639599323272705, "learning_rate": 1.951746740611007e-05, "loss": 1.675, "step": 38931 }, { "epoch": 0.51, "grad_norm": 3.6178064346313477, "learning_rate": 1.9517435158314032e-05, "loss": 2.0204, "step": 38932 }, { "epoch": 0.51, "grad_norm": 3.5177862644195557, "learning_rate": 1.9517402909467108e-05, "loss": 1.9497, "step": 38933 }, { "epoch": 0.51, "grad_norm": 4.347395896911621, "learning_rate": 1.9517370659569304e-05, "loss": 2.183, "step": 38934 }, { "epoch": 0.51, "grad_norm": 3.601788282394409, "learning_rate": 1.951733840862062e-05, "loss": 2.1788, "step": 38935 }, { "epoch": 0.51, "grad_norm": 4.094174861907959, "learning_rate": 1.951730615662106e-05, "loss": 2.2326, "step": 38936 }, { "epoch": 0.51, "grad_norm": 4.194805145263672, "learning_rate": 1.9517273903570628e-05, "loss": 2.1715, "step": 38937 }, { "epoch": 0.51, "grad_norm": 3.831059217453003, "learning_rate": 1.9517241649469328e-05, "loss": 2.0965, "step": 38938 }, { "epoch": 0.51, "grad_norm": 3.3092124462127686, "learning_rate": 1.9517209394317166e-05, "loss": 1.747, "step": 38939 }, { "epoch": 0.51, "grad_norm": 4.0458879470825195, "learning_rate": 1.9517177138114137e-05, "loss": 2.0662, "step": 38940 }, { "epoch": 0.51, "grad_norm": 3.737783193588257, "learning_rate": 1.9517144880860252e-05, "loss": 1.8035, "step": 38941 }, { "epoch": 0.51, "grad_norm": 4.0516815185546875, "learning_rate": 1.951711262255551e-05, "loss": 2.2715, "step": 38942 }, { "epoch": 0.51, "grad_norm": 3.439767360687256, "learning_rate": 1.9517080363199923e-05, "loss": 1.6459, "step": 38943 }, { "epoch": 0.51, "grad_norm": 3.428929090499878, "learning_rate": 1.9517048102793487e-05, "loss": 1.6659, "step": 38944 }, { "epoch": 0.51, "grad_norm": 3.5412187576293945, "learning_rate": 1.9517015841336208e-05, "loss": 2.1995, "step": 38945 }, { "epoch": 0.51, "grad_norm": 3.274733304977417, "learning_rate": 1.9516983578828086e-05, "loss": 1.779, "step": 38946 }, { "epoch": 0.51, "grad_norm": 3.450634002685547, "learning_rate": 1.951695131526913e-05, "loss": 1.883, "step": 38947 }, { "epoch": 0.51, "grad_norm": 4.108277797698975, "learning_rate": 1.951691905065934e-05, "loss": 2.475, "step": 38948 }, { "epoch": 0.51, "grad_norm": 3.5807456970214844, "learning_rate": 1.9516886784998722e-05, "loss": 1.8687, "step": 38949 }, { "epoch": 0.51, "grad_norm": 3.429081678390503, "learning_rate": 1.9516854518287277e-05, "loss": 1.8983, "step": 38950 }, { "epoch": 0.51, "grad_norm": 3.2604448795318604, "learning_rate": 1.951682225052501e-05, "loss": 1.683, "step": 38951 }, { "epoch": 0.51, "grad_norm": 3.807088851928711, "learning_rate": 1.9516789981711924e-05, "loss": 1.8354, "step": 38952 }, { "epoch": 0.51, "grad_norm": 3.6275899410247803, "learning_rate": 1.9516757711848022e-05, "loss": 1.7693, "step": 38953 }, { "epoch": 0.51, "grad_norm": 3.2069365978240967, "learning_rate": 1.951672544093331e-05, "loss": 1.5921, "step": 38954 }, { "epoch": 0.51, "grad_norm": 3.7611210346221924, "learning_rate": 1.9516693168967792e-05, "loss": 2.1312, "step": 38955 }, { "epoch": 0.51, "grad_norm": 3.6960606575012207, "learning_rate": 1.9516660895951468e-05, "loss": 1.949, "step": 38956 }, { "epoch": 0.51, "grad_norm": 3.7092058658599854, "learning_rate": 1.951662862188434e-05, "loss": 2.0358, "step": 38957 }, { "epoch": 0.51, "grad_norm": 4.023340225219727, "learning_rate": 1.951659634676642e-05, "loss": 2.3102, "step": 38958 }, { "epoch": 0.51, "grad_norm": 3.379854679107666, "learning_rate": 1.9516564070597703e-05, "loss": 1.7701, "step": 38959 }, { "epoch": 0.51, "grad_norm": 3.659454584121704, "learning_rate": 1.9516531793378198e-05, "loss": 1.7567, "step": 38960 }, { "epoch": 0.51, "grad_norm": 3.73371958732605, "learning_rate": 1.9516499515107904e-05, "loss": 2.2464, "step": 38961 }, { "epoch": 0.51, "grad_norm": 3.975430965423584, "learning_rate": 1.951646723578683e-05, "loss": 2.4469, "step": 38962 }, { "epoch": 0.51, "grad_norm": 3.752326726913452, "learning_rate": 1.951643495541497e-05, "loss": 1.9158, "step": 38963 }, { "epoch": 0.51, "grad_norm": 3.696821451187134, "learning_rate": 1.9516402673992343e-05, "loss": 2.17, "step": 38964 }, { "epoch": 0.51, "grad_norm": 3.0228731632232666, "learning_rate": 1.9516370391518936e-05, "loss": 1.584, "step": 38965 }, { "epoch": 0.51, "grad_norm": 4.19984769821167, "learning_rate": 1.9516338107994765e-05, "loss": 2.4229, "step": 38966 }, { "epoch": 0.51, "grad_norm": 4.253848075866699, "learning_rate": 1.951630582341983e-05, "loss": 2.0167, "step": 38967 }, { "epoch": 0.51, "grad_norm": 3.5201663970947266, "learning_rate": 1.9516273537794128e-05, "loss": 1.6006, "step": 38968 }, { "epoch": 0.51, "grad_norm": 3.5519566535949707, "learning_rate": 1.9516241251117673e-05, "loss": 1.6059, "step": 38969 }, { "epoch": 0.51, "grad_norm": 3.6233325004577637, "learning_rate": 1.951620896339046e-05, "loss": 1.7022, "step": 38970 }, { "epoch": 0.51, "grad_norm": 3.8757288455963135, "learning_rate": 1.95161766746125e-05, "loss": 2.3548, "step": 38971 }, { "epoch": 0.51, "grad_norm": 3.592273235321045, "learning_rate": 1.951614438478379e-05, "loss": 1.7157, "step": 38972 }, { "epoch": 0.51, "grad_norm": 3.4900805950164795, "learning_rate": 1.9516112093904336e-05, "loss": 1.8467, "step": 38973 }, { "epoch": 0.51, "grad_norm": 4.1538848876953125, "learning_rate": 1.9516079801974142e-05, "loss": 1.8813, "step": 38974 }, { "epoch": 0.51, "grad_norm": 3.8635525703430176, "learning_rate": 1.9516047508993214e-05, "loss": 1.8534, "step": 38975 }, { "epoch": 0.51, "grad_norm": 3.4711217880249023, "learning_rate": 1.9516015214961548e-05, "loss": 1.4801, "step": 38976 }, { "epoch": 0.51, "grad_norm": 3.8810768127441406, "learning_rate": 1.9515982919879155e-05, "loss": 2.2335, "step": 38977 }, { "epoch": 0.51, "grad_norm": 3.7609479427337646, "learning_rate": 1.9515950623746037e-05, "loss": 2.0656, "step": 38978 }, { "epoch": 0.51, "grad_norm": 3.246870517730713, "learning_rate": 1.9515918326562192e-05, "loss": 1.9745, "step": 38979 }, { "epoch": 0.51, "grad_norm": 3.622004270553589, "learning_rate": 1.9515886028327637e-05, "loss": 1.9069, "step": 38980 }, { "epoch": 0.51, "grad_norm": 3.7478034496307373, "learning_rate": 1.9515853729042358e-05, "loss": 2.0916, "step": 38981 }, { "epoch": 0.51, "grad_norm": 4.105547904968262, "learning_rate": 1.951582142870637e-05, "loss": 1.9827, "step": 38982 }, { "epoch": 0.51, "grad_norm": 3.9059112071990967, "learning_rate": 1.9515789127319674e-05, "loss": 1.771, "step": 38983 }, { "epoch": 0.51, "grad_norm": 3.695582866668701, "learning_rate": 1.9515756824882273e-05, "loss": 1.8299, "step": 38984 }, { "epoch": 0.51, "grad_norm": 3.4253342151641846, "learning_rate": 1.9515724521394176e-05, "loss": 1.9176, "step": 38985 }, { "epoch": 0.51, "grad_norm": 3.829920530319214, "learning_rate": 1.9515692216855378e-05, "loss": 2.0083, "step": 38986 }, { "epoch": 0.51, "grad_norm": 3.5315120220184326, "learning_rate": 1.9515659911265886e-05, "loss": 1.7755, "step": 38987 }, { "epoch": 0.51, "grad_norm": 3.5387089252471924, "learning_rate": 1.9515627604625705e-05, "loss": 2.075, "step": 38988 }, { "epoch": 0.51, "grad_norm": 3.5079262256622314, "learning_rate": 1.9515595296934836e-05, "loss": 1.8439, "step": 38989 }, { "epoch": 0.51, "grad_norm": 3.501371383666992, "learning_rate": 1.951556298819328e-05, "loss": 1.7314, "step": 38990 }, { "epoch": 0.51, "grad_norm": 3.3260064125061035, "learning_rate": 1.951553067840105e-05, "loss": 1.7044, "step": 38991 }, { "epoch": 0.51, "grad_norm": 3.4428274631500244, "learning_rate": 1.951549836755814e-05, "loss": 1.6493, "step": 38992 }, { "epoch": 0.51, "grad_norm": 3.708322763442993, "learning_rate": 1.9515466055664563e-05, "loss": 2.2193, "step": 38993 }, { "epoch": 0.51, "grad_norm": 3.7480034828186035, "learning_rate": 1.9515433742720314e-05, "loss": 2.0179, "step": 38994 }, { "epoch": 0.51, "grad_norm": 3.4743733406066895, "learning_rate": 1.9515401428725397e-05, "loss": 1.8027, "step": 38995 }, { "epoch": 0.51, "grad_norm": 3.108511447906494, "learning_rate": 1.9515369113679822e-05, "loss": 1.4928, "step": 38996 }, { "epoch": 0.51, "grad_norm": 4.025454998016357, "learning_rate": 1.9515336797583585e-05, "loss": 2.1388, "step": 38997 }, { "epoch": 0.51, "grad_norm": 3.857286214828491, "learning_rate": 1.95153044804367e-05, "loss": 1.8124, "step": 38998 }, { "epoch": 0.51, "grad_norm": 3.8683202266693115, "learning_rate": 1.951527216223916e-05, "loss": 1.8914, "step": 38999 }, { "epoch": 0.51, "grad_norm": 3.907748222351074, "learning_rate": 1.951523984299097e-05, "loss": 2.1459, "step": 39000 }, { "epoch": 0.51, "grad_norm": 3.4876623153686523, "learning_rate": 1.9515207522692138e-05, "loss": 1.7724, "step": 39001 }, { "epoch": 0.51, "grad_norm": 3.518866539001465, "learning_rate": 1.9515175201342666e-05, "loss": 1.6887, "step": 39002 }, { "epoch": 0.51, "grad_norm": 3.309583902359009, "learning_rate": 1.9515142878942556e-05, "loss": 1.9367, "step": 39003 }, { "epoch": 0.51, "grad_norm": 3.8098621368408203, "learning_rate": 1.9515110555491816e-05, "loss": 1.8151, "step": 39004 }, { "epoch": 0.51, "grad_norm": 3.6379709243774414, "learning_rate": 1.9515078230990444e-05, "loss": 2.081, "step": 39005 }, { "epoch": 0.51, "grad_norm": 3.652637243270874, "learning_rate": 1.951504590543845e-05, "loss": 2.1219, "step": 39006 }, { "epoch": 0.51, "grad_norm": 4.252779006958008, "learning_rate": 1.951501357883583e-05, "loss": 2.5423, "step": 39007 }, { "epoch": 0.51, "grad_norm": 3.7016360759735107, "learning_rate": 1.951498125118259e-05, "loss": 1.7814, "step": 39008 }, { "epoch": 0.51, "grad_norm": 3.5345675945281982, "learning_rate": 1.9514948922478734e-05, "loss": 1.5201, "step": 39009 }, { "epoch": 0.51, "grad_norm": 3.810865879058838, "learning_rate": 1.951491659272427e-05, "loss": 2.2186, "step": 39010 }, { "epoch": 0.51, "grad_norm": 3.640564441680908, "learning_rate": 1.9514884261919194e-05, "loss": 2.0025, "step": 39011 }, { "epoch": 0.51, "grad_norm": 3.9719598293304443, "learning_rate": 1.9514851930063516e-05, "loss": 2.0346, "step": 39012 }, { "epoch": 0.51, "grad_norm": 2.9188547134399414, "learning_rate": 1.9514819597157235e-05, "loss": 1.4986, "step": 39013 }, { "epoch": 0.51, "grad_norm": 3.828040599822998, "learning_rate": 1.9514787263200356e-05, "loss": 1.6658, "step": 39014 }, { "epoch": 0.51, "grad_norm": 3.7491214275360107, "learning_rate": 1.9514754928192885e-05, "loss": 2.1241, "step": 39015 }, { "epoch": 0.51, "grad_norm": 3.8540561199188232, "learning_rate": 1.9514722592134823e-05, "loss": 2.0343, "step": 39016 }, { "epoch": 0.51, "grad_norm": 3.5165603160858154, "learning_rate": 1.9514690255026175e-05, "loss": 1.8629, "step": 39017 }, { "epoch": 0.51, "grad_norm": 3.4871368408203125, "learning_rate": 1.9514657916866945e-05, "loss": 1.5974, "step": 39018 }, { "epoch": 0.51, "grad_norm": 3.7279787063598633, "learning_rate": 1.951462557765713e-05, "loss": 1.893, "step": 39019 }, { "epoch": 0.51, "grad_norm": 3.3806965351104736, "learning_rate": 1.9514593237396747e-05, "loss": 1.6194, "step": 39020 }, { "epoch": 0.51, "grad_norm": 4.005366325378418, "learning_rate": 1.9514560896085782e-05, "loss": 2.2626, "step": 39021 }, { "epoch": 0.51, "grad_norm": 3.8652184009552, "learning_rate": 1.9514528553724256e-05, "loss": 2.1794, "step": 39022 }, { "epoch": 0.51, "grad_norm": 3.3897740840911865, "learning_rate": 1.9514496210312162e-05, "loss": 1.6296, "step": 39023 }, { "epoch": 0.51, "grad_norm": 3.848666191101074, "learning_rate": 1.9514463865849506e-05, "loss": 1.6739, "step": 39024 }, { "epoch": 0.51, "grad_norm": 4.594228267669678, "learning_rate": 1.9514431520336292e-05, "loss": 2.6603, "step": 39025 }, { "epoch": 0.51, "grad_norm": 3.68123459815979, "learning_rate": 1.9514399173772524e-05, "loss": 1.8694, "step": 39026 }, { "epoch": 0.51, "grad_norm": 4.04961633682251, "learning_rate": 1.95143668261582e-05, "loss": 2.0096, "step": 39027 }, { "epoch": 0.51, "grad_norm": 3.116914749145508, "learning_rate": 1.9514334477493337e-05, "loss": 1.448, "step": 39028 }, { "epoch": 0.51, "grad_norm": 3.6958634853363037, "learning_rate": 1.951430212777792e-05, "loss": 1.5352, "step": 39029 }, { "epoch": 0.51, "grad_norm": 3.4650321006774902, "learning_rate": 1.951426977701197e-05, "loss": 1.7485, "step": 39030 }, { "epoch": 0.51, "grad_norm": 3.607497453689575, "learning_rate": 1.951423742519548e-05, "loss": 1.5123, "step": 39031 }, { "epoch": 0.51, "grad_norm": 3.4682981967926025, "learning_rate": 1.951420507232846e-05, "loss": 1.7421, "step": 39032 }, { "epoch": 0.51, "grad_norm": 3.169144868850708, "learning_rate": 1.951417271841091e-05, "loss": 1.5372, "step": 39033 }, { "epoch": 0.51, "grad_norm": 3.8147664070129395, "learning_rate": 1.951414036344283e-05, "loss": 1.8924, "step": 39034 }, { "epoch": 0.51, "grad_norm": 4.041627883911133, "learning_rate": 1.9514108007424232e-05, "loss": 1.8975, "step": 39035 }, { "epoch": 0.51, "grad_norm": 4.504940509796143, "learning_rate": 1.951407565035511e-05, "loss": 2.7065, "step": 39036 }, { "epoch": 0.51, "grad_norm": 4.377491474151611, "learning_rate": 1.9514043292235475e-05, "loss": 2.0235, "step": 39037 }, { "epoch": 0.51, "grad_norm": 3.8018147945404053, "learning_rate": 1.9514010933065332e-05, "loss": 2.0855, "step": 39038 }, { "epoch": 0.51, "grad_norm": 3.402846097946167, "learning_rate": 1.9513978572844675e-05, "loss": 1.5432, "step": 39039 }, { "epoch": 0.51, "grad_norm": 4.29344367980957, "learning_rate": 1.9513946211573515e-05, "loss": 2.5098, "step": 39040 }, { "epoch": 0.51, "grad_norm": 3.807569742202759, "learning_rate": 1.9513913849251857e-05, "loss": 1.6454, "step": 39041 }, { "epoch": 0.51, "grad_norm": 3.5099093914031982, "learning_rate": 1.95138814858797e-05, "loss": 1.5605, "step": 39042 }, { "epoch": 0.51, "grad_norm": 3.505417823791504, "learning_rate": 1.9513849121457047e-05, "loss": 1.8965, "step": 39043 }, { "epoch": 0.51, "grad_norm": 4.266479969024658, "learning_rate": 1.9513816755983905e-05, "loss": 2.1867, "step": 39044 }, { "epoch": 0.51, "grad_norm": 4.342397212982178, "learning_rate": 1.9513784389460276e-05, "loss": 2.4744, "step": 39045 }, { "epoch": 0.51, "grad_norm": 3.831329107284546, "learning_rate": 1.9513752021886164e-05, "loss": 1.7416, "step": 39046 }, { "epoch": 0.51, "grad_norm": 3.447112560272217, "learning_rate": 1.9513719653261575e-05, "loss": 1.5734, "step": 39047 }, { "epoch": 0.51, "grad_norm": 4.059781551361084, "learning_rate": 1.9513687283586506e-05, "loss": 1.7897, "step": 39048 }, { "epoch": 0.51, "grad_norm": 4.1466755867004395, "learning_rate": 1.9513654912860963e-05, "loss": 2.1404, "step": 39049 }, { "epoch": 0.51, "grad_norm": 3.651806116104126, "learning_rate": 1.9513622541084954e-05, "loss": 2.2959, "step": 39050 }, { "epoch": 0.51, "grad_norm": 3.293830156326294, "learning_rate": 1.9513590168258482e-05, "loss": 1.7859, "step": 39051 }, { "epoch": 0.51, "grad_norm": 3.249163866043091, "learning_rate": 1.9513557794381547e-05, "loss": 1.5072, "step": 39052 }, { "epoch": 0.51, "grad_norm": 4.0047078132629395, "learning_rate": 1.951352541945415e-05, "loss": 1.7682, "step": 39053 }, { "epoch": 0.51, "grad_norm": 3.8634865283966064, "learning_rate": 1.9513493043476303e-05, "loss": 1.9725, "step": 39054 }, { "epoch": 0.51, "grad_norm": 3.557543992996216, "learning_rate": 1.9513460666448002e-05, "loss": 1.9669, "step": 39055 }, { "epoch": 0.51, "grad_norm": 3.5946261882781982, "learning_rate": 1.9513428288369255e-05, "loss": 2.0787, "step": 39056 }, { "epoch": 0.51, "grad_norm": 3.598703622817993, "learning_rate": 1.951339590924006e-05, "loss": 1.8917, "step": 39057 }, { "epoch": 0.51, "grad_norm": 3.845364570617676, "learning_rate": 1.9513363529060432e-05, "loss": 1.8816, "step": 39058 }, { "epoch": 0.51, "grad_norm": 4.154164791107178, "learning_rate": 1.9513331147830366e-05, "loss": 2.181, "step": 39059 }, { "epoch": 0.51, "grad_norm": 3.9914989471435547, "learning_rate": 1.9513298765549865e-05, "loss": 1.9457, "step": 39060 }, { "epoch": 0.51, "grad_norm": 3.4721601009368896, "learning_rate": 1.951326638221893e-05, "loss": 2.0813, "step": 39061 }, { "epoch": 0.51, "grad_norm": 3.766726016998291, "learning_rate": 1.9513233997837573e-05, "loss": 1.6913, "step": 39062 }, { "epoch": 0.51, "grad_norm": 3.721977710723877, "learning_rate": 1.9513201612405798e-05, "loss": 1.8903, "step": 39063 }, { "epoch": 0.51, "grad_norm": 3.762320041656494, "learning_rate": 1.95131692259236e-05, "loss": 1.8974, "step": 39064 }, { "epoch": 0.51, "grad_norm": 4.012090682983398, "learning_rate": 1.951313683839098e-05, "loss": 2.1552, "step": 39065 }, { "epoch": 0.51, "grad_norm": 3.6428778171539307, "learning_rate": 1.951310444980796e-05, "loss": 2.0464, "step": 39066 }, { "epoch": 0.51, "grad_norm": 3.9421308040618896, "learning_rate": 1.9513072060174526e-05, "loss": 1.7895, "step": 39067 }, { "epoch": 0.51, "grad_norm": 3.809276580810547, "learning_rate": 1.951303966949069e-05, "loss": 2.1757, "step": 39068 }, { "epoch": 0.51, "grad_norm": 3.549281597137451, "learning_rate": 1.9513007277756446e-05, "loss": 2.359, "step": 39069 }, { "epoch": 0.51, "grad_norm": 4.02498722076416, "learning_rate": 1.951297488497181e-05, "loss": 2.0598, "step": 39070 }, { "epoch": 0.51, "grad_norm": 3.489797353744507, "learning_rate": 1.9512942491136783e-05, "loss": 1.8066, "step": 39071 }, { "epoch": 0.51, "grad_norm": 3.6632378101348877, "learning_rate": 1.951291009625136e-05, "loss": 1.9984, "step": 39072 }, { "epoch": 0.51, "grad_norm": 4.2490234375, "learning_rate": 1.9512877700315553e-05, "loss": 2.2503, "step": 39073 }, { "epoch": 0.51, "grad_norm": 3.443636655807495, "learning_rate": 1.9512845303329363e-05, "loss": 2.0036, "step": 39074 }, { "epoch": 0.51, "grad_norm": 3.6709821224212646, "learning_rate": 1.951281290529279e-05, "loss": 1.9103, "step": 39075 }, { "epoch": 0.51, "grad_norm": 3.9356987476348877, "learning_rate": 1.951278050620585e-05, "loss": 2.0035, "step": 39076 }, { "epoch": 0.51, "grad_norm": 3.5337135791778564, "learning_rate": 1.951274810606853e-05, "loss": 2.0601, "step": 39077 }, { "epoch": 0.51, "grad_norm": 3.43467378616333, "learning_rate": 1.9512715704880844e-05, "loss": 1.7088, "step": 39078 }, { "epoch": 0.51, "grad_norm": 3.484710931777954, "learning_rate": 1.9512683302642792e-05, "loss": 1.9435, "step": 39079 }, { "epoch": 0.51, "grad_norm": 4.9217848777771, "learning_rate": 1.9512650899354376e-05, "loss": 2.1446, "step": 39080 }, { "epoch": 0.51, "grad_norm": 3.74491024017334, "learning_rate": 1.9512618495015605e-05, "loss": 2.1916, "step": 39081 }, { "epoch": 0.51, "grad_norm": 3.581559419631958, "learning_rate": 1.9512586089626483e-05, "loss": 1.6823, "step": 39082 }, { "epoch": 0.51, "grad_norm": 4.174447059631348, "learning_rate": 1.9512553683187002e-05, "loss": 2.2533, "step": 39083 }, { "epoch": 0.51, "grad_norm": 3.7515766620635986, "learning_rate": 1.9512521275697177e-05, "loss": 2.2679, "step": 39084 }, { "epoch": 0.51, "grad_norm": 3.6944501399993896, "learning_rate": 1.951248886715701e-05, "loss": 2.1597, "step": 39085 }, { "epoch": 0.51, "grad_norm": 4.2616729736328125, "learning_rate": 1.9512456457566503e-05, "loss": 2.5279, "step": 39086 }, { "epoch": 0.51, "grad_norm": 3.5270090103149414, "learning_rate": 1.951242404692566e-05, "loss": 1.9456, "step": 39087 }, { "epoch": 0.51, "grad_norm": 3.9139530658721924, "learning_rate": 1.951239163523448e-05, "loss": 2.2574, "step": 39088 }, { "epoch": 0.51, "grad_norm": 3.8629963397979736, "learning_rate": 1.9512359222492974e-05, "loss": 2.0367, "step": 39089 }, { "epoch": 0.51, "grad_norm": 3.572969436645508, "learning_rate": 1.951232680870114e-05, "loss": 1.8983, "step": 39090 }, { "epoch": 0.51, "grad_norm": 4.145736217498779, "learning_rate": 1.9512294393858987e-05, "loss": 2.3024, "step": 39091 }, { "epoch": 0.51, "grad_norm": 4.030686855316162, "learning_rate": 1.951226197796651e-05, "loss": 2.0851, "step": 39092 }, { "epoch": 0.51, "grad_norm": 3.534895181655884, "learning_rate": 1.9512229561023723e-05, "loss": 1.7733, "step": 39093 }, { "epoch": 0.51, "grad_norm": 3.339114189147949, "learning_rate": 1.951219714303062e-05, "loss": 1.8215, "step": 39094 }, { "epoch": 0.51, "grad_norm": 3.332695722579956, "learning_rate": 1.9512164723987213e-05, "loss": 1.9179, "step": 39095 }, { "epoch": 0.51, "grad_norm": 3.7619197368621826, "learning_rate": 1.95121323038935e-05, "loss": 1.8793, "step": 39096 }, { "epoch": 0.51, "grad_norm": 3.5093400478363037, "learning_rate": 1.9512099882749487e-05, "loss": 1.6995, "step": 39097 }, { "epoch": 0.51, "grad_norm": 3.6496076583862305, "learning_rate": 1.9512067460555177e-05, "loss": 1.6513, "step": 39098 }, { "epoch": 0.51, "grad_norm": 4.209404468536377, "learning_rate": 1.9512035037310573e-05, "loss": 1.7804, "step": 39099 }, { "epoch": 0.51, "grad_norm": 3.472593069076538, "learning_rate": 1.951200261301568e-05, "loss": 1.5264, "step": 39100 }, { "epoch": 0.51, "grad_norm": 3.6036808490753174, "learning_rate": 1.9511970187670497e-05, "loss": 1.9607, "step": 39101 }, { "epoch": 0.51, "grad_norm": 3.931735038757324, "learning_rate": 1.9511937761275032e-05, "loss": 2.1173, "step": 39102 }, { "epoch": 0.51, "grad_norm": 2.822316884994507, "learning_rate": 1.951190533382929e-05, "loss": 1.4313, "step": 39103 }, { "epoch": 0.51, "grad_norm": 3.56304931640625, "learning_rate": 1.951187290533327e-05, "loss": 1.7465, "step": 39104 }, { "epoch": 0.51, "grad_norm": 3.879551887512207, "learning_rate": 1.951184047578698e-05, "loss": 1.8662, "step": 39105 }, { "epoch": 0.51, "grad_norm": 3.570039749145508, "learning_rate": 1.951180804519042e-05, "loss": 1.777, "step": 39106 }, { "epoch": 0.51, "grad_norm": 3.319282293319702, "learning_rate": 1.9511775613543596e-05, "loss": 1.6445, "step": 39107 }, { "epoch": 0.51, "grad_norm": 3.6757755279541016, "learning_rate": 1.951174318084651e-05, "loss": 2.0279, "step": 39108 }, { "epoch": 0.51, "grad_norm": 4.017184734344482, "learning_rate": 1.9511710747099166e-05, "loss": 2.4594, "step": 39109 }, { "epoch": 0.51, "grad_norm": 3.588225841522217, "learning_rate": 1.9511678312301567e-05, "loss": 2.082, "step": 39110 }, { "epoch": 0.51, "grad_norm": 3.9162819385528564, "learning_rate": 1.951164587645372e-05, "loss": 2.1926, "step": 39111 }, { "epoch": 0.51, "grad_norm": 4.27861213684082, "learning_rate": 1.9511613439555622e-05, "loss": 2.2689, "step": 39112 }, { "epoch": 0.51, "grad_norm": 4.404991626739502, "learning_rate": 1.9511581001607285e-05, "loss": 2.1178, "step": 39113 }, { "epoch": 0.51, "grad_norm": 3.314333438873291, "learning_rate": 1.9511548562608705e-05, "loss": 1.8751, "step": 39114 }, { "epoch": 0.51, "grad_norm": 3.418774366378784, "learning_rate": 1.951151612255989e-05, "loss": 1.95, "step": 39115 }, { "epoch": 0.51, "grad_norm": 3.839179039001465, "learning_rate": 1.9511483681460843e-05, "loss": 2.073, "step": 39116 }, { "epoch": 0.51, "grad_norm": 4.237483501434326, "learning_rate": 1.9511451239311565e-05, "loss": 2.3363, "step": 39117 }, { "epoch": 0.51, "grad_norm": 4.10284423828125, "learning_rate": 1.9511418796112064e-05, "loss": 2.0456, "step": 39118 }, { "epoch": 0.51, "grad_norm": 3.8847057819366455, "learning_rate": 1.9511386351862337e-05, "loss": 2.0568, "step": 39119 }, { "epoch": 0.51, "grad_norm": 3.75113582611084, "learning_rate": 1.9511353906562395e-05, "loss": 2.31, "step": 39120 }, { "epoch": 0.51, "grad_norm": 4.125180721282959, "learning_rate": 1.9511321460212236e-05, "loss": 2.1013, "step": 39121 }, { "epoch": 0.51, "grad_norm": 3.4937257766723633, "learning_rate": 1.9511289012811865e-05, "loss": 1.7287, "step": 39122 }, { "epoch": 0.51, "grad_norm": 3.2675886154174805, "learning_rate": 1.951125656436129e-05, "loss": 1.4055, "step": 39123 }, { "epoch": 0.51, "grad_norm": 3.865713119506836, "learning_rate": 1.951122411486051e-05, "loss": 2.1298, "step": 39124 }, { "epoch": 0.51, "grad_norm": 3.125148057937622, "learning_rate": 1.9511191664309525e-05, "loss": 1.3736, "step": 39125 }, { "epoch": 0.51, "grad_norm": 3.4971373081207275, "learning_rate": 1.951115921270835e-05, "loss": 1.8304, "step": 39126 }, { "epoch": 0.51, "grad_norm": 4.376204013824463, "learning_rate": 1.9511126760056976e-05, "loss": 2.3503, "step": 39127 }, { "epoch": 0.51, "grad_norm": 3.2344295978546143, "learning_rate": 1.9511094306355412e-05, "loss": 1.6232, "step": 39128 }, { "epoch": 0.51, "grad_norm": 3.5033702850341797, "learning_rate": 1.9511061851603666e-05, "loss": 1.4712, "step": 39129 }, { "epoch": 0.51, "grad_norm": 3.458969831466675, "learning_rate": 1.9511029395801735e-05, "loss": 1.6888, "step": 39130 }, { "epoch": 0.51, "grad_norm": 4.179239273071289, "learning_rate": 1.9510996938949628e-05, "loss": 2.0527, "step": 39131 }, { "epoch": 0.51, "grad_norm": 4.051321506500244, "learning_rate": 1.951096448104734e-05, "loss": 1.7795, "step": 39132 }, { "epoch": 0.51, "grad_norm": 3.7004544734954834, "learning_rate": 1.9510932022094886e-05, "loss": 2.1101, "step": 39133 }, { "epoch": 0.51, "grad_norm": 4.404210090637207, "learning_rate": 1.951089956209226e-05, "loss": 2.609, "step": 39134 }, { "epoch": 0.51, "grad_norm": 3.908701181411743, "learning_rate": 1.951086710103947e-05, "loss": 1.796, "step": 39135 }, { "epoch": 0.51, "grad_norm": 3.9381539821624756, "learning_rate": 1.951083463893652e-05, "loss": 2.0767, "step": 39136 }, { "epoch": 0.51, "grad_norm": 3.8331189155578613, "learning_rate": 1.951080217578341e-05, "loss": 2.0225, "step": 39137 }, { "epoch": 0.51, "grad_norm": 3.83138370513916, "learning_rate": 1.951076971158015e-05, "loss": 1.7954, "step": 39138 }, { "epoch": 0.51, "grad_norm": 3.2395012378692627, "learning_rate": 1.9510737246326736e-05, "loss": 1.6628, "step": 39139 }, { "epoch": 0.51, "grad_norm": 3.5448927879333496, "learning_rate": 1.951070478002318e-05, "loss": 1.733, "step": 39140 }, { "epoch": 0.51, "grad_norm": 3.8881423473358154, "learning_rate": 1.9510672312669476e-05, "loss": 2.1722, "step": 39141 }, { "epoch": 0.51, "grad_norm": 3.7919135093688965, "learning_rate": 1.9510639844265636e-05, "loss": 2.2737, "step": 39142 }, { "epoch": 0.51, "grad_norm": 3.556170701980591, "learning_rate": 1.9510607374811657e-05, "loss": 1.5268, "step": 39143 }, { "epoch": 0.51, "grad_norm": 3.6970722675323486, "learning_rate": 1.9510574904307545e-05, "loss": 1.7837, "step": 39144 }, { "epoch": 0.51, "grad_norm": 4.17100191116333, "learning_rate": 1.9510542432753307e-05, "loss": 1.6236, "step": 39145 }, { "epoch": 0.51, "grad_norm": 4.122450351715088, "learning_rate": 1.9510509960148943e-05, "loss": 2.2465, "step": 39146 }, { "epoch": 0.51, "grad_norm": 3.750265598297119, "learning_rate": 1.9510477486494457e-05, "loss": 2.2645, "step": 39147 }, { "epoch": 0.51, "grad_norm": 4.34152889251709, "learning_rate": 1.951044501178985e-05, "loss": 2.1779, "step": 39148 }, { "epoch": 0.51, "grad_norm": 4.030472278594971, "learning_rate": 1.9510412536035136e-05, "loss": 2.2233, "step": 39149 }, { "epoch": 0.51, "grad_norm": 3.8461854457855225, "learning_rate": 1.9510380059230304e-05, "loss": 2.2954, "step": 39150 }, { "epoch": 0.51, "grad_norm": 3.682497501373291, "learning_rate": 1.951034758137537e-05, "loss": 2.1469, "step": 39151 }, { "epoch": 0.51, "grad_norm": 3.669834613800049, "learning_rate": 1.951031510247033e-05, "loss": 1.8513, "step": 39152 }, { "epoch": 0.51, "grad_norm": 3.4147112369537354, "learning_rate": 1.9510282622515185e-05, "loss": 1.7064, "step": 39153 }, { "epoch": 0.51, "grad_norm": 3.8337881565093994, "learning_rate": 1.951025014150995e-05, "loss": 2.0742, "step": 39154 }, { "epoch": 0.51, "grad_norm": 4.040743350982666, "learning_rate": 1.951021765945462e-05, "loss": 1.908, "step": 39155 }, { "epoch": 0.51, "grad_norm": 3.5628550052642822, "learning_rate": 1.9510185176349197e-05, "loss": 1.8073, "step": 39156 }, { "epoch": 0.51, "grad_norm": 3.728010892868042, "learning_rate": 1.9510152692193695e-05, "loss": 1.7669, "step": 39157 }, { "epoch": 0.51, "grad_norm": 3.5740954875946045, "learning_rate": 1.951012020698811e-05, "loss": 2.0065, "step": 39158 }, { "epoch": 0.51, "grad_norm": 3.7635438442230225, "learning_rate": 1.951008772073244e-05, "loss": 1.951, "step": 39159 }, { "epoch": 0.51, "grad_norm": 3.284146785736084, "learning_rate": 1.95100552334267e-05, "loss": 1.991, "step": 39160 }, { "epoch": 0.51, "grad_norm": 4.211609363555908, "learning_rate": 1.9510022745070887e-05, "loss": 2.2711, "step": 39161 }, { "epoch": 0.51, "grad_norm": 3.9637303352355957, "learning_rate": 1.9509990255665007e-05, "loss": 1.8178, "step": 39162 }, { "epoch": 0.51, "grad_norm": 4.219250202178955, "learning_rate": 1.9509957765209063e-05, "loss": 2.2231, "step": 39163 }, { "epoch": 0.51, "grad_norm": 3.7056117057800293, "learning_rate": 1.9509925273703057e-05, "loss": 2.01, "step": 39164 }, { "epoch": 0.51, "grad_norm": 3.6275558471679688, "learning_rate": 1.9509892781146997e-05, "loss": 1.7274, "step": 39165 }, { "epoch": 0.51, "grad_norm": 3.8054826259613037, "learning_rate": 1.950986028754088e-05, "loss": 1.912, "step": 39166 }, { "epoch": 0.51, "grad_norm": 3.5703392028808594, "learning_rate": 1.9509827792884714e-05, "loss": 2.0083, "step": 39167 }, { "epoch": 0.51, "grad_norm": 3.551863431930542, "learning_rate": 1.95097952971785e-05, "loss": 2.0093, "step": 39168 }, { "epoch": 0.51, "grad_norm": 3.585601806640625, "learning_rate": 1.9509762800422246e-05, "loss": 1.6695, "step": 39169 }, { "epoch": 0.51, "grad_norm": 3.576624631881714, "learning_rate": 1.9509730302615954e-05, "loss": 1.672, "step": 39170 }, { "epoch": 0.51, "grad_norm": 3.3926706314086914, "learning_rate": 1.9509697803759622e-05, "loss": 1.6255, "step": 39171 }, { "epoch": 0.51, "grad_norm": 3.570111036300659, "learning_rate": 1.950966530385326e-05, "loss": 1.8057, "step": 39172 }, { "epoch": 0.51, "grad_norm": 4.350921630859375, "learning_rate": 1.950963280289687e-05, "loss": 2.6316, "step": 39173 }, { "epoch": 0.51, "grad_norm": 3.211219310760498, "learning_rate": 1.9509600300890457e-05, "loss": 1.6753, "step": 39174 }, { "epoch": 0.51, "grad_norm": 3.6365368366241455, "learning_rate": 1.950956779783402e-05, "loss": 2.1472, "step": 39175 }, { "epoch": 0.51, "grad_norm": 3.7835638523101807, "learning_rate": 1.9509535293727566e-05, "loss": 1.9212, "step": 39176 }, { "epoch": 0.51, "grad_norm": 3.114145040512085, "learning_rate": 1.9509502788571098e-05, "loss": 1.6173, "step": 39177 }, { "epoch": 0.51, "grad_norm": 4.006525039672852, "learning_rate": 1.950947028236462e-05, "loss": 2.6173, "step": 39178 }, { "epoch": 0.51, "grad_norm": 4.280964374542236, "learning_rate": 1.950943777510814e-05, "loss": 2.3077, "step": 39179 }, { "epoch": 0.51, "grad_norm": 3.867549419403076, "learning_rate": 1.950940526680165e-05, "loss": 1.9941, "step": 39180 }, { "epoch": 0.51, "grad_norm": 4.264477252960205, "learning_rate": 1.950937275744516e-05, "loss": 1.9795, "step": 39181 }, { "epoch": 0.51, "grad_norm": 3.8415417671203613, "learning_rate": 1.9509340247038676e-05, "loss": 1.9507, "step": 39182 }, { "epoch": 0.51, "grad_norm": 3.891779899597168, "learning_rate": 1.95093077355822e-05, "loss": 1.8396, "step": 39183 }, { "epoch": 0.51, "grad_norm": 3.8808510303497314, "learning_rate": 1.9509275223075733e-05, "loss": 2.553, "step": 39184 }, { "epoch": 0.51, "grad_norm": 3.1823930740356445, "learning_rate": 1.9509242709519284e-05, "loss": 1.8356, "step": 39185 }, { "epoch": 0.51, "grad_norm": 4.042484283447266, "learning_rate": 1.9509210194912852e-05, "loss": 1.7377, "step": 39186 }, { "epoch": 0.51, "grad_norm": 3.3859024047851562, "learning_rate": 1.9509177679256442e-05, "loss": 1.7746, "step": 39187 }, { "epoch": 0.51, "grad_norm": 3.8656928539276123, "learning_rate": 1.9509145162550057e-05, "loss": 2.0738, "step": 39188 }, { "epoch": 0.51, "grad_norm": 3.7631168365478516, "learning_rate": 1.9509112644793703e-05, "loss": 2.0585, "step": 39189 }, { "epoch": 0.51, "grad_norm": 4.261007785797119, "learning_rate": 1.9509080125987377e-05, "loss": 2.29, "step": 39190 }, { "epoch": 0.51, "grad_norm": 3.641096830368042, "learning_rate": 1.950904760613109e-05, "loss": 2.0313, "step": 39191 }, { "epoch": 0.51, "grad_norm": 3.9655649662017822, "learning_rate": 1.9509015085224844e-05, "loss": 2.1453, "step": 39192 }, { "epoch": 0.51, "grad_norm": 3.230379343032837, "learning_rate": 1.950898256326864e-05, "loss": 1.6616, "step": 39193 }, { "epoch": 0.51, "grad_norm": 3.2657265663146973, "learning_rate": 1.9508950040262484e-05, "loss": 1.7029, "step": 39194 }, { "epoch": 0.51, "grad_norm": 3.9378061294555664, "learning_rate": 1.9508917516206376e-05, "loss": 2.4793, "step": 39195 }, { "epoch": 0.51, "grad_norm": 3.8041152954101562, "learning_rate": 1.9508884991100324e-05, "loss": 2.0618, "step": 39196 }, { "epoch": 0.51, "grad_norm": 3.2373268604278564, "learning_rate": 1.950885246494433e-05, "loss": 1.4353, "step": 39197 }, { "epoch": 0.51, "grad_norm": 3.661055564880371, "learning_rate": 1.9508819937738396e-05, "loss": 1.938, "step": 39198 }, { "epoch": 0.51, "grad_norm": 4.417534351348877, "learning_rate": 1.950878740948253e-05, "loss": 2.4805, "step": 39199 }, { "epoch": 0.51, "grad_norm": 3.9728052616119385, "learning_rate": 1.950875488017673e-05, "loss": 1.9637, "step": 39200 }, { "epoch": 0.51, "grad_norm": 3.814049482345581, "learning_rate": 1.9508722349821e-05, "loss": 1.9884, "step": 39201 }, { "epoch": 0.51, "grad_norm": 3.760803461074829, "learning_rate": 1.950868981841535e-05, "loss": 2.097, "step": 39202 }, { "epoch": 0.51, "grad_norm": 3.741779327392578, "learning_rate": 1.9508657285959777e-05, "loss": 2.0189, "step": 39203 }, { "epoch": 0.51, "grad_norm": 3.4463350772857666, "learning_rate": 1.950862475245429e-05, "loss": 1.9836, "step": 39204 }, { "epoch": 0.51, "grad_norm": 3.216480016708374, "learning_rate": 1.9508592217898887e-05, "loss": 1.9321, "step": 39205 }, { "epoch": 0.51, "grad_norm": 3.96813702583313, "learning_rate": 1.9508559682293573e-05, "loss": 1.7846, "step": 39206 }, { "epoch": 0.51, "grad_norm": 3.4121179580688477, "learning_rate": 1.9508527145638356e-05, "loss": 1.6675, "step": 39207 }, { "epoch": 0.51, "grad_norm": 3.8364250659942627, "learning_rate": 1.9508494607933234e-05, "loss": 1.7939, "step": 39208 }, { "epoch": 0.51, "grad_norm": 3.099649667739868, "learning_rate": 1.950846206917821e-05, "loss": 1.4082, "step": 39209 }, { "epoch": 0.51, "grad_norm": 3.365274667739868, "learning_rate": 1.9508429529373295e-05, "loss": 1.776, "step": 39210 }, { "epoch": 0.51, "grad_norm": 3.5155928134918213, "learning_rate": 1.9508396988518488e-05, "loss": 1.9776, "step": 39211 }, { "epoch": 0.51, "grad_norm": 4.024491310119629, "learning_rate": 1.950836444661379e-05, "loss": 2.6018, "step": 39212 }, { "epoch": 0.51, "grad_norm": 3.295586109161377, "learning_rate": 1.9508331903659206e-05, "loss": 1.4174, "step": 39213 }, { "epoch": 0.51, "grad_norm": 3.5593931674957275, "learning_rate": 1.9508299359654748e-05, "loss": 1.8382, "step": 39214 }, { "epoch": 0.51, "grad_norm": 3.423248529434204, "learning_rate": 1.9508266814600406e-05, "loss": 1.7323, "step": 39215 }, { "epoch": 0.51, "grad_norm": 3.6198604106903076, "learning_rate": 1.9508234268496194e-05, "loss": 1.8735, "step": 39216 }, { "epoch": 0.51, "grad_norm": 3.588055372238159, "learning_rate": 1.9508201721342108e-05, "loss": 1.745, "step": 39217 }, { "epoch": 0.51, "grad_norm": 4.297313213348389, "learning_rate": 1.950816917313816e-05, "loss": 2.0318, "step": 39218 }, { "epoch": 0.51, "grad_norm": 3.754378318786621, "learning_rate": 1.9508136623884342e-05, "loss": 1.7719, "step": 39219 }, { "epoch": 0.51, "grad_norm": 4.102710247039795, "learning_rate": 1.950810407358067e-05, "loss": 2.0348, "step": 39220 }, { "epoch": 0.51, "grad_norm": 3.256657838821411, "learning_rate": 1.950807152222714e-05, "loss": 1.5772, "step": 39221 }, { "epoch": 0.51, "grad_norm": 3.8051164150238037, "learning_rate": 1.9508038969823757e-05, "loss": 2.2469, "step": 39222 }, { "epoch": 0.51, "grad_norm": 4.026734828948975, "learning_rate": 1.9508006416370528e-05, "loss": 2.2094, "step": 39223 }, { "epoch": 0.51, "grad_norm": 3.2248218059539795, "learning_rate": 1.950797386186745e-05, "loss": 1.7175, "step": 39224 }, { "epoch": 0.51, "grad_norm": 3.5732314586639404, "learning_rate": 1.950794130631453e-05, "loss": 1.7792, "step": 39225 }, { "epoch": 0.51, "grad_norm": 4.006373882293701, "learning_rate": 1.9507908749711774e-05, "loss": 2.1204, "step": 39226 }, { "epoch": 0.51, "grad_norm": 4.169351577758789, "learning_rate": 1.9507876192059182e-05, "loss": 1.8114, "step": 39227 }, { "epoch": 0.51, "grad_norm": 3.5649232864379883, "learning_rate": 1.9507843633356764e-05, "loss": 1.8118, "step": 39228 }, { "epoch": 0.51, "grad_norm": 4.097896099090576, "learning_rate": 1.9507811073604513e-05, "loss": 2.3034, "step": 39229 }, { "epoch": 0.51, "grad_norm": 3.7889387607574463, "learning_rate": 1.9507778512802443e-05, "loss": 1.9655, "step": 39230 }, { "epoch": 0.51, "grad_norm": 4.332135200500488, "learning_rate": 1.950774595095055e-05, "loss": 2.2329, "step": 39231 }, { "epoch": 0.51, "grad_norm": 4.1178812980651855, "learning_rate": 1.950771338804884e-05, "loss": 2.0541, "step": 39232 }, { "epoch": 0.51, "grad_norm": 3.6436495780944824, "learning_rate": 1.9507680824097323e-05, "loss": 2.0501, "step": 39233 }, { "epoch": 0.51, "grad_norm": 3.9116132259368896, "learning_rate": 1.9507648259095992e-05, "loss": 2.2774, "step": 39234 }, { "epoch": 0.51, "grad_norm": 3.798372983932495, "learning_rate": 1.950761569304485e-05, "loss": 2.2188, "step": 39235 }, { "epoch": 0.51, "grad_norm": 4.05260705947876, "learning_rate": 1.9507583125943916e-05, "loss": 1.9928, "step": 39236 }, { "epoch": 0.51, "grad_norm": 3.570161819458008, "learning_rate": 1.9507550557793177e-05, "loss": 1.9842, "step": 39237 }, { "epoch": 0.51, "grad_norm": 3.814513683319092, "learning_rate": 1.9507517988592647e-05, "loss": 2.0272, "step": 39238 }, { "epoch": 0.51, "grad_norm": 4.454318046569824, "learning_rate": 1.9507485418342324e-05, "loss": 2.365, "step": 39239 }, { "epoch": 0.51, "grad_norm": 3.1643271446228027, "learning_rate": 1.950745284704221e-05, "loss": 1.6795, "step": 39240 }, { "epoch": 0.51, "grad_norm": 3.6862740516662598, "learning_rate": 1.9507420274692314e-05, "loss": 1.9792, "step": 39241 }, { "epoch": 0.51, "grad_norm": 3.5640130043029785, "learning_rate": 1.9507387701292638e-05, "loss": 1.8471, "step": 39242 }, { "epoch": 0.51, "grad_norm": 3.599616050720215, "learning_rate": 1.950735512684319e-05, "loss": 1.7448, "step": 39243 }, { "epoch": 0.51, "grad_norm": 3.7671279907226562, "learning_rate": 1.9507322551343963e-05, "loss": 2.1672, "step": 39244 }, { "epoch": 0.51, "grad_norm": 3.2996044158935547, "learning_rate": 1.9507289974794967e-05, "loss": 1.8007, "step": 39245 }, { "epoch": 0.51, "grad_norm": 3.8033571243286133, "learning_rate": 1.9507257397196207e-05, "loss": 2.1809, "step": 39246 }, { "epoch": 0.51, "grad_norm": 4.152673721313477, "learning_rate": 1.950722481854768e-05, "loss": 2.6914, "step": 39247 }, { "epoch": 0.51, "grad_norm": 3.939180850982666, "learning_rate": 1.95071922388494e-05, "loss": 2.4278, "step": 39248 }, { "epoch": 0.51, "grad_norm": 4.2114577293396, "learning_rate": 1.9507159658101364e-05, "loss": 1.94, "step": 39249 }, { "epoch": 0.51, "grad_norm": 4.149289608001709, "learning_rate": 1.9507127076303573e-05, "loss": 2.3618, "step": 39250 }, { "epoch": 0.51, "grad_norm": 3.3212130069732666, "learning_rate": 1.9507094493456033e-05, "loss": 1.6858, "step": 39251 }, { "epoch": 0.51, "grad_norm": 3.6990320682525635, "learning_rate": 1.9507061909558753e-05, "loss": 1.7058, "step": 39252 }, { "epoch": 0.51, "grad_norm": 3.9234976768493652, "learning_rate": 1.9507029324611727e-05, "loss": 2.2965, "step": 39253 }, { "epoch": 0.51, "grad_norm": 3.845942258834839, "learning_rate": 1.9506996738614967e-05, "loss": 2.2965, "step": 39254 }, { "epoch": 0.51, "grad_norm": 3.640444755554199, "learning_rate": 1.950696415156847e-05, "loss": 2.0796, "step": 39255 }, { "epoch": 0.51, "grad_norm": 3.3326942920684814, "learning_rate": 1.9506931563472245e-05, "loss": 1.4494, "step": 39256 }, { "epoch": 0.51, "grad_norm": 4.187169075012207, "learning_rate": 1.9506898974326294e-05, "loss": 2.1714, "step": 39257 }, { "epoch": 0.51, "grad_norm": 3.313753366470337, "learning_rate": 1.9506866384130622e-05, "loss": 1.58, "step": 39258 }, { "epoch": 0.51, "grad_norm": 4.2532267570495605, "learning_rate": 1.9506833792885226e-05, "loss": 2.2167, "step": 39259 }, { "epoch": 0.51, "grad_norm": 4.013814926147461, "learning_rate": 1.950680120059012e-05, "loss": 2.2473, "step": 39260 }, { "epoch": 0.51, "grad_norm": 4.071364879608154, "learning_rate": 1.9506768607245296e-05, "loss": 2.0516, "step": 39261 }, { "epoch": 0.51, "grad_norm": 3.5927960872650146, "learning_rate": 1.9506736012850766e-05, "loss": 2.0135, "step": 39262 }, { "epoch": 0.51, "grad_norm": 3.0562357902526855, "learning_rate": 1.9506703417406533e-05, "loss": 1.4102, "step": 39263 }, { "epoch": 0.51, "grad_norm": 4.000574111938477, "learning_rate": 1.9506670820912598e-05, "loss": 2.0442, "step": 39264 }, { "epoch": 0.51, "grad_norm": 3.5900089740753174, "learning_rate": 1.9506638223368962e-05, "loss": 1.7931, "step": 39265 }, { "epoch": 0.51, "grad_norm": 3.371232271194458, "learning_rate": 1.9506605624775633e-05, "loss": 1.6954, "step": 39266 }, { "epoch": 0.51, "grad_norm": 4.083959102630615, "learning_rate": 1.9506573025132616e-05, "loss": 2.4036, "step": 39267 }, { "epoch": 0.51, "grad_norm": 3.050318956375122, "learning_rate": 1.950654042443991e-05, "loss": 1.5284, "step": 39268 }, { "epoch": 0.51, "grad_norm": 3.5819170475006104, "learning_rate": 1.950650782269752e-05, "loss": 1.5703, "step": 39269 }, { "epoch": 0.51, "grad_norm": 3.167741060256958, "learning_rate": 1.9506475219905452e-05, "loss": 1.7169, "step": 39270 }, { "epoch": 0.51, "grad_norm": 3.7972636222839355, "learning_rate": 1.9506442616063707e-05, "loss": 2.1274, "step": 39271 }, { "epoch": 0.51, "grad_norm": 3.7552597522735596, "learning_rate": 1.950641001117229e-05, "loss": 1.8485, "step": 39272 }, { "epoch": 0.51, "grad_norm": 3.5913491249084473, "learning_rate": 1.9506377405231205e-05, "loss": 1.5198, "step": 39273 }, { "epoch": 0.51, "grad_norm": 3.633999824523926, "learning_rate": 1.950634479824045e-05, "loss": 1.9773, "step": 39274 }, { "epoch": 0.51, "grad_norm": 4.106199264526367, "learning_rate": 1.950631219020004e-05, "loss": 2.042, "step": 39275 }, { "epoch": 0.51, "grad_norm": 3.591033697128296, "learning_rate": 1.9506279581109968e-05, "loss": 1.6325, "step": 39276 }, { "epoch": 0.51, "grad_norm": 3.2200355529785156, "learning_rate": 1.950624697097024e-05, "loss": 1.5172, "step": 39277 }, { "epoch": 0.51, "grad_norm": 4.3867316246032715, "learning_rate": 1.9506214359780868e-05, "loss": 1.9624, "step": 39278 }, { "epoch": 0.51, "grad_norm": 3.684283494949341, "learning_rate": 1.950618174754184e-05, "loss": 1.8701, "step": 39279 }, { "epoch": 0.51, "grad_norm": 3.5413973331451416, "learning_rate": 1.950614913425317e-05, "loss": 1.4897, "step": 39280 }, { "epoch": 0.51, "grad_norm": 3.7034854888916016, "learning_rate": 1.9506116519914864e-05, "loss": 2.159, "step": 39281 }, { "epoch": 0.51, "grad_norm": 4.006591320037842, "learning_rate": 1.950608390452692e-05, "loss": 2.0308, "step": 39282 }, { "epoch": 0.51, "grad_norm": 3.049097776412964, "learning_rate": 1.950605128808934e-05, "loss": 1.6078, "step": 39283 }, { "epoch": 0.51, "grad_norm": 3.165567398071289, "learning_rate": 1.9506018670602132e-05, "loss": 1.5208, "step": 39284 }, { "epoch": 0.51, "grad_norm": 3.874455213546753, "learning_rate": 1.9505986052065304e-05, "loss": 1.8696, "step": 39285 }, { "epoch": 0.51, "grad_norm": 3.926194190979004, "learning_rate": 1.950595343247885e-05, "loss": 2.2148, "step": 39286 }, { "epoch": 0.51, "grad_norm": 3.8867428302764893, "learning_rate": 1.9505920811842774e-05, "loss": 2.3875, "step": 39287 }, { "epoch": 0.51, "grad_norm": 3.2278378009796143, "learning_rate": 1.9505888190157084e-05, "loss": 1.5616, "step": 39288 }, { "epoch": 0.51, "grad_norm": 3.803997278213501, "learning_rate": 1.9505855567421787e-05, "loss": 1.9736, "step": 39289 }, { "epoch": 0.51, "grad_norm": 3.7012577056884766, "learning_rate": 1.9505822943636878e-05, "loss": 1.733, "step": 39290 }, { "epoch": 0.51, "grad_norm": 3.4777464866638184, "learning_rate": 1.9505790318802366e-05, "loss": 1.7113, "step": 39291 }, { "epoch": 0.51, "grad_norm": 3.7553346157073975, "learning_rate": 1.9505757692918255e-05, "loss": 2.0867, "step": 39292 }, { "epoch": 0.51, "grad_norm": 3.998443603515625, "learning_rate": 1.9505725065984548e-05, "loss": 2.2509, "step": 39293 }, { "epoch": 0.51, "grad_norm": 3.8110921382904053, "learning_rate": 1.9505692438001245e-05, "loss": 1.7349, "step": 39294 }, { "epoch": 0.51, "grad_norm": 3.6603057384490967, "learning_rate": 1.950565980896835e-05, "loss": 1.6456, "step": 39295 }, { "epoch": 0.51, "grad_norm": 3.728776693344116, "learning_rate": 1.9505627178885873e-05, "loss": 1.7648, "step": 39296 }, { "epoch": 0.51, "grad_norm": 3.9475760459899902, "learning_rate": 1.9505594547753813e-05, "loss": 2.2586, "step": 39297 }, { "epoch": 0.51, "grad_norm": 3.3931121826171875, "learning_rate": 1.950556191557217e-05, "loss": 1.873, "step": 39298 }, { "epoch": 0.51, "grad_norm": 4.177082061767578, "learning_rate": 1.950552928234096e-05, "loss": 2.5122, "step": 39299 }, { "epoch": 0.51, "grad_norm": 4.096407413482666, "learning_rate": 1.9505496648060172e-05, "loss": 1.982, "step": 39300 }, { "epoch": 0.51, "grad_norm": 3.594618320465088, "learning_rate": 1.9505464012729817e-05, "loss": 1.7611, "step": 39301 }, { "epoch": 0.51, "grad_norm": 3.752847671508789, "learning_rate": 1.95054313763499e-05, "loss": 2.0735, "step": 39302 }, { "epoch": 0.51, "grad_norm": 3.6965744495391846, "learning_rate": 1.950539873892042e-05, "loss": 1.7217, "step": 39303 }, { "epoch": 0.51, "grad_norm": 3.1493678092956543, "learning_rate": 1.9505366100441382e-05, "loss": 1.7767, "step": 39304 }, { "epoch": 0.51, "grad_norm": 3.3219780921936035, "learning_rate": 1.950533346091279e-05, "loss": 1.741, "step": 39305 }, { "epoch": 0.51, "grad_norm": 3.617075204849243, "learning_rate": 1.950530082033465e-05, "loss": 1.8023, "step": 39306 }, { "epoch": 0.51, "grad_norm": 4.013948917388916, "learning_rate": 1.950526817870696e-05, "loss": 2.165, "step": 39307 }, { "epoch": 0.51, "grad_norm": 3.5279440879821777, "learning_rate": 1.950523553602973e-05, "loss": 2.0472, "step": 39308 }, { "epoch": 0.51, "grad_norm": 3.64754056930542, "learning_rate": 1.9505202892302965e-05, "loss": 1.8379, "step": 39309 }, { "epoch": 0.51, "grad_norm": 3.7589688301086426, "learning_rate": 1.9505170247526658e-05, "loss": 1.9437, "step": 39310 }, { "epoch": 0.51, "grad_norm": 3.8253390789031982, "learning_rate": 1.950513760170082e-05, "loss": 1.7801, "step": 39311 }, { "epoch": 0.51, "grad_norm": 3.45247745513916, "learning_rate": 1.9505104954825454e-05, "loss": 1.9533, "step": 39312 }, { "epoch": 0.51, "grad_norm": 3.17264723777771, "learning_rate": 1.9505072306900563e-05, "loss": 1.4489, "step": 39313 }, { "epoch": 0.51, "grad_norm": 3.841881513595581, "learning_rate": 1.9505039657926154e-05, "loss": 2.1162, "step": 39314 }, { "epoch": 0.51, "grad_norm": 3.483537197113037, "learning_rate": 1.950500700790222e-05, "loss": 1.9065, "step": 39315 }, { "epoch": 0.51, "grad_norm": 3.707292318344116, "learning_rate": 1.950497435682878e-05, "loss": 2.0664, "step": 39316 }, { "epoch": 0.51, "grad_norm": 3.587005853652954, "learning_rate": 1.9504941704705824e-05, "loss": 1.9275, "step": 39317 }, { "epoch": 0.51, "grad_norm": 4.074421405792236, "learning_rate": 1.9504909051533364e-05, "loss": 2.3137, "step": 39318 }, { "epoch": 0.51, "grad_norm": 3.5074384212493896, "learning_rate": 1.95048763973114e-05, "loss": 1.7944, "step": 39319 }, { "epoch": 0.51, "grad_norm": 3.602918863296509, "learning_rate": 1.9504843742039935e-05, "loss": 1.826, "step": 39320 }, { "epoch": 0.51, "grad_norm": 3.739219903945923, "learning_rate": 1.950481108571898e-05, "loss": 2.0499, "step": 39321 }, { "epoch": 0.51, "grad_norm": 3.8112740516662598, "learning_rate": 1.9504778428348525e-05, "loss": 2.0235, "step": 39322 }, { "epoch": 0.51, "grad_norm": 3.8558342456817627, "learning_rate": 1.9504745769928585e-05, "loss": 1.6402, "step": 39323 }, { "epoch": 0.51, "grad_norm": 3.718801736831665, "learning_rate": 1.9504713110459156e-05, "loss": 2.2348, "step": 39324 }, { "epoch": 0.51, "grad_norm": 3.8676719665527344, "learning_rate": 1.950468044994025e-05, "loss": 2.4216, "step": 39325 }, { "epoch": 0.51, "grad_norm": 3.4549453258514404, "learning_rate": 1.9504647788371865e-05, "loss": 1.746, "step": 39326 }, { "epoch": 0.51, "grad_norm": 3.724091053009033, "learning_rate": 1.9504615125754005e-05, "loss": 1.933, "step": 39327 }, { "epoch": 0.51, "grad_norm": 3.482835292816162, "learning_rate": 1.9504582462086675e-05, "loss": 1.6306, "step": 39328 }, { "epoch": 0.51, "grad_norm": 3.2542145252227783, "learning_rate": 1.9504549797369878e-05, "loss": 1.6799, "step": 39329 }, { "epoch": 0.51, "grad_norm": 3.977534055709839, "learning_rate": 1.9504517131603617e-05, "loss": 1.8768, "step": 39330 }, { "epoch": 0.51, "grad_norm": 4.017890930175781, "learning_rate": 1.9504484464787895e-05, "loss": 2.2113, "step": 39331 }, { "epoch": 0.51, "grad_norm": 3.7311646938323975, "learning_rate": 1.9504451796922717e-05, "loss": 1.861, "step": 39332 }, { "epoch": 0.51, "grad_norm": 4.0107951164245605, "learning_rate": 1.9504419128008086e-05, "loss": 1.9711, "step": 39333 }, { "epoch": 0.51, "grad_norm": 3.429100513458252, "learning_rate": 1.9504386458044007e-05, "loss": 1.8144, "step": 39334 }, { "epoch": 0.51, "grad_norm": 3.9566729068756104, "learning_rate": 1.9504353787030482e-05, "loss": 2.0666, "step": 39335 }, { "epoch": 0.51, "grad_norm": 3.492485284805298, "learning_rate": 1.9504321114967514e-05, "loss": 2.0125, "step": 39336 }, { "epoch": 0.51, "grad_norm": 3.8478050231933594, "learning_rate": 1.950428844185511e-05, "loss": 1.6928, "step": 39337 }, { "epoch": 0.51, "grad_norm": 3.28715181350708, "learning_rate": 1.950425576769327e-05, "loss": 1.7721, "step": 39338 }, { "epoch": 0.51, "grad_norm": 3.3127527236938477, "learning_rate": 1.9504223092482e-05, "loss": 1.5229, "step": 39339 }, { "epoch": 0.51, "grad_norm": 4.004541397094727, "learning_rate": 1.95041904162213e-05, "loss": 1.9882, "step": 39340 }, { "epoch": 0.51, "grad_norm": 3.844749689102173, "learning_rate": 1.950415773891118e-05, "loss": 1.87, "step": 39341 }, { "epoch": 0.51, "grad_norm": 4.095747470855713, "learning_rate": 1.9504125060551638e-05, "loss": 1.8354, "step": 39342 }, { "epoch": 0.51, "grad_norm": 3.414661169052124, "learning_rate": 1.9504092381142678e-05, "loss": 1.8791, "step": 39343 }, { "epoch": 0.51, "grad_norm": 4.045155048370361, "learning_rate": 1.9504059700684305e-05, "loss": 2.5776, "step": 39344 }, { "epoch": 0.51, "grad_norm": 3.2929158210754395, "learning_rate": 1.9504027019176523e-05, "loss": 1.462, "step": 39345 }, { "epoch": 0.51, "grad_norm": 4.124778747558594, "learning_rate": 1.9503994336619335e-05, "loss": 2.3436, "step": 39346 }, { "epoch": 0.51, "grad_norm": 3.545776844024658, "learning_rate": 1.9503961653012747e-05, "loss": 1.6097, "step": 39347 }, { "epoch": 0.51, "grad_norm": 2.7805371284484863, "learning_rate": 1.950392896835676e-05, "loss": 1.2469, "step": 39348 }, { "epoch": 0.51, "grad_norm": 3.543917655944824, "learning_rate": 1.9503896282651375e-05, "loss": 1.5259, "step": 39349 }, { "epoch": 0.51, "grad_norm": 3.2845535278320312, "learning_rate": 1.95038635958966e-05, "loss": 1.758, "step": 39350 }, { "epoch": 0.51, "grad_norm": 3.641094446182251, "learning_rate": 1.950383090809244e-05, "loss": 1.7246, "step": 39351 }, { "epoch": 0.51, "grad_norm": 4.109652996063232, "learning_rate": 1.9503798219238892e-05, "loss": 2.6112, "step": 39352 }, { "epoch": 0.51, "grad_norm": 4.141471862792969, "learning_rate": 1.9503765529335963e-05, "loss": 2.3882, "step": 39353 }, { "epoch": 0.51, "grad_norm": 3.319363832473755, "learning_rate": 1.950373283838366e-05, "loss": 1.9324, "step": 39354 }, { "epoch": 0.51, "grad_norm": 3.8430991172790527, "learning_rate": 1.9503700146381982e-05, "loss": 1.9737, "step": 39355 }, { "epoch": 0.51, "grad_norm": 3.2101848125457764, "learning_rate": 1.9503667453330933e-05, "loss": 1.7061, "step": 39356 }, { "epoch": 0.51, "grad_norm": 3.521544933319092, "learning_rate": 1.9503634759230522e-05, "loss": 1.7089, "step": 39357 }, { "epoch": 0.51, "grad_norm": 4.206307888031006, "learning_rate": 1.9503602064080746e-05, "loss": 2.2278, "step": 39358 }, { "epoch": 0.51, "grad_norm": 3.556485652923584, "learning_rate": 1.950356936788161e-05, "loss": 1.7585, "step": 39359 }, { "epoch": 0.51, "grad_norm": 4.140720367431641, "learning_rate": 1.950353667063312e-05, "loss": 2.3041, "step": 39360 }, { "epoch": 0.51, "grad_norm": 3.428713083267212, "learning_rate": 1.9503503972335282e-05, "loss": 1.7663, "step": 39361 }, { "epoch": 0.51, "grad_norm": 3.8499083518981934, "learning_rate": 1.950347127298809e-05, "loss": 2.4164, "step": 39362 }, { "epoch": 0.51, "grad_norm": 3.8374977111816406, "learning_rate": 1.9503438572591556e-05, "loss": 2.3152, "step": 39363 }, { "epoch": 0.51, "grad_norm": 3.6303551197052, "learning_rate": 1.9503405871145682e-05, "loss": 1.7473, "step": 39364 }, { "epoch": 0.51, "grad_norm": 4.574830532073975, "learning_rate": 1.950337316865047e-05, "loss": 2.7834, "step": 39365 }, { "epoch": 0.51, "grad_norm": 3.8221068382263184, "learning_rate": 1.9503340465105923e-05, "loss": 2.3008, "step": 39366 }, { "epoch": 0.51, "grad_norm": 3.1484949588775635, "learning_rate": 1.9503307760512048e-05, "loss": 1.4778, "step": 39367 }, { "epoch": 0.51, "grad_norm": 3.6298036575317383, "learning_rate": 1.9503275054868848e-05, "loss": 2.2417, "step": 39368 }, { "epoch": 0.51, "grad_norm": 3.7252447605133057, "learning_rate": 1.950324234817632e-05, "loss": 2.0072, "step": 39369 }, { "epoch": 0.51, "grad_norm": 3.3674821853637695, "learning_rate": 1.9503209640434474e-05, "loss": 1.7548, "step": 39370 }, { "epoch": 0.51, "grad_norm": 3.4682841300964355, "learning_rate": 1.9503176931643317e-05, "loss": 1.7603, "step": 39371 }, { "epoch": 0.51, "grad_norm": 3.839507818222046, "learning_rate": 1.9503144221802845e-05, "loss": 1.8898, "step": 39372 }, { "epoch": 0.51, "grad_norm": 3.8144378662109375, "learning_rate": 1.9503111510913065e-05, "loss": 1.9524, "step": 39373 }, { "epoch": 0.51, "grad_norm": 3.666177988052368, "learning_rate": 1.950307879897398e-05, "loss": 2.0299, "step": 39374 }, { "epoch": 0.51, "grad_norm": 3.4332306385040283, "learning_rate": 1.9503046085985593e-05, "loss": 1.7248, "step": 39375 }, { "epoch": 0.51, "grad_norm": 3.7679567337036133, "learning_rate": 1.950301337194791e-05, "loss": 1.8354, "step": 39376 }, { "epoch": 0.51, "grad_norm": 3.738945484161377, "learning_rate": 1.9502980656860933e-05, "loss": 1.7857, "step": 39377 }, { "epoch": 0.51, "grad_norm": 3.7756364345550537, "learning_rate": 1.9502947940724665e-05, "loss": 1.9127, "step": 39378 }, { "epoch": 0.51, "grad_norm": 4.517214775085449, "learning_rate": 1.950291522353911e-05, "loss": 2.4063, "step": 39379 }, { "epoch": 0.51, "grad_norm": 3.601437568664551, "learning_rate": 1.9502882505304274e-05, "loss": 2.2499, "step": 39380 }, { "epoch": 0.51, "grad_norm": 4.037022590637207, "learning_rate": 1.9502849786020158e-05, "loss": 2.3274, "step": 39381 }, { "epoch": 0.51, "grad_norm": 4.393692970275879, "learning_rate": 1.9502817065686767e-05, "loss": 2.3643, "step": 39382 }, { "epoch": 0.51, "grad_norm": 3.741152048110962, "learning_rate": 1.95027843443041e-05, "loss": 1.7887, "step": 39383 }, { "epoch": 0.51, "grad_norm": 3.4278948307037354, "learning_rate": 1.9502751621872167e-05, "loss": 1.8061, "step": 39384 }, { "epoch": 0.51, "grad_norm": 3.6974825859069824, "learning_rate": 1.9502718898390965e-05, "loss": 1.8972, "step": 39385 }, { "epoch": 0.51, "grad_norm": 3.4152050018310547, "learning_rate": 1.950268617386051e-05, "loss": 1.8595, "step": 39386 }, { "epoch": 0.51, "grad_norm": 3.6530582904815674, "learning_rate": 1.950265344828079e-05, "loss": 1.885, "step": 39387 }, { "epoch": 0.51, "grad_norm": 3.340095281600952, "learning_rate": 1.950262072165182e-05, "loss": 1.5836, "step": 39388 }, { "epoch": 0.51, "grad_norm": 3.7261765003204346, "learning_rate": 1.9502587993973593e-05, "loss": 1.9688, "step": 39389 }, { "epoch": 0.51, "grad_norm": 3.9633514881134033, "learning_rate": 1.9502555265246127e-05, "loss": 2.0163, "step": 39390 }, { "epoch": 0.51, "grad_norm": 4.327810764312744, "learning_rate": 1.9502522535469413e-05, "loss": 2.3002, "step": 39391 }, { "epoch": 0.51, "grad_norm": 3.5595040321350098, "learning_rate": 1.950248980464346e-05, "loss": 1.9444, "step": 39392 }, { "epoch": 0.51, "grad_norm": 3.1217007637023926, "learning_rate": 1.9502457072768273e-05, "loss": 1.6937, "step": 39393 }, { "epoch": 0.51, "grad_norm": 3.8416526317596436, "learning_rate": 1.9502424339843852e-05, "loss": 2.0148, "step": 39394 }, { "epoch": 0.51, "grad_norm": 3.284369945526123, "learning_rate": 1.9502391605870202e-05, "loss": 1.4572, "step": 39395 }, { "epoch": 0.51, "grad_norm": 4.20807409286499, "learning_rate": 1.9502358870847326e-05, "loss": 1.9982, "step": 39396 }, { "epoch": 0.51, "grad_norm": 3.647926092147827, "learning_rate": 1.950232613477523e-05, "loss": 1.6906, "step": 39397 }, { "epoch": 0.51, "grad_norm": 3.988957643508911, "learning_rate": 1.9502293397653917e-05, "loss": 2.1431, "step": 39398 }, { "epoch": 0.51, "grad_norm": 3.717831611633301, "learning_rate": 1.9502260659483388e-05, "loss": 1.981, "step": 39399 }, { "epoch": 0.51, "grad_norm": 3.6013312339782715, "learning_rate": 1.9502227920263646e-05, "loss": 1.6298, "step": 39400 }, { "epoch": 0.51, "grad_norm": 3.886610746383667, "learning_rate": 1.9502195179994702e-05, "loss": 1.8882, "step": 39401 }, { "epoch": 0.51, "grad_norm": 3.727025032043457, "learning_rate": 1.9502162438676548e-05, "loss": 1.9115, "step": 39402 }, { "epoch": 0.51, "grad_norm": 3.6020591259002686, "learning_rate": 1.95021296963092e-05, "loss": 1.9272, "step": 39403 }, { "epoch": 0.51, "grad_norm": 3.886314630508423, "learning_rate": 1.950209695289265e-05, "loss": 2.0455, "step": 39404 }, { "epoch": 0.51, "grad_norm": 4.000527381896973, "learning_rate": 1.9502064208426913e-05, "loss": 2.2395, "step": 39405 }, { "epoch": 0.51, "grad_norm": 3.968029499053955, "learning_rate": 1.9502031462911985e-05, "loss": 1.961, "step": 39406 }, { "epoch": 0.51, "grad_norm": 3.781221389770508, "learning_rate": 1.950199871634787e-05, "loss": 1.9492, "step": 39407 }, { "epoch": 0.51, "grad_norm": 3.810682773590088, "learning_rate": 1.9501965968734575e-05, "loss": 1.7151, "step": 39408 }, { "epoch": 0.51, "grad_norm": 3.6204922199249268, "learning_rate": 1.95019332200721e-05, "loss": 2.1764, "step": 39409 }, { "epoch": 0.51, "grad_norm": 3.3951079845428467, "learning_rate": 1.9501900470360452e-05, "loss": 1.6448, "step": 39410 }, { "epoch": 0.51, "grad_norm": 4.218127250671387, "learning_rate": 1.9501867719599632e-05, "loss": 2.5301, "step": 39411 }, { "epoch": 0.51, "grad_norm": 4.209091663360596, "learning_rate": 1.9501834967789644e-05, "loss": 2.3615, "step": 39412 }, { "epoch": 0.51, "grad_norm": 3.637657403945923, "learning_rate": 1.9501802214930493e-05, "loss": 2.0514, "step": 39413 }, { "epoch": 0.51, "grad_norm": 3.230898141860962, "learning_rate": 1.9501769461022183e-05, "loss": 1.7124, "step": 39414 }, { "epoch": 0.51, "grad_norm": 3.65168833732605, "learning_rate": 1.9501736706064715e-05, "loss": 1.6723, "step": 39415 }, { "epoch": 0.51, "grad_norm": 3.7499048709869385, "learning_rate": 1.9501703950058095e-05, "loss": 2.0697, "step": 39416 }, { "epoch": 0.51, "grad_norm": 3.4685728549957275, "learning_rate": 1.9501671193002326e-05, "loss": 1.6317, "step": 39417 }, { "epoch": 0.51, "grad_norm": 4.177444934844971, "learning_rate": 1.950163843489741e-05, "loss": 2.0336, "step": 39418 }, { "epoch": 0.51, "grad_norm": 4.279112339019775, "learning_rate": 1.9501605675743353e-05, "loss": 2.2821, "step": 39419 }, { "epoch": 0.51, "grad_norm": 3.2542054653167725, "learning_rate": 1.9501572915540155e-05, "loss": 1.7295, "step": 39420 }, { "epoch": 0.51, "grad_norm": 3.7916793823242188, "learning_rate": 1.9501540154287823e-05, "loss": 1.9837, "step": 39421 }, { "epoch": 0.51, "grad_norm": 4.346304893493652, "learning_rate": 1.9501507391986362e-05, "loss": 2.2573, "step": 39422 }, { "epoch": 0.51, "grad_norm": 3.4750328063964844, "learning_rate": 1.9501474628635773e-05, "loss": 1.4479, "step": 39423 }, { "epoch": 0.51, "grad_norm": 3.620523452758789, "learning_rate": 1.950144186423606e-05, "loss": 1.7681, "step": 39424 }, { "epoch": 0.51, "grad_norm": 3.1189844608306885, "learning_rate": 1.9501409098787224e-05, "loss": 1.3594, "step": 39425 }, { "epoch": 0.51, "grad_norm": 3.0734498500823975, "learning_rate": 1.9501376332289274e-05, "loss": 1.6562, "step": 39426 }, { "epoch": 0.51, "grad_norm": 3.5498178005218506, "learning_rate": 1.950134356474221e-05, "loss": 1.7176, "step": 39427 }, { "epoch": 0.51, "grad_norm": 3.8852274417877197, "learning_rate": 1.9501310796146033e-05, "loss": 2.0174, "step": 39428 }, { "epoch": 0.51, "grad_norm": 3.408092975616455, "learning_rate": 1.9501278026500756e-05, "loss": 1.8534, "step": 39429 }, { "epoch": 0.51, "grad_norm": 3.25095272064209, "learning_rate": 1.9501245255806375e-05, "loss": 1.6507, "step": 39430 }, { "epoch": 0.51, "grad_norm": 3.487025260925293, "learning_rate": 1.9501212484062892e-05, "loss": 1.7141, "step": 39431 }, { "epoch": 0.51, "grad_norm": 4.437580108642578, "learning_rate": 1.9501179711270318e-05, "loss": 2.2208, "step": 39432 }, { "epoch": 0.51, "grad_norm": 3.489485025405884, "learning_rate": 1.950114693742865e-05, "loss": 1.636, "step": 39433 }, { "epoch": 0.51, "grad_norm": 3.72664213180542, "learning_rate": 1.9501114162537895e-05, "loss": 1.9436, "step": 39434 }, { "epoch": 0.51, "grad_norm": 4.309041500091553, "learning_rate": 1.9501081386598058e-05, "loss": 2.0118, "step": 39435 }, { "epoch": 0.51, "grad_norm": 4.552437782287598, "learning_rate": 1.9501048609609138e-05, "loss": 2.3303, "step": 39436 }, { "epoch": 0.51, "grad_norm": 3.4102182388305664, "learning_rate": 1.950101583157114e-05, "loss": 1.8957, "step": 39437 }, { "epoch": 0.51, "grad_norm": 4.106254577636719, "learning_rate": 1.950098305248407e-05, "loss": 2.0679, "step": 39438 }, { "epoch": 0.51, "grad_norm": 3.560699224472046, "learning_rate": 1.9500950272347928e-05, "loss": 1.9039, "step": 39439 }, { "epoch": 0.51, "grad_norm": 4.223377704620361, "learning_rate": 1.9500917491162725e-05, "loss": 1.9329, "step": 39440 }, { "epoch": 0.51, "grad_norm": 3.8217875957489014, "learning_rate": 1.9500884708928458e-05, "loss": 1.8392, "step": 39441 }, { "epoch": 0.51, "grad_norm": 3.6729748249053955, "learning_rate": 1.950085192564513e-05, "loss": 1.6619, "step": 39442 }, { "epoch": 0.51, "grad_norm": 3.91721773147583, "learning_rate": 1.950081914131275e-05, "loss": 1.5821, "step": 39443 }, { "epoch": 0.51, "grad_norm": 3.938000440597534, "learning_rate": 1.9500786355931315e-05, "loss": 2.343, "step": 39444 }, { "epoch": 0.51, "grad_norm": 3.8551197052001953, "learning_rate": 1.9500753569500833e-05, "loss": 2.0576, "step": 39445 }, { "epoch": 0.51, "grad_norm": 3.73486328125, "learning_rate": 1.9500720782021304e-05, "loss": 2.0961, "step": 39446 }, { "epoch": 0.51, "grad_norm": 4.420346260070801, "learning_rate": 1.950068799349274e-05, "loss": 1.8807, "step": 39447 }, { "epoch": 0.51, "grad_norm": 3.330732822418213, "learning_rate": 1.9500655203915137e-05, "loss": 1.6101, "step": 39448 }, { "epoch": 0.51, "grad_norm": 3.6195356845855713, "learning_rate": 1.9500622413288498e-05, "loss": 1.9577, "step": 39449 }, { "epoch": 0.51, "grad_norm": 3.5521719455718994, "learning_rate": 1.950058962161283e-05, "loss": 1.9152, "step": 39450 }, { "epoch": 0.51, "grad_norm": 3.7303049564361572, "learning_rate": 1.9500556828888137e-05, "loss": 1.6631, "step": 39451 }, { "epoch": 0.51, "grad_norm": 2.9747965335845947, "learning_rate": 1.9500524035114418e-05, "loss": 1.4892, "step": 39452 }, { "epoch": 0.51, "grad_norm": 4.241212844848633, "learning_rate": 1.9500491240291683e-05, "loss": 2.2418, "step": 39453 }, { "epoch": 0.51, "grad_norm": 3.681716203689575, "learning_rate": 1.950045844441993e-05, "loss": 2.0956, "step": 39454 }, { "epoch": 0.51, "grad_norm": 4.09488582611084, "learning_rate": 1.950042564749917e-05, "loss": 2.1223, "step": 39455 }, { "epoch": 0.51, "grad_norm": 3.9096648693084717, "learning_rate": 1.95003928495294e-05, "loss": 2.3647, "step": 39456 }, { "epoch": 0.51, "grad_norm": 3.4368183612823486, "learning_rate": 1.9500360050510622e-05, "loss": 1.8883, "step": 39457 }, { "epoch": 0.51, "grad_norm": 3.6791024208068848, "learning_rate": 1.9500327250442846e-05, "loss": 1.8222, "step": 39458 }, { "epoch": 0.51, "grad_norm": 3.522183895111084, "learning_rate": 1.9500294449326073e-05, "loss": 1.993, "step": 39459 }, { "epoch": 0.51, "grad_norm": 3.6802186965942383, "learning_rate": 1.9500261647160304e-05, "loss": 1.9123, "step": 39460 }, { "epoch": 0.51, "grad_norm": 3.860919713973999, "learning_rate": 1.950022884394555e-05, "loss": 1.9209, "step": 39461 }, { "epoch": 0.51, "grad_norm": 3.8829522132873535, "learning_rate": 1.9500196039681805e-05, "loss": 2.3016, "step": 39462 }, { "epoch": 0.51, "grad_norm": 4.597633361816406, "learning_rate": 1.950016323436908e-05, "loss": 1.7591, "step": 39463 }, { "epoch": 0.51, "grad_norm": 3.397789478302002, "learning_rate": 1.950013042800737e-05, "loss": 1.6951, "step": 39464 }, { "epoch": 0.51, "grad_norm": 3.1236987113952637, "learning_rate": 1.9500097620596688e-05, "loss": 1.3665, "step": 39465 }, { "epoch": 0.51, "grad_norm": 3.9190080165863037, "learning_rate": 1.9500064812137038e-05, "loss": 2.2539, "step": 39466 }, { "epoch": 0.51, "grad_norm": 3.944033622741699, "learning_rate": 1.9500032002628415e-05, "loss": 2.1833, "step": 39467 }, { "epoch": 0.51, "grad_norm": 3.626096725463867, "learning_rate": 1.949999919207083e-05, "loss": 1.9815, "step": 39468 }, { "epoch": 0.51, "grad_norm": 3.704831123352051, "learning_rate": 1.9499966380464283e-05, "loss": 2.1222, "step": 39469 }, { "epoch": 0.51, "grad_norm": 3.3694851398468018, "learning_rate": 1.9499933567808776e-05, "loss": 1.5122, "step": 39470 }, { "epoch": 0.51, "grad_norm": 3.74045729637146, "learning_rate": 1.9499900754104318e-05, "loss": 2.1414, "step": 39471 }, { "epoch": 0.51, "grad_norm": 4.068808078765869, "learning_rate": 1.9499867939350908e-05, "loss": 1.9551, "step": 39472 }, { "epoch": 0.51, "grad_norm": 3.5739219188690186, "learning_rate": 1.9499835123548553e-05, "loss": 1.6903, "step": 39473 }, { "epoch": 0.51, "grad_norm": 3.9967398643493652, "learning_rate": 1.9499802306697252e-05, "loss": 1.7827, "step": 39474 }, { "epoch": 0.51, "grad_norm": 3.9312686920166016, "learning_rate": 1.9499769488797016e-05, "loss": 2.1326, "step": 39475 }, { "epoch": 0.51, "grad_norm": 3.245819091796875, "learning_rate": 1.9499736669847845e-05, "loss": 1.5052, "step": 39476 }, { "epoch": 0.51, "grad_norm": 3.353548288345337, "learning_rate": 1.949970384984974e-05, "loss": 1.548, "step": 39477 }, { "epoch": 0.51, "grad_norm": 3.6109893321990967, "learning_rate": 1.94996710288027e-05, "loss": 2.0348, "step": 39478 }, { "epoch": 0.51, "grad_norm": 3.2902519702911377, "learning_rate": 1.9499638206706745e-05, "loss": 1.7326, "step": 39479 }, { "epoch": 0.51, "grad_norm": 3.6432077884674072, "learning_rate": 1.9499605383561864e-05, "loss": 1.7488, "step": 39480 }, { "epoch": 0.51, "grad_norm": 3.695333242416382, "learning_rate": 1.9499572559368065e-05, "loss": 1.9484, "step": 39481 }, { "epoch": 0.51, "grad_norm": 3.810487747192383, "learning_rate": 1.9499539734125355e-05, "loss": 1.9516, "step": 39482 }, { "epoch": 0.51, "grad_norm": 3.9629976749420166, "learning_rate": 1.9499506907833733e-05, "loss": 2.2953, "step": 39483 }, { "epoch": 0.51, "grad_norm": 4.025489330291748, "learning_rate": 1.9499474080493203e-05, "loss": 2.0255, "step": 39484 }, { "epoch": 0.51, "grad_norm": 4.0193963050842285, "learning_rate": 1.9499441252103772e-05, "loss": 2.3206, "step": 39485 }, { "epoch": 0.51, "grad_norm": 3.713261842727661, "learning_rate": 1.949940842266544e-05, "loss": 2.3197, "step": 39486 }, { "epoch": 0.51, "grad_norm": 4.3654561042785645, "learning_rate": 1.9499375592178213e-05, "loss": 2.2682, "step": 39487 }, { "epoch": 0.51, "grad_norm": 4.261056423187256, "learning_rate": 1.949934276064209e-05, "loss": 2.1524, "step": 39488 }, { "epoch": 0.51, "grad_norm": 3.909485340118408, "learning_rate": 1.9499309928057086e-05, "loss": 1.8949, "step": 39489 }, { "epoch": 0.51, "grad_norm": 3.2525036334991455, "learning_rate": 1.9499277094423193e-05, "loss": 1.7359, "step": 39490 }, { "epoch": 0.51, "grad_norm": 3.1576788425445557, "learning_rate": 1.949924425974042e-05, "loss": 1.6227, "step": 39491 }, { "epoch": 0.51, "grad_norm": 3.564675807952881, "learning_rate": 1.9499211424008764e-05, "loss": 2.0395, "step": 39492 }, { "epoch": 0.51, "grad_norm": 3.4563217163085938, "learning_rate": 1.9499178587228238e-05, "loss": 1.6748, "step": 39493 }, { "epoch": 0.51, "grad_norm": 3.716867446899414, "learning_rate": 1.949914574939884e-05, "loss": 2.2689, "step": 39494 }, { "epoch": 0.51, "grad_norm": 3.1098320484161377, "learning_rate": 1.9499112910520578e-05, "loss": 1.6167, "step": 39495 }, { "epoch": 0.51, "grad_norm": 3.5624239444732666, "learning_rate": 1.949908007059345e-05, "loss": 1.7577, "step": 39496 }, { "epoch": 0.51, "grad_norm": 3.665712833404541, "learning_rate": 1.9499047229617465e-05, "loss": 1.6991, "step": 39497 }, { "epoch": 0.51, "grad_norm": 3.7920303344726562, "learning_rate": 1.9499014387592623e-05, "loss": 2.0206, "step": 39498 }, { "epoch": 0.51, "grad_norm": 3.813955068588257, "learning_rate": 1.949898154451893e-05, "loss": 2.2045, "step": 39499 }, { "epoch": 0.51, "grad_norm": 3.7550313472747803, "learning_rate": 1.9498948700396384e-05, "loss": 2.1713, "step": 39500 }, { "epoch": 0.51, "grad_norm": 3.4885876178741455, "learning_rate": 1.9498915855224994e-05, "loss": 1.6458, "step": 39501 }, { "epoch": 0.51, "grad_norm": 4.112266540527344, "learning_rate": 1.9498883009004767e-05, "loss": 2.028, "step": 39502 }, { "epoch": 0.51, "grad_norm": 3.333993434906006, "learning_rate": 1.9498850161735697e-05, "loss": 1.7879, "step": 39503 }, { "epoch": 0.51, "grad_norm": 3.7367162704467773, "learning_rate": 1.9498817313417796e-05, "loss": 1.6317, "step": 39504 }, { "epoch": 0.51, "grad_norm": 3.480924367904663, "learning_rate": 1.9498784464051062e-05, "loss": 1.8742, "step": 39505 }, { "epoch": 0.51, "grad_norm": 4.063758373260498, "learning_rate": 1.9498751613635502e-05, "loss": 2.1945, "step": 39506 }, { "epoch": 0.51, "grad_norm": 3.4040942192077637, "learning_rate": 1.949871876217112e-05, "loss": 1.5304, "step": 39507 }, { "epoch": 0.51, "grad_norm": 3.704183578491211, "learning_rate": 1.949868590965792e-05, "loss": 1.8226, "step": 39508 }, { "epoch": 0.51, "grad_norm": 3.8743982315063477, "learning_rate": 1.94986530560959e-05, "loss": 2.1369, "step": 39509 }, { "epoch": 0.51, "grad_norm": 4.486968517303467, "learning_rate": 1.949862020148507e-05, "loss": 2.0711, "step": 39510 }, { "epoch": 0.51, "grad_norm": 3.643573760986328, "learning_rate": 1.9498587345825427e-05, "loss": 1.8992, "step": 39511 }, { "epoch": 0.51, "grad_norm": 3.607257604598999, "learning_rate": 1.9498554489116984e-05, "loss": 1.844, "step": 39512 }, { "epoch": 0.51, "grad_norm": 4.1817169189453125, "learning_rate": 1.9498521631359738e-05, "loss": 2.2605, "step": 39513 }, { "epoch": 0.51, "grad_norm": 3.8345398902893066, "learning_rate": 1.9498488772553693e-05, "loss": 1.7865, "step": 39514 }, { "epoch": 0.51, "grad_norm": 3.7591657638549805, "learning_rate": 1.9498455912698855e-05, "loss": 1.7907, "step": 39515 }, { "epoch": 0.51, "grad_norm": 4.057841777801514, "learning_rate": 1.9498423051795227e-05, "loss": 2.262, "step": 39516 }, { "epoch": 0.51, "grad_norm": 3.4604687690734863, "learning_rate": 1.949839018984281e-05, "loss": 1.5867, "step": 39517 }, { "epoch": 0.51, "grad_norm": 3.998812198638916, "learning_rate": 1.9498357326841607e-05, "loss": 1.9675, "step": 39518 }, { "epoch": 0.51, "grad_norm": 3.9946653842926025, "learning_rate": 1.9498324462791628e-05, "loss": 2.1807, "step": 39519 }, { "epoch": 0.51, "grad_norm": 4.483448028564453, "learning_rate": 1.949829159769287e-05, "loss": 2.1783, "step": 39520 }, { "epoch": 0.51, "grad_norm": 3.7005045413970947, "learning_rate": 1.9498258731545342e-05, "loss": 1.9103, "step": 39521 }, { "epoch": 0.51, "grad_norm": 3.718773126602173, "learning_rate": 1.9498225864349046e-05, "loss": 1.6717, "step": 39522 }, { "epoch": 0.51, "grad_norm": 3.538996458053589, "learning_rate": 1.949819299610398e-05, "loss": 2.0233, "step": 39523 }, { "epoch": 0.51, "grad_norm": 3.7415883541107178, "learning_rate": 1.9498160126810156e-05, "loss": 2.203, "step": 39524 }, { "epoch": 0.51, "grad_norm": 3.939215898513794, "learning_rate": 1.9498127256467576e-05, "loss": 2.0123, "step": 39525 }, { "epoch": 0.51, "grad_norm": 3.9345638751983643, "learning_rate": 1.9498094385076237e-05, "loss": 1.7536, "step": 39526 }, { "epoch": 0.51, "grad_norm": 3.757399559020996, "learning_rate": 1.9498061512636153e-05, "loss": 1.9179, "step": 39527 }, { "epoch": 0.51, "grad_norm": 3.817063570022583, "learning_rate": 1.9498028639147317e-05, "loss": 1.916, "step": 39528 }, { "epoch": 0.51, "grad_norm": 3.480506658554077, "learning_rate": 1.9497995764609738e-05, "loss": 1.9278, "step": 39529 }, { "epoch": 0.51, "grad_norm": 3.834381580352783, "learning_rate": 1.949796288902342e-05, "loss": 2.0209, "step": 39530 }, { "epoch": 0.51, "grad_norm": 4.048874855041504, "learning_rate": 1.9497930012388366e-05, "loss": 2.3012, "step": 39531 }, { "epoch": 0.51, "grad_norm": 3.359718084335327, "learning_rate": 1.9497897134704578e-05, "loss": 1.6012, "step": 39532 }, { "epoch": 0.51, "grad_norm": 3.269613742828369, "learning_rate": 1.9497864255972066e-05, "loss": 1.6962, "step": 39533 }, { "epoch": 0.51, "grad_norm": 3.5574843883514404, "learning_rate": 1.949783137619082e-05, "loss": 1.7141, "step": 39534 }, { "epoch": 0.51, "grad_norm": 3.1873579025268555, "learning_rate": 1.949779849536086e-05, "loss": 1.5931, "step": 39535 }, { "epoch": 0.51, "grad_norm": 3.3558130264282227, "learning_rate": 1.9497765613482178e-05, "loss": 1.9341, "step": 39536 }, { "epoch": 0.51, "grad_norm": 3.4301798343658447, "learning_rate": 1.9497732730554782e-05, "loss": 1.8267, "step": 39537 }, { "epoch": 0.51, "grad_norm": 4.387669086456299, "learning_rate": 1.949769984657868e-05, "loss": 2.3298, "step": 39538 }, { "epoch": 0.51, "grad_norm": 3.589259147644043, "learning_rate": 1.9497666961553863e-05, "loss": 1.7984, "step": 39539 }, { "epoch": 0.51, "grad_norm": 3.270416021347046, "learning_rate": 1.9497634075480343e-05, "loss": 1.4876, "step": 39540 }, { "epoch": 0.51, "grad_norm": 3.7305338382720947, "learning_rate": 1.949760118835813e-05, "loss": 2.1765, "step": 39541 }, { "epoch": 0.51, "grad_norm": 3.6071884632110596, "learning_rate": 1.9497568300187216e-05, "loss": 1.7826, "step": 39542 }, { "epoch": 0.51, "grad_norm": 3.1997408866882324, "learning_rate": 1.949753541096761e-05, "loss": 1.5543, "step": 39543 }, { "epoch": 0.51, "grad_norm": 4.498771667480469, "learning_rate": 1.9497502520699318e-05, "loss": 2.0431, "step": 39544 }, { "epoch": 0.51, "grad_norm": 3.264286518096924, "learning_rate": 1.9497469629382335e-05, "loss": 1.8154, "step": 39545 }, { "epoch": 0.51, "grad_norm": 4.567842483520508, "learning_rate": 1.9497436737016672e-05, "loss": 2.0804, "step": 39546 }, { "epoch": 0.51, "grad_norm": 3.637063503265381, "learning_rate": 1.9497403843602334e-05, "loss": 1.7703, "step": 39547 }, { "epoch": 0.51, "grad_norm": 3.34541392326355, "learning_rate": 1.949737094913932e-05, "loss": 1.7519, "step": 39548 }, { "epoch": 0.51, "grad_norm": 3.7934646606445312, "learning_rate": 1.949733805362763e-05, "loss": 2.0558, "step": 39549 }, { "epoch": 0.51, "grad_norm": 3.9771509170532227, "learning_rate": 1.949730515706728e-05, "loss": 2.4229, "step": 39550 }, { "epoch": 0.51, "grad_norm": 3.542463541030884, "learning_rate": 1.949727225945826e-05, "loss": 1.7465, "step": 39551 }, { "epoch": 0.51, "grad_norm": 3.349104166030884, "learning_rate": 1.9497239360800586e-05, "loss": 1.6733, "step": 39552 }, { "epoch": 0.51, "grad_norm": 3.666513681411743, "learning_rate": 1.9497206461094253e-05, "loss": 1.6932, "step": 39553 }, { "epoch": 0.51, "grad_norm": 3.674455165863037, "learning_rate": 1.9497173560339267e-05, "loss": 1.8282, "step": 39554 }, { "epoch": 0.51, "grad_norm": 3.504807233810425, "learning_rate": 1.9497140658535634e-05, "loss": 1.8893, "step": 39555 }, { "epoch": 0.51, "grad_norm": 3.2158820629119873, "learning_rate": 1.949710775568335e-05, "loss": 1.7104, "step": 39556 }, { "epoch": 0.51, "grad_norm": 3.4559457302093506, "learning_rate": 1.9497074851782433e-05, "loss": 2.0889, "step": 39557 }, { "epoch": 0.51, "grad_norm": 3.507388114929199, "learning_rate": 1.949704194683287e-05, "loss": 1.9307, "step": 39558 }, { "epoch": 0.51, "grad_norm": 3.5292470455169678, "learning_rate": 1.9497009040834677e-05, "loss": 1.9302, "step": 39559 }, { "epoch": 0.51, "grad_norm": 3.812260866165161, "learning_rate": 1.9496976133787852e-05, "loss": 1.9317, "step": 39560 }, { "epoch": 0.51, "grad_norm": 3.438314437866211, "learning_rate": 1.9496943225692397e-05, "loss": 1.7627, "step": 39561 }, { "epoch": 0.51, "grad_norm": 3.620650291442871, "learning_rate": 1.9496910316548322e-05, "loss": 1.942, "step": 39562 }, { "epoch": 0.51, "grad_norm": 3.3386447429656982, "learning_rate": 1.9496877406355623e-05, "loss": 1.728, "step": 39563 }, { "epoch": 0.51, "grad_norm": 3.5897042751312256, "learning_rate": 1.9496844495114312e-05, "loss": 1.9852, "step": 39564 }, { "epoch": 0.51, "grad_norm": 3.867603302001953, "learning_rate": 1.9496811582824383e-05, "loss": 1.9153, "step": 39565 }, { "epoch": 0.51, "grad_norm": 4.062528133392334, "learning_rate": 1.949677866948585e-05, "loss": 2.1567, "step": 39566 }, { "epoch": 0.51, "grad_norm": 3.685878276824951, "learning_rate": 1.949674575509871e-05, "loss": 1.8732, "step": 39567 }, { "epoch": 0.51, "grad_norm": 4.047274589538574, "learning_rate": 1.9496712839662966e-05, "loss": 2.1927, "step": 39568 }, { "epoch": 0.51, "grad_norm": 3.591438055038452, "learning_rate": 1.9496679923178626e-05, "loss": 1.7886, "step": 39569 }, { "epoch": 0.51, "grad_norm": 3.9444832801818848, "learning_rate": 1.9496647005645692e-05, "loss": 1.6907, "step": 39570 }, { "epoch": 0.51, "grad_norm": 3.7162954807281494, "learning_rate": 1.9496614087064166e-05, "loss": 2.3719, "step": 39571 }, { "epoch": 0.51, "grad_norm": 3.45326828956604, "learning_rate": 1.9496581167434053e-05, "loss": 1.7383, "step": 39572 }, { "epoch": 0.51, "grad_norm": 4.1756157875061035, "learning_rate": 1.9496548246755358e-05, "loss": 2.2052, "step": 39573 }, { "epoch": 0.51, "grad_norm": 3.4175775051116943, "learning_rate": 1.949651532502808e-05, "loss": 1.4948, "step": 39574 }, { "epoch": 0.51, "grad_norm": 3.7839877605438232, "learning_rate": 1.949648240225223e-05, "loss": 2.3543, "step": 39575 }, { "epoch": 0.51, "grad_norm": 3.9125022888183594, "learning_rate": 1.9496449478427803e-05, "loss": 1.8785, "step": 39576 }, { "epoch": 0.51, "grad_norm": 4.125113487243652, "learning_rate": 1.9496416553554804e-05, "loss": 1.9548, "step": 39577 }, { "epoch": 0.51, "grad_norm": 3.630546808242798, "learning_rate": 1.9496383627633245e-05, "loss": 2.0378, "step": 39578 }, { "epoch": 0.51, "grad_norm": 3.347506284713745, "learning_rate": 1.949635070066312e-05, "loss": 1.8912, "step": 39579 }, { "epoch": 0.51, "grad_norm": 3.643592357635498, "learning_rate": 1.9496317772644442e-05, "loss": 1.5653, "step": 39580 }, { "epoch": 0.51, "grad_norm": 4.159257888793945, "learning_rate": 1.9496284843577207e-05, "loss": 2.1049, "step": 39581 }, { "epoch": 0.51, "grad_norm": 4.086136817932129, "learning_rate": 1.949625191346142e-05, "loss": 2.4534, "step": 39582 }, { "epoch": 0.51, "grad_norm": 3.3905887603759766, "learning_rate": 1.9496218982297086e-05, "loss": 1.6968, "step": 39583 }, { "epoch": 0.51, "grad_norm": 3.2991268634796143, "learning_rate": 1.949618605008421e-05, "loss": 1.804, "step": 39584 }, { "epoch": 0.51, "grad_norm": 3.690040349960327, "learning_rate": 1.949615311682279e-05, "loss": 1.7739, "step": 39585 }, { "epoch": 0.51, "grad_norm": 3.5438129901885986, "learning_rate": 1.9496120182512838e-05, "loss": 1.8295, "step": 39586 }, { "epoch": 0.51, "grad_norm": 3.7697553634643555, "learning_rate": 1.949608724715435e-05, "loss": 1.8876, "step": 39587 }, { "epoch": 0.51, "grad_norm": 3.936690330505371, "learning_rate": 1.9496054310747334e-05, "loss": 1.7005, "step": 39588 }, { "epoch": 0.51, "grad_norm": 3.354743242263794, "learning_rate": 1.9496021373291792e-05, "loss": 1.8312, "step": 39589 }, { "epoch": 0.51, "grad_norm": 4.351337432861328, "learning_rate": 1.9495988434787733e-05, "loss": 2.1574, "step": 39590 }, { "epoch": 0.51, "grad_norm": 4.019142150878906, "learning_rate": 1.949595549523515e-05, "loss": 2.195, "step": 39591 }, { "epoch": 0.51, "grad_norm": 3.8101303577423096, "learning_rate": 1.9495922554634054e-05, "loss": 1.7508, "step": 39592 }, { "epoch": 0.51, "grad_norm": 4.227780818939209, "learning_rate": 1.949588961298445e-05, "loss": 1.9155, "step": 39593 }, { "epoch": 0.51, "grad_norm": 3.649594783782959, "learning_rate": 1.9495856670286333e-05, "loss": 1.6973, "step": 39594 }, { "epoch": 0.51, "grad_norm": 3.3861124515533447, "learning_rate": 1.9495823726539715e-05, "loss": 1.7848, "step": 39595 }, { "epoch": 0.51, "grad_norm": 4.321335315704346, "learning_rate": 1.94957907817446e-05, "loss": 2.5844, "step": 39596 }, { "epoch": 0.51, "grad_norm": 3.612745523452759, "learning_rate": 1.9495757835900982e-05, "loss": 1.6729, "step": 39597 }, { "epoch": 0.51, "grad_norm": 3.53135085105896, "learning_rate": 1.9495724889008877e-05, "loss": 2.0664, "step": 39598 }, { "epoch": 0.51, "grad_norm": 3.804739236831665, "learning_rate": 1.949569194106828e-05, "loss": 2.0206, "step": 39599 }, { "epoch": 0.51, "grad_norm": 3.539583444595337, "learning_rate": 1.94956589920792e-05, "loss": 1.8663, "step": 39600 }, { "epoch": 0.51, "grad_norm": 3.1727285385131836, "learning_rate": 1.9495626042041635e-05, "loss": 1.6267, "step": 39601 }, { "epoch": 0.51, "grad_norm": 3.282985210418701, "learning_rate": 1.9495593090955595e-05, "loss": 1.3999, "step": 39602 }, { "epoch": 0.51, "grad_norm": 3.4437432289123535, "learning_rate": 1.9495560138821078e-05, "loss": 1.6553, "step": 39603 }, { "epoch": 0.51, "grad_norm": 3.8152737617492676, "learning_rate": 1.949552718563809e-05, "loss": 2.0028, "step": 39604 }, { "epoch": 0.51, "grad_norm": 3.6692769527435303, "learning_rate": 1.9495494231406637e-05, "loss": 2.1084, "step": 39605 }, { "epoch": 0.51, "grad_norm": 4.328503131866455, "learning_rate": 1.949546127612672e-05, "loss": 2.0439, "step": 39606 }, { "epoch": 0.51, "grad_norm": 3.9692749977111816, "learning_rate": 1.949542831979834e-05, "loss": 1.9311, "step": 39607 }, { "epoch": 0.51, "grad_norm": 4.066402435302734, "learning_rate": 1.9495395362421504e-05, "loss": 1.887, "step": 39608 }, { "epoch": 0.51, "grad_norm": 3.5017335414886475, "learning_rate": 1.949536240399622e-05, "loss": 2.0356, "step": 39609 }, { "epoch": 0.51, "grad_norm": 3.301724672317505, "learning_rate": 1.9495329444522483e-05, "loss": 1.5126, "step": 39610 }, { "epoch": 0.51, "grad_norm": 3.245948553085327, "learning_rate": 1.94952964840003e-05, "loss": 1.6917, "step": 39611 }, { "epoch": 0.51, "grad_norm": 3.5378987789154053, "learning_rate": 1.9495263522429676e-05, "loss": 1.9716, "step": 39612 }, { "epoch": 0.51, "grad_norm": 3.642387866973877, "learning_rate": 1.9495230559810615e-05, "loss": 1.9212, "step": 39613 }, { "epoch": 0.51, "grad_norm": 4.108580589294434, "learning_rate": 1.9495197596143118e-05, "loss": 1.9438, "step": 39614 }, { "epoch": 0.51, "grad_norm": 4.265054225921631, "learning_rate": 1.949516463142719e-05, "loss": 2.2595, "step": 39615 }, { "epoch": 0.51, "grad_norm": 4.060582160949707, "learning_rate": 1.9495131665662838e-05, "loss": 2.323, "step": 39616 }, { "epoch": 0.51, "grad_norm": 3.1076438426971436, "learning_rate": 1.949509869885006e-05, "loss": 1.478, "step": 39617 }, { "epoch": 0.51, "grad_norm": 3.996558666229248, "learning_rate": 1.9495065730988858e-05, "loss": 2.0893, "step": 39618 }, { "epoch": 0.51, "grad_norm": 3.604799270629883, "learning_rate": 1.9495032762079246e-05, "loss": 1.9251, "step": 39619 }, { "epoch": 0.51, "grad_norm": 4.197814464569092, "learning_rate": 1.9494999792121217e-05, "loss": 2.0708, "step": 39620 }, { "epoch": 0.51, "grad_norm": 3.6545963287353516, "learning_rate": 1.9494966821114782e-05, "loss": 1.8103, "step": 39621 }, { "epoch": 0.51, "grad_norm": 3.960257053375244, "learning_rate": 1.9494933849059937e-05, "loss": 2.2464, "step": 39622 }, { "epoch": 0.51, "grad_norm": 3.9296462535858154, "learning_rate": 1.9494900875956695e-05, "loss": 2.2735, "step": 39623 }, { "epoch": 0.51, "grad_norm": 3.9827394485473633, "learning_rate": 1.9494867901805054e-05, "loss": 1.9956, "step": 39624 }, { "epoch": 0.51, "grad_norm": 3.442568063735962, "learning_rate": 1.9494834926605016e-05, "loss": 2.0108, "step": 39625 }, { "epoch": 0.51, "grad_norm": 3.7832388877868652, "learning_rate": 1.9494801950356592e-05, "loss": 2.5315, "step": 39626 }, { "epoch": 0.51, "grad_norm": 3.8551509380340576, "learning_rate": 1.949476897305978e-05, "loss": 1.6657, "step": 39627 }, { "epoch": 0.51, "grad_norm": 3.6373825073242188, "learning_rate": 1.949473599471458e-05, "loss": 1.7695, "step": 39628 }, { "epoch": 0.51, "grad_norm": 3.3376927375793457, "learning_rate": 1.9494703015321003e-05, "loss": 1.5326, "step": 39629 }, { "epoch": 0.51, "grad_norm": 3.9292919635772705, "learning_rate": 1.9494670034879048e-05, "loss": 1.8866, "step": 39630 }, { "epoch": 0.51, "grad_norm": 3.975956439971924, "learning_rate": 1.949463705338872e-05, "loss": 1.8259, "step": 39631 }, { "epoch": 0.51, "grad_norm": 4.152787685394287, "learning_rate": 1.9494604070850026e-05, "loss": 2.0532, "step": 39632 }, { "epoch": 0.51, "grad_norm": 3.8222761154174805, "learning_rate": 1.9494571087262963e-05, "loss": 2.1629, "step": 39633 }, { "epoch": 0.51, "grad_norm": 4.240653038024902, "learning_rate": 1.9494538102627544e-05, "loss": 2.0767, "step": 39634 }, { "epoch": 0.51, "grad_norm": 3.8051600456237793, "learning_rate": 1.949450511694376e-05, "loss": 1.5916, "step": 39635 }, { "epoch": 0.51, "grad_norm": 3.1760988235473633, "learning_rate": 1.9494472130211625e-05, "loss": 1.7373, "step": 39636 }, { "epoch": 0.51, "grad_norm": 3.1314854621887207, "learning_rate": 1.949443914243114e-05, "loss": 1.5755, "step": 39637 }, { "epoch": 0.51, "grad_norm": 3.794370651245117, "learning_rate": 1.9494406153602304e-05, "loss": 2.0056, "step": 39638 }, { "epoch": 0.51, "grad_norm": 5.10433292388916, "learning_rate": 1.949437316372513e-05, "loss": 1.8515, "step": 39639 }, { "epoch": 0.51, "grad_norm": 3.786029815673828, "learning_rate": 1.9494340172799614e-05, "loss": 1.8958, "step": 39640 }, { "epoch": 0.51, "grad_norm": 3.408289909362793, "learning_rate": 1.9494307180825762e-05, "loss": 1.91, "step": 39641 }, { "epoch": 0.51, "grad_norm": 3.8555052280426025, "learning_rate": 1.9494274187803575e-05, "loss": 2.0374, "step": 39642 }, { "epoch": 0.51, "grad_norm": 3.5991804599761963, "learning_rate": 1.949424119373306e-05, "loss": 2.0398, "step": 39643 }, { "epoch": 0.51, "grad_norm": 4.53974723815918, "learning_rate": 1.949420819861422e-05, "loss": 2.2034, "step": 39644 }, { "epoch": 0.51, "grad_norm": 3.7567033767700195, "learning_rate": 1.949417520244706e-05, "loss": 1.5998, "step": 39645 }, { "epoch": 0.51, "grad_norm": 3.7824482917785645, "learning_rate": 1.9494142205231582e-05, "loss": 1.831, "step": 39646 }, { "epoch": 0.51, "grad_norm": 3.7740602493286133, "learning_rate": 1.9494109206967786e-05, "loss": 1.6698, "step": 39647 }, { "epoch": 0.51, "grad_norm": 3.3902711868286133, "learning_rate": 1.9494076207655685e-05, "loss": 1.7032, "step": 39648 }, { "epoch": 0.51, "grad_norm": 3.8908395767211914, "learning_rate": 1.949404320729527e-05, "loss": 2.1664, "step": 39649 }, { "epoch": 0.51, "grad_norm": 3.8089327812194824, "learning_rate": 1.9494010205886554e-05, "loss": 2.2026, "step": 39650 }, { "epoch": 0.51, "grad_norm": 3.8365652561187744, "learning_rate": 1.9493977203429542e-05, "loss": 2.4303, "step": 39651 }, { "epoch": 0.51, "grad_norm": 3.7932395935058594, "learning_rate": 1.949394419992423e-05, "loss": 1.9728, "step": 39652 }, { "epoch": 0.51, "grad_norm": 3.6692893505096436, "learning_rate": 1.9493911195370626e-05, "loss": 1.9758, "step": 39653 }, { "epoch": 0.51, "grad_norm": 3.591325044631958, "learning_rate": 1.9493878189768733e-05, "loss": 1.7296, "step": 39654 }, { "epoch": 0.51, "grad_norm": 3.4206702709198, "learning_rate": 1.9493845183118555e-05, "loss": 1.8003, "step": 39655 }, { "epoch": 0.51, "grad_norm": 3.634014368057251, "learning_rate": 1.9493812175420096e-05, "loss": 1.7554, "step": 39656 }, { "epoch": 0.51, "grad_norm": 3.972371816635132, "learning_rate": 1.9493779166673362e-05, "loss": 2.1732, "step": 39657 }, { "epoch": 0.51, "grad_norm": 3.5271646976470947, "learning_rate": 1.9493746156878347e-05, "loss": 1.8129, "step": 39658 }, { "epoch": 0.51, "grad_norm": 3.7302982807159424, "learning_rate": 1.9493713146035068e-05, "loss": 1.9228, "step": 39659 }, { "epoch": 0.51, "grad_norm": 3.744532585144043, "learning_rate": 1.9493680134143518e-05, "loss": 1.8279, "step": 39660 }, { "epoch": 0.51, "grad_norm": 4.1026458740234375, "learning_rate": 1.9493647121203707e-05, "loss": 2.074, "step": 39661 }, { "epoch": 0.51, "grad_norm": 3.5810706615448, "learning_rate": 1.9493614107215636e-05, "loss": 1.6013, "step": 39662 }, { "epoch": 0.51, "grad_norm": 3.7790229320526123, "learning_rate": 1.9493581092179306e-05, "loss": 1.6213, "step": 39663 }, { "epoch": 0.51, "grad_norm": 3.4132845401763916, "learning_rate": 1.9493548076094726e-05, "loss": 1.8975, "step": 39664 }, { "epoch": 0.51, "grad_norm": 4.202385902404785, "learning_rate": 1.9493515058961896e-05, "loss": 2.2557, "step": 39665 }, { "epoch": 0.51, "grad_norm": 3.7593140602111816, "learning_rate": 1.949348204078082e-05, "loss": 2.0711, "step": 39666 }, { "epoch": 0.51, "grad_norm": 3.3464267253875732, "learning_rate": 1.9493449021551506e-05, "loss": 1.6323, "step": 39667 }, { "epoch": 0.51, "grad_norm": 4.341541767120361, "learning_rate": 1.9493416001273954e-05, "loss": 2.2565, "step": 39668 }, { "epoch": 0.51, "grad_norm": 4.2245330810546875, "learning_rate": 1.9493382979948164e-05, "loss": 1.7852, "step": 39669 }, { "epoch": 0.51, "grad_norm": 3.753803253173828, "learning_rate": 1.9493349957574148e-05, "loss": 2.1198, "step": 39670 }, { "epoch": 0.51, "grad_norm": 3.537282943725586, "learning_rate": 1.94933169341519e-05, "loss": 1.9543, "step": 39671 }, { "epoch": 0.51, "grad_norm": 5.196910381317139, "learning_rate": 1.9493283909681434e-05, "loss": 2.5664, "step": 39672 }, { "epoch": 0.51, "grad_norm": 4.481082916259766, "learning_rate": 1.9493250884162743e-05, "loss": 2.4161, "step": 39673 }, { "epoch": 0.51, "grad_norm": 4.045922756195068, "learning_rate": 1.949321785759584e-05, "loss": 2.103, "step": 39674 }, { "epoch": 0.51, "grad_norm": 3.943598747253418, "learning_rate": 1.9493184829980725e-05, "loss": 2.0854, "step": 39675 }, { "epoch": 0.51, "grad_norm": 3.7600667476654053, "learning_rate": 1.9493151801317398e-05, "loss": 2.2138, "step": 39676 }, { "epoch": 0.51, "grad_norm": 3.776628255844116, "learning_rate": 1.949311877160587e-05, "loss": 2.3833, "step": 39677 }, { "epoch": 0.51, "grad_norm": 3.668757438659668, "learning_rate": 1.949308574084614e-05, "loss": 2.0351, "step": 39678 }, { "epoch": 0.51, "grad_norm": 4.193703651428223, "learning_rate": 1.949305270903821e-05, "loss": 2.1234, "step": 39679 }, { "epoch": 0.51, "grad_norm": 3.57327938079834, "learning_rate": 1.9493019676182085e-05, "loss": 1.5593, "step": 39680 }, { "epoch": 0.51, "grad_norm": 3.748713254928589, "learning_rate": 1.9492986642277772e-05, "loss": 1.7517, "step": 39681 }, { "epoch": 0.51, "grad_norm": 3.839118480682373, "learning_rate": 1.9492953607325272e-05, "loss": 2.0515, "step": 39682 }, { "epoch": 0.51, "grad_norm": 4.289523601531982, "learning_rate": 1.9492920571324586e-05, "loss": 2.0025, "step": 39683 }, { "epoch": 0.52, "grad_norm": 3.1403698921203613, "learning_rate": 1.9492887534275727e-05, "loss": 1.6117, "step": 39684 }, { "epoch": 0.52, "grad_norm": 4.954514503479004, "learning_rate": 1.9492854496178685e-05, "loss": 2.1053, "step": 39685 }, { "epoch": 0.52, "grad_norm": 3.865412712097168, "learning_rate": 1.9492821457033474e-05, "loss": 1.8723, "step": 39686 }, { "epoch": 0.52, "grad_norm": 3.9370079040527344, "learning_rate": 1.9492788416840097e-05, "loss": 1.7187, "step": 39687 }, { "epoch": 0.52, "grad_norm": 3.9774889945983887, "learning_rate": 1.949275537559855e-05, "loss": 2.1715, "step": 39688 }, { "epoch": 0.52, "grad_norm": 3.625697135925293, "learning_rate": 1.9492722333308848e-05, "loss": 1.8547, "step": 39689 }, { "epoch": 0.52, "grad_norm": 3.5540502071380615, "learning_rate": 1.9492689289970983e-05, "loss": 2.0623, "step": 39690 }, { "epoch": 0.52, "grad_norm": 4.048705101013184, "learning_rate": 1.9492656245584965e-05, "loss": 2.2176, "step": 39691 }, { "epoch": 0.52, "grad_norm": 4.255087852478027, "learning_rate": 1.94926232001508e-05, "loss": 2.0226, "step": 39692 }, { "epoch": 0.52, "grad_norm": 3.888061285018921, "learning_rate": 1.9492590153668483e-05, "loss": 2.2748, "step": 39693 }, { "epoch": 0.52, "grad_norm": 3.993354558944702, "learning_rate": 1.9492557106138025e-05, "loss": 2.028, "step": 39694 }, { "epoch": 0.52, "grad_norm": 4.169322967529297, "learning_rate": 1.949252405755943e-05, "loss": 2.21, "step": 39695 }, { "epoch": 0.52, "grad_norm": 4.2386884689331055, "learning_rate": 1.94924910079327e-05, "loss": 2.2319, "step": 39696 }, { "epoch": 0.52, "grad_norm": 3.5290582180023193, "learning_rate": 1.9492457957257832e-05, "loss": 1.9594, "step": 39697 }, { "epoch": 0.52, "grad_norm": 4.001622200012207, "learning_rate": 1.9492424905534842e-05, "loss": 1.7877, "step": 39698 }, { "epoch": 0.52, "grad_norm": 3.9616096019744873, "learning_rate": 1.9492391852763723e-05, "loss": 1.8771, "step": 39699 }, { "epoch": 0.52, "grad_norm": 3.64680552482605, "learning_rate": 1.9492358798944483e-05, "loss": 1.8597, "step": 39700 }, { "epoch": 0.52, "grad_norm": 4.402060031890869, "learning_rate": 1.949232574407713e-05, "loss": 2.323, "step": 39701 }, { "epoch": 0.52, "grad_norm": 3.568044900894165, "learning_rate": 1.9492292688161657e-05, "loss": 1.9025, "step": 39702 }, { "epoch": 0.52, "grad_norm": 3.875904083251953, "learning_rate": 1.9492259631198077e-05, "loss": 2.2133, "step": 39703 }, { "epoch": 0.52, "grad_norm": 3.602367401123047, "learning_rate": 1.9492226573186393e-05, "loss": 1.7549, "step": 39704 }, { "epoch": 0.52, "grad_norm": 4.362236022949219, "learning_rate": 1.9492193514126604e-05, "loss": 2.3223, "step": 39705 }, { "epoch": 0.52, "grad_norm": 3.429659366607666, "learning_rate": 1.9492160454018716e-05, "loss": 1.7551, "step": 39706 }, { "epoch": 0.52, "grad_norm": 4.099756717681885, "learning_rate": 1.949212739286273e-05, "loss": 1.9396, "step": 39707 }, { "epoch": 0.52, "grad_norm": 3.4514336585998535, "learning_rate": 1.9492094330658657e-05, "loss": 1.538, "step": 39708 }, { "epoch": 0.52, "grad_norm": 4.335368633270264, "learning_rate": 1.949206126740649e-05, "loss": 2.3224, "step": 39709 }, { "epoch": 0.52, "grad_norm": 4.265214920043945, "learning_rate": 1.949202820310624e-05, "loss": 1.8101, "step": 39710 }, { "epoch": 0.52, "grad_norm": 4.121776580810547, "learning_rate": 1.9491995137757914e-05, "loss": 2.5286, "step": 39711 }, { "epoch": 0.52, "grad_norm": 3.4955549240112305, "learning_rate": 1.9491962071361504e-05, "loss": 1.7103, "step": 39712 }, { "epoch": 0.52, "grad_norm": 3.5316238403320312, "learning_rate": 1.9491929003917024e-05, "loss": 1.5129, "step": 39713 }, { "epoch": 0.52, "grad_norm": 4.301546573638916, "learning_rate": 1.9491895935424473e-05, "loss": 2.1182, "step": 39714 }, { "epoch": 0.52, "grad_norm": 3.892087459564209, "learning_rate": 1.9491862865883857e-05, "loss": 1.9435, "step": 39715 }, { "epoch": 0.52, "grad_norm": 3.684314489364624, "learning_rate": 1.9491829795295177e-05, "loss": 1.666, "step": 39716 }, { "epoch": 0.52, "grad_norm": 3.8525238037109375, "learning_rate": 1.949179672365844e-05, "loss": 2.0487, "step": 39717 }, { "epoch": 0.52, "grad_norm": 3.1596171855926514, "learning_rate": 1.9491763650973644e-05, "loss": 1.7938, "step": 39718 }, { "epoch": 0.52, "grad_norm": 3.4781253337860107, "learning_rate": 1.94917305772408e-05, "loss": 1.992, "step": 39719 }, { "epoch": 0.52, "grad_norm": 3.8418467044830322, "learning_rate": 1.9491697502459904e-05, "loss": 2.2563, "step": 39720 }, { "epoch": 0.52, "grad_norm": 2.8599765300750732, "learning_rate": 1.9491664426630966e-05, "loss": 1.3851, "step": 39721 }, { "epoch": 0.52, "grad_norm": 3.4237184524536133, "learning_rate": 1.9491631349753988e-05, "loss": 1.7383, "step": 39722 }, { "epoch": 0.52, "grad_norm": 3.5189106464385986, "learning_rate": 1.9491598271828973e-05, "loss": 1.7787, "step": 39723 }, { "epoch": 0.52, "grad_norm": 4.007450580596924, "learning_rate": 1.949156519285592e-05, "loss": 2.018, "step": 39724 }, { "epoch": 0.52, "grad_norm": 3.322331666946411, "learning_rate": 1.949153211283484e-05, "loss": 1.7468, "step": 39725 }, { "epoch": 0.52, "grad_norm": 3.500255584716797, "learning_rate": 1.9491499031765736e-05, "loss": 1.8689, "step": 39726 }, { "epoch": 0.52, "grad_norm": 3.973055362701416, "learning_rate": 1.949146594964861e-05, "loss": 1.8316, "step": 39727 }, { "epoch": 0.52, "grad_norm": 3.786043882369995, "learning_rate": 1.9491432866483458e-05, "loss": 1.7053, "step": 39728 }, { "epoch": 0.52, "grad_norm": 3.4594027996063232, "learning_rate": 1.94913997822703e-05, "loss": 1.8611, "step": 39729 }, { "epoch": 0.52, "grad_norm": 3.6312379837036133, "learning_rate": 1.9491366697009123e-05, "loss": 1.7382, "step": 39730 }, { "epoch": 0.52, "grad_norm": 3.8565526008605957, "learning_rate": 1.949133361069994e-05, "loss": 1.9181, "step": 39731 }, { "epoch": 0.52, "grad_norm": 3.6924636363983154, "learning_rate": 1.9491300523342756e-05, "loss": 1.9807, "step": 39732 }, { "epoch": 0.52, "grad_norm": 3.4875411987304688, "learning_rate": 1.9491267434937572e-05, "loss": 1.6681, "step": 39733 }, { "epoch": 0.52, "grad_norm": 4.379781246185303, "learning_rate": 1.9491234345484384e-05, "loss": 2.7801, "step": 39734 }, { "epoch": 0.52, "grad_norm": 3.636237859725952, "learning_rate": 1.949120125498321e-05, "loss": 1.9159, "step": 39735 }, { "epoch": 0.52, "grad_norm": 4.135744094848633, "learning_rate": 1.949116816343404e-05, "loss": 2.0305, "step": 39736 }, { "epoch": 0.52, "grad_norm": 4.233251571655273, "learning_rate": 1.949113507083689e-05, "loss": 2.7135, "step": 39737 }, { "epoch": 0.52, "grad_norm": 3.6335840225219727, "learning_rate": 1.9491101977191757e-05, "loss": 2.0352, "step": 39738 }, { "epoch": 0.52, "grad_norm": 4.012246608734131, "learning_rate": 1.949106888249864e-05, "loss": 2.2087, "step": 39739 }, { "epoch": 0.52, "grad_norm": 3.799679756164551, "learning_rate": 1.9491035786757553e-05, "loss": 1.949, "step": 39740 }, { "epoch": 0.52, "grad_norm": 3.607741594314575, "learning_rate": 1.9491002689968494e-05, "loss": 1.9121, "step": 39741 }, { "epoch": 0.52, "grad_norm": 3.909623622894287, "learning_rate": 1.9490969592131465e-05, "loss": 2.0717, "step": 39742 }, { "epoch": 0.52, "grad_norm": 3.948256254196167, "learning_rate": 1.9490936493246474e-05, "loss": 2.3117, "step": 39743 }, { "epoch": 0.52, "grad_norm": 4.177378177642822, "learning_rate": 1.949090339331352e-05, "loss": 2.0601, "step": 39744 }, { "epoch": 0.52, "grad_norm": 3.5312821865081787, "learning_rate": 1.9490870292332613e-05, "loss": 1.6745, "step": 39745 }, { "epoch": 0.52, "grad_norm": 4.234350204467773, "learning_rate": 1.949083719030375e-05, "loss": 2.5634, "step": 39746 }, { "epoch": 0.52, "grad_norm": 3.4013917446136475, "learning_rate": 1.949080408722694e-05, "loss": 1.6377, "step": 39747 }, { "epoch": 0.52, "grad_norm": 3.6896026134490967, "learning_rate": 1.9490770983102184e-05, "loss": 1.9869, "step": 39748 }, { "epoch": 0.52, "grad_norm": 3.2709274291992188, "learning_rate": 1.949073787792948e-05, "loss": 1.6404, "step": 39749 }, { "epoch": 0.52, "grad_norm": 3.668215751647949, "learning_rate": 1.9490704771708846e-05, "loss": 1.7373, "step": 39750 }, { "epoch": 0.52, "grad_norm": 3.6396074295043945, "learning_rate": 1.9490671664440274e-05, "loss": 2.0562, "step": 39751 }, { "epoch": 0.52, "grad_norm": 3.4877777099609375, "learning_rate": 1.9490638556123767e-05, "loss": 1.8266, "step": 39752 }, { "epoch": 0.52, "grad_norm": 3.7659807205200195, "learning_rate": 1.9490605446759335e-05, "loss": 1.9526, "step": 39753 }, { "epoch": 0.52, "grad_norm": 3.5339858531951904, "learning_rate": 1.949057233634698e-05, "loss": 1.6408, "step": 39754 }, { "epoch": 0.52, "grad_norm": 4.044052600860596, "learning_rate": 1.9490539224886708e-05, "loss": 2.3154, "step": 39755 }, { "epoch": 0.52, "grad_norm": 3.828303337097168, "learning_rate": 1.9490506112378513e-05, "loss": 1.9483, "step": 39756 }, { "epoch": 0.52, "grad_norm": 3.7855889797210693, "learning_rate": 1.949047299882241e-05, "loss": 1.862, "step": 39757 }, { "epoch": 0.52, "grad_norm": 3.625814914703369, "learning_rate": 1.9490439884218397e-05, "loss": 1.8903, "step": 39758 }, { "epoch": 0.52, "grad_norm": 3.7295854091644287, "learning_rate": 1.9490406768566477e-05, "loss": 1.9021, "step": 39759 }, { "epoch": 0.52, "grad_norm": 3.7641632556915283, "learning_rate": 1.9490373651866658e-05, "loss": 2.269, "step": 39760 }, { "epoch": 0.52, "grad_norm": 4.010214805603027, "learning_rate": 1.9490340534118934e-05, "loss": 2.0891, "step": 39761 }, { "epoch": 0.52, "grad_norm": 3.275040626525879, "learning_rate": 1.949030741532332e-05, "loss": 1.5972, "step": 39762 }, { "epoch": 0.52, "grad_norm": 3.774815559387207, "learning_rate": 1.9490274295479815e-05, "loss": 1.8457, "step": 39763 }, { "epoch": 0.52, "grad_norm": 3.935286521911621, "learning_rate": 1.9490241174588426e-05, "loss": 2.1973, "step": 39764 }, { "epoch": 0.52, "grad_norm": 3.8095932006835938, "learning_rate": 1.9490208052649146e-05, "loss": 1.8642, "step": 39765 }, { "epoch": 0.52, "grad_norm": 3.3972675800323486, "learning_rate": 1.949017492966199e-05, "loss": 1.6949, "step": 39766 }, { "epoch": 0.52, "grad_norm": 4.0116705894470215, "learning_rate": 1.9490141805626955e-05, "loss": 1.9416, "step": 39767 }, { "epoch": 0.52, "grad_norm": 3.4516823291778564, "learning_rate": 1.9490108680544054e-05, "loss": 1.9697, "step": 39768 }, { "epoch": 0.52, "grad_norm": 3.6744158267974854, "learning_rate": 1.949007555441328e-05, "loss": 2.4472, "step": 39769 }, { "epoch": 0.52, "grad_norm": 3.7317488193511963, "learning_rate": 1.949004242723464e-05, "loss": 1.8384, "step": 39770 }, { "epoch": 0.52, "grad_norm": 3.3524041175842285, "learning_rate": 1.9490009299008135e-05, "loss": 1.8437, "step": 39771 }, { "epoch": 0.52, "grad_norm": 3.5595502853393555, "learning_rate": 1.9489976169733777e-05, "loss": 1.7184, "step": 39772 }, { "epoch": 0.52, "grad_norm": 3.245300054550171, "learning_rate": 1.9489943039411564e-05, "loss": 1.4282, "step": 39773 }, { "epoch": 0.52, "grad_norm": 3.63321590423584, "learning_rate": 1.94899099080415e-05, "loss": 1.9468, "step": 39774 }, { "epoch": 0.52, "grad_norm": 4.021780490875244, "learning_rate": 1.948987677562359e-05, "loss": 2.2468, "step": 39775 }, { "epoch": 0.52, "grad_norm": 3.777066469192505, "learning_rate": 1.9489843642157834e-05, "loss": 2.0098, "step": 39776 }, { "epoch": 0.52, "grad_norm": 4.1607232093811035, "learning_rate": 1.948981050764424e-05, "loss": 2.1498, "step": 39777 }, { "epoch": 0.52, "grad_norm": 3.520789861679077, "learning_rate": 1.9489777372082808e-05, "loss": 2.0952, "step": 39778 }, { "epoch": 0.52, "grad_norm": 4.046023368835449, "learning_rate": 1.9489744235473548e-05, "loss": 1.7926, "step": 39779 }, { "epoch": 0.52, "grad_norm": 3.9909658432006836, "learning_rate": 1.9489711097816457e-05, "loss": 1.8453, "step": 39780 }, { "epoch": 0.52, "grad_norm": 3.226559638977051, "learning_rate": 1.9489677959111538e-05, "loss": 1.5059, "step": 39781 }, { "epoch": 0.52, "grad_norm": 3.175975799560547, "learning_rate": 1.94896448193588e-05, "loss": 1.6636, "step": 39782 }, { "epoch": 0.52, "grad_norm": 3.9654760360717773, "learning_rate": 1.9489611678558244e-05, "loss": 2.3063, "step": 39783 }, { "epoch": 0.52, "grad_norm": 2.9679877758026123, "learning_rate": 1.9489578536709874e-05, "loss": 1.5191, "step": 39784 }, { "epoch": 0.52, "grad_norm": 3.9141383171081543, "learning_rate": 1.9489545393813695e-05, "loss": 2.0272, "step": 39785 }, { "epoch": 0.52, "grad_norm": 3.842923402786255, "learning_rate": 1.9489512249869705e-05, "loss": 1.9189, "step": 39786 }, { "epoch": 0.52, "grad_norm": 3.9545061588287354, "learning_rate": 1.9489479104877915e-05, "loss": 1.7036, "step": 39787 }, { "epoch": 0.52, "grad_norm": 3.843902826309204, "learning_rate": 1.9489445958838328e-05, "loss": 1.7652, "step": 39788 }, { "epoch": 0.52, "grad_norm": 3.631131410598755, "learning_rate": 1.948941281175094e-05, "loss": 1.8127, "step": 39789 }, { "epoch": 0.52, "grad_norm": 3.6090729236602783, "learning_rate": 1.948937966361576e-05, "loss": 1.7141, "step": 39790 }, { "epoch": 0.52, "grad_norm": 3.3473291397094727, "learning_rate": 1.9489346514432795e-05, "loss": 1.9552, "step": 39791 }, { "epoch": 0.52, "grad_norm": 3.525087833404541, "learning_rate": 1.948931336420204e-05, "loss": 1.5128, "step": 39792 }, { "epoch": 0.52, "grad_norm": 3.938668966293335, "learning_rate": 1.9489280212923512e-05, "loss": 1.9602, "step": 39793 }, { "epoch": 0.52, "grad_norm": 3.986316680908203, "learning_rate": 1.94892470605972e-05, "loss": 1.9207, "step": 39794 }, { "epoch": 0.52, "grad_norm": 2.9868855476379395, "learning_rate": 1.9489213907223117e-05, "loss": 1.2613, "step": 39795 }, { "epoch": 0.52, "grad_norm": 3.4600067138671875, "learning_rate": 1.9489180752801262e-05, "loss": 1.8193, "step": 39796 }, { "epoch": 0.52, "grad_norm": 3.927401304244995, "learning_rate": 1.948914759733164e-05, "loss": 2.0622, "step": 39797 }, { "epoch": 0.52, "grad_norm": 3.873645544052124, "learning_rate": 1.948911444081426e-05, "loss": 2.1904, "step": 39798 }, { "epoch": 0.52, "grad_norm": 3.883113384246826, "learning_rate": 1.9489081283249116e-05, "loss": 1.8762, "step": 39799 }, { "epoch": 0.52, "grad_norm": 4.302911758422852, "learning_rate": 1.9489048124636216e-05, "loss": 2.1568, "step": 39800 }, { "epoch": 0.52, "grad_norm": 3.5845253467559814, "learning_rate": 1.9489014964975567e-05, "loss": 1.7627, "step": 39801 }, { "epoch": 0.52, "grad_norm": 3.2760610580444336, "learning_rate": 1.9488981804267166e-05, "loss": 1.5365, "step": 39802 }, { "epoch": 0.52, "grad_norm": 3.7916972637176514, "learning_rate": 1.9488948642511025e-05, "loss": 2.0091, "step": 39803 }, { "epoch": 0.52, "grad_norm": 3.47160005569458, "learning_rate": 1.948891547970714e-05, "loss": 1.6112, "step": 39804 }, { "epoch": 0.52, "grad_norm": 4.2216410636901855, "learning_rate": 1.948888231585552e-05, "loss": 2.2996, "step": 39805 }, { "epoch": 0.52, "grad_norm": 4.094827651977539, "learning_rate": 1.9488849150956165e-05, "loss": 1.8378, "step": 39806 }, { "epoch": 0.52, "grad_norm": 3.906688928604126, "learning_rate": 1.9488815985009078e-05, "loss": 1.9899, "step": 39807 }, { "epoch": 0.52, "grad_norm": 3.716033935546875, "learning_rate": 1.948878281801427e-05, "loss": 1.8524, "step": 39808 }, { "epoch": 0.52, "grad_norm": 3.625308036804199, "learning_rate": 1.9488749649971734e-05, "loss": 1.8527, "step": 39809 }, { "epoch": 0.52, "grad_norm": 4.133255481719971, "learning_rate": 1.948871648088148e-05, "loss": 2.176, "step": 39810 }, { "epoch": 0.52, "grad_norm": 3.7407126426696777, "learning_rate": 1.9488683310743515e-05, "loss": 1.7874, "step": 39811 }, { "epoch": 0.52, "grad_norm": 3.900852918624878, "learning_rate": 1.9488650139557835e-05, "loss": 2.0233, "step": 39812 }, { "epoch": 0.52, "grad_norm": 3.7701497077941895, "learning_rate": 1.948861696732445e-05, "loss": 1.9504, "step": 39813 }, { "epoch": 0.52, "grad_norm": 3.8936607837677, "learning_rate": 1.9488583794043357e-05, "loss": 2.2446, "step": 39814 }, { "epoch": 0.52, "grad_norm": 3.547819137573242, "learning_rate": 1.9488550619714564e-05, "loss": 1.947, "step": 39815 }, { "epoch": 0.52, "grad_norm": 3.697033643722534, "learning_rate": 1.9488517444338076e-05, "loss": 1.8895, "step": 39816 }, { "epoch": 0.52, "grad_norm": 3.6854054927825928, "learning_rate": 1.9488484267913893e-05, "loss": 2.136, "step": 39817 }, { "epoch": 0.52, "grad_norm": 3.549625873565674, "learning_rate": 1.9488451090442018e-05, "loss": 1.905, "step": 39818 }, { "epoch": 0.52, "grad_norm": 4.225362777709961, "learning_rate": 1.9488417911922462e-05, "loss": 2.4106, "step": 39819 }, { "epoch": 0.52, "grad_norm": 3.7589502334594727, "learning_rate": 1.948838473235522e-05, "loss": 2.1718, "step": 39820 }, { "epoch": 0.52, "grad_norm": 4.162764072418213, "learning_rate": 1.94883515517403e-05, "loss": 2.2206, "step": 39821 }, { "epoch": 0.52, "grad_norm": 3.3658368587493896, "learning_rate": 1.9488318370077707e-05, "loss": 1.9052, "step": 39822 }, { "epoch": 0.52, "grad_norm": 3.924337387084961, "learning_rate": 1.9488285187367442e-05, "loss": 2.5062, "step": 39823 }, { "epoch": 0.52, "grad_norm": 3.7906219959259033, "learning_rate": 1.948825200360951e-05, "loss": 2.3162, "step": 39824 }, { "epoch": 0.52, "grad_norm": 3.6143832206726074, "learning_rate": 1.948821881880391e-05, "loss": 1.9302, "step": 39825 }, { "epoch": 0.52, "grad_norm": 3.4533069133758545, "learning_rate": 1.9488185632950655e-05, "loss": 1.633, "step": 39826 }, { "epoch": 0.52, "grad_norm": 4.9752020835876465, "learning_rate": 1.948815244604974e-05, "loss": 3.0019, "step": 39827 }, { "epoch": 0.52, "grad_norm": 3.390543222427368, "learning_rate": 1.9488119258101174e-05, "loss": 1.6243, "step": 39828 }, { "epoch": 0.52, "grad_norm": 4.098461151123047, "learning_rate": 1.948808606910496e-05, "loss": 1.7194, "step": 39829 }, { "epoch": 0.52, "grad_norm": 3.819251775741577, "learning_rate": 1.9488052879061095e-05, "loss": 2.2934, "step": 39830 }, { "epoch": 0.52, "grad_norm": 4.132810115814209, "learning_rate": 1.948801968796959e-05, "loss": 2.5805, "step": 39831 }, { "epoch": 0.52, "grad_norm": 3.340602159500122, "learning_rate": 1.948798649583045e-05, "loss": 1.52, "step": 39832 }, { "epoch": 0.52, "grad_norm": 4.000730514526367, "learning_rate": 1.948795330264367e-05, "loss": 2.1645, "step": 39833 }, { "epoch": 0.52, "grad_norm": 3.6361582279205322, "learning_rate": 1.948792010840926e-05, "loss": 2.0952, "step": 39834 }, { "epoch": 0.52, "grad_norm": 3.534881114959717, "learning_rate": 1.9487886913127228e-05, "loss": 1.6667, "step": 39835 }, { "epoch": 0.52, "grad_norm": 3.72281813621521, "learning_rate": 1.9487853716797568e-05, "loss": 2.3277, "step": 39836 }, { "epoch": 0.52, "grad_norm": 3.765469551086426, "learning_rate": 1.948782051942029e-05, "loss": 1.916, "step": 39837 }, { "epoch": 0.52, "grad_norm": 3.4731967449188232, "learning_rate": 1.948778732099539e-05, "loss": 2.0033, "step": 39838 }, { "epoch": 0.52, "grad_norm": 3.939227819442749, "learning_rate": 1.9487754121522883e-05, "loss": 1.7142, "step": 39839 }, { "epoch": 0.52, "grad_norm": 4.0038557052612305, "learning_rate": 1.9487720921002767e-05, "loss": 1.856, "step": 39840 }, { "epoch": 0.52, "grad_norm": 3.5377256870269775, "learning_rate": 1.9487687719435044e-05, "loss": 1.7249, "step": 39841 }, { "epoch": 0.52, "grad_norm": 4.306379318237305, "learning_rate": 1.948765451681972e-05, "loss": 2.1122, "step": 39842 }, { "epoch": 0.52, "grad_norm": 4.27618932723999, "learning_rate": 1.94876213131568e-05, "loss": 2.6673, "step": 39843 }, { "epoch": 0.52, "grad_norm": 3.247267961502075, "learning_rate": 1.9487588108446282e-05, "loss": 1.6042, "step": 39844 }, { "epoch": 0.52, "grad_norm": 3.428783655166626, "learning_rate": 1.9487554902688176e-05, "loss": 1.7441, "step": 39845 }, { "epoch": 0.52, "grad_norm": 3.785430431365967, "learning_rate": 1.948752169588248e-05, "loss": 2.0332, "step": 39846 }, { "epoch": 0.52, "grad_norm": 3.6450395584106445, "learning_rate": 1.9487488488029203e-05, "loss": 1.935, "step": 39847 }, { "epoch": 0.52, "grad_norm": 3.846444845199585, "learning_rate": 1.9487455279128344e-05, "loss": 1.9298, "step": 39848 }, { "epoch": 0.52, "grad_norm": 3.9766571521759033, "learning_rate": 1.948742206917991e-05, "loss": 2.1985, "step": 39849 }, { "epoch": 0.52, "grad_norm": 3.7868762016296387, "learning_rate": 1.9487388858183905e-05, "loss": 2.2454, "step": 39850 }, { "epoch": 0.52, "grad_norm": 4.077180862426758, "learning_rate": 1.948735564614033e-05, "loss": 2.2259, "step": 39851 }, { "epoch": 0.52, "grad_norm": 3.406038999557495, "learning_rate": 1.948732243304919e-05, "loss": 1.768, "step": 39852 }, { "epoch": 0.52, "grad_norm": 3.776135206222534, "learning_rate": 1.948728921891049e-05, "loss": 1.8718, "step": 39853 }, { "epoch": 0.52, "grad_norm": 3.3855960369110107, "learning_rate": 1.948725600372423e-05, "loss": 1.5513, "step": 39854 }, { "epoch": 0.52, "grad_norm": 2.9204671382904053, "learning_rate": 1.948722278749042e-05, "loss": 1.6315, "step": 39855 }, { "epoch": 0.52, "grad_norm": 3.6127123832702637, "learning_rate": 1.9487189570209057e-05, "loss": 2.0629, "step": 39856 }, { "epoch": 0.52, "grad_norm": 3.875842809677124, "learning_rate": 1.9487156351880148e-05, "loss": 2.0553, "step": 39857 }, { "epoch": 0.52, "grad_norm": 4.206369400024414, "learning_rate": 1.9487123132503693e-05, "loss": 1.9367, "step": 39858 }, { "epoch": 0.52, "grad_norm": 3.5914013385772705, "learning_rate": 1.9487089912079706e-05, "loss": 1.9781, "step": 39859 }, { "epoch": 0.52, "grad_norm": 3.9310290813446045, "learning_rate": 1.9487056690608176e-05, "loss": 2.0539, "step": 39860 }, { "epoch": 0.52, "grad_norm": 4.0122599601745605, "learning_rate": 1.9487023468089114e-05, "loss": 1.9624, "step": 39861 }, { "epoch": 0.52, "grad_norm": 4.77833366394043, "learning_rate": 1.948699024452253e-05, "loss": 2.2417, "step": 39862 }, { "epoch": 0.52, "grad_norm": 3.904700756072998, "learning_rate": 1.9486957019908416e-05, "loss": 1.9041, "step": 39863 }, { "epoch": 0.52, "grad_norm": 3.642307996749878, "learning_rate": 1.948692379424678e-05, "loss": 1.6212, "step": 39864 }, { "epoch": 0.52, "grad_norm": 3.740396022796631, "learning_rate": 1.9486890567537632e-05, "loss": 2.1026, "step": 39865 }, { "epoch": 0.52, "grad_norm": 3.859952449798584, "learning_rate": 1.9486857339780965e-05, "loss": 1.8375, "step": 39866 }, { "epoch": 0.52, "grad_norm": 3.6983273029327393, "learning_rate": 1.948682411097679e-05, "loss": 2.2153, "step": 39867 }, { "epoch": 0.52, "grad_norm": 4.019248962402344, "learning_rate": 1.948679088112511e-05, "loss": 2.2567, "step": 39868 }, { "epoch": 0.52, "grad_norm": 3.616783380508423, "learning_rate": 1.9486757650225926e-05, "loss": 1.5087, "step": 39869 }, { "epoch": 0.52, "grad_norm": 3.1047303676605225, "learning_rate": 1.9486724418279245e-05, "loss": 1.5634, "step": 39870 }, { "epoch": 0.52, "grad_norm": 3.727198839187622, "learning_rate": 1.9486691185285065e-05, "loss": 1.5652, "step": 39871 }, { "epoch": 0.52, "grad_norm": 3.4911110401153564, "learning_rate": 1.9486657951243394e-05, "loss": 1.6704, "step": 39872 }, { "epoch": 0.52, "grad_norm": 3.031768560409546, "learning_rate": 1.9486624716154237e-05, "loss": 1.3723, "step": 39873 }, { "epoch": 0.52, "grad_norm": 3.60526442527771, "learning_rate": 1.9486591480017596e-05, "loss": 1.8635, "step": 39874 }, { "epoch": 0.52, "grad_norm": 3.6859099864959717, "learning_rate": 1.9486558242833476e-05, "loss": 1.951, "step": 39875 }, { "epoch": 0.52, "grad_norm": 3.0252881050109863, "learning_rate": 1.9486525004601875e-05, "loss": 1.4143, "step": 39876 }, { "epoch": 0.52, "grad_norm": 3.866220235824585, "learning_rate": 1.9486491765322802e-05, "loss": 2.2668, "step": 39877 }, { "epoch": 0.52, "grad_norm": 3.8153929710388184, "learning_rate": 1.948645852499626e-05, "loss": 2.1026, "step": 39878 }, { "epoch": 0.52, "grad_norm": 3.4895732402801514, "learning_rate": 1.9486425283622252e-05, "loss": 1.7958, "step": 39879 }, { "epoch": 0.52, "grad_norm": 3.824214220046997, "learning_rate": 1.948639204120078e-05, "loss": 2.4589, "step": 39880 }, { "epoch": 0.52, "grad_norm": 3.9230432510375977, "learning_rate": 1.948635879773185e-05, "loss": 2.1938, "step": 39881 }, { "epoch": 0.52, "grad_norm": 4.482903480529785, "learning_rate": 1.9486325553215465e-05, "loss": 2.4553, "step": 39882 }, { "epoch": 0.52, "grad_norm": 2.9375927448272705, "learning_rate": 1.948629230765163e-05, "loss": 1.3883, "step": 39883 }, { "epoch": 0.52, "grad_norm": 3.5402820110321045, "learning_rate": 1.948625906104035e-05, "loss": 2.1647, "step": 39884 }, { "epoch": 0.52, "grad_norm": 4.159817218780518, "learning_rate": 1.948622581338162e-05, "loss": 2.015, "step": 39885 }, { "epoch": 0.52, "grad_norm": 4.218414783477783, "learning_rate": 1.9486192564675454e-05, "loss": 2.0561, "step": 39886 }, { "epoch": 0.52, "grad_norm": 4.231886386871338, "learning_rate": 1.948615931492185e-05, "loss": 2.1737, "step": 39887 }, { "epoch": 0.52, "grad_norm": 3.66188907623291, "learning_rate": 1.9486126064120815e-05, "loss": 2.1016, "step": 39888 }, { "epoch": 0.52, "grad_norm": 3.830106496810913, "learning_rate": 1.948609281227235e-05, "loss": 1.9111, "step": 39889 }, { "epoch": 0.52, "grad_norm": 3.7606165409088135, "learning_rate": 1.9486059559376456e-05, "loss": 2.0699, "step": 39890 }, { "epoch": 0.52, "grad_norm": 3.739856004714966, "learning_rate": 1.9486026305433142e-05, "loss": 1.7773, "step": 39891 }, { "epoch": 0.52, "grad_norm": 3.925478219985962, "learning_rate": 1.9485993050442408e-05, "loss": 2.0888, "step": 39892 }, { "epoch": 0.52, "grad_norm": 3.991176128387451, "learning_rate": 1.9485959794404263e-05, "loss": 2.0618, "step": 39893 }, { "epoch": 0.52, "grad_norm": 4.105031967163086, "learning_rate": 1.9485926537318708e-05, "loss": 2.2092, "step": 39894 }, { "epoch": 0.52, "grad_norm": 4.152318954467773, "learning_rate": 1.9485893279185742e-05, "loss": 2.0839, "step": 39895 }, { "epoch": 0.52, "grad_norm": 3.316572904586792, "learning_rate": 1.9485860020005373e-05, "loss": 2.0431, "step": 39896 }, { "epoch": 0.52, "grad_norm": 4.015847682952881, "learning_rate": 1.9485826759777607e-05, "loss": 2.1694, "step": 39897 }, { "epoch": 0.52, "grad_norm": 4.280396461486816, "learning_rate": 1.948579349850244e-05, "loss": 2.1046, "step": 39898 }, { "epoch": 0.52, "grad_norm": 3.972505569458008, "learning_rate": 1.9485760236179884e-05, "loss": 2.0752, "step": 39899 }, { "epoch": 0.52, "grad_norm": 3.771956205368042, "learning_rate": 1.948572697280994e-05, "loss": 1.8081, "step": 39900 }, { "epoch": 0.52, "grad_norm": 4.090914726257324, "learning_rate": 1.9485693708392607e-05, "loss": 1.9935, "step": 39901 }, { "epoch": 0.52, "grad_norm": 4.036474227905273, "learning_rate": 1.9485660442927897e-05, "loss": 1.791, "step": 39902 }, { "epoch": 0.52, "grad_norm": 3.4866182804107666, "learning_rate": 1.9485627176415807e-05, "loss": 1.8621, "step": 39903 }, { "epoch": 0.52, "grad_norm": 3.723458766937256, "learning_rate": 1.9485593908856343e-05, "loss": 1.6829, "step": 39904 }, { "epoch": 0.52, "grad_norm": 3.958509922027588, "learning_rate": 1.9485560640249507e-05, "loss": 2.2856, "step": 39905 }, { "epoch": 0.52, "grad_norm": 3.43436598777771, "learning_rate": 1.9485527370595307e-05, "loss": 1.6367, "step": 39906 }, { "epoch": 0.52, "grad_norm": 4.007800102233887, "learning_rate": 1.948549409989374e-05, "loss": 1.8458, "step": 39907 }, { "epoch": 0.52, "grad_norm": 3.4111194610595703, "learning_rate": 1.9485460828144816e-05, "loss": 1.8663, "step": 39908 }, { "epoch": 0.52, "grad_norm": 4.040510654449463, "learning_rate": 1.9485427555348538e-05, "loss": 1.8447, "step": 39909 }, { "epoch": 0.52, "grad_norm": 3.912742853164673, "learning_rate": 1.9485394281504907e-05, "loss": 1.8359, "step": 39910 }, { "epoch": 0.52, "grad_norm": 4.206814289093018, "learning_rate": 1.9485361006613926e-05, "loss": 2.0498, "step": 39911 }, { "epoch": 0.52, "grad_norm": 3.9687013626098633, "learning_rate": 1.94853277306756e-05, "loss": 2.1787, "step": 39912 }, { "epoch": 0.52, "grad_norm": 2.892648220062256, "learning_rate": 1.948529445368993e-05, "loss": 1.4481, "step": 39913 }, { "epoch": 0.52, "grad_norm": 3.804750442504883, "learning_rate": 1.9485261175656928e-05, "loss": 2.1078, "step": 39914 }, { "epoch": 0.52, "grad_norm": 3.5579676628112793, "learning_rate": 1.948522789657659e-05, "loss": 1.7975, "step": 39915 }, { "epoch": 0.52, "grad_norm": 4.62693452835083, "learning_rate": 1.9485194616448923e-05, "loss": 2.3161, "step": 39916 }, { "epoch": 0.52, "grad_norm": 3.3385729789733887, "learning_rate": 1.948516133527393e-05, "loss": 1.928, "step": 39917 }, { "epoch": 0.52, "grad_norm": 3.515136957168579, "learning_rate": 1.9485128053051616e-05, "loss": 1.8668, "step": 39918 }, { "epoch": 0.52, "grad_norm": 3.5846996307373047, "learning_rate": 1.9485094769781975e-05, "loss": 1.7167, "step": 39919 }, { "epoch": 0.52, "grad_norm": 3.988840103149414, "learning_rate": 1.9485061485465025e-05, "loss": 2.1278, "step": 39920 }, { "epoch": 0.52, "grad_norm": 3.049264669418335, "learning_rate": 1.9485028200100763e-05, "loss": 1.3357, "step": 39921 }, { "epoch": 0.52, "grad_norm": 3.9586219787597656, "learning_rate": 1.948499491368919e-05, "loss": 1.966, "step": 39922 }, { "epoch": 0.52, "grad_norm": 3.655909538269043, "learning_rate": 1.9484961626230317e-05, "loss": 1.8018, "step": 39923 }, { "epoch": 0.52, "grad_norm": 4.049917221069336, "learning_rate": 1.948492833772414e-05, "loss": 1.9449, "step": 39924 }, { "epoch": 0.52, "grad_norm": 3.490406036376953, "learning_rate": 1.9484895048170666e-05, "loss": 1.841, "step": 39925 }, { "epoch": 0.52, "grad_norm": 3.792229652404785, "learning_rate": 1.94848617575699e-05, "loss": 2.2864, "step": 39926 }, { "epoch": 0.52, "grad_norm": 4.484694004058838, "learning_rate": 1.9484828465921843e-05, "loss": 2.2734, "step": 39927 }, { "epoch": 0.52, "grad_norm": 4.007136821746826, "learning_rate": 1.94847951732265e-05, "loss": 2.362, "step": 39928 }, { "epoch": 0.52, "grad_norm": 3.424003839492798, "learning_rate": 1.948476187948388e-05, "loss": 1.7066, "step": 39929 }, { "epoch": 0.52, "grad_norm": 4.326014041900635, "learning_rate": 1.9484728584693975e-05, "loss": 2.2892, "step": 39930 }, { "epoch": 0.52, "grad_norm": 3.66538405418396, "learning_rate": 1.9484695288856795e-05, "loss": 1.8519, "step": 39931 }, { "epoch": 0.52, "grad_norm": 3.61529803276062, "learning_rate": 1.9484661991972346e-05, "loss": 1.5235, "step": 39932 }, { "epoch": 0.52, "grad_norm": 3.794795036315918, "learning_rate": 1.9484628694040633e-05, "loss": 1.8822, "step": 39933 }, { "epoch": 0.52, "grad_norm": 3.5459344387054443, "learning_rate": 1.9484595395061648e-05, "loss": 1.9069, "step": 39934 }, { "epoch": 0.52, "grad_norm": 3.9378457069396973, "learning_rate": 1.9484562095035412e-05, "loss": 2.0755, "step": 39935 }, { "epoch": 0.52, "grad_norm": 3.5899269580841064, "learning_rate": 1.948452879396191e-05, "loss": 1.7118, "step": 39936 }, { "epoch": 0.52, "grad_norm": 3.7850353717803955, "learning_rate": 1.9484495491841162e-05, "loss": 1.8761, "step": 39937 }, { "epoch": 0.52, "grad_norm": 3.517683982849121, "learning_rate": 1.9484462188673163e-05, "loss": 1.6851, "step": 39938 }, { "epoch": 0.52, "grad_norm": 3.573793411254883, "learning_rate": 1.9484428884457918e-05, "loss": 1.7854, "step": 39939 }, { "epoch": 0.52, "grad_norm": 3.3816306591033936, "learning_rate": 1.9484395579195433e-05, "loss": 1.6411, "step": 39940 }, { "epoch": 0.52, "grad_norm": 4.089111328125, "learning_rate": 1.9484362272885706e-05, "loss": 1.7721, "step": 39941 }, { "epoch": 0.52, "grad_norm": 3.6488330364227295, "learning_rate": 1.9484328965528746e-05, "loss": 1.8038, "step": 39942 }, { "epoch": 0.52, "grad_norm": 3.884275436401367, "learning_rate": 1.9484295657124558e-05, "loss": 1.745, "step": 39943 }, { "epoch": 0.52, "grad_norm": 3.345268726348877, "learning_rate": 1.948426234767314e-05, "loss": 1.6378, "step": 39944 }, { "epoch": 0.52, "grad_norm": 3.690983533859253, "learning_rate": 1.94842290371745e-05, "loss": 2.0265, "step": 39945 }, { "epoch": 0.52, "grad_norm": 3.6476902961730957, "learning_rate": 1.9484195725628635e-05, "loss": 2.2965, "step": 39946 }, { "epoch": 0.52, "grad_norm": 3.2959020137786865, "learning_rate": 1.9484162413035557e-05, "loss": 1.6465, "step": 39947 }, { "epoch": 0.52, "grad_norm": 3.65297269821167, "learning_rate": 1.948412909939527e-05, "loss": 1.7802, "step": 39948 }, { "epoch": 0.52, "grad_norm": 3.9249322414398193, "learning_rate": 1.948409578470777e-05, "loss": 2.0573, "step": 39949 }, { "epoch": 0.52, "grad_norm": 3.8833422660827637, "learning_rate": 1.9484062468973067e-05, "loss": 1.7711, "step": 39950 }, { "epoch": 0.52, "grad_norm": 3.9234840869903564, "learning_rate": 1.9484029152191164e-05, "loss": 1.9655, "step": 39951 }, { "epoch": 0.52, "grad_norm": 3.7146458625793457, "learning_rate": 1.948399583436206e-05, "loss": 1.9925, "step": 39952 }, { "epoch": 0.52, "grad_norm": 4.272437572479248, "learning_rate": 1.9483962515485767e-05, "loss": 1.8147, "step": 39953 }, { "epoch": 0.52, "grad_norm": 3.693176746368408, "learning_rate": 1.948392919556228e-05, "loss": 2.343, "step": 39954 }, { "epoch": 0.52, "grad_norm": 3.491285800933838, "learning_rate": 1.9483895874591606e-05, "loss": 1.9376, "step": 39955 }, { "epoch": 0.52, "grad_norm": 3.8657267093658447, "learning_rate": 1.948386255257375e-05, "loss": 2.0448, "step": 39956 }, { "epoch": 0.52, "grad_norm": 3.8766705989837646, "learning_rate": 1.9483829229508714e-05, "loss": 2.1118, "step": 39957 }, { "epoch": 0.52, "grad_norm": 4.076425552368164, "learning_rate": 1.9483795905396503e-05, "loss": 2.3496, "step": 39958 }, { "epoch": 0.52, "grad_norm": 3.5154147148132324, "learning_rate": 1.948376258023712e-05, "loss": 1.8622, "step": 39959 }, { "epoch": 0.52, "grad_norm": 3.5603220462799072, "learning_rate": 1.9483729254030565e-05, "loss": 1.6596, "step": 39960 }, { "epoch": 0.52, "grad_norm": 4.861658096313477, "learning_rate": 1.948369592677685e-05, "loss": 2.6062, "step": 39961 }, { "epoch": 0.52, "grad_norm": 3.78658127784729, "learning_rate": 1.9483662598475974e-05, "loss": 2.1676, "step": 39962 }, { "epoch": 0.52, "grad_norm": 3.418492078781128, "learning_rate": 1.9483629269127938e-05, "loss": 1.7066, "step": 39963 }, { "epoch": 0.52, "grad_norm": 4.234675884246826, "learning_rate": 1.948359593873275e-05, "loss": 2.147, "step": 39964 }, { "epoch": 0.52, "grad_norm": 4.016572952270508, "learning_rate": 1.9483562607290413e-05, "loss": 2.2409, "step": 39965 }, { "epoch": 0.52, "grad_norm": 3.946382761001587, "learning_rate": 1.948352927480093e-05, "loss": 1.8836, "step": 39966 }, { "epoch": 0.52, "grad_norm": 3.943279504776001, "learning_rate": 1.9483495941264303e-05, "loss": 2.2418, "step": 39967 }, { "epoch": 0.52, "grad_norm": 3.6726417541503906, "learning_rate": 1.948346260668054e-05, "loss": 1.9674, "step": 39968 }, { "epoch": 0.52, "grad_norm": 3.7952723503112793, "learning_rate": 1.948342927104964e-05, "loss": 1.7109, "step": 39969 }, { "epoch": 0.52, "grad_norm": 3.5531134605407715, "learning_rate": 1.9483395934371606e-05, "loss": 1.8046, "step": 39970 }, { "epoch": 0.52, "grad_norm": 3.5945732593536377, "learning_rate": 1.9483362596646444e-05, "loss": 1.7875, "step": 39971 }, { "epoch": 0.52, "grad_norm": 3.7002146244049072, "learning_rate": 1.9483329257874163e-05, "loss": 2.1156, "step": 39972 }, { "epoch": 0.52, "grad_norm": 4.013640403747559, "learning_rate": 1.9483295918054756e-05, "loss": 1.8156, "step": 39973 }, { "epoch": 0.52, "grad_norm": 3.2536234855651855, "learning_rate": 1.9483262577188237e-05, "loss": 1.6015, "step": 39974 }, { "epoch": 0.52, "grad_norm": 3.4148128032684326, "learning_rate": 1.94832292352746e-05, "loss": 1.8303, "step": 39975 }, { "epoch": 0.52, "grad_norm": 3.8400862216949463, "learning_rate": 1.9483195892313858e-05, "loss": 1.752, "step": 39976 }, { "epoch": 0.52, "grad_norm": 3.740604877471924, "learning_rate": 1.948316254830601e-05, "loss": 1.8459, "step": 39977 }, { "epoch": 0.52, "grad_norm": 3.839611291885376, "learning_rate": 1.9483129203251057e-05, "loss": 2.2291, "step": 39978 }, { "epoch": 0.52, "grad_norm": 3.365889072418213, "learning_rate": 1.9483095857149007e-05, "loss": 1.8031, "step": 39979 }, { "epoch": 0.52, "grad_norm": 3.5727243423461914, "learning_rate": 1.9483062509999865e-05, "loss": 1.7346, "step": 39980 }, { "epoch": 0.52, "grad_norm": 3.9680228233337402, "learning_rate": 1.9483029161803627e-05, "loss": 1.7881, "step": 39981 }, { "epoch": 0.52, "grad_norm": 3.6603338718414307, "learning_rate": 1.9482995812560305e-05, "loss": 2.0616, "step": 39982 }, { "epoch": 0.52, "grad_norm": 3.544642686843872, "learning_rate": 1.94829624622699e-05, "loss": 1.7491, "step": 39983 }, { "epoch": 0.52, "grad_norm": 3.9077041149139404, "learning_rate": 1.9482929110932414e-05, "loss": 1.9837, "step": 39984 }, { "epoch": 0.52, "grad_norm": 4.139596939086914, "learning_rate": 1.948289575854785e-05, "loss": 2.1932, "step": 39985 }, { "epoch": 0.52, "grad_norm": 3.6224372386932373, "learning_rate": 1.9482862405116213e-05, "loss": 1.8429, "step": 39986 }, { "epoch": 0.52, "grad_norm": 3.9580185413360596, "learning_rate": 1.9482829050637512e-05, "loss": 2.1286, "step": 39987 }, { "epoch": 0.52, "grad_norm": 4.280725002288818, "learning_rate": 1.9482795695111742e-05, "loss": 1.7345, "step": 39988 }, { "epoch": 0.52, "grad_norm": 3.439929962158203, "learning_rate": 1.948276233853891e-05, "loss": 1.6483, "step": 39989 }, { "epoch": 0.52, "grad_norm": 3.4311721324920654, "learning_rate": 1.9482728980919023e-05, "loss": 1.7923, "step": 39990 }, { "epoch": 0.52, "grad_norm": 3.042274236679077, "learning_rate": 1.9482695622252082e-05, "loss": 1.4108, "step": 39991 }, { "epoch": 0.52, "grad_norm": 3.5565054416656494, "learning_rate": 1.9482662262538087e-05, "loss": 1.9626, "step": 39992 }, { "epoch": 0.52, "grad_norm": 3.5569725036621094, "learning_rate": 1.9482628901777048e-05, "loss": 1.7263, "step": 39993 }, { "epoch": 0.52, "grad_norm": 3.972769260406494, "learning_rate": 1.9482595539968964e-05, "loss": 2.1976, "step": 39994 }, { "epoch": 0.52, "grad_norm": 3.8760595321655273, "learning_rate": 1.9482562177113842e-05, "loss": 2.089, "step": 39995 }, { "epoch": 0.52, "grad_norm": 4.166469573974609, "learning_rate": 1.9482528813211682e-05, "loss": 2.1254, "step": 39996 }, { "epoch": 0.52, "grad_norm": 3.9814870357513428, "learning_rate": 1.9482495448262492e-05, "loss": 2.0487, "step": 39997 }, { "epoch": 0.52, "grad_norm": 3.864530086517334, "learning_rate": 1.9482462082266274e-05, "loss": 2.7288, "step": 39998 }, { "epoch": 0.52, "grad_norm": 3.978929281234741, "learning_rate": 1.948242871522303e-05, "loss": 2.1042, "step": 39999 }, { "epoch": 0.52, "grad_norm": 3.35123348236084, "learning_rate": 1.948239534713277e-05, "loss": 1.7245, "step": 40000 } ], "logging_steps": 1.0, "max_steps": 308220, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 10000, "total_flos": 1.4427876252778496e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }