Instructions to use Alan96/ACoRN_Flan-t5-large-triviaQA with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Alan96/ACoRN_Flan-t5-large-triviaQA with Transformers:
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForMultimodalLM

tokenizer = AutoTokenizer.from_pretrained("Alan96/ACoRN_Flan-t5-large-triviaQA")
model = AutoModelForMultimodalLM.from_pretrained("Alan96/ACoRN_Flan-t5-large-triviaQA")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": 0.6242377758026123, | |
| "best_model_checkpoint": "./model/google/flan-t5-large-train_r_aug-tqa/checkpoint-12000", | |
| "epoch": 1.9913707268503154, | |
| "eval_steps": 1000, | |
| "global_step": 12000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00016594756057085962, | |
| "grad_norm": 4.900241374969482, | |
| "learning_rate": 4.9997234207323826e-05, | |
| "loss": 1.1171, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.001659475605708596, | |
| "grad_norm": 3.7945871353149414, | |
| "learning_rate": 4.997234207323819e-05, | |
| "loss": 1.1217, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.003318951211417192, | |
| "grad_norm": 3.418729543685913, | |
| "learning_rate": 4.9944684146476384e-05, | |
| "loss": 1.0061, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.004978426817125788, | |
| "grad_norm": 20.93488311767578, | |
| "learning_rate": 4.9917026219714574e-05, | |
| "loss": 0.952, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.006637902422834384, | |
| "grad_norm": 4.321225643157959, | |
| "learning_rate": 4.9889368292952765e-05, | |
| "loss": 0.984, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.00829737802854298, | |
| "grad_norm": 3.9566705226898193, | |
| "learning_rate": 4.986171036619095e-05, | |
| "loss": 0.8963, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.009956853634251576, | |
| "grad_norm": 3.582169771194458, | |
| "learning_rate": 4.983405243942914e-05, | |
| "loss": 0.962, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.011616329239960173, | |
| "grad_norm": 3.904132127761841, | |
| "learning_rate": 4.980639451266733e-05, | |
| "loss": 0.9226, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.013275804845668768, | |
| "grad_norm": 3.31311297416687, | |
| "learning_rate": 4.977873658590552e-05, | |
| "loss": 0.8423, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.014935280451377365, | |
| "grad_norm": 3.5518760681152344, | |
| "learning_rate": 4.975107865914371e-05, | |
| "loss": 0.9015, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.01659475605708596, | |
| "grad_norm": 3.8338520526885986, | |
| "learning_rate": 4.97234207323819e-05, | |
| "loss": 0.8883, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.018254231662794558, | |
| "grad_norm": 4.829561233520508, | |
| "learning_rate": 4.969576280562009e-05, | |
| "loss": 0.8755, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.019913707268503153, | |
| "grad_norm": 3.4585771560668945, | |
| "learning_rate": 4.966810487885828e-05, | |
| "loss": 0.8807, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.021573182874211748, | |
| "grad_norm": 4.243607997894287, | |
| "learning_rate": 4.9640446952096474e-05, | |
| "loss": 0.9079, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.023232658479920346, | |
| "grad_norm": 2.565274238586426, | |
| "learning_rate": 4.961278902533466e-05, | |
| "loss": 0.8284, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.02489213408562894, | |
| "grad_norm": 3.1821136474609375, | |
| "learning_rate": 4.958513109857285e-05, | |
| "loss": 0.9609, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.026551609691337536, | |
| "grad_norm": 3.768364667892456, | |
| "learning_rate": 4.955747317181104e-05, | |
| "loss": 0.8117, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.028211085297046135, | |
| "grad_norm": 3.528536558151245, | |
| "learning_rate": 4.952981524504923e-05, | |
| "loss": 0.8547, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.02987056090275473, | |
| "grad_norm": 3.6502795219421387, | |
| "learning_rate": 4.950215731828742e-05, | |
| "loss": 0.8715, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.031530036508463324, | |
| "grad_norm": 3.962785482406616, | |
| "learning_rate": 4.947449939152561e-05, | |
| "loss": 0.862, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.03318951211417192, | |
| "grad_norm": 3.7627573013305664, | |
| "learning_rate": 4.94468414647638e-05, | |
| "loss": 0.9098, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.03484898771988052, | |
| "grad_norm": 2.818209409713745, | |
| "learning_rate": 4.941918353800199e-05, | |
| "loss": 0.8594, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.036508463325589116, | |
| "grad_norm": 2.6864259243011475, | |
| "learning_rate": 4.939152561124018e-05, | |
| "loss": 0.8562, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.03816793893129771, | |
| "grad_norm": 3.5816614627838135, | |
| "learning_rate": 4.936386768447838e-05, | |
| "loss": 0.8762, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.039827414537006306, | |
| "grad_norm": 3.709526777267456, | |
| "learning_rate": 4.9336209757716564e-05, | |
| "loss": 0.8121, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.0414868901427149, | |
| "grad_norm": 3.305800676345825, | |
| "learning_rate": 4.9308551830954754e-05, | |
| "loss": 0.8347, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.043146365748423496, | |
| "grad_norm": 3.0622973442077637, | |
| "learning_rate": 4.9280893904192945e-05, | |
| "loss": 0.8788, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.0448058413541321, | |
| "grad_norm": 3.436626434326172, | |
| "learning_rate": 4.9253235977431136e-05, | |
| "loss": 0.881, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.04646531695984069, | |
| "grad_norm": 3.1859350204467773, | |
| "learning_rate": 4.9225578050669326e-05, | |
| "loss": 0.8458, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.04812479256554929, | |
| "grad_norm": 4.285528659820557, | |
| "learning_rate": 4.919792012390752e-05, | |
| "loss": 0.8636, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.04978426817125788, | |
| "grad_norm": 3.98508882522583, | |
| "learning_rate": 4.917026219714571e-05, | |
| "loss": 0.8134, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.05144374377696648, | |
| "grad_norm": 3.6300930976867676, | |
| "learning_rate": 4.91426042703839e-05, | |
| "loss": 0.7973, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.05310321938267507, | |
| "grad_norm": 3.743924140930176, | |
| "learning_rate": 4.911494634362209e-05, | |
| "loss": 0.8277, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.054762694988383674, | |
| "grad_norm": 2.5988316535949707, | |
| "learning_rate": 4.908728841686027e-05, | |
| "loss": 0.8533, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.05642217059409227, | |
| "grad_norm": 3.569610357284546, | |
| "learning_rate": 4.905963049009846e-05, | |
| "loss": 0.8456, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.058081646199800864, | |
| "grad_norm": 2.9307737350463867, | |
| "learning_rate": 4.9031972563336654e-05, | |
| "loss": 0.7879, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.05974112180550946, | |
| "grad_norm": 3.5210940837860107, | |
| "learning_rate": 4.9004314636574844e-05, | |
| "loss": 0.8394, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.061400597411218054, | |
| "grad_norm": 2.749647617340088, | |
| "learning_rate": 4.8976656709813035e-05, | |
| "loss": 0.795, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.06306007301692665, | |
| "grad_norm": 4.256681442260742, | |
| "learning_rate": 4.8948998783051226e-05, | |
| "loss": 0.8333, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.06471954862263525, | |
| "grad_norm": 3.1975724697113037, | |
| "learning_rate": 4.8921340856289416e-05, | |
| "loss": 0.8099, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.06637902422834384, | |
| "grad_norm": 3.0923843383789062, | |
| "learning_rate": 4.889368292952761e-05, | |
| "loss": 0.8211, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.06803849983405244, | |
| "grad_norm": 2.9573616981506348, | |
| "learning_rate": 4.88660250027658e-05, | |
| "loss": 0.8438, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.06969797543976104, | |
| "grad_norm": 3.519888162612915, | |
| "learning_rate": 4.883836707600398e-05, | |
| "loss": 0.7978, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.07135745104546963, | |
| "grad_norm": 4.7146196365356445, | |
| "learning_rate": 4.881070914924217e-05, | |
| "loss": 0.7877, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.07301692665117823, | |
| "grad_norm": 2.54521107673645, | |
| "learning_rate": 4.878305122248036e-05, | |
| "loss": 0.7914, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.07467640225688682, | |
| "grad_norm": 3.443538188934326, | |
| "learning_rate": 4.875539329571855e-05, | |
| "loss": 0.875, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.07633587786259542, | |
| "grad_norm": 3.5744690895080566, | |
| "learning_rate": 4.8727735368956744e-05, | |
| "loss": 0.8347, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.07799535346830401, | |
| "grad_norm": 3.129127025604248, | |
| "learning_rate": 4.8700077442194934e-05, | |
| "loss": 0.88, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.07965482907401261, | |
| "grad_norm": 3.40054988861084, | |
| "learning_rate": 4.8672419515433125e-05, | |
| "loss": 0.8377, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.08131430467972121, | |
| "grad_norm": 4.048141956329346, | |
| "learning_rate": 4.8644761588671316e-05, | |
| "loss": 0.8229, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.0829737802854298, | |
| "grad_norm": 3.28328537940979, | |
| "learning_rate": 4.8617103661909506e-05, | |
| "loss": 0.846, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0846332558911384, | |
| "grad_norm": 3.936415672302246, | |
| "learning_rate": 4.85894457351477e-05, | |
| "loss": 0.8217, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.08629273149684699, | |
| "grad_norm": 3.3447494506835938, | |
| "learning_rate": 4.856178780838588e-05, | |
| "loss": 0.8553, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.0879522071025556, | |
| "grad_norm": 3.550673007965088, | |
| "learning_rate": 4.853412988162407e-05, | |
| "loss": 0.8646, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.0896116827082642, | |
| "grad_norm": 2.695237398147583, | |
| "learning_rate": 4.850647195486226e-05, | |
| "loss": 0.7649, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.09127115831397278, | |
| "grad_norm": 2.307586193084717, | |
| "learning_rate": 4.847881402810045e-05, | |
| "loss": 0.8461, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.09293063391968139, | |
| "grad_norm": 3.99825119972229, | |
| "learning_rate": 4.845115610133864e-05, | |
| "loss": 0.8614, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.09459010952538997, | |
| "grad_norm": 2.767484426498413, | |
| "learning_rate": 4.8423498174576834e-05, | |
| "loss": 0.8256, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.09624958513109858, | |
| "grad_norm": 3.375134229660034, | |
| "learning_rate": 4.8395840247815024e-05, | |
| "loss": 0.8062, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.09790906073680716, | |
| "grad_norm": 3.587320327758789, | |
| "learning_rate": 4.8368182321053215e-05, | |
| "loss": 0.7861, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.09956853634251576, | |
| "grad_norm": 3.553729772567749, | |
| "learning_rate": 4.8340524394291406e-05, | |
| "loss": 0.886, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.10122801194822437, | |
| "grad_norm": 3.4651665687561035, | |
| "learning_rate": 4.8312866467529596e-05, | |
| "loss": 0.7524, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.10288748755393295, | |
| "grad_norm": 2.79295015335083, | |
| "learning_rate": 4.828520854076779e-05, | |
| "loss": 0.7185, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.10454696315964156, | |
| "grad_norm": 3.275655508041382, | |
| "learning_rate": 4.825755061400598e-05, | |
| "loss": 0.8519, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.10620643876535014, | |
| "grad_norm": 3.79915714263916, | |
| "learning_rate": 4.822989268724417e-05, | |
| "loss": 0.8619, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.10786591437105875, | |
| "grad_norm": 3.0836708545684814, | |
| "learning_rate": 4.820223476048236e-05, | |
| "loss": 0.8856, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.10952538997676735, | |
| "grad_norm": 3.225219488143921, | |
| "learning_rate": 4.817457683372055e-05, | |
| "loss": 0.8831, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.11118486558247594, | |
| "grad_norm": 2.489872932434082, | |
| "learning_rate": 4.814691890695874e-05, | |
| "loss": 0.8049, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.11284434118818454, | |
| "grad_norm": 3.352848768234253, | |
| "learning_rate": 4.811926098019693e-05, | |
| "loss": 0.8649, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.11450381679389313, | |
| "grad_norm": 5.773054122924805, | |
| "learning_rate": 4.809160305343512e-05, | |
| "loss": 0.7939, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.11616329239960173, | |
| "grad_norm": 3.3380932807922363, | |
| "learning_rate": 4.8063945126673305e-05, | |
| "loss": 0.8273, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.11782276800531032, | |
| "grad_norm": 3.8309950828552246, | |
| "learning_rate": 4.8036287199911496e-05, | |
| "loss": 0.9265, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.11948224361101892, | |
| "grad_norm": 3.8041698932647705, | |
| "learning_rate": 4.8008629273149686e-05, | |
| "loss": 0.8258, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.12114171921672752, | |
| "grad_norm": 3.6036689281463623, | |
| "learning_rate": 4.798097134638788e-05, | |
| "loss": 0.7618, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.12280119482243611, | |
| "grad_norm": 3.5238475799560547, | |
| "learning_rate": 4.795331341962607e-05, | |
| "loss": 0.7538, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.12446067042814471, | |
| "grad_norm": 2.8986926078796387, | |
| "learning_rate": 4.792565549286426e-05, | |
| "loss": 0.7851, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.1261201460338533, | |
| "grad_norm": 3.8696155548095703, | |
| "learning_rate": 4.789799756610245e-05, | |
| "loss": 0.8544, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.12777962163956189, | |
| "grad_norm": 3.1447415351867676, | |
| "learning_rate": 4.787033963934064e-05, | |
| "loss": 0.7631, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.1294390972452705, | |
| "grad_norm": 3.2269225120544434, | |
| "learning_rate": 4.784268171257883e-05, | |
| "loss": 0.8603, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.1310985728509791, | |
| "grad_norm": 3.555079698562622, | |
| "learning_rate": 4.7815023785817014e-05, | |
| "loss": 0.8047, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.13275804845668768, | |
| "grad_norm": 4.38774299621582, | |
| "learning_rate": 4.7787365859055205e-05, | |
| "loss": 0.8193, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.1344175240623963, | |
| "grad_norm": 2.849234104156494, | |
| "learning_rate": 4.7759707932293395e-05, | |
| "loss": 0.8236, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.13607699966810488, | |
| "grad_norm": 3.4063913822174072, | |
| "learning_rate": 4.7732050005531586e-05, | |
| "loss": 0.7542, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.13773647527381347, | |
| "grad_norm": 4.454982280731201, | |
| "learning_rate": 4.7704392078769776e-05, | |
| "loss": 0.8126, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.13939595087952208, | |
| "grad_norm": 3.7919139862060547, | |
| "learning_rate": 4.767673415200797e-05, | |
| "loss": 0.8232, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.14105542648523067, | |
| "grad_norm": 2.609391927719116, | |
| "learning_rate": 4.764907622524616e-05, | |
| "loss": 0.748, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.14271490209093926, | |
| "grad_norm": 2.9120664596557617, | |
| "learning_rate": 4.762141829848435e-05, | |
| "loss": 0.8619, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.14437437769664785, | |
| "grad_norm": 3.4429476261138916, | |
| "learning_rate": 4.759376037172254e-05, | |
| "loss": 0.7827, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.14603385330235646, | |
| "grad_norm": 3.6868903636932373, | |
| "learning_rate": 4.756610244496073e-05, | |
| "loss": 0.8245, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.14769332890806505, | |
| "grad_norm": 3.6222927570343018, | |
| "learning_rate": 4.753844451819891e-05, | |
| "loss": 0.7452, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.14935280451377364, | |
| "grad_norm": 4.014353275299072, | |
| "learning_rate": 4.7510786591437104e-05, | |
| "loss": 0.8375, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.15101228011948226, | |
| "grad_norm": 2.4085919857025146, | |
| "learning_rate": 4.7483128664675295e-05, | |
| "loss": 0.8016, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.15267175572519084, | |
| "grad_norm": 2.73573637008667, | |
| "learning_rate": 4.7455470737913485e-05, | |
| "loss": 0.8543, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.15433123133089943, | |
| "grad_norm": 3.7764389514923096, | |
| "learning_rate": 4.7427812811151676e-05, | |
| "loss": 0.8465, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.15599070693660802, | |
| "grad_norm": 3.0908584594726562, | |
| "learning_rate": 4.7400154884389866e-05, | |
| "loss": 0.849, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.15765018254231664, | |
| "grad_norm": 2.892361640930176, | |
| "learning_rate": 4.737249695762806e-05, | |
| "loss": 0.7693, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.15930965814802522, | |
| "grad_norm": 3.766294479370117, | |
| "learning_rate": 4.734483903086625e-05, | |
| "loss": 0.845, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.1609691337537338, | |
| "grad_norm": 3.2067556381225586, | |
| "learning_rate": 4.731718110410444e-05, | |
| "loss": 0.8432, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.16262860935944243, | |
| "grad_norm": 3.325576066970825, | |
| "learning_rate": 4.728952317734263e-05, | |
| "loss": 0.7712, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.16428808496515102, | |
| "grad_norm": 2.6038808822631836, | |
| "learning_rate": 4.726186525058082e-05, | |
| "loss": 0.758, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.1659475605708596, | |
| "grad_norm": 3.717463254928589, | |
| "learning_rate": 4.723420732381901e-05, | |
| "loss": 0.8038, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1659475605708596, | |
| "eval_gen_len": 44.25406626506024, | |
| "eval_loss": 0.6970572471618652, | |
| "eval_model_preparation_time": 0.0137, | |
| "eval_runtime": 1355.4037, | |
| "eval_samples_per_second": 4.89, | |
| "eval_steps_per_second": 0.306, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1676070361765682, | |
| "grad_norm": 2.7978012561798096, | |
| "learning_rate": 4.72065493970572e-05, | |
| "loss": 0.8413, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.1692665117822768, | |
| "grad_norm": 3.021860361099243, | |
| "learning_rate": 4.717889147029539e-05, | |
| "loss": 0.7488, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.1709259873879854, | |
| "grad_norm": 3.307393789291382, | |
| "learning_rate": 4.715123354353358e-05, | |
| "loss": 0.7884, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.17258546299369398, | |
| "grad_norm": 4.444802761077881, | |
| "learning_rate": 4.712357561677177e-05, | |
| "loss": 0.7777, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.1742449385994026, | |
| "grad_norm": 2.8152804374694824, | |
| "learning_rate": 4.709591769000996e-05, | |
| "loss": 0.8055, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.1759044142051112, | |
| "grad_norm": 2.854592800140381, | |
| "learning_rate": 4.7068259763248154e-05, | |
| "loss": 0.778, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.17756388981081977, | |
| "grad_norm": 3.072824716567993, | |
| "learning_rate": 4.704060183648634e-05, | |
| "loss": 0.776, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.1792233654165284, | |
| "grad_norm": 3.6928513050079346, | |
| "learning_rate": 4.701294390972453e-05, | |
| "loss": 0.7938, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.18088284102223698, | |
| "grad_norm": 2.9358620643615723, | |
| "learning_rate": 4.698528598296272e-05, | |
| "loss": 0.7847, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.18254231662794557, | |
| "grad_norm": 2.9071340560913086, | |
| "learning_rate": 4.695762805620091e-05, | |
| "loss": 0.7455, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.18420179223365415, | |
| "grad_norm": 3.4249751567840576, | |
| "learning_rate": 4.69299701294391e-05, | |
| "loss": 0.8142, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.18586126783936277, | |
| "grad_norm": 3.0051093101501465, | |
| "learning_rate": 4.690231220267729e-05, | |
| "loss": 0.7925, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.18752074344507136, | |
| "grad_norm": 2.9879422187805176, | |
| "learning_rate": 4.687465427591548e-05, | |
| "loss": 0.728, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.18918021905077995, | |
| "grad_norm": 3.4878718852996826, | |
| "learning_rate": 4.684699634915367e-05, | |
| "loss": 0.7735, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.19083969465648856, | |
| "grad_norm": 3.447152853012085, | |
| "learning_rate": 4.681933842239186e-05, | |
| "loss": 0.7562, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.19249917026219715, | |
| "grad_norm": 3.2192983627319336, | |
| "learning_rate": 4.679168049563005e-05, | |
| "loss": 0.8128, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.19415864586790574, | |
| "grad_norm": 3.2137930393218994, | |
| "learning_rate": 4.676402256886824e-05, | |
| "loss": 0.8402, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.19581812147361433, | |
| "grad_norm": 2.711993455886841, | |
| "learning_rate": 4.673636464210643e-05, | |
| "loss": 0.7724, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.19747759707932294, | |
| "grad_norm": 2.9814987182617188, | |
| "learning_rate": 4.670870671534462e-05, | |
| "loss": 0.7809, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.19913707268503153, | |
| "grad_norm": 3.1554079055786133, | |
| "learning_rate": 4.668104878858281e-05, | |
| "loss": 0.756, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.20079654829074012, | |
| "grad_norm": 3.341683864593506, | |
| "learning_rate": 4.6653390861821e-05, | |
| "loss": 0.8088, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.20245602389644873, | |
| "grad_norm": 2.8297119140625, | |
| "learning_rate": 4.662573293505919e-05, | |
| "loss": 0.8249, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.20411549950215732, | |
| "grad_norm": 3.3890442848205566, | |
| "learning_rate": 4.659807500829738e-05, | |
| "loss": 0.8123, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.2057749751078659, | |
| "grad_norm": 3.256871223449707, | |
| "learning_rate": 4.657041708153557e-05, | |
| "loss": 0.8309, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.2074344507135745, | |
| "grad_norm": 3.438433885574341, | |
| "learning_rate": 4.654275915477376e-05, | |
| "loss": 0.8109, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.2090939263192831, | |
| "grad_norm": 3.2116994857788086, | |
| "learning_rate": 4.6515101228011946e-05, | |
| "loss": 0.6768, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.2107534019249917, | |
| "grad_norm": 2.6069250106811523, | |
| "learning_rate": 4.6487443301250137e-05, | |
| "loss": 0.7702, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.2124128775307003, | |
| "grad_norm": 3.114304304122925, | |
| "learning_rate": 4.645978537448833e-05, | |
| "loss": 0.7797, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.2140723531364089, | |
| "grad_norm": 2.907708168029785, | |
| "learning_rate": 4.643212744772652e-05, | |
| "loss": 0.7823, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.2157318287421175, | |
| "grad_norm": 3.989586353302002, | |
| "learning_rate": 4.640446952096471e-05, | |
| "loss": 0.7759, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.21739130434782608, | |
| "grad_norm": 3.164072275161743, | |
| "learning_rate": 4.63768115942029e-05, | |
| "loss": 0.7979, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.2190507799535347, | |
| "grad_norm": 3.060279607772827, | |
| "learning_rate": 4.634915366744109e-05, | |
| "loss": 0.7863, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.22071025555924328, | |
| "grad_norm": 3.268155336380005, | |
| "learning_rate": 4.632149574067928e-05, | |
| "loss": 0.8434, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.22236973116495187, | |
| "grad_norm": 2.992119550704956, | |
| "learning_rate": 4.629383781391747e-05, | |
| "loss": 0.7481, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.22402920677066046, | |
| "grad_norm": 2.692070722579956, | |
| "learning_rate": 4.626617988715566e-05, | |
| "loss": 0.7951, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.22568868237636908, | |
| "grad_norm": 3.0484812259674072, | |
| "learning_rate": 4.623852196039385e-05, | |
| "loss": 0.8139, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.22734815798207766, | |
| "grad_norm": 3.799321174621582, | |
| "learning_rate": 4.621086403363204e-05, | |
| "loss": 0.8022, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.22900763358778625, | |
| "grad_norm": 3.715362548828125, | |
| "learning_rate": 4.618320610687023e-05, | |
| "loss": 0.7153, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.23066710919349487, | |
| "grad_norm": 3.7485156059265137, | |
| "learning_rate": 4.6155548180108424e-05, | |
| "loss": 0.8893, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.23232658479920346, | |
| "grad_norm": 3.6852569580078125, | |
| "learning_rate": 4.6127890253346615e-05, | |
| "loss": 0.7838, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.23398606040491204, | |
| "grad_norm": 3.174116611480713, | |
| "learning_rate": 4.6100232326584805e-05, | |
| "loss": 0.7797, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.23564553601062063, | |
| "grad_norm": 4.355712890625, | |
| "learning_rate": 4.6072574399822996e-05, | |
| "loss": 0.8305, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.23730501161632925, | |
| "grad_norm": 2.906917095184326, | |
| "learning_rate": 4.6044916473061186e-05, | |
| "loss": 0.8066, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.23896448722203784, | |
| "grad_norm": 2.5050249099731445, | |
| "learning_rate": 4.601725854629937e-05, | |
| "loss": 0.8062, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.24062396282774642, | |
| "grad_norm": 3.4250411987304688, | |
| "learning_rate": 4.598960061953756e-05, | |
| "loss": 0.7336, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.24228343843345504, | |
| "grad_norm": 2.7324156761169434, | |
| "learning_rate": 4.596194269277575e-05, | |
| "loss": 0.7701, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.24394291403916363, | |
| "grad_norm": 2.688563585281372, | |
| "learning_rate": 4.593428476601394e-05, | |
| "loss": 0.7636, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.24560238964487222, | |
| "grad_norm": 3.027071952819824, | |
| "learning_rate": 4.590662683925213e-05, | |
| "loss": 0.7496, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.2472618652505808, | |
| "grad_norm": 2.9742684364318848, | |
| "learning_rate": 4.587896891249032e-05, | |
| "loss": 0.8015, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.24892134085628942, | |
| "grad_norm": 3.729691982269287, | |
| "learning_rate": 4.5851310985728514e-05, | |
| "loss": 0.8203, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.25058081646199803, | |
| "grad_norm": 3.2418553829193115, | |
| "learning_rate": 4.5823653058966705e-05, | |
| "loss": 0.8219, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.2522402920677066, | |
| "grad_norm": 2.6074585914611816, | |
| "learning_rate": 4.5795995132204895e-05, | |
| "loss": 0.6838, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.2538997676734152, | |
| "grad_norm": 3.124091148376465, | |
| "learning_rate": 4.5768337205443086e-05, | |
| "loss": 0.7972, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.25555924327912377, | |
| "grad_norm": 3.9640090465545654, | |
| "learning_rate": 4.574067927868127e-05, | |
| "loss": 0.7879, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.2572187188848324, | |
| "grad_norm": 4.680671215057373, | |
| "learning_rate": 4.571302135191946e-05, | |
| "loss": 0.7252, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.258878194490541, | |
| "grad_norm": 4.106893539428711, | |
| "learning_rate": 4.568536342515765e-05, | |
| "loss": 0.8023, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.26053767009624956, | |
| "grad_norm": 3.206587314605713, | |
| "learning_rate": 4.565770549839584e-05, | |
| "loss": 0.8094, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.2621971457019582, | |
| "grad_norm": 3.4632327556610107, | |
| "learning_rate": 4.563004757163403e-05, | |
| "loss": 0.7602, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.2638566213076668, | |
| "grad_norm": 2.723336935043335, | |
| "learning_rate": 4.560238964487222e-05, | |
| "loss": 0.7412, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.26551609691337535, | |
| "grad_norm": 2.829049587249756, | |
| "learning_rate": 4.5574731718110413e-05, | |
| "loss": 0.7328, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.26717557251908397, | |
| "grad_norm": 2.7743582725524902, | |
| "learning_rate": 4.5547073791348604e-05, | |
| "loss": 0.854, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.2688350481247926, | |
| "grad_norm": 2.7201788425445557, | |
| "learning_rate": 4.5519415864586795e-05, | |
| "loss": 0.8023, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.27049452373050115, | |
| "grad_norm": 2.8764491081237793, | |
| "learning_rate": 4.549175793782498e-05, | |
| "loss": 0.8004, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.27215399933620976, | |
| "grad_norm": 2.746384859085083, | |
| "learning_rate": 4.546410001106317e-05, | |
| "loss": 0.7538, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.2738134749419184, | |
| "grad_norm": 3.606780529022217, | |
| "learning_rate": 4.543644208430136e-05, | |
| "loss": 0.796, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.27547295054762694, | |
| "grad_norm": 2.4817562103271484, | |
| "learning_rate": 4.540878415753955e-05, | |
| "loss": 0.8017, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.27713242615333555, | |
| "grad_norm": 3.016995668411255, | |
| "learning_rate": 4.538112623077774e-05, | |
| "loss": 0.833, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.27879190175904417, | |
| "grad_norm": 2.847045421600342, | |
| "learning_rate": 4.535346830401593e-05, | |
| "loss": 0.7387, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.28045137736475273, | |
| "grad_norm": 3.473771333694458, | |
| "learning_rate": 4.532581037725412e-05, | |
| "loss": 0.8778, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.28211085297046135, | |
| "grad_norm": 3.311330795288086, | |
| "learning_rate": 4.529815245049231e-05, | |
| "loss": 0.7255, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.2837703285761699, | |
| "grad_norm": 2.5803961753845215, | |
| "learning_rate": 4.5270494523730503e-05, | |
| "loss": 0.8126, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.2854298041818785, | |
| "grad_norm": 3.2069246768951416, | |
| "learning_rate": 4.5242836596968694e-05, | |
| "loss": 0.7845, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.28708927978758714, | |
| "grad_norm": 2.9690170288085938, | |
| "learning_rate": 4.5215178670206885e-05, | |
| "loss": 0.7635, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.2887487553932957, | |
| "grad_norm": 2.6883442401885986, | |
| "learning_rate": 4.5187520743445075e-05, | |
| "loss": 0.7325, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.2904082309990043, | |
| "grad_norm": 2.671856641769409, | |
| "learning_rate": 4.5159862816683266e-05, | |
| "loss": 0.7755, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.29206770660471293, | |
| "grad_norm": 2.4875473976135254, | |
| "learning_rate": 4.5132204889921457e-05, | |
| "loss": 0.798, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.2937271822104215, | |
| "grad_norm": 3.223682165145874, | |
| "learning_rate": 4.510454696315965e-05, | |
| "loss": 0.813, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.2953866578161301, | |
| "grad_norm": 2.2691056728363037, | |
| "learning_rate": 4.507688903639784e-05, | |
| "loss": 0.805, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.2970461334218387, | |
| "grad_norm": 3.5715551376342773, | |
| "learning_rate": 4.504923110963603e-05, | |
| "loss": 0.7586, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.2987056090275473, | |
| "grad_norm": 2.625098466873169, | |
| "learning_rate": 4.502157318287422e-05, | |
| "loss": 0.8703, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.3003650846332559, | |
| "grad_norm": 3.1126928329467773, | |
| "learning_rate": 4.499391525611241e-05, | |
| "loss": 0.7879, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.3020245602389645, | |
| "grad_norm": 3.1483154296875, | |
| "learning_rate": 4.4966257329350593e-05, | |
| "loss": 0.7849, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.3036840358446731, | |
| "grad_norm": 2.979381799697876, | |
| "learning_rate": 4.4938599402588784e-05, | |
| "loss": 0.7649, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.3053435114503817, | |
| "grad_norm": 3.920473098754883, | |
| "learning_rate": 4.4910941475826975e-05, | |
| "loss": 0.7568, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.30700298705609025, | |
| "grad_norm": 2.9425113201141357, | |
| "learning_rate": 4.4883283549065165e-05, | |
| "loss": 0.7519, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.30866246266179886, | |
| "grad_norm": 3.8821983337402344, | |
| "learning_rate": 4.4855625622303356e-05, | |
| "loss": 0.8094, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.3103219382675075, | |
| "grad_norm": 2.6359121799468994, | |
| "learning_rate": 4.4827967695541547e-05, | |
| "loss": 0.8354, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.31198141387321604, | |
| "grad_norm": 2.5459086894989014, | |
| "learning_rate": 4.480030976877974e-05, | |
| "loss": 0.7459, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.31364088947892466, | |
| "grad_norm": 2.7496984004974365, | |
| "learning_rate": 4.477265184201793e-05, | |
| "loss": 0.7553, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.31530036508463327, | |
| "grad_norm": 6.382673740386963, | |
| "learning_rate": 4.474499391525612e-05, | |
| "loss": 0.8221, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.31695984069034183, | |
| "grad_norm": 2.9733335971832275, | |
| "learning_rate": 4.47173359884943e-05, | |
| "loss": 0.8175, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.31861931629605045, | |
| "grad_norm": 2.1865618228912354, | |
| "learning_rate": 4.468967806173249e-05, | |
| "loss": 0.7811, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.32027879190175906, | |
| "grad_norm": 6.8357648849487305, | |
| "learning_rate": 4.4662020134970684e-05, | |
| "loss": 0.6994, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.3219382675074676, | |
| "grad_norm": 2.662757635116577, | |
| "learning_rate": 4.4634362208208874e-05, | |
| "loss": 0.7632, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.32359774311317624, | |
| "grad_norm": 3.145087480545044, | |
| "learning_rate": 4.4606704281447065e-05, | |
| "loss": 0.8453, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.32525721871888486, | |
| "grad_norm": 3.6768178939819336, | |
| "learning_rate": 4.4579046354685255e-05, | |
| "loss": 0.7155, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.3269166943245934, | |
| "grad_norm": 3.166222333908081, | |
| "learning_rate": 4.4551388427923446e-05, | |
| "loss": 0.7469, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.32857616993030203, | |
| "grad_norm": 2.641578197479248, | |
| "learning_rate": 4.452373050116164e-05, | |
| "loss": 0.8268, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.33023564553601065, | |
| "grad_norm": 3.3563296794891357, | |
| "learning_rate": 4.449607257439983e-05, | |
| "loss": 0.7641, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.3318951211417192, | |
| "grad_norm": 3.222212076187134, | |
| "learning_rate": 4.446841464763801e-05, | |
| "loss": 0.8712, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.3318951211417192, | |
| "eval_gen_len": 47.415361445783134, | |
| "eval_loss": 0.6793270707130432, | |
| "eval_model_preparation_time": 0.0137, | |
| "eval_runtime": 1404.8246, | |
| "eval_samples_per_second": 4.718, | |
| "eval_steps_per_second": 0.295, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.3335545967474278, | |
| "grad_norm": 3.183871030807495, | |
| "learning_rate": 4.44407567208762e-05, | |
| "loss": 0.854, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.3352140723531364, | |
| "grad_norm": 3.283364772796631, | |
| "learning_rate": 4.441309879411439e-05, | |
| "loss": 0.7531, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.336873547958845, | |
| "grad_norm": 3.249227523803711, | |
| "learning_rate": 4.438544086735258e-05, | |
| "loss": 0.7456, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.3385330235645536, | |
| "grad_norm": 2.5303399562835693, | |
| "learning_rate": 4.4357782940590774e-05, | |
| "loss": 0.8303, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.3401924991702622, | |
| "grad_norm": 3.1236414909362793, | |
| "learning_rate": 4.4330125013828964e-05, | |
| "loss": 0.7733, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.3418519747759708, | |
| "grad_norm": 3.557269811630249, | |
| "learning_rate": 4.4302467087067155e-05, | |
| "loss": 0.7385, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.3435114503816794, | |
| "grad_norm": 3.093048334121704, | |
| "learning_rate": 4.4274809160305345e-05, | |
| "loss": 0.7486, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.34517092598738797, | |
| "grad_norm": 3.6354196071624756, | |
| "learning_rate": 4.4247151233543536e-05, | |
| "loss": 0.8101, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.3468304015930966, | |
| "grad_norm": 3.0970399379730225, | |
| "learning_rate": 4.421949330678173e-05, | |
| "loss": 0.7527, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.3484898771988052, | |
| "grad_norm": 2.604701280593872, | |
| "learning_rate": 4.419183538001991e-05, | |
| "loss": 0.8143, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.35014935280451376, | |
| "grad_norm": 4.05767822265625, | |
| "learning_rate": 4.41641774532581e-05, | |
| "loss": 0.8129, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.3518088284102224, | |
| "grad_norm": 3.9038188457489014, | |
| "learning_rate": 4.413651952649629e-05, | |
| "loss": 0.7824, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.353468304015931, | |
| "grad_norm": 2.652456760406494, | |
| "learning_rate": 4.410886159973448e-05, | |
| "loss": 0.7678, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.35512777962163955, | |
| "grad_norm": 2.766669511795044, | |
| "learning_rate": 4.408120367297267e-05, | |
| "loss": 0.7936, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.35678725522734817, | |
| "grad_norm": 3.649751663208008, | |
| "learning_rate": 4.4053545746210864e-05, | |
| "loss": 0.7205, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.3584467308330568, | |
| "grad_norm": 2.6961166858673096, | |
| "learning_rate": 4.4025887819449054e-05, | |
| "loss": 0.8331, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.36010620643876534, | |
| "grad_norm": 3.3244972229003906, | |
| "learning_rate": 4.399822989268725e-05, | |
| "loss": 0.8189, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.36176568204447396, | |
| "grad_norm": 3.269043445587158, | |
| "learning_rate": 4.397057196592544e-05, | |
| "loss": 0.8237, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.3634251576501825, | |
| "grad_norm": 3.3199679851531982, | |
| "learning_rate": 4.3942914039163626e-05, | |
| "loss": 0.7835, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.36508463325589113, | |
| "grad_norm": 3.377162218093872, | |
| "learning_rate": 4.391525611240182e-05, | |
| "loss": 0.7065, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.36674410886159975, | |
| "grad_norm": 2.6638193130493164, | |
| "learning_rate": 4.388759818564001e-05, | |
| "loss": 0.7611, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.3684035844673083, | |
| "grad_norm": 2.741482734680176, | |
| "learning_rate": 4.38599402588782e-05, | |
| "loss": 0.7971, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.3700630600730169, | |
| "grad_norm": 4.292590618133545, | |
| "learning_rate": 4.383228233211639e-05, | |
| "loss": 0.735, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.37172253567872554, | |
| "grad_norm": 2.7163352966308594, | |
| "learning_rate": 4.380462440535458e-05, | |
| "loss": 0.742, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.3733820112844341, | |
| "grad_norm": 2.661367654800415, | |
| "learning_rate": 4.377696647859277e-05, | |
| "loss": 0.7493, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.3750414868901427, | |
| "grad_norm": 3.442807674407959, | |
| "learning_rate": 4.374930855183096e-05, | |
| "loss": 0.7258, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.37670096249585133, | |
| "grad_norm": 3.017528772354126, | |
| "learning_rate": 4.372165062506915e-05, | |
| "loss": 0.7717, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.3783604381015599, | |
| "grad_norm": 3.1746342182159424, | |
| "learning_rate": 4.3693992698307335e-05, | |
| "loss": 0.7912, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.3800199137072685, | |
| "grad_norm": 2.567218780517578, | |
| "learning_rate": 4.3666334771545526e-05, | |
| "loss": 0.7329, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.3816793893129771, | |
| "grad_norm": 3.3965744972229004, | |
| "learning_rate": 4.3638676844783716e-05, | |
| "loss": 0.8182, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.3833388649186857, | |
| "grad_norm": 2.220444440841675, | |
| "learning_rate": 4.361101891802191e-05, | |
| "loss": 0.697, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.3849983405243943, | |
| "grad_norm": 2.949594259262085, | |
| "learning_rate": 4.35833609912601e-05, | |
| "loss": 0.8063, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.38665781613010286, | |
| "grad_norm": 3.4351999759674072, | |
| "learning_rate": 4.355570306449829e-05, | |
| "loss": 0.8039, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.3883172917358115, | |
| "grad_norm": 3.2207376956939697, | |
| "learning_rate": 4.352804513773648e-05, | |
| "loss": 0.8298, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.3899767673415201, | |
| "grad_norm": 2.397782802581787, | |
| "learning_rate": 4.350038721097467e-05, | |
| "loss": 0.6659, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.39163624294722865, | |
| "grad_norm": 2.6824631690979004, | |
| "learning_rate": 4.347272928421286e-05, | |
| "loss": 0.7375, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.39329571855293727, | |
| "grad_norm": 2.6002721786499023, | |
| "learning_rate": 4.3445071357451044e-05, | |
| "loss": 0.7758, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.3949551941586459, | |
| "grad_norm": 3.270160675048828, | |
| "learning_rate": 4.3417413430689234e-05, | |
| "loss": 0.8297, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.39661466976435444, | |
| "grad_norm": 3.2965505123138428, | |
| "learning_rate": 4.3389755503927425e-05, | |
| "loss": 0.76, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.39827414537006306, | |
| "grad_norm": 3.4657270908355713, | |
| "learning_rate": 4.3362097577165616e-05, | |
| "loss": 0.7997, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.3999336209757717, | |
| "grad_norm": 2.213045120239258, | |
| "learning_rate": 4.3334439650403806e-05, | |
| "loss": 0.7271, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.40159309658148024, | |
| "grad_norm": 2.360948085784912, | |
| "learning_rate": 4.3306781723642e-05, | |
| "loss": 0.7122, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.40325257218718885, | |
| "grad_norm": 2.8001227378845215, | |
| "learning_rate": 4.327912379688019e-05, | |
| "loss": 0.8604, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.40491204779289747, | |
| "grad_norm": 2.840575933456421, | |
| "learning_rate": 4.325146587011838e-05, | |
| "loss": 0.7454, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.406571523398606, | |
| "grad_norm": 3.021378993988037, | |
| "learning_rate": 4.322380794335657e-05, | |
| "loss": 0.7479, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.40823099900431464, | |
| "grad_norm": 3.7414467334747314, | |
| "learning_rate": 4.319615001659476e-05, | |
| "loss": 0.7676, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.40989047461002326, | |
| "grad_norm": 3.384713888168335, | |
| "learning_rate": 4.316849208983294e-05, | |
| "loss": 0.7451, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.4115499502157318, | |
| "grad_norm": 3.215459108352661, | |
| "learning_rate": 4.3140834163071134e-05, | |
| "loss": 0.7401, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.41320942582144043, | |
| "grad_norm": 4.62844705581665, | |
| "learning_rate": 4.3113176236309324e-05, | |
| "loss": 0.8015, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.414868901427149, | |
| "grad_norm": 2.7699246406555176, | |
| "learning_rate": 4.3085518309547515e-05, | |
| "loss": 0.7665, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.4165283770328576, | |
| "grad_norm": 3.4257094860076904, | |
| "learning_rate": 4.3057860382785706e-05, | |
| "loss": 0.8242, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.4181878526385662, | |
| "grad_norm": 2.566210985183716, | |
| "learning_rate": 4.3030202456023896e-05, | |
| "loss": 0.8138, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.4198473282442748, | |
| "grad_norm": 2.644387722015381, | |
| "learning_rate": 4.300254452926209e-05, | |
| "loss": 0.7472, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.4215068038499834, | |
| "grad_norm": 2.5530991554260254, | |
| "learning_rate": 4.297488660250028e-05, | |
| "loss": 0.8139, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.423166279455692, | |
| "grad_norm": 3.8945424556732178, | |
| "learning_rate": 4.294722867573847e-05, | |
| "loss": 0.8397, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.4248257550614006, | |
| "grad_norm": 2.5263960361480713, | |
| "learning_rate": 4.291957074897666e-05, | |
| "loss": 0.7298, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.4264852306671092, | |
| "grad_norm": 2.987938404083252, | |
| "learning_rate": 4.289191282221485e-05, | |
| "loss": 0.7347, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.4281447062728178, | |
| "grad_norm": 2.844803810119629, | |
| "learning_rate": 4.286425489545304e-05, | |
| "loss": 0.7533, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.42980418187852637, | |
| "grad_norm": 2.995476245880127, | |
| "learning_rate": 4.283659696869123e-05, | |
| "loss": 0.847, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.431463657484235, | |
| "grad_norm": 2.791422128677368, | |
| "learning_rate": 4.280893904192942e-05, | |
| "loss": 0.7767, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.4331231330899436, | |
| "grad_norm": 2.7166290283203125, | |
| "learning_rate": 4.278128111516761e-05, | |
| "loss": 0.7942, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.43478260869565216, | |
| "grad_norm": 2.4218597412109375, | |
| "learning_rate": 4.27536231884058e-05, | |
| "loss": 0.7281, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.4364420843013608, | |
| "grad_norm": 2.9908969402313232, | |
| "learning_rate": 4.272596526164399e-05, | |
| "loss": 0.7485, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.4381015599070694, | |
| "grad_norm": 2.891364336013794, | |
| "learning_rate": 4.2698307334882184e-05, | |
| "loss": 0.7798, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.43976103551277795, | |
| "grad_norm": 2.570340394973755, | |
| "learning_rate": 4.267064940812037e-05, | |
| "loss": 0.7695, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.44142051111848657, | |
| "grad_norm": 3.531270742416382, | |
| "learning_rate": 4.264299148135856e-05, | |
| "loss": 0.7381, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.44307998672419513, | |
| "grad_norm": 3.6200950145721436, | |
| "learning_rate": 4.261533355459675e-05, | |
| "loss": 0.8237, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.44473946232990375, | |
| "grad_norm": 2.875049352645874, | |
| "learning_rate": 4.258767562783494e-05, | |
| "loss": 0.7549, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.44639893793561236, | |
| "grad_norm": 2.9184587001800537, | |
| "learning_rate": 4.256001770107313e-05, | |
| "loss": 0.74, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.4480584135413209, | |
| "grad_norm": 3.756166458129883, | |
| "learning_rate": 4.253235977431132e-05, | |
| "loss": 0.7764, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.44971788914702954, | |
| "grad_norm": 2.6574513912200928, | |
| "learning_rate": 4.250470184754951e-05, | |
| "loss": 0.6868, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.45137736475273815, | |
| "grad_norm": 2.8833422660827637, | |
| "learning_rate": 4.24770439207877e-05, | |
| "loss": 0.7902, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.4530368403584467, | |
| "grad_norm": 3.441880464553833, | |
| "learning_rate": 4.244938599402589e-05, | |
| "loss": 0.7375, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.45469631596415533, | |
| "grad_norm": 2.796851396560669, | |
| "learning_rate": 4.242172806726408e-05, | |
| "loss": 0.7475, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.45635579156986394, | |
| "grad_norm": 3.374749183654785, | |
| "learning_rate": 4.239407014050227e-05, | |
| "loss": 0.7325, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.4580152671755725, | |
| "grad_norm": 3.025646209716797, | |
| "learning_rate": 4.236641221374046e-05, | |
| "loss": 0.7562, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.4596747427812811, | |
| "grad_norm": 3.104525566101074, | |
| "learning_rate": 4.233875428697865e-05, | |
| "loss": 0.7907, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.46133421838698974, | |
| "grad_norm": 3.566995143890381, | |
| "learning_rate": 4.231109636021684e-05, | |
| "loss": 0.714, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.4629936939926983, | |
| "grad_norm": 2.225813150405884, | |
| "learning_rate": 4.228343843345503e-05, | |
| "loss": 0.7958, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.4646531695984069, | |
| "grad_norm": 5.6908440589904785, | |
| "learning_rate": 4.225578050669322e-05, | |
| "loss": 0.8055, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.4663126452041155, | |
| "grad_norm": 3.8612444400787354, | |
| "learning_rate": 4.222812257993141e-05, | |
| "loss": 0.772, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.4679721208098241, | |
| "grad_norm": 2.7307820320129395, | |
| "learning_rate": 4.22004646531696e-05, | |
| "loss": 0.7752, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.4696315964155327, | |
| "grad_norm": 2.845541477203369, | |
| "learning_rate": 4.217280672640779e-05, | |
| "loss": 0.7597, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.47129107202124126, | |
| "grad_norm": 2.981630563735962, | |
| "learning_rate": 4.2145148799645976e-05, | |
| "loss": 0.7566, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.4729505476269499, | |
| "grad_norm": 2.511880397796631, | |
| "learning_rate": 4.2117490872884166e-05, | |
| "loss": 0.7161, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.4746100232326585, | |
| "grad_norm": 3.225386142730713, | |
| "learning_rate": 4.208983294612236e-05, | |
| "loss": 0.791, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.47626949883836706, | |
| "grad_norm": 3.897096633911133, | |
| "learning_rate": 4.206217501936055e-05, | |
| "loss": 0.7072, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.47792897444407567, | |
| "grad_norm": 2.640658140182495, | |
| "learning_rate": 4.203451709259874e-05, | |
| "loss": 0.7708, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.4795884500497843, | |
| "grad_norm": 2.9083096981048584, | |
| "learning_rate": 4.200685916583693e-05, | |
| "loss": 0.7547, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.48124792565549285, | |
| "grad_norm": 2.8646459579467773, | |
| "learning_rate": 4.197920123907512e-05, | |
| "loss": 0.766, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.48290740126120146, | |
| "grad_norm": 2.6047298908233643, | |
| "learning_rate": 4.195154331231331e-05, | |
| "loss": 0.7522, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.4845668768669101, | |
| "grad_norm": 3.0007834434509277, | |
| "learning_rate": 4.19238853855515e-05, | |
| "loss": 0.7766, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.48622635247261864, | |
| "grad_norm": 3.063333511352539, | |
| "learning_rate": 4.189622745878969e-05, | |
| "loss": 0.8042, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.48788582807832725, | |
| "grad_norm": 2.5916037559509277, | |
| "learning_rate": 4.186856953202788e-05, | |
| "loss": 0.7362, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.48954530368403587, | |
| "grad_norm": 4.322835922241211, | |
| "learning_rate": 4.184091160526607e-05, | |
| "loss": 0.7463, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.49120477928974443, | |
| "grad_norm": 2.634681463241577, | |
| "learning_rate": 4.181325367850426e-05, | |
| "loss": 0.755, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.49286425489545305, | |
| "grad_norm": 2.652538776397705, | |
| "learning_rate": 4.1785595751742454e-05, | |
| "loss": 0.7799, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.4945237305011616, | |
| "grad_norm": 3.1968343257904053, | |
| "learning_rate": 4.1757937824980644e-05, | |
| "loss": 0.7158, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.4961832061068702, | |
| "grad_norm": 3.14144229888916, | |
| "learning_rate": 4.1730279898218835e-05, | |
| "loss": 0.7382, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.49784268171257884, | |
| "grad_norm": 3.0171921253204346, | |
| "learning_rate": 4.1702621971457026e-05, | |
| "loss": 0.7521, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.49784268171257884, | |
| "eval_gen_len": 41.39789156626506, | |
| "eval_loss": 0.6684303283691406, | |
| "eval_model_preparation_time": 0.0137, | |
| "eval_runtime": 1316.8012, | |
| "eval_samples_per_second": 5.033, | |
| "eval_steps_per_second": 0.315, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.4995021573182874, | |
| "grad_norm": 2.768636465072632, | |
| "learning_rate": 4.1674964044695216e-05, | |
| "loss": 0.7066, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.5011616329239961, | |
| "grad_norm": 2.3734288215637207, | |
| "learning_rate": 4.16473061179334e-05, | |
| "loss": 0.7546, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.5028211085297046, | |
| "grad_norm": 2.6482138633728027, | |
| "learning_rate": 4.161964819117159e-05, | |
| "loss": 0.7635, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.5044805841354132, | |
| "grad_norm": 3.832292079925537, | |
| "learning_rate": 4.159199026440978e-05, | |
| "loss": 0.8326, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.5061400597411218, | |
| "grad_norm": 2.807021379470825, | |
| "learning_rate": 4.156433233764797e-05, | |
| "loss": 0.7692, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.5077995353468304, | |
| "grad_norm": 3.430129289627075, | |
| "learning_rate": 4.153667441088616e-05, | |
| "loss": 0.7774, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.509459010952539, | |
| "grad_norm": 2.7762088775634766, | |
| "learning_rate": 4.150901648412435e-05, | |
| "loss": 0.7413, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.5111184865582475, | |
| "grad_norm": 2.8153493404388428, | |
| "learning_rate": 4.1481358557362544e-05, | |
| "loss": 0.8242, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.5127779621639562, | |
| "grad_norm": 2.6910579204559326, | |
| "learning_rate": 4.1453700630600734e-05, | |
| "loss": 0.8018, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.5144374377696648, | |
| "grad_norm": 3.160053014755249, | |
| "learning_rate": 4.1426042703838925e-05, | |
| "loss": 0.8186, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.5160969133753733, | |
| "grad_norm": 2.774655818939209, | |
| "learning_rate": 4.1398384777077116e-05, | |
| "loss": 0.7863, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.517756388981082, | |
| "grad_norm": 3.7984707355499268, | |
| "learning_rate": 4.13707268503153e-05, | |
| "loss": 0.7761, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.5194158645867906, | |
| "grad_norm": 2.766265869140625, | |
| "learning_rate": 4.134306892355349e-05, | |
| "loss": 0.8273, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.5210753401924991, | |
| "grad_norm": 3.027769088745117, | |
| "learning_rate": 4.131541099679168e-05, | |
| "loss": 0.6994, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.5227348157982078, | |
| "grad_norm": 3.2986860275268555, | |
| "learning_rate": 4.128775307002987e-05, | |
| "loss": 0.7505, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.5243942914039164, | |
| "grad_norm": 3.352910041809082, | |
| "learning_rate": 4.126009514326806e-05, | |
| "loss": 0.8348, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.5260537670096249, | |
| "grad_norm": 3.2695400714874268, | |
| "learning_rate": 4.123243721650625e-05, | |
| "loss": 0.7393, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.5277132426153336, | |
| "grad_norm": 2.6088485717773438, | |
| "learning_rate": 4.120477928974444e-05, | |
| "loss": 0.7835, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.5293727182210421, | |
| "grad_norm": 2.8191120624542236, | |
| "learning_rate": 4.1177121362982634e-05, | |
| "loss": 0.7469, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.5310321938267507, | |
| "grad_norm": 6.382346153259277, | |
| "learning_rate": 4.1149463436220824e-05, | |
| "loss": 0.781, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.5326916694324594, | |
| "grad_norm": 3.124753713607788, | |
| "learning_rate": 4.112180550945901e-05, | |
| "loss": 0.7763, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.5343511450381679, | |
| "grad_norm": 3.3809783458709717, | |
| "learning_rate": 4.10941475826972e-05, | |
| "loss": 0.7479, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.5360106206438765, | |
| "grad_norm": 3.1917128562927246, | |
| "learning_rate": 4.106648965593539e-05, | |
| "loss": 0.745, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.5376700962495852, | |
| "grad_norm": 2.9804718494415283, | |
| "learning_rate": 4.103883172917358e-05, | |
| "loss": 0.8186, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.5393295718552937, | |
| "grad_norm": 2.429513692855835, | |
| "learning_rate": 4.101117380241177e-05, | |
| "loss": 0.7253, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.5409890474610023, | |
| "grad_norm": 3.4630186557769775, | |
| "learning_rate": 4.098351587564996e-05, | |
| "loss": 0.8013, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.542648523066711, | |
| "grad_norm": 3.3614048957824707, | |
| "learning_rate": 4.095585794888815e-05, | |
| "loss": 0.7448, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.5443079986724195, | |
| "grad_norm": 3.4482579231262207, | |
| "learning_rate": 4.092820002212634e-05, | |
| "loss": 0.7889, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.5459674742781281, | |
| "grad_norm": 3.538914442062378, | |
| "learning_rate": 4.090054209536453e-05, | |
| "loss": 0.7433, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.5476269498838368, | |
| "grad_norm": 2.6519882678985596, | |
| "learning_rate": 4.0872884168602724e-05, | |
| "loss": 0.7425, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.5492864254895453, | |
| "grad_norm": 3.908871650695801, | |
| "learning_rate": 4.0845226241840914e-05, | |
| "loss": 0.7687, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.5509459010952539, | |
| "grad_norm": 2.4761886596679688, | |
| "learning_rate": 4.0817568315079105e-05, | |
| "loss": 0.7559, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.5526053767009625, | |
| "grad_norm": 2.936110496520996, | |
| "learning_rate": 4.0789910388317296e-05, | |
| "loss": 0.7407, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.5542648523066711, | |
| "grad_norm": 2.414314031600952, | |
| "learning_rate": 4.0762252461555486e-05, | |
| "loss": 0.7345, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.5559243279123797, | |
| "grad_norm": 2.5228681564331055, | |
| "learning_rate": 4.073459453479368e-05, | |
| "loss": 0.7433, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.5575838035180883, | |
| "grad_norm": 2.4649956226348877, | |
| "learning_rate": 4.070693660803187e-05, | |
| "loss": 0.7801, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.5592432791237969, | |
| "grad_norm": 3.223370313644409, | |
| "learning_rate": 4.067927868127006e-05, | |
| "loss": 0.6799, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.5609027547295055, | |
| "grad_norm": 3.04349422454834, | |
| "learning_rate": 4.065162075450825e-05, | |
| "loss": 0.7712, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.562562230335214, | |
| "grad_norm": 3.191512107849121, | |
| "learning_rate": 4.062396282774644e-05, | |
| "loss": 0.8188, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.5642217059409227, | |
| "grad_norm": 2.470961332321167, | |
| "learning_rate": 4.059630490098462e-05, | |
| "loss": 0.7609, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.5658811815466313, | |
| "grad_norm": 3.0786991119384766, | |
| "learning_rate": 4.0568646974222814e-05, | |
| "loss": 0.8628, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.5675406571523398, | |
| "grad_norm": 3.9537196159362793, | |
| "learning_rate": 4.0540989047461005e-05, | |
| "loss": 0.7777, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.5692001327580485, | |
| "grad_norm": 3.1566216945648193, | |
| "learning_rate": 4.0513331120699195e-05, | |
| "loss": 0.819, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.570859608363757, | |
| "grad_norm": 3.2270612716674805, | |
| "learning_rate": 4.0485673193937386e-05, | |
| "loss": 0.7715, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.5725190839694656, | |
| "grad_norm": 3.0721094608306885, | |
| "learning_rate": 4.0458015267175576e-05, | |
| "loss": 0.7747, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.5741785595751743, | |
| "grad_norm": 3.0017573833465576, | |
| "learning_rate": 4.043035734041377e-05, | |
| "loss": 0.6807, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.5758380351808828, | |
| "grad_norm": 3.0706708431243896, | |
| "learning_rate": 4.040269941365196e-05, | |
| "loss": 0.8453, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.5774975107865914, | |
| "grad_norm": 2.895575761795044, | |
| "learning_rate": 4.037504148689015e-05, | |
| "loss": 0.8199, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.5791569863923001, | |
| "grad_norm": 2.690824270248413, | |
| "learning_rate": 4.034738356012833e-05, | |
| "loss": 0.7427, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.5808164619980086, | |
| "grad_norm": 3.1231939792633057, | |
| "learning_rate": 4.031972563336652e-05, | |
| "loss": 0.7237, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.5824759376037172, | |
| "grad_norm": 3.78774356842041, | |
| "learning_rate": 4.029206770660471e-05, | |
| "loss": 0.7579, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.5841354132094259, | |
| "grad_norm": 3.0420353412628174, | |
| "learning_rate": 4.0264409779842904e-05, | |
| "loss": 0.7719, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.5857948888151344, | |
| "grad_norm": 2.9405903816223145, | |
| "learning_rate": 4.0236751853081095e-05, | |
| "loss": 0.7119, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.587454364420843, | |
| "grad_norm": 3.3033666610717773, | |
| "learning_rate": 4.0209093926319285e-05, | |
| "loss": 0.7153, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.5891138400265516, | |
| "grad_norm": 2.730015993118286, | |
| "learning_rate": 4.0181435999557476e-05, | |
| "loss": 0.8199, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.5907733156322602, | |
| "grad_norm": 4.007871150970459, | |
| "learning_rate": 4.0153778072795666e-05, | |
| "loss": 0.7864, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.5924327912379688, | |
| "grad_norm": 5.673678398132324, | |
| "learning_rate": 4.012612014603386e-05, | |
| "loss": 0.7674, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.5940922668436774, | |
| "grad_norm": 3.6139519214630127, | |
| "learning_rate": 4.009846221927204e-05, | |
| "loss": 0.7064, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.595751742449386, | |
| "grad_norm": 2.455223321914673, | |
| "learning_rate": 4.007080429251023e-05, | |
| "loss": 0.7463, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.5974112180550946, | |
| "grad_norm": 2.7342236042022705, | |
| "learning_rate": 4.004314636574842e-05, | |
| "loss": 0.702, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.5990706936608032, | |
| "grad_norm": 3.3068127632141113, | |
| "learning_rate": 4.001548843898661e-05, | |
| "loss": 0.7635, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.6007301692665118, | |
| "grad_norm": 3.509694814682007, | |
| "learning_rate": 3.99878305122248e-05, | |
| "loss": 0.6797, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.6023896448722204, | |
| "grad_norm": 3.3854920864105225, | |
| "learning_rate": 3.9960172585462994e-05, | |
| "loss": 0.7737, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.604049120477929, | |
| "grad_norm": 3.1327128410339355, | |
| "learning_rate": 3.9932514658701185e-05, | |
| "loss": 0.7498, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.6057085960836376, | |
| "grad_norm": 2.878110408782959, | |
| "learning_rate": 3.9904856731939375e-05, | |
| "loss": 0.7231, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.6073680716893461, | |
| "grad_norm": 2.6065754890441895, | |
| "learning_rate": 3.9877198805177566e-05, | |
| "loss": 0.7698, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.6090275472950548, | |
| "grad_norm": 4.012957572937012, | |
| "learning_rate": 3.9849540878415756e-05, | |
| "loss": 0.7458, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.6106870229007634, | |
| "grad_norm": 4.345855712890625, | |
| "learning_rate": 3.982188295165395e-05, | |
| "loss": 0.7561, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.6123464985064719, | |
| "grad_norm": 3.0930395126342773, | |
| "learning_rate": 3.979422502489214e-05, | |
| "loss": 0.811, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.6140059741121805, | |
| "grad_norm": 2.519012212753296, | |
| "learning_rate": 3.976656709813033e-05, | |
| "loss": 0.7081, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.6156654497178892, | |
| "grad_norm": 3.1722018718719482, | |
| "learning_rate": 3.973890917136852e-05, | |
| "loss": 0.7695, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.6173249253235977, | |
| "grad_norm": 2.899458885192871, | |
| "learning_rate": 3.971125124460671e-05, | |
| "loss": 0.7741, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.6189844009293063, | |
| "grad_norm": 2.858637809753418, | |
| "learning_rate": 3.96835933178449e-05, | |
| "loss": 0.7594, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.620643876535015, | |
| "grad_norm": 3.2127928733825684, | |
| "learning_rate": 3.965593539108309e-05, | |
| "loss": 0.7823, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.6223033521407235, | |
| "grad_norm": 2.691950798034668, | |
| "learning_rate": 3.962827746432128e-05, | |
| "loss": 0.7618, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.6239628277464321, | |
| "grad_norm": 2.7668871879577637, | |
| "learning_rate": 3.960061953755947e-05, | |
| "loss": 0.7169, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.6256223033521408, | |
| "grad_norm": 3.0835137367248535, | |
| "learning_rate": 3.9572961610797656e-05, | |
| "loss": 0.7301, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.6272817789578493, | |
| "grad_norm": 2.561507225036621, | |
| "learning_rate": 3.9545303684035846e-05, | |
| "loss": 0.7917, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.6289412545635579, | |
| "grad_norm": 3.317300796508789, | |
| "learning_rate": 3.951764575727404e-05, | |
| "loss": 0.7741, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.6306007301692665, | |
| "grad_norm": 2.73250412940979, | |
| "learning_rate": 3.948998783051223e-05, | |
| "loss": 0.7402, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.6322602057749751, | |
| "grad_norm": 3.5862343311309814, | |
| "learning_rate": 3.946232990375042e-05, | |
| "loss": 0.7247, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.6339196813806837, | |
| "grad_norm": 3.3748645782470703, | |
| "learning_rate": 3.943467197698861e-05, | |
| "loss": 0.7192, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.6355791569863923, | |
| "grad_norm": 3.1063249111175537, | |
| "learning_rate": 3.94070140502268e-05, | |
| "loss": 0.7557, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.6372386325921009, | |
| "grad_norm": 2.6087396144866943, | |
| "learning_rate": 3.937935612346499e-05, | |
| "loss": 0.7523, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.6388981081978095, | |
| "grad_norm": 2.9586808681488037, | |
| "learning_rate": 3.935169819670318e-05, | |
| "loss": 0.7762, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.6405575838035181, | |
| "grad_norm": 2.84586501121521, | |
| "learning_rate": 3.9324040269941365e-05, | |
| "loss": 0.722, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.6422170594092267, | |
| "grad_norm": 3.2668354511260986, | |
| "learning_rate": 3.9296382343179555e-05, | |
| "loss": 0.7091, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.6438765350149352, | |
| "grad_norm": 2.432651996612549, | |
| "learning_rate": 3.9268724416417746e-05, | |
| "loss": 0.761, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.6455360106206439, | |
| "grad_norm": 2.915759801864624, | |
| "learning_rate": 3.9241066489655937e-05, | |
| "loss": 0.7052, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.6471954862263525, | |
| "grad_norm": 3.8678624629974365, | |
| "learning_rate": 3.921340856289413e-05, | |
| "loss": 0.7719, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.648854961832061, | |
| "grad_norm": 2.6558640003204346, | |
| "learning_rate": 3.918575063613232e-05, | |
| "loss": 0.8168, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.6505144374377697, | |
| "grad_norm": 2.678056478500366, | |
| "learning_rate": 3.915809270937051e-05, | |
| "loss": 0.7295, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.6521739130434783, | |
| "grad_norm": 2.953716993331909, | |
| "learning_rate": 3.91304347826087e-05, | |
| "loss": 0.7825, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.6538333886491868, | |
| "grad_norm": 3.628190755844116, | |
| "learning_rate": 3.910277685584689e-05, | |
| "loss": 0.7077, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.6554928642548955, | |
| "grad_norm": 4.070046424865723, | |
| "learning_rate": 3.9075118929085073e-05, | |
| "loss": 0.7921, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.6571523398606041, | |
| "grad_norm": 3.1164205074310303, | |
| "learning_rate": 3.9047461002323264e-05, | |
| "loss": 0.7921, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.6588118154663126, | |
| "grad_norm": 4.493014812469482, | |
| "learning_rate": 3.9019803075561455e-05, | |
| "loss": 0.7053, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.6604712910720213, | |
| "grad_norm": 3.5576398372650146, | |
| "learning_rate": 3.8992145148799645e-05, | |
| "loss": 0.7438, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.6621307666777299, | |
| "grad_norm": 2.7487661838531494, | |
| "learning_rate": 3.8964487222037836e-05, | |
| "loss": 0.7851, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.6637902422834384, | |
| "grad_norm": 3.2422597408294678, | |
| "learning_rate": 3.8936829295276027e-05, | |
| "loss": 0.6859, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.6637902422834384, | |
| "eval_gen_len": 45.130421686746985, | |
| "eval_loss": 0.6545064449310303, | |
| "eval_model_preparation_time": 0.0137, | |
| "eval_runtime": 1363.3071, | |
| "eval_samples_per_second": 4.862, | |
| "eval_steps_per_second": 0.304, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.665449717889147, | |
| "grad_norm": 3.739856481552124, | |
| "learning_rate": 3.890917136851422e-05, | |
| "loss": 0.7867, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.6671091934948556, | |
| "grad_norm": 2.2884652614593506, | |
| "learning_rate": 3.888151344175241e-05, | |
| "loss": 0.7778, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.6687686691005642, | |
| "grad_norm": 2.648066759109497, | |
| "learning_rate": 3.88538555149906e-05, | |
| "loss": 0.7422, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.6704281447062728, | |
| "grad_norm": 3.048558235168457, | |
| "learning_rate": 3.882619758822879e-05, | |
| "loss": 0.7105, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.6720876203119814, | |
| "grad_norm": 2.565505027770996, | |
| "learning_rate": 3.879853966146697e-05, | |
| "loss": 0.7514, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.67374709591769, | |
| "grad_norm": 2.495495557785034, | |
| "learning_rate": 3.8770881734705164e-05, | |
| "loss": 0.7649, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.6754065715233986, | |
| "grad_norm": 2.873587131500244, | |
| "learning_rate": 3.8743223807943354e-05, | |
| "loss": 0.789, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.6770660471291072, | |
| "grad_norm": 3.2935984134674072, | |
| "learning_rate": 3.8715565881181545e-05, | |
| "loss": 0.7066, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.6787255227348158, | |
| "grad_norm": 2.862403392791748, | |
| "learning_rate": 3.8687907954419735e-05, | |
| "loss": 0.6715, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.6803849983405243, | |
| "grad_norm": 2.6522021293640137, | |
| "learning_rate": 3.8660250027657926e-05, | |
| "loss": 0.705, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.682044473946233, | |
| "grad_norm": 2.5446276664733887, | |
| "learning_rate": 3.863259210089612e-05, | |
| "loss": 0.8133, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.6837039495519416, | |
| "grad_norm": 4.17519998550415, | |
| "learning_rate": 3.8604934174134314e-05, | |
| "loss": 0.6572, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.6853634251576501, | |
| "grad_norm": 2.3463404178619385, | |
| "learning_rate": 3.8577276247372505e-05, | |
| "loss": 0.7737, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.6870229007633588, | |
| "grad_norm": 2.4097015857696533, | |
| "learning_rate": 3.854961832061069e-05, | |
| "loss": 0.7391, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.6886823763690674, | |
| "grad_norm": 3.627779483795166, | |
| "learning_rate": 3.852196039384888e-05, | |
| "loss": 0.7328, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.6903418519747759, | |
| "grad_norm": 2.6210310459136963, | |
| "learning_rate": 3.849430246708707e-05, | |
| "loss": 0.7873, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.6920013275804846, | |
| "grad_norm": 2.494967460632324, | |
| "learning_rate": 3.846664454032526e-05, | |
| "loss": 0.7616, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.6936608031861932, | |
| "grad_norm": 3.345163106918335, | |
| "learning_rate": 3.843898661356345e-05, | |
| "loss": 0.7826, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.6953202787919017, | |
| "grad_norm": 2.6801059246063232, | |
| "learning_rate": 3.841132868680164e-05, | |
| "loss": 0.7355, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.6969797543976104, | |
| "grad_norm": 2.771024227142334, | |
| "learning_rate": 3.838367076003983e-05, | |
| "loss": 0.794, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.698639230003319, | |
| "grad_norm": 3.4726293087005615, | |
| "learning_rate": 3.835601283327802e-05, | |
| "loss": 0.8092, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.7002987056090275, | |
| "grad_norm": 3.0892086029052734, | |
| "learning_rate": 3.8328354906516213e-05, | |
| "loss": 0.8002, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.7019581812147362, | |
| "grad_norm": 2.4271984100341797, | |
| "learning_rate": 3.83006969797544e-05, | |
| "loss": 0.7059, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.7036176568204447, | |
| "grad_norm": 2.875136613845825, | |
| "learning_rate": 3.827303905299259e-05, | |
| "loss": 0.7054, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.7052771324261533, | |
| "grad_norm": 2.9489753246307373, | |
| "learning_rate": 3.824538112623078e-05, | |
| "loss": 0.8017, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.706936608031862, | |
| "grad_norm": 2.9833528995513916, | |
| "learning_rate": 3.821772319946897e-05, | |
| "loss": 0.8047, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.7085960836375705, | |
| "grad_norm": 3.6213622093200684, | |
| "learning_rate": 3.819006527270716e-05, | |
| "loss": 0.6837, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.7102555592432791, | |
| "grad_norm": 2.7520642280578613, | |
| "learning_rate": 3.816240734594535e-05, | |
| "loss": 0.7782, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.7119150348489878, | |
| "grad_norm": 2.3629531860351562, | |
| "learning_rate": 3.813474941918354e-05, | |
| "loss": 0.7936, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.7135745104546963, | |
| "grad_norm": 7.7020158767700195, | |
| "learning_rate": 3.810709149242173e-05, | |
| "loss": 0.8537, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.7152339860604049, | |
| "grad_norm": 2.456869602203369, | |
| "learning_rate": 3.807943356565992e-05, | |
| "loss": 0.7971, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.7168934616661136, | |
| "grad_norm": 3.720423936843872, | |
| "learning_rate": 3.8051775638898106e-05, | |
| "loss": 0.7088, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.7185529372718221, | |
| "grad_norm": 2.5650722980499268, | |
| "learning_rate": 3.80241177121363e-05, | |
| "loss": 0.7349, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.7202124128775307, | |
| "grad_norm": 3.272597551345825, | |
| "learning_rate": 3.799645978537449e-05, | |
| "loss": 0.7849, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.7218718884832392, | |
| "grad_norm": 2.7749786376953125, | |
| "learning_rate": 3.796880185861268e-05, | |
| "loss": 0.7628, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.7235313640889479, | |
| "grad_norm": 3.038445472717285, | |
| "learning_rate": 3.794114393185087e-05, | |
| "loss": 0.8163, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.7251908396946565, | |
| "grad_norm": 2.468409538269043, | |
| "learning_rate": 3.791348600508906e-05, | |
| "loss": 0.8063, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.726850315300365, | |
| "grad_norm": 2.681001901626587, | |
| "learning_rate": 3.788582807832725e-05, | |
| "loss": 0.7814, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.7285097909060737, | |
| "grad_norm": 2.493736743927002, | |
| "learning_rate": 3.785817015156544e-05, | |
| "loss": 0.7732, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.7301692665117823, | |
| "grad_norm": 3.0504839420318604, | |
| "learning_rate": 3.783051222480363e-05, | |
| "loss": 0.7583, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.7318287421174908, | |
| "grad_norm": 3.1474430561065674, | |
| "learning_rate": 3.780285429804182e-05, | |
| "loss": 0.7625, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.7334882177231995, | |
| "grad_norm": 4.236194610595703, | |
| "learning_rate": 3.7775196371280005e-05, | |
| "loss": 0.7585, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.7351476933289081, | |
| "grad_norm": 3.4272358417510986, | |
| "learning_rate": 3.7747538444518196e-05, | |
| "loss": 0.7197, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.7368071689346166, | |
| "grad_norm": 6.535729885101318, | |
| "learning_rate": 3.771988051775639e-05, | |
| "loss": 0.7672, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.7384666445403253, | |
| "grad_norm": 3.000758171081543, | |
| "learning_rate": 3.769222259099458e-05, | |
| "loss": 0.7655, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.7401261201460339, | |
| "grad_norm": 3.097958564758301, | |
| "learning_rate": 3.766456466423277e-05, | |
| "loss": 0.7275, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.7417855957517424, | |
| "grad_norm": 4.476720333099365, | |
| "learning_rate": 3.763690673747096e-05, | |
| "loss": 0.7846, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.7434450713574511, | |
| "grad_norm": 2.7858529090881348, | |
| "learning_rate": 3.760924881070915e-05, | |
| "loss": 0.7956, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.7451045469631596, | |
| "grad_norm": 3.158928155899048, | |
| "learning_rate": 3.758159088394734e-05, | |
| "loss": 0.7277, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.7467640225688682, | |
| "grad_norm": 3.0849194526672363, | |
| "learning_rate": 3.755393295718553e-05, | |
| "loss": 0.7204, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.7484234981745769, | |
| "grad_norm": 2.7116057872772217, | |
| "learning_rate": 3.752627503042372e-05, | |
| "loss": 0.7761, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.7500829737802854, | |
| "grad_norm": 2.537970542907715, | |
| "learning_rate": 3.749861710366191e-05, | |
| "loss": 0.7093, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.751742449385994, | |
| "grad_norm": 2.507575035095215, | |
| "learning_rate": 3.74709591769001e-05, | |
| "loss": 0.7035, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.7534019249917027, | |
| "grad_norm": 2.7225263118743896, | |
| "learning_rate": 3.744330125013829e-05, | |
| "loss": 0.7011, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.7550614005974112, | |
| "grad_norm": 2.6047916412353516, | |
| "learning_rate": 3.7415643323376484e-05, | |
| "loss": 0.7202, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.7567208762031198, | |
| "grad_norm": 3.1778979301452637, | |
| "learning_rate": 3.7387985396614674e-05, | |
| "loss": 0.7007, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.7583803518088285, | |
| "grad_norm": 3.0564684867858887, | |
| "learning_rate": 3.7360327469852865e-05, | |
| "loss": 0.7175, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.760039827414537, | |
| "grad_norm": 2.680342674255371, | |
| "learning_rate": 3.7332669543091055e-05, | |
| "loss": 0.7314, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.7616993030202456, | |
| "grad_norm": 4.322812080383301, | |
| "learning_rate": 3.7305011616329246e-05, | |
| "loss": 0.7302, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.7633587786259542, | |
| "grad_norm": 2.9004077911376953, | |
| "learning_rate": 3.727735368956743e-05, | |
| "loss": 0.7247, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.7650182542316628, | |
| "grad_norm": 2.5869526863098145, | |
| "learning_rate": 3.724969576280562e-05, | |
| "loss": 0.7489, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.7666777298373714, | |
| "grad_norm": 3.1568167209625244, | |
| "learning_rate": 3.722203783604381e-05, | |
| "loss": 0.7502, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.76833720544308, | |
| "grad_norm": 2.8454699516296387, | |
| "learning_rate": 3.7194379909282e-05, | |
| "loss": 0.7598, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.7699966810487886, | |
| "grad_norm": 2.3352134227752686, | |
| "learning_rate": 3.716672198252019e-05, | |
| "loss": 0.7036, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.7716561566544972, | |
| "grad_norm": 2.986163377761841, | |
| "learning_rate": 3.713906405575838e-05, | |
| "loss": 0.679, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.7733156322602057, | |
| "grad_norm": 3.093139886856079, | |
| "learning_rate": 3.7111406128996574e-05, | |
| "loss": 0.7585, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.7749751078659144, | |
| "grad_norm": 2.8010923862457275, | |
| "learning_rate": 3.7083748202234764e-05, | |
| "loss": 0.7965, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.776634583471623, | |
| "grad_norm": 3.4075088500976562, | |
| "learning_rate": 3.7056090275472955e-05, | |
| "loss": 0.6914, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.7782940590773315, | |
| "grad_norm": 3.064790725708008, | |
| "learning_rate": 3.7028432348711145e-05, | |
| "loss": 0.7382, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.7799535346830402, | |
| "grad_norm": 2.642049789428711, | |
| "learning_rate": 3.700077442194933e-05, | |
| "loss": 0.7604, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.7816130102887487, | |
| "grad_norm": 2.7482261657714844, | |
| "learning_rate": 3.697311649518752e-05, | |
| "loss": 0.7906, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.7832724858944573, | |
| "grad_norm": 3.271319627761841, | |
| "learning_rate": 3.694545856842571e-05, | |
| "loss": 0.7139, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.784931961500166, | |
| "grad_norm": 3.490607500076294, | |
| "learning_rate": 3.69178006416639e-05, | |
| "loss": 0.6696, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.7865914371058745, | |
| "grad_norm": 3.562227487564087, | |
| "learning_rate": 3.689014271490209e-05, | |
| "loss": 0.7333, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.7882509127115831, | |
| "grad_norm": 2.2841796875, | |
| "learning_rate": 3.686248478814028e-05, | |
| "loss": 0.7322, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.7899103883172918, | |
| "grad_norm": 2.444272041320801, | |
| "learning_rate": 3.683482686137847e-05, | |
| "loss": 0.7256, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.7915698639230003, | |
| "grad_norm": 2.394650936126709, | |
| "learning_rate": 3.6807168934616664e-05, | |
| "loss": 0.7735, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.7932293395287089, | |
| "grad_norm": 3.035123348236084, | |
| "learning_rate": 3.6779511007854854e-05, | |
| "loss": 0.7702, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.7948888151344176, | |
| "grad_norm": 2.773576259613037, | |
| "learning_rate": 3.675185308109304e-05, | |
| "loss": 0.7024, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.7965482907401261, | |
| "grad_norm": 3.0329270362854004, | |
| "learning_rate": 3.672419515433123e-05, | |
| "loss": 0.7271, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.7982077663458347, | |
| "grad_norm": 3.254540205001831, | |
| "learning_rate": 3.669653722756942e-05, | |
| "loss": 0.7227, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.7998672419515434, | |
| "grad_norm": 3.238571882247925, | |
| "learning_rate": 3.666887930080761e-05, | |
| "loss": 0.8293, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.8015267175572519, | |
| "grad_norm": 5.008415222167969, | |
| "learning_rate": 3.66412213740458e-05, | |
| "loss": 0.7218, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.8031861931629605, | |
| "grad_norm": 2.9953482151031494, | |
| "learning_rate": 3.661356344728399e-05, | |
| "loss": 0.7505, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.8048456687686691, | |
| "grad_norm": 2.52767014503479, | |
| "learning_rate": 3.658590552052218e-05, | |
| "loss": 0.7171, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.8065051443743777, | |
| "grad_norm": 2.4959516525268555, | |
| "learning_rate": 3.655824759376037e-05, | |
| "loss": 0.759, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.8081646199800863, | |
| "grad_norm": 3.0055716037750244, | |
| "learning_rate": 3.653058966699856e-05, | |
| "loss": 0.7182, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.8098240955857949, | |
| "grad_norm": 3.2920753955841064, | |
| "learning_rate": 3.6502931740236754e-05, | |
| "loss": 0.8102, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.8114835711915035, | |
| "grad_norm": 2.4466500282287598, | |
| "learning_rate": 3.6475273813474944e-05, | |
| "loss": 0.7053, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.813143046797212, | |
| "grad_norm": 3.1616554260253906, | |
| "learning_rate": 3.6447615886713135e-05, | |
| "loss": 0.7155, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.8148025224029207, | |
| "grad_norm": 2.9846699237823486, | |
| "learning_rate": 3.6419957959951325e-05, | |
| "loss": 0.7488, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.8164619980086293, | |
| "grad_norm": 3.1745669841766357, | |
| "learning_rate": 3.6392300033189516e-05, | |
| "loss": 0.7309, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.8181214736143378, | |
| "grad_norm": 3.3618462085723877, | |
| "learning_rate": 3.636464210642771e-05, | |
| "loss": 0.7755, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.8197809492200465, | |
| "grad_norm": 2.7471730709075928, | |
| "learning_rate": 3.63369841796659e-05, | |
| "loss": 0.8052, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.8214404248257551, | |
| "grad_norm": 2.917133331298828, | |
| "learning_rate": 3.630932625290409e-05, | |
| "loss": 0.7502, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.8230999004314636, | |
| "grad_norm": 3.5757155418395996, | |
| "learning_rate": 3.628166832614228e-05, | |
| "loss": 0.7106, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.8247593760371722, | |
| "grad_norm": 3.1652987003326416, | |
| "learning_rate": 3.625401039938047e-05, | |
| "loss": 0.7839, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.8264188516428809, | |
| "grad_norm": 3.2558505535125732, | |
| "learning_rate": 3.622635247261865e-05, | |
| "loss": 0.7705, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.8280783272485894, | |
| "grad_norm": 2.7738609313964844, | |
| "learning_rate": 3.6198694545856844e-05, | |
| "loss": 0.7781, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.829737802854298, | |
| "grad_norm": 3.5117483139038086, | |
| "learning_rate": 3.6171036619095034e-05, | |
| "loss": 0.7234, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.829737802854298, | |
| "eval_gen_len": 45.7875, | |
| "eval_loss": 0.6476278901100159, | |
| "eval_model_preparation_time": 0.0137, | |
| "eval_runtime": 1347.2209, | |
| "eval_samples_per_second": 4.92, | |
| "eval_steps_per_second": 0.308, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.8313972784600067, | |
| "grad_norm": 2.7426326274871826, | |
| "learning_rate": 3.6143378692333225e-05, | |
| "loss": 0.844, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.8330567540657152, | |
| "grad_norm": 3.0585334300994873, | |
| "learning_rate": 3.6115720765571416e-05, | |
| "loss": 0.7621, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.8347162296714238, | |
| "grad_norm": 2.7952592372894287, | |
| "learning_rate": 3.6088062838809606e-05, | |
| "loss": 0.7114, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.8363757052771325, | |
| "grad_norm": 2.2673919200897217, | |
| "learning_rate": 3.60604049120478e-05, | |
| "loss": 0.7163, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.838035180882841, | |
| "grad_norm": 2.556400775909424, | |
| "learning_rate": 3.603274698528599e-05, | |
| "loss": 0.7641, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.8396946564885496, | |
| "grad_norm": 3.465658187866211, | |
| "learning_rate": 3.600508905852418e-05, | |
| "loss": 0.777, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.8413541320942582, | |
| "grad_norm": 3.9427356719970703, | |
| "learning_rate": 3.597743113176236e-05, | |
| "loss": 0.7304, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.8430136076999668, | |
| "grad_norm": 2.4685842990875244, | |
| "learning_rate": 3.594977320500055e-05, | |
| "loss": 0.7541, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.8446730833056754, | |
| "grad_norm": 2.746155023574829, | |
| "learning_rate": 3.592211527823874e-05, | |
| "loss": 0.7408, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.846332558911384, | |
| "grad_norm": 3.3263399600982666, | |
| "learning_rate": 3.5894457351476934e-05, | |
| "loss": 0.755, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.8479920345170926, | |
| "grad_norm": 2.7520947456359863, | |
| "learning_rate": 3.5866799424715124e-05, | |
| "loss": 0.7239, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.8496515101228012, | |
| "grad_norm": 3.0293684005737305, | |
| "learning_rate": 3.5839141497953315e-05, | |
| "loss": 0.8202, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.8513109857285098, | |
| "grad_norm": 3.1652681827545166, | |
| "learning_rate": 3.5811483571191506e-05, | |
| "loss": 0.7466, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.8529704613342184, | |
| "grad_norm": 3.104422092437744, | |
| "learning_rate": 3.5783825644429696e-05, | |
| "loss": 0.6651, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.854629936939927, | |
| "grad_norm": 2.858640193939209, | |
| "learning_rate": 3.575616771766789e-05, | |
| "loss": 0.7021, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.8562894125456356, | |
| "grad_norm": 2.765066385269165, | |
| "learning_rate": 3.572850979090607e-05, | |
| "loss": 0.768, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.8579488881513442, | |
| "grad_norm": 3.4913389682769775, | |
| "learning_rate": 3.570085186414426e-05, | |
| "loss": 0.6938, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.8596083637570527, | |
| "grad_norm": 2.3819427490234375, | |
| "learning_rate": 3.567319393738245e-05, | |
| "loss": 0.6678, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.8612678393627614, | |
| "grad_norm": 5.098767280578613, | |
| "learning_rate": 3.564553601062064e-05, | |
| "loss": 0.7674, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.86292731496847, | |
| "grad_norm": 2.6126420497894287, | |
| "learning_rate": 3.561787808385883e-05, | |
| "loss": 0.6877, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.8645867905741785, | |
| "grad_norm": 2.502443313598633, | |
| "learning_rate": 3.5590220157097024e-05, | |
| "loss": 0.7378, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.8662462661798872, | |
| "grad_norm": 3.2261953353881836, | |
| "learning_rate": 3.5562562230335214e-05, | |
| "loss": 0.7222, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.8679057417855958, | |
| "grad_norm": 3.1731908321380615, | |
| "learning_rate": 3.5534904303573405e-05, | |
| "loss": 0.8105, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.8695652173913043, | |
| "grad_norm": 3.071484327316284, | |
| "learning_rate": 3.5507246376811596e-05, | |
| "loss": 0.7798, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.871224692997013, | |
| "grad_norm": 3.302419662475586, | |
| "learning_rate": 3.5479588450049786e-05, | |
| "loss": 0.7042, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.8728841686027216, | |
| "grad_norm": 2.4384875297546387, | |
| "learning_rate": 3.545193052328798e-05, | |
| "loss": 0.7471, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.8745436442084301, | |
| "grad_norm": 3.48577880859375, | |
| "learning_rate": 3.542427259652617e-05, | |
| "loss": 0.7472, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.8762031198141388, | |
| "grad_norm": 3.4272027015686035, | |
| "learning_rate": 3.539661466976436e-05, | |
| "loss": 0.7818, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.8778625954198473, | |
| "grad_norm": 2.677224636077881, | |
| "learning_rate": 3.536895674300255e-05, | |
| "loss": 0.7573, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.8795220710255559, | |
| "grad_norm": 4.305156230926514, | |
| "learning_rate": 3.534129881624074e-05, | |
| "loss": 0.8223, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.8811815466312645, | |
| "grad_norm": 3.6015806198120117, | |
| "learning_rate": 3.531364088947893e-05, | |
| "loss": 0.6546, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.8828410222369731, | |
| "grad_norm": 3.4311509132385254, | |
| "learning_rate": 3.528598296271712e-05, | |
| "loss": 0.7541, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.8845004978426817, | |
| "grad_norm": 2.226832866668701, | |
| "learning_rate": 3.525832503595531e-05, | |
| "loss": 0.6735, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.8861599734483903, | |
| "grad_norm": 2.7130308151245117, | |
| "learning_rate": 3.52306671091935e-05, | |
| "loss": 0.738, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.8878194490540989, | |
| "grad_norm": 2.3154845237731934, | |
| "learning_rate": 3.5203009182431686e-05, | |
| "loss": 0.7721, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.8894789246598075, | |
| "grad_norm": 2.7403759956359863, | |
| "learning_rate": 3.5175351255669876e-05, | |
| "loss": 0.7622, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.891138400265516, | |
| "grad_norm": 2.614175796508789, | |
| "learning_rate": 3.514769332890807e-05, | |
| "loss": 0.7583, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.8927978758712247, | |
| "grad_norm": 2.589661121368408, | |
| "learning_rate": 3.512003540214626e-05, | |
| "loss": 0.7198, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.8944573514769333, | |
| "grad_norm": 2.6613898277282715, | |
| "learning_rate": 3.509237747538445e-05, | |
| "loss": 0.7294, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.8961168270826418, | |
| "grad_norm": 2.723780632019043, | |
| "learning_rate": 3.506471954862264e-05, | |
| "loss": 0.7546, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.8977763026883505, | |
| "grad_norm": 2.591231107711792, | |
| "learning_rate": 3.503706162186083e-05, | |
| "loss": 0.7599, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.8994357782940591, | |
| "grad_norm": 3.1421103477478027, | |
| "learning_rate": 3.500940369509902e-05, | |
| "loss": 0.7105, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.9010952538997676, | |
| "grad_norm": 3.057150363922119, | |
| "learning_rate": 3.498174576833721e-05, | |
| "loss": 0.7236, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.9027547295054763, | |
| "grad_norm": 2.2406747341156006, | |
| "learning_rate": 3.4954087841575394e-05, | |
| "loss": 0.8167, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.9044142051111849, | |
| "grad_norm": 2.847642421722412, | |
| "learning_rate": 3.4926429914813585e-05, | |
| "loss": 0.736, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.9060736807168934, | |
| "grad_norm": 2.9997546672821045, | |
| "learning_rate": 3.4898771988051776e-05, | |
| "loss": 0.7859, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.9077331563226021, | |
| "grad_norm": 3.000199556350708, | |
| "learning_rate": 3.4871114061289966e-05, | |
| "loss": 0.8063, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.9093926319283107, | |
| "grad_norm": 3.7363743782043457, | |
| "learning_rate": 3.484345613452816e-05, | |
| "loss": 0.7054, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.9110521075340192, | |
| "grad_norm": 3.2134993076324463, | |
| "learning_rate": 3.481579820776635e-05, | |
| "loss": 0.7257, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.9127115831397279, | |
| "grad_norm": 9.707784652709961, | |
| "learning_rate": 3.478814028100454e-05, | |
| "loss": 0.7097, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.9143710587454364, | |
| "grad_norm": 3.060612201690674, | |
| "learning_rate": 3.476048235424273e-05, | |
| "loss": 0.7491, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.916030534351145, | |
| "grad_norm": 2.9372243881225586, | |
| "learning_rate": 3.473282442748092e-05, | |
| "loss": 0.785, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.9176900099568537, | |
| "grad_norm": 2.6264986991882324, | |
| "learning_rate": 3.47051665007191e-05, | |
| "loss": 0.7403, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.9193494855625622, | |
| "grad_norm": 3.810741901397705, | |
| "learning_rate": 3.4677508573957294e-05, | |
| "loss": 0.782, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.9210089611682708, | |
| "grad_norm": 2.4057788848876953, | |
| "learning_rate": 3.4649850647195484e-05, | |
| "loss": 0.7332, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.9226684367739795, | |
| "grad_norm": 2.5776431560516357, | |
| "learning_rate": 3.4622192720433675e-05, | |
| "loss": 0.7777, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.924327912379688, | |
| "grad_norm": 2.6932153701782227, | |
| "learning_rate": 3.4594534793671866e-05, | |
| "loss": 0.7943, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.9259873879853966, | |
| "grad_norm": 2.4345345497131348, | |
| "learning_rate": 3.4566876866910056e-05, | |
| "loss": 0.7558, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.9276468635911053, | |
| "grad_norm": 4.420886039733887, | |
| "learning_rate": 3.453921894014825e-05, | |
| "loss": 0.7235, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.9293063391968138, | |
| "grad_norm": 3.0717618465423584, | |
| "learning_rate": 3.451156101338644e-05, | |
| "loss": 0.7157, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.9309658148025224, | |
| "grad_norm": 2.4393157958984375, | |
| "learning_rate": 3.448390308662463e-05, | |
| "loss": 0.7524, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.932625290408231, | |
| "grad_norm": 2.8943607807159424, | |
| "learning_rate": 3.445624515986282e-05, | |
| "loss": 0.703, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.9342847660139396, | |
| "grad_norm": 2.4995365142822266, | |
| "learning_rate": 3.442858723310101e-05, | |
| "loss": 0.7619, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.9359442416196482, | |
| "grad_norm": 2.7852730751037598, | |
| "learning_rate": 3.44009293063392e-05, | |
| "loss": 0.7796, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.9376037172253567, | |
| "grad_norm": 2.8426830768585205, | |
| "learning_rate": 3.437327137957739e-05, | |
| "loss": 0.7605, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.9392631928310654, | |
| "grad_norm": 2.4552955627441406, | |
| "learning_rate": 3.434561345281558e-05, | |
| "loss": 0.6601, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.940922668436774, | |
| "grad_norm": 3.545321464538574, | |
| "learning_rate": 3.431795552605377e-05, | |
| "loss": 0.7432, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.9425821440424825, | |
| "grad_norm": 3.482745885848999, | |
| "learning_rate": 3.429029759929196e-05, | |
| "loss": 0.6854, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.9442416196481912, | |
| "grad_norm": 2.753021717071533, | |
| "learning_rate": 3.426263967253015e-05, | |
| "loss": 0.726, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.9459010952538998, | |
| "grad_norm": 2.742332696914673, | |
| "learning_rate": 3.4234981745768344e-05, | |
| "loss": 0.727, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.9475605708596083, | |
| "grad_norm": 3.272718667984009, | |
| "learning_rate": 3.4207323819006534e-05, | |
| "loss": 0.7593, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.949220046465317, | |
| "grad_norm": 2.3225224018096924, | |
| "learning_rate": 3.417966589224472e-05, | |
| "loss": 0.6858, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.9508795220710256, | |
| "grad_norm": 3.2801098823547363, | |
| "learning_rate": 3.415200796548291e-05, | |
| "loss": 0.7554, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.9525389976767341, | |
| "grad_norm": 2.5931944847106934, | |
| "learning_rate": 3.41243500387211e-05, | |
| "loss": 0.7443, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.9541984732824428, | |
| "grad_norm": 2.872978687286377, | |
| "learning_rate": 3.409669211195929e-05, | |
| "loss": 0.673, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.9558579488881513, | |
| "grad_norm": 2.8649654388427734, | |
| "learning_rate": 3.406903418519748e-05, | |
| "loss": 0.7302, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.9575174244938599, | |
| "grad_norm": 3.1845171451568604, | |
| "learning_rate": 3.404137625843567e-05, | |
| "loss": 0.8126, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.9591769000995686, | |
| "grad_norm": 2.9678781032562256, | |
| "learning_rate": 3.401371833167386e-05, | |
| "loss": 0.7463, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.9608363757052771, | |
| "grad_norm": 3.0109095573425293, | |
| "learning_rate": 3.398606040491205e-05, | |
| "loss": 0.7716, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.9624958513109857, | |
| "grad_norm": 3.596421241760254, | |
| "learning_rate": 3.395840247815024e-05, | |
| "loss": 0.7262, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.9641553269166944, | |
| "grad_norm": 2.6864354610443115, | |
| "learning_rate": 3.393074455138843e-05, | |
| "loss": 0.7623, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.9658148025224029, | |
| "grad_norm": 3.0654313564300537, | |
| "learning_rate": 3.390308662462662e-05, | |
| "loss": 0.7788, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.9674742781281115, | |
| "grad_norm": 3.0649118423461914, | |
| "learning_rate": 3.387542869786481e-05, | |
| "loss": 0.7007, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.9691337537338202, | |
| "grad_norm": 2.9800171852111816, | |
| "learning_rate": 3.3847770771103e-05, | |
| "loss": 0.7909, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.9707932293395287, | |
| "grad_norm": 4.103562355041504, | |
| "learning_rate": 3.382011284434119e-05, | |
| "loss": 0.7356, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.9724527049452373, | |
| "grad_norm": 3.1845431327819824, | |
| "learning_rate": 3.379245491757938e-05, | |
| "loss": 0.7547, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.974112180550946, | |
| "grad_norm": 3.618088960647583, | |
| "learning_rate": 3.376479699081757e-05, | |
| "loss": 0.7636, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.9757716561566545, | |
| "grad_norm": 2.9461872577667236, | |
| "learning_rate": 3.373713906405576e-05, | |
| "loss": 0.7425, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.9774311317623631, | |
| "grad_norm": 2.7135982513427734, | |
| "learning_rate": 3.370948113729395e-05, | |
| "loss": 0.7326, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.9790906073680717, | |
| "grad_norm": 2.974194049835205, | |
| "learning_rate": 3.3681823210532136e-05, | |
| "loss": 0.7332, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.9807500829737803, | |
| "grad_norm": 3.9138126373291016, | |
| "learning_rate": 3.3654165283770326e-05, | |
| "loss": 0.6801, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.9824095585794889, | |
| "grad_norm": 3.1881282329559326, | |
| "learning_rate": 3.362650735700852e-05, | |
| "loss": 0.6514, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.9840690341851974, | |
| "grad_norm": 3.0503334999084473, | |
| "learning_rate": 3.359884943024671e-05, | |
| "loss": 0.7232, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.9857285097909061, | |
| "grad_norm": 3.4355130195617676, | |
| "learning_rate": 3.35711915034849e-05, | |
| "loss": 0.7704, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.9873879853966147, | |
| "grad_norm": 2.670332193374634, | |
| "learning_rate": 3.354353357672309e-05, | |
| "loss": 0.7534, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.9890474610023232, | |
| "grad_norm": 3.4164748191833496, | |
| "learning_rate": 3.351587564996128e-05, | |
| "loss": 0.6759, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.9907069366080319, | |
| "grad_norm": 2.7511496543884277, | |
| "learning_rate": 3.348821772319947e-05, | |
| "loss": 0.7524, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.9923664122137404, | |
| "grad_norm": 3.1281206607818604, | |
| "learning_rate": 3.346055979643766e-05, | |
| "loss": 0.7486, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.994025887819449, | |
| "grad_norm": 2.146121025085449, | |
| "learning_rate": 3.343290186967585e-05, | |
| "loss": 0.7709, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.9956853634251577, | |
| "grad_norm": 2.9810049533843994, | |
| "learning_rate": 3.3405243942914035e-05, | |
| "loss": 0.7439, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.9956853634251577, | |
| "eval_gen_len": 40.46566265060241, | |
| "eval_loss": 0.6396129131317139, | |
| "eval_model_preparation_time": 0.0137, | |
| "eval_runtime": 1287.1539, | |
| "eval_samples_per_second": 5.149, | |
| "eval_steps_per_second": 0.322, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.9973448390308662, | |
| "grad_norm": 3.1074583530426025, | |
| "learning_rate": 3.3377586016152226e-05, | |
| "loss": 0.7614, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.9990043146365748, | |
| "grad_norm": 2.733332395553589, | |
| "learning_rate": 3.3349928089390416e-05, | |
| "loss": 0.7873, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 1.0006637902422835, | |
| "grad_norm": 2.6200480461120605, | |
| "learning_rate": 3.332227016262861e-05, | |
| "loss": 0.715, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.0023232658479921, | |
| "grad_norm": 2.853236436843872, | |
| "learning_rate": 3.32946122358668e-05, | |
| "loss": 0.6356, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 1.0039827414537006, | |
| "grad_norm": 2.4870617389678955, | |
| "learning_rate": 3.3266954309104995e-05, | |
| "loss": 0.735, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.0056422170594093, | |
| "grad_norm": 2.46635365486145, | |
| "learning_rate": 3.3239296382343186e-05, | |
| "loss": 0.6706, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.007301692665118, | |
| "grad_norm": 2.235780954360962, | |
| "learning_rate": 3.3211638455581376e-05, | |
| "loss": 0.6001, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 1.0089611682708264, | |
| "grad_norm": 2.660212516784668, | |
| "learning_rate": 3.318398052881957e-05, | |
| "loss": 0.6637, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 1.010620643876535, | |
| "grad_norm": 3.813750743865967, | |
| "learning_rate": 3.315632260205775e-05, | |
| "loss": 0.6834, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.0122801194822435, | |
| "grad_norm": 2.3156824111938477, | |
| "learning_rate": 3.312866467529594e-05, | |
| "loss": 0.619, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.0139395950879522, | |
| "grad_norm": 2.3183748722076416, | |
| "learning_rate": 3.310100674853413e-05, | |
| "loss": 0.6093, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 1.0155990706936608, | |
| "grad_norm": 3.1354057788848877, | |
| "learning_rate": 3.307334882177232e-05, | |
| "loss": 0.6389, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.0172585462993693, | |
| "grad_norm": 2.536813259124756, | |
| "learning_rate": 3.304569089501051e-05, | |
| "loss": 0.6711, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 1.018918021905078, | |
| "grad_norm": 2.364082098007202, | |
| "learning_rate": 3.3018032968248704e-05, | |
| "loss": 0.6867, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 1.0205774975107866, | |
| "grad_norm": 2.6374351978302, | |
| "learning_rate": 3.2990375041486895e-05, | |
| "loss": 0.69, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.022236973116495, | |
| "grad_norm": 3.3656816482543945, | |
| "learning_rate": 3.2962717114725085e-05, | |
| "loss": 0.6982, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 1.0238964487222038, | |
| "grad_norm": 2.7787158489227295, | |
| "learning_rate": 3.2935059187963276e-05, | |
| "loss": 0.6822, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 1.0255559243279124, | |
| "grad_norm": 3.168287754058838, | |
| "learning_rate": 3.290740126120146e-05, | |
| "loss": 0.6435, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.0272153999336209, | |
| "grad_norm": 2.751758575439453, | |
| "learning_rate": 3.287974333443965e-05, | |
| "loss": 0.6937, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 1.0288748755393295, | |
| "grad_norm": 2.5584921836853027, | |
| "learning_rate": 3.285208540767784e-05, | |
| "loss": 0.6076, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.0305343511450382, | |
| "grad_norm": 3.0572123527526855, | |
| "learning_rate": 3.282442748091603e-05, | |
| "loss": 0.648, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.0321938267507467, | |
| "grad_norm": 2.5673274993896484, | |
| "learning_rate": 3.279676955415422e-05, | |
| "loss": 0.5858, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 1.0338533023564553, | |
| "grad_norm": 2.4913575649261475, | |
| "learning_rate": 3.276911162739241e-05, | |
| "loss": 0.6632, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 1.035512777962164, | |
| "grad_norm": 2.7290186882019043, | |
| "learning_rate": 3.27414537006306e-05, | |
| "loss": 0.6078, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.0371722535678725, | |
| "grad_norm": 3.055506706237793, | |
| "learning_rate": 3.2713795773868794e-05, | |
| "loss": 0.67, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.0388317291735811, | |
| "grad_norm": 2.851560354232788, | |
| "learning_rate": 3.2686137847106985e-05, | |
| "loss": 0.6333, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 1.0404912047792898, | |
| "grad_norm": 2.7579662799835205, | |
| "learning_rate": 3.2658479920345175e-05, | |
| "loss": 0.6085, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.0421506803849983, | |
| "grad_norm": 3.7273287773132324, | |
| "learning_rate": 3.263082199358336e-05, | |
| "loss": 0.656, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 1.043810155990707, | |
| "grad_norm": 2.7276690006256104, | |
| "learning_rate": 3.260316406682155e-05, | |
| "loss": 0.6387, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 1.0454696315964156, | |
| "grad_norm": 3.238990068435669, | |
| "learning_rate": 3.257550614005974e-05, | |
| "loss": 0.6504, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.047129107202124, | |
| "grad_norm": 3.2276484966278076, | |
| "learning_rate": 3.254784821329793e-05, | |
| "loss": 0.6543, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 1.0487885828078327, | |
| "grad_norm": 3.0428245067596436, | |
| "learning_rate": 3.252019028653612e-05, | |
| "loss": 0.7151, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 1.0504480584135414, | |
| "grad_norm": 2.6299469470977783, | |
| "learning_rate": 3.249253235977431e-05, | |
| "loss": 0.6054, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.0521075340192498, | |
| "grad_norm": 2.7120039463043213, | |
| "learning_rate": 3.24648744330125e-05, | |
| "loss": 0.6339, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 1.0537670096249585, | |
| "grad_norm": 2.739844560623169, | |
| "learning_rate": 3.243721650625069e-05, | |
| "loss": 0.6676, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.0554264852306672, | |
| "grad_norm": 2.740752696990967, | |
| "learning_rate": 3.2409558579488884e-05, | |
| "loss": 0.6693, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.0570859608363756, | |
| "grad_norm": 6.5435051918029785, | |
| "learning_rate": 3.238190065272707e-05, | |
| "loss": 0.6744, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 1.0587454364420843, | |
| "grad_norm": 3.4088094234466553, | |
| "learning_rate": 3.235424272596526e-05, | |
| "loss": 0.6362, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 1.060404912047793, | |
| "grad_norm": 4.100635051727295, | |
| "learning_rate": 3.232658479920345e-05, | |
| "loss": 0.6715, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.0620643876535014, | |
| "grad_norm": 2.5293679237365723, | |
| "learning_rate": 3.229892687244164e-05, | |
| "loss": 0.7047, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.06372386325921, | |
| "grad_norm": 3.4982504844665527, | |
| "learning_rate": 3.227126894567983e-05, | |
| "loss": 0.6444, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 1.0653833388649188, | |
| "grad_norm": 2.6392831802368164, | |
| "learning_rate": 3.224361101891802e-05, | |
| "loss": 0.6596, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.0670428144706272, | |
| "grad_norm": 2.652277708053589, | |
| "learning_rate": 3.221595309215621e-05, | |
| "loss": 0.6365, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 1.0687022900763359, | |
| "grad_norm": 2.6296143531799316, | |
| "learning_rate": 3.21882951653944e-05, | |
| "loss": 0.684, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 1.0703617656820446, | |
| "grad_norm": 2.9363362789154053, | |
| "learning_rate": 3.216063723863259e-05, | |
| "loss": 0.6578, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.072021241287753, | |
| "grad_norm": 2.624547004699707, | |
| "learning_rate": 3.2132979311870783e-05, | |
| "loss": 0.6462, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 1.0736807168934617, | |
| "grad_norm": 2.5561087131500244, | |
| "learning_rate": 3.2105321385108974e-05, | |
| "loss": 0.643, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 1.0753401924991703, | |
| "grad_norm": 2.7740066051483154, | |
| "learning_rate": 3.2077663458347165e-05, | |
| "loss": 0.6781, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.0769996681048788, | |
| "grad_norm": 2.1783266067504883, | |
| "learning_rate": 3.2050005531585355e-05, | |
| "loss": 0.6169, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 1.0786591437105875, | |
| "grad_norm": 2.971466541290283, | |
| "learning_rate": 3.2022347604823546e-05, | |
| "loss": 0.6347, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.0803186193162961, | |
| "grad_norm": 3.469334602355957, | |
| "learning_rate": 3.1994689678061737e-05, | |
| "loss": 0.6412, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.0819780949220046, | |
| "grad_norm": 3.781665802001953, | |
| "learning_rate": 3.196703175129993e-05, | |
| "loss": 0.6077, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 1.0836375705277133, | |
| "grad_norm": 3.0542349815368652, | |
| "learning_rate": 3.193937382453812e-05, | |
| "loss": 0.6172, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 1.085297046133422, | |
| "grad_norm": 2.6497995853424072, | |
| "learning_rate": 3.191171589777631e-05, | |
| "loss": 0.6114, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.0869565217391304, | |
| "grad_norm": 3.0180935859680176, | |
| "learning_rate": 3.188405797101449e-05, | |
| "loss": 0.6214, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.088615997344839, | |
| "grad_norm": 2.9615893363952637, | |
| "learning_rate": 3.185640004425268e-05, | |
| "loss": 0.6236, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 1.0902754729505477, | |
| "grad_norm": 2.6770997047424316, | |
| "learning_rate": 3.1828742117490873e-05, | |
| "loss": 0.6799, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.0919349485562562, | |
| "grad_norm": 4.559014320373535, | |
| "learning_rate": 3.1801084190729064e-05, | |
| "loss": 0.6842, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 1.0935944241619648, | |
| "grad_norm": 2.9613256454467773, | |
| "learning_rate": 3.1773426263967255e-05, | |
| "loss": 0.6931, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 1.0952538997676735, | |
| "grad_norm": 2.513901472091675, | |
| "learning_rate": 3.1745768337205445e-05, | |
| "loss": 0.6738, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.096913375373382, | |
| "grad_norm": 3.3798329830169678, | |
| "learning_rate": 3.1718110410443636e-05, | |
| "loss": 0.6172, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 1.0985728509790906, | |
| "grad_norm": 3.9530441761016846, | |
| "learning_rate": 3.1690452483681827e-05, | |
| "loss": 0.677, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 1.1002323265847993, | |
| "grad_norm": 2.511976957321167, | |
| "learning_rate": 3.166279455692002e-05, | |
| "loss": 0.7343, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.1018918021905078, | |
| "grad_norm": 3.531120538711548, | |
| "learning_rate": 3.163513663015821e-05, | |
| "loss": 0.6117, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 1.1035512777962164, | |
| "grad_norm": 2.912233829498291, | |
| "learning_rate": 3.160747870339639e-05, | |
| "loss": 0.6477, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.105210753401925, | |
| "grad_norm": 2.7462551593780518, | |
| "learning_rate": 3.157982077663458e-05, | |
| "loss": 0.6137, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.1068702290076335, | |
| "grad_norm": 3.05641770362854, | |
| "learning_rate": 3.155216284987277e-05, | |
| "loss": 0.6083, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 1.1085297046133422, | |
| "grad_norm": 2.6118903160095215, | |
| "learning_rate": 3.1524504923110963e-05, | |
| "loss": 0.5909, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 1.1101891802190509, | |
| "grad_norm": 2.862626314163208, | |
| "learning_rate": 3.1496846996349154e-05, | |
| "loss": 0.5916, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.1118486558247593, | |
| "grad_norm": 3.342639923095703, | |
| "learning_rate": 3.1469189069587345e-05, | |
| "loss": 0.7119, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.113508131430468, | |
| "grad_norm": 2.773423910140991, | |
| "learning_rate": 3.1441531142825535e-05, | |
| "loss": 0.6357, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 1.1151676070361765, | |
| "grad_norm": 2.826077461242676, | |
| "learning_rate": 3.1413873216063726e-05, | |
| "loss": 0.6012, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.1168270826418851, | |
| "grad_norm": 3.7453114986419678, | |
| "learning_rate": 3.1386215289301917e-05, | |
| "loss": 0.6853, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 1.1184865582475938, | |
| "grad_norm": 2.565749406814575, | |
| "learning_rate": 3.13585573625401e-05, | |
| "loss": 0.6068, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 1.1201460338533025, | |
| "grad_norm": 3.0959222316741943, | |
| "learning_rate": 3.133089943577829e-05, | |
| "loss": 0.6165, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.121805509459011, | |
| "grad_norm": 2.629734992980957, | |
| "learning_rate": 3.130324150901648e-05, | |
| "loss": 0.6487, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 1.1234649850647196, | |
| "grad_norm": 3.2976136207580566, | |
| "learning_rate": 3.127558358225467e-05, | |
| "loss": 0.6019, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 1.125124460670428, | |
| "grad_norm": 2.2839882373809814, | |
| "learning_rate": 3.124792565549286e-05, | |
| "loss": 0.6067, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.1267839362761367, | |
| "grad_norm": 2.334839105606079, | |
| "learning_rate": 3.1220267728731054e-05, | |
| "loss": 0.6528, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 1.1284434118818454, | |
| "grad_norm": 1.9476408958435059, | |
| "learning_rate": 3.1192609801969244e-05, | |
| "loss": 0.6461, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.130102887487554, | |
| "grad_norm": 2.838207244873047, | |
| "learning_rate": 3.1164951875207435e-05, | |
| "loss": 0.7088, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.1317623630932625, | |
| "grad_norm": 5.253453731536865, | |
| "learning_rate": 3.1137293948445625e-05, | |
| "loss": 0.59, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 1.1334218386989712, | |
| "grad_norm": 2.9335341453552246, | |
| "learning_rate": 3.1109636021683816e-05, | |
| "loss": 0.6578, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 1.1350813143046796, | |
| "grad_norm": 3.694380521774292, | |
| "learning_rate": 3.108197809492201e-05, | |
| "loss": 0.6394, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.1367407899103883, | |
| "grad_norm": 2.563654661178589, | |
| "learning_rate": 3.10543201681602e-05, | |
| "loss": 0.6348, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.138400265516097, | |
| "grad_norm": 3.120288610458374, | |
| "learning_rate": 3.102666224139839e-05, | |
| "loss": 0.6605, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 1.1400597411218054, | |
| "grad_norm": 3.0192902088165283, | |
| "learning_rate": 3.099900431463658e-05, | |
| "loss": 0.6986, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.141719216727514, | |
| "grad_norm": 2.648144483566284, | |
| "learning_rate": 3.097134638787477e-05, | |
| "loss": 0.6621, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 1.1433786923332228, | |
| "grad_norm": 4.2885541915893555, | |
| "learning_rate": 3.094368846111296e-05, | |
| "loss": 0.638, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 1.1450381679389312, | |
| "grad_norm": 2.5953876972198486, | |
| "learning_rate": 3.091603053435115e-05, | |
| "loss": 0.671, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.1466976435446399, | |
| "grad_norm": 2.913402557373047, | |
| "learning_rate": 3.088837260758934e-05, | |
| "loss": 0.6305, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 1.1483571191503485, | |
| "grad_norm": 3.232034206390381, | |
| "learning_rate": 3.086071468082753e-05, | |
| "loss": 0.6411, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 1.150016594756057, | |
| "grad_norm": 3.1293694972991943, | |
| "learning_rate": 3.0833056754065715e-05, | |
| "loss": 0.6412, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.1516760703617657, | |
| "grad_norm": 2.8033883571624756, | |
| "learning_rate": 3.0805398827303906e-05, | |
| "loss": 0.6943, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 1.1533355459674743, | |
| "grad_norm": 3.2198326587677, | |
| "learning_rate": 3.07777409005421e-05, | |
| "loss": 0.6954, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.1549950215731828, | |
| "grad_norm": 2.676884174346924, | |
| "learning_rate": 3.075008297378029e-05, | |
| "loss": 0.7078, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.1566544971788915, | |
| "grad_norm": 2.441145658493042, | |
| "learning_rate": 3.072242504701848e-05, | |
| "loss": 0.6885, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 1.1583139727846001, | |
| "grad_norm": 5.0609612464904785, | |
| "learning_rate": 3.069476712025667e-05, | |
| "loss": 0.6249, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 1.1599734483903086, | |
| "grad_norm": 3.058180332183838, | |
| "learning_rate": 3.066710919349486e-05, | |
| "loss": 0.5588, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.1616329239960173, | |
| "grad_norm": 3.6752076148986816, | |
| "learning_rate": 3.063945126673305e-05, | |
| "loss": 0.6351, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.1616329239960173, | |
| "eval_gen_len": 42.34563253012048, | |
| "eval_loss": 0.6441066265106201, | |
| "eval_model_preparation_time": 0.0137, | |
| "eval_runtime": 1313.5891, | |
| "eval_samples_per_second": 5.046, | |
| "eval_steps_per_second": 0.316, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.163292399601726, | |
| "grad_norm": 2.524378776550293, | |
| "learning_rate": 3.061179333997124e-05, | |
| "loss": 0.6689, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 1.1649518752074344, | |
| "grad_norm": 3.153264284133911, | |
| "learning_rate": 3.0584135413209424e-05, | |
| "loss": 0.6713, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.166611350813143, | |
| "grad_norm": 2.891984224319458, | |
| "learning_rate": 3.0556477486447615e-05, | |
| "loss": 0.6728, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 1.1682708264188517, | |
| "grad_norm": 2.646772623062134, | |
| "learning_rate": 3.0528819559685805e-05, | |
| "loss": 0.6418, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 1.1699303020245602, | |
| "grad_norm": 2.870234489440918, | |
| "learning_rate": 3.0501161632923996e-05, | |
| "loss": 0.6429, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.1715897776302688, | |
| "grad_norm": 2.939676523208618, | |
| "learning_rate": 3.0473503706162187e-05, | |
| "loss": 0.6506, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 1.1732492532359775, | |
| "grad_norm": 3.037081003189087, | |
| "learning_rate": 3.0445845779400377e-05, | |
| "loss": 0.6933, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 1.174908728841686, | |
| "grad_norm": 2.871654987335205, | |
| "learning_rate": 3.0418187852638568e-05, | |
| "loss": 0.6712, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.1765682044473946, | |
| "grad_norm": 2.945828914642334, | |
| "learning_rate": 3.039052992587676e-05, | |
| "loss": 0.6474, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 1.1782276800531033, | |
| "grad_norm": 3.159989595413208, | |
| "learning_rate": 3.036287199911495e-05, | |
| "loss": 0.6564, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.1798871556588117, | |
| "grad_norm": 2.5197629928588867, | |
| "learning_rate": 3.0335214072353136e-05, | |
| "loss": 0.6618, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 1.1815466312645204, | |
| "grad_norm": 2.656416416168213, | |
| "learning_rate": 3.0307556145591327e-05, | |
| "loss": 0.6439, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 1.183206106870229, | |
| "grad_norm": 3.2077407836914062, | |
| "learning_rate": 3.0279898218829518e-05, | |
| "loss": 0.659, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 1.1848655824759375, | |
| "grad_norm": 2.5919651985168457, | |
| "learning_rate": 3.0252240292067708e-05, | |
| "loss": 0.6787, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 1.1865250580816462, | |
| "grad_norm": 3.7159323692321777, | |
| "learning_rate": 3.02245823653059e-05, | |
| "loss": 0.6452, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.1881845336873549, | |
| "grad_norm": 2.56329607963562, | |
| "learning_rate": 3.019692443854409e-05, | |
| "loss": 0.6842, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 1.1898440092930633, | |
| "grad_norm": 2.9777672290802, | |
| "learning_rate": 3.016926651178228e-05, | |
| "loss": 0.6809, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 1.191503484898772, | |
| "grad_norm": 2.5090603828430176, | |
| "learning_rate": 3.014160858502047e-05, | |
| "loss": 0.6442, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 1.1931629605044807, | |
| "grad_norm": 2.4584901332855225, | |
| "learning_rate": 3.011395065825866e-05, | |
| "loss": 0.6453, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 1.1948224361101891, | |
| "grad_norm": 3.469609260559082, | |
| "learning_rate": 3.0086292731496845e-05, | |
| "loss": 0.697, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.1964819117158978, | |
| "grad_norm": 3.686086893081665, | |
| "learning_rate": 3.0058634804735036e-05, | |
| "loss": 0.6188, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 1.1981413873216065, | |
| "grad_norm": 2.244830846786499, | |
| "learning_rate": 3.0030976877973226e-05, | |
| "loss": 0.6651, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 1.199800862927315, | |
| "grad_norm": 2.945749521255493, | |
| "learning_rate": 3.0003318951211417e-05, | |
| "loss": 0.6997, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 1.2014603385330236, | |
| "grad_norm": 2.82460880279541, | |
| "learning_rate": 2.9975661024449608e-05, | |
| "loss": 0.5754, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 1.2031198141387323, | |
| "grad_norm": 3.569021224975586, | |
| "learning_rate": 2.99480030976878e-05, | |
| "loss": 0.6104, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.2047792897444407, | |
| "grad_norm": 3.185415744781494, | |
| "learning_rate": 2.992034517092599e-05, | |
| "loss": 0.6243, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 1.2064387653501494, | |
| "grad_norm": 2.5919158458709717, | |
| "learning_rate": 2.989268724416418e-05, | |
| "loss": 0.6446, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 1.208098240955858, | |
| "grad_norm": 2.6875202655792236, | |
| "learning_rate": 2.986502931740237e-05, | |
| "loss": 0.6539, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 1.2097577165615665, | |
| "grad_norm": 3.1231086254119873, | |
| "learning_rate": 2.983737139064056e-05, | |
| "loss": 0.6445, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.2114171921672752, | |
| "grad_norm": 3.024702787399292, | |
| "learning_rate": 2.9809713463878748e-05, | |
| "loss": 0.5636, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.2130766677729836, | |
| "grad_norm": 2.9127464294433594, | |
| "learning_rate": 2.978205553711694e-05, | |
| "loss": 0.6534, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 1.2147361433786923, | |
| "grad_norm": 4.460144996643066, | |
| "learning_rate": 2.975439761035513e-05, | |
| "loss": 0.6012, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 1.216395618984401, | |
| "grad_norm": 2.9597043991088867, | |
| "learning_rate": 2.972673968359332e-05, | |
| "loss": 0.6361, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 1.2180550945901096, | |
| "grad_norm": 2.4563422203063965, | |
| "learning_rate": 2.969908175683151e-05, | |
| "loss": 0.6568, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 1.219714570195818, | |
| "grad_norm": 2.4884228706359863, | |
| "learning_rate": 2.96714238300697e-05, | |
| "loss": 0.6493, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.2213740458015268, | |
| "grad_norm": 2.275660514831543, | |
| "learning_rate": 2.9643765903307892e-05, | |
| "loss": 0.6286, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 1.2230335214072352, | |
| "grad_norm": 2.318924903869629, | |
| "learning_rate": 2.9616107976546082e-05, | |
| "loss": 0.5821, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 1.2246929970129439, | |
| "grad_norm": 2.770963430404663, | |
| "learning_rate": 2.9588450049784273e-05, | |
| "loss": 0.6369, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 1.2263524726186525, | |
| "grad_norm": 2.4397408962249756, | |
| "learning_rate": 2.9560792123022457e-05, | |
| "loss": 0.7002, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 1.2280119482243612, | |
| "grad_norm": 3.2758724689483643, | |
| "learning_rate": 2.9533134196260647e-05, | |
| "loss": 0.6908, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.2296714238300697, | |
| "grad_norm": 2.9591310024261475, | |
| "learning_rate": 2.9505476269498838e-05, | |
| "loss": 0.6101, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 1.2313308994357783, | |
| "grad_norm": 3.2585608959198, | |
| "learning_rate": 2.947781834273703e-05, | |
| "loss": 0.6751, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 1.2329903750414868, | |
| "grad_norm": 2.8872411251068115, | |
| "learning_rate": 2.945016041597522e-05, | |
| "loss": 0.672, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 1.2346498506471955, | |
| "grad_norm": 2.7128067016601562, | |
| "learning_rate": 2.942250248921341e-05, | |
| "loss": 0.6619, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 1.2363093262529041, | |
| "grad_norm": 2.6857752799987793, | |
| "learning_rate": 2.93948445624516e-05, | |
| "loss": 0.6276, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.2379688018586128, | |
| "grad_norm": 2.4469690322875977, | |
| "learning_rate": 2.936718663568979e-05, | |
| "loss": 0.6659, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 1.2396282774643212, | |
| "grad_norm": 2.634112596511841, | |
| "learning_rate": 2.9339528708927982e-05, | |
| "loss": 0.6445, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 1.24128775307003, | |
| "grad_norm": 3.0444681644439697, | |
| "learning_rate": 2.931187078216617e-05, | |
| "loss": 0.6314, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 1.2429472286757384, | |
| "grad_norm": 3.5143823623657227, | |
| "learning_rate": 2.928421285540436e-05, | |
| "loss": 0.6747, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 1.244606704281447, | |
| "grad_norm": 2.7041027545928955, | |
| "learning_rate": 2.925655492864255e-05, | |
| "loss": 0.628, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.2462661798871557, | |
| "grad_norm": 2.981811285018921, | |
| "learning_rate": 2.922889700188074e-05, | |
| "loss": 0.6689, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 1.2479256554928642, | |
| "grad_norm": 2.5438361167907715, | |
| "learning_rate": 2.920123907511893e-05, | |
| "loss": 0.6445, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 1.2495851310985728, | |
| "grad_norm": 3.3671348094940186, | |
| "learning_rate": 2.9173581148357122e-05, | |
| "loss": 0.7005, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 1.2512446067042815, | |
| "grad_norm": 2.6247966289520264, | |
| "learning_rate": 2.9145923221595313e-05, | |
| "loss": 0.6512, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 1.25290408230999, | |
| "grad_norm": 3.0514414310455322, | |
| "learning_rate": 2.9118265294833503e-05, | |
| "loss": 0.6164, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.2545635579156986, | |
| "grad_norm": 2.6410088539123535, | |
| "learning_rate": 2.9090607368071694e-05, | |
| "loss": 0.5988, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.2562230335214073, | |
| "grad_norm": 3.9841790199279785, | |
| "learning_rate": 2.9062949441309885e-05, | |
| "loss": 0.6863, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 1.257882509127116, | |
| "grad_norm": 2.80208158493042, | |
| "learning_rate": 2.903529151454807e-05, | |
| "loss": 0.6827, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 1.2595419847328244, | |
| "grad_norm": 3.831223964691162, | |
| "learning_rate": 2.900763358778626e-05, | |
| "loss": 0.6628, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 1.261201460338533, | |
| "grad_norm": 3.715212821960449, | |
| "learning_rate": 2.897997566102445e-05, | |
| "loss": 0.7214, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.2628609359442415, | |
| "grad_norm": 3.88436222076416, | |
| "learning_rate": 2.895231773426264e-05, | |
| "loss": 0.612, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 1.2645204115499502, | |
| "grad_norm": 2.4657158851623535, | |
| "learning_rate": 2.892465980750083e-05, | |
| "loss": 0.5995, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 1.2661798871556589, | |
| "grad_norm": 3.639241933822632, | |
| "learning_rate": 2.889700188073902e-05, | |
| "loss": 0.6866, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 1.2678393627613673, | |
| "grad_norm": 2.5907673835754395, | |
| "learning_rate": 2.8869343953977212e-05, | |
| "loss": 0.6062, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 1.269498838367076, | |
| "grad_norm": 3.1435792446136475, | |
| "learning_rate": 2.8841686027215403e-05, | |
| "loss": 0.6667, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.2711583139727847, | |
| "grad_norm": 3.6317081451416016, | |
| "learning_rate": 2.8814028100453593e-05, | |
| "loss": 0.6012, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 1.2728177895784931, | |
| "grad_norm": 3.563117265701294, | |
| "learning_rate": 2.878637017369178e-05, | |
| "loss": 0.6467, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 1.2744772651842018, | |
| "grad_norm": 2.753971576690674, | |
| "learning_rate": 2.875871224692997e-05, | |
| "loss": 0.7008, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 1.2761367407899105, | |
| "grad_norm": 2.5007359981536865, | |
| "learning_rate": 2.8731054320168162e-05, | |
| "loss": 0.6534, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 1.277796216395619, | |
| "grad_norm": 4.024910926818848, | |
| "learning_rate": 2.8703396393406352e-05, | |
| "loss": 0.6863, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.2794556920013276, | |
| "grad_norm": 2.818535566329956, | |
| "learning_rate": 2.8675738466644543e-05, | |
| "loss": 0.6127, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 1.2811151676070363, | |
| "grad_norm": 2.7450509071350098, | |
| "learning_rate": 2.8648080539882734e-05, | |
| "loss": 0.6392, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 1.2827746432127447, | |
| "grad_norm": 3.4333643913269043, | |
| "learning_rate": 2.8620422613120924e-05, | |
| "loss": 0.6657, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 1.2844341188184534, | |
| "grad_norm": 2.8145976066589355, | |
| "learning_rate": 2.8592764686359115e-05, | |
| "loss": 0.644, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 1.286093594424162, | |
| "grad_norm": 3.2298789024353027, | |
| "learning_rate": 2.8565106759597306e-05, | |
| "loss": 0.5739, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.2877530700298705, | |
| "grad_norm": 2.691585063934326, | |
| "learning_rate": 2.853744883283549e-05, | |
| "loss": 0.6572, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 1.2894125456355792, | |
| "grad_norm": 2.9739527702331543, | |
| "learning_rate": 2.850979090607368e-05, | |
| "loss": 0.655, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 1.2910720212412876, | |
| "grad_norm": 2.2871487140655518, | |
| "learning_rate": 2.848213297931187e-05, | |
| "loss": 0.6159, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 1.2927314968469963, | |
| "grad_norm": 2.895627975463867, | |
| "learning_rate": 2.845447505255006e-05, | |
| "loss": 0.621, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 1.294390972452705, | |
| "grad_norm": 2.9289920330047607, | |
| "learning_rate": 2.8426817125788252e-05, | |
| "loss": 0.6461, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.2960504480584136, | |
| "grad_norm": 3.2139761447906494, | |
| "learning_rate": 2.8399159199026443e-05, | |
| "loss": 0.626, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 1.297709923664122, | |
| "grad_norm": 2.8325042724609375, | |
| "learning_rate": 2.8371501272264633e-05, | |
| "loss": 0.6721, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 1.2993693992698307, | |
| "grad_norm": 3.305152654647827, | |
| "learning_rate": 2.8343843345502824e-05, | |
| "loss": 0.6762, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.3010288748755392, | |
| "grad_norm": 3.015709161758423, | |
| "learning_rate": 2.8316185418741014e-05, | |
| "loss": 0.7299, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 1.3026883504812479, | |
| "grad_norm": 2.6467528343200684, | |
| "learning_rate": 2.8288527491979198e-05, | |
| "loss": 0.6014, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.3043478260869565, | |
| "grad_norm": 3.627946138381958, | |
| "learning_rate": 2.826086956521739e-05, | |
| "loss": 0.5908, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 1.3060073016926652, | |
| "grad_norm": 2.5281457901000977, | |
| "learning_rate": 2.8233211638455583e-05, | |
| "loss": 0.6756, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 1.3076667772983737, | |
| "grad_norm": 2.6659297943115234, | |
| "learning_rate": 2.8205553711693773e-05, | |
| "loss": 0.7228, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 1.3093262529040823, | |
| "grad_norm": 3.160283327102661, | |
| "learning_rate": 2.8177895784931964e-05, | |
| "loss": 0.6716, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 1.3109857285097908, | |
| "grad_norm": 2.502490520477295, | |
| "learning_rate": 2.8150237858170155e-05, | |
| "loss": 0.6461, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.3126452041154995, | |
| "grad_norm": 3.6391186714172363, | |
| "learning_rate": 2.8122579931408345e-05, | |
| "loss": 0.6583, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 1.3143046797212081, | |
| "grad_norm": 3.105423927307129, | |
| "learning_rate": 2.8094922004646536e-05, | |
| "loss": 0.6507, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 1.3159641553269168, | |
| "grad_norm": 3.11173415184021, | |
| "learning_rate": 2.8067264077884727e-05, | |
| "loss": 0.6345, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 1.3176236309326252, | |
| "grad_norm": 2.8621175289154053, | |
| "learning_rate": 2.8039606151122917e-05, | |
| "loss": 0.6265, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 1.319283106538334, | |
| "grad_norm": 3.6533260345458984, | |
| "learning_rate": 2.80119482243611e-05, | |
| "loss": 0.7305, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.3209425821440424, | |
| "grad_norm": 2.6195852756500244, | |
| "learning_rate": 2.798429029759929e-05, | |
| "loss": 0.63, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 1.322602057749751, | |
| "grad_norm": 3.0489957332611084, | |
| "learning_rate": 2.7956632370837482e-05, | |
| "loss": 0.7371, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 1.3242615333554597, | |
| "grad_norm": 2.971059560775757, | |
| "learning_rate": 2.7928974444075673e-05, | |
| "loss": 0.735, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 1.3259210089611684, | |
| "grad_norm": 2.765115976333618, | |
| "learning_rate": 2.7901316517313863e-05, | |
| "loss": 0.6667, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 1.3275804845668768, | |
| "grad_norm": 2.631791353225708, | |
| "learning_rate": 2.7873658590552054e-05, | |
| "loss": 0.6851, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.3275804845668768, | |
| "eval_gen_len": 43.86626506024096, | |
| "eval_loss": 0.6383147239685059, | |
| "eval_model_preparation_time": 0.0137, | |
| "eval_runtime": 1348.2209, | |
| "eval_samples_per_second": 4.916, | |
| "eval_steps_per_second": 0.308, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.3292399601725855, | |
| "grad_norm": 2.0661141872406006, | |
| "learning_rate": 2.7846000663790245e-05, | |
| "loss": 0.6178, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 1.330899435778294, | |
| "grad_norm": 2.870048999786377, | |
| "learning_rate": 2.7818342737028435e-05, | |
| "loss": 0.5981, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 1.3325589113840026, | |
| "grad_norm": 2.73580265045166, | |
| "learning_rate": 2.7790684810266626e-05, | |
| "loss": 0.675, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 1.3342183869897113, | |
| "grad_norm": 3.086110830307007, | |
| "learning_rate": 2.776302688350481e-05, | |
| "loss": 0.6549, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 1.33587786259542, | |
| "grad_norm": 2.6239116191864014, | |
| "learning_rate": 2.7735368956743e-05, | |
| "loss": 0.6646, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 1.3375373382011284, | |
| "grad_norm": 2.4428651332855225, | |
| "learning_rate": 2.770771102998119e-05, | |
| "loss": 0.6159, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 1.339196813806837, | |
| "grad_norm": 2.848881244659424, | |
| "learning_rate": 2.768005310321938e-05, | |
| "loss": 0.6522, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 1.3408562894125455, | |
| "grad_norm": 2.495037078857422, | |
| "learning_rate": 2.7652395176457572e-05, | |
| "loss": 0.6597, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 1.3425157650182542, | |
| "grad_norm": 2.6207704544067383, | |
| "learning_rate": 2.7624737249695763e-05, | |
| "loss": 0.655, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 1.3441752406239629, | |
| "grad_norm": 4.076655864715576, | |
| "learning_rate": 2.7597079322933957e-05, | |
| "loss": 0.6712, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.3458347162296715, | |
| "grad_norm": 2.6263961791992188, | |
| "learning_rate": 2.7569421396172148e-05, | |
| "loss": 0.6117, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 1.34749419183538, | |
| "grad_norm": 3.9632880687713623, | |
| "learning_rate": 2.7541763469410338e-05, | |
| "loss": 0.65, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 1.3491536674410887, | |
| "grad_norm": 2.890467405319214, | |
| "learning_rate": 2.7514105542648522e-05, | |
| "loss": 0.584, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 1.3508131430467971, | |
| "grad_norm": 2.1744771003723145, | |
| "learning_rate": 2.7486447615886713e-05, | |
| "loss": 0.6357, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 1.3524726186525058, | |
| "grad_norm": 2.8852360248565674, | |
| "learning_rate": 2.7458789689124903e-05, | |
| "loss": 0.6569, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 1.3541320942582145, | |
| "grad_norm": 3.050649404525757, | |
| "learning_rate": 2.7431131762363094e-05, | |
| "loss": 0.6305, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 1.3557915698639231, | |
| "grad_norm": 3.469940423965454, | |
| "learning_rate": 2.7403473835601284e-05, | |
| "loss": 0.6498, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 1.3574510454696316, | |
| "grad_norm": 2.8733341693878174, | |
| "learning_rate": 2.7375815908839475e-05, | |
| "loss": 0.7025, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 1.3591105210753402, | |
| "grad_norm": 2.502155065536499, | |
| "learning_rate": 2.7348157982077666e-05, | |
| "loss": 0.6661, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 1.3607699966810487, | |
| "grad_norm": 3.2295703887939453, | |
| "learning_rate": 2.7320500055315856e-05, | |
| "loss": 0.5898, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.3624294722867574, | |
| "grad_norm": 3.060533046722412, | |
| "learning_rate": 2.7292842128554047e-05, | |
| "loss": 0.5634, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 1.364088947892466, | |
| "grad_norm": 2.6943938732147217, | |
| "learning_rate": 2.7265184201792238e-05, | |
| "loss": 0.5965, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 1.3657484234981747, | |
| "grad_norm": 3.3528614044189453, | |
| "learning_rate": 2.723752627503042e-05, | |
| "loss": 0.6374, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 1.3674078991038832, | |
| "grad_norm": 3.091634511947632, | |
| "learning_rate": 2.7209868348268612e-05, | |
| "loss": 0.6532, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 1.3690673747095918, | |
| "grad_norm": 2.3139495849609375, | |
| "learning_rate": 2.7182210421506803e-05, | |
| "loss": 0.6079, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.3707268503153003, | |
| "grad_norm": 2.5526602268218994, | |
| "learning_rate": 2.7154552494744993e-05, | |
| "loss": 0.6627, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 1.372386325921009, | |
| "grad_norm": 2.624354600906372, | |
| "learning_rate": 2.7126894567983184e-05, | |
| "loss": 0.6785, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 1.3740458015267176, | |
| "grad_norm": 2.9834911823272705, | |
| "learning_rate": 2.7099236641221375e-05, | |
| "loss": 0.6338, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 1.375705277132426, | |
| "grad_norm": 2.112609624862671, | |
| "learning_rate": 2.7071578714459565e-05, | |
| "loss": 0.5915, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 1.3773647527381347, | |
| "grad_norm": 3.110013723373413, | |
| "learning_rate": 2.7043920787697756e-05, | |
| "loss": 0.616, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.3790242283438434, | |
| "grad_norm": 3.0879156589508057, | |
| "learning_rate": 2.7016262860935946e-05, | |
| "loss": 0.5853, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 1.3806837039495519, | |
| "grad_norm": 2.6645970344543457, | |
| "learning_rate": 2.6988604934174134e-05, | |
| "loss": 0.6464, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 1.3823431795552605, | |
| "grad_norm": 3.700145721435547, | |
| "learning_rate": 2.6960947007412324e-05, | |
| "loss": 0.6499, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 1.3840026551609692, | |
| "grad_norm": 3.2514538764953613, | |
| "learning_rate": 2.6933289080650515e-05, | |
| "loss": 0.5642, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 1.3856621307666777, | |
| "grad_norm": 3.3618319034576416, | |
| "learning_rate": 2.6905631153888705e-05, | |
| "loss": 0.6425, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 1.3873216063723863, | |
| "grad_norm": 2.636019229888916, | |
| "learning_rate": 2.6877973227126896e-05, | |
| "loss": 0.6842, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 1.388981081978095, | |
| "grad_norm": 3.2628235816955566, | |
| "learning_rate": 2.6850315300365087e-05, | |
| "loss": 0.6271, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 1.3906405575838034, | |
| "grad_norm": 2.7013912200927734, | |
| "learning_rate": 2.6822657373603277e-05, | |
| "loss": 0.6782, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 1.3923000331895121, | |
| "grad_norm": 2.627906084060669, | |
| "learning_rate": 2.6794999446841468e-05, | |
| "loss": 0.6403, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 1.3939595087952208, | |
| "grad_norm": 3.1490073204040527, | |
| "learning_rate": 2.676734152007966e-05, | |
| "loss": 0.6318, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.3956189844009292, | |
| "grad_norm": 3.9242594242095947, | |
| "learning_rate": 2.6739683593317842e-05, | |
| "loss": 0.6493, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 1.397278460006638, | |
| "grad_norm": 2.508676528930664, | |
| "learning_rate": 2.6712025666556033e-05, | |
| "loss": 0.6678, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 1.3989379356123464, | |
| "grad_norm": 2.6345014572143555, | |
| "learning_rate": 2.6684367739794224e-05, | |
| "loss": 0.6622, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 1.400597411218055, | |
| "grad_norm": 3.522874116897583, | |
| "learning_rate": 2.6656709813032414e-05, | |
| "loss": 0.6576, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 1.4022568868237637, | |
| "grad_norm": 3.2026262283325195, | |
| "learning_rate": 2.6629051886270605e-05, | |
| "loss": 0.6862, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 1.4039163624294724, | |
| "grad_norm": 3.2341012954711914, | |
| "learning_rate": 2.6601393959508796e-05, | |
| "loss": 0.6862, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 1.4055758380351808, | |
| "grad_norm": 3.0050671100616455, | |
| "learning_rate": 2.6573736032746986e-05, | |
| "loss": 0.6724, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 1.4072353136408895, | |
| "grad_norm": 2.9396309852600098, | |
| "learning_rate": 2.6546078105985177e-05, | |
| "loss": 0.6655, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 1.408894789246598, | |
| "grad_norm": 3.1885645389556885, | |
| "learning_rate": 2.6518420179223367e-05, | |
| "loss": 0.6946, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 1.4105542648523066, | |
| "grad_norm": 2.589405059814453, | |
| "learning_rate": 2.6490762252461558e-05, | |
| "loss": 0.5516, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.4122137404580153, | |
| "grad_norm": 2.978219747543335, | |
| "learning_rate": 2.6463104325699745e-05, | |
| "loss": 0.6607, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 1.413873216063724, | |
| "grad_norm": 2.7965850830078125, | |
| "learning_rate": 2.6435446398937936e-05, | |
| "loss": 0.6475, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 1.4155326916694324, | |
| "grad_norm": 2.3449814319610596, | |
| "learning_rate": 2.6407788472176126e-05, | |
| "loss": 0.5845, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 1.417192167275141, | |
| "grad_norm": 2.6544861793518066, | |
| "learning_rate": 2.6380130545414317e-05, | |
| "loss": 0.6219, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 1.4188516428808495, | |
| "grad_norm": 2.6153106689453125, | |
| "learning_rate": 2.6352472618652508e-05, | |
| "loss": 0.5959, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.4205111184865582, | |
| "grad_norm": 2.964139938354492, | |
| "learning_rate": 2.63248146918907e-05, | |
| "loss": 0.606, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 1.4221705940922669, | |
| "grad_norm": 3.122018814086914, | |
| "learning_rate": 2.629715676512889e-05, | |
| "loss": 0.6384, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 1.4238300696979755, | |
| "grad_norm": 2.3362247943878174, | |
| "learning_rate": 2.626949883836708e-05, | |
| "loss": 0.6425, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 1.425489545303684, | |
| "grad_norm": 3.3441717624664307, | |
| "learning_rate": 2.624184091160527e-05, | |
| "loss": 0.6729, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 1.4271490209093927, | |
| "grad_norm": 3.02563214302063, | |
| "learning_rate": 2.6214182984843454e-05, | |
| "loss": 0.6573, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.4288084965151011, | |
| "grad_norm": 2.8026158809661865, | |
| "learning_rate": 2.6186525058081645e-05, | |
| "loss": 0.6934, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 1.4304679721208098, | |
| "grad_norm": 2.406740427017212, | |
| "learning_rate": 2.6158867131319835e-05, | |
| "loss": 0.6005, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 1.4321274477265185, | |
| "grad_norm": 2.6590702533721924, | |
| "learning_rate": 2.6131209204558026e-05, | |
| "loss": 0.6476, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 1.4337869233322271, | |
| "grad_norm": 2.5404417514801025, | |
| "learning_rate": 2.6103551277796216e-05, | |
| "loss": 0.6904, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 1.4354463989379356, | |
| "grad_norm": 2.8186190128326416, | |
| "learning_rate": 2.6075893351034407e-05, | |
| "loss": 0.5546, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 1.4371058745436442, | |
| "grad_norm": 3.4550652503967285, | |
| "learning_rate": 2.6048235424272598e-05, | |
| "loss": 0.6777, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 1.4387653501493527, | |
| "grad_norm": 2.432072877883911, | |
| "learning_rate": 2.602057749751079e-05, | |
| "loss": 0.6266, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 1.4404248257550614, | |
| "grad_norm": 2.4931819438934326, | |
| "learning_rate": 2.599291957074898e-05, | |
| "loss": 0.6463, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 1.44208430136077, | |
| "grad_norm": 3.0838961601257324, | |
| "learning_rate": 2.5965261643987166e-05, | |
| "loss": 0.653, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 1.4437437769664787, | |
| "grad_norm": 2.035360336303711, | |
| "learning_rate": 2.5937603717225357e-05, | |
| "loss": 0.6876, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.4454032525721872, | |
| "grad_norm": 2.6690499782562256, | |
| "learning_rate": 2.5909945790463547e-05, | |
| "loss": 0.7085, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 1.4470627281778958, | |
| "grad_norm": 4.042128086090088, | |
| "learning_rate": 2.5882287863701738e-05, | |
| "loss": 0.6389, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 1.4487222037836043, | |
| "grad_norm": 2.342874526977539, | |
| "learning_rate": 2.585462993693993e-05, | |
| "loss": 0.6777, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 1.450381679389313, | |
| "grad_norm": 3.238492488861084, | |
| "learning_rate": 2.582697201017812e-05, | |
| "loss": 0.6661, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 1.4520411549950216, | |
| "grad_norm": 2.1739718914031982, | |
| "learning_rate": 2.579931408341631e-05, | |
| "loss": 0.5928, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 1.4537006306007303, | |
| "grad_norm": 2.4919285774230957, | |
| "learning_rate": 2.57716561566545e-05, | |
| "loss": 0.5875, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 1.4553601062064387, | |
| "grad_norm": 3.0230798721313477, | |
| "learning_rate": 2.574399822989269e-05, | |
| "loss": 0.6158, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 1.4570195818121474, | |
| "grad_norm": 2.7015600204467773, | |
| "learning_rate": 2.5716340303130875e-05, | |
| "loss": 0.6585, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 1.4586790574178559, | |
| "grad_norm": 2.7721285820007324, | |
| "learning_rate": 2.5688682376369066e-05, | |
| "loss": 0.6449, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 1.4603385330235645, | |
| "grad_norm": 2.542915105819702, | |
| "learning_rate": 2.5661024449607256e-05, | |
| "loss": 0.6492, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.4619980086292732, | |
| "grad_norm": 2.4796090126037598, | |
| "learning_rate": 2.5633366522845447e-05, | |
| "loss": 0.6353, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 1.4636574842349819, | |
| "grad_norm": 2.580699920654297, | |
| "learning_rate": 2.5605708596083637e-05, | |
| "loss": 0.6512, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 1.4653169598406903, | |
| "grad_norm": 2.977383613586426, | |
| "learning_rate": 2.5578050669321828e-05, | |
| "loss": 0.5601, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 1.466976435446399, | |
| "grad_norm": 2.8172965049743652, | |
| "learning_rate": 2.555039274256002e-05, | |
| "loss": 0.6693, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 1.4686359110521074, | |
| "grad_norm": 2.785449981689453, | |
| "learning_rate": 2.552273481579821e-05, | |
| "loss": 0.5605, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.4702953866578161, | |
| "grad_norm": 3.623539447784424, | |
| "learning_rate": 2.54950768890364e-05, | |
| "loss": 0.6339, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 1.4719548622635248, | |
| "grad_norm": 3.051231861114502, | |
| "learning_rate": 2.546741896227459e-05, | |
| "loss": 0.719, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 1.4736143378692335, | |
| "grad_norm": 3.1508374214172363, | |
| "learning_rate": 2.5439761035512778e-05, | |
| "loss": 0.6446, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 1.475273813474942, | |
| "grad_norm": 2.500828742980957, | |
| "learning_rate": 2.541210310875097e-05, | |
| "loss": 0.6077, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 1.4769332890806506, | |
| "grad_norm": 3.5404295921325684, | |
| "learning_rate": 2.538444518198916e-05, | |
| "loss": 0.6684, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.478592764686359, | |
| "grad_norm": 3.6613194942474365, | |
| "learning_rate": 2.535678725522735e-05, | |
| "loss": 0.6374, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 1.4802522402920677, | |
| "grad_norm": 2.6419968605041504, | |
| "learning_rate": 2.532912932846554e-05, | |
| "loss": 0.6182, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 1.4819117158977764, | |
| "grad_norm": 3.8098807334899902, | |
| "learning_rate": 2.530147140170373e-05, | |
| "loss": 0.7255, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 1.4835711915034848, | |
| "grad_norm": 3.2926089763641357, | |
| "learning_rate": 2.527381347494192e-05, | |
| "loss": 0.6579, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 1.4852306671091935, | |
| "grad_norm": 4.201939582824707, | |
| "learning_rate": 2.5246155548180112e-05, | |
| "loss": 0.65, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 1.4868901427149022, | |
| "grad_norm": 2.726099967956543, | |
| "learning_rate": 2.5218497621418303e-05, | |
| "loss": 0.5891, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 1.4885496183206106, | |
| "grad_norm": 2.545718193054199, | |
| "learning_rate": 2.5190839694656487e-05, | |
| "loss": 0.6279, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 1.4902090939263193, | |
| "grad_norm": 2.973696231842041, | |
| "learning_rate": 2.5163181767894677e-05, | |
| "loss": 0.6443, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 1.491868569532028, | |
| "grad_norm": 3.0759024620056152, | |
| "learning_rate": 2.5135523841132868e-05, | |
| "loss": 0.6623, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 1.4935280451377364, | |
| "grad_norm": 3.30869722366333, | |
| "learning_rate": 2.510786591437106e-05, | |
| "loss": 0.6932, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.4935280451377364, | |
| "eval_gen_len": 45.99307228915663, | |
| "eval_loss": 0.6329591274261475, | |
| "eval_model_preparation_time": 0.0137, | |
| "eval_runtime": 1393.4765, | |
| "eval_samples_per_second": 4.756, | |
| "eval_steps_per_second": 0.298, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.495187520743445, | |
| "grad_norm": 3.156658172607422, | |
| "learning_rate": 2.508020798760925e-05, | |
| "loss": 0.6035, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 1.4968469963491537, | |
| "grad_norm": 2.6789326667785645, | |
| "learning_rate": 2.505255006084744e-05, | |
| "loss": 0.6678, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 1.4985064719548622, | |
| "grad_norm": 2.7142372131347656, | |
| "learning_rate": 2.502489213408563e-05, | |
| "loss": 0.6547, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 1.5001659475605709, | |
| "grad_norm": 3.6695101261138916, | |
| "learning_rate": 2.4997234207323818e-05, | |
| "loss": 0.6935, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 1.5018254231662795, | |
| "grad_norm": 2.5122296810150146, | |
| "learning_rate": 2.4969576280562008e-05, | |
| "loss": 0.7043, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 1.5034848987719882, | |
| "grad_norm": 2.7815310955047607, | |
| "learning_rate": 2.49419183538002e-05, | |
| "loss": 0.6292, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 1.5051443743776967, | |
| "grad_norm": 2.4146716594696045, | |
| "learning_rate": 2.4914260427038393e-05, | |
| "loss": 0.6641, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 1.506803849983405, | |
| "grad_norm": 2.420832633972168, | |
| "learning_rate": 2.4886602500276583e-05, | |
| "loss": 0.5655, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 1.5084633255891138, | |
| "grad_norm": 2.830427885055542, | |
| "learning_rate": 2.485894457351477e-05, | |
| "loss": 0.5935, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 1.5101228011948225, | |
| "grad_norm": 2.9108097553253174, | |
| "learning_rate": 2.483128664675296e-05, | |
| "loss": 0.6461, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.5117822768005311, | |
| "grad_norm": 4.128769874572754, | |
| "learning_rate": 2.4803628719991152e-05, | |
| "loss": 0.6407, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 1.5134417524062398, | |
| "grad_norm": 2.5911433696746826, | |
| "learning_rate": 2.4775970793229343e-05, | |
| "loss": 0.663, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 1.5151012280119482, | |
| "grad_norm": 2.6011362075805664, | |
| "learning_rate": 2.474831286646753e-05, | |
| "loss": 0.6861, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 1.5167607036176567, | |
| "grad_norm": 3.9745078086853027, | |
| "learning_rate": 2.472065493970572e-05, | |
| "loss": 0.6649, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 1.5184201792233654, | |
| "grad_norm": 2.8070755004882812, | |
| "learning_rate": 2.469299701294391e-05, | |
| "loss": 0.6806, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.520079654829074, | |
| "grad_norm": 3.158174514770508, | |
| "learning_rate": 2.46653390861821e-05, | |
| "loss": 0.7047, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 1.5217391304347827, | |
| "grad_norm": 2.933155059814453, | |
| "learning_rate": 2.4637681159420292e-05, | |
| "loss": 0.6379, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 1.5233986060404912, | |
| "grad_norm": 2.6287198066711426, | |
| "learning_rate": 2.461002323265848e-05, | |
| "loss": 0.603, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 1.5250580816461998, | |
| "grad_norm": 2.4520461559295654, | |
| "learning_rate": 2.458236530589667e-05, | |
| "loss": 0.637, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 1.5267175572519083, | |
| "grad_norm": 2.695462226867676, | |
| "learning_rate": 2.455470737913486e-05, | |
| "loss": 0.6158, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.528377032857617, | |
| "grad_norm": 2.981191396713257, | |
| "learning_rate": 2.452704945237305e-05, | |
| "loss": 0.7432, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 1.5300365084633256, | |
| "grad_norm": 2.3489644527435303, | |
| "learning_rate": 2.4499391525611242e-05, | |
| "loss": 0.7302, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 1.5316959840690343, | |
| "grad_norm": 3.2739570140838623, | |
| "learning_rate": 2.447173359884943e-05, | |
| "loss": 0.6705, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 1.5333554596747427, | |
| "grad_norm": 3.6367099285125732, | |
| "learning_rate": 2.444407567208762e-05, | |
| "loss": 0.6617, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 1.5350149352804514, | |
| "grad_norm": 2.5076191425323486, | |
| "learning_rate": 2.441641774532581e-05, | |
| "loss": 0.6446, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 1.5366744108861599, | |
| "grad_norm": 2.3779613971710205, | |
| "learning_rate": 2.4388759818564e-05, | |
| "loss": 0.6996, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 1.5383338864918685, | |
| "grad_norm": 2.8726320266723633, | |
| "learning_rate": 2.436110189180219e-05, | |
| "loss": 0.6534, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 1.5399933620975772, | |
| "grad_norm": 3.3525993824005127, | |
| "learning_rate": 2.4333443965040382e-05, | |
| "loss": 0.6596, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 1.5416528377032859, | |
| "grad_norm": 2.6728854179382324, | |
| "learning_rate": 2.4305786038278573e-05, | |
| "loss": 0.6563, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 1.5433123133089943, | |
| "grad_norm": 3.2132279872894287, | |
| "learning_rate": 2.4278128111516763e-05, | |
| "loss": 0.6718, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.544971788914703, | |
| "grad_norm": 2.8587329387664795, | |
| "learning_rate": 2.4250470184754954e-05, | |
| "loss": 0.6499, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 1.5466312645204114, | |
| "grad_norm": 3.229907751083374, | |
| "learning_rate": 2.422281225799314e-05, | |
| "loss": 0.6458, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 1.5482907401261201, | |
| "grad_norm": 2.425075054168701, | |
| "learning_rate": 2.4195154331231332e-05, | |
| "loss": 0.6801, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 1.5499502157318288, | |
| "grad_norm": 2.5436460971832275, | |
| "learning_rate": 2.4167496404469523e-05, | |
| "loss": 0.6251, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 1.5516096913375375, | |
| "grad_norm": 2.9517149925231934, | |
| "learning_rate": 2.4139838477707713e-05, | |
| "loss": 0.6242, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 1.553269166943246, | |
| "grad_norm": 3.7397382259368896, | |
| "learning_rate": 2.41121805509459e-05, | |
| "loss": 0.6693, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 1.5549286425489546, | |
| "grad_norm": 3.662407398223877, | |
| "learning_rate": 2.408452262418409e-05, | |
| "loss": 0.6185, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 1.556588118154663, | |
| "grad_norm": 3.0817925930023193, | |
| "learning_rate": 2.405686469742228e-05, | |
| "loss": 0.6114, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 1.5582475937603717, | |
| "grad_norm": 3.0462610721588135, | |
| "learning_rate": 2.4029206770660472e-05, | |
| "loss": 0.6051, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 1.5599070693660804, | |
| "grad_norm": 2.5288925170898438, | |
| "learning_rate": 2.4001548843898663e-05, | |
| "loss": 0.6199, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.561566544971789, | |
| "grad_norm": 3.114104747772217, | |
| "learning_rate": 2.397389091713685e-05, | |
| "loss": 0.6531, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 1.5632260205774975, | |
| "grad_norm": 3.1850929260253906, | |
| "learning_rate": 2.394623299037504e-05, | |
| "loss": 0.681, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 1.5648854961832062, | |
| "grad_norm": 5.208533763885498, | |
| "learning_rate": 2.391857506361323e-05, | |
| "loss": 0.6649, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 1.5665449717889146, | |
| "grad_norm": 2.8434998989105225, | |
| "learning_rate": 2.3890917136851422e-05, | |
| "loss": 0.6585, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 1.5682044473946233, | |
| "grad_norm": 2.9898252487182617, | |
| "learning_rate": 2.3863259210089613e-05, | |
| "loss": 0.631, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.569863923000332, | |
| "grad_norm": 2.4391958713531494, | |
| "learning_rate": 2.3835601283327803e-05, | |
| "loss": 0.5867, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 1.5715233986060406, | |
| "grad_norm": 2.509895086288452, | |
| "learning_rate": 2.3807943356565994e-05, | |
| "loss": 0.6581, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 1.573182874211749, | |
| "grad_norm": 2.519028425216675, | |
| "learning_rate": 2.3780285429804184e-05, | |
| "loss": 0.6581, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 1.5748423498174575, | |
| "grad_norm": 2.4313178062438965, | |
| "learning_rate": 2.3752627503042375e-05, | |
| "loss": 0.6034, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 1.5765018254231662, | |
| "grad_norm": 2.631897449493408, | |
| "learning_rate": 2.3724969576280562e-05, | |
| "loss": 0.6592, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.5781613010288749, | |
| "grad_norm": 2.4608094692230225, | |
| "learning_rate": 2.3697311649518753e-05, | |
| "loss": 0.6637, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 1.5798207766345835, | |
| "grad_norm": 2.5385336875915527, | |
| "learning_rate": 2.3669653722756944e-05, | |
| "loss": 0.7129, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 1.5814802522402922, | |
| "grad_norm": 5.761340618133545, | |
| "learning_rate": 2.3641995795995134e-05, | |
| "loss": 0.6218, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 1.5831397278460007, | |
| "grad_norm": 3.5122549533843994, | |
| "learning_rate": 2.3614337869233325e-05, | |
| "loss": 0.6643, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 1.584799203451709, | |
| "grad_norm": 3.492122173309326, | |
| "learning_rate": 2.3586679942471512e-05, | |
| "loss": 0.6172, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 1.5864586790574178, | |
| "grad_norm": 2.2743546962738037, | |
| "learning_rate": 2.3559022015709703e-05, | |
| "loss": 0.6185, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 1.5881181546631264, | |
| "grad_norm": 2.686427116394043, | |
| "learning_rate": 2.3531364088947893e-05, | |
| "loss": 0.5884, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 1.5897776302688351, | |
| "grad_norm": 3.168736219406128, | |
| "learning_rate": 2.3503706162186084e-05, | |
| "loss": 0.6482, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 1.5914371058745438, | |
| "grad_norm": 2.285508632659912, | |
| "learning_rate": 2.3476048235424275e-05, | |
| "loss": 0.6357, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 1.5930965814802522, | |
| "grad_norm": 2.3977394104003906, | |
| "learning_rate": 2.3448390308662462e-05, | |
| "loss": 0.6018, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.5947560570859607, | |
| "grad_norm": 3.285010814666748, | |
| "learning_rate": 2.3420732381900652e-05, | |
| "loss": 0.6282, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 1.5964155326916694, | |
| "grad_norm": 2.762423038482666, | |
| "learning_rate": 2.3393074455138843e-05, | |
| "loss": 0.6137, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 1.598075008297378, | |
| "grad_norm": 2.6022305488586426, | |
| "learning_rate": 2.3365416528377034e-05, | |
| "loss": 0.7065, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 1.5997344839030867, | |
| "grad_norm": 2.3043951988220215, | |
| "learning_rate": 2.3337758601615224e-05, | |
| "loss": 0.6861, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 1.6013939595087954, | |
| "grad_norm": 3.265958309173584, | |
| "learning_rate": 2.3310100674853415e-05, | |
| "loss": 0.6301, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 1.6030534351145038, | |
| "grad_norm": 2.5754103660583496, | |
| "learning_rate": 2.3282442748091605e-05, | |
| "loss": 0.6359, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 1.6047129107202123, | |
| "grad_norm": 2.7039718627929688, | |
| "learning_rate": 2.3254784821329796e-05, | |
| "loss": 0.5976, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 1.606372386325921, | |
| "grad_norm": 3.076953887939453, | |
| "learning_rate": 2.3227126894567987e-05, | |
| "loss": 0.5774, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 1.6080318619316296, | |
| "grad_norm": 2.4393558502197266, | |
| "learning_rate": 2.3199468967806174e-05, | |
| "loss": 0.6095, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 1.6096913375373383, | |
| "grad_norm": 2.4519779682159424, | |
| "learning_rate": 2.3171811041044365e-05, | |
| "loss": 0.6077, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.611350813143047, | |
| "grad_norm": 2.989640712738037, | |
| "learning_rate": 2.3144153114282555e-05, | |
| "loss": 0.6352, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 1.6130102887487554, | |
| "grad_norm": 3.529949188232422, | |
| "learning_rate": 2.3116495187520746e-05, | |
| "loss": 0.6709, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 1.6146697643544639, | |
| "grad_norm": 4.415449619293213, | |
| "learning_rate": 2.3088837260758936e-05, | |
| "loss": 0.6486, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 1.6163292399601725, | |
| "grad_norm": 3.1440181732177734, | |
| "learning_rate": 2.3061179333997124e-05, | |
| "loss": 0.6455, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 1.6179887155658812, | |
| "grad_norm": 2.616605043411255, | |
| "learning_rate": 2.3033521407235314e-05, | |
| "loss": 0.642, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 1.6196481911715899, | |
| "grad_norm": 4.769134521484375, | |
| "learning_rate": 2.3005863480473505e-05, | |
| "loss": 0.6213, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 1.6213076667772985, | |
| "grad_norm": 2.7411723136901855, | |
| "learning_rate": 2.2978205553711696e-05, | |
| "loss": 0.6623, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 1.622967142383007, | |
| "grad_norm": 2.7197189331054688, | |
| "learning_rate": 2.2950547626949883e-05, | |
| "loss": 0.6586, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 1.6246266179887154, | |
| "grad_norm": 3.303685188293457, | |
| "learning_rate": 2.2922889700188073e-05, | |
| "loss": 0.692, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 1.626286093594424, | |
| "grad_norm": 2.6795287132263184, | |
| "learning_rate": 2.2895231773426264e-05, | |
| "loss": 0.6298, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.6279455692001328, | |
| "grad_norm": 2.594517707824707, | |
| "learning_rate": 2.2867573846664455e-05, | |
| "loss": 0.5813, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 1.6296050448058415, | |
| "grad_norm": 2.5385282039642334, | |
| "learning_rate": 2.2839915919902645e-05, | |
| "loss": 0.6388, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 1.63126452041155, | |
| "grad_norm": 3.192117691040039, | |
| "learning_rate": 2.2812257993140836e-05, | |
| "loss": 0.5766, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 1.6329239960172586, | |
| "grad_norm": 2.816152811050415, | |
| "learning_rate": 2.2784600066379026e-05, | |
| "loss": 0.6354, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 1.634583471622967, | |
| "grad_norm": 2.7126712799072266, | |
| "learning_rate": 2.2756942139617217e-05, | |
| "loss": 0.6325, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 1.6362429472286757, | |
| "grad_norm": 2.268371105194092, | |
| "learning_rate": 2.2729284212855408e-05, | |
| "loss": 0.5883, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 1.6379024228343844, | |
| "grad_norm": 3.135272741317749, | |
| "learning_rate": 2.27016262860936e-05, | |
| "loss": 0.6781, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 1.639561898440093, | |
| "grad_norm": 3.2926957607269287, | |
| "learning_rate": 2.2673968359331786e-05, | |
| "loss": 0.6279, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 1.6412213740458015, | |
| "grad_norm": 3.9669864177703857, | |
| "learning_rate": 2.2646310432569976e-05, | |
| "loss": 0.6569, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 1.6428808496515102, | |
| "grad_norm": 2.7280452251434326, | |
| "learning_rate": 2.2618652505808167e-05, | |
| "loss": 0.6547, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.6445403252572186, | |
| "grad_norm": 3.3885629177093506, | |
| "learning_rate": 2.2590994579046357e-05, | |
| "loss": 0.6048, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 1.6461998008629273, | |
| "grad_norm": 2.8787283897399902, | |
| "learning_rate": 2.2563336652284545e-05, | |
| "loss": 0.6147, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 1.647859276468636, | |
| "grad_norm": 2.199803113937378, | |
| "learning_rate": 2.2535678725522735e-05, | |
| "loss": 0.618, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 1.6495187520743446, | |
| "grad_norm": 3.0264763832092285, | |
| "learning_rate": 2.2508020798760926e-05, | |
| "loss": 0.6366, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 1.651178227680053, | |
| "grad_norm": 2.1966772079467773, | |
| "learning_rate": 2.2480362871999116e-05, | |
| "loss": 0.6564, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 1.6528377032857617, | |
| "grad_norm": 2.9565937519073486, | |
| "learning_rate": 2.2452704945237307e-05, | |
| "loss": 0.675, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 1.6544971788914702, | |
| "grad_norm": 3.1447999477386475, | |
| "learning_rate": 2.2425047018475494e-05, | |
| "loss": 0.6851, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 1.6561566544971789, | |
| "grad_norm": 2.6409244537353516, | |
| "learning_rate": 2.2397389091713685e-05, | |
| "loss": 0.6022, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 1.6578161301028875, | |
| "grad_norm": 3.335141181945801, | |
| "learning_rate": 2.2369731164951876e-05, | |
| "loss": 0.6495, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 1.6594756057085962, | |
| "grad_norm": 3.172175168991089, | |
| "learning_rate": 2.2342073238190066e-05, | |
| "loss": 0.6506, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.6594756057085962, | |
| "eval_gen_len": 42.61882530120482, | |
| "eval_loss": 0.6317981481552124, | |
| "eval_model_preparation_time": 0.0137, | |
| "eval_runtime": 1328.2593, | |
| "eval_samples_per_second": 4.99, | |
| "eval_steps_per_second": 0.312, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.6611350813143047, | |
| "grad_norm": 3.085299015045166, | |
| "learning_rate": 2.2314415311428253e-05, | |
| "loss": 0.6738, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 1.6627945569200133, | |
| "grad_norm": 4.871061325073242, | |
| "learning_rate": 2.2286757384666444e-05, | |
| "loss": 0.612, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 1.6644540325257218, | |
| "grad_norm": 2.488039493560791, | |
| "learning_rate": 2.2259099457904635e-05, | |
| "loss": 0.612, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 1.6661135081314304, | |
| "grad_norm": 3.3118488788604736, | |
| "learning_rate": 2.223144153114283e-05, | |
| "loss": 0.6858, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 1.6677729837371391, | |
| "grad_norm": 2.2576591968536377, | |
| "learning_rate": 2.220378360438102e-05, | |
| "loss": 0.6412, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 1.6694324593428478, | |
| "grad_norm": 3.7689051628112793, | |
| "learning_rate": 2.2176125677619207e-05, | |
| "loss": 0.7009, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 1.6710919349485562, | |
| "grad_norm": 3.4723784923553467, | |
| "learning_rate": 2.2148467750857397e-05, | |
| "loss": 0.6825, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 1.672751410554265, | |
| "grad_norm": 2.3455286026000977, | |
| "learning_rate": 2.2120809824095588e-05, | |
| "loss": 0.584, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 1.6744108861599734, | |
| "grad_norm": 3.1980183124542236, | |
| "learning_rate": 2.209315189733378e-05, | |
| "loss": 0.6203, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 1.676070361765682, | |
| "grad_norm": 3.0734760761260986, | |
| "learning_rate": 2.206549397057197e-05, | |
| "loss": 0.6228, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.6777298373713907, | |
| "grad_norm": 2.5724833011627197, | |
| "learning_rate": 2.2037836043810156e-05, | |
| "loss": 0.6572, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 1.6793893129770994, | |
| "grad_norm": 2.5542571544647217, | |
| "learning_rate": 2.2010178117048347e-05, | |
| "loss": 0.6888, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 1.6810487885828078, | |
| "grad_norm": 2.8788347244262695, | |
| "learning_rate": 2.1982520190286537e-05, | |
| "loss": 0.6324, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 1.6827082641885163, | |
| "grad_norm": 2.3643722534179688, | |
| "learning_rate": 2.1954862263524728e-05, | |
| "loss": 0.6487, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 1.684367739794225, | |
| "grad_norm": 3.1754324436187744, | |
| "learning_rate": 2.1927204336762915e-05, | |
| "loss": 0.6625, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 1.6860272153999336, | |
| "grad_norm": 2.355295419692993, | |
| "learning_rate": 2.1899546410001106e-05, | |
| "loss": 0.5971, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 1.6876866910056423, | |
| "grad_norm": 2.548339605331421, | |
| "learning_rate": 2.1871888483239297e-05, | |
| "loss": 0.6521, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 1.689346166611351, | |
| "grad_norm": 3.4530179500579834, | |
| "learning_rate": 2.1844230556477487e-05, | |
| "loss": 0.685, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 1.6910056422170594, | |
| "grad_norm": 3.1450254917144775, | |
| "learning_rate": 2.1816572629715678e-05, | |
| "loss": 0.6059, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 1.6926651178227679, | |
| "grad_norm": 3.041930913925171, | |
| "learning_rate": 2.1788914702953865e-05, | |
| "loss": 0.7124, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.6943245934284765, | |
| "grad_norm": 2.4500343799591064, | |
| "learning_rate": 2.1761256776192056e-05, | |
| "loss": 0.5958, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 1.6959840690341852, | |
| "grad_norm": 2.8591954708099365, | |
| "learning_rate": 2.1733598849430246e-05, | |
| "loss": 0.668, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 1.6976435446398939, | |
| "grad_norm": 3.4934329986572266, | |
| "learning_rate": 2.1705940922668437e-05, | |
| "loss": 0.6782, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 1.6993030202456025, | |
| "grad_norm": 2.7269980907440186, | |
| "learning_rate": 2.1678282995906628e-05, | |
| "loss": 0.6895, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 1.700962495851311, | |
| "grad_norm": 3.1131813526153564, | |
| "learning_rate": 2.1650625069144818e-05, | |
| "loss": 0.6954, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 1.7026219714570194, | |
| "grad_norm": 2.731657028198242, | |
| "learning_rate": 2.162296714238301e-05, | |
| "loss": 0.6513, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 1.704281447062728, | |
| "grad_norm": 3.856985092163086, | |
| "learning_rate": 2.15953092156212e-05, | |
| "loss": 0.6292, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 1.7059409226684368, | |
| "grad_norm": 3.4055871963500977, | |
| "learning_rate": 2.156765128885939e-05, | |
| "loss": 0.6825, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 1.7076003982741454, | |
| "grad_norm": 3.0944137573242188, | |
| "learning_rate": 2.1539993362097577e-05, | |
| "loss": 0.6832, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 1.7092598738798541, | |
| "grad_norm": 3.316096544265747, | |
| "learning_rate": 2.1512335435335768e-05, | |
| "loss": 0.6296, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 1.7109193494855626, | |
| "grad_norm": 2.8443148136138916, | |
| "learning_rate": 2.148467750857396e-05, | |
| "loss": 0.667, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 1.712578825091271, | |
| "grad_norm": 4.633572578430176, | |
| "learning_rate": 2.145701958181215e-05, | |
| "loss": 0.6503, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 1.7142383006969797, | |
| "grad_norm": 2.528799057006836, | |
| "learning_rate": 2.142936165505034e-05, | |
| "loss": 0.6405, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 1.7158977763026884, | |
| "grad_norm": 2.6630334854125977, | |
| "learning_rate": 2.1401703728288527e-05, | |
| "loss": 0.623, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 1.717557251908397, | |
| "grad_norm": 3.258363962173462, | |
| "learning_rate": 2.1374045801526718e-05, | |
| "loss": 0.6795, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 1.7192167275141057, | |
| "grad_norm": 2.7798945903778076, | |
| "learning_rate": 2.1346387874764908e-05, | |
| "loss": 0.6706, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 1.7208762031198142, | |
| "grad_norm": 2.5304954051971436, | |
| "learning_rate": 2.13187299480031e-05, | |
| "loss": 0.5987, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 1.7225356787255226, | |
| "grad_norm": 2.4129419326782227, | |
| "learning_rate": 2.129107202124129e-05, | |
| "loss": 0.6772, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 1.7241951543312313, | |
| "grad_norm": 2.6216280460357666, | |
| "learning_rate": 2.1263414094479477e-05, | |
| "loss": 0.6406, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 1.72585462993694, | |
| "grad_norm": 2.539292097091675, | |
| "learning_rate": 2.1235756167717667e-05, | |
| "loss": 0.635, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 1.7275141055426486, | |
| "grad_norm": 2.541496992111206, | |
| "learning_rate": 2.1208098240955858e-05, | |
| "loss": 0.6457, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 1.729173581148357, | |
| "grad_norm": 2.9823789596557617, | |
| "learning_rate": 2.118044031419405e-05, | |
| "loss": 0.6112, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 1.7308330567540657, | |
| "grad_norm": 3.312760829925537, | |
| "learning_rate": 2.115278238743224e-05, | |
| "loss": 0.6349, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 1.7324925323597742, | |
| "grad_norm": 2.683608055114746, | |
| "learning_rate": 2.112512446067043e-05, | |
| "loss": 0.6493, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 1.7341520079654829, | |
| "grad_norm": 2.764828681945801, | |
| "learning_rate": 2.109746653390862e-05, | |
| "loss": 0.5791, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 1.7358114835711915, | |
| "grad_norm": 2.8977785110473633, | |
| "learning_rate": 2.106980860714681e-05, | |
| "loss": 0.629, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 1.7374709591769002, | |
| "grad_norm": 2.4492812156677246, | |
| "learning_rate": 2.1042150680385e-05, | |
| "loss": 0.6265, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 1.7391304347826086, | |
| "grad_norm": 2.425192356109619, | |
| "learning_rate": 2.101449275362319e-05, | |
| "loss": 0.5907, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 1.7407899103883173, | |
| "grad_norm": 2.9035537242889404, | |
| "learning_rate": 2.098683482686138e-05, | |
| "loss": 0.6585, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 1.7424493859940258, | |
| "grad_norm": 2.507382392883301, | |
| "learning_rate": 2.095917690009957e-05, | |
| "loss": 0.6393, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.7441088615997344, | |
| "grad_norm": 3.7915854454040527, | |
| "learning_rate": 2.093151897333776e-05, | |
| "loss": 0.6419, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 1.745768337205443, | |
| "grad_norm": 2.4732701778411865, | |
| "learning_rate": 2.090386104657595e-05, | |
| "loss": 0.6623, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 1.7474278128111518, | |
| "grad_norm": 2.7935047149658203, | |
| "learning_rate": 2.087620311981414e-05, | |
| "loss": 0.6817, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 1.7490872884168602, | |
| "grad_norm": 2.607464075088501, | |
| "learning_rate": 2.084854519305233e-05, | |
| "loss": 0.6561, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 1.750746764022569, | |
| "grad_norm": 2.660127878189087, | |
| "learning_rate": 2.082088726629052e-05, | |
| "loss": 0.6613, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 1.7524062396282774, | |
| "grad_norm": 3.2085351943969727, | |
| "learning_rate": 2.079322933952871e-05, | |
| "loss": 0.6748, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 1.754065715233986, | |
| "grad_norm": 2.7219228744506836, | |
| "learning_rate": 2.0765571412766898e-05, | |
| "loss": 0.6382, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 1.7557251908396947, | |
| "grad_norm": 3.184359550476074, | |
| "learning_rate": 2.0737913486005088e-05, | |
| "loss": 0.6369, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 1.7573846664454034, | |
| "grad_norm": 3.1994669437408447, | |
| "learning_rate": 2.071025555924328e-05, | |
| "loss": 0.6546, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 1.7590441420511118, | |
| "grad_norm": 3.010939359664917, | |
| "learning_rate": 2.068259763248147e-05, | |
| "loss": 0.6864, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 1.7607036176568205, | |
| "grad_norm": 2.753485918045044, | |
| "learning_rate": 2.065493970571966e-05, | |
| "loss": 0.6881, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 1.762363093262529, | |
| "grad_norm": 2.8554036617279053, | |
| "learning_rate": 2.062728177895785e-05, | |
| "loss": 0.662, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 1.7640225688682376, | |
| "grad_norm": 4.670107364654541, | |
| "learning_rate": 2.059962385219604e-05, | |
| "loss": 0.6106, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 1.7656820444739463, | |
| "grad_norm": 2.309091806411743, | |
| "learning_rate": 2.0571965925434232e-05, | |
| "loss": 0.6692, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 1.767341520079655, | |
| "grad_norm": 3.542398691177368, | |
| "learning_rate": 2.0544307998672423e-05, | |
| "loss": 0.6851, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 1.7690009956853634, | |
| "grad_norm": 3.215907573699951, | |
| "learning_rate": 2.0516650071910613e-05, | |
| "loss": 0.6768, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 1.770660471291072, | |
| "grad_norm": 2.82903790473938, | |
| "learning_rate": 2.04889921451488e-05, | |
| "loss": 0.6761, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 1.7723199468967805, | |
| "grad_norm": 2.8713226318359375, | |
| "learning_rate": 2.046133421838699e-05, | |
| "loss": 0.6348, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 1.7739794225024892, | |
| "grad_norm": 2.563518762588501, | |
| "learning_rate": 2.043367629162518e-05, | |
| "loss": 0.6564, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 1.7756388981081979, | |
| "grad_norm": 2.8617069721221924, | |
| "learning_rate": 2.0406018364863372e-05, | |
| "loss": 0.6244, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 1.7772983737139065, | |
| "grad_norm": 3.2314953804016113, | |
| "learning_rate": 2.037836043810156e-05, | |
| "loss": 0.6567, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 1.778957849319615, | |
| "grad_norm": 3.1084470748901367, | |
| "learning_rate": 2.035070251133975e-05, | |
| "loss": 0.6223, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 1.7806173249253237, | |
| "grad_norm": 3.896014928817749, | |
| "learning_rate": 2.032304458457794e-05, | |
| "loss": 0.6031, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 1.782276800531032, | |
| "grad_norm": 2.7549264430999756, | |
| "learning_rate": 2.029538665781613e-05, | |
| "loss": 0.6582, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 1.7839362761367408, | |
| "grad_norm": 3.149277925491333, | |
| "learning_rate": 2.0267728731054322e-05, | |
| "loss": 0.5781, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 1.7855957517424494, | |
| "grad_norm": 2.9234907627105713, | |
| "learning_rate": 2.024007080429251e-05, | |
| "loss": 0.6705, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 1.7872552273481581, | |
| "grad_norm": 2.4791808128356934, | |
| "learning_rate": 2.02124128775307e-05, | |
| "loss": 0.6696, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 1.7889147029538666, | |
| "grad_norm": 2.654337167739868, | |
| "learning_rate": 2.018475495076889e-05, | |
| "loss": 0.6406, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 1.790574178559575, | |
| "grad_norm": 3.0971858501434326, | |
| "learning_rate": 2.015709702400708e-05, | |
| "loss": 0.6552, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 1.7922336541652837, | |
| "grad_norm": 2.394855260848999, | |
| "learning_rate": 2.0129439097245272e-05, | |
| "loss": 0.6378, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 1.7938931297709924, | |
| "grad_norm": 2.530996799468994, | |
| "learning_rate": 2.0101781170483462e-05, | |
| "loss": 0.6484, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 1.795552605376701, | |
| "grad_norm": 3.274632692337036, | |
| "learning_rate": 2.0074123243721653e-05, | |
| "loss": 0.6434, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 1.7972120809824097, | |
| "grad_norm": 2.5829977989196777, | |
| "learning_rate": 2.0046465316959844e-05, | |
| "loss": 0.6431, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 1.7988715565881181, | |
| "grad_norm": 2.7407333850860596, | |
| "learning_rate": 2.0018807390198034e-05, | |
| "loss": 0.659, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 1.8005310321938266, | |
| "grad_norm": 2.6258132457733154, | |
| "learning_rate": 1.999114946343622e-05, | |
| "loss": 0.653, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 1.8021905077995353, | |
| "grad_norm": 2.77689266204834, | |
| "learning_rate": 1.9963491536674412e-05, | |
| "loss": 0.6604, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 1.803849983405244, | |
| "grad_norm": 2.838128089904785, | |
| "learning_rate": 1.9935833609912603e-05, | |
| "loss": 0.607, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 1.8055094590109526, | |
| "grad_norm": 3.087151050567627, | |
| "learning_rate": 1.9908175683150793e-05, | |
| "loss": 0.6252, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 1.8071689346166613, | |
| "grad_norm": 2.8684113025665283, | |
| "learning_rate": 1.9880517756388984e-05, | |
| "loss": 0.6945, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 1.8088284102223697, | |
| "grad_norm": 3.120990037918091, | |
| "learning_rate": 1.985285982962717e-05, | |
| "loss": 0.684, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 1.8104878858280782, | |
| "grad_norm": 2.8556265830993652, | |
| "learning_rate": 1.9825201902865362e-05, | |
| "loss": 0.6328, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 1.8121473614337869, | |
| "grad_norm": 2.925619125366211, | |
| "learning_rate": 1.9797543976103552e-05, | |
| "loss": 0.5552, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 1.8138068370394955, | |
| "grad_norm": 2.4405603408813477, | |
| "learning_rate": 1.9769886049341743e-05, | |
| "loss": 0.6689, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 1.8154663126452042, | |
| "grad_norm": 3.013485908508301, | |
| "learning_rate": 1.974222812257993e-05, | |
| "loss": 0.6554, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 1.8171257882509129, | |
| "grad_norm": 2.554626703262329, | |
| "learning_rate": 1.971457019581812e-05, | |
| "loss": 0.667, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 1.8187852638566213, | |
| "grad_norm": 2.905426263809204, | |
| "learning_rate": 1.968691226905631e-05, | |
| "loss": 0.6502, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 1.8204447394623298, | |
| "grad_norm": 3.87603497505188, | |
| "learning_rate": 1.9659254342294502e-05, | |
| "loss": 0.7144, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 1.8221042150680384, | |
| "grad_norm": 2.709789276123047, | |
| "learning_rate": 1.9631596415532693e-05, | |
| "loss": 0.6134, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 1.823763690673747, | |
| "grad_norm": 2.6182925701141357, | |
| "learning_rate": 1.960393848877088e-05, | |
| "loss": 0.6697, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 1.8254231662794558, | |
| "grad_norm": 2.8797554969787598, | |
| "learning_rate": 1.957628056200907e-05, | |
| "loss": 0.6577, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.8254231662794558, | |
| "eval_gen_len": 45.56867469879518, | |
| "eval_loss": 0.6276843547821045, | |
| "eval_model_preparation_time": 0.0137, | |
| "eval_runtime": 1356.1116, | |
| "eval_samples_per_second": 4.888, | |
| "eval_steps_per_second": 0.306, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.8270826418851644, | |
| "grad_norm": 2.888942241668701, | |
| "learning_rate": 1.9548622635247265e-05, | |
| "loss": 0.6432, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 1.828742117490873, | |
| "grad_norm": 2.8916501998901367, | |
| "learning_rate": 1.9520964708485455e-05, | |
| "loss": 0.6114, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 1.8304015930965813, | |
| "grad_norm": 2.345423698425293, | |
| "learning_rate": 1.9493306781723646e-05, | |
| "loss": 0.6203, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 1.83206106870229, | |
| "grad_norm": 2.7087900638580322, | |
| "learning_rate": 1.9465648854961833e-05, | |
| "loss": 0.6254, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 1.8337205443079987, | |
| "grad_norm": 3.901052474975586, | |
| "learning_rate": 1.9437990928200024e-05, | |
| "loss": 0.6279, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 1.8353800199137074, | |
| "grad_norm": 2.765397787094116, | |
| "learning_rate": 1.9410333001438214e-05, | |
| "loss": 0.6609, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 1.8370394955194158, | |
| "grad_norm": 2.2285208702087402, | |
| "learning_rate": 1.9382675074676405e-05, | |
| "loss": 0.6638, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 1.8386989711251245, | |
| "grad_norm": 2.972564220428467, | |
| "learning_rate": 1.9355017147914592e-05, | |
| "loss": 0.6289, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 1.840358446730833, | |
| "grad_norm": 2.643881320953369, | |
| "learning_rate": 1.9327359221152783e-05, | |
| "loss": 0.6638, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 1.8420179223365416, | |
| "grad_norm": 2.7107179164886475, | |
| "learning_rate": 1.9299701294390973e-05, | |
| "loss": 0.64, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 1.8436773979422503, | |
| "grad_norm": 2.4541239738464355, | |
| "learning_rate": 1.9272043367629164e-05, | |
| "loss": 0.5943, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 1.845336873547959, | |
| "grad_norm": 2.520796060562134, | |
| "learning_rate": 1.9244385440867355e-05, | |
| "loss": 0.6804, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 1.8469963491536674, | |
| "grad_norm": 2.7786107063293457, | |
| "learning_rate": 1.9216727514105542e-05, | |
| "loss": 0.6414, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 1.848655824759376, | |
| "grad_norm": 3.0973737239837646, | |
| "learning_rate": 1.9189069587343732e-05, | |
| "loss": 0.6855, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 1.8503153003650845, | |
| "grad_norm": 2.5851657390594482, | |
| "learning_rate": 1.9161411660581923e-05, | |
| "loss": 0.5443, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 1.8519747759707932, | |
| "grad_norm": 2.8966641426086426, | |
| "learning_rate": 1.9133753733820114e-05, | |
| "loss": 0.5777, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 1.8536342515765019, | |
| "grad_norm": 3.042029857635498, | |
| "learning_rate": 1.9106095807058304e-05, | |
| "loss": 0.7043, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 1.8552937271822105, | |
| "grad_norm": 2.6720499992370605, | |
| "learning_rate": 1.907843788029649e-05, | |
| "loss": 0.6793, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 1.856953202787919, | |
| "grad_norm": 2.850257158279419, | |
| "learning_rate": 1.9050779953534682e-05, | |
| "loss": 0.6232, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 1.8586126783936276, | |
| "grad_norm": 2.7536768913269043, | |
| "learning_rate": 1.9023122026772873e-05, | |
| "loss": 0.6604, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 1.860272153999336, | |
| "grad_norm": 2.6577181816101074, | |
| "learning_rate": 1.8995464100011063e-05, | |
| "loss": 0.5952, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 1.8619316296050448, | |
| "grad_norm": 3.852022171020508, | |
| "learning_rate": 1.8967806173249254e-05, | |
| "loss": 0.6362, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 1.8635911052107534, | |
| "grad_norm": 2.875685453414917, | |
| "learning_rate": 1.8940148246487445e-05, | |
| "loss": 0.7276, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 1.8652505808164621, | |
| "grad_norm": 3.211580991744995, | |
| "learning_rate": 1.8912490319725635e-05, | |
| "loss": 0.6773, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 1.8669100564221706, | |
| "grad_norm": 2.6323704719543457, | |
| "learning_rate": 1.8884832392963826e-05, | |
| "loss": 0.6487, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 1.8685695320278792, | |
| "grad_norm": 3.9867587089538574, | |
| "learning_rate": 1.8857174466202016e-05, | |
| "loss": 0.6276, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 1.8702290076335877, | |
| "grad_norm": 2.4788997173309326, | |
| "learning_rate": 1.8829516539440204e-05, | |
| "loss": 0.6239, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 1.8718884832392964, | |
| "grad_norm": 2.5648040771484375, | |
| "learning_rate": 1.8801858612678394e-05, | |
| "loss": 0.6247, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 1.873547958845005, | |
| "grad_norm": 2.8344156742095947, | |
| "learning_rate": 1.8774200685916585e-05, | |
| "loss": 0.5787, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 1.8752074344507137, | |
| "grad_norm": 4.752839088439941, | |
| "learning_rate": 1.8746542759154776e-05, | |
| "loss": 0.6402, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 1.8768669100564221, | |
| "grad_norm": 3.165907382965088, | |
| "learning_rate": 1.8718884832392966e-05, | |
| "loss": 0.655, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 1.8785263856621308, | |
| "grad_norm": 2.813150405883789, | |
| "learning_rate": 1.8691226905631153e-05, | |
| "loss": 0.5664, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 1.8801858612678393, | |
| "grad_norm": 2.6606945991516113, | |
| "learning_rate": 1.8663568978869344e-05, | |
| "loss": 0.6411, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 1.881845336873548, | |
| "grad_norm": 2.7262566089630127, | |
| "learning_rate": 1.8635911052107535e-05, | |
| "loss": 0.6904, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 1.8835048124792566, | |
| "grad_norm": 3.206533908843994, | |
| "learning_rate": 1.8608253125345725e-05, | |
| "loss": 0.6105, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 1.8851642880849653, | |
| "grad_norm": 3.490770101547241, | |
| "learning_rate": 1.8580595198583913e-05, | |
| "loss": 0.6457, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 1.8868237636906737, | |
| "grad_norm": 2.6691598892211914, | |
| "learning_rate": 1.8552937271822103e-05, | |
| "loss": 0.7151, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 1.8884832392963822, | |
| "grad_norm": 2.681269884109497, | |
| "learning_rate": 1.8525279345060294e-05, | |
| "loss": 0.6186, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 1.8901427149020908, | |
| "grad_norm": 2.527893543243408, | |
| "learning_rate": 1.8497621418298484e-05, | |
| "loss": 0.5586, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 1.8918021905077995, | |
| "grad_norm": 2.677696704864502, | |
| "learning_rate": 1.8469963491536675e-05, | |
| "loss": 0.6233, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 1.8934616661135082, | |
| "grad_norm": 2.9220569133758545, | |
| "learning_rate": 1.8442305564774866e-05, | |
| "loss": 0.6908, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 1.8951211417192169, | |
| "grad_norm": 2.458573579788208, | |
| "learning_rate": 1.8414647638013056e-05, | |
| "loss": 0.639, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 1.8967806173249253, | |
| "grad_norm": 2.7711801528930664, | |
| "learning_rate": 1.8386989711251247e-05, | |
| "loss": 0.5861, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 1.8984400929306338, | |
| "grad_norm": 2.2654006481170654, | |
| "learning_rate": 1.8359331784489437e-05, | |
| "loss": 0.6429, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 1.9000995685363424, | |
| "grad_norm": 2.199928045272827, | |
| "learning_rate": 1.8331673857727625e-05, | |
| "loss": 0.6192, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 1.901759044142051, | |
| "grad_norm": 2.565298080444336, | |
| "learning_rate": 1.8304015930965815e-05, | |
| "loss": 0.6517, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 1.9034185197477598, | |
| "grad_norm": 2.959390878677368, | |
| "learning_rate": 1.8276358004204006e-05, | |
| "loss": 0.6197, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 1.9050779953534684, | |
| "grad_norm": 3.2915642261505127, | |
| "learning_rate": 1.8248700077442197e-05, | |
| "loss": 0.5705, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 1.906737470959177, | |
| "grad_norm": 2.0758042335510254, | |
| "learning_rate": 1.8221042150680387e-05, | |
| "loss": 0.6337, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 1.9083969465648853, | |
| "grad_norm": 2.862844467163086, | |
| "learning_rate": 1.8193384223918574e-05, | |
| "loss": 0.6384, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.910056422170594, | |
| "grad_norm": 2.489260673522949, | |
| "learning_rate": 1.8165726297156765e-05, | |
| "loss": 0.6693, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 1.9117158977763027, | |
| "grad_norm": 2.257587194442749, | |
| "learning_rate": 1.8138068370394956e-05, | |
| "loss": 0.5863, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 1.9133753733820114, | |
| "grad_norm": 3.025855779647827, | |
| "learning_rate": 1.8110410443633146e-05, | |
| "loss": 0.6333, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 1.91503484898772, | |
| "grad_norm": 2.7348458766937256, | |
| "learning_rate": 1.8082752516871337e-05, | |
| "loss": 0.6725, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 1.9166943245934285, | |
| "grad_norm": 3.3557896614074707, | |
| "learning_rate": 1.8055094590109524e-05, | |
| "loss": 0.6569, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 1.918353800199137, | |
| "grad_norm": 3.2904157638549805, | |
| "learning_rate": 1.8027436663347715e-05, | |
| "loss": 0.696, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 1.9200132758048456, | |
| "grad_norm": 2.3019628524780273, | |
| "learning_rate": 1.7999778736585905e-05, | |
| "loss": 0.5247, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 1.9216727514105543, | |
| "grad_norm": 2.766451597213745, | |
| "learning_rate": 1.7972120809824096e-05, | |
| "loss": 0.7265, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 1.923332227016263, | |
| "grad_norm": 2.454838991165161, | |
| "learning_rate": 1.7944462883062287e-05, | |
| "loss": 0.5796, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 1.9249917026219716, | |
| "grad_norm": 2.8631088733673096, | |
| "learning_rate": 1.7916804956300477e-05, | |
| "loss": 0.6788, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 1.92665117822768, | |
| "grad_norm": 2.889618158340454, | |
| "learning_rate": 1.7889147029538668e-05, | |
| "loss": 0.6684, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 1.9283106538333885, | |
| "grad_norm": 2.790698766708374, | |
| "learning_rate": 1.786148910277686e-05, | |
| "loss": 0.6312, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 1.9299701294390972, | |
| "grad_norm": 2.6731488704681396, | |
| "learning_rate": 1.783383117601505e-05, | |
| "loss": 0.6283, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 1.9316296050448059, | |
| "grad_norm": 2.563717842102051, | |
| "learning_rate": 1.7806173249253236e-05, | |
| "loss": 0.6463, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 1.9332890806505145, | |
| "grad_norm": 2.9437766075134277, | |
| "learning_rate": 1.7778515322491427e-05, | |
| "loss": 0.6631, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 1.9349485562562232, | |
| "grad_norm": 2.956129312515259, | |
| "learning_rate": 1.7750857395729618e-05, | |
| "loss": 0.5983, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 1.9366080318619316, | |
| "grad_norm": 2.8484396934509277, | |
| "learning_rate": 1.7723199468967808e-05, | |
| "loss": 0.65, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 1.93826750746764, | |
| "grad_norm": 2.8540737628936768, | |
| "learning_rate": 1.7695541542206e-05, | |
| "loss": 0.6743, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 1.9399269830733488, | |
| "grad_norm": 2.9417595863342285, | |
| "learning_rate": 1.7667883615444186e-05, | |
| "loss": 0.6692, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 1.9415864586790574, | |
| "grad_norm": 3.3319525718688965, | |
| "learning_rate": 1.7640225688682377e-05, | |
| "loss": 0.6147, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 1.943245934284766, | |
| "grad_norm": 2.3488125801086426, | |
| "learning_rate": 1.7612567761920567e-05, | |
| "loss": 0.6646, | |
| "step": 11710 | |
| }, | |
| { | |
| "epoch": 1.9449054098904746, | |
| "grad_norm": 3.0198192596435547, | |
| "learning_rate": 1.7584909835158758e-05, | |
| "loss": 0.6479, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 1.9465648854961832, | |
| "grad_norm": 2.935741662979126, | |
| "learning_rate": 1.7557251908396945e-05, | |
| "loss": 0.6406, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 1.9482243611018917, | |
| "grad_norm": 2.7237465381622314, | |
| "learning_rate": 1.7529593981635136e-05, | |
| "loss": 0.6627, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 1.9498838367076003, | |
| "grad_norm": 3.8201403617858887, | |
| "learning_rate": 1.7501936054873326e-05, | |
| "loss": 0.6603, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 1.951543312313309, | |
| "grad_norm": 2.511312484741211, | |
| "learning_rate": 1.7474278128111517e-05, | |
| "loss": 0.6557, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 1.9532027879190177, | |
| "grad_norm": 3.6143879890441895, | |
| "learning_rate": 1.7446620201349708e-05, | |
| "loss": 0.6626, | |
| "step": 11770 | |
| }, | |
| { | |
| "epoch": 1.9548622635247261, | |
| "grad_norm": 3.1605286598205566, | |
| "learning_rate": 1.7418962274587898e-05, | |
| "loss": 0.5847, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 1.9565217391304348, | |
| "grad_norm": 3.041008949279785, | |
| "learning_rate": 1.739130434782609e-05, | |
| "loss": 0.6105, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 1.9581812147361433, | |
| "grad_norm": 2.9670727252960205, | |
| "learning_rate": 1.736364642106428e-05, | |
| "loss": 0.6107, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 1.959840690341852, | |
| "grad_norm": 3.077299118041992, | |
| "learning_rate": 1.733598849430247e-05, | |
| "loss": 0.6169, | |
| "step": 11810 | |
| }, | |
| { | |
| "epoch": 1.9615001659475606, | |
| "grad_norm": 2.9002206325531006, | |
| "learning_rate": 1.730833056754066e-05, | |
| "loss": 0.6763, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 1.9631596415532693, | |
| "grad_norm": 2.9046597480773926, | |
| "learning_rate": 1.7280672640778848e-05, | |
| "loss": 0.5729, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 1.9648191171589777, | |
| "grad_norm": 3.2700092792510986, | |
| "learning_rate": 1.725301471401704e-05, | |
| "loss": 0.6393, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 1.9664785927646864, | |
| "grad_norm": 2.5844552516937256, | |
| "learning_rate": 1.722535678725523e-05, | |
| "loss": 0.6303, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 1.9681380683703948, | |
| "grad_norm": 3.339615821838379, | |
| "learning_rate": 1.719769886049342e-05, | |
| "loss": 0.6468, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 1.9697975439761035, | |
| "grad_norm": 3.2334821224212646, | |
| "learning_rate": 1.7170040933731607e-05, | |
| "loss": 0.6765, | |
| "step": 11870 | |
| }, | |
| { | |
| "epoch": 1.9714570195818122, | |
| "grad_norm": 2.5103602409362793, | |
| "learning_rate": 1.7142383006969798e-05, | |
| "loss": 0.7246, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 1.9731164951875209, | |
| "grad_norm": 2.6350810527801514, | |
| "learning_rate": 1.7114725080207988e-05, | |
| "loss": 0.6222, | |
| "step": 11890 | |
| }, | |
| { | |
| "epoch": 1.9747759707932293, | |
| "grad_norm": 3.489544630050659, | |
| "learning_rate": 1.708706715344618e-05, | |
| "loss": 0.6614, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 1.976435446398938, | |
| "grad_norm": 2.692086935043335, | |
| "learning_rate": 1.705940922668437e-05, | |
| "loss": 0.705, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 1.9780949220046464, | |
| "grad_norm": 3.1969101428985596, | |
| "learning_rate": 1.7031751299922557e-05, | |
| "loss": 0.6276, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 1.979754397610355, | |
| "grad_norm": 2.7400033473968506, | |
| "learning_rate": 1.7004093373160747e-05, | |
| "loss": 0.6217, | |
| "step": 11930 | |
| }, | |
| { | |
| "epoch": 1.9814138732160638, | |
| "grad_norm": 2.7665727138519287, | |
| "learning_rate": 1.6976435446398938e-05, | |
| "loss": 0.6248, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 1.9830733488217724, | |
| "grad_norm": 3.2178454399108887, | |
| "learning_rate": 1.694877751963713e-05, | |
| "loss": 0.65, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 1.984732824427481, | |
| "grad_norm": 2.3739986419677734, | |
| "learning_rate": 1.692111959287532e-05, | |
| "loss": 0.6424, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 1.9863923000331896, | |
| "grad_norm": 3.2119979858398438, | |
| "learning_rate": 1.6893461666113506e-05, | |
| "loss": 0.6361, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 1.988051775638898, | |
| "grad_norm": 3.085068941116333, | |
| "learning_rate": 1.68658037393517e-05, | |
| "loss": 0.6686, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 1.9897112512446067, | |
| "grad_norm": 3.0079755783081055, | |
| "learning_rate": 1.683814581258989e-05, | |
| "loss": 0.6811, | |
| "step": 11990 | |
| }, | |
| { | |
| "epoch": 1.9913707268503154, | |
| "grad_norm": 2.2022628784179688, | |
| "learning_rate": 1.681048788582808e-05, | |
| "loss": 0.6659, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.9913707268503154, | |
| "eval_gen_len": 45.67319277108434, | |
| "eval_loss": 0.6242377758026123, | |
| "eval_model_preparation_time": 0.0137, | |
| "eval_runtime": 1384.1691, | |
| "eval_samples_per_second": 4.788, | |
| "eval_steps_per_second": 0.3, | |
| "step": 12000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 18078, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.641411729138647e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |