Instructions to use deu05232/promptriever-llama2-7B-new_seed42-JointLH-cross_batch with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use deu05232/promptriever-llama2-7B-new_seed42-JointLH-cross_batch with PEFT:
Task type is invalid.
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 7644, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0013082155939298796, | |
| "grad_norm": 7.290712591801908, | |
| "learning_rate": 4.9999999999999996e-05, | |
| "loss": 5.7851, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0026164311878597592, | |
| "grad_norm": 3.67299584724583, | |
| "learning_rate": 6.505149978319905e-05, | |
| "loss": 3.1259, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.003924646781789639, | |
| "grad_norm": 2.4764513799723513, | |
| "learning_rate": 7.385606273598311e-05, | |
| "loss": 1.6786, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0052328623757195184, | |
| "grad_norm": 1.7953600265789333, | |
| "learning_rate": 8.01029995663981e-05, | |
| "loss": 1.2211, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.006541077969649398, | |
| "grad_norm": 1.8771167917018612, | |
| "learning_rate": 8.494850021680092e-05, | |
| "loss": 1.2585, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.007849293563579277, | |
| "grad_norm": 1.2473543190033471, | |
| "learning_rate": 8.890756251918216e-05, | |
| "loss": 0.9283, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.009157509157509158, | |
| "grad_norm": 2.618558273245568, | |
| "learning_rate": 9.225490200071284e-05, | |
| "loss": 1.1733, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.010465724751439037, | |
| "grad_norm": 1.2184582707007738, | |
| "learning_rate": 9.515449934959716e-05, | |
| "loss": 0.9113, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.011773940345368918, | |
| "grad_norm": 1.1266167128317874, | |
| "learning_rate": 9.771212547196623e-05, | |
| "loss": 1.0926, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.013082155939298797, | |
| "grad_norm": 1.2759202461423789, | |
| "learning_rate": 9.999999999999999e-05, | |
| "loss": 0.9088, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.014390371533228676, | |
| "grad_norm": 1.1320718200823117, | |
| "learning_rate": 9.988069989395547e-05, | |
| "loss": 1.0652, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.015698587127158554, | |
| "grad_norm": 1.253166703044972, | |
| "learning_rate": 9.974814422057265e-05, | |
| "loss": 0.8037, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.017006802721088437, | |
| "grad_norm": 0.8736521465598114, | |
| "learning_rate": 9.961558854718983e-05, | |
| "loss": 1.0371, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.018315018315018316, | |
| "grad_norm": 0.9331897722352183, | |
| "learning_rate": 9.9483032873807e-05, | |
| "loss": 0.8399, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.019623233908948195, | |
| "grad_norm": 1.16561958480625, | |
| "learning_rate": 9.935047720042418e-05, | |
| "loss": 1.0586, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.020931449502878074, | |
| "grad_norm": 0.8921867195796972, | |
| "learning_rate": 9.921792152704136e-05, | |
| "loss": 0.7718, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.022239665096807953, | |
| "grad_norm": 0.7140587209081476, | |
| "learning_rate": 9.908536585365854e-05, | |
| "loss": 1.009, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.023547880690737835, | |
| "grad_norm": 0.8309946634200358, | |
| "learning_rate": 9.895281018027573e-05, | |
| "loss": 0.7839, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.024856096284667714, | |
| "grad_norm": 0.7342060793171521, | |
| "learning_rate": 9.88202545068929e-05, | |
| "loss": 0.9935, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.026164311878597593, | |
| "grad_norm": 0.9847538624284081, | |
| "learning_rate": 9.868769883351007e-05, | |
| "loss": 0.7705, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.027472527472527472, | |
| "grad_norm": 0.8944456756402039, | |
| "learning_rate": 9.855514316012726e-05, | |
| "loss": 1.0197, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.02878074306645735, | |
| "grad_norm": 0.7463212583397272, | |
| "learning_rate": 9.842258748674443e-05, | |
| "loss": 0.7562, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.030088958660387233, | |
| "grad_norm": 0.7842188750961813, | |
| "learning_rate": 9.829003181336162e-05, | |
| "loss": 0.9935, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.03139717425431711, | |
| "grad_norm": 0.8675590356137016, | |
| "learning_rate": 9.815747613997879e-05, | |
| "loss": 0.7518, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.03270538984824699, | |
| "grad_norm": 0.5923018779242124, | |
| "learning_rate": 9.802492046659596e-05, | |
| "loss": 0.9582, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.034013605442176874, | |
| "grad_norm": 0.6928464778679819, | |
| "learning_rate": 9.789236479321315e-05, | |
| "loss": 0.747, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.03532182103610675, | |
| "grad_norm": 0.7113730083361403, | |
| "learning_rate": 9.775980911983034e-05, | |
| "loss": 0.9576, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.03663003663003663, | |
| "grad_norm": 0.8679078003827115, | |
| "learning_rate": 9.762725344644751e-05, | |
| "loss": 0.7286, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.03793825222396651, | |
| "grad_norm": 0.600907373361144, | |
| "learning_rate": 9.74946977730647e-05, | |
| "loss": 0.9778, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.03924646781789639, | |
| "grad_norm": 0.8848902266623525, | |
| "learning_rate": 9.736214209968187e-05, | |
| "loss": 0.7405, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.04055468341182627, | |
| "grad_norm": 0.9589691469495706, | |
| "learning_rate": 9.722958642629904e-05, | |
| "loss": 0.9579, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.04186289900575615, | |
| "grad_norm": 0.7099733591624596, | |
| "learning_rate": 9.709703075291623e-05, | |
| "loss": 0.7529, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.04317111459968603, | |
| "grad_norm": 0.6510293809764843, | |
| "learning_rate": 9.696447507953341e-05, | |
| "loss": 0.979, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.044479330193615906, | |
| "grad_norm": 1.1641190214279045, | |
| "learning_rate": 9.683191940615059e-05, | |
| "loss": 0.7132, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.045787545787545784, | |
| "grad_norm": 0.9673000795752291, | |
| "learning_rate": 9.669936373276777e-05, | |
| "loss": 0.9253, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.04709576138147567, | |
| "grad_norm": 0.7190353586884389, | |
| "learning_rate": 9.656680805938494e-05, | |
| "loss": 0.726, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.04840397697540555, | |
| "grad_norm": 0.7201601219088483, | |
| "learning_rate": 9.643425238600212e-05, | |
| "loss": 0.9286, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.04971219256933543, | |
| "grad_norm": 0.884374342504821, | |
| "learning_rate": 9.63016967126193e-05, | |
| "loss": 0.7203, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.05102040816326531, | |
| "grad_norm": 0.7049160941807705, | |
| "learning_rate": 9.616914103923649e-05, | |
| "loss": 0.9509, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.052328623757195186, | |
| "grad_norm": 0.9056185856880267, | |
| "learning_rate": 9.603658536585366e-05, | |
| "loss": 0.7115, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.053636839351125065, | |
| "grad_norm": 0.6053280737552994, | |
| "learning_rate": 9.590402969247085e-05, | |
| "loss": 0.949, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.054945054945054944, | |
| "grad_norm": 0.669203867860999, | |
| "learning_rate": 9.577147401908802e-05, | |
| "loss": 0.7155, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.05625327053898482, | |
| "grad_norm": 0.7049507997603442, | |
| "learning_rate": 9.56389183457052e-05, | |
| "loss": 0.9633, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.0575614861329147, | |
| "grad_norm": 0.72413054526148, | |
| "learning_rate": 9.550636267232238e-05, | |
| "loss": 0.6704, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.05886970172684458, | |
| "grad_norm": 0.9434997463945898, | |
| "learning_rate": 9.537380699893957e-05, | |
| "loss": 0.9329, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.06017791732077447, | |
| "grad_norm": 0.9425009249090048, | |
| "learning_rate": 9.524125132555674e-05, | |
| "loss": 0.6726, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.061486132914704346, | |
| "grad_norm": 0.529629216676613, | |
| "learning_rate": 9.510869565217391e-05, | |
| "loss": 0.9172, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.06279434850863422, | |
| "grad_norm": 0.694713736738518, | |
| "learning_rate": 9.49761399787911e-05, | |
| "loss": 0.7293, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.0641025641025641, | |
| "grad_norm": 0.6230312130051202, | |
| "learning_rate": 9.484358430540827e-05, | |
| "loss": 0.903, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.06541077969649398, | |
| "grad_norm": 0.6370052650163481, | |
| "learning_rate": 9.471102863202546e-05, | |
| "loss": 0.7062, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.06671899529042387, | |
| "grad_norm": 0.7394610544442655, | |
| "learning_rate": 9.457847295864264e-05, | |
| "loss": 0.9301, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.06802721088435375, | |
| "grad_norm": 0.5284256055090087, | |
| "learning_rate": 9.444591728525982e-05, | |
| "loss": 0.6504, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.06933542647828363, | |
| "grad_norm": 0.8769802465059457, | |
| "learning_rate": 9.431336161187699e-05, | |
| "loss": 0.9392, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.0706436420722135, | |
| "grad_norm": 0.7788506408432221, | |
| "learning_rate": 9.418080593849417e-05, | |
| "loss": 0.7188, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.07195185766614338, | |
| "grad_norm": 0.6258384386257697, | |
| "learning_rate": 9.404825026511135e-05, | |
| "loss": 0.8991, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.07326007326007326, | |
| "grad_norm": 0.771128895833298, | |
| "learning_rate": 9.391569459172853e-05, | |
| "loss": 0.6951, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.07456828885400314, | |
| "grad_norm": 0.8070532606977017, | |
| "learning_rate": 9.378313891834572e-05, | |
| "loss": 0.9198, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.07587650444793302, | |
| "grad_norm": 0.7040977734805667, | |
| "learning_rate": 9.365058324496289e-05, | |
| "loss": 0.7285, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.0771847200418629, | |
| "grad_norm": 0.5877418285296147, | |
| "learning_rate": 9.351802757158006e-05, | |
| "loss": 0.8823, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.07849293563579278, | |
| "grad_norm": 0.43162998979605355, | |
| "learning_rate": 9.338547189819725e-05, | |
| "loss": 0.6631, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.07980115122972266, | |
| "grad_norm": 0.5655884401483549, | |
| "learning_rate": 9.325291622481442e-05, | |
| "loss": 0.9211, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.08110936682365254, | |
| "grad_norm": 0.6606829717694761, | |
| "learning_rate": 9.312036055143161e-05, | |
| "loss": 0.6835, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.08241758241758242, | |
| "grad_norm": 0.4866281049362381, | |
| "learning_rate": 9.29878048780488e-05, | |
| "loss": 0.8975, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.0837257980115123, | |
| "grad_norm": 1.1102292165370264, | |
| "learning_rate": 9.285524920466597e-05, | |
| "loss": 0.6914, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.08503401360544217, | |
| "grad_norm": 0.5006402275523141, | |
| "learning_rate": 9.272269353128314e-05, | |
| "loss": 0.9399, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.08634222919937205, | |
| "grad_norm": 0.7796348119914489, | |
| "learning_rate": 9.259013785790033e-05, | |
| "loss": 0.7019, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.08765044479330193, | |
| "grad_norm": 0.7168780685110666, | |
| "learning_rate": 9.24575821845175e-05, | |
| "loss": 0.8866, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.08895866038723181, | |
| "grad_norm": 0.8634136089366375, | |
| "learning_rate": 9.232502651113469e-05, | |
| "loss": 0.6761, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.09026687598116169, | |
| "grad_norm": 0.596915980244832, | |
| "learning_rate": 9.219247083775187e-05, | |
| "loss": 0.9005, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.09157509157509157, | |
| "grad_norm": 0.6529475469297664, | |
| "learning_rate": 9.205991516436903e-05, | |
| "loss": 0.6601, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.09288330716902145, | |
| "grad_norm": 0.5753441026258548, | |
| "learning_rate": 9.192735949098622e-05, | |
| "loss": 0.9205, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.09419152276295134, | |
| "grad_norm": 0.7907931238031155, | |
| "learning_rate": 9.17948038176034e-05, | |
| "loss": 0.6816, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.09549973835688122, | |
| "grad_norm": 0.5399094070955297, | |
| "learning_rate": 9.166224814422058e-05, | |
| "loss": 0.9374, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.0968079539508111, | |
| "grad_norm": 0.7177139918496634, | |
| "learning_rate": 9.152969247083776e-05, | |
| "loss": 0.6665, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.09811616954474098, | |
| "grad_norm": 0.5696746776689743, | |
| "learning_rate": 9.139713679745493e-05, | |
| "loss": 0.9129, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.09942438513867086, | |
| "grad_norm": 0.638093758359057, | |
| "learning_rate": 9.126458112407211e-05, | |
| "loss": 0.6675, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.10073260073260074, | |
| "grad_norm": 0.6020538420505003, | |
| "learning_rate": 9.11320254506893e-05, | |
| "loss": 0.8901, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.10204081632653061, | |
| "grad_norm": 0.6120466557768905, | |
| "learning_rate": 9.099946977730648e-05, | |
| "loss": 0.6796, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.1033490319204605, | |
| "grad_norm": 0.6210967554289628, | |
| "learning_rate": 9.086691410392365e-05, | |
| "loss": 0.8869, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.10465724751439037, | |
| "grad_norm": 0.8091885327796373, | |
| "learning_rate": 9.073435843054084e-05, | |
| "loss": 0.6625, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.10596546310832025, | |
| "grad_norm": 0.5779837898387246, | |
| "learning_rate": 9.060180275715801e-05, | |
| "loss": 0.8876, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.10727367870225013, | |
| "grad_norm": 0.7299991372030511, | |
| "learning_rate": 9.046924708377518e-05, | |
| "loss": 0.696, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.10858189429618001, | |
| "grad_norm": 0.6620337610636755, | |
| "learning_rate": 9.033669141039237e-05, | |
| "loss": 0.9008, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "grad_norm": 0.6010699007449223, | |
| "learning_rate": 9.020413573700954e-05, | |
| "loss": 0.6712, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.11119832548403977, | |
| "grad_norm": 0.5427558713650651, | |
| "learning_rate": 9.007158006362673e-05, | |
| "loss": 0.8753, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.11250654107796965, | |
| "grad_norm": 0.6742371579613256, | |
| "learning_rate": 8.993902439024391e-05, | |
| "loss": 0.6737, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.11381475667189953, | |
| "grad_norm": 0.7058859567453811, | |
| "learning_rate": 8.980646871686109e-05, | |
| "loss": 0.8956, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.1151229722658294, | |
| "grad_norm": 0.7684505962300139, | |
| "learning_rate": 8.967391304347826e-05, | |
| "loss": 0.6851, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.11643118785975928, | |
| "grad_norm": 0.5221308693647347, | |
| "learning_rate": 8.954135737009545e-05, | |
| "loss": 0.8774, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.11773940345368916, | |
| "grad_norm": 0.6747737190536728, | |
| "learning_rate": 8.940880169671262e-05, | |
| "loss": 0.6816, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.11904761904761904, | |
| "grad_norm": 0.46024093702145724, | |
| "learning_rate": 8.92762460233298e-05, | |
| "loss": 0.8785, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.12035583464154893, | |
| "grad_norm": 0.5572757908727249, | |
| "learning_rate": 8.914369034994699e-05, | |
| "loss": 0.6749, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.12166405023547881, | |
| "grad_norm": 0.5755409211612259, | |
| "learning_rate": 8.901113467656415e-05, | |
| "loss": 0.8996, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.12297226582940869, | |
| "grad_norm": 0.44743009853908355, | |
| "learning_rate": 8.887857900318134e-05, | |
| "loss": 0.6812, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.12428048142333857, | |
| "grad_norm": 0.5356391814605695, | |
| "learning_rate": 8.874602332979852e-05, | |
| "loss": 0.8962, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.12558869701726844, | |
| "grad_norm": 0.9142027438043182, | |
| "learning_rate": 8.86134676564157e-05, | |
| "loss": 0.6923, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.12689691261119831, | |
| "grad_norm": 0.5787045277844781, | |
| "learning_rate": 8.848091198303288e-05, | |
| "loss": 0.8958, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.1282051282051282, | |
| "grad_norm": 0.7928771640327954, | |
| "learning_rate": 8.834835630965005e-05, | |
| "loss": 0.6926, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.12951334379905807, | |
| "grad_norm": 0.6614005586526391, | |
| "learning_rate": 8.821580063626723e-05, | |
| "loss": 0.8844, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.13082155939298795, | |
| "grad_norm": 0.49917342117601304, | |
| "learning_rate": 8.808324496288441e-05, | |
| "loss": 0.6839, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.13212977498691783, | |
| "grad_norm": 0.6870942412562521, | |
| "learning_rate": 8.79506892895016e-05, | |
| "loss": 0.937, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.13343799058084774, | |
| "grad_norm": 0.48623360323222364, | |
| "learning_rate": 8.781813361611877e-05, | |
| "loss": 0.6424, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.13474620617477762, | |
| "grad_norm": 0.522869801801631, | |
| "learning_rate": 8.768557794273596e-05, | |
| "loss": 0.9139, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.1360544217687075, | |
| "grad_norm": 0.8763988251800717, | |
| "learning_rate": 8.755302226935313e-05, | |
| "loss": 0.6746, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.13736263736263737, | |
| "grad_norm": 0.7101195150803217, | |
| "learning_rate": 8.74204665959703e-05, | |
| "loss": 0.8867, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.13867085295656725, | |
| "grad_norm": 0.6262246747509773, | |
| "learning_rate": 8.728791092258749e-05, | |
| "loss": 0.6833, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.13997906855049713, | |
| "grad_norm": 0.5315808280206341, | |
| "learning_rate": 8.715535524920468e-05, | |
| "loss": 0.9028, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.141287284144427, | |
| "grad_norm": 0.5625387450672273, | |
| "learning_rate": 8.702279957582185e-05, | |
| "loss": 0.6623, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.1425954997383569, | |
| "grad_norm": 0.6106698449027703, | |
| "learning_rate": 8.689024390243903e-05, | |
| "loss": 0.9054, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.14390371533228677, | |
| "grad_norm": 0.785333814713217, | |
| "learning_rate": 8.67576882290562e-05, | |
| "loss": 0.6621, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.14521193092621665, | |
| "grad_norm": 0.5341457759006656, | |
| "learning_rate": 8.662513255567338e-05, | |
| "loss": 0.8977, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.14652014652014653, | |
| "grad_norm": 0.4836909763150667, | |
| "learning_rate": 8.649257688229057e-05, | |
| "loss": 0.643, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.1478283621140764, | |
| "grad_norm": 0.6214661415425415, | |
| "learning_rate": 8.636002120890775e-05, | |
| "loss": 0.8619, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.14913657770800628, | |
| "grad_norm": 0.44754299062781455, | |
| "learning_rate": 8.622746553552492e-05, | |
| "loss": 0.6545, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.15044479330193616, | |
| "grad_norm": 0.654698416147961, | |
| "learning_rate": 8.609490986214211e-05, | |
| "loss": 0.8687, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.15175300889586604, | |
| "grad_norm": 0.7796186456327326, | |
| "learning_rate": 8.596235418875928e-05, | |
| "loss": 0.6516, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.15306122448979592, | |
| "grad_norm": 0.5619622188020412, | |
| "learning_rate": 8.582979851537646e-05, | |
| "loss": 0.9019, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.1543694400837258, | |
| "grad_norm": 0.589108674850363, | |
| "learning_rate": 8.569724284199364e-05, | |
| "loss": 0.6514, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.15567765567765568, | |
| "grad_norm": 0.9533885730603633, | |
| "learning_rate": 8.556468716861083e-05, | |
| "loss": 0.8537, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.15698587127158556, | |
| "grad_norm": 0.5479885592424896, | |
| "learning_rate": 8.5432131495228e-05, | |
| "loss": 0.6615, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.15829408686551544, | |
| "grad_norm": 0.6153890373628342, | |
| "learning_rate": 8.529957582184517e-05, | |
| "loss": 0.8631, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.15960230245944532, | |
| "grad_norm": 0.7917177530306803, | |
| "learning_rate": 8.516702014846236e-05, | |
| "loss": 0.6616, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.1609105180533752, | |
| "grad_norm": 0.656469547745639, | |
| "learning_rate": 8.503446447507953e-05, | |
| "loss": 0.8929, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.16221873364730507, | |
| "grad_norm": 0.5569826880804676, | |
| "learning_rate": 8.490190880169672e-05, | |
| "loss": 0.6477, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.16352694924123495, | |
| "grad_norm": 0.6145021073052068, | |
| "learning_rate": 8.47693531283139e-05, | |
| "loss": 0.8835, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.16483516483516483, | |
| "grad_norm": 0.6285934467054461, | |
| "learning_rate": 8.463679745493108e-05, | |
| "loss": 0.6549, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.1661433804290947, | |
| "grad_norm": 0.4969597278981036, | |
| "learning_rate": 8.450424178154825e-05, | |
| "loss": 0.8846, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.1674515960230246, | |
| "grad_norm": 0.6062848536416026, | |
| "learning_rate": 8.437168610816544e-05, | |
| "loss": 0.6269, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.16875981161695447, | |
| "grad_norm": 0.5265730484032111, | |
| "learning_rate": 8.423913043478261e-05, | |
| "loss": 0.8592, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.17006802721088435, | |
| "grad_norm": 0.5811110234076874, | |
| "learning_rate": 8.41065747613998e-05, | |
| "loss": 0.6544, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.17137624280481423, | |
| "grad_norm": 0.5639694934132476, | |
| "learning_rate": 8.397401908801698e-05, | |
| "loss": 0.8732, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.1726844583987441, | |
| "grad_norm": 0.7531411828367692, | |
| "learning_rate": 8.384146341463415e-05, | |
| "loss": 0.6683, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.17399267399267399, | |
| "grad_norm": 0.5146605068810605, | |
| "learning_rate": 8.370890774125133e-05, | |
| "loss": 0.8911, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.17530088958660386, | |
| "grad_norm": 0.5881044587524927, | |
| "learning_rate": 8.357635206786851e-05, | |
| "loss": 0.6957, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.17660910518053374, | |
| "grad_norm": 0.6108606112713066, | |
| "learning_rate": 8.344379639448568e-05, | |
| "loss": 0.864, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.17791732077446362, | |
| "grad_norm": 0.6838348363870184, | |
| "learning_rate": 8.331124072110287e-05, | |
| "loss": 0.6382, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.1792255363683935, | |
| "grad_norm": 0.5844899134885503, | |
| "learning_rate": 8.317868504772006e-05, | |
| "loss": 0.895, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.18053375196232338, | |
| "grad_norm": 0.40337593852276243, | |
| "learning_rate": 8.304612937433723e-05, | |
| "loss": 0.649, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.18184196755625326, | |
| "grad_norm": 0.49730732309328707, | |
| "learning_rate": 8.29135737009544e-05, | |
| "loss": 0.8442, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.18315018315018314, | |
| "grad_norm": 0.5590991907664666, | |
| "learning_rate": 8.278101802757159e-05, | |
| "loss": 0.6445, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.18445839874411302, | |
| "grad_norm": 0.6588503605001691, | |
| "learning_rate": 8.264846235418876e-05, | |
| "loss": 0.864, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.1857666143380429, | |
| "grad_norm": 0.6197416228060506, | |
| "learning_rate": 8.251590668080595e-05, | |
| "loss": 0.6094, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.1870748299319728, | |
| "grad_norm": 0.5482300336211388, | |
| "learning_rate": 8.238335100742312e-05, | |
| "loss": 0.8619, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.18838304552590268, | |
| "grad_norm": 0.72709681776675, | |
| "learning_rate": 8.225079533404029e-05, | |
| "loss": 0.6456, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.18969126111983256, | |
| "grad_norm": 0.5600056919125233, | |
| "learning_rate": 8.211823966065748e-05, | |
| "loss": 0.86, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.19099947671376244, | |
| "grad_norm": 0.7289799556624317, | |
| "learning_rate": 8.198568398727466e-05, | |
| "loss": 0.6271, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.19230769230769232, | |
| "grad_norm": 0.4590489049870012, | |
| "learning_rate": 8.185312831389184e-05, | |
| "loss": 0.8897, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.1936159079016222, | |
| "grad_norm": 0.8036211881560831, | |
| "learning_rate": 8.172057264050902e-05, | |
| "loss": 0.6434, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.19492412349555208, | |
| "grad_norm": 0.49768694148703807, | |
| "learning_rate": 8.15880169671262e-05, | |
| "loss": 0.8417, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.19623233908948196, | |
| "grad_norm": 0.771940987579212, | |
| "learning_rate": 8.145546129374337e-05, | |
| "loss": 0.626, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.19754055468341183, | |
| "grad_norm": 0.5487861196155561, | |
| "learning_rate": 8.132290562036055e-05, | |
| "loss": 0.863, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.1988487702773417, | |
| "grad_norm": 0.5767745735777565, | |
| "learning_rate": 8.119034994697774e-05, | |
| "loss": 0.6327, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.2001569858712716, | |
| "grad_norm": 0.5740160328527427, | |
| "learning_rate": 8.105779427359491e-05, | |
| "loss": 0.8293, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.20146520146520147, | |
| "grad_norm": 0.6248485702307536, | |
| "learning_rate": 8.09252386002121e-05, | |
| "loss": 0.6525, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.20277341705913135, | |
| "grad_norm": 0.5959014412308178, | |
| "learning_rate": 8.079268292682927e-05, | |
| "loss": 0.8793, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.20408163265306123, | |
| "grad_norm": 0.5523669620436882, | |
| "learning_rate": 8.066012725344644e-05, | |
| "loss": 0.6549, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.2053898482469911, | |
| "grad_norm": 0.6456441196706465, | |
| "learning_rate": 8.052757158006363e-05, | |
| "loss": 0.8688, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.206698063840921, | |
| "grad_norm": 0.5659881168480183, | |
| "learning_rate": 8.039501590668082e-05, | |
| "loss": 0.6565, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.20800627943485087, | |
| "grad_norm": 0.5616175050073812, | |
| "learning_rate": 8.026246023329799e-05, | |
| "loss": 0.8418, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.20931449502878074, | |
| "grad_norm": 0.6028672819947086, | |
| "learning_rate": 8.012990455991518e-05, | |
| "loss": 0.6231, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.21062271062271062, | |
| "grad_norm": 0.546703449007772, | |
| "learning_rate": 7.999734888653235e-05, | |
| "loss": 0.8631, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.2119309262166405, | |
| "grad_norm": 0.3849996641772154, | |
| "learning_rate": 7.986479321314952e-05, | |
| "loss": 0.6246, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.21323914181057038, | |
| "grad_norm": 0.4435598672137561, | |
| "learning_rate": 7.973223753976671e-05, | |
| "loss": 0.886, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.21454735740450026, | |
| "grad_norm": 0.6111533721181235, | |
| "learning_rate": 7.95996818663839e-05, | |
| "loss": 0.6494, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.21585557299843014, | |
| "grad_norm": 0.5729934681943539, | |
| "learning_rate": 7.946712619300107e-05, | |
| "loss": 0.8618, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.21716378859236002, | |
| "grad_norm": 0.6355561878934224, | |
| "learning_rate": 7.933457051961824e-05, | |
| "loss": 0.6303, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.2184720041862899, | |
| "grad_norm": 0.4669924162265557, | |
| "learning_rate": 7.920201484623541e-05, | |
| "loss": 0.8654, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "grad_norm": 0.6815024166926259, | |
| "learning_rate": 7.90694591728526e-05, | |
| "loss": 0.6299, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.22108843537414966, | |
| "grad_norm": 0.5542650967967168, | |
| "learning_rate": 7.893690349946978e-05, | |
| "loss": 0.8661, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.22239665096807953, | |
| "grad_norm": 0.5596504228795459, | |
| "learning_rate": 7.880434782608696e-05, | |
| "loss": 0.6462, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.2237048665620094, | |
| "grad_norm": 0.6616295642250447, | |
| "learning_rate": 7.867179215270414e-05, | |
| "loss": 0.8638, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.2250130821559393, | |
| "grad_norm": 0.42204578619386185, | |
| "learning_rate": 7.853923647932132e-05, | |
| "loss": 0.6493, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.22632129774986917, | |
| "grad_norm": 0.6146957520525497, | |
| "learning_rate": 7.840668080593849e-05, | |
| "loss": 0.8621, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.22762951334379905, | |
| "grad_norm": 0.595798638618577, | |
| "learning_rate": 7.827412513255567e-05, | |
| "loss": 0.623, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.22893772893772893, | |
| "grad_norm": 0.624548132547215, | |
| "learning_rate": 7.814156945917286e-05, | |
| "loss": 0.8582, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.2302459445316588, | |
| "grad_norm": 0.6305314992926004, | |
| "learning_rate": 7.800901378579003e-05, | |
| "loss": 0.6336, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.2315541601255887, | |
| "grad_norm": 0.7759096340841495, | |
| "learning_rate": 7.787645811240722e-05, | |
| "loss": 0.8387, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.23286237571951857, | |
| "grad_norm": 0.5604953018516682, | |
| "learning_rate": 7.774390243902439e-05, | |
| "loss": 0.6283, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.23417059131344845, | |
| "grad_norm": 0.7099271936900229, | |
| "learning_rate": 7.761134676564156e-05, | |
| "loss": 0.8628, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.23547880690737832, | |
| "grad_norm": 0.474479805099593, | |
| "learning_rate": 7.747879109225875e-05, | |
| "loss": 0.6259, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.2367870225013082, | |
| "grad_norm": 0.5416834886960876, | |
| "learning_rate": 7.734623541887594e-05, | |
| "loss": 0.8567, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.23809523809523808, | |
| "grad_norm": 0.5379729696776889, | |
| "learning_rate": 7.721367974549311e-05, | |
| "loss": 0.6096, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.239403453689168, | |
| "grad_norm": 0.6138823092852339, | |
| "learning_rate": 7.70811240721103e-05, | |
| "loss": 0.8454, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.24071166928309787, | |
| "grad_norm": 0.7163967325753281, | |
| "learning_rate": 7.694856839872747e-05, | |
| "loss": 0.6751, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.24201988487702775, | |
| "grad_norm": 0.7446327434350999, | |
| "learning_rate": 7.681601272534464e-05, | |
| "loss": 0.8933, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.24332810047095763, | |
| "grad_norm": 0.674203813956785, | |
| "learning_rate": 7.668345705196183e-05, | |
| "loss": 0.6436, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.2446363160648875, | |
| "grad_norm": 0.5298303453138061, | |
| "learning_rate": 7.655090137857901e-05, | |
| "loss": 0.8383, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.24594453165881738, | |
| "grad_norm": 0.46309042220240854, | |
| "learning_rate": 7.641834570519619e-05, | |
| "loss": 0.624, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.24725274725274726, | |
| "grad_norm": 0.527675840331917, | |
| "learning_rate": 7.628579003181336e-05, | |
| "loss": 0.8423, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.24856096284667714, | |
| "grad_norm": 0.6656931192324065, | |
| "learning_rate": 7.615323435843054e-05, | |
| "loss": 0.66, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.24986917844060702, | |
| "grad_norm": 0.5865447145073008, | |
| "learning_rate": 7.602067868504772e-05, | |
| "loss": 0.8659, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.25117739403453687, | |
| "grad_norm": 0.5349083325210562, | |
| "learning_rate": 7.58881230116649e-05, | |
| "loss": 0.6439, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.2524856096284668, | |
| "grad_norm": 0.5557018540060792, | |
| "learning_rate": 7.575556733828209e-05, | |
| "loss": 0.846, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.25379382522239663, | |
| "grad_norm": 0.5858709719889754, | |
| "learning_rate": 7.562301166489926e-05, | |
| "loss": 0.6473, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.25510204081632654, | |
| "grad_norm": 0.5461126801060399, | |
| "learning_rate": 7.549045599151643e-05, | |
| "loss": 0.874, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 0.6905067688723441, | |
| "learning_rate": 7.535790031813362e-05, | |
| "loss": 0.6091, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.2577184720041863, | |
| "grad_norm": 0.7656408539875517, | |
| "learning_rate": 7.522534464475079e-05, | |
| "loss": 0.8679, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.25902668759811615, | |
| "grad_norm": 0.7233211006267372, | |
| "learning_rate": 7.509278897136798e-05, | |
| "loss": 0.6749, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.26033490319204605, | |
| "grad_norm": 0.5989700856780242, | |
| "learning_rate": 7.496023329798517e-05, | |
| "loss": 0.833, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.2616431187859759, | |
| "grad_norm": 0.5435529702312377, | |
| "learning_rate": 7.482767762460234e-05, | |
| "loss": 0.658, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.2629513343799058, | |
| "grad_norm": 0.5335997393071716, | |
| "learning_rate": 7.469512195121951e-05, | |
| "loss": 0.8399, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.26425954997383566, | |
| "grad_norm": 0.9150436835320093, | |
| "learning_rate": 7.45625662778367e-05, | |
| "loss": 0.6114, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.26556776556776557, | |
| "grad_norm": 0.5384709854955332, | |
| "learning_rate": 7.443001060445387e-05, | |
| "loss": 0.8413, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.2668759811616955, | |
| "grad_norm": 0.8677435387982771, | |
| "learning_rate": 7.429745493107106e-05, | |
| "loss": 0.6475, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.2681841967556253, | |
| "grad_norm": 0.5123690892694776, | |
| "learning_rate": 7.416489925768824e-05, | |
| "loss": 0.849, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.26949241234955523, | |
| "grad_norm": 0.5800543468099533, | |
| "learning_rate": 7.403234358430541e-05, | |
| "loss": 0.6127, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.2708006279434851, | |
| "grad_norm": 0.8015793490826957, | |
| "learning_rate": 7.389978791092259e-05, | |
| "loss": 0.8682, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.272108843537415, | |
| "grad_norm": 0.5876567022243202, | |
| "learning_rate": 7.376723223753977e-05, | |
| "loss": 0.6164, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.27341705913134484, | |
| "grad_norm": 0.5542805867826196, | |
| "learning_rate": 7.363467656415695e-05, | |
| "loss": 0.8808, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.27472527472527475, | |
| "grad_norm": 0.5170888187943017, | |
| "learning_rate": 7.350212089077413e-05, | |
| "loss": 0.6375, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.2760334903192046, | |
| "grad_norm": 0.622686247940597, | |
| "learning_rate": 7.336956521739132e-05, | |
| "loss": 0.8605, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.2773417059131345, | |
| "grad_norm": 0.5587125235931543, | |
| "learning_rate": 7.323700954400848e-05, | |
| "loss": 0.6237, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.27864992150706436, | |
| "grad_norm": 0.6015065825515082, | |
| "learning_rate": 7.310445387062566e-05, | |
| "loss": 0.8276, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.27995813710099426, | |
| "grad_norm": 0.5122297183116, | |
| "learning_rate": 7.297189819724285e-05, | |
| "loss": 0.6289, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.2812663526949241, | |
| "grad_norm": 0.5663980177757836, | |
| "learning_rate": 7.283934252386002e-05, | |
| "loss": 0.861, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.282574568288854, | |
| "grad_norm": 0.7939853395114802, | |
| "learning_rate": 7.270678685047721e-05, | |
| "loss": 0.6551, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.2838827838827839, | |
| "grad_norm": 0.5287178325117134, | |
| "learning_rate": 7.257423117709438e-05, | |
| "loss": 0.8747, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.2851909994767138, | |
| "grad_norm": 0.562616836311441, | |
| "learning_rate": 7.244167550371155e-05, | |
| "loss": 0.6357, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.28649921507064363, | |
| "grad_norm": 0.5117823698698972, | |
| "learning_rate": 7.230911983032874e-05, | |
| "loss": 0.8377, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.28780743066457354, | |
| "grad_norm": 0.6453579912049506, | |
| "learning_rate": 7.217656415694593e-05, | |
| "loss": 0.6476, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.2891156462585034, | |
| "grad_norm": 0.8731364069825441, | |
| "learning_rate": 7.20440084835631e-05, | |
| "loss": 0.8773, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.2904238618524333, | |
| "grad_norm": 0.6010315625749808, | |
| "learning_rate": 7.191145281018028e-05, | |
| "loss": 0.6083, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.29173207744636315, | |
| "grad_norm": 0.5201002760771769, | |
| "learning_rate": 7.177889713679746e-05, | |
| "loss": 0.8496, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.29304029304029305, | |
| "grad_norm": 0.6001877343124563, | |
| "learning_rate": 7.164634146341463e-05, | |
| "loss": 0.6389, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.2943485086342229, | |
| "grad_norm": 0.4371089670254897, | |
| "learning_rate": 7.151378579003182e-05, | |
| "loss": 0.8706, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.2956567242281528, | |
| "grad_norm": 0.5778087926720157, | |
| "learning_rate": 7.1381230116649e-05, | |
| "loss": 0.5939, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.29696493982208266, | |
| "grad_norm": 0.5241829955540505, | |
| "learning_rate": 7.124867444326617e-05, | |
| "loss": 0.8461, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.29827315541601257, | |
| "grad_norm": 0.5396461429281756, | |
| "learning_rate": 7.111611876988336e-05, | |
| "loss": 0.645, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.2995813710099424, | |
| "grad_norm": 0.7287884001398448, | |
| "learning_rate": 7.098356309650053e-05, | |
| "loss": 0.8349, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.3008895866038723, | |
| "grad_norm": 0.43985568295051974, | |
| "learning_rate": 7.08510074231177e-05, | |
| "loss": 0.6023, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.3021978021978022, | |
| "grad_norm": 0.512789679014569, | |
| "learning_rate": 7.071845174973489e-05, | |
| "loss": 0.8592, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.3035060177917321, | |
| "grad_norm": 0.5758747078766807, | |
| "learning_rate": 7.058589607635208e-05, | |
| "loss": 0.6302, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.30481423338566194, | |
| "grad_norm": 0.605959590741427, | |
| "learning_rate": 7.045334040296925e-05, | |
| "loss": 0.8437, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.30612244897959184, | |
| "grad_norm": 0.5064367889657759, | |
| "learning_rate": 7.032078472958644e-05, | |
| "loss": 0.6228, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.3074306645735217, | |
| "grad_norm": 0.4968490853247213, | |
| "learning_rate": 7.018822905620361e-05, | |
| "loss": 0.8556, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.3087388801674516, | |
| "grad_norm": 0.5772837050103059, | |
| "learning_rate": 7.005567338282078e-05, | |
| "loss": 0.6234, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.31004709576138145, | |
| "grad_norm": 0.6941306253378574, | |
| "learning_rate": 6.992311770943797e-05, | |
| "loss": 0.8362, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.31135531135531136, | |
| "grad_norm": 0.39822917719900375, | |
| "learning_rate": 6.979056203605516e-05, | |
| "loss": 0.6162, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.3126635269492412, | |
| "grad_norm": 0.4661807662020318, | |
| "learning_rate": 6.965800636267233e-05, | |
| "loss": 0.8414, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.3139717425431711, | |
| "grad_norm": 0.45291786679307866, | |
| "learning_rate": 6.95254506892895e-05, | |
| "loss": 0.6433, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.31527995813710097, | |
| "grad_norm": 0.6300442868544786, | |
| "learning_rate": 6.939289501590669e-05, | |
| "loss": 0.8564, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.3165881737310309, | |
| "grad_norm": 0.5144882452050621, | |
| "learning_rate": 6.926033934252386e-05, | |
| "loss": 0.6018, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.3178963893249607, | |
| "grad_norm": 0.5886230291498049, | |
| "learning_rate": 6.912778366914105e-05, | |
| "loss": 0.8861, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.31920460491889063, | |
| "grad_norm": 0.6836197118690605, | |
| "learning_rate": 6.899522799575823e-05, | |
| "loss": 0.6267, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.32051282051282054, | |
| "grad_norm": 0.4951991615110591, | |
| "learning_rate": 6.88626723223754e-05, | |
| "loss": 0.8387, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.3218210361067504, | |
| "grad_norm": 0.6412063103894238, | |
| "learning_rate": 6.873011664899258e-05, | |
| "loss": 0.5927, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.3231292517006803, | |
| "grad_norm": 0.5782953097169173, | |
| "learning_rate": 6.859756097560976e-05, | |
| "loss": 0.8418, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.32443746729461015, | |
| "grad_norm": 0.5200166820109174, | |
| "learning_rate": 6.846500530222694e-05, | |
| "loss": 0.6324, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.32574568288854006, | |
| "grad_norm": 0.4922169436686345, | |
| "learning_rate": 6.833244962884412e-05, | |
| "loss": 0.8298, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.3270538984824699, | |
| "grad_norm": 0.4912804994081646, | |
| "learning_rate": 6.819989395546131e-05, | |
| "loss": 0.6123, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.3283621140763998, | |
| "grad_norm": 0.5502723124138919, | |
| "learning_rate": 6.806733828207848e-05, | |
| "loss": 0.8522, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.32967032967032966, | |
| "grad_norm": 0.5875399056209475, | |
| "learning_rate": 6.793478260869565e-05, | |
| "loss": 0.6144, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.33097854526425957, | |
| "grad_norm": 0.5609437523704139, | |
| "learning_rate": 6.780222693531283e-05, | |
| "loss": 0.8347, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.3322867608581894, | |
| "grad_norm": 0.338386078229902, | |
| "learning_rate": 6.766967126193001e-05, | |
| "loss": 0.587, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.33359497645211933, | |
| "grad_norm": 0.48756512910329014, | |
| "learning_rate": 6.75371155885472e-05, | |
| "loss": 0.8327, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.3349031920460492, | |
| "grad_norm": 0.5143776858886028, | |
| "learning_rate": 6.740455991516437e-05, | |
| "loss": 0.6092, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.3362114076399791, | |
| "grad_norm": 0.5332261671554006, | |
| "learning_rate": 6.727200424178154e-05, | |
| "loss": 0.8486, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.33751962323390894, | |
| "grad_norm": 0.6594969470024875, | |
| "learning_rate": 6.713944856839873e-05, | |
| "loss": 0.6256, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.33882783882783885, | |
| "grad_norm": 0.4553927815832327, | |
| "learning_rate": 6.70068928950159e-05, | |
| "loss": 0.8357, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.3401360544217687, | |
| "grad_norm": 1.1222853735995233, | |
| "learning_rate": 6.687433722163309e-05, | |
| "loss": 0.6007, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.3414442700156986, | |
| "grad_norm": 0.528439009952554, | |
| "learning_rate": 6.674178154825027e-05, | |
| "loss": 0.856, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.34275248560962845, | |
| "grad_norm": 0.5933307919214319, | |
| "learning_rate": 6.660922587486745e-05, | |
| "loss": 0.6421, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.34406070120355836, | |
| "grad_norm": 0.6607714283195972, | |
| "learning_rate": 6.647667020148462e-05, | |
| "loss": 0.8567, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.3453689167974882, | |
| "grad_norm": 0.7007503714365677, | |
| "learning_rate": 6.63441145281018e-05, | |
| "loss": 0.6207, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.3466771323914181, | |
| "grad_norm": 0.5135207638483642, | |
| "learning_rate": 6.621155885471898e-05, | |
| "loss": 0.8288, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.34798534798534797, | |
| "grad_norm": 0.6140077846492299, | |
| "learning_rate": 6.607900318133616e-05, | |
| "loss": 0.6015, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.3492935635792779, | |
| "grad_norm": 0.5100090518445459, | |
| "learning_rate": 6.594644750795335e-05, | |
| "loss": 0.8348, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.35060177917320773, | |
| "grad_norm": 0.5113632712765585, | |
| "learning_rate": 6.581389183457052e-05, | |
| "loss": 0.5924, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.35190999476713763, | |
| "grad_norm": 0.5039720207488532, | |
| "learning_rate": 6.56813361611877e-05, | |
| "loss": 0.8184, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.3532182103610675, | |
| "grad_norm": 0.49806965997978137, | |
| "learning_rate": 6.554878048780488e-05, | |
| "loss": 0.6046, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.3545264259549974, | |
| "grad_norm": 0.5365120734516775, | |
| "learning_rate": 6.541622481442205e-05, | |
| "loss": 0.8514, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.35583464154892724, | |
| "grad_norm": 0.4007231339986853, | |
| "learning_rate": 6.528366914103924e-05, | |
| "loss": 0.5871, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.35714285714285715, | |
| "grad_norm": 0.46254442147732966, | |
| "learning_rate": 6.515111346765643e-05, | |
| "loss": 0.8411, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.358451072736787, | |
| "grad_norm": 0.6576167821737804, | |
| "learning_rate": 6.50185577942736e-05, | |
| "loss": 0.6293, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.3597592883307169, | |
| "grad_norm": 0.5967952480358752, | |
| "learning_rate": 6.488600212089077e-05, | |
| "loss": 0.8511, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.36106750392464676, | |
| "grad_norm": 0.43508005533871247, | |
| "learning_rate": 6.475344644750796e-05, | |
| "loss": 0.621, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.36237571951857667, | |
| "grad_norm": 0.48108090717243124, | |
| "learning_rate": 6.462089077412513e-05, | |
| "loss": 0.8263, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.3636839351125065, | |
| "grad_norm": 0.568823321053749, | |
| "learning_rate": 6.448833510074232e-05, | |
| "loss": 0.5941, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.3649921507064364, | |
| "grad_norm": 0.5222642833529956, | |
| "learning_rate": 6.43557794273595e-05, | |
| "loss": 0.8525, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.3663003663003663, | |
| "grad_norm": 0.5919546377210694, | |
| "learning_rate": 6.422322375397666e-05, | |
| "loss": 0.5901, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.3676085818942962, | |
| "grad_norm": 0.46817017239205494, | |
| "learning_rate": 6.409066808059385e-05, | |
| "loss": 0.8678, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.36891679748822603, | |
| "grad_norm": 0.5530202729949584, | |
| "learning_rate": 6.395811240721103e-05, | |
| "loss": 0.6401, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.37022501308215594, | |
| "grad_norm": 0.6007081120859754, | |
| "learning_rate": 6.382555673382821e-05, | |
| "loss": 0.8291, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.3715332286760858, | |
| "grad_norm": 0.39683924232406637, | |
| "learning_rate": 6.36930010604454e-05, | |
| "loss": 0.6127, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.3728414442700157, | |
| "grad_norm": 0.5452479168330785, | |
| "learning_rate": 6.356044538706257e-05, | |
| "loss": 0.8495, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.3741496598639456, | |
| "grad_norm": 0.8980580036344767, | |
| "learning_rate": 6.342788971367974e-05, | |
| "loss": 0.6471, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.37545787545787546, | |
| "grad_norm": 0.48885709974761143, | |
| "learning_rate": 6.329533404029692e-05, | |
| "loss": 0.8626, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.37676609105180536, | |
| "grad_norm": 0.5684144901718525, | |
| "learning_rate": 6.316277836691411e-05, | |
| "loss": 0.5869, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.3780743066457352, | |
| "grad_norm": 0.5836928349425793, | |
| "learning_rate": 6.303022269353128e-05, | |
| "loss": 0.8205, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.3793825222396651, | |
| "grad_norm": 0.5698475129755676, | |
| "learning_rate": 6.289766702014847e-05, | |
| "loss": 0.5985, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.38069073783359497, | |
| "grad_norm": 0.5257222435975014, | |
| "learning_rate": 6.276511134676564e-05, | |
| "loss": 0.8572, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.3819989534275249, | |
| "grad_norm": 0.524065233271633, | |
| "learning_rate": 6.263255567338282e-05, | |
| "loss": 0.5917, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.38330716902145473, | |
| "grad_norm": 0.5635081193305911, | |
| "learning_rate": 6.25e-05, | |
| "loss": 0.8333, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.7155336191069814, | |
| "learning_rate": 6.236744432661719e-05, | |
| "loss": 0.6211, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.3859236002093145, | |
| "grad_norm": 0.5087934741189605, | |
| "learning_rate": 6.223488865323436e-05, | |
| "loss": 0.8531, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.3872318158032444, | |
| "grad_norm": 0.5635575417178986, | |
| "learning_rate": 6.210233297985155e-05, | |
| "loss": 0.6134, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.38854003139717425, | |
| "grad_norm": 0.5136035425363329, | |
| "learning_rate": 6.196977730646872e-05, | |
| "loss": 0.8142, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.38984824699110415, | |
| "grad_norm": 0.6741458077201186, | |
| "learning_rate": 6.183722163308589e-05, | |
| "loss": 0.6388, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.391156462585034, | |
| "grad_norm": 0.49349088755882425, | |
| "learning_rate": 6.170466595970308e-05, | |
| "loss": 0.8764, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.3924646781789639, | |
| "grad_norm": 0.5383608654756946, | |
| "learning_rate": 6.157211028632026e-05, | |
| "loss": 0.6271, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.39377289377289376, | |
| "grad_norm": 0.5194331296491325, | |
| "learning_rate": 6.143955461293744e-05, | |
| "loss": 0.8087, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.39508110936682367, | |
| "grad_norm": 1.1199076383208129, | |
| "learning_rate": 6.130699893955462e-05, | |
| "loss": 0.5948, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.3963893249607535, | |
| "grad_norm": 0.5119305052018042, | |
| "learning_rate": 6.11744432661718e-05, | |
| "loss": 0.7975, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.3976975405546834, | |
| "grad_norm": 0.6365196412735817, | |
| "learning_rate": 6.104188759278897e-05, | |
| "loss": 0.614, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.3990057561486133, | |
| "grad_norm": 0.4736074626159983, | |
| "learning_rate": 6.0909331919406154e-05, | |
| "loss": 0.8416, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.4003139717425432, | |
| "grad_norm": 0.8101929076329764, | |
| "learning_rate": 6.077677624602334e-05, | |
| "loss": 0.6173, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.40162218733647304, | |
| "grad_norm": 0.6168972276400505, | |
| "learning_rate": 6.0644220572640506e-05, | |
| "loss": 0.8407, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.40293040293040294, | |
| "grad_norm": 0.7038053233260652, | |
| "learning_rate": 6.051166489925769e-05, | |
| "loss": 0.6247, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.4042386185243328, | |
| "grad_norm": 0.457467448213452, | |
| "learning_rate": 6.037910922587487e-05, | |
| "loss": 0.8322, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.4055468341182627, | |
| "grad_norm": 0.49357888449301257, | |
| "learning_rate": 6.0246553552492044e-05, | |
| "loss": 0.5908, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.40685504971219255, | |
| "grad_norm": 0.5808455738349774, | |
| "learning_rate": 6.011399787910923e-05, | |
| "loss": 0.8208, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.40816326530612246, | |
| "grad_norm": 0.7022945988432154, | |
| "learning_rate": 5.998144220572641e-05, | |
| "loss": 0.6518, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.4094714809000523, | |
| "grad_norm": 0.6078075113609221, | |
| "learning_rate": 5.984888653234358e-05, | |
| "loss": 0.8636, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.4107796964939822, | |
| "grad_norm": 0.6199267742106505, | |
| "learning_rate": 5.971633085896077e-05, | |
| "loss": 0.5869, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.41208791208791207, | |
| "grad_norm": 0.6063216826492921, | |
| "learning_rate": 5.958377518557795e-05, | |
| "loss": 0.8341, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.413396127681842, | |
| "grad_norm": 0.3985769574080392, | |
| "learning_rate": 5.945121951219512e-05, | |
| "loss": 0.6207, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.4147043432757718, | |
| "grad_norm": 0.5360752659857189, | |
| "learning_rate": 5.931866383881231e-05, | |
| "loss": 0.8621, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.41601255886970173, | |
| "grad_norm": 0.7664757845756865, | |
| "learning_rate": 5.9186108165429486e-05, | |
| "loss": 0.6172, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.4173207744636316, | |
| "grad_norm": 0.5679551885466096, | |
| "learning_rate": 5.905355249204666e-05, | |
| "loss": 0.8324, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.4186289900575615, | |
| "grad_norm": 0.5745648833281791, | |
| "learning_rate": 5.8920996818663845e-05, | |
| "loss": 0.5986, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.41993720565149134, | |
| "grad_norm": 0.5395040305019896, | |
| "learning_rate": 5.8788441145281024e-05, | |
| "loss": 0.8278, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.42124542124542125, | |
| "grad_norm": 0.5945122594045797, | |
| "learning_rate": 5.86558854718982e-05, | |
| "loss": 0.6128, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.4225536368393511, | |
| "grad_norm": 0.4545401837270316, | |
| "learning_rate": 5.852332979851538e-05, | |
| "loss": 0.8388, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.423861852433281, | |
| "grad_norm": 0.5949653779281652, | |
| "learning_rate": 5.839077412513256e-05, | |
| "loss": 0.6167, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.42517006802721086, | |
| "grad_norm": 0.5067052935095019, | |
| "learning_rate": 5.8258218451749735e-05, | |
| "loss": 0.8489, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.42647828362114076, | |
| "grad_norm": 0.5191225717265471, | |
| "learning_rate": 5.812566277836692e-05, | |
| "loss": 0.6059, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.42778649921507067, | |
| "grad_norm": 0.4969767064651156, | |
| "learning_rate": 5.79931071049841e-05, | |
| "loss": 0.8627, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.4290947148090005, | |
| "grad_norm": 0.5785476329537975, | |
| "learning_rate": 5.786055143160127e-05, | |
| "loss": 0.622, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.43040293040293043, | |
| "grad_norm": 0.5598487117314234, | |
| "learning_rate": 5.772799575821846e-05, | |
| "loss": 0.807, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.4317111459968603, | |
| "grad_norm": 0.5427120253597403, | |
| "learning_rate": 5.759544008483564e-05, | |
| "loss": 0.6085, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.4330193615907902, | |
| "grad_norm": 0.5672042251019751, | |
| "learning_rate": 5.746288441145281e-05, | |
| "loss": 0.822, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.43432757718472004, | |
| "grad_norm": 0.2894379338449793, | |
| "learning_rate": 5.733032873806999e-05, | |
| "loss": 0.5945, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.43563579277864994, | |
| "grad_norm": 0.677558336208755, | |
| "learning_rate": 5.719777306468718e-05, | |
| "loss": 0.8413, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.4369440083725798, | |
| "grad_norm": 0.723454845173246, | |
| "learning_rate": 5.706521739130435e-05, | |
| "loss": 0.6216, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.4382522239665097, | |
| "grad_norm": 0.4482745953779932, | |
| "learning_rate": 5.693266171792153e-05, | |
| "loss": 0.8383, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.43956043956043955, | |
| "grad_norm": 0.5812694343372896, | |
| "learning_rate": 5.68001060445387e-05, | |
| "loss": 0.6193, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.44086865515436946, | |
| "grad_norm": 0.6007437160420993, | |
| "learning_rate": 5.666755037115589e-05, | |
| "loss": 0.8203, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.4421768707482993, | |
| "grad_norm": 0.6580604949252575, | |
| "learning_rate": 5.653499469777307e-05, | |
| "loss": 0.6043, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.4434850863422292, | |
| "grad_norm": 0.6046064236672993, | |
| "learning_rate": 5.640243902439024e-05, | |
| "loss": 0.8204, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.44479330193615907, | |
| "grad_norm": 0.47437375038207613, | |
| "learning_rate": 5.6269883351007426e-05, | |
| "loss": 0.6205, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.446101517530089, | |
| "grad_norm": 0.4644396135020371, | |
| "learning_rate": 5.6137327677624605e-05, | |
| "loss": 0.8304, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.4474097331240188, | |
| "grad_norm": 0.6080652395243531, | |
| "learning_rate": 5.600477200424178e-05, | |
| "loss": 0.5971, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.44871794871794873, | |
| "grad_norm": 0.4969712368034865, | |
| "learning_rate": 5.5872216330858964e-05, | |
| "loss": 0.824, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.4500261643118786, | |
| "grad_norm": 0.7769636104632821, | |
| "learning_rate": 5.5739660657476144e-05, | |
| "loss": 0.6071, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.4513343799058085, | |
| "grad_norm": 0.5343464867649641, | |
| "learning_rate": 5.5607104984093316e-05, | |
| "loss": 0.8439, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.45264259549973834, | |
| "grad_norm": 0.4670043204824149, | |
| "learning_rate": 5.54745493107105e-05, | |
| "loss": 0.5924, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.45395081109366825, | |
| "grad_norm": 0.5128069264337731, | |
| "learning_rate": 5.534199363732768e-05, | |
| "loss": 0.8318, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.4552590266875981, | |
| "grad_norm": 0.5401925527479321, | |
| "learning_rate": 5.5209437963944854e-05, | |
| "loss": 0.5857, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.456567242281528, | |
| "grad_norm": 0.5099962477533319, | |
| "learning_rate": 5.507688229056204e-05, | |
| "loss": 0.8202, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.45787545787545786, | |
| "grad_norm": 0.5003587460481527, | |
| "learning_rate": 5.494432661717922e-05, | |
| "loss": 0.58, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.45918367346938777, | |
| "grad_norm": 0.5134647272370393, | |
| "learning_rate": 5.481177094379639e-05, | |
| "loss": 0.8078, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.4604918890633176, | |
| "grad_norm": 0.5469814680290468, | |
| "learning_rate": 5.467921527041357e-05, | |
| "loss": 0.6337, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.4618001046572475, | |
| "grad_norm": 0.592648552852463, | |
| "learning_rate": 5.454665959703076e-05, | |
| "loss": 0.8411, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.4631083202511774, | |
| "grad_norm": 0.5181504434630264, | |
| "learning_rate": 5.441410392364793e-05, | |
| "loss": 0.6083, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.4644165358451073, | |
| "grad_norm": 0.5499418553622303, | |
| "learning_rate": 5.428154825026511e-05, | |
| "loss": 0.849, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.46572475143903713, | |
| "grad_norm": 0.6574654424945503, | |
| "learning_rate": 5.4148992576882296e-05, | |
| "loss": 0.608, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.46703296703296704, | |
| "grad_norm": 0.6187475128909983, | |
| "learning_rate": 5.401643690349947e-05, | |
| "loss": 0.8159, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.4683411826268969, | |
| "grad_norm": 0.534345893303697, | |
| "learning_rate": 5.388388123011665e-05, | |
| "loss": 0.6117, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.4696493982208268, | |
| "grad_norm": 0.4564109249983669, | |
| "learning_rate": 5.3751325556733834e-05, | |
| "loss": 0.8049, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.47095761381475665, | |
| "grad_norm": 0.5578958162524437, | |
| "learning_rate": 5.361876988335101e-05, | |
| "loss": 0.6157, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.47226582940868655, | |
| "grad_norm": 0.500181472769301, | |
| "learning_rate": 5.3486214209968186e-05, | |
| "loss": 0.8502, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.4735740450026164, | |
| "grad_norm": 0.6141834045693259, | |
| "learning_rate": 5.335365853658537e-05, | |
| "loss": 0.6031, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.4748822605965463, | |
| "grad_norm": 0.4961433938028621, | |
| "learning_rate": 5.3221102863202545e-05, | |
| "loss": 0.8375, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "grad_norm": 0.520091439534862, | |
| "learning_rate": 5.3088547189819725e-05, | |
| "loss": 0.5814, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.47749869178440607, | |
| "grad_norm": 0.6206247789922092, | |
| "learning_rate": 5.295599151643691e-05, | |
| "loss": 0.8411, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.478806907378336, | |
| "grad_norm": 0.5343995283355165, | |
| "learning_rate": 5.282343584305408e-05, | |
| "loss": 0.5814, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.48011512297226583, | |
| "grad_norm": 0.6876308319219042, | |
| "learning_rate": 5.269088016967126e-05, | |
| "loss": 0.8112, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.48142333856619574, | |
| "grad_norm": 0.8088277743851824, | |
| "learning_rate": 5.255832449628845e-05, | |
| "loss": 0.5852, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.4827315541601256, | |
| "grad_norm": 0.5226035766833458, | |
| "learning_rate": 5.242576882290562e-05, | |
| "loss": 0.8115, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.4840397697540555, | |
| "grad_norm": 0.711443652596892, | |
| "learning_rate": 5.22932131495228e-05, | |
| "loss": 0.6331, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.48534798534798534, | |
| "grad_norm": 0.44695965081239714, | |
| "learning_rate": 5.216065747613999e-05, | |
| "loss": 0.8312, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.48665620094191525, | |
| "grad_norm": 0.43172836208765664, | |
| "learning_rate": 5.202810180275716e-05, | |
| "loss": 0.6138, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.4879644165358451, | |
| "grad_norm": 0.48852761265154354, | |
| "learning_rate": 5.189554612937434e-05, | |
| "loss": 0.8535, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.489272632129775, | |
| "grad_norm": 0.98510305137075, | |
| "learning_rate": 5.1762990455991525e-05, | |
| "loss": 0.6202, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.49058084772370486, | |
| "grad_norm": 0.48915174823336055, | |
| "learning_rate": 5.163043478260869e-05, | |
| "loss": 0.8519, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.49188906331763477, | |
| "grad_norm": 0.5904577427313913, | |
| "learning_rate": 5.149787910922588e-05, | |
| "loss": 0.6166, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.4931972789115646, | |
| "grad_norm": 0.5128070606528945, | |
| "learning_rate": 5.1365323435843063e-05, | |
| "loss": 0.8471, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.4945054945054945, | |
| "grad_norm": 0.5725171611100351, | |
| "learning_rate": 5.123276776246023e-05, | |
| "loss": 0.6287, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.4958137100994244, | |
| "grad_norm": 0.539320741587729, | |
| "learning_rate": 5.1100212089077415e-05, | |
| "loss": 0.824, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.4971219256933543, | |
| "grad_norm": 0.4028412015043289, | |
| "learning_rate": 5.0967656415694595e-05, | |
| "loss": 0.5806, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.49843014128728413, | |
| "grad_norm": 0.5389621790607799, | |
| "learning_rate": 5.083510074231177e-05, | |
| "loss": 0.8308, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.49973835688121404, | |
| "grad_norm": 0.6011860291653554, | |
| "learning_rate": 5.0702545068928954e-05, | |
| "loss": 0.6003, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.501046572475144, | |
| "grad_norm": 0.5389080519884727, | |
| "learning_rate": 5.056998939554613e-05, | |
| "loss": 0.8276, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.5023547880690737, | |
| "grad_norm": 0.6088518124424828, | |
| "learning_rate": 5.0437433722163306e-05, | |
| "loss": 0.6195, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.5036630036630036, | |
| "grad_norm": 0.6515108951670922, | |
| "learning_rate": 5.030487804878049e-05, | |
| "loss": 0.8051, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.5049712192569336, | |
| "grad_norm": 0.4313425003620371, | |
| "learning_rate": 5.017232237539767e-05, | |
| "loss": 0.6071, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.5062794348508635, | |
| "grad_norm": 0.5264707278305082, | |
| "learning_rate": 5.0039766702014844e-05, | |
| "loss": 0.8364, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.5075876504447933, | |
| "grad_norm": 0.7048581680415387, | |
| "learning_rate": 4.990721102863203e-05, | |
| "loss": 0.6049, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.5088958660387232, | |
| "grad_norm": 0.5055653926619902, | |
| "learning_rate": 4.97746553552492e-05, | |
| "loss": 0.8285, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.5102040816326531, | |
| "grad_norm": 0.5437489221882642, | |
| "learning_rate": 4.964209968186639e-05, | |
| "loss": 0.5851, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.511512297226583, | |
| "grad_norm": 0.5503188602842589, | |
| "learning_rate": 4.950954400848357e-05, | |
| "loss": 0.8251, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 0.5699382425924778, | |
| "learning_rate": 4.937698833510074e-05, | |
| "loss": 0.6065, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.5141287284144427, | |
| "grad_norm": 0.5601404210013031, | |
| "learning_rate": 4.924443266171793e-05, | |
| "loss": 0.8332, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.5154369440083726, | |
| "grad_norm": 0.6107160876812969, | |
| "learning_rate": 4.9111876988335106e-05, | |
| "loss": 0.6407, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.5167451596023025, | |
| "grad_norm": 0.5627086115947509, | |
| "learning_rate": 4.897932131495228e-05, | |
| "loss": 0.8524, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.5180533751962323, | |
| "grad_norm": 0.47974680429027505, | |
| "learning_rate": 4.8846765641569465e-05, | |
| "loss": 0.5891, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.5193615907901622, | |
| "grad_norm": 0.6078144910339062, | |
| "learning_rate": 4.8714209968186645e-05, | |
| "loss": 0.8207, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.5206698063840921, | |
| "grad_norm": 0.6828182233693575, | |
| "learning_rate": 4.858165429480382e-05, | |
| "loss": 0.6234, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.521978021978022, | |
| "grad_norm": 0.5078577809608877, | |
| "learning_rate": 4.8449098621421e-05, | |
| "loss": 0.8293, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.5232862375719518, | |
| "grad_norm": 0.501493452942641, | |
| "learning_rate": 4.8316542948038176e-05, | |
| "loss": 0.5924, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.5245944531658817, | |
| "grad_norm": 0.5914460419911263, | |
| "learning_rate": 4.8183987274655355e-05, | |
| "loss": 0.8204, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.5259026687598116, | |
| "grad_norm": 0.5108518700290716, | |
| "learning_rate": 4.8051431601272535e-05, | |
| "loss": 0.5938, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.5272108843537415, | |
| "grad_norm": 0.5931433550467333, | |
| "learning_rate": 4.7918875927889714e-05, | |
| "loss": 0.8405, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.5285190999476713, | |
| "grad_norm": 0.6193186586877497, | |
| "learning_rate": 4.7786320254506894e-05, | |
| "loss": 0.5813, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.5298273155416012, | |
| "grad_norm": 0.5380628082146289, | |
| "learning_rate": 4.765376458112407e-05, | |
| "loss": 0.8669, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.5311355311355311, | |
| "grad_norm": 0.5763128333912579, | |
| "learning_rate": 4.752120890774125e-05, | |
| "loss": 0.6076, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.532443746729461, | |
| "grad_norm": 0.5145862547581488, | |
| "learning_rate": 4.738865323435843e-05, | |
| "loss": 0.8424, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.533751962323391, | |
| "grad_norm": 0.5664958788683205, | |
| "learning_rate": 4.725609756097561e-05, | |
| "loss": 0.6293, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.5350601779173207, | |
| "grad_norm": 0.5292223052367038, | |
| "learning_rate": 4.712354188759279e-05, | |
| "loss": 0.8173, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.5363683935112507, | |
| "grad_norm": 0.58673619277203, | |
| "learning_rate": 4.699098621420997e-05, | |
| "loss": 0.5763, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.5376766091051806, | |
| "grad_norm": 0.5594174176545202, | |
| "learning_rate": 4.685843054082715e-05, | |
| "loss": 0.8028, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.5389848246991105, | |
| "grad_norm": 0.6339559869110518, | |
| "learning_rate": 4.672587486744433e-05, | |
| "loss": 0.6053, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.5402930402930403, | |
| "grad_norm": 0.671405553815309, | |
| "learning_rate": 4.659331919406151e-05, | |
| "loss": 0.8398, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.5416012558869702, | |
| "grad_norm": 0.5445580424726075, | |
| "learning_rate": 4.646076352067869e-05, | |
| "loss": 0.621, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.5429094714809001, | |
| "grad_norm": 0.5938642782111705, | |
| "learning_rate": 4.632820784729587e-05, | |
| "loss": 0.8295, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.54421768707483, | |
| "grad_norm": 0.6435152678972964, | |
| "learning_rate": 4.6195652173913046e-05, | |
| "loss": 0.618, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.5455259026687598, | |
| "grad_norm": 0.5089883798718602, | |
| "learning_rate": 4.6063096500530226e-05, | |
| "loss": 0.8135, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.5468341182626897, | |
| "grad_norm": 0.5714660790898384, | |
| "learning_rate": 4.5930540827147405e-05, | |
| "loss": 0.622, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.5481423338566196, | |
| "grad_norm": 0.5423779003965784, | |
| "learning_rate": 4.5797985153764584e-05, | |
| "loss": 0.8011, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.5494505494505495, | |
| "grad_norm": 0.5305546579845897, | |
| "learning_rate": 4.5665429480381764e-05, | |
| "loss": 0.5792, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.5507587650444793, | |
| "grad_norm": 0.5057565979180018, | |
| "learning_rate": 4.553287380699894e-05, | |
| "loss": 0.8326, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.5520669806384092, | |
| "grad_norm": 0.5108055603767078, | |
| "learning_rate": 4.540031813361612e-05, | |
| "loss": 0.6014, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.5533751962323391, | |
| "grad_norm": 0.5358448167728354, | |
| "learning_rate": 4.5267762460233295e-05, | |
| "loss": 0.8152, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.554683411826269, | |
| "grad_norm": 0.7719427855271279, | |
| "learning_rate": 4.513520678685048e-05, | |
| "loss": 0.5967, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.5559916274201988, | |
| "grad_norm": 0.6513813945282019, | |
| "learning_rate": 4.500265111346766e-05, | |
| "loss": 0.8133, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.5572998430141287, | |
| "grad_norm": 0.611360841284405, | |
| "learning_rate": 4.487009544008483e-05, | |
| "loss": 0.6109, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.5586080586080586, | |
| "grad_norm": 0.5122207195541725, | |
| "learning_rate": 4.473753976670202e-05, | |
| "loss": 0.8237, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.5599162742019885, | |
| "grad_norm": 0.47024973003128884, | |
| "learning_rate": 4.46049840933192e-05, | |
| "loss": 0.6308, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.5612244897959183, | |
| "grad_norm": 0.5605094944984179, | |
| "learning_rate": 4.447242841993637e-05, | |
| "loss": 0.8664, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.5625327053898482, | |
| "grad_norm": 0.6555414150243584, | |
| "learning_rate": 4.433987274655356e-05, | |
| "loss": 0.6108, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.5638409209837781, | |
| "grad_norm": 0.4937828454850159, | |
| "learning_rate": 4.420731707317074e-05, | |
| "loss": 0.8288, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.565149136577708, | |
| "grad_norm": 0.5949648171644456, | |
| "learning_rate": 4.407476139978791e-05, | |
| "loss": 0.5755, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.5664573521716378, | |
| "grad_norm": 0.6150493489271065, | |
| "learning_rate": 4.3942205726405096e-05, | |
| "loss": 0.8562, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.5677655677655677, | |
| "grad_norm": 0.5168321021587649, | |
| "learning_rate": 4.3809650053022275e-05, | |
| "loss": 0.5859, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.5690737833594977, | |
| "grad_norm": 0.5149672746596766, | |
| "learning_rate": 4.367709437963945e-05, | |
| "loss": 0.8035, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.5703819989534276, | |
| "grad_norm": 0.6254032996036644, | |
| "learning_rate": 4.3544538706256634e-05, | |
| "loss": 0.6081, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.5716902145473574, | |
| "grad_norm": 0.5439410227795257, | |
| "learning_rate": 4.341198303287381e-05, | |
| "loss": 0.8486, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.5729984301412873, | |
| "grad_norm": 0.6989019848301199, | |
| "learning_rate": 4.3279427359490986e-05, | |
| "loss": 0.5977, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.5743066457352172, | |
| "grad_norm": 0.5253254730813908, | |
| "learning_rate": 4.314687168610817e-05, | |
| "loss": 0.8166, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.5756148613291471, | |
| "grad_norm": 0.6306757564007922, | |
| "learning_rate": 4.3014316012725345e-05, | |
| "loss": 0.6037, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.5769230769230769, | |
| "grad_norm": 0.457131073575816, | |
| "learning_rate": 4.2881760339342524e-05, | |
| "loss": 0.8058, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.5782312925170068, | |
| "grad_norm": 0.6651032212615784, | |
| "learning_rate": 4.274920466595971e-05, | |
| "loss": 0.577, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.5795395081109367, | |
| "grad_norm": 0.5052314086043747, | |
| "learning_rate": 4.261664899257688e-05, | |
| "loss": 0.8133, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.5808477237048666, | |
| "grad_norm": 0.6345651754805625, | |
| "learning_rate": 4.248409331919406e-05, | |
| "loss": 0.5862, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.5821559392987964, | |
| "grad_norm": 0.5470901361744871, | |
| "learning_rate": 4.235153764581124e-05, | |
| "loss": 0.82, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.5834641548927263, | |
| "grad_norm": 0.6148840090122306, | |
| "learning_rate": 4.221898197242842e-05, | |
| "loss": 0.5963, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.5847723704866562, | |
| "grad_norm": 0.5654198812120623, | |
| "learning_rate": 4.20864262990456e-05, | |
| "loss": 0.8098, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.5860805860805861, | |
| "grad_norm": 0.8759288696982239, | |
| "learning_rate": 4.195387062566278e-05, | |
| "loss": 0.6222, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.587388801674516, | |
| "grad_norm": 0.479216838198247, | |
| "learning_rate": 4.182131495227996e-05, | |
| "loss": 0.8338, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.5886970172684458, | |
| "grad_norm": 0.7698717669406883, | |
| "learning_rate": 4.168875927889714e-05, | |
| "loss": 0.6278, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.5900052328623757, | |
| "grad_norm": 0.48674948133579515, | |
| "learning_rate": 4.155620360551432e-05, | |
| "loss": 0.7887, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.5913134484563056, | |
| "grad_norm": 0.5786002565311555, | |
| "learning_rate": 4.14236479321315e-05, | |
| "loss": 0.5891, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.5926216640502355, | |
| "grad_norm": 0.5370485226818142, | |
| "learning_rate": 4.129109225874868e-05, | |
| "loss": 0.8276, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.5939298796441653, | |
| "grad_norm": 0.43625907655336543, | |
| "learning_rate": 4.1158536585365856e-05, | |
| "loss": 0.5856, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.5952380952380952, | |
| "grad_norm": 0.5266268760596537, | |
| "learning_rate": 4.1025980911983036e-05, | |
| "loss": 0.7964, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.5965463108320251, | |
| "grad_norm": 0.5771002724293784, | |
| "learning_rate": 4.0893425238600215e-05, | |
| "loss": 0.5854, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.597854526425955, | |
| "grad_norm": 0.5919954827087538, | |
| "learning_rate": 4.076086956521739e-05, | |
| "loss": 0.8034, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.5991627420198848, | |
| "grad_norm": 0.5680838313723001, | |
| "learning_rate": 4.0628313891834574e-05, | |
| "loss": 0.5926, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.6004709576138147, | |
| "grad_norm": 0.6715456913252982, | |
| "learning_rate": 4.049575821845175e-05, | |
| "loss": 0.8389, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.6017791732077447, | |
| "grad_norm": 0.5499015722260664, | |
| "learning_rate": 4.0363202545068926e-05, | |
| "loss": 0.6022, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.6030873888016746, | |
| "grad_norm": 0.7560936310398005, | |
| "learning_rate": 4.023064687168611e-05, | |
| "loss": 0.8014, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.6043956043956044, | |
| "grad_norm": 0.5816663839566236, | |
| "learning_rate": 4.009809119830329e-05, | |
| "loss": 0.6037, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.6057038199895343, | |
| "grad_norm": 0.4954065888150648, | |
| "learning_rate": 3.9965535524920464e-05, | |
| "loss": 0.8123, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.6070120355834642, | |
| "grad_norm": 0.6634722345039996, | |
| "learning_rate": 3.983297985153765e-05, | |
| "loss": 0.6093, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.6083202511773941, | |
| "grad_norm": 0.5448340011567387, | |
| "learning_rate": 3.970042417815483e-05, | |
| "loss": 0.8391, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.6096284667713239, | |
| "grad_norm": 0.7234298679122729, | |
| "learning_rate": 3.9567868504772e-05, | |
| "loss": 0.6123, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.6109366823652538, | |
| "grad_norm": 0.7113516275751068, | |
| "learning_rate": 3.943531283138919e-05, | |
| "loss": 0.8188, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.6122448979591837, | |
| "grad_norm": 0.5343485382635467, | |
| "learning_rate": 3.930275715800637e-05, | |
| "loss": 0.6025, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.6135531135531136, | |
| "grad_norm": 0.574688006845476, | |
| "learning_rate": 3.917020148462354e-05, | |
| "loss": 0.8244, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.6148613291470434, | |
| "grad_norm": 0.5954437693210181, | |
| "learning_rate": 3.9037645811240727e-05, | |
| "loss": 0.5976, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.6161695447409733, | |
| "grad_norm": 0.6359916685979564, | |
| "learning_rate": 3.89050901378579e-05, | |
| "loss": 0.804, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.6174777603349032, | |
| "grad_norm": 0.37864818768935987, | |
| "learning_rate": 3.877253446447508e-05, | |
| "loss": 0.5819, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.6187859759288331, | |
| "grad_norm": 0.6317553124676056, | |
| "learning_rate": 3.8639978791092265e-05, | |
| "loss": 0.8275, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.6200941915227629, | |
| "grad_norm": 0.37899782241778535, | |
| "learning_rate": 3.850742311770944e-05, | |
| "loss": 0.5939, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.6214024071166928, | |
| "grad_norm": 0.5096002461412386, | |
| "learning_rate": 3.837486744432662e-05, | |
| "loss": 0.8152, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.6227106227106227, | |
| "grad_norm": 0.5858186125506725, | |
| "learning_rate": 3.82423117709438e-05, | |
| "loss": 0.5971, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.6240188383045526, | |
| "grad_norm": 0.5258202103911215, | |
| "learning_rate": 3.8109756097560976e-05, | |
| "loss": 0.8146, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.6253270538984824, | |
| "grad_norm": 0.59652515926618, | |
| "learning_rate": 3.7977200424178155e-05, | |
| "loss": 0.6034, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.6266352694924123, | |
| "grad_norm": 0.692241221647863, | |
| "learning_rate": 3.784464475079534e-05, | |
| "loss": 0.817, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.6279434850863422, | |
| "grad_norm": 0.5751964849585177, | |
| "learning_rate": 3.7712089077412514e-05, | |
| "loss": 0.5948, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.6292517006802721, | |
| "grad_norm": 0.4867932739502536, | |
| "learning_rate": 3.757953340402969e-05, | |
| "loss": 0.7895, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.6305599162742019, | |
| "grad_norm": 0.6527891870524758, | |
| "learning_rate": 3.744697773064688e-05, | |
| "loss": 0.5869, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.6318681318681318, | |
| "grad_norm": 0.542994791536692, | |
| "learning_rate": 3.731442205726405e-05, | |
| "loss": 0.8016, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.6331763474620618, | |
| "grad_norm": 0.49091636854896203, | |
| "learning_rate": 3.718186638388123e-05, | |
| "loss": 0.591, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.6344845630559917, | |
| "grad_norm": 0.4808333120684155, | |
| "learning_rate": 3.704931071049841e-05, | |
| "loss": 0.8374, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.6357927786499215, | |
| "grad_norm": 0.609783391275745, | |
| "learning_rate": 3.691675503711559e-05, | |
| "loss": 0.5727, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.6371009942438514, | |
| "grad_norm": 0.4901032831365679, | |
| "learning_rate": 3.678419936373277e-05, | |
| "loss": 0.8326, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.6384092098377813, | |
| "grad_norm": 0.5535614493168636, | |
| "learning_rate": 3.665164369034995e-05, | |
| "loss": 0.587, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.6397174254317112, | |
| "grad_norm": 0.5661579063549987, | |
| "learning_rate": 3.651908801696713e-05, | |
| "loss": 0.8006, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.6410256410256411, | |
| "grad_norm": 0.5755165016330849, | |
| "learning_rate": 3.638653234358431e-05, | |
| "loss": 0.5548, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.6423338566195709, | |
| "grad_norm": 0.5195028269557372, | |
| "learning_rate": 3.625397667020149e-05, | |
| "loss": 0.8411, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.6436420722135008, | |
| "grad_norm": 0.595652162086749, | |
| "learning_rate": 3.6121420996818666e-05, | |
| "loss": 0.5966, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.6449502878074307, | |
| "grad_norm": 0.5243990905265108, | |
| "learning_rate": 3.5988865323435846e-05, | |
| "loss": 0.8298, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.6462585034013606, | |
| "grad_norm": 0.7940136105161147, | |
| "learning_rate": 3.585630965005302e-05, | |
| "loss": 0.6309, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.6475667189952904, | |
| "grad_norm": 0.4909267588948854, | |
| "learning_rate": 3.5723753976670205e-05, | |
| "loss": 0.7967, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.6488749345892203, | |
| "grad_norm": 0.6021461164458334, | |
| "learning_rate": 3.5591198303287384e-05, | |
| "loss": 0.5946, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.6501831501831502, | |
| "grad_norm": 0.5082946197773784, | |
| "learning_rate": 3.545864262990456e-05, | |
| "loss": 0.7959, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.6514913657770801, | |
| "grad_norm": 0.6887923783738013, | |
| "learning_rate": 3.532608695652174e-05, | |
| "loss": 0.6178, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.6527995813710099, | |
| "grad_norm": 0.4863608457352857, | |
| "learning_rate": 3.519353128313892e-05, | |
| "loss": 0.8218, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.6541077969649398, | |
| "grad_norm": 0.6370508452362942, | |
| "learning_rate": 3.5060975609756095e-05, | |
| "loss": 0.5726, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.6554160125588697, | |
| "grad_norm": 0.5626726082078685, | |
| "learning_rate": 3.492841993637328e-05, | |
| "loss": 0.8388, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.6567242281527996, | |
| "grad_norm": 0.6058799396796493, | |
| "learning_rate": 3.479586426299046e-05, | |
| "loss": 0.6345, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.6580324437467294, | |
| "grad_norm": 0.5486653470890362, | |
| "learning_rate": 3.466330858960763e-05, | |
| "loss": 0.8304, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.6593406593406593, | |
| "grad_norm": 0.7402614360383322, | |
| "learning_rate": 3.453075291622482e-05, | |
| "loss": 0.5972, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.6606488749345892, | |
| "grad_norm": 0.5065684112904657, | |
| "learning_rate": 3.4398197242842e-05, | |
| "loss": 0.8049, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.6619570905285191, | |
| "grad_norm": 0.4487165281422432, | |
| "learning_rate": 3.426564156945917e-05, | |
| "loss": 0.5813, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.6632653061224489, | |
| "grad_norm": 0.5576418936555791, | |
| "learning_rate": 3.413308589607636e-05, | |
| "loss": 0.85, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.6645735217163788, | |
| "grad_norm": 0.5629552965222027, | |
| "learning_rate": 3.400053022269353e-05, | |
| "loss": 0.6144, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.6658817373103088, | |
| "grad_norm": 0.5340051545534295, | |
| "learning_rate": 3.386797454931071e-05, | |
| "loss": 0.8256, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.6671899529042387, | |
| "grad_norm": 0.6481929473760202, | |
| "learning_rate": 3.3735418875927896e-05, | |
| "loss": 0.5934, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.6684981684981685, | |
| "grad_norm": 0.586068083609296, | |
| "learning_rate": 3.360286320254507e-05, | |
| "loss": 0.8065, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.6698063840920984, | |
| "grad_norm": 0.5898674035419238, | |
| "learning_rate": 3.347030752916225e-05, | |
| "loss": 0.6332, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.6711145996860283, | |
| "grad_norm": 0.5271996892541019, | |
| "learning_rate": 3.3337751855779434e-05, | |
| "loss": 0.8407, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.6724228152799582, | |
| "grad_norm": 0.7209460794111061, | |
| "learning_rate": 3.3205196182396606e-05, | |
| "loss": 0.5954, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.673731030873888, | |
| "grad_norm": 0.5419491953310692, | |
| "learning_rate": 3.3072640509013786e-05, | |
| "loss": 0.8186, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.6750392464678179, | |
| "grad_norm": 0.6363503952339683, | |
| "learning_rate": 3.294008483563097e-05, | |
| "loss": 0.6252, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.6763474620617478, | |
| "grad_norm": 0.5479539766686561, | |
| "learning_rate": 3.2807529162248144e-05, | |
| "loss": 0.8248, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.6776556776556777, | |
| "grad_norm": 0.5535849243715827, | |
| "learning_rate": 3.2674973488865324e-05, | |
| "loss": 0.5717, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.6789638932496075, | |
| "grad_norm": 0.5091172962824471, | |
| "learning_rate": 3.25424178154825e-05, | |
| "loss": 0.8203, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.6802721088435374, | |
| "grad_norm": 0.562594433405121, | |
| "learning_rate": 3.240986214209968e-05, | |
| "loss": 0.6089, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.6815803244374673, | |
| "grad_norm": 0.5517130562661668, | |
| "learning_rate": 3.227730646871686e-05, | |
| "loss": 0.8446, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.6828885400313972, | |
| "grad_norm": 0.4047148337025719, | |
| "learning_rate": 3.214475079533404e-05, | |
| "loss": 0.5894, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.684196755625327, | |
| "grad_norm": 0.5239322456435999, | |
| "learning_rate": 3.201219512195122e-05, | |
| "loss": 0.8321, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.6855049712192569, | |
| "grad_norm": 0.5498585626284401, | |
| "learning_rate": 3.18796394485684e-05, | |
| "loss": 0.6279, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.6868131868131868, | |
| "grad_norm": 0.48743593460492207, | |
| "learning_rate": 3.174708377518558e-05, | |
| "loss": 0.8242, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.6881214024071167, | |
| "grad_norm": 0.5651556275625538, | |
| "learning_rate": 3.161452810180276e-05, | |
| "loss": 0.6286, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.6894296180010465, | |
| "grad_norm": 0.5754174898482404, | |
| "learning_rate": 3.148197242841994e-05, | |
| "loss": 0.8177, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.6907378335949764, | |
| "grad_norm": 0.6605607744635831, | |
| "learning_rate": 3.134941675503712e-05, | |
| "loss": 0.597, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.6920460491889063, | |
| "grad_norm": 0.5228692951705382, | |
| "learning_rate": 3.12168610816543e-05, | |
| "loss": 0.7984, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.6933542647828362, | |
| "grad_norm": 0.4480430717512152, | |
| "learning_rate": 3.1084305408271477e-05, | |
| "loss": 0.6067, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.6946624803767661, | |
| "grad_norm": 0.5638626523175552, | |
| "learning_rate": 3.095174973488865e-05, | |
| "loss": 0.8093, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.6959706959706959, | |
| "grad_norm": 0.5946989663976191, | |
| "learning_rate": 3.0819194061505835e-05, | |
| "loss": 0.59, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.6972789115646258, | |
| "grad_norm": 0.5671263430143716, | |
| "learning_rate": 3.0686638388123015e-05, | |
| "loss": 0.8009, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.6985871271585558, | |
| "grad_norm": 0.589042697181555, | |
| "learning_rate": 3.055408271474019e-05, | |
| "loss": 0.5782, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.6998953427524857, | |
| "grad_norm": 0.7073787068267711, | |
| "learning_rate": 3.0421527041357374e-05, | |
| "loss": 0.7989, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.7012035583464155, | |
| "grad_norm": 0.5352877950074024, | |
| "learning_rate": 3.028897136797455e-05, | |
| "loss": 0.6068, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.7025117739403454, | |
| "grad_norm": 0.5346918859139157, | |
| "learning_rate": 3.015641569459173e-05, | |
| "loss": 0.8129, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.7038199895342753, | |
| "grad_norm": 0.48063163414005916, | |
| "learning_rate": 3.0023860021208912e-05, | |
| "loss": 0.561, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.7051282051282052, | |
| "grad_norm": 0.5431033944364678, | |
| "learning_rate": 2.9891304347826088e-05, | |
| "loss": 0.8314, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.706436420722135, | |
| "grad_norm": 0.6251701992093956, | |
| "learning_rate": 2.9758748674443264e-05, | |
| "loss": 0.6043, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.7077446363160649, | |
| "grad_norm": 0.538107678456505, | |
| "learning_rate": 2.962619300106045e-05, | |
| "loss": 0.8345, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.7090528519099948, | |
| "grad_norm": 0.5596293446673235, | |
| "learning_rate": 2.9493637327677626e-05, | |
| "loss": 0.5767, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.7103610675039247, | |
| "grad_norm": 0.6302036834487577, | |
| "learning_rate": 2.9361081654294802e-05, | |
| "loss": 0.817, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.7116692830978545, | |
| "grad_norm": 0.6442132298338353, | |
| "learning_rate": 2.9228525980911985e-05, | |
| "loss": 0.5956, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.7129774986917844, | |
| "grad_norm": 0.5294060275695159, | |
| "learning_rate": 2.9095970307529164e-05, | |
| "loss": 0.8086, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 0.5780709508327503, | |
| "learning_rate": 2.896341463414634e-05, | |
| "loss": 0.599, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.7155939298796442, | |
| "grad_norm": 0.5178595179064079, | |
| "learning_rate": 2.8830858960763523e-05, | |
| "loss": 0.8023, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.716902145473574, | |
| "grad_norm": 0.4908347612839904, | |
| "learning_rate": 2.8698303287380702e-05, | |
| "loss": 0.5906, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.7182103610675039, | |
| "grad_norm": 0.5745266376669839, | |
| "learning_rate": 2.8565747613997878e-05, | |
| "loss": 0.8017, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.7195185766614338, | |
| "grad_norm": 0.7091252655798257, | |
| "learning_rate": 2.843319194061506e-05, | |
| "loss": 0.6165, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.7208267922553637, | |
| "grad_norm": 0.5082645638178946, | |
| "learning_rate": 2.830063626723224e-05, | |
| "loss": 0.7916, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.7221350078492935, | |
| "grad_norm": 0.8135604062724642, | |
| "learning_rate": 2.8168080593849416e-05, | |
| "loss": 0.5766, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.7234432234432234, | |
| "grad_norm": 0.5344352881317546, | |
| "learning_rate": 2.80355249204666e-05, | |
| "loss": 0.7945, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.7247514390371533, | |
| "grad_norm": 0.6213640188906101, | |
| "learning_rate": 2.7902969247083775e-05, | |
| "loss": 0.5739, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.7260596546310832, | |
| "grad_norm": 0.5133786932833476, | |
| "learning_rate": 2.7770413573700955e-05, | |
| "loss": 0.8175, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.727367870225013, | |
| "grad_norm": 0.5687616103525199, | |
| "learning_rate": 2.7637857900318137e-05, | |
| "loss": 0.5946, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.7286760858189429, | |
| "grad_norm": 0.5361603157753395, | |
| "learning_rate": 2.7505302226935313e-05, | |
| "loss": 0.8331, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.7299843014128728, | |
| "grad_norm": 0.4846643967669185, | |
| "learning_rate": 2.7372746553552493e-05, | |
| "loss": 0.5846, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.7312925170068028, | |
| "grad_norm": 0.5264633004571062, | |
| "learning_rate": 2.7240190880169676e-05, | |
| "loss": 0.7929, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.7326007326007326, | |
| "grad_norm": 0.5169934066982514, | |
| "learning_rate": 2.710763520678685e-05, | |
| "loss": 0.5812, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.7339089481946625, | |
| "grad_norm": 0.49516725051064175, | |
| "learning_rate": 2.697507953340403e-05, | |
| "loss": 0.8268, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.7352171637885924, | |
| "grad_norm": 0.5545634117837589, | |
| "learning_rate": 2.6842523860021214e-05, | |
| "loss": 0.6293, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.7365253793825223, | |
| "grad_norm": 0.5230373403077291, | |
| "learning_rate": 2.670996818663839e-05, | |
| "loss": 0.8166, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.7378335949764521, | |
| "grad_norm": 0.599453616763764, | |
| "learning_rate": 2.6577412513255566e-05, | |
| "loss": 0.5906, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.739141810570382, | |
| "grad_norm": 0.5973016920910486, | |
| "learning_rate": 2.6444856839872752e-05, | |
| "loss": 0.8012, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.7404500261643119, | |
| "grad_norm": 0.6449099450136865, | |
| "learning_rate": 2.6312301166489928e-05, | |
| "loss": 0.5705, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.7417582417582418, | |
| "grad_norm": 0.553182572295564, | |
| "learning_rate": 2.6179745493107104e-05, | |
| "loss": 0.8191, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.7430664573521716, | |
| "grad_norm": 0.4476262335389087, | |
| "learning_rate": 2.6047189819724287e-05, | |
| "loss": 0.5958, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.7443746729461015, | |
| "grad_norm": 0.4834161167587533, | |
| "learning_rate": 2.5914634146341466e-05, | |
| "loss": 0.8144, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.7456828885400314, | |
| "grad_norm": 0.5286677625170841, | |
| "learning_rate": 2.5782078472958642e-05, | |
| "loss": 0.5816, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.7469911041339613, | |
| "grad_norm": 0.5413842703568794, | |
| "learning_rate": 2.5649522799575825e-05, | |
| "loss": 0.8312, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.7482993197278912, | |
| "grad_norm": 0.6786517057597097, | |
| "learning_rate": 2.5516967126193004e-05, | |
| "loss": 0.6086, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.749607535321821, | |
| "grad_norm": 0.49742040117830627, | |
| "learning_rate": 2.538441145281018e-05, | |
| "loss": 0.7967, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.7509157509157509, | |
| "grad_norm": 0.49256559696291485, | |
| "learning_rate": 2.525185577942736e-05, | |
| "loss": 0.5724, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.7522239665096808, | |
| "grad_norm": 0.5263192680418415, | |
| "learning_rate": 2.5119300106044542e-05, | |
| "loss": 0.8011, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.7535321821036107, | |
| "grad_norm": 0.6716690763876726, | |
| "learning_rate": 2.498674443266172e-05, | |
| "loss": 0.601, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.7548403976975405, | |
| "grad_norm": 0.5268203857733461, | |
| "learning_rate": 2.4854188759278898e-05, | |
| "loss": 0.8224, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.7561486132914704, | |
| "grad_norm": 0.6545344595309585, | |
| "learning_rate": 2.4721633085896077e-05, | |
| "loss": 0.5883, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.7574568288854003, | |
| "grad_norm": 0.5192641904709395, | |
| "learning_rate": 2.4589077412513257e-05, | |
| "loss": 0.8223, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.7587650444793302, | |
| "grad_norm": 0.5569056177071494, | |
| "learning_rate": 2.4456521739130436e-05, | |
| "loss": 0.5882, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.76007326007326, | |
| "grad_norm": 0.652217104391276, | |
| "learning_rate": 2.4323966065747615e-05, | |
| "loss": 0.8111, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.7613814756671899, | |
| "grad_norm": 0.715865728522808, | |
| "learning_rate": 2.4191410392364795e-05, | |
| "loss": 0.5839, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.7626896912611199, | |
| "grad_norm": 0.4622828984795863, | |
| "learning_rate": 2.4058854718981974e-05, | |
| "loss": 0.8152, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.7639979068550498, | |
| "grad_norm": 0.7105275925193085, | |
| "learning_rate": 2.392629904559915e-05, | |
| "loss": 0.5727, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.7653061224489796, | |
| "grad_norm": 0.5593573119711108, | |
| "learning_rate": 2.3793743372216333e-05, | |
| "loss": 0.787, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.7666143380429095, | |
| "grad_norm": 0.6976481535500858, | |
| "learning_rate": 2.3661187698833512e-05, | |
| "loss": 0.5828, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.7679225536368394, | |
| "grad_norm": 0.5629390717962797, | |
| "learning_rate": 2.352863202545069e-05, | |
| "loss": 0.8119, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.8329984172410997, | |
| "learning_rate": 2.3396076352067868e-05, | |
| "loss": 0.5953, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.7705389848246991, | |
| "grad_norm": 0.5054513797033992, | |
| "learning_rate": 2.326352067868505e-05, | |
| "loss": 0.7897, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.771847200418629, | |
| "grad_norm": 0.8042478184879194, | |
| "learning_rate": 2.3130965005302227e-05, | |
| "loss": 0.601, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.7731554160125589, | |
| "grad_norm": 0.5134604404253917, | |
| "learning_rate": 2.2998409331919406e-05, | |
| "loss": 0.825, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.7744636316064888, | |
| "grad_norm": 0.6307033662226441, | |
| "learning_rate": 2.286585365853659e-05, | |
| "loss": 0.559, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.7757718472004186, | |
| "grad_norm": 0.5923006697127559, | |
| "learning_rate": 2.2733297985153765e-05, | |
| "loss": 0.7833, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.7770800627943485, | |
| "grad_norm": 0.7423403377088905, | |
| "learning_rate": 2.2600742311770944e-05, | |
| "loss": 0.5916, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.7783882783882784, | |
| "grad_norm": 0.5275344149548817, | |
| "learning_rate": 2.2468186638388124e-05, | |
| "loss": 0.7857, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.7796964939822083, | |
| "grad_norm": 0.4485788605700081, | |
| "learning_rate": 2.2335630965005303e-05, | |
| "loss": 0.5938, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.7810047095761381, | |
| "grad_norm": 0.45581986333641783, | |
| "learning_rate": 2.2203075291622482e-05, | |
| "loss": 0.789, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.782312925170068, | |
| "grad_norm": 0.7077436921839176, | |
| "learning_rate": 2.2070519618239662e-05, | |
| "loss": 0.5639, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.7836211407639979, | |
| "grad_norm": 0.485078935511288, | |
| "learning_rate": 2.193796394485684e-05, | |
| "loss": 0.7974, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.7849293563579278, | |
| "grad_norm": 0.5415931380682013, | |
| "learning_rate": 2.180540827147402e-05, | |
| "loss": 0.5513, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.7862375719518576, | |
| "grad_norm": 0.6028769503669652, | |
| "learning_rate": 2.16728525980912e-05, | |
| "loss": 0.7859, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.7875457875457875, | |
| "grad_norm": 0.43775607621380236, | |
| "learning_rate": 2.154029692470838e-05, | |
| "loss": 0.586, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.7888540031397174, | |
| "grad_norm": 0.5129027729272807, | |
| "learning_rate": 2.140774125132556e-05, | |
| "loss": 0.8006, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.7901622187336473, | |
| "grad_norm": 0.7986746080445549, | |
| "learning_rate": 2.1275185577942735e-05, | |
| "loss": 0.6124, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.7914704343275771, | |
| "grad_norm": 0.5495341720621897, | |
| "learning_rate": 2.1142629904559917e-05, | |
| "loss": 0.8162, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.792778649921507, | |
| "grad_norm": 0.6565347700150155, | |
| "learning_rate": 2.1010074231177097e-05, | |
| "loss": 0.5797, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.794086865515437, | |
| "grad_norm": 0.49231844511035533, | |
| "learning_rate": 2.0877518557794273e-05, | |
| "loss": 0.815, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.7953950811093669, | |
| "grad_norm": 0.487405981388951, | |
| "learning_rate": 2.0744962884411452e-05, | |
| "loss": 0.6098, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.7967032967032966, | |
| "grad_norm": 0.5688986137956366, | |
| "learning_rate": 2.0612407211028635e-05, | |
| "loss": 0.8328, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.7980115122972266, | |
| "grad_norm": 0.553490629602128, | |
| "learning_rate": 2.047985153764581e-05, | |
| "loss": 0.5689, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.7993197278911565, | |
| "grad_norm": 0.5247897399616922, | |
| "learning_rate": 2.034729586426299e-05, | |
| "loss": 0.809, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.8006279434850864, | |
| "grad_norm": 0.6208953939424018, | |
| "learning_rate": 2.021474019088017e-05, | |
| "loss": 0.5615, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.8019361590790163, | |
| "grad_norm": 0.5020903361827662, | |
| "learning_rate": 2.008218451749735e-05, | |
| "loss": 0.8156, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.8032443746729461, | |
| "grad_norm": 0.5291771623927373, | |
| "learning_rate": 1.994962884411453e-05, | |
| "loss": 0.5884, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.804552590266876, | |
| "grad_norm": 0.5285998915355191, | |
| "learning_rate": 1.9817073170731708e-05, | |
| "loss": 0.8051, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.8058608058608059, | |
| "grad_norm": 0.8178935383618982, | |
| "learning_rate": 1.9684517497348887e-05, | |
| "loss": 0.6138, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.8071690214547358, | |
| "grad_norm": 0.4987221022793129, | |
| "learning_rate": 1.9551961823966067e-05, | |
| "loss": 0.8294, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.8084772370486656, | |
| "grad_norm": 0.6163654350885334, | |
| "learning_rate": 1.9419406150583246e-05, | |
| "loss": 0.5636, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.8097854526425955, | |
| "grad_norm": 0.555469302539477, | |
| "learning_rate": 1.9286850477200426e-05, | |
| "loss": 0.8178, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.8110936682365254, | |
| "grad_norm": 0.6123412415543169, | |
| "learning_rate": 1.9154294803817605e-05, | |
| "loss": 0.5704, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.8124018838304553, | |
| "grad_norm": 0.5124297425086758, | |
| "learning_rate": 1.9021739130434784e-05, | |
| "loss": 0.8193, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.8137100994243851, | |
| "grad_norm": 0.8183903363373162, | |
| "learning_rate": 1.8889183457051964e-05, | |
| "loss": 0.6082, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.815018315018315, | |
| "grad_norm": 0.5990002289487968, | |
| "learning_rate": 1.8756627783669143e-05, | |
| "loss": 0.8463, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.8163265306122449, | |
| "grad_norm": 0.5158204377847465, | |
| "learning_rate": 1.8624072110286323e-05, | |
| "loss": 0.5905, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.8176347462061748, | |
| "grad_norm": 0.5071733107919829, | |
| "learning_rate": 1.84915164369035e-05, | |
| "loss": 0.8052, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.8189429618001046, | |
| "grad_norm": 0.7067873685301963, | |
| "learning_rate": 1.835896076352068e-05, | |
| "loss": 0.6175, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.8202511773940345, | |
| "grad_norm": 0.49736879675215423, | |
| "learning_rate": 1.8226405090137857e-05, | |
| "loss": 0.8187, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.8215593929879644, | |
| "grad_norm": 0.6169326476344377, | |
| "learning_rate": 1.8093849416755037e-05, | |
| "loss": 0.5969, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.8228676085818943, | |
| "grad_norm": 0.5358771287992227, | |
| "learning_rate": 1.796129374337222e-05, | |
| "loss": 0.8084, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.8241758241758241, | |
| "grad_norm": 0.6155654512041941, | |
| "learning_rate": 1.7828738069989395e-05, | |
| "loss": 0.5751, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.825484039769754, | |
| "grad_norm": 0.5694555500789946, | |
| "learning_rate": 1.7696182396606575e-05, | |
| "loss": 0.8114, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.826792255363684, | |
| "grad_norm": 0.5299599349376566, | |
| "learning_rate": 1.7563626723223754e-05, | |
| "loss": 0.5815, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.8281004709576139, | |
| "grad_norm": 0.5002411114343089, | |
| "learning_rate": 1.7431071049840934e-05, | |
| "loss": 0.8116, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.8294086865515437, | |
| "grad_norm": 0.6844907765956427, | |
| "learning_rate": 1.7298515376458113e-05, | |
| "loss": 0.5998, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.8307169021454736, | |
| "grad_norm": 0.5784470770219757, | |
| "learning_rate": 1.7165959703075292e-05, | |
| "loss": 0.8211, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.8320251177394035, | |
| "grad_norm": 0.6402598629047223, | |
| "learning_rate": 1.7033404029692472e-05, | |
| "loss": 0.5686, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 0.5486156319155983, | |
| "learning_rate": 1.690084835630965e-05, | |
| "loss": 0.819, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.8346415489272632, | |
| "grad_norm": 0.552758307022236, | |
| "learning_rate": 1.676829268292683e-05, | |
| "loss": 0.5744, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.8359497645211931, | |
| "grad_norm": 0.5129974867018121, | |
| "learning_rate": 1.663573700954401e-05, | |
| "loss": 0.802, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.837257980115123, | |
| "grad_norm": 0.6477174203830427, | |
| "learning_rate": 1.650318133616119e-05, | |
| "loss": 0.604, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.8385661957090529, | |
| "grad_norm": 0.5721470905692986, | |
| "learning_rate": 1.637062566277837e-05, | |
| "loss": 0.7867, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.8398744113029827, | |
| "grad_norm": 0.5613277299496854, | |
| "learning_rate": 1.6238069989395545e-05, | |
| "loss": 0.5995, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.8411826268969126, | |
| "grad_norm": 0.4873586998049457, | |
| "learning_rate": 1.6105514316012728e-05, | |
| "loss": 0.7987, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.8424908424908425, | |
| "grad_norm": 0.6328797394647059, | |
| "learning_rate": 1.5972958642629907e-05, | |
| "loss": 0.5875, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.8437990580847724, | |
| "grad_norm": 0.5065229266101028, | |
| "learning_rate": 1.5840402969247083e-05, | |
| "loss": 0.7832, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.8451072736787022, | |
| "grad_norm": 0.7363086761942835, | |
| "learning_rate": 1.5707847295864266e-05, | |
| "loss": 0.5797, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.8464154892726321, | |
| "grad_norm": 0.5910783809750648, | |
| "learning_rate": 1.5575291622481442e-05, | |
| "loss": 0.8484, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 0.847723704866562, | |
| "grad_norm": 0.46343031391913037, | |
| "learning_rate": 1.544273594909862e-05, | |
| "loss": 0.5783, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.8490319204604919, | |
| "grad_norm": 0.5168004042862498, | |
| "learning_rate": 1.53101802757158e-05, | |
| "loss": 0.7856, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 0.8503401360544217, | |
| "grad_norm": 0.7961735878117324, | |
| "learning_rate": 1.517762460233298e-05, | |
| "loss": 0.5835, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.8516483516483516, | |
| "grad_norm": 0.4869428795315839, | |
| "learning_rate": 1.504506892895016e-05, | |
| "loss": 0.7959, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.8529565672422815, | |
| "grad_norm": 0.6051259113895093, | |
| "learning_rate": 1.491251325556734e-05, | |
| "loss": 0.5812, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.8542647828362114, | |
| "grad_norm": 0.48181809369845346, | |
| "learning_rate": 1.4779957582184516e-05, | |
| "loss": 0.7691, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 0.8555729984301413, | |
| "grad_norm": 0.6165360359930039, | |
| "learning_rate": 1.4647401908801697e-05, | |
| "loss": 0.5906, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.8568812140240711, | |
| "grad_norm": 0.49455764845266237, | |
| "learning_rate": 1.4514846235418877e-05, | |
| "loss": 0.7854, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.858189429618001, | |
| "grad_norm": 0.5284557358740105, | |
| "learning_rate": 1.4382290562036055e-05, | |
| "loss": 0.588, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.859497645211931, | |
| "grad_norm": 0.5292637656987533, | |
| "learning_rate": 1.4249734888653236e-05, | |
| "loss": 0.8016, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.8608058608058609, | |
| "grad_norm": 0.8513431344553715, | |
| "learning_rate": 1.4117179215270415e-05, | |
| "loss": 0.6167, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.8621140763997907, | |
| "grad_norm": 0.579288662692426, | |
| "learning_rate": 1.3984623541887593e-05, | |
| "loss": 0.8134, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 0.8634222919937206, | |
| "grad_norm": 0.6785159865651157, | |
| "learning_rate": 1.3852067868504772e-05, | |
| "loss": 0.5753, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.8647305075876505, | |
| "grad_norm": 0.5143467597796619, | |
| "learning_rate": 1.3719512195121953e-05, | |
| "loss": 0.8117, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 0.8660387231815804, | |
| "grad_norm": 0.5958544884291407, | |
| "learning_rate": 1.3586956521739131e-05, | |
| "loss": 0.5694, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.8673469387755102, | |
| "grad_norm": 0.6204168418491064, | |
| "learning_rate": 1.345440084835631e-05, | |
| "loss": 0.7969, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.8686551543694401, | |
| "grad_norm": 0.5741612534922768, | |
| "learning_rate": 1.3321845174973491e-05, | |
| "loss": 0.5918, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.86996336996337, | |
| "grad_norm": 0.5784688853516818, | |
| "learning_rate": 1.3189289501590667e-05, | |
| "loss": 0.8035, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.8712715855572999, | |
| "grad_norm": 0.5603176409725421, | |
| "learning_rate": 1.3056733828207849e-05, | |
| "loss": 0.5805, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.8725798011512297, | |
| "grad_norm": 0.5498601994735227, | |
| "learning_rate": 1.2924178154825028e-05, | |
| "loss": 0.7983, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 0.8738880167451596, | |
| "grad_norm": 0.46586893720865835, | |
| "learning_rate": 1.2791622481442206e-05, | |
| "loss": 0.5847, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.8751962323390895, | |
| "grad_norm": 0.4854338950174297, | |
| "learning_rate": 1.2659066808059387e-05, | |
| "loss": 0.7692, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.8765044479330194, | |
| "grad_norm": 0.5304541776380965, | |
| "learning_rate": 1.2526511134676563e-05, | |
| "loss": 0.5896, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.8778126635269492, | |
| "grad_norm": 0.5909426804833323, | |
| "learning_rate": 1.2393955461293744e-05, | |
| "loss": 0.7792, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 0.4671763867260705, | |
| "learning_rate": 1.2261399787910923e-05, | |
| "loss": 0.5638, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.880429094714809, | |
| "grad_norm": 0.5359237947603994, | |
| "learning_rate": 1.2128844114528103e-05, | |
| "loss": 0.8022, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 0.8817373103087389, | |
| "grad_norm": 0.6960066070363433, | |
| "learning_rate": 1.1996288441145282e-05, | |
| "loss": 0.5957, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.8830455259026687, | |
| "grad_norm": 0.4855729622009359, | |
| "learning_rate": 1.186373276776246e-05, | |
| "loss": 0.8295, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.8843537414965986, | |
| "grad_norm": 0.9533835028532404, | |
| "learning_rate": 1.173117709437964e-05, | |
| "loss": 0.5855, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.8856619570905285, | |
| "grad_norm": 0.5386665177217399, | |
| "learning_rate": 1.1598621420996818e-05, | |
| "loss": 0.7768, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 0.8869701726844584, | |
| "grad_norm": 0.5851585267921338, | |
| "learning_rate": 1.1466065747613998e-05, | |
| "loss": 0.5735, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.8882783882783882, | |
| "grad_norm": 0.6320477723686851, | |
| "learning_rate": 1.1333510074231179e-05, | |
| "loss": 0.8027, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 0.8895866038723181, | |
| "grad_norm": 0.6634243839335632, | |
| "learning_rate": 1.1200954400848357e-05, | |
| "loss": 0.5573, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.890894819466248, | |
| "grad_norm": 0.5535217711779135, | |
| "learning_rate": 1.1068398727465536e-05, | |
| "loss": 0.7966, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.892203035060178, | |
| "grad_norm": 0.7832734038273745, | |
| "learning_rate": 1.0935843054082715e-05, | |
| "loss": 0.6074, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.8935112506541077, | |
| "grad_norm": 0.520725853685351, | |
| "learning_rate": 1.0803287380699895e-05, | |
| "loss": 0.7948, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 0.8948194662480377, | |
| "grad_norm": 0.5140360510057409, | |
| "learning_rate": 1.0670731707317074e-05, | |
| "loss": 0.5672, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.8961276818419676, | |
| "grad_norm": 0.5480237760678021, | |
| "learning_rate": 1.0538176033934252e-05, | |
| "loss": 0.8193, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.8974358974358975, | |
| "grad_norm": 0.5379154678692974, | |
| "learning_rate": 1.0405620360551433e-05, | |
| "loss": 0.5615, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.8987441130298273, | |
| "grad_norm": 0.5748341162685962, | |
| "learning_rate": 1.027306468716861e-05, | |
| "loss": 0.7761, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.9000523286237572, | |
| "grad_norm": 0.354346750572681, | |
| "learning_rate": 1.014050901378579e-05, | |
| "loss": 0.5742, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.9013605442176871, | |
| "grad_norm": 0.5556016455622972, | |
| "learning_rate": 1.000795334040297e-05, | |
| "loss": 0.8124, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 0.902668759811617, | |
| "grad_norm": 0.616134681196408, | |
| "learning_rate": 9.875397667020149e-06, | |
| "loss": 0.5799, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.9039769754055468, | |
| "grad_norm": 0.5351793977307407, | |
| "learning_rate": 9.742841993637328e-06, | |
| "loss": 0.8111, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 0.9052851909994767, | |
| "grad_norm": 0.4796711570776462, | |
| "learning_rate": 9.610286320254508e-06, | |
| "loss": 0.5897, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.9065934065934066, | |
| "grad_norm": 0.5333607429667859, | |
| "learning_rate": 9.477730646871687e-06, | |
| "loss": 0.7895, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 0.9079016221873365, | |
| "grad_norm": 0.7210385096597725, | |
| "learning_rate": 9.345174973488865e-06, | |
| "loss": 0.5814, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.9092098377812664, | |
| "grad_norm": 0.4921959597022122, | |
| "learning_rate": 9.212619300106044e-06, | |
| "loss": 0.7722, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.9105180533751962, | |
| "grad_norm": 0.582597864482659, | |
| "learning_rate": 9.080063626723225e-06, | |
| "loss": 0.5718, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.9118262689691261, | |
| "grad_norm": 0.5378533355374352, | |
| "learning_rate": 8.947507953340403e-06, | |
| "loss": 0.7841, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 0.913134484563056, | |
| "grad_norm": 0.6935491033828649, | |
| "learning_rate": 8.814952279957582e-06, | |
| "loss": 0.5882, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.9144427001569859, | |
| "grad_norm": 0.5318156050528525, | |
| "learning_rate": 8.682396606574762e-06, | |
| "loss": 0.7961, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 0.9157509157509157, | |
| "grad_norm": 0.6218540662399403, | |
| "learning_rate": 8.549840933191941e-06, | |
| "loss": 0.5646, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.9170591313448456, | |
| "grad_norm": 0.5580021318568493, | |
| "learning_rate": 8.41728525980912e-06, | |
| "loss": 0.8223, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 0.9183673469387755, | |
| "grad_norm": 0.7372202219508209, | |
| "learning_rate": 8.2847295864263e-06, | |
| "loss": 0.5732, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.9196755625327054, | |
| "grad_norm": 0.5531047720727633, | |
| "learning_rate": 8.15217391304348e-06, | |
| "loss": 0.8233, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 0.9209837781266352, | |
| "grad_norm": 0.9657658989896405, | |
| "learning_rate": 8.019618239660657e-06, | |
| "loss": 0.5559, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.9222919937205651, | |
| "grad_norm": 0.5772618182869884, | |
| "learning_rate": 7.887062566277838e-06, | |
| "loss": 0.7868, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.923600209314495, | |
| "grad_norm": 0.4581262580386774, | |
| "learning_rate": 7.754506892895016e-06, | |
| "loss": 0.5821, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 0.924908424908425, | |
| "grad_norm": 0.6112521566283506, | |
| "learning_rate": 7.621951219512195e-06, | |
| "loss": 0.7813, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 0.9262166405023547, | |
| "grad_norm": 0.7318140554967631, | |
| "learning_rate": 7.4893955461293745e-06, | |
| "loss": 0.5652, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.9275248560962847, | |
| "grad_norm": 0.55917265698163, | |
| "learning_rate": 7.356839872746554e-06, | |
| "loss": 0.8161, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 0.9288330716902146, | |
| "grad_norm": 0.7037568341200027, | |
| "learning_rate": 7.224284199363733e-06, | |
| "loss": 0.586, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.9301412872841445, | |
| "grad_norm": 0.5346184273438803, | |
| "learning_rate": 7.091728525980912e-06, | |
| "loss": 0.7951, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 0.9314495028780743, | |
| "grad_norm": 0.4150830640122542, | |
| "learning_rate": 6.959172852598092e-06, | |
| "loss": 0.5949, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 0.9327577184720042, | |
| "grad_norm": 0.5114818023098201, | |
| "learning_rate": 6.826617179215271e-06, | |
| "loss": 0.7904, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 0.9340659340659341, | |
| "grad_norm": 0.5908812058211556, | |
| "learning_rate": 6.69406150583245e-06, | |
| "loss": 0.5674, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.935374149659864, | |
| "grad_norm": 0.5343311980537179, | |
| "learning_rate": 6.561505832449629e-06, | |
| "loss": 0.7942, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.9366823652537938, | |
| "grad_norm": 0.5903444129390092, | |
| "learning_rate": 6.428950159066809e-06, | |
| "loss": 0.5679, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 0.9379905808477237, | |
| "grad_norm": 0.5453396700244478, | |
| "learning_rate": 6.296394485683987e-06, | |
| "loss": 0.8118, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 0.9392987964416536, | |
| "grad_norm": 0.48350350014595894, | |
| "learning_rate": 6.163838812301167e-06, | |
| "loss": 0.5974, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 0.9406070120355835, | |
| "grad_norm": 0.5664756102574485, | |
| "learning_rate": 6.031283138918345e-06, | |
| "loss": 0.7948, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 0.9419152276295133, | |
| "grad_norm": 0.5142501404418801, | |
| "learning_rate": 5.8987274655355255e-06, | |
| "loss": 0.606, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.9432234432234432, | |
| "grad_norm": 0.54569469097517, | |
| "learning_rate": 5.766171792152705e-06, | |
| "loss": 0.8303, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 0.9445316588173731, | |
| "grad_norm": 0.7008918499549309, | |
| "learning_rate": 5.6336161187698835e-06, | |
| "loss": 0.5698, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 0.945839874411303, | |
| "grad_norm": 0.4720187827903435, | |
| "learning_rate": 5.501060445387063e-06, | |
| "loss": 0.813, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 0.9471480900052328, | |
| "grad_norm": 0.6243694714554565, | |
| "learning_rate": 5.368504772004242e-06, | |
| "loss": 0.5858, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 0.9484563055991627, | |
| "grad_norm": 0.6260033107066734, | |
| "learning_rate": 5.235949098621421e-06, | |
| "loss": 0.8116, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.9497645211930926, | |
| "grad_norm": 0.6435112573826539, | |
| "learning_rate": 5.103393425238601e-06, | |
| "loss": 0.5544, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.9510727367870225, | |
| "grad_norm": 0.47784017687891694, | |
| "learning_rate": 4.9708377518557796e-06, | |
| "loss": 0.7778, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 0.5688080296868022, | |
| "learning_rate": 4.838282078472959e-06, | |
| "loss": 0.6016, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 0.9536891679748822, | |
| "grad_norm": 0.5213924382658889, | |
| "learning_rate": 4.705726405090138e-06, | |
| "loss": 0.7974, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 0.9549973835688121, | |
| "grad_norm": 0.45047173757415426, | |
| "learning_rate": 4.573170731707317e-06, | |
| "loss": 0.5249, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.956305599162742, | |
| "grad_norm": 0.5437903519326854, | |
| "learning_rate": 4.440615058324496e-06, | |
| "loss": 0.8166, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 0.957613814756672, | |
| "grad_norm": 0.48264576838040973, | |
| "learning_rate": 4.308059384941676e-06, | |
| "loss": 0.5814, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.9589220303506018, | |
| "grad_norm": 0.545526763976128, | |
| "learning_rate": 4.175503711558855e-06, | |
| "loss": 0.8006, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 0.9602302459445317, | |
| "grad_norm": 0.38539225021424495, | |
| "learning_rate": 4.0429480381760345e-06, | |
| "loss": 0.5745, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 0.9615384615384616, | |
| "grad_norm": 0.5798773104196537, | |
| "learning_rate": 3.910392364793213e-06, | |
| "loss": 0.8073, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.9628466771323915, | |
| "grad_norm": 0.6878789040909351, | |
| "learning_rate": 3.777836691410393e-06, | |
| "loss": 0.5763, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.9641548927263213, | |
| "grad_norm": 0.5289835887969982, | |
| "learning_rate": 3.6452810180275714e-06, | |
| "loss": 0.7766, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 0.9654631083202512, | |
| "grad_norm": 0.40667843587961816, | |
| "learning_rate": 3.5127253446447508e-06, | |
| "loss": 0.558, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.9667713239141811, | |
| "grad_norm": 0.5840192236729715, | |
| "learning_rate": 3.3801696712619306e-06, | |
| "loss": 0.8129, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 0.968079539508111, | |
| "grad_norm": 0.3178170420497473, | |
| "learning_rate": 3.247613997879109e-06, | |
| "loss": 0.5787, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.9693877551020408, | |
| "grad_norm": 0.54996002509364, | |
| "learning_rate": 3.115058324496289e-06, | |
| "loss": 0.8326, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 0.9706959706959707, | |
| "grad_norm": 0.5534777218559572, | |
| "learning_rate": 2.982502651113468e-06, | |
| "loss": 0.5821, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 0.9720041862899006, | |
| "grad_norm": 0.5230629562997223, | |
| "learning_rate": 2.849946977730647e-06, | |
| "loss": 0.7779, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 0.9733124018838305, | |
| "grad_norm": 0.875422492824055, | |
| "learning_rate": 2.7173913043478263e-06, | |
| "loss": 0.592, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.9746206174777603, | |
| "grad_norm": 0.5569915281293889, | |
| "learning_rate": 2.5848356309650052e-06, | |
| "loss": 0.8067, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.9759288330716902, | |
| "grad_norm": 0.6183408664255449, | |
| "learning_rate": 2.4522799575821846e-06, | |
| "loss": 0.5875, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 0.9772370486656201, | |
| "grad_norm": 0.49698428419163243, | |
| "learning_rate": 2.319724284199364e-06, | |
| "loss": 0.8197, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 0.97854526425955, | |
| "grad_norm": 0.6395403482510305, | |
| "learning_rate": 2.187168610816543e-06, | |
| "loss": 0.5956, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 0.9798534798534798, | |
| "grad_norm": 0.5141508620993104, | |
| "learning_rate": 2.054612937433722e-06, | |
| "loss": 0.8213, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 0.9811616954474097, | |
| "grad_norm": 0.7297722231605804, | |
| "learning_rate": 1.9220572640509014e-06, | |
| "loss": 0.5724, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.9824699110413396, | |
| "grad_norm": 0.6139671735977023, | |
| "learning_rate": 1.7895015906680807e-06, | |
| "loss": 0.8038, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 0.9837781266352695, | |
| "grad_norm": 0.6348684038508452, | |
| "learning_rate": 1.65694591728526e-06, | |
| "loss": 0.5891, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.9850863422291993, | |
| "grad_norm": 0.5570560567015977, | |
| "learning_rate": 1.5243902439024391e-06, | |
| "loss": 0.8008, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 0.9863945578231292, | |
| "grad_norm": 0.5421909673109165, | |
| "learning_rate": 1.3918345705196183e-06, | |
| "loss": 0.5792, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 0.9877027734170591, | |
| "grad_norm": 0.5728614761702414, | |
| "learning_rate": 1.2592788971367975e-06, | |
| "loss": 0.7941, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.989010989010989, | |
| "grad_norm": 0.6317238615567622, | |
| "learning_rate": 1.1267232237539766e-06, | |
| "loss": 0.6009, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.9903192046049188, | |
| "grad_norm": 0.5160377073279534, | |
| "learning_rate": 9.94167550371156e-07, | |
| "loss": 0.7883, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 0.9916274201988488, | |
| "grad_norm": 0.690634120523154, | |
| "learning_rate": 8.616118769883351e-07, | |
| "loss": 0.5766, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 0.9929356357927787, | |
| "grad_norm": 0.5284248764034778, | |
| "learning_rate": 7.290562036055143e-07, | |
| "loss": 0.8011, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 0.9942438513867086, | |
| "grad_norm": 0.5999593891013711, | |
| "learning_rate": 5.965005302226936e-07, | |
| "loss": 0.5956, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.9955520669806384, | |
| "grad_norm": 0.48546714136377134, | |
| "learning_rate": 4.6394485683987276e-07, | |
| "loss": 0.7916, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 0.9968602825745683, | |
| "grad_norm": 0.3062976732646243, | |
| "learning_rate": 3.31389183457052e-07, | |
| "loss": 0.5679, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.9981684981684982, | |
| "grad_norm": 0.5269723896971663, | |
| "learning_rate": 1.9883351007423118e-07, | |
| "loss": 0.8292, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 0.9994767137624281, | |
| "grad_norm": 0.49183779024992025, | |
| "learning_rate": 6.62778366914104e-08, | |
| "loss": 0.5841, | |
| "step": 7640 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 7644, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |