{
  "best_global_step": 2838,
  "best_metric": 0.9269727168763274,
  "best_model_checkpoint": "./results/checkpoint-2838",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 2838,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.052854122621564484,
      "grad_norm": 7.628295421600342,
      "learning_rate": 1.9654686398872448e-05,
      "loss": 0.3925,
      "step": 50
    },
    {
      "epoch": 0.10570824524312897,
      "grad_norm": 12.30865478515625,
      "learning_rate": 1.9302325581395353e-05,
      "loss": 0.3431,
      "step": 100
    },
    {
      "epoch": 0.15856236786469344,
      "grad_norm": 14.237386703491211,
      "learning_rate": 1.8949964763918254e-05,
      "loss": 0.3237,
      "step": 150
    },
    {
      "epoch": 0.21141649048625794,
      "grad_norm": 28.253515243530273,
      "learning_rate": 1.8597603946441155e-05,
      "loss": 0.3131,
      "step": 200
    },
    {
      "epoch": 0.2642706131078224,
      "grad_norm": 7.577988624572754,
      "learning_rate": 1.824524312896406e-05,
      "loss": 0.3027,
      "step": 250
    },
    {
      "epoch": 0.3171247357293869,
      "grad_norm": 12.410346031188965,
      "learning_rate": 1.7892882311486964e-05,
      "loss": 0.2873,
      "step": 300
    },
    {
      "epoch": 0.3699788583509514,
      "grad_norm": 17.205244064331055,
      "learning_rate": 1.754052149400987e-05,
      "loss": 0.3037,
      "step": 350
    },
    {
      "epoch": 0.42283298097251587,
      "grad_norm": 15.440715789794922,
      "learning_rate": 1.718816067653277e-05,
      "loss": 0.2388,
      "step": 400
    },
    {
      "epoch": 0.47568710359408034,
      "grad_norm": 18.95528793334961,
      "learning_rate": 1.6835799859055675e-05,
      "loss": 0.2834,
      "step": 450
    },
    {
      "epoch": 0.5285412262156448,
      "grad_norm": 11.796369552612305,
      "learning_rate": 1.6483439041578576e-05,
      "loss": 0.282,
      "step": 500
    },
    {
      "epoch": 0.5813953488372093,
      "grad_norm": 16.309091567993164,
      "learning_rate": 1.613107822410148e-05,
      "loss": 0.2861,
      "step": 550
    },
    {
      "epoch": 0.6342494714587738,
      "grad_norm": 34.3432731628418,
      "learning_rate": 1.5778717406624385e-05,
      "loss": 0.295,
      "step": 600
    },
    {
      "epoch": 0.6871035940803383,
      "grad_norm": 7.3598151206970215,
      "learning_rate": 1.542635658914729e-05,
      "loss": 0.2786,
      "step": 650
    },
    {
      "epoch": 0.7399577167019028,
      "grad_norm": 19.721378326416016,
      "learning_rate": 1.5073995771670191e-05,
      "loss": 0.2627,
      "step": 700
    },
    {
      "epoch": 0.7928118393234672,
      "grad_norm": 6.87978982925415,
      "learning_rate": 1.4721634954193096e-05,
      "loss": 0.274,
      "step": 750
    },
    {
      "epoch": 0.8456659619450317,
      "grad_norm": 23.790388107299805,
      "learning_rate": 1.4369274136715999e-05,
      "loss": 0.2477,
      "step": 800
    },
    {
      "epoch": 0.8985200845665962,
      "grad_norm": 25.269287109375,
      "learning_rate": 1.4016913319238903e-05,
      "loss": 0.2559,
      "step": 850
    },
    {
      "epoch": 0.9513742071881607,
      "grad_norm": 12.499388694763184,
      "learning_rate": 1.3664552501761804e-05,
      "loss": 0.2535,
      "step": 900
    },
    {
      "epoch": 1.0,
      "eval_f1_macro": 0.8913938479590982,
      "eval_f1_micro": 0.8905597326649958,
      "eval_loss": 0.23979686200618744,
      "eval_precision": 0.9089358799454298,
      "eval_recall": 0.872911889944317,
      "eval_runtime": 10.313,
      "eval_samples_per_second": 366.721,
      "eval_steps_per_second": 22.981,
      "step": 946
    },
    {
      "epoch": 1.0042283298097252,
      "grad_norm": 20.737754821777344,
      "learning_rate": 1.3312191684284707e-05,
      "loss": 0.2342,
      "step": 950
    },
    {
      "epoch": 1.0570824524312896,
      "grad_norm": 8.743599891662598,
      "learning_rate": 1.2959830866807612e-05,
      "loss": 0.1932,
      "step": 1000
    },
    {
      "epoch": 1.109936575052854,
      "grad_norm": 15.532188415527344,
      "learning_rate": 1.2607470049330515e-05,
      "loss": 0.1546,
      "step": 1050
    },
    {
      "epoch": 1.1627906976744187,
      "grad_norm": 10.706155776977539,
      "learning_rate": 1.225510923185342e-05,
      "loss": 0.2058,
      "step": 1100
    },
    {
      "epoch": 1.215644820295983,
      "grad_norm": 6.785822868347168,
      "learning_rate": 1.1902748414376322e-05,
      "loss": 0.1603,
      "step": 1150
    },
    {
      "epoch": 1.2684989429175475,
      "grad_norm": 19.18027687072754,
      "learning_rate": 1.1550387596899227e-05,
      "loss": 0.17,
      "step": 1200
    },
    {
      "epoch": 1.3213530655391121,
      "grad_norm": 5.942857265472412,
      "learning_rate": 1.1198026779422128e-05,
      "loss": 0.2369,
      "step": 1250
    },
    {
      "epoch": 1.3742071881606766,
      "grad_norm": 3.4256839752197266,
      "learning_rate": 1.0845665961945033e-05,
      "loss": 0.1843,
      "step": 1300
    },
    {
      "epoch": 1.427061310782241,
      "grad_norm": 21.69367218017578,
      "learning_rate": 1.0493305144467936e-05,
      "loss": 0.1798,
      "step": 1350
    },
    {
      "epoch": 1.4799154334038054,
      "grad_norm": 12.499094009399414,
      "learning_rate": 1.014094432699084e-05,
      "loss": 0.2137,
      "step": 1400
    },
    {
      "epoch": 1.53276955602537,
      "grad_norm": 9.75059700012207,
      "learning_rate": 9.788583509513743e-06,
      "loss": 0.1616,
      "step": 1450
    },
    {
      "epoch": 1.5856236786469344,
      "grad_norm": 16.770835876464844,
      "learning_rate": 9.436222692036646e-06,
      "loss": 0.1711,
      "step": 1500
    },
    {
      "epoch": 1.638477801268499,
      "grad_norm": 7.7109222412109375,
      "learning_rate": 9.083861874559549e-06,
      "loss": 0.1757,
      "step": 1550
    },
    {
      "epoch": 1.6913319238900635,
      "grad_norm": 8.317294120788574,
      "learning_rate": 8.731501057082454e-06,
      "loss": 0.1886,
      "step": 1600
    },
    {
      "epoch": 1.744186046511628,
      "grad_norm": 7.418989181518555,
      "learning_rate": 8.379140239605357e-06,
      "loss": 0.1743,
      "step": 1650
    },
    {
      "epoch": 1.7970401691331923,
      "grad_norm": 15.134026527404785,
      "learning_rate": 8.02677942212826e-06,
      "loss": 0.1829,
      "step": 1700
    },
    {
      "epoch": 1.8498942917547567,
      "grad_norm": 8.40051555633545,
      "learning_rate": 7.674418604651164e-06,
      "loss": 0.1516,
      "step": 1750
    },
    {
      "epoch": 1.9027484143763214,
      "grad_norm": 42.32719039916992,
      "learning_rate": 7.322057787174067e-06,
      "loss": 0.1574,
      "step": 1800
    },
    {
      "epoch": 1.955602536997886,
      "grad_norm": 7.198770046234131,
      "learning_rate": 6.969696969696971e-06,
      "loss": 0.1796,
      "step": 1850
    },
    {
      "epoch": 2.0,
      "eval_f1_macro": 0.9171441852345779,
      "eval_f1_micro": 0.9165967444202048,
      "eval_loss": 0.207748144865036,
      "eval_precision": 0.9397797660013765,
      "eval_recall": 0.8945299705207992,
      "eval_runtime": 10.3145,
      "eval_samples_per_second": 366.667,
      "eval_steps_per_second": 22.977,
      "step": 1892
    },
    {
      "epoch": 2.0084566596194504,
      "grad_norm": 3.685957193374634,
      "learning_rate": 6.6173361522198745e-06,
      "loss": 0.1328,
      "step": 1900
    },
    {
      "epoch": 2.061310782241015,
      "grad_norm": 12.144611358642578,
      "learning_rate": 6.2649753347427766e-06,
      "loss": 0.1201,
      "step": 1950
    },
    {
      "epoch": 2.1141649048625792,
      "grad_norm": 26.261823654174805,
      "learning_rate": 5.91261451726568e-06,
      "loss": 0.1465,
      "step": 2000
    },
    {
      "epoch": 2.1670190274841437,
      "grad_norm": 42.77655029296875,
      "learning_rate": 5.560253699788583e-06,
      "loss": 0.1231,
      "step": 2050
    },
    {
      "epoch": 2.219873150105708,
      "grad_norm": 10.951128959655762,
      "learning_rate": 5.207892882311487e-06,
      "loss": 0.1367,
      "step": 2100
    },
    {
      "epoch": 2.2727272727272725,
      "grad_norm": 12.861650466918945,
      "learning_rate": 4.855532064834391e-06,
      "loss": 0.1056,
      "step": 2150
    },
    {
      "epoch": 2.3255813953488373,
      "grad_norm": 16.26862144470215,
      "learning_rate": 4.5031712473572945e-06,
      "loss": 0.1259,
      "step": 2200
    },
    {
      "epoch": 2.3784355179704018,
      "grad_norm": 11.81278133392334,
      "learning_rate": 4.150810429880197e-06,
      "loss": 0.1214,
      "step": 2250
    },
    {
      "epoch": 2.431289640591966,
      "grad_norm": 1.1252022981643677,
      "learning_rate": 3.798449612403101e-06,
      "loss": 0.1017,
      "step": 2300
    },
    {
      "epoch": 2.4841437632135306,
      "grad_norm": 11.047788619995117,
      "learning_rate": 3.4460887949260045e-06,
      "loss": 0.1263,
      "step": 2350
    },
    {
      "epoch": 2.536997885835095,
      "grad_norm": 16.96929359436035,
      "learning_rate": 3.0937279774489083e-06,
      "loss": 0.0986,
      "step": 2400
    },
    {
      "epoch": 2.58985200845666,
      "grad_norm": 2.476245164871216,
      "learning_rate": 2.741367159971811e-06,
      "loss": 0.1105,
      "step": 2450
    },
    {
      "epoch": 2.6427061310782243,
      "grad_norm": 5.029143810272217,
      "learning_rate": 2.389006342494715e-06,
      "loss": 0.1219,
      "step": 2500
    },
    {
      "epoch": 2.6955602536997887,
      "grad_norm": 16.73661231994629,
      "learning_rate": 2.0366455250176183e-06,
      "loss": 0.1087,
      "step": 2550
    },
    {
      "epoch": 2.748414376321353,
      "grad_norm": 1.3909024000167847,
      "learning_rate": 1.6842847075405216e-06,
      "loss": 0.1118,
      "step": 2600
    },
    {
      "epoch": 2.8012684989429175,
      "grad_norm": 14.585844993591309,
      "learning_rate": 1.3319238900634251e-06,
      "loss": 0.1018,
      "step": 2650
    },
    {
      "epoch": 2.854122621564482,
      "grad_norm": 24.876344680786133,
      "learning_rate": 9.795630725863285e-07,
      "loss": 0.1001,
      "step": 2700
    },
    {
      "epoch": 2.9069767441860463,
      "grad_norm": 28.51801872253418,
      "learning_rate": 6.272022551092319e-07,
      "loss": 0.1004,
      "step": 2750
    },
    {
      "epoch": 2.9598308668076108,
      "grad_norm": 2.746856689453125,
      "learning_rate": 2.748414376321353e-07,
      "loss": 0.1181,
      "step": 2800
    },
    {
      "epoch": 3.0,
      "eval_f1_macro": 0.927417514244305,
      "eval_f1_micro": 0.9269727168763274,
      "eval_loss": 0.2070944607257843,
      "eval_precision": 0.9247066492829205,
      "eval_recall": 0.9292499181133311,
      "eval_runtime": 10.2948,
      "eval_samples_per_second": 367.371,
      "eval_steps_per_second": 23.021,
      "step": 2838
    }
  ],
  "logging_steps": 50,
  "max_steps": 2838,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.200759923346432e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|