{ "best_global_step": 2838, "best_metric": 0.9269727168763274, "best_model_checkpoint": "./results/checkpoint-2838", "epoch": 3.0, "eval_steps": 500, "global_step": 2838, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.052854122621564484, "grad_norm": 7.628295421600342, "learning_rate": 1.9654686398872448e-05, "loss": 0.3925, "step": 50 }, { "epoch": 0.10570824524312897, "grad_norm": 12.30865478515625, "learning_rate": 1.9302325581395353e-05, "loss": 0.3431, "step": 100 }, { "epoch": 0.15856236786469344, "grad_norm": 14.237386703491211, "learning_rate": 1.8949964763918254e-05, "loss": 0.3237, "step": 150 }, { "epoch": 0.21141649048625794, "grad_norm": 28.253515243530273, "learning_rate": 1.8597603946441155e-05, "loss": 0.3131, "step": 200 }, { "epoch": 0.2642706131078224, "grad_norm": 7.577988624572754, "learning_rate": 1.824524312896406e-05, "loss": 0.3027, "step": 250 }, { "epoch": 0.3171247357293869, "grad_norm": 12.410346031188965, "learning_rate": 1.7892882311486964e-05, "loss": 0.2873, "step": 300 }, { "epoch": 0.3699788583509514, "grad_norm": 17.205244064331055, "learning_rate": 1.754052149400987e-05, "loss": 0.3037, "step": 350 }, { "epoch": 0.42283298097251587, "grad_norm": 15.440715789794922, "learning_rate": 1.718816067653277e-05, "loss": 0.2388, "step": 400 }, { "epoch": 0.47568710359408034, "grad_norm": 18.95528793334961, "learning_rate": 1.6835799859055675e-05, "loss": 0.2834, "step": 450 }, { "epoch": 0.5285412262156448, "grad_norm": 11.796369552612305, "learning_rate": 1.6483439041578576e-05, "loss": 0.282, "step": 500 }, { "epoch": 0.5813953488372093, "grad_norm": 16.309091567993164, "learning_rate": 1.613107822410148e-05, "loss": 0.2861, "step": 550 }, { "epoch": 0.6342494714587738, "grad_norm": 34.3432731628418, "learning_rate": 1.5778717406624385e-05, "loss": 0.295, "step": 600 }, { "epoch": 0.6871035940803383, "grad_norm": 7.3598151206970215, "learning_rate": 1.542635658914729e-05, "loss": 0.2786, "step": 650 }, { "epoch": 0.7399577167019028, "grad_norm": 19.721378326416016, "learning_rate": 1.5073995771670191e-05, "loss": 0.2627, "step": 700 }, { "epoch": 0.7928118393234672, "grad_norm": 6.87978982925415, "learning_rate": 1.4721634954193096e-05, "loss": 0.274, "step": 750 }, { "epoch": 0.8456659619450317, "grad_norm": 23.790388107299805, "learning_rate": 1.4369274136715999e-05, "loss": 0.2477, "step": 800 }, { "epoch": 0.8985200845665962, "grad_norm": 25.269287109375, "learning_rate": 1.4016913319238903e-05, "loss": 0.2559, "step": 850 }, { "epoch": 0.9513742071881607, "grad_norm": 12.499388694763184, "learning_rate": 1.3664552501761804e-05, "loss": 0.2535, "step": 900 }, { "epoch": 1.0, "eval_f1_macro": 0.8913938479590982, "eval_f1_micro": 0.8905597326649958, "eval_loss": 0.23979686200618744, "eval_precision": 0.9089358799454298, "eval_recall": 0.872911889944317, "eval_runtime": 10.313, "eval_samples_per_second": 366.721, "eval_steps_per_second": 22.981, "step": 946 }, { "epoch": 1.0042283298097252, "grad_norm": 20.737754821777344, "learning_rate": 1.3312191684284707e-05, "loss": 0.2342, "step": 950 }, { "epoch": 1.0570824524312896, "grad_norm": 8.743599891662598, "learning_rate": 1.2959830866807612e-05, "loss": 0.1932, "step": 1000 }, { "epoch": 1.109936575052854, "grad_norm": 15.532188415527344, "learning_rate": 1.2607470049330515e-05, "loss": 0.1546, "step": 1050 }, { "epoch": 1.1627906976744187, "grad_norm": 10.706155776977539, "learning_rate": 1.225510923185342e-05, "loss": 0.2058, "step": 1100 }, { "epoch": 1.215644820295983, "grad_norm": 6.785822868347168, "learning_rate": 1.1902748414376322e-05, "loss": 0.1603, "step": 1150 }, { "epoch": 1.2684989429175475, "grad_norm": 19.18027687072754, "learning_rate": 1.1550387596899227e-05, "loss": 0.17, "step": 1200 }, { "epoch": 1.3213530655391121, "grad_norm": 5.942857265472412, "learning_rate": 1.1198026779422128e-05, "loss": 0.2369, "step": 1250 }, { "epoch": 1.3742071881606766, "grad_norm": 3.4256839752197266, "learning_rate": 1.0845665961945033e-05, "loss": 0.1843, "step": 1300 }, { "epoch": 1.427061310782241, "grad_norm": 21.69367218017578, "learning_rate": 1.0493305144467936e-05, "loss": 0.1798, "step": 1350 }, { "epoch": 1.4799154334038054, "grad_norm": 12.499094009399414, "learning_rate": 1.014094432699084e-05, "loss": 0.2137, "step": 1400 }, { "epoch": 1.53276955602537, "grad_norm": 9.75059700012207, "learning_rate": 9.788583509513743e-06, "loss": 0.1616, "step": 1450 }, { "epoch": 1.5856236786469344, "grad_norm": 16.770835876464844, "learning_rate": 9.436222692036646e-06, "loss": 0.1711, "step": 1500 }, { "epoch": 1.638477801268499, "grad_norm": 7.7109222412109375, "learning_rate": 9.083861874559549e-06, "loss": 0.1757, "step": 1550 }, { "epoch": 1.6913319238900635, "grad_norm": 8.317294120788574, "learning_rate": 8.731501057082454e-06, "loss": 0.1886, "step": 1600 }, { "epoch": 1.744186046511628, "grad_norm": 7.418989181518555, "learning_rate": 8.379140239605357e-06, "loss": 0.1743, "step": 1650 }, { "epoch": 1.7970401691331923, "grad_norm": 15.134026527404785, "learning_rate": 8.02677942212826e-06, "loss": 0.1829, "step": 1700 }, { "epoch": 1.8498942917547567, "grad_norm": 8.40051555633545, "learning_rate": 7.674418604651164e-06, "loss": 0.1516, "step": 1750 }, { "epoch": 1.9027484143763214, "grad_norm": 42.32719039916992, "learning_rate": 7.322057787174067e-06, "loss": 0.1574, "step": 1800 }, { "epoch": 1.955602536997886, "grad_norm": 7.198770046234131, "learning_rate": 6.969696969696971e-06, "loss": 0.1796, "step": 1850 }, { "epoch": 2.0, "eval_f1_macro": 0.9171441852345779, "eval_f1_micro": 0.9165967444202048, "eval_loss": 0.207748144865036, "eval_precision": 0.9397797660013765, "eval_recall": 0.8945299705207992, "eval_runtime": 10.3145, "eval_samples_per_second": 366.667, "eval_steps_per_second": 22.977, "step": 1892 }, { "epoch": 2.0084566596194504, "grad_norm": 3.685957193374634, "learning_rate": 6.6173361522198745e-06, "loss": 0.1328, "step": 1900 }, { "epoch": 2.061310782241015, "grad_norm": 12.144611358642578, "learning_rate": 6.2649753347427766e-06, "loss": 0.1201, "step": 1950 }, { "epoch": 2.1141649048625792, "grad_norm": 26.261823654174805, "learning_rate": 5.91261451726568e-06, "loss": 0.1465, "step": 2000 }, { "epoch": 2.1670190274841437, "grad_norm": 42.77655029296875, "learning_rate": 5.560253699788583e-06, "loss": 0.1231, "step": 2050 }, { "epoch": 2.219873150105708, "grad_norm": 10.951128959655762, "learning_rate": 5.207892882311487e-06, "loss": 0.1367, "step": 2100 }, { "epoch": 2.2727272727272725, "grad_norm": 12.861650466918945, "learning_rate": 4.855532064834391e-06, "loss": 0.1056, "step": 2150 }, { "epoch": 2.3255813953488373, "grad_norm": 16.26862144470215, "learning_rate": 4.5031712473572945e-06, "loss": 0.1259, "step": 2200 }, { "epoch": 2.3784355179704018, "grad_norm": 11.81278133392334, "learning_rate": 4.150810429880197e-06, "loss": 0.1214, "step": 2250 }, { "epoch": 2.431289640591966, "grad_norm": 1.1252022981643677, "learning_rate": 3.798449612403101e-06, "loss": 0.1017, "step": 2300 }, { "epoch": 2.4841437632135306, "grad_norm": 11.047788619995117, "learning_rate": 3.4460887949260045e-06, "loss": 0.1263, "step": 2350 }, { "epoch": 2.536997885835095, "grad_norm": 16.96929359436035, "learning_rate": 3.0937279774489083e-06, "loss": 0.0986, "step": 2400 }, { "epoch": 2.58985200845666, "grad_norm": 2.476245164871216, "learning_rate": 2.741367159971811e-06, "loss": 0.1105, "step": 2450 }, { "epoch": 2.6427061310782243, "grad_norm": 5.029143810272217, "learning_rate": 2.389006342494715e-06, "loss": 0.1219, "step": 2500 }, { "epoch": 2.6955602536997887, "grad_norm": 16.73661231994629, "learning_rate": 2.0366455250176183e-06, "loss": 0.1087, "step": 2550 }, { "epoch": 2.748414376321353, "grad_norm": 1.3909024000167847, "learning_rate": 1.6842847075405216e-06, "loss": 0.1118, "step": 2600 }, { "epoch": 2.8012684989429175, "grad_norm": 14.585844993591309, "learning_rate": 1.3319238900634251e-06, "loss": 0.1018, "step": 2650 }, { "epoch": 2.854122621564482, "grad_norm": 24.876344680786133, "learning_rate": 9.795630725863285e-07, "loss": 0.1001, "step": 2700 }, { "epoch": 2.9069767441860463, "grad_norm": 28.51801872253418, "learning_rate": 6.272022551092319e-07, "loss": 0.1004, "step": 2750 }, { "epoch": 2.9598308668076108, "grad_norm": 2.746856689453125, "learning_rate": 2.748414376321353e-07, "loss": 0.1181, "step": 2800 }, { "epoch": 3.0, "eval_f1_macro": 0.927417514244305, "eval_f1_micro": 0.9269727168763274, "eval_loss": 0.2070944607257843, "eval_precision": 0.9247066492829205, "eval_recall": 0.9292499181133311, "eval_runtime": 10.2948, "eval_samples_per_second": 367.371, "eval_steps_per_second": 23.021, "step": 2838 } ], "logging_steps": 50, "max_steps": 2838, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.200759923346432e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }