{ "best_global_step": 1000, "best_metric": 0.7299026676279741, "best_model_checkpoint": "./hpo_Buzzeitor_longformer-base-4096-bne-es/trial_0/checkpoint-1000", "epoch": 4.82771896053898, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.48123195380173245, "grad_norm": 2.3587965965270996, "learning_rate": 2.4559035121953966e-05, "loss": 0.6174, "step": 100 }, { "epoch": 0.48123195380173245, "eval_accuracy": 0.6756756756756757, "eval_f1": 0.6636363636363636, "eval_loss": 0.6910868883132935, "eval_precision": 0.662606978275181, "eval_recall": 0.6721254355400696, "eval_runtime": 2.3792, "eval_samples_per_second": 93.307, "eval_steps_per_second": 15.551, "step": 100 }, { "epoch": 0.9624639076034649, "grad_norm": 3.7914035320281982, "learning_rate": 2.3529306186022982e-05, "loss": 0.5708, "step": 200 }, { "epoch": 0.9624639076034649, "eval_accuracy": 0.6666666666666666, "eval_f1": 0.6554819226574952, "eval_loss": 0.7087554931640625, "eval_precision": 0.6552459016393443, "eval_recall": 0.6649825783972125, "eval_runtime": 2.4015, "eval_samples_per_second": 92.444, "eval_steps_per_second": 15.407, "step": 200 }, { "epoch": 1.4475457170356112, "grad_norm": 18.273841857910156, "learning_rate": 2.2499577250091998e-05, "loss": 0.5561, "step": 300 }, { "epoch": 1.4475457170356112, "eval_accuracy": 0.7207207207207207, "eval_f1": 0.671409472880061, "eval_loss": 0.6270285248756409, "eval_precision": 0.7086640211640212, "eval_recall": 0.6648954703832752, "eval_runtime": 2.4, "eval_samples_per_second": 92.502, "eval_steps_per_second": 15.417, "step": 300 }, { "epoch": 1.9287776708373436, "grad_norm": 34.575313568115234, "learning_rate": 2.1480145603520324e-05, "loss": 0.5025, "step": 400 }, { "epoch": 1.9287776708373436, "eval_accuracy": 0.7207207207207207, "eval_f1": 0.6564839772386942, "eval_loss": 0.645313560962677, "eval_precision": 0.7231869254341164, "eval_recall": 0.6522648083623693, "eval_runtime": 2.3956, "eval_samples_per_second": 92.671, "eval_steps_per_second": 15.445, "step": 400 }, { "epoch": 2.41385948026949, "grad_norm": 31.36258888244629, "learning_rate": 2.045041666758934e-05, "loss": 0.4304, "step": 500 }, { "epoch": 2.41385948026949, "eval_accuracy": 0.6801801801801802, "eval_f1": 0.6252407332556646, "eval_loss": 0.8638490438461304, "eval_precision": 0.6532934131736527, "eval_recall": 0.6226480836236934, "eval_runtime": 2.403, "eval_samples_per_second": 92.386, "eval_steps_per_second": 15.398, "step": 500 }, { "epoch": 2.8950914340712224, "grad_norm": 28.394620895385742, "learning_rate": 1.942068773165836e-05, "loss": 0.3904, "step": 600 }, { "epoch": 2.8950914340712224, "eval_accuracy": 0.7117117117117117, "eval_f1": 0.6935817805383022, "eval_loss": 0.765376091003418, "eval_precision": 0.69203146374829, "eval_recall": 0.6956445993031359, "eval_runtime": 2.3963, "eval_samples_per_second": 92.644, "eval_steps_per_second": 15.441, "step": 600 }, { "epoch": 3.3801732435033687, "grad_norm": 2.409611463546753, "learning_rate": 1.839095879572737e-05, "loss": 0.2937, "step": 700 }, { "epoch": 3.3801732435033687, "eval_accuracy": 0.6441441441441441, "eval_f1": 0.6437899867980095, "eval_loss": 1.1262925863265991, "eval_precision": 0.6769874123511024, "eval_recall": 0.6824912891986062, "eval_runtime": 2.4008, "eval_samples_per_second": 92.469, "eval_steps_per_second": 15.412, "step": 700 }, { "epoch": 3.861405197305101, "grad_norm": 116.25942993164062, "learning_rate": 1.736122985979639e-05, "loss": 0.2703, "step": 800 }, { "epoch": 3.861405197305101, "eval_accuracy": 0.7207207207207207, "eval_f1": 0.7057968536251711, "eval_loss": 1.0032894611358643, "eval_precision": 0.7032828282828283, "eval_recall": 0.7103658536585367, "eval_runtime": 2.3859, "eval_samples_per_second": 93.047, "eval_steps_per_second": 15.508, "step": 800 }, { "epoch": 4.346487006737247, "grad_norm": 136.07870483398438, "learning_rate": 1.6331500923865403e-05, "loss": 0.2418, "step": 900 }, { "epoch": 4.346487006737247, "eval_accuracy": 0.7342342342342343, "eval_f1": 0.691048472697252, "eval_loss": 1.0043816566467285, "eval_precision": 0.7236044657097289, "eval_recall": 0.6831881533101045, "eval_runtime": 2.4, "eval_samples_per_second": 92.5, "eval_steps_per_second": 15.417, "step": 900 }, { "epoch": 4.82771896053898, "grad_norm": 120.01212310791016, "learning_rate": 1.5312069277293732e-05, "loss": 0.2207, "step": 1000 }, { "epoch": 4.82771896053898, "eval_accuracy": 0.7567567567567568, "eval_f1": 0.7299026676279741, "eval_loss": 0.9589651226997375, "eval_precision": 0.7414473684210526, "eval_recall": 0.723780487804878, "eval_runtime": 2.385, "eval_samples_per_second": 93.083, "eval_steps_per_second": 15.514, "step": 1000 } ], "logging_steps": 100, "max_steps": 2484, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9795381765152256.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }