| { |
| "best_metric": 0.6966096758842468, |
| "best_model_checkpoint": "bert_uncased_L-2_H-128_A-2_mnli/checkpoint-16874", |
| "epoch": 16.0, |
| "eval_steps": 500, |
| "global_step": 24544, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.641183853149414, |
| "learning_rate": 4.9e-05, |
| "loss": 0.9155, |
| "step": 1534 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.6342333163525217, |
| "eval_loss": 0.8197218775749207, |
| "eval_runtime": 3.1144, |
| "eval_samples_per_second": 3151.502, |
| "eval_steps_per_second": 12.523, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 2.0699892044067383, |
| "learning_rate": 4.8e-05, |
| "loss": 0.8189, |
| "step": 3068 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.6625573102394294, |
| "eval_loss": 0.7689471244812012, |
| "eval_runtime": 3.0712, |
| "eval_samples_per_second": 3195.829, |
| "eval_steps_per_second": 12.699, |
| "step": 3068 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.7944191694259644, |
| "learning_rate": 4.7e-05, |
| "loss": 0.7747, |
| "step": 4602 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.6760061130922058, |
| "eval_loss": 0.7416538596153259, |
| "eval_runtime": 3.0841, |
| "eval_samples_per_second": 3182.445, |
| "eval_steps_per_second": 12.645, |
| "step": 4602 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 1.8538000583648682, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.7449, |
| "step": 6136 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.6851757514009169, |
| "eval_loss": 0.7284622192382812, |
| "eval_runtime": 3.0649, |
| "eval_samples_per_second": 3202.403, |
| "eval_steps_per_second": 12.725, |
| "step": 6136 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 2.2576873302459717, |
| "learning_rate": 4.5e-05, |
| "loss": 0.7198, |
| "step": 7670 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.693428425878757, |
| "eval_loss": 0.7110932469367981, |
| "eval_runtime": 3.0825, |
| "eval_samples_per_second": 3184.117, |
| "eval_steps_per_second": 12.652, |
| "step": 7670 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 2.2374134063720703, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.6996, |
| "step": 9204 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.6977075904228223, |
| "eval_loss": 0.7117594480514526, |
| "eval_runtime": 3.0866, |
| "eval_samples_per_second": 3179.874, |
| "eval_steps_per_second": 12.635, |
| "step": 9204 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 2.3460779190063477, |
| "learning_rate": 4.3e-05, |
| "loss": 0.6812, |
| "step": 10738 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.7030056036678554, |
| "eval_loss": 0.7004757523536682, |
| "eval_runtime": 3.0836, |
| "eval_samples_per_second": 3182.923, |
| "eval_steps_per_second": 12.647, |
| "step": 10738 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 2.158047914505005, |
| "learning_rate": 4.2e-05, |
| "loss": 0.6649, |
| "step": 12272 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.7043301069791136, |
| "eval_loss": 0.6981356739997864, |
| "eval_runtime": 3.0881, |
| "eval_samples_per_second": 3178.334, |
| "eval_steps_per_second": 12.629, |
| "step": 12272 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 2.4495298862457275, |
| "learning_rate": 4.1e-05, |
| "loss": 0.6491, |
| "step": 13806 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.7036169128884361, |
| "eval_loss": 0.7056555151939392, |
| "eval_runtime": 3.0711, |
| "eval_samples_per_second": 3195.888, |
| "eval_steps_per_second": 12.699, |
| "step": 13806 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 2.2467546463012695, |
| "learning_rate": 4e-05, |
| "loss": 0.6358, |
| "step": 15340 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.7076923076923077, |
| "eval_loss": 0.698304295539856, |
| "eval_runtime": 3.0715, |
| "eval_samples_per_second": 3195.477, |
| "eval_steps_per_second": 12.697, |
| "step": 15340 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 2.603757619857788, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 0.6224, |
| "step": 16874 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.7063678043810494, |
| "eval_loss": 0.6966096758842468, |
| "eval_runtime": 3.0786, |
| "eval_samples_per_second": 3188.18, |
| "eval_steps_per_second": 12.668, |
| "step": 16874 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 2.9877541065216064, |
| "learning_rate": 3.8e-05, |
| "loss": 0.6109, |
| "step": 18408 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.7145185939887927, |
| "eval_loss": 0.7001488208770752, |
| "eval_runtime": 3.1144, |
| "eval_samples_per_second": 3151.457, |
| "eval_steps_per_second": 12.522, |
| "step": 18408 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 2.9254136085510254, |
| "learning_rate": 3.7e-05, |
| "loss": 0.5994, |
| "step": 19942 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.7112582781456953, |
| "eval_loss": 0.7014375329017639, |
| "eval_runtime": 3.0536, |
| "eval_samples_per_second": 3214.286, |
| "eval_steps_per_second": 12.772, |
| "step": 19942 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 2.3332672119140625, |
| "learning_rate": 3.6e-05, |
| "loss": 0.5872, |
| "step": 21476 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.7084055017829852, |
| "eval_loss": 0.7061210870742798, |
| "eval_runtime": 3.0601, |
| "eval_samples_per_second": 3207.409, |
| "eval_steps_per_second": 12.745, |
| "step": 21476 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 3.173841953277588, |
| "learning_rate": 3.5e-05, |
| "loss": 0.5779, |
| "step": 23010 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.716760061130922, |
| "eval_loss": 0.7053535580635071, |
| "eval_runtime": 3.016, |
| "eval_samples_per_second": 3254.334, |
| "eval_steps_per_second": 12.931, |
| "step": 23010 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 2.5497801303863525, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.5681, |
| "step": 24544 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.7147223637289862, |
| "eval_loss": 0.7059070467948914, |
| "eval_runtime": 3.034, |
| "eval_samples_per_second": 3234.976, |
| "eval_steps_per_second": 12.854, |
| "step": 24544 |
| }, |
| { |
| "epoch": 16.0, |
| "step": 24544, |
| "total_flos": 3992627397378048.0, |
| "train_loss": 0.6793889937220527, |
| "train_runtime": 2308.8944, |
| "train_samples_per_second": 8504.114, |
| "train_steps_per_second": 33.219 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 76700, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 50, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 5 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3992627397378048.0, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|