{ "best_metric": 0.4286545217037201, "best_model_checkpoint": "./vit-augmentation/checkpoint-1926", "epoch": 16.0, "eval_steps": 500, "global_step": 5136, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 6.7570695877075195, "learning_rate": 4.1428571428571437e-05, "loss": 0.9124, "step": 321 }, { "epoch": 1.0, "eval_accuracy": 0.7805131761442441, "eval_f1": 0.768284202243805, "eval_loss": 0.6024736166000366, "eval_precision": 0.778781843539936, "eval_recall": 0.7805131761442441, "eval_runtime": 24.6229, "eval_samples_per_second": 117.127, "eval_steps_per_second": 14.661, "step": 321 }, { "epoch": 2.0, "grad_norm": 7.467956066131592, "learning_rate": 8.311688311688312e-05, "loss": 0.5876, "step": 642 }, { "epoch": 2.0, "eval_accuracy": 0.7864077669902912, "eval_f1": 0.7820269177484748, "eval_loss": 0.5819488167762756, "eval_precision": 0.7990366623686466, "eval_recall": 0.7864077669902912, "eval_runtime": 24.6924, "eval_samples_per_second": 116.797, "eval_steps_per_second": 14.62, "step": 642 }, { "epoch": 3.0, "grad_norm": 2.2375881671905518, "learning_rate": 9.985334621908699e-05, "loss": 0.5415, "step": 963 }, { "epoch": 3.0, "eval_accuracy": 0.8040915395284327, "eval_f1": 0.7864667726550387, "eval_loss": 0.6148691773414612, "eval_precision": 0.7943404456778191, "eval_recall": 0.8040915395284327, "eval_runtime": 23.4113, "eval_samples_per_second": 123.188, "eval_steps_per_second": 15.42, "step": 963 }, { "epoch": 4.0, "grad_norm": 4.80470085144043, "learning_rate": 9.894936461151184e-05, "loss": 0.4815, "step": 1284 }, { "epoch": 4.0, "eval_accuracy": 0.8294036061026352, "eval_f1": 0.8114752843394228, "eval_loss": 0.4653749465942383, "eval_precision": 0.8259066316761086, "eval_recall": 0.8294036061026352, "eval_runtime": 22.9294, "eval_samples_per_second": 125.778, "eval_steps_per_second": 15.744, "step": 1284 }, { "epoch": 5.0, "grad_norm": 8.0819091796875, "learning_rate": 9.723506398349735e-05, "loss": 0.4263, "step": 1605 }, { "epoch": 5.0, "eval_accuracy": 0.8259361997226075, "eval_f1": 0.8022540151903795, "eval_loss": 0.5480949282646179, "eval_precision": 0.8315275661759924, "eval_recall": 0.8259361997226075, "eval_runtime": 22.776, "eval_samples_per_second": 126.625, "eval_steps_per_second": 15.85, "step": 1605 }, { "epoch": 6.0, "grad_norm": 1.5868643522262573, "learning_rate": 9.473882326123909e-05, "loss": 0.3515, "step": 1926 }, { "epoch": 6.0, "eval_accuracy": 0.8592233009708737, "eval_f1": 0.857445188937737, "eval_loss": 0.4286545217037201, "eval_precision": 0.8579724186747698, "eval_recall": 0.8592233009708737, "eval_runtime": 22.7677, "eval_samples_per_second": 126.671, "eval_steps_per_second": 15.856, "step": 1926 }, { "epoch": 7.0, "grad_norm": 7.5292840003967285, "learning_rate": 9.15019657867844e-05, "loss": 0.3144, "step": 2247 }, { "epoch": 7.0, "eval_accuracy": 0.8363384188626907, "eval_f1": 0.8269755362382184, "eval_loss": 0.5004583597183228, "eval_precision": 0.8320109172887173, "eval_recall": 0.8363384188626907, "eval_runtime": 22.8371, "eval_samples_per_second": 126.286, "eval_steps_per_second": 15.808, "step": 2247 }, { "epoch": 8.0, "grad_norm": 9.491310119628906, "learning_rate": 8.757807524194037e-05, "loss": 0.2736, "step": 2568 }, { "epoch": 8.0, "eval_accuracy": 0.8294036061026352, "eval_f1": 0.8301623012381248, "eval_loss": 0.5306037664413452, "eval_precision": 0.8447810859364984, "eval_recall": 0.8294036061026352, "eval_runtime": 22.6737, "eval_samples_per_second": 127.196, "eval_steps_per_second": 15.922, "step": 2568 }, { "epoch": 9.0, "grad_norm": 5.960662841796875, "learning_rate": 8.303210861170248e-05, "loss": 0.2519, "step": 2889 }, { "epoch": 9.0, "eval_accuracy": 0.8578363384188626, "eval_f1": 0.8534043679644312, "eval_loss": 0.4732687175273895, "eval_precision": 0.8533865418942713, "eval_recall": 0.8578363384188626, "eval_runtime": 22.5796, "eval_samples_per_second": 127.726, "eval_steps_per_second": 15.988, "step": 2889 }, { "epoch": 10.0, "grad_norm": 8.973663330078125, "learning_rate": 7.793932087141109e-05, "loss": 0.2227, "step": 3210 }, { "epoch": 10.0, "eval_accuracy": 0.8585298196948682, "eval_f1": 0.851216238380984, "eval_loss": 0.4905049800872803, "eval_precision": 0.8519657875781843, "eval_recall": 0.8585298196948682, "eval_runtime": 24.7137, "eval_samples_per_second": 116.696, "eval_steps_per_second": 14.607, "step": 3210 }, { "epoch": 11.0, "grad_norm": 3.6425747871398926, "learning_rate": 7.23840191986112e-05, "loss": 0.1724, "step": 3531 }, { "epoch": 11.0, "eval_accuracy": 0.8654646324549237, "eval_f1": 0.862832998223053, "eval_loss": 0.5050108432769775, "eval_precision": 0.8670507633275476, "eval_recall": 0.8654646324549237, "eval_runtime": 24.5636, "eval_samples_per_second": 117.409, "eval_steps_per_second": 14.697, "step": 3531 }, { "epoch": 12.0, "grad_norm": 14.814494132995605, "learning_rate": 6.64581673326787e-05, "loss": 0.1596, "step": 3852 }, { "epoch": 12.0, "eval_accuracy": 0.8685852981969486, "eval_f1": 0.8631462417126987, "eval_loss": 0.526269257068634, "eval_precision": 0.8656616085283582, "eval_recall": 0.8685852981969486, "eval_runtime": 24.7987, "eval_samples_per_second": 116.296, "eval_steps_per_second": 14.557, "step": 3852 }, { "epoch": 13.0, "grad_norm": 1.3064632415771484, "learning_rate": 6.027949045818934e-05, "loss": 0.1397, "step": 4173 }, { "epoch": 13.0, "eval_accuracy": 0.8533287101248266, "eval_f1": 0.848769203208603, "eval_loss": 0.7043444514274597, "eval_precision": 0.8702783792779959, "eval_recall": 0.8533287101248266, "eval_runtime": 24.4467, "eval_samples_per_second": 117.971, "eval_steps_per_second": 14.767, "step": 4173 }, { "epoch": 14.0, "grad_norm": 0.027323821559548378, "learning_rate": 5.391170860718704e-05, "loss": 0.1298, "step": 4494 }, { "epoch": 14.0, "eval_accuracy": 0.8678918169209431, "eval_f1": 0.8631936421240504, "eval_loss": 0.6274930834770203, "eval_precision": 0.8734415776541141, "eval_recall": 0.8678918169209431, "eval_runtime": 24.4385, "eval_samples_per_second": 118.01, "eval_steps_per_second": 14.772, "step": 4494 }, { "epoch": 15.0, "grad_norm": 18.411346435546875, "learning_rate": 4.7479171433782145e-05, "loss": 0.1029, "step": 4815 }, { "epoch": 15.0, "eval_accuracy": 0.8807212205270458, "eval_f1": 0.8772158509937853, "eval_loss": 0.5564317107200623, "eval_precision": 0.8775761761640355, "eval_recall": 0.8807212205270458, "eval_runtime": 24.4883, "eval_samples_per_second": 117.771, "eval_steps_per_second": 14.742, "step": 4815 }, { "epoch": 16.0, "grad_norm": 17.630950927734375, "learning_rate": 4.1088364635238816e-05, "loss": 0.0893, "step": 5136 }, { "epoch": 16.0, "eval_accuracy": 0.880374479889043, "eval_f1": 0.8789241435580383, "eval_loss": 0.5668464303016663, "eval_precision": 0.8823340407710801, "eval_recall": 0.880374479889043, "eval_runtime": 24.4769, "eval_samples_per_second": 117.825, "eval_steps_per_second": 14.749, "step": 5136 }, { "epoch": 16.0, "step": 5136, "total_flos": 6.35834769042545e+18, "train_loss": 0.3223209912160475, "train_runtime": 1869.7645, "train_samples_per_second": 274.259, "train_steps_per_second": 17.168 } ], "logging_steps": 500, "max_steps": 32100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 6.35834769042545e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }