| { |
| "best_metric": 0.4286545217037201, |
| "best_model_checkpoint": "./vit-augmentation/checkpoint-1926", |
| "epoch": 16.0, |
| "eval_steps": 500, |
| "global_step": 5136, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 6.7570695877075195, |
| "learning_rate": 4.1428571428571437e-05, |
| "loss": 0.9124, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.7805131761442441, |
| "eval_f1": 0.768284202243805, |
| "eval_loss": 0.6024736166000366, |
| "eval_precision": 0.778781843539936, |
| "eval_recall": 0.7805131761442441, |
| "eval_runtime": 24.6229, |
| "eval_samples_per_second": 117.127, |
| "eval_steps_per_second": 14.661, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 7.467956066131592, |
| "learning_rate": 8.311688311688312e-05, |
| "loss": 0.5876, |
| "step": 642 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.7864077669902912, |
| "eval_f1": 0.7820269177484748, |
| "eval_loss": 0.5819488167762756, |
| "eval_precision": 0.7990366623686466, |
| "eval_recall": 0.7864077669902912, |
| "eval_runtime": 24.6924, |
| "eval_samples_per_second": 116.797, |
| "eval_steps_per_second": 14.62, |
| "step": 642 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 2.2375881671905518, |
| "learning_rate": 9.985334621908699e-05, |
| "loss": 0.5415, |
| "step": 963 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.8040915395284327, |
| "eval_f1": 0.7864667726550387, |
| "eval_loss": 0.6148691773414612, |
| "eval_precision": 0.7943404456778191, |
| "eval_recall": 0.8040915395284327, |
| "eval_runtime": 23.4113, |
| "eval_samples_per_second": 123.188, |
| "eval_steps_per_second": 15.42, |
| "step": 963 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 4.80470085144043, |
| "learning_rate": 9.894936461151184e-05, |
| "loss": 0.4815, |
| "step": 1284 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.8294036061026352, |
| "eval_f1": 0.8114752843394228, |
| "eval_loss": 0.4653749465942383, |
| "eval_precision": 0.8259066316761086, |
| "eval_recall": 0.8294036061026352, |
| "eval_runtime": 22.9294, |
| "eval_samples_per_second": 125.778, |
| "eval_steps_per_second": 15.744, |
| "step": 1284 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 8.0819091796875, |
| "learning_rate": 9.723506398349735e-05, |
| "loss": 0.4263, |
| "step": 1605 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.8259361997226075, |
| "eval_f1": 0.8022540151903795, |
| "eval_loss": 0.5480949282646179, |
| "eval_precision": 0.8315275661759924, |
| "eval_recall": 0.8259361997226075, |
| "eval_runtime": 22.776, |
| "eval_samples_per_second": 126.625, |
| "eval_steps_per_second": 15.85, |
| "step": 1605 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 1.5868643522262573, |
| "learning_rate": 9.473882326123909e-05, |
| "loss": 0.3515, |
| "step": 1926 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.8592233009708737, |
| "eval_f1": 0.857445188937737, |
| "eval_loss": 0.4286545217037201, |
| "eval_precision": 0.8579724186747698, |
| "eval_recall": 0.8592233009708737, |
| "eval_runtime": 22.7677, |
| "eval_samples_per_second": 126.671, |
| "eval_steps_per_second": 15.856, |
| "step": 1926 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 7.5292840003967285, |
| "learning_rate": 9.15019657867844e-05, |
| "loss": 0.3144, |
| "step": 2247 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.8363384188626907, |
| "eval_f1": 0.8269755362382184, |
| "eval_loss": 0.5004583597183228, |
| "eval_precision": 0.8320109172887173, |
| "eval_recall": 0.8363384188626907, |
| "eval_runtime": 22.8371, |
| "eval_samples_per_second": 126.286, |
| "eval_steps_per_second": 15.808, |
| "step": 2247 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 9.491310119628906, |
| "learning_rate": 8.757807524194037e-05, |
| "loss": 0.2736, |
| "step": 2568 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.8294036061026352, |
| "eval_f1": 0.8301623012381248, |
| "eval_loss": 0.5306037664413452, |
| "eval_precision": 0.8447810859364984, |
| "eval_recall": 0.8294036061026352, |
| "eval_runtime": 22.6737, |
| "eval_samples_per_second": 127.196, |
| "eval_steps_per_second": 15.922, |
| "step": 2568 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 5.960662841796875, |
| "learning_rate": 8.303210861170248e-05, |
| "loss": 0.2519, |
| "step": 2889 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.8578363384188626, |
| "eval_f1": 0.8534043679644312, |
| "eval_loss": 0.4732687175273895, |
| "eval_precision": 0.8533865418942713, |
| "eval_recall": 0.8578363384188626, |
| "eval_runtime": 22.5796, |
| "eval_samples_per_second": 127.726, |
| "eval_steps_per_second": 15.988, |
| "step": 2889 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 8.973663330078125, |
| "learning_rate": 7.793932087141109e-05, |
| "loss": 0.2227, |
| "step": 3210 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.8585298196948682, |
| "eval_f1": 0.851216238380984, |
| "eval_loss": 0.4905049800872803, |
| "eval_precision": 0.8519657875781843, |
| "eval_recall": 0.8585298196948682, |
| "eval_runtime": 24.7137, |
| "eval_samples_per_second": 116.696, |
| "eval_steps_per_second": 14.607, |
| "step": 3210 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 3.6425747871398926, |
| "learning_rate": 7.23840191986112e-05, |
| "loss": 0.1724, |
| "step": 3531 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.8654646324549237, |
| "eval_f1": 0.862832998223053, |
| "eval_loss": 0.5050108432769775, |
| "eval_precision": 0.8670507633275476, |
| "eval_recall": 0.8654646324549237, |
| "eval_runtime": 24.5636, |
| "eval_samples_per_second": 117.409, |
| "eval_steps_per_second": 14.697, |
| "step": 3531 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 14.814494132995605, |
| "learning_rate": 6.64581673326787e-05, |
| "loss": 0.1596, |
| "step": 3852 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.8685852981969486, |
| "eval_f1": 0.8631462417126987, |
| "eval_loss": 0.526269257068634, |
| "eval_precision": 0.8656616085283582, |
| "eval_recall": 0.8685852981969486, |
| "eval_runtime": 24.7987, |
| "eval_samples_per_second": 116.296, |
| "eval_steps_per_second": 14.557, |
| "step": 3852 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 1.3064632415771484, |
| "learning_rate": 6.027949045818934e-05, |
| "loss": 0.1397, |
| "step": 4173 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.8533287101248266, |
| "eval_f1": 0.848769203208603, |
| "eval_loss": 0.7043444514274597, |
| "eval_precision": 0.8702783792779959, |
| "eval_recall": 0.8533287101248266, |
| "eval_runtime": 24.4467, |
| "eval_samples_per_second": 117.971, |
| "eval_steps_per_second": 14.767, |
| "step": 4173 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.027323821559548378, |
| "learning_rate": 5.391170860718704e-05, |
| "loss": 0.1298, |
| "step": 4494 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.8678918169209431, |
| "eval_f1": 0.8631936421240504, |
| "eval_loss": 0.6274930834770203, |
| "eval_precision": 0.8734415776541141, |
| "eval_recall": 0.8678918169209431, |
| "eval_runtime": 24.4385, |
| "eval_samples_per_second": 118.01, |
| "eval_steps_per_second": 14.772, |
| "step": 4494 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 18.411346435546875, |
| "learning_rate": 4.7479171433782145e-05, |
| "loss": 0.1029, |
| "step": 4815 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.8807212205270458, |
| "eval_f1": 0.8772158509937853, |
| "eval_loss": 0.5564317107200623, |
| "eval_precision": 0.8775761761640355, |
| "eval_recall": 0.8807212205270458, |
| "eval_runtime": 24.4883, |
| "eval_samples_per_second": 117.771, |
| "eval_steps_per_second": 14.742, |
| "step": 4815 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 17.630950927734375, |
| "learning_rate": 4.1088364635238816e-05, |
| "loss": 0.0893, |
| "step": 5136 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.880374479889043, |
| "eval_f1": 0.8789241435580383, |
| "eval_loss": 0.5668464303016663, |
| "eval_precision": 0.8823340407710801, |
| "eval_recall": 0.880374479889043, |
| "eval_runtime": 24.4769, |
| "eval_samples_per_second": 117.825, |
| "eval_steps_per_second": 14.749, |
| "step": 5136 |
| }, |
| { |
| "epoch": 16.0, |
| "step": 5136, |
| "total_flos": 6.35834769042545e+18, |
| "train_loss": 0.3223209912160475, |
| "train_runtime": 1869.7645, |
| "train_samples_per_second": 274.259, |
| "train_steps_per_second": 17.168 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 32100, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 500, |
| "total_flos": 6.35834769042545e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|