{ "best_metric": 0.8314220183486238, "best_model_checkpoint": "tiny-bert-sst2-distilled\\run-1\\checkpoint-2108", "epoch": 5.0, "eval_steps": 500, "global_step": 2635, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 7.193342816186545e-05, "loss": 1.1191, "step": 527 }, { "epoch": 1.0, "eval_accuracy": 0.8061926605504587, "eval_loss": 0.9445732831954956, "eval_runtime": 1.8865, "eval_samples_per_second": 462.238, "eval_steps_per_second": 3.711, "step": 527 }, { "epoch": 2.0, "learning_rate": 5.994452346822122e-05, "loss": 0.6346, "step": 1054 }, { "epoch": 2.0, "eval_accuracy": 0.8279816513761468, "eval_loss": 0.8901896476745605, "eval_runtime": 1.7665, "eval_samples_per_second": 493.618, "eval_steps_per_second": 3.963, "step": 1054 }, { "epoch": 3.0, "learning_rate": 4.795561877457697e-05, "loss": 0.4919, "step": 1581 }, { "epoch": 3.0, "eval_accuracy": 0.8211009174311926, "eval_loss": 0.9235907196998596, "eval_runtime": 1.9102, "eval_samples_per_second": 456.496, "eval_steps_per_second": 3.665, "step": 1581 }, { "epoch": 4.0, "learning_rate": 3.5966714080932725e-05, "loss": 0.4192, "step": 2108 }, { "epoch": 4.0, "eval_accuracy": 0.8314220183486238, "eval_loss": 0.9573733806610107, "eval_runtime": 1.9272, "eval_samples_per_second": 452.465, "eval_steps_per_second": 3.632, "step": 2108 }, { "epoch": 5.0, "learning_rate": 2.3977809387288486e-05, "loss": 0.377, "step": 2635 }, { "epoch": 5.0, "eval_accuracy": 0.8268348623853211, "eval_loss": 0.9544126987457275, "eval_runtime": 1.9749, "eval_samples_per_second": 441.534, "eval_steps_per_second": 3.544, "step": 2635 } ], "logging_steps": 500, "max_steps": 3689, "num_train_epochs": 7, "save_steps": 500, "total_flos": 40452267650880.0, "trial_name": null, "trial_params": { "alpha": 0.6227846651931359, "learning_rate": 8.39223328555097e-05, "num_train_epochs": 7, "temperature": 4 } }