| {"loss": 5.8937645, "grad_norm": 124.0, "learning_rate": 2e-05, "epoch": 0.03053435, "global_step/max_steps": "1/99", "percentage": "1.01%", "elapsed_time": "28s", "remaining_time": "46m 33s", "memory(GiB)": 30.46, "train_speed(iter/s)": 0.035076} |
| {"loss": 2.43893305, "grad_norm": 15.5625, "learning_rate": 9.93e-05, "epoch": 0.30534351, "global_step/max_steps": "10/99", "percentage": "10.10%", "elapsed_time": "4m 21s", "remaining_time": "38m 48s", "memory(GiB)": 36.13, "train_speed(iter/s)": 0.038221} |
| {"loss": 0.7714448, "grad_norm": 1.765625, "learning_rate": 9.385e-05, "epoch": 0.61068702, "global_step/max_steps": "20/99", "percentage": "20.20%", "elapsed_time": "8m 40s", "remaining_time": "34m 17s", "memory(GiB)": 36.13, "train_speed(iter/s)": 0.038396} |
| {"loss": 0.68012047, "grad_norm": 1.796875, "learning_rate": 8.354e-05, "epoch": 0.91603053, "global_step/max_steps": "30/99", "percentage": "30.30%", "elapsed_time": "13m 0s", "remaining_time": "29m 55s", "memory(GiB)": 36.13, "train_speed(iter/s)": 0.038426} |
| {"loss": 0.5094286, "grad_norm": 2.0625, "learning_rate": 6.952e-05, "epoch": 1.21374046, "global_step/max_steps": "40/99", "percentage": "40.40%", "elapsed_time": "17m 40s", "remaining_time": "26m 3s", "memory(GiB)": 36.13, "train_speed(iter/s)": 0.037732} |
| {"loss": 0.35093291, "grad_norm": 1.203125, "learning_rate": 5.334e-05, "epoch": 1.51908397, "global_step/max_steps": "50/99", "percentage": "50.51%", "elapsed_time": "22m 0s", "remaining_time": "21m 33s", "memory(GiB)": 36.13, "train_speed(iter/s)": 0.037876} |
| {"loss": 0.36673875, "grad_norm": 1.265625, "learning_rate": 3.679e-05, "epoch": 1.82442748, "global_step/max_steps": "60/99", "percentage": "60.61%", "elapsed_time": "26m 19s", "remaining_time": "17m 6s", "memory(GiB)": 36.13, "train_speed(iter/s)": 0.037981} |
| {"loss": 0.31192484, "grad_norm": 1.1484375, "learning_rate": 2.17e-05, "epoch": 2.1221374, "global_step/max_steps": "70/99", "percentage": "70.71%", "elapsed_time": "30m 59s", "remaining_time": "12m 50s", "memory(GiB)": 36.13, "train_speed(iter/s)": 0.037641} |
| {"loss": 0.21114304, "grad_norm": 0.98046875, "learning_rate": 9.75e-06, "epoch": 2.42748092, "global_step/max_steps": "80/99", "percentage": "80.81%", "elapsed_time": "35m 19s", "remaining_time": "8m 23s", "memory(GiB)": 36.13, "train_speed(iter/s)": 0.037752} |
| {"loss": 0.18445172, "grad_norm": 0.98046875, "learning_rate": 2.24e-06, "epoch": 2.73282443, "global_step/max_steps": "90/99", "percentage": "90.91%", "elapsed_time": "39m 39s", "remaining_time": "3m 57s", "memory(GiB)": 36.13, "train_speed(iter/s)": 0.03782} |
| {"train_runtime": 2649.7904, "train_samples_per_second": 2.372, "train_steps_per_second": 0.037, "total_flos": 2.8009044484442726e+17, "train_loss": 0.63843817, "epoch": 3.0, "global_step/max_steps": "99/99", "percentage": "100.00%", "elapsed_time": "44m 6s", "remaining_time": "0s", "memory(GiB)": 36.13, "train_speed(iter/s)": 0.037403} |
|
|