MoM-Qwen32B-based / trainer_log.jsonl
TzJ2006's picture
Upload folder using huggingface_hub
6895df0 verified
{"current_steps": 10, "total_steps": 40, "loss": 6.0403, "lr": 9.53153893518325e-05, "epoch": 0.5128205128205128, "percentage": 25.0, "elapsed_time": "0:00:24", "remaining_time": "0:01:13"}
{"current_steps": 20, "total_steps": 40, "loss": 5.4718, "lr": 6.294095225512603e-05, "epoch": 1.0, "percentage": 50.0, "elapsed_time": "0:00:44", "remaining_time": "0:00:44"}
{"current_steps": 30, "total_steps": 40, "loss": 4.8283, "lr": 2.132117818244771e-05, "epoch": 1.5128205128205128, "percentage": 75.0, "elapsed_time": "0:01:06", "remaining_time": "0:00:22"}
{"current_steps": 40, "total_steps": 40, "loss": 4.6286, "lr": 1.9026509541272275e-07, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "0:01:24", "remaining_time": "0:00:00"}
{"current_steps": 40, "total_steps": 40, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "0:01:25", "remaining_time": "0:00:00"}
{"current_steps": 50, "total_steps": 90, "loss": 5.1321, "lr": 5.096956658859122e-05, "epoch": 1.1355932203389831, "percentage": 55.56, "elapsed_time": "0:00:20", "remaining_time": "0:00:16"}
{"current_steps": 60, "total_steps": 90, "loss": 4.2593, "lr": 3.199111375976449e-05, "epoch": 1.3615819209039548, "percentage": 66.67, "elapsed_time": "0:00:36", "remaining_time": "0:00:18"}
{"current_steps": 70, "total_steps": 90, "loss": 4.0298, "lr": 1.5687918106563326e-05, "epoch": 1.5875706214689265, "percentage": 77.78, "elapsed_time": "0:00:52", "remaining_time": "0:00:14"}
{"current_steps": 80, "total_steps": 90, "loss": 3.9493, "lr": 4.4818529516926726e-06, "epoch": 1.8135593220338984, "percentage": 88.89, "elapsed_time": "0:01:09", "remaining_time": "0:00:08"}
{"current_steps": 89, "total_steps": 90, "epoch": 2.0, "percentage": 98.89, "elapsed_time": "0:01:23", "remaining_time": "0:00:00"}
{"current_steps": 50, "total_steps": 225, "loss": 5.0217, "lr": 9.596765194911181e-05, "epoch": 1.1355932203389831, "percentage": 22.22, "elapsed_time": "0:00:18", "remaining_time": "0:01:03"}
{"current_steps": 60, "total_steps": 225, "loss": 3.9458, "lr": 9.236573524788887e-05, "epoch": 1.3615819209039548, "percentage": 26.67, "elapsed_time": "0:00:34", "remaining_time": "0:01:34"}
{"current_steps": 70, "total_steps": 225, "loss": 3.6254, "lr": 8.774114695766286e-05, "epoch": 1.5875706214689265, "percentage": 31.11, "elapsed_time": "0:00:50", "remaining_time": "0:01:51"}
{"current_steps": 80, "total_steps": 225, "loss": 3.4809, "lr": 8.22055205725199e-05, "epoch": 1.8135593220338984, "percentage": 35.56, "elapsed_time": "0:01:07", "remaining_time": "0:02:03"}
{"current_steps": 90, "total_steps": 225, "loss": 3.4035, "lr": 7.589248124491627e-05, "epoch": 2.022598870056497, "percentage": 40.0, "elapsed_time": "0:01:23", "remaining_time": "0:02:05"}
{"current_steps": 100, "total_steps": 225, "loss": 3.3861, "lr": 6.895442019201897e-05, "epoch": 2.248587570621469, "percentage": 44.44, "elapsed_time": "0:01:40", "remaining_time": "0:02:05"}
{"current_steps": 110, "total_steps": 225, "loss": 3.3066, "lr": 6.15588161057485e-05, "epoch": 2.4745762711864407, "percentage": 48.89, "elapsed_time": "0:01:57", "remaining_time": "0:02:02"}
{"current_steps": 120, "total_steps": 225, "loss": 3.2884, "lr": 5.3884192364450325e-05, "epoch": 2.7005649717514126, "percentage": 53.33, "elapsed_time": "0:02:13", "remaining_time": "0:01:56"}
{"current_steps": 130, "total_steps": 225, "loss": 3.2397, "lr": 4.611580763554969e-05, "epoch": 2.926553672316384, "percentage": 57.78, "elapsed_time": "0:02:30", "remaining_time": "0:01:50"}
{"current_steps": 140, "total_steps": 225, "loss": 3.1814, "lr": 3.844118389425153e-05, "epoch": 3.135593220338983, "percentage": 62.22, "elapsed_time": "0:02:46", "remaining_time": "0:01:40"}
{"current_steps": 150, "total_steps": 225, "loss": 3.1947, "lr": 3.104557980798104e-05, "epoch": 3.361581920903955, "percentage": 66.67, "elapsed_time": "0:03:03", "remaining_time": "0:01:31"}
{"current_steps": 160, "total_steps": 225, "loss": 3.2494, "lr": 2.410751875508373e-05, "epoch": 3.5875706214689265, "percentage": 71.11, "elapsed_time": "0:03:19", "remaining_time": "0:01:21"}
{"current_steps": 170, "total_steps": 225, "loss": 3.199, "lr": 1.7794479427480117e-05, "epoch": 3.8135593220338984, "percentage": 75.56, "elapsed_time": "0:03:34", "remaining_time": "0:01:09"}
{"current_steps": 180, "total_steps": 225, "loss": 3.1656, "lr": 1.225885304233716e-05, "epoch": 4.022598870056497, "percentage": 80.0, "elapsed_time": "0:03:50", "remaining_time": "0:00:57"}
{"current_steps": 190, "total_steps": 225, "loss": 3.2354, "lr": 7.63426475211113e-06, "epoch": 4.248587570621469, "percentage": 84.44, "elapsed_time": "0:04:06", "remaining_time": "0:00:45"}
{"current_steps": 200, "total_steps": 225, "loss": 3.216, "lr": 4.032348050888179e-06, "epoch": 4.47457627118644, "percentage": 88.89, "elapsed_time": "0:04:25", "remaining_time": "0:00:33"}
{"current_steps": 210, "total_steps": 225, "loss": 3.1459, "lr": 1.5400500400166939e-06, "epoch": 4.700564971751413, "percentage": 93.33, "elapsed_time": "0:04:42", "remaining_time": "0:00:20"}
{"current_steps": 220, "total_steps": 225, "loss": 3.2043, "lr": 2.1753260154906973e-07, "epoch": 4.926553672316384, "percentage": 97.78, "elapsed_time": "0:04:58", "remaining_time": "0:00:06"}
{"current_steps": 224, "total_steps": 225, "epoch": 5.0, "percentage": 99.56, "elapsed_time": "0:05:03", "remaining_time": "0:00:01"}