| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 100, |
| "global_step": 67, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "completion_length": 354.5310432434082, |
| "epoch": 0.14925373134328357, |
| "grad_norm": 26.582666397094727, |
| "kl": 0.02456921637058258, |
| "learning_rate": 2.981532510892707e-06, |
| "loss": 0.001, |
| "reward": 0.38080358956940474, |
| "reward_std": 0.3429007441736758, |
| "rewards/accuracy_reward": 0.12935268479632214, |
| "rewards/format_reward": 0.2514509041327983, |
| "step": 10 |
| }, |
| { |
| "completion_length": 82.56049494743347, |
| "epoch": 0.29850746268656714, |
| "grad_norm": 6.207046031951904, |
| "kl": 0.192266845703125, |
| "learning_rate": 2.6657189421854562e-06, |
| "loss": 0.0077, |
| "reward": 1.1937500540167094, |
| "reward_std": 0.2890436253976077, |
| "rewards/accuracy_reward": 0.2324776900582947, |
| "rewards/format_reward": 0.9612723555415869, |
| "step": 20 |
| }, |
| { |
| "completion_length": 163.46440453529357, |
| "epoch": 0.44776119402985076, |
| "grad_norm": 0.9332170486450195, |
| "kl": 0.28681182861328125, |
| "learning_rate": 2.03755192431795e-06, |
| "loss": 0.0115, |
| "reward": 1.377120592445135, |
| "reward_std": 0.35549838868901135, |
| "rewards/accuracy_reward": 0.4252232332248241, |
| "rewards/format_reward": 0.9518973540514708, |
| "step": 30 |
| }, |
| { |
| "completion_length": 289.2572672843933, |
| "epoch": 0.5970149253731343, |
| "grad_norm": 0.30436766147613525, |
| "kl": 0.11348419189453125, |
| "learning_rate": 1.2653483024396534e-06, |
| "loss": 0.0045, |
| "reward": 1.5142857864499093, |
| "reward_std": 0.3293194776400924, |
| "rewards/accuracy_reward": 0.5575893112458289, |
| "rewards/format_reward": 0.9566964589059352, |
| "step": 40 |
| }, |
| { |
| "completion_length": 328.3682068824768, |
| "epoch": 0.746268656716418, |
| "grad_norm": 0.4712439179420471, |
| "kl": 0.0519775390625, |
| "learning_rate": 5.560194134252441e-07, |
| "loss": 0.0021, |
| "reward": 1.4938616767525672, |
| "reward_std": 0.352480823546648, |
| "rewards/accuracy_reward": 0.5599330620840192, |
| "rewards/format_reward": 0.9339286085218191, |
| "step": 50 |
| }, |
| { |
| "completion_length": 326.48863105773927, |
| "epoch": 0.8955223880597015, |
| "grad_norm": 1.6945326328277588, |
| "kl": 0.05061798095703125, |
| "learning_rate": 9.962936025419756e-08, |
| "loss": 0.002, |
| "reward": 1.528236673772335, |
| "reward_std": 0.33247090512886646, |
| "rewards/accuracy_reward": 0.574107170663774, |
| "rewards/format_reward": 0.9541294977068902, |
| "step": 60 |
| }, |
| { |
| "completion_length": 317.54422964368547, |
| "epoch": 1.0, |
| "kl": 0.05271693638392857, |
| "reward": 1.535608057464872, |
| "reward_std": 0.3223346844315529, |
| "rewards/accuracy_reward": 0.5788690721882241, |
| "rewards/format_reward": 0.9567389748990536, |
| "step": 67, |
| "total_flos": 0.0, |
| "train_loss": 0.004519763499943178, |
| "train_runtime": 8988.1744, |
| "train_samples_per_second": 0.834, |
| "train_steps_per_second": 0.007 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 67, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|