{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.5, "eval_steps": 50, "global_step": 1040, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.49662184969039436, "calibration/batch_distribution_entropy": 0.24855682571268609, "calibration/batch_entropy_100bins": 0.34211678603258494, "calibration/batch_entropy_10bins": 0.24855682571268609, "calibration/batch_entropy_50bins": 0.39298323694966103, "calibration/batch_uniqueness": 0.4957406739381661, "calibration/confidence_entropy": 0.21799519909421344, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.24855682571268609, "calibration/distribution_entropy_100": 0.34211678603258494, "calibration/ece": 0.4413155346150276, "calibration/mean_confidence": 0.922292316091292, "calibration/unique_confidence_per_question": 0.03385416666666667, "calibration/unique_confidences": 13.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01961805555555556, "completions/max_length": 4037.2, "completions/max_terminated_length": 4037.2, "completions/mean_length": 492.128466796875, "completions/mean_terminated_length": 501.96421508789064, "completions/min_length": 0.0, "completions/min_terminated_length": 2.6, "epoch": 0.01201923076923077, "grad_norm": 0.004471676889806986, "learning_rate": 1.201923076923077e-07, "loss": 0.0078, "num_tokens": 8748584.0, "reward": 0.5809432864189148, "reward_std": 0.528658103942871, "rewards/accuracy_reward": 0.25043402910232543, "rewards/brier_reward": 0.3032692611217499, "rewards/confidence_one_or_zero": 0.3365451395511627, "rewards/format_reward": 0.6081597208976746, "rewards/mean_confidence_reward": 0.8397148966789245, "sampling/batch_mean_priority_error": 0.020286041666666678, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 1.0, "sampling/error_ema_max": 0.0528124988079071, "sampling/error_ema_mean": 1.4395399466593517e-05, "sampling/priority_kl": 0.007231711782515049, "sampling/priority_scale": 1048575.9998779297, "sampling/prob_entropy": 10.301719474792481, "sampling/prob_max": 3.357546593178995e-05, "sampling/prob_min": 0.0, "sampling/prompt_draws_max": 1.0, "sampling/prompt_draws_mean": 0.007200000155717134, "sampling/prompt_draws_total": 216.0, "sampling/seen_fraction": 0.007200000155717134, "sampling/unseen_fraction": 0.9927999998442829, "signal/accuracy_reward/centered_abs_mean": 0.298681640625, "signal/accuracy_reward/group_std_mean": 0.3567140340805054, "signal/accuracy_reward/group_zero_std_frac": 0.11944444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1493408203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1493408203125, "signal/advantage_abs_mean": 0.4503032624721527, "signal/advantage_pre_scale_abs_mean": 0.4503032624721527, "signal/advantage_pre_scale_std": 0.5379110932350158, "signal/advantage_std": 0.5379110932350158, "signal/brier_reward/centered_abs_mean": 0.3076389491558075, "signal/brier_reward/group_std_mean": 0.3620465755462646, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15381947457790374, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.15381947457790374, "signal/confidence_one_or_zero/centered_abs_mean": 0.40524630546569823, "signal/confidence_one_or_zero/group_std_mean": 0.4523835420608521, "signal/confidence_one_or_zero/group_zero_std_frac": 0.00555555559694767, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.0524628275306895e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.0524628275306895e-06, "signal/format_reward/centered_abs_mean": 0.4301974833011627, "signal/format_reward/group_std_mean": 0.4691725194454193, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21509874165058135, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.21509874165058135, "signal/mean_confidence_reward/centered_abs_mean": 0.157260724902153, "signal/mean_confidence_reward/group_std_mean": 0.23579104840755463, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.572607152411365e-06, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 1.572607152411365e-06, "step": 5 }, { "calibration/aurc": 0.6212086769784675, "calibration/batch_distribution_entropy": 0.2554635874966734, "calibration/batch_entropy_100bins": 0.34783759217112353, "calibration/batch_entropy_10bins": 0.2554635874966734, "calibration/batch_entropy_50bins": 0.39758666158825434, "calibration/batch_uniqueness": 0.5003519549953523, "calibration/confidence_entropy": 0.20785773862853948, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.2554635874966734, "calibration/distribution_entropy_100": 0.34783759217112353, "calibration/ece": 0.5789486640908559, "calibration/mean_confidence": 0.9201863305340018, "calibration/unique_confidence_per_question": 0.0359375, "calibration/unique_confidences": 13.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02387152777777777, "completions/max_length": 4004.0, "completions/max_terminated_length": 4004.0, "completions/mean_length": 515.1827270507813, "completions/mean_terminated_length": 527.7875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.02403846153846154, "grad_norm": 0.004891601856797934, "learning_rate": 2.403846153846154e-07, "loss": 0.0047, "num_tokens": 17765505.0, "reward": 0.5613437056541443, "reward_std": 0.5221718430519104, "rewards/accuracy_reward": 0.2384548604488373, "rewards/brier_reward": 0.2904594004154205, "rewards/confidence_one_or_zero": 0.3299479126930237, "rewards/format_reward": 0.59375, "rewards/mean_confidence_reward": 0.8279155731201172, "sampling/batch_mean_priority_error": 0.02943812500000001, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 1.0, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 4.464865050977096e-05, "sampling/priority_kl": 0.019392204470932484, "sampling/priority_scale": 1048575.9998779297, "sampling/prob_entropy": 10.289560317993164, "sampling/prob_max": 3.398626940906979e-05, "sampling/prob_min": 0.0, "sampling/prompt_draws_max": 1.0, "sampling/prompt_draws_mean": 0.019200000166893005, "sampling/prompt_draws_total": 576.0, "sampling/seen_fraction": 0.019200000166893005, "sampling/unseen_fraction": 0.980799999833107, "signal/accuracy_reward/centered_abs_mean": 0.28582356572151185, "signal/accuracy_reward/group_std_mean": 0.34282330274581907, "signal/accuracy_reward/group_zero_std_frac": 0.1472222238779068, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14291178286075593, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14291178286075593, "signal/advantage_abs_mean": 0.44686338901519773, "signal/advantage_pre_scale_abs_mean": 0.44686338901519773, "signal/advantage_pre_scale_std": 0.5340252995491028, "signal/advantage_std": 0.5340252995491028, "signal/brier_reward/centered_abs_mean": 0.2982248544692993, "signal/brier_reward/group_std_mean": 0.3519687294960022, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.14911242723464965, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.14911242723464965, "signal/confidence_one_or_zero/centered_abs_mean": 0.39952799677848816, "signal/confidence_one_or_zero/group_std_mean": 0.4472978889942169, "signal/confidence_one_or_zero/group_zero_std_frac": 0.01111111119389534, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.995279803348239e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.995279803348239e-06, "signal/format_reward/centered_abs_mean": 0.43882378935813904, "signal/format_reward/group_std_mean": 0.4742614209651947, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21941189467906952, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.21941189467906952, "signal/mean_confidence_reward/centered_abs_mean": 0.16937248408794403, "signal/mean_confidence_reward/group_std_mean": 0.24667055308818817, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.6937246755333036e-06, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 1.6937246755333036e-06, "step": 10 }, { "calibration/aurc": 0.5686256298557191, "calibration/batch_distribution_entropy": 0.2613920164321266, "calibration/batch_entropy_100bins": 0.34257888465762576, "calibration/batch_entropy_10bins": 0.2613920164321266, "calibration/batch_entropy_50bins": 0.3977664628482245, "calibration/batch_uniqueness": 0.4988012462838534, "calibration/confidence_entropy": 0.21749303316202773, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.2613920164321266, "calibration/distribution_entropy_100": 0.34257888465762576, "calibration/ece": 0.5288600027898627, "calibration/mean_confidence": 0.9190490067386252, "calibration/unique_confidence_per_question": 0.03333333333333333, "calibration/unique_confidences": 12.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019010416666666675, "completions/max_length": 3969.8, "completions/max_terminated_length": 3969.8, "completions/mean_length": 501.7993957519531, "completions/mean_terminated_length": 511.546826171875, "completions/min_length": 0.0, "completions/min_terminated_length": 11.2, "epoch": 0.036057692307692304, "grad_norm": 0.005199067294597626, "learning_rate": 3.6057692307692306e-07, "loss": 0.0017, "num_tokens": 26654394.0, "reward": 0.6018155813217163, "reward_std": 0.5264561772346497, "rewards/accuracy_reward": 0.2533854156732559, "rewards/brier_reward": 0.3099447190761566, "rewards/confidence_one_or_zero": 0.32890625596046447, "rewards/format_reward": 0.6402777791023254, "rewards/mean_confidence_reward": 0.8362808942794799, "sampling/batch_mean_priority_error": 0.019512222916666676, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 1.0, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 7.241204875754192e-05, "sampling/priority_kl": 0.029195376113057135, "sampling/priority_scale": 419434.3227580553, "sampling/prob_entropy": 10.279756927490235, "sampling/prob_max": 3.4387885534670204e-05, "sampling/prob_min": 5.380612662975182e-07, "sampling/prompt_draws_max": 1.0, "sampling/prompt_draws_mean": 0.031199999153614044, "sampling/prompt_draws_total": 936.0, "sampling/seen_fraction": 0.031199999153614044, "sampling/unseen_fraction": 0.968800000846386, "signal/accuracy_reward/centered_abs_mean": 0.30558268427848817, "signal/accuracy_reward/group_std_mean": 0.36401288509368895, "signal/accuracy_reward/group_zero_std_frac": 0.11388889104127883, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15279134213924409, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15279134213924409, "signal/advantage_abs_mean": 0.44527676701545715, "signal/advantage_pre_scale_abs_mean": 0.44527676701545715, "signal/advantage_pre_scale_std": 0.5341985344886779, "signal/advantage_std": 0.5341985344886779, "signal/brier_reward/centered_abs_mean": 0.31033373475074766, "signal/brier_reward/group_std_mean": 0.36489441990852356, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15516686737537383, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.15516686737537383, "signal/confidence_one_or_zero/centered_abs_mean": 0.40199110507965086, "signal/confidence_one_or_zero/group_std_mean": 0.45113515853881836, "signal/confidence_one_or_zero/group_zero_std_frac": 0.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.019910829811124e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.019910829811124e-06, "signal/format_reward/centered_abs_mean": 0.4189778625965118, "signal/format_reward/group_std_mean": 0.46179532408714297, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2094889312982559, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.2094889312982559, "signal/mean_confidence_reward/centered_abs_mean": 0.16203747689723969, "signal/mean_confidence_reward/group_std_mean": 0.2359348714351654, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.6203747463805485e-06, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 1.6203747463805485e-06, "step": 15 }, { "calibration/aurc": 0.5781377068007135, "calibration/batch_distribution_entropy": 0.30133848780637856, "calibration/batch_entropy_100bins": 0.3604450194123049, "calibration/batch_entropy_10bins": 0.30133848780637856, "calibration/batch_entropy_50bins": 0.4156054714933938, "calibration/batch_uniqueness": 0.5137717719347905, "calibration/confidence_entropy": 0.23150646493074922, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.30133848780637856, "calibration/distribution_entropy_100": 0.3604450194123049, "calibration/ece": 0.5328683362672083, "calibration/mean_confidence": 0.9074953262400243, "calibration/unique_confidence_per_question": 0.0359375, "calibration/unique_confidences": 13.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953124999999998, "completions/max_length": 4071.4, "completions/max_terminated_length": 4071.4, "completions/mean_length": 484.62213745117185, "completions/mean_terminated_length": 494.3174987792969, "completions/min_length": 0.0, "completions/min_terminated_length": 10.2, "epoch": 0.04807692307692308, "grad_norm": 0.05662711709737778, "learning_rate": 4.807692307692308e-07, "loss": 0.0053, "num_tokens": 35325977.0, "reward": 0.6991640329360962, "reward_std": 0.5071477174758912, "rewards/accuracy_reward": 0.29210069179534914, "rewards/brier_reward": 0.36349549889564514, "rewards/confidence_one_or_zero": 0.32812500596046446, "rewards/format_reward": 0.7427083373069763, "rewards/mean_confidence_reward": 0.8493289351463318, "sampling/batch_mean_priority_error": 0.011812916666667365, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9972222222222221, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 9.249055292457343e-05, "sampling/priority_kl": 0.03000004291534424, "sampling/priority_scale": 4.116325568780303, "sampling/prob_entropy": 10.27895278930664, "sampling/prob_max": 3.4690356551436706e-05, "sampling/prob_min": 2.692946486604342e-06, "sampling/prompt_draws_max": 1.2, "sampling/prompt_draws_mean": 0.04320000112056732, "sampling/prompt_draws_total": 1296.0, "sampling/seen_fraction": 0.043193334341049196, "sampling/unseen_fraction": 0.9568066656589508, "signal/accuracy_reward/centered_abs_mean": 0.32266167402267454, "signal/accuracy_reward/group_std_mean": 0.3808844566345215, "signal/accuracy_reward/group_zero_std_frac": 0.0777777798473835, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16133083701133727, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16133083701133727, "signal/advantage_abs_mean": 0.4216520845890045, "signal/advantage_pre_scale_abs_mean": 0.4216520845890045, "signal/advantage_pre_scale_std": 0.5141208887100219, "signal/advantage_std": 0.5141208887100219, "signal/brier_reward/centered_abs_mean": 0.32142401337623594, "signal/brier_reward/group_std_mean": 0.373612767457962, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16071200668811797, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.16071200668811797, "signal/confidence_one_or_zero/centered_abs_mean": 0.3991102397441864, "signal/confidence_one_or_zero/group_std_mean": 0.4487784802913666, "signal/confidence_one_or_zero/group_zero_std_frac": 0.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.9911023122840564e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.9911023122840564e-06, "signal/format_reward/centered_abs_mean": 0.34194878935813905, "signal/format_reward/group_std_mean": 0.4118743896484375, "signal/format_reward/group_zero_std_frac": 0.002777777798473835, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.17097439467906952, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.17097439467906952, "signal/mean_confidence_reward/centered_abs_mean": 0.14605502486228944, "signal/mean_confidence_reward/group_std_mean": 0.21780959367752076, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.4605501519326936e-06, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 1.4605501519326936e-06, "step": 20 }, { "calibration/aurc": 0.6459646670099846, "calibration/batch_distribution_entropy": 0.305665693826494, "calibration/batch_entropy_100bins": 0.36347840431491163, "calibration/batch_entropy_10bins": 0.305665693826494, "calibration/batch_entropy_50bins": 0.4226184384984603, "calibration/batch_uniqueness": 0.5333775639553542, "calibration/confidence_entropy": 0.23703443276990782, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.305665693826494, "calibration/distribution_entropy_100": 0.36347840431491163, "calibration/ece": 0.5938233061064975, "calibration/mean_confidence": 0.9098523283644399, "calibration/unique_confidence_per_question": 0.0359375, "calibration/unique_confidences": 13.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018055555555555557, "completions/max_length": 4032.4, "completions/max_terminated_length": 4032.4, "completions/mean_length": 451.4315185546875, "completions/mean_terminated_length": 459.79645385742185, "completions/min_length": 0.0, "completions/min_terminated_length": 26.4, "epoch": 0.06009615384615385, "grad_norm": 0.041369203478097916, "learning_rate": 6.009615384615385e-07, "loss": -0.0065, "num_tokens": 43588260.0, "reward": 0.815249752998352, "reward_std": 0.45611422061920165, "rewards/accuracy_reward": 0.33463541865348817, "rewards/brier_reward": 0.42292370200157164, "rewards/confidence_one_or_zero": 0.31111111044883727, "rewards/format_reward": 0.8729166746139526, "rewards/mean_confidence_reward": 0.8755728960037231, "sampling/batch_mean_priority_error": 0.01072958402777778, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9916666666666668, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00010534993198234588, "sampling/priority_kl": 0.030000149086117745, "sampling/priority_scale": 3.0989843607880174, "sampling/prob_entropy": 10.278955841064453, "sampling/prob_max": 3.494143529678695e-05, "sampling/prob_min": 2.578220801296993e-06, "sampling/prompt_draws_max": 2.0, "sampling/prompt_draws_mean": 0.0551999993622303, "sampling/prompt_draws_total": 1656.0, "sampling/seen_fraction": 0.05510666668415069, "sampling/unseen_fraction": 0.9448933333158493, "signal/accuracy_reward/centered_abs_mean": 0.3340549051761627, "signal/accuracy_reward/group_std_mean": 0.3912248969078064, "signal/accuracy_reward/group_zero_std_frac": 0.06388889048248529, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16702745258808135, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16702745258808135, "signal/advantage_abs_mean": 0.3729071855545044, "signal/advantage_pre_scale_abs_mean": 0.3729071855545044, "signal/advantage_pre_scale_std": 0.4633388459682465, "signal/advantage_std": 0.4633388459682465, "signal/brier_reward/centered_abs_mean": 0.3166819095611572, "signal/brier_reward/group_std_mean": 0.3681463062763214, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1583409547805786, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.1583409547805786, "signal/confidence_one_or_zero/centered_abs_mean": 0.3850585877895355, "signal/confidence_one_or_zero/group_std_mean": 0.43928542137146, "signal/confidence_one_or_zero/group_zero_std_frac": 0.00555555559694767, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.85058583560749e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.85058583560749e-06, "signal/format_reward/centered_abs_mean": 0.20249566435813904, "signal/format_reward/group_std_mean": 0.3022570312023163, "signal/format_reward/group_zero_std_frac": 0.058333334140479566, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10124783217906952, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.10124783217906952, "signal/mean_confidence_reward/centered_abs_mean": 0.11333362460136413, "signal/mean_confidence_reward/group_std_mean": 0.17320298850536348, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.13333621811762e-06, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 1.13333621811762e-06, "step": 25 }, { "calibration/aurc": 0.5997521391931024, "calibration/batch_distribution_entropy": 0.30450032721554765, "calibration/batch_entropy_100bins": 0.3638448903661742, "calibration/batch_entropy_10bins": 0.30450032721554765, "calibration/batch_entropy_50bins": 0.4255578176283213, "calibration/batch_uniqueness": 0.5224223472424432, "calibration/confidence_entropy": 0.2562116276037758, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.30450032721554765, "calibration/distribution_entropy_100": 0.3638448903661742, "calibration/ece": 0.5441324652383763, "calibration/mean_confidence": 0.9045782350307932, "calibration/unique_confidence_per_question": 0.03385416666666667, "calibration/unique_confidences": 13.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012413194444444442, "completions/max_length": 3953.0, "completions/max_terminated_length": 3953.0, "completions/mean_length": 454.72821655273435, "completions/mean_terminated_length": 460.4580322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 63.8, "epoch": 0.07211538461538461, "grad_norm": 0.002127213403582573, "learning_rate": 7.211538461538461e-07, "loss": -0.015, "num_tokens": 51927145.0, "reward": 0.9009054064750671, "reward_std": 0.4041502892971039, "rewards/accuracy_reward": 0.38237847089767457, "rewards/brier_reward": 0.4791316449642181, "rewards/confidence_one_or_zero": 0.262934023141861, "rewards/format_reward": 0.9402777791023255, "rewards/mean_confidence_reward": 0.8828180313110352, "sampling/batch_mean_priority_error": 0.00947305555555556, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.986111111111111, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00011624346661847085, "sampling/priority_kl": 0.030000174790620802, "sampling/priority_scale": 2.581620621774346, "sampling/prob_entropy": 10.278950500488282, "sampling/prob_max": 3.516698052408174e-05, "sampling/prob_min": 3.98486627091188e-06, "sampling/prompt_draws_max": 2.0, "sampling/prompt_draws_mean": 0.0671999990940094, "sampling/prompt_draws_total": 2016.0, "sampling/seen_fraction": 0.06696666553616523, "sampling/unseen_fraction": 0.9330333344638347, "signal/accuracy_reward/centered_abs_mean": 0.3201768696308136, "signal/accuracy_reward/group_std_mean": 0.3793734133243561, "signal/accuracy_reward/group_zero_std_frac": 0.07777777910232545, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1600884348154068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1600884348154068, "signal/advantage_abs_mean": 0.32962233424186704, "signal/advantage_pre_scale_abs_mean": 0.32962233424186704, "signal/advantage_pre_scale_std": 0.41350186467170713, "signal/advantage_std": 0.41350186467170713, "signal/brier_reward/centered_abs_mean": 0.2959588646888733, "signal/brier_reward/group_std_mean": 0.34889656901359556, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.14797943234443664, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.14797943234443664, "signal/confidence_one_or_zero/centered_abs_mean": 0.3444932758808136, "signal/confidence_one_or_zero/group_std_mean": 0.41049314141273496, "signal/confidence_one_or_zero/group_zero_std_frac": 0.019444444589316844, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.4449325994501125e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.4449325994501125e-06, "signal/format_reward/centered_abs_mean": 0.10245225727558135, "signal/format_reward/group_std_mean": 0.1856452167034149, "signal/format_reward/group_zero_std_frac": 0.2833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05122612863779068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.05122612863779068, "signal/mean_confidence_reward/centered_abs_mean": 0.09547019451856613, "signal/mean_confidence_reward/group_std_mean": 0.14561118185520172, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 9.54701920363732e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 9.54701920363732e-07, "step": 30 }, { "calibration/aurc": 0.4745980647435296, "calibration/batch_distribution_entropy": 0.33344258957053646, "calibration/batch_entropy_100bins": 0.3649651982296054, "calibration/batch_entropy_10bins": 0.33344258957053646, "calibration/batch_entropy_50bins": 0.42468040346264324, "calibration/batch_uniqueness": 0.5059170401082715, "calibration/confidence_entropy": 0.27285792284673105, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.33344258957053646, "calibration/distribution_entropy_100": 0.3649651982296054, "calibration/ece": 0.4374113852388136, "calibration/mean_confidence": 0.8955356761082479, "calibration/unique_confidence_per_question": 0.03854166666666666, "calibration/unique_confidences": 14.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01258680555555558, "completions/max_length": 3892.0, "completions/max_terminated_length": 3892.0, "completions/mean_length": 487.2180541992187, "completions/mean_terminated_length": 493.463916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.08413461538461539, "grad_norm": 0.0013091641012579203, "learning_rate": 8.41346153846154e-07, "loss": -0.0115, "num_tokens": 60623225.0, "reward": 0.9715142011642456, "reward_std": 0.367927622795105, "rewards/accuracy_reward": 0.43394096493721007, "rewards/brier_reward": 0.5377981483936309, "rewards/confidence_one_or_zero": 0.2161458373069763, "rewards/format_reward": 0.9712673544883728, "rewards/mean_confidence_reward": 0.8788381457328797, "sampling/batch_mean_priority_error": 0.00579201388888889, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.975, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00012595559237524868, "sampling/priority_kl": 0.029999414458870888, "sampling/priority_scale": 2.2547397849150004, "sampling/prob_entropy": 10.278950309753418, "sampling/prob_max": 3.5374295111978424e-05, "sampling/prob_min": 5.2708895964315164e-06, "sampling/prompt_draws_max": 2.0, "sampling/prompt_draws_mean": 0.07920000106096267, "sampling/prompt_draws_total": 2376.0, "sampling/seen_fraction": 0.07872666716575623, "sampling/unseen_fraction": 0.9212733328342437, "signal/accuracy_reward/centered_abs_mean": 0.30965169072151183, "signal/accuracy_reward/group_std_mean": 0.3712247610092163, "signal/accuracy_reward/group_zero_std_frac": 0.08611111361533404, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15482584536075591, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15482584536075591, "signal/advantage_abs_mean": 0.29973416328430175, "signal/advantage_pre_scale_abs_mean": 0.29973416328430175, "signal/advantage_pre_scale_std": 0.37970858812332153, "signal/advantage_std": 0.37970858812332153, "signal/brier_reward/centered_abs_mean": 0.2735893577337265, "signal/brier_reward/group_std_mean": 0.328358393907547, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.13679467886686325, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.13679467886686325, "signal/confidence_one_or_zero/centered_abs_mean": 0.3033962666988373, "signal/confidence_one_or_zero/group_std_mean": 0.38285431265830994, "signal/confidence_one_or_zero/group_zero_std_frac": 0.01666666679084301, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.0339625936903757e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.0339625936903757e-06, "signal/format_reward/centered_abs_mean": 0.05086263045668602, "signal/format_reward/group_std_mean": 0.10426565259695053, "signal/format_reward/group_zero_std_frac": 0.544444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02543131522834301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02543131522834301, "signal/mean_confidence_reward/centered_abs_mean": 0.08879880607128143, "signal/mean_confidence_reward/group_std_mean": 0.13397251665592194, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.879880056156253e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.879880056156253e-07, "step": 35 }, { "calibration/aurc": 0.4944071264636566, "calibration/batch_distribution_entropy": 0.37269805542921963, "calibration/batch_entropy_100bins": 0.3644702705632904, "calibration/batch_entropy_10bins": 0.37269805542921963, "calibration/batch_entropy_50bins": 0.4263596164138678, "calibration/batch_uniqueness": 0.4926999252218061, "calibration/confidence_entropy": 0.3128198436922223, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.37269805542921963, "calibration/distribution_entropy_100": 0.3644702705632904, "calibration/ece": 0.4117707299093361, "calibration/mean_confidence": 0.8824584332751864, "calibration/unique_confidence_per_question": 0.0328125, "calibration/unique_confidences": 12.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009201388888888884, "completions/max_length": 3999.8, "completions/max_terminated_length": 3999.8, "completions/mean_length": 521.1518188476563, "completions/mean_terminated_length": 526.0406677246094, "completions/min_length": 0.0, "completions/min_terminated_length": 101.8, "epoch": 0.09615384615384616, "grad_norm": 0.0012512662215158343, "learning_rate": 9.615384615384617e-07, "loss": -0.0058, "num_tokens": 69738734.0, "reward": 1.0121073603630066, "reward_std": 0.3443099856376648, "rewards/accuracy_reward": 0.46319444179534913, "rewards/brier_reward": 0.578621792793274, "rewards/confidence_one_or_zero": 0.13663194328546524, "rewards/format_reward": 0.9823784708976746, "rewards/mean_confidence_reward": 0.864686906337738, "sampling/batch_mean_priority_error": 0.006002986111111111, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9694444444444444, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.0001326042431173846, "sampling/priority_kl": 0.030000335350632668, "sampling/priority_scale": 2.025655818078667, "sampling/prob_entropy": 10.27895622253418, "sampling/prob_max": 3.5567282611737025e-05, "sampling/prob_min": 6.42574004814378e-06, "sampling/prompt_draws_max": 2.0, "sampling/prompt_draws_mean": 0.09120000004768372, "sampling/prompt_draws_total": 2736.0, "sampling/seen_fraction": 0.09033999890089035, "sampling/unseen_fraction": 0.9096600010991096, "signal/accuracy_reward/centered_abs_mean": 0.3021592855453491, "signal/accuracy_reward/group_std_mean": 0.3629056513309479, "signal/accuracy_reward/group_zero_std_frac": 0.09444444552063942, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15107964277267455, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15107964277267455, "signal/advantage_abs_mean": 0.28105672001838683, "signal/advantage_pre_scale_abs_mean": 0.28105672001838683, "signal/advantage_pre_scale_std": 0.36014688611030576, "signal/advantage_std": 0.36014688611030576, "signal/brier_reward/centered_abs_mean": 0.2543889760971069, "signal/brier_reward/group_std_mean": 0.30701815485954287, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.12719448804855346, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.12719448804855346, "signal/confidence_one_or_zero/centered_abs_mean": 0.21121961772441863, "signal/confidence_one_or_zero/group_std_mean": 0.3036592543125153, "signal/confidence_one_or_zero/group_zero_std_frac": 0.07777777947485447, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.1121961253811607e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.1121961253811607e-06, "signal/format_reward/centered_abs_mean": 0.03193901851773262, "signal/format_reward/group_std_mean": 0.06820150762796402, "signal/format_reward/group_zero_std_frac": 0.694444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01596950925886631, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01596950925886631, "signal/mean_confidence_reward/centered_abs_mean": 0.08309155404567718, "signal/mean_confidence_reward/group_std_mean": 0.1238383561372757, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.30915519145492e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.30915519145492e-07, "step": 40 }, { "calibration/aurc": 0.3610586952487986, "calibration/batch_distribution_entropy": 0.38911473171552363, "calibration/batch_entropy_100bins": 0.35479215071058084, "calibration/batch_entropy_10bins": 0.38911473171552363, "calibration/batch_entropy_50bins": 0.4150642023443708, "calibration/batch_uniqueness": 0.47124124647406396, "calibration/confidence_entropy": 0.33383225819897927, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.03674540682414698, "calibration/coverage@15%": 0.05249343832020997, "calibration/coverage@20%": 0.05249343832020997, "calibration/coverage@25%": 0.2442367730349496, "calibration/coverage@30%": 0.2542105263157895, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.38911473171552363, "calibration/distribution_entropy_100": 0.35479215071058084, "calibration/ece": 0.29194698660035917, "calibration/mean_confidence": 0.8754128244232631, "calibration/unique_confidence_per_question": 0.028645833333333332, "calibration/unique_confidences": 11.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011284722222222232, "completions/max_length": 4021.8, "completions/max_terminated_length": 4021.8, "completions/mean_length": 558.5407104492188, "completions/mean_terminated_length": 564.9087158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 119.6, "epoch": 0.10817307692307693, "grad_norm": 0.001072445884346962, "learning_rate": 1.0817307692307693e-06, "loss": -0.0059, "num_tokens": 79291715.0, "reward": 1.0605151176452636, "reward_std": 0.3090484321117401, "rewards/accuracy_reward": 0.5111979186534882, "rewards/brier_reward": 0.627174460887909, "rewards/confidence_one_or_zero": 0.09826388955116272, "rewards/format_reward": 0.9826388835906983, "rewards/mean_confidence_reward": 0.8520801186561584, "sampling/batch_mean_priority_error": 0.005454097222222222, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9694444444444444, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00013870045950170606, "sampling/priority_kl": 0.030001122504472733, "sampling/priority_scale": 1.853211653465405, "sampling/prob_entropy": 10.278968048095702, "sampling/prob_max": 3.5752333496930076e-05, "sampling/prob_min": 7.469276897609234e-06, "sampling/prompt_draws_max": 2.0, "sampling/prompt_draws_mean": 0.10320000052452087, "sampling/prompt_draws_total": 3096.0, "sampling/seen_fraction": 0.10200666785240173, "sampling/unseen_fraction": 0.8979933321475982, "signal/accuracy_reward/centered_abs_mean": 0.2584472715854645, "signal/accuracy_reward/group_std_mean": 0.32760846614837646, "signal/accuracy_reward/group_zero_std_frac": 0.1166666679084301, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12922363579273224, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.12922363579273224, "signal/advantage_abs_mean": 0.2409671425819397, "signal/advantage_pre_scale_abs_mean": 0.2409671425819397, "signal/advantage_pre_scale_std": 0.3296055316925049, "signal/advantage_std": 0.3296055316925049, "signal/brier_reward/centered_abs_mean": 0.21698220670223237, "signal/brier_reward/group_std_mean": 0.2722037315368652, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.10849110335111618, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.10849110335111618, "signal/confidence_one_or_zero/centered_abs_mean": 0.15730251967906952, "signal/confidence_one_or_zero/group_std_mean": 0.24223730564117432, "signal/confidence_one_or_zero/group_zero_std_frac": 0.19999999850988387, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.5730251334389323e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.5730251334389323e-06, "signal/format_reward/centered_abs_mean": 0.02931857593357563, "signal/format_reward/group_std_mean": 0.05820476859807968, "signal/format_reward/group_zero_std_frac": 0.7472222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014659287966787815, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014659287966787815, "signal/mean_confidence_reward/centered_abs_mean": 0.08275075107812882, "signal/mean_confidence_reward/group_std_mean": 0.1179319441318512, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.27507483336376e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.27507483336376e-07, "step": 45 }, { "calibration/aurc": 0.33053533787751727, "calibration/batch_distribution_entropy": 0.512697310085846, "calibration/batch_entropy_100bins": 0.3786039277099914, "calibration/batch_entropy_10bins": 0.512697310085846, "calibration/batch_entropy_50bins": 0.4452083349799242, "calibration/batch_uniqueness": 0.5273792798695895, "calibration/confidence_entropy": 0.3942478093536125, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.034557537107060214, "calibration/coverage@25%": 0.3376609853829223, "calibration/coverage@30%": 0.3381914893617021, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.512697310085846, "calibration/distribution_entropy_100": 0.3786039277099914, "calibration/ece": 0.22357226767279878, "calibration/mean_confidence": 0.8412826863703291, "calibration/unique_confidence_per_question": 0.03072916666666666, "calibration/unique_confidences": 11.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011718749999999977, "completions/max_length": 3947.2, "completions/max_terminated_length": 3947.2, "completions/mean_length": 580.1506225585938, "completions/mean_terminated_length": 587.1082397460938, "completions/min_length": 0.0, "completions/min_terminated_length": 131.6, "epoch": 0.1201923076923077, "grad_norm": 0.0008270725375041366, "learning_rate": 1.201923076923077e-06, "loss": -0.004, "num_tokens": 89082154.0, "reward": 1.1280964374542237, "reward_std": 0.2733824193477631, "rewards/accuracy_reward": 0.5832465171813965, "rewards/brier_reward": 0.6891612410545349, "rewards/confidence_one_or_zero": 0.05451388955116272, "rewards/format_reward": 0.9837673544883728, "rewards/mean_confidence_reward": 0.8320742964744567, "sampling/batch_mean_priority_error": 0.005454166666666669, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9388888888888889, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.000144621345680207, "sampling/priority_kl": 0.029999880492687224, "sampling/priority_scale": 1.7189445613417775, "sampling/prob_entropy": 10.27895565032959, "sampling/prob_max": 3.592690773075446e-05, "sampling/prob_min": 8.405669541389216e-06, "sampling/prompt_draws_max": 2.0, "sampling/prompt_draws_mean": 0.11519999951124191, "sampling/prompt_draws_total": 3456.0, "sampling/seen_fraction": 0.11345999985933304, "sampling/unseen_fraction": 0.8865400001406669, "signal/accuracy_reward/centered_abs_mean": 0.2265787750482559, "signal/accuracy_reward/group_std_mean": 0.29308363795280457, "signal/accuracy_reward/group_zero_std_frac": 0.1944444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11328938752412795, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11328938752412795, "signal/advantage_abs_mean": 0.20797210037708283, "signal/advantage_pre_scale_abs_mean": 0.20797210037708283, "signal/advantage_pre_scale_std": 0.30454687476158143, "signal/advantage_std": 0.30454687476158143, "signal/brier_reward/centered_abs_mean": 0.18070028126239776, "signal/brier_reward/group_std_mean": 0.23263510167598725, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09035014063119888, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.09035014063119888, "signal/confidence_one_or_zero/centered_abs_mean": 0.09245876669883728, "signal/confidence_one_or_zero/group_std_mean": 0.1641049787402153, "signal/confidence_one_or_zero/group_zero_std_frac": 0.37222222685813905, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 9.245876526620123e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 9.245876526620123e-07, "signal/format_reward/centered_abs_mean": 0.0284559465944767, "signal/format_reward/group_std_mean": 0.056744760274887084, "signal/format_reward/group_zero_std_frac": 0.7555555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01422797329723835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01422797329723835, "signal/mean_confidence_reward/centered_abs_mean": 0.08219865411520004, "signal/mean_confidence_reward/group_std_mean": 0.11515908986330033, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.219864980674174e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.219864980674174e-07, "step": 50 }, { "epoch": 0.1201923076923077, "eval_calibration/aurc": 0.3777325068473874, "eval_calibration/batch_distribution_entropy": 0.5340746979902073, "eval_calibration/batch_entropy_100bins": 0.36490768829677633, "eval_calibration/batch_entropy_10bins": 0.5340746979902073, "eval_calibration/batch_entropy_50bins": 0.4295634265060346, "eval_calibration/batch_uniqueness": 0.5068985753349399, "eval_calibration/confidence_entropy": 0.42309028695130074, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0, "eval_calibration/coverage@20%": 0.0, "eval_calibration/coverage@25%": 0.0, "eval_calibration/coverage@30%": 0.026501766784452298, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.5340746979902073, "eval_calibration/distribution_entropy_100": 0.36490768829677633, "eval_calibration/ece": 0.23831272084805666, "eval_calibration/mean_confidence": 0.8301855123674912, "eval_calibration/unique_confidence_per_question": 0.010416666666666666, "eval_calibration/unique_confidences": 12, "eval_completions/clipped_ratio": 0.012152777777777771, "eval_completions/max_length": 2896.3333333333335, "eval_completions/max_terminated_length": 2896.3333333333335, "eval_completions/mean_length": 610.5784810384115, "eval_completions/mean_terminated_length": 618.1752726236979, "eval_completions/min_length": 44.0, "eval_completions/min_terminated_length": 183.83333333333334, "eval_loss": 0.0, "eval_num_tokens": 89082154.0, "eval_reward": 1.1268197298049927, "eval_reward_std": 0.429857795437177, "eval_rewards/accuracy_reward": 0.581597218910853, "eval_rewards/brier_reward": 0.689386377731959, "eval_rewards/confidence_one_or_zero": 0.026909722092871863, "eval_rewards/format_reward": 0.9826388855775198, "eval_rewards/mean_confidence_reward": 0.816510409116745, "eval_runtime": 203.2672, "eval_samples_per_second": 4.92, "eval_signal/accuracy_reward/centered_abs_mean": 0.4685329894224803, "eval_signal/accuracy_reward/group_std_mean": 0.4912516276041667, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23426649471124014, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23426649471124014, "eval_signal/advantage_abs_mean": 0.39997173845767975, "eval_signal/advantage_pre_scale_abs_mean": 0.39997173845767975, "eval_signal/advantage_pre_scale_std": 0.42536558707555133, "eval_signal/advantage_std": 0.42536558707555133, "eval_signal/brier_reward/centered_abs_mean": 0.3129906157652537, "eval_signal/brier_reward/group_std_mean": 0.3424776792526245, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15649530788262686, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.15649530788262686, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.05094400964056452, "eval_signal/confidence_one_or_zero/group_std_mean": 0.12426702585071325, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.3888889029622078, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.094400895207704e-07, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.094400895207704e-07, "eval_signal/format_reward/centered_abs_mean": 0.033311632151405014, "eval_signal/format_reward/group_std_mean": 0.08924104770024617, "eval_signal/format_reward/group_zero_std_frac": 0.5277777860562006, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.016655816075702507, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.016655816075702507, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.08704317485292752, "eval_signal/mean_confidence_reward/group_std_mean": 0.13417261466383934, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.704317148537181e-07, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 8.704317148537181e-07, "eval_steps_per_second": 0.03, "step": 50 }, { "epoch": 0.1201923076923077, "step": 50, "train_probe_calibration/aurc": 0.3112787978069011, "train_probe_calibration/batch_distribution_entropy": 0.518454152532018, "train_probe_calibration/batch_entropy_100bins": 0.3640552531542229, "train_probe_calibration/batch_entropy_10bins": 0.518454152532018, "train_probe_calibration/batch_entropy_50bins": 0.42855995364850563, "train_probe_calibration/batch_uniqueness": 0.4955930864197531, "train_probe_calibration/confidence_entropy": 0.41655487998101076, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.0, "train_probe_calibration/coverage@15%": 0.0, "train_probe_calibration/coverage@20%": 0.0, "train_probe_calibration/coverage@25%": 0.0, "train_probe_calibration/coverage@30%": 0.528, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.518454152532018, "train_probe_calibration/distribution_entropy_100": 0.3640552531542229, "train_probe_calibration/ece": 0.20810666666666672, "train_probe_calibration/mean_confidence": 0.8338844444444444, "train_probe_calibration/unique_confidence_per_question": 0.011284722222222222, "train_probe_calibration/unique_confidences": 13, "train_probe_completions/clipped_ratio": 0.017361111111111105, "train_probe_completions/max_length": 2972.0, "train_probe_completions/max_terminated_length": 2972.0, "train_probe_completions/mean_length": 631.3243204752604, "train_probe_completions/mean_terminated_length": 642.1988016764323, "train_probe_completions/min_length": 23.666666666666668, "train_probe_completions/min_terminated_length": 173.83333333333334, "train_probe_loss": 0.0, "train_probe_num_tokens": 89082154.0, "train_probe_reward": 1.151051640510559, "train_probe_reward_std": 0.4297498017549515, "train_probe_rewards/accuracy_reward": 0.6111111144224802, "train_probe_rewards/brier_reward": 0.7144128779570261, "train_probe_rewards/confidence_one_or_zero": 0.026041666356225807, "train_probe_rewards/format_reward": 0.9765625, "train_probe_rewards/mean_confidence_reward": 0.8143402536710104, "train_probe_runtime": 210.8945, "train_probe_samples_per_second": 4.742, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.458984375, "train_probe_signal/accuracy_reward/group_std_mean": 0.48603397111097973, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2294921875, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2294921875, "train_probe_signal/advantage_abs_mean": 0.39387212693691254, "train_probe_signal/advantage_pre_scale_abs_mean": 0.39387212693691254, "train_probe_signal/advantage_pre_scale_std": 0.42617770036061603, "train_probe_signal/advantage_std": 0.42617770036061603, "train_probe_signal/brier_reward/centered_abs_mean": 0.30468609432379407, "train_probe_signal/brier_reward/group_std_mean": 0.3368734121322632, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15234304716189703, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.15234304716189703, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.04980468718955914, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.13184053326646486, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.3055555646618207, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.980468446547093e-07, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.980468446547093e-07, "train_probe_signal/format_reward/centered_abs_mean": 0.04421658099939426, "train_probe_signal/format_reward/group_std_mean": 0.10708824172616005, "train_probe_signal/format_reward/group_zero_std_frac": 0.4722222263614337, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.02210829049969713, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.02210829049969713, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.09213974326848984, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.14700974648197493, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 9.213974484130935e-07, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 9.213974484130935e-07, "train_probe_steps_per_second": 0.028 }, { "calibration/aurc": 0.38612993561216935, "calibration/batch_distribution_entropy": 0.5405270158216778, "calibration/batch_entropy_100bins": 0.3653346702300958, "calibration/batch_entropy_10bins": 0.5405270158216778, "calibration/batch_entropy_50bins": 0.42908534837434686, "calibration/batch_uniqueness": 0.49659988221929785, "calibration/confidence_entropy": 0.4437692389330308, "calibration/coverage@0%": 0.003172045997444586, "calibration/coverage@1%": 0.003172045997444586, "calibration/coverage@10%": 0.003172045997444586, "calibration/coverage@15%": 0.007427365146380757, "calibration/coverage@20%": 0.12019332259318927, "calibration/coverage@25%": 0.2031720459974446, "calibration/coverage@30%": 0.20787178490083885, "calibration/coverage@5%": 0.003172045997444586, "calibration/distribution_entropy_10": 0.5405270158216778, "calibration/distribution_entropy_100": 0.3653346702300958, "calibration/ece": 0.22770160682164314, "calibration/mean_confidence": 0.8139400842135398, "calibration/unique_confidence_per_question": 0.028645833333333336, "calibration/unique_confidences": 11.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014930555555555558, "completions/max_length": 4055.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 632.4847290039063, "completions/mean_terminated_length": 642.06376953125, "completions/min_length": 0.0, "completions/min_terminated_length": 150.2, "epoch": 0.13221153846153846, "grad_norm": 0.0007234508520923555, "learning_rate": 1.3221153846153848e-06, "loss": -0.0063, "num_tokens": 99434298.0, "reward": 1.1398391246795654, "reward_std": 0.2665547639131546, "rewards/accuracy_reward": 0.5956597089767456, "rewards/brier_reward": 0.7043145775794983, "rewards/confidence_one_or_zero": 0.021006944589316845, "rewards/format_reward": 0.9796875, "rewards/mean_confidence_reward": 0.8053324580192566, "sampling/batch_mean_priority_error": 0.004031666666666667, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9555555555555555, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00015117116854526102, "sampling/priority_kl": 0.03000043034553528, "sampling/priority_scale": 1.6105974317062646, "sampling/prob_entropy": 10.278947639465333, "sampling/prob_max": 3.609474879340269e-05, "sampling/prob_min": 8.840246482577641e-06, "sampling/prompt_draws_max": 2.2, "sampling/prompt_draws_mean": 0.12720000147819518, "sampling/prompt_draws_total": 3816.0, "sampling/seen_fraction": 0.12482666820287705, "sampling/unseen_fraction": 0.875173331797123, "signal/accuracy_reward/centered_abs_mean": 0.23159722089767457, "signal/accuracy_reward/group_std_mean": 0.290811088681221, "signal/accuracy_reward/group_zero_std_frac": 0.22777778208255767, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11579861044883728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11579861044883728, "signal/advantage_abs_mean": 0.20904322266578673, "signal/advantage_pre_scale_abs_mean": 0.20904322266578673, "signal/advantage_pre_scale_std": 0.30396526455879214, "signal/advantage_std": 0.30396526455879214, "signal/brier_reward/centered_abs_mean": 0.1729253500699997, "signal/brier_reward/group_std_mean": 0.21837244629859925, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08646267503499985, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.08646267503499985, "signal/confidence_one_or_zero/centered_abs_mean": 0.03847656212747097, "signal/confidence_one_or_zero/group_std_mean": 0.08330218568444252, "signal/confidence_one_or_zero/group_zero_std_frac": 0.6250000059604645, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.847656159905455e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.847656159905455e-07, "signal/format_reward/centered_abs_mean": 0.03191189244389534, "signal/format_reward/group_std_mean": 0.05313037186861038, "signal/format_reward/group_zero_std_frac": 0.7972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01595594622194767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01595594622194767, "signal/mean_confidence_reward/centered_abs_mean": 0.08242994993925094, "signal/mean_confidence_reward/group_std_mean": 0.11044336557388305, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.242994795182313e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.242994795182313e-07, "step": 55 }, { "calibration/aurc": 0.3336939658670478, "calibration/batch_distribution_entropy": 0.5553734722247106, "calibration/batch_entropy_100bins": 0.3494255823263553, "calibration/batch_entropy_10bins": 0.5553734722247106, "calibration/batch_entropy_50bins": 0.4113381418560308, "calibration/batch_uniqueness": 0.4833634475353441, "calibration/confidence_entropy": 0.48042285577547605, "calibration/coverage@0%": 0.003222263714138264, "calibration/coverage@1%": 0.003222263714138264, "calibration/coverage@10%": 0.003222263714138264, "calibration/coverage@15%": 0.003222263714138264, "calibration/coverage@20%": 0.11631561781737232, "calibration/coverage@25%": 0.27990400831869156, "calibration/coverage@30%": 0.27990400831869156, "calibration/coverage@5%": 0.003222263714138264, "calibration/distribution_entropy_10": 0.5553734722247106, "calibration/distribution_entropy_100": 0.3494255823263553, "calibration/ece": 0.1802489469086288, "calibration/mean_confidence": 0.7959282239222343, "calibration/unique_confidence_per_question": 0.02447916666666667, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011545138888888907, "completions/max_length": 4024.4, "completions/max_terminated_length": 4024.4, "completions/mean_length": 628.0548706054688, "completions/mean_terminated_length": 635.40615234375, "completions/min_length": 0.0, "completions/min_terminated_length": 128.4, "epoch": 0.14423076923076922, "grad_norm": 0.0007936802576296031, "learning_rate": 1.4423076923076922e-06, "loss": -0.0038, "num_tokens": 109729394.0, "reward": 1.173889660835266, "reward_std": 0.24853478670120238, "rewards/accuracy_reward": 0.6305555582046509, "rewards/brier_reward": 0.7328330159187317, "rewards/confidence_one_or_zero": 0.006944444682449103, "rewards/format_reward": 0.9843749880790711, "rewards/mean_confidence_reward": 0.7818385362625122, "sampling/batch_mean_priority_error": 0.004377708333333334, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9388888888888889, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00015656949544791133, "sampling/priority_kl": 0.02999980337917805, "sampling/priority_scale": 1.5206903575453907, "sampling/prob_entropy": 10.27895908355713, "sampling/prob_max": 3.625823956099339e-05, "sampling/prob_min": 7.926992839202285e-06, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.13919999897480012, "sampling/prompt_draws_total": 4176.0, "sampling/seen_fraction": 0.1361933320760727, "sampling/unseen_fraction": 0.8638066679239274, "signal/accuracy_reward/centered_abs_mean": 0.21881510317325592, "signal/accuracy_reward/group_std_mean": 0.2803986519575119, "signal/accuracy_reward/group_zero_std_frac": 0.23888889253139495, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10940755158662796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10940755158662796, "signal/advantage_abs_mean": 0.1892775774002075, "signal/advantage_pre_scale_abs_mean": 0.1892775774002075, "signal/advantage_pre_scale_std": 0.28426212072372437, "signal/advantage_std": 0.28426212072372437, "signal/brier_reward/centered_abs_mean": 0.15051650404930114, "signal/brier_reward/group_std_mean": 0.19497915208339692, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07525825202465057, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.07525825202465057, "signal/confidence_one_or_zero/centered_abs_mean": 0.013183593563735486, "signal/confidence_one_or_zero/group_std_mean": 0.0330656249076128, "signal/confidence_one_or_zero/group_zero_std_frac": 0.8333333492279053, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.3183593381427272e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.3183593381427272e-07, "signal/format_reward/centered_abs_mean": 0.02564019113779068, "signal/format_reward/group_std_mean": 0.050878601521253584, "signal/format_reward/group_zero_std_frac": 0.7805555582046508, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01282009556889534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01282009556889534, "signal/mean_confidence_reward/centered_abs_mean": 0.0763712003827095, "signal/mean_confidence_reward/group_std_mean": 0.10629059821367264, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.637119892933697e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.637119892933697e-07, "step": 60 }, { "calibration/aurc": 0.37586397587645826, "calibration/batch_distribution_entropy": 0.5549776555813649, "calibration/batch_entropy_100bins": 0.31981659342569346, "calibration/batch_entropy_10bins": 0.5549776555813649, "calibration/batch_entropy_50bins": 0.3764829191915983, "calibration/batch_uniqueness": 0.40589435403374124, "calibration/confidence_entropy": 0.5348350374864965, "calibration/coverage@0%": 0.0026654322500107444, "calibration/coverage@1%": 0.0026654322500107444, "calibration/coverage@10%": 0.0026654322500107444, "calibration/coverage@15%": 0.04246310706235907, "calibration/coverage@20%": 0.17721111767243863, "calibration/coverage@25%": 0.20479732456899033, "calibration/coverage@30%": 0.20479732456899033, "calibration/coverage@5%": 0.0026654322500107444, "calibration/distribution_entropy_10": 0.5549776555813649, "calibration/distribution_entropy_100": 0.31981659342569346, "calibration/ece": 0.16123688431669309, "calibration/mean_confidence": 0.7563851204504758, "calibration/unique_confidence_per_question": 0.024479166666666666, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01866319444444444, "completions/max_length": 4017.2, "completions/max_terminated_length": 4017.2, "completions/mean_length": 665.1203979492187, "completions/mean_terminated_length": 677.7648315429688, "completions/min_length": 0.0, "completions/min_terminated_length": 195.2, "epoch": 0.15625, "grad_norm": 0.0006811345228925347, "learning_rate": 1.5625e-06, "loss": -0.0077, "num_tokens": 120519677.0, "reward": 1.1617473363876343, "reward_std": 0.2451097071170807, "rewards/accuracy_reward": 0.6177951335906983, "rewards/brier_reward": 0.7292956352233887, "rewards/confidence_one_or_zero": 0.0018229167035315186, "rewards/format_reward": 0.9763888955116272, "rewards/mean_confidence_reward": 0.7422178864479065, "sampling/batch_mean_priority_error": 0.005695416666666666, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.95, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00016235393995884807, "sampling/priority_kl": 0.03000013642013073, "sampling/priority_scale": 1.4449297070968896, "sampling/prob_entropy": 10.278961563110352, "sampling/prob_max": 3.641730290837586e-05, "sampling/prob_min": 8.587727643316612e-06, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.1512000024318695, "sampling/prompt_draws_total": 4536.0, "sampling/seen_fraction": 0.14750000238418579, "sampling/unseen_fraction": 0.8524999976158142, "signal/accuracy_reward/centered_abs_mean": 0.21707356870174407, "signal/accuracy_reward/group_std_mean": 0.27858763337135317, "signal/accuracy_reward/group_zero_std_frac": 0.2361111134290695, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10853678435087204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10853678435087204, "signal/advantage_abs_mean": 0.1863380938768387, "signal/advantage_pre_scale_abs_mean": 0.1863380938768387, "signal/advantage_pre_scale_std": 0.2836464047431946, "signal/advantage_std": 0.2836464047431946, "signal/brier_reward/centered_abs_mean": 0.14009126722812654, "signal/brier_reward/group_std_mean": 0.1813989222049713, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07004563361406327, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.07004563361406327, "signal/confidence_one_or_zero/centered_abs_mean": 0.003521050268318504, "signal/confidence_one_or_zero/group_std_mean": 0.010013032704591751, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9444444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.5210501181381915e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.5210501181381915e-08, "signal/format_reward/centered_abs_mean": 0.03613281212747097, "signal/format_reward/group_std_mean": 0.062471595406532285, "signal/format_reward/group_zero_std_frac": 0.7583333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.018066406063735486, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.018066406063735486, "signal/mean_confidence_reward/centered_abs_mean": 0.08147167861461639, "signal/mean_confidence_reward/group_std_mean": 0.11126352846622467, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.147167591232574e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.147167591232574e-07, "step": 65 }, { "calibration/aurc": 0.3543039655168289, "calibration/batch_distribution_entropy": 0.5730260289511547, "calibration/batch_entropy_100bins": 0.31485441323741503, "calibration/batch_entropy_10bins": 0.5730260289511547, "calibration/batch_entropy_50bins": 0.37064152096135977, "calibration/batch_uniqueness": 0.425199174266254, "calibration/confidence_entropy": 0.5637355662717745, "calibration/coverage@0%": 0.0015804016757562183, "calibration/coverage@1%": 0.0015804016757562183, "calibration/coverage@10%": 0.0015804016757562183, "calibration/coverage@15%": 0.0015804016757562183, "calibration/coverage@20%": 0.028442691684597947, "calibration/coverage@25%": 0.10854879248035391, "calibration/coverage@30%": 0.4010498976969764, "calibration/coverage@5%": 0.0015804016757562183, "calibration/distribution_entropy_10": 0.5730260289511547, "calibration/distribution_entropy_100": 0.31485441323741503, "calibration/ece": 0.1173558645266779, "calibration/mean_confidence": 0.7295070335410896, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01597222222222221, "completions/max_length": 4049.2, "completions/max_terminated_length": 4049.2, "completions/mean_length": 684.5787475585937, "completions/mean_terminated_length": 695.9264892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 201.6, "epoch": 0.16826923076923078, "grad_norm": 0.0007994287298060954, "learning_rate": 1.682692307692308e-06, "loss": -0.0082, "num_tokens": 131515944.0, "reward": 1.1738406658172607, "reward_std": 0.2270677924156189, "rewards/accuracy_reward": 0.6275173664093018, "rewards/brier_reward": 0.7403752088546753, "rewards/confidence_one_or_zero": 0.001909722265554592, "rewards/format_reward": 0.9797743082046508, "rewards/mean_confidence_reward": 0.7171657919883728, "sampling/batch_mean_priority_error": 0.006220486111111113, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9305555555555556, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00016924338706303388, "sampling/priority_kl": 0.030000567063689233, "sampling/priority_scale": 1.3802560209762305, "sampling/prob_entropy": 10.278955459594727, "sampling/prob_max": 3.657195411506109e-05, "sampling/prob_min": 9.199866326525807e-06, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.16319999992847442, "sampling/prompt_draws_total": 4896.0, "sampling/seen_fraction": 0.15870000123977662, "sampling/unseen_fraction": 0.8412999987602234, "signal/accuracy_reward/centered_abs_mean": 0.20630967915058135, "signal/accuracy_reward/group_std_mean": 0.2702312171459198, "signal/accuracy_reward/group_zero_std_frac": 0.2416666716337204, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10315483957529067, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10315483957529067, "signal/advantage_abs_mean": 0.17068581879138947, "signal/advantage_pre_scale_abs_mean": 0.17068581879138947, "signal/advantage_pre_scale_std": 0.26389814019203184, "signal/advantage_std": 0.26389814019203184, "signal/brier_reward/centered_abs_mean": 0.12424491196870804, "signal/brier_reward/group_std_mean": 0.16228446662425994, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06212245598435402, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.06212245598435402, "signal/confidence_one_or_zero/centered_abs_mean": 0.0036349825211800637, "signal/confidence_one_or_zero/group_std_mean": 0.009772197715938092, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9472222328186035, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.634982306266465e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.634982306266465e-08, "signal/format_reward/centered_abs_mean": 0.029291448928415776, "signal/format_reward/group_std_mean": 0.05109502263367176, "signal/format_reward/group_zero_std_frac": 0.8027777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014645724464207888, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014645724464207888, "signal/mean_confidence_reward/centered_abs_mean": 0.07622585743665695, "signal/mean_confidence_reward/group_std_mean": 0.10273666977882386, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.622585485478339e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.622585485478339e-07, "step": 70 }, { "calibration/aurc": 0.2858884438881962, "calibration/batch_distribution_entropy": 0.5609883010680576, "calibration/batch_entropy_100bins": 0.31588468551229204, "calibration/batch_entropy_10bins": 0.5609883010680576, "calibration/batch_entropy_50bins": 0.37185434081367924, "calibration/batch_uniqueness": 0.4256885939815541, "calibration/confidence_entropy": 0.5814513279306728, "calibration/coverage@0%": 0.00585995756396415, "calibration/coverage@1%": 0.00585995756396415, "calibration/coverage@10%": 0.010634493372982716, "calibration/coverage@15%": 0.022864303181662667, "calibration/coverage@20%": 0.02446004786251373, "calibration/coverage@25%": 0.4076161182910999, "calibration/coverage@30%": 0.7452000677239121, "calibration/coverage@5%": 0.00585995756396415, "calibration/distribution_entropy_10": 0.5609883010680576, "calibration/distribution_entropy_100": 0.31588468551229204, "calibration/ece": 0.10178788260454422, "calibration/mean_confidence": 0.7036365611088247, "calibration/unique_confidence_per_question": 0.0265625, "calibration/unique_confidences": 10.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014756944444444442, "completions/max_length": 3767.4, "completions/max_terminated_length": 3767.4, "completions/mean_length": 663.8887939453125, "completions/mean_terminated_length": 673.8903564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 212.4, "epoch": 0.18028846153846154, "grad_norm": 0.0005497903912328184, "learning_rate": 1.8028846153846156e-06, "loss": -0.0113, "num_tokens": 142235079.0, "reward": 1.2008941411972045, "reward_std": 0.21300730109214783, "rewards/accuracy_reward": 0.6611979246139527, "rewards/brier_reward": 0.7575034737586975, "rewards/confidence_one_or_zero": 0.0012152778159361332, "rewards/format_reward": 0.9830729126930237, "rewards/mean_confidence_reward": 0.6975086808204651, "sampling/batch_mean_priority_error": 0.006739583333333335, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9277777777777777, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00017708493396639824, "sampling/priority_kl": 0.029999949410557745, "sampling/priority_scale": 1.324010860780254, "sampling/prob_entropy": 10.278962707519531, "sampling/prob_max": 3.672436432680115e-05, "sampling/prob_min": 9.772332668944728e-06, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.17520000040531158, "sampling/prompt_draws_total": 5256.0, "sampling/seen_fraction": 0.16990666687488556, "sampling/unseen_fraction": 0.8300933331251145, "signal/accuracy_reward/centered_abs_mean": 0.19198676347732543, "signal/accuracy_reward/group_std_mean": 0.25041328370571136, "signal/accuracy_reward/group_zero_std_frac": 0.3000000029802322, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09599338173866272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09599338173866272, "signal/advantage_abs_mean": 0.15800958573818208, "signal/advantage_pre_scale_abs_mean": 0.15800958573818208, "signal/advantage_pre_scale_std": 0.2527189999818802, "signal/advantage_std": 0.2527189999818802, "signal/brier_reward/centered_abs_mean": 0.11541662514209747, "signal/brier_reward/group_std_mean": 0.15058891773223876, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.057708312571048734, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.057708312571048734, "signal/confidence_one_or_zero/centered_abs_mean": 0.0022460937150754036, "signal/confidence_one_or_zero/group_std_mean": 0.005444145016372204, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222089767456, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.2460936577317624e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.2460936577317624e-08, "signal/format_reward/centered_abs_mean": 0.02916124165058136, "signal/format_reward/group_std_mean": 0.05634359717369079, "signal/format_reward/group_zero_std_frac": 0.7638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01458062082529068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01458062082529068, "signal/mean_confidence_reward/centered_abs_mean": 0.07778618484735489, "signal/mean_confidence_reward/group_std_mean": 0.1058467149734497, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.778618396514502e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.778618396514502e-07, "step": 75 }, { "calibration/aurc": 0.4032866542071951, "calibration/batch_distribution_entropy": 0.519863369326861, "calibration/batch_entropy_100bins": 0.28161969711614626, "calibration/batch_entropy_10bins": 0.519863369326861, "calibration/batch_entropy_50bins": 0.33151815087660375, "calibration/batch_uniqueness": 0.36394117551198735, "calibration/confidence_entropy": 0.6014145393968999, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0032, "calibration/coverage@25%": 0.005310817941952506, "calibration/coverage@30%": 0.2021108179419525, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.519863369326861, "calibration/distribution_entropy_100": 0.28161969711614626, "calibration/ece": 0.14039651033528555, "calibration/mean_confidence": 0.6901999298390014, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012586805555555558, "completions/max_length": 3871.8, "completions/max_terminated_length": 3871.8, "completions/mean_length": 687.1427368164062, "completions/mean_terminated_length": 695.88330078125, "completions/min_length": 0.0, "completions/min_terminated_length": 191.4, "epoch": 0.19230769230769232, "grad_norm": 0.0006263877148739994, "learning_rate": 1.9230769230769234e-06, "loss": -0.0087, "num_tokens": 153266035.0, "reward": 1.18265540599823, "reward_std": 0.20969317853450775, "rewards/accuracy_reward": 0.6322916626930237, "rewards/brier_reward": 0.747154700756073, "rewards/confidence_one_or_zero": 0.0008680555794853718, "rewards/format_reward": 0.9858506917953491, "rewards/mean_confidence_reward": 0.6823298692703247, "sampling/batch_mean_priority_error": 0.009656250000000002, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9166666666666667, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00018732520984485746, "sampling/priority_kl": 0.029999008402228355, "sampling/priority_scale": 1.2748031734023244, "sampling/prob_entropy": 10.278940010070801, "sampling/prob_max": 3.6873606586596e-05, "sampling/prob_min": 1.0306679178029298e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.18720000088214875, "sampling/prompt_draws_total": 5616.0, "sampling/seen_fraction": 0.18101999759674073, "sampling/unseen_fraction": 0.8189800024032593, "signal/accuracy_reward/centered_abs_mean": 0.20177951455116272, "signal/accuracy_reward/group_std_mean": 0.25801017582416536, "signal/accuracy_reward/group_zero_std_frac": 0.28888889253139494, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10088975727558136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10088975727558136, "signal/advantage_abs_mean": 0.1581605225801468, "signal/advantage_pre_scale_abs_mean": 0.1581605225801468, "signal/advantage_pre_scale_std": 0.2474273145198822, "signal/advantage_std": 0.2474273145198822, "signal/brier_reward/centered_abs_mean": 0.10887281149625778, "signal/brier_reward/group_std_mean": 0.1416696459054947, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.05443640574812889, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.05443640574812889, "signal/confidence_one_or_zero/centered_abs_mean": 0.001649305538740009, "signal/confidence_one_or_zero/group_std_mean": 0.004259948246181011, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9777777671813965, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6493054744159964e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6493054744159964e-08, "signal/format_reward/centered_abs_mean": 0.02550455778837204, "signal/format_reward/group_std_mean": 0.052160313725471495, "signal/format_reward/group_zero_std_frac": 0.7750000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01275227889418602, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01275227889418602, "signal/mean_confidence_reward/centered_abs_mean": 0.07146369963884354, "signal/mean_confidence_reward/group_std_mean": 0.09777705222368241, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.146369853217039e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.146369853217039e-07, "step": 80 }, { "calibration/aurc": 0.3661781722935645, "calibration/batch_distribution_entropy": 0.4992257819234746, "calibration/batch_entropy_100bins": 0.2705582175171825, "calibration/batch_entropy_10bins": 0.4992257819234746, "calibration/batch_entropy_50bins": 0.31849675606594413, "calibration/batch_uniqueness": 0.33240218609144073, "calibration/confidence_entropy": 0.610284052868314, "calibration/coverage@0%": 0.002702702702702703, "calibration/coverage@1%": 0.002702702702702703, "calibration/coverage@10%": 0.002702702702702703, "calibration/coverage@15%": 0.002702702702702703, "calibration/coverage@20%": 0.002702702702702703, "calibration/coverage@25%": 0.20270270270270271, "calibration/coverage@30%": 0.20270270270270271, "calibration/coverage@5%": 0.002702702702702703, "calibration/distribution_entropy_10": 0.4992257819234746, "calibration/distribution_entropy_100": 0.2705582175171825, "calibration/ece": 0.12068481756481757, "calibration/mean_confidence": 0.6806887197314783, "calibration/unique_confidence_per_question": 0.018229166666666668, "calibration/unique_confidences": 7.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4002.4, "completions/max_terminated_length": 4002.4, "completions/mean_length": 665.6905395507813, "completions/mean_terminated_length": 676.254638671875, "completions/min_length": 0.0, "completions/min_terminated_length": 173.6, "epoch": 0.20432692307692307, "grad_norm": 0.0009465516777709126, "learning_rate": 2.043269230769231e-06, "loss": -0.011, "num_tokens": 164055622.0, "reward": 1.1916949510574342, "reward_std": 0.21416583359241487, "rewards/accuracy_reward": 0.6455729126930236, "rewards/brier_reward": 0.754991102218628, "rewards/confidence_one_or_zero": 0.0003472222248092294, "rewards/format_reward": 0.9828125, "rewards/mean_confidence_reward": 0.6632769107818604, "sampling/batch_mean_priority_error": 0.011204861111111113, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.875, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00019981984514743089, "sampling/priority_kl": 0.02999875508248806, "sampling/priority_scale": 1.231435882905498, "sampling/prob_entropy": 10.27894401550293, "sampling/prob_max": 3.701617824845016e-05, "sampling/prob_min": 1.0804863813973498e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.19919999837875366, "sampling/prompt_draws_total": 5976.0, "sampling/seen_fraction": 0.19177333116531373, "sampling/unseen_fraction": 0.8082266688346863, "signal/accuracy_reward/centered_abs_mean": 0.20201280117034912, "signal/accuracy_reward/group_std_mean": 0.2671117514371872, "signal/accuracy_reward/group_zero_std_frac": 0.25, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10100640058517456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10100640058517456, "signal/advantage_abs_mean": 0.15618274211883545, "signal/advantage_pre_scale_abs_mean": 0.15618274211883545, "signal/advantage_pre_scale_std": 0.2473001003265381, "signal/advantage_std": 0.2473001003265381, "signal/brier_reward/centered_abs_mean": 0.10262170583009719, "signal/brier_reward/group_std_mean": 0.13768472969532014, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.051310852915048596, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.051310852915048596, "signal/confidence_one_or_zero/centered_abs_mean": 0.0006727430503815412, "signal/confidence_one_or_zero/group_std_mean": 0.0019641853868961334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9888888835906983, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.7274299908604006e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.7274299908604006e-09, "signal/format_reward/centered_abs_mean": 0.03017578162252903, "signal/format_reward/group_std_mean": 0.05734373852610588, "signal/format_reward/group_zero_std_frac": 0.7666666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015087890811264515, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015087890811264515, "signal/mean_confidence_reward/centered_abs_mean": 0.07156276404857635, "signal/mean_confidence_reward/group_std_mean": 0.09738876074552535, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.156276069508749e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.156276069508749e-07, "step": 85 }, { "calibration/aurc": 0.37116374332762697, "calibration/batch_distribution_entropy": 0.4757943579865319, "calibration/batch_entropy_100bins": 0.26224067309847543, "calibration/batch_entropy_10bins": 0.4757943579865319, "calibration/batch_entropy_50bins": 0.3087054773529833, "calibration/batch_uniqueness": 0.28793524936717196, "calibration/confidence_entropy": 0.6222985334875628, "calibration/coverage@0%": 0.0005494505494505495, "calibration/coverage@1%": 0.0005494505494505495, "calibration/coverage@10%": 0.0005494505494505495, "calibration/coverage@15%": 0.0005494505494505495, "calibration/coverage@20%": 0.0005494505494505495, "calibration/coverage@25%": 0.04475997686524002, "calibration/coverage@30%": 0.20054945054945056, "calibration/coverage@5%": 0.0005494505494505495, "calibration/distribution_entropy_10": 0.4757943579865319, "calibration/distribution_entropy_100": 0.26224067309847543, "calibration/ece": 0.0960165185039298, "calibration/mean_confidence": 0.6651432014026919, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015364583333333348, "completions/max_length": 3960.0, "completions/max_terminated_length": 3960.0, "completions/mean_length": 679.5826416015625, "completions/mean_terminated_length": 690.2464477539063, "completions/min_length": 0.0, "completions/min_terminated_length": 179.6, "epoch": 0.21634615384615385, "grad_norm": 0.0005822376697324216, "learning_rate": 2.1634615384615387e-06, "loss": -0.0117, "num_tokens": 174998654.0, "reward": 1.1882888317108153, "reward_std": 0.2097031891345978, "rewards/accuracy_reward": 0.637586796283722, "rewards/brier_reward": 0.7558179020881652, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9831597328186035, "rewards/mean_confidence_reward": 0.6578081369400024, "sampling/batch_mean_priority_error": 0.011411458333333336, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8833333333333334, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00021357860241550953, "sampling/priority_kl": 0.02999962829053402, "sampling/priority_scale": 1.1929583669174462, "sampling/prob_entropy": 10.278938102722169, "sampling/prob_max": 3.715199272846803e-05, "sampling/prob_min": 1.1269766037003138e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.21120000183582305, "sampling/prompt_draws_total": 6336.0, "sampling/seen_fraction": 0.20212666392326356, "sampling/unseen_fraction": 0.7978733360767365, "signal/accuracy_reward/centered_abs_mean": 0.2021755665540695, "signal/accuracy_reward/group_std_mean": 0.26527499556541445, "signal/accuracy_reward/group_zero_std_frac": 0.2555555641651154, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10108778327703476, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10108778327703476, "signal/advantage_abs_mean": 0.15595764219760894, "signal/advantage_pre_scale_abs_mean": 0.15595764219760894, "signal/advantage_pre_scale_std": 0.24449576139450074, "signal/advantage_std": 0.24449576139450074, "signal/brier_reward/centered_abs_mean": 0.10028855502605438, "signal/brier_reward/group_std_mean": 0.13360466957092285, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.05014427751302719, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.05014427751302719, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.02744140550494194, "signal/format_reward/group_std_mean": 0.050358953326940535, "signal/format_reward/group_zero_std_frac": 0.7916666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01372070275247097, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01372070275247097, "signal/mean_confidence_reward/centered_abs_mean": 0.06903888806700706, "signal/mean_confidence_reward/group_std_mean": 0.0928740844130516, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.903888788656331e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.903888788656331e-07, "step": 90 }, { "calibration/aurc": 0.3758242948585995, "calibration/batch_distribution_entropy": 0.4563553410376876, "calibration/batch_entropy_100bins": 0.24585762197374333, "calibration/batch_entropy_10bins": 0.4563553410376876, "calibration/batch_entropy_50bins": 0.28941961464449506, "calibration/batch_uniqueness": 0.24668494033689709, "calibration/confidence_entropy": 0.619717511469672, "calibration/coverage@0%": 0.002688172043010753, "calibration/coverage@1%": 0.002688172043010753, "calibration/coverage@10%": 0.027419354838709675, "calibration/coverage@15%": 0.05049822665090357, "calibration/coverage@20%": 0.05470324180659384, "calibration/coverage@25%": 0.13965527496313093, "calibration/coverage@30%": 0.13965527496313093, "calibration/coverage@5%": 0.002688172043010753, "calibration/distribution_entropy_10": 0.4563553410376876, "calibration/distribution_entropy_100": 0.24585762197374333, "calibration/ece": 0.10688778213917556, "calibration/mean_confidence": 0.6715519727450985, "calibration/unique_confidence_per_question": 0.01875, "calibration/unique_confidences": 7.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666675, "completions/max_length": 4000.8, "completions/max_terminated_length": 4000.8, "completions/mean_length": 644.25087890625, "completions/mean_terminated_length": 651.0717529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 198.4, "epoch": 0.2283653846153846, "grad_norm": 0.0006384830921888351, "learning_rate": 2.283653846153846e-06, "loss": -0.0086, "num_tokens": 185514024.0, "reward": 1.1923712730407714, "reward_std": 0.1970378041267395, "rewards/accuracy_reward": 0.6372395873069763, "rewards/brier_reward": 0.7592951536178589, "rewards/confidence_one_or_zero": 0.000260416668606922, "rewards/format_reward": 0.9881944417953491, "rewards/mean_confidence_reward": 0.6610156297683716, "sampling/batch_mean_priority_error": 0.012123263888888892, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9027777777777779, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00022780775325372815, "sampling/priority_kl": 0.030000927299261092, "sampling/priority_scale": 1.1581266285385936, "sampling/prob_entropy": 10.278959465026855, "sampling/prob_max": 3.729468226083554e-05, "sampling/prob_min": 1.1713916137523483e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.22319999933242798, "sampling/prompt_draws_total": 6696.0, "sampling/seen_fraction": 0.21301333606243134, "sampling/unseen_fraction": 0.7869866639375687, "signal/accuracy_reward/centered_abs_mean": 0.19498155415058135, "signal/accuracy_reward/group_std_mean": 0.2563039720058441, "signal/accuracy_reward/group_zero_std_frac": 0.275, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09749077707529068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09749077707529068, "signal/advantage_abs_mean": 0.14624694287776946, "signal/advantage_pre_scale_abs_mean": 0.14624694287776946, "signal/advantage_pre_scale_std": 0.23240326046943666, "signal/advantage_std": 0.23240326046943666, "signal/brier_reward/centered_abs_mean": 0.09518856704235076, "signal/brier_reward/group_std_mean": 0.12528109848499297, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04759428352117538, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04759428352117538, "signal/confidence_one_or_zero/centered_abs_mean": 0.0005045572877861559, "signal/confidence_one_or_zero/group_std_mean": 0.0014731390401721, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.0455724931453e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.0455724931453e-09, "signal/format_reward/centered_abs_mean": 0.01911892332136631, "signal/format_reward/group_std_mean": 0.037534280493855474, "signal/format_reward/group_zero_std_frac": 0.8361111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009559461660683156, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009559461660683156, "signal/mean_confidence_reward/centered_abs_mean": 0.06400986313819886, "signal/mean_confidence_reward/group_std_mean": 0.08521138578653335, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.400986194421421e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.400986194421421e-07, "step": 95 }, { "calibration/aurc": 0.3678310838070479, "calibration/batch_distribution_entropy": 0.46524390786738545, "calibration/batch_entropy_100bins": 0.2504651973371159, "calibration/batch_entropy_10bins": 0.46524390786738545, "calibration/batch_entropy_50bins": 0.2948435778123125, "calibration/batch_uniqueness": 0.2624876520204674, "calibration/confidence_entropy": 0.6178975760044858, "calibration/coverage@0%": 0.0026709523267721014, "calibration/coverage@1%": 0.0026709523267721014, "calibration/coverage@10%": 0.0026709523267721014, "calibration/coverage@15%": 0.04598646034816247, "calibration/coverage@20%": 0.06513539651837524, "calibration/coverage@25%": 0.09243198444488443, "calibration/coverage@30%": 0.36099780535738873, "calibration/coverage@5%": 0.0026709523267721014, "calibration/distribution_entropy_10": 0.46524390786738545, "calibration/distribution_entropy_100": 0.2504651973371159, "calibration/ece": 0.12128469451627992, "calibration/mean_confidence": 0.6711969216252879, "calibration/unique_confidence_per_question": 0.019270833333333334, "calibration/unique_confidences": 7.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01493055555555558, "completions/max_length": 3220.6, "completions/max_terminated_length": 3220.6, "completions/mean_length": 642.8724853515625, "completions/mean_terminated_length": 652.767138671875, "completions/min_length": 0.0, "completions/min_terminated_length": 207.6, "epoch": 0.2403846153846154, "grad_norm": 0.0006411863723769784, "learning_rate": 2.403846153846154e-06, "loss": -0.0114, "num_tokens": 196021611.0, "reward": 1.1830902338027953, "reward_std": 0.1911737948656082, "rewards/accuracy_reward": 0.6262152671813965, "rewards/brier_reward": 0.7555767059326172, "rewards/confidence_one_or_zero": 0.0006076388934161514, "rewards/format_reward": 0.9843749880790711, "rewards/mean_confidence_reward": 0.6688211679458618, "sampling/batch_mean_priority_error": 0.011630208333333334, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.888888888888889, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00024215736775659024, "sampling/priority_kl": 0.029999716952443124, "sampling/priority_scale": 1.126824200199917, "sampling/prob_entropy": 10.278961944580079, "sampling/prob_max": 3.743151755770668e-05, "sampling/prob_min": 1.2130623326811474e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.23519999980926515, "sampling/prompt_draws_total": 7056.0, "sampling/seen_fraction": 0.22354000210762023, "sampling/unseen_fraction": 0.7764599978923797, "signal/accuracy_reward/centered_abs_mean": 0.17940538227558137, "signal/accuracy_reward/group_std_mean": 0.23320743143558503, "signal/accuracy_reward/group_zero_std_frac": 0.34722222089767457, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08970269113779068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08970269113779068, "signal/advantage_abs_mean": 0.14159118086099626, "signal/advantage_pre_scale_abs_mean": 0.14159118086099626, "signal/advantage_pre_scale_std": 0.2317925751209259, "signal/advantage_std": 0.2317925751209259, "signal/brier_reward/centered_abs_mean": 0.09510144293308258, "signal/brier_reward/group_std_mean": 0.12647282779216767, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04755072146654129, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04755072146654129, "signal/confidence_one_or_zero/centered_abs_mean": 0.0011773003381676972, "signal/confidence_one_or_zero/group_std_mean": 0.0034373244270682335, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9805555462837219, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.1773002484005702e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.1773002484005702e-08, "signal/format_reward/centered_abs_mean": 0.02516275979578495, "signal/format_reward/group_std_mean": 0.04855161756277084, "signal/format_reward/group_zero_std_frac": 0.7972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012581379897892476, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012581379897892476, "signal/mean_confidence_reward/centered_abs_mean": 0.06360747069120407, "signal/mean_confidence_reward/group_std_mean": 0.08851810693740844, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.360746851896692e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.360746851896692e-07, "step": 100 }, { "epoch": 0.2403846153846154, "eval_calibration/aurc": 0.3215100260380746, "eval_calibration/batch_distribution_entropy": 0.4526846744320586, "eval_calibration/batch_entropy_100bins": 0.23913880523342096, "eval_calibration/batch_entropy_10bins": 0.4526846744320586, "eval_calibration/batch_entropy_50bins": 0.28151033228733163, "eval_calibration/batch_uniqueness": 0.22393147108541367, "eval_calibration/confidence_entropy": 0.6107200486093004, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0, "eval_calibration/coverage@20%": 0.0, "eval_calibration/coverage@25%": 0.17032484635645304, "eval_calibration/coverage@30%": 0.17032484635645304, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.4526846744320586, "eval_calibration/distribution_entropy_100": 0.23913880523342096, "eval_calibration/ece": 0.06097453906935911, "eval_calibration/mean_confidence": 0.6855575065847235, "eval_calibration/unique_confidence_per_question": 0.0078125, "eval_calibration/unique_confidences": 9, "eval_completions/clipped_ratio": 0.009548611111111105, "eval_completions/max_length": 2007.0, "eval_completions/max_terminated_length": 2007.0, "eval_completions/mean_length": 630.4380289713541, "eval_completions/mean_terminated_length": 636.6086832682291, "eval_completions/min_length": 53.666666666666664, "eval_completions/min_terminated_length": 231.83333333333334, "eval_loss": 0.0, "eval_num_tokens": 196021611.0, "eval_reward": 1.2005070646603901, "eval_reward_std": 0.34502998491128284, "eval_rewards/accuracy_reward": 0.6475694477558136, "eval_rewards/brier_reward": 0.7647157112757365, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9887152711550394, "eval_rewards/mean_confidence_reward": 0.6778211891651154, "eval_runtime": 206.6087, "eval_samples_per_second": 4.84, "eval_signal/accuracy_reward/centered_abs_mean": 0.4409722189108531, "eval_signal/accuracy_reward/group_std_mean": 0.47597145040829975, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22048610945542654, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22048610945542654, "eval_signal/advantage_abs_mean": 0.31116824348767597, "eval_signal/advantage_pre_scale_abs_mean": 0.31116824348767597, "eval_signal/advantage_pre_scale_std": 0.34272391100724536, "eval_signal/advantage_std": 0.34272391100724536, "eval_signal/brier_reward/centered_abs_mean": 0.16785739362239838, "eval_signal/brier_reward/group_std_mean": 0.19804431994756064, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08392869681119919, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08392869681119919, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.02153862826526165, "eval_signal/format_reward/group_std_mean": 0.057330875347057976, "eval_signal/format_reward/group_zero_std_frac": 0.6944444676240286, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010769314132630825, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.010769314132630825, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.06253797250489394, "eval_signal/mean_confidence_reward/group_std_mean": 0.09316463147600491, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.253797171969685e-07, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 6.253797171969685e-07, "eval_steps_per_second": 0.029, "step": 100 }, { "epoch": 0.2403846153846154, "step": 100, "train_probe_calibration/aurc": 0.2912553971506111, "train_probe_calibration/batch_distribution_entropy": 0.4504727867061737, "train_probe_calibration/batch_entropy_100bins": 0.23646639818152504, "train_probe_calibration/batch_entropy_10bins": 0.4504727867061737, "train_probe_calibration/batch_entropy_50bins": 0.2783644179450236, "train_probe_calibration/batch_uniqueness": 0.21059908012963577, "train_probe_calibration/confidence_entropy": 0.6113843826161691, "train_probe_calibration/coverage@0%": 0.003524229074889868, "train_probe_calibration/coverage@1%": 0.003524229074889868, "train_probe_calibration/coverage@10%": 0.003524229074889868, "train_probe_calibration/coverage@15%": 0.003524229074889868, "train_probe_calibration/coverage@20%": 0.15859030837004406, "train_probe_calibration/coverage@25%": 0.16299559471365638, "train_probe_calibration/coverage@30%": 0.16299559471365638, "train_probe_calibration/coverage@5%": 0.003524229074889868, "train_probe_calibration/distribution_entropy_10": 0.4504727867061737, "train_probe_calibration/distribution_entropy_100": 0.23646639818152504, "train_probe_calibration/ece": 0.024537444933920727, "train_probe_calibration/mean_confidence": 0.6848017621145375, "train_probe_calibration/unique_confidence_per_question": 0.008680555555555556, "train_probe_calibration/unique_confidences": 10, "train_probe_completions/clipped_ratio": 0.013020833333333334, "train_probe_completions/max_length": 1784.3333333333333, "train_probe_completions/max_terminated_length": 1784.3333333333333, "train_probe_completions/mean_length": 630.1005452473959, "train_probe_completions/mean_terminated_length": 638.4372863769531, "train_probe_completions/min_length": 31.0, "train_probe_completions/min_terminated_length": 199.0, "train_probe_loss": 0.0, "train_probe_num_tokens": 196021611.0, "train_probe_reward": 1.2102249264717102, "train_probe_reward_std": 0.3475934366385142, "train_probe_rewards/accuracy_reward": 0.6631944477558136, "train_probe_rewards/brier_reward": 0.7719986935456594, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9852430522441864, "train_probe_rewards/mean_confidence_reward": 0.6746961772441864, "train_probe_runtime": 205.5697, "train_probe_samples_per_second": 4.865, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4342447866996129, "train_probe_signal/accuracy_reward/group_std_mean": 0.47248346110184986, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21712239334980646, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.21712239334980646, "train_probe_signal/advantage_abs_mean": 0.30893393854300183, "train_probe_signal/advantage_pre_scale_abs_mean": 0.30893393854300183, "train_probe_signal/advantage_pre_scale_std": 0.3456893265247345, "train_probe_signal/advantage_std": 0.3456893265247345, "train_probe_signal/brier_reward/centered_abs_mean": 0.16557522614796957, "train_probe_signal/brier_reward/group_std_mean": 0.1985683018962542, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08278761307398479, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.08278761307398479, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.028157552083333332, "train_probe_signal/format_reward/group_std_mean": 0.0715202484279871, "train_probe_signal/format_reward/group_zero_std_frac": 0.6388889054457346, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.014078776041666666, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.014078776041666666, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.06591253789762656, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.09998770679036777, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.59125400185682e-07, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 6.59125400185682e-07, "train_probe_steps_per_second": 0.029 }, { "calibration/aurc": 0.26389656392228356, "calibration/batch_distribution_entropy": 0.45371543209103865, "calibration/batch_entropy_100bins": 0.23856088528938027, "calibration/batch_entropy_10bins": 0.45371543209103865, "calibration/batch_entropy_50bins": 0.28083001427987325, "calibration/batch_uniqueness": 0.20227893860033813, "calibration/confidence_entropy": 0.6076272761171853, "calibration/coverage@0%": 0.003161061382141975, "calibration/coverage@1%": 0.003161061382141975, "calibration/coverage@10%": 0.03545272804880864, "calibration/coverage@15%": 0.066505359627756, "calibration/coverage@20%": 0.1984970699244401, "calibration/coverage@25%": 0.442561798088433, "calibration/coverage@30%": 0.5094067178745292, "calibration/coverage@5%": 0.003161061382141975, "calibration/distribution_entropy_10": 0.45371543209103865, "calibration/distribution_entropy_100": 0.23856088528938027, "calibration/ece": 0.07547122674488144, "calibration/mean_confidence": 0.6852840604168908, "calibration/unique_confidence_per_question": 0.019270833333333334, "calibration/unique_confidences": 7.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011979166666666674, "completions/max_length": 3383.4, "completions/max_terminated_length": 3383.4, "completions/mean_length": 634.3682373046875, "completions/mean_terminated_length": 642.1746948242187, "completions/min_length": 0.0, "completions/min_terminated_length": 176.6, "epoch": 0.25240384615384615, "grad_norm": 0.0006580045446753502, "learning_rate": 2.5240384615384618e-06, "loss": -0.0119, "num_tokens": 206421181.0, "reward": 1.2218866109848023, "reward_std": 0.19819111227989197, "rewards/accuracy_reward": 0.6784722208976746, "rewards/brier_reward": 0.7780476689338685, "rewards/confidence_one_or_zero": 0.0006076388992369175, "rewards/format_reward": 0.9872395873069764, "rewards/mean_confidence_reward": 0.6806759357452392, "sampling/batch_mean_priority_error": 0.011409722222222226, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.9, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.0002556914696469903, "sampling/priority_kl": 0.030000254139304162, "sampling/priority_scale": 1.098377120634541, "sampling/prob_entropy": 10.278963851928712, "sampling/prob_max": 3.757354352273978e-05, "sampling/prob_min": 1.2527930266514886e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.24720000028610228, "sampling/prompt_draws_total": 7416.0, "sampling/seen_fraction": 0.23444666862487792, "sampling/unseen_fraction": 0.765553331375122, "signal/accuracy_reward/centered_abs_mean": 0.191796875, "signal/accuracy_reward/group_std_mean": 0.2503849893808365, "signal/accuracy_reward/group_zero_std_frac": 0.3000000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0958984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0958984375, "signal/advantage_abs_mean": 0.14796263575553895, "signal/advantage_pre_scale_abs_mean": 0.14796263575553895, "signal/advantage_pre_scale_std": 0.23866211771965026, "signal/advantage_std": 0.23866211771965026, "signal/brier_reward/centered_abs_mean": 0.09807889461517334, "signal/brier_reward/group_std_mean": 0.12925267368555068, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04903944730758667, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04903944730758667, "signal/confidence_one_or_zero/centered_abs_mean": 0.0011338975746184587, "signal/confidence_one_or_zero/group_std_mean": 0.002487868396565318, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9888888835906983, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.133897491456537e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.133897491456537e-08, "signal/format_reward/centered_abs_mean": 0.021641710214316844, "signal/format_reward/group_std_mean": 0.0401812169700861, "signal/format_reward/group_zero_std_frac": 0.8361111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010820855107158422, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010820855107158422, "signal/mean_confidence_reward/centered_abs_mean": 0.0599391832947731, "signal/mean_confidence_reward/group_std_mean": 0.08279892951250076, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.993918307467538e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.993918307467538e-07, "step": 105 }, { "calibration/aurc": 0.26641149139303594, "calibration/batch_distribution_entropy": 0.4376444450609268, "calibration/batch_entropy_100bins": 0.22824844943430406, "calibration/batch_entropy_10bins": 0.4376444450609268, "calibration/batch_entropy_50bins": 0.26869038164509174, "calibration/batch_uniqueness": 0.15933272420322098, "calibration/confidence_entropy": 0.5984371837654384, "calibration/coverage@0%": 0.0031290796344647515, "calibration/coverage@1%": 0.0031290796344647515, "calibration/coverage@10%": 0.0031290796344647515, "calibration/coverage@15%": 0.1655629420683272, "calibration/coverage@20%": 0.3696741775111553, "calibration/coverage@25%": 0.5557291666666667, "calibration/coverage@30%": 0.6296296296296295, "calibration/coverage@5%": 0.0031290796344647515, "calibration/distribution_entropy_10": 0.4376444450609268, "calibration/distribution_entropy_100": 0.22824844943430406, "calibration/ece": 0.10459396096814946, "calibration/mean_confidence": 0.6990674712288072, "calibration/unique_confidence_per_question": 0.0171875, "calibration/unique_confidences": 6.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012934027777777768, "completions/max_length": 3801.6, "completions/max_terminated_length": 3801.6, "completions/mean_length": 622.4458251953125, "completions/mean_terminated_length": 630.5509155273437, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.2644230769230769, "grad_norm": 0.0007942788652144372, "learning_rate": 2.6442307692307696e-06, "loss": -0.0128, "num_tokens": 216648717.0, "reward": 1.2171909093856812, "reward_std": 0.19215842187404633, "rewards/accuracy_reward": 0.670312511920929, "rewards/brier_reward": 0.777683699131012, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9863715291023254, "rewards/mean_confidence_reward": 0.6971328139305115, "sampling/batch_mean_priority_error": 0.010491319444444447, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8666666666666666, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00026847636327147483, "sampling/priority_kl": 0.029999426752328872, "sampling/priority_scale": 1.0725965857040136, "sampling/prob_entropy": 10.278949737548828, "sampling/prob_max": 3.771020783460699e-05, "sampling/prob_min": 1.2901787158625666e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.2592000007629395, "sampling/prompt_draws_total": 7776.0, "sampling/seen_fraction": 0.24499333202838897, "sampling/unseen_fraction": 0.755006667971611, "signal/accuracy_reward/centered_abs_mean": 0.17727864384651185, "signal/accuracy_reward/group_std_mean": 0.23305801749229432, "signal/accuracy_reward/group_zero_std_frac": 0.34722222089767457, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08863932192325592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08863932192325592, "signal/advantage_abs_mean": 0.14164598286151886, "signal/advantage_pre_scale_abs_mean": 0.14164598286151886, "signal/advantage_pre_scale_std": 0.2351542145013809, "signal/advantage_std": 0.2351542145013809, "signal/brier_reward/centered_abs_mean": 0.09551279246807098, "signal/brier_reward/group_std_mean": 0.12876519858837127, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04775639623403549, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04775639623403549, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.023551432229578496, "signal/format_reward/group_std_mean": 0.04473615065217018, "signal/format_reward/group_zero_std_frac": 0.8166666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011775716114789248, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011775716114789248, "signal/mean_confidence_reward/centered_abs_mean": 0.057459690421819684, "signal/mean_confidence_reward/group_std_mean": 0.0822980672121048, "signal/mean_confidence_reward/group_zero_std_frac": 0.002777777798473835, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.745968792325584e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.745968792325584e-07, "step": 110 }, { "calibration/aurc": 0.24452289289161647, "calibration/batch_distribution_entropy": 0.3926279866114429, "calibration/batch_entropy_100bins": 0.20270615798497577, "calibration/batch_entropy_10bins": 0.3926279866114429, "calibration/batch_entropy_50bins": 0.23862240942175567, "calibration/batch_uniqueness": 0.027939809524415986, "calibration/confidence_entropy": 0.5924180543458887, "calibration/coverage@0%": 0.0015950943282864696, "calibration/coverage@1%": 0.0015950943282864696, "calibration/coverage@10%": 0.049489831170391727, "calibration/coverage@15%": 0.049489831170391727, "calibration/coverage@20%": 0.26600106589988565, "calibration/coverage@25%": 0.6319309239288837, "calibration/coverage@30%": 0.6708350335179248, "calibration/coverage@5%": 0.049489831170391727, "calibration/distribution_entropy_10": 0.3926279866114429, "calibration/distribution_entropy_100": 0.20270615798497577, "calibration/ece": 0.07019371319484866, "calibration/mean_confidence": 0.7079467487920142, "calibration/unique_confidence_per_question": 0.01614583333333333, "calibration/unique_confidences": 6.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019010416666666675, "completions/max_length": 3956.6, "completions/max_terminated_length": 3956.6, "completions/mean_length": 638.0852416992187, "completions/mean_terminated_length": 650.6302490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 160.8, "epoch": 0.2764423076923077, "grad_norm": 0.0006896952399984002, "learning_rate": 2.7644230769230775e-06, "loss": -0.0187, "num_tokens": 227081667.0, "reward": 1.2036991119384766, "reward_std": 0.19953635036945344, "rewards/accuracy_reward": 0.6560763955116272, "rewards/brier_reward": 0.7710996150970459, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9802083253860474, "rewards/mean_confidence_reward": 0.6872612833976746, "sampling/batch_mean_priority_error": 0.011953125000000004, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8805555555555555, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00028192729223519564, "sampling/priority_kl": 0.029999881237745284, "sampling/priority_scale": 1.0489887594711036, "sampling/prob_entropy": 10.278952598571777, "sampling/prob_max": 3.784664804697968e-05, "sampling/prob_min": 1.3257731006888207e-05, "sampling/prompt_draws_max": 3.0, "sampling/prompt_draws_mean": 0.2712000012397766, "sampling/prompt_draws_total": 8136.0, "sampling/seen_fraction": 0.2555199980735779, "sampling/unseen_fraction": 0.7444800019264222, "signal/accuracy_reward/centered_abs_mean": 0.18827039897441863, "signal/accuracy_reward/group_std_mean": 0.24098731875419616, "signal/accuracy_reward/group_zero_std_frac": 0.3416666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09413519948720932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09413519948720932, "signal/advantage_abs_mean": 0.15093255639076233, "signal/advantage_pre_scale_abs_mean": 0.15093255639076233, "signal/advantage_pre_scale_std": 0.24881745576858522, "signal/advantage_std": 0.24881745576858522, "signal/brier_reward/centered_abs_mean": 0.09960789233446121, "signal/brier_reward/group_std_mean": 0.131089448928833, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.049803946167230606, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.049803946167230606, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.03161892369389534, "signal/format_reward/group_std_mean": 0.05357316210865974, "signal/format_reward/group_zero_std_frac": 0.8, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01580946184694767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01580946184694767, "signal/mean_confidence_reward/centered_abs_mean": 0.05904405862092972, "signal/mean_confidence_reward/group_std_mean": 0.08394580483436584, "signal/mean_confidence_reward/group_zero_std_frac": 0.00555555559694767, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.904405384171696e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.904405384171696e-07, "step": 115 }, { "calibration/aurc": 0.29914037716218794, "calibration/batch_distribution_entropy": 0.3724276398058252, "calibration/batch_entropy_100bins": 0.1926613136182875, "calibration/batch_entropy_10bins": 0.3724276398058252, "calibration/batch_entropy_50bins": 0.22679778115751045, "calibration/batch_uniqueness": -0.008368241406920666, "calibration/confidence_entropy": 0.6073866082278495, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.02888283378746594, "calibration/coverage@15%": 0.07471616712079927, "calibration/coverage@20%": 0.1263931959166108, "calibration/coverage@25%": 0.1390916086150235, "calibration/coverage@30%": 0.4805212748275576, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.3724276398058252, "calibration/distribution_entropy_100": 0.1926613136182875, "calibration/ece": 0.06616926435315493, "calibration/mean_confidence": 0.6935359204897231, "calibration/unique_confidence_per_question": 0.011979166666666666, "calibration/unique_confidences": 4.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01440972222222221, "completions/max_length": 3441.8, "completions/max_terminated_length": 3441.8, "completions/mean_length": 615.9207641601563, "completions/mean_terminated_length": 625.0067504882812, "completions/min_length": 0.0, "completions/min_terminated_length": 181.8, "epoch": 0.28846153846153844, "grad_norm": 0.0009943849872797728, "learning_rate": 2.8846153846153845e-06, "loss": -0.0133, "num_tokens": 237272690.0, "reward": 1.2263082265853882, "reward_std": 0.18344857096672057, "rewards/accuracy_reward": 0.6831597208976745, "rewards/brier_reward": 0.78411306142807, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9853298544883728, "rewards/mean_confidence_reward": 0.6838411569595337, "sampling/batch_mean_priority_error": 0.013315972222222222, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8416666666666668, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.0002971566456835717, "sampling/priority_kl": 0.029999834671616554, "sampling/priority_scale": 1.0274231314193458, "sampling/prob_entropy": 10.278933334350587, "sampling/prob_max": 3.79780241928529e-05, "sampling/prob_min": 1.2208350563014392e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.28320000171661375, "sampling/prompt_draws_total": 8496.0, "sampling/seen_fraction": 0.26567999720573426, "sampling/unseen_fraction": 0.7343200027942658, "signal/accuracy_reward/centered_abs_mean": 0.17330729067325593, "signal/accuracy_reward/group_std_mean": 0.22918421924114227, "signal/accuracy_reward/group_zero_std_frac": 0.347222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08665364533662796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08665364533662796, "signal/advantage_abs_mean": 0.13476323038339616, "signal/advantage_pre_scale_abs_mean": 0.13476323038339616, "signal/advantage_pre_scale_std": 0.22975938022136688, "signal/advantage_std": 0.22975938022136688, "signal/brier_reward/centered_abs_mean": 0.08760800659656524, "signal/brier_reward/group_std_mean": 0.11845918893814086, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04380400329828262, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04380400329828262, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.02306315116584301, "signal/format_reward/group_std_mean": 0.03991239666938782, "signal/format_reward/group_zero_std_frac": 0.8472222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011531575582921505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011531575582921505, "signal/mean_confidence_reward/centered_abs_mean": 0.051366379112005235, "signal/mean_confidence_reward/group_std_mean": 0.07370935529470443, "signal/mean_confidence_reward/group_zero_std_frac": 0.002777777798473835, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.136637696523394e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.136637696523394e-07, "step": 120 }, { "calibration/aurc": 0.28633294767991674, "calibration/batch_distribution_entropy": 0.4130528797346272, "calibration/batch_entropy_100bins": 0.21892998572305739, "calibration/batch_entropy_10bins": 0.4130528797346272, "calibration/batch_entropy_50bins": 0.2577208369356918, "calibration/batch_uniqueness": 0.10911798642188608, "calibration/confidence_entropy": 0.6022333939945146, "calibration/coverage@0%": 0.0005208333333333333, "calibration/coverage@1%": 0.0005208333333333333, "calibration/coverage@10%": 0.022452948215839864, "calibration/coverage@15%": 0.0445582113737346, "calibration/coverage@20%": 0.1449467695747928, "calibration/coverage@25%": 0.43465486633249795, "calibration/coverage@30%": 0.46481359649122805, "calibration/coverage@5%": 0.0005208333333333333, "calibration/distribution_entropy_10": 0.4130528797346272, "calibration/distribution_entropy_100": 0.21892998572305739, "calibration/ece": 0.08499723916874016, "calibration/mean_confidence": 0.696090644907764, "calibration/unique_confidence_per_question": 0.014583333333333334, "calibration/unique_confidences": 5.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013888888888888885, "completions/max_length": 3665.8, "completions/max_terminated_length": 3665.8, "completions/mean_length": 659.197314453125, "completions/mean_terminated_length": 668.5060913085938, "completions/min_length": 0.0, "completions/min_terminated_length": 202.4, "epoch": 0.3004807692307692, "grad_norm": 0.0009211119031533599, "learning_rate": 3.0048076923076923e-06, "loss": -0.0125, "num_tokens": 248003059.0, "reward": 1.1965364456176757, "reward_std": 0.19677658081054689, "rewards/accuracy_reward": 0.6400173664093017, "rewards/brier_reward": 0.7674516320228577, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9855902671813965, "rewards/mean_confidence_reward": 0.6775911569595336, "sampling/batch_mean_priority_error": 0.01447222222222222, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8083333333333333, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.0003136721788905561, "sampling/priority_kl": 0.02999979183077812, "sampling/priority_scale": 1.0074425875907764, "sampling/prob_entropy": 10.27892837524414, "sampling/prob_max": 3.810692724073306e-05, "sampling/prob_min": 1.2523106124717743e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.29520000219345094, "sampling/prompt_draws_total": 8856.0, "sampling/seen_fraction": 0.27564666271209715, "sampling/unseen_fraction": 0.7243533372879029, "signal/accuracy_reward/centered_abs_mean": 0.19204101264476775, "signal/accuracy_reward/group_std_mean": 0.24852104783058165, "signal/accuracy_reward/group_zero_std_frac": 0.3083333343267441, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09602050632238388, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09602050632238388, "signal/advantage_abs_mean": 0.14804043769836425, "signal/advantage_pre_scale_abs_mean": 0.14804043769836425, "signal/advantage_pre_scale_std": 0.23920418024063111, "signal/advantage_std": 0.23920418024063111, "signal/brier_reward/centered_abs_mean": 0.0960723802447319, "signal/brier_reward/group_std_mean": 0.1261401355266571, "signal/brier_reward/group_zero_std_frac": 0.008333333395421505, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04803619012236595, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04803619012236595, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.023676214925944804, "signal/format_reward/group_std_mean": 0.042564719542860986, "signal/format_reward/group_zero_std_frac": 0.8305555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011838107462972402, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011838107462972402, "signal/mean_confidence_reward/centered_abs_mean": 0.05564128011465073, "signal/mean_confidence_reward/group_std_mean": 0.07778785824775696, "signal/mean_confidence_reward/group_zero_std_frac": 0.013888888992369175, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.564127889101656e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.564127889101656e-07, "step": 125 }, { "calibration/aurc": 0.2770300314859839, "calibration/batch_distribution_entropy": 0.4395422044627269, "calibration/batch_entropy_100bins": 0.24320209729782186, "calibration/batch_entropy_10bins": 0.4395422044627269, "calibration/batch_entropy_50bins": 0.2862935739620296, "calibration/batch_uniqueness": 0.21519033964356288, "calibration/confidence_entropy": 0.6144014297973173, "calibration/coverage@0%": 0.0005305039787798408, "calibration/coverage@1%": 0.0005305039787798408, "calibration/coverage@10%": 0.056487505235236636, "calibration/coverage@15%": 0.18665173868483526, "calibration/coverage@20%": 0.36770437026378266, "calibration/coverage@25%": 0.4574792010415144, "calibration/coverage@30%": 0.7263157894736842, "calibration/coverage@5%": 0.035267346084043, "calibration/distribution_entropy_10": 0.4395422044627269, "calibration/distribution_entropy_100": 0.24320209729782186, "calibration/ece": 0.1087024526647045, "calibration/mean_confidence": 0.6762298586141063, "calibration/unique_confidence_per_question": 0.015104166666666665, "calibration/unique_confidences": 5.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01857638888888886, "completions/max_length": 3667.8, "completions/max_terminated_length": 3667.8, "completions/mean_length": 665.5612915039062, "completions/mean_terminated_length": 678.1058227539063, "completions/min_length": 0.0, "completions/min_terminated_length": 205.8, "epoch": 0.3125, "grad_norm": 0.0007662621792405844, "learning_rate": 3.125e-06, "loss": -0.0174, "num_tokens": 258767733.0, "reward": 1.2184621334075927, "reward_std": 0.18495941460132598, "rewards/accuracy_reward": 0.6723090171813965, "rewards/brier_reward": 0.783264982700348, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.981336796283722, "rewards/mean_confidence_reward": 0.664318585395813, "sampling/batch_mean_priority_error": 0.015236111111111112, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8055555555555556, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00033118717256002127, "sampling/priority_kl": 0.03000081516802311, "sampling/priority_scale": 0.9889760912163184, "sampling/prob_entropy": 10.27894229888916, "sampling/prob_max": 3.823187798843719e-05, "sampling/prob_min": 1.2823050019505899e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.3072000026702881, "sampling/prompt_draws_total": 9216.0, "sampling/seen_fraction": 0.28531333804130554, "sampling/unseen_fraction": 0.7146866619586945, "signal/accuracy_reward/centered_abs_mean": 0.17028537392616272, "signal/accuracy_reward/group_std_mean": 0.22526440620422364, "signal/accuracy_reward/group_zero_std_frac": 0.3555555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08514268696308136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08514268696308136, "signal/advantage_abs_mean": 0.13408068269491197, "signal/advantage_pre_scale_abs_mean": 0.13408068269491197, "signal/advantage_pre_scale_std": 0.22951894402503967, "signal/advantage_std": 0.22951894402503967, "signal/brier_reward/centered_abs_mean": 0.08635973781347275, "signal/brier_reward/group_std_mean": 0.11832628399133682, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04317986890673638, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04317986890673638, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.03133138045668602, "signal/format_reward/group_std_mean": 0.055094408243894576, "signal/format_reward/group_zero_std_frac": 0.7888888835906982, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01566569022834301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01566569022834301, "signal/mean_confidence_reward/centered_abs_mean": 0.06074300110340118, "signal/mean_confidence_reward/group_std_mean": 0.0846795991063118, "signal/mean_confidence_reward/group_zero_std_frac": 0.00555555559694767, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.074300131331257e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.074300131331257e-07, "step": 130 }, { "calibration/aurc": 0.3045633905704531, "calibration/batch_distribution_entropy": 0.4904286109595712, "calibration/batch_entropy_100bins": 0.27985768204534994, "calibration/batch_entropy_10bins": 0.4904286109595712, "calibration/batch_entropy_50bins": 0.3294439352444273, "calibration/batch_uniqueness": 0.3409336534741945, "calibration/confidence_entropy": 0.612664591868816, "calibration/coverage@0%": 0.0010471275946903505, "calibration/coverage@1%": 0.0010471275946903505, "calibration/coverage@10%": 0.04461668140046463, "calibration/coverage@15%": 0.12446196389804717, "calibration/coverage@20%": 0.25337443074154853, "calibration/coverage@25%": 0.3707428517941801, "calibration/coverage@30%": 0.5226702799638021, "calibration/coverage@5%": 0.0010471275946903505, "calibration/distribution_entropy_10": 0.4904286109595712, "calibration/distribution_entropy_100": 0.27985768204534994, "calibration/ece": 0.1163251270119496, "calibration/mean_confidence": 0.6686119593374754, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018576388888888885, "completions/max_length": 3678.8, "completions/max_terminated_length": 3678.8, "completions/mean_length": 694.5222412109375, "completions/mean_terminated_length": 707.6533081054688, "completions/min_length": 0.0, "completions/min_terminated_length": 206.8, "epoch": 0.3245192307692308, "grad_norm": 0.0012281141243875027, "learning_rate": 3.245192307692308e-06, "loss": -0.0183, "num_tokens": 269858293.0, "reward": 1.2133918523788452, "reward_std": 0.19259853959083556, "rewards/accuracy_reward": 0.6637152910232544, "rewards/brier_reward": 0.7818053007125855, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.981249988079071, "rewards/mean_confidence_reward": 0.6537803769111633, "sampling/batch_mean_priority_error": 0.014315972222222223, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8055555555555556, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00034877662546932696, "sampling/priority_kl": 0.030000032857060432, "sampling/priority_scale": 0.9717868029838428, "sampling/prob_entropy": 10.278975677490234, "sampling/prob_max": 3.8358993333531546e-05, "sampling/prob_min": 1.3112282431393396e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.31919999718666076, "sampling/prompt_draws_total": 9576.0, "sampling/seen_fraction": 0.29510666728019713, "sampling/unseen_fraction": 0.7048933327198028, "signal/accuracy_reward/centered_abs_mean": 0.17950303554534913, "signal/accuracy_reward/group_std_mean": 0.23662585020065308, "signal/accuracy_reward/group_zero_std_frac": 0.33055556416511533, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08975151777267457, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08975151777267457, "signal/advantage_abs_mean": 0.14017547965049743, "signal/advantage_pre_scale_abs_mean": 0.14017547965049743, "signal/advantage_pre_scale_std": 0.23439024090766908, "signal/advantage_std": 0.23439024090766908, "signal/brier_reward/centered_abs_mean": 0.08922516703605651, "signal/brier_reward/group_std_mean": 0.12257051169872284, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.044612583518028257, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.044612583518028257, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.03119574598968029, "signal/format_reward/group_std_mean": 0.05646200627088547, "signal/format_reward/group_zero_std_frac": 0.7750000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015597872994840145, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015597872994840145, "signal/mean_confidence_reward/centered_abs_mean": 0.0654410794377327, "signal/mean_confidence_reward/group_std_mean": 0.09104419052600861, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.544107804984378e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.544107804984378e-07, "step": 135 }, { "calibration/aurc": 0.18749490988916556, "calibration/batch_distribution_entropy": 0.5415738700817532, "calibration/batch_entropy_100bins": 0.32182948863917904, "calibration/batch_entropy_10bins": 0.5415738700817532, "calibration/batch_entropy_50bins": 0.37885246686854196, "calibration/batch_uniqueness": 0.41862065299611595, "calibration/confidence_entropy": 0.5883915406218511, "calibration/coverage@0%": 0.008034688346883468, "calibration/coverage@1%": 0.008034688346883468, "calibration/coverage@10%": 0.1927743034213184, "calibration/coverage@15%": 0.2530638476572433, "calibration/coverage@20%": 0.5535869100406655, "calibration/coverage@25%": 0.7756670084830667, "calibration/coverage@30%": 0.9135496445544167, "calibration/coverage@5%": 0.09550135501355014, "calibration/distribution_entropy_10": 0.5415738700817532, "calibration/distribution_entropy_100": 0.32182948863917904, "calibration/ece": 0.07815413711451906, "calibration/mean_confidence": 0.6764555789425304, "calibration/unique_confidence_per_question": 0.02395833333333333, "calibration/unique_confidences": 9.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.028038194444444463, "completions/max_length": 3858.4, "completions/max_terminated_length": 3858.4, "completions/mean_length": 683.644970703125, "completions/mean_terminated_length": 703.34462890625, "completions/min_length": 0.0, "completions/min_terminated_length": 201.8, "epoch": 0.33653846153846156, "grad_norm": 0.0006961016333661973, "learning_rate": 3.365384615384616e-06, "loss": -0.0251, "num_tokens": 280792859.0, "reward": 1.236612319946289, "reward_std": 0.19623805284500123, "rewards/accuracy_reward": 0.7059027791023255, "rewards/brier_reward": 0.7958673954010009, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9714409589767456, "rewards/mean_confidence_reward": 0.6710633635520935, "sampling/batch_mean_priority_error": 0.011197916666666667, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.825, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.00036346286069601776, "sampling/priority_kl": 0.030000948533415794, "sampling/priority_scale": 0.9559410989983007, "sampling/prob_entropy": 10.278955841064453, "sampling/prob_max": 3.8486441917484625e-05, "sampling/prob_min": 1.3388117440626957e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.3312000036239624, "sampling/prompt_draws_total": 9936.0, "sampling/seen_fraction": 0.30485999584198, "sampling/unseen_fraction": 0.69514000415802, "signal/accuracy_reward/centered_abs_mean": 0.17124565839767455, "signal/accuracy_reward/group_std_mean": 0.22862593531608583, "signal/accuracy_reward/group_zero_std_frac": 0.3444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08562282919883728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08562282919883728, "signal/advantage_abs_mean": 0.14275099337100983, "signal/advantage_pre_scale_abs_mean": 0.14275099337100983, "signal/advantage_pre_scale_std": 0.24261143207550048, "signal/advantage_std": 0.24261143207550048, "signal/brier_reward/centered_abs_mean": 0.09853724390268326, "signal/brier_reward/group_std_mean": 0.1336524322628975, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04926862195134163, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04926862195134163, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.04031575620174408, "signal/format_reward/group_std_mean": 0.06712066158652305, "signal/format_reward/group_zero_std_frac": 0.7472222328186036, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02015787810087204, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02015787810087204, "signal/mean_confidence_reward/centered_abs_mean": 0.07758951485157013, "signal/mean_confidence_reward/group_std_mean": 0.10319010466337204, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.758951369396527e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.758951369396527e-07, "step": 140 }, { "calibration/aurc": 0.22960033807496577, "calibration/batch_distribution_entropy": 0.4959596321391365, "calibration/batch_entropy_100bins": 0.2828537847475544, "calibration/batch_entropy_10bins": 0.4959596321391365, "calibration/batch_entropy_50bins": 0.3329708988689269, "calibration/batch_uniqueness": 0.29531464718269096, "calibration/confidence_entropy": 0.5656490610797936, "calibration/coverage@0%": 0.013919308526411784, "calibration/coverage@1%": 0.013919308526411784, "calibration/coverage@10%": 0.013919308526411784, "calibration/coverage@15%": 0.21862042889332853, "calibration/coverage@20%": 0.45811751394931627, "calibration/coverage@25%": 0.793972602739726, "calibration/coverage@30%": 0.8, "calibration/coverage@5%": 0.013919308526411784, "calibration/distribution_entropy_10": 0.4959596321391365, "calibration/distribution_entropy_100": 0.2828537847475544, "calibration/ece": 0.07045841533199215, "calibration/mean_confidence": 0.7150147551275411, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02161458333333337, "completions/max_length": 3232.6, "completions/max_terminated_length": 3232.6, "completions/mean_length": 680.2881103515625, "completions/mean_terminated_length": 695.2589965820313, "completions/min_length": 0.0, "completions/min_terminated_length": 211.4, "epoch": 0.3485576923076923, "grad_norm": 0.0007520325016230345, "learning_rate": 3.4855769230769233e-06, "loss": -0.0219, "num_tokens": 291734930.0, "reward": 1.227646541595459, "reward_std": 0.1878492623567581, "rewards/accuracy_reward": 0.6833333373069763, "rewards/brier_reward": 0.7935603260993958, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9783854126930237, "rewards/mean_confidence_reward": 0.697022533416748, "sampling/batch_mean_priority_error": 0.010859374999999998, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8111111111111111, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.0003761312167625874, "sampling/priority_kl": 0.02999988608062267, "sampling/priority_scale": 0.94111793639604, "sampling/prob_entropy": 10.278964805603028, "sampling/prob_max": 3.8614498043898494e-05, "sampling/prob_min": 1.3654396025231107e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.3431999981403351, "sampling/prompt_draws_total": 10296.0, "sampling/seen_fraction": 0.31462000012397767, "sampling/unseen_fraction": 0.6853799998760224, "signal/accuracy_reward/centered_abs_mean": 0.16524522602558137, "signal/accuracy_reward/group_std_mean": 0.2168790102005005, "signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08262261301279068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08262261301279068, "signal/advantage_abs_mean": 0.13840331137180328, "signal/advantage_pre_scale_abs_mean": 0.13840331137180328, "signal/advantage_pre_scale_std": 0.23685503900051116, "signal/advantage_std": 0.23685503900051116, "signal/brier_reward/centered_abs_mean": 0.099394890666008, "signal/brier_reward/group_std_mean": 0.13434166312217713, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.049697445333004, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.049697445333004, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.03282877579331398, "signal/format_reward/group_std_mean": 0.057800959795713425, "signal/format_reward/group_zero_std_frac": 0.7722222208976746, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01641438789665699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01641438789665699, "signal/mean_confidence_reward/centered_abs_mean": 0.07129204124212266, "signal/mean_confidence_reward/group_std_mean": 0.09770053625106812, "signal/mean_confidence_reward/group_zero_std_frac": 0.002777777798473835, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.129204050215776e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.129204050215776e-07, "step": 145 }, { "calibration/aurc": 0.22222502440396444, "calibration/batch_distribution_entropy": 0.44819059016372265, "calibration/batch_entropy_100bins": 0.26214575431917136, "calibration/batch_entropy_10bins": 0.44819059016372265, "calibration/batch_entropy_50bins": 0.3085937405017596, "calibration/batch_uniqueness": 0.12955742353336192, "calibration/confidence_entropy": 0.5445938599239366, "calibration/coverage@0%": 0.003794037940379404, "calibration/coverage@1%": 0.003794037940379404, "calibration/coverage@10%": 0.01745031027333772, "calibration/coverage@15%": 0.16208621165564271, "calibration/coverage@20%": 0.1759586285469275, "calibration/coverage@25%": 0.7342802841846832, "calibration/coverage@30%": 0.8715341334626141, "calibration/coverage@5%": 0.003794037940379404, "calibration/distribution_entropy_10": 0.44819059016372265, "calibration/distribution_entropy_100": 0.26214575431917136, "calibration/ece": 0.06575164506779053, "calibration/mean_confidence": 0.7346546095304988, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024131944444444442, "completions/max_length": 3942.8, "completions/max_terminated_length": 3942.8, "completions/mean_length": 710.7791748046875, "completions/mean_terminated_length": 728.4054809570313, "completions/min_length": 0.0, "completions/min_terminated_length": 218.8, "epoch": 0.3605769230769231, "grad_norm": 0.0008258689776994288, "learning_rate": 3.605769230769231e-06, "loss": -0.0233, "num_tokens": 303037570.0, "reward": 1.235294795036316, "reward_std": 0.18425678908824922, "rewards/accuracy_reward": 0.6968750119209289, "rewards/brier_reward": 0.7978322505950928, "rewards/confidence_one_or_zero": 0.00026041667442768814, "rewards/format_reward": 0.9758680462837219, "rewards/mean_confidence_reward": 0.7146310806274414, "sampling/batch_mean_priority_error": 0.008072916666666667, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7638888888888888, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.0003874113957863301, "sampling/priority_kl": 0.030000269412994385, "sampling/priority_scale": 0.9272584259742871, "sampling/prob_entropy": 10.278953742980956, "sampling/prob_max": 3.8738126750104127e-05, "sampling/prob_min": 1.3909414155932609e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.35520000457763673, "sampling/prompt_draws_total": 10656.0, "sampling/seen_fraction": 0.3240066647529602, "sampling/unseen_fraction": 0.6759933352470398, "signal/accuracy_reward/centered_abs_mean": 0.15432942807674407, "signal/accuracy_reward/group_std_mean": 0.20061970353126526, "signal/accuracy_reward/group_zero_std_frac": 0.4444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07716471403837204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07716471403837204, "signal/advantage_abs_mean": 0.13734771311283112, "signal/advantage_pre_scale_abs_mean": 0.13734771311283112, "signal/advantage_pre_scale_std": 0.24063621163368226, "signal/advantage_std": 0.24063621163368226, "signal/brier_reward/centered_abs_mean": 0.10788649320602417, "signal/brier_reward/group_std_mean": 0.14216929376125337, "signal/brier_reward/group_zero_std_frac": 0.008333333395421505, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.053943246603012085, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.053943246603012085, "signal/confidence_one_or_zero/centered_abs_mean": 0.0005045572761446238, "signal/confidence_one_or_zero/group_std_mean": 0.0014731390401721, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.0455724931453e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.0455724931453e-09, "signal/format_reward/centered_abs_mean": 0.03764105886220932, "signal/format_reward/group_std_mean": 0.059189148247241974, "signal/format_reward/group_zero_std_frac": 0.794444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01882052943110466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01882052943110466, "signal/mean_confidence_reward/centered_abs_mean": 0.07287055253982544, "signal/mean_confidence_reward/group_std_mean": 0.0999222919344902, "signal/mean_confidence_reward/group_zero_std_frac": 0.008333333395421505, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.28705470010027e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.28705470010027e-07, "step": 150 }, { "epoch": 0.3605769230769231, "eval_calibration/aurc": 0.26528970355274367, "eval_calibration/batch_distribution_entropy": 0.45167656638554243, "eval_calibration/batch_entropy_100bins": 0.2686325582166219, "eval_calibration/batch_entropy_10bins": 0.45167656638554243, "eval_calibration/batch_entropy_50bins": 0.3162299010942306, "eval_calibration/batch_uniqueness": 0.07833556234382769, "eval_calibration/confidence_entropy": 0.5360695238200753, "eval_calibration/coverage@0%": 0.0008787346221441124, "eval_calibration/coverage@1%": 0.0008787346221441124, "eval_calibration/coverage@10%": 0.0008787346221441124, "eval_calibration/coverage@15%": 0.0008787346221441124, "eval_calibration/coverage@20%": 0.0008787346221441124, "eval_calibration/coverage@25%": 0.04569420035149385, "eval_calibration/coverage@30%": 0.8611599297012302, "eval_calibration/coverage@5%": 0.0008787346221441124, "eval_calibration/distribution_entropy_10": 0.45167656638554243, "eval_calibration/distribution_entropy_100": 0.2686325582166219, "eval_calibration/ece": 0.06616871704745168, "eval_calibration/mean_confidence": 0.7304920913884007, "eval_calibration/unique_confidence_per_question": 0.010416666666666666, "eval_calibration/unique_confidences": 12, "eval_completions/clipped_ratio": 0.01128472222222221, "eval_completions/max_length": 2444.5, "eval_completions/max_terminated_length": 2444.5, "eval_completions/mean_length": 721.1014099121094, "eval_completions/mean_terminated_length": 729.3672587076823, "eval_completions/min_length": 61.333333333333336, "eval_completions/min_terminated_length": 276.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 303037570.0, "eval_reward": 1.213831086953481, "eval_reward_std": 0.362030953168869, "eval_rewards/accuracy_reward": 0.6562500099341074, "eval_rewards/brier_reward": 0.783550351858139, "eval_rewards/confidence_one_or_zero": 0.0008680555814256271, "eval_rewards/format_reward": 0.987847218910853, "eval_rewards/mean_confidence_reward": 0.7216145892937978, "eval_runtime": 204.1612, "eval_samples_per_second": 4.898, "eval_signal/accuracy_reward/centered_abs_mean": 0.4382595469554265, "eval_signal/accuracy_reward/group_std_mean": 0.4749273806810379, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21912977347771326, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21912977347771326, "eval_signal/advantage_abs_mean": 0.32130875686804455, "eval_signal/advantage_pre_scale_abs_mean": 0.32130875686804455, "eval_signal/advantage_pre_scale_std": 0.3592623174190521, "eval_signal/advantage_std": 0.3592623174190521, "eval_signal/brier_reward/centered_abs_mean": 0.20470946778853735, "eval_signal/brier_reward/group_std_mean": 0.24981755266586939, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.10235473389426868, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.10235473389426868, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0016818575871487458, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0049104637776811915, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222288449606, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151002e-08, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151002e-08, "eval_signal/format_reward/centered_abs_mean": 0.023328993003815413, "eval_signal/format_reward/group_std_mean": 0.06276767483601968, "eval_signal/format_reward/group_zero_std_frac": 0.6666666865348816, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011664496501907706, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.011664496501907706, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.11676974222064018, "eval_signal/mean_confidence_reward/group_std_mean": 0.15584261218706766, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.1676973485919007e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.1676973485919007e-06, "eval_steps_per_second": 0.029, "step": 150 }, { "epoch": 0.3605769230769231, "step": 150, "train_probe_calibration/aurc": 0.22968415288321048, "train_probe_calibration/batch_distribution_entropy": 0.4644610670848097, "train_probe_calibration/batch_entropy_100bins": 0.2657689272124461, "train_probe_calibration/batch_entropy_10bins": 0.4644610670848097, "train_probe_calibration/batch_entropy_50bins": 0.31285888100927634, "train_probe_calibration/batch_uniqueness": 0.0961154339441112, "train_probe_calibration/confidence_entropy": 0.5351932847465817, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.0, "train_probe_calibration/coverage@15%": 0.0, "train_probe_calibration/coverage@20%": 0.0, "train_probe_calibration/coverage@25%": 0.782608695652174, "train_probe_calibration/coverage@30%": 0.9804791481810116, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.4644610670848097, "train_probe_calibration/distribution_entropy_100": 0.2657689272124461, "train_probe_calibration/ece": 0.047249334516415234, "train_probe_calibration/mean_confidence": 0.7368677905944986, "train_probe_calibration/unique_confidence_per_question": 0.009548611111111112, "train_probe_calibration/unique_confidences": 11, "train_probe_completions/clipped_ratio": 0.019965277777777773, "train_probe_completions/max_length": 2151.1666666666665, "train_probe_completions/max_terminated_length": 2151.1666666666665, "train_probe_completions/mean_length": 720.7236124674479, "train_probe_completions/mean_terminated_length": 735.2484944661459, "train_probe_completions/min_length": 41.333333333333336, "train_probe_completions/min_terminated_length": 247.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 303037570.0, "train_probe_reward": 1.2241229216257732, "train_probe_reward_std": 0.37155655523141223, "train_probe_rewards/accuracy_reward": 0.6805555522441864, "train_probe_rewards/brier_reward": 0.7893771429856619, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9782986044883728, "train_probe_rewards/mean_confidence_reward": 0.7208767135938009, "train_probe_runtime": 206.6309, "train_probe_samples_per_second": 4.84, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4214409738779068, "train_probe_signal/accuracy_reward/group_std_mean": 0.46496439973513287, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2107204869389534, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2107204869389534, "train_probe_signal/advantage_abs_mean": 0.31965141991774243, "train_probe_signal/advantage_pre_scale_abs_mean": 0.31965141991774243, "train_probe_signal/advantage_pre_scale_std": 0.36928872764110565, "train_probe_signal/advantage_std": 0.36928872764110565, "train_probe_signal/brier_reward/centered_abs_mean": 0.20193182677030563, "train_probe_signal/brier_reward/group_std_mean": 0.2502300019065539, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.10096591338515282, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.10096591338515282, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.04150390625, "train_probe_signal/format_reward/group_std_mean": 0.1078145479162534, "train_probe_signal/format_reward/group_zero_std_frac": 0.4444444552063942, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.020751953125, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.020751953125, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.11885850255688031, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.16483317812283835, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.1885849744430743e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.1885849744430743e-06, "train_probe_steps_per_second": 0.029 }, { "calibration/aurc": 0.2557523680414625, "calibration/batch_distribution_entropy": 0.4446285204684316, "calibration/batch_entropy_100bins": 0.2549144802425309, "calibration/batch_entropy_10bins": 0.4446285204684316, "calibration/batch_entropy_50bins": 0.30008120165977276, "calibration/batch_uniqueness": 0.06078989133921503, "calibration/confidence_entropy": 0.539813055658638, "calibration/coverage@0%": 0.009716893674677317, "calibration/coverage@1%": 0.009716893674677317, "calibration/coverage@10%": 0.009716893674677317, "calibration/coverage@15%": 0.018093857025462658, "calibration/coverage@20%": 0.3081061517111193, "calibration/coverage@25%": 0.48860219392747817, "calibration/coverage@30%": 0.8685701872786454, "calibration/coverage@5%": 0.009716893674677317, "calibration/distribution_entropy_10": 0.4446285204684316, "calibration/distribution_entropy_100": 0.2549144802425309, "calibration/ece": 0.08582490386189781, "calibration/mean_confidence": 0.7317781102423115, "calibration/unique_confidence_per_question": 0.024479166666666666, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020920138888888905, "completions/max_length": 4018.8, "completions/max_terminated_length": 4018.8, "completions/mean_length": 730.2631958007812, "completions/mean_terminated_length": 745.8842163085938, "completions/min_length": 0.0, "completions/min_terminated_length": 220.2, "epoch": 0.37259615384615385, "grad_norm": 0.0007210660842247307, "learning_rate": 3.725961538461539e-06, "loss": -0.022, "num_tokens": 314521082.0, "reward": 1.2058180570602417, "reward_std": 0.1962680220603943, "rewards/accuracy_reward": 0.6491319298744201, "rewards/brier_reward": 0.7835837721824646, "rewards/confidence_one_or_zero": 0.000260416668606922, "rewards/format_reward": 0.97890625, "rewards/mean_confidence_reward": 0.7079079747200012, "sampling/batch_mean_priority_error": 0.010718749999999999, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7972222222222223, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.0003989488526713103, "sampling/priority_kl": 0.030000166222453117, "sampling/priority_scale": 0.9142814338905737, "sampling/prob_entropy": 10.278959274291992, "sampling/prob_max": 3.8861997745698317e-05, "sampling/prob_min": 1.4154319433146157e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.3671999990940094, "sampling/prompt_draws_total": 11016.0, "sampling/seen_fraction": 0.3333600044250488, "sampling/unseen_fraction": 0.6666399955749511, "signal/accuracy_reward/centered_abs_mean": 0.17177734375, "signal/accuracy_reward/group_std_mean": 0.22341654002666472, "signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.085888671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.085888671875, "signal/advantage_abs_mean": 0.14426402002573013, "signal/advantage_pre_scale_abs_mean": 0.14426402002573013, "signal/advantage_pre_scale_std": 0.2449123114347458, "signal/advantage_std": 0.2449123114347458, "signal/brier_reward/centered_abs_mean": 0.11227346062660218, "signal/brier_reward/group_std_mean": 0.1499951809644699, "signal/brier_reward/group_zero_std_frac": 0.036111111380159856, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.05613673031330109, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.05613673031330109, "signal/confidence_one_or_zero/centered_abs_mean": 0.0005045572877861559, "signal/confidence_one_or_zero/group_std_mean": 0.0014731390401721, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.0455724931453e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.0455724931453e-09, "signal/format_reward/centered_abs_mean": 0.03428276889026165, "signal/format_reward/group_std_mean": 0.06242974400520325, "signal/format_reward/group_zero_std_frac": 0.75, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.017141384445130824, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017141384445130824, "signal/mean_confidence_reward/centered_abs_mean": 0.07776611000299453, "signal/mean_confidence_reward/group_std_mean": 0.1076754778623581, "signal/mean_confidence_reward/group_zero_std_frac": 0.04166666753590107, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.776610914334014e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.776610914334014e-07, "step": 155 }, { "calibration/aurc": 0.2439373889828011, "calibration/batch_distribution_entropy": 0.5504421513946065, "calibration/batch_entropy_100bins": 0.33446785407269986, "calibration/batch_entropy_10bins": 0.5504421513946065, "calibration/batch_entropy_50bins": 0.39373014616984314, "calibration/batch_uniqueness": 0.4654293862688167, "calibration/confidence_entropy": 0.5986497568225043, "calibration/coverage@0%": 0.001610020467583384, "calibration/coverage@1%": 0.001610020467583384, "calibration/coverage@10%": 0.14383160308307236, "calibration/coverage@15%": 0.19684997578648447, "calibration/coverage@20%": 0.30920282176116654, "calibration/coverage@25%": 0.531721487620661, "calibration/coverage@30%": 0.7082227931036897, "calibration/coverage@5%": 0.1370430913337251, "calibration/distribution_entropy_10": 0.5504421513946065, "calibration/distribution_entropy_100": 0.33446785407269986, "calibration/ece": 0.1376500234882277, "calibration/mean_confidence": 0.64873141349647, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02465277777777779, "completions/max_length": 2934.0, "completions/max_terminated_length": 2934.0, "completions/mean_length": 711.8350708007813, "completions/mean_terminated_length": 729.8376831054687, "completions/min_length": 0.0, "completions/min_terminated_length": 223.2, "epoch": 0.38461538461538464, "grad_norm": 0.0005352473235689104, "learning_rate": 3.846153846153847e-06, "loss": -0.0249, "num_tokens": 325805742.0, "reward": 1.2114521980285644, "reward_std": 0.1843463361263275, "rewards/accuracy_reward": 0.6668402671813964, "rewards/brier_reward": 0.7807913780212402, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9752604126930237, "rewards/mean_confidence_reward": 0.614529812335968, "sampling/batch_mean_priority_error": 0.01641840277777778, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.8166666666666667, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.0004144439706578851, "sampling/priority_kl": 0.030000307038426398, "sampling/priority_scale": 0.9023418128257618, "sampling/prob_entropy": 10.278948974609374, "sampling/prob_max": 3.8993890484562144e-05, "sampling/prob_min": 1.43893730637501e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.37919999957084655, "sampling/prompt_draws_total": 11376.0, "sampling/seen_fraction": 0.34318000078201294, "sampling/unseen_fraction": 0.6568199992179871, "signal/accuracy_reward/centered_abs_mean": 0.16278212070465087, "signal/accuracy_reward/group_std_mean": 0.21243431568145751, "signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08139106035232543, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08139106035232543, "signal/advantage_abs_mean": 0.13364365547895432, "signal/advantage_pre_scale_abs_mean": 0.13364365547895432, "signal/advantage_pre_scale_std": 0.23043625950813293, "signal/advantage_std": 0.23043625950813293, "signal/brier_reward/centered_abs_mean": 0.10205348432064057, "signal/brier_reward/group_std_mean": 0.1377951443195343, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.051026742160320285, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.051026742160320285, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.041357421875, "signal/format_reward/group_std_mean": 0.07086744979023933, "signal/format_reward/group_zero_std_frac": 0.7361111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0206787109375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0206787109375, "signal/mean_confidence_reward/centered_abs_mean": 0.09281512051820755, "signal/mean_confidence_reward/group_std_mean": 0.12052757143974305, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 9.281511893277639e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 9.281511893277639e-07, "step": 160 }, { "calibration/aurc": 0.2216550039950434, "calibration/batch_distribution_entropy": 0.5601830934410311, "calibration/batch_entropy_100bins": 0.3656942587139888, "calibration/batch_entropy_10bins": 0.5601830934410311, "calibration/batch_entropy_50bins": 0.4304893644745743, "calibration/batch_uniqueness": 0.5556512416662471, "calibration/confidence_entropy": 0.6361458858573619, "calibration/coverage@0%": 0.005390835579514825, "calibration/coverage@1%": 0.005390835579514825, "calibration/coverage@10%": 0.18005116166647134, "calibration/coverage@15%": 0.2898822023363278, "calibration/coverage@20%": 0.3762952458145887, "calibration/coverage@25%": 0.482056475647318, "calibration/coverage@30%": 0.7994447425005069, "calibration/coverage@5%": 0.12570333557951482, "calibration/distribution_entropy_10": 0.5601830934410311, "calibration/distribution_entropy_100": 0.3656942587139888, "calibration/ece": 0.13297860585793592, "calibration/mean_confidence": 0.5568156688622672, "calibration/unique_confidence_per_question": 0.022395833333333334, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02031250000000002, "completions/max_length": 3688.0, "completions/max_terminated_length": 3688.0, "completions/mean_length": 720.4741333007812, "completions/mean_terminated_length": 735.3773803710938, "completions/min_length": 0.0, "completions/min_terminated_length": 225.4, "epoch": 0.39663461538461536, "grad_norm": 0.0006447676569223404, "learning_rate": 3.966346153846154e-06, "loss": -0.0217, "num_tokens": 337217700.0, "reward": 1.2038058042526245, "reward_std": 0.17525742650032045, "rewards/accuracy_reward": 0.650868046283722, "rewards/brier_reward": 0.7770451307296753, "rewards/confidence_one_or_zero": 0.0003472222248092294, "rewards/format_reward": 0.979687511920929, "rewards/mean_confidence_reward": 0.5470833301544189, "sampling/batch_mean_priority_error": 0.02227604166666667, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7833333333333333, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.0004395937954541296, "sampling/priority_kl": 0.029999757558107375, "sampling/priority_scale": 0.8911275923484936, "sampling/prob_entropy": 10.278950691223145, "sampling/prob_max": 3.912305910489522e-05, "sampling/prob_min": 1.461659157939721e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.39120000004768374, "sampling/prompt_draws_total": 11736.0, "sampling/seen_fraction": 0.3527533352375031, "sampling/unseen_fraction": 0.6472466647624969, "signal/accuracy_reward/centered_abs_mean": 0.17914496660232543, "signal/accuracy_reward/group_std_mean": 0.23302085399627687, "signal/accuracy_reward/group_zero_std_frac": 0.3444444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08957248330116271, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08957248330116271, "signal/advantage_abs_mean": 0.12544142454862595, "signal/advantage_pre_scale_abs_mean": 0.12544142454862595, "signal/advantage_pre_scale_std": 0.21346945464611053, "signal/advantage_std": 0.21346945464611053, "signal/brier_reward/centered_abs_mean": 0.09407158941030502, "signal/brier_reward/group_std_mean": 0.12839447557926179, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04703579470515251, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04703579470515251, "signal/confidence_one_or_zero/centered_abs_mean": 0.0006618923507630825, "signal/confidence_one_or_zero/group_std_mean": 0.0016652445774525404, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.618923009682476e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.618923009682476e-09, "signal/format_reward/centered_abs_mean": 0.033984375, "signal/format_reward/group_std_mean": 0.06068957597017288, "signal/format_reward/group_zero_std_frac": 0.7611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0169921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0169921875, "signal/mean_confidence_reward/centered_abs_mean": 0.08409613519906997, "signal/mean_confidence_reward/group_std_mean": 0.10996766984462739, "signal/mean_confidence_reward/group_zero_std_frac": 0.0, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.409613201365573e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.409613201365573e-07, "step": 165 }, { "calibration/aurc": 0.20885690512845118, "calibration/batch_distribution_entropy": 0.5220946730173782, "calibration/batch_entropy_100bins": 0.31367912429843525, "calibration/batch_entropy_10bins": 0.5220946730173782, "calibration/batch_entropy_50bins": 0.3692579898384084, "calibration/batch_uniqueness": 0.445031632020122, "calibration/confidence_entropy": 0.6104902665786043, "calibration/coverage@0%": 0.0010627337520287912, "calibration/coverage@1%": 0.0010627337520287912, "calibration/coverage@10%": 0.09353585203159868, "calibration/coverage@15%": 0.29444203202348196, "calibration/coverage@20%": 0.5902344275452739, "calibration/coverage@25%": 0.6245352191020022, "calibration/coverage@30%": 0.7654614101797976, "calibration/coverage@5%": 0.0010627337520287912, "calibration/distribution_entropy_10": 0.5220946730173782, "calibration/distribution_entropy_100": 0.31367912429843525, "calibration/ece": 0.100633464513873, "calibration/mean_confidence": 0.6394755651544398, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02309027777777777, "completions/max_length": 3179.2, "completions/max_terminated_length": 3179.2, "completions/mean_length": 700.7806640625, "completions/mean_terminated_length": 717.4763305664062, "completions/min_length": 0.0, "completions/min_terminated_length": 207.2, "epoch": 0.40865384615384615, "grad_norm": 0.0006071875686757267, "learning_rate": 4.086538461538462e-06, "loss": -0.0234, "num_tokens": 348414821.0, "reward": 1.2222771883010863, "reward_std": 0.18217334151268005, "rewards/accuracy_reward": 0.6748263955116272, "rewards/brier_reward": 0.7928924798965454, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9768229246139526, "rewards/mean_confidence_reward": 0.6243949770927429, "sampling/batch_mean_priority_error": 0.01674305555555556, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7277777777777777, "sampling/error_ema_max": 0.05624999850988388, "sampling/error_ema_mean": 0.0004621386586222798, "sampling/priority_kl": 0.029999804869294166, "sampling/priority_scale": 0.8803985654609278, "sampling/prob_entropy": 10.278963851928712, "sampling/prob_max": 3.924263146473095e-05, "sampling/prob_min": 1.4835979709459934e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.4032000005245209, "sampling/prompt_draws_total": 12096.0, "sampling/seen_fraction": 0.3616133391857147, "sampling/unseen_fraction": 0.6383866608142853, "signal/accuracy_reward/centered_abs_mean": 0.1626953125, "signal/accuracy_reward/group_std_mean": 0.21717799603939056, "signal/accuracy_reward/group_zero_std_frac": 0.375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08134765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08134765625, "signal/advantage_abs_mean": 0.1283343553543091, "signal/advantage_pre_scale_abs_mean": 0.1283343553543091, "signal/advantage_pre_scale_std": 0.22712890207767486, "signal/advantage_std": 0.22712890207767486, "signal/brier_reward/centered_abs_mean": 0.09392926543951034, "signal/brier_reward/group_std_mean": 0.13066150546073912, "signal/brier_reward/group_zero_std_frac": 0.016666667349636555, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04696463271975517, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04696463271975517, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.03892686627805233, "signal/format_reward/group_std_mean": 0.07041922360658645, "signal/format_reward/group_zero_std_frac": 0.7222222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.019463433139026164, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.019463433139026164, "signal/mean_confidence_reward/centered_abs_mean": 0.08394259884953499, "signal/mean_confidence_reward/group_std_mean": 0.11203955709934235, "signal/mean_confidence_reward/group_zero_std_frac": 0.016666667349636555, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.394259793931269e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.394259793931269e-07, "step": 170 }, { "calibration/aurc": 0.2306798394707507, "calibration/batch_distribution_entropy": 0.38562760462846135, "calibration/batch_entropy_100bins": 0.20862008032824364, "calibration/batch_entropy_10bins": 0.38562760462846135, "calibration/batch_entropy_50bins": 0.2455841831177888, "calibration/batch_uniqueness": -0.08259410038297318, "calibration/confidence_entropy": 0.5396309137914621, "calibration/coverage@0%": 0.004212333055377086, "calibration/coverage@1%": 0.004212333055377086, "calibration/coverage@10%": 0.20264575342091234, "calibration/coverage@15%": 0.47179227748508346, "calibration/coverage@20%": 0.5140382667898963, "calibration/coverage@25%": 0.5668867516383812, "calibration/coverage@30%": 0.6010457534209123, "calibration/coverage@5%": 0.004212333055377086, "calibration/distribution_entropy_10": 0.38562760462846135, "calibration/distribution_entropy_100": 0.20862008032824364, "calibration/ece": 0.15894475234322572, "calibration/mean_confidence": 0.7424218898846368, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3438.0, "completions/max_terminated_length": 3438.0, "completions/mean_length": 675.3561645507813, "completions/mean_terminated_length": 686.0961181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 194.2, "epoch": 0.4206730769230769, "grad_norm": 0.0007575862691737711, "learning_rate": 4.20673076923077e-06, "loss": -0.0157, "num_tokens": 359284844.0, "reward": 1.2133283615112305, "reward_std": 0.18129928410053253, "rewards/accuracy_reward": 0.6555555582046508, "rewards/brier_reward": 0.786798620223999, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9842882037162781, "rewards/mean_confidence_reward": 0.716137146949768, "sampling/batch_mean_priority_error": 0.016479166666666673, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7444444444444444, "sampling/error_ema_max": 0.07174999564886093, "sampling/error_ema_mean": 0.0004800788010470569, "sampling/priority_kl": 0.03000015616416931, "sampling/priority_scale": 0.8703835427528247, "sampling/prob_entropy": 10.278958511352538, "sampling/prob_max": 3.936297362088226e-05, "sampling/prob_min": 1.5047048691485543e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.415200001001358, "sampling/prompt_draws_total": 12456.0, "sampling/seen_fraction": 0.3704466700553894, "sampling/unseen_fraction": 0.6295533299446106, "signal/accuracy_reward/centered_abs_mean": 0.16691623330116273, "signal/accuracy_reward/group_std_mean": 0.21467551290988923, "signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08345811665058137, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08345811665058137, "signal/advantage_abs_mean": 0.1352637842297554, "signal/advantage_pre_scale_abs_mean": 0.1352637842297554, "signal/advantage_pre_scale_std": 0.23468086123466492, "signal/advantage_std": 0.23468086123466492, "signal/brier_reward/centered_abs_mean": 0.10355425029993057, "signal/brier_reward/group_std_mean": 0.13606458008289338, "signal/brier_reward/group_zero_std_frac": 0.13611111268401146, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.051777125149965283, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.051777125149965283, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.02566731721162796, "signal/format_reward/group_std_mean": 0.04588991403579712, "signal/format_reward/group_zero_std_frac": 0.819444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01283365860581398, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01283365860581398, "signal/mean_confidence_reward/centered_abs_mean": 0.06004774793982506, "signal/mean_confidence_reward/group_std_mean": 0.08248940110206604, "signal/mean_confidence_reward/group_zero_std_frac": 0.20000000521540642, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.004774718348927e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.004774718348927e-07, "step": 175 }, { "calibration/aurc": 0.23788836281119957, "calibration/batch_distribution_entropy": 0.3135210491423183, "calibration/batch_entropy_100bins": 0.1708648779490961, "calibration/batch_entropy_10bins": 0.3135210491423183, "calibration/batch_entropy_50bins": 0.2011393697511128, "calibration/batch_uniqueness": -0.24657734863529396, "calibration/confidence_entropy": 0.5349542704968283, "calibration/coverage@0%": 0.0005277044854881266, "calibration/coverage@1%": 0.0005277044854881266, "calibration/coverage@10%": 0.1397947201922944, "calibration/coverage@15%": 0.1722554531765876, "calibration/coverage@20%": 0.20052770448548812, "calibration/coverage@25%": 0.6988382212767134, "calibration/coverage@30%": 0.7187335092348285, "calibration/coverage@5%": 0.0005277044854881266, "calibration/distribution_entropy_10": 0.3135210491423183, "calibration/distribution_entropy_100": 0.1708648779490961, "calibration/ece": 0.09756549754912114, "calibration/mean_confidence": 0.7477576020125064, "calibration/unique_confidence_per_question": 0.01614583333333333, "calibration/unique_confidences": 6.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008767361111111139, "completions/max_length": 2763.6, "completions/max_terminated_length": 2763.6, "completions/mean_length": 635.5590209960938, "completions/mean_terminated_length": 641.1690063476562, "completions/min_length": 0.0, "completions/min_terminated_length": 198.2, "epoch": 0.4326923076923077, "grad_norm": 0.0008080592378973961, "learning_rate": 4.326923076923077e-06, "loss": -0.0088, "num_tokens": 369703348.0, "reward": 1.2152099132537841, "reward_std": 0.17329217493534088, "rewards/accuracy_reward": 0.6556423544883728, "rewards/brier_reward": 0.7836167335510253, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9911458253860473, "rewards/mean_confidence_reward": 0.7386631965637207, "sampling/batch_mean_priority_error": 0.01854861111111112, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7194444444444446, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0005015615141019225, "sampling/priority_kl": 0.030001365393400193, "sampling/priority_scale": 0.8609562695259229, "sampling/prob_entropy": 10.278947448730468, "sampling/prob_max": 3.948486191802658e-05, "sampling/prob_min": 1.5251663899107371e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.4272000014781952, "sampling/prompt_draws_total": 12816.0, "sampling/seen_fraction": 0.37929333448410035, "sampling/unseen_fraction": 0.6207066655158997, "signal/accuracy_reward/centered_abs_mean": 0.16618380844593048, "signal/accuracy_reward/group_std_mean": 0.21578561067581176, "signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08309190422296524, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08309190422296524, "signal/advantage_abs_mean": 0.1297558218240738, "signal/advantage_pre_scale_abs_mean": 0.1297558218240738, "signal/advantage_pre_scale_std": 0.22660800516605378, "signal/advantage_std": 0.22660800516605378, "signal/brier_reward/centered_abs_mean": 0.09994956403970719, "signal/brier_reward/group_std_mean": 0.12978233247995377, "signal/brier_reward/group_zero_std_frac": 0.25, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04997478201985359, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04997478201985359, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.015700954757630826, "signal/format_reward/group_std_mean": 0.0326525878161192, "signal/format_reward/group_zero_std_frac": 0.8527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007850477378815413, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007850477378815413, "signal/mean_confidence_reward/centered_abs_mean": 0.0490852989256382, "signal/mean_confidence_reward/group_std_mean": 0.06961074024438858, "signal/mean_confidence_reward/group_zero_std_frac": 0.347222226858139, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.908529831482156e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.908529831482156e-07, "step": 180 }, { "calibration/aurc": 0.2195309944937937, "calibration/batch_distribution_entropy": 0.46614281882144226, "calibration/batch_entropy_100bins": 0.24709190776634332, "calibration/batch_entropy_10bins": 0.46614281882144226, "calibration/batch_entropy_50bins": 0.29087259590896697, "calibration/batch_uniqueness": 0.11431428212023334, "calibration/confidence_entropy": 0.5664291168624079, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.21468253968253967, "calibration/coverage@15%": 0.2621847211834123, "calibration/coverage@20%": 0.37131170531039637, "calibration/coverage@25%": 0.6566805328150284, "calibration/coverage@30%": 0.7982800231056182, "calibration/coverage@5%": 0.0, "calibration/distribution_entropy_10": 0.46614281882144226, "calibration/distribution_entropy_100": 0.24709190776634332, "calibration/ece": 0.11462966079621192, "calibration/mean_confidence": 0.7002868308990463, "calibration/unique_confidence_per_question": 0.018229166666666668, "calibration/unique_confidences": 7.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010069444444444442, "completions/max_length": 2952.8, "completions/max_terminated_length": 2952.8, "completions/mean_length": 627.3454956054687, "completions/mean_terminated_length": 633.7, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.44471153846153844, "grad_norm": 0.0006965293432585895, "learning_rate": 4.447115384615385e-06, "loss": -0.0111, "num_tokens": 380053184.0, "reward": 1.223191213607788, "reward_std": 0.16192339062690736, "rewards/accuracy_reward": 0.65625, "rewards/brier_reward": 0.8004485845565796, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9896701455116272, "rewards/mean_confidence_reward": 0.6838238000869751, "sampling/batch_mean_priority_error": 0.017381944444444446, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7138888888888889, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0005218912032432854, "sampling/priority_kl": 0.029999763518571854, "sampling/priority_scale": 0.852015310456045, "sampling/prob_entropy": 10.278967094421386, "sampling/prob_max": 3.960507456213236e-05, "sampling/prob_min": 1.54499604832381e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.43920000195503234, "sampling/prompt_draws_total": 13176.0, "sampling/seen_fraction": 0.3879866719245911, "sampling/unseen_fraction": 0.612013328075409, "signal/accuracy_reward/centered_abs_mean": 0.15327690839767455, "signal/accuracy_reward/group_std_mean": 0.2048683673143387, "signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07663845419883727, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07663845419883727, "signal/advantage_abs_mean": 0.11860896944999695, "signal/advantage_pre_scale_abs_mean": 0.11860896944999695, "signal/advantage_pre_scale_std": 0.2088211804628372, "signal/advantage_std": 0.2088211804628372, "signal/brier_reward/centered_abs_mean": 0.09137874394655228, "signal/brier_reward/group_std_mean": 0.12084674090147018, "signal/brier_reward/group_zero_std_frac": 0.13333333544433118, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04568937197327614, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04568937197327614, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.017301432229578494, "signal/format_reward/group_std_mean": 0.03283517360687256, "signal/format_reward/group_zero_std_frac": 0.8638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008650716114789247, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008650716114789247, "signal/mean_confidence_reward/centered_abs_mean": 0.060839305818080905, "signal/mean_confidence_reward/group_std_mean": 0.08073554188013077, "signal/mean_confidence_reward/group_zero_std_frac": 0.1666666679084301, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.083930202294141e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.083930202294141e-07, "step": 185 }, { "calibration/aurc": 0.21473587964092697, "calibration/batch_distribution_entropy": 0.6310474175894865, "calibration/batch_entropy_100bins": 0.35102006180287754, "calibration/batch_entropy_10bins": 0.6310474175894865, "calibration/batch_entropy_50bins": 0.41321513729733095, "calibration/batch_uniqueness": 0.4533243875178625, "calibration/confidence_entropy": 0.5994709288007358, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.07847561271199939, "calibration/coverage@15%": 0.07847561271199939, "calibration/coverage@20%": 0.45539064069003243, "calibration/coverage@25%": 0.8117419117341532, "calibration/coverage@30%": 0.9213213582677164, "calibration/coverage@5%": 0.07847561271199939, "calibration/distribution_entropy_10": 0.6310474175894865, "calibration/distribution_entropy_100": 0.35102006180287754, "calibration/ece": 0.139439512544235, "calibration/mean_confidence": 0.595194758209025, "calibration/unique_confidence_per_question": 0.022395833333333334, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012152777777777768, "completions/max_length": 3434.0, "completions/max_terminated_length": 3434.0, "completions/mean_length": 632.26484375, "completions/mean_terminated_length": 640.1283569335938, "completions/min_length": 0.0, "completions/min_terminated_length": 182.2, "epoch": 0.4567307692307692, "grad_norm": 0.0005936509114690125, "learning_rate": 4.567307692307692e-06, "loss": -0.0125, "num_tokens": 390448267.0, "reward": 1.2223145484924316, "reward_std": 0.15787692070007325, "rewards/accuracy_reward": 0.6589409828186035, "rewards/brier_reward": 0.7980021953582763, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9876736164093017, "rewards/mean_confidence_reward": 0.6120138883590698, "sampling/batch_mean_priority_error": 0.021647569444444445, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6944444444444444, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0005465433234348894, "sampling/priority_kl": 0.03000170923769474, "sampling/priority_scale": 0.8435857356758788, "sampling/prob_entropy": 10.278957557678222, "sampling/prob_max": 3.9722044311929495e-05, "sampling/prob_min": 1.5640574201825074e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.451199996471405, "sampling/prompt_draws_total": 13536.0, "sampling/seen_fraction": 0.3963599979877472, "sampling/unseen_fraction": 0.6036400020122528, "signal/accuracy_reward/centered_abs_mean": 0.1682779908180237, "signal/accuracy_reward/group_std_mean": 0.22021067440509795, "signal/accuracy_reward/group_zero_std_frac": 0.37777777910232546, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08413899540901185, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08413899540901185, "signal/advantage_abs_mean": 0.11482741385698318, "signal/advantage_pre_scale_abs_mean": 0.11482741385698318, "signal/advantage_pre_scale_std": 0.19687965214252473, "signal/advantage_std": 0.19687965214252473, "signal/brier_reward/centered_abs_mean": 0.09163180440664291, "signal/brier_reward/group_std_mean": 0.12160668224096298, "signal/brier_reward/group_zero_std_frac": 0.03333333395421505, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.045815902203321456, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.045815902203321456, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.020030382089316845, "signal/format_reward/group_std_mean": 0.03791286274790764, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010015191044658422, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010015191044658422, "signal/mean_confidence_reward/centered_abs_mean": 0.07467366904020309, "signal/mean_confidence_reward/group_std_mean": 0.09739873111248017, "signal/mean_confidence_reward/group_zero_std_frac": 0.05000000111758709, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.4673666858871e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.4673666858871e-07, "step": 190 }, { "calibration/aurc": 0.20660881131868614, "calibration/batch_distribution_entropy": 0.4472838898131556, "calibration/batch_entropy_100bins": 0.24029866810382172, "calibration/batch_entropy_10bins": 0.4472838898131556, "calibration/batch_entropy_50bins": 0.28287570409194335, "calibration/batch_uniqueness": -0.012685817494700501, "calibration/confidence_entropy": 0.5425070079135119, "calibration/coverage@0%": 0.00263863235518769, "calibration/coverage@1%": 0.00263863235518769, "calibration/coverage@10%": 0.2060695032310762, "calibration/coverage@15%": 0.25063183744858286, "calibration/coverage@20%": 0.5009042063459346, "calibration/coverage@25%": 0.7282959128636407, "calibration/coverage@30%": 0.7795261848813401, "calibration/coverage@5%": 0.07927905230269425, "calibration/distribution_entropy_10": 0.4472838898131556, "calibration/distribution_entropy_100": 0.24029866810382172, "calibration/ece": 0.10933349429072714, "calibration/mean_confidence": 0.7094343831929872, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014236111111111116, "completions/max_length": 3384.8, "completions/max_terminated_length": 3384.8, "completions/mean_length": 604.729345703125, "completions/mean_terminated_length": 613.4677368164063, "completions/min_length": 0.0, "completions/min_terminated_length": 181.4, "epoch": 0.46875, "grad_norm": 0.0006664813263341784, "learning_rate": 4.6875000000000004e-06, "loss": -0.0163, "num_tokens": 400524029.0, "reward": 1.2562130212783813, "reward_std": 0.16529837548732756, "rewards/accuracy_reward": 0.7055555582046509, "rewards/brier_reward": 0.8210926651954651, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9857638955116272, "rewards/mean_confidence_reward": 0.6951171875, "sampling/batch_mean_priority_error": 0.017201388888888898, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.711111111111111, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0005688484525308013, "sampling/priority_kl": 0.0300009872764349, "sampling/priority_scale": 0.8355700433952734, "sampling/prob_entropy": 10.27896671295166, "sampling/prob_max": 3.9841223770054055e-05, "sampling/prob_min": 1.5827041352167726e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.46320000290870667, "sampling/prompt_draws_total": 13896.0, "sampling/seen_fraction": 0.4048066735267639, "sampling/unseen_fraction": 0.5951933264732361, "signal/accuracy_reward/centered_abs_mean": 0.15091145932674407, "signal/accuracy_reward/group_std_mean": 0.20120739936828613, "signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07545572966337204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07545572966337204, "signal/advantage_abs_mean": 0.12027461379766465, "signal/advantage_pre_scale_abs_mean": 0.12027461379766465, "signal/advantage_pre_scale_std": 0.2162972718477249, "signal/advantage_std": 0.2162972718477249, "signal/brier_reward/centered_abs_mean": 0.09864553064107895, "signal/brier_reward/group_std_mean": 0.13091023713350297, "signal/brier_reward/group_zero_std_frac": 0.20277777761220933, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.049322765320539474, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.049322765320539474, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.023763021267950536, "signal/format_reward/group_std_mean": 0.04480189755558968, "signal/format_reward/group_zero_std_frac": 0.8138889074325562, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011881510633975268, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011881510633975268, "signal/mean_confidence_reward/centered_abs_mean": 0.06588922217488288, "signal/mean_confidence_reward/group_std_mean": 0.08890577405691147, "signal/mean_confidence_reward/group_zero_std_frac": 0.2833333373069763, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.588921792172187e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.588921792172187e-07, "step": 195 }, { "calibration/aurc": 0.23729910544861918, "calibration/batch_distribution_entropy": 0.4545196357256331, "calibration/batch_entropy_100bins": 0.24545220588673908, "calibration/batch_entropy_10bins": 0.4545196357256331, "calibration/batch_entropy_50bins": 0.288942365386451, "calibration/batch_uniqueness": -0.03244423971310495, "calibration/confidence_entropy": 0.5201880994887154, "calibration/coverage@0%": 0.03456750156587587, "calibration/coverage@1%": 0.03456750156587587, "calibration/coverage@10%": 0.24514294203790565, "calibration/coverage@15%": 0.41627155096179014, "calibration/coverage@20%": 0.431494648074651, "calibration/coverage@25%": 0.5556250906734358, "calibration/coverage@30%": 0.5726113920432987, "calibration/coverage@5%": 0.03456750156587587, "calibration/distribution_entropy_10": 0.4545196357256331, "calibration/distribution_entropy_100": 0.24545220588673908, "calibration/ece": 0.15331469183532745, "calibration/mean_confidence": 0.7156514431371688, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013368055555555581, "completions/max_length": 3403.8, "completions/max_terminated_length": 3403.8, "completions/mean_length": 616.7714599609375, "completions/mean_terminated_length": 625.16923828125, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.4807692307692308, "grad_norm": 0.0007234844961203635, "learning_rate": 4.807692307692308e-06, "loss": -0.0135, "num_tokens": 410753716.0, "reward": 1.2360211133956909, "reward_std": 0.16374669075012208, "rewards/accuracy_reward": 0.6888020753860473, "rewards/brier_reward": 0.7968539476394654, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9863715291023254, "rewards/mean_confidence_reward": 0.7336067795753479, "sampling/batch_mean_priority_error": 0.01865972222222222, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.675, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0005898760398849844, "sampling/priority_kl": 0.029998957365751266, "sampling/priority_scale": 0.8279964744346217, "sampling/prob_entropy": 10.27893295288086, "sampling/prob_max": 3.9958462730282916e-05, "sampling/prob_min": 1.6006992518668993e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.47519999742507935, "sampling/prompt_draws_total": 14256.0, "sampling/seen_fraction": 0.4130400002002716, "sampling/unseen_fraction": 0.5869599997997283, "signal/accuracy_reward/centered_abs_mean": 0.14013129323720933, "signal/accuracy_reward/group_std_mean": 0.1888304203748703, "signal/accuracy_reward/group_zero_std_frac": 0.4527777791023254, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07006564661860466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07006564661860466, "signal/advantage_abs_mean": 0.11893169432878495, "signal/advantage_pre_scale_abs_mean": 0.11893169432878495, "signal/advantage_pre_scale_std": 0.21923663318157197, "signal/advantage_std": 0.21923663318157197, "signal/brier_reward/centered_abs_mean": 0.10698548257350922, "signal/brier_reward/group_std_mean": 0.13870647549629211, "signal/brier_reward/group_zero_std_frac": 0.13055555894970894, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.05349274128675461, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.05349274128675461, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.02221679650247097, "signal/format_reward/group_std_mean": 0.04155225083231926, "signal/format_reward/group_zero_std_frac": 0.8250000238418579, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011108398251235486, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011108398251235486, "signal/mean_confidence_reward/centered_abs_mean": 0.06812093928456306, "signal/mean_confidence_reward/group_std_mean": 0.09376206994056702, "signal/mean_confidence_reward/group_zero_std_frac": 0.1750000037252903, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.812093602093228e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.812093602093228e-07, "step": 200 }, { "epoch": 0.4807692307692308, "eval_calibration/aurc": 0.24002113019857146, "eval_calibration/batch_distribution_entropy": 0.48160177072803806, "eval_calibration/batch_entropy_100bins": 0.25873340992629923, "eval_calibration/batch_entropy_10bins": 0.48160177072803806, "eval_calibration/batch_entropy_50bins": 0.30457678389374016, "eval_calibration/batch_uniqueness": 0.01719030076599557, "eval_calibration/confidence_entropy": 0.49903381393986235, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.1235657546337158, "eval_calibration/coverage@20%": 0.1235657546337158, "eval_calibration/coverage@25%": 0.8093556928508385, "eval_calibration/coverage@30%": 0.9196822594880847, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.48160177072803806, "eval_calibration/distribution_entropy_100": 0.25873340992629923, "eval_calibration/ece": 0.06690203000882607, "eval_calibration/mean_confidence": 0.738217122683142, "eval_calibration/unique_confidence_per_question": 0.008680555555555556, "eval_calibration/unique_confidences": 10, "eval_completions/clipped_ratio": 0.012152777777777771, "eval_completions/max_length": 2276.1666666666665, "eval_completions/max_terminated_length": 2276.1666666666665, "eval_completions/mean_length": 586.3701680501302, "eval_completions/mean_terminated_length": 593.6424357096354, "eval_completions/min_length": 44.5, "eval_completions/min_terminated_length": 192.33333333333334, "eval_loss": 0.0, "eval_num_tokens": 410753716.0, "eval_reward": 1.2204153537750244, "eval_reward_std": 0.37025384108225506, "eval_rewards/accuracy_reward": 0.6675347288449606, "eval_rewards/brier_reward": 0.7897742787996928, "eval_rewards/confidence_one_or_zero": 0.0008680555814256271, "eval_rewards/format_reward": 0.9835069378217062, "eval_rewards/mean_confidence_reward": 0.7268229126930237, "eval_runtime": 207.2024, "eval_samples_per_second": 4.826, "eval_signal/accuracy_reward/centered_abs_mean": 0.4338650157054265, "eval_signal/accuracy_reward/group_std_mean": 0.47237643599510193, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21693250785271326, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21693250785271326, "eval_signal/advantage_abs_mean": 0.3235609481732051, "eval_signal/advantage_pre_scale_abs_mean": 0.3235609481732051, "eval_signal/advantage_pre_scale_std": 0.36790668964385986, "eval_signal/advantage_std": 0.36790668964385986, "eval_signal/brier_reward/centered_abs_mean": 0.22734156996011734, "eval_signal/brier_reward/group_std_mean": 0.27066074560085934, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.11367078498005867, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.11367078498005867, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0016818575871487458, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0049104637776811915, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222288449606, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151002e-08, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151002e-08, "eval_signal/format_reward/centered_abs_mean": 0.03152126741285125, "eval_signal/format_reward/group_std_mean": 0.08380424821128447, "eval_signal/format_reward/group_zero_std_frac": 0.5555555745959282, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.015760633706425626, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.015760633706425626, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.13929034397006035, "eval_signal/mean_confidence_reward/group_std_mean": 0.2003222107887268, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.3929034328915197e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.3929034328915197e-06, "eval_steps_per_second": 0.029, "step": 200 }, { "epoch": 0.4807692307692308, "step": 200, "train_probe_calibration/aurc": 0.2113365019677367, "train_probe_calibration/batch_distribution_entropy": 0.4955069873679676, "train_probe_calibration/batch_entropy_100bins": 0.26813957141588457, "train_probe_calibration/batch_entropy_10bins": 0.4955069873679676, "train_probe_calibration/batch_entropy_50bins": 0.3156495650088622, "train_probe_calibration/batch_uniqueness": 0.0653170297438859, "train_probe_calibration/confidence_entropy": 0.49672003313055696, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.1411042944785276, "train_probe_calibration/coverage@15%": 0.1411042944785276, "train_probe_calibration/coverage@20%": 0.1411042944785276, "train_probe_calibration/coverage@25%": 0.8773006134969326, "train_probe_calibration/coverage@30%": 0.9570552147239264, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.4955069873679676, "train_probe_calibration/distribution_entropy_100": 0.26813957141588457, "train_probe_calibration/ece": 0.046099912357580895, "train_probe_calibration/mean_confidence": 0.738737949167397, "train_probe_calibration/unique_confidence_per_question": 0.010416666666666666, "train_probe_calibration/unique_confidences": 12, "train_probe_completions/clipped_ratio": 0.010243055555555566, "train_probe_completions/max_length": 2403.3333333333335, "train_probe_completions/max_terminated_length": 2403.3333333333335, "train_probe_completions/mean_length": 601.1067911783854, "train_probe_completions/mean_terminated_length": 607.4044189453125, "train_probe_completions/min_length": 32.833333333333336, "train_probe_completions/min_terminated_length": 177.83333333333334, "train_probe_loss": 0.0, "train_probe_num_tokens": 410753716.0, "train_probe_reward": 1.2459991375605266, "train_probe_reward_std": 0.35098352034886676, "train_probe_rewards/accuracy_reward": 0.6901041666666666, "train_probe_rewards/brier_reward": 0.8114279508590698, "train_probe_rewards/confidence_one_or_zero": 0.0008680555814256271, "train_probe_rewards/format_reward": 0.9904513955116272, "train_probe_rewards/mean_confidence_reward": 0.7316840390364329, "train_probe_runtime": 182.4753, "train_probe_samples_per_second": 5.48, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4162868907054265, "train_probe_signal/accuracy_reward/group_std_mean": 0.46246403952439624, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20814344535271326, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20814344535271326, "train_probe_signal/advantage_abs_mean": 0.30224540332953137, "train_probe_signal/advantage_pre_scale_abs_mean": 0.30224540332953137, "train_probe_signal/advantage_pre_scale_std": 0.3477505644162496, "train_probe_signal/advantage_std": 0.3477505644162496, "train_probe_signal/brier_reward/centered_abs_mean": 0.2085253670811653, "train_probe_signal/brier_reward/group_std_mean": 0.2562006364266078, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.10426268354058266, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.10426268354058266, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0016818575871487458, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0049104637776811915, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222288449606, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151002e-08, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151002e-08, "train_probe_signal/format_reward/centered_abs_mean": 0.018391926772892475, "train_probe_signal/format_reward/group_std_mean": 0.051025692373514175, "train_probe_signal/format_reward/group_zero_std_frac": 0.7222222437461218, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009195963386446238, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.009195963386446238, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.13380803540349007, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.1927381455898285, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.3380803428238626e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.3380803428238626e-06, "train_probe_steps_per_second": 0.033 }, { "calibration/aurc": 0.22641020059736908, "calibration/batch_distribution_entropy": 0.586543433079215, "calibration/batch_entropy_100bins": 0.31469597338663524, "calibration/batch_entropy_10bins": 0.586543433079215, "calibration/batch_entropy_50bins": 0.3704550081325571, "calibration/batch_uniqueness": 0.3057555199387029, "calibration/confidence_entropy": 0.48789312654324435, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.11884628551865799, "calibration/coverage@15%": 0.11884628551865799, "calibration/coverage@20%": 0.31884628551865796, "calibration/coverage@25%": 0.728710047005235, "calibration/coverage@30%": 0.9123436701244794, "calibration/coverage@5%": 0.03695652173913043, "calibration/distribution_entropy_10": 0.586543433079215, "calibration/distribution_entropy_100": 0.31469597338663524, "calibration/ece": 0.12346906503340056, "calibration/mean_confidence": 0.720077070835353, "calibration/unique_confidence_per_question": 0.023958333333333335, "calibration/unique_confidences": 9.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01675347222222221, "completions/max_length": 3890.2, "completions/max_terminated_length": 3890.2, "completions/mean_length": 571.915625, "completions/mean_terminated_length": 581.7823852539062, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.49278846153846156, "grad_norm": 0.0007555597112514079, "learning_rate": 4.927884615384616e-06, "loss": -0.0201, "num_tokens": 420431336.0, "reward": 1.2604899883270264, "reward_std": 0.1605339229106903, "rewards/accuracy_reward": 0.7164062619209289, "rewards/brier_reward": 0.8213125944137574, "rewards/confidence_one_or_zero": 0.0008680555736646056, "rewards/format_reward": 0.9832465171813964, "rewards/mean_confidence_reward": 0.7297421932220459, "sampling/batch_mean_priority_error": 0.013986111111111105, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6888888888888889, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0006075228797271848, "sampling/priority_kl": 0.029999784752726556, "sampling/priority_scale": 0.8207675993675366, "sampling/prob_entropy": 10.278958702087403, "sampling/prob_max": 4.0076706500258294e-05, "sampling/prob_min": 1.61830939759966e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.487200003862381, "sampling/prompt_draws_total": 14616.0, "sampling/seen_fraction": 0.42125333547592164, "sampling/unseen_fraction": 0.5787466645240784, "signal/accuracy_reward/centered_abs_mean": 0.1313639312982559, "signal/accuracy_reward/group_std_mean": 0.1768402189016342, "signal/accuracy_reward/group_zero_std_frac": 0.4833333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06568196564912795, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06568196564912795, "signal/advantage_abs_mean": 0.11555989980697631, "signal/advantage_pre_scale_abs_mean": 0.11555989980697631, "signal/advantage_pre_scale_std": 0.21790286600589753, "signal/advantage_std": 0.21790286600589753, "signal/brier_reward/centered_abs_mean": 0.1100591003894806, "signal/brier_reward/group_std_mean": 0.14190090894699098, "signal/brier_reward/group_zero_std_frac": 0.033333333767950536, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0550295501947403, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0550295501947403, "signal/confidence_one_or_zero/centered_abs_mean": 0.0016276041511446237, "signal/confidence_one_or_zero/group_std_mean": 0.003662066720426083, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9833333253860473, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6276040781804114e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6276040781804114e-08, "signal/format_reward/centered_abs_mean": 0.02888997383415699, "signal/format_reward/group_std_mean": 0.052112925052642825, "signal/format_reward/group_zero_std_frac": 0.7972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014444986917078495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014444986917078495, "signal/mean_confidence_reward/centered_abs_mean": 0.08421467393636703, "signal/mean_confidence_reward/group_std_mean": 0.10899612158536912, "signal/mean_confidence_reward/group_zero_std_frac": 0.05277777872979641, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 8.421466986874293e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 8.421466986874293e-07, "step": 205 }, { "calibration/aurc": 0.1461755373917801, "calibration/batch_distribution_entropy": 0.6707544144938835, "calibration/batch_entropy_100bins": 0.37178587247237216, "calibration/batch_entropy_10bins": 0.6707544144938835, "calibration/batch_entropy_50bins": 0.4376603136294584, "calibration/batch_uniqueness": 0.5310754998290197, "calibration/confidence_entropy": 0.4849550254131734, "calibration/coverage@0%": 0.002102507635094426, "calibration/coverage@1%": 0.14442151395239228, "calibration/coverage@10%": 0.4801659934974561, "calibration/coverage@15%": 0.4953812431926138, "calibration/coverage@20%": 0.6357792125051241, "calibration/coverage@25%": 0.8384257306194505, "calibration/coverage@30%": 0.9014771570440239, "calibration/coverage@5%": 0.2594849378129214, "calibration/distribution_entropy_10": 0.6707544144938835, "calibration/distribution_entropy_100": 0.37178587247237216, "calibration/ece": 0.09125336785763163, "calibration/mean_confidence": 0.6740264282946639, "calibration/unique_confidence_per_question": 0.023958333333333335, "calibration/unique_confidences": 9.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01059027777777779, "completions/max_length": 3380.0, "completions/max_terminated_length": 3380.0, "completions/mean_length": 601.1892578125, "completions/mean_terminated_length": 607.6563232421875, "completions/min_length": 0.0, "completions/min_terminated_length": 153.6, "epoch": 0.5048076923076923, "grad_norm": 0.0007844443898648024, "learning_rate": 4.987980769230769e-06, "loss": -0.0119, "num_tokens": 430468588.0, "reward": 1.2284770011901855, "reward_std": 0.1580641061067581, "rewards/accuracy_reward": 0.6583333373069763, "rewards/brier_reward": 0.8092842698097229, "rewards/confidence_one_or_zero": 0.0006944444496184588, "rewards/format_reward": 0.9893229246139527, "rewards/mean_confidence_reward": 0.6731336712837219, "sampling/batch_mean_priority_error": 0.019003472222222213, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6805555555555556, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0006265622796490788, "sampling/priority_kl": 0.0299996230751276, "sampling/priority_scale": 0.8139940678840503, "sampling/prob_entropy": 10.278954696655273, "sampling/prob_max": 4.019671468995512e-05, "sampling/prob_min": 1.6353473620256408e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.49920000433921813, "sampling/prompt_draws_total": 14976.0, "sampling/seen_fraction": 0.4294800043106079, "sampling/unseen_fraction": 0.570519995689392, "signal/accuracy_reward/centered_abs_mean": 0.14608289897441865, "signal/accuracy_reward/group_std_mean": 0.1931061327457428, "signal/accuracy_reward/group_zero_std_frac": 0.45, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07304144948720932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07304144948720932, "signal/advantage_abs_mean": 0.11451363265514374, "signal/advantage_pre_scale_abs_mean": 0.11451363265514374, "signal/advantage_pre_scale_std": 0.2036235064268112, "signal/advantage_std": 0.2036235064268112, "signal/brier_reward/centered_abs_mean": 0.11961710602045059, "signal/brier_reward/group_std_mean": 0.15400556921958924, "signal/brier_reward/group_zero_std_frac": 0.013888888992369175, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.05980855301022529, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.05980855301022529, "signal/confidence_one_or_zero/centered_abs_mean": 0.0013129340019077062, "signal/confidence_one_or_zero/group_std_mean": 0.0032778555527329446, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9833333253860473, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.3129339038187026e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.3129339038187026e-08, "signal/format_reward/centered_abs_mean": 0.018994140811264514, "signal/format_reward/group_std_mean": 0.036902287229895595, "signal/format_reward/group_zero_std_frac": 0.8472222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009497070405632257, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009497070405632257, "signal/mean_confidence_reward/centered_abs_mean": 0.09470810443162918, "signal/mean_confidence_reward/group_std_mean": 0.1206357717514038, "signal/mean_confidence_reward/group_zero_std_frac": 0.013888888992369175, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 9.470810482525849e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 9.470810482525849e-07, "step": 210 }, { "calibration/aurc": 0.14871075118082513, "calibration/batch_distribution_entropy": 0.6749137696780482, "calibration/batch_entropy_100bins": 0.3727244117417784, "calibration/batch_entropy_10bins": 0.6749137696780482, "calibration/batch_entropy_50bins": 0.43876514687196533, "calibration/batch_uniqueness": 0.5322232887996369, "calibration/confidence_entropy": 0.48361457584113304, "calibration/coverage@0%": 0.09635040020423015, "calibration/coverage@1%": 0.09635040020423015, "calibration/coverage@10%": 0.39986044205220733, "calibration/coverage@15%": 0.6437249317552453, "calibration/coverage@20%": 0.7111178626707388, "calibration/coverage@25%": 0.7794437929301391, "calibration/coverage@30%": 0.926595744680851, "calibration/coverage@5%": 0.2446842144983858, "calibration/distribution_entropy_10": 0.6749137696780482, "calibration/distribution_entropy_100": 0.3727244117417784, "calibration/ece": 0.11950171895871634, "calibration/mean_confidence": 0.6841320759702054, "calibration/unique_confidence_per_question": 0.024479166666666666, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01059027777777779, "completions/max_length": 2960.2, "completions/max_terminated_length": 2960.2, "completions/mean_length": 544.8087768554688, "completions/mean_terminated_length": 550.6374694824219, "completions/min_length": 0.0, "completions/min_terminated_length": 144.8, "epoch": 0.5168269230769231, "grad_norm": 0.0010941576911136508, "learning_rate": 4.957932692307692e-06, "loss": -0.0102, "num_tokens": 439827985.0, "reward": 1.2625808477401734, "reward_std": 0.15077738016843795, "rewards/accuracy_reward": 0.718836784362793, "rewards/brier_reward": 0.8174222230911254, "rewards/confidence_one_or_zero": 0.0047743055620230734, "rewards/format_reward": 0.9888888955116272, "rewards/mean_confidence_reward": 0.6864542722702026, "sampling/batch_mean_priority_error": 0.014708333333333315, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.7027777777777777, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0006463405909016729, "sampling/priority_kl": 0.030000553280115128, "sampling/priority_scale": 0.8077474296791479, "sampling/prob_entropy": 10.278956031799316, "sampling/prob_max": 4.031913995277137e-05, "sampling/prob_min": 1.6515195602551104e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.5111999988555909, "sampling/prompt_draws_total": 15336.0, "sampling/seen_fraction": 0.437773334980011, "sampling/unseen_fraction": 0.562226665019989, "signal/accuracy_reward/centered_abs_mean": 0.1398925766348839, "signal/accuracy_reward/group_std_mean": 0.18380642533302308, "signal/accuracy_reward/group_zero_std_frac": 0.4777777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06994628831744194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06994628831744194, "signal/advantage_abs_mean": 0.10771286487579346, "signal/advantage_pre_scale_abs_mean": 0.10771286487579346, "signal/advantage_pre_scale_std": 0.1967930257320404, "signal/advantage_std": 0.1967930257320404, "signal/brier_reward/centered_abs_mean": 0.10499090403318405, "signal/brier_reward/group_std_mean": 0.14038468152284622, "signal/brier_reward/group_zero_std_frac": 0.033333334140479565, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.052495452016592024, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.052495452016592024, "signal/confidence_one_or_zero/centered_abs_mean": 0.008479817654006183, "signal/confidence_one_or_zero/group_std_mean": 0.016532309353351593, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9305555701255799, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 8.479817523721067e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 8.479817523721067e-08, "signal/format_reward/centered_abs_mean": 0.01947699673473835, "signal/format_reward/group_std_mean": 0.037950745224952696, "signal/format_reward/group_zero_std_frac": 0.8416666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009738498367369175, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009738498367369175, "signal/mean_confidence_reward/centered_abs_mean": 0.09180866181850433, "signal/mean_confidence_reward/group_std_mean": 0.1205675944685936, "signal/mean_confidence_reward/group_zero_std_frac": 0.0361111119389534, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 9.180865731650556e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 9.180865731650556e-07, "step": 215 }, { "calibration/aurc": 0.23897279996570475, "calibration/batch_distribution_entropy": 0.6549670881134286, "calibration/batch_entropy_100bins": 0.3524090104914431, "calibration/batch_entropy_10bins": 0.6549670881134286, "calibration/batch_entropy_50bins": 0.4148501852205088, "calibration/batch_uniqueness": 0.48453062977196665, "calibration/confidence_entropy": 0.5208603656696392, "calibration/coverage@0%": 0.032112316565714005, "calibration/coverage@1%": 0.08569321842247793, "calibration/coverage@10%": 0.08569321842247793, "calibration/coverage@15%": 0.4360667565516437, "calibration/coverage@20%": 0.4980136219467849, "calibration/coverage@25%": 0.5319574558906188, "calibration/coverage@30%": 0.6400633783411409, "calibration/coverage@5%": 0.08569321842247793, "calibration/distribution_entropy_10": 0.6549670881134286, "calibration/distribution_entropy_100": 0.3524090104914431, "calibration/ece": 0.12067979587505484, "calibration/mean_confidence": 0.7013421687699827, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008854166666666673, "completions/max_length": 2822.8, "completions/max_terminated_length": 2822.8, "completions/mean_length": 574.1637939453125, "completions/mean_terminated_length": 579.2975830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 132.4, "epoch": 0.5288461538461539, "grad_norm": 0.0008874834165908396, "learning_rate": 4.927884615384616e-06, "loss": -0.0088, "num_tokens": 449520944.0, "reward": 1.2366694450378417, "reward_std": 0.14118025302886963, "rewards/accuracy_reward": 0.6623263835906983, "rewards/brier_reward": 0.8200264930725097, "rewards/confidence_one_or_zero": 0.0019965277635492383, "rewards/format_reward": 0.9909722208976746, "rewards/mean_confidence_reward": 0.6875000238418579, "sampling/batch_mean_priority_error": 0.016652777777777773, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6777777777777778, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0006648902199231088, "sampling/priority_kl": 0.02999868765473366, "sampling/priority_scale": 0.8017917334800586, "sampling/prob_entropy": 10.278938484191894, "sampling/prob_max": 4.0442708268528804e-05, "sampling/prob_min": 1.667476535658352e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.523199999332428, "sampling/prompt_draws_total": 15696.0, "sampling/seen_fraction": 0.4460400104522705, "sampling/unseen_fraction": 0.5539599895477295, "signal/accuracy_reward/centered_abs_mean": 0.13765191435813903, "signal/accuracy_reward/group_std_mean": 0.179826021194458, "signal/accuracy_reward/group_zero_std_frac": 0.49444445967674255, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06882595717906952, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06882595717906952, "signal/advantage_abs_mean": 0.10393836796283722, "signal/advantage_pre_scale_abs_mean": 0.10393836796283722, "signal/advantage_pre_scale_std": 0.18965348303318025, "signal/advantage_std": 0.18965348303318025, "signal/brier_reward/centered_abs_mean": 0.08609333783388137, "signal/brier_reward/group_std_mean": 0.11452355682849884, "signal/brier_reward/group_zero_std_frac": 0.06388889122754335, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.043046668916940686, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.043046668916940686, "signal/confidence_one_or_zero/centered_abs_mean": 0.0036295572528615592, "signal/confidence_one_or_zero/group_std_mean": 0.00784936579875648, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9638888835906982, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.629557205897527e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.629557205897527e-08, "signal/format_reward/centered_abs_mean": 0.01592881940305233, "signal/format_reward/group_std_mean": 0.030093761160969734, "signal/format_reward/group_zero_std_frac": 0.8777777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007964409701526166, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007964409701526166, "signal/mean_confidence_reward/centered_abs_mean": 0.0710023283958435, "signal/mean_confidence_reward/group_std_mean": 0.09437335133552552, "signal/mean_confidence_reward/group_zero_std_frac": 0.07222222406417131, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.100233005985501e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.100233005985501e-07, "step": 220 }, { "calibration/aurc": 0.14094224662459304, "calibration/batch_distribution_entropy": 0.566623231366884, "calibration/batch_entropy_100bins": 0.30633370359683026, "calibration/batch_entropy_10bins": 0.566623231366884, "calibration/batch_entropy_50bins": 0.36061107943639037, "calibration/batch_uniqueness": 0.3183684648717434, "calibration/confidence_entropy": 0.5458095920829429, "calibration/coverage@0%": 0.08241021163558863, "calibration/coverage@1%": 0.08241021163558863, "calibration/coverage@10%": 0.3662256485440952, "calibration/coverage@15%": 0.6514003532685047, "calibration/coverage@20%": 0.7386676534884836, "calibration/coverage@25%": 0.85249343832021, "calibration/coverage@30%": 0.8771653543307087, "calibration/coverage@5%": 0.20669219596979227, "calibration/distribution_entropy_10": 0.566623231366884, "calibration/distribution_entropy_100": 0.30633370359683026, "calibration/ece": 0.12998478649698814, "calibration/mean_confidence": 0.7048597419529478, "calibration/unique_confidence_per_question": 0.01875, "calibration/unique_confidences": 7.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007031250000000022, "completions/max_length": 3238.0, "completions/max_terminated_length": 3238.0, "completions/mean_length": 582.3898681640625, "completions/mean_terminated_length": 586.5300170898438, "completions/min_length": 0.0, "completions/min_terminated_length": 140.4, "epoch": 0.5408653846153846, "grad_norm": 0.000878737133461982, "learning_rate": 4.897836538461539e-06, "loss": -0.0071, "num_tokens": 459297339.0, "reward": 1.268056082725525, "reward_std": 0.14799101948738097, "rewards/accuracy_reward": 0.7123263955116272, "rewards/brier_reward": 0.8308030605316162, "rewards/confidence_one_or_zero": 0.0003472222248092294, "rewards/format_reward": 0.992968761920929, "rewards/mean_confidence_reward": 0.6942693829536438, "sampling/batch_mean_priority_error": 0.015236111111111112, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6083333333333334, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0006837929598987102, "sampling/priority_kl": 0.029999540001153947, "sampling/priority_scale": 0.7958873092429712, "sampling/prob_entropy": 10.278943252563476, "sampling/prob_max": 4.055707686347887e-05, "sampling/prob_min": 1.6831377070047893e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.5351999998092651, "sampling/prompt_draws_total": 16056.0, "sampling/seen_fraction": 0.4536533296108246, "sampling/unseen_fraction": 0.5463466703891754, "signal/accuracy_reward/centered_abs_mean": 0.14823133498430252, "signal/accuracy_reward/group_std_mean": 0.19535186886787415, "signal/accuracy_reward/group_zero_std_frac": 0.4444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07411566749215126, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07411566749215126, "signal/advantage_abs_mean": 0.1074238434433937, "signal/advantage_pre_scale_abs_mean": 0.1074238434433937, "signal/advantage_pre_scale_std": 0.19590988755226135, "signal/advantage_std": 0.19590988755226135, "signal/brier_reward/centered_abs_mean": 0.08263461142778397, "signal/brier_reward/group_std_mean": 0.11035842001438141, "signal/brier_reward/group_zero_std_frac": 0.10000000223517418, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.041317305713891986, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.041317305713891986, "signal/confidence_one_or_zero/centered_abs_mean": 0.0006510416511446238, "signal/confidence_one_or_zero/group_std_mean": 0.0013663037680089474, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.510416739047286e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.510416739047286e-09, "signal/format_reward/centered_abs_mean": 0.012972005270421505, "signal/format_reward/group_std_mean": 0.029025893285870553, "signal/format_reward/group_zero_std_frac": 0.8638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006486002635210752, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006486002635210752, "signal/mean_confidence_reward/centered_abs_mean": 0.06351888552308083, "signal/mean_confidence_reward/group_std_mean": 0.08457329869270325, "signal/mean_confidence_reward/group_zero_std_frac": 0.1361111119389534, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.351887918754074e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.351887918754074e-07, "step": 225 }, { "calibration/aurc": 0.27389093170088574, "calibration/batch_distribution_entropy": 0.6147581307636807, "calibration/batch_entropy_100bins": 0.35248461902091915, "calibration/batch_entropy_10bins": 0.6147581307636807, "calibration/batch_entropy_50bins": 0.4149391903580725, "calibration/batch_uniqueness": 0.53075158986908, "calibration/confidence_entropy": 0.6116111945484981, "calibration/coverage@0%": 0.16600427324666853, "calibration/coverage@1%": 0.16600427324666853, "calibration/coverage@10%": 0.27494610286396765, "calibration/coverage@15%": 0.3732418053072486, "calibration/coverage@20%": 0.525133256149047, "calibration/coverage@25%": 0.5403165022223455, "calibration/coverage@30%": 0.6074272857188354, "calibration/coverage@5%": 0.2184861334049799, "calibration/distribution_entropy_10": 0.6147581307636807, "calibration/distribution_entropy_100": 0.35248461902091915, "calibration/ece": 0.1604331230059492, "calibration/mean_confidence": 0.6082048790180217, "calibration/unique_confidence_per_question": 0.01875, "calibration/unique_confidences": 7.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009114583333333325, "completions/max_length": 3444.4, "completions/max_terminated_length": 3444.4, "completions/mean_length": 609.4268188476562, "completions/mean_terminated_length": 615.0386962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.5528846153846154, "grad_norm": 0.0006518294103443623, "learning_rate": 4.867788461538462e-06, "loss": -0.009, "num_tokens": 469448784.0, "reward": 1.2289212703704835, "reward_std": 0.1348082423210144, "rewards/accuracy_reward": 0.6572916746139527, "rewards/brier_reward": 0.8097397804260253, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9907986044883728, "rewards/mean_confidence_reward": 0.6189192771911621, "sampling/batch_mean_priority_error": 0.018409722222222223, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6194444444444444, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0007028119522146881, "sampling/priority_kl": 0.029999968782067298, "sampling/priority_scale": 0.7902504027122632, "sampling/prob_entropy": 10.27895679473877, "sampling/prob_max": 4.067026457050815e-05, "sampling/prob_min": 1.6983808018267153e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.5472000002861023, "sampling/prompt_draws_total": 16416.0, "sampling/seen_fraction": 0.4611199975013733, "sampling/unseen_fraction": 0.5388800024986267, "signal/accuracy_reward/centered_abs_mean": 0.1420247435569763, "signal/accuracy_reward/group_std_mean": 0.18404721319675446, "signal/accuracy_reward/group_zero_std_frac": 0.4805555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07101237177848815, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07101237177848815, "signal/advantage_abs_mean": 0.09798679202795028, "signal/advantage_pre_scale_abs_mean": 0.09798679202795028, "signal/advantage_pre_scale_std": 0.1785242438316345, "signal/advantage_std": 0.1785242438316345, "signal/brier_reward/centered_abs_mean": 0.07310768216848373, "signal/brier_reward/group_std_mean": 0.09774392247200012, "signal/brier_reward/group_zero_std_frac": 0.04722222276031971, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03655384108424187, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03655384108424187, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.016503906436264516, "signal/format_reward/group_std_mean": 0.0324388038367033, "signal/format_reward/group_zero_std_frac": 0.8638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008251953218132258, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008251953218132258, "signal/mean_confidence_reward/centered_abs_mean": 0.06303982138633728, "signal/mean_confidence_reward/group_std_mean": 0.08338831216096879, "signal/mean_confidence_reward/group_zero_std_frac": 0.05000000055879354, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.30398199064075e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.30398199064075e-07, "step": 230 }, { "calibration/aurc": 0.1102212275482241, "calibration/batch_distribution_entropy": 0.6097776666063451, "calibration/batch_entropy_100bins": 0.3411749959336878, "calibration/batch_entropy_10bins": 0.6097776666063451, "calibration/batch_entropy_50bins": 0.401625685047952, "calibration/batch_uniqueness": 0.4715644556634209, "calibration/confidence_entropy": 0.5820979355769902, "calibration/coverage@0%": 0.026701570680628273, "calibration/coverage@1%": 0.026701570680628273, "calibration/coverage@10%": 0.5605327597168789, "calibration/coverage@15%": 0.6912604228899767, "calibration/coverage@20%": 0.856343607976538, "calibration/coverage@25%": 0.9007216618645737, "calibration/coverage@30%": 0.9732620320855615, "calibration/coverage@5%": 0.3460591668717388, "calibration/distribution_entropy_10": 0.6097776666063451, "calibration/distribution_entropy_100": 0.3411749959336878, "calibration/ece": 0.13206102299034156, "calibration/mean_confidence": 0.6639744295390552, "calibration/unique_confidence_per_question": 0.018229166666666668, "calibration/unique_confidences": 7.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008854166666666673, "completions/max_length": 3477.6, "completions/max_terminated_length": 3477.6, "completions/mean_length": 614.6240356445312, "completions/mean_terminated_length": 620.1178588867188, "completions/min_length": 0.0, "completions/min_terminated_length": 176.2, "epoch": 0.5649038461538461, "grad_norm": 0.0005726331728510559, "learning_rate": 4.837740384615385e-06, "loss": -0.0107, "num_tokens": 479633509.0, "reward": 1.2486154317855835, "reward_std": 0.13345372974872588, "rewards/accuracy_reward": 0.6847222089767456, "rewards/brier_reward": 0.8213493824005127, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9911458253860473, "rewards/mean_confidence_reward": 0.6699392318725585, "sampling/batch_mean_priority_error": 0.02002777777777778, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6583333333333334, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0007279346697032452, "sampling/priority_kl": 0.030000114068388938, "sampling/priority_scale": 0.7850202381843701, "sampling/prob_entropy": 10.278967475891113, "sampling/prob_max": 4.078907732036896e-05, "sampling/prob_min": 1.713214842311572e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.5592000126838684, "sampling/prompt_draws_total": 16776.0, "sampling/seen_fraction": 0.46881999969482424, "sampling/unseen_fraction": 0.5311800003051758, "signal/accuracy_reward/centered_abs_mean": 0.13311631977558136, "signal/accuracy_reward/group_std_mean": 0.17697255909442902, "signal/accuracy_reward/group_zero_std_frac": 0.49444445967674255, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06655815988779068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06655815988779068, "signal/advantage_abs_mean": 0.09503975510597229, "signal/advantage_pre_scale_abs_mean": 0.09503975510597229, "signal/advantage_pre_scale_std": 0.18311896324157714, "signal/advantage_std": 0.18311896324157714, "signal/brier_reward/centered_abs_mean": 0.06759776845574379, "signal/brier_reward/group_std_mean": 0.09351931512355804, "signal/brier_reward/group_zero_std_frac": 0.1083333358168602, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.033798884227871896, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.033798884227871896, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.015733507089316846, "signal/format_reward/group_std_mean": 0.03246893137693405, "signal/format_reward/group_zero_std_frac": 0.8555555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007866753544658423, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007866753544658423, "signal/mean_confidence_reward/centered_abs_mean": 0.0571609228849411, "signal/mean_confidence_reward/group_std_mean": 0.07677199840545654, "signal/mean_confidence_reward/group_zero_std_frac": 0.11944444626569747, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.716092005059181e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.716092005059181e-07, "step": 235 }, { "calibration/aurc": 0.1632135963960027, "calibration/batch_distribution_entropy": 0.4861958787539655, "calibration/batch_entropy_100bins": 0.26268981924080526, "calibration/batch_entropy_10bins": 0.4861958787539655, "calibration/batch_entropy_50bins": 0.3092342049246103, "calibration/batch_uniqueness": 0.09928411464403394, "calibration/confidence_entropy": 0.5340064855582761, "calibration/coverage@0%": 0.012550408048883846, "calibration/coverage@1%": 0.012550408048883846, "calibration/coverage@10%": 0.28228749390998803, "calibration/coverage@15%": 0.5240212071014891, "calibration/coverage@20%": 0.6545321803971482, "calibration/coverage@25%": 0.8324212202344337, "calibration/coverage@30%": 0.9084603502248711, "calibration/coverage@5%": 0.05803197436651038, "calibration/distribution_entropy_10": 0.4861958787539655, "calibration/distribution_entropy_100": 0.26268981924080526, "calibration/ece": 0.12853953979598606, "calibration/mean_confidence": 0.7273770341281883, "calibration/unique_confidence_per_question": 0.019791666666666666, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007638888888888884, "completions/max_length": 3554.2, "completions/max_terminated_length": 3554.2, "completions/mean_length": 640.9253662109375, "completions/mean_terminated_length": 645.88857421875, "completions/min_length": 0.0, "completions/min_terminated_length": 175.4, "epoch": 0.5769230769230769, "grad_norm": 0.0005463262787088752, "learning_rate": 4.807692307692308e-06, "loss": -0.0075, "num_tokens": 490094857.0, "reward": 1.2511736631393433, "reward_std": 0.14524880647659302, "rewards/accuracy_reward": 0.6892361044883728, "rewards/brier_reward": 0.8207356572151184, "rewards/confidence_one_or_zero": 0.0006076389050576836, "rewards/format_reward": 0.9923611164093018, "rewards/mean_confidence_reward": 0.7208593845367431, "sampling/batch_mean_priority_error": 0.018305555555555554, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6611111111111111, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0007488534669391811, "sampling/priority_kl": 0.03000020459294319, "sampling/priority_scale": 0.7801542580360546, "sampling/prob_entropy": 10.278952980041504, "sampling/prob_max": 4.091102891834453e-05, "sampling/prob_min": 1.7276007201871835e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.5711999893188476, "sampling/prompt_draws_total": 17136.0, "sampling/seen_fraction": 0.4765933334827423, "sampling/unseen_fraction": 0.5234066665172576, "signal/accuracy_reward/centered_abs_mean": 0.1394205719232559, "signal/accuracy_reward/group_std_mean": 0.18355306684970857, "signal/accuracy_reward/group_zero_std_frac": 0.48055556416511536, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06971028596162795, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06971028596162795, "signal/advantage_abs_mean": 0.10699738562107086, "signal/advantage_pre_scale_abs_mean": 0.10699738562107086, "signal/advantage_pre_scale_std": 0.1997263699769974, "signal/advantage_std": 0.1997263699769974, "signal/brier_reward/centered_abs_mean": 0.08055151253938675, "signal/brier_reward/group_std_mean": 0.10744158029556275, "signal/brier_reward/group_zero_std_frac": 0.15555555671453475, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.040275756269693375, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.040275756269693375, "signal/confidence_one_or_zero/centered_abs_mean": 0.0010145399603061378, "signal/confidence_one_or_zero/group_std_mean": 0.0015925956889986993, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.0145398476879563e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.0145398476879563e-08, "signal/format_reward/centered_abs_mean": 0.01343315988779068, "signal/format_reward/group_std_mean": 0.027276124432682992, "signal/format_reward/group_zero_std_frac": 0.8805555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00671657994389534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00671657994389534, "signal/mean_confidence_reward/centered_abs_mean": 0.053337682783603665, "signal/mean_confidence_reward/group_std_mean": 0.07268032133579254, "signal/mean_confidence_reward/group_zero_std_frac": 0.1916666656732559, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.333767774118314e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.333767774118314e-07, "step": 240 }, { "calibration/aurc": 0.09553962138261952, "calibration/batch_distribution_entropy": 0.6930865139911127, "calibration/batch_entropy_100bins": 0.3858514920175591, "calibration/batch_entropy_10bins": 0.6930865139911127, "calibration/batch_entropy_50bins": 0.45421813337823486, "calibration/batch_uniqueness": 0.5950436333597635, "calibration/confidence_entropy": 0.5898984752654866, "calibration/coverage@0%": 0.09646038175943286, "calibration/coverage@1%": 0.21339159869064978, "calibration/coverage@10%": 0.592138100501477, "calibration/coverage@15%": 0.7390341236991794, "calibration/coverage@20%": 0.850719112347446, "calibration/coverage@25%": 0.9280562517404622, "calibration/coverage@30%": 0.962962962962963, "calibration/coverage@5%": 0.48448592927246625, "calibration/distribution_entropy_10": 0.6930865139911127, "calibration/distribution_entropy_100": 0.3858514920175591, "calibration/ece": 0.14864530817334146, "calibration/mean_confidence": 0.6301486821970961, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011805555555555559, "completions/max_length": 3296.6, "completions/max_terminated_length": 3296.6, "completions/mean_length": 649.9940307617187, "completions/mean_terminated_length": 657.7092407226562, "completions/min_length": 0.0, "completions/min_terminated_length": 177.4, "epoch": 0.5889423076923077, "grad_norm": 0.00048271266859956086, "learning_rate": 4.777644230769231e-06, "loss": -0.0127, "num_tokens": 500675460.0, "reward": 1.2657977104187013, "reward_std": 0.13289434760808944, "rewards/accuracy_reward": 0.7050347089767456, "rewards/brier_reward": 0.8383535146713257, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9881944298744202, "rewards/mean_confidence_reward": 0.6343446135520935, "sampling/batch_mean_priority_error": 0.020006944444444442, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6694444444444445, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0007706952514126897, "sampling/priority_kl": 0.02999957129359245, "sampling/priority_scale": 0.7756844341056421, "sampling/prob_entropy": 10.278955841064453, "sampling/prob_max": 4.103910687263124e-05, "sampling/prob_min": 1.7415898764738812e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.5832000017166138, "sampling/prompt_draws_total": 17496.0, "sampling/seen_fraction": 0.4846266686916351, "sampling/unseen_fraction": 0.5153733313083648, "signal/accuracy_reward/centered_abs_mean": 0.13336588442325592, "signal/accuracy_reward/group_std_mean": 0.17644108831882477, "signal/accuracy_reward/group_zero_std_frac": 0.4944444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06668294221162796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06668294221162796, "signal/advantage_abs_mean": 0.09448128640651703, "signal/advantage_pre_scale_abs_mean": 0.09448128640651703, "signal/advantage_pre_scale_std": 0.17919619977474213, "signal/advantage_std": 0.17919619977474213, "signal/brier_reward/centered_abs_mean": 0.07075202167034149, "signal/brier_reward/group_std_mean": 0.09702449142932892, "signal/brier_reward/group_zero_std_frac": 0.030555556155741216, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.035376010835170744, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.035376010835170744, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.02027994804084301, "signal/format_reward/group_std_mean": 0.03983944281935692, "signal/format_reward/group_zero_std_frac": 0.8305555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010139974020421504, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010139974020421504, "signal/mean_confidence_reward/centered_abs_mean": 0.06342665180563926, "signal/mean_confidence_reward/group_std_mean": 0.08341833353042602, "signal/mean_confidence_reward/group_zero_std_frac": 0.030555556155741216, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.342664960357069e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.342664960357069e-07, "step": 245 }, { "calibration/aurc": 0.16545562957720988, "calibration/batch_distribution_entropy": 0.6516094600536402, "calibration/batch_entropy_100bins": 0.3457829111167018, "calibration/batch_entropy_10bins": 0.6516094600536402, "calibration/batch_entropy_50bins": 0.40705004824595303, "calibration/batch_uniqueness": 0.46248375969187416, "calibration/confidence_entropy": 0.5245775845540271, "calibration/coverage@0%": 0.007853403141361256, "calibration/coverage@1%": 0.007853403141361256, "calibration/coverage@10%": 0.35422205268554696, "calibration/coverage@15%": 0.5339711789098691, "calibration/coverage@20%": 0.634497870918797, "calibration/coverage@25%": 0.6957461049419, "calibration/coverage@30%": 0.9298429319371728, "calibration/coverage@5%": 0.13723436933694882, "calibration/distribution_entropy_10": 0.6516094600536402, "calibration/distribution_entropy_100": 0.3457829111167018, "calibration/ece": 0.11143773320785884, "calibration/mean_confidence": 0.7164784939762043, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01215277777777779, "completions/max_length": 3935.2, "completions/max_terminated_length": 3935.2, "completions/mean_length": 682.9305541992187, "completions/mean_terminated_length": 691.3621459960938, "completions/min_length": 0.0, "completions/min_terminated_length": 178.2, "epoch": 0.6009615384615384, "grad_norm": 0.0005736547173000872, "learning_rate": 4.747596153846154e-06, "loss": -0.0129, "num_tokens": 511674948.0, "reward": 1.2450485229492188, "reward_std": 0.15984397232532502, "rewards/accuracy_reward": 0.6749131798744201, "rewards/brier_reward": 0.8274962782859803, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9876736164093017, "rewards/mean_confidence_reward": 0.6966883778572083, "sampling/batch_mean_priority_error": 0.014520833333333325, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.638888888888889, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0007915808353573083, "sampling/priority_kl": 0.029998910427093507, "sampling/priority_scale": 0.7715063988929615, "sampling/prob_entropy": 10.27894859313965, "sampling/prob_max": 4.116657073609531e-05, "sampling/prob_min": 1.7544802904012614e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.5952000021934509, "sampling/prompt_draws_total": 17856.0, "sampling/seen_fraction": 0.4925266742706299, "sampling/unseen_fraction": 0.5074733257293701, "signal/accuracy_reward/centered_abs_mean": 0.15193684995174409, "signal/accuracy_reward/group_std_mean": 0.1979072332382202, "signal/accuracy_reward/group_zero_std_frac": 0.4416666805744171, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07596842497587204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07596842497587204, "signal/advantage_abs_mean": 0.11579143851995469, "signal/advantage_pre_scale_abs_mean": 0.11579143851995469, "signal/advantage_pre_scale_std": 0.20864003598690034, "signal/advantage_std": 0.20864003598690034, "signal/brier_reward/centered_abs_mean": 0.09156632274389268, "signal/brier_reward/group_std_mean": 0.12338759154081344, "signal/brier_reward/group_zero_std_frac": 0.06388888973742723, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04578316137194634, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04578316137194634, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.02147352434694767, "signal/format_reward/group_std_mean": 0.0425883911550045, "signal/format_reward/group_zero_std_frac": 0.8166666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010736762173473835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010736762173473835, "signal/mean_confidence_reward/centered_abs_mean": 0.07210828363895416, "signal/mean_confidence_reward/group_std_mean": 0.0960259661078453, "signal/mean_confidence_reward/group_zero_std_frac": 0.08333333432674409, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.210828130155278e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.210828130155278e-07, "step": 250 }, { "epoch": 0.6009615384615384, "eval_calibration/aurc": 0.1658275768643548, "eval_calibration/batch_distribution_entropy": 0.653935370011339, "eval_calibration/batch_entropy_100bins": 0.3449556613935705, "eval_calibration/batch_entropy_10bins": 0.653935370011339, "eval_calibration/batch_entropy_50bins": 0.40607622325667125, "eval_calibration/batch_uniqueness": 0.4734441366574332, "eval_calibration/confidence_entropy": 0.4784185724453361, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.35175438596491226, "eval_calibration/coverage@15%": 0.35175438596491226, "eval_calibration/coverage@20%": 0.6894736842105263, "eval_calibration/coverage@25%": 0.8719298245614036, "eval_calibration/coverage@30%": 0.9780701754385965, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.653935370011339, "eval_calibration/distribution_entropy_100": 0.3449556613935705, "eval_calibration/ece": 0.07307017543859642, "eval_calibration/mean_confidence": 0.7571052631578947, "eval_calibration/unique_confidence_per_question": 0.0078125, "eval_calibration/unique_confidences": 9, "eval_completions/clipped_ratio": 0.009548611111111105, "eval_completions/max_length": 2102.1666666666665, "eval_completions/max_terminated_length": 2102.1666666666665, "eval_completions/mean_length": 644.3861287434896, "eval_completions/mean_terminated_length": 650.6084696451823, "eval_completions/min_length": 54.5, "eval_completions/min_terminated_length": 229.16666666666666, "eval_loss": 0.0, "eval_num_tokens": 511674948.0, "eval_reward": 1.2462879220644634, "eval_reward_std": 0.348220556974411, "eval_rewards/accuracy_reward": 0.6822916666666666, "eval_rewards/brier_reward": 0.8206857641537985, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9895833333333334, "eval_rewards/mean_confidence_reward": 0.7492187718550364, "eval_runtime": 203.1518, "eval_samples_per_second": 4.922, "eval_signal/accuracy_reward/centered_abs_mean": 0.4181857605775197, "eval_signal/accuracy_reward/group_std_mean": 0.4631023903687795, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20909288028875986, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20909288028875986, "eval_signal/advantage_abs_mean": 0.30280476808547974, "eval_signal/advantage_pre_scale_abs_mean": 0.30280476808547974, "eval_signal/advantage_pre_scale_std": 0.34675098955631256, "eval_signal/advantage_std": 0.34675098955631256, "eval_signal/brier_reward/centered_abs_mean": 0.19453559319178262, "eval_signal/brier_reward/group_std_mean": 0.24338269233703613, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09726779659589131, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.09726779659589131, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.020182291356225807, "eval_signal/format_reward/group_std_mean": 0.05892556471129259, "eval_signal/format_reward/group_zero_std_frac": 0.6666666865348816, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010091145678112904, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.010091145678112904, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.13532442972064018, "eval_signal/mean_confidence_reward/group_std_mean": 0.18789123992125192, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.3532442532474913e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.3532442532474913e-06, "eval_steps_per_second": 0.03, "step": 250 }, { "epoch": 0.6009615384615384, "step": 250, "train_probe_calibration/aurc": 0.13297189810899127, "train_probe_calibration/batch_distribution_entropy": 0.6515029645632585, "train_probe_calibration/batch_entropy_100bins": 0.3455663791755371, "train_probe_calibration/batch_entropy_10bins": 0.6515029645632585, "train_probe_calibration/batch_entropy_50bins": 0.40679515034827146, "train_probe_calibration/batch_uniqueness": 0.4740345275528414, "train_probe_calibration/confidence_entropy": 0.4782895608513383, "train_probe_calibration/coverage@0%": 0.0008710801393728223, "train_probe_calibration/coverage@1%": 0.0008710801393728223, "train_probe_calibration/coverage@10%": 0.35714285714285715, "train_probe_calibration/coverage@15%": 0.6933797909407665, "train_probe_calibration/coverage@20%": 0.8179442508710801, "train_probe_calibration/coverage@25%": 0.8945993031358885, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0008710801393728223, "train_probe_calibration/distribution_entropy_10": 0.6515029645632585, "train_probe_calibration/distribution_entropy_100": 0.3455663791755371, "train_probe_calibration/ece": 0.06977351916376293, "train_probe_calibration/mean_confidence": 0.7554878048780488, "train_probe_calibration/unique_confidence_per_question": 0.0078125, "train_probe_calibration/unique_confidences": 9, "train_probe_completions/clipped_ratio": 0.0034722222222222467, "train_probe_completions/max_length": 2169.3333333333335, "train_probe_completions/max_terminated_length": 2169.3333333333335, "train_probe_completions/mean_length": 656.5750223795573, "train_probe_completions/mean_terminated_length": 658.8591105143229, "train_probe_completions/min_length": 58.833333333333336, "train_probe_completions/min_terminated_length": 201.83333333333334, "train_probe_loss": 0.0, "train_probe_num_tokens": 511674948.0, "train_probe_reward": 1.277138610680898, "train_probe_reward_std": 0.3227684100468953, "train_probe_rewards/accuracy_reward": 0.7161458333333334, "train_probe_rewards/brier_reward": 0.8415885468324026, "train_probe_rewards/confidence_one_or_zero": 0.0008680555814256271, "train_probe_rewards/format_reward": 0.9965277910232544, "train_probe_rewards/mean_confidence_reward": 0.7528645694255829, "train_probe_runtime": 166.5968, "train_probe_samples_per_second": 6.003, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3948025157054265, "train_probe_signal/accuracy_reward/group_std_mean": 0.4493370900551478, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19740125785271326, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.19740125785271326, "train_probe_signal/advantage_abs_mean": 0.27557384471098584, "train_probe_signal/advantage_pre_scale_abs_mean": 0.27557384471098584, "train_probe_signal/advantage_pre_scale_std": 0.32040198644002277, "train_probe_signal/advantage_std": 0.32040198644002277, "train_probe_signal/brier_reward/centered_abs_mean": 0.17354112366835275, "train_probe_signal/brier_reward/group_std_mean": 0.22265213479598364, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08677056183417638, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.08677056183417638, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0016818575871487458, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0049104637776811915, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222288449606, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151002e-08, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151002e-08, "train_probe_signal/format_reward/centered_abs_mean": 0.006727430348594983, "train_probe_signal/format_reward/group_std_mean": 0.019641855110724766, "train_probe_signal/format_reward/group_zero_std_frac": 0.8888889153798422, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0033637151742974916, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0033637151742974916, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.1332410884400209, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.17862537999947867, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.332410799174492e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.332410799174492e-06, "train_probe_steps_per_second": 0.036 }, { "calibration/aurc": 0.11053866199233855, "calibration/batch_distribution_entropy": 0.6553593655311809, "calibration/batch_entropy_100bins": 0.3494508559530446, "calibration/batch_entropy_10bins": 0.6553593655311809, "calibration/batch_entropy_50bins": 0.41136789356044556, "calibration/batch_uniqueness": 0.47738488620607616, "calibration/confidence_entropy": 0.49347215678980705, "calibration/coverage@0%": 0.0010498687664041995, "calibration/coverage@1%": 0.0010498687664041995, "calibration/coverage@10%": 0.6203634476385791, "calibration/coverage@15%": 0.7494999706104414, "calibration/coverage@20%": 0.7756438133090919, "calibration/coverage@25%": 0.8393311222141723, "calibration/coverage@30%": 0.9272251308900523, "calibration/coverage@5%": 0.39120135439401754, "calibration/distribution_entropy_10": 0.6553593655311809, "calibration/distribution_entropy_100": 0.3494508559530446, "calibration/ece": 0.15268235447490713, "calibration/mean_confidence": 0.7408424220083726, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00894097222222221, "completions/max_length": 3785.2, "completions/max_terminated_length": 3785.2, "completions/mean_length": 660.906689453125, "completions/mean_terminated_length": 666.9714111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 168.6, "epoch": 0.6129807692307693, "grad_norm": 0.00047295857802964747, "learning_rate": 4.7175480769230775e-06, "loss": -0.0093, "num_tokens": 522387505.0, "reward": 1.2695069313049316, "reward_std": 0.14805976301431656, "rewards/accuracy_reward": 0.7131944537162781, "rewards/brier_reward": 0.8347456574440002, "rewards/confidence_one_or_zero": 0.0005208333546761424, "rewards/format_reward": 0.9910590410232544, "rewards/mean_confidence_reward": 0.7380295157432556, "sampling/batch_mean_priority_error": 0.011499999999999993, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6277777777777779, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0008047144394367934, "sampling/priority_kl": 0.03000001311302185, "sampling/priority_scale": 0.7673150241142139, "sampling/prob_entropy": 10.278948783874512, "sampling/prob_max": 4.1286813939223066e-05, "sampling/prob_min": 1.7676256538834424e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.6072000026702881, "sampling/prompt_draws_total": 18216.0, "sampling/seen_fraction": 0.49989998936653135, "sampling/unseen_fraction": 0.5001000106334687, "signal/accuracy_reward/centered_abs_mean": 0.1382269948720932, "signal/accuracy_reward/group_std_mean": 0.1822678416967392, "signal/accuracy_reward/group_zero_std_frac": 0.48055556416511536, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0691134974360466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0691134974360466, "signal/advantage_abs_mean": 0.10847936570644379, "signal/advantage_pre_scale_abs_mean": 0.10847936570644379, "signal/advantage_pre_scale_std": 0.2013978600502014, "signal/advantage_std": 0.2013978600502014, "signal/brier_reward/centered_abs_mean": 0.08796882182359696, "signal/brier_reward/group_std_mean": 0.11736268252134323, "signal/brier_reward/group_zero_std_frac": 0.06666666846722365, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04398441091179848, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04398441091179848, "signal/confidence_one_or_zero/centered_abs_mean": 0.0009874132345430553, "signal/confidence_one_or_zero/group_std_mean": 0.0023483963683247565, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9888888835906983, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 9.874132445020223e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 9.874132445020223e-09, "signal/format_reward/centered_abs_mean": 0.01550021693110466, "signal/format_reward/group_std_mean": 0.030670232325792312, "signal/format_reward/group_zero_std_frac": 0.8666666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00775010846555233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00775010846555233, "signal/mean_confidence_reward/centered_abs_mean": 0.06791991218924523, "signal/mean_confidence_reward/group_std_mean": 0.0899539515376091, "signal/mean_confidence_reward/group_zero_std_frac": 0.0750000024214387, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.791991154386778e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.791991154386778e-07, "step": 255 }, { "calibration/aurc": 0.13061344417676773, "calibration/batch_distribution_entropy": 0.7119939536306921, "calibration/batch_entropy_100bins": 0.3947636348685257, "calibration/batch_entropy_10bins": 0.7119939536306921, "calibration/batch_entropy_50bins": 0.4647093637451295, "calibration/batch_uniqueness": 0.6090274589011522, "calibration/confidence_entropy": 0.5537789059431736, "calibration/coverage@0%": 0.0994236422574895, "calibration/coverage@1%": 0.0994236422574895, "calibration/coverage@10%": 0.390270058015489, "calibration/coverage@15%": 0.6097525669281451, "calibration/coverage@20%": 0.7780259450593018, "calibration/coverage@25%": 0.8862583121109224, "calibration/coverage@30%": 0.9415506508205999, "calibration/coverage@5%": 0.31689589896770565, "calibration/distribution_entropy_10": 0.7119939536306921, "calibration/distribution_entropy_100": 0.3947636348685257, "calibration/ece": 0.1114447297112239, "calibration/mean_confidence": 0.6725977867976177, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008072916666666697, "completions/max_length": 2881.2, "completions/max_terminated_length": 2881.2, "completions/mean_length": 658.5054809570313, "completions/mean_terminated_length": 663.83076171875, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.625, "grad_norm": 0.0004797276924364269, "learning_rate": 4.6875000000000004e-06, "loss": -0.0062, "num_tokens": 533054992.0, "reward": 1.279056715965271, "reward_std": 0.14249440133571625, "rewards/accuracy_reward": 0.7252604126930237, "rewards/brier_reward": 0.8409123301506043, "rewards/confidence_one_or_zero": 0.0003472222248092294, "rewards/format_reward": 0.9919270753860474, "rewards/mean_confidence_reward": 0.6728732585906982, "sampling/batch_mean_priority_error": 0.013777777777777772, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6083333333333334, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0008190826396457851, "sampling/priority_kl": 0.029998597875237464, "sampling/priority_scale": 0.7635116636054591, "sampling/prob_entropy": 10.27892894744873, "sampling/prob_max": 4.141414974583313e-05, "sampling/prob_min": 1.780548664100934e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.6192000031471252, "sampling/prompt_draws_total": 18576.0, "sampling/seen_fraction": 0.5075666785240174, "sampling/unseen_fraction": 0.4924333214759827, "signal/accuracy_reward/centered_abs_mean": 0.14213866889476776, "signal/accuracy_reward/group_std_mean": 0.19297046065330506, "signal/accuracy_reward/group_zero_std_frac": 0.42777777910232545, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07106933444738388, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07106933444738388, "signal/advantage_abs_mean": 0.10037857741117477, "signal/advantage_pre_scale_abs_mean": 0.10037857741117477, "signal/advantage_pre_scale_std": 0.18658272922039032, "signal/advantage_std": 0.18658272922039032, "signal/brier_reward/centered_abs_mean": 0.08256456702947616, "signal/brier_reward/group_std_mean": 0.11133366227149963, "signal/brier_reward/group_zero_std_frac": 0.01666666679084301, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04128228351473808, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04128228351473808, "signal/confidence_one_or_zero/centered_abs_mean": 0.0006618923507630825, "signal/confidence_one_or_zero/group_std_mean": 0.0016652445774525404, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.618923364953844e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.618923364953844e-09, "signal/format_reward/centered_abs_mean": 0.014415147714316845, "signal/format_reward/group_std_mean": 0.02902185283601284, "signal/format_reward/group_zero_std_frac": 0.875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0072075738571584225, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0072075738571584225, "signal/mean_confidence_reward/centered_abs_mean": 0.07456976771354676, "signal/mean_confidence_reward/group_std_mean": 0.09588464945554734, "signal/mean_confidence_reward/group_zero_std_frac": 0.01666666679084301, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.456976504727209e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.456976504727209e-07, "step": 260 }, { "calibration/aurc": 0.18852691153159343, "calibration/batch_distribution_entropy": 0.6839669052669889, "calibration/batch_entropy_100bins": 0.3899278269620705, "calibration/batch_entropy_10bins": 0.6839669052669889, "calibration/batch_entropy_50bins": 0.459016728920367, "calibration/batch_uniqueness": 0.5566917367060518, "calibration/confidence_entropy": 0.508166540858328, "calibration/coverage@0%": 0.007313596491228071, "calibration/coverage@1%": 0.007313596491228071, "calibration/coverage@10%": 0.2226528216755989, "calibration/coverage@15%": 0.4099509467330068, "calibration/coverage@20%": 0.5052252704677362, "calibration/coverage@25%": 0.7828199512629264, "calibration/coverage@30%": 0.8094723100178646, "calibration/coverage@5%": 0.17213198834226556, "calibration/distribution_entropy_10": 0.6839669052669889, "calibration/distribution_entropy_100": 0.3899278269620705, "calibration/ece": 0.12156555954972767, "calibration/mean_confidence": 0.6929760522571534, "calibration/unique_confidence_per_question": 0.023958333333333335, "calibration/unique_confidences": 9.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3458.6, "completions/max_terminated_length": 3458.6, "completions/mean_length": 680.2973999023437, "completions/mean_terminated_length": 685.6692993164063, "completions/min_length": 0.0, "completions/min_terminated_length": 210.2, "epoch": 0.6370192307692307, "grad_norm": 0.00045690243132412434, "learning_rate": 4.657451923076923e-06, "loss": -0.0076, "num_tokens": 543990706.0, "reward": 1.2635553359985352, "reward_std": 0.13125103265047072, "rewards/accuracy_reward": 0.6978298544883728, "rewards/brier_reward": 0.8370790004730224, "rewards/confidence_one_or_zero": 0.009027777757728473, "rewards/format_reward": 0.9921875, "rewards/mean_confidence_reward": 0.7020920038223266, "sampling/batch_mean_priority_error": 0.011541666666666662, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6444444444444445, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0008350071730092168, "sampling/priority_kl": 0.029999512434005737, "sampling/priority_scale": 0.7598387658363208, "sampling/prob_entropy": 10.278968620300294, "sampling/prob_max": 4.154030175413936e-05, "sampling/prob_min": 1.7932389891939237e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.6311999917030334, "sampling/prompt_draws_total": 18936.0, "sampling/seen_fraction": 0.5150799989700318, "sampling/unseen_fraction": 0.48492000102996824, "signal/accuracy_reward/centered_abs_mean": 0.12266167551279068, "signal/accuracy_reward/group_std_mean": 0.16602103412151337, "signal/accuracy_reward/group_zero_std_frac": 0.5166666746139527, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06133083775639534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06133083775639534, "signal/advantage_abs_mean": 0.0934511199593544, "signal/advantage_pre_scale_abs_mean": 0.0934511199593544, "signal/advantage_pre_scale_std": 0.17902643084526063, "signal/advantage_std": 0.17902643084526063, "signal/brier_reward/centered_abs_mean": 0.0809135839343071, "signal/brier_reward/group_std_mean": 0.10942707508802414, "signal/brier_reward/group_zero_std_frac": 0.02500000037252903, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04045679196715355, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04045679196715355, "signal/confidence_one_or_zero/centered_abs_mean": 0.01451822918606922, "signal/confidence_one_or_zero/group_std_mean": 0.02576328720897436, "signal/confidence_one_or_zero/group_zero_std_frac": 0.8972222208976746, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.451822868148156e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.451822868148156e-07, "signal/format_reward/centered_abs_mean": 0.013216145616024733, "signal/format_reward/group_std_mean": 0.026454249024391176, "signal/format_reward/group_zero_std_frac": 0.8833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006608072808012367, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006608072808012367, "signal/mean_confidence_reward/centered_abs_mean": 0.07293184846639633, "signal/mean_confidence_reward/group_std_mean": 0.09473260641098022, "signal/mean_confidence_reward/group_zero_std_frac": 0.02500000037252903, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.293184808077058e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.293184808077058e-07, "step": 265 }, { "calibration/aurc": 0.1961220920497752, "calibration/batch_distribution_entropy": 0.5809251673671703, "calibration/batch_entropy_100bins": 0.34474063958986223, "calibration/batch_entropy_10bins": 0.5809251673671703, "calibration/batch_entropy_50bins": 0.4058231030683695, "calibration/batch_uniqueness": 0.3972966344560953, "calibration/confidence_entropy": 0.438974753112911, "calibration/coverage@0%": 0.011518324607329843, "calibration/coverage@1%": 0.11727748691099475, "calibration/coverage@10%": 0.1649214659685864, "calibration/coverage@15%": 0.4003130425273058, "calibration/coverage@20%": 0.5895827851102029, "calibration/coverage@25%": 0.6401852823306403, "calibration/coverage@30%": 0.8300192746708952, "calibration/coverage@5%": 0.1324607329842932, "calibration/distribution_entropy_10": 0.5809251673671703, "calibration/distribution_entropy_100": 0.34474063958986223, "calibration/ece": 0.13466166017912237, "calibration/mean_confidence": 0.759952178567523, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006076388888888884, "completions/max_length": 3589.8, "completions/max_terminated_length": 3589.8, "completions/mean_length": 707.28369140625, "completions/mean_terminated_length": 711.668896484375, "completions/min_length": 0.0, "completions/min_terminated_length": 252.8, "epoch": 0.6490384615384616, "grad_norm": 0.0005051839398220181, "learning_rate": 4.627403846153847e-06, "loss": -0.0071, "num_tokens": 555252822.0, "reward": 1.2621881484985351, "reward_std": 0.14973460435867308, "rewards/accuracy_reward": 0.6929687619209289, "rewards/brier_reward": 0.8376414775848389, "rewards/confidence_one_or_zero": 0.05598958358168602, "rewards/format_reward": 0.99375, "rewards/mean_confidence_reward": 0.7462586879730224, "sampling/batch_mean_priority_error": 0.016298611111111104, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5972222222222222, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0008497483097016812, "sampling/priority_kl": 0.030001790821552278, "sampling/priority_scale": 0.7562869013054296, "sampling/prob_entropy": 10.278965950012207, "sampling/prob_max": 4.166323342360556e-05, "sampling/prob_min": 1.8056220142170787e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.6432000041007996, "sampling/prompt_draws_total": 19296.0, "sampling/seen_fraction": 0.5222799897193908, "sampling/unseen_fraction": 0.4777200102806091, "signal/accuracy_reward/centered_abs_mean": 0.14156358540058137, "signal/accuracy_reward/group_std_mean": 0.19307936429977418, "signal/accuracy_reward/group_zero_std_frac": 0.43055555820465086, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07078179270029068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07078179270029068, "signal/advantage_abs_mean": 0.10628217607736587, "signal/advantage_pre_scale_abs_mean": 0.10628217607736587, "signal/advantage_pre_scale_std": 0.19850011467933654, "signal/advantage_std": 0.19850011467933654, "signal/brier_reward/centered_abs_mean": 0.09157379269599915, "signal/brier_reward/group_std_mean": 0.12708270996809007, "signal/brier_reward/group_zero_std_frac": 0.0305555559694767, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.045786896347999574, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.045786896347999574, "signal/confidence_one_or_zero/centered_abs_mean": 0.08119032084941864, "signal/confidence_one_or_zero/group_std_mean": 0.11917831152677535, "signal/confidence_one_or_zero/group_zero_std_frac": 0.6194444537162781, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 8.119032031572714e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 8.119032031572714e-07, "signal/format_reward/centered_abs_mean": 0.011154513899236917, "signal/format_reward/group_std_mean": 0.023835764080286027, "signal/format_reward/group_zero_std_frac": 0.8916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005577256949618459, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005577256949618459, "signal/mean_confidence_reward/centered_abs_mean": 0.07271484434604644, "signal/mean_confidence_reward/group_std_mean": 0.09719483703374862, "signal/mean_confidence_reward/group_zero_std_frac": 0.03611111212521791, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.271484378179593e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.271484378179593e-07, "step": 270 }, { "calibration/aurc": 0.1864150533610327, "calibration/batch_distribution_entropy": 0.6408848797337117, "calibration/batch_entropy_100bins": 0.4092029903128368, "calibration/batch_entropy_10bins": 0.6408848797337117, "calibration/batch_entropy_50bins": 0.4817071393473589, "calibration/batch_uniqueness": 0.6019178475709618, "calibration/confidence_entropy": 0.42906083533825345, "calibration/coverage@0%": 0.031413612565445025, "calibration/coverage@1%": 0.031413612565445025, "calibration/coverage@10%": 0.27362383769329013, "calibration/coverage@15%": 0.4249930657811958, "calibration/coverage@20%": 0.5929327449426574, "calibration/coverage@25%": 0.798945107189404, "calibration/coverage@30%": 0.878995504394491, "calibration/coverage@5%": 0.14640315539772328, "calibration/distribution_entropy_10": 0.6408848797337117, "calibration/distribution_entropy_100": 0.4092029903128368, "calibration/ece": 0.10553314892878747, "calibration/mean_confidence": 0.7237165907374583, "calibration/unique_confidence_per_question": 0.025520833333333336, "calibration/unique_confidences": 9.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008940972222222232, "completions/max_length": 3354.2, "completions/max_terminated_length": 3354.2, "completions/mean_length": 684.2703125, "completions/mean_terminated_length": 690.500439453125, "completions/min_length": 0.0, "completions/min_terminated_length": 180.6, "epoch": 0.6610576923076923, "grad_norm": 0.0005132072838023305, "learning_rate": 4.597355769230769e-06, "loss": -0.0086, "num_tokens": 566232160.0, "reward": 1.2669503688812256, "reward_std": 0.13826109766960143, "rewards/accuracy_reward": 0.7004340291023254, "rewards/brier_reward": 0.8423903942108154, "rewards/confidence_one_or_zero": 0.12612847462296486, "rewards/format_reward": 0.9910590171813964, "rewards/mean_confidence_reward": 0.7350477337837219, "sampling/batch_mean_priority_error": 0.015659722222222217, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6027777777777779, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0008683778694830834, "sampling/priority_kl": 0.029999760910868645, "sampling/priority_scale": 0.7529768407111987, "sampling/prob_entropy": 10.2789737701416, "sampling/prob_max": 4.178994277026504e-05, "sampling/prob_min": 1.8177531092078426e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.6552000045776367, "sampling/prompt_draws_total": 19656.0, "sampling/seen_fraction": 0.5296066761016845, "sampling/unseen_fraction": 0.47039332389831545, "signal/accuracy_reward/centered_abs_mean": 0.12946506291627885, "signal/accuracy_reward/group_std_mean": 0.1760338395833969, "signal/accuracy_reward/group_zero_std_frac": 0.4833333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06473253145813943, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06473253145813943, "signal/advantage_abs_mean": 0.09839854985475541, "signal/advantage_pre_scale_abs_mean": 0.09839854985475541, "signal/advantage_pre_scale_std": 0.19160001575946808, "signal/advantage_std": 0.19160001575946808, "signal/brier_reward/centered_abs_mean": 0.08714145421981812, "signal/brier_reward/group_std_mean": 0.11808212995529174, "signal/brier_reward/group_zero_std_frac": 0.027777778543531896, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04357072710990906, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04357072710990906, "signal/confidence_one_or_zero/centered_abs_mean": 0.12776150181889534, "signal/confidence_one_or_zero/group_std_mean": 0.1739050790667534, "signal/confidence_one_or_zero/group_zero_std_frac": 0.4805555582046509, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.2776150015270105e-06, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.2776150015270105e-06, "signal/format_reward/centered_abs_mean": 0.01463216133415699, "signal/format_reward/group_std_mean": 0.028245595470070838, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007316080667078495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007316080667078495, "signal/mean_confidence_reward/centered_abs_mean": 0.07807481437921523, "signal/mean_confidence_reward/group_std_mean": 0.10356819778680801, "signal/mean_confidence_reward/group_zero_std_frac": 0.027777778543531896, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.807481210875266e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.807481210875266e-07, "step": 275 }, { "calibration/aurc": 0.16869080575234072, "calibration/batch_distribution_entropy": 0.664091431699852, "calibration/batch_entropy_100bins": 0.3700299330201189, "calibration/batch_entropy_10bins": 0.664091431699852, "calibration/batch_entropy_50bins": 0.43559325011712813, "calibration/batch_uniqueness": 0.48766579592898707, "calibration/confidence_entropy": 0.4723369159996705, "calibration/coverage@0%": 0.06598825401202599, "calibration/coverage@1%": 0.06598825401202599, "calibration/coverage@10%": 0.26032505422653046, "calibration/coverage@15%": 0.4990277088435562, "calibration/coverage@20%": 0.5279106395236692, "calibration/coverage@25%": 0.8020478480261879, "calibration/coverage@30%": 0.9015880044183131, "calibration/coverage@5%": 0.08221862050417258, "calibration/distribution_entropy_10": 0.664091431699852, "calibration/distribution_entropy_100": 0.3700299330201189, "calibration/ece": 0.11553050746975288, "calibration/mean_confidence": 0.686896090546837, "calibration/unique_confidence_per_question": 0.025520833333333336, "calibration/unique_confidences": 9.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00694444444444442, "completions/max_length": 3185.0, "completions/max_terminated_length": 3185.0, "completions/mean_length": 674.0809936523438, "completions/mean_terminated_length": 678.8185791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 198.4, "epoch": 0.6730769230769231, "grad_norm": 0.0005486437003128231, "learning_rate": 4.567307692307692e-06, "loss": -0.0059, "num_tokens": 577073221.0, "reward": 1.272284698486328, "reward_std": 0.13358466923236847, "rewards/accuracy_reward": 0.7074652671813965, "rewards/brier_reward": 0.8440346002578736, "rewards/confidence_one_or_zero": 0.0027777778159361333, "rewards/format_reward": 0.9930555582046509, "rewards/mean_confidence_reward": 0.6953281283378601, "sampling/batch_mean_priority_error": 0.015076388888888872, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.6222222222222221, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0008863226044923067, "sampling/priority_kl": 0.03000061586499214, "sampling/priority_scale": 0.7499105036491528, "sampling/prob_entropy": 10.278962135314941, "sampling/prob_max": 4.191710904706269e-05, "sampling/prob_min": 1.829475695558358e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.6672000050544739, "sampling/prompt_draws_total": 20016.0, "sampling/seen_fraction": 0.5368266701698303, "sampling/unseen_fraction": 0.4631733298301697, "signal/accuracy_reward/centered_abs_mean": 0.12867838591337205, "signal/accuracy_reward/group_std_mean": 0.17682747542858124, "signal/accuracy_reward/group_zero_std_frac": 0.4694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06433919295668603, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06433919295668603, "signal/advantage_abs_mean": 0.09517031162977219, "signal/advantage_pre_scale_abs_mean": 0.09517031162977219, "signal/advantage_pre_scale_std": 0.18133316040039063, "signal/advantage_std": 0.18133316040039063, "signal/brier_reward/centered_abs_mean": 0.08287988752126693, "signal/brier_reward/group_std_mean": 0.11331825703382492, "signal/brier_reward/group_zero_std_frac": 0.14722222685813904, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.041439943760633466, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.041439943760633466, "signal/confidence_one_or_zero/centered_abs_mean": 0.005208333174232393, "signal/confidence_one_or_zero/group_std_mean": 0.012185867689549924, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9416666746139526, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.2083332491292825e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.2083332491292825e-08, "signal/format_reward/centered_abs_mean": 0.01195746548473835, "signal/format_reward/group_std_mean": 0.022025031968951225, "signal/format_reward/group_zero_std_frac": 0.9111111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005978732742369175, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005978732742369175, "signal/mean_confidence_reward/centered_abs_mean": 0.07131211757659912, "signal/mean_confidence_reward/group_std_mean": 0.09345167428255081, "signal/mean_confidence_reward/group_zero_std_frac": 0.16111111044883727, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.13121221451729e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.13121221451729e-07, "step": 280 }, { "calibration/aurc": 0.10583889998251635, "calibration/batch_distribution_entropy": 0.6460838299377851, "calibration/batch_entropy_100bins": 0.35334087685404303, "calibration/batch_entropy_10bins": 0.6460838299377851, "calibration/batch_entropy_50bins": 0.4159471632250901, "calibration/batch_uniqueness": 0.4410605124714365, "calibration/confidence_entropy": 0.4774449305819912, "calibration/coverage@0%": 0.1712793733681462, "calibration/coverage@1%": 0.28541270670147956, "calibration/coverage@10%": 0.5194118255004352, "calibration/coverage@15%": 0.7913901762402088, "calibration/coverage@20%": 0.8342494342906877, "calibration/coverage@25%": 0.9118363794604004, "calibration/coverage@30%": 0.9326697127937337, "calibration/coverage@5%": 0.447759954308094, "calibration/distribution_entropy_10": 0.6460838299377851, "calibration/distribution_entropy_100": 0.35334087685404303, "calibration/ece": 0.13417588936031327, "calibration/mean_confidence": 0.7112051397954744, "calibration/unique_confidence_per_question": 0.025, "calibration/unique_confidences": 9.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00590277777777779, "completions/max_length": 3611.0, "completions/max_terminated_length": 3611.0, "completions/mean_length": 694.354443359375, "completions/mean_terminated_length": 698.5254760742188, "completions/min_length": 0.0, "completions/min_terminated_length": 226.4, "epoch": 0.6850961538461539, "grad_norm": 0.00047999402158893645, "learning_rate": 4.537259615384616e-06, "loss": -0.0058, "num_tokens": 588172472.0, "reward": 1.298223352432251, "reward_std": 0.11750643104314804, "rewards/accuracy_reward": 0.7395833253860473, "rewards/brier_reward": 0.8629256367683411, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9939236164093017, "rewards/mean_confidence_reward": 0.7073857069015503, "sampling/batch_mean_priority_error": 0.014631944444444428, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5805555555555556, "sampling/error_ema_max": 0.0756249949336052, "sampling/error_ema_mean": 0.0009036242845468223, "sampling/priority_kl": 0.029998960718512536, "sampling/priority_scale": 0.7471439658896998, "sampling/prob_entropy": 10.278946495056152, "sampling/prob_max": 4.204997385386378e-05, "sampling/prob_min": 1.8408967298455538e-05, "sampling/prompt_draws_max": 4.0, "sampling/prompt_draws_mean": 0.6791999936103821, "sampling/prompt_draws_total": 20376.0, "sampling/seen_fraction": 0.5442400097846984, "sampling/unseen_fraction": 0.4557599902153015, "signal/accuracy_reward/centered_abs_mean": 0.11642795205116271, "signal/accuracy_reward/group_std_mean": 0.15990724563598632, "signal/accuracy_reward/group_zero_std_frac": 0.5277777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05821397602558136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05821397602558136, "signal/advantage_abs_mean": 0.08154689669609069, "signal/advantage_pre_scale_abs_mean": 0.08154689669609069, "signal/advantage_pre_scale_std": 0.16533636152744294, "signal/advantage_std": 0.16533636152744294, "signal/brier_reward/centered_abs_mean": 0.07062470018863679, "signal/brier_reward/group_std_mean": 0.09852024167776108, "signal/brier_reward/group_zero_std_frac": 0.14444444626569747, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03531235009431839, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03531235009431839, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.011013455037027598, "signal/format_reward/group_std_mean": 0.022193879634141923, "signal/format_reward/group_zero_std_frac": 0.9055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005506727518513799, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005506727518513799, "signal/mean_confidence_reward/centered_abs_mean": 0.06535256057977676, "signal/mean_confidence_reward/group_std_mean": 0.0859237179160118, "signal/mean_confidence_reward/group_zero_std_frac": 0.1611111119389534, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.53525580673886e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.53525580673886e-07, "step": 285 }, { "calibration/aurc": 0.17162131598213481, "calibration/batch_distribution_entropy": 0.6749536064273652, "calibration/batch_entropy_100bins": 0.3733026957392366, "calibration/batch_entropy_10bins": 0.6749536064273652, "calibration/batch_entropy_50bins": 0.4394458934372163, "calibration/batch_uniqueness": 0.5114593856578805, "calibration/confidence_entropy": 0.49050545259714856, "calibration/coverage@0%": 0.0643979057591623, "calibration/coverage@1%": 0.0643979057591623, "calibration/coverage@10%": 0.5205018787496098, "calibration/coverage@15%": 0.5777854982460253, "calibration/coverage@20%": 0.6465968586387435, "calibration/coverage@25%": 0.6732984293193718, "calibration/coverage@30%": 0.7230366492146596, "calibration/coverage@5%": 0.30811697924868053, "calibration/distribution_entropy_10": 0.6749536064273652, "calibration/distribution_entropy_100": 0.3733026957392366, "calibration/ece": 0.11309064965504387, "calibration/mean_confidence": 0.6855380969615151, "calibration/unique_confidence_per_question": 0.024479166666666666, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004427083333333348, "completions/max_length": 3676.2, "completions/max_terminated_length": 3676.2, "completions/mean_length": 689.5286499023438, "completions/mean_terminated_length": 692.6000366210938, "completions/min_length": 0.0, "completions/min_terminated_length": 202.4, "epoch": 0.6971153846153846, "grad_norm": 0.0005578218260779977, "learning_rate": 4.507211538461539e-06, "loss": -0.0039, "num_tokens": 599223810.0, "reward": 1.2588875055313111, "reward_std": 0.11392218917608261, "rewards/accuracy_reward": 0.6797742962837219, "rewards/brier_reward": 0.8425005674362183, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9954861164093017, "rewards/mean_confidence_reward": 0.7026808500289917, "sampling/batch_mean_priority_error": 0.020199652777777757, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5555555555555556, "sampling/error_ema_max": 0.07812499850988389, "sampling/error_ema_mean": 0.0009238900383934379, "sampling/priority_kl": 0.030000124126672745, "sampling/priority_scale": 0.7441923797829076, "sampling/prob_entropy": 10.278951454162598, "sampling/prob_max": 4.216997185721993e-05, "sampling/prob_min": 1.7492775805294512e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.6911999940872192, "sampling/prompt_draws_total": 20736.0, "sampling/seen_fraction": 0.5508933305740357, "sampling/unseen_fraction": 0.44910666942596433, "signal/accuracy_reward/centered_abs_mean": 0.11195204108953476, "signal/accuracy_reward/group_std_mean": 0.1539290338754654, "signal/accuracy_reward/group_zero_std_frac": 0.5361111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05597602054476738, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05597602054476738, "signal/advantage_abs_mean": 0.08156270831823349, "signal/advantage_pre_scale_abs_mean": 0.08156270831823349, "signal/advantage_pre_scale_std": 0.16118119955062865, "signal/advantage_std": 0.16118119955062865, "signal/brier_reward/centered_abs_mean": 0.07383809015154838, "signal/brier_reward/group_std_mean": 0.09918788969516754, "signal/brier_reward/group_zero_std_frac": 0.17777777910232545, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03691904507577419, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03691904507577419, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.00814887140877545, "signal/format_reward/group_std_mean": 0.01690137628465891, "signal/format_reward/group_zero_std_frac": 0.925000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004074435704387725, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004074435704387725, "signal/mean_confidence_reward/centered_abs_mean": 0.06701208576560021, "signal/mean_confidence_reward/group_std_mean": 0.08645372539758682, "signal/mean_confidence_reward/group_zero_std_frac": 0.19444444179534912, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.70120812173991e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.70120812173991e-07, "step": 290 }, { "calibration/aurc": 0.1382071528975812, "calibration/batch_distribution_entropy": 0.6787802551891892, "calibration/batch_entropy_100bins": 0.36278431485319834, "calibration/batch_entropy_10bins": 0.6787802551891892, "calibration/batch_entropy_50bins": 0.427063825644149, "calibration/batch_uniqueness": 0.5060428668027408, "calibration/confidence_entropy": 0.4827378502143943, "calibration/coverage@0%": 0.12598425196850394, "calibration/coverage@1%": 0.2199105971128609, "calibration/coverage@10%": 0.5906996629131781, "calibration/coverage@15%": 0.6597831464379947, "calibration/coverage@20%": 0.7319604771328057, "calibration/coverage@25%": 0.7403693931398416, "calibration/coverage@30%": 0.7403693931398416, "calibration/coverage@5%": 0.5577887377163278, "calibration/distribution_entropy_10": 0.6787802551891892, "calibration/distribution_entropy_100": 0.36278431485319834, "calibration/ece": 0.1557866114345061, "calibration/mean_confidence": 0.7255701798802711, "calibration/unique_confidence_per_question": 0.025, "calibration/unique_confidences": 9.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003385416666666674, "completions/max_length": 3645.8, "completions/max_terminated_length": 3645.8, "completions/mean_length": 704.8776977539062, "completions/mean_terminated_length": 707.2697143554688, "completions/min_length": 0.0, "completions/min_terminated_length": 209.2, "epoch": 0.7091346153846154, "grad_norm": 0.000640033686067909, "learning_rate": 4.477163461538462e-06, "loss": -0.0014, "num_tokens": 610431169.0, "reward": 1.2582273960113526, "reward_std": 0.12031746208667755, "rewards/accuracy_reward": 0.6776909708976746, "rewards/brier_reward": 0.8424825191497802, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9962673544883728, "rewards/mean_confidence_reward": 0.6952007293701172, "sampling/batch_mean_priority_error": 0.018067708333333325, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5194444444444445, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0009439987828955054, "sampling/priority_kl": 0.030000235140323638, "sampling/priority_scale": 0.7413066328270361, "sampling/prob_entropy": 10.278957557678222, "sampling/prob_max": 4.2287943506380544e-05, "sampling/prob_min": 1.7601668878342026e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7032000064849854, "sampling/prompt_draws_total": 21096.0, "sampling/seen_fraction": 0.5573600053787231, "sampling/unseen_fraction": 0.44263999462127684, "signal/accuracy_reward/centered_abs_mean": 0.12797851413488387, "signal/accuracy_reward/group_std_mean": 0.16741083860397338, "signal/accuracy_reward/group_zero_std_frac": 0.5333333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06398925706744193, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06398925706744193, "signal/advantage_abs_mean": 0.0889437347650528, "signal/advantage_pre_scale_abs_mean": 0.0889437347650528, "signal/advantage_pre_scale_std": 0.16828711926937104, "signal/advantage_std": 0.16828711926937104, "signal/brier_reward/centered_abs_mean": 0.07769880592823028, "signal/brier_reward/group_std_mean": 0.10278659611940384, "signal/brier_reward/group_zero_std_frac": 0.16388889104127885, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03884940296411514, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03884940296411514, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.00691731758415699, "signal/format_reward/group_std_mean": 0.01580982506275177, "signal/format_reward/group_zero_std_frac": 0.9250000238418579, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003458658792078495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003458658792078495, "signal/mean_confidence_reward/centered_abs_mean": 0.06594567447900772, "signal/mean_confidence_reward/group_std_mean": 0.08560345023870468, "signal/mean_confidence_reward/group_zero_std_frac": 0.16944444328546523, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.594567025786091e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.594567025786091e-07, "step": 295 }, { "calibration/aurc": 0.1393076428472104, "calibration/batch_distribution_entropy": 0.6548606113464862, "calibration/batch_entropy_100bins": 0.3476336789716639, "calibration/batch_entropy_10bins": 0.6548606113464862, "calibration/batch_entropy_50bins": 0.40922874221964145, "calibration/batch_uniqueness": 0.46888306010664504, "calibration/confidence_entropy": 0.4854339588086577, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.2814272469458988, "calibration/coverage@15%": 0.6440798371911454, "calibration/coverage@20%": 0.8484375, "calibration/coverage@25%": 0.8744791666666668, "calibration/coverage@30%": 0.9484375, "calibration/coverage@5%": 0.1928855802792321, "calibration/distribution_entropy_10": 0.6548606113464862, "calibration/distribution_entropy_100": 0.3476336789716639, "calibration/ece": 0.09438192770046852, "calibration/mean_confidence": 0.737763256923395, "calibration/unique_confidence_per_question": 0.02447916666666667, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002951388888888884, "completions/max_length": 3239.0, "completions/max_terminated_length": 3239.0, "completions/mean_length": 714.7509765625, "completions/mean_terminated_length": 716.9332641601562, "completions/min_length": 0.0, "completions/min_terminated_length": 182.8, "epoch": 0.7211538461538461, "grad_norm": 0.0006034580292180181, "learning_rate": 4.447115384615385e-06, "loss": -0.0022, "num_tokens": 621738700.0, "reward": 1.275432014465332, "reward_std": 0.11760217696428299, "rewards/accuracy_reward": 0.7190972208976746, "rewards/brier_reward": 0.8347038507461548, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9970486044883728, "rewards/mean_confidence_reward": 0.7160360455513001, "sampling/batch_mean_priority_error": 0.02022395833333332, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5833333333333333, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.000968804070726037, "sampling/priority_kl": 0.02999955527484417, "sampling/priority_scale": 0.7386725247139111, "sampling/prob_entropy": 10.278958892822265, "sampling/prob_max": 4.241131755406968e-05, "sampling/prob_min": 1.7708083396428266e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7152000069618225, "sampling/prompt_draws_total": 21456.0, "sampling/seen_fraction": 0.5639866590499878, "sampling/unseen_fraction": 0.4360133409500122, "signal/accuracy_reward/centered_abs_mean": 0.11738281100988388, "signal/accuracy_reward/group_std_mean": 0.1607626795768738, "signal/accuracy_reward/group_zero_std_frac": 0.5222222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05869140550494194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05869140550494194, "signal/advantage_abs_mean": 0.08313068300485611, "signal/advantage_pre_scale_abs_mean": 0.08313068300485611, "signal/advantage_pre_scale_std": 0.16150366961956025, "signal/advantage_std": 0.16150366961956025, "signal/brier_reward/centered_abs_mean": 0.07401107624173164, "signal/brier_reward/group_std_mean": 0.09984385818243027, "signal/brier_reward/group_zero_std_frac": 0.15277777910232543, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03700553812086582, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03700553812086582, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.005425347259733826, "signal/format_reward/group_std_mean": 0.012002479285001755, "signal/format_reward/group_zero_std_frac": 0.944444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002712673629866913, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002712673629866913, "signal/mean_confidence_reward/centered_abs_mean": 0.06404319405555725, "signal/mean_confidence_reward/group_std_mean": 0.08435939252376556, "signal/mean_confidence_reward/group_zero_std_frac": 0.17222222238779067, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.404319151442906e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.404319151442906e-07, "step": 300 }, { "epoch": 0.7211538461538461, "eval_calibration/aurc": 0.17038049269262773, "eval_calibration/batch_distribution_entropy": 0.6199671776505913, "eval_calibration/batch_entropy_100bins": 0.3268388624035065, "eval_calibration/batch_entropy_10bins": 0.6199671776505913, "eval_calibration/batch_entropy_50bins": 0.3847494206129197, "eval_calibration/batch_uniqueness": 0.3539211074677266, "eval_calibration/confidence_entropy": 0.45050842980070266, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.516100957354221, "eval_calibration/coverage@20%": 0.7728459530026109, "eval_calibration/coverage@25%": 0.8677110530896431, "eval_calibration/coverage@30%": 0.9843342036553525, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.6199671776505913, "eval_calibration/distribution_entropy_100": 0.3268388624035065, "eval_calibration/ece": 0.07167101827676242, "eval_calibration/mean_confidence": 0.7650565709312447, "eval_calibration/unique_confidence_per_question": 0.009548611111111112, "eval_calibration/unique_confidences": 11, "eval_completions/clipped_ratio": 0.0026041666666666665, "eval_completions/max_length": 2238.3333333333335, "eval_completions/max_terminated_length": 2238.3333333333335, "eval_completions/mean_length": 717.2819620768229, "eval_completions/mean_terminated_length": 719.1739807128906, "eval_completions/min_length": 183.33333333333334, "eval_completions/min_terminated_length": 260.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 621738700.0, "eval_reward": 1.260056495666504, "eval_reward_std": 0.3358425050973892, "eval_rewards/accuracy_reward": 0.6927083333333334, "eval_rewards/brier_reward": 0.8299935062726339, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9973958333333334, "eval_rewards/mean_confidence_reward": 0.7630642155806223, "eval_runtime": 145.5855, "eval_samples_per_second": 6.869, "eval_signal/accuracy_reward/centered_abs_mean": 0.4129774272441864, "eval_signal/accuracy_reward/group_std_mean": 0.46044932802518207, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2064887136220932, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2064887136220932, "eval_signal/advantage_abs_mean": 0.29073385894298553, "eval_signal/advantage_pre_scale_abs_mean": 0.29073385894298553, "eval_signal/advantage_pre_scale_std": 0.3327233244975408, "eval_signal/advantage_std": 0.3327233244975408, "eval_signal/brier_reward/centered_abs_mean": 0.1913610895474752, "eval_signal/brier_reward/group_std_mean": 0.2476587469379107, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0956805447737376, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.0956805447737376, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.0050455727614462376, "eval_signal/format_reward/group_std_mean": 0.014731391333043575, "eval_signal/format_reward/group_zero_std_frac": 0.9166666766007742, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0025227863807231188, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.1503743181626002, "eval_signal/mean_confidence_reward/group_std_mean": 0.19400002310673395, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.5037432300838798e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.5037432300838798e-06, "eval_steps_per_second": 0.041, "step": 300 }, { "epoch": 0.7211538461538461, "step": 300, "train_probe_calibration/aurc": 0.13103238168185818, "train_probe_calibration/batch_distribution_entropy": 0.6086489322342906, "train_probe_calibration/batch_entropy_100bins": 0.32399377529298606, "train_probe_calibration/batch_entropy_10bins": 0.6086489322342906, "train_probe_calibration/batch_entropy_50bins": 0.38140023009953883, "train_probe_calibration/batch_uniqueness": 0.3492309125655866, "train_probe_calibration/confidence_entropy": 0.4497829731002935, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.515230635335074, "train_probe_calibration/coverage@15%": 0.7737162750217581, "train_probe_calibration/coverage@20%": 0.8598781549173194, "train_probe_calibration/coverage@25%": 0.9512619669277633, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.6086489322342906, "train_probe_calibration/distribution_entropy_100": 0.32399377529298606, "train_probe_calibration/ece": 0.038424717145343686, "train_probe_calibration/mean_confidence": 0.7651436031331593, "train_probe_calibration/unique_confidence_per_question": 0.010416666666666666, "train_probe_calibration/unique_confidences": 12, "train_probe_completions/clipped_ratio": 0.001736111111111105, "train_probe_completions/max_length": 2880.1666666666665, "train_probe_completions/max_terminated_length": 2880.1666666666665, "train_probe_completions/mean_length": 747.1415100097656, "train_probe_completions/mean_terminated_length": 748.4186096191406, "train_probe_completions/min_length": 203.83333333333334, "train_probe_completions/min_terminated_length": 243.0, "train_probe_loss": 0.0, "train_probe_num_tokens": 621738700.0, "train_probe_reward": 1.2938086589177449, "train_probe_reward_std": 0.31472263236840564, "train_probe_rewards/accuracy_reward": 0.7352430522441864, "train_probe_rewards/brier_reward": 0.8549631237983704, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9973958333333334, "train_probe_rewards/mean_confidence_reward": 0.7631510297457377, "train_probe_runtime": 165.7718, "train_probe_samples_per_second": 6.032, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3800455729166667, "train_probe_signal/accuracy_reward/group_std_mean": 0.44128966828187305, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19002278645833334, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.19002278645833334, "train_probe_signal/advantage_abs_mean": 0.26244449118773144, "train_probe_signal/advantage_pre_scale_abs_mean": 0.26244449118773144, "train_probe_signal/advantage_pre_scale_std": 0.3124504089355469, "train_probe_signal/advantage_std": 0.3124504089355469, "train_probe_signal/brier_reward/centered_abs_mean": 0.1670305853088697, "train_probe_signal/brier_reward/group_std_mean": 0.22494085133075714, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08351529265443484, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.08351529265443484, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.0050455727614462376, "train_probe_signal/format_reward/group_std_mean": 0.014731391333043575, "train_probe_signal/format_reward/group_zero_std_frac": 0.9166666766007742, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0025227863807231188, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.15324705094099045, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.1951539988319079, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.5324703970994353e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.5324703970994353e-06, "train_probe_steps_per_second": 0.036 }, { "calibration/aurc": 0.13092579680447874, "calibration/batch_distribution_entropy": 0.6276670537021948, "calibration/batch_entropy_100bins": 0.3316895599320206, "calibration/batch_entropy_10bins": 0.6276670537021948, "calibration/batch_entropy_50bins": 0.3904595832598581, "calibration/batch_uniqueness": 0.38209639015063, "calibration/confidence_entropy": 0.4591777053056802, "calibration/coverage@0%": 0.0010526315789473684, "calibration/coverage@1%": 0.0010526315789473684, "calibration/coverage@10%": 0.41942434210526314, "calibration/coverage@15%": 0.5662409246026293, "calibration/coverage@20%": 0.7830812859691332, "calibration/coverage@25%": 0.9054183811099111, "calibration/coverage@30%": 0.9398232984293194, "calibration/coverage@5%": 0.3413212719298246, "calibration/distribution_entropy_10": 0.6276670537021948, "calibration/distribution_entropy_100": 0.3316895599320206, "calibration/ece": 0.08329600836702859, "calibration/mean_confidence": 0.749807476925117, "calibration/unique_confidence_per_question": 0.025520833333333333, "calibration/unique_confidences": 9.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005381944444444442, "completions/max_length": 3670.6, "completions/max_terminated_length": 3670.6, "completions/mean_length": 771.6765747070312, "completions/mean_terminated_length": 775.8438354492188, "completions/min_length": 0.0, "completions/min_terminated_length": 222.0, "epoch": 0.7331730769230769, "grad_norm": 0.0007620741962455213, "learning_rate": 4.4170673076923085e-06, "loss": -0.0037, "num_tokens": 633738398.0, "reward": 1.2771973609924316, "reward_std": 0.12964553087949754, "rewards/accuracy_reward": 0.7176215171813964, "rewards/brier_reward": 0.8421401739120483, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9946180582046509, "rewards/mean_confidence_reward": 0.7522072315216064, "sampling/batch_mean_priority_error": 0.013479166666666658, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5555555555555556, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0009865233907476068, "sampling/priority_kl": 0.029999896138906478, "sampling/priority_scale": 0.736354249692522, "sampling/prob_entropy": 10.278947830200195, "sampling/prob_max": 4.253947699908167e-05, "sampling/prob_min": 1.781034661689773e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7271999955177307, "sampling/prompt_draws_total": 21816.0, "sampling/seen_fraction": 0.570739996433258, "sampling/unseen_fraction": 0.42926000356674193, "signal/accuracy_reward/centered_abs_mean": 0.12093641608953476, "signal/accuracy_reward/group_std_mean": 0.162637859582901, "signal/accuracy_reward/group_zero_std_frac": 0.522222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06046820804476738, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06046820804476738, "signal/advantage_abs_mean": 0.09479492604732513, "signal/advantage_pre_scale_abs_mean": 0.09479492604732513, "signal/advantage_pre_scale_std": 0.18216970562934875, "signal/advantage_std": 0.18216970562934875, "signal/brier_reward/centered_abs_mean": 0.0808374285697937, "signal/brier_reward/group_std_mean": 0.10848785042762757, "signal/brier_reward/group_zero_std_frac": 0.17500000298023224, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04041871428489685, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04041871428489685, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.007291666604578495, "signal/format_reward/group_std_mean": 0.016734511591494083, "signal/format_reward/group_zero_std_frac": 0.919444465637207, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0036458333022892477, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0036458333022892477, "signal/mean_confidence_reward/centered_abs_mean": 0.06553446054458618, "signal/mean_confidence_reward/group_std_mean": 0.08630905747413635, "signal/mean_confidence_reward/group_zero_std_frac": 0.18611111342906952, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.553445928147994e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.553445928147994e-07, "step": 305 }, { "calibration/aurc": 0.13837794223912891, "calibration/batch_distribution_entropy": 0.7199264324569175, "calibration/batch_entropy_100bins": 0.3920106756730617, "calibration/batch_entropy_10bins": 0.7199264324569175, "calibration/batch_entropy_50bins": 0.46146862472273603, "calibration/batch_uniqueness": 0.5973591612208066, "calibration/confidence_entropy": 0.5280266050339674, "calibration/coverage@0%": 0.1471692683938273, "calibration/coverage@1%": 0.1471692683938273, "calibration/coverage@10%": 0.501302979724963, "calibration/coverage@15%": 0.5405839022139196, "calibration/coverage@20%": 0.6710906688447321, "calibration/coverage@25%": 0.9076763769086709, "calibration/coverage@30%": 0.95, "calibration/coverage@5%": 0.4018550029124053, "calibration/distribution_entropy_10": 0.7199264324569175, "calibration/distribution_entropy_100": 0.3920106756730617, "calibration/ece": 0.14846240057694993, "calibration/mean_confidence": 0.687106785034867, "calibration/unique_confidence_per_question": 0.025, "calibration/unique_confidences": 9.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00399305555555558, "completions/max_length": 3469.8, "completions/max_terminated_length": 3469.8, "completions/mean_length": 777.2681518554688, "completions/mean_terminated_length": 780.4172485351562, "completions/min_length": 47.4, "completions/min_terminated_length": 246.8, "epoch": 0.7451923076923077, "grad_norm": 0.0005892643239349127, "learning_rate": 4.3870192307692315e-06, "loss": -0.003, "num_tokens": 645810607.0, "reward": 1.279511857032776, "reward_std": 0.11136721819639206, "rewards/accuracy_reward": 0.7183159708976745, "rewards/brier_reward": 0.8445999145507812, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.99609375, "rewards/mean_confidence_reward": 0.7028311610221862, "sampling/batch_mean_priority_error": 0.01767881944444443, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5083333333333333, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.001006615860387683, "sampling/priority_kl": 0.030000757426023483, "sampling/priority_scale": 0.7339580834144727, "sampling/prob_entropy": 10.278960227966309, "sampling/prob_max": 4.2663155909394845e-05, "sampling/prob_min": 1.7912808471010066e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7391999959945679, "sampling/prompt_draws_total": 22176.0, "sampling/seen_fraction": 0.5771666646003724, "sampling/unseen_fraction": 0.4228333353996277, "signal/accuracy_reward/centered_abs_mean": 0.114306640625, "signal/accuracy_reward/group_std_mean": 0.1513543099164963, "signal/accuracy_reward/group_zero_std_frac": 0.5694444417953491, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0571533203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0571533203125, "signal/advantage_abs_mean": 0.08048739284276962, "signal/advantage_pre_scale_abs_mean": 0.08048739284276962, "signal/advantage_pre_scale_std": 0.1592124491930008, "signal/advantage_std": 0.1592124491930008, "signal/brier_reward/centered_abs_mean": 0.07133363783359528, "signal/brier_reward/group_std_mean": 0.09470874518156051, "signal/brier_reward/group_zero_std_frac": 0.1166666679084301, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03566681891679764, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03566681891679764, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.00728624127805233, "signal/format_reward/group_std_mean": 0.01631898358464241, "signal/format_reward/group_zero_std_frac": 0.925000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003643120639026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003643120639026165, "signal/mean_confidence_reward/centered_abs_mean": 0.06672917604446411, "signal/mean_confidence_reward/group_std_mean": 0.08671197891235352, "signal/mean_confidence_reward/group_zero_std_frac": 0.12222222313284874, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.672917265859723e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.672917265859723e-07, "step": 310 }, { "calibration/aurc": 0.13686577318136903, "calibration/batch_distribution_entropy": 0.6244593056933125, "calibration/batch_entropy_100bins": 0.34029339548266907, "calibration/batch_entropy_10bins": 0.6244593056933125, "calibration/batch_entropy_50bins": 0.40058787926118855, "calibration/batch_uniqueness": 0.40405619414598126, "calibration/confidence_entropy": 0.47154955375434, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.5093171296296296, "calibration/coverage@15%": 0.5838955026455026, "calibration/coverage@20%": 0.7709579672454998, "calibration/coverage@25%": 0.8654971714534379, "calibration/coverage@30%": 0.9467798085291557, "calibration/coverage@5%": 0.19791666666666666, "calibration/distribution_entropy_10": 0.6244593056933125, "calibration/distribution_entropy_100": 0.34029339548266907, "calibration/ece": 0.11483152296855784, "calibration/mean_confidence": 0.7404223662829652, "calibration/unique_confidence_per_question": 0.025, "calibration/unique_confidences": 9.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004253472222222232, "completions/max_length": 3368.0, "completions/max_terminated_length": 3368.0, "completions/mean_length": 790.6902954101563, "completions/mean_terminated_length": 794.0724731445313, "completions/min_length": 0.0, "completions/min_terminated_length": 238.2, "epoch": 0.7572115384615384, "grad_norm": 0.0007672872743569314, "learning_rate": 4.356971153846154e-06, "loss": -0.0027, "num_tokens": 658014783.0, "reward": 1.290112805366516, "reward_std": 0.12518732845783234, "rewards/accuracy_reward": 0.7293402910232544, "rewards/brier_reward": 0.8555580139160156, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9953125, "rewards/mean_confidence_reward": 0.7429038882255554, "sampling/batch_mean_priority_error": 0.014930555555555548, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5194444444444444, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0010217264294624328, "sampling/priority_kl": 0.029999467357993127, "sampling/priority_scale": 0.7315961777931079, "sampling/prob_entropy": 10.278939628601075, "sampling/prob_max": 4.2783647950273006e-05, "sampling/prob_min": 1.8013632143265568e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.751199996471405, "sampling/prompt_draws_total": 22536.0, "sampling/seen_fraction": 0.5833466529846192, "sampling/unseen_fraction": 0.41665334701538087, "signal/accuracy_reward/centered_abs_mean": 0.11973741352558136, "signal/accuracy_reward/group_std_mean": 0.16205187290906906, "signal/accuracy_reward/group_zero_std_frac": 0.5250000119209289, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05986870676279068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05986870676279068, "signal/advantage_abs_mean": 0.09079453721642494, "signal/advantage_pre_scale_abs_mean": 0.09079453721642494, "signal/advantage_pre_scale_std": 0.17762331664562225, "signal/advantage_std": 0.17762331664562225, "signal/brier_reward/centered_abs_mean": 0.0763278879225254, "signal/brier_reward/group_std_mean": 0.10317817330360413, "signal/brier_reward/group_zero_std_frac": 0.13611111082136632, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0381639439612627, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0381639439612627, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.00849609375, "signal/format_reward/group_std_mean": 0.0171542102470994, "signal/format_reward/group_zero_std_frac": 0.9277777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004248046875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004248046875, "signal/mean_confidence_reward/centered_abs_mean": 0.0643157720565796, "signal/mean_confidence_reward/group_std_mean": 0.08580757826566696, "signal/mean_confidence_reward/group_zero_std_frac": 0.15277777947485446, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.431576821341878e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.431576821341878e-07, "step": 315 }, { "calibration/aurc": 0.16717747584400544, "calibration/batch_distribution_entropy": 0.602479901250202, "calibration/batch_entropy_100bins": 0.324314097132174, "calibration/batch_entropy_10bins": 0.602479901250202, "calibration/batch_entropy_50bins": 0.38177730778586694, "calibration/batch_uniqueness": 0.32554403739572646, "calibration/confidence_entropy": 0.4465209078155052, "calibration/coverage@0%": 0.0016101816101816101, "calibration/coverage@1%": 0.0016101816101816101, "calibration/coverage@10%": 0.2815870954907109, "calibration/coverage@15%": 0.46984229412824813, "calibration/coverage@20%": 0.6756455747056269, "calibration/coverage@25%": 0.8430287430287431, "calibration/coverage@30%": 0.90006292006292, "calibration/coverage@5%": 0.1304059931285062, "calibration/distribution_entropy_10": 0.602479901250202, "calibration/distribution_entropy_100": 0.324314097132174, "calibration/ece": 0.11514510673588034, "calibration/mean_confidence": 0.7590287926881031, "calibration/unique_confidence_per_question": 0.027083333333333338, "calibration/unique_confidences": 10.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011284722222222232, "completions/max_length": 3799.6, "completions/max_terminated_length": 3799.6, "completions/mean_length": 851.075341796875, "completions/mean_terminated_length": 861.0564086914062, "completions/min_length": 0.0, "completions/min_terminated_length": 247.4, "epoch": 0.7692307692307693, "grad_norm": 0.0007334538386203349, "learning_rate": 4.326923076923077e-06, "loss": -0.0122, "num_tokens": 670893411.0, "reward": 1.2725468873977661, "reward_std": 0.145135360956192, "rewards/accuracy_reward": 0.72265625, "rewards/brier_reward": 0.834314513206482, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9881076335906982, "rewards/mean_confidence_reward": 0.7703335165977478, "sampling/batch_mean_priority_error": 0.019050540123456784, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.49444444444444446, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0010432714829221369, "sampling/priority_kl": 0.030000781640410423, "sampling/priority_scale": 0.7293619335396215, "sampling/prob_entropy": 10.278955459594727, "sampling/prob_max": 4.290501019568183e-05, "sampling/prob_min": 1.811251713661477e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7632000088691712, "sampling/prompt_draws_total": 22896.0, "sampling/seen_fraction": 0.5894733190536499, "sampling/unseen_fraction": 0.4105266809463501, "signal/accuracy_reward/centered_abs_mean": 0.13217773288488388, "signal/accuracy_reward/group_std_mean": 0.17295660972595214, "signal/accuracy_reward/group_zero_std_frac": 0.5166666626930236, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06608886644244194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06608886644244194, "signal/advantage_abs_mean": 0.1076438531279564, "signal/advantage_pre_scale_abs_mean": 0.1076438531279564, "signal/advantage_pre_scale_std": 0.20443538427352906, "signal/advantage_std": 0.20443538427352906, "signal/brier_reward/centered_abs_mean": 0.09236456230282783, "signal/brier_reward/group_std_mean": 0.12330232709646224, "signal/brier_reward/group_zero_std_frac": 0.225, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.046182281151413915, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.046182281151413915, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.018429904524236918, "signal/format_reward/group_std_mean": 0.03126038983464241, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009214952262118459, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009214952262118459, "signal/mean_confidence_reward/centered_abs_mean": 0.06820750385522842, "signal/mean_confidence_reward/group_std_mean": 0.09028767794370651, "signal/mean_confidence_reward/group_zero_std_frac": 0.2500000029802322, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.820750058977865e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.820750058977865e-07, "step": 320 }, { "calibration/aurc": 0.10863199741676803, "calibration/batch_distribution_entropy": 0.6347477303485826, "calibration/batch_entropy_100bins": 0.34220862177900163, "calibration/batch_entropy_10bins": 0.6347477303485826, "calibration/batch_entropy_50bins": 0.40284245266912944, "calibration/batch_uniqueness": 0.40180875509585673, "calibration/confidence_entropy": 0.46986021781998544, "calibration/coverage@0%": 0.061942257217847775, "calibration/coverage@1%": 0.061942257217847775, "calibration/coverage@10%": 0.477210801653874, "calibration/coverage@15%": 0.7232488607091858, "calibration/coverage@20%": 0.8684191024109602, "calibration/coverage@25%": 0.9326315789473684, "calibration/coverage@30%": 0.9547368421052631, "calibration/coverage@5%": 0.309322237655811, "calibration/distribution_entropy_10": 0.6347477303485826, "calibration/distribution_entropy_100": 0.34220862177900163, "calibration/ece": 0.1094783425165194, "calibration/mean_confidence": 0.7294889187832778, "calibration/unique_confidence_per_question": 0.0265625, "calibration/unique_confidences": 10.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011024305555555558, "completions/max_length": 3871.2, "completions/max_terminated_length": 3871.2, "completions/mean_length": 931.85478515625, "completions/mean_terminated_length": 942.4100463867187, "completions/min_length": 0.0, "completions/min_terminated_length": 252.6, "epoch": 0.78125, "grad_norm": 0.000653931638225913, "learning_rate": 4.296875e-06, "loss": -0.0099, "num_tokens": 684696186.0, "reward": 1.258277130126953, "reward_std": 0.15136132538318633, "rewards/accuracy_reward": 0.703906238079071, "rewards/brier_reward": 0.8240050315856934, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9886284589767456, "rewards/mean_confidence_reward": 0.7231320858001709, "sampling/batch_mean_priority_error": 0.01323225308641975, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5083333333333333, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0010600588750094176, "sampling/priority_kl": 0.0299997728317976, "sampling/priority_scale": 0.7272611201042309, "sampling/prob_entropy": 10.27895393371582, "sampling/prob_max": 4.3025674676755445e-05, "sampling/prob_min": 1.8203360014013015e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7751999974250794, "sampling/prompt_draws_total": 23256.0, "sampling/seen_fraction": 0.5954999923706055, "sampling/unseen_fraction": 0.40450000762939453, "signal/accuracy_reward/centered_abs_mean": 0.14385308176279069, "signal/accuracy_reward/group_std_mean": 0.19072539806365968, "signal/accuracy_reward/group_zero_std_frac": 0.450000011920929, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07192654088139534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07192654088139534, "signal/advantage_abs_mean": 0.10959743559360505, "signal/advantage_pre_scale_abs_mean": 0.10959743559360505, "signal/advantage_pre_scale_std": 0.2033509284257889, "signal/advantage_std": 0.2033509284257889, "signal/brier_reward/centered_abs_mean": 0.09024559706449509, "signal/brier_reward/group_std_mean": 0.12317982912063599, "signal/brier_reward/group_zero_std_frac": 0.10833333600312471, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04512279853224754, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04512279853224754, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.019015842117369175, "signal/format_reward/group_std_mean": 0.03431776948273182, "signal/format_reward/group_zero_std_frac": 0.8638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009507921058684587, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009507921058684587, "signal/mean_confidence_reward/centered_abs_mean": 0.07643213719129563, "signal/mean_confidence_reward/group_std_mean": 0.10056560337543488, "signal/mean_confidence_reward/group_zero_std_frac": 0.12222222611308098, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.643213734809251e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.643213734809251e-07, "step": 325 }, { "calibration/aurc": 0.11886209120977206, "calibration/batch_distribution_entropy": 0.7440964345869737, "calibration/batch_entropy_100bins": 0.40832423826981046, "calibration/batch_entropy_10bins": 0.7440964345869737, "calibration/batch_entropy_50bins": 0.4806726866603972, "calibration/batch_uniqueness": 0.5958464567420566, "calibration/confidence_entropy": 0.5508633686135129, "calibration/coverage@0%": 0.13636579655279554, "calibration/coverage@1%": 0.13636579655279554, "calibration/coverage@10%": 0.3909505278628627, "calibration/coverage@15%": 0.7179516933647312, "calibration/coverage@20%": 0.8417117793387895, "calibration/coverage@25%": 0.9079508984453867, "calibration/coverage@30%": 0.9534564102564103, "calibration/coverage@5%": 0.26832501380613044, "calibration/distribution_entropy_10": 0.7440964345869737, "calibration/distribution_entropy_100": 0.40832423826981046, "calibration/ece": 0.14901436740330448, "calibration/mean_confidence": 0.633296158815817, "calibration/unique_confidence_per_question": 0.02760416666666667, "calibration/unique_confidences": 10.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014236111111111116, "completions/max_length": 3919.2, "completions/max_terminated_length": 3919.2, "completions/mean_length": 923.294970703125, "completions/mean_terminated_length": 936.7350952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 255.0, "epoch": 0.7932692307692307, "grad_norm": 0.0006061934400349855, "learning_rate": 4.266826923076923e-06, "loss": -0.0167, "num_tokens": 698460000.0, "reward": 1.250968337059021, "reward_std": 0.12344784587621689, "rewards/accuracy_reward": 0.6842013955116272, "rewards/brier_reward": 0.8321323394775391, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9855902791023254, "rewards/mean_confidence_reward": 0.6281623840332031, "sampling/batch_mean_priority_error": 0.020135886873267825, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.49444444444444435, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.001079965545795858, "sampling/priority_kl": 0.029999606683850287, "sampling/priority_scale": 0.7252804696327075, "sampling/prob_entropy": 10.278935813903809, "sampling/prob_max": 4.3146366806467995e-05, "sampling/prob_min": 1.8293721950612962e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7871999979019165, "sampling/prompt_draws_total": 23616.0, "sampling/seen_fraction": 0.601419997215271, "sampling/unseen_fraction": 0.398580002784729, "signal/accuracy_reward/centered_abs_mean": 0.11248915046453475, "signal/accuracy_reward/group_std_mean": 0.15042957961559295, "signal/accuracy_reward/group_zero_std_frac": 0.5555555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05624457523226738, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05624457523226738, "signal/advantage_abs_mean": 0.08699684739112853, "signal/advantage_pre_scale_abs_mean": 0.08699684739112853, "signal/advantage_pre_scale_std": 0.1748094767332077, "signal/advantage_std": 0.1748094767332077, "signal/brier_reward/centered_abs_mean": 0.07624856233596802, "signal/brier_reward/group_std_mean": 0.10298327952623368, "signal/brier_reward/group_zero_std_frac": 0.06111111249774694, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03812428116798401, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03812428116798401, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.02356770820915699, "signal/format_reward/group_std_mean": 0.0421368557959795, "signal/format_reward/group_zero_std_frac": 0.8333333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011783854104578495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011783854104578495, "signal/mean_confidence_reward/centered_abs_mean": 0.06578600704669953, "signal/mean_confidence_reward/group_std_mean": 0.08733049482107162, "signal/mean_confidence_reward/group_zero_std_frac": 0.06388889048248529, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.578600618922792e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.578600618922792e-07, "step": 330 }, { "calibration/aurc": 0.08823840005491149, "calibration/batch_distribution_entropy": 0.6795859117610638, "calibration/batch_entropy_100bins": 0.3769886039645168, "calibration/batch_entropy_10bins": 0.6795859117610638, "calibration/batch_entropy_50bins": 0.44378488496252083, "calibration/batch_uniqueness": 0.5419092140454492, "calibration/confidence_entropy": 0.5797361103491635, "calibration/coverage@0%": 0.07404611229393553, "calibration/coverage@1%": 0.32532828569161565, "calibration/coverage@10%": 0.62353149111474, "calibration/coverage@15%": 0.8074700707412464, "calibration/coverage@20%": 0.8522986812087805, "calibration/coverage@25%": 0.9249272262817085, "calibration/coverage@30%": 0.9376344086021506, "calibration/coverage@5%": 0.47406383457698664, "calibration/distribution_entropy_10": 0.6795859117610638, "calibration/distribution_entropy_100": 0.3769886039645168, "calibration/ece": 0.1873793358632409, "calibration/mean_confidence": 0.617761095095165, "calibration/unique_confidence_per_question": 0.026041666666666664, "calibration/unique_confidences": 10.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013107638888888884, "completions/max_length": 3929.0, "completions/max_terminated_length": 3929.0, "completions/mean_length": 956.8102416992188, "completions/mean_terminated_length": 969.6889038085938, "completions/min_length": 0.0, "completions/min_terminated_length": 243.0, "epoch": 0.8052884615384616, "grad_norm": 0.0007015041192062199, "learning_rate": 4.236778846153847e-06, "loss": -0.0143, "num_tokens": 712620406.0, "reward": 1.2798907995223998, "reward_std": 0.11801934242248535, "rewards/accuracy_reward": 0.7401909708976746, "rewards/brier_reward": 0.8328598380088806, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.986718761920929, "rewards/mean_confidence_reward": 0.5951099038124085, "sampling/batch_mean_priority_error": 0.025409692578360808, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.48888888888888893, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0011080709984526039, "sampling/priority_kl": 0.029999785125255585, "sampling/priority_scale": 0.7234982907539234, "sampling/prob_entropy": 10.27892837524414, "sampling/prob_max": 4.3270392779959366e-05, "sampling/prob_min": 1.8385029761702754e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.7991999983787537, "sampling/prompt_draws_total": 23976.0, "sampling/seen_fraction": 0.6074000000953674, "sampling/unseen_fraction": 0.39259999990463257, "signal/accuracy_reward/centered_abs_mean": 0.12392035871744156, "signal/accuracy_reward/group_std_mean": 0.1636717587709427, "signal/accuracy_reward/group_zero_std_frac": 0.5333333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06196017935872078, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06196017935872078, "signal/advantage_abs_mean": 0.08390375822782517, "signal/advantage_pre_scale_abs_mean": 0.08390375822782517, "signal/advantage_pre_scale_std": 0.16726355850696564, "signal/advantage_std": 0.16726355850696564, "signal/brier_reward/centered_abs_mean": 0.07296821475028992, "signal/brier_reward/group_std_mean": 0.09915484488010406, "signal/brier_reward/group_zero_std_frac": 0.05555555708706379, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03648410737514496, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03648410737514496, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.022119140625, "signal/format_reward/group_std_mean": 0.04018164724111557, "signal/format_reward/group_zero_std_frac": 0.8388889074325562, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0110595703125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0110595703125, "signal/mean_confidence_reward/centered_abs_mean": 0.06477264314889908, "signal/mean_confidence_reward/group_std_mean": 0.08538633137941361, "signal/mean_confidence_reward/group_zero_std_frac": 0.05555555708706379, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.477264264503902e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.477264264503902e-07, "step": 335 }, { "calibration/aurc": 0.13969130786823786, "calibration/batch_distribution_entropy": 0.6532072460044838, "calibration/batch_entropy_100bins": 0.35603898661564687, "calibration/batch_entropy_10bins": 0.6532072460044838, "calibration/batch_entropy_50bins": 0.4191233343814, "calibration/batch_uniqueness": 0.4666210398887169, "calibration/confidence_entropy": 0.5266185188445387, "calibration/coverage@0%": 0.04281984334203655, "calibration/coverage@1%": 0.12428198433420365, "calibration/coverage@10%": 0.4448985862018387, "calibration/coverage@15%": 0.5834735535647629, "calibration/coverage@20%": 0.6554527202314298, "calibration/coverage@25%": 0.7479446061963418, "calibration/coverage@30%": 0.8989081746920494, "calibration/coverage@5%": 0.39751933088268976, "calibration/distribution_entropy_10": 0.6532072460044838, "calibration/distribution_entropy_100": 0.35603898661564687, "calibration/ece": 0.11965576739251063, "calibration/mean_confidence": 0.7021747605627129, "calibration/unique_confidence_per_question": 0.025520833333333333, "calibration/unique_confidences": 9.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011284722222222232, "completions/max_length": 4009.8, "completions/max_terminated_length": 4009.8, "completions/mean_length": 956.4407104492187, "completions/mean_terminated_length": 967.5976806640625, "completions/min_length": 0.0, "completions/min_terminated_length": 290.6, "epoch": 0.8173076923076923, "grad_norm": 0.0009944693883880973, "learning_rate": 4.20673076923077e-06, "loss": -0.0111, "num_tokens": 726726955.0, "reward": 1.2707728147506714, "reward_std": 0.12885977178812028, "rewards/accuracy_reward": 0.7121527791023254, "rewards/brier_reward": 0.8406639814376831, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9887152791023255, "rewards/mean_confidence_reward": 0.6793183445930481, "sampling/batch_mean_priority_error": 0.01789025548696844, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.48611111111111105, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0011320403078570961, "sampling/priority_kl": 0.030000582709908484, "sampling/priority_scale": 0.721624392340891, "sampling/prob_entropy": 10.27894229888916, "sampling/prob_max": 4.339007573435083e-05, "sampling/prob_min": 1.8476794139132835e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.8111999988555908, "sampling/prompt_draws_total": 24336.0, "sampling/seen_fraction": 0.6130866646766663, "sampling/unseen_fraction": 0.3869133353233337, "signal/accuracy_reward/centered_abs_mean": 0.12188584953546525, "signal/accuracy_reward/group_std_mean": 0.16384209394454957, "signal/accuracy_reward/group_zero_std_frac": 0.5250000178813934, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06094292476773262, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06094292476773262, "signal/advantage_abs_mean": 0.09127618968486786, "signal/advantage_pre_scale_abs_mean": 0.09127618968486786, "signal/advantage_pre_scale_std": 0.18212750256061555, "signal/advantage_std": 0.18212750256061555, "signal/brier_reward/centered_abs_mean": 0.07710054516792297, "signal/brier_reward/group_std_mean": 0.10524273961782456, "signal/brier_reward/group_zero_std_frac": 0.09722222462296486, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03855027258396149, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03855027258396149, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.01925998292863369, "signal/format_reward/group_std_mean": 0.03434903621673584, "signal/format_reward/group_zero_std_frac": 0.8666666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009629991464316845, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009629991464316845, "signal/mean_confidence_reward/centered_abs_mean": 0.06390919610857963, "signal/mean_confidence_reward/group_std_mean": 0.08523715138435364, "signal/mean_confidence_reward/group_zero_std_frac": 0.1027777798473835, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.390919111254334e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.390919111254334e-07, "step": 340 }, { "calibration/aurc": 0.09080647825464169, "calibration/batch_distribution_entropy": 0.5828487105108893, "calibration/batch_entropy_100bins": 0.30621682881224266, "calibration/batch_entropy_10bins": 0.5828487105108893, "calibration/batch_entropy_50bins": 0.36047349633099196, "calibration/batch_uniqueness": 0.32978425497608505, "calibration/confidence_entropy": 0.4501530039067473, "calibration/coverage@0%": 0.17561666048288466, "calibration/coverage@1%": 0.28143676630299047, "calibration/coverage@10%": 0.667472317927639, "calibration/coverage@15%": 0.7943842026331502, "calibration/coverage@20%": 0.8553810501040052, "calibration/coverage@25%": 0.8659630606860158, "calibration/coverage@30%": 0.9324538258575199, "calibration/coverage@5%": 0.4009431340756205, "calibration/distribution_entropy_10": 0.5828487105108893, "calibration/distribution_entropy_100": 0.30621682881224266, "calibration/ece": 0.13318603844096294, "calibration/mean_confidence": 0.7800264505674386, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007812500000000023, "completions/max_length": 3928.8, "completions/max_terminated_length": 3928.8, "completions/mean_length": 894.5711059570312, "completions/mean_terminated_length": 901.781787109375, "completions/min_length": 0.0, "completions/min_terminated_length": 221.2, "epoch": 0.8293269230769231, "grad_norm": 0.0008186151972040534, "learning_rate": 4.176682692307693e-06, "loss": -0.0084, "num_tokens": 740097726.0, "reward": 1.2895998001098632, "reward_std": 0.1312810465693474, "rewards/accuracy_reward": 0.7419270992279052, "rewards/brier_reward": 0.8452433705329895, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9920138835906982, "rewards/mean_confidence_reward": 0.7631656527519226, "sampling/batch_mean_priority_error": 0.014798804012345674, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4916666666666667, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.001147703337483108, "sampling/priority_kl": 0.03000086471438408, "sampling/priority_scale": 0.7201250732643529, "sampling/prob_entropy": 10.278956031799316, "sampling/prob_max": 4.351890675025061e-05, "sampling/prob_min": 1.8564553465694188e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.823200011253357, "sampling/prompt_draws_total": 24696.0, "sampling/seen_fraction": 0.6190933346748352, "sampling/unseen_fraction": 0.3809066653251648, "signal/accuracy_reward/centered_abs_mean": 0.1175509974360466, "signal/accuracy_reward/group_std_mean": 0.15728657841682434, "signal/accuracy_reward/group_zero_std_frac": 0.5388888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0587754987180233, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0587754987180233, "signal/advantage_abs_mean": 0.093961463868618, "signal/advantage_pre_scale_abs_mean": 0.093961463868618, "signal/advantage_pre_scale_std": 0.1893182247877121, "signal/advantage_std": 0.1893182247877121, "signal/brier_reward/centered_abs_mean": 0.07490430921316146, "signal/brier_reward/group_std_mean": 0.10306931734085083, "signal/brier_reward/group_zero_std_frac": 0.16944444477558135, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03745215460658073, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03745215460658073, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.014811197947710752, "signal/format_reward/group_std_mean": 0.03360258936882019, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007405598973855376, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007405598973855376, "signal/mean_confidence_reward/centered_abs_mean": 0.059064753353595734, "signal/mean_confidence_reward/group_std_mean": 0.07968888133764267, "signal/mean_confidence_reward/group_zero_std_frac": 0.17777778208255768, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.906475166739256e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.906475166739256e-07, "step": 345 }, { "calibration/aurc": 0.17821393399430568, "calibration/batch_distribution_entropy": 0.5281294051861017, "calibration/batch_entropy_100bins": 0.2789663542218751, "calibration/batch_entropy_10bins": 0.5281294051861017, "calibration/batch_entropy_50bins": 0.3283946785521962, "calibration/batch_uniqueness": 0.2076128794208711, "calibration/confidence_entropy": 0.4363416647275226, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.36962873167834004, "calibration/coverage@15%": 0.5259358465608466, "calibration/coverage@20%": 0.676443308413955, "calibration/coverage@25%": 0.784228192926247, "calibration/coverage@30%": 0.8718103195543829, "calibration/coverage@5%": 0.13733681462140993, "calibration/distribution_entropy_10": 0.5281294051861017, "calibration/distribution_entropy_100": 0.2789663542218751, "calibration/ece": 0.1380739637193096, "calibration/mean_confidence": 0.7937734840831027, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012239583333333326, "completions/max_length": 3893.8, "completions/max_terminated_length": 3893.8, "completions/mean_length": 956.0777099609375, "completions/mean_terminated_length": 967.9925659179687, "completions/min_length": 0.0, "completions/min_terminated_length": 256.2, "epoch": 0.8413461538461539, "grad_norm": 0.0008964097942225635, "learning_rate": 4.146634615384616e-06, "loss": -0.0127, "num_tokens": 754233501.0, "reward": 1.2726234674453736, "reward_std": 0.15154092907905578, "rewards/accuracy_reward": 0.7247395753860474, "rewards/brier_reward": 0.8327310800552368, "rewards/confidence_one_or_zero": 0.000260416668606922, "rewards/format_reward": 0.9877604126930237, "rewards/mean_confidence_reward": 0.7958799719810485, "sampling/batch_mean_priority_error": 0.014881944444444437, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4722222222222222, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0011662805685773493, "sampling/priority_kl": 0.030000032857060432, "sampling/priority_scale": 0.7186060607200488, "sampling/prob_entropy": 10.278964614868164, "sampling/prob_max": 4.364315245766193e-05, "sampling/prob_min": 1.8648596233106217e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.8351999878883362, "sampling/prompt_draws_total": 25056.0, "sampling/seen_fraction": 0.6248266577720643, "sampling/unseen_fraction": 0.3751733422279358, "signal/accuracy_reward/centered_abs_mean": 0.1272081181406975, "signal/accuracy_reward/group_std_mean": 0.1718853384256363, "signal/accuracy_reward/group_zero_std_frac": 0.49722222685813905, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06360405907034875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06360405907034875, "signal/advantage_abs_mean": 0.10877088755369187, "signal/advantage_pre_scale_abs_mean": 0.10877088755369187, "signal/advantage_pre_scale_std": 0.21029807925224303, "signal/advantage_std": 0.21029807925224303, "signal/brier_reward/centered_abs_mean": 0.08816509544849396, "signal/brier_reward/group_std_mean": 0.12144324630498886, "signal/brier_reward/group_zero_std_frac": 0.20000000149011612, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04408254772424698, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04408254772424698, "signal/confidence_one_or_zero/centered_abs_mean": 0.0005045572877861559, "signal/confidence_one_or_zero/group_std_mean": 0.0014731390401721, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.0455724931453e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.0455724931453e-09, "signal/format_reward/centered_abs_mean": 0.019775390811264514, "signal/format_reward/group_std_mean": 0.03549950160086155, "signal/format_reward/group_zero_std_frac": 0.8583333373069764, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009887695405632257, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009887695405632257, "signal/mean_confidence_reward/centered_abs_mean": 0.05625826939940452, "signal/mean_confidence_reward/group_std_mean": 0.07565618604421616, "signal/mean_confidence_reward/group_zero_std_frac": 0.23888888955116272, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.625826929644973e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.625826929644973e-07, "step": 350 }, { "epoch": 0.8413461538461539, "eval_calibration/aurc": 0.16606825973650804, "eval_calibration/batch_distribution_entropy": 0.5848651349995244, "eval_calibration/batch_entropy_100bins": 0.3121246426668811, "eval_calibration/batch_entropy_10bins": 0.5848651349995244, "eval_calibration/batch_entropy_50bins": 0.367428079213045, "eval_calibration/batch_uniqueness": 0.3537960150186423, "eval_calibration/confidence_entropy": 0.4525167738181449, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.5004389815627743, "eval_calibration/coverage@20%": 0.7471466198419666, "eval_calibration/coverage@25%": 0.8805970149253731, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.5848651349995244, "eval_calibration/distribution_entropy_100": 0.3121246426668811, "eval_calibration/ece": 0.07936786654960498, "eval_calibration/mean_confidence": 0.7861281826163302, "eval_calibration/unique_confidence_per_question": 0.010416666666666666, "eval_calibration/unique_confidences": 12, "eval_completions/clipped_ratio": 0.008680555555555544, "eval_completions/max_length": 3213.5, "eval_completions/max_terminated_length": 3213.5, "eval_completions/mean_length": 1018.7047322591146, "eval_completions/mean_terminated_length": 1027.7530924479167, "eval_completions/min_length": 129.83333333333334, "eval_completions/min_terminated_length": 324.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 754233501.0, "eval_reward": 1.2533830801645915, "eval_reward_std": 0.35429341594378155, "eval_rewards/accuracy_reward": 0.6987847288449606, "eval_rewards/brier_reward": 0.8192505836486816, "eval_rewards/confidence_one_or_zero": 0.0008680555814256271, "eval_rewards/format_reward": 0.9887152711550394, "eval_rewards/mean_confidence_reward": 0.7776041527589163, "eval_runtime": 207.5115, "eval_samples_per_second": 4.819, "eval_signal/accuracy_reward/centered_abs_mean": 0.4018554737170537, "eval_signal/accuracy_reward/group_std_mean": 0.45326830446720123, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20092773685852686, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20092773685852686, "eval_signal/advantage_abs_mean": 0.30226531128088635, "eval_signal/advantage_pre_scale_abs_mean": 0.30226531128088635, "eval_signal/advantage_pre_scale_std": 0.3529137223958969, "eval_signal/advantage_std": 0.3529137223958969, "eval_signal/brier_reward/centered_abs_mean": 0.2024331366022428, "eval_signal/brier_reward/group_std_mean": 0.2561540404955546, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1012165683011214, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.1012165683011214, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0016818575871487458, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0049104637776811915, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222288449606, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151002e-08, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151002e-08, "eval_signal/format_reward/centered_abs_mean": 0.021755641947189968, "eval_signal/format_reward/group_std_mean": 0.06084661930799484, "eval_signal/format_reward/group_zero_std_frac": 0.6666666915019354, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010877820973594984, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.010877820973594984, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.12927334010601044, "eval_signal/mean_confidence_reward/group_std_mean": 0.1735323245326678, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.2927333538452028e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.2927333538452028e-06, "eval_steps_per_second": 0.029, "step": 350 }, { "epoch": 0.8413461538461539, "step": 350, "train_probe_calibration/aurc": 0.13379297260752054, "train_probe_calibration/batch_distribution_entropy": 0.5711268571988026, "train_probe_calibration/batch_entropy_100bins": 0.301687839849546, "train_probe_calibration/batch_entropy_10bins": 0.5711268571988026, "train_probe_calibration/batch_entropy_50bins": 0.35514204380253295, "train_probe_calibration/batch_uniqueness": 0.32175586312125304, "train_probe_calibration/confidence_entropy": 0.44695025580987513, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.5123022847100176, "train_probe_calibration/coverage@15%": 0.5123022847100176, "train_probe_calibration/coverage@20%": 0.8321616871704746, "train_probe_calibration/coverage@25%": 0.9226713532513181, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.5711268571988026, "train_probe_calibration/distribution_entropy_100": 0.301687839849546, "train_probe_calibration/ece": 0.06252196836555338, "train_probe_calibration/mean_confidence": 0.7897627416520212, "train_probe_calibration/unique_confidence_per_question": 0.0078125, "train_probe_calibration/unique_confidences": 9, "train_probe_completions/clipped_ratio": 0.011284722222222229, "train_probe_completions/max_length": 3109.1666666666665, "train_probe_completions/max_terminated_length": 3109.1666666666665, "train_probe_completions/mean_length": 1027.9149475097656, "train_probe_completions/mean_terminated_length": 1039.559346516927, "train_probe_completions/min_length": 97.5, "train_probe_completions/min_terminated_length": 289.6666666666667, "train_probe_loss": 0.0, "train_probe_num_tokens": 754233501.0, "train_probe_reward": 1.2771899302800496, "train_probe_reward_std": 0.3486747443675995, "train_probe_rewards/accuracy_reward": 0.7326388955116272, "train_probe_rewards/brier_reward": 0.8338780403137207, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9878472288449606, "train_probe_rewards/mean_confidence_reward": 0.7801649272441864, "train_probe_runtime": 210.7138, "train_probe_samples_per_second": 4.746, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3829210102558136, "train_probe_signal/accuracy_reward/group_std_mean": 0.44330157339572906, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1914605051279068, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.1914605051279068, "train_probe_signal/advantage_abs_mean": 0.28897509972254437, "train_probe_signal/advantage_pre_scale_abs_mean": 0.28897509972254437, "train_probe_signal/advantage_pre_scale_std": 0.346636101603508, "train_probe_signal/advantage_std": 0.346636101603508, "train_probe_signal/brier_reward/centered_abs_mean": 0.19374607255061468, "train_probe_signal/brier_reward/group_std_mean": 0.2492145225405693, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09687303627530734, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.09687303627530734, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.02332899331425627, "train_probe_signal/format_reward/group_std_mean": 0.06276767483601968, "train_probe_signal/format_reward/group_zero_std_frac": 0.6666666766007742, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011664496657128135, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.011664496657128135, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.1300916684170564, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.17594958345095316, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.3009165892678236e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.3009165892678236e-06, "train_probe_steps_per_second": 0.028 }, { "calibration/aurc": 0.21392597337683425, "calibration/batch_distribution_entropy": 0.5947614532869031, "calibration/batch_entropy_100bins": 0.32152541550615227, "calibration/batch_entropy_10bins": 0.5947614532869031, "calibration/batch_entropy_50bins": 0.37665393610920955, "calibration/batch_uniqueness": 0.39637935511209765, "calibration/confidence_entropy": 0.47112190410813054, "calibration/coverage@0%": 0.0005347593582887701, "calibration/coverage@1%": 0.0005347593582887701, "calibration/coverage@10%": 0.03333899216252158, "calibration/coverage@15%": 0.20688396570749515, "calibration/coverage@20%": 0.5179778252432606, "calibration/coverage@25%": 0.745892119333808, "calibration/coverage@30%": 0.8624386627335564, "calibration/coverage@5%": 0.03333899216252158, "calibration/distribution_entropy_10": 0.5947614532869031, "calibration/distribution_entropy_100": 0.32152541550615227, "calibration/ece": 0.12702372029237322, "calibration/mean_confidence": 0.768511211859987, "calibration/unique_confidence_per_question": 0.025520833333333333, "calibration/unique_confidences": 9.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011111111111111138, "completions/max_length": 3950.6, "completions/max_terminated_length": 3950.6, "completions/mean_length": 1083.3509521484375, "completions/mean_terminated_length": 1095.9435668945312, "completions/min_length": 0.0, "completions/min_terminated_length": 284.8, "epoch": 0.8533653846153846, "grad_norm": 0.0007383286720141768, "learning_rate": 4.116586538461539e-06, "loss": -0.0118, "num_tokens": 769812904.0, "reward": 1.2695351600646974, "reward_std": 0.1539124518632889, "rewards/accuracy_reward": 0.7167534708976746, "rewards/brier_reward": 0.8334994554519654, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9888020992279053, "rewards/mean_confidence_reward": 0.767276120185852, "sampling/batch_mean_priority_error": 0.009087106963734565, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5166666666666666, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0011780404252931475, "sampling/priority_kl": 0.029998865351080893, "sampling/priority_scale": 0.7173613130347803, "sampling/prob_entropy": 10.278946495056152, "sampling/prob_max": 4.3774918594863266e-05, "sampling/prob_min": 1.872878747235518e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.8472000002861023, "sampling/prompt_draws_total": 25416.0, "sampling/seen_fraction": 0.6307400107383728, "sampling/unseen_fraction": 0.3692599892616272, "signal/accuracy_reward/centered_abs_mean": 0.136865234375, "signal/accuracy_reward/group_std_mean": 0.1819414019584656, "signal/accuracy_reward/group_zero_std_frac": 0.46944445967674253, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0684326171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0684326171875, "signal/advantage_abs_mean": 0.11170721650123597, "signal/advantage_pre_scale_abs_mean": 0.11170721650123597, "signal/advantage_pre_scale_std": 0.20967997014522552, "signal/advantage_std": 0.20967997014522552, "signal/brier_reward/centered_abs_mean": 0.08867686092853547, "signal/brier_reward/group_std_mean": 0.12051396965980529, "signal/brier_reward/group_zero_std_frac": 0.061111112125217915, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.044338430464267733, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.044338430464267733, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.01946072056889534, "signal/format_reward/group_std_mean": 0.03850421905517578, "signal/format_reward/group_zero_std_frac": 0.8333333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00973036028444767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00973036028444767, "signal/mean_confidence_reward/centered_abs_mean": 0.06412911564111709, "signal/mean_confidence_reward/group_std_mean": 0.08581863492727279, "signal/mean_confidence_reward/group_zero_std_frac": 0.06388888973742723, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.41291137526423e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.41291137526423e-07, "step": 355 }, { "calibration/aurc": 0.13148102052335414, "calibration/batch_distribution_entropy": 0.6065593156304296, "calibration/batch_entropy_100bins": 0.3349155943149549, "calibration/batch_entropy_10bins": 0.6065593156304296, "calibration/batch_entropy_50bins": 0.39367936060083886, "calibration/batch_uniqueness": 0.4486734367925983, "calibration/confidence_entropy": 0.49709864708157836, "calibration/coverage@0%": 0.0010582677165354329, "calibration/coverage@1%": 0.0010582677165354329, "calibration/coverage@10%": 0.39493326306021215, "calibration/coverage@15%": 0.5546346030242957, "calibration/coverage@20%": 0.7751250022507082, "calibration/coverage@25%": 0.9252238588909896, "calibration/coverage@30%": 0.9725593667546175, "calibration/coverage@5%": 0.20250454482663352, "calibration/distribution_entropy_10": 0.6065593156304296, "calibration/distribution_entropy_100": 0.3349155943149549, "calibration/ece": 0.08411662682927092, "calibration/mean_confidence": 0.7514742164045105, "calibration/unique_confidence_per_question": 0.0265625, "calibration/unique_confidences": 10.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012586805555555558, "completions/max_length": 3931.8, "completions/max_terminated_length": 3931.8, "completions/mean_length": 1107.271875, "completions/mean_terminated_length": 1121.4744506835937, "completions/min_length": 0.0, "completions/min_terminated_length": 287.6, "epoch": 0.8653846153846154, "grad_norm": 0.0007773222750984132, "learning_rate": 4.086538461538462e-06, "loss": -0.0146, "num_tokens": 785668036.0, "reward": 1.2651597738265992, "reward_std": 0.13495090901851653, "rewards/accuracy_reward": 0.7113715291023255, "rewards/brier_reward": 0.8316938519477844, "rewards/confidence_one_or_zero": 0.0006076388992369175, "rewards/format_reward": 0.9872395753860473, "rewards/mean_confidence_reward": 0.7259296298027038, "sampling/batch_mean_priority_error": 0.012178544557823126, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.47777777777777775, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0011908516753464937, "sampling/priority_kl": 0.029999721422791482, "sampling/priority_scale": 0.7162323415046558, "sampling/prob_entropy": 10.278964042663574, "sampling/prob_max": 4.390918838907965e-05, "sampling/prob_min": 1.881135249277577e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.8592000007629395, "sampling/prompt_draws_total": 25776.0, "sampling/seen_fraction": 0.6366600155830383, "sampling/unseen_fraction": 0.3633399844169617, "signal/accuracy_reward/centered_abs_mean": 0.11122504472732545, "signal/accuracy_reward/group_std_mean": 0.1490047350525856, "signal/accuracy_reward/group_zero_std_frac": 0.5583333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05561252236366272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05561252236366272, "signal/advantage_abs_mean": 0.09449863135814666, "signal/advantage_pre_scale_abs_mean": 0.09449863135814666, "signal/advantage_pre_scale_std": 0.18920642435550689, "signal/advantage_std": 0.18920642435550689, "signal/brier_reward/centered_abs_mean": 0.07522271126508713, "signal/brier_reward/group_std_mean": 0.10542623102664947, "signal/brier_reward/group_zero_std_frac": 0.02222222276031971, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.037611355632543565, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.037611355632543565, "signal/confidence_one_or_zero/centered_abs_mean": 0.001177300326526165, "signal/confidence_one_or_zero/group_std_mean": 0.0034373244270682335, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9805555462837219, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.1773002484005702e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.1773002484005702e-08, "signal/format_reward/centered_abs_mean": 0.022216796875, "signal/format_reward/group_std_mean": 0.04519350677728653, "signal/format_reward/group_zero_std_frac": 0.800000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0111083984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0111083984375, "signal/mean_confidence_reward/centered_abs_mean": 0.06626706123352051, "signal/mean_confidence_reward/group_std_mean": 0.09146434217691421, "signal/mean_confidence_reward/group_zero_std_frac": 0.02222222276031971, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.626705726375803e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.626705726375803e-07, "step": 360 }, { "calibration/aurc": 0.11277692746806756, "calibration/batch_distribution_entropy": 0.6456395996278379, "calibration/batch_entropy_100bins": 0.35729776936261837, "calibration/batch_entropy_10bins": 0.6456395996278379, "calibration/batch_entropy_50bins": 0.4198155415901105, "calibration/batch_uniqueness": 0.473133505996256, "calibration/confidence_entropy": 0.5311463132708747, "calibration/coverage@0%": 0.06946531022399341, "calibration/coverage@1%": 0.06946531022399341, "calibration/coverage@10%": 0.5981245716041369, "calibration/coverage@15%": 0.7243289521094971, "calibration/coverage@20%": 0.8575133118963988, "calibration/coverage@25%": 0.9006357537602036, "calibration/coverage@30%": 0.941569895397018, "calibration/coverage@5%": 0.35447750809190104, "calibration/distribution_entropy_10": 0.6456395996278379, "calibration/distribution_entropy_100": 0.35729776936261837, "calibration/ece": 0.12188690030338205, "calibration/mean_confidence": 0.7149970556060781, "calibration/unique_confidence_per_question": 0.0296875, "calibration/unique_confidences": 11.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0140625, "completions/max_length": 3901.2, "completions/max_terminated_length": 3901.2, "completions/mean_length": 1143.3982177734374, "completions/mean_terminated_length": 1159.8265380859375, "completions/min_length": 0.0, "completions/min_terminated_length": 300.6, "epoch": 0.8774038461538461, "grad_norm": 0.0007361032767221332, "learning_rate": 4.0564903846153846e-06, "loss": -0.0169, "num_tokens": 801933423.0, "reward": 1.277792716026306, "reward_std": 0.13029044717550278, "rewards/accuracy_reward": 0.7333333373069764, "rewards/brier_reward": 0.8366477251052856, "rewards/confidence_one_or_zero": 0.0011284722480922938, "rewards/format_reward": 0.9855902791023254, "rewards/mean_confidence_reward": 0.7020756244659424, "sampling/batch_mean_priority_error": 0.013198098471161029, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.48888888888888893, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0012046080781146884, "sampling/priority_kl": 0.0300000824034214, "sampling/priority_scale": 0.7152359307045117, "sampling/prob_entropy": 10.27895221710205, "sampling/prob_max": 4.404441424412653e-05, "sampling/prob_min": 1.8891548461397178e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.8712000012397766, "sampling/prompt_draws_total": 26136.0, "sampling/seen_fraction": 0.6425066709518432, "sampling/unseen_fraction": 0.3574933290481567, "signal/accuracy_reward/centered_abs_mean": 0.10416666716337204, "signal/accuracy_reward/group_std_mean": 0.14664508551359176, "signal/accuracy_reward/group_zero_std_frac": 0.5500000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05208333358168602, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05208333358168602, "signal/advantage_abs_mean": 0.08776064664125442, "signal/advantage_pre_scale_abs_mean": 0.08776064664125442, "signal/advantage_pre_scale_std": 0.18522117137908936, "signal/advantage_std": 0.18522117137908936, "signal/brier_reward/centered_abs_mean": 0.06859320253133774, "signal/brier_reward/group_std_mean": 0.09766077995300293, "signal/brier_reward/group_zero_std_frac": 0.013888888992369175, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03429660126566887, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03429660126566887, "signal/confidence_one_or_zero/centered_abs_mean": 0.002164713526144624, "signal/confidence_one_or_zero/group_std_mean": 0.005785720981657505, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9694444417953492, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.1647135284297293e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.1647135284297293e-08, "signal/format_reward/centered_abs_mean": 0.024555121548473835, "signal/format_reward/group_std_mean": 0.0473395012319088, "signal/format_reward/group_zero_std_frac": 0.800000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012277560774236917, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012277560774236917, "signal/mean_confidence_reward/centered_abs_mean": 0.06469639614224434, "signal/mean_confidence_reward/group_std_mean": 0.08819877356290817, "signal/mean_confidence_reward/group_zero_std_frac": 0.013888888992369175, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.469639174611075e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.469639174611075e-07, "step": 365 }, { "calibration/aurc": 0.1386283079594242, "calibration/batch_distribution_entropy": 0.6931053922664379, "calibration/batch_entropy_100bins": 0.38151320223980084, "calibration/batch_entropy_10bins": 0.6931053922664379, "calibration/batch_entropy_50bins": 0.4491111688447984, "calibration/batch_uniqueness": 0.5672203195561937, "calibration/confidence_entropy": 0.538125630240651, "calibration/coverage@0%": 0.06229394364436791, "calibration/coverage@1%": 0.1340216923354674, "calibration/coverage@10%": 0.44910862347893205, "calibration/coverage@15%": 0.5674081734008574, "calibration/coverage@20%": 0.7405481357036434, "calibration/coverage@25%": 0.8005214966111949, "calibration/coverage@30%": 0.919051442164734, "calibration/coverage@5%": 0.2802612486748973, "calibration/distribution_entropy_10": 0.6931053922664379, "calibration/distribution_entropy_100": 0.38151320223980084, "calibration/ece": 0.13417186403782377, "calibration/mean_confidence": 0.7066106638231785, "calibration/unique_confidence_per_question": 0.0296875, "calibration/unique_confidences": 11.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022829861111111117, "completions/max_length": 4059.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 1201.0027099609374, "completions/mean_terminated_length": 1229.2183837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 341.8, "epoch": 0.8894230769230769, "grad_norm": 0.0007796495920047164, "learning_rate": 4.026442307692308e-06, "loss": -0.0246, "num_tokens": 818856078.0, "reward": 1.2549877882003784, "reward_std": 0.15138355791568756, "rewards/accuracy_reward": 0.7102430582046508, "rewards/brier_reward": 0.8225485563278199, "rewards/confidence_one_or_zero": 0.0004340277810115367, "rewards/format_reward": 0.9771701335906983, "rewards/mean_confidence_reward": 0.6898022055625915, "sampling/batch_mean_priority_error": 0.016525694816846175, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.5111111111111111, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0012221216456964612, "sampling/priority_kl": 0.030001383274793625, "sampling/priority_scale": 0.714642256568186, "sampling/prob_entropy": 10.278974151611328, "sampling/prob_max": 4.4190864718984814e-05, "sampling/prob_min": 1.8967684445669876e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.8832000017166137, "sampling/prompt_draws_total": 26496.0, "sampling/seen_fraction": 0.6487133264541626, "sampling/unseen_fraction": 0.3512866735458374, "signal/accuracy_reward/centered_abs_mean": 0.12760416567325591, "signal/accuracy_reward/group_std_mean": 0.1682043880224228, "signal/accuracy_reward/group_zero_std_frac": 0.5194444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06380208283662796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06380208283662796, "signal/advantage_abs_mean": 0.10744879245758057, "signal/advantage_pre_scale_abs_mean": 0.10744879245758057, "signal/advantage_pre_scale_std": 0.21186884343624116, "signal/advantage_std": 0.21186884343624116, "signal/brier_reward/centered_abs_mean": 0.07936687171459197, "signal/brier_reward/group_std_mean": 0.11007404923439026, "signal/brier_reward/group_zero_std_frac": 0.00555555559694767, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03968343585729599, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03968343585729599, "signal/confidence_one_or_zero/centered_abs_mean": 0.0008409288129769266, "signal/confidence_one_or_zero/group_std_mean": 0.0024552317336201668, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 8.409287488575501e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 8.409287488575501e-09, "signal/format_reward/centered_abs_mean": 0.03474934808909893, "signal/format_reward/group_std_mean": 0.062300239503383634, "signal/format_reward/group_zero_std_frac": 0.7500000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.017374674044549464, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017374674044549464, "signal/mean_confidence_reward/centered_abs_mean": 0.07127341628074646, "signal/mean_confidence_reward/group_std_mean": 0.0958697572350502, "signal/mean_confidence_reward/group_zero_std_frac": 0.00555555559694767, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.127341632440221e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.127341632440221e-07, "step": 370 }, { "calibration/aurc": 0.10029335766168149, "calibration/batch_distribution_entropy": 0.6902467942968492, "calibration/batch_entropy_100bins": 0.3845658940121653, "calibration/batch_entropy_10bins": 0.6902467942968492, "calibration/batch_entropy_50bins": 0.45234008291154665, "calibration/batch_uniqueness": 0.5422519454806408, "calibration/confidence_entropy": 0.5160662325119072, "calibration/coverage@0%": 0.10886992645820467, "calibration/coverage@1%": 0.18280609667097064, "calibration/coverage@10%": 0.538345710678048, "calibration/coverage@15%": 0.6977265965974512, "calibration/coverage@20%": 0.8670921079320463, "calibration/coverage@25%": 0.9028599732154458, "calibration/coverage@30%": 0.9792349726775956, "calibration/coverage@5%": 0.38965790086026597, "calibration/distribution_entropy_10": 0.6902467942968492, "calibration/distribution_entropy_100": 0.3845658940121653, "calibration/ece": 0.12545484203132018, "calibration/mean_confidence": 0.7241922030059907, "calibration/unique_confidence_per_question": 0.040625, "calibration/unique_confidences": 15.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01684027777777781, "completions/max_length": 4045.0, "completions/max_terminated_length": 4045.0, "completions/mean_length": 1237.6139892578126, "completions/mean_terminated_length": 1258.8419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 356.2, "epoch": 0.9014423076923077, "grad_norm": 0.0007148521253839135, "learning_rate": 3.996394230769231e-06, "loss": -0.0189, "num_tokens": 836198671.0, "reward": 1.2773468732833861, "reward_std": 0.149992173910141, "rewards/accuracy_reward": 0.7309895873069763, "rewards/brier_reward": 0.8406170248985291, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9830729246139527, "rewards/mean_confidence_reward": 0.7109095573425293, "sampling/batch_mean_priority_error": 0.013326693225758213, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.425, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0012390354182571172, "sampling/priority_kl": 0.030000606551766396, "sampling/priority_scale": 0.713545459578745, "sampling/prob_entropy": 10.278951835632324, "sampling/prob_max": 4.431868874235079e-05, "sampling/prob_min": 1.9047261230298317e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.895199990272522, "sampling/prompt_draws_total": 26856.0, "sampling/seen_fraction": 0.6540533304214478, "sampling/unseen_fraction": 0.34594666957855225, "signal/accuracy_reward/centered_abs_mean": 0.138232421875, "signal/accuracy_reward/group_std_mean": 0.17746042907238008, "signal/accuracy_reward/group_zero_std_frac": 0.5055555641651154, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0691162109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0691162109375, "signal/advantage_abs_mean": 0.10759708732366562, "signal/advantage_pre_scale_abs_mean": 0.10759708732366562, "signal/advantage_pre_scale_std": 0.20929722487926483, "signal/advantage_std": 0.20929722487926483, "signal/brier_reward/centered_abs_mean": 0.0766251802444458, "signal/brier_reward/group_std_mean": 0.10730268806219101, "signal/brier_reward/group_zero_std_frac": 0.05000000093132258, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0383125901222229, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0383125901222229, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.02868381068110466, "signal/format_reward/group_std_mean": 0.05322592183947563, "signal/format_reward/group_zero_std_frac": 0.7833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01434190534055233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01434190534055233, "signal/mean_confidence_reward/centered_abs_mean": 0.06712552011013032, "signal/mean_confidence_reward/group_std_mean": 0.09156746119260788, "signal/mean_confidence_reward/group_zero_std_frac": 0.05000000093132258, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.712551794407773e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.712551794407773e-07, "step": 375 }, { "calibration/aurc": 0.06687940158463018, "calibration/batch_distribution_entropy": 0.6765535702173531, "calibration/batch_entropy_100bins": 0.3947297330988418, "calibration/batch_entropy_10bins": 0.6765535702173531, "calibration/batch_entropy_50bins": 0.4615792721378192, "calibration/batch_uniqueness": 0.5223129261859903, "calibration/confidence_entropy": 0.48673361828445605, "calibration/coverage@0%": 0.10752003188823844, "calibration/coverage@1%": 0.38548056959488225, "calibration/coverage@10%": 0.7209976575948486, "calibration/coverage@15%": 0.8182014845492833, "calibration/coverage@20%": 0.8610435822792895, "calibration/coverage@25%": 0.919575639140328, "calibration/coverage@30%": 0.9627777777777778, "calibration/coverage@5%": 0.6677153378203822, "calibration/distribution_entropy_10": 0.6765535702173531, "calibration/distribution_entropy_100": 0.3947297330988418, "calibration/ece": 0.1527699526413858, "calibration/mean_confidence": 0.7355369041664158, "calibration/unique_confidence_per_question": 0.04114583333333334, "calibration/unique_confidences": 15.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029079861111111115, "completions/max_length": 4048.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 1363.7867431640625, "completions/mean_terminated_length": 1404.6439697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 416.0, "epoch": 0.9134615384615384, "grad_norm": 0.0005625736084766686, "learning_rate": 3.966346153846154e-06, "loss": -0.0313, "num_tokens": 855025046.0, "reward": 1.2572867155075074, "reward_std": 0.15128274112939835, "rewards/accuracy_reward": 0.7096354007720947, "rewards/brier_reward": 0.8341776728630066, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9707465171813965, "rewards/mean_confidence_reward": 0.6915950655937195, "sampling/batch_mean_priority_error": 0.015527221156608076, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.43888888888888894, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0012547509046271442, "sampling/priority_kl": 0.029999660328030586, "sampling/priority_scale": 0.7124824702506884, "sampling/prob_entropy": 10.278944396972657, "sampling/prob_max": 4.444640653673559e-05, "sampling/prob_min": 1.9126197730656714e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.9072000026702881, "sampling/prompt_draws_total": 27216.0, "sampling/seen_fraction": 0.6593066573143005, "sampling/unseen_fraction": 0.34069334268569945, "signal/accuracy_reward/centered_abs_mean": 0.12245551496744156, "signal/accuracy_reward/group_std_mean": 0.16243197470903398, "signal/accuracy_reward/group_zero_std_frac": 0.5250000119209289, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06122775748372078, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06122775748372078, "signal/advantage_abs_mean": 0.10677067637443542, "signal/advantage_pre_scale_abs_mean": 0.10677067637443542, "signal/advantage_pre_scale_std": 0.21411246061325073, "signal/advantage_std": 0.21411246061325073, "signal/brier_reward/centered_abs_mean": 0.08327719122171402, "signal/brier_reward/group_std_mean": 0.11531828492879867, "signal/brier_reward/group_zero_std_frac": 0.07777777835726737, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04163859561085701, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04163859561085701, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.04182400144636631, "signal/format_reward/group_std_mean": 0.07100048959255219, "signal/format_reward/group_zero_std_frac": 0.7305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.020912000723183156, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.020912000723183156, "signal/mean_confidence_reward/centered_abs_mean": 0.07033055424690246, "signal/mean_confidence_reward/group_std_mean": 0.0946671724319458, "signal/mean_confidence_reward/group_zero_std_frac": 0.07777777835726737, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.033055226202123e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.033055226202123e-07, "step": 380 }, { "calibration/aurc": 0.09305614350965419, "calibration/batch_distribution_entropy": 0.6883623876554645, "calibration/batch_entropy_100bins": 0.3986153066388806, "calibration/batch_entropy_10bins": 0.6883623876554645, "calibration/batch_entropy_50bins": 0.4679957218573433, "calibration/batch_uniqueness": 0.5225835025994161, "calibration/confidence_entropy": 0.48683141606517133, "calibration/coverage@0%": 0.2549390269850611, "calibration/coverage@1%": 0.31022357983058957, "calibration/coverage@10%": 0.5844218566775878, "calibration/coverage@15%": 0.711092937754674, "calibration/coverage@20%": 0.9079099138819948, "calibration/coverage@25%": 0.9586206896551724, "calibration/coverage@30%": 0.9586206896551724, "calibration/coverage@5%": 0.4897845827164402, "calibration/distribution_entropy_10": 0.6883623876554645, "calibration/distribution_entropy_100": 0.3986153066388806, "calibration/ece": 0.117542802957425, "calibration/mean_confidence": 0.7463551204519809, "calibration/unique_confidence_per_question": 0.049479166666666664, "calibration/unique_confidences": 19.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024826388888888905, "completions/max_length": 4044.2, "completions/max_terminated_length": 4044.2, "completions/mean_length": 1388.8342041015626, "completions/mean_terminated_length": 1424.3485107421875, "completions/min_length": 0.0, "completions/min_terminated_length": 400.2, "epoch": 0.9254807692307693, "grad_norm": 0.0005671771941706538, "learning_rate": 3.936298076923077e-06, "loss": -0.0298, "num_tokens": 874080512.0, "reward": 1.2843957901000977, "reward_std": 0.16392693817615508, "rewards/accuracy_reward": 0.7434895992279053, "rewards/brier_reward": 0.8502007126808167, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9750868082046509, "rewards/mean_confidence_reward": 0.7203326463699341, "sampling/batch_mean_priority_error": 0.009188816231087813, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4416666666666667, "sampling/error_ema_max": 0.07874999940395355, "sampling/error_ema_mean": 0.0012672836193814874, "sampling/priority_kl": 0.030001260340213776, "sampling/priority_scale": 0.7116214931709692, "sampling/prob_entropy": 10.278961372375488, "sampling/prob_max": 4.457824543351308e-05, "sampling/prob_min": 1.9202485782443544e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.9192000031471252, "sampling/prompt_draws_total": 27576.0, "sampling/seen_fraction": 0.6646333336830139, "sampling/unseen_fraction": 0.3353666663169861, "signal/accuracy_reward/centered_abs_mean": 0.12014431208372116, "signal/accuracy_reward/group_std_mean": 0.1654144525527954, "signal/accuracy_reward/group_zero_std_frac": 0.49722222089767454, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06007215604186058, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06007215604186058, "signal/advantage_abs_mean": 0.11156907230615616, "signal/advantage_pre_scale_abs_mean": 0.11156907230615616, "signal/advantage_pre_scale_std": 0.22239162027835846, "signal/advantage_std": 0.22239162027835846, "signal/brier_reward/centered_abs_mean": 0.0873231202363968, "signal/brier_reward/group_std_mean": 0.1248858317732811, "signal/brier_reward/group_zero_std_frac": 0.10000000298023223, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0436615601181984, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0436615601181984, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.04041341133415699, "signal/format_reward/group_std_mean": 0.07462992146611214, "signal/format_reward/group_zero_std_frac": 0.6916666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.020206705667078495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.020206705667078495, "signal/mean_confidence_reward/centered_abs_mean": 0.07641240209341049, "signal/mean_confidence_reward/group_std_mean": 0.10640424340963364, "signal/mean_confidence_reward/group_zero_std_frac": 0.10000000298023223, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.641240131306404e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.641240131306404e-07, "step": 385 }, { "calibration/aurc": 0.13190790475605713, "calibration/batch_distribution_entropy": 0.6945369113155875, "calibration/batch_entropy_100bins": 0.3849626963468917, "calibration/batch_entropy_10bins": 0.6945369113155875, "calibration/batch_entropy_50bins": 0.4531718574953187, "calibration/batch_uniqueness": 0.5077238360997742, "calibration/confidence_entropy": 0.48116454635255723, "calibration/coverage@0%": 0.12612137203166227, "calibration/coverage@1%": 0.21842906433935458, "calibration/coverage@10%": 0.42993735385232174, "calibration/coverage@15%": 0.5684094677216365, "calibration/coverage@20%": 0.653749367578372, "calibration/coverage@25%": 0.887906932118014, "calibration/coverage@30%": 0.9514267379679143, "calibration/coverage@5%": 0.3843560040766311, "calibration/distribution_entropy_10": 0.6945369113155875, "calibration/distribution_entropy_100": 0.3849626963468917, "calibration/ece": 0.10824475809618166, "calibration/mean_confidence": 0.73170941520179, "calibration/unique_confidence_per_question": 0.04010416666666667, "calibration/unique_confidences": 15.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025173611111111095, "completions/max_length": 4080.2, "completions/max_terminated_length": 4080.2, "completions/mean_length": 1396.7363037109376, "completions/mean_terminated_length": 1433.1280517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 430.2, "epoch": 0.9375, "grad_norm": 0.000409966945881024, "learning_rate": 3.90625e-06, "loss": -0.0266, "num_tokens": 893290818.0, "reward": 1.27172749042511, "reward_std": 0.15499115586280823, "rewards/accuracy_reward": 0.7238715171813965, "rewards/brier_reward": 0.8447428822517395, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9748263835906983, "rewards/mean_confidence_reward": 0.7116460919380188, "sampling/batch_mean_priority_error": 0.012741569140127442, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.47777777777777786, "sampling/error_ema_max": 0.10777499675750732, "sampling/error_ema_mean": 0.001280206791125238, "sampling/priority_kl": 0.029999075457453728, "sampling/priority_scale": 0.7110501586692408, "sampling/prob_entropy": 10.27893943786621, "sampling/prob_max": 4.471761203603819e-05, "sampling/prob_min": 1.9275546219432726e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.9312000036239624, "sampling/prompt_draws_total": 27936.0, "sampling/seen_fraction": 0.6701466679573059, "sampling/unseen_fraction": 0.32985333204269407, "signal/accuracy_reward/centered_abs_mean": 0.11694335788488389, "signal/accuracy_reward/group_std_mean": 0.16504150331020356, "signal/accuracy_reward/group_zero_std_frac": 0.48611111640930177, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05847167894244194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05847167894244194, "signal/advantage_abs_mean": 0.1060485914349556, "signal/advantage_pre_scale_abs_mean": 0.1060485914349556, "signal/advantage_pre_scale_std": 0.21162693500518798, "signal/advantage_std": 0.21162693500518798, "signal/brier_reward/centered_abs_mean": 0.08900733143091202, "signal/brier_reward/group_std_mean": 0.12304618060588837, "signal/brier_reward/group_zero_std_frac": 0.1194444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04450366571545601, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04450366571545601, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.03811848983168602, "signal/format_reward/group_std_mean": 0.06721950992941857, "signal/format_reward/group_zero_std_frac": 0.7361111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01905924491584301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01905924491584301, "signal/mean_confidence_reward/centered_abs_mean": 0.07885867208242417, "signal/mean_confidence_reward/group_std_mean": 0.10565297603607178, "signal/mean_confidence_reward/group_zero_std_frac": 0.1250000014901161, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.885867375989619e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.885867375989619e-07, "step": 390 }, { "calibration/aurc": 0.13851231496365907, "calibration/batch_distribution_entropy": 0.6436947726909951, "calibration/batch_entropy_100bins": 0.3657471373198543, "calibration/batch_entropy_10bins": 0.6436947726909951, "calibration/batch_entropy_50bins": 0.42894801110205344, "calibration/batch_uniqueness": 0.4334929718033763, "calibration/confidence_entropy": 0.4724938880811342, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.05520833333333334, "calibration/coverage@10%": 0.6129555591818974, "calibration/coverage@15%": 0.6619791666666667, "calibration/coverage@20%": 0.7166666666666667, "calibration/coverage@25%": 0.7223958333333333, "calibration/coverage@30%": 0.9573684210526316, "calibration/coverage@5%": 0.3250827582991421, "calibration/distribution_entropy_10": 0.6436947726909951, "calibration/distribution_entropy_100": 0.3657471373198543, "calibration/ece": 0.13354448480012385, "calibration/mean_confidence": 0.7630965564173401, "calibration/unique_confidence_per_question": 0.04114583333333333, "calibration/unique_confidences": 15.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014236111111111116, "completions/max_length": 4012.6, "completions/max_terminated_length": 4012.6, "completions/mean_length": 1314.4784912109376, "completions/mean_terminated_length": 1333.518115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 404.8, "epoch": 0.9495192307692307, "grad_norm": 0.0004194934736005962, "learning_rate": 3.876201923076923e-06, "loss": -0.0172, "num_tokens": 911520586.0, "reward": 1.2895013809204101, "reward_std": 0.13368285447359085, "rewards/accuracy_reward": 0.7513888955116272, "rewards/brier_reward": 0.8418354272842408, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9857638955116272, "rewards/mean_confidence_reward": 0.728717315196991, "sampling/batch_mean_priority_error": 0.01850263710004697, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4416666666666666, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.001296161743812263, "sampling/priority_kl": 0.029999866709113122, "sampling/priority_scale": 0.7105536759132519, "sampling/prob_entropy": 10.278941345214843, "sampling/prob_max": 4.4857734610559416e-05, "sampling/prob_min": 1.934731080837082e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.9431999921798706, "sampling/prompt_draws_total": 28296.0, "sampling/seen_fraction": 0.6755799889564514, "sampling/unseen_fraction": 0.32442001104354856, "signal/accuracy_reward/centered_abs_mean": 0.1146484375, "signal/accuracy_reward/group_std_mean": 0.15858114361763, "signal/accuracy_reward/group_zero_std_frac": 0.5194444596767426, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05732421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05732421875, "signal/advantage_abs_mean": 0.09345841109752655, "signal/advantage_pre_scale_abs_mean": 0.09345841109752655, "signal/advantage_pre_scale_std": 0.18798796832561493, "signal/advantage_std": 0.18798796832561493, "signal/brier_reward/centered_abs_mean": 0.0772012397646904, "signal/brier_reward/group_std_mean": 0.10745720118284226, "signal/brier_reward/group_zero_std_frac": 0.16944444477558135, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0386006198823452, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0386006198823452, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.02316623255610466, "signal/format_reward/group_std_mean": 0.03984398618340492, "signal/format_reward/group_zero_std_frac": 0.8472222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01158311627805233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01158311627805233, "signal/mean_confidence_reward/centered_abs_mean": 0.07465712130069732, "signal/mean_confidence_reward/group_std_mean": 0.09905266910791397, "signal/mean_confidence_reward/group_zero_std_frac": 0.17777777910232545, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.465711860277224e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.465711860277224e-07, "step": 395 }, { "calibration/aurc": 0.17381065033169904, "calibration/batch_distribution_entropy": 0.7264575998314206, "calibration/batch_entropy_100bins": 0.41238367312173424, "calibration/batch_entropy_10bins": 0.7264575998314206, "calibration/batch_entropy_50bins": 0.48482897701067085, "calibration/batch_uniqueness": 0.553584006751787, "calibration/confidence_entropy": 0.5046126707223293, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.28964513992561025, "calibration/coverage@15%": 0.4644649068420569, "calibration/coverage@20%": 0.6632898498571449, "calibration/coverage@25%": 0.7977037298323743, "calibration/coverage@30%": 0.9234206058439817, "calibration/coverage@5%": 0.12178938666589316, "calibration/distribution_entropy_10": 0.7264575998314206, "calibration/distribution_entropy_100": 0.41238367312173424, "calibration/ece": 0.09958374316931348, "calibration/mean_confidence": 0.6901195510198486, "calibration/unique_confidence_per_question": 0.03958333333333334, "calibration/unique_confidences": 15.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019704861111111117, "completions/max_length": 4046.8, "completions/max_terminated_length": 4046.8, "completions/mean_length": 1286.089697265625, "completions/mean_terminated_length": 1311.7608154296875, "completions/min_length": 0.0, "completions/min_terminated_length": 412.8, "epoch": 0.9615384615384616, "grad_norm": 0.000408906169468537, "learning_rate": 3.846153846153847e-06, "loss": -0.0214, "num_tokens": 929409907.0, "reward": 1.2701274394989013, "reward_std": 0.13115532398223878, "rewards/accuracy_reward": 0.7190972447395325, "rewards/brier_reward": 0.8408486485481262, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9802951335906982, "rewards/mean_confidence_reward": 0.6962047338485717, "sampling/batch_mean_priority_error": 0.01543325384199716, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.44722222222222224, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0013169086771085859, "sampling/priority_kl": 0.029999712482094765, "sampling/priority_scale": 0.7101320088142529, "sampling/prob_entropy": 10.278942680358886, "sampling/prob_max": 4.4999313831795004e-05, "sampling/prob_min": 1.9418062584009023e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.9551999926567077, "sampling/prompt_draws_total": 28656.0, "sampling/seen_fraction": 0.6809599995613098, "sampling/unseen_fraction": 0.3190400004386902, "signal/accuracy_reward/centered_abs_mean": 0.1167534738779068, "signal/accuracy_reward/group_std_mean": 0.15651549100875856, "signal/accuracy_reward/group_zero_std_frac": 0.544444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0583767369389534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0583767369389534, "signal/advantage_abs_mean": 0.0936752513051033, "signal/advantage_pre_scale_abs_mean": 0.0936752513051033, "signal/advantage_pre_scale_std": 0.18921864926815032, "signal/advantage_std": 0.18921864926815032, "signal/brier_reward/centered_abs_mean": 0.0845369666814804, "signal/brier_reward/group_std_mean": 0.1135342001914978, "signal/brier_reward/group_zero_std_frac": 0.16388889104127885, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0422684833407402, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0422684833407402, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.02873806469142437, "signal/format_reward/group_std_mean": 0.04568745791912079, "signal/format_reward/group_zero_std_frac": 0.8388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014369032345712185, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014369032345712185, "signal/mean_confidence_reward/centered_abs_mean": 0.07661729902029038, "signal/mean_confidence_reward/group_std_mean": 0.100196972489357, "signal/mean_confidence_reward/group_zero_std_frac": 0.16666666865348817, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.661729910068971e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.661729910068971e-07, "step": 400 }, { "epoch": 0.9615384615384616, "eval_calibration/aurc": 0.14373158731161875, "eval_calibration/batch_distribution_entropy": 0.7376132763277105, "eval_calibration/batch_entropy_100bins": 0.4222256539447724, "eval_calibration/batch_entropy_10bins": 0.7376132763277105, "eval_calibration/batch_entropy_50bins": 0.49703720826994124, "eval_calibration/batch_uniqueness": 0.5167880426479297, "eval_calibration/confidence_entropy": 0.4762156650786053, "eval_calibration/coverage@0%": 0.0008718395815170009, "eval_calibration/coverage@1%": 0.0008718395815170009, "eval_calibration/coverage@10%": 0.45422842197035745, "eval_calibration/coverage@15%": 0.6687009590235397, "eval_calibration/coverage@20%": 0.7707061900610288, "eval_calibration/coverage@25%": 0.8857890148212729, "eval_calibration/coverage@30%": 0.981691368788143, "eval_calibration/coverage@5%": 0.0008718395815170009, "eval_calibration/distribution_entropy_10": 0.7376132763277105, "eval_calibration/distribution_entropy_100": 0.4222256539447724, "eval_calibration/ece": 0.023292783412285037, "eval_calibration/mean_confidence": 0.6979463351440592, "eval_calibration/unique_confidence_per_question": 0.026041666666666668, "eval_calibration/unique_confidences": 30, "eval_completions/clipped_ratio": 0.00434027777777779, "eval_completions/max_length": 3106.3333333333335, "eval_completions/max_terminated_length": 3106.3333333333335, "eval_completions/mean_length": 1229.538594563802, "eval_completions/mean_terminated_length": 1234.8570149739583, "eval_completions/min_length": 170.66666666666666, "eval_completions/min_terminated_length": 471.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 929409907.0, "eval_reward": 1.267914394537608, "eval_reward_std": 0.3055635889371236, "eval_rewards/accuracy_reward": 0.6935763855775198, "eval_rewards/brier_reward": 0.8465787768363953, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9956597288449606, "eval_rewards/mean_confidence_reward": 0.6949170331160227, "eval_runtime": 207.1959, "eval_samples_per_second": 4.826, "eval_signal/accuracy_reward/centered_abs_mean": 0.4138997296492259, "eval_signal/accuracy_reward/group_std_mean": 0.46077505747477215, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20694986482461294, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20694986482461294, "eval_signal/advantage_abs_mean": 0.26225854456424713, "eval_signal/advantage_pre_scale_abs_mean": 0.26225854456424713, "eval_signal/advantage_pre_scale_std": 0.3032806118329366, "eval_signal/advantage_std": 0.3032806118329366, "eval_signal/brier_reward/centered_abs_mean": 0.15997044493754706, "eval_signal/brier_reward/group_std_mean": 0.21410523851712546, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07998522246877353, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.07998522246877353, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.008409287935743729, "eval_signal/format_reward/group_std_mean": 0.02455231888840596, "eval_signal/format_reward/group_zero_std_frac": 0.8611111342906952, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.004204643967871864, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.004204643967871864, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.2043088103334109, "eval_signal/mean_confidence_reward/group_std_mean": 0.2400745451450348, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.043088102254842e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.043088102254842e-06, "eval_steps_per_second": 0.029, "step": 400 }, { "epoch": 0.9615384615384616, "step": 400, "train_probe_calibration/aurc": 0.1189485048532275, "train_probe_calibration/batch_distribution_entropy": 0.7404175967039297, "train_probe_calibration/batch_entropy_100bins": 0.4319272238513574, "train_probe_calibration/batch_entropy_10bins": 0.7404175967039297, "train_probe_calibration/batch_entropy_50bins": 0.5063624573463197, "train_probe_calibration/batch_uniqueness": 0.5271480740354949, "train_probe_calibration/confidence_entropy": 0.4770563278287817, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.5663176265270506, "train_probe_calibration/coverage@15%": 0.725130890052356, "train_probe_calibration/coverage@20%": 0.7827225130890052, "train_probe_calibration/coverage@25%": 0.956369982547993, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0, "train_probe_calibration/distribution_entropy_10": 0.7404175967039297, "train_probe_calibration/distribution_entropy_100": 0.4319272238513574, "train_probe_calibration/ece": 0.03721958610977619, "train_probe_calibration/mean_confidence": 0.7000523817904639, "train_probe_calibration/unique_confidence_per_question": 0.026909722222222224, "train_probe_calibration/unique_confidences": 31, "train_probe_completions/clipped_ratio": 0.007638888888888899, "train_probe_completions/max_length": 3379.5, "train_probe_completions/max_terminated_length": 3379.5, "train_probe_completions/mean_length": 1260.2481486002605, "train_probe_completions/mean_terminated_length": 1269.918924967448, "train_probe_completions/min_length": 143.5, "train_probe_completions/min_terminated_length": 424.6666666666667, "train_probe_loss": 0.0, "train_probe_num_tokens": 929409907.0, "train_probe_reward": 1.2899209260940552, "train_probe_reward_std": 0.29275282720724743, "train_probe_rewards/accuracy_reward": 0.7282986044883728, "train_probe_rewards/brier_reward": 0.8567376335461935, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9947916666666666, "train_probe_rewards/mean_confidence_reward": 0.6964062650998434, "train_probe_runtime": 206.9266, "train_probe_samples_per_second": 4.833, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3830837706724803, "train_probe_signal/accuracy_reward/group_std_mean": 0.4421002119779587, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19154188533624014, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.19154188533624014, "train_probe_signal/advantage_abs_mean": 0.24344467123349509, "train_probe_signal/advantage_pre_scale_abs_mean": 0.24344467123349509, "train_probe_signal/advantage_pre_scale_std": 0.29259032507737476, "train_probe_signal/advantage_std": 0.29259032507737476, "train_probe_signal/brier_reward/centered_abs_mean": 0.14901887873808542, "train_probe_signal/brier_reward/group_std_mean": 0.20320318390925726, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07450943936904271, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07450943936904271, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.009982638681928316, "train_probe_signal/format_reward/group_std_mean": 0.026473373795549076, "train_probe_signal/format_reward/group_zero_std_frac": 0.8611111342906952, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.004991319340964158, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.004991319340964158, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.20346981287002563, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.24091133226950964, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.034698032578793e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.034698032578793e-06, "train_probe_steps_per_second": 0.029 }, { "calibration/aurc": 0.08449489142619329, "calibration/batch_distribution_entropy": 0.6227200143289963, "calibration/batch_entropy_100bins": 0.36338874779102365, "calibration/batch_entropy_10bins": 0.6227200143289963, "calibration/batch_entropy_50bins": 0.4252669096203041, "calibration/batch_uniqueness": 0.3437965242623367, "calibration/confidence_entropy": 0.4589139405558756, "calibration/coverage@0%": 0.1618964947089947, "calibration/coverage@1%": 0.18094411375661373, "calibration/coverage@10%": 0.639110911662315, "calibration/coverage@15%": 0.9039912280701753, "calibration/coverage@20%": 0.976842105263158, "calibration/coverage@25%": 0.9873684210526316, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.35717879677980857, "calibration/distribution_entropy_10": 0.6227200143289963, "calibration/distribution_entropy_100": 0.36338874779102365, "calibration/ece": 0.1369168523797913, "calibration/mean_confidence": 0.7638626715936297, "calibration/unique_confidence_per_question": 0.0453125, "calibration/unique_confidences": 17.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007291666666666674, "completions/max_length": 3925.2, "completions/max_terminated_length": 3925.2, "completions/mean_length": 1233.3153076171875, "completions/mean_terminated_length": 1242.411474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 377.4, "epoch": 0.9735576923076923, "grad_norm": 0.0005183282773941755, "learning_rate": 3.81610576923077e-06, "loss": -0.0065, "num_tokens": 946737955.0, "reward": 1.2722141742706299, "reward_std": 0.13236766010522844, "rewards/accuracy_reward": 0.7138020753860473, "rewards/brier_reward": 0.8379905343055725, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9926215171813965, "rewards/mean_confidence_reward": 0.7111494064331054, "sampling/batch_mean_priority_error": 0.017268427815570656, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4083333333333333, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0013354042312130332, "sampling/priority_kl": 0.030000364780426024, "sampling/priority_scale": 0.7095950543647632, "sampling/prob_entropy": 10.278936386108398, "sampling/prob_max": 4.51350053481292e-05, "sampling/prob_min": 1.948899807757698e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.9672000050544739, "sampling/prompt_draws_total": 29016.0, "sampling/seen_fraction": 0.686026668548584, "sampling/unseen_fraction": 0.31397333145141604, "signal/accuracy_reward/centered_abs_mean": 0.13479274809360503, "signal/accuracy_reward/group_std_mean": 0.17868539094924926, "signal/accuracy_reward/group_zero_std_frac": 0.49166667461395264, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06739637404680252, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06739637404680252, "signal/advantage_abs_mean": 0.09586342722177506, "signal/advantage_pre_scale_abs_mean": 0.09586342722177506, "signal/advantage_pre_scale_std": 0.18204142153263092, "signal/advantage_std": 0.18204142153263092, "signal/brier_reward/centered_abs_mean": 0.08206898719072342, "signal/brier_reward/group_std_mean": 0.11170048117637635, "signal/brier_reward/group_zero_std_frac": 0.19722222685813903, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04103449359536171, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04103449359536171, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.012939453218132257, "signal/format_reward/group_std_mean": 0.024650372192263605, "signal/format_reward/group_zero_std_frac": 0.9, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0064697266090661286, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0064697266090661286, "signal/mean_confidence_reward/centered_abs_mean": 0.07400602400302887, "signal/mean_confidence_reward/group_std_mean": 0.09739783853292465, "signal/mean_confidence_reward/group_zero_std_frac": 0.20555555820465088, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.400601930385164e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.400601930385164e-07, "step": 405 }, { "calibration/aurc": 0.15484554277953202, "calibration/batch_distribution_entropy": 0.6140492358734193, "calibration/batch_entropy_100bins": 0.3570470771855673, "calibration/batch_entropy_10bins": 0.6140492358734193, "calibration/batch_entropy_50bins": 0.41687620588852903, "calibration/batch_uniqueness": 0.31276141098895854, "calibration/confidence_entropy": 0.4564292639629343, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.11302083333333332, "calibration/coverage@10%": 0.44270833333333337, "calibration/coverage@15%": 0.5036458333333333, "calibration/coverage@20%": 0.6453125, "calibration/coverage@25%": 0.8445162521815008, "calibration/coverage@30%": 0.9244791666666667, "calibration/coverage@5%": 0.14583333333333331, "calibration/distribution_entropy_10": 0.6140492358734193, "calibration/distribution_entropy_100": 0.3570470771855673, "calibration/ece": 0.11358587200609367, "calibration/mean_confidence": 0.7408229315798783, "calibration/unique_confidence_per_question": 0.05, "calibration/unique_confidences": 19.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003993055555555558, "completions/max_length": 3927.8, "completions/max_terminated_length": 3927.8, "completions/mean_length": 1141.2799560546875, "completions/mean_terminated_length": 1145.9073486328125, "completions/min_length": 0.0, "completions/min_terminated_length": 334.2, "epoch": 0.9855769230769231, "grad_norm": 0.00041302453610114753, "learning_rate": 3.7860576923076927e-06, "loss": -0.0047, "num_tokens": 962938460.0, "reward": 1.295286440849304, "reward_std": 0.10948915630578995, "rewards/accuracy_reward": 0.7405382037162781, "rewards/brier_reward": 0.8540129423141479, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9960069298744202, "rewards/mean_confidence_reward": 0.7417306542396546, "sampling/batch_mean_priority_error": 0.015546130820105933, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4416666666666666, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0013538436032831669, "sampling/priority_kl": 0.03000061847269535, "sampling/priority_scale": 0.7092480838997289, "sampling/prob_entropy": 10.278968238830567, "sampling/prob_max": 4.527691780822352e-05, "sampling/prob_min": 1.955830375663936e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.979200005531311, "sampling/prompt_draws_total": 29376.0, "sampling/seen_fraction": 0.6912333369255066, "sampling/unseen_fraction": 0.3087666630744934, "signal/accuracy_reward/centered_abs_mean": 0.11591254472732544, "signal/accuracy_reward/group_std_mean": 0.15239314138889312, "signal/accuracy_reward/group_zero_std_frac": 0.5611111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05795627236366272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05795627236366272, "signal/advantage_abs_mean": 0.08225717097520828, "signal/advantage_pre_scale_abs_mean": 0.08225717097520828, "signal/advantage_pre_scale_std": 0.16391776800155639, "signal/advantage_std": 0.16391776800155639, "signal/brier_reward/centered_abs_mean": 0.07231278643012047, "signal/brier_reward/group_std_mean": 0.09499142915010453, "signal/brier_reward/group_zero_std_frac": 0.31388888955116273, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.036156393215060235, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.036156393215060235, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.0058593748603016135, "signal/format_reward/group_std_mean": 0.010942479129880667, "signal/format_reward/group_zero_std_frac": 0.9555555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0029296874301508067, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0029296874301508067, "signal/mean_confidence_reward/centered_abs_mean": 0.06354806199669838, "signal/mean_confidence_reward/group_std_mean": 0.08368089199066162, "signal/mean_confidence_reward/group_zero_std_frac": 0.32777778506278993, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.354805918817874e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.354805918817874e-07, "step": 410 }, { "calibration/aurc": 0.20428814046532634, "calibration/batch_distribution_entropy": 0.6639657042279807, "calibration/batch_entropy_100bins": 0.36625001474860847, "calibration/batch_entropy_10bins": 0.6639657042279807, "calibration/batch_entropy_50bins": 0.4311435914864713, "calibration/batch_uniqueness": 0.43971812448499553, "calibration/confidence_entropy": 0.4823920956258697, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.3251643245435366, "calibration/coverage@15%": 0.3486671698927818, "calibration/coverage@20%": 0.5319422349391003, "calibration/coverage@25%": 0.5914233410340884, "calibration/coverage@30%": 0.6817874865008953, "calibration/coverage@5%": 0.1489891050264514, "calibration/distribution_entropy_10": 0.6639657042279807, "calibration/distribution_entropy_100": 0.36625001474860847, "calibration/ece": 0.12688027308188327, "calibration/mean_confidence": 0.7077171406744632, "calibration/unique_confidence_per_question": 0.03177083333333333, "calibration/unique_confidences": 12.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0035590277777777677, "completions/max_length": 3899.6, "completions/max_terminated_length": 3899.6, "completions/mean_length": 1136.95966796875, "completions/mean_terminated_length": 1141.050439453125, "completions/min_length": 0.0, "completions/min_terminated_length": 376.8, "epoch": 0.9975961538461539, "grad_norm": 0.0003201451036147773, "learning_rate": 3.756009615384616e-06, "loss": -0.0029, "num_tokens": 979157035.0, "reward": 1.2846659660339355, "reward_std": 0.11020902842283249, "rewards/accuracy_reward": 0.7144965410232544, "rewards/brier_reward": 0.858467161655426, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9963541507720948, "rewards/mean_confidence_reward": 0.7005656719207763, "sampling/batch_mean_priority_error": 0.015297752478049573, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4416666666666666, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0013716007117182017, "sampling/priority_kl": 0.030000818893313408, "sampling/priority_scale": 0.7091951430076733, "sampling/prob_entropy": 10.278961944580079, "sampling/prob_max": 4.542603783193044e-05, "sampling/prob_min": 1.9623946718638764e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 0.9911999940872193, "sampling/prompt_draws_total": 29736.0, "sampling/seen_fraction": 0.6965666651725769, "sampling/unseen_fraction": 0.3034333348274231, "signal/accuracy_reward/centered_abs_mean": 0.11788736879825593, "signal/accuracy_reward/group_std_mean": 0.15433158874511718, "signal/accuracy_reward/group_zero_std_frac": 0.5666666626930237, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05894368439912796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05894368439912796, "signal/advantage_abs_mean": 0.08145553916692734, "signal/advantage_pre_scale_abs_mean": 0.08145553916692734, "signal/advantage_pre_scale_std": 0.1603451281785965, "signal/advantage_std": 0.1603451281785965, "signal/brier_reward/centered_abs_mean": 0.06856870576739311, "signal/brier_reward/group_std_mean": 0.09311682730913162, "signal/brier_reward/group_zero_std_frac": 0.2194444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03428435288369656, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03428435288369656, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.006564670125953853, "signal/format_reward/group_std_mean": 0.014062346518039703, "signal/format_reward/group_zero_std_frac": 0.9361111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0032823350629769266, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0032823350629769266, "signal/mean_confidence_reward/centered_abs_mean": 0.06457249075174332, "signal/mean_confidence_reward/group_std_mean": 0.08391422182321548, "signal/mean_confidence_reward/group_zero_std_frac": 0.22777777910232544, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.45724924197566e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.45724924197566e-07, "step": 415 }, { "calibration/aurc": 0.15826559352185168, "calibration/batch_distribution_entropy": 0.6645688676194069, "calibration/batch_entropy_100bins": 0.3655125923463937, "calibration/batch_entropy_10bins": 0.6645688676194069, "calibration/batch_entropy_50bins": 0.42807686348036145, "calibration/batch_uniqueness": 0.43058323792640574, "calibration/confidence_entropy": 0.49414698113893907, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.5363545554067046, "calibration/coverage@15%": 0.6613748966207811, "calibration/coverage@20%": 0.7334378801963009, "calibration/coverage@25%": 0.7501672650130548, "calibration/coverage@30%": 0.7892950391644908, "calibration/coverage@5%": 0.44908787774709624, "calibration/distribution_entropy_10": 0.6645688676194069, "calibration/distribution_entropy_100": 0.3655125923463937, "calibration/ece": 0.10076544504268907, "calibration/mean_confidence": 0.6712129246549783, "calibration/unique_confidence_per_question": 0.035416666666666666, "calibration/unique_confidences": 13.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001736111111111116, "completions/max_length": 3455.4, "completions/max_terminated_length": 3455.4, "completions/mean_length": 1117.3010498046874, "completions/mean_terminated_length": 1119.234912109375, "completions/min_length": 106.2, "completions/min_terminated_length": 362.4, "epoch": 1.0096153846153846, "grad_norm": 0.0004065485845785588, "learning_rate": 3.725961538461539e-06, "loss": -0.0, "num_tokens": 995089724.0, "reward": 1.2725646257400514, "reward_std": 0.11338547170162201, "rewards/accuracy_reward": 0.694531238079071, "rewards/brier_reward": 0.8528412699699401, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9977430582046509, "rewards/mean_confidence_reward": 0.6812376379966736, "sampling/batch_mean_priority_error": 0.01960522136886875, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4083333333333334, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0013906704727560282, "sampling/priority_kl": 0.029997918754816055, "sampling/priority_scale": 0.708841937663965, "sampling/prob_entropy": 10.278946876525879, "sampling/prob_max": 4.5564852916868405e-05, "sampling/prob_min": 1.9692146815941668e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 1.0032000064849853, "sampling/prompt_draws_total": 30096.0, "sampling/seen_fraction": 0.7014533281326294, "sampling/unseen_fraction": 0.2985466718673706, "signal/accuracy_reward/centered_abs_mean": 0.128759765625, "signal/accuracy_reward/group_std_mean": 0.1751173049211502, "signal/accuracy_reward/group_zero_std_frac": 0.4750000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0643798828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0643798828125, "signal/advantage_abs_mean": 0.08181026428937913, "signal/advantage_pre_scale_abs_mean": 0.08181026428937913, "signal/advantage_pre_scale_std": 0.15451219081878662, "signal/advantage_std": 0.15451219081878662, "signal/brier_reward/centered_abs_mean": 0.06762193292379379, "signal/brier_reward/group_std_mean": 0.0920287773013115, "signal/brier_reward/group_zero_std_frac": 0.14444444626569747, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.033810966461896894, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.033810966461896894, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.004242621548473835, "signal/format_reward/group_std_mean": 0.010189045500010253, "signal/format_reward/group_zero_std_frac": 0.950000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0021213107742369176, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0021213107742369176, "signal/mean_confidence_reward/centered_abs_mean": 0.06683679148554802, "signal/mean_confidence_reward/group_std_mean": 0.08758924454450608, "signal/mean_confidence_reward/group_zero_std_frac": 0.1500000014901161, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.683678748231614e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.683678748231614e-07, "step": 420 }, { "calibration/aurc": 0.05998141922513901, "calibration/batch_distribution_entropy": 0.6505124534342099, "calibration/batch_entropy_100bins": 0.3638051185873787, "calibration/batch_entropy_10bins": 0.6505124534342099, "calibration/batch_entropy_50bins": 0.42808093280425225, "calibration/batch_uniqueness": 0.41772793882104836, "calibration/confidence_entropy": 0.4729893772005923, "calibration/coverage@0%": 0.2644555047867711, "calibration/coverage@1%": 0.4766000153687816, "calibration/coverage@10%": 0.6602030146987719, "calibration/coverage@15%": 0.9266926381808889, "calibration/coverage@20%": 0.9624020887728459, "calibration/coverage@25%": 1.0, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.600641020659787, "calibration/distribution_entropy_10": 0.6505124534342099, "calibration/distribution_entropy_100": 0.3638051185873787, "calibration/ece": 0.15267265542144076, "calibration/mean_confidence": 0.7118193110200474, "calibration/unique_confidence_per_question": 0.0390625, "calibration/unique_confidences": 15.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0043402777777777676, "completions/max_length": 3774.8, "completions/max_terminated_length": 3774.8, "completions/mean_length": 1087.99921875, "completions/mean_terminated_length": 1092.8842041015625, "completions/min_length": 0.0, "completions/min_terminated_length": 337.6, "epoch": 1.0216346153846154, "grad_norm": 0.000375980423996225, "learning_rate": 3.695913461538462e-06, "loss": -0.004, "num_tokens": 1010726195.0, "reward": 1.284477162361145, "reward_std": 0.10648652762174607, "rewards/accuracy_reward": 0.712413203716278, "rewards/brier_reward": 0.8608673214912415, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9956597208976745, "rewards/mean_confidence_reward": 0.7011353492736816, "sampling/batch_mean_priority_error": 0.018059459333132105, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4333333333333333, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0014110021525993942, "sampling/priority_kl": 0.030000055208802223, "sampling/priority_scale": 0.7089194476371631, "sampling/prob_entropy": 10.27895622253418, "sampling/prob_max": 4.5716890599578616e-05, "sampling/prob_min": 1.9756038454943337e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 1.0152000188827515, "sampling/prompt_draws_total": 30456.0, "sampling/seen_fraction": 0.7066600084304809, "sampling/unseen_fraction": 0.29333999156951907, "signal/accuracy_reward/centered_abs_mean": 0.1138400599360466, "signal/accuracy_reward/group_std_mean": 0.15474726110696793, "signal/accuracy_reward/group_zero_std_frac": 0.5416666746139527, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0569200299680233, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0569200299680233, "signal/advantage_abs_mean": 0.07521377429366112, "signal/advantage_pre_scale_abs_mean": 0.07521377429366112, "signal/advantage_pre_scale_std": 0.15179350972175598, "signal/advantage_std": 0.15179350972175598, "signal/brier_reward/centered_abs_mean": 0.06321329697966575, "signal/brier_reward/group_std_mean": 0.08789711743593216, "signal/brier_reward/group_zero_std_frac": 0.1944444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03160664848983288, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03160664848983288, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.00712890645954758, "signal/format_reward/group_std_mean": 0.014370427234098315, "signal/format_reward/group_zero_std_frac": 0.9361111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00356445322977379, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00356445322977379, "signal/mean_confidence_reward/centered_abs_mean": 0.06138454973697662, "signal/mean_confidence_reward/group_std_mean": 0.08237865716218948, "signal/mean_confidence_reward/group_zero_std_frac": 0.19722222685813903, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.138454864412779e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.138454864412779e-07, "step": 425 }, { "calibration/aurc": 0.1623026411211937, "calibration/batch_distribution_entropy": 0.5908128190664673, "calibration/batch_entropy_100bins": 0.3205678919351199, "calibration/batch_entropy_10bins": 0.5908128190664673, "calibration/batch_entropy_50bins": 0.37684439145325405, "calibration/batch_uniqueness": 0.3022808775676606, "calibration/confidence_entropy": 0.46237970751720275, "calibration/coverage@0%": 0.09869451697127937, "calibration/coverage@1%": 0.10966057441253263, "calibration/coverage@10%": 0.2581171127067015, "calibration/coverage@15%": 0.5452921018276763, "calibration/coverage@20%": 0.694095409051349, "calibration/coverage@25%": 0.800100630983464, "calibration/coverage@30%": 0.8564485422106178, "calibration/coverage@5%": 0.2393671127067015, "calibration/distribution_entropy_10": 0.5908128190664673, "calibration/distribution_entropy_100": 0.3205678919351199, "calibration/ece": 0.11328370918962101, "calibration/mean_confidence": 0.7135982632765554, "calibration/unique_confidence_per_question": 0.028645833333333332, "calibration/unique_confidences": 11.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002517361111111116, "completions/max_length": 3556.4, "completions/max_terminated_length": 3556.4, "completions/mean_length": 1057.6259765625, "completions/mean_terminated_length": 1060.273388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 344.4, "epoch": 1.0336538461538463, "grad_norm": 0.0003816092503257096, "learning_rate": 3.665865384615385e-06, "loss": -0.0019, "num_tokens": 1026012798.0, "reward": 1.2823591470718383, "reward_std": 0.10721758604049683, "rewards/accuracy_reward": 0.7147569417953491, "rewards/brier_reward": 0.8524640798568726, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9974826455116272, "rewards/mean_confidence_reward": 0.7300627708435059, "sampling/batch_mean_priority_error": 0.014562219208853807, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4138888888888889, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0014308568555861712, "sampling/priority_kl": 0.02999901808798313, "sampling/priority_scale": 0.7088448344962671, "sampling/prob_entropy": 10.278940200805664, "sampling/prob_max": 4.5860721729695794e-05, "sampling/prob_min": 1.9819942463072947e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 1.0271999835968018, "sampling/prompt_draws_total": 30816.0, "sampling/seen_fraction": 0.7115333437919616, "sampling/unseen_fraction": 0.28846665620803835, "signal/accuracy_reward/centered_abs_mean": 0.11008029580116271, "signal/accuracy_reward/group_std_mean": 0.15001013576984407, "signal/accuracy_reward/group_zero_std_frac": 0.5583333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05504014790058136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05504014790058136, "signal/advantage_abs_mean": 0.07600466758012772, "signal/advantage_pre_scale_abs_mean": 0.07600466758012772, "signal/advantage_pre_scale_std": 0.15484246909618377, "signal/advantage_std": 0.15484246909618377, "signal/brier_reward/centered_abs_mean": 0.06448182314634324, "signal/brier_reward/group_std_mean": 0.08896796703338623, "signal/brier_reward/group_zero_std_frac": 0.2583333343267441, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03224091157317162, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03224091157317162, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.004833984328433872, "signal/format_reward/group_std_mean": 0.013044581189751625, "signal/format_reward/group_zero_std_frac": 0.9305555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002416992164216936, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002416992164216936, "signal/mean_confidence_reward/centered_abs_mean": 0.057430972903966905, "signal/mean_confidence_reward/group_std_mean": 0.07837551683187485, "signal/mean_confidence_reward/group_zero_std_frac": 0.27777778506278994, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.743097062804736e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.743097062804736e-07, "step": 430 }, { "calibration/aurc": 0.1205467953686877, "calibration/batch_distribution_entropy": 0.5210485050832704, "calibration/batch_entropy_100bins": 0.28943236080843737, "calibration/batch_entropy_10bins": 0.5210485050832704, "calibration/batch_entropy_50bins": 0.34000717947510306, "calibration/batch_uniqueness": 0.10985326071123118, "calibration/confidence_entropy": 0.4302312746231255, "calibration/coverage@0%": 0.15156522687609075, "calibration/coverage@1%": 0.15469022687609074, "calibration/coverage@10%": 0.5183311166469629, "calibration/coverage@15%": 0.6411402916174069, "calibration/coverage@20%": 0.8527174639700501, "calibration/coverage@25%": 0.9147707840736363, "calibration/coverage@30%": 0.9346197151382085, "calibration/coverage@5%": 0.2399388827900692, "calibration/distribution_entropy_10": 0.5210485050832704, "calibration/distribution_entropy_100": 0.28943236080843737, "calibration/ece": 0.10173666440002979, "calibration/mean_confidence": 0.7815555736898057, "calibration/unique_confidence_per_question": 0.03645833333333333, "calibration/unique_confidences": 14.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00425347222222221, "completions/max_length": 3720.4, "completions/max_terminated_length": 3720.4, "completions/mean_length": 1028.5506958007813, "completions/mean_terminated_length": 1033.01962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 328.2, "epoch": 1.0456730769230769, "grad_norm": 0.0004367251240182668, "learning_rate": 3.635817307692308e-06, "loss": -0.0043, "num_tokens": 1040972006.0, "reward": 1.2838960409164428, "reward_std": 0.12252383381128311, "rewards/accuracy_reward": 0.7300347208976745, "rewards/brier_reward": 0.8419958472251892, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9957465171813965, "rewards/mean_confidence_reward": 0.7535378098487854, "sampling/batch_mean_priority_error": 0.014893905477138685, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.4, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0014472598442807794, "sampling/priority_kl": 0.03000083900988102, "sampling/priority_scale": 0.7090173423988745, "sampling/prob_entropy": 10.278966331481934, "sampling/prob_max": 4.6011665108380837e-05, "sampling/prob_min": 1.9881115804309957e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 1.039199995994568, "sampling/prompt_draws_total": 31176.0, "sampling/seen_fraction": 0.7165466666221618, "sampling/unseen_fraction": 0.2834533333778381, "signal/accuracy_reward/centered_abs_mean": 0.12941623032093047, "signal/accuracy_reward/group_std_mean": 0.16836948990821837, "signal/accuracy_reward/group_zero_std_frac": 0.5277777731418609, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06470811516046523, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06470811516046523, "signal/advantage_abs_mean": 0.09151800125837325, "signal/advantage_pre_scale_abs_mean": 0.09151800125837325, "signal/advantage_pre_scale_std": 0.17786999344825744, "signal/advantage_std": 0.17786999344825744, "signal/brier_reward/centered_abs_mean": 0.07523893266916275, "signal/brier_reward/group_std_mean": 0.0994583934545517, "signal/brier_reward/group_zero_std_frac": 0.27500000298023225, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.037619466334581374, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.037619466334581374, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.007373047061264515, "signal/format_reward/group_std_mean": 0.01584246177226305, "signal/format_reward/group_zero_std_frac": 0.9277777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0036865235306322575, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0036865235306322575, "signal/mean_confidence_reward/centered_abs_mean": 0.06189207062125206, "signal/mean_confidence_reward/group_std_mean": 0.08114207834005356, "signal/mean_confidence_reward/group_zero_std_frac": 0.2916666716337204, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.189206828821626e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.189206828821626e-07, "step": 435 }, { "calibration/aurc": 0.10714301687641507, "calibration/batch_distribution_entropy": 0.5955961633333724, "calibration/batch_entropy_100bins": 0.34357802421851297, "calibration/batch_entropy_10bins": 0.5955961633333724, "calibration/batch_entropy_50bins": 0.4022762759967714, "calibration/batch_uniqueness": 0.2600934386043134, "calibration/confidence_entropy": 0.44799529131081606, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.2546138743455497, "calibration/coverage@10%": 0.5192162958115183, "calibration/coverage@15%": 0.7130208333333334, "calibration/coverage@20%": 0.8552028795811518, "calibration/coverage@25%": 0.9090095986038396, "calibration/coverage@30%": 0.9408376963350786, "calibration/coverage@5%": 0.46792375654450263, "calibration/distribution_entropy_10": 0.5955961633333724, "calibration/distribution_entropy_100": 0.34357802421851297, "calibration/ece": 0.11402197692917163, "calibration/mean_confidence": 0.7536124024710045, "calibration/unique_confidence_per_question": 0.0453125, "calibration/unique_confidences": 17.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003125, "completions/max_length": 3729.4, "completions/max_terminated_length": 3729.4, "completions/mean_length": 1017.2171997070312, "completions/mean_terminated_length": 1020.3733520507812, "completions/min_length": 0.0, "completions/min_terminated_length": 311.2, "epoch": 1.0576923076923077, "grad_norm": 0.00039360576192848384, "learning_rate": 3.605769230769231e-06, "loss": -0.0023, "num_tokens": 1055791948.0, "reward": 1.2873583793640138, "reward_std": 0.1034932404756546, "rewards/accuracy_reward": 0.71875, "rewards/brier_reward": 0.8590771198272705, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.996875, "rewards/mean_confidence_reward": 0.7335354685783386, "sampling/batch_mean_priority_error": 0.014799672360578007, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3805555555555556, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0014623381663113832, "sampling/priority_kl": 0.03000078722834587, "sampling/priority_scale": 0.7089662969810888, "sampling/prob_entropy": 10.278951263427734, "sampling/prob_max": 4.6152883442118764e-05, "sampling/prob_min": 1.99433427042095e-05, "sampling/prompt_draws_max": 5.0, "sampling/prompt_draws_mean": 1.051199984550476, "sampling/prompt_draws_total": 31536.0, "sampling/seen_fraction": 0.7211666703224182, "sampling/unseen_fraction": 0.2788333296775818, "signal/accuracy_reward/centered_abs_mean": 0.10870225876569747, "signal/accuracy_reward/group_std_mean": 0.14185952842235566, "signal/accuracy_reward/group_zero_std_frac": 0.600000011920929, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05435112938284874, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05435112938284874, "signal/advantage_abs_mean": 0.07555654495954514, "signal/advantage_pre_scale_abs_mean": 0.07555654495954514, "signal/advantage_pre_scale_std": 0.1532256066799164, "signal/advantage_std": 0.1532256066799164, "signal/brier_reward/centered_abs_mean": 0.06442730501294136, "signal/brier_reward/group_std_mean": 0.08673342168331147, "signal/brier_reward/group_zero_std_frac": 0.23055555820465087, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03221365250647068, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03221365250647068, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.005946180550381541, "signal/format_reward/group_std_mean": 0.015451083704829217, "signal/format_reward/group_zero_std_frac": 0.919444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0029730902751907706, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0029730902751907706, "signal/mean_confidence_reward/centered_abs_mean": 0.061560317128896716, "signal/mean_confidence_reward/group_std_mean": 0.08096567541360855, "signal/mean_confidence_reward/group_zero_std_frac": 0.2361111134290695, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.156031759019242e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.156031759019242e-07, "step": 440 }, { "calibration/aurc": 0.15670651247509326, "calibration/batch_distribution_entropy": 0.6795985620528495, "calibration/batch_entropy_100bins": 0.39968524286734586, "calibration/batch_entropy_10bins": 0.6795985620528495, "calibration/batch_entropy_50bins": 0.47013045656830627, "calibration/batch_uniqueness": 0.4595951769139866, "calibration/confidence_entropy": 0.48444565435591047, "calibration/coverage@0%": 0.10548302872062662, "calibration/coverage@1%": 0.12532637075718017, "calibration/coverage@10%": 0.38832387030820453, "calibration/coverage@15%": 0.5937442798776023, "calibration/coverage@20%": 0.6182411249602829, "calibration/coverage@25%": 0.8065749029521875, "calibration/coverage@30%": 0.8878541727105695, "calibration/coverage@5%": 0.1869341180046141, "calibration/distribution_entropy_10": 0.6795985620528495, "calibration/distribution_entropy_100": 0.39968524286734586, "calibration/ece": 0.11142151772778952, "calibration/mean_confidence": 0.6838090146706527, "calibration/unique_confidence_per_question": 0.04791666666666666, "calibration/unique_confidences": 18.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002951388888888884, "completions/max_length": 3774.6, "completions/max_terminated_length": 3774.6, "completions/mean_length": 1003.1759521484375, "completions/mean_terminated_length": 1006.1692138671875, "completions/min_length": 0.0, "completions/min_terminated_length": 313.0, "epoch": 1.0697115384615385, "grad_norm": 0.00040377015830017626, "learning_rate": 3.575721153846154e-06, "loss": -0.0024, "num_tokens": 1070414551.0, "reward": 1.3242327213287353, "reward_std": 0.0985756278038025, "rewards/accuracy_reward": 0.7759548544883728, "rewards/brier_reward": 0.8754475831985473, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9970486164093018, "rewards/mean_confidence_reward": 0.7241761922836304, "sampling/batch_mean_priority_error": 0.011732953869047609, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3472222222222222, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0014753591967746615, "sampling/priority_kl": 0.030000388249754907, "sampling/priority_scale": 0.7085408985847608, "sampling/prob_entropy": 10.278962707519531, "sampling/prob_max": 4.628084570867941e-05, "sampling/prob_min": 1.9537291882443243e-05, "sampling/prompt_draws_max": 5.6, "sampling/prompt_draws_mean": 1.0631999969482422, "sampling/prompt_draws_total": 31896.0, "sampling/seen_fraction": 0.7253000020980835, "sampling/unseen_fraction": 0.2746999979019165, "signal/accuracy_reward/centered_abs_mean": 0.10059136152267456, "signal/accuracy_reward/group_std_mean": 0.13695140928030014, "signal/accuracy_reward/group_zero_std_frac": 0.5916666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05029568076133728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05029568076133728, "signal/advantage_abs_mean": 0.06849844828248024, "signal/advantage_pre_scale_abs_mean": 0.06849844828248024, "signal/advantage_pre_scale_std": 0.1458652526140213, "signal/advantage_std": 0.1458652526140213, "signal/brier_reward/centered_abs_mean": 0.05644915029406548, "signal/brier_reward/group_std_mean": 0.07987928092479706, "signal/brier_reward/group_zero_std_frac": 0.15000000298023225, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02822457514703274, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.02822457514703274, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.005598958255723119, "signal/format_reward/group_std_mean": 0.013899841345846653, "signal/format_reward/group_zero_std_frac": 0.9305555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0027994791278615596, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0027994791278615596, "signal/mean_confidence_reward/centered_abs_mean": 0.05799039080739021, "signal/mean_confidence_reward/group_std_mean": 0.07693097293376923, "signal/mean_confidence_reward/group_zero_std_frac": 0.1583333358168602, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.79903905872925e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.79903905872925e-07, "step": 445 }, { "calibration/aurc": 0.19829493130072745, "calibration/batch_distribution_entropy": 0.6906534768349407, "calibration/batch_entropy_100bins": 0.4124322883013575, "calibration/batch_entropy_10bins": 0.6906534768349407, "calibration/batch_entropy_50bins": 0.48449631588350217, "calibration/batch_uniqueness": 0.4989087204350981, "calibration/confidence_entropy": 0.49707835632558944, "calibration/coverage@0%": 0.03172123015873016, "calibration/coverage@1%": 0.03172123015873016, "calibration/coverage@10%": 0.26872519841269843, "calibration/coverage@15%": 0.5488925212473441, "calibration/coverage@20%": 0.5915760529933758, "calibration/coverage@25%": 0.5942215556388785, "calibration/coverage@30%": 0.7055076448777237, "calibration/coverage@5%": 0.2476686507936508, "calibration/distribution_entropy_10": 0.6906534768349407, "calibration/distribution_entropy_100": 0.4124322883013575, "calibration/ece": 0.11379584590896452, "calibration/mean_confidence": 0.6684599442887945, "calibration/unique_confidence_per_question": 0.04687500000000001, "calibration/unique_confidences": 18.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006076388888888906, "completions/max_length": 3719.4, "completions/max_terminated_length": 3719.4, "completions/mean_length": 1025.4586059570313, "completions/mean_terminated_length": 1031.8718627929688, "completions/min_length": 0.0, "completions/min_terminated_length": 332.6, "epoch": 1.0817307692307692, "grad_norm": 0.00046888578799553216, "learning_rate": 3.5456730769230774e-06, "loss": -0.0058, "num_tokens": 1085298394.0, "reward": 1.3057713031768798, "reward_std": 0.11162674725055695, "rewards/accuracy_reward": 0.7561631917953491, "rewards/brier_reward": 0.8614419937133789, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9939236164093017, "rewards/mean_confidence_reward": 0.6888800263404846, "sampling/batch_mean_priority_error": 0.01921851300705466, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3777777777777778, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0014933944679796697, "sampling/priority_kl": 0.030000264942646026, "sampling/priority_scale": 0.7086831629509106, "sampling/prob_entropy": 10.278955841064453, "sampling/prob_max": 4.6429499343503265e-05, "sampling/prob_min": 1.9281814093119465e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.0752000093460083, "sampling/prompt_draws_total": 32256.0, "sampling/seen_fraction": 0.7299266695976258, "sampling/unseen_fraction": 0.27007333040237425, "signal/accuracy_reward/centered_abs_mean": 0.11822374314069747, "signal/accuracy_reward/group_std_mean": 0.15489685237407685, "signal/accuracy_reward/group_zero_std_frac": 0.5611111223697662, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05911187157034874, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05911187157034874, "signal/advantage_abs_mean": 0.07847933173179626, "signal/advantage_pre_scale_abs_mean": 0.07847933173179626, "signal/advantage_pre_scale_std": 0.16222485899925232, "signal/advantage_std": 0.16222485899925232, "signal/brier_reward/centered_abs_mean": 0.06763920560479164, "signal/brier_reward/group_std_mean": 0.09310382902622223, "signal/brier_reward/group_zero_std_frac": 0.11388889253139496, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03381960280239582, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03381960280239582, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.011338975839316845, "signal/format_reward/group_std_mean": 0.026788859441876412, "signal/format_reward/group_zero_std_frac": 0.8694444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005669487919658422, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005669487919658422, "signal/mean_confidence_reward/centered_abs_mean": 0.06197234690189361, "signal/mean_confidence_reward/group_std_mean": 0.08344518542289733, "signal/mean_confidence_reward/group_zero_std_frac": 0.11388889253139496, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.197234370119986e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.197234370119986e-07, "step": 450 }, { "epoch": 1.0817307692307692, "eval_calibration/aurc": 0.13411705755388098, "eval_calibration/batch_distribution_entropy": 0.7462482530200102, "eval_calibration/batch_entropy_100bins": 0.42241674770712895, "eval_calibration/batch_entropy_10bins": 0.7462482530200102, "eval_calibration/batch_entropy_50bins": 0.496558810090405, "eval_calibration/batch_uniqueness": 0.600966601402125, "eval_calibration/confidence_entropy": 0.4965210712316667, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.544259421560035, "eval_calibration/coverage@15%": 0.6985100788781771, "eval_calibration/coverage@20%": 0.7572304995617879, "eval_calibration/coverage@25%": 0.8632778264680105, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.7462482530200102, "eval_calibration/distribution_entropy_100": 0.42241674770712895, "eval_calibration/ece": 0.041625847129790935, "eval_calibration/mean_confidence": 0.6837050591827103, "eval_calibration/unique_confidence_per_question": 0.016493055555555556, "eval_calibration/unique_confidences": 19, "eval_completions/clipped_ratio": 0.006944444444444438, "eval_completions/max_length": 2698.0, "eval_completions/max_terminated_length": 2698.0, "eval_completions/mean_length": 1044.328837076823, "eval_completions/mean_terminated_length": 1051.6819254557292, "eval_completions/min_length": 155.16666666666666, "eval_completions/min_terminated_length": 400.6666666666667, "eval_loss": 0.0, "eval_num_tokens": 1085298394.0, "eval_reward": 1.2724303007125854, "eval_reward_std": 0.3061056633790334, "eval_rewards/accuracy_reward": 0.7048611044883728, "eval_rewards/brier_reward": 0.8495345413684845, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9904513855775198, "eval_rewards/mean_confidence_reward": 0.6771766245365143, "eval_runtime": 201.6774, "eval_samples_per_second": 4.958, "eval_signal/accuracy_reward/centered_abs_mean": 0.4029947866996129, "eval_signal/accuracy_reward/group_std_mean": 0.45439985394477844, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20149739334980646, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20149739334980646, "eval_signal/advantage_abs_mean": 0.2554394875963529, "eval_signal/advantage_pre_scale_abs_mean": 0.2554394875963529, "eval_signal/advantage_pre_scale_std": 0.3054951975742976, "eval_signal/advantage_std": 0.3054951975742976, "eval_signal/brier_reward/centered_abs_mean": 0.15039577335119247, "eval_signal/brier_reward/group_std_mean": 0.20635540535052618, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07519788667559624, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.07519788667559624, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.018391926928112905, "eval_signal/format_reward/group_std_mean": 0.051025692063073315, "eval_signal/format_reward/group_zero_std_frac": 0.722222238779068, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009195963464056453, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.009195963464056453, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.20515539248784384, "eval_signal/mean_confidence_reward/group_std_mean": 0.2403454432884852, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.05155390631262e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.05155390631262e-06, "eval_steps_per_second": 0.03, "step": 450 }, { "epoch": 1.0817307692307692, "step": 450, "train_probe_calibration/aurc": 0.1033654966199605, "train_probe_calibration/batch_distribution_entropy": 0.7499117856336898, "train_probe_calibration/batch_entropy_100bins": 0.42785477507255537, "train_probe_calibration/batch_entropy_10bins": 0.7499117856336898, "train_probe_calibration/batch_entropy_50bins": 0.5023441304265334, "train_probe_calibration/batch_uniqueness": 0.5832717759310557, "train_probe_calibration/confidence_entropy": 0.4965396364266109, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.6482456140350877, "train_probe_calibration/coverage@15%": 0.7666666666666667, "train_probe_calibration/coverage@20%": 0.8780701754385964, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.39649122807017545, "train_probe_calibration/distribution_entropy_10": 0.7499117856336898, "train_probe_calibration/distribution_entropy_100": 0.42785477507255537, "train_probe_calibration/ece": 0.07051012742617996, "train_probe_calibration/mean_confidence": 0.6876272994744047, "train_probe_calibration/unique_confidence_per_question": 0.024305555555555556, "train_probe_calibration/unique_confidences": 28, "train_probe_completions/clipped_ratio": 0.007812500000000019, "train_probe_completions/max_length": 2930.0, "train_probe_completions/max_terminated_length": 2930.0, "train_probe_completions/mean_length": 1057.2571411132812, "train_probe_completions/mean_terminated_length": 1065.5167032877605, "train_probe_completions/min_length": 54.166666666666664, "train_probe_completions/min_terminated_length": 351.0, "train_probe_loss": 0.0, "train_probe_num_tokens": 1085298394.0, "train_probe_reward": 1.2937801281611125, "train_probe_reward_std": 0.29190226395924884, "train_probe_rewards/accuracy_reward": 0.7465277711550394, "train_probe_rewards/brier_reward": 0.8514354924360911, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9895833432674408, "train_probe_rewards/mean_confidence_reward": 0.6804644962151846, "train_probe_runtime": 211.8545, "train_probe_samples_per_second": 4.72, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3678385416666667, "train_probe_signal/accuracy_reward/group_std_mean": 0.4330555697282155, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.18391927083333334, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18391927083333334, "train_probe_signal/advantage_abs_mean": 0.23508351047833762, "train_probe_signal/advantage_pre_scale_abs_mean": 0.23508351047833762, "train_probe_signal/advantage_pre_scale_std": 0.2930772602558136, "train_probe_signal/advantage_std": 0.2930772602558136, "train_probe_signal/brier_reward/centered_abs_mean": 0.14737369120121002, "train_probe_signal/brier_reward/group_std_mean": 0.19736657788356146, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07368684560060501, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07368684560060501, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.019748263837148745, "train_probe_signal/format_reward/group_std_mean": 0.049431003319720425, "train_probe_signal/format_reward/group_zero_std_frac": 0.7500000298023224, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009874131918574372, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.009874131918574372, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.20315797378619513, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.23725442836682, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.031579716306927e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.031579716306927e-06, "train_probe_steps_per_second": 0.028 }, { "calibration/aurc": 0.13629810162538877, "calibration/batch_distribution_entropy": 0.7171101453541321, "calibration/batch_entropy_100bins": 0.40900396610625045, "calibration/batch_entropy_10bins": 0.7171101453541321, "calibration/batch_entropy_50bins": 0.4814728512715501, "calibration/batch_uniqueness": 0.5450898760297638, "calibration/confidence_entropy": 0.49317485953668533, "calibration/coverage@0%": 0.051458885941644564, "calibration/coverage@1%": 0.0843501326259947, "calibration/coverage@10%": 0.4297356031906254, "calibration/coverage@15%": 0.6599059517453104, "calibration/coverage@20%": 0.7393530701754385, "calibration/coverage@25%": 0.9118708006718925, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.14801061007957558, "calibration/distribution_entropy_10": 0.7171101453541321, "calibration/distribution_entropy_100": 0.40900396610625045, "calibration/ece": 0.12500761844623415, "calibration/mean_confidence": 0.6974867715920466, "calibration/unique_confidence_per_question": 0.03802083333333333, "calibration/unique_confidences": 14.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0109375, "completions/max_length": 3788.0, "completions/max_terminated_length": 3788.0, "completions/mean_length": 1065.3917724609375, "completions/mean_terminated_length": 1077.2124877929687, "completions/min_length": 0.0, "completions/min_terminated_length": 335.6, "epoch": 1.09375, "grad_norm": 0.0005689400713890791, "learning_rate": 3.5156250000000003e-06, "loss": -0.0114, "num_tokens": 1100698651.0, "reward": 1.282351565361023, "reward_std": 0.14021986573934556, "rewards/accuracy_reward": 0.732812511920929, "rewards/brier_reward": 0.842814815044403, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.989062511920929, "rewards/mean_confidence_reward": 0.6714834690093994, "sampling/batch_mean_priority_error": 0.016140534315776323, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3555555555555555, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0015142235439270734, "sampling/priority_kl": 0.03000020533800125, "sampling/priority_scale": 0.7086563527351245, "sampling/prob_entropy": 10.278951644897461, "sampling/prob_max": 4.657171957660467e-05, "sampling/prob_min": 1.9341520965099334e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.0871999979019165, "sampling/prompt_draws_total": 32616.0, "sampling/seen_fraction": 0.7342666625976563, "sampling/unseen_fraction": 0.26573333740234373, "signal/accuracy_reward/centered_abs_mean": 0.1413736969232559, "signal/accuracy_reward/group_std_mean": 0.1889444947242737, "signal/accuracy_reward/group_zero_std_frac": 0.450000011920929, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07068684846162795, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07068684846162795, "signal/advantage_abs_mean": 0.09725596755743027, "signal/advantage_pre_scale_abs_mean": 0.09725596755743027, "signal/advantage_pre_scale_std": 0.18758226335048675, "signal/advantage_std": 0.18758226335048675, "signal/brier_reward/centered_abs_mean": 0.08304826766252518, "signal/brier_reward/group_std_mean": 0.11446874588727951, "signal/brier_reward/group_zero_std_frac": 0.0972222238779068, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04152413383126259, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04152413383126259, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.01982421875, "signal/format_reward/group_std_mean": 0.04075228720903397, "signal/format_reward/group_zero_std_frac": 0.825000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009912109375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009912109375, "signal/mean_confidence_reward/centered_abs_mean": 0.072690649330616, "signal/mean_confidence_reward/group_std_mean": 0.09616097360849381, "signal/mean_confidence_reward/group_zero_std_frac": 0.10555555671453476, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.269064440151851e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.269064440151851e-07, "step": 455 }, { "calibration/aurc": 0.10542953162029697, "calibration/batch_distribution_entropy": 0.6119959945602249, "calibration/batch_entropy_100bins": 0.34331494201870555, "calibration/batch_entropy_10bins": 0.6119959945602249, "calibration/batch_entropy_50bins": 0.4041447949551973, "calibration/batch_uniqueness": 0.38240274912033423, "calibration/confidence_entropy": 0.46975072875067064, "calibration/coverage@0%": 0.11705626640419946, "calibration/coverage@1%": 0.2493479330708661, "calibration/coverage@10%": 0.6614818330000416, "calibration/coverage@15%": 0.7343451860184145, "calibration/coverage@20%": 0.7878556847060784, "calibration/coverage@25%": 0.8256509602966297, "calibration/coverage@30%": 0.8790442861309004, "calibration/coverage@5%": 0.5737981320043328, "calibration/distribution_entropy_10": 0.6119959945602249, "calibration/distribution_entropy_100": 0.34331494201870555, "calibration/ece": 0.1093204766721867, "calibration/mean_confidence": 0.6956372109085324, "calibration/unique_confidence_per_question": 0.025, "calibration/unique_confidences": 9.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0065972222222221875, "completions/max_length": 3885.8, "completions/max_terminated_length": 3885.8, "completions/mean_length": 1045.6061767578126, "completions/mean_terminated_length": 1052.6762451171876, "completions/min_length": 0.0, "completions/min_terminated_length": 356.4, "epoch": 1.1057692307692308, "grad_norm": 0.0004963566898368299, "learning_rate": 3.4855769230769233e-06, "loss": -0.0065, "num_tokens": 1115864770.0, "reward": 1.297843074798584, "reward_std": 0.11444427967071533, "rewards/accuracy_reward": 0.7450520753860473, "rewards/brier_reward": 0.8572170615196228, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9934027791023254, "rewards/mean_confidence_reward": 0.7170493602752686, "sampling/batch_mean_priority_error": 0.01892611882716048, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3472222222222222, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.001534794899635017, "sampling/priority_kl": 0.029999419301748275, "sampling/priority_scale": 0.7087399541633204, "sampling/prob_entropy": 10.278960227966309, "sampling/prob_max": 4.671617498388514e-05, "sampling/prob_min": 1.9399503071326762e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.0992000102996826, "sampling/prompt_draws_total": 32976.0, "sampling/seen_fraction": 0.7386133313179016, "sampling/unseen_fraction": 0.26138666868209837, "signal/accuracy_reward/centered_abs_mean": 0.10742730051279067, "signal/accuracy_reward/group_std_mean": 0.14884938895702363, "signal/accuracy_reward/group_zero_std_frac": 0.547222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05371365025639534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05371365025639534, "signal/advantage_abs_mean": 0.07775596380233765, "signal/advantage_pre_scale_abs_mean": 0.07775596380233765, "signal/advantage_pre_scale_std": 0.16309164762496947, "signal/advantage_std": 0.16309164762496947, "signal/brier_reward/centered_abs_mean": 0.07044132426381111, "signal/brier_reward/group_std_mean": 0.09729270190000534, "signal/brier_reward/group_zero_std_frac": 0.1833333343267441, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.035220662131905556, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.035220662131905556, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.012337239738553763, "signal/format_reward/group_std_mean": 0.028919679298996925, "signal/format_reward/group_zero_std_frac": 0.8611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0061686198692768816, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0061686198692768816, "signal/mean_confidence_reward/centered_abs_mean": 0.06461131945252419, "signal/mean_confidence_reward/group_std_mean": 0.08675065785646438, "signal/mean_confidence_reward/group_zero_std_frac": 0.19166666865348816, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.461131533797016e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.461131533797016e-07, "step": 460 }, { "calibration/aurc": 0.09359492378663284, "calibration/batch_distribution_entropy": 0.6444820041232762, "calibration/batch_entropy_100bins": 0.34567299261889833, "calibration/batch_entropy_10bins": 0.6444820041232762, "calibration/batch_entropy_50bins": 0.4069206539688053, "calibration/batch_uniqueness": 0.4058231469905339, "calibration/confidence_entropy": 0.46850383484094343, "calibration/coverage@0%": 0.08489583333333334, "calibration/coverage@1%": 0.25885416666666666, "calibration/coverage@10%": 0.5697916666666666, "calibration/coverage@15%": 0.9046960159003872, "calibration/coverage@20%": 0.9368139668557263, "calibration/coverage@25%": 0.9630606860158311, "calibration/coverage@30%": 0.9889182058047494, "calibration/coverage@5%": 0.5152302631578947, "calibration/distribution_entropy_10": 0.6444820041232762, "calibration/distribution_entropy_100": 0.34567299261889833, "calibration/ece": 0.1616714965172559, "calibration/mean_confidence": 0.7261069838474573, "calibration/unique_confidence_per_question": 0.025, "calibration/unique_confidences": 9.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004079861111111116, "completions/max_length": 3468.8, "completions/max_terminated_length": 3468.8, "completions/mean_length": 979.820849609375, "completions/mean_terminated_length": 983.8570434570313, "completions/min_length": 0.0, "completions/min_terminated_length": 310.4, "epoch": 1.1177884615384615, "grad_norm": 0.0004011022101622075, "learning_rate": 3.4555288461538466e-06, "loss": -0.0027, "num_tokens": 1130241874.0, "reward": 1.30813148021698, "reward_std": 0.098023721575737, "rewards/accuracy_reward": 0.7587673664093018, "rewards/brier_reward": 0.8615612864494324, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9959201335906982, "rewards/mean_confidence_reward": 0.707803475856781, "sampling/batch_mean_priority_error": 0.019616319444444426, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.32499999999999996, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0015541198663413525, "sampling/priority_kl": 0.029999390617012976, "sampling/priority_scale": 0.7084668695228175, "sampling/prob_entropy": 10.278964424133301, "sampling/prob_max": 4.684552040998824e-05, "sampling/prob_min": 1.945980329765007e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.1112000226974488, "sampling/prompt_draws_total": 33336.0, "sampling/seen_fraction": 0.7424600005149842, "sampling/unseen_fraction": 0.2575399994850159, "signal/accuracy_reward/centered_abs_mean": 0.10092773586511612, "signal/accuracy_reward/group_std_mean": 0.13783197551965715, "signal/accuracy_reward/group_zero_std_frac": 0.5916666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05046386793255806, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05046386793255806, "signal/advantage_abs_mean": 0.06758801788091659, "signal/advantage_pre_scale_abs_mean": 0.06758801788091659, "signal/advantage_pre_scale_std": 0.1427416533231735, "signal/advantage_std": 0.1427416533231735, "signal/brier_reward/centered_abs_mean": 0.06408771499991417, "signal/brier_reward/group_std_mean": 0.08897604048252106, "signal/brier_reward/group_zero_std_frac": 0.18055555522441863, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.032043857499957085, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.032043857499957085, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.007687717024236917, "signal/format_reward/group_std_mean": 0.018355799838900566, "signal/format_reward/group_zero_std_frac": 0.9111111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0038438585121184587, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0038438585121184587, "signal/mean_confidence_reward/centered_abs_mean": 0.06441164910793304, "signal/mean_confidence_reward/group_std_mean": 0.08494757264852523, "signal/mean_confidence_reward/group_zero_std_frac": 0.18333333134651184, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.44116460080113e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.44116460080113e-07, "step": 465 }, { "calibration/aurc": 0.14959051893570235, "calibration/batch_distribution_entropy": 0.6709319069566796, "calibration/batch_entropy_100bins": 0.3686111766059616, "calibration/batch_entropy_10bins": 0.6709319069566796, "calibration/batch_entropy_50bins": 0.43392311302166864, "calibration/batch_uniqueness": 0.47061380252948803, "calibration/confidence_entropy": 0.4698123180967967, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.11279373368146214, "calibration/coverage@10%": 0.5033365397828775, "calibration/coverage@15%": 0.5956836608492511, "calibration/coverage@20%": 0.62553112546379, "calibration/coverage@25%": 0.7751461988304094, "calibration/coverage@30%": 0.8230409356725147, "calibration/coverage@5%": 0.3579304658513124, "calibration/distribution_entropy_10": 0.6709319069566796, "calibration/distribution_entropy_100": 0.3686111766059616, "calibration/ece": 0.10774334189113621, "calibration/mean_confidence": 0.6822683751007925, "calibration/unique_confidence_per_question": 0.02447916666666667, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005642361111111094, "completions/max_length": 3636.0, "completions/max_terminated_length": 3636.0, "completions/mean_length": 1004.15703125, "completions/mean_terminated_length": 1009.826318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 334.2, "epoch": 1.1298076923076923, "grad_norm": 0.0005025005084462464, "learning_rate": 3.4254807692307695e-06, "loss": -0.0059, "num_tokens": 1144918851.0, "reward": 1.2935538053512574, "reward_std": 0.10897820442914963, "rewards/accuracy_reward": 0.7296875, "rewards/brier_reward": 0.8633089423179626, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9940972089767456, "rewards/mean_confidence_reward": 0.6967671632766723, "sampling/batch_mean_priority_error": 0.018480902777777763, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.38333333333333336, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0015776523388922215, "sampling/priority_kl": 0.029999713972210885, "sampling/priority_scale": 0.7086878955131397, "sampling/prob_entropy": 10.27896728515625, "sampling/prob_max": 4.699049750342965e-05, "sampling/prob_min": 1.9505477393977343e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.123200011253357, "sampling/prompt_draws_total": 33696.0, "sampling/seen_fraction": 0.7466799855232239, "sampling/unseen_fraction": 0.2533200144767761, "signal/accuracy_reward/centered_abs_mean": 0.1121744766831398, "signal/accuracy_reward/group_std_mean": 0.14886936098337172, "signal/accuracy_reward/group_zero_std_frac": 0.5666666626930237, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0560872383415699, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0560872383415699, "signal/advantage_abs_mean": 0.07866804748773575, "signal/advantage_pre_scale_abs_mean": 0.07866804748773575, "signal/advantage_pre_scale_std": 0.1617803692817688, "signal/advantage_std": 0.1617803692817688, "signal/brier_reward/centered_abs_mean": 0.07295527458190917, "signal/brier_reward/group_std_mean": 0.09744260013103485, "signal/brier_reward/group_zero_std_frac": 0.20833333134651183, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03647763729095459, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03647763729095459, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.01085069440305233, "signal/format_reward/group_std_mean": 0.023369645327329637, "signal/format_reward/group_zero_std_frac": 0.8944444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005425347201526165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005425347201526165, "signal/mean_confidence_reward/centered_abs_mean": 0.0644960232079029, "signal/mean_confidence_reward/group_std_mean": 0.08588441014289856, "signal/mean_confidence_reward/group_zero_std_frac": 0.2194444477558136, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.449602551583667e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.449602551583667e-07, "step": 470 }, { "calibration/aurc": 0.20161512188607883, "calibration/batch_distribution_entropy": 0.5796123879186574, "calibration/batch_entropy_100bins": 0.3144117230666262, "calibration/batch_entropy_10bins": 0.5796123879186574, "calibration/batch_entropy_50bins": 0.37012039325497403, "calibration/batch_uniqueness": 0.25974887218320325, "calibration/confidence_entropy": 0.4370606776018837, "calibration/coverage@0%": 0.0005249343832020997, "calibration/coverage@1%": 0.0005249343832020997, "calibration/coverage@10%": 0.1614624343832021, "calibration/coverage@15%": 0.4038601372813059, "calibration/coverage@20%": 0.6582004721668278, "calibration/coverage@25%": 0.7330276926872391, "calibration/coverage@30%": 0.7796344647519582, "calibration/coverage@5%": 0.13750410104986877, "calibration/distribution_entropy_10": 0.5796123879186574, "calibration/distribution_entropy_100": 0.3144117230666262, "calibration/ece": 0.14319296054480074, "calibration/mean_confidence": 0.7470921170078164, "calibration/unique_confidence_per_question": 0.023437499999999997, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007638888888888906, "completions/max_length": 3197.4, "completions/max_terminated_length": 3197.4, "completions/mean_length": 962.5202270507813, "completions/mean_terminated_length": 969.9363037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 334.6, "epoch": 1.1418269230769231, "grad_norm": 0.000619587255641818, "learning_rate": 3.3954326923076925e-06, "loss": -0.0078, "num_tokens": 1159110892.0, "reward": 1.2515861749649049, "reward_std": 0.1271716073155403, "rewards/accuracy_reward": 0.6802951335906983, "rewards/brier_reward": 0.8305015563964844, "rewards/confidence_one_or_zero": 0.0007812500232830643, "rewards/format_reward": 0.9923611164093018, "rewards/mean_confidence_reward": 0.7246870160102844, "sampling/batch_mean_priority_error": 0.01913888888888888, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.33888888888888885, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0015997483395040035, "sampling/priority_kl": 0.03000010587275028, "sampling/priority_scale": 0.7091093718772754, "sampling/prob_entropy": 10.278954887390137, "sampling/prob_max": 4.7144032578216866e-05, "sampling/prob_min": 1.9522143702488393e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.1351999759674072, "sampling/prompt_draws_total": 34056.0, "sampling/seen_fraction": 0.7509999871253967, "sampling/unseen_fraction": 0.24900001287460327, "signal/accuracy_reward/centered_abs_mean": 0.11660698801279068, "signal/accuracy_reward/group_std_mean": 0.16022705137729645, "signal/accuracy_reward/group_zero_std_frac": 0.5194444417953491, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05830349400639534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05830349400639534, "signal/advantage_abs_mean": 0.08898119479417801, "signal/advantage_pre_scale_abs_mean": 0.08898119479417801, "signal/advantage_pre_scale_std": 0.1809540420770645, "signal/advantage_std": 0.1809540420770645, "signal/brier_reward/centered_abs_mean": 0.08139911442995071, "signal/brier_reward/group_std_mean": 0.10895915925502778, "signal/brier_reward/group_zero_std_frac": 0.23055555522441865, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.040699557214975354, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.040699557214975354, "signal/confidence_one_or_zero/centered_abs_mean": 0.0014485677238553762, "signal/confidence_one_or_zero/group_std_mean": 0.0031183868646621703, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111164093017, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.448567594763972e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.448567594763972e-08, "signal/format_reward/centered_abs_mean": 0.014257812313735485, "signal/format_reward/group_std_mean": 0.033097638934850696, "signal/format_reward/group_zero_std_frac": 0.8416666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007128906156867743, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007128906156867743, "signal/mean_confidence_reward/centered_abs_mean": 0.06632542088627816, "signal/mean_confidence_reward/group_std_mean": 0.08892321586608887, "signal/mean_confidence_reward/group_zero_std_frac": 0.2416666716337204, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.632541840190243e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.632541840190243e-07, "step": 475 }, { "calibration/aurc": 0.1505559280296241, "calibration/batch_distribution_entropy": 0.7007643129699044, "calibration/batch_entropy_100bins": 0.3944490515473666, "calibration/batch_entropy_10bins": 0.7007643129699044, "calibration/batch_entropy_50bins": 0.46433904134937665, "calibration/batch_uniqueness": 0.5243341324771948, "calibration/confidence_entropy": 0.47274465400556076, "calibration/coverage@0%": 0.03875456576162474, "calibration/coverage@1%": 0.03875456576162474, "calibration/coverage@10%": 0.4747473679080457, "calibration/coverage@15%": 0.5356127446310162, "calibration/coverage@20%": 0.6051703213524231, "calibration/coverage@25%": 0.8049892598336099, "calibration/coverage@30%": 0.8858638743455497, "calibration/coverage@5%": 0.2838690084455597, "calibration/distribution_entropy_10": 0.7007643129699044, "calibration/distribution_entropy_100": 0.3944490515473666, "calibration/ece": 0.10062478087275668, "calibration/mean_confidence": 0.6347946440611388, "calibration/unique_confidence_per_question": 0.025520833333333336, "calibration/unique_confidences": 9.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004253472222222188, "completions/max_length": 3582.4, "completions/max_terminated_length": 3582.4, "completions/mean_length": 957.6535766601562, "completions/mean_terminated_length": 961.7112426757812, "completions/min_length": 0.0, "completions/min_terminated_length": 292.0, "epoch": 1.1538461538461537, "grad_norm": 0.0004493494925554842, "learning_rate": 3.365384615384616e-06, "loss": -0.0035, "num_tokens": 1173217301.0, "reward": 1.29926335811615, "reward_std": 0.11280029714107513, "rewards/accuracy_reward": 0.7427951335906983, "rewards/brier_reward": 0.8599711418151855, "rewards/confidence_one_or_zero": 0.002777777798473835, "rewards/format_reward": 0.9957465171813965, "rewards/mean_confidence_reward": 0.6893273830413819, "sampling/batch_mean_priority_error": 0.018572631173611093, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3416666666666667, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0016196536598727107, "sampling/priority_kl": 0.03000083565711975, "sampling/priority_scale": 0.7094456852180884, "sampling/prob_entropy": 10.27894229888916, "sampling/prob_max": 4.72917192382738e-05, "sampling/prob_min": 1.9575109763536603e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.1471999883651733, "sampling/prompt_draws_total": 34416.0, "sampling/seen_fraction": 0.7551133275032044, "sampling/unseen_fraction": 0.24488667249679566, "signal/accuracy_reward/centered_abs_mean": 0.11629231721162796, "signal/accuracy_reward/group_std_mean": 0.15797212719917297, "signal/accuracy_reward/group_zero_std_frac": 0.5333333313465118, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05814615860581398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05814615860581398, "signal/advantage_abs_mean": 0.07877274304628372, "signal/advantage_pre_scale_abs_mean": 0.07877274304628372, "signal/advantage_pre_scale_std": 0.1593018352985382, "signal/advantage_std": 0.1593018352985382, "signal/brier_reward/centered_abs_mean": 0.07354676201939583, "signal/brier_reward/group_std_mean": 0.10252981781959533, "signal/brier_reward/group_zero_std_frac": 0.16944444477558135, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03677338100969792, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03677338100969792, "signal/confidence_one_or_zero/centered_abs_mean": 0.004372829850763082, "signal/confidence_one_or_zero/group_std_mean": 0.005886570177972316, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9833333253860473, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.372829707222081e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.372829707222081e-08, "signal/format_reward/centered_abs_mean": 0.00796983502805233, "signal/format_reward/group_std_mean": 0.019131885096430778, "signal/format_reward/group_zero_std_frac": 0.9055555582046508, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003984917514026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003984917514026165, "signal/mean_confidence_reward/centered_abs_mean": 0.0666200652718544, "signal/mean_confidence_reward/group_std_mean": 0.09021197110414506, "signal/mean_confidence_reward/group_zero_std_frac": 0.18055555820465088, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.66200674004358e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.66200674004358e-07, "step": 480 }, { "calibration/aurc": 0.13403767836577124, "calibration/batch_distribution_entropy": 0.6798431874057342, "calibration/batch_entropy_100bins": 0.3820831852568066, "calibration/batch_entropy_10bins": 0.6798431874057342, "calibration/batch_entropy_50bins": 0.4497821436301697, "calibration/batch_uniqueness": 0.5025837024384686, "calibration/confidence_entropy": 0.4837146415950676, "calibration/coverage@0%": 0.04804177545691906, "calibration/coverage@1%": 0.23603133159268933, "calibration/coverage@10%": 0.44020887728459535, "calibration/coverage@15%": 0.4981723237597911, "calibration/coverage@20%": 0.7363672214969539, "calibration/coverage@25%": 0.8303062445604874, "calibration/coverage@30%": 0.9237597911227156, "calibration/coverage@5%": 0.37650130548302874, "calibration/distribution_entropy_10": 0.6798431874057342, "calibration/distribution_entropy_100": 0.3820831852568066, "calibration/ece": 0.11172792917754566, "calibration/mean_confidence": 0.6669808665143604, "calibration/unique_confidence_per_question": 0.025520833333333336, "calibration/unique_confidences": 9.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0014756944444444641, "completions/max_length": 3134.2, "completions/max_terminated_length": 3134.2, "completions/mean_length": 949.7003662109375, "completions/mean_terminated_length": 951.1052368164062, "completions/min_length": 0.0, "completions/min_terminated_length": 310.2, "epoch": 1.1658653846153846, "grad_norm": 0.0005296074668876827, "learning_rate": 3.3353365384615388e-06, "loss": -0.0007, "num_tokens": 1187263033.0, "reward": 1.2922637701034545, "reward_std": 0.10782433152198792, "rewards/accuracy_reward": 0.7294270753860473, "rewards/brier_reward": 0.856648850440979, "rewards/confidence_one_or_zero": 0.0008680555794853718, "rewards/format_reward": 0.9984375, "rewards/mean_confidence_reward": 0.7005642294883728, "sampling/batch_mean_priority_error": 0.015800347222222205, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.38888888888888884, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.001638196362182498, "sampling/priority_kl": 0.030000192299485206, "sampling/priority_scale": 0.7103035271400586, "sampling/prob_entropy": 10.27894287109375, "sampling/prob_max": 4.745964688481763e-05, "sampling/prob_min": 1.9623675325419753e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.1592000007629395, "sampling/prompt_draws_total": 34776.0, "sampling/seen_fraction": 0.7596666693687439, "sampling/unseen_fraction": 0.24033333063125611, "signal/accuracy_reward/centered_abs_mean": 0.11507704108953476, "signal/accuracy_reward/group_std_mean": 0.15593255162239075, "signal/accuracy_reward/group_zero_std_frac": 0.5416666746139527, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05753852054476738, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05753852054476738, "signal/advantage_abs_mean": 0.0767291359603405, "signal/advantage_pre_scale_abs_mean": 0.0767291359603405, "signal/advantage_pre_scale_std": 0.15259878933429719, "signal/advantage_std": 0.15259878933429719, "signal/brier_reward/centered_abs_mean": 0.07359779626131058, "signal/brier_reward/group_std_mean": 0.10185133069753646, "signal/brier_reward/group_zero_std_frac": 0.20000000298023224, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03679889813065529, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03679889813065529, "signal/confidence_one_or_zero/centered_abs_mean": 0.0016710068914107979, "signal/confidence_one_or_zero/group_std_mean": 0.004611522844061256, "signal/confidence_one_or_zero/group_zero_std_frac": 0.975, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6710067995973077e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6710067995973077e-08, "signal/format_reward/centered_abs_mean": 0.0030164929921738803, "signal/format_reward/group_std_mean": 0.008539893664419651, "signal/format_reward/group_zero_std_frac": 0.9527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0015082464960869402, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0015082464960869402, "signal/mean_confidence_reward/centered_abs_mean": 0.06438992619514465, "signal/mean_confidence_reward/group_std_mean": 0.08493052572011947, "signal/mean_confidence_reward/group_zero_std_frac": 0.22500000298023223, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.438992159019108e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.438992159019108e-07, "step": 485 }, { "calibration/aurc": 0.10957053794746666, "calibration/batch_distribution_entropy": 0.47153568446971417, "calibration/batch_entropy_100bins": 0.2574468096316364, "calibration/batch_entropy_10bins": 0.47153568446971417, "calibration/batch_entropy_50bins": 0.30306221884388146, "calibration/batch_uniqueness": 0.01182822670089628, "calibration/confidence_entropy": 0.41203219808780067, "calibration/coverage@0%": 0.003645833333333333, "calibration/coverage@1%": 0.003645833333333333, "calibration/coverage@10%": 0.5036458333333333, "calibration/coverage@15%": 0.6979166666666667, "calibration/coverage@20%": 0.9109375, "calibration/coverage@25%": 0.9651041666666668, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.40816742819843344, "calibration/distribution_entropy_10": 0.47153568446971417, "calibration/distribution_entropy_100": 0.2574468096316364, "calibration/ece": 0.106922930401436, "calibration/mean_confidence": 0.7968985885824631, "calibration/unique_confidence_per_question": 0.025520833333333336, "calibration/unique_confidences": 9.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013020833333333482, "completions/max_length": 3576.4, "completions/max_terminated_length": 3576.4, "completions/mean_length": 944.9557373046875, "completions/mean_terminated_length": 946.2088623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 279.4, "epoch": 1.1778846153846154, "grad_norm": 0.000669217377435416, "learning_rate": 3.3052884615384617e-06, "loss": -0.001, "num_tokens": 1201210939.0, "reward": 1.292154812812805, "reward_std": 0.1198092296719551, "rewards/accuracy_reward": 0.729687488079071, "rewards/brier_reward": 0.8559090256690979, "rewards/confidence_one_or_zero": 0.0005208333430346101, "rewards/format_reward": 0.9986979007720947, "rewards/mean_confidence_reward": 0.764504897594452, "sampling/batch_mean_priority_error": 0.012920138888888873, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.28055555555555556, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.001652483455836773, "sampling/priority_kl": 0.03000018261373043, "sampling/priority_scale": 0.710580509970896, "sampling/prob_entropy": 10.278962326049804, "sampling/prob_max": 4.760355150210671e-05, "sampling/prob_min": 1.9676406736834906e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.1711999893188476, "sampling/prompt_draws_total": 35136.0, "sampling/seen_fraction": 0.7635533332824707, "sampling/unseen_fraction": 0.23644666671752929, "signal/accuracy_reward/centered_abs_mean": 0.12069227546453476, "signal/accuracy_reward/group_std_mean": 0.16156412959098815, "signal/accuracy_reward/group_zero_std_frac": 0.5277777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06034613773226738, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06034613773226738, "signal/advantage_abs_mean": 0.08784503787755966, "signal/advantage_pre_scale_abs_mean": 0.08784503787755966, "signal/advantage_pre_scale_std": 0.17135231494903563, "signal/advantage_std": 0.17135231494903563, "signal/brier_reward/centered_abs_mean": 0.07915262952446937, "signal/brier_reward/group_std_mean": 0.10748916268348693, "signal/brier_reward/group_zero_std_frac": 0.3000000029802322, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.039576314762234686, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.039576314762234686, "signal/confidence_one_or_zero/centered_abs_mean": 0.000998263864312321, "signal/confidence_one_or_zero/group_std_mean": 0.0026473373174667357, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 9.982638005112676e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 9.982638005112676e-09, "signal/format_reward/centered_abs_mean": 0.002479383663740009, "signal/format_reward/group_std_mean": 0.006169932056218385, "signal/format_reward/group_zero_std_frac": 0.9694444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012396918318700045, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012396918318700045, "signal/mean_confidence_reward/centered_abs_mean": 0.06347360759973526, "signal/mean_confidence_reward/group_std_mean": 0.08472321182489395, "signal/mean_confidence_reward/group_zero_std_frac": 0.3388888955116272, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.347360454128647e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.347360454128647e-07, "step": 490 }, { "calibration/aurc": 0.09521846205223616, "calibration/batch_distribution_entropy": 0.36980064712623495, "calibration/batch_entropy_100bins": 0.19743328193770213, "calibration/batch_entropy_10bins": 0.36980064712623495, "calibration/batch_entropy_50bins": 0.23241526505332483, "calibration/batch_uniqueness": -0.24817345815044733, "calibration/confidence_entropy": 0.38735411680887133, "calibration/coverage@0%": 0.0026191895725828007, "calibration/coverage@1%": 0.0026191895725828007, "calibration/coverage@10%": 0.5172412847871823, "calibration/coverage@15%": 0.9143114969024759, "calibration/coverage@20%": 0.9509227362204724, "calibration/coverage@25%": 0.9661458333333334, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.3112806068954175, "calibration/distribution_entropy_10": 0.36980064712623495, "calibration/distribution_entropy_100": 0.19743328193770213, "calibration/ece": 0.07073480712716064, "calibration/mean_confidence": 0.8144085704304098, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001388888888888884, "completions/max_length": 3515.2, "completions/max_terminated_length": 3515.2, "completions/mean_length": 971.435595703125, "completions/mean_terminated_length": 972.7840454101563, "completions/min_length": 0.0, "completions/min_terminated_length": 312.8, "epoch": 1.1899038461538463, "grad_norm": 0.0005312844878062606, "learning_rate": 3.2752403846153846e-06, "loss": -0.001, "num_tokens": 1215500757.0, "reward": 1.297963547706604, "reward_std": 0.11535521298646927, "rewards/accuracy_reward": 0.7472222208976745, "rewards/brier_reward": 0.8500782132148743, "rewards/confidence_one_or_zero": 0.0032986111473292112, "rewards/format_reward": 0.9986110925674438, "rewards/mean_confidence_reward": 0.773796558380127, "sampling/batch_mean_priority_error": 0.018286666666666646, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3388888888888889, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0016717145452275872, "sampling/priority_kl": 0.02999982200562954, "sampling/priority_scale": 0.7107961237663403, "sampling/prob_entropy": 10.278962326049804, "sampling/prob_max": 4.7745810297783466e-05, "sampling/prob_min": 1.9729911946342328e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.1832000017166138, "sampling/prompt_draws_total": 35496.0, "sampling/seen_fraction": 0.7672733306884766, "sampling/unseen_fraction": 0.23272666931152344, "signal/accuracy_reward/centered_abs_mean": 0.11678602248430252, "signal/accuracy_reward/group_std_mean": 0.15678493082523345, "signal/accuracy_reward/group_zero_std_frac": 0.544444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05839301124215126, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05839301124215126, "signal/advantage_abs_mean": 0.08481334894895554, "signal/advantage_pre_scale_abs_mean": 0.08481334894895554, "signal/advantage_pre_scale_std": 0.1691618800163269, "signal/advantage_std": 0.1691618800163269, "signal/brier_reward/centered_abs_mean": 0.0775853805243969, "signal/brier_reward/group_std_mean": 0.10400516092777252, "signal/brier_reward/group_zero_std_frac": 0.3111111164093018, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03879269026219845, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03879269026219845, "signal/confidence_one_or_zero/centered_abs_mean": 0.00521918386220932, "signal/confidence_one_or_zero/group_std_mean": 0.00827118419110775, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.2191836630299804e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.2191836630299804e-08, "signal/format_reward/centered_abs_mean": 0.002615017397329211, "signal/format_reward/group_std_mean": 0.006256770342588425, "signal/format_reward/group_zero_std_frac": 0.9694444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0013075086986646055, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0013075086986646055, "signal/mean_confidence_reward/centered_abs_mean": 0.060755544900894166, "signal/mean_confidence_reward/group_std_mean": 0.08054654747247696, "signal/mean_confidence_reward/group_zero_std_frac": 0.35277777910232544, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.07555386977765e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.07555386977765e-07, "step": 495 }, { "calibration/aurc": 0.1642228836953578, "calibration/batch_distribution_entropy": 0.5205450125344753, "calibration/batch_entropy_100bins": 0.2780713068077269, "calibration/batch_entropy_10bins": 0.5205450125344753, "calibration/batch_entropy_50bins": 0.3273410432180067, "calibration/batch_uniqueness": 0.11008029513888892, "calibration/confidence_entropy": 0.4279427060637147, "calibration/coverage@0%": 0.0005208333333333333, "calibration/coverage@1%": 0.0005208333333333333, "calibration/coverage@10%": 0.31302083333333336, "calibration/coverage@15%": 0.34635416666666663, "calibration/coverage@20%": 0.6489583333333334, "calibration/coverage@25%": 0.8666666666666666, "calibration/coverage@30%": 0.9427083333333333, "calibration/coverage@5%": 0.1515625, "calibration/distribution_entropy_10": 0.5205450125344753, "calibration/distribution_entropy_100": 0.2780713068077269, "calibration/ece": 0.11427083333333339, "calibration/mean_confidence": 0.7689583333333335, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002690972222222232, "completions/max_length": 3503.4, "completions/max_terminated_length": 3503.4, "completions/mean_length": 976.9451293945312, "completions/mean_terminated_length": 979.6044799804688, "completions/min_length": 73.4, "completions/min_terminated_length": 329.2, "epoch": 1.2019230769230769, "grad_norm": 0.0005742677603848279, "learning_rate": 3.245192307692308e-06, "loss": -0.0012, "num_tokens": 1229878109.0, "reward": 1.2855183839797975, "reward_std": 0.10680401921272278, "rewards/accuracy_reward": 0.7275173664093018, "rewards/brier_reward": 0.8461951017379761, "rewards/confidence_one_or_zero": 0.0024305555154569445, "rewards/format_reward": 0.9973090291023254, "rewards/mean_confidence_reward": 0.7703506827354432, "sampling/batch_mean_priority_error": 0.02147395833333332, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.32499999999999996, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0016937416279688478, "sampling/priority_kl": 0.02999955452978611, "sampling/priority_scale": 0.7113852322334424, "sampling/prob_entropy": 10.278951644897461, "sampling/prob_max": 4.7900483332341536e-05, "sampling/prob_min": 1.9779334616032428e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.19520001411438, "sampling/prompt_draws_total": 35856.0, "sampling/seen_fraction": 0.771233332157135, "sampling/unseen_fraction": 0.22876666784286498, "signal/accuracy_reward/centered_abs_mean": 0.10395507663488388, "signal/accuracy_reward/group_std_mean": 0.1416932612657547, "signal/accuracy_reward/group_zero_std_frac": 0.5805555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05197753831744194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05197753831744194, "signal/advantage_abs_mean": 0.07803195416927337, "signal/advantage_pre_scale_abs_mean": 0.07803195416927337, "signal/advantage_pre_scale_std": 0.15995383262634277, "signal/advantage_std": 0.15995383262634277, "signal/brier_reward/centered_abs_mean": 0.073187355697155, "signal/brier_reward/group_std_mean": 0.09856675863265991, "signal/brier_reward/group_zero_std_frac": 0.3388888955116272, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0365936778485775, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0365936778485775, "signal/confidence_one_or_zero/centered_abs_mean": 0.0036132814129814506, "signal/confidence_one_or_zero/group_std_mean": 0.005107206478714943, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9833333373069764, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.613281194247975e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.613281194247975e-08, "signal/format_reward/centered_abs_mean": 0.0033365884679369628, "signal/format_reward/group_std_mean": 0.006680760718882084, "signal/format_reward/group_zero_std_frac": 0.9694444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0016682942339684814, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0016682942339684814, "signal/mean_confidence_reward/centered_abs_mean": 0.059506846219301225, "signal/mean_confidence_reward/group_std_mean": 0.07812358886003494, "signal/mean_confidence_reward/group_zero_std_frac": 0.37222222089767454, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.950684339950385e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.950684339950385e-07, "step": 500 }, { "epoch": 1.2019230769230769, "eval_calibration/aurc": 0.1517025700700997, "eval_calibration/batch_distribution_entropy": 0.5561453822123794, "eval_calibration/batch_entropy_100bins": 0.30538964238624244, "eval_calibration/batch_entropy_10bins": 0.5561453822123794, "eval_calibration/batch_entropy_50bins": 0.35949974585406874, "eval_calibration/batch_uniqueness": 0.1863500945179584, "eval_calibration/confidence_entropy": 0.4351659852744771, "eval_calibration/coverage@0%": 0.004347826086956522, "eval_calibration/coverage@1%": 0.004347826086956522, "eval_calibration/coverage@10%": 0.004347826086956522, "eval_calibration/coverage@15%": 0.717391304347826, "eval_calibration/coverage@20%": 0.7947826086956522, "eval_calibration/coverage@25%": 0.9034782608695652, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.004347826086956522, "eval_calibration/distribution_entropy_10": 0.5561453822123794, "eval_calibration/distribution_entropy_100": 0.30538964238624244, "eval_calibration/ece": 0.04030434782608717, "eval_calibration/mean_confidence": 0.7420434782608697, "eval_calibration/unique_confidence_per_question": 0.009548611111111112, "eval_calibration/unique_confidences": 11, "eval_completions/clipped_ratio": 0.0017361111111111234, "eval_completions/max_length": 2621.0, "eval_completions/max_terminated_length": 2621.0, "eval_completions/mean_length": 976.9887288411459, "eval_completions/mean_terminated_length": 978.6986490885416, "eval_completions/min_length": 218.16666666666666, "eval_completions/min_terminated_length": 329.5, "eval_loss": 0.0, "eval_num_tokens": 1229878109.0, "eval_reward": 1.2767446438471477, "eval_reward_std": 0.3123304198185603, "eval_rewards/accuracy_reward": 0.7005208333333334, "eval_rewards/brier_reward": 0.8546896775563558, "eval_rewards/confidence_one_or_zero": 0.0017361111628512542, "eval_rewards/format_reward": 0.9982638955116272, "eval_rewards/mean_confidence_reward": 0.740755170583725, "eval_runtime": 166.5302, "eval_samples_per_second": 6.005, "eval_signal/accuracy_reward/centered_abs_mean": 0.4050021668275197, "eval_signal/accuracy_reward/group_std_mean": 0.4556471159060796, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20250108341375986, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20250108341375986, "eval_signal/advantage_abs_mean": 0.2665083234508832, "eval_signal/advantage_pre_scale_abs_mean": 0.2665083234508832, "eval_signal/advantage_pre_scale_std": 0.31057239572207135, "eval_signal/advantage_std": 0.31057239572207135, "eval_signal/brier_reward/centered_abs_mean": 0.16707724332809448, "eval_signal/brier_reward/group_std_mean": 0.2277213086684545, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08353862166404724, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08353862166404724, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0033637151742974916, "eval_signal/confidence_one_or_zero/group_std_mean": 0.009820927555362383, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.944444457689921, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302004e-08, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302004e-08, "eval_signal/format_reward/centered_abs_mean": 0.0033637151742974916, "eval_signal/format_reward/group_std_mean": 0.009820927555362383, "eval_signal/format_reward/group_zero_std_frac": 0.944444457689921, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0016818575871487458, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0016818575871487458, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.1944471299648285, "eval_signal/mean_confidence_reward/group_std_mean": 0.2322454775373141, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.94447125068109e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.94447125068109e-06, "eval_steps_per_second": 0.036, "step": 500 }, { "epoch": 1.2019230769230769, "step": 500, "train_probe_calibration/aurc": 0.10092870521186632, "train_probe_calibration/batch_distribution_entropy": 0.5295895610072994, "train_probe_calibration/batch_entropy_100bins": 0.292901367615838, "train_probe_calibration/batch_entropy_10bins": 0.5295895610072994, "train_probe_calibration/batch_entropy_50bins": 0.3447987508529416, "train_probe_calibration/batch_uniqueness": 0.12208538290895064, "train_probe_calibration/confidence_entropy": 0.4257298528067076, "train_probe_calibration/coverage@0%": 0.006076388888888889, "train_probe_calibration/coverage@1%": 0.006076388888888889, "train_probe_calibration/coverage@10%": 0.7395833333333334, "train_probe_calibration/coverage@15%": 0.828125, "train_probe_calibration/coverage@20%": 0.9210069444444444, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.006076388888888889, "train_probe_calibration/distribution_entropy_10": 0.5295895610072994, "train_probe_calibration/distribution_entropy_100": 0.292901367615838, "train_probe_calibration/ece": 0.04473090277777759, "train_probe_calibration/mean_confidence": 0.757873263888889, "train_probe_calibration/unique_confidence_per_question": 0.010416666666666666, "train_probe_calibration/unique_confidences": 12, "train_probe_completions/clipped_ratio": 0.0, "train_probe_completions/max_length": 2861.6666666666665, "train_probe_completions/max_terminated_length": 2861.6666666666665, "train_probe_completions/mean_length": 996.1976013183594, "train_probe_completions/mean_terminated_length": 996.1976013183594, "train_probe_completions/min_length": 311.8333333333333, "train_probe_completions/min_terminated_length": 311.8333333333333, "train_probe_loss": 0.0, "train_probe_num_tokens": 1229878109.0, "train_probe_reward": 1.3183907469113667, "train_probe_reward_std": 0.2839902589718501, "train_probe_rewards/accuracy_reward": 0.7526041567325592, "train_probe_rewards/brier_reward": 0.8841620783011118, "train_probe_rewards/confidence_one_or_zero": 0.0017361111628512542, "train_probe_rewards/format_reward": 1.0, "train_probe_rewards/mean_confidence_reward": 0.7578732271989187, "train_probe_runtime": 150.5336, "train_probe_samples_per_second": 6.643, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3594292551279068, "train_probe_signal/accuracy_reward/group_std_mean": 0.4275974631309509, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1797146275639534, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.1797146275639534, "train_probe_signal/advantage_abs_mean": 0.23350405196348825, "train_probe_signal/advantage_pre_scale_abs_mean": 0.23350405196348825, "train_probe_signal/advantage_pre_scale_std": 0.28261765340964, "train_probe_signal/advantage_std": 0.28261765340964, "train_probe_signal/brier_reward/centered_abs_mean": 0.13781099766492844, "train_probe_signal/brier_reward/group_std_mean": 0.195537269115448, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06890549883246422, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.06890549883246422, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0033637151742974916, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.009820927555362383, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.944444457689921, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302004e-08, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302004e-08, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.18608286480108896, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.22383885085582733, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.8608286040944222e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.8608286040944222e-06, "train_probe_steps_per_second": 0.04 }, { "calibration/aurc": 0.14251614829725212, "calibration/batch_distribution_entropy": 0.5263690608949052, "calibration/batch_entropy_100bins": 0.2825064938167437, "calibration/batch_entropy_10bins": 0.5263690608949052, "calibration/batch_entropy_50bins": 0.33256207360429674, "calibration/batch_uniqueness": 0.1299214529303327, "calibration/confidence_entropy": 0.4352356192038183, "calibration/coverage@0%": 0.0026041666666666665, "calibration/coverage@1%": 0.0026041666666666665, "calibration/coverage@10%": 0.42893684725848563, "calibration/coverage@15%": 0.637861727589208, "calibration/coverage@20%": 0.7889074738903394, "calibration/coverage@25%": 0.8707966166231506, "calibration/coverage@30%": 0.915625, "calibration/coverage@5%": 0.13054150348128807, "calibration/distribution_entropy_10": 0.5263690608949052, "calibration/distribution_entropy_100": 0.2825064938167437, "calibration/ece": 0.08594960291557871, "calibration/mean_confidence": 0.7611633757615319, "calibration/unique_confidence_per_question": 0.023958333333333335, "calibration/unique_confidences": 9.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001475694444444442, "completions/max_length": 3047.6, "completions/max_terminated_length": 3047.6, "completions/mean_length": 951.081591796875, "completions/mean_terminated_length": 952.4546752929688, "completions/min_length": 0.0, "completions/min_terminated_length": 297.6, "epoch": 1.2139423076923077, "grad_norm": 0.0005359905189834535, "learning_rate": 3.215144230769231e-06, "loss": 0.0, "num_tokens": 1243919369.0, "reward": 1.2760514259338378, "reward_std": 0.10757787972688675, "rewards/accuracy_reward": 0.7075520873069763, "rewards/brier_reward": 0.8460114002227783, "rewards/confidence_one_or_zero": 0.00303819450782612, "rewards/format_reward": 0.998524296283722, "rewards/mean_confidence_reward": 0.7550451397895813, "sampling/batch_mean_priority_error": 0.02132645833333332, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.32222222222222224, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0017179204151034355, "sampling/priority_kl": 0.030000147596001624, "sampling/priority_scale": 0.7120238006813452, "sampling/prob_entropy": 10.278959465026855, "sampling/prob_max": 4.8056802188511935e-05, "sampling/prob_min": 1.9828132644761352e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.207200002670288, "sampling/prompt_draws_total": 36216.0, "sampling/seen_fraction": 0.7751600027084351, "sampling/unseen_fraction": 0.22483999729156495, "signal/accuracy_reward/centered_abs_mean": 0.112841796875, "signal/accuracy_reward/group_std_mean": 0.1478495255112648, "signal/accuracy_reward/group_zero_std_frac": 0.5777777791023254, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0564208984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0564208984375, "signal/advantage_abs_mean": 0.08059335947036743, "signal/advantage_pre_scale_abs_mean": 0.08059335947036743, "signal/advantage_pre_scale_std": 0.16366566717624664, "signal/advantage_std": 0.16366566717624664, "signal/brier_reward/centered_abs_mean": 0.0721733033657074, "signal/brier_reward/group_std_mean": 0.09534063339233398, "signal/brier_reward/group_zero_std_frac": 0.30555555522441863, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0360866516828537, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0360866516828537, "signal/confidence_one_or_zero/centered_abs_mean": 0.004823133698664606, "signal/confidence_one_or_zero/group_std_mean": 0.007467509433627129, "signal/confidence_one_or_zero/group_zero_std_frac": 0.975, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.8231336791104697e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.8231336791104697e-08, "signal/format_reward/centered_abs_mean": 0.002728949720039964, "signal/format_reward/group_std_mean": 0.006286143884062767, "signal/format_reward/group_zero_std_frac": 0.9694444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001364474860019982, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001364474860019982, "signal/mean_confidence_reward/centered_abs_mean": 0.05904992073774338, "signal/mean_confidence_reward/group_std_mean": 0.07791801393032075, "signal/mean_confidence_reward/group_zero_std_frac": 0.325, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.904991667193826e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.904991667193826e-07, "step": 505 }, { "calibration/aurc": 0.15843764969117557, "calibration/batch_distribution_entropy": 0.5827104619682373, "calibration/batch_entropy_100bins": 0.31697319022488957, "calibration/batch_entropy_10bins": 0.5827104619682373, "calibration/batch_entropy_50bins": 0.3731357109494903, "calibration/batch_uniqueness": 0.24571379969595064, "calibration/confidence_entropy": 0.44576822093219964, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.3182042152419016, "calibration/coverage@15%": 0.5514867442168618, "calibration/coverage@20%": 0.7391858641388367, "calibration/coverage@25%": 0.8683088043313957, "calibration/coverage@30%": 0.936482939632546, "calibration/coverage@5%": 0.05511811023622047, "calibration/distribution_entropy_10": 0.5827104619682373, "calibration/distribution_entropy_100": 0.31697319022488957, "calibration/ece": 0.09253316062817325, "calibration/mean_confidence": 0.6995938054322008, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004600694444444419, "completions/max_length": 3544.0, "completions/max_terminated_length": 3544.0, "completions/mean_length": 1037.9000122070313, "completions/mean_terminated_length": 1042.7189819335938, "completions/min_length": 0.0, "completions/min_terminated_length": 296.4, "epoch": 1.2259615384615385, "grad_norm": 0.00042660924373194575, "learning_rate": 3.185096153846154e-06, "loss": -0.0038, "num_tokens": 1259024649.0, "reward": 1.284938645362854, "reward_std": 0.10022608935832977, "rewards/accuracy_reward": 0.7225694537162781, "rewards/brier_reward": 0.851894474029541, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9953993201255799, "rewards/mean_confidence_reward": 0.7036577582359314, "sampling/batch_mean_priority_error": 0.019823124999999987, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.35000000000000003, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.00173952616751194, "sampling/priority_kl": 0.029999304562807083, "sampling/priority_scale": 0.7129302441840991, "sampling/prob_entropy": 10.278946113586425, "sampling/prob_max": 4.822285627597012e-05, "sampling/prob_min": 1.987423274840694e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.2192000150680542, "sampling/prompt_draws_total": 36576.0, "sampling/seen_fraction": 0.7792199850082397, "sampling/unseen_fraction": 0.22078001499176025, "signal/accuracy_reward/centered_abs_mean": 0.10558810830116272, "signal/accuracy_reward/group_std_mean": 0.14366630464792252, "signal/accuracy_reward/group_zero_std_frac": 0.569444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05279405415058136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05279405415058136, "signal/advantage_abs_mean": 0.07034819722175598, "signal/advantage_pre_scale_abs_mean": 0.07034819722175598, "signal/advantage_pre_scale_std": 0.1494814023375511, "signal/advantage_std": 0.1494814023375511, "signal/brier_reward/centered_abs_mean": 0.06675559654831886, "signal/brier_reward/group_std_mean": 0.0913299024105072, "signal/brier_reward/group_zero_std_frac": 0.2527777820825577, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03337779827415943, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03337779827415943, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.00739474818110466, "signal/format_reward/group_std_mean": 0.01648670230060816, "signal/format_reward/group_zero_std_frac": 0.919444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00369737409055233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00369737409055233, "signal/mean_confidence_reward/centered_abs_mean": 0.05924249067902565, "signal/mean_confidence_reward/group_std_mean": 0.07888469099998474, "signal/mean_confidence_reward/group_zero_std_frac": 0.27222222089767456, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.924248853261816e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.924248853261816e-07, "step": 510 }, { "calibration/aurc": 0.14249268358674544, "calibration/batch_distribution_entropy": 0.7107646528720579, "calibration/batch_entropy_100bins": 0.3949343638565591, "calibration/batch_entropy_10bins": 0.7107646528720579, "calibration/batch_entropy_50bins": 0.4649103431474706, "calibration/batch_uniqueness": 0.5531128800054119, "calibration/confidence_entropy": 0.49678547413664287, "calibration/coverage@0%": 0.12320026178010472, "calibration/coverage@1%": 0.1399269197207679, "calibration/coverage@10%": 0.5555655541012217, "calibration/coverage@15%": 0.6448489310645724, "calibration/coverage@20%": 0.7060291230366492, "calibration/coverage@25%": 0.7723958333333333, "calibration/coverage@30%": 0.7817708333333333, "calibration/coverage@5%": 0.4678037739965095, "calibration/distribution_entropy_10": 0.7107646528720579, "calibration/distribution_entropy_100": 0.3949343638565591, "calibration/ece": 0.1659895833333333, "calibration/mean_confidence": 0.6266151287085516, "calibration/unique_confidence_per_question": 0.023958333333333335, "calibration/unique_confidences": 9.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003645833333333326, "completions/max_length": 3360.6, "completions/max_terminated_length": 3360.6, "completions/mean_length": 1012.97119140625, "completions/mean_terminated_length": 1016.7093627929687, "completions/min_length": 0.0, "completions/min_terminated_length": 297.8, "epoch": 1.2379807692307692, "grad_norm": 0.00041118654189631343, "learning_rate": 3.1550480769230772e-06, "loss": -0.0021, "num_tokens": 1273792253.0, "reward": 1.2771016120910645, "reward_std": 0.09434493482112885, "rewards/accuracy_reward": 0.70390625, "rewards/brier_reward": 0.8539300918579101, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9963541626930237, "rewards/mean_confidence_reward": 0.6344878196716308, "sampling/batch_mean_priority_error": 0.023963541666666654, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.36666666666666664, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0017642594641074539, "sampling/priority_kl": 0.030000543966889382, "sampling/priority_scale": 0.7140926540596411, "sampling/prob_entropy": 10.278964042663574, "sampling/prob_max": 4.839725079364143e-05, "sampling/prob_min": 1.9917297686333767e-05, "sampling/prompt_draws_max": 6.0, "sampling/prompt_draws_mean": 1.2312000036239623, "sampling/prompt_draws_total": 36936.0, "sampling/seen_fraction": 0.7833999991416931, "sampling/unseen_fraction": 0.21660000085830688, "signal/accuracy_reward/centered_abs_mean": 0.10814344584941864, "signal/accuracy_reward/group_std_mean": 0.14549138844013215, "signal/accuracy_reward/group_zero_std_frac": 0.5777778029441833, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05407172292470932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05407172292470932, "signal/advantage_abs_mean": 0.06500188633799553, "signal/advantage_pre_scale_abs_mean": 0.06500188633799553, "signal/advantage_pre_scale_std": 0.13270943313837053, "signal/advantage_std": 0.13270943313837053, "signal/brier_reward/centered_abs_mean": 0.069306131452322, "signal/brier_reward/group_std_mean": 0.09529425203800201, "signal/brier_reward/group_zero_std_frac": 0.18611111342906952, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.034653065726161, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.034653065726161, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.006901041604578495, "signal/format_reward/group_std_mean": 0.016902656853199007, "signal/format_reward/group_zero_std_frac": 0.9166666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0034505208022892475, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0034505208022892475, "signal/mean_confidence_reward/centered_abs_mean": 0.0655409075319767, "signal/mean_confidence_reward/group_std_mean": 0.08637888133525848, "signal/mean_confidence_reward/group_zero_std_frac": 0.1972222238779068, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.554090873578388e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.554090873578388e-07, "step": 515 }, { "calibration/aurc": 0.11034997527151753, "calibration/batch_distribution_entropy": 0.6606235870600051, "calibration/batch_entropy_100bins": 0.37894630253316874, "calibration/batch_entropy_10bins": 0.6606235870600051, "calibration/batch_entropy_50bins": 0.44608945604224043, "calibration/batch_uniqueness": 0.48710002437172895, "calibration/confidence_entropy": 0.46217896173955075, "calibration/coverage@0%": 0.326305045202683, "calibration/coverage@1%": 0.3531308586426697, "calibration/coverage@10%": 0.5324284289943687, "calibration/coverage@15%": 0.7396593314142887, "calibration/coverage@20%": 0.8100803062793416, "calibration/coverage@25%": 0.845740115818856, "calibration/coverage@30%": 0.9000499937507811, "calibration/coverage@5%": 0.4778884532976136, "calibration/distribution_entropy_10": 0.6606235870600051, "calibration/distribution_entropy_100": 0.37894630253316874, "calibration/ece": 0.15638544680169772, "calibration/mean_confidence": 0.6244671800405404, "calibration/unique_confidence_per_question": 0.026041666666666664, "calibration/unique_confidences": 10.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008506944444444442, "completions/max_length": 3761.6, "completions/max_terminated_length": 3761.6, "completions/mean_length": 1067.0155395507813, "completions/mean_terminated_length": 1076.108544921875, "completions/min_length": 0.0, "completions/min_terminated_length": 321.8, "epoch": 1.25, "grad_norm": 0.00043587424443103373, "learning_rate": 3.125e-06, "loss": -0.0086, "num_tokens": 1289181520.0, "reward": 1.2708593368530274, "reward_std": 0.10695169419050217, "rewards/accuracy_reward": 0.711718761920929, "rewards/brier_reward": 0.8384949564933777, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9914930582046508, "rewards/mean_confidence_reward": 0.5937291979789734, "sampling/batch_mean_priority_error": 0.03257645833333332, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.33333333333333337, "sampling/error_ema_max": 0.12712499499320984, "sampling/error_ema_mean": 0.0017990589840337634, "sampling/priority_kl": 0.030000123009085656, "sampling/priority_scale": 0.7153894484275952, "sampling/prob_entropy": 10.27896556854248, "sampling/prob_max": 4.8576216795481744e-05, "sampling/prob_min": 1.9846079521812498e-05, "sampling/prompt_draws_max": 6.2, "sampling/prompt_draws_mean": 1.2431999921798706, "sampling/prompt_draws_total": 37296.0, "sampling/seen_fraction": 0.7875866651535034, "sampling/unseen_fraction": 0.21241333484649658, "signal/accuracy_reward/centered_abs_mean": 0.12175021916627884, "signal/accuracy_reward/group_std_mean": 0.16610720455646516, "signal/accuracy_reward/group_zero_std_frac": 0.5027777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06087510958313942, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06087510958313942, "signal/advantage_abs_mean": 0.07055862694978714, "signal/advantage_pre_scale_abs_mean": 0.07055862694978714, "signal/advantage_pre_scale_std": 0.14613762199878694, "signal/advantage_std": 0.14613762199878694, "signal/brier_reward/centered_abs_mean": 0.07722499519586563, "signal/brier_reward/group_std_mean": 0.10501042306423188, "signal/brier_reward/group_zero_std_frac": 0.14166666716337203, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03861249759793282, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03861249759793282, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.015755208395421504, "signal/format_reward/group_std_mean": 0.034674161300063136, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007877604197710752, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007877604197710752, "signal/mean_confidence_reward/centered_abs_mean": 0.06463623270392418, "signal/mean_confidence_reward/group_std_mean": 0.08526600450277329, "signal/mean_confidence_reward/group_zero_std_frac": 0.15, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.463623094532522e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.463623094532522e-07, "step": 520 }, { "calibration/aurc": 0.12669337083961585, "calibration/batch_distribution_entropy": 0.5590922243032654, "calibration/batch_entropy_100bins": 0.3075703534005839, "calibration/batch_entropy_10bins": 0.5590922243032654, "calibration/batch_entropy_50bins": 0.36206684357654284, "calibration/batch_uniqueness": 0.2584583537271246, "calibration/confidence_entropy": 0.4585718308112806, "calibration/coverage@0%": 0.0777363900621045, "calibration/coverage@1%": 0.0777363900621045, "calibration/coverage@10%": 0.36548442155816746, "calibration/coverage@15%": 0.6758849441412209, "calibration/coverage@20%": 0.7699068744066566, "calibration/coverage@25%": 0.8686922655944602, "calibration/coverage@30%": 0.9425866979393532, "calibration/coverage@5%": 0.34415108822483415, "calibration/distribution_entropy_10": 0.5590922243032654, "calibration/distribution_entropy_100": 0.3075703534005839, "calibration/ece": 0.09078399273439078, "calibration/mean_confidence": 0.6898426965050423, "calibration/unique_confidence_per_question": 0.0265625, "calibration/unique_confidences": 10.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013020833333333304, "completions/max_length": 3502.8, "completions/max_terminated_length": 3502.8, "completions/mean_length": 1041.5444580078124, "completions/mean_terminated_length": 1055.194970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 310.6, "epoch": 1.2620192307692308, "grad_norm": 0.0005399513756856322, "learning_rate": 3.094951923076923e-06, "loss": -0.014, "num_tokens": 1304277552.0, "reward": 1.2725529432296754, "reward_std": 0.12192474752664566, "rewards/accuracy_reward": 0.7072916746139526, "rewards/brier_reward": 0.8508217453956604, "rewards/confidence_one_or_zero": 0.003125000064028427, "rewards/format_reward": 0.9869791507720947, "rewards/mean_confidence_reward": 0.6684785842895508, "sampling/batch_mean_priority_error": 0.021243124999999984, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.32222222222222224, "sampling/error_ema_max": 0.13218749463558196, "sampling/error_ema_mean": 0.0018275268841534853, "sampling/priority_kl": 0.029999518766999245, "sampling/priority_scale": 0.7165736616356299, "sampling/prob_entropy": 10.278942108154297, "sampling/prob_max": 4.875140512012876e-05, "sampling/prob_min": 1.94367141375551e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.2551999807357788, "sampling/prompt_draws_total": 37656.0, "sampling/seen_fraction": 0.7915599942207336, "sampling/unseen_fraction": 0.20844000577926636, "signal/accuracy_reward/centered_abs_mean": 0.12051866203546524, "signal/accuracy_reward/group_std_mean": 0.15819574296474456, "signal/accuracy_reward/group_zero_std_frac": 0.5555555641651153, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06025933101773262, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06025933101773262, "signal/advantage_abs_mean": 0.08612038418650628, "signal/advantage_pre_scale_abs_mean": 0.08612038418650628, "signal/advantage_pre_scale_std": 0.1748889595270157, "signal/advantage_std": 0.1748889595270157, "signal/brier_reward/centered_abs_mean": 0.07994573190808296, "signal/brier_reward/group_std_mean": 0.10857147574424744, "signal/brier_reward/group_zero_std_frac": 0.1944444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03997286595404148, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03997286595404148, "signal/confidence_one_or_zero/centered_abs_mean": 0.0013020833604969085, "signal/confidence_one_or_zero/group_std_mean": 0.00297891478985548, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.3020832767551838e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.3020832767551838e-08, "signal/format_reward/centered_abs_mean": 0.02253689169883728, "signal/format_reward/group_std_mean": 0.043699033185839654, "signal/format_reward/group_zero_std_frac": 0.8166666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01126844584941864, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01126844584941864, "signal/mean_confidence_reward/centered_abs_mean": 0.06638413742184639, "signal/mean_confidence_reward/group_std_mean": 0.0887540727853775, "signal/mean_confidence_reward/group_zero_std_frac": 0.19722222685813903, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.638413196924375e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.638413196924375e-07, "step": 525 }, { "calibration/aurc": 0.1575017173793545, "calibration/batch_distribution_entropy": 0.4244637415882807, "calibration/batch_entropy_100bins": 0.25155728112065345, "calibration/batch_entropy_10bins": 0.4244637415882807, "calibration/batch_entropy_50bins": 0.29612916117252525, "calibration/batch_uniqueness": -0.01812680104952442, "calibration/confidence_entropy": 0.39954801937750983, "calibration/coverage@0%": 0.030234880010291933, "calibration/coverage@1%": 0.1964617929390518, "calibration/coverage@10%": 0.35107920718707286, "calibration/coverage@15%": 0.4797605073611032, "calibration/coverage@20%": 0.49194402501674783, "calibration/coverage@25%": 0.8504267548121826, "calibration/coverage@30%": 0.9523887048784596, "calibration/coverage@5%": 0.21493144993113625, "calibration/distribution_entropy_10": 0.4244637415882807, "calibration/distribution_entropy_100": 0.25155728112065345, "calibration/ece": 0.11915949920399335, "calibration/mean_confidence": 0.778276141262144, "calibration/unique_confidence_per_question": 0.025520833333333333, "calibration/unique_confidences": 9.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3533.2, "completions/max_terminated_length": 3533.2, "completions/mean_length": 1003.426123046875, "completions/mean_terminated_length": 1015.3042358398437, "completions/min_length": 0.0, "completions/min_terminated_length": 316.8, "epoch": 1.2740384615384617, "grad_norm": 0.0005864729755558074, "learning_rate": 3.0649038461538464e-06, "loss": -0.0134, "num_tokens": 1318960189.0, "reward": 1.295579433441162, "reward_std": 0.12813606709241868, "rewards/accuracy_reward": 0.74765625, "rewards/brier_reward": 0.8552927732467651, "rewards/confidence_one_or_zero": 0.01319444440305233, "rewards/format_reward": 0.9881944537162781, "rewards/mean_confidence_reward": 0.7612300276756286, "sampling/batch_mean_priority_error": 0.012533472222222208, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.3194444444444445, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0018447082722559572, "sampling/priority_kl": 0.03000018410384655, "sampling/priority_scale": 0.7177888571983203, "sampling/prob_entropy": 10.278960990905762, "sampling/prob_max": 4.892680808552541e-05, "sampling/prob_min": 1.9476248780847527e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.267199993133545, "sampling/prompt_draws_total": 38016.0, "sampling/seen_fraction": 0.7954866647720337, "sampling/unseen_fraction": 0.2045133352279663, "signal/accuracy_reward/centered_abs_mean": 0.10601671040058136, "signal/accuracy_reward/group_std_mean": 0.14474707543849946, "signal/accuracy_reward/group_zero_std_frac": 0.569444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05300835520029068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05300835520029068, "signal/advantage_abs_mean": 0.0865313470363617, "signal/advantage_pre_scale_abs_mean": 0.0865313470363617, "signal/advantage_pre_scale_std": 0.18588004112243653, "signal/advantage_std": 0.18588004112243653, "signal/brier_reward/centered_abs_mean": 0.0755378246307373, "signal/brier_reward/group_std_mean": 0.10764126926660537, "signal/brier_reward/group_zero_std_frac": 0.225, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03776891231536865, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03776891231536865, "signal/confidence_one_or_zero/centered_abs_mean": 0.016254340205341576, "signal/confidence_one_or_zero/group_std_mean": 0.021729598753154278, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9361111164093018, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6254339669785621e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6254339669785621e-07, "signal/format_reward/centered_abs_mean": 0.02116970419883728, "signal/format_reward/group_std_mean": 0.044343719631433486, "signal/format_reward/group_zero_std_frac": 0.8027777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01058485209941864, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01058485209941864, "signal/mean_confidence_reward/centered_abs_mean": 0.06626470685005188, "signal/mean_confidence_reward/group_std_mean": 0.0918770357966423, "signal/mean_confidence_reward/group_zero_std_frac": 0.23333333134651185, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.626470508308557e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.626470508308557e-07, "step": 530 }, { "calibration/aurc": 0.1569989778980939, "calibration/batch_distribution_entropy": 0.43016315371581937, "calibration/batch_entropy_100bins": 0.24929648735262097, "calibration/batch_entropy_10bins": 0.43016315371581937, "calibration/batch_entropy_50bins": 0.2934677913281341, "calibration/batch_uniqueness": -0.014269761773715925, "calibration/confidence_entropy": 0.4131327922802903, "calibration/coverage@0%": 0.021600163948871234, "calibration/coverage@1%": 0.021600163948871234, "calibration/coverage@10%": 0.33457047537762075, "calibration/coverage@15%": 0.5210108246875189, "calibration/coverage@20%": 0.543657795926932, "calibration/coverage@25%": 0.7147992623067101, "calibration/coverage@30%": 0.9004071499503474, "calibration/coverage@5%": 0.32035569549687365, "calibration/distribution_entropy_10": 0.43016315371581937, "calibration/distribution_entropy_100": 0.24929648735262097, "calibration/ece": 0.11942447875570532, "calibration/mean_confidence": 0.7816901375303423, "calibration/unique_confidence_per_question": 0.02447916666666667, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016319444444444442, "completions/max_length": 3661.6, "completions/max_terminated_length": 3661.6, "completions/mean_length": 1042.116064453125, "completions/mean_terminated_length": 1059.5912841796876, "completions/min_length": 0.0, "completions/min_terminated_length": 318.2, "epoch": 1.2860576923076923, "grad_norm": 0.000569651136174798, "learning_rate": 3.0348557692307694e-06, "loss": -0.0164, "num_tokens": 1334048246.0, "reward": 1.2901008605957032, "reward_std": 0.13372049778699874, "rewards/accuracy_reward": 0.7421006798744202, "rewards/brier_reward": 0.8544050455093384, "rewards/confidence_one_or_zero": 0.012847222294658423, "rewards/format_reward": 0.9836805582046508, "rewards/mean_confidence_reward": 0.7595043301582336, "sampling/batch_mean_priority_error": 0.014679791666666655, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.33888888888888885, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.001859647035598755, "sampling/priority_kl": 0.029999933019280434, "sampling/priority_scale": 0.7192020476097241, "sampling/prob_entropy": 10.278958702087403, "sampling/prob_max": 4.91113263706211e-05, "sampling/prob_min": 1.9514276573318057e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.279200005531311, "sampling/prompt_draws_total": 38376.0, "sampling/seen_fraction": 0.7994733214378357, "sampling/unseen_fraction": 0.2005266785621643, "signal/accuracy_reward/centered_abs_mean": 0.11075846254825591, "signal/accuracy_reward/group_std_mean": 0.15180006623268127, "signal/accuracy_reward/group_zero_std_frac": 0.5472222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05537923127412796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05537923127412796, "signal/advantage_abs_mean": 0.09288178682327271, "signal/advantage_pre_scale_abs_mean": 0.09288178682327271, "signal/advantage_pre_scale_std": 0.19578463435173035, "signal/advantage_std": 0.19578463435173035, "signal/brier_reward/centered_abs_mean": 0.08117109388113022, "signal/brier_reward/group_std_mean": 0.11099212318658828, "signal/brier_reward/group_zero_std_frac": 0.22777777910232544, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04058554694056511, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04058554694056511, "signal/confidence_one_or_zero/centered_abs_mean": 0.01097005195915699, "signal/confidence_one_or_zero/group_std_mean": 0.01581360977143049, "signal/confidence_one_or_zero/group_zero_std_frac": 0.95, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.097005167594034e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.097005167594034e-07, "signal/format_reward/centered_abs_mean": 0.0247178815305233, "signal/format_reward/group_std_mean": 0.04667534232139588, "signal/format_reward/group_zero_std_frac": 0.8111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01235894076526165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01235894076526165, "signal/mean_confidence_reward/centered_abs_mean": 0.0684033289551735, "signal/mean_confidence_reward/group_std_mean": 0.09226205348968505, "signal/mean_confidence_reward/group_zero_std_frac": 0.2611111134290695, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.840332844149089e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.840332844149089e-07, "step": 535 }, { "calibration/aurc": 0.10487956573505355, "calibration/batch_distribution_entropy": 0.4847926145012039, "calibration/batch_entropy_100bins": 0.2802343493743758, "calibration/batch_entropy_10bins": 0.4847926145012039, "calibration/batch_entropy_50bins": 0.32988734192973024, "calibration/batch_uniqueness": 0.10339296265997477, "calibration/confidence_entropy": 0.4206212969895116, "calibration/coverage@0%": 0.025190412070028183, "calibration/coverage@1%": 0.025190412070028183, "calibration/coverage@10%": 0.6500353826812535, "calibration/coverage@15%": 0.8218298884514436, "calibration/coverage@20%": 0.8951689632545932, "calibration/coverage@25%": 0.9317585301837269, "calibration/coverage@30%": 0.9653543307086615, "calibration/coverage@5%": 0.18086323528902554, "calibration/distribution_entropy_10": 0.4847926145012039, "calibration/distribution_entropy_100": 0.2802343493743758, "calibration/ece": 0.06519686971041702, "calibration/mean_confidence": 0.7616189501538526, "calibration/unique_confidence_per_question": 0.023958333333333335, "calibration/unique_confidences": 9.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012499999999999978, "completions/max_length": 3479.0, "completions/max_terminated_length": 3479.0, "completions/mean_length": 998.4607788085938, "completions/mean_terminated_length": 1011.1102172851563, "completions/min_length": 0.0, "completions/min_terminated_length": 303.6, "epoch": 1.2980769230769231, "grad_norm": 0.0005864489357918501, "learning_rate": 3.0048076923076923e-06, "loss": -0.0134, "num_tokens": 1348647378.0, "reward": 1.3082192659378051, "reward_std": 0.12426633834838867, "rewards/accuracy_reward": 0.7584201455116272, "rewards/brier_reward": 0.8705028891563416, "rewards/confidence_one_or_zero": 0.014496527798473834, "rewards/format_reward": 0.9875, "rewards/mean_confidence_reward": 0.7618720889091491, "sampling/batch_mean_priority_error": 0.013787291666666656, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.275, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0018759463913738728, "sampling/priority_kl": 0.03000033609569073, "sampling/priority_scale": 0.7201821506721899, "sampling/prob_entropy": 10.278959465026855, "sampling/prob_max": 4.927778500132263e-05, "sampling/prob_min": 1.955580955836922e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.2911999940872192, "sampling/prompt_draws_total": 38736.0, "sampling/seen_fraction": 0.8030200004577637, "sampling/unseen_fraction": 0.19697999954223633, "signal/accuracy_reward/centered_abs_mean": 0.10099283754825591, "signal/accuracy_reward/group_std_mean": 0.14111926555633544, "signal/accuracy_reward/group_zero_std_frac": 0.5722222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05049641877412796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05049641877412796, "signal/advantage_abs_mean": 0.08462517410516739, "signal/advantage_pre_scale_abs_mean": 0.08462517410516739, "signal/advantage_pre_scale_std": 0.1842319041490555, "signal/advantage_std": 0.1842319041490555, "signal/brier_reward/centered_abs_mean": 0.07206552177667618, "signal/brier_reward/group_std_mean": 0.10260828882455826, "signal/brier_reward/group_zero_std_frac": 0.24722222089767457, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03603276088833809, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03603276088833809, "signal/confidence_one_or_zero/centered_abs_mean": 0.013037109375, "signal/confidence_one_or_zero/group_std_mean": 0.017157964035868646, "signal/confidence_one_or_zero/group_zero_std_frac": 0.950000011920929, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.3037108601565705e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.3037108601565705e-07, "signal/format_reward/centered_abs_mean": 0.02145182304084301, "signal/format_reward/group_std_mean": 0.039249108731746675, "signal/format_reward/group_zero_std_frac": 0.8444444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010725911520421506, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010725911520421506, "signal/mean_confidence_reward/centered_abs_mean": 0.06385495141148567, "signal/mean_confidence_reward/group_std_mean": 0.08670918941497803, "signal/mean_confidence_reward/group_zero_std_frac": 0.2666666626930237, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.385494771166122e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.385494771166122e-07, "step": 540 }, { "calibration/aurc": 0.0925614115676943, "calibration/batch_distribution_entropy": 0.5153764623631993, "calibration/batch_entropy_100bins": 0.2838094451090586, "calibration/batch_entropy_10bins": 0.5153764623631993, "calibration/batch_entropy_50bins": 0.3340958867840345, "calibration/batch_uniqueness": 0.1330143627366373, "calibration/confidence_entropy": 0.4356818326638935, "calibration/coverage@0%": 0.006799534297347009, "calibration/coverage@1%": 0.10496226395613965, "calibration/coverage@10%": 0.7552523354340004, "calibration/coverage@15%": 0.8411124730780685, "calibration/coverage@20%": 0.860109834555641, "calibration/coverage@25%": 0.9328083989501312, "calibration/coverage@30%": 0.9328083989501312, "calibration/coverage@5%": 0.5268695781056467, "calibration/distribution_entropy_10": 0.5153764623631993, "calibration/distribution_entropy_100": 0.2838094451090586, "calibration/ece": 0.11489496489971199, "calibration/mean_confidence": 0.740481303681016, "calibration/unique_confidence_per_question": 0.024479166666666666, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01137152777777779, "completions/max_length": 3503.0, "completions/max_terminated_length": 3503.0, "completions/mean_length": 1055.1447875976562, "completions/mean_terminated_length": 1067.214404296875, "completions/min_length": 0.0, "completions/min_terminated_length": 291.2, "epoch": 1.3100961538461537, "grad_norm": 0.00045707475510425866, "learning_rate": 2.974759615384616e-06, "loss": -0.0128, "num_tokens": 1363906294.0, "reward": 1.2843178749084472, "reward_std": 0.11411627978086472, "rewards/accuracy_reward": 0.717881953716278, "rewards/brier_reward": 0.8621111154556275, "rewards/confidence_one_or_zero": 0.006857638875953853, "rewards/format_reward": 0.9886284708976746, "rewards/mean_confidence_reward": 0.7079152345657349, "sampling/batch_mean_priority_error": 0.02168749999999999, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2972222222222222, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0018944249954074622, "sampling/priority_kl": 0.030000030994415283, "sampling/priority_scale": 0.721052998281084, "sampling/prob_entropy": 10.278952598571777, "sampling/prob_max": 4.943923995597288e-05, "sampling/prob_min": 1.959796609298792e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.3032000064849854, "sampling/prompt_draws_total": 39096.0, "sampling/seen_fraction": 0.8064000010490417, "sampling/unseen_fraction": 0.19359999895095825, "signal/accuracy_reward/centered_abs_mean": 0.09945746511220932, "signal/accuracy_reward/group_std_mean": 0.14147261381149293, "signal/accuracy_reward/group_zero_std_frac": 0.5611111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04972873255610466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04972873255610466, "signal/advantage_abs_mean": 0.07555333971977234, "signal/advantage_pre_scale_abs_mean": 0.07555333971977234, "signal/advantage_pre_scale_std": 0.1664302110671997, "signal/advantage_std": 0.1664302110671997, "signal/brier_reward/centered_abs_mean": 0.07284339591860771, "signal/brier_reward/group_std_mean": 0.0998562067747116, "signal/brier_reward/group_zero_std_frac": 0.2777777761220932, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.036421697959303855, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.036421697959303855, "signal/confidence_one_or_zero/centered_abs_mean": 0.006461588526144623, "signal/confidence_one_or_zero/group_std_mean": 0.009746908582746983, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9666666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.461588242245853e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.461588242245853e-08, "signal/format_reward/centered_abs_mean": 0.020111762173473834, "signal/format_reward/group_std_mean": 0.0422509741038084, "signal/format_reward/group_zero_std_frac": 0.8083333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010055881086736917, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010055881086736917, "signal/mean_confidence_reward/centered_abs_mean": 0.06666777282953262, "signal/mean_confidence_reward/group_std_mean": 0.0885131299495697, "signal/mean_confidence_reward/group_zero_std_frac": 0.28611111342906953, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.666776812380704e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.666776812380704e-07, "step": 545 }, { "calibration/aurc": 0.16459503744651127, "calibration/batch_distribution_entropy": 0.5923094750073634, "calibration/batch_entropy_100bins": 0.3324064309706861, "calibration/batch_entropy_10bins": 0.5923094750073634, "calibration/batch_entropy_50bins": 0.3913034722476989, "calibration/batch_uniqueness": 0.3446194186815835, "calibration/confidence_entropy": 0.47171471433285006, "calibration/coverage@0%": 0.146875, "calibration/coverage@1%": 0.17760416666666667, "calibration/coverage@10%": 0.5500827271430608, "calibration/coverage@15%": 0.5785037797746398, "calibration/coverage@20%": 0.6137341320781629, "calibration/coverage@25%": 0.6347867636571103, "calibration/coverage@30%": 0.7637341320781628, "calibration/coverage@5%": 0.48028352767080296, "calibration/distribution_entropy_10": 0.5923094750073634, "calibration/distribution_entropy_100": 0.3324064309706861, "calibration/ece": 0.11403086741667849, "calibration/mean_confidence": 0.6745715935969667, "calibration/unique_confidence_per_question": 0.025, "calibration/unique_confidences": 9.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009027777777777768, "completions/max_length": 3682.8, "completions/max_terminated_length": 3682.8, "completions/mean_length": 1073.9698120117187, "completions/mean_terminated_length": 1084.0141479492188, "completions/min_length": 0.0, "completions/min_terminated_length": 306.4, "epoch": 1.3221153846153846, "grad_norm": 0.00048103288281708956, "learning_rate": 2.9447115384615386e-06, "loss": -0.0103, "num_tokens": 1379368762.0, "reward": 1.2940167665481568, "reward_std": 0.11286754608154297, "rewards/accuracy_reward": 0.7332465410232544, "rewards/brier_reward": 0.863887631893158, "rewards/confidence_one_or_zero": 0.003125000064028427, "rewards/format_reward": 0.9908854007720947, "rewards/mean_confidence_reward": 0.6962173104286193, "sampling/batch_mean_priority_error": 0.017588541666666645, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.24722222222222223, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0019167169695720077, "sampling/priority_kl": 0.030000125616788866, "sampling/priority_scale": 0.7219337046379224, "sampling/prob_entropy": 10.278951835632324, "sampling/prob_max": 4.9601865612203255e-05, "sampling/prob_min": 1.964022740139626e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.3152000188827515, "sampling/prompt_draws_total": 39456.0, "sampling/seen_fraction": 0.8097066640853882, "sampling/unseen_fraction": 0.1902933359146118, "signal/accuracy_reward/centered_abs_mean": 0.10252278447151184, "signal/accuracy_reward/group_std_mean": 0.14087741523981095, "signal/accuracy_reward/group_zero_std_frac": 0.575000011920929, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05126139223575592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05126139223575592, "signal/advantage_abs_mean": 0.07567564025521278, "signal/advantage_pre_scale_abs_mean": 0.07567564025521278, "signal/advantage_pre_scale_std": 0.16293342113494874, "signal/advantage_std": 0.16293342113494874, "signal/brier_reward/centered_abs_mean": 0.07278087660670281, "signal/brier_reward/group_std_mean": 0.10256119221448898, "signal/brier_reward/group_zero_std_frac": 0.2250000059604645, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.036390438303351404, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.036390438303351404, "signal/confidence_one_or_zero/centered_abs_mean": 0.004318576550576836, "signal/confidence_one_or_zero/group_std_mean": 0.00713206510990858, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.3185764297959395e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.3185764297959395e-08, "signal/format_reward/centered_abs_mean": 0.01661783866584301, "signal/format_reward/group_std_mean": 0.035409700125455856, "signal/format_reward/group_zero_std_frac": 0.8416666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008308919332921504, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008308919332921504, "signal/mean_confidence_reward/centered_abs_mean": 0.06690583825111389, "signal/mean_confidence_reward/group_std_mean": 0.08986445367336274, "signal/mean_confidence_reward/group_zero_std_frac": 0.2416666716337204, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.690583632007474e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.690583632007474e-07, "step": 550 }, { "epoch": 1.3221153846153846, "eval_calibration/aurc": 0.14916881488210604, "eval_calibration/batch_distribution_entropy": 0.5969406634321193, "eval_calibration/batch_entropy_100bins": 0.33255700130558596, "eval_calibration/batch_entropy_10bins": 0.5969406634321193, "eval_calibration/batch_entropy_50bins": 0.39148072120973537, "eval_calibration/batch_uniqueness": 0.30708720276119833, "eval_calibration/confidence_entropy": 0.44911964238701935, "eval_calibration/coverage@0%": 0.004366812227074236, "eval_calibration/coverage@1%": 0.004366812227074236, "eval_calibration/coverage@10%": 0.004366812227074236, "eval_calibration/coverage@15%": 0.7013100436681222, "eval_calibration/coverage@20%": 0.7449781659388647, "eval_calibration/coverage@25%": 0.8698689956331878, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.004366812227074236, "eval_calibration/distribution_entropy_10": 0.5969406634321193, "eval_calibration/distribution_entropy_100": 0.33255700130558596, "eval_calibration/ece": 0.0260611353711793, "eval_calibration/mean_confidence": 0.7135720524017469, "eval_calibration/unique_confidence_per_question": 0.010416666666666666, "eval_calibration/unique_confidences": 12, "eval_completions/clipped_ratio": 0.005208333333333352, "eval_completions/max_length": 3089.1666666666665, "eval_completions/max_terminated_length": 3089.1666666666665, "eval_completions/mean_length": 1039.5005696614583, "eval_completions/mean_terminated_length": 1044.9644978841145, "eval_completions/min_length": 121.83333333333333, "eval_completions/min_terminated_length": 337.0, "eval_loss": 0.0, "eval_num_tokens": 1379368762.0, "eval_reward": 1.2735520402590434, "eval_reward_std": 0.3102180262406667, "eval_rewards/accuracy_reward": 0.7048611044883728, "eval_rewards/brier_reward": 0.8483050564924876, "eval_rewards/confidence_one_or_zero": 0.004340277907128136, "eval_rewards/format_reward": 0.9939236243565878, "eval_rewards/mean_confidence_reward": 0.7092360655466715, "eval_runtime": 206.7898, "eval_samples_per_second": 4.836, "eval_signal/accuracy_reward/centered_abs_mean": 0.4040798594554265, "eval_signal/accuracy_reward/group_std_mean": 0.454540491104126, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20203992972771326, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20203992972771326, "eval_signal/advantage_abs_mean": 0.26199281215667725, "eval_signal/advantage_pre_scale_abs_mean": 0.26199281215667725, "eval_signal/advantage_pre_scale_std": 0.3097561299800873, "eval_signal/advantage_std": 0.3097561299800873, "eval_signal/brier_reward/centered_abs_mean": 0.16876979420582452, "eval_signal/brier_reward/group_std_mean": 0.22740193704764047, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08438489710291226, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08438489710291226, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.008409287935743729, "eval_signal/confidence_one_or_zero/group_std_mean": 0.02455231888840596, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.8611111342906952, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 8.409287488575501e-08, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 8.409287488575501e-08, "eval_signal/format_reward/centered_abs_mean": 0.011664496424297491, "eval_signal/format_reward/group_std_mean": 0.031383837262789406, "eval_signal/format_reward/group_zero_std_frac": 0.8333333631356558, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0058322482121487456, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0058322482121487456, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.21962020794550577, "eval_signal/mean_confidence_reward/group_std_mean": 0.25181200603644055, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.196201952150053e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.196201952150053e-06, "eval_steps_per_second": 0.029, "step": 550 }, { "epoch": 1.3221153846153846, "step": 550, "train_probe_calibration/aurc": 0.10523588155297994, "train_probe_calibration/batch_distribution_entropy": 0.6093510187951117, "train_probe_calibration/batch_entropy_100bins": 0.34401648735790014, "train_probe_calibration/batch_entropy_10bins": 0.6093510187951117, "train_probe_calibration/batch_entropy_50bins": 0.40497064277758876, "train_probe_calibration/batch_uniqueness": 0.33258799228299335, "train_probe_calibration/confidence_entropy": 0.44647008666728183, "train_probe_calibration/coverage@0%": 0.013054830287206266, "train_probe_calibration/coverage@1%": 0.013054830287206266, "train_probe_calibration/coverage@10%": 0.6736292428198434, "train_probe_calibration/coverage@15%": 0.763272410791993, "train_probe_calibration/coverage@20%": 0.8790252393385553, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.013054830287206266, "train_probe_calibration/distribution_entropy_10": 0.6093510187951117, "train_probe_calibration/distribution_entropy_100": 0.34401648735790014, "train_probe_calibration/ece": 0.041583986074847604, "train_probe_calibration/mean_confidence": 0.7203829416884248, "train_probe_calibration/unique_confidence_per_question": 0.011284722222222222, "train_probe_calibration/unique_confidences": 13, "train_probe_completions/clipped_ratio": 0.0026041666666666665, "train_probe_completions/max_length": 2821.5, "train_probe_completions/max_terminated_length": 2821.5, "train_probe_completions/mean_length": 1065.775899251302, "train_probe_completions/mean_terminated_length": 1068.367899576823, "train_probe_completions/min_length": 219.33333333333334, "train_probe_completions/min_terminated_length": 329.3333333333333, "train_probe_loss": 0.0, "train_probe_num_tokens": 1379368762.0, "train_probe_reward": 1.3099865317344666, "train_probe_reward_std": 0.2795550574858983, "train_probe_rewards/accuracy_reward": 0.7500000099341074, "train_probe_rewards/brier_reward": 0.8725626766681671, "train_probe_rewards/confidence_one_or_zero": 0.008680555736646056, "train_probe_rewards/format_reward": 0.9973958333333334, "train_probe_rewards/mean_confidence_reward": 0.7185069024562836, "train_probe_runtime": 173.9441, "train_probe_samples_per_second": 5.749, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3633897503217061, "train_probe_signal/accuracy_reward/group_std_mean": 0.42989715933799744, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.18169487516085306, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18169487516085306, "train_probe_signal/advantage_abs_mean": 0.22928122927745184, "train_probe_signal/advantage_pre_scale_abs_mean": 0.22928122927745184, "train_probe_signal/advantage_pre_scale_std": 0.2797047644853592, "train_probe_signal/advantage_std": 0.2797047644853592, "train_probe_signal/brier_reward/centered_abs_mean": 0.14238324761390686, "train_probe_signal/brier_reward/group_std_mean": 0.19753756870826086, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07119162380695343, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07119162380695343, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.01671006918574373, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.04611522859583298, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.750000019868215, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6710068469668235e-07, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6710068469668235e-07, "train_probe_signal/format_reward/centered_abs_mean": 0.0049370659204820795, "train_probe_signal/format_reward/group_std_mean": 0.011741982462505499, "train_probe_signal/format_reward/group_zero_std_frac": 0.944444457689921, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0024685329602410397, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0024685329602410397, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.21267250428597131, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.24548349777857462, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.1267249697605925e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.1267249697605925e-06, "train_probe_steps_per_second": 0.034 }, { "calibration/aurc": 0.13244701628255368, "calibration/batch_distribution_entropy": 0.5633908185400199, "calibration/batch_entropy_100bins": 0.32076954253274204, "calibration/batch_entropy_10bins": 0.5633908185400199, "calibration/batch_entropy_50bins": 0.3776047154618286, "calibration/batch_uniqueness": 0.2638795656491243, "calibration/confidence_entropy": 0.4354364276359979, "calibration/coverage@0%": 0.01723647697291977, "calibration/coverage@1%": 0.01723647697291977, "calibration/coverage@10%": 0.45358358508878655, "calibration/coverage@15%": 0.6231473760474622, "calibration/coverage@20%": 0.6979631730756086, "calibration/coverage@25%": 0.8934800392670157, "calibration/coverage@30%": 0.9400169066317627, "calibration/coverage@5%": 0.3031577652317745, "calibration/distribution_entropy_10": 0.5633908185400199, "calibration/distribution_entropy_100": 0.32076954253274204, "calibration/ece": 0.10251768482381678, "calibration/mean_confidence": 0.7301818809265057, "calibration/unique_confidence_per_question": 0.024479166666666666, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005468749999999978, "completions/max_length": 3548.2, "completions/max_terminated_length": 3548.2, "completions/mean_length": 1017.2588500976562, "completions/mean_terminated_length": 1022.870556640625, "completions/min_length": 0.0, "completions/min_terminated_length": 303.4, "epoch": 1.3341346153846154, "grad_norm": 0.0005966307362541556, "learning_rate": 2.9146634615384615e-06, "loss": -0.0048, "num_tokens": 1394187200.0, "reward": 1.3012691497802735, "reward_std": 0.11076888740062714, "rewards/accuracy_reward": 0.746874988079071, "rewards/brier_reward": 0.8611177206039429, "rewards/confidence_one_or_zero": 0.00494791658129543, "rewards/format_reward": 0.994531261920929, "rewards/mean_confidence_reward": 0.713351571559906, "sampling/batch_mean_priority_error": 0.01391326388888887, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.24999999999999994, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0019340271828696132, "sampling/priority_kl": 0.03000037595629692, "sampling/priority_scale": 0.7224328219657764, "sampling/prob_entropy": 10.278961563110352, "sampling/prob_max": 4.974793046130799e-05, "sampling/prob_min": 1.9638664525700733e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.3272000074386596, "sampling/prompt_draws_total": 39816.0, "sampling/seen_fraction": 0.8126399993896485, "sampling/unseen_fraction": 0.18736000061035157, "signal/accuracy_reward/centered_abs_mean": 0.11321614533662797, "signal/accuracy_reward/group_std_mean": 0.1554545447230339, "signal/accuracy_reward/group_zero_std_frac": 0.5333333313465118, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05660807266831398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05660807266831398, "signal/advantage_abs_mean": 0.07871722057461739, "signal/advantage_pre_scale_abs_mean": 0.07871722057461739, "signal/advantage_pre_scale_std": 0.15928139686584472, "signal/advantage_std": 0.15928139686584472, "signal/brier_reward/centered_abs_mean": 0.07254311144351959, "signal/brier_reward/group_std_mean": 0.09697409868240356, "signal/brier_reward/group_zero_std_frac": 0.2527777761220932, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.036271555721759795, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.036271555721759795, "signal/confidence_one_or_zero/centered_abs_mean": 0.006168619776144624, "signal/confidence_one_or_zero/group_std_mean": 0.008891820348799229, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222328186035, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.168619535174002e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.168619535174002e-08, "signal/format_reward/centered_abs_mean": 0.009749348927289247, "signal/format_reward/group_std_mean": 0.021487992629408835, "signal/format_reward/group_zero_std_frac": 0.8972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004874674463644623, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004874674463644623, "signal/mean_confidence_reward/centered_abs_mean": 0.06694743260741234, "signal/mean_confidence_reward/group_std_mean": 0.0881627306342125, "signal/mean_confidence_reward/group_zero_std_frac": 0.26388889253139497, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.694743092339195e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.694743092339195e-07, "step": 555 }, { "calibration/aurc": 0.18615071715847942, "calibration/batch_distribution_entropy": 0.5391185474680029, "calibration/batch_entropy_100bins": 0.3057328655456327, "calibration/batch_entropy_10bins": 0.5391185474680029, "calibration/batch_entropy_50bins": 0.3599037826039988, "calibration/batch_uniqueness": 0.22894520104889549, "calibration/confidence_entropy": 0.43995140877076694, "calibration/coverage@0%": 0.0203604967718753, "calibration/coverage@1%": 0.0203604967718753, "calibration/coverage@10%": 0.28963133010520864, "calibration/coverage@15%": 0.570893208075795, "calibration/coverage@20%": 0.5835928841258398, "calibration/coverage@25%": 0.8432911523191866, "calibration/coverage@30%": 0.8639158847184987, "calibration/coverage@5%": 0.27608966343854197, "calibration/distribution_entropy_10": 0.5391185474680029, "calibration/distribution_entropy_100": 0.3057328655456327, "calibration/ece": 0.14419091037754006, "calibration/mean_confidence": 0.731739201801835, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004600694444444419, "completions/max_length": 3374.8, "completions/max_terminated_length": 3374.8, "completions/mean_length": 983.3083251953125, "completions/mean_terminated_length": 987.8928466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 287.2, "epoch": 1.3461538461538463, "grad_norm": 0.0004465328238438815, "learning_rate": 2.8846153846153845e-06, "loss": -0.0045, "num_tokens": 1408643552.0, "reward": 1.2871931076049805, "reward_std": 0.10639832764863968, "rewards/accuracy_reward": 0.7247395753860474, "rewards/brier_reward": 0.8542326092720032, "rewards/confidence_one_or_zero": 0.00512152787996456, "rewards/format_reward": 0.9953993082046508, "rewards/mean_confidence_reward": 0.734346342086792, "sampling/batch_mean_priority_error": 0.018213541666666656, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.30833333333333335, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0019512236351147294, "sampling/priority_kl": 0.03000052720308304, "sampling/priority_scale": 0.7237809360725805, "sampling/prob_entropy": 10.278968620300294, "sampling/prob_max": 4.992713656974957e-05, "sampling/prob_min": 1.960492882062681e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.339199995994568, "sampling/prompt_draws_total": 40176.0, "sampling/seen_fraction": 0.816160011291504, "sampling/unseen_fraction": 0.1838399887084961, "signal/accuracy_reward/centered_abs_mean": 0.10935872346162796, "signal/accuracy_reward/group_std_mean": 0.14367188811302184, "signal/accuracy_reward/group_zero_std_frac": 0.5888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05467936173081398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05467936173081398, "signal/advantage_abs_mean": 0.07600348442792892, "signal/advantage_pre_scale_abs_mean": 0.07600348442792892, "signal/advantage_pre_scale_std": 0.15933072566986084, "signal/advantage_std": 0.15933072566986084, "signal/brier_reward/centered_abs_mean": 0.06570380330085754, "signal/brier_reward/group_std_mean": 0.09007837623357773, "signal/brier_reward/group_zero_std_frac": 0.27777778208255766, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03285190165042877, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03285190165042877, "signal/confidence_one_or_zero/centered_abs_mean": 0.00596245659980923, "signal/confidence_one_or_zero/group_std_mean": 0.009446198446676135, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9666666746139526, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.962456164354535e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.962456164354535e-08, "signal/format_reward/centered_abs_mean": 0.008490668423473836, "signal/format_reward/group_std_mean": 0.019875646382570267, "signal/format_reward/group_zero_std_frac": 0.9027777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004245334211736918, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004245334211736918, "signal/mean_confidence_reward/centered_abs_mean": 0.05294933393597603, "signal/mean_confidence_reward/group_std_mean": 0.0712207317352295, "signal/mean_confidence_reward/group_zero_std_frac": 0.30833333134651186, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.294933203003893e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.294933203003893e-07, "step": 560 }, { "calibration/aurc": 0.12142111368921756, "calibration/batch_distribution_entropy": 0.5238570877525583, "calibration/batch_entropy_100bins": 0.28641818336889885, "calibration/batch_entropy_10bins": 0.5238570877525583, "calibration/batch_entropy_50bins": 0.3371668512544869, "calibration/batch_uniqueness": 0.17762239623692094, "calibration/confidence_entropy": 0.44057883362318667, "calibration/coverage@0%": 0.010473959768531277, "calibration/coverage@1%": 0.010473959768531277, "calibration/coverage@10%": 0.5173849545329292, "calibration/coverage@15%": 0.7562441443923946, "calibration/coverage@20%": 0.8324249104436483, "calibration/coverage@25%": 0.8717277486910995, "calibration/coverage@30%": 0.9293193717277486, "calibration/coverage@5%": 0.2633535409203637, "calibration/distribution_entropy_10": 0.5238570877525583, "calibration/distribution_entropy_100": 0.28641818336889885, "calibration/ece": 0.13695384403416913, "calibration/mean_confidence": 0.7534154036924774, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00512152777777779, "completions/max_length": 3400.0, "completions/max_terminated_length": 3400.0, "completions/mean_length": 970.3908081054688, "completions/mean_terminated_length": 975.3807373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 275.6, "epoch": 1.3581730769230769, "grad_norm": 0.0005166183691471815, "learning_rate": 2.8545673076923082e-06, "loss": -0.0057, "num_tokens": 1422888790.0, "reward": 1.2914889097213744, "reward_std": 0.10451765954494477, "rewards/accuracy_reward": 0.7174479246139527, "rewards/brier_reward": 0.870636236667633, "rewards/confidence_one_or_zero": 0.008940972248092293, "rewards/format_reward": 0.9948784828186035, "rewards/mean_confidence_reward": 0.750803804397583, "sampling/batch_mean_priority_error": 0.015555694444444431, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2861111111111111, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0019691059831529857, "sampling/priority_kl": 0.03000061921775341, "sampling/priority_scale": 0.7250464618904516, "sampling/prob_entropy": 10.278957557678222, "sampling/prob_max": 5.010353270336054e-05, "sampling/prob_min": 1.9642066035885364e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.351199984550476, "sampling/prompt_draws_total": 40536.0, "sampling/seen_fraction": 0.8195199966430664, "sampling/unseen_fraction": 0.1804800033569336, "signal/accuracy_reward/centered_abs_mean": 0.09441731572151184, "signal/accuracy_reward/group_std_mean": 0.1311457559466362, "signal/accuracy_reward/group_zero_std_frac": 0.6027777791023254, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04720865786075592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04720865786075592, "signal/advantage_abs_mean": 0.07170857340097428, "signal/advantage_pre_scale_abs_mean": 0.07170857340097428, "signal/advantage_pre_scale_std": 0.15759305655956268, "signal/advantage_std": 0.15759305655956268, "signal/brier_reward/centered_abs_mean": 0.06307352632284165, "signal/brier_reward/group_std_mean": 0.08904340118169785, "signal/brier_reward/group_zero_std_frac": 0.2805555582046509, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031536763161420824, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.031536763161420824, "signal/confidence_one_or_zero/centered_abs_mean": 0.010639105923473835, "signal/confidence_one_or_zero/group_std_mean": 0.01555493324995041, "signal/confidence_one_or_zero/group_zero_std_frac": 0.95, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.0639105525456216e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.0639105525456216e-07, "signal/format_reward/centered_abs_mean": 0.009662543423473835, "signal/format_reward/group_std_mean": 0.02305259294807911, "signal/format_reward/group_zero_std_frac": 0.8888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0048312717117369175, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0048312717117369175, "signal/mean_confidence_reward/centered_abs_mean": 0.057669497281312945, "signal/mean_confidence_reward/group_std_mean": 0.07707741409540177, "signal/mean_confidence_reward/group_zero_std_frac": 0.3027777850627899, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.766949357166595e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.766949357166595e-07, "step": 565 }, { "calibration/aurc": 0.11921036526172504, "calibration/batch_distribution_entropy": 0.5480813620266501, "calibration/batch_entropy_100bins": 0.30107017435370487, "calibration/batch_entropy_10bins": 0.5480813620266501, "calibration/batch_entropy_50bins": 0.35441493797457274, "calibration/batch_uniqueness": 0.22293690516241052, "calibration/confidence_entropy": 0.44491889967539644, "calibration/coverage@0%": 0.00939947780678851, "calibration/coverage@1%": 0.1324361270214482, "calibration/coverage@10%": 0.5075941861577788, "calibration/coverage@15%": 0.576529271982466, "calibration/coverage@20%": 0.7843186540538324, "calibration/coverage@25%": 0.9754745974760661, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.27188905444752776, "calibration/distribution_entropy_10": 0.5480813620266501, "calibration/distribution_entropy_100": 0.30107017435370487, "calibration/ece": 0.09685982504135167, "calibration/mean_confidence": 0.7530256108487235, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002604166666666674, "completions/max_length": 3331.2, "completions/max_terminated_length": 3331.2, "completions/mean_length": 920.540380859375, "completions/mean_terminated_length": 922.9418823242188, "completions/min_length": 0.0, "completions/min_terminated_length": 261.8, "epoch": 1.3701923076923077, "grad_norm": 0.0005127664771862328, "learning_rate": 2.8245192307692307e-06, "loss": -0.0021, "num_tokens": 1436592839.0, "reward": 1.3091936588287354, "reward_std": 0.10138077139854432, "rewards/accuracy_reward": 0.7527777671813964, "rewards/brier_reward": 0.868198013305664, "rewards/confidence_one_or_zero": 0.014236111333593727, "rewards/format_reward": 0.9973958373069763, "rewards/mean_confidence_reward": 0.7676869392395019, "sampling/batch_mean_priority_error": 0.017774374999999985, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.28888888888888886, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0019868580624461175, "sampling/priority_kl": 0.030000395700335504, "sampling/priority_scale": 0.7266766966087743, "sampling/prob_entropy": 10.27896671295166, "sampling/prob_max": 5.02947841596324e-05, "sampling/prob_min": 1.9675568910315633e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.3631999969482422, "sampling/prompt_draws_total": 40896.0, "sampling/seen_fraction": 0.8230933308601379, "sampling/unseen_fraction": 0.17690666913986205, "signal/accuracy_reward/centered_abs_mean": 0.09973958432674408, "signal/accuracy_reward/group_std_mean": 0.13625123351812363, "signal/accuracy_reward/group_zero_std_frac": 0.5944444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04986979216337204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04986979216337204, "signal/advantage_abs_mean": 0.07208630740642548, "signal/advantage_pre_scale_abs_mean": 0.07208630740642548, "signal/advantage_pre_scale_std": 0.15612260699272157, "signal/advantage_std": 0.15612260699272157, "signal/brier_reward/centered_abs_mean": 0.06025089025497436, "signal/brier_reward/group_std_mean": 0.08231211751699448, "signal/brier_reward/group_zero_std_frac": 0.32222222685813906, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03012544512748718, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03012544512748718, "signal/confidence_one_or_zero/centered_abs_mean": 0.013237847201526164, "signal/confidence_one_or_zero/group_std_mean": 0.01707735490053892, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9527777791023254, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.3237846872016234e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.3237846872016234e-07, "signal/format_reward/centered_abs_mean": 0.0049153645522892475, "signal/format_reward/group_std_mean": 0.012703005410730838, "signal/format_reward/group_zero_std_frac": 0.9333333492279052, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0024576822761446238, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0024576822761446238, "signal/mean_confidence_reward/centered_abs_mean": 0.05037656500935554, "signal/mean_confidence_reward/group_std_mean": 0.0677335023880005, "signal/mean_confidence_reward/group_zero_std_frac": 0.33611111640930175, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.037656308104487e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.037656308104487e-07, "step": 570 }, { "calibration/aurc": 0.14634278778879556, "calibration/batch_distribution_entropy": 0.47070895536337715, "calibration/batch_entropy_100bins": 0.2672472458527859, "calibration/batch_entropy_10bins": 0.47070895536337715, "calibration/batch_entropy_50bins": 0.31459913379368903, "calibration/batch_uniqueness": 0.05658321692658895, "calibration/confidence_entropy": 0.41867838259744794, "calibration/coverage@0%": 0.11620439353136577, "calibration/coverage@1%": 0.11620439353136577, "calibration/coverage@10%": 0.32988513087866067, "calibration/coverage@15%": 0.5988134457916969, "calibration/coverage@20%": 0.7043593564173719, "calibration/coverage@25%": 0.9228826697127938, "calibration/coverage@30%": 0.9359375, "calibration/coverage@5%": 0.13348188044236053, "calibration/distribution_entropy_10": 0.47070895536337715, "calibration/distribution_entropy_100": 0.2672472458527859, "calibration/ece": 0.12027931569792079, "calibration/mean_confidence": 0.7783694586745135, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00321180555555558, "completions/max_length": 3425.6, "completions/max_terminated_length": 3425.6, "completions/mean_length": 931.355126953125, "completions/mean_terminated_length": 934.3935180664063, "completions/min_length": 0.0, "completions/min_terminated_length": 263.4, "epoch": 1.3822115384615383, "grad_norm": 0.0005164265749044716, "learning_rate": 2.7944711538461537e-06, "loss": -0.0033, "num_tokens": 1450395042.0, "reward": 1.2989335775375366, "reward_std": 0.10441743582487106, "rewards/accuracy_reward": 0.7419270873069763, "rewards/brier_reward": 0.8592235565185546, "rewards/confidence_one_or_zero": 0.006423611292848364, "rewards/format_reward": 0.9967013955116272, "rewards/mean_confidence_reward": 0.7564403891563416, "sampling/batch_mean_priority_error": 0.0193909722222222, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.24722222222222223, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0020075502339750527, "sampling/priority_kl": 0.02999947778880596, "sampling/priority_scale": 0.7279814063804224, "sampling/prob_entropy": 10.278944969177246, "sampling/prob_max": 5.0473969895392654e-05, "sampling/prob_min": 1.9712427456397563e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.3751999855041503, "sampling/prompt_draws_total": 41256.0, "sampling/seen_fraction": 0.8263200044631958, "sampling/unseen_fraction": 0.1736799955368042, "signal/accuracy_reward/centered_abs_mean": 0.1043891042470932, "signal/accuracy_reward/group_std_mean": 0.1397392600774765, "signal/accuracy_reward/group_zero_std_frac": 0.5944444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0521945521235466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0521945521235466, "signal/advantage_abs_mean": 0.07473682761192321, "signal/advantage_pre_scale_abs_mean": 0.07473682761192321, "signal/advantage_pre_scale_std": 0.159325709939003, "signal/advantage_std": 0.159325709939003, "signal/brier_reward/centered_abs_mean": 0.065924521535635, "signal/brier_reward/group_std_mean": 0.09040133357048034, "signal/brier_reward/group_zero_std_frac": 0.29444444477558135, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0329622607678175, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0329622607678175, "signal/confidence_one_or_zero/centered_abs_mean": 0.005121527786832303, "signal/confidence_one_or_zero/group_std_mean": 0.007448598276823759, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9749999880790711, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.121527379969848e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.121527379969848e-08, "signal/format_reward/centered_abs_mean": 0.006043836660683155, "signal/format_reward/group_std_mean": 0.013839713856577873, "signal/format_reward/group_zero_std_frac": 0.9333333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0030219183303415776, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0030219183303415776, "signal/mean_confidence_reward/centered_abs_mean": 0.054480834305286406, "signal/mean_confidence_reward/group_std_mean": 0.07193099856376647, "signal/mean_confidence_reward/group_zero_std_frac": 0.3111111164093018, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.448083584269625e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.448083584269625e-07, "step": 575 }, { "calibration/aurc": 0.08932079073471824, "calibration/batch_distribution_entropy": 0.5381646310222294, "calibration/batch_entropy_100bins": 0.2897085398638714, "calibration/batch_entropy_10bins": 0.5381646310222294, "calibration/batch_entropy_50bins": 0.34104020568284665, "calibration/batch_uniqueness": 0.21316519945185397, "calibration/confidence_entropy": 0.4485409395001082, "calibration/coverage@0%": 0.09574185902138946, "calibration/coverage@1%": 0.40310812781007205, "calibration/coverage@10%": 0.6712954711336622, "calibration/coverage@15%": 0.790268415175699, "calibration/coverage@20%": 0.8702820072348633, "calibration/coverage@25%": 0.9377659574468085, "calibration/coverage@30%": 0.9377659574468085, "calibration/coverage@5%": 0.5254277448234277, "calibration/distribution_entropy_10": 0.5381646310222294, "calibration/distribution_entropy_100": 0.2897085398638714, "calibration/ece": 0.12022005527506718, "calibration/mean_confidence": 0.7487579877428203, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006510416666666674, "completions/max_length": 3385.4, "completions/max_terminated_length": 3385.4, "completions/mean_length": 913.8025146484375, "completions/mean_terminated_length": 919.8221069335938, "completions/min_length": 0.0, "completions/min_terminated_length": 242.6, "epoch": 1.3942307692307692, "grad_norm": 0.0005168268107809126, "learning_rate": 2.7644230769230775e-06, "loss": -0.0064, "num_tokens": 1464024639.0, "reward": 1.2927096128463744, "reward_std": 0.10726402699947357, "rewards/accuracy_reward": 0.7303819537162781, "rewards/brier_reward": 0.8615329027175903, "rewards/confidence_one_or_zero": 0.0038194443448446693, "rewards/format_reward": 0.9934895753860473, "rewards/mean_confidence_reward": 0.737480640411377, "sampling/batch_mean_priority_error": 0.0157292361111111, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.21388888888888888, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0020283636171370746, "sampling/priority_kl": 0.03000028133392334, "sampling/priority_scale": 0.7286482155555859, "sampling/prob_entropy": 10.278956604003906, "sampling/prob_max": 5.0625135918380694e-05, "sampling/prob_min": 1.9754447203013113e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.3871999979019165, "sampling/prompt_draws_total": 41616.0, "sampling/seen_fraction": 0.8290400028228759, "sampling/unseen_fraction": 0.170959997177124, "signal/accuracy_reward/centered_abs_mean": 0.10659722238779068, "signal/accuracy_reward/group_std_mean": 0.14086600244045258, "signal/accuracy_reward/group_zero_std_frac": 0.5916666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05329861119389534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05329861119389534, "signal/advantage_abs_mean": 0.07496702522039414, "signal/advantage_pre_scale_abs_mean": 0.07496702522039414, "signal/advantage_pre_scale_std": 0.16025392711162567, "signal/advantage_std": 0.16025392711162567, "signal/brier_reward/centered_abs_mean": 0.06117345690727234, "signal/brier_reward/group_std_mean": 0.08554955422878266, "signal/brier_reward/group_zero_std_frac": 0.286111119389534, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03058672845363617, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03058672845363617, "signal/confidence_one_or_zero/centered_abs_mean": 0.004839409724809229, "signal/confidence_one_or_zero/group_std_mean": 0.006505797291174531, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9805555462837219, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.839409761814295e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.839409761814295e-08, "signal/format_reward/centered_abs_mean": 0.01199544258415699, "signal/format_reward/group_std_mean": 0.027937114983797074, "signal/format_reward/group_zero_std_frac": 0.8638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005997721292078495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005997721292078495, "signal/mean_confidence_reward/centered_abs_mean": 0.05558514967560768, "signal/mean_confidence_reward/group_std_mean": 0.07502751946449279, "signal/mean_confidence_reward/group_zero_std_frac": 0.29444445073604586, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.55851465833257e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.55851465833257e-07, "step": 580 }, { "calibration/aurc": 0.13578598524695468, "calibration/batch_distribution_entropy": 0.6923252720826605, "calibration/batch_entropy_100bins": 0.38076604303053696, "calibration/batch_entropy_10bins": 0.6923252720826605, "calibration/batch_entropy_50bins": 0.44823162511258763, "calibration/batch_uniqueness": 0.5211813855646842, "calibration/confidence_entropy": 0.4883225648832853, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.5262217544051508, "calibration/coverage@15%": 0.6181509985919922, "calibration/coverage@20%": 0.7440733804491955, "calibration/coverage@25%": 0.8506527415143603, "calibration/coverage@30%": 0.8908616187989556, "calibration/coverage@5%": 0.1427678974204749, "calibration/distribution_entropy_10": 0.6923252720826605, "calibration/distribution_entropy_100": 0.38076604303053696, "calibration/ece": 0.11659514989132358, "calibration/mean_confidence": 0.6826111711071315, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00434027777777779, "completions/max_length": 3558.6, "completions/max_terminated_length": 3558.6, "completions/mean_length": 931.40166015625, "completions/mean_terminated_length": 935.4742553710937, "completions/min_length": 0.0, "completions/min_terminated_length": 292.0, "epoch": 1.40625, "grad_norm": 0.0004743568133562803, "learning_rate": 2.7343750000000004e-06, "loss": -0.0033, "num_tokens": 1477845746.0, "reward": 1.300478959083557, "reward_std": 0.10303066670894623, "rewards/accuracy_reward": 0.7401909589767456, "rewards/brier_reward": 0.8650924324989319, "rewards/confidence_one_or_zero": 0.001996527798473835, "rewards/format_reward": 0.9956597328186035, "rewards/mean_confidence_reward": 0.7386232614517212, "sampling/batch_mean_priority_error": 0.017812638888888878, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.25555555555555554, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.00204639732837677, "sampling/priority_kl": 0.029999741166830064, "sampling/priority_scale": 0.7295177638297901, "sampling/prob_entropy": 10.278957939147949, "sampling/prob_max": 5.07840006321203e-05, "sampling/prob_min": 1.9794189211097545e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.3992000102996827, "sampling/prompt_draws_total": 41976.0, "sampling/seen_fraction": 0.8318466663360595, "sampling/unseen_fraction": 0.16815333366394042, "signal/accuracy_reward/centered_abs_mean": 0.097607421875, "signal/accuracy_reward/group_std_mean": 0.13638707101345063, "signal/accuracy_reward/group_zero_std_frac": 0.5833333492279053, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0488037109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0488037109375, "signal/advantage_abs_mean": 0.07051034420728683, "signal/advantage_pre_scale_abs_mean": 0.07051034420728683, "signal/advantage_pre_scale_std": 0.15343783795833588, "signal/advantage_std": 0.15343783795833588, "signal/brier_reward/centered_abs_mean": 0.05911755934357643, "signal/brier_reward/group_std_mean": 0.08391907587647437, "signal/brier_reward/group_zero_std_frac": 0.27777778208255766, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029558779671788216, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.029558779671788216, "signal/confidence_one_or_zero/centered_abs_mean": 0.001578775979578495, "signal/confidence_one_or_zero/group_std_mean": 0.0020235927775502207, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.5787759366503452e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.5787759366503452e-08, "signal/format_reward/centered_abs_mean": 0.008083767257630825, "signal/format_reward/group_std_mean": 0.01864474155008793, "signal/format_reward/group_zero_std_frac": 0.9111111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0040418836288154125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0040418836288154125, "signal/mean_confidence_reward/centered_abs_mean": 0.05392475575208664, "signal/mean_confidence_reward/group_std_mean": 0.07254645675420761, "signal/mean_confidence_reward/group_zero_std_frac": 0.2916666716337204, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.392475145526987e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.392475145526987e-07, "step": 585 }, { "calibration/aurc": 0.12619754771127717, "calibration/batch_distribution_entropy": 0.5520588005609969, "calibration/batch_entropy_100bins": 0.3022191708991097, "calibration/batch_entropy_10bins": 0.5520588005609969, "calibration/batch_entropy_50bins": 0.355767518117215, "calibration/batch_uniqueness": 0.25986767018800117, "calibration/confidence_entropy": 0.44241364927549753, "calibration/coverage@0%": 0.1192790139616056, "calibration/coverage@1%": 0.1192790139616056, "calibration/coverage@10%": 0.39546247818499125, "calibration/coverage@15%": 0.609375, "calibration/coverage@20%": 0.7259080497382199, "calibration/coverage@25%": 0.8811709205933681, "calibration/coverage@30%": 0.9625, "calibration/coverage@5%": 0.3761480148342059, "calibration/distribution_entropy_10": 0.5520588005609969, "calibration/distribution_entropy_100": 0.3022191708991097, "calibration/ece": 0.10985983856893541, "calibration/mean_confidence": 0.7686387434554974, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004947916666666674, "completions/max_length": 3448.6, "completions/max_terminated_length": 3448.6, "completions/mean_length": 858.2862915039062, "completions/mean_terminated_length": 862.65830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 247.0, "epoch": 1.4182692307692308, "grad_norm": 0.0004786239587701857, "learning_rate": 2.7043269230769233e-06, "loss": -0.0049, "num_tokens": 1490827028.0, "reward": 1.3150073766708374, "reward_std": 0.09183973670005799, "rewards/accuracy_reward": 0.7597222208976746, "rewards/brier_reward": 0.8752251267433167, "rewards/confidence_one_or_zero": 0.006163194461259991, "rewards/format_reward": 0.9950520873069764, "rewards/mean_confidence_reward": 0.7613700985908508, "sampling/batch_mean_priority_error": 0.018032986111111104, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2555555555555556, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.002064537163823843, "sampling/priority_kl": 0.030000126734375954, "sampling/priority_scale": 0.7309222042793408, "sampling/prob_entropy": 10.278956604003906, "sampling/prob_max": 5.0964941328857095e-05, "sampling/prob_min": 1.9828715812764132e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.4111999988555908, "sampling/prompt_draws_total": 42336.0, "sampling/seen_fraction": 0.8349533319473267, "sampling/unseen_fraction": 0.16504666805267335, "signal/accuracy_reward/centered_abs_mean": 0.09001736044883728, "signal/accuracy_reward/group_std_mean": 0.12046184241771699, "signal/accuracy_reward/group_zero_std_frac": 0.6444444417953491, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04500868022441864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04500868022441864, "signal/advantage_abs_mean": 0.06484012752771377, "signal/advantage_pre_scale_abs_mean": 0.06484012752771377, "signal/advantage_pre_scale_std": 0.14825278222560884, "signal/advantage_std": 0.14825278222560884, "signal/brier_reward/centered_abs_mean": 0.05299278721213341, "signal/brier_reward/group_std_mean": 0.07479698657989502, "signal/brier_reward/group_zero_std_frac": 0.2833333402872086, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026496393606066704, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.026496393606066704, "signal/confidence_one_or_zero/centered_abs_mean": 0.008360460097901524, "signal/confidence_one_or_zero/group_std_mean": 0.012945984303951264, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9555555582046509, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 8.360459915479624e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 8.360459915479624e-08, "signal/format_reward/centered_abs_mean": 0.008816189365461469, "signal/format_reward/group_std_mean": 0.018386806827038527, "signal/format_reward/group_zero_std_frac": 0.919444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004408094682730734, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004408094682730734, "signal/mean_confidence_reward/centered_abs_mean": 0.04662701785564423, "signal/mean_confidence_reward/group_std_mean": 0.06395412832498551, "signal/mean_confidence_reward/group_zero_std_frac": 0.30277778804302213, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.66270154220183e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.66270154220183e-07, "step": 590 }, { "calibration/aurc": 0.13849873769615206, "calibration/batch_distribution_entropy": 0.48137985436237296, "calibration/batch_entropy_100bins": 0.2622942038657148, "calibration/batch_entropy_10bins": 0.48137985436237296, "calibration/batch_entropy_50bins": 0.3087684929060569, "calibration/batch_uniqueness": 0.08955665497088608, "calibration/confidence_entropy": 0.41628624550681853, "calibration/coverage@0%": 0.12970925805047867, "calibration/coverage@1%": 0.12970925805047867, "calibration/coverage@10%": 0.44916095517841603, "calibration/coverage@15%": 0.6550220300261097, "calibration/coverage@20%": 0.7295827893820713, "calibration/coverage@25%": 0.7760307876414274, "calibration/coverage@30%": 0.9441253263707573, "calibration/coverage@5%": 0.169292591383812, "calibration/distribution_entropy_10": 0.48137985436237296, "calibration/distribution_entropy_100": 0.2622942038657148, "calibration/ece": 0.10869756309834637, "calibration/mean_confidence": 0.802804395126197, "calibration/unique_confidence_per_question": 0.01979166666666667, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001822916666666674, "completions/max_length": 3234.6, "completions/max_terminated_length": 3234.6, "completions/mean_length": 858.982373046875, "completions/mean_terminated_length": 860.55595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 248.8, "epoch": 1.4302884615384617, "grad_norm": 0.0005619844305329025, "learning_rate": 2.6742788461538467e-06, "loss": -0.0012, "num_tokens": 1503825673.0, "reward": 1.3214678764343262, "reward_std": 0.09593596607446671, "rewards/accuracy_reward": 0.7690972208976745, "rewards/brier_reward": 0.8758194446563721, "rewards/confidence_one_or_zero": 0.008159722306299955, "rewards/format_reward": 0.9980034708976746, "rewards/mean_confidence_reward": 0.7763272404670716, "sampling/batch_mean_priority_error": 0.013152847222222213, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2555555555555556, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.002083018142729998, "sampling/priority_kl": 0.03000060245394707, "sampling/priority_scale": 0.7321809828514233, "sampling/prob_entropy": 10.278948402404785, "sampling/prob_max": 5.113998267916031e-05, "sampling/prob_min": 1.9864497153321282e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.423200011253357, "sampling/prompt_draws_total": 42696.0, "sampling/seen_fraction": 0.8378800034523011, "sampling/unseen_fraction": 0.16211999654769899, "signal/accuracy_reward/centered_abs_mean": 0.09106987863779067, "signal/accuracy_reward/group_std_mean": 0.12727132737636565, "signal/accuracy_reward/group_zero_std_frac": 0.6138888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04553493931889534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04553493931889534, "signal/advantage_abs_mean": 0.06666973233222961, "signal/advantage_pre_scale_abs_mean": 0.06666973233222961, "signal/advantage_pre_scale_std": 0.1487389624118805, "signal/advantage_std": 0.1487389624118805, "signal/brier_reward/centered_abs_mean": 0.05577497556805611, "signal/brier_reward/group_std_mean": 0.07818025052547455, "signal/brier_reward/group_zero_std_frac": 0.2833333373069763, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027887487784028053, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.027887487784028053, "signal/confidence_one_or_zero/centered_abs_mean": 0.010763888782821595, "signal/confidence_one_or_zero/group_std_mean": 0.015660046227276327, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9472222208976746, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.0763889122245018e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.0763889122245018e-07, "signal/format_reward/centered_abs_mean": 0.0037706162431277333, "signal/format_reward/group_std_mean": 0.00909621324390173, "signal/format_reward/group_zero_std_frac": 0.9555555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0018853081215638666, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0018853081215638666, "signal/mean_confidence_reward/centered_abs_mean": 0.04750293791294098, "signal/mean_confidence_reward/group_std_mean": 0.06490557044744491, "signal/mean_confidence_reward/group_zero_std_frac": 0.31388888955116273, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.750293499000691e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.750293499000691e-07, "step": 595 }, { "calibration/aurc": 0.11944654806083935, "calibration/batch_distribution_entropy": 0.5310314355245698, "calibration/batch_entropy_100bins": 0.30088690024296083, "calibration/batch_entropy_10bins": 0.5310314355245698, "calibration/batch_entropy_50bins": 0.35419919065675515, "calibration/batch_uniqueness": 0.21813118524641628, "calibration/confidence_entropy": 0.42552355286541294, "calibration/coverage@0%": 0.008333333333333333, "calibration/coverage@1%": 0.008333333333333333, "calibration/coverage@10%": 0.5651928877142514, "calibration/coverage@15%": 0.7183434071393817, "calibration/coverage@20%": 0.7904855643044619, "calibration/coverage@25%": 0.8083333333333332, "calibration/coverage@30%": 0.9651041666666668, "calibration/coverage@5%": 0.2861527943823776, "calibration/distribution_entropy_10": 0.5310314355245698, "calibration/distribution_entropy_100": 0.30088690024296083, "calibration/ece": 0.11940171993680018, "calibration/mean_confidence": 0.7663529327918157, "calibration/unique_confidence_per_question": 0.02395833333333333, "calibration/unique_confidences": 9.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021701388888888838, "completions/max_length": 3184.2, "completions/max_terminated_length": 3184.2, "completions/mean_length": 854.9868896484375, "completions/mean_terminated_length": 856.8657470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 217.4, "epoch": 1.4423076923076923, "grad_norm": 0.0005304485675878823, "learning_rate": 2.6442307692307696e-06, "loss": -0.0011, "num_tokens": 1516804210.0, "reward": 1.3223771572113037, "reward_std": 0.10662513226270676, "rewards/accuracy_reward": 0.7762152671813964, "rewards/brier_reward": 0.8709542870521545, "rewards/confidence_one_or_zero": 0.006423611100763083, "rewards/format_reward": 0.997569453716278, "rewards/mean_confidence_reward": 0.7678810715675354, "sampling/batch_mean_priority_error": 0.011822916666666653, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.28055555555555556, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0020938485860824583, "sampling/priority_kl": 0.02999958209693432, "sampling/priority_scale": 0.7341154158348218, "sampling/prob_entropy": 10.278950881958007, "sampling/prob_max": 5.1341670769033954e-05, "sampling/prob_min": 1.9893073113053105e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.435200023651123, "sampling/prompt_draws_total": 43056.0, "sampling/seen_fraction": 0.8412066698074341, "sampling/unseen_fraction": 0.15879333019256592, "signal/accuracy_reward/centered_abs_mean": 0.10538194328546524, "signal/accuracy_reward/group_std_mean": 0.14465567022562026, "signal/accuracy_reward/group_zero_std_frac": 0.5666666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05269097164273262, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05269097164273262, "signal/advantage_abs_mean": 0.07484711110591888, "signal/advantage_pre_scale_abs_mean": 0.07484711110591888, "signal/advantage_pre_scale_std": 0.16025152802467346, "signal/advantage_std": 0.16025152802467346, "signal/brier_reward/centered_abs_mean": 0.06213294193148613, "signal/brier_reward/group_std_mean": 0.08648014217615127, "signal/brier_reward/group_zero_std_frac": 0.2972222208976746, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031066470965743064, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.031066470965743064, "signal/confidence_one_or_zero/centered_abs_mean": 0.010112847248092294, "signal/confidence_one_or_zero/group_std_mean": 0.01544865956529975, "signal/confidence_one_or_zero/group_zero_std_frac": 0.950000011920929, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.0112846808851827e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.0112846808851827e-07, "signal/format_reward/centered_abs_mean": 0.00461154505610466, "signal/format_reward/group_std_mean": 0.011821653135120869, "signal/format_reward/group_zero_std_frac": 0.9388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00230577252805233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00230577252805233, "signal/mean_confidence_reward/centered_abs_mean": 0.04797801896929741, "signal/mean_confidence_reward/group_std_mean": 0.0646023727953434, "signal/mean_confidence_reward/group_zero_std_frac": 0.3277777791023254, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.797801636868826e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.797801636868826e-07, "step": 600 }, { "epoch": 1.4423076923076923, "eval_calibration/aurc": 0.1346761021889891, "eval_calibration/batch_distribution_entropy": 0.5733805972211355, "eval_calibration/batch_entropy_100bins": 0.3126081544974916, "eval_calibration/batch_entropy_10bins": 0.5733805972211355, "eval_calibration/batch_entropy_50bins": 0.3679972615168838, "eval_calibration/batch_uniqueness": 0.2611101591861721, "eval_calibration/confidence_entropy": 0.4430562863986335, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.5864465682015638, "eval_calibration/coverage@15%": 0.7497827975673328, "eval_calibration/coverage@20%": 0.8583840139009556, "eval_calibration/coverage@25%": 0.8583840139009556, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.5733805972211355, "eval_calibration/distribution_entropy_100": 0.3126081544974916, "eval_calibration/ece": 0.04639443961772359, "eval_calibration/mean_confidence": 0.7647263249348393, "eval_calibration/unique_confidence_per_question": 0.0078125, "eval_calibration/unique_confidences": 9, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 2566.5, "eval_completions/max_terminated_length": 2566.5, "eval_completions/mean_length": 882.0298665364584, "eval_completions/mean_terminated_length": 882.0298665364584, "eval_completions/min_length": 304.3333333333333, "eval_completions/min_terminated_length": 304.3333333333333, "eval_loss": 0.0, "eval_num_tokens": 1516804210.0, "eval_reward": 1.2891743381818135, "eval_reward_std": 0.31100672483444214, "eval_rewards/accuracy_reward": 0.723090281089147, "eval_rewards/brier_reward": 0.8561111291249593, "eval_rewards/confidence_one_or_zero": 0.005208333333333333, "eval_rewards/format_reward": 0.9991319477558136, "eval_rewards/mean_confidence_reward": 0.7640624543031057, "eval_runtime": 151.9223, "eval_samples_per_second": 6.582, "eval_signal/accuracy_reward/centered_abs_mean": 0.3875325520833333, "eval_signal/accuracy_reward/group_std_mean": 0.445096492767334, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19376627604166666, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19376627604166666, "eval_signal/advantage_abs_mean": 0.26332706212997437, "eval_signal/advantage_pre_scale_abs_mean": 0.26332706212997437, "eval_signal/advantage_pre_scale_std": 0.30966611703236896, "eval_signal/advantage_std": 0.30966611703236896, "eval_signal/brier_reward/centered_abs_mean": 0.16171767810980478, "eval_signal/brier_reward/group_std_mean": 0.22035816063483557, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08085883905490239, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08085883905490239, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.009874132151405016, "eval_signal/confidence_one_or_zero/group_std_mean": 0.023483964304129284, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.8888888955116272, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 9.874131497629908e-08, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 9.874131497629908e-08, "eval_signal/format_reward/centered_abs_mean": 0.0016818575871487458, "eval_signal/format_reward/group_std_mean": 0.0049104637776811915, "eval_signal/format_reward/group_zero_std_frac": 0.9722222288449606, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008409287935743729, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0008409287935743729, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.16384546707073847, "eval_signal/mean_confidence_reward/group_std_mean": 0.19949180136124292, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.6384545915570925e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.6384545915570925e-06, "eval_steps_per_second": 0.039, "step": 600 }, { "epoch": 1.4423076923076923, "step": 600, "train_probe_calibration/aurc": 0.10479481723911278, "train_probe_calibration/batch_distribution_entropy": 0.5860862023827814, "train_probe_calibration/batch_entropy_100bins": 0.3205852918546655, "train_probe_calibration/batch_entropy_10bins": 0.5860862023827814, "train_probe_calibration/batch_entropy_50bins": 0.3773878185447479, "train_probe_calibration/batch_uniqueness": 0.2788052930056711, "train_probe_calibration/confidence_entropy": 0.43982697754627376, "train_probe_calibration/coverage@0%": 0.008695652173913044, "train_probe_calibration/coverage@1%": 0.008695652173913044, "train_probe_calibration/coverage@10%": 0.7052173913043478, "train_probe_calibration/coverage@15%": 0.7939130434782609, "train_probe_calibration/coverage@20%": 0.8486956521739131, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.008695652173913044, "train_probe_calibration/distribution_entropy_10": 0.5860862023827814, "train_probe_calibration/distribution_entropy_100": 0.3205852918546655, "train_probe_calibration/ece": 0.03444637681159412, "train_probe_calibration/mean_confidence": 0.7627420289855074, "train_probe_calibration/unique_confidence_per_question": 0.011284722222222222, "train_probe_calibration/unique_confidences": 13, "train_probe_completions/clipped_ratio": 0.0017361111111111234, "train_probe_completions/max_length": 3118.3333333333335, "train_probe_completions/max_terminated_length": 3118.3333333333335, "train_probe_completions/mean_length": 913.1567789713541, "train_probe_completions/mean_terminated_length": 914.722666422526, "train_probe_completions/min_length": 178.5, "train_probe_completions/min_terminated_length": 271.3333333333333, "train_probe_loss": 0.0, "train_probe_num_tokens": 1516804210.0, "train_probe_reward": 1.315513809521993, "train_probe_reward_std": 0.29222285747528076, "train_probe_rewards/accuracy_reward": 0.7578124900658926, "train_probe_rewards/brier_reward": 0.8749358355998993, "train_probe_rewards/confidence_one_or_zero": 0.006076389069979389, "train_probe_rewards/format_reward": 0.9982638955116272, "train_probe_rewards/mean_confidence_reward": 0.7614177962144216, "train_probe_runtime": 171.292, "train_probe_samples_per_second": 5.838, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3546549479166667, "train_probe_signal/accuracy_reward/group_std_mean": 0.42514685293038684, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.17732747395833334, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.17732747395833334, "train_probe_signal/advantage_abs_mean": 0.23652945458889008, "train_probe_signal/advantage_pre_scale_abs_mean": 0.23652945458889008, "train_probe_signal/advantage_pre_scale_std": 0.29100040594736737, "train_probe_signal/advantage_std": 0.29100040594736737, "train_probe_signal/brier_reward/centered_abs_mean": 0.14432349801063538, "train_probe_signal/brier_reward/group_std_mean": 0.2020617425441742, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07216174900531769, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07216174900531769, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.011773003110041222, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.034373246443768345, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.8055555820465088, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.1773002484005701e-07, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.1773002484005701e-07, "train_probe_signal/format_reward/centered_abs_mean": 0.0033637151742974916, "train_probe_signal/format_reward/group_std_mean": 0.009820927555362383, "train_probe_signal/format_reward/group_zero_std_frac": 0.944444457689921, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0016818575871487458, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0016818575871487458, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.1672744701306025, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.20480586091677347, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.6727445692292047e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.6727445692292047e-06, "train_probe_steps_per_second": 0.035 }, { "calibration/aurc": 0.2025863559052315, "calibration/batch_distribution_entropy": 0.6094061078151227, "calibration/batch_entropy_100bins": 0.32643694694181385, "calibration/batch_entropy_10bins": 0.6094061078151227, "calibration/batch_entropy_50bins": 0.38427629223435283, "calibration/batch_uniqueness": 0.35345220359531254, "calibration/confidence_entropy": 0.4623222893283905, "calibration/coverage@0%": 0.0020942841199528784, "calibration/coverage@1%": 0.0020942841199528784, "calibration/coverage@10%": 0.16511511745328622, "calibration/coverage@15%": 0.19219845078661957, "calibration/coverage@20%": 0.5574318453467625, "calibration/coverage@25%": 0.8025488314663892, "calibration/coverage@30%": 0.8855078430882883, "calibration/coverage@5%": 0.0020942841199528784, "calibration/distribution_entropy_10": 0.6094061078151227, "calibration/distribution_entropy_100": 0.32643694694181385, "calibration/ece": 0.11574754381809685, "calibration/mean_confidence": 0.7419782801931942, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0019097222222222321, "completions/max_length": 3675.2, "completions/max_terminated_length": 3675.2, "completions/mean_length": 899.375439453125, "completions/mean_terminated_length": 901.0966918945312, "completions/min_length": 0.0, "completions/min_terminated_length": 254.6, "epoch": 1.4543269230769231, "grad_norm": 0.0005201512249186635, "learning_rate": 2.6141826923076926e-06, "loss": -0.0007, "num_tokens": 1530249463.0, "reward": 1.3011293888092041, "reward_std": 0.10355799049139022, "rewards/accuracy_reward": 0.7501736044883728, "rewards/brier_reward": 0.8539797186851501, "rewards/confidence_one_or_zero": 0.003211805532919243, "rewards/format_reward": 0.9980902791023254, "rewards/mean_confidence_reward": 0.7592641592025757, "sampling/batch_mean_priority_error": 0.022168472222222204, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2638888888888889, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0021110890433192254, "sampling/priority_kl": 0.029999648779630662, "sampling/priority_scale": 0.7359204589622095, "sampling/prob_entropy": 10.27893238067627, "sampling/prob_max": 5.153878591954708e-05, "sampling/prob_min": 1.9922950014006346e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.4471999883651734, "sampling/prompt_draws_total": 43416.0, "sampling/seen_fraction": 0.8443333506584167, "sampling/unseen_fraction": 0.15566664934158325, "signal/accuracy_reward/centered_abs_mean": 0.10117187350988388, "signal/accuracy_reward/group_std_mean": 0.14249942302703858, "signal/accuracy_reward/group_zero_std_frac": 0.5555555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05058593675494194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05058593675494194, "signal/advantage_abs_mean": 0.07119562327861786, "signal/advantage_pre_scale_abs_mean": 0.07119562327861786, "signal/advantage_pre_scale_std": 0.1525435507297516, "signal/advantage_std": 0.1525435507297516, "signal/brier_reward/centered_abs_mean": 0.05812448561191559, "signal/brier_reward/group_std_mean": 0.08289218097925186, "signal/brier_reward/group_zero_std_frac": 0.2472222179174423, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029062242805957796, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.029062242805957796, "signal/confidence_one_or_zero/centered_abs_mean": 0.00451931421412155, "signal/confidence_one_or_zero/group_std_mean": 0.007009075395762921, "signal/confidence_one_or_zero/group_zero_std_frac": 0.975, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.519313883122322e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.519313883122322e-08, "signal/format_reward/centered_abs_mean": 0.003656683978624642, "signal/format_reward/group_std_mean": 0.009607256762683392, "signal/format_reward/group_zero_std_frac": 0.95, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001828341989312321, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001828341989312321, "signal/mean_confidence_reward/centered_abs_mean": 0.05031117424368858, "signal/mean_confidence_reward/group_std_mean": 0.06716113239526748, "signal/mean_confidence_reward/group_zero_std_frac": 0.2666666656732559, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.031117325415834e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.031117325415834e-07, "step": 605 }, { "calibration/aurc": 0.18925906471339116, "calibration/batch_distribution_entropy": 0.5951494301130872, "calibration/batch_entropy_100bins": 0.31397400838451495, "calibration/batch_entropy_10bins": 0.5951494301130872, "calibration/batch_entropy_50bins": 0.36960512261335693, "calibration/batch_uniqueness": 0.34783256793587974, "calibration/confidence_entropy": 0.46064776300794225, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.13490813648293962, "calibration/coverage@10%": 0.1942257217847769, "calibration/coverage@15%": 0.2793733681462141, "calibration/coverage@20%": 0.608043679286336, "calibration/coverage@25%": 0.772106179286336, "calibration/coverage@30%": 0.8325228459530025, "calibration/coverage@5%": 0.13490813648293962, "calibration/distribution_entropy_10": 0.5951494301130872, "calibration/distribution_entropy_100": 0.31397400838451495, "calibration/ece": 0.1528422618435751, "calibration/mean_confidence": 0.7713018239927909, "calibration/unique_confidence_per_question": 0.019270833333333334, "calibration/unique_confidences": 7.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021701388888888838, "completions/max_length": 3768.2, "completions/max_terminated_length": 3768.2, "completions/mean_length": 932.9999145507812, "completions/mean_terminated_length": 935.0447998046875, "completions/min_length": 0.0, "completions/min_terminated_length": 247.4, "epoch": 1.4663461538461537, "grad_norm": 0.0006306435097940266, "learning_rate": 2.584134615384616e-06, "loss": -0.0024, "num_tokens": 1544096950.0, "reward": 1.2903532981872559, "reward_std": 0.1076474979519844, "rewards/accuracy_reward": 0.7316840291023254, "rewards/brier_reward": 0.8511776804924012, "rewards/confidence_one_or_zero": 0.0012152777577284723, "rewards/format_reward": 0.9978298544883728, "rewards/mean_confidence_reward": 0.7503623723983764, "sampling/batch_mean_priority_error": 0.01682638888888887, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2722222222222222, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.002135100308805704, "sampling/priority_kl": 0.03000096157193184, "sampling/priority_scale": 0.7381196321221069, "sampling/prob_entropy": 10.278960800170898, "sampling/prob_max": 5.1753289881162345e-05, "sampling/prob_min": 1.994912381633185e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.4591999769210815, "sampling/prompt_draws_total": 43776.0, "sampling/seen_fraction": 0.8476733326911926, "sampling/unseen_fraction": 0.15232666730880737, "signal/accuracy_reward/centered_abs_mean": 0.10917426198720932, "signal/accuracy_reward/group_std_mean": 0.15016919672489165, "signal/accuracy_reward/group_zero_std_frac": 0.5472222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05458713099360466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05458713099360466, "signal/advantage_abs_mean": 0.0764501854777336, "signal/advantage_pre_scale_abs_mean": 0.0764501854777336, "signal/advantage_pre_scale_std": 0.15649850368499757, "signal/advantage_std": 0.15649850368499757, "signal/brier_reward/centered_abs_mean": 0.06043119207024574, "signal/brier_reward/group_std_mean": 0.082360278069973, "signal/brier_reward/group_zero_std_frac": 0.25, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03021559603512287, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03021559603512287, "signal/confidence_one_or_zero/centered_abs_mean": 0.0015082465368323028, "signal/confidence_one_or_zero/group_std_mean": 0.0018771322444081306, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.5082464699389676e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.5082464699389676e-08, "signal/format_reward/centered_abs_mean": 0.004139539937023073, "signal/format_reward/group_std_mean": 0.010728820972144604, "signal/format_reward/group_zero_std_frac": 0.944444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0020697699685115365, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0020697699685115365, "signal/mean_confidence_reward/centered_abs_mean": 0.0506988450884819, "signal/mean_confidence_reward/group_std_mean": 0.06803653538227081, "signal/mean_confidence_reward/group_zero_std_frac": 0.2694444477558136, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.069884593922325e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.069884593922325e-07, "step": 610 }, { "calibration/aurc": 0.11789342149139752, "calibration/batch_distribution_entropy": 0.6047390118271716, "calibration/batch_entropy_100bins": 0.3246738368739022, "calibration/batch_entropy_10bins": 0.6047390118271716, "calibration/batch_entropy_50bins": 0.3822007875892891, "calibration/batch_uniqueness": 0.38138563368055556, "calibration/confidence_entropy": 0.4734669462340119, "calibration/coverage@0%": 0.06666666666666667, "calibration/coverage@1%": 0.19583333333333336, "calibration/coverage@10%": 0.6197916666666666, "calibration/coverage@15%": 0.6838541666666667, "calibration/coverage@20%": 0.7208333333333333, "calibration/coverage@25%": 0.8546875, "calibration/coverage@30%": 0.8953125, "calibration/coverage@5%": 0.3583333333333334, "calibration/distribution_entropy_10": 0.6047390118271716, "calibration/distribution_entropy_100": 0.3246738368739022, "calibration/ece": 0.11421875, "calibration/mean_confidence": 0.7418229166666668, "calibration/unique_confidence_per_question": 0.019270833333333334, "calibration/unique_confidences": 7.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0014756944444444641, "completions/max_length": 3789.0, "completions/max_terminated_length": 3789.0, "completions/mean_length": 950.9626831054687, "completions/mean_terminated_length": 952.3809326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 252.2, "epoch": 1.4783653846153846, "grad_norm": 0.0005810891161672771, "learning_rate": 2.554086538461539e-06, "loss": -0.0, "num_tokens": 1558176616.0, "reward": 1.3133844137191772, "reward_std": 0.10787461549043656, "rewards/accuracy_reward": 0.7603298664093018, "rewards/brier_reward": 0.8678995728492737, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.998524296283722, "rewards/mean_confidence_reward": 0.7534403920173645, "sampling/batch_mean_priority_error": 0.01978472222222221, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.23333333333333334, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0021563940681517124, "sampling/priority_kl": 0.029999713599681854, "sampling/priority_scale": 0.7398386180168017, "sampling/prob_entropy": 10.278945922851562, "sampling/prob_max": 5.1947306928923354e-05, "sampling/prob_min": 1.9979511489509604e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.4711999893188477, "sampling/prompt_draws_total": 44136.0, "sampling/seen_fraction": 0.8506266713142395, "sampling/unseen_fraction": 0.1493733286857605, "signal/accuracy_reward/centered_abs_mean": 0.11985134780406952, "signal/accuracy_reward/group_std_mean": 0.15811396539211273, "signal/accuracy_reward/group_zero_std_frac": 0.55, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05992567390203476, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05992567390203476, "signal/advantage_abs_mean": 0.08039970993995667, "signal/advantage_pre_scale_abs_mean": 0.08039970993995667, "signal/advantage_pre_scale_std": 0.16128072142601013, "signal/advantage_std": 0.16128072142601013, "signal/brier_reward/centered_abs_mean": 0.058254283666610715, "signal/brier_reward/group_std_mean": 0.07793922573328019, "signal/brier_reward/group_zero_std_frac": 0.2638888955116272, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029127141833305357, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.029127141833305357, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.002533637161832303, "signal/format_reward/group_std_mean": 0.005043594865128398, "signal/format_reward/group_zero_std_frac": 0.9777777671813965, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012668185809161514, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012668185809161514, "signal/mean_confidence_reward/centered_abs_mean": 0.04812428429722786, "signal/mean_confidence_reward/group_std_mean": 0.062294195592403415, "signal/mean_confidence_reward/group_zero_std_frac": 0.2694444447755814, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.812428301192995e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.812428301192995e-07, "step": 615 }, { "calibration/aurc": 0.1058172588479028, "calibration/batch_distribution_entropy": 0.6578273898301509, "calibration/batch_entropy_100bins": 0.35653299906657165, "calibration/batch_entropy_10bins": 0.6578273898301509, "calibration/batch_entropy_50bins": 0.41970487784557414, "calibration/batch_uniqueness": 0.5193359375, "calibration/confidence_entropy": 0.4997947054499211, "calibration/coverage@0%": 0.065625, "calibration/coverage@1%": 0.1296875, "calibration/coverage@10%": 0.5354166666666667, "calibration/coverage@15%": 0.6609375, "calibration/coverage@20%": 0.8901041666666668, "calibration/coverage@25%": 0.9182291666666668, "calibration/coverage@30%": 0.9848958333333332, "calibration/coverage@5%": 0.3864583333333333, "calibration/distribution_entropy_10": 0.6578273898301509, "calibration/distribution_entropy_100": 0.35653299906657165, "calibration/ece": 0.11013020833333327, "calibration/mean_confidence": 0.7221614583333333, "calibration/unique_confidence_per_question": 0.0203125, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021701388888888617, "completions/max_length": 3434.0, "completions/max_terminated_length": 3434.0, "completions/mean_length": 941.9648681640625, "completions/mean_terminated_length": 944.02392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 269.6, "epoch": 1.4903846153846154, "grad_norm": 0.00043419600115157664, "learning_rate": 2.5240384615384618e-06, "loss": -0.0009, "num_tokens": 1572119859.0, "reward": 1.323990035057068, "reward_std": 0.08798304051160813, "rewards/accuracy_reward": 0.7714409828186035, "rewards/brier_reward": 0.8786945819854737, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9978298544883728, "rewards/mean_confidence_reward": 0.7352184534072876, "sampling/batch_mean_priority_error": 0.013222222222222213, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.19166666666666668, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0021701503079384564, "sampling/priority_kl": 0.029999665170907974, "sampling/priority_scale": 0.7408775746589527, "sampling/prob_entropy": 10.278956604003906, "sampling/prob_max": 5.211559619056061e-05, "sampling/prob_min": 2.0017361021018586e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.4832000017166138, "sampling/prompt_draws_total": 44496.0, "sampling/seen_fraction": 0.8530933260917664, "sampling/unseen_fraction": 0.14690667390823364, "signal/accuracy_reward/centered_abs_mean": 0.08047417402267457, "signal/accuracy_reward/group_std_mean": 0.11685203611850739, "signal/accuracy_reward/group_zero_std_frac": 0.6194444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04023708701133728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04023708701133728, "signal/advantage_abs_mean": 0.05966672003269195, "signal/advantage_pre_scale_abs_mean": 0.05966672003269195, "signal/advantage_pre_scale_std": 0.1359117567539215, "signal/advantage_std": 0.1359117567539215, "signal/brier_reward/centered_abs_mean": 0.0505802758038044, "signal/brier_reward/group_std_mean": 0.07184310257434845, "signal/brier_reward/group_zero_std_frac": 0.1722222238779068, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0252901379019022, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0252901379019022, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003255208255723119, "signal/confidence_one_or_zero/group_std_mean": 0.0006831518840044737, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.2552080142522754e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.2552080142522754e-09, "signal/format_reward/centered_abs_mean": 0.0040961371967568995, "signal/format_reward/group_std_mean": 0.009779365314170719, "signal/format_reward/group_zero_std_frac": 0.9527777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0020480685983784498, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0020480685983784498, "signal/mean_confidence_reward/centered_abs_mean": 0.049345605075359344, "signal/mean_confidence_reward/group_std_mean": 0.06616655513644218, "signal/mean_confidence_reward/group_zero_std_frac": 0.18333333134651184, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 4.93456042249818e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 4.93456042249818e-07, "step": 620 }, { "calibration/aurc": 0.15336518701159388, "calibration/batch_distribution_entropy": 0.6800076074158324, "calibration/batch_entropy_100bins": 0.3622864830464606, "calibration/batch_entropy_10bins": 0.6800076074158324, "calibration/batch_entropy_50bins": 0.4264777860961084, "calibration/batch_uniqueness": 0.5138020531346246, "calibration/confidence_entropy": 0.5045679523521033, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.06631853785900783, "calibration/coverage@10%": 0.3418826731392584, "calibration/coverage@15%": 0.5271101989576695, "calibration/coverage@20%": 0.5976153626398852, "calibration/coverage@25%": 0.8201260253695442, "calibration/coverage@30%": 0.9593709203655353, "calibration/coverage@5%": 0.17774225319175183, "calibration/distribution_entropy_10": 0.6800076074158324, "calibration/distribution_entropy_100": 0.3622864830464606, "calibration/ece": 0.09629617928490819, "calibration/mean_confidence": 0.6966044761549471, "calibration/unique_confidence_per_question": 0.020833333333333336, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002343749999999978, "completions/max_length": 3773.8, "completions/max_terminated_length": 3773.8, "completions/mean_length": 1026.7007934570313, "completions/mean_terminated_length": 1029.1406860351562, "completions/min_length": 0.0, "completions/min_terminated_length": 326.8, "epoch": 1.5024038461538463, "grad_norm": 0.00045041058911010623, "learning_rate": 2.4939903846153847e-06, "loss": -0.0015, "num_tokens": 1587079484.0, "reward": 1.3066246509552002, "reward_std": 0.0912088081240654, "rewards/accuracy_reward": 0.7471354246139527, "rewards/brier_reward": 0.8684430360794068, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.997656238079071, "rewards/mean_confidence_reward": 0.728438937664032, "sampling/batch_mean_priority_error": 0.018874999999999982, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.24722222222222223, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.0021893881261348725, "sampling/priority_kl": 0.030000680685043336, "sampling/priority_scale": 0.7427042663795873, "sampling/prob_entropy": 10.278961181640625, "sampling/prob_max": 5.23165290360339e-05, "sampling/prob_min": 2.0047180805704555e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.495199990272522, "sampling/prompt_draws_total": 44856.0, "sampling/seen_fraction": 0.8559533357620239, "sampling/unseen_fraction": 0.14404666423797607, "signal/accuracy_reward/centered_abs_mean": 0.1004937082529068, "signal/accuracy_reward/group_std_mean": 0.1350753501057625, "signal/accuracy_reward/group_zero_std_frac": 0.6055555701255798, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0502468541264534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0502468541264534, "signal/advantage_abs_mean": 0.06540203243494033, "signal/advantage_pre_scale_abs_mean": 0.06540203243494033, "signal/advantage_pre_scale_std": 0.13887231945991516, "signal/advantage_std": 0.13887231945991516, "signal/brier_reward/centered_abs_mean": 0.05391479507088661, "signal/brier_reward/group_std_mean": 0.07308482676744461, "signal/brier_reward/group_zero_std_frac": 0.19722222685813903, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026957397535443305, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.026957397535443305, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.004258897609543055, "signal/format_reward/group_std_mean": 0.009096213802695274, "signal/format_reward/group_zero_std_frac": 0.9583333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0021294488047715276, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0021294488047715276, "signal/mean_confidence_reward/centered_abs_mean": 0.0523322694003582, "signal/mean_confidence_reward/group_std_mean": 0.06916438788175583, "signal/mean_confidence_reward/group_zero_std_frac": 0.19722222685813903, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.233226659129286e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.233226659129286e-07, "step": 625 }, { "calibration/aurc": 0.12000735705395464, "calibration/batch_distribution_entropy": 0.6255764616425743, "calibration/batch_entropy_100bins": 0.3354601150908449, "calibration/batch_entropy_10bins": 0.6255764616425743, "calibration/batch_entropy_50bins": 0.3948982197857547, "calibration/batch_uniqueness": 0.4280289172596209, "calibration/confidence_entropy": 0.47212258337113344, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.21232513398378453, "calibration/coverage@10%": 0.38557459862122667, "calibration/coverage@15%": 0.48163925438596494, "calibration/coverage@20%": 0.8848291993037425, "calibration/coverage@25%": 0.9450713148733453, "calibration/coverage@30%": 0.9932291666666668, "calibration/coverage@5%": 0.3062012304750126, "calibration/distribution_entropy_10": 0.6255764616425743, "calibration/distribution_entropy_100": 0.3354601150908449, "calibration/ece": 0.10917860213998729, "calibration/mean_confidence": 0.7486235025901614, "calibration/unique_confidence_per_question": 0.01875, "calibration/unique_confidences": 7.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002777777777777768, "completions/max_length": 3631.8, "completions/max_terminated_length": 3631.8, "completions/mean_length": 1021.059814453125, "completions/mean_terminated_length": 1023.94638671875, "completions/min_length": 0.0, "completions/min_terminated_length": 284.0, "epoch": 1.5144230769230769, "grad_norm": 0.0005333871231414378, "learning_rate": 2.463942307692308e-06, "loss": -0.0021, "num_tokens": 1601977741.0, "reward": 1.3167559146881103, "reward_std": 0.0949985533952713, "rewards/accuracy_reward": 0.7634548664093017, "rewards/brier_reward": 0.8730803966522217, "rewards/confidence_one_or_zero": 0.0004340277810115367, "rewards/format_reward": 0.9969618082046509, "rewards/mean_confidence_reward": 0.7371064901351929, "sampling/batch_mean_priority_error": 0.014197530864197522, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.22777777777777777, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.002204936416819692, "sampling/priority_kl": 0.02999979294836521, "sampling/priority_scale": 0.7445269406074658, "sampling/prob_entropy": 10.278949737548828, "sampling/prob_max": 5.25149138411507e-05, "sampling/prob_min": 2.0075886277481912e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.507200002670288, "sampling/prompt_draws_total": 45216.0, "sampling/seen_fraction": 0.8587599992752075, "sampling/unseen_fraction": 0.14124000072479248, "signal/accuracy_reward/centered_abs_mean": 0.09511176496744156, "signal/accuracy_reward/group_std_mean": 0.12550814002752303, "signal/accuracy_reward/group_zero_std_frac": 0.6333333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04755588248372078, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04755588248372078, "signal/advantage_abs_mean": 0.06778244376182556, "signal/advantage_pre_scale_abs_mean": 0.06778244376182556, "signal/advantage_pre_scale_std": 0.14562630653381348, "signal/advantage_std": 0.14562630653381348, "signal/brier_reward/centered_abs_mean": 0.055177373439073564, "signal/brier_reward/group_std_mean": 0.07687650173902512, "signal/brier_reward/group_zero_std_frac": 0.2, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027588686719536782, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.027588686719536782, "signal/confidence_one_or_zero/centered_abs_mean": 0.0008083767141215503, "signal/confidence_one_or_zero/group_std_mean": 0.0018047165125608445, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 8.083766545041726e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 8.083766545041726e-09, "signal/format_reward/centered_abs_mean": 0.005821397621184587, "signal/format_reward/group_std_mean": 0.015639285184443, "signal/format_reward/group_zero_std_frac": 0.9166666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0029106988105922936, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0029106988105922936, "signal/mean_confidence_reward/centered_abs_mean": 0.053772156685590745, "signal/mean_confidence_reward/group_std_mean": 0.0706539012491703, "signal/mean_confidence_reward/group_zero_std_frac": 0.20555555820465088, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.377215302360127e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.377215302360127e-07, "step": 630 }, { "calibration/aurc": 0.06758576087491405, "calibration/batch_distribution_entropy": 0.6739458670298898, "calibration/batch_entropy_100bins": 0.36148430969206863, "calibration/batch_entropy_10bins": 0.6739458670298898, "calibration/batch_entropy_50bins": 0.42553348060237434, "calibration/batch_uniqueness": 0.507114894490273, "calibration/confidence_entropy": 0.4857174340818884, "calibration/coverage@0%": 0.12276725871313672, "calibration/coverage@1%": 0.2607880920464701, "calibration/coverage@10%": 0.7569473367679553, "calibration/coverage@15%": 0.8394321061326202, "calibration/coverage@20%": 0.9129786771105308, "calibration/coverage@25%": 0.9682291666666668, "calibration/coverage@30%": 0.9682291666666668, "calibration/coverage@5%": 0.5899399641137999, "calibration/distribution_entropy_10": 0.6739458670298898, "calibration/distribution_entropy_100": 0.36148430969206863, "calibration/ece": 0.14519666110659998, "calibration/mean_confidence": 0.7185335590463714, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003906249999999978, "completions/max_length": 3880.8, "completions/max_terminated_length": 3880.8, "completions/mean_length": 1042.0560668945313, "completions/mean_terminated_length": 1046.1614990234375, "completions/min_length": 0.0, "completions/min_terminated_length": 307.2, "epoch": 1.5264423076923077, "grad_norm": 0.0005072118365205824, "learning_rate": 2.433894230769231e-06, "loss": -0.0038, "num_tokens": 1617074483.0, "reward": 1.3064142227172852, "reward_std": 0.0989925280213356, "rewards/accuracy_reward": 0.7481770873069763, "rewards/brier_reward": 0.868543004989624, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9960937619209289, "rewards/mean_confidence_reward": 0.729508101940155, "sampling/batch_mean_priority_error": 0.016961419753086404, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.2416666666666667, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.002223148616030812, "sampling/priority_kl": 0.030000782012939452, "sampling/priority_scale": 0.7464619219535962, "sampling/prob_entropy": 10.2789737701416, "sampling/prob_max": 5.2718666120199485e-05, "sampling/prob_min": 2.0103476708754896e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.5192000150680542, "sampling/prompt_draws_total": 45576.0, "sampling/seen_fraction": 0.8615866661071777, "sampling/unseen_fraction": 0.13841333389282226, "signal/accuracy_reward/centered_abs_mean": 0.09907226413488388, "signal/accuracy_reward/group_std_mean": 0.1344002977013588, "signal/accuracy_reward/group_zero_std_frac": 0.597222238779068, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04953613206744194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04953613206744194, "signal/advantage_abs_mean": 0.06925319880247116, "signal/advantage_pre_scale_abs_mean": 0.06925319880247116, "signal/advantage_pre_scale_std": 0.14847098886966706, "signal/advantage_std": 0.14847098886966706, "signal/brier_reward/centered_abs_mean": 0.05832105129957199, "signal/brier_reward/group_std_mean": 0.08077382892370225, "signal/brier_reward/group_zero_std_frac": 0.1972222238779068, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029160525649785995, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.029160525649785995, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.00697157122194767, "signal/format_reward/group_std_mean": 0.01599796488881111, "signal/format_reward/group_zero_std_frac": 0.9222222447395325, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003485785610973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003485785610973835, "signal/mean_confidence_reward/centered_abs_mean": 0.05409794896841049, "signal/mean_confidence_reward/group_std_mean": 0.07281152158975601, "signal/mean_confidence_reward/group_zero_std_frac": 0.2, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.409794880506525e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.409794880506525e-07, "step": 635 }, { "calibration/aurc": 0.1493596396212617, "calibration/batch_distribution_entropy": 0.6154120129307172, "calibration/batch_entropy_100bins": 0.327493360904968, "calibration/batch_entropy_10bins": 0.6154120129307172, "calibration/batch_entropy_50bins": 0.3855198856591432, "calibration/batch_uniqueness": 0.38064147099576423, "calibration/confidence_entropy": 0.4642811581171026, "calibration/coverage@0%": 0.0005208333333333333, "calibration/coverage@1%": 0.0005208333333333333, "calibration/coverage@10%": 0.39853397241148086, "calibration/coverage@15%": 0.4210599270674741, "calibration/coverage@20%": 0.6583795691906005, "calibration/coverage@25%": 0.7992833442123586, "calibration/coverage@30%": 0.9686684073107049, "calibration/coverage@5%": 0.23565933853804272, "calibration/distribution_entropy_10": 0.6154120129307172, "calibration/distribution_entropy_100": 0.327493360904968, "calibration/ece": 0.11085883086933453, "calibration/mean_confidence": 0.7440802817524015, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004166666666666674, "completions/max_length": 3869.0, "completions/max_terminated_length": 3869.0, "completions/mean_length": 1044.1888916015625, "completions/mean_terminated_length": 1048.6419189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 344.0, "epoch": 1.5384615384615383, "grad_norm": 0.0005268683307804167, "learning_rate": 2.403846153846154e-06, "loss": -0.0053, "num_tokens": 1632200083.0, "reward": 1.2998849391937255, "reward_std": 0.10648185312747956, "rewards/accuracy_reward": 0.7403645873069763, "rewards/brier_reward": 0.863817822933197, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9955729126930237, "rewards/mean_confidence_reward": 0.7254003286361694, "sampling/batch_mean_priority_error": 0.015239513888888875, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.23055555555555554, "sampling/error_ema_max": 0.1524374932050705, "sampling/error_ema_mean": 0.002240751124918461, "sampling/priority_kl": 0.030000269040465356, "sampling/priority_scale": 0.7487170279258862, "sampling/prob_entropy": 10.27895679473877, "sampling/prob_max": 5.293711947160773e-05, "sampling/prob_min": 2.012807090068236e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.5312000036239624, "sampling/prompt_draws_total": 45936.0, "sampling/seen_fraction": 0.8645133256912232, "sampling/unseen_fraction": 0.13548667430877687, "signal/accuracy_reward/centered_abs_mean": 0.1005154088139534, "signal/accuracy_reward/group_std_mean": 0.13976921439170836, "signal/accuracy_reward/group_zero_std_frac": 0.5777777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0502577044069767, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0502577044069767, "signal/advantage_abs_mean": 0.07303778156638145, "signal/advantage_pre_scale_abs_mean": 0.07303778156638145, "signal/advantage_pre_scale_std": 0.15646542310714723, "signal/advantage_std": 0.15646542310714723, "signal/brier_reward/centered_abs_mean": 0.06378994956612587, "signal/brier_reward/group_std_mean": 0.08879801481962205, "signal/brier_reward/group_zero_std_frac": 0.2194444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031894974783062933, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.031894974783062933, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.00820855046622455, "signal/format_reward/group_std_mean": 0.01877419650554657, "signal/format_reward/group_zero_std_frac": 0.9111111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004104275233112275, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004104275233112275, "signal/mean_confidence_reward/centered_abs_mean": 0.05598941519856453, "signal/mean_confidence_reward/group_std_mean": 0.07363492101430893, "signal/mean_confidence_reward/group_zero_std_frac": 0.23333333432674408, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.598941356765863e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.598941356765863e-07, "step": 640 }, { "calibration/aurc": 0.10121633485801577, "calibration/batch_distribution_entropy": 0.647102852218405, "calibration/batch_entropy_100bins": 0.3475252170040523, "calibration/batch_entropy_10bins": 0.647102852218405, "calibration/batch_entropy_50bins": 0.40910106254626905, "calibration/batch_uniqueness": 0.4184239848201855, "calibration/confidence_entropy": 0.47056694558071294, "calibration/coverage@0%": 0.12291666666666667, "calibration/coverage@1%": 0.2486727589208007, "calibration/coverage@10%": 0.615986727589208, "calibration/coverage@15%": 0.6795583115752828, "calibration/coverage@20%": 0.8546154264577893, "calibration/coverage@25%": 0.8957612597911228, "calibration/coverage@30%": 0.9583333333333333, "calibration/coverage@5%": 0.5100195822454309, "calibration/distribution_entropy_10": 0.647102852218405, "calibration/distribution_entropy_100": 0.3475252170040523, "calibration/ece": 0.11775878481288057, "calibration/mean_confidence": 0.7193620811575283, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005642361111111116, "completions/max_length": 3893.4, "completions/max_terminated_length": 3893.4, "completions/mean_length": 1168.6723388671876, "completions/mean_terminated_length": 1175.3999267578124, "completions/min_length": 0.0, "completions/min_terminated_length": 342.4, "epoch": 1.5504807692307692, "grad_norm": 0.0004638670652639121, "learning_rate": 2.373798076923077e-06, "loss": -0.0056, "num_tokens": 1648768276.0, "reward": 1.2972732782363892, "reward_std": 0.10741431713104248, "rewards/accuracy_reward": 0.7346354246139526, "rewards/brier_reward": 0.8657999157905578, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9940972328186035, "rewards/mean_confidence_reward": 0.703185749053955, "sampling/batch_mean_priority_error": 0.01578472222222221, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.21944444444444441, "sampling/error_ema_max": 0.15972749292850494, "sampling/error_ema_mean": 0.002255667420104146, "sampling/priority_kl": 0.02999972328543663, "sampling/priority_scale": 0.7503741204505786, "sampling/prob_entropy": 10.278932762145995, "sampling/prob_max": 5.313203946570866e-05, "sampling/prob_min": 2.015899335674476e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.5432000160217285, "sampling/prompt_draws_total": 46296.0, "sampling/seen_fraction": 0.8670133352279663, "sampling/unseen_fraction": 0.1329866647720337, "signal/accuracy_reward/centered_abs_mean": 0.10655924528837205, "signal/accuracy_reward/group_std_mean": 0.14632892608642578, "signal/accuracy_reward/group_zero_std_frac": 0.5611111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05327962264418602, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05327962264418602, "signal/advantage_abs_mean": 0.07435599118471145, "signal/advantage_pre_scale_abs_mean": 0.07435599118471145, "signal/advantage_pre_scale_std": 0.156350177526474, "signal/advantage_std": 0.156350177526474, "signal/brier_reward/centered_abs_mean": 0.06651555225253106, "signal/brier_reward/group_std_mean": 0.09179123044013977, "signal/brier_reward/group_zero_std_frac": 0.21111110746860504, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03325777612626553, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03325777612626553, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.010601128358393908, "signal/format_reward/group_std_mean": 0.022472953796386717, "signal/format_reward/group_zero_std_frac": 0.8972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005300564179196954, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005300564179196954, "signal/mean_confidence_reward/centered_abs_mean": 0.062262913584709166, "signal/mean_confidence_reward/group_std_mean": 0.08256838023662567, "signal/mean_confidence_reward/group_zero_std_frac": 0.2194444417953491, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.226291361599578e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.226291361599578e-07, "step": 645 }, { "calibration/aurc": 0.13022814826365287, "calibration/batch_distribution_entropy": 0.6004951907444687, "calibration/batch_entropy_100bins": 0.3225284149216801, "calibration/batch_entropy_10bins": 0.6004951907444687, "calibration/batch_entropy_50bins": 0.3796752315797697, "calibration/batch_uniqueness": 0.33640241705561535, "calibration/confidence_entropy": 0.4576013413890502, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.16230366492146597, "calibration/coverage@10%": 0.4286458333333334, "calibration/coverage@15%": 0.5036458333333333, "calibration/coverage@20%": 0.7052083333333334, "calibration/coverage@25%": 0.9473113001745201, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.40098985602094245, "calibration/distribution_entropy_10": 0.6004951907444687, "calibration/distribution_entropy_100": 0.3225284149216801, "calibration/ece": 0.1170646542321117, "calibration/mean_confidence": 0.754144169938918, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006249999999999978, "completions/max_length": 3870.4, "completions/max_terminated_length": 3870.4, "completions/mean_length": 1101.4054077148437, "completions/mean_terminated_length": 1108.5246948242188, "completions/min_length": 0.0, "completions/min_terminated_length": 342.4, "epoch": 1.5625, "grad_norm": 0.0004739153664559126, "learning_rate": 2.3437500000000002e-06, "loss": -0.0062, "num_tokens": 1664519922.0, "reward": 1.3281028270721436, "reward_std": 0.09923821091651916, "rewards/accuracy_reward": 0.7807291626930237, "rewards/brier_reward": 0.8817115902900696, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.99375, "rewards/mean_confidence_reward": 0.7447699666023254, "sampling/batch_mean_priority_error": 0.015923611111111097, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.20277777777777778, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0022721522953361275, "sampling/priority_kl": 0.029999472200870514, "sampling/priority_scale": 0.7521035611396656, "sampling/prob_entropy": 10.278953170776367, "sampling/prob_max": 5.332994842319749e-05, "sampling/prob_min": 2.0186370602459646e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.5552000045776366, "sampling/prompt_draws_total": 46656.0, "sampling/seen_fraction": 0.8695200085639954, "sampling/unseen_fraction": 0.13047999143600464, "signal/accuracy_reward/centered_abs_mean": 0.09236111044883728, "signal/accuracy_reward/group_std_mean": 0.12668517976999283, "signal/accuracy_reward/group_zero_std_frac": 0.6166666626930237, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04618055522441864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04618055522441864, "signal/advantage_abs_mean": 0.0662669561803341, "signal/advantage_pre_scale_abs_mean": 0.0662669561803341, "signal/advantage_pre_scale_std": 0.15250126719474794, "signal/advantage_std": 0.15250126719474794, "signal/brier_reward/centered_abs_mean": 0.061259324103593825, "signal/brier_reward/group_std_mean": 0.08748518377542495, "signal/brier_reward/group_zero_std_frac": 0.25833334028720856, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030629662051796912, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.030629662051796912, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.011512586940079927, "signal/format_reward/group_std_mean": 0.025058790668845178, "signal/format_reward/group_zero_std_frac": 0.8861111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0057562934700399635, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0057562934700399635, "signal/mean_confidence_reward/centered_abs_mean": 0.05550645664334297, "signal/mean_confidence_reward/group_std_mean": 0.07544975653290749, "signal/mean_confidence_reward/group_zero_std_frac": 0.2694444447755814, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.550645596485992e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.550645596485992e-07, "step": 650 }, { "epoch": 1.5625, "eval_calibration/aurc": 0.1271157797067916, "eval_calibration/batch_distribution_entropy": 0.6324314643752337, "eval_calibration/batch_entropy_100bins": 0.3458577555472716, "eval_calibration/batch_entropy_10bins": 0.6324314643752337, "eval_calibration/batch_entropy_50bins": 0.4071381538986471, "eval_calibration/batch_uniqueness": 0.38409579930558946, "eval_calibration/confidence_entropy": 0.45110729568686725, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.5148601398601399, "eval_calibration/coverage@15%": 0.7508741258741258, "eval_calibration/coverage@20%": 0.791083916083916, "eval_calibration/coverage@25%": 0.9475524475524476, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.6324314643752337, "eval_calibration/distribution_entropy_100": 0.3458577555472716, "eval_calibration/ece": 0.026005244755244586, "eval_calibration/mean_confidence": 0.7260052447552449, "eval_calibration/unique_confidence_per_question": 0.008680555555555556, "eval_calibration/unique_confidences": 10, "eval_completions/clipped_ratio": 0.006076388888888895, "eval_completions/max_length": 3015.1666666666665, "eval_completions/max_terminated_length": 3015.1666666666665, "eval_completions/mean_length": 1108.1156412760417, "eval_completions/mean_terminated_length": 1114.9664103190105, "eval_completions/min_length": 92.0, "eval_completions/min_terminated_length": 413.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 1664519922.0, "eval_reward": 1.2852014501889546, "eval_reward_std": 0.30990825096766156, "eval_rewards/accuracy_reward": 0.723090281089147, "eval_rewards/brier_reward": 0.8542426228523254, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9930555621782938, "eval_rewards/mean_confidence_reward": 0.7209634979565939, "eval_runtime": 211.902, "eval_samples_per_second": 4.719, "eval_signal/accuracy_reward/centered_abs_mean": 0.3867730001608531, "eval_signal/accuracy_reward/group_std_mean": 0.44401001930236816, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19338650008042654, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19338650008042654, "eval_signal/advantage_abs_mean": 0.25779375185569126, "eval_signal/advantage_pre_scale_abs_mean": 0.25779375185569126, "eval_signal/advantage_pre_scale_std": 0.30922891199588776, "eval_signal/advantage_std": 0.30922891199588776, "eval_signal/brier_reward/centered_abs_mean": 0.16802789767583212, "eval_signal/brier_reward/group_std_mean": 0.2282408873240153, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08401394883791606, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08401394883791606, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.013454860697189966, "eval_signal/format_reward/group_std_mean": 0.03928371022144953, "eval_signal/format_reward/group_zero_std_frac": 0.7777778108914694, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.006727430348594983, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.006727430348594983, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.20234372218449911, "eval_signal/mean_confidence_reward/group_std_mean": 0.24267150461673737, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.0234372755112418e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.0234372755112418e-06, "eval_steps_per_second": 0.028, "step": 650 }, { "epoch": 1.5625, "step": 650, "train_probe_calibration/aurc": 0.09629671541450747, "train_probe_calibration/batch_distribution_entropy": 0.6276142254740842, "train_probe_calibration/batch_entropy_100bins": 0.34791016850026435, "train_probe_calibration/batch_entropy_10bins": 0.6276142254740842, "train_probe_calibration/batch_entropy_50bins": 0.4095542212191468, "train_probe_calibration/batch_uniqueness": 0.36119736906450195, "train_probe_calibration/confidence_entropy": 0.4449701838043957, "train_probe_calibration/coverage@0%": 0.004370629370629371, "train_probe_calibration/coverage@1%": 0.004370629370629371, "train_probe_calibration/coverage@10%": 0.6704545454545454, "train_probe_calibration/coverage@15%": 0.8041958041958042, "train_probe_calibration/coverage@20%": 0.8767482517482518, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.004370629370629371, "train_probe_calibration/distribution_entropy_10": 0.6276142254740842, "train_probe_calibration/distribution_entropy_100": 0.34791016850026435, "train_probe_calibration/ece": 0.052840909090908994, "train_probe_calibration/mean_confidence": 0.7326486013986014, "train_probe_calibration/unique_confidence_per_question": 0.010416666666666666, "train_probe_calibration/unique_confidences": 12, "train_probe_completions/clipped_ratio": 0.006944444444444438, "train_probe_completions/max_length": 3299.8333333333335, "train_probe_completions/max_terminated_length": 3299.8333333333335, "train_probe_completions/mean_length": 1149.763203938802, "train_probe_completions/mean_terminated_length": 1157.6914672851562, "train_probe_completions/min_length": 59.0, "train_probe_completions/min_terminated_length": 355.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 1664519922.0, "train_probe_reward": 1.307287057240804, "train_probe_reward_std": 0.29700932403405506, "train_probe_rewards/accuracy_reward": 0.7491319278875986, "train_probe_rewards/brier_reward": 0.8723719517389933, "train_probe_rewards/confidence_one_or_zero": 0.0026041667442768812, "train_probe_rewards/format_reward": 0.9930555522441864, "train_probe_rewards/mean_confidence_reward": 0.7275607585906982, "train_probe_runtime": 209.4521, "train_probe_samples_per_second": 4.774, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3669162342945735, "train_probe_signal/accuracy_reward/group_std_mean": 0.4330385575691859, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.18345811714728674, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18345811714728674, "train_probe_signal/advantage_abs_mean": 0.24020937830209732, "train_probe_signal/advantage_pre_scale_abs_mean": 0.24020937830209732, "train_probe_signal/advantage_pre_scale_std": 0.29617064197858173, "train_probe_signal/advantage_std": 0.29617064197858173, "train_probe_signal/brier_reward/centered_abs_mean": 0.14779583364725113, "train_probe_signal/brier_reward/group_std_mean": 0.20587429155906042, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07389791682362556, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07389791682362556, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0050455727614462376, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.014731391333043575, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.9166666766007742, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.0455724931453005e-08, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.0455724931453005e-08, "train_probe_signal/format_reward/centered_abs_mean": 0.013454860697189966, "train_probe_signal/format_reward/group_std_mean": 0.03928371022144953, "train_probe_signal/format_reward/group_zero_std_frac": 0.7777778009573618, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.006727430348594983, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.006727430348594983, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.19929740081230798, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.24239403754472733, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.992973921005614e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.992973921005614e-06, "train_probe_steps_per_second": 0.029 }, { "calibration/aurc": 0.10075111118953009, "calibration/batch_distribution_entropy": 0.5685699983151254, "calibration/batch_entropy_100bins": 0.31157743001517346, "calibration/batch_entropy_10bins": 0.5685699983151254, "calibration/batch_entropy_50bins": 0.3667839093332813, "calibration/batch_uniqueness": 0.2724217686970425, "calibration/confidence_entropy": 0.4463080918393615, "calibration/coverage@0%": 0.11823734729493893, "calibration/coverage@1%": 0.11823734729493893, "calibration/coverage@10%": 0.5741792102966841, "calibration/coverage@15%": 0.8676347076788831, "calibration/coverage@20%": 0.9130890052356021, "calibration/coverage@25%": 0.9455497382198953, "calibration/coverage@30%": 0.9748691099476441, "calibration/coverage@5%": 0.2265570462478185, "calibration/distribution_entropy_10": 0.5685699983151254, "calibration/distribution_entropy_100": 0.31157743001517346, "calibration/ece": 0.10682209860383933, "calibration/mean_confidence": 0.7507646160558464, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0052951388888889065, "completions/max_length": 4049.8, "completions/max_terminated_length": 4049.8, "completions/mean_length": 1132.967041015625, "completions/mean_terminated_length": 1139.044775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 344.8, "epoch": 1.5745192307692308, "grad_norm": 0.0005549096968024969, "learning_rate": 2.3137019230769236e-06, "loss": -0.006, "num_tokens": 1680651318.0, "reward": 1.3247023105621338, "reward_std": 0.11366159170866012, "rewards/accuracy_reward": 0.7790798664093017, "rewards/brier_reward": 0.875691819190979, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9946180582046509, "rewards/mean_confidence_reward": 0.7438610911369323, "sampling/batch_mean_priority_error": 0.010328194444444435, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.25555555555555554, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0022848639637231827, "sampling/priority_kl": 0.030000000447034835, "sampling/priority_scale": 0.7543738663429395, "sampling/prob_entropy": 10.278957939147949, "sampling/prob_max": 5.354871318559162e-05, "sampling/prob_min": 2.0208099886076526e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.567199993133545, "sampling/prompt_draws_total": 47016.0, "sampling/seen_fraction": 0.8722799897193909, "sampling/unseen_fraction": 0.12772001028060914, "signal/accuracy_reward/centered_abs_mean": 0.11072590947151184, "signal/accuracy_reward/group_std_mean": 0.1466132655739784, "signal/accuracy_reward/group_zero_std_frac": 0.5805555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05536295473575592, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05536295473575592, "signal/advantage_abs_mean": 0.08102467954158783, "signal/advantage_pre_scale_abs_mean": 0.08102467954158783, "signal/advantage_pre_scale_std": 0.1670161157846451, "signal/advantage_std": 0.1670161157846451, "signal/brier_reward/centered_abs_mean": 0.07082972005009651, "signal/brier_reward/group_std_mean": 0.09686807245016098, "signal/brier_reward/group_zero_std_frac": 0.25555555820465087, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.035414860025048255, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.035414860025048255, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.009939236287027597, "signal/format_reward/group_std_mean": 0.022142794728279114, "signal/format_reward/group_zero_std_frac": 0.8972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004969618143513798, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004969618143513798, "signal/mean_confidence_reward/centered_abs_mean": 0.062287604063749315, "signal/mean_confidence_reward/group_std_mean": 0.08283003866672516, "signal/mean_confidence_reward/group_zero_std_frac": 0.27222222089767456, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.22875973022019e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.22875973022019e-07, "step": 655 }, { "calibration/aurc": 0.0863774517347887, "calibration/batch_distribution_entropy": 0.5152594103145469, "calibration/batch_entropy_100bins": 0.2818539806355876, "calibration/batch_entropy_10bins": 0.5152594103145469, "calibration/batch_entropy_50bins": 0.3317939456450146, "calibration/batch_uniqueness": 0.12971007215826963, "calibration/confidence_entropy": 0.4175701939998537, "calibration/coverage@0%": 0.004699753142956087, "calibration/coverage@1%": 0.2383308997922163, "calibration/coverage@10%": 0.6815367990148502, "calibration/coverage@15%": 0.7087168189730201, "calibration/coverage@20%": 0.8961352262837249, "calibration/coverage@25%": 0.9514360313315928, "calibration/coverage@30%": 0.9791122715404701, "calibration/coverage@5%": 0.47374620658072814, "calibration/distribution_entropy_10": 0.5152594103145469, "calibration/distribution_entropy_100": 0.2818539806355876, "calibration/ece": 0.10784367922937763, "calibration/mean_confidence": 0.7653679821743472, "calibration/unique_confidence_per_question": 0.025520833333333336, "calibration/unique_confidences": 9.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00546875, "completions/max_length": 3719.0, "completions/max_terminated_length": 3719.0, "completions/mean_length": 1113.911572265625, "completions/mean_terminated_length": 1120.09814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 338.6, "epoch": 1.5865384615384617, "grad_norm": 0.000522863061632961, "learning_rate": 2.283653846153846e-06, "loss": -0.0064, "num_tokens": 1696576859.0, "reward": 1.3091982364654542, "reward_std": 0.10364175438880921, "rewards/accuracy_reward": 0.7514756917953491, "rewards/brier_reward": 0.872548270225525, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9943576455116272, "rewards/mean_confidence_reward": 0.7404010176658631, "sampling/batch_mean_priority_error": 0.018774722222222213, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.20833333333333334, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0023031047079712153, "sampling/priority_kl": 0.029999990016222, "sampling/priority_scale": 0.756784874224104, "sampling/prob_entropy": 10.278948020935058, "sampling/prob_max": 5.3775295964442196e-05, "sampling/prob_min": 2.0230506925145165e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.579199981689453, "sampling/prompt_draws_total": 47376.0, "sampling/seen_fraction": 0.8750333428382874, "sampling/unseen_fraction": 0.12496665716171265, "signal/accuracy_reward/centered_abs_mean": 0.0965549036860466, "signal/accuracy_reward/group_std_mean": 0.1316729962825775, "signal/accuracy_reward/group_zero_std_frac": 0.6027777791023254, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0482774518430233, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0482774518430233, "signal/advantage_abs_mean": 0.07157290279865265, "signal/advantage_pre_scale_abs_mean": 0.07157290279865265, "signal/advantage_pre_scale_std": 0.15894106924533843, "signal/advantage_std": 0.15894106924533843, "signal/brier_reward/centered_abs_mean": 0.06766726225614547, "signal/brier_reward/group_std_mean": 0.09276285767555237, "signal/brier_reward/group_zero_std_frac": 0.29722222983837127, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.033833631128072736, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.033833631128072736, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.010508897621184588, "signal/format_reward/group_std_mean": 0.024353000894188882, "signal/format_reward/group_zero_std_frac": 0.8833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005254448810592294, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005254448810592294, "signal/mean_confidence_reward/centered_abs_mean": 0.057074010372161865, "signal/mean_confidence_reward/group_std_mean": 0.07592665776610374, "signal/mean_confidence_reward/group_zero_std_frac": 0.3027777820825577, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.707401101062714e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.707401101062714e-07, "step": 660 }, { "calibration/aurc": 0.12276055299354609, "calibration/batch_distribution_entropy": 0.5013685251115868, "calibration/batch_entropy_100bins": 0.27919717568735347, "calibration/batch_entropy_10bins": 0.5013685251115868, "calibration/batch_entropy_50bins": 0.3286663978466972, "calibration/batch_uniqueness": 0.1017443592414641, "calibration/confidence_entropy": 0.41964784067908434, "calibration/coverage@0%": 0.006303522885504853, "calibration/coverage@1%": 0.24174120324829168, "calibration/coverage@10%": 0.42533609125158983, "calibration/coverage@15%": 0.6526996167428637, "calibration/coverage@20%": 0.7504232809130803, "calibration/coverage@25%": 0.8795194169072108, "calibration/coverage@30%": 0.954172378872531, "calibration/coverage@5%": 0.3723595899461068, "calibration/distribution_entropy_10": 0.5013685251115868, "calibration/distribution_entropy_100": 0.27919717568735347, "calibration/ece": 0.12758788380365582, "calibration/mean_confidence": 0.7620131446358779, "calibration/unique_confidence_per_question": 0.025, "calibration/unique_confidences": 9.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0046875, "completions/max_length": 3796.8, "completions/max_terminated_length": 3796.8, "completions/mean_length": 1092.1130493164062, "completions/mean_terminated_length": 1097.2896240234375, "completions/min_length": 0.0, "completions/min_terminated_length": 324.4, "epoch": 1.5985576923076923, "grad_norm": 0.0005642919568344951, "learning_rate": 2.2536057692307694e-06, "loss": -0.0051, "num_tokens": 1712260593.0, "reward": 1.3006659269332885, "reward_std": 0.12148597985506057, "rewards/accuracy_reward": 0.7433159828186036, "rewards/brier_reward": 0.8627753138542176, "rewards/confidence_one_or_zero": 0.0004340277810115367, "rewards/format_reward": 0.995225703716278, "rewards/mean_confidence_reward": 0.7463836789131164, "sampling/batch_mean_priority_error": 0.014619930555555546, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.18055555555555552, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0023194745648652316, "sampling/priority_kl": 0.030000966414809226, "sampling/priority_scale": 0.7585157931083814, "sampling/prob_entropy": 10.278953742980956, "sampling/prob_max": 5.397429194999859e-05, "sampling/prob_min": 2.026002111961134e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.5911999940872192, "sampling/prompt_draws_total": 47736.0, "sampling/seen_fraction": 0.8773533344268799, "sampling/unseen_fraction": 0.12264666557312012, "signal/accuracy_reward/centered_abs_mean": 0.1187445729970932, "signal/accuracy_reward/group_std_mean": 0.15946634411811828, "signal/accuracy_reward/group_zero_std_frac": 0.5333333313465118, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0593722864985466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0593722864985466, "signal/advantage_abs_mean": 0.0857117235660553, "signal/advantage_pre_scale_abs_mean": 0.0857117235660553, "signal/advantage_pre_scale_std": 0.1751396745443344, "signal/advantage_std": 0.1751396745443344, "signal/brier_reward/centered_abs_mean": 0.07548005133867264, "signal/brier_reward/group_std_mean": 0.10238111168146133, "signal/brier_reward/group_zero_std_frac": 0.22222222685813903, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03774002566933632, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03774002566933632, "signal/confidence_one_or_zero/centered_abs_mean": 0.0008409288129769266, "signal/confidence_one_or_zero/group_std_mean": 0.0024552317336201668, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 8.409287488575501e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 8.409287488575501e-09, "signal/format_reward/centered_abs_mean": 0.008805338572710753, "signal/format_reward/group_std_mean": 0.021177830919623376, "signal/format_reward/group_zero_std_frac": 0.8944444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004402669286355377, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004402669286355377, "signal/mean_confidence_reward/centered_abs_mean": 0.06224251911044121, "signal/mean_confidence_reward/group_std_mean": 0.0842223346233368, "signal/mean_confidence_reward/group_zero_std_frac": 0.23611111640930177, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.224251819730853e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.224251819730853e-07, "step": 665 }, { "calibration/aurc": 0.14450476197851136, "calibration/batch_distribution_entropy": 0.5765642673767126, "calibration/batch_entropy_100bins": 0.32623106838479715, "calibration/batch_entropy_10bins": 0.5765642673767126, "calibration/batch_entropy_50bins": 0.3840339353281199, "calibration/batch_uniqueness": 0.3014844645350637, "calibration/confidence_entropy": 0.44685345164390433, "calibration/coverage@0%": 0.10156524122777513, "calibration/coverage@1%": 0.24797121854016604, "calibration/coverage@10%": 0.39848095112594883, "calibration/coverage@15%": 0.526121066747444, "calibration/coverage@20%": 0.70951778967666, "calibration/coverage@25%": 0.736237360957404, "calibration/coverage@30%": 0.9067455000069071, "calibration/coverage@5%": 0.26001310335691996, "calibration/distribution_entropy_10": 0.5765642673767126, "calibration/distribution_entropy_100": 0.32623106838479715, "calibration/ece": 0.12060073742183959, "calibration/mean_confidence": 0.7040622262912196, "calibration/unique_confidence_per_question": 0.026041666666666668, "calibration/unique_confidences": 10.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008159722222222231, "completions/max_length": 3857.2, "completions/max_terminated_length": 3857.2, "completions/mean_length": 1086.3653076171875, "completions/mean_terminated_length": 1095.41630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 307.0, "epoch": 1.6105769230769231, "grad_norm": 0.0005110831116326153, "learning_rate": 2.2235576923076924e-06, "loss": -0.0096, "num_tokens": 1727855969.0, "reward": 1.2902185916900635, "reward_std": 0.11893133968114852, "rewards/accuracy_reward": 0.7259548664093017, "rewards/brier_reward": 0.8628010869026184, "rewards/confidence_one_or_zero": 0.0009548611182253808, "rewards/format_reward": 0.9916666626930237, "rewards/mean_confidence_reward": 0.7324783086776734, "sampling/batch_mean_priority_error": 0.013338541666666653, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.20833333333333334, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0023346453439444304, "sampling/priority_kl": 0.030000185966491698, "sampling/priority_scale": 0.760347467684187, "sampling/prob_entropy": 10.278972244262695, "sampling/prob_max": 5.417778957053088e-05, "sampling/prob_min": 2.02883380552521e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6032000064849854, "sampling/prompt_draws_total": 48096.0, "sampling/seen_fraction": 0.8797000050544739, "sampling/unseen_fraction": 0.12029999494552612, "signal/accuracy_reward/centered_abs_mean": 0.11508788913488388, "signal/accuracy_reward/group_std_mean": 0.15065041184425354, "signal/accuracy_reward/group_zero_std_frac": 0.5777777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05754394456744194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05754394456744194, "signal/advantage_abs_mean": 0.08531344085931777, "signal/advantage_pre_scale_abs_mean": 0.08531344085931777, "signal/advantage_pre_scale_std": 0.17639075517654418, "signal/advantage_std": 0.17639075517654418, "signal/brier_reward/centered_abs_mean": 0.0753116026520729, "signal/brier_reward/group_std_mean": 0.10187897086143494, "signal/brier_reward/group_zero_std_frac": 0.2527777820825577, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03765580132603645, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03765580132603645, "signal/confidence_one_or_zero/centered_abs_mean": 0.001719835086259991, "signal/confidence_one_or_zero/group_std_mean": 0.0033398654311895372, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.7198350121816475e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.7198350121816475e-08, "signal/format_reward/centered_abs_mean": 0.014876302052289247, "signal/format_reward/group_std_mean": 0.029834812879562377, "signal/format_reward/group_zero_std_frac": 0.8722222328186036, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007438151026144623, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007438151026144623, "signal/mean_confidence_reward/centered_abs_mean": 0.06751834452152253, "signal/mean_confidence_reward/group_std_mean": 0.08781284093856812, "signal/mean_confidence_reward/group_zero_std_frac": 0.26111111640930174, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.751834575879911e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.751834575879911e-07, "step": 670 }, { "calibration/aurc": 0.10970071896599873, "calibration/batch_distribution_entropy": 0.568013647591677, "calibration/batch_entropy_100bins": 0.3102414092567368, "calibration/batch_entropy_10bins": 0.568013647591677, "calibration/batch_entropy_50bins": 0.3652111673130844, "calibration/batch_uniqueness": 0.22665178946390013, "calibration/confidence_entropy": 0.4394298617068798, "calibration/coverage@0%": 0.0010443864229765013, "calibration/coverage@1%": 0.11925019117231687, "calibration/coverage@10%": 0.5128259821504217, "calibration/coverage@15%": 0.7708662648180452, "calibration/coverage@20%": 0.8658396625012432, "calibration/coverage@25%": 0.9119364882506528, "calibration/coverage@30%": 0.9583999999999999, "calibration/coverage@5%": 0.27560069804636217, "calibration/distribution_entropy_10": 0.568013647591677, "calibration/distribution_entropy_100": 0.3102414092567368, "calibration/ece": 0.07774484295633795, "calibration/mean_confidence": 0.736116044646728, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011805555555555559, "completions/max_length": 3540.6, "completions/max_terminated_length": 3540.6, "completions/mean_length": 1091.9043701171875, "completions/mean_terminated_length": 1105.053369140625, "completions/min_length": 0.0, "completions/min_terminated_length": 341.6, "epoch": 1.6225961538461537, "grad_norm": 0.0005459992680698633, "learning_rate": 2.1935096153846157e-06, "loss": -0.0141, "num_tokens": 1743536499.0, "reward": 1.2934834003448485, "reward_std": 0.12105322629213333, "rewards/accuracy_reward": 0.7386284708976746, "rewards/brier_reward": 0.8602157592773437, "rewards/confidence_one_or_zero": 0.0006076389050576836, "rewards/format_reward": 0.9881076335906982, "rewards/mean_confidence_reward": 0.7474018931388855, "sampling/batch_mean_priority_error": 0.017633680555555545, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.16388888888888892, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0023507375735789537, "sampling/priority_kl": 0.03000086210668087, "sampling/priority_scale": 0.7620475710136816, "sampling/prob_entropy": 10.27895736694336, "sampling/prob_max": 5.437384315882809e-05, "sampling/prob_min": 2.031473413808271e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6151999950408935, "sampling/prompt_draws_total": 48456.0, "sampling/seen_fraction": 0.8819133400917053, "sampling/unseen_fraction": 0.11808665990829467, "signal/accuracy_reward/centered_abs_mean": 0.10015733391046525, "signal/accuracy_reward/group_std_mean": 0.13727897256612778, "signal/accuracy_reward/group_zero_std_frac": 0.5833333492279053, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05007866695523262, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05007866695523262, "signal/advantage_abs_mean": 0.07990130856633186, "signal/advantage_pre_scale_abs_mean": 0.07990130856633186, "signal/advantage_pre_scale_std": 0.1813565194606781, "signal/advantage_std": 0.1813565194606781, "signal/brier_reward/centered_abs_mean": 0.06943394243717194, "signal/brier_reward/group_std_mean": 0.09994765222072602, "signal/brier_reward/group_zero_std_frac": 0.2611111134290695, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03471697121858597, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03471697121858597, "signal/confidence_one_or_zero/centered_abs_mean": 0.0011555989156477153, "signal/confidence_one_or_zero/group_std_mean": 0.002839442901313305, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.1555989232192587e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.1555989232192587e-08, "signal/format_reward/centered_abs_mean": 0.021370442770421504, "signal/format_reward/group_std_mean": 0.04681904092431068, "signal/format_reward/group_zero_std_frac": 0.7805555582046508, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010685221385210752, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010685221385210752, "signal/mean_confidence_reward/centered_abs_mean": 0.061398386210203174, "signal/mean_confidence_reward/group_std_mean": 0.08617035299539566, "signal/mean_confidence_reward/group_zero_std_frac": 0.2722222238779068, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.139838546914688e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.139838546914688e-07, "step": 675 }, { "calibration/aurc": 0.09587670960100644, "calibration/batch_distribution_entropy": 0.6175683596353149, "calibration/batch_entropy_100bins": 0.3329512515331028, "calibration/batch_entropy_10bins": 0.6175683596353149, "calibration/batch_entropy_50bins": 0.39194482619865295, "calibration/batch_uniqueness": 0.3871926237126157, "calibration/confidence_entropy": 0.46478713643106306, "calibration/coverage@0%": 0.0843501326259947, "calibration/coverage@1%": 0.0843501326259947, "calibration/coverage@10%": 0.5346998344195839, "calibration/coverage@15%": 0.7661888774463907, "calibration/coverage@20%": 0.892814975451509, "calibration/coverage@25%": 0.9522546419098143, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.40982914163222484, "calibration/distribution_entropy_10": 0.6175683596353149, "calibration/distribution_entropy_100": 0.3329512515331028, "calibration/ece": 0.10635281530775749, "calibration/mean_confidence": 0.738215510844212, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013541666666666697, "completions/max_length": 3670.6, "completions/max_terminated_length": 3670.6, "completions/mean_length": 1083.8117431640626, "completions/mean_terminated_length": 1098.7698486328125, "completions/min_length": 0.0, "completions/min_terminated_length": 303.6, "epoch": 1.6346153846153846, "grad_norm": 0.0005681436159648001, "learning_rate": 2.1634615384615387e-06, "loss": -0.0154, "num_tokens": 1759137466.0, "reward": 1.2990227937698364, "reward_std": 0.12866669595241548, "rewards/accuracy_reward": 0.7521701455116272, "rewards/brier_reward": 0.8594894886016846, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9863715291023254, "rewards/mean_confidence_reward": 0.724100124835968, "sampling/batch_mean_priority_error": 0.014114583333333323, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.18611111111111112, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0023681627586483955, "sampling/priority_kl": 0.03000106252729893, "sampling/priority_scale": 0.7635146678658202, "sampling/prob_entropy": 10.278950500488282, "sampling/prob_max": 5.456006110762246e-05, "sampling/prob_min": 2.0344129006844013e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6272000074386597, "sampling/prompt_draws_total": 48816.0, "sampling/seen_fraction": 0.8839533448219299, "sampling/unseen_fraction": 0.11604665517807007, "signal/accuracy_reward/centered_abs_mean": 0.10556098073720932, "signal/accuracy_reward/group_std_mean": 0.1469675600528717, "signal/accuracy_reward/group_zero_std_frac": 0.5499999940395355, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05278049036860466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05278049036860466, "signal/advantage_abs_mean": 0.08514032810926438, "signal/advantage_pre_scale_abs_mean": 0.08514032810926438, "signal/advantage_pre_scale_std": 0.18456737399101258, "signal/advantage_std": 0.18456737399101258, "signal/brier_reward/centered_abs_mean": 0.0730242095887661, "signal/brier_reward/group_std_mean": 0.10391690582036972, "signal/brier_reward/group_zero_std_frac": 0.2472222238779068, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03651210479438305, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03651210479438305, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.0242567278444767, "signal/format_reward/group_std_mean": 0.04920388534665108, "signal/format_reward/group_zero_std_frac": 0.7861111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01212836392223835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01212836392223835, "signal/mean_confidence_reward/centered_abs_mean": 0.06222810372710228, "signal/mean_confidence_reward/group_std_mean": 0.08487941175699235, "signal/mean_confidence_reward/group_zero_std_frac": 0.2555555611848831, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.222810156941705e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.222810156941705e-07, "step": 680 }, { "calibration/aurc": 0.11139059240130803, "calibration/batch_distribution_entropy": 0.5826805325067836, "calibration/batch_entropy_100bins": 0.31370010142444527, "calibration/batch_entropy_10bins": 0.5826805325067836, "calibration/batch_entropy_50bins": 0.3692826837717405, "calibration/batch_uniqueness": 0.26415515433613046, "calibration/confidence_entropy": 0.44579667814925283, "calibration/coverage@0%": 0.0015789473684210526, "calibration/coverage@1%": 0.21343152310484434, "calibration/coverage@10%": 0.6413132971561079, "calibration/coverage@15%": 0.6920358264160477, "calibration/coverage@20%": 0.7286971880429265, "calibration/coverage@25%": 0.9074074074074074, "calibration/coverage@30%": 0.9465608465608465, "calibration/coverage@5%": 0.2941253664665643, "calibration/distribution_entropy_10": 0.5826805325067836, "calibration/distribution_entropy_100": 0.31370010142444527, "calibration/ece": 0.0968422284319643, "calibration/mean_confidence": 0.7433893348079172, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01909722222222223, "completions/max_length": 3731.2, "completions/max_terminated_length": 3731.2, "completions/mean_length": 1044.6437622070312, "completions/mean_terminated_length": 1065.1299072265624, "completions/min_length": 0.0, "completions/min_terminated_length": 362.4, "epoch": 1.6466346153846154, "grad_norm": 0.0005210732924751937, "learning_rate": 2.1334134615384616e-06, "loss": -0.0232, "num_tokens": 1774231634.0, "reward": 1.3034531831741334, "reward_std": 0.14097131937742233, "rewards/accuracy_reward": 0.7689236164093017, "rewards/brier_reward": 0.8570653319358825, "rewards/confidence_one_or_zero": 0.0004340277868323028, "rewards/format_reward": 0.9809027671813965, "rewards/mean_confidence_reward": 0.7295277953147888, "sampling/batch_mean_priority_error": 0.0184565972222222, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.16111111111111112, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.002385252341628075, "sampling/priority_kl": 0.029998932778835297, "sampling/priority_scale": 0.7653319059638306, "sampling/prob_entropy": 10.278940200805664, "sampling/prob_max": 5.4764158267062155e-05, "sampling/prob_min": 2.0372347353259102e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6392000198364258, "sampling/prompt_draws_total": 49176.0, "sampling/seen_fraction": 0.8861200094223023, "sampling/unseen_fraction": 0.11387999057769775, "signal/accuracy_reward/centered_abs_mean": 0.10553385317325592, "signal/accuracy_reward/group_std_mean": 0.14716241806745528, "signal/accuracy_reward/group_zero_std_frac": 0.550000011920929, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05276692658662796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05276692658662796, "signal/advantage_abs_mean": 0.09143907949328423, "signal/advantage_pre_scale_abs_mean": 0.09143907949328423, "signal/advantage_pre_scale_std": 0.19914956390857697, "signal/advantage_std": 0.19914956390857697, "signal/brier_reward/centered_abs_mean": 0.07749231234192848, "signal/brier_reward/group_std_mean": 0.11150548756122589, "signal/brier_reward/group_zero_std_frac": 0.21944444477558137, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03874615617096424, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03874615617096424, "signal/confidence_one_or_zero/centered_abs_mean": 0.0008192274020984769, "signal/confidence_one_or_zero/group_std_mean": 0.0018573501612991095, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 8.192273526219651e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 8.192273526219651e-09, "signal/format_reward/centered_abs_mean": 0.03379991315305233, "signal/format_reward/group_std_mean": 0.06723003908991813, "signal/format_reward/group_zero_std_frac": 0.7111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.016899956576526164, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016899956576526164, "signal/mean_confidence_reward/centered_abs_mean": 0.06801529824733735, "signal/mean_confidence_reward/group_std_mean": 0.09337150603532791, "signal/mean_confidence_reward/group_zero_std_frac": 0.2333333373069763, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.80152959375846e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.80152959375846e-07, "step": 685 }, { "calibration/aurc": 0.15859837238043628, "calibration/batch_distribution_entropy": 0.5560990964924017, "calibration/batch_entropy_100bins": 0.30055202226479294, "calibration/batch_entropy_10bins": 0.5560990964924017, "calibration/batch_entropy_50bins": 0.3538049777191362, "calibration/batch_uniqueness": 0.23692587593867795, "calibration/confidence_entropy": 0.4395761004833318, "calibration/coverage@0%": 0.10421626984126986, "calibration/coverage@1%": 0.10421626984126986, "calibration/coverage@10%": 0.4268589877471457, "calibration/coverage@15%": 0.5054053536619326, "calibration/coverage@20%": 0.6296827137287664, "calibration/coverage@25%": 0.6529631370091896, "calibration/coverage@30%": 0.8776994569757728, "calibration/coverage@5%": 0.2671792328042328, "calibration/distribution_entropy_10": 0.5560990964924017, "calibration/distribution_entropy_100": 0.30055202226479294, "calibration/ece": 0.1268294085909217, "calibration/mean_confidence": 0.7558809610832637, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012326388888888883, "completions/max_length": 3738.0, "completions/max_terminated_length": 3738.0, "completions/mean_length": 1039.7527099609374, "completions/mean_terminated_length": 1052.7323120117187, "completions/min_length": 0.0, "completions/min_terminated_length": 337.0, "epoch": 1.6586538461538463, "grad_norm": 0.0005724552902393043, "learning_rate": 2.103365384615385e-06, "loss": -0.0163, "num_tokens": 1789305713.0, "reward": 1.2902822971343995, "reward_std": 0.12336039692163467, "rewards/accuracy_reward": 0.7328124880790711, "rewards/brier_reward": 0.8601503968238831, "rewards/confidence_one_or_zero": 0.0006076388934161514, "rewards/format_reward": 0.9875868082046508, "rewards/mean_confidence_reward": 0.7491449594497681, "sampling/batch_mean_priority_error": 0.016593749999999984, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.18055555555555555, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0024040149059146644, "sampling/priority_kl": 0.029999926313757898, "sampling/priority_scale": 0.7672343671089038, "sampling/prob_entropy": 10.27895393371582, "sampling/prob_max": 5.49718904949259e-05, "sampling/prob_min": 2.039941209659446e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.651200008392334, "sampling/prompt_draws_total": 49536.0, "sampling/seen_fraction": 0.8882933378219604, "sampling/unseen_fraction": 0.11170666217803955, "signal/accuracy_reward/centered_abs_mean": 0.0985568568110466, "signal/accuracy_reward/group_std_mean": 0.13777171075344086, "signal/accuracy_reward/group_zero_std_frac": 0.5722222268581391, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0492784284055233, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0492784284055233, "signal/advantage_abs_mean": 0.08106912076473236, "signal/advantage_pre_scale_abs_mean": 0.08106912076473236, "signal/advantage_pre_scale_std": 0.1815045118331909, "signal/advantage_std": 0.1815045118331909, "signal/brier_reward/centered_abs_mean": 0.06963968127965928, "signal/brier_reward/group_std_mean": 0.10002299994230271, "signal/brier_reward/group_zero_std_frac": 0.2666666656732559, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03481984063982964, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03481984063982964, "signal/confidence_one_or_zero/centered_abs_mean": 0.0011555989389307798, "signal/confidence_one_or_zero/group_std_mean": 0.0028394428081810474, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.1555989232192587e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.1555989232192587e-08, "signal/format_reward/centered_abs_mean": 0.02232530377805233, "signal/format_reward/group_std_mean": 0.047450629994273186, "signal/format_reward/group_zero_std_frac": 0.7861111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011162651889026164, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011162651889026164, "signal/mean_confidence_reward/centered_abs_mean": 0.0594577394425869, "signal/mean_confidence_reward/group_std_mean": 0.08299815356731415, "signal/mean_confidence_reward/group_zero_std_frac": 0.275, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.945773523308162e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.945773523308162e-07, "step": 690 }, { "calibration/aurc": 0.14306273310915993, "calibration/batch_distribution_entropy": 0.5412362104100029, "calibration/batch_entropy_100bins": 0.28788372913303284, "calibration/batch_entropy_10bins": 0.5412362104100029, "calibration/batch_entropy_50bins": 0.3388920680156938, "calibration/batch_uniqueness": 0.19993184384829876, "calibration/confidence_entropy": 0.43030819920007846, "calibration/coverage@0%": 0.0015679466324005852, "calibration/coverage@1%": 0.0015679466324005852, "calibration/coverage@10%": 0.29376102142085764, "calibration/coverage@15%": 0.5923256735882328, "calibration/coverage@20%": 0.6827225130890052, "calibration/coverage@25%": 0.9539267015706805, "calibration/coverage@30%": 0.9952879581151833, "calibration/coverage@5%": 0.12271677169767474, "calibration/distribution_entropy_10": 0.5412362104100029, "calibration/distribution_entropy_100": 0.28788372913303284, "calibration/ece": 0.10015638456385936, "calibration/mean_confidence": 0.7878304375760394, "calibration/unique_confidence_per_question": 0.020312499999999997, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011805555555555559, "completions/max_length": 3752.0, "completions/max_terminated_length": 3752.0, "completions/mean_length": 1083.738134765625, "completions/mean_terminated_length": 1096.7212158203124, "completions/min_length": 0.0, "completions/min_terminated_length": 365.8, "epoch": 1.6706730769230769, "grad_norm": 0.0005091908969916403, "learning_rate": 2.073317307692308e-06, "loss": -0.014, "num_tokens": 1804906792.0, "reward": 1.2935439109802247, "reward_std": 0.13235266357660294, "rewards/accuracy_reward": 0.7440104126930237, "rewards/brier_reward": 0.8550411939620972, "rewards/confidence_one_or_zero": 0.0004340277810115367, "rewards/format_reward": 0.9880208134651184, "rewards/mean_confidence_reward": 0.7656136989593506, "sampling/batch_mean_priority_error": 0.015585138888888872, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.19444444444444448, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0024218444246798753, "sampling/priority_kl": 0.030001531541347503, "sampling/priority_scale": 0.7690952242119238, "sampling/prob_entropy": 10.278961181640625, "sampling/prob_max": 5.517598328879103e-05, "sampling/prob_min": 2.0425973343662917e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6631999969482423, "sampling/prompt_draws_total": 49896.0, "sampling/seen_fraction": 0.8903999924659729, "sampling/unseen_fraction": 0.1096000075340271, "signal/accuracy_reward/centered_abs_mean": 0.10761176198720931, "signal/accuracy_reward/group_std_mean": 0.14982850253582, "signal/accuracy_reward/group_zero_std_frac": 0.5416666805744171, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05380588099360466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05380588099360466, "signal/advantage_abs_mean": 0.0892701119184494, "signal/advantage_pre_scale_abs_mean": 0.0892701119184494, "signal/advantage_pre_scale_std": 0.19025329649448394, "signal/advantage_std": 0.19025329649448394, "signal/brier_reward/centered_abs_mean": 0.0762951672077179, "signal/brier_reward/group_std_mean": 0.10774291157722474, "signal/brier_reward/group_zero_std_frac": 0.28333333134651184, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03814758360385895, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03814758360385895, "signal/confidence_one_or_zero/centered_abs_mean": 0.0008409288129769266, "signal/confidence_one_or_zero/group_std_mean": 0.0024552317336201668, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 8.409287488575501e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 8.409287488575501e-09, "signal/format_reward/centered_abs_mean": 0.02054036445915699, "signal/format_reward/group_std_mean": 0.04146440476179123, "signal/format_reward/group_zero_std_frac": 0.8222222208976746, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010270182229578494, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010270182229578494, "signal/mean_confidence_reward/centered_abs_mean": 0.05742166191339493, "signal/mean_confidence_reward/group_std_mean": 0.08025620728731156, "signal/mean_confidence_reward/group_zero_std_frac": 0.31388888955116273, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.74216630866431e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.74216630866431e-07, "step": 695 }, { "calibration/aurc": 0.17708395332896149, "calibration/batch_distribution_entropy": 0.47968307546433503, "calibration/batch_entropy_100bins": 0.2581500156911111, "calibration/batch_entropy_10bins": 0.47968307546433503, "calibration/batch_entropy_50bins": 0.30389002163931644, "calibration/batch_uniqueness": 0.0036444937827355916, "calibration/confidence_entropy": 0.41149710071485873, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.1581151832460733, "calibration/coverage@10%": 0.18272251308900525, "calibration/coverage@15%": 0.3733681462140992, "calibration/coverage@20%": 0.6550717906875544, "calibration/coverage@25%": 0.7416890339425588, "calibration/coverage@30%": 0.8869976764968722, "calibration/coverage@5%": 0.17172774869109947, "calibration/distribution_entropy_10": 0.47968307546433503, "calibration/distribution_entropy_100": 0.2581500156911111, "calibration/ece": 0.11511028309375874, "calibration/mean_confidence": 0.779884540079836, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0140625, "completions/max_length": 4008.0, "completions/max_terminated_length": 4008.0, "completions/mean_length": 1111.742431640625, "completions/mean_terminated_length": 1127.698046875, "completions/min_length": 0.0, "completions/min_terminated_length": 359.6, "epoch": 1.6826923076923077, "grad_norm": 0.0005041222320869565, "learning_rate": 2.043269230769231e-06, "loss": -0.0171, "num_tokens": 1820841809.0, "reward": 1.272655439376831, "reward_std": 0.13738891631364822, "rewards/accuracy_reward": 0.7166666626930237, "rewards/brier_reward": 0.8430387616157532, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9855902791023254, "rewards/mean_confidence_reward": 0.7627676367759705, "sampling/batch_mean_priority_error": 0.02263194444444442, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.17777777777777776, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0024414270650595427, "sampling/priority_kl": 0.030000188574194907, "sampling/priority_scale": 0.7714008987648413, "sampling/prob_entropy": 10.278949165344239, "sampling/prob_max": 5.540331112570129e-05, "sampling/prob_min": 2.044780703727156e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6751999855041504, "sampling/prompt_draws_total": 50256.0, "sampling/seen_fraction": 0.8926600098609925, "sampling/unseen_fraction": 0.10733999013900757, "signal/accuracy_reward/centered_abs_mean": 0.11134982705116273, "signal/accuracy_reward/group_std_mean": 0.15158891975879668, "signal/accuracy_reward/group_zero_std_frac": 0.5500000178813934, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05567491352558136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05567491352558136, "signal/advantage_abs_mean": 0.09317022860050202, "signal/advantage_pre_scale_abs_mean": 0.09317022860050202, "signal/advantage_pre_scale_std": 0.19810806214809418, "signal/advantage_std": 0.19810806214809418, "signal/brier_reward/centered_abs_mean": 0.07885801494121551, "signal/brier_reward/group_std_mean": 0.11172184944152833, "signal/brier_reward/group_zero_std_frac": 0.30555555820465086, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.039429007470607756, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.039429007470607756, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.02542317658662796, "signal/format_reward/group_std_mean": 0.05069509223103523, "signal/format_reward/group_zero_std_frac": 0.7833333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01271158829331398, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01271158829331398, "signal/mean_confidence_reward/centered_abs_mean": 0.059342540055513385, "signal/mean_confidence_reward/group_std_mean": 0.08301161825656891, "signal/mean_confidence_reward/group_zero_std_frac": 0.338888892531395, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.934253863415507e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.934253863415507e-07, "step": 700 }, { "epoch": 1.6826923076923077, "eval_calibration/aurc": 0.14504904838655935, "eval_calibration/batch_distribution_entropy": 0.5285209554550367, "eval_calibration/batch_entropy_100bins": 0.28874062383944954, "eval_calibration/batch_entropy_10bins": 0.5285209554550367, "eval_calibration/batch_entropy_50bins": 0.33990079059964734, "eval_calibration/batch_uniqueness": 0.12871531778070863, "eval_calibration/confidence_entropy": 0.42277103690494733, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.7372583479789103, "eval_calibration/coverage@20%": 0.8295254833040422, "eval_calibration/coverage@25%": 0.9332161687170475, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.5285209554550367, "eval_calibration/distribution_entropy_100": 0.28874062383944954, "eval_calibration/ece": 0.05492091388400712, "eval_calibration/mean_confidence": 0.779701230228471, "eval_calibration/unique_confidence_per_question": 0.009548611111111112, "eval_calibration/unique_confidences": 11, "eval_completions/clipped_ratio": 0.009548611111111105, "eval_completions/max_length": 2959.8333333333335, "eval_completions/max_terminated_length": 2959.8333333333335, "eval_completions/mean_length": 1100.4611206054688, "eval_completions/mean_terminated_length": 1111.1649169921875, "eval_completions/min_length": 157.33333333333334, "eval_completions/min_terminated_length": 408.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 1820841809.0, "eval_reward": 1.2722798387209575, "eval_reward_std": 0.3444438676039378, "eval_rewards/accuracy_reward": 0.7170138855775198, "eval_rewards/brier_reward": 0.8396831452846527, "eval_rewards/confidence_one_or_zero": 0.0008680555814256271, "eval_rewards/format_reward": 0.987847218910853, "eval_rewards/mean_confidence_reward": 0.7702256739139557, "eval_runtime": 206.6268, "eval_samples_per_second": 4.84, "eval_signal/accuracy_reward/centered_abs_mean": 0.3889973958333333, "eval_signal/accuracy_reward/group_std_mean": 0.4453851133584976, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19449869791666666, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19449869791666666, "eval_signal/advantage_abs_mean": 0.2865140189727147, "eval_signal/advantage_pre_scale_abs_mean": 0.2865140189727147, "eval_signal/advantage_pre_scale_std": 0.343355471889178, "eval_signal/advantage_std": 0.343355471889178, "eval_signal/brier_reward/centered_abs_mean": 0.19361031303803125, "eval_signal/brier_reward/group_std_mean": 0.2558669224381447, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09680515651901563, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.09680515651901563, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0016818575871487458, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0049104637776811915, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222288449606, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151002e-08, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151002e-08, "eval_signal/format_reward/centered_abs_mean": 0.023111979166666668, "eval_signal/format_reward/group_std_mean": 0.05925192994376024, "eval_signal/format_reward/group_zero_std_frac": 0.694444457689921, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011555989583333334, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.011555989583333334, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.1663682535290718, "eval_signal/mean_confidence_reward/group_std_mean": 0.2147673244277636, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.6636825156031894e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.6636825156031894e-06, "eval_steps_per_second": 0.029, "step": 700 }, { "epoch": 1.6826923076923077, "step": 700, "train_probe_calibration/aurc": 0.09761829923854591, "train_probe_calibration/batch_distribution_entropy": 0.5113301053234623, "train_probe_calibration/batch_entropy_100bins": 0.2753161203855543, "train_probe_calibration/batch_entropy_10bins": 0.5113301053234623, "train_probe_calibration/batch_entropy_50bins": 0.3240976823403679, "train_probe_calibration/batch_uniqueness": 0.0860380738460258, "train_probe_calibration/confidence_entropy": 0.4179497070733501, "train_probe_calibration/coverage@0%": 0.000877963125548727, "train_probe_calibration/coverage@1%": 0.000877963125548727, "train_probe_calibration/coverage@10%": 0.6611062335381914, "train_probe_calibration/coverage@15%": 0.8674275680421423, "train_probe_calibration/coverage@20%": 0.9429323968393327, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.000877963125548727, "train_probe_calibration/distribution_entropy_10": 0.5113301053234623, "train_probe_calibration/distribution_entropy_100": 0.2753161203855543, "train_probe_calibration/ece": 0.04934152765583824, "train_probe_calibration/mean_confidence": 0.7838454784899035, "train_probe_calibration/unique_confidence_per_question": 0.008680555555555556, "train_probe_calibration/unique_confidences": 10, "train_probe_completions/clipped_ratio": 0.012847222222222213, "train_probe_completions/max_length": 3267.8333333333335, "train_probe_completions/max_terminated_length": 3267.8333333333335, "train_probe_completions/mean_length": 1117.0877278645833, "train_probe_completions/mean_terminated_length": 1131.6324666341145, "train_probe_completions/min_length": 68.83333333333333, "train_probe_completions/min_terminated_length": 401.6666666666667, "train_probe_loss": 0.0, "train_probe_num_tokens": 1820841809.0, "train_probe_reward": 1.3121604919433594, "train_probe_reward_std": 0.3140617559353511, "train_probe_rewards/accuracy_reward": 0.7647569477558136, "train_probe_rewards/brier_reward": 0.8708333671092987, "train_probe_rewards/confidence_one_or_zero": 0.0008680555814256271, "train_probe_rewards/format_reward": 0.9887152711550394, "train_probe_rewards/mean_confidence_reward": 0.7749999562899271, "train_probe_runtime": 215.9823, "train_probe_samples_per_second": 4.63, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3469509482383728, "train_probe_signal/accuracy_reward/group_std_mean": 0.41993160049120587, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1734754741191864, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.1734754741191864, "train_probe_signal/advantage_abs_mean": 0.2481620361407598, "train_probe_signal/advantage_pre_scale_abs_mean": 0.2481620361407598, "train_probe_signal/advantage_pre_scale_std": 0.31520261367162067, "train_probe_signal/advantage_std": 0.31520261367162067, "train_probe_signal/brier_reward/centered_abs_mean": 0.15899253884951273, "train_probe_signal/brier_reward/group_std_mean": 0.2224351391196251, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07949626942475636, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07949626942475636, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0016818575871487458, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0049104637776811915, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222288449606, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151002e-08, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151002e-08, "train_probe_signal/format_reward/centered_abs_mean": 0.021647135261446238, "train_probe_signal/format_reward/group_std_mean": 0.057857212610542774, "train_probe_signal/format_reward/group_zero_std_frac": 0.6944444626569748, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010823567630723119, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.010823567630723119, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.1647297888994217, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.21364916115999222, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.6472978738117188e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.6472978738117188e-06, "train_probe_steps_per_second": 0.028 }, { "calibration/aurc": 0.09527763748953584, "calibration/batch_distribution_entropy": 0.4666990231139505, "calibration/batch_entropy_100bins": 0.2543638325193935, "calibration/batch_entropy_10bins": 0.4666990231139505, "calibration/batch_entropy_50bins": 0.2994329880695009, "calibration/batch_uniqueness": -0.0006402490523223392, "calibration/confidence_entropy": 0.41268119792488084, "calibration/coverage@0%": 0.001591715136898851, "calibration/coverage@1%": 0.11693262422780795, "calibration/coverage@10%": 0.6053837453253181, "calibration/coverage@15%": 0.6613588502464217, "calibration/coverage@20%": 0.9261853853413919, "calibration/coverage@25%": 0.9798408488063661, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.4151094610454094, "calibration/distribution_entropy_10": 0.4666990231139505, "calibration/distribution_entropy_100": 0.2543638325193935, "calibration/ece": 0.10833089271311573, "calibration/mean_confidence": 0.7738942866147162, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02387152777777777, "completions/max_length": 3863.6, "completions/max_terminated_length": 3863.6, "completions/mean_length": 1091.7198974609375, "completions/mean_terminated_length": 1118.4887451171876, "completions/min_length": 0.0, "completions/min_terminated_length": 347.2, "epoch": 1.6947115384615383, "grad_norm": 0.0005031170439906418, "learning_rate": 2.013221153846154e-06, "loss": -0.0285, "num_tokens": 1836506294.0, "reward": 1.2860944509506225, "reward_std": 0.15281510651111602, "rewards/accuracy_reward": 0.7423611164093018, "rewards/brier_reward": 0.8538578391075134, "rewards/confidence_one_or_zero": 0.0006944444554392249, "rewards/format_reward": 0.9759548664093017, "rewards/mean_confidence_reward": 0.7574574589729309, "sampling/batch_mean_priority_error": 0.015291874999999988, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.20555555555555555, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0024599439930170774, "sampling/priority_kl": 0.029999907687306406, "sampling/priority_scale": 0.7738102614646778, "sampling/prob_entropy": 10.27895221710205, "sampling/prob_max": 5.563581798924133e-05, "sampling/prob_min": 2.0469454102567398e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6871999979019165, "sampling/prompt_draws_total": 50616.0, "sampling/seen_fraction": 0.894920003414154, "sampling/unseen_fraction": 0.10507999658584595, "signal/accuracy_reward/centered_abs_mean": 0.11489800363779068, "signal/accuracy_reward/group_std_mean": 0.15308040380477905, "signal/accuracy_reward/group_zero_std_frac": 0.5527777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05744900181889534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05744900181889534, "signal/advantage_abs_mean": 0.1047695130109787, "signal/advantage_pre_scale_abs_mean": 0.1047695130109787, "signal/advantage_pre_scale_std": 0.22305024564266204, "signal/advantage_std": 0.22305024564266204, "signal/brier_reward/centered_abs_mean": 0.08341319859027863, "signal/brier_reward/group_std_mean": 0.11793318986892701, "signal/brier_reward/group_zero_std_frac": 0.3083333373069763, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04170659929513931, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04170659929513931, "signal/confidence_one_or_zero/centered_abs_mean": 0.001312933990266174, "signal/confidence_one_or_zero/group_std_mean": 0.0030315483920276163, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.3129339393458395e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.3129339393458395e-08, "signal/format_reward/centered_abs_mean": 0.04051106758415699, "signal/format_reward/group_std_mean": 0.07489528208971023, "signal/format_reward/group_zero_std_frac": 0.7, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.020255533792078496, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.020255533792078496, "signal/mean_confidence_reward/centered_abs_mean": 0.0620111808180809, "signal/mean_confidence_reward/group_std_mean": 0.08973864912986755, "signal/mean_confidence_reward/group_zero_std_frac": 0.325000011920929, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.201117571436044e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.201117571436044e-07, "step": 705 }, { "calibration/aurc": 0.05065273865740427, "calibration/batch_distribution_entropy": 0.42453929252108147, "calibration/batch_entropy_100bins": 0.22995059355568942, "calibration/batch_entropy_10bins": 0.42453929252108147, "calibration/batch_entropy_50bins": 0.2706941181643254, "calibration/batch_uniqueness": -0.09111012720397868, "calibration/confidence_entropy": 0.4049054598791278, "calibration/coverage@0%": 0.0005390835579514825, "calibration/coverage@1%": 0.3773087855908114, "calibration/coverage@10%": 0.8881542699724518, "calibration/coverage@15%": 0.8881542699724518, "calibration/coverage@20%": 0.9079889807162533, "calibration/coverage@25%": 0.9212121212121213, "calibration/coverage@30%": 0.9592286501377411, "calibration/coverage@5%": 0.7564973755142573, "calibration/distribution_entropy_10": 0.42453929252108147, "calibration/distribution_entropy_100": 0.22995059355568942, "calibration/ece": 0.12564958761058773, "calibration/mean_confidence": 0.806028914134358, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033333333333333326, "completions/max_length": 3931.6, "completions/max_terminated_length": 3931.6, "completions/mean_length": 1060.0455810546875, "completions/mean_terminated_length": 1096.588037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 344.4, "epoch": 1.7067307692307692, "grad_norm": 0.0005156388506293297, "learning_rate": 1.983173076923077e-06, "loss": -0.0423, "num_tokens": 1851793635.0, "reward": 1.2908846139907837, "reward_std": 0.17143570482730866, "rewards/accuracy_reward": 0.7593749761581421, "rewards/brier_reward": 0.8557994365692139, "rewards/confidence_one_or_zero": 0.0004340277810115367, "rewards/format_reward": 0.9665798544883728, "rewards/mean_confidence_reward": 0.7478854060173035, "sampling/batch_mean_priority_error": 0.016651180555555548, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.18055555555555555, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0024777584709227084, "sampling/priority_kl": 0.030000355467200278, "sampling/priority_scale": 0.7764478266471997, "sampling/prob_entropy": 10.278940582275391, "sampling/prob_max": 5.5878546845633537e-05, "sampling/prob_min": 2.0488816153374502e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.6991999864578247, "sampling/prompt_draws_total": 50976.0, "sampling/seen_fraction": 0.8972199916839599, "sampling/unseen_fraction": 0.10278000831604003, "signal/accuracy_reward/centered_abs_mean": 0.11018880158662796, "signal/accuracy_reward/group_std_mean": 0.15256147384643554, "signal/accuracy_reward/group_zero_std_frac": 0.5333333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05509440079331398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05509440079331398, "signal/advantage_abs_mean": 0.11043548285961151, "signal/advantage_pre_scale_abs_mean": 0.11043548285961151, "signal/advantage_pre_scale_std": 0.23827863931655885, "signal/advantage_std": 0.23827863931655885, "signal/brier_reward/centered_abs_mean": 0.08768158704042435, "signal/brier_reward/group_std_mean": 0.1298682451248169, "signal/brier_reward/group_zero_std_frac": 0.2888888955116272, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.043840793520212175, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.043840793520212175, "signal/confidence_one_or_zero/centered_abs_mean": 0.0007758246618323028, "signal/confidence_one_or_zero/group_std_mean": 0.001424409542232752, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 7.758245601507952e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 7.758245601507952e-09, "signal/format_reward/centered_abs_mean": 0.05783962681889534, "signal/format_reward/group_std_mean": 0.10528798550367355, "signal/format_reward/group_zero_std_frac": 0.5888888835906982, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02891981340944767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02891981340944767, "signal/mean_confidence_reward/centered_abs_mean": 0.07172510176897048, "signal/mean_confidence_reward/group_std_mean": 0.10591092556715012, "signal/mean_confidence_reward/group_zero_std_frac": 0.3000000059604645, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.172509754127532e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.172509754127532e-07, "step": 710 }, { "calibration/aurc": 0.12651208488571192, "calibration/batch_distribution_entropy": 0.4406230705036201, "calibration/batch_entropy_100bins": 0.24175972802918233, "calibration/batch_entropy_10bins": 0.4406230705036201, "calibration/batch_entropy_50bins": 0.2845956401963263, "calibration/batch_uniqueness": -0.04112757521393523, "calibration/confidence_entropy": 0.4019345807333458, "calibration/coverage@0%": 0.0032071301247771833, "calibration/coverage@1%": 0.0032071301247771833, "calibration/coverage@10%": 0.5005391301827457, "calibration/coverage@15%": 0.6837769353175447, "calibration/coverage@20%": 0.7536103363758805, "calibration/coverage@25%": 0.8005333333333333, "calibration/coverage@30%": 0.9648, "calibration/coverage@5%": 0.25429476399576834, "calibration/distribution_entropy_10": 0.4406230705036201, "calibration/distribution_entropy_100": 0.24175972802918233, "calibration/ece": 0.11199074373692071, "calibration/mean_confidence": 0.7841360414479299, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.026215277777777768, "completions/max_length": 3809.6, "completions/max_terminated_length": 3809.6, "completions/mean_length": 1087.0016845703126, "completions/mean_terminated_length": 1116.36162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 349.6, "epoch": 1.71875, "grad_norm": 0.0004386329383123666, "learning_rate": 1.953125e-06, "loss": -0.0338, "num_tokens": 1867390998.0, "reward": 1.2846812963485719, "reward_std": 0.16560640335083007, "rewards/accuracy_reward": 0.7434027791023254, "rewards/brier_reward": 0.8521600246429444, "rewards/confidence_one_or_zero": 0.0011284722480922938, "rewards/format_reward": 0.9737847208976745, "rewards/mean_confidence_reward": 0.7552181720733643, "sampling/batch_mean_priority_error": 0.021241319444444427, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.17222222222222222, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.002497523045167327, "sampling/priority_kl": 0.029999979585409165, "sampling/priority_scale": 0.7792064726585523, "sampling/prob_entropy": 10.278952026367188, "sampling/prob_max": 5.612889508483931e-05, "sampling/prob_min": 2.0507384033408015e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.7111999988555908, "sampling/prompt_draws_total": 51336.0, "sampling/seen_fraction": 0.8995266675949096, "sampling/unseen_fraction": 0.10047333240509033, "signal/accuracy_reward/centered_abs_mean": 0.11961805522441864, "signal/accuracy_reward/group_std_mean": 0.1661587804555893, "signal/accuracy_reward/group_zero_std_frac": 0.4888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05980902761220932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05980902761220932, "signal/advantage_abs_mean": 0.10705532133579254, "signal/advantage_pre_scale_abs_mean": 0.10705532133579254, "signal/advantage_pre_scale_std": 0.23213987052440643, "signal/advantage_std": 0.23213987052440643, "signal/brier_reward/centered_abs_mean": 0.08633466958999633, "signal/brier_reward/group_std_mean": 0.1285327583551407, "signal/brier_reward/group_zero_std_frac": 0.272222226858139, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.043167334794998166, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.043167334794998166, "signal/confidence_one_or_zero/centered_abs_mean": 0.002099609375, "signal/confidence_one_or_zero/group_std_mean": 0.004754898836836219, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9777777671813965, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.0996092331415638e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.0996092331415638e-08, "signal/format_reward/centered_abs_mean": 0.04519314244389534, "signal/format_reward/group_std_mean": 0.08304037302732467, "signal/format_reward/group_zero_std_frac": 0.669444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02259657122194767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02259657122194767, "signal/mean_confidence_reward/centered_abs_mean": 0.06462962925434113, "signal/mean_confidence_reward/group_std_mean": 0.09531620144844055, "signal/mean_confidence_reward/group_zero_std_frac": 0.29722222685813904, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.462962801379035e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.462962801379035e-07, "step": 715 }, { "calibration/aurc": 0.12515534149725946, "calibration/batch_distribution_entropy": 0.6140921867201559, "calibration/batch_entropy_100bins": 0.33031808133393403, "calibration/batch_entropy_10bins": 0.6140921867201559, "calibration/batch_entropy_50bins": 0.38884510084452834, "calibration/batch_uniqueness": 0.3432815524821929, "calibration/confidence_entropy": 0.45580034763085564, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.4892404793508831, "calibration/coverage@15%": 0.7023772313197574, "calibration/coverage@20%": 0.776623220299957, "calibration/coverage@25%": 0.8490904818341354, "calibration/coverage@30%": 0.9391746551321021, "calibration/coverage@5%": 0.3691450261025064, "calibration/distribution_entropy_10": 0.6140921867201559, "calibration/distribution_entropy_100": 0.33031808133393403, "calibration/ece": 0.09690335220922594, "calibration/mean_confidence": 0.7311622409754512, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021875, "completions/max_length": 4014.8, "completions/max_terminated_length": 4014.8, "completions/mean_length": 1126.20751953125, "completions/mean_terminated_length": 1151.4857177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 335.2, "epoch": 1.7307692307692308, "grad_norm": 0.00038971329922787845, "learning_rate": 1.9230769230769234e-06, "loss": -0.0253, "num_tokens": 1883474380.0, "reward": 1.2772084712982177, "reward_std": 0.16071378737688063, "rewards/accuracy_reward": 0.7256076335906982, "rewards/brier_reward": 0.8507566690444947, "rewards/confidence_one_or_zero": 0.0003472222248092294, "rewards/format_reward": 0.9780381917953491, "rewards/mean_confidence_reward": 0.7236449599266053, "sampling/batch_mean_priority_error": 0.02141319444444443, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.16666666666666669, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0025197225622832775, "sampling/priority_kl": 0.02999928630888462, "sampling/priority_scale": 0.7810052573448047, "sampling/prob_entropy": 10.278952980041504, "sampling/prob_max": 5.633040564134717e-05, "sampling/prob_min": 2.053321950370446e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.723200011253357, "sampling/prompt_draws_total": 51696.0, "sampling/seen_fraction": 0.9013800024986267, "sampling/unseen_fraction": 0.09861999750137329, "signal/accuracy_reward/centered_abs_mean": 0.12566731870174408, "signal/accuracy_reward/group_std_mean": 0.17436597347259522, "signal/accuracy_reward/group_zero_std_frac": 0.4694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06283365935087204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06283365935087204, "signal/advantage_abs_mean": 0.10567668825387955, "signal/advantage_pre_scale_abs_mean": 0.10567668825387955, "signal/advantage_pre_scale_std": 0.21769486963748932, "signal/advantage_std": 0.21769486963748932, "signal/brier_reward/centered_abs_mean": 0.08949927836656571, "signal/brier_reward/group_std_mean": 0.1280209854245186, "signal/brier_reward/group_zero_std_frac": 0.24722222089767457, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.044749639183282855, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.044749639183282855, "signal/confidence_one_or_zero/centered_abs_mean": 0.0006618923507630825, "signal/confidence_one_or_zero/group_std_mean": 0.0016652445774525404, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.618923364953844e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.618923364953844e-09, "signal/format_reward/centered_abs_mean": 0.03660481795668602, "signal/format_reward/group_std_mean": 0.06963877379894257, "signal/format_reward/group_zero_std_frac": 0.7111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01830240897834301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01830240897834301, "signal/mean_confidence_reward/centered_abs_mean": 0.06871153563261032, "signal/mean_confidence_reward/group_std_mean": 0.09722689241170883, "signal/mean_confidence_reward/group_zero_std_frac": 0.26388889253139497, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.871153459542256e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.871153459542256e-07, "step": 720 }, { "calibration/aurc": 0.03142397230761359, "calibration/batch_distribution_entropy": 0.6257232630459896, "calibration/batch_entropy_100bins": 0.3396185513087483, "calibration/batch_entropy_10bins": 0.6257232630459896, "calibration/batch_entropy_50bins": 0.39979346361853624, "calibration/batch_uniqueness": 0.36902582625996616, "calibration/confidence_entropy": 0.45576618081231, "calibration/coverage@0%": 0.2551352944478752, "calibration/coverage@1%": 0.554943884024782, "calibration/coverage@10%": 0.909504708137898, "calibration/coverage@15%": 0.9487870619946092, "calibration/coverage@20%": 0.9681940700808624, "calibration/coverage@25%": 0.984366576819407, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.8229580442487784, "calibration/distribution_entropy_10": 0.6257232630459896, "calibration/distribution_entropy_100": 0.3396185513087483, "calibration/ece": 0.1760231815956147, "calibration/mean_confidence": 0.7242368288686128, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018055555555555557, "completions/max_length": 3828.0, "completions/max_terminated_length": 3828.0, "completions/mean_length": 1194.933251953125, "completions/mean_terminated_length": 1217.0438232421875, "completions/min_length": 0.0, "completions/min_terminated_length": 366.2, "epoch": 1.7427884615384617, "grad_norm": 0.0004138158110436052, "learning_rate": 1.8930288461538463e-06, "loss": -0.021, "num_tokens": 1900354091.0, "reward": 1.317106509208679, "reward_std": 0.14283051192760468, "rewards/accuracy_reward": 0.7918402910232544, "rewards/brier_reward": 0.8604142665863037, "rewards/confidence_one_or_zero": 0.000260416668606922, "rewards/format_reward": 0.9819444417953491, "rewards/mean_confidence_reward": 0.6967925429344177, "sampling/batch_mean_priority_error": 0.02047395833333332, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.15555555555555553, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.002539826137945056, "sampling/priority_kl": 0.029999739304184914, "sampling/priority_scale": 0.7832678138511255, "sampling/prob_entropy": 10.278940773010254, "sampling/prob_max": 5.655627537635155e-05, "sampling/prob_min": 2.0555373703246004e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.735199999809265, "sampling/prompt_draws_total": 52056.0, "sampling/seen_fraction": 0.9033733367919922, "sampling/unseen_fraction": 0.09662666320800781, "signal/accuracy_reward/centered_abs_mean": 0.11361762136220932, "signal/accuracy_reward/group_std_mean": 0.16370243728160858, "signal/accuracy_reward/group_zero_std_frac": 0.47500001192092894, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05680881068110466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05680881068110466, "signal/advantage_abs_mean": 0.08912541419267654, "signal/advantage_pre_scale_abs_mean": 0.08912541419267654, "signal/advantage_pre_scale_std": 0.19675522148609162, "signal/advantage_std": 0.19675522148609162, "signal/brier_reward/centered_abs_mean": 0.07736555710434914, "signal/brier_reward/group_std_mean": 0.11556062549352646, "signal/brier_reward/group_zero_std_frac": 0.19444444626569748, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03868277855217457, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03868277855217457, "signal/confidence_one_or_zero/centered_abs_mean": 0.0004937065881676972, "signal/confidence_one_or_zero/group_std_mean": 0.001174198230728507, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.9370658672387435e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.9370658672387435e-09, "signal/format_reward/centered_abs_mean": 0.03257378488779068, "signal/format_reward/group_std_mean": 0.06698863953351974, "signal/format_reward/group_zero_std_frac": 0.7083333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01628689244389534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01628689244389534, "signal/mean_confidence_reward/centered_abs_mean": 0.06848578304052352, "signal/mean_confidence_reward/group_std_mean": 0.09661134332418442, "signal/mean_confidence_reward/group_zero_std_frac": 0.20277778059244156, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.848578095741686e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.848578095741686e-07, "step": 725 }, { "calibration/aurc": 0.14456499448653962, "calibration/batch_distribution_entropy": 0.6292655368791673, "calibration/batch_entropy_100bins": 0.347850189209286, "calibration/batch_entropy_10bins": 0.6292655368791673, "calibration/batch_entropy_50bins": 0.40948361456826393, "calibration/batch_uniqueness": 0.4494320384125043, "calibration/confidence_entropy": 0.47106112366511493, "calibration/coverage@0%": 0.1103448275862069, "calibration/coverage@1%": 0.1103448275862069, "calibration/coverage@10%": 0.47038486034027843, "calibration/coverage@15%": 0.6424399455239072, "calibration/coverage@20%": 0.7320380864849364, "calibration/coverage@25%": 0.7943072157725353, "calibration/coverage@30%": 0.9107367566470469, "calibration/coverage@5%": 0.19433942456415387, "calibration/distribution_entropy_10": 0.6292655368791673, "calibration/distribution_entropy_100": 0.347850189209286, "calibration/ece": 0.11105866402149867, "calibration/mean_confidence": 0.6971300219725503, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015538194444444464, "completions/max_length": 3970.4, "completions/max_terminated_length": 3970.4, "completions/mean_length": 1178.22333984375, "completions/mean_terminated_length": 1196.9110595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 353.8, "epoch": 1.7548076923076923, "grad_norm": 0.0003167448448948562, "learning_rate": 1.8629807692307695e-06, "loss": -0.0197, "num_tokens": 1917059544.0, "reward": 1.2899388551712037, "reward_std": 0.1306004211306572, "rewards/accuracy_reward": 0.7422743201255798, "rewards/brier_reward": 0.8531275868415833, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9844618201255798, "rewards/mean_confidence_reward": 0.6970572829246521, "sampling/batch_mean_priority_error": 0.018291666666666647, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.17222222222222222, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0025618880987167358, "sampling/priority_kl": 0.029999687150120736, "sampling/priority_scale": 0.7856972634559497, "sampling/prob_entropy": 10.27893886566162, "sampling/prob_max": 5.6790837697917594e-05, "sampling/prob_min": 2.0575926464516668e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.7472000122070312, "sampling/prompt_draws_total": 52416.0, "sampling/seen_fraction": 0.9053866744041443, "sampling/unseen_fraction": 0.09461332559585571, "signal/accuracy_reward/centered_abs_mean": 0.10812174528837204, "signal/accuracy_reward/group_std_mean": 0.1530183345079422, "signal/accuracy_reward/group_zero_std_frac": 0.522222238779068, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05406087264418602, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05406087264418602, "signal/advantage_abs_mean": 0.0842123880982399, "signal/advantage_pre_scale_abs_mean": 0.0842123880982399, "signal/advantage_pre_scale_std": 0.1876286417245865, "signal/advantage_std": 0.1876286417245865, "signal/brier_reward/centered_abs_mean": 0.0749520257115364, "signal/brier_reward/group_std_mean": 0.10771744549274445, "signal/brier_reward/group_zero_std_frac": 0.18611111640930175, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0374760128557682, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0374760128557682, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.02612304650247097, "signal/format_reward/group_std_mean": 0.051665914803743364, "signal/format_reward/group_zero_std_frac": 0.7777777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013061523251235486, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013061523251235486, "signal/mean_confidence_reward/centered_abs_mean": 0.0662548802793026, "signal/mean_confidence_reward/group_std_mean": 0.09253504127264023, "signal/mean_confidence_reward/group_zero_std_frac": 0.18888889253139496, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.625487912970129e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.625487912970129e-07, "step": 730 }, { "calibration/aurc": 0.1911213039384451, "calibration/batch_distribution_entropy": 0.5815784623872695, "calibration/batch_entropy_100bins": 0.3156432238254143, "calibration/batch_entropy_10bins": 0.5815784623872695, "calibration/batch_entropy_50bins": 0.3715700960227042, "calibration/batch_uniqueness": 0.2773596382466226, "calibration/confidence_entropy": 0.4474254359773086, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.1801047120418848, "calibration/coverage@15%": 0.4702755641247623, "calibration/coverage@20%": 0.49230769230769234, "calibration/coverage@25%": 0.737332625994695, "calibration/coverage@30%": 0.9899204244031831, "calibration/coverage@5%": 0.1293193717277487, "calibration/distribution_entropy_10": 0.5815784623872695, "calibration/distribution_entropy_100": 0.3156432238254143, "calibration/ece": 0.14364294131946279, "calibration/mean_confidence": 0.7393319764923701, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014236111111111139, "completions/max_length": 3983.0, "completions/max_terminated_length": 3983.0, "completions/mean_length": 1152.609423828125, "completions/mean_terminated_length": 1169.4090576171875, "completions/min_length": 0.0, "completions/min_terminated_length": 397.0, "epoch": 1.7668269230769231, "grad_norm": 0.0003992510319221765, "learning_rate": 1.8329326923076924e-06, "loss": -0.0169, "num_tokens": 1933420836.0, "reward": 1.2813831567764282, "reward_std": 0.13340601325035095, "rewards/accuracy_reward": 0.7334201335906982, "rewards/brier_reward": 0.8441761016845704, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.98515625, "rewards/mean_confidence_reward": 0.6882100462913513, "sampling/batch_mean_priority_error": 0.02233506944444443, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.1388888888888889, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.002584920870140195, "sampling/priority_kl": 0.030000413581728936, "sampling/priority_scale": 0.7875803768867626, "sampling/prob_entropy": 10.278951263427734, "sampling/prob_max": 5.6999971275217834e-05, "sampling/prob_min": 2.0601512733264827e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.7592000246047974, "sampling/prompt_draws_total": 52776.0, "sampling/seen_fraction": 0.907146668434143, "sampling/unseen_fraction": 0.09285333156585693, "signal/accuracy_reward/centered_abs_mean": 0.11967773735523224, "signal/accuracy_reward/group_std_mean": 0.16079054325819014, "signal/accuracy_reward/group_zero_std_frac": 0.522222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05983886867761612, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05983886867761612, "signal/advantage_abs_mean": 0.08656046390533448, "signal/advantage_pre_scale_abs_mean": 0.08656046390533448, "signal/advantage_pre_scale_std": 0.18703484237194062, "signal/advantage_std": 0.18703484237194062, "signal/brier_reward/centered_abs_mean": 0.07910026758909225, "signal/brier_reward/group_std_mean": 0.11268624216318131, "signal/brier_reward/group_zero_std_frac": 0.17500000298023224, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.039550133794546125, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.039550133794546125, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003255208255723119, "signal/confidence_one_or_zero/group_std_mean": 0.0006831518840044737, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.2552080142522754e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.2552080142522754e-09, "signal/format_reward/centered_abs_mean": 0.02683919295668602, "signal/format_reward/group_std_mean": 0.05439011082053184, "signal/format_reward/group_zero_std_frac": 0.7666666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01341959647834301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01341959647834301, "signal/mean_confidence_reward/centered_abs_mean": 0.06722780838608741, "signal/mean_confidence_reward/group_std_mean": 0.09363017976284027, "signal/mean_confidence_reward/group_zero_std_frac": 0.1833333343267441, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.7227805402581e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.7227805402581e-07, "step": 735 }, { "calibration/aurc": 0.07135814786399183, "calibration/batch_distribution_entropy": 0.5937620416887921, "calibration/batch_entropy_100bins": 0.32064189271944316, "calibration/batch_entropy_10bins": 0.5937620416887921, "calibration/batch_entropy_50bins": 0.3774544481669698, "calibration/batch_uniqueness": 0.32722568282924336, "calibration/confidence_entropy": 0.44936877206762604, "calibration/coverage@0%": 0.2635238016710642, "calibration/coverage@1%": 0.35501313366391496, "calibration/coverage@10%": 0.7962148920012707, "calibration/coverage@15%": 0.8760531857395459, "calibration/coverage@20%": 0.8953349265772423, "calibration/coverage@25%": 0.9400240368011715, "calibration/coverage@30%": 0.9630606860158311, "calibration/coverage@5%": 0.5747872021644158, "calibration/distribution_entropy_10": 0.5937620416887921, "calibration/distribution_entropy_100": 0.32064189271944316, "calibration/ece": 0.1270965697419163, "calibration/mean_confidence": 0.7460525301778327, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010329861111111116, "completions/max_length": 3854.0, "completions/max_terminated_length": 3854.0, "completions/mean_length": 1144.8279541015625, "completions/mean_terminated_length": 1156.9118408203126, "completions/min_length": 0.0, "completions/min_terminated_length": 350.6, "epoch": 1.7788461538461537, "grad_norm": 0.00032714667031541467, "learning_rate": 1.8028846153846156e-06, "loss": -0.0117, "num_tokens": 1949681606.0, "reward": 1.2939432859420776, "reward_std": 0.12169390916824341, "rewards/accuracy_reward": 0.7418402910232544, "rewards/brier_reward": 0.8563619971275329, "rewards/confidence_one_or_zero": 0.0005208333546761424, "rewards/format_reward": 0.9896701335906982, "rewards/mean_confidence_reward": 0.7095920205116272, "sampling/batch_mean_priority_error": 0.019717013888888878, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.16111111111111112, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0026072211097925902, "sampling/priority_kl": 0.030000557750463487, "sampling/priority_scale": 0.7897889913292602, "sampling/prob_entropy": 10.278955268859864, "sampling/prob_max": 5.7226319040637466e-05, "sampling/prob_min": 2.0624370517907664e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.7711999893188477, "sampling/prompt_draws_total": 53136.0, "sampling/seen_fraction": 0.9089933156967163, "sampling/unseen_fraction": 0.0910066843032837, "signal/accuracy_reward/centered_abs_mean": 0.11880425363779068, "signal/accuracy_reward/group_std_mean": 0.15754524171352385, "signal/accuracy_reward/group_zero_std_frac": 0.5472222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05940212681889534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05940212681889534, "signal/advantage_abs_mean": 0.08523241132497787, "signal/advantage_pre_scale_abs_mean": 0.08523241132497787, "signal/advantage_pre_scale_std": 0.17800681591033934, "signal/advantage_std": 0.17800681591033934, "signal/brier_reward/centered_abs_mean": 0.07595813944935799, "signal/brier_reward/group_std_mean": 0.10386582911014557, "signal/brier_reward/group_zero_std_frac": 0.19444445073604583, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.037979069724678995, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.037979069724678995, "signal/confidence_one_or_zero/centered_abs_mean": 0.0009006076375953853, "signal/confidence_one_or_zero/group_std_mean": 0.001515774242579937, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 9.006076595596824e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 9.006076595596824e-09, "signal/format_reward/centered_abs_mean": 0.017822265625, "signal/format_reward/group_std_mean": 0.03541161455214024, "signal/format_reward/group_zero_std_frac": 0.8444444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0089111328125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0089111328125, "signal/mean_confidence_reward/centered_abs_mean": 0.06562201455235481, "signal/mean_confidence_reward/group_std_mean": 0.08824090212583542, "signal/mean_confidence_reward/group_zero_std_frac": 0.20833333432674409, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.562201065207773e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.562201065207773e-07, "step": 740 }, { "calibration/aurc": 0.044976670240617245, "calibration/batch_distribution_entropy": 0.4711153755218615, "calibration/batch_entropy_100bins": 0.25180322817921036, "calibration/batch_entropy_10bins": 0.4711153755218615, "calibration/batch_entropy_50bins": 0.2964186860704684, "calibration/batch_uniqueness": 0.039686466266251816, "calibration/confidence_entropy": 0.41058957351063724, "calibration/coverage@0%": 0.31086406657963445, "calibration/coverage@1%": 0.3129528394255875, "calibration/coverage@10%": 0.922811956048738, "calibration/coverage@15%": 0.9473006418624891, "calibration/coverage@20%": 0.9738903394255874, "calibration/coverage@25%": 1.0, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.6403679830287206, "calibration/distribution_entropy_10": 0.4711153755218615, "calibration/distribution_entropy_100": 0.25180322817921036, "calibration/ece": 0.10536104765013041, "calibration/mean_confidence": 0.7969752230200176, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00546875, "completions/max_length": 4029.2, "completions/max_terminated_length": 4029.2, "completions/mean_length": 1128.099853515625, "completions/mean_terminated_length": 1134.4380615234375, "completions/min_length": 0.0, "completions/min_terminated_length": 394.2, "epoch": 1.7908653846153846, "grad_norm": 0.00034849203075282276, "learning_rate": 1.7728365384615387e-06, "loss": -0.005, "num_tokens": 1965781412.0, "reward": 1.3357252836227418, "reward_std": 0.0930047869682312, "rewards/accuracy_reward": 0.7889756798744202, "rewards/brier_reward": 0.887928593158722, "rewards/confidence_one_or_zero": 0.0006944444554392249, "rewards/format_reward": 0.9945312380790711, "rewards/mean_confidence_reward": 0.7552994608879089, "sampling/batch_mean_priority_error": 0.014748263888888868, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.175, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0026240932755172254, "sampling/priority_kl": 0.02999989651143551, "sampling/priority_scale": 0.7923730551963672, "sampling/prob_entropy": 10.27895565032959, "sampling/prob_max": 5.7472297339700165e-05, "sampling/prob_min": 2.0643966854549946e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.7831999778747558, "sampling/prompt_draws_total": 53496.0, "sampling/seen_fraction": 0.9109466791152954, "sampling/unseen_fraction": 0.08905332088470459, "signal/accuracy_reward/centered_abs_mean": 0.08330620601773261, "signal/accuracy_reward/group_std_mean": 0.11775979846715927, "signal/accuracy_reward/group_zero_std_frac": 0.6305555701255798, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04165310300886631, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04165310300886631, "signal/advantage_abs_mean": 0.06172395497560501, "signal/advantage_pre_scale_abs_mean": 0.06172395497560501, "signal/advantage_pre_scale_std": 0.1456386312842369, "signal/advantage_std": 0.1456386312842369, "signal/brier_reward/centered_abs_mean": 0.05597181022167206, "signal/brier_reward/group_std_mean": 0.08043636679649353, "signal/brier_reward/group_zero_std_frac": 0.30555556416511537, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02798590511083603, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.02798590511083603, "signal/confidence_one_or_zero/centered_abs_mean": 0.0013454860891215503, "signal/confidence_one_or_zero/group_std_mean": 0.003928370773792267, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9777777671813965, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.3454859981720801e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.3454859981720801e-08, "signal/format_reward/centered_abs_mean": 0.009792751679196954, "signal/format_reward/group_std_mean": 0.02300018724054098, "signal/format_reward/group_zero_std_frac": 0.8861111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004896375839598477, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004896375839598477, "signal/mean_confidence_reward/centered_abs_mean": 0.05380534529685974, "signal/mean_confidence_reward/group_std_mean": 0.07323706895112991, "signal/mean_confidence_reward/group_zero_std_frac": 0.31111111044883727, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.38053438958741e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.38053438958741e-07, "step": 745 }, { "calibration/aurc": 0.11353951064295462, "calibration/batch_distribution_entropy": 0.5589911152512703, "calibration/batch_entropy_100bins": 0.3068109774965393, "calibration/batch_entropy_10bins": 0.5589911152512703, "calibration/batch_entropy_50bins": 0.3611729185489008, "calibration/batch_uniqueness": 0.21741473871920394, "calibration/confidence_entropy": 0.4441730678233955, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.16335078534031414, "calibration/coverage@10%": 0.5125457796204305, "calibration/coverage@15%": 0.7319539847079646, "calibration/coverage@20%": 0.811123733562073, "calibration/coverage@25%": 0.8920657185711904, "calibration/coverage@30%": 0.9080231653797011, "calibration/coverage@5%": 0.3494851216817216, "calibration/distribution_entropy_10": 0.5589911152512703, "calibration/distribution_entropy_100": 0.3068109774965393, "calibration/ece": 0.10185885955664371, "calibration/mean_confidence": 0.7363732055069778, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007204861111111094, "completions/max_length": 3862.2, "completions/max_terminated_length": 3862.2, "completions/mean_length": 1151.9552978515626, "completions/mean_terminated_length": 1160.3269287109374, "completions/min_length": 0.0, "completions/min_terminated_length": 391.4, "epoch": 1.8028846153846154, "grad_norm": 0.0003831425274256617, "learning_rate": 1.7427884615384616e-06, "loss": -0.008, "num_tokens": 1982158337.0, "reward": 1.3099550485610962, "reward_std": 0.11404931545257568, "rewards/accuracy_reward": 0.7524305582046509, "rewards/brier_reward": 0.87466961145401, "rewards/confidence_one_or_zero": 0.00034722223062999547, "rewards/format_reward": 0.9927951455116272, "rewards/mean_confidence_reward": 0.7468029499053955, "sampling/batch_mean_priority_error": 0.013510416666666653, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.17777777777777776, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.002636962477117777, "sampling/priority_kl": 0.029998420923948287, "sampling/priority_scale": 0.7955237090354785, "sampling/prob_entropy": 10.278936958312988, "sampling/prob_max": 5.7744530931813645e-05, "sampling/prob_min": 2.0657485219999216e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.795199990272522, "sampling/prompt_draws_total": 53856.0, "sampling/seen_fraction": 0.9130800008773804, "sampling/unseen_fraction": 0.08691999912261963, "signal/accuracy_reward/centered_abs_mean": 0.1026584193110466, "signal/accuracy_reward/group_std_mean": 0.14243891835212708, "signal/accuracy_reward/group_zero_std_frac": 0.569444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0513292096555233, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0513292096555233, "signal/advantage_abs_mean": 0.07915246114134789, "signal/advantage_pre_scale_abs_mean": 0.07915246114134789, "signal/advantage_pre_scale_std": 0.16908468604087828, "signal/advantage_std": 0.16908468604087828, "signal/brier_reward/centered_abs_mean": 0.07080509662628173, "signal/brier_reward/group_std_mean": 0.09794733822345733, "signal/brier_reward/group_zero_std_frac": 0.27777778208255766, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.035402548313140866, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.035402548313140866, "signal/confidence_one_or_zero/centered_abs_mean": 0.0006727430387400091, "signal/confidence_one_or_zero/group_std_mean": 0.0019641853868961334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9888888835906983, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.7274299908604006e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.7274299908604006e-09, "signal/format_reward/centered_abs_mean": 0.012559678684920073, "signal/format_reward/group_std_mean": 0.02510856781154871, "signal/format_reward/group_zero_std_frac": 0.8916666626930236, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006279839342460037, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006279839342460037, "signal/mean_confidence_reward/centered_abs_mean": 0.060461973398923875, "signal/mean_confidence_reward/group_std_mean": 0.08233949095010758, "signal/mean_confidence_reward/group_zero_std_frac": 0.2916666716337204, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.04619685873331e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.04619685873331e-07, "step": 750 }, { "epoch": 1.8028846153846154, "eval_calibration/aurc": 0.12918669119756246, "eval_calibration/batch_distribution_entropy": 0.570947120401169, "eval_calibration/batch_entropy_100bins": 0.31176983812988257, "eval_calibration/batch_entropy_10bins": 0.570947120401169, "eval_calibration/batch_entropy_50bins": 0.36701040905277965, "eval_calibration/batch_uniqueness": 0.22771267764757755, "eval_calibration/confidence_entropy": 0.4321299611272379, "eval_calibration/coverage@0%": 0.0008748906386701663, "eval_calibration/coverage@1%": 0.0008748906386701663, "eval_calibration/coverage@10%": 0.600174978127734, "eval_calibration/coverage@15%": 0.7961504811898513, "eval_calibration/coverage@20%": 0.8958880139982502, "eval_calibration/coverage@25%": 0.9667541557305337, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0008748906386701663, "eval_calibration/distribution_entropy_10": 0.570947120401169, "eval_calibration/distribution_entropy_100": 0.31176983812988257, "eval_calibration/ece": 0.023053368328958688, "eval_calibration/mean_confidence": 0.7554243219597551, "eval_calibration/unique_confidence_per_question": 0.008680555555555556, "eval_calibration/unique_confidences": 10, "eval_completions/clipped_ratio": 0.0078125, "eval_completions/max_length": 3036.3333333333335, "eval_completions/max_terminated_length": 3036.3333333333335, "eval_completions/mean_length": 1114.5510864257812, "eval_completions/mean_terminated_length": 1123.3021850585938, "eval_completions/min_length": 98.16666666666667, "eval_completions/min_terminated_length": 455.0, "eval_loss": 0.0, "eval_num_tokens": 1982158337.0, "eval_reward": 1.2921201586723328, "eval_reward_std": 0.3189353197813034, "eval_rewards/accuracy_reward": 0.7343750099341074, "eval_rewards/brier_reward": 0.8576627671718597, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9921875, "eval_rewards/mean_confidence_reward": 0.7495225568612417, "eval_runtime": 203.2359, "eval_samples_per_second": 4.92, "eval_signal/accuracy_reward/centered_abs_mean": 0.3766276041666667, "eval_signal/accuracy_reward/group_std_mean": 0.43860721588134766, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.18831380208333334, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.18831380208333334, "eval_signal/advantage_abs_mean": 0.26045594612757367, "eval_signal/advantage_pre_scale_abs_mean": 0.26045594612757367, "eval_signal/advantage_pre_scale_std": 0.31735892593860626, "eval_signal/advantage_std": 0.31735892593860626, "eval_signal/brier_reward/centered_abs_mean": 0.1701806883017222, "eval_signal/brier_reward/group_std_mean": 0.23481983691453934, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0850903441508611, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.0850903441508611, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.015136718439559141, "eval_signal/format_reward/group_std_mean": 0.044194173688689865, "eval_signal/format_reward/group_zero_std_frac": 0.750000019868215, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007568359219779571, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359219779571, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.18361271917819977, "eval_signal/mean_confidence_reward/group_std_mean": 0.23008321225643158, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.8361271637938141e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.8361271637938141e-06, "eval_steps_per_second": 0.03, "step": 750 }, { "epoch": 1.8028846153846154, "step": 750, "train_probe_calibration/aurc": 0.09515691675342279, "train_probe_calibration/batch_distribution_entropy": 0.5559239476940845, "train_probe_calibration/batch_entropy_100bins": 0.30124399218193226, "train_probe_calibration/batch_entropy_10bins": 0.5559239476940845, "train_probe_calibration/batch_entropy_50bins": 0.3546195535096131, "train_probe_calibration/batch_uniqueness": 0.18609991806998472, "train_probe_calibration/confidence_entropy": 0.42859308955033887, "train_probe_calibration/coverage@0%": 0.0008726003490401396, "train_probe_calibration/coverage@1%": 0.0008726003490401396, "train_probe_calibration/coverage@10%": 0.7705061082024433, "train_probe_calibration/coverage@15%": 0.8403141361256544, "train_probe_calibration/coverage@20%": 0.9101221640488656, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0008726003490401396, "train_probe_calibration/distribution_entropy_10": 0.5559239476940845, "train_probe_calibration/distribution_entropy_100": 0.30124399218193226, "train_probe_calibration/ece": 0.026527050610820127, "train_probe_calibration/mean_confidence": 0.7626527050610821, "train_probe_calibration/unique_confidence_per_question": 0.008680555555555556, "train_probe_calibration/unique_confidences": 10, "train_probe_completions/clipped_ratio": 0.007638888888888899, "train_probe_completions/max_length": 3220.3333333333335, "train_probe_completions/max_terminated_length": 3220.3333333333335, "train_probe_completions/mean_length": 1125.8380330403645, "train_probe_completions/mean_terminated_length": 1134.5219319661458, "train_probe_completions/min_length": 124.33333333333333, "train_probe_completions/min_terminated_length": 399.1666666666667, "train_probe_loss": 0.0, "train_probe_num_tokens": 1982158337.0, "train_probe_reward": 1.3254329760869343, "train_probe_reward_std": 0.2898659110069275, "train_probe_rewards/accuracy_reward": 0.7777777711550394, "train_probe_rewards/brier_reward": 0.8782812555631002, "train_probe_rewards/confidence_one_or_zero": 0.0008680555814256271, "train_probe_rewards/format_reward": 0.9947916666666666, "train_probe_rewards/mean_confidence_reward": 0.7586805323759714, "train_probe_runtime": 198.345, "train_probe_samples_per_second": 5.042, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3351779480775197, "train_probe_signal/accuracy_reward/group_std_mean": 0.4128282864888509, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16758897403875986, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.16758897403875986, "train_probe_signal/advantage_abs_mean": 0.2279451290766398, "train_probe_signal/advantage_pre_scale_abs_mean": 0.2279451290766398, "train_probe_signal/advantage_pre_scale_std": 0.2899184872706731, "train_probe_signal/advantage_std": 0.2899184872706731, "train_probe_signal/brier_reward/centered_abs_mean": 0.14683595051368079, "train_probe_signal/brier_reward/group_std_mean": 0.2066178321838379, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07341797525684039, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07341797525684039, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0016818575871487458, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0049104637776811915, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222288449606, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151002e-08, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151002e-08, "train_probe_signal/format_reward/centered_abs_mean": 0.010091145522892475, "train_probe_signal/format_reward/group_std_mean": 0.02946278266608715, "train_probe_signal/format_reward/group_zero_std_frac": 0.8333333532015482, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0050455727614462376, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0050455727614462376, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.17816295226415, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.2219566653172175, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.7816294833513287e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.7816294833513287e-06, "train_probe_steps_per_second": 0.03 }, { "calibration/aurc": 0.08235913041108442, "calibration/batch_distribution_entropy": 0.5500182521390489, "calibration/batch_entropy_100bins": 0.2986105969772843, "calibration/batch_entropy_10bins": 0.5500182521390489, "calibration/batch_entropy_50bins": 0.351519563282679, "calibration/batch_uniqueness": 0.19279256086586521, "calibration/confidence_entropy": 0.4438144496211079, "calibration/coverage@0%": 0.11076115485564304, "calibration/coverage@1%": 0.11076115485564304, "calibration/coverage@10%": 0.7177205038646448, "calibration/coverage@15%": 0.8222280028529407, "calibration/coverage@20%": 0.926595744680851, "calibration/coverage@25%": 0.9627659574468085, "calibration/coverage@30%": 0.9627659574468085, "calibration/coverage@5%": 0.3648423271930728, "calibration/distribution_entropy_10": 0.5500182521390489, "calibration/distribution_entropy_100": 0.2986105969772843, "calibration/ece": 0.09704431335458163, "calibration/mean_confidence": 0.7434555824273797, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00746527777777779, "completions/max_length": 3774.0, "completions/max_terminated_length": 3774.0, "completions/mean_length": 1096.2972412109375, "completions/mean_terminated_length": 1104.5652099609374, "completions/min_length": 0.0, "completions/min_terminated_length": 383.8, "epoch": 1.8149038461538463, "grad_norm": 0.000413256959291175, "learning_rate": 1.7127403846153848e-06, "loss": -0.0082, "num_tokens": 1997896449.0, "reward": 1.324518609046936, "reward_std": 0.11593645066022873, "rewards/accuracy_reward": 0.7758680582046509, "rewards/brier_reward": 0.8807059168815613, "rewards/confidence_one_or_zero": 0.0006944444496184588, "rewards/format_reward": 0.9924479007720948, "rewards/mean_confidence_reward": 0.7667656183242798, "sampling/batch_mean_priority_error": 0.013126805555555541, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.14166666666666666, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0026503289118409157, "sampling/priority_kl": 0.03000011146068573, "sampling/priority_scale": 0.7983648717170582, "sampling/prob_entropy": 10.278961563110352, "sampling/prob_max": 5.800310827908106e-05, "sampling/prob_min": 2.0673956169048324e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.8072000026702881, "sampling/prompt_draws_total": 54216.0, "sampling/seen_fraction": 0.9150466680526733, "sampling/unseen_fraction": 0.08495333194732665, "signal/accuracy_reward/centered_abs_mean": 0.10064018815755844, "signal/accuracy_reward/group_std_mean": 0.13902317732572556, "signal/accuracy_reward/group_zero_std_frac": 0.5777777791023254, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05032009407877922, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05032009407877922, "signal/advantage_abs_mean": 0.07687721401453018, "signal/advantage_pre_scale_abs_mean": 0.07687721401453018, "signal/advantage_pre_scale_std": 0.1724386215209961, "signal/advantage_std": 0.1724386215209961, "signal/brier_reward/centered_abs_mean": 0.06619351655244828, "signal/brier_reward/group_std_mean": 0.09618951678276062, "signal/brier_reward/group_zero_std_frac": 0.3027777820825577, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03309675827622414, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03309675827622414, "signal/confidence_one_or_zero/centered_abs_mean": 0.0012803819496184587, "signal/confidence_one_or_zero/group_std_mean": 0.002897548582404852, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.2803818094653252e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.2803818094653252e-08, "signal/format_reward/centered_abs_mean": 0.013926866464316845, "signal/format_reward/group_std_mean": 0.03264928236603737, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0069634332321584225, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0069634332321584225, "signal/mean_confidence_reward/centered_abs_mean": 0.054513295739889146, "signal/mean_confidence_reward/group_std_mean": 0.07755149900913239, "signal/mean_confidence_reward/group_zero_std_frac": 0.31666667461395265, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.451329343486577e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.451329343486577e-07, "step": 755 }, { "calibration/aurc": 0.15240919905327432, "calibration/batch_distribution_entropy": 0.4942784512127184, "calibration/batch_entropy_100bins": 0.26749138967220476, "calibration/batch_entropy_10bins": 0.4942784512127184, "calibration/batch_entropy_50bins": 0.3148865359476951, "calibration/batch_uniqueness": 0.09131136876063979, "calibration/confidence_entropy": 0.4264967519358572, "calibration/coverage@0%": 0.19791259415198284, "calibration/coverage@1%": 0.2005235602094241, "calibration/coverage@10%": 0.4353681520038709, "calibration/coverage@15%": 0.5631992935300282, "calibration/coverage@20%": 0.6284375861118765, "calibration/coverage@25%": 0.6861228988702123, "calibration/coverage@30%": 0.7128244695508406, "calibration/coverage@5%": 0.4264442674894352, "calibration/distribution_entropy_10": 0.4942784512127184, "calibration/distribution_entropy_100": 0.26749138967220476, "calibration/ece": 0.1297135731642174, "calibration/mean_confidence": 0.7767179014679333, "calibration/unique_confidence_per_question": 0.01979166666666667, "calibration/unique_confidences": 7.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010156249999999978, "completions/max_length": 3959.8, "completions/max_terminated_length": 3959.8, "completions/mean_length": 1118.2989501953125, "completions/mean_terminated_length": 1129.805322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 319.8, "epoch": 1.8269230769230769, "grad_norm": 0.000439549854490906, "learning_rate": 1.682692307692308e-06, "loss": -0.0094, "num_tokens": 2013854613.0, "reward": 1.2982789754867554, "reward_std": 0.1340230643749237, "rewards/accuracy_reward": 0.7386284828186035, "rewards/brier_reward": 0.868070662021637, "rewards/confidence_one_or_zero": 0.0008680555678438395, "rewards/format_reward": 0.9898437619209289, "rewards/mean_confidence_reward": 0.7592884659767151, "sampling/batch_mean_priority_error": 0.01165805555555555, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.1388888888888889, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.002661566250026226, "sampling/priority_kl": 0.030000675842165946, "sampling/priority_scale": 0.8004713356727734, "sampling/prob_entropy": 10.27895393371582, "sampling/prob_max": 5.822636740049347e-05, "sampling/prob_min": 2.069712099910248e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.8191999912261962, "sampling/prompt_draws_total": 54576.0, "sampling/seen_fraction": 0.916700005531311, "sampling/unseen_fraction": 0.08329999446868896, "signal/accuracy_reward/centered_abs_mean": 0.1171820729970932, "signal/accuracy_reward/group_std_mean": 0.15496748685836792, "signal/accuracy_reward/group_zero_std_frac": 0.5583333492279052, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0585910364985466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0585910364985466, "signal/advantage_abs_mean": 0.09350767433643341, "signal/advantage_pre_scale_abs_mean": 0.09350767433643341, "signal/advantage_pre_scale_std": 0.19366811215877533, "signal/advantage_std": 0.19366811215877533, "signal/brier_reward/centered_abs_mean": 0.07623469531536102, "signal/brier_reward/group_std_mean": 0.10736920237541199, "signal/brier_reward/group_zero_std_frac": 0.31666667461395265, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03811734765768051, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03811734765768051, "signal/confidence_one_or_zero/centered_abs_mean": 0.0016493055154569447, "signal/confidence_one_or_zero/group_std_mean": 0.004259948246181011, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9777777671813965, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6493054033617227e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6493054033617227e-08, "signal/format_reward/centered_abs_mean": 0.01809353344142437, "signal/format_reward/group_std_mean": 0.04034832753241062, "signal/format_reward/group_zero_std_frac": 0.8083333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009046766720712186, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009046766720712186, "signal/mean_confidence_reward/centered_abs_mean": 0.06380566284060478, "signal/mean_confidence_reward/group_std_mean": 0.08879230618476867, "signal/mean_confidence_reward/group_zero_std_frac": 0.3388888895511627, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.38056599200354e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.38056599200354e-07, "step": 760 }, { "calibration/aurc": 0.14281510974146783, "calibration/batch_distribution_entropy": 0.47071555151756134, "calibration/batch_entropy_100bins": 0.2578550493311225, "calibration/batch_entropy_10bins": 0.47071555151756134, "calibration/batch_entropy_50bins": 0.30354279201285356, "calibration/batch_uniqueness": 0.03149784874336764, "calibration/confidence_entropy": 0.42275016873700616, "calibration/coverage@0%": 0.0015665796344647518, "calibration/coverage@1%": 0.0015665796344647518, "calibration/coverage@10%": 0.4061340976251519, "calibration/coverage@15%": 0.599683554376951, "calibration/coverage@20%": 0.6860280201828867, "calibration/coverage@25%": 0.9121581419441547, "calibration/coverage@30%": 0.929390517923267, "calibration/coverage@5%": 0.3643477282163753, "calibration/distribution_entropy_10": 0.47071555151756134, "calibration/distribution_entropy_100": 0.2578550493311225, "calibration/ece": 0.10513159295486174, "calibration/mean_confidence": 0.7598361444108246, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012760416666666673, "completions/max_length": 3986.8, "completions/max_terminated_length": 3986.8, "completions/mean_length": 1143.3353515625, "completions/mean_terminated_length": 1158.1183837890626, "completions/min_length": 0.0, "completions/min_terminated_length": 365.6, "epoch": 1.8389423076923077, "grad_norm": 0.00036109599750488997, "learning_rate": 1.6526442307692309e-06, "loss": -0.0147, "num_tokens": 2030106252.0, "reward": 1.3092787742614747, "reward_std": 0.12436873316764832, "rewards/accuracy_reward": 0.7537326335906982, "rewards/brier_reward": 0.8776569604873657, "rewards/confidence_one_or_zero": 0.0014756944845430553, "rewards/format_reward": 0.9871527910232544, "rewards/mean_confidence_reward": 0.7626976013183594, "sampling/batch_mean_priority_error": 0.0149636111111111, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.17222222222222222, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0026781358756124973, "sampling/priority_kl": 0.029999176412820815, "sampling/priority_scale": 0.803554934146814, "sampling/prob_entropy": 10.27893524169922, "sampling/prob_max": 5.850200104760006e-05, "sampling/prob_min": 2.0712401601485907e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.8312000036239624, "sampling/prompt_draws_total": 54936.0, "sampling/seen_fraction": 0.9186333417892456, "sampling/unseen_fraction": 0.0813666582107544, "signal/accuracy_reward/centered_abs_mean": 0.0984754778444767, "signal/accuracy_reward/group_std_mean": 0.1380874663591385, "signal/accuracy_reward/group_zero_std_frac": 0.575, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04923773892223835, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04923773892223835, "signal/advantage_abs_mean": 0.08254304528236389, "signal/advantage_pre_scale_abs_mean": 0.08254304528236389, "signal/advantage_pre_scale_std": 0.1854351818561554, "signal/advantage_std": 0.1854351818561554, "signal/brier_reward/centered_abs_mean": 0.07168641835451126, "signal/brier_reward/group_std_mean": 0.1029394656419754, "signal/brier_reward/group_zero_std_frac": 0.325, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03584320917725563, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03584320917725563, "signal/confidence_one_or_zero/centered_abs_mean": 0.0027506509562954307, "signal/confidence_one_or_zero/group_std_mean": 0.006121202511712909, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222089767456, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.7506509070462926e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.7506509070462926e-08, "signal/format_reward/centered_abs_mean": 0.0229600690305233, "signal/format_reward/group_std_mean": 0.0469165489077568, "signal/format_reward/group_zero_std_frac": 0.7972222208976746, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01148003451526165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01148003451526165, "signal/mean_confidence_reward/centered_abs_mean": 0.060264953225851056, "signal/mean_confidence_reward/group_std_mean": 0.08513574749231338, "signal/mean_confidence_reward/group_zero_std_frac": 0.34722222089767457, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.026494929756154e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.026494929756154e-07, "step": 765 }, { "calibration/aurc": 0.0969113642472206, "calibration/batch_distribution_entropy": 0.5355198771027292, "calibration/batch_entropy_100bins": 0.2940913247107916, "calibration/batch_entropy_10bins": 0.5355198771027292, "calibration/batch_entropy_50bins": 0.3461995490917764, "calibration/batch_uniqueness": 0.17352555787323637, "calibration/confidence_entropy": 0.4251987915992827, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.39703934791792184, "calibration/coverage@10%": 0.5134270126524422, "calibration/coverage@15%": 0.7232000000000001, "calibration/coverage@20%": 0.8891553133514986, "calibration/coverage@25%": 0.9139211625794733, "calibration/coverage@30%": 0.9462466848319708, "calibration/coverage@5%": 0.48544393797740987, "calibration/distribution_entropy_10": 0.5355198771027292, "calibration/distribution_entropy_100": 0.2940913247107916, "calibration/ece": 0.12311490454704932, "calibration/mean_confidence": 0.7432522071893854, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01918402777777779, "completions/max_length": 3894.0, "completions/max_terminated_length": 3894.0, "completions/mean_length": 1133.996337890625, "completions/mean_terminated_length": 1156.170947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 332.4, "epoch": 1.8509615384615383, "grad_norm": 0.0003580257180146873, "learning_rate": 1.622596153846154e-06, "loss": -0.024, "num_tokens": 2046281922.0, "reward": 1.2793505191802979, "reward_std": 0.13908308297395705, "rewards/accuracy_reward": 0.739062488079071, "rewards/brier_reward": 0.8388078927993774, "rewards/confidence_one_or_zero": 0.000520833337213844, "rewards/format_reward": 0.9808159708976746, "rewards/mean_confidence_reward": 0.739722204208374, "sampling/batch_mean_priority_error": 0.029795208333333316, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.14444444444444446, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0026988604106009005, "sampling/priority_kl": 0.03000030443072319, "sampling/priority_scale": 0.8066103636985644, "sampling/prob_entropy": 10.278947448730468, "sampling/prob_max": 5.877538642380386e-05, "sampling/prob_min": 2.0727198352687994e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.8432000160217286, "sampling/prompt_draws_total": 55296.0, "sampling/seen_fraction": 0.9205333352088928, "sampling/unseen_fraction": 0.07946666479110717, "signal/accuracy_reward/centered_abs_mean": 0.09299045205116271, "signal/accuracy_reward/group_std_mean": 0.14082872718572617, "signal/accuracy_reward/group_zero_std_frac": 0.5194444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04649522602558136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04649522602558136, "signal/advantage_abs_mean": 0.08546854555606842, "signal/advantage_pre_scale_abs_mean": 0.08546854555606842, "signal/advantage_pre_scale_std": 0.19609870314598082, "signal/advantage_std": 0.19609870314598082, "signal/brier_reward/centered_abs_mean": 0.0788871854543686, "signal/brier_reward/group_std_mean": 0.11534269303083419, "signal/brier_reward/group_zero_std_frac": 0.29166666567325594, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0394435927271843, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0394435927271843, "signal/confidence_one_or_zero/centered_abs_mean": 0.0009765624767169356, "signal/confidence_one_or_zero/group_std_mean": 0.002295762859284878, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9888888835906983, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 9.765624042756826e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 9.765624042756826e-09, "signal/format_reward/centered_abs_mean": 0.03418511338531971, "signal/format_reward/group_std_mean": 0.0701390728354454, "signal/format_reward/group_zero_std_frac": 0.6916666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.017092556692659855, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017092556692659855, "signal/mean_confidence_reward/centered_abs_mean": 0.06734917759895324, "signal/mean_confidence_reward/group_std_mean": 0.09585417956113815, "signal/mean_confidence_reward/group_zero_std_frac": 0.31111110746860504, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.734917519679584e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.734917519679584e-07, "step": 770 }, { "calibration/aurc": 0.12452181176719923, "calibration/batch_distribution_entropy": 0.5645438819251132, "calibration/batch_entropy_100bins": 0.30598169572049205, "calibration/batch_entropy_10bins": 0.5645438819251132, "calibration/batch_entropy_50bins": 0.36019670145980476, "calibration/batch_uniqueness": 0.21593060217032994, "calibration/confidence_entropy": 0.43755199598394123, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.5315212310817679, "calibration/coverage@15%": 0.6500954204779696, "calibration/coverage@20%": 0.7159204687677779, "calibration/coverage@25%": 0.7367021276595744, "calibration/coverage@30%": 0.9347236294044805, "calibration/coverage@5%": 0.3420548763580603, "calibration/distribution_entropy_10": 0.5645438819251132, "calibration/distribution_entropy_100": 0.30598169572049205, "calibration/ece": 0.1157229115376667, "calibration/mean_confidence": 0.7470467694516723, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020572916666666698, "completions/max_length": 4037.2, "completions/max_terminated_length": 4037.2, "completions/mean_length": 1137.8390869140626, "completions/mean_terminated_length": 1161.6483642578125, "completions/min_length": 0.0, "completions/min_terminated_length": 346.0, "epoch": 1.8629807692307692, "grad_norm": 0.0003609874111134559, "learning_rate": 1.592548076923077e-06, "loss": -0.0248, "num_tokens": 2062517284.0, "reward": 1.2978382110595703, "reward_std": 0.16978729665279388, "rewards/accuracy_reward": 0.757118034362793, "rewards/brier_reward": 0.859116506576538, "rewards/confidence_one_or_zero": 0.0005208333430346101, "rewards/format_reward": 0.9794270873069764, "rewards/mean_confidence_reward": 0.7398263812065125, "sampling/batch_mean_priority_error": 0.015230972222222208, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.1388888888888889, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.002722617890685797, "sampling/priority_kl": 0.030000732839107515, "sampling/priority_scale": 0.8091420352226123, "sampling/prob_entropy": 10.278940010070801, "sampling/prob_max": 5.902362318011001e-05, "sampling/prob_min": 2.07467583095422e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.8552000045776367, "sampling/prompt_draws_total": 55656.0, "sampling/seen_fraction": 0.9221866846084594, "sampling/unseen_fraction": 0.07781331539154053, "signal/accuracy_reward/centered_abs_mean": 0.13017577975988387, "signal/accuracy_reward/group_std_mean": 0.1837555378675461, "signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06508788987994193, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06508788987994193, "signal/advantage_abs_mean": 0.10840746015310287, "signal/advantage_pre_scale_abs_mean": 0.10840746015310287, "signal/advantage_pre_scale_std": 0.22453723847866058, "signal/advantage_std": 0.22453723847866058, "signal/brier_reward/centered_abs_mean": 0.0910664215683937, "signal/brier_reward/group_std_mean": 0.13270987272262574, "signal/brier_reward/group_zero_std_frac": 0.2333333373069763, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04553321078419685, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04553321078419685, "signal/confidence_one_or_zero/centered_abs_mean": 0.0009657118120230734, "signal/confidence_one_or_zero/group_std_mean": 0.001996822049841285, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 9.6571170615789e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 9.6571170615789e-09, "signal/format_reward/centered_abs_mean": 0.0372775599360466, "signal/format_reward/group_std_mean": 0.07882281616330147, "signal/format_reward/group_zero_std_frac": 0.6472222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0186387799680233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0186387799680233, "signal/mean_confidence_reward/centered_abs_mean": 0.07244938015937805, "signal/mean_confidence_reward/group_std_mean": 0.1059411883354187, "signal/mean_confidence_reward/group_zero_std_frac": 0.2472222238779068, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.244937705763732e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.244937705763732e-07, "step": 775 }, { "calibration/aurc": 0.11927318188747289, "calibration/batch_distribution_entropy": 0.5832886490653397, "calibration/batch_entropy_100bins": 0.32257344192265663, "calibration/batch_entropy_10bins": 0.5832886490653397, "calibration/batch_entropy_50bins": 0.37972823663678856, "calibration/batch_uniqueness": 0.2869097012867606, "calibration/confidence_entropy": 0.4442802120811054, "calibration/coverage@0%": 0.0026913712699970275, "calibration/coverage@1%": 0.0026913712699970275, "calibration/coverage@10%": 0.6345159195735076, "calibration/coverage@15%": 0.7238751797743481, "calibration/coverage@20%": 0.7616195582547809, "calibration/coverage@25%": 0.8136705538510954, "calibration/coverage@30%": 0.8939209882708932, "calibration/coverage@5%": 0.398541891975399, "calibration/distribution_entropy_10": 0.5832886490653397, "calibration/distribution_entropy_100": 0.32257344192265663, "calibration/ece": 0.124093627094425, "calibration/mean_confidence": 0.7332632525470724, "calibration/unique_confidence_per_question": 0.025, "calibration/unique_confidences": 9.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02274305555555558, "completions/max_length": 3808.8, "completions/max_terminated_length": 3808.8, "completions/mean_length": 1098.0076538085937, "completions/mean_terminated_length": 1123.6128173828124, "completions/min_length": 0.0, "completions/min_terminated_length": 338.4, "epoch": 1.875, "grad_norm": 0.00035296386340633035, "learning_rate": 1.5625e-06, "loss": -0.0281, "num_tokens": 2078260412.0, "reward": 1.2954104661941528, "reward_std": 0.16067642271518706, "rewards/accuracy_reward": 0.7480902791023254, "rewards/brier_reward": 0.8654589772224426, "rewards/confidence_one_or_zero": 0.0014756944379769265, "rewards/format_reward": 0.9772569537162781, "rewards/mean_confidence_reward": 0.739305567741394, "sampling/batch_mean_priority_error": 0.016687638888888877, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.11944444444444444, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0027397376950830223, "sampling/priority_kl": 0.029999836534261703, "sampling/priority_scale": 0.8112623870139941, "sampling/prob_entropy": 10.27894859313965, "sampling/prob_max": 5.925482328166254e-05, "sampling/prob_min": 2.0771039271494373e-05, "sampling/prompt_draws_max": 7.0, "sampling/prompt_draws_mean": 1.8672000169754028, "sampling/prompt_draws_total": 56016.0, "sampling/seen_fraction": 0.9236533403396606, "sampling/unseen_fraction": 0.07634665966033935, "signal/accuracy_reward/centered_abs_mean": 0.10810547024011612, "signal/accuracy_reward/group_std_mean": 0.15696669220924378, "signal/accuracy_reward/group_zero_std_frac": 0.4888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05405273512005806, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05405273512005806, "signal/advantage_abs_mean": 0.09703816920518875, "signal/advantage_pre_scale_abs_mean": 0.09703816920518875, "signal/advantage_pre_scale_std": 0.21690228283405305, "signal/advantage_std": 0.21690228283405305, "signal/brier_reward/centered_abs_mean": 0.08053490072488785, "signal/brier_reward/group_std_mean": 0.1245546817779541, "signal/brier_reward/group_zero_std_frac": 0.22777777910232544, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.040267450362443925, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.040267450362443925, "signal/confidence_one_or_zero/centered_abs_mean": 0.0024576822528615595, "signal/confidence_one_or_zero/group_std_mean": 0.004898328334093094, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9777777671813965, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.4576820578658953e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.4576820578658953e-08, "signal/format_reward/centered_abs_mean": 0.04134114608168602, "signal/format_reward/group_std_mean": 0.08592917323112488, "signal/format_reward/group_zero_std_frac": 0.6250000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02067057304084301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02067057304084301, "signal/mean_confidence_reward/centered_abs_mean": 0.06903146356344222, "signal/mean_confidence_reward/group_std_mean": 0.1041269913315773, "signal/mean_confidence_reward/group_zero_std_frac": 0.23611111044883729, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.903146299919172e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.903146299919172e-07, "step": 780 }, { "calibration/aurc": 0.0742543345201262, "calibration/batch_distribution_entropy": 0.589344371392418, "calibration/batch_entropy_100bins": 0.32672389653760925, "calibration/batch_entropy_10bins": 0.589344371392418, "calibration/batch_entropy_50bins": 0.38461408465571767, "calibration/batch_uniqueness": 0.31280304906023604, "calibration/confidence_entropy": 0.44947390020650085, "calibration/coverage@0%": 0.45254355752482356, "calibration/coverage@1%": 0.4780291397957228, "calibration/coverage@10%": 0.7283491565285981, "calibration/coverage@15%": 0.7909260581440964, "calibration/coverage@20%": 0.8653551912568307, "calibration/coverage@25%": 0.8749551912568307, "calibration/coverage@30%": 0.9204109289617486, "calibration/coverage@5%": 0.6444980282635661, "calibration/distribution_entropy_10": 0.589344371392418, "calibration/distribution_entropy_100": 0.32672389653760925, "calibration/ece": 0.1305743347675779, "calibration/mean_confidence": 0.7089313602672528, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03116319444444444, "completions/max_length": 3744.0, "completions/max_terminated_length": 3744.0, "completions/mean_length": 1160.1212890625, "completions/mean_terminated_length": 1197.4378662109375, "completions/min_length": 0.0, "completions/min_terminated_length": 397.8, "epoch": 1.8870192307692308, "grad_norm": 0.00033553142566233873, "learning_rate": 1.5324519230769232e-06, "loss": -0.0351, "num_tokens": 2094745393.0, "reward": 1.2564839124679565, "reward_std": 0.17908986806869506, "rewards/accuracy_reward": 0.6998263955116272, "rewards/brier_reward": 0.8443773746490478, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9687499880790711, "rewards/mean_confidence_reward": 0.7083628416061402, "sampling/batch_mean_priority_error": 0.021218819444444433, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.11666666666666667, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.002760501578450203, "sampling/priority_kl": 0.029999128356575967, "sampling/priority_scale": 0.8130677400855347, "sampling/prob_entropy": 10.278962326049804, "sampling/prob_max": 5.946964593022131e-05, "sampling/prob_min": 2.066538181679789e-05, "sampling/prompt_draws_max": 7.2, "sampling/prompt_draws_mean": 1.8791999816894531, "sampling/prompt_draws_total": 56376.0, "sampling/seen_fraction": 0.925, "sampling/unseen_fraction": 0.075, "signal/accuracy_reward/centered_abs_mean": 0.11258680373430252, "signal/accuracy_reward/group_std_mean": 0.16565240919589996, "signal/accuracy_reward/group_zero_std_frac": 0.46111111640930175, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05629340186715126, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05629340186715126, "signal/advantage_abs_mean": 0.11116953194141388, "signal/advantage_pre_scale_abs_mean": 0.11116953194141388, "signal/advantage_pre_scale_std": 0.23421459197998046, "signal/advantage_std": 0.23421459197998046, "signal/brier_reward/centered_abs_mean": 0.09295912683010102, "signal/brier_reward/group_std_mean": 0.1409787118434906, "signal/brier_reward/group_zero_std_frac": 0.21666666567325593, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04647956341505051, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.04647956341505051, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.05329861119389534, "signal/format_reward/group_std_mean": 0.10066336393356323, "signal/format_reward/group_zero_std_frac": 0.5888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02664930559694767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02664930559694767, "signal/mean_confidence_reward/centered_abs_mean": 0.07824294120073319, "signal/mean_confidence_reward/group_std_mean": 0.11478137820959092, "signal/mean_confidence_reward/group_zero_std_frac": 0.2222222238779068, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 7.824294129932241e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 7.824294129932241e-07, "step": 785 }, { "calibration/aurc": 0.06961830567263416, "calibration/batch_distribution_entropy": 0.5126674195657974, "calibration/batch_entropy_100bins": 0.28559755646404594, "calibration/batch_entropy_10bins": 0.5126674195657974, "calibration/batch_entropy_50bins": 0.3362008225397263, "calibration/batch_uniqueness": 0.13931816006468628, "calibration/confidence_entropy": 0.42045557884474016, "calibration/coverage@0%": 0.1750028368794326, "calibration/coverage@1%": 0.4881517730496454, "calibration/coverage@10%": 0.6925202687569989, "calibration/coverage@15%": 0.8557704804304482, "calibration/coverage@20%": 0.9347311367850519, "calibration/coverage@25%": 0.9788617886178862, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5073219858156028, "calibration/distribution_entropy_10": 0.5126674195657974, "calibration/distribution_entropy_100": 0.28559755646404594, "calibration/ece": 0.14474426545519645, "calibration/mean_confidence": 0.7469527089649397, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022048611111111116, "completions/max_length": 3817.6, "completions/max_terminated_length": 3817.6, "completions/mean_length": 1154.2109619140624, "completions/mean_terminated_length": 1180.3725341796876, "completions/min_length": 0.0, "completions/min_terminated_length": 356.6, "epoch": 1.8990384615384617, "grad_norm": 0.00026942059048451483, "learning_rate": 1.5024038461538462e-06, "loss": -0.0259, "num_tokens": 2111142735.0, "reward": 1.2929217100143433, "reward_std": 0.1408931255340576, "rewards/accuracy_reward": 0.7424479126930237, "rewards/brier_reward": 0.8654301285743713, "rewards/confidence_one_or_zero": 0.000260416668606922, "rewards/format_reward": 0.9779513955116272, "rewards/mean_confidence_reward": 0.7021756172180176, "sampling/batch_mean_priority_error": 0.02039062499999999, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.16666666666666669, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0027823722921311855, "sampling/priority_kl": 0.03000016063451767, "sampling/priority_scale": 0.8168170273536817, "sampling/prob_entropy": 10.278951454162598, "sampling/prob_max": 5.9781748859677464e-05, "sampling/prob_min": 2.014181300182827e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.8911999940872193, "sampling/prompt_draws_total": 56736.0, "sampling/seen_fraction": 0.9269533276557922, "sampling/unseen_fraction": 0.07304667234420777, "signal/accuracy_reward/centered_abs_mean": 0.09775933176279068, "signal/accuracy_reward/group_std_mean": 0.14343787282705306, "signal/accuracy_reward/group_zero_std_frac": 0.5361111342906952, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04887966588139534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04887966588139534, "signal/advantage_abs_mean": 0.08402638137340546, "signal/advantage_pre_scale_abs_mean": 0.08402638137340546, "signal/advantage_pre_scale_std": 0.19729985892772675, "signal/advantage_std": 0.19729985892772675, "signal/brier_reward/centered_abs_mean": 0.07575573846697807, "signal/brier_reward/group_std_mean": 0.1154901534318924, "signal/brier_reward/group_zero_std_frac": 0.26666666865348815, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.037877869233489035, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.037877869233489035, "signal/confidence_one_or_zero/centered_abs_mean": 0.0004937065881676972, "signal/confidence_one_or_zero/group_std_mean": 0.001174198230728507, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.9370658672387435e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.9370658672387435e-09, "signal/format_reward/centered_abs_mean": 0.03956163190305233, "signal/format_reward/group_std_mean": 0.07983514145016671, "signal/format_reward/group_zero_std_frac": 0.6611111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.019780815951526164, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.019780815951526164, "signal/mean_confidence_reward/centered_abs_mean": 0.06639526039361954, "signal/mean_confidence_reward/group_std_mean": 0.09458223283290863, "signal/mean_confidence_reward/group_zero_std_frac": 0.28611111640930176, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.639525736318319e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.639525736318319e-07, "step": 790 }, { "calibration/aurc": 0.10809945506791305, "calibration/batch_distribution_entropy": 0.51655803839253, "calibration/batch_entropy_100bins": 0.2764817265817528, "calibration/batch_entropy_10bins": 0.51655803839253, "calibration/batch_entropy_50bins": 0.3254698150951826, "calibration/batch_uniqueness": 0.10905104035191324, "calibration/confidence_entropy": 0.4123756179040833, "calibration/coverage@0%": 0.16886543535620052, "calibration/coverage@1%": 0.18258575197889182, "calibration/coverage@10%": 0.5745128779395297, "calibration/coverage@15%": 0.7417426545086119, "calibration/coverage@20%": 0.7608465608465609, "calibration/coverage@25%": 0.9472339954015346, "calibration/coverage@30%": 0.9883597883597883, "calibration/coverage@5%": 0.509484882418813, "calibration/distribution_entropy_10": 0.51655803839253, "calibration/distribution_entropy_100": 0.2764817265817528, "calibration/ece": 0.11734529183564894, "calibration/mean_confidence": 0.7646222394616007, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018142361111111095, "completions/max_length": 3878.4, "completions/max_terminated_length": 3878.4, "completions/mean_length": 1139.3298095703126, "completions/mean_terminated_length": 1160.3458251953125, "completions/min_length": 0.0, "completions/min_terminated_length": 348.0, "epoch": 1.9110576923076923, "grad_norm": 0.0003292847832199186, "learning_rate": 1.4723557692307693e-06, "loss": -0.021, "num_tokens": 2127333094.0, "reward": 1.3033838033676148, "reward_std": 0.13692439794540406, "rewards/accuracy_reward": 0.7607638835906982, "rewards/brier_reward": 0.8641319513320923, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9818576455116272, "rewards/mean_confidence_reward": 0.7087239623069763, "sampling/batch_mean_priority_error": 0.01860763888888887, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.1388888888888889, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0028058627154678105, "sampling/priority_kl": 0.02999955378472805, "sampling/priority_scale": 0.8200449288124219, "sampling/prob_entropy": 10.278947067260741, "sampling/prob_max": 6.007041083648801e-05, "sampling/prob_min": 2.0152244542259722e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9031999826431274, "sampling/prompt_draws_total": 57096.0, "sampling/seen_fraction": 0.9286933422088623, "sampling/unseen_fraction": 0.07130665779113769, "signal/accuracy_reward/centered_abs_mean": 0.10312500149011612, "signal/accuracy_reward/group_std_mean": 0.1497005671262741, "signal/accuracy_reward/group_zero_std_frac": 0.5250000178813934, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05156250074505806, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05156250074505806, "signal/advantage_abs_mean": 0.08454073220491409, "signal/advantage_pre_scale_abs_mean": 0.08454073220491409, "signal/advantage_pre_scale_std": 0.19431009292602539, "signal/advantage_std": 0.19431009292602539, "signal/brier_reward/centered_abs_mean": 0.07752134352922439, "signal/brier_reward/group_std_mean": 0.11462950706481934, "signal/brier_reward/group_zero_std_frac": 0.25833333730697633, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.038760671764612196, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.038760671764612196, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.032470703125, "signal/format_reward/group_std_mean": 0.06638929843902588, "signal/format_reward/group_zero_std_frac": 0.7138888835906982, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0162353515625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0162353515625, "signal/mean_confidence_reward/centered_abs_mean": 0.0648546002805233, "signal/mean_confidence_reward/group_std_mean": 0.09345088601112365, "signal/mean_confidence_reward/group_zero_std_frac": 0.26944445371627807, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.485460176008928e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.485460176008928e-07, "step": 795 }, { "calibration/aurc": 0.08310682378745267, "calibration/batch_distribution_entropy": 0.5474554567180643, "calibration/batch_entropy_100bins": 0.2976009201485093, "calibration/batch_entropy_10bins": 0.5474554567180643, "calibration/batch_entropy_50bins": 0.3503309880562792, "calibration/batch_uniqueness": 0.1914895892966749, "calibration/confidence_entropy": 0.4297139293492359, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.1569553805774278, "calibration/coverage@10%": 0.5246396339494996, "calibration/coverage@15%": 0.900323914456959, "calibration/coverage@20%": 0.9470198799550718, "calibration/coverage@25%": 0.9659520529071368, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.47196373180625156, "calibration/distribution_entropy_10": 0.5474554567180643, "calibration/distribution_entropy_100": 0.2976009201485093, "calibration/ece": 0.09881079840529157, "calibration/mean_confidence": 0.7574989102430648, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015538194444444441, "completions/max_length": 3858.8, "completions/max_terminated_length": 3858.8, "completions/mean_length": 1155.186572265625, "completions/mean_terminated_length": 1173.2910888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 392.2, "epoch": 1.9230769230769231, "grad_norm": 0.00032893981551751494, "learning_rate": 1.4423076923076922e-06, "loss": -0.0176, "num_tokens": 2143736747.0, "reward": 1.2860599279403686, "reward_std": 0.12449116706848144, "rewards/accuracy_reward": 0.7293402791023255, "rewards/brier_reward": 0.8583038091659546, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9844618082046509, "rewards/mean_confidence_reward": 0.6920920133590698, "sampling/batch_mean_priority_error": 0.02531944444444443, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.1222222222222222, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.002828038949519396, "sampling/priority_kl": 0.030001144856214523, "sampling/priority_scale": 0.8224454701179639, "sampling/prob_entropy": 10.278958320617676, "sampling/prob_max": 6.031659504515119e-05, "sampling/prob_min": 2.0170245261397212e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9151999950408936, "sampling/prompt_draws_total": 57456.0, "sampling/seen_fraction": 0.930133330821991, "sampling/unseen_fraction": 0.06986666917800903, "signal/accuracy_reward/centered_abs_mean": 0.09819878488779069, "signal/accuracy_reward/group_std_mean": 0.13898843824863433, "signal/accuracy_reward/group_zero_std_frac": 0.5638889074325562, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04909939244389534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04909939244389534, "signal/advantage_abs_mean": 0.08012338802218437, "signal/advantage_pre_scale_abs_mean": 0.08012338802218437, "signal/advantage_pre_scale_std": 0.18103047609329223, "signal/advantage_std": 0.18103047609329223, "signal/brier_reward/centered_abs_mean": 0.07400564849376678, "signal/brier_reward/group_std_mean": 0.10794279575347901, "signal/brier_reward/group_zero_std_frac": 0.25000000596046446, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03700282424688339, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03700282424688339, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.02664388008415699, "signal/format_reward/group_std_mean": 0.053740817308425906, "signal/format_reward/group_zero_std_frac": 0.7666666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013321940042078494, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013321940042078494, "signal/mean_confidence_reward/centered_abs_mean": 0.06345649063587189, "signal/mean_confidence_reward/group_std_mean": 0.088161501288414, "signal/mean_confidence_reward/group_zero_std_frac": 0.2583333343267441, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.345648898786749e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.345648898786749e-07, "step": 800 }, { "epoch": 1.9230769230769231, "eval_calibration/aurc": 0.11880398434664954, "eval_calibration/batch_distribution_entropy": 0.6147144311896361, "eval_calibration/batch_entropy_100bins": 0.3377655202084739, "eval_calibration/batch_entropy_10bins": 0.6147144311896361, "eval_calibration/batch_entropy_50bins": 0.39761210538908526, "eval_calibration/batch_uniqueness": 0.3144897799357819, "eval_calibration/confidence_entropy": 0.44134527960603626, "eval_calibration/coverage@0%": 0.0008849557522123894, "eval_calibration/coverage@1%": 0.0008849557522123894, "eval_calibration/coverage@10%": 0.5610619469026549, "eval_calibration/coverage@15%": 0.7911504424778761, "eval_calibration/coverage@20%": 0.8734513274336283, "eval_calibration/coverage@25%": 0.9362831858407079, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0008849557522123894, "eval_calibration/distribution_entropy_10": 0.6147144311896361, "eval_calibration/distribution_entropy_100": 0.3377655202084739, "eval_calibration/ece": 0.0228318584070795, "eval_calibration/mean_confidence": 0.7276106194690266, "eval_calibration/unique_confidence_per_question": 0.008680555555555556, "eval_calibration/unique_confidences": 10, "eval_completions/clipped_ratio": 0.016493055555555563, "eval_completions/max_length": 3315.8333333333335, "eval_completions/max_terminated_length": 3315.8333333333335, "eval_completions/mean_length": 1101.993428548177, "eval_completions/mean_terminated_length": 1120.671407063802, "eval_completions/min_length": 95.0, "eval_completions/min_terminated_length": 414.0, "eval_loss": 0.0, "eval_num_tokens": 2143736747.0, "eval_reward": 1.2803370555241902, "eval_reward_std": 0.3288910339275996, "eval_rewards/accuracy_reward": 0.7282986144224802, "eval_rewards/brier_reward": 0.851458340883255, "eval_rewards/confidence_one_or_zero": 0.0008680555814256271, "eval_rewards/format_reward": 0.980902781089147, "eval_rewards/mean_confidence_reward": 0.713715265194575, "eval_runtime": 215.3204, "eval_samples_per_second": 4.644, "eval_signal/accuracy_reward/centered_abs_mean": 0.3815646668275197, "eval_signal/accuracy_reward/group_std_mean": 0.44144123792648315, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19078233341375986, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19078233341375986, "eval_signal/advantage_abs_mean": 0.2663094103336334, "eval_signal/advantage_pre_scale_abs_mean": 0.2663094103336334, "eval_signal/advantage_pre_scale_std": 0.3311290790637334, "eval_signal/advantage_std": 0.3311290790637334, "eval_signal/brier_reward/centered_abs_mean": 0.1741362934311231, "eval_signal/brier_reward/group_std_mean": 0.23898712793986002, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08706814671556155, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08706814671556155, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0016818575871487458, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0049104637776811915, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222288449606, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151002e-08, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151002e-08, "eval_signal/format_reward/centered_abs_mean": 0.0360243059694767, "eval_signal/format_reward/group_std_mean": 0.08358859581251939, "eval_signal/format_reward/group_zero_std_frac": 0.6111111293236414, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.01801215298473835, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.01801215298473835, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.21578231950600943, "eval_signal/mean_confidence_reward/group_std_mean": 0.2589796607693036, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.1578231326202513e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.1578231326202513e-06, "eval_steps_per_second": 0.028, "step": 800 }, { "epoch": 1.9230769230769231, "step": 800, "train_probe_calibration/aurc": 0.07785864721043029, "train_probe_calibration/batch_distribution_entropy": 0.6141568816793782, "train_probe_calibration/batch_entropy_100bins": 0.33691340500194467, "train_probe_calibration/batch_entropy_10bins": 0.6141568816793782, "train_probe_calibration/batch_entropy_50bins": 0.3966090091550676, "train_probe_calibration/batch_uniqueness": 0.3150661881499335, "train_probe_calibration/confidence_entropy": 0.4417113423189894, "train_probe_calibration/coverage@0%": 0.0, "train_probe_calibration/coverage@1%": 0.0, "train_probe_calibration/coverage@10%": 0.7555163283318623, "train_probe_calibration/coverage@15%": 0.8711385701676964, "train_probe_calibration/coverage@20%": 0.940864960282436, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.559576345984113, "train_probe_calibration/distribution_entropy_10": 0.6141568816793782, "train_probe_calibration/distribution_entropy_100": 0.33691340500194467, "train_probe_calibration/ece": 0.05710503089143854, "train_probe_calibration/mean_confidence": 0.7289496910856135, "train_probe_calibration/unique_confidence_per_question": 0.0078125, "train_probe_calibration/unique_confidences": 9, "train_probe_completions/clipped_ratio": 0.013888888888888895, "train_probe_completions/max_length": 3508.1666666666665, "train_probe_completions/max_terminated_length": 3508.1666666666665, "train_probe_completions/mean_length": 1159.2106526692708, "train_probe_completions/mean_terminated_length": 1175.3173014322917, "train_probe_completions/min_length": 67.5, "train_probe_completions/min_terminated_length": 381.0, "train_probe_loss": 0.0, "train_probe_num_tokens": 2143736747.0, "train_probe_reward": 1.3131625652313232, "train_probe_reward_std": 0.3047017107407252, "train_probe_rewards/accuracy_reward": 0.7725694477558136, "train_probe_rewards/brier_reward": 0.8702343702316284, "train_probe_rewards/confidence_one_or_zero": 0.0, "train_probe_rewards/format_reward": 0.9835069477558136, "train_probe_rewards/mean_confidence_reward": 0.7169270614782969, "train_probe_runtime": 219.8338, "train_probe_samples_per_second": 4.549, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3395182341337204, "train_probe_signal/accuracy_reward/group_std_mean": 0.41524408757686615, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1697591170668602, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.1697591170668602, "train_probe_signal/advantage_abs_mean": 0.2342825730641683, "train_probe_signal/advantage_pre_scale_abs_mean": 0.2342825730641683, "train_probe_signal/advantage_pre_scale_std": 0.30573564767837524, "train_probe_signal/advantage_std": 0.30573564767837524, "train_probe_signal/brier_reward/centered_abs_mean": 0.1541151429216067, "train_probe_signal/brier_reward/group_std_mean": 0.21736273914575577, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07705757146080335, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07705757146080335, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.0, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/centered_abs_mean": 0.03173828125, "train_probe_signal/format_reward/group_std_mean": 0.08731999310354392, "train_probe_signal/format_reward/group_zero_std_frac": 0.5277777860562006, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.015869140625, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.015869140625, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.21296656876802444, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.256157249212265, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 2.1296655935051e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 2.1296655935051e-06, "train_probe_steps_per_second": 0.027 }, { "calibration/aurc": 0.14018359887347748, "calibration/batch_distribution_entropy": 0.5496416672348073, "calibration/batch_entropy_100bins": 0.30464898744357305, "calibration/batch_entropy_10bins": 0.5496416672348073, "calibration/batch_entropy_50bins": 0.3586278588392549, "calibration/batch_uniqueness": 0.21255569752260492, "calibration/confidence_entropy": 0.43085971630365183, "calibration/coverage@0%": 0.10600522193211488, "calibration/coverage@1%": 0.10600522193211488, "calibration/coverage@10%": 0.4817272836293558, "calibration/coverage@15%": 0.6566745855658296, "calibration/coverage@20%": 0.681375514946289, "calibration/coverage@25%": 0.7320855614973262, "calibration/coverage@30%": 0.9173718776152853, "calibration/coverage@5%": 0.3815107334653093, "calibration/distribution_entropy_10": 0.5496416672348073, "calibration/distribution_entropy_100": 0.30464898744357305, "calibration/ece": 0.12319044946411499, "calibration/mean_confidence": 0.7137513905267816, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013368055555555581, "completions/max_length": 3851.6, "completions/max_terminated_length": 3851.6, "completions/mean_length": 1107.5060913085938, "completions/mean_terminated_length": 1122.5991943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 374.4, "epoch": 1.9350961538461537, "grad_norm": 0.00039220356848090887, "learning_rate": 1.4122596153846154e-06, "loss": -0.0167, "num_tokens": 2159599793.0, "reward": 1.3191929578781127, "reward_std": 0.12780744731426238, "rewards/accuracy_reward": 0.7817708373069763, "rewards/brier_reward": 0.8699685335159302, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9866319417953491, "rewards/mean_confidence_reward": 0.7295781254768372, "sampling/batch_mean_priority_error": 0.015404583333333314, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.10833333333333332, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0028499219566583633, "sampling/priority_kl": 0.02999933548271656, "sampling/priority_scale": 0.8254779875511303, "sampling/prob_entropy": 10.278951835632324, "sampling/prob_max": 6.059883380657993e-05, "sampling/prob_min": 2.0182952357572504e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9272000074386597, "sampling/prompt_draws_total": 57816.0, "sampling/seen_fraction": 0.9317333340644837, "sampling/unseen_fraction": 0.06826666593551636, "signal/accuracy_reward/centered_abs_mean": 0.11254340261220933, "signal/accuracy_reward/group_std_mean": 0.15324022769927978, "signal/accuracy_reward/group_zero_std_frac": 0.547222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05627170130610466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05627170130610466, "signal/advantage_abs_mean": 0.08396558910608291, "signal/advantage_pre_scale_abs_mean": 0.08396558910608291, "signal/advantage_pre_scale_std": 0.186562117934227, "signal/advantage_std": 0.186562117934227, "signal/brier_reward/centered_abs_mean": 0.07254556939005852, "signal/brier_reward/group_std_mean": 0.10383311659097672, "signal/brier_reward/group_zero_std_frac": 0.2750000059604645, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03627278469502926, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03627278469502926, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.02421874962747097, "signal/format_reward/group_std_mean": 0.05017288029193878, "signal/format_reward/group_zero_std_frac": 0.7805555582046508, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012109374813735484, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012109374813735484, "signal/mean_confidence_reward/centered_abs_mean": 0.06297873258590699, "signal/mean_confidence_reward/group_std_mean": 0.08698192089796067, "signal/mean_confidence_reward/group_zero_std_frac": 0.27777777910232543, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.297873142102617e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.297873142102617e-07, "step": 805 }, { "calibration/aurc": 0.09635417618896773, "calibration/batch_distribution_entropy": 0.6069344126434967, "calibration/batch_entropy_100bins": 0.33837644445400744, "calibration/batch_entropy_10bins": 0.6069344126434967, "calibration/batch_entropy_50bins": 0.39833127552625586, "calibration/batch_uniqueness": 0.3299621599214643, "calibration/confidence_entropy": 0.4488800064944396, "calibration/coverage@0%": 0.2592541216380225, "calibration/coverage@1%": 0.35666938755149735, "calibration/coverage@10%": 0.6201582130176267, "calibration/coverage@15%": 0.7854698715538, "calibration/coverage@20%": 0.8520324225968146, "calibration/coverage@25%": 0.9184785179743304, "calibration/coverage@30%": 0.9486772486772488, "calibration/coverage@5%": 0.5282726669246759, "calibration/distribution_entropy_10": 0.6069344126434967, "calibration/distribution_entropy_100": 0.33837644445400744, "calibration/ece": 0.14434275009587091, "calibration/mean_confidence": 0.7124749362295653, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010937499999999978, "completions/max_length": 3784.4, "completions/max_terminated_length": 3784.4, "completions/mean_length": 1093.8130126953124, "completions/mean_terminated_length": 1106.06572265625, "completions/min_length": 0.0, "completions/min_terminated_length": 320.4, "epoch": 1.9471153846153846, "grad_norm": 0.00026345642982050776, "learning_rate": 1.3822115384615387e-06, "loss": -0.0131, "num_tokens": 2175325671.0, "reward": 1.3192132711410522, "reward_std": 0.11112102270126342, "rewards/accuracy_reward": 0.7668402791023254, "rewards/brier_reward": 0.8825091242790222, "rewards/confidence_one_or_zero": 0.000260416668606922, "rewards/format_reward": 0.9890625, "rewards/mean_confidence_reward": 0.7354774475097656, "sampling/batch_mean_priority_error": 0.014980902777777763, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.12222222222222223, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0028622327372431755, "sampling/priority_kl": 0.02999916300177574, "sampling/priority_scale": 0.8274989903206006, "sampling/prob_entropy": 10.27893295288086, "sampling/prob_max": 6.0829608264612035e-05, "sampling/prob_min": 2.0205341570544988e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9391999959945678, "sampling/prompt_draws_total": 58176.0, "sampling/seen_fraction": 0.9329733371734619, "sampling/unseen_fraction": 0.06702666282653809, "signal/accuracy_reward/centered_abs_mean": 0.08981119841337204, "signal/accuracy_reward/group_std_mean": 0.125412817299366, "signal/accuracy_reward/group_zero_std_frac": 0.619444465637207, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04490559920668602, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04490559920668602, "signal/advantage_abs_mean": 0.07182730138301849, "signal/advantage_pre_scale_abs_mean": 0.07182730138301849, "signal/advantage_pre_scale_std": 0.16563299298286438, "signal/advantage_std": 0.16563299298286438, "signal/brier_reward/centered_abs_mean": 0.06471699699759484, "signal/brier_reward/group_std_mean": 0.09428556412458419, "signal/brier_reward/group_zero_std_frac": 0.3027777761220932, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03235849849879742, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03235849849879742, "signal/confidence_one_or_zero/centered_abs_mean": 0.0005045572877861559, "signal/confidence_one_or_zero/group_std_mean": 0.0014731390401721, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.0455724931453e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.0455724931453e-09, "signal/format_reward/centered_abs_mean": 0.020095486007630826, "signal/format_reward/group_std_mean": 0.04359339289367199, "signal/format_reward/group_zero_std_frac": 0.8027777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010047743003815413, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010047743003815413, "signal/mean_confidence_reward/centered_abs_mean": 0.057460668683052066, "signal/mean_confidence_reward/group_std_mean": 0.08030145466327668, "signal/mean_confidence_reward/group_zero_std_frac": 0.3111111134290695, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.746066847223119e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.746066847223119e-07, "step": 810 }, { "calibration/aurc": 0.11943973394907752, "calibration/batch_distribution_entropy": 0.6097454318555073, "calibration/batch_entropy_100bins": 0.3330386406506821, "calibration/batch_entropy_10bins": 0.6097454318555073, "calibration/batch_entropy_50bins": 0.3920476992539233, "calibration/batch_uniqueness": 0.35021879727501143, "calibration/confidence_entropy": 0.46230057877054287, "calibration/coverage@0%": 0.0005208333333333333, "calibration/coverage@1%": 0.11968093832020996, "calibration/coverage@10%": 0.4596600877192983, "calibration/coverage@15%": 0.6889399384355663, "calibration/coverage@20%": 0.7673364872512395, "calibration/coverage@25%": 0.8386753920915095, "calibration/coverage@30%": 0.928375872600349, "calibration/coverage@5%": 0.44544956140350883, "calibration/distribution_entropy_10": 0.6097454318555073, "calibration/distribution_entropy_100": 0.3330386406506821, "calibration/ece": 0.12871242879192818, "calibration/mean_confidence": 0.7380755399174433, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010763888888888906, "completions/max_length": 3856.6, "completions/max_terminated_length": 3856.6, "completions/mean_length": 1123.5369384765625, "completions/mean_terminated_length": 1135.761328125, "completions/min_length": 0.0, "completions/min_terminated_length": 347.8, "epoch": 1.9591346153846154, "grad_norm": 0.00031497349846176803, "learning_rate": 1.3521634615384617e-06, "loss": -0.0125, "num_tokens": 2191361008.0, "reward": 1.2978993654251099, "reward_std": 0.11660217195749283, "rewards/accuracy_reward": 0.7500867962837219, "rewards/brier_reward": 0.8564614892005921, "rewards/confidence_one_or_zero": 0.0003472222248092294, "rewards/format_reward": 0.9892361164093018, "rewards/mean_confidence_reward": 0.7145020127296448, "sampling/batch_mean_priority_error": 0.01364756944444443, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.125, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.002879917761310935, "sampling/priority_kl": 0.030000254139304162, "sampling/priority_scale": 0.8304974854225293, "sampling/prob_entropy": 10.278964805603028, "sampling/prob_max": 6.111022739787585e-05, "sampling/prob_min": 2.0217650671838783e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.951200008392334, "sampling/prompt_draws_total": 58536.0, "sampling/seen_fraction": 0.9345066666603088, "sampling/unseen_fraction": 0.06549333333969116, "signal/accuracy_reward/centered_abs_mean": 0.09825846254825592, "signal/accuracy_reward/group_std_mean": 0.13935578167438506, "signal/accuracy_reward/group_zero_std_frac": 0.5611111283302307, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04912923127412796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04912923127412796, "signal/advantage_abs_mean": 0.07806268632411957, "signal/advantage_pre_scale_abs_mean": 0.07806268632411957, "signal/advantage_pre_scale_std": 0.16955201625823973, "signal/advantage_std": 0.16955201625823973, "signal/brier_reward/centered_abs_mean": 0.0731019839644432, "signal/brier_reward/group_std_mean": 0.1011770486831665, "signal/brier_reward/group_zero_std_frac": 0.2722222298383713, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0365509919822216, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0365509919822216, "signal/confidence_one_or_zero/centered_abs_mean": 0.0006727430503815412, "signal/confidence_one_or_zero/group_std_mean": 0.0019641853868961334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9888888835906983, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.7274299908604006e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.7274299908604006e-09, "signal/format_reward/centered_abs_mean": 0.01690538190305233, "signal/format_reward/group_std_mean": 0.03581056408584118, "signal/format_reward/group_zero_std_frac": 0.8361111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008452690951526166, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008452690951526166, "signal/mean_confidence_reward/centered_abs_mean": 0.0661589540541172, "signal/mean_confidence_reward/group_std_mean": 0.08866492062807083, "signal/mean_confidence_reward/group_zero_std_frac": 0.28333333432674407, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.615895244976855e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.615895244976855e-07, "step": 815 }, { "calibration/aurc": 0.08312325960839878, "calibration/batch_distribution_entropy": 0.5998779297506034, "calibration/batch_entropy_100bins": 0.3265478904334089, "calibration/batch_entropy_10bins": 0.5998779297506034, "calibration/batch_entropy_50bins": 0.38440689311760784, "calibration/batch_uniqueness": 0.34809773650721676, "calibration/confidence_entropy": 0.4625567678265304, "calibration/coverage@0%": 0.13159268929503914, "calibration/coverage@1%": 0.2706551892950392, "calibration/coverage@10%": 0.7121716413964393, "calibration/coverage@15%": 0.7550979655032367, "calibration/coverage@20%": 0.8651255174136491, "calibration/coverage@25%": 0.9336898395721924, "calibration/coverage@30%": 0.9336898395721924, "calibration/coverage@5%": 0.5316787422227396, "calibration/distribution_entropy_10": 0.5998779297506034, "calibration/distribution_entropy_100": 0.3265478904334089, "calibration/ece": 0.11884740100683082, "calibration/mean_confidence": 0.7191161629178507, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008506944444444442, "completions/max_length": 3874.8, "completions/max_terminated_length": 3874.8, "completions/mean_length": 1103.8129638671876, "completions/mean_terminated_length": 1113.31953125, "completions/min_length": 0.0, "completions/min_terminated_length": 335.4, "epoch": 1.9711538461538463, "grad_norm": 0.00028875298448838294, "learning_rate": 1.3221153846153848e-06, "loss": -0.0115, "num_tokens": 2207179365.0, "reward": 1.3053389310836792, "reward_std": 0.10983250439167022, "rewards/accuracy_reward": 0.7494791626930237, "rewards/brier_reward": 0.8696907758712769, "rewards/confidence_one_or_zero": 0.0005208333488553763, "rewards/format_reward": 0.9914930582046508, "rewards/mean_confidence_reward": 0.7453255295753479, "sampling/batch_mean_priority_error": 0.017092013888888875, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.09166666666666666, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0028944870922714473, "sampling/priority_kl": 0.03000035136938095, "sampling/priority_scale": 0.8323657929664477, "sampling/prob_entropy": 10.278962516784668, "sampling/prob_max": 6.1331047618296e-05, "sampling/prob_min": 2.0240285448380745e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9632000207901001, "sampling/prompt_draws_total": 58896.0, "sampling/seen_fraction": 0.935646653175354, "sampling/unseen_fraction": 0.064353346824646, "signal/accuracy_reward/centered_abs_mean": 0.08990885466337203, "signal/accuracy_reward/group_std_mean": 0.12569952458143235, "signal/accuracy_reward/group_zero_std_frac": 0.6111111164093017, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04495442733168602, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04495442733168602, "signal/advantage_abs_mean": 0.07215002328157424, "signal/advantage_pre_scale_abs_mean": 0.07215002328157424, "signal/advantage_pre_scale_std": 0.16824093759059905, "signal/advantage_std": 0.16824093759059905, "signal/brier_reward/centered_abs_mean": 0.06411735936999322, "signal/brier_reward/group_std_mean": 0.09321332573890687, "signal/brier_reward/group_zero_std_frac": 0.286111107468605, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03205867968499661, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03205867968499661, "signal/confidence_one_or_zero/centered_abs_mean": 0.0009440104477107525, "signal/confidence_one_or_zero/group_std_mean": 0.0016452476382255554, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 9.44010345449442e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 9.44010345449442e-09, "signal/format_reward/centered_abs_mean": 0.01577690988779068, "signal/format_reward/group_std_mean": 0.035082895308732986, "signal/format_reward/group_zero_std_frac": 0.8388888835906982, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00788845494389534, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00788845494389534, "signal/mean_confidence_reward/centered_abs_mean": 0.05973497703671456, "signal/mean_confidence_reward/group_std_mean": 0.08283800929784775, "signal/mean_confidence_reward/group_zero_std_frac": 0.2972222238779068, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.973497536615469e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.973497536615469e-07, "step": 820 }, { "calibration/aurc": 0.134517132188666, "calibration/batch_distribution_entropy": 0.5765132471956196, "calibration/batch_entropy_100bins": 0.3098934592550557, "calibration/batch_entropy_10bins": 0.5765132471956196, "calibration/batch_entropy_50bins": 0.3648015662008893, "calibration/batch_uniqueness": 0.2773345030487901, "calibration/confidence_entropy": 0.4444361845612733, "calibration/coverage@0%": 0.08743455497382199, "calibration/coverage@1%": 0.1130890052356021, "calibration/coverage@10%": 0.473882286361064, "calibration/coverage@15%": 0.6896555209298408, "calibration/coverage@20%": 0.7025587467362925, "calibration/coverage@25%": 0.7706706060189639, "calibration/coverage@30%": 0.8, "calibration/coverage@5%": 0.4251553643439793, "calibration/distribution_entropy_10": 0.5765132471956196, "calibration/distribution_entropy_100": 0.3098934592550557, "calibration/ece": 0.14405314137800523, "calibration/mean_confidence": 0.7510510679057971, "calibration/unique_confidence_per_question": 0.021354166666666664, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009809027777777767, "completions/max_length": 3832.2, "completions/max_terminated_length": 3832.2, "completions/mean_length": 1109.3079833984375, "completions/mean_terminated_length": 1120.2862060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 319.8, "epoch": 1.9831730769230769, "grad_norm": 0.0003624038945417851, "learning_rate": 1.292067307692308e-06, "loss": -0.0095, "num_tokens": 2223047681.0, "reward": 1.2855605363845826, "reward_std": 0.12681028991937637, "rewards/accuracy_reward": 0.725, "rewards/brier_reward": 0.8560023784637452, "rewards/confidence_one_or_zero": 0.0014756944321561605, "rewards/format_reward": 0.9901041746139526, "rewards/mean_confidence_reward": 0.7217751502990722, "sampling/batch_mean_priority_error": 0.020626736111111096, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.125, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0029144018888473512, "sampling/priority_kl": 0.029999225586652755, "sampling/priority_scale": 0.8351738035911694, "sampling/prob_entropy": 10.278949737548828, "sampling/prob_max": 6.160250341054052e-05, "sampling/prob_min": 2.0253971888450907e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9752000093460083, "sampling/prompt_draws_total": 59256.0, "sampling/seen_fraction": 0.9370333194732666, "sampling/unseen_fraction": 0.0629666805267334, "signal/accuracy_reward/centered_abs_mean": 0.11440972238779068, "signal/accuracy_reward/group_std_mean": 0.15993244349956512, "signal/accuracy_reward/group_zero_std_frac": 0.5111111164093017, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05720486119389534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05720486119389534, "signal/advantage_abs_mean": 0.084007428586483, "signal/advantage_pre_scale_abs_mean": 0.084007428586483, "signal/advantage_pre_scale_std": 0.17651504874229432, "signal/advantage_std": 0.17651504874229432, "signal/brier_reward/centered_abs_mean": 0.0703900247812271, "signal/brier_reward/group_std_mean": 0.10124405175447464, "signal/brier_reward/group_zero_std_frac": 0.21666666567325593, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03519501239061355, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03519501239061355, "signal/confidence_one_or_zero/centered_abs_mean": 0.0020128038129769267, "signal/confidence_one_or_zero/group_std_mean": 0.0033502712845802306, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.0128037192534975e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.0128037192534975e-08, "signal/format_reward/centered_abs_mean": 0.01800130195915699, "signal/format_reward/group_std_mean": 0.04087369553744793, "signal/format_reward/group_zero_std_frac": 0.8055555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009000650979578495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009000650979578495, "signal/mean_confidence_reward/centered_abs_mean": 0.06586859598755837, "signal/mean_confidence_reward/group_std_mean": 0.09096481949090958, "signal/mean_confidence_reward/group_zero_std_frac": 0.23055555522441865, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.586859740309591e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.586859740309591e-07, "step": 825 }, { "calibration/aurc": 0.05599930548950739, "calibration/batch_distribution_entropy": 0.596382853068725, "calibration/batch_entropy_100bins": 0.3233534373407135, "calibration/batch_entropy_10bins": 0.596382853068725, "calibration/batch_entropy_50bins": 0.3806464346227048, "calibration/batch_uniqueness": 0.3026821286182817, "calibration/confidence_entropy": 0.44498857508984246, "calibration/coverage@0%": 0.3928695921741526, "calibration/coverage@1%": 0.5114820990811839, "calibration/coverage@10%": 0.7170144628173889, "calibration/coverage@15%": 0.8434561514934973, "calibration/coverage@20%": 0.9599111234550757, "calibration/coverage@25%": 0.9810026385224274, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.633519389230383, "calibration/distribution_entropy_10": 0.596382853068725, "calibration/distribution_entropy_100": 0.3233534373407135, "calibration/ece": 0.11589207315098178, "calibration/mean_confidence": 0.7559549854800441, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009288194444444443, "completions/max_length": 3944.6, "completions/max_terminated_length": 3944.6, "completions/mean_length": 1123.5111328125, "completions/mean_terminated_length": 1134.081689453125, "completions/min_length": 0.0, "completions/min_terminated_length": 343.4, "epoch": 1.9951923076923077, "grad_norm": 0.00034137690090574324, "learning_rate": 1.2620192307692309e-06, "loss": -0.0091, "num_tokens": 2239053249.0, "reward": 1.3294086456298828, "reward_std": 0.11628881245851516, "rewards/accuracy_reward": 0.7828125, "rewards/brier_reward": 0.8852781653404236, "rewards/confidence_one_or_zero": 8.680555620230735e-05, "rewards/format_reward": 0.9907118201255798, "rewards/mean_confidence_reward": 0.7438917636871338, "sampling/batch_mean_priority_error": 0.015333333333333319, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.11111111111111112, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0029324951581656935, "sampling/priority_kl": 0.029998801276087762, "sampling/priority_scale": 0.8383028804557398, "sampling/prob_entropy": 10.278944396972657, "sampling/prob_max": 6.189564446685835e-05, "sampling/prob_min": 2.0265720377210527e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9871999740600585, "sampling/prompt_draws_total": 59616.0, "sampling/seen_fraction": 0.9384733319282532, "sampling/unseen_fraction": 0.06152666807174682, "signal/accuracy_reward/centered_abs_mean": 0.10640190988779068, "signal/accuracy_reward/group_std_mean": 0.1437681049108505, "signal/accuracy_reward/group_zero_std_frac": 0.5777777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05320095494389534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05320095494389534, "signal/advantage_abs_mean": 0.07790218591690064, "signal/advantage_pre_scale_abs_mean": 0.07790218591690064, "signal/advantage_pre_scale_std": 0.17275477647781373, "signal/advantage_std": 0.17275477647781373, "signal/brier_reward/centered_abs_mean": 0.06396772861480712, "signal/brier_reward/group_std_mean": 0.09234273433685303, "signal/brier_reward/group_zero_std_frac": 0.3000000059604645, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03198386430740356, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03198386430740356, "signal/confidence_one_or_zero/centered_abs_mean": 0.0001681857625953853, "signal/confidence_one_or_zero/group_std_mean": 0.0004910463467240334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151001e-09, "signal/format_reward/centered_abs_mean": 0.01687825508415699, "signal/format_reward/group_std_mean": 0.038504977524280545, "signal/format_reward/group_zero_std_frac": 0.8166666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008439127542078494, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008439127542078494, "signal/mean_confidence_reward/centered_abs_mean": 0.05897117778658867, "signal/mean_confidence_reward/group_std_mean": 0.08022859096527099, "signal/mean_confidence_reward/group_zero_std_frac": 0.30555556416511537, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.897117375752714e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.897117375752714e-07, "step": 830 }, { "calibration/aurc": 0.09532195250897704, "calibration/batch_distribution_entropy": 0.5789066551694979, "calibration/batch_entropy_100bins": 0.3114747096174578, "calibration/batch_entropy_10bins": 0.5789066551694979, "calibration/batch_entropy_50bins": 0.36666298854309254, "calibration/batch_uniqueness": 0.28124029620299623, "calibration/confidence_entropy": 0.4461880173219065, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.09018567639257294, "calibration/coverage@10%": 0.6630341494576916, "calibration/coverage@15%": 0.7041357090282889, "calibration/coverage@20%": 0.7343744358187398, "calibration/coverage@25%": 0.9196472857536511, "calibration/coverage@30%": 0.954282171397079, "calibration/coverage@5%": 0.5344841473745607, "calibration/distribution_entropy_10": 0.5789066551694979, "calibration/distribution_entropy_100": 0.3114747096174578, "calibration/ece": 0.10387739165931135, "calibration/mean_confidence": 0.7439021672650166, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00581597222222221, "completions/max_length": 3222.0, "completions/max_terminated_length": 3222.0, "completions/mean_length": 1013.2931518554688, "completions/mean_terminated_length": 1019.6262084960938, "completions/min_length": 109.0, "completions/min_terminated_length": 390.0, "epoch": 2.0072115384615383, "grad_norm": 0.00037373026134446263, "learning_rate": 1.231971153846154e-06, "loss": -0.0107, "num_tokens": 2254636269.0, "reward": 1.3175593137741088, "reward_std": 0.11637879014015198, "rewards/accuracy_reward": 0.7723090291023255, "rewards/brier_reward": 0.8709544539451599, "rewards/confidence_one_or_zero": 0.0003472222248092294, "rewards/format_reward": 0.9918402791023254, "rewards/mean_confidence_reward": 0.7433246612548828, "sampling/batch_mean_priority_error": 0.018690972222222206, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.09722222222222224, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0029484704602509735, "sampling/priority_kl": 0.030000633746385574, "sampling/priority_scale": 0.8412145436042919, "sampling/prob_entropy": 10.278964805603028, "sampling/prob_max": 6.217749323695898e-05, "sampling/prob_min": 2.027921473199967e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 1.9991999864578247, "sampling/prompt_draws_total": 59976.0, "sampling/seen_fraction": 0.9398399949073791, "sampling/unseen_fraction": 0.06016000509262085, "signal/accuracy_reward/centered_abs_mean": 0.1052788645029068, "signal/accuracy_reward/group_std_mean": 0.14525774121284485, "signal/accuracy_reward/group_zero_std_frac": 0.5583333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0526394322514534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0526394322514534, "signal/advantage_abs_mean": 0.07736062556505204, "signal/advantage_pre_scale_abs_mean": 0.07736062556505204, "signal/advantage_pre_scale_std": 0.17310604453086853, "signal/advantage_std": 0.17310604453086853, "signal/brier_reward/centered_abs_mean": 0.06680887416005135, "signal/brier_reward/group_std_mean": 0.09623768329620361, "signal/brier_reward/group_zero_std_frac": 0.2916666716337204, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.033404437080025676, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.033404437080025676, "signal/confidence_one_or_zero/centered_abs_mean": 0.0006076388992369175, "signal/confidence_one_or_zero/group_std_mean": 0.0009333631955087185, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9972222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.076388103792851e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.076388103792851e-09, "signal/format_reward/centered_abs_mean": 0.01511501744389534, "signal/format_reward/group_std_mean": 0.03533230200409889, "signal/format_reward/group_zero_std_frac": 0.8277777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00755750872194767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00755750872194767, "signal/mean_confidence_reward/centered_abs_mean": 0.055697161704301834, "signal/mean_confidence_reward/group_std_mean": 0.07788751721382141, "signal/mean_confidence_reward/group_zero_std_frac": 0.3083333373069763, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.569716222453281e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.569716222453281e-07, "step": 835 }, { "calibration/aurc": 0.14482306548752338, "calibration/batch_distribution_entropy": 0.600104045211634, "calibration/batch_entropy_100bins": 0.3210407699158253, "calibration/batch_entropy_10bins": 0.600104045211634, "calibration/batch_entropy_50bins": 0.3779239999487719, "calibration/batch_uniqueness": 0.3226214688891113, "calibration/confidence_entropy": 0.45247366041500064, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.3567177980852915, "calibration/coverage@15%": 0.5672233964662163, "calibration/coverage@20%": 0.6812894580518601, "calibration/coverage@25%": 0.8855333139928441, "calibration/coverage@30%": 0.9259632254410322, "calibration/coverage@5%": 0.140625, "calibration/distribution_entropy_10": 0.600104045211634, "calibration/distribution_entropy_100": 0.3210407699158253, "calibration/ece": 0.08676930940638516, "calibration/mean_confidence": 0.758886644010665, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006076388888888884, "completions/max_length": 3938.6, "completions/max_terminated_length": 3938.6, "completions/mean_length": 1111.3046875, "completions/mean_terminated_length": 1118.1972412109376, "completions/min_length": 0.0, "completions/min_terminated_length": 314.0, "epoch": 2.019230769230769, "grad_norm": 0.0003290143795311451, "learning_rate": 1.201923076923077e-06, "loss": -0.0074, "num_tokens": 2270521187.0, "reward": 1.3089183807373046, "reward_std": 0.11291085630655288, "rewards/accuracy_reward": 0.7555555462837219, "rewards/brier_reward": 0.8684296846389771, "rewards/confidence_one_or_zero": 0.0010416666569653898, "rewards/format_reward": 0.9938368082046509, "rewards/mean_confidence_reward": 0.7367968678474426, "sampling/batch_mean_priority_error": 0.018819444444444423, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.09166666666666667, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0029684344306588175, "sampling/priority_kl": 0.030000444129109384, "sampling/priority_scale": 0.8429635942680761, "sampling/prob_entropy": 10.27894229888916, "sampling/prob_max": 6.239795475266874e-05, "sampling/prob_min": 2.030375944741536e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.0112000465393067, "sampling/prompt_draws_total": 60336.0, "sampling/seen_fraction": 0.9408199906349182, "sampling/unseen_fraction": 0.05918000936508179, "signal/accuracy_reward/centered_abs_mean": 0.10628255158662796, "signal/accuracy_reward/group_std_mean": 0.14591727107763292, "signal/accuracy_reward/group_zero_std_frac": 0.5611111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05314127579331398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05314127579331398, "signal/advantage_abs_mean": 0.07720207050442696, "signal/advantage_pre_scale_abs_mean": 0.07720207050442696, "signal/advantage_pre_scale_std": 0.16867663860321044, "signal/advantage_std": 0.16867663860321044, "signal/brier_reward/centered_abs_mean": 0.06350096985697747, "signal/brier_reward/group_std_mean": 0.09030406326055526, "signal/brier_reward/group_zero_std_frac": 0.28888889253139494, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03175048492848873, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03175048492848873, "signal/confidence_one_or_zero/centered_abs_mean": 0.0014214409398846327, "signal/confidence_one_or_zero/group_std_mean": 0.0018314871937036513, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.4214408849966275e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.4214408849966275e-08, "signal/format_reward/centered_abs_mean": 0.011496310774236917, "signal/format_reward/group_std_mean": 0.026431189477443696, "signal/format_reward/group_zero_std_frac": 0.875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005748155387118458, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005748155387118458, "signal/mean_confidence_reward/centered_abs_mean": 0.058305129408836365, "signal/mean_confidence_reward/group_std_mean": 0.0791845440864563, "signal/mean_confidence_reward/group_zero_std_frac": 0.3000000029802322, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.830512634474872e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.830512634474872e-07, "step": 840 }, { "calibration/aurc": 0.06706975557753085, "calibration/batch_distribution_entropy": 0.5721016540996581, "calibration/batch_entropy_100bins": 0.3115216554832395, "calibration/batch_entropy_10bins": 0.5721016540996581, "calibration/batch_entropy_50bins": 0.36671825245671313, "calibration/batch_uniqueness": 0.23765986424922766, "calibration/confidence_entropy": 0.4423131495801164, "calibration/coverage@0%": 0.20417194819509507, "calibration/coverage@1%": 0.3680449049909621, "calibration/coverage@10%": 0.6030081601539127, "calibration/coverage@15%": 0.906186369660654, "calibration/coverage@20%": 0.9657121772245313, "calibration/coverage@25%": 1.0, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.4381744026348315, "calibration/distribution_entropy_10": 0.5721016540996581, "calibration/distribution_entropy_100": 0.3115216554832395, "calibration/ece": 0.1172741215185497, "calibration/mean_confidence": 0.7487478732080494, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008940972222222232, "completions/max_length": 3910.4, "completions/max_terminated_length": 3910.4, "completions/mean_length": 1185.9043212890624, "completions/mean_terminated_length": 1196.602978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 345.0, "epoch": 2.03125, "grad_norm": 0.0003298562951385975, "learning_rate": 1.1718750000000001e-06, "loss": -0.0107, "num_tokens": 2287292437.0, "reward": 1.2747956275939942, "reward_std": 0.11422278136014938, "rewards/accuracy_reward": 0.7032986044883728, "rewards/brier_reward": 0.8552193760871887, "rewards/confidence_one_or_zero": 0.0, "rewards/format_reward": 0.9910590291023255, "rewards/mean_confidence_reward": 0.7114800333976745, "sampling/batch_mean_priority_error": 0.022694444444444427, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.08055555555555556, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.002991753304377198, "sampling/priority_kl": 0.029999999701976775, "sampling/priority_scale": 0.8449041426414624, "sampling/prob_entropy": 10.278952598571777, "sampling/prob_max": 6.262821407290175e-05, "sampling/prob_min": 2.0326085723354483e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.023200035095215, "sampling/prompt_draws_total": 60696.0, "sampling/seen_fraction": 0.941866660118103, "sampling/unseen_fraction": 0.05813333988189697, "signal/accuracy_reward/centered_abs_mean": 0.1039605051279068, "signal/accuracy_reward/group_std_mean": 0.1419745147228241, "signal/accuracy_reward/group_zero_std_frac": 0.5805555701255798, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0519802525639534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0519802525639534, "signal/advantage_abs_mean": 0.0762131504714489, "signal/advantage_pre_scale_abs_mean": 0.0762131504714489, "signal/advantage_pre_scale_std": 0.16904608607292176, "signal/advantage_std": 0.16904608607292176, "signal/brier_reward/centered_abs_mean": 0.06413592025637627, "signal/brier_reward/group_std_mean": 0.0929721713066101, "signal/brier_reward/group_zero_std_frac": 0.2583333313465118, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03206796012818813, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03206796012818813, "signal/confidence_one_or_zero/centered_abs_mean": 0.0, "signal/confidence_one_or_zero/group_std_mean": 0.0, "signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "signal/format_reward/centered_abs_mean": 0.016704644449055193, "signal/format_reward/group_std_mean": 0.0384040080010891, "signal/format_reward/group_zero_std_frac": 0.8194444656372071, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008352322224527597, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008352322224527597, "signal/mean_confidence_reward/centered_abs_mean": 0.06115180775523186, "signal/mean_confidence_reward/group_std_mean": 0.08378626108169555, "signal/mean_confidence_reward/group_zero_std_frac": 0.2611111104488373, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.115180099186545e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.115180099186545e-07, "step": 845 }, { "calibration/aurc": 0.09126816611407072, "calibration/batch_distribution_entropy": 0.516295811065282, "calibration/batch_entropy_100bins": 0.27987268448332003, "calibration/batch_entropy_10bins": 0.516295811065282, "calibration/batch_entropy_50bins": 0.32946159587166834, "calibration/batch_uniqueness": 0.10043751990069968, "calibration/confidence_entropy": 0.4204486071838677, "calibration/coverage@0%": 0.1371714221667972, "calibration/coverage@1%": 0.2718972707307659, "calibration/coverage@10%": 0.6968140041355322, "calibration/coverage@15%": 0.7426602756256365, "calibration/coverage@20%": 0.7659504662980957, "calibration/coverage@25%": 0.8975470486401967, "calibration/coverage@30%": 0.9748031496062992, "calibration/coverage@5%": 0.5180043382809688, "calibration/distribution_entropy_10": 0.516295811065282, "calibration/distribution_entropy_100": 0.27987268448332003, "calibration/ece": 0.09604928174517917, "calibration/mean_confidence": 0.7789018314999862, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010763888888888906, "completions/max_length": 3898.6, "completions/max_terminated_length": 3898.6, "completions/mean_length": 1155.826318359375, "completions/mean_terminated_length": 1168.4384765625, "completions/min_length": 0.0, "completions/min_terminated_length": 356.0, "epoch": 2.043269230769231, "grad_norm": 0.00034717281232587993, "learning_rate": 1.141826923076923e-06, "loss": -0.0113, "num_tokens": 2303699396.0, "reward": 1.2955496788024903, "reward_std": 0.10840608775615693, "rewards/accuracy_reward": 0.73359375, "rewards/brier_reward": 0.8682549834251404, "rewards/confidence_one_or_zero": 0.000260416668606922, "rewards/format_reward": 0.9892361164093018, "rewards/mean_confidence_reward": 0.7277135252952576, "sampling/batch_mean_priority_error": 0.015092013888888877, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.08888888888888888, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0030097827315330506, "sampling/priority_kl": 0.030000165477395056, "sampling/priority_scale": 0.8467580139869824, "sampling/prob_entropy": 10.278955841064453, "sampling/prob_max": 6.285476847551763e-05, "sampling/prob_min": 2.0349307305878028e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.035200023651123, "sampling/prompt_draws_total": 61056.0, "sampling/seen_fraction": 0.9428733229637146, "sampling/unseen_fraction": 0.0571266770362854, "signal/accuracy_reward/centered_abs_mean": 0.08740776926279067, "signal/accuracy_reward/group_std_mean": 0.12122353315353393, "signal/accuracy_reward/group_zero_std_frac": 0.6277777791023255, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04370388463139534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04370388463139534, "signal/advantage_abs_mean": 0.071315798163414, "signal/advantage_pre_scale_abs_mean": 0.071315798163414, "signal/advantage_pre_scale_std": 0.1623822569847107, "signal/advantage_std": 0.1623822569847107, "signal/brier_reward/centered_abs_mean": 0.06464180201292039, "signal/brier_reward/group_std_mean": 0.09374651610851288, "signal/brier_reward/group_zero_std_frac": 0.2611111134290695, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03232090100646019, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03232090100646019, "signal/confidence_one_or_zero/centered_abs_mean": 0.0005045572877861559, "signal/confidence_one_or_zero/group_std_mean": 0.0014731390401721, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.0455724931453e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.0455724931453e-09, "signal/format_reward/centered_abs_mean": 0.01946614608168602, "signal/format_reward/group_std_mean": 0.04095918200910091, "signal/format_reward/group_zero_std_frac": 0.8166666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00973307304084301, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00973307304084301, "signal/mean_confidence_reward/centered_abs_mean": 0.05734120234847069, "signal/mean_confidence_reward/group_std_mean": 0.0792054533958435, "signal/mean_confidence_reward/group_zero_std_frac": 0.27500000298023225, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.7341202364114e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.7341202364114e-07, "step": 850 }, { "epoch": 2.043269230769231, "eval_calibration/aurc": 0.12376605829554335, "eval_calibration/batch_distribution_entropy": 0.6170005564464504, "eval_calibration/batch_entropy_100bins": 0.33893845519545, "eval_calibration/batch_entropy_10bins": 0.6170005564464504, "eval_calibration/batch_entropy_50bins": 0.3989928654778244, "eval_calibration/batch_uniqueness": 0.327932433642645, "eval_calibration/confidence_entropy": 0.4474724073814369, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.5520559930008749, "eval_calibration/coverage@15%": 0.7637795275590551, "eval_calibration/coverage@20%": 0.8162729658792651, "eval_calibration/coverage@25%": 0.9588801399825022, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.6170005564464504, "eval_calibration/distribution_entropy_100": 0.33893845519545, "eval_calibration/ece": 0.025065616797900182, "eval_calibration/mean_confidence": 0.7349518810148731, "eval_calibration/unique_confidence_per_question": 0.008680555555555556, "eval_calibration/unique_confidences": 10, "eval_completions/clipped_ratio": 0.011111111111111127, "eval_completions/max_length": 3113.3333333333335, "eval_completions/max_terminated_length": 3113.3333333333335, "eval_completions/mean_length": 1146.4441324869792, "eval_completions/mean_terminated_length": 1159.1681518554688, "eval_completions/min_length": 0.0, "eval_completions/min_terminated_length": 466.6666666666667, "eval_loss": 0.0, "eval_num_tokens": 2303699396.0, "eval_reward": 1.2894940972328186, "eval_reward_std": 0.31144089500109357, "eval_rewards/accuracy_reward": 0.725694457689921, "eval_rewards/brier_reward": 0.8610915839672089, "eval_rewards/confidence_one_or_zero": 0.0, "eval_rewards/format_reward": 0.9921875099341074, "eval_rewards/mean_confidence_reward": 0.7292100787162781, "eval_runtime": 213.4681, "eval_samples_per_second": 4.685, "eval_signal/accuracy_reward/centered_abs_mean": 0.3849826355775197, "eval_signal/accuracy_reward/group_std_mean": 0.4438539495070775, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19249131778875986, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19249131778875986, "eval_signal/advantage_abs_mean": 0.2564364845554034, "eval_signal/advantage_pre_scale_abs_mean": 0.2564364845554034, "eval_signal/advantage_pre_scale_std": 0.3103178143501282, "eval_signal/advantage_std": 0.3103178143501282, "eval_signal/brier_reward/centered_abs_mean": 0.1578616127371788, "eval_signal/brier_reward/group_std_mean": 0.22026897221803665, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0789308063685894, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.0789308063685894, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 1.0, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/centered_abs_mean": 0.015028211598594984, "eval_signal/format_reward/group_std_mean": 0.041204764818151794, "eval_signal/format_reward/group_zero_std_frac": 0.7777778108914694, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007514105799297492, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.007514105799297492, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.1957817698518435, "eval_signal/mean_confidence_reward/group_std_mean": 0.23651453604300818, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.9578177254212883e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.9578177254212883e-06, "eval_steps_per_second": 0.028, "step": 850 }, { "epoch": 2.043269230769231, "step": 850, "train_probe_calibration/aurc": 0.07444353586731828, "train_probe_calibration/batch_distribution_entropy": 0.5981090318149916, "train_probe_calibration/batch_entropy_100bins": 0.3278047570973816, "train_probe_calibration/batch_entropy_10bins": 0.5981090318149916, "train_probe_calibration/batch_entropy_50bins": 0.3858864562185043, "train_probe_calibration/batch_uniqueness": 0.28846979706331144, "train_probe_calibration/confidence_entropy": 0.4422734115458898, "train_probe_calibration/coverage@0%": 0.002617801047120419, "train_probe_calibration/coverage@1%": 0.002617801047120419, "train_probe_calibration/coverage@10%": 0.7853403141361257, "train_probe_calibration/coverage@15%": 0.8359511343804538, "train_probe_calibration/coverage@20%": 0.9720767888307156, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.5732984293193717, "train_probe_calibration/distribution_entropy_10": 0.5981090318149916, "train_probe_calibration/distribution_entropy_100": 0.3278047570973816, "train_probe_calibration/ece": 0.043935427574170946, "train_probe_calibration/mean_confidence": 0.7504799301919721, "train_probe_calibration/unique_confidence_per_question": 0.009548611111111112, "train_probe_calibration/unique_confidences": 11, "train_probe_completions/clipped_ratio": 0.00434027777777779, "train_probe_completions/max_length": 3625.8333333333335, "train_probe_completions/max_terminated_length": 3625.8333333333335, "train_probe_completions/mean_length": 1171.7156575520833, "train_probe_completions/mean_terminated_length": 1176.8045654296875, "train_probe_completions/min_length": 142.5, "train_probe_completions/min_terminated_length": 381.3333333333333, "train_probe_loss": 0.0, "train_probe_num_tokens": 2303699396.0, "train_probe_reward": 1.3320420583089192, "train_probe_reward_std": 0.2776293953259786, "train_probe_rewards/accuracy_reward": 0.7838541666666666, "train_probe_rewards/brier_reward": 0.8854231834411621, "train_probe_rewards/confidence_one_or_zero": 0.0017361111628512542, "train_probe_rewards/format_reward": 0.9947916766007742, "train_probe_rewards/mean_confidence_reward": 0.7465711534023285, "train_probe_runtime": 208.723, "train_probe_samples_per_second": 4.791, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3287217865387599, "train_probe_signal/accuracy_reward/group_std_mean": 0.40822718540827435, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16436089326937994, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.16436089326937994, "train_probe_signal/advantage_abs_mean": 0.216099684437116, "train_probe_signal/advantage_pre_scale_abs_mean": 0.216099684437116, "train_probe_signal/advantage_pre_scale_std": 0.27747392157713574, "train_probe_signal/advantage_std": 0.27747392157713574, "train_probe_signal/brier_reward/centered_abs_mean": 0.13258452092607817, "train_probe_signal/brier_reward/group_std_mean": 0.18689996004104614, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06629226046303908, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.06629226046303908, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0033637151742974916, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.009820927555362383, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.944444457689921, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302004e-08, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302004e-08, "train_probe_signal/format_reward/centered_abs_mean": 0.010091145522892475, "train_probe_signal/format_reward/group_std_mean": 0.02946278266608715, "train_probe_signal/format_reward/group_zero_std_frac": 0.8333333631356558, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0050455727614462376, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0050455727614462376, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.1832817792892456, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.22317108511924744, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.832817683104319e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.832817683104319e-06, "train_probe_steps_per_second": 0.029 }, { "calibration/aurc": 0.09007888032879449, "calibration/batch_distribution_entropy": 0.6190782522581999, "calibration/batch_entropy_100bins": 0.339766442064793, "calibration/batch_entropy_10bins": 0.6190782522581999, "calibration/batch_entropy_50bins": 0.3999675582236997, "calibration/batch_uniqueness": 0.3461391671099906, "calibration/confidence_entropy": 0.45763288876498853, "calibration/coverage@0%": 0.11989392096017934, "calibration/coverage@1%": 0.15130753352562437, "calibration/coverage@10%": 0.7290703941733264, "calibration/coverage@15%": 0.7876857827380854, "calibration/coverage@20%": 0.84802688708862, "calibration/coverage@25%": 0.8917965838078045, "calibration/coverage@30%": 0.9583613387029579, "calibration/coverage@5%": 0.5147525764057768, "calibration/distribution_entropy_10": 0.6190782522581999, "calibration/distribution_entropy_100": 0.339766442064793, "calibration/ece": 0.106227240554117, "calibration/mean_confidence": 0.7179123295037517, "calibration/unique_confidence_per_question": 0.02447916666666667, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009895833333333326, "completions/max_length": 4016.8, "completions/max_terminated_length": 4016.8, "completions/mean_length": 1155.4071044921875, "completions/mean_terminated_length": 1166.94560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 386.6, "epoch": 2.0552884615384617, "grad_norm": 0.00033060292480513453, "learning_rate": 1.1117788461538462e-06, "loss": -0.0106, "num_tokens": 2320095190.0, "reward": 1.322322916984558, "reward_std": 0.12814645171165467, "rewards/accuracy_reward": 0.778125, "rewards/brier_reward": 0.8764886498451233, "rewards/confidence_one_or_zero": 0.0001736111124046147, "rewards/format_reward": 0.9900173544883728, "rewards/mean_confidence_reward": 0.7386140108108521, "sampling/batch_mean_priority_error": 0.014572916666666647, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.09444444444444446, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.003024717140942812, "sampling/priority_kl": 0.029999865218997, "sampling/priority_scale": 0.8490281641716138, "sampling/prob_entropy": 10.278955459594727, "sampling/prob_max": 6.310095050139352e-05, "sampling/prob_min": 2.036733421846293e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.0471999645233154, "sampling/prompt_draws_total": 61416.0, "sampling/seen_fraction": 0.943993330001831, "sampling/unseen_fraction": 0.056006669998168945, "signal/accuracy_reward/centered_abs_mean": 0.11383463591337203, "signal/accuracy_reward/group_std_mean": 0.158145971596241, "signal/accuracy_reward/group_zero_std_frac": 0.5111111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05691731795668602, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05691731795668602, "signal/advantage_abs_mean": 0.08426995873451233, "signal/advantage_pre_scale_abs_mean": 0.08426995873451233, "signal/advantage_pre_scale_std": 0.18189284801483155, "signal/advantage_std": 0.18189284801483155, "signal/brier_reward/centered_abs_mean": 0.07048432901501656, "signal/brier_reward/group_std_mean": 0.10308757573366165, "signal/brier_reward/group_zero_std_frac": 0.24722222089767457, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03524216450750828, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03524216450750828, "signal/confidence_one_or_zero/centered_abs_mean": 0.0003363715251907706, "signal/confidence_one_or_zero/group_std_mean": 0.0009820926934480667, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302003e-09, "signal/format_reward/centered_abs_mean": 0.018017578125, "signal/format_reward/group_std_mean": 0.04189658388495445, "signal/format_reward/group_zero_std_frac": 0.7972222208976746, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0090087890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0090087890625, "signal/mean_confidence_reward/centered_abs_mean": 0.0627426914870739, "signal/mean_confidence_reward/group_std_mean": 0.08627793490886689, "signal/mean_confidence_reward/group_zero_std_frac": 0.2555555522441864, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.274268912420666e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.274268912420666e-07, "step": 855 }, { "calibration/aurc": 0.06585142036589152, "calibration/batch_distribution_entropy": 0.5993146527254817, "calibration/batch_entropy_100bins": 0.3219695299662035, "calibration/batch_entropy_10bins": 0.5993146527254817, "calibration/batch_entropy_50bins": 0.37901732125286536, "calibration/batch_uniqueness": 0.3283448677885148, "calibration/confidence_entropy": 0.4534555868322087, "calibration/coverage@0%": 0.0020833333333333333, "calibration/coverage@1%": 0.21415682414698164, "calibration/coverage@10%": 0.7858256473448325, "calibration/coverage@15%": 0.8751886482939633, "calibration/coverage@20%": 0.9480314960629922, "calibration/coverage@25%": 0.9585301837270341, "calibration/coverage@30%": 0.9826771653543307, "calibration/coverage@5%": 0.509247578346114, "calibration/distribution_entropy_10": 0.5993146527254817, "calibration/distribution_entropy_100": 0.3219695299662035, "calibration/ece": 0.10910423112703256, "calibration/mean_confidence": 0.7536399901403481, "calibration/unique_confidence_per_question": 0.021354166666666664, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013454861111111115, "completions/max_length": 3936.8, "completions/max_terminated_length": 3936.8, "completions/mean_length": 1172.1067626953125, "completions/mean_terminated_length": 1188.252099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 399.0, "epoch": 2.0673076923076925, "grad_norm": 0.00032517011277377605, "learning_rate": 1.0817307692307693e-06, "loss": -0.0157, "num_tokens": 2336673156.0, "reward": 1.2997122764587403, "reward_std": 0.1217604398727417, "rewards/accuracy_reward": 0.753125011920929, "rewards/brier_reward": 0.8597398161888122, "rewards/confidence_one_or_zero": 0.0010416667035315185, "rewards/format_reward": 0.9865451455116272, "rewards/mean_confidence_reward": 0.7358984351158142, "sampling/batch_mean_priority_error": 0.01943229166666666, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.08888888888888888, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.0030421236995607616, "sampling/priority_kl": 0.030000335723161697, "sampling/priority_scale": 0.8511217415565625, "sampling/prob_entropy": 10.27894630432129, "sampling/prob_max": 6.33396688499488e-05, "sampling/prob_min": 2.038745406025555e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.0591999530792235, "sampling/prompt_draws_total": 61776.0, "sampling/seen_fraction": 0.9450266599655152, "sampling/unseen_fraction": 0.05497334003448486, "signal/accuracy_reward/centered_abs_mean": 0.1017795130610466, "signal/accuracy_reward/group_std_mean": 0.14184551537036896, "signal/accuracy_reward/group_zero_std_frac": 0.5638888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0508897565305233, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0508897565305233, "signal/advantage_abs_mean": 0.08031857311725617, "signal/advantage_pre_scale_abs_mean": 0.08031857311725617, "signal/advantage_pre_scale_std": 0.18018272817134856, "signal/advantage_std": 0.18018272817134856, "signal/brier_reward/centered_abs_mean": 0.06887489780783654, "signal/brier_reward/group_std_mean": 0.09927848726511002, "signal/brier_reward/group_zero_std_frac": 0.2638888955116272, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03443744890391827, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03443744890391827, "signal/confidence_one_or_zero/centered_abs_mean": 0.0019314235891215504, "signal/confidence_one_or_zero/group_std_mean": 0.004263852536678314, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9805555462837219, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.9314235188971908e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.9314235188971908e-08, "signal/format_reward/centered_abs_mean": 0.02167426198720932, "signal/format_reward/group_std_mean": 0.04529851377010345, "signal/format_reward/group_zero_std_frac": 0.7916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01083713099360466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01083713099360466, "signal/mean_confidence_reward/centered_abs_mean": 0.0583306223154068, "signal/mean_confidence_reward/group_std_mean": 0.08277992159128189, "signal/mean_confidence_reward/group_zero_std_frac": 0.27777778506278994, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.833062118654198e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.833062118654198e-07, "step": 860 }, { "calibration/aurc": 0.11603837439787193, "calibration/batch_distribution_entropy": 0.6149790592596738, "calibration/batch_entropy_100bins": 0.3336354259414588, "calibration/batch_entropy_10bins": 0.6149790592596738, "calibration/batch_entropy_50bins": 0.3927502252423204, "calibration/batch_uniqueness": 0.3461293343834476, "calibration/confidence_entropy": 0.45388902139774084, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.6761772873094903, "calibration/coverage@15%": 0.7397942336205208, "calibration/coverage@20%": 0.8286319299555995, "calibration/coverage@25%": 0.8827070729755906, "calibration/coverage@30%": 0.9983914209115282, "calibration/coverage@5%": 0.12650918635170602, "calibration/distribution_entropy_10": 0.6149790592596738, "calibration/distribution_entropy_100": 0.3336354259414588, "calibration/ece": 0.1132342967072548, "calibration/mean_confidence": 0.7376037727930024, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011805555555555559, "completions/max_length": 3987.8, "completions/max_terminated_length": 3987.8, "completions/mean_length": 1180.26572265625, "completions/mean_terminated_length": 1194.537158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 374.2, "epoch": 2.079326923076923, "grad_norm": 0.00033040158450603485, "learning_rate": 1.0516826923076925e-06, "loss": -0.0155, "num_tokens": 2353379865.0, "reward": 1.3009061336517334, "reward_std": 0.1203909158706665, "rewards/accuracy_reward": 0.7534722208976745, "rewards/brier_reward": 0.8601308584213256, "rewards/confidence_one_or_zero": 0.001128472265554592, "rewards/format_reward": 0.9881944417953491, "rewards/mean_confidence_reward": 0.7362447857856751, "sampling/batch_mean_priority_error": 0.019236180555555545, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.08888888888888888, "sampling/error_ema_max": 0.17066249251365662, "sampling/error_ema_mean": 0.003059811145067215, "sampling/priority_kl": 0.030000845715403558, "sampling/priority_scale": 0.853543382906355, "sampling/prob_entropy": 10.278954887390137, "sampling/prob_max": 6.359893741318956e-05, "sampling/prob_min": 2.040498839050997e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.0711999893188477, "sampling/prompt_draws_total": 62136.0, "sampling/seen_fraction": 0.9461133360862732, "sampling/unseen_fraction": 0.053886663913726804, "signal/accuracy_reward/centered_abs_mean": 0.09594184011220933, "signal/accuracy_reward/group_std_mean": 0.13271133005619049, "signal/accuracy_reward/group_zero_std_frac": 0.5944444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04797092005610466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04797092005610466, "signal/advantage_abs_mean": 0.07722088620066643, "signal/advantage_pre_scale_abs_mean": 0.07722088620066643, "signal/advantage_pre_scale_std": 0.17718519866466523, "signal/advantage_std": 0.17718519866466523, "signal/brier_reward/centered_abs_mean": 0.06708870679140091, "signal/brier_reward/group_std_mean": 0.09765137284994126, "signal/brier_reward/group_zero_std_frac": 0.2750000059604645, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03354435339570046, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03354435339570046, "signal/confidence_one_or_zero/centered_abs_mean": 0.0019802517374046145, "signal/confidence_one_or_zero/group_std_mean": 0.0035894179251044988, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.98025162490012e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.98025162490012e-08, "signal/format_reward/centered_abs_mean": 0.02185329906642437, "signal/format_reward/group_std_mean": 0.048659897595644, "signal/format_reward/group_zero_std_frac": 0.774999988079071, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010926649533212185, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010926649533212185, "signal/mean_confidence_reward/centered_abs_mean": 0.060614097863435745, "signal/mean_confidence_reward/group_std_mean": 0.08568830490112304, "signal/mean_confidence_reward/group_zero_std_frac": 0.27777777910232543, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.061409635549353e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.061409635549353e-07, "step": 865 }, { "calibration/aurc": 0.0948379996016497, "calibration/batch_distribution_entropy": 0.5135562906550475, "calibration/batch_entropy_100bins": 0.2770817450004746, "calibration/batch_entropy_10bins": 0.5135562906550475, "calibration/batch_entropy_50bins": 0.32617614706948517, "calibration/batch_uniqueness": 0.10571137929105578, "calibration/confidence_entropy": 0.421645673634799, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.24772940203912927, "calibration/coverage@10%": 0.6397247472177792, "calibration/coverage@15%": 0.698163496533323, "calibration/coverage@20%": 0.7657894736842106, "calibration/coverage@25%": 0.9245362099766388, "calibration/coverage@30%": 0.9586684073107049, "calibration/coverage@5%": 0.577195633961183, "calibration/distribution_entropy_10": 0.5135562906550475, "calibration/distribution_entropy_100": 0.2770817450004746, "calibration/ece": 0.12980076800239265, "calibration/mean_confidence": 0.7757306178170007, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01137152777777779, "completions/max_length": 4021.6, "completions/max_terminated_length": 4021.6, "completions/mean_length": 1178.4822265625, "completions/mean_terminated_length": 1192.223583984375, "completions/min_length": 0.0, "completions/min_terminated_length": 308.0, "epoch": 2.0913461538461537, "grad_norm": 0.0003403700247872621, "learning_rate": 1.0216346153846154e-06, "loss": -0.0138, "num_tokens": 2370032364.0, "reward": 1.2729515790939332, "reward_std": 0.11964078694581985, "rewards/accuracy_reward": 0.7024305582046508, "rewards/brier_reward": 0.8548296451568603, "rewards/confidence_one_or_zero": 0.0004340277810115367, "rewards/format_reward": 0.9886284828186035, "rewards/mean_confidence_reward": 0.7271978974342346, "sampling/batch_mean_priority_error": 0.025902777777777757, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.0861111111111111, "sampling/error_ema_max": 0.1780436158180237, "sampling/error_ema_mean": 0.00308342189528048, "sampling/priority_kl": 0.030000101029872894, "sampling/priority_scale": 0.856144517636858, "sampling/prob_entropy": 10.278953170776367, "sampling/prob_max": 6.386785244103522e-05, "sampling/prob_min": 2.0420402506715618e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.083199977874756, "sampling/prompt_draws_total": 62496.0, "sampling/seen_fraction": 0.9472266674041748, "sampling/unseen_fraction": 0.052773332595825194, "signal/accuracy_reward/centered_abs_mean": 0.1059787318110466, "signal/accuracy_reward/group_std_mean": 0.14246926009654998, "signal/accuracy_reward/group_zero_std_frac": 0.5777777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0529893659055233, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0529893659055233, "signal/advantage_abs_mean": 0.07931408137083054, "signal/advantage_pre_scale_abs_mean": 0.07931408137083054, "signal/advantage_pre_scale_std": 0.17501305937767028, "signal/advantage_std": 0.17501305937767028, "signal/brier_reward/centered_abs_mean": 0.06485896408557892, "signal/brier_reward/group_std_mean": 0.09463429301977158, "signal/brier_reward/group_zero_std_frac": 0.311111119389534, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03242948204278946, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03242948204278946, "signal/confidence_one_or_zero/centered_abs_mean": 0.0007758246618323028, "signal/confidence_one_or_zero/group_std_mean": 0.001424409542232752, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 7.758245601507952e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 7.758245601507952e-09, "signal/format_reward/centered_abs_mean": 0.02083875872194767, "signal/format_reward/group_std_mean": 0.04646492563188076, "signal/format_reward/group_zero_std_frac": 0.7833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010419379360973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010419379360973835, "signal/mean_confidence_reward/centered_abs_mean": 0.05682915970683098, "signal/mean_confidence_reward/group_std_mean": 0.0796852469444275, "signal/mean_confidence_reward/group_zero_std_frac": 0.32222222685813906, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.682916025762097e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.682916025762097e-07, "step": 870 }, { "calibration/aurc": 0.1326259607295234, "calibration/batch_distribution_entropy": 0.5703000664198925, "calibration/batch_entropy_100bins": 0.3109658784851812, "calibration/batch_entropy_10bins": 0.5703000664198925, "calibration/batch_entropy_50bins": 0.36606400076699525, "calibration/batch_uniqueness": 0.26676141664879094, "calibration/confidence_entropy": 0.4490646421766165, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.13263707571801567, "calibration/coverage@10%": 0.2908921726570746, "calibration/coverage@15%": 0.6724807127374366, "calibration/coverage@20%": 0.8730004205402586, "calibration/coverage@25%": 0.9331179101221639, "calibration/coverage@30%": 0.9471804101221639, "calibration/coverage@5%": 0.2830592744847508, "calibration/distribution_entropy_10": 0.5703000664198925, "calibration/distribution_entropy_100": 0.3109658784851812, "calibration/ece": 0.11515947318894779, "calibration/mean_confidence": 0.7536589003345024, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010677083333333327, "completions/max_length": 3974.4, "completions/max_terminated_length": 3974.4, "completions/mean_length": 1170.606982421875, "completions/mean_terminated_length": 1183.2097412109374, "completions/min_length": 0.0, "completions/min_terminated_length": 372.2, "epoch": 2.1033653846153846, "grad_norm": 0.00039038530667312443, "learning_rate": 9.915865384615386e-07, "loss": -0.0127, "num_tokens": 2386605948.0, "reward": 1.2966346979141234, "reward_std": 0.11709458529949188, "rewards/accuracy_reward": 0.7470486044883728, "rewards/brier_reward": 0.8568825960159302, "rewards/confidence_one_or_zero": 0.0019097222539130598, "rewards/format_reward": 0.9893229007720947, "rewards/mean_confidence_reward": 0.7596822857856751, "sampling/batch_mean_priority_error": 0.022411527777777763, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.1, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.003110817261040211, "sampling/priority_kl": 0.0300005417317152, "sampling/priority_scale": 0.8590299667092041, "sampling/prob_entropy": 10.278952026367188, "sampling/prob_max": 6.415506068151444e-05, "sampling/prob_min": 2.0433545432752e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.095199966430664, "sampling/prompt_draws_total": 62856.0, "sampling/seen_fraction": 0.9483733415603638, "sampling/unseen_fraction": 0.05162665843963623, "signal/accuracy_reward/centered_abs_mean": 0.09424912929534912, "signal/accuracy_reward/group_std_mean": 0.1341444432735443, "signal/accuracy_reward/group_zero_std_frac": 0.5805555701255798, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04712456464767456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04712456464767456, "signal/advantage_abs_mean": 0.07558977454900742, "signal/advantage_pre_scale_abs_mean": 0.07558977454900742, "signal/advantage_pre_scale_std": 0.17589679658412932, "signal/advantage_std": 0.17589679658412932, "signal/brier_reward/centered_abs_mean": 0.06083095222711563, "signal/brier_reward/group_std_mean": 0.09111705869436264, "signal/brier_reward/group_zero_std_frac": 0.2805555611848831, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030415476113557816, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.030415476113557816, "signal/confidence_one_or_zero/centered_abs_mean": 0.0034722222364507614, "signal/confidence_one_or_zero/group_std_mean": 0.006823088601231575, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222089767456, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.472221976608125e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.472221976608125e-08, "signal/format_reward/centered_abs_mean": 0.019514974392950534, "signal/format_reward/group_std_mean": 0.04219624921679497, "signal/format_reward/group_zero_std_frac": 0.8083333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009757487196475267, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009757487196475267, "signal/mean_confidence_reward/centered_abs_mean": 0.05601774528622627, "signal/mean_confidence_reward/group_std_mean": 0.0795501410961151, "signal/mean_confidence_reward/group_zero_std_frac": 0.3000000029802322, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.601774432761886e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.601774432761886e-07, "step": 875 }, { "calibration/aurc": 0.15871351254864058, "calibration/batch_distribution_entropy": 0.5979251893860597, "calibration/batch_entropy_100bins": 0.32784764455624166, "calibration/batch_entropy_10bins": 0.5979251893860597, "calibration/batch_entropy_50bins": 0.3859369426411611, "calibration/batch_uniqueness": 0.321340491986652, "calibration/confidence_entropy": 0.44463021494639604, "calibration/coverage@0%": 0.0036635759955593017, "calibration/coverage@1%": 0.0036635759955593017, "calibration/coverage@10%": 0.2672878219176986, "calibration/coverage@15%": 0.62364241134756, "calibration/coverage@20%": 0.7431073626128978, "calibration/coverage@25%": 0.8104618719351233, "calibration/coverage@30%": 0.8886401039120709, "calibration/coverage@5%": 0.2076019580433531, "calibration/distribution_entropy_10": 0.5979251893860597, "calibration/distribution_entropy_100": 0.32784764455624166, "calibration/ece": 0.1257229745901393, "calibration/mean_confidence": 0.7385559006044398, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011979166666666652, "completions/max_length": 4006.4, "completions/max_terminated_length": 4006.4, "completions/mean_length": 1191.8174560546875, "completions/mean_terminated_length": 1206.124072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 375.8, "epoch": 2.1153846153846154, "grad_norm": 0.0003725996648427099, "learning_rate": 9.615384615384617e-07, "loss": -0.0146, "num_tokens": 2403446789.0, "reward": 1.2804926872253417, "reward_std": 0.12390426397323609, "rewards/accuracy_reward": 0.7283854126930237, "rewards/brier_reward": 0.8445640206336975, "rewards/confidence_one_or_zero": 0.001215277798473835, "rewards/format_reward": 0.9880208373069763, "rewards/mean_confidence_reward": 0.7525034785270691, "sampling/batch_mean_priority_error": 0.018138958333333323, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.08888888888888888, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.0031300032045692206, "sampling/priority_kl": 0.030000030994415283, "sampling/priority_scale": 0.86199485657271, "sampling/prob_entropy": 10.278945159912109, "sampling/prob_max": 6.444982864195481e-05, "sampling/prob_min": 2.044653374468908e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.107200002670288, "sampling/prompt_draws_total": 63216.0, "sampling/seen_fraction": 0.9495066642761231, "sampling/unseen_fraction": 0.05049333572387695, "signal/accuracy_reward/centered_abs_mean": 0.10213216096162796, "signal/accuracy_reward/group_std_mean": 0.14350362718105317, "signal/accuracy_reward/group_zero_std_frac": 0.550000011920929, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05106608048081398, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05106608048081398, "signal/advantage_abs_mean": 0.08228934407234192, "signal/advantage_pre_scale_abs_mean": 0.08228934407234192, "signal/advantage_pre_scale_std": 0.18068719208240508, "signal/advantage_std": 0.18068719208240508, "signal/brier_reward/centered_abs_mean": 0.07063276022672653, "signal/brier_reward/group_std_mean": 0.1014021709561348, "signal/brier_reward/group_zero_std_frac": 0.2472222238779068, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.035316380113363265, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.035316380113363265, "signal/confidence_one_or_zero/centered_abs_mean": 0.0021592881996184586, "signal/confidence_one_or_zero/group_std_mean": 0.0035119739361107348, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9888888835906983, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.1592879306808755e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.1592879306808755e-08, "signal/format_reward/centered_abs_mean": 0.020681423507630824, "signal/format_reward/group_std_mean": 0.04035768248140812, "signal/format_reward/group_zero_std_frac": 0.8277777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010340711753815412, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010340711753815412, "signal/mean_confidence_reward/centered_abs_mean": 0.057702585309743884, "signal/mean_confidence_reward/group_std_mean": 0.08118895143270492, "signal/mean_confidence_reward/group_zero_std_frac": 0.26111111640930174, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.770258098891645e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.770258098891645e-07, "step": 880 }, { "calibration/aurc": 0.06402128082310694, "calibration/batch_distribution_entropy": 0.5680207879108812, "calibration/batch_entropy_100bins": 0.30439327731708266, "calibration/batch_entropy_10bins": 0.5680207879108812, "calibration/batch_entropy_50bins": 0.35832684101570556, "calibration/batch_uniqueness": 0.2564380669528326, "calibration/confidence_entropy": 0.4407376487347753, "calibration/coverage@0%": 0.1181074878729164, "calibration/coverage@1%": 0.1181074878729164, "calibration/coverage@10%": 0.7444454004999521, "calibration/coverage@15%": 0.8812842511410043, "calibration/coverage@20%": 0.9736083672879692, "calibration/coverage@25%": 0.9923705722070844, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5450419304602916, "calibration/distribution_entropy_10": 0.5680207879108812, "calibration/distribution_entropy_100": 0.30439327731708266, "calibration/ece": 0.10531805626526428, "calibration/mean_confidence": 0.7626525515430307, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009635416666666697, "completions/max_length": 3861.8, "completions/max_terminated_length": 3861.8, "completions/mean_length": 1187.0781494140624, "completions/mean_terminated_length": 1198.6516845703125, "completions/min_length": 0.0, "completions/min_terminated_length": 378.2, "epoch": 2.1274038461538463, "grad_norm": 0.0003827160981018096, "learning_rate": 9.314903846153847e-07, "loss": -0.0097, "num_tokens": 2420203177.0, "reward": 1.3280963182449341, "reward_std": 0.11590573638677597, "rewards/accuracy_reward": 0.7860243082046509, "rewards/brier_reward": 0.8797884583473206, "rewards/confidence_one_or_zero": 0.0010416666977107526, "rewards/format_reward": 0.9903645873069763, "rewards/mean_confidence_reward": 0.7674363493919373, "sampling/batch_mean_priority_error": 0.016940972222222215, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.08055555555555555, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.003148121014237404, "sampling/priority_kl": 0.030001012980937956, "sampling/priority_scale": 0.864310544799082, "sampling/prob_entropy": 10.27896728515625, "sampling/prob_max": 6.470941589213908e-05, "sampling/prob_min": 2.0465672423597426e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.1191999912261963, "sampling/prompt_draws_total": 63576.0, "sampling/seen_fraction": 0.9504666686058044, "sampling/unseen_fraction": 0.04953333139419556, "signal/accuracy_reward/centered_abs_mean": 0.09685872495174408, "signal/accuracy_reward/group_std_mean": 0.13430885076522828, "signal/accuracy_reward/group_zero_std_frac": 0.5888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04842936247587204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04842936247587204, "signal/advantage_abs_mean": 0.07655163407325745, "signal/advantage_pre_scale_abs_mean": 0.07655163407325745, "signal/advantage_pre_scale_std": 0.1744627386331558, "signal/advantage_std": 0.1744627386331558, "signal/brier_reward/centered_abs_mean": 0.061053629219532016, "signal/brier_reward/group_std_mean": 0.09024021923542022, "signal/brier_reward/group_zero_std_frac": 0.2944444537162781, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030526814609766008, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.030526814609766008, "signal/confidence_one_or_zero/centered_abs_mean": 0.0019205729477107525, "signal/confidence_one_or_zero/group_std_mean": 0.0036947034299373628, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.9205727141979877e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.9205727141979877e-08, "signal/format_reward/centered_abs_mean": 0.017789713852107526, "signal/format_reward/group_std_mean": 0.04016635827720165, "signal/format_reward/group_zero_std_frac": 0.8111111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008894856926053763, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008894856926053763, "signal/mean_confidence_reward/centered_abs_mean": 0.054922057688236235, "signal/mean_confidence_reward/group_std_mean": 0.07804816663265228, "signal/mean_confidence_reward/group_zero_std_frac": 0.3083333373069763, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.492205900736736e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.492205900736736e-07, "step": 885 }, { "calibration/aurc": 0.08135021556057123, "calibration/batch_distribution_entropy": 0.524748091346437, "calibration/batch_entropy_100bins": 0.2862168308453696, "calibration/batch_entropy_10bins": 0.524748091346437, "calibration/batch_entropy_50bins": 0.3369298223216449, "calibration/batch_uniqueness": 0.16690429657296696, "calibration/confidence_entropy": 0.42909584582563703, "calibration/coverage@0%": 0.2532940378039133, "calibration/coverage@1%": 0.2622650140572114, "calibration/coverage@10%": 0.5535564940030449, "calibration/coverage@15%": 0.8217641250795115, "calibration/coverage@20%": 0.8642814403829415, "calibration/coverage@25%": 0.9159268929503916, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5171448845043642, "calibration/distribution_entropy_10": 0.524748091346437, "calibration/distribution_entropy_100": 0.2862168308453696, "calibration/ece": 0.12077254211531878, "calibration/mean_confidence": 0.769572349046679, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010156249999999978, "completions/max_length": 3911.6, "completions/max_terminated_length": 3911.6, "completions/mean_length": 1136.7791748046875, "completions/mean_terminated_length": 1148.4474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 323.8, "epoch": 2.139423076923077, "grad_norm": 0.0003794643562287092, "learning_rate": 9.014423076923078e-07, "loss": -0.0122, "num_tokens": 2436393785.0, "reward": 1.2959206342697143, "reward_std": 0.12398441880941391, "rewards/accuracy_reward": 0.7411458373069764, "rewards/brier_reward": 0.8608364820480346, "rewards/confidence_one_or_zero": 0.0021701388992369174, "rewards/format_reward": 0.98984375, "rewards/mean_confidence_reward": 0.7559620976448059, "sampling/batch_mean_priority_error": 0.01584027777777776, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.1, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.0031612716615200044, "sampling/priority_kl": 0.030000754073262213, "sampling/priority_scale": 0.8672416865592822, "sampling/prob_entropy": 10.278964042663574, "sampling/prob_max": 6.500642193714156e-05, "sampling/prob_min": 2.0479490194702522e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.1311999797821044, "sampling/prompt_draws_total": 63936.0, "sampling/seen_fraction": 0.95152667760849, "sampling/unseen_fraction": 0.04847332239151001, "signal/accuracy_reward/centered_abs_mean": 0.10128038078546524, "signal/accuracy_reward/group_std_mean": 0.1370842784643173, "signal/accuracy_reward/group_zero_std_frac": 0.5916666626930237, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05064019039273262, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05064019039273262, "signal/advantage_abs_mean": 0.08475142419338226, "signal/advantage_pre_scale_abs_mean": 0.08475142419338226, "signal/advantage_pre_scale_std": 0.18326860070228576, "signal/advantage_std": 0.18326860070228576, "signal/brier_reward/centered_abs_mean": 0.06973624005913734, "signal/brier_reward/group_std_mean": 0.09959090650081634, "signal/brier_reward/group_zero_std_frac": 0.25277777910232546, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03486812002956867, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03486812002956867, "signal/confidence_one_or_zero/centered_abs_mean": 0.0037923176772892476, "signal/confidence_one_or_zero/group_std_mean": 0.007589569129049778, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9666666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.79231739344732e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.79231739344732e-08, "signal/format_reward/centered_abs_mean": 0.018202039785683154, "signal/format_reward/group_std_mean": 0.03888495191931725, "signal/format_reward/group_zero_std_frac": 0.8222222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009101019892841577, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009101019892841577, "signal/mean_confidence_reward/centered_abs_mean": 0.05642659738659859, "signal/mean_confidence_reward/group_std_mean": 0.07863938361406327, "signal/mean_confidence_reward/group_zero_std_frac": 0.26111111640930174, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.642659402838035e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.642659402838035e-07, "step": 890 }, { "calibration/aurc": 0.0672622035824589, "calibration/batch_distribution_entropy": 0.5153431049136842, "calibration/batch_entropy_100bins": 0.27666561559182135, "calibration/batch_entropy_10bins": 0.5153431049136842, "calibration/batch_entropy_50bins": 0.325686286262536, "calibration/batch_uniqueness": 0.13510791260886815, "calibration/confidence_entropy": 0.4249389713275911, "calibration/coverage@0%": 0.23533583556747092, "calibration/coverage@1%": 0.23533583556747092, "calibration/coverage@10%": 0.6751500670241286, "calibration/coverage@15%": 0.890733604463142, "calibration/coverage@20%": 0.9260365535248042, "calibration/coverage@25%": 0.9811119234116624, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.6207139909790096, "calibration/distribution_entropy_10": 0.5153431049136842, "calibration/distribution_entropy_100": 0.27666561559182135, "calibration/ece": 0.11191498549810834, "calibration/mean_confidence": 0.7828315720120509, "calibration/unique_confidence_per_question": 0.021354166666666664, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017708333333333326, "completions/max_length": 4015.2, "completions/max_terminated_length": 4015.2, "completions/mean_length": 1184.7015625, "completions/mean_terminated_length": 1206.0575927734376, "completions/min_length": 0.0, "completions/min_terminated_length": 380.8, "epoch": 2.1514423076923075, "grad_norm": 0.000362895370926708, "learning_rate": 8.713942307692308e-07, "loss": -0.0209, "num_tokens": 2453134187.0, "reward": 1.2979624271392822, "reward_std": 0.13720027953386307, "rewards/accuracy_reward": 0.7483506917953491, "rewards/brier_reward": 0.8652673482894897, "rewards/confidence_one_or_zero": 0.000260416668606922, "rewards/format_reward": 0.9822916746139526, "rewards/mean_confidence_reward": 0.75567706823349, "sampling/batch_mean_priority_error": 0.01643749999999999, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.1388888888888889, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.0031778300646692513, "sampling/priority_kl": 0.02999963201582432, "sampling/priority_scale": 0.8716409623390063, "sampling/prob_entropy": 10.278945159912109, "sampling/prob_max": 6.538898887811228e-05, "sampling/prob_min": 2.0479632075876e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.1432000160217286, "sampling/prompt_draws_total": 64296.0, "sampling/seen_fraction": 0.9529199957847595, "sampling/unseen_fraction": 0.04708000421524048, "signal/accuracy_reward/centered_abs_mean": 0.1037814661860466, "signal/accuracy_reward/group_std_mean": 0.14674909710884093, "signal/accuracy_reward/group_zero_std_frac": 0.5444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0518907330930233, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0518907330930233, "signal/advantage_abs_mean": 0.09052008986473084, "signal/advantage_pre_scale_abs_mean": 0.09052008986473084, "signal/advantage_pre_scale_std": 0.19969316124916076, "signal/advantage_std": 0.19969316124916076, "signal/brier_reward/centered_abs_mean": 0.07278569489717483, "signal/brier_reward/group_std_mean": 0.1068168118596077, "signal/brier_reward/group_zero_std_frac": 0.2638888955116272, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.036392847448587416, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.036392847448587416, "signal/confidence_one_or_zero/centered_abs_mean": 0.0005045572877861559, "signal/confidence_one_or_zero/group_std_mean": 0.0014731390401721, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9916666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.0455724931453e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.0455724931453e-09, "signal/format_reward/centered_abs_mean": 0.02977430522441864, "signal/format_reward/group_std_mean": 0.05782720670104027, "signal/format_reward/group_zero_std_frac": 0.7527777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01488715261220932, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01488715261220932, "signal/mean_confidence_reward/centered_abs_mean": 0.059619140625, "signal/mean_confidence_reward/group_std_mean": 0.0842504397034645, "signal/mean_confidence_reward/group_zero_std_frac": 0.2916666716337204, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.961913984720013e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.961913984720013e-07, "step": 895 }, { "calibration/aurc": 0.1151792000974751, "calibration/batch_distribution_entropy": 0.6237504339432944, "calibration/batch_entropy_100bins": 0.3416187716182247, "calibration/batch_entropy_10bins": 0.6237504339432944, "calibration/batch_entropy_50bins": 0.4021480906035585, "calibration/batch_uniqueness": 0.36068083610505663, "calibration/confidence_entropy": 0.45672126327320495, "calibration/coverage@0%": 0.0016217158800279005, "calibration/coverage@1%": 0.31676063706114854, "calibration/coverage@10%": 0.6080577514273167, "calibration/coverage@15%": 0.6460583456043814, "calibration/coverage@20%": 0.676589746570565, "calibration/coverage@25%": 0.8457617975419833, "calibration/coverage@30%": 0.8878378209337962, "calibration/coverage@5%": 0.43888339920948616, "calibration/distribution_entropy_10": 0.6237504339432944, "calibration/distribution_entropy_100": 0.3416187716182247, "calibration/ece": 0.13816415671046162, "calibration/mean_confidence": 0.7257923929019635, "calibration/unique_confidence_per_question": 0.02447916666666667, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01710069444444442, "completions/max_length": 3986.4, "completions/max_terminated_length": 3986.4, "completions/mean_length": 1174.2949951171875, "completions/mean_terminated_length": 1194.865283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 339.8, "epoch": 2.1634615384615383, "grad_norm": 0.00030595791758969426, "learning_rate": 8.41346153846154e-07, "loss": -0.0217, "num_tokens": 2469756945.0, "reward": 1.2932031631469727, "reward_std": 0.13037577122449875, "rewards/accuracy_reward": 0.7412326335906982, "rewards/brier_reward": 0.862172544002533, "rewards/confidence_one_or_zero": 0.0016493055794853718, "rewards/format_reward": 0.9829861044883728, "rewards/mean_confidence_reward": 0.7530859351158142, "sampling/batch_mean_priority_error": 0.017564236111111096, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.10277777777777777, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.003195428755134344, "sampling/priority_kl": 0.029999273642897607, "sampling/priority_scale": 0.876445037056692, "sampling/prob_entropy": 10.278936004638672, "sampling/prob_max": 6.5792647365015e-05, "sampling/prob_min": 2.0472642790991814e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.1552000045776367, "sampling/prompt_draws_total": 64656.0, "sampling/seen_fraction": 0.9543733358383178, "sampling/unseen_fraction": 0.04562666416168213, "signal/accuracy_reward/centered_abs_mean": 0.09095594584941864, "signal/accuracy_reward/group_std_mean": 0.13292890042066574, "signal/accuracy_reward/group_zero_std_frac": 0.5611111044883728, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04547797292470932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04547797292470932, "signal/advantage_abs_mean": 0.08368964940309524, "signal/advantage_pre_scale_abs_mean": 0.08368964940309524, "signal/advantage_pre_scale_std": 0.19153147637844087, "signal/advantage_std": 0.19153147637844087, "signal/brier_reward/centered_abs_mean": 0.07003483921289444, "signal/brier_reward/group_std_mean": 0.10422394722700119, "signal/brier_reward/group_zero_std_frac": 0.2416666716337204, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03501741960644722, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03501741960644722, "signal/confidence_one_or_zero/centered_abs_mean": 0.002891710109543055, "signal/confidence_one_or_zero/group_std_mean": 0.0054592888336628675, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9777777671813965, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.8917099115233213e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.8917099115233213e-08, "signal/format_reward/centered_abs_mean": 0.03011067695915699, "signal/format_reward/group_std_mean": 0.05745980739593506, "signal/format_reward/group_zero_std_frac": 0.7638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.015055338479578495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015055338479578495, "signal/mean_confidence_reward/centered_abs_mean": 0.05772487968206406, "signal/mean_confidence_reward/group_std_mean": 0.08457386791706085, "signal/mean_confidence_reward/group_zero_std_frac": 0.24444445073604584, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.772487611466204e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.772487611466204e-07, "step": 900 }, { "epoch": 2.1634615384615383, "eval_calibration/aurc": 0.12391385848990848, "eval_calibration/batch_distribution_entropy": 0.5881655655004824, "eval_calibration/batch_entropy_100bins": 0.3189768360923287, "eval_calibration/batch_entropy_10bins": 0.5881655655004824, "eval_calibration/batch_entropy_50bins": 0.37549437044592116, "eval_calibration/batch_uniqueness": 0.26470764035785677, "eval_calibration/confidence_entropy": 0.43872068387637836, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.5814977973568282, "eval_calibration/coverage@15%": 0.7629955947136564, "eval_calibration/coverage@20%": 0.8607929515418502, "eval_calibration/coverage@25%": 0.9286343612334802, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.5881655655004824, "eval_calibration/distribution_entropy_100": 0.3189768360923287, "eval_calibration/ece": 0.04660792951541838, "eval_calibration/mean_confidence": 0.7623788546255507, "eval_calibration/unique_confidence_per_question": 0.008680555555555556, "eval_calibration/unique_confidences": 10, "eval_completions/clipped_ratio": 0.013020833333333351, "eval_completions/max_length": 3145.1666666666665, "eval_completions/max_terminated_length": 3145.1666666666665, "eval_completions/mean_length": 1151.9165445963542, "eval_completions/mean_terminated_length": 1167.2333984375, "eval_completions/min_length": 84.0, "eval_completions/min_terminated_length": 382.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 2469756945.0, "eval_reward": 1.278518795967102, "eval_reward_std": 0.3357190191745758, "eval_rewards/accuracy_reward": 0.7248263855775198, "eval_rewards/brier_reward": 0.8469531536102295, "eval_rewards/confidence_one_or_zero": 0.0008680555814256271, "eval_rewards/format_reward": 0.9852430621782938, "eval_rewards/mean_confidence_reward": 0.7511284351348877, "eval_runtime": 214.2565, "eval_samples_per_second": 4.667, "eval_signal/accuracy_reward/centered_abs_mean": 0.3844943592945735, "eval_signal/accuracy_reward/group_std_mean": 0.44289974868297577, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19224717964728674, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19224717964728674, "eval_signal/advantage_abs_mean": 0.2765105913082759, "eval_signal/advantage_pre_scale_abs_mean": 0.2765105913082759, "eval_signal/advantage_pre_scale_std": 0.3355718404054642, "eval_signal/advantage_std": 0.3355718404054642, "eval_signal/brier_reward/centered_abs_mean": 0.17802898089090982, "eval_signal/brier_reward/group_std_mean": 0.2396557405591011, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08901449044545491, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.08901449044545491, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0016818575871487458, "eval_signal/confidence_one_or_zero/group_std_mean": 0.0049104637776811915, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222288449606, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.6818574977151002e-08, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.6818574977151002e-08, "eval_signal/format_reward/centered_abs_mean": 0.028374566230922937, "eval_signal/format_reward/group_std_mean": 0.07749906585862239, "eval_signal/format_reward/group_zero_std_frac": 0.5833333432674408, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.014187283115461469, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.014187283115461469, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.17690427352984747, "eval_signal/mean_confidence_reward/group_std_mean": 0.22398289293050766, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.76904264511298e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.76904264511298e-06, "eval_steps_per_second": 0.028, "step": 900 }, { "epoch": 2.1634615384615383, "step": 900, "train_probe_calibration/aurc": 0.08825829287780615, "train_probe_calibration/batch_distribution_entropy": 0.5748713882314315, "train_probe_calibration/batch_entropy_100bins": 0.3128677793937022, "train_probe_calibration/batch_entropy_10bins": 0.5748713882314315, "train_probe_calibration/batch_entropy_50bins": 0.3683028877440073, "train_probe_calibration/batch_uniqueness": 0.2436643079308503, "train_probe_calibration/confidence_entropy": 0.43504627966835785, "train_probe_calibration/coverage@0%": 0.0017574692442882249, "train_probe_calibration/coverage@1%": 0.0017574692442882249, "train_probe_calibration/coverage@10%": 0.7135325131810193, "train_probe_calibration/coverage@15%": 0.8637961335676626, "train_probe_calibration/coverage@20%": 0.9288224956063269, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.0017574692442882249, "train_probe_calibration/distribution_entropy_10": 0.5748713882314315, "train_probe_calibration/distribution_entropy_100": 0.3128677793937022, "train_probe_calibration/ece": 0.037170474516695806, "train_probe_calibration/mean_confidence": 0.7673989455184536, "train_probe_calibration/unique_confidence_per_question": 0.008680555555555556, "train_probe_calibration/unique_confidences": 10, "train_probe_completions/clipped_ratio": 0.014583333333333337, "train_probe_completions/max_length": 3627.5, "train_probe_completions/max_terminated_length": 3627.5, "train_probe_completions/mean_length": 1189.9757283528645, "train_probe_completions/mean_terminated_length": 1207.6895345052083, "train_probe_completions/min_length": 0.0, "train_probe_completions/min_terminated_length": 403.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 2469756945.0, "train_probe_reward": 1.31611434618632, "train_probe_reward_std": 0.30907075603802997, "train_probe_rewards/accuracy_reward": 0.7725694477558136, "train_probe_rewards/brier_reward": 0.8717969159285227, "train_probe_rewards/confidence_one_or_zero": 0.0017361111628512542, "train_probe_rewards/format_reward": 0.987847218910853, "train_probe_rewards/mean_confidence_reward": 0.7580729226271311, "train_probe_runtime": 219.1252, "train_probe_samples_per_second": 4.564, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3404947916666667, "train_probe_signal/accuracy_reward/group_std_mean": 0.4156750937302907, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.17024739583333334, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.17024739583333334, "train_probe_signal/advantage_abs_mean": 0.2414021318157514, "train_probe_signal/advantage_pre_scale_abs_mean": 0.2414021318157514, "train_probe_signal/advantage_pre_scale_std": 0.3106756657361984, "train_probe_signal/advantage_std": 0.3106756657361984, "train_probe_signal/brier_reward/centered_abs_mean": 0.15325956543286642, "train_probe_signal/brier_reward/group_std_mean": 0.217030035952727, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07662978271643321, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07662978271643321, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.0033637151742974916, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.009820927555362383, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.944444457689921, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302004e-08, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302004e-08, "train_probe_signal/format_reward/centered_abs_mean": 0.023220485852410395, "train_probe_signal/format_reward/group_std_mean": 0.0597782659654816, "train_probe_signal/format_reward/group_zero_std_frac": 0.6944444676240286, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011610242926205197, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.011610242926205197, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.17286781469980875, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.2182989940047264, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.7286780765365013e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.7286780765365013e-06, "train_probe_steps_per_second": 0.027 }, { "calibration/aurc": 0.11285190153335967, "calibration/batch_distribution_entropy": 0.5125113264155846, "calibration/batch_entropy_100bins": 0.28051964622634007, "calibration/batch_entropy_10bins": 0.5125113264155846, "calibration/batch_entropy_50bins": 0.33022318876779816, "calibration/batch_uniqueness": 0.11970550029297737, "calibration/confidence_entropy": 0.4248528232860238, "calibration/coverage@0%": 0.0010471204188481676, "calibration/coverage@1%": 0.15916230366492146, "calibration/coverage@10%": 0.475216599372537, "calibration/coverage@15%": 0.6550666859718866, "calibration/coverage@20%": 0.8777354111405836, "calibration/coverage@25%": 0.9380374115826703, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.41120545975349587, "calibration/distribution_entropy_10": 0.5125113264155846, "calibration/distribution_entropy_100": 0.28051964622634007, "calibration/ece": 0.1134556583005788, "calibration/mean_confidence": 0.7646870969901838, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012065972222222231, "completions/max_length": 3955.0, "completions/max_terminated_length": 3955.0, "completions/mean_length": 1146.1902099609374, "completions/mean_terminated_length": 1160.305859375, "completions/min_length": 0.0, "completions/min_terminated_length": 370.2, "epoch": 2.175480769230769, "grad_norm": 0.0003137659514322877, "learning_rate": 8.11298076923077e-07, "loss": -0.0156, "num_tokens": 2486076896.0, "reward": 1.3248236179351807, "reward_std": 0.12037934958934784, "rewards/accuracy_reward": 0.7915798664093018, "rewards/brier_reward": 0.8702048897743225, "rewards/confidence_one_or_zero": 0.0006076388992369175, "rewards/format_reward": 0.9878472328186035, "rewards/mean_confidence_reward": 0.7613367795944214, "sampling/batch_mean_priority_error": 0.01599999999999998, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.09722222222222222, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.0032106395810842514, "sampling/priority_kl": 0.029998685419559478, "sampling/priority_scale": 0.8803082048194483, "sampling/prob_entropy": 10.278942108154297, "sampling/prob_max": 6.614316371269524e-05, "sampling/prob_min": 2.047557645710185e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.167199993133545, "sampling/prompt_draws_total": 65016.0, "sampling/seen_fraction": 0.9556066632270813, "sampling/unseen_fraction": 0.0443933367729187, "signal/accuracy_reward/centered_abs_mean": 0.10290256142616272, "signal/accuracy_reward/group_std_mean": 0.14169478863477708, "signal/accuracy_reward/group_zero_std_frac": 0.5611111283302307, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05145128071308136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05145128071308136, "signal/advantage_abs_mean": 0.08037637546658516, "signal/advantage_pre_scale_abs_mean": 0.08037637546658516, "signal/advantage_pre_scale_std": 0.18246108591556548, "signal/advantage_std": 0.18246108591556548, "signal/brier_reward/centered_abs_mean": 0.06332063004374504, "signal/brier_reward/group_std_mean": 0.09189697206020356, "signal/brier_reward/group_zero_std_frac": 0.2972222208976746, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03166031502187252, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03166031502187252, "signal/confidence_one_or_zero/centered_abs_mean": 0.0011664496269077063, "signal/confidence_one_or_zero/group_std_mean": 0.003138383664190769, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9833333253860473, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.1664495858099145e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.1664495858099145e-08, "signal/format_reward/centered_abs_mean": 0.02082248292863369, "signal/format_reward/group_std_mean": 0.04095887430012226, "signal/format_reward/group_zero_std_frac": 0.8222222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010411241464316845, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010411241464316845, "signal/mean_confidence_reward/centered_abs_mean": 0.05295898914337158, "signal/mean_confidence_reward/group_std_mean": 0.07509669959545136, "signal/mean_confidence_reward/group_zero_std_frac": 0.31388888955116273, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.295898745316663e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.295898745316663e-07, "step": 905 }, { "calibration/aurc": 0.13006745286642976, "calibration/batch_distribution_entropy": 0.5223763623022502, "calibration/batch_entropy_100bins": 0.282748995104773, "calibration/batch_entropy_10bins": 0.5223763623022502, "calibration/batch_entropy_50bins": 0.33284754219692675, "calibration/batch_uniqueness": 0.15022688073890672, "calibration/confidence_entropy": 0.43531714143483746, "calibration/coverage@0%": 0.14986118137011228, "calibration/coverage@1%": 0.14986118137011228, "calibration/coverage@10%": 0.17308017873158985, "calibration/coverage@15%": 0.6216689548354984, "calibration/coverage@20%": 0.9512319467919914, "calibration/coverage@25%": 0.9937007874015749, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.16727542939122048, "calibration/distribution_entropy_10": 0.5223763623022502, "calibration/distribution_entropy_100": 0.282748995104773, "calibration/ece": 0.09975166442201888, "calibration/mean_confidence": 0.7799321903281939, "calibration/unique_confidence_per_question": 0.020833333333333332, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014236111111111093, "completions/max_length": 3917.4, "completions/max_terminated_length": 3917.4, "completions/mean_length": 1192.1790283203125, "completions/mean_terminated_length": 1209.365478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 353.4, "epoch": 2.1875, "grad_norm": 0.0003341422707308084, "learning_rate": 7.8125e-07, "loss": -0.0176, "num_tokens": 2502874798.0, "reward": 1.2831647396087646, "reward_std": 0.13368360996246337, "rewards/accuracy_reward": 0.7313368082046509, "rewards/brier_reward": 0.849213969707489, "rewards/confidence_one_or_zero": 0.0006944444554392249, "rewards/format_reward": 0.9857638835906982, "rewards/mean_confidence_reward": 0.7395833253860473, "sampling/batch_mean_priority_error": 0.017489583333333322, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.06666666666666668, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.003227849630638957, "sampling/priority_kl": 0.029999754205346108, "sampling/priority_scale": 0.8832817733054981, "sampling/prob_entropy": 10.278969383239746, "sampling/prob_max": 6.644506938755513e-05, "sampling/prob_min": 2.0487737128860316e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.179200029373169, "sampling/prompt_draws_total": 65376.0, "sampling/seen_fraction": 0.9565999984741211, "sampling/unseen_fraction": 0.043400001525878903, "signal/accuracy_reward/centered_abs_mean": 0.10817599892616273, "signal/accuracy_reward/group_std_mean": 0.15262871086597443, "signal/accuracy_reward/group_zero_std_frac": 0.5277777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05408799946308136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05408799946308136, "signal/advantage_abs_mean": 0.08564771115779876, "signal/advantage_pre_scale_abs_mean": 0.08564771115779876, "signal/advantage_pre_scale_std": 0.19176922142505645, "signal/advantage_std": 0.19176922142505645, "signal/brier_reward/centered_abs_mean": 0.06797481775283813, "signal/brier_reward/group_std_mean": 0.10257073640823364, "signal/brier_reward/group_zero_std_frac": 0.2638888955116272, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.033987408876419066, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.033987408876419066, "signal/confidence_one_or_zero/centered_abs_mean": 0.001302083337213844, "signal/confidence_one_or_zero/group_std_mean": 0.0029789147432893516, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.302083241228047e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.302083241228047e-08, "signal/format_reward/centered_abs_mean": 0.02587890625, "signal/format_reward/group_std_mean": 0.05563742592930794, "signal/format_reward/group_zero_std_frac": 0.7472222328186036, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012939453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012939453125, "signal/mean_confidence_reward/centered_abs_mean": 0.058856339752674104, "signal/mean_confidence_reward/group_std_mean": 0.08640173077583313, "signal/mean_confidence_reward/group_zero_std_frac": 0.27499999701976774, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.88563352721394e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.88563352721394e-07, "step": 910 }, { "calibration/aurc": 0.10734249873858166, "calibration/batch_distribution_entropy": 0.5846067326248493, "calibration/batch_entropy_100bins": 0.3120906730568165, "calibration/batch_entropy_10bins": 0.5846067326248493, "calibration/batch_entropy_50bins": 0.3673880907377006, "calibration/batch_uniqueness": 0.2989200019476256, "calibration/confidence_entropy": 0.4400907935995416, "calibration/coverage@0%": 0.11556728232189975, "calibration/coverage@1%": 0.11556728232189975, "calibration/coverage@10%": 0.5006087125384353, "calibration/coverage@15%": 0.701646585434703, "calibration/coverage@20%": 0.7472763925729443, "calibration/coverage@25%": 0.9019538873994637, "calibration/coverage@30%": 0.9163538873994639, "calibration/coverage@5%": 0.4473732813107564, "calibration/distribution_entropy_10": 0.5846067326248493, "calibration/distribution_entropy_100": 0.3120906730568165, "calibration/ece": 0.13515245517625668, "calibration/mean_confidence": 0.7708814941117261, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015885416666666673, "completions/max_length": 3968.0, "completions/max_terminated_length": 3968.0, "completions/mean_length": 1211.677880859375, "completions/mean_terminated_length": 1231.2791748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 379.4, "epoch": 2.199519230769231, "grad_norm": 0.0003999628243036568, "learning_rate": 7.512019230769231e-07, "loss": -0.0198, "num_tokens": 2519922959.0, "reward": 1.3061288833618163, "reward_std": 0.13455301523208618, "rewards/accuracy_reward": 0.7528645873069764, "rewards/brier_reward": 0.8754372000694275, "rewards/confidence_one_or_zero": 0.000260416668606922, "rewards/format_reward": 0.9839409589767456, "rewards/mean_confidence_reward": 0.7495818614959717, "sampling/batch_mean_priority_error": 0.011972222222222214, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.08611111111111111, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.0032408578786998986, "sampling/priority_kl": 0.029999370872974395, "sampling/priority_scale": 0.8859618365531787, "sampling/prob_entropy": 10.278959465026855, "sampling/prob_max": 6.67307889671065e-05, "sampling/prob_min": 2.0502503321040422e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.191200017929077, "sampling/prompt_draws_total": 65736.0, "sampling/seen_fraction": 0.9575066685676574, "sampling/unseen_fraction": 0.04249333143234253, "signal/accuracy_reward/centered_abs_mean": 0.098388671875, "signal/accuracy_reward/group_std_mean": 0.13771490454673768, "signal/accuracy_reward/group_zero_std_frac": 0.569444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0491943359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0491943359375, "signal/advantage_abs_mean": 0.08455693274736405, "signal/advantage_pre_scale_abs_mean": 0.08455693274736405, "signal/advantage_pre_scale_std": 0.19366905689239503, "signal/advantage_std": 0.19366905689239503, "signal/brier_reward/centered_abs_mean": 0.06902645826339722, "signal/brier_reward/group_std_mean": 0.10544530004262924, "signal/brier_reward/group_zero_std_frac": 0.2888888895511627, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03451322913169861, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03451322913169861, "signal/confidence_one_or_zero/centered_abs_mean": 0.0004937065881676972, "signal/confidence_one_or_zero/group_std_mean": 0.001174198230728507, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9944444417953491, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.9370658672387435e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.9370658672387435e-09, "signal/format_reward/centered_abs_mean": 0.029150390625, "signal/format_reward/group_std_mean": 0.06167853176593781, "signal/format_reward/group_zero_std_frac": 0.725, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0145751953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0145751953125, "signal/mean_confidence_reward/centered_abs_mean": 0.06032561212778091, "signal/mean_confidence_reward/group_std_mean": 0.08825595080852508, "signal/mean_confidence_reward/group_zero_std_frac": 0.30555555820465086, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.03256125941698e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.03256125941698e-07, "step": 915 }, { "calibration/aurc": 0.05147939346039525, "calibration/batch_distribution_entropy": 0.5554592712525979, "calibration/batch_entropy_100bins": 0.3054071326847551, "calibration/batch_entropy_10bins": 0.5554592712525979, "calibration/batch_entropy_50bins": 0.3595203351504873, "calibration/batch_uniqueness": 0.2235803159489635, "calibration/confidence_entropy": 0.43550255930955634, "calibration/coverage@0%": 0.12471754581242238, "calibration/coverage@1%": 0.32483220253296385, "calibration/coverage@10%": 0.8518813929328273, "calibration/coverage@15%": 0.9233775087260035, "calibration/coverage@20%": 0.9572916666666668, "calibration/coverage@25%": 0.9828125, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.6240863469354763, "calibration/distribution_entropy_10": 0.5554592712525979, "calibration/distribution_entropy_100": 0.3054071326847551, "calibration/ece": 0.11478028141041932, "calibration/mean_confidence": 0.7626465657632444, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012326388888888883, "completions/max_length": 4030.2, "completions/max_terminated_length": 4030.2, "completions/mean_length": 1243.1653076171874, "completions/mean_terminated_length": 1258.8077880859375, "completions/min_length": 0.0, "completions/min_terminated_length": 377.6, "epoch": 2.2115384615384617, "grad_norm": 0.00030230532865971327, "learning_rate": 7.211538461538461e-07, "loss": -0.015, "num_tokens": 2537358143.0, "reward": 1.3202105522155763, "reward_std": 0.11819398552179336, "rewards/accuracy_reward": 0.7690104126930237, "rewards/brier_reward": 0.8837220311164856, "rewards/confidence_one_or_zero": 0.0014756945020053536, "rewards/format_reward": 0.9876736044883728, "rewards/mean_confidence_reward": 0.7527213454246521, "sampling/batch_mean_priority_error": 0.01480034722222221, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.08055555555555556, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.0032544697634875773, "sampling/priority_kl": 0.02999933548271656, "sampling/priority_scale": 0.8895649968879298, "sampling/prob_entropy": 10.27895221710205, "sampling/prob_max": 6.707530410494655e-05, "sampling/prob_min": 2.0509682144620456e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.2032000064849853, "sampling/prompt_draws_total": 66096.0, "sampling/seen_fraction": 0.9585599899291992, "sampling/unseen_fraction": 0.04144001007080078, "signal/accuracy_reward/centered_abs_mean": 0.09224717915058137, "signal/accuracy_reward/group_std_mean": 0.13409704864025115, "signal/accuracy_reward/group_zero_std_frac": 0.5666666686534881, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04612358957529068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04612358957529068, "signal/advantage_abs_mean": 0.07567234933376313, "signal/advantage_pre_scale_abs_mean": 0.07567234933376313, "signal/advantage_pre_scale_std": 0.17569505870342256, "signal/advantage_std": 0.17569505870342256, "signal/brier_reward/centered_abs_mean": 0.0633341558277607, "signal/brier_reward/group_std_mean": 0.09425111263990402, "signal/brier_reward/group_zero_std_frac": 0.2666666716337204, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03166707791388035, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03166707791388035, "signal/confidence_one_or_zero/centered_abs_mean": 0.002381727460306138, "signal/confidence_one_or_zero/group_std_mean": 0.0034978067502379417, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9888888835906983, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.3817273842041685e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.3817273842041685e-08, "signal/format_reward/centered_abs_mean": 0.02136501707136631, "signal/format_reward/group_std_mean": 0.04142211228609085, "signal/format_reward/group_zero_std_frac": 0.825000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010682508535683154, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010682508535683154, "signal/mean_confidence_reward/centered_abs_mean": 0.05657335072755813, "signal/mean_confidence_reward/group_std_mean": 0.07980925291776657, "signal/mean_confidence_reward/group_zero_std_frac": 0.28611111640930176, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.657335009345843e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.657335009345843e-07, "step": 920 }, { "calibration/aurc": 0.0879748932949886, "calibration/batch_distribution_entropy": 0.5164573947181831, "calibration/batch_entropy_100bins": 0.27981525602896584, "calibration/batch_entropy_10bins": 0.5164573947181831, "calibration/batch_entropy_50bins": 0.32939399202438713, "calibration/batch_uniqueness": 0.13016719252669615, "calibration/confidence_entropy": 0.4321006951580036, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.17060367454068243, "calibration/coverage@10%": 0.594761664136493, "calibration/coverage@15%": 0.847351488641174, "calibration/coverage@20%": 0.8934965429659941, "calibration/coverage@25%": 0.9884210526315791, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.3633532753575516, "calibration/distribution_entropy_10": 0.5164573947181831, "calibration/distribution_entropy_100": 0.27981525602896584, "calibration/ece": 0.11073895301248071, "calibration/mean_confidence": 0.7676560881506441, "calibration/unique_confidence_per_question": 0.020833333333333336, "calibration/unique_confidences": 8.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014236111111111139, "completions/max_length": 4039.2, "completions/max_terminated_length": 4039.2, "completions/mean_length": 1223.8078857421874, "completions/mean_terminated_length": 1241.5063720703124, "completions/min_length": 0.0, "completions/min_terminated_length": 395.0, "epoch": 2.2235576923076925, "grad_norm": 0.0003381640126463026, "learning_rate": 6.911057692307694e-07, "loss": -0.0185, "num_tokens": 2554543098.0, "reward": 1.3357814073562622, "reward_std": 0.12255254238843918, "rewards/accuracy_reward": 0.8040798783302308, "rewards/brier_reward": 0.8818773984909057, "rewards/confidence_one_or_zero": 0.0003472222248092294, "rewards/format_reward": 0.9855902671813965, "rewards/mean_confidence_reward": 0.7671050190925598, "sampling/batch_mean_priority_error": 0.014381944444444428, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.07222222222222222, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.003269455023109913, "sampling/priority_kl": 0.030000782385468484, "sampling/priority_scale": 0.8927285850746557, "sampling/prob_entropy": 10.278964614868164, "sampling/prob_max": 6.739153468515724e-05, "sampling/prob_min": 2.051973424386233e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.2152000427246095, "sampling/prompt_draws_total": 66456.0, "sampling/seen_fraction": 0.9595400094985962, "sampling/unseen_fraction": 0.04045999050140381, "signal/accuracy_reward/centered_abs_mean": 0.08742946833372116, "signal/accuracy_reward/group_std_mean": 0.13191601932048796, "signal/accuracy_reward/group_zero_std_frac": 0.5500000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04371473416686058, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04371473416686058, "signal/advantage_abs_mean": 0.0757093921303749, "signal/advantage_pre_scale_abs_mean": 0.0757093921303749, "signal/advantage_pre_scale_std": 0.18412608802318572, "signal/advantage_std": 0.18412608802318572, "signal/brier_reward/centered_abs_mean": 0.06260225027799607, "signal/brier_reward/group_std_mean": 0.09538311511278152, "signal/brier_reward/group_zero_std_frac": 0.25, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031301125138998034, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.031301125138998034, "signal/confidence_one_or_zero/centered_abs_mean": 0.0006727430503815412, "signal/confidence_one_or_zero/group_std_mean": 0.0019641853868961334, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9888888835906983, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.7274299908604006e-09, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.7274299908604006e-09, "signal/format_reward/centered_abs_mean": 0.02479383647441864, "signal/format_reward/group_std_mean": 0.0480035699903965, "signal/format_reward/group_zero_std_frac": 0.7972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01239691823720932, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01239691823720932, "signal/mean_confidence_reward/centered_abs_mean": 0.05885334834456444, "signal/mean_confidence_reward/group_std_mean": 0.08518647253513337, "signal/mean_confidence_reward/group_zero_std_frac": 0.25555555820465087, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.88533464451757e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.88533464451757e-07, "step": 925 }, { "calibration/aurc": 0.04662198307011302, "calibration/batch_distribution_entropy": 0.516365256651883, "calibration/batch_entropy_100bins": 0.27891007924346534, "calibration/batch_entropy_10bins": 0.516365256651883, "calibration/batch_entropy_50bins": 0.3283284325581336, "calibration/batch_uniqueness": 0.14541062928179097, "calibration/confidence_entropy": 0.4226501508386008, "calibration/coverage@0%": 0.003685752774035858, "calibration/coverage@1%": 0.26561503772239636, "calibration/coverage@10%": 0.8407537880959353, "calibration/coverage@15%": 0.9285548718823253, "calibration/coverage@20%": 0.9957559681697614, "calibration/coverage@25%": 1.0, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5987353612990702, "calibration/distribution_entropy_10": 0.516365256651883, "calibration/distribution_entropy_100": 0.27891007924346534, "calibration/ece": 0.10740721265309265, "calibration/mean_confidence": 0.7776348798779866, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015104166666666651, "completions/max_length": 3987.2, "completions/max_terminated_length": 3987.2, "completions/mean_length": 1215.920947265625, "completions/mean_terminated_length": 1234.5720458984374, "completions/min_length": 0.0, "completions/min_terminated_length": 381.8, "epoch": 2.235576923076923, "grad_norm": 0.00032682056189514697, "learning_rate": 6.610576923076924e-07, "loss": -0.0176, "num_tokens": 2571639243.0, "reward": 1.3099323987960816, "reward_std": 0.12759646475315095, "rewards/accuracy_reward": 0.7669270992279053, "rewards/brier_reward": 0.867940080165863, "rewards/confidence_one_or_zero": 0.0013888889225199819, "rewards/format_reward": 0.9849826455116272, "rewards/mean_confidence_reward": 0.7527873277664184, "sampling/batch_mean_priority_error": 0.02201562499999999, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.08055555555555556, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.0032882534898817537, "sampling/priority_kl": 0.029999984428286552, "sampling/priority_scale": 0.8953011691337451, "sampling/prob_entropy": 10.278952026367188, "sampling/prob_max": 6.767441664123908e-05, "sampling/prob_min": 2.0523589410004205e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.2272000312805176, "sampling/prompt_draws_total": 66816.0, "sampling/seen_fraction": 0.9603600144386292, "sampling/unseen_fraction": 0.03963998556137085, "signal/accuracy_reward/centered_abs_mean": 0.10499674379825592, "signal/accuracy_reward/group_std_mean": 0.14623960852622986, "signal/accuracy_reward/group_zero_std_frac": 0.5472222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05249837189912796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05249837189912796, "signal/advantage_abs_mean": 0.08412635624408722, "signal/advantage_pre_scale_abs_mean": 0.08412635624408722, "signal/advantage_pre_scale_std": 0.18825245201587676, "signal/advantage_std": 0.18825245201587676, "signal/brier_reward/centered_abs_mean": 0.06787939220666886, "signal/brier_reward/group_std_mean": 0.09967477023601531, "signal/brier_reward/group_zero_std_frac": 0.27777778208255766, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03393969610333443, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03393969610333443, "signal/confidence_one_or_zero/centered_abs_mean": 0.002473958348855376, "signal/confidence_one_or_zero/group_std_mean": 0.004445959627628326, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9833333253860473, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.4739582471511314e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.4739582471511314e-08, "signal/format_reward/centered_abs_mean": 0.02551540769636631, "signal/format_reward/group_std_mean": 0.050245880335569384, "signal/format_reward/group_zero_std_frac": 0.7833333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012757703848183154, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012757703848183154, "signal/mean_confidence_reward/centered_abs_mean": 0.060628201067447665, "signal/mean_confidence_reward/group_std_mean": 0.08692292124032974, "signal/mean_confidence_reward/group_zero_std_frac": 0.2805555611848831, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.062819920771289e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.062819920771289e-07, "step": 930 }, { "calibration/aurc": 0.09865934087702413, "calibration/batch_distribution_entropy": 0.5613819194359139, "calibration/batch_entropy_100bins": 0.30502884058675644, "calibration/batch_entropy_10bins": 0.5613819194359139, "calibration/batch_entropy_50bins": 0.35907501581343804, "calibration/batch_uniqueness": 0.21185733367966794, "calibration/confidence_entropy": 0.4414002995727424, "calibration/coverage@0%": 0.11916010498687664, "calibration/coverage@1%": 0.22384760498687664, "calibration/coverage@10%": 0.6438190081681894, "calibration/coverage@15%": 0.715426839846917, "calibration/coverage@20%": 0.8546681971527119, "calibration/coverage@25%": 0.9043575479930193, "calibration/coverage@30%": 0.9801210732984293, "calibration/coverage@5%": 0.38435554410545286, "calibration/distribution_entropy_10": 0.5613819194359139, "calibration/distribution_entropy_100": 0.30502884058675644, "calibration/ece": 0.10591542545003192, "calibration/mean_confidence": 0.7539848946683242, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008333333333333326, "completions/max_length": 3919.2, "completions/max_terminated_length": 3919.2, "completions/mean_length": 1211.381689453125, "completions/mean_terminated_length": 1221.6566162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 394.0, "epoch": 2.2475961538461537, "grad_norm": 0.0003420446300879121, "learning_rate": 6.310096153846154e-07, "loss": -0.009, "num_tokens": 2588697464.0, "reward": 1.3223564863204955, "reward_std": 0.11324650794267654, "rewards/accuracy_reward": 0.77890625, "rewards/brier_reward": 0.8741249918937684, "rewards/confidence_one_or_zero": 0.0013888888817746193, "rewards/format_reward": 0.9916666626930237, "rewards/mean_confidence_reward": 0.7576215147972107, "sampling/batch_mean_priority_error": 0.015006944444444434, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.08611111111111111, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.0033051112201064824, "sampling/priority_kl": 0.029999464750289917, "sampling/priority_scale": 0.8995260178809985, "sampling/prob_entropy": 10.27895565032959, "sampling/prob_max": 6.806203018641099e-05, "sampling/prob_min": 2.052525269391481e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.2392000198364257, "sampling/prompt_draws_total": 67176.0, "sampling/seen_fraction": 0.9614533305168151, "sampling/unseen_fraction": 0.03854666948318482, "signal/accuracy_reward/centered_abs_mean": 0.09180229902267456, "signal/accuracy_reward/group_std_mean": 0.1310224562883377, "signal/accuracy_reward/group_zero_std_frac": 0.5916666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04590114951133728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04590114951133728, "signal/advantage_abs_mean": 0.07339992076158523, "signal/advantage_pre_scale_abs_mean": 0.07339992076158523, "signal/advantage_pre_scale_std": 0.16779326498508454, "signal/advantage_std": 0.16779326498508454, "signal/brier_reward/centered_abs_mean": 0.06400536745786667, "signal/brier_reward/group_std_mean": 0.0935688391327858, "signal/brier_reward/group_zero_std_frac": 0.3111111164093018, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.032002683728933334, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.032002683728933334, "signal/confidence_one_or_zero/centered_abs_mean": 0.0023111979360692205, "signal/confidence_one_or_zero/group_std_mean": 0.0038206671364605428, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9861111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.311197775384244e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.311197775384244e-08, "signal/format_reward/centered_abs_mean": 0.01534288190305233, "signal/format_reward/group_std_mean": 0.03594543486833572, "signal/format_reward/group_zero_std_frac": 0.825000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007671440951526165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007671440951526165, "signal/mean_confidence_reward/centered_abs_mean": 0.05464844331145287, "signal/mean_confidence_reward/group_std_mean": 0.07704180479049683, "signal/mean_confidence_reward/group_zero_std_frac": 0.325, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.46484409369441e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.46484409369441e-07, "step": 935 }, { "calibration/aurc": 0.20805025517207953, "calibration/batch_distribution_entropy": 0.5147650665098341, "calibration/batch_entropy_100bins": 0.2828651513916462, "calibration/batch_entropy_10bins": 0.5147650665098341, "calibration/batch_entropy_50bins": 0.332984279498441, "calibration/batch_uniqueness": 0.13400243013850727, "calibration/confidence_entropy": 0.4250079763322886, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.19530026109660575, "calibration/coverage@15%": 0.2, "calibration/coverage@20%": 0.3621052631578947, "calibration/coverage@25%": 0.6842105263157895, "calibration/coverage@30%": 0.8644356955380577, "calibration/coverage@5%": 0.17806788511749347, "calibration/distribution_entropy_10": 0.5147650665098341, "calibration/distribution_entropy_100": 0.2828651513916462, "calibration/ece": 0.13677418047081075, "calibration/mean_confidence": 0.7845539229954371, "calibration/unique_confidence_per_question": 0.02239583333333333, "calibration/unique_confidences": 8.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666652, "completions/max_length": 4023.2, "completions/max_terminated_length": 4023.2, "completions/mean_length": 1196.5845947265625, "completions/mean_terminated_length": 1209.4766357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 357.0, "epoch": 2.2596153846153846, "grad_norm": 0.00033075769897550344, "learning_rate": 6.009615384615385e-07, "loss": -0.0139, "num_tokens": 2605555558.0, "reward": 1.3028273344039918, "reward_std": 0.12005721032619476, "rewards/accuracy_reward": 0.7556423544883728, "rewards/brier_reward": 0.8603268980979919, "rewards/confidence_one_or_zero": 0.0035590277751907707, "rewards/format_reward": 0.9896701455116272, "rewards/mean_confidence_reward": 0.7611631751060486, "sampling/batch_mean_priority_error": 0.015236111111111094, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.11111111111111112, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.003319880459457636, "sampling/priority_kl": 0.030000341683626176, "sampling/priority_scale": 0.9037518800469115, "sampling/prob_entropy": 10.27896785736084, "sampling/prob_max": 6.845462194178253e-05, "sampling/prob_min": 2.0527686865534632e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.251200008392334, "sampling/prompt_draws_total": 67536.0, "sampling/seen_fraction": 0.9625133395195007, "sampling/unseen_fraction": 0.03748666048049927, "signal/accuracy_reward/centered_abs_mean": 0.09748806357383728, "signal/accuracy_reward/group_std_mean": 0.13585958927869796, "signal/accuracy_reward/group_zero_std_frac": 0.5805555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04874403178691864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04874403178691864, "signal/advantage_abs_mean": 0.07968758940696716, "signal/advantage_pre_scale_abs_mean": 0.07968758940696716, "signal/advantage_pre_scale_std": 0.1783791333436966, "signal/advantage_std": 0.1783791333436966, "signal/brier_reward/centered_abs_mean": 0.06847795248031616, "signal/brier_reward/group_std_mean": 0.09856122732162476, "signal/brier_reward/group_zero_std_frac": 0.272222226858139, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03423897624015808, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03423897624015808, "signal/confidence_one_or_zero/centered_abs_mean": 0.0056152343284338714, "signal/confidence_one_or_zero/group_std_mean": 0.009219340793788434, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9666666626930237, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.615234499600774e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.615234499600774e-08, "signal/format_reward/centered_abs_mean": 0.01794162318110466, "signal/format_reward/group_std_mean": 0.03940186202526093, "signal/format_reward/group_zero_std_frac": 0.8138888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00897081159055233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00897081159055233, "signal/mean_confidence_reward/centered_abs_mean": 0.05832188725471497, "signal/mean_confidence_reward/group_std_mean": 0.0822516843676567, "signal/mean_confidence_reward/group_zero_std_frac": 0.28611111342906953, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.832188662679983e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.832188662679983e-07, "step": 940 }, { "calibration/aurc": 0.08275949176278782, "calibration/batch_distribution_entropy": 0.5367319900238796, "calibration/batch_entropy_100bins": 0.29038277184202704, "calibration/batch_entropy_10bins": 0.5367319900238796, "calibration/batch_entropy_50bins": 0.34183390065854946, "calibration/batch_uniqueness": 0.15807961196908696, "calibration/confidence_entropy": 0.4283059576867362, "calibration/coverage@0%": 0.0020929242417979613, "calibration/coverage@1%": 0.1342078067483254, "calibration/coverage@10%": 0.5568662763841568, "calibration/coverage@15%": 0.7838610378188214, "calibration/coverage@20%": 0.9398416886543537, "calibration/coverage@25%": 0.9398416886543537, "calibration/coverage@30%": 0.9889182058047494, "calibration/coverage@5%": 0.4866640950946416, "calibration/distribution_entropy_10": 0.5367319900238796, "calibration/distribution_entropy_100": 0.29038277184202704, "calibration/ece": 0.0891567783594712, "calibration/mean_confidence": 0.7716399624464515, "calibration/unique_confidence_per_question": 0.02447916666666667, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0073784722222222324, "completions/max_length": 3970.2, "completions/max_terminated_length": 3970.2, "completions/mean_length": 1171.341259765625, "completions/mean_terminated_length": 1180.1314208984375, "completions/min_length": 0.0, "completions/min_terminated_length": 366.8, "epoch": 2.2716346153846154, "grad_norm": 0.0003024759062100202, "learning_rate": 5.709134615384615e-07, "loss": -0.0076, "num_tokens": 2622145377.0, "reward": 1.3309629917144776, "reward_std": 0.10374844670295716, "rewards/accuracy_reward": 0.7901041746139527, "rewards/brier_reward": 0.8792714834213257, "rewards/confidence_one_or_zero": 0.0013888889399822802, "rewards/format_reward": 0.9925347208976746, "rewards/mean_confidence_reward": 0.7810798525810242, "sampling/batch_mean_priority_error": 0.02181430555555554, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.08611111111111111, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.003340022638440132, "sampling/priority_kl": 0.029998552426695822, "sampling/priority_scale": 0.9088814555900171, "sampling/prob_entropy": 10.278948783874512, "sampling/prob_max": 6.890085205668584e-05, "sampling/prob_min": 2.052076088148169e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.2631999492645263, "sampling/prompt_draws_total": 67896.0, "sampling/seen_fraction": 0.9637333393096924, "sampling/unseen_fraction": 0.036266660690307616, "signal/accuracy_reward/centered_abs_mean": 0.09259982705116272, "signal/accuracy_reward/group_std_mean": 0.12963927239179612, "signal/accuracy_reward/group_zero_std_frac": 0.5972222208976745, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04629991352558136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04629991352558136, "signal/advantage_abs_mean": 0.06978111416101455, "signal/advantage_pre_scale_abs_mean": 0.06978111416101455, "signal/advantage_pre_scale_std": 0.1631460428237915, "signal/advantage_std": 0.1631460428237915, "signal/brier_reward/centered_abs_mean": 0.05654947534203529, "signal/brier_reward/group_std_mean": 0.0824481189250946, "signal/brier_reward/group_zero_std_frac": 0.32222222089767455, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028274737671017645, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.028274737671017645, "signal/confidence_one_or_zero/centered_abs_mean": 0.0022894965135492385, "signal/confidence_one_or_zero/group_std_mean": 0.0044072819873690605, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9805555462837219, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.2894964502029323e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.2894964502029323e-08, "signal/format_reward/centered_abs_mean": 0.01277126707136631, "signal/format_reward/group_std_mean": 0.02690165415406227, "signal/format_reward/group_zero_std_frac": 0.8777777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006385633535683155, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006385633535683155, "signal/mean_confidence_reward/centered_abs_mean": 0.05080154687166214, "signal/mean_confidence_reward/group_std_mean": 0.07160323113203049, "signal/mean_confidence_reward/group_zero_std_frac": 0.3277777791023254, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.080154494407907e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.080154494407907e-07, "step": 945 }, { "calibration/aurc": 0.09588108721796844, "calibration/batch_distribution_entropy": 0.5394386918957037, "calibration/batch_entropy_100bins": 0.297117790418978, "calibration/batch_entropy_10bins": 0.5394386918957037, "calibration/batch_entropy_50bins": 0.34976225555564866, "calibration/batch_uniqueness": 0.19306110715988942, "calibration/confidence_entropy": 0.4337806183847327, "calibration/coverage@0%": 0.0015764260249554365, "calibration/coverage@1%": 0.0015764260249554365, "calibration/coverage@10%": 0.4995804428607521, "calibration/coverage@15%": 0.7345839123050795, "calibration/coverage@20%": 0.9384760498687663, "calibration/coverage@25%": 0.9557291666666666, "calibration/coverage@30%": 0.9838541666666666, "calibration/coverage@5%": 0.4185485880107233, "calibration/distribution_entropy_10": 0.5394386918957037, "calibration/distribution_entropy_100": 0.297117790418978, "calibration/ece": 0.11819121550731962, "calibration/mean_confidence": 0.7558088564255337, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009375, "completions/max_length": 3963.4, "completions/max_terminated_length": 3963.4, "completions/mean_length": 1201.931005859375, "completions/mean_terminated_length": 1213.7183837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 353.8, "epoch": 2.2836538461538463, "grad_norm": 0.00037571621942333877, "learning_rate": 5.408653846153847e-07, "loss": -0.0107, "num_tokens": 2639093542.0, "reward": 1.31483793258667, "reward_std": 0.12279156893491745, "rewards/accuracy_reward": 0.7750868201255798, "rewards/brier_reward": 0.8638620734214782, "rewards/confidence_one_or_zero": 0.0012152777868323027, "rewards/format_reward": 0.9907118082046509, "rewards/mean_confidence_reward": 0.7566597104072571, "sampling/batch_mean_priority_error": 0.016486180555555542, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.08888888888888888, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.003358766110613942, "sampling/priority_kl": 0.02999931052327156, "sampling/priority_scale": 0.9136200845008716, "sampling/prob_entropy": 10.27894992828369, "sampling/prob_max": 6.932988035259768e-05, "sampling/prob_min": 2.0518549717962742e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.2751999855041505, "sampling/prompt_draws_total": 68256.0, "sampling/seen_fraction": 0.9648200035095215, "sampling/unseen_fraction": 0.03517999649047852, "signal/accuracy_reward/centered_abs_mean": 0.10987955629825592, "signal/accuracy_reward/group_std_mean": 0.15317733138799666, "signal/accuracy_reward/group_zero_std_frac": 0.5305555582046508, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05493977814912796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05493977814912796, "signal/advantage_abs_mean": 0.08325956612825394, "signal/advantage_pre_scale_abs_mean": 0.08325956612825394, "signal/advantage_pre_scale_std": 0.18065005242824556, "signal/advantage_std": 0.18065005242824556, "signal/brier_reward/centered_abs_mean": 0.06960290074348449, "signal/brier_reward/group_std_mean": 0.098955138027668, "signal/brier_reward/group_zero_std_frac": 0.28611111342906953, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.034801450371742246, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.034801450371742246, "signal/confidence_one_or_zero/centered_abs_mean": 0.0022677951259538532, "signal/confidence_one_or_zero/group_std_mean": 0.0049757368862628935, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9777777791023254, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 2.2677950539673474e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 2.2677950539673474e-08, "signal/format_reward/centered_abs_mean": 0.016129557462409138, "signal/format_reward/group_std_mean": 0.03346684370189905, "signal/format_reward/group_zero_std_frac": 0.850000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008064778731204569, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008064778731204569, "signal/mean_confidence_reward/centered_abs_mean": 0.057556481659412385, "signal/mean_confidence_reward/group_std_mean": 0.07936900556087494, "signal/mean_confidence_reward/group_zero_std_frac": 0.29722222983837127, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.755647976002365e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.755647976002365e-07, "step": 950 }, { "epoch": 2.2836538461538463, "eval_calibration/aurc": 0.1323624339907363, "eval_calibration/batch_distribution_entropy": 0.5527244664044493, "eval_calibration/batch_entropy_100bins": 0.3033107718398318, "eval_calibration/batch_entropy_10bins": 0.5527244664044493, "eval_calibration/batch_entropy_50bins": 0.35705253308267776, "eval_calibration/batch_uniqueness": 0.1874871369537685, "eval_calibration/confidence_entropy": 0.4302961789890096, "eval_calibration/coverage@0%": 0.001755926251097454, "eval_calibration/coverage@1%": 0.001755926251097454, "eval_calibration/coverage@10%": 0.001755926251097454, "eval_calibration/coverage@15%": 0.7647058823529411, "eval_calibration/coverage@20%": 0.8595258999122037, "eval_calibration/coverage@25%": 0.9727831431079894, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.001755926251097454, "eval_calibration/distribution_entropy_10": 0.5527244664044493, "eval_calibration/distribution_entropy_100": 0.3033107718398318, "eval_calibration/ece": 0.03213345039508359, "eval_calibration/mean_confidence": 0.766988586479368, "eval_calibration/unique_confidence_per_question": 0.008680555555555556, "eval_calibration/unique_confidences": 10, "eval_completions/clipped_ratio": 0.010416666666666666, "eval_completions/max_length": 3121.0, "eval_completions/max_terminated_length": 3121.0, "eval_completions/mean_length": 1174.4849446614583, "eval_completions/mean_terminated_length": 1187.0264282226562, "eval_completions/min_length": 169.5, "eval_completions/min_terminated_length": 426.6666666666667, "eval_loss": 0.0, "eval_num_tokens": 2639093542.0, "eval_reward": 1.2817957599957783, "eval_reward_std": 0.33336834609508514, "eval_rewards/accuracy_reward": 0.7265624900658926, "eval_rewards/brier_reward": 0.8482986390590668, "eval_rewards/confidence_one_or_zero": 0.0017361111628512542, "eval_rewards/format_reward": 0.988715281089147, "eval_rewards/mean_confidence_reward": 0.7583333154519399, "eval_runtime": 205.7147, "eval_samples_per_second": 4.861, "eval_signal/accuracy_reward/centered_abs_mean": 0.38330078125, "eval_signal/accuracy_reward/group_std_mean": 0.4423869450887044, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.191650390625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.191650390625, "eval_signal/advantage_abs_mean": 0.2746702879667282, "eval_signal/advantage_pre_scale_abs_mean": 0.2746702879667282, "eval_signal/advantage_pre_scale_std": 0.3323294669389725, "eval_signal/advantage_std": 0.3323294669389725, "eval_signal/brier_reward/centered_abs_mean": 0.18063857903083166, "eval_signal/brier_reward/group_std_mean": 0.24481445302565893, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09031928951541583, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.09031928951541583, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.0033637151742974916, "eval_signal/confidence_one_or_zero/group_std_mean": 0.009820927555362383, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.944444457689921, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.3637149954302004e-08, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.3637149954302004e-08, "eval_signal/format_reward/centered_abs_mean": 0.021647135571887095, "eval_signal/format_reward/group_std_mean": 0.05785721136877934, "eval_signal/format_reward/group_zero_std_frac": 0.694444457689921, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010823567785943547, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.010823567785943547, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.17593313256899515, "eval_signal/mean_confidence_reward/group_std_mean": 0.22150815278291702, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.7593313259567367e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.7593313259567367e-06, "eval_steps_per_second": 0.029, "step": 950 }, { "epoch": 2.2836538461538463, "step": 950, "train_probe_calibration/aurc": 0.08694457521157567, "train_probe_calibration/batch_distribution_entropy": 0.5292848064008367, "train_probe_calibration/batch_entropy_100bins": 0.2957835531105676, "train_probe_calibration/batch_entropy_10bins": 0.5292848064008367, "train_probe_calibration/batch_entropy_50bins": 0.3481916129839666, "train_probe_calibration/batch_uniqueness": 0.14519625865892766, "train_probe_calibration/confidence_entropy": 0.42145287989154906, "train_probe_calibration/coverage@0%": 0.007936507936507936, "train_probe_calibration/coverage@1%": 0.007936507936507936, "train_probe_calibration/coverage@10%": 0.736331569664903, "train_probe_calibration/coverage@15%": 0.8694885361552028, "train_probe_calibration/coverage@20%": 0.935626102292769, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.007936507936507936, "train_probe_calibration/distribution_entropy_10": 0.5292848064008367, "train_probe_calibration/distribution_entropy_100": 0.2957835531105676, "train_probe_calibration/ece": 0.0483245149911815, "train_probe_calibration/mean_confidence": 0.7779541446208115, "train_probe_calibration/unique_confidence_per_question": 0.008680555555555556, "train_probe_calibration/unique_confidences": 10, "train_probe_completions/clipped_ratio": 0.022916666666666658, "train_probe_completions/max_length": 3290.1666666666665, "train_probe_completions/max_terminated_length": 3290.1666666666665, "train_probe_completions/mean_length": 1182.226094563802, "train_probe_completions/mean_terminated_length": 1209.64501953125, "train_probe_completions/min_length": 0.0, "train_probe_completions/min_terminated_length": 403.8333333333333, "train_probe_loss": 0.0, "train_probe_num_tokens": 2639093542.0, "train_probe_reward": 1.311934808890025, "train_probe_reward_std": 0.3206389049688975, "train_probe_rewards/accuracy_reward": 0.7708333432674408, "train_probe_rewards/brier_reward": 0.8686458369096121, "train_probe_rewards/confidence_one_or_zero": 0.007812500155220429, "train_probe_rewards/format_reward": 0.9843749900658926, "train_probe_rewards/mean_confidence_reward": 0.7664062082767487, "train_probe_runtime": 216.4321, "train_probe_samples_per_second": 4.62, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3417968700329463, "train_probe_signal/accuracy_reward/group_std_mean": 0.41666914025942486, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.17089843501647314, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.17089843501647314, "train_probe_signal/advantage_abs_mean": 0.24869356552759805, "train_probe_signal/advantage_pre_scale_abs_mean": 0.24869356552759805, "train_probe_signal/advantage_pre_scale_std": 0.32127704719702405, "train_probe_signal/advantage_std": 0.32127704719702405, "train_probe_signal/brier_reward/centered_abs_mean": 0.16127172112464905, "train_probe_signal/brier_reward/group_std_mean": 0.22831493616104126, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08063586056232452, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.08063586056232452, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.015136718439559141, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.044194173688689865, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.750000019868215, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.5136717479435902e-07, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.5136717479435902e-07, "train_probe_signal/format_reward/centered_abs_mean": 0.02983940951526165, "train_probe_signal/format_reward/group_std_mean": 0.07643071251610915, "train_probe_signal/format_reward/group_zero_std_frac": 0.6111111243565878, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.014919704757630825, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.014919704757630825, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.1732530097166697, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.21967636048793793, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.7325300518677977e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.7325300518677977e-06, "train_probe_steps_per_second": 0.028 }, { "calibration/aurc": 0.07333166444541113, "calibration/batch_distribution_entropy": 0.4691600399329013, "calibration/batch_entropy_100bins": 0.259992130435832, "calibration/batch_entropy_10bins": 0.4691600399329013, "calibration/batch_entropy_50bins": 0.30605852931163485, "calibration/batch_uniqueness": 0.02508672457747697, "calibration/confidence_entropy": 0.41341911692850275, "calibration/coverage@0%": 0.1371778412351656, "calibration/coverage@1%": 0.29612520965621825, "calibration/coverage@10%": 0.6635685193374894, "calibration/coverage@15%": 0.8417596776609555, "calibration/coverage@20%": 0.913203132801689, "calibration/coverage@25%": 0.943324250681199, "calibration/coverage@30%": 0.9662125340599456, "calibration/coverage@5%": 0.613810694638307, "calibration/distribution_entropy_10": 0.4691600399329013, "calibration/distribution_entropy_100": 0.259992130435832, "calibration/ece": 0.1054306746835024, "calibration/mean_confidence": 0.7830915209381537, "calibration/unique_confidence_per_question": 0.02291666666666667, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008072916666666674, "completions/max_length": 3900.2, "completions/max_terminated_length": 3900.2, "completions/mean_length": 1190.8841064453125, "completions/mean_terminated_length": 1200.72021484375, "completions/min_length": 0.0, "completions/min_terminated_length": 359.0, "epoch": 2.295673076923077, "grad_norm": 0.00038597677485086024, "learning_rate": 5.108173076923077e-07, "loss": -0.0088, "num_tokens": 2655893167.0, "reward": 1.3142507553100586, "reward_std": 0.12009938955307006, "rewards/accuracy_reward": 0.7626736164093018, "rewards/brier_reward": 0.8738854169845581, "rewards/confidence_one_or_zero": 0.006510416604578495, "rewards/format_reward": 0.9919270873069763, "rewards/mean_confidence_reward": 0.7650685906410217, "sampling/batch_mean_priority_error": 0.014256944444444437, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.0611111111111111, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.0033722519874572754, "sampling/priority_kl": 0.03000014051795006, "sampling/priority_scale": 0.9171127259498462, "sampling/prob_entropy": 10.278940582275391, "sampling/prob_max": 6.968646630411968e-05, "sampling/prob_min": 2.0528324967017397e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.2871999740600586, "sampling/prompt_draws_total": 68616.0, "sampling/seen_fraction": 0.9656333327293396, "sampling/unseen_fraction": 0.0343666672706604, "signal/accuracy_reward/centered_abs_mean": 0.1023220494389534, "signal/accuracy_reward/group_std_mean": 0.14170491993427276, "signal/accuracy_reward/group_zero_std_frac": 0.5666666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0511610247194767, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0511610247194767, "signal/advantage_abs_mean": 0.0812824696302414, "signal/advantage_pre_scale_abs_mean": 0.0812824696302414, "signal/advantage_pre_scale_std": 0.17862094044685364, "signal/advantage_std": 0.17862094044685364, "signal/brier_reward/centered_abs_mean": 0.06971872299909591, "signal/brier_reward/group_std_mean": 0.09945357143878937, "signal/brier_reward/group_zero_std_frac": 0.2611111104488373, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03485936149954796, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03485936149954796, "signal/confidence_one_or_zero/centered_abs_mean": 0.010997178964316845, "signal/confidence_one_or_zero/group_std_mean": 0.01997890342026949, "signal/confidence_one_or_zero/group_zero_std_frac": 0.919444453716278, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.0997178350180547e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.0997178350180547e-07, "signal/format_reward/centered_abs_mean": 0.014317491091787815, "signal/format_reward/group_std_mean": 0.03275507166981697, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0071587455458939075, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0071587455458939075, "signal/mean_confidence_reward/centered_abs_mean": 0.05922640487551689, "signal/mean_confidence_reward/group_std_mean": 0.08267918974161148, "signal/mean_confidence_reward/group_zero_std_frac": 0.2805555611848831, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.922640070821217e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.922640070821217e-07, "step": 955 }, { "calibration/aurc": 0.15454163559109715, "calibration/batch_distribution_entropy": 0.5259110373168256, "calibration/batch_entropy_100bins": 0.2926179457862498, "calibration/batch_entropy_10bins": 0.5259110373168256, "calibration/batch_entropy_50bins": 0.34446511126087703, "calibration/batch_uniqueness": 0.18261172039536924, "calibration/confidence_entropy": 0.4419454509507362, "calibration/coverage@0%": 0.0031620826368647173, "calibration/coverage@1%": 0.0031620826368647173, "calibration/coverage@10%": 0.1914038723973695, "calibration/coverage@15%": 0.6911513946873098, "calibration/coverage@20%": 0.7164730321142104, "calibration/coverage@25%": 0.7899632365839073, "calibration/coverage@30%": 0.9133823932449123, "calibration/coverage@5%": 0.08895155632107524, "calibration/distribution_entropy_10": 0.5259110373168256, "calibration/distribution_entropy_100": 0.2926179457862498, "calibration/ece": 0.07072003946950807, "calibration/mean_confidence": 0.7394015883287092, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009635416666666674, "completions/max_length": 4009.6, "completions/max_terminated_length": 4009.6, "completions/mean_length": 1197.4577392578126, "completions/mean_terminated_length": 1209.1400390625, "completions/min_length": 0.0, "completions/min_terminated_length": 299.4, "epoch": 2.3076923076923075, "grad_norm": 0.00035911460872739553, "learning_rate": 4.807692307692308e-07, "loss": -0.0132, "num_tokens": 2672770984.0, "reward": 1.2979086875915526, "reward_std": 0.115200474858284, "rewards/accuracy_reward": 0.7491319417953491, "rewards/brier_reward": 0.8563055396080017, "rewards/confidence_one_or_zero": 0.00486111119389534, "rewards/format_reward": 0.9903645753860474, "rewards/mean_confidence_reward": 0.7599635362625122, "sampling/batch_mean_priority_error": 0.02080034722222221, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.06666666666666667, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.003393551055341959, "sampling/priority_kl": 0.02999952659010887, "sampling/priority_scale": 0.9202498614555225, "sampling/prob_entropy": 10.278947830200195, "sampling/prob_max": 7.002019556239248e-05, "sampling/prob_min": 2.0540579134831206e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.2991999626159667, "sampling/prompt_draws_total": 68976.0, "sampling/seen_fraction": 0.9663866639137269, "sampling/unseen_fraction": 0.03361333608627319, "signal/accuracy_reward/centered_abs_mean": 0.10064019113779069, "signal/accuracy_reward/group_std_mean": 0.1408391058444977, "signal/accuracy_reward/group_zero_std_frac": 0.5666666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05032009556889534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05032009556889534, "signal/advantage_abs_mean": 0.07774469703435898, "signal/advantage_pre_scale_abs_mean": 0.07774469703435898, "signal/advantage_pre_scale_std": 0.1746388465166092, "signal/advantage_std": 0.1746388465166092, "signal/brier_reward/centered_abs_mean": 0.0663982793688774, "signal/brier_reward/group_std_mean": 0.09372083991765975, "signal/brier_reward/group_zero_std_frac": 0.30277777910232545, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0331991396844387, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0331991396844387, "signal/confidence_one_or_zero/centered_abs_mean": 0.008007812686264515, "signal/confidence_one_or_zero/group_std_mean": 0.013047228381037713, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9527777910232544, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 8.007812297705641e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 8.007812297705641e-08, "signal/format_reward/centered_abs_mean": 0.01668294258415699, "signal/format_reward/group_std_mean": 0.03328734934329987, "signal/format_reward/group_zero_std_frac": 0.8555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008341471292078495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008341471292078495, "signal/mean_confidence_reward/centered_abs_mean": 0.054650018364191054, "signal/mean_confidence_reward/group_std_mean": 0.07545189410448075, "signal/mean_confidence_reward/group_zero_std_frac": 0.31944444179534914, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.465001663651492e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.465001663651492e-07, "step": 960 }, { "calibration/aurc": 0.09435869514788404, "calibration/batch_distribution_entropy": 0.5417798515703095, "calibration/batch_entropy_100bins": 0.2991118932828889, "calibration/batch_entropy_10bins": 0.5417798515703095, "calibration/batch_entropy_50bins": 0.3521096811827305, "calibration/batch_uniqueness": 0.195715446392127, "calibration/confidence_entropy": 0.4327788700734908, "calibration/coverage@0%": 0.00842801821291399, "calibration/coverage@1%": 0.07491878338441794, "calibration/coverage@10%": 0.510230192019455, "calibration/coverage@15%": 0.8736405967921123, "calibration/coverage@20%": 0.9055978336342175, "calibration/coverage@25%": 0.9472295514511874, "calibration/coverage@30%": 0.9804749340369394, "calibration/coverage@5%": 0.42329687799096083, "calibration/distribution_entropy_10": 0.5417798515703095, "calibration/distribution_entropy_100": 0.2991118932828889, "calibration/ece": 0.08588226087876341, "calibration/mean_confidence": 0.7723784778485797, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009201388888888884, "completions/max_length": 3968.0, "completions/max_terminated_length": 3968.0, "completions/mean_length": 1192.00224609375, "completions/mean_terminated_length": 1203.069677734375, "completions/min_length": 0.0, "completions/min_terminated_length": 362.2, "epoch": 2.3197115384615383, "grad_norm": 0.0003501853789202869, "learning_rate": 4.507211538461539e-07, "loss": -0.0082, "num_tokens": 2689622018.0, "reward": 1.3000178813934327, "reward_std": 0.12227881848812103, "rewards/accuracy_reward": 0.7453125, "rewards/brier_reward": 0.8639963388442993, "rewards/confidence_one_or_zero": 0.0023437499767169356, "rewards/format_reward": 0.990711796283722, "rewards/mean_confidence_reward": 0.753797733783722, "sampling/batch_mean_priority_error": 0.02541145833333331, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.058333333333333334, "sampling/error_ema_max": 0.20756810903549194, "sampling/error_ema_mean": 0.0034138909075409176, "sampling/priority_kl": 0.029999924823641777, "sampling/priority_scale": 0.9233753741020336, "sampling/prob_entropy": 10.278951263427734, "sampling/prob_max": 7.034816226223484e-05, "sampling/prob_min": 2.0551019042613915e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.311199998855591, "sampling/prompt_draws_total": 69336.0, "sampling/seen_fraction": 0.9671800017356873, "sampling/unseen_fraction": 0.03281999826431274, "signal/accuracy_reward/centered_abs_mean": 0.10756293386220932, "signal/accuracy_reward/group_std_mean": 0.1473521500825882, "signal/accuracy_reward/group_zero_std_frac": 0.5611111223697662, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05378146693110466, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05378146693110466, "signal/advantage_abs_mean": 0.08300841152668, "signal/advantage_pre_scale_abs_mean": 0.08300841152668, "signal/advantage_pre_scale_std": 0.1792859762907028, "signal/advantage_std": 0.1792859762907028, "signal/brier_reward/centered_abs_mean": 0.06932666748762131, "signal/brier_reward/group_std_mean": 0.09935078769922256, "signal/brier_reward/group_zero_std_frac": 0.2972222238779068, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.034663333743810656, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.034663333743810656, "signal/confidence_one_or_zero/centered_abs_mean": 0.003640407882630825, "signal/confidence_one_or_zero/group_std_mean": 0.0070123381447046995, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9694444417953492, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 3.6404079040153195e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 3.6404079040153195e-08, "signal/format_reward/centered_abs_mean": 0.015728081949055193, "signal/format_reward/group_std_mean": 0.0342374749481678, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007864040974527597, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007864040974527597, "signal/mean_confidence_reward/centered_abs_mean": 0.058252228796482085, "signal/mean_confidence_reward/group_std_mean": 0.08077710419893265, "signal/mean_confidence_reward/group_zero_std_frac": 0.3111111164093018, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.825222729072265e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.825222729072265e-07, "step": 965 }, { "calibration/aurc": 0.1283695774898897, "calibration/batch_distribution_entropy": 0.4090666755266697, "calibration/batch_entropy_100bins": 0.2291534308213528, "calibration/batch_entropy_10bins": 0.4090666755266697, "calibration/batch_entropy_50bins": 0.26975571109144936, "calibration/batch_uniqueness": -0.11918714253546994, "calibration/confidence_entropy": 0.3987882773655907, "calibration/coverage@0%": 0.006338832649334719, "calibration/coverage@1%": 0.006338832649334719, "calibration/coverage@10%": 0.33537109071385085, "calibration/coverage@15%": 0.6480689601414199, "calibration/coverage@20%": 0.8397426788405131, "calibration/coverage@25%": 0.9216353890922493, "calibration/coverage@30%": 0.9573457688029435, "calibration/coverage@5%": 0.31146316060632395, "calibration/distribution_entropy_10": 0.4090666755266697, "calibration/distribution_entropy_100": 0.2291534308213528, "calibration/ece": 0.10992615151961782, "calibration/mean_confidence": 0.7916775578849042, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011371527777777769, "completions/max_length": 3996.6, "completions/max_terminated_length": 3996.6, "completions/mean_length": 1157.7234619140625, "completions/mean_terminated_length": 1171.0572021484375, "completions/min_length": 0.0, "completions/min_terminated_length": 340.0, "epoch": 2.331730769230769, "grad_norm": 0.0003276313655078411, "learning_rate": 4.20673076923077e-07, "loss": -0.0134, "num_tokens": 2706052112.0, "reward": 1.2972458362579347, "reward_std": 0.1148213267326355, "rewards/accuracy_reward": 0.7394965291023254, "rewards/brier_reward": 0.8667853713035584, "rewards/confidence_one_or_zero": 0.007812499930150807, "rewards/format_reward": 0.9881944417953491, "rewards/mean_confidence_reward": 0.7607065796852112, "sampling/batch_mean_priority_error": 0.01752951388888887, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.075, "sampling/error_ema_max": 0.214666748046875, "sampling/error_ema_mean": 0.0034340286627411844, "sampling/priority_kl": 0.029998504742980003, "sampling/priority_scale": 0.9262121139792725, "sampling/prob_entropy": 10.278935623168945, "sampling/prob_max": 7.06596183590591e-05, "sampling/prob_min": 2.0564094666042364e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.323199987411499, "sampling/prompt_draws_total": 69696.0, "sampling/seen_fraction": 0.9679066658020019, "sampling/unseen_fraction": 0.03209333419799805, "signal/accuracy_reward/centered_abs_mean": 0.09217122495174408, "signal/accuracy_reward/group_std_mean": 0.13487593978643417, "signal/accuracy_reward/group_zero_std_frac": 0.5666666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04608561247587204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04608561247587204, "signal/advantage_abs_mean": 0.0762186512351036, "signal/advantage_pre_scale_abs_mean": 0.0762186512351036, "signal/advantage_pre_scale_std": 0.17515968084335326, "signal/advantage_std": 0.17515968084335326, "signal/brier_reward/centered_abs_mean": 0.06812559142708778, "signal/brier_reward/group_std_mean": 0.0956755131483078, "signal/brier_reward/group_zero_std_frac": 0.325, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03406279571354389, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03406279571354389, "signal/confidence_one_or_zero/centered_abs_mean": 0.011111111100763082, "signal/confidence_one_or_zero/group_std_mean": 0.016250798664987087, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9472222328186035, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.1111110467254549e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.1111110467254549e-07, "signal/format_reward/centered_abs_mean": 0.019780815951526164, "signal/format_reward/group_std_mean": 0.03664836846292019, "signal/format_reward/group_zero_std_frac": 0.850000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009890407975763082, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009890407975763082, "signal/mean_confidence_reward/centered_abs_mean": 0.05831093341112137, "signal/mean_confidence_reward/group_std_mean": 0.07978967428207398, "signal/mean_confidence_reward/group_zero_std_frac": 0.3305555582046509, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.831093289998535e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.831093289998535e-07, "step": 970 }, { "calibration/aurc": 0.07742137009449009, "calibration/batch_distribution_entropy": 0.4473659611295971, "calibration/batch_entropy_100bins": 0.2406289326352915, "calibration/batch_entropy_10bins": 0.4473659611295971, "calibration/batch_entropy_50bins": 0.2832644861547543, "calibration/batch_uniqueness": -0.030094964409126422, "calibration/confidence_entropy": 0.40054666523372673, "calibration/coverage@0%": 0.004720330152492104, "calibration/coverage@1%": 0.19002216742283334, "calibration/coverage@10%": 0.564974172832612, "calibration/coverage@15%": 0.8678512720113531, "calibration/coverage@20%": 0.9352835711953708, "calibration/coverage@25%": 0.968969470921398, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5474675415328771, "calibration/distribution_entropy_10": 0.4473659611295971, "calibration/distribution_entropy_100": 0.2406289326352915, "calibration/ece": 0.09206130155147745, "calibration/mean_confidence": 0.8047266900851249, "calibration/unique_confidence_per_question": 0.021354166666666667, "calibration/unique_confidences": 8.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00980902777777779, "completions/max_length": 4012.4, "completions/max_terminated_length": 4012.4, "completions/mean_length": 1177.9795166015624, "completions/mean_terminated_length": 1189.7114501953124, "completions/min_length": 0.0, "completions/min_terminated_length": 375.6, "epoch": 2.34375, "grad_norm": 0.0003336299560032785, "learning_rate": 3.90625e-07, "loss": -0.011, "num_tokens": 2722713668.0, "reward": 1.3038686752319335, "reward_std": 0.12683693766593934, "rewards/accuracy_reward": 0.7487847328186035, "rewards/brier_reward": 0.8687464833259583, "rewards/confidence_one_or_zero": 0.006163194449618459, "rewards/format_reward": 0.9901909708976746, "rewards/mean_confidence_reward": 0.7557253837585449, "sampling/batch_mean_priority_error": 0.017069444444444432, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.041666666666666664, "sampling/error_ema_max": 0.24306130409240723, "sampling/error_ema_mean": 0.0034511482808738947, "sampling/priority_kl": 0.02999942600727081, "sampling/priority_scale": 0.9295170843834057, "sampling/prob_entropy": 10.27896728515625, "sampling/prob_max": 7.100841903593392e-05, "sampling/prob_min": 2.057480924122501e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.335199975967407, "sampling/prompt_draws_total": 70056.0, "sampling/seen_fraction": 0.9686466693878174, "sampling/unseen_fraction": 0.031353330612182616, "signal/accuracy_reward/centered_abs_mean": 0.10644531100988389, "signal/accuracy_reward/group_std_mean": 0.1484037458896637, "signal/accuracy_reward/group_zero_std_frac": 0.5416666626930237, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05322265550494194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05322265550494194, "signal/advantage_abs_mean": 0.08296125084161758, "signal/advantage_pre_scale_abs_mean": 0.08296125084161758, "signal/advantage_pre_scale_std": 0.18262528777122497, "signal/advantage_std": 0.18262528777122497, "signal/brier_reward/centered_abs_mean": 0.06895918846130371, "signal/brier_reward/group_std_mean": 0.10257805585861206, "signal/brier_reward/group_zero_std_frac": 0.27500000298023225, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.034479594230651854, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.034479594230651854, "signal/confidence_one_or_zero/centered_abs_mean": 0.00921766497194767, "signal/confidence_one_or_zero/group_std_mean": 0.013176932744681836, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9583333373069763, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 9.217664711513863e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 9.217664711513863e-08, "signal/format_reward/centered_abs_mean": 0.01813693568110466, "signal/format_reward/group_std_mean": 0.041134567186236384, "signal/format_reward/group_zero_std_frac": 0.8055555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00906846784055233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00906846784055233, "signal/mean_confidence_reward/centered_abs_mean": 0.05696495622396469, "signal/mean_confidence_reward/group_std_mean": 0.08042566478252411, "signal/mean_confidence_reward/group_zero_std_frac": 0.3000000059604645, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.69649512272008e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.69649512272008e-07, "step": 975 }, { "calibration/aurc": 0.114188890279602, "calibration/batch_distribution_entropy": 0.509483514083351, "calibration/batch_entropy_100bins": 0.27925843126992217, "calibration/batch_entropy_10bins": 0.509483514083351, "calibration/batch_entropy_50bins": 0.3287385069273901, "calibration/batch_uniqueness": 0.10562402381743607, "calibration/confidence_entropy": 0.4224627921340617, "calibration/coverage@0%": 0.004688859878154917, "calibration/coverage@1%": 0.004688859878154917, "calibration/coverage@10%": 0.48668718904833363, "calibration/coverage@15%": 0.8230040970112507, "calibration/coverage@20%": 0.8691468565882465, "calibration/coverage@25%": 0.9188481675392669, "calibration/coverage@30%": 0.9785340314136125, "calibration/coverage@5%": 0.13314838990426459, "calibration/distribution_entropy_10": 0.509483514083351, "calibration/distribution_entropy_100": 0.27925843126992217, "calibration/ece": 0.08088774396367386, "calibration/mean_confidence": 0.7717182568799339, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009288194444444443, "completions/max_length": 3904.8, "completions/max_terminated_length": 3904.8, "completions/mean_length": 1206.857666015625, "completions/mean_terminated_length": 1218.132958984375, "completions/min_length": 0.0, "completions/min_terminated_length": 376.0, "epoch": 2.355769230769231, "grad_norm": 0.0003652828745543957, "learning_rate": 3.6057692307692306e-07, "loss": -0.0112, "num_tokens": 2739742364.0, "reward": 1.2970961809158326, "reward_std": 0.12603443413972854, "rewards/accuracy_reward": 0.7352430582046509, "rewards/brier_reward": 0.868222451210022, "rewards/confidence_one_or_zero": 0.006076388992369175, "rewards/format_reward": 0.9907118082046509, "rewards/mean_confidence_reward": 0.7478255152702331, "sampling/batch_mean_priority_error": 0.012765624999999978, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.0638888888888889, "sampling/error_ema_max": 0.24306130409240723, "sampling/error_ema_mean": 0.0034659527707844974, "sampling/priority_kl": 0.03000069931149483, "sampling/priority_scale": 0.9330740154488012, "sampling/prob_entropy": 10.278945541381836, "sampling/prob_max": 7.137841748772189e-05, "sampling/prob_min": 2.058420177490916e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.3472000122070313, "sampling/prompt_draws_total": 70416.0, "sampling/seen_fraction": 0.9693666696548462, "sampling/unseen_fraction": 0.03063333034515381, "signal/accuracy_reward/centered_abs_mean": 0.10789930522441864, "signal/accuracy_reward/group_std_mean": 0.15151995718479155, "signal/accuracy_reward/group_zero_std_frac": 0.5305555582046508, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05394965261220932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05394965261220932, "signal/advantage_abs_mean": 0.08410682827234268, "signal/advantage_pre_scale_abs_mean": 0.08410682827234268, "signal/advantage_pre_scale_std": 0.18120644092559815, "signal/advantage_std": 0.18120644092559815, "signal/brier_reward/centered_abs_mean": 0.07255458235740661, "signal/brier_reward/group_std_mean": 0.10437788367271424, "signal/brier_reward/group_zero_std_frac": 0.2472222238779068, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.036277291178703305, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.036277291178703305, "signal/confidence_one_or_zero/centered_abs_mean": 0.008268229104578495, "signal/confidence_one_or_zero/group_std_mean": 0.01167407687753439, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9638888716697693, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 8.268229265695481e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 8.268229265695481e-08, "signal/format_reward/centered_abs_mean": 0.01691080741584301, "signal/format_reward/group_std_mean": 0.03558777719736099, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008455403707921505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008455403707921505, "signal/mean_confidence_reward/centered_abs_mean": 0.06018635854125023, "signal/mean_confidence_reward/group_std_mean": 0.0855872243642807, "signal/mean_confidence_reward/group_zero_std_frac": 0.2555555522441864, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.018635644977622e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.018635644977622e-07, "step": 980 }, { "calibration/aurc": 0.09821316486708462, "calibration/batch_distribution_entropy": 0.4763177103621107, "calibration/batch_entropy_100bins": 0.2620128684370206, "calibration/batch_entropy_10bins": 0.4763177103621107, "calibration/batch_entropy_50bins": 0.30843730939136715, "calibration/batch_uniqueness": 0.03894753407550304, "calibration/confidence_entropy": 0.41985677353875783, "calibration/coverage@0%": 0.0010443935427574171, "calibration/coverage@1%": 0.3315063263525305, "calibration/coverage@10%": 0.5141361256544503, "calibration/coverage@15%": 0.6827754260888939, "calibration/coverage@20%": 0.8849085343704444, "calibration/coverage@25%": 0.9062789084181313, "calibration/coverage@30%": 0.9482308048103608, "calibration/coverage@5%": 0.3743946335078534, "calibration/distribution_entropy_10": 0.4763177103621107, "calibration/distribution_entropy_100": 0.2620128684370206, "calibration/ece": 0.10219283998519561, "calibration/mean_confidence": 0.768914136498992, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010850694444444442, "completions/max_length": 4002.2, "completions/max_terminated_length": 4002.2, "completions/mean_length": 1192.258154296875, "completions/mean_terminated_length": 1205.37119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 324.6, "epoch": 2.3677884615384617, "grad_norm": 0.00036673585418611765, "learning_rate": 3.305288461538462e-07, "loss": -0.0131, "num_tokens": 2756580154.0, "reward": 1.3056840419769287, "reward_std": 0.12273909598588943, "rewards/accuracy_reward": 0.7542534708976746, "rewards/brier_reward": 0.8679502487182618, "rewards/confidence_one_or_zero": 0.0036458333488553762, "rewards/format_reward": 0.9891493082046509, "rewards/mean_confidence_reward": 0.7502586841583252, "sampling/batch_mean_priority_error": 0.015890624999999985, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.07222222222222223, "sampling/error_ema_max": 0.24306130409240723, "sampling/error_ema_mean": 0.003479621047154069, "sampling/priority_kl": 0.03000035434961319, "sampling/priority_scale": 0.9369861424202099, "sampling/prob_entropy": 10.278960037231446, "sampling/prob_max": 7.176910003181547e-05, "sampling/prob_min": 2.0589240375556982e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.3592000007629395, "sampling/prompt_draws_total": 70776.0, "sampling/seen_fraction": 0.9701933264732361, "sampling/unseen_fraction": 0.029806673526763916, "signal/accuracy_reward/centered_abs_mean": 0.11163736879825592, "signal/accuracy_reward/group_std_mean": 0.15212086737155914, "signal/accuracy_reward/group_zero_std_frac": 0.5444444596767426, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05581868439912796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05581868439912796, "signal/advantage_abs_mean": 0.08209312856197357, "signal/advantage_pre_scale_abs_mean": 0.08209312856197357, "signal/advantage_pre_scale_std": 0.1826746553182602, "signal/advantage_std": 0.1826746553182602, "signal/brier_reward/centered_abs_mean": 0.06789509430527688, "signal/brier_reward/group_std_mean": 0.09828180372714997, "signal/brier_reward/group_zero_std_frac": 0.2916666746139526, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03394754715263844, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03394754715263844, "signal/confidence_one_or_zero/centered_abs_mean": 0.005815972248092294, "signal/confidence_one_or_zero/group_std_mean": 0.00891453055664897, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9694444417953492, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.815972059508567e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.815972059508567e-08, "signal/format_reward/centered_abs_mean": 0.01933051198720932, "signal/format_reward/group_std_mean": 0.04063206985592842, "signal/format_reward/group_zero_std_frac": 0.8166666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00966525599360466, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00966525599360466, "signal/mean_confidence_reward/centered_abs_mean": 0.05723828449845314, "signal/mean_confidence_reward/group_std_mean": 0.08099256753921509, "signal/mean_confidence_reward/group_zero_std_frac": 0.31666667461395265, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.723828166992462e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.723828166992462e-07, "step": 985 }, { "calibration/aurc": 0.12899346784143634, "calibration/batch_distribution_entropy": 0.6010847128653459, "calibration/batch_entropy_100bins": 0.335635511985207, "calibration/batch_entropy_10bins": 0.6010847128653459, "calibration/batch_entropy_50bins": 0.39510469417189975, "calibration/batch_uniqueness": 0.32869682683431645, "calibration/confidence_entropy": 0.4531429864179647, "calibration/coverage@0%": 0.0027100271002710027, "calibration/coverage@1%": 0.1635455362386522, "calibration/coverage@10%": 0.5229942110518833, "calibration/coverage@15%": 0.6890601465083612, "calibration/coverage@20%": 0.7557347769854298, "calibration/coverage@25%": 0.8328105328297377, "calibration/coverage@30%": 0.8637517871241706, "calibration/coverage@5%": 0.3896951780608053, "calibration/distribution_entropy_10": 0.6010847128653459, "calibration/distribution_entropy_100": 0.335635511985207, "calibration/ece": 0.1153655501319042, "calibration/mean_confidence": 0.7248026823679614, "calibration/unique_confidence_per_question": 0.023437499999999997, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013368055555555536, "completions/max_length": 4035.0, "completions/max_terminated_length": 4035.0, "completions/mean_length": 1169.717041015625, "completions/mean_terminated_length": 1185.7587158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 349.0, "epoch": 2.3798076923076925, "grad_norm": 0.00033228841493837535, "learning_rate": 3.0048076923076924e-07, "loss": -0.0134, "num_tokens": 2773171422.0, "reward": 1.3092052459716796, "reward_std": 0.11055757701396943, "rewards/accuracy_reward": 0.7555555701255798, "rewards/brier_reward": 0.8762081742286683, "rewards/confidence_one_or_zero": 0.002951388928340748, "rewards/format_reward": 0.9866319298744202, "rewards/mean_confidence_reward": 0.7412042856216431, "sampling/batch_mean_priority_error": 0.014855972222222204, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.06666666666666665, "sampling/error_ema_max": 0.24306130409240723, "sampling/error_ema_mean": 0.003495658095926046, "sampling/priority_kl": 0.029999981075525282, "sampling/priority_scale": 0.9406435906654224, "sampling/prob_entropy": 10.27895278930664, "sampling/prob_max": 7.214548531919718e-05, "sampling/prob_min": 2.0596579270204528e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.3711999893188476, "sampling/prompt_draws_total": 71136.0, "sampling/seen_fraction": 0.9709399938583374, "sampling/unseen_fraction": 0.029060006141662598, "signal/accuracy_reward/centered_abs_mean": 0.08971354067325592, "signal/accuracy_reward/group_std_mean": 0.131047847867012, "signal/accuracy_reward/group_zero_std_frac": 0.5777777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04485677033662796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04485677033662796, "signal/advantage_abs_mean": 0.07056974172592163, "signal/advantage_pre_scale_abs_mean": 0.07056974172592163, "signal/advantage_pre_scale_std": 0.16588001251220702, "signal/advantage_std": 0.16588001251220702, "signal/brier_reward/centered_abs_mean": 0.0633417621254921, "signal/brier_reward/group_std_mean": 0.0932878315448761, "signal/brier_reward/group_zero_std_frac": 0.29722222983837127, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03167088106274605, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03167088106274605, "signal/confidence_one_or_zero/centered_abs_mean": 0.00477430549217388, "signal/confidence_one_or_zero/group_std_mean": 0.007662679813802242, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222089767456, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.774305395471856e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.774305395471856e-08, "signal/format_reward/centered_abs_mean": 0.018674045242369174, "signal/format_reward/group_std_mean": 0.03792152032256126, "signal/format_reward/group_zero_std_frac": 0.8305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009337022621184587, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009337022621184587, "signal/mean_confidence_reward/centered_abs_mean": 0.05140403136610985, "signal/mean_confidence_reward/group_std_mean": 0.07264350578188897, "signal/mean_confidence_reward/group_zero_std_frac": 0.3305555611848831, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.140403288805828e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.140403288805828e-07, "step": 990 }, { "calibration/aurc": 0.12118961159305712, "calibration/batch_distribution_entropy": 0.5614932309347664, "calibration/batch_entropy_100bins": 0.312126605886408, "calibration/batch_entropy_10bins": 0.5614932309347664, "calibration/batch_entropy_50bins": 0.3674303902833075, "calibration/batch_uniqueness": 0.24348487145883607, "calibration/confidence_entropy": 0.45600217907986407, "calibration/coverage@0%": 0.11927083333333333, "calibration/coverage@1%": 0.11927083333333333, "calibration/coverage@10%": 0.6682374557913351, "calibration/coverage@15%": 0.7743050950486294, "calibration/coverage@20%": 0.8369791666666668, "calibration/coverage@25%": 0.865625, "calibration/coverage@30%": 0.8802083333333334, "calibration/coverage@5%": 0.396875, "calibration/distribution_entropy_10": 0.5614932309347664, "calibration/distribution_entropy_100": 0.312126605886408, "calibration/ece": 0.08444297082228104, "calibration/mean_confidence": 0.7185261936339524, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007725694444444442, "completions/max_length": 3925.6, "completions/max_terminated_length": 3925.6, "completions/mean_length": 1177.221826171875, "completions/mean_terminated_length": 1186.37841796875, "completions/min_length": 0.0, "completions/min_terminated_length": 351.0, "epoch": 2.391826923076923, "grad_norm": 0.0003700850938912481, "learning_rate": 2.7043269230769233e-07, "loss": -0.0092, "num_tokens": 2789834041.0, "reward": 1.311913752555847, "reward_std": 0.11160127222537994, "rewards/accuracy_reward": 0.7585069417953492, "rewards/brier_reward": 0.8730313658714295, "rewards/confidence_one_or_zero": 0.0027777777751907705, "rewards/format_reward": 0.9922743082046509, "rewards/mean_confidence_reward": 0.7425931811332702, "sampling/batch_mean_priority_error": 0.014538194444444439, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.0638888888888889, "sampling/error_ema_max": 0.24306130409240723, "sampling/error_ema_mean": 0.0035095302388072013, "sampling/priority_kl": 0.03000053986907005, "sampling/priority_scale": 0.945136696123518, "sampling/prob_entropy": 10.278954315185548, "sampling/prob_max": 7.25828402210027e-05, "sampling/prob_min": 2.0597735419869424e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.3832000255584718, "sampling/prompt_draws_total": 71496.0, "sampling/seen_fraction": 0.971780002117157, "sampling/unseen_fraction": 0.028219997882843018, "signal/accuracy_reward/centered_abs_mean": 0.09546440988779067, "signal/accuracy_reward/group_std_mean": 0.13176625818014145, "signal/accuracy_reward/group_zero_std_frac": 0.6083333373069764, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04773220494389534, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04773220494389534, "signal/advantage_abs_mean": 0.07565358579158783, "signal/advantage_pre_scale_abs_mean": 0.07565358579158783, "signal/advantage_pre_scale_std": 0.17233096063137054, "signal/advantage_std": 0.17233096063137054, "signal/brier_reward/centered_abs_mean": 0.062432841211557386, "signal/brier_reward/group_std_mean": 0.08957359939813614, "signal/brier_reward/group_zero_std_frac": 0.2944444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031216420605778693, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.031216420605778693, "signal/confidence_one_or_zero/centered_abs_mean": 0.004915364505723119, "signal/confidence_one_or_zero/group_std_mean": 0.008829268533736468, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9666666507720947, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.915364328894611e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.915364328894611e-08, "signal/format_reward/centered_abs_mean": 0.014274088852107524, "signal/format_reward/group_std_mean": 0.03213131241500378, "signal/format_reward/group_zero_std_frac": 0.85, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007137044426053762, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007137044426053762, "signal/mean_confidence_reward/centered_abs_mean": 0.05760388597846031, "signal/mean_confidence_reward/group_std_mean": 0.07929947078227997, "signal/mean_confidence_reward/group_zero_std_frac": 0.3083333343267441, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.760388432918262e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.760388432918262e-07, "step": 995 }, { "calibration/aurc": 0.1082374968660619, "calibration/batch_distribution_entropy": 0.5258876466513132, "calibration/batch_entropy_100bins": 0.2916314963423535, "calibration/batch_entropy_10bins": 0.5258876466513132, "calibration/batch_entropy_50bins": 0.3433038789361407, "calibration/batch_uniqueness": 0.1551850622828222, "calibration/confidence_entropy": 0.4261764350444436, "calibration/coverage@0%": 0.1582286656589254, "calibration/coverage@1%": 0.18799367871375566, "calibration/coverage@10%": 0.6456064650845373, "calibration/coverage@15%": 0.7005630981791973, "calibration/coverage@20%": 0.7442322830248527, "calibration/coverage@25%": 0.90729044834308, "calibration/coverage@30%": 0.9736842105263157, "calibration/coverage@5%": 0.29036588798198704, "calibration/distribution_entropy_10": 0.5258876466513132, "calibration/distribution_entropy_100": 0.2916314963423535, "calibration/ece": 0.10422118197110612, "calibration/mean_confidence": 0.7664825552288578, "calibration/unique_confidence_per_question": 0.022916666666666665, "calibration/unique_confidences": 8.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01206597222222221, "completions/max_length": 4046.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 1195.227685546875, "completions/mean_terminated_length": 1209.8916748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 318.4, "epoch": 2.4038461538461537, "grad_norm": 0.0003808954788837582, "learning_rate": 2.403846153846154e-07, "loss": -0.0137, "num_tokens": 2806684056.0, "reward": 1.3327399015426635, "reward_std": 0.1265719160437584, "rewards/accuracy_reward": 0.7993055582046509, "rewards/brier_reward": 0.8782251477241516, "rewards/confidence_one_or_zero": 0.007378472317941487, "rewards/format_reward": 0.9879340291023254, "rewards/mean_confidence_reward": 0.7483344674110413, "sampling/batch_mean_priority_error": 0.015536458333333319, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.05277777777777778, "sampling/error_ema_max": 0.24306130409240723, "sampling/error_ema_mean": 0.003524286998435855, "sampling/priority_kl": 0.029999404773116113, "sampling/priority_scale": 0.9481643735663965, "sampling/prob_entropy": 10.278943824768067, "sampling/prob_max": 7.291929505299777e-05, "sampling/prob_min": 2.060989645542577e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.39520001411438, "sampling/prompt_draws_total": 71856.0, "sampling/seen_fraction": 0.9724133372306824, "sampling/unseen_fraction": 0.02758666276931763, "signal/accuracy_reward/centered_abs_mean": 0.11416015625, "signal/accuracy_reward/group_std_mean": 0.154748073220253, "signal/accuracy_reward/group_zero_std_frac": 0.5444444417953491, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.057080078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.057080078125, "signal/advantage_abs_mean": 0.08525396138429642, "signal/advantage_pre_scale_abs_mean": 0.08525396138429642, "signal/advantage_pre_scale_std": 0.18693980872631072, "signal/advantage_std": 0.18693980872631072, "signal/brier_reward/centered_abs_mean": 0.07114665731787681, "signal/brier_reward/group_std_mean": 0.10072609037160873, "signal/brier_reward/group_zero_std_frac": 0.2611111134290695, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.035573328658938405, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.035573328658938405, "signal/confidence_one_or_zero/centered_abs_mean": 0.010801866371184587, "signal/confidence_one_or_zero/group_std_mean": 0.01483048414811492, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9555555582046509, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.0801865997223104e-07, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.0801865997223104e-07, "signal/format_reward/centered_abs_mean": 0.021066622994840144, "signal/format_reward/group_std_mean": 0.044086290150880815, "signal/format_reward/group_zero_std_frac": 0.8027777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010533311497420072, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010533311497420072, "signal/mean_confidence_reward/centered_abs_mean": 0.060543371737003325, "signal/mean_confidence_reward/group_std_mean": 0.08616852164268493, "signal/mean_confidence_reward/group_zero_std_frac": 0.26388889253139497, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 6.054337063687854e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 6.054337063687854e-07, "step": 1000 }, { "epoch": 2.4038461538461537, "eval_calibration/aurc": 0.13409442568496843, "eval_calibration/batch_distribution_entropy": 0.568377548088194, "eval_calibration/batch_entropy_100bins": 0.31589399351948105, "eval_calibration/batch_entropy_10bins": 0.568377548088194, "eval_calibration/batch_entropy_50bins": 0.3718652980491398, "eval_calibration/batch_uniqueness": 0.2256740864367637, "eval_calibration/confidence_entropy": 0.43497557334362497, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.6070484581497797, "eval_calibration/coverage@15%": 0.7436123348017621, "eval_calibration/coverage@20%": 0.8440528634361234, "eval_calibration/coverage@25%": 0.9198237885462555, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/distribution_entropy_10": 0.568377548088194, "eval_calibration/distribution_entropy_100": 0.31589399351948105, "eval_calibration/ece": 0.030616740088105585, "eval_calibration/mean_confidence": 0.7574008810572688, "eval_calibration/unique_confidence_per_question": 0.009548611111111112, "eval_calibration/unique_confidences": 11, "eval_completions/clipped_ratio": 0.013020833333333334, "eval_completions/max_length": 3174.1666666666665, "eval_completions/max_terminated_length": 3174.1666666666665, "eval_completions/mean_length": 1153.855448404948, "eval_completions/mean_terminated_length": 1169.1691080729167, "eval_completions/min_length": 92.5, "eval_completions/min_terminated_length": 431.0, "eval_loss": 0.0, "eval_num_tokens": 2806684056.0, "eval_reward": 1.2744812965393066, "eval_reward_std": 0.33850590387980145, "eval_rewards/accuracy_reward": 0.7196180522441864, "eval_rewards/brier_reward": 0.8440863788127899, "eval_rewards/confidence_one_or_zero": 0.0034722223257025084, "eval_rewards/format_reward": 0.9852430522441864, "eval_rewards/mean_confidence_reward": 0.7462239265441895, "eval_runtime": 214.7505, "eval_samples_per_second": 4.657, "eval_signal/accuracy_reward/centered_abs_mean": 0.3906792551279068, "eval_signal/accuracy_reward/group_std_mean": 0.4469083696603775, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1953396275639534, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.1953396275639534, "eval_signal/advantage_abs_mean": 0.27881846328576404, "eval_signal/advantage_pre_scale_abs_mean": 0.27881846328576404, "eval_signal/advantage_pre_scale_std": 0.3381160447994868, "eval_signal/advantage_std": 0.3381160447994868, "eval_signal/brier_reward/centered_abs_mean": 0.18172378093004227, "eval_signal/brier_reward/group_std_mean": 0.2443449025352796, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09086189046502113, "eval_signal/brier_reward/weight": 0.5, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.09086189046502113, "eval_signal/confidence_one_or_zero/centered_abs_mean": 0.006727430348594983, "eval_signal/confidence_one_or_zero/group_std_mean": 0.019641855110724766, "eval_signal/confidence_one_or_zero/group_zero_std_frac": 0.8888888955116272, "eval_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.72743022770798e-08, "eval_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "eval_signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.72743022770798e-08, "eval_signal/format_reward/centered_abs_mean": 0.028591579447189968, "eval_signal/format_reward/group_std_mean": 0.08347788328925769, "eval_signal/format_reward/group_zero_std_frac": 0.5277777860562006, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.014295789723594984, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.014295789723594984, "eval_signal/mean_confidence_reward/centered_abs_mean": 0.18783635646104813, "eval_signal/mean_confidence_reward/group_std_mean": 0.23139711221059164, "eval_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "eval_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.8783634156231226e-06, "eval_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "eval_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.8783634156231226e-06, "eval_steps_per_second": 0.028, "step": 1000 }, { "epoch": 2.4038461538461537, "step": 1000, "train_probe_calibration/aurc": 0.08278325344477341, "train_probe_calibration/batch_distribution_entropy": 0.5657352345973774, "train_probe_calibration/batch_entropy_100bins": 0.3148932591301308, "train_probe_calibration/batch_entropy_10bins": 0.5657352345973774, "train_probe_calibration/batch_entropy_50bins": 0.37068724971774347, "train_probe_calibration/batch_uniqueness": 0.22923492591324754, "train_probe_calibration/confidence_entropy": 0.4316876402690614, "train_probe_calibration/coverage@0%": 0.006993006993006993, "train_probe_calibration/coverage@1%": 0.006993006993006993, "train_probe_calibration/coverage@10%": 0.756993006993007, "train_probe_calibration/coverage@15%": 0.8487762237762237, "train_probe_calibration/coverage@20%": 0.9755244755244755, "train_probe_calibration/coverage@25%": 1.0, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.006993006993006993, "train_probe_calibration/distribution_entropy_10": 0.5657352345973774, "train_probe_calibration/distribution_entropy_100": 0.3148932591301308, "train_probe_calibration/ece": 0.04641608391608378, "train_probe_calibration/mean_confidence": 0.7628496503496504, "train_probe_calibration/unique_confidence_per_question": 0.008680555555555556, "train_probe_calibration/unique_confidences": 10, "train_probe_completions/clipped_ratio": 0.010243055555555566, "train_probe_completions/max_length": 3426.6666666666665, "train_probe_completions/max_terminated_length": 3426.6666666666665, "train_probe_completions/mean_length": 1182.697245279948, "train_probe_completions/mean_terminated_length": 1195.1104532877605, "train_probe_completions/min_length": 0.0, "train_probe_completions/min_terminated_length": 391.3333333333333, "train_probe_loss": 0.0, "train_probe_num_tokens": 2806684056.0, "train_probe_reward": 1.3287967244784038, "train_probe_reward_std": 0.2908843557039897, "train_probe_rewards/accuracy_reward": 0.7838541766007742, "train_probe_rewards/brier_reward": 0.880668431520462, "train_probe_rewards/confidence_one_or_zero": 0.006944444651405017, "train_probe_rewards/format_reward": 0.9930555621782938, "train_probe_rewards/mean_confidence_reward": 0.7575520773728689, "train_probe_runtime": 210.6731, "train_probe_samples_per_second": 4.747, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3299153695503871, "train_probe_signal/accuracy_reward/group_std_mean": 0.40984514355659485, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16495768477519354, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.16495768477519354, "train_probe_signal/advantage_abs_mean": 0.22570937126874924, "train_probe_signal/advantage_pre_scale_abs_mean": 0.22570937126874924, "train_probe_signal/advantage_pre_scale_std": 0.2904963940382004, "train_probe_signal/advantage_std": 0.2904963940382004, "train_probe_signal/brier_reward/centered_abs_mean": 0.1419260154167811, "train_probe_signal/brier_reward/group_std_mean": 0.20473192383845648, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07096300770839055, "train_probe_signal/brier_reward/weight": 0.5, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.07096300770839055, "train_probe_signal/confidence_one_or_zero/centered_abs_mean": 0.013454860697189966, "train_probe_signal/confidence_one_or_zero/group_std_mean": 0.03928371022144953, "train_probe_signal/confidence_one_or_zero/group_zero_std_frac": 0.7777778108914694, "train_probe_signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 1.3454859981720801e-07, "train_probe_signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "train_probe_signal/confidence_one_or_zero/weighted_centered_abs_mean": 1.3454859981720801e-07, "train_probe_signal/format_reward/centered_abs_mean": 0.013454860697189966, "train_probe_signal/format_reward/group_std_mean": 0.03928371022144953, "train_probe_signal/format_reward/group_zero_std_frac": 0.7777778108914694, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.006727430348594983, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.006727430348594983, "train_probe_signal/mean_confidence_reward/centered_abs_mean": 0.17776689926783243, "train_probe_signal/mean_confidence_reward/group_std_mean": 0.21993445108334223, "train_probe_signal/mean_confidence_reward/group_zero_std_frac": 0.0, "train_probe_signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 1.7776690318290396e-06, "train_probe_signal/mean_confidence_reward/weight": 9.999999747378752e-06, "train_probe_signal/mean_confidence_reward/weighted_centered_abs_mean": 1.7776690318290396e-06, "train_probe_steps_per_second": 0.028 }, { "calibration/aurc": 0.08983140347452864, "calibration/batch_distribution_entropy": 0.4862920985635162, "calibration/batch_entropy_100bins": 0.26166824612290984, "calibration/batch_entropy_10bins": 0.4862920985635162, "calibration/batch_entropy_50bins": 0.3080316255791384, "calibration/batch_uniqueness": 0.058742779627740674, "calibration/confidence_entropy": 0.4193815918272631, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.7181727908967728, "calibration/coverage@15%": 0.8119686864028626, "calibration/coverage@20%": 0.8841824854644381, "calibration/coverage@25%": 0.9353757635253054, "calibration/coverage@30%": 0.9875, "calibration/coverage@5%": 0.5207319418073775, "calibration/distribution_entropy_10": 0.4862920985635162, "calibration/distribution_entropy_100": 0.26166824612290984, "calibration/ece": 0.1097107746709409, "calibration/mean_confidence": 0.779109564129616, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009635416666666674, "completions/max_length": 4010.0, "completions/max_terminated_length": 4010.0, "completions/mean_length": 1164.0114501953126, "completions/mean_terminated_length": 1175.432861328125, "completions/min_length": 0.0, "completions/min_terminated_length": 329.6, "epoch": 2.4158653846153846, "grad_norm": 0.00032606004970148206, "learning_rate": 2.103365384615385e-07, "loss": -0.01, "num_tokens": 2823159004.0, "reward": 1.3159709930419923, "reward_std": 0.11136936545372009, "rewards/accuracy_reward": 0.7671875, "rewards/brier_reward": 0.8743745565414429, "rewards/confidence_one_or_zero": 0.002864583331393078, "rewards/format_reward": 0.9903645873069763, "rewards/mean_confidence_reward": 0.7671875119209289, "sampling/batch_mean_priority_error": 0.01744618055555554, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.061111111111111116, "sampling/error_ema_max": 0.24306130409240723, "sampling/error_ema_mean": 0.0035401007160544397, "sampling/priority_kl": 0.0300002608448267, "sampling/priority_scale": 0.9519850552314892, "sampling/prob_entropy": 10.278963851928712, "sampling/prob_max": 7.331292872549965e-05, "sampling/prob_min": 2.0615904577425682e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.407200002670288, "sampling/prompt_draws_total": 72216.0, "sampling/seen_fraction": 0.973146665096283, "sampling/unseen_fraction": 0.02685333490371704, "signal/accuracy_reward/centered_abs_mean": 0.09576822966337203, "signal/accuracy_reward/group_std_mean": 0.1338461920619011, "signal/accuracy_reward/group_zero_std_frac": 0.5805555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04788411483168602, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04788411483168602, "signal/advantage_abs_mean": 0.07476246803998947, "signal/advantage_pre_scale_abs_mean": 0.07476246803998947, "signal/advantage_pre_scale_std": 0.16831520795822144, "signal/advantage_std": 0.16831520795822144, "signal/brier_reward/centered_abs_mean": 0.060066214948892596, "signal/brier_reward/group_std_mean": 0.08783653080463409, "signal/brier_reward/group_zero_std_frac": 0.3194444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030033107474446298, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.030033107474446298, "signal/confidence_one_or_zero/centered_abs_mean": 0.004714626760687679, "signal/confidence_one_or_zero/group_std_mean": 0.007716905884444713, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.7146265558239975e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.7146265558239975e-08, "signal/format_reward/centered_abs_mean": 0.016531033255159856, "signal/format_reward/group_std_mean": 0.03449917696416378, "signal/format_reward/group_zero_std_frac": 0.8416666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008265516627579928, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008265516627579928, "signal/mean_confidence_reward/centered_abs_mean": 0.0552905336022377, "signal/mean_confidence_reward/group_std_mean": 0.0773746132850647, "signal/mean_confidence_reward/group_zero_std_frac": 0.32777778506278993, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.529053282771202e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.529053282771202e-07, "step": 1005 }, { "calibration/aurc": 0.21375878198864506, "calibration/batch_distribution_entropy": 0.5777321783955085, "calibration/batch_entropy_100bins": 0.31826796586466405, "calibration/batch_entropy_10bins": 0.5777321783955085, "calibration/batch_entropy_50bins": 0.3746598998833386, "calibration/batch_uniqueness": 0.2724663195034364, "calibration/confidence_entropy": 0.4344012037873786, "calibration/coverage@0%": 0.005265935286765727, "calibration/coverage@1%": 0.005265935286765727, "calibration/coverage@10%": 0.23780679094163043, "calibration/coverage@15%": 0.4462542287974838, "calibration/coverage@20%": 0.4462542287974838, "calibration/coverage@25%": 0.6007957518989147, "calibration/coverage@30%": 0.6465743777587609, "calibration/coverage@5%": 0.22250336086247474, "calibration/distribution_entropy_10": 0.5777321783955085, "calibration/distribution_entropy_100": 0.31826796586466405, "calibration/ece": 0.14821627013700106, "calibration/mean_confidence": 0.7294570024087697, "calibration/unique_confidence_per_question": 0.024479166666666666, "calibration/unique_confidences": 9.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010069444444444419, "completions/max_length": 3955.6, "completions/max_terminated_length": 3955.6, "completions/mean_length": 1155.7947021484374, "completions/mean_terminated_length": 1167.70869140625, "completions/min_length": 0.0, "completions/min_terminated_length": 309.4, "epoch": 2.4278846153846154, "grad_norm": 0.00034449115628376603, "learning_rate": 1.8028846153846153e-07, "loss": -0.0115, "num_tokens": 2839577855.0, "reward": 1.3057581901550293, "reward_std": 0.11530058681964875, "rewards/accuracy_reward": 0.7427083373069763, "rewards/brier_reward": 0.8788626194000244, "rewards/confidence_one_or_zero": 0.0037326389632653446, "rewards/format_reward": 0.9899305582046509, "rewards/mean_confidence_reward": 0.7416854858398437, "sampling/batch_mean_priority_error": 0.014730902777777763, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.08333333333333334, "sampling/error_ema_max": 0.24306130409240723, "sampling/error_ema_mean": 0.0035560850985348223, "sampling/priority_kl": 0.02999974824488163, "sampling/priority_scale": 0.9572434485191479, "sampling/prob_entropy": 10.27894992828369, "sampling/prob_max": 7.380725000984966e-05, "sampling/prob_min": 2.060995284409728e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.419200038909912, "sampling/prompt_draws_total": 72576.0, "sampling/seen_fraction": 0.9740266680717469, "sampling/unseen_fraction": 0.025973331928253175, "signal/accuracy_reward/centered_abs_mean": 0.09190538227558136, "signal/accuracy_reward/group_std_mean": 0.13165127336978913, "signal/accuracy_reward/group_zero_std_frac": 0.5777777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04595269113779068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04595269113779068, "signal/advantage_abs_mean": 0.07484828606247902, "signal/advantage_pre_scale_abs_mean": 0.07484828606247902, "signal/advantage_pre_scale_std": 0.17094075083732604, "signal/advantage_std": 0.17094075083732604, "signal/brier_reward/centered_abs_mean": 0.06410380974411964, "signal/brier_reward/group_std_mean": 0.09359335005283356, "signal/brier_reward/group_zero_std_frac": 0.2833333373069763, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03205190487205982, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03205190487205982, "signal/confidence_one_or_zero/centered_abs_mean": 0.006060112884733826, "signal/confidence_one_or_zero/group_std_mean": 0.01077386476099491, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9583333373069763, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.060112909267445e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.060112909267445e-08, "signal/format_reward/centered_abs_mean": 0.018250868655741215, "signal/format_reward/group_std_mean": 0.039764121547341345, "signal/format_reward/group_zero_std_frac": 0.8166666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009125434327870607, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009125434327870607, "signal/mean_confidence_reward/centered_abs_mean": 0.05567202791571617, "signal/mean_confidence_reward/group_std_mean": 0.07979700714349747, "signal/mean_confidence_reward/group_zero_std_frac": 0.30277777910232545, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.567202720158093e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.567202720158093e-07, "step": 1010 }, { "calibration/aurc": 0.10069863430886601, "calibration/batch_distribution_entropy": 0.5385201084042638, "calibration/batch_entropy_100bins": 0.295772450904362, "calibration/batch_entropy_10bins": 0.5385201084042638, "calibration/batch_entropy_50bins": 0.34817854364645356, "calibration/batch_uniqueness": 0.18474362211384246, "calibration/confidence_entropy": 0.4417251183958671, "calibration/coverage@0%": 0.1368421052631579, "calibration/coverage@1%": 0.2763157894736842, "calibration/coverage@10%": 0.6819344535481182, "calibration/coverage@15%": 0.7071976114428551, "calibration/coverage@20%": 0.7562380224968754, "calibration/coverage@25%": 0.8, "calibration/coverage@30%": 0.8908136482939633, "calibration/coverage@5%": 0.5855627809056849, "calibration/distribution_entropy_10": 0.5385201084042638, "calibration/distribution_entropy_100": 0.295772450904362, "calibration/ece": 0.13028352871666612, "calibration/mean_confidence": 0.736554621132017, "calibration/unique_confidence_per_question": 0.020312499999999997, "calibration/unique_confidences": 7.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010243055555555557, "completions/max_length": 3879.6, "completions/max_terminated_length": 3879.6, "completions/mean_length": 1163.62529296875, "completions/mean_terminated_length": 1175.6705322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 369.0, "epoch": 2.4399038461538463, "grad_norm": 0.0003656467597465962, "learning_rate": 1.5024038461538462e-07, "loss": -0.0113, "num_tokens": 2856091874.0, "reward": 1.3181779623031615, "reward_std": 0.11705977618694305, "rewards/accuracy_reward": 0.7717881798744202, "rewards/brier_reward": 0.8749693989753723, "rewards/confidence_one_or_zero": 0.002690972265554592, "rewards/format_reward": 0.9895833373069763, "rewards/mean_confidence_reward": 0.7427213311195373, "sampling/batch_mean_priority_error": 0.014354166666666657, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.061111111111111116, "sampling/error_ema_max": 0.24306130409240723, "sampling/error_ema_mean": 0.003569081239402294, "sampling/priority_kl": 0.02999938577413559, "sampling/priority_scale": 0.9616323411231861, "sampling/prob_entropy": 10.278946685791016, "sampling/prob_max": 7.424467476084829e-05, "sampling/prob_min": 2.0611210493370892e-05, "sampling/prompt_draws_max": 8.0, "sampling/prompt_draws_mean": 2.4312000274658203, "sampling/prompt_draws_total": 72936.0, "sampling/seen_fraction": 0.9747933387756348, "sampling/unseen_fraction": 0.025206661224365233, "signal/accuracy_reward/centered_abs_mean": 0.1018825963139534, "signal/accuracy_reward/group_std_mean": 0.14123183190822602, "signal/accuracy_reward/group_zero_std_frac": 0.569444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0509412981569767, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0509412981569767, "signal/advantage_abs_mean": 0.07748131155967712, "signal/advantage_pre_scale_abs_mean": 0.07748131155967712, "signal/advantage_pre_scale_std": 0.17561177611351014, "signal/advantage_std": 0.17561177611351014, "signal/brier_reward/centered_abs_mean": 0.0639573760330677, "signal/brier_reward/group_std_mean": 0.09368740767240524, "signal/brier_reward/group_zero_std_frac": 0.2722222238779068, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03197868801653385, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03197868801653385, "signal/confidence_one_or_zero/centered_abs_mean": 0.004356553789693862, "signal/confidence_one_or_zero/group_std_mean": 0.007231736136600375, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222328186035, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.35655380215394e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.35655380215394e-08, "signal/format_reward/centered_abs_mean": 0.018543837033212184, "signal/format_reward/group_std_mean": 0.040525621920824054, "signal/format_reward/group_zero_std_frac": 0.8111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009271918516606092, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009271918516606092, "signal/mean_confidence_reward/centered_abs_mean": 0.05968533381819725, "signal/mean_confidence_reward/group_std_mean": 0.08249245584011078, "signal/mean_confidence_reward/group_zero_std_frac": 0.2805555522441864, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.968533400846354e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.968533400846354e-07, "step": 1015 }, { "calibration/aurc": 0.07653606272396447, "calibration/batch_distribution_entropy": 0.6288167408136538, "calibration/batch_entropy_100bins": 0.3500217781385155, "calibration/batch_entropy_10bins": 0.6288167408136538, "calibration/batch_entropy_50bins": 0.4120399739197384, "calibration/batch_uniqueness": 0.39337521507351514, "calibration/confidence_entropy": 0.455890063812096, "calibration/coverage@0%": 0.3141049782428512, "calibration/coverage@1%": 0.46896834852880237, "calibration/coverage@10%": 0.7289743490122945, "calibration/coverage@15%": 0.7870126657687526, "calibration/coverage@20%": 0.8191179289266473, "calibration/coverage@25%": 0.8948741193535017, "calibration/coverage@30%": 0.9583333333333333, "calibration/coverage@5%": 0.5418569553805774, "calibration/distribution_entropy_10": 0.6288167408136538, "calibration/distribution_entropy_100": 0.3500217781385155, "calibration/ece": 0.14023634997927878, "calibration/mean_confidence": 0.7137680877883685, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009809027777777767, "completions/max_length": 3967.6, "completions/max_terminated_length": 3967.6, "completions/mean_length": 1186.3420166015626, "completions/mean_terminated_length": 1198.19521484375, "completions/min_length": 0.0, "completions/min_terminated_length": 382.8, "epoch": 2.451923076923077, "grad_norm": 0.0003450646181590855, "learning_rate": 1.201923076923077e-07, "loss": -0.0112, "num_tokens": 2872886630.0, "reward": 1.3077133417129516, "reward_std": 0.1105430081486702, "rewards/accuracy_reward": 0.7447916626930237, "rewards/brier_reward": 0.8804292678833008, "rewards/confidence_one_or_zero": 0.004253472259733826, "rewards/format_reward": 0.9901909708976746, "rewards/mean_confidence_reward": 0.7359375, "sampling/batch_mean_priority_error": 0.019133680555555543, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.05, "sampling/error_ema_max": 0.24306130409240723, "sampling/error_ema_mean": 0.0035847860854119062, "sampling/priority_kl": 0.029998553916811943, "sampling/priority_scale": 0.9654334722785279, "sampling/prob_entropy": 10.27893943786621, "sampling/prob_max": 7.464435184374451e-05, "sampling/prob_min": 2.0355882952571847e-05, "sampling/prompt_draws_max": 8.4, "sampling/prompt_draws_mean": 2.4432000160217284, "sampling/prompt_draws_total": 73296.0, "sampling/seen_fraction": 0.9754599928855896, "sampling/unseen_fraction": 0.0245400071144104, "signal/accuracy_reward/centered_abs_mean": 0.09067925289273263, "signal/accuracy_reward/group_std_mean": 0.12974078506231307, "signal/accuracy_reward/group_zero_std_frac": 0.5972222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04533962644636631, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04533962644636631, "signal/advantage_abs_mean": 0.07144246324896812, "signal/advantage_pre_scale_abs_mean": 0.07144246324896812, "signal/advantage_pre_scale_std": 0.16677022576332093, "signal/advantage_std": 0.16677022576332093, "signal/brier_reward/centered_abs_mean": 0.06360199972987175, "signal/brier_reward/group_std_mean": 0.093186055123806, "signal/brier_reward/group_zero_std_frac": 0.27777778506278994, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031800999864935876, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.031800999864935876, "signal/confidence_one_or_zero/centered_abs_mean": 0.006819661497138441, "signal/confidence_one_or_zero/group_std_mean": 0.010803023539483547, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9611111044883728, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 6.819661280133005e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 6.819661280133005e-08, "signal/format_reward/centered_abs_mean": 0.01769205741584301, "signal/format_reward/group_std_mean": 0.038584979623556136, "signal/format_reward/group_zero_std_frac": 0.8222222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008846028707921505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008846028707921505, "signal/mean_confidence_reward/centered_abs_mean": 0.05903239473700524, "signal/mean_confidence_reward/group_std_mean": 0.0819184049963951, "signal/mean_confidence_reward/group_zero_std_frac": 0.28055556416511535, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.903239184590347e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.903239184590347e-07, "step": 1020 }, { "calibration/aurc": 0.07430514258786, "calibration/batch_distribution_entropy": 0.5098176087204103, "calibration/batch_entropy_100bins": 0.2840145574128153, "calibration/batch_entropy_10bins": 0.5098176087204103, "calibration/batch_entropy_50bins": 0.3343373416693276, "calibration/batch_uniqueness": 0.1409113739591033, "calibration/confidence_entropy": 0.4284641227215422, "calibration/coverage@0%": 0.13056060585348517, "calibration/coverage@1%": 0.31228384345139637, "calibration/coverage@10%": 0.8268208768494343, "calibration/coverage@15%": 0.8967770887728459, "calibration/coverage@20%": 0.926019908616188, "calibration/coverage@25%": 0.9536458333333332, "calibration/coverage@30%": 0.9536458333333332, "calibration/coverage@5%": 0.49786913500927277, "calibration/distribution_entropy_10": 0.5098176087204103, "calibration/distribution_entropy_100": 0.2840145574128153, "calibration/ece": 0.11457569452038867, "calibration/mean_confidence": 0.7698891784741114, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009374999999999977, "completions/max_length": 3959.8, "completions/max_terminated_length": 3959.8, "completions/mean_length": 1172.4079833984374, "completions/mean_terminated_length": 1183.5778564453126, "completions/min_length": 0.0, "completions/min_terminated_length": 331.4, "epoch": 2.4639423076923075, "grad_norm": 0.0004397702869027853, "learning_rate": 9.014423076923076e-08, "loss": -0.01, "num_tokens": 2889486818.0, "reward": 1.3201935291290283, "reward_std": 0.11437996476888657, "rewards/accuracy_reward": 0.7700520753860474, "rewards/brier_reward": 0.8796947836875916, "rewards/confidence_one_or_zero": 0.0035590278333984316, "rewards/format_reward": 0.990625, "rewards/mean_confidence_reward": 0.7564895629882813, "sampling/batch_mean_priority_error": 0.013347291666666655, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.05, "sampling/error_ema_max": 0.24306130409240723, "sampling/error_ema_mean": 0.0035991712007671594, "sampling/priority_kl": 0.02999994195997715, "sampling/priority_scale": 0.969786411547102, "sampling/prob_entropy": 10.278949737548828, "sampling/prob_max": 7.508765411330387e-05, "sampling/prob_min": 1.9963686645496638e-05, "sampling/prompt_draws_max": 9.0, "sampling/prompt_draws_mean": 2.4552000522613526, "sampling/prompt_draws_total": 73656.0, "sampling/seen_fraction": 0.9761600017547607, "sampling/unseen_fraction": 0.023839998245239257, "signal/accuracy_reward/centered_abs_mean": 0.0878634974360466, "signal/accuracy_reward/group_std_mean": 0.1292222797870636, "signal/accuracy_reward/group_zero_std_frac": 0.5722222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0439317487180233, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0439317487180233, "signal/advantage_abs_mean": 0.07393695563077926, "signal/advantage_pre_scale_abs_mean": 0.07393695563077926, "signal/advantage_pre_scale_std": 0.1688719093799591, "signal/advantage_std": 0.1688719093799591, "signal/brier_reward/centered_abs_mean": 0.06520559415221214, "signal/brier_reward/group_std_mean": 0.09605432748794555, "signal/brier_reward/group_zero_std_frac": 0.2666666656732559, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03260279707610607, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03260279707610607, "signal/confidence_one_or_zero/centered_abs_mean": 0.005040147551335394, "signal/confidence_one_or_zero/group_std_mean": 0.007720847195014357, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222208976745, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.0401475704120455e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.0401475704120455e-08, "signal/format_reward/centered_abs_mean": 0.01625434048473835, "signal/format_reward/group_std_mean": 0.03423248454928398, "signal/format_reward/group_zero_std_frac": 0.8444444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008127170242369175, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008127170242369175, "signal/mean_confidence_reward/centered_abs_mean": 0.05742301493883133, "signal/mean_confidence_reward/group_std_mean": 0.08127100467681884, "signal/mean_confidence_reward/group_zero_std_frac": 0.27777777910232543, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.742301368627523e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.742301368627523e-07, "step": 1025 }, { "calibration/aurc": 0.190737357158577, "calibration/batch_distribution_entropy": 0.6315100168513081, "calibration/batch_entropy_100bins": 0.3447852289048042, "calibration/batch_entropy_10bins": 0.6315100168513081, "calibration/batch_entropy_50bins": 0.40587559288847, "calibration/batch_uniqueness": 0.3688717344268504, "calibration/confidence_entropy": 0.45521653229510173, "calibration/coverage@0%": 0.0005249343832020997, "calibration/coverage@1%": 0.0005249343832020997, "calibration/coverage@10%": 0.21187835813717001, "calibration/coverage@15%": 0.5564304461942258, "calibration/coverage@20%": 0.6035728518228416, "calibration/coverage@25%": 0.7324071402069505, "calibration/coverage@30%": 0.7968586387434555, "calibration/coverage@5%": 0.10551181102362203, "calibration/distribution_entropy_10": 0.6315100168513081, "calibration/distribution_entropy_100": 0.3447852289048042, "calibration/ece": 0.10167360074755058, "calibration/mean_confidence": 0.7203472767998242, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008940972222222232, "completions/max_length": 3926.6, "completions/max_terminated_length": 3926.6, "completions/mean_length": 1155.531884765625, "completions/mean_terminated_length": 1166.0040283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 369.6, "epoch": 2.4759615384615383, "grad_norm": 0.00048662739573046565, "learning_rate": 6.009615384615386e-08, "loss": -0.0097, "num_tokens": 2905924817.0, "reward": 1.3112340927124024, "reward_std": 0.11016112565994263, "rewards/accuracy_reward": 0.7549479126930236, "rewards/brier_reward": 0.876445984840393, "rewards/confidence_one_or_zero": 0.0024305555562023073, "rewards/format_reward": 0.9910590291023255, "rewards/mean_confidence_reward": 0.7597517251968384, "sampling/batch_mean_priority_error": 0.018508680555555542, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.03888888888888889, "sampling/error_ema_max": 0.24306130409240723, "sampling/error_ema_mean": 0.003616081550717354, "sampling/priority_kl": 0.030000020191073416, "sampling/priority_scale": 0.9718193710548804, "sampling/prob_entropy": 10.278950881958007, "sampling/prob_max": 7.537075434811413e-05, "sampling/prob_min": 1.998338666453492e-05, "sampling/prompt_draws_max": 9.0, "sampling/prompt_draws_mean": 2.467199945449829, "sampling/prompt_draws_total": 74016.0, "sampling/seen_fraction": 0.9765533447265625, "sampling/unseen_fraction": 0.0234466552734375, "signal/accuracy_reward/centered_abs_mean": 0.09672851413488388, "signal/accuracy_reward/group_std_mean": 0.13473242670297622, "signal/accuracy_reward/group_zero_std_frac": 0.5861111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04836425706744194, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04836425706744194, "signal/advantage_abs_mean": 0.07378808557987213, "signal/advantage_pre_scale_abs_mean": 0.07378808557987213, "signal/advantage_pre_scale_std": 0.16930631101131438, "signal/advantage_std": 0.16930631101131438, "signal/brier_reward/centered_abs_mean": 0.06198948621749878, "signal/brier_reward/group_std_mean": 0.08934446573257446, "signal/brier_reward/group_zero_std_frac": 0.3194444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03099474310874939, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.03099474310874939, "signal/confidence_one_or_zero/centered_abs_mean": 0.004220920044463128, "signal/confidence_one_or_zero/group_std_mean": 0.008400925993919372, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9638888835906982, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 4.220919791464439e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 4.220919791464439e-08, "signal/format_reward/centered_abs_mean": 0.01584743931889534, "signal/format_reward/group_std_mean": 0.03291997648775578, "signal/format_reward/group_zero_std_frac": 0.8527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00792371965944767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00792371965944767, "signal/mean_confidence_reward/centered_abs_mean": 0.05436865985393524, "signal/mean_confidence_reward/group_std_mean": 0.07493493854999542, "signal/mean_confidence_reward/group_zero_std_frac": 0.33333333730697634, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.43686576293112e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.43686576293112e-07, "step": 1030 }, { "calibration/aurc": 0.09454584045054779, "calibration/batch_distribution_entropy": 0.5348206829324342, "calibration/batch_entropy_100bins": 0.2949276423704754, "calibration/batch_entropy_10bins": 0.5348206829324342, "calibration/batch_entropy_50bins": 0.34718404870924985, "calibration/batch_uniqueness": 0.15781809362199886, "calibration/confidence_entropy": 0.4316328098559209, "calibration/coverage@0%": 0.2692378857342571, "calibration/coverage@1%": 0.2750121639494802, "calibration/coverage@10%": 0.49451697127937333, "calibration/coverage@15%": 0.8351814077458659, "calibration/coverage@20%": 0.8727412423846823, "calibration/coverage@25%": 0.8931067776327242, "calibration/coverage@30%": 0.9379582789382072, "calibration/coverage@5%": 0.48296841484892716, "calibration/distribution_entropy_10": 0.5348206829324342, "calibration/distribution_entropy_100": 0.2949276423704754, "calibration/ece": 0.14140299592935995, "calibration/mean_confidence": 0.7589548983025295, "calibration/unique_confidence_per_question": 0.0234375, "calibration/unique_confidences": 9.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007118055555555558, "completions/max_length": 4002.2, "completions/max_terminated_length": 4002.2, "completions/mean_length": 1200.6076416015626, "completions/mean_terminated_length": 1209.26611328125, "completions/min_length": 0.0, "completions/min_terminated_length": 324.4, "epoch": 2.487980769230769, "grad_norm": 0.00033582025207579136, "learning_rate": 3.004807692307693e-08, "loss": -0.0079, "num_tokens": 2922816713.0, "reward": 1.3218994140625, "reward_std": 0.1117009773850441, "rewards/accuracy_reward": 0.7754340291023254, "rewards/brier_reward": 0.8754678845405579, "rewards/confidence_one_or_zero": 0.003645833331393078, "rewards/format_reward": 0.9928819537162781, "rewards/mean_confidence_reward": 0.7542786240577698, "sampling/batch_mean_priority_error": 0.015368055555555543, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.0611111111111111, "sampling/error_ema_max": 0.24306130409240723, "sampling/error_ema_mean": 0.003631386300548911, "sampling/priority_kl": 0.030000073090195655, "sampling/priority_scale": 0.9759026825660839, "sampling/prob_entropy": 10.278952598571777, "sampling/prob_max": 7.579407974844798e-05, "sampling/prob_min": 1.9983825040981174e-05, "sampling/prompt_draws_max": 9.0, "sampling/prompt_draws_mean": 2.479199981689453, "sampling/prompt_draws_total": 74376.0, "sampling/seen_fraction": 0.9772333383560181, "sampling/unseen_fraction": 0.022766661643981934, "signal/accuracy_reward/centered_abs_mean": 0.09267035722732545, "signal/accuracy_reward/group_std_mean": 0.1369502991437912, "signal/accuracy_reward/group_zero_std_frac": 0.5472222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04633517861366272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04633517861366272, "signal/advantage_abs_mean": 0.07139594107866287, "signal/advantage_pre_scale_abs_mean": 0.07139594107866287, "signal/advantage_pre_scale_std": 0.1638278156518936, "signal/advantage_std": 0.1638278156518936, "signal/brier_reward/centered_abs_mean": 0.06340188980102539, "signal/brier_reward/group_std_mean": 0.09231583029031754, "signal/brier_reward/group_zero_std_frac": 0.2944444477558136, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031700944900512694, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.031700944900512694, "signal/confidence_one_or_zero/centered_abs_mean": 0.005414496490266174, "signal/confidence_one_or_zero/group_std_mean": 0.008160660043358803, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9722222089767456, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.4144963712587925e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.4144963712587925e-08, "signal/format_reward/centered_abs_mean": 0.013075086567550898, "signal/format_reward/group_std_mean": 0.029334478825330735, "signal/format_reward/group_zero_std_frac": 0.8611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006537543283775449, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006537543283775449, "signal/mean_confidence_reward/centered_abs_mean": 0.05938764438033104, "signal/mean_confidence_reward/group_std_mean": 0.08216754347085953, "signal/mean_confidence_reward/group_zero_std_frac": 0.3055555522441864, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.938764388702112e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.938764388702112e-07, "step": 1035 }, { "calibration/aurc": 0.09311068544688798, "calibration/batch_distribution_entropy": 0.49783279102907524, "calibration/batch_entropy_100bins": 0.27292808943936137, "calibration/batch_entropy_10bins": 0.49783279102907524, "calibration/batch_entropy_50bins": 0.32128653094852655, "calibration/batch_uniqueness": 0.08343998356816631, "calibration/confidence_entropy": 0.4269823165432417, "calibration/coverage@0%": 0.0005221932114882506, "calibration/coverage@1%": 0.0005221932114882506, "calibration/coverage@10%": 0.7010835291108992, "calibration/coverage@15%": 0.7568982758052714, "calibration/coverage@20%": 0.8080768529044391, "calibration/coverage@25%": 0.9448275862068967, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.27050854970185123, "calibration/distribution_entropy_10": 0.49783279102907524, "calibration/distribution_entropy_100": 0.27292808943936137, "calibration/ece": 0.10161390043986591, "calibration/mean_confidence": 0.7690658569311672, "calibration/unique_confidence_per_question": 0.021875, "calibration/unique_confidences": 8.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007638888888888906, "completions/max_length": 3960.6, "completions/max_terminated_length": 3960.6, "completions/mean_length": 1175.173974609375, "completions/mean_terminated_length": 1184.4427734375, "completions/min_length": 0.0, "completions/min_terminated_length": 362.0, "epoch": 2.5, "grad_norm": 0.000330591865349561, "learning_rate": 0.0, "loss": -0.008, "num_tokens": 2939416413.0, "reward": 1.3254358530044557, "reward_std": 0.10986914932727813, "rewards/accuracy_reward": 0.7760416626930237, "rewards/brier_reward": 0.8824537873268128, "rewards/confidence_one_or_zero": 0.003559027856681496, "rewards/format_reward": 0.9923611164093018, "rewards/mean_confidence_reward": 0.7549175500869751, "sampling/batch_mean_priority_error": 0.011868055555555548, "sampling/batch_prompt_draws": 72.0, "sampling/batch_unique_prompts": 72.0, "sampling/batch_unseen_draw_fraction": 0.05277777777777778, "sampling/error_ema_max": 0.24306130409240723, "sampling/error_ema_mean": 0.003644486190751195, "sampling/priority_kl": 0.03000090755522251, "sampling/priority_scale": 0.9802652895683422, "sampling/prob_entropy": 10.278954124450683, "sampling/prob_max": 7.624288991792127e-05, "sampling/prob_min": 1.998272418859415e-05, "sampling/prompt_draws_max": 9.0, "sampling/prompt_draws_mean": 2.4911999702453613, "sampling/prompt_draws_total": 74736.0, "sampling/seen_fraction": 0.9778799891471863, "sampling/unseen_fraction": 0.02212001085281372, "signal/accuracy_reward/centered_abs_mean": 0.09396701455116271, "signal/accuracy_reward/group_std_mean": 0.13123033344745635, "signal/accuracy_reward/group_zero_std_frac": 0.5972222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04698350727558136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04698350727558136, "signal/advantage_abs_mean": 0.07230262234807014, "signal/advantage_pre_scale_abs_mean": 0.07230262234807014, "signal/advantage_pre_scale_std": 0.1636380672454834, "signal/advantage_std": 0.1636380672454834, "signal/brier_reward/centered_abs_mean": 0.0633222870528698, "signal/brier_reward/group_std_mean": 0.09267549812793732, "signal/brier_reward/group_zero_std_frac": 0.26944445073604584, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0316611435264349, "signal/brier_reward/weight": 0.5, "signal/brier_reward/weighted_centered_abs_mean": 0.0316611435264349, "signal/confidence_one_or_zero/centered_abs_mean": 0.005799696198664606, "signal/confidence_one_or_zero/group_std_mean": 0.009705995582044125, "signal/confidence_one_or_zero/group_zero_std_frac": 0.9638888835906982, "signal/confidence_one_or_zero/scaled_weighted_centered_abs_mean": 5.799696225494699e-08, "signal/confidence_one_or_zero/weight": 9.999999747378752e-06, "signal/confidence_one_or_zero/weighted_centered_abs_mean": 5.799696225494699e-08, "signal/format_reward/centered_abs_mean": 0.014149305317550898, "signal/format_reward/group_std_mean": 0.032183651253581046, "signal/format_reward/group_zero_std_frac": 0.8472222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007074652658775449, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007074652658775449, "signal/mean_confidence_reward/centered_abs_mean": 0.05784342586994171, "signal/mean_confidence_reward/group_std_mean": 0.08098789751529693, "signal/mean_confidence_reward/group_zero_std_frac": 0.28888889253139494, "signal/mean_confidence_reward/scaled_weighted_centered_abs_mean": 5.784342533843301e-07, "signal/mean_confidence_reward/weight": 9.999999747378752e-06, "signal/mean_confidence_reward/weighted_centered_abs_mean": 5.784342533843301e-07, "step": 1040 }, { "epoch": 2.5, "step": 1040, "total_flos": 0.0, "train_loss": -0.011179522975817235, "train_runtime": 247236.2535, "train_samples_per_second": 0.303, "train_steps_per_second": 0.004 } ], "logging_steps": 5, "max_steps": 1040, "num_input_tokens_seen": 2939416413, "num_train_epochs": 3, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }