| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9995757318625371, | |
| "eval_steps": 500, | |
| "global_step": 589, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.0016970725498515061, | |
| "grad_norm": 0.5869814984650515, | |
| "kl": 0.0, | |
| "learning_rate": 1.6949152542372882e-08, | |
| "loss": 0.0841, | |
| "max_completion_length": 375.125, | |
| "max_terminated_completion_length": 375.125, | |
| "mean_completion_length": 230.78515625, | |
| "mean_terminated_completion_length": 230.78515625, | |
| "min_completion_length": 136.9375, | |
| "min_terminated_completion_length": 136.9375, | |
| "num_tokens": 145209.0, | |
| "reward": 0.644873904529959, | |
| "reward_std": 0.18686308059841394, | |
| "rewards/format_reward/mean": 0.8515625, | |
| "rewards/format_reward/std": 0.2877223640680313, | |
| "rewards/qatch_metrics/mean": 0.6026549516245723, | |
| "rewards/qatch_metrics/std": 0.20010850299149752, | |
| "rewards/tag_count_reward/mean": 0.94921875, | |
| "rewards/tag_count_reward/std": 0.10330275679007173, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0029296875, | |
| "epoch": 0.00848536274925753, | |
| "grad_norm": 0.46509812608026285, | |
| "kl": 0.00024968385696411133, | |
| "learning_rate": 8.47457627118644e-08, | |
| "loss": 0.0041, | |
| "max_completion_length": 544.203125, | |
| "max_terminated_completion_length": 423.109375, | |
| "mean_completion_length": 264.08984375, | |
| "mean_terminated_completion_length": 256.1645188331604, | |
| "min_completion_length": 136.0, | |
| "min_terminated_completion_length": 136.0, | |
| "num_tokens": 744405.0, | |
| "reward": 0.5485858838073909, | |
| "reward_std": 0.21158753434428945, | |
| "rewards/format_reward/mean": 0.8740234375, | |
| "rewards/format_reward/std": 0.2784869666211307, | |
| "rewards/qatch_metrics/mean": 0.4864739590557292, | |
| "rewards/qatch_metrics/std": 0.24045464565278962, | |
| "rewards/tag_count_reward/mean": 0.95361328125, | |
| "rewards/tag_count_reward/std": 0.11037775257136673, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.01697072549851506, | |
| "grad_norm": 0.5090702771289867, | |
| "kl": 0.0003334641456604004, | |
| "learning_rate": 1.694915254237288e-07, | |
| "loss": 0.0362, | |
| "max_completion_length": 396.3625, | |
| "max_terminated_completion_length": 396.3625, | |
| "mean_completion_length": 251.7640625, | |
| "mean_terminated_completion_length": 251.7640625, | |
| "min_completion_length": 139.7875, | |
| "min_terminated_completion_length": 139.7875, | |
| "num_tokens": 1505847.0, | |
| "reward": 0.5182547142729164, | |
| "reward_std": 0.19356031545903535, | |
| "rewards/format_reward/mean": 0.86484375, | |
| "rewards/format_reward/std": 0.31712658517062664, | |
| "rewards/qatch_metrics/mean": 0.45220911494106986, | |
| "rewards/qatch_metrics/std": 0.21342533665010704, | |
| "rewards/tag_count_reward/mean": 0.9478515625, | |
| "rewards/tag_count_reward/std": 0.12756802467629313, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.025456088247772592, | |
| "grad_norm": 0.5061129007011103, | |
| "kl": 0.0003319263458251953, | |
| "learning_rate": 2.542372881355932e-07, | |
| "loss": 0.0285, | |
| "max_completion_length": 410.6125, | |
| "max_terminated_completion_length": 410.6125, | |
| "mean_completion_length": 253.809375, | |
| "mean_terminated_completion_length": 253.809375, | |
| "min_completion_length": 136.5625, | |
| "min_terminated_completion_length": 136.5625, | |
| "num_tokens": 2242611.0, | |
| "reward": 0.5680677419528365, | |
| "reward_std": 0.2350599060737295, | |
| "rewards/format_reward/mean": 0.8734375, | |
| "rewards/format_reward/std": 0.2996659129858017, | |
| "rewards/qatch_metrics/mean": 0.5095718788910745, | |
| "rewards/qatch_metrics/std": 0.2679574864036113, | |
| "rewards/tag_count_reward/mean": 0.9517578125, | |
| "rewards/tag_count_reward/std": 0.12166438875719905, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.03394145099703012, | |
| "grad_norm": 0.47380382111384095, | |
| "kl": 0.000345766544342041, | |
| "learning_rate": 3.389830508474576e-07, | |
| "loss": 0.0115, | |
| "max_completion_length": 398.1625, | |
| "max_terminated_completion_length": 396.6375, | |
| "mean_completion_length": 247.84609375, | |
| "mean_terminated_completion_length": 247.6327085494995, | |
| "min_completion_length": 133.625, | |
| "min_terminated_completion_length": 133.625, | |
| "num_tokens": 2978334.0, | |
| "reward": 0.5601951198652386, | |
| "reward_std": 0.23269697735086084, | |
| "rewards/format_reward/mean": 0.8828125, | |
| "rewards/format_reward/std": 0.27684398628771306, | |
| "rewards/qatch_metrics/mean": 0.4989197966642678, | |
| "rewards/qatch_metrics/std": 0.2642190517857671, | |
| "rewards/tag_count_reward/mean": 0.956640625, | |
| "rewards/tag_count_reward/std": 0.11600371869280934, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0015625, | |
| "epoch": 0.04242681374628765, | |
| "grad_norm": 0.5039885136692384, | |
| "kl": 0.0003740072250366211, | |
| "learning_rate": 4.23728813559322e-07, | |
| "loss": 0.0205, | |
| "max_completion_length": 411.8875, | |
| "max_terminated_completion_length": 410.7, | |
| "mean_completion_length": 246.78828125, | |
| "mean_terminated_completion_length": 246.5934377670288, | |
| "min_completion_length": 137.275, | |
| "min_terminated_completion_length": 137.275, | |
| "num_tokens": 3744287.0, | |
| "reward": 0.520661533344537, | |
| "reward_std": 0.1745435191784054, | |
| "rewards/format_reward/mean": 0.90234375, | |
| "rewards/format_reward/std": 0.24458114355802535, | |
| "rewards/qatch_metrics/mean": 0.4496523497058661, | |
| "rewards/qatch_metrics/std": 0.19190275410510366, | |
| "rewards/tag_count_reward/mean": 0.964453125, | |
| "rewards/tag_count_reward/std": 0.09545199656859041, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.050912176495545185, | |
| "grad_norm": 1.0974593422795826, | |
| "kl": 0.0005740642547607422, | |
| "learning_rate": 5.084745762711864e-07, | |
| "loss": 0.0018, | |
| "max_completion_length": 384.2125, | |
| "max_terminated_completion_length": 384.2125, | |
| "mean_completion_length": 237.18125, | |
| "mean_terminated_completion_length": 237.18125, | |
| "min_completion_length": 130.5, | |
| "min_terminated_completion_length": 130.5, | |
| "num_tokens": 4485415.0, | |
| "reward": 0.5452621880918741, | |
| "reward_std": 0.20544360427884384, | |
| "rewards/format_reward/mean": 0.94140625, | |
| "rewards/format_reward/std": 0.16056813038885592, | |
| "rewards/qatch_metrics/mean": 0.4732174502521957, | |
| "rewards/qatch_metrics/std": 0.2355043698938971, | |
| "rewards/tag_count_reward/mean": 0.977734375, | |
| "rewards/tag_count_reward/std": 0.06598658803850413, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.05939753924480271, | |
| "grad_norm": 0.5510078795192889, | |
| "kl": 0.001086878776550293, | |
| "learning_rate": 5.932203389830508e-07, | |
| "loss": 0.0106, | |
| "max_completion_length": 378.875, | |
| "max_terminated_completion_length": 378.875, | |
| "mean_completion_length": 233.89609375, | |
| "mean_terminated_completion_length": 233.83135452270508, | |
| "min_completion_length": 130.4, | |
| "min_terminated_completion_length": 130.4, | |
| "num_tokens": 5202722.0, | |
| "reward": 0.5302521850913763, | |
| "reward_std": 0.21840204107575117, | |
| "rewards/format_reward/mean": 0.96015625, | |
| "rewards/format_reward/std": 0.12185105979442597, | |
| "rewards/qatch_metrics/mean": 0.45281276348978283, | |
| "rewards/qatch_metrics/std": 0.25459515522234144, | |
| "rewards/tag_count_reward/mean": 0.9869140625, | |
| "rewards/tag_count_reward/std": 0.042276546079665425, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.06788290199406025, | |
| "grad_norm": 0.5064824943755408, | |
| "kl": 0.0016827106475830079, | |
| "learning_rate": 6.779661016949152e-07, | |
| "loss": 0.0076, | |
| "max_completion_length": 356.1125, | |
| "max_terminated_completion_length": 356.1125, | |
| "mean_completion_length": 214.5484375, | |
| "mean_terminated_completion_length": 214.5484375, | |
| "min_completion_length": 124.3875, | |
| "min_terminated_completion_length": 124.3875, | |
| "num_tokens": 5878240.0, | |
| "reward": 0.563486835360527, | |
| "reward_std": 0.18809315691469236, | |
| "rewards/format_reward/mean": 0.9875, | |
| "rewards/format_reward/std": 0.04801956303417683, | |
| "rewards/qatch_metrics/mean": 0.4882588581647724, | |
| "rewards/qatch_metrics/std": 0.21843010729644446, | |
| "rewards/tag_count_reward/mean": 0.9943359375, | |
| "rewards/tag_count_reward/std": 0.021368160098791122, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.07636826474331777, | |
| "grad_norm": 0.44285083486339555, | |
| "kl": 0.0023781299591064454, | |
| "learning_rate": 7.627118644067796e-07, | |
| "loss": 0.0113, | |
| "max_completion_length": 368.0375, | |
| "max_terminated_completion_length": 368.0375, | |
| "mean_completion_length": 214.41953125, | |
| "mean_terminated_completion_length": 214.41953125, | |
| "min_completion_length": 128.6375, | |
| "min_terminated_completion_length": 128.6375, | |
| "num_tokens": 6579321.0, | |
| "reward": 0.5647365102544427, | |
| "reward_std": 0.22302861073985697, | |
| "rewards/format_reward/mean": 0.99140625, | |
| "rewards/format_reward/std": 0.03239456303417683, | |
| "rewards/qatch_metrics/mean": 0.4890856771729887, | |
| "rewards/qatch_metrics/std": 0.261802218714729, | |
| "rewards/tag_count_reward/mean": 0.9974609375, | |
| "rewards/tag_count_reward/std": 0.009661140758544207, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.0848536274925753, | |
| "grad_norm": 0.4720746704888002, | |
| "kl": 0.002991390228271484, | |
| "learning_rate": 8.47457627118644e-07, | |
| "loss": 0.0075, | |
| "max_completion_length": 340.0625, | |
| "max_terminated_completion_length": 340.0625, | |
| "mean_completion_length": 208.53984375, | |
| "mean_terminated_completion_length": 208.53984375, | |
| "min_completion_length": 127.65, | |
| "min_terminated_completion_length": 127.65, | |
| "num_tokens": 7290236.0, | |
| "reward": 0.5338654654100538, | |
| "reward_std": 0.18355813907000992, | |
| "rewards/format_reward/mean": 0.98671875, | |
| "rewards/format_reward/std": 0.046808474138379096, | |
| "rewards/qatch_metrics/mean": 0.45339870161842555, | |
| "rewards/qatch_metrics/std": 0.21397299794516583, | |
| "rewards/tag_count_reward/mean": 0.99609375, | |
| "rewards/tag_count_reward/std": 0.014540977776050568, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.09333899024183284, | |
| "grad_norm": 0.4871987622986304, | |
| "kl": 0.0020163536071777345, | |
| "learning_rate": 9.322033898305083e-07, | |
| "loss": 0.0164, | |
| "max_completion_length": 351.9, | |
| "max_terminated_completion_length": 351.9, | |
| "mean_completion_length": 216.00078125, | |
| "mean_terminated_completion_length": 216.00078125, | |
| "min_completion_length": 129.05, | |
| "min_terminated_completion_length": 129.05, | |
| "num_tokens": 7987549.0, | |
| "reward": 0.6214733822271228, | |
| "reward_std": 0.17753000780940056, | |
| "rewards/format_reward/mean": 0.99375, | |
| "rewards/format_reward/std": 0.0186834741383791, | |
| "rewards/qatch_metrics/mean": 0.5555247416021303, | |
| "rewards/qatch_metrics/std": 0.20830317698419093, | |
| "rewards/tag_count_reward/mean": 0.998046875, | |
| "rewards/tag_count_reward/std": 0.006233368534594774, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.10182435299109037, | |
| "grad_norm": 0.5387323940395295, | |
| "kl": 0.002319526672363281, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0099, | |
| "max_completion_length": 379.05, | |
| "max_terminated_completion_length": 379.05, | |
| "mean_completion_length": 227.08125, | |
| "mean_terminated_completion_length": 227.08125, | |
| "min_completion_length": 134.5125, | |
| "min_terminated_completion_length": 134.5125, | |
| "num_tokens": 8717925.0, | |
| "reward": 0.6110664181411266, | |
| "reward_std": 0.2127673305818462, | |
| "rewards/format_reward/mean": 0.9921875, | |
| "rewards/format_reward/std": 0.029269563034176826, | |
| "rewards/qatch_metrics/mean": 0.5434765691512439, | |
| "rewards/qatch_metrics/std": 0.2495489997054392, | |
| "rewards/tag_count_reward/mean": 0.9978515625, | |
| "rewards/tag_count_reward/std": 0.008098640758544207, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.1103097157403479, | |
| "grad_norm": 0.4498268409373828, | |
| "kl": 0.002933502197265625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.009, | |
| "max_completion_length": 397.025, | |
| "max_terminated_completion_length": 397.025, | |
| "mean_completion_length": 248.5015625, | |
| "mean_terminated_completion_length": 248.5015625, | |
| "min_completion_length": 138.4375, | |
| "min_terminated_completion_length": 138.4375, | |
| "num_tokens": 9471671.0, | |
| "reward": 0.6043949913233518, | |
| "reward_std": 0.20021632187999786, | |
| "rewards/format_reward/mean": 0.99296875, | |
| "rewards/format_reward/std": 0.024164126068353654, | |
| "rewards/qatch_metrics/mean": 0.5355473988340236, | |
| "rewards/qatch_metrics/std": 0.2343682548031211, | |
| "rewards/tag_count_reward/mean": 0.99765625, | |
| "rewards/tag_count_reward/std": 0.007795868534594774, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.11879507848960542, | |
| "grad_norm": 0.44644717987910515, | |
| "kl": 0.0018777847290039062, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0093, | |
| "max_completion_length": 405.225, | |
| "max_terminated_completion_length": 405.225, | |
| "mean_completion_length": 254.95703125, | |
| "mean_terminated_completion_length": 254.95703125, | |
| "min_completion_length": 145.6375, | |
| "min_terminated_completion_length": 145.6375, | |
| "num_tokens": 10229920.0, | |
| "reward": 0.5728242984041572, | |
| "reward_std": 0.20719886436127127, | |
| "rewards/format_reward/mean": 0.99140625, | |
| "rewards/format_reward/std": 0.030414126068353652, | |
| "rewards/qatch_metrics/mean": 0.49862370180781, | |
| "rewards/qatch_metrics/std": 0.24304795570205898, | |
| "rewards/tag_count_reward/mean": 0.9970703125, | |
| "rewards/tag_count_reward/std": 0.010519255418330431, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.12728044123886295, | |
| "grad_norm": 0.3890047568004376, | |
| "kl": 0.0022896766662597657, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0066, | |
| "max_completion_length": 400.325, | |
| "max_terminated_completion_length": 400.325, | |
| "mean_completion_length": 255.21796875, | |
| "mean_terminated_completion_length": 255.21796875, | |
| "min_completion_length": 142.4125, | |
| "min_terminated_completion_length": 142.4125, | |
| "num_tokens": 10982935.0, | |
| "reward": 0.5981784490868449, | |
| "reward_std": 0.20388207387877627, | |
| "rewards/format_reward/mean": 0.99140625, | |
| "rewards/format_reward/std": 0.03239456303417683, | |
| "rewards/qatch_metrics/mean": 0.5284406258782838, | |
| "rewards/qatch_metrics/std": 0.2378254350507632, | |
| "rewards/tag_count_reward/mean": 0.997265625, | |
| "rewards/tag_count_reward/std": 0.010442390758544206, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0015625, | |
| "epoch": 0.1357658039881205, | |
| "grad_norm": 0.4734021038801748, | |
| "kl": 0.004249286651611328, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0036, | |
| "max_completion_length": 476.625, | |
| "max_terminated_completion_length": 385.2, | |
| "mean_completion_length": 242.740625, | |
| "mean_terminated_completion_length": 236.76265659332276, | |
| "min_completion_length": 131.325, | |
| "min_terminated_completion_length": 131.325, | |
| "num_tokens": 11707835.0, | |
| "reward": 0.6007180890068412, | |
| "reward_std": 0.19771749172359704, | |
| "rewards/format_reward/mean": 0.996875, | |
| "rewards/format_reward/std": 0.010519563034176827, | |
| "rewards/qatch_metrics/mean": 0.5307276081643068, | |
| "rewards/qatch_metrics/std": 0.23232332081533968, | |
| "rewards/tag_count_reward/mean": 0.9982421875, | |
| "rewards/tag_count_reward/std": 0.0063268646597862245, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.14425116673737803, | |
| "grad_norm": 0.5550507752472053, | |
| "kl": 0.004662895202636718, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0131, | |
| "max_completion_length": 354.9, | |
| "max_terminated_completion_length": 353.7, | |
| "mean_completion_length": 222.73515625, | |
| "mean_terminated_completion_length": 222.59322929382324, | |
| "min_completion_length": 126.1, | |
| "min_terminated_completion_length": 126.1, | |
| "num_tokens": 12420696.0, | |
| "reward": 0.5713210420683026, | |
| "reward_std": 0.20502175178662582, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.009375, | |
| "rewards/qatch_metrics/mean": 0.49599349409390925, | |
| "rewards/qatch_metrics/std": 0.24089139703291948, | |
| "rewards/tag_count_reward/mean": 0.99921875, | |
| "rewards/tag_count_reward/std": 0.003125, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.15273652948663555, | |
| "grad_norm": 0.45943066166392005, | |
| "kl": 0.006258773803710938, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0011, | |
| "max_completion_length": 353.65, | |
| "max_terminated_completion_length": 353.65, | |
| "mean_completion_length": 209.85625, | |
| "mean_terminated_completion_length": 209.85625, | |
| "min_completion_length": 126.7, | |
| "min_terminated_completion_length": 126.7, | |
| "num_tokens": 13093952.0, | |
| "reward": 0.5877333108335734, | |
| "reward_std": 0.2013142277864972, | |
| "rewards/format_reward/mean": 0.9953125, | |
| "rewards/format_reward/std": 0.01875, | |
| "rewards/qatch_metrics/mean": 0.5156122450251133, | |
| "rewards/qatch_metrics/std": 0.2362876289524138, | |
| "rewards/tag_count_reward/mean": 0.9986328125, | |
| "rewards/tag_count_reward/std": 0.00546875, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.1612218922358931, | |
| "grad_norm": 0.47537315022674664, | |
| "kl": 0.00478668212890625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0059, | |
| "max_completion_length": 388.825, | |
| "max_terminated_completion_length": 388.825, | |
| "mean_completion_length": 233.484375, | |
| "mean_terminated_completion_length": 233.44223976135254, | |
| "min_completion_length": 132.05, | |
| "min_terminated_completion_length": 132.05, | |
| "num_tokens": 13797692.0, | |
| "reward": 0.629512550495565, | |
| "reward_std": 0.1995036465290468, | |
| "rewards/format_reward/mean": 0.9953125, | |
| "rewards/format_reward/std": 0.01875, | |
| "rewards/qatch_metrics/mean": 0.5647757858969271, | |
| "rewards/qatch_metrics/std": 0.23445965750142933, | |
| "rewards/tag_count_reward/mean": 0.9984375, | |
| "rewards/tag_count_reward/std": 0.00625, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.1697072549851506, | |
| "grad_norm": 0.4496445378223136, | |
| "kl": 0.005317878723144531, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0023, | |
| "max_completion_length": 477.175, | |
| "max_terminated_completion_length": 378.3875, | |
| "mean_completion_length": 238.48828125, | |
| "mean_terminated_completion_length": 232.22250003814696, | |
| "min_completion_length": 135.375, | |
| "min_terminated_completion_length": 135.375, | |
| "num_tokens": 14519757.0, | |
| "reward": 0.616143305413425, | |
| "reward_std": 0.21505323713645338, | |
| "rewards/format_reward/mean": 0.99453125, | |
| "rewards/format_reward/std": 0.021875, | |
| "rewards/qatch_metrics/mean": 0.5491966172121465, | |
| "rewards/qatch_metrics/std": 0.25156389316543937, | |
| "rewards/tag_count_reward/mean": 0.9974609375, | |
| "rewards/tag_count_reward/std": 0.01015625, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.17819261773440814, | |
| "grad_norm": 0.41078725396750676, | |
| "kl": 0.006396865844726563, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0078, | |
| "max_completion_length": 507.9875, | |
| "max_terminated_completion_length": 410.6875, | |
| "mean_completion_length": 268.640625, | |
| "mean_terminated_completion_length": 262.4217189788818, | |
| "min_completion_length": 146.275, | |
| "min_terminated_completion_length": 146.275, | |
| "num_tokens": 15300097.0, | |
| "reward": 0.5715526139363647, | |
| "reward_std": 0.19694713180651888, | |
| "rewards/format_reward/mean": 0.98828125, | |
| "rewards/format_reward/std": 0.04291412606835365, | |
| "rewards/qatch_metrics/mean": 0.49751823087281083, | |
| "rewards/qatch_metrics/std": 0.23075101965805517, | |
| "rewards/tag_count_reward/mean": 0.9966796875, | |
| "rewards/tag_count_reward/std": 0.012291031517088413, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0015625, | |
| "epoch": 0.18667798048366568, | |
| "grad_norm": 0.4705988351599047, | |
| "kl": 0.00809478759765625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0063, | |
| "max_completion_length": 510.475, | |
| "max_terminated_completion_length": 414.45, | |
| "mean_completion_length": 266.71875, | |
| "mean_terminated_completion_length": 260.4818754196167, | |
| "min_completion_length": 143.2375, | |
| "min_terminated_completion_length": 143.2375, | |
| "num_tokens": 16075113.0, | |
| "reward": 0.6181550739333034, | |
| "reward_std": 0.20731538808722688, | |
| "rewards/format_reward/mean": 0.984375, | |
| "rewards/format_reward/std": 0.05655868910253048, | |
| "rewards/qatch_metrics/mean": 0.5528846375025751, | |
| "rewards/qatch_metrics/std": 0.24232594616341885, | |
| "rewards/tag_count_reward/mean": 0.9953125, | |
| "rewards/tag_count_reward/std": 0.018254890758544206, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0015625, | |
| "epoch": 0.1951633432329232, | |
| "grad_norm": 0.5269887176128529, | |
| "kl": 0.008547210693359375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0275, | |
| "max_completion_length": 627.3, | |
| "max_terminated_completion_length": 432.275, | |
| "mean_completion_length": 274.62890625, | |
| "mean_terminated_completion_length": 262.2500005722046, | |
| "min_completion_length": 141.1, | |
| "min_terminated_completion_length": 141.1, | |
| "num_tokens": 16875662.0, | |
| "reward": 0.6138749863952399, | |
| "reward_std": 0.19207796775735914, | |
| "rewards/format_reward/mean": 0.9796875, | |
| "rewards/format_reward/std": 0.07059738524258137, | |
| "rewards/qatch_metrics/mean": 0.5485270860604942, | |
| "rewards/qatch_metrics/std": 0.223397574480623, | |
| "rewards/tag_count_reward/mean": 0.9931640625, | |
| "rewards/tag_count_reward/std": 0.024337119515985252, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.20364870598218074, | |
| "grad_norm": 0.4900206468911076, | |
| "kl": 0.008158111572265625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0105, | |
| "max_completion_length": 492.8875, | |
| "max_terminated_completion_length": 396.925, | |
| "mean_completion_length": 244.5609375, | |
| "mean_terminated_completion_length": 238.40921897888182, | |
| "min_completion_length": 135.7375, | |
| "min_terminated_completion_length": 135.7375, | |
| "num_tokens": 17626668.0, | |
| "reward": 0.6636539177969099, | |
| "reward_std": 0.1829118503286736, | |
| "rewards/format_reward/mean": 0.9890625, | |
| "rewards/format_reward/std": 0.037433474138379094, | |
| "rewards/qatch_metrics/mean": 0.6057578155770897, | |
| "rewards/qatch_metrics/std": 0.2140827751267352, | |
| "rewards/tag_count_reward/mean": 0.9970703125, | |
| "rewards/tag_count_reward/std": 0.010139618534594774, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.21213406873143828, | |
| "grad_norm": 0.43519045682082536, | |
| "kl": 0.008423614501953124, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0032, | |
| "max_completion_length": 407.475, | |
| "max_terminated_completion_length": 407.475, | |
| "mean_completion_length": 247.94609375, | |
| "mean_terminated_completion_length": 247.8765106201172, | |
| "min_completion_length": 141.3875, | |
| "min_terminated_completion_length": 141.3875, | |
| "num_tokens": 18362919.0, | |
| "reward": 0.669626927562058, | |
| "reward_std": 0.16123297743451986, | |
| "rewards/format_reward/mean": 0.99609375, | |
| "rewards/format_reward/std": 0.015625, | |
| "rewards/qatch_metrics/mean": 0.6119117233271026, | |
| "rewards/qatch_metrics/std": 0.1890674727541409, | |
| "rewards/tag_count_reward/mean": 0.9978515625, | |
| "rewards/tag_count_reward/std": 0.00785774551331997, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.2206194314806958, | |
| "grad_norm": 0.41870212569889126, | |
| "kl": 0.008110809326171874, | |
| "learning_rate": 1e-06, | |
| "loss": 0.01, | |
| "max_completion_length": 412.9125, | |
| "max_terminated_completion_length": 412.9125, | |
| "mean_completion_length": 246.471875, | |
| "mean_terminated_completion_length": 246.471875, | |
| "min_completion_length": 141.5625, | |
| "min_terminated_completion_length": 141.5625, | |
| "num_tokens": 19118675.0, | |
| "reward": 0.6062422685325146, | |
| "reward_std": 0.16898088851594367, | |
| "rewards/format_reward/mean": 0.9921875, | |
| "rewards/format_reward/std": 0.029269563034176826, | |
| "rewards/qatch_metrics/mean": 0.5378815137548372, | |
| "rewards/qatch_metrics/std": 0.19735472834436224, | |
| "rewards/tag_count_reward/mean": 0.996484375, | |
| "rewards/tag_count_reward/std": 0.013358114659786225, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.22910479422995333, | |
| "grad_norm": 0.41574450928689877, | |
| "kl": 0.007587051391601563, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0043, | |
| "max_completion_length": 397.2625, | |
| "max_terminated_completion_length": 396.875, | |
| "mean_completion_length": 249.65703125, | |
| "mean_terminated_completion_length": 249.53135433197022, | |
| "min_completion_length": 141.95, | |
| "min_terminated_completion_length": 141.95, | |
| "num_tokens": 19851772.0, | |
| "reward": 0.6879865879192948, | |
| "reward_std": 0.18575836885720493, | |
| "rewards/format_reward/mean": 0.99453125, | |
| "rewards/format_reward/std": 0.01753891110420227, | |
| "rewards/qatch_metrics/mean": 0.6336606794851832, | |
| "rewards/qatch_metrics/std": 0.21785120766144245, | |
| "rewards/tag_count_reward/mean": 0.9984375, | |
| "rewards/tag_count_reward/std": 0.005754890758544207, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.23759015697921085, | |
| "grad_norm": 0.4136989371369671, | |
| "kl": 0.0091278076171875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0106, | |
| "max_completion_length": 420.6625, | |
| "max_terminated_completion_length": 420.6625, | |
| "mean_completion_length": 257.865625, | |
| "mean_terminated_completion_length": 257.865625, | |
| "min_completion_length": 142.225, | |
| "min_terminated_completion_length": 142.225, | |
| "num_tokens": 20621088.0, | |
| "reward": 0.5586759101599454, | |
| "reward_std": 0.16987628990318626, | |
| "rewards/format_reward/mean": 0.99453125, | |
| "rewards/format_reward/std": 0.021875, | |
| "rewards/qatch_metrics/mean": 0.4815304725110764, | |
| "rewards/qatch_metrics/std": 0.1985160624521086, | |
| "rewards/tag_count_reward/mean": 0.9984375, | |
| "rewards/tag_count_reward/std": 0.00625, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.2460755197284684, | |
| "grad_norm": 0.45248602480265726, | |
| "kl": 0.009127426147460937, | |
| "learning_rate": 1e-06, | |
| "loss": 0.017, | |
| "max_completion_length": 396.2125, | |
| "max_terminated_completion_length": 396.2125, | |
| "mean_completion_length": 241.41015625, | |
| "mean_terminated_completion_length": 241.41015625, | |
| "min_completion_length": 136.1, | |
| "min_terminated_completion_length": 136.1, | |
| "num_tokens": 21342509.0, | |
| "reward": 0.6528394367545843, | |
| "reward_std": 0.17242726529948413, | |
| "rewards/format_reward/mean": 0.99375, | |
| "rewards/format_reward/std": 0.023019563034176828, | |
| "rewards/qatch_metrics/mean": 0.592448961827904, | |
| "rewards/qatch_metrics/std": 0.20204391193292395, | |
| "rewards/tag_count_reward/mean": 0.99765625, | |
| "rewards/tag_count_reward/std": 0.008730955049395561, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0015625, | |
| "epoch": 0.2545608824777259, | |
| "grad_norm": 0.46925733454426966, | |
| "kl": 0.009392547607421874, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0173, | |
| "max_completion_length": 458.875, | |
| "max_terminated_completion_length": 361.9625, | |
| "mean_completion_length": 223.65234375, | |
| "mean_terminated_completion_length": 211.30524559020995, | |
| "min_completion_length": 126.3375, | |
| "min_terminated_completion_length": 126.3375, | |
| "num_tokens": 22040304.0, | |
| "reward": 0.6618358360603451, | |
| "reward_std": 0.1770564364385791, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.007394563034176826, | |
| "rewards/qatch_metrics/mean": 0.6024929718114436, | |
| "rewards/qatch_metrics/std": 0.20813672725344076, | |
| "rewards/tag_count_reward/mean": 0.9990234375, | |
| "rewards/tag_count_reward/std": 0.0034111407585442066, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.26304624522698344, | |
| "grad_norm": 0.4534313100158683, | |
| "kl": 0.011934280395507812, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0061, | |
| "max_completion_length": 351.4875, | |
| "max_terminated_completion_length": 351.4875, | |
| "mean_completion_length": 210.28828125, | |
| "mean_terminated_completion_length": 210.28828125, | |
| "min_completion_length": 129.6, | |
| "min_terminated_completion_length": 129.6, | |
| "num_tokens": 22749889.0, | |
| "reward": 0.6222010221332311, | |
| "reward_std": 0.1620815466158092, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.5557718772441149, | |
| "rewards/qatch_metrics/std": 0.19015400451608003, | |
| "rewards/tag_count_reward/mean": 0.9990234375, | |
| "rewards/tag_count_reward/std": 0.00390625, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.271531607976241, | |
| "grad_norm": 0.42288295756922156, | |
| "kl": 0.012346267700195312, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0135, | |
| "max_completion_length": 353.0875, | |
| "max_terminated_completion_length": 353.0875, | |
| "mean_completion_length": 209.4390625, | |
| "mean_terminated_completion_length": 209.4390625, | |
| "min_completion_length": 129.1875, | |
| "min_terminated_completion_length": 129.1875, | |
| "num_tokens": 23455907.0, | |
| "reward": 0.6466430865228177, | |
| "reward_std": 0.20642389697022737, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.5844697948734392, | |
| "rewards/qatch_metrics/std": 0.2425005478609819, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.2800169707254985, | |
| "grad_norm": 0.420736036444175, | |
| "kl": 0.010843467712402344, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0016, | |
| "max_completion_length": 387.525, | |
| "max_terminated_completion_length": 387.525, | |
| "mean_completion_length": 237.9015625, | |
| "mean_terminated_completion_length": 237.9015625, | |
| "min_completion_length": 137.7125, | |
| "min_terminated_completion_length": 137.7125, | |
| "num_tokens": 24194565.0, | |
| "reward": 0.6289940118789673, | |
| "reward_std": 0.1789963062328752, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.007394563034176826, | |
| "rewards/qatch_metrics/mean": 0.5638325612526387, | |
| "rewards/qatch_metrics/std": 0.2099793650675565, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.0018486407585442065, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.28850233347475607, | |
| "grad_norm": 0.473135664736807, | |
| "kl": 0.010087966918945312, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0132, | |
| "max_completion_length": 381.625, | |
| "max_terminated_completion_length": 381.625, | |
| "mean_completion_length": 244.29921875, | |
| "mean_terminated_completion_length": 244.29921875, | |
| "min_completion_length": 139.85, | |
| "min_terminated_completion_length": 139.85, | |
| "num_tokens": 24932596.0, | |
| "reward": 0.6755074242129921, | |
| "reward_std": 0.16164480685256422, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.6182440191041678, | |
| "rewards/qatch_metrics/std": 0.190170365315862, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.29698769622401355, | |
| "grad_norm": 0.3578649437623612, | |
| "kl": 0.011128616333007813, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0096, | |
| "max_completion_length": 380.475, | |
| "max_terminated_completion_length": 379.2, | |
| "mean_completion_length": 243.5375, | |
| "mean_terminated_completion_length": 243.3351043701172, | |
| "min_completion_length": 137.95, | |
| "min_terminated_completion_length": 137.95, | |
| "num_tokens": 25665524.0, | |
| "reward": 0.6930831637233495, | |
| "reward_std": 0.15477339186982136, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.6389213577824193, | |
| "rewards/qatch_metrics/std": 0.182086344579875, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0015625, | |
| "epoch": 0.3054730589732711, | |
| "grad_norm": 0.4488215699487985, | |
| "kl": 0.012044906616210938, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0218, | |
| "max_completion_length": 594.85, | |
| "max_terminated_completion_length": 403.2625, | |
| "mean_completion_length": 259.12734375, | |
| "mean_terminated_completion_length": 246.79718780517578, | |
| "min_completion_length": 137.3125, | |
| "min_terminated_completion_length": 137.3125, | |
| "num_tokens": 26433959.0, | |
| "reward": 0.6390639709308743, | |
| "reward_std": 0.17005168935284018, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.009375, | |
| "rewards/qatch_metrics/mean": 0.5757255241274833, | |
| "rewards/qatch_metrics/std": 0.1995467180851847, | |
| "rewards/tag_count_reward/mean": 0.9986328125, | |
| "rewards/tag_count_reward/std": 0.00546875, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.31395842172252864, | |
| "grad_norm": 0.47487318016681307, | |
| "kl": 0.01200714111328125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.002, | |
| "max_completion_length": 404.3125, | |
| "max_terminated_completion_length": 404.3125, | |
| "mean_completion_length": 251.146875, | |
| "mean_terminated_completion_length": 251.146875, | |
| "min_completion_length": 141.675, | |
| "min_terminated_completion_length": 141.675, | |
| "num_tokens": 27179475.0, | |
| "reward": 0.693291904591024, | |
| "reward_std": 0.13804341420182026, | |
| "rewards/format_reward/mean": 0.99375, | |
| "rewards/format_reward/std": 0.021039126068353654, | |
| "rewards/qatch_metrics/mean": 0.6399481799657224, | |
| "rewards/qatch_metrics/std": 0.16183511681156232, | |
| "rewards/tag_count_reward/mean": 0.99921875, | |
| "rewards/tag_count_reward/std": 0.002629890758544207, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.3224437844717862, | |
| "grad_norm": 0.2670377646086036, | |
| "kl": 0.01207733154296875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0022, | |
| "max_completion_length": 411.95, | |
| "max_terminated_completion_length": 411.95, | |
| "mean_completion_length": 262.88046875, | |
| "mean_terminated_completion_length": 262.88046875, | |
| "min_completion_length": 149.55, | |
| "min_terminated_completion_length": 149.55, | |
| "num_tokens": 27923482.0, | |
| "reward": 0.7146790754050016, | |
| "reward_std": 0.13475597079377621, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.009375, | |
| "rewards/qatch_metrics/mean": 0.6646500058704987, | |
| "rewards/qatch_metrics/std": 0.1582620675675571, | |
| "rewards/tag_count_reward/mean": 0.99921875, | |
| "rewards/tag_count_reward/std": 0.003125, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.3309291472210437, | |
| "grad_norm": 0.40099494926639095, | |
| "kl": 0.012256622314453125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.005, | |
| "max_completion_length": 410.775, | |
| "max_terminated_completion_length": 410.775, | |
| "mean_completion_length": 269.840625, | |
| "mean_terminated_completion_length": 269.77177104949953, | |
| "min_completion_length": 151.3875, | |
| "min_terminated_completion_length": 151.3875, | |
| "num_tokens": 28672190.0, | |
| "reward": 0.7005478017032146, | |
| "reward_std": 0.19210629537701607, | |
| "rewards/format_reward/mean": 0.99609375, | |
| "rewards/format_reward/std": 0.013644563034176826, | |
| "rewards/qatch_metrics/mean": 0.6482317747548223, | |
| "rewards/qatch_metrics/std": 0.22610313724726439, | |
| "rewards/tag_count_reward/mean": 0.998828125, | |
| "rewards/tag_count_reward/std": 0.0041923907585442064, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.3394145099703012, | |
| "grad_norm": 0.41505899544216474, | |
| "kl": 0.011727142333984374, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0086, | |
| "max_completion_length": 405.825, | |
| "max_terminated_completion_length": 405.825, | |
| "mean_completion_length": 261.77421875, | |
| "mean_terminated_completion_length": 261.77421875, | |
| "min_completion_length": 144.4125, | |
| "min_terminated_completion_length": 144.4125, | |
| "num_tokens": 29424845.0, | |
| "reward": 0.6669761884957552, | |
| "reward_std": 0.15929648398887367, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6084140667691826, | |
| "rewards/qatch_metrics/std": 0.1869291772134602, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.0015625, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.34789987271955874, | |
| "grad_norm": 0.42611181604234827, | |
| "kl": 0.012878036499023438, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0043, | |
| "max_completion_length": 382.55, | |
| "max_terminated_completion_length": 382.55, | |
| "mean_completion_length": 238.64140625, | |
| "mean_terminated_completion_length": 238.64140625, | |
| "min_completion_length": 138.8875, | |
| "min_terminated_completion_length": 138.8875, | |
| "num_tokens": 30161458.0, | |
| "reward": 0.640521077811718, | |
| "reward_std": 0.1832962979795411, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.5771755239344202, | |
| "rewards/qatch_metrics/std": 0.21567311461549252, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 205 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.3563852354688163, | |
| "grad_norm": 0.45939009285062743, | |
| "kl": 0.012913131713867187, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0052, | |
| "max_completion_length": 353.4875, | |
| "max_terminated_completion_length": 353.4875, | |
| "mean_completion_length": 217.97109375, | |
| "mean_terminated_completion_length": 217.97109375, | |
| "min_completion_length": 127.8125, | |
| "min_terminated_completion_length": 127.8125, | |
| "num_tokens": 30856301.0, | |
| "reward": 0.7069711482152343, | |
| "reward_std": 0.1675035478800055, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6554554741829633, | |
| "rewards/qatch_metrics/std": 0.19661965729173972, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.3648705982180738, | |
| "grad_norm": 0.3673904553883833, | |
| "kl": 0.015240478515625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0063, | |
| "max_completion_length": 343.725, | |
| "max_terminated_completion_length": 343.725, | |
| "mean_completion_length": 206.66875, | |
| "mean_terminated_completion_length": 206.66875, | |
| "min_completion_length": 121.8125, | |
| "min_terminated_completion_length": 121.8125, | |
| "num_tokens": 31518373.0, | |
| "reward": 0.6342932254076004, | |
| "reward_std": 0.13992415480315684, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.5698716203682125, | |
| "rewards/qatch_metrics/std": 0.16446736766956746, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.0015625, | |
| "step": 215 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.37335596096733137, | |
| "grad_norm": 0.4086966430203708, | |
| "kl": 0.016983795166015624, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0148, | |
| "max_completion_length": 331.5875, | |
| "max_terminated_completion_length": 331.5875, | |
| "mean_completion_length": 203.140625, | |
| "mean_terminated_completion_length": 203.140625, | |
| "min_completion_length": 126.05, | |
| "min_terminated_completion_length": 126.05, | |
| "num_tokens": 32201529.0, | |
| "reward": 0.7193620746955276, | |
| "reward_std": 0.13337895747390577, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.6699526109252474, | |
| "rewards/qatch_metrics/std": 0.1564568679634249, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.0015625, | |
| "step": 220 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.3818413237165889, | |
| "grad_norm": 0.4166230820161933, | |
| "kl": 0.017269134521484375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0097, | |
| "max_completion_length": 327.6875, | |
| "max_terminated_completion_length": 327.6875, | |
| "mean_completion_length": 201.41015625, | |
| "mean_terminated_completion_length": 201.41015625, | |
| "min_completion_length": 128.9625, | |
| "min_terminated_completion_length": 128.9625, | |
| "num_tokens": 32872998.0, | |
| "reward": 0.7264922646805644, | |
| "reward_std": 0.18219351805892076, | |
| "rewards/format_reward/mean": 0.996875, | |
| "rewards/format_reward/std": 0.010519563034176827, | |
| "rewards/qatch_metrics/mean": 0.6786627656780183, | |
| "rewards/qatch_metrics/std": 0.21386719078727764, | |
| "rewards/tag_count_reward/mean": 0.998828125, | |
| "rewards/tag_count_reward/std": 0.0041923907585442064, | |
| "step": 225 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.3903266864658464, | |
| "grad_norm": 0.4191868850505723, | |
| "kl": 0.017938995361328126, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0129, | |
| "max_completion_length": 451.725, | |
| "max_terminated_completion_length": 353.225, | |
| "mean_completion_length": 218.825, | |
| "mean_terminated_completion_length": 212.57406253814696, | |
| "min_completion_length": 127.575, | |
| "min_terminated_completion_length": 127.575, | |
| "num_tokens": 33580118.0, | |
| "reward": 0.687440705485642, | |
| "reward_std": 0.1581920109805651, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.009375, | |
| "rewards/qatch_metrics/mean": 0.6326393289258704, | |
| "rewards/qatch_metrics/std": 0.18519277906743808, | |
| "rewards/tag_count_reward/mean": 0.9986328125, | |
| "rewards/tag_count_reward/std": 0.00546875, | |
| "step": 230 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.39881204921510394, | |
| "grad_norm": 0.3915595613976664, | |
| "kl": 0.018732452392578126, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0021, | |
| "max_completion_length": 341.5125, | |
| "max_terminated_completion_length": 341.5125, | |
| "mean_completion_length": 206.21640625, | |
| "mean_terminated_completion_length": 206.21640625, | |
| "min_completion_length": 126.6875, | |
| "min_terminated_completion_length": 126.6875, | |
| "num_tokens": 34262107.0, | |
| "reward": 0.6562905197963118, | |
| "reward_std": 0.17552886928315276, | |
| "rewards/format_reward/mean": 0.99453125, | |
| "rewards/format_reward/std": 0.01791412606835365, | |
| "rewards/qatch_metrics/mean": 0.5963367223972454, | |
| "rewards/qatch_metrics/std": 0.20513383029028773, | |
| "rewards/tag_count_reward/mean": 0.9990234375, | |
| "rewards/tag_count_reward/std": 0.00390625, | |
| "step": 235 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.4072974119643615, | |
| "grad_norm": 0.41780588099560895, | |
| "kl": 0.016981887817382812, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0024, | |
| "max_completion_length": 364.3875, | |
| "max_terminated_completion_length": 364.3875, | |
| "mean_completion_length": 226.12109375, | |
| "mean_terminated_completion_length": 226.12109375, | |
| "min_completion_length": 132.8, | |
| "min_terminated_completion_length": 132.8, | |
| "num_tokens": 34987478.0, | |
| "reward": 0.6238435052335263, | |
| "reward_std": 0.13907186635769903, | |
| "rewards/format_reward/mean": 0.99453125, | |
| "rewards/format_reward/std": 0.01791412606835365, | |
| "rewards/qatch_metrics/mean": 0.5582786456216127, | |
| "rewards/qatch_metrics/std": 0.16314728397410364, | |
| "rewards/tag_count_reward/mean": 0.9970703125, | |
| "rewards/tag_count_reward/std": 0.010519255418330431, | |
| "step": 240 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.415782774713619, | |
| "grad_norm": 0.3988040198156247, | |
| "kl": 0.016478347778320312, | |
| "learning_rate": 1e-06, | |
| "loss": -0.005, | |
| "max_completion_length": 388.9875, | |
| "max_terminated_completion_length": 388.9875, | |
| "mean_completion_length": 240.81328125, | |
| "mean_terminated_completion_length": 240.81328125, | |
| "min_completion_length": 142.9375, | |
| "min_terminated_completion_length": 142.9375, | |
| "num_tokens": 35747063.0, | |
| "reward": 0.7002341380342841, | |
| "reward_std": 0.1491245893528685, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.009375, | |
| "rewards/qatch_metrics/mean": 0.6476674530887976, | |
| "rewards/qatch_metrics/std": 0.17473252283816693, | |
| "rewards/tag_count_reward/mean": 0.9990234375, | |
| "rewards/tag_count_reward/std": 0.00390625, | |
| "step": 245 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.42426813746287656, | |
| "grad_norm": 0.45958220885702605, | |
| "kl": 0.015369796752929687, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0005, | |
| "max_completion_length": 410.275, | |
| "max_terminated_completion_length": 410.275, | |
| "mean_completion_length": 244.340625, | |
| "mean_terminated_completion_length": 244.340625, | |
| "min_completion_length": 136.1, | |
| "min_terminated_completion_length": 136.1, | |
| "num_tokens": 36493451.0, | |
| "reward": 0.6826806226745248, | |
| "reward_std": 0.13986645113172927, | |
| "rewards/format_reward/mean": 0.99609375, | |
| "rewards/format_reward/std": 0.015625, | |
| "rewards/qatch_metrics/mean": 0.627269014150761, | |
| "rewards/qatch_metrics/std": 0.1630945382259597, | |
| "rewards/tag_count_reward/mean": 0.9978515625, | |
| "rewards/tag_count_reward/std": 0.00859375, | |
| "step": 250 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.43275350021213405, | |
| "grad_norm": 0.468814692153863, | |
| "kl": 0.015785980224609374, | |
| "learning_rate": 1e-06, | |
| "loss": 0.008, | |
| "max_completion_length": 378.375, | |
| "max_terminated_completion_length": 378.375, | |
| "mean_completion_length": 225.4109375, | |
| "mean_terminated_completion_length": 225.4109375, | |
| "min_completion_length": 126.15, | |
| "min_terminated_completion_length": 126.15, | |
| "num_tokens": 37198329.0, | |
| "reward": 0.6527191938832402, | |
| "reward_std": 0.14599450018140486, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.009375, | |
| "rewards/qatch_metrics/mean": 0.5917445349274203, | |
| "rewards/qatch_metrics/std": 0.17125596017576755, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.00234375, | |
| "step": 255 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.4412388629613916, | |
| "grad_norm": 0.39946326627159484, | |
| "kl": 0.01713104248046875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0226, | |
| "max_completion_length": 387.8, | |
| "max_terminated_completion_length": 387.8, | |
| "mean_completion_length": 241.371875, | |
| "mean_terminated_completion_length": 241.371875, | |
| "min_completion_length": 136.8625, | |
| "min_terminated_completion_length": 136.8625, | |
| "num_tokens": 37916069.0, | |
| "reward": 0.6606664573773742, | |
| "reward_std": 0.13361305266153067, | |
| "rewards/format_reward/mean": 0.996875, | |
| "rewards/format_reward/std": 0.0125, | |
| "rewards/qatch_metrics/mean": 0.6011976579669863, | |
| "rewards/qatch_metrics/std": 0.1558800196624361, | |
| "rewards/tag_count_reward/mean": 0.99921875, | |
| "rewards/tag_count_reward/std": 0.003125, | |
| "step": 260 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.44972422571064913, | |
| "grad_norm": 0.35617013231446487, | |
| "kl": 0.016695404052734376, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0215, | |
| "max_completion_length": 472.775, | |
| "max_terminated_completion_length": 375.7125, | |
| "mean_completion_length": 237.80234375, | |
| "mean_terminated_completion_length": 231.58619804382323, | |
| "min_completion_length": 135.75, | |
| "min_terminated_completion_length": 135.75, | |
| "num_tokens": 38657096.0, | |
| "reward": 0.7420335970818996, | |
| "reward_std": 0.15533771787304432, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6967283933190629, | |
| "rewards/qatch_metrics/std": 0.18226129743270575, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.00234375, | |
| "step": 265 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.45820958845990667, | |
| "grad_norm": 0.3817842719797035, | |
| "kl": 0.017615509033203126, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0017, | |
| "max_completion_length": 346.1625, | |
| "max_terminated_completion_length": 346.1625, | |
| "mean_completion_length": 222.296875, | |
| "mean_terminated_completion_length": 222.296875, | |
| "min_completion_length": 132.825, | |
| "min_terminated_completion_length": 132.825, | |
| "num_tokens": 39369220.0, | |
| "reward": 0.727664845623076, | |
| "reward_std": 0.14582993834046648, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.679720576805994, | |
| "rewards/qatch_metrics/std": 0.17155658558476716, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.0015625, | |
| "step": 270 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.4666949512091642, | |
| "grad_norm": 0.39577481704264245, | |
| "kl": 0.020616912841796876, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0098, | |
| "max_completion_length": 364.15, | |
| "max_terminated_completion_length": 364.15, | |
| "mean_completion_length": 226.7078125, | |
| "mean_terminated_completion_length": 226.70078144073486, | |
| "min_completion_length": 134.875, | |
| "min_terminated_completion_length": 134.875, | |
| "num_tokens": 40068382.0, | |
| "reward": 0.7044001279398799, | |
| "reward_std": 0.14971714210696518, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.6523273481987417, | |
| "rewards/qatch_metrics/std": 0.17615436944179236, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 275 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.4751803139584217, | |
| "grad_norm": 0.3046744949783554, | |
| "kl": 0.020497894287109374, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0084, | |
| "max_completion_length": 495.8125, | |
| "max_terminated_completion_length": 401.9625, | |
| "mean_completion_length": 266.5421875, | |
| "mean_terminated_completion_length": 260.47192726135256, | |
| "min_completion_length": 151.6125, | |
| "min_terminated_completion_length": 151.6125, | |
| "num_tokens": 40870708.0, | |
| "reward": 0.6285300446674228, | |
| "reward_std": 0.12592219962971285, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.5631947932997718, | |
| "rewards/qatch_metrics/std": 0.1479642984108068, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.00234375, | |
| "step": 280 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.48366567670767924, | |
| "grad_norm": 0.33874642526713644, | |
| "kl": 0.020623779296875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0027, | |
| "max_completion_length": 390.9375, | |
| "max_terminated_completion_length": 390.9375, | |
| "mean_completion_length": 250.72734375, | |
| "mean_terminated_completion_length": 250.72734375, | |
| "min_completion_length": 146.025, | |
| "min_terminated_completion_length": 146.025, | |
| "num_tokens": 41589543.0, | |
| "reward": 0.736239911057055, | |
| "reward_std": 0.1530169295379892, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.6897973996005021, | |
| "rewards/qatch_metrics/std": 0.17983283039648085, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 285 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.4921510394569368, | |
| "grad_norm": 0.4441970060118225, | |
| "kl": 0.0211395263671875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0033, | |
| "max_completion_length": 349.9625, | |
| "max_terminated_completion_length": 349.9625, | |
| "mean_completion_length": 228.60703125, | |
| "mean_terminated_completion_length": 228.60703125, | |
| "min_completion_length": 138.4125, | |
| "min_terminated_completion_length": 138.4125, | |
| "num_tokens": 42293216.0, | |
| "reward": 0.7176182683557272, | |
| "reward_std": 0.15007240361701407, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.6678895858502074, | |
| "rewards/qatch_metrics/std": 0.17653617557152756, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 290 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.5006364022061943, | |
| "grad_norm": 0.5321322382404972, | |
| "kl": 0.02186431884765625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0054, | |
| "max_completion_length": 334.275, | |
| "max_terminated_completion_length": 334.275, | |
| "mean_completion_length": 203.31171875, | |
| "mean_terminated_completion_length": 203.31171875, | |
| "min_completion_length": 127.475, | |
| "min_terminated_completion_length": 127.475, | |
| "num_tokens": 42965119.0, | |
| "reward": 0.721503934636712, | |
| "reward_std": 0.1488500821404159, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.6723575570387765, | |
| "rewards/qatch_metrics/std": 0.1751177478581667, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 295 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.5091217649554518, | |
| "grad_norm": 0.4389967475905595, | |
| "kl": 0.020308685302734376, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0037, | |
| "max_completion_length": 340.1375, | |
| "max_terminated_completion_length": 340.1375, | |
| "mean_completion_length": 210.65078125, | |
| "mean_terminated_completion_length": 210.65078125, | |
| "min_completion_length": 129.0375, | |
| "min_terminated_completion_length": 129.0375, | |
| "num_tokens": 43631264.0, | |
| "reward": 0.7329939803108573, | |
| "reward_std": 0.15094346418045462, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.6858752616448328, | |
| "rewards/qatch_metrics/std": 0.17758055413141846, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 300 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.5176071277047094, | |
| "grad_norm": 0.337389770462881, | |
| "kl": 0.0194244384765625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0102, | |
| "max_completion_length": 345.8, | |
| "max_terminated_completion_length": 344.3375, | |
| "mean_completion_length": 210.73984375, | |
| "mean_terminated_completion_length": 210.56921882629393, | |
| "min_completion_length": 129.6875, | |
| "min_terminated_completion_length": 129.6875, | |
| "num_tokens": 44321315.0, | |
| "reward": 0.7070499777793884, | |
| "reward_std": 0.13233158598304726, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.655536724627018, | |
| "rewards/qatch_metrics/std": 0.15573070517275484, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 305 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.5260924904539669, | |
| "grad_norm": 0.46407475307844764, | |
| "kl": 0.0195098876953125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0236, | |
| "max_completion_length": 480.5625, | |
| "max_terminated_completion_length": 382.0, | |
| "mean_completion_length": 234.2328125, | |
| "mean_terminated_completion_length": 227.9933334350586, | |
| "min_completion_length": 131.5875, | |
| "min_terminated_completion_length": 131.5875, | |
| "num_tokens": 45038557.0, | |
| "reward": 0.7017397930845618, | |
| "reward_std": 0.1679267267445539, | |
| "rewards/format_reward/mean": 0.99609375, | |
| "rewards/format_reward/std": 0.015625, | |
| "rewards/qatch_metrics/mean": 0.6496341172983193, | |
| "rewards/qatch_metrics/std": 0.1964063866399556, | |
| "rewards/tag_count_reward/mean": 0.998828125, | |
| "rewards/tag_count_reward/std": 0.0046875, | |
| "step": 310 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.5345778532032245, | |
| "grad_norm": 0.4351328007064976, | |
| "kl": 0.021776580810546876, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0026, | |
| "max_completion_length": 356.4, | |
| "max_terminated_completion_length": 356.4, | |
| "mean_completion_length": 222.5203125, | |
| "mean_terminated_completion_length": 222.5203125, | |
| "min_completion_length": 134.3, | |
| "min_terminated_completion_length": 134.3, | |
| "num_tokens": 45775623.0, | |
| "reward": 0.6846541265025735, | |
| "reward_std": 0.13933788809226827, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.009375, | |
| "rewards/qatch_metrics/mean": 0.629338023607852, | |
| "rewards/qatch_metrics/std": 0.1633110191760352, | |
| "rewards/tag_count_reward/mean": 0.9990234375, | |
| "rewards/tag_count_reward/std": 0.00390625, | |
| "step": 315 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.543063215952482, | |
| "grad_norm": 0.5065267200070752, | |
| "kl": 0.017189788818359374, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0011, | |
| "max_completion_length": 370.3625, | |
| "max_terminated_completion_length": 370.3625, | |
| "mean_completion_length": 229.86484375, | |
| "mean_terminated_completion_length": 229.86484375, | |
| "min_completion_length": 132.675, | |
| "min_terminated_completion_length": 132.675, | |
| "num_tokens": 46477946.0, | |
| "reward": 0.7529282798990607, | |
| "reward_std": 0.17313570996630007, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.7095112036258797, | |
| "rewards/qatch_metrics/std": 0.20355941310699563, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 320 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.5515485787017395, | |
| "grad_norm": 0.3908298626994803, | |
| "kl": 0.017131805419921875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0084, | |
| "max_completion_length": 357.0375, | |
| "max_terminated_completion_length": 357.0375, | |
| "mean_completion_length": 229.78203125, | |
| "mean_terminated_completion_length": 229.78203125, | |
| "min_completion_length": 132.3125, | |
| "min_terminated_completion_length": 132.3125, | |
| "num_tokens": 47196611.0, | |
| "reward": 0.726746228709817, | |
| "reward_std": 0.1392112417612225, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.009375, | |
| "rewards/qatch_metrics/mean": 0.6788351578405127, | |
| "rewards/qatch_metrics/std": 0.16352780560264363, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.00234375, | |
| "step": 325 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.560033941450997, | |
| "grad_norm": 0.3571881664231259, | |
| "kl": 0.01622467041015625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0044, | |
| "max_completion_length": 356.8375, | |
| "max_terminated_completion_length": 356.8375, | |
| "mean_completion_length": 230.815625, | |
| "mean_terminated_completion_length": 230.815625, | |
| "min_completion_length": 141.1625, | |
| "min_terminated_completion_length": 141.1625, | |
| "num_tokens": 47932855.0, | |
| "reward": 0.7320297859609127, | |
| "reward_std": 0.15043658841605065, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.684832819364965, | |
| "rewards/qatch_metrics/std": 0.17661658204742708, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 330 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.5685193042002545, | |
| "grad_norm": 0.42552437137219973, | |
| "kl": 0.01709747314453125, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0009, | |
| "max_completion_length": 359.0, | |
| "max_terminated_completion_length": 358.8625, | |
| "mean_completion_length": 230.64296875, | |
| "mean_terminated_completion_length": 230.5740104675293, | |
| "min_completion_length": 134.65, | |
| "min_terminated_completion_length": 134.65, | |
| "num_tokens": 48657486.0, | |
| "reward": 0.6706796623766422, | |
| "reward_std": 0.15375317606376485, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6127710969405598, | |
| "rewards/qatch_metrics/std": 0.18041889240848832, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.0015625, | |
| "step": 335 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.5770046669495121, | |
| "grad_norm": 0.4166771095875191, | |
| "kl": 0.017682647705078124, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0044, | |
| "max_completion_length": 365.1125, | |
| "max_terminated_completion_length": 365.1125, | |
| "mean_completion_length": 236.26796875, | |
| "mean_terminated_completion_length": 236.26796875, | |
| "min_completion_length": 135.75, | |
| "min_terminated_completion_length": 135.75, | |
| "num_tokens": 49369045.0, | |
| "reward": 0.7130919683724641, | |
| "reward_std": 0.14616872374899686, | |
| "rewards/format_reward/mean": 0.99609375, | |
| "rewards/format_reward/std": 0.015625, | |
| "rewards/qatch_metrics/mean": 0.6629781271796673, | |
| "rewards/qatch_metrics/std": 0.1712187082041055, | |
| "rewards/tag_count_reward/mean": 0.9990234375, | |
| "rewards/tag_count_reward/std": 0.00390625, | |
| "step": 340 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.5854900296987696, | |
| "grad_norm": 0.3930807031443483, | |
| "kl": 0.017128753662109374, | |
| "learning_rate": 1e-06, | |
| "loss": -0.001, | |
| "max_completion_length": 390.65, | |
| "max_terminated_completion_length": 390.65, | |
| "mean_completion_length": 255.5375, | |
| "mean_terminated_completion_length": 255.5375, | |
| "min_completion_length": 143.0125, | |
| "min_terminated_completion_length": 143.0125, | |
| "num_tokens": 50123765.0, | |
| "reward": 0.7640590518712997, | |
| "reward_std": 0.1435461052635219, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.7226177143631503, | |
| "rewards/qatch_metrics/std": 0.16846177625120617, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 345 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.5939753924480271, | |
| "grad_norm": 0.3580759515721783, | |
| "kl": 0.0180328369140625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0118, | |
| "max_completion_length": 400.5125, | |
| "max_terminated_completion_length": 400.5125, | |
| "mean_completion_length": 266.19765625, | |
| "mean_terminated_completion_length": 266.19765625, | |
| "min_completion_length": 150.525, | |
| "min_terminated_completion_length": 150.525, | |
| "num_tokens": 50907522.0, | |
| "reward": 0.6403236113488674, | |
| "reward_std": 0.15107874246314168, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.007394563034176826, | |
| "rewards/qatch_metrics/mean": 0.5771500020520761, | |
| "rewards/qatch_metrics/std": 0.17677784110419453, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.0015625, | |
| "step": 350 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.6024607551972847, | |
| "grad_norm": 0.3745573524520842, | |
| "kl": 0.01865386962890625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0043, | |
| "max_completion_length": 516.8375, | |
| "max_terminated_completion_length": 419.15, | |
| "mean_completion_length": 291.19296875, | |
| "mean_terminated_completion_length": 284.97937507629393, | |
| "min_completion_length": 159.025, | |
| "min_terminated_completion_length": 159.025, | |
| "num_tokens": 51693625.0, | |
| "reward": 0.7342174492776394, | |
| "reward_std": 0.14967644196003677, | |
| "rewards/format_reward/mean": 0.996875, | |
| "rewards/format_reward/std": 0.010519563034176827, | |
| "rewards/qatch_metrics/mean": 0.6877856820356101, | |
| "rewards/qatch_metrics/std": 0.1758563325740397, | |
| "rewards/tag_count_reward/mean": 0.9982421875, | |
| "rewards/tag_count_reward/std": 0.005545922368764877, | |
| "step": 355 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.6109461179465422, | |
| "grad_norm": 0.43129069835886913, | |
| "kl": 0.016158294677734376, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0006, | |
| "max_completion_length": 482.6875, | |
| "max_terminated_completion_length": 385.0375, | |
| "mean_completion_length": 264.7234375, | |
| "mean_terminated_completion_length": 258.5384376525879, | |
| "min_completion_length": 146.6, | |
| "min_terminated_completion_length": 146.6, | |
| "num_tokens": 52463943.0, | |
| "reward": 0.7258525386452674, | |
| "reward_std": 0.13552326717763208, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.677622918318957, | |
| "rewards/qatch_metrics/std": 0.1593391936738044, | |
| "rewards/tag_count_reward/mean": 0.9990234375, | |
| "rewards/tag_count_reward/std": 0.00390625, | |
| "step": 360 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.6194314806957998, | |
| "grad_norm": 0.37639413936933497, | |
| "kl": 0.019573974609375, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0028, | |
| "max_completion_length": 425.575, | |
| "max_terminated_completion_length": 425.575, | |
| "mean_completion_length": 271.60078125, | |
| "mean_terminated_completion_length": 271.60078125, | |
| "min_completion_length": 152.7125, | |
| "min_terminated_completion_length": 152.7125, | |
| "num_tokens": 53210824.0, | |
| "reward": 0.734311668202281, | |
| "reward_std": 0.12053130697458983, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.009375, | |
| "rewards/qatch_metrics/mean": 0.6877356803161092, | |
| "rewards/qatch_metrics/std": 0.14115702654235066, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.00234375, | |
| "step": 365 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0015625, | |
| "epoch": 0.6279168434450573, | |
| "grad_norm": 0.566849518686874, | |
| "kl": 0.0207061767578125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0082, | |
| "max_completion_length": 490.825, | |
| "max_terminated_completion_length": 386.3, | |
| "mean_completion_length": 260.1546875, | |
| "mean_terminated_completion_length": 253.42911491394042, | |
| "min_completion_length": 143.5625, | |
| "min_terminated_completion_length": 143.5625, | |
| "num_tokens": 53973470.0, | |
| "reward": 0.6677503928542137, | |
| "reward_std": 0.14497404757421464, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.009375, | |
| "rewards/qatch_metrics/mean": 0.6094627652317286, | |
| "rewards/qatch_metrics/std": 0.16993254381231965, | |
| "rewards/tag_count_reward/mean": 0.998828125, | |
| "rewards/tag_count_reward/std": 0.0046875, | |
| "step": 370 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.6364022061943148, | |
| "grad_norm": 0.49048084012884774, | |
| "kl": 0.017724609375, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0067, | |
| "max_completion_length": 391.1375, | |
| "max_terminated_completion_length": 391.1375, | |
| "mean_completion_length": 258.92265625, | |
| "mean_terminated_completion_length": 258.92265625, | |
| "min_completion_length": 143.1, | |
| "min_terminated_completion_length": 143.1, | |
| "num_tokens": 54724571.0, | |
| "reward": 0.724555074237287, | |
| "reward_std": 0.14221483873843682, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6761424570227973, | |
| "rewards/qatch_metrics/std": 0.16734275622293354, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 375 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.6448875689435724, | |
| "grad_norm": 0.43138015818615777, | |
| "kl": 0.018140411376953124, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0054, | |
| "max_completion_length": 371.8, | |
| "max_terminated_completion_length": 371.8, | |
| "mean_completion_length": 242.0015625, | |
| "mean_terminated_completion_length": 242.0015625, | |
| "min_completion_length": 136.5125, | |
| "min_terminated_completion_length": 136.5125, | |
| "num_tokens": 55445197.0, | |
| "reward": 0.7430899446830154, | |
| "reward_std": 0.12899101262446494, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6979481811635196, | |
| "rewards/qatch_metrics/std": 0.15142290932126343, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 380 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.6533729316928298, | |
| "grad_norm": 0.3773247374528365, | |
| "kl": 0.01764984130859375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0049, | |
| "max_completion_length": 392.6, | |
| "max_terminated_completion_length": 392.6, | |
| "mean_completion_length": 252.34921875, | |
| "mean_terminated_completion_length": 252.34921875, | |
| "min_completion_length": 140.9125, | |
| "min_terminated_completion_length": 140.9125, | |
| "num_tokens": 56193916.0, | |
| "reward": 0.7242604551836849, | |
| "reward_std": 0.14704363771597856, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6757958389462146, | |
| "rewards/qatch_metrics/std": 0.172355884683202, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 385 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.6618582944420874, | |
| "grad_norm": 0.3633389159903467, | |
| "kl": 0.016083908081054688, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0106, | |
| "max_completion_length": 374.2, | |
| "max_terminated_completion_length": 374.2, | |
| "mean_completion_length": 242.759375, | |
| "mean_terminated_completion_length": 242.759375, | |
| "min_completion_length": 136.4375, | |
| "min_terminated_completion_length": 136.4375, | |
| "num_tokens": 56926552.0, | |
| "reward": 0.7279293669387699, | |
| "reward_std": 0.1125695963304679, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.6800317747285589, | |
| "rewards/qatch_metrics/std": 0.132177343883086, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.0015625, | |
| "step": 390 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.6703436571913449, | |
| "grad_norm": 0.3321459793740948, | |
| "kl": 0.021373748779296875, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0057, | |
| "max_completion_length": 383.7375, | |
| "max_terminated_completion_length": 383.7375, | |
| "mean_completion_length": 255.8953125, | |
| "mean_terminated_completion_length": 255.8953125, | |
| "min_completion_length": 140.3125, | |
| "min_terminated_completion_length": 140.3125, | |
| "num_tokens": 57651810.0, | |
| "reward": 0.7192921109497548, | |
| "reward_std": 0.1022657451685518, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6699737045913935, | |
| "rewards/qatch_metrics/std": 0.11943948618136346, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.00234375, | |
| "step": 395 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.6788290199406024, | |
| "grad_norm": 0.3287054941040406, | |
| "kl": 0.018202972412109376, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0059, | |
| "max_completion_length": 490.95, | |
| "max_terminated_completion_length": 393.125, | |
| "mean_completion_length": 273.2359375, | |
| "mean_terminated_completion_length": 267.0583854675293, | |
| "min_completion_length": 151.3125, | |
| "min_terminated_completion_length": 151.3125, | |
| "num_tokens": 58402320.0, | |
| "reward": 0.7099972570315003, | |
| "reward_std": 0.0980605365242809, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6590270875021815, | |
| "rewards/qatch_metrics/std": 0.11513400385156274, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.0015625, | |
| "step": 400 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.68731438268986, | |
| "grad_norm": 0.33108410847958825, | |
| "kl": 0.0172943115234375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.009, | |
| "max_completion_length": 452.5625, | |
| "max_terminated_completion_length": 452.5625, | |
| "mean_completion_length": 260.38828125, | |
| "mean_terminated_completion_length": 260.38828125, | |
| "min_completion_length": 143.2625, | |
| "min_terminated_completion_length": 143.2625, | |
| "num_tokens": 59126545.0, | |
| "reward": 0.757123382948339, | |
| "reward_std": 0.13950567932333796, | |
| "rewards/format_reward/mean": 0.996875, | |
| "rewards/format_reward/std": 0.0125, | |
| "rewards/qatch_metrics/mean": 0.7146419312804937, | |
| "rewards/qatch_metrics/std": 0.163762611662969, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 405 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.6957997454391175, | |
| "grad_norm": 0.38779399939512355, | |
| "kl": 0.019302749633789064, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0049, | |
| "max_completion_length": 386.15, | |
| "max_terminated_completion_length": 386.15, | |
| "mean_completion_length": 248.64765625, | |
| "mean_terminated_completion_length": 248.64765625, | |
| "min_completion_length": 135.0375, | |
| "min_terminated_completion_length": 135.0375, | |
| "num_tokens": 59854014.0, | |
| "reward": 0.6697430610656738, | |
| "reward_std": 0.13663577265979257, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6116921915265265, | |
| "rewards/qatch_metrics/std": 0.1598288557077467, | |
| "rewards/tag_count_reward/mean": 0.99921875, | |
| "rewards/tag_count_reward/std": 0.003125, | |
| "step": 410 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.7042851081883751, | |
| "grad_norm": 0.38534843921485207, | |
| "kl": 0.020642852783203124, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0011, | |
| "max_completion_length": 379.15, | |
| "max_terminated_completion_length": 379.15, | |
| "mean_completion_length": 244.28984375, | |
| "mean_terminated_completion_length": 244.28984375, | |
| "min_completion_length": 140.1125, | |
| "min_terminated_completion_length": 140.1125, | |
| "num_tokens": 60580769.0, | |
| "reward": 0.7280639600008726, | |
| "reward_std": 0.16118349129828857, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6802820329554379, | |
| "rewards/qatch_metrics/std": 0.189628272918344, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.0015625, | |
| "step": 415 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.7127704709376326, | |
| "grad_norm": 0.41856360269376014, | |
| "kl": 0.019040298461914063, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0021, | |
| "max_completion_length": 356.1, | |
| "max_terminated_completion_length": 356.1, | |
| "mean_completion_length": 219.8203125, | |
| "mean_terminated_completion_length": 219.8203125, | |
| "min_completion_length": 125.65, | |
| "min_terminated_completion_length": 125.65, | |
| "num_tokens": 61250203.0, | |
| "reward": 0.7225630840286612, | |
| "reward_std": 0.12974205100908875, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6738104194388143, | |
| "rewards/qatch_metrics/std": 0.15181050510145724, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.0015625, | |
| "step": 420 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.7212558336868902, | |
| "grad_norm": 0.3937056039529797, | |
| "kl": 0.019232559204101562, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0043, | |
| "max_completion_length": 379.0375, | |
| "max_terminated_completion_length": 379.0375, | |
| "mean_completion_length": 233.62421875, | |
| "mean_terminated_completion_length": 233.62421875, | |
| "min_completion_length": 134.5375, | |
| "min_terminated_completion_length": 134.5375, | |
| "num_tokens": 61966234.0, | |
| "reward": 0.70792027246207, | |
| "reward_std": 0.12816281174309552, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.6564916724339127, | |
| "rewards/qatch_metrics/std": 0.1508849366568029, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.0015625, | |
| "step": 425 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.7297411964361477, | |
| "grad_norm": 0.5037887591736032, | |
| "kl": 0.020272445678710938, | |
| "learning_rate": 1e-06, | |
| "loss": 0.012, | |
| "max_completion_length": 375.15, | |
| "max_terminated_completion_length": 375.15, | |
| "mean_completion_length": 229.83125, | |
| "mean_terminated_completion_length": 229.83125, | |
| "min_completion_length": 130.7125, | |
| "min_terminated_completion_length": 130.7125, | |
| "num_tokens": 62664466.0, | |
| "reward": 0.7342685505747795, | |
| "reward_std": 0.17085397026967258, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.6874666733667254, | |
| "rewards/qatch_metrics/std": 0.20063702878542244, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 430 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.7382265591854051, | |
| "grad_norm": 0.3633604126270278, | |
| "kl": 0.022269439697265626, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "max_completion_length": 360.2, | |
| "max_terminated_completion_length": 360.2, | |
| "mean_completion_length": 222.63359375, | |
| "mean_terminated_completion_length": 222.63359375, | |
| "min_completion_length": 132.325, | |
| "min_terminated_completion_length": 132.325, | |
| "num_tokens": 63358061.0, | |
| "reward": 0.6870032804086804, | |
| "reward_std": 0.13428880202773144, | |
| "rewards/format_reward/mean": 0.996875, | |
| "rewards/format_reward/std": 0.0125, | |
| "rewards/qatch_metrics/mean": 0.6321362034417689, | |
| "rewards/qatch_metrics/std": 0.156926771113649, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 435 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.7467119219346627, | |
| "grad_norm": 0.4800236287326641, | |
| "kl": 0.023519515991210938, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0003, | |
| "max_completion_length": 443.9375, | |
| "max_terminated_completion_length": 347.7, | |
| "mean_completion_length": 214.21796875, | |
| "mean_terminated_completion_length": 208.0436981201172, | |
| "min_completion_length": 126.7625, | |
| "min_terminated_completion_length": 126.7625, | |
| "num_tokens": 64047268.0, | |
| "reward": 0.711919279024005, | |
| "reward_std": 0.12310527702211402, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.6611848997883498, | |
| "rewards/qatch_metrics/std": 0.1447742166754324, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 440 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.7551972846839202, | |
| "grad_norm": 0.4201025924727673, | |
| "kl": 0.02261962890625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0082, | |
| "max_completion_length": 357.1125, | |
| "max_terminated_completion_length": 357.1125, | |
| "mean_completion_length": 218.71328125, | |
| "mean_terminated_completion_length": 218.71328125, | |
| "min_completion_length": 128.2375, | |
| "min_terminated_completion_length": 128.2375, | |
| "num_tokens": 64734389.0, | |
| "reward": 0.7627341808751226, | |
| "reward_std": 0.1422961473122996, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.7209901120513678, | |
| "rewards/qatch_metrics/std": 0.16715139605657897, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.00234375, | |
| "step": 445 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.7636826474331778, | |
| "grad_norm": 0.4006147867496434, | |
| "kl": 0.024700164794921875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0093, | |
| "max_completion_length": 386.5375, | |
| "max_terminated_completion_length": 386.5375, | |
| "mean_completion_length": 241.91875, | |
| "mean_terminated_completion_length": 241.91875, | |
| "min_completion_length": 134.525, | |
| "min_terminated_completion_length": 134.525, | |
| "num_tokens": 65441437.0, | |
| "reward": 0.7677853129804134, | |
| "reward_std": 0.12218148316605948, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.009375, | |
| "rewards/qatch_metrics/mean": 0.7271049507893622, | |
| "rewards/qatch_metrics/std": 0.14334085483569653, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.0015625, | |
| "step": 450 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00234375, | |
| "epoch": 0.7721680101824353, | |
| "grad_norm": 0.3658163947410702, | |
| "kl": 0.02187652587890625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0227, | |
| "max_completion_length": 688.375, | |
| "max_terminated_completion_length": 397.6625, | |
| "mean_completion_length": 284.06015625, | |
| "mean_terminated_completion_length": 265.58802165985105, | |
| "min_completion_length": 148.8875, | |
| "min_terminated_completion_length": 148.8875, | |
| "num_tokens": 66251850.0, | |
| "reward": 0.7470061536878347, | |
| "reward_std": 0.12179219683312112, | |
| "rewards/format_reward/mean": 0.99375, | |
| "rewards/format_reward/std": 0.025, | |
| "rewards/qatch_metrics/mean": 0.7032333370298147, | |
| "rewards/qatch_metrics/std": 0.14236632530810311, | |
| "rewards/tag_count_reward/mean": 0.99765625, | |
| "rewards/tag_count_reward/std": 0.009375, | |
| "step": 455 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.7806533729316928, | |
| "grad_norm": 0.34917157118165654, | |
| "kl": 0.0217071533203125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0126, | |
| "max_completion_length": 391.725, | |
| "max_terminated_completion_length": 391.725, | |
| "mean_completion_length": 248.1203125, | |
| "mean_terminated_completion_length": 248.1203125, | |
| "min_completion_length": 140.675, | |
| "min_terminated_completion_length": 140.675, | |
| "num_tokens": 66984580.0, | |
| "reward": 0.7774452485144139, | |
| "reward_std": 0.12198666574477102, | |
| "rewards/format_reward/mean": 0.996875, | |
| "rewards/format_reward/std": 0.0125, | |
| "rewards/qatch_metrics/mean": 0.7385500054806471, | |
| "rewards/qatch_metrics/std": 0.14199718742675033, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 460 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.7891387356809504, | |
| "grad_norm": 0.3726744702698002, | |
| "kl": 0.02341766357421875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.004, | |
| "max_completion_length": 395.0, | |
| "max_terminated_completion_length": 395.0, | |
| "mean_completion_length": 253.27421875, | |
| "mean_terminated_completion_length": 253.27421875, | |
| "min_completion_length": 140.625, | |
| "min_terminated_completion_length": 140.625, | |
| "num_tokens": 67738499.0, | |
| "reward": 0.794437967427075, | |
| "reward_std": 0.12837592368014156, | |
| "rewards/format_reward/mean": 0.99609375, | |
| "rewards/format_reward/std": 0.015625, | |
| "rewards/qatch_metrics/mean": 0.7587252637371421, | |
| "rewards/qatch_metrics/std": 0.14986626643221826, | |
| "rewards/tag_count_reward/mean": 0.9982421875, | |
| "rewards/tag_count_reward/std": 0.00703125, | |
| "step": 465 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.7976240984302079, | |
| "grad_norm": 0.389946408646022, | |
| "kl": 0.02283782958984375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.008, | |
| "max_completion_length": 387.8125, | |
| "max_terminated_completion_length": 387.8125, | |
| "mean_completion_length": 245.39140625, | |
| "mean_terminated_completion_length": 245.39140625, | |
| "min_completion_length": 143.0, | |
| "min_terminated_completion_length": 143.0, | |
| "num_tokens": 68509480.0, | |
| "reward": 0.7041194688528776, | |
| "reward_std": 0.11882331115193664, | |
| "rewards/format_reward/mean": 0.996875, | |
| "rewards/format_reward/std": 0.0125, | |
| "rewards/qatch_metrics/mean": 0.652284383343067, | |
| "rewards/qatch_metrics/std": 0.13897184773813934, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 470 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.8061094611794655, | |
| "grad_norm": 0.39373349119302825, | |
| "kl": 0.023323822021484374, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0014, | |
| "max_completion_length": 391.95, | |
| "max_terminated_completion_length": 391.95, | |
| "mean_completion_length": 240.12890625, | |
| "mean_terminated_completion_length": 240.12890625, | |
| "min_completion_length": 138.4375, | |
| "min_terminated_completion_length": 138.4375, | |
| "num_tokens": 69235469.0, | |
| "reward": 0.6929324485361577, | |
| "reward_std": 0.11601882965769619, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6389278700109571, | |
| "rewards/qatch_metrics/std": 0.13619600916281344, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 475 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.814594823928723, | |
| "grad_norm": 0.31566026316392565, | |
| "kl": 0.023133087158203124, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0125, | |
| "max_completion_length": 389.2, | |
| "max_terminated_completion_length": 389.2, | |
| "mean_completion_length": 261.484375, | |
| "mean_terminated_completion_length": 261.484375, | |
| "min_completion_length": 149.7, | |
| "min_terminated_completion_length": 149.7, | |
| "num_tokens": 70006537.0, | |
| "reward": 0.7002820556983351, | |
| "reward_std": 0.10403733076527714, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.6474825547542423, | |
| "rewards/qatch_metrics/std": 0.12227221787907183, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 480 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.8230801866779804, | |
| "grad_norm": 0.3916282140760076, | |
| "kl": 0.022870635986328124, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0015, | |
| "max_completion_length": 513.35, | |
| "max_terminated_completion_length": 416.4, | |
| "mean_completion_length": 287.2375, | |
| "mean_terminated_completion_length": 281.0057813644409, | |
| "min_completion_length": 157.5125, | |
| "min_terminated_completion_length": 157.5125, | |
| "num_tokens": 70801449.0, | |
| "reward": 0.7234059922397137, | |
| "reward_std": 0.144961252133362, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.009375, | |
| "rewards/qatch_metrics/mean": 0.6749054736457765, | |
| "rewards/qatch_metrics/std": 0.1701597002800554, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.00234375, | |
| "step": 485 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.831565549427238, | |
| "grad_norm": 0.4048625985955833, | |
| "kl": 0.0215118408203125, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0079, | |
| "max_completion_length": 397.9125, | |
| "max_terminated_completion_length": 397.9125, | |
| "mean_completion_length": 274.13828125, | |
| "mean_terminated_completion_length": 274.13828125, | |
| "min_completion_length": 158.65, | |
| "min_terminated_completion_length": 158.65, | |
| "num_tokens": 71576394.0, | |
| "reward": 0.7753009174019099, | |
| "reward_std": 0.13990603366255527, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.7358664112325641, | |
| "rewards/qatch_metrics/std": 0.16444662949361372, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.00234375, | |
| "step": 490 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.8400509121764955, | |
| "grad_norm": 0.45480327809125837, | |
| "kl": 0.021681976318359376, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0077, | |
| "max_completion_length": 402.625, | |
| "max_terminated_completion_length": 402.625, | |
| "mean_completion_length": 279.434375, | |
| "mean_terminated_completion_length": 279.434375, | |
| "min_completion_length": 163.8625, | |
| "min_terminated_completion_length": 163.8625, | |
| "num_tokens": 72365974.0, | |
| "reward": 0.7349002780392766, | |
| "reward_std": 0.14578591349127237, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.6882213579956442, | |
| "rewards/qatch_metrics/std": 0.17151983033400028, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 495 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.8485362749257531, | |
| "grad_norm": 0.455563317859272, | |
| "kl": 0.02360382080078125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0068, | |
| "max_completion_length": 390.85, | |
| "max_terminated_completion_length": 390.85, | |
| "mean_completion_length": 272.459375, | |
| "mean_terminated_completion_length": 272.459375, | |
| "min_completion_length": 161.525, | |
| "min_terminated_completion_length": 161.525, | |
| "num_tokens": 73127810.0, | |
| "reward": 0.6920099869370461, | |
| "reward_std": 0.1463924885727465, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6378770900890232, | |
| "rewards/qatch_metrics/std": 0.1722982805222273, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.00234375, | |
| "step": 500 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.8570216376750106, | |
| "grad_norm": 0.3592879756239025, | |
| "kl": 0.023213958740234374, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0019, | |
| "max_completion_length": 367.0625, | |
| "max_terminated_completion_length": 367.0625, | |
| "mean_completion_length": 250.75, | |
| "mean_terminated_completion_length": 250.75, | |
| "min_completion_length": 142.4375, | |
| "min_terminated_completion_length": 142.4375, | |
| "num_tokens": 73879090.0, | |
| "reward": 0.679243203997612, | |
| "reward_std": 0.10322665490675717, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.622742450950318, | |
| "rewards/qatch_metrics/std": 0.12102952525019646, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 505 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.8655070004242681, | |
| "grad_norm": 0.49006180502072655, | |
| "kl": 0.02385711669921875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0198, | |
| "max_completion_length": 333.4375, | |
| "max_terminated_completion_length": 333.4375, | |
| "mean_completion_length": 215.06875, | |
| "mean_terminated_completion_length": 215.06875, | |
| "min_completion_length": 127.7375, | |
| "min_terminated_completion_length": 127.7375, | |
| "num_tokens": 74560250.0, | |
| "reward": 0.767749921604991, | |
| "reward_std": 0.11815319370944052, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.7268565176054835, | |
| "rewards/qatch_metrics/std": 0.13863611537963152, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 510 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.8739923631735257, | |
| "grad_norm": 0.7469863828980855, | |
| "kl": 0.030249786376953126, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0018, | |
| "max_completion_length": 330.825, | |
| "max_terminated_completion_length": 330.825, | |
| "mean_completion_length": 204.48125, | |
| "mean_terminated_completion_length": 204.48125, | |
| "min_completion_length": 127.4375, | |
| "min_terminated_completion_length": 127.4375, | |
| "num_tokens": 75264162.0, | |
| "reward": 0.7245119333267211, | |
| "reward_std": 0.12906064465278178, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.6759882854319585, | |
| "rewards/qatch_metrics/std": 0.15146840937541128, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 515 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.8824777259227832, | |
| "grad_norm": 0.3482788479104542, | |
| "kl": 0.0268707275390625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0122, | |
| "max_completion_length": 328.775, | |
| "max_terminated_completion_length": 328.775, | |
| "mean_completion_length": 209.07734375, | |
| "mean_terminated_completion_length": 209.07734375, | |
| "min_completion_length": 130.0375, | |
| "min_terminated_completion_length": 130.0375, | |
| "num_tokens": 75977909.0, | |
| "reward": 0.740051432326436, | |
| "reward_std": 0.1375454404973425, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.694270056951791, | |
| "rewards/qatch_metrics/std": 0.16157473798375577, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 520 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.8909630886720408, | |
| "grad_norm": 0.3343696053079637, | |
| "kl": 0.0249237060546875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0131, | |
| "max_completion_length": 458.1125, | |
| "max_terminated_completion_length": 361.0125, | |
| "mean_completion_length": 233.49296875, | |
| "mean_terminated_completion_length": 227.33677101135254, | |
| "min_completion_length": 135.2375, | |
| "min_terminated_completion_length": 135.2375, | |
| "num_tokens": 76707116.0, | |
| "reward": 0.7603778231889009, | |
| "reward_std": 0.13856751608545892, | |
| "rewards/format_reward/mean": 0.9953125, | |
| "rewards/format_reward/std": 0.01875, | |
| "rewards/qatch_metrics/mean": 0.7187234393088147, | |
| "rewards/qatch_metrics/std": 0.16234044475131668, | |
| "rewards/tag_count_reward/mean": 0.9986328125, | |
| "rewards/tag_count_reward/std": 0.00546875, | |
| "step": 525 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.8994484514212983, | |
| "grad_norm": 0.4386545160583766, | |
| "kl": 0.0244140625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0114, | |
| "max_completion_length": 358.5, | |
| "max_terminated_completion_length": 358.5, | |
| "mean_completion_length": 231.14140625, | |
| "mean_terminated_completion_length": 231.14140625, | |
| "min_completion_length": 132.1875, | |
| "min_terminated_completion_length": 132.1875, | |
| "num_tokens": 77437409.0, | |
| "reward": 0.7168525701388717, | |
| "reward_std": 0.11293118421453982, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6671036507934331, | |
| "rewards/qatch_metrics/std": 0.1319870605453616, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.00234375, | |
| "step": 530 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.9079338141705557, | |
| "grad_norm": 0.4352590305875257, | |
| "kl": 0.023015975952148438, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0007, | |
| "max_completion_length": 371.703125, | |
| "max_terminated_completion_length": 371.703125, | |
| "mean_completion_length": 235.138671875, | |
| "mean_terminated_completion_length": 235.138671875, | |
| "min_completion_length": 137.53125, | |
| "min_terminated_completion_length": 137.53125, | |
| "num_tokens": 584510.0, | |
| "reward": 0.706055467016995, | |
| "reward_std": 0.16225836067314958, | |
| "rewards/format_reward/mean": 0.9990234375, | |
| "rewards/format_reward/std": 0.00390625, | |
| "rewards/qatch_metrics/mean": 0.654326502524782, | |
| "rewards/qatch_metrics/std": 0.19047756004147232, | |
| "rewards/tag_count_reward/mean": 0.99951171875, | |
| "rewards/tag_count_reward/std": 0.001953125, | |
| "step": 535 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0015625, | |
| "epoch": 0.9164191769198133, | |
| "grad_norm": 0.41139413863979357, | |
| "kl": 0.020354461669921876, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0129, | |
| "max_completion_length": 461.45, | |
| "max_terminated_completion_length": 365.5625, | |
| "mean_completion_length": 232.33203125, | |
| "mean_terminated_completion_length": 226.21109409332274, | |
| "min_completion_length": 134.2, | |
| "min_terminated_completion_length": 134.2, | |
| "num_tokens": 1305127.0, | |
| "reward": 0.7247285695746541, | |
| "reward_std": 0.1155310778063722, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6763695365632885, | |
| "rewards/qatch_metrics/std": 0.13525987763423472, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.00234375, | |
| "step": 540 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.9249045396690708, | |
| "grad_norm": 0.44480492417723494, | |
| "kl": 0.020232391357421876, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0189, | |
| "max_completion_length": 380.375, | |
| "max_terminated_completion_length": 380.375, | |
| "mean_completion_length": 241.66875, | |
| "mean_terminated_completion_length": 241.66875, | |
| "min_completion_length": 138.7375, | |
| "min_terminated_completion_length": 138.7375, | |
| "num_tokens": 2023119.0, | |
| "reward": 0.7838764563202858, | |
| "reward_std": 0.1253039419418201, | |
| "rewards/format_reward/mean": 0.99765625, | |
| "rewards/format_reward/std": 0.009375, | |
| "rewards/qatch_metrics/mean": 0.7460242195054889, | |
| "rewards/qatch_metrics/std": 0.1466813159175217, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 545 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.9333899024183284, | |
| "grad_norm": 0.36697768491887467, | |
| "kl": 0.025629425048828126, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0093, | |
| "max_completion_length": 347.35, | |
| "max_terminated_completion_length": 347.35, | |
| "mean_completion_length": 209.89765625, | |
| "mean_terminated_completion_length": 209.89765625, | |
| "min_completion_length": 123.725, | |
| "min_terminated_completion_length": 123.725, | |
| "num_tokens": 2710700.0, | |
| "reward": 0.7269751146435738, | |
| "reward_std": 0.12317023314535618, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6790010459721089, | |
| "rewards/qatch_metrics/std": 0.1443980704061687, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.0015625, | |
| "step": 550 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.9418752651675859, | |
| "grad_norm": 0.3870701334560838, | |
| "kl": 0.021868133544921876, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0043, | |
| "max_completion_length": 342.7625, | |
| "max_terminated_completion_length": 342.7625, | |
| "mean_completion_length": 209.63203125, | |
| "mean_terminated_completion_length": 209.63203125, | |
| "min_completion_length": 130.75, | |
| "min_terminated_completion_length": 130.75, | |
| "num_tokens": 3405557.0, | |
| "reward": 0.7488542322069407, | |
| "reward_std": 0.14193637002063042, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.7047296927776188, | |
| "rewards/qatch_metrics/std": 0.16692508138457923, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 555 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.9503606279168434, | |
| "grad_norm": 0.3424419653165648, | |
| "kl": 0.02395477294921875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0009, | |
| "max_completion_length": 391.3125, | |
| "max_terminated_completion_length": 391.3125, | |
| "mean_completion_length": 246.44453125, | |
| "mean_terminated_completion_length": 246.44453125, | |
| "min_completion_length": 141.5625, | |
| "min_terminated_completion_length": 141.5625, | |
| "num_tokens": 4139806.0, | |
| "reward": 0.6758879590779543, | |
| "reward_std": 0.11450046111822303, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6188755245762877, | |
| "rewards/qatch_metrics/std": 0.13397113499231636, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 560 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.958845990666101, | |
| "grad_norm": 0.4568777536093838, | |
| "kl": 0.02312164306640625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0033, | |
| "max_completion_length": 368.5625, | |
| "max_terminated_completion_length": 368.5625, | |
| "mean_completion_length": 235.325, | |
| "mean_terminated_completion_length": 235.325, | |
| "min_completion_length": 133.875, | |
| "min_terminated_completion_length": 133.875, | |
| "num_tokens": 4862734.0, | |
| "reward": 0.695911860279739, | |
| "reward_std": 0.13646231470629572, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.003125, | |
| "rewards/qatch_metrics/mean": 0.6423411538125947, | |
| "rewards/qatch_metrics/std": 0.1604516421444714, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 565 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.9673313534153585, | |
| "grad_norm": 0.38980210121528, | |
| "kl": 0.02292327880859375, | |
| "learning_rate": 1e-06, | |
| "loss": -0.001, | |
| "max_completion_length": 383.1125, | |
| "max_terminated_completion_length": 383.1125, | |
| "mean_completion_length": 252.1046875, | |
| "mean_terminated_completion_length": 252.1046875, | |
| "min_completion_length": 139.3, | |
| "min_terminated_completion_length": 139.3, | |
| "num_tokens": 5584404.0, | |
| "reward": 0.718302433565259, | |
| "reward_std": 0.11655738109402591, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.6688093788805418, | |
| "rewards/qatch_metrics/std": 0.13670958022848936, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.00234375, | |
| "step": 570 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.9758167161646161, | |
| "grad_norm": 0.36047089293163476, | |
| "kl": 0.022237396240234374, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0054, | |
| "max_completion_length": 514.825, | |
| "max_terminated_completion_length": 418.8625, | |
| "mean_completion_length": 279.5015625, | |
| "mean_terminated_completion_length": 273.3623962402344, | |
| "min_completion_length": 146.55, | |
| "min_terminated_completion_length": 146.55, | |
| "num_tokens": 6346694.0, | |
| "reward": 0.7575846627354622, | |
| "reward_std": 0.1412591182626784, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.00625, | |
| "rewards/qatch_metrics/mean": 0.7150007851421833, | |
| "rewards/qatch_metrics/std": 0.16609934358857573, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.00078125, | |
| "step": 575 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.00078125, | |
| "epoch": 0.9843020789138736, | |
| "grad_norm": 0.38530790890122946, | |
| "kl": 0.022991943359375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0211, | |
| "max_completion_length": 521.75, | |
| "max_terminated_completion_length": 426.1375, | |
| "mean_completion_length": 288.83515625, | |
| "mean_terminated_completion_length": 282.6679691314697, | |
| "min_completion_length": 153.6875, | |
| "min_terminated_completion_length": 153.6875, | |
| "num_tokens": 7139459.0, | |
| "reward": 0.7111776934936642, | |
| "reward_std": 0.1209044887451455, | |
| "rewards/format_reward/mean": 0.99453125, | |
| "rewards/format_reward/std": 0.021875, | |
| "rewards/qatch_metrics/mean": 0.6609213570598513, | |
| "rewards/qatch_metrics/std": 0.1400010462384671, | |
| "rewards/tag_count_reward/mean": 0.998828125, | |
| "rewards/tag_count_reward/std": 0.0046875, | |
| "step": 580 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0015625, | |
| "epoch": 0.9927874416631312, | |
| "grad_norm": 0.39292403345032983, | |
| "kl": 0.022852325439453126, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0046, | |
| "max_completion_length": 484.4625, | |
| "max_terminated_completion_length": 388.3125, | |
| "mean_completion_length": 255.30859375, | |
| "mean_terminated_completion_length": 249.1358856201172, | |
| "min_completion_length": 135.95, | |
| "min_terminated_completion_length": 135.95, | |
| "num_tokens": 7917326.0, | |
| "reward": 0.7616185914725065, | |
| "reward_std": 0.10822350736707449, | |
| "rewards/format_reward/mean": 0.996875, | |
| "rewards/format_reward/std": 0.0125, | |
| "rewards/qatch_metrics/mean": 0.7199533893726766, | |
| "rewards/qatch_metrics/std": 0.12633397565223276, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.00234375, | |
| "step": 585 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "clipped_completions_ratio": 0.0, | |
| "epoch": 0.9995757318625371, | |
| "kl": 0.02290058135986328, | |
| "max_completion_length": 382.140625, | |
| "max_terminated_completion_length": 382.140625, | |
| "mean_completion_length": 241.40234375, | |
| "mean_terminated_completion_length": 241.40234375, | |
| "min_completion_length": 134.609375, | |
| "min_terminated_completion_length": 134.609375, | |
| "num_tokens": 8519146.0, | |
| "reward": 0.7855706405825913, | |
| "reward_std": 0.14083891209156718, | |
| "rewards/format_reward/mean": 0.998046875, | |
| "rewards/format_reward/std": 0.0078125, | |
| "rewards/qatch_metrics/mean": 0.7479742880677804, | |
| "rewards/qatch_metrics/std": 0.16560156882042065, | |
| "rewards/tag_count_reward/mean": 0.999755859375, | |
| "rewards/tag_count_reward/std": 0.0009765625, | |
| "step": 589, | |
| "total_flos": 0.0, | |
| "train_loss": 0.000777434696821284, | |
| "train_runtime": 8130.4848, | |
| "train_samples_per_second": 1.16, | |
| "train_steps_per_second": 0.072 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 589, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 59, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |