| {"eval_loss":0.5167275071,"eval_binary":0.0,"eval_rouge":0.7697632054,"eval_llm_as_a_judge":0.0,"eval_runtime":32.4672,"eval_samples_per_second":0.739,"eval_steps_per_second":0.37,"epoch":0.0,"step":0,"loss":null,"grad_norm":null,"learning_rate":null,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.0492902208,"step":250,"loss":0.2184,"grad_norm":0.9176418185,"learning_rate":0.000012266,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.0985804416,"step":500,"loss":0.0936,"grad_norm":1.5168706179,"learning_rate":0.0000245813,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.1478706625,"step":750,"loss":0.0713,"grad_norm":0.9926924109,"learning_rate":0.0000368966,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.1971608833,"step":1000,"loss":0.0686,"grad_norm":0.7068266869,"learning_rate":0.0000492118,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.2464511041,"step":1250,"loss":0.0639,"grad_norm":0.9005385637,"learning_rate":0.0000493929,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.2957413249,"step":1500,"loss":0.0583,"grad_norm":0.3320270777,"learning_rate":0.0000487444,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.3450315457,"step":1750,"loss":0.0523,"grad_norm":0.5344585776,"learning_rate":0.0000480958,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.3943217666,"step":2000,"loss":0.0528,"grad_norm":0.3012170494,"learning_rate":0.0000474472,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.4436119874,"step":2250,"loss":0.0503,"grad_norm":0.2801056206,"learning_rate":0.0000467986,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.4929022082,"step":2500,"loss":0.0506,"grad_norm":0.3180363774,"learning_rate":0.0000461501,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.542192429,"step":2750,"loss":0.0504,"grad_norm":0.3937314153,"learning_rate":0.0000455015,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.5914826498,"step":3000,"loss":0.0468,"grad_norm":0.2334299833,"learning_rate":0.0000448529,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.6407728707,"step":3250,"loss":0.0455,"grad_norm":0.1807185113,"learning_rate":0.0000442043,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.6900630915,"step":3500,"loss":0.0485,"grad_norm":0.4132614434,"learning_rate":0.0000435558,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.7393533123,"step":3750,"loss":0.0496,"grad_norm":0.1720288992,"learning_rate":0.0000429072,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.7886435331,"step":4000,"loss":0.0467,"grad_norm":0.3105004132,"learning_rate":0.0000422586,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.8379337539,"step":4250,"loss":0.0461,"grad_norm":0.2848104239,"learning_rate":0.00004161,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.8872239748,"step":4500,"loss":0.046,"grad_norm":0.5095773339,"learning_rate":0.0000409614,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.9365141956,"step":4750,"loss":0.0455,"grad_norm":0.2530672252,"learning_rate":0.0000403129,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":0.9858044164,"step":5000,"loss":0.045,"grad_norm":0.189792484,"learning_rate":0.0000396643,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":0.1263345331,"eval_binary":0.125,"eval_rouge":0.9474400463,"eval_llm_as_a_judge":0.2083333333,"eval_runtime":40.2686,"eval_samples_per_second":0.596,"eval_steps_per_second":0.298,"epoch":1.0,"step":5072,"loss":null,"grad_norm":null,"learning_rate":null,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.0350946372,"step":5250,"loss":0.0437,"grad_norm":0.2292327881,"learning_rate":0.0000390157,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.084384858,"step":5500,"loss":0.0405,"grad_norm":0.1288205832,"learning_rate":0.0000383671,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.1336750789,"step":5750,"loss":0.0418,"grad_norm":0.2277369201,"learning_rate":0.0000377186,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.1829652997,"step":6000,"loss":0.0408,"grad_norm":0.4681090713,"learning_rate":0.00003707,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.2322555205,"step":6250,"loss":0.0411,"grad_norm":0.299074769,"learning_rate":0.0000364214,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.2815457413,"step":6500,"loss":0.0385,"grad_norm":0.2794261277,"learning_rate":0.0000357728,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.3308359621,"step":6750,"loss":0.0428,"grad_norm":0.2944606841,"learning_rate":0.0000351243,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.380126183,"step":7000,"loss":0.0392,"grad_norm":0.1948146522,"learning_rate":0.0000344757,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.4294164038,"step":7250,"loss":0.0411,"grad_norm":0.3118600249,"learning_rate":0.0000338271,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.4787066246,"step":7500,"loss":0.0421,"grad_norm":0.3751600087,"learning_rate":0.0000331785,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.5279968454,"step":7750,"loss":0.0384,"grad_norm":0.2926328778,"learning_rate":0.00003253,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.5772870662,"step":8000,"loss":0.0405,"grad_norm":0.2896762192,"learning_rate":0.0000318814,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.6265772871,"step":8250,"loss":0.0394,"grad_norm":0.1533144563,"learning_rate":0.0000312328,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.6758675079,"step":8500,"loss":0.0412,"grad_norm":0.2862010002,"learning_rate":0.0000305842,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.7251577287,"step":8750,"loss":0.0364,"grad_norm":0.3057603538,"learning_rate":0.0000299357,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.7744479495,"step":9000,"loss":0.0407,"grad_norm":0.1886971146,"learning_rate":0.0000292871,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.8237381703,"step":9250,"loss":0.0378,"grad_norm":0.2258825749,"learning_rate":0.0000286385,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.8730283912,"step":9500,"loss":0.0406,"grad_norm":0.2644851804,"learning_rate":0.0000279899,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.922318612,"step":9750,"loss":0.0368,"grad_norm":0.1238234639,"learning_rate":0.0000273414,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":1.9716088328,"step":10000,"loss":0.0375,"grad_norm":0.1553805172,"learning_rate":0.0000266928,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":0.1164036393,"eval_binary":0.1666666667,"eval_rouge":0.9475961059,"eval_llm_as_a_judge":0.2916666667,"eval_runtime":44.2421,"eval_samples_per_second":0.542,"eval_steps_per_second":0.271,"epoch":2.0,"step":10144,"loss":null,"grad_norm":null,"learning_rate":null,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.0208990536,"step":10250,"loss":0.0371,"grad_norm":0.1808101982,"learning_rate":0.0000260442,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.0701892744,"step":10500,"loss":0.0356,"grad_norm":0.0859490708,"learning_rate":0.0000253956,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.1194794953,"step":10750,"loss":0.0336,"grad_norm":0.2650609612,"learning_rate":0.0000247471,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.1687697161,"step":11000,"loss":0.0339,"grad_norm":0.2764821351,"learning_rate":0.0000240985,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.2180599369,"step":11250,"loss":0.0346,"grad_norm":0.2537253201,"learning_rate":0.0000234499,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.2673501577,"step":11500,"loss":0.0346,"grad_norm":0.2574119568,"learning_rate":0.0000228013,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.3166403785,"step":11750,"loss":0.0342,"grad_norm":0.174378708,"learning_rate":0.0000221528,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.3659305994,"step":12000,"loss":0.0345,"grad_norm":0.1928940415,"learning_rate":0.0000215042,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.4152208202,"step":12250,"loss":0.0331,"grad_norm":0.2048736215,"learning_rate":0.0000208556,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.464511041,"step":12500,"loss":0.0325,"grad_norm":0.1820557863,"learning_rate":0.000020207,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.5138012618,"step":12750,"loss":0.0319,"grad_norm":0.2215508074,"learning_rate":0.0000195584,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.5630914826,"step":13000,"loss":0.0349,"grad_norm":0.1621879637,"learning_rate":0.0000189099,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.6123817035,"step":13250,"loss":0.0339,"grad_norm":0.2562701404,"learning_rate":0.0000182613,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.6616719243,"step":13500,"loss":0.036,"grad_norm":0.335306704,"learning_rate":0.0000176127,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.7109621451,"step":13750,"loss":0.0323,"grad_norm":0.1239904389,"learning_rate":0.0000169641,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.7602523659,"step":14000,"loss":0.0355,"grad_norm":0.2811101675,"learning_rate":0.0000163156,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.8095425868,"step":14250,"loss":0.0338,"grad_norm":0.3872686327,"learning_rate":0.000015667,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.8588328076,"step":14500,"loss":0.0327,"grad_norm":0.284791261,"learning_rate":0.0000150184,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.9081230284,"step":14750,"loss":0.0342,"grad_norm":0.3155638278,"learning_rate":0.0000143698,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":2.9574132492,"step":15000,"loss":0.0336,"grad_norm":0.2762741148,"learning_rate":0.0000137213,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":0.1214129031,"eval_binary":0.2916666667,"eval_rouge":0.9544938655,"eval_llm_as_a_judge":0.3333333333,"eval_runtime":38.498,"eval_samples_per_second":0.623,"eval_steps_per_second":0.312,"epoch":3.0,"step":15216,"loss":null,"grad_norm":null,"learning_rate":null,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.00670347,"step":15250,"loss":0.0318,"grad_norm":0.3155117929,"learning_rate":0.0000130727,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.0559936909,"step":15500,"loss":0.0302,"grad_norm":0.1165657192,"learning_rate":0.0000124241,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.1052839117,"step":15750,"loss":0.0275,"grad_norm":0.4619110823,"learning_rate":0.0000117755,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.1545741325,"step":16000,"loss":0.0292,"grad_norm":0.3043929935,"learning_rate":0.000011127,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.2038643533,"step":16250,"loss":0.0294,"grad_norm":0.1710922122,"learning_rate":0.0000104784,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.2531545741,"step":16500,"loss":0.0287,"grad_norm":0.186401099,"learning_rate":0.0000098298,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.302444795,"step":16750,"loss":0.0319,"grad_norm":0.2804664969,"learning_rate":0.0000091812,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.3517350158,"step":17000,"loss":0.0286,"grad_norm":0.2021189779,"learning_rate":0.0000085327,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.4010252366,"step":17250,"loss":0.0288,"grad_norm":0.3104719818,"learning_rate":0.0000078841,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.4503154574,"step":17500,"loss":0.0287,"grad_norm":0.1001435593,"learning_rate":0.0000072355,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.4996056782,"step":17750,"loss":0.0291,"grad_norm":0.1255590767,"learning_rate":0.0000065869,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.5488958991,"step":18000,"loss":0.0288,"grad_norm":0.1633451134,"learning_rate":0.0000059384,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.5981861199,"step":18250,"loss":0.0277,"grad_norm":0.2357726395,"learning_rate":0.0000052898,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.6474763407,"step":18500,"loss":0.0275,"grad_norm":0.4976824224,"learning_rate":0.0000046412,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.6967665615,"step":18750,"loss":0.0272,"grad_norm":0.2571507692,"learning_rate":0.0000039926,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.7460567823,"step":19000,"loss":0.0287,"grad_norm":0.1579860747,"learning_rate":0.0000033441,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.7953470032,"step":19250,"loss":0.0296,"grad_norm":0.1648528278,"learning_rate":0.0000026955,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.844637224,"step":19500,"loss":0.0277,"grad_norm":0.1328402907,"learning_rate":0.0000020469,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.8939274448,"step":19750,"loss":0.0289,"grad_norm":0.2820516229,"learning_rate":0.0000013983,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.9432176656,"step":20000,"loss":0.0292,"grad_norm":0.1284302324,"learning_rate":0.0000007498,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":3.9925078864,"step":20250,"loss":0.0283,"grad_norm":0.3274129331,"learning_rate":0.0000001012,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":0.1293401569,"eval_binary":0.375,"eval_rouge":0.9521755236,"eval_llm_as_a_judge":0.4166666667,"eval_runtime":35.7003,"eval_samples_per_second":0.672,"eval_steps_per_second":0.336,"epoch":4.0,"step":20288,"loss":null,"grad_norm":null,"learning_rate":null,"train_runtime":null,"train_samples_per_second":null,"train_steps_per_second":null,"total_flos":null,"train_loss":null} | |
| {"eval_loss":null,"eval_binary":null,"eval_rouge":null,"eval_llm_as_a_judge":null,"eval_runtime":null,"eval_samples_per_second":null,"eval_steps_per_second":null,"epoch":4.0,"step":20288,"loss":null,"grad_norm":null,"learning_rate":null,"train_runtime":7466.5185,"train_samples_per_second":5.434,"train_steps_per_second":2.717,"total_flos":8.261775423e+17,"train_loss":0.0412241326} | |