{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 4.6875, "eval_steps": 200, "global_step": 4200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0011160714285714285, "grad_norm": NaN, "learning_rate": 0.0, "loss": 9.119, "step": 1 }, { "epoch": 0.11160714285714286, "grad_norm": 17.632238388061523, "learning_rate": 2.1428571428571428e-05, "loss": 4.1469, "step": 100 }, { "epoch": 0.22321428571428573, "grad_norm": 14.409270286560059, "learning_rate": 4.375e-05, "loss": 2.5294, "step": 200 }, { "epoch": 0.22321428571428573, "eval_full_en_cosine_accuracy@1": 0.7467105263157895, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.993421052631579, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7467105263157895, "eval_full_en_cosine_map@100": 0.2121058701298033, "eval_full_en_cosine_map@150": 0.2294109301872967, "eval_full_en_cosine_map@20": 0.34167650006204187, "eval_full_en_cosine_map@200": 0.2492171685943861, "eval_full_en_cosine_map@50": 0.237336657426832, "eval_full_en_cosine_map@500": 0.3000288940307502, "eval_full_en_cosine_mrr@1": 0.7467105263157895, "eval_full_en_cosine_mrr@100": 0.8460592769803298, "eval_full_en_cosine_mrr@150": 0.8460592769803298, "eval_full_en_cosine_mrr@20": 0.8458948032961192, "eval_full_en_cosine_mrr@200": 0.8460592769803298, "eval_full_en_cosine_mrr@50": 0.8460122844991269, "eval_full_en_cosine_ndcg@1": 0.7467105263157895, "eval_full_en_cosine_ndcg@100": 0.4430509248084704, "eval_full_en_cosine_ndcg@150": 0.4894828917681416, "eval_full_en_cosine_ndcg@20": 0.5367541274871807, "eval_full_en_cosine_ndcg@200": 0.5361903606133726, "eval_full_en_cosine_ndcg@50": 0.448683811733402, "eval_full_en_cosine_precision@1": 0.7467105263157895, "eval_full_en_cosine_precision@100": 0.31240131578947367, "eval_full_en_cosine_precision@150": 0.26592105263157895, "eval_full_en_cosine_precision@20": 0.4965460526315789, "eval_full_en_cosine_precision@200": 0.23370065789473685, "eval_full_en_cosine_precision@50": 0.3904605263157895, "eval_full_en_cosine_recall@1": 0.010753343030902496, "eval_full_en_cosine_recall@100": 0.39446255566624855, "eval_full_en_cosine_recall@150": 0.49544823712709557, "eval_full_en_cosine_recall@20": 0.13279013317825217, "eval_full_en_cosine_recall@200": 0.5739614992682516, "eval_full_en_cosine_recall@50": 0.25254843470147753, "eval_runtime": 1.5828, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5361903606133726, "eval_steps_per_second": 0.0, "step": 200 }, { "epoch": 0.33482142857142855, "grad_norm": 16.260934829711914, "learning_rate": 4.915413533834587e-05, "loss": 2.3611, "step": 300 }, { "epoch": 0.44642857142857145, "grad_norm": 13.242988586425781, "learning_rate": 4.797932330827068e-05, "loss": 2.192, "step": 400 }, { "epoch": 0.44642857142857145, "eval_full_en_cosine_accuracy@1": 0.7368421052631579, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.993421052631579, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7368421052631579, "eval_full_en_cosine_map@100": 0.2088144416212806, "eval_full_en_cosine_map@150": 0.22677217670719133, "eval_full_en_cosine_map@20": 0.3349832137166454, "eval_full_en_cosine_map@200": 0.245946497368659, "eval_full_en_cosine_map@50": 0.23473921202287384, "eval_full_en_cosine_map@500": 0.2973985707303743, "eval_full_en_cosine_mrr@1": 0.7368421052631579, "eval_full_en_cosine_mrr@100": 0.8394156306336016, "eval_full_en_cosine_mrr@150": 0.8394156306336016, "eval_full_en_cosine_mrr@20": 0.8392713554720135, "eval_full_en_cosine_mrr@200": 0.8394156306336016, "eval_full_en_cosine_mrr@50": 0.8393810045948205, "eval_full_en_cosine_ndcg@1": 0.7368421052631579, "eval_full_en_cosine_ndcg@100": 0.43855475512592684, "eval_full_en_cosine_ndcg@150": 0.48609390907359196, "eval_full_en_cosine_ndcg@20": 0.5288083416910968, "eval_full_en_cosine_ndcg@200": 0.5318117937684201, "eval_full_en_cosine_ndcg@50": 0.4453338982563473, "eval_full_en_cosine_precision@1": 0.7368421052631579, "eval_full_en_cosine_precision@100": 0.3088157894736842, "eval_full_en_cosine_precision@150": 0.2644517543859649, "eval_full_en_cosine_precision@20": 0.4875, "eval_full_en_cosine_precision@200": 0.23172697368421055, "eval_full_en_cosine_precision@50": 0.38782894736842105, "eval_full_en_cosine_recall@1": 0.010619007443519193, "eval_full_en_cosine_recall@100": 0.3902042311088277, "eval_full_en_cosine_recall@150": 0.4925745165667779, "eval_full_en_cosine_recall@20": 0.1301764615450556, "eval_full_en_cosine_recall@200": 0.5696006364444781, "eval_full_en_cosine_recall@50": 0.2518199886564403, "eval_runtime": 1.5596, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5318117937684201, "eval_steps_per_second": 0.0, "step": 400 }, { "epoch": 0.5580357142857143, "grad_norm": 13.307888984680176, "learning_rate": 4.680451127819549e-05, "loss": 2.0338, "step": 500 }, { "epoch": 0.6696428571428571, "grad_norm": 12.763930320739746, "learning_rate": 4.56296992481203e-05, "loss": 1.9009, "step": 600 }, { "epoch": 0.6696428571428571, "eval_full_en_cosine_accuracy@1": 0.7302631578947368, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.993421052631579, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7302631578947368, "eval_full_en_cosine_map@100": 0.2146410944227793, "eval_full_en_cosine_map@150": 0.23271596511985665, "eval_full_en_cosine_map@20": 0.3429678297332613, "eval_full_en_cosine_map@200": 0.2520997707361607, "eval_full_en_cosine_map@50": 0.2404899713826549, "eval_full_en_cosine_map@500": 0.302904619520322, "eval_full_en_cosine_mrr@1": 0.7302631578947368, "eval_full_en_cosine_mrr@100": 0.8306572094298247, "eval_full_en_cosine_mrr@150": 0.8306572094298247, "eval_full_en_cosine_mrr@20": 0.8304491697994989, "eval_full_en_cosine_mrr@200": 0.8306572094298247, "eval_full_en_cosine_mrr@50": 0.8306058114035089, "eval_full_en_cosine_ndcg@1": 0.7302631578947368, "eval_full_en_cosine_ndcg@100": 0.4445617284976941, "eval_full_en_cosine_ndcg@150": 0.4922393935902775, "eval_full_en_cosine_ndcg@20": 0.5357880041966661, "eval_full_en_cosine_ndcg@200": 0.5383209000398446, "eval_full_en_cosine_ndcg@50": 0.4504820590447715, "eval_full_en_cosine_precision@1": 0.7302631578947368, "eval_full_en_cosine_precision@100": 0.31358552631578945, "eval_full_en_cosine_precision@150": 0.2677412280701754, "eval_full_en_cosine_precision@20": 0.49720394736842105, "eval_full_en_cosine_precision@200": 0.23452302631578953, "eval_full_en_cosine_precision@50": 0.3932894736842105, "eval_full_en_cosine_recall@1": 0.010303516134180577, "eval_full_en_cosine_recall@100": 0.3970033142271577, "eval_full_en_cosine_recall@150": 0.5001101850184368, "eval_full_en_cosine_recall@20": 0.13302896177814508, "eval_full_en_cosine_recall@200": 0.5777429812058247, "eval_full_en_cosine_recall@50": 0.254528957048419, "eval_runtime": 1.5616, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5383209000398446, "eval_steps_per_second": 0.0, "step": 600 }, { "epoch": 0.78125, "grad_norm": 13.439990997314453, "learning_rate": 4.4454887218045117e-05, "loss": 1.8404, "step": 700 }, { "epoch": 0.8928571428571429, "grad_norm": 12.594465255737305, "learning_rate": 4.3280075187969924e-05, "loss": 1.7692, "step": 800 }, { "epoch": 0.8928571428571429, "eval_full_en_cosine_accuracy@1": 0.7368421052631579, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.993421052631579, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7368421052631579, "eval_full_en_cosine_map@100": 0.21030614519224017, "eval_full_en_cosine_map@150": 0.22737063252522982, "eval_full_en_cosine_map@20": 0.3442880676713117, "eval_full_en_cosine_map@200": 0.24764067563282596, "eval_full_en_cosine_map@50": 0.23827484272575025, "eval_full_en_cosine_map@500": 0.2987091429260604, "eval_full_en_cosine_mrr@1": 0.7368421052631579, "eval_full_en_cosine_mrr@100": 0.8404268619187053, "eval_full_en_cosine_mrr@150": 0.8404268619187053, "eval_full_en_cosine_mrr@20": 0.8402307852965749, "eval_full_en_cosine_mrr@200": 0.8404268619187053, "eval_full_en_cosine_mrr@50": 0.8403738058915406, "eval_full_en_cosine_ndcg@1": 0.7368421052631579, "eval_full_en_cosine_ndcg@100": 0.440670430732987, "eval_full_en_cosine_ndcg@150": 0.486778222456143, "eval_full_en_cosine_ndcg@20": 0.5383903905850532, "eval_full_en_cosine_ndcg@200": 0.5352292016764449, "eval_full_en_cosine_ndcg@50": 0.45046850998342597, "eval_full_en_cosine_precision@1": 0.7368421052631579, "eval_full_en_cosine_precision@100": 0.3099342105263158, "eval_full_en_cosine_precision@150": 0.26390350877192986, "eval_full_en_cosine_precision@20": 0.5, "eval_full_en_cosine_precision@200": 0.23320723684210526, "eval_full_en_cosine_precision@50": 0.39335526315789476, "eval_full_en_cosine_recall@1": 0.01051277780149725, "eval_full_en_cosine_recall@100": 0.39158535797000443, "eval_full_en_cosine_recall@150": 0.4917399858788313, "eval_full_en_cosine_recall@20": 0.13328036442285973, "eval_full_en_cosine_recall@200": 0.5734492892933252, "eval_full_en_cosine_recall@50": 0.254129727850083, "eval_runtime": 1.5752, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5352292016764449, "eval_steps_per_second": 0.0, "step": 800 }, { "epoch": 1.0044642857142858, "grad_norm": 13.140974998474121, "learning_rate": 4.212875939849624e-05, "loss": 1.6921, "step": 900 }, { "epoch": 1.1160714285714286, "grad_norm": 12.160736083984375, "learning_rate": 4.096569548872181e-05, "loss": 1.3861, "step": 1000 }, { "epoch": 1.1160714285714286, "eval_full_en_cosine_accuracy@1": 0.7401315789473685, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.993421052631579, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7401315789473685, "eval_full_en_cosine_map@100": 0.21155466872463927, "eval_full_en_cosine_map@150": 0.2291636549745022, "eval_full_en_cosine_map@20": 0.3373673798048492, "eval_full_en_cosine_map@200": 0.24905074192004603, "eval_full_en_cosine_map@50": 0.2376950112180141, "eval_full_en_cosine_map@500": 0.3006802538137734, "eval_full_en_cosine_mrr@1": 0.7401315789473685, "eval_full_en_cosine_mrr@100": 0.8405236576289212, "eval_full_en_cosine_mrr@150": 0.8405236576289212, "eval_full_en_cosine_mrr@20": 0.8403143274853806, "eval_full_en_cosine_mrr@200": 0.8405236576289212, "eval_full_en_cosine_mrr@50": 0.840463849016481, "eval_full_en_cosine_ndcg@1": 0.7401315789473685, "eval_full_en_cosine_ndcg@100": 0.44212858816477746, "eval_full_en_cosine_ndcg@150": 0.48946706445562127, "eval_full_en_cosine_ndcg@20": 0.5332180756481385, "eval_full_en_cosine_ndcg@200": 0.5367929588661781, "eval_full_en_cosine_ndcg@50": 0.44979391873656477, "eval_full_en_cosine_precision@1": 0.7401315789473685, "eval_full_en_cosine_precision@100": 0.3114473684210526, "eval_full_en_cosine_precision@150": 0.266469298245614, "eval_full_en_cosine_precision@20": 0.49243421052631575, "eval_full_en_cosine_precision@200": 0.2345888157894737, "eval_full_en_cosine_precision@50": 0.3921052631578947, "eval_full_en_cosine_recall@1": 0.010392607884295562, "eval_full_en_cosine_recall@100": 0.3933254279416559, "eval_full_en_cosine_recall@150": 0.4957503189606009, "eval_full_en_cosine_recall@20": 0.13107623492706288, "eval_full_en_cosine_recall@200": 0.5753954619760326, "eval_full_en_cosine_recall@50": 0.2539746341397596, "eval_runtime": 1.6397, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5367929588661781, "eval_steps_per_second": 0.0, "step": 1000 }, { "epoch": 1.2276785714285714, "grad_norm": 13.078369140625, "learning_rate": 3.9790883458646615e-05, "loss": 1.3863, "step": 1100 }, { "epoch": 1.3392857142857144, "grad_norm": 11.990692138671875, "learning_rate": 3.861607142857143e-05, "loss": 1.3546, "step": 1200 }, { "epoch": 1.3392857142857144, "eval_full_en_cosine_accuracy@1": 0.7203947368421053, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.993421052631579, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7203947368421053, "eval_full_en_cosine_map@100": 0.20552277525856266, "eval_full_en_cosine_map@150": 0.22274311961933413, "eval_full_en_cosine_map@20": 0.3363904557549852, "eval_full_en_cosine_map@200": 0.24106738760441354, "eval_full_en_cosine_map@50": 0.23370113464760453, "eval_full_en_cosine_map@500": 0.28981293048421486, "eval_full_en_cosine_mrr@1": 0.7203947368421053, "eval_full_en_cosine_mrr@100": 0.8322617799738206, "eval_full_en_cosine_mrr@150": 0.8322617799738206, "eval_full_en_cosine_mrr@20": 0.8320620443153339, "eval_full_en_cosine_mrr@200": 0.8322617799738206, "eval_full_en_cosine_mrr@50": 0.8322050649102997, "eval_full_en_cosine_ndcg@1": 0.7203947368421053, "eval_full_en_cosine_ndcg@100": 0.43445871937106545, "eval_full_en_cosine_ndcg@150": 0.48130417146010107, "eval_full_en_cosine_ndcg@20": 0.531477407982968, "eval_full_en_cosine_ndcg@200": 0.5259375639543232, "eval_full_en_cosine_ndcg@50": 0.4444057356887903, "eval_full_en_cosine_precision@1": 0.7203947368421053, "eval_full_en_cosine_precision@100": 0.3039802631578947, "eval_full_en_cosine_precision@150": 0.25999999999999995, "eval_full_en_cosine_precision@20": 0.4925986842105263, "eval_full_en_cosine_precision@200": 0.22763157894736838, "eval_full_en_cosine_precision@50": 0.3867105263157895, "eval_full_en_cosine_recall@1": 0.010318104890368607, "eval_full_en_cosine_recall@100": 0.385615965839615, "eval_full_en_cosine_recall@150": 0.48656381032984825, "eval_full_en_cosine_recall@20": 0.13139326985918445, "eval_full_en_cosine_recall@200": 0.5617757383007209, "eval_full_en_cosine_recall@50": 0.2506285703289517, "eval_runtime": 1.5585, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5259375639543232, "eval_steps_per_second": 0.0, "step": 1200 }, { "epoch": 1.4508928571428572, "grad_norm": 15.019533157348633, "learning_rate": 3.744125939849624e-05, "loss": 1.373, "step": 1300 }, { "epoch": 1.5625, "grad_norm": 10.545878410339355, "learning_rate": 3.626644736842105e-05, "loss": 1.3364, "step": 1400 }, { "epoch": 1.5625, "eval_full_en_cosine_accuracy@1": 0.7171052631578947, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.993421052631579, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7171052631578947, "eval_full_en_cosine_map@100": 0.20833018055660496, "eval_full_en_cosine_map@150": 0.22583322401021033, "eval_full_en_cosine_map@20": 0.34006318172507877, "eval_full_en_cosine_map@200": 0.24462161151730188, "eval_full_en_cosine_map@50": 0.23483789231739935, "eval_full_en_cosine_map@500": 0.2946124561805931, "eval_full_en_cosine_mrr@1": 0.7171052631578947, "eval_full_en_cosine_mrr@100": 0.8267713172687238, "eval_full_en_cosine_mrr@150": 0.8267713172687238, "eval_full_en_cosine_mrr@20": 0.8265913362952838, "eval_full_en_cosine_mrr@200": 0.8267713172687238, "eval_full_en_cosine_mrr@50": 0.8267343568902494, "eval_full_en_cosine_ndcg@1": 0.7171052631578947, "eval_full_en_cosine_ndcg@100": 0.4377486787968229, "eval_full_en_cosine_ndcg@150": 0.4850669425848544, "eval_full_en_cosine_ndcg@20": 0.5331724259953773, "eval_full_en_cosine_ndcg@200": 0.5302927064126869, "eval_full_en_cosine_ndcg@50": 0.4451308688476405, "eval_full_en_cosine_precision@1": 0.7171052631578947, "eval_full_en_cosine_precision@100": 0.3074671052631579, "eval_full_en_cosine_precision@150": 0.2625657894736842, "eval_full_en_cosine_precision@20": 0.4947368421052632, "eval_full_en_cosine_precision@200": 0.23016447368421053, "eval_full_en_cosine_precision@50": 0.38769736842105257, "eval_full_en_cosine_recall@1": 0.010208074045806198, "eval_full_en_cosine_recall@100": 0.3902466549235702, "eval_full_en_cosine_recall@150": 0.49226776551348056, "eval_full_en_cosine_recall@20": 0.13255572846134298, "eval_full_en_cosine_recall@200": 0.5680994353864672, "eval_full_en_cosine_recall@50": 0.25126941591084845, "eval_runtime": 1.5595, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5302927064126869, "eval_steps_per_second": 0.0, "step": 1400 }, { "epoch": 1.6741071428571428, "grad_norm": 18.495975494384766, "learning_rate": 3.509163533834587e-05, "loss": 1.2876, "step": 1500 }, { "epoch": 1.7857142857142856, "grad_norm": 12.646751403808594, "learning_rate": 3.391682330827068e-05, "loss": 1.3094, "step": 1600 }, { "epoch": 1.7857142857142856, "eval_full_en_cosine_accuracy@1": 0.7072368421052632, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.9967105263157895, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7072368421052632, "eval_full_en_cosine_map@100": 0.20923239071614674, "eval_full_en_cosine_map@150": 0.225604138471006, "eval_full_en_cosine_map@20": 0.34034356587585846, "eval_full_en_cosine_map@200": 0.24539737099429304, "eval_full_en_cosine_map@50": 0.23464702413938254, "eval_full_en_cosine_map@500": 0.29597166286299953, "eval_full_en_cosine_mrr@1": 0.7072368421052632, "eval_full_en_cosine_mrr@100": 0.8214137967940215, "eval_full_en_cosine_mrr@150": 0.8214137967940215, "eval_full_en_cosine_mrr@20": 0.8213699371448987, "eval_full_en_cosine_mrr@200": 0.8214137967940215, "eval_full_en_cosine_mrr@50": 0.8213699371448987, "eval_full_en_cosine_ndcg@1": 0.7072368421052632, "eval_full_en_cosine_ndcg@100": 0.4396726832556684, "eval_full_en_cosine_ndcg@150": 0.4847816359827512, "eval_full_en_cosine_ndcg@20": 0.532792025753163, "eval_full_en_cosine_ndcg@200": 0.5323403273572274, "eval_full_en_cosine_ndcg@50": 0.4452189433184465, "eval_full_en_cosine_precision@1": 0.7072368421052632, "eval_full_en_cosine_precision@100": 0.3098026315789474, "eval_full_en_cosine_precision@150": 0.26274122807017547, "eval_full_en_cosine_precision@20": 0.4935855263157895, "eval_full_en_cosine_precision@200": 0.23192434210526314, "eval_full_en_cosine_precision@50": 0.38763157894736844, "eval_full_en_cosine_recall@1": 0.010122149362902188, "eval_full_en_cosine_recall@100": 0.39236988612007834, "eval_full_en_cosine_recall@150": 0.4910778378543689, "eval_full_en_cosine_recall@20": 0.13108496301513997, "eval_full_en_cosine_recall@200": 0.5709689534914331, "eval_full_en_cosine_recall@50": 0.25093448303772187, "eval_runtime": 1.5873, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5323403273572274, "eval_steps_per_second": 0.0, "step": 1600 }, { "epoch": 1.8973214285714286, "grad_norm": 11.858412742614746, "learning_rate": 3.274201127819549e-05, "loss": 1.2784, "step": 1700 }, { "epoch": 2.0089285714285716, "grad_norm": 11.152688026428223, "learning_rate": 3.1567199248120306e-05, "loss": 1.2204, "step": 1800 }, { "epoch": 2.0089285714285716, "eval_full_en_cosine_accuracy@1": 0.7368421052631579, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.9967105263157895, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7368421052631579, "eval_full_en_cosine_map@100": 0.2098412194483687, "eval_full_en_cosine_map@150": 0.22663911455304064, "eval_full_en_cosine_map@20": 0.3433147887298301, "eval_full_en_cosine_map@200": 0.24620266722190678, "eval_full_en_cosine_map@50": 0.23714915519951082, "eval_full_en_cosine_map@500": 0.29690932859887553, "eval_full_en_cosine_mrr@1": 0.7368421052631579, "eval_full_en_cosine_mrr@100": 0.8394024772357531, "eval_full_en_cosine_mrr@150": 0.8394024772357531, "eval_full_en_cosine_mrr@20": 0.8393426686233129, "eval_full_en_cosine_mrr@200": 0.8394024772357531, "eval_full_en_cosine_mrr@50": 0.8393426686233129, "eval_full_en_cosine_ndcg@1": 0.7368421052631579, "eval_full_en_cosine_ndcg@100": 0.4396519841053572, "eval_full_en_cosine_ndcg@150": 0.4856325134708184, "eval_full_en_cosine_ndcg@20": 0.5375317893335387, "eval_full_en_cosine_ndcg@200": 0.533015167774829, "eval_full_en_cosine_ndcg@50": 0.44810398395306655, "eval_full_en_cosine_precision@1": 0.7368421052631579, "eval_full_en_cosine_precision@100": 0.3084539473684211, "eval_full_en_cosine_precision@150": 0.2627631578947368, "eval_full_en_cosine_precision@20": 0.49769736842105267, "eval_full_en_cosine_precision@200": 0.2314309210526316, "eval_full_en_cosine_precision@50": 0.3891447368421053, "eval_full_en_cosine_recall@1": 0.010440810366523372, "eval_full_en_cosine_recall@100": 0.39036009395952986, "eval_full_en_cosine_recall@150": 0.49041982254882954, "eval_full_en_cosine_recall@20": 0.13228070304056636, "eval_full_en_cosine_recall@200": 0.5704962189819233, "eval_full_en_cosine_recall@50": 0.25248213212752935, "eval_runtime": 1.6049, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.533015167774829, "eval_steps_per_second": 0.0, "step": 1800 }, { "epoch": 2.1205357142857144, "grad_norm": 12.91015625, "learning_rate": 3.0392387218045114e-05, "loss": 0.9617, "step": 1900 }, { "epoch": 2.232142857142857, "grad_norm": 11.646313667297363, "learning_rate": 2.9217575187969924e-05, "loss": 1.0004, "step": 2000 }, { "epoch": 2.232142857142857, "eval_full_en_cosine_accuracy@1": 0.7236842105263158, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.9967105263157895, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 1.0, "eval_full_en_cosine_map@1": 0.7236842105263158, "eval_full_en_cosine_map@100": 0.20775225168018954, "eval_full_en_cosine_map@150": 0.22393096419950168, "eval_full_en_cosine_map@20": 0.3380596885262807, "eval_full_en_cosine_map@200": 0.24259765295506924, "eval_full_en_cosine_map@50": 0.23452814948810471, "eval_full_en_cosine_map@500": 0.2920026964508484, "eval_full_en_cosine_mrr@1": 0.7236842105263158, "eval_full_en_cosine_mrr@100": 0.8325452625382137, "eval_full_en_cosine_mrr@150": 0.8325452625382137, "eval_full_en_cosine_mrr@20": 0.8324781304222094, "eval_full_en_cosine_mrr@200": 0.8325452625382137, "eval_full_en_cosine_mrr@50": 0.8325452625382137, "eval_full_en_cosine_ndcg@1": 0.7236842105263158, "eval_full_en_cosine_ndcg@100": 0.4376001104057169, "eval_full_en_cosine_ndcg@150": 0.48181431955382, "eval_full_en_cosine_ndcg@20": 0.5323035546433559, "eval_full_en_cosine_ndcg@200": 0.5276663014224582, "eval_full_en_cosine_ndcg@50": 0.44660441452063837, "eval_full_en_cosine_precision@1": 0.7236842105263158, "eval_full_en_cosine_precision@100": 0.30644736842105263, "eval_full_en_cosine_precision@150": 0.259890350877193, "eval_full_en_cosine_precision@20": 0.4916118421052632, "eval_full_en_cosine_precision@200": 0.2280921052631579, "eval_full_en_cosine_precision@50": 0.3886842105263158, "eval_full_en_cosine_recall@1": 0.010329446437905086, "eval_full_en_cosine_recall@100": 0.38885062846601265, "eval_full_en_cosine_recall@150": 0.4854595951837256, "eval_full_en_cosine_recall@20": 0.131078016933875, "eval_full_en_cosine_recall@200": 0.5630724982932908, "eval_full_en_cosine_recall@50": 0.252357645205228, "eval_runtime": 1.5613, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5276663014224582, "eval_steps_per_second": 0.0, "step": 2000 }, { "epoch": 2.34375, "grad_norm": 12.087961196899414, "learning_rate": 2.8042763157894735e-05, "loss": 0.9694, "step": 2100 }, { "epoch": 2.455357142857143, "grad_norm": 8.181659698486328, "learning_rate": 2.6867951127819552e-05, "loss": 0.9843, "step": 2200 }, { "epoch": 2.455357142857143, "eval_full_en_cosine_accuracy@1": 0.7236842105263158, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.993421052631579, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 1.0, "eval_full_en_cosine_map@1": 0.7236842105263158, "eval_full_en_cosine_map@100": 0.209953160245849, "eval_full_en_cosine_map@150": 0.22760030144833215, "eval_full_en_cosine_map@20": 0.34078157961918865, "eval_full_en_cosine_map@200": 0.24749824184265867, "eval_full_en_cosine_map@50": 0.2365248444512811, "eval_full_en_cosine_map@500": 0.29789431690676116, "eval_full_en_cosine_mrr@1": 0.7236842105263158, "eval_full_en_cosine_mrr@100": 0.8318935359231412, "eval_full_en_cosine_mrr@150": 0.8318935359231412, "eval_full_en_cosine_mrr@20": 0.8316833751044278, "eval_full_en_cosine_mrr@200": 0.8318935359231412, "eval_full_en_cosine_mrr@50": 0.8318935359231412, "eval_full_en_cosine_ndcg@1": 0.7236842105263158, "eval_full_en_cosine_ndcg@100": 0.44076958126493176, "eval_full_en_cosine_ndcg@150": 0.48838061313116793, "eval_full_en_cosine_ndcg@20": 0.5350320556020238, "eval_full_en_cosine_ndcg@200": 0.5355574509263721, "eval_full_en_cosine_ndcg@50": 0.44803994906340594, "eval_full_en_cosine_precision@1": 0.7236842105263158, "eval_full_en_cosine_precision@100": 0.3099671052631579, "eval_full_en_cosine_precision@150": 0.2648464912280702, "eval_full_en_cosine_precision@20": 0.49588815789473684, "eval_full_en_cosine_precision@200": 0.23342105263157892, "eval_full_en_cosine_precision@50": 0.39052631578947367, "eval_full_en_cosine_recall@1": 0.010284539147879572, "eval_full_en_cosine_recall@100": 0.39296182819932773, "eval_full_en_cosine_recall@150": 0.4959148528891931, "eval_full_en_cosine_recall@20": 0.13200577828629578, "eval_full_en_cosine_recall@200": 0.5749370249014907, "eval_full_en_cosine_recall@50": 0.25310992970173135, "eval_runtime": 1.8632, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5355574509263721, "eval_steps_per_second": 0.0, "step": 2200 }, { "epoch": 2.5669642857142856, "grad_norm": 13.6882905960083, "learning_rate": 2.5693139097744363e-05, "loss": 0.9743, "step": 2300 }, { "epoch": 2.678571428571429, "grad_norm": 11.966975212097168, "learning_rate": 2.4518327067669177e-05, "loss": 0.9252, "step": 2400 }, { "epoch": 2.678571428571429, "eval_full_en_cosine_accuracy@1": 0.7335526315789473, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.993421052631579, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7335526315789473, "eval_full_en_cosine_map@100": 0.20983286336268822, "eval_full_en_cosine_map@150": 0.22675852672419078, "eval_full_en_cosine_map@20": 0.34004090105732804, "eval_full_en_cosine_map@200": 0.24584993568226646, "eval_full_en_cosine_map@50": 0.23672594782424658, "eval_full_en_cosine_map@500": 0.29632183596698103, "eval_full_en_cosine_mrr@1": 0.7335526315789473, "eval_full_en_cosine_mrr@100": 0.83135268727374, "eval_full_en_cosine_mrr@150": 0.83135268727374, "eval_full_en_cosine_mrr@20": 0.8311351294903929, "eval_full_en_cosine_mrr@200": 0.83135268727374, "eval_full_en_cosine_mrr@50": 0.8312917710944029, "eval_full_en_cosine_ndcg@1": 0.7335526315789473, "eval_full_en_cosine_ndcg@100": 0.4400577813719261, "eval_full_en_cosine_ndcg@150": 0.4859220111165228, "eval_full_en_cosine_ndcg@20": 0.5344170691501652, "eval_full_en_cosine_ndcg@200": 0.5320416498978522, "eval_full_en_cosine_ndcg@50": 0.4485020943766835, "eval_full_en_cosine_precision@1": 0.7335526315789473, "eval_full_en_cosine_precision@100": 0.30907894736842106, "eval_full_en_cosine_precision@150": 0.26278508771929826, "eval_full_en_cosine_precision@20": 0.4960526315789474, "eval_full_en_cosine_precision@200": 0.23090460526315787, "eval_full_en_cosine_precision@50": 0.39151315789473684, "eval_full_en_cosine_recall@1": 0.010402156873475942, "eval_full_en_cosine_recall@100": 0.39206565501916524, "eval_full_en_cosine_recall@150": 0.49176955829136443, "eval_full_en_cosine_recall@20": 0.1321996647113643, "eval_full_en_cosine_recall@200": 0.569344104113959, "eval_full_en_cosine_recall@50": 0.2535254041631645, "eval_runtime": 1.5826, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5320416498978522, "eval_steps_per_second": 0.0, "step": 2400 }, { "epoch": 2.790178571428571, "grad_norm": 11.857823371887207, "learning_rate": 2.3343515037593984e-05, "loss": 0.9272, "step": 2500 }, { "epoch": 2.9017857142857144, "grad_norm": 12.297764778137207, "learning_rate": 2.2168703007518798e-05, "loss": 0.9279, "step": 2600 }, { "epoch": 2.9017857142857144, "eval_full_en_cosine_accuracy@1": 0.7368421052631579, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.9967105263157895, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7368421052631579, "eval_full_en_cosine_map@100": 0.20939105710550232, "eval_full_en_cosine_map@150": 0.22725165687553775, "eval_full_en_cosine_map@20": 0.3403680329074837, "eval_full_en_cosine_map@200": 0.24658865195474836, "eval_full_en_cosine_map@50": 0.23612691752121232, "eval_full_en_cosine_map@500": 0.29718900909315255, "eval_full_en_cosine_mrr@1": 0.7368421052631579, "eval_full_en_cosine_mrr@100": 0.8391709003546018, "eval_full_en_cosine_mrr@150": 0.8391709003546018, "eval_full_en_cosine_mrr@20": 0.8391064008705977, "eval_full_en_cosine_mrr@200": 0.8391709003546018, "eval_full_en_cosine_mrr@50": 0.8391064008705977, "eval_full_en_cosine_ndcg@1": 0.7368421052631579, "eval_full_en_cosine_ndcg@100": 0.4389185422351881, "eval_full_en_cosine_ndcg@150": 0.4868646893605612, "eval_full_en_cosine_ndcg@20": 0.5359014833764041, "eval_full_en_cosine_ndcg@200": 0.5332804255738979, "eval_full_en_cosine_ndcg@50": 0.44749591453362436, "eval_full_en_cosine_precision@1": 0.7368421052631579, "eval_full_en_cosine_precision@100": 0.30779605263157894, "eval_full_en_cosine_precision@150": 0.26355263157894737, "eval_full_en_cosine_precision@20": 0.49588815789473684, "eval_full_en_cosine_precision@200": 0.2316282894736842, "eval_full_en_cosine_precision@50": 0.38901315789473684, "eval_full_en_cosine_recall@1": 0.010425572953236805, "eval_full_en_cosine_recall@100": 0.3892001066901767, "eval_full_en_cosine_recall@150": 0.492569756570653, "eval_full_en_cosine_recall@20": 0.13284603422933672, "eval_full_en_cosine_recall@200": 0.5706210722984945, "eval_full_en_cosine_recall@50": 0.2518705529759721, "eval_runtime": 1.577, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5332804255738979, "eval_steps_per_second": 0.0, "step": 2600 }, { "epoch": 3.013392857142857, "grad_norm": 12.120986938476562, "learning_rate": 2.099389097744361e-05, "loss": 0.857, "step": 2700 }, { "epoch": 3.125, "grad_norm": 14.276410102844238, "learning_rate": 1.9819078947368423e-05, "loss": 0.7313, "step": 2800 }, { "epoch": 3.125, "eval_full_en_cosine_accuracy@1": 0.7269736842105263, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.9967105263157895, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 1.0, "eval_full_en_cosine_map@1": 0.7269736842105263, "eval_full_en_cosine_map@100": 0.20830025965749158, "eval_full_en_cosine_map@150": 0.22525408557521698, "eval_full_en_cosine_map@20": 0.34094306993307805, "eval_full_en_cosine_map@200": 0.24400549054611867, "eval_full_en_cosine_map@50": 0.23400685602624646, "eval_full_en_cosine_map@500": 0.29401532392219154, "eval_full_en_cosine_mrr@1": 0.7269736842105263, "eval_full_en_cosine_mrr@100": 0.8315051952798665, "eval_full_en_cosine_mrr@150": 0.8315051952798665, "eval_full_en_cosine_mrr@20": 0.8314268744778616, "eval_full_en_cosine_mrr@200": 0.8315051952798665, "eval_full_en_cosine_mrr@50": 0.8315051952798665, "eval_full_en_cosine_ndcg@1": 0.7269736842105263, "eval_full_en_cosine_ndcg@100": 0.43885977048304636, "eval_full_en_cosine_ndcg@150": 0.48486671483618976, "eval_full_en_cosine_ndcg@20": 0.5365677326031855, "eval_full_en_cosine_ndcg@200": 0.5299990147795507, "eval_full_en_cosine_ndcg@50": 0.44591298214905706, "eval_full_en_cosine_precision@1": 0.7269736842105263, "eval_full_en_cosine_precision@100": 0.308125, "eval_full_en_cosine_precision@150": 0.2621052631578948, "eval_full_en_cosine_precision@20": 0.49786184210526313, "eval_full_en_cosine_precision@200": 0.22980263157894737, "eval_full_en_cosine_precision@50": 0.3870394736842105, "eval_full_en_cosine_recall@1": 0.010317820884117123, "eval_full_en_cosine_recall@100": 0.38998825691236244, "eval_full_en_cosine_recall@150": 0.4900687458798103, "eval_full_en_cosine_recall@20": 0.13271573138828288, "eval_full_en_cosine_recall@200": 0.5659226272090475, "eval_full_en_cosine_recall@50": 0.25218483369820577, "eval_runtime": 1.607, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5299990147795507, "eval_steps_per_second": 0.0, "step": 2800 }, { "epoch": 3.236607142857143, "grad_norm": 8.85190486907959, "learning_rate": 1.8644266917293237e-05, "loss": 0.7103, "step": 2900 }, { "epoch": 3.3482142857142856, "grad_norm": 8.932626724243164, "learning_rate": 1.7469454887218044e-05, "loss": 0.7187, "step": 3000 }, { "epoch": 3.3482142857142856, "eval_full_en_cosine_accuracy@1": 0.7269736842105263, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.9967105263157895, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7269736842105263, "eval_full_en_cosine_map@100": 0.20842370079433947, "eval_full_en_cosine_map@150": 0.22608431932756923, "eval_full_en_cosine_map@20": 0.34026464907579207, "eval_full_en_cosine_map@200": 0.2451065024940476, "eval_full_en_cosine_map@50": 0.23418777403622906, "eval_full_en_cosine_map@500": 0.2945476002258968, "eval_full_en_cosine_mrr@1": 0.7269736842105263, "eval_full_en_cosine_mrr@100": 0.8303256958684593, "eval_full_en_cosine_mrr@150": 0.8303256958684593, "eval_full_en_cosine_mrr@20": 0.830265887256019, "eval_full_en_cosine_mrr@200": 0.8303256958684593, "eval_full_en_cosine_mrr@50": 0.830265887256019, "eval_full_en_cosine_ndcg@1": 0.7269736842105263, "eval_full_en_cosine_ndcg@100": 0.4379203478644915, "eval_full_en_cosine_ndcg@150": 0.4860723616469748, "eval_full_en_cosine_ndcg@20": 0.534483012777908, "eval_full_en_cosine_ndcg@200": 0.5318565059446251, "eval_full_en_cosine_ndcg@50": 0.4443024102705765, "eval_full_en_cosine_precision@1": 0.7269736842105263, "eval_full_en_cosine_precision@100": 0.30750000000000005, "eval_full_en_cosine_precision@150": 0.26370614035087714, "eval_full_en_cosine_precision@20": 0.49588815789473684, "eval_full_en_cosine_precision@200": 0.23116776315789475, "eval_full_en_cosine_precision@50": 0.38539473684210523, "eval_full_en_cosine_recall@1": 0.010298189290703101, "eval_full_en_cosine_recall@100": 0.38891472258186655, "eval_full_en_cosine_recall@150": 0.4925623824521817, "eval_full_en_cosine_recall@20": 0.13215936080151625, "eval_full_en_cosine_recall@200": 0.5698259119139981, "eval_full_en_cosine_recall@50": 0.2502092759755724, "eval_runtime": 1.6179, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5318565059446251, "eval_steps_per_second": 0.0, "step": 3000 }, { "epoch": 3.4598214285714284, "grad_norm": 12.761665344238281, "learning_rate": 1.6294642857142858e-05, "loss": 0.7067, "step": 3100 }, { "epoch": 3.571428571428571, "grad_norm": 12.318887710571289, "learning_rate": 1.5119830827067668e-05, "loss": 0.7157, "step": 3200 }, { "epoch": 3.571428571428571, "eval_full_en_cosine_accuracy@1": 0.7072368421052632, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.993421052631579, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 1.0, "eval_full_en_cosine_map@1": 0.7072368421052632, "eval_full_en_cosine_map@100": 0.21126096647489126, "eval_full_en_cosine_map@150": 0.22897332387217115, "eval_full_en_cosine_map@20": 0.34020926250086975, "eval_full_en_cosine_map@200": 0.24883265008518762, "eval_full_en_cosine_map@50": 0.2366562995235259, "eval_full_en_cosine_map@500": 0.30009134506130936, "eval_full_en_cosine_mrr@1": 0.7072368421052632, "eval_full_en_cosine_mrr@100": 0.8208446325794724, "eval_full_en_cosine_mrr@150": 0.8208446325794724, "eval_full_en_cosine_mrr@20": 0.8206285125693021, "eval_full_en_cosine_mrr@200": 0.8208446325794724, "eval_full_en_cosine_mrr@50": 0.8208446325794724, "eval_full_en_cosine_ndcg@1": 0.7072368421052632, "eval_full_en_cosine_ndcg@100": 0.4420871692985379, "eval_full_en_cosine_ndcg@150": 0.48983718804719595, "eval_full_en_cosine_ndcg@20": 0.5349182539944062, "eval_full_en_cosine_ndcg@200": 0.5368995914478877, "eval_full_en_cosine_ndcg@50": 0.4481578438397021, "eval_full_en_cosine_precision@1": 0.7072368421052632, "eval_full_en_cosine_precision@100": 0.3118421052631579, "eval_full_en_cosine_precision@150": 0.26625, "eval_full_en_cosine_precision@20": 0.49786184210526313, "eval_full_en_cosine_precision@200": 0.2341282894736842, "eval_full_en_cosine_precision@50": 0.39125, "eval_full_en_cosine_recall@1": 0.010071368365416018, "eval_full_en_cosine_recall@100": 0.39435465355460575, "eval_full_en_cosine_recall@150": 0.49776297598034985, "eval_full_en_cosine_recall@20": 0.1332224887798492, "eval_full_en_cosine_recall@200": 0.5769437157052201, "eval_full_en_cosine_recall@50": 0.25406609475829245, "eval_runtime": 1.5833, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5368995914478877, "eval_steps_per_second": 0.0, "step": 3200 }, { "epoch": 3.6830357142857144, "grad_norm": 10.974320411682129, "learning_rate": 1.3945018796992482e-05, "loss": 0.7113, "step": 3300 }, { "epoch": 3.794642857142857, "grad_norm": 11.004631042480469, "learning_rate": 1.2770206766917295e-05, "loss": 0.7013, "step": 3400 }, { "epoch": 3.794642857142857, "eval_full_en_cosine_accuracy@1": 0.7269736842105263, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.9967105263157895, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 1.0, "eval_full_en_cosine_map@1": 0.7269736842105263, "eval_full_en_cosine_map@100": 0.20998333195374114, "eval_full_en_cosine_map@150": 0.22683318021248486, "eval_full_en_cosine_map@20": 0.34034679376659244, "eval_full_en_cosine_map@200": 0.24654495691213385, "eval_full_en_cosine_map@50": 0.23617479010012724, "eval_full_en_cosine_map@500": 0.29617185416029185, "eval_full_en_cosine_mrr@1": 0.7269736842105263, "eval_full_en_cosine_mrr@100": 0.8291805255603549, "eval_full_en_cosine_mrr@150": 0.8291805255603549, "eval_full_en_cosine_mrr@20": 0.8291105367585632, "eval_full_en_cosine_mrr@200": 0.8291805255603549, "eval_full_en_cosine_mrr@50": 0.8291805255603549, "eval_full_en_cosine_ndcg@1": 0.7269736842105263, "eval_full_en_cosine_ndcg@100": 0.4407299508694298, "eval_full_en_cosine_ndcg@150": 0.48655314671133576, "eval_full_en_cosine_ndcg@20": 0.5349966588302529, "eval_full_en_cosine_ndcg@200": 0.5341334488223752, "eval_full_en_cosine_ndcg@50": 0.448065635044085, "eval_full_en_cosine_precision@1": 0.7269736842105263, "eval_full_en_cosine_precision@100": 0.30973684210526314, "eval_full_en_cosine_precision@150": 0.26320175438596494, "eval_full_en_cosine_precision@20": 0.4965460526315789, "eval_full_en_cosine_precision@200": 0.23210526315789473, "eval_full_en_cosine_precision@50": 0.3907894736842106, "eval_full_en_cosine_recall@1": 0.010311461817674684, "eval_full_en_cosine_recall@100": 0.3931693265429022, "eval_full_en_cosine_recall@150": 0.49300140763214356, "eval_full_en_cosine_recall@20": 0.1329270784727238, "eval_full_en_cosine_recall@200": 0.573228327517634, "eval_full_en_cosine_recall@50": 0.25330386821616296, "eval_runtime": 1.577, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5341334488223752, "eval_steps_per_second": 0.0, "step": 3400 }, { "epoch": 3.90625, "grad_norm": 12.102640151977539, "learning_rate": 1.1595394736842107e-05, "loss": 0.6903, "step": 3500 }, { "epoch": 4.017857142857143, "grad_norm": 7.348757743835449, "learning_rate": 1.0420582706766918e-05, "loss": 0.6462, "step": 3600 }, { "epoch": 4.017857142857143, "eval_full_en_cosine_accuracy@1": 0.7203947368421053, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.9967105263157895, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7203947368421053, "eval_full_en_cosine_map@100": 0.2102732775077637, "eval_full_en_cosine_map@150": 0.22767943965852241, "eval_full_en_cosine_map@20": 0.338502447126724, "eval_full_en_cosine_map@200": 0.24667619158922902, "eval_full_en_cosine_map@50": 0.23576300870587916, "eval_full_en_cosine_map@500": 0.2971463650911015, "eval_full_en_cosine_mrr@1": 0.7203947368421053, "eval_full_en_cosine_mrr@100": 0.8263833835420962, "eval_full_en_cosine_mrr@150": 0.8263833835420962, "eval_full_en_cosine_mrr@20": 0.8263213180008847, "eval_full_en_cosine_mrr@200": 0.8263833835420962, "eval_full_en_cosine_mrr@50": 0.8263213180008847, "eval_full_en_cosine_ndcg@1": 0.7203947368421053, "eval_full_en_cosine_ndcg@100": 0.44114478517461736, "eval_full_en_cosine_ndcg@150": 0.4883455168714466, "eval_full_en_cosine_ndcg@20": 0.53288860900767, "eval_full_en_cosine_ndcg@200": 0.5334866046140189, "eval_full_en_cosine_ndcg@50": 0.4473951526251337, "eval_full_en_cosine_precision@1": 0.7203947368421053, "eval_full_en_cosine_precision@100": 0.31078947368421056, "eval_full_en_cosine_precision@150": 0.265219298245614, "eval_full_en_cosine_precision@20": 0.4934210526315789, "eval_full_en_cosine_precision@200": 0.23212171052631578, "eval_full_en_cosine_precision@50": 0.3899342105263158, "eval_full_en_cosine_recall@1": 0.01018155854728512, "eval_full_en_cosine_recall@100": 0.3935816727444405, "eval_full_en_cosine_recall@150": 0.4958028561341766, "eval_full_en_cosine_recall@20": 0.13181077303144853, "eval_full_en_cosine_recall@200": 0.5716317929962068, "eval_full_en_cosine_recall@50": 0.25274553753777246, "eval_runtime": 1.6024, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5334866046140189, "eval_steps_per_second": 0.0, "step": 3600 }, { "epoch": 4.129464285714286, "grad_norm": 8.786450386047363, "learning_rate": 9.24577067669173e-06, "loss": 0.5162, "step": 3700 }, { "epoch": 4.241071428571429, "grad_norm": 10.602435111999512, "learning_rate": 8.070958646616542e-06, "loss": 0.524, "step": 3800 }, { "epoch": 4.241071428571429, "eval_full_en_cosine_accuracy@1": 0.7302631578947368, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.993421052631579, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7302631578947368, "eval_full_en_cosine_map@100": 0.21150798737582682, "eval_full_en_cosine_map@150": 0.22868847990327232, "eval_full_en_cosine_map@20": 0.3411525812655742, "eval_full_en_cosine_map@200": 0.2480155691306444, "eval_full_en_cosine_map@50": 0.23814436251631807, "eval_full_en_cosine_map@500": 0.29792672341621373, "eval_full_en_cosine_mrr@1": 0.7302631578947368, "eval_full_en_cosine_mrr@100": 0.8323485085820613, "eval_full_en_cosine_mrr@150": 0.8323485085820613, "eval_full_en_cosine_mrr@20": 0.8321467731829576, "eval_full_en_cosine_mrr@200": 0.8323485085820613, "eval_full_en_cosine_mrr@50": 0.832296294714058, "eval_full_en_cosine_ndcg@1": 0.7302631578947368, "eval_full_en_cosine_ndcg@100": 0.44247378999755477, "eval_full_en_cosine_ndcg@150": 0.48886293038433404, "eval_full_en_cosine_ndcg@20": 0.5351701323930714, "eval_full_en_cosine_ndcg@200": 0.5352268343210608, "eval_full_en_cosine_ndcg@50": 0.4502625298651447, "eval_full_en_cosine_precision@1": 0.7302631578947368, "eval_full_en_cosine_precision@100": 0.311546052631579, "eval_full_en_cosine_precision@150": 0.265219298245614, "eval_full_en_cosine_precision@20": 0.49588815789473684, "eval_full_en_cosine_precision@200": 0.23268092105263163, "eval_full_en_cosine_precision@50": 0.3930921052631579, "eval_full_en_cosine_recall@1": 0.010244630514181254, "eval_full_en_cosine_recall@100": 0.39498767852245736, "eval_full_en_cosine_recall@150": 0.49574169519464223, "eval_full_en_cosine_recall@20": 0.1324589336710221, "eval_full_en_cosine_recall@200": 0.574019804020236, "eval_full_en_cosine_recall@50": 0.2548099607629461, "eval_runtime": 1.5919, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5352268343210608, "eval_steps_per_second": 0.0, "step": 3800 }, { "epoch": 4.352678571428571, "grad_norm": 11.65066909790039, "learning_rate": 6.896146616541354e-06, "loss": 0.5303, "step": 3900 }, { "epoch": 4.464285714285714, "grad_norm": 10.764215469360352, "learning_rate": 5.721334586466166e-06, "loss": 0.5269, "step": 4000 }, { "epoch": 4.464285714285714, "eval_full_en_cosine_accuracy@1": 0.7368421052631579, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.993421052631579, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7368421052631579, "eval_full_en_cosine_map@100": 0.2101198919267321, "eval_full_en_cosine_map@150": 0.2276536266469315, "eval_full_en_cosine_map@20": 0.34076177455520346, "eval_full_en_cosine_map@200": 0.24678319516569472, "eval_full_en_cosine_map@50": 0.23677969810249233, "eval_full_en_cosine_map@500": 0.297249372287514, "eval_full_en_cosine_mrr@1": 0.7368421052631579, "eval_full_en_cosine_mrr@100": 0.8373899157616261, "eval_full_en_cosine_mrr@150": 0.8373899157616261, "eval_full_en_cosine_mrr@20": 0.837172357978279, "eval_full_en_cosine_mrr@200": 0.8373899157616261, "eval_full_en_cosine_mrr@50": 0.837328999582289, "eval_full_en_cosine_ndcg@1": 0.7368421052631579, "eval_full_en_cosine_ndcg@100": 0.4408521323246635, "eval_full_en_cosine_ndcg@150": 0.48834055710549873, "eval_full_en_cosine_ndcg@20": 0.5353264293739176, "eval_full_en_cosine_ndcg@200": 0.5341206282180626, "eval_full_en_cosine_ndcg@50": 0.44939083758113085, "eval_full_en_cosine_precision@1": 0.7368421052631579, "eval_full_en_cosine_precision@100": 0.30953947368421053, "eval_full_en_cosine_precision@150": 0.26460526315789473, "eval_full_en_cosine_precision@20": 0.4947368421052632, "eval_full_en_cosine_precision@200": 0.23187500000000003, "eval_full_en_cosine_precision@50": 0.3913815789473684, "eval_full_en_cosine_recall@1": 0.010305566449078924, "eval_full_en_cosine_recall@100": 0.3922740640225546, "eval_full_en_cosine_recall@150": 0.4949163913773604, "eval_full_en_cosine_recall@20": 0.13233275450376297, "eval_full_en_cosine_recall@200": 0.572041877895568, "eval_full_en_cosine_recall@50": 0.2535655251683108, "eval_runtime": 1.5798, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5341206282180626, "eval_steps_per_second": 0.0, "step": 4000 }, { "epoch": 4.575892857142857, "grad_norm": 10.328286170959473, "learning_rate": 4.546522556390977e-06, "loss": 0.4824, "step": 4100 }, { "epoch": 4.6875, "grad_norm": 10.712604522705078, "learning_rate": 3.3717105263157897e-06, "loss": 0.5222, "step": 4200 }, { "epoch": 4.6875, "eval_full_en_cosine_accuracy@1": 0.7302631578947368, "eval_full_en_cosine_accuracy@100": 1.0, "eval_full_en_cosine_accuracy@150": 1.0, "eval_full_en_cosine_accuracy@20": 0.993421052631579, "eval_full_en_cosine_accuracy@200": 1.0, "eval_full_en_cosine_accuracy@50": 0.9967105263157895, "eval_full_en_cosine_map@1": 0.7302631578947368, "eval_full_en_cosine_map@100": 0.21090472549603356, "eval_full_en_cosine_map@150": 0.22845220292726734, "eval_full_en_cosine_map@20": 0.342326318294358, "eval_full_en_cosine_map@200": 0.24733168088568283, "eval_full_en_cosine_map@50": 0.23774626029530496, "eval_full_en_cosine_map@500": 0.2977609786459198, "eval_full_en_cosine_mrr@1": 0.7302631578947368, "eval_full_en_cosine_mrr@100": 0.8356281328320803, "eval_full_en_cosine_mrr@150": 0.8356281328320803, "eval_full_en_cosine_mrr@20": 0.8354127506265665, "eval_full_en_cosine_mrr@200": 0.8356281328320803, "eval_full_en_cosine_mrr@50": 0.8355693922305765, "eval_full_en_cosine_ndcg@1": 0.7302631578947368, "eval_full_en_cosine_ndcg@100": 0.44188546614809043, "eval_full_en_cosine_ndcg@150": 0.48899866366733713, "eval_full_en_cosine_ndcg@20": 0.5372965681233445, "eval_full_en_cosine_ndcg@200": 0.5342393130950145, "eval_full_en_cosine_ndcg@50": 0.4501724823363586, "eval_full_en_cosine_precision@1": 0.7302631578947368, "eval_full_en_cosine_precision@100": 0.3101973684210526, "eval_full_en_cosine_precision@150": 0.2649780701754386, "eval_full_en_cosine_precision@20": 0.49786184210526313, "eval_full_en_cosine_precision@200": 0.23199013157894738, "eval_full_en_cosine_precision@50": 0.3921710526315789, "eval_full_en_cosine_recall@1": 0.01024195976751409, "eval_full_en_cosine_recall@100": 0.3937143686320033, "eval_full_en_cosine_recall@150": 0.4954717634968576, "eval_full_en_cosine_recall@20": 0.13293505289394864, "eval_full_en_cosine_recall@200": 0.5715937768635994, "eval_full_en_cosine_recall@50": 0.2542542782427721, "eval_runtime": 1.579, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5342393130950145, "eval_steps_per_second": 0.0, "step": 4200 } ], "logging_steps": 100, "max_steps": 4480, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }