{ "best_global_step": 3600, "best_metric": 0.9848043970255416, "best_model_checkpoint": "/workspace/AI/Trend_Primus-FineWeb_Filtering-pipeline/securebert_finetuned/offensive_vs_rest/checkpoint-3600", "epoch": 2.8391167192429023, "eval_steps": 300, "global_step": 3600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07886435331230283, "grad_norm": 1.846426010131836, "learning_rate": 5.600000000000001e-06, "loss": 0.2696, "step": 100 }, { "epoch": 0.15772870662460567, "grad_norm": 5.872535228729248, "learning_rate": 1.1314285714285715e-05, "loss": 0.1452, "step": 200 }, { "epoch": 0.23659305993690852, "grad_norm": 1.9362571239471436, "learning_rate": 1.702857142857143e-05, "loss": 0.0772, "step": 300 }, { "epoch": 0.23659305993690852, "eval_f1": 0.7849643551523007, "eval_f2": 0.8937426210153483, "eval_loss": 0.04672536998987198, "eval_precision": 0.6525862068965518, "eval_recall": 0.9847154471544716, "eval_runtime": 24.5242, "eval_samples_per_second": 735.193, "eval_steps_per_second": 11.499, "step": 300 }, { "epoch": 0.31545741324921134, "grad_norm": 5.031215667724609, "learning_rate": 1.9722061378112335e-05, "loss": 0.0442, "step": 400 }, { "epoch": 0.3943217665615142, "grad_norm": 1.0280592441558838, "learning_rate": 1.9143022582513028e-05, "loss": 0.0364, "step": 500 }, { "epoch": 0.47318611987381703, "grad_norm": 1.5782877206802368, "learning_rate": 1.8563983786913724e-05, "loss": 0.0305, "step": 600 }, { "epoch": 0.47318611987381703, "eval_f1": 0.8569032979318055, "eval_f2": 0.9358403027898174, "eval_loss": 0.02299814671278, "eval_precision": 0.7512864493996569, "eval_recall": 0.9970731707317073, "eval_runtime": 24.8283, "eval_samples_per_second": 726.188, "eval_steps_per_second": 11.358, "step": 600 }, { "epoch": 0.5520504731861199, "grad_norm": 1.143188714981079, "learning_rate": 1.798494499131442e-05, "loss": 0.0311, "step": 700 }, { "epoch": 0.6309148264984227, "grad_norm": 1.4598668813705444, "learning_rate": 1.7405906195715113e-05, "loss": 0.0273, "step": 800 }, { "epoch": 0.7097791798107256, "grad_norm": 0.9353739619255066, "learning_rate": 1.682686740011581e-05, "loss": 0.0232, "step": 900 }, { "epoch": 0.7097791798107256, "eval_f1": 0.9401762250734271, "eval_f2": 0.9688415955142092, "eval_loss": 0.01698540337383747, "eval_precision": 0.8959929286977019, "eval_recall": 0.9889430894308943, "eval_runtime": 24.5846, "eval_samples_per_second": 733.386, "eval_steps_per_second": 11.471, "step": 900 }, { "epoch": 0.7886435331230284, "grad_norm": 1.172129511833191, "learning_rate": 1.6247828604516505e-05, "loss": 0.0232, "step": 1000 }, { "epoch": 0.8675078864353313, "grad_norm": 0.7822222113609314, "learning_rate": 1.56687898089172e-05, "loss": 0.0217, "step": 1100 }, { "epoch": 0.9463722397476341, "grad_norm": 0.9808489680290222, "learning_rate": 1.5089751013317892e-05, "loss": 0.0167, "step": 1200 }, { "epoch": 0.9463722397476341, "eval_f1": 0.9422098936662043, "eval_f2": 0.9726995036273387, "eval_loss": 0.013439147733151913, "eval_precision": 0.8954305799648506, "eval_recall": 0.9941463414634146, "eval_runtime": 25.9737, "eval_samples_per_second": 694.163, "eval_steps_per_second": 10.857, "step": 1200 }, { "epoch": 1.025236593059937, "grad_norm": 0.4348973035812378, "learning_rate": 1.4510712217718588e-05, "loss": 0.0162, "step": 1300 }, { "epoch": 1.1041009463722398, "grad_norm": 0.4453680217266083, "learning_rate": 1.3931673422119283e-05, "loss": 0.0135, "step": 1400 }, { "epoch": 1.1829652996845426, "grad_norm": 1.2444119453430176, "learning_rate": 1.3352634626519977e-05, "loss": 0.0093, "step": 1500 }, { "epoch": 1.1829652996845426, "eval_f1": 0.9642065251821349, "eval_f2": 0.9794710084304009, "eval_loss": 0.014342821203172207, "eval_precision": 0.939796233405372, "eval_recall": 0.9899186991869918, "eval_runtime": 24.7342, "eval_samples_per_second": 728.949, "eval_steps_per_second": 11.401, "step": 1500 }, { "epoch": 1.2618296529968454, "grad_norm": 1.2138129472732544, "learning_rate": 1.2773595830920673e-05, "loss": 0.0106, "step": 1600 }, { "epoch": 1.3406940063091484, "grad_norm": 3.329469680786133, "learning_rate": 1.2194557035321368e-05, "loss": 0.0101, "step": 1700 }, { "epoch": 1.4195583596214512, "grad_norm": 0.7627914547920227, "learning_rate": 1.1615518239722064e-05, "loss": 0.0118, "step": 1800 }, { "epoch": 1.4195583596214512, "eval_f1": 0.9513143568206563, "eval_f2": 0.9767471572760955, "eval_loss": 0.01234134566038847, "eval_precision": 0.9117471675611211, "eval_recall": 0.9944715447154472, "eval_runtime": 25.1145, "eval_samples_per_second": 717.911, "eval_steps_per_second": 11.229, "step": 1800 }, { "epoch": 1.498422712933754, "grad_norm": 0.9591709971427917, "learning_rate": 1.1036479444122757e-05, "loss": 0.0093, "step": 1900 }, { "epoch": 1.5772870662460567, "grad_norm": 0.4569564759731293, "learning_rate": 1.0457440648523451e-05, "loss": 0.0094, "step": 2000 }, { "epoch": 1.6561514195583595, "grad_norm": 0.7519212365150452, "learning_rate": 9.88419224088014e-06, "loss": 0.0094, "step": 2100 }, { "epoch": 1.6561514195583595, "eval_f1": 0.9646464646464646, "eval_f2": 0.9819420345736135, "eval_loss": 0.012274333275854588, "eval_precision": 0.9371358478994174, "eval_recall": 0.9938211382113821, "eval_runtime": 24.295, "eval_samples_per_second": 742.127, "eval_steps_per_second": 11.607, "step": 2100 }, { "epoch": 1.7350157728706623, "grad_norm": 0.06854517012834549, "learning_rate": 9.305153445280834e-06, "loss": 0.0101, "step": 2200 }, { "epoch": 1.8138801261829653, "grad_norm": 1.0062646865844727, "learning_rate": 8.726114649681529e-06, "loss": 0.0106, "step": 2300 }, { "epoch": 1.8927444794952681, "grad_norm": 0.1466594785451889, "learning_rate": 8.147075854082223e-06, "loss": 0.0079, "step": 2400 }, { "epoch": 1.8927444794952681, "eval_f1": 0.9619496855345911, "eval_f2": 0.9813923644529997, "eval_loss": 0.011407392099499702, "eval_precision": 0.9312024353120244, "eval_recall": 0.9947967479674796, "eval_runtime": 26.3939, "eval_samples_per_second": 683.112, "eval_steps_per_second": 10.684, "step": 2400 }, { "epoch": 1.971608832807571, "grad_norm": 0.26108694076538086, "learning_rate": 7.568037058482919e-06, "loss": 0.0094, "step": 2500 }, { "epoch": 2.050473186119874, "grad_norm": 0.024676967412233353, "learning_rate": 6.988998262883614e-06, "loss": 0.0073, "step": 2600 }, { "epoch": 2.1293375394321767, "grad_norm": 1.5645203590393066, "learning_rate": 6.409959467284309e-06, "loss": 0.0041, "step": 2700 }, { "epoch": 2.1293375394321767, "eval_f1": 0.9681407513076558, "eval_f2": 0.9830050212437235, "eval_loss": 0.011503643356263638, "eval_precision": 0.9443413729128015, "eval_recall": 0.9931707317073171, "eval_runtime": 25.3938, "eval_samples_per_second": 710.014, "eval_steps_per_second": 11.105, "step": 2700 }, { "epoch": 2.2082018927444795, "grad_norm": 0.933417022228241, "learning_rate": 5.830920671685003e-06, "loss": 0.0045, "step": 2800 }, { "epoch": 2.2870662460567823, "grad_norm": 0.7878792881965637, "learning_rate": 5.251881876085698e-06, "loss": 0.0037, "step": 2900 }, { "epoch": 2.365930599369085, "grad_norm": 0.09505568444728851, "learning_rate": 4.6728430804863925e-06, "loss": 0.0035, "step": 3000 }, { "epoch": 2.365930599369085, "eval_f1": 0.9727229223161589, "eval_f2": 0.9839292629404931, "eval_loss": 0.013037587516009808, "eval_precision": 0.9546023794614903, "eval_recall": 0.9915447154471545, "eval_runtime": 26.4329, "eval_samples_per_second": 682.105, "eval_steps_per_second": 10.669, "step": 3000 }, { "epoch": 2.444794952681388, "grad_norm": 0.08773530274629593, "learning_rate": 4.093804284887088e-06, "loss": 0.0029, "step": 3100 }, { "epoch": 2.5236593059936907, "grad_norm": 0.060790352523326874, "learning_rate": 3.5147654892877827e-06, "loss": 0.0039, "step": 3200 }, { "epoch": 2.6025236593059935, "grad_norm": 1.6132954359054565, "learning_rate": 2.9357266936884776e-06, "loss": 0.0043, "step": 3300 }, { "epoch": 2.6025236593059935, "eval_f1": 0.975609756097561, "eval_f2": 0.9833732289577538, "eval_loss": 0.014466837979853153, "eval_precision": 0.9629394995248653, "eval_recall": 0.9886178861788618, "eval_runtime": 26.7793, "eval_samples_per_second": 673.282, "eval_steps_per_second": 10.531, "step": 3300 }, { "epoch": 2.6813880126182967, "grad_norm": 0.24806837737560272, "learning_rate": 2.356687898089172e-06, "loss": 0.0047, "step": 3400 }, { "epoch": 2.7602523659305995, "grad_norm": 3.355231523513794, "learning_rate": 1.777649102489867e-06, "loss": 0.0031, "step": 3500 }, { "epoch": 2.8391167192429023, "grad_norm": 0.44861266016960144, "learning_rate": 1.1986103068905617e-06, "loss": 0.004, "step": 3600 }, { "epoch": 2.8391167192429023, "eval_f1": 0.9762820512820513, "eval_f2": 0.9848043970255416, "eval_loss": 0.01391169149428606, "eval_precision": 0.9624012638230648, "eval_recall": 0.9905691056910569, "eval_runtime": 26.114, "eval_samples_per_second": 690.435, "eval_steps_per_second": 10.799, "step": 3600 } ], "logging_steps": 100, "max_steps": 3804, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 300, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.2122157786968064e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }