| { | |
| "best_global_step": 1100, | |
| "best_metric": 0.8458904558156242, | |
| "best_model_checkpoint": "modernbert-ai-detector\\checkpoint-1000", | |
| "epoch": 0.6327445293962563, | |
| "eval_steps": 100, | |
| "global_step": 1200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.026364355391510677, | |
| "grad_norm": 58.5, | |
| "learning_rate": 2.5843881856540085e-06, | |
| "loss": 2.6514, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.052728710783021354, | |
| "grad_norm": 38.0, | |
| "learning_rate": 5.221518987341772e-06, | |
| "loss": 2.4826, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.052728710783021354, | |
| "eval_accuracy": 0.3540233562629979, | |
| "eval_f1_ai": 0.1182551168079388, | |
| "eval_f1_human": 0.3140407288317256, | |
| "eval_f1_macro": 0.2998623455656316, | |
| "eval_f1_micro": 0.3540233562629979, | |
| "eval_f1_mixed": 0.46729119105723055, | |
| "eval_f1_weighted": 0.35779828876973646, | |
| "eval_loss": 1.1378378868103027, | |
| "eval_precision_ai": 0.12716763005780346, | |
| "eval_precision_human": 0.26479891549932216, | |
| "eval_precision_mixed": 0.5093530118414278, | |
| "eval_precision_weighted": 0.37081108952953074, | |
| "eval_recall_ai": 0.11051004636785162, | |
| "eval_recall_human": 0.3857801184990125, | |
| "eval_recall_mixed": 0.4316463059918557, | |
| "eval_recall_weighted": 0.3540233562629979, | |
| "eval_runtime": 201.6513, | |
| "eval_samples_per_second": 61.998, | |
| "eval_steps_per_second": 1.939, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07909306617453203, | |
| "grad_norm": 30.625, | |
| "learning_rate": 7.858649789029536e-06, | |
| "loss": 2.1667, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.10545742156604271, | |
| "grad_norm": 30.25, | |
| "learning_rate": 1.04957805907173e-05, | |
| "loss": 1.9264, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.10545742156604271, | |
| "eval_accuracy": 0.560550311950088, | |
| "eval_f1_ai": 0.2870420995079278, | |
| "eval_f1_human": 0.08250526949713942, | |
| "eval_f1_macro": 0.35789350580335344, | |
| "eval_f1_micro": 0.560550311950088, | |
| "eval_f1_mixed": 0.7041331484049931, | |
| "eval_f1_weighted": 0.46673616146948954, | |
| "eval_loss": 0.9078124165534973, | |
| "eval_precision_ai": 0.49065420560747663, | |
| "eval_precision_human": 0.4840989399293286, | |
| "eval_precision_mixed": 0.5691990313032559, | |
| "eval_precision_weighted": 0.5322602945815579, | |
| "eval_recall_ai": 0.20285935085007728, | |
| "eval_recall_human": 0.04509545753785385, | |
| "eval_recall_mixed": 0.9229203025014543, | |
| "eval_recall_weighted": 0.560550311950088, | |
| "eval_runtime": 211.1463, | |
| "eval_samples_per_second": 59.21, | |
| "eval_steps_per_second": 1.852, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1318217769575534, | |
| "grad_norm": 24.875, | |
| "learning_rate": 1.3132911392405065e-05, | |
| "loss": 1.768, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.15818613234906406, | |
| "grad_norm": 49.75, | |
| "learning_rate": 1.5770042194092827e-05, | |
| "loss": 1.6275, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.15818613234906406, | |
| "eval_accuracy": 0.6739721644536875, | |
| "eval_f1_ai": 0.75, | |
| "eval_f1_human": 0.20677177565262342, | |
| "eval_f1_macro": 0.5733127667382775, | |
| "eval_f1_micro": 0.6739721644536875, | |
| "eval_f1_mixed": 0.763166524562209, | |
| "eval_f1_weighted": 0.625236416359176, | |
| "eval_loss": 0.7546337842941284, | |
| "eval_precision_ai": 0.6690909090909091, | |
| "eval_precision_human": 0.4813477737665463, | |
| "eval_precision_mixed": 0.6950185163062955, | |
| "eval_precision_weighted": 0.6377290935492025, | |
| "eval_recall_ai": 0.8531684698608965, | |
| "eval_recall_human": 0.1316655694535879, | |
| "eval_recall_mixed": 0.8461314717859221, | |
| "eval_recall_weighted": 0.6739721644536875, | |
| "eval_runtime": 215.6745, | |
| "eval_samples_per_second": 57.967, | |
| "eval_steps_per_second": 1.813, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.18455048774057475, | |
| "grad_norm": 14.25, | |
| "learning_rate": 1.8407172995780592e-05, | |
| "loss": 1.5471, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.21091484313208542, | |
| "grad_norm": 21.875, | |
| "learning_rate": 2.1044303797468356e-05, | |
| "loss": 1.4071, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.21091484313208542, | |
| "eval_accuracy": 0.6927691569348904, | |
| "eval_f1_ai": 0.7882645141260478, | |
| "eval_f1_human": 0.3710217755443886, | |
| "eval_f1_macro": 0.639632496479439, | |
| "eval_f1_micro": 0.6927691569348904, | |
| "eval_f1_mixed": 0.7596111997678805, | |
| "eval_f1_weighted": 0.6711149677064479, | |
| "eval_loss": 0.6714780926704407, | |
| "eval_precision_ai": 0.6587960560456668, | |
| "eval_precision_human": 0.5097813578826237, | |
| "eval_precision_mixed": 0.7577424023154848, | |
| "eval_precision_weighted": 0.6770050165265453, | |
| "eval_recall_ai": 0.9810664605873262, | |
| "eval_recall_human": 0.2916392363396972, | |
| "eval_recall_mixed": 0.7614892379290285, | |
| "eval_recall_weighted": 0.6927691569348904, | |
| "eval_runtime": 198.7063, | |
| "eval_samples_per_second": 62.917, | |
| "eval_steps_per_second": 1.968, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2372791985235961, | |
| "grad_norm": 77.0, | |
| "learning_rate": 2.3681434599156117e-05, | |
| "loss": 1.3628, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2636435539151068, | |
| "grad_norm": 30.5, | |
| "learning_rate": 2.6318565400843882e-05, | |
| "loss": 1.2541, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2636435539151068, | |
| "eval_accuracy": 0.7350023996160614, | |
| "eval_f1_ai": 0.8567004988818167, | |
| "eval_f1_human": 0.30513595166163143, | |
| "eval_f1_macro": 0.654182029936718, | |
| "eval_f1_micro": 0.7350023996160614, | |
| "eval_f1_mixed": 0.8007096392667061, | |
| "eval_f1_weighted": 0.6918751713207526, | |
| "eval_loss": 0.588551938533783, | |
| "eval_precision_ai": 0.772093023255814, | |
| "eval_precision_human": 0.6488222698072805, | |
| "eval_precision_mixed": 0.7303128371089536, | |
| "eval_precision_weighted": 0.7191593239339089, | |
| "eval_recall_ai": 0.9621329211746522, | |
| "eval_recall_human": 0.19947333772218565, | |
| "eval_recall_mixed": 0.8861256544502618, | |
| "eval_recall_weighted": 0.7350023996160614, | |
| "eval_runtime": 200.2569, | |
| "eval_samples_per_second": 62.43, | |
| "eval_steps_per_second": 1.952, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.29000790930661746, | |
| "grad_norm": 57.25, | |
| "learning_rate": 2.8955696202531646e-05, | |
| "loss": 1.2181, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.3163722646981281, | |
| "grad_norm": 46.5, | |
| "learning_rate": 3.159282700421941e-05, | |
| "loss": 1.305, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3163722646981281, | |
| "eval_accuracy": 0.7118860982242842, | |
| "eval_f1_ai": 0.863129435620142, | |
| "eval_f1_human": 0.563509072724675, | |
| "eval_f1_macro": 0.7167971002003201, | |
| "eval_f1_micro": 0.7118860982242842, | |
| "eval_f1_mixed": 0.723752792256143, | |
| "eval_f1_weighted": 0.7136653128999944, | |
| "eval_loss": 0.5719289779663086, | |
| "eval_precision_ai": 0.766966966966967, | |
| "eval_precision_human": 0.4978540772532189, | |
| "eval_precision_mixed": 0.8393782383419689, | |
| "eval_precision_weighted": 0.7413978534670587, | |
| "eval_recall_ai": 0.9868624420401855, | |
| "eval_recall_human": 0.6491112574061882, | |
| "eval_recall_mixed": 0.6361256544502618, | |
| "eval_recall_weighted": 0.7118860982242842, | |
| "eval_runtime": 201.8683, | |
| "eval_samples_per_second": 61.931, | |
| "eval_steps_per_second": 1.937, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3427366200896388, | |
| "grad_norm": 40.25, | |
| "learning_rate": 3.422995780590718e-05, | |
| "loss": 1.1546, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.3691009754811495, | |
| "grad_norm": 19.375, | |
| "learning_rate": 3.686708860759494e-05, | |
| "loss": 1.1359, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.3691009754811495, | |
| "eval_accuracy": 0.7633978563429851, | |
| "eval_f1_ai": 0.9003927168868261, | |
| "eval_f1_human": 0.6369589977220956, | |
| "eval_f1_macro": 0.7701658728773962, | |
| "eval_f1_micro": 0.7633978563429851, | |
| "eval_f1_mixed": 0.7731459040232671, | |
| "eval_f1_weighted": 0.7663932988679265, | |
| "eval_loss": 0.5114254355430603, | |
| "eval_precision_ai": 0.8367617783676178, | |
| "eval_precision_human": 0.5612142498745609, | |
| "eval_precision_mixed": 0.8696837513631407, | |
| "eval_precision_weighted": 0.7879102421938303, | |
| "eval_recall_ai": 0.9744976816074189, | |
| "eval_recall_human": 0.7363396971691902, | |
| "eval_recall_mixed": 0.6958987783595113, | |
| "eval_recall_weighted": 0.7633978563429851, | |
| "eval_runtime": 212.8928, | |
| "eval_samples_per_second": 58.724, | |
| "eval_steps_per_second": 1.837, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.39546533087266017, | |
| "grad_norm": 23.125, | |
| "learning_rate": 3.95042194092827e-05, | |
| "loss": 1.0872, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.42182968626417083, | |
| "grad_norm": 157.0, | |
| "learning_rate": 4.214135021097047e-05, | |
| "loss": 0.9872, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.42182968626417083, | |
| "eval_accuracy": 0.8017917133258678, | |
| "eval_f1_ai": 0.9022680412371133, | |
| "eval_f1_human": 0.6192686096157416, | |
| "eval_f1_macro": 0.7856376091542879, | |
| "eval_f1_micro": 0.8017917133258678, | |
| "eval_f1_mixed": 0.8353761766100088, | |
| "eval_f1_weighted": 0.7967088719488636, | |
| "eval_loss": 0.4510592222213745, | |
| "eval_precision_ai": 0.9672855879752431, | |
| "eval_precision_human": 0.7100893997445722, | |
| "eval_precision_mixed": 0.7816499809910024, | |
| "eval_precision_weighted": 0.8026885272274894, | |
| "eval_recall_ai": 0.8454404945904173, | |
| "eval_recall_human": 0.5490454246214614, | |
| "eval_recall_mixed": 0.8970331588132635, | |
| "eval_recall_weighted": 0.8017917133258678, | |
| "eval_runtime": 206.8504, | |
| "eval_samples_per_second": 60.44, | |
| "eval_steps_per_second": 1.89, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.4481940416556815, | |
| "grad_norm": 46.0, | |
| "learning_rate": 4.477848101265823e-05, | |
| "loss": 0.9952, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.4745583970471922, | |
| "grad_norm": 23.375, | |
| "learning_rate": 4.7415611814346e-05, | |
| "loss": 0.8916, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.4745583970471922, | |
| "eval_accuracy": 0.826427771556551, | |
| "eval_f1_ai": 0.9497544389875331, | |
| "eval_f1_human": 0.712417340191036, | |
| "eval_f1_macro": 0.8327089451194482, | |
| "eval_f1_micro": 0.826427771556551, | |
| "eval_f1_mixed": 0.8359550561797753, | |
| "eval_f1_weighted": 0.8294925079101134, | |
| "eval_loss": 0.39986124634742737, | |
| "eval_precision_ai": 0.9290465631929047, | |
| "eval_precision_human": 0.6434828776214494, | |
| "eval_precision_mixed": 0.8946757339525626, | |
| "eval_precision_weighted": 0.8407505866593362, | |
| "eval_recall_ai": 0.9714064914992272, | |
| "eval_recall_human": 0.7978933508887426, | |
| "eval_recall_mixed": 0.7844677137870855, | |
| "eval_recall_weighted": 0.826427771556551, | |
| "eval_runtime": 209.7976, | |
| "eval_samples_per_second": 59.591, | |
| "eval_steps_per_second": 1.864, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5009227524387029, | |
| "grad_norm": 68.5, | |
| "learning_rate": 4.9994139709329584e-05, | |
| "loss": 0.899, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5272871078302136, | |
| "grad_norm": 24.5, | |
| "learning_rate": 4.970112517580872e-05, | |
| "loss": 0.8748, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5272871078302136, | |
| "eval_accuracy": 0.8299472084466485, | |
| "eval_f1_ai": 0.943791329904482, | |
| "eval_f1_human": 0.727299016772701, | |
| "eval_f1_macro": 0.8360557403613388, | |
| "eval_f1_micro": 0.8299472084466485, | |
| "eval_f1_mixed": 0.8370768744068333, | |
| "eval_f1_weighted": 0.8324913584362222, | |
| "eval_loss": 0.3658604323863983, | |
| "eval_precision_ai": 0.8995098039215687, | |
| "eval_precision_human": 0.64853017019082, | |
| "eval_precision_mixed": 0.9174757281553398, | |
| "eval_precision_weighted": 0.8484025864969482, | |
| "eval_recall_ai": 0.9926584234930448, | |
| "eval_recall_human": 0.8278472679394339, | |
| "eval_recall_mixed": 0.7696335078534031, | |
| "eval_recall_weighted": 0.8299472084466485, | |
| "eval_runtime": 204.603, | |
| "eval_samples_per_second": 61.104, | |
| "eval_steps_per_second": 1.911, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5536514632217242, | |
| "grad_norm": 51.75, | |
| "learning_rate": 4.940811064228786e-05, | |
| "loss": 0.7453, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.5800158186132349, | |
| "grad_norm": 34.5, | |
| "learning_rate": 4.9115096108766995e-05, | |
| "loss": 0.8091, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.5800158186132349, | |
| "eval_accuracy": 0.8637817949128139, | |
| "eval_f1_ai": 0.9449456821948076, | |
| "eval_f1_human": 0.7056443455391462, | |
| "eval_f1_macro": 0.8458904558156242, | |
| "eval_f1_micro": 0.8637817949128139, | |
| "eval_f1_mixed": 0.8870813397129187, | |
| "eval_f1_weighted": 0.8549702638885073, | |
| "eval_loss": 0.32950422167778015, | |
| "eval_precision_ai": 0.9025677101653183, | |
| "eval_precision_human": 0.9154855643044619, | |
| "eval_precision_mixed": 0.8368583956667527, | |
| "eval_precision_weighted": 0.8695671658030227, | |
| "eval_recall_ai": 0.991499227202473, | |
| "eval_recall_human": 0.5740618828176431, | |
| "eval_recall_mixed": 0.943717277486911, | |
| "eval_recall_weighted": 0.8637817949128139, | |
| "eval_runtime": 206.2487, | |
| "eval_samples_per_second": 60.616, | |
| "eval_steps_per_second": 1.896, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.6063801740047455, | |
| "grad_norm": 80.0, | |
| "learning_rate": 4.882208157524614e-05, | |
| "loss": 0.7375, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.6327445293962563, | |
| "grad_norm": 28.375, | |
| "learning_rate": 4.8529067041725276e-05, | |
| "loss": 0.7169, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.6327445293962563, | |
| "eval_accuracy": 0.8502639577667573, | |
| "eval_f1_ai": 0.866811861283297, | |
| "eval_f1_human": 0.7657316148597423, | |
| "eval_f1_macro": 0.8360143830281289, | |
| "eval_f1_micro": 0.8502639577667573, | |
| "eval_f1_mixed": 0.8754996729413475, | |
| "eval_f1_weighted": 0.847027475131161, | |
| "eval_loss": 0.39430809020996094, | |
| "eval_precision_ai": 0.7651582372079266, | |
| "eval_precision_human": 0.902591599642538, | |
| "eval_precision_mixed": 0.8750544820572425, | |
| "eval_precision_weighted": 0.8589967538180886, | |
| "eval_recall_ai": 0.999613601236476, | |
| "eval_recall_human": 0.6649111257406188, | |
| "eval_recall_mixed": 0.8759453170447935, | |
| "eval_recall_weighted": 0.8502639577667573, | |
| "eval_runtime": 214.0168, | |
| "eval_samples_per_second": 58.416, | |
| "eval_steps_per_second": 1.827, | |
| "step": 1200 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 9480, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 3, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 1 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.987622653354922e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |