LLM_Detector_Preview_model / trainer_state.json
Donnyed's picture
Upload folder using huggingface_hub
0515984 verified
{
"best_global_step": 1100,
"best_metric": 0.8458904558156242,
"best_model_checkpoint": "modernbert-ai-detector\\checkpoint-1000",
"epoch": 0.6327445293962563,
"eval_steps": 100,
"global_step": 1200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.026364355391510677,
"grad_norm": 58.5,
"learning_rate": 2.5843881856540085e-06,
"loss": 2.6514,
"step": 50
},
{
"epoch": 0.052728710783021354,
"grad_norm": 38.0,
"learning_rate": 5.221518987341772e-06,
"loss": 2.4826,
"step": 100
},
{
"epoch": 0.052728710783021354,
"eval_accuracy": 0.3540233562629979,
"eval_f1_ai": 0.1182551168079388,
"eval_f1_human": 0.3140407288317256,
"eval_f1_macro": 0.2998623455656316,
"eval_f1_micro": 0.3540233562629979,
"eval_f1_mixed": 0.46729119105723055,
"eval_f1_weighted": 0.35779828876973646,
"eval_loss": 1.1378378868103027,
"eval_precision_ai": 0.12716763005780346,
"eval_precision_human": 0.26479891549932216,
"eval_precision_mixed": 0.5093530118414278,
"eval_precision_weighted": 0.37081108952953074,
"eval_recall_ai": 0.11051004636785162,
"eval_recall_human": 0.3857801184990125,
"eval_recall_mixed": 0.4316463059918557,
"eval_recall_weighted": 0.3540233562629979,
"eval_runtime": 201.6513,
"eval_samples_per_second": 61.998,
"eval_steps_per_second": 1.939,
"step": 100
},
{
"epoch": 0.07909306617453203,
"grad_norm": 30.625,
"learning_rate": 7.858649789029536e-06,
"loss": 2.1667,
"step": 150
},
{
"epoch": 0.10545742156604271,
"grad_norm": 30.25,
"learning_rate": 1.04957805907173e-05,
"loss": 1.9264,
"step": 200
},
{
"epoch": 0.10545742156604271,
"eval_accuracy": 0.560550311950088,
"eval_f1_ai": 0.2870420995079278,
"eval_f1_human": 0.08250526949713942,
"eval_f1_macro": 0.35789350580335344,
"eval_f1_micro": 0.560550311950088,
"eval_f1_mixed": 0.7041331484049931,
"eval_f1_weighted": 0.46673616146948954,
"eval_loss": 0.9078124165534973,
"eval_precision_ai": 0.49065420560747663,
"eval_precision_human": 0.4840989399293286,
"eval_precision_mixed": 0.5691990313032559,
"eval_precision_weighted": 0.5322602945815579,
"eval_recall_ai": 0.20285935085007728,
"eval_recall_human": 0.04509545753785385,
"eval_recall_mixed": 0.9229203025014543,
"eval_recall_weighted": 0.560550311950088,
"eval_runtime": 211.1463,
"eval_samples_per_second": 59.21,
"eval_steps_per_second": 1.852,
"step": 200
},
{
"epoch": 0.1318217769575534,
"grad_norm": 24.875,
"learning_rate": 1.3132911392405065e-05,
"loss": 1.768,
"step": 250
},
{
"epoch": 0.15818613234906406,
"grad_norm": 49.75,
"learning_rate": 1.5770042194092827e-05,
"loss": 1.6275,
"step": 300
},
{
"epoch": 0.15818613234906406,
"eval_accuracy": 0.6739721644536875,
"eval_f1_ai": 0.75,
"eval_f1_human": 0.20677177565262342,
"eval_f1_macro": 0.5733127667382775,
"eval_f1_micro": 0.6739721644536875,
"eval_f1_mixed": 0.763166524562209,
"eval_f1_weighted": 0.625236416359176,
"eval_loss": 0.7546337842941284,
"eval_precision_ai": 0.6690909090909091,
"eval_precision_human": 0.4813477737665463,
"eval_precision_mixed": 0.6950185163062955,
"eval_precision_weighted": 0.6377290935492025,
"eval_recall_ai": 0.8531684698608965,
"eval_recall_human": 0.1316655694535879,
"eval_recall_mixed": 0.8461314717859221,
"eval_recall_weighted": 0.6739721644536875,
"eval_runtime": 215.6745,
"eval_samples_per_second": 57.967,
"eval_steps_per_second": 1.813,
"step": 300
},
{
"epoch": 0.18455048774057475,
"grad_norm": 14.25,
"learning_rate": 1.8407172995780592e-05,
"loss": 1.5471,
"step": 350
},
{
"epoch": 0.21091484313208542,
"grad_norm": 21.875,
"learning_rate": 2.1044303797468356e-05,
"loss": 1.4071,
"step": 400
},
{
"epoch": 0.21091484313208542,
"eval_accuracy": 0.6927691569348904,
"eval_f1_ai": 0.7882645141260478,
"eval_f1_human": 0.3710217755443886,
"eval_f1_macro": 0.639632496479439,
"eval_f1_micro": 0.6927691569348904,
"eval_f1_mixed": 0.7596111997678805,
"eval_f1_weighted": 0.6711149677064479,
"eval_loss": 0.6714780926704407,
"eval_precision_ai": 0.6587960560456668,
"eval_precision_human": 0.5097813578826237,
"eval_precision_mixed": 0.7577424023154848,
"eval_precision_weighted": 0.6770050165265453,
"eval_recall_ai": 0.9810664605873262,
"eval_recall_human": 0.2916392363396972,
"eval_recall_mixed": 0.7614892379290285,
"eval_recall_weighted": 0.6927691569348904,
"eval_runtime": 198.7063,
"eval_samples_per_second": 62.917,
"eval_steps_per_second": 1.968,
"step": 400
},
{
"epoch": 0.2372791985235961,
"grad_norm": 77.0,
"learning_rate": 2.3681434599156117e-05,
"loss": 1.3628,
"step": 450
},
{
"epoch": 0.2636435539151068,
"grad_norm": 30.5,
"learning_rate": 2.6318565400843882e-05,
"loss": 1.2541,
"step": 500
},
{
"epoch": 0.2636435539151068,
"eval_accuracy": 0.7350023996160614,
"eval_f1_ai": 0.8567004988818167,
"eval_f1_human": 0.30513595166163143,
"eval_f1_macro": 0.654182029936718,
"eval_f1_micro": 0.7350023996160614,
"eval_f1_mixed": 0.8007096392667061,
"eval_f1_weighted": 0.6918751713207526,
"eval_loss": 0.588551938533783,
"eval_precision_ai": 0.772093023255814,
"eval_precision_human": 0.6488222698072805,
"eval_precision_mixed": 0.7303128371089536,
"eval_precision_weighted": 0.7191593239339089,
"eval_recall_ai": 0.9621329211746522,
"eval_recall_human": 0.19947333772218565,
"eval_recall_mixed": 0.8861256544502618,
"eval_recall_weighted": 0.7350023996160614,
"eval_runtime": 200.2569,
"eval_samples_per_second": 62.43,
"eval_steps_per_second": 1.952,
"step": 500
},
{
"epoch": 0.29000790930661746,
"grad_norm": 57.25,
"learning_rate": 2.8955696202531646e-05,
"loss": 1.2181,
"step": 550
},
{
"epoch": 0.3163722646981281,
"grad_norm": 46.5,
"learning_rate": 3.159282700421941e-05,
"loss": 1.305,
"step": 600
},
{
"epoch": 0.3163722646981281,
"eval_accuracy": 0.7118860982242842,
"eval_f1_ai": 0.863129435620142,
"eval_f1_human": 0.563509072724675,
"eval_f1_macro": 0.7167971002003201,
"eval_f1_micro": 0.7118860982242842,
"eval_f1_mixed": 0.723752792256143,
"eval_f1_weighted": 0.7136653128999944,
"eval_loss": 0.5719289779663086,
"eval_precision_ai": 0.766966966966967,
"eval_precision_human": 0.4978540772532189,
"eval_precision_mixed": 0.8393782383419689,
"eval_precision_weighted": 0.7413978534670587,
"eval_recall_ai": 0.9868624420401855,
"eval_recall_human": 0.6491112574061882,
"eval_recall_mixed": 0.6361256544502618,
"eval_recall_weighted": 0.7118860982242842,
"eval_runtime": 201.8683,
"eval_samples_per_second": 61.931,
"eval_steps_per_second": 1.937,
"step": 600
},
{
"epoch": 0.3427366200896388,
"grad_norm": 40.25,
"learning_rate": 3.422995780590718e-05,
"loss": 1.1546,
"step": 650
},
{
"epoch": 0.3691009754811495,
"grad_norm": 19.375,
"learning_rate": 3.686708860759494e-05,
"loss": 1.1359,
"step": 700
},
{
"epoch": 0.3691009754811495,
"eval_accuracy": 0.7633978563429851,
"eval_f1_ai": 0.9003927168868261,
"eval_f1_human": 0.6369589977220956,
"eval_f1_macro": 0.7701658728773962,
"eval_f1_micro": 0.7633978563429851,
"eval_f1_mixed": 0.7731459040232671,
"eval_f1_weighted": 0.7663932988679265,
"eval_loss": 0.5114254355430603,
"eval_precision_ai": 0.8367617783676178,
"eval_precision_human": 0.5612142498745609,
"eval_precision_mixed": 0.8696837513631407,
"eval_precision_weighted": 0.7879102421938303,
"eval_recall_ai": 0.9744976816074189,
"eval_recall_human": 0.7363396971691902,
"eval_recall_mixed": 0.6958987783595113,
"eval_recall_weighted": 0.7633978563429851,
"eval_runtime": 212.8928,
"eval_samples_per_second": 58.724,
"eval_steps_per_second": 1.837,
"step": 700
},
{
"epoch": 0.39546533087266017,
"grad_norm": 23.125,
"learning_rate": 3.95042194092827e-05,
"loss": 1.0872,
"step": 750
},
{
"epoch": 0.42182968626417083,
"grad_norm": 157.0,
"learning_rate": 4.214135021097047e-05,
"loss": 0.9872,
"step": 800
},
{
"epoch": 0.42182968626417083,
"eval_accuracy": 0.8017917133258678,
"eval_f1_ai": 0.9022680412371133,
"eval_f1_human": 0.6192686096157416,
"eval_f1_macro": 0.7856376091542879,
"eval_f1_micro": 0.8017917133258678,
"eval_f1_mixed": 0.8353761766100088,
"eval_f1_weighted": 0.7967088719488636,
"eval_loss": 0.4510592222213745,
"eval_precision_ai": 0.9672855879752431,
"eval_precision_human": 0.7100893997445722,
"eval_precision_mixed": 0.7816499809910024,
"eval_precision_weighted": 0.8026885272274894,
"eval_recall_ai": 0.8454404945904173,
"eval_recall_human": 0.5490454246214614,
"eval_recall_mixed": 0.8970331588132635,
"eval_recall_weighted": 0.8017917133258678,
"eval_runtime": 206.8504,
"eval_samples_per_second": 60.44,
"eval_steps_per_second": 1.89,
"step": 800
},
{
"epoch": 0.4481940416556815,
"grad_norm": 46.0,
"learning_rate": 4.477848101265823e-05,
"loss": 0.9952,
"step": 850
},
{
"epoch": 0.4745583970471922,
"grad_norm": 23.375,
"learning_rate": 4.7415611814346e-05,
"loss": 0.8916,
"step": 900
},
{
"epoch": 0.4745583970471922,
"eval_accuracy": 0.826427771556551,
"eval_f1_ai": 0.9497544389875331,
"eval_f1_human": 0.712417340191036,
"eval_f1_macro": 0.8327089451194482,
"eval_f1_micro": 0.826427771556551,
"eval_f1_mixed": 0.8359550561797753,
"eval_f1_weighted": 0.8294925079101134,
"eval_loss": 0.39986124634742737,
"eval_precision_ai": 0.9290465631929047,
"eval_precision_human": 0.6434828776214494,
"eval_precision_mixed": 0.8946757339525626,
"eval_precision_weighted": 0.8407505866593362,
"eval_recall_ai": 0.9714064914992272,
"eval_recall_human": 0.7978933508887426,
"eval_recall_mixed": 0.7844677137870855,
"eval_recall_weighted": 0.826427771556551,
"eval_runtime": 209.7976,
"eval_samples_per_second": 59.591,
"eval_steps_per_second": 1.864,
"step": 900
},
{
"epoch": 0.5009227524387029,
"grad_norm": 68.5,
"learning_rate": 4.9994139709329584e-05,
"loss": 0.899,
"step": 950
},
{
"epoch": 0.5272871078302136,
"grad_norm": 24.5,
"learning_rate": 4.970112517580872e-05,
"loss": 0.8748,
"step": 1000
},
{
"epoch": 0.5272871078302136,
"eval_accuracy": 0.8299472084466485,
"eval_f1_ai": 0.943791329904482,
"eval_f1_human": 0.727299016772701,
"eval_f1_macro": 0.8360557403613388,
"eval_f1_micro": 0.8299472084466485,
"eval_f1_mixed": 0.8370768744068333,
"eval_f1_weighted": 0.8324913584362222,
"eval_loss": 0.3658604323863983,
"eval_precision_ai": 0.8995098039215687,
"eval_precision_human": 0.64853017019082,
"eval_precision_mixed": 0.9174757281553398,
"eval_precision_weighted": 0.8484025864969482,
"eval_recall_ai": 0.9926584234930448,
"eval_recall_human": 0.8278472679394339,
"eval_recall_mixed": 0.7696335078534031,
"eval_recall_weighted": 0.8299472084466485,
"eval_runtime": 204.603,
"eval_samples_per_second": 61.104,
"eval_steps_per_second": 1.911,
"step": 1000
},
{
"epoch": 0.5536514632217242,
"grad_norm": 51.75,
"learning_rate": 4.940811064228786e-05,
"loss": 0.7453,
"step": 1050
},
{
"epoch": 0.5800158186132349,
"grad_norm": 34.5,
"learning_rate": 4.9115096108766995e-05,
"loss": 0.8091,
"step": 1100
},
{
"epoch": 0.5800158186132349,
"eval_accuracy": 0.8637817949128139,
"eval_f1_ai": 0.9449456821948076,
"eval_f1_human": 0.7056443455391462,
"eval_f1_macro": 0.8458904558156242,
"eval_f1_micro": 0.8637817949128139,
"eval_f1_mixed": 0.8870813397129187,
"eval_f1_weighted": 0.8549702638885073,
"eval_loss": 0.32950422167778015,
"eval_precision_ai": 0.9025677101653183,
"eval_precision_human": 0.9154855643044619,
"eval_precision_mixed": 0.8368583956667527,
"eval_precision_weighted": 0.8695671658030227,
"eval_recall_ai": 0.991499227202473,
"eval_recall_human": 0.5740618828176431,
"eval_recall_mixed": 0.943717277486911,
"eval_recall_weighted": 0.8637817949128139,
"eval_runtime": 206.2487,
"eval_samples_per_second": 60.616,
"eval_steps_per_second": 1.896,
"step": 1100
},
{
"epoch": 0.6063801740047455,
"grad_norm": 80.0,
"learning_rate": 4.882208157524614e-05,
"loss": 0.7375,
"step": 1150
},
{
"epoch": 0.6327445293962563,
"grad_norm": 28.375,
"learning_rate": 4.8529067041725276e-05,
"loss": 0.7169,
"step": 1200
},
{
"epoch": 0.6327445293962563,
"eval_accuracy": 0.8502639577667573,
"eval_f1_ai": 0.866811861283297,
"eval_f1_human": 0.7657316148597423,
"eval_f1_macro": 0.8360143830281289,
"eval_f1_micro": 0.8502639577667573,
"eval_f1_mixed": 0.8754996729413475,
"eval_f1_weighted": 0.847027475131161,
"eval_loss": 0.39430809020996094,
"eval_precision_ai": 0.7651582372079266,
"eval_precision_human": 0.902591599642538,
"eval_precision_mixed": 0.8750544820572425,
"eval_precision_weighted": 0.8589967538180886,
"eval_recall_ai": 0.999613601236476,
"eval_recall_human": 0.6649111257406188,
"eval_recall_mixed": 0.8759453170447935,
"eval_recall_weighted": 0.8502639577667573,
"eval_runtime": 214.0168,
"eval_samples_per_second": 58.416,
"eval_steps_per_second": 1.827,
"step": 1200
}
],
"logging_steps": 50,
"max_steps": 9480,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 200,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 1
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.987622653354922e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}