LLM_Detector_Preview_model / trainer_state.json

Upload folder using huggingface_hub

0515984 verified 5 months ago

16.2 kB

	{
	"best_global_step": 1100,
	"best_metric": 0.8458904558156242,
	"best_model_checkpoint": "modernbert-ai-detector\\checkpoint-1000",
	"epoch": 0.6327445293962563,
	"eval_steps": 100,
	"global_step": 1200,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.026364355391510677,
	"grad_norm": 58.5,
	"learning_rate": 2.5843881856540085e-06,
	"loss": 2.6514,
	"step": 50
	},
	{
	"epoch": 0.052728710783021354,
	"grad_norm": 38.0,
	"learning_rate": 5.221518987341772e-06,
	"loss": 2.4826,
	"step": 100
	},
	{
	"epoch": 0.052728710783021354,
	"eval_accuracy": 0.3540233562629979,
	"eval_f1_ai": 0.1182551168079388,
	"eval_f1_human": 0.3140407288317256,
	"eval_f1_macro": 0.2998623455656316,
	"eval_f1_micro": 0.3540233562629979,
	"eval_f1_mixed": 0.46729119105723055,
	"eval_f1_weighted": 0.35779828876973646,
	"eval_loss": 1.1378378868103027,
	"eval_precision_ai": 0.12716763005780346,
	"eval_precision_human": 0.26479891549932216,
	"eval_precision_mixed": 0.5093530118414278,
	"eval_precision_weighted": 0.37081108952953074,
	"eval_recall_ai": 0.11051004636785162,
	"eval_recall_human": 0.3857801184990125,
	"eval_recall_mixed": 0.4316463059918557,
	"eval_recall_weighted": 0.3540233562629979,
	"eval_runtime": 201.6513,
	"eval_samples_per_second": 61.998,
	"eval_steps_per_second": 1.939,
	"step": 100
	},
	{
	"epoch": 0.07909306617453203,
	"grad_norm": 30.625,
	"learning_rate": 7.858649789029536e-06,
	"loss": 2.1667,
	"step": 150
	},
	{
	"epoch": 0.10545742156604271,
	"grad_norm": 30.25,
	"learning_rate": 1.04957805907173e-05,
	"loss": 1.9264,
	"step": 200
	},
	{
	"epoch": 0.10545742156604271,
	"eval_accuracy": 0.560550311950088,
	"eval_f1_ai": 0.2870420995079278,
	"eval_f1_human": 0.08250526949713942,
	"eval_f1_macro": 0.35789350580335344,
	"eval_f1_micro": 0.560550311950088,
	"eval_f1_mixed": 0.7041331484049931,
	"eval_f1_weighted": 0.46673616146948954,
	"eval_loss": 0.9078124165534973,
	"eval_precision_ai": 0.49065420560747663,
	"eval_precision_human": 0.4840989399293286,
	"eval_precision_mixed": 0.5691990313032559,
	"eval_precision_weighted": 0.5322602945815579,
	"eval_recall_ai": 0.20285935085007728,
	"eval_recall_human": 0.04509545753785385,
	"eval_recall_mixed": 0.9229203025014543,
	"eval_recall_weighted": 0.560550311950088,
	"eval_runtime": 211.1463,
	"eval_samples_per_second": 59.21,
	"eval_steps_per_second": 1.852,
	"step": 200
	},
	{
	"epoch": 0.1318217769575534,
	"grad_norm": 24.875,
	"learning_rate": 1.3132911392405065e-05,
	"loss": 1.768,
	"step": 250
	},
	{
	"epoch": 0.15818613234906406,
	"grad_norm": 49.75,
	"learning_rate": 1.5770042194092827e-05,
	"loss": 1.6275,
	"step": 300
	},
	{
	"epoch": 0.15818613234906406,
	"eval_accuracy": 0.6739721644536875,
	"eval_f1_ai": 0.75,
	"eval_f1_human": 0.20677177565262342,
	"eval_f1_macro": 0.5733127667382775,
	"eval_f1_micro": 0.6739721644536875,
	"eval_f1_mixed": 0.763166524562209,
	"eval_f1_weighted": 0.625236416359176,
	"eval_loss": 0.7546337842941284,
	"eval_precision_ai": 0.6690909090909091,
	"eval_precision_human": 0.4813477737665463,
	"eval_precision_mixed": 0.6950185163062955,
	"eval_precision_weighted": 0.6377290935492025,
	"eval_recall_ai": 0.8531684698608965,
	"eval_recall_human": 0.1316655694535879,
	"eval_recall_mixed": 0.8461314717859221,
	"eval_recall_weighted": 0.6739721644536875,
	"eval_runtime": 215.6745,
	"eval_samples_per_second": 57.967,
	"eval_steps_per_second": 1.813,
	"step": 300
	},
	{
	"epoch": 0.18455048774057475,
	"grad_norm": 14.25,
	"learning_rate": 1.8407172995780592e-05,
	"loss": 1.5471,
	"step": 350
	},
	{
	"epoch": 0.21091484313208542,
	"grad_norm": 21.875,
	"learning_rate": 2.1044303797468356e-05,
	"loss": 1.4071,
	"step": 400
	},
	{
	"epoch": 0.21091484313208542,
	"eval_accuracy": 0.6927691569348904,
	"eval_f1_ai": 0.7882645141260478,
	"eval_f1_human": 0.3710217755443886,
	"eval_f1_macro": 0.639632496479439,
	"eval_f1_micro": 0.6927691569348904,
	"eval_f1_mixed": 0.7596111997678805,
	"eval_f1_weighted": 0.6711149677064479,
	"eval_loss": 0.6714780926704407,
	"eval_precision_ai": 0.6587960560456668,
	"eval_precision_human": 0.5097813578826237,
	"eval_precision_mixed": 0.7577424023154848,
	"eval_precision_weighted": 0.6770050165265453,
	"eval_recall_ai": 0.9810664605873262,
	"eval_recall_human": 0.2916392363396972,
	"eval_recall_mixed": 0.7614892379290285,
	"eval_recall_weighted": 0.6927691569348904,
	"eval_runtime": 198.7063,
	"eval_samples_per_second": 62.917,
	"eval_steps_per_second": 1.968,
	"step": 400
	},
	{
	"epoch": 0.2372791985235961,
	"grad_norm": 77.0,
	"learning_rate": 2.3681434599156117e-05,
	"loss": 1.3628,
	"step": 450
	},
	{
	"epoch": 0.2636435539151068,
	"grad_norm": 30.5,
	"learning_rate": 2.6318565400843882e-05,
	"loss": 1.2541,
	"step": 500
	},
	{
	"epoch": 0.2636435539151068,
	"eval_accuracy": 0.7350023996160614,
	"eval_f1_ai": 0.8567004988818167,
	"eval_f1_human": 0.30513595166163143,
	"eval_f1_macro": 0.654182029936718,
	"eval_f1_micro": 0.7350023996160614,
	"eval_f1_mixed": 0.8007096392667061,
	"eval_f1_weighted": 0.6918751713207526,
	"eval_loss": 0.588551938533783,
	"eval_precision_ai": 0.772093023255814,
	"eval_precision_human": 0.6488222698072805,
	"eval_precision_mixed": 0.7303128371089536,
	"eval_precision_weighted": 0.7191593239339089,
	"eval_recall_ai": 0.9621329211746522,
	"eval_recall_human": 0.19947333772218565,
	"eval_recall_mixed": 0.8861256544502618,
	"eval_recall_weighted": 0.7350023996160614,
	"eval_runtime": 200.2569,
	"eval_samples_per_second": 62.43,
	"eval_steps_per_second": 1.952,
	"step": 500
	},
	{
	"epoch": 0.29000790930661746,
	"grad_norm": 57.25,
	"learning_rate": 2.8955696202531646e-05,
	"loss": 1.2181,
	"step": 550
	},
	{
	"epoch": 0.3163722646981281,
	"grad_norm": 46.5,
	"learning_rate": 3.159282700421941e-05,
	"loss": 1.305,
	"step": 600
	},
	{
	"epoch": 0.3163722646981281,
	"eval_accuracy": 0.7118860982242842,
	"eval_f1_ai": 0.863129435620142,
	"eval_f1_human": 0.563509072724675,
	"eval_f1_macro": 0.7167971002003201,
	"eval_f1_micro": 0.7118860982242842,
	"eval_f1_mixed": 0.723752792256143,
	"eval_f1_weighted": 0.7136653128999944,
	"eval_loss": 0.5719289779663086,
	"eval_precision_ai": 0.766966966966967,
	"eval_precision_human": 0.4978540772532189,
	"eval_precision_mixed": 0.8393782383419689,
	"eval_precision_weighted": 0.7413978534670587,
	"eval_recall_ai": 0.9868624420401855,
	"eval_recall_human": 0.6491112574061882,
	"eval_recall_mixed": 0.6361256544502618,
	"eval_recall_weighted": 0.7118860982242842,
	"eval_runtime": 201.8683,
	"eval_samples_per_second": 61.931,
	"eval_steps_per_second": 1.937,
	"step": 600
	},
	{
	"epoch": 0.3427366200896388,
	"grad_norm": 40.25,
	"learning_rate": 3.422995780590718e-05,
	"loss": 1.1546,
	"step": 650
	},
	{
	"epoch": 0.3691009754811495,
	"grad_norm": 19.375,
	"learning_rate": 3.686708860759494e-05,
	"loss": 1.1359,
	"step": 700
	},
	{
	"epoch": 0.3691009754811495,
	"eval_accuracy": 0.7633978563429851,
	"eval_f1_ai": 0.9003927168868261,
	"eval_f1_human": 0.6369589977220956,
	"eval_f1_macro": 0.7701658728773962,
	"eval_f1_micro": 0.7633978563429851,
	"eval_f1_mixed": 0.7731459040232671,
	"eval_f1_weighted": 0.7663932988679265,
	"eval_loss": 0.5114254355430603,
	"eval_precision_ai": 0.8367617783676178,
	"eval_precision_human": 0.5612142498745609,
	"eval_precision_mixed": 0.8696837513631407,
	"eval_precision_weighted": 0.7879102421938303,
	"eval_recall_ai": 0.9744976816074189,
	"eval_recall_human": 0.7363396971691902,
	"eval_recall_mixed": 0.6958987783595113,
	"eval_recall_weighted": 0.7633978563429851,
	"eval_runtime": 212.8928,
	"eval_samples_per_second": 58.724,
	"eval_steps_per_second": 1.837,
	"step": 700
	},
	{
	"epoch": 0.39546533087266017,
	"grad_norm": 23.125,
	"learning_rate": 3.95042194092827e-05,
	"loss": 1.0872,
	"step": 750
	},
	{
	"epoch": 0.42182968626417083,
	"grad_norm": 157.0,
	"learning_rate": 4.214135021097047e-05,
	"loss": 0.9872,
	"step": 800
	},
	{
	"epoch": 0.42182968626417083,
	"eval_accuracy": 0.8017917133258678,
	"eval_f1_ai": 0.9022680412371133,
	"eval_f1_human": 0.6192686096157416,
	"eval_f1_macro": 0.7856376091542879,
	"eval_f1_micro": 0.8017917133258678,
	"eval_f1_mixed": 0.8353761766100088,
	"eval_f1_weighted": 0.7967088719488636,
	"eval_loss": 0.4510592222213745,
	"eval_precision_ai": 0.9672855879752431,
	"eval_precision_human": 0.7100893997445722,
	"eval_precision_mixed": 0.7816499809910024,
	"eval_precision_weighted": 0.8026885272274894,
	"eval_recall_ai": 0.8454404945904173,
	"eval_recall_human": 0.5490454246214614,
	"eval_recall_mixed": 0.8970331588132635,
	"eval_recall_weighted": 0.8017917133258678,
	"eval_runtime": 206.8504,
	"eval_samples_per_second": 60.44,
	"eval_steps_per_second": 1.89,
	"step": 800
	},
	{
	"epoch": 0.4481940416556815,
	"grad_norm": 46.0,
	"learning_rate": 4.477848101265823e-05,
	"loss": 0.9952,
	"step": 850
	},
	{
	"epoch": 0.4745583970471922,
	"grad_norm": 23.375,
	"learning_rate": 4.7415611814346e-05,
	"loss": 0.8916,
	"step": 900
	},
	{
	"epoch": 0.4745583970471922,
	"eval_accuracy": 0.826427771556551,
	"eval_f1_ai": 0.9497544389875331,
	"eval_f1_human": 0.712417340191036,
	"eval_f1_macro": 0.8327089451194482,
	"eval_f1_micro": 0.826427771556551,
	"eval_f1_mixed": 0.8359550561797753,
	"eval_f1_weighted": 0.8294925079101134,
	"eval_loss": 0.39986124634742737,
	"eval_precision_ai": 0.9290465631929047,
	"eval_precision_human": 0.6434828776214494,
	"eval_precision_mixed": 0.8946757339525626,
	"eval_precision_weighted": 0.8407505866593362,
	"eval_recall_ai": 0.9714064914992272,
	"eval_recall_human": 0.7978933508887426,
	"eval_recall_mixed": 0.7844677137870855,
	"eval_recall_weighted": 0.826427771556551,
	"eval_runtime": 209.7976,
	"eval_samples_per_second": 59.591,
	"eval_steps_per_second": 1.864,
	"step": 900
	},
	{
	"epoch": 0.5009227524387029,
	"grad_norm": 68.5,
	"learning_rate": 4.9994139709329584e-05,
	"loss": 0.899,
	"step": 950
	},
	{
	"epoch": 0.5272871078302136,
	"grad_norm": 24.5,
	"learning_rate": 4.970112517580872e-05,
	"loss": 0.8748,
	"step": 1000
	},
	{
	"epoch": 0.5272871078302136,
	"eval_accuracy": 0.8299472084466485,
	"eval_f1_ai": 0.943791329904482,
	"eval_f1_human": 0.727299016772701,
	"eval_f1_macro": 0.8360557403613388,
	"eval_f1_micro": 0.8299472084466485,
	"eval_f1_mixed": 0.8370768744068333,
	"eval_f1_weighted": 0.8324913584362222,
	"eval_loss": 0.3658604323863983,
	"eval_precision_ai": 0.8995098039215687,
	"eval_precision_human": 0.64853017019082,
	"eval_precision_mixed": 0.9174757281553398,
	"eval_precision_weighted": 0.8484025864969482,
	"eval_recall_ai": 0.9926584234930448,
	"eval_recall_human": 0.8278472679394339,
	"eval_recall_mixed": 0.7696335078534031,
	"eval_recall_weighted": 0.8299472084466485,
	"eval_runtime": 204.603,
	"eval_samples_per_second": 61.104,
	"eval_steps_per_second": 1.911,
	"step": 1000
	},
	{
	"epoch": 0.5536514632217242,
	"grad_norm": 51.75,
	"learning_rate": 4.940811064228786e-05,
	"loss": 0.7453,
	"step": 1050
	},
	{
	"epoch": 0.5800158186132349,
	"grad_norm": 34.5,
	"learning_rate": 4.9115096108766995e-05,
	"loss": 0.8091,
	"step": 1100
	},
	{
	"epoch": 0.5800158186132349,
	"eval_accuracy": 0.8637817949128139,
	"eval_f1_ai": 0.9449456821948076,
	"eval_f1_human": 0.7056443455391462,
	"eval_f1_macro": 0.8458904558156242,
	"eval_f1_micro": 0.8637817949128139,
	"eval_f1_mixed": 0.8870813397129187,
	"eval_f1_weighted": 0.8549702638885073,
	"eval_loss": 0.32950422167778015,
	"eval_precision_ai": 0.9025677101653183,
	"eval_precision_human": 0.9154855643044619,
	"eval_precision_mixed": 0.8368583956667527,
	"eval_precision_weighted": 0.8695671658030227,
	"eval_recall_ai": 0.991499227202473,
	"eval_recall_human": 0.5740618828176431,
	"eval_recall_mixed": 0.943717277486911,
	"eval_recall_weighted": 0.8637817949128139,
	"eval_runtime": 206.2487,
	"eval_samples_per_second": 60.616,
	"eval_steps_per_second": 1.896,
	"step": 1100
	},
	{
	"epoch": 0.6063801740047455,
	"grad_norm": 80.0,
	"learning_rate": 4.882208157524614e-05,
	"loss": 0.7375,
	"step": 1150
	},
	{
	"epoch": 0.6327445293962563,
	"grad_norm": 28.375,
	"learning_rate": 4.8529067041725276e-05,
	"loss": 0.7169,
	"step": 1200
	},
	{
	"epoch": 0.6327445293962563,
	"eval_accuracy": 0.8502639577667573,
	"eval_f1_ai": 0.866811861283297,
	"eval_f1_human": 0.7657316148597423,
	"eval_f1_macro": 0.8360143830281289,
	"eval_f1_micro": 0.8502639577667573,
	"eval_f1_mixed": 0.8754996729413475,
	"eval_f1_weighted": 0.847027475131161,
	"eval_loss": 0.39430809020996094,
	"eval_precision_ai": 0.7651582372079266,
	"eval_precision_human": 0.902591599642538,
	"eval_precision_mixed": 0.8750544820572425,
	"eval_precision_weighted": 0.8589967538180886,
	"eval_recall_ai": 0.999613601236476,
	"eval_recall_human": 0.6649111257406188,
	"eval_recall_mixed": 0.8759453170447935,
	"eval_recall_weighted": 0.8502639577667573,
	"eval_runtime": 214.0168,
	"eval_samples_per_second": 58.416,
	"eval_steps_per_second": 1.827,
	"step": 1200
	}
	],
	"logging_steps": 50,
	"max_steps": 9480,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 5,
	"save_steps": 200,
	"stateful_callbacks": {
	"EarlyStoppingCallback": {
	"args": {
	"early_stopping_patience": 3,
	"early_stopping_threshold": 0.0
	},
	"attributes": {
	"early_stopping_patience_counter": 1
	}
	},
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 3.987622653354922e+16,
	"train_batch_size": 16,
	"trial_name": null,
	"trial_params": null
	}