second_finetune_2560_3000multi / checkpoint-405 /trainer_state.json

Upload LoRA adapter folder

81998cb verified about 2 months ago

4.22 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 500,
	"global_step": 405,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.04938271604938271,
	"grad_norm": 0.24764642119407654,
	"learning_rate": 1.9984268150178167e-06,
	"loss": 0.4454,
	"step": 20
	},
	{
	"epoch": 0.09876543209876543,
	"grad_norm": 0.16292472183704376,
	"learning_rate": 1.976679926364567e-06,
	"loss": 0.4486,
	"step": 40
	},
	{
	"epoch": 0.14814814814814814,
	"grad_norm": 0.23981738090515137,
	"learning_rate": 1.9298944480495172e-06,
	"loss": 0.4176,
	"step": 60
	},
	{
	"epoch": 0.19753086419753085,
	"grad_norm": 0.25884947180747986,
	"learning_rate": 1.8592697928608701e-06,
	"loss": 0.4354,
	"step": 80
	},
	{
	"epoch": 0.24691358024691357,
	"grad_norm": 0.24895012378692627,
	"learning_rate": 1.7666165250516003e-06,
	"loss": 0.4689,
	"step": 100
	},
	{
	"epoch": 0.2962962962962963,
	"grad_norm": 0.2755189836025238,
	"learning_rate": 1.6543099439299843e-06,
	"loss": 0.4721,
	"step": 120
	},
	{
	"epoch": 0.345679012345679,
	"grad_norm": 0.24769575893878937,
	"learning_rate": 1.5252291896576213e-06,
	"loss": 0.4476,
	"step": 140
	},
	{
	"epoch": 0.3950617283950617,
	"grad_norm": 0.2432471662759781,
	"learning_rate": 1.3826834323650898e-06,
	"loss": 0.4631,
	"step": 160
	},
	{
	"epoch": 0.4444444444444444,
	"grad_norm": 0.2844541072845459,
	"learning_rate": 1.2303270368330437e-06,
	"loss": 0.4249,
	"step": 180
	},
	{
	"epoch": 0.49382716049382713,
	"grad_norm": 0.30305707454681396,
	"learning_rate": 1.0720658776137296e-06,
	"loss": 0.4358,
	"step": 200
	},
	{
	"epoch": 0.5432098765432098,
	"grad_norm": 0.2243204116821289,
	"learning_rate": 9.119572063390548e-07,
	"loss": 0.4057,
	"step": 220
	},
	{
	"epoch": 0.5925925925925926,
	"grad_norm": 0.2798885405063629,
	"learning_rate": 7.541056382602656e-07,
	"loss": 0.46,
	"step": 240
	},
	{
	"epoch": 0.6419753086419753,
	"grad_norm": 0.2705991566181183,
	"learning_rate": 6.025579245533673e-07,
	"loss": 0.4226,
	"step": 260
	},
	{
	"epoch": 0.691358024691358,
	"grad_norm": 0.30190983414649963,
	"learning_rate": 4.611992080530396e-07,
	"loss": 0.4285,
	"step": 280
	},
	{
	"epoch": 0.7407407407407407,
	"grad_norm": 0.3848233222961426,
	"learning_rate": 3.3365342204799606e-07,
	"loss": 0.4546,
	"step": 300
	},
	{
	"epoch": 0.7901234567901234,
	"grad_norm": 0.3378937244415283,
	"learning_rate": 2.231903855574101e-07,
	"loss": 0.4314,
	"step": 320
	},
	{
	"epoch": 0.8395061728395061,
	"grad_norm": 0.3310483396053314,
	"learning_rate": 1.3264197683414912e-07,
	"loss": 0.4168,
	"step": 340
	},
	{
	"epoch": 0.8888888888888888,
	"grad_norm": 0.3315976560115814,
	"learning_rate": 6.432953410719677e-08,
	"loss": 0.4389,
	"step": 360
	},
	{
	"epoch": 0.9382716049382716,
	"grad_norm": 0.29953354597091675,
	"learning_rate": 2.0004344749277036e-08,
	"loss": 0.4036,
	"step": 380
	},
	{
	"epoch": 0.9876543209876543,
	"grad_norm": 0.32311904430389404,
	"learning_rate": 8.027485151603786e-10,
	"loss": 0.4419,
	"step": 400
	}
	],
	"logging_steps": 20,
	"max_steps": 405,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 135,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 8.753047941650842e+16,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}