Code-mint's picture
Upload folder using huggingface_hub
29f4816 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 250,
"global_step": 442,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 0.7410682106018066,
"epoch": 0.22624434389140272,
"grad_norm": 0.013439004309475422,
"learning_rate": 9.379474940334129e-05,
"loss": 0.1648,
"mean_token_accuracy": 0.962469134926796,
"num_tokens": 2038792.0,
"step": 50
},
{
"entropy": 0.7714774322509765,
"epoch": 0.45248868778280543,
"grad_norm": 0.017996039241552353,
"learning_rate": 8.186157517899762e-05,
"loss": 0.1352,
"mean_token_accuracy": 0.9685415130853653,
"num_tokens": 4075337.0,
"step": 100
},
{
"entropy": 0.754976252913475,
"epoch": 0.6787330316742082,
"grad_norm": 0.022379843518137932,
"learning_rate": 6.992840095465394e-05,
"loss": 0.1421,
"mean_token_accuracy": 0.9669514399766922,
"num_tokens": 6095739.0,
"step": 150
},
{
"entropy": 0.7515207546949386,
"epoch": 0.9049773755656109,
"grad_norm": 0.023339206352829933,
"learning_rate": 5.799522673031027e-05,
"loss": 0.1411,
"mean_token_accuracy": 0.9666793030500412,
"num_tokens": 8131188.0,
"step": 200
},
{
"entropy": 0.7280597138404846,
"epoch": 1.1312217194570136,
"grad_norm": 0.02840598113834858,
"learning_rate": 4.606205250596659e-05,
"loss": 0.1243,
"mean_token_accuracy": 0.9706432431936264,
"num_tokens": 10154395.0,
"step": 250
},
{
"epoch": 1.1312217194570136,
"eval_entropy": 0.6708883282703322,
"eval_loss": 0.12318716198205948,
"eval_mean_token_accuracy": 0.9712758501236504,
"eval_num_tokens": 10154395.0,
"eval_runtime": 910.6015,
"eval_samples_per_second": 10.385,
"eval_steps_per_second": 0.325,
"step": 250
},
{
"entropy": 0.6998866009712219,
"epoch": 1.3574660633484164,
"grad_norm": 0.0312146358191967,
"learning_rate": 3.4128878281622915e-05,
"loss": 0.125,
"mean_token_accuracy": 0.9700534969568253,
"num_tokens": 12220553.0,
"step": 300
},
{
"entropy": 0.6968072062730789,
"epoch": 1.5837104072398192,
"grad_norm": 0.040067460387945175,
"learning_rate": 2.2195704057279237e-05,
"loss": 0.1202,
"mean_token_accuracy": 0.9711974889039994,
"num_tokens": 14229662.0,
"step": 350
},
{
"entropy": 0.6959839969873428,
"epoch": 1.8099547511312217,
"grad_norm": 0.04520531743764877,
"learning_rate": 1.026252983293556e-05,
"loss": 0.1171,
"mean_token_accuracy": 0.9716462349891662,
"num_tokens": 16259627.0,
"step": 400
}
],
"logging_steps": 50,
"max_steps": 442,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.2730396735530598e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}