phi-tiny-moe-lean-sft / train_results.json
rkumar1999's picture
Model save
d7b5923 verified
{
"entropy": 0.3062901334329085,
"epoch": 2.0,
"mean_token_accuracy": 0.9058145718141035,
"num_tokens": 63165740.0,
"total_flos": 1.38382400484488e+18,
"train_loss": 0.41364410038917293,
"train_runtime": 6390.138,
"train_samples": 31076,
"train_samples_per_second": 9.726,
"train_steps_per_second": 0.029
}