stonesstones committed (verified)
Commit 5d0b573 · 1 parent: 35633c5

End of training

README.md CHANGED
@@ -32,8 +32,8 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0002
-- train_batch_size: 128
-- eval_batch_size: 32
+- train_batch_size: 512
+- eval_batch_size: 128
 - seed: 42
 - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.5,0.9) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine_with_min_lr
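
For reference, the updated hyperparameters correspond roughly to the following `transformers` `TrainingArguments` — a minimal sketch, not code from this repository. The batch sizes are assumed to be per-device (the auto-generated card may report an effective size instead), and the `min_lr` value is a placeholder since the card does not record it.

```python
# Minimal sketch assuming the Hugging Face `transformers` Trainer.
# Batch sizes are treated as per-device; `output_dir` and `min_lr` are
# hypothetical placeholders not recorded in the diff above.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="out",                      # hypothetical path
    learning_rate=2e-4,
    per_device_train_batch_size=512,       # "train_batch_size: 512" in the card
    per_device_eval_batch_size=128,        # "eval_batch_size: 128" in the card
    seed=42,
    optim="adamw_torch",                   # OptimizerNames.ADAMW_TORCH
    adam_beta1=0.5,
    adam_beta2=0.9,
    adam_epsilon=1e-8,
    lr_scheduler_type="cosine_with_min_lr",
    lr_scheduler_kwargs={"min_lr": 0.0},   # placeholder; actual min_lr not in the card
)
```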
all_results.json CHANGED
@@ -1,7 +1,7 @@
 {
-    "epoch": 0.125,
-    "train_loss": 1.3442257046699524,
-    "train_runtime": 21.7074,
-    "train_samples_per_second": 9.435,
-    "train_steps_per_second": 0.092
+    "epoch": 0.1875,
+    "train_loss": 0.4492587248484294,
+    "train_runtime": 35.9777,
+    "train_samples_per_second": 5.692,
+    "train_steps_per_second": 0.056
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b58258e12f0e3503a9ac6d19f156a0ebb77b4e282f1d09e43335f3a5d7939966
+oid sha256:87294c600e79526b4c1b877575f7c306e09b0afa797db54ee935476a21b2d6ae
 size 31338740
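
If you want to confirm a locally downloaded `model.safetensors` against the new Git LFS pointer above, a minimal sketch (the local path is an assumption):

```python
# Check a downloaded file against the LFS pointer's size and sha256 oid.
import hashlib
from pathlib import Path

path = Path("model.safetensors")          # hypothetical local path
data = path.read_bytes()
print(len(data))                          # expect 31338740 per the pointer
print(hashlib.sha256(data).hexdigest())   # expect 87294c600e79526b4c1b877575f7c306e09b0afa797db54ee935476a21b2d6ae
```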
train_results.json CHANGED
@@ -1,7 +1,7 @@
 {
-    "epoch": 0.125,
-    "train_loss": 1.3442257046699524,
-    "train_runtime": 21.7074,
-    "train_samples_per_second": 9.435,
-    "train_steps_per_second": 0.092
+    "epoch": 0.1875,
+    "train_loss": 0.4492587248484294,
+    "train_runtime": 35.9777,
+    "train_samples_per_second": 5.692,
+    "train_steps_per_second": 0.056
 }
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.125,
+  "epoch": 0.1875,
   "eval_steps": 1,
-  "global_step": 2,
+  "global_step": 3,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -24,13 +24,20 @@
       "step": 2
     },
     {
-      "epoch": 0.125,
-      "step": 2,
-      "total_flos": 147645328785408.0,
-      "train_loss": 1.3442257046699524,
-      "train_runtime": 21.7074,
-      "train_samples_per_second": 9.435,
-      "train_steps_per_second": 0.092
+      "epoch": 0.1875,
+      "grad_norm": 6.143799781799316,
+      "learning_rate": 4.0000000000000003e-07,
+      "loss": 1.3478,
+      "step": 3
+    },
+    {
+      "epoch": 0.1875,
+      "step": 3,
+      "total_flos": 221467993178112.0,
+      "train_loss": 0.4492587248484294,
+      "train_runtime": 35.9777,
+      "train_samples_per_second": 5.692,
+      "train_steps_per_second": 0.056
     }
   ],
   "logging_steps": 1,
@@ -50,7 +57,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 147645328785408.0,
+  "total_flos": 221467993178112.0,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5625939faf506c5952d731e03342453532cfdf8fa83e8ecc6feb4dba0dc45a5d
+oid sha256:f8faac91036e8ec73700214fe8baff21d52da005b8df0a5bb0395bd50f3f1c8b
 size 5368