Anhltq committed on
Commit
faadbd7
·
verified ·
1 Parent(s): 0d4b64b

End of training

Browse files
all_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 1.5517725843614754e-08,
4
- "train_runtime": 3164.6163,
5
  "train_samples": 160,
6
  "train_samples_per_second": 0.101,
7
  "train_steps_per_second": 0.006
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": -2.7790665626525877e-07,
4
+ "train_runtime": 3161.8325,
5
  "train_samples": 160,
6
  "train_samples_per_second": 0.101,
7
  "train_steps_per_second": 0.006
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d201826a563bcc93af96d44c916407d12947c0196732b9d5310152e84b2dc0b
3
  size 1976163472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f56802790fcca3fcc69f9c09fbd2d62c07fab6a15110cc0d094adc7c40872245
3
  size 1976163472
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 1.5517725843614754e-08,
4
- "train_runtime": 3164.6163,
5
  "train_samples": 160,
6
  "train_samples_per_second": 0.101,
7
  "train_steps_per_second": 0.006
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": -2.7790665626525877e-07,
4
+ "train_runtime": 3161.8325,
5
  "train_samples": 160,
6
  "train_samples_per_second": 0.101,
7
  "train_steps_per_second": 0.006
trainer_state.json CHANGED
@@ -54,9 +54,9 @@
54
  "kl": 0.0,
55
  "learning_rate": 4.193203929064353e-07,
56
  "loss": 0.0,
57
- "reward": 6.752281337976456,
58
  "reward_std": 0.0,
59
- "rewards/concensus_correctness_reward_func": 2.0488750264048576,
60
  "rewards/consensus_reward_func": 2.0,
61
  "rewards/cumulative_reward_2": 0.0,
62
  "rewards/final_correctness_reward_func": 0.125,
@@ -67,15 +67,15 @@
67
  "step": 6
68
  },
69
  {
70
- "completion_length": 107.9375,
71
  "epoch": 0.4,
72
  "grad_norm": 0.0,
73
  "kl": 0.0,
74
  "learning_rate": 3.5042385616324236e-07,
75
  "loss": 0.0,
76
- "reward": 6.62740433216095,
77
  "reward_std": 0.0,
78
- "rewards/concensus_correctness_reward_func": 2.0488750264048576,
79
  "rewards/consensus_reward_func": 2.0,
80
  "rewards/cumulative_reward_2": 0.0,
81
  "rewards/final_correctness_reward_func": 0.0,
@@ -88,27 +88,27 @@
88
  {
89
  "completion_length": 107.125,
90
  "epoch": 0.5,
91
- "grad_norm": 7.32659444224737e-08,
92
- "kl": 3.538913398437593e-10,
93
  "learning_rate": 2.706448363680831e-07,
94
  "loss": 0.0,
95
- "reward": 6.650562286376953,
96
- "reward_std": 0.03314562886953354,
97
  "rewards/concensus_correctness_reward_func": 1.9240000247955322,
98
  "rewards/consensus_reward_func": 2.0,
99
  "rewards/cumulative_reward_2": 0.0,
100
  "rewards/final_correctness_reward_func": 0.0,
101
  "rewards/question_recreation_reward_func": 1.0,
102
  "rewards/soft_format_reward_func": 0.0,
103
- "rewards/strict_format_reward_func": 0.484375,
104
- "rewards/xmlcount_reward_func": 1.2421875,
105
  "step": 10
106
  },
107
  {
108
  "completion_length": 100.4375,
109
  "epoch": 0.6,
110
- "grad_norm": 6.133474528269289e-08,
111
- "kl": 1.7213421271700113e-08,
112
  "learning_rate": 1.886286282148002e-07,
113
  "loss": 0.0,
114
  "reward": 6.5019373297691345,
@@ -126,8 +126,8 @@
126
  {
127
  "completion_length": 100.8125,
128
  "epoch": 0.7,
129
- "grad_norm": 0.003382494207471609,
130
- "kl": 0.00014448053725751864,
131
  "learning_rate": 1.1326296046939333e-07,
132
  "loss": 0.0,
133
  "reward": 6.753007158637047,
@@ -145,8 +145,8 @@
145
  {
146
  "completion_length": 90.8125,
147
  "epoch": 0.8,
148
- "grad_norm": 2.1626634406857193e-05,
149
- "kl": 4.578997401250007e-07,
150
  "learning_rate": 5.271487265090163e-08,
151
  "loss": 0.0,
152
  "reward": 6.7057811468839645,
@@ -164,8 +164,8 @@
164
  {
165
  "completion_length": 107.0,
166
  "epoch": 0.9,
167
- "grad_norm": 1.7583732869752566e-07,
168
- "kl": 7.5428810552419234e-06,
169
  "learning_rate": 1.3545689574841341e-08,
170
  "loss": 0.0,
171
  "reward": 7.298999786376953,
@@ -183,17 +183,17 @@
183
  {
184
  "completion_length": 89.625,
185
  "epoch": 1.0,
186
- "grad_norm": 4.646564775612205e-05,
187
- "kl": 2.6783712221423173e-06,
188
  "learning_rate": 0.0,
189
- "loss": 0.0,
190
- "reward": 6.455756992101669,
191
- "reward_std": 0.0,
192
  "rewards/concensus_correctness_reward_func": 2.0490000247955322,
193
  "rewards/consensus_reward_func": 2.0,
194
  "rewards/cumulative_reward_2": 0.0,
195
  "rewards/final_correctness_reward_func": 0.0,
196
- "rewards/question_recreation_reward_func": 0.8755071678460808,
197
  "rewards/soft_format_reward_func": 0.0,
198
  "rewards/strict_format_reward_func": 0.4375,
199
  "rewards/xmlcount_reward_func": 1.09375,
@@ -203,8 +203,8 @@
203
  "epoch": 1.0,
204
  "step": 20,
205
  "total_flos": 0.0,
206
- "train_loss": 1.5517725843614754e-08,
207
- "train_runtime": 3164.6163,
208
  "train_samples_per_second": 0.101,
209
  "train_steps_per_second": 0.006
210
  }
 
54
  "kl": 0.0,
55
  "learning_rate": 4.193203929064353e-07,
56
  "loss": 0.0,
57
+ "reward": 6.7521563321352005,
58
  "reward_std": 0.0,
59
+ "rewards/concensus_correctness_reward_func": 2.048750028014183,
60
  "rewards/consensus_reward_func": 2.0,
61
  "rewards/cumulative_reward_2": 0.0,
62
  "rewards/final_correctness_reward_func": 0.125,
 
67
  "step": 6
68
  },
69
  {
70
+ "completion_length": 108.1875,
71
  "epoch": 0.4,
72
  "grad_norm": 0.0,
73
  "kl": 0.0,
74
  "learning_rate": 3.5042385616324236e-07,
75
  "loss": 0.0,
76
+ "reward": 6.627154350280762,
77
  "reward_std": 0.0,
78
+ "rewards/concensus_correctness_reward_func": 2.048625022172928,
79
  "rewards/consensus_reward_func": 2.0,
80
  "rewards/cumulative_reward_2": 0.0,
81
  "rewards/final_correctness_reward_func": 0.0,
 
88
  {
89
  "completion_length": 107.125,
90
  "epoch": 0.5,
91
+ "grad_norm": 0.0,
92
+ "kl": 0.0,
93
  "learning_rate": 2.706448363680831e-07,
94
  "loss": 0.0,
95
+ "reward": 6.673999786376953,
96
+ "reward_std": 0.0,
97
  "rewards/concensus_correctness_reward_func": 1.9240000247955322,
98
  "rewards/consensus_reward_func": 2.0,
99
  "rewards/cumulative_reward_2": 0.0,
100
  "rewards/final_correctness_reward_func": 0.0,
101
  "rewards/question_recreation_reward_func": 1.0,
102
  "rewards/soft_format_reward_func": 0.0,
103
+ "rewards/strict_format_reward_func": 0.5,
104
+ "rewards/xmlcount_reward_func": 1.25,
105
  "step": 10
106
  },
107
  {
108
  "completion_length": 100.4375,
109
  "epoch": 0.6,
110
+ "grad_norm": 0.0,
111
+ "kl": 0.0,
112
  "learning_rate": 1.886286282148002e-07,
113
  "loss": 0.0,
114
  "reward": 6.5019373297691345,
 
126
  {
127
  "completion_length": 100.8125,
128
  "epoch": 0.7,
129
+ "grad_norm": 0.0,
130
+ "kl": 0.0,
131
  "learning_rate": 1.1326296046939333e-07,
132
  "loss": 0.0,
133
  "reward": 6.753007158637047,
 
145
  {
146
  "completion_length": 90.8125,
147
  "epoch": 0.8,
148
+ "grad_norm": 0.0,
149
+ "kl": 0.0,
150
  "learning_rate": 5.271487265090163e-08,
151
  "loss": 0.0,
152
  "reward": 6.7057811468839645,
 
164
  {
165
  "completion_length": 107.0,
166
  "epoch": 0.9,
167
+ "grad_norm": 0.0,
168
+ "kl": 0.0,
169
  "learning_rate": 1.3545689574841341e-08,
170
  "loss": 0.0,
171
  "reward": 7.298999786376953,
 
183
  {
184
  "completion_length": 89.625,
185
  "epoch": 1.0,
186
+ "grad_norm": 3.1471806494209886e-08,
187
+ "kl": 0.0,
188
  "learning_rate": 0.0,
189
+ "loss": -0.0,
190
+ "reward": 6.455524355173111,
191
+ "reward_std": 0.00032897721393965185,
192
  "rewards/concensus_correctness_reward_func": 2.0490000247955322,
193
  "rewards/consensus_reward_func": 2.0,
194
  "rewards/cumulative_reward_2": 0.0,
195
  "rewards/final_correctness_reward_func": 0.0,
196
+ "rewards/question_recreation_reward_func": 0.8752745383681031,
197
  "rewards/soft_format_reward_func": 0.0,
198
  "rewards/strict_format_reward_func": 0.4375,
199
  "rewards/xmlcount_reward_func": 1.09375,
 
203
  "epoch": 1.0,
204
  "step": 20,
205
  "total_flos": 0.0,
206
+ "train_loss": -2.7790665626525877e-07,
207
+ "train_runtime": 3161.8325,
208
  "train_samples_per_second": 0.101,
209
  "train_steps_per_second": 0.006
210
  }