Word2Li committed on
Commit
5d33a7f
·
verified ·
1 Parent(s): a57a791

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +25 -16
README.md CHANGED
@@ -18,8 +18,8 @@ model-index:
18
  name: MMLU
19
  type: MMLU
20
  metrics:
21
- - name: Weighted Avg.
22
- type: Weighted Avg.
23
  value: 48.39
24
  verified: true
25
  - task:
@@ -28,8 +28,8 @@ model-index:
28
  name: IFEval
29
  type: IFEval
30
  metrics:
31
- - name: Avg.
32
- type: Avg.
33
  value: 50.11
34
  verified: true
35
  - task:
@@ -38,8 +38,8 @@ model-index:
38
  name: GSM8K
39
  type: GSM8K
40
  metrics:
41
- - name: pass@1
42
- type: pass@1
43
  value: 54.44
44
  verified: true
45
  - task:
@@ -48,8 +48,8 @@ model-index:
48
  name: MATH
49
  type: MATH
50
  metrics:
51
- - name: pass@1
52
- type: pass@1
53
  value: 13.80
54
  verified: true
55
  - task:
@@ -58,8 +58,8 @@ model-index:
58
  name: HumanEval
59
  type: HumanEval
60
  metrics:
61
- - name: pass@1
62
- type: pass@1
63
  value: 46.95
64
  verified: true
65
  - task:
@@ -68,8 +68,8 @@ model-index:
68
  name: MBPP
69
  type: MBPP
70
  metrics:
71
- - name: pass@1
72
- type: pass@1
73
  value: 45.00
74
  verified: true
75
  - task:
@@ -78,8 +78,8 @@ model-index:
78
  name: Hellaswag
79
  type: Hellaswag
80
  metrics:
81
- - name: pass@1
82
- type: pass@1
83
  value: 63.54
84
  verified: true
85
  - task:
@@ -88,8 +88,8 @@ model-index:
88
  name: GPQA
89
  type: GPQA
90
  metrics:
91
- - name: pass@1
92
- type: pass@1
93
  value: 20.20
94
  verified: true
95
  metrics:
@@ -147,6 +147,15 @@ The following hyperparameters were used during training:
147
  - lr_scheduler_warmup_ratio: 0.03
148
  - num_epochs: 1.0
149
 
 
 
 
 
 
 
 
 
 
150
  ### Framework versions
151
 
152
  - Transformers 4.45.2
 
18
  name: MMLU
19
  type: MMLU
20
  metrics:
21
+ - name: weighted accuracy
22
+ type: weighted accuracy
23
  value: 48.39
24
  verified: true
25
  - task:
 
28
  name: IFEval
29
  type: IFEval
30
  metrics:
31
+ - name: overall accuracy
32
+ type: overall accuracy
33
  value: 50.11
34
  verified: true
35
  - task:
 
38
  name: GSM8K
39
  type: GSM8K
40
  metrics:
41
+ - name: accuracy
42
+ type: accuracy
43
  value: 54.44
44
  verified: true
45
  - task:
 
48
  name: MATH
49
  type: MATH
50
  metrics:
51
+ - name: accuracy
52
+ type: accuracy
53
  value: 13.80
54
  verified: true
55
  - task:
 
58
  name: HumanEval
59
  type: HumanEval
60
  metrics:
61
+ - name: humaneval_pass@1
62
+ type: humaneval_pass@1
63
  value: 46.95
64
  verified: true
65
  - task:
 
68
  name: MBPP
69
  type: MBPP
70
  metrics:
71
+ - name: score
72
+ type: score
73
  value: 45.00
74
  verified: true
75
  - task:
 
78
  name: Hellaswag
79
  type: Hellaswag
80
  metrics:
81
+ - name: accuracy
82
+ type: accuracy
83
  value: 63.54
84
  verified: true
85
  - task:
 
88
  name: GPQA
89
  type: GPQA
90
  metrics:
91
+ - name: accuracy
92
+ type: accuracy
93
  value: 20.20
94
  verified: true
95
  metrics:
 
147
  - lr_scheduler_warmup_ratio: 0.03
148
  - num_epochs: 1.0
149
 
150
+ ### Training results
151
+
152
+ - epoch: 0.9973935708079931
153
+ - total_flos: 2.698045158024282e+18
154
+ - train_loss: 0.5919382667707649
155
+ - train_runtime: 4471.5794
156
+ - train_samples_per_second: 16.469
157
+ - train_steps_per_second: 0.064
158
+
159
  ### Framework versions
160
 
161
  - Transformers 4.45.2