Commit
·
f5f9b77
1
Parent(s):
e989436
Add files
Browse files- 8b7178b44b/evaluation/rankeval/8b7178b44b_3.json +15 -1
- 8b7178b44b/evaluation/rankeval/8b7178b44b_3_lm-eval_global_step84877_2023-01-31-11-38-06_3shots_backup.json +15 -1
- 8b7178b44b/evaluation/rankeval/8b7178b44b_4.json +34 -1
- 8b7178b44b/evaluation/rankeval/8b7178b44b_4_lm-eval_global_step84877_2023-01-31-11-38-06_4shots_backup.json +34 -1
- 8b7178b44b/evaluation/rankeval/8b7178b44b_5.json +39 -1
- 8b7178b44b/evaluation/rankeval/8b7178b44b_5_lm-eval_global_step84877_2023-01-31-11-38-06_5shots_backup.json +39 -1
8b7178b44b/evaluation/rankeval/8b7178b44b_3.json
CHANGED
|
@@ -54,6 +54,18 @@
|
|
| 54 |
"acc_stderr": 0.013203196088537369,
|
| 55 |
"acc_norm": 0.32081911262798635,
|
| 56 |
"acc_norm_stderr": 0.013640943091946524
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
}
|
| 58 |
},
|
| 59 |
"versions": {
|
|
@@ -68,6 +80,8 @@
|
|
| 68 |
"storycloze_2016": 0,
|
| 69 |
"boolq": 1,
|
| 70 |
"arc_easy": 0,
|
| 71 |
-
"arc_challenge": 0
|
|
|
|
|
|
|
| 72 |
}
|
| 73 |
}
|
|
|
|
| 54 |
"acc_stderr": 0.013203196088537369,
|
| 55 |
"acc_norm": 0.32081911262798635,
|
| 56 |
"acc_norm_stderr": 0.013640943091946524
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.923,
|
| 60 |
+
"acc_stderr": 0.008434580140240651,
|
| 61 |
+
"acc_norm": 0.925,
|
| 62 |
+
"acc_norm_stderr": 0.00833333333333335
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.7442872687704026,
|
| 66 |
+
"acc_stderr": 0.010178690109459862,
|
| 67 |
+
"acc_norm": 0.7519042437431991,
|
| 68 |
+
"acc_norm_stderr": 0.010077118315574703
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 80 |
"storycloze_2016": 0,
|
| 81 |
"boolq": 1,
|
| 82 |
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|
8b7178b44b/evaluation/rankeval/8b7178b44b_3_lm-eval_global_step84877_2023-01-31-11-38-06_3shots_backup.json
CHANGED
|
@@ -54,6 +54,18 @@
|
|
| 54 |
"acc_stderr": 0.013203196088537369,
|
| 55 |
"acc_norm": 0.32081911262798635,
|
| 56 |
"acc_norm_stderr": 0.013640943091946524
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
}
|
| 58 |
},
|
| 59 |
"versions": {
|
|
@@ -68,6 +80,8 @@
|
|
| 68 |
"storycloze_2016": 0,
|
| 69 |
"boolq": 1,
|
| 70 |
"arc_easy": 0,
|
| 71 |
-
"arc_challenge": 0
|
|
|
|
|
|
|
| 72 |
}
|
| 73 |
}
|
|
|
|
| 54 |
"acc_stderr": 0.013203196088537369,
|
| 55 |
"acc_norm": 0.32081911262798635,
|
| 56 |
"acc_norm_stderr": 0.013640943091946524
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.923,
|
| 60 |
+
"acc_stderr": 0.008434580140240651,
|
| 61 |
+
"acc_norm": 0.925,
|
| 62 |
+
"acc_norm_stderr": 0.00833333333333335
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.7442872687704026,
|
| 66 |
+
"acc_stderr": 0.010178690109459862,
|
| 67 |
+
"acc_norm": 0.7519042437431991,
|
| 68 |
+
"acc_norm_stderr": 0.010077118315574703
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 80 |
"storycloze_2016": 0,
|
| 81 |
"boolq": 1,
|
| 82 |
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|
8b7178b44b/evaluation/rankeval/8b7178b44b_4.json
CHANGED
|
@@ -38,6 +38,34 @@
|
|
| 38 |
"storycloze_2016": {
|
| 39 |
"acc": 0.7097808658471406,
|
| 40 |
"acc_stderr": 0.010495529690730063
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
}
|
| 42 |
},
|
| 43 |
"versions": {
|
|
@@ -49,6 +77,11 @@
|
|
| 49 |
"hellaswag": 0,
|
| 50 |
"rte": 0,
|
| 51 |
"winogrande": 0,
|
| 52 |
-
"storycloze_2016": 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
}
|
| 54 |
}
|
|
|
|
| 38 |
"storycloze_2016": {
|
| 39 |
"acc": 0.7097808658471406,
|
| 40 |
"acc_stderr": 0.010495529690730063
|
| 41 |
+
},
|
| 42 |
+
"boolq": {
|
| 43 |
+
"acc": 0.6241590214067279,
|
| 44 |
+
"acc_stderr": 0.008471147248160114
|
| 45 |
+
},
|
| 46 |
+
"arc_easy": {
|
| 47 |
+
"acc": 0.6401515151515151,
|
| 48 |
+
"acc_stderr": 0.009848484848484843,
|
| 49 |
+
"acc_norm": 0.6346801346801347,
|
| 50 |
+
"acc_norm_stderr": 0.009880576614806924
|
| 51 |
+
},
|
| 52 |
+
"arc_challenge": {
|
| 53 |
+
"acc": 0.28924914675767915,
|
| 54 |
+
"acc_stderr": 0.013250012579393443,
|
| 55 |
+
"acc_norm": 0.318259385665529,
|
| 56 |
+
"acc_norm_stderr": 0.013611993916971453
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.927,
|
| 60 |
+
"acc_stderr": 0.008230354715244055,
|
| 61 |
+
"acc_norm": 0.928,
|
| 62 |
+
"acc_norm_stderr": 0.008178195576218681
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.7453754080522307,
|
| 66 |
+
"acc_stderr": 0.010164432237060487,
|
| 67 |
+
"acc_norm": 0.7448313384113167,
|
| 68 |
+
"acc_norm_stderr": 0.010171571592521834
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 77 |
"hellaswag": 0,
|
| 78 |
"rte": 0,
|
| 79 |
"winogrande": 0,
|
| 80 |
+
"storycloze_2016": 0,
|
| 81 |
+
"boolq": 1,
|
| 82 |
+
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|
8b7178b44b/evaluation/rankeval/8b7178b44b_4_lm-eval_global_step84877_2023-01-31-11-38-06_4shots_backup.json
CHANGED
|
@@ -38,6 +38,34 @@
|
|
| 38 |
"storycloze_2016": {
|
| 39 |
"acc": 0.7097808658471406,
|
| 40 |
"acc_stderr": 0.010495529690730063
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
}
|
| 42 |
},
|
| 43 |
"versions": {
|
|
@@ -49,6 +77,11 @@
|
|
| 49 |
"hellaswag": 0,
|
| 50 |
"rte": 0,
|
| 51 |
"winogrande": 0,
|
| 52 |
-
"storycloze_2016": 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
}
|
| 54 |
}
|
|
|
|
| 38 |
"storycloze_2016": {
|
| 39 |
"acc": 0.7097808658471406,
|
| 40 |
"acc_stderr": 0.010495529690730063
|
| 41 |
+
},
|
| 42 |
+
"boolq": {
|
| 43 |
+
"acc": 0.6241590214067279,
|
| 44 |
+
"acc_stderr": 0.008471147248160114
|
| 45 |
+
},
|
| 46 |
+
"arc_easy": {
|
| 47 |
+
"acc": 0.6401515151515151,
|
| 48 |
+
"acc_stderr": 0.009848484848484843,
|
| 49 |
+
"acc_norm": 0.6346801346801347,
|
| 50 |
+
"acc_norm_stderr": 0.009880576614806924
|
| 51 |
+
},
|
| 52 |
+
"arc_challenge": {
|
| 53 |
+
"acc": 0.28924914675767915,
|
| 54 |
+
"acc_stderr": 0.013250012579393443,
|
| 55 |
+
"acc_norm": 0.318259385665529,
|
| 56 |
+
"acc_norm_stderr": 0.013611993916971453
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.927,
|
| 60 |
+
"acc_stderr": 0.008230354715244055,
|
| 61 |
+
"acc_norm": 0.928,
|
| 62 |
+
"acc_norm_stderr": 0.008178195576218681
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.7453754080522307,
|
| 66 |
+
"acc_stderr": 0.010164432237060487,
|
| 67 |
+
"acc_norm": 0.7448313384113167,
|
| 68 |
+
"acc_norm_stderr": 0.010171571592521834
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 77 |
"hellaswag": 0,
|
| 78 |
"rte": 0,
|
| 79 |
"winogrande": 0,
|
| 80 |
+
"storycloze_2016": 0,
|
| 81 |
+
"boolq": 1,
|
| 82 |
+
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|
8b7178b44b/evaluation/rankeval/8b7178b44b_5.json
CHANGED
|
@@ -34,6 +34,38 @@
|
|
| 34 |
"winogrande": {
|
| 35 |
"acc": 0.569060773480663,
|
| 36 |
"acc_stderr": 0.01391779662333596
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
}
|
| 38 |
},
|
| 39 |
"versions": {
|
|
@@ -44,6 +76,12 @@
|
|
| 44 |
"copa": 0,
|
| 45 |
"hellaswag": 0,
|
| 46 |
"rte": 0,
|
| 47 |
-
"winogrande": 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
}
|
| 49 |
}
|
|
|
|
| 34 |
"winogrande": {
|
| 35 |
"acc": 0.569060773480663,
|
| 36 |
"acc_stderr": 0.01391779662333596
|
| 37 |
+
},
|
| 38 |
+
"storycloze_2016": {
|
| 39 |
+
"acc": 0.7097808658471406,
|
| 40 |
+
"acc_stderr": 0.010495529690730063
|
| 41 |
+
},
|
| 42 |
+
"boolq": {
|
| 43 |
+
"acc": 0.6223241590214067,
|
| 44 |
+
"acc_stderr": 0.008479309208281643
|
| 45 |
+
},
|
| 46 |
+
"arc_easy": {
|
| 47 |
+
"acc": 0.6456228956228957,
|
| 48 |
+
"acc_stderr": 0.00981500403025175,
|
| 49 |
+
"acc_norm": 0.6506734006734006,
|
| 50 |
+
"acc_norm_stderr": 0.0097828534493993
|
| 51 |
+
},
|
| 52 |
+
"arc_challenge": {
|
| 53 |
+
"acc": 0.29180887372013653,
|
| 54 |
+
"acc_stderr": 0.01328452529240351,
|
| 55 |
+
"acc_norm": 0.33532423208191126,
|
| 56 |
+
"acc_norm_stderr": 0.013796182947785562
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.931,
|
| 60 |
+
"acc_stderr": 0.00801893405031515,
|
| 61 |
+
"acc_norm": 0.936,
|
| 62 |
+
"acc_norm_stderr": 0.007743640226919298
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.7388465723612623,
|
| 66 |
+
"acc_stderr": 0.010248738649935581,
|
| 67 |
+
"acc_norm": 0.7459194776931447,
|
| 68 |
+
"acc_norm_stderr": 0.010157271999135055
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 76 |
"copa": 0,
|
| 77 |
"hellaswag": 0,
|
| 78 |
"rte": 0,
|
| 79 |
+
"winogrande": 0,
|
| 80 |
+
"storycloze_2016": 0,
|
| 81 |
+
"boolq": 1,
|
| 82 |
+
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|
8b7178b44b/evaluation/rankeval/8b7178b44b_5_lm-eval_global_step84877_2023-01-31-11-38-06_5shots_backup.json
CHANGED
|
@@ -34,6 +34,38 @@
|
|
| 34 |
"winogrande": {
|
| 35 |
"acc": 0.569060773480663,
|
| 36 |
"acc_stderr": 0.01391779662333596
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
}
|
| 38 |
},
|
| 39 |
"versions": {
|
|
@@ -44,6 +76,12 @@
|
|
| 44 |
"copa": 0,
|
| 45 |
"hellaswag": 0,
|
| 46 |
"rte": 0,
|
| 47 |
-
"winogrande": 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
}
|
| 49 |
}
|
|
|
|
| 34 |
"winogrande": {
|
| 35 |
"acc": 0.569060773480663,
|
| 36 |
"acc_stderr": 0.01391779662333596
|
| 37 |
+
},
|
| 38 |
+
"storycloze_2016": {
|
| 39 |
+
"acc": 0.7097808658471406,
|
| 40 |
+
"acc_stderr": 0.010495529690730063
|
| 41 |
+
},
|
| 42 |
+
"boolq": {
|
| 43 |
+
"acc": 0.6223241590214067,
|
| 44 |
+
"acc_stderr": 0.008479309208281643
|
| 45 |
+
},
|
| 46 |
+
"arc_easy": {
|
| 47 |
+
"acc": 0.6456228956228957,
|
| 48 |
+
"acc_stderr": 0.00981500403025175,
|
| 49 |
+
"acc_norm": 0.6506734006734006,
|
| 50 |
+
"acc_norm_stderr": 0.0097828534493993
|
| 51 |
+
},
|
| 52 |
+
"arc_challenge": {
|
| 53 |
+
"acc": 0.29180887372013653,
|
| 54 |
+
"acc_stderr": 0.01328452529240351,
|
| 55 |
+
"acc_norm": 0.33532423208191126,
|
| 56 |
+
"acc_norm_stderr": 0.013796182947785562
|
| 57 |
+
},
|
| 58 |
+
"sciq": {
|
| 59 |
+
"acc": 0.931,
|
| 60 |
+
"acc_stderr": 0.00801893405031515,
|
| 61 |
+
"acc_norm": 0.936,
|
| 62 |
+
"acc_norm_stderr": 0.007743640226919298
|
| 63 |
+
},
|
| 64 |
+
"piqa": {
|
| 65 |
+
"acc": 0.7388465723612623,
|
| 66 |
+
"acc_stderr": 0.010248738649935581,
|
| 67 |
+
"acc_norm": 0.7459194776931447,
|
| 68 |
+
"acc_norm_stderr": 0.010157271999135055
|
| 69 |
}
|
| 70 |
},
|
| 71 |
"versions": {
|
|
|
|
| 76 |
"copa": 0,
|
| 77 |
"hellaswag": 0,
|
| 78 |
"rte": 0,
|
| 79 |
+
"winogrande": 0,
|
| 80 |
+
"storycloze_2016": 0,
|
| 81 |
+
"boolq": 1,
|
| 82 |
+
"arc_easy": 0,
|
| 83 |
+
"arc_challenge": 0,
|
| 84 |
+
"sciq": 0,
|
| 85 |
+
"piqa": 0
|
| 86 |
}
|
| 87 |
}
|