| task,metric,value,err,version | |
| anli_r1,acc,0.363,0.015213890444671283,0 | |
| anli_r2,acc,0.362,0.0152048409129195,0 | |
| anli_r3,acc,0.3516666666666667,0.013789711695404806,0 | |
| arc_challenge,acc,0.27559726962457337,0.013057169655761838,0 | |
| arc_challenge,acc_norm,0.31313993174061433,0.013552671543623501,0 | |
| arc_easy,acc,0.6203703703703703,0.009958037725468565,0 | |
| arc_easy,acc_norm,0.6085858585858586,0.010014917532627824,0 | |
| boolq,acc,0.5162079510703363,0.008740459157499082,1 | |
| cb,acc,0.39285714285714285,0.0658538889806635,1 | |
| cb,f1,0.3340305010893247,,1 | |
| copa,acc,0.74,0.04408440022768078,0 | |
| hellaswag,acc,0.44064927305317664,0.004954503606471609,0 | |
| hellaswag,acc_norm,0.5764787890858395,0.004931065434173691,0 | |
| piqa,acc,0.7285092491838956,0.010376251176596135,0 | |
| piqa,acc_norm,0.7393906420021763,0.010241826155811632,0 | |
| rte,acc,0.44765342960288806,0.029931070362939526,0 | |
| sciq,acc,0.91,0.009054390204866444,0 | |
| sciq,acc_norm,0.914,0.008870325962594766,0 | |
| storycloze_2016,acc,0.6932121859967931,0.010664275190473634,0 | |
| winogrande,acc,0.5501183898973955,0.013981711904049732,0 | |