| task,metric,value,err,version | |
| anli_r1,acc,0.346,0.01505026612756445,0 | |
| anli_r2,acc,0.345,0.015039986742055242,0 | |
| anli_r3,acc,0.3541666666666667,0.013811933499570963,0 | |
| arc_challenge,acc,0.25853242320819114,0.01279455375428868,0 | |
| arc_challenge,acc_norm,0.3037542662116041,0.01343890918477876,0 | |
| arc_easy,acc,0.5951178451178452,0.010072423960395703,0 | |
| arc_easy,acc_norm,0.5740740740740741,0.010146568651002255,0 | |
| boolq,acc,0.5293577981651376,0.008729967580199222,1 | |
| cb,acc,0.4642857142857143,0.06724777654937658,1 | |
| cb,f1,0.255,,1 | |
| copa,acc,0.79,0.040936018074033256,0 | |
| hellaswag,acc,0.42959569806811393,0.004940067402031042,0 | |
| hellaswag,acc_norm,0.5713005377414858,0.004938787067611811,0 | |
| piqa,acc,0.7415669205658324,0.01021397163677332,0 | |
| piqa,acc_norm,0.733949945593036,0.01031003926335282,0 | |
| rte,acc,0.5234657039711191,0.030063300411902652,0 | |
| sciq,acc,0.895,0.009698921026024954,0 | |
| sciq,acc_norm,0.891,0.009859828407037183,0 | |
| storycloze_2016,acc,0.6873329770176376,0.010720223172953168,0 | |
| winogrande,acc,0.5438042620363063,0.013998453610924324,0 | |