| task,metric,value,err,version | |
| anli_r1,acc,0.316,0.014709193056057134,0 | |
| anli_r2,acc,0.342,0.01500870618212173,0 | |
| anli_r3,acc,0.35583333333333333,0.013826518748493315,0 | |
| arc_challenge,acc,0.25597269624573377,0.012753013241244525,0 | |
| arc_challenge,acc_norm,0.2790102389078498,0.013106784883601336,0 | |
| arc_easy,acc,0.5883838383838383,0.01009821864671491,0 | |
| arc_easy,acc_norm,0.5631313131313131,0.010177672928157697,0 | |
| boolq,acc,0.5253822629969419,0.008733779541853497,1 | |
| cb,acc,0.5892857142857143,0.0663363415035954,1 | |
| cb,f1,0.47918622848200315,,1 | |
| copa,acc,0.76,0.04292346959909282,0 | |
| hellaswag,acc,0.43248356901015733,0.004944080605048776,0 | |
| hellaswag,acc_norm,0.5669189404501095,0.004944889545497957,0 | |
| piqa,acc,0.7421109902067464,0.010206956662056269,0 | |
| piqa,acc_norm,0.7372143634385201,0.010269354068140777,0 | |
| rte,acc,0.5126353790613718,0.030086851767188564,0 | |
| sciq,acc,0.876,0.010427498872343961,0 | |
| sciq,acc_norm,0.876,0.01042749887234396,0 | |
| storycloze_2016,acc,0.6809192944949225,0.01077897063531249,0 | |
| winogrande,acc,0.5603788476716653,0.01394964977601569,0 | |