task,metric,value,err,version anli_r1,acc,0.385,0.015395194445410808,0 anli_r2,acc,0.336,0.014944140233795021,0 anli_r3,acc,0.36333333333333334,0.013889898953170563,0 arc_challenge,acc,0.257679180887372,0.012780770562768402,0 arc_challenge,acc_norm,0.27559726962457337,0.013057169655761838,0 arc_easy,acc,0.5904882154882155,0.010090368160990059,0 arc_easy,acc_norm,0.5736531986531986,0.01014785860383514,0 boolq,acc,0.5666666666666667,0.008666972565214514,1 cb,acc,0.5535714285714286,0.06703189227942394,1 cb,f1,0.3077154912597951,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.4266082453694483,0.0049357353003488666,0 hellaswag,acc_norm,0.566620195180243,0.004945291270072436,0 piqa,acc,0.7285092491838956,0.010376251176596135,0 piqa,acc_norm,0.7486398258977149,0.01012115601681925,0 rte,acc,0.5523465703971119,0.02993107036293953,0 sciq,acc,0.872,0.010570133761108665,0 sciq,acc_norm,0.854,0.0111717862854965,0 storycloze_2016,acc,0.6841261357562801,0.010749892827011113,0 winogrande,acc,0.5445935280189423,0.013996485037729782,0