task,metric,value,err,version anli_r1,acc,0.317,0.014721675438880227,0 anli_r2,acc,0.346,0.015050266127564448,0 anli_r3,acc,0.3283333333333333,0.013562032919529012,0 arc_challenge,acc,0.27047781569965873,0.012980954547659554,0 arc_challenge,acc_norm,0.2883959044368601,0.013238394422428175,0 arc_easy,acc,0.5934343434343434,0.01007905641922352,0 arc_easy,acc_norm,0.5715488215488216,0.01015419573399097,0 boolq,acc,0.5773700305810398,0.008639722698719019,1 cb,acc,0.35714285714285715,0.0646095738380922,1 cb,f1,0.2557471264367816,,1 copa,acc,0.73,0.044619604333847394,0 hellaswag,acc,0.45030870344552876,0.004965078477435579,0 hellaswag,acc_norm,0.599183429595698,0.004890623693243619,0 piqa,acc,0.7524483133841132,0.010069703966857106,0 piqa,acc_norm,0.750272034820457,0.010099232969867469,0 rte,acc,0.5342960288808665,0.030025579819366426,0 sciq,acc,0.881,0.010244215145336664,0 sciq,acc_norm,0.87,0.010640169792499349,0 storycloze_2016,acc,0.6996258685195083,0.010600915927985021,0 winogrande,acc,0.5611681136543015,0.013946933444507032,0