task,metric,value,err,version anli_r1,acc,0.356,0.015149042659306628,0 anli_r2,acc,0.332,0.014899597242811483,0 anli_r3,acc,0.34833333333333333,0.01375943749887408,0 arc_challenge,acc,0.2568259385665529,0.0127669237941168,0 arc_challenge,acc_norm,0.2935153583617747,0.013307250444941127,0 arc_easy,acc,0.5728114478114478,0.010150415974210868,0 arc_easy,acc_norm,0.5256734006734006,0.010246249665591215,0 boolq,acc,0.5758409785932722,0.00864386902338812,1 cb,acc,0.5892857142857143,0.0663363415035954,1 cb,f1,0.4111718275652702,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.4314877514439355,0.004942716091996078,0 hellaswag,acc_norm,0.5596494722166899,0.004954146286513344,0 piqa,acc,0.7323177366702938,0.01033011118937043,0 piqa,acc_norm,0.7334058759521219,0.010316749863541365,0 rte,acc,0.5234657039711191,0.03006330041190266,0 sciq,acc,0.842,0.011539894677559562,0 sciq,acc_norm,0.812,0.01236158601510375,0 storycloze_2016,acc,0.6835916622127205,0.010754780097940887,0 winogrande,acc,0.56353591160221,0.013938569465677023,0