task,metric,value,err,version anli_r1,acc,0.326,0.01483050720454104,0 anli_r2,acc,0.332,0.014899597242811475,0 anli_r3,acc,0.3308333333333333,0.013588208070709002,0 arc_challenge,acc,0.23720136518771331,0.012430399829260842,0 arc_challenge,acc_norm,0.2721843003412969,0.013006600406423704,0 arc_easy,acc,0.5753367003367004,0.01014265368748041,0 arc_easy,acc_norm,0.49873737373737376,0.010259750807991153,0 boolq,acc,0.5559633027522936,0.008690105214920793,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.30718954248366015,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.4340768771161123,0.0049462215121452765,0 hellaswag,acc_norm,0.5581557458673571,0.004955914693717967,0 piqa,acc,0.7328618063112078,0.01032344049261244,0 piqa,acc_norm,0.735582154515778,0.010289787244767158,0 rte,acc,0.516245487364621,0.030080573208738064,0 sciq,acc,0.813,0.01233625482807413,0 sciq,acc_norm,0.724,0.014142984975740666,0 storycloze_2016,acc,0.6873329770176376,0.010720223172953174,0 winogrande,acc,0.5627466456195738,0.013941393310695924,0