task,metric,value,err,version anli_r1,acc,0.324,0.014806864733738863,0 anli_r2,acc,0.319,0.014746404865473477,0 anli_r3,acc,0.3466666666666667,0.013744022550571952,0 arc_challenge,acc,0.2508532423208191,0.01266819862131543,0 arc_challenge,acc_norm,0.2781569965870307,0.013094469919538807,0 arc_easy,acc,0.5900673400673401,0.010091953527506255,0 arc_easy,acc_norm,0.5404040404040404,0.01022623074088903,0 boolq,acc,0.5238532110091743,0.008735097860690573,1 cb,acc,0.5357142857142857,0.0672477765493766,1 cb,f1,0.3670634920634921,,1 copa,acc,0.71,0.04560480215720684,0 hellaswag,acc,0.4342760406293567,0.0049464854665446254,0 hellaswag,acc_norm,0.5616411073491336,0.004951717622007978,0 piqa,acc,0.7383025027203483,0.010255630772708229,0 piqa,acc_norm,0.736126224156692,0.010282996367695571,0 rte,acc,0.5306859205776173,0.03003973059219781,0 sciq,acc,0.864,0.010845350230472992,0 sciq,acc_norm,0.83,0.011884495834541669,0 storycloze_2016,acc,0.677712453233565,0.010807461374996356,0 winogrande,acc,0.5564325177584846,0.0139626949076204,0