task,metric,value,err,version anli_r1,acc,0.367,0.015249378464171749,0 anli_r2,acc,0.355,0.015139491543780532,0 anli_r3,acc,0.35583333333333333,0.01382651874849331,0 arc_challenge,acc,0.2525597269624573,0.012696728980207708,0 arc_challenge,acc_norm,0.2832764505119454,0.013167478735134576,0 arc_easy,acc,0.5917508417508418,0.010085566195791245,0 arc_easy,acc_norm,0.5669191919191919,0.010167478013701789,0 boolq,acc,0.5724770642201835,0.008652692997177337,1 cb,acc,0.5178571428571429,0.06737697508644647,1 cb,f1,0.3175,,1 copa,acc,0.78,0.04163331998932261,0 hellaswag,acc,0.4297948615813583,0.004940349676769324,0 hellaswag,acc_norm,0.5615415255925115,0.0049518409782196935,0 piqa,acc,0.7295973884657236,0.010363167031620798,0 piqa,acc_norm,0.733949945593036,0.010310039263352826,0 rte,acc,0.5487364620938628,0.029953149241808946,0 sciq,acc,0.874,0.010499249222408047,0 sciq,acc_norm,0.853,0.011203415395160328,0 storycloze_2016,acc,0.6932121859967931,0.010664275190473634,0 winogrande,acc,0.5666929755327546,0.013926915052757345,0