| task,metric,value,err,version | |
| anli_r1,acc,0.367,0.015249378464171749,0 | |
| anli_r2,acc,0.355,0.015139491543780532,0 | |
| anli_r3,acc,0.35583333333333333,0.01382651874849331,0 | |
| arc_challenge,acc,0.2525597269624573,0.012696728980207708,0 | |
| arc_challenge,acc_norm,0.2832764505119454,0.013167478735134576,0 | |
| arc_easy,acc,0.5917508417508418,0.010085566195791245,0 | |
| arc_easy,acc_norm,0.5669191919191919,0.010167478013701789,0 | |
| boolq,acc,0.5724770642201835,0.008652692997177337,1 | |
| cb,acc,0.5178571428571429,0.06737697508644647,1 | |
| cb,f1,0.3175,,1 | |
| copa,acc,0.78,0.04163331998932261,0 | |
| hellaswag,acc,0.4297948615813583,0.004940349676769324,0 | |
| hellaswag,acc_norm,0.5615415255925115,0.0049518409782196935,0 | |
| piqa,acc,0.7295973884657236,0.010363167031620798,0 | |
| piqa,acc_norm,0.733949945593036,0.010310039263352826,0 | |
| rte,acc,0.5487364620938628,0.029953149241808946,0 | |
| sciq,acc,0.874,0.010499249222408047,0 | |
| sciq,acc_norm,0.853,0.011203415395160328,0 | |
| storycloze_2016,acc,0.6932121859967931,0.010664275190473634,0 | |
| winogrande,acc,0.5666929755327546,0.013926915052757345,0 | |