task,metric,value,err,version anli_r1,acc,0.339,0.01497675877162034,0 anli_r2,acc,0.342,0.01500870618212173,0 anli_r3,acc,0.33416666666666667,0.01362243481313678,0 arc_challenge,acc,0.2773037542662116,0.013082095839059374,0 arc_challenge,acc_norm,0.2883959044368601,0.013238394422428173,0 arc_easy,acc,0.6056397306397306,0.010028176038392999,0 arc_easy,acc_norm,0.5862794612794613,0.010105878530238137,0 boolq,acc,0.5954128440366973,0.008584355308932694,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.3162533392229865,,1 copa,acc,0.78,0.04163331998932261,0 hellaswag,acc,0.44981079466241786,0.004964579685712441,0 hellaswag,acc_norm,0.6027683728340968,0.0048832465794966485,0 piqa,acc,0.7470076169749728,0.010142888698862462,0 piqa,acc_norm,0.7540805223068553,0.010047331865625182,0 rte,acc,0.516245487364621,0.030080573208738064,0 sciq,acc,0.888,0.009977753031397236,0 sciq,acc_norm,0.884,0.010131468138756976,0 storycloze_2016,acc,0.7071084981293426,0.010523873293246304,0 winogrande,acc,0.5816890292028414,0.013863669961195911,0