| task,metric,value,err,version | |
| anli_r1,acc,0.312,0.014658474370509012,0 | |
| anli_r2,acc,0.357,0.015158521721486773,0 | |
| anli_r3,acc,0.3383333333333333,0.013664144006618268,0 | |
| arc_challenge,acc,0.26706484641638223,0.012928933196496366,0 | |
| arc_challenge,acc_norm,0.28242320819112626,0.013155456884097224,0 | |
| arc_easy,acc,0.5841750841750841,0.01011334824464787,0 | |
| arc_easy,acc_norm,0.5572390572390572,0.010192333348394466,0 | |
| boolq,acc,0.5819571865443425,0.00862677435207074,1 | |
| cb,acc,0.44642857142857145,0.06703189227942398,1 | |
| cb,f1,0.3602150537634408,,1 | |
| copa,acc,0.79,0.040936018074033256,0 | |
| hellaswag,acc,0.45578570005974905,0.004970234032728297,0 | |
| hellaswag,acc_norm,0.602370045807608,0.004884079750433877,0 | |
| piqa,acc,0.7453754080522307,0.010164432237060487,0 | |
| piqa,acc_norm,0.7519042437431991,0.010077118315574706,0 | |
| rte,acc,0.5306859205776173,0.03003973059219781,0 | |
| sciq,acc,0.874,0.01049924922240803,0 | |
| sciq,acc_norm,0.854,0.011171786285496497,0 | |
| storycloze_2016,acc,0.6937466595403528,0.010659088460112756,0 | |
| winogrande,acc,0.5501183898973955,0.01398171190404973,0 | |