| task,metric,value,err,version | |
| anli_r1,acc,0.33,0.014876872027456732,0 | |
| anli_r2,acc,0.371,0.015283736211823188,0 | |
| anli_r3,acc,0.32166666666666666,0.013490095282989521,0 | |
| arc_challenge,acc,0.2713310580204778,0.012993807727545803,0 | |
| arc_challenge,acc_norm,0.28498293515358364,0.013191348179838795,0 | |
| arc_easy,acc,0.5942760942760943,0.010075755540128871,0 | |
| arc_easy,acc_norm,0.5820707070707071,0.010120628211017883,0 | |
| boolq,acc,0.5804281345565749,0.008631175489166717,1 | |
| cb,acc,0.35714285714285715,0.06460957383809221,1 | |
| cb,f1,0.3051529790660225,,1 | |
| copa,acc,0.81,0.03942772444036623,0 | |
| hellaswag,acc,0.45309699263095,0.004967778940011933,0 | |
| hellaswag,acc_norm,0.6016729735112527,0.004885529674958343,0 | |
| piqa,acc,0.750272034820457,0.010099232969867493,0 | |
| piqa,acc_norm,0.7546245919477693,0.010039831320422387,0 | |
| rte,acc,0.5234657039711191,0.03006330041190266,0 | |
| sciq,acc,0.875,0.010463483381956722,0 | |
| sciq,acc_norm,0.865,0.010811655372416051,0 | |
| storycloze_2016,acc,0.7076429716729022,0.010518239729787736,0 | |
| winogrande,acc,0.5706393054459353,0.01391153749996916,0 | |