| { | |
| "results": { | |
| "anli_r1": { | |
| "acc": 0.317, | |
| "acc_stderr": 0.014721675438880227 | |
| }, | |
| "anli_r2": { | |
| "acc": 0.346, | |
| "acc_stderr": 0.015050266127564448 | |
| }, | |
| "anli_r3": { | |
| "acc": 0.3283333333333333, | |
| "acc_stderr": 0.013562032919529012 | |
| }, | |
| "cb": { | |
| "acc": 0.35714285714285715, | |
| "acc_stderr": 0.0646095738380922, | |
| "f1": 0.2557471264367816 | |
| }, | |
| "copa": { | |
| "acc": 0.73, | |
| "acc_stderr": 0.044619604333847394 | |
| }, | |
| "hellaswag": { | |
| "acc": 0.45030870344552876, | |
| "acc_stderr": 0.004965078477435579, | |
| "acc_norm": 0.599183429595698, | |
| "acc_norm_stderr": 0.004890623693243619 | |
| }, | |
| "rte": { | |
| "acc": 0.5342960288808665, | |
| "acc_stderr": 0.030025579819366426 | |
| }, | |
| "winogrande": { | |
| "acc": 0.5611681136543015, | |
| "acc_stderr": 0.013946933444507032 | |
| }, | |
| "storycloze_2016": { | |
| "acc": 0.6996258685195083, | |
| "acc_stderr": 0.010600915927985021 | |
| }, | |
| "boolq": { | |
| "acc": 0.5773700305810398, | |
| "acc_stderr": 0.008639722698719019 | |
| }, | |
| "arc_easy": { | |
| "acc": 0.5934343434343434, | |
| "acc_stderr": 0.01007905641922352, | |
| "acc_norm": 0.5715488215488216, | |
| "acc_norm_stderr": 0.01015419573399097 | |
| }, | |
| "arc_challenge": { | |
| "acc": 0.27047781569965873, | |
| "acc_stderr": 0.012980954547659554, | |
| "acc_norm": 0.2883959044368601, | |
| "acc_norm_stderr": 0.013238394422428175 | |
| }, | |
| "sciq": { | |
| "acc": 0.881, | |
| "acc_stderr": 0.010244215145336664, | |
| "acc_norm": 0.87, | |
| "acc_norm_stderr": 0.010640169792499349 | |
| }, | |
| "piqa": { | |
| "acc": 0.7524483133841132, | |
| "acc_stderr": 0.010069703966857106, | |
| "acc_norm": 0.750272034820457, | |
| "acc_norm_stderr": 0.010099232969867469 | |
| } | |
| }, | |
| "versions": { | |
| "anli_r1": 0, | |
| "anli_r2": 0, | |
| "anli_r3": 0, | |
| "cb": 1, | |
| "copa": 0, | |
| "hellaswag": 0, | |
| "rte": 0, | |
| "winogrande": 0, | |
| "storycloze_2016": 0, | |
| "boolq": 1, | |
| "arc_easy": 0, | |
| "arc_challenge": 0, | |
| "sciq": 0, | |
| "piqa": 0 | |
| } | |
| } |