| { | |
| "results": { | |
| "anli_r1": { | |
| "acc": 0.335, | |
| "acc_stderr": 0.014933117490932575 | |
| }, | |
| "anli_r2": { | |
| "acc": 0.338, | |
| "acc_stderr": 0.014965960710224487 | |
| }, | |
| "anli_r3": { | |
| "acc": 0.33, | |
| "acc_stderr": 0.013579531277800922 | |
| }, | |
| "cb": { | |
| "acc": 0.4107142857142857, | |
| "acc_stderr": 0.0663363415035954, | |
| "f1": 0.1940928270042194 | |
| }, | |
| "copa": { | |
| "acc": 0.74, | |
| "acc_stderr": 0.04408440022768077 | |
| }, | |
| "hellaswag": { | |
| "acc": 0.458972316271659, | |
| "acc_stderr": 0.004972954732733369, | |
| "acc_norm": 0.6040629356701852, | |
| "acc_norm_stderr": 0.00488051543132316 | |
| }, | |
| "rte": { | |
| "acc": 0.5306859205776173, | |
| "acc_stderr": 0.03003973059219781 | |
| }, | |
| "winogrande": { | |
| "acc": 0.5485398579321231, | |
| "acc_stderr": 0.013986110301017762 | |
| }, | |
| "storycloze_2016": { | |
| "acc": 0.709246392303581, | |
| "acc_stderr": 0.010501233625213076 | |
| }, | |
| "boolq": { | |
| "acc": 0.5779816513761468, | |
| "acc_stderr": 0.008638040428462952 | |
| }, | |
| "arc_easy": { | |
| "acc": 0.5698653198653199, | |
| "acc_stderr": 0.010159130445178506, | |
| "acc_norm": 0.51010101010101, | |
| "acc_norm_stderr": 0.01025768968745837 | |
| }, | |
| "arc_challenge": { | |
| "acc": 0.2508532423208191, | |
| "acc_stderr": 0.01266819862131543, | |
| "acc_norm": 0.26621160409556316, | |
| "acc_norm_stderr": 0.012915774781523212 | |
| }, | |
| "sciq": { | |
| "acc": 0.816, | |
| "acc_stderr": 0.012259457340938586, | |
| "acc_norm": 0.729, | |
| "acc_norm_stderr": 0.014062601350986186 | |
| }, | |
| "piqa": { | |
| "acc": 0.7524483133841132, | |
| "acc_stderr": 0.010069703966857114, | |
| "acc_norm": 0.7562568008705114, | |
| "acc_norm_stderr": 0.01001719947150061 | |
| } | |
| }, | |
| "versions": { | |
| "anli_r1": 0, | |
| "anli_r2": 0, | |
| "anli_r3": 0, | |
| "cb": 1, | |
| "copa": 0, | |
| "hellaswag": 0, | |
| "rte": 0, | |
| "winogrande": 0, | |
| "storycloze_2016": 0, | |
| "boolq": 1, | |
| "arc_easy": 0, | |
| "arc_challenge": 0, | |
| "sciq": 0, | |
| "piqa": 0 | |
| } | |
| } |