| task,metric,value,err,version | |
| anli_r1,acc,0.35,0.015090650341444236,0 | |
| anli_r2,acc,0.32,0.01475865230357488,0 | |
| anli_r3,acc,0.33416666666666667,0.013622434813136783,0 | |
| arc_challenge,acc,0.19539249146757678,0.01158690718995291,0 | |
| arc_challenge,acc_norm,0.22781569965870307,0.012256708602326903,0 | |
| arc_easy,acc,0.39436026936026936,0.010028176038392995,0 | |
| arc_easy,acc_norm,0.35395622895622897,0.00981237064417441,0 | |
| boolq,acc,0.5675840978593272,0.008664798701065799,1 | |
| cb,acc,0.44642857142857145,0.067031892279424,1 | |
| cb,f1,0.30886196246139225,,1 | |
| copa,acc,0.61,0.04902071300001975,0 | |
| hellaswag,acc,0.2802230631348337,0.004481902637505665,0 | |
| hellaswag,acc_norm,0.2960565624377614,0.00455583246277459,0 | |
| piqa,acc,0.6311207834602829,0.011257546676908804,0 | |
| piqa,acc_norm,0.6207834602829162,0.011320331012905077,0 | |
| rte,acc,0.4548736462093863,0.029973636495415252,0 | |
| sciq,acc,0.681,0.01474640486547349,0 | |
| sciq,acc_norm,0.645,0.015139491543780532,0 | |
| storycloze_2016,acc,0.569748797434527,0.011449379528209637,0 | |
| winogrande,acc,0.505130228887135,0.014051745961790516,0 | |