Update README.md
Browse files
README.md
CHANGED
@@ -55,4 +55,26 @@ Conversational AI.
|
|
55 |
|
56 |
## Evaluations
|
57 |
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
## Evaluations
|
57 |
|
58 |
+
| Tasks |Version| Filter |n-shot| Metric |Value | |Stderr|
|
59 |
+
|---------------------------------|-------|----------------|-----:|-----------|-----:|---|-----:|
|
60 |
+
|truthfulqa_mc2 | 2|none | 0|acc |0.5627|± |0.0154|
|
61 |
+
|gsm8k | 3|strict-match | 5|exact_match|0.5481|± |0.0137|
|
62 |
+
| | |flexible-extract| 5|exact_match|0.5557|± |0.0137|
|
63 |
+
|agieval_nous |N/A |none | 0|acc |0.3763|± |0.0093|
|
64 |
+
| | |none | 0|acc_norm |0.3665|± |0.0093|
|
65 |
+
| - agieval_aqua_rat | 1|none | 0|acc |0.2087|± |0.0255|
|
66 |
+
| | |none | 0|acc_norm |0.2047|± |0.0254|
|
67 |
+
| - agieval_logiqa_en | 1|none | 0|acc |0.3456|± |0.0187|
|
68 |
+
| | |none | 0|acc_norm |0.3594|± |0.0188|
|
69 |
+
| - agieval_lsat_ar | 1|none | 0|acc |0.1826|± |0.0255|
|
70 |
+
| | |none | 0|acc_norm |0.1783|± |0.0253|
|
71 |
+
| - agieval_lsat_lr | 1|none | 0|acc |0.3549|± |0.0212|
|
72 |
+
| | |none | 0|acc_norm |0.3451|± |0.0211|
|
73 |
+
| - agieval_lsat_rc | 1|none | 0|acc |0.5242|± |0.0305|
|
74 |
+
| | |none | 0|acc_norm |0.5130|± |0.0305|
|
75 |
+
| - agieval_sat_en | 1|none | 0|acc |0.6650|± |0.0330|
|
76 |
+
| | |none | 0|acc_norm |0.6505|± |0.0333|
|
77 |
+
| - agieval_sat_en_without_passage| 1|none | 0|acc |0.4175|± |0.0344|
|
78 |
+
| | |none | 0|acc_norm |0.3738|± |0.0338|
|
79 |
+
| - agieval_sat_math | 1|none | 0|acc |0.4227|± |0.0334|
|
80 |
+
| | |none | 0|acc_norm |0.3682|± |0.0326|
|