Update README.md
Browse files
README.md
CHANGED
@@ -163,6 +163,16 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
163 |
<td><strong>Recovery</strong>
|
164 |
</td>
|
165 |
</tr>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
<tr>
|
167 |
<td>MMLU (CoT, 0-shot)
|
168 |
</td>
|
@@ -223,6 +233,16 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
223 |
<td>99.7%
|
224 |
</td>
|
225 |
</tr>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
226 |
</table>
|
227 |
|
228 |
### Reproduction
|
|
|
163 |
<td><strong>Recovery</strong>
|
164 |
</td>
|
165 |
</tr>
|
166 |
+
<tr>
|
167 |
+
<td>MMLU (5-shot)
|
168 |
+
</td>
|
169 |
+
<td>87.41
|
170 |
+
</td>
|
171 |
+
<td>86.76
|
172 |
+
</td>
|
173 |
+
<td>99.3%
|
174 |
+
</td>
|
175 |
+
</tr>
|
176 |
<tr>
|
177 |
<td>MMLU (CoT, 0-shot)
|
178 |
</td>
|
|
|
233 |
<td>99.7%
|
234 |
</td>
|
235 |
</tr>
|
236 |
+
<tr>
|
237 |
+
<td><strong>Average</strong>
|
238 |
+
</td>
|
239 |
+
<td><strong>86.73</strong>
|
240 |
+
</td>
|
241 |
+
<td><strong>86.27</strong>
|
242 |
+
</td>
|
243 |
+
<td><strong>99.5%</strong>
|
244 |
+
</td>
|
245 |
+
</tr>
|
246 |
</table>
|
247 |
|
248 |
### Reproduction
|