Update README.md
Browse files
README.md
CHANGED
@@ -257,7 +257,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
|
|
257 |
</td>
|
258 |
</tr>
|
259 |
<tr>
|
260 |
-
<td>MMLU-Pro
|
261 |
</td>
|
262 |
<td>48.1
|
263 |
</td>
|
@@ -267,7 +267,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
|
|
267 |
</td>
|
268 |
</tr>
|
269 |
<tr>
|
270 |
-
<td>IFEval
|
271 |
</td>
|
272 |
<td>86.4
|
273 |
</td>
|
@@ -277,7 +277,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
|
|
277 |
</td>
|
278 |
</tr>
|
279 |
<tr>
|
280 |
-
<td>BBH
|
281 |
</td>
|
282 |
<td>55.8
|
283 |
</td>
|
@@ -287,7 +287,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
|
|
287 |
</td>
|
288 |
</tr>
|
289 |
<tr>
|
290 |
-
<td>Math
|
291 |
</td>
|
292 |
<td>26.1
|
293 |
</td>
|
@@ -297,7 +297,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
|
|
297 |
</td>
|
298 |
</tr>
|
299 |
<tr>
|
300 |
-
<td>GPQA ()
|
301 |
</td>
|
302 |
<td>15.4
|
303 |
</td>
|
@@ -307,7 +307,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
|
|
307 |
</td>
|
308 |
</tr>
|
309 |
<tr>
|
310 |
-
<td>MuSR (
|
311 |
</td>
|
312 |
<td>18.2
|
313 |
</td>
|
|
|
257 |
</td>
|
258 |
</tr>
|
259 |
<tr>
|
260 |
+
<td>MMLU-Pro (5-shot)
|
261 |
</td>
|
262 |
<td>48.1
|
263 |
</td>
|
|
|
267 |
</td>
|
268 |
</tr>
|
269 |
<tr>
|
270 |
+
<td>IFEval (0-shot)
|
271 |
</td>
|
272 |
<td>86.4
|
273 |
</td>
|
|
|
277 |
</td>
|
278 |
</tr>
|
279 |
<tr>
|
280 |
+
<td>BBH (3-shot)
|
281 |
</td>
|
282 |
<td>55.8
|
283 |
</td>
|
|
|
287 |
</td>
|
288 |
</tr>
|
289 |
<tr>
|
290 |
+
<td>Math-lvl-5 (4-shot)
|
291 |
</td>
|
292 |
<td>26.1
|
293 |
</td>
|
|
|
297 |
</td>
|
298 |
</tr>
|
299 |
<tr>
|
300 |
+
<td>GPQA (0-shot)
|
301 |
</td>
|
302 |
<td>15.4
|
303 |
</td>
|
|
|
307 |
</td>
|
308 |
</tr>
|
309 |
<tr>
|
310 |
+
<td>MuSR (0-shot)
|
311 |
</td>
|
312 |
<td>18.2
|
313 |
</td>
|