Update README.md
README.md CHANGED
@@ -129,19 +129,15 @@ We rely on [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-h
 
 | Benchmark | | |
 |----------------------------------|----------------|---------------------------|
-| | Qwen3-32B
+| | Qwen3-32B | Qwen3-32B-float8dq |
 | **General** | | |
-| mmlu |
-|
-| bbh | WIP | WIP |
+| mmlu | 80.71 | 80.67 |
+| bbh | 37.49 | 38.01 |
 | **Multilingual** | | |
-| mgsm_en_cot_en |
-| m_mmlu (avg) | WIP | WIP |
+| mgsm_en_cot_en | 64.40 | WIP |
 | **Math** | | |
-| gpqa_main_zeroshot |
-|
-| leaderboard_math_hard (v3) | WIP | WIP |
-| **Overall** | WIP | WIP |
+| gpqa_main_zeroshot | 41.96 | 42.63 |
+| **Overall** | 56.14 | WIP |
 
 <details>
 <summary> Reproduce Model Quality Results </summary>
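As the surrounding section notes, the numbers in this table come from lm-evaluation-harness. The commands below are a minimal sketch of how one row (mmlu) could be reproduced from the command line; the repo ids `Qwen/Qwen3-32B` and `pytorch/Qwen3-32B-float8dq`, and the need for `torchao` to load the quantized checkpoint, are assumptions, so defer to the actual ids and pinned setup in the README's "Reproduce Model Quality Results" section.

```bash
# Install the evaluation harness; torchao is assumed to be required
# to load the float8 dynamically quantized checkpoint.
pip install lm-eval torchao

# Baseline Qwen3-32B on MMLU.
lm_eval --model hf \
  --model_args pretrained=Qwen/Qwen3-32B \
  --tasks mmlu \
  --device cuda:0 \
  --batch_size 8

# Float8 dynamically quantized checkpoint on MMLU (repo id is illustrative).
lm_eval --model hf \
  --model_args pretrained=pytorch/Qwen3-32B-float8dq \
  --tasks mmlu \
  --device cuda:0 \
  --batch_size 8
```

The other rows use the harness task names shown in the table (`bbh`, `mgsm_en_cot_en`, `gpqa_main_zeroshot`); swap the `--tasks` argument accordingly.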