shubhrapandit committed
Commit efeae75 · verified · 1 Parent(s): aa8cbb4

Update README.md

Files changed (1):
  1. README.md (+26 -26)
README.md CHANGED
@@ -236,8 +236,8 @@ lm_eval \
  <tr>
  <th>Category</th>
  <th>Metric</th>
- <th>Qwen/Qwen2.5-VL-7B-Instruct</th>
- <th>neuralmagic/Qwen2.5-VL-7B-Instruct-quantized.w8a8</th>
+ <th>Qwen/Qwen2.5-VL-72B-Instruct</th>
+ <th>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w8a8</th>
  <th>Recovery (%)</th>
  </tr>
  </thead>
@@ -245,52 +245,52 @@ lm_eval \
  <tr>
  <td rowspan="6"><b>Vision</b></td>
  <td>MMMU (val, CoT)<br><i>explicit_prompt_relaxed_correctness</i></td>
- <td>52.00</td>
- <td>52.33</td>
- <td>100.63%</td>
+ <td>64.33</td>
+ <td>67.56</td>
+ <td>105.02%</td>
  </tr>
  <tr>
  <td>VQAv2 (val)<br><i>vqa_match</i></td>
- <td>75.59</td>
- <td>75.46</td>
- <td>99.83%</td>
+ <td>81.94</td>
+ <td>81.91</td>
+ <td>99.96%</td>
  </tr>
  <tr>
  <td>DocVQA (val)<br><i>anls</i></td>
- <td>94.27</td>
- <td>94.09</td>
- <td>99.81%</td>
+ <td>94.71</td>
+ <td>94.71</td>
+ <td>100.00%</td>
  </tr>
  <tr>
  <td>ChartQA (test, CoT)<br><i>anywhere_in_answer_relaxed_correctness</i></td>
- <td>86.44</td>
- <td>86.16</td>
- <td>99.68%</td>
+ <td>88.96</td>
+ <td>89.40</td>
+ <td>100.49%</td>
  </tr>
  <tr>
  <td>Mathvista (testmini, CoT)<br><i>explicit_prompt_relaxed_correctness</i></td>
- <td>69.47</td>
- <td>70.47</td>
- <td>101.44%</td>
+ <td>78.18</td>
+ <td>78.38</td>
+ <td>100.26%</td>
  </tr>
  <tr>
  <td><b>Average Score</b></td>
- <td><b>75.95</b></td>
- <td><b>75.90</b></td>
- <td><b>99.93%</b></td>
+ <td><b>81.62</b></td>
+ <td><b>82.00</b></td>
+ <td><b>100.46%</b></td>
  </tr>
  <tr>
  <td rowspan="2"><b>Text</b></td>
  <td>MGSM (CoT)</td>
- <td>58.72</td>
- <td>59.92</td>
- <td>102.04%</td>
+ <td>75.45</td>
+ <td>74.29</td>
+ <td>98.46%</td>
  </tr>
  <tr>
  <td>MMLU (5-shot)</td>
- <td>71.09</td>
- <td>70.57</td>
- <td>99.27%</td>
+ <td>86.16</td>
+ <td>85.65</td>
+ <td>99.41%</td>
  </tr>
  </tbody>
  </table>
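
For reference, the Recovery (%) column on both sides of this diff appears to be the quantized model's score divided by the unquantized baseline's score, expressed as a percentage. A minimal sketch of that arithmetic, assuming exactly that formula (the recovery_pct helper is illustrative and not part of the repository; the example values are the MMMU row from the updated 72B table):

    def recovery_pct(baseline: float, quantized: float) -> float:
        # Recovery (%) as reported in the table: quantized score relative to the baseline score.
        return quantized / baseline * 100

    # MMMU (val, CoT) row from the updated 72B table: baseline 64.33, quantized 67.56
    print(f"{recovery_pct(64.33, 67.56):.2f}%")  # 105.02%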