shubhrapandit committed on
Commit
c9377ae
·
verified ·
1 Parent(s): 1fb421c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +64 -0
README.md CHANGED
@@ -239,6 +239,70 @@ lm_eval \
239
 
240
  ### Accuracy
241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
  ## Inference Performance
244
 
 
239
 
240
  ### Accuracy
241
 
242
+ <table>
243
+ <thead>
244
+ <tr>
245
+ <th>Category</th>
246
+ <th>Metric</th>
247
+ <th>Qwen/Qwen2-VL-72B-Instruct</th>
248
+ <th>nm-testing/Qwen2-VL-72B-Instruct-quantized.W4A16</th>
249
+ <th>Recovery (%)</th>
250
+ </tr>
251
+ </thead>
252
+ <tbody>
253
+ <tr>
254
+ <td rowspan="6"><b>Vision</b></td>
255
+ <td>MMMU (val, CoT)<br><i>explicit_prompt_relaxed_correctness</i></td>
256
+ <td>62.11</td>
257
+ <td>60.11</td>
258
+ <td>96.78%</td>
259
+ </tr>
260
+ <tr>
261
+ <td>VQAv2 (val)<br><i>vqa_match</i></td>
262
+ <td>82.51</td>
263
+ <td>82.38</td>
264
+ <td>99.84%</td>
265
+ </tr>
266
+ <tr>
267
+ <td>DocVQA (val)<br><i>anls</i></td>
268
+ <td>95.01</td>
269
+ <td>94.94</td>
270
+ <td>99.93%</td>
271
+ </tr>
272
+ <tr>
273
+ <td>ChartQA (test, CoT)<br><i>anywhere_in_answer_relaxed_correctness</i></td>
274
+ <td>83.40</td>
275
+ <td>80.72</td>
276
+ <td>96.78%</td>
277
+ </tr>
278
+ <tr>
279
+ <td>MathVista (testmini, CoT)<br><i>explicit_prompt_relaxed_correctness</i></td>
280
+ <td>66.57</td>
281
+ <td>64.66</td>
282
+ <td>97.13%</td>
283
+ </tr>
284
+ <tr>
285
+ <td><b>Average Score</b></td>
286
+ <td><b>77.92</b></td>
287
+ <td><b>—</b></td>
288
+ <td><b>—</b></td>
289
+ </tr>
290
+ <tr>
291
+ <td rowspan="2"><b>Text</b></td>
292
+ <td>MGSM (CoT)</td>
293
+ <td>68.60</td>
294
+ <td>66.45</td>
295
+ <td>96.87%</td>
296
+ </tr>
297
+ <tr>
298
+ <td>MMLU (5-shot)</td>
299
+ <td>82.70</td>
300
+ <td>82.35</td>
301
+ <td>99.58%</td>
302
+ </tr>
303
+ </tbody>
304
+ </table>
305
+
306
 
307
  ## Inference Performance
308