shubhrapandit committed on
Commit
76064b0
·
verified ·
1 Parent(s): 8f24e90

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +12 -12
README.md CHANGED
@@ -388,21 +388,21 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
388
  <td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w8a8</td>
389
  <td>1.84</td>
390
  <td>1.2</td>
391
- <td>293</td>
392
  <td>4.0</td>
393
- <td>1021</td>
394
  <td>4.6</td>
395
- <td>1135</td>
396
  </tr>
397
  <tr>
398
  <td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w4a16</td>
399
  <td>2.73</td>
400
  <td>2.4</td>
401
- <td>314</td>
402
  <td>12.8</td>
403
- <td>1591</td>
404
  <td>16.0</td>
405
- <td>2019</td>
406
  </tr>
407
  <tr>
408
  <th rowspan="3" valign="top">H100x4</th>
@@ -419,21 +419,21 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
419
  <td>neuralmagic/Qwen2-VL-72B-Instruct-FP8-Dynamic</td>
420
  <td>1.70</td>
421
  <td>1.6</td>
422
- <td>236</td>
423
  <td>4.4</td>
424
- <td>623</td>
425
  <td>4.8</td>
426
- <td>669</td>
427
  </tr>
428
  <tr>
429
  <td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w4a16</td>
430
  <td>2.35</td>
431
  <td>5.2</td>
432
- <td>350</td>
433
  <td>13.2</td>
434
- <td>910</td>
435
  <td>14.4</td>
436
- <td>994</td>
437
  </tr>
438
  </tbody>
439
  </table>
 
388
  <td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w8a8</td>
389
  <td>1.84</td>
390
  <td>1.2</td>
391
+ <td>586</td>
392
  <td>4.0</td>
393
+ <td>2042</td>
394
  <td>4.6</td>
395
+ <td>2270</td>
396
  </tr>
397
  <tr>
398
  <td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w4a16</td>
399
  <td>2.73</td>
400
  <td>2.4</td>
401
+ <td>1256</td>
402
  <td>12.8</td>
403
+ <td>6364</td>
404
  <td>16.0</td>
405
+ <td>8076</td>
406
  </tr>
407
  <tr>
408
  <th rowspan="3" valign="top">H100x4</th>
 
419
  <td>neuralmagic/Qwen2-VL-72B-Instruct-FP8-Dynamic</td>
420
  <td>1.70</td>
421
  <td>1.6</td>
422
+ <td>457</td>
423
  <td>4.4</td>
424
+ <td>1207</td>
425
  <td>4.8</td>
426
+ <td>1296</td>
427
  </tr>
428
  <tr>
429
  <td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w4a16</td>
430
  <td>2.35</td>
431
  <td>5.2</td>
432
+ <td>1400</td>
433
  <td>13.2</td>
434
+ <td>3640</td>
435
  <td>14.4</td>
436
+ <td>3976</td>
437
  </tr>
438
  </tbody>
439
  </table>