Update README.md
Browse files
README.md
CHANGED
@@ -388,21 +388,21 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
388 |
<td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w8a8</td>
|
389 |
<td>1.84</td>
|
390 |
<td>1.2</td>
|
391 |
-
<td>
|
392 |
<td>4.0</td>
|
393 |
-
<td>
|
394 |
<td>4.6</td>
|
395 |
-
<td>
|
396 |
</tr>
|
397 |
<tr>
|
398 |
<td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w4a16</td>
|
399 |
<td>2.73</td>
|
400 |
<td>2.4</td>
|
401 |
-
<td>
|
402 |
<td>12.8</td>
|
403 |
-
<td>
|
404 |
<td>16.0</td>
|
405 |
-
<td>
|
406 |
</tr>
|
407 |
<tr>
|
408 |
<th rowspan="3" valign="top">H100x4</th>
|
@@ -419,21 +419,21 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
419 |
<td>neuralmagic/Qwen2-VL-72B-Instruct-FP8-Dynamic</td>
|
420 |
<td>1.70</td>
|
421 |
<td>1.6</td>
|
422 |
-
<td>
|
423 |
<td>4.4</td>
|
424 |
-
<td>
|
425 |
<td>4.8</td>
|
426 |
-
<td>
|
427 |
</tr>
|
428 |
<tr>
|
429 |
<td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w4a16</td>
|
430 |
<td>2.35</td>
|
431 |
<td>5.2</td>
|
432 |
-
<td>
|
433 |
<td>13.2</td>
|
434 |
-
<td>
|
435 |
<td>14.4</td>
|
436 |
-
<td>
|
437 |
</tr>
|
438 |
</tbody>
|
439 |
</table>
|
|
|
388 |
<td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w8a8</td>
|
389 |
<td>1.84</td>
|
390 |
<td>1.2</td>
|
391 |
+
<td>586</td>
|
392 |
<td>4.0</td>
|
393 |
+
<td>2042</td>
|
394 |
<td>4.6</td>
|
395 |
+
<td>2270</td>
|
396 |
</tr>
|
397 |
<tr>
|
398 |
<td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w4a16</td>
|
399 |
<td>2.73</td>
|
400 |
<td>2.4</td>
|
401 |
+
<td>1256</td>
|
402 |
<td>12.8</td>
|
403 |
+
<td>6364</td>
|
404 |
<td>16.0</td>
|
405 |
+
<td>8076</td>
|
406 |
</tr>
|
407 |
<tr>
|
408 |
<th rowspan="3" valign="top">H100x4</th>
|
|
|
419 |
<td>neuralmagic/Qwen2-VL-72B-Instruct-FP8-Dynamic</td>
|
420 |
<td>1.70</td>
|
421 |
<td>1.6</td>
|
422 |
+
<td>457</td>
|
423 |
<td>4.4</td>
|
424 |
+
<td>1207</td>
|
425 |
<td>4.8</td>
|
426 |
+
<td>1296</td>
|
427 |
</tr>
|
428 |
<tr>
|
429 |
<td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w4a16</td>
|
430 |
<td>2.35</td>
|
431 |
<td>5.2</td>
|
432 |
+
<td>1400</td>
|
433 |
<td>13.2</td>
|
434 |
+
<td>3640</td>
|
435 |
<td>14.4</td>
|
436 |
+
<td>3976</td>
|
437 |
</tr>
|
438 |
</tbody>
|
439 |
</table>
|