Update README.md
Browse files
README.md
CHANGED
@@ -261,7 +261,7 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
261 |
</thead>
|
262 |
<tbody>
|
263 |
<tr>
|
264 |
-
<
|
265 |
<td>Qwen/Qwen2-VL-72B-Instruct</td>
|
266 |
<td></td>
|
267 |
<td>0.3</td>
|
@@ -305,8 +305,8 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
305 |
<td>377</td>
|
306 |
</tr>
|
307 |
<tr>
|
308 |
-
<td>neuralmagic/Qwen2-VL-72B-Instruct-FP8-Dynamic</td>
|
309 |
<td>H100x2</td>
|
|
|
310 |
<td>1.70</td>
|
311 |
<td>0.8</td>
|
312 |
<td>236</td>
|
@@ -316,8 +316,8 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
316 |
<td>669</td>
|
317 |
</tr>
|
318 |
<tr>
|
319 |
-
<td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w4a16</td>
|
320 |
<td>H100x1</td>
|
|
|
321 |
<td>2.35</td>
|
322 |
<td>1.3</td>
|
323 |
<td>350</td>
|
|
|
261 |
</thead>
|
262 |
<tbody>
|
263 |
<tr>
|
264 |
+
<td>A100x4</td>
|
265 |
<td>Qwen/Qwen2-VL-72B-Instruct</td>
|
266 |
<td></td>
|
267 |
<td>0.3</td>
|
|
|
305 |
<td>377</td>
|
306 |
</tr>
|
307 |
<tr>
|
|
|
308 |
<td>H100x2</td>
|
309 |
+
<td>neuralmagic/Qwen2-VL-72B-Instruct-FP8-Dynamic</td>
|
310 |
<td>1.70</td>
|
311 |
<td>0.8</td>
|
312 |
<td>236</td>
|
|
|
316 |
<td>669</td>
|
317 |
</tr>
|
318 |
<tr>
|
|
|
319 |
<td>H100x1</td>
|
320 |
+
<td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w4a16</td>
|
321 |
<td>2.35</td>
|
322 |
<td>1.3</td>
|
323 |
<td>350</td>
|