Update README.md
Browse files
README.md
CHANGED
@@ -269,6 +269,83 @@ library_name: transformers
|
|
269 |
</table>
|
270 |
|
271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
272 |
## Citation
|
273 |
|
274 |
```bibtex
|
|
|
269 |
</table>
|
270 |
|
271 |
|
272 |
+
### Optimizers
|
273 |
+
|
274 |
+
<table border="1" cellpadding="10" cellspacing="0" style="margin: 0 auto; border-collapse: collapse; text-align: center;">
|
275 |
+
<tr>
|
276 |
+
<th colspan="2">Batch Size</th>
|
277 |
+
<th>1</th>
|
278 |
+
<th>2</th>
|
279 |
+
<th>4</th>
|
280 |
+
<th>8</th>
|
281 |
+
<th>16</th>
|
282 |
+
<th>32</th>
|
283 |
+
<th>64</th>
|
284 |
+
<th>128</th>
|
285 |
+
<th>256</th>
|
286 |
+
<th>512</th>
|
287 |
+
<th>1024</th>
|
288 |
+
</tr>
|
289 |
+
<tr>
|
290 |
+
<td rowspan="4">Peak Mem (MB)</td>
|
291 |
+
<td>adamw_torch</td>
|
292 |
+
<td>601</td>
|
293 |
+
<td>605</td>
|
294 |
+
<td>633</td>
|
295 |
+
<td>707</td>
|
296 |
+
<td>857</td>
|
297 |
+
<td>1255</td>
|
298 |
+
<td>1637</td>
|
299 |
+
<td>2201</td>
|
300 |
+
<td>3787</td>
|
301 |
+
<td>6945</td>
|
302 |
+
<td>13293</td>
|
303 |
+
</tr>
|
304 |
+
<tr>
|
305 |
+
<td>adamw_bnb_8bit</td>
|
306 |
+
<td>589</td>
|
307 |
+
<td>595</td>
|
308 |
+
<td>625</td>
|
309 |
+
<td>699</td>
|
310 |
+
<td>849</td>
|
311 |
+
<td>1241</td>
|
312 |
+
<td>1625</td>
|
313 |
+
<td>2187</td>
|
314 |
+
<td>3773</td>
|
315 |
+
<td>6935</td>
|
316 |
+
<td>13283</td>
|
317 |
+
</tr>
|
318 |
+
<tr>
|
319 |
+
<td>adamw_hf</td>
|
320 |
+
<td>597</td>
|
321 |
+
<td>603</td>
|
322 |
+
<td>633</td>
|
323 |
+
<td>707</td>
|
324 |
+
<td>857</td>
|
325 |
+
<td>1251</td>
|
326 |
+
<td>1635</td>
|
327 |
+
<td>2197</td>
|
328 |
+
<td>3783</td>
|
329 |
+
<td>6941</td>
|
330 |
+
<td>13293</td>
|
331 |
+
</tr>
|
332 |
+
<tr>
|
333 |
+
<td>lion_32bit</td>
|
334 |
+
<td>591</td>
|
335 |
+
<td>597</td>
|
336 |
+
<td>627</td>
|
337 |
+
<td>701</td>
|
338 |
+
<td>851</td>
|
339 |
+
<td>1243</td>
|
340 |
+
<td>1627</td>
|
341 |
+
<td>2191</td>
|
342 |
+
<td>3777</td>
|
343 |
+
<td>6937</td>
|
344 |
+
<td>13285</td>
|
345 |
+
</tr>
|
346 |
+
</table>
|
347 |
+
|
348 |
+
|
349 |
## Citation
|
350 |
|
351 |
```bibtex
|