Merge pull request #178 from PocketDocLabs/main
Browse files
Update README.md to reflect current gradient checkpointing support
README.md
CHANGED
|
@@ -387,7 +387,7 @@ train_on_inputs: false
|
|
| 387 |
# don't use this, leads to wonky training (according to someone on the internet)
|
| 388 |
group_by_length: false
|
| 389 |
|
| 390 |
-
#
|
| 391 |
gradient_checkpointing: false
|
| 392 |
|
| 393 |
# stop training after this many evaluation losses have increased in a row
|
|
|
|
| 387 |
# don't use this, leads to wonky training (according to someone on the internet)
|
| 388 |
group_by_length: false
|
| 389 |
|
| 390 |
+
# Whether to use gradient checkpointing https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
|
| 391 |
gradient_checkpointing: false
|
| 392 |
|
| 393 |
# stop training after this many evaluation losses have increased in a row
|