cpt core 4
- .gitignore +2 -0
- scripts/cpt_core_model_4.py +2 -0
.gitignore
CHANGED
@@ -169,3 +169,5 @@ pretrain-data/
 contrain-data/
 core-data-*/
 out/pretrain-core/step-*/
+scripts/out/
+scripts/unsloth_compiled_cache/
scripts/cpt_core_model_4.py
CHANGED
@@ -5,6 +5,7 @@ import torch
 from transformers import AutoTokenizer

 os.environ['WANDB_PROJECT'] = 'tangled-alpha-0.9-core'
+run_name = 'cpt-core-4'

 max_seq_length = 16385
 dtype = torch.bfloat16
@@ -111,6 +112,7 @@ trainer = UnslothTrainer(
     seed=23,
     output_dir=output_dir,
     report_to='wandb',
+    run_name=run_name,

     do_eval=True,
     save_steps=100,
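The Python change is Weights & Biases plumbing: the script already sets WANDB_PROJECT to 'tangled-alpha-0.9-core' and reports to wandb, and the commit adds an explicit run name so the run is labelled 'cpt-core-4' instead of an auto-generated name. A minimal sketch of how those pieces fit together, assuming Unsloth's UnslothTrainingArguments wrapper; only the fields visible in the diff are taken from the script, and the output_dir value here is a placeholder:

import os
import torch
from unsloth import UnslothTrainer, UnslothTrainingArguments

# W&B project and run name (from the diff): report_to='wandb' turns on W&B
# logging, and run_name makes the run appear as "cpt-core-4" in that project.
os.environ['WANDB_PROJECT'] = 'tangled-alpha-0.9-core'
run_name = 'cpt-core-4'

max_seq_length = 16385
dtype = torch.bfloat16

args = UnslothTrainingArguments(
    seed=23,
    output_dir='out/cpt-core-4',  # placeholder; the script uses its own output_dir variable
    report_to='wandb',
    run_name=run_name,            # new in this commit
    do_eval=True,
    save_steps=100,
)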