cpt core 4

scripts/cpt_core_model_4.py (+9 -27)
@@ -45,27 +45,6 @@ model = FastLanguageModel.get_peft_model(
 
 print(f'{model=}')
 
-'''
-from datasets import concatenate_datasets
-from cpt_base_datasets import cpt_base_datasets
-from cpt_instruct_datasets import cpt_instruct_datasets
-from unsloth_utils import load_text_dataset, load_chat_dataset
-
-core_datasets = []
-
-for dataset_config in cpt_base_datasets:
-    dataset = load_text_dataset(tokenizer, **dataset_config)
-    print(f'{dataset=}')
-    core_datasets.append(dataset)
-
-# for dataset_config in cpt_instruct_datasets:
-#     dataset = load_chat_dataset(tokenizer, **dataset_config)
-#     print(f'{dataset=}')
-#     core_datasets.append(dataset)
-
-final_dataset = concatenate_datasets(core_datasets)
-print(f'{final_dataset=}')
-'''
 
 from datasets import Dataset
 from litdata import TokensLoader, StreamingDataset
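Note: the deleted block built the corpus by concatenating Hugging Face text datasets, while the retained imports (Dataset from datasets, TokensLoader and StreamingDataset from litdata) point at pre-tokenized streaming input instead. A minimal sketch of that pattern, assuming a litdata-optimized token directory; the 'data/cpt_core' path and the max_seq_length value are placeholders, not taken from the commit:

    from datasets import Dataset
    from litdata import StreamingDataset, TokensLoader

    max_seq_length = 4096  # placeholder; the real value is set earlier in the script

    # Stream fixed-size blocks of token ids from a litdata-optimized directory.
    litgpt_streaming_dataset = StreamingDataset(
        input_dir='data/cpt_core',  # placeholder path
        item_loader=TokensLoader(block_size=max_seq_length),
    )

    # Wrap the stream in a Hugging Face Dataset for the trainer. Items are
    # already token ids, which is consistent with dataset_text_field='text'
    # being commented out in the second hunk below.
    def token_blocks():
        for tokens in litgpt_streaming_dataset:
            yield {'input_ids': tokens.tolist()}

    train_dataset = Dataset.from_generator(token_blocks)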
@@ -97,20 +76,23 @@ trainer = UnslothTrainer(
     model=model,
     tokenizer=tokenizer,
     train_dataset=train_dataset,
-    dataset_text_field='text',
+    # dataset_text_field='text',
     max_seq_length=max_seq_length,
     dataset_num_proc=32,
     max_steps=len(litgpt_streaming_dataset),
+    packing = False, # Can make training 5x faster for short sequences.
 
     args = UnslothTrainingArguments(
-        per_device_train_batch_size=
-        gradient_accumulation_steps=
+        per_device_train_batch_size=16,
+        gradient_accumulation_steps=64,
 
-        warmup_ratio=0
+        warmup_ratio=0,
         num_train_epochs=1,
 
-        learning_rate=5e-5,
-        embedding_learning_rate=5e-6,
+        # learning_rate=5e-5,
+        # embedding_learning_rate=5e-6,
+        learning_rate = 5e-5 * 2,
+        embedding_learning_rate = 5e-5 / 2,
 
         fp16=not is_bfloat16_supported(),
         bf16=is_bfloat16_supported(),
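Note: with these values, one optimizer step consumes per_device_train_batch_size x gradient_accumulation_steps = 16 x 64 = 1024 sequences per device, and the split learning rates resolve to 1e-4 for the adapter weights and 2.5e-5 for the embeddings, i.e. the embeddings train 4x slower, in line with Unsloth's continued-pretraining guidance of using a smaller embedding_learning_rate. A quick sanity check of the arithmetic:

    per_device_train_batch_size = 16
    gradient_accumulation_steps = 64
    effective_batch = per_device_train_batch_size * gradient_accumulation_steps
    assert effective_batch == 1024  # sequences per optimizer step, per device

    learning_rate = 5e-5 * 2            # 1e-4 for the LoRA adapter weights
    embedding_learning_rate = 5e-5 / 2  # 2.5e-5 for embed_tokens / lm_head
    assert learning_rate / embedding_learning_rate == 4.0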