cpt core 4
Browse files
- scripts/cpt_core_model_4.py +10 -3
scripts/cpt_core_model_4.py
CHANGED
@@ -29,7 +29,7 @@ model, tokenizer = FastLanguageModel.from_pretrained(
|
|
29 |
model = FastLanguageModel.get_peft_model(
|
30 |
model,
|
31 |
# r = 256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
|
32 |
-
r =
|
33 |
target_modules = [
|
34 |
"q_proj", "k_proj", "v_proj", "o_proj",
|
35 |
"gate_proj",
|
@@ -49,6 +49,9 @@ model = FastLanguageModel.get_peft_model(
|
|
49 |
# print(f'{model=}')
|
50 |
|
51 |
|
|
|
|
|
|
|
52 |
from datasets import Dataset
|
53 |
from litdata import TokensLoader, StreamingDataset
|
54 |
|
@@ -58,6 +61,7 @@ litgpt_streaming_dataset = StreamingDataset(
|
|
58 |
item_loader=TokensLoader(block_size=dataset_block_size),
|
59 |
)
|
60 |
|
|
|
61 |
def unlsoth_generator():
|
62 |
global litgpt_streaming_dataset
|
63 |
|
@@ -68,7 +72,9 @@ def unlsoth_generator():
|
|
68 |
# train_dataset = Dataset.from_generator(unlsoth_generator, streaming=True)
|
69 |
train_dataset = Dataset.from_generator(unlsoth_generator)
|
70 |
|
71 |
-
|
|
|
|
|
72 |
from trl import SFTTrainer
|
73 |
from transformers import TrainingArguments
|
74 |
from unsloth import is_bfloat16_supported
|
@@ -104,7 +110,8 @@ trainer = UnslothTrainer(
|
|
104 |
fp16=not is_bfloat16_supported(),
|
105 |
bf16=is_bfloat16_supported(),
|
106 |
logging_steps=1,
|
107 |
-
optim='adamw_8bit',
|
|
|
108 |
weight_decay=0.01,
|
109 |
lr_scheduler_type='cosine',
|
110 |
seed=23,
|
|
|
29 |
model = FastLanguageModel.get_peft_model(
|
30 |
model,
|
31 |
# r = 256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
|
32 |
+
r = 8, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
|
33 |
target_modules = [
|
34 |
"q_proj", "k_proj", "v_proj", "o_proj",
|
35 |
"gate_proj",
|
|
|
49 |
# print(f'{model=}')
|
50 |
|
51 |
|
52 |
+
#
|
53 |
+
#
|
54 |
+
#
|
55 |
from datasets import Dataset
|
56 |
from litdata import TokensLoader, StreamingDataset
|
57 |
|
|
|
61 |
item_loader=TokensLoader(block_size=dataset_block_size),
|
62 |
)
|
63 |
|
64 |
+
|
65 |
def unlsoth_generator():
|
66 |
global litgpt_streaming_dataset
|
67 |
|
|
|
72 |
# train_dataset = Dataset.from_generator(unlsoth_generator, streaming=True)
|
73 |
train_dataset = Dataset.from_generator(unlsoth_generator)
|
74 |
|
75 |
+
#
|
76 |
+
#
|
77 |
+
#
|
78 |
from trl import SFTTrainer
|
79 |
from transformers import TrainingArguments
|
80 |
from unsloth import is_bfloat16_supported
|
|
|
110 |
fp16=not is_bfloat16_supported(),
|
111 |
bf16=is_bfloat16_supported(),
|
112 |
logging_steps=1,
|
113 |
+
# optim='adamw_8bit',
|
114 |
+
optim='adamw',
|
115 |
weight_decay=0.01,
|
116 |
lr_scheduler_type='cosine',
|
117 |
seed=23,
|