cpt core 4
scripts/cpt_core_model_4.py
CHANGED
@@ -28,16 +28,16 @@ model, tokenizer = FastLanguageModel.from_pretrained(
 
 model = FastLanguageModel.get_peft_model(
     model,
-
-    r = 8, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+    r = 256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+    # r = 8, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
     target_modules = [
         "q_proj", "k_proj", "v_proj", "o_proj",
         "gate_proj",
         "up_proj", "down_proj",
         "embed_tokens", "lm_head",
     ],
-
-    lora_alpha = 2,
+    lora_alpha = 32,
+    # lora_alpha = 2,
     lora_dropout = 0, # Supports any, but = 0 is optimized
     bias = "none", # Supports any, but = "none" is optimized
     # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
@@ -97,7 +97,7 @@ trainer = UnslothTrainer(
     # gradient_accumulation_steps=64,
     # per_device_train_batch_size=16,
     # gradient_accumulation_steps=16,
-    per_device_train_batch_size=
+    per_device_train_batch_size=1,
     # gradient_accumulation_steps=8,
 
     warmup_ratio=0,
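For context, the sketch below shows roughly how the settings changed in this commit would sit in a full Unsloth continued-pretraining script. It is an illustration, not the repository's actual code: the model name, max_seq_length, dataset, and every trainer argument other than per_device_train_batch_size and warmup_ratio are placeholder assumptions, since the diff does not show them, and argument placement may vary slightly with the installed unsloth/trl versions.

from unsloth import FastLanguageModel, UnslothTrainer, UnslothTrainingArguments
from datasets import load_dataset

max_seq_length = 2048  # placeholder; not visible in this diff

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-v0.3-bnb-4bit",  # placeholder model
    max_seq_length = max_seq_length,
    load_in_4bit = True,
)

# LoRA config as of this commit: rank raised from 8 to 256, alpha from 2 to 32.
model = FastLanguageModel.get_peft_model(
    model,
    r = 256,
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
        "embed_tokens", "lm_head",  # also trained, as is typical for continued pretraining
    ],
    lora_alpha = 32,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",  # the "[NEW]" option the diff comment refers to
)

dataset = load_dataset("text", data_files = "corpus.txt", split = "train")  # placeholder corpus

trainer = UnslothTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    args = UnslothTrainingArguments(
        per_device_train_batch_size = 1,  # value set in this commit
        warmup_ratio = 0,                 # as in the diff
        num_train_epochs = 1,             # placeholder
        learning_rate = 5e-5,             # placeholder
        embedding_learning_rate = 5e-6,   # placeholder; usually lower for embed_tokens/lm_head
        optim = "adamw_8bit",             # placeholder
        output_dir = "outputs",           # placeholder
    ),
)
trainer.train()

One side effect of the change worth noting: with standard (non-rank-stabilized) LoRA scaling, the old pair (r = 8, lora_alpha = 2) gave an effective scale of alpha/r = 0.25, while the new pair (r = 256, lora_alpha = 32) gives 0.125, so the much larger rank is partly offset by a smaller per-weight scaling.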