cpt core 4
scripts/cpt_core_model_4.py (CHANGED)
@@ -37,17 +37,17 @@ model = FastLanguageModel.get_peft_model(
     r=256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
     # r=16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
     target_modules=[
-
-
-
-
+        'q_proj', 'k_proj', 'v_proj', 'o_proj',
+        'gate_proj',
+        'up_proj', 'down_proj',
+        'embed_tokens', 'lm_head',
     ],
     lora_alpha=32,
     # lora_alpha=16,
     lora_dropout=0, # Supports any, but = 0 is optimized
     bias='none', # Supports any, but = "none" is optimized
     # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
-    # use_gradient_checkpointing=
+    # use_gradient_checkpointing='unsloth', # True or "unsloth" for very long context
     use_gradient_checkpointing=False,
     random_state=23,
     use_rslora=True, # We support rank stabilized LoRA
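For context, this is how the full call reads after the commit. The sketch below is not a verbatim copy of the script: the from_pretrained block, the model name, and max_seq_length are assumptions added to make the example self-contained, and only the keyword arguments visible in the hunk above come from the file.

from unsloth import FastLanguageModel

# Assumed setup (not part of this diff): model name and sequence length are placeholders.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/mistral-7b-v0.3-bnb-4bit",  # hypothetical base model
    max_seq_length=2048,                            # hypothetical value
    load_in_4bit=True,
)

# These keyword arguments match the state of scripts/cpt_core_model_4.py after this commit.
model = FastLanguageModel.get_peft_model(
    model,
    r=256,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules=[
        'q_proj', 'k_proj', 'v_proj', 'o_proj',
        'gate_proj',
        'up_proj', 'down_proj',
        # Embedding and output head are included so CPT can adapt them too.
        'embed_tokens', 'lm_head',
    ],
    lora_alpha=32,
    lora_dropout=0,  # Supports any, but = 0 is optimized
    bias='none',  # Supports any, but = "none" is optimized
    use_gradient_checkpointing=False,  # True or "unsloth" for very long context
    random_state=23,
    use_rslora=True,  # rank stabilized LoRA
)

Two notes on the changed values. Adding 'embed_tokens' and 'lm_head' to target_modules follows Unsloth's continued-pretraining recipe, which trains the embeddings and output head so the model can pick up new-domain vocabulary. And with use_rslora=True the adapter update is scaled by lora_alpha / sqrt(r) instead of the standard lora_alpha / r, so at r=256 and lora_alpha=32 the effective scale is 32/16 = 2.0 rather than 32/256 = 0.125; without rank stabilization, a rank this high would shrink the update toward zero.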