cpt core 4
scripts/cpt_core_model_4.py
CHANGED
@@ -37,17 +37,17 @@ model = FastLanguageModel.get_peft_model(
     r=256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
     # r=16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
     target_modules=[
-
-
-
-
+        'q_proj', 'k_proj', 'v_proj', 'o_proj',
+        'gate_proj',
+        'up_proj', 'down_proj',
+        'embed_tokens', 'lm_head',
     ],
     lora_alpha=32,
     # lora_alpha=16,
     lora_dropout=0, # Supports any, but = 0 is optimized
     bias='none', # Supports any, but = "none" is optimized
     # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
-    # use_gradient_checkpointing=
+    # use_gradient_checkpointing='unsloth', # True or "unsloth" for very long context
     use_gradient_checkpointing=False,
     random_state=23,
     use_rslora=True, # We support rank stabilized LoRA
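For reference, a minimal sketch of how the post-change get_peft_model call could sit in a runnable Unsloth script. The base model name, max_seq_length, and load_in_4bit flag are assumptions for illustration (the diff does not show them); the get_peft_model arguments mirror the new state of scripts/cpt_core_model_4.py.

from unsloth import FastLanguageModel

# Hypothetical base model and context length -- neither appears in this diff.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name='unsloth/mistral-7b-v0.3',  # assumption: any Unsloth-supported base model
    max_seq_length=2048,                   # assumption
    load_in_4bit=True,                     # assumption: 4-bit QLoRA-style loading
)

model = FastLanguageModel.get_peft_model(
    model,
    r=256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules=[
        'q_proj', 'k_proj', 'v_proj', 'o_proj',
        'gate_proj',
        'up_proj', 'down_proj',
        # Adapting the embedding matrix and output head as well,
        # not just the attention/MLP projections.
        'embed_tokens', 'lm_head',
    ],
    lora_alpha=32,
    lora_dropout=0, # Supports any, but = 0 is optimized
    bias='none', # Supports any, but = "none" is optimized
    use_gradient_checkpointing=False, # True or "unsloth" for very long context
    random_state=23,
    use_rslora=True, # rank stabilized LoRA
)

Putting 'embed_tokens' and 'lm_head' in target_modules matches the pattern Unsloth's continued-pretraining examples use, since adapting a model to new-domain text typically benefits from updating the token embeddings and output head along with the attention and MLP layers.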