mtasic85 committed
Commit 041c526
1 Parent(s): 404ab86

cpt core 4

Files changed (1):
  1. scripts/cpt_core_model_4.py +5 -5
scripts/cpt_core_model_4.py CHANGED
@@ -28,16 +28,16 @@ model, tokenizer = FastLanguageModel.from_pretrained(
 
 model = FastLanguageModel.get_peft_model(
     model,
-    # r = 256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
-    r = 8, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+    r = 256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+    # r = 8, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
     target_modules = [
         "q_proj", "k_proj", "v_proj", "o_proj",
         "gate_proj",
         "up_proj", "down_proj",
         "embed_tokens", "lm_head",
     ],
-    # lora_alpha = 32,
-    lora_alpha = 2,
+    lora_alpha = 32,
+    # lora_alpha = 2,
     lora_dropout = 0, # Supports any, but = 0 is optimized
     bias = "none", # Supports any, but = "none" is optimized
     # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
@@ -97,7 +97,7 @@ trainer = UnslothTrainer(
     # gradient_accumulation_steps=64,
     # per_device_train_batch_size=16,
     # gradient_accumulation_steps=16,
-    per_device_train_batch_size=2,
+    per_device_train_batch_size=1,
     # gradient_accumulation_steps=8,
 
     warmup_ratio=0,
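
Note (not from the commit itself): the effective batch size per optimizer step is per_device_train_batch_size * gradient_accumulation_steps * number of devices, so dropping per_device_train_batch_size from 2 to 1 halves it unless gradient accumulation is raised to compensate. A sketch with assumed values, since the commit leaves gradient_accumulation_steps commented out:

# Hypothetical values; gradient_accumulation_steps and the device count are
# assumptions for illustration, not taken from the commit.
per_device_train_batch_size = 1
gradient_accumulation_steps = 8
num_devices = 1
effective_batch_size = per_device_train_batch_size * gradient_accumulation_steps * num_devices
print(effective_batch_size)  # 8 samples per optimizer step
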
 