cpt core 4
scripts/cpt_core_model_4.py
CHANGED
@@ -28,16 +28,16 @@ model, tokenizer = FastLanguageModel.from_pretrained(
 
 model = FastLanguageModel.get_peft_model(
     model,
-
-    r = 8, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+    r = 256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+    # r = 8, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
     target_modules = [
         "q_proj", "k_proj", "v_proj", "o_proj",
         "gate_proj",
         "up_proj", "down_proj",
         "embed_tokens", "lm_head",
     ],
-
-    lora_alpha = 2,
+    lora_alpha = 32,
+    # lora_alpha = 2,
     lora_dropout = 0, # Supports any, but = 0 is optimized
     bias = "none", # Supports any, but = "none" is optimized
     # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
@@ -97,7 +97,7 @@ trainer = UnslothTrainer(
     # gradient_accumulation_steps=64,
     # per_device_train_batch_size=16,
     # gradient_accumulation_steps=16,
-    per_device_train_batch_size=
+    per_device_train_batch_size=1,
     # gradient_accumulation_steps=8,
 
     warmup_ratio=0,
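For context, the sketch below shows roughly how the settings changed in this commit would sit in a full Unsloth continued-pretraining script. It is an illustration, not the repository's actual code: the model name, max_seq_length, dataset, and every trainer argument other than per_device_train_batch_size and warmup_ratio are placeholder assumptions, since the diff does not show them, and argument placement may vary slightly with the installed unsloth/trl versions.

from unsloth import FastLanguageModel, UnslothTrainer, UnslothTrainingArguments
from datasets import load_dataset

max_seq_length = 2048  # placeholder; not visible in this diff

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-v0.3-bnb-4bit",  # placeholder model
    max_seq_length = max_seq_length,
    load_in_4bit = True,
)

# LoRA config as of this commit: rank raised from 8 to 256, alpha from 2 to 32.
model = FastLanguageModel.get_peft_model(
    model,
    r = 256,
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
        "embed_tokens", "lm_head",  # also trained, as is typical for continued pretraining
    ],
    lora_alpha = 32,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",  # the "[NEW]" option the diff comment refers to
)

dataset = load_dataset("text", data_files = "corpus.txt", split = "train")  # placeholder corpus

trainer = UnslothTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    args = UnslothTrainingArguments(
        per_device_train_batch_size = 1,  # value set in this commit
        warmup_ratio = 0,                 # as in the diff
        num_train_epochs = 1,             # placeholder
        learning_rate = 5e-5,             # placeholder
        embedding_learning_rate = 5e-6,   # placeholder; usually lower for embed_tokens/lm_head
        optim = "adamw_8bit",             # placeholder
        output_dir = "outputs",           # placeholder
    ),
)
trainer.train()

One side effect of the change worth noting: with standard (non-rank-stabilized) LoRA scaling, the old pair (r = 8, lora_alpha = 2) gave an effective scale of alpha/r = 0.25, while the new pair (r = 256, lora_alpha = 32) gives 0.125, so the much larger rank is partly offset by a smaller per-weight scaling.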