mtasic85 committed
Commit 76feacc · 1 Parent(s): 83564d9

cpt core 4

Files changed (1)
  1. scripts/cpt_core_model_4.py +17 -17
scripts/cpt_core_model_4.py CHANGED
@@ -32,28 +32,25 @@ model, tokenizer = FastLanguageModel.from_pretrained(
 
 model = FastLanguageModel.get_peft_model(
     model,
-    r = 256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
-    # r = 8, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
-    target_modules = [
+    r=256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+    target_modules=[
         "q_proj", "k_proj", "v_proj", "o_proj",
         "gate_proj",
         "up_proj", "down_proj",
         "embed_tokens", "lm_head",
     ],
-    lora_alpha = 32,
-    # lora_alpha = 2,
-    lora_dropout = 0, # Supports any, but = 0 is optimized
-    bias = "none", # Supports any, but = "none" is optimized
+    lora_alpha=32,
+    lora_dropout=0, # Supports any, but = 0 is optimized
+    bias="none", # Supports any, but = "none" is optimized
     # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
-    # use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
-    use_gradient_checkpointing = False,
-    random_state = 3407,
-    use_rslora = True, # We support rank stabilized LoRA
-    loftq_config = None, # And LoftQ
+    # use_gradient_checkpointing="unsloth", # True or "unsloth" for very long context
+    use_gradient_checkpointing=False,
+    random_state=23,
+    use_rslora=True, # We support rank stabilized LoRA
+    loftq_config=None, # And LoftQ
 )
 # print(f'{model=}')
 
-
 #
 # dataset
 #
@@ -89,21 +86,20 @@ trainer = UnslothTrainer(
     model=model,
     tokenizer=tokenizer,
     train_dataset=train_dataset,
-    # dataset_text_field='text',
     max_seq_length=max_seq_length,
     dataset_num_proc=32,
     max_steps=len(litgpt_streaming_dataset),
     packing=False, # Can make training 5x faster for short sequences.
 
-    args = UnslothTrainingArguments(
+    args=UnslothTrainingArguments(
         per_device_train_batch_size=1,
         # gradient_accumulation_steps=8,
 
         warmup_ratio=0,
         num_train_epochs=1,
 
-        learning_rate = 5e-5,
-        embedding_learning_rate = 5e-5 / 10.0,
+        learning_rate=5e-5,
+        embedding_learning_rate=5e-5 / 10.0,
 
         fp16=not is_bfloat16_supported(),
         bf16=is_bfloat16_supported(),
@@ -115,6 +111,10 @@ trainer = UnslothTrainer(
         seed=23,
         output_dir=output_dir,
         report_to='wandb',
+
+        do_eval=True,
+        save_steps=100,
+        eval_steps=100,
     ),
 )
 
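
For context on the first hunk: use_rslora=True keeps rank-stabilized LoRA enabled, which scales the adapter update by lora_alpha / sqrt(r) rather than the standard lora_alpha / r, so raising r to 256 while keeping lora_alpha=32 does not shrink the effective update the way standard scaling would. A minimal arithmetic sketch using the values from this commit (the variable names below are illustrative, not part of the script):

import math

# Values taken from scripts/cpt_core_model_4.py in this commit.
r = 256
lora_alpha = 32

# Standard LoRA scaling: alpha / r.
standard_scaling = lora_alpha / r            # 0.125

# Rank-stabilized LoRA (use_rslora=True): alpha / sqrt(r).
rslora_scaling = lora_alpha / math.sqrt(r)   # 2.0

print(f'{standard_scaling=} {rslora_scaling=}')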
 
 
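The last hunk adds do_eval=True, save_steps=100, and eval_steps=100 to the training arguments. A rough sketch of the resulting checkpoint/eval cadence, assuming one optimizer step per dataset item as the diff configures (per_device_train_batch_size=1, gradient accumulation left at its default of 1); the dataset length below is a placeholder, not a value from the script:

# Hypothetical length; in the script, max_steps is len(litgpt_streaming_dataset).
dataset_len = 200_000

# Mirrors the diff: max_steps equals the dataset length, and with
# per_device_train_batch_size=1 and no gradient accumulation that is
# roughly one optimizer step per item, i.e. about one pass over the data.
max_steps = dataset_len

save_steps = 100
eval_steps = 100

checkpoints_written = max_steps // save_steps   # 2_000 with the placeholder length
evaluations_run = max_steps // eval_steps       # 2_000 with the placeholder length

print(f'{max_steps=} {checkpoints_written=} {evaluations_run=}')

Note that under stock transformers.TrainingArguments semantics, step-based evaluation also needs an eval dataset and an evaluation strategy of "steps"; neither is visible in this diff, so whether evaluation actually runs here is an assumption left open.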