mtasic85 committed
Commit 10d6112 · Parent: 1dd7bef

cpt core 4

Files changed (1)
  1. scripts/cpt_core_model_4.py +16 -6
scripts/cpt_core_model_4.py CHANGED
@@ -33,14 +33,16 @@ model, tokenizer = FastLanguageModel.from_pretrained(

model = FastLanguageModel.get_peft_model(
    model,
-   r=256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+   # r=256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+   r=16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj",
        "up_proj", "down_proj",
        "embed_tokens", "lm_head",
    ],
-   lora_alpha=32,
+   # lora_alpha=32,
+   lora_alpha=16,
    lora_dropout=0, # Supports any, but = 0 is optimized
    bias="none", # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
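This hunk drops the LoRA rank r from 256 to 16 and lora_alpha from 32 to 16, which shrinks the adapter substantially. A back-of-the-envelope sketch of the effect per adapted linear layer, using the standard LoRA parameter count r * (in_features + out_features); the hidden sizes below are hypothetical, not taken from this model's config:

# LoRA adds A (r x in_features) and B (out_features x r) to each adapted layer.
# The sizes below are assumptions for illustration only.
def lora_params(in_features: int, out_features: int, r: int) -> int:
    return r * in_features + out_features * r

in_f = out_f = 2048  # hypothetical projection shape

for r in (256, 16):
    print(f'r={r}: {lora_params(in_f, out_f, r):,} adapter params in this layer')
# r=256 carries 16x the adapter parameters of r=16 for the same layer.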
@@ -73,6 +75,7 @@ def unlsoth_generator():


train_dataset = Dataset.from_generator(unlsoth_generator)
+ dataset = train_dataset.train_test_split(test_size=0.01)

#
# trainer
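The new train_test_split(test_size=0.01) call holds out 1% of the generator-built dataset for evaluation. A minimal, self-contained sketch of the same pattern with the Hugging Face datasets library, using a toy generator as a stand-in for unlsoth_generator():

from datasets import Dataset

def toy_generator():
    # stand-in for unlsoth_generator(); yields dict rows like the real dataset
    for i in range(1000):
        yield {'text': f'example {i}'}

train_dataset = Dataset.from_generator(toy_generator)
dataset = train_dataset.train_test_split(test_size=0.01)  # 1% eval split
print(len(dataset['train']), len(dataset['test']))  # 990 10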
@@ -86,9 +89,11 @@ from unsloth import UnslothTrainer, UnslothTrainingArguments
trainer = UnslothTrainer(
    model=model,
    tokenizer=tokenizer,
-   train_dataset=train_dataset,
-   max_seq_length=max_seq_length,
+   # train_dataset=train_dataset,
+   train_dataset=dataset['train'],
+   eval_dataset=dataset['test'],
    dataset_num_proc=32,
+   max_seq_length=max_seq_length,
    max_steps=len(litgpt_streaming_dataset),
    packing=False, # Can make training 5x faster for short sequences.

@@ -112,11 +117,16 @@ trainer = UnslothTrainer(
        seed=23,
        output_dir=output_dir,
        report_to='wandb',
+
+       save_steps=100,
        run_name=run_name,

        do_eval=True,
-       save_steps=100,
-       eval_steps=100,
+       fp16_full_eval=True,
+       per_device_eval_batch_size=2,
+       eval_accumulation_steps=4,
+       eval_strategy='steps',
+       eval_steps=10,
    ),
)
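The reworked save/eval settings turn on periodic evaluation while keeping its memory footprint small: eval every 10 steps, checkpoints every 100, tiny per-device eval batches, accumulated eval outputs, and a float16 eval forward pass. A sketch of the same flags on a plain Hugging Face TrainingArguments, assuming UnslothTrainingArguments forwards them unchanged; output_dir here is a placeholder:

from transformers import TrainingArguments

args = TrainingArguments(
    output_dir='out',              # placeholder
    do_eval=True,
    eval_strategy='steps',         # evaluate every eval_steps
    eval_steps=10,
    save_steps=100,                # checkpoint every 100 steps
    per_device_eval_batch_size=2,  # small eval batches to cap VRAM
    eval_accumulation_steps=4,     # move accumulated eval outputs to CPU every 4 steps
    fp16_full_eval=True,           # run the eval forward pass in float16
)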
 
 