mtasic85 committed on
Commit
8b6e5e1
·
1 Parent(s): accd6a7

cpt core 4

Browse files
Files changed (1) hide show
  1. scripts/cpt_core_model_4.py +10 -3
scripts/cpt_core_model_4.py CHANGED
@@ -29,7 +29,7 @@ model, tokenizer = FastLanguageModel.from_pretrained(
29
  model = FastLanguageModel.get_peft_model(
30
  model,
31
  # r = 256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
32
- r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
33
  target_modules = [
34
  "q_proj", "k_proj", "v_proj", "o_proj",
35
  "gate_proj",
@@ -49,6 +49,9 @@ model = FastLanguageModel.get_peft_model(
49
  # print(f'{model=}')
50
 
51
 
 
 
 
52
  from datasets import Dataset
53
  from litdata import TokensLoader, StreamingDataset
54
 
@@ -58,6 +61,7 @@ litgpt_streaming_dataset = StreamingDataset(
58
  item_loader=TokensLoader(block_size=dataset_block_size),
59
  )
60
 
 
61
  def unlsoth_generator():
62
  global litgpt_streaming_dataset
63
 
@@ -68,7 +72,9 @@ def unlsoth_generator():
68
  # train_dataset = Dataset.from_generator(unlsoth_generator, streaming=True)
69
  train_dataset = Dataset.from_generator(unlsoth_generator)
70
 
71
-
 
 
72
  from trl import SFTTrainer
73
  from transformers import TrainingArguments
74
  from unsloth import is_bfloat16_supported
@@ -104,7 +110,8 @@ trainer = UnslothTrainer(
104
  fp16=not is_bfloat16_supported(),
105
  bf16=is_bfloat16_supported(),
106
  logging_steps=1,
107
- optim='adamw_8bit',
 
108
  weight_decay=0.01,
109
  lr_scheduler_type='cosine',
110
  seed=23,
 
29
  model = FastLanguageModel.get_peft_model(
30
  model,
31
  # r = 256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
32
+ r = 8, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
33
  target_modules = [
34
  "q_proj", "k_proj", "v_proj", "o_proj",
35
  "gate_proj",
 
49
  # print(f'{model=}')
50
 
51
 
52
+ #
53
+ #
54
+ #
55
  from datasets import Dataset
56
  from litdata import TokensLoader, StreamingDataset
57
 
 
61
  item_loader=TokensLoader(block_size=dataset_block_size),
62
  )
63
 
64
+
65
  def unlsoth_generator():
66
  global litgpt_streaming_dataset
67
 
 
72
  # train_dataset = Dataset.from_generator(unlsoth_generator, streaming=True)
73
  train_dataset = Dataset.from_generator(unlsoth_generator)
74
 
75
+ #
76
+ #
77
+ #
78
  from trl import SFTTrainer
79
  from transformers import TrainingArguments
80
  from unsloth import is_bfloat16_supported
 
110
  fp16=not is_bfloat16_supported(),
111
  bf16=is_bfloat16_supported(),
112
  logging_steps=1,
113
+ # optim='adamw_8bit',
114
+ optim='adamw',
115
  weight_decay=0.01,
116
  lr_scheduler_type='cosine',
117
  seed=23,