mtasic85 committed on
Commit
eef5b70
·
1 Parent(s): 4f4772d

cpt core 4

Browse files
Files changed (1) hide show
  1. scripts/cpt_core_model_4.py +9 -27
scripts/cpt_core_model_4.py CHANGED
@@ -45,27 +45,6 @@ model = FastLanguageModel.get_peft_model(
45
 
46
  print(f'{model=}')
47
 
48
- '''
49
- from datasets import concatenate_datasets
50
- from cpt_base_datasets import cpt_base_datasets
51
- from cpt_instruct_datasets import cpt_instruct_datasets
52
- from unsloth_utils import load_text_dataset, load_chat_dataset
53
-
54
- core_datasets = []
55
-
56
- for dataset_config in cpt_base_datasets:
57
- dataset = load_text_dataset(tokenizer, **dataset_config)
58
- print(f'{dataset=}')
59
- core_datasets.append(dataset)
60
-
61
- # for dataset_config in cpt_instruct_datasets:
62
- # dataset = load_chat_dataset(tokenizer, **dataset_config)
63
- # print(f'{dataset=}')
64
- # core_datasets.append(dataset)
65
-
66
- final_dataset = concatenate_datasets(core_datasets)
67
- print(f'{final_dataset=}')
68
- '''
69
 
70
  from datasets import Dataset
71
  from litdata import TokensLoader, StreamingDataset
@@ -97,20 +76,23 @@ trainer = UnslothTrainer(
97
  model=model,
98
  tokenizer=tokenizer,
99
  train_dataset=train_dataset,
100
- dataset_text_field='text',
101
  max_seq_length=max_seq_length,
102
  dataset_num_proc=32,
103
  max_steps=len(litgpt_streaming_dataset),
 
104
 
105
  args = UnslothTrainingArguments(
106
- per_device_train_batch_size=8,
107
- gradient_accumulation_steps=8,
108
 
109
- warmup_ratio=0.1,
110
  num_train_epochs=1,
111
 
112
- learning_rate=5e-5,
113
- embedding_learning_rate=5e-6,
 
 
114
 
115
  fp16=not is_bfloat16_supported(),
116
  bf16=is_bfloat16_supported(),
 
45
 
46
  print(f'{model=}')
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  from datasets import Dataset
50
  from litdata import TokensLoader, StreamingDataset
 
76
  model=model,
77
  tokenizer=tokenizer,
78
  train_dataset=train_dataset,
79
+ # dataset_text_field='text',
80
  max_seq_length=max_seq_length,
81
  dataset_num_proc=32,
82
  max_steps=len(litgpt_streaming_dataset),
83
+ packing = False, # Can make training 5x faster for short sequences.
84
 
85
  args = UnslothTrainingArguments(
86
+ per_device_train_batch_size=16,
87
+ gradient_accumulation_steps=64,
88
 
89
+ warmup_ratio=0,
90
  num_train_epochs=1,
91
 
92
+ # learning_rate=5e-5,
93
+ # embedding_learning_rate=5e-6,
94
+ learning_rate = 5e-5 * 2,
95
+ embedding_learning_rate = 5e-5 / 2,
96
 
97
  fp16=not is_bfloat16_supported(),
98
  bf16=is_bfloat16_supported(),