mtasic85 committed
Commit 9c11de1 · Parent: a705f26

cpt core 4

Files changed (1):
  1. scripts/cpt_core_model_4.py  +3 −4
scripts/cpt_core_model_4.py CHANGED
@@ -7,15 +7,14 @@ from transformers import AutoTokenizer
 os.environ['WANDB_PROJECT'] = 'tangled-alpha-0.9-core'
 run_name = 'cpt-core-4'
 
+dataset_input_dir = '../core-data-4-8193-16385-16385-1000/'
+dataset_block_size = 16385
 max_seq_length = 16385
 dtype = torch.bfloat16
 load_in_4bit = False
 model_name = '../out/pretrain-core-3/hf'
 output_dir = '../out/cpt-core-4'
 
-dataset_input_dir = '../core-data-4-8193-16385-16385-1000/'
-dataset_block_size = 16385
-
 #
 # model
 #
@@ -44,7 +43,7 @@ model = FastLanguageModel.get_peft_model(
     # lora_alpha=32,
     lora_alpha=16,
     lora_dropout=0, # Supports any, but = 0 is optimized
-    bias="none", # Supports any, but = "none" is optimized
+    bias='none', # Supports any, but = "none" is optimized
     # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
     # use_gradient_checkpointing="unsloth", # True or "unsloth" for very long context
     use_gradient_checkpointing=False,
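
For context, a minimal sketch of how the settings touched by this diff are typically wired together with unsloth's FastLanguageModel API. This is an assumption based on the standard unsloth workflow, not the repository's full script; the r, target_modules, and random_state values below are illustrative placeholders that do not appear in the diff.

# Minimal sketch (assumed wiring): load the pretrain-core-3 checkpoint with
# unsloth, then attach the LoRA adapters configured in the diff above.
import os
import torch
from unsloth import FastLanguageModel

os.environ['WANDB_PROJECT'] = 'tangled-alpha-0.9-core'

max_seq_length = 16385
dtype = torch.bfloat16
load_in_4bit = False
model_name = '../out/pretrain-core-3/hf'

# Load the base checkpoint in bf16 (no 4-bit quantization).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Wrap the model with LoRA adapters; lora_alpha, lora_dropout, bias and
# use_gradient_checkpointing mirror the diff, the rest are assumed values.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,                      # assumed LoRA rank
    target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj',
                    'gate_proj', 'up_proj', 'down_proj'],  # assumed
    lora_alpha=16,
    lora_dropout=0,            # 0 is the optimized path in unsloth
    bias='none',               # 'none' is the optimized path in unsloth
    use_gradient_checkpointing=False,
    random_state=23,           # assumed seed
)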