File size: 699 Bytes
9083130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Launches train.py through `accelerate launch` (config file: brain.yaml),
# forwarding the settings below as command-line flags.
#
# -x traces every command, -e aborts on the first failing command, and
# -u turns a reference to an unset (e.g. mistyped) variable into an error
# instead of silently passing an empty argument to the trainer.
set -xeu

# Run settings. Each one is forwarded to train.py under the flag noted.
BS=32        # --train_batch_size
LR=5e-2      # --learning_rate
HS=32        # --hidden_size
LEN=32       # --max_seq_length; also embedded in the --output_dir name
INITIAL_F=0  # --initial_file_number
END_F=0      # --end_file_number
EPOCH=25     # --num_train_epochs

# All expansions are quoted so that an edited value containing whitespace or
# glob characters still reaches train.py as exactly one argument.
# NOTE(review): --run_name encodes INITIAL_F but not END_F; runs that differ
# only in END_F will share a run name — confirm this is intended.
accelerate launch --config_file brain.yaml train.py \
  --data_dir data \
  --do_train \
  --output_dir "en_checkpoints_len${LEN}_sparse" \
  --hidden_size "$HS" \
  --train_batch_size "$BS" \
  --max_seq_length "$LEN" \
  --learning_rate "$LR" \
  --num_train_epochs "$EPOCH" \
  --num_neg_samples 400 \
  --initial_file_number "$INITIAL_F" \
  --end_file_number "$END_F" \
  --num_workers 8 \
  --fp16 \
  --run_name "BS${BS}_LR${LR}_HS${HS}_LEN${LEN}_f${INITIAL_F}_EPOCH${EPOCH}" \
  --vocab_path vocab_wiki_4k_en.json \
  --train_full data \
  --sparse \
  --use_frequency \
  --use_bpe \
  --bpe_tokenizer wiki_bpe_tokenizer_4000_bytelevel.json