Spaces:
Runtime error
Runtime error
#######################################
# Run fairseq-train with the option set shared by every model size below.
# Globals:
#   DATA (read) - passed, quoted, as the first positional argument of
#                 fairseq-train.
# Arguments:
#   "$@" - extra fairseq-train options; they are appended AFTER the shared
#          options. The size-specific wrappers in this file rely on that to
#          repeat options such as --dropout or --batch-size; presumably the
#          later occurrence wins in fairseq's argument parser - confirm.
# Returns:
#   Exit status of fairseq-train.
#######################################
train_common () {
  fairseq-train "$DATA" \
    --combine-val \
    --train-subset train \
    --num-workers 2 \
    --validate-interval-updates 1000 \
    --save-interval-updates 1000 \
    --no-epoch-checkpoints \
    --ddp-backend fully_sharded \
    --memory-efficient-fp16 \
    --fp16-init-scale 4 \
    --checkpoint-activations \
    --arch transformer_lm_gpt \
    --activation-fn gelu \
    --share-decoder-input-output-embed \
    --task language_modeling \
    --sample-break-mode none \
    --tokens-per-sample 2048 \
    --optimizer adam --adam-betas "(0.9, 0.98)" \
    --adam-eps 1e-08 \
    --clip-norm 0.0 \
    --lr-scheduler polynomial_decay \
    --warmup-updates 750 \
    --dropout 0.1 \
    --attention-dropout 0.1 \
    --weight-decay 0.01 \
    --batch-size 16 \
    --update-freq 2 \
    --required-batch-size-multiple 1 \
    --total-num-update 572204 \
    --max-update 572204 \
    --seed 1 \
    --log-format json --log-interval 1 \
    --distributed-world-size 8 --distributed-port 13177 \
    "$@"
}
#######################################
# Run train_common with the decoder shape of the configuration named 125M:
# 12 layers, embed dim 768, FFN dim 3072, 12 attention heads.
# Arguments:
#   "$@" - extra options, forwarded after the size-specific ones.
# Returns:
#   Exit status of train_common.
#######################################
train_125M () {
  train_common --decoder-layers 12 \
    --decoder-embed-dim 768 \
    --decoder-ffn-embed-dim 3072 \
    --decoder-attention-heads 12 "$@"
}
#######################################
# Run train_common with the decoder shape of the configuration named 355M:
# 24 layers, embed dim 1024, FFN dim 4096, 16 attention heads. Also passes
# --dropout 0.0 and --attention-dropout 0.0 after train_common's own 0.1
# values (presumably the later occurrence takes effect - confirm).
# Arguments:
#   "$@" - extra options, forwarded after the size-specific ones.
# Returns:
#   Exit status of train_common.
#######################################
train_355M () {
  # Keep a space before every trailing backslash: without it the value and
  # the next option are separated only by the next line's indentation.
  train_common --decoder-layers 24 \
    --decoder-embed-dim 1024 \
    --decoder-ffn-embed-dim 4096 \
    --decoder-attention-heads 16 \
    --dropout 0.0 \
    --attention-dropout 0.0 \
    "$@"
}
#######################################
# Run train_common with the decoder shape of the configuration named 1.3B:
# 24 layers, embed dim 2048, FFN dim 8192, 32 attention heads. Also passes
# --batch-size 4, --update-freq 16 and 286102 for both --total-num-update
# and --max-update, after train_common's own values for those options
# (presumably the later occurrence takes effect - confirm).
# Note: the dotted function name is accepted by Bash but is not a portable
# POSIX sh identifier.
# Arguments:
#   "$@" - extra options, forwarded after the size-specific ones.
# Returns:
#   Exit status of train_common.
#######################################
train_1.3B () {
  train_common --decoder-layers 24 \
    --decoder-embed-dim 2048 \
    --decoder-ffn-embed-dim 8192 \
    --decoder-attention-heads 32 \
    --batch-size 4 \
    --update-freq 16 \
    --total-num-update 286102 \
    --max-update 286102 \
    "$@"
}
#######################################
# Run train_common with the decoder shape of the configuration named 2.7B:
# 32 layers, embed dim 2560, FFN dim 10240, 32 attention heads. Also passes
# --batch-size 4, --update-freq 16 and 286102 for both --total-num-update
# and --max-update, after train_common's own values for those options
# (presumably the later occurrence takes effect - confirm).
# Note: the dotted function name is accepted by Bash but is not a portable
# POSIX sh identifier.
# Arguments:
#   "$@" - extra options, forwarded after the size-specific ones.
# Returns:
#   Exit status of train_common.
#######################################
train_2.7B () {
  train_common --decoder-layers 32 \
    --decoder-embed-dim 2560 \
    --decoder-ffn-embed-dim 10240 \
    --decoder-attention-heads 32 \
    --batch-size 4 \
    --update-freq 16 \
    --total-num-update 286102 \
    --max-update 286102 \
    "$@"
}