# NOTE(review): the three lines below look like repository-viewer metadata
# (a person's name, a commit message and a short commit hash) that ended up
# inside the script. As bare text the shell would try to execute each of them
# as a command, so they are preserved here as comments only.
#   Sigrid De los Santos
#   Remove remaining binary file for Hugging Face
#   9df4cc0

# Environment shared by the deepspeed train_lora.py invocations listed further
# down (all of them are currently commented out; uncomment one to run it).

# Make CUDA devices 0-3 visible to processes started from this shell.
export CUDA_VISIBLE_DEVICES=0,1,2,3
# NCCL switch; presumably lets NCCL carry on when GPU peer-to-peer access is
# disabled instead of failing -- confirm against the NCCL documentation.
export NCCL_IGNORE_DISABLED_P2P=1
# Hugging Face Transformers switch; presumably silences advisory warnings.
export TRANSFORMERS_NO_ADVISORY_WARNINGS=1
# Hugging Face tokenizers switch; presumably turns tokenizer parallelism off
# (commonly done to avoid fork-related warnings) -- confirm "0" is accepted.
export TOKENIZERS_PARALLELISM=0
#---- Generalization ----
# deepspeed train_lora.py \
# --run_name GRCLS-sentiment-chatglm2-linear-1e-4lr \
# --base_model chatglm2 \
# --dataset headline-cls-instruct,finred-cls-instruct*2,ner-cls-instruct*7 \
# --test_dataset sentiment-cls-instruct \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 1 \
# --log_interval 10 \
# --warmup_ratio 0.03 \
# --scheduler linear \
# --evaluation_strategy steps \
# --ds_config config_hf.json
# deepspeed train_lora.py \
# --run_name GRCLS-sentiment-llama2-linear-small \
# --base_model llama2 \
# --test_dataset sentiment-cls-instruct \
# --dataset headline-cls-instruct,finred-cls-instruct*2,ner-cls-instruct*7 \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 1 \
# --log_interval 10 \
# --warmup_ratio 0 \
# --scheduler linear \
# --evaluation_strategy steps \
# --eval_steps 100 \
# --ds_config config_hf.json
# deepspeed train_lora.py \
# --run_name GRCLS-sentiment-falcon-linear-small \
# --base_model falcon \
# --test_dataset sentiment-cls-instruct \
# --dataset headline-cls-instruct,finred-cls-instruct*2,ner-cls-instruct*7 \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 1 \
# --log_interval 10 \
# --warmup_ratio 0 \
# --scheduler linear \
# --evaluation_strategy steps \
# --eval_steps 100 \
# --ds_config config_hf.json
# deepspeed train_lora.py \
# --run_name GRCLS-sentiment-qwen-linear-small \
# --base_model qwen \
# --test_dataset sentiment-cls-instruct \
# --dataset headline-cls-instruct,finred-cls-instruct*2,ner-cls-instruct*7 \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 1 \
# --log_interval 10 \
# --warmup_ratio 0 \
# --scheduler linear \
# --evaluation_strategy steps \
# --eval_steps 100 \
# --ds_config config_hf.json
# deepspeed train_lora.py \
# --run_name GRCLS-sentiment-bloom-linear-small \
# --base_model bloom \
# --test_dataset sentiment-cls-instruct \
# --dataset headline-cls-instruct,finred-cls-instruct*2,ner-cls-instruct*7 \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 1 \
# --log_interval 10 \
# --warmup_ratio 0 \
# --scheduler linear \
# --evaluation_strategy steps \
# --eval_steps 100 \
# --ds_config config_hf.json
# deepspeed train_lora.py \
# --run_name GRCLS-sentiment-mpt-linear-small \
# --base_model mpt \
# --dataset headline-cls-instruct,finred-cls-instruct*2,ner-cls-instruct*7 \
# --test_dataset sentiment-cls-instruct \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 1 \
# --log_interval 10 \
# --warmup_ratio 0.03 \
# --scheduler linear \
# --evaluation_strategy steps \
# --eval_steps 100 \
# --ds_config config_hf.json
#---- Multi-Task ----
# deepspeed train_lora.py \
# --run_name MT-chatglm2-linear \
# --base_model chatglm2 \
# --dataset sentiment-train,headline,finred*3,ner*15 \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 4
# deepspeed train_lora.py \
# --run_name MT-falcon-linear \
# --base_model falcon \
# --dataset sentiment-train,headline,finred*3,ner*15 \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 4
# deepspeed train_lora.py \
# --run_name MT-qwen-linear \
# --base_model qwen \
# --dataset sentiment-train,headline,finred*3,ner*15 \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 4
# deepspeed train_lora.py \
# --run_name MT-mpt-linear \
# --base_model mpt \
# --dataset sentiment-train,headline,finred*3,ner*15 \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 4
# deepspeed train_lora.py \
# --run_name MT-bloom-linear \
# --base_model bloom \
# --dataset sentiment-train,headline,finred*3,ner*15 \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 4
# deepspeed train_lora.py \
# --run_name MT-llama2-linear \
# --base_model llama2 \
# --dataset sentiment-train,headline,finred*3,ner*15 \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 4 \
# --log_interval 10
#---- FinEval ----
# deepspeed train_lora.py \
# --run_name fineval-internlm-linear \
# --base_model internlm \
# --dataset data/fingpt-fineval \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 50 \
# --log_interval 10
# deepspeed train_lora.py \
# --run_name fineval-llama2-linear \
# --base_model llama2 \
# --dataset data/fingpt-fineval \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 50 \
# --log_interval 10
# deepspeed train_lora.py \
# --run_name fineval-chatglm2-linear \
# --base_model chatglm2 \
# --dataset data/fingpt-fineval \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 50 \
# --log_interval 10
# deepspeed train_lora.py \
# --run_name fineval-falcon-linear \
# --base_model falcon \
# --dataset data/fingpt-fineval \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 50 \
# --log_interval 10
# deepspeed train_lora.py \
# --run_name fineval-qwen-linear \
# --base_model qwen \
# --dataset data/fingpt-fineval \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 50 \
# --log_interval 10
# deepspeed train_lora.py \
# --run_name fineval-mpt-linear \
# --base_model mpt \
# --dataset data/fingpt-fineval \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 50 \
# --log_interval 10
# deepspeed train_lora.py \
# --run_name fineval-bloom-linear \
# --base_model bloom \
# --dataset data/fingpt-fineval \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 50 \
# --log_interval 10
#---- ConvFinQA ----
# deepspeed train_lora.py \
# --run_name convfinqa-llama2-linear \
# --base_model llama2 \
# --ds_config config_hf.json \
# --dataset data/fingpt-convfinqa \
# --max_length 2048 \
# --batch_size 1 \
# --learning_rate 1e-4 \
# --num_epochs 4
# deepspeed train_lora.py \
# --run_name convfinqa-chatglm2-linear \
# --base_model chatglm2 \
# --dataset data/fingpt-convfinqa \
# --max_length 2048 \
# --batch_size 1 \
# --learning_rate 1e-4 \
# --num_epochs 4
# deepspeed train_lora.py \
# --run_name convfinqa-falcon-linear \
# --base_model falcon \
# --dataset data/fingpt-convfinqa \
# --max_length 2048 \
# --batch_size 1 \
# --learning_rate 1e-4 \
# --num_epochs 4
# deepspeed train_lora.py \
# --run_name convfinqa-qwen-linear \
# --base_model qwen \
# --dataset data/fingpt-convfinqa \
# --max_length 2048 \
# --batch_size 1 \
# --learning_rate 1e-4 \
# --num_epochs 4
# deepspeed train_lora.py \
# --run_name convfinqa-mpt-linear \
# --base_model mpt \
# --dataset data/fingpt-convfinqa \
# --max_length 2048 \
# --batch_size 1 \
# --learning_rate 1e-4 \
# --num_epochs 4
# deepspeed train_lora.py \
# --run_name convfinqa-bloom-linear \
# --base_model bloom \
# --dataset data/fingpt-convfinqa \
# --max_length 2048 \
# --batch_size 1 \
# --learning_rate 1e-4 \
# --num_epochs 4
#---- NER ----
# deepspeed train_lora.py \
# --run_name ner-llama2-linear \
# --base_model llama2 \
# --dataset data/fingpt-ner \
# --ds_config config_hf.json \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 100 \
# --log_interval 10
# deepspeed train_lora.py \
# --run_name ner-chatglm2-linear \
# --base_model chatglm2 \
# --dataset data/fingpt-ner \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 100 \
# --log_interval 10
# deepspeed train_lora.py \
# --run_name ner-falcon-linear \
# --base_model falcon \
# --dataset data/fingpt-ner \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 100 \
# --log_interval 10
# deepspeed train_lora.py \
# --run_name ner-qwen-linear \
# --base_model qwen \
# --dataset data/fingpt-ner \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 100 \
# --log_interval 10
# deepspeed train_lora.py \
# --run_name ner-mpt-linear \
# --base_model mpt \
# --dataset data/fingpt-ner \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 100 \
# --log_interval 10
# deepspeed train_lora.py \
# --run_name ner-bloom-linear \
# --base_model bloom \
# --dataset data/fingpt-ner \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 100 \
# --log_interval 10
#---- Headline (IE) ----
# deepspeed train_lora.py \
# --run_name headline-internlm-linear \
# --base_model internlm \
# --dataset data/fingpt-headline \
# --ds_config config_hf.json \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name headline-llama2-linear \
# --base_model llama2 \
# --dataset data/fingpt-headline \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name headline-chatglm2-linear \
# --base_model chatglm2 \
# --dataset data/fingpt-headline \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name headline-falcon-linear \
# --base_model falcon \
# --dataset data/fingpt-headline \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name headline-qwen-linear \
# --base_model qwen \
# --dataset data/fingpt-headline \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name headline-mpt-linear \
# --base_model mpt \
# --dataset data/fingpt-headline \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name headline-bloom-linear \
# --base_model bloom \
# --dataset data/fingpt-headline \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
#---- Sentiment Analysis ----
# deepspeed train_lora.py \
# --run_name sentiment-internlm-linear \
# --base_model internlm \
# --dataset data/fingpt-sentiment-train \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name sentiment-llama2-linear \
# --base_model llama2 \
# --dataset data/fingpt-sentiment-train \
# --ds_config config_hf.json \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name sentiment-chatglm2-linear \
# --base_model chatglm2 \
# --dataset data/fingpt-sentiment-train \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name sentiment-falcon-linear \
# --base_model falcon \
# --dataset data/fingpt-sentiment-train \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name sentiment-qwen-linear \
# --base_model qwen \
# --dataset data/fingpt-sentiment-train \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name sentiment-mpt-linear \
# --base_model mpt \
# --dataset data/fingpt-sentiment-train \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name sentiment-bloom-linear \
# --base_model bloom \
# --dataset data/fingpt-sentiment-train \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
#---- Relation Extraction ----
# deepspeed train_lora.py \
# --run_name finred-llama2-linear \
# --base_model llama2 \
# --dataset data/fingpt-finred-re \
# --ds_config config_hf.json \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name finred-chatglm2-linear \
# --base_model chatglm2 \
# --dataset data/fingpt-finred-re \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name finred-falcon-linear \
# --base_model falcon \
# --dataset data/fingpt-finred-re \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name finred-qwen-linear \
# --base_model qwen \
# --dataset data/fingpt-finred-re \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name finred-mpt-linear \
# --base_model mpt \
# --dataset data/fingpt-finred-re \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8
# deepspeed train_lora.py \
# --run_name finred-bloom-linear \
# --base_model bloom \
# --dataset data/fingpt-finred-re \
# --max_length 512 \
# --batch_size 4 \
# --learning_rate 1e-4 \
# --num_epochs 8