#!/usr/bin/env bash
# SFT launch script (run 1 of 2): fine-tune a text-image-to-text-image model
# on the 13.5k-sample LLF tokenized dataset.
#
# NOTE(review): the shebang was previously preceded by a blank line, which
# disables it when the script is executed directly — it must be on line 1.
set -euo pipefail

# Cache directory for Triton-compiled kernels (avoids recompiling per run).
export TRITON_CACHE_DIR="/home/align-anything/cache/triton"

# Base checkpoint to fine-tune from.
MODEL_NAME_OR_PATH="/data/align-anything/hantao/models/0916_ti_to_ti_sft/"

# Directory holding pre-tokenized training data, plus the specific file used
# for this run (13.5k samples).
TRAIN_DATASETS="/data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs"

TRAIN_DATA_FILES="llf_ti2ti_13.5k_tokenized.pt"

# Where this run's checkpoints and logs are written.
OUTPUT_DIR="../outputs/sft_tf_cham_1111_13.5k_ti2ti"

# SECURITY: hardcoded Weights & Biases API key checked into source control.
# Rotate this key and supply it via the environment or a secrets file instead
# of committing it here.
export WANDB_API_KEY="7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33"

# Distributed-training environment setup; presumably defines MASTER_PORT used
# by the deepspeed invocation below — TODO confirm.
source ./setup.sh
|
|
|
|
|
|
|
# Launch distributed SFT training for the 13.5k-sample run.
# MASTER_PORT is expected to be exported by ./setup.sh — TODO confirm.
# Fix: all variable expansions are now quoted (ShellCheck SC2086) so paths
# containing spaces or glob characters do not word-split or expand.
deepspeed \
  --master_port "${MASTER_PORT}" \
  --module align_anything.trainers.text_image_to_text_image.sft \
  --model_name_or_path "${MODEL_NAME_OR_PATH}" \
  --train_datasets "${TRAIN_DATASETS}" \
  --train_data_files "${TRAIN_DATA_FILES}" \
  --output_dir "${OUTPUT_DIR}" \
  --train_template AA_textfeedback \
  --train_split train \
  --per_device_train_batch_size 4 \
  --per_device_eval_batch_size 4 \
  --gradient_accumulation_steps 2 \
  --save_interval 400 \
  --learning_rate 1e-6 \
  --epochs 3 \
  --lr_scheduler_type cosine
|
|
|
|
|
# ---------------------------------------------------------------------------
# Run 2 of 2: identical configuration except for the smaller 6.75k dataset
# file and a separate output directory.
# ---------------------------------------------------------------------------

# NOTE(review): TRITON_CACHE_DIR and WANDB_API_KEY are re-exported with the
# same values as run 1, and setup.sh is re-sourced; presumably harmless, but
# verify setup.sh is idempotent before relying on this duplication.
export TRITON_CACHE_DIR="/home/align-anything/cache/triton"

# Same base checkpoint as run 1.
MODEL_NAME_OR_PATH="/data/align-anything/hantao/models/0916_ti_to_ti_sft/"

# Same dataset directory as run 1; only the tokenized file differs.
TRAIN_DATASETS="/data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs"

TRAIN_DATA_FILES="llf_ti2ti_6.75k_tokenized.pt"

# Separate output directory so run 1's checkpoints are not overwritten.
OUTPUT_DIR="../outputs/sft_tf_cham_1111_6.75k_ti2ti"

# SECURITY: hardcoded Weights & Biases API key committed to source — rotate
# this key and load it from the environment or a secrets file instead.
export WANDB_API_KEY="7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33"

# Re-establish the distributed-training environment (MASTER_PORT, etc.).
source ./setup.sh
|
|
|
|
|
|
|
# Launch distributed SFT training for the 6.75k-sample run.
# MASTER_PORT is expected to be exported by ./setup.sh — TODO confirm.
# Fix: all variable expansions are now quoted (ShellCheck SC2086) so paths
# containing spaces or glob characters do not word-split or expand.
deepspeed \
  --master_port "${MASTER_PORT}" \
  --module align_anything.trainers.text_image_to_text_image.sft \
  --model_name_or_path "${MODEL_NAME_OR_PATH}" \
  --train_datasets "${TRAIN_DATASETS}" \
  --train_data_files "${TRAIN_DATA_FILES}" \
  --output_dir "${OUTPUT_DIR}" \
  --train_template AA_textfeedback \
  --train_split train \
  --per_device_train_batch_size 4 \
  --per_device_eval_batch_size 4 \
  --gradient_accumulation_steps 2 \
  --save_interval 400 \
  --learning_rate 1e-6 \
  --epochs 3 \
  --lr_scheduler_type cosine