#!/bin/bash
#
# Launches training/prepare_dataset.py under torchrun with the settings below.
#
# Usage: edit the configuration variables, then run this script from the
# directory that contains the `training/` folder (the script path is relative).
#
# The script exits with a non-zero status if the launched command fails.

set -euo pipefail

# ----------------------------- Configuration -------------------------------

# Value passed to --model_id.
MODEL_ID="THUDM/CogVideoX-2b"

# Value passed to torchrun's --nproc_per_node.
NUM_GPUS=8

# Values passed to --data_root, --caption_column, --video_column, --output_dir.
DATA_ROOT="/path/to/my/datasets/video-dataset"
CAPTION_COLUMN="prompt.txt"
VIDEO_COLUMN="videos.txt"
OUTPUT_DIR="/path/to/my/datasets/preprocessed-dataset"

# Space-separated lists; every entry is passed as a separate argument to
# --height_buckets / --width_buckets / --frame_buckets.
HEIGHT_BUCKETS="480 720"
WIDTH_BUCKETS="720 960"
FRAME_BUCKETS="49"

# Values passed to the options of the same (lower-case) name.
MAX_NUM_FRAMES="49"
MAX_SEQUENCE_LENGTH=226
TARGET_FPS=8
BATCH_SIZE=1
DTYPE=fp32

# --------------------------- Command assembly ------------------------------

# Split the space-separated bucket lists into arrays so that each entry
# becomes its own argv element without relying on unquoted expansion.
read -r -a height_buckets <<< "$HEIGHT_BUCKETS"
read -r -a width_buckets <<< "$WIDTH_BUCKETS"
read -r -a frame_buckets <<< "$FRAME_BUCKETS"

# The command is kept as an array (one element per argument) rather than a
# string run through `eval`, so values containing spaces or shell
# metacharacters reach the program unchanged.
CMD_WITHOUT_PRE_ENCODING=(
  torchrun --nproc_per_node="$NUM_GPUS"
  training/prepare_dataset.py
  --model_id "$MODEL_ID"
  --data_root "$DATA_ROOT"
  --caption_column "$CAPTION_COLUMN"
  --video_column "$VIDEO_COLUMN"
  --output_dir "$OUTPUT_DIR"
  --height_buckets "${height_buckets[@]}"
  --width_buckets "${width_buckets[@]}"
  --frame_buckets "${frame_buckets[@]}"
  --max_num_frames "$MAX_NUM_FRAMES"
  --max_sequence_length "$MAX_SEQUENCE_LENGTH"
  --target_fps "$TARGET_FPS"
  --batch_size "$BATCH_SIZE"
  --dtype "$DTYPE"
)

# Same command with --save_latents_and_embeddings appended.
CMD_WITH_PRE_ENCODING=(
  "${CMD_WITHOUT_PRE_ENCODING[@]}"
  --save_latents_and_embeddings
)

# Select which variant to run.
CMD=("${CMD_WITH_PRE_ENCODING[@]}")

# -------------------------------- Execute ----------------------------------

printf '===== Running `%s` =====\n' "${CMD[*]}"

# Under `set -e`, a failing command aborts the script here, so the
# "Finished" line is only printed after a successful run.
"${CMD[@]}"

printf '===== Finished running script =====\n'