Spaces:

jbilcke-hf
/

VideoModelStudio

Running

VideoModelStudio / training /prepare_dataset.sh

jbilcke-hf HF Staff

initial commit log 🪵🦫

91fb4ef 8 months ago

1.49 kB

	#!/bin/bash
	#
	# Launches training/prepare_dataset.py under torchrun with the settings below.
	#
	# Usage (from the directory that contains training/):
	#   bash training/prepare_dataset.sh [extra prepare_dataset.py arguments...]
	#
	# Any arguments given to this script are appended verbatim to the command,
	# e.g. `--save_image_latents` for Image-to-Video finetuning.
	#
	# Exit status: that of torchrun, or 1 if the Python script cannot be found.

	# Treat a misspelled/unset setting as an error instead of an empty flag value.
	set -u

	MODEL_ID="THUDM/CogVideoX-2b"

	NUM_GPUS=8

	# Path of the Python entry point, relative to the current working directory.
	PREPARE_SCRIPT="training/prepare_dataset.py"

	# For more details on the expected data format, please refer to the README.
	DATA_ROOT="/path/to/my/datasets/video-dataset" # This needs to be the path to the base directory where your videos are located.
	CAPTION_COLUMN="prompt.txt"
	VIDEO_COLUMN="videos.txt"
	OUTPUT_DIR="/path/to/my/datasets/preprocessed-dataset"
	# Bucket lists are arrays so each value is passed as its own argument.
	HEIGHT_BUCKETS=(480 720)
	WIDTH_BUCKETS=(720 960)
	FRAME_BUCKETS=(49)
	MAX_NUM_FRAMES=49
	MAX_SEQUENCE_LENGTH=226
	TARGET_FPS=8
	BATCH_SIZE=1
	DTYPE=fp32

	# Select which you'd like to run:
	#   true  - pre-encode: additionally pass --save_latents_and_embeddings
	#   false - create a folder-style dataset structure without pre-encoding
	#           videos and captions
	# For Image-to-Video finetuning, make sure to pass `--save_image_latents`
	# (as an argument to this script).
	PRE_ENCODE=true

	#######################################
	# Builds the torchrun command line into the global array CMD.
	# An array (rather than a string run through eval) keeps every value a
	# single argument even if it contains spaces or shell metacharacters.
	# Globals:   reads the settings above; writes CMD
	# Arguments: extra arguments appended after the configured flags
	#######################################
	build_cmd() {
	  CMD=(
	    torchrun "--nproc_per_node=${NUM_GPUS}"
	    "$PREPARE_SCRIPT"
	    --model_id "$MODEL_ID"
	    --data_root "$DATA_ROOT"
	    --caption_column "$CAPTION_COLUMN"
	    --video_column "$VIDEO_COLUMN"
	    --output_dir "$OUTPUT_DIR"
	    --height_buckets "${HEIGHT_BUCKETS[@]}"
	    --width_buckets "${WIDTH_BUCKETS[@]}"
	    --frame_buckets "${FRAME_BUCKETS[@]}"
	    --max_num_frames "$MAX_NUM_FRAMES"
	    --max_sequence_length "$MAX_SEQUENCE_LENGTH"
	    --target_fps "$TARGET_FPS"
	    --batch_size "$BATCH_SIZE"
	    --dtype "$DTYPE"
	  )
	  if [[ "$PRE_ENCODE" == true ]]; then
	    CMD+=(--save_latents_and_embeddings)
	  fi
	  CMD+=("$@")
	}

	#######################################
	# Validates the entry point, logs the command, runs it and reports the result.
	# Globals:   PREPARE_SCRIPT (read), CMD (written via build_cmd)
	# Arguments: extra arguments forwarded to build_cmd
	# Outputs:   progress banners on stdout, errors on stderr
	# Returns:   0 on success, 1 if PREPARE_SCRIPT is missing, otherwise the
	#            exit status of the launched command
	#######################################
	main() {
	  if [[ ! -f "$PREPARE_SCRIPT" ]]; then
	    printf 'error: %s not found; run this script from the directory that contains %s\n' \
	      "$PREPARE_SCRIPT" "${PREPARE_SCRIPT%%/*}/" >&2
	    return 1
	  fi

	  build_cmd "$@"

	  # %q prints each argument shell-quoted, so the logged line can be pasted back.
	  local shown
	  printf -v shown '%q ' "${CMD[@]}"
	  echo "===== Running \`${shown% }\` ====="

	  local status=0
	  "${CMD[@]}" || status=$?
	  if ((status != 0)); then
	    printf '===== Command failed with exit status %d =====\n' "$status" >&2
	    return "$status"
	  fi

	  echo "===== Finished running script ====="
	}

	# Last command on purpose: the script's exit status is main's return status.
	main "$@"