Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- arguments.yaml +51 -0
- config.json +0 -0
- environ.txt +164 -0
- preprocessor_config.json +28 -0
- processor_config.json +5 -0
- pytorch_model.bin +3 -0
- script.sh +84 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +0 -0
- wandb/debug-internal.log +20 -0
- wandb/debug.log +33 -0
- wandb/run-20250101_031915-9dphq5gk/files/output.log +2 -0
- wandb/run-20250101_031915-9dphq5gk/files/requirements.txt +248 -0
- wandb/run-20250101_031915-9dphq5gk/files/wandb-metadata.json +112 -0
- wandb/run-20250101_031915-9dphq5gk/logs/debug-internal.log +10 -0
- wandb/run-20250101_031915-9dphq5gk/logs/debug.log +26 -0
- wandb/run-20250101_031915-9dphq5gk/run-9dphq5gk.wandb +0 -0
- wandb/run-20250101_032225-2bzz3n13/files/config.yaml +98 -0
- wandb/run-20250101_032225-2bzz3n13/files/output.log +47 -0
- wandb/run-20250101_032225-2bzz3n13/files/requirements.txt +248 -0
- wandb/run-20250101_032225-2bzz3n13/files/wandb-metadata.json +112 -0
- wandb/run-20250101_032225-2bzz3n13/files/wandb-summary.json +1 -0
- wandb/run-20250101_032225-2bzz3n13/logs/debug-internal.log +20 -0
- wandb/run-20250101_032225-2bzz3n13/logs/debug.log +33 -0
- wandb/run-20250101_032225-2bzz3n13/run-2bzz3n13.wandb +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
wandb/run-20250101_032225-2bzz3n13/run-2bzz3n13.wandb filter=lfs diff=lfs merge=lfs -text
|
arguments.yaml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
data_cfgs:
|
2 |
+
eval_data_files: null
|
3 |
+
eval_datasets: null
|
4 |
+
eval_optional_args: []
|
5 |
+
eval_size: null
|
6 |
+
eval_split: null
|
7 |
+
eval_subset: null
|
8 |
+
eval_template: null
|
9 |
+
train_data_files: q0_10_preference.pt
|
10 |
+
train_datasets: /data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized
|
11 |
+
train_optional_args: []
|
12 |
+
train_size: null
|
13 |
+
train_split: train
|
14 |
+
train_subset: null
|
15 |
+
train_template: Chameleon_preference
|
16 |
+
logger_cfgs:
|
17 |
+
cache_dir: null
|
18 |
+
log_project: align-anything
|
19 |
+
log_run_name: dpo
|
20 |
+
log_type: wandb
|
21 |
+
output_dir: /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference
|
22 |
+
save_interval: 400.0
|
23 |
+
model_cfgs:
|
24 |
+
model_max_length: 4096
|
25 |
+
model_name_or_path: /data/align-anything/hantao/models/chameleon-7b
|
26 |
+
trust_remote_code: true
|
27 |
+
special_tokens: null
|
28 |
+
train_cfgs:
|
29 |
+
adam_betas:
|
30 |
+
- 0.9
|
31 |
+
- 0.95
|
32 |
+
bf16: true
|
33 |
+
ds_cfgs: ds_z3_config.json
|
34 |
+
epochs: 3.0
|
35 |
+
eval_interval: 10
|
36 |
+
eval_strategy: epoch
|
37 |
+
fp16: false
|
38 |
+
freeze_language_model: true
|
39 |
+
freeze_mm_proj: true
|
40 |
+
freeze_vision_tower: false
|
41 |
+
gradient_accumulation_steps: 2.0
|
42 |
+
gradient_checkpointing: true
|
43 |
+
learning_rate: 1.0e-06
|
44 |
+
lr_scheduler_type: cosine
|
45 |
+
lr_warmup_ratio: 0.03
|
46 |
+
per_device_eval_batch_size: 4.0
|
47 |
+
per_device_train_batch_size: 4.0
|
48 |
+
regularization: 0.001
|
49 |
+
scale_coeff: 0.1
|
50 |
+
seed: 42
|
51 |
+
weight_decay: 0.01
|
config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
environ.txt
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ADDR2LINE=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-addr2line
|
2 |
+
AR=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-ar
|
3 |
+
AS=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-as
|
4 |
+
BROWSER=/home/align-anything/.cursor-server/cli/servers/Stable-51c8aff7cb5a89f4a0e462fbacab938bdbfaf140/server/bin/helpers/browser.sh
|
5 |
+
BUILD=x86_64-conda-linux-gnu
|
6 |
+
CC=/data/align-anything/miniconda3/envs/hantao_stable/bin/gcc
|
7 |
+
CC_FOR_BUILD=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-cc
|
8 |
+
CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -I/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/include -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs
|
9 |
+
CMAKE_PREFIX_PATH=/data/align-anything/miniconda3/envs/jy-a:/data/align-anything/miniconda3/envs/jy-a/x86_64-conda-linux-gnu/sysroot/usr
|
10 |
+
COLORTERM=truecolor
|
11 |
+
CONDA_BACKUP_ADDR2LINE=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-addr2line
|
12 |
+
CONDA_BACKUP_AR=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-ar
|
13 |
+
CONDA_BACKUP_AS=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-as
|
14 |
+
CONDA_BACKUP_BUILD=x86_64-conda-linux-gnu
|
15 |
+
CONDA_BACKUP_CC=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-cc
|
16 |
+
CONDA_BACKUP_CC_FOR_BUILD=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-cc
|
17 |
+
CONDA_BACKUP_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -I/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/include -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs -I/data/align-anything/miniconda3/envs/hantao_proxy/targets/x86_64-linux/include
|
18 |
+
CONDA_BACKUP_CMAKE_PREFIX_PATH=/data/align-anything/miniconda3/envs/hantao_proxy:/data/align-anything/miniconda3/envs/hantao_proxy/x86_64-conda-linux-gnu/sysroot/usr
|
19 |
+
CONDA_BACKUP_CONDA_BUILD_SYSROOT=/data/align-anything/miniconda3/envs/hantao_proxy/x86_64-conda-linux-gnu/sysroot
|
20 |
+
CONDA_BACKUP_CONDA_TOOLCHAIN_BUILD=x86_64-conda-linux-gnu
|
21 |
+
CONDA_BACKUP_CONDA_TOOLCHAIN_HOST=x86_64-conda-linux-gnu
|
22 |
+
CONDA_BACKUP_CPP=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-cpp
|
23 |
+
CONDA_BACKUP_CPPFLAGS=-DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /data/align-anything/miniconda3/envs/jy-a/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /data/align-anything/miniconda3/envs/jy-a/include -I/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/include -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs -I/data/align-anything/miniconda3/envs/hantao_proxy/targets/x86_64-linux/include
|
24 |
+
CONDA_BACKUP_CXX=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-c++
|
25 |
+
CONDA_BACKUP_CXXFILT=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-c++filt
|
26 |
+
CONDA_BACKUP_CXXFLAGS=-fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -I/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/include -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs -I/data/align-anything/miniconda3/envs/hantao_proxy/targets/x86_64-linux/include
|
27 |
+
CONDA_BACKUP_CXX_FOR_BUILD=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-c++
|
28 |
+
CONDA_BACKUP_DEBUG_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include
|
29 |
+
CONDA_BACKUP_DEBUG_CPPFLAGS=-D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /data/align-anything/miniconda3/envs/jy-a/include -D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /data/align-anything/miniconda3/envs/jy-a/include
|
30 |
+
CONDA_BACKUP_DEBUG_CXXFLAGS=-fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include
|
31 |
+
CONDA_BACKUP_DWP=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-dwp
|
32 |
+
CONDA_BACKUP_ELFEDIT=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-elfedit
|
33 |
+
CONDA_BACKUP_GCC=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-gcc
|
34 |
+
CONDA_BACKUP_GCC_AR=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-gcc-ar
|
35 |
+
CONDA_BACKUP_GCC_NM=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-gcc-nm
|
36 |
+
CONDA_BACKUP_GCC_RANLIB=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-gcc-ranlib
|
37 |
+
CONDA_BACKUP_GPROF=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-gprof
|
38 |
+
CONDA_BACKUP_GXX=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-g++
|
39 |
+
CONDA_BACKUP_HOST=x86_64-conda-linux-gnu
|
40 |
+
CONDA_BACKUP_LD=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-ld
|
41 |
+
CONDA_BACKUP_LDFLAGS=-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/data/align-anything/miniconda3/envs/hantao_proxy/lib -Wl,-rpath-link,/data/align-anything/miniconda3/envs/hantao_proxy/lib -L/data/align-anything/miniconda3/envs/hantao_proxy/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/data/align-anything/miniconda3/envs/hantao_proxy/lib -Wl,-rpath-link,/data/align-anything/miniconda3/envs/hantao_proxy/lib -L/data/align-anything/miniconda3/envs/hantao_proxy/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/data/align-anything/miniconda3/envs/jy-a/lib -Wl,-rpath-link,/data/align-anything/miniconda3/envs/jy-a/lib -L/data/align-anything/miniconda3/envs/jy-a/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/data/align-anything/miniconda3/envs/jy-a/lib -Wl,-rpath-link,/data/align-anything/miniconda3/envs/jy-a/lib -L/data/align-anything/miniconda3/envs/jy-a/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs -L/data/align-anything/miniconda3/envs/hantao_proxy/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/hantao_proxy/targets/x86_64-linux/lib/stubs
|
42 |
+
CONDA_BACKUP_LD_GOLD=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-ld.gold
|
43 |
+
CONDA_BACKUP_MESON_ARGS=-Dbuildtype=release
|
44 |
+
CONDA_BACKUP_NM=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-nm
|
45 |
+
CONDA_BACKUP_OBJCOPY=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-objcopy
|
46 |
+
CONDA_BACKUP_OBJDUMP=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-objdump
|
47 |
+
CONDA_BACKUP_RANLIB=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-ranlib
|
48 |
+
CONDA_BACKUP_READELF=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-readelf
|
49 |
+
CONDA_BACKUP_SIZE=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-size
|
50 |
+
CONDA_BACKUP_STRINGS=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-strings
|
51 |
+
CONDA_BACKUP_STRIP=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-strip
|
52 |
+
CONDA_BACKUP__CONDA_PYTHON_SYSCONFIGDATA_NAME=_sysconfigdata_x86_64_conda_cos6_linux_gnu
|
53 |
+
CONDA_BACKUP_build_alias=x86_64-conda-linux-gnu
|
54 |
+
CONDA_BACKUP_host_alias=x86_64-conda-linux-gnu
|
55 |
+
CONDA_BUILD_SYSROOT=/data/align-anything/miniconda3/envs/jy-a/x86_64-conda-linux-gnu/sysroot
|
56 |
+
CONDA_DEFAULT_ENV=hantao_stable
|
57 |
+
CONDA_EXE=/data/align-anything/miniconda3/bin/conda
|
58 |
+
CONDA_PREFIX=/data/align-anything/miniconda3/envs/hantao_stable
|
59 |
+
CONDA_PREFIX_1=/home/align-anything/miniconda3
|
60 |
+
CONDA_PREFIX_10=/data/align-anything/miniconda3/envs/hantao_proxy
|
61 |
+
CONDA_PREFIX_2=/data/align-anything/miniconda3/envs/jy-a
|
62 |
+
CONDA_PREFIX_3=/data/align-anything/miniconda3
|
63 |
+
CONDA_PREFIX_4=/data/align-anything/miniconda3/envs/hantao_stable
|
64 |
+
CONDA_PREFIX_5=/data/align-anything/miniconda3/envs/hantao_cham
|
65 |
+
CONDA_PREFIX_6=/data/align-anything/miniconda3/envs/hantao_stable
|
66 |
+
CONDA_PREFIX_7=/data/align-anything/miniconda3/envs/hantao_stream
|
67 |
+
CONDA_PREFIX_8=/data/align-anything/miniconda3/envs/hantao_proxy
|
68 |
+
CONDA_PREFIX_9=/data/align-anything/miniconda3/envs/hantao_stable
|
69 |
+
CONDA_PROMPT_MODIFIER=(hantao_stable)
|
70 |
+
CONDA_PYTHON_EXE=/data/align-anything/miniconda3/bin/python
|
71 |
+
CONDA_ROOT=/home/align-anything/miniconda3
|
72 |
+
CONDA_SHLVL=11
|
73 |
+
CONDA_TOOLCHAIN_BUILD=x86_64-conda-linux-gnu
|
74 |
+
CONDA_TOOLCHAIN_HOST=x86_64-conda-linux-gnu
|
75 |
+
CPP=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-cpp
|
76 |
+
CPPFLAGS=-DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /data/align-anything/miniconda3/envs/jy-a/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /data/align-anything/miniconda3/envs/jy-a/include -I/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/include -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs
|
77 |
+
CROSS_RANK=0
|
78 |
+
CROSS_SIZE=1
|
79 |
+
CUDA_MODULE_LOADING=LAZY
|
80 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
|
81 |
+
CXX=/data/align-anything/miniconda3/envs/hantao_stable/bin/g++
|
82 |
+
CXXFILT=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-c++filt
|
83 |
+
CXXFLAGS=-fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -I/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/include -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs
|
84 |
+
CXX_FOR_BUILD=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-c++
|
85 |
+
DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/2000/bus
|
86 |
+
DEBUG_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include
|
87 |
+
DEBUG_CPPFLAGS=-D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /data/align-anything/miniconda3/envs/jy-a/include -D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /data/align-anything/miniconda3/envs/jy-a/include
|
88 |
+
DEBUG_CXXFLAGS=-fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include
|
89 |
+
DWP=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-dwp
|
90 |
+
ELFEDIT=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-elfedit
|
91 |
+
GCC=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-gcc
|
92 |
+
GCC_AR=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-gcc-ar
|
93 |
+
GCC_NM=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-gcc-nm
|
94 |
+
GCC_RANLIB=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-gcc-ranlib
|
95 |
+
GIT_ASKPASS=/home/align-anything/.cursor-server/cli/servers/Stable-51c8aff7cb5a89f4a0e462fbacab938bdbfaf140/server/extensions/git/dist/askpass.sh
|
96 |
+
GPROF=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-gprof
|
97 |
+
GXX=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-g++
|
98 |
+
HOME=/home/align-anything
|
99 |
+
HOST=x86_64-conda-linux-gnu
|
100 |
+
LANG=en_US.UTF-8
|
101 |
+
LD=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-ld
|
102 |
+
LDFLAGS=-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/data/align-anything/miniconda3/envs/jy-a/lib -Wl,-rpath-link,/data/align-anything/miniconda3/envs/jy-a/lib -L/data/align-anything/miniconda3/envs/jy-a/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/data/align-anything/miniconda3/envs/jy-a/lib -Wl,-rpath-link,/data/align-anything/miniconda3/envs/jy-a/lib -L/data/align-anything/miniconda3/envs/jy-a/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs
|
103 |
+
LD_GOLD=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-ld.gold
|
104 |
+
LD_LIBRARY_PATH=/data/align-anything/miniconda3/envs/hantao_stable/lib/python3.11/site-packages/cv2/../../lib64:
|
105 |
+
LESSCLOSE=/usr/bin/lesspipe %s %s
|
106 |
+
LESSOPEN=| /usr/bin/lesspipe %s
|
107 |
+
LOCAL_RANK=0
|
108 |
+
LOCAL_SIZE=8
|
109 |
+
LOGLEVEL=WARNING
|
110 |
+
LOGNAME=align-anything
|
111 |
+
LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:
|
112 |
+
MASTER_ADDR=127.0.0.1
|
113 |
+
MASTER_PORT=14373
|
114 |
+
MOTD_SHOWN=pam
|
115 |
+
NM=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-nm
|
116 |
+
NVCC_PREPEND_FLAGS= -ccbin=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-c++
|
117 |
+
OBJCOPY=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-objcopy
|
118 |
+
OBJDUMP=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-objdump
|
119 |
+
OLDPWD=/data/align-anything/hantao/LLaMA-Factory
|
120 |
+
PATH=/data/align-anything/miniconda3/envs/hantao_stable/bin:/data/align-anything/miniconda3/envs/hantao_stable/bin:/data/align-anything/miniconda3/bin:/data/align-anything/miniconda3/condabin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin
|
121 |
+
PWD=/data/align-anything/hantao/align-anything/scripts
|
122 |
+
PYGAME_HIDE_SUPPORT_PROMPT=1
|
123 |
+
PYTHONHASHSEED=42
|
124 |
+
PYTHONPATH=/data/align-anything/hantao/align-anything
|
125 |
+
QT_QPA_FONTDIR=/data/align-anything/miniconda3/envs/hantao_stable/lib/python3.11/site-packages/cv2/qt/fonts
|
126 |
+
QT_QPA_PLATFORM_PLUGIN_PATH=/data/align-anything/miniconda3/envs/hantao_stable/lib/python3.11/site-packages/cv2/qt/plugins
|
127 |
+
RANK=0
|
128 |
+
RANLIB=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-ranlib
|
129 |
+
READELF=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-readelf
|
130 |
+
SHELL=/bin/bash
|
131 |
+
SHLVL=3
|
132 |
+
SIZE=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-size
|
133 |
+
SSH_CLIENT=117.136.0.149 36325 30400
|
134 |
+
SSH_CONNECTION=111.205.232.251 37945 10.10.212.194 30400
|
135 |
+
SSL_CERT_DIR=/usr/lib/ssl/certs
|
136 |
+
SSL_CERT_FILE=/usr/lib/ssl/certs/ca-certificates.crt
|
137 |
+
STRINGS=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-strings
|
138 |
+
STRIP=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-strip
|
139 |
+
TERM=screen
|
140 |
+
TERM_PROGRAM=vscode
|
141 |
+
TERM_PROGRAM_VERSION=0.41.3
|
142 |
+
TMUX=/tmp/tmux-2000/default,34082,51
|
143 |
+
TMUX_PANE=%59
|
144 |
+
TRITON_CACHE_DIR=/home/align-anything/cache/triton
|
145 |
+
USER=align-anything
|
146 |
+
VSCODE_GIT_ASKPASS_EXTRA_ARGS=
|
147 |
+
VSCODE_GIT_ASKPASS_MAIN=/home/align-anything/.cursor-server/cli/servers/Stable-51c8aff7cb5a89f4a0e462fbacab938bdbfaf140/server/extensions/git/dist/askpass-main.js
|
148 |
+
VSCODE_GIT_ASKPASS_NODE=/home/align-anything/.cursor-server/cli/servers/Stable-51c8aff7cb5a89f4a0e462fbacab938bdbfaf140/server/node
|
149 |
+
VSCODE_GIT_IPC_HANDLE=/run/user/2000/vscode-git-ef8058c264.sock
|
150 |
+
VSCODE_IPC_HOOK_CLI=/run/user/2000/vscode-ipc-db013265-9a8a-4fb7-ba94-00b66d808feb.sock
|
151 |
+
WANDB_API_KEY=7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33
|
152 |
+
WANDB_MODE=online
|
153 |
+
WANDB_SERVICE=2-650483-tcp-localhost-34395
|
154 |
+
WORLD_SIZE=8
|
155 |
+
XDG_DATA_DIRS=/usr/local/share:/usr/share:/var/lib/snapd/desktop
|
156 |
+
XDG_RUNTIME_DIR=/run/user/2000
|
157 |
+
XDG_SESSION_CLASS=user
|
158 |
+
XDG_SESSION_ID=11
|
159 |
+
XDG_SESSION_TYPE=tty
|
160 |
+
_=/data/align-anything/miniconda3/envs/hantao_stable/bin/deepspeed
|
161 |
+
_CE_CONDA=
|
162 |
+
_CE_M=
|
163 |
+
build_alias=x86_64-conda-linux-gnu
|
164 |
+
host_alias=x86_64-conda-linux-gnu
|
preprocessor_config.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"crop_size": {
|
3 |
+
"height": 512,
|
4 |
+
"width": 512
|
5 |
+
},
|
6 |
+
"do_center_crop": true,
|
7 |
+
"do_convert_rgb": true,
|
8 |
+
"do_normalize": true,
|
9 |
+
"do_rescale": true,
|
10 |
+
"do_resize": true,
|
11 |
+
"image_mean": [
|
12 |
+
1.0,
|
13 |
+
1.0,
|
14 |
+
1.0
|
15 |
+
],
|
16 |
+
"image_processor_type": "ChameleonImageProcessor",
|
17 |
+
"image_std": [
|
18 |
+
1.0,
|
19 |
+
1.0,
|
20 |
+
1.0
|
21 |
+
],
|
22 |
+
"processor_class": "ChameleonProcessor",
|
23 |
+
"resample": 1,
|
24 |
+
"rescale_factor": 0.0078,
|
25 |
+
"size": {
|
26 |
+
"shortest_edge": 512
|
27 |
+
}
|
28 |
+
}
|
processor_config.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"image_seq_length": 1024,
|
3 |
+
"image_token": "<image>",
|
4 |
+
"processor_class": "ChameleonProcessor"
|
5 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5029034ff3e67f17bc3721362121885c6ce954d0df00e82bc982d91ef3c507f
|
3 |
+
size 14086364170
|
script.sh
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
#
# Copyright 2024 PKU-Alignment Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Launches DPO training (align_anything text_image_to_text_image trainer) over
# every (dataset_path, dataset_name) pair, writing each run to
# $OUTPUT_PATH/<dataset middle name>/<dataset_name>.

export CC=/data/align-anything/miniconda3/envs/hantao_stable/bin/gcc
export CXX=/data/align-anything/miniconda3/envs/hantao_stable/bin/g++

export TRITON_CACHE_DIR="/home/align-anything/cache/triton"

# SECURITY(review): a live W&B API key is hard-coded here and was committed to
# the repo (it also leaks via environ.txt). Rotate this key and read it from
# the environment or a secrets file instead of embedding it in the script.
export WANDB_API_KEY="7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33"
export WANDB_MODE=online

MODEL_NAME_OR_PATH="/data/align-anything/hantao/models/chameleon-7b"

DATASET_PATH=(
    "/data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized"
    "/data/align-anything/hantao/data/mm_interp/AA_preference_cosi_new_step10/tokenized"
    "/data/align-anything/hantao/data/mm_interp/AA_preference_l0_new_step10/tokenized"
    "/data/align-anything/hantao/data/mm_interp/AA_preference_random/tokenized"
)

DATASET_NAME=(
    "q0_10_preference"
    "q0_20_preference"
    "q0_30_preference"
    "q0_40_preference"
    "q0_50_preference"
    "q0_60_preference"
    "q0_70_preference"
    "q0_80_preference"
    "q0_90_preference"
)

OUTPUT_PATH="/data/align-anything/hantao/align-anything/outputs/mm_interp"
mkdir -p "$OUTPUT_PATH"

for dataset_path in "${DATASET_PATH[@]}"; do
    for dataset_name in "${DATASET_NAME[@]}"; do
        TRAIN_DATASETS=$dataset_path

        # Dataset "middle name": the second-to-last path component, e.g.
        # AA_preference_cocour_new_step10 for .../AA_preference_cocour_new_step10/tokenized.
        # BUG FIX: the original line was `middle_name= echo "$dataset_path" | awk ...`,
        # which exports an empty middle_name only to the `echo` process and prints
        # the component to stdout instead of capturing it — so OUTPUT_DIR became
        # "$OUTPUT_PATH//$dataset_name" (note the empty path segment, visible in
        # the committed arguments.yaml output_dir). Command substitution is required.
        middle_name=$(echo "$dataset_path" | awk -F'/' '{print $(NF-1)}')
        OUTPUT_DIR=$OUTPUT_PATH/$middle_name/$dataset_name
        mkdir -p "$OUTPUT_DIR"
        echo "Training on $TRAIN_DATASETS, output to $OUTPUT_DIR"

        # Source the setup script (expected to provide MASTER_PORT, among others
        # — TODO confirm against setup.sh, which is not part of this upload).
        source ./setup.sh

        # Execute deepspeed command
        deepspeed \
            --master_port ${MASTER_PORT} \
            --module align_anything.trainers.text_image_to_text_image.dpo \
            --model_name_or_path ${MODEL_NAME_OR_PATH} \
            --train_datasets ${TRAIN_DATASETS} \
            --output_dir ${OUTPUT_DIR} \
            --per_device_train_batch_size 4 \
            --per_device_eval_batch_size 4 \
            --gradient_accumulation_steps 2 \
            --train_template Chameleon_preference \
            --train_split train \
            --train_data_files ${dataset_name}.pt \
            --learning_rate 1e-6 \
            --epochs 3 \
            --lr_scheduler_type cosine \
            --save_interval 400

        # Post-process the finished run's output directory.
        bash /data/align-anything/hantao/align-anything/outputs/cut.sh "$OUTPUT_DIR"
    done
done
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "<pad>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "<reserved08706>",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "<unk>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
wandb/debug-internal.log
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"time":"2025-01-01T03:22:25.755777689Z","level":"INFO","msg":"using version","core version":"0.18.3"}
|
2 |
+
{"time":"2025-01-01T03:22:25.755807853Z","level":"INFO","msg":"created symlink","path":"/data/align-anything/hantao/align-anything/outputs/mm_interp/q0_10_preference/wandb/run-20250101_032225-2bzz3n13/logs/debug-core.log"}
|
3 |
+
{"time":"2025-01-01T03:22:25.758235027Z","level":"ERROR","msg":"dialing: google: could not find default credentials. See https://cloud.google.com/docs/authentication/external/set-up-adc for more information"}
|
4 |
+
{"time":"2025-01-01T03:22:25.778926827Z","level":"INFO","msg":"created new stream","id":"2bzz3n13"}
|
5 |
+
{"time":"2025-01-01T03:22:25.778989066Z","level":"INFO","msg":"stream: started","id":"2bzz3n13"}
|
6 |
+
{"time":"2025-01-01T03:22:25.779016519Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"2bzz3n13"}}
|
7 |
+
{"time":"2025-01-01T03:22:25.779052686Z","level":"INFO","msg":"sender: started","stream_id":{"value":"2bzz3n13"}}
|
8 |
+
{"time":"2025-01-01T03:22:25.779034819Z","level":"INFO","msg":"handler: started","stream_id":{"value":"2bzz3n13"}}
|
9 |
+
{"time":"2025-01-01T03:22:26.392432178Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
|
10 |
+
{"time":"2025-01-01T03:22:26.39641254Z","level":"INFO","msg":"Starting system monitor"}
|
11 |
+
{"time":"2025-01-01T04:18:12.751361379Z","level":"INFO","msg":"Stopping system monitor"}
|
12 |
+
{"time":"2025-01-01T04:18:12.776637037Z","level":"INFO","msg":"Stopped system monitor"}
|
13 |
+
{"time":"2025-01-01T04:18:13.326901072Z","level":"WARN","msg":"No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"}
|
14 |
+
{"time":"2025-01-01T04:18:13.326920848Z","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
|
15 |
+
{"time":"2025-01-01T04:18:14.470754716Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
16 |
+
{"time":"2025-01-01T04:18:16.234531428Z","level":"INFO","msg":"stream: closing","id":"2bzz3n13"}
|
17 |
+
{"time":"2025-01-01T04:18:16.234542406Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"2bzz3n13"}}
|
18 |
+
{"time":"2025-01-01T04:18:16.234551344Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"2bzz3n13"}}
|
19 |
+
{"time":"2025-01-01T04:18:16.23457812Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"2bzz3n13"}}
|
20 |
+
{"time":"2025-01-01T04:18:16.236373925Z","level":"INFO","msg":"stream: closed","id":"2bzz3n13"}
|
wandb/debug.log
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2025-01-01 03:22:25,741 INFO MainThread:650483 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
|
2 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Configure stats pid to 650483
|
3 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Loading settings from /home/align-anything/.config/wandb/settings
|
4 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Loading settings from /data/align-anything/hantao/align-anything/scripts/wandb/settings
|
5 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'api_key': '***REDACTED***', 'mode': 'online'}
|
6 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
|
7 |
+
2025-01-01 03:22:25,742 WARNING MainThread:650483 [wandb_setup.py:_flush():79] Could not find program at -m align_anything.trainers.text_image_to_text_image.dpo
|
8 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m align_anything.trainers.text_image_to_text_image.dpo'}
|
9 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Applying login settings: {}
|
10 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:_log_setup():532] Logging user logs to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/wandb/run-20250101_032225-2bzz3n13/logs/debug.log
|
11 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:_log_setup():533] Logging internal logs to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/wandb/run-20250101_032225-2bzz3n13/logs/debug-internal.log
|
12 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():617] calling init triggers
|
13 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
|
14 |
+
config: {'train_cfgs': {'ds_cfgs': 'ds_z3_config.json', 'epochs': 3.0, 'seed': 42, 'per_device_train_batch_size': 4.0, 'per_device_eval_batch_size': 4.0, 'gradient_accumulation_steps': 2.0, 'gradient_checkpointing': True, 'learning_rate': 1e-06, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.01, 'adam_betas': [0.9, 0.95], 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'regularization': 0.001, 'scale_coeff': 0.1, 'freeze_mm_proj': True, 'freeze_vision_tower': False, 'freeze_language_model': True}, 'data_cfgs': {'train_datasets': '/data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized', 'train_template': 'Chameleon_preference', 'train_size': None, 'train_split': 'train', 'train_subset': None, 'train_data_files': 'q0_10_preference.pt', 'train_optional_args': [], 'eval_datasets': None, 'eval_template': None, 'eval_size': None, 'eval_split': None, 'eval_subset': None, 'eval_data_files': None, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'dpo', 'output_dir': '/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference', 'cache_dir': None, 'save_interval': 400.0}, 'model_cfgs': {'model_name_or_path': '/data/align-anything/hantao/models/chameleon-7b', 'trust_remote_code': True, 'model_max_length': 4096}, 'special_tokens': None}
|
15 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():667] starting backend
|
16 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():671] sending inform_init request
|
17 |
+
2025-01-01 03:22:25,748 INFO MainThread:650483 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
18 |
+
2025-01-01 03:22:25,748 INFO MainThread:650483 [wandb_init.py:init():684] backend started and connected
|
19 |
+
2025-01-01 03:22:25,758 INFO MainThread:650483 [wandb_init.py:init():779] updated telemetry
|
20 |
+
2025-01-01 03:22:25,803 INFO MainThread:650483 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
|
21 |
+
2025-01-01 03:22:26,388 INFO MainThread:650483 [wandb_init.py:init():863] starting run threads in backend
|
22 |
+
2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_console_start():2465] atexit reg
|
23 |
+
2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_redirect():2313] redirect: wrap_raw
|
24 |
+
2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_redirect():2378] Wrapping output streams.
|
25 |
+
2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_redirect():2403] Redirects installed.
|
26 |
+
2025-01-01 03:22:26,713 INFO MainThread:650483 [wandb_init.py:init():907] run started, returning control to user process
|
27 |
+
2025-01-01 04:18:12,748 INFO MainThread:650483 [wandb_run.py:_finish():2164] finishing run htlou/align-anything/2bzz3n13
|
28 |
+
2025-01-01 04:18:12,750 INFO MainThread:650483 [wandb_run.py:_atexit_cleanup():2428] got exitcode: 0
|
29 |
+
2025-01-01 04:18:12,750 INFO MainThread:650483 [wandb_run.py:_restore():2410] restore
|
30 |
+
2025-01-01 04:18:12,750 INFO MainThread:650483 [wandb_run.py:_restore():2416] restore done
|
31 |
+
2025-01-01 04:18:16,219 INFO MainThread:650483 [wandb_run.py:_footer_history_summary_info():4049] rendering history
|
32 |
+
2025-01-01 04:18:16,221 INFO MainThread:650483 [wandb_run.py:_footer_history_summary_info():4081] rendering summary
|
33 |
+
2025-01-01 04:18:16,232 INFO MainThread:650483 [wandb_run.py:_footer_sync_info():4008] logging synced files
|
wandb/run-20250101_031915-9dphq5gk/files/output.log
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
***** Running training *****
|
2 |
+
Training 1/3.0 epoch: 0%| | 0/180.0 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
|
wandb/run-20250101_031915-9dphq5gk/files/requirements.txt
ADDED
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
align-anything==0.0.1.dev0
|
2 |
+
gitdb==4.0.11
|
3 |
+
wcwidth==0.2.13
|
4 |
+
identify==2.6.1
|
5 |
+
tomlkit==0.12.0
|
6 |
+
bitsandbytes==0.44.1
|
7 |
+
trl==0.9.6
|
8 |
+
pytest-split==0.8.0
|
9 |
+
gradio==4.44.1
|
10 |
+
pip==24.2
|
11 |
+
multidict==6.1.0
|
12 |
+
fairscale==0.4.13
|
13 |
+
mistral_common==1.4.4
|
14 |
+
python-dotenv==1.0.1
|
15 |
+
uvloop==0.20.0
|
16 |
+
absl-py==2.1.0
|
17 |
+
tiktoken==0.7.0
|
18 |
+
pydub==0.25.1
|
19 |
+
websockets==12.0
|
20 |
+
llamafactory==0.9.1.dev0
|
21 |
+
triton==3.0.0
|
22 |
+
tifffile==2024.9.20
|
23 |
+
safe-rlhf==0.0.1.dev0
|
24 |
+
pandas==2.2.3
|
25 |
+
grpcio==1.66.2
|
26 |
+
click==8.1.7
|
27 |
+
ninja==1.11.1.1
|
28 |
+
rich==13.9.2
|
29 |
+
Jinja2==3.1.4
|
30 |
+
Pygments==2.18.0
|
31 |
+
nvidia-cudnn-cu12==9.1.0.70
|
32 |
+
importlib_resources==6.4.5
|
33 |
+
GitPython==3.1.43
|
34 |
+
nvidia-cufft-cu12==11.0.2.54
|
35 |
+
tensorboard-data-server==0.7.2
|
36 |
+
align-anything==0.0.1.dev0
|
37 |
+
six==1.16.0
|
38 |
+
scipy==1.14.1
|
39 |
+
mpmath==1.3.0
|
40 |
+
jsonschema-specifications==2024.10.1
|
41 |
+
scikit-image==0.24.0
|
42 |
+
zipp==3.20.2
|
43 |
+
cycler==0.12.1
|
44 |
+
MarkupSafe==2.1.5
|
45 |
+
tzdata==2024.2
|
46 |
+
idna==3.10
|
47 |
+
pycountry==24.6.1
|
48 |
+
nvidia-nccl-cu12==2.20.5
|
49 |
+
matplotlib==3.9.2
|
50 |
+
pytz==2024.2
|
51 |
+
uvicorn==0.31.1
|
52 |
+
dill==0.3.8
|
53 |
+
pyparsing==3.1.4
|
54 |
+
pytest==7.2.0
|
55 |
+
jiter==0.6.1
|
56 |
+
safetensors==0.4.5
|
57 |
+
typing_extensions==4.12.2
|
58 |
+
decorator==4.4.2
|
59 |
+
typeguard==4.4.1
|
60 |
+
prometheus_client==0.21.0
|
61 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
62 |
+
sentencepiece==0.2.0
|
63 |
+
requests==2.32.3
|
64 |
+
kiwisolver==1.4.7
|
65 |
+
gdown==5.2.0
|
66 |
+
multiprocess==0.70.16
|
67 |
+
xxhash==3.5.0
|
68 |
+
PyYAML==6.0.2
|
69 |
+
gguf==0.10.0
|
70 |
+
nvidia-nvtx-cu12==12.1.105
|
71 |
+
hpsv2==1.2.0
|
72 |
+
tensorboard==2.18.0
|
73 |
+
nodeenv==1.9.1
|
74 |
+
filelock==3.16.1
|
75 |
+
distro==1.9.0
|
76 |
+
scikit-learn==1.5.2
|
77 |
+
huggingface-hub==0.25.2
|
78 |
+
pyairports==2.1.1
|
79 |
+
importlib_metadata==8.5.0
|
80 |
+
pyarrow==17.0.0
|
81 |
+
llvmlite==0.43.0
|
82 |
+
ray==2.37.0
|
83 |
+
tokenizers==0.20.3
|
84 |
+
nvidia-nvjitlink-cu12==12.6.77
|
85 |
+
av==14.0.1
|
86 |
+
deepspeed==0.15.2
|
87 |
+
clip==0.2.0
|
88 |
+
shtab==1.7.1
|
89 |
+
certifi==2024.8.30
|
90 |
+
braceexpand==0.1.7
|
91 |
+
nvidia-ml-py==12.560.30
|
92 |
+
webdataset==0.2.100
|
93 |
+
docker-pycreds==0.4.0
|
94 |
+
einops==0.8.0
|
95 |
+
iniconfig==2.0.0
|
96 |
+
tyro==0.9.2
|
97 |
+
torchvision==0.19.0
|
98 |
+
accelerate==0.34.2
|
99 |
+
beautifulsoup4==4.12.3
|
100 |
+
pyzmq==26.2.0
|
101 |
+
pycparser==2.22
|
102 |
+
nvidia-curand-cu12==10.3.2.106
|
103 |
+
msgpack==1.1.0
|
104 |
+
soxr==0.5.0.post1
|
105 |
+
platformdirs==4.3.6
|
106 |
+
h11==0.14.0
|
107 |
+
psutil==6.0.0
|
108 |
+
pydantic==2.9.2
|
109 |
+
shellingham==1.5.4
|
110 |
+
imageio-ffmpeg==0.5.1
|
111 |
+
wandb==0.18.3
|
112 |
+
audioread==3.0.1
|
113 |
+
annotated-types==0.7.0
|
114 |
+
docstring_parser==0.16
|
115 |
+
cloudpickle==3.1.0
|
116 |
+
regex==2024.9.11
|
117 |
+
packaging==24.1
|
118 |
+
timm==0.6.13
|
119 |
+
aiosignal==1.3.1
|
120 |
+
numba==0.60.0
|
121 |
+
orjson==3.10.7
|
122 |
+
rpds-py==0.20.0
|
123 |
+
virtualenv==20.26.6
|
124 |
+
joblib==1.4.2
|
125 |
+
charset-normalizer==3.4.0
|
126 |
+
httpx==0.27.2
|
127 |
+
ffmpy==0.4.0
|
128 |
+
lm-format-enforcer==0.10.6
|
129 |
+
yt-dlp==2024.8.6
|
130 |
+
sympy==1.13.3
|
131 |
+
python-dateutil==2.9.0.post0
|
132 |
+
nvidia-cusolver-cu12==11.4.5.107
|
133 |
+
msgspec==0.18.6
|
134 |
+
mdurl==0.1.2
|
135 |
+
torch==2.4.0
|
136 |
+
fastapi==0.115.0
|
137 |
+
optree==0.13.0
|
138 |
+
PySocks==1.7.1
|
139 |
+
transformers==4.46.0.dev0
|
140 |
+
torchlibrosa==0.1.0
|
141 |
+
fsspec==2024.6.1
|
142 |
+
nvidia-cublas-cu12==12.1.3.1
|
143 |
+
gradio_client==1.3.0
|
144 |
+
args==0.1.0
|
145 |
+
cffi==1.17.1
|
146 |
+
fonttools==4.54.1
|
147 |
+
clint==0.5.1
|
148 |
+
lark==1.2.2
|
149 |
+
tqdm==4.66.5
|
150 |
+
semantic-version==2.10.0
|
151 |
+
pooch==1.8.2
|
152 |
+
markdown-it-py==3.0.0
|
153 |
+
pydantic_core==2.23.4
|
154 |
+
sniffio==1.3.1
|
155 |
+
httptools==0.6.1
|
156 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
157 |
+
anyio==4.6.0
|
158 |
+
ftfy==6.3.0
|
159 |
+
Markdown==3.7
|
160 |
+
datasets==2.21.0
|
161 |
+
diffusers==0.30.3
|
162 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
163 |
+
vllm==0.6.2
|
164 |
+
starlette==0.38.6
|
165 |
+
flash-attn==2.7.0.post2
|
166 |
+
urllib3==2.2.3
|
167 |
+
Werkzeug==3.0.4
|
168 |
+
py-cpuinfo==9.0.0
|
169 |
+
moviepy==1.0.3
|
170 |
+
librosa==0.10.2.post1
|
171 |
+
peft==0.12.0
|
172 |
+
soupsieve==2.6
|
173 |
+
lazy_loader==0.4
|
174 |
+
pluggy==1.5.0
|
175 |
+
setuptools==75.1.0
|
176 |
+
sentry-sdk==2.16.0
|
177 |
+
tabulate==0.9.0
|
178 |
+
transformers==4.45.2
|
179 |
+
pre_commit==4.0.1
|
180 |
+
termcolor==2.5.0
|
181 |
+
frechet-audio-distance==0.1.2
|
182 |
+
pytorch-fid==0.3.0
|
183 |
+
setproctitle==1.3.3
|
184 |
+
jsonschema==4.23.0
|
185 |
+
aiofiles==23.2.1
|
186 |
+
contourpy==1.3.0
|
187 |
+
distlib==0.3.9
|
188 |
+
interegular==0.3.3
|
189 |
+
fire==0.7.0
|
190 |
+
diskcache==5.6.3
|
191 |
+
proglog==0.1.10
|
192 |
+
soundfile==0.12.1
|
193 |
+
protobuf==3.20.3
|
194 |
+
smmap==5.0.1
|
195 |
+
pycryptodomex==3.21.0
|
196 |
+
Brotli==1.1.0
|
197 |
+
pillow==10.4.0
|
198 |
+
frozenlist==1.4.1
|
199 |
+
numpy==1.26.4
|
200 |
+
mutagen==1.47.0
|
201 |
+
outlines==0.0.46
|
202 |
+
attrs==24.2.0
|
203 |
+
torchaudio==2.4.0
|
204 |
+
aiohttp==3.10.10
|
205 |
+
ruff==0.6.9
|
206 |
+
watchfiles==0.24.0
|
207 |
+
threadpoolctl==3.5.0
|
208 |
+
nest-asyncio==1.6.0
|
209 |
+
partial-json-parser==0.2.1.1.post4
|
210 |
+
sse-starlette==2.1.3
|
211 |
+
shortuuid==1.0.13
|
212 |
+
typer==0.12.5
|
213 |
+
prometheus-fastapi-instrumentator==7.0.0
|
214 |
+
imageio==2.35.1
|
215 |
+
wheel==0.44.0
|
216 |
+
image-reward==1.5
|
217 |
+
networkx==3.4.1
|
218 |
+
propcache==0.2.0
|
219 |
+
aiohappyeyeballs==2.4.3
|
220 |
+
nvidia-cusparse-cu12==12.1.0.106
|
221 |
+
xformers==0.0.27.post2
|
222 |
+
cfgv==3.4.0
|
223 |
+
python-multipart==0.0.12
|
224 |
+
httpcore==1.0.6
|
225 |
+
opencv-python==4.6.0.66
|
226 |
+
resampy==0.4.3
|
227 |
+
yarl==1.15.0
|
228 |
+
referencing==0.35.1
|
229 |
+
openai==1.51.2
|
230 |
+
hjson==3.1.0
|
231 |
+
llamafactory==0.9.1.dev0
|
232 |
+
jaraco.collections==5.1.0
|
233 |
+
backports.tarfile==1.2.0
|
234 |
+
more-itertools==10.3.0
|
235 |
+
wheel==0.43.0
|
236 |
+
importlib_metadata==8.0.0
|
237 |
+
zipp==3.19.2
|
238 |
+
autocommand==2.2.2
|
239 |
+
jaraco.functools==4.0.1
|
240 |
+
platformdirs==4.2.2
|
241 |
+
tomli==2.0.1
|
242 |
+
jaraco.text==3.12.1
|
243 |
+
typing_extensions==4.12.2
|
244 |
+
jaraco.context==5.3.0
|
245 |
+
importlib_resources==6.4.0
|
246 |
+
packaging==24.1
|
247 |
+
inflect==7.3.1
|
248 |
+
typeguard==4.3.0
|
wandb/run-20250101_031915-9dphq5gk/files/wandb-metadata.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-196-generic-x86_64-with-glibc2.31",
|
3 |
+
"python": "3.11.10",
|
4 |
+
"startedAt": "2025-01-01T03:19:15.936404Z",
|
5 |
+
"args": [
|
6 |
+
"--local_rank=0",
|
7 |
+
"--model_name_or_path",
|
8 |
+
"/data/align-anything/hantao/models/chameleon-7b",
|
9 |
+
"--train_datasets",
|
10 |
+
"/data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized",
|
11 |
+
"--output_dir",
|
12 |
+
"/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference",
|
13 |
+
"--per_device_train_batch_size",
|
14 |
+
"8",
|
15 |
+
"--per_device_eval_batch_size",
|
16 |
+
"8",
|
17 |
+
"--gradient_accumulation_steps",
|
18 |
+
"4",
|
19 |
+
"--train_template",
|
20 |
+
"Chameleon_preference",
|
21 |
+
"--train_split",
|
22 |
+
"train",
|
23 |
+
"--train_data_files",
|
24 |
+
"q0_10_preference.pt",
|
25 |
+
"--learning_rate",
|
26 |
+
"1e-6",
|
27 |
+
"--epochs",
|
28 |
+
"3",
|
29 |
+
"--lr_scheduler_type",
|
30 |
+
"cosine",
|
31 |
+
"--save_interval",
|
32 |
+
"400"
|
33 |
+
],
|
34 |
+
"program": "-m align_anything.trainers.text_image_to_text_image.dpo",
|
35 |
+
"git": {
|
36 |
+
"remote": "https://github.com/PKU-Alignment/align-anything.git",
|
37 |
+
"commit": "6fde660afc9985323f147930eedf188a5699adc7"
|
38 |
+
},
|
39 |
+
"email": "[email protected]",
|
40 |
+
"root": "/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference",
|
41 |
+
"host": "lyg0194",
|
42 |
+
"username": "align-anything",
|
43 |
+
"executable": "/data/align-anything/miniconda3/envs/hantao_stable/bin/python",
|
44 |
+
"cpu_count": 64,
|
45 |
+
"cpu_count_logical": 128,
|
46 |
+
"gpu": "[NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB]",
|
47 |
+
"gpu_count": 8,
|
48 |
+
"disk": {
|
49 |
+
"/": {
|
50 |
+
"total": "939477946368",
|
51 |
+
"used": "596692971520"
|
52 |
+
}
|
53 |
+
},
|
54 |
+
"memory": {
|
55 |
+
"total": "1081823907840"
|
56 |
+
},
|
57 |
+
"cpu": {
|
58 |
+
"count": 64,
|
59 |
+
"countLogical": 128
|
60 |
+
},
|
61 |
+
"gpu_nvidia": [
|
62 |
+
{
|
63 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
64 |
+
"memoryTotal": "85899345920",
|
65 |
+
"cudaCores": 6912,
|
66 |
+
"architecture": "Ampere"
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
70 |
+
"memoryTotal": "85899345920",
|
71 |
+
"cudaCores": 6912,
|
72 |
+
"architecture": "Ampere"
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
76 |
+
"memoryTotal": "85899345920",
|
77 |
+
"cudaCores": 6912,
|
78 |
+
"architecture": "Ampere"
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
82 |
+
"memoryTotal": "85899345920",
|
83 |
+
"cudaCores": 6912,
|
84 |
+
"architecture": "Ampere"
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
88 |
+
"memoryTotal": "85899345920",
|
89 |
+
"cudaCores": 6912,
|
90 |
+
"architecture": "Ampere"
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
94 |
+
"memoryTotal": "85899345920",
|
95 |
+
"cudaCores": 6912,
|
96 |
+
"architecture": "Ampere"
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
100 |
+
"memoryTotal": "85899345920",
|
101 |
+
"cudaCores": 6912,
|
102 |
+
"architecture": "Ampere"
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
106 |
+
"memoryTotal": "85899345920",
|
107 |
+
"cudaCores": 6912,
|
108 |
+
"architecture": "Ampere"
|
109 |
+
}
|
110 |
+
],
|
111 |
+
"cudaVersion": "12.4"
|
112 |
+
}
|
wandb/run-20250101_031915-9dphq5gk/logs/debug-internal.log
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"time":"2025-01-01T03:19:15.945723272Z","level":"INFO","msg":"using version","core version":"0.18.3"}
|
2 |
+
{"time":"2025-01-01T03:19:15.945753959Z","level":"INFO","msg":"created symlink","path":"/data/align-anything/hantao/align-anything/outputs/mm_interp/q0_10_preference/wandb/run-20250101_031915-9dphq5gk/logs/debug-core.log"}
|
3 |
+
{"time":"2025-01-01T03:19:15.949438002Z","level":"ERROR","msg":"dialing: google: could not find default credentials. See https://cloud.google.com/docs/authentication/external/set-up-adc for more information"}
|
4 |
+
{"time":"2025-01-01T03:19:15.976027597Z","level":"INFO","msg":"created new stream","id":"9dphq5gk"}
|
5 |
+
{"time":"2025-01-01T03:19:15.976056272Z","level":"INFO","msg":"stream: started","id":"9dphq5gk"}
|
6 |
+
{"time":"2025-01-01T03:19:15.976078609Z","level":"INFO","msg":"sender: started","stream_id":{"value":"9dphq5gk"}}
|
7 |
+
{"time":"2025-01-01T03:19:15.976082856Z","level":"INFO","msg":"handler: started","stream_id":{"value":"9dphq5gk"}}
|
8 |
+
{"time":"2025-01-01T03:19:15.976077521Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"9dphq5gk"}}
|
9 |
+
{"time":"2025-01-01T03:19:16.581967595Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
|
10 |
+
{"time":"2025-01-01T03:19:16.586131154Z","level":"INFO","msg":"Starting system monitor"}
|
wandb/run-20250101_031915-9dphq5gk/logs/debug.log
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
|
2 |
+
2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Configure stats pid to 646328
|
3 |
+
2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Loading settings from /home/align-anything/.config/wandb/settings
|
4 |
+
2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Loading settings from /data/align-anything/hantao/align-anything/scripts/wandb/settings
|
5 |
+
2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'api_key': '***REDACTED***', 'mode': 'online'}
|
6 |
+
2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
|
7 |
+
2025-01-01 03:19:15,933 WARNING MainThread:646328 [wandb_setup.py:_flush():79] Could not find program at -m align_anything.trainers.text_image_to_text_image.dpo
|
8 |
+
2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m align_anything.trainers.text_image_to_text_image.dpo'}
|
9 |
+
2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Applying login settings: {}
|
10 |
+
2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_init.py:_log_setup():532] Logging user logs to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/wandb/run-20250101_031915-9dphq5gk/logs/debug.log
|
11 |
+
2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_init.py:_log_setup():533] Logging internal logs to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/wandb/run-20250101_031915-9dphq5gk/logs/debug-internal.log
|
12 |
+
2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_init.py:init():617] calling init triggers
|
13 |
+
2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
|
14 |
+
config: {'train_cfgs': {'ds_cfgs': 'ds_z3_config.json', 'epochs': 3.0, 'seed': 42, 'per_device_train_batch_size': 8.0, 'per_device_eval_batch_size': 8.0, 'gradient_accumulation_steps': 4.0, 'gradient_checkpointing': True, 'learning_rate': 1e-06, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.01, 'adam_betas': [0.9, 0.95], 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'regularization': 0.001, 'scale_coeff': 0.1, 'freeze_mm_proj': True, 'freeze_vision_tower': False, 'freeze_language_model': True}, 'data_cfgs': {'train_datasets': '/data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized', 'train_template': 'Chameleon_preference', 'train_size': None, 'train_split': 'train', 'train_subset': None, 'train_data_files': 'q0_10_preference.pt', 'train_optional_args': [], 'eval_datasets': None, 'eval_template': None, 'eval_size': None, 'eval_split': None, 'eval_subset': None, 'eval_data_files': None, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'dpo', 'output_dir': '/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference', 'cache_dir': None, 'save_interval': 400.0}, 'model_cfgs': {'model_name_or_path': '/data/align-anything/hantao/models/chameleon-7b', 'trust_remote_code': True, 'model_max_length': 4096}, 'special_tokens': None}
|
15 |
+
2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_init.py:init():667] starting backend
|
16 |
+
2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_init.py:init():671] sending inform_init request
|
17 |
+
2025-01-01 03:19:15,935 INFO MainThread:646328 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
18 |
+
2025-01-01 03:19:15,936 INFO MainThread:646328 [wandb_init.py:init():684] backend started and connected
|
19 |
+
2025-01-01 03:19:15,938 INFO MainThread:646328 [wandb_init.py:init():779] updated telemetry
|
20 |
+
2025-01-01 03:19:15,998 INFO MainThread:646328 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
|
21 |
+
2025-01-01 03:19:16,578 INFO MainThread:646328 [wandb_init.py:init():863] starting run threads in backend
|
22 |
+
2025-01-01 03:19:17,193 INFO MainThread:646328 [wandb_run.py:_console_start():2465] atexit reg
|
23 |
+
2025-01-01 03:19:17,193 INFO MainThread:646328 [wandb_run.py:_redirect():2313] redirect: wrap_raw
|
24 |
+
2025-01-01 03:19:17,193 INFO MainThread:646328 [wandb_run.py:_redirect():2378] Wrapping output streams.
|
25 |
+
2025-01-01 03:19:17,193 INFO MainThread:646328 [wandb_run.py:_redirect():2403] Redirects installed.
|
26 |
+
2025-01-01 03:19:17,199 INFO MainThread:646328 [wandb_init.py:init():907] run started, returning control to user process
|
wandb/run-20250101_031915-9dphq5gk/run-9dphq5gk.wandb
ADDED
File without changes
|
wandb/run-20250101_032225-2bzz3n13/files/config.yaml
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_wandb:
|
2 |
+
value:
|
3 |
+
cli_version: 0.18.3
|
4 |
+
m: []
|
5 |
+
python_version: 3.11.10
|
6 |
+
t:
|
7 |
+
"1":
|
8 |
+
- 1
|
9 |
+
- 11
|
10 |
+
- 41
|
11 |
+
- 49
|
12 |
+
- 51
|
13 |
+
- 55
|
14 |
+
- 71
|
15 |
+
- 83
|
16 |
+
- 98
|
17 |
+
- 105
|
18 |
+
"2":
|
19 |
+
- 1
|
20 |
+
- 11
|
21 |
+
- 41
|
22 |
+
- 49
|
23 |
+
- 51
|
24 |
+
- 55
|
25 |
+
- 71
|
26 |
+
- 83
|
27 |
+
- 98
|
28 |
+
- 105
|
29 |
+
"3":
|
30 |
+
- 2
|
31 |
+
- 13
|
32 |
+
- 16
|
33 |
+
- 23
|
34 |
+
- 55
|
35 |
+
- 61
|
36 |
+
"4": 3.11.10
|
37 |
+
"5": 0.18.3
|
38 |
+
"6": 4.45.2
|
39 |
+
"8":
|
40 |
+
- 5
|
41 |
+
"12": 0.18.3
|
42 |
+
"13": linux-x86_64
|
43 |
+
data_cfgs:
|
44 |
+
value:
|
45 |
+
eval_data_files: null
|
46 |
+
eval_datasets: null
|
47 |
+
eval_optional_args: []
|
48 |
+
eval_size: null
|
49 |
+
eval_split: null
|
50 |
+
eval_subset: null
|
51 |
+
eval_template: null
|
52 |
+
train_data_files: q0_10_preference.pt
|
53 |
+
train_datasets: /data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized
|
54 |
+
train_optional_args: []
|
55 |
+
train_size: null
|
56 |
+
train_split: train
|
57 |
+
train_subset: null
|
58 |
+
train_template: Chameleon_preference
|
59 |
+
logger_cfgs:
|
60 |
+
value:
|
61 |
+
cache_dir: null
|
62 |
+
log_project: align-anything
|
63 |
+
log_run_name: dpo
|
64 |
+
log_type: wandb
|
65 |
+
output_dir: /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference
|
66 |
+
save_interval: 400
|
67 |
+
model_cfgs:
|
68 |
+
value:
|
69 |
+
model_max_length: 4096
|
70 |
+
model_name_or_path: /data/align-anything/hantao/models/chameleon-7b
|
71 |
+
trust_remote_code: true
|
72 |
+
special_tokens:
|
73 |
+
value: null
|
74 |
+
train_cfgs:
|
75 |
+
value:
|
76 |
+
adam_betas:
|
77 |
+
- 0.9
|
78 |
+
- 0.95
|
79 |
+
bf16: true
|
80 |
+
ds_cfgs: ds_z3_config.json
|
81 |
+
epochs: 3
|
82 |
+
eval_interval: 10
|
83 |
+
eval_strategy: epoch
|
84 |
+
fp16: false
|
85 |
+
freeze_language_model: true
|
86 |
+
freeze_mm_proj: true
|
87 |
+
freeze_vision_tower: false
|
88 |
+
gradient_accumulation_steps: 2
|
89 |
+
gradient_checkpointing: true
|
90 |
+
learning_rate: 1e-06
|
91 |
+
lr_scheduler_type: cosine
|
92 |
+
lr_warmup_ratio: 0.03
|
93 |
+
per_device_eval_batch_size: 4
|
94 |
+
per_device_train_batch_size: 4
|
95 |
+
regularization: 0.001
|
96 |
+
scale_coeff: 0.1
|
97 |
+
seed: 42
|
98 |
+
weight_decay: 0.01
|
wandb/run-20250101_032225-2bzz3n13/files/output.log
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
***** Running training *****
|
2 |
+
Training 1/3.0 epoch: 0%| | 0/357.0 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
|
3 |
+
Training 3/3.0 epoch (loss 0.0012): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 357/357.0 [54:49<00:00, 9.21s/it]
|
4 |
+
[2025-01-01 03:25:50,875] [INFO] [logging.py:96:log_dist] [Rank 0] step=10, skipped=0, lr=[9.979871469976195e-07, 9.979871469976195e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
|
5 |
+
[2025-01-01 03:27:24,141] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
6 |
+
[2025-01-01 03:28:03,912] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
7 |
+
[2025-01-01 03:28:59,279] [INFO] [logging.py:96:log_dist] [Rank 0] step=20, skipped=0, lr=[9.819814303479267e-07, 9.819814303479267e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
|
8 |
+
[2025-01-01 03:29:39,906] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
9 |
+
[2025-01-01 03:32:04,668] [INFO] [logging.py:96:log_dist] [Rank 0] step=30, skipped=0, lr=[9.504844339512094e-07, 9.504844339512094e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
|
10 |
+
[2025-01-01 03:34:45,536] [INFO] [logging.py:96:log_dist] [Rank 0] step=40, skipped=0, lr=[9.045084971874737e-07, 9.045084971874737e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
|
11 |
+
[2025-01-01 03:35:12,727] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
12 |
+
[2025-01-01 03:38:11,474] [INFO] [logging.py:96:log_dist] [Rank 0] step=50, skipped=0, lr=[8.455313244934324e-07, 8.455313244934324e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
|
13 |
+
[2025-01-01 03:41:02,763] [INFO] [logging.py:96:log_dist] [Rank 0] step=60, skipped=0, lr=[7.754484907260512e-07, 7.754484907260512e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
|
14 |
+
[2025-01-01 03:44:21,088] [INFO] [logging.py:96:log_dist] [Rank 0] step=70, skipped=0, lr=[6.965125158269618e-07, 6.965125158269618e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
|
15 |
+
[2025-01-01 03:45:34,501] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
16 |
+
[2025-01-01 03:46:26,012] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
17 |
+
[2025-01-01 03:47:20,622] [INFO] [logging.py:96:log_dist] [Rank 0] step=80, skipped=0, lr=[6.112604669781572e-07, 6.112604669781572e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
|
18 |
+
[2025-01-01 03:48:01,672] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
19 |
+
[2025-01-01 03:50:25,146] [INFO] [logging.py:96:log_dist] [Rank 0] step=90, skipped=0, lr=[5.224324151752575e-07, 5.224324151752575e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
|
20 |
+
[2025-01-01 03:53:13,332] [INFO] [logging.py:96:log_dist] [Rank 0] step=100, skipped=0, lr=[4.328833670911724e-07, 4.328833670911724e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
|
21 |
+
[2025-01-01 03:53:40,009] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
22 |
+
[2025-01-01 03:56:31,314] [INFO] [logging.py:96:log_dist] [Rank 0] step=110, skipped=0, lr=[3.454915028125263e-07, 3.454915028125263e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
|
23 |
+
[2025-01-01 03:59:21,373] [INFO] [logging.py:96:log_dist] [Rank 0] step=120, skipped=0, lr=[2.6306566876350067e-07, 2.6306566876350067e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
|
24 |
+
[2025-01-01 04:02:48,482] [INFO] [logging.py:96:log_dist] [Rank 0] step=130, skipped=0, lr=[1.8825509907063326e-07, 1.8825509907063326e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
|
25 |
+
[2025-01-01 04:03:53,943] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
26 |
+
[2025-01-01 04:04:34,261] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
27 |
+
[2025-01-01 04:05:49,032] [INFO] [logging.py:96:log_dist] [Rank 0] step=140, skipped=0, lr=[1.2346426699819456e-07, 1.2346426699819456e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
|
28 |
+
[2025-01-01 04:06:10,044] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
29 |
+
[2025-01-01 04:08:45,003] [INFO] [logging.py:96:log_dist] [Rank 0] step=150, skipped=0, lr=[7.077560319906694e-08, 7.077560319906694e-08], mom=[[0.9, 0.95], [0.9, 0.95]]
|
30 |
+
[2025-01-01 04:11:41,549] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
31 |
+
[2025-01-01 04:11:41,550] [INFO] [logging.py:96:log_dist] [Rank 0] step=160, skipped=0, lr=[3.188256468013139e-08, 3.188256468013139e-08], mom=[[0.9, 0.95], [0.9, 0.95]]
|
32 |
+
[2025-01-01 04:14:59,506] [INFO] [logging.py:96:log_dist] [Rank 0] step=170, skipped=0, lr=[8.035205700685165e-09, 8.035205700685165e-09], mom=[[0.9, 0.95], [0.9, 0.95]]
|
33 |
+
Saving model to "/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference" ...
|
34 |
+
Saving 16-bit model...
|
35 |
+
[2025-01-01 04:17:25,228] [INFO] [logging.py:96:log_dist] [Rank 0] [Torch] Checkpoint global_step178 is about to be saved!
|
36 |
+
[2025-01-01 04:17:25,229] [INFO] [engine.py:3649:save_16bit_model] Saving model weights to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/pytorch_model.bin, tag: global_step178
|
37 |
+
[2025-01-01 04:17:25,229] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/pytorch_model.bin...
|
38 |
+
[2025-01-01 04:17:45,184] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/pytorch_model.bin.
|
39 |
+
[2025-01-01 04:17:45,185] [INFO] [torch_checkpoint_engine.py:33:commit] [Torch] Checkpoint global_step178 is ready now!
|
40 |
+
Model saved!
|
41 |
+
Saving 16-bit model...
|
42 |
+
[2025-01-01 04:17:52,182] [INFO] [logging.py:96:log_dist] [Rank 0] [Torch] Checkpoint global_step178 is about to be saved!
|
43 |
+
[2025-01-01 04:17:52,183] [INFO] [engine.py:3649:save_16bit_model] Saving model weights to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/pytorch_model.bin, tag: global_step178
|
44 |
+
[2025-01-01 04:17:52,183] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/pytorch_model.bin...
|
45 |
+
[2025-01-01 04:18:12,699] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/pytorch_model.bin.
|
46 |
+
[2025-01-01 04:18:12,701] [INFO] [torch_checkpoint_engine.py:33:commit] [Torch] Checkpoint global_step178 is ready now!
|
47 |
+
Model saved!
|
wandb/run-20250101_032225-2bzz3n13/files/requirements.txt
ADDED
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
align-anything==0.0.1.dev0
|
2 |
+
gitdb==4.0.11
|
3 |
+
wcwidth==0.2.13
|
4 |
+
identify==2.6.1
|
5 |
+
tomlkit==0.12.0
|
6 |
+
bitsandbytes==0.44.1
|
7 |
+
trl==0.9.6
|
8 |
+
pytest-split==0.8.0
|
9 |
+
gradio==4.44.1
|
10 |
+
pip==24.2
|
11 |
+
multidict==6.1.0
|
12 |
+
fairscale==0.4.13
|
13 |
+
mistral_common==1.4.4
|
14 |
+
python-dotenv==1.0.1
|
15 |
+
uvloop==0.20.0
|
16 |
+
absl-py==2.1.0
|
17 |
+
tiktoken==0.7.0
|
18 |
+
pydub==0.25.1
|
19 |
+
websockets==12.0
|
20 |
+
llamafactory==0.9.1.dev0
|
21 |
+
triton==3.0.0
|
22 |
+
tifffile==2024.9.20
|
23 |
+
safe-rlhf==0.0.1.dev0
|
24 |
+
pandas==2.2.3
|
25 |
+
grpcio==1.66.2
|
26 |
+
click==8.1.7
|
27 |
+
ninja==1.11.1.1
|
28 |
+
rich==13.9.2
|
29 |
+
Jinja2==3.1.4
|
30 |
+
Pygments==2.18.0
|
31 |
+
nvidia-cudnn-cu12==9.1.0.70
|
32 |
+
importlib_resources==6.4.5
|
33 |
+
GitPython==3.1.43
|
34 |
+
nvidia-cufft-cu12==11.0.2.54
|
35 |
+
tensorboard-data-server==0.7.2
|
36 |
+
align-anything==0.0.1.dev0
|
37 |
+
six==1.16.0
|
38 |
+
scipy==1.14.1
|
39 |
+
mpmath==1.3.0
|
40 |
+
jsonschema-specifications==2024.10.1
|
41 |
+
scikit-image==0.24.0
|
42 |
+
zipp==3.20.2
|
43 |
+
cycler==0.12.1
|
44 |
+
MarkupSafe==2.1.5
|
45 |
+
tzdata==2024.2
|
46 |
+
idna==3.10
|
47 |
+
pycountry==24.6.1
|
48 |
+
nvidia-nccl-cu12==2.20.5
|
49 |
+
matplotlib==3.9.2
|
50 |
+
pytz==2024.2
|
51 |
+
uvicorn==0.31.1
|
52 |
+
dill==0.3.8
|
53 |
+
pyparsing==3.1.4
|
54 |
+
pytest==7.2.0
|
55 |
+
jiter==0.6.1
|
56 |
+
safetensors==0.4.5
|
57 |
+
typing_extensions==4.12.2
|
58 |
+
decorator==4.4.2
|
59 |
+
typeguard==4.4.1
|
60 |
+
prometheus_client==0.21.0
|
61 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
62 |
+
sentencepiece==0.2.0
|
63 |
+
requests==2.32.3
|
64 |
+
kiwisolver==1.4.7
|
65 |
+
gdown==5.2.0
|
66 |
+
multiprocess==0.70.16
|
67 |
+
xxhash==3.5.0
|
68 |
+
PyYAML==6.0.2
|
69 |
+
gguf==0.10.0
|
70 |
+
nvidia-nvtx-cu12==12.1.105
|
71 |
+
hpsv2==1.2.0
|
72 |
+
tensorboard==2.18.0
|
73 |
+
nodeenv==1.9.1
|
74 |
+
filelock==3.16.1
|
75 |
+
distro==1.9.0
|
76 |
+
scikit-learn==1.5.2
|
77 |
+
huggingface-hub==0.25.2
|
78 |
+
pyairports==2.1.1
|
79 |
+
importlib_metadata==8.5.0
|
80 |
+
pyarrow==17.0.0
|
81 |
+
llvmlite==0.43.0
|
82 |
+
ray==2.37.0
|
83 |
+
tokenizers==0.20.3
|
84 |
+
nvidia-nvjitlink-cu12==12.6.77
|
85 |
+
av==14.0.1
|
86 |
+
deepspeed==0.15.2
|
87 |
+
clip==0.2.0
|
88 |
+
shtab==1.7.1
|
89 |
+
certifi==2024.8.30
|
90 |
+
braceexpand==0.1.7
|
91 |
+
nvidia-ml-py==12.560.30
|
92 |
+
webdataset==0.2.100
|
93 |
+
docker-pycreds==0.4.0
|
94 |
+
einops==0.8.0
|
95 |
+
iniconfig==2.0.0
|
96 |
+
tyro==0.9.2
|
97 |
+
torchvision==0.19.0
|
98 |
+
accelerate==0.34.2
|
99 |
+
beautifulsoup4==4.12.3
|
100 |
+
pyzmq==26.2.0
|
101 |
+
pycparser==2.22
|
102 |
+
nvidia-curand-cu12==10.3.2.106
|
103 |
+
msgpack==1.1.0
|
104 |
+
soxr==0.5.0.post1
|
105 |
+
platformdirs==4.3.6
|
106 |
+
h11==0.14.0
|
107 |
+
psutil==6.0.0
|
108 |
+
pydantic==2.9.2
|
109 |
+
shellingham==1.5.4
|
110 |
+
imageio-ffmpeg==0.5.1
|
111 |
+
wandb==0.18.3
|
112 |
+
audioread==3.0.1
|
113 |
+
annotated-types==0.7.0
|
114 |
+
docstring_parser==0.16
|
115 |
+
cloudpickle==3.1.0
|
116 |
+
regex==2024.9.11
|
117 |
+
packaging==24.1
|
118 |
+
timm==0.6.13
|
119 |
+
aiosignal==1.3.1
|
120 |
+
numba==0.60.0
|
121 |
+
orjson==3.10.7
|
122 |
+
rpds-py==0.20.0
|
123 |
+
virtualenv==20.26.6
|
124 |
+
joblib==1.4.2
|
125 |
+
charset-normalizer==3.4.0
|
126 |
+
httpx==0.27.2
|
127 |
+
ffmpy==0.4.0
|
128 |
+
lm-format-enforcer==0.10.6
|
129 |
+
yt-dlp==2024.8.6
|
130 |
+
sympy==1.13.3
|
131 |
+
python-dateutil==2.9.0.post0
|
132 |
+
nvidia-cusolver-cu12==11.4.5.107
|
133 |
+
msgspec==0.18.6
|
134 |
+
mdurl==0.1.2
|
135 |
+
torch==2.4.0
|
136 |
+
fastapi==0.115.0
|
137 |
+
optree==0.13.0
|
138 |
+
PySocks==1.7.1
|
139 |
+
transformers==4.46.0.dev0
|
140 |
+
torchlibrosa==0.1.0
|
141 |
+
fsspec==2024.6.1
|
142 |
+
nvidia-cublas-cu12==12.1.3.1
|
143 |
+
gradio_client==1.3.0
|
144 |
+
args==0.1.0
|
145 |
+
cffi==1.17.1
|
146 |
+
fonttools==4.54.1
|
147 |
+
clint==0.5.1
|
148 |
+
lark==1.2.2
|
149 |
+
tqdm==4.66.5
|
150 |
+
semantic-version==2.10.0
|
151 |
+
pooch==1.8.2
|
152 |
+
markdown-it-py==3.0.0
|
153 |
+
pydantic_core==2.23.4
|
154 |
+
sniffio==1.3.1
|
155 |
+
httptools==0.6.1
|
156 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
157 |
+
anyio==4.6.0
|
158 |
+
ftfy==6.3.0
|
159 |
+
Markdown==3.7
|
160 |
+
datasets==2.21.0
|
161 |
+
diffusers==0.30.3
|
162 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
163 |
+
vllm==0.6.2
|
164 |
+
starlette==0.38.6
|
165 |
+
flash-attn==2.7.0.post2
|
166 |
+
urllib3==2.2.3
|
167 |
+
Werkzeug==3.0.4
|
168 |
+
py-cpuinfo==9.0.0
|
169 |
+
moviepy==1.0.3
|
170 |
+
librosa==0.10.2.post1
|
171 |
+
peft==0.12.0
|
172 |
+
soupsieve==2.6
|
173 |
+
lazy_loader==0.4
|
174 |
+
pluggy==1.5.0
|
175 |
+
setuptools==75.1.0
|
176 |
+
sentry-sdk==2.16.0
|
177 |
+
tabulate==0.9.0
|
178 |
+
transformers==4.45.2
|
179 |
+
pre_commit==4.0.1
|
180 |
+
termcolor==2.5.0
|
181 |
+
frechet-audio-distance==0.1.2
|
182 |
+
pytorch-fid==0.3.0
|
183 |
+
setproctitle==1.3.3
|
184 |
+
jsonschema==4.23.0
|
185 |
+
aiofiles==23.2.1
|
186 |
+
contourpy==1.3.0
|
187 |
+
distlib==0.3.9
|
188 |
+
interegular==0.3.3
|
189 |
+
fire==0.7.0
|
190 |
+
diskcache==5.6.3
|
191 |
+
proglog==0.1.10
|
192 |
+
soundfile==0.12.1
|
193 |
+
protobuf==3.20.3
|
194 |
+
smmap==5.0.1
|
195 |
+
pycryptodomex==3.21.0
|
196 |
+
Brotli==1.1.0
|
197 |
+
pillow==10.4.0
|
198 |
+
frozenlist==1.4.1
|
199 |
+
numpy==1.26.4
|
200 |
+
mutagen==1.47.0
|
201 |
+
outlines==0.0.46
|
202 |
+
attrs==24.2.0
|
203 |
+
torchaudio==2.4.0
|
204 |
+
aiohttp==3.10.10
|
205 |
+
ruff==0.6.9
|
206 |
+
watchfiles==0.24.0
|
207 |
+
threadpoolctl==3.5.0
|
208 |
+
nest-asyncio==1.6.0
|
209 |
+
partial-json-parser==0.2.1.1.post4
|
210 |
+
sse-starlette==2.1.3
|
211 |
+
shortuuid==1.0.13
|
212 |
+
typer==0.12.5
|
213 |
+
prometheus-fastapi-instrumentator==7.0.0
|
214 |
+
imageio==2.35.1
|
215 |
+
wheel==0.44.0
|
216 |
+
image-reward==1.5
|
217 |
+
networkx==3.4.1
|
218 |
+
propcache==0.2.0
|
219 |
+
aiohappyeyeballs==2.4.3
|
220 |
+
nvidia-cusparse-cu12==12.1.0.106
|
221 |
+
xformers==0.0.27.post2
|
222 |
+
cfgv==3.4.0
|
223 |
+
python-multipart==0.0.12
|
224 |
+
httpcore==1.0.6
|
225 |
+
opencv-python==4.6.0.66
|
226 |
+
resampy==0.4.3
|
227 |
+
yarl==1.15.0
|
228 |
+
referencing==0.35.1
|
229 |
+
openai==1.51.2
|
230 |
+
hjson==3.1.0
|
231 |
+
llamafactory==0.9.1.dev0
|
232 |
+
jaraco.collections==5.1.0
|
233 |
+
backports.tarfile==1.2.0
|
234 |
+
more-itertools==10.3.0
|
235 |
+
wheel==0.43.0
|
236 |
+
importlib_metadata==8.0.0
|
237 |
+
zipp==3.19.2
|
238 |
+
autocommand==2.2.2
|
239 |
+
jaraco.functools==4.0.1
|
240 |
+
platformdirs==4.2.2
|
241 |
+
tomli==2.0.1
|
242 |
+
jaraco.text==3.12.1
|
243 |
+
typing_extensions==4.12.2
|
244 |
+
jaraco.context==5.3.0
|
245 |
+
importlib_resources==6.4.0
|
246 |
+
packaging==24.1
|
247 |
+
inflect==7.3.1
|
248 |
+
typeguard==4.3.0
|
wandb/run-20250101_032225-2bzz3n13/files/wandb-metadata.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-196-generic-x86_64-with-glibc2.31",
|
3 |
+
"python": "3.11.10",
|
4 |
+
"startedAt": "2025-01-01T03:22:25.748812Z",
|
5 |
+
"args": [
|
6 |
+
"--local_rank=0",
|
7 |
+
"--model_name_or_path",
|
8 |
+
"/data/align-anything/hantao/models/chameleon-7b",
|
9 |
+
"--train_datasets",
|
10 |
+
"/data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized",
|
11 |
+
"--output_dir",
|
12 |
+
"/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference",
|
13 |
+
"--per_device_train_batch_size",
|
14 |
+
"4",
|
15 |
+
"--per_device_eval_batch_size",
|
16 |
+
"4",
|
17 |
+
"--gradient_accumulation_steps",
|
18 |
+
"2",
|
19 |
+
"--train_template",
|
20 |
+
"Chameleon_preference",
|
21 |
+
"--train_split",
|
22 |
+
"train",
|
23 |
+
"--train_data_files",
|
24 |
+
"q0_10_preference.pt",
|
25 |
+
"--learning_rate",
|
26 |
+
"1e-6",
|
27 |
+
"--epochs",
|
28 |
+
"3",
|
29 |
+
"--lr_scheduler_type",
|
30 |
+
"cosine",
|
31 |
+
"--save_interval",
|
32 |
+
"400"
|
33 |
+
],
|
34 |
+
"program": "-m align_anything.trainers.text_image_to_text_image.dpo",
|
35 |
+
"git": {
|
36 |
+
"remote": "https://github.com/PKU-Alignment/align-anything.git",
|
37 |
+
"commit": "6fde660afc9985323f147930eedf188a5699adc7"
|
38 |
+
},
|
39 |
+
"email": "[email protected]",
|
40 |
+
"root": "/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference",
|
41 |
+
"host": "lyg0194",
|
42 |
+
"username": "align-anything",
|
43 |
+
"executable": "/data/align-anything/miniconda3/envs/hantao_stable/bin/python",
|
44 |
+
"cpu_count": 64,
|
45 |
+
"cpu_count_logical": 128,
|
46 |
+
"gpu": "[NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB]",
|
47 |
+
"gpu_count": 8,
|
48 |
+
"disk": {
|
49 |
+
"/": {
|
50 |
+
"total": "939477946368",
|
51 |
+
"used": "596693139456"
|
52 |
+
}
|
53 |
+
},
|
54 |
+
"memory": {
|
55 |
+
"total": "1081823907840"
|
56 |
+
},
|
57 |
+
"cpu": {
|
58 |
+
"count": 64,
|
59 |
+
"countLogical": 128
|
60 |
+
},
|
61 |
+
"gpu_nvidia": [
|
62 |
+
{
|
63 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
64 |
+
"memoryTotal": "85899345920",
|
65 |
+
"cudaCores": 6912,
|
66 |
+
"architecture": "Ampere"
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
70 |
+
"memoryTotal": "85899345920",
|
71 |
+
"cudaCores": 6912,
|
72 |
+
"architecture": "Ampere"
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
76 |
+
"memoryTotal": "85899345920",
|
77 |
+
"cudaCores": 6912,
|
78 |
+
"architecture": "Ampere"
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
82 |
+
"memoryTotal": "85899345920",
|
83 |
+
"cudaCores": 6912,
|
84 |
+
"architecture": "Ampere"
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
88 |
+
"memoryTotal": "85899345920",
|
89 |
+
"cudaCores": 6912,
|
90 |
+
"architecture": "Ampere"
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
94 |
+
"memoryTotal": "85899345920",
|
95 |
+
"cudaCores": 6912,
|
96 |
+
"architecture": "Ampere"
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
100 |
+
"memoryTotal": "85899345920",
|
101 |
+
"cudaCores": 6912,
|
102 |
+
"architecture": "Ampere"
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
106 |
+
"memoryTotal": "85899345920",
|
107 |
+
"cudaCores": 6912,
|
108 |
+
"architecture": "Ampere"
|
109 |
+
}
|
110 |
+
],
|
111 |
+
"cudaVersion": "12.4"
|
112 |
+
}
|
wandb/run-20250101_032225-2bzz3n13/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"train/better_sample_reward":156.75074768066406,"train/worse_sample_reward":33.601295471191406,"train/reward":190.35205078125,"_wandb":{"runtime":3347},"_runtime":3347.002538272,"train/step":357,"train/loss":0.00119595427531749,"_step":357,"train/reward_margin":123.14945220947266,"train/lr":3.22238178339318e-10,"train/reward_accuracy":1,"train/epoch":3,"_timestamp":1.7357050364626048e+09}
|
wandb/run-20250101_032225-2bzz3n13/logs/debug-internal.log
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"time":"2025-01-01T03:22:25.755777689Z","level":"INFO","msg":"using version","core version":"0.18.3"}
|
2 |
+
{"time":"2025-01-01T03:22:25.755807853Z","level":"INFO","msg":"created symlink","path":"/data/align-anything/hantao/align-anything/outputs/mm_interp/q0_10_preference/wandb/run-20250101_032225-2bzz3n13/logs/debug-core.log"}
|
3 |
+
{"time":"2025-01-01T03:22:25.758235027Z","level":"ERROR","msg":"dialing: google: could not find default credentials. See https://cloud.google.com/docs/authentication/external/set-up-adc for more information"}
|
4 |
+
{"time":"2025-01-01T03:22:25.778926827Z","level":"INFO","msg":"created new stream","id":"2bzz3n13"}
|
5 |
+
{"time":"2025-01-01T03:22:25.778989066Z","level":"INFO","msg":"stream: started","id":"2bzz3n13"}
|
6 |
+
{"time":"2025-01-01T03:22:25.779016519Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"2bzz3n13"}}
|
7 |
+
{"time":"2025-01-01T03:22:25.779052686Z","level":"INFO","msg":"sender: started","stream_id":{"value":"2bzz3n13"}}
|
8 |
+
{"time":"2025-01-01T03:22:25.779034819Z","level":"INFO","msg":"handler: started","stream_id":{"value":"2bzz3n13"}}
|
9 |
+
{"time":"2025-01-01T03:22:26.392432178Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
|
10 |
+
{"time":"2025-01-01T03:22:26.39641254Z","level":"INFO","msg":"Starting system monitor"}
|
11 |
+
{"time":"2025-01-01T04:18:12.751361379Z","level":"INFO","msg":"Stopping system monitor"}
|
12 |
+
{"time":"2025-01-01T04:18:12.776637037Z","level":"INFO","msg":"Stopped system monitor"}
|
13 |
+
{"time":"2025-01-01T04:18:13.326901072Z","level":"WARN","msg":"No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"}
|
14 |
+
{"time":"2025-01-01T04:18:13.326920848Z","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
|
15 |
+
{"time":"2025-01-01T04:18:14.470754716Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
16 |
+
{"time":"2025-01-01T04:18:16.234531428Z","level":"INFO","msg":"stream: closing","id":"2bzz3n13"}
|
17 |
+
{"time":"2025-01-01T04:18:16.234542406Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"2bzz3n13"}}
|
18 |
+
{"time":"2025-01-01T04:18:16.234551344Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"2bzz3n13"}}
|
19 |
+
{"time":"2025-01-01T04:18:16.23457812Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"2bzz3n13"}}
|
20 |
+
{"time":"2025-01-01T04:18:16.236373925Z","level":"INFO","msg":"stream: closed","id":"2bzz3n13"}
|
wandb/run-20250101_032225-2bzz3n13/logs/debug.log
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2025-01-01 03:22:25,741 INFO MainThread:650483 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
|
2 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Configure stats pid to 650483
|
3 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Loading settings from /home/align-anything/.config/wandb/settings
|
4 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Loading settings from /data/align-anything/hantao/align-anything/scripts/wandb/settings
|
5 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'api_key': '***REDACTED***', 'mode': 'online'}
|
6 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
|
7 |
+
2025-01-01 03:22:25,742 WARNING MainThread:650483 [wandb_setup.py:_flush():79] Could not find program at -m align_anything.trainers.text_image_to_text_image.dpo
|
8 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m align_anything.trainers.text_image_to_text_image.dpo'}
|
9 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Applying login settings: {}
|
10 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:_log_setup():532] Logging user logs to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/wandb/run-20250101_032225-2bzz3n13/logs/debug.log
|
11 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:_log_setup():533] Logging internal logs to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/wandb/run-20250101_032225-2bzz3n13/logs/debug-internal.log
|
12 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():617] calling init triggers
|
13 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
|
14 |
+
config: {'train_cfgs': {'ds_cfgs': 'ds_z3_config.json', 'epochs': 3.0, 'seed': 42, 'per_device_train_batch_size': 4.0, 'per_device_eval_batch_size': 4.0, 'gradient_accumulation_steps': 2.0, 'gradient_checkpointing': True, 'learning_rate': 1e-06, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.01, 'adam_betas': [0.9, 0.95], 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'regularization': 0.001, 'scale_coeff': 0.1, 'freeze_mm_proj': True, 'freeze_vision_tower': False, 'freeze_language_model': True}, 'data_cfgs': {'train_datasets': '/data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized', 'train_template': 'Chameleon_preference', 'train_size': None, 'train_split': 'train', 'train_subset': None, 'train_data_files': 'q0_10_preference.pt', 'train_optional_args': [], 'eval_datasets': None, 'eval_template': None, 'eval_size': None, 'eval_split': None, 'eval_subset': None, 'eval_data_files': None, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'dpo', 'output_dir': '/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference', 'cache_dir': None, 'save_interval': 400.0}, 'model_cfgs': {'model_name_or_path': '/data/align-anything/hantao/models/chameleon-7b', 'trust_remote_code': True, 'model_max_length': 4096}, 'special_tokens': None}
|
15 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():667] starting backend
|
16 |
+
2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():671] sending inform_init request
|
17 |
+
2025-01-01 03:22:25,748 INFO MainThread:650483 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
18 |
+
2025-01-01 03:22:25,748 INFO MainThread:650483 [wandb_init.py:init():684] backend started and connected
|
19 |
+
2025-01-01 03:22:25,758 INFO MainThread:650483 [wandb_init.py:init():779] updated telemetry
|
20 |
+
2025-01-01 03:22:25,803 INFO MainThread:650483 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
|
21 |
+
2025-01-01 03:22:26,388 INFO MainThread:650483 [wandb_init.py:init():863] starting run threads in backend
|
22 |
+
2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_console_start():2465] atexit reg
|
23 |
+
2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_redirect():2313] redirect: wrap_raw
|
24 |
+
2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_redirect():2378] Wrapping output streams.
|
25 |
+
2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_redirect():2403] Redirects installed.
|
26 |
+
2025-01-01 03:22:26,713 INFO MainThread:650483 [wandb_init.py:init():907] run started, returning control to user process
|
27 |
+
2025-01-01 04:18:12,748 INFO MainThread:650483 [wandb_run.py:_finish():2164] finishing run htlou/align-anything/2bzz3n13
|
28 |
+
2025-01-01 04:18:12,750 INFO MainThread:650483 [wandb_run.py:_atexit_cleanup():2428] got exitcode: 0
|
29 |
+
2025-01-01 04:18:12,750 INFO MainThread:650483 [wandb_run.py:_restore():2410] restore
|
30 |
+
2025-01-01 04:18:12,750 INFO MainThread:650483 [wandb_run.py:_restore():2416] restore done
|
31 |
+
2025-01-01 04:18:16,219 INFO MainThread:650483 [wandb_run.py:_footer_history_summary_info():4049] rendering history
|
32 |
+
2025-01-01 04:18:16,221 INFO MainThread:650483 [wandb_run.py:_footer_history_summary_info():4081] rendering summary
|
33 |
+
2025-01-01 04:18:16,232 INFO MainThread:650483 [wandb_run.py:_footer_sync_info():4008] logging synced files
|
wandb/run-20250101_032225-2bzz3n13/run-2bzz3n13.wandb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b24dc144b11274c9e88c6af74302af6ce2fbfcdcbbcbaab785ec88157d14e38f
|
3 |
+
size 3363509
|