htlou committed on
Commit 3c469b6 · verified · 1 Parent(s): dd7cde8

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20250101_032225-2bzz3n13/run-2bzz3n13.wandb filter=lfs diff=lfs merge=lfs -text
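These filter rules route matching paths through Git LFS instead of storing blobs in the repository. A rough sketch of which rule would capture a given path (illustrative only; `fnmatch` merely approximates git's .gitattributes glob semantics):

```python
# Rough sketch: which LFS rule above would capture a path?
# fnmatch only approximates git's .gitattributes glob matching.
from fnmatch import fnmatch

LFS_PATTERNS = ["*.zip", "*.zst", "*tfevents*",
                "wandb/run-20250101_032225-2bzz3n13/run-2bzz3n13.wandb"]

def lfs_rule_for(path: str) -> str | None:
    basename = path.rsplit("/", 1)[-1]
    for pat in LFS_PATTERNS:
        # git matches slash-free patterns against the basename
        target = basename if "/" not in pat else path
        if fnmatch(target, pat):
            return pat
    return None

print(lfs_rule_for("wandb/run-20250101_032225-2bzz3n13/run-2bzz3n13.wandb"))
```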
arguments.yaml ADDED
@@ -0,0 +1,51 @@
+ data_cfgs:
+   eval_data_files: null
+   eval_datasets: null
+   eval_optional_args: []
+   eval_size: null
+   eval_split: null
+   eval_subset: null
+   eval_template: null
+   train_data_files: q0_10_preference.pt
+   train_datasets: /data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized
+   train_optional_args: []
+   train_size: null
+   train_split: train
+   train_subset: null
+   train_template: Chameleon_preference
+ logger_cfgs:
+   cache_dir: null
+   log_project: align-anything
+   log_run_name: dpo
+   log_type: wandb
+   output_dir: /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference
+   save_interval: 400.0
+ model_cfgs:
+   model_max_length: 4096
+   model_name_or_path: /data/align-anything/hantao/models/chameleon-7b
+   trust_remote_code: true
+ special_tokens: null
+ train_cfgs:
+   adam_betas:
+   - 0.9
+   - 0.95
+   bf16: true
+   ds_cfgs: ds_z3_config.json
+   epochs: 3.0
+   eval_interval: 10
+   eval_strategy: epoch
+   fp16: false
+   freeze_language_model: true
+   freeze_mm_proj: true
+   freeze_vision_tower: false
+   gradient_accumulation_steps: 2.0
+   gradient_checkpointing: true
+   learning_rate: 1.0e-06
+   lr_scheduler_type: cosine
+   lr_warmup_ratio: 0.03
+   per_device_eval_batch_size: 4.0
+   per_device_train_batch_size: 4.0
+   regularization: 0.001
+   scale_coeff: 0.1
+   seed: 42
+   weight_decay: 0.01
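A quick sanity check on this configuration is the effective global batch size it implies. A minimal sketch, assuming the 8-GPU world size recorded in environ.txt below:

```python
# Minimal sketch: derive the effective global batch size from arguments.yaml.
# world_size=8 is an assumption taken from WORLD_SIZE in environ.txt below.
import yaml

with open("arguments.yaml") as f:
    cfg = yaml.safe_load(f)

train = cfg["train_cfgs"]
world_size = 8
global_batch = (int(train["per_device_train_batch_size"])
                * int(train["gradient_accumulation_steps"])
                * world_size)
print(global_batch)  # 4 * 2 * 8 = 64 samples per optimizer step
```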
config.json ADDED
The diff for this file is too large to render. See raw diff
 
environ.txt ADDED
@@ -0,0 +1,164 @@
1
+ ADDR2LINE=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-addr2line
2
+ AR=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-ar
3
+ AS=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-as
4
+ BROWSER=/home/align-anything/.cursor-server/cli/servers/Stable-51c8aff7cb5a89f4a0e462fbacab938bdbfaf140/server/bin/helpers/browser.sh
5
+ BUILD=x86_64-conda-linux-gnu
6
+ CC=/data/align-anything/miniconda3/envs/hantao_stable/bin/gcc
7
+ CC_FOR_BUILD=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-cc
8
+ CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -I/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/include -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs
9
+ CMAKE_PREFIX_PATH=/data/align-anything/miniconda3/envs/jy-a:/data/align-anything/miniconda3/envs/jy-a/x86_64-conda-linux-gnu/sysroot/usr
10
+ COLORTERM=truecolor
11
+ CONDA_BACKUP_ADDR2LINE=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-addr2line
12
+ CONDA_BACKUP_AR=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-ar
13
+ CONDA_BACKUP_AS=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-as
14
+ CONDA_BACKUP_BUILD=x86_64-conda-linux-gnu
15
+ CONDA_BACKUP_CC=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-cc
16
+ CONDA_BACKUP_CC_FOR_BUILD=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-cc
17
+ CONDA_BACKUP_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -I/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/include -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs -I/data/align-anything/miniconda3/envs/hantao_proxy/targets/x86_64-linux/include
18
+ CONDA_BACKUP_CMAKE_PREFIX_PATH=/data/align-anything/miniconda3/envs/hantao_proxy:/data/align-anything/miniconda3/envs/hantao_proxy/x86_64-conda-linux-gnu/sysroot/usr
19
+ CONDA_BACKUP_CONDA_BUILD_SYSROOT=/data/align-anything/miniconda3/envs/hantao_proxy/x86_64-conda-linux-gnu/sysroot
20
+ CONDA_BACKUP_CONDA_TOOLCHAIN_BUILD=x86_64-conda-linux-gnu
21
+ CONDA_BACKUP_CONDA_TOOLCHAIN_HOST=x86_64-conda-linux-gnu
22
+ CONDA_BACKUP_CPP=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-cpp
23
+ CONDA_BACKUP_CPPFLAGS=-DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /data/align-anything/miniconda3/envs/jy-a/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /data/align-anything/miniconda3/envs/jy-a/include -I/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/include -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs -I/data/align-anything/miniconda3/envs/hantao_proxy/targets/x86_64-linux/include
24
+ CONDA_BACKUP_CXX=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-c++
25
+ CONDA_BACKUP_CXXFILT=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-c++filt
26
+ CONDA_BACKUP_CXXFLAGS=-fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -I/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/include -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs -I/data/align-anything/miniconda3/envs/hantao_proxy/targets/x86_64-linux/include
27
+ CONDA_BACKUP_CXX_FOR_BUILD=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-c++
28
+ CONDA_BACKUP_DEBUG_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include
29
+ CONDA_BACKUP_DEBUG_CPPFLAGS=-D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /data/align-anything/miniconda3/envs/jy-a/include -D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /data/align-anything/miniconda3/envs/jy-a/include
30
+ CONDA_BACKUP_DEBUG_CXXFLAGS=-fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/hantao_proxy/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include
31
+ CONDA_BACKUP_DWP=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-dwp
32
+ CONDA_BACKUP_ELFEDIT=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-elfedit
33
+ CONDA_BACKUP_GCC=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-gcc
34
+ CONDA_BACKUP_GCC_AR=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-gcc-ar
35
+ CONDA_BACKUP_GCC_NM=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-gcc-nm
36
+ CONDA_BACKUP_GCC_RANLIB=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-gcc-ranlib
37
+ CONDA_BACKUP_GPROF=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-gprof
38
+ CONDA_BACKUP_GXX=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-g++
39
+ CONDA_BACKUP_HOST=x86_64-conda-linux-gnu
40
+ CONDA_BACKUP_LD=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-ld
41
+ CONDA_BACKUP_LDFLAGS=-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/data/align-anything/miniconda3/envs/hantao_proxy/lib -Wl,-rpath-link,/data/align-anything/miniconda3/envs/hantao_proxy/lib -L/data/align-anything/miniconda3/envs/hantao_proxy/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/data/align-anything/miniconda3/envs/hantao_proxy/lib -Wl,-rpath-link,/data/align-anything/miniconda3/envs/hantao_proxy/lib -L/data/align-anything/miniconda3/envs/hantao_proxy/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/data/align-anything/miniconda3/envs/jy-a/lib -Wl,-rpath-link,/data/align-anything/miniconda3/envs/jy-a/lib -L/data/align-anything/miniconda3/envs/jy-a/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/data/align-anything/miniconda3/envs/jy-a/lib -Wl,-rpath-link,/data/align-anything/miniconda3/envs/jy-a/lib -L/data/align-anything/miniconda3/envs/jy-a/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs -L/data/align-anything/miniconda3/envs/hantao_proxy/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/hantao_proxy/targets/x86_64-linux/lib/stubs
42
+ CONDA_BACKUP_LD_GOLD=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-ld.gold
43
+ CONDA_BACKUP_MESON_ARGS=-Dbuildtype=release
44
+ CONDA_BACKUP_NM=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-nm
45
+ CONDA_BACKUP_OBJCOPY=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-objcopy
46
+ CONDA_BACKUP_OBJDUMP=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-objdump
47
+ CONDA_BACKUP_RANLIB=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-ranlib
48
+ CONDA_BACKUP_READELF=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-readelf
49
+ CONDA_BACKUP_SIZE=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-size
50
+ CONDA_BACKUP_STRINGS=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-strings
51
+ CONDA_BACKUP_STRIP=/data/align-anything/miniconda3/envs/hantao_proxy/bin/x86_64-conda-linux-gnu-strip
52
+ CONDA_BACKUP__CONDA_PYTHON_SYSCONFIGDATA_NAME=_sysconfigdata_x86_64_conda_cos6_linux_gnu
53
+ CONDA_BACKUP_build_alias=x86_64-conda-linux-gnu
54
+ CONDA_BACKUP_host_alias=x86_64-conda-linux-gnu
55
+ CONDA_BUILD_SYSROOT=/data/align-anything/miniconda3/envs/jy-a/x86_64-conda-linux-gnu/sysroot
56
+ CONDA_DEFAULT_ENV=hantao_stable
57
+ CONDA_EXE=/data/align-anything/miniconda3/bin/conda
58
+ CONDA_PREFIX=/data/align-anything/miniconda3/envs/hantao_stable
59
+ CONDA_PREFIX_1=/home/align-anything/miniconda3
60
+ CONDA_PREFIX_10=/data/align-anything/miniconda3/envs/hantao_proxy
61
+ CONDA_PREFIX_2=/data/align-anything/miniconda3/envs/jy-a
62
+ CONDA_PREFIX_3=/data/align-anything/miniconda3
63
+ CONDA_PREFIX_4=/data/align-anything/miniconda3/envs/hantao_stable
64
+ CONDA_PREFIX_5=/data/align-anything/miniconda3/envs/hantao_cham
65
+ CONDA_PREFIX_6=/data/align-anything/miniconda3/envs/hantao_stable
66
+ CONDA_PREFIX_7=/data/align-anything/miniconda3/envs/hantao_stream
67
+ CONDA_PREFIX_8=/data/align-anything/miniconda3/envs/hantao_proxy
68
+ CONDA_PREFIX_9=/data/align-anything/miniconda3/envs/hantao_stable
69
+ CONDA_PROMPT_MODIFIER=(hantao_stable)
70
+ CONDA_PYTHON_EXE=/data/align-anything/miniconda3/bin/python
71
+ CONDA_ROOT=/home/align-anything/miniconda3
72
+ CONDA_SHLVL=11
73
+ CONDA_TOOLCHAIN_BUILD=x86_64-conda-linux-gnu
74
+ CONDA_TOOLCHAIN_HOST=x86_64-conda-linux-gnu
75
+ CPP=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-cpp
76
+ CPPFLAGS=-DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /data/align-anything/miniconda3/envs/jy-a/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /data/align-anything/miniconda3/envs/jy-a/include -I/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/include -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs
77
+ CROSS_RANK=0
78
+ CROSS_SIZE=1
79
+ CUDA_MODULE_LOADING=LAZY
80
+ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
81
+ CXX=/data/align-anything/miniconda3/envs/hantao_stable/bin/g++
82
+ CXXFILT=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-c++filt
83
+ CXXFLAGS=-fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -I/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/include -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs
84
+ CXX_FOR_BUILD=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-c++
85
+ DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/2000/bus
86
+ DEBUG_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include
87
+ DEBUG_CPPFLAGS=-D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /data/align-anything/miniconda3/envs/jy-a/include -D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /data/align-anything/miniconda3/envs/jy-a/include
88
+ DEBUG_CXXFLAGS=-fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /data/align-anything/miniconda3/envs/jy-a/include
89
+ DWP=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-dwp
90
+ ELFEDIT=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-elfedit
91
+ GCC=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-gcc
92
+ GCC_AR=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-gcc-ar
93
+ GCC_NM=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-gcc-nm
94
+ GCC_RANLIB=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-gcc-ranlib
95
+ GIT_ASKPASS=/home/align-anything/.cursor-server/cli/servers/Stable-51c8aff7cb5a89f4a0e462fbacab938bdbfaf140/server/extensions/git/dist/askpass.sh
96
+ GPROF=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-gprof
97
+ GXX=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-g++
98
+ HOME=/home/align-anything
99
+ HOST=x86_64-conda-linux-gnu
100
+ LANG=en_US.UTF-8
101
+ LD=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-ld
102
+ LDFLAGS=-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/data/align-anything/miniconda3/envs/jy-a/lib -Wl,-rpath-link,/data/align-anything/miniconda3/envs/jy-a/lib -L/data/align-anything/miniconda3/envs/jy-a/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/data/align-anything/miniconda3/envs/jy-a/lib -Wl,-rpath-link,/data/align-anything/miniconda3/envs/jy-a/lib -L/data/align-anything/miniconda3/envs/jy-a/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib -L/data/align-anything/miniconda3/envs/jy-a/targets/x86_64-linux/lib/stubs
103
+ LD_GOLD=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-ld.gold
104
+ LD_LIBRARY_PATH=/data/align-anything/miniconda3/envs/hantao_stable/lib/python3.11/site-packages/cv2/../../lib64:
105
+ LESSCLOSE=/usr/bin/lesspipe %s %s
106
+ LESSOPEN=| /usr/bin/lesspipe %s
107
+ LOCAL_RANK=0
108
+ LOCAL_SIZE=8
109
+ LOGLEVEL=WARNING
110
+ LOGNAME=align-anything
111
+ LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:
112
+ MASTER_ADDR=127.0.0.1
113
+ MASTER_PORT=14373
114
+ MOTD_SHOWN=pam
115
+ NM=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-nm
116
+ NVCC_PREPEND_FLAGS= -ccbin=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-c++
117
+ OBJCOPY=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-objcopy
118
+ OBJDUMP=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-objdump
119
+ OLDPWD=/data/align-anything/hantao/LLaMA-Factory
120
+ PATH=/data/align-anything/miniconda3/envs/hantao_stable/bin:/data/align-anything/miniconda3/envs/hantao_stable/bin:/data/align-anything/miniconda3/bin:/data/align-anything/miniconda3/condabin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin
121
+ PWD=/data/align-anything/hantao/align-anything/scripts
122
+ PYGAME_HIDE_SUPPORT_PROMPT=1
123
+ PYTHONHASHSEED=42
124
+ PYTHONPATH=/data/align-anything/hantao/align-anything
125
+ QT_QPA_FONTDIR=/data/align-anything/miniconda3/envs/hantao_stable/lib/python3.11/site-packages/cv2/qt/fonts
126
+ QT_QPA_PLATFORM_PLUGIN_PATH=/data/align-anything/miniconda3/envs/hantao_stable/lib/python3.11/site-packages/cv2/qt/plugins
127
+ RANK=0
128
+ RANLIB=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-ranlib
129
+ READELF=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-readelf
130
+ SHELL=/bin/bash
131
+ SHLVL=3
132
+ SIZE=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-size
133
+ SSH_CLIENT=117.136.0.149 36325 30400
134
+ SSH_CONNECTION=111.205.232.251 37945 10.10.212.194 30400
135
+ SSL_CERT_DIR=/usr/lib/ssl/certs
136
+ SSL_CERT_FILE=/usr/lib/ssl/certs/ca-certificates.crt
137
+ STRINGS=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-strings
138
+ STRIP=/data/align-anything/miniconda3/envs/jy-a/bin/x86_64-conda-linux-gnu-strip
139
+ TERM=screen
140
+ TERM_PROGRAM=vscode
141
+ TERM_PROGRAM_VERSION=0.41.3
142
+ TMUX=/tmp/tmux-2000/default,34082,51
143
+ TMUX_PANE=%59
144
+ TRITON_CACHE_DIR=/home/align-anything/cache/triton
145
+ USER=align-anything
146
+ VSCODE_GIT_ASKPASS_EXTRA_ARGS=
147
+ VSCODE_GIT_ASKPASS_MAIN=/home/align-anything/.cursor-server/cli/servers/Stable-51c8aff7cb5a89f4a0e462fbacab938bdbfaf140/server/extensions/git/dist/askpass-main.js
148
+ VSCODE_GIT_ASKPASS_NODE=/home/align-anything/.cursor-server/cli/servers/Stable-51c8aff7cb5a89f4a0e462fbacab938bdbfaf140/server/node
149
+ VSCODE_GIT_IPC_HANDLE=/run/user/2000/vscode-git-ef8058c264.sock
150
+ VSCODE_IPC_HOOK_CLI=/run/user/2000/vscode-ipc-db013265-9a8a-4fb7-ba94-00b66d808feb.sock
151
+ WANDB_API_KEY=7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33
152
+ WANDB_MODE=online
153
+ WANDB_SERVICE=2-650483-tcp-localhost-34395
154
+ WORLD_SIZE=8
155
+ XDG_DATA_DIRS=/usr/local/share:/usr/share:/var/lib/snapd/desktop
156
+ XDG_RUNTIME_DIR=/run/user/2000
157
+ XDG_SESSION_CLASS=user
158
+ XDG_SESSION_ID=11
159
+ XDG_SESSION_TYPE=tty
160
+ _=/data/align-anything/miniconda3/envs/hantao_stable/bin/deepspeed
161
+ _CE_CONDA=
162
+ _CE_M=
163
+ build_alias=x86_64-conda-linux-gnu
164
+ host_alias=x86_64-conda-linux-gnu
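Amid the compiler-toolchain noise, the variables that actually drive the run are the distributed-training ones (RANK, LOCAL_RANK, WORLD_SIZE, MASTER_ADDR, MASTER_PORT, CUDA_VISIBLE_DEVICES). A minimal sketch of how a launcher-spawned worker typically consumes them — standard torch.distributed usage, not align-anything's actual code:

```python
# Minimal sketch: how a launcher-spawned worker consumes the variables above
# (standard torch.distributed usage; not align-anything's actual code).
import os
import torch
import torch.distributed as dist

local_rank = int(os.environ["LOCAL_RANK"])   # 0 in this dump
world_size = int(os.environ["WORLD_SIZE"])   # 8 in this dump
torch.cuda.set_device(local_rank)
# MASTER_ADDR=127.0.0.1 / MASTER_PORT=14373 are read implicitly by env:// init
dist.init_process_group(backend="nccl", init_method="env://")
print(f"rank {dist.get_rank()} of {world_size} ready")
```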
preprocessor_config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "crop_size": {
+     "height": 512,
+     "width": 512
+   },
+   "do_center_crop": true,
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     1.0,
+     1.0,
+     1.0
+   ],
+   "image_processor_type": "ChameleonImageProcessor",
+   "image_std": [
+     1.0,
+     1.0,
+     1.0
+   ],
+   "processor_class": "ChameleonProcessor",
+   "resample": 1,
+   "rescale_factor": 0.0078,
+   "size": {
+     "shortest_edge": 512
+   }
+ }
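The arithmetic this config implies: with rescale_factor 0.0078 (≈1/128) and mean = std = 1.0, a uint8 pixel p maps to p × 0.0078 − 1.0, i.e. roughly the range [−1, 1) (0 → −1.0, 255 → ≈0.989). A minimal numpy sketch of that transform — an illustration, not the ChameleonImageProcessor implementation:

```python
# Minimal sketch of the arithmetic this config implies (illustrative only;
# not the actual ChameleonImageProcessor code path).
import numpy as np

def preprocess(pixels_uint8: np.ndarray) -> np.ndarray:
    """Apply do_rescale then do_normalize as configured above."""
    x = pixels_uint8.astype(np.float32) * 0.0078   # rescale_factor
    return (x - 1.0) / 1.0                          # image_mean=1.0, image_std=1.0

print(preprocess(np.array([0, 128, 255], dtype=np.uint8)))
# [-1.0, -0.0016, 0.989] -- roughly [-1, 1)
```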
processor_config.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "image_seq_length": 1024,
+   "image_token": "<image>",
+   "processor_class": "ChameleonProcessor"
+ }
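Each `<image>` placeholder expands to 1024 discrete image tokens, so with model_max_length 4096 from arguments.yaml, one image leaves 4096 − 1024 = 3072 positions for text. A tiny budget check (hypothetical helper, not from the repo):

```python
# Tiny budget check implied by the two configs (hypothetical helper).
IMAGE_SEQ_LENGTH = 1024   # from processor_config.json
MODEL_MAX_LENGTH = 4096   # from arguments.yaml

def text_budget(n_images: int) -> int:
    """Tokens left for text after expanding <image> placeholders."""
    return MODEL_MAX_LENGTH - n_images * IMAGE_SEQ_LENGTH

print(text_budget(1))  # 3072
print(text_budget(2))  # 2048
```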
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a5029034ff3e67f17bc3721362121885c6ce954d0df00e82bc982d91ef3c507f
+ size 14086364170
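This is a Git LFS pointer, not the weights themselves: the real file is 14,086,364,170 bytes (≈13.1 GiB, plausible for a 7B-parameter model stored in 16-bit precision). A minimal sketch for verifying a downloaded copy against the pointer:

```python
# Minimal sketch: verify a downloaded pytorch_model.bin against its LFS pointer.
import hashlib
import os

EXPECTED_OID = "a5029034ff3e67f17bc3721362121885c6ce954d0df00e82bc982d91ef3c507f"
EXPECTED_SIZE = 14086364170  # bytes, ~13.1 GiB

def verify(path: str) -> bool:
    if os.path.getsize(path) != EXPECTED_SIZE:
        return False
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream 1 MiB chunks
            h.update(chunk)
    return h.hexdigest() == EXPECTED_OID

print(verify("pytorch_model.bin"))
```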
script.sh ADDED
@@ -0,0 +1,84 @@
+ #!/usr/bin/env bash
+ #
+ # Copyright 2024 PKU-Alignment Team. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ 
+ export CC=/data/align-anything/miniconda3/envs/hantao_stable/bin/gcc
+ export CXX=/data/align-anything/miniconda3/envs/hantao_stable/bin/g++
+ 
+ export TRITON_CACHE_DIR="/home/align-anything/cache/triton"
+ 
+ export WANDB_API_KEY="7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33"
+ export WANDB_MODE=online
+ 
+ MODEL_NAME_OR_PATH="/data/align-anything/hantao/models/chameleon-7b"
+ 
+ DATASET_PATH=(
+     "/data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized"
+     "/data/align-anything/hantao/data/mm_interp/AA_preference_cosi_new_step10/tokenized"
+     "/data/align-anything/hantao/data/mm_interp/AA_preference_l0_new_step10/tokenized"
+     "/data/align-anything/hantao/data/mm_interp/AA_preference_random/tokenized"
+ )
+ 
+ DATASET_NAME=(
+     "q0_10_preference"
+     "q0_20_preference"
+     "q0_30_preference"
+     "q0_40_preference"
+     "q0_50_preference"
+     "q0_60_preference"
+     "q0_70_preference"
+     "q0_80_preference"
+     "q0_90_preference"
+ )
+ 
+ OUTPUT_PATH="/data/align-anything/hantao/align-anything/outputs/mm_interp"
+ mkdir -p "$OUTPUT_PATH"
+ 
+ # Initialize variables
+ 
+ for dataset_path in "${DATASET_PATH[@]}"; do
+     for dataset_name in "${DATASET_NAME[@]}"; do
+         TRAIN_DATASETS=$dataset_path
+ 
+         # Dataset middle name: the directory one level above "tokenized"
+         middle_name=$(echo "$dataset_path" | awk -F'/' '{print $(NF-1)}')
+         OUTPUT_DIR=$OUTPUT_PATH/$middle_name/$dataset_name
+         mkdir -p "$OUTPUT_DIR"
+         echo "Training on $TRAIN_DATASETS, output to $OUTPUT_DIR"
+         # Source the setup script
+         source ./setup.sh
+ 
+         # Execute deepspeed command
+         deepspeed \
+             --master_port ${MASTER_PORT} \
+             --module align_anything.trainers.text_image_to_text_image.dpo \
+             --model_name_or_path ${MODEL_NAME_OR_PATH} \
+             --train_datasets ${TRAIN_DATASETS} \
+             --output_dir ${OUTPUT_DIR} \
+             --per_device_train_batch_size 4 \
+             --per_device_eval_batch_size 4 \
+             --gradient_accumulation_steps 2 \
+             --train_template Chameleon_preference \
+             --train_split train \
+             --train_data_files ${dataset_name}.pt \
+             --learning_rate 1e-6 \
+             --epochs 3 \
+             --lr_scheduler_type cosine \
+             --save_interval 400
+ 
+         bash /data/align-anything/hantao/align-anything/outputs/cut.sh $OUTPUT_DIR
+     done
+ done
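The nested loops launch one training run per (dataset_path, dataset_name) pair — 4 × 9 = 36 runs — each writing to OUTPUT_PATH/&lt;middle_name&gt;/&lt;dataset_name&gt;. A small Python sketch of the same layout logic (illustration only; only a subset of the arrays above is listed):

```python
# Sketch of the output layout the loop above produces (illustration only).
from itertools import product
from pathlib import Path

dataset_paths = [
    "/data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized",
    "/data/align-anything/hantao/data/mm_interp/AA_preference_random/tokenized",
]  # ... plus the other DATASET_PATH entries
dataset_names = ["q0_10_preference", "q0_20_preference"]  # ... through q0_90

OUTPUT_PATH = Path("/data/align-anything/hantao/align-anything/outputs/mm_interp")
for p, name in product(dataset_paths, dataset_names):
    middle_name = Path(p).parts[-2]  # same as awk -F'/' '{print $(NF-1)}'
    print(OUTPUT_PATH / middle_name / name)
```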
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "<reserved08706>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
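A quick way to confirm these mappings after the upload is to load the tokenizer and echo its special tokens — a minimal sketch assuming the standard transformers API and the local path from arguments.yaml:

```python
# Minimal sketch (assumes the standard transformers API and the local
# model path recorded in arguments.yaml).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "/data/align-anything/hantao/models/chameleon-7b", trust_remote_code=True
)
# Should echo the map above: <s>, </s>, <pad>, <reserved08706>, <unk>
print(tok.special_tokens_map)
print(tok.pad_token_id, tok.sep_token_id)
```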
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug-internal.log ADDED
@@ -0,0 +1,20 @@
+ {"time":"2025-01-01T03:22:25.755777689Z","level":"INFO","msg":"using version","core version":"0.18.3"}
+ {"time":"2025-01-01T03:22:25.755807853Z","level":"INFO","msg":"created symlink","path":"/data/align-anything/hantao/align-anything/outputs/mm_interp/q0_10_preference/wandb/run-20250101_032225-2bzz3n13/logs/debug-core.log"}
+ {"time":"2025-01-01T03:22:25.758235027Z","level":"ERROR","msg":"dialing: google: could not find default credentials. See https://cloud.google.com/docs/authentication/external/set-up-adc for more information"}
+ {"time":"2025-01-01T03:22:25.778926827Z","level":"INFO","msg":"created new stream","id":"2bzz3n13"}
+ {"time":"2025-01-01T03:22:25.778989066Z","level":"INFO","msg":"stream: started","id":"2bzz3n13"}
+ {"time":"2025-01-01T03:22:25.779016519Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"2bzz3n13"}}
+ {"time":"2025-01-01T03:22:25.779052686Z","level":"INFO","msg":"sender: started","stream_id":{"value":"2bzz3n13"}}
+ {"time":"2025-01-01T03:22:25.779034819Z","level":"INFO","msg":"handler: started","stream_id":{"value":"2bzz3n13"}}
+ {"time":"2025-01-01T03:22:26.392432178Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
+ {"time":"2025-01-01T03:22:26.39641254Z","level":"INFO","msg":"Starting system monitor"}
+ {"time":"2025-01-01T04:18:12.751361379Z","level":"INFO","msg":"Stopping system monitor"}
+ {"time":"2025-01-01T04:18:12.776637037Z","level":"INFO","msg":"Stopped system monitor"}
+ {"time":"2025-01-01T04:18:13.326901072Z","level":"WARN","msg":"No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"}
+ {"time":"2025-01-01T04:18:13.326920848Z","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
+ {"time":"2025-01-01T04:18:14.470754716Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2025-01-01T04:18:16.234531428Z","level":"INFO","msg":"stream: closing","id":"2bzz3n13"}
+ {"time":"2025-01-01T04:18:16.234542406Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"2bzz3n13"}}
+ {"time":"2025-01-01T04:18:16.234551344Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"2bzz3n13"}}
+ {"time":"2025-01-01T04:18:16.23457812Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"2bzz3n13"}}
+ {"time":"2025-01-01T04:18:16.236373925Z","level":"INFO","msg":"stream: closed","id":"2bzz3n13"}
wandb/debug.log ADDED
@@ -0,0 +1,33 @@
+ 2025-01-01 03:22:25,741 INFO MainThread:650483 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Configure stats pid to 650483
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Loading settings from /home/align-anything/.config/wandb/settings
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Loading settings from /data/align-anything/hantao/align-anything/scripts/wandb/settings
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'api_key': '***REDACTED***', 'mode': 'online'}
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
+ 2025-01-01 03:22:25,742 WARNING MainThread:650483 [wandb_setup.py:_flush():79] Could not find program at -m align_anything.trainers.text_image_to_text_image.dpo
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m align_anything.trainers.text_image_to_text_image.dpo'}
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Applying login settings: {}
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:_log_setup():532] Logging user logs to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/wandb/run-20250101_032225-2bzz3n13/logs/debug.log
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:_log_setup():533] Logging internal logs to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/wandb/run-20250101_032225-2bzz3n13/logs/debug-internal.log
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():617] calling init triggers
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
+ config: {'train_cfgs': {'ds_cfgs': 'ds_z3_config.json', 'epochs': 3.0, 'seed': 42, 'per_device_train_batch_size': 4.0, 'per_device_eval_batch_size': 4.0, 'gradient_accumulation_steps': 2.0, 'gradient_checkpointing': True, 'learning_rate': 1e-06, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.01, 'adam_betas': [0.9, 0.95], 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'regularization': 0.001, 'scale_coeff': 0.1, 'freeze_mm_proj': True, 'freeze_vision_tower': False, 'freeze_language_model': True}, 'data_cfgs': {'train_datasets': '/data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized', 'train_template': 'Chameleon_preference', 'train_size': None, 'train_split': 'train', 'train_subset': None, 'train_data_files': 'q0_10_preference.pt', 'train_optional_args': [], 'eval_datasets': None, 'eval_template': None, 'eval_size': None, 'eval_split': None, 'eval_subset': None, 'eval_data_files': None, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'dpo', 'output_dir': '/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference', 'cache_dir': None, 'save_interval': 400.0}, 'model_cfgs': {'model_name_or_path': '/data/align-anything/hantao/models/chameleon-7b', 'trust_remote_code': True, 'model_max_length': 4096}, 'special_tokens': None}
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():667] starting backend
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():671] sending inform_init request
+ 2025-01-01 03:22:25,748 INFO MainThread:650483 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+ 2025-01-01 03:22:25,748 INFO MainThread:650483 [wandb_init.py:init():684] backend started and connected
+ 2025-01-01 03:22:25,758 INFO MainThread:650483 [wandb_init.py:init():779] updated telemetry
+ 2025-01-01 03:22:25,803 INFO MainThread:650483 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
+ 2025-01-01 03:22:26,388 INFO MainThread:650483 [wandb_init.py:init():863] starting run threads in backend
+ 2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_console_start():2465] atexit reg
+ 2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_redirect():2313] redirect: wrap_raw
+ 2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_redirect():2378] Wrapping output streams.
+ 2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_redirect():2403] Redirects installed.
+ 2025-01-01 03:22:26,713 INFO MainThread:650483 [wandb_init.py:init():907] run started, returning control to user process
+ 2025-01-01 04:18:12,748 INFO MainThread:650483 [wandb_run.py:_finish():2164] finishing run htlou/align-anything/2bzz3n13
+ 2025-01-01 04:18:12,750 INFO MainThread:650483 [wandb_run.py:_atexit_cleanup():2428] got exitcode: 0
+ 2025-01-01 04:18:12,750 INFO MainThread:650483 [wandb_run.py:_restore():2410] restore
+ 2025-01-01 04:18:12,750 INFO MainThread:650483 [wandb_run.py:_restore():2416] restore done
+ 2025-01-01 04:18:16,219 INFO MainThread:650483 [wandb_run.py:_footer_history_summary_info():4049] rendering history
+ 2025-01-01 04:18:16,221 INFO MainThread:650483 [wandb_run.py:_footer_history_summary_info():4081] rendering summary
+ 2025-01-01 04:18:16,232 INFO MainThread:650483 [wandb_run.py:_footer_sync_info():4008] logging synced files
wandb/run-20250101_031915-9dphq5gk/files/output.log ADDED
@@ -0,0 +1,2 @@
+ ***** Running training *****
+ Training 1/3.0 epoch:   0%|          | 0/180.0 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
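This warning is benign: gradient checkpointing recomputes activations during the backward pass, so the KV cache must be disabled while training. The usual pattern (a sketch of the standard transformers API, not the trainer's actual code; `model` stands for whatever PreTrainedModel subclass the trainer builds):

```python
# Standard pattern behind the warning above (sketch; not the trainer's code).
# `model` is a placeholder for the PreTrainedModel instance the trainer builds.
model.config.use_cache = False          # KV cache is useless during training
model.gradient_checkpointing_enable()   # recompute activations to save memory
```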
wandb/run-20250101_031915-9dphq5gk/files/requirements.txt ADDED
@@ -0,0 +1,248 @@
1
+ align-anything==0.0.1.dev0
2
+ gitdb==4.0.11
3
+ wcwidth==0.2.13
4
+ identify==2.6.1
5
+ tomlkit==0.12.0
6
+ bitsandbytes==0.44.1
7
+ trl==0.9.6
8
+ pytest-split==0.8.0
9
+ gradio==4.44.1
10
+ pip==24.2
11
+ multidict==6.1.0
12
+ fairscale==0.4.13
13
+ mistral_common==1.4.4
14
+ python-dotenv==1.0.1
15
+ uvloop==0.20.0
16
+ absl-py==2.1.0
17
+ tiktoken==0.7.0
18
+ pydub==0.25.1
19
+ websockets==12.0
20
+ llamafactory==0.9.1.dev0
21
+ triton==3.0.0
22
+ tifffile==2024.9.20
23
+ safe-rlhf==0.0.1.dev0
24
+ pandas==2.2.3
25
+ grpcio==1.66.2
26
+ click==8.1.7
27
+ ninja==1.11.1.1
28
+ rich==13.9.2
29
+ Jinja2==3.1.4
30
+ Pygments==2.18.0
31
+ nvidia-cudnn-cu12==9.1.0.70
32
+ importlib_resources==6.4.5
33
+ GitPython==3.1.43
34
+ nvidia-cufft-cu12==11.0.2.54
35
+ tensorboard-data-server==0.7.2
36
+ align-anything==0.0.1.dev0
37
+ six==1.16.0
38
+ scipy==1.14.1
39
+ mpmath==1.3.0
40
+ jsonschema-specifications==2024.10.1
41
+ scikit-image==0.24.0
42
+ zipp==3.20.2
43
+ cycler==0.12.1
44
+ MarkupSafe==2.1.5
45
+ tzdata==2024.2
46
+ idna==3.10
47
+ pycountry==24.6.1
48
+ nvidia-nccl-cu12==2.20.5
49
+ matplotlib==3.9.2
50
+ pytz==2024.2
51
+ uvicorn==0.31.1
52
+ dill==0.3.8
53
+ pyparsing==3.1.4
54
+ pytest==7.2.0
55
+ jiter==0.6.1
56
+ safetensors==0.4.5
57
+ typing_extensions==4.12.2
58
+ decorator==4.4.2
59
+ typeguard==4.4.1
60
+ prometheus_client==0.21.0
61
+ nvidia-cuda-cupti-cu12==12.1.105
62
+ sentencepiece==0.2.0
63
+ requests==2.32.3
64
+ kiwisolver==1.4.7
65
+ gdown==5.2.0
66
+ multiprocess==0.70.16
67
+ xxhash==3.5.0
68
+ PyYAML==6.0.2
69
+ gguf==0.10.0
70
+ nvidia-nvtx-cu12==12.1.105
71
+ hpsv2==1.2.0
72
+ tensorboard==2.18.0
73
+ nodeenv==1.9.1
74
+ filelock==3.16.1
75
+ distro==1.9.0
76
+ scikit-learn==1.5.2
77
+ huggingface-hub==0.25.2
78
+ pyairports==2.1.1
79
+ importlib_metadata==8.5.0
80
+ pyarrow==17.0.0
81
+ llvmlite==0.43.0
82
+ ray==2.37.0
83
+ tokenizers==0.20.3
84
+ nvidia-nvjitlink-cu12==12.6.77
85
+ av==14.0.1
86
+ deepspeed==0.15.2
87
+ clip==0.2.0
88
+ shtab==1.7.1
89
+ certifi==2024.8.30
90
+ braceexpand==0.1.7
91
+ nvidia-ml-py==12.560.30
92
+ webdataset==0.2.100
93
+ docker-pycreds==0.4.0
94
+ einops==0.8.0
95
+ iniconfig==2.0.0
96
+ tyro==0.9.2
97
+ torchvision==0.19.0
98
+ accelerate==0.34.2
99
+ beautifulsoup4==4.12.3
100
+ pyzmq==26.2.0
101
+ pycparser==2.22
102
+ nvidia-curand-cu12==10.3.2.106
103
+ msgpack==1.1.0
104
+ soxr==0.5.0.post1
105
+ platformdirs==4.3.6
106
+ h11==0.14.0
107
+ psutil==6.0.0
108
+ pydantic==2.9.2
109
+ shellingham==1.5.4
110
+ imageio-ffmpeg==0.5.1
111
+ wandb==0.18.3
112
+ audioread==3.0.1
113
+ annotated-types==0.7.0
114
+ docstring_parser==0.16
115
+ cloudpickle==3.1.0
116
+ regex==2024.9.11
117
+ packaging==24.1
118
+ timm==0.6.13
119
+ aiosignal==1.3.1
120
+ numba==0.60.0
121
+ orjson==3.10.7
122
+ rpds-py==0.20.0
123
+ virtualenv==20.26.6
124
+ joblib==1.4.2
125
+ charset-normalizer==3.4.0
126
+ httpx==0.27.2
127
+ ffmpy==0.4.0
128
+ lm-format-enforcer==0.10.6
129
+ yt-dlp==2024.8.6
130
+ sympy==1.13.3
131
+ python-dateutil==2.9.0.post0
132
+ nvidia-cusolver-cu12==11.4.5.107
133
+ msgspec==0.18.6
134
+ mdurl==0.1.2
135
+ torch==2.4.0
136
+ fastapi==0.115.0
137
+ optree==0.13.0
138
+ PySocks==1.7.1
139
+ transformers==4.46.0.dev0
140
+ torchlibrosa==0.1.0
141
+ fsspec==2024.6.1
142
+ nvidia-cublas-cu12==12.1.3.1
143
+ gradio_client==1.3.0
144
+ args==0.1.0
145
+ cffi==1.17.1
146
+ fonttools==4.54.1
147
+ clint==0.5.1
148
+ lark==1.2.2
149
+ tqdm==4.66.5
150
+ semantic-version==2.10.0
151
+ pooch==1.8.2
152
+ markdown-it-py==3.0.0
153
+ pydantic_core==2.23.4
154
+ sniffio==1.3.1
155
+ httptools==0.6.1
156
+ nvidia-cuda-runtime-cu12==12.1.105
157
+ anyio==4.6.0
158
+ ftfy==6.3.0
159
+ Markdown==3.7
160
+ datasets==2.21.0
161
+ diffusers==0.30.3
162
+ nvidia-cuda-nvrtc-cu12==12.1.105
163
+ vllm==0.6.2
164
+ starlette==0.38.6
165
+ flash-attn==2.7.0.post2
166
+ urllib3==2.2.3
167
+ Werkzeug==3.0.4
168
+ py-cpuinfo==9.0.0
169
+ moviepy==1.0.3
170
+ librosa==0.10.2.post1
171
+ peft==0.12.0
172
+ soupsieve==2.6
173
+ lazy_loader==0.4
174
+ pluggy==1.5.0
175
+ setuptools==75.1.0
176
+ sentry-sdk==2.16.0
177
+ tabulate==0.9.0
178
+ transformers==4.45.2
179
+ pre_commit==4.0.1
180
+ termcolor==2.5.0
181
+ frechet-audio-distance==0.1.2
182
+ pytorch-fid==0.3.0
183
+ setproctitle==1.3.3
184
+ jsonschema==4.23.0
185
+ aiofiles==23.2.1
186
+ contourpy==1.3.0
187
+ distlib==0.3.9
188
+ interegular==0.3.3
189
+ fire==0.7.0
190
+ diskcache==5.6.3
191
+ proglog==0.1.10
192
+ soundfile==0.12.1
193
+ protobuf==3.20.3
194
+ smmap==5.0.1
195
+ pycryptodomex==3.21.0
196
+ Brotli==1.1.0
197
+ pillow==10.4.0
198
+ frozenlist==1.4.1
199
+ numpy==1.26.4
200
+ mutagen==1.47.0
201
+ outlines==0.0.46
202
+ attrs==24.2.0
203
+ torchaudio==2.4.0
204
+ aiohttp==3.10.10
205
+ ruff==0.6.9
206
+ watchfiles==0.24.0
207
+ threadpoolctl==3.5.0
208
+ nest-asyncio==1.6.0
209
+ partial-json-parser==0.2.1.1.post4
210
+ sse-starlette==2.1.3
211
+ shortuuid==1.0.13
212
+ typer==0.12.5
213
+ prometheus-fastapi-instrumentator==7.0.0
214
+ imageio==2.35.1
215
+ wheel==0.44.0
216
+ image-reward==1.5
217
+ networkx==3.4.1
218
+ propcache==0.2.0
219
+ aiohappyeyeballs==2.4.3
220
+ nvidia-cusparse-cu12==12.1.0.106
221
+ xformers==0.0.27.post2
222
+ cfgv==3.4.0
223
+ python-multipart==0.0.12
224
+ httpcore==1.0.6
225
+ opencv-python==4.6.0.66
226
+ resampy==0.4.3
227
+ yarl==1.15.0
228
+ referencing==0.35.1
229
+ openai==1.51.2
230
+ hjson==3.1.0
231
+ llamafactory==0.9.1.dev0
232
+ jaraco.collections==5.1.0
233
+ backports.tarfile==1.2.0
234
+ more-itertools==10.3.0
235
+ wheel==0.43.0
236
+ importlib_metadata==8.0.0
237
+ zipp==3.19.2
238
+ autocommand==2.2.2
239
+ jaraco.functools==4.0.1
240
+ platformdirs==4.2.2
241
+ tomli==2.0.1
242
+ jaraco.text==3.12.1
243
+ typing_extensions==4.12.2
244
+ jaraco.context==5.3.0
245
+ importlib_resources==6.4.0
246
+ packaging==24.1
247
+ inflect==7.3.1
248
+ typeguard==4.3.0
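Note that this freeze contains duplicate pins (e.g. two `transformers` and two `wheel` entries), so blind reinstalls can be order-dependent. A small sketch that checks the active environment against chosen pins, using only the standard library:

```python
# Sketch: compare installed versions against this freeze (stdlib only).
from importlib.metadata import PackageNotFoundError, version

def check(req: str) -> str:
    name, _, pinned = req.strip().partition("==")
    try:
        got = version(name)
    except PackageNotFoundError:
        return f"{name}: MISSING (want {pinned})"
    return f"{name}: OK" if got == pinned else f"{name}: {got} != {pinned}"

for req in ["torch==2.4.0", "deepspeed==0.15.2", "transformers==4.45.2"]:
    print(check(req))
```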
wandb/run-20250101_031915-9dphq5gk/files/wandb-metadata.json ADDED
@@ -0,0 +1,112 @@
+ {
+   "os": "Linux-5.4.0-196-generic-x86_64-with-glibc2.31",
+   "python": "3.11.10",
+   "startedAt": "2025-01-01T03:19:15.936404Z",
+   "args": [
+     "--local_rank=0",
+     "--model_name_or_path",
+     "/data/align-anything/hantao/models/chameleon-7b",
+     "--train_datasets",
+     "/data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized",
+     "--output_dir",
+     "/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference",
+     "--per_device_train_batch_size",
+     "8",
+     "--per_device_eval_batch_size",
+     "8",
+     "--gradient_accumulation_steps",
+     "4",
+     "--train_template",
+     "Chameleon_preference",
+     "--train_split",
+     "train",
+     "--train_data_files",
+     "q0_10_preference.pt",
+     "--learning_rate",
+     "1e-6",
+     "--epochs",
+     "3",
+     "--lr_scheduler_type",
+     "cosine",
+     "--save_interval",
+     "400"
+   ],
+   "program": "-m align_anything.trainers.text_image_to_text_image.dpo",
+   "git": {
+     "remote": "https://github.com/PKU-Alignment/align-anything.git",
+     "commit": "6fde660afc9985323f147930eedf188a5699adc7"
+   },
+   "email": "[email protected]",
+   "root": "/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference",
+   "host": "lyg0194",
+   "username": "align-anything",
+   "executable": "/data/align-anything/miniconda3/envs/hantao_stable/bin/python",
+   "cpu_count": 64,
+   "cpu_count_logical": 128,
+   "gpu": "[NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB]",
+   "gpu_count": 8,
+   "disk": {
+     "/": {
+       "total": "939477946368",
+       "used": "596692971520"
+     }
+   },
+   "memory": {
+     "total": "1081823907840"
+   },
+   "cpu": {
+     "count": 64,
+     "countLogical": 128
+   },
+   "gpu_nvidia": [
+     {
+       "name": "NVIDIA A100-SXM4-80GB",
+       "memoryTotal": "85899345920",
+       "cudaCores": 6912,
+       "architecture": "Ampere"
+     },
+     {
+       "name": "NVIDIA A100-SXM4-80GB",
+       "memoryTotal": "85899345920",
+       "cudaCores": 6912,
+       "architecture": "Ampere"
+     },
+     {
+       "name": "NVIDIA A100-SXM4-80GB",
+       "memoryTotal": "85899345920",
+       "cudaCores": 6912,
+       "architecture": "Ampere"
+     },
+     {
+       "name": "NVIDIA A100-SXM4-80GB",
+       "memoryTotal": "85899345920",
+       "cudaCores": 6912,
+       "architecture": "Ampere"
+     },
+     {
+       "name": "NVIDIA A100-SXM4-80GB",
+       "memoryTotal": "85899345920",
+       "cudaCores": 6912,
+       "architecture": "Ampere"
+     },
+     {
+       "name": "NVIDIA A100-SXM4-80GB",
+       "memoryTotal": "85899345920",
+       "cudaCores": 6912,
+       "architecture": "Ampere"
+     },
+     {
+       "name": "NVIDIA A100-SXM4-80GB",
+       "memoryTotal": "85899345920",
+       "cudaCores": 6912,
+       "architecture": "Ampere"
+     },
+     {
+       "name": "NVIDIA A100-SXM4-80GB",
+       "memoryTotal": "85899345920",
+       "cudaCores": 6912,
+       "architecture": "Ampere"
+     }
+   ],
+   "cudaVersion": "12.4"
+ }
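The run recorded 8× A100-SXM4-80GB; a short sketch summarizing the aggregate from this file (memoryTotal values are strings of bytes):

```python
# Sketch: summarize the recorded hardware from wandb-metadata.json.
import json

with open("wandb-metadata.json") as f:
    meta = json.load(f)

total_bytes = sum(int(g["memoryTotal"]) for g in meta["gpu_nvidia"])
print(meta["gpu_count"], "GPUs,", total_bytes / 2**30, "GiB total")  # 8 GPUs, 640 GiB
```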
wandb/run-20250101_031915-9dphq5gk/logs/debug-internal.log ADDED
@@ -0,0 +1,10 @@
+ {"time":"2025-01-01T03:19:15.945723272Z","level":"INFO","msg":"using version","core version":"0.18.3"}
+ {"time":"2025-01-01T03:19:15.945753959Z","level":"INFO","msg":"created symlink","path":"/data/align-anything/hantao/align-anything/outputs/mm_interp/q0_10_preference/wandb/run-20250101_031915-9dphq5gk/logs/debug-core.log"}
+ {"time":"2025-01-01T03:19:15.949438002Z","level":"ERROR","msg":"dialing: google: could not find default credentials. See https://cloud.google.com/docs/authentication/external/set-up-adc for more information"}
+ {"time":"2025-01-01T03:19:15.976027597Z","level":"INFO","msg":"created new stream","id":"9dphq5gk"}
+ {"time":"2025-01-01T03:19:15.976056272Z","level":"INFO","msg":"stream: started","id":"9dphq5gk"}
+ {"time":"2025-01-01T03:19:15.976078609Z","level":"INFO","msg":"sender: started","stream_id":{"value":"9dphq5gk"}}
+ {"time":"2025-01-01T03:19:15.976082856Z","level":"INFO","msg":"handler: started","stream_id":{"value":"9dphq5gk"}}
+ {"time":"2025-01-01T03:19:15.976077521Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"9dphq5gk"}}
+ {"time":"2025-01-01T03:19:16.581967595Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
+ {"time":"2025-01-01T03:19:16.586131154Z","level":"INFO","msg":"Starting system monitor"}
wandb/run-20250101_031915-9dphq5gk/logs/debug.log ADDED
@@ -0,0 +1,26 @@
+ 2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
+ 2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Configure stats pid to 646328
+ 2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Loading settings from /home/align-anything/.config/wandb/settings
+ 2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Loading settings from /data/align-anything/hantao/align-anything/scripts/wandb/settings
+ 2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'api_key': '***REDACTED***', 'mode': 'online'}
+ 2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
+ 2025-01-01 03:19:15,933 WARNING MainThread:646328 [wandb_setup.py:_flush():79] Could not find program at -m align_anything.trainers.text_image_to_text_image.dpo
+ 2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m align_anything.trainers.text_image_to_text_image.dpo'}
+ 2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_setup.py:_flush():79] Applying login settings: {}
+ 2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_init.py:_log_setup():532] Logging user logs to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/wandb/run-20250101_031915-9dphq5gk/logs/debug.log
+ 2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_init.py:_log_setup():533] Logging internal logs to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/wandb/run-20250101_031915-9dphq5gk/logs/debug-internal.log
+ 2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_init.py:init():617] calling init triggers
+ 2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
+ config: {'train_cfgs': {'ds_cfgs': 'ds_z3_config.json', 'epochs': 3.0, 'seed': 42, 'per_device_train_batch_size': 8.0, 'per_device_eval_batch_size': 8.0, 'gradient_accumulation_steps': 4.0, 'gradient_checkpointing': True, 'learning_rate': 1e-06, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.01, 'adam_betas': [0.9, 0.95], 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'regularization': 0.001, 'scale_coeff': 0.1, 'freeze_mm_proj': True, 'freeze_vision_tower': False, 'freeze_language_model': True}, 'data_cfgs': {'train_datasets': '/data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized', 'train_template': 'Chameleon_preference', 'train_size': None, 'train_split': 'train', 'train_subset': None, 'train_data_files': 'q0_10_preference.pt', 'train_optional_args': [], 'eval_datasets': None, 'eval_template': None, 'eval_size': None, 'eval_split': None, 'eval_subset': None, 'eval_data_files': None, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'dpo', 'output_dir': '/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference', 'cache_dir': None, 'save_interval': 400.0}, 'model_cfgs': {'model_name_or_path': '/data/align-anything/hantao/models/chameleon-7b', 'trust_remote_code': True, 'model_max_length': 4096}, 'special_tokens': None}
+ 2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_init.py:init():667] starting backend
+ 2025-01-01 03:19:15,933 INFO MainThread:646328 [wandb_init.py:init():671] sending inform_init request
+ 2025-01-01 03:19:15,935 INFO MainThread:646328 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+ 2025-01-01 03:19:15,936 INFO MainThread:646328 [wandb_init.py:init():684] backend started and connected
+ 2025-01-01 03:19:15,938 INFO MainThread:646328 [wandb_init.py:init():779] updated telemetry
+ 2025-01-01 03:19:15,998 INFO MainThread:646328 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
+ 2025-01-01 03:19:16,578 INFO MainThread:646328 [wandb_init.py:init():863] starting run threads in backend
+ 2025-01-01 03:19:17,193 INFO MainThread:646328 [wandb_run.py:_console_start():2465] atexit reg
+ 2025-01-01 03:19:17,193 INFO MainThread:646328 [wandb_run.py:_redirect():2313] redirect: wrap_raw
+ 2025-01-01 03:19:17,193 INFO MainThread:646328 [wandb_run.py:_redirect():2378] Wrapping output streams.
+ 2025-01-01 03:19:17,193 INFO MainThread:646328 [wandb_run.py:_redirect():2403] Redirects installed.
+ 2025-01-01 03:19:17,199 INFO MainThread:646328 [wandb_init.py:init():907] run started, returning control to user process
wandb/run-20250101_031915-9dphq5gk/run-9dphq5gk.wandb ADDED
File without changes
wandb/run-20250101_032225-2bzz3n13/files/config.yaml ADDED
@@ -0,0 +1,98 @@
+ _wandb:
+ value:
+ cli_version: 0.18.3
+ m: []
+ python_version: 3.11.10
+ t:
+ "1":
+ - 1
+ - 11
+ - 41
+ - 49
+ - 51
+ - 55
+ - 71
+ - 83
+ - 98
+ - 105
+ "2":
+ - 1
+ - 11
+ - 41
+ - 49
+ - 51
+ - 55
+ - 71
+ - 83
+ - 98
+ - 105
+ "3":
+ - 2
+ - 13
+ - 16
+ - 23
+ - 55
+ - 61
+ "4": 3.11.10
+ "5": 0.18.3
+ "6": 4.45.2
+ "8":
+ - 5
+ "12": 0.18.3
+ "13": linux-x86_64
+ data_cfgs:
+ value:
+ eval_data_files: null
+ eval_datasets: null
+ eval_optional_args: []
+ eval_size: null
+ eval_split: null
+ eval_subset: null
+ eval_template: null
+ train_data_files: q0_10_preference.pt
+ train_datasets: /data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized
+ train_optional_args: []
+ train_size: null
+ train_split: train
+ train_subset: null
+ train_template: Chameleon_preference
+ logger_cfgs:
+ value:
+ cache_dir: null
+ log_project: align-anything
+ log_run_name: dpo
+ log_type: wandb
+ output_dir: /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference
+ save_interval: 400
+ model_cfgs:
+ value:
+ model_max_length: 4096
+ model_name_or_path: /data/align-anything/hantao/models/chameleon-7b
+ trust_remote_code: true
+ special_tokens:
+ value: null
+ train_cfgs:
+ value:
+ adam_betas:
+ - 0.9
+ - 0.95
+ bf16: true
+ ds_cfgs: ds_z3_config.json
+ epochs: 3
+ eval_interval: 10
+ eval_strategy: epoch
+ fp16: false
+ freeze_language_model: true
+ freeze_mm_proj: true
+ freeze_vision_tower: false
+ gradient_accumulation_steps: 2
+ gradient_checkpointing: true
+ learning_rate: 1e-06
+ lr_scheduler_type: cosine
+ lr_warmup_ratio: 0.03
+ per_device_eval_batch_size: 4
+ per_device_train_batch_size: 4
+ regularization: 0.001
+ scale_coeff: 0.1
+ seed: 42
+ weight_decay: 0.01
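A minimal sketch (not part of this upload) of how this config.yaml could be read back with PyYAML, assuming the nesting indentation that the diff viewer strips above; note that wandb wraps every top-level section in a "value" envelope. The path is simply the one recorded in this diff.

import yaml

# Load the wandb copy of the run config (path as shown in this upload).
with open("wandb/run-20250101_032225-2bzz3n13/files/config.yaml") as f:
    cfg = yaml.safe_load(f)

# wandb nests each section under a {"value": ...} envelope.
train_cfgs = cfg["train_cfgs"]["value"]
print(train_cfgs["learning_rate"])                # 1e-06 (PyYAML may keep this as a string)
print(train_cfgs["per_device_train_batch_size"])  # 4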
wandb/run-20250101_032225-2bzz3n13/files/output.log ADDED
@@ -0,0 +1,47 @@
+ ***** Running training *****
+ Training 1/3.0 epoch: 0%| | 0/357.0 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
+ Training 3/3.0 epoch (loss 0.0012): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 357/357.0 [54:49<00:00, 9.21s/it]
+ [2025-01-01 03:25:50,875] [INFO] [logging.py:96:log_dist] [Rank 0] step=10, skipped=0, lr=[9.979871469976195e-07, 9.979871469976195e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 03:27:24,141] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
+ [2025-01-01 03:28:03,912] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
+ [2025-01-01 03:28:59,279] [INFO] [logging.py:96:log_dist] [Rank 0] step=20, skipped=0, lr=[9.819814303479267e-07, 9.819814303479267e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 03:29:39,906] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
+ [2025-01-01 03:32:04,668] [INFO] [logging.py:96:log_dist] [Rank 0] step=30, skipped=0, lr=[9.504844339512094e-07, 9.504844339512094e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 03:34:45,536] [INFO] [logging.py:96:log_dist] [Rank 0] step=40, skipped=0, lr=[9.045084971874737e-07, 9.045084971874737e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 03:35:12,727] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
+ [2025-01-01 03:38:11,474] [INFO] [logging.py:96:log_dist] [Rank 0] step=50, skipped=0, lr=[8.455313244934324e-07, 8.455313244934324e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 03:41:02,763] [INFO] [logging.py:96:log_dist] [Rank 0] step=60, skipped=0, lr=[7.754484907260512e-07, 7.754484907260512e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 03:44:21,088] [INFO] [logging.py:96:log_dist] [Rank 0] step=70, skipped=0, lr=[6.965125158269618e-07, 6.965125158269618e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 03:45:34,501] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
+ [2025-01-01 03:46:26,012] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
+ [2025-01-01 03:47:20,622] [INFO] [logging.py:96:log_dist] [Rank 0] step=80, skipped=0, lr=[6.112604669781572e-07, 6.112604669781572e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 03:48:01,672] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
+ [2025-01-01 03:50:25,146] [INFO] [logging.py:96:log_dist] [Rank 0] step=90, skipped=0, lr=[5.224324151752575e-07, 5.224324151752575e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 03:53:13,332] [INFO] [logging.py:96:log_dist] [Rank 0] step=100, skipped=0, lr=[4.328833670911724e-07, 4.328833670911724e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 03:53:40,009] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
+ [2025-01-01 03:56:31,314] [INFO] [logging.py:96:log_dist] [Rank 0] step=110, skipped=0, lr=[3.454915028125263e-07, 3.454915028125263e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 03:59:21,373] [INFO] [logging.py:96:log_dist] [Rank 0] step=120, skipped=0, lr=[2.6306566876350067e-07, 2.6306566876350067e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 04:02:48,482] [INFO] [logging.py:96:log_dist] [Rank 0] step=130, skipped=0, lr=[1.8825509907063326e-07, 1.8825509907063326e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 04:03:53,943] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
+ [2025-01-01 04:04:34,261] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
+ [2025-01-01 04:05:49,032] [INFO] [logging.py:96:log_dist] [Rank 0] step=140, skipped=0, lr=[1.2346426699819456e-07, 1.2346426699819456e-07], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 04:06:10,044] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
+ [2025-01-01 04:08:45,003] [INFO] [logging.py:96:log_dist] [Rank 0] step=150, skipped=0, lr=[7.077560319906694e-08, 7.077560319906694e-08], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 04:11:41,549] [WARNING] [stage3.py:2104:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
+ [2025-01-01 04:11:41,550] [INFO] [logging.py:96:log_dist] [Rank 0] step=160, skipped=0, lr=[3.188256468013139e-08, 3.188256468013139e-08], mom=[[0.9, 0.95], [0.9, 0.95]]
+ [2025-01-01 04:14:59,506] [INFO] [logging.py:96:log_dist] [Rank 0] step=170, skipped=0, lr=[8.035205700685165e-09, 8.035205700685165e-09], mom=[[0.9, 0.95], [0.9, 0.95]]
+ Saving model to "/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference" ...
+ Saving 16-bit model...
+ [2025-01-01 04:17:25,228] [INFO] [logging.py:96:log_dist] [Rank 0] [Torch] Checkpoint global_step178 is about to be saved!
+ [2025-01-01 04:17:25,229] [INFO] [engine.py:3649:save_16bit_model] Saving model weights to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/pytorch_model.bin, tag: global_step178
+ [2025-01-01 04:17:25,229] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/pytorch_model.bin...
+ [2025-01-01 04:17:45,184] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/pytorch_model.bin.
+ [2025-01-01 04:17:45,185] [INFO] [torch_checkpoint_engine.py:33:commit] [Torch] Checkpoint global_step178 is ready now!
+ Model saved!
+ Saving 16-bit model...
+ [2025-01-01 04:17:52,182] [INFO] [logging.py:96:log_dist] [Rank 0] [Torch] Checkpoint global_step178 is about to be saved!
+ [2025-01-01 04:17:52,183] [INFO] [engine.py:3649:save_16bit_model] Saving model weights to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/pytorch_model.bin, tag: global_step178
+ [2025-01-01 04:17:52,183] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/pytorch_model.bin...
+ [2025-01-01 04:18:12,699] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/pytorch_model.bin.
+ [2025-01-01 04:18:12,701] [INFO] [torch_checkpoint_engine.py:33:commit] [Torch] Checkpoint global_step178 is ready now!
+ Model saved!
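A back-of-the-envelope consistency check on the step counts above, a sketch using only numbers recorded in this upload (batch size and gradient accumulation from config.yaml, GPU count from wandb-metadata.json below); the per-epoch sample count is inferred, not logged anywhere in the source.

# All inputs below appear in config.yaml / wandb-metadata.json in this upload.
per_device_batch = 4    # per_device_train_batch_size
grad_accum = 2          # gradient_accumulation_steps
num_gpus = 8            # gpu_count
epochs = 3

micro_steps = 357                            # progress bar: 357/357.0
optimizer_steps = micro_steps // grad_accum  # 178 -> matches checkpoint tag global_step178
batches_per_epoch = micro_steps // epochs    # 119
samples_per_epoch = batches_per_epoch * per_device_batch * num_gpus
print(optimizer_steps, samples_per_epoch)    # 178 3808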
wandb/run-20250101_032225-2bzz3n13/files/requirements.txt ADDED
@@ -0,0 +1,248 @@
+ align-anything==0.0.1.dev0
+ gitdb==4.0.11
+ wcwidth==0.2.13
+ identify==2.6.1
+ tomlkit==0.12.0
+ bitsandbytes==0.44.1
+ trl==0.9.6
+ pytest-split==0.8.0
+ gradio==4.44.1
+ pip==24.2
+ multidict==6.1.0
+ fairscale==0.4.13
+ mistral_common==1.4.4
+ python-dotenv==1.0.1
+ uvloop==0.20.0
+ absl-py==2.1.0
+ tiktoken==0.7.0
+ pydub==0.25.1
+ websockets==12.0
+ llamafactory==0.9.1.dev0
+ triton==3.0.0
+ tifffile==2024.9.20
+ safe-rlhf==0.0.1.dev0
+ pandas==2.2.3
+ grpcio==1.66.2
+ click==8.1.7
+ ninja==1.11.1.1
+ rich==13.9.2
+ Jinja2==3.1.4
+ Pygments==2.18.0
+ nvidia-cudnn-cu12==9.1.0.70
+ importlib_resources==6.4.5
+ GitPython==3.1.43
+ nvidia-cufft-cu12==11.0.2.54
+ tensorboard-data-server==0.7.2
+ align-anything==0.0.1.dev0
+ six==1.16.0
+ scipy==1.14.1
+ mpmath==1.3.0
+ jsonschema-specifications==2024.10.1
+ scikit-image==0.24.0
+ zipp==3.20.2
+ cycler==0.12.1
+ MarkupSafe==2.1.5
+ tzdata==2024.2
+ idna==3.10
+ pycountry==24.6.1
+ nvidia-nccl-cu12==2.20.5
+ matplotlib==3.9.2
+ pytz==2024.2
+ uvicorn==0.31.1
+ dill==0.3.8
+ pyparsing==3.1.4
+ pytest==7.2.0
+ jiter==0.6.1
+ safetensors==0.4.5
+ typing_extensions==4.12.2
+ decorator==4.4.2
+ typeguard==4.4.1
+ prometheus_client==0.21.0
+ nvidia-cuda-cupti-cu12==12.1.105
+ sentencepiece==0.2.0
+ requests==2.32.3
+ kiwisolver==1.4.7
+ gdown==5.2.0
+ multiprocess==0.70.16
+ xxhash==3.5.0
+ PyYAML==6.0.2
+ gguf==0.10.0
+ nvidia-nvtx-cu12==12.1.105
+ hpsv2==1.2.0
+ tensorboard==2.18.0
+ nodeenv==1.9.1
+ filelock==3.16.1
+ distro==1.9.0
+ scikit-learn==1.5.2
+ huggingface-hub==0.25.2
+ pyairports==2.1.1
+ importlib_metadata==8.5.0
+ pyarrow==17.0.0
+ llvmlite==0.43.0
+ ray==2.37.0
+ tokenizers==0.20.3
+ nvidia-nvjitlink-cu12==12.6.77
+ av==14.0.1
+ deepspeed==0.15.2
+ clip==0.2.0
+ shtab==1.7.1
+ certifi==2024.8.30
+ braceexpand==0.1.7
+ nvidia-ml-py==12.560.30
+ webdataset==0.2.100
+ docker-pycreds==0.4.0
+ einops==0.8.0
+ iniconfig==2.0.0
+ tyro==0.9.2
+ torchvision==0.19.0
+ accelerate==0.34.2
+ beautifulsoup4==4.12.3
+ pyzmq==26.2.0
+ pycparser==2.22
+ nvidia-curand-cu12==10.3.2.106
+ msgpack==1.1.0
+ soxr==0.5.0.post1
+ platformdirs==4.3.6
+ h11==0.14.0
+ psutil==6.0.0
+ pydantic==2.9.2
+ shellingham==1.5.4
+ imageio-ffmpeg==0.5.1
+ wandb==0.18.3
+ audioread==3.0.1
+ annotated-types==0.7.0
+ docstring_parser==0.16
+ cloudpickle==3.1.0
+ regex==2024.9.11
+ packaging==24.1
+ timm==0.6.13
+ aiosignal==1.3.1
+ numba==0.60.0
+ orjson==3.10.7
+ rpds-py==0.20.0
+ virtualenv==20.26.6
+ joblib==1.4.2
+ charset-normalizer==3.4.0
+ httpx==0.27.2
+ ffmpy==0.4.0
+ lm-format-enforcer==0.10.6
+ yt-dlp==2024.8.6
+ sympy==1.13.3
+ python-dateutil==2.9.0.post0
+ nvidia-cusolver-cu12==11.4.5.107
+ msgspec==0.18.6
+ mdurl==0.1.2
+ torch==2.4.0
+ fastapi==0.115.0
+ optree==0.13.0
+ PySocks==1.7.1
+ transformers==4.46.0.dev0
+ torchlibrosa==0.1.0
+ fsspec==2024.6.1
+ nvidia-cublas-cu12==12.1.3.1
+ gradio_client==1.3.0
+ args==0.1.0
+ cffi==1.17.1
+ fonttools==4.54.1
+ clint==0.5.1
+ lark==1.2.2
+ tqdm==4.66.5
+ semantic-version==2.10.0
+ pooch==1.8.2
+ markdown-it-py==3.0.0
+ pydantic_core==2.23.4
+ sniffio==1.3.1
+ httptools==0.6.1
+ nvidia-cuda-runtime-cu12==12.1.105
+ anyio==4.6.0
+ ftfy==6.3.0
+ Markdown==3.7
+ datasets==2.21.0
+ diffusers==0.30.3
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ vllm==0.6.2
+ starlette==0.38.6
+ flash-attn==2.7.0.post2
+ urllib3==2.2.3
+ Werkzeug==3.0.4
+ py-cpuinfo==9.0.0
+ moviepy==1.0.3
+ librosa==0.10.2.post1
+ peft==0.12.0
+ soupsieve==2.6
+ lazy_loader==0.4
+ pluggy==1.5.0
+ setuptools==75.1.0
+ sentry-sdk==2.16.0
+ tabulate==0.9.0
+ transformers==4.45.2
+ pre_commit==4.0.1
+ termcolor==2.5.0
+ frechet-audio-distance==0.1.2
+ pytorch-fid==0.3.0
+ setproctitle==1.3.3
+ jsonschema==4.23.0
+ aiofiles==23.2.1
+ contourpy==1.3.0
+ distlib==0.3.9
+ interegular==0.3.3
+ fire==0.7.0
+ diskcache==5.6.3
+ proglog==0.1.10
+ soundfile==0.12.1
+ protobuf==3.20.3
+ smmap==5.0.1
+ pycryptodomex==3.21.0
+ Brotli==1.1.0
+ pillow==10.4.0
+ frozenlist==1.4.1
+ numpy==1.26.4
+ mutagen==1.47.0
+ outlines==0.0.46
+ attrs==24.2.0
+ torchaudio==2.4.0
+ aiohttp==3.10.10
+ ruff==0.6.9
+ watchfiles==0.24.0
+ threadpoolctl==3.5.0
+ nest-asyncio==1.6.0
+ partial-json-parser==0.2.1.1.post4
+ sse-starlette==2.1.3
+ shortuuid==1.0.13
+ typer==0.12.5
+ prometheus-fastapi-instrumentator==7.0.0
+ imageio==2.35.1
+ wheel==0.44.0
+ image-reward==1.5
+ networkx==3.4.1
+ propcache==0.2.0
+ aiohappyeyeballs==2.4.3
+ nvidia-cusparse-cu12==12.1.0.106
+ xformers==0.0.27.post2
+ cfgv==3.4.0
+ python-multipart==0.0.12
+ httpcore==1.0.6
+ opencv-python==4.6.0.66
+ resampy==0.4.3
+ yarl==1.15.0
+ referencing==0.35.1
+ openai==1.51.2
+ hjson==3.1.0
+ llamafactory==0.9.1.dev0
+ jaraco.collections==5.1.0
+ backports.tarfile==1.2.0
+ more-itertools==10.3.0
+ wheel==0.43.0
+ importlib_metadata==8.0.0
+ zipp==3.19.2
+ autocommand==2.2.2
+ jaraco.functools==4.0.1
+ platformdirs==4.2.2
+ tomli==2.0.1
+ jaraco.text==3.12.1
+ typing_extensions==4.12.2
+ jaraco.context==5.3.0
+ importlib_resources==6.4.0
+ packaging==24.1
+ inflect==7.3.1
+ typeguard==4.3.0
wandb/run-20250101_032225-2bzz3n13/files/wandb-metadata.json ADDED
@@ -0,0 +1,112 @@
+ {
+ "os": "Linux-5.4.0-196-generic-x86_64-with-glibc2.31",
+ "python": "3.11.10",
+ "startedAt": "2025-01-01T03:22:25.748812Z",
+ "args": [
+ "--local_rank=0",
+ "--model_name_or_path",
+ "/data/align-anything/hantao/models/chameleon-7b",
+ "--train_datasets",
+ "/data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized",
+ "--output_dir",
+ "/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference",
+ "--per_device_train_batch_size",
+ "4",
+ "--per_device_eval_batch_size",
+ "4",
+ "--gradient_accumulation_steps",
+ "2",
+ "--train_template",
+ "Chameleon_preference",
+ "--train_split",
+ "train",
+ "--train_data_files",
+ "q0_10_preference.pt",
+ "--learning_rate",
+ "1e-6",
+ "--epochs",
+ "3",
+ "--lr_scheduler_type",
+ "cosine",
+ "--save_interval",
+ "400"
+ ],
+ "program": "-m align_anything.trainers.text_image_to_text_image.dpo",
+ "git": {
+ "remote": "https://github.com/PKU-Alignment/align-anything.git",
+ "commit": "6fde660afc9985323f147930eedf188a5699adc7"
+ },
+ "email": "[email protected]",
+ "root": "/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference",
+ "host": "lyg0194",
+ "username": "align-anything",
+ "executable": "/data/align-anything/miniconda3/envs/hantao_stable/bin/python",
+ "cpu_count": 64,
+ "cpu_count_logical": 128,
+ "gpu": "[NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB]",
+ "gpu_count": 8,
+ "disk": {
+ "/": {
+ "total": "939477946368",
+ "used": "596693139456"
+ }
+ },
+ "memory": {
+ "total": "1081823907840"
+ },
+ "cpu": {
+ "count": 64,
+ "countLogical": 128
+ },
+ "gpu_nvidia": [
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere"
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memoryTotal": "85899345920",
+ "cudaCores": 6912,
+ "architecture": "Ampere"
+ }
+ ],
+ "cudaVersion": "12.4"
+ }
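A small sketch converting the byte counts in this metadata into readable totals; it assumes the file sits at the path shown in this diff, and notes that wandb stores these counters as strings.

import json

with open("wandb/run-20250101_032225-2bzz3n13/files/wandb-metadata.json") as f:
    meta = json.load(f)

gib = 1024 ** 3
per_gpu = int(meta["gpu_nvidia"][0]["memoryTotal"]) / gib  # 80.0 GiB per A100
total_gpu = per_gpu * meta["gpu_count"]                    # 640.0 GiB across 8 GPUs
ram = int(meta["memory"]["total"]) / gib                   # ~1007.5 GiB system RAM
print(per_gpu, total_gpu, ram)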
wandb/run-20250101_032225-2bzz3n13/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"train/better_sample_reward":156.75074768066406,"train/worse_sample_reward":33.601295471191406,"train/reward":190.35205078125,"_wandb":{"runtime":3347},"_runtime":3347.002538272,"train/step":357,"train/loss":0.00119595427531749,"_step":357,"train/reward_margin":123.14945220947266,"train/lr":3.22238178339318e-10,"train/reward_accuracy":1,"train/epoch":3,"_timestamp":1.7357050364626048e+09}
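The summary fields above appear internally consistent: train/reward_margin is better_sample_reward minus worse_sample_reward, and train/reward is (up to float rounding) their sum. A quick check:

better = 156.75074768066406   # train/better_sample_reward
worse = 33.601295471191406    # train/worse_sample_reward
print(better - worse)  # 123.14945... = train/reward_margin
print(better + worse)  # 190.35204... ~= train/reward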
wandb/run-20250101_032225-2bzz3n13/logs/debug-internal.log ADDED
@@ -0,0 +1,20 @@
+ {"time":"2025-01-01T03:22:25.755777689Z","level":"INFO","msg":"using version","core version":"0.18.3"}
+ {"time":"2025-01-01T03:22:25.755807853Z","level":"INFO","msg":"created symlink","path":"/data/align-anything/hantao/align-anything/outputs/mm_interp/q0_10_preference/wandb/run-20250101_032225-2bzz3n13/logs/debug-core.log"}
+ {"time":"2025-01-01T03:22:25.758235027Z","level":"ERROR","msg":"dialing: google: could not find default credentials. See https://cloud.google.com/docs/authentication/external/set-up-adc for more information"}
+ {"time":"2025-01-01T03:22:25.778926827Z","level":"INFO","msg":"created new stream","id":"2bzz3n13"}
+ {"time":"2025-01-01T03:22:25.778989066Z","level":"INFO","msg":"stream: started","id":"2bzz3n13"}
+ {"time":"2025-01-01T03:22:25.779016519Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"2bzz3n13"}}
+ {"time":"2025-01-01T03:22:25.779052686Z","level":"INFO","msg":"sender: started","stream_id":{"value":"2bzz3n13"}}
+ {"time":"2025-01-01T03:22:25.779034819Z","level":"INFO","msg":"handler: started","stream_id":{"value":"2bzz3n13"}}
+ {"time":"2025-01-01T03:22:26.392432178Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
+ {"time":"2025-01-01T03:22:26.39641254Z","level":"INFO","msg":"Starting system monitor"}
+ {"time":"2025-01-01T04:18:12.751361379Z","level":"INFO","msg":"Stopping system monitor"}
+ {"time":"2025-01-01T04:18:12.776637037Z","level":"INFO","msg":"Stopped system monitor"}
+ {"time":"2025-01-01T04:18:13.326901072Z","level":"WARN","msg":"No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"}
+ {"time":"2025-01-01T04:18:13.326920848Z","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
+ {"time":"2025-01-01T04:18:14.470754716Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2025-01-01T04:18:16.234531428Z","level":"INFO","msg":"stream: closing","id":"2bzz3n13"}
+ {"time":"2025-01-01T04:18:16.234542406Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"2bzz3n13"}}
+ {"time":"2025-01-01T04:18:16.234551344Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"2bzz3n13"}}
+ {"time":"2025-01-01T04:18:16.23457812Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"2bzz3n13"}}
+ {"time":"2025-01-01T04:18:16.236373925Z","level":"INFO","msg":"stream: closed","id":"2bzz3n13"}
wandb/run-20250101_032225-2bzz3n13/logs/debug.log ADDED
@@ -0,0 +1,33 @@
+ 2025-01-01 03:22:25,741 INFO MainThread:650483 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Configure stats pid to 650483
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Loading settings from /home/align-anything/.config/wandb/settings
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Loading settings from /data/align-anything/hantao/align-anything/scripts/wandb/settings
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'api_key': '***REDACTED***', 'mode': 'online'}
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
+ 2025-01-01 03:22:25,742 WARNING MainThread:650483 [wandb_setup.py:_flush():79] Could not find program at -m align_anything.trainers.text_image_to_text_image.dpo
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m align_anything.trainers.text_image_to_text_image.dpo'}
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_setup.py:_flush():79] Applying login settings: {}
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:_log_setup():532] Logging user logs to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/wandb/run-20250101_032225-2bzz3n13/logs/debug.log
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:_log_setup():533] Logging internal logs to /data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference/wandb/run-20250101_032225-2bzz3n13/logs/debug-internal.log
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():617] calling init triggers
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
+ config: {'train_cfgs': {'ds_cfgs': 'ds_z3_config.json', 'epochs': 3.0, 'seed': 42, 'per_device_train_batch_size': 4.0, 'per_device_eval_batch_size': 4.0, 'gradient_accumulation_steps': 2.0, 'gradient_checkpointing': True, 'learning_rate': 1e-06, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.01, 'adam_betas': [0.9, 0.95], 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'regularization': 0.001, 'scale_coeff': 0.1, 'freeze_mm_proj': True, 'freeze_vision_tower': False, 'freeze_language_model': True}, 'data_cfgs': {'train_datasets': '/data/align-anything/hantao/data/mm_interp/AA_preference_cocour_new_step10/tokenized', 'train_template': 'Chameleon_preference', 'train_size': None, 'train_split': 'train', 'train_subset': None, 'train_data_files': 'q0_10_preference.pt', 'train_optional_args': [], 'eval_datasets': None, 'eval_template': None, 'eval_size': None, 'eval_split': None, 'eval_subset': None, 'eval_data_files': None, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'dpo', 'output_dir': '/data/align-anything/hantao/align-anything/outputs/mm_interp//q0_10_preference', 'cache_dir': None, 'save_interval': 400.0}, 'model_cfgs': {'model_name_or_path': '/data/align-anything/hantao/models/chameleon-7b', 'trust_remote_code': True, 'model_max_length': 4096}, 'special_tokens': None}
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():667] starting backend
+ 2025-01-01 03:22:25,742 INFO MainThread:650483 [wandb_init.py:init():671] sending inform_init request
+ 2025-01-01 03:22:25,748 INFO MainThread:650483 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+ 2025-01-01 03:22:25,748 INFO MainThread:650483 [wandb_init.py:init():684] backend started and connected
+ 2025-01-01 03:22:25,758 INFO MainThread:650483 [wandb_init.py:init():779] updated telemetry
+ 2025-01-01 03:22:25,803 INFO MainThread:650483 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
+ 2025-01-01 03:22:26,388 INFO MainThread:650483 [wandb_init.py:init():863] starting run threads in backend
+ 2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_console_start():2465] atexit reg
+ 2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_redirect():2313] redirect: wrap_raw
+ 2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_redirect():2378] Wrapping output streams.
+ 2025-01-01 03:22:26,708 INFO MainThread:650483 [wandb_run.py:_redirect():2403] Redirects installed.
+ 2025-01-01 03:22:26,713 INFO MainThread:650483 [wandb_init.py:init():907] run started, returning control to user process
+ 2025-01-01 04:18:12,748 INFO MainThread:650483 [wandb_run.py:_finish():2164] finishing run htlou/align-anything/2bzz3n13
+ 2025-01-01 04:18:12,750 INFO MainThread:650483 [wandb_run.py:_atexit_cleanup():2428] got exitcode: 0
+ 2025-01-01 04:18:12,750 INFO MainThread:650483 [wandb_run.py:_restore():2410] restore
+ 2025-01-01 04:18:12,750 INFO MainThread:650483 [wandb_run.py:_restore():2416] restore done
+ 2025-01-01 04:18:16,219 INFO MainThread:650483 [wandb_run.py:_footer_history_summary_info():4049] rendering history
+ 2025-01-01 04:18:16,221 INFO MainThread:650483 [wandb_run.py:_footer_history_summary_info():4081] rendering summary
+ 2025-01-01 04:18:16,232 INFO MainThread:650483 [wandb_run.py:_footer_sync_info():4008] logging synced files
wandb/run-20250101_032225-2bzz3n13/run-2bzz3n13.wandb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b24dc144b11274c9e88c6af74302af6ce2fbfcdcbbcbaab785ec88157d14e38f
+ size 3363509