---
# Run configuration for the "show-o-dpo" experiment.
#
# NOTE(review): this file was restored from a flattened copy in which every
# entry was followed by a stray `|` and all indentation was lost. Nesting is
# reconstructed from key order and from the
# `${optimizer.params.learning_rate}` reference below. Placements that could
# not be derived that way are marked TODO; confirm them against the code that
# loads this file.

wandb:
  entity: null
  resume: auto
  # Quoted so the ID is always read as a string, whatever characters it has.
  run_id: "33sdhxdk"

experiment:
  project: training
  name: show-o-dpo
  output_dir: /ssd7.7tb2/zrr/tcz/Show-o/ckts/train_set_hq_prune_full_t2i_dpo_10k_1e-5_v2_new_2
  save_every: 10000
  eval_every: 2500
  generate_every: 1000
  log_every: 1
  log_grad_norm_every: 20
  # Currently `<output_dir>/logs`; keep in sync if output_dir changes.
  logging_dir: /ssd7.7tb2/zrr/tcz/Show-o/ckts/train_set_hq_prune_full_t2i_dpo_10k_1e-5_v2_new_2/logs

model:
  rw_model: /ssd7.7tb2/zrr/tcz/Show-o/geneval/outputs/train_set_hq_prune_full

  vq_model:
    type: magvitv2
    vq_model_name: showlab/magvitv2

  showo:
    load_from_showo: false
    pretrained_model_path: /ssd7.7tb2/zrr/tcz/Show-o/ckts/train_set_hq_prune_full_t2i_dpo_10k_1e-5
    w_clip_vit: false
    vocab_size: 58498
    llm_vocab_size: 50295
    llm_model_path: microsoft/phi-1_5
    codebook_size: 8192
    num_vq_tokens: 256
    num_new_special_tokens: 10

  # TODO(review): placed directly under `model`; it may instead belong under
  # `model.showo` — confirm which path the consumer reads.
  gradient_checkpointing: true

dataset:
  gen_type: t2i

  params:
    data_path: /ssd7.7tb2/zrr/tcz/Show-o/ov/t2i_dpo_draft.yaml
    validation_prompts_file: validation_prompts/showoprompts.txt
    shuffle_buffer_size: 1000
    num_workers: 32
    resolution: 256
    pin_memory: true
    persistent_workers: true

  preprocessing:
    max_seq_length: 128
    # Same value as dataset.params.resolution above.
    resolution: 256
    center_crop: false
    random_flip: false

optimizer:
  name: adamw
  params:
    learning_rate: 1.0e-05
    scale_lr: false
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.01
    epsilon: 1.0e-08

lr_scheduler:
  scheduler: cosine
  params:
    # Interpolation: reuses the optimizer's learning rate rather than
    # repeating the literal.
    learning_rate: ${optimizer.params.learning_rate}
    warmup_ratio: 0.1

training:
  gradient_accumulation_steps: 1
  batch_size_t2i: 10
  mixed_precision: bf16
  enable_tf32: true
  seed: 10086
  overfit_one_batch: false
  cond_dropout_prob: 0.1
  min_masking_rate: 0.0
  label_smoothing: 0.0
  max_grad_norm: null
  guidance_scale: 0.0
  generation_timesteps: 12
  beta: 0.1
  reward_coef: 0
  dpo_coef: 1
  sft_coef: 0
  num_epoch: 1

# TODO(review): assumed to be a top-level section; it repeats
# `generation_timesteps` and `guidance_scale` with different values from
# `training`, so it cannot live at the same level as those keys. Confirm it is
# not meant to be `training.inference`.
inference:
  generation_timesteps: 18
  guidance_scale: 1.75
|
|