---
# Show-o T2I DPO training configuration (OmegaConf-style; note the
# ${...} interpolation under lr_scheduler.params).
# NOTE(review): nesting reconstructed from a single-line mangled file —
# structure follows the standard Show-o config layout; verify against the
# training script's expected schema.

wandb:
  entity: null
  resume: auto
  run_id: 33sdhxdk

experiment:
  project: training
  name: show-o-dpo
  output_dir: /ssd7.7tb2/zrr/tcz/Show-o/ckts/train_set_hq_prune_full_t2i_dpo_10k_1e-5_v2_new_2
  # Step intervals for checkpointing, evaluation, sampling, and logging.
  save_every: 10000
  eval_every: 2500
  generate_every: 1000
  log_every: 1
  log_grad_norm_every: 20
  logging_dir: /ssd7.7tb2/zrr/tcz/Show-o/ckts/train_set_hq_prune_full_t2i_dpo_10k_1e-5_v2_new_2/logs

model:
  # Reward model checkpoint (presumably used for DPO preference scoring —
  # confirm against the trainer).
  rw_model: /ssd7.7tb2/zrr/tcz/Show-o/geneval/outputs/train_set_hq_prune_full
  vq_model:
    type: magvitv2
    vq_model_name: showlab/magvitv2
  showo:
    load_from_showo: false
    pretrained_model_path: /ssd7.7tb2/zrr/tcz/Show-o/ckts/train_set_hq_prune_full_t2i_dpo_10k_1e-5
    w_clip_vit: false
    vocab_size: 58498
    llm_vocab_size: 50295
    llm_model_path: microsoft/phi-1_5
    codebook_size: 8192
    num_vq_tokens: 256
    num_new_special_tokens: 10
  gradient_checkpointing: true

dataset:
  gen_type: t2i
  params:
    data_path: /ssd7.7tb2/zrr/tcz/Show-o/ov/t2i_dpo_draft.yaml
    validation_prompts_file: validation_prompts/showoprompts.txt
    shuffle_buffer_size: 1000
    num_workers: 32
    resolution: 256
    pin_memory: true
    persistent_workers: true
  preprocessing:
    max_seq_length: 128
    resolution: 256
    center_crop: false
    random_flip: false

optimizer:
  name: adamw
  params:
    learning_rate: 1.0e-05
    scale_lr: false
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.01
    epsilon: 1.0e-08

lr_scheduler:
  scheduler: cosine
  params:
    # Interpolated from optimizer.params.learning_rate (OmegaConf syntax).
    learning_rate: ${optimizer.params.learning_rate}
    warmup_ratio: 0.1

training:
  gradient_accumulation_steps: 1
  batch_size_t2i: 10
  mixed_precision: bf16
  enable_tf32: true
  seed: 10086
  overfit_one_batch: false
  cond_dropout_prob: 0.1
  min_masking_rate: 0.0
  label_smoothing: 0.0
  max_grad_norm: null
  guidance_scale: 0.0
  generation_timesteps: 12
  # DPO loss settings: beta is the DPO temperature; the *_coef values weight
  # the loss terms (here: pure DPO, no reward or SFT term).
  beta: 0.1
  reward_coef: 0
  dpo_coef: 1
  sft_coef: 0
  num_epoch: 1

inference:
  generation_timesteps: 18
  guidance_scale: 1.75