diff --git a/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/checkpoint.pt b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/checkpoint.pt new file mode 100644 index 0000000000000000000000000000000000000000..604cd826461ce6090871279a41ca1bcaff506de9 --- /dev/null +++ b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/checkpoint.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d11a3de8376139f69c734fe556f9ca21363379b6f226ec011471d00a9975b9b +size 10798884178 diff --git a/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/checkpoint_.pt b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/checkpoint_.pt new file mode 100644 index 0000000000000000000000000000000000000000..604cd826461ce6090871279a41ca1bcaff506de9 --- /dev/null +++ b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/checkpoint_.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d11a3de8376139f69c734fe556f9ca21363379b6f226ec011471d00a9975b9b +size 10798884178 diff --git a/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/config.yaml b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62a2e4355350baca19e673fd8b6a1ab17d4d8c13 --- /dev/null +++ b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/config.yaml @@ -0,0 +1,271 @@ +run_dir: exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs +seed: 0 +allow_tf32: true +timeout: null +resolution: 256 +amp: bf16 +cfg_scale: 1.0 +evaluate_split: test +eval_dir_name: null +num_save_images: 64 +save_all_images: false +save_image_format: jpg +save_images_at_all_procs: false +save_latent_samples: false +latent_samples_dir: null +evaluate_dataset: sample_class +sample_class: + name: SampleClass + batch_size: 128 + n_worker: 8 + drop_last: false + seed: 0 + shuffle: false + num_classes: 1000 + num_samples: 50000 +autoencoder: + num_settings: 1 + name: dc-ae-f32c32-in-1.0-256px + scaling_factor: 0.3285 + latent_channels: null +autoencoder_dtype: fp32 +eval_autoencoder_setting_list: null +model: fp8coat_dit +dit: + name: DiT + in_channels: 32 + input_size: 8 + cfg_channels: null + pretrained_path: null + pretrained_source: dc-ae + train_scheduler: SiTSampler + eval_scheduler: ODE_heun2 + num_inference_steps: 30 + flow_shift: 3.0 + reverse_time: false + use_cads: false + cads_noise_scale: 0.1 + cads_mixing_factor: 1.0 + cads_tau_min: 0.2 + cads_tau_max: 0.9 + use_guidance_interval: false + guidance_t_min: 0.2 + guidance_t_max: 0.8 + count_nfe: false + patch_size: 1 + hidden_size: 1152 + depth: 28 + num_heads: 16 + mlp_ratio: 4.0 + post_norm: false + class_dropout_prob: 0.1 + num_classes: 1000 + learn_sigma: false + unconditional: false + use_checkpoint: true + adaptive_channel: false + adaptive_channel_share_weights: true + only_load_backbone: false + freeze_backbone: false +uvit: + name: UViT + in_channels: 4 + input_size: 32 + cfg_channels: null + pretrained_path: null + pretrained_source: dc-ae + train_scheduler: DPM_Solver + eval_scheduler: DPM_Solver + num_inference_steps: 30 + flow_shift: 3.0 + reverse_time: false + use_cads: false + cads_noise_scale: 0.1 + cads_mixing_factor: 1.0 + cads_tau_min: 0.2 + cads_tau_max: 0.9 + use_guidance_interval: false + guidance_t_min: 0.2 + guidance_t_max: 0.8 + count_nfe: false + patch_size: 2 + hidden_size: 1152 + depth: 28 + num_heads: 16 + mlp_ratio: 4.0 + mlp_time_embed: false + qkv_bias: false + act_layer: gelu + use_checkpoint: true + class_dropout_prob: 0.1 + num_classes: 1000 + attn_mode: null +sana_cls: + name: SanaCls + in_channels: 4 + input_size: 32 + cfg_channels: null + pretrained_path: null + pretrained_source: dc-ae + train_scheduler: SanaScheduler + eval_scheduler: SanaScheduler + num_inference_steps: 250 + flow_shift: 3.0 + reverse_time: false + use_cads: false + cads_noise_scale: 0.1 + cads_mixing_factor: 1.0 + cads_tau_min: 0.2 + cads_tau_max: 0.9 + use_guidance_interval: false + guidance_t_min: 0.2 + guidance_t_max: 0.8 + count_nfe: false + patch_size: 2 + hidden_size: 1152 + depth: 28 + num_heads: 16 + mlp_ratio: 4.0 + post_norm: false + class_dropout_prob: 0.1 + num_classes: 1000 + unconditional: false + use_checkpoint: true + only_load_backbone: false + freeze_backbone: false + learn_sigma: false +usana_cls: + name: USanaCls + in_channels: 4 + input_size: 32 + cfg_channels: null + pretrained_path: null + pretrained_source: dc-ae + train_scheduler: DPM_Solver + eval_scheduler: DPM_Solver + num_inference_steps: 30 + flow_shift: 3.0 + reverse_time: false + use_cads: false + cads_noise_scale: 0.1 + cads_mixing_factor: 1.0 + cads_tau_min: 0.2 + cads_tau_max: 0.9 + use_guidance_interval: false + guidance_t_min: 0.2 + guidance_t_max: 0.8 + count_nfe: false + patch_size: 2 + hidden_size: 1152 + depth: 28 + num_heads: 16 + mlp_ratio: 4.0 + mlp_time_embed: false + qkv_bias: false + act_layer: gelu + use_checkpoint: true + class_dropout_prob: 0.1 + num_classes: 1000 + num_training_steps: 1000 +fp8: + name: FP8DiT +fp8coat: + name: FP8COATDiT + qchoice: linear + symm: true + row_blocksize: -1 + col_blocksize: -1 + linear_row_blocksize: 1 + linear_col_blocksize: 32 + min_blockunit_row: -1 + min_blockunit_col: -1 + fabit: MXE2M1_plus + fwbit: MXE2M1_plus + babit: MXE2M1_plus + bwbit: MXE2M1_plus + bobit: MXE2M1_plus + epsilon: 1.0e-08 +compute_fid: true +fid: + save_path: null + ref_path: assets/data/fid/imagenet_train_256.npz + precision_recall_ref_path: assets/data/precision_recall/VIRTUAL_imagenet256.npy +compute_inception_score: true +inception_score: {} +compute_cmmd: true +cmmd: + save_path: null + ref_path: assets/data/cmmd/VIRTUAL_imagenet256.npy +verbose: false +train_dataset: latent_imagenet +latent_imagenet: + name: LatentImageNet + batch_size: 128 + n_worker: 8 + drop_last: true + seed: 0 + shuffle: true + data_dir: assets/data/latent/dc_ae_f32c32_in_1.0_256px/imagenet_256 +latent_mjhq: + name: LatentMJHQ + batch_size: 32 + n_worker: 8 + drop_last: true + seed: 0 + shuffle: true + data_dir: assets/data/latent/dc_ae_f32c32/mjhq_1024 +latent_ffhq: + name: LatentFFHQ + batch_size: 32 + n_worker: 8 + drop_last: true + seed: 0 + shuffle: true + data_dir: assets/data/latent/dc_ae_f32c32/ffhq_1024 +latent_mapillary_vistas: + name: LatentMapillaryVistas + batch_size: 32 + n_worker: 8 + drop_last: true + seed: 0 + shuffle: true + data_dir: assets/data/latent/dc_ae_f32c32/mapillary_vistas_2048 +latent_multiple_channel_imagenet: + name: LatentMultipleChannelImageNet + batch_size: 32 + n_worker: 8 + drop_last: true + seed: 0 + shuffle: true + dataset_sample_ratio: null + num_channels_list: null + data_dirs: + - assets/data/latent/dc_ae_f32c32/imagenet_512 +resume: true +resume_path: null +resume_schedule: true +num_epochs: null +max_steps: 500000 +clip_grad: null +num_store_images: 64 +save_checkpoint_steps: 1000 +eval_steps: 20000 +save_eval_checkpoint_steps: 100000 +optimizer: + name: adamw + lr: 0.0001 + warmup_lr: 0.0 + weight_decay: 0.0 + no_wd_keys: [] + betas: + - 0.9 + - 0.999 +lr_scheduler: + name: constant + warmup_steps: 1000 +log: true +wandb_entity: han2024 +wandb_project: dc_ae_diffusion +ema_decay: 0.9999 +ema_warmup_steps: 2000 +eval_ema: true diff --git a/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/eval_results.csv b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/eval_results.csv new file mode 100644 index 0000000000000000000000000000000000000000..d165a3d686a12c97ca65563f158cc31cd2db0edb --- /dev/null +++ b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/eval_results.csv @@ -0,0 +1,14 @@ +,fid,precision,recall,inception_score_mean,inception_score_std,cmmd +step_100000_autoencoder_setting_0_cfg_1.0,22.82251239617085,0.6495599746704102,0.5740000009536743,54.4717929909383,1.3851383102860535,0.573277473449707 +step_120000_autoencoder_setting_0_cfg_1.0,23.09154068529824,0.639739990234375,0.5774999856948853,56.29451314562423,1.3199133010401618,0.6319284439086914 +step_140000_autoencoder_setting_0_cfg_1.0,20.76137721606244,0.6392399668693542,0.5882999897003174,61.48154406857825,1.6071809236386536,0.6085634231567383 +step_160000_autoencoder_setting_0_cfg_1.0,19.763616799984906,0.6626799702644348,0.5631999969482422,66.3743937296074,1.3981266954044251,0.5801916122436523 +step_180000_autoencoder_setting_0_cfg_1.0,18.79058651979068,0.6640200018882751,0.5453000068664551,68.40362149122765,1.412380260108231,0.4642009735107422 +step_200000_autoencoder_setting_0_cfg_1.0,20.954522054968265,0.653499960899353,0.5582000017166138,63.14416956965454,1.0557505979988988,0.5255937576293945 +step_20000_autoencoder_setting_0_cfg_1.0,57.72364422441791,0.4287199974060058,0.5460999608039856,19.90047475034472,0.3800645592686704,1.1063814163208008 +step_220000_autoencoder_setting_0_cfg_1.0,18.47325050523102,0.6658399701118469,0.5615000128746033,69.24973211027222,1.3201464970260688,0.4718303680419922 +step_240000_autoencoder_setting_0_cfg_1.0,21.973751038787213,0.6630799770355225,0.5539000034332275,59.74237377356919,1.562433067266785,0.5408525466918945 +step_260000_autoencoder_setting_0_cfg_1.0,17.706635191026123,0.680079996585846,0.5595999956130981,71.48858761147187,1.564728001100873,0.48232078552246094 +step_40000_autoencoder_setting_0_cfg_1.0,44.55932573190052,0.5211399793624878,0.5742999911308289,26.626259701696387,0.5180268710510688,0.9006261825561523 +step_60000_autoencoder_setting_0_cfg_1.0,36.75938892258444,0.5808199644088745,0.5612999796867371,34.614852400498975,0.9469187747487444,0.8183717727661133 +step_80000_autoencoder_setting_0_cfg_1.0,30.842797217782447,0.608299970626831,0.5734999775886536,42.12004014484396,1.0538750580332945,0.7462501525878906 diff --git a/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/log.txt b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b6ad596c4d99183b3702fa15017f8150577f73fb --- /dev/null +++ b/exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/log.txt @@ -0,0 +1,1398 @@ +run_dir: exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cscan not find a checkpoint, will train from scratch + Train Epoch #1: 0%| | 0/1251 [00:00