han-cai committed on
Commit
45de501
·
verified ·
1 Parent(s): 80148bd

Upload folder using huggingface_hub

Browse files
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d11a3de8376139f69c734fe556f9ca21363379b6f226ec011471d00a9975b9b
3
+ size 10798884178
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/checkpoint_.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d11a3de8376139f69c734fe556f9ca21363379b6f226ec011471d00a9975b9b
3
+ size 10798884178
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/config.yaml ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_dir: exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs
2
+ seed: 0
3
+ allow_tf32: true
4
+ timeout: null
5
+ resolution: 256
6
+ amp: bf16
7
+ cfg_scale: 1.0
8
+ evaluate_split: test
9
+ eval_dir_name: null
10
+ num_save_images: 64
11
+ save_all_images: false
12
+ save_image_format: jpg
13
+ save_images_at_all_procs: false
14
+ save_latent_samples: false
15
+ latent_samples_dir: null
16
+ evaluate_dataset: sample_class
17
+ sample_class:
18
+ name: SampleClass
19
+ batch_size: 128
20
+ n_worker: 8
21
+ drop_last: false
22
+ seed: 0
23
+ shuffle: false
24
+ num_classes: 1000
25
+ num_samples: 50000
26
+ autoencoder:
27
+ num_settings: 1
28
+ name: dc-ae-f32c32-in-1.0-256px
29
+ scaling_factor: 0.3285
30
+ latent_channels: null
31
+ autoencoder_dtype: fp32
32
+ eval_autoencoder_setting_list: null
33
+ model: fp8coat_dit
34
+ dit:
35
+ name: DiT
36
+ in_channels: 32
37
+ input_size: 8
38
+ cfg_channels: null
39
+ pretrained_path: null
40
+ pretrained_source: dc-ae
41
+ train_scheduler: SiTSampler
42
+ eval_scheduler: ODE_heun2
43
+ num_inference_steps: 30
44
+ flow_shift: 3.0
45
+ reverse_time: false
46
+ use_cads: false
47
+ cads_noise_scale: 0.1
48
+ cads_mixing_factor: 1.0
49
+ cads_tau_min: 0.2
50
+ cads_tau_max: 0.9
51
+ use_guidance_interval: false
52
+ guidance_t_min: 0.2
53
+ guidance_t_max: 0.8
54
+ count_nfe: false
55
+ patch_size: 1
56
+ hidden_size: 1152
57
+ depth: 28
58
+ num_heads: 16
59
+ mlp_ratio: 4.0
60
+ post_norm: false
61
+ class_dropout_prob: 0.1
62
+ num_classes: 1000
63
+ learn_sigma: false
64
+ unconditional: false
65
+ use_checkpoint: true
66
+ adaptive_channel: false
67
+ adaptive_channel_share_weights: true
68
+ only_load_backbone: false
69
+ freeze_backbone: false
70
+ uvit:
71
+ name: UViT
72
+ in_channels: 4
73
+ input_size: 32
74
+ cfg_channels: null
75
+ pretrained_path: null
76
+ pretrained_source: dc-ae
77
+ train_scheduler: DPM_Solver
78
+ eval_scheduler: DPM_Solver
79
+ num_inference_steps: 30
80
+ flow_shift: 3.0
81
+ reverse_time: false
82
+ use_cads: false
83
+ cads_noise_scale: 0.1
84
+ cads_mixing_factor: 1.0
85
+ cads_tau_min: 0.2
86
+ cads_tau_max: 0.9
87
+ use_guidance_interval: false
88
+ guidance_t_min: 0.2
89
+ guidance_t_max: 0.8
90
+ count_nfe: false
91
+ patch_size: 2
92
+ hidden_size: 1152
93
+ depth: 28
94
+ num_heads: 16
95
+ mlp_ratio: 4.0
96
+ mlp_time_embed: false
97
+ qkv_bias: false
98
+ act_layer: gelu
99
+ use_checkpoint: true
100
+ class_dropout_prob: 0.1
101
+ num_classes: 1000
102
+ attn_mode: null
103
+ sana_cls:
104
+ name: SanaCls
105
+ in_channels: 4
106
+ input_size: 32
107
+ cfg_channels: null
108
+ pretrained_path: null
109
+ pretrained_source: dc-ae
110
+ train_scheduler: SanaScheduler
111
+ eval_scheduler: SanaScheduler
112
+ num_inference_steps: 250
113
+ flow_shift: 3.0
114
+ reverse_time: false
115
+ use_cads: false
116
+ cads_noise_scale: 0.1
117
+ cads_mixing_factor: 1.0
118
+ cads_tau_min: 0.2
119
+ cads_tau_max: 0.9
120
+ use_guidance_interval: false
121
+ guidance_t_min: 0.2
122
+ guidance_t_max: 0.8
123
+ count_nfe: false
124
+ patch_size: 2
125
+ hidden_size: 1152
126
+ depth: 28
127
+ num_heads: 16
128
+ mlp_ratio: 4.0
129
+ post_norm: false
130
+ class_dropout_prob: 0.1
131
+ num_classes: 1000
132
+ unconditional: false
133
+ use_checkpoint: true
134
+ only_load_backbone: false
135
+ freeze_backbone: false
136
+ learn_sigma: false
137
+ usana_cls:
138
+ name: USanaCls
139
+ in_channels: 4
140
+ input_size: 32
141
+ cfg_channels: null
142
+ pretrained_path: null
143
+ pretrained_source: dc-ae
144
+ train_scheduler: DPM_Solver
145
+ eval_scheduler: DPM_Solver
146
+ num_inference_steps: 30
147
+ flow_shift: 3.0
148
+ reverse_time: false
149
+ use_cads: false
150
+ cads_noise_scale: 0.1
151
+ cads_mixing_factor: 1.0
152
+ cads_tau_min: 0.2
153
+ cads_tau_max: 0.9
154
+ use_guidance_interval: false
155
+ guidance_t_min: 0.2
156
+ guidance_t_max: 0.8
157
+ count_nfe: false
158
+ patch_size: 2
159
+ hidden_size: 1152
160
+ depth: 28
161
+ num_heads: 16
162
+ mlp_ratio: 4.0
163
+ mlp_time_embed: false
164
+ qkv_bias: false
165
+ act_layer: gelu
166
+ use_checkpoint: true
167
+ class_dropout_prob: 0.1
168
+ num_classes: 1000
169
+ num_training_steps: 1000
170
+ fp8:
171
+ name: FP8DiT
172
+ fp8coat:
173
+ name: FP8COATDiT
174
+ qchoice: linear
175
+ symm: true
176
+ row_blocksize: -1
177
+ col_blocksize: -1
178
+ linear_row_blocksize: 1
179
+ linear_col_blocksize: 32
180
+ min_blockunit_row: -1
181
+ min_blockunit_col: -1
182
+ fabit: MXE2M1_plus
183
+ fwbit: MXE2M1_plus
184
+ babit: MXE2M1_plus
185
+ bwbit: MXE2M1_plus
186
+ bobit: MXE2M1_plus
187
+ epsilon: 1.0e-08
188
+ compute_fid: true
189
+ fid:
190
+ save_path: null
191
+ ref_path: assets/data/fid/imagenet_train_256.npz
192
+ precision_recall_ref_path: assets/data/precision_recall/VIRTUAL_imagenet256.npy
193
+ compute_inception_score: true
194
+ inception_score: {}
195
+ compute_cmmd: true
196
+ cmmd:
197
+ save_path: null
198
+ ref_path: assets/data/cmmd/VIRTUAL_imagenet256.npy
199
+ verbose: false
200
+ train_dataset: latent_imagenet
201
+ latent_imagenet:
202
+ name: LatentImageNet
203
+ batch_size: 128
204
+ n_worker: 8
205
+ drop_last: true
206
+ seed: 0
207
+ shuffle: true
208
+ data_dir: assets/data/latent/dc_ae_f32c32_in_1.0_256px/imagenet_256
209
+ latent_mjhq:
210
+ name: LatentMJHQ
211
+ batch_size: 32
212
+ n_worker: 8
213
+ drop_last: true
214
+ seed: 0
215
+ shuffle: true
216
+ data_dir: assets/data/latent/dc_ae_f32c32/mjhq_1024
217
+ latent_ffhq:
218
+ name: LatentFFHQ
219
+ batch_size: 32
220
+ n_worker: 8
221
+ drop_last: true
222
+ seed: 0
223
+ shuffle: true
224
+ data_dir: assets/data/latent/dc_ae_f32c32/ffhq_1024
225
+ latent_mapillary_vistas:
226
+ name: LatentMapillaryVistas
227
+ batch_size: 32
228
+ n_worker: 8
229
+ drop_last: true
230
+ seed: 0
231
+ shuffle: true
232
+ data_dir: assets/data/latent/dc_ae_f32c32/mapillary_vistas_2048
233
+ latent_multiple_channel_imagenet:
234
+ name: LatentMultipleChannelImageNet
235
+ batch_size: 32
236
+ n_worker: 8
237
+ drop_last: true
238
+ seed: 0
239
+ shuffle: true
240
+ dataset_sample_ratio: null
241
+ num_channels_list: null
242
+ data_dirs:
243
+ - assets/data/latent/dc_ae_f32c32/imagenet_512
244
+ resume: true
245
+ resume_path: null
246
+ resume_schedule: true
247
+ num_epochs: null
248
+ max_steps: 500000
249
+ clip_grad: null
250
+ num_store_images: 64
251
+ save_checkpoint_steps: 1000
252
+ eval_steps: 20000
253
+ save_eval_checkpoint_steps: 100000
254
+ optimizer:
255
+ name: adamw
256
+ lr: 0.0001
257
+ warmup_lr: 0.0
258
+ weight_decay: 0.0
259
+ no_wd_keys: []
260
+ betas:
261
+ - 0.9
262
+ - 0.999
263
+ lr_scheduler:
264
+ name: constant
265
+ warmup_steps: 1000
266
+ log: true
267
+ wandb_entity: han2024
268
+ wandb_project: dc_ae_diffusion
269
+ ema_decay: 0.9999
270
+ ema_warmup_steps: 2000
271
+ eval_ema: true
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/eval_results.csv ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,fid,precision,recall,inception_score_mean,inception_score_std,cmmd
2
+ step_100000_autoencoder_setting_0_cfg_1.0,22.82251239617085,0.6495599746704102,0.5740000009536743,54.4717929909383,1.3851383102860535,0.573277473449707
3
+ step_120000_autoencoder_setting_0_cfg_1.0,23.09154068529824,0.639739990234375,0.5774999856948853,56.29451314562423,1.3199133010401618,0.6319284439086914
4
+ step_140000_autoencoder_setting_0_cfg_1.0,20.76137721606244,0.6392399668693542,0.5882999897003174,61.48154406857825,1.6071809236386536,0.6085634231567383
5
+ step_160000_autoencoder_setting_0_cfg_1.0,19.763616799984906,0.6626799702644348,0.5631999969482422,66.3743937296074,1.3981266954044251,0.5801916122436523
6
+ step_180000_autoencoder_setting_0_cfg_1.0,18.79058651979068,0.6640200018882751,0.5453000068664551,68.40362149122765,1.412380260108231,0.4642009735107422
7
+ step_200000_autoencoder_setting_0_cfg_1.0,20.954522054968265,0.653499960899353,0.5582000017166138,63.14416956965454,1.0557505979988988,0.5255937576293945
8
+ step_20000_autoencoder_setting_0_cfg_1.0,57.72364422441791,0.4287199974060058,0.5460999608039856,19.90047475034472,0.3800645592686704,1.1063814163208008
9
+ step_220000_autoencoder_setting_0_cfg_1.0,18.47325050523102,0.6658399701118469,0.5615000128746033,69.24973211027222,1.3201464970260688,0.4718303680419922
10
+ step_240000_autoencoder_setting_0_cfg_1.0,21.973751038787213,0.6630799770355225,0.5539000034332275,59.74237377356919,1.562433067266785,0.5408525466918945
11
+ step_260000_autoencoder_setting_0_cfg_1.0,17.706635191026123,0.680079996585846,0.5595999956130981,71.48858761147187,1.564728001100873,0.48232078552246094
12
+ step_40000_autoencoder_setting_0_cfg_1.0,44.55932573190052,0.5211399793624878,0.5742999911308289,26.626259701696387,0.5180268710510688,0.9006261825561523
13
+ step_60000_autoencoder_setting_0_cfg_1.0,36.75938892258444,0.5808199644088745,0.5612999796867371,34.614852400498975,0.9469187747487444,0.8183717727661133
14
+ step_80000_autoencoder_setting_0_cfg_1.0,30.842797217782447,0.608299970626831,0.5734999775886536,42.12004014484396,1.0538750580332945,0.7462501525878906
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/log.txt ADDED
The diff for this file is too large to render. See raw diff
 
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/model.txt ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FP8COATDiT(
2
+ (x_embedder): PatchEmbed(
3
+ (proj): Conv2d(32, 1152, kernel_size=(1, 1), stride=(1, 1))
4
+ (norm): Identity()
5
+ )
6
+ (t_embedder): TimestepEmbedder(
7
+ (mlp): Sequential(
8
+ (0): Linear(in_features=256, out_features=1152, bias=True)
9
+ (1): SiLU()
10
+ (2): Linear(in_features=1152, out_features=1152, bias=True)
11
+ )
12
+ )
13
+ (y_embedder): LabelEmbedder(
14
+ (embedding_table): Embedding(1001, 1152)
15
+ )
16
+ (blocks): ModuleList(
17
+ (0-27): 28 x FP8COATDiTBlock(
18
+ (norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
19
+ (attn): FP8COATAttention(
20
+ (qkv): QAct_Linear(in_features=1152, out_features=3456, bias=True)
21
+ (q_norm): Identity()
22
+ (k_norm): Identity()
23
+ (attn_drop): Dropout(p=0.0, inplace=False)
24
+ (proj): QAct_Linear(in_features=1152, out_features=1152, bias=True)
25
+ (proj_drop): Dropout(p=0.0, inplace=False)
26
+ (qact_q_quantize): QAct_Quantize()
27
+ (qact_k_quantize): QAct_Quantize()
28
+ )
29
+ (norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
30
+ (mlp): FP8COATMlp(
31
+ (fc1): QAct_Linear(in_features=1152, out_features=4608, bias=True)
32
+ (act): GELU(approximate='tanh')
33
+ (drop1): Dropout(p=0, inplace=False)
34
+ (norm): Identity()
35
+ (fc2): QAct_Linear(in_features=4608, out_features=1152, bias=True)
36
+ (drop2): Dropout(p=0, inplace=False)
37
+ (qact_quantize_gelu): QAct_Quantize()
38
+ (qact_quantize_norm): QAct_Quantize()
39
+ )
40
+ (adaLN_modulation): Sequential(
41
+ (0): SiLU()
42
+ (1): Linear(in_features=1152, out_features=6912, bias=True)
43
+ )
44
+ (qact_quantize_norm1): QAct_Quantize()
45
+ (qact_quantize_norm2): QAct_Quantize()
46
+ )
47
+ )
48
+ (final_layer): FinalLayer(
49
+ (norm_final): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
50
+ (linear): Linear(in_features=1152, out_features=32, bias=True)
51
+ (adaLN_modulation): Sequential(
52
+ (0): SiLU()
53
+ (1): Linear(in_features=1152, out_features=2304, bias=True)
54
+ )
55
+ )
56
+ )
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/slurm.sh ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -A nvr_elm_llm #account
3
+ #SBATCH -p grizzly,polar,polar3,polar4 #partition
4
+ #SBATCH -t 04:00:00 #wall time limit, hr:min:sec
5
+ #SBATCH -N 1 #number of nodes
6
+ #SBATCH -J train_diffusion_dit #job name
7
+ #SBATCH --array=1-40%1
8
+ #SBATCH --output=exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/slurm_out/%A_%a.out
9
+ #SBATCH --gpus-per-node 8
10
+ #SBATCH --exclusive
11
+
12
+ export LOGLEVEL=INFO
13
+ export PATH="/home/hcai/workspace/anaconda3/envs/efficientvit/bin:$PATH"
14
+
15
+ cd /lustre/fs12/portfolios/nvr/users/hcai/workspace/code/fp8-dit
16
+
17
+ read -r -d '' cmd <<EOF
18
+ torchrun --nnodes=1 --nproc_per_node=8 \
19
+ -m efficientvit.diffusioncore.trainer yaml=exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/config.yaml
20
+ EOF
21
+
22
+ srun bash -c "${cmd}"
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/step_100000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd43ba952a645c837ac580c184650a755dbf70ae7f7e8985eed8207667132b89
3
+ size 10798884178
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_plus_cs/step_200000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8c5376cbd89dae41145a00b7a6d007763b8697b3db993d6f07f27e54eba1bb4
3
+ size 10798884178