han-cai committed on
Commit
91daab0
·
verified ·
1 Parent(s): 45de501

Upload folder using huggingface_hub

Browse files
Files changed (12) hide show
  1. exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/checkpoint.pt +3 -0
  2. exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/checkpoint_.pt +3 -0
  3. exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/config.yaml +271 -0
  4. exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/eval_results.csv +77 -0
  5. exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/log.txt +0 -0
  6. exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/model.txt +56 -0
  7. exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/slurm.sh +22 -0
  8. exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_100000.pt +3 -0
  9. exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_200000.pt +3 -0
  10. exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_300000.pt +3 -0
  11. exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_400000.pt +3 -0
  12. exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_500000.pt +3 -0
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02b88b20fa7a404ff2ebcb1dae8300a4ca50433c19b3d02101b8f5ff86108ece
3
+ size 10798884178
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/checkpoint_.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02b88b20fa7a404ff2ebcb1dae8300a4ca50433c19b3d02101b8f5ff86108ece
3
+ size 10798884178
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/config.yaml ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_dir: exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs
2
+ seed: 0
3
+ allow_tf32: true
4
+ timeout: null
5
+ resolution: 256
6
+ amp: bf16
7
+ cfg_scale: 1.0
8
+ evaluate_split: test
9
+ eval_dir_name: null
10
+ num_save_images: 64
11
+ save_all_images: false
12
+ save_image_format: jpg
13
+ save_images_at_all_procs: false
14
+ save_latent_samples: false
15
+ latent_samples_dir: null
16
+ evaluate_dataset: sample_class
17
+ sample_class:
18
+ name: SampleClass
19
+ batch_size: 128
20
+ n_worker: 8
21
+ drop_last: false
22
+ seed: 0
23
+ shuffle: false
24
+ num_classes: 1000
25
+ num_samples: 50000
26
+ autoencoder:
27
+ num_settings: 1
28
+ name: dc-ae-f32c32-in-1.0-256px
29
+ scaling_factor: 0.3285
30
+ latent_channels: null
31
+ autoencoder_dtype: fp32
32
+ eval_autoencoder_setting_list: null
33
+ model: fp8coat_dit
34
+ dit:
35
+ name: DiT
36
+ in_channels: 32
37
+ input_size: 8
38
+ cfg_channels: null
39
+ pretrained_path: null
40
+ pretrained_source: dc-ae
41
+ train_scheduler: SiTSampler
42
+ eval_scheduler: ODE_heun2
43
+ num_inference_steps: 30
44
+ flow_shift: 3.0
45
+ reverse_time: false
46
+ use_cads: false
47
+ cads_noise_scale: 0.1
48
+ cads_mixing_factor: 1.0
49
+ cads_tau_min: 0.2
50
+ cads_tau_max: 0.9
51
+ use_guidance_interval: false
52
+ guidance_t_min: 0.2
53
+ guidance_t_max: 0.8
54
+ count_nfe: false
55
+ patch_size: 1
56
+ hidden_size: 1152
57
+ depth: 28
58
+ num_heads: 16
59
+ mlp_ratio: 4.0
60
+ post_norm: false
61
+ class_dropout_prob: 0.1
62
+ num_classes: 1000
63
+ learn_sigma: false
64
+ unconditional: false
65
+ use_checkpoint: true
66
+ adaptive_channel: false
67
+ adaptive_channel_share_weights: true
68
+ only_load_backbone: false
69
+ freeze_backbone: false
70
+ uvit:
71
+ name: UViT
72
+ in_channels: 4
73
+ input_size: 32
74
+ cfg_channels: null
75
+ pretrained_path: null
76
+ pretrained_source: dc-ae
77
+ train_scheduler: DPM_Solver
78
+ eval_scheduler: DPM_Solver
79
+ num_inference_steps: 30
80
+ flow_shift: 3.0
81
+ reverse_time: false
82
+ use_cads: false
83
+ cads_noise_scale: 0.1
84
+ cads_mixing_factor: 1.0
85
+ cads_tau_min: 0.2
86
+ cads_tau_max: 0.9
87
+ use_guidance_interval: false
88
+ guidance_t_min: 0.2
89
+ guidance_t_max: 0.8
90
+ count_nfe: false
91
+ patch_size: 2
92
+ hidden_size: 1152
93
+ depth: 28
94
+ num_heads: 16
95
+ mlp_ratio: 4.0
96
+ mlp_time_embed: false
97
+ qkv_bias: false
98
+ act_layer: gelu
99
+ use_checkpoint: true
100
+ class_dropout_prob: 0.1
101
+ num_classes: 1000
102
+ attn_mode: null
103
+ sana_cls:
104
+ name: SanaCls
105
+ in_channels: 4
106
+ input_size: 32
107
+ cfg_channels: null
108
+ pretrained_path: null
109
+ pretrained_source: dc-ae
110
+ train_scheduler: SanaScheduler
111
+ eval_scheduler: SanaScheduler
112
+ num_inference_steps: 250
113
+ flow_shift: 3.0
114
+ reverse_time: false
115
+ use_cads: false
116
+ cads_noise_scale: 0.1
117
+ cads_mixing_factor: 1.0
118
+ cads_tau_min: 0.2
119
+ cads_tau_max: 0.9
120
+ use_guidance_interval: false
121
+ guidance_t_min: 0.2
122
+ guidance_t_max: 0.8
123
+ count_nfe: false
124
+ patch_size: 2
125
+ hidden_size: 1152
126
+ depth: 28
127
+ num_heads: 16
128
+ mlp_ratio: 4.0
129
+ post_norm: false
130
+ class_dropout_prob: 0.1
131
+ num_classes: 1000
132
+ unconditional: false
133
+ use_checkpoint: true
134
+ only_load_backbone: false
135
+ freeze_backbone: false
136
+ learn_sigma: false
137
+ usana_cls:
138
+ name: USanaCls
139
+ in_channels: 4
140
+ input_size: 32
141
+ cfg_channels: null
142
+ pretrained_path: null
143
+ pretrained_source: dc-ae
144
+ train_scheduler: DPM_Solver
145
+ eval_scheduler: DPM_Solver
146
+ num_inference_steps: 30
147
+ flow_shift: 3.0
148
+ reverse_time: false
149
+ use_cads: false
150
+ cads_noise_scale: 0.1
151
+ cads_mixing_factor: 1.0
152
+ cads_tau_min: 0.2
153
+ cads_tau_max: 0.9
154
+ use_guidance_interval: false
155
+ guidance_t_min: 0.2
156
+ guidance_t_max: 0.8
157
+ count_nfe: false
158
+ patch_size: 2
159
+ hidden_size: 1152
160
+ depth: 28
161
+ num_heads: 16
162
+ mlp_ratio: 4.0
163
+ mlp_time_embed: false
164
+ qkv_bias: false
165
+ act_layer: gelu
166
+ use_checkpoint: true
167
+ class_dropout_prob: 0.1
168
+ num_classes: 1000
169
+ num_training_steps: 1000
170
+ fp8:
171
+ name: FP8DiT
172
+ fp8coat:
173
+ name: FP8COATDiT
174
+ qchoice: linear
175
+ symm: true
176
+ row_blocksize: -1
177
+ col_blocksize: -1
178
+ linear_row_blocksize: 1
179
+ linear_col_blocksize: 16
180
+ min_blockunit_row: -1
181
+ min_blockunit_col: -1
182
+ fabit: NVE2M1
183
+ fwbit: NVE2M1
184
+ babit: NVE2M1
185
+ bwbit: NVE2M1
186
+ bobit: NVE2M1
187
+ epsilon: 1.0e-08
188
+ compute_fid: true
189
+ fid:
190
+ save_path: null
191
+ ref_path: assets/data/fid/imagenet_train_256.npz
192
+ precision_recall_ref_path: assets/data/precision_recall/VIRTUAL_imagenet256.npy
193
+ compute_inception_score: true
194
+ inception_score: {}
195
+ compute_cmmd: true
196
+ cmmd:
197
+ save_path: null
198
+ ref_path: assets/data/cmmd/VIRTUAL_imagenet256.npy
199
+ verbose: false
200
+ train_dataset: latent_imagenet
201
+ latent_imagenet:
202
+ name: LatentImageNet
203
+ batch_size: 128
204
+ n_worker: 8
205
+ drop_last: true
206
+ seed: 0
207
+ shuffle: true
208
+ data_dir: assets/data/latent/dc_ae_f32c32_in_1.0_256px/imagenet_256
209
+ latent_mjhq:
210
+ name: LatentMJHQ
211
+ batch_size: 32
212
+ n_worker: 8
213
+ drop_last: true
214
+ seed: 0
215
+ shuffle: true
216
+ data_dir: assets/data/latent/dc_ae_f32c32/mjhq_1024
217
+ latent_ffhq:
218
+ name: LatentFFHQ
219
+ batch_size: 32
220
+ n_worker: 8
221
+ drop_last: true
222
+ seed: 0
223
+ shuffle: true
224
+ data_dir: assets/data/latent/dc_ae_f32c32/ffhq_1024
225
+ latent_mapillary_vistas:
226
+ name: LatentMapillaryVistas
227
+ batch_size: 32
228
+ n_worker: 8
229
+ drop_last: true
230
+ seed: 0
231
+ shuffle: true
232
+ data_dir: assets/data/latent/dc_ae_f32c32/mapillary_vistas_2048
233
+ latent_multiple_channel_imagenet:
234
+ name: LatentMultipleChannelImageNet
235
+ batch_size: 32
236
+ n_worker: 8
237
+ drop_last: true
238
+ seed: 0
239
+ shuffle: true
240
+ dataset_sample_ratio: null
241
+ num_channels_list: null
242
+ data_dirs:
243
+ - assets/data/latent/dc_ae_f32c32/imagenet_512
244
+ resume: true
245
+ resume_path: null
246
+ resume_schedule: true
247
+ num_epochs: null
248
+ max_steps: 500000
249
+ clip_grad: null
250
+ num_store_images: 64
251
+ save_checkpoint_steps: 1000
252
+ eval_steps: 20000
253
+ save_eval_checkpoint_steps: 100000
254
+ optimizer:
255
+ name: adamw
256
+ lr: 0.0001
257
+ warmup_lr: 0.0
258
+ weight_decay: 0.0
259
+ no_wd_keys: []
260
+ betas:
261
+ - 0.9
262
+ - 0.999
263
+ lr_scheduler:
264
+ name: constant
265
+ warmup_steps: 1000
266
+ log: true
267
+ wandb_entity: han2024
268
+ wandb_project: dc_ae_diffusion
269
+ ema_decay: 0.9999
270
+ ema_warmup_steps: 2000
271
+ eval_ema: true
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/eval_results.csv ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,fid,precision,recall,inception_score_mean,inception_score_std,cmmd
2
+ step_100000_autoencoder_setting_0_cfg_1.0,27.82081598705525,0.580020010471344,0.6139999628067017,46.420777211047714,1.0247874181815988,0.6939172744750977
3
+ step_10000_autoencoder_setting_0_cfg_1.0,78.50917779509035,0.3088999986648559,0.4490000009536743,15.570617267902197,0.3332917058620098,1.505136489868164
4
+ step_105000_autoencoder_setting_0_cfg_1.0,27.108763303815124,0.58433997631073,0.6107999682426453,47.7493937125107,1.2485355820824466,0.6833076477050781
5
+ step_110000_autoencoder_setting_0_cfg_1.0,26.572085273128607,0.5875200033187866,0.6103000044822693,48.91806079101078,1.320462264402051,0.6742477416992188
6
+ step_115000_autoencoder_setting_0_cfg_1.0,26.04053809255868,0.5945199728012085,0.6122999787330627,50.06943499322904,1.271234794980543,0.6644725799560547
7
+ step_120000_autoencoder_setting_0_cfg_1.0,25.48026900045761,0.59961998462677,0.6194999814033508,51.20050723062443,1.5078486704815637,0.6552934646606445
8
+ step_125000_autoencoder_setting_0_cfg_1.0,25.05591886633323,0.6003400087356567,0.6232999563217163,52.10802955307636,1.581453473677152,0.649571418762207
9
+ step_130000_autoencoder_setting_0_cfg_1.0,24.5844863096371,0.6046000123023987,0.6189000010490417,52.79456366277475,1.507362587027362,0.6395578384399414
10
+ step_135000_autoencoder_setting_0_cfg_1.0,24.18458923313409,0.6058200001716614,0.620199978351593,53.62105523874786,1.4409782134398583,0.6331205368041992
11
+ step_140000_autoencoder_setting_0_cfg_1.0,23.866081545629527,0.6055200099945068,0.6236000061035156,54.368579244722106,1.4469492288369885,0.6299018859863281
12
+ step_145000_autoencoder_setting_0_cfg_1.0,23.26773026378873,0.6069599986076355,0.6215999722480774,55.51033309373391,1.387295168322456,0.6201267242431641
13
+ step_150000_autoencoder_setting_0_cfg_1.0,22.445572104073108,0.6144399642944336,0.6243999600410461,57.17583559018384,1.5491262770197771,0.6072521209716797
14
+ step_15000_autoencoder_setting_0_cfg_1.0,68.12664722088442,0.3365599811077118,0.5128999948501587,17.673999189426127,0.1972535523911879,1.3135671615600586
15
+ step_155000_autoencoder_setting_0_cfg_1.0,21.6969907496823,0.6131199598312378,0.6244999766349792,58.32637361785574,1.5920632516988755,0.5936622619628906
16
+ step_160000_autoencoder_setting_0_cfg_1.0,21.259075753956097,0.6131199598312378,0.6218000054359436,59.29787913474204,1.77534055094449,0.5881786346435547
17
+ step_165000_autoencoder_setting_0_cfg_1.0,21.028425986100444,0.6096999645233154,0.6220999956130981,60.28639353096686,1.6039359868962844,0.5855560302734375
18
+ step_170000_autoencoder_setting_0_cfg_1.0,20.78157261474053,0.6087200045585632,0.623699963092804,61.02744304547458,1.5003141917637632,0.5818605422973633
19
+ step_175000_autoencoder_setting_0_cfg_1.0,20.412292669880458,0.613319993019104,0.6223999857902527,61.96596631327077,1.7293084120916875,0.577092170715332
20
+ step_180000_autoencoder_setting_0_cfg_1.0,20.27882333039713,0.612280011177063,0.6218999624252319,62.114020147042574,1.8010197629079103,0.5730390548706055
21
+ step_185000_autoencoder_setting_0_cfg_1.0,19.89168937788316,0.615339994430542,0.6232999563217163,63.27708418568627,1.451279607657161,0.5658864974975586
22
+ step_190000_autoencoder_setting_0_cfg_1.0,19.419406988353785,0.6139999628067017,0.6232999563217163,64.56478640219703,1.471862549083792,0.5564689636230469
23
+ step_195000_autoencoder_setting_0_cfg_1.0,18.97561403631505,0.6157199740409851,0.6226999759674072,65.76082161550052,1.3146110081612372,0.5470514297485352
24
+ step_200000_autoencoder_setting_0_cfg_1.0,19.026450618121032,0.6161999702453613,0.6227999925613403,66.43766544317899,1.4349545277425972,0.5500316619873047
25
+ step_20000_autoencoder_setting_0_cfg_1.0,69.80893117916492,0.3557800054550171,0.5060999989509583,17.63329056852647,0.2319949836815072,1.3469457626342771
26
+ step_205000_autoencoder_setting_0_cfg_1.0,18.5947097582615,0.6192600131034851,0.6297999620437622,67.63410420493474,1.536225845054557,0.5457401275634766
27
+ step_210000_autoencoder_setting_0_cfg_1.0,18.233572638968496,0.6248599886894226,0.623199999332428,68.6561121314874,1.5783403374324958,0.538945198059082
28
+ step_215000_autoencoder_setting_0_cfg_1.0,17.797767795291293,0.6285399794578552,0.6223999857902527,70.42323234086862,1.5883405198437344,0.5366802215576172
29
+ step_220000_autoencoder_setting_0_cfg_1.0,17.74635629321216,0.6273399591445923,0.6311999559402466,70.2145545808094,1.5785695221108964,0.5356073379516602
30
+ step_225000_autoencoder_setting_0_cfg_1.0,17.623212029857086,0.6253600120544434,0.6233999729156494,70.10889264234,1.402678692708326,0.5316734313964844
31
+ step_230000_autoencoder_setting_0_cfg_1.0,17.56820183523979,0.6272199749946594,0.6281999945640564,70.24948808293567,1.3719698368922175,0.5235671997070312
32
+ step_235000_autoencoder_setting_0_cfg_1.0,17.140324617321994,0.6325399875640869,0.6326999664306641,71.8477116161524,1.4453070017749934,0.5209445953369141
33
+ step_240000_autoencoder_setting_0_cfg_1.0,16.875031621310654,0.6319199800491333,0.6304000020027161,72.40102219505278,1.572925419822807,0.5216598510742188
34
+ step_245000_autoencoder_setting_0_cfg_1.0,16.603363011407623,0.6376399993896484,0.6248999834060669,73.19131748156653,1.634987063086786,0.5117654800415039
35
+ step_250000_autoencoder_setting_0_cfg_1.0,16.0659137008808,0.644540011882782,0.6232999563217163,75.05675046856344,1.4611675195953364,0.4990100860595703
36
+ step_25000_autoencoder_setting_0_cfg_1.0,69.20439116845444,0.3652600049972534,0.5146999955177307,17.835205670638455,0.1921896922272648,1.3399124145507812
37
+ step_255000_autoencoder_setting_0_cfg_1.0,16.18191134397,0.643779993057251,0.6266999840736389,74.87994735992554,1.5313646958234384,0.5011558532714844
38
+ step_260000_autoencoder_setting_0_cfg_1.0,16.307349111818837,0.6428399682044983,0.6293999552726746,74.55537296593175,1.7462488184454217,0.5011558532714844
39
+ step_265000_autoencoder_setting_0_cfg_1.0,16.351515906573184,0.6398400068283081,0.6279999613761902,73.60775233144288,2.1094810857369937,0.4945993423461914
40
+ step_270000_autoencoder_setting_0_cfg_1.0,15.720775344146716,0.6462000012397766,0.6182999610900879,75.93336593862954,1.9112681424085896,0.4835128784179687
41
+ step_275000_autoencoder_setting_0_cfg_1.0,15.563540840291353,0.6510800123214722,0.6226999759674072,76.73444862274553,2.1060209810007886,0.4808902740478515
42
+ step_280000_autoencoder_setting_0_cfg_1.0,15.702402057568406,0.6554399728775024,0.6204000115394592,76.80205589593925,1.7598216362881405,0.4860162734985351
43
+ step_285000_autoencoder_setting_0_cfg_1.0,15.735781046575084,0.6553199887275696,0.6211000084877014,76.72250513048573,1.9211665275192824,0.4916191101074219
44
+ step_290000_autoencoder_setting_0_cfg_1.0,15.69120289879271,0.6527199745178223,0.6200000047683716,77.60842637374755,2.02590143858928,0.4960298538208008
45
+ step_295000_autoencoder_setting_0_cfg_1.0,15.545950959937784,0.6571399569511414,0.626800000667572,78.00591850878438,1.96422643425752,0.4917383193969726
46
+ step_300000_autoencoder_setting_0_cfg_1.0,14.454307153848449,0.6623199582099915,0.616599977016449,82.6760750956561,2.181097692838078,0.4781484603881836
47
+ step_30000_autoencoder_setting_0_cfg_1.0,71.33487248804539,0.3709399998188019,0.5120999813079834,17.564975662944967,0.2190062162059579,1.3840198516845703
48
+ step_305000_autoencoder_setting_0_cfg_1.0,14.727343043843916,0.660319983959198,0.618399977684021,81.5701813105076,1.9043765039281204,0.4681348800659179
49
+ step_310000_autoencoder_setting_0_cfg_1.0,15.338016119750534,0.6581599712371826,0.6150999665260315,78.50163318136279,1.3744470633165384,0.4767179489135742
50
+ step_315000_autoencoder_setting_0_cfg_1.0,15.670858997474229,0.6570199728012085,0.6189999580383301,77.5421772667251,1.3151557061430104,0.4895925521850586
51
+ step_320000_autoencoder_setting_0_cfg_1.0,15.351098933787853,0.6589800119400024,0.6159999966621399,78.85159621337543,1.8036081361014704,0.4880428314208984
52
+ step_325000_autoencoder_setting_0_cfg_1.0,16.137012142700883,0.6609199643135071,0.6116999983787537,77.20548121816479,1.4635409927233674,0.506281852722168
53
+ step_330000_autoencoder_setting_0_cfg_1.0,16.935113423647522,0.6579399704933167,0.6132999658584595,75.14944105179288,1.6728809851158315,0.5131959915161133
54
+ step_335000_autoencoder_setting_0_cfg_1.0,16.642815521207183,0.6565999984741211,0.6182000041007996,74.89506149880103,1.7185640977473733,0.5041360855102539
55
+ step_340000_autoencoder_setting_0_cfg_1.0,16.329822569891235,0.6578199863433838,0.6171000003814697,76.13033508421296,1.453925495743798,0.5016326904296875
56
+ step_35000_autoencoder_setting_0_cfg_1.0,68.4526505825479,0.3855199813842773,0.523099958896637,18.50040439034826,0.2102358995042317,1.3387203216552734
57
+ step_360000_autoencoder_setting_0_cfg_1.0,18.51766157845032,0.641539990901947,0.6207000017166138,68.6112735921217,1.5894359804544205,0.5263090133666992
58
+ step_380000_autoencoder_setting_0_cfg_1.0,19.047118711635164,0.6372199654579163,0.6157000064849854,67.32176540925762,1.6363590511183173,0.5387067794799805
59
+ step_400000_autoencoder_setting_0_cfg_1.0,20.46042710661544,0.6325799822807312,0.6110000014305115,64.22305233265526,1.8580822376880224,0.5658864974975586
60
+ step_40000_autoencoder_setting_0_cfg_1.0,59.98117677660253,0.4123599827289581,0.5669999718666077,20.98464259395777,0.3452826016560876,1.1975765228271484
61
+ step_420000_autoencoder_setting_0_cfg_1.0,20.048889679310776,0.6316800117492676,0.611299991607666,64.82227407909255,1.662291796280963,0.5481243133544922
62
+ step_440000_autoencoder_setting_0_cfg_1.0,21.6126295301454,0.6124599575996399,0.6274999976158142,62.33693934455978,1.8068080125059365,0.582575798034668
63
+ step_45000_autoencoder_setting_0_cfg_1.0,52.35728384337608,0.4363399744033813,0.5849999785423279,23.625650383680828,0.4867593086153808,1.0684728622436523
64
+ step_460000_autoencoder_setting_0_cfg_1.0,24.502855264928712,0.5979999899864197,0.6233999729156494,57.90086638570746,1.6518872144366603,0.6576776504516602
65
+ step_480000_autoencoder_setting_0_cfg_1.0,22.52686733943784,0.6110399961471558,0.6092999577522278,62.57142330892974,1.7219275950198052,0.6278753280639648
66
+ step_500000_autoencoder_setting_0_cfg_1.0,23.343317025502756,0.6067399978637695,0.6193000078201294,60.4235201455905,1.2737023659678393,0.6389617919921875
67
+ step_50000_autoencoder_setting_0_cfg_1.0,49.921173116398734,0.4559399783611297,0.5971999764442444,25.213578656547632,0.4281732070550088,1.0330677032470703
68
+ step_5000_autoencoder_setting_0_cfg_1.0,90.67715449183368,0.2694199979305267,0.3879999816417694,13.429481826679131,0.210033970069514,1.7157793045043943
69
+ step_55000_autoencoder_setting_0_cfg_1.0,47.73685522428235,0.4755999743938446,0.5899999737739563,26.89910198011636,0.4931733345923623,1.0018348693847656
70
+ step_60000_autoencoder_setting_0_cfg_1.0,44.80032758845715,0.4915199875831604,0.5986999869346619,28.861349883325506,0.6176201506732034,0.9595155715942384
71
+ step_65000_autoencoder_setting_0_cfg_1.0,41.53898757569908,0.5051400065422058,0.6049000024795532,31.154324157233617,0.6160978188506026,0.9080171585083008
72
+ step_70000_autoencoder_setting_0_cfg_1.0,37.99431155890278,0.5188400149345398,0.6146000027656555,33.93576625895251,0.5479828376716884,0.8559226989746094
73
+ step_75000_autoencoder_setting_0_cfg_1.0,35.3211073866222,0.5319399833679199,0.6132999658584595,36.31162383244789,0.8240562594862988,0.8103847503662109
74
+ step_80000_autoencoder_setting_0_cfg_1.0,33.006981752393926,0.5404999852180481,0.6175000071525574,38.86142600259436,0.8896361831768164,0.774383544921875
75
+ step_85000_autoencoder_setting_0_cfg_1.0,30.9545240118008,0.5546599626541138,0.6133999824523926,41.26951645107668,0.9114569673854772,0.7370710372924805
76
+ step_90000_autoencoder_setting_0_cfg_1.0,29.674156243314485,0.5648999810218811,0.6136999726295471,43.06725174936357,0.909306073199211,0.7197856903076172
77
+ step_95000_autoencoder_setting_0_cfg_1.0,28.471735321998835,0.572659969329834,0.6085000038146973,44.97528607860245,0.9621155062307708,0.7002353668212891
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/log.txt ADDED
The diff for this file is too large to render. See raw diff
 
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/model.txt ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FP8COATDiT(
2
+ (x_embedder): PatchEmbed(
3
+ (proj): Conv2d(32, 1152, kernel_size=(1, 1), stride=(1, 1))
4
+ (norm): Identity()
5
+ )
6
+ (t_embedder): TimestepEmbedder(
7
+ (mlp): Sequential(
8
+ (0): Linear(in_features=256, out_features=1152, bias=True)
9
+ (1): SiLU()
10
+ (2): Linear(in_features=1152, out_features=1152, bias=True)
11
+ )
12
+ )
13
+ (y_embedder): LabelEmbedder(
14
+ (embedding_table): Embedding(1001, 1152)
15
+ )
16
+ (blocks): ModuleList(
17
+ (0-27): 28 x FP8COATDiTBlock(
18
+ (norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
19
+ (attn): FP8COATAttention(
20
+ (qkv): QAct_Linear(in_features=1152, out_features=3456, bias=True)
21
+ (q_norm): Identity()
22
+ (k_norm): Identity()
23
+ (attn_drop): Dropout(p=0.0, inplace=False)
24
+ (proj): QAct_Linear(in_features=1152, out_features=1152, bias=True)
25
+ (proj_drop): Dropout(p=0.0, inplace=False)
26
+ (qact_q_quantize): QAct_Quantize()
27
+ (qact_k_quantize): QAct_Quantize()
28
+ )
29
+ (norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
30
+ (mlp): FP8COATMlp(
31
+ (fc1): QAct_Linear(in_features=1152, out_features=4608, bias=True)
32
+ (act): GELU(approximate='tanh')
33
+ (drop1): Dropout(p=0, inplace=False)
34
+ (norm): Identity()
35
+ (fc2): QAct_Linear(in_features=4608, out_features=1152, bias=True)
36
+ (drop2): Dropout(p=0, inplace=False)
37
+ (qact_quantize_gelu): QAct_Quantize()
38
+ (qact_quantize_norm): QAct_Quantize()
39
+ )
40
+ (adaLN_modulation): Sequential(
41
+ (0): SiLU()
42
+ (1): Linear(in_features=1152, out_features=6912, bias=True)
43
+ )
44
+ (qact_quantize_norm1): QAct_Quantize()
45
+ (qact_quantize_norm2): QAct_Quantize()
46
+ )
47
+ )
48
+ (final_layer): FinalLayer(
49
+ (norm_final): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
50
+ (linear): Linear(in_features=1152, out_features=32, bias=True)
51
+ (adaLN_modulation): Sequential(
52
+ (0): SiLU()
53
+ (1): Linear(in_features=1152, out_features=2304, bias=True)
54
+ )
55
+ )
56
+ )
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/slurm.sh ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -A nvr_elm_llm #account
3
+ #SBATCH -p grizzly,polar,polar3,polar4 #partition
4
+ #SBATCH -t 04:00:00 #wall time limit, hr:min:sec
5
+ #SBATCH -N 1 #number of nodes
6
+ #SBATCH -J train_diffusion_dit #job name
7
+ #SBATCH --array=1-30%1
8
+ #SBATCH --output=exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/slurm_out/%A_%a.out
9
+ #SBATCH --gpus-per-node 8
10
+ #SBATCH --exclusive
11
+
12
+ export LOGLEVEL=INFO
13
+ export PATH="/home/hcai/workspace/anaconda3/envs/efficientvit/bin:$PATH"
14
+
15
+ cd /lustre/fs12/portfolios/nvr/users/hcai/workspace/code/fp8-dit
16
+
17
+ read -r -d '' cmd <<EOF
18
+ torchrun --nnodes=1 --nproc_per_node=8 \
19
+ -m efficientvit.diffusioncore.trainer yaml=exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/config.yaml
20
+ EOF
21
+
22
+ srun bash -c "${cmd}"
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_100000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46e93e161bc858fecdac014ca25bd5c06721b2b561375aa3a3dcc8c1f990a349
3
+ size 10798884178
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_200000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ef4dbcf4e31575f3e05c8413b7ab0b60e66680d1b8e48b446ae3fa9fc2a93e0
3
+ size 10798884178
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_300000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:141dc8d80d0907cc16f2d3786fa475ce7a823d263208636e92ba00eeb05c5d2c
3
+ size 10798884178
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_400000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c4c0ae8c74d2d361bd0e4a4a1570ee2cd0d8688a26a07067e5ccbde17cdd431
3
+ size 10798884178
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_500000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11fe39b36b6414f011fa2cabf7e212f1e9f0cbb5ae631a7896b11a0f20cbade0
3
+ size 10798884178