han-cai committed on
Commit
80148bd
·
verified ·
1 Parent(s): bb5aa7c

Upload folder using huggingface_hub

Browse files
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_cs/checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02ac9c7fbb85c3cb5c319220b4824a5aa2722ae71c4b0c0a9f7e3e6b58345a7d
3
+ size 10798884178
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_cs/checkpoint_.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02ac9c7fbb85c3cb5c319220b4824a5aa2722ae71c4b0c0a9f7e3e6b58345a7d
3
+ size 10798884178
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_cs/config.yaml ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_dir: exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_cs
2
+ seed: 0
3
+ allow_tf32: true
4
+ timeout: null
5
+ resolution: 256
6
+ amp: bf16
7
+ cfg_scale: 1.0
8
+ evaluate_split: test
9
+ eval_dir_name: null
10
+ num_save_images: 64
11
+ save_all_images: false
12
+ save_image_format: jpg
13
+ save_images_at_all_procs: false
14
+ save_latent_samples: false
15
+ latent_samples_dir: null
16
+ evaluate_dataset: sample_class
17
+ sample_class:
18
+ name: SampleClass
19
+ batch_size: 128
20
+ n_worker: 8
21
+ drop_last: false
22
+ seed: 0
23
+ shuffle: false
24
+ num_classes: 1000
25
+ num_samples: 50000
26
+ autoencoder:
27
+ num_settings: 1
28
+ name: dc-ae-f32c32-in-1.0-256px
29
+ scaling_factor: 0.3285
30
+ latent_channels: null
31
+ autoencoder_dtype: fp32
32
+ eval_autoencoder_setting_list: null
33
+ model: fp8coat_dit
34
+ dit:
35
+ name: DiT
36
+ in_channels: 32
37
+ input_size: 8
38
+ cfg_channels: null
39
+ pretrained_path: null
40
+ pretrained_source: dc-ae
41
+ train_scheduler: SiTSampler
42
+ eval_scheduler: ODE_heun2
43
+ num_inference_steps: 30
44
+ flow_shift: 3.0
45
+ reverse_time: false
46
+ use_cads: false
47
+ cads_noise_scale: 0.1
48
+ cads_mixing_factor: 1.0
49
+ cads_tau_min: 0.2
50
+ cads_tau_max: 0.9
51
+ use_guidance_interval: false
52
+ guidance_t_min: 0.2
53
+ guidance_t_max: 0.8
54
+ count_nfe: false
55
+ patch_size: 1
56
+ hidden_size: 1152
57
+ depth: 28
58
+ num_heads: 16
59
+ mlp_ratio: 4.0
60
+ post_norm: false
61
+ class_dropout_prob: 0.1
62
+ num_classes: 1000
63
+ learn_sigma: false
64
+ unconditional: false
65
+ use_checkpoint: true
66
+ adaptive_channel: false
67
+ adaptive_channel_share_weights: true
68
+ only_load_backbone: false
69
+ freeze_backbone: false
70
+ uvit:
71
+ name: UViT
72
+ in_channels: 4
73
+ input_size: 32
74
+ cfg_channels: null
75
+ pretrained_path: null
76
+ pretrained_source: dc-ae
77
+ train_scheduler: DPM_Solver
78
+ eval_scheduler: DPM_Solver
79
+ num_inference_steps: 30
80
+ flow_shift: 3.0
81
+ reverse_time: false
82
+ use_cads: false
83
+ cads_noise_scale: 0.1
84
+ cads_mixing_factor: 1.0
85
+ cads_tau_min: 0.2
86
+ cads_tau_max: 0.9
87
+ use_guidance_interval: false
88
+ guidance_t_min: 0.2
89
+ guidance_t_max: 0.8
90
+ count_nfe: false
91
+ patch_size: 2
92
+ hidden_size: 1152
93
+ depth: 28
94
+ num_heads: 16
95
+ mlp_ratio: 4.0
96
+ mlp_time_embed: false
97
+ qkv_bias: false
98
+ act_layer: gelu
99
+ use_checkpoint: true
100
+ class_dropout_prob: 0.1
101
+ num_classes: 1000
102
+ attn_mode: null
103
+ sana_cls:
104
+ name: SanaCls
105
+ in_channels: 4
106
+ input_size: 32
107
+ cfg_channels: null
108
+ pretrained_path: null
109
+ pretrained_source: dc-ae
110
+ train_scheduler: SanaScheduler
111
+ eval_scheduler: SanaScheduler
112
+ num_inference_steps: 250
113
+ flow_shift: 3.0
114
+ reverse_time: false
115
+ use_cads: false
116
+ cads_noise_scale: 0.1
117
+ cads_mixing_factor: 1.0
118
+ cads_tau_min: 0.2
119
+ cads_tau_max: 0.9
120
+ use_guidance_interval: false
121
+ guidance_t_min: 0.2
122
+ guidance_t_max: 0.8
123
+ count_nfe: false
124
+ patch_size: 2
125
+ hidden_size: 1152
126
+ depth: 28
127
+ num_heads: 16
128
+ mlp_ratio: 4.0
129
+ post_norm: false
130
+ class_dropout_prob: 0.1
131
+ num_classes: 1000
132
+ unconditional: false
133
+ use_checkpoint: true
134
+ only_load_backbone: false
135
+ freeze_backbone: false
136
+ learn_sigma: false
137
+ usana_cls:
138
+ name: USanaCls
139
+ in_channels: 4
140
+ input_size: 32
141
+ cfg_channels: null
142
+ pretrained_path: null
143
+ pretrained_source: dc-ae
144
+ train_scheduler: DPM_Solver
145
+ eval_scheduler: DPM_Solver
146
+ num_inference_steps: 30
147
+ flow_shift: 3.0
148
+ reverse_time: false
149
+ use_cads: false
150
+ cads_noise_scale: 0.1
151
+ cads_mixing_factor: 1.0
152
+ cads_tau_min: 0.2
153
+ cads_tau_max: 0.9
154
+ use_guidance_interval: false
155
+ guidance_t_min: 0.2
156
+ guidance_t_max: 0.8
157
+ count_nfe: false
158
+ patch_size: 2
159
+ hidden_size: 1152
160
+ depth: 28
161
+ num_heads: 16
162
+ mlp_ratio: 4.0
163
+ mlp_time_embed: false
164
+ qkv_bias: false
165
+ act_layer: gelu
166
+ use_checkpoint: true
167
+ class_dropout_prob: 0.1
168
+ num_classes: 1000
169
+ num_training_steps: 1000
170
+ fp8:
171
+ name: FP8DiT
172
+ fp8coat:
173
+ name: FP8COATDiT
174
+ qchoice: linear
175
+ symm: true
176
+ row_blocksize: -1
177
+ col_blocksize: -1
178
+ linear_row_blocksize: 1
179
+ linear_col_blocksize: 32
180
+ min_blockunit_row: -1
181
+ min_blockunit_col: -1
182
+ fabit: MXE2M1
183
+ fwbit: MXE2M1
184
+ babit: MXE2M1
185
+ bwbit: MXE2M1
186
+ bobit: MXE2M1
187
+ epsilon: 1.0e-08
188
+ compute_fid: true
189
+ fid:
190
+ save_path: null
191
+ ref_path: assets/data/fid/imagenet_train_256.npz
192
+ precision_recall_ref_path: assets/data/precision_recall/VIRTUAL_imagenet256.npy
193
+ compute_inception_score: true
194
+ inception_score: {}
195
+ compute_cmmd: true
196
+ cmmd:
197
+ save_path: null
198
+ ref_path: assets/data/cmmd/VIRTUAL_imagenet256.npy
199
+ verbose: false
200
+ train_dataset: latent_imagenet
201
+ latent_imagenet:
202
+ name: LatentImageNet
203
+ batch_size: 128
204
+ n_worker: 8
205
+ drop_last: true
206
+ seed: 0
207
+ shuffle: true
208
+ data_dir: assets/data/latent/dc_ae_f32c32_in_1.0_256px/imagenet_256
209
+ latent_mjhq:
210
+ name: LatentMJHQ
211
+ batch_size: 32
212
+ n_worker: 8
213
+ drop_last: true
214
+ seed: 0
215
+ shuffle: true
216
+ data_dir: assets/data/latent/dc_ae_f32c32/mjhq_1024
217
+ latent_ffhq:
218
+ name: LatentFFHQ
219
+ batch_size: 32
220
+ n_worker: 8
221
+ drop_last: true
222
+ seed: 0
223
+ shuffle: true
224
+ data_dir: assets/data/latent/dc_ae_f32c32/ffhq_1024
225
+ latent_mapillary_vistas:
226
+ name: LatentMapillaryVistas
227
+ batch_size: 32
228
+ n_worker: 8
229
+ drop_last: true
230
+ seed: 0
231
+ shuffle: true
232
+ data_dir: assets/data/latent/dc_ae_f32c32/mapillary_vistas_2048
233
+ latent_multiple_channel_imagenet:
234
+ name: LatentMultipleChannelImageNet
235
+ batch_size: 32
236
+ n_worker: 8
237
+ drop_last: true
238
+ seed: 0
239
+ shuffle: true
240
+ dataset_sample_ratio: null
241
+ num_channels_list: null
242
+ data_dirs:
243
+ - assets/data/latent/dc_ae_f32c32/imagenet_512
244
+ resume: true
245
+ resume_path: null
246
+ resume_schedule: true
247
+ num_epochs: null
248
+ max_steps: 500000
249
+ clip_grad: null
250
+ num_store_images: 64
251
+ save_checkpoint_steps: 1000
252
+ eval_steps: 5000
253
+ save_eval_checkpoint_steps: 100000
254
+ optimizer:
255
+ name: adamw
256
+ lr: 0.0001
257
+ warmup_lr: 0.0
258
+ weight_decay: 0.0
259
+ no_wd_keys: []
260
+ betas:
261
+ - 0.9
262
+ - 0.999
263
+ lr_scheduler:
264
+ name: constant
265
+ warmup_steps: 1000
266
+ log: true
267
+ wandb_entity: han2024
268
+ wandb_project: dc_ae_diffusion
269
+ ema_decay: 0.9999
270
+ ema_warmup_steps: 2000
271
+ eval_ema: true
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_cs/eval_results.csv ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,fid,precision,recall,inception_score_mean,inception_score_std,cmmd
2
+ step_100000_autoencoder_setting_0_cfg_1.0,27.46606584701101,0.61735999584198,0.5893999934196472,46.54857947447967,1.3332273204939689,0.685572624206543
3
+ step_10000_autoencoder_setting_0_cfg_1.0,77.05727446679134,0.3295599818229675,0.4676999747753143,15.565048888106762,0.268597983349972,1.496434211730957
4
+ step_105000_autoencoder_setting_0_cfg_1.0,27.715464220831564,0.6100999712944031,0.5995999574661255,45.302445631410805,1.2576683673143063,0.690460205078125
5
+ step_110000_autoencoder_setting_0_cfg_1.0,27.141188774822467,0.6067799925804138,0.6047999858856201,45.71671888649294,1.074692222450376,0.6685256958007812
6
+ step_115000_autoencoder_setting_0_cfg_1.0,29.350415158731263,0.5864599943161011,0.5963999629020691,43.579235183056014,1.2193228974808052,0.7292032241821289
7
+ step_120000_autoencoder_setting_0_cfg_1.0,27.38260828210184,0.6115800142288208,0.598800003528595,47.638758831764754,1.2527766028282552,0.7010698318481445
8
+ step_125000_autoencoder_setting_0_cfg_1.0,83.55567058324493,0.2736199796199798,0.4508000016212463,18.344966937487577,0.389236454215938,1.780986785888672
9
+ step_130000_autoencoder_setting_0_cfg_1.0,114.60912176227862,0.1869200021028518,0.2345999926328659,13.028534859800804,0.23888282689829,2.300500869750977
10
+ step_135000_autoencoder_setting_0_cfg_1.0,97.17474583044005,0.2395999878644943,0.3242000043392181,16.750898003550397,0.2424382515054995,1.890420913696289
11
+ step_140000_autoencoder_setting_0_cfg_1.0,62.01676963957283,0.3773399889469147,0.5047999620437622,24.835525419780133,0.5307866202988152,1.2725591659545898
12
+ step_145000_autoencoder_setting_0_cfg_1.0,44.22959053359051,0.4382599890232086,0.5212999582290649,35.67310203500692,0.5913120500560604,1.043081283569336
13
+ step_150000_autoencoder_setting_0_cfg_1.0,32.55213508177684,0.537559986114502,0.5720999836921692,40.40690785039882,1.132214501737283,0.7479190826416016
14
+ step_15000_autoencoder_setting_0_cfg_1.0,64.88207396149625,0.3858200013637543,0.5281000137329102,18.014370533263538,0.3676093952837374,1.2459754943847656
15
+ step_155000_autoencoder_setting_0_cfg_1.0,40.266173895939176,0.5103999972343445,0.585599958896637,28.388441572318595,0.4970437606726969,0.7547140121459961
16
+ step_160000_autoencoder_setting_0_cfg_1.0,36.156868500368944,0.5500999689102173,0.5920000076293945,33.231766280420786,0.6152553127261183,0.6874799728393555
17
+ step_165000_autoencoder_setting_0_cfg_1.0,38.89370050754872,0.5418199896812439,0.5845000147819519,30.68981609633452,0.4129188269920643,0.6977319717407227
18
+ step_170000_autoencoder_setting_0_cfg_1.0,30.694090357101345,0.5943799614906311,0.5412999987602234,39.55446709901744,0.745065321938258,0.5748271942138672
19
+ step_175000_autoencoder_setting_0_cfg_1.0,37.083586034121424,0.5604400038719177,0.4668999910354614,34.84011334894741,0.8153509818257131,0.64849853515625
20
+ step_180000_autoencoder_setting_0_cfg_1.0,27.748739835219794,0.6278799772262573,0.5295000076293945,45.49423802870099,1.0167031906161783,0.5700588226318359
21
+ step_185000_autoencoder_setting_0_cfg_1.0,25.23685241176776,0.6418399810791016,0.5458999872207642,49.39908635955667,0.8054656414757482,0.5505084991455078
22
+ step_190000_autoencoder_setting_0_cfg_1.0,23.13110201175607,0.6460599899291992,0.5361999869346619,55.5172910005238,1.4440970382251597,0.540614128112793
23
+ step_195000_autoencoder_setting_0_cfg_1.0,23.538359742343346,0.6541399955749512,0.5478999614715576,56.7474705741029,1.2274480853058574,0.598907470703125
24
+ step_200000_autoencoder_setting_0_cfg_1.0,23.92483618921034,0.6505599617958069,0.561199963092804,54.45099339865616,1.093298317638356,0.5929470062255859
25
+ step_20000_autoencoder_setting_0_cfg_1.0,58.60705841394633,0.4182599782943725,0.5557000041007996,19.6572384893356,0.3906942602501593,1.1314153671264648
26
+ step_205000_autoencoder_setting_0_cfg_1.0,26.66331853579948,0.6324999928474426,0.5630999803543091,48.94374481668279,0.7540630131991589,0.6198883056640625
27
+ step_210000_autoencoder_setting_0_cfg_1.0,27.182584950619287,0.6284199953079224,0.5674999952316284,47.74365201665509,0.8291473491519269,0.6200075149536133
28
+ step_215000_autoencoder_setting_0_cfg_1.0,27.06325435066248,0.6215599775314331,0.5681999921798706,47.31741172494515,1.0237269085835967,0.6133317947387695
29
+ step_220000_autoencoder_setting_0_cfg_1.0,27.20068400837704,0.6174600124359131,0.5679000020027161,46.26930037428583,0.9438617752214088,0.5998611450195312
30
+ step_225000_autoencoder_setting_0_cfg_1.0,30.37256821875951,0.5977399945259094,0.5534999966621399,41.23860861317935,0.7901778911911452,0.6161928176879883
31
+ step_230000_autoencoder_setting_0_cfg_1.0,34.054256909763865,0.5787599682807922,0.5667999982833862,37.56948925905199,0.855416136230482,0.6885528564453125
32
+ step_235000_autoencoder_setting_0_cfg_1.0,37.78871189972301,0.5626999735832214,0.5475000143051147,36.7783688249036,0.8235595865583547,0.7672309875488281
33
+ step_240000_autoencoder_setting_0_cfg_1.0,34.93812927402263,0.5778799653053284,0.5565999746322632,39.86825228223786,0.7340756105042252,0.7361173629760742
34
+ step_245000_autoencoder_setting_0_cfg_1.0,36.087229732776905,0.5704999566078186,0.5575000047683716,38.57220970973545,0.914182454177906,0.7581710815429688
35
+ step_250000_autoencoder_setting_0_cfg_1.0,45.18380360504688,0.5283399820327759,0.5587999820709229,31.677529143387662,0.8310292837008046,0.9241104125976562
36
+ step_25000_autoencoder_setting_0_cfg_1.0,54.78462564683889,0.4369199872016907,0.560699999332428,20.89925753264752,0.4969876692833168,1.0616779327392578
37
+ step_255000_autoencoder_setting_0_cfg_1.0,45.76596311543864,0.5237799882888794,0.5626999735832214,31.552457359973697,0.7716041779496106,0.9648799896240234
38
+ step_260000_autoencoder_setting_0_cfg_1.0,47.279312671421735,0.5155199766159058,0.5676000118255615,30.325805138480234,0.6582780217704692,0.9881258010864258
39
+ step_265000_autoencoder_setting_0_cfg_1.0,44.83983435376615,0.5292800068855286,0.5715999603271484,31.390541326123003,0.8134063420420725,0.9305477142333984
40
+ step_270000_autoencoder_setting_0_cfg_1.0,38.73975484264247,0.5501999855041504,0.5708000063896179,35.52544017242904,0.8652260562194196,0.8199214935302734
41
+ step_275000_autoencoder_setting_0_cfg_1.0,38.77210438903603,0.5509999990463257,0.5715999603271484,35.44527252818729,1.0544389007409831,0.7992982864379883
42
+ step_280000_autoencoder_setting_0_cfg_1.0,33.48746168703428,0.5824999809265137,0.5663999915122986,40.810818631937806,1.2478690465740545,0.7320642471313477
43
+ step_285000_autoencoder_setting_0_cfg_1.0,36.32169918558572,0.5613799691200256,0.5709999799728394,38.53071838412784,0.913512937325494,0.8158683776855469
44
+ step_290000_autoencoder_setting_0_cfg_1.0,46.39197448834176,0.5216599702835083,0.5680999755859375,29.67758757261446,0.6341125924727178,0.912189483642578
45
+ step_295000_autoencoder_setting_0_cfg_1.0,45.04938679619204,0.5299599766731262,0.5613999962806702,30.68297905279561,0.6612615131016247,0.9065866470336914
46
+ step_300000_autoencoder_setting_0_cfg_1.0,52.62663548825503,0.5005399584770203,0.5475999712944031,27.40174913093044,0.5743170027896177,1.0752677917480469
47
+ step_30000_autoencoder_setting_0_cfg_1.0,51.11758756774248,0.4611199796199798,0.5708000063896179,22.615113190655308,0.5158982480242132,1.0036230087280271
48
+ step_305000_autoencoder_setting_0_cfg_1.0,45.67455880145883,0.5220999717712402,0.5546999573707581,30.240217515698056,0.5965084969428479,0.9334087371826172
49
+ step_310000_autoencoder_setting_0_cfg_1.0,40.7856380902565,0.531059980392456,0.5634999871253967,33.664514027515104,0.7228171352764384,0.8565187454223633
50
+ step_315000_autoencoder_setting_0_cfg_1.0,36.91062019126514,0.5459200143814087,0.5716999769210815,35.633312098853764,0.7608190318318035,0.7877349853515625
51
+ step_320000_autoencoder_setting_0_cfg_1.0,37.70445902023067,0.544439971446991,0.5766000151634216,35.30922628868343,0.6470175937942215,0.8041858673095703
52
+ step_325000_autoencoder_setting_0_cfg_1.0,40.59112778457501,0.526479959487915,0.5719000101089478,32.94323669682364,0.7779966445993756,0.8525848388671875
53
+ step_330000_autoencoder_setting_0_cfg_1.0,41.36808095788467,0.5224800109863281,0.57669997215271,31.92972678430424,0.8519911306550041,0.8611679077148438
54
+ step_335000_autoencoder_setting_0_cfg_1.0,40.70737066945162,0.5106399655342102,0.5771999955177307,31.90316849544508,0.8207527243075626,0.8548498153686523
55
+ step_340000_autoencoder_setting_0_cfg_1.0,55.28661371068199,0.4687599837779999,0.5615999698638916,24.1637554510839,0.5237266040978577,1.0663270950317383
56
+ step_35000_autoencoder_setting_0_cfg_1.0,47.86400868705658,0.4843999743461609,0.5753999948501587,24.44938966535462,0.4740879065412466,0.9522438049316406
57
+ step_40000_autoencoder_setting_0_cfg_1.0,44.9806935532348,0.5034399628639221,0.5691999793052673,26.29743248955851,0.5344097675106841,0.9119510650634766
58
+ step_45000_autoencoder_setting_0_cfg_1.0,42.565378047629565,0.5239599943161011,0.5755000114440918,28.31337645810432,0.7533079439820995,0.8807182312011719
59
+ step_50000_autoencoder_setting_0_cfg_1.0,40.306019801100376,0.5388399958610535,0.5862999558448792,30.31891718660548,0.8627970481147651,0.8524656295776367
60
+ step_5000_autoencoder_setting_0_cfg_1.0,94.56607537638756,0.2631199955940246,0.3899999856948852,12.82565688732519,0.2361612010165683,1.894235610961914
61
+ step_55000_autoencoder_setting_0_cfg_1.0,37.902322258016966,0.5573999881744385,0.5845000147819519,32.649221478044026,0.935466874893832,0.8213520050048828
62
+ step_60000_autoencoder_setting_0_cfg_1.0,35.683201608006414,0.5734800100326538,0.5792999863624573,35.13069249041864,1.0521529429123773,0.7933378219604492
63
+ step_65000_autoencoder_setting_0_cfg_1.0,33.64241122812501,0.5882200002670288,0.5812000036239624,37.57651136185181,1.069450384565616,0.7615089416503906
64
+ step_70000_autoencoder_setting_0_cfg_1.0,32.061544728339015,0.5981799960136414,0.5773999691009521,39.65159192976438,1.0475217356430864,0.7382631301879883
65
+ step_75000_autoencoder_setting_0_cfg_1.0,30.8947741839886,0.6048799753189087,0.5803999900817871,41.48548670094574,1.1469000012472177,0.7206201553344727
66
+ step_80000_autoencoder_setting_0_cfg_1.0,29.825967575364075,0.6105200052261353,0.5796999931335449,42.90583151384847,0.9982311615890564,0.7059574127197266
67
+ step_85000_autoencoder_setting_0_cfg_1.0,28.89284787860163,0.616159975528717,0.5788999795913696,44.170102925623326,1.238303746466466,0.6911754608154297
68
+ step_90000_autoencoder_setting_0_cfg_1.0,28.134619316356066,0.6207000017166138,0.5791999697685242,45.48569601232439,1.263262369188022,0.6834268569946289
69
+ step_95000_autoencoder_setting_0_cfg_1.0,27.76390992589802,0.6223999857902527,0.5803999900817871,46.33348094756814,1.1956481347018857,0.6825923919677734
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_cs/log.txt ADDED
The diff for this file is too large to render. See raw diff
 
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_cs/model.txt ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FP8COATDiT(
2
+ (x_embedder): PatchEmbed(
3
+ (proj): Conv2d(32, 1152, kernel_size=(1, 1), stride=(1, 1))
4
+ (norm): Identity()
5
+ )
6
+ (t_embedder): TimestepEmbedder(
7
+ (mlp): Sequential(
8
+ (0): Linear(in_features=256, out_features=1152, bias=True)
9
+ (1): SiLU()
10
+ (2): Linear(in_features=1152, out_features=1152, bias=True)
11
+ )
12
+ )
13
+ (y_embedder): LabelEmbedder(
14
+ (embedding_table): Embedding(1001, 1152)
15
+ )
16
+ (blocks): ModuleList(
17
+ (0-27): 28 x FP8COATDiTBlock(
18
+ (norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
19
+ (attn): FP8COATAttention(
20
+ (qkv): QAct_Linear(in_features=1152, out_features=3456, bias=True)
21
+ (q_norm): Identity()
22
+ (k_norm): Identity()
23
+ (attn_drop): Dropout(p=0.0, inplace=False)
24
+ (proj): QAct_Linear(in_features=1152, out_features=1152, bias=True)
25
+ (proj_drop): Dropout(p=0.0, inplace=False)
26
+ (qact_q_quantize): QAct_Quantize()
27
+ (qact_k_quantize): QAct_Quantize()
28
+ )
29
+ (norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
30
+ (mlp): FP8COATMlp(
31
+ (fc1): QAct_Linear(in_features=1152, out_features=4608, bias=True)
32
+ (act): GELU(approximate='tanh')
33
+ (drop1): Dropout(p=0, inplace=False)
34
+ (norm): Identity()
35
+ (fc2): QAct_Linear(in_features=4608, out_features=1152, bias=True)
36
+ (drop2): Dropout(p=0, inplace=False)
37
+ (qact_quantize_gelu): QAct_Quantize()
38
+ (qact_quantize_norm): QAct_Quantize()
39
+ )
40
+ (adaLN_modulation): Sequential(
41
+ (0): SiLU()
42
+ (1): Linear(in_features=1152, out_features=6912, bias=True)
43
+ )
44
+ (qact_quantize_norm1): QAct_Quantize()
45
+ (qact_quantize_norm2): QAct_Quantize()
46
+ )
47
+ )
48
+ (final_layer): FinalLayer(
49
+ (norm_final): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
50
+ (linear): Linear(in_features=1152, out_features=32, bias=True)
51
+ (adaLN_modulation): Sequential(
52
+ (0): SiLU()
53
+ (1): Linear(in_features=1152, out_features=2304, bias=True)
54
+ )
55
+ )
56
+ )
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_cs/slurm.sh ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -A nvr_elm_llm #account
3
+ #SBATCH -p grizzly,polar,polar3,polar4 #partition
4
+ #SBATCH -t 04:00:00 #wall time limit, hr:min:sec
5
+ #SBATCH -N 1 #number of nodes
6
+ #SBATCH -J train_diffusion_dit #job name
7
+ #SBATCH --array=1-30%1
8
+ #SBATCH --output=exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_cs/slurm_out/%A_%a.out
9
+ #SBATCH --gpus-per-node 8
10
+ #SBATCH --exclusive
11
+
12
+ export LOGLEVEL=INFO
13
+ export PATH="/home/hcai/workspace/anaconda3/envs/efficientvit/bin:$PATH"
14
+
15
+ cd /lustre/fs12/portfolios/nvr/users/hcai/workspace/code/fp8-dit
16
+
17
+ read -r -d '' cmd <<EOF
18
+ torchrun --nnodes=1 --nproc_per_node=8 \
19
+ -m efficientvit.diffusioncore.trainer yaml=exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_cs/config.yaml
20
+ EOF
21
+
22
+ srun bash -c "${cmd}"
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_cs/step_100000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9007db4f24c3816618b1d3d700296016608d13f5894be542c5d6c6a57c631482
3
+ size 10798884178
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_cs/step_200000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66c5513b9e354f76d96d634f251170143fe849e8287b640e3e7bc365899a67ac
3
+ size 10798884178
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/mxfp4_e2m1_cs/step_300000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98755caee05af2ded160178bb7417f63ae883b6495f962d18eed85303f9a9ccc
3
+ size 10798884178