Upload folder using huggingface_hub
Browse files- exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/checkpoint.pt +3 -0
- exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/checkpoint_.pt +3 -0
- exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/config.yaml +271 -0
- exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/eval_results.csv +77 -0
- exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/log.txt +0 -0
- exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/model.txt +56 -0
- exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/slurm.sh +22 -0
- exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_100000.pt +3 -0
- exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_200000.pt +3 -0
- exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_300000.pt +3 -0
- exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_400000.pt +3 -0
- exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_500000.pt +3 -0
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/checkpoint.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02b88b20fa7a404ff2ebcb1dae8300a4ca50433c19b3d02101b8f5ff86108ece
|
3 |
+
size 10798884178
|
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/checkpoint_.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02b88b20fa7a404ff2ebcb1dae8300a4ca50433c19b3d02101b8f5ff86108ece
|
3 |
+
size 10798884178
|
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/config.yaml
ADDED
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
run_dir: exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs
|
2 |
+
seed: 0
|
3 |
+
allow_tf32: true
|
4 |
+
timeout: null
|
5 |
+
resolution: 256
|
6 |
+
amp: bf16
|
7 |
+
cfg_scale: 1.0
|
8 |
+
evaluate_split: test
|
9 |
+
eval_dir_name: null
|
10 |
+
num_save_images: 64
|
11 |
+
save_all_images: false
|
12 |
+
save_image_format: jpg
|
13 |
+
save_images_at_all_procs: false
|
14 |
+
save_latent_samples: false
|
15 |
+
latent_samples_dir: null
|
16 |
+
evaluate_dataset: sample_class
|
17 |
+
sample_class:
|
18 |
+
name: SampleClass
|
19 |
+
batch_size: 128
|
20 |
+
n_worker: 8
|
21 |
+
drop_last: false
|
22 |
+
seed: 0
|
23 |
+
shuffle: false
|
24 |
+
num_classes: 1000
|
25 |
+
num_samples: 50000
|
26 |
+
autoencoder:
|
27 |
+
num_settings: 1
|
28 |
+
name: dc-ae-f32c32-in-1.0-256px
|
29 |
+
scaling_factor: 0.3285
|
30 |
+
latent_channels: null
|
31 |
+
autoencoder_dtype: fp32
|
32 |
+
eval_autoencoder_setting_list: null
|
33 |
+
model: fp8coat_dit
|
34 |
+
dit:
|
35 |
+
name: DiT
|
36 |
+
in_channels: 32
|
37 |
+
input_size: 8
|
38 |
+
cfg_channels: null
|
39 |
+
pretrained_path: null
|
40 |
+
pretrained_source: dc-ae
|
41 |
+
train_scheduler: SiTSampler
|
42 |
+
eval_scheduler: ODE_heun2
|
43 |
+
num_inference_steps: 30
|
44 |
+
flow_shift: 3.0
|
45 |
+
reverse_time: false
|
46 |
+
use_cads: false
|
47 |
+
cads_noise_scale: 0.1
|
48 |
+
cads_mixing_factor: 1.0
|
49 |
+
cads_tau_min: 0.2
|
50 |
+
cads_tau_max: 0.9
|
51 |
+
use_guidance_interval: false
|
52 |
+
guidance_t_min: 0.2
|
53 |
+
guidance_t_max: 0.8
|
54 |
+
count_nfe: false
|
55 |
+
patch_size: 1
|
56 |
+
hidden_size: 1152
|
57 |
+
depth: 28
|
58 |
+
num_heads: 16
|
59 |
+
mlp_ratio: 4.0
|
60 |
+
post_norm: false
|
61 |
+
class_dropout_prob: 0.1
|
62 |
+
num_classes: 1000
|
63 |
+
learn_sigma: false
|
64 |
+
unconditional: false
|
65 |
+
use_checkpoint: true
|
66 |
+
adaptive_channel: false
|
67 |
+
adaptive_channel_share_weights: true
|
68 |
+
only_load_backbone: false
|
69 |
+
freeze_backbone: false
|
70 |
+
uvit:
|
71 |
+
name: UViT
|
72 |
+
in_channels: 4
|
73 |
+
input_size: 32
|
74 |
+
cfg_channels: null
|
75 |
+
pretrained_path: null
|
76 |
+
pretrained_source: dc-ae
|
77 |
+
train_scheduler: DPM_Solver
|
78 |
+
eval_scheduler: DPM_Solver
|
79 |
+
num_inference_steps: 30
|
80 |
+
flow_shift: 3.0
|
81 |
+
reverse_time: false
|
82 |
+
use_cads: false
|
83 |
+
cads_noise_scale: 0.1
|
84 |
+
cads_mixing_factor: 1.0
|
85 |
+
cads_tau_min: 0.2
|
86 |
+
cads_tau_max: 0.9
|
87 |
+
use_guidance_interval: false
|
88 |
+
guidance_t_min: 0.2
|
89 |
+
guidance_t_max: 0.8
|
90 |
+
count_nfe: false
|
91 |
+
patch_size: 2
|
92 |
+
hidden_size: 1152
|
93 |
+
depth: 28
|
94 |
+
num_heads: 16
|
95 |
+
mlp_ratio: 4.0
|
96 |
+
mlp_time_embed: false
|
97 |
+
qkv_bias: false
|
98 |
+
act_layer: gelu
|
99 |
+
use_checkpoint: true
|
100 |
+
class_dropout_prob: 0.1
|
101 |
+
num_classes: 1000
|
102 |
+
attn_mode: null
|
103 |
+
sana_cls:
|
104 |
+
name: SanaCls
|
105 |
+
in_channels: 4
|
106 |
+
input_size: 32
|
107 |
+
cfg_channels: null
|
108 |
+
pretrained_path: null
|
109 |
+
pretrained_source: dc-ae
|
110 |
+
train_scheduler: SanaScheduler
|
111 |
+
eval_scheduler: SanaScheduler
|
112 |
+
num_inference_steps: 250
|
113 |
+
flow_shift: 3.0
|
114 |
+
reverse_time: false
|
115 |
+
use_cads: false
|
116 |
+
cads_noise_scale: 0.1
|
117 |
+
cads_mixing_factor: 1.0
|
118 |
+
cads_tau_min: 0.2
|
119 |
+
cads_tau_max: 0.9
|
120 |
+
use_guidance_interval: false
|
121 |
+
guidance_t_min: 0.2
|
122 |
+
guidance_t_max: 0.8
|
123 |
+
count_nfe: false
|
124 |
+
patch_size: 2
|
125 |
+
hidden_size: 1152
|
126 |
+
depth: 28
|
127 |
+
num_heads: 16
|
128 |
+
mlp_ratio: 4.0
|
129 |
+
post_norm: false
|
130 |
+
class_dropout_prob: 0.1
|
131 |
+
num_classes: 1000
|
132 |
+
unconditional: false
|
133 |
+
use_checkpoint: true
|
134 |
+
only_load_backbone: false
|
135 |
+
freeze_backbone: false
|
136 |
+
learn_sigma: false
|
137 |
+
usana_cls:
|
138 |
+
name: USanaCls
|
139 |
+
in_channels: 4
|
140 |
+
input_size: 32
|
141 |
+
cfg_channels: null
|
142 |
+
pretrained_path: null
|
143 |
+
pretrained_source: dc-ae
|
144 |
+
train_scheduler: DPM_Solver
|
145 |
+
eval_scheduler: DPM_Solver
|
146 |
+
num_inference_steps: 30
|
147 |
+
flow_shift: 3.0
|
148 |
+
reverse_time: false
|
149 |
+
use_cads: false
|
150 |
+
cads_noise_scale: 0.1
|
151 |
+
cads_mixing_factor: 1.0
|
152 |
+
cads_tau_min: 0.2
|
153 |
+
cads_tau_max: 0.9
|
154 |
+
use_guidance_interval: false
|
155 |
+
guidance_t_min: 0.2
|
156 |
+
guidance_t_max: 0.8
|
157 |
+
count_nfe: false
|
158 |
+
patch_size: 2
|
159 |
+
hidden_size: 1152
|
160 |
+
depth: 28
|
161 |
+
num_heads: 16
|
162 |
+
mlp_ratio: 4.0
|
163 |
+
mlp_time_embed: false
|
164 |
+
qkv_bias: false
|
165 |
+
act_layer: gelu
|
166 |
+
use_checkpoint: true
|
167 |
+
class_dropout_prob: 0.1
|
168 |
+
num_classes: 1000
|
169 |
+
num_training_steps: 1000
|
170 |
+
fp8:
|
171 |
+
name: FP8DiT
|
172 |
+
fp8coat:
|
173 |
+
name: FP8COATDiT
|
174 |
+
qchoice: linear
|
175 |
+
symm: true
|
176 |
+
row_blocksize: -1
|
177 |
+
col_blocksize: -1
|
178 |
+
linear_row_blocksize: 1
|
179 |
+
linear_col_blocksize: 16
|
180 |
+
min_blockunit_row: -1
|
181 |
+
min_blockunit_col: -1
|
182 |
+
fabit: NVE2M1
|
183 |
+
fwbit: NVE2M1
|
184 |
+
babit: NVE2M1
|
185 |
+
bwbit: NVE2M1
|
186 |
+
bobit: NVE2M1
|
187 |
+
epsilon: 1.0e-08
|
188 |
+
compute_fid: true
|
189 |
+
fid:
|
190 |
+
save_path: null
|
191 |
+
ref_path: assets/data/fid/imagenet_train_256.npz
|
192 |
+
precision_recall_ref_path: assets/data/precision_recall/VIRTUAL_imagenet256.npy
|
193 |
+
compute_inception_score: true
|
194 |
+
inception_score: {}
|
195 |
+
compute_cmmd: true
|
196 |
+
cmmd:
|
197 |
+
save_path: null
|
198 |
+
ref_path: assets/data/cmmd/VIRTUAL_imagenet256.npy
|
199 |
+
verbose: false
|
200 |
+
train_dataset: latent_imagenet
|
201 |
+
latent_imagenet:
|
202 |
+
name: LatentImageNet
|
203 |
+
batch_size: 128
|
204 |
+
n_worker: 8
|
205 |
+
drop_last: true
|
206 |
+
seed: 0
|
207 |
+
shuffle: true
|
208 |
+
data_dir: assets/data/latent/dc_ae_f32c32_in_1.0_256px/imagenet_256
|
209 |
+
latent_mjhq:
|
210 |
+
name: LatentMJHQ
|
211 |
+
batch_size: 32
|
212 |
+
n_worker: 8
|
213 |
+
drop_last: true
|
214 |
+
seed: 0
|
215 |
+
shuffle: true
|
216 |
+
data_dir: assets/data/latent/dc_ae_f32c32/mjhq_1024
|
217 |
+
latent_ffhq:
|
218 |
+
name: LatentFFHQ
|
219 |
+
batch_size: 32
|
220 |
+
n_worker: 8
|
221 |
+
drop_last: true
|
222 |
+
seed: 0
|
223 |
+
shuffle: true
|
224 |
+
data_dir: assets/data/latent/dc_ae_f32c32/ffhq_1024
|
225 |
+
latent_mapillary_vistas:
|
226 |
+
name: LatentMapillaryVistas
|
227 |
+
batch_size: 32
|
228 |
+
n_worker: 8
|
229 |
+
drop_last: true
|
230 |
+
seed: 0
|
231 |
+
shuffle: true
|
232 |
+
data_dir: assets/data/latent/dc_ae_f32c32/mapillary_vistas_2048
|
233 |
+
latent_multiple_channel_imagenet:
|
234 |
+
name: LatentMultipleChannelImageNet
|
235 |
+
batch_size: 32
|
236 |
+
n_worker: 8
|
237 |
+
drop_last: true
|
238 |
+
seed: 0
|
239 |
+
shuffle: true
|
240 |
+
dataset_sample_ratio: null
|
241 |
+
num_channels_list: null
|
242 |
+
data_dirs:
|
243 |
+
- assets/data/latent/dc_ae_f32c32/imagenet_512
|
244 |
+
resume: true
|
245 |
+
resume_path: null
|
246 |
+
resume_schedule: true
|
247 |
+
num_epochs: null
|
248 |
+
max_steps: 500000
|
249 |
+
clip_grad: null
|
250 |
+
num_store_images: 64
|
251 |
+
save_checkpoint_steps: 1000
|
252 |
+
eval_steps: 20000
|
253 |
+
save_eval_checkpoint_steps: 100000
|
254 |
+
optimizer:
|
255 |
+
name: adamw
|
256 |
+
lr: 0.0001
|
257 |
+
warmup_lr: 0.0
|
258 |
+
weight_decay: 0.0
|
259 |
+
no_wd_keys: []
|
260 |
+
betas:
|
261 |
+
- 0.9
|
262 |
+
- 0.999
|
263 |
+
lr_scheduler:
|
264 |
+
name: constant
|
265 |
+
warmup_steps: 1000
|
266 |
+
log: true
|
267 |
+
wandb_entity: han2024
|
268 |
+
wandb_project: dc_ae_diffusion
|
269 |
+
ema_decay: 0.9999
|
270 |
+
ema_warmup_steps: 2000
|
271 |
+
eval_ema: true
|
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/eval_results.csv
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,fid,precision,recall,inception_score_mean,inception_score_std,cmmd
|
2 |
+
step_100000_autoencoder_setting_0_cfg_1.0,27.82081598705525,0.580020010471344,0.6139999628067017,46.420777211047714,1.0247874181815988,0.6939172744750977
|
3 |
+
step_10000_autoencoder_setting_0_cfg_1.0,78.50917779509035,0.3088999986648559,0.4490000009536743,15.570617267902197,0.3332917058620098,1.505136489868164
|
4 |
+
step_105000_autoencoder_setting_0_cfg_1.0,27.108763303815124,0.58433997631073,0.6107999682426453,47.7493937125107,1.2485355820824466,0.6833076477050781
|
5 |
+
step_110000_autoencoder_setting_0_cfg_1.0,26.572085273128607,0.5875200033187866,0.6103000044822693,48.91806079101078,1.320462264402051,0.6742477416992188
|
6 |
+
step_115000_autoencoder_setting_0_cfg_1.0,26.04053809255868,0.5945199728012085,0.6122999787330627,50.06943499322904,1.271234794980543,0.6644725799560547
|
7 |
+
step_120000_autoencoder_setting_0_cfg_1.0,25.48026900045761,0.59961998462677,0.6194999814033508,51.20050723062443,1.5078486704815637,0.6552934646606445
|
8 |
+
step_125000_autoencoder_setting_0_cfg_1.0,25.05591886633323,0.6003400087356567,0.6232999563217163,52.10802955307636,1.581453473677152,0.649571418762207
|
9 |
+
step_130000_autoencoder_setting_0_cfg_1.0,24.5844863096371,0.6046000123023987,0.6189000010490417,52.79456366277475,1.507362587027362,0.6395578384399414
|
10 |
+
step_135000_autoencoder_setting_0_cfg_1.0,24.18458923313409,0.6058200001716614,0.620199978351593,53.62105523874786,1.4409782134398583,0.6331205368041992
|
11 |
+
step_140000_autoencoder_setting_0_cfg_1.0,23.866081545629527,0.6055200099945068,0.6236000061035156,54.368579244722106,1.4469492288369885,0.6299018859863281
|
12 |
+
step_145000_autoencoder_setting_0_cfg_1.0,23.26773026378873,0.6069599986076355,0.6215999722480774,55.51033309373391,1.387295168322456,0.6201267242431641
|
13 |
+
step_150000_autoencoder_setting_0_cfg_1.0,22.445572104073108,0.6144399642944336,0.6243999600410461,57.17583559018384,1.5491262770197771,0.6072521209716797
|
14 |
+
step_15000_autoencoder_setting_0_cfg_1.0,68.12664722088442,0.3365599811077118,0.5128999948501587,17.673999189426127,0.1972535523911879,1.3135671615600586
|
15 |
+
step_155000_autoencoder_setting_0_cfg_1.0,21.6969907496823,0.6131199598312378,0.6244999766349792,58.32637361785574,1.5920632516988755,0.5936622619628906
|
16 |
+
step_160000_autoencoder_setting_0_cfg_1.0,21.259075753956097,0.6131199598312378,0.6218000054359436,59.29787913474204,1.77534055094449,0.5881786346435547
|
17 |
+
step_165000_autoencoder_setting_0_cfg_1.0,21.028425986100444,0.6096999645233154,0.6220999956130981,60.28639353096686,1.6039359868962844,0.5855560302734375
|
18 |
+
step_170000_autoencoder_setting_0_cfg_1.0,20.78157261474053,0.6087200045585632,0.623699963092804,61.02744304547458,1.5003141917637632,0.5818605422973633
|
19 |
+
step_175000_autoencoder_setting_0_cfg_1.0,20.412292669880458,0.613319993019104,0.6223999857902527,61.96596631327077,1.7293084120916875,0.577092170715332
|
20 |
+
step_180000_autoencoder_setting_0_cfg_1.0,20.27882333039713,0.612280011177063,0.6218999624252319,62.114020147042574,1.8010197629079103,0.5730390548706055
|
21 |
+
step_185000_autoencoder_setting_0_cfg_1.0,19.89168937788316,0.615339994430542,0.6232999563217163,63.27708418568627,1.451279607657161,0.5658864974975586
|
22 |
+
step_190000_autoencoder_setting_0_cfg_1.0,19.419406988353785,0.6139999628067017,0.6232999563217163,64.56478640219703,1.471862549083792,0.5564689636230469
|
23 |
+
step_195000_autoencoder_setting_0_cfg_1.0,18.97561403631505,0.6157199740409851,0.6226999759674072,65.76082161550052,1.3146110081612372,0.5470514297485352
|
24 |
+
step_200000_autoencoder_setting_0_cfg_1.0,19.026450618121032,0.6161999702453613,0.6227999925613403,66.43766544317899,1.4349545277425972,0.5500316619873047
|
25 |
+
step_20000_autoencoder_setting_0_cfg_1.0,69.80893117916492,0.3557800054550171,0.5060999989509583,17.63329056852647,0.2319949836815072,1.3469457626342771
|
26 |
+
step_205000_autoencoder_setting_0_cfg_1.0,18.5947097582615,0.6192600131034851,0.6297999620437622,67.63410420493474,1.536225845054557,0.5457401275634766
|
27 |
+
step_210000_autoencoder_setting_0_cfg_1.0,18.233572638968496,0.6248599886894226,0.623199999332428,68.6561121314874,1.5783403374324958,0.538945198059082
|
28 |
+
step_215000_autoencoder_setting_0_cfg_1.0,17.797767795291293,0.6285399794578552,0.6223999857902527,70.42323234086862,1.5883405198437344,0.5366802215576172
|
29 |
+
step_220000_autoencoder_setting_0_cfg_1.0,17.74635629321216,0.6273399591445923,0.6311999559402466,70.2145545808094,1.5785695221108964,0.5356073379516602
|
30 |
+
step_225000_autoencoder_setting_0_cfg_1.0,17.623212029857086,0.6253600120544434,0.6233999729156494,70.10889264234,1.402678692708326,0.5316734313964844
|
31 |
+
step_230000_autoencoder_setting_0_cfg_1.0,17.56820183523979,0.6272199749946594,0.6281999945640564,70.24948808293567,1.3719698368922175,0.5235671997070312
|
32 |
+
step_235000_autoencoder_setting_0_cfg_1.0,17.140324617321994,0.6325399875640869,0.6326999664306641,71.8477116161524,1.4453070017749934,0.5209445953369141
|
33 |
+
step_240000_autoencoder_setting_0_cfg_1.0,16.875031621310654,0.6319199800491333,0.6304000020027161,72.40102219505278,1.572925419822807,0.5216598510742188
|
34 |
+
step_245000_autoencoder_setting_0_cfg_1.0,16.603363011407623,0.6376399993896484,0.6248999834060669,73.19131748156653,1.634987063086786,0.5117654800415039
|
35 |
+
step_250000_autoencoder_setting_0_cfg_1.0,16.0659137008808,0.644540011882782,0.6232999563217163,75.05675046856344,1.4611675195953364,0.4990100860595703
|
36 |
+
step_25000_autoencoder_setting_0_cfg_1.0,69.20439116845444,0.3652600049972534,0.5146999955177307,17.835205670638455,0.1921896922272648,1.3399124145507812
|
37 |
+
step_255000_autoencoder_setting_0_cfg_1.0,16.18191134397,0.643779993057251,0.6266999840736389,74.87994735992554,1.5313646958234384,0.5011558532714844
|
38 |
+
step_260000_autoencoder_setting_0_cfg_1.0,16.307349111818837,0.6428399682044983,0.6293999552726746,74.55537296593175,1.7462488184454217,0.5011558532714844
|
39 |
+
step_265000_autoencoder_setting_0_cfg_1.0,16.351515906573184,0.6398400068283081,0.6279999613761902,73.60775233144288,2.1094810857369937,0.4945993423461914
|
40 |
+
step_270000_autoencoder_setting_0_cfg_1.0,15.720775344146716,0.6462000012397766,0.6182999610900879,75.93336593862954,1.9112681424085896,0.4835128784179687
|
41 |
+
step_275000_autoencoder_setting_0_cfg_1.0,15.563540840291353,0.6510800123214722,0.6226999759674072,76.73444862274553,2.1060209810007886,0.4808902740478515
|
42 |
+
step_280000_autoencoder_setting_0_cfg_1.0,15.702402057568406,0.6554399728775024,0.6204000115394592,76.80205589593925,1.7598216362881405,0.4860162734985351
|
43 |
+
step_285000_autoencoder_setting_0_cfg_1.0,15.735781046575084,0.6553199887275696,0.6211000084877014,76.72250513048573,1.9211665275192824,0.4916191101074219
|
44 |
+
step_290000_autoencoder_setting_0_cfg_1.0,15.69120289879271,0.6527199745178223,0.6200000047683716,77.60842637374755,2.02590143858928,0.4960298538208008
|
45 |
+
step_295000_autoencoder_setting_0_cfg_1.0,15.545950959937784,0.6571399569511414,0.626800000667572,78.00591850878438,1.96422643425752,0.4917383193969726
|
46 |
+
step_300000_autoencoder_setting_0_cfg_1.0,14.454307153848449,0.6623199582099915,0.616599977016449,82.6760750956561,2.181097692838078,0.4781484603881836
|
47 |
+
step_30000_autoencoder_setting_0_cfg_1.0,71.33487248804539,0.3709399998188019,0.5120999813079834,17.564975662944967,0.2190062162059579,1.3840198516845703
|
48 |
+
step_305000_autoencoder_setting_0_cfg_1.0,14.727343043843916,0.660319983959198,0.618399977684021,81.5701813105076,1.9043765039281204,0.4681348800659179
|
49 |
+
step_310000_autoencoder_setting_0_cfg_1.0,15.338016119750534,0.6581599712371826,0.6150999665260315,78.50163318136279,1.3744470633165384,0.4767179489135742
|
50 |
+
step_315000_autoencoder_setting_0_cfg_1.0,15.670858997474229,0.6570199728012085,0.6189999580383301,77.5421772667251,1.3151557061430104,0.4895925521850586
|
51 |
+
step_320000_autoencoder_setting_0_cfg_1.0,15.351098933787853,0.6589800119400024,0.6159999966621399,78.85159621337543,1.8036081361014704,0.4880428314208984
|
52 |
+
step_325000_autoencoder_setting_0_cfg_1.0,16.137012142700883,0.6609199643135071,0.6116999983787537,77.20548121816479,1.4635409927233674,0.506281852722168
|
53 |
+
step_330000_autoencoder_setting_0_cfg_1.0,16.935113423647522,0.6579399704933167,0.6132999658584595,75.14944105179288,1.6728809851158315,0.5131959915161133
|
54 |
+
step_335000_autoencoder_setting_0_cfg_1.0,16.642815521207183,0.6565999984741211,0.6182000041007996,74.89506149880103,1.7185640977473733,0.5041360855102539
|
55 |
+
step_340000_autoencoder_setting_0_cfg_1.0,16.329822569891235,0.6578199863433838,0.6171000003814697,76.13033508421296,1.453925495743798,0.5016326904296875
|
56 |
+
step_35000_autoencoder_setting_0_cfg_1.0,68.4526505825479,0.3855199813842773,0.523099958896637,18.50040439034826,0.2102358995042317,1.3387203216552734
|
57 |
+
step_360000_autoencoder_setting_0_cfg_1.0,18.51766157845032,0.641539990901947,0.6207000017166138,68.6112735921217,1.5894359804544205,0.5263090133666992
|
58 |
+
step_380000_autoencoder_setting_0_cfg_1.0,19.047118711635164,0.6372199654579163,0.6157000064849854,67.32176540925762,1.6363590511183173,0.5387067794799805
|
59 |
+
step_400000_autoencoder_setting_0_cfg_1.0,20.46042710661544,0.6325799822807312,0.6110000014305115,64.22305233265526,1.8580822376880224,0.5658864974975586
|
60 |
+
step_40000_autoencoder_setting_0_cfg_1.0,59.98117677660253,0.4123599827289581,0.5669999718666077,20.98464259395777,0.3452826016560876,1.1975765228271484
|
61 |
+
step_420000_autoencoder_setting_0_cfg_1.0,20.048889679310776,0.6316800117492676,0.611299991607666,64.82227407909255,1.662291796280963,0.5481243133544922
|
62 |
+
step_440000_autoencoder_setting_0_cfg_1.0,21.6126295301454,0.6124599575996399,0.6274999976158142,62.33693934455978,1.8068080125059365,0.582575798034668
|
63 |
+
step_45000_autoencoder_setting_0_cfg_1.0,52.35728384337608,0.4363399744033813,0.5849999785423279,23.625650383680828,0.4867593086153808,1.0684728622436523
|
64 |
+
step_460000_autoencoder_setting_0_cfg_1.0,24.502855264928712,0.5979999899864197,0.6233999729156494,57.90086638570746,1.6518872144366603,0.6576776504516602
|
65 |
+
step_480000_autoencoder_setting_0_cfg_1.0,22.52686733943784,0.6110399961471558,0.6092999577522278,62.57142330892974,1.7219275950198052,0.6278753280639648
|
66 |
+
step_500000_autoencoder_setting_0_cfg_1.0,23.343317025502756,0.6067399978637695,0.6193000078201294,60.4235201455905,1.2737023659678393,0.6389617919921875
|
67 |
+
step_50000_autoencoder_setting_0_cfg_1.0,49.921173116398734,0.4559399783611297,0.5971999764442444,25.213578656547632,0.4281732070550088,1.0330677032470703
|
68 |
+
step_5000_autoencoder_setting_0_cfg_1.0,90.67715449183368,0.2694199979305267,0.3879999816417694,13.429481826679131,0.210033970069514,1.7157793045043943
|
69 |
+
step_55000_autoencoder_setting_0_cfg_1.0,47.73685522428235,0.4755999743938446,0.5899999737739563,26.89910198011636,0.4931733345923623,1.0018348693847656
|
70 |
+
step_60000_autoencoder_setting_0_cfg_1.0,44.80032758845715,0.4915199875831604,0.5986999869346619,28.861349883325506,0.6176201506732034,0.9595155715942384
|
71 |
+
step_65000_autoencoder_setting_0_cfg_1.0,41.53898757569908,0.5051400065422058,0.6049000024795532,31.154324157233617,0.6160978188506026,0.9080171585083008
|
72 |
+
step_70000_autoencoder_setting_0_cfg_1.0,37.99431155890278,0.5188400149345398,0.6146000027656555,33.93576625895251,0.5479828376716884,0.8559226989746094
|
73 |
+
step_75000_autoencoder_setting_0_cfg_1.0,35.3211073866222,0.5319399833679199,0.6132999658584595,36.31162383244789,0.8240562594862988,0.8103847503662109
|
74 |
+
step_80000_autoencoder_setting_0_cfg_1.0,33.006981752393926,0.5404999852180481,0.6175000071525574,38.86142600259436,0.8896361831768164,0.774383544921875
|
75 |
+
step_85000_autoencoder_setting_0_cfg_1.0,30.9545240118008,0.5546599626541138,0.6133999824523926,41.26951645107668,0.9114569673854772,0.7370710372924805
|
76 |
+
step_90000_autoencoder_setting_0_cfg_1.0,29.674156243314485,0.5648999810218811,0.6136999726295471,43.06725174936357,0.909306073199211,0.7197856903076172
|
77 |
+
step_95000_autoencoder_setting_0_cfg_1.0,28.471735321998835,0.572659969329834,0.6085000038146973,44.97528607860245,0.9621155062307708,0.7002353668212891
|
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/log.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/model.txt
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FP8COATDiT(
|
2 |
+
(x_embedder): PatchEmbed(
|
3 |
+
(proj): Conv2d(32, 1152, kernel_size=(1, 1), stride=(1, 1))
|
4 |
+
(norm): Identity()
|
5 |
+
)
|
6 |
+
(t_embedder): TimestepEmbedder(
|
7 |
+
(mlp): Sequential(
|
8 |
+
(0): Linear(in_features=256, out_features=1152, bias=True)
|
9 |
+
(1): SiLU()
|
10 |
+
(2): Linear(in_features=1152, out_features=1152, bias=True)
|
11 |
+
)
|
12 |
+
)
|
13 |
+
(y_embedder): LabelEmbedder(
|
14 |
+
(embedding_table): Embedding(1001, 1152)
|
15 |
+
)
|
16 |
+
(blocks): ModuleList(
|
17 |
+
(0-27): 28 x FP8COATDiTBlock(
|
18 |
+
(norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
|
19 |
+
(attn): FP8COATAttention(
|
20 |
+
(qkv): QAct_Linear(in_features=1152, out_features=3456, bias=True)
|
21 |
+
(q_norm): Identity()
|
22 |
+
(k_norm): Identity()
|
23 |
+
(attn_drop): Dropout(p=0.0, inplace=False)
|
24 |
+
(proj): QAct_Linear(in_features=1152, out_features=1152, bias=True)
|
25 |
+
(proj_drop): Dropout(p=0.0, inplace=False)
|
26 |
+
(qact_q_quantize): QAct_Quantize()
|
27 |
+
(qact_k_quantize): QAct_Quantize()
|
28 |
+
)
|
29 |
+
(norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
|
30 |
+
(mlp): FP8COATMlp(
|
31 |
+
(fc1): QAct_Linear(in_features=1152, out_features=4608, bias=True)
|
32 |
+
(act): GELU(approximate='tanh')
|
33 |
+
(drop1): Dropout(p=0, inplace=False)
|
34 |
+
(norm): Identity()
|
35 |
+
(fc2): QAct_Linear(in_features=4608, out_features=1152, bias=True)
|
36 |
+
(drop2): Dropout(p=0, inplace=False)
|
37 |
+
(qact_quantize_gelu): QAct_Quantize()
|
38 |
+
(qact_quantize_norm): QAct_Quantize()
|
39 |
+
)
|
40 |
+
(adaLN_modulation): Sequential(
|
41 |
+
(0): SiLU()
|
42 |
+
(1): Linear(in_features=1152, out_features=6912, bias=True)
|
43 |
+
)
|
44 |
+
(qact_quantize_norm1): QAct_Quantize()
|
45 |
+
(qact_quantize_norm2): QAct_Quantize()
|
46 |
+
)
|
47 |
+
)
|
48 |
+
(final_layer): FinalLayer(
|
49 |
+
(norm_final): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
|
50 |
+
(linear): Linear(in_features=1152, out_features=32, bias=True)
|
51 |
+
(adaLN_modulation): Sequential(
|
52 |
+
(0): SiLU()
|
53 |
+
(1): Linear(in_features=1152, out_features=2304, bias=True)
|
54 |
+
)
|
55 |
+
)
|
56 |
+
)
|
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/slurm.sh
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
#SBATCH -A nvr_elm_llm #account
|
3 |
+
#SBATCH -p grizzly,polar,polar3,polar4 #partition
|
4 |
+
#SBATCH -t 04:00:00 #wall time limit, hr:min:sec
|
5 |
+
#SBATCH -N 1 #number of nodes
|
6 |
+
#SBATCH -J train_diffusion_dit #job name
|
7 |
+
#SBATCH --array=1-30%1
|
8 |
+
#SBATCH --output=exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/slurm_out/%A_%a.out
|
9 |
+
#SBATCH --gpus-per-node 8
|
10 |
+
#SBATCH --exclusive
|
11 |
+
|
12 |
+
export LOGLEVEL=INFO
|
13 |
+
export PATH="/home/hcai/workspace/anaconda3/envs/efficientvit/bin:$PATH"
|
14 |
+
|
15 |
+
cd /lustre/fs12/portfolios/nvr/users/hcai/workspace/code/fp8-dit
|
16 |
+
|
17 |
+
read -r -d '' cmd <<EOF
|
18 |
+
torchrun --nnodes=1 --nproc_per_node=8 \
|
19 |
+
-m efficientvit.diffusioncore.trainer yaml=exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/config.yaml
|
20 |
+
EOF
|
21 |
+
|
22 |
+
srun bash -c "${cmd}"
|
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_100000.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46e93e161bc858fecdac014ca25bd5c06721b2b561375aa3a3dcc8c1f990a349
|
3 |
+
size 10798884178
|
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_200000.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ef4dbcf4e31575f3e05c8413b7ab0b60e66680d1b8e48b446ae3fa9fc2a93e0
|
3 |
+
size 10798884178
|
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_300000.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:141dc8d80d0907cc16f2d3786fa475ce7a823d263208636e92ba00eeb05c5d2c
|
3 |
+
size 10798884178
|
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_400000.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4c0ae8c74d2d361bd0e4a4a1570ee2cd0d8688a26a07067e5ccbde17cdd431
|
3 |
+
size 10798884178
|
exp_hcai/latent_diffusion/imagenet_256/dc_ae_f32c32_in_1.0_256px/dit_xl_1/SiTSampler_ODE_heun2_30/nvfp4_e2m1_cs/step_500000.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11fe39b36b6414f011fa2cabf7e212f1e9f0cbb5ae631a7896b11a0f20cbade0
|
3 |
+
size 10798884178
|