# Source path: ddpd/owt_planner/.hydra/config.yaml
# Uploader: sulinliu
# Commit message: Upload 2 files
# Commit: 8893e0a (verified)
# Top-level run settings.
# NOTE(review): the meaning of each key is defined by the consuming code,
# which is not visible here; the notes below are assumptions to confirm.
# presumably the number of GPUs used by the run
ngpus: 4
# presumably the tokenizer vocabulary size — TODO confirm
tokens: 50257
compile: true
# Both directories are explicitly unset (null) in this configuration.
load_dir: null
work_dir: null
# Experiment-tracking settings (presumably Weights & Biases — confirm).
# The child keys are indented so they belong to `wandb` instead of leaking
# to the top level, where `wandb:` alone would parse as null.
wandb:
  project: openwebtext
  run_name: pred_mask_nowe
  # No run id is pinned and the run is not flagged as a resume.
  id: null
  is_resume: false
# Training-loop settings.
# The child keys are indented under `training` so that `batch_size` here does
# not collide with any other `batch_size` key at the top level of the file.
training:
  batch_size: 256
  # presumably gradient-accumulation steps — TODO confirm
  accum: 2
  n_iters: 1300001
  # Frequencies below are presumably measured in training iterations — confirm.
  snapshot_freq: 50000
  log_freq: 50
  eval_freq: 100
  snapshot_freq_for_preemption: 10000
  weight: standard
  snapshot_sampling: true
  # presumably the exponential-moving-average decay for weights — confirm
  ema: 0.9999
  weighted_by_time: false
# Dataset settings: names of the training and validation sets, plus caching.
# The child keys are indented so they belong to `data` rather than the top
# level of the file.
data:
  train: openwebtext
  valid: wikitext103
  # Absolute, user-specific path.
  # NOTE(review): likely needs overriding on any other machine.
  cache_dir: /pscratch/sd/s/sulinl/data
  # presumably the worker-process count for dataset preprocessing — confirm
  num_proc: 64
# Settings for the `graph` component.
# The child keys are indented under `graph` so that `type` here does not
# collide with any other `type` key at the top level of the file.
# NOTE(review): what "graph" denotes is defined by the consuming code.
graph:
  type: uniform
  file: data
  report_all: false
# Noise-schedule settings.
# The child keys are indented under `noise` so that `type` here does not
# collide with any other `type` key at the top level of the file.
noise:
  type: loglinear
  # presumably the lower and upper bounds of the schedule — TODO confirm
  sigma_min: 0.0001
  sigma_max: 20
# Sampling settings.
# The child keys are indented so they belong to `sampling` rather than the
# top level of the file.
sampling:
  predictor: euler
  # presumably the number of sampler steps per generated sample — confirm
  steps: 128
  noise_removal: true
# Evaluation settings.
# The child keys are indented under `eval` so that `batch_size` here does not
# collide with any other `batch_size` key at the top level of the file.
eval:
  batch_size: 256
  perplexity: true
  # Separate batch size for the perplexity evaluation.
  perplexity_batch_size: 32
# Optimizer settings.
# The child keys are indented so they belong to `optim` rather than the top
# level of the file.
optim:
  weight_decay: 0
  optimizer: AdamW
  lr: 0.0003
  beta1: 0.9
  beta2: 0.999
  eps: 1.0e-08
  # presumably the number of learning-rate warm-up steps — TODO confirm
  warmup: 2500
  # presumably the gradient-clipping threshold — TODO confirm
  grad_clip: 1.0
# Model architecture settings.
# The child keys are indented under `model` so that `type` here does not
# collide with any other `type` key at the top level of the file.
model:
  name: small
  type: ddit
  hidden_size: 768
  cond_dim: 128
  # presumably the sequence length in tokens — TODO confirm
  length: 1024
  n_blocks: 12
  n_heads: 12
  scale_by_sigma: true
  dropout: 0.1