dataset:
  video_processor: ShardedVideoProcessor
  bert_name: bert-base-uncased
  meta_processor: ShardedHow2MetaProcessor
  train_path: data/how2/how2_s3d_train.lst
  val_path: data/how2/how2_s3d_val.lst
  vfeat_dir: data/feat/feat_how2_s3d_shard_small
  text_processor: ShardedTextProcessor
  tfeat_dir: data/feat/feat_how2_s3d_shard_small/raw_caption_dedup.bert-base-uncased.
  aligner: MFMMLMAligner
  subsampling: 32
  sampled_min_len: 8
  sampled_max_len: 64
  max_video_len: 32
  max_len: 96
  lazy_vfeat_mask: true
  mfm_probability: 0.15
  mlm_probability: 0.15
  mm_prob: 0.5
fairseq:
  common:
    tensorboard_logdir: run
    log_interval: 1000
    fp16: true
  dataset:
    num_workers: 4
    batch_size: 256
  optimization:
    lr:
    - 5.0e-05
    clip_norm: 2.0
    optimizer: adam
    adam_betas: (0.9, 0.98)
    lr_scheduler: polynomial_decay
    total_num_update: 1000000
    warmup_updates: 1000
    weight_decay: 0.0
    ddp_backend: no_c10d
    max_epoch: 15
  checkpoint:
    save_dir: runs/mtm/vlm
    save_interval_updates: 1024
    keep_interval_updates: 2
    keep_last_epochs: 30
task_type: sweep_big
slurm_config: big
eval:
  save_path: runs/mtm/vlm
model:
  model_cls: MMFusionMTM
  mm_encoder_cls: MMBertForMFMMLM
  use_seg_emb: true
loss:
  loss_cls: MTM
task: VLMTask