# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

# Configuration for the `albef_vqa` architecture on the `vqa` task.
# Three top-level sections: `model` (what to build), `datasets` (which
# dataset builders to use and how to preprocess their samples), and `run`
# (optimization, inference, and runtime settings).

model:
  arch: albef_vqa
  model_type: vqav2
  load_finetuned: false

  # Same resolution as every vis_processor below (384).
  image_size: 384

datasets:
  coco_vqa:  # name of the dataset builder
    vis_processor:
      train:
        name: "blip_image_train"
        image_size: 384
      eval:
        name: "blip_image_eval"
        image_size: 384
    text_processor:
      train:
        name: "blip_question"
      eval:
        name: "blip_question"
  # Only `train` processors are configured for this builder; no `eval`
  # entries are defined for it.
  vg_vqa:  # name of the dataset builder
    vis_processor:
      train:
        name: "blip_image_train"
        image_size: 384
    text_processor:
      train:
        name: "blip_question"

run:
  task: vqa

  # optimization-specific
  lr_sched: "linear_warmup_cosine_lr"
  # Learning rates carry an explicit mantissa dot so that YAML 1.1 loaders
  # resolve them as floats too, instead of as the strings "2e-5" / "1e-6".
  init_lr: 2.0e-5
  min_lr: 1.0e-6
  weight_decay: 0.02
  max_epoch: 8
  batch_size_train: 32
  batch_size_eval: 64
  num_workers: 4

  # inference-specific
  max_len: 10
  min_len: 1
  num_beams: 3
  num_ans_candidates: 128
  inference_method: "rank"

  seed: 42
  output_dir: "output/ALBEF/VQA"

  amp: false
  # Explicit null: no checkpoint path is set to resume from.
  resume_ckpt_path: null

  evaluate: false
  train_splits: ["train"]
  test_splits: ["test"]

  # distribution-specific
  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: true