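# NOTE(review): the next three lines look like web-page residue from the file
# listing, not configuration; kept as comments so the document parses as YAML.
# jchwenger's picture
# Upload 351 files (#2)
# d9272c6 verified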
# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
# Model selection: architecture identifier plus the variant of that architecture.
# NOTE(review): how these names are resolved is decided by the consuming code,
# which is not visible from this file.
model:
  arch: blip_caption
  model_type: base_coco
  # Disabled setting kept for reference (presumably a checkpoint override —
  # confirm before re-enabling):
  # pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'
# Datasets used by this run, keyed by dataset builder name. Each builder
# declares the processors applied to images and to text; only `eval`
# processors are configured here.
datasets:
  nocaps: # name of the dataset builder
    vis_processor:
      eval:
        name: "blip_image_eval"
        image_size: 384
    text_processor:
      eval:
        name: "blip_caption"
        # The trailing space is part of the value; the quotes are required to
        # preserve it.
        prompt: "a picture of "
# Runtime settings for this job: task, batching, generation limits, output
# location and device/distributed setup.
run:
  # task: retrieval
  task: captioning

  # NOTE(review): the original header here read "optimizer", but no optimizer
  # keys are set in this block — only batch sizes and the worker count.
  batch_size_train: 32
  batch_size_eval: 64
  num_workers: 4

  # Generation limits (presumably caption length bounds and beam-search
  # width — confirm against the captioning task).
  max_len: 20
  min_len: 5
  num_beams: 3

  seed: 42
  output_dir: "output/BLIP/NoCaps"

  # Booleans are written in canonical lowercase so every YAML parser reads
  # them the same way.
  evaluate: true
  test_splits: ["val", "test"]

  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: true

  report_metric: false