|
""" |
|
Copyright (c) 2022, salesforce.com, inc. |
|
All rights reserved. |
|
SPDX-License-Identifier: BSD-3-Clause |
|
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause |
|
""" |
|
import random |
|
|
|
import webdataset as wds |
|
from minigpt4.datasets.datasets.base_dataset import BaseDataset |
|
|
|
|
|
class LaionDataset(BaseDataset):
    """Image-caption dataset streamed from webdataset tar shards.

    Every sample produced by ``inner_dataset`` is a dict holding the
    processed image, a randomly selected captioning instruction wrapped in
    the image-placeholder prompt template, and the processed caption text.
    """

    def __init__(self, vis_processor, text_processor, location):
        """Set up the instruction prompts and the streaming pipeline.

        Args:
            vis_processor: Forwarded to ``BaseDataset.__init__``; read back
                as ``self.vis_processor`` and applied to the image element
                of each sample (presumably stored on ``self`` by the base
                class -- confirm against ``BaseDataset``).
            text_processor: Forwarded to ``BaseDataset.__init__``; read back
                as ``self.text_processor`` and applied to each caption.
            location: Shard specification handed to ``wds.ResampledShards``.
        """
        super().__init__(vis_processor=vis_processor, text_processor=text_processor)

        # Candidate prompts; one is drawn at random for every sample.
        # Trailing spaces inside some entries are intentional and preserved.
        self.instruction_pool = [
            "Briefly describe this image.",
            "Provide a concise depiction of this image.",
            "Present a short description of this image.",
            "Summarize this image in a few words.",
            "A short image caption:",
            "A short image description:",
            "A photo of ",
            "An image that shows ",
            "Write a short description for the image. ",
            "Write a description for the photo.",
            "Provide a description of what is presented in the photo.",
            "Briefly describe the content of the image.",
            "Can you briefly explain what you see in the image?",
            "Could you use a few words to describe what you perceive in the photo?",
            "Please provide a short depiction of the picture.",
            "Using language, provide a short account of the image.",
            "Use a few words to illustrate what is happening in the picture.",
        ]

        # Every stage that accepts a handler uses the same one, so a failing
        # sample is reported and skipped instead of stopping iteration.
        on_error = wds.warn_and_continue
        stages = [
            wds.ResampledShards(location),
            wds.tarfile_to_samples(handler=on_error),
            wds.shuffle(1000, handler=on_error),
            wds.decode("pilrgb", handler=on_error),
            # Keep only the image and its JSON metadata, in that order.
            wds.to_tuple("jpg", "json", handler=on_error),
            # A single function is given, so only the first tuple element
            # (the image) is transformed.
            wds.map_tuple(self.vis_processor, handler=on_error),
            wds.map(self.to_dict, handler=on_error),
        ]
        self.inner_dataset = wds.DataPipeline(*stages)

    def to_dict(self, sample):
        """Turn an ``(image, metadata)`` sample into a training record.

        Args:
            sample: Indexable sample whose element 0 is the image and whose
                element 1 is a mapping containing a ``"caption"`` entry.

        Returns:
            dict with keys ``"image"``, ``"instruction_input"`` and
            ``"answer"``.
        """
        prompt = random.choice(self.instruction_pool)
        # Prefix the image placeholder and task tag expected downstream.
        instruction_input = "<Img><ImageHere></Img> [caption] {} ".format(prompt)

        image = sample[0]
        caption = self.text_processor(sample[1]["caption"])

        return {
            "image": image,
            "instruction_input": instruction_input,
            "answer": caption,
        }
|
|
|
|