Spaces:

fffiloni
/

miniGPT4-Video-Zero

Paused

App Files Files Community

miniGPT4-Video-Zero / minigpt4 / datasets / datasets / laion_dataset.py

fffiloni

Upload 164 files

2ada650 verified about 1 year ago

raw

history blame

2.48 kB

	"""
	Copyright (c) 2022, salesforce.com, inc.
	All rights reserved.
	SPDX-License-Identifier: BSD-3-Clause
	For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
	"""
	import random

	import webdataset as wds
	from minigpt4.datasets.datasets.base_dataset import BaseDataset


	class LaionDataset(BaseDataset):
	    """Image-caption dataset backed by a webdataset pipeline over tar shards.

	    Every record produced by the pipeline pairs an image with a randomly
	    selected captioning instruction and the processed caption text.
	    """

	    # Captioning prompts; one is drawn at random for each sample. Trailing
	    # spaces inside some entries are intentional and must be preserved.
	    _CAPTION_PROMPTS = (
	        "Briefly describe this image.",
	        "Provide a concise depiction of this image.",
	        "Present a short description of this image.",
	        "Summarize this image in a few words.",
	        "A short image caption:",
	        "A short image description:",
	        "A photo of ",
	        "An image that shows ",
	        "Write a short description for the image. ",
	        "Write a description for the photo.",
	        "Provide a description of what is presented in the photo.",
	        "Briefly describe the content of the image.",
	        "Can you briefly explain what you see in the image?",
	        "Could you use a few words to describe what you perceive in the photo?",
	        "Please provide a short depiction of the picture.",
	        "Using language, provide a short account of the image.",
	        "Use a few words to illustrate what is happening in the picture.",
	    )

	    def __init__(self, vis_processor, text_processor, location):
	        """Create the instruction pool and assemble the sample pipeline.

	        Args:
	            vis_processor: Callable applied to the decoded "jpg" entry of
	                each sample.
	            text_processor: Callable applied to the "caption" field of each
	                sample's "json" entry.
	            location: Shard specification handed to ``wds.ResampledShards``.
	        """
	        # The processors are read back below as self.vis_processor and
	        # self.text_processor -- presumably stored by BaseDataset.__init__.
	        super().__init__(vis_processor=vis_processor, text_processor=text_processor)

	        # Per-instance mutable copy, so callers may edit one dataset's pool.
	        self.instruction_pool = list(self._CAPTION_PROMPTS)

	        # Every stage that accepts a handler shares the same one
	        # (wds.warn_and_continue) instead of the library default.
	        on_error = wds.warn_and_continue
	        stages = [
	            wds.ResampledShards(location),
	            wds.tarfile_to_samples(handler=on_error),
	            wds.shuffle(1000, handler=on_error),
	            wds.decode("pilrgb", handler=on_error),
	            wds.to_tuple("jpg", "json", handler=on_error),
	            # map_tuple receives a single function, so only the first
	            # tuple element (the image) goes through vis_processor.
	            wds.map_tuple(self.vis_processor, handler=on_error),
	            wds.map(self.to_dict, handler=on_error),
	        ]
	        self.inner_dataset = wds.DataPipeline(*stages)

	    def to_dict(self, sample):
	        """Turn an ``(image, metadata)`` sample into a training record.

	        Args:
	            sample: Indexable whose item 0 is the image and whose item 1 is
	                a mapping containing a "caption" key.

	        Returns:
	            A dict with the keys "image", "instruction_input" and "answer".
	        """
	        chosen = random.choice(self.instruction_pool)
	        prompt = f"<Img><ImageHere></Img> [caption] {chosen} "

	        image = sample[0]
	        caption = self.text_processor(sample[1]["caption"])
	        return {
	            "image": image,
	            "instruction_input": prompt,
	            "answer": caption,
	        }