import json
from typing import Dict, List

import PIL.Image
import torch
from transformers import AutoModelForCausalLM


def load_pretrained_model(model_path: str):
    # Imported lazily so the deepseek_vl2 package is only required when a
    # model is actually loaded.
    from deepseek_vl2.models.processing_deepseek_vl_v2 import DeepseekVLV2Processor
    from deepseek_vl2.models.modeling_deepseek_vl_v2 import DeepseekVLV2ForCausalLM

    vl_chat_processor = DeepseekVLV2Processor.from_pretrained(model_path)
    tokenizer = vl_chat_processor.tokenizer

    vl_gpt: DeepseekVLV2ForCausalLM = AutoModelForCausalLM.from_pretrained(
        model_path, trust_remote_code=True
    )
    # Cast to bfloat16, move to the GPU, and switch to evaluation mode.
    vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()

    return tokenizer, vl_chat_processor, vl_gpt
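
# Usage sketch (the checkpoint id "deepseek-ai/deepseek-vl2-tiny" below is an
# assumption for illustration; substitute any local or Hub DeepSeek-VL2 path):
#
#   tokenizer, vl_chat_processor, vl_gpt = load_pretrained_model(
#       "deepseek-ai/deepseek-vl2-tiny"
#   )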


def load_pil_images(conversations: List[Dict[str, str]]) -> List[PIL.Image.Image]:
    """Load the PIL images referenced by the messages of a conversation.

    Args:
        conversations (List[Dict[str, str]]): the conversations with a list of
            messages. An example is:
            [
                {
                    "role": "User",
                    "content": "<image>\nExtract all information from this image and convert them into markdown format.",
                    "images": ["./examples/table_datasets.png"]
                },
                {"role": "Assistant", "content": ""},
            ]

    Returns:
        pil_images (List[PIL.Image.Image]): the list of PIL images.
    """
    pil_images = []

    for message in conversations:
        # Text-only messages carry no "images" key and are skipped.
        if "images" not in message:
            continue

        for image_path in message["images"]:
            pil_img = PIL.Image.open(image_path)
            # Normalize to RGB so every image has three channels downstream.
            pil_img = pil_img.convert("RGB")
            pil_images.append(pil_img)

    return pil_images
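
# Usage sketch (the image path is the placeholder from the docstring above and
# is assumed to exist on disk):
#
#   conversation = [
#       {
#           "role": "User",
#           "content": "<image>\nExtract all information from this image and convert them into markdown format.",
#           "images": ["./examples/table_datasets.png"],
#       },
#       {"role": "Assistant", "content": ""},
#   ]
#   pil_images = load_pil_images(conversation)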


def load_json(filepath):
    # Read a JSON file from disk and return the parsed Python object.
    with open(filepath, "r") as f:
        data = json.load(f)
    return data
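
# Usage sketch ("config.json" is a hypothetical path):
#
#   data = load_json("config.json")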