Spaces:

X-iZhang
/

Libra

Running

Upload 27 files

23c9ef8 verified 9 months ago

1.62 kB

	# Copyright 2024 Xi Zhang
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import os
	from .clip_encoder import CLIPVisionTower
	from .dino_encoder import DINOVisionTower

	def build_vision_tower(vision_tower_cfg, **kwargs):
	    """Instantiate the vision tower named by ``vision_tower_cfg``.

	    The tower identifier is read from the ``mm_vision_tower`` attribute of
	    the config; when that attribute is absent, ``vision_tower`` is used
	    instead. The identifier is accepted if it is an existing filesystem
	    path or starts with ``openai``, ``facebook`` or ``microsoft``. The
	    encoder class is then picked by a case-insensitive substring match on
	    the identifier, with ``clip`` checked before ``dino``.

	    Args:
	        vision_tower_cfg: Object carrying the tower identifier as an
	            attribute; it is also forwarded to the encoder as ``args``.
	        **kwargs: Extra keyword arguments forwarded unchanged to the
	            encoder constructor.

	    Returns:
	        A ``CLIPVisionTower`` or ``DINOVisionTower`` instance.

	    Raises:
	        ValueError: If no identifier is configured, if the identifier is
	            neither an existing path nor prefixed by an accepted name, or
	            if it mentions neither ``clip`` nor ``dino``.
	    """
	    # Resolve the fallback first so both attributes are looked up exactly
	    # as in a nested getattr call.
	    fallback_name = getattr(vision_tower_cfg, 'vision_tower', None)
	    tower_name = getattr(vision_tower_cfg, 'mm_vision_tower', fallback_name)
	    if tower_name is None:
	        raise ValueError("No vision tower specified in configuration.")

	    # Accept either a path that exists locally or an identifier that
	    # begins with one of the recognised prefixes.
	    accepted_prefixes = ("openai", "facebook", "microsoft")
	    found_on_disk = os.path.exists(tower_name)
	    if not (found_on_disk or tower_name.startswith(accepted_prefixes)):
	        raise ValueError(f'Unknown vision tower: {tower_name}')

	    # Choose the encoder family from the identifier text.
	    lowered_name = tower_name.lower()
	    if "clip" in lowered_name:
	        return CLIPVisionTower(tower_name, args=vision_tower_cfg, **kwargs)
	    if "dino" in lowered_name:
	        return DINOVisionTower(tower_name, args=vision_tower_cfg, **kwargs)
	    raise ValueError(f'Unknown vision model type in vision_tower: {tower_name}')