Spaces:

3oly
/

grBird

Running

Brice Vandeputte

Pick bioclip src and adapt demo

6bde7ff over 1 year ago

11.7 kB

	import json
	import torch
	from torchvision import transforms
	from open_clip import create_model, get_tokenizer
	import torch.nn.functional as F
	import numpy as np
	import collections
	import heapq
	import PIL.Image
	from huggingface_hub import hf_hub_download
	from typing import Union, List
	from enum import Enum


	HF_DATAFILE_REPO = "imageomics/bioclip-demo"
	HF_DATAFILE_REPO_TYPE = "space"
	PRED_FILENAME_KEY = "file_name"
	PRED_CLASSICATION_KEY = "classification"
	PRED_SCORE_KEY = "score"

	OPENA_AI_IMAGENET_TEMPLATE = [
	lambda c: f"a bad photo of a {c}.",
	lambda c: f"a photo of many {c}.",
	lambda c: f"a sculpture of a {c}.",
	lambda c: f"a photo of the hard to see {c}.",
	lambda c: f"a low resolution photo of the {c}.",
	lambda c: f"a rendering of a {c}.",
	lambda c: f"graffiti of a {c}.",
	lambda c: f"a bad photo of the {c}.",
	lambda c: f"a cropped photo of the {c}.",
	lambda c: f"a tattoo of a {c}.",
	lambda c: f"the embroidered {c}.",
	lambda c: f"a photo of a hard to see {c}.",
	lambda c: f"a bright photo of a {c}.",
	lambda c: f"a photo of a clean {c}.",
	lambda c: f"a photo of a dirty {c}.",
	lambda c: f"a dark photo of the {c}.",
	lambda c: f"a drawing of a {c}.",
	lambda c: f"a photo of my {c}.",
	lambda c: f"the plastic {c}.",
	lambda c: f"a photo of the cool {c}.",
	lambda c: f"a close-up photo of a {c}.",
	lambda c: f"a black and white photo of the {c}.",
	lambda c: f"a painting of the {c}.",
	lambda c: f"a painting of a {c}.",
	lambda c: f"a pixelated photo of the {c}.",
	lambda c: f"a sculpture of the {c}.",
	lambda c: f"a bright photo of the {c}.",
	lambda c: f"a cropped photo of a {c}.",
	lambda c: f"a plastic {c}.",
	lambda c: f"a photo of the dirty {c}.",
	lambda c: f"a jpeg corrupted photo of a {c}.",
	lambda c: f"a blurry photo of the {c}.",
	lambda c: f"a photo of the {c}.",
	lambda c: f"a good photo of the {c}.",
	lambda c: f"a rendering of the {c}.",
	lambda c: f"a {c} in a video game.",
	lambda c: f"a photo of one {c}.",
	lambda c: f"a doodle of a {c}.",
	lambda c: f"a close-up photo of the {c}.",
	lambda c: f"a photo of a {c}.",
	lambda c: f"the origami {c}.",
	lambda c: f"the {c} in a video game.",
	lambda c: f"a sketch of a {c}.",
	lambda c: f"a doodle of the {c}.",
	lambda c: f"a origami {c}.",
	lambda c: f"a low resolution photo of a {c}.",
	lambda c: f"the toy {c}.",
	lambda c: f"a rendition of the {c}.",
	lambda c: f"a photo of the clean {c}.",
	lambda c: f"a photo of a large {c}.",
	lambda c: f"a rendition of a {c}.",
	lambda c: f"a photo of a nice {c}.",
	lambda c: f"a photo of a weird {c}.",
	lambda c: f"a blurry photo of a {c}.",
	lambda c: f"a cartoon {c}.",
	lambda c: f"art of a {c}.",
	lambda c: f"a sketch of the {c}.",
	lambda c: f"a embroidered {c}.",
	lambda c: f"a pixelated photo of a {c}.",
	lambda c: f"itap of the {c}.",
	lambda c: f"a jpeg corrupted photo of the {c}.",
	lambda c: f"a good photo of a {c}.",
	lambda c: f"a plushie {c}.",
	lambda c: f"a photo of the nice {c}.",
	lambda c: f"a photo of the small {c}.",
	lambda c: f"a photo of the weird {c}.",
	lambda c: f"the cartoon {c}.",
	lambda c: f"art of the {c}.",
	lambda c: f"a drawing of the {c}.",
	lambda c: f"a photo of the large {c}.",
	lambda c: f"a black and white photo of a {c}.",
	lambda c: f"the plushie {c}.",
	lambda c: f"a dark photo of a {c}.",
	lambda c: f"itap of a {c}.",
	lambda c: f"graffiti of the {c}.",
	lambda c: f"a toy {c}.",
	lambda c: f"itap of my {c}.",
	lambda c: f"a photo of a cool {c}.",
	lambda c: f"a photo of a small {c}.",
	lambda c: f"a tattoo of the {c}.",
	]


	def get_cached_datafile(filename:str):
	return hf_hub_download(repo_id=HF_DATAFILE_REPO, filename=filename, repo_type=HF_DATAFILE_REPO_TYPE)


	def get_txt_emb():
	txt_emb_npy = get_cached_datafile("txt_emb_species.npy")
	return torch.from_numpy(np.load(txt_emb_npy))


	def get_txt_names():
	txt_names_json = get_cached_datafile("txt_emb_species.json")
	with open(txt_names_json) as fd:
	txt_names = json.load(fd)
	return txt_names


	preprocess_img = transforms.Compose(
	[
	transforms.ToTensor(),
	transforms.Resize((224, 224), antialias=True),
	transforms.Normalize(
	mean=(0.48145466, 0.4578275, 0.40821073),
	std=(0.26862954, 0.26130258, 0.27577711),
	),
	]
	)

	class Rank(Enum):
	KINGDOM = 0
	PHYLUM = 1
	CLASS = 2
	ORDER = 3
	FAMILY = 4
	GENUS = 5
	SPECIES = 6

	def get_label(self):
	return self.name.lower()


	# The datafile of names ('txt_emb_species.json') contains species epithet.
	# To create a label for species we concatenate the genus and species epithet.
	SPECIES_LABEL = Rank.SPECIES.get_label()
	SPECIES_EPITHET_LABEL = "species_epithet"
	COMMON_NAME_LABEL = "common_name"


	def create_bioclip_model(model_str="hf-hub:imageomics/bioclip", device="cuda"):
	model = create_model(model_str, output_dict=True, require_pretrained=True)
	model = model.to(device)
	return torch.compile(model)


	def create_bioclip_tokenizer(tokenizer_str="ViT-B-16"):
	return get_tokenizer(tokenizer_str)


	class CustomLabelsClassifier(object):
	def __init__(self, device: Union[str, torch.device] = 'cpu'):
	self.device = device
	self.model = create_bioclip_model(device=device)
	self.tokenizer = create_bioclip_tokenizer()

	def get_txt_features(self, classnames):
	all_features = []
	for classname in classnames:
	txts = [template(classname) for template in OPENA_AI_IMAGENET_TEMPLATE]
	txts = self.tokenizer(txts).to(self.device)
	txt_features = self.model.encode_text(txts)
	txt_features = F.normalize(txt_features, dim=-1).mean(dim=0)
	txt_features /= txt_features.norm()
	all_features.append(txt_features)
	all_features = torch.stack(all_features, dim=1)
	return all_features

	@torch.no_grad()
	def predict(self, image_path: str, cls_ary: List[str]) -> dict[str, float]:
	img = PIL.Image.open(image_path)
	classes = [cls.strip() for cls in cls_ary]
	txt_features = self.get_txt_features(classes)

	img = preprocess_img(img).to(self.device)
	img_features = self.model.encode_image(img.unsqueeze(0))
	img_features = F.normalize(img_features, dim=-1)

	logits = (self.model.logit_scale.exp() * img_features @ txt_features).squeeze()
	probs = F.softmax(logits, dim=0).to("cpu").tolist()
	pred_list = []
	for cls, prob in zip(classes, probs):
	pred_list.append({
	PRED_FILENAME_KEY: image_path,
	PRED_CLASSICATION_KEY: cls,
	PRED_SCORE_KEY: prob
	})
	return pred_list


	def predict_classifications_from_list(img: Union[PIL.Image.Image, str], cls_ary: List[str], device: Union[str, torch.device] = 'cpu') -> dict[str, float]:
	classifier = CustomLabelsClassifier(device=device)
	return classifier.predict(img, cls_ary)


	def get_tol_classification_labels(rank: Rank) -> List[str]:
	names = []
	for i in range(rank.value + 1):
	i_rank = Rank(i)
	if i_rank == Rank.SPECIES:
	names.append(SPECIES_EPITHET_LABEL)
	rank_name = i_rank.name.lower()
	names.append(rank_name)
	if rank == Rank.SPECIES:
	names.append(COMMON_NAME_LABEL)
	return names


	def create_classification_dict(names: List[List[str]], rank: Rank) -> dict[str, str]:
	scientific_names = names[0]
	common_name = names[1]
	classification_dict = {}
	for idx, label in enumerate(get_tol_classification_labels(rank=rank)):
	if label == SPECIES_LABEL:
	value = scientific_names[-2] + " " + scientific_names[-1]
	elif label == COMMON_NAME_LABEL:
	value = common_name
	else:
	value = scientific_names[idx]
	classification_dict[label] = value
	return classification_dict


	def join_names(classification_dict: dict[str, str]) -> str:
	return " ".join(classification_dict.values())


	class TreeOfLifeClassifier(object):
	def __init__(self, device: Union[str, torch.device] = 'cpu'):
	self.device = device
	self.model = create_bioclip_model(device=device)
	self.txt_emb = get_txt_emb().to(device)
	self.txt_names = get_txt_names()

	def encode_image(self, img: PIL.Image.Image) -> torch.Tensor:
	img = preprocess_img(img).to(self.device)
	img_features = self.model.encode_image(img.unsqueeze(0))
	return img_features

	def predict_species(self, img: PIL.Image.Image) -> torch.Tensor:
	img_features = self.encode_image(img)
	img_features = F.normalize(img_features, dim=-1)
	logits = (self.model.logit_scale.exp() * img_features @ self.txt_emb).squeeze()
	probs = F.softmax(logits, dim=0)
	return probs

	def format_species_probs(self, image_path: str, probs: torch.Tensor, k: int = 5) -> List[dict[str, float]]:
	topk = probs.topk(k)
	result = []
	for i, prob in zip(topk.indices, topk.values):
	item = { PRED_FILENAME_KEY: image_path }
	item.update(create_classification_dict(self.txt_names[i], Rank.SPECIES))
	item[PRED_SCORE_KEY] = prob.item()
	result.append(item)
	return result

	def format_grouped_probs(self, image_path: str, probs: torch.Tensor, rank: Rank, min_prob: float = 1e-9, k: int = 5) -> List[dict[str, float]]:
	output = collections.defaultdict(float)
	class_dict_lookup = {}
	name_to_class_dict = {}
	for i in torch.nonzero(probs > min_prob).squeeze():
	classification_dict = create_classification_dict(self.txt_names[i], rank)
	name = join_names(classification_dict)
	class_dict_lookup[name] = classification_dict
	output[name] += probs[i]
	name_to_class_dict[name] = classification_dict
	topk_names = heapq.nlargest(k, output, key=output.get)
	prediction_ary = []
	for name in topk_names:
	item = { PRED_FILENAME_KEY: image_path }
	item.update(name_to_class_dict[name])
	#item.update(class_dict_lookup)
	item[PRED_SCORE_KEY] = output[name].item()
	prediction_ary.append(item)
	return prediction_ary

	@torch.no_grad()
	def predict(self, image_path: str, rank: Rank, min_prob: float = 1e-9, k: int = 5) -> List[dict[str, float]]:
	img = PIL.Image.open(image_path)
	probs = self.predict_species(img)
	if rank == Rank.SPECIES:
	return self.format_species_probs(image_path, probs, k)
	return self.format_grouped_probs(image_path, probs, rank, min_prob, k)


	def predict_classification(img: str, rank: Rank, device: Union[str, torch.device] = 'cpu',
	min_prob: float = 1e-9, k: int = 5) -> dict[str, float]:
	"""
	Predicts from the entire tree of life.
	If targeting a higher rank than species, then this function predicts among all
	species, then sums up species-level probabilities for the given rank.
	"""
	classifier = TreeOfLifeClassifier(device=device)
	return classifier.predict(img, rank, min_prob, k)