Spaces:
Runtime error
Runtime error
import data | |
import torch | |
import gradio as gr | |
from models import imagebind_model | |
from models.imagebind_model import ModalityType | |
# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Build the ImageBind "huge" model with pretrained weights once at start-up,
# move it to the selected device and put it in eval mode for serving.
model = imagebind_model.imagebind_huge(pretrained=True)
model.to(device)
model.eval()
def image_text_zeroshot(image, text_list):
    """Score one image against "|"-separated candidate texts.

    Args:
        image: Path of the image to classify (the Gradio image input of this
            demo is configured with ``type='filepath'``).
        text_list: Candidate labels in a single string, separated by "|",
            e.g. ``"A dog|A car|A bird"``.

    Returns:
        A dict mapping each distinct, non-empty label to its softmax score
        over the image/text embedding similarities.

    Raises:
        ValueError: If no image is given, or if ``text_list`` contains no
            non-empty label.
    """
    if image is None:
        raise ValueError("Please provide an input image.")

    # Drop blank entries (e.g. from a trailing "|") and repeated labels.
    # Each of them would otherwise take a share of the softmax, and repeated
    # labels would additionally collapse into a single key of the result.
    candidates = (part.strip() for part in (text_list or "").split("|"))
    labels = list(dict.fromkeys(label for label in candidates if label))
    if not labels:
        raise ValueError(
            'Please provide at least one candidate text, separated by "|".'
        )

    inputs = {
        ModalityType.TEXT: data.load_and_transform_text(labels, device),
        ModalityType.VISION: data.load_and_transform_vision_data([image], device),
    }
    # Inference only: no gradients are needed.
    with torch.no_grad():
        embeddings = model(inputs)

    # Similarity of the image embedding to every text embedding, normalised
    # over the labels. Presumably shape (1, num_labels) before squeeze(0),
    # since a single image is passed -- TODO confirm against the model.
    logits = embeddings[ModalityType.VISION] @ embeddings[ModalityType.TEXT].T
    scores = torch.softmax(logits, dim=-1).squeeze(0).tolist()
    return dict(zip(labels, scores))
# Input widgets, in the order of image_text_zeroshot's parameters.
# The top-level gr.Image / gr.Textbox components replace the legacy
# `gr.inputs.*` namespace, which is deprecated in Gradio 3.x and removed in 4.x.
inputs = [
    gr.Image(type='filepath', label="Input image"),
    gr.Textbox(lines=1, label="Candidate texts"),
]

# NOTE(review): the "duplicate this space" link in the description points at
# OFA-Sys/chinese-clip-zero-shot-image-classification, which looks copied from
# another Space -- confirm the intended URL before changing the text.
iface = gr.Interface(
    image_text_zeroshot,
    inputs,
    "label",
    examples=[
        ["assets/dog_image.jpg", "A dog|A car|A bird"],
        ["assets/car_image.jpg", "A dog|A car|A bird"],
        ["assets/bird_image.jpg", "A dog|A car|A bird"],
    ],
    description="""<p>This is a simple demo of ImageBind for zeroshot image classification. Please refer to the original <a href='https://arxiv.org/abs/2305.05665' target='_blank'>paper</a> and <a href='https://github.com/facebookresearch/ImageBind' target='_blank'>repo</a> for more details.<br>
To test your own cases, you can upload an image, and provide the candidate texts separated by "|".<br>
You can duplicate this space and run it privately: <a href='https://huggingface.co/spaces/OFA-Sys/chinese-clip-zero-shot-image-classification?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14' alt='Duplicate Space'></a></p>""",
    title="ImageBind: Zero-shot Image Classification",
)
iface.launch()