Spaces:
Runtime error
Runtime error
File size: 2,629 Bytes
f457390 7f0b913 f457390 7f0b913 de33293 f457390 7f53b0a f457390 624ee8e de33293 fa2cb47 a96ef2d de33293 a96ef2d 952a07a f2dda80 1e90462 931456e f2dda80 a96ef2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import gradio as gr
import os
import skimage
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
from collections import OrderedDict
import torch
from imagebind import data
from imagebind.models import imagebind_model
from imagebind.models.imagebind_model import ModalityType
import torch.nn as nn
import pickle
# Inference device. GPU auto-selection is currently commented out, i.e. the
# alternative would be: "cuda:0" if torch.cuda.is_available() else "cpu".
device = "cpu"

# Build the pretrained ImageBind "huge" model once at import time, switch it
# to evaluation mode and move it to the selected device.
model = imagebind_model.imagebind_huge(pretrained=True)
model.eval()
model.to(device)

# NOTE(security): pickle can execute arbitrary code while loading; these files
# must only ever be trusted assets shipped together with the app.
# Context managers make sure the file handles are closed after loading
# (the bare open() calls previously leaked them).
# image_features: presumably precomputed, normalised image embeddings
# (inferred from the file name -- TODO confirm); it is used as a tensor
# supporting .cpu().numpy() in generate_image.
with open("./assets/image_features_norm_2.pkl", "rb") as features_file:
    image_features = pickle.load(features_file)

# image_paths: sequence indexed by the position of the best-scoring feature
# row; only the base name of each entry is used by generate_image.
with open("./assets/image_paths.pkl", "rb") as paths_file:
    image_paths = pickle.load(paths_file)
def generate_image(text):
    """Return the stored image that best matches ``text``.

    The text is embedded with the module-level ImageBind ``model``, the
    embedding is scaled to unit length, and it is scored against every row
    of ``image_features`` with a dot product. (That equals cosine similarity
    only if the stored features are already unit-normalised -- the asset
    name suggests so, TODO confirm.)

    Args:
        text: Query string describing the wanted image.

    Returns:
        A PIL image in RGB mode, opened from ``./assets/images/`` using the
        base name of the best-scoring entry in ``image_paths``.
    """
    tokens = data.load_and_transform_text([text], device)
    with torch.no_grad():
        outputs = model({ModalityType.TEXT: tokens})

    query = outputs[ModalityType.TEXT]
    # Scale the text embedding to unit length (in place).
    query /= query.norm(dim=-1, keepdim=True)

    gallery = image_features.cpu().numpy()
    scores = query.cpu().numpy() @ gallery.T

    # Pick the index with the highest score (argmax over the flattened
    # score array; a single query text is passed in above).
    best = np.argmax(scores)
    file_name = os.path.basename(image_paths[best])
    picture = Image.open(f"./assets/images/{file_name}")
    return picture.convert("RGB")
# Gradio interface: one text input, one image output, served by generate_image.
# NOTE(review): every example row carries a second value (an image path) even
# though only a single input component is declared -- confirm how the
# installed Gradio version treats the extra column.
iface = gr.Interface(
    fn=generate_image,
    inputs="text",
    outputs="image",
    examples=[
        ["a page of text about segmentation", "assets/images/page.png"],
        ["a facial photo of a tabby cat", "assets/images/chelsea.png"],
        ["a portrait of an astronaut with the American flag", "assets/images/astronaut.png"],
        ["a rocket standing on a launchpad", "assets/images/rocket.png"],
        ["a red motorcycle standing in a garage", "assets/images/motorcycle_right.png"],
        ["a person looking at a camera on a tripod", "assets/images/camera.png"],
        ["a black-and-white silhouette of a horse", "assets/images/horse.png"],
        ["a cup of coffee on a saucer", "assets/images/coffee.png"]
    ],
    title="Find the image most similar to the given text",
    # The paragraph is now properly closed with </p>; it previously ended
    # with a second opening <p> tag.
    description='''<p>
Welcome to a straightforward demonstration of ImageBind, a powerful tool designed to
find the image most similar to a given text using cosine similarity. For a comprehensive
understanding of its capabilities, we encourage you to explore the original research <a href='https://arxiv.org/abs/2305.05665' target='_blank'>paper</a>
and visit the <a href='https://github.com/facebookresearch/ImageBind' target='_blank'>repository</a>
for more in-depth information.</p>
'''
)
# Start the Gradio server.
iface.launch()
|