Spaces:
Runtime error
Runtime error
File size: 1,751 Bytes
b05aa6a 7413961 b05aa6a 9caa677 b05aa6a 9caa677 b05aa6a 7413961 a012f3a 7413961 a012f3a 7413961 b05aa6a a012f3a 7413961 b05aa6a a012f3a 7413961 b05aa6a a012f3a 7413961 b05aa6a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import clip
import numpy as np
import torch
import gradio as gr
from PIL import Image
import os
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the CLIP ViT-B/32 weights together with the matching image transform,
# then place the model on the chosen device and switch it to inference mode.
model, preprocess = clip.load("ViT-B/32")
model = model.to(device)
model.eval()
print(device)

# Business listing name (not referenced by the code visible in this section).
Business_Listing = "Air Guide"
def find_similarity(images, text_input):
    """Score each image against a text prompt with CLIP and pick the best match.

    Args:
        images: Iterable of images accepted by the module-level ``preprocess``
            transform (the Gradio inputs in this file supply PIL images).
        text_input: Prompt string handed to ``clip.tokenize``.

    Returns:
        A ``(similarities, best_match_index)`` tuple. ``similarities`` holds
        one cosine similarity per image, in input order;
        ``best_match_index`` is the ``np.argmax`` position of the largest one.

    Raises:
        ValueError: If ``images`` is empty.
    """
    images = list(images)
    if not images:
        # np.argmax on an empty list would raise an opaque ValueError later.
        raise ValueError("find_similarity needs at least one image")

    similarities = []
    # Pure inference: keep every encoder call under no_grad so no autograd
    # graph is built and the results can be converted with .numpy().
    with torch.no_grad():
        # Preprocess and encode each image as a batch of one.
        image_features = [
            model.encode_image(preprocess(image).unsqueeze(0).to(device)).float()
            for image in images
        ]

        # Encode the prompt and scale it to unit length.
        text_tokens = clip.tokenize([text_input]).to(device)
        text_features = model.encode_text(text_tokens).float()
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)

        # Cosine similarity is the dot product of the unit-length vectors.
        for image_feature in image_features:
            image_feature = image_feature / image_feature.norm(dim=-1, keepdim=True)
            similarity = (text_features @ image_feature.T).cpu().numpy()
            similarities.append(similarity[0, 0])

    # Find the index of the image with the highest similarity
    best_match_index = np.argmax(similarities)
    return similarities, best_match_index
# Define a Gradio interface
def _compare_two_images(image_1, image_2, text_input):
    """Adapt the three Gradio inputs to ``find_similarity(images, text_input)``.

    Gradio passes one positional argument per input component, while
    ``find_similarity`` expects the images as a single collection.

    Returns:
        A ``(scores_text, best_index)`` pair matching the ``"text"`` and
        ``"number"`` outputs, or ``("", None)`` while any input is missing.
    """
    # live=True re-runs the callback on every change, so it can fire before
    # both images and the prompt have been provided.
    if image_1 is None or image_2 is None or not text_input:
        return "", None
    similarities, best_match_index = find_similarity([image_1, image_2], text_input)
    scores_text = ", ".join(f"{float(score):.4f}" for score in similarities)
    return scores_text, int(best_match_index)


iface = gr.Interface(
    fn=_compare_two_images,
    inputs=[
        gr.Image(type="pil", label="Image 1"),
        gr.Image(type="pil", label="Image 2"),
        "text",
    ],
    outputs=["text", "number"],
    live=True,
    # NOTE(review): `interpretation` is believed to be unsupported by newer
    # Gradio releases (4.x) - confirm against the installed version.
    interpretation="default",
    title="CLIP Model Image-Text Cosine Similarity",
    description="Upload two images and enter text to find their cosine similarity.",
)
iface.launch()
|