Spaces:
Runtime error
Runtime error
File size: 1,893 Bytes
b05aa6a 7413961 b05aa6a 9caa677 b05aa6a 9caa677 b05aa6a d4c665a e57bbcd 7413961 d4c665a 7413961 a012f3a 7413961 a012f3a 7413961 b05aa6a a012f3a 7413961 e57bbcd 7413961 d4c665a 7413961 b05aa6a a012f3a d4c665a 7413961 b05aa6a a012f3a 7413961 b05aa6a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import clip
import numpy as np
import torch
import gradio as gr
from PIL import Image
import os
# Choose the compute device first so the model can be created directly on it.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the CLIP ViT-B/32 checkpoint; `preprocess` is the companion callable
# applied to each input image before it is fed to the model.
model, preprocess = clip.load("ViT-B/32", device=device)
model.to(device).eval()  # inference only: make sure eval mode is active
print(device)

# Business listing name (not referenced by the rest of the visible code).
Business_Listing = "Air Guide"
def find_similarity(image1, image2, text_input):
    """Score two images against a text prompt using CLIP embeddings.

    Both images and the prompt are embedded with the module-level ``model``.
    Each embedding is divided by its L2 norm, so the dot product between the
    text embedding and an image embedding is their cosine similarity.

    Args:
        image1: First image in a form accepted by the module-level
            ``preprocess`` callable (the Gradio interface passes PIL
            images), or ``None`` when no image has been supplied yet.
        image2: Second image; same conventions as ``image1``.
        text_input: Prompt that both images are compared against.

    Returns:
        A ``(similarities, best_match_index)`` tuple. ``similarities`` is a
        two-element list holding the cosine similarity of image 1 and
        image 2 (in that order) to the prompt. ``best_match_index`` is ``0``
        when image 1 scores strictly higher and ``1`` otherwise (ties go to
        image 2). If either image is ``None`` the result is ``(None, None)``
        so that each of the two declared outputs still receives a value.
    """
    # Guard: the interface is live, so this runs while inputs are still
    # incomplete. Return one placeholder per output instead of a bare None.
    if image1 is None or image2 is None:
        return None, None

    # Pure inference: keep autograd off for the text and image encoders alike
    # so no graph is built and results convert to NumPy without detaching.
    with torch.no_grad():
        text_tokens = clip.tokenize([text_input]).to(device)
        text_features = model.encode_text(text_tokens).float()
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)

        similarities = []
        for image in (image1, image2):
            batch = preprocess(image).unsqueeze(0).to(device)
            image_feature = model.encode_image(batch).float()
            image_feature = image_feature / image_feature.norm(dim=-1, keepdim=True)
            # (1, d) @ (d, 1) -> a 1x1 matrix holding the cosine similarity.
            similarity = (text_features @ image_feature.T).cpu().numpy()
            similarities.append(similarity[0, 0])

    # Report which image matches the prompt better.
    best_match_index = 0 if similarities[0] > similarities[1] else 1
    return similarities, best_match_index
# Define the Gradio interface: two PIL image inputs plus a free-text prompt.
# The first output shows the similarity list as text, the second shows the
# index of the better-matching image as a number.
#
# NOTE: `interpretation="default"` is intentionally not passed. The parameter
# was removed in Gradio 4, and in Gradio 3 the default interpreter only
# supports a Label/Number as the first output, whereas this interface's first
# output is a text box.
iface = gr.Interface(
    fn=find_similarity,
    inputs=[
        gr.Image(type="pil", label="Image 1"),
        gr.Image(type="pil", label="Image 2"),
        "text",
    ],
    outputs=["text", "number"],
    # Re-run automatically whenever an input changes (no submit button).
    live=True,
    title="CLIP Model Image-Text Cosine Similarity",
    description="Upload two images and enter text to find their cosine similarity.",
)
iface.launch()
|