Spaces:
Runtime error
Runtime error
import clip | |
import numpy as np | |
import torch | |
import gradio as gr | |
from PIL import Image | |
import os | |
# Choose the compute device up front: CUDA when PyTorch reports it as
# available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the CLIP "ViT-B/32" checkpoint along with its companion
# preprocessing callable (applied to each image before encoding).
model, preprocess = clip.load("ViT-B/32")

# Place the model on the selected device and switch it to evaluation mode.
model.to(device)
model.eval()
print(device)

# Business listing name kept as a module-level value.
Business_Listing = "Air Guide"
def find_similarity(image1, image2, text_input):
    """Score two images against a text prompt with CLIP and pick the closer one.

    Args:
        image1: First image, in a form accepted by the module-level
            ``preprocess`` callable.
        image2: Second image, same form as ``image1``.
        text_input: Prompt that both images are compared against.

    Returns:
        A ``(similarities, best_match_index)`` tuple. ``similarities`` is a
        two-element list of cosine similarities (plain Python floats) in the
        order ``[image1, image2]``. ``best_match_index`` is ``0`` when
        ``image1`` scores strictly higher and ``1`` otherwise, so ties
        resolve to ``image2``.

    Raises:
        ValueError: If either image is ``None``.
    """
    # Fail early with a readable message instead of an opaque error from
    # inside the preprocessing pipeline. NOTE(review): a live Gradio
    # interface may call this before both uploads exist -- confirm.
    if image1 is None or image2 is None:
        raise ValueError("Both images are required to compute similarity.")

    # Batch both images so the vision tower runs a single forward pass.
    image_batch = torch.stack(
        [preprocess(image) for image in (image1, image2)]
    ).to(device)
    text_tokens = clip.tokenize([text_input]).to(device)

    # Encode everything under no_grad. If the text branch tracked gradients,
    # converting the resulting similarity to NumPy / Python numbers would
    # fail with "Can't call numpy() on Tensor that requires grad".
    with torch.no_grad():
        image_features = model.encode_image(image_batch).float()
        text_features = model.encode_text(text_tokens).float()

        # L2-normalise so the dot product below is a cosine similarity.
        # Out-of-place division avoids mutating the encoder outputs.
        image_features = image_features / image_features.norm(
            dim=-1, keepdim=True
        )
        text_features = text_features / text_features.norm(
            dim=-1, keepdim=True
        )

        # (2, d) @ (d, 1) -> (2, 1); drop the text axis to get one score
        # per image, then convert to plain Python floats.
        similarities = (
            (image_features @ text_features.T).squeeze(1).cpu().tolist()
        )

    # Determine which image has a higher similarity to the text.
    best_match_index = 0 if similarities[0] > similarities[1] else 1
    return similarities, best_match_index
# Define the Gradio interface: two PIL image inputs plus a text prompt,
# wired to find_similarity, whose two return values map onto a text output
# (the similarity scores) and a number output (the best-match index).
#
# The ``interpretation`` keyword is intentionally not passed: it is an
# optional feature that newer Gradio releases no longer accept, and passing
# it there aborts construction of the interface with a TypeError.
iface = gr.Interface(
    fn=find_similarity,
    inputs=[
        gr.Image(type="pil", label="Image 1"),
        gr.Image(type="pil", label="Image 2"),
        "text",
    ],
    outputs=["text", "number"],
    # live=True requests re-evaluation on input changes rather than on an
    # explicit submit.
    live=True,
    title="CLIP Model Image-Text Cosine Similarity",
    description="Upload two images and enter text to find their cosine similarity.",
)
iface.launch()