"""Gradio demo: cosine similarity between an image and a text prompt via CLIP."""

import os
from typing import Optional

import clip
import gradio as gr
import numpy as np
import torch
from PIL import Image

# Pick the device first so the model is loaded straight onto it.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the CLIP model together with its matching image preprocessing pipeline.
model, preprocess = clip.load("ViT-B/32", device=device)
model.to(device).eval()

# Define the Business Listing variable.
# NOTE(review): not referenced anywhere in the visible code; kept in case
# another module imports it.
Business_Listing = "Air Guide"


def find_similarity(image: Optional[Image.Image], text_input: Optional[str]) -> Optional[float]:
    """Return the cosine similarity between ``image`` and ``text_input``.

    Both inputs are embedded with the module-level CLIP ``model``, the
    embeddings are L2-normalised, and their dot product is returned.

    Args:
        image: The uploaded picture as a PIL image, or ``None`` when nothing
            has been uploaded yet (the interface runs in live mode, so the
            callback fires before both inputs are filled in).
        text_input: The text to compare against the image.

    Returns:
        The cosine similarity as a plain Python ``float``, or ``None`` when
        either input is missing.
    """
    # Live mode triggers the callback on every change, including the moment
    # where no image is present; there is nothing to compare in that case.
    if image is None or text_input is None:
        return None

    # Preprocess the uploaded image into a batch of one.
    image_batch = preprocess(image).unsqueeze(0).to(device)

    # Prepare input text.
    text_tokens = clip.tokenize([text_input]).to(device)

    # Encode image and text features without tracking gradients.
    with torch.no_grad():
        image_features = model.encode_image(image_batch)
        text_features = model.encode_text(text_tokens)

    # Normalize features so the dot product below is a cosine similarity.
    image_features = image_features / image_features.norm(dim=-1, keepdim=True)
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)

    # One text against one image yields a 1x1 matrix. Extract the single
    # value with .item(); the previous `.squeeze(0)...[0, 0]` indexed a 1-D
    # array with two indices and raised IndexError.
    similarity = text_features @ image_features.T
    return float(similarity.item())


# Define a Gradio interface.
# NOTE(review): the `interpretation` argument is not accepted by every Gradio
# major version -- confirm against the pinned Gradio release.
iface = gr.Interface(
    fn=find_similarity,
    inputs=[gr.Image(type="pil"), "text"],
    outputs="number",
    live=True,
    interpretation="default",
    title="CLIP Model Image-Text Cosine Similarity",
    description="Upload an image and enter text to find their cosine similarity.",
)

iface.launch()