import clip
import gradio as gr
import torch


# Select the device and load the CLIP model onto it
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
model.eval()

# Example business listing name (not used by the similarity demo below)
Business_Listing = "Air Guide"

def find_similarity(image, text_input):
    # Preprocess the uploaded image and tokenize the input text
    image = preprocess(image).unsqueeze(0).to(device)
    text_tokens = clip.tokenize([text_input]).to(device)

    # Encode image and text features
    with torch.no_grad():
        image_features = model.encode_image(image)
        text_features = model.encode_text(text_tokens)

    # Normalize features so the dot product equals cosine similarity
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    # Single text vs. single image, so the result is a 1x1 matrix
    similarity = (text_features @ image_features.T).item()
    return similarity
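
# A minimal sketch (not wired into the interface below) showing how the same
# encoders could rank several candidate descriptions against one image; the
# function name `rank_descriptions` is an illustrative assumption.
def rank_descriptions(image, text_inputs):
    # Encode one image and a batch of candidate texts
    image = preprocess(image).unsqueeze(0).to(device)
    text_tokens = clip.tokenize(text_inputs).to(device)
    with torch.no_grad():
        image_features = model.encode_image(image)
        text_features = model.encode_text(text_tokens)

    # Normalize and compute one cosine similarity per candidate description
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    scores = (image_features @ text_features.T).squeeze(0)
    return {text: score.item() for text, score in zip(text_inputs, scores)}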


# Define a Gradio interface
iface = gr.Interface(
    fn=find_similarity,
    inputs=[gr.Image(type="pil"), "text"],
    outputs="number",
    live=True,
    title="CLIP Model Image-Text Cosine Similarity",
    description="Upload an image and enter text to find their cosine similarity.",
)

iface.launch()
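
# Example of calling find_similarity directly from a script or REPL, assuming a
# local file named "example.jpg" (hypothetical path):
#   from PIL import Image
#   score = find_similarity(Image.open("example.jpg"), "a tour guide holding a map")
#   print(f"Cosine similarity: {score:.3f}")  # raw CLIP cosine scores are typically small, e.g. ~0.2-0.3 for a good match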