ktllc committed on
Commit
5888543
·
1 Parent(s): e57bbcd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -35
app.py CHANGED
@@ -1,65 +1,49 @@
1
- import clip
2
  import numpy as np
3
  import torch
4
  import gradio as gr
5
  from PIL import Image
6
  import os
7
 
 
8
  # Load the CLIP model
9
  model, preprocess = clip.load("ViT-B/32")
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
11
  model.to(device).eval()
12
- print(device)
13
 
14
  # Define the Business Listing variable
15
  Business_Listing = "Air Guide"
16
 
17
- def find_similarity(image1, image2, text_input):
18
- if image1 is None or image2 is None:
19
- return None
20
-
21
- image_features = []
22
-
23
- # Preprocess and encode the two images
24
- for image in [image1, image2]:
25
- image = preprocess(image).unsqueeze(0).to(device)
26
- with torch.no_grad():
27
- image_feature = model.encode_image(image).float()
28
- image_features.append(image_feature)
29
-
30
  # Prepare input text
31
  text_tokens = clip.tokenize([text_input]).to(device)
32
- text_features = model.encode_text(text_tokens).float()
33
 
34
- # Normalize text features
35
- text_features /= text_features.norm(dim=-1, keepdim=True)
36
 
37
- similarities = []
 
 
38
 
39
- # Calculate cosine similarity for each image
40
- for image_feature in image_features:
41
- image_feature /= image_feature.norm(dim=-1, keepdim=True)
42
- similarity = (text_features @ image_feature.T).cpu().detach().numpy()
43
- similarities.append(similarity[0, 0])
44
 
45
- # Determine which image has a higher similarity to the text
46
- best_match_index = 0 if similarities[0] > similarities[1] else 1
47
 
48
- return similarities, best_match_index
49
 
50
  # Define a Gradio interface
51
  iface = gr.Interface(
52
  fn=find_similarity,
53
- inputs=[
54
- gr.Image(type="pil", label="Image 1"),
55
- gr.Image(type="pil", label="Image 2"),
56
- "text"
57
- ],
58
- outputs=["text", "number"],
59
  live=True,
60
  interpretation="default",
61
  title="CLIP Model Image-Text Cosine Similarity",
62
- description="Upload two images and enter text to find their cosine similarity.",
63
  )
64
 
65
- iface.launch()
 
 
import clip  # NOTE(review): this import was dropped in this commit but clip.load/clip.tokenize are still called below — restored to fix the NameError.
import numpy as np
import torch
import gradio as gr
from PIL import Image
import os

# Load the CLIP model once at startup and run it on GPU when available.
model, preprocess = clip.load("ViT-B/32")
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device).eval()  # eval(): inference only, disables dropout/batch-norm updates

# Define the Business Listing variable
Business_Listing = "Air Guide"
15
 
16
def find_similarity(image, text_input):
    """Compute the CLIP cosine similarity between an image and a text prompt.

    Args:
        image: PIL image uploaded through the Gradio UI (may be None before
            the user uploads anything, because the interface runs live).
        text_input: free-form text to compare against the image.

    Returns:
        The cosine similarity score as a plain float, or 0.0 while either
        input is still missing.
    """
    # Guard: with live=True Gradio calls this before both inputs exist;
    # preprocess(None) / tokenizing "" would otherwise raise.
    if image is None or not text_input:
        return 0.0

    # Preprocess the uploaded image into a batched tensor on the model device.
    image = preprocess(image).unsqueeze(0).to(device)

    # Prepare input text
    text_tokens = clip.tokenize([text_input]).to(device)

    # Encode image and text features (inference only — no gradient tracking).
    with torch.no_grad():
        image_features = model.encode_image(image).float()
        text_features = model.encode_text(text_tokens).float()

    # Normalize features so the dot product below is a cosine similarity.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    similarity = (text_features @ image_features.T).cpu().numpy()

    # float(): the Gradio "number" output expects a native Python number,
    # not a NumPy scalar.
    return float(similarity[0, 0])
 
36
 
 
37
 
38
# Define a Gradio interface wiring the similarity function to an image
# upload plus a text box, emitting a single similarity number.
iface = gr.Interface(
    fn=find_similarity,
    inputs=[gr.Image(type="pil"), "text"],
    outputs="number",
    live=True,  # recompute on every input change; find_similarity guards missing inputs
    # NOTE(review): `interpretation` is deprecated and was removed in
    # Gradio 4.x — drop this argument when upgrading.
    interpretation="default",
    title="CLIP Model Image-Text Cosine Similarity",
    description="Upload an image and enter text to find their cosine similarity.",
)

# Guard the server launch so the module can be imported (e.g. by tests or
# deployment tooling) without immediately starting the web app.
if __name__ == "__main__":
    iface.launch()