wilwork committed on
Commit
1d65703
·
verified ·
1 Parent(s): cf16f32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -47
app.py CHANGED
@@ -2,7 +2,6 @@ import gradio as gr
2
  from transformers import AutoModel
3
  from PIL import Image
4
  import torch
5
- import numpy as np
6
 
7
  # Load JinaAI CLIP model
8
  model = AutoModel.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)
@@ -14,54 +13,36 @@ def compute_similarity(input1_type, input1_text, input1_image, input2_type, inpu
14
  - Image-Image
15
  - Text-Image & Image-Text
16
  """
17
-
18
- # Determine input types
19
- if input1_type == "Text":
20
- input1 = input1_text.strip()
21
- input1_is_text = bool(input1)
22
- input1_is_image = False
23
- else:
24
- input1 = input1_image
25
- input1_is_text = False
26
- input1_is_image = input1 is not None
27
-
28
- if input2_type == "Text":
29
- input2 = input2_text.strip()
30
- input2_is_text = bool(input2)
31
- input2_is_image = False
32
- else:
33
- input2 = input2_image
34
- input2_is_text = False
35
- input2_is_image = input2 is not None
36
-
37
- # Ensure valid input
38
- if not (input1_is_text or input1_is_image) or not (input2_is_text or input2_is_image):
39
- return "Error: Please provide valid inputs (text or image) for both fields!"
40
 
41
  try:
42
  with torch.no_grad():
43
- if input1_is_text and input2_is_text:
44
  # Text-Text Similarity
45
- emb1 = model.encode_text([input1])
46
- emb2 = model.encode_text([input2])
47
- elif input1_is_image and input2_is_image:
48
  # Image-Image Similarity
49
- image1 = Image.fromarray(input1)
50
- image2 = Image.fromarray(input2)
51
- emb1 = model.encode_image([image1])
52
- emb2 = model.encode_image([image2])
53
  else:
54
- # Image-Text Similarity
55
- if input1_is_image:
56
- image = Image.fromarray(input1)
57
- text = input2
58
- emb1 = model.encode_image([image])
59
- emb2 = model.encode_text([text])
60
  else:
61
- image = Image.fromarray(input2)
62
- text = input1
63
- emb1 = model.encode_text([text])
64
- emb2 = model.encode_image([image])
65
 
66
  # Compute cosine similarity
67
  similarity_score = (emb1 @ emb2.T).item()
@@ -74,7 +55,7 @@ def compute_similarity(input1_type, input1_text, input1_image, input2_type, inpu
74
  # Gradio UI
75
  with gr.Blocks() as demo:
76
  gr.Markdown("# JinaAI CLIP Multimodal Similarity")
77
- gr.Markdown("Compare similarity between two inputs: **Text-Text, Image-Image, or Image-Text**.")
78
 
79
  with gr.Row():
80
  input1_type = gr.Radio(["Text", "Image"], label="Input 1 Type", value="Text")
@@ -90,10 +71,10 @@ with gr.Blocks() as demo:
90
 
91
  def update_visibility(input1_type, input2_type):
92
  return (
93
- input1_type == "Text", # Input 1 text visibility
94
- input1_type == "Image", # Input 1 image visibility
95
- input2_type == "Text", # Input 2 text visibility
96
- input2_type == "Image" # Input 2 image visibility
97
  )
98
 
99
  input1_type.change(update_visibility, inputs=[input1_type, input2_type], outputs=[input1_text, input1_image, input2_text, input2_image])
 
2
  from transformers import AutoModel
3
  from PIL import Image
4
  import torch
 
5
 
6
  # Load JinaAI CLIP model
7
  model = AutoModel.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)
 
13
  - Image-Image
14
  - Text-Image & Image-Text
15
  """
16
+
17
+ # Validate inputs
18
+ if input1_type == "Text" and not input1_text.strip():
19
+ return "Error: Input 1 is empty!"
20
+ if input1_type == "Image" and input1_image is None:
21
+ return "Error: Please upload an image for Input 1!"
22
+
23
+ if input2_type == "Text" and not input2_text.strip():
24
+ return "Error: Input 2 is empty!"
25
+ if input2_type == "Image" and input2_image is None:
26
+ return "Error: Please upload an image for Input 2!"
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  try:
29
  with torch.no_grad():
30
+ if input1_type == "Text" and input2_type == "Text":
31
  # Text-Text Similarity
32
+ emb1 = model.encode_text([input1_text])
33
+ emb2 = model.encode_text([input2_text])
34
+ elif input1_type == "Image" and input2_type == "Image":
35
  # Image-Image Similarity
36
+ emb1 = model.encode_image([Image.fromarray(input1_image)])
37
+ emb2 = model.encode_image([Image.fromarray(input2_image)])
 
 
38
  else:
39
+ # Image-Text Similarity (either order)
40
+ if input1_type == "Image":
41
+ emb1 = model.encode_image([Image.fromarray(input1_image)])
42
+ emb2 = model.encode_text([input2_text])
 
 
43
  else:
44
+ emb1 = model.encode_text([input1_text])
45
+ emb2 = model.encode_image([Image.fromarray(input2_image)])
 
 
46
 
47
  # Compute cosine similarity
48
  similarity_score = (emb1 @ emb2.T).item()
 
55
  # Gradio UI
56
  with gr.Blocks() as demo:
57
  gr.Markdown("# JinaAI CLIP Multimodal Similarity")
58
+ gr.Markdown("Compare similarity between **Text-Text, Image-Image, or Image-Text**.")
59
 
60
  with gr.Row():
61
  input1_type = gr.Radio(["Text", "Image"], label="Input 1 Type", value="Text")
 
71
 
72
  def update_visibility(input1_type, input2_type):
73
  return (
74
+ input1_type == "Text", # Input 1 text visible
75
+ input1_type == "Image", # Input 1 image visible
76
+ input2_type == "Text", # Input 2 text visible
77
+ input2_type == "Image" # Input 2 image visible
78
  )
79
 
80
  input1_type.change(update_visibility, inputs=[input1_type, input2_type], outputs=[input1_text, input1_image, input2_text, input2_image])