wilwork committed
Commit ab10c56 · verified · 1 Parent(s): 2a8e08c

Update app.py

Files changed (1)
1. app.py +20 -27
app.py CHANGED
@@ -7,46 +7,39 @@ import torch
 model = AutoModel.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)
 
 def compute_similarity(input1_type, input1_text, input1_image, input2_type, input2_text, input2_image):
-    """
-    Computes similarity between:
-    - Text-Text
-    - Image-Image
-    - Text-Image & Image-Text
-    """
-
-    # Validate inputs
+    """Computes similarity for Text-Text, Image-Image, or Text-Image comparisons."""
+
+    # Handle empty inputs properly
     if input1_type == "Text" and not input1_text.strip():
-        return "Error: Input 1 is empty!"
+        return "Error: Input 1 (Text) is empty!"
     if input1_type == "Image" and (input1_image is None or isinstance(input1_image, bool)):
-        return "Error: Please upload an image for Input 1!"
+        return "Error: Please upload a valid image for Input 1!"
 
     if input2_type == "Text" and not input2_text.strip():
-        return "Error: Input 2 is empty!"
+        return "Error: Input 2 (Text) is empty!"
     if input2_type == "Image" and (input2_image is None or isinstance(input2_image, bool)):
-        return "Error: Please upload an image for Input 2!"
+        return "Error: Please upload a valid image for Input 2!"
 
     try:
         with torch.no_grad():
             if input1_type == "Text" and input2_type == "Text":
-                # Text-Text Similarity
                 emb1 = model.encode_text([input1_text])
                 emb2 = model.encode_text([input2_text])
             elif input1_type == "Image" and input2_type == "Image":
-                # Image-Image Similarity
-                emb1 = model.encode_image([Image.fromarray(input1_image)])
-                emb2 = model.encode_image([Image.fromarray(input2_image)])
+                emb1 = model.encode_image([Image.fromarray(input1_image)]) if input1_image is not None else None
+                emb2 = model.encode_image([Image.fromarray(input2_image)]) if input2_image is not None else None
             else:
-                # Image-Text Similarity (either order)
                 if input1_type == "Image":
-                    emb1 = model.encode_image([Image.fromarray(input1_image)])
+                    emb1 = model.encode_image([Image.fromarray(input1_image)]) if input1_image is not None else None
                     emb2 = model.encode_text([input2_text])
                 else:
                     emb1 = model.encode_text([input1_text])
-                    emb2 = model.encode_image([Image.fromarray(input2_image)])
+                    emb2 = model.encode_image([Image.fromarray(input2_image)]) if input2_image is not None else None
 
-            # Compute cosine similarity
-            similarity_score = (emb1 @ emb2.T).item()
+            if emb1 is None or emb2 is None:
+                return "Error: Failed to process one or both inputs."
 
+            similarity_score = (emb1 @ emb2.T).item()
             return similarity_score
 
     except Exception as e:
@@ -55,7 +48,7 @@ def compute_similarity(input1_type, input1_text, input1_image, input2_type, inpu
 # Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# JinaAI CLIP Multimodal Similarity")
-    gr.Markdown("Compare similarity between **Text-Text, Image-Image, or Image-Text**.")
+    gr.Markdown("Compare **Text-Text, Image-Image, or Text-Image** similarity.")
 
     with gr.Row():
         input1_type = gr.Radio(["Text", "Image"], label="Input 1 Type", value="Text")
@@ -71,10 +64,10 @@ with gr.Blocks() as demo:
 
     def update_visibility(input1_type, input2_type):
         return (
-            input1_type == "Text",   # Input 1 text visible
-            input1_type == "Image",  # Input 1 image visible
-            input2_type == "Text",   # Input 2 text visible
-            input2_type == "Image"   # Input 2 image visible
+            input1_type == "Text",   # Show text input 1
+            input1_type == "Image",  # Show image input 1
+            input2_type == "Text",   # Show text input 2
+            input2_type == "Image"   # Show image input 2
         )
 
     input1_type.change(update_visibility, inputs=[input1_type, input2_type], outputs=[input1_text, input1_image, input2_text, input2_image])
@@ -83,4 +76,4 @@ with gr.Blocks() as demo:
     compute_button = gr.Button("Compute Similarity")
    compute_button.click(compute_similarity, inputs=[input1_type, input1_text, input1_image, input2_type, input2_text, input2_image], outputs=output)
 
-    demo.launch()
+demo.launch()
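
A note on the similarity math: the new code keeps the raw dot product (emb1 @ emb2.T).item() after dropping the old "# Compute cosine similarity" comment. That dot product only equals cosine similarity if the embeddings come back unit-normalized, which depends on the jina-clip-v1 remote code. A minimal sketch that normalizes explicitly, with cosine_score as a hypothetical helper not present in this commit:

import torch
import torch.nn.functional as F

def cosine_score(emb1, emb2) -> float:
    # Hypothetical helper, not in the commit. Accept numpy arrays
    # or torch tensors from the encoder.
    emb1 = torch.as_tensor(emb1)
    emb2 = torch.as_tensor(emb2)
    # Unit-normalize so the dot product is the cosine of the angle
    # between the embeddings, regardless of their original scale.
    emb1 = F.normalize(emb1, p=2, dim=-1)
    emb2 = F.normalize(emb2, p=2, dim=-1)
    return (emb1 @ emb2.T).item()

With this, similarity_score = cosine_score(emb1, emb2) stays in [-1, 1] even if the model ever returns unnormalized vectors.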
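
One thing the diff leaves as-is: update_visibility returns plain booleans, but Gradio applies a handler's return values to each output component's value, not its visibility, so the Textbox/Image widgets would not actually be shown or hidden. The usual pattern, sketched here assuming a Gradio version that supports gr.update (3.x/4.x), returns update objects instead:

import gradio as gr

def update_visibility(input1_type, input2_type):
    # Each gr.update is applied to the matching output component
    # and toggles its visibility instead of overwriting its value.
    return (
        gr.update(visible=(input1_type == "Text")),   # input1_text
        gr.update(visible=(input1_type == "Image")),  # input1_image
        gr.update(visible=(input2_type == "Text")),   # input2_text
        gr.update(visible=(input2_type == "Image")),  # input2_image
    )

The .change wiring from the commit can stay the same; only the return values change.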