wilwork committed on
Commit
cf604df
·
verified ·
1 Parent(s): 3dcfaf0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -61
app.py CHANGED
@@ -1,80 +1,90 @@
1
  import gradio as gr
2
  from transformers import AutoModel
3
  from PIL import Image
4
- import numpy as np
5
  import torch
 
6
 
7
  # Load JinaAI CLIP model
8
- model = AutoModel.from_pretrained("jinaai/jina-clip-v1", trust_remote_code=True)
9
 
10
- # Function to process input
11
def process_input(input_data, input_type):
    """Encode one user input with the module-level CLIP ``model``.

    Args:
        input_data: Raw value coming from the UI: a string when
            ``input_type`` is "Text", a NumPy array when it is "Image".
        input_type: Either "Text" or "Image"; any other value is rejected.

    Returns:
        The result of ``model.encode_text`` (called with a one-element list)
        or ``model.encode_image`` (called with a single PIL image), or
        ``None`` when the text is blank, the value has the wrong Python type
        for the declared ``input_type``, or ``input_type`` is unknown.
    """
    if input_type == "Text":
        # Check the type before calling .strip(): a None (or otherwise
        # non-string) value would raise AttributeError instead of being
        # reported as invalid input.
        if isinstance(input_data, str) and input_data.strip():
            return model.encode_text([input_data])
        return None

    if input_type == "Image":
        # The image widgets are declared with type="numpy", so anything that
        # is not an ndarray is treated as invalid.
        if isinstance(input_data, np.ndarray):
            return model.encode_image(Image.fromarray(input_data))
        return None

    return None
 
 
 
20
 
21
- # Function to compute similarity
22
def compute_similarity(input1, input2, input1_type, input2_type):
    """Validate two inputs, embed them and report their similarity.

    Args:
        input1: First value (string for "Text", image array for "Image").
        input2: Second value, same convention as ``input1``.
        input1_type: "Text" or "Image", describing ``input1``.
        input2_type: "Text" or "Image", describing ``input2``.

    Returns:
        ``"Similarity Score: <x.xxxx>"`` on success, otherwise a string
        starting with ``"Error:"`` that explains what was wrong.
    """
    sides = ((1, input1, input1_type), (2, input2, input2_type))

    # Text checks run for both sides before any image check, matching the
    # order in which the original reported errors.
    for position, value, kind in sides:
        # isinstance guard: a None text value would otherwise crash on
        # .strip() instead of producing the "empty" message.
        if kind == "Text" and not (isinstance(value, str) and value.strip()):
            return f"Error: Input {position} is empty!"

    for position, value, kind in sides:
        if kind == "Image" and value is None:
            return f"Error: Image {position} is missing!"

    embedding1 = process_input(input1, input1_type)
    embedding2 = process_input(input2, input2_type)

    if embedding1 is None or embedding2 is None:
        return "Error: Failed to process input!"

    # Dot product of the two embeddings. NOTE(review): this equals cosine
    # similarity only if the model returns unit-norm embeddings - confirm
    # against the jina-clip encode_* defaults.
    similarity_score = (embedding1 @ embedding2.T).item()
    return f"Similarity Score: {similarity_score:.4f}"
43
 
44
- # Function to update UI dynamically
45
def update_visibility(input1_type, input2_type):
    """Build the four component updates for the current type selection.

    Args:
        input1_type: "Text" or "Image" for the first input.
        input2_type: "Text" or "Image" for the second input.

    Returns:
        A 4-tuple of ``gr.update`` results ordered as
        (text 1, image 1, text 2, image 2). Each one sets ``visible`` to
        whether its widget matches the selected type, and also sets a value.
    """
    # NOTE(review): value=None is sent to both image slots on every call,
    # including the side whose type did not change - confirm that resetting
    # the other side's image is intended.
    updates = []
    for selected in (input1_type, input2_type):
        wants_image = selected == "Image"
        text_value = "" if wants_image else None
        updates.append(gr.update(visible=(selected == "Text"), value=text_value))
        updates.append(gr.update(visible=wants_image, value=None))
    return tuple(updates)
52
 
53
  # Gradio UI
54
def _run_similarity(text1, image1, text2, image2, type1, type2):
    """Forward the widget value matching each selected type.

    The button previously sent only the two textboxes, so an "Image"
    selection never delivered its image to ``compute_similarity``.
    """
    first = image1 if type1 == "Image" else text1
    second = image2 if type2 == "Image" else text2
    return compute_similarity(first, second, type1, type2)


with gr.Blocks() as demo:
    gr.Markdown("## JinaAI CLIP Multimodal Similarity")

    with gr.Row():
        input1_type = gr.Radio(["Text", "Image"], label="Input 1 Type", value="Text")
        input2_type = gr.Radio(["Text", "Image"], label="Input 2 Type", value="Image")

    with gr.Row():
        input1_text = gr.Textbox(label="Text Input 1", visible=True)
        input1_image = gr.Image(type="numpy", interactive=True, label="Image Input 1", visible=False)

    with gr.Row():
        input2_text = gr.Textbox(label="Text Input 2", visible=False)
        input2_image = gr.Image(type="numpy", interactive=True, label="Image Input 2", visible=True)

    output = gr.Textbox(label="Similarity Score / Error", interactive=False)

    # Either radio changing re-evaluates the visibility of all four widgets.
    value_widgets = [input1_text, input1_image, input2_text, input2_image]
    for selector in (input1_type, input2_type):
        selector.change(
            update_visibility,
            inputs=[input1_type, input2_type],
            outputs=value_widgets,
        )

    btn = gr.Button("Compute Similarity")
    # Pass all four value widgets plus both selectors; the dispatcher picks
    # the relevant value per side.
    btn.click(
        _run_similarity,
        inputs=value_widgets + [input1_type, input2_type],
        outputs=output,
    )

demo.launch()
 
1
  import gradio as gr
2
  from transformers import AutoModel
3
  from PIL import Image
 
4
  import torch
5
+ import numpy as np
6
 
7
  # Load JinaAI CLIP model
8
+ model = AutoModel.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)
9
 
10
def _embed(value):
    """Encode one already-validated input with the module-level ``model``.

    Strings are passed to ``model.encode_text``; any other value is treated
    as a NumPy image array, converted to a PIL image and passed to
    ``model.encode_image``. Both encoders receive a one-element list.
    """
    if isinstance(value, str):
        return model.encode_text([value])
    return model.encode_image([Image.fromarray(value)])


def compute_similarity(input1, input2):
    """Compute the similarity between two inputs of any supported kind.

    Supported pairings are text/text, image/image and image/text in either
    order. A value counts as text when it is a non-blank string and as an
    image when it is a NumPy array.

    Args:
        input1: First input (string or NumPy image array).
        input2: Second input (string or NumPy image array).

    Returns:
        The similarity score as a Python number on success, or a string
        starting with ``"Error:"`` when an input is invalid or encoding
        fails.
    """

    def _is_valid(value):
        # Blank / whitespace-only strings are rejected just like None.
        if isinstance(value, str):
            return value.strip() != ""
        return isinstance(value, np.ndarray)

    if not (_is_valid(input1) and _is_valid(input2)):
        return "Error: Both inputs must be valid (image or text)!"

    try:
        with torch.no_grad():
            # One helper covers all three pairings; emb1 always belongs to
            # input1 and emb2 to input2, as before.
            emb1 = _embed(input1)
            emb2 = _embed(input2)

            # Dot product of the embeddings. NOTE(review): this is a cosine
            # similarity only if the encoders return unit-norm vectors -
            # confirm against the model's encode_* defaults.
            similarity_score = (emb1 @ emb2.T).item()

        return similarity_score
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the
        # request handler.
        return f"Error: {e}"
 
 
 
 
 
 
60
 
61
  # Gradio UI
62
def update_visibility(input1_type, input2_type):
    """Return visibility updates for the four value widgets.

    Args:
        input1_type: "Text" or "Image" for the first input.
        input2_type: "Text" or "Image" for the second input.

    Returns:
        A 4-tuple of ``gr.update`` results ordered as
        (text 1, image 1, text 2, image 2). Plain booleans would be
        interpreted as component values, not as visibility changes.
    """
    return (
        gr.update(visible=input1_type == "Text"),   # Text input 1 visibility
        gr.update(visible=input1_type == "Image"),  # Image input 1 visibility
        gr.update(visible=input2_type == "Text"),   # Text input 2 visibility
        gr.update(visible=input2_type == "Image"),  # Image input 2 visibility
    )


def _compare_selected(input1_type, text1, image1, input2_type, text2, image2):
    """Pick the value matching each selected type and compare the pair.

    ``compute_similarity`` takes exactly two values, so the six widget
    values cannot be handed to it directly.
    """
    first = image1 if input1_type == "Image" else text1
    second = image2 if input2_type == "Image" else text2
    return compute_similarity(first, second)


with gr.Blocks(title="JinaAI CLIP Multimodal Similarity") as demo:
    gr.Markdown("# JinaAI CLIP Multimodal Similarity")
    gr.Markdown("Compare similarity between two inputs (Text, Image, or both).")

    with gr.Row():
        input1_type = gr.Radio(["Text", "Image"], label="Input 1 Type", value="Text")
        input2_type = gr.Radio(["Text", "Image"], label="Input 2 Type", value="Text")

    with gr.Row():
        input1_text = gr.Textbox(label="Text Input 1", visible=True)
        input1_image = gr.Image(type="numpy", label="Image Input 1", visible=False)

    with gr.Row():
        input2_text = gr.Textbox(label="Text Input 2", visible=True)
        input2_image = gr.Image(type="numpy", label="Image Input 2", visible=False)

    output = gr.Textbox(label="Similarity Score / Error", interactive=False)

    # Event listeners need component objects (not label strings), and must
    # fire when a radio changes rather than only once at page load.
    value_widgets = [input1_text, input1_image, input2_text, input2_image]
    for selector in (input1_type, input2_type):
        selector.change(
            update_visibility,
            inputs=[input1_type, input2_type],
            outputs=value_widgets,
        )

    submit = gr.Button("Compute Similarity")
    submit.click(
        _compare_selected,
        inputs=[
            input1_type, input1_text, input1_image,
            input2_type, input2_text, input2_image,
        ],
        outputs=output,
    )

demo.launch()