# KC / app.py — Hugging Face Space (author: wilwork, revision 2a8e08c, 3.64 kB)
# JinaAI CLIP multimodal similarity demo.
import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import AutoModel
# Load JinaAI CLIP model
model = AutoModel.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)
def compute_similarity(input1_type, input1_text, input1_image, input2_type, input2_text, input2_image):
    """
    Compute a cosine-similarity score between two inputs, each either text or image.

    Supports Text-Text, Image-Image, and Text-Image (either order) pairings.

    Args:
        input1_type: "Text" or "Image" — which widget feeds input 1.
        input1_text: text for input 1; may be None when the textbox is hidden.
        input1_image: numpy array from gr.Image for input 1; may be None.
        input2_type: "Text" or "Image" — which widget feeds input 2.
        input2_text: text for input 2; may be None when the textbox is hidden.
        input2_image: numpy array from gr.Image for input 2; may be None.

    Returns:
        A float cosine similarity in [-1, 1], or an "Error: ..." string when
        validation fails or encoding raises.
    """
    # Validate inputs. Gradio can hand us None for a hidden textbox, so guard
    # before calling .strip() (the original crashed with AttributeError here).
    if input1_type == "Text" and not (input1_text and input1_text.strip()):
        return "Error: Input 1 is empty!"
    # The bool check defends against a visibility handler mistakenly writing a
    # boolean into the image component's value.
    if input1_type == "Image" and (input1_image is None or isinstance(input1_image, bool)):
        return "Error: Please upload an image for Input 1!"
    if input2_type == "Text" and not (input2_text and input2_text.strip()):
        return "Error: Input 2 is empty!"
    if input2_type == "Image" and (input2_image is None or isinstance(input2_image, bool)):
        return "Error: Please upload an image for Input 2!"
    try:
        with torch.no_grad():
            if input1_type == "Text" and input2_type == "Text":
                # Text-Text similarity
                emb1 = model.encode_text([input1_text])
                emb2 = model.encode_text([input2_text])
            elif input1_type == "Image" and input2_type == "Image":
                # Image-Image similarity
                emb1 = model.encode_image([Image.fromarray(input1_image)])
                emb2 = model.encode_image([Image.fromarray(input2_image)])
            else:
                # Mixed Image-Text similarity (either order)
                if input1_type == "Image":
                    emb1 = model.encode_image([Image.fromarray(input1_image)])
                    emb2 = model.encode_text([input2_text])
                else:
                    emb1 = model.encode_text([input1_text])
                    emb2 = model.encode_image([Image.fromarray(input2_image)])
        # True cosine similarity: normalize explicitly so the score is in
        # [-1, 1] even if the model does not return unit-length embeddings
        # (the original used a raw dot product).
        v1 = np.asarray(emb1, dtype=np.float64).ravel()
        v2 = np.asarray(emb2, dtype=np.float64).ravel()
        denom = float(np.linalg.norm(v1) * np.linalg.norm(v2))
        if denom == 0.0:
            return "Error: zero-norm embedding; cannot compute similarity."
        return float(np.dot(v1, v2) / denom)
    except Exception as e:
        # Surface encoding failures to the UI rather than crashing the app.
        return f"Error: {str(e)}"
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# JinaAI CLIP Multimodal Similarity")
    gr.Markdown("Compare similarity between **Text-Text, Image-Image, or Image-Text**.")
    with gr.Row():
        input1_type = gr.Radio(["Text", "Image"], label="Input 1 Type", value="Text")
        input2_type = gr.Radio(["Text", "Image"], label="Input 2 Type", value="Text")
    input1_text = gr.Textbox(label="Input 1 (Text)", visible=True)
    input1_image = gr.Image(type="numpy", label="Input 1 (Image)", visible=False)
    input2_text = gr.Textbox(label="Input 2 (Text)", visible=True)
    input2_image = gr.Image(type="numpy", label="Input 2 (Image)", visible=False)
    output = gr.Textbox(label="Similarity Score / Error", interactive=False)

    def update_visibility(input1_type, input2_type):
        """Show the text or image widget for each input based on its selected type."""
        # BUG FIX: the original returned bare booleans, which Gradio writes into
        # the components as their *values* (e.g. setting an Image's value to
        # False) instead of toggling visibility. gr.update(visible=...) is the
        # correct way to change a component property from an event handler.
        return (
            gr.update(visible=input1_type == "Text"),   # Input 1 text
            gr.update(visible=input1_type == "Image"),  # Input 1 image
            gr.update(visible=input2_type == "Text"),   # Input 2 text
            gr.update(visible=input2_type == "Image"),  # Input 2 image
        )

    # Re-evaluate widget visibility whenever either type selector changes.
    input1_type.change(update_visibility, inputs=[input1_type, input2_type], outputs=[input1_text, input1_image, input2_text, input2_image])
    input2_type.change(update_visibility, inputs=[input1_type, input2_type], outputs=[input1_text, input1_image, input2_text, input2_image])
    compute_button = gr.Button("Compute Similarity")
    compute_button.click(compute_similarity, inputs=[input1_type, input1_text, input1_image, input2_type, input2_text, input2_image], outputs=output)
demo.launch()