|
import gradio as gr |
|
from sentence_transformers import SentenceTransformer |
|
import requests |
|
|
|
_MODEL_NAME = "intfloat/mmE5-mllama-11b-instruct"
_EXAMPLE_IMAGE_URL = "https://github.com/haon-chen/mmE5/blob/main/figures/example.jpg?raw=true"
_EXAMPLE_IMAGE_PATH = "cat_dog_example.jpg"
_DOWNLOAD_TIMEOUT_SECONDS = 30

# Lazily-created model shared by every call to ``greet``.
_model = None


def _get_model():
    """Return the shared embedding model, constructing it on first use."""
    global _model
    if _model is None:
        # NOTE: trust_remote_code=True runs Python code shipped in the model
        # repository; keep the model name pinned to a source you trust.
        _model = SentenceTransformer(_MODEL_NAME, trust_remote_code=True)
    return _model


def _download_example_image():
    """Download the example image and return its raw bytes.

    Raises:
        requests.RequestException: if the request times out, fails, or the
            server answers with an HTTP error status.
    """
    response = requests.get(_EXAMPLE_IMAGE_URL, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
    # Fail loudly instead of saving an error page as if it were a JPEG.
    response.raise_for_status()
    return response.content


def _print_similarity(model, image_inputs, text_inputs):
    """Encode both input lists with *model* and print their similarity."""
    image_embeddings = model.encode(image_inputs)
    text_embeddings = model.encode(text_inputs)
    similarity = model.similarity(image_embeddings, text_embeddings)
    print(similarity)


def greet(name: str) -> str:
    """Run two image/text similarity demos, then return a greeting.

    Side effects: downloads the example image, writes it to
    ``cat_dog_example.jpg`` in the current working directory, and prints
    two similarity results to stdout.

    Args:
        name: Text appended to the greeting.

    Returns:
        The string ``"Hello " + name + "!!"``.

    Raises:
        requests.RequestException: if the example image cannot be downloaded.
    """
    # Reuse one model instance; constructing it per request is very slow.
    model = _get_model()

    dog_cat_image_bytes = _download_example_image()
    with open(_EXAMPLE_IMAGE_PATH, "wb") as f:
        f.write(dog_cat_image_bytes)

    # Demo 1: the image is passed as a file path, with a question-style prompt.
    _print_similarity(
        model,
        [{
            "image": _EXAMPLE_IMAGE_PATH,
            "text": "Represent the given image with the following question: What is in the image",
        }],
        [
            {"text": "A cat and a dog"},
            {"text": "A cat and a tiger"},
        ],
    )

    # Demo 2: the image is passed as raw bytes, with retrieval-style captions.
    _print_similarity(
        model,
        [
            {"image": dog_cat_image_bytes, "text": "Represent the given image."},
        ],
        [
            {"text": "Find me an everyday image that matches the given caption: A cat and a dog."},
            {"text": "Find me an everyday image that matches the given caption: A cat and a tiger."},
        ],
    )

    return "Hello " + name + "!!"
|
|
|
# Wrap ``greet`` in a text-in / text-out Gradio interface and start serving it.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
demo.launch()
|
|