Spaces:

gschurck
/

mmE5-test

Sleeping

mmE5-test / app.py

Update app.py

a50a7f8 verified 3 months ago

1.61 kB

	import gradio as gr
	from sentence_transformers import SentenceTransformer
	import requests

	# Identifier of the embedding checkpoint passed to SentenceTransformer.
	_MODEL_NAME = "intfloat/mmE5-mllama-11b-instruct"
	# Example picture (a cat and a dog) used for the similarity demo.
	_EXAMPLE_IMAGE_URL = "https://github.com/haon-chen/mmE5/blob/main/figures/example.jpg?raw=true"
	# Local file the example picture is written to before encoding it by path.
	_EXAMPLE_IMAGE_PATH = "cat_dog_example.jpg"
	# Upper bound, in seconds, on how long the image download may stall.
	_DOWNLOAD_TIMEOUT_SECONDS = 30

	# Process-wide model instance; populated on first use by _get_model().
	_MODEL = None


	def _get_model():
	    """Return the shared SentenceTransformer, loading it on first use only."""
	    global _MODEL
	    if _MODEL is None:
	        # NOTE(review): trust_remote_code=True lets code shipped in the model
	        # repository run locally; kept because the original call passes it.
	        _MODEL = SentenceTransformer(_MODEL_NAME, trust_remote_code=True)
	    return _MODEL


	def _download_example_image():
	    """Fetch the example image and return its bytes.

	    Raises:
	        requests.RequestException: if the request times out, fails, or the
	            server answers with an HTTP error status.
	    """
	    response = requests.get(_EXAMPLE_IMAGE_URL, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
	    # Fail loudly instead of saving an error page under a .jpg name.
	    response.raise_for_status()
	    return response.content


	def greet(name):
	    """Run the image/text similarity demo, then return a greeting for *name*.

	    The two similarity results are only printed; they are not part of the
	    return value.

	    Args:
	        name: Text to embed in the greeting.

	    Returns:
	        The string "Hello <name>!!".

	    Raises:
	        requests.RequestException: if the example image cannot be downloaded.
	    """
	    # Reuse one model instance across calls rather than reloading per request.
	    model = _get_model()

	    # Download an example image of a cat and a dog
	    dog_cat_image_bytes = _download_example_image()
	    with open(_EXAMPLE_IMAGE_PATH, "wb") as f:
	        f.write(dog_cat_image_bytes)

	    # Image + Text -> Text
	    image_embeddings = model.encode([{
	        "image": _EXAMPLE_IMAGE_PATH,
	        "text": "Represent the given image with the following question: What is in the image",
	    }])
	    text_embeddings = model.encode([
	        {"text": "A cat and a dog"},
	        {"text": "A cat and a tiger"},
	    ])

	    similarity = model.similarity(image_embeddings, text_embeddings)
	    print(similarity)
	    # tensor([[0.3967, 0.3090]])
	    # ✅ The first text is most similar to the image

	    # Text -> Image
	    image_embeddings = model.encode([
	        {"image": dog_cat_image_bytes, "text": "Represent the given image."},
	    ])
	    text_embeddings = model.encode([
	        {"text": "Find me an everyday image that matches the given caption: A cat and a dog."},
	        {"text": "Find me an everyday image that matches the given caption: A cat and a tiger."},
	    ])

	    similarity = model.similarity(image_embeddings, text_embeddings)
	    print(similarity)
	    return f"Hello {name}!!"

	# Wrap greet in a Gradio interface with one text input and one text output,
	# then launch it.
	demo = gr.Interface(
	    fn=greet,
	    inputs="text",
	    outputs="text",
	)
	demo.launch()