from ops_mm_embedding_v1 import OpsMMEmbeddingV1, fetch_image

model = OpsMMEmbeddingV1(
    "OpenSearch-AI/Ops-MM-embedding-v1-2B",
    device="cuda",
    attn_implementation="flash_attention_2",
)

t2i_prompt = "Find an image that matches the given text."

texts = [
    "The Tesla Cybertruck is a battery electric pickup truck built by Tesla, Inc. since 2023.",
    "Alibaba office.",
    "Alibaba office.",
]
images = [
    "https://upload.wikimedia.org/wikipedia/commons/e/e9/Tesla_Cybertruck_damaged_window.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/e/e0/TaobaoCity_Alibaba_Xixi_Park.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/thumb/b/b0/Alibaba_Binjiang_Park.jpg/1024px-Alibaba_Binjiang_Park.jpg",
]
images = [fetch_image(image) for image in images]

# Text and image embeddings
text_embeddings = model.get_text_embeddings(texts)
image_embeddings = model.get_image_embeddings(images)
print('Text and image embeddings', (text_embeddings @ image_embeddings.T).tolist())

# Fused embeddings: each text is encoded together with its paired image,
# guided by the instruction prompt
text_with_image_embeddings = model.get_fused_embeddings(
    texts=texts, images=images, instruction=t2i_prompt
)
print('Fused embeddings', (text_with_image_embeddings @ image_embeddings.T).tolist())

# Multi-image embeddings: each entry is a list of images encoded into a single vector
multi_images = [
    [images[0]],
    [images[1], images[2]],
]
multi_image_embeddings = model.get_image_embeddings(multi_images)
print('Multi-image embeddings', (multi_image_embeddings @ multi_image_embeddings.T).tolist())
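
# --- Optional: turning similarity scores into retrieval results ---
# A minimal sketch, not part of the ops_mm_embedding_v1 API. It assumes the
# embeddings above are torch tensors (the `@` / `.tolist()` calls suggest they
# are). Embeddings are normalized defensively so the dot product is cosine
# similarity even if the model does not already return unit-norm vectors.
import torch.nn.functional as F

scores = F.normalize(text_embeddings, dim=-1) @ F.normalize(image_embeddings, dim=-1).T
best = scores.argmax(dim=-1)  # best-matching image index for each text query
for row, idx in enumerate(best.tolist()):
    print(f"text {row} -> image {idx} (score {scores[row, idx].item():.3f})")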