Spaces:
Running
Running
File size: 2,367 Bytes
e75a985 e205ab6 e75a985 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
"""
Builds a similarity index for a directory of images and runs a Streamlit application that searches it with text queries
"""
import glob
import os
import sys
import tarfile
import requests
import streamlit as st
from PIL import Image
from txtai.embeddings import Embeddings
def images(directory):
    """
    Yields an indexable entry for each jpg and png file found in a directory.

    Args:
        directory: directory with images

    Yields:
        (path, opened image, None) tuples - jpg matches first, then png matches
    """

    # Both patterns are resolved up front, before the first image is opened
    matches = glob.glob(directory + "/*jpg")
    matches += glob.glob(directory + "/*png")

    for match in matches:
        yield (match, Image.open(match), None)
@st.cache(allow_output_mutation=True)
def build(directory):
    """
    Creates an embeddings index over the images in a directory.

    The index is built with the clip-ViT-B-32 model. Afterwards the configured
    model path is switched to the multilingual variant and the model is reloaded,
    so the returned instance uses that model from then on.

    Args:
        directory: directory with images

    Returns:
        Embeddings index
    """

    # Index the images with the base CLIP model
    config = {"method": "sentence-transformers", "path": "clip-ViT-B-32"}
    index = Embeddings(config)
    index.index(images(directory))

    # Update model to support multilingual queries
    index.config["path"] = "sentence-transformers/clip-ViT-B-32-multilingual-v1"
    index.model = index.loadVectors()

    return index
def app(directory):
    """
    Renders the Streamlit image search page and shows the top match for a text query.

    Args:
        directory: directory with images
    """

    # Embeddings index over the images in directory
    embeddings = build(directory)

    st.title("Image search")
    st.markdown("This application shows how images and text can be embedded into the same space to support similarity search. ")

    # Adjacent literals are joined into the same single string that is passed to markdown
    description = (
        "[sentence-transformers](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search) "
        "recently added support for the [OpenAI CLIP model](https://github.com/openai/CLIP). This model embeds text and images into "
        "the same space, enabling image similarity search. txtai can directly utilize these models."
    )
    st.markdown(description)

    query = st.text_input("Search query:")
    if not query:
        # Nothing entered yet - no search to run
        return

    # Only the top result is requested; its first element is opened as the image
    # and its second element is discarded
    best = embeddings.search(query, 1)[0]
    path, _ = best
    st.image(Image.open(path))
if __name__ == "__main__":
    # Entry point: make sure the sample images are available locally, then start the app
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    # This must NOT be named `images`: a module-level assignment would rebind the
    # images() generator function to a string, and build() would then fail with
    # "TypeError: 'str' object is not callable" when it calls images(directory).
    directory = "/tmp/txtai"

    # Download and unpack the sample data only when the directory is not there yet
    if not os.path.exists(directory):
        os.makedirs(directory)

        url = "https://github.com/neuml/txtai/releases/download/v3.5.0/tests.tar.gz"

        # Context managers close the HTTP connection and the archive even when extraction fails.
        # The timeout keeps a stalled connection from blocking startup indefinitely.
        with requests.get(url, stream=True, timeout=60) as response:
            # Fail with a clear HTTP error instead of handing an error page to tarfile
            response.raise_for_status()

            with tarfile.open(fileobj=response.raw, mode="r|gz") as archive:
                # NOTE(review): extractall trusts the member paths inside the archive.
                # The source is a fixed release URL; do not point this at untrusted archives.
                archive.extractall(path="/tmp")

    app(directory)
|