File size: 2,367 Bytes
e75a985
e205ab6
e75a985
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""
Builds a similarity index for a directory of images
"""

import glob
import os
import sys
import tarfile

import requests
import streamlit as st

from PIL import Image

from txtai.embeddings import Embeddings


def images(directory):
    """
    Yields an index-ready tuple for every jpg and png file found directly in a directory.

    Each yielded element is (path, opened PIL image, None). All jpg matches are
    produced first, followed by all png matches.

    Args:
        directory: directory with images
    """

    # Both patterns are resolved up front, before the first element is produced
    jpgs = glob.glob(directory + "/*jpg")
    pngs = glob.glob(directory + "/*png")

    for filename in jpgs + pngs:
        picture = Image.open(filename)
        yield (filename, picture, None)


@st.cache(allow_output_mutation=True)
def build(directory):
    """
    Creates an embeddings index over the images found in a directory.

    The index is built with the clip-ViT-B-32 model. Afterwards the configured model
    path is switched to the multilingual CLIP variant and the vectors model is reloaded.

    Args:
        directory: directory with images

    Returns:
        Embeddings index
    """

    config = {"method": "sentence-transformers", "path": "clip-ViT-B-32"}

    index = Embeddings(config)
    index.index(images(directory))

    # Swap in the multilingual model so that search queries can be multilingual
    index.config["path"] = "sentence-transformers/clip-ViT-B-32-multilingual-v1"
    index.model = index.loadVectors()

    return index


def app(directory):
    """
    Renders the Streamlit page and runs text queries against an image embeddings index.

    When a query is entered, the top search result is opened and displayed as an image.

    Args:
        directory: directory with images
    """

    # The index is obtained before any page element is rendered
    index = build(directory)

    st.title("Image search")

    st.markdown("This application shows how images and text can be embedded into the same space to support similarity search. ")

    overview = (
        "[sentence-transformers](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search) "
        "recently added support for the [OpenAI CLIP model](https://github.com/openai/CLIP). This model embeds text and images into "
        "the same space, enabling image similarity search. txtai can directly utilize these models."
    )
    st.markdown(overview)

    query = st.text_input("Search query:")
    if not query:
        return

    # Only the single best match is requested; its id is passed to Image.open
    match, _ = index.search(query, 1)[0]
    st.image(Image.open(match))


if __name__ == "__main__":
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    # NOTE: this variable must not be named `images`. A module-level assignment here
    # rebinds the images() generator that build() calls, which then fails with
    # "TypeError: 'str' object is not callable".
    directory = "/tmp/txtai"

    # Download and unpack the sample images only when they are not already present
    if not os.path.exists(directory):
        url = "https://github.com/neuml/txtai/releases/download/v3.5.0/tests.tar.gz"

        with requests.get(url, stream=True) as response:
            # Fail with a clear HTTP error instead of handing an error page to tarfile
            response.raise_for_status()

            # Create the directory only after the request succeeded, so a failed download
            # does not leave an empty directory that would skip the download next run
            os.makedirs(directory)

            # NOTE(review): extractall trusts the member paths stored in the archive.
            # This is a pinned release artifact; validate members if the URL ever changes.
            with tarfile.open(fileobj=response.raw, mode="r|gz") as archive:
                archive.extractall(path="/tmp")

    app(directory)