File size: 8,115 Bytes
cfd3735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
"""Test OpenSearch functionality."""

import pytest

from langchain.docstore.document import Document
from langchain.vectorstores.opensearch_vector_search import (
    PAINLESS_SCRIPTING_SEARCH,
    SCRIPT_SCORING_SEARCH,
    OpenSearchVectorSearch,
)
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings

# Endpoint of the OpenSearch instance used by these tests; passed as
# `opensearch_url` to the `from_texts` calls below.
DEFAULT_OPENSEARCH_URL = "http://localhost:9200"
# Shared corpus that most of the tests below index (or add to an index).
texts = ["foo", "bar", "baz"]


def test_opensearch() -> None:
    """Test indexing the shared texts and a k=1 Approximate Search for `foo`."""
    store = OpenSearchVectorSearch.from_texts(
        texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
    )
    hits = store.similarity_search("foo", k=1)
    expected = [Document(page_content="foo")]
    assert hits == expected


def test_similarity_search_with_score() -> None:
    """Test that scored Approximate Search returns (document, score) pairs.

    Each shared text is indexed with a ``page`` metadata entry equal to its
    position; the top two hits for `foo` are compared against fixed scores.
    """
    page_metadata = [{"page": page} for page, _ in enumerate(texts)]
    store = OpenSearchVectorSearch.from_texts(
        texts,
        FakeEmbeddings(),
        metadatas=page_metadata,
        opensearch_url=DEFAULT_OPENSEARCH_URL,
    )
    scored_hits = store.similarity_search_with_score("foo", k=2)
    best = (Document(page_content="foo", metadata={"page": 0}), 1.0)
    runner_up = (Document(page_content="bar", metadata={"page": 1}), 0.5)
    assert scored_hits == [best, runner_up]


def test_opensearch_with_custom_field_name() -> None:
    """Test indexing and search with custom vector and text field names.

    The same two field names are supplied when building the index, when
    adding further texts and on each search. Both searches (before and after
    the extra texts are added) are expected to return `foo`.
    """
    field_names = {"vector_field": "my_vector", "text_field": "custom_text"}
    store = OpenSearchVectorSearch.from_texts(
        texts,
        FakeEmbeddings(),
        opensearch_url=DEFAULT_OPENSEARCH_URL,
        **field_names,
    )

    hits_before_add = store.similarity_search("foo", k=1, **field_names)
    assert hits_before_add == [Document(page_content="foo")]

    extra_texts = ["test", "add", "text", "method"]
    OpenSearchVectorSearch.add_texts(store, extra_texts, **field_names)

    hits_after_add = store.similarity_search("add", k=1, **field_names)
    assert hits_after_add == [Document(page_content="foo")]


def test_opensearch_with_metadatas() -> None:
    """Test that metadata supplied at indexing time is returned on search."""
    page_metadata = [{"page": page} for page, _ in enumerate(texts)]
    store = OpenSearchVectorSearch.from_texts(
        texts,
        FakeEmbeddings(),
        metadatas=page_metadata,
        opensearch_url=DEFAULT_OPENSEARCH_URL,
    )
    hits = store.similarity_search("foo", k=1)
    expected = [Document(page_content="foo", metadata={"page": 0})]
    assert hits == expected


def test_add_text() -> None:
    """Test adding texts with metadata to an index that already exists.

    The list returned by ``add_texts`` must have one entry per added text.
    """
    new_texts = ["test", "add", "text", "method"]
    new_metadatas = [{"page": position} for position, _ in enumerate(new_texts)]
    store = OpenSearchVectorSearch.from_texts(
        texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
    )
    returned_ids = OpenSearchVectorSearch.add_texts(store, new_texts, new_metadatas)
    assert len(returned_ids) == len(new_texts)


def test_opensearch_script_scoring() -> None:
    """Test indexing and Script Scoring Search combined with a pre-filter.

    The index is built with ``is_appx_search=False``. The pre-filter is a
    term filter on the ``text`` field for `bar`, which is the expected hit.
    """
    store = OpenSearchVectorSearch.from_texts(
        texts,
        FakeEmbeddings(),
        opensearch_url=DEFAULT_OPENSEARCH_URL,
        is_appx_search=False,
    )
    bar_only_filter = {"bool": {"filter": {"term": {"text": "bar"}}}}
    hits = store.similarity_search(
        "foo",
        k=1,
        search_type=SCRIPT_SCORING_SEARCH,
        pre_filter=bar_only_filter,
    )
    expected = [Document(page_content="bar")]
    assert hits == expected


def test_add_text_script_scoring() -> None:
    """Test adding additional text elements and validating using Script Scoring.

    The index is first built from ``text_input`` with
    ``is_appx_search=False``. The shared ``texts`` are then added with one
    ``page`` metadata entry per added text. A Script Scoring search for `add`
    using the ``innerproduct`` space type is expected to return `test`.
    """
    text_input = ["test", "add", "text", "method"]
    # Metadata must line up with the texts actually handed to add_texts (the
    # shared `texts`), not with the texts used to build the index.
    metadatas = [{"page": i} for i, _ in enumerate(texts)]
    docsearch = OpenSearchVectorSearch.from_texts(
        text_input,
        FakeEmbeddings(),
        opensearch_url=DEFAULT_OPENSEARCH_URL,
        is_appx_search=False,
    )
    docsearch.add_texts(texts, metadatas)
    output = docsearch.similarity_search(
        "add", k=1, search_type=SCRIPT_SCORING_SEARCH, space_type="innerproduct"
    )
    assert output == [Document(page_content="test")]


def test_opensearch_painless_scripting() -> None:
    """Test indexing and Painless Scripting Search combined with a pre-filter.

    The index is built with ``is_appx_search=False``. The pre-filter is a
    term filter on the ``text`` field for `baz`, which is the expected hit.
    """
    store = OpenSearchVectorSearch.from_texts(
        texts,
        FakeEmbeddings(),
        opensearch_url=DEFAULT_OPENSEARCH_URL,
        is_appx_search=False,
    )
    baz_only_filter = {"bool": {"filter": {"term": {"text": "baz"}}}}
    hits = store.similarity_search(
        "foo",
        k=1,
        search_type=PAINLESS_SCRIPTING_SEARCH,
        pre_filter=baz_only_filter,
    )
    expected = [Document(page_content="baz")]
    assert hits == expected


def test_add_text_painless_scripting() -> None:
    """Test adding additional text elements and validating using Painless Scripting.

    The index is first built from ``text_input`` with
    ``is_appx_search=False``. The shared ``texts`` are then added with one
    ``page`` metadata entry per added text. A Painless Scripting search for
    `add` using the ``cosineSimilarity`` space type is expected to return
    `test`.
    """
    text_input = ["test", "add", "text", "method"]
    # Metadata must line up with the texts actually handed to add_texts (the
    # shared `texts`), not with the texts used to build the index.
    metadatas = [{"page": i} for i, _ in enumerate(texts)]
    docsearch = OpenSearchVectorSearch.from_texts(
        text_input,
        FakeEmbeddings(),
        opensearch_url=DEFAULT_OPENSEARCH_URL,
        is_appx_search=False,
    )
    docsearch.add_texts(texts, metadatas)
    output = docsearch.similarity_search(
        "add",
        k=1,
        search_type=PAINLESS_SCRIPTING_SEARCH,
        space_type="cosineSimilarity",
    )
    assert output == [Document(page_content="test")]


def test_opensearch_invalid_search_type() -> None:
    """Test that ``similarity_search`` rejects an invalid ``search_type``."""
    store = OpenSearchVectorSearch.from_texts(
        texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
    )
    bad_search_type = "invalid_search_type"
    with pytest.raises(ValueError):
        store.similarity_search("foo", k=1, search_type=bad_search_type)


def test_opensearch_embedding_size_zero() -> None:
    """Test that indexing an empty list of texts raises ``RuntimeError``."""
    no_texts: list = []
    with pytest.raises(RuntimeError):
        OpenSearchVectorSearch.from_texts(
            no_texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
        )


def test_appx_search_with_boolean_filter() -> None:
    """Test Approximate Search combined with a Boolean filter.

    The filter is a ``must`` term clause on the ``text`` field for `bar` and
    is passed together with ``subquery_clause="should"``; only `bar` is
    expected back even though ``k=3``.
    """
    store = OpenSearchVectorSearch.from_texts(
        texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
    )
    must_be_bar = {"bool": {"must": [{"term": {"text": "bar"}}]}}
    hits = store.similarity_search(
        "foo",
        k=3,
        boolean_filter=must_be_bar,
        subquery_clause="should",
    )
    expected = [Document(page_content="bar")]
    assert hits == expected


def test_appx_search_with_lucene_filter() -> None:
    """Test Approximate Search combined with a Lucene filter.

    The index is created with ``engine="lucene"``; the filter is a ``must``
    term clause on the ``text`` field for `bar`, the only expected hit.
    """
    store = OpenSearchVectorSearch.from_texts(
        texts,
        FakeEmbeddings(),
        opensearch_url=DEFAULT_OPENSEARCH_URL,
        engine="lucene",
    )
    must_be_bar = {"bool": {"must": [{"term": {"text": "bar"}}]}}
    hits = store.similarity_search("foo", k=3, lucene_filter=must_be_bar)
    expected = [Document(page_content="bar")]
    assert hits == expected


def test_opensearch_with_custom_field_name_appx_true() -> None:
    """Test search on an index built with ``is_appx_search=True`` set explicitly."""
    # NOTE(review): despite the test name, no custom vector/text field names
    # are passed anywhere in this test -- confirm the intended coverage.
    corpus = ["add", "test", "text", "method"]
    store = OpenSearchVectorSearch.from_texts(
        corpus,
        FakeEmbeddings(),
        opensearch_url=DEFAULT_OPENSEARCH_URL,
        is_appx_search=True,
    )
    hits = store.similarity_search("add", k=1)
    expected = [Document(page_content="add")]
    assert hits == expected


def test_opensearch_with_custom_field_name_appx_false() -> None:
    """Test search on an index built without passing ``is_appx_search``."""
    # NOTE(review): the test name suggests ``is_appx_search=False`` and custom
    # field names, but neither is passed here, so the library defaults apply.
    # Confirm whether the flag was meant to be set explicitly.
    corpus = ["add", "test", "text", "method"]
    store = OpenSearchVectorSearch.from_texts(
        corpus, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
    )
    hits = store.similarity_search("add", k=1)
    expected = [Document(page_content="add")]
    assert hits == expected