Spaces:
Running
Running
"""Test Deep Lake functionality.""" | |
import deeplake | |
import pytest | |
from pytest import FixtureRequest | |
from langchain.docstore.document import Document | |
from langchain.vectorstores import DeepLake | |
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings | |
@pytest.fixture
def deeplake_datastore() -> DeepLake:
    """Provide a DeepLake store pre-populated with three documents.

    The store is built at the ``mem://test_path`` dataset path from the
    texts ``foo``, ``bar`` and ``baz``; the document at index ``i`` carries
    the metadata ``{"page": str(i)}``.

    It is registered as a pytest fixture because the tests in this module
    request it by parameter name; without the decorator pytest cannot
    resolve that argument.

    Returns:
        The populated ``DeepLake`` vector store. The fixture performs no
        teardown of its own.
    """
    texts = ["foo", "bar", "baz"]
    # One metadata dict per text, keyed by the text's position.
    metadatas = [{"page": str(i)} for i, _ in enumerate(texts)]
    return DeepLake.from_texts(
        dataset_path="mem://test_path",
        texts=texts,
        metadatas=metadatas,
        embedding=FakeEmbeddings(),
    )
# NOTE(review): the metric names below are assumed from the DeepLake vector
# store's documented ``distance_metric`` options -- confirm against the
# installed version. Only "cos" is referenced explicitly elsewhere in this
# module; every other metric is expected to score an exact match as 0.0.
@pytest.fixture(params=["L1", "L2", "max", "cos"])
def distance_metric(request: FixtureRequest) -> str:
    """Parametrized fixture supplying one distance metric name per test run.

    Args:
        request: pytest's fixture request; ``request.param`` holds the
            current entry of the ``params`` list given to the decorator.

    Returns:
        The metric name for the current parametrization.
    """
    return request.param
def test_deeplake() -> None:
    """Build a store from plain texts and check the top hit for ``foo``.

    No metadata is supplied, so the expected result is a single
    ``Document`` whose only populated field is ``page_content``.
    """
    store = DeepLake.from_texts(
        dataset_path="mem://test_path",
        texts=["foo", "bar", "baz"],
        embedding=FakeEmbeddings(),
    )
    hits = store.similarity_search("foo", k=1)
    expected = [Document(page_content="foo")]
    assert hits == expected
def test_deeplake_with_metadatas() -> None:
    """Build a store from texts plus metadata and check the top hit.

    Each text is paired with ``{"page": str(index)}``; the single result
    for the query ``foo`` must carry both the text and its metadata.
    """
    corpus = ["foo", "bar", "baz"]
    page_tags = [{"page": str(position)} for position, _ in enumerate(corpus)]
    store = DeepLake.from_texts(
        dataset_path="mem://test_path",
        texts=corpus,
        embedding=FakeEmbeddings(),
        metadatas=page_tags,
    )
    hits = store.similarity_search("foo", k=1)
    expected = [Document(page_content="foo", metadata={"page": "0"})]
    assert hits == expected
def test_deeplakewith_persistence() -> None:
    """Test end to end construction and search, with persistence.

    Steps:
        1. Remove any dataset already present at the on-disk test path.
        2. Build a store from three texts and check the top hit for ``foo``.
        3. Call ``persist()``, then open a second ``DeepLake`` on the same
           path and check that the same search returns the same document.
        4. Delete the dataset.
    """
    dataset_path = "./tests/persist_dir"
    # Start from a clean slate so a leftover dataset cannot affect the result.
    if deeplake.exists(dataset_path):
        deeplake.delete(dataset_path)

    texts = ["foo", "bar", "baz"]
    docsearch = DeepLake.from_texts(
        dataset_path=dataset_path,
        texts=texts,
        embedding=FakeEmbeddings(),
    )

    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo")]

    docsearch.persist()

    # Get a new VectorStore from the persisted directory
    docsearch = DeepLake(
        dataset_path=dataset_path,
        embedding_function=FakeEmbeddings(),
    )
    output = docsearch.similarity_search("foo", k=1)
    # Verify the reopened store actually serves the persisted data; without
    # this check the reload step above would be exercised but never tested.
    assert output == [Document(page_content="foo")]

    # Clean up
    docsearch.delete_dataset()

    # Persist doesn't need to be called again
    # Data will be automatically persisted on object deletion
    # Or on program exit
def test_similarity_search(deeplake_datastore: DeepLake, distance_metric: str) -> None:
    """Query ``foo`` under the given metric and expect the page-0 document.

    The dataset is deleted after the assertion passes.
    """
    expected = [Document(page_content="foo", metadata={"page": "0"})]
    hits = deeplake_datastore.similarity_search(
        "foo",
        k=1,
        distance_metric=distance_metric,
    )
    assert hits == expected
    deeplake_datastore.delete_dataset()
def test_similarity_search_by_vector(
    deeplake_datastore: DeepLake, distance_metric: str
) -> None:
    """Search with the embedding of the second text and expect ``bar``.

    The query vector is the element at index 1 of the embeddings produced
    for ``["foo", "bar", "baz"]``. The dataset is deleted after the
    assertion passes.
    """
    vectors = FakeEmbeddings().embed_documents(["foo", "bar", "baz"])
    bar_vector = vectors[1]
    hits = deeplake_datastore.similarity_search_by_vector(
        bar_vector,
        k=1,
        distance_metric=distance_metric,
    )
    expected = [Document(page_content="bar", metadata={"page": "1"})]
    assert hits == expected
    deeplake_datastore.delete_dataset()
def test_similarity_search_with_score(
    deeplake_datastore: DeepLake, distance_metric: str
) -> None:
    """Test similarity search with score.

    Takes the first ``(document, score)`` pair returned for the query
    ``foo`` and checks both parts: the document must be the page-0 entry,
    and the score must be 1.0 for the ``cos`` metric and 0.0 for every
    other metric.
    """
    output, score = deeplake_datastore.similarity_search_with_score(
        "foo", k=1, distance_metric=distance_metric
    )[0]
    assert output == Document(page_content="foo", metadata={"page": "0"})

    expected_score = 1.0 if distance_metric == "cos" else 0.0
    # The score is a computed float, so compare within a tolerance rather
    # than requiring bit-for-bit equality.
    assert score == pytest.approx(expected_score)

    deeplake_datastore.delete_dataset()
def test_similarity_search_with_filter(
    deeplake_datastore: DeepLake, distance_metric: str
) -> None:
    """Query ``foo`` restricted to page ``1`` and expect ``bar``.

    The ``filter`` argument limits the search to ``{"page": "1"}``, so the
    expected hit is the ``bar`` document even though the query text is
    ``foo``. The dataset is deleted after the assertion passes.
    """
    expected = [Document(page_content="bar", metadata={"page": "1"})]
    hits = deeplake_datastore.similarity_search(
        "foo",
        k=1,
        distance_metric=distance_metric,
        filter={"page": "1"},
    )
    assert hits == expected
    deeplake_datastore.delete_dataset()
def test_max_marginal_relevance_search(deeplake_datastore: DeepLake) -> None:
    """Check max-marginal-relevance search by text and by vector.

    Both calls use ``k=1`` and ``fetch_k=2``. The text query is ``foo``;
    the vector query is the embedding at index 0 of
    ``["foo", "bar", "baz"]``. Each must return the page-0 document. The
    dataset is deleted after both assertions pass.
    """
    by_text = deeplake_datastore.max_marginal_relevance_search("foo", k=1, fetch_k=2)
    assert by_text == [Document(page_content="foo", metadata={"page": "0"})]

    vectors = FakeEmbeddings().embed_documents(["foo", "bar", "baz"])
    foo_vector = vectors[0]
    by_vector = deeplake_datastore.max_marginal_relevance_search_by_vector(
        foo_vector,
        k=1,
        fetch_k=2,
    )
    assert by_vector == [Document(page_content="foo", metadata={"page": "0"})]

    deeplake_datastore.delete_dataset()
def test_delete_dataset_by_ids(deeplake_datastore: DeepLake) -> None:
    """Test deleting a single entry from the dataset by its id.

    Reads the first id stored in the dataset, deletes that entry, then
    checks that a search filtered to page ``0`` returns nothing and that
    two entries remain. The dataset itself is deleted at the end.
    """
    # Named ``first_id`` rather than ``id`` to avoid shadowing the builtin.
    first_id = deeplake_datastore.ds.ids.data()["value"][0]
    deeplake_datastore.delete(ids=[first_id])
    assert deeplake_datastore.similarity_search("foo", k=1, filter={"page": "0"}) == []
    assert len(deeplake_datastore.ds) == 2

    deeplake_datastore.delete_dataset()
def test_delete_dataset_by_filter(deeplake_datastore: DeepLake) -> None:
    """Delete the entries matching a metadata filter and verify the result.

    After deleting with ``{"page": "1"}``, a search for ``bar`` restricted
    to that same page must come back empty and the dataset must hold two
    entries. The dataset itself is deleted at the end.
    """
    deeplake_datastore.delete(filter={"page": "1"})

    leftovers = deeplake_datastore.similarity_search(
        "bar",
        k=1,
        filter={"page": "1"},
    )
    assert leftovers == []

    remaining_count = len(deeplake_datastore.ds)
    assert remaining_count == 2

    deeplake_datastore.delete_dataset()
def test_delete_by_path(deeplake_datastore: DeepLake) -> None:
    """Force-delete the dataset by path and verify it no longer exists.

    The path is read from the store, passed to
    ``DeepLake.force_delete_by_path``, and then checked with
    ``deeplake.exists``.
    """
    target_path = deeplake_datastore.dataset_path
    DeepLake.force_delete_by_path(target_path)
    still_present = deeplake.exists(target_path)
    assert not still_present