Spaces:
Running
Running
import os | |
import pytest | |
from astrapy.db import AstraDB | |
from langchain_core.documents import Document | |
from langflow.components.embeddings import OpenAIEmbeddingsComponent | |
from langflow.components.vectorstores import AstraDBVectorStoreComponent | |
from langflow.schema.data import Data | |
from tests.api_keys import get_astradb_api_endpoint, get_astradb_application_token, get_openai_api_key | |
from tests.integration.components.mock_components import TextToData | |
from tests.integration.utils import ComponentInputHandle, run_single_component | |
# Names of the Astra DB collections these integration tests may create.
BASIC_COLLECTION = "test_basic"
SEARCH_COLLECTION = "test_search"
# Disabled for now; kept for reference.
# MEMORY_COLLECTION = "test_memory"

# Collections backed by server-side embedding ("vectorize") configurations.
VECTORIZE_COLLECTION = "test_vectorize"
VECTORIZE_COLLECTION_OPENAI = "test_vectorize_openai"
VECTORIZE_COLLECTION_OPENAI_WITH_AUTH = "test_vectorize_openai_auth"

# Every collection listed here is deleted by the ``astradb_client`` teardown loop.
ALL_COLLECTIONS = [
    BASIC_COLLECTION,
    SEARCH_COLLECTION,
    # MEMORY_COLLECTION,
    VECTORIZE_COLLECTION,
    VECTORIZE_COLLECTION_OPENAI,
    VECTORIZE_COLLECTION_OPENAI_WITH_AUTH,
]
@pytest.fixture
def astradb_client():
    """Yield an ``AstraDB`` client and delete the test collections afterwards.

    The client is built from the endpoint and application token returned by
    ``get_astradb_api_endpoint`` and ``get_astradb_application_token``.

    Registered as a pytest fixture so that tests can request it by parameter
    name (``test_base`` does); without the decorator pytest cannot resolve the
    ``astradb_client`` argument.

    Yields:
        The connected ``AstraDB`` client.
    """
    client = AstraDB(api_endpoint=get_astradb_api_endpoint(), token=get_astradb_application_token())
    yield client
    # Teardown: runs once the requesting test has finished. Every name in
    # ALL_COLLECTIONS is passed to delete_collection.
    for collection in ALL_COLLECTIONS:
        client.delete_collection(collection)
async def test_base(astradb_client: AstraDB):
    """Run the Astra DB vector store component with only connection and embedding inputs.

    The component is executed through ``run_single_component`` against
    ``BASIC_COLLECTION`` with an OpenAI embeddings component wired in as the
    embedding model. No ingest data and no search input are passed.

    Checks that:
      * the ``vector_store`` output is not ``None``;
      * the ``search_results`` output is an empty list;
      * ``astradb_client.collection(BASIC_COLLECTION)`` returns a truthy value.

    Args:
        astradb_client: Client supplied by the ``astradb_client`` fixture.
    """
    # OpenAIEmbeddingsComponent comes from the module-level import; the
    # function-local re-import of the same name was redundant and is dropped.
    application_token = get_astradb_application_token()
    api_endpoint = get_astradb_api_endpoint()

    results = await run_single_component(
        AstraDBVectorStoreComponent,
        inputs={
            "token": application_token,
            "api_endpoint": api_endpoint,
            "collection_name": BASIC_COLLECTION,
            "embedding_model": ComponentInputHandle(
                clazz=OpenAIEmbeddingsComponent,
                inputs={"openai_api_key": get_openai_api_key()},
                output_name="embeddings",
            ),
        },
    )

    assert results["vector_store"] is not None
    assert results["search_results"] == []
    assert astradb_client.collection(BASIC_COLLECTION)
async def test_astra_embeds_and_search():
    """Ingest two texts, search for one of them, and expect exactly one hit.

    The component receives ``"test1"`` and ``"test2"`` through a ``TextToData``
    handle, an OpenAI embeddings handle as the embedding model, the search
    input ``"test1"`` and ``number_of_results=1``. The ``search_results``
    output must then contain a single entry.
    """
    # NOTE(review): this test targets BASIC_COLLECTION, the same collection
    # test_base uses, while SEARCH_COLLECTION does not appear to be referenced
    # by any visible test — confirm the sharing is intentional.
    component_inputs = {
        "token": get_astradb_application_token(),
        "api_endpoint": get_astradb_api_endpoint(),
        "collection_name": BASIC_COLLECTION,
        "number_of_results": 1,
        "search_input": "test1",
        "ingest_data": ComponentInputHandle(
            clazz=TextToData,
            inputs={"text_data": ["test1", "test2"]},
            output_name="from_text",
        ),
        "embedding_model": ComponentInputHandle(
            clazz=OpenAIEmbeddingsComponent,
            inputs={"openai_api_key": get_openai_api_key()},
            output_name="embeddings",
        ),
    }

    outputs = await run_single_component(AstraDBVectorStoreComponent, inputs=component_inputs)

    hits = outputs["search_results"]
    assert len(hits) == 1
def test_astra_vectorize():
    """Search a vectorize-backed collection using the nvidia ``NV-Embed-QA`` model.

    An ``AstraDBVectorStore`` is first created for ``VECTORIZE_COLLECTION`` with
    the nvidia vector service options. The component is then built against the
    same collection with ``pre_delete_collection=True`` and two ingest records,
    and a search for ``"test"`` limited to two results must return two records.

    Whatever happens, the collection is deleted through the store handle if
    that handle was created.
    """
    from langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions

    token = get_astradb_application_token()
    endpoint = get_astradb_api_endpoint()

    store = None
    try:
        service_options = CollectionVectorServiceOptions.from_dict(
            {"provider": "nvidia", "modelName": "NV-Embed-QA"}
        )
        store = AstraDBVectorStore(
            collection_name=VECTORIZE_COLLECTION,
            api_endpoint=endpoint,
            token=token,
            collection_vector_service_options=service_options,
        )

        source_docs = [Document(page_content=text) for text in ("test1", "test2")]
        ingest_records = [Data.from_document(doc) for doc in source_docs]

        component = AstraDBVectorStoreComponent()
        vectorize_options = component.build_vectorize_options(
            embedding_provider="nvidia",
            model="NV-Embed-QA",
        )
        component.build(
            token=token,
            api_endpoint=endpoint,
            collection_name=VECTORIZE_COLLECTION,
            ingest_data=ingest_records,
            search_input="test",
            number_of_results=2,
            pre_delete_collection=True,
        )
        vector_store = component.build_vector_store(vectorize_options)
        found = component.search_documents(vector_store=vector_store)

        assert len(found) == 2
    finally:
        # Clean up only if the store was actually constructed.
        if store is not None:
            store.delete_collection()
def test_astra_vectorize_with_provider_api_key():
    """Search a vectorize-backed collection configured with a provider API key.

    The ``AstraDBVectorStore`` is created for ``VECTORIZE_COLLECTION_OPENAI``
    with openai ``text-embedding-3-small`` service options and the value of the
    ``OPENAI_API_KEY`` environment variable as ``collection_embedding_api_key``.
    The component's vectorize options are built with
    ``z_03_provider_api_key="openai"`` and an empty authentication mapping.
    A search for ``"test"`` limited to two results must return two records.

    The collection is deleted through the store handle in all cases, provided
    the handle was created.
    """
    from langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions

    token = get_astradb_application_token()
    endpoint = get_astradb_api_endpoint()

    store = None
    try:
        service_options = CollectionVectorServiceOptions.from_dict(
            {
                "provider": "openai",
                "modelName": "text-embedding-3-small",
                "parameters": {},
                "authentication": {"providerKey": "openai"},
            }
        )
        store = AstraDBVectorStore(
            collection_name=VECTORIZE_COLLECTION_OPENAI,
            api_endpoint=endpoint,
            token=token,
            collection_vector_service_options=service_options,
            collection_embedding_api_key=os.getenv("OPENAI_API_KEY"),
        )

        source_docs = [Document(page_content=text) for text in ("test1", "test2")]
        ingest_records = [Data.from_document(doc) for doc in source_docs]

        component = AstraDBVectorStoreComponent()
        vectorize_options = component.build_vectorize_options(
            embedding_provider="openai",
            model="text-embedding-3-small",
            z_01_model_parameters={},
            z_03_provider_api_key="openai",
            z_04_authentication={},
        )
        component.build(
            token=token,
            api_endpoint=endpoint,
            collection_name=VECTORIZE_COLLECTION_OPENAI,
            ingest_data=ingest_records,
            search_input="test",
            number_of_results=2,
            pre_delete_collection=True,
        )
        vector_store = component.build_vector_store(vectorize_options)
        found = component.search_documents(vector_store=vector_store)

        assert len(found) == 2
    finally:
        # Clean up only if the store was actually constructed.
        if store is not None:
            store.delete_collection()
def test_astra_vectorize_passes_authentication():
    """Search a vectorize-backed collection configured via the authentication mapping.

    The ``AstraDBVectorStore`` is created for
    ``VECTORIZE_COLLECTION_OPENAI_WITH_AUTH`` with openai
    ``text-embedding-3-small`` service options. The component's vectorize
    options are built with ``z_04_authentication={"providerKey": "openai"}``.
    A search for ``"test"`` limited to two results must return two records.

    The collection is deleted through the store handle in all cases, provided
    the handle was created.
    """
    from langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions

    store = None
    try:
        # Credentials are looked up inside the try block, so a failure here
        # still reaches the finally clause (where store is None).
        token = get_astradb_application_token()
        endpoint = get_astradb_api_endpoint()

        service_options = CollectionVectorServiceOptions.from_dict(
            {
                "provider": "openai",
                "modelName": "text-embedding-3-small",
                "parameters": {},
                "authentication": {"providerKey": "openai"},
            }
        )
        store = AstraDBVectorStore(
            collection_name=VECTORIZE_COLLECTION_OPENAI_WITH_AUTH,
            api_endpoint=endpoint,
            token=token,
            collection_vector_service_options=service_options,
        )

        source_docs = [Document(page_content=text) for text in ("test1", "test2")]
        ingest_records = [Data.from_document(doc) for doc in source_docs]

        component = AstraDBVectorStoreComponent()
        vectorize_options = component.build_vectorize_options(
            embedding_provider="openai",
            model="text-embedding-3-small",
            z_01_model_parameters={},
            z_04_authentication={"providerKey": "openai"},
        )
        component.build(
            token=token,
            api_endpoint=endpoint,
            collection_name=VECTORIZE_COLLECTION_OPENAI_WITH_AUTH,
            ingest_data=ingest_records,
            search_input="test",
            number_of_results=2,
            pre_delete_collection=True,
        )
        vector_store = component.build_vector_store(vectorize_options)
        found = component.search_documents(vector_store=vector_store)

        assert len(found) == 2
    finally:
        # Clean up only if the store was actually constructed.
        if store is not None:
            store.delete_collection()