from copy import deepcopy

from chromadb.config import Settings
from langchain_chroma import Chroma
from loguru import logger

from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
from langflow.base.vectorstores.utils import chroma_collection_to_data
from langflow.io import BoolInput, DataInput, DropdownInput, HandleInput, IntInput, MultilineInput, StrInput
from langflow.schema import Data


class ChromaVectorStoreComponent(LCVectorStoreComponent):
    """Chroma Vector Store with search capabilities.

    Builds a ``langchain_chroma.Chroma`` store (optionally backed by a Chroma
    server client and/or a persist directory), ingests the configured ``Data``
    inputs into it, and publishes the stored collection as the component status.
    """

    display_name: str = "Chroma DB"
    description: str = "Chroma Vector Store with search capabilities"
    documentation = "https://python.langchain.com/docs/integrations/vectorstores/chroma"
    name = "Chroma"
    icon = "Chroma"

    inputs = [
        StrInput(
            name="collection_name",
            display_name="Collection Name",
            value="langflow",
        ),
        StrInput(
            name="persist_directory",
            display_name="Persist Directory",
        ),
        MultilineInput(
            name="search_query",
            display_name="Search Query",
        ),
        DataInput(
            name="ingest_data",
            display_name="Ingest Data",
            is_list=True,
        ),
        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
        StrInput(
            name="chroma_server_cors_allow_origins",
            display_name="Server CORS Allow Origins",
            advanced=True,
        ),
        StrInput(
            name="chroma_server_host",
            display_name="Server Host",
            advanced=True,
        ),
        IntInput(
            name="chroma_server_http_port",
            display_name="Server HTTP Port",
            advanced=True,
        ),
        IntInput(
            name="chroma_server_grpc_port",
            display_name="Server gRPC Port",
            advanced=True,
        ),
        BoolInput(
            name="chroma_server_ssl_enabled",
            display_name="Server SSL Enabled",
            advanced=True,
        ),
        BoolInput(
            name="allow_duplicates",
            display_name="Allow Duplicates",
            advanced=True,
            info="If false, will not add documents that are already in the Vector Store.",
        ),
        DropdownInput(
            name="search_type",
            display_name="Search Type",
            options=["Similarity", "MMR"],
            value="Similarity",
            advanced=True,
        ),
        IntInput(
            name="number_of_results",
            display_name="Number of Results",
            info="Number of results to return.",
            advanced=True,
            value=10,
        ),
        IntInput(
            name="limit",
            display_name="Limit",
            advanced=True,
            info="Limit the number of records to compare when Allow Duplicates is False.",
        ),
    ]

    @staticmethod
    def _normalize_cors_origins(origins) -> list:
        """Return the CORS origins as a list of non-empty strings.

        The ``chroma_server_cors_allow_origins`` input is a ``StrInput``, so the
        raw value is normally a single string, whereas the Chroma ``Settings``
        field of the same name is a list of origins.

        Args:
            origins: ``None``/empty, a single string (optionally holding several
                comma-separated origins), or an iterable of origins.

        Returns:
            A list of origins; empty when ``origins`` is falsy.
        """
        if not origins:
            return []
        if isinstance(origins, str):
            # Split so that "http://a, http://b" yields two origins; blanks are dropped.
            return [origin.strip() for origin in origins.split(",") if origin.strip()]
        return list(origins)

    @check_cached_vector_store
    def build_vector_store(self) -> Chroma:
        """Builds the Chroma object.

        A Chroma client is created only when ``chroma_server_host`` is set;
        otherwise ``client`` is passed as ``None``. After construction the
        ingest data is added to the store and ``self.status`` is set to the
        stored collection (fetched with ``limit=self.limit``).

        Returns:
            The configured ``Chroma`` vector store.

        Raises:
            ImportError: If the Chroma integration packages cannot be imported.
        """
        try:
            from chromadb import Client
            from langchain_chroma import Chroma
        except ImportError as e:
            msg = "Could not import Chroma integration package. Please install it with `pip install langchain-chroma`."
            raise ImportError(msg) from e

        # Chroma settings
        chroma_settings = None
        client = None
        if self.chroma_server_host:
            chroma_settings = Settings(
                # Pass a list rather than the raw string coming from the StrInput.
                chroma_server_cors_allow_origins=self._normalize_cors_origins(self.chroma_server_cors_allow_origins),
                chroma_server_host=self.chroma_server_host,
                chroma_server_http_port=self.chroma_server_http_port or None,
                chroma_server_grpc_port=self.chroma_server_grpc_port or None,
                chroma_server_ssl_enabled=self.chroma_server_ssl_enabled,
            )
            client = Client(settings=chroma_settings)

        # Check persist_directory and expand it if it is a relative path
        persist_directory = self.resolve_path(self.persist_directory) if self.persist_directory is not None else None

        chroma = Chroma(
            persist_directory=persist_directory,
            client=client,
            embedding_function=self.embedding,
            collection_name=self.collection_name,
        )

        self._add_documents_to_vector_store(chroma)
        self.status = chroma_collection_to_data(chroma.get(limit=self.limit))
        return chroma

    def _add_documents_to_vector_store(self, vector_store: "Chroma") -> None:
        """Adds documents to the Vector Store.

        When ``allow_duplicates`` is false, inputs equal to an already stored
        record (compared with the stored record's ``id`` removed) are skipped.
        Only the first ``self.limit`` stored records are fetched for that
        comparison.

        Args:
            vector_store: The Chroma store that receives the new documents.

        Raises:
            TypeError: If any item of ``ingest_data`` is not a ``Data`` object.
        """
        if not self.ingest_data:
            self.status = ""
            return

        stored_documents_without_id = []
        if not self.allow_duplicates:
            stored_data = chroma_collection_to_data(vector_store.get(limit=self.limit))
            # Work on copies so the fetched records are not mutated; the id is
            # dropped so that stored records can be compared with incoming inputs.
            for value in deepcopy(stored_data):
                del value.id
                stored_documents_without_id.append(value)

        documents = []
        for _input in self.ingest_data:
            if not isinstance(_input, Data):
                msg = "Vector Store Inputs must be Data objects."
                raise TypeError(msg)
            if _input not in stored_documents_without_id:
                documents.append(_input.to_lc_document())

        if documents and self.embedding is not None:
            logger.debug(f"Adding {len(documents)} documents to the Vector Store.")
            vector_store.add_documents(documents)
        else:
            logger.debug("No documents to add to the Vector Store.")