Spaces:
Running
Running
from langchain_community.vectorstores import Clickhouse, ClickhouseSettings | |
from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store | |
from langflow.helpers.data import docs_to_data | |
from langflow.inputs import BoolInput, FloatInput | |
from langflow.io import ( | |
DataInput, | |
DictInput, | |
DropdownInput, | |
HandleInput, | |
IntInput, | |
MultilineInput, | |
SecretStrInput, | |
StrInput, | |
) | |
from langflow.schema import Data | |
class ClickhouseVectorStoreComponent(LCVectorStoreComponent):
    """Langflow component that builds and queries a LangChain Clickhouse vector store.

    The component verifies connectivity with a lightweight ``SELECT 1`` probe,
    optionally ingests the documents supplied through ``ingest_data`` and runs
    a similarity search for ``search_query``.
    """

    display_name = "Clickhouse"
    description = "Clickhouse Vector Store with search capabilities"
    documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/clickhouse/"
    name = "Clickhouse"
    icon = "Clickhouse"

    inputs = [
        # Connection settings.
        StrInput(name="host", display_name="hostname", required=True, value="localhost"),
        IntInput(name="port", display_name="port", required=True, value=8123),
        StrInput(name="database", display_name="database", required=True),
        StrInput(name="table", display_name="Table name", required=True),
        StrInput(name="username", display_name="The ClickHouse user name.", required=True),
        SecretStrInput(name="password", display_name="The password for username.", required=True),
        # Index configuration (advanced).
        DropdownInput(
            name="index_type",
            display_name="index_type",
            options=["annoy", "vector_similarity"],
            info="Type of the index.",
            value="annoy",
            advanced=True,
        ),
        DropdownInput(
            name="metric",
            display_name="metric",
            options=["angular", "euclidean", "manhattan", "hamming", "dot"],
            info="Metric to compute distance.",
            value="angular",
            advanced=True,
        ),
        BoolInput(
            name="secure",
            display_name="Use https/TLS. This overrides inferred values from the interface or port arguments.",
            value=False,
            advanced=True,
        ),
        StrInput(name="index_param", display_name="Param of the index", value="'L2Distance',100", advanced=True),
        DictInput(name="index_query_params", display_name="index query params", advanced=True),
        # Search / ingestion inputs.
        MultilineInput(name="search_query", display_name="Search Query"),
        DataInput(name="ingest_data", display_name="Ingest Data", is_list=True),
        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
        IntInput(
            name="number_of_results",
            display_name="Number of Results",
            info="Number of results to return.",
            value=4,
            advanced=True,
        ),
        FloatInput(name="score_threshold", display_name="Score threshold", advanced=True),
    ]

    # NOTE(review): `check_cached_vector_store` is imported by this file but is
    # not applied to `build_vector_store`, so every call (including the one made
    # by `search_documents`) rebuilds the store and re-ingests `ingest_data`.
    # Confirm whether the decorator was intended here.
    def build_vector_store(self) -> Clickhouse:
        """Create the Clickhouse vector store, ingesting ``ingest_data`` if present.

        Returns:
            A ``Clickhouse`` vector store configured from the component inputs.

        Raises:
            ImportError: If ``clickhouse_connect`` is not installed.
            ValueError: If the connectivity probe against the server fails.
        """
        try:
            import clickhouse_connect
        except ImportError as e:
            msg = (
                "Failed to import Clickhouse dependencies. "
                "Install it using `pip install langflow[clickhouse-connect] --pre`"
            )
            raise ImportError(msg) from e

        # Probe the server with the same endpoint settings that are handed to
        # ClickhouseSettings below; otherwise the check would silently target
        # the driver's default port/protocol instead of the configured one.
        try:
            client = clickhouse_connect.get_client(
                host=self.host,
                port=self.port,
                username=self.username,
                password=self.password,
                secure=self.secure,
            )
            try:
                client.command("SELECT 1")
            finally:
                # The probe client is only needed for the check; release it.
                client.close()
        except Exception as e:
            msg = f"Failed to connect to Clickhouse: {e}"
            raise ValueError(msg) from e

        # Data objects are converted to LangChain documents; anything else is
        # passed through unchanged.
        documents = [
            item.to_lc_document() if isinstance(item, Data) else item
            for item in self.ingest_data or []
        ]

        # Only forward the optional index settings when the user supplied them.
        kwargs = {}
        if self.index_param:
            kwargs["index_param"] = self.index_param.split(",")
        if self.index_query_params:
            kwargs["index_query_params"] = self.index_query_params

        settings = ClickhouseSettings(
            table=self.table,
            database=self.database,
            host=self.host,
            index_type=self.index_type,
            metric=self.metric,
            password=self.password,
            port=self.port,
            secure=self.secure,
            username=self.username,
            **kwargs,
        )

        if documents:
            return Clickhouse.from_documents(documents=documents, embedding=self.embedding, config=settings)
        return Clickhouse(embedding=self.embedding, config=settings)

    def search_documents(self) -> list[Data]:
        """Run a similarity search for ``search_query`` against the vector store.

        The store is always built first (which also performs any ingestion),
        even when no query is provided.

        Returns:
            The matching documents converted to ``Data``; an empty list when
            ``search_query`` is missing, not a string, or blank.
        """
        vector_store = self.build_vector_store()

        query = self.search_query
        if not (query and isinstance(query, str) and query.strip()):
            return []

        kwargs = {}
        if self.score_threshold:
            kwargs["score_threshold"] = self.score_threshold

        docs = vector_store.similarity_search(query=query, k=self.number_of_results, **kwargs)
        data = docs_to_data(docs)
        self.status = data
        return data