Tai Truong
fix readme
d202ada
from langchain_community.vectorstores import Clickhouse, ClickhouseSettings
from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
from langflow.helpers.data import docs_to_data
from langflow.inputs import BoolInput, FloatInput
from langflow.io import (
DataInput,
DictInput,
DropdownInput,
HandleInput,
IntInput,
MultilineInput,
SecretStrInput,
StrInput,
)
from langflow.schema import Data
class ClickhouseVectorStoreComponent(LCVectorStoreComponent):
display_name = "Clickhouse"
description = "Clickhouse Vector Store with search capabilities"
documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/clickhouse/"
name = "Clickhouse"
icon = "Clickhouse"
inputs = [
StrInput(name="host", display_name="hostname", required=True, value="localhost"),
IntInput(name="port", display_name="port", required=True, value=8123),
StrInput(name="database", display_name="database", required=True),
StrInput(name="table", display_name="Table name", required=True),
StrInput(name="username", display_name="The ClickHouse user name.", required=True),
SecretStrInput(name="password", display_name="The password for username.", required=True),
DropdownInput(
name="index_type",
display_name="index_type",
options=["annoy", "vector_similarity"],
info="Type of the index.",
value="annoy",
advanced=True,
),
DropdownInput(
name="metric",
display_name="metric",
options=["angular", "euclidean", "manhattan", "hamming", "dot"],
info="Metric to compute distance.",
value="angular",
advanced=True,
),
BoolInput(
name="secure",
display_name="Use https/TLS. This overrides inferred values from the interface or port arguments.",
value=False,
advanced=True,
),
StrInput(name="index_param", display_name="Param of the index", value="'L2Distance',100", advanced=True),
DictInput(name="index_query_params", display_name="index query params", advanced=True),
MultilineInput(name="search_query", display_name="Search Query"),
DataInput(name="ingest_data", display_name="Ingest Data", is_list=True),
HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
IntInput(
name="number_of_results",
display_name="Number of Results",
info="Number of results to return.",
value=4,
advanced=True,
),
FloatInput(name="score_threshold", display_name="Score threshold", advanced=True),
]
@check_cached_vector_store
def build_vector_store(self) -> Clickhouse:
try:
import clickhouse_connect
except ImportError as e:
msg = (
"Failed to import Clickhouse dependencies. "
"Install it using `pip install langflow[clickhouse-connect] --pre`"
)
raise ImportError(msg) from e
try:
client = clickhouse_connect.get_client(host=self.host, username=self.username, password=self.password)
client.command("SELECT 1")
except Exception as e:
msg = f"Failed to connect to Clickhouse: {e}"
raise ValueError(msg) from e
documents = []
for _input in self.ingest_data or []:
if isinstance(_input, Data):
documents.append(_input.to_lc_document())
else:
documents.append(_input)
kwargs = {}
if self.index_param:
kwargs["index_param"] = self.index_param.split(",")
if self.index_query_params:
kwargs["index_query_params"] = self.index_query_params
settings = ClickhouseSettings(
table=self.table,
database=self.database,
host=self.host,
index_type=self.index_type,
metric=self.metric,
password=self.password,
port=self.port,
secure=self.secure,
username=self.username,
**kwargs,
)
if documents:
clickhouse_vs = Clickhouse.from_documents(documents=documents, embedding=self.embedding, config=settings)
else:
clickhouse_vs = Clickhouse(embedding=self.embedding, config=settings)
return clickhouse_vs
def search_documents(self) -> list[Data]:
vector_store = self.build_vector_store()
if self.search_query and isinstance(self.search_query, str) and self.search_query.strip():
kwargs = {}
if self.score_threshold:
kwargs["score_threshold"] = self.score_threshold
docs = vector_store.similarity_search(query=self.search_query, k=self.number_of_results, **kwargs)
data = docs_to_data(docs)
self.status = data
return data
return []