Tai Truong
fix readme
d202ada
from typing import Any
from langflow.custom import Component
from langflow.inputs.inputs import DictInput, DropdownInput, MessageTextInput, SecretStrInput
from langflow.template.field.base import Output
class AstraVectorizeComponent(Component):
    """Deprecated component that assembles Astra Vectorize (server-side embedding) options.

    The component exposes a provider/model selection plus authentication and
    model parameters, and `build_options` packages them into a plain dict.
    """

    display_name: str = "Astra Vectorize [DEPRECATED]"
    description: str = (
        "Configuration options for Astra Vectorize server-side embeddings. "
        "This component is deprecated. Please use the Astra DB Component directly."
    )
    documentation: str = "https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html"
    icon = "AstraDB"
    name = "AstraVectorize"

    # Maps the provider label shown in the dropdown to a two-item list:
    #   [0] the provider identifier placed in the generated options,
    #   [1] the model names listed for that provider in the "Model Name" help text.
    VECTORIZE_PROVIDERS_MAPPING = {
        "Azure OpenAI": ["azureOpenAI", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]],
        "Hugging Face - Dedicated": ["huggingfaceDedicated", ["endpoint-defined-model"]],
        "Hugging Face - Serverless": [
            "huggingface",
            [
                "sentence-transformers/all-MiniLM-L6-v2",
                "intfloat/multilingual-e5-large",
                "intfloat/multilingual-e5-large-instruct",
                "BAAI/bge-small-en-v1.5",
                "BAAI/bge-base-en-v1.5",
                "BAAI/bge-large-en-v1.5",
            ],
        ],
        "Jina AI": [
            "jinaAI",
            [
                "jina-embeddings-v2-base-en",
                "jina-embeddings-v2-base-de",
                "jina-embeddings-v2-base-es",
                "jina-embeddings-v2-base-code",
                "jina-embeddings-v2-base-zh",
            ],
        ],
        "Mistral AI": ["mistral", ["mistral-embed"]],
        "NVIDIA": ["nvidia", ["NV-Embed-QA"]],
        "OpenAI": ["openai", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]],
        "Upstage": ["upstageAI", ["solar-embedding-1-large"]],
        "Voyage AI": [
            "voyageAI",
            ["voyage-large-2-instruct", "voyage-law-2", "voyage-code-2", "voyage-large-2", "voyage-2"],
        ],
    }

    # Human-readable "Provider: model, model, ..." listing, one provider per
    # paragraph, embedded in the help text of the "Model Name" input below.
    VECTORIZE_MODELS_STR = "\n\n".join(
        [provider + ": " + (", ".join(models[1])) for provider, models in VECTORIZE_PROVIDERS_MAPPING.items()]
    )

    inputs = [
        DropdownInput(
            name="provider",
            display_name="Provider",
            # A concrete list rather than a live dict-keys view.
            options=list(VECTORIZE_PROVIDERS_MAPPING),
            value="",
            required=True,
        ),
        MessageTextInput(
            name="model_name",
            display_name="Model Name",
            info="The embedding model to use for the selected provider. Each provider has a different set of models "
            f"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}",
            required=True,
        ),
        MessageTextInput(
            name="api_key_name",
            display_name="API Key name",
            info="The name of the embeddings provider API key stored on Astra. "
            "If set, it will override the 'ProviderKey' in the authentication parameters.",
        ),
        SecretStrInput(
            name="provider_api_key",
            display_name="Provider API Key",
            info="An alternative to the Astra Authentication that passes an API key for the provider with each request "
            "to Astra DB. "
            "This may be used when Vectorize is configured for the collection, "
            "but no corresponding provider secret is stored within Astra's key management system.",
            advanced=True,
        ),
        # Declared exactly once: `build_options` reads a single
        # `self.authentication` value, so a second input with the same name
        # (as was previously listed here) is redundant.
        DictInput(
            name="authentication",
            display_name="Authentication Parameters",
            is_list=True,
            advanced=True,
        ),
        DictInput(
            name="model_parameters",
            display_name="Model Parameters",
            advanced=True,
            is_list=True,
        ),
    ]
    outputs = [
        Output(display_name="Vectorize", name="config", method="build_options", types=["dict"]),
    ]

    def build_options(self) -> dict[str, Any]:
        """Assemble the vectorize configuration from the component inputs.

        Returns:
            A dict with two keys:
            ``collection_vector_service_options`` (provider identifier, model
            name, authentication mapping and model parameters) and
            ``collection_embedding_api_key`` (the value of the
            ``provider_api_key`` input).

        Raises:
            KeyError: If the selected provider is not a key of
                ``VECTORIZE_PROVIDERS_MAPPING`` (for example, when the
                dropdown is still at its empty default).
        """
        # Translate the dropdown label into the provider identifier.
        provider_value = self.VECTORIZE_PROVIDERS_MAPPING[self.provider][0]

        # Work on a copy so the input value itself is never mutated.
        authentication = {**(self.authentication or {})}
        api_key_name = self.api_key_name
        if api_key_name:
            # An explicit key name takes precedence over any "providerKey"
            # already present in the authentication parameters.
            authentication["providerKey"] = api_key_name

        return {
            # must match astrapy.info.CollectionVectorServiceOptions
            "collection_vector_service_options": {
                "provider": provider_value,
                "modelName": self.model_name,
                "authentication": authentication,
                "parameters": self.model_parameters or {},
            },
            "collection_embedding_api_key": self.provider_api_key,
        }