from typing import Any

from langflow.custom import Component
from langflow.inputs.inputs import DictInput, DropdownInput, MessageTextInput, SecretStrInput
from langflow.template.field.base import Output


class AstraVectorizeComponent(Component):
    """Deprecated component that assembles Astra Vectorize configuration options.

    The single output, ``build_options``, returns a dictionary holding the
    vector-service options (provider, model name, authentication and model
    parameters) together with an optional provider API key.
    """

    display_name: str = "Astra Vectorize [DEPRECATED]"
    description: str = (
        "Configuration options for Astra Vectorize server-side embeddings. "
        "This component is deprecated. Please use the Astra DB Component directly."
    )
    documentation: str = "https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html"
    icon = "AstraDB"
    name = "AstraVectorize"

    # Maps the provider label shown in the dropdown to a two-item list:
    # [provider identifier placed in the built options, list of model names].
    VECTORIZE_PROVIDERS_MAPPING = {
        "Azure OpenAI": ["azureOpenAI", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]],
        "Hugging Face - Dedicated": ["huggingfaceDedicated", ["endpoint-defined-model"]],
        "Hugging Face - Serverless": [
            "huggingface",
            [
                "sentence-transformers/all-MiniLM-L6-v2",
                "intfloat/multilingual-e5-large",
                "intfloat/multilingual-e5-large-instruct",
                "BAAI/bge-small-en-v1.5",
                "BAAI/bge-base-en-v1.5",
                "BAAI/bge-large-en-v1.5",
            ],
        ],
        "Jina AI": [
            "jinaAI",
            [
                "jina-embeddings-v2-base-en",
                "jina-embeddings-v2-base-de",
                "jina-embeddings-v2-base-es",
                "jina-embeddings-v2-base-code",
                "jina-embeddings-v2-base-zh",
            ],
        ],
        "Mistral AI": ["mistral", ["mistral-embed"]],
        "NVIDIA": ["nvidia", ["NV-Embed-QA"]],
        "OpenAI": ["openai", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]],
        "Upstage": ["upstageAI", ["solar-embedding-1-large"]],
        "Voyage AI": [
            "voyageAI",
            ["voyage-large-2-instruct", "voyage-law-2", "voyage-code-2", "voyage-large-2", "voyage-2"],
        ],
    }

    # Human-readable "Provider: model, model, ..." listing, one provider per
    # paragraph; embedded in the help text of the ``model_name`` input below.
    VECTORIZE_MODELS_STR = "\n\n".join(
        f"{provider}: {', '.join(model_names)}"
        for provider, (_, model_names) in VECTORIZE_PROVIDERS_MAPPING.items()
    )

    inputs = [
        DropdownInput(
            name="provider",
            display_name="Provider",
            # Materialize the keys so the options are a plain list rather than a dict view.
            options=list(VECTORIZE_PROVIDERS_MAPPING),
            value="",
            required=True,
        ),
        MessageTextInput(
            name="model_name",
            display_name="Model Name",
            info="The embedding model to use for the selected provider. Each provider has a different set of models "
            f"available (full list at https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n{VECTORIZE_MODELS_STR}",
            required=True,
        ),
        MessageTextInput(
            name="api_key_name",
            display_name="API Key name",
            info="The name of the embeddings provider API key stored on Astra. "
            "If set, it will override the 'providerKey' in the authentication parameters.",
        ),
        # Declared exactly once: the original listed this input twice under the same name.
        DictInput(
            name="authentication",
            display_name="Authentication Parameters",
            is_list=True,
            advanced=True,
        ),
        SecretStrInput(
            name="provider_api_key",
            display_name="Provider API Key",
            info="An alternative to the Astra Authentication that passes an API key for the provider with each request "
            "to Astra DB. "
            "This may be used when Vectorize is configured for the collection, "
            "but no corresponding provider secret is stored within Astra's key management system.",
            advanced=True,
        ),
        DictInput(
            name="model_parameters",
            display_name="Model Parameters",
            advanced=True,
            is_list=True,
        ),
    ]

    outputs = [
        Output(display_name="Vectorize", name="config", method="build_options", types=["dict"]),
    ]

    def build_options(self) -> dict[str, Any]:
        """Build the Vectorize options dictionary from the component inputs.

        Returns:
            A dictionary with two keys: ``"collection_vector_service_options"``
            (provider identifier, model name, authentication and model
            parameters) and ``"collection_embedding_api_key"`` (the value of
            the ``provider_api_key`` input).

        Raises:
            KeyError: If ``self.provider`` is not a key of
                ``VECTORIZE_PROVIDERS_MAPPING`` (for example, when it is still
                the empty default).
        """
        provider_value = self.VECTORIZE_PROVIDERS_MAPPING[self.provider][0]

        # Copy so the input value itself is never mutated when the key name is injected.
        authentication = {**(self.authentication or {})}
        api_key_name = self.api_key_name
        if api_key_name:
            # An explicit API key name takes precedence over any "providerKey" entry.
            authentication["providerKey"] = api_key_name

        return {
            # must match astrapy.info.CollectionVectorServiceOptions
            "collection_vector_service_options": {
                "provider": provider_value,
                "modelName": self.model_name,
                "authentication": authentication,
                "parameters": self.model_parameters or {},
            },
            "collection_embedding_api_key": self.provider_api_key,
        }