Spaces:
Running
Running
from typing import Any | |
from langflow.custom import Component | |
from langflow.inputs.inputs import DictInput, DropdownInput, MessageTextInput, SecretStrInput | |
from langflow.template.field.base import Output | |
class AstraVectorizeComponent(Component):
    """Deprecated component that assembles Astra Vectorize (server-side embedding) options.

    The single output, ``build_options``, returns a plain dictionary holding the
    vector-service options (provider, model name, authentication, parameters)
    and the optional provider API key.
    """

    display_name: str = "Astra Vectorize [DEPRECATED]"
    description: str = (
        "Configuration options for Astra Vectorize server-side embeddings. "
        "This component is deprecated. Please use the Astra DB Component directly."
    )
    documentation: str = "https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html"
    icon = "AstraDB"
    name = "AstraVectorize"

    # Display label -> [provider identifier emitted by build_options, model names shown in the help text].
    # The model lists are informational only: build_options does not validate model_name against them.
    VECTORIZE_PROVIDERS_MAPPING = {
        "Azure OpenAI": ["azureOpenAI", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]],
        "Hugging Face - Dedicated": ["huggingfaceDedicated", ["endpoint-defined-model"]],
        "Hugging Face - Serverless": [
            "huggingface",
            [
                "sentence-transformers/all-MiniLM-L6-v2",
                "intfloat/multilingual-e5-large",
                "intfloat/multilingual-e5-large-instruct",
                "BAAI/bge-small-en-v1.5",
                "BAAI/bge-base-en-v1.5",
                "BAAI/bge-large-en-v1.5",
            ],
        ],
        "Jina AI": [
            "jinaAI",
            [
                "jina-embeddings-v2-base-en",
                "jina-embeddings-v2-base-de",
                "jina-embeddings-v2-base-es",
                "jina-embeddings-v2-base-code",
                "jina-embeddings-v2-base-zh",
            ],
        ],
        "Mistral AI": ["mistral", ["mistral-embed"]],
        "NVIDIA": ["nvidia", ["NV-Embed-QA"]],
        "OpenAI": ["openai", ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]],
        "Upstage": ["upstageAI", ["solar-embedding-1-large"]],
        "Voyage AI": [
            "voyageAI",
            ["voyage-large-2-instruct", "voyage-law-2", "voyage-code-2", "voyage-large-2", "voyage-2"],
        ],
    }

    # One "<label>: <model>, <model>, ..." paragraph per provider, embedded in the model_name help text.
    # Only the outermost iterable may reference class-level names inside a class body, which is the case here.
    VECTORIZE_MODELS_STR = "\n\n".join(
        f"{provider}: {', '.join(models)}" for provider, (_, models) in VECTORIZE_PROVIDERS_MAPPING.items()
    )

    inputs = [
        DropdownInput(
            name="provider",
            display_name="Provider",
            # Materialize the labels as a list instead of handing over a live dict view.
            options=list(VECTORIZE_PROVIDERS_MAPPING),
            value="",
            required=True,
        ),
        MessageTextInput(
            name="model_name",
            display_name="Model Name",
            info="The embedding model to use for the selected provider. Each provider has a different set of models "
            "available (full list at "
            "https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html):\n\n"
            f"{VECTORIZE_MODELS_STR}",
            required=True,
        ),
        MessageTextInput(
            name="api_key_name",
            display_name="API Key name",
            info="The name of the embeddings provider API key stored on Astra. "
            "If set, it will override the 'ProviderKey' in the authentication parameters.",
        ),
        # Declared exactly once: build_options reads a single `self.authentication`, so a second
        # input with the same name would only be a redundant, conflicting declaration.
        DictInput(
            name="authentication",
            display_name="Authentication Parameters",
            is_list=True,
            advanced=True,
        ),
        SecretStrInput(
            name="provider_api_key",
            display_name="Provider API Key",
            info="An alternative to the Astra Authentication that passes an API key for the provider with each request "
            "to Astra DB. "
            "This may be used when Vectorize is configured for the collection, "
            "but no corresponding provider secret is stored within Astra's key management system.",
            advanced=True,
        ),
        DictInput(
            name="model_parameters",
            display_name="Model Parameters",
            advanced=True,
            is_list=True,
        ),
    ]

    outputs = [
        Output(display_name="Vectorize", name="config", method="build_options", types=["dict"]),
    ]

    def build_options(self) -> dict[str, Any]:
        """Assemble the Vectorize configuration from the component inputs.

        Returns:
            A dictionary with two keys:
            ``collection_vector_service_options`` (provider identifier, model name,
            authentication mapping and model parameters) and
            ``collection_embedding_api_key`` (the value of the provider API key input).

        Raises:
            KeyError: If the selected provider is not a key of
                ``VECTORIZE_PROVIDERS_MAPPING`` (for example the empty default value).
        """
        try:
            provider_value = self.VECTORIZE_PROVIDERS_MAPPING[self.provider][0]
        except KeyError as err:
            # Same exception type as a bare lookup, but with a message that names the valid choices.
            valid_providers = ", ".join(self.VECTORIZE_PROVIDERS_MAPPING)
            msg = f"Unknown Astra Vectorize provider {self.provider!r}. Choose one of: {valid_providers}."
            raise KeyError(msg) from err

        # Shallow copy so that adding "providerKey" never mutates the input value itself.
        authentication = {**(self.authentication or {})}
        api_key_name = self.api_key_name
        if api_key_name:
            # An explicit API key name takes precedence over any "providerKey" supplied in the dict.
            authentication["providerKey"] = api_key_name

        return {
            # must match astrapy.info.CollectionVectorServiceOptions
            "collection_vector_service_options": {
                "provider": provider_value,
                "modelName": self.model_name,
                "authentication": authentication,
                "parameters": self.model_parameters or {},
            },
            "collection_embedding_api_key": self.provider_api_key,
        }