import numpy as np from langchain_pinecone import Pinecone from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data from langflow.io import DataInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput from langflow.schema import Data class PineconeVectorStoreComponent(LCVectorStoreComponent): display_name = "Pinecone" description = "Pinecone Vector Store with search capabilities" documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/pinecone/" name = "Pinecone" icon = "Pinecone" inputs = [ StrInput(name="index_name", display_name="Index Name", required=True), StrInput(name="namespace", display_name="Namespace", info="Namespace for the index."), DropdownInput( name="distance_strategy", display_name="Distance Strategy", options=["Cosine", "Euclidean", "Dot Product"], value="Cosine", advanced=True, ), SecretStrInput(name="pinecone_api_key", display_name="Pinecone API Key", required=True), StrInput( name="text_key", display_name="Text Key", info="Key in the record to use as text.", value="text", advanced=True, ), MultilineInput(name="search_query", display_name="Search Query"), DataInput( name="ingest_data", display_name="Ingest Data", is_list=True, ), HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), IntInput( name="number_of_results", display_name="Number of Results", info="Number of results to return.", value=4, advanced=True, ), ] @check_cached_vector_store def build_vector_store(self) -> Pinecone: """Build and return a Pinecone vector store instance.""" try: from langchain_pinecone._utilities import DistanceStrategy # Wrap the embedding model to ensure float32 output wrapped_embeddings = Float32Embeddings(self.embedding) # Convert distance strategy distance_strategy = self.distance_strategy.replace(" ", "_").upper() distance_strategy = DistanceStrategy[distance_strategy] # Initialize Pinecone instance with wrapped embeddings pinecone = Pinecone( index_name=self.index_name, embedding=wrapped_embeddings, # Use wrapped embeddings text_key=self.text_key, namespace=self.namespace, distance_strategy=distance_strategy, pinecone_api_key=self.pinecone_api_key, ) except Exception as e: error_msg = "Error building Pinecone vector store" raise ValueError(error_msg) from e else: # Process documents if any documents = [] if self.ingest_data: for doc in self.ingest_data: if isinstance(doc, Data): documents.append(doc.to_lc_document()) else: documents.append(doc) if documents: pinecone.add_documents(documents) return pinecone def search_documents(self) -> list[Data]: """Search documents in the vector store.""" try: if not self.search_query or not isinstance(self.search_query, str) or not self.search_query.strip(): return [] vector_store = self.build_vector_store() docs = vector_store.similarity_search( query=self.search_query, k=self.number_of_results, ) except Exception as e: error_msg = "Error searching documents" raise ValueError(error_msg) from e else: data = docs_to_data(docs) self.status = data return data class Float32Embeddings: """Wrapper class to ensure float32 embeddings.""" def __init__(self, base_embeddings): self.base_embeddings = base_embeddings def embed_documents(self, texts): embeddings = self.base_embeddings.embed_documents(texts) if isinstance(embeddings, np.ndarray): return [[self._force_float32(x) for x in vec] for vec in embeddings] return [[self._force_float32(x) for x in vec] for vec in embeddings] def embed_query(self, text): embedding = self.base_embeddings.embed_query(text) if isinstance(embedding, np.ndarray): return [self._force_float32(x) for x in embedding] return [self._force_float32(x) for x in embedding] def _force_float32(self, value): """Convert any numeric type to Python float.""" return float(np.float32(value))