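# NOTE(review): the next three lines look like web-page status residue that was
# captured together with the file, not module content; kept as comments so the
# module can be parsed.
# Spaces:
# Runtime error
# Runtime error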
import logging
import os
from typing import Any, Dict, List, Optional, Tuple

from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings

# Module-level logger named after this module; used by VectorStoreManager for
# all progress and error messages.
logger = logging.getLogger(__name__)


class VectorStoreManager:
    """Manage a persistent Chroma vector store behind a small, logged API.

    The persistence directory is created on construction, but the underlying
    ``Chroma`` store is only built by :meth:`initialize_vector_store`.

    Error policy (unchanged from the original design):

    * ``initialize_vector_store`` and ``get_retriever`` log and re-raise.
    * ``add_documents`` / ``clear_vector_store`` log and return ``False``.
    * the search helpers log and return ``[]``.
    * ``get_collection_stats`` logs and returns zeroed stats.
    """

    # Maximum number of ids passed to a single delete call while clearing.
    # NOTE(review): conservative value; storage backends may cap the size of
    # one delete request — confirm against the deployed backend.
    _DELETE_BATCH_SIZE = 1000

    def __init__(
        self,
        persist_directory: str = "./chroma_db",
        embedding_function: Optional[Embeddings] = None,
    ):
        """Remember the configuration and make sure the directory exists.

        Args:
            persist_directory: Directory handed to Chroma for persistence.
            embedding_function: Optional embeddings object; it may instead be
                supplied later to :meth:`initialize_vector_store`.

        Raises:
            Exception: Whatever ``os.makedirs`` raised, after logging it.
        """
        self.persist_directory = persist_directory
        self.embedding_function = embedding_function
        # Stays None until initialize_vector_store() succeeds.
        self.vector_store = None
        self._ensure_persist_directory()

    def _ensure_persist_directory(self):
        """Create ``persist_directory`` if missing; log and re-raise on failure."""
        try:
            os.makedirs(self.persist_directory, exist_ok=True)
            logger.info(f"Persist directory ensured: {self.persist_directory}")
        except Exception as e:
            logger.exception(f"Error creating persist directory: {e}")
            raise

    def initialize_vector_store(self, embedding_function: Optional[Embeddings] = None):
        """Build the Chroma store, optionally replacing the embedding function.

        Args:
            embedding_function: When given, replaces the one stored on the
                manager before the store is created.

        Raises:
            ValueError: If no embedding function is available.
            Exception: Whatever the ``Chroma`` constructor raised, after
                logging it.
        """
        if embedding_function is not None:
            self.embedding_function = embedding_function
        if self.embedding_function is None:
            raise ValueError("Embedding function must be provided")
        try:
            logger.info("Initializing vector store")
            self.vector_store = Chroma(
                persist_directory=self.persist_directory,
                embedding_function=self.embedding_function,
            )
            logger.info("Vector store initialized successfully")
        except Exception as e:
            logger.exception(f"Error initializing vector store: {e}")
            raise

    def add_documents(self, documents: List[Document]) -> bool:
        """Add ``documents`` to the store.

        Returns:
            ``True`` on success (including an empty list, which is a no-op),
            ``False`` if the store is not initialized or the add failed.
        """
        # Identity check, not truthiness: a store object that defines __len__
        # is falsy while empty, which must not be read as "not initialized".
        if self.vector_store is None:
            logger.error(
                "Error adding documents to vector store: Vector store not initialized"
            )
            return False
        try:
            count = len(documents)
            if count == 0:
                # Nothing to send; avoid a backend round-trip with an empty batch.
                logger.info("No documents to add; skipping")
                return True
            logger.info(f"Adding {count} document(s) to vector store")
            self.vector_store.add_documents(documents)
        except Exception as e:
            logger.exception(f"Error adding documents to vector store: {e}")
            return False
        logger.info("Documents added successfully")
        return True

    def similarity_search(self, query: str, k: int = 5) -> List[Document]:
        """Return up to ``k`` documents similar to ``query``.

        Returns an empty list if the store is not initialized or the search
        fails; the failure is logged.
        """
        if self.vector_store is None:
            logger.error(
                "Error performing similarity search: Vector store not initialized"
            )
            return []
        try:
            # Only the first 50 characters of the query are logged.
            logger.info(f"Performing similarity search for query: '{query[:50]}...'")
            results = self.vector_store.similarity_search(query, k=k)
        except Exception as e:
            logger.exception(f"Error performing similarity search: {e}")
            return []
        logger.info(f"Found {len(results)} similar documents")
        return results

    def similarity_search_with_score(
        self, query: str, k: int = 5
    ) -> List[Tuple[Document, float]]:
        """Return up to ``k`` ``(document, score)`` pairs for ``query``.

        Returns an empty list if the store is not initialized or the search
        fails; the failure is logged.
        """
        if self.vector_store is None:
            logger.error(
                "Error performing similarity search with scores: "
                "Vector store not initialized"
            )
            return []
        try:
            logger.info(
                f"Performing similarity search with scores for query: '{query[:50]}...'"
            )
            results = self.vector_store.similarity_search_with_score(query, k=k)
        except Exception as e:
            logger.exception(f"Error performing similarity search with scores: {e}")
            return []
        logger.info(f"Found {len(results)} similar documents with scores")
        return results

    def get_retriever(self, search_kwargs: Optional[Dict[str, Any]] = None):
        """Create a retriever over the store.

        Args:
            search_kwargs: Extra retriever search options; merged over the
                default ``{"k": 5}``, so caller-supplied keys win.

        Raises:
            ValueError: If the store is not initialized.
            Exception: Whatever ``as_retriever`` raised, after logging it.
        """
        if self.vector_store is None:
            logger.error("Error creating retriever: Vector store not initialized")
            raise ValueError("Vector store not initialized")
        try:
            merged_kwargs: Dict[str, Any] = {"k": 5}
            if search_kwargs:
                merged_kwargs.update(search_kwargs)
            retriever = self.vector_store.as_retriever(search_kwargs=merged_kwargs)
        except Exception as e:
            logger.exception(f"Error creating retriever: {e}")
            raise
        logger.info("Retriever created successfully")
        return retriever

    def get_collection_stats(self) -> Dict[str, Any]:
        """Return document count, collection name and persist directory.

        The result always has the keys ``total_documents``,
        ``collection_name`` and ``persist_directory``. When the store is not
        initialized or the lookup fails, the count is ``0`` and the name is
        ``None``.
        """
        if self.vector_store is not None:
            try:
                # Relies on the store's private ``_collection`` attribute.
                collection = self.vector_store._collection
                return {
                    'total_documents': collection.count(),
                    'collection_name': collection.name,
                    'persist_directory': self.persist_directory,
                }
            except Exception as e:
                logger.exception(f"Error getting collection stats: {e}")
        return {
            'total_documents': 0,
            'collection_name': None,
            'persist_directory': self.persist_directory,
        }

    def clear_vector_store(self) -> bool:
        """Delete every stored entry while keeping the store usable.

        Returns:
            ``True`` if the store is empty afterwards (or was never
            initialized), ``False`` if listing or deleting failed.
        """
        if self.vector_store is None:
            return True
        try:
            logger.info("Clearing vector store")
            # An empty ``where`` filter is not a "match everything" filter, so
            # list the ids explicitly; ``include=[]`` asks for ids only.
            all_ids = self.vector_store.get(include=[])["ids"]
            step = self._DELETE_BATCH_SIZE
            for start in range(0, len(all_ids), step):
                self.vector_store.delete(ids=all_ids[start:start + step])
        except Exception as e:
            logger.exception(f"Error clearing vector store: {e}")
            return False
        logger.info("Vector store cleared successfully")
        return True

    def is_initialized(self) -> bool:
        """Return ``True`` once a vector store object has been created."""
        return self.vector_store is not None