# Page header captured with this file: Spaces status "Runtime error"; file size 5,329 bytes; id 816825a.
# (The line-number gutter 1-145 that followed was extraction residue and has been dropped.)
import logging
import os
from typing import List, Optional, Tuple
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
# Module-level logger named after this module; every VectorStoreManager method logs through it.
logger = logging.getLogger(__name__)
class VectorStoreManager:
    """Manage a Chroma vector store persisted under a local directory.

    The manager creates the persistence directory on construction, builds the
    ``Chroma`` store on demand via :meth:`initialize_vector_store`, and exposes
    helpers to add documents, run similarity searches, build a retriever,
    report collection statistics and clear the collection.

    Error-handling convention used throughout the class:

    * setup-style methods (``__init__``, ``initialize_vector_store``,
      ``get_retriever``) log the failure and re-raise it;
    * data-path methods (``add_documents``, the search methods,
      ``get_collection_stats``, ``clear_vector_store``) log the failure and
      return a falsy sentinel (``False``, ``[]`` or zeroed stats) instead.
    """

    # Upper bound on the number of ids sent per delete call when clearing the
    # store. NOTE(review): chosen conservatively to stay under backend batch
    # limits; confirm against the deployed chromadb configuration.
    _DELETE_BATCH_SIZE = 1000

    def __init__(self, persist_directory: str = "./chroma_db",
                 embedding_function: Optional[Embeddings] = None):
        """Record the configuration and make sure the persist directory exists.

        Args:
            persist_directory: Directory handed to Chroma for persistence.
            embedding_function: Embedding implementation; may be supplied
                later through :meth:`initialize_vector_store`.

        Raises:
            OSError: If the persist directory cannot be created.
        """
        self.persist_directory = persist_directory
        self.embedding_function = embedding_function
        # Stays None until initialize_vector_store() succeeds.
        self.vector_store = None
        self._ensure_persist_directory()

    def _ensure_persist_directory(self):
        """Create ``persist_directory`` (and parents) if it does not exist.

        Raises:
            OSError: Propagated from ``os.makedirs`` after being logged.
        """
        try:
            os.makedirs(self.persist_directory, exist_ok=True)
        except OSError as e:
            logger.exception("Error creating persist directory: %s", e)
            raise  # bare raise keeps the original traceback intact
        logger.info("Persist directory ensured: %s", self.persist_directory)

    def _require_store(self):
        """Return the initialized store.

        Raises:
            ValueError: If :meth:`initialize_vector_store` has not succeeded.
        """
        if self.vector_store is None:
            raise ValueError("Vector store not initialized")
        return self.vector_store

    def initialize_vector_store(self, embedding_function: Optional[Embeddings] = None):
        """Build the Chroma store bound to ``persist_directory``.

        Args:
            embedding_function: Optional replacement for the embedding
                function given at construction time.

        Raises:
            ValueError: If no embedding function is available.
            Exception: Any error raised while constructing ``Chroma`` is
                logged and re-raised unchanged.
        """
        if embedding_function is not None:
            self.embedding_function = embedding_function
        if self.embedding_function is None:
            raise ValueError("Embedding function must be provided")

        try:
            logger.info("Initializing vector store")
            self.vector_store = Chroma(
                persist_directory=self.persist_directory,
                embedding_function=self.embedding_function,
            )
        except Exception as e:
            logger.exception("Error initializing vector store: %s", e)
            raise
        logger.info("Vector store initialized successfully")

    def add_documents(self, documents: List[Document]) -> bool:
        """Add ``documents`` to the store.

        Args:
            documents: Documents to index. An empty list is a no-op.

        Returns:
            True on success (including the empty no-op), False if the store
            is not initialized or the underlying call fails.
        """
        try:
            store = self._require_store()
            if not documents:
                # Nothing to index; skip the call rather than forwarding an
                # empty batch that the store is expected to reject.
                logger.info("No documents supplied; nothing to add")
                return True
            logger.info("Adding %d document(s) to vector store", len(documents))
            store.add_documents(documents)
            logger.info("Documents added successfully")
            return True
        except Exception as e:
            logger.exception("Error adding documents to vector store: %s", e)
            return False

    def similarity_search(self, query: str, k: int = 5) -> List[Document]:
        """Return up to ``k`` documents most similar to ``query``.

        Args:
            query: Search text (only its first 50 characters are logged).
            k: Maximum number of results requested from the store.

        Returns:
            The matching documents, or ``[]`` if the store is not
            initialized or the search fails.
        """
        try:
            store = self._require_store()
            logger.info("Performing similarity search for query: '%s...'", query[:50])
            results = store.similarity_search(query, k=k)
            logger.info("Found %d similar documents", len(results))
            return results
        except Exception as e:
            logger.exception("Error performing similarity search: %s", e)
            return []

    def similarity_search_with_score(self, query: str, k: int = 5) -> List[Tuple[Document, float]]:
        """Return up to ``k`` ``(document, score)`` pairs for ``query``.

        Args:
            query: Search text (only its first 50 characters are logged).
            k: Maximum number of results requested from the store.

        Returns:
            The pairs produced by the store, or ``[]`` if the store is not
            initialized or the search fails.
        """
        try:
            store = self._require_store()
            logger.info(
                "Performing similarity search with scores for query: '%s...'",
                query[:50],
            )
            results = store.similarity_search_with_score(query, k=k)
            logger.info("Found %d similar documents with scores", len(results))
            return results
        except Exception as e:
            logger.exception("Error performing similarity search with scores: %s", e)
            return []

    def get_retriever(self, search_kwargs: Optional[dict] = None):
        """Create a retriever over the store.

        Args:
            search_kwargs: Extra search options; merged over the default
                ``{"k": 5}`` so caller-supplied keys win.

        Returns:
            The object returned by the store's ``as_retriever``.

        Raises:
            ValueError: If the store is not initialized.
            Exception: Any error from ``as_retriever`` is logged and re-raised.
        """
        try:
            store = self._require_store()
            merged_kwargs = {"k": 5, **(search_kwargs or {})}
            retriever = store.as_retriever(search_kwargs=merged_kwargs)
        except Exception as e:
            logger.exception("Error creating retriever: %s", e)
            raise
        logger.info("Retriever created successfully")
        return retriever

    def get_collection_stats(self) -> dict:
        """Report document count and collection name.

        Returns:
            A dict that always contains ``total_documents``,
            ``collection_name`` and ``persist_directory``. The count is 0 and
            the name is None when the store is not initialized or the lookup
            fails.
        """
        total_documents = 0
        collection_name = None
        if self.vector_store is not None:
            try:
                # Reads the wrapper's private ``_collection`` attribute, as the
                # original code did. NOTE(review): may break across
                # langchain_chroma versions.
                collection = self.vector_store._collection
                count = collection.count()
                name = collection.name
            except Exception as e:
                logger.exception("Error getting collection stats: %s", e)
            else:
                # Assign only after both reads succeed so a partial failure
                # still yields the zeroed result.
                total_documents, collection_name = count, name
        return {
            'total_documents': total_documents,
            'collection_name': collection_name,
            'persist_directory': self.persist_directory,
        }

    def clear_vector_store(self) -> bool:
        """Delete every entry from the collection.

        Returns:
            True if the store is uninitialized (nothing to clear) or all
            entries were deleted; False if any store call fails.
        """
        if self.vector_store is None:
            return True
        try:
            logger.info("Clearing vector store")
            # An empty ``where`` filter is not accepted by the backend's
            # delete validation, so list the stored ids and delete them
            # explicitly. ``include=[]`` asks for ids only.
            ids = self.vector_store.get(include=[])["ids"]
            step = self._DELETE_BATCH_SIZE
            for start in range(0, len(ids), step):
                self.vector_store.delete(ids=ids[start:start + step])
            logger.info("Vector store cleared successfully")
            return True
        except Exception as e:
            logger.exception("Error clearing vector store: %s", e)
            return False

    def is_initialized(self) -> bool:
        """Return True once :meth:`initialize_vector_store` has set the store."""
        return self.vector_store is not None
|