Spaces:
Runtime error
Runtime error
import logging | |
from typing import List, Optional | |
#from langchain_openai import OpenAIEmbeddings | |
from langchain_core.documents import Document | |
from langchain_huggingface import HuggingFaceEmbeddings | |
logger = logging.getLogger(__name__)


class EmbeddingManager:
    """Owns a LangChain ``HuggingFaceEmbeddings`` model and exposes helpers on it.

    The model is loaded eagerly in ``__init__`` (on CPU) and is re-loaded on
    demand by :meth:`get_embeddings` if the ``embeddings`` attribute has been
    reset to ``None``.

    Attributes:
        model_name: Identifier of the embedding model to load.
        embeddings: The loaded embeddings object, or ``None`` when not loaded.
    """

    def __init__(
        self,
        model_name: str = "sentence-transformers/all-MiniLM-L6-v2",  # text-embedding-3-small
    ):
        """Store ``model_name`` and load the embedding model immediately.

        Raises:
            Exception: Whatever the underlying model constructor raises.
        """
        self.model_name = model_name
        self.embeddings = None
        self._initialize_embeddings()

    def _initialize_embeddings(self):
        """Instantiate the embedding model on CPU and store it on ``self``.

        Failures are logged and then re-raised unchanged.
        """
        try:
            logger.info(f"Initializing embedding model: {self.model_name}")
            # ``model_name`` is the declared field name; ``model`` is only an
            # alias in newer langchain-huggingface releases and is rejected as
            # an unknown field by older ones.
            self.embeddings = HuggingFaceEmbeddings(
                model_name=self.model_name,
                model_kwargs={'device': 'cpu'},
            )
            logger.info("Embedding model initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing embedding model: {e}")
            raise

    def get_embeddings(self) -> "HuggingFaceEmbeddings":
        """Return the embeddings object, loading it first if it is ``None``."""
        if self.embeddings is None:
            self._initialize_embeddings()
        return self.embeddings

    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Embed every string in ``texts`` via ``embed_documents``.

        Args:
            texts: Strings to embed.

        Returns:
            One embedding vector per input string, as returned by the model.

        Raises:
            Exception: Any error from the model; it is logged and re-raised.
        """
        try:
            logger.info(f"Generating embeddings for {len(texts)} text(s)")
            # Go through get_embeddings() so a reset model is re-loaded
            # instead of failing with AttributeError on None.
            vectors = self.get_embeddings().embed_documents(texts)
            logger.info(f"Successfully generated {len(vectors)} embeddings")
            return vectors
        except Exception as e:
            logger.error(f"Error generating embeddings: {e}")
            raise

    def generate_single_embedding(self, text: str) -> List[float]:
        """Embed a single string via ``embed_query``.

        Args:
            text: The string to embed.

        Returns:
            The embedding vector for ``text``.

        Raises:
            Exception: Any error from the model; it is logged and re-raised.
        """
        try:
            return self.get_embeddings().embed_query(text)
        except Exception as e:
            logger.error(f"Error generating single embedding: {e}")
            raise

    def get_embedding_dimension(self) -> int:
        """Return the vector length, measured by embedding the string ``"test"``.

        Note that this performs a real embedding call every time it is invoked.

        Raises:
            Exception: Any error from the embedding call; logged and re-raised.
        """
        try:
            return len(self.generate_single_embedding("test"))
        except Exception as e:
            logger.error(f"Error getting embedding dimension: {e}")
            raise

    def get_model_info(self) -> dict:
        """Return the model name, embedding dimension and initialisation flag.

        The dimension is obtained through :meth:`get_embedding_dimension`, so
        this call runs the model once and propagates any error it raises.
        """
        return {
            'model_name': self.model_name,
            'dimension': self.get_embedding_dimension(),
            'is_initialized': self.embeddings is not None,
        }