# Source: rag-document-system/src/embedding_manager.py
# Uploaded by hugging2021 using huggingface_hub (commit 816825a, verified)
import logging
from typing import List, Optional
#from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
logger = logging.getLogger(__name__)
class EmbeddingManager:
    """Wrap a ``HuggingFaceEmbeddings`` model and expose helpers built on it.

    The model is loaded eagerly by ``__init__``. Every helper obtains the
    model through :meth:`get_embeddings`, which loads it again if
    ``self.embeddings`` is ``None``.
    """

    # Cached vector length of the currently loaded model; ``None`` until it
    # has been measured. Declared on the class so that every instance has
    # the attribute, however it was created.
    _dimension: Optional[int] = None

    def __init__(
        self,
        model_name: str = "sentence-transformers/all-MiniLM-L6-v2",  # text-embedding-3-small
    ):
        """Store ``model_name`` and load the corresponding model.

        Args:
            model_name: Identifier passed to ``HuggingFaceEmbeddings``.

        Raises:
            Exception: Any error raised while loading the model.
        """
        self.model_name = model_name
        self.embeddings = None
        self._initialize_embeddings()

    def _initialize_embeddings(self):
        """Load the model named by ``self.model_name`` with device ``cpu``.

        Raises:
            Exception: Any error raised by ``HuggingFaceEmbeddings``; it is
                logged and re-raised unchanged.
        """
        try:
            logger.info("Initializing embedding model: %s", self.model_name)
            # ``model_name`` is the documented field name of
            # HuggingFaceEmbeddings; the shorter ``model`` spelling is only
            # an alias and is not accepted by every release.
            self.embeddings = HuggingFaceEmbeddings(
                model_name=self.model_name,
                model_kwargs={'device': 'cpu'},
            )
            # A newly loaded model may produce vectors of a different size.
            self._dimension = None
            logger.info("Embedding model initialized successfully")
        except Exception as e:
            logger.error("Error initializing embedding model: %s", e)
            raise

    def get_embeddings(self) -> "HuggingFaceEmbeddings":
        """Return the embedding model, loading it first if it is ``None``."""
        if self.embeddings is None:
            self._initialize_embeddings()
        return self.embeddings

    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Embed ``texts`` with the model's ``embed_documents`` method.

        Args:
            texts: The strings to embed.

        Returns:
            The value returned by ``embed_documents``; an empty list when
            ``texts`` is empty (the model is not called in that case).

        Raises:
            Exception: Any error raised by the model; it is logged and
                re-raised unchanged.
        """
        # ``len`` rather than truthiness: the original already required a
        # sized input, and array-like inputs have ambiguous truth values.
        if len(texts) == 0:
            return []
        try:
            logger.info("Generating embeddings for %s text(s)", len(texts))
            vectors = self.get_embeddings().embed_documents(texts)
            logger.info("Successfully generated %s embeddings", len(vectors))
            return vectors
        except Exception as e:
            logger.error("Error generating embeddings: %s", e)
            raise

    def generate_single_embedding(self, text: str) -> List[float]:
        """Embed one string with the model's ``embed_query`` method.

        Args:
            text: The string to embed.

        Returns:
            The value returned by ``embed_query``.

        Raises:
            Exception: Any error raised by the model; it is logged and
                re-raised unchanged.
        """
        try:
            return self.get_embeddings().embed_query(text)
        except Exception as e:
            logger.error("Error generating single embedding: %s", e)
            raise

    def get_embedding_dimension(self) -> int:
        """Return the length of the vectors produced by the model.

        The length is measured once by embedding the string ``"test"`` and
        then cached until the model is loaded again.

        Raises:
            Exception: Any error raised while embedding the probe string; it
                is logged and re-raised unchanged.
        """
        if self._dimension is None:
            try:
                self._dimension = len(self.generate_single_embedding("test"))
            except Exception as e:
                logger.error("Error getting embedding dimension: %s", e)
                raise
        return self._dimension

    def get_model_info(self) -> dict:
        """Return a summary dict describing the managed model.

        Returns:
            A dict with keys ``'model_name'``, ``'dimension'`` and
            ``'is_initialized'``. Computing ``'dimension'`` may embed a
            probe string the first time it is requested.
        """
        return {
            'model_name': self.model_name,
            'dimension': self.get_embedding_dimension(),
            'is_initialized': self.embeddings is not None
        }