Spaces:
Runtime error
Runtime error
File size: 2,394 Bytes
816825a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import logging
from typing import List, Optional
#from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
logger = logging.getLogger(__name__)


class EmbeddingManager:
    """Load a HuggingFace embedding model and expose helpers to embed text.

    The model is loaded eagerly in ``__init__``. Every public method goes
    through :meth:`get_embeddings`, so if ``self.embeddings`` has been reset
    to ``None`` the model is re-initialized on demand instead of failing with
    an ``AttributeError``.
    """

    def __init__(
        self,
        model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
        device: str = "cpu",
    ):
        """Store the configuration and load the embedding model.

        Args:
            model_name: Identifier of the embedding model to load.
            device: Value forwarded as ``model_kwargs["device"]`` to
                ``HuggingFaceEmbeddings``. Defaults to ``"cpu"``.

        Raises:
            Exception: Whatever ``HuggingFaceEmbeddings`` raises while
                loading; the error is logged and re-raised unchanged.
        """
        self.model_name = model_name
        self.device = device
        self.embeddings = None
        self._initialize_embeddings()

    def _initialize_embeddings(self):
        """Instantiate ``HuggingFaceEmbeddings`` and store it on the instance.

        Raises:
            Exception: Any error raised by the constructor, after logging it
                together with its traceback.
        """
        logger.info("Initializing embedding model: %s", self.model_name)
        try:
            # NOTE(review): ``model_name`` is the field name; ``model`` appears
            # to be accepted only as an alias by newer langchain-huggingface
            # releases -- confirm against the pinned version.
            self.embeddings = HuggingFaceEmbeddings(
                model_name=self.model_name,
                model_kwargs={"device": self.device},
            )
        except Exception:
            logger.exception("Error initializing embedding model")
            raise
        logger.info("Embedding model initialized successfully")

    def get_embeddings(self) -> "HuggingFaceEmbeddings":
        """Return the embedding model, initializing it first if it is unset."""
        if self.embeddings is None:
            self._initialize_embeddings()
        return self.embeddings

    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of texts.

        Args:
            texts: Strings to embed.

        Returns:
            The result of ``embed_documents(texts)``; an empty list when
            ``texts`` is empty (the model is not invoked in that case).

        Raises:
            Exception: Any error raised by the model, after logging it.
        """
        if not texts:
            # Nothing to embed; skip the model call entirely.
            return []
        logger.info("Generating embeddings for %d text(s)", len(texts))
        try:
            vectors = self.get_embeddings().embed_documents(texts)
        except Exception:
            logger.exception("Error generating embeddings")
            raise
        logger.info("Successfully generated %d embeddings", len(vectors))
        return vectors

    def generate_single_embedding(self, text: str) -> List[float]:
        """Embed a single text with the model's ``embed_query`` method.

        Args:
            text: String to embed.

        Returns:
            The result of ``embed_query(text)``.

        Raises:
            Exception: Any error raised by the model, after logging it.
        """
        try:
            return self.get_embeddings().embed_query(text)
        except Exception:
            logger.exception("Error generating single embedding")
            raise

    def get_embedding_dimension(self) -> int:
        """Return the length of the vector produced for a probe string.

        The dimension is measured by embedding the literal ``"test"``.
        Failures are logged once by :meth:`generate_single_embedding` and
        propagate unchanged, so they are not logged again here.
        """
        return len(self.generate_single_embedding("test"))

    def get_model_info(self) -> dict:
        """Return the model name, embedding dimension and init status.

        Returns:
            A dict with the keys ``'model_name'``, ``'dimension'`` and
            ``'is_initialized'``. Computing ``'dimension'`` runs one
            embedding call.
        """
        return {
            'model_name': self.model_name,
            'dimension': self.get_embedding_dimension(),
            'is_initialized': self.embeddings is not None,
        }
|