Spaces:
Runtime error
Runtime error
from loguru import logger | |
from openai import AsyncOpenAI | |
from pydantic import ConfigDict | |
from typing import Any, Sequence, Self | |
from ctp_slack_bot.core import ApplicationComponentBase, Settings | |
class EmbeddingsModelService(ApplicationComponentBase):
    """
    Service for embeddings model operations.

    Wraps an asynchronous OpenAI client and turns text strings into embedding
    vectors using the model named in the application settings.
    """

    # AsyncOpenAI is not a pydantic type, hence arbitrary_types_allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)

    # Supplies `embedding_model` and `vector_dimension`, both read in get_embeddings.
    settings: Settings
    # Client used to call the embeddings endpoint.
    open_ai_client: AsyncOpenAI

    async def get_embeddings(self: Self, texts: Sequence[str]) -> Sequence[Sequence[float]]:
        """Get embeddings for a sequence of texts using OpenAI’s API.

        Args:
            texts: Text chunks to embed.

        Returns:
            A tuple holding one tuple of floats per item in the API response.
            An empty tuple is returned when ``texts`` is empty.

        Raises:
            ValueError: If any returned embedding's length differs from the
                configured ``settings.vector_dimension``.
        """
        # Nothing to embed, so skip the API round trip entirely.
        if not texts:
            return ()

        logger.debug("Creating embeddings for {} text string(s)…", len(texts))
        response = await self.open_ai_client.embeddings.create(
            model=self.settings.embedding_model,
            # Materialize as a list so any Sequence (e.g. a tuple) is sent as a plain array.
            input=list(texts),
            encoding_format="float",  # Ensure we get raw float values.
        )
        embeddings = tuple(tuple(data.embedding) for data in response.data)

        # Validate every vector. The previous `case (first, _)` pattern matched
        # only when exactly two embeddings came back, so the check was skipped
        # for any other batch size.
        expected_dimension = self.settings.vector_dimension
        for embedding in embeddings:
            actual_dimension = len(embedding)
            if actual_dimension != expected_dimension:
                logger.error("Embedding dimension mismatch and/or misconfiguration: expected configured dimension {}, but got {}.", expected_dimension, actual_dimension)
                # TODO: raise a more specific type.
                raise ValueError(
                    f"Embedding dimension mismatch: expected {expected_dimension}, got {actual_dimension}."
                )
        return embeddings

    def name(self: Self) -> str:
        """Return the fixed identifier string for this component."""
        return "embeddings_model_service"