from typing import Self, Sequence

from loguru import logger
from pydantic import ConfigDict

from ctp_slack_bot.core import ApplicationComponentBase, Settings
from ctp_slack_bot.models import Chunk, VectorizedChunk
from .embeddings_model_service import EmbeddingsModelService


class VectorizationService(ApplicationComponentBase):
    """
    Service for vectorizing chunks of text data.

    Each chunk's text is sent to the embeddings model service, and the
    returned embedding is paired with the chunk's own fields to build a
    ``VectorizedChunk``.
    """

    # Instances are immutable once constructed.
    model_config = ConfigDict(frozen=True)

    # Application settings; not read by the methods defined in this class.
    settings: Settings
    # Collaborator that turns a list of texts into embeddings.
    embeddings_model_service: EmbeddingsModelService

    async def vectorize(self: Self, chunks: Sequence[Chunk]) -> Sequence[VectorizedChunk]:
        """
        Compute an embedding for every chunk and return the vectorized chunks.

        Args:
            chunks: The chunks whose ``text`` should be embedded.

        Returns:
            A tuple of ``VectorizedChunk`` objects, one per input chunk and in
            the same order, each carrying the chunk's ``text``, ``parent_id``,
            ``chunk_id`` and ``metadata`` plus its embedding.

        Raises:
            ValueError: If the embeddings service returns a different number
                of embeddings than the number of chunks supplied.
        """
        # Nothing to embed: skip the embeddings call entirely.
        if not chunks:
            return ()

        embeddings = await self.embeddings_model_service.get_embeddings(
            [chunk.text for chunk in chunks]
        )

        # strict=True turns a chunk/embedding count mismatch into an error
        # instead of silently dropping the unmatched trailing items.
        return tuple(
            VectorizedChunk(
                text=chunk.text,
                parent_id=chunk.parent_id,
                chunk_id=chunk.chunk_id,
                metadata=chunk.metadata,
                embedding=embedding,
            )
            for chunk, embedding in zip(chunks, embeddings, strict=True)
        )

    @property
    def name(self: Self) -> str:
        """Return the identifier string of this component."""
        return "vectorization_service"