from abc import abstractmethod
from pydantic import BaseModel, ConfigDict, Field, field_validator
from types import MappingProxyType
from typing import Any, final, Mapping, Optional, Self

from ctp_slack_bot.core import AbstractBaseModel
from ctp_slack_bot.utils import to_deep_immutable


class Chunk(BaseModel):
    """A class representing a chunk of content."""

    model_config = ConfigDict(frozen=True)

    text: str  # The text representation
    parent_id: str  # The source content’s identity
    chunk_id: str  # This chunk’s identity—unique within the source content
    # Defaults to an empty read-only mapping when no metadata is supplied.
    metadata: Mapping[str, Any] = Field(default_factory=lambda: MappingProxyType({}))

    # NOTE: the leading double underscore makes Python mangle this name per
    # class, so it cannot clash with a same-named validator in another class.
    @field_validator('metadata')
    @classmethod
    def __make_metadata_readonly(cls, value: Mapping[str, Any]) -> Mapping[str, Any]:
        """Return the supplied ``metadata`` passed through ``to_deep_immutable``."""
        return to_deep_immutable(value)


@final
class VectorQuery(BaseModel):
    """Model for vector database similarity search queries.

    Attributes:
        query_embeddings: The embedding vector to use for the similarity search
        k: Number of similar documents to retrieve
        score_threshold: Minimum similarity score threshold for inclusion in
            results (defaults to 0.7)
        filter_metadata: Optional filters for metadata fields (defaults to an
            empty read-only mapping)
    """

    model_config = ConfigDict(frozen=True)

    query_embeddings: tuple[float, ...]
    k: int
    score_threshold: float = Field(default=0.7)
    filter_metadata: Mapping[str, Any] = Field(default_factory=lambda: MappingProxyType({}))

    # NOTE: the leading double underscore makes Python mangle this name per
    # class, so it cannot clash with a same-named validator in another class.
    @field_validator('filter_metadata')
    @classmethod
    def __make_metadata_readonly(cls, value: Mapping[str, Any]) -> Mapping[str, Any]:
        """Return the supplied ``filter_metadata`` passed through ``to_deep_immutable``."""
        return to_deep_immutable(value)


@final
class VectorizedChunk(Chunk):
    """A class representing a vectorized chunk of content."""

    embedding: tuple[float, ...]  # The vector representation


class Content(AbstractBaseModel):
    """An abstract base class for all types of content."""

    model_config = ConfigDict(frozen=True)

    @abstractmethod
    def get_id(self: Self) -> str:
        """Return the identifier of this content as a string."""

    @abstractmethod
    def get_chunks(self: Self) -> tuple[Chunk, ...]:
        """Return the chunks of this content as a tuple of ``Chunk`` objects."""

    @abstractmethod
    def get_metadata(self: Self) -> Mapping[str, Any]:
        """Return the metadata of this content as a string-keyed mapping."""