# LiKenun — Refactor #3 (bb7c9a3)
from abc import abstractmethod
from pydantic import BaseModel, ConfigDict, Field, field_validator
from types import MappingProxyType
from typing import Any, final, Mapping, Optional, Self
from ctp_slack_bot.core import AbstractBaseModel
from ctp_slack_bot.utils import to_deep_immutable
class Chunk(BaseModel):
    """An immutable (frozen) piece of text cut from a larger source content.

    Attributes:
        text: The text representation of the chunk.
        parent_id: The identity of the source content.
        chunk_id: The identity of this chunk; unique within the source content.
        metadata: Key-value data attached to the chunk. Supplied values are
            passed through ``to_deep_immutable`` during validation; the
            default is an empty read-only mapping.
    """

    model_config = ConfigDict(frozen=True)

    text: str
    parent_id: str
    chunk_id: str
    metadata: Mapping[str, Any] = Field(default_factory=lambda: MappingProxyType({}))

    @field_validator('metadata')
    @classmethod
    def __make_metadata_readonly(cls, value: Mapping[str, Any]) -> Mapping[str, Any]:
        """Return the validated metadata converted by ``to_deep_immutable``."""
        readonly_metadata = to_deep_immutable(value)
        return readonly_metadata
@final
class VectorQuery(BaseModel):
    """Model for vector database similarity search queries.

    Instances are frozen: fields cannot be reassigned after construction.

    Attributes:
        query_embeddings: The embedding vector (a tuple of floats) used for
            the similarity search.
        k: Number of similar documents to retrieve.
        score_threshold: Minimum similarity score threshold for inclusion in
            results; defaults to 0.7.
        filter_metadata: Optional filters for metadata fields. Supplied
            values are passed through ``to_deep_immutable`` during
            validation; the default is an empty read-only mapping.
    """

    model_config = ConfigDict(frozen=True)

    query_embeddings: tuple[float, ...]
    k: int
    score_threshold: float = Field(default=0.7)
    filter_metadata: Mapping[str, Any] = Field(default_factory=lambda: MappingProxyType({}))

    @field_validator('filter_metadata')
    @classmethod
    def __make_metadata_readonly(cls, value: Mapping[str, Any]) -> Mapping[str, Any]:
        """Return the validated filters converted by ``to_deep_immutable``."""
        readonly_filters = to_deep_immutable(value)
        return readonly_filters
@final
class VectorizedChunk(Chunk):
    """A chunk of content paired with its vector representation.

    Attributes:
        embedding: The vector representation of the chunk.
    """

    embedding: tuple[float, ...]
class Content(AbstractBaseModel):
    """Abstract, frozen base model from which every type of content derives.

    Subclasses must implement ``get_id``, ``get_chunks`` and ``get_metadata``.
    """

    model_config = ConfigDict(frozen=True)

    @abstractmethod
    def get_id(self: Self) -> str:
        """Return the identity of this content."""
        ...

    @abstractmethod
    def get_chunks(self: Self) -> tuple[Chunk, ...]:
        """Return the chunks of this content as a tuple."""
        ...

    @abstractmethod
    def get_metadata(self: Self) -> Mapping[str, Any]:
        """Return the metadata mapping of this content."""
        ...