from abc import ABC, abstractmethod
from typing import Any, final, Mapping, Self, Sequence, Optional

from pydantic import BaseModel, ConfigDict, Field


class Chunk(BaseModel):
    """A piece of content, identified relative to the source it came from.

    Attributes:
        text: The text representation of the chunk.
        parent_id: The identity of the source content.
        chunk_id: This chunk's identity; unique within the source content.
        metadata: Arbitrary string-keyed metadata attached to the chunk.
    """

    text: str  # The text representation
    parent_id: str  # The source content's identity
    chunk_id: str  # This chunk's identity, unique within the source content
    metadata: Mapping[str, Any]

    # frozen=True: field assignment on an instance is rejected by pydantic.
    model_config = ConfigDict(frozen=True)


@final
class VectorQuery(BaseModel):
    """Model for vector database similarity search queries.

    The query carries an already-computed embedding vector, not raw text.

    Attributes:
        query_embeddings: The query vector used for the similarity search.
        k: Number of similar documents to retrieve.
        score_threshold: Minimum similarity score threshold for inclusion in
            results. Defaults to 0.7.
        filter_metadata: Optional filters for metadata fields; ``None`` (the
            default) means no filter is supplied.
    """

    query_embeddings: Sequence[float]
    k: int
    score_threshold: float = Field(default=0.7)
    filter_metadata: Optional[Mapping[str, Any]] = None

    # frozen=True: field assignment on an instance is rejected by pydantic.
    model_config = ConfigDict(frozen=True)


@final
class VectorizedChunk(Chunk):
    """A chunk of content together with its vector representation.

    Inherits every field (and the frozen configuration) from ``Chunk``.

    Attributes:
        embedding: The vector representation of the chunk.
    """

    embedding: Sequence[float]  # The vector representation


class Content(ABC, BaseModel):
    """An abstract base class for all types of content.

    Concrete subclasses must implement ``get_id``, ``get_chunks`` and
    ``get_metadata``.
    """

    # frozen=True: field assignment on an instance is rejected by pydantic.
    model_config = ConfigDict(frozen=True)

    @abstractmethod
    def get_id(self: Self) -> str:
        """Return the identifier of this content as a string."""

    @abstractmethod
    def get_chunks(self: Self) -> Sequence[Chunk]:
        """Return the chunks this content provides, as a sequence of ``Chunk``."""

    @abstractmethod
    def get_metadata(self: Self) -> Mapping[str, Any]:
        """Return the string-keyed metadata mapping for this content."""