LiKenun's picture
Simplify models
67436c8
raw
history blame
1.67 kB
from abc import ABC, abstractmethod
from pydantic import BaseModel, ConfigDict, Field
from typing import Any, final, Mapping, Self, Sequence, Optional
class Chunk(BaseModel):
    """An immutable piece of text taken from a larger source content item."""

    # Frozen model: field values cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    text: str  # Textual form of this chunk
    parent_id: str  # Identity of the source content this chunk belongs to
    chunk_id: str  # Identity of this chunk; unique within its source content
    metadata: Mapping[str, Any]
@final
class VectorQuery(BaseModel):
    """Model for vector database similarity search queries.

    Instances are frozen (immutable after construction).

    Attributes:
        query_embeddings: The already-vectorized query, as a sequence of
            floats, used for the similarity search.
        k: Number of similar documents to retrieve.
        score_threshold: Minimum similarity score threshold for inclusion in
            results. Defaults to 0.7.
        filter_metadata: Optional filters for metadata fields; ``None`` (the
            default) means no metadata filter is supplied.
    """

    query_embeddings: Sequence[float]
    k: int
    score_threshold: float = Field(default=0.7)
    filter_metadata: Optional[Mapping[str, Any]] = None

    model_config = ConfigDict(frozen=True)
@final
class VectorizedChunk(Chunk):
    """A chunk of content paired with its vector representation."""

    embedding: Sequence[float]  # Vector representation of the chunk
class Content(ABC, BaseModel):
    """Abstract base for every kind of content.

    Concrete subclasses must implement all three abstract accessors declared
    here. Instances are frozen (immutable after construction).
    """

    model_config = ConfigDict(frozen=True)

    @abstractmethod
    def get_id(self: Self) -> str:
        """Return the identity of this content as a string."""

    @abstractmethod
    def get_chunks(self: Self) -> Sequence[Chunk]:
        """Return the sequence of chunks for this content."""

    @abstractmethod
    def get_metadata(self: Self) -> Mapping[str, Any]:
        """Return the metadata mapping for this content."""