Spaces:
Runtime error
Runtime error
File size: 1,667 Bytes
005a292 5639669 67436c8 005a292 9fd6e20 005a292 9fd6e20 a60b3fc 005a292 9fd6e20 005a292 67436c8 5639669 a60b3fc 005a292 67436c8 5639669 9fd6e20 005a292 67436c8 005a292 9fd6e20 67436c8 9fd6e20 67436c8 9fd6e20 67436c8 9fd6e20 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
from abc import ABC, abstractmethod
from pydantic import BaseModel, ConfigDict, Field
from typing import Any, final, Mapping, Self, Sequence, Optional
class Chunk(BaseModel):
    """A single chunk of content.

    Instances are frozen (see ``model_config``), so fields cannot be
    reassigned after construction.

    Attributes:
        text: The text representation of the chunk.
        parent_id: Identity of the source content.
        chunk_id: Identity of this chunk; unique within the source content.
        metadata: String-keyed metadata associated with the chunk.
    """

    model_config = ConfigDict(frozen=True)

    text: str
    parent_id: str
    chunk_id: str
    metadata: Mapping[str, Any]
@final
class VectorQuery(BaseModel):
    """Model for vector database similarity search queries.

    The query carries an embedding vector rather than raw text: the model
    has no text field, so any vectorization must happen before a
    ``VectorQuery`` is built.

    Instances are frozen (see ``model_config``), so fields cannot be
    reassigned after construction.

    Attributes:
        query_embeddings: The query vector used for the similarity search.
        k: Number of similar documents to retrieve.
        score_threshold: Minimum similarity score for inclusion in the
            results. Defaults to 0.7.
        filter_metadata: Optional filters for metadata fields. Defaults to
            ``None``.
    """

    query_embeddings: Sequence[float]
    k: int
    score_threshold: float = Field(default=0.7)
    filter_metadata: Optional[Mapping[str, Any]] = None

    model_config = ConfigDict(frozen=True)
@final
class VectorizedChunk(Chunk):
    """A ``Chunk`` that also carries its vector representation.

    Attributes:
        embedding: The vector representation of the chunk.
    """

    embedding: Sequence[float]
class Content(ABC, BaseModel):
    """Abstract base model shared by all types of content.

    The three methods below are abstract and must be implemented by
    concrete subclasses. The model is configured as frozen.
    """

    model_config = ConfigDict(frozen=True)

    @abstractmethod
    def get_id(self: Self) -> str:
        """Return the identity of this content."""

    @abstractmethod
    def get_chunks(self: Self) -> Sequence[Chunk]:
        """Return the chunks of this content."""

    @abstractmethod
    def get_metadata(self: Self) -> Mapping[str, Any]:
        """Return the metadata of this content."""
|