File size: 1,667 Bytes
005a292
5639669
67436c8
005a292
 
9fd6e20
 
005a292
9fd6e20
 
 
a60b3fc
005a292
9fd6e20
005a292
67436c8
5639669
 
 
 
 
 
 
 
 
 
 
 
 
 
a60b3fc
005a292
67436c8
 
5639669
9fd6e20
 
 
005a292
67436c8
005a292
 
9fd6e20
 
 
 
 
 
67436c8
9fd6e20
 
 
67436c8
9fd6e20
 
 
67436c8
9fd6e20
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from abc import ABC, abstractmethod
from pydantic import BaseModel, ConfigDict, Field
from typing import Any, final, Mapping, Self, Sequence, Optional


class Chunk(BaseModel):
    """A frozen model describing one chunk of a source content.

    Attributes:
        text: The text representation.
        parent_id: The source content's identity.
        chunk_id: This chunk's identity, unique within the source content.
        metadata: Mapping from string keys to arbitrary values.
    """

    model_config = ConfigDict(frozen=True)

    text: str
    parent_id: str
    chunk_id: str
    metadata: Mapping[str, Any]


@final
class VectorQuery(BaseModel):
    """Model for vector database similarity search queries.

    Instances are frozen (``model_config`` sets ``frozen=True``).

    Attributes:
        query_embeddings: The query vector, given as a sequence of floats,
            used for the similarity search. The query is supplied already
            vectorized; this model holds no raw query text.
        k: Number of similar documents to retrieve.
        score_threshold: Minimum similarity score threshold for inclusion in
            results. Defaults to 0.7.
        filter_metadata: Optional filters for metadata fields. Defaults to
            ``None``.
    """

    query_embeddings: Sequence[float]
    k: int
    score_threshold: float = Field(default=0.7)
    filter_metadata: Mapping[str, Any] | None = None

    model_config = ConfigDict(frozen=True)

    
@final
class VectorizedChunk(Chunk):
    """A ``Chunk`` that additionally carries its vector representation.

    Attributes:
        embedding: The vector representation, as a sequence of floats.
    """

    embedding: Sequence[float]


class Content(ABC, BaseModel):
    """Abstract, frozen base model from which every content type derives.

    Subclasses must implement ``get_id``, ``get_chunks`` and
    ``get_metadata``.
    """

    model_config = ConfigDict(frozen=True)

    @abstractmethod
    def get_id(self: Self) -> str:
        """Return the identifier of this content as a string."""
        ...

    @abstractmethod
    def get_chunks(self: Self) -> Sequence[Chunk]:
        """Return the sequence of ``Chunk`` objects for this content."""
        ...

    @abstractmethod
    def get_metadata(self: Self) -> Mapping[str, Any]:
        """Return this content's metadata as a string-keyed mapping."""
        ...