File size: 2,135 Bytes
005a292
bb5dde5
 
 
 
005a292
 
9fd6e20
 
005a292
9fd6e20
 
 
bb5dde5
005a292
9fd6e20
005a292
bb5dde5
 
 
 
 
67436c8
5639669
 
 
 
 
 
 
 
 
 
 
bb5dde5
5639669
 
bb5dde5
005a292
67436c8
 
bb5dde5
 
 
 
 
5639669
9fd6e20
 
 
005a292
bb5dde5
005a292
 
9fd6e20
 
 
 
 
 
67436c8
9fd6e20
 
 
bb5dde5
9fd6e20
 
 
67436c8
9fd6e20
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from abc import ABC, abstractmethod
from pydantic import BaseModel, ConfigDict, Field, field_validator
from typing import Any, final, Mapping, Optional, Self

from ctp_slack_bot.utils import to_deep_immutable


class Chunk(BaseModel):
    """A chunk of content cut from a larger piece of source content.

    The model is frozen, so fields cannot be reassigned after construction.

    Attributes:
        text: The text representation of the chunk.
        parent_id: The identity of the source content.
        chunk_id: This chunk's identity, unique within the source content.
        metadata: Key/value data attached to the chunk. The value is always
            passed through ``to_deep_immutable``, including the empty default.
    """

    text: str                   # The text representation
    parent_id: str              # The source content's identity
    chunk_id: str               # This chunk's identity, unique within the source content
    # validate_default=True is required for the validator below to run on the
    # default value: pydantic skips field validators for defaults otherwise,
    # which would leave an omitted ``metadata`` as a plain, mutable dict.
    metadata: Mapping[str, Any] = Field(default_factory=dict, validate_default=True)

    model_config = ConfigDict(frozen=True)

    @field_validator('metadata')
    @classmethod
    def __make_metadata_readonly(cls, value: Mapping[str, Any]) -> Mapping[str, Any]:
        """Replace the validated metadata with the result of ``to_deep_immutable``."""
        return to_deep_immutable(value)


@final
class VectorQuery(BaseModel):
    """Model for vector database similarity search queries.

    The model is frozen, so fields cannot be reassigned after construction.

    Attributes:
        query_embeddings: The query vector, as a tuple of floats, used for the
            similarity search
        k: Number of similar documents to retrieve
        score_threshold: Minimum similarity score threshold for inclusion in
            results (defaults to 0.7)
        filter_metadata: Optional filters for metadata fields. The value is
            always passed through ``to_deep_immutable``, including the empty
            default.
    """

    query_embeddings: tuple[float, ...]
    k: int
    score_threshold: float = Field(default=0.7)
    # validate_default=True is required for the validator below to run on the
    # default value: pydantic skips field validators for defaults otherwise,
    # which would leave an omitted ``filter_metadata`` as a plain, mutable dict.
    filter_metadata: Mapping[str, Any] = Field(default_factory=dict, validate_default=True)

    model_config = ConfigDict(frozen=True)

    @field_validator('filter_metadata')
    @classmethod
    def __make_metadata_readonly(cls, value: Mapping[str, Any]) -> Mapping[str, Any]:
        """Replace the validated filters with the result of ``to_deep_immutable``."""
        return to_deep_immutable(value)

    
@final
class VectorizedChunk(Chunk):
    """A chunk of content together with its vector representation.

    Inherits every field of ``Chunk`` and adds ``embedding``. The class is
    marked ``@final`` and is not meant to be subclassed.
    """

    embedding: tuple[float, ...] # The vector representation


class Content(ABC, BaseModel):
    """Abstract, frozen base model from which every type of content derives.

    Subclasses must implement all three accessors declared below before they
    can be instantiated.
    """

    model_config = ConfigDict(frozen=True)

    @abstractmethod
    def get_id(self: Self) -> str:
        """Return the identity of this content as a string."""

    @abstractmethod
    def get_chunks(self: Self) -> tuple[Chunk, ...]:
        """Return the chunks of this content as a tuple."""

    @abstractmethod
    def get_metadata(self: Self) -> Mapping[str, Any]:
        """Return the metadata mapping of this content."""