File size: 2,265 Bytes
bb7c9a3
bb5dde5
bb7c9a3
bb5dde5
 
bb7c9a3
bb5dde5
005a292
 
9fd6e20
 
005a292
bb7c9a3
 
9fd6e20
 
 
bb7c9a3
005a292
bb5dde5
 
 
 
 
67436c8
5639669
 
 
 
 
 
 
 
 
 
 
bb7c9a3
 
bb5dde5
5639669
 
bb7c9a3
67436c8
bb5dde5
 
 
 
 
5639669
9fd6e20
 
 
005a292
bb5dde5
005a292
 
bb7c9a3
9fd6e20
 
 
 
 
67436c8
9fd6e20
 
 
bb5dde5
9fd6e20
 
 
67436c8
9fd6e20
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from abc import abstractmethod
from pydantic import BaseModel, ConfigDict, Field, field_validator
from types import MappingProxyType
from typing import Any, final, Mapping, Optional, Self

from ctp_slack_bot.core import AbstractBaseModel
from ctp_slack_bot.utils import to_deep_immutable


class Chunk(BaseModel):
    """A class representing a chunk of content."""
    # NOTE: the docstring above is kept verbatim on purpose; pydantic uses the
    # class docstring as the model's JSON-schema description.

    # Frozen model: field assignment after construction is rejected.
    model_config = ConfigDict(frozen=True)

    # The text representation.
    text: str
    # The identity of the source content this chunk came from.
    parent_id: str
    # The identity of this chunk; unique within the source content.
    chunk_id: str
    # Free-form key/value data; an empty read-only mapping when omitted.
    metadata: Mapping[str, Any] = Field(
        default_factory=lambda: MappingProxyType({}),
    )

    @field_validator('metadata')
    @classmethod
    def __make_metadata_readonly(
        cls,
        raw_metadata: Mapping[str, Any],
    ) -> Mapping[str, Any]:
        """Replace supplied metadata with the result of ``to_deep_immutable``."""
        # Presumably yields a recursively read-only structure; see
        # ctp_slack_bot.utils.to_deep_immutable for the exact guarantees.
        readonly_metadata = to_deep_immutable(raw_metadata)
        return readonly_metadata


@final
class VectorQuery(BaseModel):
    """Model for vector database similarity search queries.

    Attributes:
        query_embeddings: The already-vectorized query (a tuple of floats) used
            for similarity search
        k: Number of similar documents to retrieve
        score_threshold: Minimum similarity score threshold for inclusion in
            results (defaults to 0.7)
        filter_metadata: Optional filters for metadata fields (defaults to an
            empty read-only mapping)
    """

    # Frozen model: field assignment after construction is rejected.
    model_config = ConfigDict(frozen=True)

    query_embeddings: tuple[float, ...]
    k: int
    score_threshold: float = Field(default=0.7)
    filter_metadata: Mapping[str, Any] = Field(default_factory=lambda: MappingProxyType({}))

    @field_validator('filter_metadata')
    @classmethod
    def __make_metadata_readonly(cls, value: Mapping[str, Any]) -> Mapping[str, Any]:
        """Replace supplied filters with the result of ``to_deep_immutable``."""
        # Presumably yields a recursively read-only structure; see
        # ctp_slack_bot.utils.to_deep_immutable for the exact guarantees.
        return to_deep_immutable(value)

    
@final
class VectorizedChunk(Chunk):
    """A class representing a vectorized chunk of content."""
    # Adds one field on top of everything declared by Chunk.

    # The vector representation, stored as a tuple of floats.
    embedding: tuple[float, ...]


class Content(AbstractBaseModel):
    """An abstract base class for all types of content."""
    # Concrete content types implement the three accessors declared below.

    # Frozen model configuration, as on the other models in this module.
    model_config = ConfigDict(frozen=True)

    @abstractmethod
    def get_id(self: Self) -> str:
        """Return the identifier of this content as a string."""

    @abstractmethod
    def get_chunks(self: Self) -> tuple[Chunk, ...]:
        """Return this content's chunks as a tuple of ``Chunk`` objects."""

    @abstractmethod
    def get_metadata(self: Self) -> Mapping[str, Any]:
        """Return the metadata mapping associated with this content."""