Hussam committed on
Commit
5639669
·
1 Parent(s): 5d52cb3

added score_threshold to the configs and .env.template

Browse files
.env.template CHANGED
@@ -24,6 +24,7 @@ TOP_K_MATCHES=1
24
  # MongoDB Configuration
25
  MONGODB_URI=mongodb+srv://username:[email protected]/database?retryWrites=true&w=majority
26
  MONGODB_NAME=ctp_slack_bot
 
27
 
28
  # Hugging Face Configuration
29
  HF_API_TOKEN=🤗
 
24
  # MongoDB Configuration
25
  MONGODB_URI=mongodb+srv://username:[email protected]/database?retryWrites=true&w=majority
26
  MONGODB_NAME=ctp_slack_bot
27
+ SCORE_THRESHOLD=0.5
28
 
29
  # Hugging Face Configuration
30
  HF_API_TOKEN=🤗
src/ctp_slack_bot/core/config.py CHANGED
@@ -31,6 +31,7 @@ class Settings(BaseSettings): # TODO: Strong guarantees of validity, because gar
31
  # MongoDB Configuration
32
  MONGODB_URI: SecretStr # TODO: Contemplate switching to MongoDsn type for the main URL, and separate out the credentials to SecretStr variables.
33
  MONGODB_NAME: str
 
34
 
35
  # Hugging Face Configuration
36
  HF_API_TOKEN: Optional[SecretStr] = None
 
31
  # MongoDB Configuration
32
  MONGODB_URI: SecretStr # TODO: Contemplate switching to MongoDsn type for the main URL, and separate out the credentials to SecretStr variables.
33
  MONGODB_NAME: str
34
+ SCORE_THRESHOLD: NonNegativeFloat
35
 
36
  # Hugging Face Configuration
37
  HF_API_TOKEN: Optional[SecretStr] = None
src/ctp_slack_bot/models/base.py CHANGED
@@ -1,6 +1,6 @@
1
  from abc import ABC, abstractmethod
2
- from pydantic import BaseModel, ConfigDict
3
- from typing import Any, Dict, final, Self, Sequence
4
 
5
 
6
  class Chunk(BaseModel):
@@ -13,7 +13,23 @@ class Chunk(BaseModel):
13
 
14
  model_config = ConfigDict(frozen=True)
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
 
17
  @final
18
  class VectorizedChunk(Chunk):
19
  """A class representing a vectorized chunk of content."""
 
1
  from abc import ABC, abstractmethod
2
+ from pydantic import BaseModel, ConfigDict, Field
3
+ from typing import Any, Dict, final, Self, Sequence, Optional
4
 
5
 
6
  class Chunk(BaseModel):
 
13
 
14
  model_config = ConfigDict(frozen=True)
15
 
16
+ @final
17
+ class VectorQuery(BaseModel):
18
+ """Model for vector database similarity search queries.
19
+
20
+ Attributes:
21
+ query_embeddings: The query's embedding vector used for similarity search
22
+ k: Number of similar documents to retrieve
23
+ score_threshold: Minimum similarity score threshold for inclusion in results
24
+ filter_metadata: Optional filters for metadata fields
25
+ """
26
+
27
+ query_embeddings: Sequence[float]
28
+ k: int
29
+ score_threshold: float = Field(default=0.7)
30
+ filter_metadata: Optional[Dict[str, Any]] = None
31
 
32
+
33
  @final
34
  class VectorizedChunk(Chunk):
35
  """A class representing a vectorized chunk of content."""
src/ctp_slack_bot/models/vector_query.py DELETED
@@ -1,17 +0,0 @@
1
- from pydantic import BaseModel, Field, model_validator
2
- from typing import Any, Dict, Optional, Sequence
3
-
4
- class VectorQuery(BaseModel):
5
- """Model for vector database similarity search queries.
6
-
7
- Attributes:
8
- query_text: The text to be vectorized and used for similarity search
9
- k: Number of similar documents to retrieve
10
- score_threshold: Minimum similarity score threshold for inclusion in results
11
- filter_metadata: Optional filters for metadata fields
12
- """
13
-
14
- query_embeddings: Sequence[float]
15
- k: int
16
- score_threshold: float = Field(default=0.7)
17
- filter_metadata: Optional[Dict[str, Any]] = None