Hussam committed on
Commit
100a4fd
·
1 Parent(s): 5639669

redone context retrieval

Browse files
src/ctp_slack_bot/services/context_retrieval_service.py CHANGED
@@ -14,67 +14,52 @@ class ContextRetrievalService(BaseModel):
14
 
15
  settings: Settings
16
  vectorization_service: VectorizationService
17
- vector_database_service: VectorDatabaseService
18
 
 
19
  @model_validator(mode='after')
20
  def post_init(self: Self) -> Self:
21
  logger.debug("Created {}", self.__class__.__name__)
22
  return self
23
 
24
- # Should not allow initialization calls to bubble up all the way to the surface ― sequester in `post_init` or the class on which it depends.
25
- # async def initialize(self):
26
- # """
27
- # Initialize the required services.
28
- # """
29
- # await self.vector_database_service.initialize()
30
-
31
- def get_context(self, message: SlackMessage) -> Sequence[Chunk]:
32
  """
33
- Retrieve relevant context for a given Slack message.
34
-
35
- This function:
36
- 1. Extracts the question text from the message
37
- 2. Vectorizes the question using VectorizationService
38
- 3. Queries VectorDatabaseService for similar context
39
- 4. Returns the relevant context as a list of RetreivedContext objects
40
 
41
  Args:
42
  message: The SlackMessage containing the user's question
43
 
44
  Returns:
45
- List[RetreivedContext]: List of retrieved context items with similarity scores
46
  """
47
- # if not message.is_question:
48
- # logger.debug(f"Message {message.key} is not a question, skipping context retrieval")
49
- # return []
50
 
51
- # try:
52
- # # Vectorize the message text
53
- # embeddings = self.vectorization_service.vectorize([message.text])
54
- # if embeddings is None or len(embeddings) == 0:
55
- # logger.error(f"Failed to generate embedding for message: {message.key}")
56
- # return []
57
-
58
- # query_embedding = embeddings[0].tolist()
59
-
60
- # # Create vector query
61
- # vector_query = VectorQuery(
62
- # query_text=message.text,
63
- # k=self.settings.TOP_K_MATCHES,
64
- # score_threshold=0.7 # Minimum similarity threshold
65
- # )
66
-
67
- # # Search for similar content chunks in vector database
68
- # context_results = await self.vector_database_service.search_by_similarity(
69
- # query=vector_query,
70
- # query_embedding=query_embedding
71
- # )
72
-
73
- # logger.info(f"Retrieved {len(context_results)} context items for message: {message.key}")
74
- # return context_results
75
 
76
- # except Exception as e:
77
- # logger.error(f"Error retrieving context for message {message.key}: {str(e)}")
78
- # return []
79
- return (VectorizedChunk(text="Mock context chunk", parent_id="lol", chunk_id="no", metadata={}, embedding=tuple()),
80
- VectorizedChunk(text="Moar mock context chunk", parent_id="lol", chunk_id="wut", metadata={}, embedding=tuple()))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  settings: Settings
16
  vectorization_service: VectorizationService
17
+ vector_db_service: VectorDatabaseService
18
 
19
+ # Should not allow initialization calls to bubble up all the way to the surface ― sequester in `post_init` or the class on which it depends.
20
  @model_validator(mode='after')
21
  def post_init(self: Self) -> Self:
22
  logger.debug("Created {}", self.__class__.__name__)
23
  return self
24
 
25
+ async def get_context(self, message: SlackMessage) -> Sequence[Chunk]:
 
 
 
 
 
 
 
26
  """
27
+ Retrieve relevant context for a given SlackMessage by vectorizing the message and
28
+ querying the vectorstore.
 
 
 
 
 
29
 
30
  Args:
31
  message: The SlackMessage containing the user's question
32
 
33
  Returns:
34
+ Sequence[Chunk]: List of retrieved context items with similarity scores
35
  """
36
+ # Extract chunks from the message
37
+ message_chunks = message.get_chunks()
 
38
 
39
+ # Vectorize the chunks
40
+ vectorized_chunks = self.vectorization_service.vectorize(message_chunks)
41
+
42
+ # Create vector query using the first chunk's embedding (typically there's only one chunk for a message)
43
+ if not vectorized_chunks:
44
+ logger.warning("No vectorized chunks were created for message")
45
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
+ query = VectorQuery(
48
+ query_embeddings=vectorized_chunks[0].embedding,
49
+ k=self.settings.TOP_K_MATCHES,
50
+ score_threshold=self.settings.SCORE_THRESHOLD,
51
+ filter_metadata=None # Can be expanded to include filters based on message metadata
52
+ )
53
+
54
+ # Perform similarity search
55
+ try:
56
+ results = await self.vector_db_service.search_by_similarity(query)
57
+ logger.info(f"Retrieved {len(results)} context chunks for query")
58
+ return results
59
+ except Exception as e:
60
+ logger.error(f"Error retrieving context: {str(e)}")
61
+ return []
62
+
63
+ # test return statement
64
+ # return (VectorizedChunk(text="Mock context chunk", parent_id="lol", chunk_id="no", metadata={}, embedding=tuple()),
65
+ # VectorizedChunk(text="Moar mock context chunk", parent_id="lol", chunk_id="wut", metadata={}, embedding=tuple()))