Hussam committed on
Commit
100a4fd
·
1 Parent(s): 5639669

redone context retrieval

Browse files
src/ctp_slack_bot/services/context_retrieval_service.py CHANGED
@@ -14,67 +14,52 @@ class ContextRetrievalService(BaseModel):
14
 
15
  settings: Settings
16
  vectorization_service: VectorizationService
17
- vector_database_service: VectorDatabaseService
18
 
 
19
  @model_validator(mode='after')
20
  def post_init(self: Self) -> Self:
21
  logger.debug("Created {}", self.__class__.__name__)
22
  return self
23
 
24
- # Should not allow initialization calls to bubble up all the way to the surface ― sequester in `post_init` or the class on which it depends.
25
- # async def initialize(self):
26
- # """
27
- # Initialize the required services.
28
- # """
29
- # await self.vector_database_service.initialize()
30
-
31
- def get_context(self, message: SlackMessage) -> Sequence[Chunk]:
32
  """
33
- Retrieve relevant context for a given Slack message.
34
-
35
- This function:
36
- 1. Extracts the question text from the message
37
- 2. Vectorizes the question using VectorizationService
38
- 3. Queries VectorDatabaseService for similar context
39
- 4. Returns the relevant context as a list of RetreivedContext objects
40
 
41
  Args:
42
  message: The SlackMessage containing the user's question
43
 
44
  Returns:
45
- List[RetreivedContext]: List of retrieved context items with similarity scores
46
  """
47
- # if not message.is_question:
48
- # logger.debug(f"Message {message.key} is not a question, skipping context retrieval")
49
- # return []
50
 
51
- # try:
52
- # # Vectorize the message text
53
- # embeddings = self.vectorization_service.vectorize([message.text])
54
- # if embeddings is None or len(embeddings) == 0:
55
- # logger.error(f"Failed to generate embedding for message: {message.key}")
56
- # return []
57
-
58
- # query_embedding = embeddings[0].tolist()
59
-
60
- # # Create vector query
61
- # vector_query = VectorQuery(
62
- # query_text=message.text,
63
- # k=self.settings.TOP_K_MATCHES,
64
- # score_threshold=0.7 # Minimum similarity threshold
65
- # )
66
-
67
- # # Search for similar content chunks in vector database
68
- # context_results = await self.vector_database_service.search_by_similarity(
69
- # query=vector_query,
70
- # query_embedding=query_embedding
71
- # )
72
-
73
- # logger.info(f"Retrieved {len(context_results)} context items for message: {message.key}")
74
- # return context_results
75
 
76
- # except Exception as e:
77
- # logger.error(f"Error retrieving context for message {message.key}: {str(e)}")
78
- # return []
79
- return (VectorizedChunk(text="Mock context chunk", parent_id="lol", chunk_id="no", metadata={}, embedding=tuple()),
80
- VectorizedChunk(text="Moar mock context chunk", parent_id="lol", chunk_id="wut", metadata={}, embedding=tuple()))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  settings: Settings
16
  vectorization_service: VectorizationService
17
+ vector_db_service: VectorDatabaseService
18
 
19
+ # Should not allow initialization calls to bubble up all the way to the surface ― sequester in `post_init` or the class on which it depends.
20
  @model_validator(mode='after')
21
  def post_init(self: Self) -> Self:
22
  logger.debug("Created {}", self.__class__.__name__)
23
  return self
24
 
25
+ async def get_context(self, message: SlackMessage) -> Sequence[Chunk]:
 
 
 
 
 
 
 
26
  """
27
+ Retrieve relevant context for a given SlackMessage by vectorizing the message and
28
+ querying the vectorstore.
 
 
 
 
 
29
 
30
  Args:
31
  message: The SlackMessage containing the user's question
32
 
33
  Returns:
34
+ Sequence[Chunk]: List of retrieved context items with similarity scores
35
  """
36
+ # Extract chunks from the message
37
+ message_chunks = message.get_chunks()
 
38
 
39
+ # Vectorize the chunks
40
+ vectorized_chunks = self.vectorization_service.vectorize(message_chunks)
41
+
42
+ # Create vector query using the first chunk's embedding (typically there's only one chunk for a message)
43
+ if not vectorized_chunks:
44
+ logger.warning("No vectorized chunks were created for message")
45
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
+ query = VectorQuery(
48
+ query_embeddings=vectorized_chunks[0].embedding,
49
+ k=self.settings.TOP_K_MATCHES,
50
+ score_threshold=self.settings.SCORE_THRESHOLD,
51
+ filter_metadata=None # Can be expanded to include filters based on message metadata
52
+ )
53
+
54
+ # Perform similarity search
55
+ try:
56
+ results = await self.vector_db_service.search_by_similarity(query)
57
+ logger.info(f"Retrieved {len(results)} context chunks for query")
58
+ return results
59
+ except Exception as e:
60
+ logger.error(f"Error retrieving context: {str(e)}")
61
+ return []
62
+
63
+ # test return statement
64
+ # return (VectorizedChunk(text="Mock context chunk", parent_id="lol", chunk_id="no", metadata={}, embedding=tuple()),
65
+ # VectorizedChunk(text="Moar mock context chunk", parent_id="lol", chunk_id="wut", metadata={}, embedding=tuple()))