Spaces:
Runtime error
Runtime error
Hussam
committed on
Commit
·
100a4fd
1
Parent(s):
5639669
redone context retrieval
Browse files
src/ctp_slack_bot/services/context_retrieval_service.py
CHANGED
@@ -14,67 +14,52 @@ class ContextRetrievalService(BaseModel):
|
|
14 |
|
15 |
settings: Settings
|
16 |
vectorization_service: VectorizationService
|
17 |
-
|
18 |
|
|
|
19 |
@model_validator(mode='after')
|
20 |
def post_init(self: Self) -> Self:
|
21 |
logger.debug("Created {}", self.__class__.__name__)
|
22 |
return self
|
23 |
|
24 |
-
|
25 |
-
# async def initialize(self):
|
26 |
-
# """
|
27 |
-
# Initialize the required services.
|
28 |
-
# """
|
29 |
-
# await self.vector_database_service.initialize()
|
30 |
-
|
31 |
-
def get_context(self, message: SlackMessage) -> Sequence[Chunk]:
|
32 |
"""
|
33 |
-
Retrieve relevant context for a given
|
34 |
-
|
35 |
-
This function:
|
36 |
-
1. Extracts the question text from the message
|
37 |
-
2. Vectorizes the question using VectorizationService
|
38 |
-
3. Queries VectorDatabaseService for similar context
|
39 |
-
4. Returns the relevant context as a list of RetreivedContext objects
|
40 |
|
41 |
Args:
|
42 |
message: The SlackMessage containing the user's question
|
43 |
|
44 |
Returns:
|
45 |
-
|
46 |
"""
|
47 |
-
#
|
48 |
-
|
49 |
-
# return []
|
50 |
|
51 |
-
#
|
52 |
-
|
53 |
-
|
54 |
-
#
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
# query_embedding = embeddings[0].tolist()
|
59 |
-
|
60 |
-
# # Create vector query
|
61 |
-
# vector_query = VectorQuery(
|
62 |
-
# query_text=message.text,
|
63 |
-
# k=self.settings.TOP_K_MATCHES,
|
64 |
-
# score_threshold=0.7 # Minimum similarity threshold
|
65 |
-
# )
|
66 |
-
|
67 |
-
# # Search for similar content chunks in vector database
|
68 |
-
# context_results = await self.vector_database_service.search_by_similarity(
|
69 |
-
# query=vector_query,
|
70 |
-
# query_embedding=query_embedding
|
71 |
-
# )
|
72 |
-
|
73 |
-
# logger.info(f"Retrieved {len(context_results)} context items for message: {message.key}")
|
74 |
-
# return context_results
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
settings: Settings
|
16 |
vectorization_service: VectorizationService
|
17 |
+
vector_db_service: VectorDatabaseService
|
18 |
|
19 |
+
# Should not allow initialization calls to bubble up all the way to the surface ― sequester in `post_init` or the class on which it depends.
|
20 |
@model_validator(mode='after')
|
21 |
def post_init(self: Self) -> Self:
|
22 |
logger.debug("Created {}", self.__class__.__name__)
|
23 |
return self
|
24 |
|
25 |
+
async def get_context(self, message: SlackMessage) -> Sequence[Chunk]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
"""
|
27 |
+
Retrieve relevant context for a given SlackMessage by vectorizing the message and
|
28 |
+
querying the vectorstore.
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
Args:
|
31 |
message: The SlackMessage containing the user's question
|
32 |
|
33 |
Returns:
|
34 |
+
Sequence[Chunk]: List of retrieved context items with similarity scores
|
35 |
"""
|
36 |
+
# Extract chunks from the message
|
37 |
+
message_chunks = message.get_chunks()
|
|
|
38 |
|
39 |
+
# Vectorize the chunks
|
40 |
+
vectorized_chunks = self.vectorization_service.vectorize(message_chunks)
|
41 |
+
|
42 |
+
# Create vector query using the first chunk's embedding (typically there's only one chunk for a message)
|
43 |
+
if not vectorized_chunks:
|
44 |
+
logger.warning("No vectorized chunks were created for message")
|
45 |
+
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
+
query = VectorQuery(
|
48 |
+
query_embeddings=vectorized_chunks[0].embedding,
|
49 |
+
k=self.settings.TOP_K_MATCHES,
|
50 |
+
score_threshold=self.settings.SCORE_THRESHOLD,
|
51 |
+
filter_metadata=None # Can be expanded to include filters based on message metadata
|
52 |
+
)
|
53 |
+
|
54 |
+
# Perform similarity search
|
55 |
+
try:
|
56 |
+
results = await self.vector_db_service.search_by_similarity(query)
|
57 |
+
logger.info(f"Retrieved {len(results)} context chunks for query")
|
58 |
+
return results
|
59 |
+
except Exception as e:
|
60 |
+
logger.error(f"Error retrieving context: {str(e)}")
|
61 |
+
return []
|
62 |
+
|
63 |
+
# test return statement
|
64 |
+
# return (VectorizedChunk(text="Mock context chunk", parent_id="lol", chunk_id="no", metadata={}, embedding=tuple()),
|
65 |
+
# VectorizedChunk(text="Moar mock context chunk", parent_id="lol", chunk_id="wut", metadata={}, embedding=tuple()))
|