gabykim committed on
Commit
5f5f1b6
·
1 Parent(s): 4376f6f

minor reranking configuration refactor

Browse files
src/know_lang_bot/chat_bot/chat_graph.py CHANGED
@@ -220,7 +220,7 @@ class RetrieveContextNode(BaseNode[ChatGraphState, ChatGraphDeps, ChatResult]):
220
  query=query,
221
  embedding_config=ctx.deps.config.embedding,
222
  collection=ctx.deps.collection,
223
- n_results=min(ctx.deps.config.chat.max_context_chunks * 2, 20)
224
  )
225
 
226
  # Log top k initial results by distance
@@ -228,7 +228,7 @@ class RetrieveContextNode(BaseNode[ChatGraphState, ChatGraphDeps, ChatResult]):
228
  zip(initial_chunks, distances),
229
  key=lambda x: x[1]
230
  )[:ctx.deps.config.reranker.top_k]
231
- logfire.info('top k initial results: {results}', results=top_k_initial)
232
 
233
  # Only proceed to reranking if we have initial results
234
  if not initial_chunks:
@@ -243,7 +243,7 @@ class RetrieveContextNode(BaseNode[ChatGraphState, ChatGraphDeps, ChatResult]):
243
  chunks=initial_chunks,
244
  reranker_config=ctx.deps.config.reranker
245
  )
246
- logfire.info('reranked results: {results}', results=reranking.results)
247
 
248
  # Build final context from reranked results
249
  relevant_chunks = []
@@ -251,10 +251,13 @@ class RetrieveContextNode(BaseNode[ChatGraphState, ChatGraphDeps, ChatResult]):
251
 
252
  for result in reranking.results:
253
  # Only include if score is good enough
254
- if result.relevance_score >= ctx.deps.config.chat.similarity_threshold:
255
  relevant_chunks.append(result.document)
256
  # Get corresponding metadata using original index
257
  relevant_metadatas.append(initial_metadatas[result.index])
 
 
 
258
 
259
 
260
  except Exception as e:
 
220
  query=query,
221
  embedding_config=ctx.deps.config.embedding,
222
  collection=ctx.deps.collection,
223
+ n_results=min(ctx.deps.config.chat.max_context_chunks * 2, 50)
224
  )
225
 
226
  # Log top k initial results by distance
 
228
  zip(initial_chunks, distances),
229
  key=lambda x: x[1]
230
  )[:ctx.deps.config.reranker.top_k]
231
+ logfire.info('top k embedding search results: {results}', results=top_k_initial)
232
 
233
  # Only proceed to reranking if we have initial results
234
  if not initial_chunks:
 
243
  chunks=initial_chunks,
244
  reranker_config=ctx.deps.config.reranker
245
  )
246
+ logfire.info('top k reranking search results: {results}', results=reranking.results)
247
 
248
  # Build final context from reranked results
249
  relevant_chunks = []
 
251
 
252
  for result in reranking.results:
253
  # Only include if score is good enough
254
+ if result.relevance_score >= ctx.deps.config.reranker.relevance_threshold:
255
  relevant_chunks.append(result.document)
256
  # Get corresponding metadata using original index
257
  relevant_metadatas.append(initial_metadatas[result.index])
258
+
259
+ if not relevant_chunks:
260
+ raise Exception("No relevant chunks found through reranking")
261
 
262
 
263
  except Exception as e:
src/know_lang_bot/config.py CHANGED
@@ -157,9 +157,13 @@ class RerankerConfig(BaseSettings):
157
  description="API key for the model provider"
158
  )
159
  top_k: int = Field(
160
- default=4,
161
  description="Number of most relevant documents to return from reranking"
162
  )
 
 
 
 
163
 
164
  @field_validator('api_key', mode='after')
165
  @classmethod
 
157
  description="API key for the model provider"
158
  )
159
  top_k: int = Field(
160
+ default=5,
161
  description="Number of most relevant documents to return from reranking"
162
  )
163
+ relevance_threshold: float = Field(
164
+ default=0.5,
165
+ description="Minimum relevance score to include a document in reranking"
166
+ )
167
 
168
  @field_validator('api_key', mode='after')
169
  @classmethod