dnzblgn committed on
Commit
b392f40
·
verified ·
1 Parent(s): 7aa6142

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -159,7 +159,7 @@ def create_db(splits):
159
  vectordb = FAISS.from_documents(splits, embeddings)
160
  return vectordb
161
 
162
- def rerank_documents(query, docs, top_k=3):
163
  pairs = [[query, doc.page_content] for doc in docs]
164
  scores = reranker.predict(pairs)
165
  doc_score_pairs = list(zip(docs, scores))
@@ -187,10 +187,10 @@ def retrieve_documents(query, retriever, embeddings):
187
  print("No initial results found")
188
  return []
189
 
190
- reranked_results = rerank_documents(query, results, top_k=3)
191
  print(f"Reranked results count: {len(reranked_results)}")
192
 
193
- filtered_chunks = filter_relevant_chunks(query, reranked_results, embeddings, threshold=0.7)
194
  print(f"Filtered chunks count: {len(filtered_chunks)}")
195
 
196
  if not filtered_chunks:
@@ -206,7 +206,7 @@ def retrieve_documents(query, retriever, embeddings):
206
  print(f"Score: {score:.4f} | Source: {doc.metadata.get('source', 'Unknown')}")
207
  print(f"Content Preview: {doc.page_content[:100]}...\n")
208
 
209
- MIN_SIMILARITY = 0.5
210
  filtered_results = [(doc, sim) for doc, sim in zip(filtered_chunks, similarity_scores) if sim >= MIN_SIMILARITY]
211
  print(f"Final filtered results count: {len(filtered_results)}")
212
 
 
159
  vectordb = FAISS.from_documents(splits, embeddings)
160
  return vectordb
161
 
162
+ def rerank_documents(query, docs, top_k=5):
163
  pairs = [[query, doc.page_content] for doc in docs]
164
  scores = reranker.predict(pairs)
165
  doc_score_pairs = list(zip(docs, scores))
 
187
  print("No initial results found")
188
  return []
189
 
190
+ reranked_results = rerank_documents(query, results, top_k=5)
191
  print(f"Reranked results count: {len(reranked_results)}")
192
 
193
+ filtered_chunks = filter_relevant_chunks(query, reranked_results, embeddings, threshold=0.3)
194
  print(f"Filtered chunks count: {len(filtered_chunks)}")
195
 
196
  if not filtered_chunks:
 
206
  print(f"Score: {score:.4f} | Source: {doc.metadata.get('source', 'Unknown')}")
207
  print(f"Content Preview: {doc.page_content[:100]}...\n")
208
 
209
+ MIN_SIMILARITY = 0.3
210
  filtered_results = [(doc, sim) for doc, sim in zip(filtered_chunks, similarity_scores) if sim >= MIN_SIMILARITY]
211
  print(f"Final filtered results count: {len(filtered_results)}")
212