Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -159,7 +159,7 @@ def create_db(splits):
|
|
159 |
vectordb = FAISS.from_documents(splits, embeddings)
|
160 |
return vectordb
|
161 |
|
162 |
-
def rerank_documents(query, docs, top_k=
|
163 |
pairs = [[query, doc.page_content] for doc in docs]
|
164 |
scores = reranker.predict(pairs)
|
165 |
doc_score_pairs = list(zip(docs, scores))
|
@@ -187,10 +187,10 @@ def retrieve_documents(query, retriever, embeddings):
|
|
187 |
print("No initial results found")
|
188 |
return []
|
189 |
|
190 |
-
reranked_results = rerank_documents(query, results, top_k=
|
191 |
print(f"Reranked results count: {len(reranked_results)}")
|
192 |
|
193 |
-
filtered_chunks = filter_relevant_chunks(query, reranked_results, embeddings, threshold=0.
|
194 |
print(f"Filtered chunks count: {len(filtered_chunks)}")
|
195 |
|
196 |
if not filtered_chunks:
|
@@ -206,7 +206,7 @@ def retrieve_documents(query, retriever, embeddings):
|
|
206 |
print(f"Score: {score:.4f} | Source: {doc.metadata.get('source', 'Unknown')}")
|
207 |
print(f"Content Preview: {doc.page_content[:100]}...\n")
|
208 |
|
209 |
-
MIN_SIMILARITY = 0.
|
210 |
filtered_results = [(doc, sim) for doc, sim in zip(filtered_chunks, similarity_scores) if sim >= MIN_SIMILARITY]
|
211 |
print(f"Final filtered results count: {len(filtered_results)}")
|
212 |
|
|
|
159 |
vectordb = FAISS.from_documents(splits, embeddings)
|
160 |
return vectordb
|
161 |
|
162 |
+
def rerank_documents(query, docs, top_k=5):
|
163 |
pairs = [[query, doc.page_content] for doc in docs]
|
164 |
scores = reranker.predict(pairs)
|
165 |
doc_score_pairs = list(zip(docs, scores))
|
|
|
187 |
print("No initial results found")
|
188 |
return []
|
189 |
|
190 |
+
reranked_results = rerank_documents(query, results, top_k=5)
|
191 |
print(f"Reranked results count: {len(reranked_results)}")
|
192 |
|
193 |
+
filtered_chunks = filter_relevant_chunks(query, reranked_results, embeddings, threshold=0.3)
|
194 |
print(f"Filtered chunks count: {len(filtered_chunks)}")
|
195 |
|
196 |
if not filtered_chunks:
|
|
|
206 |
print(f"Score: {score:.4f} | Source: {doc.metadata.get('source', 'Unknown')}")
|
207 |
print(f"Content Preview: {doc.page_content[:100]}...\n")
|
208 |
|
209 |
+
MIN_SIMILARITY = 0.3
|
210 |
filtered_results = [(doc, sim) for doc, sim in zip(filtered_chunks, similarity_scores) if sim >= MIN_SIMILARITY]
|
211 |
print(f"Final filtered results count: {len(filtered_results)}")
|
212 |
|