Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -62,16 +62,18 @@ def add_text_to_db(text):
|
|
62 |
|
63 |
return f"Text added successfully with ID: {doc_id}"
|
64 |
|
65 |
-
def search_similar_texts(query, k):
|
66 |
"""
|
67 |
Search for the top k similar texts in the vector database and rerank them.
|
|
|
68 |
|
69 |
Args:
|
70 |
query (str): The search query.
|
71 |
k (int): Number of results to return.
|
|
|
72 |
|
73 |
Returns:
|
74 |
-
str: Formatted search results
|
75 |
"""
|
76 |
if not query or not query.strip():
|
77 |
return "Error: Query cannot be empty."
|
@@ -79,21 +81,29 @@ def search_similar_texts(query, k):
|
|
79 |
if not isinstance(k, int) or k < 1:
|
80 |
return "Error: k must be a positive integer."
|
81 |
|
|
|
|
|
|
|
82 |
# Retrieve and rerank
|
83 |
retriever.search_kwargs["k"] = max(k * 2, 10) # Retrieve 2k or at least 10
|
84 |
compressor.top_n = k # Rerank to top k
|
85 |
docs = compression_retriever.get_relevant_documents(query)
|
86 |
|
87 |
if not docs:
|
88 |
-
return "No
|
89 |
|
90 |
-
#
|
91 |
results = []
|
92 |
-
for i, doc in enumerate(docs[:k]): # Ensure
|
93 |
text = doc.metadata.get("text", "No text available")
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
-
return "\n".join(results)
|
97 |
|
98 |
# Gradio interface
|
99 |
with gr.Blocks() as demo:
|
@@ -110,6 +120,7 @@ with gr.Blocks() as demo:
|
|
110 |
gr.Markdown("## Search Similar Texts")
|
111 |
query_input = gr.Textbox(label="Enter search query")
|
112 |
k_input = gr.Number(label="Number of results (k)", value=5, precision=0)
|
|
|
113 |
search_button = gr.Button("Search")
|
114 |
search_output = gr.Textbox(label="Search Results")
|
115 |
|
@@ -121,7 +132,7 @@ with gr.Blocks() as demo:
|
|
121 |
)
|
122 |
search_button.click(
|
123 |
fn=search_similar_texts,
|
124 |
-
inputs=[query_input, k_input],
|
125 |
outputs=search_output
|
126 |
)
|
127 |
|
|
|
62 |
|
63 |
return f"Text added successfully with ID: {doc_id}"
|
64 |
|
65 |
+
def search_similar_texts(query, k, threshold):
    """
    Search for the top k similar texts in the vector database and rerank them.
    Only return results whose similarity score is at or above the threshold.

    Args:
        query (str): The search query.
        k (int): Number of results to return.
        threshold (float): Minimum similarity score (0 to 1, inclusive).

    Returns:
        str: Formatted search results with similarity scores, "No such record."
            when no result meets the threshold, or an "Error: ..." message
            when an argument is invalid.
    """
    if not query or not query.strip():
        return "Error: Query cannot be empty."

    # bool is a subclass of int, so True/False must be rejected explicitly.
    if isinstance(k, bool) or not isinstance(k, int) or k < 1:
        return "Error: k must be a positive integer."

    # The chained comparison is False for NaN, so NaN is rejected here as well
    # instead of silently filtering out every result later.
    if (
        isinstance(threshold, bool)
        or not isinstance(threshold, (int, float))
        or not 0 <= threshold <= 1
    ):
        return "Error: Threshold must be a number between 0 and 1."

    # Retrieve and rerank
    retriever.search_kwargs["k"] = max(k * 2, 10)  # Retrieve 2k or at least 10
    compressor.top_n = k  # Rerank to top k
    docs = compression_retriever.get_relevant_documents(query)

    # Filter results by threshold. The label keeps each document's rank in the
    # reranked list, so numbering may have gaps when some results are dropped.
    results = []
    for rank, doc in enumerate(docs[:k], start=1):  # Ensure at most k results
        metadata = doc.metadata
        text = metadata.get("text", "No text available")
        score = metadata.get("score")
        if score is None:
            # NOTE(review): some rerankers store their score under
            # "relevance_score" rather than "score" -- confirm which key the
            # configured compressor writes.
            score = metadata.get("relevance_score", 0.0)
        if score >= threshold:
            results.append(f"Result {rank}:\nText: {text}\nScore: {score:.4f}\n")

    # An empty retrieval and an all-filtered retrieval report the same message.
    if not results:
        return "No such record."

    return "\n".join(results)
|
107 |
|
108 |
# Gradio interface
|
109 |
with gr.Blocks() as demo:
|
|
|
120 |
gr.Markdown("## Search Similar Texts")
|
121 |
query_input = gr.Textbox(label="Enter search query")
|
122 |
k_input = gr.Number(label="Number of results (k)", value=5, precision=0)
|
123 |
+
threshold_input = gr.Number(label="Similarity threshold (0 to 1)", value=0.5, minimum=0, maximum=1)
|
124 |
search_button = gr.Button("Search")
|
125 |
search_output = gr.Textbox(label="Search Results")
|
126 |
|
|
|
132 |
)
|
133 |
search_button.click(
|
134 |
fn=search_similar_texts,
|
135 |
+
inputs=[query_input, k_input, threshold_input],
|
136 |
outputs=search_output
|
137 |
)
|
138 |
|