Y-Mangoes committed on
Commit
cee4fc8
·
verified ·
1 Parent(s): ac90524

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -8
app.py CHANGED
@@ -62,16 +62,18 @@ def add_text_to_db(text):
62
 
63
  return f"Text added successfully with ID: {doc_id}"
64
 
65
- def search_similar_texts(query, k):
66
  """
67
  Search for the top k similar texts in the vector database and rerank them.
 
68
 
69
  Args:
70
  query (str): The search query.
71
  k (int): Number of results to return.
 
72
 
73
  Returns:
74
- str: Formatted search results without similarity scores.
75
  """
76
  if not query or not query.strip():
77
  return "Error: Query cannot be empty."
@@ -79,21 +81,29 @@ def search_similar_texts(query, k):
79
  if not isinstance(k, int) or k < 1:
80
  return "Error: k must be a positive integer."
81
 
 
 
 
82
  # Retrieve and rerank
83
  retriever.search_kwargs["k"] = max(k * 2, 10) # Retrieve 2k or at least 10
84
  compressor.top_n = k # Rerank to top k
85
  docs = compression_retriever.get_relevant_documents(query)
86
 
87
  if not docs:
88
- return "No results found."
89
 
90
- # Format results without similarity scores
91
  results = []
92
- for i, doc in enumerate(docs[:k]): # Ensure we return at most k
93
  text = doc.metadata.get("text", "No text available")
94
- results.append(f"Result {i+1}:\nText: {text}\n")
 
 
 
 
 
95
 
96
- return "\n".join(results) or "No results found."
97
 
98
  # Gradio interface
99
  with gr.Blocks() as demo:
@@ -110,6 +120,7 @@ with gr.Blocks() as demo:
110
  gr.Markdown("## Search Similar Texts")
111
  query_input = gr.Textbox(label="Enter search query")
112
  k_input = gr.Number(label="Number of results (k)", value=5, precision=0)
 
113
  search_button = gr.Button("Search")
114
  search_output = gr.Textbox(label="Search Results")
115
 
@@ -121,7 +132,7 @@ with gr.Blocks() as demo:
121
  )
122
  search_button.click(
123
  fn=search_similar_texts,
124
- inputs=[query_input, k_input],
125
  outputs=search_output
126
  )
127
 
 
62
 
63
  return f"Text added successfully with ID: {doc_id}"
64
 
65
+ def search_similar_texts(query, k, threshold):
66
  """
67
  Search for the top k similar texts in the vector database and rerank them.
68
+ Only return results with similarity scores above the threshold.
69
 
70
  Args:
71
  query (str): The search query.
72
  k (int): Number of results to return.
73
+ threshold (float): Minimum similarity score (0 to 1).
74
 
75
  Returns:
76
+ str: Formatted search results with similarity scores or "No such record".
77
  """
78
  if not query or not query.strip():
79
  return "Error: Query cannot be empty."
 
81
  if not isinstance(k, int) or k < 1:
82
  return "Error: k must be a positive integer."
83
 
84
+ if not isinstance(threshold, (int, float)) or threshold < 0 or threshold > 1:
85
+ return "Error: Threshold must be a number between 0 and 1."
86
+
87
  # Retrieve and rerank
88
  retriever.search_kwargs["k"] = max(k * 2, 10) # Retrieve 2k or at least 10
89
  compressor.top_n = k # Rerank to top k
90
  docs = compression_retriever.get_relevant_documents(query)
91
 
92
  if not docs:
93
+ return "No such record."
94
 
95
+ # Filter results by threshold
96
  results = []
97
+ for i, doc in enumerate(docs[:k]): # Ensure at most k results
98
  text = doc.metadata.get("text", "No text available")
99
+ score = doc.metadata.get("score", 0.0) # Reranker score
100
+ if score >= threshold:
101
+ results.append(f"Result {i+1}:\nText: {text}\nScore: {score:.4f}\n")
102
+
103
+ if not results:
104
+ return "No such record."
105
 
106
+ return "\n".join(results)
107
 
108
  # Gradio interface
109
  with gr.Blocks() as demo:
 
120
  gr.Markdown("## Search Similar Texts")
121
  query_input = gr.Textbox(label="Enter search query")
122
  k_input = gr.Number(label="Number of results (k)", value=5, precision=0)
123
+ threshold_input = gr.Number(label="Similarity threshold (0 to 1)", value=0.5, minimum=0, maximum=1)
124
  search_button = gr.Button("Search")
125
  search_output = gr.Textbox(label="Search Results")
126
 
 
132
  )
133
  search_button.click(
134
  fn=search_similar_texts,
135
+ inputs=[query_input, k_input, threshold_input],
136
  outputs=search_output
137
  )
138