# mongo-vector-search-util / ui / search_tab.py
# Uploaded by airabbitX ("Upload 3 files", commit 50e3a95, verified)
import gradio as gr
from typing import Tuple, List
from openai import OpenAI
from utils.db_utils import DatabaseUtils
from utils.embedding_utils import get_embedding
def create_search_tab(openai_client: OpenAI, db_utils: DatabaseUtils, databases: List[str]) -> Tuple[gr.Tab, dict]:
    """Create the vector search tab UI.

    Builds a "Search" tab with database/collection dropdowns, text boxes for
    the embedding field, the vector index name and the query, and wires the
    "Search" button to a MongoDB ``$vectorSearch`` aggregation.

    Args:
        openai_client: OpenAI client instance, passed to ``get_embedding``.
        db_utils: DatabaseUtils instance; its ``client`` attribute is indexed
            by database name and ``get_collections`` lists collection names.
        databases: List of available databases offered in the dropdown.

    Returns:
        Tuple[gr.Tab, dict]: The tab component and a dict of its interface
        elements keyed by name (``db_input``, ``collection_input``,
        ``embedding_field_input``, ``index_input``, ``query_input``,
        ``search_btn``, ``search_output``).
    """
    # Tuning values passed as `numCandidates` and `limit` to `$vectorSearch`.
    num_candidates = 50
    result_limit = 5

    def update_collections(db_name: str) -> gr.Dropdown:
        """Update collections dropdown when database changes."""
        # A cleared dropdown yields no database name; there is nothing to
        # look up, so reset the collection choices instead of querying.
        if not db_name:
            return gr.Dropdown(choices=[], value=None)

        collections = db_utils.get_collections(db_name)
        # If there's only one collection, select it by default
        value = collections[0] if len(collections) == 1 else None
        return gr.Dropdown(choices=collections, value=value)

    def format_hit(position: int, hit: dict, embedding_field: str) -> str:
        """Render one search hit as a numbered, newline-terminated text block."""
        lines = [f"{position}. Score: {hit['search_score']:.4f}"]
        # Show every field except _id and the (large) embedding vector
        hidden = ('_id', embedding_field)
        lines.extend(
            f"{key}: {value}"
            for key, value in hit['document'].items()
            if key not in hidden
        )
        return "\n".join(lines) + "\n"

    def vector_search(
        query_text: str,
        db_name: str,
        collection_name: str,
        embedding_field: str,
        index_name: str
    ) -> str:
        """Perform vector search using embeddings.

        Returns the formatted hits, "No results found" when the aggregation
        yields nothing, or a string starting with "Error: " when an input is
        missing or any step raises.
        """
        # Reject incomplete input up front so the user gets an actionable
        # message instead of an opaque driver/API exception.
        if not query_text or not query_text.strip():
            return "Error: Please enter a search query"
        if not db_name or not collection_name:
            return "Error: Please select a database and a collection"
        if not embedding_field or not index_name:
            return "Error: Please provide the embedding field name and the vector search index name"

        try:
            print(f"\nProcessing query: {query_text}")
            collection = db_utils.client[db_name][collection_name]

            # Get embeddings for query
            embedding = get_embedding(query_text, openai_client)
            print("Generated embeddings successfully")

            pipeline = [
                {
                    '$vectorSearch': {
                        "index": index_name,
                        "path": embedding_field,
                        "queryVector": embedding,
                        "numCandidates": num_candidates,
                        "limit": result_limit
                    }
                },
                {
                    "$project": {
                        "search_score": {"$meta": "vectorSearchScore"},
                        "document": "$$ROOT"
                    }
                }
            ]

            # Format results
            formatted_results = [
                format_hit(idx, result, embedding_field)
                for idx, result in enumerate(collection.aggregate(pipeline), 1)
            ]
            return "\n".join(formatted_results) if formatted_results else "No results found"
        except Exception as e:
            # UI boundary: report the failure in the results box rather than
            # letting the exception escape the event handler.
            return f"Error: {str(e)}"

    # Create the tab UI
    with gr.Tab("Search") as tab:
        with gr.Row():
            db_input = gr.Dropdown(
                choices=databases,
                label="Select Database",
                info="Database containing the vectors"
            )
            collection_input = gr.Dropdown(
                choices=[],
                label="Select Collection",
                info="Collection containing the vectors"
            )

        with gr.Row():
            embedding_field_input = gr.Textbox(
                label="Embedding Field Name",
                value="embedding",
                info="Field containing the vectors"
            )
            index_input = gr.Textbox(
                label="Vector Search Index Name",
                value="vector_index",
                info="Index created in Atlas UI"
            )

        query_input = gr.Textbox(
            label="Search Query",
            lines=2,
            placeholder="What would you like to search for?"
        )
        search_btn = gr.Button("Search")
        search_output = gr.Textbox(label="Results", lines=10)

        # Set up event handlers
        db_input.change(
            fn=update_collections,
            inputs=[db_input],
            outputs=[collection_input]
        )
        search_btn.click(
            fn=vector_search,
            inputs=[
                query_input,
                db_input,
                collection_input,
                embedding_field_input,
                index_input
            ],
            outputs=search_output
        )

    # Return the tab and its interface elements
    interface = {
        'db_input': db_input,
        'collection_input': collection_input,
        'embedding_field_input': embedding_field_input,
        'index_input': index_input,
        'query_input': query_input,
        'search_btn': search_btn,
        'search_output': search_output
    }
    return tab, interface