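"""Gradio "Search" tab for vector search over a MongoDB Atlas collection.

The user selects a database and collection, names the embedding field and the
Atlas Vector Search index, and the tab embeds the query with OpenAI and runs a
$vectorSearch aggregation to return the top matches.
"""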
import gradio as gr
from typing import Tuple, List
from openai import OpenAI
from utils.db_utils import DatabaseUtils
from utils.embedding_utils import get_embedding

def create_search_tab(openai_client: OpenAI, db_utils: DatabaseUtils, databases: List[str]) -> Tuple[gr.Tab, dict]:
    """Create the vector search tab UI
    
    Args:
        openai_client: OpenAI client instance
        db_utils: DatabaseUtils instance
        databases: List of available databases
        
    Returns:
        Tuple[gr.Tab, dict]: The tab component and its interface elements
    """
    def update_collections(db_name: str) -> gr.Dropdown:
        """Update collections dropdown when database changes"""
        collections = db_utils.get_collections(db_name)
        # If there's only one collection, select it by default
        value = collections[0] if len(collections) == 1 else None
        return gr.Dropdown(choices=collections, value=value)
    
    def vector_search(
        query_text: str, 
        db_name: str, 
        collection_name: str, 
        embedding_field: str, 
        index_name: str
    ) -> str:
        """Perform vector search using embeddings"""
        try:
            print(f"\nProcessing query: {query_text}")
            
            db = db_utils.client[db_name]
            collection = db[collection_name]
            
            # Get embeddings for query
            embedding = get_embedding(query_text, openai_client)
            print("Generated embeddings successfully")
            
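            # Atlas Vector Search: the $vectorSearch stage performs an approximate
            # nearest-neighbour search over `embedding_field` using the named index,
            # considering `numCandidates` candidates and returning the top `limit`
            # matches; the $project stage attaches the similarity score to each hit.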
            results = collection.aggregate([
                {
                    "$vectorSearch": {
                        "index": index_name,
                        "path": embedding_field,
                        "queryVector": embedding,
                        "numCandidates": 50,
                        "limit": 5
                    }
                },
                {
                    "$project": {
                        "search_score": { "$meta": "vectorSearchScore" },
                        "document": "$$ROOT"
                    }
                }
            ])
            
            # Format results
            results_list = list(results)
            formatted_results = []
            
            for idx, result in enumerate(results_list, 1):
                doc = result['document']
                formatted_result = f"{idx}. Score: {result['search_score']:.4f}\n"
                # Add all fields except _id and embeddings
                for key, value in doc.items():
                    if key not in ['_id', embedding_field]:
                        formatted_result += f"{key}: {value}\n"
                formatted_results.append(formatted_result)
                
            return "\n".join(formatted_results) if formatted_results else "No results found"
            
        except Exception as e:
            return f"Error: {str(e)}"
    
    # Create the tab UI
    with gr.Tab("Search") as tab:
        with gr.Row():
            db_input = gr.Dropdown(
                choices=databases,
                label="Select Database",
                info="Database containing the vectors"
            )
            collection_input = gr.Dropdown(
                choices=[],
                label="Select Collection",
                info="Collection containing the vectors"
            )
        with gr.Row():
            embedding_field_input = gr.Textbox(
                label="Embedding Field Name",
                value="embedding",
                info="Field containing the vectors"
            )
            index_input = gr.Textbox(
                label="Vector Search Index Name",
                value="vector_index",
                info="Index created in Atlas UI"
            )
            
        query_input = gr.Textbox(
            label="Search Query",
            lines=2,
            placeholder="What would you like to search for?"
        )
        search_btn = gr.Button("Search")
        search_output = gr.Textbox(label="Results", lines=10)
        
        # Set up event handlers
        db_input.change(
            fn=update_collections,
            inputs=[db_input],
            outputs=[collection_input]
        )
        
        search_btn.click(
            fn=vector_search,
            inputs=[
                query_input,
                db_input,
                collection_input,
                embedding_field_input,
                index_input
            ],
            outputs=search_output
        )
    
    # Return the tab and its interface elements
    interface = {
        'db_input': db_input,
        'collection_input': collection_input,
        'embedding_field_input': embedding_field_input,
        'index_input': index_input,
        'query_input': query_input,
        'search_btn': search_btn,
        'search_output': search_output
    }
    
    return tab, interface
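

# Minimal launch sketch, assuming DatabaseUtils can be constructed without
# arguments and that db_utils.client is a pymongo MongoClient (neither is
# confirmed by this file); it also requires OPENAI_API_KEY and MongoDB
# credentials to be configured in the environment.
if __name__ == "__main__":
    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    db_utils = DatabaseUtils()  # hypothetical no-arg constructor
    databases = db_utils.client.list_database_names()
    with gr.Blocks() as demo:
        tab, interface = create_search_tab(client, db_utils, databases)
    demo.launch()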