File size: 2,444 Bytes
e2b9039
 
3ecbeff
e2b9039
 
b21411e
cb1057f
e2b9039
d792c52
 
 
b21411e
 
 
 
 
 
 
 
 
 
e2b9039
3ecbeff
 
 
 
 
cb1057f
3ecbeff
 
b21411e
3ecbeff
 
e2b9039
b21411e
 
 
e2b9039
b21411e
fff69e7
e2b9039
b21411e
 
3ecbeff
 
b0a56a3
b21411e
 
b0a56a3
b21411e
 
b0a56a3
b21411e
 
 
 
be3515a
b21411e
 
 
 
be3515a
b21411e
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import streamlit as st
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer

# Title of the Streamlit App
st.title("Medical Hybrid Search")

# Module-level placeholder for the Pinecone index handle. It stays None here
# and is reassigned further down from connect_to_index() once a client exists.
index = None

# Build the Pinecone client from the API key stored in the environment
def initialize_pinecone():
    """Create a Pinecone client from the PINECONE_API_KEY environment variable.

    Returns:
        A ``Pinecone`` instance when the variable is set to a non-empty value.
        Otherwise an error is shown through ``st.error`` and None is returned.
    """
    key = os.environ.get('PINECONE_API_KEY')

    # Guard clause: a missing or empty key means there is nothing to connect with.
    if not key:
        st.error("Pinecone API key not found! Please set the PINECONE_API_KEY environment variable.")
        return None

    return Pinecone(api_key=key)

# Function to connect to a Pinecone index (the 'pubmed-splade' index by default)
def connect_to_index(pc, index_name='pubmed-splade'):
    """Return a handle to the named Pinecone index, or None if it is not listed.

    Args:
        pc: Pinecone client. It must provide ``list_indexes().names()`` and
            ``Index(name)``, which are the only members used here.
        index_name: Name of the index to open. Defaults to 'pubmed-splade',
            so existing single-argument callers behave as before.

    Returns:
        Whatever ``pc.Index(index_name)`` returns when the name appears in the
        client's index listing. Otherwise an error is shown through
        ``st.error`` and None is returned.
    """
    # Check the listing first so a missing index becomes a UI message rather
    # than a failure later when the handle is queried.
    if index_name not in pc.list_indexes().names():
        st.error(f"Index '{index_name}' not found!")
        return None

    return pc.Index(index_name)

# Turn the query text into a plain list using the supplied embedding model
def encode_query(model, query_text):
    """Encode ``query_text`` with ``model`` and return the result as a list.

    Args:
        model: Object exposing ``encode(text)`` whose result has ``tolist()``
            (presumably a SentenceTransformer; confirm against the caller).
        query_text: The text to encode.

    Returns:
        The value of ``model.encode(query_text).tolist()``.
    """
    embedding = model.encode(query_text)
    return embedding.tolist()

# Streamlit re-executes this script on every widget interaction, so the
# encoder is cached to avoid reloading the model weights on each rerun.
@st.cache_resource
def _load_encoder(model_name):
    """Load the SentenceTransformer model once and reuse it across reruns."""
    return SentenceTransformer(model_name)


# Initialize Pinecone
pc = initialize_pinecone()

# If Pinecone initialized successfully, proceed with index management
if pc:
    # Connect directly to 'pubmed-splade' index (None if it does not exist)
    index = connect_to_index(pc)

    # Model for query encoding (cached, see _load_encoder)
    model = _load_encoder('msmarco-bert-base-dot-v5')

    # Query input
    query_text = st.text_input("Enter a Query to Search", "Can clinicians use the PHQ-9 to assess depression?")

    # Button to encode query and search the Pinecone index
    if st.button("Search Query"):
        # Treat whitespace-only input as empty so no blank query is sent.
        cleaned_query = query_text.strip()

        # Compare against None explicitly rather than relying on the
        # truthiness of the index handle object.
        if cleaned_query and index is not None:
            dense_vector = encode_query(model, cleaned_query)

            # Search the index (sparse values can be added here as well)
            results = index.query(
                vector=dense_vector,
                top_k=3,
                include_metadata=True,
            )

            st.write("Search Results:")
            if not results.matches:
                # Say so explicitly instead of leaving a bare header.
                st.info("No matching documents were found.")
            for match in results.matches:
                st.write(f"ID: {match.id}, Score: {match.score}, Metadata: {match.metadata}")
        else:
            st.error("Please enter a query and ensure the index is initialized.")