shrut123 committed on
Commit
dd92eea
·
verified ·
1 Parent(s): 9a6f924

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -3,6 +3,8 @@ import streamlit as st
3
  from pinecone import Pinecone
4
  from sentence_transformers import SentenceTransformer
5
  import torch
 
 
6
 
7
  # Title of the Streamlit App
8
  st.title("Medical Hybrid Search")
@@ -55,6 +57,12 @@ if pc:
55
 
56
  # Model for query encoding
57
  model = SentenceTransformer('msmarco-bert-base-dot-v5')
 
 
 
 
 
 
58
 
59
  # Query input
60
  query_text = st.text_input("Enter a Query to Search", "Can clinicians use the PHQ-9 to assess depression?")
@@ -67,7 +75,7 @@ if pc:
67
  if query_text and index:
68
  # Encode query to get dense and sparse vectors
69
  dense_vector = encode_query(model, query_text)
70
- input_ids = model.tokenizer(query_text, return_tensors='pt')
71
  with torch.no_grad():
72
  sparse_vector = sparse_model(d_kwargs=input_ids.to(device))['d_rep'].squeeze()
73
 
@@ -90,7 +98,7 @@ if pc:
90
  st.write("### Search Results:")
91
  for match in results.matches:
92
  st.markdown(f"#### Score: **{match.score:.4f}**")
93
- st.write(f" #### Context: {match.metadata.get('context', 'No context available.')}")
94
  st.write("---")
95
  else:
96
  st.error("Please enter a query and ensure the index is initialized.")
 
3
  from pinecone import Pinecone
4
  from sentence_transformers import SentenceTransformer
5
  import torch
6
+ from splade.models.transformer_rep import Splade
7
+ from transformers import AutoTokenizer
8
 
9
  # Title of the Streamlit App
10
  st.title("Medical Hybrid Search")
 
57
 
58
  # Model for query encoding
59
  model = SentenceTransformer('msmarco-bert-base-dot-v5')
60
+
61
+ # Initialize sparse model and tokenizer
62
+ sparse_model_id = 'naver/splade-cocondenser-ensembledistil'
63
+ sparse_model = Splade(sparse_model_id, agg='max')
64
+ sparse_model.eval() # Set the model to evaluation mode
65
+ tokenizer = AutoTokenizer.from_pretrained(sparse_model_id)
66
 
67
  # Query input
68
  query_text = st.text_input("Enter a Query to Search", "Can clinicians use the PHQ-9 to assess depression?")
 
75
  if query_text and index:
76
  # Encode query to get dense and sparse vectors
77
  dense_vector = encode_query(model, query_text)
78
+ input_ids = tokenizer(query_text, return_tensors='pt')
79
  with torch.no_grad():
80
  sparse_vector = sparse_model(d_kwargs=input_ids.to(device))['d_rep'].squeeze()
81
 
 
98
  st.write("### Search Results:")
99
  for match in results.matches:
100
  st.markdown(f"#### Score: **{match.score:.4f}**")
101
+ st.write(f"####Context:{match.metadata.get('context', 'No context available.')}")
102
  st.write("---")
103
  else:
104
  st.error("Please enter a query and ensure the index is initialized.")