Update app.py

Added e5 embedding model

app.py CHANGED
@@ -137,8 +137,15 @@ def bi_encode(bi_enc,passages):
 
     #Compute the embeddings using the multi-process pool
     with st.spinner('Encoding passages into a vector space...'):
-
-
+        if bi_enc == 'intfloat/e5-base':
+            corpus_embeddings = bi_encoder.encode(['passage: ' + sentence for sentence in passages], convert_to_tensor=True)
+        else:
+            corpus_embeddings = bi_encoder.encode(passages, convert_to_tensor=True)
 
     st.success(f"Embeddings computed. Shape: {corpus_embeddings.shape}")
 
@@ -178,7 +185,7 @@ def bm25_api(passages):
 
     return bm25
 
-bi_enc_options = ["multi-qa-mpnet-base-dot-v1","all-mpnet-base-v2","multi-qa-MiniLM-L6-cos-v1","neeva/query2query"]
+bi_enc_options = ["multi-qa-mpnet-base-dot-v1","all-mpnet-base-v2","multi-qa-MiniLM-L6-cos-v1",'intfloat/e5-base',"neeva/query2query"]
 
 def display_df_as_table(model,top_k,score='score'):
     # Display the df with text and scores as a table
 
@@ -204,7 +211,7 @@ top_k = st.sidebar.slider("Number of Top Hits Generated",min_value=1,max_value=5
 
 # This function will search all wikipedia articles for passages that
 # answer the query
-def search_func(query, top_k=top_k):
+def search_func(query, bi_encoder_type, top_k=top_k):
 
     global bi_encoder, cross_encoder
 
@@ -229,6 +236,8 @@ def search_func(query, top_k=top_k):
     bm25_df = display_df_as_table(bm25_hits,top_k)
     st.write(bm25_df.to_html(index=False), unsafe_allow_html=True)
 
+    if bi_encoder_type == 'intfloat/e5-base':
+        query = 'query: ' + query
     ##### Sematic Search #####
     # Encode the query using the bi-encoder and find potentially relevant passages
     question_embedding = bi_encoder.encode(query, convert_to_tensor=True)