semantic-search-with-retrieve-and-rerank

Sleeping

nickmuchi committed on May 5, 2022

Commit

9312f0f

1 Parent(s): b259233

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -114,8 +114,9 @@ def preprocess_plain_text(text,window_size=3):
     return passages
 @st.cache(allow_output_mutation=True)
-def bi_encoder(bi_enc,passages):
     #We use the Bi-Encoder to encode all passages, so that we can use it with semantic search
     bi_encoder = SentenceTransformer(bi_enc)
@@ -128,8 +129,9 @@ def bi_encoder(bi_enc,passages):
     return corpus_embeddings
 @st.cache(allow_output_mutation=True)
-def cross_encoder():
     #The bi-encoder will retrieve 100 documents. We use a cross-encoder, to re-rank the results list to improve the quality
     cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
     return cross_encoder
@@ -291,8 +293,8 @@ if search:
         with st.spinner(
             text=f"Loading {bi_encoder_type} bi-encoder and embedding document into vector space. This might take a few seconds depending on the length of your document..."
         ):
-            corpus_embeddings = bi_encoder(bi_encoder_type,passages)
-            cross_encoder = cross_encoder()
             bm25 = bm25_api(passages)
         with st.spinner(

     return passages
 @st.cache(allow_output_mutation=True)
+def bi_encode(bi_enc,passages):
+    global bi_encoder
     #We use the Bi-Encoder to encode all passages, so that we can use it with semantic search
     bi_encoder = SentenceTransformer(bi_enc)
     return corpus_embeddings
 @st.cache(allow_output_mutation=True)
+def cross_encode():
+    global cross_encoder
     #The bi-encoder will retrieve 100 documents. We use a cross-encoder, to re-rank the results list to improve the quality
     cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
     return cross_encoder
         with st.spinner(
             text=f"Loading {bi_encoder_type} bi-encoder and embedding document into vector space. This might take a few seconds depending on the length of your document..."
         ):
+            corpus_embeddings = bi_encode(bi_encoder_type,passages)
+            cross_enc = cross_encode()
             bm25 = bm25_api(passages)
         with st.spinner(