Quantization
Added quantization to speed up the embedding process
app.py CHANGED
@@ -17,6 +17,8 @@ import nltk
 import warnings
 import streamlit as st
 from PIL import Image
+from torch.nn import Embedding, Linear
+from torch.quantization import quantize_dynamic
 
 nltk.download('punkt')
 
@@ -121,7 +123,10 @@ def bi_encode(bi_enc,passages):
 
     global bi_encoder
     #We use the Bi-Encoder to encode all passages, so that we can use it with semantic search
-    bi_encoder = SentenceTransformer(bi_enc)
+    model = SentenceTransformer(bi_enc)
+
+    #quantize the model
+    bi_encoder = quantize_dynamic(model, {Linear, Embedding})
 
     #Compute the embeddings using the multi-process pool
     with st.spinner('Encoding passages into a vector space...'):
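For context: torch dynamic quantization converts the weights of the listed module types to int8 ahead of time and quantizes activations on the fly during inference, which shrinks the model and typically speeds up CPU encoding. Below is a minimal standalone sketch of the same pattern; the model name and sample passage are illustrative placeholders, not taken from this commit.

from torch.nn import Linear
from torch.quantization import quantize_dynamic
from sentence_transformers import SentenceTransformer

# Load a bi-encoder checkpoint (placeholder name; any bi-encoder works).
model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')

# Swap Linear layers for int8 weights-only equivalents. The commit above
# also lists Embedding, but recent PyTorch releases only quantize
# Embedding with float_qparams_weight_only_qconfig, so {Linear} is the
# portable subset. Dynamic quantization runs on CPU only.
bi_encoder = quantize_dynamic(model, {Linear})

# The quantized model keeps the usual encode() API.
embeddings = bi_encoder.encode(['an example passage'], convert_to_tensor=True)
print(embeddings.shape)

The speed-up comes at a small accuracy cost, so it is worth spot-checking retrieval quality after quantizing.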