nickmuchi committed
Commit ffc0638 · 1 Parent(s): 0c4f912

Quantization


Added quantization to speed up the embedding process

Files changed (1)
  1. app.py +6 -1
app.py CHANGED
@@ -17,6 +17,8 @@ import nltk
 import warnings
 import streamlit as st
 from PIL import Image
+from torch.nn import Embedding, Linear
+from torch.quantization import quantize_dynamic
 
 nltk.download('punkt')
 
@@ -121,7 +123,10 @@ def bi_encode(bi_enc,passages):
 
     global bi_encoder
     #We use the Bi-Encoder to encode all passages, so that we can use it with semantic search
-    bi_encoder = SentenceTransformer(bi_enc)
+    model = SentenceTransformer(bi_enc)
+
+    #quantize the model
+    bi_encoder = quantize_dynamic(model, {Linear, Embedding})
 
     #Compute the embeddings using the multi-process pool
     with st.spinner('Encoding passages into a vector space...'):
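
Below is a minimal sketch, not part of the commit, of how the speed-up from this change could be checked. The checkpoint name 'all-MiniLM-L6-v2' and the dummy passages are placeholder assumptions rather than values from app.py, and only Linear layers are quantized here; whether nn.Embedding can also be dynamically quantized with the default qint8 config depends on the PyTorch version.

# Hedged sketch: compare encoding speed of the fp32 bi-encoder vs. its
# dynamically quantized counterpart. Checkpoint and passages are
# placeholder assumptions, not values taken from app.py.
import time

import torch
from torch.nn import Linear
from torch.quantization import quantize_dynamic
from sentence_transformers import SentenceTransformer

passages = ["An example passage about semantic search."] * 256

model = SentenceTransformer('all-MiniLM-L6-v2')  # assumed checkpoint
# Swap fp32 Linear layers for int8 dynamically quantized versions.
quantized = quantize_dynamic(model, {Linear}, dtype=torch.qint8)

for label, encoder in (('fp32', model), ('int8', quantized)):
    start = time.perf_counter()
    encoder.encode(passages, show_progress_bar=False)
    print(f'{label}: {time.perf_counter() - start:.2f}s')

Dynamic quantization converts weights to int8 ahead of time and quantizes activations on the fly, so it needs no calibration data and typically speeds up CPU encoding noticeably, at a small cost in embedding precision.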