Quantization
Added quantization to speed up the embedding process
app.py CHANGED
@@ -17,6 +17,8 @@ import nltk
 import warnings
 import streamlit as st
 from PIL import Image
+from torch.nn import Embedding, Linear
+from torch.quantization import quantize_dynamic
 
 nltk.download('punkt')
 
@@ -121,7 +123,10 @@ def bi_encode(bi_enc,passages):
 
     global bi_encoder
     #We use the Bi-Encoder to encode all passages, so that we can use it with semantic search
-    bi_encoder = SentenceTransformer(bi_enc)
+    model = SentenceTransformer(bi_enc)
+
+    #quantize the model
+    bi_encoder = quantize_dynamic(model, {Linear, Embedding})
 
     #Compute the embeddings using the multi-process pool
     with st.spinner('Encoding passages into a vector space...'):
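For context: torch dynamic quantization converts the weights of the listed module types to int8 ahead of time and quantizes activations on the fly during inference, which shrinks the model and typically speeds up CPU encoding. Below is a minimal standalone sketch of the same pattern; the model name and sample passage are illustrative placeholders, not taken from this commit.

from torch.nn import Linear
from torch.quantization import quantize_dynamic
from sentence_transformers import SentenceTransformer

# Load a bi-encoder checkpoint (placeholder name; any bi-encoder works).
model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')

# Swap Linear layers for int8 weights-only equivalents. The commit above
# also lists Embedding, but recent PyTorch releases only quantize
# Embedding with float_qparams_weight_only_qconfig, so {Linear} is the
# portable subset. Dynamic quantization runs on CPU only.
bi_encoder = quantize_dynamic(model, {Linear})

# The quantized model keeps the usual encode() API.
embeddings = bi_encoder.encode(['an example passage'], convert_to_tensor=True)
print(embeddings.shape)

The speed-up comes at a small accuracy cost, so it is worth spot-checking retrieval quality after quantizing.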