Update app.py
Browse files
app.py
CHANGED
@@ -6,8 +6,7 @@ from rank_bm25 import BM25Okapi
|
|
6 |
from sklearn.feature_extraction import _stop_words
|
7 |
import string
|
8 |
import numpy as np
|
9 |
-
|
10 |
-
import time
|
11 |
from newspaper import Article
|
12 |
import base64
|
13 |
import docx2txt
|
@@ -168,9 +167,16 @@ def bm25_api(passages):
|
|
168 |
|
169 |
bi_enc_options = ["multi-qa-mpnet-base-dot-v1","all-mpnet-base-v2","multi-qa-MiniLM-L6-cos-v1"]
|
170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
# This function will search all wikipedia articles for passages that
|
172 |
# answer the query
|
173 |
-
def search_func(query, top_k=
|
174 |
st.write(f"Search Query: {query}")
|
175 |
|
176 |
if url_text:
|
@@ -188,8 +194,9 @@ def search_func(query, top_k=2):
|
|
188 |
bm25_hits = sorted(bm25_hits, key=lambda x: x['score'], reverse=True)
|
189 |
|
190 |
st.subheader(f"Top-{top_k} lexical search (BM25) hits")
|
191 |
-
|
192 |
-
|
|
|
193 |
|
194 |
##### Semantic Search #####
|
195 |
# Encode the query using the bi-encoder and find potentially relevant passages
|
@@ -211,15 +218,17 @@ def search_func(query, top_k=2):
|
|
211 |
st.markdown("\n-------------------------\n")
|
212 |
st.subheader(f"Top-{top_k} Bi-Encoder Retrieval hits")
|
213 |
hits = sorted(hits, key=lambda x: x['score'], reverse=True)
|
214 |
-
|
215 |
-
|
|
|
216 |
|
217 |
# Output of top-k hits from re-ranker
|
218 |
st.markdown("\n-------------------------\n")
|
219 |
st.subheader(f"Top-{top_k} Cross-Encoder Re-ranker hits")
|
220 |
hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
|
221 |
-
|
222 |
-
|
|
|
223 |
|
224 |
#Streamlit App
|
225 |
|
|
|
6 |
from sklearn.feature_extraction import _stop_words
|
7 |
import string
|
8 |
import numpy as np
|
9 |
+
import pandas as pd
|
|
|
10 |
from newspaper import Article
|
11 |
import base64
|
12 |
import docx2txt
|
|
|
167 |
|
168 |
bi_enc_options = ["multi-qa-mpnet-base-dot-v1","all-mpnet-base-v2","multi-qa-MiniLM-L6-cos-v1"]
|
169 |
|
170 |
+
def display_df_as_table(model, top_k, score, corpus=None):
    """Build a two-column (Score, Text) DataFrame for the first ``top_k`` hits.

    Despite its name, this function does not render anything itself; it
    returns the DataFrame and leaves displaying it to the caller.

    Args:
        model: Sequence of hit dicts (not a model object). Each hit must
            provide a ``'corpus_id'`` key and the key named by ``score``.
            The hits are used in the order given; no sorting happens here.
        top_k: Number of leading hits to include.
        score: Key under which each hit stores the score to show,
            e.g. ``'score'`` or ``'cross-score'``.
        corpus: Optional sequence of passage texts indexed by
            ``hit['corpus_id']``. When omitted, the module-level
            ``passages`` is used, which preserves the original behaviour.

    Returns:
        A DataFrame with columns ``'Score'`` (rounded to 2 decimals) and
        ``'Text'``, one row per selected hit.
    """
    if corpus is None:
        # Fall back to the module-level passages this function originally
        # read implicitly, so existing three-argument calls keep working.
        corpus = passages

    rows = [(hit[score], corpus[hit['corpus_id']]) for hit in model[:top_k]]
    df = pd.DataFrame(rows, columns=['Score', 'Text'])
    df['Score'] = df['Score'].round(2)

    return df
|
176 |
+
|
177 |
# This function will search all wikipedia articles for passages that
|
178 |
# answer the query
|
179 |
+
def search_func(query, top_k=top_k):
|
180 |
st.write(f"Search Query: {query}")
|
181 |
|
182 |
if url_text:
|
|
|
194 |
bm25_hits = sorted(bm25_hits, key=lambda x: x['score'], reverse=True)
|
195 |
|
196 |
st.subheader(f"Top-{top_k} lexical search (BM25) hits")
|
197 |
+
|
198 |
+
bm25_df = display_df_as_table(bm25_hits,top_k,'score')
|
199 |
+
st.write(bm25_df.to_html(index=False), unsafe_allow_html=True)
|
200 |
|
201 |
##### Semantic Search #####
|
202 |
# Encode the query using the bi-encoder and find potentially relevant passages
|
|
|
218 |
st.markdown("\n-------------------------\n")
|
219 |
st.subheader(f"Top-{top_k} Bi-Encoder Retrieval hits")
|
220 |
hits = sorted(hits, key=lambda x: x['score'], reverse=True)
|
221 |
+
|
222 |
+
cross_df = display_df_as_table(hits,top_k,'score')
|
223 |
+
st.write(cross_df.to_html(index=False), unsafe_allow_html=True)
|
224 |
|
225 |
# Output of top-k hits from re-ranker
|
226 |
st.markdown("\n-------------------------\n")
|
227 |
st.subheader(f"Top-{top_k} Cross-Encoder Re-ranker hits")
|
228 |
hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
|
229 |
+
|
230 |
+
rerank_df = display_df_as_table(hits,top_k,'cross-score')
|
231 |
+
st.write(rerank_df.to_html(index=False), unsafe_allow_html=True)
|
232 |
|
233 |
#Streamlit App
|
234 |
|