nickmuchi committed on
Commit
7580e3c
·
1 Parent(s): 35f456f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -9
app.py CHANGED
@@ -6,8 +6,7 @@ from rank_bm25 import BM25Okapi
6
  from sklearn.feature_extraction import _stop_words
7
  import string
8
  import numpy as np
9
- from bs4 import BeautifulSoup
10
- import time
11
  from newspaper import Article
12
  import base64
13
  import docx2txt
@@ -168,9 +167,16 @@ def bm25_api(passages):
168
 
169
  bi_enc_options = ["multi-qa-mpnet-base-dot-v1","all-mpnet-base-v2","multi-qa-MiniLM-L6-cos-v1"]
170
 
 
 
 
 
 
 
 
171
  # This function will search all wikipedia articles for passages that
172
  # answer the query
173
- def search_func(query, top_k=2):
174
  st.write(f"Search Query: {query}")
175
 
176
  if url_text:
@@ -188,8 +194,9 @@ def search_func(query, top_k=2):
188
  bm25_hits = sorted(bm25_hits, key=lambda x: x['score'], reverse=True)
189
 
190
  st.subheader(f"Top-{top_k} lexical search (BM25) hits")
191
- for hit in bm25_hits[0:top_k]:
192
- st.write("\t{:.3f}\t{}".format(hit['score'], passages[hit['corpus_id']].replace("\n", " ")))
 
193
 
194
  ##### Semantic Search #####
195
  # Encode the query using the bi-encoder and find potentially relevant passages
@@ -211,15 +218,17 @@ def search_func(query, top_k=2):
211
  st.markdown("\n-------------------------\n")
212
  st.subheader(f"Top-{top_k} Bi-Encoder Retrieval hits")
213
  hits = sorted(hits, key=lambda x: x['score'], reverse=True)
214
- for hit in hits[0:top_k]:
215
- st.write("\t{:.3f}\t{}".format(hit['score'], passages[hit['corpus_id']].replace("\n", " ")))
 
216
 
217
  # Output of top-3 hits from re-ranker
218
  st.markdown("\n-------------------------\n")
219
  st.subheader(f"Top-{top_k} Cross-Encoder Re-ranker hits")
220
  hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
221
- for hit in hits[0:top_k]:
222
- st.write("\t{:.3f}\t{}".format(hit['cross-score'], passages[hit['corpus_id']].replace("\n", " ")))
 
223
 
224
  #Streamlit App
225
 
 
6
  from sklearn.feature_extraction import _stop_words
7
  import string
8
  import numpy as np
9
+ import pandas as pd
 
10
  from newspaper import Article
11
  import base64
12
  import docx2txt
 
167
 
168
  bi_enc_options = ["multi-qa-mpnet-base-dot-v1","all-mpnet-base-v2","multi-qa-MiniLM-L6-cos-v1"]
169
 
170
+ def display_df_as_table(model,top_k,score):
171
+ # Build a DataFrame of rounded scores and passage text for the top_k hits; the caller renders it
172
+ df = pd.DataFrame([(hit[score],passages[hit['corpus_id']]) for hit in model[0:top_k]],columns=['Score','Text'])
173
+ df['Score'] = round(df['Score'],2)
174
+
175
+ return df
176
+
177
  # This function will search all wikipedia articles for passages that
178
  # answer the query
179
+ def search_func(query, top_k=top_k):
180
  st.write(f"Search Query: {query}")
181
 
182
  if url_text:
 
194
  bm25_hits = sorted(bm25_hits, key=lambda x: x['score'], reverse=True)
195
 
196
  st.subheader(f"Top-{top_k} lexical search (BM25) hits")
197
+
198
+ bm25_df = display_df_as_table(bm25_hits,top_k,'score')
199
+ st.write(bm25_df.to_html(index=False), unsafe_allow_html=True)
200
 
201
  ##### Semantic Search #####
202
  # Encode the query using the bi-encoder and find potentially relevant passages
 
218
  st.markdown("\n-------------------------\n")
219
  st.subheader(f"Top-{top_k} Bi-Encoder Retrieval hits")
220
  hits = sorted(hits, key=lambda x: x['score'], reverse=True)
221
+
222
+ cross_df = display_df_as_table(hits,top_k,'score')
223
+ st.write(cross_df.to_html(index=False), unsafe_allow_html=True)
224
 
225
  # Output of top-3 hits from re-ranker
226
  st.markdown("\n-------------------------\n")
227
  st.subheader(f"Top-{top_k} Cross-Encoder Re-ranker hits")
228
  hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
229
+
230
+ rerank_df = display_df_as_table(hits,top_k,'cross-score')
231
+ st.write(rerank_df.to_html(index=False), unsafe_allow_html=True)
232
 
233
  #Streamlit App
234