nickmuchi committed on
Commit
9002add
·
1 Parent(s): 0b292ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -107,9 +107,9 @@ def preprocess_plain_text(text,window_size=3):
107
  end_idx = min(start_idx+window_size, len(paragraph))
108
  passages.append(" ".join(paragraph[start_idx:end_idx]))
109
 
110
- print("Paragraphs: ", len(paragraphs))
111
- print("Sentences: ", sum([len(p) for p in paragraphs]))
112
- print("Passages: ", len(passages))
113
 
114
  return passages
115
 
@@ -121,10 +121,10 @@ def bi_encode(bi_enc,passages):
121
  bi_encoder = SentenceTransformer(bi_enc)
122
 
123
  #Compute the embeddings using the multi-process pool
124
- print('encoding passages into a vector space...')
125
  corpus_embeddings = bi_encoder.encode(passages, convert_to_tensor=True, show_progress_bar=True)
126
 
127
- print("Embeddings computed. Shape:", corpus_embeddings.shape)
128
 
129
  return corpus_embeddings
130
 
@@ -169,7 +169,9 @@ bi_enc_options = ["multi-qa-mpnet-base-dot-v1","all-mpnet-base-v2","multi-qa-Min
169
  def search_func(query, top_k=2):
170
  st.write(f"Search Query: {query}")
171
 
172
- st.write("Document Header: ")
 
 
173
 
174
  ##### BM25 search (lexical search) #####
175
  bm25_scores = bm25.get_scores(bm25_tokenizer(query))
 
107
  end_idx = min(start_idx+window_size, len(paragraph))
108
  passages.append(" ".join(paragraph[start_idx:end_idx]))
109
 
110
+ st.write(f"Paragraphs: {len(paragraphs)}")
111
+ st.write(f"Sentences: {sum([len(p) for p in paragraphs])}")
112
+ st.write(f"Passages: {len(passages)}")
113
 
114
  return passages
115
 
 
121
  bi_encoder = SentenceTransformer(bi_enc)
122
 
123
  #Compute the embeddings using the multi-process pool
124
+ st.write('encoding passages into a vector space...')
125
  corpus_embeddings = bi_encoder.encode(passages, convert_to_tensor=True, show_progress_bar=True)
126
 
127
+ st.write(f"Embeddings computed. Shape: {corpus_embeddings.shape}")
128
 
129
  return corpus_embeddings
130
 
 
169
  def search_func(query, top_k=2):
170
  st.write(f"Search Query: {query}")
171
 
172
+ if url_text:
173
+
174
+ st.write(f"Document Header: {title}")
175
 
176
  ##### BM25 search (lexical search) #####
177
  bm25_scores = bm25.get_scores(bm25_tokenizer(query))