Thiago Vieira committed on
Commit
0a3b978
·
1 Parent(s): 7016a99

download models and dataset

Browse files
Files changed (1) hide show
  1. app.py +15 -6
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import subprocess
 
3
  import pickle
4
  import time
5
  import streamlit as st
@@ -8,8 +9,6 @@ from bm25Simple import BM25Simple
8
 
9
  path = os.path.dirname(__file__)
10
  print(path)
11
- print(subprocess.run(['pwd'], shell=True))
12
- print()
13
  print(subprocess.run(['ls -la'], shell=True))
14
  print()
15
  print(subprocess.run(['ls -la models/'], shell=True))
@@ -142,7 +141,9 @@ def load_docs():
142
  doc_set = {}
143
  doc_id = ""
144
  doc_text = ""
145
- with open(path + '/content/CISI.ALL') as f:
 
 
146
  lines = ""
147
  for l in f.readlines():
148
  lines += "\n" + l.strip() if l.startswith(".") else " " + l.strip()
@@ -162,18 +163,26 @@ def load_docs():
162
 
163
  @st.cache(ttl=3600, allow_output_mutation=True, show_spinner=True, max_entries=2)
164
  def load_models():
165
- with open(path + '/models/BM25_simple.pkl', 'rb') as file:
 
 
 
166
  bm25_simple: BM25Simple = pickle.load(file)
167
  print(bm25_simple.corpus_size)
168
 
169
- with open(path + '/models/BM25OKapi.pkl', 'rb') as file:
 
 
170
  bm25_okapi: BM25Okapi = pickle.load(file)
171
  print(bm25_okapi.corpus_size)
172
 
173
- with open(path + '/models/BM25Plus.pkl', 'rb') as file:
 
 
174
  bm25_plus: BM25Plus = pickle.load(file)
175
  print(bm25_plus.corpus_size)
176
 
 
177
  st.success("BM25 models loaded!", icon='✅')
178
  return bm25_simple, bm25_okapi, bm25_plus
179
 
 
1
  import os
2
  import subprocess
3
+ import urllib
4
  import pickle
5
  import time
6
  import streamlit as st
 
9
 
10
  path = os.path.dirname(__file__)
11
  print(path)
 
 
12
  print(subprocess.run(['ls -la'], shell=True))
13
  print()
14
  print(subprocess.run(['ls -la models/'], shell=True))
 
141
  doc_set = {}
142
  doc_id = ""
143
  doc_text = ""
144
+ documents_file, _ = urllib.request.urlretrieve(
145
+ 'https://raw.githubusercontent.com/tcvieira/bm25-exercise-report/main/content/CISI.ALL', 'CISI.ALL.downloaded')
146
+ with open(documents_file) as f:
147
  lines = ""
148
  for l in f.readlines():
149
  lines += "\n" + l.strip() if l.startswith(".") else " " + l.strip()
 
163
 
164
  @st.cache(ttl=3600, allow_output_mutation=True, show_spinner=True, max_entries=2)
165
  def load_models():
166
+
167
+ bm25_simple_file, _ = urllib.request.urlretrieve(
168
+ 'https://github.com/tcvieira/bm25-exercise-report/blob/main/models/BM25_simple.pkl?raw=true', 'bm25_simple_file.downloaded')
169
+ with open(bm25_simple_file, 'rb') as file:
170
  bm25_simple: BM25Simple = pickle.load(file)
171
  print(bm25_simple.corpus_size)
172
 
173
+ bm25_okapi_file, _ = urllib.request.urlretrieve(
174
+ 'https://github.com/tcvieira/bm25-exercise-report/blob/main/models/BM25Okapi.pkl?raw=true', 'bm25_okapi_file.downloaded')
175
+ with open(bm25_okapi_file, 'rb') as file:
176
  bm25_okapi: BM25Okapi = pickle.load(file)
177
  print(bm25_okapi.corpus_size)
178
 
179
+ bm25_plus_file, _ = urllib.request.urlretrieve(
180
+ 'https://github.com/tcvieira/bm25-exercise-report/blob/main/models/BM25Plus.pkl?raw=true', 'bm25_plus_file.downloaded')
181
+ with open(bm25_plus_file, 'rb') as file:
182
  bm25_plus: BM25Plus = pickle.load(file)
183
  print(bm25_plus.corpus_size)
184
 
185
+ print(subprocess.run(['ls -la'], shell=True))
186
  st.success("BM25 models loaded!", icon='✅')
187
  return bm25_simple, bm25_okapi, bm25_plus
188