Spaces:
Runtime error
Runtime error
Thiago Vieira
committed on
Commit
·
0a3b978
1
Parent(s):
7016a99
download models and dataset
Browse files
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import os
|
| 2 |
import subprocess
|
|
|
|
| 3 |
import pickle
|
| 4 |
import time
|
| 5 |
import streamlit as st
|
|
@@ -8,8 +9,6 @@ from bm25Simple import BM25Simple
|
|
| 8 |
|
| 9 |
path = os.path.dirname(__file__)
|
| 10 |
print(path)
|
| 11 |
-
print(subprocess.run(['pwd'], shell=True))
|
| 12 |
-
print()
|
| 13 |
print(subprocess.run(['ls -la'], shell=True))
|
| 14 |
print()
|
| 15 |
print(subprocess.run(['ls -la models/'], shell=True))
|
|
@@ -142,7 +141,9 @@ def load_docs():
|
|
| 142 |
doc_set = {}
|
| 143 |
doc_id = ""
|
| 144 |
doc_text = ""
|
| 145 |
-
|
|
|
|
|
|
|
| 146 |
lines = ""
|
| 147 |
for l in f.readlines():
|
| 148 |
lines += "\n" + l.strip() if l.startswith(".") else " " + l.strip()
|
|
@@ -162,18 +163,26 @@ def load_docs():
|
|
| 162 |
|
| 163 |
@st.cache(ttl=3600, allow_output_mutation=True, show_spinner=True, max_entries=2)
|
| 164 |
def load_models():
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
| 166 |
bm25_simple: BM25Simple = pickle.load(file)
|
| 167 |
print(bm25_simple.corpus_size)
|
| 168 |
|
| 169 |
-
|
|
|
|
|
|
|
| 170 |
bm25_okapi: BM25Okapi = pickle.load(file)
|
| 171 |
print(bm25_okapi.corpus_size)
|
| 172 |
|
| 173 |
-
|
|
|
|
|
|
|
| 174 |
bm25_plus: BM25Plus = pickle.load(file)
|
| 175 |
print(bm25_plus.corpus_size)
|
| 176 |
|
|
|
|
| 177 |
st.success("BM25 models loaded!", icon='✅')
|
| 178 |
return bm25_simple, bm25_okapi, bm25_plus
|
| 179 |
|
|
|
|
| 1 |
import os
|
| 2 |
import subprocess
|
| 3 |
+
import urllib
|
| 4 |
import pickle
|
| 5 |
import time
|
| 6 |
import streamlit as st
|
|
|
|
| 9 |
|
| 10 |
path = os.path.dirname(__file__)
|
| 11 |
print(path)
|
|
|
|
|
|
|
| 12 |
print(subprocess.run(['ls -la'], shell=True))
|
| 13 |
print()
|
| 14 |
print(subprocess.run(['ls -la models/'], shell=True))
|
|
|
|
| 141 |
doc_set = {}
|
| 142 |
doc_id = ""
|
| 143 |
doc_text = ""
|
| 144 |
+
documents_file, _ = urllib.request.urlretrieve(
|
| 145 |
+
'https://raw.githubusercontent.com/tcvieira/bm25-exercise-report/main/content/CISI.ALL', 'CISI.ALL.downloaded')
|
| 146 |
+
with open(documents_file) as f:
|
| 147 |
lines = ""
|
| 148 |
for l in f.readlines():
|
| 149 |
lines += "\n" + l.strip() if l.startswith(".") else " " + l.strip()
|
|
|
|
| 163 |
|
| 164 |
@st.cache(ttl=3600, allow_output_mutation=True, show_spinner=True, max_entries=2)
|
| 165 |
def load_models():
|
| 166 |
+
|
| 167 |
+
bm25_simple_file, _ = urllib.request.urlretrieve(
|
| 168 |
+
'https://github.com/tcvieira/bm25-exercise-report/blob/main/models/BM25_simple.pkl?raw=true', 'bm25_simple_file.downloaded')
|
| 169 |
+
with open(bm25_simple_file, 'rb') as file:
|
| 170 |
bm25_simple: BM25Simple = pickle.load(file)
|
| 171 |
print(bm25_simple.corpus_size)
|
| 172 |
|
| 173 |
+
bm25_okapi_file, _ = urllib.request.urlretrieve(
|
| 174 |
+
'https://github.com/tcvieira/bm25-exercise-report/blob/main/models/BM25Okapi.pkl?raw=true', 'bm25_okapi_file.downloaded')
|
| 175 |
+
with open(bm25_okapi_file, 'rb') as file:
|
| 176 |
bm25_okapi: BM25Okapi = pickle.load(file)
|
| 177 |
print(bm25_okapi.corpus_size)
|
| 178 |
|
| 179 |
+
bm25_plus_file, _ = urllib.request.urlretrieve(
|
| 180 |
+
'https://github.com/tcvieira/bm25-exercise-report/blob/main/models/BM25Plus.pkl?raw=true', 'bm25_plus_file.downloaded')
|
| 181 |
+
with open(bm25_plus_file, 'rb') as file:
|
| 182 |
bm25_plus: BM25Plus = pickle.load(file)
|
| 183 |
print(bm25_plus.corpus_size)
|
| 184 |
|
| 185 |
+
print(subprocess.run(['ls -la'], shell=True))
|
| 186 |
st.success("BM25 models loaded!", icon='✅')
|
| 187 |
return bm25_simple, bm25_okapi, bm25_plus
|
| 188 |
|