Spaces:
Running
Running
Set HF token when initializing BM25 retriever and tokenizer
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import re
|
2 |
import unicodedata
|
3 |
from pathlib import Path
|
@@ -83,8 +84,14 @@ def normalize_text(text: str) -> str:
|
|
83 |
|
84 |
|
85 |
bm25_tokenizer = TokenizerHF(lower=True, splitter=tokenizer.tokenize, stopwords=None, stemmer=None)
|
86 |
-
bm25_tokenizer.load_vocab_from_hub("studio-ousia/luxe-nayose-bm25")
|
87 |
-
bm25_retriever = BM25HF.load_from_hub("studio-ousia/luxe-nayose-bm25")
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
|
90 |
def get_texts_from_file(file_path):
|
|
|
1 |
+
import os
|
2 |
import re
|
3 |
import unicodedata
|
4 |
from pathlib import Path
|
|
|
84 |
|
85 |
|
86 |
bm25_tokenizer = TokenizerHF(lower=True, splitter=tokenizer.tokenize, stopwords=None, stemmer=None)
|
87 |
+
bm25_tokenizer.load_vocab_from_hub(
|
88 |
+
"studio-ousia/luxe-nayose-bm25",
|
89 |
+
token=os.getenv("HF_TOKEN"), # needed for demo at HF Spaces
|
90 |
+
)
|
91 |
+
bm25_retriever = BM25HF.load_from_hub(
|
92 |
+
"studio-ousia/luxe-nayose-bm25",
|
93 |
+
token=os.getenv("HF_TOKEN"), # needed for demo at HF Spaces
|
94 |
+
)
|
95 |
|
96 |
|
97 |
def get_texts_from_file(file_path):
|