singletongue committed on
Commit
888df00
·
verified ·
1 Parent(s): 6234321

Set HF token when initializing BM25 retriever and tokenizer

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import re
2
  import unicodedata
3
  from pathlib import Path
@@ -83,8 +84,14 @@ def normalize_text(text: str) -> str:
83
 
84
 
85
  bm25_tokenizer = TokenizerHF(lower=True, splitter=tokenizer.tokenize, stopwords=None, stemmer=None)
86
- bm25_tokenizer.load_vocab_from_hub("studio-ousia/luxe-nayose-bm25")
87
- bm25_retriever = BM25HF.load_from_hub("studio-ousia/luxe-nayose-bm25")
 
 
 
 
 
 
88
 
89
 
90
  def get_texts_from_file(file_path):
 
1
+ import os
2
  import re
3
  import unicodedata
4
  from pathlib import Path
 
84
 
85
 
86
  bm25_tokenizer = TokenizerHF(lower=True, splitter=tokenizer.tokenize, stopwords=None, stemmer=None)
87
+ bm25_tokenizer.load_vocab_from_hub(
88
+ "studio-ousia/luxe-nayose-bm25",
89
+ token=os.getenv("HF_TOKEN"), # needed for demo at HF Spaces
90
+ )
91
+ bm25_retriever = BM25HF.load_from_hub(
92
+ "studio-ousia/luxe-nayose-bm25",
93
+ token=os.getenv("HF_TOKEN"), # needed for demo at HF Spaces
94
+ )
95
 
96
 
97
  def get_texts_from_file(file_path):