Spaces:
Runtime error
Runtime error
Update train.py
Browse files
train.py
CHANGED
@@ -49,14 +49,6 @@ def create_tokenizer(training_corpus):
49 |
50 |   def load_tokenizer(training_corpus):
51 |       tokenizer = AutoTokenizer.from_pretrained(OUTPUT_REPO)
52 | -     special_tokens = ["<s>", "<pad>", "</s>", "<unk>", "<mask>"]
53 | -     special_tokens.append(["<|user|>", "<|bot|>", "<|end|>"])
54 | -     tokenizer.train_from_iterator(
55 | -         training_corpus,
56 | -         vocab_size=VOCAB_SIZE,
57 | -         min_frequency=2,
58 | -         special_tokens=special_tokens
59 | -     )
60 |       return tokenizer
61 |
62 |   def get_training_corpus(dataset):
49 |
50 |   def load_tokenizer(training_corpus):
51 |       tokenizer = AutoTokenizer.from_pretrained(OUTPUT_REPO)
52 |       return tokenizer
53 |
54 |   def get_training_corpus(dataset):