Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -29,8 +29,9 @@ def create_tokenizer(training_corpus):
|
|
29 |
min_frequency=2,
|
30 |
special_tokens=["<s>", "<pad>", "</s>", "<unk>", "<mask>", "<|user|>", "<|bot|>", "<|end|>"]
|
31 |
)
|
32 |
-
|
33 |
-
|
|
|
34 |
|
35 |
def get_training_corpus(dataset):
|
36 |
for i in range(0, len(dataset), 1000):
|
@@ -48,9 +49,7 @@ def format_prompts(examples, tokenizer):
|
|
48 |
conversation.append({"role": "assistant", "content": response})
|
49 |
formatted_conversation = tokenizer.apply_chat_template(conversation, tokenize=False)
|
50 |
texts.append(formatted_conversation)
|
51 |
-
|
52 |
-
output['text'] = texts
|
53 |
-
return output
|
54 |
|
55 |
def create_model(tokenizer, factor):
|
56 |
config = LlamaConfig(
|
|
|
29 |
min_frequency=2,
|
30 |
special_tokens=["<s>", "<pad>", "</s>", "<unk>", "<mask>", "<|user|>", "<|bot|>", "<|end|>"]
|
31 |
)
|
32 |
+
|
33 |
+
fast_tokenizer = PreTrainedTokenizerFast(tokenizer_object=tokenizer)
|
34 |
+
return fast_tokenizer
|
35 |
|
36 |
def get_training_corpus(dataset):
|
37 |
for i in range(0, len(dataset), 1000):
|
|
|
49 |
conversation.append({"role": "assistant", "content": response})
|
50 |
formatted_conversation = tokenizer.apply_chat_template(conversation, tokenize=False)
|
51 |
texts.append(formatted_conversation)
|
52 |
+
return {"text": texts}
|
|
|
|
|
53 |
|
54 |
def create_model(tokenizer, factor):
|
55 |
config = LlamaConfig(
|