nroggendorff committed on
Commit
ece3888
·
verified ·
1 Parent(s): ee162e7

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +10 -1
train.py CHANGED
@@ -94,7 +94,16 @@ def format_prompts(examples, tokenizer, isinst):
94
  else:
95
  print('Found empty entry in examples. Moving on..')
96
  continue
97
- return {"text": texts}
 
 
 
 
 
 
 
 
 
98
 
99
  def create_model(tokenizer):
100
  config = LlamaConfig(
 
94
  else:
95
  print('Found empty entry in examples. Moving on..')
96
  continue
97
+ tokenized_texts = tokenizer(
98
+ texts,
99
+ padding="max_length",
100
+ truncation=True,
101
+ max_length=MAX_SEQ_LENGTH,
102
+ return_tensors="pt"
103
+ )
104
+ decoded_texts = tokenizer.batch_decode(tokenized_texts)
105
+
106
+ return decoded_texts
107
 
108
  def create_model(tokenizer):
109
  config = LlamaConfig(