Spaces:

nroggendorff
/

train-llama

Runtime error

nroggendorff committed on Nov 9, 2024

Commit

ece3888

verified ·

1 Parent(s): ee162e7

Update train.py

Files changed (1) hide show

train.py CHANGED Viewed

@@ -94,7 +94,16 @@ def format_prompts(examples, tokenizer, isinst):
         else:
             print('Found empty entry in examples. Moving on..')
             continue
-    return {"text": texts}
 def create_model(tokenizer):
     config = LlamaConfig(

         else:
             print('Found empty entry in examples. Moving on..')
             continue
+    tokenized_texts = tokenizer(
+        texts,
+        padding="max_length",
+        truncation=True,
+        max_length=MAX_SEQ_LENGTH,
+        return_tensors="pt"
+    )
+    decoded_texts = tokenizer.batch_decode(tokenized_texts)
+    return decoded_texts
 def create_model(tokenizer):
     config = LlamaConfig(