Update app.py
Browse files
app.py
CHANGED
@@ -37,6 +37,11 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
37 |
data = {"text": [clean_text]}
|
38 |
dataset = Dataset.from_dict(data)
|
39 |
|
|
|
|
|
|
|
|
|
|
|
40 |
# Tokenization function
|
41 |
def tokenize_function(examples):
|
42 |
tokens = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
|
|
|
37 |
data = {"text": [clean_text]}
|
38 |
dataset = Dataset.from_dict(data)
|
39 |
|
40 |
+
# Set a padding token manually: padding="max_length" below requires the tokenizer to have one
|
41 |
+
tokenizer.pad_token = tokenizer.eos_token # Use EOS as PAD token
|
42 |
+
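# Alternatively, add a new custom pad token (this grows the vocabulary, so any model used with this tokenizer must resize its embeddings to len(tokenizer))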
|
43 |
+
# tokenizer.add_special_tokens({'pad_token': '[PAD]'})
|
44 |
+
|
45 |
# Tokenization function
|
46 |
def tokenize_function(examples):
|
47 |
tokens = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
|