jonACE committed on
Commit
971d26b
·
verified ·
1 Parent(s): 015e26a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -0
app.py CHANGED
@@ -37,6 +37,11 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
37
  data = {"text": [clean_text]}
38
  dataset = Dataset.from_dict(data)
39
 
 
 
 
 
 
40
  # Tokenization function
41
  def tokenize_function(examples):
42
  tokens = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
 
37
  data = {"text": [clean_text]}
38
  dataset = Dataset.from_dict(data)
39
 
40
+ # Set a padding token manually
41
+ tokenizer.pad_token = tokenizer.eos_token # Use EOS as PAD token
42
+ # Alternatively, add a new custom pad token (the model's token embeddings must then be resized to include it)
43
+ # tokenizer.add_special_tokens({'pad_token': '[PAD]'})
44
+
45
  # Tokenization function
46
  def tokenize_function(examples):
47
  tokens = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)