jonACE committed on
Commit
a183e25
·
verified ·
1 Parent(s): 8203218

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -37,7 +37,9 @@ model_name = "distilbert-base-uncased"
37
  tokenizer = AutoTokenizer.from_pretrained(model_name)
38
 
39
  def tokenize_function(examples):
40
- return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
 
 
41
 
42
  tokenized_datasets = dataset.map(tokenize_function, batched=True)
43
 
 
37
  tokenizer = AutoTokenizer.from_pretrained(model_name)
38
 
39
  def tokenize_function(examples):
40
+ tokens = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
41
+ tokens["labels"] = tokens["input_ids"].copy() # Use input as labels for unsupervised learning
42
+ return tokens
43
 
44
  tokenized_datasets = dataset.map(tokenize_function, batched=True)
45