Update app.py
Browse files
app.py
CHANGED
@@ -37,7 +37,9 @@ model_name = "distilbert-base-uncased"
|
|
37 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
38 |
|
39 |
def tokenize_function(examples):
|
40 |
-
|
|
|
|
|
41 |
|
42 |
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
43 |
|
|
|
37 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
38 |
|
39 |
def tokenize_function(examples):
    """Tokenize ``examples["text"]`` and attach labels derived from the inputs.

    The text is truncated/padded to a fixed length of 512 tokens. ``labels``
    mirror ``input_ids`` (the input is used as its own target for
    unsupervised learning), except that padded positions are replaced with
    -100 so the loss ignores them instead of training on padding.

    Args:
        examples: Mapping with a ``"text"`` entry holding either a single
            string or a list of strings (the latter is what
            ``dataset.map(..., batched=True)`` passes in).

    Returns:
        The tokenizer output with an added ``"labels"`` entry shaped like
        ``"input_ids"``.
    """
    ignore_index = -100  # label value skipped by the cross-entropy loss

    def mask_padding(ids, mask):
        # Keep real tokens; blank out positions the attention mask marks as padding.
        return [tok if keep else ignore_index for tok, keep in zip(ids, mask)]

    tokens = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )

    input_ids = tokens["input_ids"]
    # NOTE(review): relies on the tokenizer returning "attention_mask", which
    # is the default for the DistilBERT tokenizer -- confirm if the model changes.
    attention_mask = tokens["attention_mask"]

    if isinstance(examples["text"], str):
        # Non-batched call: a single flat sequence of token ids.
        tokens["labels"] = mask_padding(input_ids, attention_mask)
    else:
        # Batched call: one sequence of token ids per example.
        tokens["labels"] = [
            mask_padding(ids, mask)
            for ids, mask in zip(input_ids, attention_mask)
        ]
    return tokens
|
43 |
|
44 |
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
45 |
|