Add debug print of the first tokenized example (type and shape/length of each field)
Browse files- train_llama.py +3 -1
train_llama.py
CHANGED
|
@@ -59,7 +59,9 @@ def tokenize_data(example):
|
|
| 59 |
}
|
| 60 |
|
| 61 |
tokenized_dataset = dataset["train"].map(tokenize_data, batched=False, remove_columns=dataset["train"].column_names)
|
| 62 |
-
|
|
|
|
|
|
|
| 63 |
|
| 64 |
# Data collator
|
| 65 |
def custom_data_collator(features):
|
|
|
|
| 59 |
}
|
| 60 |
|
| 61 |
tokenized_dataset = dataset["train"].map(tokenize_data, batched=False, remove_columns=dataset["train"].column_names)
|
| 62 |
+
# Debug print: show each field's type and its shape (if it has a .shape attribute) or its length otherwise
|
| 63 |
+
first_example = tokenized_dataset[0]
|
| 64 |
+
print("First tokenized example:", {k: (type(v), v.shape if hasattr(v, 'shape') else len(v)) for k, v in first_example.items()})
|
| 65 |
|
| 66 |
# Data collator
|
| 67 |
def custom_data_collator(features):
|