Kevin Fink committed · Commit 77c210a · 1 Parent(s): e585d7a · init
app.py CHANGED
@@ -40,7 +40,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
 
     # Tokenize the dataset
     def tokenize_function(examples):
-        max_length =
+        max_length = 64
         # Assuming 'text' is the input and 'target' is the expected output
         model_inputs = tokenizer(
             examples['text'],
@@ -50,14 +50,13 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
         )
 
         # Setup the decoder input IDs (shifted right)
-
-
-
-
-
-
-
-        )
+        labels = tokenizer(
+            examples['target'],
+            max_length=max_length, # Set to None for dynamic padding
+            padding=False, # Disable padding here, we will handle it later
+            truncation=True,
+            text_target=examples['target'] # Use text_target for target text
+        )
 
         # Add labels to the model inputs
         model_inputs["labels"] = labels["input_ids"]
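
For context, here is a minimal, self-contained sketch of how the patched tokenize_function could be wired into a preprocessing pipeline. Everything outside the function body is an assumption for illustration: the checkpoint name, the hypothetical dataset name, the load_dataset/map calls, and the use of DataCollatorForSeq2Seq to perform the dynamic padding that the commit defers with padding=False. The keyword arguments of the first tokenizer call are not visible in the hunk and are filled in by analogy with the target call; the target text is passed only via text_target here, whereas the committed call passes it both positionally and via text_target; and a return statement, which falls outside the visible hunks, is added so the function works with Dataset.map.

# Sketch only; names marked below as assumed/hypothetical are not from the commit.
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorForSeq2Seq

# Assumed checkpoint; the commit's fine_tune_model takes model_name as a parameter.
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")

def tokenize_function(examples):
    max_length = 64
    # 'text' is the model input, 'target' is the expected output
    model_inputs = tokenizer(
        examples['text'],
        max_length=max_length,
        padding=False,       # defer padding to the data collator
        truncation=True,
    )
    # Tokenize the targets; text_target routes them through the target-side tokenizer
    labels = tokenizer(
        text_target=examples['target'],
        max_length=max_length,
        padding=False,
        truncation=True,
    )
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Hypothetical dataset with 'text' and 'target' columns, as the function assumes.
dataset = load_dataset("some-user/some-seq2seq-dataset", split="train")
tokenized = dataset.map(tokenize_function, batched=True,
                        remove_columns=["text", "target"])

# Dynamic padding happens here, per batch, matching the "handle it later" comment.
collator = DataCollatorForSeq2Seq(tokenizer, padding="longest", label_pad_token_id=-100)

Deferring padding to DataCollatorForSeq2Seq pads each batch only to its longest sequence and pads labels with -100 so padded positions are ignored by the loss, which is the usual reason for setting padding=False at tokenization time.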