Kevin Fink committed on
Commit
77c210a
·
1 Parent(s): e585d7a
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -40,7 +40,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
40
 
41
  # Tokenize the dataset
42
  def tokenize_function(examples):
43
- max_length = 128
44
  # Assuming 'text' is the input and 'target' is the expected output
45
  model_inputs = tokenizer(
46
  examples['text'],
@@ -50,14 +50,13 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
50
  )
51
 
52
  # Setup the decoder input IDs (shifted right)
53
- with tokenizer.as_target_tokenizer():
54
- labels = tokenizer(
55
- examples['target'],
56
- max_length=max_length, # Set to None for dynamic padding
57
- padding=False, # Disable padding here, we will handle it later
58
- truncation=True,
59
-
60
- )
61
 
62
  # Add labels to the model inputs
63
  model_inputs["labels"] = labels["input_ids"]
 
40
 
41
  # Tokenize the dataset
42
  def tokenize_function(examples):
43
+ max_length = 64
44
  # Assuming 'text' is the input and 'target' is the expected output
45
  model_inputs = tokenizer(
46
  examples['text'],
 
50
  )
51
 
52
  # Setup the decoder input IDs (shifted right)
53
+ labels = tokenizer(
54
+ examples['target'],
55
+ max_length=max_length, # Set to None for dynamic padding
56
+ padding=False, # Disable padding here, we will handle it later
57
+ truncation=True,
58
+ text_target=examples['target'] # Use text_target for target text
59
+ )
 
60
 
61
  # Add labels to the model inputs
62
  model_inputs["labels"] = labels["input_ids"]