nroggendorff committed on
Commit
509eefc
·
verified ·
1 Parent(s): 2be1eee

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +3 -3
train.py CHANGED
@@ -67,8 +67,8 @@ def format_prompts(examples, tokenizer, isinst):
67
  conversation = []
68
  parts = text.split('<|end|>')
69
  for i in range(0, len(parts) - 1, 2):
70
- prompt = parts[i].replace("<|user|>", "")
71
- response = parts[i + 1].replace("<|bot|>", "")
72
  conversation.append({"role": "user", "content": prompt})
73
  conversation.append({"role": "assistant", "content": response})
74
  formatted_conversation = tokenizer.apply_chat_template(conversation, tokenize=False)
@@ -110,7 +110,7 @@ def configure_tokenizer(tokenizer):
110
  special_tokens["additional_special_tokens"] = ["<|user|>", "<|bot|>", "<|end|>"]
111
  tokenizer.add_special_tokens(special_tokens)
112
 
113
- tokenizer.pad_token_id = MAX_SEQ_LENGTH - 1
114
 
115
  if INSTRUCT_FINETUNE_BOOL:
116
  tokenizer.user_token_id = tokenizer.convert_tokens_to_ids("<|user|>")
 
67
  conversation = []
68
  parts = text.split('<|end|>')
69
  for i in range(0, len(parts) - 1, 2):
70
+ prompt = parts[i].replace("<|user|>", "").strip()
71
+ response = parts[i + 1].replace("<|bot|>", "").strip()
72
  conversation.append({"role": "user", "content": prompt})
73
  conversation.append({"role": "assistant", "content": response})
74
  formatted_conversation = tokenizer.apply_chat_template(conversation, tokenize=False)
 
110
  special_tokens["additional_special_tokens"] = ["<|user|>", "<|bot|>", "<|end|>"]
111
  tokenizer.add_special_tokens(special_tokens)
112
 
113
+ tokenizer.pad_token_id = tokenizer.eos_token_id
114
 
115
  if INSTRUCT_FINETUNE_BOOL:
116
  tokenizer.user_token_id = tokenizer.convert_tokens_to_ids("<|user|>")