# File size: 1,292 Bytes

# 2c07569

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments

# Location of the plain-text questions file; each line is treated as one question.
questions_file = 'C:\\Users\\money\\OneDrive\\Pictures\\Blank Model\\untrained\\New folder (3)\\questions.txt'

# Read every question into memory. The encoding is pinned to UTF-8 so the
# decoded text does not depend on the platform's locale default (often cp1252
# on Windows), and blank lines are dropped so they do not become empty
# examples further down the script.
with open(questions_file, 'r', encoding='utf-8') as f:
    questions = [line for line in f.read().splitlines() if line.strip()]

# Define your custom tokenizer
def custom_tokenizer(text):
    """Split ``text`` into tokens on runs of whitespace.

    This is a placeholder tokenizer: it calls ``text.split()`` with no
    arguments, so leading/trailing whitespace is discarded and an empty or
    all-whitespace string yields an empty list. Punctuation stays attached
    to the neighbouring word.
    """
    tokens = text.split()
    return tokens

# Run every loaded question through custom_tokenizer, keeping input order;
# the result is one list of tokens per question.
tokenized_questions = list(map(custom_tokenizer, questions))

# Load the sequence-to-sequence model that will be fine-tuned.
# NOTE(review): from_pretrained() is normally given a model directory (config
# plus weights) or a Hub model id. Despite the ".pt" suffix this path is
# presumably a directory — confirm; a bare torch checkpoint file is not
# expected to load this way.
model = AutoModelForSeq2SeqLM.from_pretrained('C:\\Users\\money\\OneDrive\\Pictures\\Blank Model\\untrained model.pt')

# Training hyper-parameters.
# No evaluation strategy is requested here: the Trainer below is given no
# eval_dataset, so asking for per-epoch evaluation could not succeed. Leaving
# the argument out keeps the library default (no evaluation during training).
# per_device_eval_batch_size is kept so the value is already in place if an
# evaluation set is added later.
training_args = TrainingArguments(
    output_dir='./results',
    learning_rate=2e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.01,
)

# Build the trainer and run the single training epoch configured above.
# NOTE(review): tokenized_questions is a plain list of lists of string tokens.
# Trainer's data collation expects dict-like examples holding numeric model
# inputs (e.g. input_ids and labels), so this dataset still needs to be
# encoded with a real tokenizer and paired with target sequences before
# training can run — TODO confirm the intended targets and tokenizer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_questions,
)
trainer.train()

# Persist the fine-tuned weights and config so the model can be reloaded
# later with from_pretrained('./trained_model').
model_path = './trained_model'
model.save_pretrained(model_path)