# decoder-only-transformer-learning / shakespeare_config.py
import os
import torch
from pathlib import Path
from transformers import GPT2Tokenizer
def get_config():
    """Hyperparameters and paths for training on the Shakespeare dataset."""
    return {
        "batch_size": 8,
        "num_epochs": 600000,  # likely training iterations, not full dataset passes
        "lr": 1e-4,
        "seq_len": 350,  # maximum sequence length fed to the model
        "d_model": 512,  # embedding / hidden dimension
        "vocab_size": 50304,  # GPT-2's 50257 tokens + [PAD], rounded up to a multiple of 64
        "datasource": 'shakespeare',
        "model_folder": "weights",
        "model_basename": "tmodel_",
        "preload": "latest",  # resume from the most recent checkpoint
        "tokenizer_file": "tokenizer.json",
        "experiment_name": "runs/tmodel"  # TensorBoard log directory
    }
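
# Usage sketch: training/inference scripts pull their hyperparameters from this dict, e.g.
#   config = get_config()
#   config["seq_len"]  # -> 350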
# Resolve all paths relative to this file so the scripts work from any working directory
current_directory = os.path.dirname(os.path.abspath(__file__))
def get_weights_file_path(config, epoch: str):
    """Build the checkpoint path for a given epoch, creating the folder if needed."""
    model_folder = f"{current_directory}/{config['datasource']}/{config['model_folder']}"
    # Create the folder and subfolders if they don't exist
    Path(model_folder).mkdir(parents=True, exist_ok=True)
    model_filename = f"{config['model_basename']}{epoch}.pt"
    return str(Path(model_folder) / model_filename)
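
# For example (path shown relative to this file's directory):
#   get_weights_file_path(config, "42") -> "<current_directory>/shakespeare/weights/tmodel_42.pt"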
def get_data_folder_path(config):
    """Return the dataset folder for the configured datasource, creating it if needed."""
    data_folder = f"{current_directory}/{config['datasource']}/data"
    Path(data_folder).mkdir(parents=True, exist_ok=True)
    return data_folder
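
# e.g. get_data_folder_path(config) -> "<current_directory>/shakespeare/data"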
# Find the latest weights file in the weights folder
def latest_weights_file_path(config):
model_folder = f"{current_directory}/{config['datasource']}/{config['model_folder']}"
model_filename = f"{config['model_basename']}*"
weights_files = list(Path(model_folder).glob(model_filename))
if len(weights_files) == 0:
return None
    # Sort by the numeric epoch suffix so e.g. tmodel_10.pt comes after tmodel_9.pt;
    # a plain lexicographic sort would rank "10" before "9".
    weights_files.sort(key=lambda p: int(p.stem.removeprefix(config['model_basename'])))
return str(weights_files[-1])
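
# Typical resume logic in a training script (a sketch; assumes config["preload"]
# is either "latest", an epoch string, or None):
#   preload = config["preload"]
#   model_path = (latest_weights_file_path(config) if preload == "latest"
#                 else get_weights_file_path(config, preload) if preload else None)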
def get_gpt2_tokenizer(config):
    """Load the pretrained GPT-2 tokenizer; GPT-2 ships without a pad token, so [PAD] is added."""
    tokenizer: GPT2Tokenizer = GPT2Tokenizer.from_pretrained(
        pretrained_model_name_or_path="openai-community/gpt2",
        model_max_length=config['seq_len'],
        pad_token='[PAD]')
    return tokenizer
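
# Minimal usage sketch (downloads the GPT-2 vocab files on first call):
#   tokenizer = get_gpt2_tokenizer(get_config())
#   batch = tokenizer("To be, or not to be", truncation=True,
#                     padding='max_length', return_tensors='pt')
#   batch.input_ids.shape  # -> torch.Size([1, 350])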
def causal_mask(size):
    """Boolean (1, size, size) mask, True where a position may attend: itself and earlier positions."""
    # Ones strictly above the diagonal mark the future positions to hide;
    # comparing against 0 flips this into a lower-triangular True mask.
    mask = torch.triu(torch.ones((1, size, size)), diagonal=1).type(torch.int)
    return mask == 0
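
# Example: causal_mask(4) returns a (1, 4, 4) boolean tensor where True marks
# the positions a token may attend to (itself and everything before it):
#   tensor([[[ True, False, False, False],
#            [ True,  True, False, False],
#            [ True,  True,  True, False],
#            [ True,  True,  True,  True]]])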