import random

import torch

import comet.src.train.atomic_train as train
import comet.src.models.models as models
import comet.src.data.data as data
import comet.utils.utils as utils
import comet.src.train.utils as train_utils
import comet.src.data.config as cfg

from comet.src.data.utils import TextEncoder
from comet.src.train.opt import OpenAIAdam
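
# Training entry point for a COMET model on the ATOMIC knowledge graph.
# Assumes the comet.* package layout and the config/atomic/config_{num}.json
# experiment configs from the original COMET codebase.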

def main(num):
    # Generate configuration files depending on the experiment being run
    utils.generate_config_files("atomic", num)

    # Load the correct configuration file
    config_file = "config/atomic/config_{}.json".format(num)

    print(config_file)

    # Read the config file into options
    config = cfg.read_config(cfg.load_config(config_file))
    opt, meta = cfg.get_parameters(config)

    # Set the random seeds
    torch.manual_seed(opt.train.static.seed)
    random.seed(opt.train.static.seed)
    if config.gpu_mode:
        torch.cuda.manual_seed_all(opt.train.static.seed)

    # Where to find the data
    splits = ["train", "dev", "test"]

    opt.train.dynamic.epoch = 0

    print("Loading Data")

    categories = opt.data.categories

    path = "data/atomic/processed/{}/{}.pickle".format(
        opt.exp, utils.make_name_string(opt.data))
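    # Assumption: the pickle at this path is produced by the ATOMIC preprocessing
    # step of the COMET pipeline before training is run.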

    data_loader = data.make_data_loader(opt, categories)
    loaded = data_loader.load_data(path)
    print(data_loader.sequences["train"]["total"].size(0))
    data_loader.opt = opt
    data_loader.batch_size = opt.train.dynamic.bs

    print("Done.")

    # Initialize text_encoder
    text_encoder = TextEncoder(config.encoder_path, config.bpe_path)

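    # Special tokens: sequence start/end markers, one "<category>" marker per
    # ATOMIC relation category, and a blank placeholder token.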
    special = [data.start_token, data.end_token]
    special += ["<{}>".format(cat) for cat in categories]
    special += [data.blank_token]

    text_encoder.encoder = data_loader.vocab_encoder
    text_encoder.decoder = data_loader.vocab_decoder

    opt.data.maxe1 = data_loader.max_event
    opt.data.maxe2 = data_loader.max_effect
    opt.data.maxr = data.atomic_data.num_delimiter_tokens["category"]

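    # Context length is the maximum event length plus the maximum effect length.
    # In the OpenAI GPT implementation that COMET builds on, position embeddings
    # share the embedding matrix with the vocabulary, so n_ctx extra indices are
    # added to the vocabulary size (an assumption about the underlying model code).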
    n_special = len(special)
    n_ctx = opt.data.maxe1 + opt.data.maxe2
    n_vocab = len(text_encoder.encoder) + n_ctx

    print(data_loader.__dict__.keys())
    opt.net.vSize = n_vocab

print("Building Model") | |
    model = models.make_model(
        opt, n_vocab, n_ctx, n_special,
        load=(opt.net.init == "pt"))

    print("Done.")

    print("Files will be logged at: {}".format(
        utils.make_name(opt, prefix="results/losses/",
                        is_dir=True, eval_=True)))

    data_loader.reset_offsets("train")

    # Get number of examples
    data.set_max_sizes(data_loader)

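    # If GPU mode is enabled, pin the configured device and move the model onto
    # it, optionally spreading it across multiple GPUs via models.multi_gpu.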
    if config.gpu_mode:
        print("Pushing to GPU: {}".format(config.gpu_index))
        cfg.device = config.gpu_index
        cfg.do_gpu = True
        torch.cuda.set_device(cfg.device)
        if config.multigpu:
            model = models.multi_gpu(
                model, config.gpu_indices).cuda()
        else:
            model.cuda(cfg.device)
        print("Done.")

print("Training") | |
    optimizer = OpenAIAdam(model.parameters(),
                           lr=opt.train.dynamic.lr,
                           schedule=opt.train.static.lrsched,
                           warmup=opt.train.static.lrwarm,
                           t_total=meta.iterations,
                           b1=opt.train.static.b1,
                           b2=opt.train.static.b2,
                           e=opt.train.static.e,
                           l2=opt.train.static.l2,
                           vector_l2=opt.train.static.vl2,
                           max_grad_norm=opt.train.static.clip)

    scorers = ["bleu", "rouge", "cider"]

    trainer = train.make_trainer(
        opt, meta, data_loader, model, optimizer)
    trainer.set_evaluator(opt, model, data_loader)

    trainer.run()
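

# Hypothetical usage sketch: in the original COMET codebase, a small wrapper
# script calls this function with an experiment number, which selects
# config/atomic/config_{num}.json. For example:
#
#     if __name__ == "__main__":
#         main(0)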