"""
Training the network
"""
import datetime
import logging
import time
from typing import Sequence, Tuple
import torch
import dataloader
# Log levels: DEBUG < INFO < WARNING < ERROR < CRITICAL
logging.basicConfig(level=logging.INFO)
# Silence DEBUG messages and below (level 10 == logging.DEBUG)
logging.disable(level=logging.DEBUG)
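
# Load the training data once at module level and build the vocabulary:
# the Vectoriser maps tokens to indices (and back via its decode method,
# used by the commented-out debug call in the training loop below).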
data1 = dataloader.Data("data/train_extract.jsonl")
words = data1.get_words()
vectoriser = dataloader.Vectoriser(words)
def train_network(
    model: torch.nn.Module,
    train_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    dev_set: Sequence[Tuple[torch.Tensor, torch.Tensor]],
    epochs: int,
    clip: int = 1,
):
"""
Train the EncoderDecoderModel network for a given number of epoch
-----------
Parameters
model: torch.nn.Module
EncoderDecoderModel defined in model.py
train_set: Sequence[Tuple[torch.tensor, torch.tensor]]
tuple of vectorized (text, summary) from the training set
dev_set: Sequence[Tuple[torch.tensor, torch.tensor]]
tuple of vectorized (text, summary) for the dev set
epochs: int
the number of epochs to train on
clip: int
no idea
Return
None
"""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    print("Device check. You are using:", device)

    optim = torch.optim.Adam(model.parameters(), lr=0.01)

    print("Epoch\ttrain loss\tdev loss\tcompute time")
    for epoch_n in range(epochs):
        # Tell the model it's in train mode for layers designed to
        # behave differently in train or evaluation
        # https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch
        model.train()

        # To get the computing time per epoch
        epoch_start_time = time.time()

        # To track the average training loss per epoch
        epoch_loss = 0.0
        epoch_length = 0

        # Iterate over the (text, summary) tuples of the training set
        for source, target in train_set:
            source = source.to(device)
            target = target.to(device)
            # DEBUG block
            # logging.debug("TRAIN")
            # logging.debug(f"cuda available? {torch.cuda.is_available()}")
            # logging.debug(f"Is source on cuda? {source.is_cuda}")
            # logging.debug(f"Is target on cuda? {target.is_cuda}")
            out = model(source)
            logging.debug(f"outputs = {out.shape}")

            # Pad the target to the output length with -100, the index
            # that nll_loss ignores by default, so both tensors match
            target = torch.nn.functional.pad(
                target, (0, len(out) - len(target)), value=-100
            )
            # logging.debug(f"prediction: {vectoriser.decode(output_predictions)}")

            loss = torch.nn.functional.nll_loss(out, target)

            # Reset the gradients accumulated during the previous step
            optim.zero_grad()
            loss.backward()
            # Clip the gradient norm to `clip` to avoid exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optim.step()

            epoch_loss += loss.item()
            epoch_length += source.shape[0]
        # Check the model loss on unseen data: switch to evaluation mode
        # and disable gradient tracking, which is not needed here
        model.eval()
        dev_loss_sum = 0.0
        dev_length = 0

        with torch.no_grad():
            # Iterate over the (text, summary) tuples of the dev set
            for source, target in dev_set:
                source = source.to(device)
                target = target.to(device)

                # We compute the result
                output = model(source)
                output_dim = output.shape[-1]
                output = output[1:].view(-1, output_dim)
                logging.debug(f"dev output : {output.shape}")
                target = target[1:].view(-1)

                # To compare the output with the target, they have to be
                # of the same length, so we pad the target with the -100
                # index that will be ignored by the nll_loss function
                target = torch.nn.functional.pad(
                    target, (0, len(output) - len(target)), value=-100
                )

                dev_loss = torch.nn.functional.nll_loss(output, target)
                dev_loss_sum += dev_loss.item()
                dev_length += source.shape[0]
        # Compute the epoch training time
        epoch_compute_time = time.time() - epoch_start_time
        print(
            f"{epoch_n}\t{epoch_loss/epoch_length:.5}\t{dev_loss_sum/dev_length:.5}\t\t{datetime.timedelta(seconds=epoch_compute_time)}"
        )
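

# A minimal smoke-test sketch, not part of the original file: it drives
# train_network with a tiny stand-in model and random data, so it relies
# only on the contract documented above. The real entry point would
# presumably build an EncoderDecoderModel from model.py instead; the
# DummyModel below is a hypothetical placeholder.
if __name__ == "__main__":
    vocab_size = 10

    class DummyModel(torch.nn.Module):
        """Stand-in for EncoderDecoderModel: maps each source index to
        log-probabilities over the vocabulary, as nll_loss expects."""

        def __init__(self):
            super().__init__()
            self.embed = torch.nn.Embedding(vocab_size, vocab_size)

        def forward(self, source):
            return torch.nn.functional.log_softmax(self.embed(source), dim=-1)

    def random_pair(length=8):
        # A vectorised (text, summary) pair of random token indices
        return (
            torch.randint(0, vocab_size, (length,)),
            torch.randint(0, vocab_size, (length,)),
        )

    train_pairs = [random_pair() for _ in range(4)]
    dev_pairs = [random_pair() for _ in range(2)]
    train_network(DummyModel(), train_pairs, dev_pairs, epochs=2)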