import torch # creates tensors and provides helper functions
import torch.nn as nn # for nn.Module(), nn.Embedding() and nn.Linear()
import torch.nn.functional as F # gives us activation functions like softmax()
from torch.optim import Adam # Adam optimizer, a variant of stochastic gradient descent
from torch.utils.data import TensorDataset, DataLoader # for batching the training data
# first, create a dict that maps vocabulary tokens to id numbers
token_to_id = {
    'what': 0,
    'is': 1,
    'your': 2,
    'name': 3,
    'gpt': 4,
    'my': 5,
    '<EOS>': 6,  # END OF SEQUENCE
    '<PAD>': 7,  # PADDING
}
# note: the ids are contiguous (0-7) so that every id is a valid index into
# an embedding table of size VOCAB_SIZE = len(token_to_id)
# create the dict that maps ids back to tokens, for interpreting the model output.
id_to_token = dict(map(reversed, token_to_id.items()))
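# quick sanity check: a token should survive a round trip through both dicts
assert id_to_token[token_to_id['gpt']] == 'gpt'  # 'gpt' -> 4 -> 'gpt'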
VOCAB_SIZE = len(token_to_id)  # 8 tokens in total
SEQ_LEN = 6  # every training sequence is 6 tokens long
D_MODEL = 2  # size of the embeddings (tiny, so they are easy to inspect)
# since we are using a decoder-only transformer, each input contains
# the question, followed by an <EOS> token, followed by the response (e.g. 'gpt').
# all of these tokens are fed to the model at once during training,
# a technique called teacher forcing.
# teacher forcing helps us train the neural network faster.
inputs = torch.tensor([
[
token_to_id['what'],
token_to_id['is'],
token_to_id['your'],
token_to_id['name'],
],
[
token_to_id['gpt'],
token_to_id['is'],
token_to_id['my'],
]
])
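# decode the first sequence back into tokens to verify the layout:
# the question, then <EOS>, then the response
print([id_to_token[i] for i in inputs[0].tolist()])
# ['what', 'is', 'your', 'name', '<EOS>', 'gpt']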
# because we are using a decoder-only transformer, the labels, or the
# predictions, are the input question (minus the first word) followed by
# <EOS> gpt <EOS>. the first <EOS> means we are done processing the input
# question, and the second means we are done generating the output.
labels = torch.tensor([
    [  # targets for 'what is your name <EOS> gpt'
        token_to_id['is'],
        token_to_id['your'],
        token_to_id['name'],
        token_to_id['<EOS>'],
        token_to_id['gpt'],
        token_to_id['<EOS>'],
    ],
    [  # targets for 'gpt is my <EOS> name <PAD>'
        token_to_id['is'],
        token_to_id['my'],
        token_to_id['<EOS>'],
        token_to_id['name'],
        token_to_id['<EOS>'],
        token_to_id['<PAD>'],
    ],
])
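# line up each input token with the label the model should predict next;
# each label is just the input shifted one position to the left, which
# makes the teacher-forcing setup easy to see
for inp, lbl in zip(inputs[0].tolist(), labels[0].tolist()):
    print(f'{id_to_token[inp]:>6} -> {id_to_token[lbl]}')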
dataset = TensorDataset(inputs, labels)
dataloader = DataLoader(dataset=dataset)  # default batch_size=1, no shuffling
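# pull one batch to confirm what the model will see during training;
# with the default batch_size=1, each batch is a single (1, 6) sequence
batch_inputs, batch_labels = next(iter(dataloader))
print(f'Batch from dataloader: {batch_inputs.shape}, {batch_labels.shape}')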
print(f'Shape of the inputs: {inputs.shape}')  # torch.Size([2, 6])
print(f'Shape of the labels: {labels.shape}')  # torch.Size([2, 6])
# unsqueeze(0) adds a leading batch dimension, treating the whole dataset
# as a single batch of shape (1, 2, 6)
x = inputs.unsqueeze(0)
y = labels.unsqueeze(0)
print(f'Batch input: {x.shape}')
print(f'Batch labels: {y.shape}')
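# a minimal sketch of where VOCAB_SIZE and D_MODEL come in (illustrative only,
# not the full model): an embedding layer turns each token id into a
# D_MODEL-dimensional vector that the transformer layers will operate on
embedding = nn.Embedding(num_embeddings=VOCAB_SIZE, embedding_dim=D_MODEL)
embedded = embedding(inputs)
print(f'Embedded inputs: {embedded.shape}')  # torch.Size([2, 6, 2])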