	| """Sequential implementation of Recurrent Neural Network Language Model.""" | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| from espnet.nets.lm_interface import LMInterface | |


class SequentialRNNLM(LMInterface, torch.nn.Module):
    """Sequential RNNLM.

    See also:
        https://github.com/pytorch/examples/blob/4581968193699de14b56527296262dd76ab43557/word_language_model/model.py

    """

    @staticmethod
    def add_arguments(parser):
        """Add arguments to command line argument parser."""
        parser.add_argument(
            "--type",
            type=str,
            default="lstm",
            nargs="?",
            choices=["lstm", "gru"],
            help="Which type of RNN to use",
        )
        parser.add_argument(
            "--layer", "-l", type=int, default=2, help="Number of hidden layers"
        )
        parser.add_argument(
            "--unit", "-u", type=int, default=650, help="Number of hidden units"
        )
        parser.add_argument(
            "--dropout-rate", type=float, default=0.5, help="dropout probability"
        )
        return parser
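
    # Illustrative only: with the options registered above, a caller would
    # typically pass something like
    # ``--type lstm --layer 2 --unit 650 --dropout-rate 0.5`` on the command
    # line; the exact invocation depends on the training entry point.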

    def __init__(self, n_vocab, args):
        """Initialize class.

        Args:
            n_vocab (int): The size of the vocabulary.
            args (argparse.Namespace): Configurations. See :py:meth:`add_arguments`.

        """
        torch.nn.Module.__init__(self)
        self._setup(
            rnn_type=args.type.upper(),
            ntoken=n_vocab,
            ninp=args.unit,
            nhid=args.unit,
            nlayers=args.layer,
            dropout=args.dropout_rate,
        )

    def _setup(
        self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False
    ):
        """Build the embedding, RNN, and output projection layers."""
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        if rnn_type in ["LSTM", "GRU"]:
            self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
        else:
            try:
                nonlinearity = {"RNN_TANH": "tanh", "RNN_RELU": "relu"}[rnn_type]
            except KeyError:
                raise ValueError(
                    "An invalid option for `--model` was supplied, "
                    "options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']"
                )
            self.rnn = nn.RNN(
                ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout
            )
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers:
        # A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            if nhid != ninp:
                raise ValueError(
                    "When using the tied flag, nhid must be equal to emsize"
                )
            self.decoder.weight = self.encoder.weight

        self._init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers

    def _init_weights(self):
        # NOTE: original init in pytorch/examples
        # initrange = 0.1
        # self.encoder.weight.data.uniform_(-initrange, initrange)
        # self.decoder.bias.data.zero_()
        # self.decoder.weight.data.uniform_(-initrange, initrange)
        # NOTE: our default.py:RNNLM init
        for param in self.parameters():
            param.data.uniform_(-0.1, 0.1)

    def forward(self, x, t):
        """Compute LM loss value from buffer sequences.

        Args:
            x (torch.Tensor): Input ids. (batch, len)
            t (torch.Tensor): Target ids. (batch, len)

        Returns:
            tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Tuple of
                loss to backward (scalar),
                negative log-likelihood of t: -log p(t) (scalar) and
                the number of non-padding elements in x (scalar)

        Notes:
            The last two return values are used
            in perplexity: p(t)^{-1/n} = exp(-log p(t) / n)

        """
        y = self._before_loss(x, None)[0]
        # Mask out padded positions (this code assumes padding id 0).
        mask = (x != 0).to(y.dtype)
        loss = F.cross_entropy(y.view(-1, y.shape[-1]), t.view(-1), reduction="none")
        logp = loss * mask.view(-1)
        logp = logp.sum()
        count = mask.sum()
        return logp / count, logp, count
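
    # A small aside (not in the original source): given the returned
    # ``(loss, nll, count)`` triple, corpus-level perplexity can be recovered
    # as ``torch.exp(nll / count)``; the sketch at the end of this file
    # demonstrates this on toy data.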

    def _before_loss(self, input, hidden):
        """Embed, run the RNN, and project hidden states to vocab logits."""
        emb = self.drop(self.encoder(input))
        output, hidden = self.rnn(emb, hidden)
        output = self.drop(output)
        # Flatten to (steps * batch, nhid) for the linear layer, then restore.
        decoded = self.decoder(
            output.view(output.size(0) * output.size(1), output.size(2))
        )
        return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden

    def init_state(self, x):
        """Get an initial state for decoding.

        Args:
            x (torch.Tensor): The encoded feature tensor

        Returns: initial state

        """
        bsz = 1
        weight = next(self.parameters())
        if self.rnn_type == "LSTM":
            # LSTM state is a (hidden, cell) tuple; GRU/RNN use a single tensor.
            return (
                weight.new_zeros(self.nlayers, bsz, self.nhid),
                weight.new_zeros(self.nlayers, bsz, self.nhid),
            )
        else:
            return weight.new_zeros(self.nlayers, bsz, self.nhid)

    def score(self, y, state, x):
        """Score new token.

        Args:
            y (torch.Tensor): 1D torch.int64 prefix tokens.
            state: Scorer state for prefix tokens
            x (torch.Tensor): 2D encoder feature that generates ys.

        Returns:
            tuple[torch.Tensor, Any]: Tuple of
                torch.float32 scores for next token (n_vocab)
                and next state for ys

        """
        # Only the newest prefix token is fed; the recurrent ``state`` carries
        # the history, so scoring is incremental.
        y, new_state = self._before_loss(y[-1].view(1, 1), state)
        logp = y.log_softmax(dim=-1).view(-1)
        return logp, new_state
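

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module. It assumes a toy
    # vocabulary of 50 ids (with 0 reserved for padding) and an
    # argparse.Namespace carrying the fields consumed by ``__init__``; all
    # values below are illustrative, not ESPnet defaults.
    import argparse

    args = argparse.Namespace(type="lstm", layer=2, unit=16, dropout_rate=0.0)
    model = SequentialRNNLM(n_vocab=50, args=args)
    model.eval()

    # Training-style forward pass: (batch, len) input and target id tensors.
    x = torch.randint(1, 50, (4, 7))
    t = torch.randint(1, 50, (4, 7))
    loss, nll, count = model(x, t)
    print("loss:", loss.item(), "ppl:", torch.exp(nll / count).item())

    # Incremental scoring: feed a prefix and carry the RNN state forward.
    state = model.init_state(None)
    logp, state = model.score(torch.tensor([1, 2, 3]), state, None)
    print("next-token log-probs shape:", logp.shape)  # torch.Size([50])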