# Flat mapping of scalar settings, one key per line (block style).
# NOTE(review): the key names suggest model/training hyperparameters, but the
# consumer is not visible here; confirm meanings and expected types (e.g.
# whether `dropout` must be a float) against the code that loads this file.
batch_size: 1
dropout: 0
learning_rate: 0.0001
max_length: 64
n_embed: 256
n_head: 8
n_layer: 8
vocab_size: 2048