"""lyrics_generation_rnn.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1MkBq8eqZoPqaVDczKmYhSThcV4r23z25
"""

# NOTE: `pickle` and `string` are part of the Python standard library, so the
# notebook cells `!pip install pickle` and `!pip install string` are not
# needed. `!` shell escapes are also not valid syntax in a plain .py file,
# which is why they are kept here only as this comment.
import os
import pickle
import string
import time
from string import punctuation

import numpy as np
import tensorflow as tf

# Root folder on the mounted Google Drive under which artefacts are stored.
model_path = '/content/drive/MyDrive/Colab Notebooks'

# Folder that receives the pickled vocabulary, lookup tables and model
# hyperparameters. `exist_ok=True` replaces the separate existence check and
# missing parent folders are created as well.
os.makedirs(os.path.join(model_path, 'pkl'), exist_ok=True)

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all
# up front. Nothing happens when no GPU is visible.
gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print('\n', len(gpus), 'Physical GPUs,', len(logical_gpus), 'Logical GPU')
    except RuntimeError as e:
        # Best effort: report the problem and carry on with default settings.
        print('\n', e)

# Read the raw lyrics corpus; the context manager closes the file handle.
with open('/content/drake.txt', 'r') as lyrics_file:
    text = lyrics_file.read()

# Lower-case the text and pad every newline with spaces so that '\n' survives
# the split below as a token of its own.
words = text.lower().replace('\n', ' \n ')

# Remove every ASCII punctuation character in a single pass.
words = words.translate(str.maketrans('', '', punctuation))

# Tokenise on single spaces. NOTE: runs of consecutive spaces yield
# empty-string tokens, which therefore end up in the vocabulary as well.
words = words.split(' ')

# Sorted list of the distinct tokens; a token's position is its integer id.
vocab = sorted(set(words))
print(f'\nThere are {len(vocab)} unique words in the lyrics file.')

with open(file='/content/drive/MyDrive/Colab Notebooks/pkl/vocab', mode='wb') as outfile:
    pickle.dump(vocab, outfile)

# Token -> integer id.
word2idx = {u: i for i, u in enumerate(vocab)}

with open(file='/content/drive/MyDrive/Colab Notebooks/pkl/word2idx', mode='wb') as outfile:
    pickle.dump(word2idx, outfile)

# Integer id -> token (array indexed by id).
idx2word = np.array(vocab)

with open(file='/content/drive/MyDrive/Colab Notebooks/pkl/idx2word', mode='wb') as outfile:
    pickle.dump(idx2word, outfile)

# The whole corpus encoded as integer ids.
words_as_int = np.array([word2idx[c] for c in words])

# Number of input tokens per training example.
seq_length = 100

# Each example consumes seq_length + 1 tokens (inputs plus the shifted target).
examples_per_epoch = len(words) // (seq_length + 1)

# Stream of single token ids.
word_dataset = tf.data.Dataset.from_tensor_slices(words_as_int)

print('\n', type(word_dataset))

# Group the stream into chunks of seq_length + 1 ids; an incomplete trailing
# chunk is dropped.
sequences = word_dataset.batch(seq_length + 1, drop_remainder=True)
print('\n', type(sequences))

def split_input_target(chunk):
    """Split a token chunk into an (input, target) pair shifted by one step.

    Args:
        chunk: Sliceable sequence of token ids.

    Returns:
        A tuple ``(input_text, target_text)`` where ``input_text`` is the
        chunk without its last element and ``target_text`` is the chunk
        without its first element, so ``target_text[i]`` is the token that
        follows ``input_text[i]``.
    """
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

# Turn every chunk into an (input, target) training pair.
dataset = sequences.map(split_input_target)

# Number of sequences per training batch.
BATCH_SIZE = 64

# Size of the shuffle buffer.
BUFFER_SIZE = 10000

# Shuffle the pairs and batch them; an incomplete final batch is dropped.
dataset_sb = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

print('\n', dataset_sb)

# Model hyperparameters.
vocab_size = len(vocab)
embedding_dim = 256
rnn_units = 1024

# Persist the hyperparameters so the network can be rebuilt for generation.
model_params = [vocab_size, embedding_dim, rnn_units]
with open(file='/content/drive/MyDrive/Colab Notebooks/pkl/model_params', mode='wb') as outfile:
    pickle.dump(model_params, outfile)

def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Build the Embedding -> GRU -> Dense word-level language model.

    Args:
        vocab_size: Number of distinct tokens; used as the embedding input
            dimension and as the width of the final Dense layer.
        embedding_dim: Size of each token embedding vector.
        rnn_units: Number of units in the GRU layer.
        batch_size: Batch size fixed into the model's input shape (the GRU
            is created with ``stateful=True``).

    Returns:
        An uncompiled ``tf.keras.Sequential`` model. The final Dense layer
        has no activation, so its outputs are raw logits.
    """
    model = tf.keras.Sequential()

    model.add(tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        batch_input_shape=[batch_size, None],
    ))

    model.add(tf.keras.layers.GRU(
        units=rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    ))

    model.add(tf.keras.layers.Dense(units=vocab_size))

    return model


def save_model(self, model_path):
    """Save the weights of a Keras model and report where they went.

    Args:
        self: The model whose weights are saved (any object exposing
            ``save_weights``). The parameter name is kept from the original
            helper.
        model_path: Destination passed straight to ``save_weights``.
    """
    self.save_weights(model_path)
    print(f"Model saved to {model_path}")

# Training model: the batch size must match the batches in `dataset_sb`.
rnn = build_model(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE,
)

# Sanity check: push one batch through the untrained model and show the
# shape of the output.
for input_example_batch, target_example_batch in dataset_sb.take(1):
    example_batch_predictions = rnn(input_example_batch)
    print('\n', example_batch_predictions.shape, '# (batch_size, sequence_length, vocab_size)')

# `summary()` prints the table itself and returns None, so wrapping it in
# print() would only append a stray "None".
print()
rnn.summary()
print()

def loss(labels, logits):
    """Sparse categorical cross-entropy computed on raw logits.

    Args:
        labels: Integer class ids of the target tokens.
        logits: Unnormalised model outputs (``from_logits=True`` is passed,
            so no softmax must have been applied to them).

    Returns:
        The result of ``tf.keras.losses.sparse_categorical_crossentropy``.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

rnn.compile(
    optimizer='adam',
    loss=loss,
    metrics=['accuracy'],
)

# Folder on Google Drive that receives the training checkpoints.
checkpoint_dir = '/content/drive/MyDrive/Colab Notebooks/training_checkpoints'

# Path prefix of the checkpoint files inside that folder.
checkpoint_prefix = os.path.join(checkpoint_dir, 'checkpoint')

# Keep only the weights of the epoch with the lowest training loss.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    monitor='loss',
    save_best_only=True,
    mode='min',
    save_weights_only=True,
)

EPOCHS = 200

history = rnn.fit(
    x=dataset_sb,
    epochs=EPOCHS,
    callbacks=[checkpoint_callback],
)

# FIX: the original called `build_model.save(...)`, but `build_model` is the
# factory *function* and has no `save` attribute (AttributeError). The trained
# instance is `rnn`. Only the weights are stored: the generation code further
# down rebuilds the network with batch_size=1 and restores weights into it.
final_weights_path = '/content/drive/MyDrive/Colab Notebooks/final_weights'
rnn.save_weights(final_weights_path)
print(f"Model saved to {final_weights_path}")

import pickle
from string import punctuation

import tensorflow as tf

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all
# up front. Nothing happens when no GPU is visible.
gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print('\n', len(gpus), 'Physical GPUs,', len(logical_gpus), 'Logical GPU')
    except RuntimeError as e:
        # Best effort: report the problem and carry on with default settings.
        print('\n', e)

def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Build the Embedding -> GRU -> Dense word-level language model.

    Args:
        vocab_size: Number of distinct tokens; used as the embedding input
            dimension and as the width of the final Dense layer.
        embedding_dim: Size of each token embedding vector.
        rnn_units: Number of units in the GRU layer.
        batch_size: Batch size fixed into the model's input shape (the GRU
            is created with ``stateful=True``).

    Returns:
        An uncompiled ``tf.keras.Sequential`` model. The final Dense layer
        has no activation, so its outputs are raw logits.
    """
    model = tf.keras.Sequential()

    model.add(tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        batch_input_shape=[batch_size, None],
    ))

    model.add(tf.keras.layers.GRU(
        units=rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    ))

    model.add(tf.keras.layers.Dense(units=vocab_size))

    return model


def save_model(self, model_path):
    """Save the weights of a Keras model and report where they went.

    Args:
        self: The model whose weights are saved (any object exposing
            ``save_weights``). The parameter name is kept from the original
            helper.
        model_path: Destination passed straight to ``save_weights``.
    """
    self.save_weights(model_path)
    print(f"Model saved to {model_path}")

# The training section writes its pickles and checkpoints below this Drive
# folder, so they are read back from the same absolute location rather than
# from paths relative to the current working directory.
artifact_dir = '/content/drive/MyDrive/Colab Notebooks'

# SECURITY NOTE: pickle.load executes arbitrary code from the file; only load
# pickles that were produced by this project.
with open(file=f'{artifact_dir}/pkl/model_params', mode='rb') as infile:
    vocab_size, embedding_dim, rnn_units = pickle.load(infile)

# Same architecture as for training, but with batch size 1 for generation.
rnn_cp = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# `latest_checkpoint` returns None when no checkpoint is found; fail with a
# clear message instead of handing None to `load_weights`.
checkpoint_path = tf.train.latest_checkpoint(f'{artifact_dir}/training_checkpoints')
if checkpoint_path is None:
    raise FileNotFoundError(
        f'No checkpoint found in {artifact_dir}/training_checkpoints'
    )
rnn_cp.load_weights(checkpoint_path)

rnn_cp.build(tf.TensorShape([1, None]))

# Lookup tables: token -> id and id -> token.
with open(file=f'{artifact_dir}/pkl/word2idx', mode='rb') as infile:
    word2idx = pickle.load(infile)

with open(file=f'{artifact_dir}/pkl/idx2word', mode='rb') as infile:
    idx2word = pickle.load(infile)

def generate_text(model, start_string, num_generate=500, temperature=1.0):
    """Generate lyrics word by word, seeded with ``start_string``.

    Args:
        model: Model that maps a batch of token ids to per-step logits and
            exposes ``reset_states()``; it is called with a batch of one.
        start_string: Sequence of seed words. Every word must be a key of
            the module-level ``word2idx`` mapping.
        num_generate: Number of words to generate after the seed.
        temperature: Positive number the logits are divided by before
            sampling. Values below 1 sharpen the distribution (more
            predictable text), values above 1 flatten it (more surprising
            text).

    Returns:
        The seed words followed by the generated words, joined by single
        spaces.

    Raises:
        ValueError: If ``start_string`` is empty or ``temperature`` is not
            positive.
        KeyError: If a seed word is missing from ``word2idx``.
    """
    if not start_string:
        raise ValueError('start_string must contain at least one word')
    if temperature <= 0:
        raise ValueError('temperature must be a positive number')

    # Encode the seed words and add the batch dimension.
    input_eval = [word2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input=input_eval, axis=0)

    text_generated = []

    # Start from a clean recurrent state.
    model.reset_states()

    for _ in range(num_generate):
        predictions = model(input_eval)

        # Drop the batch dimension.
        predictions = tf.squeeze(predictions, 0)

        # FIX: the original stored the scaled logits in a misspelled variable
        # and reset `temperature` to 1.0, so the argument never had any effect.
        predictions = predictions / temperature

        # Sample from the distribution of the last time step.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Feed only the sampled word back in; the model state carries the
        # context of everything seen so far.
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2word[predicted_id])

    return ' '.join(list(start_string) + text_generated)

# The vocabulary is pickled by the training section under this absolute Drive
# path, so it is read back from there rather than from a relative path.
# SECURITY NOTE: only unpickle files that were produced by this project.
with open(file='/content/drive/MyDrive/Colab Notebooks/pkl/vocab', mode='rb') as infile:
    vocab = pickle.load(infile)

# Set copy of the vocabulary for fast membership tests.
known_words = set(vocab)

# Keep prompting until every word of the input is part of the vocabulary.
check = True

while check:
    start_string = input('\nPlease input some text to initiate the lyrics generation (caps insensitive):\n')

    # Normalise the prompt the same way as the training text: lower-case,
    # strip punctuation, split on single spaces.
    start_string = start_string.lower()
    start_string = start_string.translate(str.maketrans('', '', punctuation))
    start_string = start_string.split(' ')

    # Words the model has never seen cannot be encoded.
    non_vocab = [word for word in start_string if word not in known_words]

    if not non_vocab:
        check = False
    else:
        print(f'\nWords in the input text not present in the vocabulary are: {", ".join(non_vocab)}')
        print('\nAll input words must be in the vocabulary.')

print('\n', generate_text(rnn_cp, start_string=start_string, num_generate=250))

# FIX: the original called `build_model.save(...)` and then bound `model` to
# `build_model`; `build_model` is the factory *function*, which has no `save`
# attribute (AttributeError). The model instance available here is `rnn_cp`,
# so `model` now refers to it and its weights are written to Drive.
model = rnn_cp
model.save_weights('/content/drive/MyDrive/Colab Notebooks/generation_weights')


"""import tensorflow as tf

build_model.state_dict()

# Assuming you have a trained model named 'model'
model = ...

# Define the path to save the model
model_path = 'path_to_save_model'

# Save the entire model (architecture, weights, and optimizer state)
model.save(model_path)

# Alternatively, you can save only the model weights
model.save_weights('path_to_save_weights')

# You can also save the model in a format optimized for serving
tf.saved_model.save(model, 'path_for_serving')

"""