<a href="https://colab.research.google.com/github/jsebdev/Stock_Predictor/blob/main/stock_predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Mount Google Drive so the project folder is reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
# Project folder on Drive; it becomes the working directory below, so the
# relative paths used later in the notebook resolve inside it.
project_path = '/content/drive/MyDrive/projects/Stock_Predicter'
# IPython magic: change the kernel's working directory to project_path.
%cd $project_path

Mounted at /content/drive
/content/drive/MyDrive/projects/Stock_Predicter


In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as web
import datetime as dt
import yfinance as yfin
import tensorflow as tf
import os
import re

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM


# Get Data

In [93]:
# Company to model for now (the symbol passed to the Yahoo download below)
ticker = 'AAPL'

# First and last date of the price history that is downloaded
start, end = dt.datetime(2013, 1, 1), dt.datetime(2023, 4, 5)

In [5]:
# Route pandas_datareader's Yahoo reader through yfinance.
# NOTE(review): pdr_override appears to be deprecated in recent yfinance
# releases - confirm against the installed version.
yfin.pdr_override()
# Download the price history for `ticker` between `start` and `end`.
data = web.data.get_data_yahoo(ticker, start, end)


[*********************100%***********************]  1 of 1 completed


# Preprocess data

In [111]:
def normalize_data(data, relative_to_previous=True, scaler=None):
  """Scale a price-history frame to the [-1, 1] range.

  Args:
    data: DataFrame with numeric columns. The last column is scaled on its
      own (it is ``Volume`` in this notebook); all other columns share one
      scale. With ``relative_to_previous=True`` the columns 'Open', 'High',
      'Low', 'Close' and 'Adj Close' must be present.
    relative_to_previous: When True, the price columns of each row are
      expressed as the difference to the previous row's 'Close'. The first
      row has no predecessor and is taken relative to its own 'Open'.
    scaler: A scaler returned by an earlier call. When given it is applied
      unchanged, so new data is scaled with the ranges of the data it was
      fitted on. When None, a new scaler is fitted on ``data``.

  Returns:
    A tuple ``(normalized_data, scaler, anti_scaler)``:
      * ``normalized_data`` - new float DataFrame; ``data`` is not modified.
      * ``scaler`` - callable mapping a DataFrame to its scaled copy.
      * ``anti_scaler`` - callable mapping scaled price values back to the
        unscaled (possibly relative) values. It is None only when a
        ``scaler`` without an ``inverse`` attribute was passed in.

  Raises:
    ValueError: if ``data`` has no rows.
  """
  if len(data) == 0:
    raise ValueError('normalize_data needs at least one row of data')

  price_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close']

  # astype returns a new float frame: the caller's data is never modified and
  # an integer column (Volume) can hold the scaled values.
  the_data = data.astype(float)

  if relative_to_previous:
    # Reference price per row: the previous Close, or the row's own Open for
    # the very first row. Built in one pass instead of concatenating rows.
    closes = the_data['Close'].to_numpy()
    reference = np.empty(len(the_data), dtype=float)
    reference[0] = the_data['Open'].iloc[0]
    reference[1:] = closes[:-1]
    the_data[price_columns] = the_data[price_columns].to_numpy() - reference[:, None]

  if scaler is None:
    # Fit the ranges: one shared range for every column but the last, and a
    # separate range for the last column.
    fitted_values = the_data.to_numpy()
    min_value = np.min(fitted_values[:, :-1])
    max_value = np.max(fitted_values[:, :-1])
    min_volume = np.min(fitted_values[:, -1])
    max_volume = np.max(fitted_values[:, -1])
    # A constant input would give a zero range; fall back to 1 so the scaled
    # values stay finite.
    value_span = (max_value - min_value) or 1.0
    volume_span = (max_volume - min_volume) or 1.0

    def scaler(frame):
      """Return a scaled copy of ``frame`` using the fitted ranges."""
      # Work on an explicit copy: writing through ``frame.values`` is not
      # reliable (it can be a copy or a read-only view).
      scaled = frame.to_numpy(dtype=float, copy=True)
      scaled[:, :-1] = (scaled[:, :-1] - min_value) / value_span * 2 - 1
      scaled[:, -1] = (scaled[:, -1] - min_volume) / volume_span * 2 - 1
      return pd.DataFrame(scaled, index=frame.index, columns=frame.columns)

    def anti_scaler(values):
      """Undo the price scaling on an array of scaled price values."""
      return (values + 1) * value_span / 2 + min_value

    # Keep the inverse on the scaler so it is still available when the scaler
    # is passed back in for new data.
    scaler.inverse = anti_scaler
  else:
    anti_scaler = getattr(scaler, 'inverse', None)

  normalized_data = scaler(the_data)

  return normalized_data, scaler, anti_scaler




In [112]:
# Normalise the downloaded history and keep the fitted scaler and decoder so
# later data and predictions can be converted with the same ranges.
norm_data, the_scaler, the_decoder = normalize_data(data, relative_to_previous=True)
# TODO: persist the_scaler somehow so it can be reused in new runtimes

In [41]:
# Number of rows in the normalised history
len(norm_data)

2583

In [9]:
# Number of consecutive rows fed to the model for each prediction
prediction_days = 100

# Every row that has a full window of `prediction_days` rows before it
# becomes one training sample.
target_rows = range(prediction_days, len(norm_data))

# Inputs: the window of rows preceding the target row.
x_train_list = [norm_data[row - prediction_days:row] for row in target_rows]
# Targets: the first four values of the target row.
y_train_list = [norm_data.iloc[row].values[0:4] for row in target_rows]

x_train, y_train = np.array(x_train_list), np.array(y_train_list)

In [10]:
# Sanity-check the training arrays: inputs are (samples, window, features),
# targets are (samples, target values).
print(x_train.shape)
print(y_train.shape)
# Shape of a single input sample (without the sample dimension)
print(x_train.shape[1:])

(2483, 100, 6)
(2483, 4)
(100, 6)


# Model

## Create Model

In [66]:
def create_model(n_features=None):
  """Build the (uncompiled) stacked-LSTM network.

  Args:
    n_features: Number of values per time step in the input. When omitted it
      is read from the last dimension of the global ``x_train``, so existing
      calls without arguments keep working.

  Returns:
    A Keras ``Sequential`` model: three LSTM layers (112, 112 and 50 units),
    each followed by 20% dropout, and a dense layer with 4 outputs.
  """
  if n_features is None:
    n_features = x_train.shape[-1]

  model = Sequential()
  # The time dimension is None so sequences of any length are accepted.
  model.add(LSTM(units=112, return_sequences=True, input_shape=(None, n_features)))
  model.add(Dropout(0.2))
  model.add(LSTM(units=112, return_sequences=True))
  model.add(Dropout(0.2))
  # The last LSTM returns only its final state, which feeds the dense layer.
  model.add(LSTM(units=50))
  model.add(Dropout(0.2))
  model.add(Dense(units=4))
  return model

model = create_model()
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, None, 112)         53312     
                                                                 
 dropout_3 (Dropout)         (None, None, 112)         0         
                                                                 
 lstm_4 (LSTM)               (None, None, 112)         100800    
                                                                 
 dropout_4 (Dropout)         (None, None, 112)         0         
                                                                 
 lstm_5 (LSTM)               (None, 50)                32600     
                                                                 
 dropout_5 (Dropout)         (None, 50)                0         
                                                                 
 dense_1 (Dense)             (None, 4)                

In [12]:
# Configure training: Adam optimiser minimising the mean squared error
model.compile(optimizer='adam', loss='mean_squared_error')

## Create checkpoint callback

In [35]:
# Checkpoints go into a directory stamped with the current date and time
checkpoint_dir = f'./training_checkpoints_{dt.datetime.now():%Y%m%d%H%M%S}'
# File name template; {epoch} and {loss} are filled in when a checkpoint is written
checkpoint_prefix = os.path.join(
    checkpoint_dir,
    "ckpt_epoch{epoch}_loss{loss}",
)

# Callback that stores the weights only (not the full model)
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix,
)

## Model Train

In [15]:
# Confirm the array shapes right before training
print(x_train.shape)
print(y_train.shape)

(2483, 100, 6)
(2483, 4)


In [40]:
# Peek at the second-to-last target vector
y_train[-2]

array([ 0.02002301,  0.0391905 , -0.09898045, -0.05744885])

In [37]:
# Train for 25 epochs in batches of 32; checkpoint_callback writes the
# weights under checkpoint_dir while training runs.
model.fit(x_train, y_train, epochs=25, batch_size=32, callbacks=[checkpoint_callback])


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7f5203d70cd0>

# Testing a model

In [49]:
# List the training checkpoint directories so one can be picked
!ls -d training_checkpoints_*/

training_checkpoints_20230406041748/


In [72]:
# Fresh network with the same architecture as the trained one; the saved
# weights are loaded into it in a later cell.
test_model = create_model()

In [87]:
# Training run whose checkpoints are loaded
checkpoint_dir = 'training_checkpoints_20230406041748'

def load_weights(epoch=None):
  """Load checkpointed weights from ``checkpoint_dir`` into ``test_model``.

  Args:
    epoch: Epoch number whose checkpoint should be loaded. When None, the
      checkpoint reported by ``tf.train.latest_checkpoint`` is used.

  Returns:
    The global ``test_model`` with the weights loaded.

  Raises:
    FileNotFoundError: if ``checkpoint_dir`` holds no matching checkpoint.
  """
  if epoch is None:
    weights_file = tf.train.latest_checkpoint(checkpoint_dir)
    if weights_file is None:
      raise FileNotFoundError(f'No checkpoint found in {checkpoint_dir!r}')
  else:
    index_suffix = '.index'
    # Raw string, escaped epoch and an end anchor: only names of the form
    # ckpt_epoch<epoch>_<anything>.index match.
    index_pattern = re.compile(rf'^ckpt_epoch{re.escape(str(epoch))}_.*\.index$')
    with os.scandir(checkpoint_dir) as entries:
      # Sorted so the choice is deterministic should several files match.
      index_files = sorted(
          entry.name for entry in entries if index_pattern.search(entry.name))
    if not index_files:
      raise FileNotFoundError(
          f'No checkpoint for epoch {epoch} found in {checkpoint_dir!r}')
    # The checkpoint prefix is the index file name without its suffix.
    weights_file = os.path.join(checkpoint_dir, index_files[0][:-len(index_suffix)])

  print(weights_file)
  test_model.load_weights(weights_file)
  return test_model

test_model = load_weights()

training_checkpoints_20230406041748/ckpt_epoch25_loss0.01064301934093237


In [99]:
# Test window: the 200 calendar days up to today. today() is read once so
# both ends of the window are derived from the same date.
# NOTE(review): in the recorded run this window lies inside the training
# range (start..end), so it is not an out-of-sample test - confirm intent.
test_end = dt.date.today()
test_start = test_end - dt.timedelta(days=200)

yfin.pdr_override()
test_data = web.data.get_data_yahoo(ticker, test_start, test_end)

[*********************100%***********************]  1 of 1 completed


In [100]:
# normalize_data returns (normalized_data, scaler, anti_scaler); only the
# normalised frame is needed here, so the remaining values are discarded.
test_data_norm, *_ = normalize_data(test_data, scaler=the_scaler)

In [102]:
# Check what type the normalised test data has
print(type(test_data_norm))

<class 'pandas.core.frame.DataFrame'>


In [104]:
# Add a leading batch dimension so the whole test window is fed to the model
# as a single sequence.
input_data = np.expand_dims(test_data_norm.values, axis=0)
print(input_data.shape)

(1, 138, 6)


In [105]:
# Run the model once over the whole test sequence
results = test_model.predict(input_data, batch_size=1)



In [113]:
# Raw network output, still in scaled units
print(results)
# the_decoder only undoes the scaling. The scaler was fitted with
# relative_to_previous=True, so the decoded values are offsets relative to
# the previous close rather than absolute prices.
print(the_decoder(results))

[[-0.01962117  0.09634934 -0.10176479 -0.00849891]]
[[-0.06636524  1.3856668  -1.0948591   0.0728941 ]]


In [107]:
# First rows of the raw (un-normalised) test data, for reference
test_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-09-19,149.309998,154.559998,149.100006,154.479996,153.989029,81474200
2022-09-20,153.399994,158.080002,153.080002,156.899994,156.401352,107689800
2022-09-21,157.339996,158.740005,153.600006,153.720001,153.231461,101696800
2022-09-22,152.380005,154.470001,150.910004,152.740005,152.254578,86652500
2022-09-23,151.190002,151.470001,148.559998,150.429993,149.951904,96029900
