
Added LSTM stock prediction #1777


Closed · wants to merge 4 commits
53 changes: 53 additions & 0 deletions neural_network/lstm_stock/config.json
@@ -0,0 +1,53 @@
{
    "data": {
        "filename": "sp500.csv",
        "columns": [
            "Close",
            "Volume"
        ],
        "sequence_length": 50,
        "train_test_split": 0.85,
        "normalise": true
    },
    "training": {
        "epochs": 2,
        "batch_size": 32
    },
    "model": {
        "loss": "mse",
        "optimizer": "adam",
        "save_dir": "saved_models",
        "layers": [
            {
                "type": "lstm",
                "neurons": 100,
                "input_timesteps": 49,
                "input_dim": 2,
                "return_seq": true
            },
            {
                "type": "dropout",
                "rate": 0.2
            },
            {
                "type": "lstm",
                "neurons": 100,
                "return_seq": true
            },
            {
                "type": "lstm",
                "neurons": 100,
                "return_seq": false
            },
            {
                "type": "dropout",
                "rate": 0.2
            },
            {
                "type": "dense",
                "neurons": 1,
                "activation": "linear"
            }
        ]
    }
}
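
For orientation, here is a minimal sketch of how a config like this is consumed. The key names come straight from the file above; the loading code itself is an assumption, since no run script is part of this diff:

import json

# Load the experiment configuration shown above.
with open("config.json") as f:
    configs = json.load(f)

print(configs["data"]["sequence_length"])        # 50
print(configs["data"]["train_test_split"])       # 0.85
print(configs["model"]["layers"][0]["neurons"])  # 100
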
2 changes: 2 additions & 0 deletions neural_network/lstm_stock/core/__init__.py
@@ -0,0 +1,2 @@
import warnings

# Suppress all Python warnings raised while this package runs.
warnings.filterwarnings("ignore")
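
The blanket filter above silences every warning the package triggers, including ones that may matter. If narrower suppression is wanted, a minimal sketch (the DeprecationWarning category here is only an illustrative choice, not something this PR uses):

import warnings

# Ignore a single warning category instead of silencing everything.
warnings.filterwarnings("ignore", category=DeprecationWarning)
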
84 changes: 84 additions & 0 deletions neural_network/lstm_stock/core/data_processor.py
@@ -0,0 +1,84 @@
import math
import numpy as np
import pandas as pd


class DataLoader:
    """A class for loading and transforming data for the lstm model"""

    def __init__(self, filename, split, cols):
[Review comment — @cclauss (Member), Feb 20, 2020] Type hints and doctests please.

        dataframe = pd.read_csv(filename)
        i_split = int(len(dataframe) * split)
        self.data_train = dataframe.get(cols).values[:i_split]
        self.data_test = dataframe.get(cols).values[i_split:]
        self.len_train = len(self.data_train)
        self.len_test = len(self.data_test)
        self.len_train_windows = None

    def get_test_data(self, seq_len, normalise):
        '''
        Create x, y test data windows
        Warning: batch method, not generative, make sure you have enough memory to
        load data, otherwise reduce size of the training split.
        '''
        data_windows = []
        for i in range(self.len_test - seq_len):
            data_windows.append(self.data_test[i:i + seq_len])

        data_windows = np.array(data_windows).astype(float)
        data_windows = self.normalise_windows(data_windows, single_window=False) if normalise else data_windows
[Review comment — Member] This line is too long for a ternary if... Just do a normal if and wrap lines to 88 char max.

        x = data_windows[:, :-1]
        y = data_windows[:, -1, [0]]
        return x, y

    def get_train_data(self, seq_len, normalise):
        '''
        Create x, y train data windows
        Warning: batch method, not generative, make sure you have enough memory to
        load data, otherwise use the generate_train_batch() method.
        '''
        data_x = []
        data_y = []
        for i in range(self.len_train - seq_len):
            x, y = self._next_window(i, seq_len, normalise)
            data_x.append(x)
            data_y.append(y)
        return np.array(data_x), np.array(data_y)

    def generate_train_batch(self, seq_len, batch_size, normalise):
        '''Yield a generator of training data from filename on given list of cols split for train/test'''
        i = 0
        while i < (self.len_train - seq_len):
            x_batch = []
            y_batch = []
            for b in range(batch_size):
                if i >= (self.len_train - seq_len):
                    # stop-condition for a smaller final batch if data doesn't divide evenly
                    yield np.array(x_batch), np.array(y_batch)
                    i = 0
                x, y = self._next_window(i, seq_len, normalise)
                x_batch.append(x)
                y_batch.append(y)
                i += 1
            yield np.array(x_batch), np.array(y_batch)

    def _next_window(self, i, seq_len, normalise):
        '''Generates the next data window from the given index location i'''
        window = self.data_train[i:i + seq_len]
        window = self.normalise_windows(window, single_window=True)[0] if normalise else window
        x = window[:-1]
        y = window[-1, [0]]
        return x, y

    def normalise_windows(self, window_data, single_window=False):
        '''Normalise window with a base value of zero'''
        normalised_data = []
        window_data = [window_data] if single_window else window_data
        for window in window_data:
            normalised_window = []
            for col_i in range(window.shape[1]):
                normalised_col = [((float(p) / float(window[0, col_i])) - 1) for p in window[:, col_i]]
                normalised_window.append(normalised_col)
            # reshape and transpose array back into original multidimensional format
            normalised_window = np.array(normalised_window).T
            normalised_data.append(normalised_window)
        return np.array(normalised_data)
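
Putting DataLoader to work: a hedged usage sketch driven by the config above. It assumes sp500.csv sits in the working directory and that the Close/Volume columns from config.json exist in it:

import numpy as np

from core.data_processor import DataLoader

data = DataLoader("sp500.csv", split=0.85, cols=["Close", "Volume"])

# Each x window holds seq_len - 1 = 49 timesteps of 2 features;
# each y is the (normalised) first-column value one step past the window.
x_train, y_train = data.get_train_data(seq_len=50, normalise=True)
print(x_train.shape)  # (len_train - 50, 49, 2)
print(y_train.shape)  # (len_train - 50, 1)

# The zero-base normalisation divides every value by the window's first
# value and subtracts 1, so [100, 105, 95] becomes [0.0, 0.05, -0.05].
window = np.array([[100.0], [105.0], [95.0]])
print(data.normalise_windows(window, single_window=True))
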
119 changes: 119 additions & 0 deletions neural_network/lstm_stock/core/model.py
@@ -0,0 +1,119 @@
import os
import math
import numpy as np
import datetime as dt
from numpy import newaxis
from core.utils import Timer
from keras.layers import Dense, Activation, Dropout, LSTM
from keras.models import Sequential, load_model
from keras.callbacks import EarlyStopping, ModelCheckpoint


class Model:
    """A class for building and inferencing an lstm model"""

    def __init__(self):
        self.model = Sequential()

    def load_model(self, filepath):
        print('[Model] Loading model from file %s' % filepath)
        self.model = load_model(filepath)

    def build_model(self, configs):
        timer = Timer()
        timer.start()

        for layer in configs['model']['layers']:
            neurons = layer['neurons'] if 'neurons' in layer else None
[Review comment — Member] Let's use dict.get()... https://docs.python.org/3/library/stdtypes.html#dict.get

    neurons = layer.get('neurons')  # Repeat below...

            dropout_rate = layer['rate'] if 'rate' in layer else None
            activation = layer['activation'] if 'activation' in layer else None
            return_seq = layer['return_seq'] if 'return_seq' in layer else None
            input_timesteps = layer['input_timesteps'] if 'input_timesteps' in layer else None
            input_dim = layer['input_dim'] if 'input_dim' in layer else None

            if layer['type'] == 'dense':
                self.model.add(Dense(neurons, activation=activation))
            if layer['type'] == 'lstm':
                self.model.add(LSTM(neurons, input_shape=(input_timesteps, input_dim), return_sequences=return_seq))
            if layer['type'] == 'dropout':
                self.model.add(Dropout(dropout_rate))

        self.model.compile(loss=configs['model']['loss'], optimizer=configs['model']['optimizer'])

        print('[Model] Model Compiled')
        timer.stop()

    def train(self, x, y, epochs, batch_size, save_dir):
        timer = Timer()
        timer.start()
        print('[Model] Training Started')
        print('[Model] %s epochs, %s batch size' % (epochs, batch_size))

        save_fname = os.path.join(save_dir, '%s-e%s.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S'), str(epochs)))
        callbacks = [
            # NOTE: 'val_loss' is only produced if fit() receives validation data
            # (e.g. a validation_split); otherwise these callbacks have nothing to monitor.
            EarlyStopping(monitor='val_loss', patience=2),
            ModelCheckpoint(filepath=save_fname, monitor='val_loss', save_best_only=True)
        ]
        self.model.fit(
            x,
            y,
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks
        )
        self.model.save(save_fname)

        print('[Model] Training Completed. Model saved as %s' % save_fname)
        timer.stop()

    def train_generator(self, data_gen, epochs, batch_size, steps_per_epoch, save_dir):
        timer = Timer()
        timer.start()
        print('[Model] Training Started')
        print('[Model] %s epochs, %s batch size, %s batches per epoch' % (epochs, batch_size, steps_per_epoch))

        save_fname = os.path.join(save_dir, '%s-e%s.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S'), str(epochs)))
        callbacks = [
            ModelCheckpoint(filepath=save_fname, monitor='loss', save_best_only=True)
        ]
        self.model.fit_generator(
            data_gen,
            steps_per_epoch=steps_per_epoch,
            epochs=epochs,
            callbacks=callbacks,
            workers=1
        )

        print('[Model] Training Completed. Model saved as %s' % save_fname)
        timer.stop()

    def predict_point_by_point(self, data):
        # Predict each timestep given the last sequence of true data,
        # in effect only predicting 1 step ahead each time
        print('[Model] Predicting Point-by-Point...')
        predicted = self.model.predict(data)
        predicted = np.reshape(predicted, (predicted.size,))
        return predicted

    def predict_sequences_multiple(self, data, window_size, prediction_len):
        # Predict a sequence of prediction_len steps, then shift the
        # prediction run forward by prediction_len steps
        print('[Model] Predicting Sequences Multiple...')
        prediction_seqs = []
        for i in range(int(len(data) / prediction_len)):
            curr_frame = data[i * prediction_len]
            predicted = []
            for j in range(prediction_len):
                predicted.append(self.model.predict(curr_frame[newaxis, :, :])[0, 0])
                curr_frame = curr_frame[1:]
                curr_frame = np.insert(curr_frame, [window_size - 2], predicted[-1], axis=0)
            prediction_seqs.append(predicted)
        return prediction_seqs

    def predict_sequence_full(self, data, window_size):
        # Shift the window by 1 new prediction each time, re-run predictions on new window
        print('[Model] Predicting Sequences Full...')
        curr_frame = data[0]
        predicted = []
        for i in range(len(data)):
            predicted.append(self.model.predict(curr_frame[newaxis, :, :])[0, 0])
            curr_frame = curr_frame[1:]
            curr_frame = np.insert(curr_frame, [window_size - 2], predicted[-1], axis=0)
        return predicted
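
A hedged end-to-end sketch tying Model to DataLoader and the config. It assumes the script runs from neural_network/lstm_stock/ (so the core imports resolve), that config.json and sp500.csv are present, and it creates the save_dir the config names; none of this wiring is prescribed by the diff itself:

import json
import os

from core.data_processor import DataLoader
from core.model import Model

with open("config.json") as f:
    configs = json.load(f)
os.makedirs(configs["model"]["save_dir"], exist_ok=True)

data = DataLoader(
    configs["data"]["filename"],
    configs["data"]["train_test_split"],
    configs["data"]["columns"],
)
model = Model()
model.build_model(configs)

x, y = data.get_train_data(
    seq_len=configs["data"]["sequence_length"],
    normalise=configs["data"]["normalise"],
)
model.train(
    x,
    y,
    epochs=configs["training"]["epochs"],
    batch_size=configs["training"]["batch_size"],
    save_dir=configs["model"]["save_dir"],
)

x_test, y_test = data.get_test_data(
    seq_len=configs["data"]["sequence_length"],
    normalise=configs["data"]["normalise"],
)
predicted = model.predict_point_by_point(x_test)  # one step ahead per window
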
13 changes: 13 additions & 0 deletions neural_network/lstm_stock/core/utils.py
@@ -0,0 +1,13 @@
import datetime as dt


class Timer:

    def __init__(self):
        self.start_dt = None

    def start(self):
        self.start_dt = dt.datetime.now()

    def stop(self):
        end_dt = dt.datetime.now()
        print('Time taken: %s' % (end_dt - self.start_dt))
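
Usage is just start/stop around the work being timed; a minimal sketch:

from core.utils import Timer

timer = Timer()
timer.start()
sum(range(10_000_000))  # stand-in for model building or training
timer.stop()            # prints e.g. 'Time taken: 0:00:00.312000'
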