Added lstm stock prediction #1777
@@ -0,0 +1,53 @@
{
    "data": {
        "filename": "sp500.csv",
        "columns": [
            "Close",
            "Volume"
        ],
        "sequence_length": 50,
        "train_test_split": 0.85,
        "normalise": true
    },
    "training": {
        "epochs": 2,
        "batch_size": 32
    },
    "model": {
        "loss": "mse",
        "optimizer": "adam",
        "save_dir": "saved_models",
        "layers": [
            {
                "type": "lstm",
                "neurons": 100,
                "input_timesteps": 49,
                "input_dim": 2,
                "return_seq": true
            },
            {
                "type": "dropout",
                "rate": 0.2
            },
            {
                "type": "lstm",
                "neurons": 100,
                "return_seq": true
            },
            {
                "type": "lstm",
                "neurons": 100,
                "return_seq": false
            },
            {
                "type": "dropout",
                "rate": 0.2
            },
            {
                "type": "dense",
                "neurons": 1,
                "activation": "linear"
            }
        ]
    }
}
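For reference, a minimal sketch of how a config like this might be loaded (the config.json filename is an assumption; the diff does not show where this file lives):

import json

with open('config.json') as f:
    configs = json.load(f)

# "sequence_length": 50 windows are split into 49 input timesteps plus 1 target,
# which is why the first LSTM layer declares "input_timesteps": 49
assert configs['data']['sequence_length'] - 1 == \
    configs['model']['layers'][0]['input_timesteps']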
@@ -0,0 +1,2 @@
import warnings
warnings.filterwarnings("ignore")
@@ -0,0 +1,84 @@
import math
import numpy as np
import pandas as pd


class DataLoader():
    """A class for loading and transforming data for the lstm model"""

    def __init__(self, filename, split, cols):
        dataframe = pd.read_csv(filename)
        i_split = int(len(dataframe) * split)
        self.data_train = dataframe.get(cols).values[:i_split]
        self.data_test = dataframe.get(cols).values[i_split:]
        self.len_train = len(self.data_train)
        self.len_test = len(self.data_test)
        self.len_train_windows = None

    def get_test_data(self, seq_len, normalise):
        '''
        Create x, y test data windows
        Warning: batch method, not generative, make sure you have enough memory to
        load data, otherwise reduce size of the training split.
        '''
        data_windows = []
        for i in range(self.len_test - seq_len):
            data_windows.append(self.data_test[i:i+seq_len])

        data_windows = np.array(data_windows).astype(float)
        data_windows = self.normalise_windows(data_windows, single_window=False) if normalise else data_windows
Review comment: This line is too long for a ternary if... Just do a normal if and wrap lines to 88 char max.
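One way to apply the reviewer's suggestion (a sketch, not part of the committed diff):

        if normalise:
            data_windows = self.normalise_windows(
                data_windows, single_window=False
            )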
        x = data_windows[:, :-1]
        y = data_windows[:, -1, [0]]
        return x, y

    def get_train_data(self, seq_len, normalise):
        '''
        Create x, y train data windows
        Warning: batch method, not generative, make sure you have enough memory to
        load data, otherwise use the generate_train_batch() method.
        '''
        data_x = []
        data_y = []
        for i in range(self.len_train - seq_len):
            x, y = self._next_window(i, seq_len, normalise)
            data_x.append(x)
            data_y.append(y)
        return np.array(data_x), np.array(data_y)

    def generate_train_batch(self, seq_len, batch_size, normalise):
        '''Yield a generator of training data from filename on given list of cols split for train/test'''
        i = 0
        while i < (self.len_train - seq_len):
            x_batch = []
            y_batch = []
            for b in range(batch_size):
                if i >= (self.len_train - seq_len):
                    # stop-condition for a smaller final batch if data doesn't divide evenly
                    yield np.array(x_batch), np.array(y_batch)
                    i = 0
                    # reset the lists so already-yielded windows are not repeated
                    # in the next batch (the original diff omitted this reset)
                    x_batch = []
                    y_batch = []
                x, y = self._next_window(i, seq_len, normalise)
                x_batch.append(x)
                y_batch.append(y)
                i += 1
            yield np.array(x_batch), np.array(y_batch)

    def _next_window(self, i, seq_len, normalise):
        '''Generates the next data window from the given index location i'''
        window = self.data_train[i:i+seq_len]
        window = self.normalise_windows(window, single_window=True)[0] if normalise else window
        x = window[:-1]
        y = window[-1, [0]]
        return x, y

    def normalise_windows(self, window_data, single_window=False):
        '''Normalise window with a base value of zero'''
        normalised_data = []
        window_data = [window_data] if single_window else window_data
        for window in window_data:
            normalised_window = []
            for col_i in range(window.shape[1]):
                normalised_col = [((float(p) / float(window[0, col_i])) - 1) for p in window[:, col_i]]
                normalised_window.append(normalised_col)
            # reshape and transpose array back into original multidimensional format
            normalised_window = np.array(normalised_window).T
            normalised_data.append(normalised_window)
        return np.array(normalised_data)
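To make the zero-base normalisation concrete, a small illustrative example (the file name and values are made up for the demo):

import numpy as np
import pandas as pd

# throwaway CSV so a DataLoader can be constructed (demo only)
pd.DataFrame({'Close': [100.0, 110.0, 121.0, 133.1]}).to_csv('demo.csv', index=False)
loader = DataLoader('demo.csv', split=0.75, cols=['Close'])

window = np.array([[100.0], [110.0], [121.0]])
print(loader.normalise_windows(window, single_window=True))
# -> [[[0.], [0.1], [0.21]]] (approximately): each value becomes p / p0 - 1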
@@ -0,0 +1,119 @@
import os
import math
import numpy as np
import datetime as dt
from numpy import newaxis
from core.utils import Timer
from keras.layers import Dense, Activation, Dropout, LSTM
from keras.models import Sequential, load_model
from keras.callbacks import EarlyStopping, ModelCheckpoint


class Model():
    """A class for building and running inference with an LSTM model"""

    def __init__(self):
        self.model = Sequential()

    def load_model(self, filepath):
        print('[Model] Loading model from file %s' % filepath)
        self.model = load_model(filepath)

    def build_model(self, configs):
        timer = Timer()
        timer.start()

        for layer in configs['model']['layers']:
            neurons = layer['neurons'] if 'neurons' in layer else None
Review comment: Let's use dict.get()... https://docs.python.org/3/library/stdtypes.html#dict.get — see the sketch after these lookups.
            dropout_rate = layer['rate'] if 'rate' in layer else None
            activation = layer['activation'] if 'activation' in layer else None
            return_seq = layer['return_seq'] if 'return_seq' in layer else None
            input_timesteps = layer['input_timesteps'] if 'input_timesteps' in layer else None
            input_dim = layer['input_dim'] if 'input_dim' in layer else None
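As the reviewer suggests, these lookups could use dict.get(), which returns None for missing keys by default (a sketch, not part of the committed diff):

            neurons = layer.get('neurons')
            dropout_rate = layer.get('rate')
            activation = layer.get('activation')
            return_seq = layer.get('return_seq')
            input_timesteps = layer.get('input_timesteps')
            input_dim = layer.get('input_dim')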
|
||
if layer['type'] == 'dense': | ||
self.model.add(Dense(neurons, activation=activation)) | ||
if layer['type'] == 'lstm': | ||
self.model.add(LSTM(neurons, input_shape=(input_timesteps, input_dim), return_sequences=return_seq)) | ||
if layer['type'] == 'dropout': | ||
self.model.add(Dropout(dropout_rate)) | ||
|
||
self.model.compile(loss=configs['model']['loss'], optimizer=configs['model']['optimizer']) | ||
|
||
print('[Model] Model Compiled') | ||
timer.stop() | ||
|
||
def train(self, x, y, epochs, batch_size, save_dir): | ||
timer = Timer() | ||
timer.start() | ||
print('[Model] Training Started') | ||
print('[Model] %s epochs, %s batch size' % (epochs, batch_size)) | ||
|
||
save_fname = os.path.join(save_dir, '%s-e%s.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S'), str(epochs))) | ||
callbacks = [ | ||
EarlyStopping(monitor='val_loss', patience=2), | ||
ModelCheckpoint(filepath=save_fname, monitor='val_loss', save_best_only=True) | ||
] | ||
self.model.fit( | ||
x, | ||
y, | ||
epochs=epochs, | ||
batch_size=batch_size, | ||
callbacks=callbacks | ||
) | ||
self.model.save(save_fname) | ||
|
||
print('[Model] Training Completed. Model saved as %s' % save_fname) | ||
timer.stop() | ||
|
||
def train_generator(self, data_gen, epochs, batch_size, steps_per_epoch, save_dir): | ||
timer = Timer() | ||
timer.start() | ||
print('[Model] Training Started') | ||
print('[Model] %s epochs, %s batch size, %s batches per epoch' % (epochs, batch_size, steps_per_epoch)) | ||
|
||
save_fname = os.path.join(save_dir, '%s-e%s.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S'), str(epochs))) | ||
callbacks = [ | ||
ModelCheckpoint(filepath=save_fname, monitor='loss', save_best_only=True) | ||
] | ||
self.model.fit_generator( | ||
data_gen, | ||
steps_per_epoch=steps_per_epoch, | ||
epochs=epochs, | ||
callbacks=callbacks, | ||
workers=1 | ||
) | ||
|
||
print('[Model] Training Completed. Model saved as %s' % save_fname) | ||
timer.stop() | ||
|
||
def predict_point_by_point(self, data): | ||
#Predict each timestep given the last sequence of true data, in effect only predicting 1 step ahead each time | ||
print('[Model] Predicting Point-by-Point...') | ||
predicted = self.model.predict(data) | ||
predicted = np.reshape(predicted, (predicted.size,)) | ||
return predicted | ||
|
||
def predict_sequences_multiple(self, data, window_size, prediction_len): | ||
#Predict sequence of 50 steps before shifting prediction run forward by 50 steps | ||
print('[Model] Predicting Sequences Multiple...') | ||
prediction_seqs = [] | ||
for i in range(int(len(data)/prediction_len)): | ||
curr_frame = data[i*prediction_len] | ||
predicted = [] | ||
for j in range(prediction_len): | ||
predicted.append(self.model.predict(curr_frame[newaxis,:,:])[0,0]) | ||
curr_frame = curr_frame[1:] | ||
curr_frame = np.insert(curr_frame, [window_size-2], predicted[-1], axis=0) | ||
prediction_seqs.append(predicted) | ||
return prediction_seqs | ||
|
||
def predict_sequence_full(self, data, window_size): | ||
#Shift the window by 1 new prediction each time, re-run predictions on new window | ||
print('[Model] Predicting Sequences Full...') | ||
curr_frame = data[0] | ||
predicted = [] | ||
for i in range(len(data)): | ||
predicted.append(self.model.predict(curr_frame[newaxis,:,:])[0,0]) | ||
curr_frame = curr_frame[1:] | ||
curr_frame = np.insert(curr_frame, [window_size-2], predicted[-1], axis=0) | ||
return predicted |
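For context, a minimal run script under stated assumptions: the JSON config above is saved as config.json, and the two files are importable as core.data_processor and core.model (only core.utils is confirmed by the imports in this diff; the other module paths are guesses):

import json
import os
from core.data_processor import DataLoader
from core.model import Model

configs = json.load(open('config.json'))
os.makedirs(configs['model']['save_dir'], exist_ok=True)  # ModelCheckpoint needs the dir

data = DataLoader(
    configs['data']['filename'],
    configs['data']['train_test_split'],
    configs['data']['columns'],
)
model = Model()
model.build_model(configs)

x, y = data.get_train_data(
    configs['data']['sequence_length'],
    configs['data']['normalise'],
)
model.train(
    x, y,
    configs['training']['epochs'],
    configs['training']['batch_size'],
    configs['model']['save_dir'],
)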
@@ -0,0 +1,13 @@
import datetime as dt


class Timer():

    def __init__(self):
        self.start_dt = None

    def start(self):
        self.start_dt = dt.datetime.now()

    def stop(self):
        end_dt = dt.datetime.now()
        print('Time taken: %s' % (end_dt - self.start_dt))
Review comment: Type hints and doctests please.
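One way the requested type hints and doctests could look (a sketch, not part of the committed diff):

import datetime as dt
from typing import Optional


class Timer:
    """Measures wall-clock time between start() and stop().

    >>> t = Timer()
    >>> t.start()
    >>> t.stop()  # doctest: +ELLIPSIS
    Time taken: 0:00:00...
    """

    def __init__(self) -> None:
        self.start_dt: Optional[dt.datetime] = None

    def start(self) -> None:
        self.start_dt = dt.datetime.now()

    def stop(self) -> None:
        end_dt = dt.datetime.now()
        print('Time taken: %s' % (end_dt - self.start_dt))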