Hi guys!
I need your help to better understand LSTMs on what I think is a relatively simple sequence. The code below runs, but I am not getting the expected results. I suspect the problem is the way I shape the data, or the way I define the sequences. Could you please shed some light?
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import numpy as np
import matplotlib.pyplot as plt
# helper to plot a training metric over the epochs
def plot_graphs(history, string):
    plt.plot(history.history[string])
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.show()
#the data
# eleven samples; each sample is divided into 5 sub-sequences of 3 elements each
# e.g. 1,2,3 is the first sub-sequence of the first line/sequence, 10,11,12 is the next
# Expected behaviour:
# input 50,51 -> output 52
# input 61,62 -> output 70
# input 2002,2003 -> output 2010
data = np.array([
    1,2,3,10,11,12,20,21,22,30,31,32,40,41,42,
    101,102,103,110,111,112,120,121,122,130,131,132,140,141,142,
    201,202,203,210,211,212,220,221,222,230,231,232,240,241,242,
    301,302,303,310,311,312,320,321,322,330,331,332,340,341,342,
    401,402,403,410,411,412,420,421,422,430,431,432,440,441,442,
    501,502,503,510,511,512,520,521,522,530,531,532,540,541,542,
    601,602,603,610,611,612,620,621,622,630,631,632,640,641,642,
    701,702,703,710,711,712,720,721,722,730,731,732,740,741,742,
    801,802,803,810,811,812,820,821,822,830,831,832,840,841,842,
    901,902,903,910,911,912,920,921,922,930,931,932,940,941,942,
    1001,1002,1003,1010,1011,1012,1020,1021,1022,1030,1031,1032,1040,1041,1042
])
#I am not sure if this is the right way to shape the data
# (aiming for (samples, timesteps, features) = (11, 5, 3))
data = data.reshape(11, 5, 3)
#print(data)
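# quick sanity check (my addition): this confirms the reshape gives the layout
# I intend, not that this is necessarily the shape the LSTM needs
assert data.shape == (11, 5, 3)
# data[0] is now [[1 2 3] [10 11 12] [20 21 22] [30 31 32] [40 41 42]]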
#slice the data so that the 3rd element of each sub-sequence is the label
#and the first 2 elements are the input
#e.g. for 1,2,3: 1,2 is the input and 3 is the label
xs = data[:, :, :-1]
ys = data[:, :, -1:]
#print ('xs')
#print (xs)
#print ('ys')
#print (ys)
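# shape check (my addition): xs is (11, 5, 2) and ys is (11, 5, 1),
# i.e. 5 labels per sample, while the model below outputs a single value
# per sample -- I am not sure whether that mismatch is part of my problem
assert xs.shape == (11, 5, 2)
assert ys.shape == (11, 5, 1)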
#define the model
lossf = tf.keras.losses.MeanAbsoluteError()
model = Sequential()
#tried these as well, but they did not improve things
#model.add(tf.keras.layers.BatchNormalization(input_shape=( 5, 2) ))
#model.add(Bidirectional(LSTM(150, activation='relu')))
model.add(Bidirectional(LSTM(50, activation='relu'), input_shape=(5, 2)))
model.add(Dense(1))
adam = Adam(learning_rate=0.0001)
model.compile(loss=lossf, optimizer=adam, metrics=['accuracy'])
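# side note (my addition): 'accuracy' is a classification metric, so I am
# not sure it is meaningful for a regression like this; the loss curves are
# probably the ones to watch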
#fit
history = model.fit(xs, ys,
                    epochs=120,
                    verbose=1,
                    validation_split=0.1,
                    batch_size=5
                    #, shuffle=True
                    )
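# note (my addition): with only 11 samples, validation_split=0.1 leaves just
# a sample or two for validation, so the val_* curves will be very noisy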
#plot
plot_graphs(history, 'accuracy')
plot_graphs(history, 'loss')
plot_graphs(history, 'val_accuracy')
plot_graphs(history, 'val_loss')
#try it
predicted = model.predict([[[50, 51]]], verbose=0)  # expected 52
print('Predicted value', predicted)
predicted = model.predict([[[61, 62]]], verbose=0)  # expected 70
print('Predicted value', predicted)
predicted = model.predict([[[2002, 2003]]], verbose=0)  # expected 2010
print('Predicted value', predicted)
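One thing I noticed while writing this up: the model is built with input_shape=(5, 2), so the 1-timestep inputs above trigger an "incompatible shape" warning on my TF version. A call that matches the training shape would look like the sketch below; the window values are just my guess at a plausible 5-step input following the pattern of one training row.

# sketch (my addition): a (1, 5, 2) input matching input_shape=(5, 2),
# built like a training row, e.g. 51,52,53,60,61,62,...,90,91,92
window = np.array([[[51, 52], [60, 61], [70, 71], [80, 81], [90, 91]]])
predicted = model.predict(window, verbose=0)
print('Predicted value', predicted)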