I’m curious about the effect of zero-padding on layers downstream of an LSTM. I understand that when sequences are zero-padded and then masked, the LSTM handles the padding correctly.
My question is: if I feed the LSTM’s output into another layer, such as self-attention, does the mask still protect it? In other words, will the zero-padding leave the attention weights unaffected, and therefore have no effect on the model’s performance?
Example of an input segment fed into the model (three real timesteps followed by zero-padded rows):
[ 2  3.2  0.2  4.2  1.7
  3  1.0  3.2  1.0  2.3
  1  0.3  3.1  0.4  3.8
  0  0    0    0    0
  0  0    0    0    0
  0  0    0    0    0
  0  0    0    0    0
  0  0    0    0    0 ]
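Before the code, here is a quick check of what the mask looks like for such a batch (a standalone sketch, separate from my training script): Masking.compute_mask returns the boolean mask that Keras propagates downstream, with False on the all-zero rows, and an LSTM with return_sequences=True passes that mask through unchanged:

import numpy as np
import tensorflow as tf

batch = np.array([[[2, 3.2, 0.2, 4.2, 1.7],
                   [3, 1.0, 3.2, 1.0, 2.3],
                   [1, 0.3, 3.1, 0.4, 3.8],
                   [0, 0, 0, 0, 0],
                   [0, 0, 0, 0, 0]]], dtype=np.float32)

masking = tf.keras.layers.Masking(mask_value=0.0)
mask = masking.compute_mask(batch)
print(mask)  # tf.Tensor([[ True  True  True False False]], shape=(1, 5), dtype=bool)

lstm = tf.keras.layers.LSTM(4, return_sequences=True)
print(lstm.compute_mask(batch, mask))  # the same (1, 5) boolean mask, passed through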
Here is the code:
import os

import tensorflow as tf
import tensorflow_addons as tfa  # provides tfa.metrics.F1Score

def LSTM_model(input_shape, total_classes):
    input_layer = tf.keras.Input(shape=input_shape, name="time_series_activity")
    # Mask timesteps whose features are all zero; the mask propagates downstream.
    input_mask = tf.keras.layers.Masking(mask_value=0.0)(input_layer)
    lstm_l5 = tf.keras.layers.LSTM(128, activation='tanh',
                                   recurrent_initializer=tf.keras.initializers.Orthogonal(),
                                   dropout=0.5, recurrent_dropout=0.5,
                                   return_sequences=True)(input_mask)
    lstm_l6 = tf.keras.layers.LSTM(128, activation='tanh',
                                   recurrent_initializer=tf.keras.initializers.Orthogonal(),
                                   dropout=0.9, recurrent_dropout=0.5)(lstm_l5)
    output_layer = tf.keras.layers.Dense(total_classes, activation="softmax")(lstm_l6)
    return tf.keras.models.Model(inputs=input_layer, outputs=output_layer)
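
# --- A sketch of the variant I am asking about (placeholder num_heads/key_dim,
# not my current model): self-attention after the first LSTM. Whether Keras
# attention layers pick up the propagated mask automatically varies across TF
# versions, so the safe route is to rebuild the padding mask explicitly and
# pass it as attention_mask. (tf.keras.layers.Attention is documented to
# respect Keras masks on its inputs as well.)
def LSTM_attention_model(input_shape, total_classes):
    input_layer = tf.keras.Input(shape=input_shape, name="time_series_activity")
    input_mask = tf.keras.layers.Masking(mask_value=0.0)(input_layer)
    lstm_out = tf.keras.layers.LSTM(128, return_sequences=True)(input_mask)
    # True for real timesteps, False for all-zero padding rows.
    padding_mask = tf.keras.layers.Lambda(
        lambda x: tf.reduce_any(tf.not_equal(x, 0.0), axis=-1))(input_layer)
    # Shape (batch, 1, timesteps): every query may attend only to real keys,
    # so padded positions receive zero attention weight.
    attn_out = tf.keras.layers.MultiHeadAttention(num_heads=4, key_dim=32)(
        query=lstm_out, value=lstm_out,
        attention_mask=padding_mask[:, tf.newaxis, :])
    # Average only over unmasked timesteps before classifying.
    pooled = tf.keras.layers.GlobalAveragePooling1D()(attn_out, mask=padding_mask)
    output_layer = tf.keras.layers.Dense(total_classes, activation="softmax")(pooled)
    return tf.keras.models.Model(inputs=input_layer, outputs=output_layer)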
train_generator = Generator(train_x, train_y, shuffle_input_segments=True)
test_generator = Generator(test_x, test_y)
# Variable number of timesteps, fixed number of features per timestep.
input_shape = (None, train_x[0].shape[1])
model = LSTM_model(input_shape, total_classes)
model.summary()
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss="categorical_crossentropy",
              optimizer=opt,
              metrics=[tfa.metrics.F1Score(num_classes=total_classes, average='macro')])

path_checkpoint = os.path.join(filepath, dataset, "best_model_checkpoint")

# Keep only the weights that achieve the best validation macro-F1.
callback_model_saving = tf.keras.callbacks.ModelCheckpoint(filepath=path_checkpoint,
                                                           monitor='val_f1_score',
                                                           mode='max',
                                                           verbose=1,
                                                           save_best_only=True)
# Model.fit accepts generators directly; fit_generator is deprecated in TF 2.x.
# `callbacks` is an existing callback defined elsewhere in my script.
history = model.fit(train_generator, epochs=total_epoch, steps_per_epoch=len(train_generator),
                    callbacks=[callbacks, callback_model_saving],
                    validation_data=test_generator, validation_steps=len(test_generator))
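Finally, the sanity check I intend to run: in inference mode (dropout is inactive under model.predict), a model whose mask shields every downstream layer must produce identical predictions for a sequence with and without its padding rows. A sketch using the example segment from above:

import numpy as np

seq = np.array([[2, 3.2, 0.2, 4.2, 1.7],
                [3, 1.0, 3.2, 1.0, 2.3],
                [1, 0.3, 3.1, 0.4, 3.8]], dtype=np.float32)
padded = np.concatenate([seq, np.zeros((5, 5), dtype=np.float32)], axis=0)
# Both shapes are accepted because the time dimension of input_shape is None.
p_real = model.predict(seq[np.newaxis])
p_padded = model.predict(padded[np.newaxis])
print(np.allclose(p_real, p_padded, atol=1e-6))  # True when masking holds end to end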