Adding a transformer layer

How do I add a transformer layer to the code below? I am having difficulty finding the right parameters. I already have several LSTM layers in place; I want to combine LSTM + transformer and compare the performance.

model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(128, input_shape=(win_length, num_features), return_sequences=True))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))

model.add(tf.keras.layers.LSTM(128, return_sequences=False))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))
model.add(tf.keras.layers.Dropout(0.3))

model.add(tf.keras.layers.RepeatVector(win_length))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))

model.add(tf.keras.layers.LSTM(128, return_sequences=False))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))
model.add(tf.keras.layers.Dropout(0.3))

model.add(tf.keras.layers.Dense(1))

The code is for anomaly detection, by the way.

@ociy,

Welcome to the TensorFlow Forum.

You can use the tf.keras.layers.MultiHeadAttention layer and choose its parameters (e.g. num_heads and key_dim) according to your use case.

Here is example code using the MultiHeadAttention layer with num_heads=4 and key_dim=32:

# Reshape output for transformer
model.add(tf.keras.layers.Reshape((-1, 128)))  
# Adjust the parameters as needed
model.add(tf.keras.layers.MultiHeadAttention(num_heads=4, key_dim=32))  

For more details, please refer to the tf.keras.layers.MultiHeadAttention documentation (TensorFlow v2.16.1).

Thank you!

I have already tried doing that

model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(128, input_shape=(win_length, num_features), return_sequences=True))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))

model.add(tf.keras.layers.LSTM(128, return_sequences=False))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))
model.add(tf.keras.layers.Dropout(0.3))

model.add(tf.keras.layers.RepeatVector(win_length))

model.add(tf.keras.layers.LSTM(128, input_shape=(win_length, num_features), return_sequences=True))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))

model.add(tf.keras.layers.LSTM(128, return_sequences=False))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))
model.add(tf.keras.layers.Dropout(0.3))


attention_layer = tf.keras.layers.MultiHeadAttention(num_heads=2, key_dim=128)
model.add(attention_layer)

model.add(tf.keras.layers.Dense(1))

I am getting the error MultiHeadAttention.call() missing 1 required positional argument: ‘value’, so I am pretty sure there is more to the story.

@ociy,

Sorry for the confusion. The error occurs because MultiHeadAttention.call() takes at least two arguments, query and value (key is optional and defaults to value), so the layer cannot be stacked in a Sequential model, which passes a single tensor from layer to layer.
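For self-attention you call the layer with the same tensor as query and value. A quick standalone illustration, with made-up shapes:

import tensorflow as tf

mha = tf.keras.layers.MultiHeadAttention(num_heads=2, key_dim=128)

# (batch, sequence_length, features) -- dummy data just to show the call signature
x = tf.random.normal((4, 10, 128))
out = mha(query=x, value=x)   # output shape: (4, 10, 128)

You can do the same inside your model with the functional API, as shown below: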


# Define the input shape
input_shape = (win_length, num_features)
inputs = tf.keras.Input(shape=input_shape)

x = tf.keras.layers.LSTM(128, return_sequences=True)(inputs)
x = tf.keras.layers.LeakyReLU(alpha=0.5)(x)

x = tf.keras.layers.LSTM(128, return_sequences=False)(x)
x = tf.keras.layers.LeakyReLU(alpha=0.5)(x)
x = tf.keras.layers.Dropout(0.3)(x)

x = tf.keras.layers.RepeatVector(win_length)(x)

x = tf.keras.layers.LSTM(128, return_sequences=True)(x)
x = tf.keras.layers.LeakyReLU(alpha=0.5)(x)

x = tf.keras.layers.LSTM(128, return_sequences=False)(x)
x = tf.keras.layers.LeakyReLU(alpha=0.5)(x)
x = tf.keras.layers.Dropout(0.3)(x)

# MultiHeadAttention layer (self-attention: query, value and key are all the same tensor)
attention_layer = tf.keras.layers.MultiHeadAttention(num_heads=2, key_dim=128)
# The last LSTM returns a single 128-dim vector per sample, so wrap it as a
# length-1 sequence; MultiHeadAttention expects (batch, seq_len, features) inputs
x = tf.keras.layers.Reshape((1, 128))(x)
x = attention_layer(x, x, x)
x = tf.keras.layers.Flatten()(x)

outputs = tf.keras.layers.Dense(1)(x)

# Create the model
model = tf.keras.Model(inputs=inputs, outputs=outputs)

Thank you!