Sorry, I have just started learning to build neural networks with TensorFlow. The TensorFlow version I am using is 2.3. I want to use a custom attention layer to associate the output of the encoder with another input. This is my main code:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D

tf.config.experimental_run_functions_eagerly(True)

class InteractionLayer(tf.keras.Model):
    def __init__(self, hidden_units, num_neighbors, step_input):
        super(InteractionLayer, self).__init__()
        self.hidden_units = hidden_units
        self.step_input = step_input

    def build(self, input_shape):
        self.wq = tf.keras.layers.Dense(self.step_input, trainable=True, name='wq')
        self.wk = tf.keras.layers.Dense(self.step_input, trainable=True, name='wk')
        self.wv = tf.keras.layers.Dense(self.step_input, trainable=True, name='wv')
        self.reshape = tf.keras.layers.Reshape(target_shape=(self.step_input, self.hidden_units * 2), trainable=True)
        self.reshape1 = tf.keras.layers.Reshape(target_shape=(self.step_input, 1, 2, self.hidden_units), trainable=True)
        self.dense1 = tf.keras.layers.Dense(self.hidden_units * 2, trainable=True)
        self.reshape_weight = tf.keras.layers.Reshape(target_shape=(self.step_input, 1, self.step_input), trainable=True)
        self.dense_weight = tf.keras.layers.Dense(2, trainable=True)

    def call(self, hidden, risk, **kwargs):
        # Query and value are built from the encoder output, the key from the risk input
        hidden_reshape = self.reshape(hidden)
        current_hidden = self.wq(hidden_reshape)
        neighbor_hidden = self.wk(risk)
        v_hidden = self.wv(hidden_reshape)
        # Dot-product attention between the two projected inputs
        similarity_scores = tf.linalg.matmul(current_hidden, neighbor_hidden, transpose_b=True)
        interaction_weights = tf.nn.softmax(similarity_scores)
        fused_representation = tf.linalg.matmul(interaction_weights, v_hidden)
        fused_representation_1 = self.dense1(fused_representation)
        fused_representation_2 = self.reshape1(fused_representation_1)
        # The attention weights are also projected and returned alongside the fused output
        attention_weights = self.reshape_weight(interaction_weights)
        attention_weights_out = self.dense_weight(attention_weights)
        return fused_representation_2, attention_weights_out
def Seq2Seq(hidden_units, step_input, feature, num_neighbors):
    encoder_inputs = tf.keras.Input(shape=(step_input, 1, feature, 1), name="encode_input")
    decoder_inputs = tf.keras.Input(shape=(step_input, 1, feature, 1), name="decode_input")
    risk_input = tf.keras.Input(shape=(step_input, 6), name="risk_input")
    encoder = Encoder(hidden_units, step_input, num_neighbors)
    enc_outputs, enc_state_h, enc_state_c = encoder(encoder_inputs, risk_input)
    interaction = InteractionLayer(hidden_units, num_neighbors, step_input)
    interaction_output, attention_weights = interaction(enc_outputs, risk_input)
    dec_states_inputs = [enc_state_h, enc_state_c]
    decoder = Decoder(hidden_units, step_input, num_neighbors)
    attention_output, dec_state_h, dec_state_c = decoder(interaction_output, decoder_inputs, dec_states_inputs)
    conv2d_outputs = Conv2D(filters=1, kernel_size=(1, 2), activation='relu', padding='same', name="conv2d")(attention_output)
    model = tf.keras.Model(inputs=[encoder_inputs, decoder_inputs, risk_input], outputs=conv2d_outputs)
    return model, attention_weights

model, myloss = Seq2Seq(hidden_units, step_input, feature, Surrounding_Risk_Quantity)
class MyLoss(tf.keras.losses.Loss):
    def __init__(self, attention):
        super(MyLoss, self).__init__()
        self.risk = attention

    def call(self, y_true, y_pred):
        # Squared error weighted element-wise by the attention tensor captured from the model
        loss = tf.reduce_sum(tf.square(y_true - y_pred) * self.risk)
        return loss

model.compile(optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=1.0),
              loss=MyLoss(myloss),
              experimental_run_tf_function=False)

history = model.fit([x_train_1, x_train_eval, x_train_risk], y_train,
                    batch_size=16, epochs=2,
                    validation_data=([x_test_1, x_test_eval, x_test_risk], y_test),
                    validation_freq=1, callbacks=[cp_callback])
When I run this code, the following warning appears:

WARNING:tensorflow:Gradients do not exist for variables ['interaction_layer/wq/kernel:0', 'interaction_layer/wq/bias:0', 'interaction_layer/wk/kernel:0', 'interaction_layer/wk/bias:0', 'interaction_layer/wv/kernel:0', 'interaction_layer/wv/bias:0', 'interaction_layer/dense/kernel:0', 'interaction_layer/dense/bias:0', 'interaction_layer/dense_1/kernel:0', 'interaction_layer/dense_1/bias:0'] when minimizing the loss.
Although it is only a warning, I suspect it will affect the accuracy of the model. I guessed the problem might be related to the reshape operation, so I used tf.debugging.assert_all_finite(hidden_reshape, "Tensor contains non-finite values.") to check whether the propagation through it behaves normally, and no error was reported.
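For reference, this is roughly where I placed that check inside call() (just a sketch of the debugging step; nothing else was changed):

```python
def call(self, hidden, risk, **kwargs):
    hidden_reshape = self.reshape(hidden)
    # Debugging only: raises if the reshaped tensor contains NaN or Inf values
    tf.debugging.assert_all_finite(hidden_reshape, "Tensor contains non-finite values.")
    current_hidden = self.wq(hidden_reshape)
    # ... rest of call() unchanged ...
```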
I also used tf.print to inspect wq, wk and wv, and the output shows that their weights have values and the bias terms are 0.
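This is roughly how I printed them (again only a sketch; I temporarily added these lines inside call(), where the kernel and bias attributes exist because build() has already run):

```python
# Temporary debugging prints, removed afterwards
tf.print("wq kernel:", self.wq.kernel, "wq bias:", self.wq.bias)
tf.print("wk kernel:", self.wk.kernel, "wk bias:", self.wk.bias)
tf.print("wv kernel:", self.wv.kernel, "wv bias:", self.wv.bias)
```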
The interaction_layer also shows up in my model.summary() output.
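As far as I understand, a manual check like the following should confirm whether gradients actually reach those variables (just a sketch; x_batch_1, x_batch_eval, x_batch_risk and y_batch are placeholders for a small batch of my data), but I am not sure this is the right way to verify it:

```python
# Sketch: compute the loss on one batch and list variables that receive no gradient
with tf.GradientTape() as tape:
    preds = model([x_batch_1, x_batch_eval, x_batch_risk], training=True)
    loss_value = MyLoss(myloss)(y_batch, preds)
grads = tape.gradient(loss_value, model.trainable_variables)
for var, grad in zip(model.trainable_variables, grads):
    if grad is None:
        tf.print("no gradient for", var.name)
```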
My questions are:

1. Is there any structural error in my custom attention mechanism?
2. How should I resolve this warning?
3. Is my custom loss function correct? If I want to include the attention weights in it, how should I define the loss function (a sketch of what I mean follows below)?

I would appreciate it if you could give me some advice.
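To make question 3 concrete, this is roughly the weighting I have in mind. AttentionWeightedLoss is just a placeholder name, attention_weights stands for the second output of InteractionLayer, and I am not even sure the shapes of the attention weights and of y_true/y_pred line up:

```python
class AttentionWeightedLoss(tf.keras.losses.Loss):
    """Sketch only: squared error scaled element-wise by the attention weights."""
    def __init__(self, attention_weights):
        super(AttentionWeightedLoss, self).__init__()
        self.attention_weights = attention_weights

    def call(self, y_true, y_pred):
        # Each squared error is multiplied by its attention weight before summation
        return tf.reduce_sum(self.attention_weights * tf.square(y_true - y_pred))
```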