This is a program that generates captions for images and then updates the weights of the model based on the cross entropy loss. While updating the weights of the model, I come across this error and cannot figure out the root cause. Please guide me to fix this.
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
def custom_loss(y_true, y_pred, feedback):
# Compute cross-entropy loss
cross_entropy_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
# Compute negative feedback loss
negative_feedback_loss = tf.reduce_mean(-1 * np.exp(feedback))
# Combine both losses with a weighted sum
total_loss = cross_entropy_loss + negative_feedback_loss
return total_loss
# Define loss function and optimizer
loss_fn = custom_loss
optimizer = Adam(learning_rate=0.001)
# Update model parameters using RL algorithm
for img in os.listdir(dataset_images): # Number of RL iterations
RL_learning_loop += 1
if RL_learning_loop < 10:
image_path = dataset_images + "/" + img
image = Image.open(image_path)
#image = image.resize(300,300)
image.show()
photo = extract_features(image_path, xception_model)
in_text = 'start'
for i in range(max_length):
sequence = tokenizer.texts_to_sequences([in_text])[0]
sequence = pad_sequences([sequence], maxlen=max_length)
#preds = model.predict([photo,sequence], verbose=0)
preds = model([photo,sequence])
next_index = sample_pred(preds, temperature=0.5) # introduce randomness with temperature=0.5
next_word = word_for_id(next_index, tokenizer)
if next_word is None:
break
in_text += ' ' + next_word
if next_word == 'end':
break
print("Generated caption:", in_text)
feedback = float(input("Rate the quality of the caption (0-1): "))
# Compute loss between predicted and ground-truth caption
true_caption = get_true_caption(image_path, cln_descriptions) # function to get ground-truth caption
true_sequence = tokenizer.texts_to_sequences([true_caption])[0]
true_sequence = pad_sequences([true_sequence], maxlen=max_length)
y_true = np.zeros((1, max_length, len(tokenizer.word_index) + 1))
for i, word_index in enumerate(true_sequence[0]):
y_true[0, i, word_index] = 1
y_pred = np.zeros((1, max_length, len(tokenizer.word_index) + 1))
for i, word_index in enumerate(sequence[0]):
y_pred[0, i, word_index] = 1
loss = loss_fn(y_true, y_pred, feedback)
loss = tf.reduce_mean(loss)
print("Loss: ",loss)
trainable_vars = []
for var in model.trainable_variables:
if 'embedding' not in var.name and 'conv2d' not in var.name:
trainable_vars.append(var)
grads = tape.gradient(loss, trainable_vars)
print("Gradient",grads)
print("**************************")
optimizer.apply_gradients(zip(grads, trainable_vars))
# Update Q-table based on feedback
for i in range(len(in_text.split())):
word = in_text.split()[i]
action = tokenizer.word_index[word]
if i == len(in_text.split())-1:
next_max_q_value = 0
else:
next_word = in_text.split()[i+1]
next_action = tokenizer.word_index[next_word]
next_max_q_value = np.max(q_table[i+1][next_action])
q_table[i][action] += alpha * (feedback + gamma * next_max_q_value - q_table[i][action])
# Save updated model and Q-table
model.save('models/model_rl.h5')
dump(q_table, open("q_table.p", "wb"))
The gradient is being calculated as
Gradient [None, None, None, None, None, None, None, None, None, None, None, None, None]
and I get the following error.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-114-6d5266f12187> in <cell line: 18>()
64 print("Gradient",grads)
65 print("**************************")
---> 66 optimizer.apply_gradients(zip(grads, trainable_vars))
67
68 # Update model weights based on loss
3 frames
/usr/local/lib/python3.9/dist-packages/keras/optimizers/utils.py in filter_empty_gradients(grads_and_vars)
75 if not filtered:
76 variable = ([v.name for _, v in grads_and_vars],)
---> 77 raise ValueError(
78 f"No gradients provided for any variable: {variable}. "
79 f"Provided `grads_and_vars` is {grads_and_vars}."
ValueError: No gradients provided for any variable: (['image_layer_1/kernel:0', 'image_layer_1/bias:0', 'LSTM_layer_1/lstm_cell_8/kernel:0', 'LSTM_layer_1/lstm_cell_8/recurrent_kernel:0', 'LSTM_layer_1/lstm_cell_8/bias:0', 'image_layer_2/kernel:0', 'image_layer_2/bias:0', 'LSTM_layer_2/kernel:0', 'LSTM_layer_2/bias:0', 'dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'],). Provided `grads_and_vars` is ((None, <tf.Variable 'image_layer_1/kernel:0' shape=(2048, 256) dtype=float32, numpy=
array([[-0.03491995, -0.04523179, -0.00278588, ..., 0.02073344,