Trying to create a english story generator with BERT model and graph execution error came unwanted

There seems to be a problem during the execution of the TensorFlow graph, specifically in the node model/tf_distil_bert_model/distilbert/embeddings/Gather_1 .

I am using winows 10, tensorflow and keras both version 2.12.0.

This is the code:
‘’’
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from transformers import DistilBertTokenizer, TFDistilBertModel
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers.legacy import Adam

Set the path to the directory containing the text files

data_dir = “C:/Users/Shah/AppData/Local/Programs/Python/Python310/LAB/Project/Data”

Initialize lists to hold the text and label data

text_data =
label_data =

Loop through each file in the directory

for filename in os.listdir(data_dir):
# Read the contents of the file
with open(os.path.join(data_dir, filename), “r”) as f:
lines = f.readlines()
label = lines[0].strip().split()[0] # extract the label from the first column of the first line
text = “”.join(lines[1:]).strip()

# Append the text and label data to their respective lists
text_data.append(text)
label_data.append(label)

Create a Pandas dataframe from the text and label data

data = pd.DataFrame({“text”: text_data, “label”: label_data})

Split the data into training and validation sets

train_data, valid_data = train_test_split(data, test_size=0.2, random_state=42)

Print out the ‘label’ column of the ‘train_data’ DataFrame

print(train_data[‘label’])
print(data.head(10))

Initialize the tokenizer and the pre-trained model

tokenizer = DistilBertTokenizer.from_pretrained(‘distilbert-base-uncased’)
MAX_LENGTH = 2048
transformer_model = TFDistilBertModel.from_pretrained(‘distilbert-base-uncased’, max_length=MAX_LENGTH)

Tokenize the training and validation data

train_sequences = [tokenizer.encode(seq, add_special_tokens=True) for seq in train_data[‘text’]]
valid_sequences = [tokenizer.encode(seq, add_special_tokens=True) for seq in valid_data[‘text’]]

Pad the sequences to ensure they are all the same length

train_sequences = tf.keras.preprocessing.sequence.pad_sequences(train_sequences, maxlen=MAX_LENGTH, padding=‘post’, truncating=‘post’, value=0)
valid_sequences = tf.keras.preprocessing.sequence.pad_sequences(valid_sequences, maxlen=MAX_LENGTH, padding=‘post’, truncating=‘post’, value=0)

Convert the sequences to tensors

train_inputs = tf.constant(train_sequences)
valid_inputs = tf.constant(valid_sequences)

Convert the labels to one-hot encoded format

num_classes = len(data[‘label’].unique())
train_labels = tf.one_hot(train_data[‘label’].astype(int), num_classes)
valid_labels = tf.one_hot(valid_data[‘label’].astype(‘int32’), num_classes)

print(“train_inputs shape:”, train_inputs.shape)
print(“train_labels shape:”, train_labels.shape)
print(“valid_inputs shape:”, valid_inputs.shape)
print(“valid_labels shape:”, valid_labels.shape)

print(“train_inputs type:”, type(train_inputs))
print(“train_labels type:”, type(train_labels))
print(“valid_inputs type:”, type(valid_inputs))
print(“valid_labels type:”, type(valid_labels))

Define the model architecture

input_layer = tf.keras.layers.Input(shape=(MAX_LENGTH,), dtype=‘int32’)
bert_layer = transformer_model(input_layer)[0]
output_layer = tf.keras.layers.Dense(units=num_classes, activation=‘softmax’)(bert_layer[:,0,:])
model = tf.keras.models.Model(inputs=input_layer, outputs=output_layer)

model.summary()

Define a callback function to monitor the validation loss and save the best model

checkpoint_filepath = ‘best_model.h5’
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath=checkpoint_filepath,
save_weights_only=True,
monitor=‘val_loss’,
mode=‘min’,
save_best_only=True)

Compile the model

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5), loss=‘categorical_crossentropy’, metrics=[‘accuracy’])

Train the model

epochs = 3
batch_size = 32
history = model.fit(train_inputs, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(valid_inputs, valid_labels), callbacks=[model_checkpoint_callback])

Save the final version of the model

model.save(‘final_model.h5’)

Generate a story using the trained model

prompt = “Once upon a time”
max_length = 256
generated_story = [tokenizer.encode(prompt, add_special_tokens=True)]
for i in range(max_length):
input_sequence = tf.constant(generated_story[-128:])
predicted_label = model.predict(input_sequence)[0]
next_token_id = np.argmax(predicted_label)
if next_token_id == tokenizer.sep_token_id:
break
generated_story.append(next_token_id)
generated_text = tokenizer.decode
‘’’

This is the error message:

Traceback (most recent call last):
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\LAB\Project\lm2.py”, line 97, in
history = model.fit(train_inputs, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(valid_inputs, valid_labels), callbacks=[model_checkpoint_callback])
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\execute.py”, line 52, in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: Graph execution error:

Detected at node ‘model/tf_distil_bert_model/distilbert/embeddings/Gather_1’ defined at (most recent call last):
File “”, line 1, in
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\idlelib\run.py”, line 164, in main
ret = method(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\idlelib\run.py”, line 578, in runcode
exec(code, self.locals)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\LAB\Project\lm2.py”, line 97, in
history = model.fit(train_inputs, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(valid_inputs, valid_labels), callbacks=[model_checkpoint_callback])
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1685, in fit
tmp_logs = self.train_function(iterator)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1284, in train_function
return step_function(self, iterator)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1268, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1249, in run_step
outputs = model.train_step(data)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1050, in train_step
y_pred = self(x, training=True)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 558, in call
return super().call(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py”, line 1145, in call
outputs = call_fn(inputs, *args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 96, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py”, line 512, in call
return self._run_internal_graph(inputs, training=training, mask=mask)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py”, line 669, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 558, in call
return super().call(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py”, line 1145, in call
outputs = call_fn(inputs, *args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 96, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_tf_utils.py”, line 558, in run_call_with_unpacked_inputs
if isinstance(main_input, (tf.Tensor, KerasTensor)) or main_input is None:
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py”, line 559, in call
outputs = self.distilbert(
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py”, line 1145, in call
outputs = call_fn(inputs, *args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 96, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_tf_utils.py”, line 558, in run_call_with_unpacked_inputs
if isinstance(main_input, (tf.Tensor, KerasTensor)) or main_input is None:
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py”, line 409, in call
embedding_output = self.embeddings(input_ids, inputs_embeds=inputs_embeds) # (bs, seq_length, dim)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py”, line 1145, in call
outputs = call_fn(inputs, *args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 96, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py”, line 129, in call
position_embeds = tf.gather(params=self.position_embeddings, indices=position_ids)
Node: ‘model/tf_distil_bert_model/distilbert/embeddings/Gather_1’
indices[0,1984] = 1984 is not in [0, 512)
[[{{node model/tf_distil_bert_model/distilbert/embeddings/Gather_1}}]] [Op:__inference_train_function_20930]

Hi @Neo_TheOne,

Sorry for the delay in response.
This InvalidArgumentError: Graph execution error is due to input sequences exceeding DistilBERT’s maximum length of 512 tokens, as you have set MAX_LENGTH to 2048. To fix this,I suggest to change MAX_LENGTH to 512 and use tokenizer.batch_encode_plus for tokenization, which will handle padding and truncation automatically and make sure that your input tensors match the expected shape for the model.

Please check out this and let us know if it works.Thank You.