There seems to be a problem during the execution of the TensorFlow graph, specifically in the node model/tf_distil_bert_model/distilbert/embeddings/Gather_1
.
I am using winows 10, tensorflow and keras both version 2.12.0.
This is the code:
‘’’
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from transformers import DistilBertTokenizer, TFDistilBertModel
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers.legacy import Adam
Set the path to the directory containing the text files
data_dir = “C:/Users/Shah/AppData/Local/Programs/Python/Python310/LAB/Project/Data”
Initialize lists to hold the text and label data
text_data =
label_data =
Loop through each file in the directory
for filename in os.listdir(data_dir):
# Read the contents of the file
with open(os.path.join(data_dir, filename), “r”) as f:
lines = f.readlines()
label = lines[0].strip().split()[0] # extract the label from the first column of the first line
text = “”.join(lines[1:]).strip()
# Append the text and label data to their respective lists
text_data.append(text)
label_data.append(label)
Create a Pandas dataframe from the text and label data
data = pd.DataFrame({“text”: text_data, “label”: label_data})
Split the data into training and validation sets
train_data, valid_data = train_test_split(data, test_size=0.2, random_state=42)
Print out the ‘label’ column of the ‘train_data’ DataFrame
print(train_data[‘label’])
print(data.head(10))
Initialize the tokenizer and the pre-trained model
tokenizer = DistilBertTokenizer.from_pretrained(‘distilbert-base-uncased’)
MAX_LENGTH = 2048
transformer_model = TFDistilBertModel.from_pretrained(‘distilbert-base-uncased’, max_length=MAX_LENGTH)
Tokenize the training and validation data
train_sequences = [tokenizer.encode(seq, add_special_tokens=True) for seq in train_data[‘text’]]
valid_sequences = [tokenizer.encode(seq, add_special_tokens=True) for seq in valid_data[‘text’]]
Pad the sequences to ensure they are all the same length
train_sequences = tf.keras.preprocessing.sequence.pad_sequences(train_sequences, maxlen=MAX_LENGTH, padding=‘post’, truncating=‘post’, value=0)
valid_sequences = tf.keras.preprocessing.sequence.pad_sequences(valid_sequences, maxlen=MAX_LENGTH, padding=‘post’, truncating=‘post’, value=0)
Convert the sequences to tensors
train_inputs = tf.constant(train_sequences)
valid_inputs = tf.constant(valid_sequences)
Convert the labels to one-hot encoded format
num_classes = len(data[‘label’].unique())
train_labels = tf.one_hot(train_data[‘label’].astype(int), num_classes)
valid_labels = tf.one_hot(valid_data[‘label’].astype(‘int32’), num_classes)
print(“train_inputs shape:”, train_inputs.shape)
print(“train_labels shape:”, train_labels.shape)
print(“valid_inputs shape:”, valid_inputs.shape)
print(“valid_labels shape:”, valid_labels.shape)
print(“train_inputs type:”, type(train_inputs))
print(“train_labels type:”, type(train_labels))
print(“valid_inputs type:”, type(valid_inputs))
print(“valid_labels type:”, type(valid_labels))
Define the model architecture
input_layer = tf.keras.layers.Input(shape=(MAX_LENGTH,), dtype=‘int32’)
bert_layer = transformer_model(input_layer)[0]
output_layer = tf.keras.layers.Dense(units=num_classes, activation=‘softmax’)(bert_layer[:,0,:])
model = tf.keras.models.Model(inputs=input_layer, outputs=output_layer)
model.summary()
Define a callback function to monitor the validation loss and save the best model
checkpoint_filepath = ‘best_model.h5’
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath=checkpoint_filepath,
save_weights_only=True,
monitor=‘val_loss’,
mode=‘min’,
save_best_only=True)
Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5), loss=‘categorical_crossentropy’, metrics=[‘accuracy’])
Train the model
epochs = 3
batch_size = 32
history = model.fit(train_inputs, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(valid_inputs, valid_labels), callbacks=[model_checkpoint_callback])
Save the final version of the model
model.save(‘final_model.h5’)
Generate a story using the trained model
prompt = “Once upon a time”
max_length = 256
generated_story = [tokenizer.encode(prompt, add_special_tokens=True)]
for i in range(max_length):
input_sequence = tf.constant(generated_story[-128:])
predicted_label = model.predict(input_sequence)[0]
next_token_id = np.argmax(predicted_label)
if next_token_id == tokenizer.sep_token_id:
break
generated_story.append(next_token_id)
generated_text = tokenizer.decode
‘’’
This is the error message:
Traceback (most recent call last):
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\LAB\Project\lm2.py”, line 97, in
history = model.fit(train_inputs, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(valid_inputs, valid_labels), callbacks=[model_checkpoint_callback])
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\execute.py”, line 52, in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: Graph execution error:
Detected at node ‘model/tf_distil_bert_model/distilbert/embeddings/Gather_1’ defined at (most recent call last):
File “”, line 1, in
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\idlelib\run.py”, line 164, in main
ret = method(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\idlelib\run.py”, line 578, in runcode
exec(code, self.locals)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\LAB\Project\lm2.py”, line 97, in
history = model.fit(train_inputs, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(valid_inputs, valid_labels), callbacks=[model_checkpoint_callback])
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1685, in fit
tmp_logs = self.train_function(iterator)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1284, in train_function
return step_function(self, iterator)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1268, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1249, in run_step
outputs = model.train_step(data)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1050, in train_step
y_pred = self(x, training=True)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 558, in call
return super().call(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py”, line 1145, in call
outputs = call_fn(inputs, *args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 96, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py”, line 512, in call
return self._run_internal_graph(inputs, training=training, mask=mask)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py”, line 669, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 558, in call
return super().call(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py”, line 1145, in call
outputs = call_fn(inputs, *args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 96, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_tf_utils.py”, line 558, in run_call_with_unpacked_inputs
if isinstance(main_input, (tf.Tensor, KerasTensor)) or main_input is None:
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py”, line 559, in call
outputs = self.distilbert(
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py”, line 1145, in call
outputs = call_fn(inputs, *args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 96, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_tf_utils.py”, line 558, in run_call_with_unpacked_inputs
if isinstance(main_input, (tf.Tensor, KerasTensor)) or main_input is None:
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py”, line 409, in call
embedding_output = self.embeddings(input_ids, inputs_embeds=inputs_embeds) # (bs, seq_length, dim)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py”, line 1145, in call
outputs = call_fn(inputs, *args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 96, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py”, line 129, in call
position_embeds = tf.gather(params=self.position_embeddings, indices=position_ids)
Node: ‘model/tf_distil_bert_model/distilbert/embeddings/Gather_1’
indices[0,1984] = 1984 is not in [0, 512)
[[{{node model/tf_distil_bert_model/distilbert/embeddings/Gather_1}}]] [Op:__inference_train_function_20930]