Hello, I’ve been having some trouble with an AlexNet model, whenever I start to train it an error interrumps the training, here is my code:
# Set the path to your dataset directory
train_data_dir = '/content/drive/MyDrive/Intel Image Classification Dataset/seg_train/seg_train'
validation_data_dir = '/content/drive/MyDrive/Intel Image Classification Dataset/balanced_test'
num_classes = 6
img_width, img_height = 224, 224 # AlexNet requires input images to be 224x224 pixels
# Define data generators for training and validation
train_datagen = ImageDataGenerator(
rescale=1.0/255.0,
rotation_range=30,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.15,
zoom_range=0.15,
horizontal_flip=True,
fill_mode='nearest',
brightness_range=[0.5, 1.7]
)
validation_datagen = ImageDataGenerator(rescale=1.0/255.0)
# Create the AlexNet model
model = Sequential()
# Layer 1
model.add(Conv2D(96, (11, 11), input_shape=(img_width, img_height, 3), strides=(4, 4), padding='valid'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
# Layer 2
model.add(Conv2D(256, (5, 5), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
# Layer 3
model.add(Conv2D(384, (3, 3), padding='same'))
model.add(Activation('relu'))
# Layer 4
model.add(Conv2D(384, (3, 3), padding='same'))
model.add(Activation('relu'))
# Layer 5
model.add(Conv2D(256, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
# Flatten and fully connected layers
model.add(Flatten())
model.add(Dense(4096))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(4096))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=SGD(learning_rate=0.001), metrics=['accuracy'])
# Create data generators for training and validation
batch_size = 32
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='categorical',
shuffle = True, # shuffle the data, default is true but just to point it out
)
validation_generator = validation_datagen.flow_from_directory(
validation_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='categorical',
shuffle = True, # shuffle the data, default is true but just to point it out
)
# Train the model
epochs = 100 # You can increase this for better performance
checkpoint = ModelCheckpoint("alexnet_image_classification_model.h5", monitor="val_accuracy", save_best_only=True, mode="max")
early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)
history = model.fit(
train_generator,
steps_per_epoch=train_generator.samples // batch_size,
epochs=epochs,
validation_data=validation_generator,
validation_steps=validation_generator.samples // batch_size,
callbacks=[checkpoint, early_stopping]
)
# Save the model
model.save('alexnet_image_classification_model.h5')
And this is the error:
UnknownError Traceback (most recent call last)
<ipython-input-6-ef09a962beef> in <cell line: 6>()
4 early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)
5
----> 6 history = model.fit(
7 train_generator,
8 steps_per_epoch=train_generator.samples // batch_size,
1 frames
/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 for t in inputs
59 ]
---> 60 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
61 inputs, attrs, num_outputs)
62 except core._NotOkStatusException as e:
UnknownError: Graph execution error:
Detected at node PyFunc defined at (most recent call last):
<stack traces unavailable>
FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/Intel Image Classification Dataset/seg_train/seg_train/sea/20041.jpg'
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/tensorflow/python/ops/script_ops.py", line 270, in __call__
ret = func(*args)
File "/usr/local/lib/python3.10/dist-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/tensorflow/python/data/ops/from_generator_op.py", line 198, in generator_py_func
values = next(generator_state.get_iterator(iterator_id))
File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/data_adapter.py", line 917, in wrapped_generator
for data in generator_fn():
File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/data_adapter.py", line 1064, in generator_fn
yield x[i]
File "/usr/local/lib/python3.10/dist-packages/keras/src/preprocessing/image.py", line 116, in __getitem__
return self._get_batches_of_transformed_samples(index_array)
File "/usr/local/lib/python3.10/dist-packages/keras/src/preprocessing/image.py", line 370, in _get_batches_of_transformed_samples
img = image_utils.load_img(
File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/image_utils.py", line 422, in load_img
with open(path, "rb") as f:
FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/Intel Image Classification Dataset/seg_train/seg_train/sea/20041.jpg'
[[{{node PyFunc}}]]
[[IteratorGetNext]] [Op:__inference_train_function_1386]
Sometimes the error pops up at the middle of the epoch 1, other times at the start of epoch 35, it happens randomly and I dont know what to do.