Following is the code that was run on tensorflow 2.8.0 on a balanced dataset of images containing 9 classes (about 1000 images per class). The training continued up to 30 epochs giving a validation F1 score of about 0.83 with similar validation accuracy values.
However when the same code is run on tensorflow versions 2.9, 2.11, 2.12 and 2.13 the model is not well trained and reaches a maximum validation F1 score of about 0.63 before early stopping kicks in. Could anyone point me to why this is happening?
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.inception_v3 import InceptionV3
import keras.backend as K
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import os
import argparse
import logging
logging.basicConfig(format='%(asctime)s : %(message)s', level=logging.INFO)
def f1_score(y_true, y_pred): # taken from old keras source code
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
precision = true_positives / (predicted_positives + K.epsilon())
recall = true_positives / (possible_positives + K.epsilon())
f1_val = 2 * (precision * recall) / (precision + recall + K.epsilon())
return f1_val
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
"--model_name",
required=True,
help="Name of the model")
parser.add_argument(
"--dataset",
required=False,
default="../../training_dataset",
help="Dataset directory name")
args = parser.parse_args()
model_name = args.model_name
model_num = model_name.split("_")[0][-1]
dataset_name = args.dataset
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
train_datagen = ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
width_shift_range=0.9,
fill_mode='nearest',
validation_split=0.2)
logging.info("Loading the Training Dataset Generator")
train_generator = train_datagen.flow_from_directory(
args.dataset,
color_mode='rgb',
shuffle=True,
batch_size=16,
class_mode='categorical',
target_size=(135, 735),
subset='training')
val_generator = train_datagen.flow_from_directory(
args.dataset,
color_mode='rgb',
shuffle=True,
batch_size=16,
class_mode='categorical',
target_size=(135, 735),
subset='validation')
os.system("!wget --no-check-certificate \
https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_\
dim_ordering_tf_kernels_notop.h5 -O /tmp/inception_v3_weights_tf_d\
im_ordering_tf_kernels_notop.h5")
pre_trained_model = InceptionV3(
input_shape=(135, 735, 3),
include_top=False,
weights=None)
local_weights_file = '/tmp/inception_v3_weights_tf_dim_ordering_tf_'+ 'kernels_notop.h5'
pre_trained_model.load_weights(local_weights_file)
for i, layer in enumerate(pre_trained_model.layers):
layer.trainable = True
last_layer = pre_trained_model.get_layer('mixed10')
last_output = last_layer.output
x = layers.GlobalAveragePooling2D()(last_output)
x = layers.Dense(1024, activation='relu')(x)
x = layers.Dropout(0.4)(x)
x = layers.Dense(9, activation='softmax')(x)
model = Model(pre_trained_model.input, x)
logging.info("Compiling the model")
model.compile(
optimizer=Adam(learning_rate=1e-4,
beta_1=0.9,
beta_2=0.999,
epsilon=1e-06),
loss='categorical_crossentropy',
metrics=[f1_score, "Accuracy"])
es = EarlyStopping(
monitor='val_f1_score',
mode='max',
verbose=1,
patience=10)
history = model.fit_generator(
generator=train_generator,
validation_data=val_generator,
steps_per_epoch=50,
epochs=100,
validation_steps=50,
verbose=2,
callbacks=es)
logging.info("Saving the model")
model.save('{}.h5'.format(model_name))