Validation loss keeps increasing even though accuracy levels out

As the title suggests, the validation loss continues to increase even though the validation accuracy levels out. I checked the MSE and RMSE, and they level off as well. If my loss is increasing, then shouldn’t my accuracy be decreasing? I am using the CIFAR-10 data for classification. I will copy and paste all of the code leading up to the issue; however, some of it is not relevant to the issue at hand. Any help or suggestions would be appreciated.

import os

"""will force tensorflow on cpu only, even if gpu is available"""
# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
# manipulation imports
import pickle
import pandas as pd
import numpy as np

# Scikit-Learn imports
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

# Plotting imports
import seaborn as sns
import plotly.express as px
from plotly.io import write_image
import plotly.graph_objects as go
import matplotlib.pyplot as plt

# Tensorflow & Keras imports
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras import backend as K
import tensorflow as tf
# Ask TensorFlow once for the visible GPUs and reuse the answer for every report below
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))

# Full list of physical devices (all device types) that TensorFlow reports
devices = tf.config.list_physical_devices()
print("Physical devices:\n", devices)
print("Num GPUs Available: ", len(gpus))

# An empty GPU list is falsy, so this picks the message based on whether any GPU was found
print("TensorFlow will run on GPU." if gpus else "TensorFlow will run on CPU.")
# Names of the pickled batch files to load, in order; the test batch is listed last
files = [
    "data_batch_1",
    "data_batch_2",
    "data_batch_3",
    "data_batch_4",
    "data_batch_5",
    "test_batch",
]

# One unpickled object per file, in the same order as `files`
data_batches = []

for file in files:
    batch_path = os.path.join("../data_final", file)
    # NOTE(review): pickle.load executes arbitrary code from the file; only use trusted copies
    with open(batch_path, 'rb') as fo:
        data_batches.append(pickle.load(fo, encoding='latin1'))
# One DataFrame per batch: the batch's 'data' array as columns plus its 'labels' as a 'label' column
dfs = [
    pd.DataFrame(batch['data']).assign(label=batch['labels'])
    for batch in data_batches
]

# Stack every batch into a single frame with a fresh 0..n-1 index
all_rows = pd.concat(dfs, ignore_index=True)

# The last 10000 rows become the test set and everything before them the training set
# (assumes the final batch loaded holds exactly 10000 rows -- TODO confirm)
X_test = all_rows.iloc[-10000:].copy().reset_index(drop=True)
X_train = all_rows.iloc[:-10000]

# Split the targets off from the features
y_train = X_train.pop('label')
y_test = X_test.pop('label')

# Standardize the features for the MLP: fit on the training rows only,
# then apply the same transform to the test rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Correlation matrix of the scaled training features (each column of X_train is one variable)
X_train_corrMatrix = np.corrcoef(X_train.T)

# The correlation matrix is symmetric, so use the symmetric solver. np.linalg.eigh
# returns real eigenvalues in ascending order, whereas np.linalg.eig gives no
# ordering guarantee and may return complex values caused by round-off.
eigvals, eigvects = np.linalg.eigh(X_train_corrMatrix)

# Flip to descending order so the running sum below adds the largest eigenvalues
# first; the eigenvector columns are flipped the same way to stay aligned.
eigvals = eigvals[::-1]
eigvects = eigvects[:, ::-1]

# Cumulative sum of the (descending) eigenvalues
eig_part_sum_list = np.cumsum(eigvals).tolist()

# Percentage of Variance Explained (PVE) after each additional eigenvalue.
# The divisor is the number of eigenvalues: a correlation matrix has ones on its
# diagonal, so its eigenvalues sum to the number of variables.
PVE_list = [100 * (partial_sum / len(eigvals)) for partial_sum in eig_part_sum_list]

# h = smallest NUMBER of leading eigenvalues whose PVE reaches 95%.
# Counting starts at 1: the previous version stored the zero-based index,
# which is one less than the number of eigenvalues actually summed.
h = next(
    (count for count, pve in enumerate(PVE_list, start=1) if pve >= 95),
    len(PVE_list),  # fallback: use every component if round-off keeps PVE below 95
)
PVE = PVE_list[h - 1]

print(f'The smallest integer h such that PVE(h) is greater than or equal to 95% is h = {h} with PVE = {PVE}.  This value of h={h} is saved and used in the MLP hidden layer units assignment')
# Distinct class labels present in y_train, and how many there are
classes = np.unique(y_train)
num_classes = len(classes)

# One-hot encode the training and test targets using that class count
y_train_ohe = to_categorical(y_train, num_classes)
y_test_ohe = to_categorical(y_test, num_classes)
# Keras mean-squared-error object; reused by rmse() below
mse = MeanSquaredError()


def rmse(y_true, y_pred):
    """Return the root mean squared error of ``y_pred`` against ``y_true``.

    Computed as the square root of whatever the module-level ``mse``
    object returns for the same two arguments.
    """
    mean_sq_err = mse(y_true, y_pred)
    return K.sqrt(mean_sq_err)
# Number of input features (columns of the scaled training matrix)
input_dim_cols = X_train.shape[1]

# Single-hidden-layer MLP built as a Sequential model
model_0 = Sequential()

# Input layer sized to the feature count
model_0.add(Input(shape=(input_dim_cols,)))

# Hidden layer with h ReLU units, where h comes from the PVE computation earlier in the script
model_0.add(Dense(units=h, activation='relu'))

# Softmax output layer with one unit per class. Uses num_classes (the same value
# the labels were one-hot encoded with) instead of a hard-coded 10 so the output
# width always matches the target width.
model_0.add(Dense(units=num_classes, activation='softmax'))

# Adam optimizer with categorical cross-entropy as the training loss; accuracy,
# the mse object and the rmse function are tracked as additional metrics only
model_0.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', mse, rmse],
)

# NOTE: no callbacks are passed, so all 500 epochs run even if val_loss starts rising.
# Validation metrics are computed on (X_test, y_test_ohe) after every epoch.
history_0 = model_0.fit(
    X_train,
    y_train_ohe,
    epochs=500,
    validation_data=(X_test, y_test_ohe),
    batch_size=250,
)
def _plot_train_vs_val(train_key, val_key, title, y_label, out_path):
    """Plot one training/validation curve pair from history_0, save it, then show it.

    train_key / val_key are keys into ``history_0.history``; the training
    curve is drawn in blue and the validation curve in red.
    """
    plt.plot(history_0.history[train_key], color='blue')
    plt.plot(history_0.history[val_key], color='red')
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='best')
    plt.savefig(out_path)
    plt.show()


# Training & validation accuracy per epoch
_plot_train_vs_val(
    'accuracy',
    'val_accuracy',
    'Model accuracy with batch_size=250',
    'Accuracy',
    '../images/final/250_batch_size.png',
)

# Training & validation loss per epoch (the compiled loss is categorical cross-entropy, not MSE)
_plot_train_vs_val(
    'loss',
    'val_loss',
    'Training & Validation Cross Entropy Loss',
    'Cross Entropy Loss',
    '../images/final/cross_entropy_loss.png',
)

Hi @evanhowington, you mentioned that you have tried different loss functions such as MSE and RMSE. However, training a model on the CIFAR-10 dataset is a classification task, so the loss function to use is sparse_categorical_crossentropy or categorical_crossentropy, depending on how the labels are encoded.

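This is called overfitting. Accuracy only checks whether the highest-scoring class is the correct one, while cross-entropy also penalizes how confident the wrong predictions are, so the validation loss can keep growing while the validation accuracy stays flat. You can use early stopping, which stops training when a monitored metric has stopped improving. Thank you.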

1 Like