As the title suggests, the validation loss continues to increase even though the validation accuracy levels off. I checked the MSE and RMSE, and they level off as well. If my loss is increasing, then shouldn’t my accuracy be decreasing? I am using the CIFAR-10 dataset for classification. I will copy and paste all of the code leading up to the issue; however, some of it is not relevant to the issue at hand. Any help or suggestions would be appreciated.
import os
"""will force tensorflow on cpu only, even if gpu is available"""
# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
# manipulation imports
import pickle
import pandas as pd
import numpy as np
# Scikit-Learn imports
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
# Plotting imports
import seaborn as sns
import plotly.express as px
from plotly.io import write_image
import plotly.graph_objects as go
import matplotlib.pyplot as plt
# Tensorflow & Keras imports
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras import backend as K
import tensorflow as tf
# Report which physical devices TensorFlow can see before any training starts.
gpu_count = len(tf.config.list_physical_devices('GPU'))
print("Num GPUs Available: ", gpu_count)

devices = tf.config.list_physical_devices()
print("Physical devices:\n", devices)

# Same GPU query through the `experimental` namespace; its result also decides
# which of the two device messages is printed.
experimental_gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(experimental_gpus))
print("TensorFlow will run on GPU." if experimental_gpus else "TensorFlow will run on CPU.")
# Names of the pickled batch files to load: the five numbered data batches
# followed by the test batch (the test batch is deliberately last).
files = [f"data_batch_{i}" for i in range(1, 6)] + ["test_batch"]

# Unpickle every file, keeping the loaded objects in the same order as `files`.
data_batches = []
for file in files:
    batch_path = os.path.join("../data_final", file)
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(batch_path, 'rb') as batch_file:
        data_batch = pickle.load(batch_file, encoding='latin1')
    data_batches.append(data_batch)
# One DataFrame per batch: the columns come from the batch's 'data' entry and
# a trailing 'label' column is filled from its 'labels' entry.
dfs = [
    pd.DataFrame(batch['data']).assign(label=batch['labels'])
    for batch in data_batches
]

# Stack all batches into a single frame with a fresh 0..n-1 index.
all_rows = pd.concat(dfs, ignore_index=True)

# The last 10000 rows become the test set and everything before them the
# training set (this relies on the test batch having been loaded last).
X_test = all_rows.iloc[-10000:].copy().reset_index(drop=True)
X_train = all_rows.iloc[:-10000]

# Detach the targets: pop() removes 'label' from the feature frame and
# returns it as a Series.
y_train = X_train.pop('label')
y_test = X_test.pop('label')
# Standardize the features for the MLP. The scaler is fitted on the training
# rows only, and that same fitted scaler is then applied to the test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
# Correlation matrix of the training features. X_train is transposed so that
# np.corrcoef treats every feature (column of X_train) as one variable.
X_train_corrMatrix = np.corrcoef(X_train.T)

# A correlation matrix is symmetric, so eigh is the appropriate solver: it
# returns real eigenvalues, whereas eig makes no ordering guarantee and can
# return complex values through round-off.
eigvals, eigvects = np.linalg.eigh(X_train_corrMatrix)

# eigh returns the eigenvalues in ascending order. The cumulative PVE below
# needs the largest first, with the eigenvector columns kept aligned.
descending = np.argsort(eigvals)[::-1]
eigvals = eigvals[descending]
eigvects = eigvects[:, descending]

# Cumulative sums of the sorted eigenvalues: entry m-1 is the sum of the m
# largest eigenvalues.
eig_part_sum_array = np.cumsum(eigvals)
eig_part_sum_list = eig_part_sum_array.tolist()

# Percentage of Variance Explained (PVE) by the m largest eigenvalues, as a
# percentage of the eigenvalue total. For a correlation matrix that total
# equals the number of features, i.e. len(eigvals).
PVE_list = (100 * eig_part_sum_array / eigvals.sum()).tolist()

# h is the smallest NUMBER of leading eigenvalues whose PVE is >= 95%.
# It is a 1-based count (not a 0-based position in eigvals) because it is
# later used as a number of hidden units.
h = next((m for m, pve in enumerate(PVE_list, start=1) if pve >= 95), None)
if h is None:
    # Only reachable if the eigenvalues are degenerate (e.g. NaN from a
    # constant feature); fail loudly instead of silently picking a bogus h.
    raise ValueError("PVE never reaches 95%; check X_train for constant or NaN features")

print(f'The smallest integer h such that PVE(h) is greater than or equal to 95% is h = {h} with PVE = {PVE_list[h - 1]}. This value of h={h} is saved and used in the MLP hidden layer units assignment')
# Distinct label values present in the training targets, and how many of
# them there are.
classes = np.unique(y_train)
num_classes = len(classes)

# One-hot encode the training and test targets with the same number of
# columns (num_classes) for both.
y_train_ohe, y_test_ohe = (
    to_categorical(targets, num_classes) for targets in (y_train, y_test)
)
# Shared mean-squared-error object; rmse() below calls it.
mse = MeanSquaredError()


def rmse(y_true, y_pred):
    """Return the square root of ``mse(y_true, y_pred)``.

    Args:
        y_true: Ground-truth targets, passed straight through to ``mse``.
        y_pred: Model predictions, passed straight through to ``mse``.

    Returns:
        The backend square root of the mean squared error.
    """
    mean_squared_error = mse(y_true, y_pred)
    return K.sqrt(mean_squared_error)
# Number of input features = number of columns of the training matrix.
input_dim_cols = X_train.shape[1]

# Baseline MLP: input -> one ReLU hidden layer -> softmax output.
model_0 = Sequential()
model_0.add(Input(shape=(input_dim_cols,)))
# Hidden layer width h comes from the PVE analysis of the correlation matrix.
model_0.add(Dense(units=h, activation='relu'))
# The output width must equal the number of one-hot columns in y_train_ohe,
# so it is taken from num_classes instead of being hard-coded.
model_0.add(Dense(units=num_classes, activation='softmax'))

# Adam optimizer with categorical cross-entropy loss. Accuracy plus the
# user-defined mse and rmse are tracked as additional metrics.
model_0.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', mse, rmse],
)

# NOTE: cross-entropy depends on the predicted probabilities, while accuracy
# only depends on which class has the highest probability. Validation loss
# can therefore keep rising (increasingly over-confident wrong predictions)
# while validation accuracy stays flat -- the usual sign of over-fitting
# during a long run such as the 500 epochs used here.
# The held-out test rows are used as the validation data.
history_0 = model_0.fit(
    X_train,
    y_train_ohe,
    epochs=500,
    validation_data=(X_test, y_test_ohe),
    batch_size=250,
)
def _plot_history(history, metric, title, ylabel, out_path):
    """Plot one training metric and its validation counterpart per epoch.

    The chart is drawn on its own figure, saved to ``out_path`` and shown.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain ``metric`` and ``'val_' + metric``.
        metric: Key of the training series, e.g. ``'accuracy'`` or ``'loss'``.
        title: Chart title.
        ylabel: Label of the y axis.
        out_path: File path the figure is saved to.
    """
    # Use an explicit figure per chart. If plt.show() returns without closing
    # the window (non-interactive backends), drawing through the implicit
    # "current figure" would put this chart on top of the previous one.
    fig, ax = plt.subplots()
    ax.plot(history.history[metric], color='blue')
    ax.plot(history.history[f'val_{metric}'], color='red')
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='best')

    # Make sure the target directory exists so a long training run is not
    # followed by a failed save.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    fig.savefig(out_path)

    plt.show()
    # Release the figure so it is neither re-shown nor re-used later.
    plt.close(fig)


# Training & validation accuracy per epoch.
_plot_history(
    history_0,
    'accuracy',
    'Model accuracy with batch_size=250',
    'Accuracy',
    '../images/final/250_batch_size.png',
)

# Training & validation cross-entropy loss per epoch.
_plot_history(
    history_0,
    'loss',
    'Training & Validation Cross Entropy Loss',
    'Cross Entropy Loss',
    '../images/final/cross_entropy_loss.png',
)