Hello everyone,
I’ve been stuck on an error for a week and can’t solve it. When I train my model, I get this:
Epoch 1/5
426/426 [==============================] - 8s 12ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0000e+00
Epoch 2/5
426/426 [==============================] - 4s 9ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0000e+00
Epoch 3/5
426/426 [==============================] - 3s 7ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0000e+00
Epoch 4/5
426/426 [==============================] - 2s 6ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0000e+00
Epoch 5/5
426/426 [==============================] - 2s 6ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0000e+00
My images have shape (64, 32, 1) and I have to predict 17 classes. The images contain no NaN values, and I use min-max scaling to normalize them (their raw values range from 0 to 10000). I’ve also tried changing the learning rate, but nothing changes. I’m a bit stuck. Do you know what I should do? Is the problem with the data, my normalization, or the model itself (my images aren’t RGB, they have a single channel)?
Strangely enough, model.fit() works when the output layer has more than 17 neurons: the NaN loss disappears, but that’s not what I want because I need to predict exactly 17 classes.
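One thing I still want to double-check (not sure if it’s related) is whether my label_posture values actually run from 0 to 16, since as far as I know sparse_categorical_crossentropy expects labels in the range [0, num_classes). Something like this quick inspection, using the reference_df loaded in the code below:

labels = reference_df['label_posture'].values
print(labels.min(), labels.max())
print(sorted(set(labels)))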
I’d be very grateful if you could help me. You can find my code below:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

reference_df = pd.read_csv('/gdrive/MyDrive/HDA_project/reference.csv', names=['id_image', 'label_subject', 'label_posture'])
reference_df = reference_df.drop(index=reference_df.index[0], axis=0)  # drop the first row (original header read in as data)
reference_df = reference_df.set_index('id_image')  # set index
reference_df.index.name = None  # remove the name of the index
reference_df['label_subject'] = reference_df['label_subject'].astype(int)
reference_df['label_posture'] = reference_df['label_posture'].astype(int)
# 72 % train, 8% validation, 20% test
train_val_reference_df, test_reference_df = train_test_split(reference_df, train_size=0.8, stratify=reference_df['label_posture'], random_state=123)
train_reference_df, val_reference_df = train_test_split(train_val_reference_df, train_size=0.9, random_state=123)
def load_data(file_name, data_dir):
    if isinstance(data_dir, bytes):
        data_dir = data_dir.decode()
    if isinstance(file_name, bytes):
        file_name = file_name.decode()
    # Load the image from the .txt file
    file_mat = '/gdrive/MyDrive/HDA_project/' + data_dir + '/' + file_name + '.txt'
    data = np.loadtxt(file_mat)
    return data
def normalize_data(data):
    min_val = np.min(data)
    max_val = np.max(data)
    data = (data - min_val) / (max_val - min_val)
    return data
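# (Aside: I realize that if an image were completely constant, max_val - min_val would be zero
#  and the division above would produce NaN/inf. I don't think my data has such images, but a
#  guarded variant would look like this -- normalize_data_safe and eps are just illustrative names.)
def normalize_data_safe(data, eps=1e-8):
    min_val = np.min(data)
    max_val = np.max(data)
    return (data - min_val) / (max_val - min_val + eps)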
def load_and_preprocess_data(file_name, data_dir):
    # Load data
    data = load_data(file_name, data_dir)
    # Normalize to [0, 1]
    data = normalize_data(data)
    return data.astype(np.float32)
def create_dataset(reference_df, batch_size, shuffle, cache_file=None):
    # Convert dataframe to lists
    file_names = list(reference_df.index)
    labels = reference_df['label_posture']
    # Create a Dataset object
    dataset = tf.data.Dataset.from_tensor_slices((file_names, labels))
    # Map the load_and_preprocess_data function
    py_func = lambda file_name, label: (tf.numpy_function(load_and_preprocess_data, [file_name, data_dir],
                                                          tf.float32), label)
    dataset = dataset.map(py_func, num_parallel_calls=os.cpu_count())
    # Cache dataset
    if cache_file:
        dataset = dataset.cache(cache_file)
    # Shuffle
    if shuffle:
        dataset = dataset.shuffle(len(file_names))
    # Repeat the dataset indefinitely
    dataset = dataset.repeat()
    # Correct input shape for the network
    dataset = dataset.map(lambda data, label: (tf.reshape(data, [64, 32, 1]), label))
    # Batch
    dataset = dataset.batch(batch_size=batch_size)
    # Prefetch
    dataset = dataset.prefetch(buffer_size=1)
    return dataset
batch_size = 32
train_dataset = create_dataset(train_reference_df,
                               batch_size=batch_size,
                               shuffle=True,
                               cache_file='train_cache')
val_dataset = create_dataset(val_reference_df,
                             batch_size=batch_size,
                             shuffle=False,
                             cache_file='val_cache')
test_dataset = create_dataset(test_reference_df,
                              batch_size=batch_size,
                              shuffle=False,
                              cache_file='test_cache')
train_steps = int(np.ceil(len(train_reference_df)/batch_size))
val_steps = int(np.ceil(len(val_reference_df)/batch_size))
test_steps = int(np.ceil(len(test_reference_df)/batch_size))
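# (For debugging, this is a quick sanity check I could run on one batch to make sure nothing in
#  the input pipeline itself produces NaN -- just an inspection snippet, not part of the model.)
for images, labels in train_dataset.take(1):
    print('batch shape:', images.shape)
    print('min/max:', tf.reduce_min(images).numpy(), tf.reduce_max(images).numpy())
    print('any NaN in batch:', bool(tf.reduce_any(tf.math.is_nan(images)).numpy()))
    print('labels:', labels.numpy())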
# FUNCTION: PostureModel
def PostureModel(input_shape):
    """
    Implementation of the PostureModel

    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    model -- a Model() instance in TensorFlow
    """
    # Input placeholder as a tensor with shape input_shape
    X_input = tf.keras.Input(input_shape)
    # CONV -> Batch Normalization -> ReLU -> MAXPOOL block
    X = tf.keras.layers.Conv2D(16, (3, 3), strides=(1, 1), padding='same', activation=None)(X_input)
    X = tf.keras.layers.BatchNormalization(axis=-1)(X)
    X = tf.keras.layers.Activation('relu')(X)
    X = tf.keras.layers.MaxPool2D((2, 2), strides=(2, 2), padding='same')(X)
    # CONV -> Batch Normalization -> ReLU -> MAXPOOL block
    X = tf.keras.layers.Conv2D(32, (3, 3), strides=(1, 1), padding='same', activation=None)(X)
    X = tf.keras.layers.BatchNormalization(axis=-1)(X)
    X = tf.keras.layers.Activation('relu')(X)
    X = tf.keras.layers.MaxPool2D((2, 2), strides=(2, 2), padding='same')(X)
    # FLATTEN THE TENSOR
    X = tf.keras.layers.Flatten()(X)
    # 2 FULLY CONNECTED (DENSE) LAYERS WITH RELU ACTIVATION AND 256 OUTPUT NEURONS EACH
    X = tf.keras.layers.Dense(256, activation='relu')(X)
    X = tf.keras.layers.Dense(256, activation='relu')(X)
    # DROPOUT LAYER (DROP PROBABILITY 0.4)
    X = tf.keras.layers.Dropout(0.4)(X)
    # OUTPUT DENSE LAYER WITH 17 NEURONS AND SOFTMAX ACTIVATION
    X = tf.keras.layers.Dense(17, activation='softmax')(X)
    model = tf.keras.Model(inputs=X_input, outputs=X, name='PostureModel')
    return model
# Create and compile the network model
input_shape = (64, 32, 1)
network_model = PostureModel(input_shape)
network_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
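# (This is what I mean above by "changing the learning rate": compiling with an explicit Adam
#  optimizer at a lower rate, e.g. 1e-4 instead of the default 1e-3 -- the exact value is just
#  one example I tried. It made no difference to the NaN loss.)
# network_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
#                       loss='sparse_categorical_crossentropy',
#                       metrics=['accuracy'])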
# Train the model
num_epochs = 5
early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
# Fit the model
history = network_model.fit(train_dataset,
                            epochs=num_epochs,
                            steps_per_epoch=train_steps,
                            validation_data=val_dataset,
                            validation_steps=val_steps,
                            callbacks=[early_stop_callback])
# Save the model
network_model.save('my_model.h5')