Getting NaN loss after a few steps in the first epoch, and the accuracy is stuck around 0.50

import math
import pickle
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten

class My_Custom_Generator(tf.keras.utils.Sequence):
    def __init__(self, x_set, batch_size):
        self.x = x_set
        self.batch_size = batch_size

    def __len__(self):
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        low = idx * self.batch_size
        # Cap upper bound at array length; the last batch may be smaller
        # if the total number of items is not a multiple of batch size.
        high = min(low + self.batch_size, len(self.x))
        batch_x = self.x[low:high]
    
        return fetch_batch(batch_x)
    
def fetch_batch(data):
    input_data = []
    ref_data = []
    labels = []
    for item in data:
        input_path, ref_path, label = item

        # Load each image as grayscale and normalize to [0, 1] without keeping the original in memory
        with Image.open(input_path) as img:
            img = img.convert('L')  # Convert to grayscale
            input_img = np.array(img) / 255.0  # Normalize

        with Image.open(ref_path) as img:
            img = img.convert('L')  # Convert to grayscale
            ref_img = np.array(img) / 255.0  # Normalize

        # Append processed images and labels
        input_data.append(input_img)
        ref_data.append(ref_img)
        labels.append(label)

    input_data = np.array(input_data)
    ref_data = np.array(ref_data)
    labels = np.array(labels)

    # Note: `del` only drops the local reference; the intermediate per-image
    # arrays are freed automatically once this function returns.
    del data

    return ([input_data, ref_data], labels)
            

class SiameseNetwork:
    def __init__(self, model_path):
        self.loaded_model = tf.keras.models.load_model(model_path)
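        # Reuse the trained autoencoder's bottleneck ('code_layer') as the embedding output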
        self.encoder = Model(inputs=self.loaded_model.input, outputs=self.loaded_model.get_layer('code_layer').output)

    def build_shared_network(self):
        encoded_output = self.loaded_model.get_layer('code_layer').output
        flatten_layer = Flatten()(encoded_output)
        fc_layer1 = Dense(128, activation='relu')(flatten_layer)
        fc_layer2 = Dense(64, activation='relu')(fc_layer1)
        shared_network = Model(self.loaded_model.input, fc_layer2)
        return shared_network

    def create_siamese_model(self, shared_network):
        input_shape = (352, 288, 1)
        input_pair1 = Input(input_shape, name="input1")
        input_pair2 = Input(input_shape, name="input2")
        output_pair1 = shared_network(input_pair1)
        output_pair2 = shared_network(input_pair2)
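        # Euclidean (L2) distance between the two embeddings, mapped to a similarity score by the sigmoid head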
        distance = tf.norm(output_pair1 - output_pair2, axis=1, keepdims=True)
        outputs = Dense(1, activation='sigmoid')(distance)
        siamese_model = Model([input_pair1, input_pair2], outputs)
        return siamese_model

    def train_siamese_model(self, siamese_model, epochs, itr):
        siamese_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        batch_size = 32
        file_path="data/pos_neg_data/pos_neg_1st_set/train/pos_neg_1st_train_set"
        with open(file_path, "rb") as fp: 
            train_data = pickle.load(fp)        
        file_path="data/pos_neg_data/pos_neg_1st_set/val/pos_neg_1st_val_set"
        with open(file_path, "rb") as fp: 
            val_data = pickle.load(fp)        
        training_batch_generator = My_Custom_Generator(train_data, batch_size)
        validation_batch_generator = My_Custom_Generator(val_data, batch_size)
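
        # NOTE: the four callbacks used below were defined elsewhere in the
        # original script; these are representative stand-ins, and the log
        # paths are illustrative, not the originals.
        csv_logger = tf.keras.callbacks.CSVLogger(f'Results/Tensorboard/{itr}/training_log.csv')
        tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=f'Results/Tensorboard/{itr}')
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
        reduce_lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2)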

        siamese_model.fit(training_batch_generator, epochs=epochs,
                          validation_data=validation_batch_generator,
                          callbacks=[csv_logger, tensorboard_callback, early_stopping, reduce_lr_on_plateau])
        return siamese_model
    

    def predict_similarity(self, siamese_model, reference_image, test_image):
        similarity_score = siamese_model.predict([
            np.expand_dims(reference_image, axis=0),
            np.expand_dims(test_image, axis=0)
        ])
        return similarity_score

    def model_summary(self, model):
        # model.summary() already prints the summary and returns None,
        # so there is no need to wrap it in print()
        model.summary()


def main():

    model_path = 'autoencoder_model/itr4/model.h5'

    siamese_network = SiameseNetwork(model_path)
    shared_network = siamese_network.build_shared_network()
    siamese_network.model_summary(shared_network)
    siamese_model = siamese_network.create_siamese_model(shared_network)
    siamese_network.model_summary(siamese_model)
    epochs = 5
    itr = "itr1"
    siamese_model = siamese_network.train_siamese_model(siamese_model, epochs, itr)
    siamese_model.save(f"Results/Tensorboard/{itr}/siamese_model.h5")
    
if __name__ == "__main__": 
    main()

Hi @Aniket_Ingle, to my knowledge you may have an issue with the input data. Please make sure that:

  • the input data does not contain any NaN values;
  • all of the target values are valid (e.g. strictly 0 or 1 for binary cross-entropy);
  • the data is properly normalized.
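
For example, a quick sanity check over the generator output could look like this (a minimal sketch; training_batch_generator is the My_Custom_Generator instance from your code, and I'm assuming binary 0/1 labels):

import numpy as np

for i in range(len(training_batch_generator)):
    (input_batch, ref_batch), label_batch = training_batch_generator[i]
    assert not np.isnan(input_batch).any(), f"NaN in input batch {i}"
    assert not np.isnan(ref_batch).any(), f"NaN in reference batch {i}"
    assert np.isin(label_batch, [0, 1]).all(), f"Invalid label in batch {i}"
    assert input_batch.min() >= 0.0 and input_batch.max() <= 1.0, f"Batch {i} not scaled to [0, 1]"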

Thank You.

I checked the data; it is properly preprocessed.
I have also trained another model with the same data, so I don't think the data is the problem.
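
For context, the check I ran was along these lines (a sketch; train_data is the pickled list of (input_path, ref_path, label) tuples):

from PIL import Image
import numpy as np

for input_path, ref_path, label in train_data:
    for path in (input_path, ref_path):
        with Image.open(path) as img:
            arr = np.array(img.convert('L')) / 255.0
        assert not np.isnan(arr).any(), f"NaN pixels in {path}"
        assert 0.0 <= arr.min() and arr.max() <= 1.0, f"{path} not in [0, 1]"
    assert label in (0, 1), f"Unexpected label {label}"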

Hi @Aniket_Ingle, if possible, could you please provide a sample of the data you used to train the model so that I can replicate and debug the issue? Thank You.