Getting NaN loss after a few steps in the first epoch, and the accuracy is stuck around 0.50

import math
import pickle
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten

class My_Custom_Generator(tf.keras.utils.Sequence):
    def __init__(self, x_set, batch_size):
        self.x = x_set
        self.batch_size = batch_size

    def __len__(self):
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        low = idx * self.batch_size
        # Cap upper bound at array length; the last batch may be smaller
        # if the total number of items is not a multiple of batch size.
        high = min(low + self.batch_size, len(self.x))
        batch_x = self.x[low:high]
    
        return fetch_batch(batch_x)
    
def fetch_batch(data):
    input_data = []
    ref_data = []
    labels = []
    for item in data:
        input_path, ref_path, label = item

        # Load each image as grayscale and normalize to [0, 1] without keeping the original in memory
        with Image.open(input_path) as img:
            img = img.convert('L')  # Convert to grayscale
            input_img = np.array(img) / 255.0  # Normalize

        with Image.open(ref_path) as img:
            img = img.convert('L')  # Convert to grayscale
            ref_img = np.array(img) / 255.0  # Normalize

        # Append processed images and labels
        input_data.append(input_img)
        ref_data.append(ref_img)
        labels.append(label)

    input_data = np.array(input_data)
    ref_data = np.array(ref_data)
    labels = np.array(labels)

    # Note: `del` only drops the local reference; the intermediate per-image
    # arrays are freed automatically once this function returns.
    del data

    return ([input_data, ref_data], labels)
            

class SiameseNetwork:
    def __init__(self, model_path):
        self.loaded_model = tf.keras.models.load_model(model_path)
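        # Reuse the trained autoencoder's bottleneck ('code_layer') as the embedding output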
        self.encoder = Model(inputs=self.loaded_model.input, outputs=self.loaded_model.get_layer('code_layer').output)

    def build_shared_network(self):
        encoded_output = self.loaded_model.get_layer('code_layer').output
        flatten_layer = Flatten()(encoded_output)
        fc_layer1 = Dense(128, activation='relu')(flatten_layer)
        fc_layer2 = Dense(64, activation='relu')(fc_layer1)
        shared_network = Model(self.loaded_model.input, fc_layer2)
        return shared_network

    def create_siamese_model(self, shared_network):
        input_shape = (352, 288, 1)
        input_pair1 = Input(input_shape, name="input1")
        input_pair2 = Input(input_shape, name="input2")
        output_pair1 = shared_network(input_pair1)
        output_pair2 = shared_network(input_pair2)
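        # Euclidean (L2) distance between the two embeddings, mapped to a similarity score by the sigmoid head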
        distance = tf.norm(output_pair1 - output_pair2, axis=1, keepdims=True)
        outputs = Dense(1, activation='sigmoid')(distance)
        siamese_model = Model([input_pair1, input_pair2], outputs)
        return siamese_model

    def train_siamese_model(self, siamese_model, epochs, itr):
        siamese_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        batch_size = 32
        file_path="data/pos_neg_data/pos_neg_1st_set/train/pos_neg_1st_train_set"
        with open(file_path, "rb") as fp: 
            train_data = pickle.load(fp)        
        file_path="data/pos_neg_data/pos_neg_1st_set/val/pos_neg_1st_val_set"
        with open(file_path, "rb") as fp: 
            val_data = pickle.load(fp)        
        training_batch_generator = My_Custom_Generator(train_data, batch_size)
        validation_batch_generator = My_Custom_Generator(val_data, batch_size)
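
        # NOTE: the four callbacks used below were defined elsewhere in the
        # original script; these are representative stand-ins, and the log
        # paths are illustrative, not the originals.
        csv_logger = tf.keras.callbacks.CSVLogger(f'Results/Tensorboard/{itr}/training_log.csv')
        tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=f'Results/Tensorboard/{itr}')
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
        reduce_lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2)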

        siamese_model.fit(training_batch_generator, epochs=epochs,
                          validation_data=validation_batch_generator,
                          callbacks=[csv_logger, tensorboard_callback, early_stopping, reduce_lr_on_plateau])
        return siamese_model
    

    def predict_similarity(self, siamese_model, reference_image, test_image):
        similarity_score = siamese_model.predict([
            np.expand_dims(reference_image, axis=0),
            np.expand_dims(test_image, axis=0)
        ])
        return similarity_score

    def model_summary(self, model):
        # model.summary() already prints the summary and returns None,
        # so there is no need to wrap it in print()
        model.summary()


def main():

    model_path = 'autoencoder_model/itr4/model.h5'

    siamese_network = SiameseNetwork(model_path)
    shared_network = siamese_network.build_shared_network()
    siamese_network.model_summary(shared_network)
    siamese_model = siamese_network.create_siamese_model(shared_network)
    siamese_network.model_summary(siamese_model)
    epochs = 5
    itr = "itr1"
    siamese_model = siamese_network.train_siamese_model(siamese_model, epochs, itr)
    siamese_model.save(f"Results/Tensorboard/{itr}/siamese_model.h5")
    
if __name__ == "__main__": 
    main()

Hi @Aniket_Ingle, to my knowledge you may have an issue with the input data. Please make sure that:

  • the input data does not contain any NaN values;
  • all of the target values are valid (e.g. strictly 0 or 1 for binary cross-entropy);
  • the data is properly normalized.
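
For example, a quick sanity check over the generator output could look like this (a minimal sketch; training_batch_generator is the My_Custom_Generator instance from your code, and I'm assuming binary 0/1 labels):

import numpy as np

for i in range(len(training_batch_generator)):
    (input_batch, ref_batch), label_batch = training_batch_generator[i]
    assert not np.isnan(input_batch).any(), f"NaN in input batch {i}"
    assert not np.isnan(ref_batch).any(), f"NaN in reference batch {i}"
    assert np.isin(label_batch, [0, 1]).all(), f"Invalid label in batch {i}"
    assert input_batch.min() >= 0.0 and input_batch.max() <= 1.0, f"Batch {i} not scaled to [0, 1]"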

Thank You.

I checked the data; it is properly preprocessed.
I have also trained another model with the same data, so I don't think the data is the problem.
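
For context, the check I ran was along these lines (a sketch; train_data is the pickled list of (input_path, ref_path, label) tuples):

from PIL import Image
import numpy as np

for input_path, ref_path, label in train_data:
    for path in (input_path, ref_path):
        with Image.open(path) as img:
            arr = np.array(img.convert('L')) / 255.0
        assert not np.isnan(arr).any(), f"NaN pixels in {path}"
        assert 0.0 <= arr.min() and arr.max() <= 1.0, f"{path} not in [0, 1]"
    assert label in (0, 1), f"Unexpected label {label}"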

Hi @Aniket_Ingle, if possible, could you please provide a sample of the data you used to train the model so that I can replicate and debug the issue? Thank You.