Hello, I am building a Siamese neural network with triplet loss in Keras and have run into an odd problem. I tried saving my training history twice: once from a custom callback (as a dictionary), and once after training by pickling the History object returned by model.fit(). In both cases the code runs without errors, but the saved files end up only a few hundred bytes, and when I plot them with matplotlib the plot is empty.
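For context, the post-training save looks roughly like this (a simplified sketch rather than my exact code; train_ds, val_ds, num_epochs, folder_path and the file names are placeholders, and metrics_callback is an instance of the callback shown below):

import os
import pickle

metrics_callback = MetricsCallback(train_ds, val_ds, embedding_model, folder_path, 'metrics')
history = model.fit(train_ds, validation_data=val_ds, epochs=num_epochs, callbacks=[metrics_callback])
with open(os.path.join(folder_path, 'fit_history.pkl'), 'wb') as f:
    pickle.dump(history, f)  # pickling the History object returned by model.fit()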
Here’s the callback I passed to model.fit():
import os
import pickle
import numpy as np
import tensorflow
from sklearn.metrics import accuracy_score, roc_curve, auc

class MetricsCallback(tensorflow.keras.callbacks.Callback):
    def __init__(self, train_data, validation_data, embedding_model, folder_path, file_name):
        super(MetricsCallback, self).__init__()
        self.validation_data = validation_data
        self.train_data = train_data
        self.history = {}
        self.embedding_model = embedding_model
        self.file_name = file_name
        self.folder_path = folder_path
    def calc_metrics(self, dataset):
        # for accuracy
        y_true_acc = []
        y_pred = []
        # for AUC-ROC and EER
        y_true = []
        y_scores = []
        for triplet in dataset:  # dataset is train_data or validation_data
            anchor, positive, negative = triplet  # unpack the anchor, positive and negative tensors from each triplet tuple
            a_emb = self.embedding_model(np.expand_dims(anchor, axis=0))  # get the embeddings for each
            p_emb = self.embedding_model(np.expand_dims(positive, axis=0))
            n_emb = self.embedding_model(np.expand_dims(negative, axis=0))
            dist_p = np.linalg.norm(a_emb - p_emb)  # calculate distances
            dist_n = np.linalg.norm(a_emb - n_emb)
            # for triplet accuracy:
            y_true_acc.append(1)  # the true label is always 1
            if dist_p + alpha < dist_n:  # accurate (alpha is the triplet-loss margin, defined globally)
                y_pred.append(1)
            else:
                y_pred.append(0)
            # for AUC-ROC and EER:
            y_true.append(0)  # for a-p
            y_true.append(1)  # for a-n
            y_scores.append(1 - dist_p)  # if it's small like we want, it gets a high score
            y_scores.append(dist_n)  # if it's big like we want, it gets a high score
        # compute accuracy:
        accuracy = accuracy_score(y_true_acc, y_pred)
        # compute AUC-ROC and EER:
        fpr, tpr, thresholds = roc_curve(y_true, y_scores)
        auc_roc = auc(fpr, tpr)
        eer = 1 - max(tpr - fpr)
        return accuracy, auc_roc, eer, fpr, tpr
    def on_epoch_end(self, epoch, logs=None):
        # Compute metrics on train data
        loss_train = logs.get('loss')
        accuracy_train, auc_roc_train, eer_train, fpr_train, tpr_train = self.calc_metrics(self.train_data)
        self.history['train_loss'] = loss_train
        self.history['train_accuracy'] = accuracy_train
        self.history['train_auc_roc'] = auc_roc_train
        self.history['train_eer'] = eer_train
        self.history['train_fpr'] = fpr_train
        self.history['train_tpr'] = tpr_train
        # Log the metrics
        print(f'Epoch {epoch + 1} - Train Loss: {loss_train}, Accuracy: {accuracy_train}, AUC-ROC: {auc_roc_train}, EER: {eer_train}')
        # Compute metrics on validation data
        loss_val = logs.get('val_loss')
        accuracy_val, auc_roc_val, eer_val, fpr_val, tpr_val = self.calc_metrics(self.validation_data)
        self.history['val_loss'] = loss_val
        self.history['val_accuracy'] = accuracy_val
        self.history['val_auc_roc'] = auc_roc_val
        self.history['val_eer'] = eer_val
        self.history['val_fpr'] = fpr_val
        self.history['val_tpr'] = tpr_val
        # Log the metrics
        print(f'Epoch {epoch + 1} - Validation Loss: {loss_val}, Accuracy: {accuracy_val}, AUC-ROC: {auc_roc_val}, EER: {eer_val}')
        # Save the metric history to Google Drive
        epoch_number = epoch + 1  # epoch starts from 0, so we add 1 to get the actual epoch number
        file_name_with_epoch = f"{self.file_name}_epoch_{epoch_number}.pkl"
        file_path = os.path.join(self.folder_path, file_name_with_epoch)
        with open(file_path, 'wb') as file_pi:
            pickle.dump(self.history, file_pi)
        print('saved metrics')
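And this is roughly how I load one of the saved files and plot it afterwards (again a simplified sketch; file_path is a placeholder for the actual path of one of the .pkl files on my Drive):

import pickle
import matplotlib.pyplot as plt

with open(file_path, 'rb') as f:
    history = pickle.load(f)  # the dictionary the callback dumped

plt.plot(history['train_loss'], label='train loss')
plt.plot(history['val_loss'], label='val loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()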
Does anyone know why this is happening? Thanks.