I am using the following code to find the optimal learning rate for a given neural network model - lrfinder/lrfinder/lrfinder.py at master · beringresearch/lrfinder · GitHub - specifically via the `get_best_lr` function.
Once I have the optimal learning rate, how can I programmatically determine the LR bounds (minimum and maximum values) relative to that optimal LR, so that I can use them with the Cyclical Learning Rate (CLR) method ([1506.01186] Cyclical Learning Rates for Training Neural Networks)?
Code from the cited GitHub repository:
import math
import matplotlib.pyplot as plt
import tensorflow.keras.backend as K
import numpy as np
from tensorflow.keras.callbacks import LambdaCallback
class LRFinder:
    """
    Learning rate range test detailed in Cyclical Learning Rates for Training
    Neural Networks by Leslie N. Smith. The learning rate range test is a test
    that provides valuable information about the optimal learning rate. During
    a pre-training run, the learning rate is increased linearly or
    exponentially between two boundaries. The low initial learning rate allows
    the network to start converging and as the learning rate is increased it
    will eventually be too large and the network will diverge.

    This implementation multiplies the learning rate by a constant factor
    after every batch (an exponential sweep) and records the loss reported
    for each batch.
    """

    def __init__(self, model):
        """Store the model to probe and initialise empty sweep records."""
        self.model = model
        self.losses = []
        self.learning_rates = []
        self.best_loss = 1e9
        # Per-batch multiplier; `find` computes the real value. Defaulting to
        # 1.0 keeps `on_batch_end` usable before `find` has been called.
        self.lr_mult = 1.0

    def on_batch_end(self, batch, logs):
        """Record the batch's learning rate and loss, then raise the rate.

        Training is stopped (via ``model.stop_training``) once the loss is NaN
        or exceeds four times the best loss seen so far, but only after batch
        index 5 so that the first few batches cannot end the sweep.
        """
        lr = K.get_value(self.model.optimizer.lr)
        self.learning_rates.append(lr)

        loss = logs['loss']
        self.losses.append(loss)

        if batch > 5 and (math.isnan(loss) or loss > self.best_loss * 4):
            self.model.stop_training = True
            return

        if loss < self.best_loss:
            self.best_loss = loss

        lr *= self.lr_mult
        K.set_value(self.model.optimizer.lr, lr)

    def find(self, dataset, start_lr, end_lr, epochs=1,
             steps_per_epoch=None, **kw_fit):
        """Sweep the learning rate from ``start_lr`` towards ``end_lr``.

        The model is fitted on ``dataset`` for ``epochs`` epochs while the
        learning rate is multiplied after every batch by
        ``(end_lr / start_lr) ** (1 / (epochs * steps_per_epoch))``.
        The model's weights and the optimizer's learning rate are restored
        afterwards, even if fitting raises.

        Args:
            dataset: Passed as the first argument to ``model.fit``.
            start_lr: Learning rate used for the first batch.
            end_lr: Learning rate the sweep is scaled to reach.
            epochs: Number of epochs passed to ``model.fit``.
            steps_per_epoch: Number of batches per epoch; required in order
                to compute the per-batch multiplier.
            **kw_fit: Extra keyword arguments forwarded to ``model.fit``.

        Raises:
            ValueError: If ``steps_per_epoch`` is None.
        """
        if steps_per_epoch is None:
            raise ValueError('To correctly train on the datagenerator, '
                             '`steps_per_epoch` cannot be None. '
                             'You can calculate it as '
                             '`np.ceil(len(TRAINING_LIST) / BATCH)`')

        # Start from a clean slate so that a repeated call does not mix the
        # records (or the early-stop threshold) of two different sweeps.
        self.losses = []
        self.learning_rates = []
        self.best_loss = 1e9

        total_steps = float(epochs * steps_per_epoch)
        self.lr_mult = (float(end_lr) / float(start_lr)) ** (1.0 / total_steps)

        initial_weights = self.model.get_weights()
        original_lr = K.get_value(self.model.optimizer.lr)
        K.set_value(self.model.optimizer.lr, start_lr)

        callback = LambdaCallback(on_batch_end=self.on_batch_end)
        try:
            self.model.fit(dataset,
                           epochs=epochs, callbacks=[callback], **kw_fit)
        finally:
            # Always undo the side effects of the probe run.
            self.model.set_weights(initial_weights)
            K.set_value(self.model.optimizer.lr, original_lr)

    def get_learning_rates(self):
        """Return the list of learning rates recorded per batch."""
        return self.learning_rates

    def get_losses(self):
        """Return the list of losses recorded per batch."""
        return self.losses

    def get_derivatives(self, sma):
        """Return the loss change over a window of ``sma`` batches.

        Entry ``i`` (for ``i >= sma``) is
        ``(losses[i] - losses[i - sma]) / sma``; the leading entries, for
        which no full window exists, are 0. The result has one entry per
        recorded learning rate.

        Raises:
            ValueError: If ``sma`` is smaller than 1.
        """
        if sma < 1:
            raise ValueError('`sma` must be >= 1')

        n_points = len(self.learning_rates)
        # Never pad beyond the number of recorded points so the result stays
        # index-aligned with `learning_rates`.
        derivatives = [0] * min(sma, n_points)
        derivatives.extend(
            (self.losses[i] - self.losses[i - sma]) / sma
            for i in range(sma, n_points)
        )
        return derivatives

    def get_best_lr(self, sma, n_skip_beginning=10, n_skip_end=5):
        """Return the learning rate at which the loss decreases fastest.

        Args:
            sma: Window size forwarded to ``get_derivatives``.
            n_skip_beginning: Number of leading records to ignore.
            n_skip_end: Number of trailing records to ignore (may be 0).

        Raises:
            ValueError: If a skip count is negative or no records remain
                after skipping.
        """
        if n_skip_beginning < 0 or n_skip_end < 0:
            raise ValueError('`n_skip_beginning` and `n_skip_end` '
                             'must be non-negative')

        derivatives = self.get_derivatives(sma)
        # An explicit stop index is used because `[:-0]` would be an empty
        # slice; clamp at 0 so a large skip cannot wrap around.
        stop = max(len(derivatives) - n_skip_end, 0)
        window = derivatives[n_skip_beginning:stop]
        if not window:
            raise ValueError('Not enough recorded batches to pick a '
                             'learning rate after skipping')

        best_der_idx = int(np.argmin(window))
        return self.learning_rates[n_skip_beginning + best_der_idx]