I’m encountering an issue when loading a model that uses a custom optimizer to scale the learning rate of layers. Here’s the optimizer code:
def scaling_factor(alpha, layer_index):
"""
Compute the scaling factor for a given layer index based on the formula:
s(t) = 1 - e^{-alpha * l}
Parameters:
- alpha (float): The alpha value used in the scaling formula.
- layer_index (int): The index of the layer.
Returns:
- float: The computed scaling factor for the layer.
"""
return 1 - tf.exp(-alpha * layer_index)
def get_all_layers(model):
"""
Recursively retrieve all layers from a model, including those inside nested sub-models.
Parameters:
- model (tf.keras.Model): The input model.
Returns:
- list: A list of all layers in the model.
"""
layers = []
for layer in model.layers:
if isinstance(layer, tf.keras.Model): # Check if the layer is a sub-model
layers.extend(get_all_layers(layer))
else:
layers.append(layer)
return layers
@keras.saving.register_keras_serializable()
class LayerwiseLROptimizer(tf.keras.optimizers.Adam):
"""
Custom optimizer that scales the learning rate for each layer based on its index.
Inherits from the Adam optimizer but can be adapted to other optimizers.
"""
def __init__(self, alpha=None, model=None, *args, **kwargs):
"""
Initialize the optimizer.
Parameters:
- alpha (float): The alpha value used in the scaling formula.
- model (tf.keras.Model): The model whose layers' learning rates will be scaled.
"""
super(LayerwiseLROptimizer, self).__init__(*args, **kwargs)
self.alpha = alpha
self.all_layers = get_all_layers(model)
self.layer_multipliers = [scaling_factor(self.alpha, i+1) for i in range(len(self.all_layers))]
def _resource_apply_dense(self, grad, var, apply_state=None):
"""
Apply the scaled gradient to the variable.
Parameters:
- grad (tf.Tensor): The gradient tensor.
- var (tf.Variable): The variable to be updated.
- apply_state (dict, optional): A dictionary containing optimizer-specific information.
Returns:
- tf.Operation: The operation that applies the gradient update.
"""
for i, layer in enumerate(self.all_layers):
if var in layer.trainable_variables:
sf = self.layer_multipliers[i]
break
else:
sf = 1.0
adjusted_grad = grad * sf
return super(LayerwiseLROptimizer, self)._resource_apply_dense(adjusted_grad, var, apply_state)
def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
"""
Apply the scaled gradient to the variable for sparse gradients.
Parameters:
- grad (tf.Tensor): The gradient tensor.
- var (tf.Variable): The variable to be updated.
- indices (tf.Tensor): A tensor of indices into the first dimension of var.
- apply_state (dict, optional): A dictionary containing optimizer-specific information.
Returns:
- tf.Operation: The operation that applies the gradient update.
"""
for i, layer in enumerate(self.all_layers):
if var in layer.trainable_variables:
sf = self.layer_multipliers[i]
break
else:
sf = 1.0
adjusted_grad = grad * sf
return super(LayerwiseLROptimizer, self)._resource_apply_sparse(adjusted_grad, var, indices, apply_state)
def get_config(self):
# Get the configuration of the parent class
config = super(LayerwiseLROptimizer, self).get_config()
# Add the custom attributes to the configuration
config.update({
"alpha": self.alpha,
"learning_rate": self.learning_rate
})
return config
@classmethod
def from_config(cls, config):
# Extract the custom attributes from the configuration
alpha = config.pop("alpha")
learning_rate = config.pop("learning_rate")
# Create a new instance of the optimizer using the extracted attributes and the remaining configuration
return cls(alpha=alpha, learning_rate=learning_rate **config)
When I attempt to load a model that uses this custom optimizer, it doesn’t return anything. However, the issue disappears when I swap out the custom optimizer with a built-in one like Adam
. It’s important to note that the optimizer works perfectly and achieves exactly what I needed it to do.
What I’ve Tried:
- Custom Objects During Loading: I ensured that I provided the custom optimizer as a custom object when loading the model.
- Checking Model Path: I’ve verified that the path to the model is correct and that the model file exists.
- Model File Integrity: I’ve tried loading other models to see if the issue is specific to this particular model file.
- Keras Version: I’ve ensured that the version of Keras I used to save the model is the same as the one I’m using to load the model.
Current Workaround:
Currently, I’m setting the include_optimizer
argument to False
when saving the model. After loading the model, I reinitialize the optimizer:
model.save(f"{directory_paths['experiment_models_directory']}/model.h5", include_optimizer=False)
This works, but I’d like to find a solution that allows me to save and load the model with the optimizer’s state intact.