Hi!
I’m attempting to train, and then save, a custom Keras model (TF v2.7.0) with a custom tf.data.Dataset on an Apple M1 (Monterey 12.6). I’ve tried many variations of the custom model (all train without problems), but I can’t save the model after training. The model is built before training by invoking model.build(input_shape),
but this doesn’t seem to help. I’ve also found that passing in tensors directly (rather than the dataset) doesn’t solve the problem either.
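Roughly, the tensor variant looked like this (a sketch; features and labels here are one batch pulled from the same pipeline shown below):

# Pull a single batch out of the tf.data.Dataset and fit on it directly.
features, labels = next(iter(dataset))   # features is a dict: {'intensity': tensor}
history = convnet.fit(features, labels, epochs=1)
convnet.save("./model")                  # still raises the same ValueError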
I’ve included a code snippet that should reproduce the issue, along with the output from running it in my local environment.
import numpy as np
import sys
import tensorflow as tf
import datapipe as tfds  # custom data-pipeline module (not tensorflow_datasets)
class CANet(tf.keras.Model):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.dimensions = 256
        self.ratio = 4
        self.filters = 256
        self.do = 0.1
        self.num_classes = 6
        self.batch_size = 32
        self.num_heads = 1
    def build(self, input_shape):
        self.conv1 = tf.keras.layers.Conv1D(self.dimensions // 2, 3,
                                            strides=2,
                                            padding='same',
                                            activation=tf.nn.gelu,
                                            input_shape=(4901, 1))  # per-sample shape; batch dim excluded
        self.conv2 = tf.keras.layers.Conv1D(self.dimensions, 3,
                                            strides=2,
                                            padding='same',
                                            activation=tf.nn.gelu)
        self.conv3 = tf.keras.layers.Conv1D(self.dimensions, 3,
                                            strides=2,
                                            padding='same',
                                            activation=tf.nn.gelu)
        self.conv4 = tf.keras.layers.Conv1D(self.dimensions, 3,
                                            strides=2,
                                            padding='same',
                                            activation=tf.nn.gelu)
        self.gap1 = tf.keras.layers.GlobalAveragePooling1D(keepdims=False)
        self.dense4 = tf.keras.layers.Dense(units=self.dimensions, activation=tf.nn.gelu)
        self.dense5 = tf.keras.layers.Dense(units=self.num_classes)
    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "dimensions": self.dimensions,
                "dropout": self.do,
                "ratio": self.ratio,
                "filters": self.filters,
                "num_classes": self.num_classes,
                "num_heads": self.num_heads,
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        return cls(**config)
    def _calculate_loss(self, inputs, test=False):
        x, labels = inputs
        x = self.conv1(x['intensity'])  # features arrive as a dict keyed by 'intensity'
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.gap1(x)
        x = self.dense4(x)
        logits = self.dense5(x)
        total_loss = self.compiled_loss(labels, logits)
        return total_loss, logits
    def train_step(self, inputs):
        with tf.GradientTape() as tape:
            total_loss, logits = self._calculate_loss(inputs)
        # Gather the trainable variables layer by layer.
        train_vars = [
            self.conv1.trainable_variables,
            self.conv2.trainable_variables,
            self.conv3.trainable_variables,
            self.conv4.trainable_variables,
            self.dense4.trainable_variables,
            self.dense5.trainable_variables,
        ]
        grads = tape.gradient(total_loss, train_vars)
        # Flatten the nested per-layer lists into (grad, variable) pairs.
        trainable_variable_list = []
        for (grad, var) in zip(grads, train_vars):
            for g, v in zip(grad, var):
                trainable_variable_list.append((g, v))
        self.optimizer.apply_gradients(trainable_variable_list)
        _, labels = inputs
        self.compiled_metrics.update_state(labels, logits)
        return {m.name: m.result() for m in self.metrics}
    def test_step(self, inputs):
        total_loss, logits = self._calculate_loss(inputs, test=True)
        _, labels = inputs
        self.compiled_metrics.update_state(labels, logits)
        return {m.name: m.result() for m in self.metrics}
    def call(self, inputs):
        x = self.conv1(inputs['intensity'])
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.gap1(x)
        x = self.dense4(x)
        logits = self.dense5(x)
        return logits
Here is the code used to build, train, and save the model:
tf.keras.backend.clear_session()

num_epochs = 1
categories = [1, 2, 3, 4, 5, 6]
num_classes = len(categories)
batch_size = 32
pattern_len = 4901

trainset = "/Users/vanessa/work/repos/ml_4_unit_cell/data_curation/grand_set/mixed"
ds = tfds.data_pipe(categories, data_dir=trainset, batch_size=batch_size,
                    length=pattern_len, num_epochs=num_epochs)
dataset = ds.get_tfdataset()

convnet = CANet()
total_steps = int((16520 / batch_size) * num_epochs)
optimizer = tf.optimizers.Adam()
convnet.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=[
        tf.keras.metrics.CategoricalAccuracy(name="accuracy"),
        tf.keras.metrics.TopKCategoricalAccuracy(5, name="top-5-accuracy"),
    ],
)

ishape = (32, 4901, 1)
print(f'building model with shape {ishape}')
convnet.build(input_shape=ishape)
print(f'TF version: {tf.__version__}')

diter = next(iter(dataset))
x = diter[0]['intensity']
y = diter[1]
print(f'training data has shape: {x.shape.as_list()}')
print(f'labels have shape {y.shape.as_list()}')

history = convnet.fit(dataset, epochs=num_epochs, steps_per_epoch=16520 // batch_size)
convnet.save("./model")
Here’s the output:
building model with shape (32, 4901, 1)
TF version: 2.7.0
training data has shape: [32, 4901, 1]
labels have shape [32, 6]
2022-12-19 11:23:00.809121: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
516/516 [==============================] - 82s 157ms/step - loss: 1.4685 - accuracy: 0.3593 - top-5-accuracy: 0.9740
WARNING:tensorflow:Skipping full serialization of Keras layer <__main__.CANet object at 0x154157310>, because it is not built.
Traceback (most recent call last):
File "/Users/vanessa/work/repos/xtal_powder_pattern_ml/benchmark_data/simulated_data/grand_set/test_attns_short.py", line 175, in <module>
convnet.save("./model")
File "/Users/vanessa/miniforge3/envs/xtal_ml_stuff/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/Users/vanessa/miniforge3/envs/xtal_ml_stuff/lib/python3.9/site-packages/keras/saving/saving_utils.py", line 84, in raise_model_input_error
raise ValueError(
ValueError: Model <__main__.CANet object at 0x154157310> cannot be saved because the input shapes have not been set. Usually, input shapes are automatically determined when calling `.fit()` or `.predict()`. To manually set the shapes, call `model.build(input_shape)`.
I’m confused about why the call convnet.build(input_shape=ishape) was not effective. I’ve also played with adding an input layer at various locations, but that just shifts the problem elsewhere.
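To illustrate, one input-layer variant looked roughly like this (a sketch; the exact placement varied between attempts, and the shape and dict key match what my pipeline emits):

# Wrap the subclassed model behind an explicit Input so Keras records an input spec.
inputs = {'intensity': tf.keras.Input(shape=(4901, 1), name='intensity')}
outputs = convnet(inputs)  # trace the forward pass once
wrapped = tf.keras.Model(inputs=inputs, outputs=outputs)
wrapped.save("./model")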
Thanks in advance!