Hi everyone,
I am currently adapting an existing network architecture (TSMixer: An All-MLP Architecture for Time Series Forecasting) for sequence classification.
The structure of my data: it is time series data, consisting of measurements of 5.5 seconds each, sampled at 128 Hz, with three features. This means my data can be divided into coherent sequences of 704 consecutive data points (5.5 s × 128 Hz = 704).
The aim is to assign a complete sequence of these 704 consecutive and coherent data points to a single class. It is a binary classification task.
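To make the expected shapes concrete, here is a minimal sketch of a single example (the constant names are mine, not from my actual code):

import numpy as np

SAMPLE_RATE_HZ = 128                         # sampling frequency
DURATION_S = 5.5                             # length of one measurement in seconds
SEQ_LEN = int(SAMPLE_RATE_HZ * DURATION_S)   # 128 * 5.5 = 704 time steps
N_FEATURES = 3

x = np.zeros((SEQ_LEN, N_FEATURES), dtype=np.float32)  # one sequence: (704, 3)
y = np.float32(0.0)  # one binary label for the whole sequence (0 = no pain, 1 = pain)
print(x.shape)       # (704, 3)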
I generate my data for training, validation and testing as follows (these are methods of my data-loading class):

import numpy as np
import tensorflow as tf

def _split_window(self, data, label):
    inputs = data[:, : self.seq_len, :]
    # Slicing doesn't preserve static shape information, so set the shapes
    # manually. This way the `tf.data.Dataset`s are easier to inspect.
    inputs.set_shape([None, self.seq_len, None])
    return inputs, label

def _make_dataset(self, data, label, shuffle=True):
    data = np.array(data, dtype=np.float32)
    label = np.array(label, dtype=np.float32)
    ds = tf.keras.utils.timeseries_dataset_from_array(
        data=data,
        targets=label,
        sequence_length=self.seq_len,
        sequence_stride=1,
        shuffle=shuffle,
        batch_size=self.batch_size,
    )
    ds = ds.map(self._split_window)
    return ds
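For completeness, this is roughly how I create the three datasets (`loader` and the array names are placeholders for my actual objects):

# 'loader' is an instance of my data class (self.seq_len = 704,
# self.batch_size comes from my config)
train_ds = loader._make_dataset(train_x, train_y, shuffle=True)
val_ds = loader._make_dataset(val_x, val_y, shuffle=False)
test_ds = loader._make_dataset(test_x, test_y, shuffle=False)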
The element specs of the resulting datasets look like this:
train data Tensor: <_MapDataset element_spec=(TensorSpec(shape=(None, 704, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.float32, name=None))>
val data Tensor: <_MapDataset element_spec=(TensorSpec(shape=(None, 704, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.float32, name=None))>
test data Tensor: <_MapDataset element_spec=(TensorSpec(shape=(None, 704, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.float32, name=None))>
My model is structured as follows:
import tensorflow as tf
from keras import layers, ops

def res_block(inputs, norm_type, activation, dropout, ff_dim):
    """Residual block of TSMixer."""
    norm = (
        layers.LayerNormalization
        if norm_type == 'L'
        else layers.BatchNormalization
    )

    # Temporal Linear
    x = norm(axis=-1)(inputs)
    #x = norm(axis=[-2, -1])(inputs)  # -1 = feature axis, -2 = sample
    #x = tf.transpose(x, perm=[0, 2, 1])  # [Batch, Channel, Input Length] -> leads to an error due to incompatibility between Keras and TF tensors
    print('bT1', x.shape)
    #x = TransposeLayer(perm=[0, 2, 1])(x)
    x = ops.transpose(x, axes=[0, 2, 1])  # [Batch, Channel, Input Length]
    print('aT1', x.shape)
    x = layers.Dense(x.shape[-1], activation=activation)(x)
    print('bT2', x.shape)
    #x = TransposeLayer(perm=[0, 2, 1])(x)
    x = ops.transpose(x, axes=[0, 2, 1])  # [Batch, Input Length, Channel]
    print('aT2', x.shape)
    x = layers.Dropout(dropout)(x)
    res = x + inputs
    print('res: ', res.shape)

    # Feature Linear
    #x = norm(axis=[-2, -1])(res)  # -1 = feature axis, -2 = sample
    x = norm(axis=-1)(res)
    x = layers.Dense(ff_dim, activation=activation)(x)  # [Batch, Input Length, FF_Dim]
    x = layers.Dropout(dropout)(x)
    x = layers.Dense(inputs.shape[-1])(x)  # [Batch, Input Length, Channel]
    x = layers.Dropout(dropout)(x)
    print(x.shape, res.shape)
    return x + res
def build_model(
    input_shape,
    pred_len,
    norm_type,
    activation,
    n_block,
    dropout,
    ff_dim,
    target_slice,
):
    """Build TSMixer model."""
    inputs = tf.keras.Input(shape=input_shape)
    x = inputs  # [Batch, Input Length, Channel]
    print('inputs shape: ', x.shape)
    for _ in range(n_block):
        x = res_block(x, norm_type, activation, dropout, ff_dim)
    print('1', x.shape)

    # FC output layer for binary classification: 1 = pain, 0 = no pain
    outputs = layers.Dense(pred_len, activation='sigmoid')(x)
    #outputs = tf.reduce_mean(outputs, axis=1)
    print('outputs shape:', outputs.shape)
    return tf.keras.Model(inputs, outputs)
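For reference, this is roughly how I build and compile the model (the hyperparameter values here are examples, not necessarily my exact settings):

model = build_model(
    input_shape=(704, 3),  # [Input Length, Channel]
    pred_len=1,            # one sigmoid unit per time step -> output (None, 704, 1)
    norm_type='L',
    activation='relu',
    n_block=2,
    dropout=0.1,
    ff_dim=64,
    target_slice=None,
)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])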
Unfortunately, I am currently getting the following error and have not found a solution after several hours of analysis:
inputs shape: (None, 704, 3)
...
1: (None, 704, 3)
outputs shape: (None, 704, 1)
Epoch 1/3
Traceback (most recent call last):
File "/home/maven96/workspace/tsmixer_PR/run.py", line 342, in <module>
main()
File "/home/maven96/workspace/tsmixer_PR/run.py", line 287, in main
history = model.fit(
File "/home/maven96/workspace/venv_dir/test_env/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/home/maven96/workspace/venv_dir/test_env/lib/python3.10/site-packages/keras/src/backend/tensorflow/nn.py", line 689, in binary_crossentropy
raise ValueError(
ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 1), output.shape=(None, 704)
As far as I know, it should be possible to classify a complete sequence into a single class, and I don't quite understand why it fails here: the model apparently produces one output per time step (shape (None, 704, 1)), while each label applies to the whole sequence (shape (None, 1)).
One possibility would of course be to label and classify each time step individually and then average the predictions over the entire sequence. However, I would prefer to classify the entire sequence directly; a rough sketch of what I have in mind follows.
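This is an untested sketch (GlobalAveragePooling1D is my guess for collapsing the time axis, analogous to the reduce_mean line I commented out in build_model):

# Untested idea: collapse the time axis so the model emits one
# probability per sequence instead of one per time step.
x = layers.GlobalAveragePooling1D()(x)              # [Batch, Channel]
outputs = layers.Dense(1, activation='sigmoid')(x)  # [Batch, 1] -> matches labels (None, 1)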
Does anyone here have any tips?
Thank you very much in advance!