Hi all, I am trying to train a multi-input model from a single large dataset that has 3 columns: featuers_mlp, featuers_lstm, and labels. I am using Petastorm because the dataset is very large.
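For context, each batch that comes out of Petastorm's make_tf_dataset is a namedtuple with one field per column. My understanding of the shapes (11 MLP features, a flattened 5x6 window for the LSTM) is roughly like this, illustration only:

from collections import namedtuple
import numpy as np

# Shapes here are my own assumptions about my columns, not something Petastorm guarantees
Batch = namedtuple('Batch', ['featuers_mlp', 'featuers_lstm', 'labels'])
example = Batch(featuers_mlp=np.zeros((256, 11), dtype=np.float32),
                featuers_lstm=np.zeros((256, 30), dtype=np.float32),
                labels=np.zeros((256,), dtype=np.float32))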
Model code:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LSTM, Concatenate, LeakyReLU
from tensorflow.keras.losses import Huber
from tensorflow.keras.metrics import MeanAbsoluteError

LEARNING_RATE = 0.001
BATCH_SIZE = 256
TRAIN_MAX_STEPS = None
STEPS = None
NUM_EPOCHS = 2
LEAKY_RELU_ALPHA = 0.1

# Two named inputs: a flat feature vector for the MLP branch and a (5, 6) sequence for the LSTM branch
input_layer_mlp = Input(shape=(len(mlp_feature),), name='input_mlp')
input_layer_lstm = Input(shape=(5, 6), name='input_lstm')

l1 = LSTM(64, return_sequences=True, kernel_initializer='glorot_uniform')(input_layer_lstm)
l2 = LSTM(32, return_sequences=False)(l1)
m1 = Dense(32, activation=LeakyReLU(alpha=LEAKY_RELU_ALPHA), kernel_initializer='glorot_uniform')(input_layer_mlp)
m2 = Concatenate()([l2, m1])
m3 = Dense(16, activation=LeakyReLU(alpha=LEAKY_RELU_ALPHA))(m2)
# The output layer is named 'out' so the loss/metric dicts below can refer to it
out = Dense(1, activation=LeakyReLU(alpha=LEAKY_RELU_ALPHA), name='out')(m3)

optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model = tf.keras.Model(inputs=[input_layer_mlp, input_layer_lstm], outputs=[out])

losses = {'out': Huber(delta=1.0)}
metrics = {'out': MeanAbsoluteError()}
model.compile(optimizer=optimizer, loss=losses, metrics=metrics)
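Just to confirm the model itself is fine, I can call it directly with a dict keyed by the two input names (dummy data here, assuming len(mlp_feature) == 11):

# Sanity check: the model wants one tensor per named input
dummy_out = model({'input_mlp': tf.zeros((4, 11)),
                   'input_lstm': tf.zeros((4, 5, 6))})
print(dummy_out.shape)  # (4, 1)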
Training code:
model = build_model_mlp(in_shape=11)

with penta_train_mlp.make_tf_dataset(transform_spec=transform_spec_fn,
                                     batch_size=BATCH_SIZE) as train_dataset, \
     penta_test_mlp.make_tf_dataset(transform_spec=transform_spec_fn,
                                    batch_size=BATCH_SIZE) as val_dataset:
    # tf.keras only accepts tuples, not namedtuples
    # print(train_dataset)
    train_dataset_val = train_dataset.map(lambda x: (x.features, x.labels))
    steps_per_epoch = len(penta_train_mlp) // BATCH_SIZE

    val_dataset_val = val_dataset.map(lambda x: (x.features, x.labels))
    validation_steps = max(1, len(penta_test_mlp) // BATCH_SIZE)

    print(f"steps_per_epoch: {steps_per_epoch}, validation_steps: {validation_steps}")

    hist = model.fit(train_dataset_val,
                     steps_per_epoch=steps_per_epoch,
                     epochs=NUM_EPOCHS,
                     validation_data=val_dataset_val,
                     validation_steps=validation_steps,
                     verbose=2)
    # print("Validation Accuracy: {}".format(accuracy))
Error:
Failed to find data adapter that can handle input: (<class 'tuple'> containing values of types {"<class 'tensorflow.python.data.ops.dataset_ops.DatasetV1Adapter'>"}), <class 'tensorflow.python.data.ops.dataset_ops.DatasetV1Adapter'>
How do I create a multi-input dataset from this? I tried tf.data.Dataset.from_tensor_slices, but since the data is already in dataset form rather than in-memory tensors, that does not work:
dataset_train = tf.data.Dataset.from_tensor_slices(
    ({'input_mlp': train_mlp_x, 'input_lstm': train_lstm_x}, train_mlp_y)).batch(1)
# tf.data.Dataset.from_tensor_slices(((val_mlp_x, data_b), labels)).batch(1).repeat()
Error:
Unbatching a dataset is only supported for rank >= 1
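What I think I need (but haven't been able to get working) is a map on the Petastorm dataset that turns each batch into a dict keyed by the input layer names plus the label, roughly like the sketch below. The field names come from my columns, and the reshape to (5, 6) is my assumption about how the flat featuers_lstm column should be fed to the LSTM input:

def to_multi_input(row):
    # Assumed: featuers_lstm is stored flat as 30 floats per example, so reshape to (5, 6)
    return ({'input_mlp': row.featuers_mlp,
             'input_lstm': tf.reshape(row.featuers_lstm, (-1, 5, 6))},
            row.labels)

# Inside the with block above
train_dataset_multi = train_dataset.map(to_multi_input)

Is this the right approach, or is there a better way to build a multi-input tf.data pipeline from a Petastorm dataset?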