My code runs without any problems in Keras Tuner with standard loss functions like ‘mse’, but I am trying to figure out how to write a custom loss function that accepts an external argument, in addition to the true and forecasted y, for use inside Keras Tuner during LSTM model selection. I am looking for the easiest and least painful way to do this, and I didn’t find a working solution in older posts.
One approach I followed is this one. Let’s say I have these variables:
# external vector needed in the custom loss function
ex_loss = np.logical_not(klines_backtest.loc[i_sel, ['d']].to_numpy(dtype=np.float32)[:sample_start])
# create data sequences for x and the vector of y values to forecast
x_train, y_train = lstm_data_sequence(dataset[:sample_start, :-1], dataset[:sample_start, -1], lstm_sequence)
# concatenate the external vector to y so that y has shape Nx2
y_train = np.vstack((y_train, ex_loss[lstm_sequence:, 0])).T
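As a quick sanity check on the packing (shapes here are illustrative; the 144 comes from the input_shape=(48*3, 13) used in the model below):

print(x_train.shape)  # e.g. (n_samples, 144, 13)
print(y_train.shape)  # (n_samples, 2): column 0 = true y, column 1 = the ex_loss flag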
I have defined the following loss function
def bande_loss(y_true, y_pred):
    mse = K.square(y_pred - y_true[:, 0])
    i_loss = K.equal(y_true[:, 1], 1) and K.greater_equal(y_pred, y_true[:, 0])
    i_loss = K.cast(~i_loss, 'float32')
    return K.mean(mse * i_loss)
Basically, I tried to avoid the loss function override by packing the additional variable I need (of the same size as y_true) into y_train, so that inside the loss function I expect to receive y_true and the corresponding external variable, correctly sized for the batch.
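For clarity, this is the computation I intend bande_loss to perform, written as a plain NumPy reference just to pin down the semantics (y_pred is assumed 1-D in this sketch):

import numpy as np

def bande_loss_reference(y_true, y_pred):
    """Plain NumPy version of what I want the loss to compute."""
    target = y_true[:, 0]  # true y values
    flag = y_true[:, 1]    # the packed external ex_loss indicator
    mse = (y_pred - target) ** 2
    # exclude samples where the flag is set AND the forecast is >= the target
    excluded = np.logical_and(flag == 1, y_pred >= target)
    return np.mean(mse * np.logical_not(excluded).astype(np.float32))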
The LSTM hypermodel for model selection is
def lstm_model(hp):
    model = Sequential()
    model.add(InputLayer(input_shape=(48*3, 13)))
    num_layers = hp.Int('num_layers', min_value=4, max_value=8, step=2)
    num_units = hp.Choice('units', values=[50, 100, 250, 500])
    n_dropout = hp.Choice('n_dropout', values=[float(0), 0.10, 0.20])
    n_rec_dropout = hp.Choice('n_rec_dropout', values=[float(0), 0.10, 0.20])
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5, 1e-6])
    for i in range(num_layers):
        if i < num_layers - 1:
            r_sequence = True
        else:
            r_sequence = False
        model.add(LSTM(
            units=num_units,
            dropout=n_dropout,
            recurrent_dropout=n_rec_dropout,
            return_sequences=r_sequence))
    model.add(Dense(1))
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=bande_loss,
        metrics=[bande_loss])
    return model
Executing this code
tuner = Hyperband(
    hypermodel=lstm_model,
    objective=Objective("bande_loss", direction="min"),
    max_epochs=50,
    hyperband_iterations=2,
    executions_per_trial=1,
    overwrite=True,
    project_name='hyperband_tuner')
stop_early = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3, verbose=1)
tuner.search(x_train, y_train, epochs=30, validation_split=p_train, callbacks=[stop_early],
             shuffle=False, verbose=1)
I get this error
The second input must be a scalar, but it has shape [32]
[[{{node bande_loss/cond/switch_pred/_2736}}]] [Op:__inference_train_function_45266]
Function call stack:
train_function
Note that 32 is the (default) batch size.
Also running the same code with
def bande_loss(y_true, y_pred):
    mse = K.square(y_pred - y_true[:, 0])
    return K.mean(mse)
seems to work fine while running with
def bande_loss(y_true, y_pred):
    mse = K.square(y_pred - y_true[:, 1])
    return K.mean(mse)
gives me the same error and I cannot understand why.
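To try to isolate this, I evaluated the same slicing eagerly on dummy tensors shaped like one batch (purely a debugging sketch, with illustrative shapes):

import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

# dummy tensors shaped like what I believe Keras passes to the loss:
# y_true is the packed Nx2 target, y_pred comes from Dense(1)
y_true = tf.constant(np.random.rand(32, 2).astype(np.float32))
y_pred = tf.constant(np.random.rand(32, 1).astype(np.float32))
print(y_true[:, 0].shape)                     # (32,)
print(y_true[:, 1].shape)                     # (32,)
print(K.square(y_pred - y_true[:, 1]).shape)  # (32, 32): (32, 1) broadcasts against (32,)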
I also tried the loss function override in this way
def lstm_model(hp):
    model = Sequential()
    model.add(InputLayer(input_shape=(48*3, 13)))
    num_layers = hp.Int('num_layers', min_value=4, max_value=8, step=2)
    num_units = hp.Choice('units', values=[50, 100, 250, 500])
    n_dropout = hp.Choice('n_dropout', values=[float(0), 0.10, 0.20])
    n_rec_dropout = hp.Choice('n_rec_dropout', values=[float(0), 0.10, 0.20])
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5, 1e-6])
    for i in range(num_layers):
        if i < num_layers - 1:
            r_sequence = True
        else:
            r_sequence = False
        model.add(LSTM(
            units=num_units,
            dropout=n_dropout,
            recurrent_dropout=n_rec_dropout,
            return_sequences=r_sequence))
    model.add(Dense(1))
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=bande_loss(ex_loss),
        metrics=[bande_loss(ex_loss)])
    return model
def bande_loss(ex_loss):
    def loss(y_true, y_pred):
        mse = K.square(y_pred - y_true)
        i_loss = K.equal(ex_loss, True) and K.greater_equal(y_pred, y_true)
        i_loss = K.cast(~i_loss, 'float32')
        return K.mean(mse * i_loss)
    return loss
...
# external vector needed in the custom loss function
ex_loss = np.logical_not(klines_backtest.loc[i_sel, ['d']].to_numpy(dtype=np.float32)[:sample_start])
# create data sequences for x and the vector of y values to forecast
x_train, y_train = lstm_data_sequence(dataset[:sample_start, :-1], dataset[:sample_start, -1], lstm_sequence)
ex_loss = K.variable(ex_loss[lstm_sequence:], dtype=bool)
tuner = Hyperband(
    hypermodel=lstm_model,
    objective=Objective("bande_loss(ex_loss)", direction="min"),
    max_epochs=50,
    hyperband_iterations=2,
    executions_per_trial=1,
    overwrite=True,
    project_name='hyperband_tuner')
stop_early = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3, verbose=1)
tuner.search(x_train, y_train, epochs=30, validation_split=p_train, callbacks=[stop_early],
             shuffle=False, verbose=1)
But I get this error (I believe 4176 is the length of ex_loss[lstm_sequence:])
tensorflow.python.framework.errors_impl.InvalidArgumentError: The second input must be a scalar, but it has shape [4176]
[[{{node cond/switch_pred/_12}}]] [Op:__inference_train_function_34471]
Function call stack:
train_function
Can anyone help me, or suggest a simpler and more effective way to implement custom loss functions with external parameters inside Keras Tuner?