Is keras_tuner also able to tune the hyperparameters of xgboost-based models? In particular, I am trying to use keras_tuner to tune the hyperparameters of the blender (an XGBRegressor) of a stacking regressor. The hyperparameters of its lower-level regressors have already been found, so I am only interested in the optimal values of max_depth and learning_rate for the blender. Here is my procedure:
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
import xgboost
import keras_tuner as kt
housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target, train_size=0.8, test_size=0.2)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, train_size=0.8, test_size=0.2)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)  # transform only: reuse the statistics fitted on X_train
X_test = scaler.transform(X_test)
def build_dnn_reg_opt():
    # DNN base regressor; its hyperparameters were tuned separately
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.InputLayer(input_shape=X_train.shape[1:]))
    model.add(tf.keras.layers.BatchNormalization(momentum=0.999))
    model.add(tf.keras.layers.Dense(42, tf.keras.activations.selu, kernel_initializer="lecun_normal"))
    model.add(tf.keras.layers.BatchNormalization(momentum=0.999))
    model.add(tf.keras.layers.Dense(42, tf.keras.activations.selu, kernel_initializer="lecun_normal"))
    model.add(tf.keras.layers.BatchNormalization(momentum=0.999))
    model.add(tf.keras.layers.Dense(1, kernel_initializer="lecun_normal"))
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.05)
    model.compile(loss="mae", optimizer=optimizer, metrics=["mse"])
    return model
dnn_reg_opt = build_dnn_reg_opt()
rnd_reg_opt = DecisionTreeRegressor(max_depth=8,
                                    min_samples_leaf=32,
                                    max_leaf_nodes=10)
rf_reg_opt = RandomForestRegressor(n_estimators=76, max_leaf_nodes=20)
def build_model_stack(hp):
    # keras_tuner samples the blender's hyperparameters from these ranges
    max_depth = hp.Int("max_depth", min_value=1, max_value=10, step=1)
    learning_rate = hp.Choice("learning_rate", values=[0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07])
    model = StackingRegressor(estimators=[("rnd_reg_opt", rnd_reg_opt),
                                          ("rf_reg_opt", rf_reg_opt),
                                          ("dnn_reg_opt", dnn_reg_opt)],
                              final_estimator=xgboost.XGBRegressor(max_depth=max_depth,
                                                                   learning_rate=learning_rate))
    return model
rnd_reg_opt.fit(X_train, y_train)
def exponential_decay(lr0, s):
    # lr(epoch) = lr0 * 0.1 ** (epoch / s), i.e. the rate drops by 10x every s epochs
    def exponential_decay_fn(epoch):
        return lr0 * 0.1 ** (epoch / s)
    return exponential_decay_fn

exponential_decay_fn = exponential_decay(lr0=0.01, s=20)
lr_scheduler_cb = tf.keras.callbacks.LearningRateScheduler(exponential_decay_fn)
early_stop_cb = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5)
dnn_reg_opt.fit(X_train, y_train,
                validation_data=(X_valid, y_valid),
                epochs=50,
                callbacks=[early_stop_cb, lr_scheduler_cb])
rf_reg_opt.fit(X_train, y_train)
seed_value = 42  # not defined in the original snippet; any fixed integer works
tuner_BO = kt.BayesianOptimization(build_model_stack,
                                   objective=kt.Objective("val_loss", direction="min"),
                                   max_trials=10,
                                   seed=seed_value)
tuner_BO.search(X_train, y_train, epochs=50,
                validation_data=(X_valid, y_valid),
                callbacks=[early_stop_cb, lr_scheduler_cb])
best_hps_BO = tuner_BO.get_best_hyperparameters(num_trials=1)[0]
print("BO results:")
print("max_depth: {0}".format(best_hps_BO.get("max_depth")))
print("learning_rate: {0}".format(best_hps_BO.get("learning_rate")))
But the following error is thrown:
RuntimeError: Model-building function did not return a valid Keras Model instance, found StackingRegressor(estimators=[('rnd_reg_opt',
DecisionTreeRegressor(max_depth=8,
max_leaf_nodes=10,
min_samples_leaf=32)),
('rf_reg_opt',
RandomForestRegressor(max_leaf_nodes=20,
n_estimators=76)),
('dnn_reg_opt',
<tensorflow.python.keras.engine.sequential.Sequential object at 0x0000012D308E0D30>)],
final_estimator=XGBRegressor(base_score=None, booster=None,
col...
importance_type='gain',
interaction_constraints=None,
learning_rate=0.01,
max_delta_step=None, max_depth=1,
min_child_weight=None,
missing=nan,
monotone_constraints=None,
n_estimators=100, n_jobs=None,
num_parallel_tree=None,
random_state=None,
reg_alpha=None, reg_lambda=None,
scale_pos_weight=None,
subsample=None, tree_method=None,
validate_parameters=None,
verbosity=None))
As far as I can tell, kt.BayesianOptimization insists on the build function returning a Keras Model instance, whereas build_model_stack returns a StackingRegressor. Can anyone share a workaround for this issue?