Hi,

I want to run Bayesian optimization to find the best set of hyperparameter values. My dataset has shape [102069 rows x 1415 columns], and my model is:

```
def build_model(hp):
    """Creates a TF-DF gradient boosted trees model for a given hyperparameter set."""
    growing_strategy = hp.Choice("growing_strategy", ["LOCAL", "BEST_FIRST_GLOBAL"])
    split_axis = hp.Choice("split_axis", ["SPARSE_OBLIQUE", "AXIS_ALIGNED"])
    print(split_axis)
    model_params = {
        "task": tfdf.keras.Task.REGRESSION,
        "min_examples": hp.Choice("min_examples", [2, 5, 7, 10]),
        "categorical_algorithm": hp.Choice("categorical_algorithm", ["CART", "RANDOM"]),
        "shrinkage": hp.Choice("shrinkage", [0.02, 0.05, 0.10, 0.15]),
        "early_stopping": hp.Choice("early_stopping", ["NONE", "MIN_LOSS_FINAL", "LOSS_INCREASE"]),
        "num_candidate_attributes_ratio": hp.Choice("num_candidate_attributes_ratio", [0.2, 0.5, 0.9, 1.0]),
        "growing_strategy": growing_strategy,
        "split_axis": split_axis,
        "l1_regularization": hp.Choice("l1_regularization", [0.0, 0.001, 0.01, 0.1]),
        "l2_regularization": hp.Choice("l2_regularization", [0.0, 0.001, 0.01, 0.1]),
        "l2_categorical_regularization": hp.Choice("l2_categorical_regularization", [0.0, 0.1, 0.01, 1.0, 2.0, 10.0]),
        "num_trees": hp.Choice("num_trees", [200, 250, 300, 350, 400, 500, 600, 700]),
        "max_depth": hp.Choice("max_depth", [3, 4, 5, 6, 8]),
    }
    # Conditional hyperparameters: only meaningful for the matching choice.
    if growing_strategy == "BEST_FIRST_GLOBAL":
        model_params["max_num_nodes"] = hp.Choice("max_num_nodes", [16, 32, 64, 128, 256])
    if split_axis == "SPARSE_OBLIQUE":
        model_params["sparse_oblique_weights"] = hp.Choice("sparse_oblique_weights", ["BINARY", "CONTINUOUS"])
        model_params["sparse_oblique_normalization"] = hp.Choice("sparse_oblique_normalization", ["NONE", "STANDARD_DEVIATION", "MIN_MAX"])
        model_params["sparse_oblique_num_projections_exponent"] = hp.Choice("sparse_oblique_num_projections_exponent", [1.0, 1.5])
    print(model_params)
    model = tfdf.keras.GradientBoostedTreesModel(**model_params)
    # Optimize the model's MSE as computed on the validation dataset.
    model.compile(metrics=["mse"])
    return model
```
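On the conditional parameters above: the dict only ever contains the keys that are valid for the sampled `growing_strategy` / `split_axis`. A minimal, dependency-free sketch of that pattern (`StubHp` is a made-up stand-in for Keras Tuner's `hp` argument, used here only so the snippet runs on its own):

```python
# StubHp is a hypothetical stand-in for Keras Tuner's `hp` object; it simply
# returns pre-chosen values so the conditional logic can be exercised alone.
class StubHp:
    def __init__(self, fixed):
        self.fixed = fixed  # the pre-chosen values for this "trial"

    def Choice(self, name, values):
        return self.fixed.get(name, values[0])

def conditional_params(hp):
    growing_strategy = hp.Choice("growing_strategy", ["LOCAL", "BEST_FIRST_GLOBAL"])
    split_axis = hp.Choice("split_axis", ["SPARSE_OBLIQUE", "AXIS_ALIGNED"])
    params = {"growing_strategy": growing_strategy, "split_axis": split_axis}
    # Only add the keys that are meaningful for the sampled values.
    if growing_strategy == "BEST_FIRST_GLOBAL":
        params["max_num_nodes"] = hp.Choice("max_num_nodes", [16, 32, 64, 128, 256])
    if split_axis == "SPARSE_OBLIQUE":
        params["sparse_oblique_weights"] = hp.Choice("sparse_oblique_weights", ["BINARY", "CONTINUOUS"])
    return params

print(conditional_params(StubHp({"growing_strategy": "LOCAL", "split_axis": "AXIS_ALIGNED"})))
print(conditional_params(StubHp({"growing_strategy": "BEST_FIRST_GLOBAL", "split_axis": "SPARSE_OBLIQUE"})))
```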

Then I run the Bayesian optimization:

```
keras_tuner_bayes = kt.BayesianOptimization(
    build_model,
    objective="val_mse",
    max_trials=150,
    overwrite=True,
    directory="/tmp/keras_tuning")
```

but I run into the following problem:

```
AXIS_ALIGNED
{'task': 2, 'min_examples': 7, 'categorical_algorithm': 'CART', 'shrinkage': 0.05, 'early_stopping': 'NONE', 'num_candidate_attributes_ratio': 0.2, 'growing_strategy': 'LOCAL', 'split_axis': 'AXIS_ALIGNED', 'l1_regularization': 0.0, 'l2_regularization': 0.0, 'l2_categorical_regularization': 10.0, 'num_trees': 700, 'max_depth': 6}
Use /tmp/tmpahoa71ah as temporary training directory
[WARNING 23-10-30 19:47:27.0726 CET gradient_boosted_trees.cc:1818] "goss_alpha" set but "sampling_method" not equal to "GOSS".
[WARNING 23-10-30 19:47:27.0729 CET gradient_boosted_trees.cc:1829] "goss_beta" set but "sampling_method" not equal to "GOSS".
[WARNING 23-10-30 19:47:27.0732 CET gradient_boosted_trees.cc:1843] "selective_gradient_boosting_ratio" set but "sampling_method" not equal to "SELGB".
Reading training dataset...
Segmentation fault (core dumped)
```
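For what it's worth, here is my rough back-of-the-envelope estimate of the raw size of a [102069 rows x 1415 columns] matrix, assuming dense float32 features (the actual footprint depends on the dtypes and on how many copies are made while the dataset is read, so this is only a lower bound):

```python
# Rough memory estimate for the training matrix described above.
# Assumes dense float32 features; real usage can be several times larger.
rows, cols = 102_069, 1_415
bytes_per_value = 4  # float32
gib = rows * cols * bytes_per_value / 1024**3
print(f"~{gib:.2f} GiB per in-memory copy")  # ≈ 0.54 GiB
```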

On the test system I am using at the moment I have no GPU at my disposal, but I am planning to get a much better machine. Unfortunately, I am not sure what I should be looking for: is GPU power the only thing that matters? If so, how much do I need for a decent processing time (i.e. less than 1 hour)? And how much memory?

Any help will be much appreciated.