I’m working on a problem where I predict 9 different labels. I want the model to output a prediction for each of the targets, and I also need to tune the model's hyperparameters.
primary_labels = ["target1", "target2","target3", "target4","target5", "target6", "target7","target8", "target9"]
secondary_labels = []
non_feature_columns = ["id"]
def to_tf_dataset(pd_dataset: pd.DataFrame, label_keys: list[str], droped_features: list[str]) -> tf.data.Dataset:
features = dict(pd_dataset.drop(label_keys + droped_features, axis=1))
labels = dict(pd_dataset[label_keys])
return tf.data.Dataset.from_tensor_slices((features, labels)).batch(100)
train_tf = to_tf_dataset(train_pd, label_keys=primary_labels, dropped_features=non_feature_columns + secondary_labels)
test_tf = to_tf_dataset(test_pd, label_keys=[], dropped_features=non_feature_columns)
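For what it's worth, the conversion does seem to produce the (features dict, labels dict) structure with one label entry per target. A minimal sanity check (toy_pd is a made-up two-row stand-in for train_pd):

import pandas as pd

# Made-up stand-in for train_pd, just to exercise to_tf_dataset.
toy_pd = pd.DataFrame({
    "id": [1, 2],
    "feature_a": [0.1, 0.2],
    **{f"target{i}": [0, 1] for i in range(1, 10)},
})
toy_tf = to_tf_dataset(toy_pd, label_keys=primary_labels, dropped_features=["id"])
print(toy_tf.element_spec)  # (features dict, {"target1": ..., ..., "target9": ...})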
# "tuner" is a TF-DF tuner created earlier (e.g. tfdf.tuner.RandomSearch)
model = tfdf.keras.GradientBoostedTreesModel(
    num_trees=1000,
    multitask=[tfdf.keras.MultiTaskItem(label=l, task=tfdf.keras.Task.CLASSIFICATION) for l in primary_labels],
    verbose=1,
    tuner=tuner,
)
model.fit(train_tf)
This raises:

ValueError: Multi-task learning is not compatible with the tuner
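For reference, the tuner does work in the single-task setting, so the only fallback I can see is tuning nine separate models, one per label. A sketch using TF-DF's pd_dataframe_to_tf_dataset helper (num_trials=20 is an arbitrary choice):

import tensorflow_decision_forests as tfdf

tuned_models = {}
for label in primary_labels:
    # Keep the features plus this one label; drop ids and the other targets.
    other_labels = [l for l in primary_labels if l != label]
    single_pd = train_pd.drop(columns=non_feature_columns + secondary_labels + other_labels)
    single_tf = tfdf.keras.pd_dataframe_to_tf_dataset(single_pd, label=label)

    # RandomSearch is TF-DF's built-in tuner; single-task models accept it.
    tuner = tfdf.tuner.RandomSearch(num_trials=20, use_predefined_hps=True)
    model = tfdf.keras.GradientBoostedTreesModel(
        task=tfdf.keras.Task.CLASSIFICATION, tuner=tuner, verbose=1
    )
    model.fit(single_tf)
    tuned_models[label] = model

That multiplies training time by nine, though. Is there a supported way to tune the hyperparameters of a single multi-task model?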