I took the TFTransfoXL model from Hugging Face and tried to run automated hyperparameter tuning on it, but I keep getting errors.
The method I'm currently using is hyperopt.
The problem is that once the first trial finishes training and the hyperparameters are changed for retraining, the error below is raised at the call decorated with @tf.function.
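As far as I can tell, the message matches the restriction that a @tf.function may only create tf.Variables on its very first call. I believe this minimal toy sketch (a hypothetical Dense layer, not my actual model) raises the same ValueError when a fresh model is passed to an already-called function:

import tensorflow as tf

@tf.function
def step(model, x):
    # the layer gets built here, so its tf.Variables are created inside the traced function
    return model(x)

step(tf.keras.layers.Dense(4), tf.zeros([1, 4]))  # first call: creating variables is allowed
step(tf.keras.layers.Dense(4), tf.zeros([1, 4]))  # fresh layer on the second call -> same ValueError

That seems to be exactly my situation: train_step below is a module-level @tf.function, and each hyperopt trial builds a brand-new TFTransfoXLMLMHeadModel, so the second trial ends up creating new variables inside the already-called function.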
@tf.function
def train_step(model, data1, data2, target, mems, optimizer):
    with tf.GradientTape() as tape:
        outputs = model(concepts=data1, responses=data2, labels=target, mems=mems)
        logit = outputs.logit
        mems = outputs.mems

        logit_mx = target != -100
        logit_value = logit[logit_mx]
        logit_value = tf.reshape(logit_value, [-1, config_xl.R_vocab_size])
        labels = target[logit_mx]

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logit_value)
        # batch_loss = tf.reduce_sum(loss) / valid_samples
        mean_loss = tf.reduce_mean(loss)

    train_loss(loss)
    train_accuracy(labels, logit_value)
    predictions = tf.nn.softmax(logit_value)
    train_auc(tf.one_hot(labels, depth=predictions.shape[1]), predictions)

    gradients = tape.gradient(mean_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return mems, mean_loss
def evaluate(model, test_dataset, config_xl):
    total_loss = 0.0
    num_batches = 0
    evaluation_metrics = []
    test_mems = None

    for input_data, masked_responses, responses in tqdm(test_dataset, desc='eval'):
        outputs = model(concepts=input_data, responses=masked_responses, labels=responses, mems=test_mems, training=False)
        logit = outputs.logit
        test_mems = outputs.mems

        logit_mx = responses != -100
        logit_value = logit[logit_mx]
        logit_value = tf.reshape(logit_value, [-1, config_xl.R_vocab_size])
        labels = responses[logit_mx]

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logit_value)
        mean_loss = tf.reduce_mean(loss)

        # Update precision and recall metrics
        predicted_labels = tf.argmax(logit_value, axis=1)
        predictions = tf.nn.softmax(logit_value)
        test_auc(tf.one_hot(labels, depth=predictions.shape[1]), predictions)
        test_precision(labels, predicted_labels)
        test_recall(labels, predicted_labels)
        test_accuracy(labels, logit_value)
        test_loss(loss)

        precision = test_precision.result().numpy()
        recall = test_recall.result().numpy()
        f1_score = 2 * (precision * recall) / (precision + recall + 1e-7)
        evaluation_metrics.append(test_accuracy.result().numpy())

        total_loss += mean_loss.numpy()
        num_batches += 1

    # main() unpacks five values from evaluate(), so return them here
    return (total_loss / num_batches,
            test_accuracy.result().numpy(),
            precision,
            recall,
            f1_score)
def train(train_dataset, config_xl):
    try:
        learning_rate = CustomSchedule(config_xl.d_model)
        optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)
        model = TFTransfoXLMLMHeadModel(config=config_xl)
        loss_values = []
        num_batches = 0

        for epoch in range(config_xl.epoch):
            start = time.time()
            total_loss = 0.0
            mems = None
            for input_data, masked_responses, responses in tqdm(train_dataset, desc='train'):
                mems, loss_value = train_step(model, input_data, masked_responses, responses, mems, optimizer)
                num_batches += 1
                total_loss += loss_value.numpy()
    except Exception:
        logging.exception('training failed')
        raise

    # main() expects the trained model back
    return model
def main(config_xl):
    train_dataset, test_dataset, dkeyid2idx = load_TFdataset(config_xl)
    model = train(train_dataset.take(10), config_xl)
    test_loss, test_acc, test_precision, test_recall, test_f1_score = evaluate(model, test_dataset, config_xl)
if __name__ == "__main__":
    config_xl = TransfoXLConfig(
        d_embed=args.d_embed,
        d_head=args.d_head,
        d_model=args.d_model,
        mem_len=args.mem_len,
        n_head=args.n_head,
        n_layer=args.n_layer,
        eos_token=args.eos_token,
        mask_token=args.mask_token,
        batch_size=args.batch_size,
        tgt_len=args.tgt_len,
        C_vocab_size=args.C_vocab_size,
        Q_vocab_size=args.Q_vocab_size,
        R_vocab_size=args.R_vocab_size,
        epoch=args.epoch,
        mode=args.mode,  # concepts or questions
        tf_data_dir=args.tf_data_dir,
        tensorboard_log_dir=args.tensorboard_log_dir,
        tensorboard_emb_log_dir=args.tensorboard_emb_log_dir,
        model_save_dir=args.model_save_dir,
    )
    # Hyperparameter search space
    # d_inner, n_layer, n_head, and dropout may vary freely; d_embed and d_model must match
    space = {
        'num_layer': hp.quniform('num_layer', low=4, high=12, q=2),  # even values between 4 and 12
        'n_head': hp.quniform('n_head', low=6, high=12, q=2)         # even values between 6 and 12
    }
    logging.info('config_xl: %s', config_xl)

    # Create a new MLflow experiment
    mlflow.set_experiment("MLflow Test")

    # Start an MLflow run
    with mlflow.start_run():
        # Set a run name
        mlflow.set_tag("mlflow.runName", '{}ep_{}mem_{}'.format(args.epoch, args.mem_len, args.mode))
        # Set a tag that we can use to remind ourselves what this run was for
        mlflow.set_tag("Training Info", '{}ep_{}mem_{}'.format(args.epoch, args.mem_len, args.mode))
        # Log the hyperparameters
        mlflow.log_params(config_xl.to_dict())
        # mlflow.tensorflow.autolog()
        # main(config_xl)

        trials = Trials()
        best = fmin(
            fn=objective,  # objective is sketched below
            space=space,
            algo=tpe.suggest,
            max_evals=8,
            trials=trials,
        )
        train_dataset, test_dataset, dkeyid2idx = load_TFdataset(config_xl)
        input_data, masked_responses, responses = next(iter(test_dataset))
        input_schema = Schema(
            [
                TensorSpec(np.dtype(np.int32), (-1, len(input_data[1].numpy())), "input_data"),
                TensorSpec(np.dtype(np.int32), (-1, len(masked_responses[1].numpy())), "responses"),
            ])
        signature = ModelSignature(input_schema)

        best_run = sorted(trials.results, key=lambda x: x["loss"])[0]
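The objective passed to fmin isn't shown above; it essentially rebuilds the config from the sampled values and runs train/evaluate. A simplified sketch (hp.quniform returns floats, so they are cast to int; mapping the num_layer sample onto config_xl.n_layer is how I wire it in):

from hyperopt import STATUS_OK

def objective(params):
    # hp.quniform samples are floats, so cast them before writing into the config
    config_xl.n_layer = int(params['num_layer'])
    config_xl.n_head = int(params['n_head'])
    train_dataset, test_dataset, dkeyid2idx = load_TFdataset(config_xl)
    model = train(train_dataset.take(10), config_xl)
    test_loss, test_acc, test_precision, test_recall, test_f1 = evaluate(model, test_dataset, config_xl)
    return {'loss': float(test_loss), 'status': STATUS_OK}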
Here is the full error:

ERROR:root:Error: in user code:
File "train_args_mlflows.py", line 113, in train_step *
outputs = model(concepts=data1,responses=data2, labels=target, mems=mems)
File "/home/jun/miniconda3/envs/new1/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler **
raise e.with_traceback(filtered_tb) from None
File "/tmp/__autograph_generated_filej0_kgex_.py", line 26, in tf__call
transformer_outputs = ag__.converted_call(ag__.ld(self).transformer, (ag__.ld(inputs)['concepts'], ag__.ld(inputs)['responses'], ag__.ld(inputs)['mems'], ag__.ld(inputs)['head_mask'], ag__.ld(inputs)['inputs_embeds'], ag__.ld(inputs)['output_attentions'], ag__.ld(inputs)['output_hidden_states'], ag__.ld(inputs)['return_dict']), dict(training=ag__.ld(inputs)['training']), fscope)
File "/tmp/__autograph_generated_filekwj7svil.py", line 126, in tf__call
ag__.if_stmt((ag__.ld(inputs)['inputs_embeds'] is not None), if_body_6, else_body_6, get_state_6, set_state_6, ('word_emb',), 1)
File "/tmp/__autograph_generated_filekwj7svil.py", line 120, in else_body_6
word_emb_C = ag__.converted_call(ag__.ld(self).word_emb_C, (ag__.ld(inputs)['concepts'],), None, fscope)
ValueError: Exception encountered when calling layer 'tf_transfo_xlmlm_head_model_1' (type TFTransfoXLMLMHeadModel).
in user code:
File "/home/jun/workspace/KT/models/model_for_kt.py", line 1782, in call *
transformer_outputs = self.transformer(
File "/home/jun/miniconda3/envs/new1/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler **
raise e.with_traceback(filtered_tb) from None
File "/tmp/__autograph_generated_filekwj7svil.py", line 126, in tf__call
ag__.if_stmt((ag__.ld(inputs)['inputs_embeds'] is not None), if_body_6, else_body_6, get_state_6, set_state_6, ('word_emb',), 1)
File "/tmp/__autograph_generated_filekwj7svil.py", line 120, in else_body_6
word_emb_C = ag__.converted_call(ag__.ld(self).word_emb_C, (ag__.ld(inputs)['concepts'],), None, fscope)
ValueError: Exception encountered when calling layer 'transformer' (type TFTransfoXLMLMMainLayer).
in user code:
File "/home/jun/workspace/KT/models/model_for_kt.py", line 1171, in call *
word_emb_C = self.word_emb_C(inputs["concepts"])
File "/home/jun/miniconda3/envs/new1/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler **
raise e.with_traceback(filtered_tb) from None
ValueError: tf.function only supports singleton tf.Variables created on the first call. Make sure the tf.Variable is only created once or created outside tf.function. See https://www.tensorflow.org/guide/function#creating_tfvariables for more information.
Call arguments received by layer 'transformer' (type TFTransfoXLMLMMainLayer):
• concepts=tf.Tensor(shape=(65, 140), dtype=int32)
• responses=tf.Tensor(shape=(65, 140), dtype=int32)
• mems=None
• head_mask=None
• inputs_embeds=None
• output_attentions=False
• output_hidden_states=False
• return_dict=True
• labels=None
• training=False
• kwargs=<class 'inspect._empty'>
Call arguments received by layer 'tf_transfo_xlmlm_head_model_1' (type TFTransfoXLMLMHeadModel):
• concepts=tf.Tensor(shape=(65, 140), dtype=int32)
• responses=tf.Tensor(shape=(65, 140), dtype=int32)
• mems=None
• head_mask=None
• inputs_embeds=None
• output_attentions=None
• output_hidden_states=None
• return_dict=None
• labels=tf.Tensor(shape=(65, 140), dtype=int32)
• training=False
• kwargs=<class 'inspect._empty'>
Is there any automated hyperparameter tuning method that would work with the code above, even if it's not hyperopt?