Multiprocessing Keras Tuner

Hi All,

I am trying to run distributed Keras Tuner hyperparameter tuning as shown below. The script is meant to spin up multiple tuner (worker) processes that communicate with the chief process.

import logging
import multiprocessing
import os
from multiprocessing import Process

import numpy as np
import tensorflow as tf
from keras_tuner import GridSearch
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense


def tunerProcess(tunerId=0):
    logging.basicConfig(
        filename='app.log',
        level=logging.DEBUG,
        filemode='a',
    )
    logging.info('********************* %s', tunerId)
    def build_model3(hp):
        model = Sequential()
        # Hidden-layer and output-layer activations need distinct hyperparameter
        # names; reusing 'activation' for both would make them the same hyperparameter.
        active_func_ss = ['relu', 'tanh']
        active_func = hp.Choice('activation', active_func_ss)
        active_func_last_ss = ['sigmoid', 'softmax', 'tanh']
        active_func_last = hp.Choice('activation_last', active_func_last_ss)
        lr_list = [1e-1,1e-2,1e-3,1e-4,1e-5,1.0,
                    2e-1,2e-2,2e-3,2e-4,2e-5,2.0,
                    3e-1,3e-2,3e-3,3e-4,3e-5,3.0,
                    4e-1,4e-2,4e-3,4e-4,4e-5,4.0,
                    5e-1,5e-2,5e-3,5e-4,5e-5,5.0]
        lr=hp.Choice('learning_rate', values=lr_list)

        for i in range(hp.Int('num_layers', min_value=1, max_value=8)):
            model.add(Dense(units=hp.Int('units_' + str(i),
                                         min_value=64,
                                         max_value=1024,
                                         step=32),
                            activation=active_func))

        model.add(Dense(1, activation=active_func_last))
        optimizer=hp.Choice('optimizer', values=['rmsprop','adamw','adam','SGD',
                                                 'adadelta','adagrad','adamax',
                                                 'adafactor','nadam'])

        # Conditional for each optimizer
        if optimizer == 'adam':
            optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
        elif optimizer == 'SGD':
            optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
        elif optimizer == 'rmsprop':
            optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr)
        elif optimizer == 'adamw':
            optimizer = tf.keras.optimizers.AdamW(learning_rate=lr)
        elif optimizer == 'adadelta':
            optimizer = tf.keras.optimizers.Adadelta(learning_rate=lr)
        elif optimizer == 'adagrad':
            optimizer = tf.keras.optimizers.Adagrad(learning_rate=lr)
        elif optimizer == 'adamax':
            optimizer = tf.keras.optimizers.Adamax(learning_rate=lr)
        elif optimizer == 'adafactor':
            optimizer = tf.keras.optimizers.Adafactor(learning_rate=lr)
        elif optimizer == 'nadam':
            optimizer = tf.keras.optimizers.Nadam(learning_rate=lr)
        model.compile(optimizer,
                      loss='binary_crossentropy',
                      metrics=['precision'])
        return model

    x_train_normalized=np.loadtxt('x_train_normalized.csv',delimiter=',')
    y_train=np.loadtxt('y_train.csv',delimiter=',')
    x_test_normalized=np.loadtxt('x_test_normalized.csv',delimiter=',')
    y_test=np.loadtxt('y_test.csv',delimiter=',')

    os.environ['KERASTUNER_TUNER_ID'] = "tuner"+"_"+str(tunerId)
    os.environ['KERASTUNER_ORACLE_IP'] = "127.0.0.1"
    os.environ['KERASTUNER_ORACLE_PORT'] = "8000"

    tuner = GridSearch(
        hypermodel=build_model3,
        objective='val_precision',
        max_trials=10000,
        directory='myDir',
        distribution_strategy=tf.distribute.MirroredStrategy(),
        seed=42
    )
    tuner.search(x_train_normalized, y_train, epochs=10, validation_data=(x_test_normalized, y_test))
    best_model = tuner.get_best_models(num_models=1)[0]
    logging.info('********************* finished %s', tunerId)
    # summary() prints and returns None, so route its output to the log instead
    best_model.summary(print_fn=logging.info)
    best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
    logging.info(best_hyperparameters.values)


if __name__ == "__main__":  # confirms that the code is under main function

    print(multiprocessing.cpu_count())
    procs=[]
    for id in range(multiprocessing.cpu_count()):
        print(id)
        proc = Process(target=tunerProcess, args=(id,))
        procs.append(proc)
        proc.start()
    print(procs)

    for proc in procs:
        proc.join()


I am running this script on a 128-core machine, but evaluation still does not speed up. I need to evaluate all of the model combinations through GridSearch as quickly as possible. How should I go about this? Is it possible to spin up multiple processes as above and have them communicate with the chief process (below)? The only meaningful difference between the worker code and the chief code is the environment variable setting.
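
For a sense of scale, here is a rough count of the grid that build_model3 defines. This is illustrative arithmetic only; it ignores how KerasTuner expands the conditional per-layer units_i parameters, which only makes the real grid larger.

# Rough size of the grid defined in build_model3 (illustrative arithmetic only).
activations      = 2                        # ['relu', 'tanh']
last_activations = 3                        # ['sigmoid', 'softmax', 'tanh']
learning_rates   = 30                       # len(lr_list)
layer_counts     = 8                        # num_layers: 1..8
optimizers       = 9
units_per_layer  = (1024 - 64) // 32 + 1    # 31 values for each units_i

base = activations * last_activations * learning_rates * layer_counts * optimizers
print(base)                    # 12960 -- already above max_trials=10000
print(base * units_per_layer)  # 401760 with just a single tuned units_i axis

Even the base count exceeds max_trials=10000, which is why I want many worker processes sharing trials through the chief.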

Chief process

def tunerChiefProcess():
    print('*********************')
    def build_model3(hp):
        model = Sequential()
        # As above, the output-layer activation needs its own hyperparameter name.
        active_func_ss = ['relu', 'tanh']
        active_func = hp.Choice('activation', active_func_ss)
        active_func_last_ss = ['sigmoid', 'softmax', 'tanh']
        active_func_last = hp.Choice('activation_last', active_func_last_ss)
        lr_list = [1e-1,1e-2,1e-3,1e-4,1e-5,1.0,
                    2e-1,2e-2,2e-3,2e-4,2e-5,2.0,
                    3e-1,3e-2,3e-3,3e-4,3e-5,3.0,
                    4e-1,4e-2,4e-3,4e-4,4e-5,4.0,
                    5e-1,5e-2,5e-3,5e-4,5e-5,5.0]
        lr=hp.Choice('learning_rate', values=lr_list)
        # Tune the number of layers
        for i in range(hp.Int('num_layers', min_value=1, max_value=8)):
            model.add(Dense(units=hp.Int('units_' + str(i),
                                         min_value=64,
                                         max_value=1024,
                                         step=32),
                            activation=active_func))
        #lr_min, lr_max = 1e-4, 1e-1
        #lr = hp.Float('learning_rate', min_value=lr_min, max_value=lr_max, sampling='log')
        model.add(Dense(1, activation=active_func_last))
        optimizer=hp.Choice('optimizer', values=['rmsprop','adamw','adam','SGD',
                                                 'adadelta','adagrad','adamax',
                                                 'adafactor','nadam'])

        # Conditional for each optimizer
        if optimizer == 'adam':
            optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
        elif optimizer == 'SGD':
            optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
        elif optimizer == 'rmsprop':
            optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr)
        elif optimizer == 'adamw':
            optimizer = tf.keras.optimizers.AdamW(learning_rate=lr)
        elif optimizer == 'adadelta':
            optimizer = tf.keras.optimizers.Adadelta(learning_rate=lr)
        elif optimizer == 'adagrad':
            optimizer = tf.keras.optimizers.Adagrad(learning_rate=lr)
        elif optimizer == 'adamax':
            optimizer = tf.keras.optimizers.Adamax(learning_rate=lr)
        elif optimizer == 'adafactor':
            optimizer = tf.keras.optimizers.Adafactor(learning_rate=lr)
        elif optimizer == 'nadam':
            optimizer = tf.keras.optimizers.Nadam(learning_rate=lr)
        model.compile(optimizer,
                      loss='binary_crossentropy',
                      metrics=['precision'])
        return model

    x_train_normalized=np.loadtxt('x_train_normalized.csv',delimiter=',')
    y_train=np.loadtxt('y_train.csv',delimiter=',')
    x_test_normalized=np.loadtxt('x_test_normalized.csv',delimiter=',')
    y_test=np.loadtxt('y_test.csv',delimiter=',')

    os.environ['KERASTUNER_TUNER_ID'] = "chief"
    os.environ['KERASTUNER_ORACLE_IP'] = "127.0.0.1"
    os.environ['KERASTUNER_ORACLE_PORT'] = "8000"

    tuner = GridSearch(
        hypermodel=build_model3,
        objective='val_precision',
        max_trials=10000,
        directory='myDir',
        distribution_strategy=tf.distribute.MirroredStrategy(),
        seed=42
    )
    tuner.search(x_train_normalized, y_train, epochs=10, validation_data=(x_test_normalized, y_test))
    best_model = tuner.get_best_models(num_models=1)[0]
    print('*********************')
    # summary() prints directly and returns None, so don't wrap it in print()
    best_model.summary()
    best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
    print(best_hyperparameters.values)

All of the processes run on the same machine. Will this actually give the benefit of a multi-core machine, with different model combinations being evaluated in different processes? The combined launcher I have in mind is sketched below.
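
This is only a sketch, and it assumes tunerChiefProcess and tunerProcess are defined as above in the same file. The sleep is a crude way to give the oracle time to start listening before the workers connect, and since I am not sure the chief ever returns on its own, I stop it explicitly once the workers finish.

import multiprocessing
import time
from multiprocessing import Process

if __name__ == "__main__":
    # One chief process serving the oracle on 127.0.0.1:8000 ...
    chief = Process(target=tunerChiefProcess)
    chief.start()
    time.sleep(10)  # crude wait so the oracle is up before the workers connect

    # ... plus one worker process per core; each worker differs from the chief
    # only in the KERASTUNER_TUNER_ID environment variable it sets.
    workers = []
    for worker_id in range(multiprocessing.cpu_count()):
        worker = Process(target=tunerProcess, args=(worker_id,))
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()
    chief.terminate()  # assumption: the chief blocks serving the oracle, so stop it here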