Gradients for Encoder in my BiGAN network are returning None values. Can someone with experience in this field identify the issue?

PaulD153 · December 2, 2022, 11:18am

I’m implementing a BiGAN network for multivariate time-series data and I’m running into problems with training my Encoder. When calling tape.gradient(e_loss, encoder.trainable_variables) I’m receiving only None values. Calling encoder.trainable_variables does yield the correct variables. I’ve used multiple implementations but seem to be running into the same problem. Also I’ve gone through multiple stackoverflow issues that address this issue however those solutions don’t apply to my use case. This is the most clear example I have:

Building the networks:

def build_encoder(self, x_shape, latent_code_length):
    x = Input(x_shape)
    model = Dense(128, name='encoder_conv_1')(x)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dense(128, name='encoder_conv_2')(model)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dense(256, name='encoder_conv_3')(model)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dense(256, name='encoder_conv_4')(model)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dense(512, name='encoder_conv_5')(model)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dense(latent_code_length[-1], name='encoder_conv_6')(model)

    return Model(x, model)

def build_generator(self, x_shape, latent_code_length):
    model = Sequential()
    z = Input(latent_code_length)

    
    model = Dense(512, name = 'first_dense_layer_generator', kernel_regularizer = 'l2')(z)
    model = LeakyReLU(alpha=0.2)(model)
    model = BatchNormalization(momentum=0.8)(model)
    model = Dense(512, name = 'second_dense_layer_generator', kernel_regularizer = 'l2')(model)
    model = LeakyReLU(alpha=0.2)(model)
    model = BatchNormalization(momentum=0.8)(model)
    model = Dense(x_shape[-1], activation='tanh', name = 'third_dense_layer_generator', kernel_regularizer = 'l2')(model)
    model = (Reshape(x_shape))(model)

    return Model(z, model)

def build_discriminator(self, x_shape, latent_code_length):

    z = Input(latent_code_length)
    x = Input(x_shape)
    _z = Flatten()(z)
    _z = Dense(x_shape[0]*x_shape[1])(_z)
    _z = Reshape(x_shape)(_z)
    
    model = Concatenate()([x,_z])
    model = LSTM(64, kernel_initializer='he_uniform', return_sequences=True, name='encoder_1')(x)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dropout(0.5)(model)
    model = LSTM(32, kernel_initializer='he_uniform', return_sequences=True, name='encoder_2')(model)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dropout(0.5)(model)
    model = LSTM(16, kernel_initializer='he_uniform', return_sequences=False, name='encoder_3')(model)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dropout(0.5)(model)
    model = RepeatVector(window_size, name='encoder_decoder_bridge')(model)
    model = LSTM(16, kernel_initializer='he_uniform', return_sequences=True, name='decoder_1')(model)
    model = LSTM(32, kernel_initializer='he_uniform', return_sequences=True, name='decoder_2')(model)
    model = LSTM(64, kernel_initializer='he_uniform', return_sequences=True, name='decoder_3')(model)
    validity = Dense(1, activation="sigmoid")(model)
    
    return Model([x, z], validity)

Defining train functions:

def build_train_step(self, generator, encoder, discriminator):
    g_optimizer = Adam(learning_rate=0.0001, beta_1=0.0, beta_2=0.9)
    e_optimizer = Adam(learning_rate=0.0001, beta_1=0.0, beta_2=0.9)
    d_optimizer = Adam(learning_rate=0.0001, beta_1=0.0, beta_2=0.9)
    
    @tf.function
    def train_step(x, z):
        tf.keras.backend.learning_phase()
        with tf.GradientTape(persistent = True) as tape:
          
          x_ = generator(z)
          z_  = encoder(x)

          d_inputs = [tf.concat([x_, x], axis=0),
                      tf.concat([z, z_], axis=0)]
          d_preds = discriminator(d_inputs)
          pred_g, pred_e = tf.split(d_preds,num_or_size_splits=2, axis=0)

          d_loss = tf.reduce_mean(tf.nn.softplus(pred_g)) + \
                   tf.reduce_mean(tf.nn.softplus(-pred_e))
          g_loss = tf.reduce_mean(tf.nn.softplus(-pred_g))
          e_loss = tf.reduce_mean(tf.nn.softplus(pred_e))

          d_gradients = tape.gradient(d_loss, discriminator.trainable_variables)
          g_gradients = tape.gradient(g_loss, generator.trainable_variables)
          e_gradients = tape.gradient(e_loss, encoder.trainable_variables)

          d_optimizer.apply_gradients(zip(d_gradients, discriminator.trainable_variables))
          g_optimizer.apply_gradients(zip(g_gradients, generator.trainable_variables))
          e_optimizer.apply_gradients(zip(e_gradients, encoder.trainable_variables))

        return d_loss, g_loss, e_loss

    return train_step

def train(self):
    check_point = 1000
    iters = 200 * check_point
    x_shape = (1,15)
    latent_code_length = (1,20)
    batch_size = 64

    feat, x_train, _, x_test, _ = generate_datasets_for_training(pdf_train.drop(columns=['timestamp', 'shuttle_id']), window_size)

    num_of_data = x_train.shape[0]
    x_train = np.reshape(x_train, (-1, )+x_shape)
    x_train = (x_train.astype("float32"))

    z_train = np.random.uniform(-1.0, 1.0, (num_of_data, )+latent_code_length).astype("float32")
    z_test = np.random.uniform(-1.0, 1.0, (100, )+latent_code_length).astype("float32")
    
    generator = self.build_generator(x_shape, latent_code_length)
    encoder = self.build_encoder(x_shape, latent_code_length)

    discriminator = self.build_discriminator(x_shape, latent_code_length)
    train_step = self.build_train_step(generator, encoder, discriminator)

    for i in range(iters):
        X_batched = x_train[self.generate_indices_sequence_batches(x_train, batch_size)]
        x = X_batched[np.random.randint(0, X_batched.shape[0])][:][:][:]
        print(x.shape)
        z = z_train[np.random.permutation(num_of_data)[:batch_size]]
        print(z.shape)
        d_loss, g_loss, e_loss = train_step(x,z)
        print("\r[{}/{}]  d_loss: {:.4}, g_loss: {:.4}, e_loss: {:.4}".format(i,iters, d_loss, g_loss, e_loss),end="")

I’ve tried to use tf.gradients but also with no luck. I hope someone can help me or point out what I’m missing. If more information is needed I’m happy to provide it.

Calling the function in a class with:

if __name__ == '__main__':
  
    bigan = BIGAN()
    bigan.train()

Topic		Replies	Views
ValueError: No gradients provided for any variable in gan network General Discussion	2	530	October 14, 2023
GradientTape on eager mode General Discussion models , keras , help_request	1	723	July 6, 2021
ValueError: No gradients provided for any variable: (['embedding_1/embeddings:0', 'image_layer_1/kernel:0', 'image_layer_1/bias:0', General Discussion models , keras , custom-loss	1	985	December 6, 2024
Gradients being computed incorrectly with custom loss function General Discussion models , help_request	1	903	November 28, 2024
Zero gradients problem for gradCAM in Siamese Network General Discussion models , keras , help_request	0	610	September 4, 2021

Gradients for Encoder in my BiGAN network are returning None values. Can someone with experience in this field identify the issue?

Related topics