I’m implementing a BiGAN network for multivariate time-series data and I’m running into problems with training my Encoder. When calling tape.gradient(e_loss, encoder.trainable_variables)
I’m receiving only None
values. Calling encoder.trainable_variables
does yield the correct variables. I’ve used multiple implementations but seem to be running into the same problem. Also I’ve gone through multiple stackoverflow issues that address this issue however those solutions don’t apply to my use case. This is the most clear example I have:
Building the networks:
def build_encoder(self, x_shape, latent_code_length):
x = Input(x_shape)
model = Dense(128, name='encoder_conv_1')(x)
model = LeakyReLU(alpha=0.2)(model)
model = Dense(128, name='encoder_conv_2')(model)
model = LeakyReLU(alpha=0.2)(model)
model = Dense(256, name='encoder_conv_3')(model)
model = LeakyReLU(alpha=0.2)(model)
model = Dense(256, name='encoder_conv_4')(model)
model = LeakyReLU(alpha=0.2)(model)
model = Dense(512, name='encoder_conv_5')(model)
model = LeakyReLU(alpha=0.2)(model)
model = Dense(latent_code_length[-1], name='encoder_conv_6')(model)
return Model(x, model)
def build_generator(self, x_shape, latent_code_length):
model = Sequential()
z = Input(latent_code_length)
model = Dense(512, name = 'first_dense_layer_generator', kernel_regularizer = 'l2')(z)
model = LeakyReLU(alpha=0.2)(model)
model = BatchNormalization(momentum=0.8)(model)
model = Dense(512, name = 'second_dense_layer_generator', kernel_regularizer = 'l2')(model)
model = LeakyReLU(alpha=0.2)(model)
model = BatchNormalization(momentum=0.8)(model)
model = Dense(x_shape[-1], activation='tanh', name = 'third_dense_layer_generator', kernel_regularizer = 'l2')(model)
model = (Reshape(x_shape))(model)
return Model(z, model)
def build_discriminator(self, x_shape, latent_code_length):
z = Input(latent_code_length)
x = Input(x_shape)
_z = Flatten()(z)
_z = Dense(x_shape[0]*x_shape[1])(_z)
_z = Reshape(x_shape)(_z)
model = Concatenate()([x,_z])
model = LSTM(64, kernel_initializer='he_uniform', return_sequences=True, name='encoder_1')(x)
model = LeakyReLU(alpha=0.2)(model)
model = Dropout(0.5)(model)
model = LSTM(32, kernel_initializer='he_uniform', return_sequences=True, name='encoder_2')(model)
model = LeakyReLU(alpha=0.2)(model)
model = Dropout(0.5)(model)
model = LSTM(16, kernel_initializer='he_uniform', return_sequences=False, name='encoder_3')(model)
model = LeakyReLU(alpha=0.2)(model)
model = Dropout(0.5)(model)
model = RepeatVector(window_size, name='encoder_decoder_bridge')(model)
model = LSTM(16, kernel_initializer='he_uniform', return_sequences=True, name='decoder_1')(model)
model = LSTM(32, kernel_initializer='he_uniform', return_sequences=True, name='decoder_2')(model)
model = LSTM(64, kernel_initializer='he_uniform', return_sequences=True, name='decoder_3')(model)
validity = Dense(1, activation="sigmoid")(model)
return Model([x, z], validity)
Defining train functions:
def build_train_step(self, generator, encoder, discriminator):
g_optimizer = Adam(learning_rate=0.0001, beta_1=0.0, beta_2=0.9)
e_optimizer = Adam(learning_rate=0.0001, beta_1=0.0, beta_2=0.9)
d_optimizer = Adam(learning_rate=0.0001, beta_1=0.0, beta_2=0.9)
@tf.function
def train_step(x, z):
tf.keras.backend.learning_phase()
with tf.GradientTape(persistent = True) as tape:
x_ = generator(z)
z_ = encoder(x)
d_inputs = [tf.concat([x_, x], axis=0),
tf.concat([z, z_], axis=0)]
d_preds = discriminator(d_inputs)
pred_g, pred_e = tf.split(d_preds,num_or_size_splits=2, axis=0)
d_loss = tf.reduce_mean(tf.nn.softplus(pred_g)) + \
tf.reduce_mean(tf.nn.softplus(-pred_e))
g_loss = tf.reduce_mean(tf.nn.softplus(-pred_g))
e_loss = tf.reduce_mean(tf.nn.softplus(pred_e))
d_gradients = tape.gradient(d_loss, discriminator.trainable_variables)
g_gradients = tape.gradient(g_loss, generator.trainable_variables)
e_gradients = tape.gradient(e_loss, encoder.trainable_variables)
d_optimizer.apply_gradients(zip(d_gradients, discriminator.trainable_variables))
g_optimizer.apply_gradients(zip(g_gradients, generator.trainable_variables))
e_optimizer.apply_gradients(zip(e_gradients, encoder.trainable_variables))
return d_loss, g_loss, e_loss
return train_step
def train(self):
check_point = 1000
iters = 200 * check_point
x_shape = (1,15)
latent_code_length = (1,20)
batch_size = 64
feat, x_train, _, x_test, _ = generate_datasets_for_training(pdf_train.drop(columns=['timestamp', 'shuttle_id']), window_size)
num_of_data = x_train.shape[0]
x_train = np.reshape(x_train, (-1, )+x_shape)
x_train = (x_train.astype("float32"))
z_train = np.random.uniform(-1.0, 1.0, (num_of_data, )+latent_code_length).astype("float32")
z_test = np.random.uniform(-1.0, 1.0, (100, )+latent_code_length).astype("float32")
generator = self.build_generator(x_shape, latent_code_length)
encoder = self.build_encoder(x_shape, latent_code_length)
discriminator = self.build_discriminator(x_shape, latent_code_length)
train_step = self.build_train_step(generator, encoder, discriminator)
for i in range(iters):
X_batched = x_train[self.generate_indices_sequence_batches(x_train, batch_size)]
x = X_batched[np.random.randint(0, X_batched.shape[0])][:][:][:]
print(x.shape)
z = z_train[np.random.permutation(num_of_data)[:batch_size]]
print(z.shape)
d_loss, g_loss, e_loss = train_step(x,z)
print("\r[{}/{}] d_loss: {:.4}, g_loss: {:.4}, e_loss: {:.4}".format(i,iters, d_loss, g_loss, e_loss),end="")
I’ve tried to use tf.gradients
but also with no luck. I hope someone can help me or point out what I’m missing. If more information is needed I’m happy to provide it.
Calling the function in a class with:
if __name__ == '__main__':
bigan = BIGAN()
bigan.train()