I am a novice building a GAN, and I found that no gradients flow through my network. Below is reproducible code; I don't know where the problem is. Can anyone help me?
# Model
import tensorflow as tf

def generator():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.InputLayer(input_shape=(opt.latent_dim,)))
    model.add(tf.keras.layers.Dense(1024, activation="relu"))
    model.add(tf.keras.layers.Dense(5024, activation="relu"))
    model.add(tf.keras.layers.Dense(10 * 10 * 128, activation="relu"))
    model.add(tf.keras.layers.Reshape([10, 10, 128]))
    model.add(tf.keras.layers.Conv2DTranspose(128, kernel_size=4, strides=2, padding="SAME", activation="relu"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Conv2DTranspose(128, kernel_size=4, strides=4, padding="SAME", activation="relu"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Conv2DTranspose(128, kernel_size=4, strides=2, padding="SAME", activation="relu"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Conv2DTranspose(3, kernel_size=4, strides=2, padding="SAME", activation="tanh"))
    return model

Generator = generator()
Generator.summary()
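As a sanity check (assuming opt.latent_dim = 10, which matches the noise I use later), the generator maps a batch of latent vectors to 320x320x3 images:

# Sanity check of the generator output shape (opt.latent_dim = 10 is my assumption).
noise = tf.random.normal(shape=(2, 10))   # batch of 2 latent vectors
fake = Generator(noise, training=False)
print(fake.shape)                         # (2, 320, 320, 3): 10 -> 20 -> 80 -> 160 -> 320 via the strides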
from tensorflow.keras.layers import (Input, Dense, Conv2D, Activation, BatchNormalization,
                                     MaxPooling2D, Flatten, Dropout, concatenate)
from tensorflow.keras.models import Model

def discriminator():
    # MLP branch over the latent vector
    input_values_dim = Input(shape=(opt.latent_dim,))
    x1 = Dense(units=8, activation='relu')(input_values_dim)
    x1 = Dense(units=4, activation='relu')(x1)
    mlp = Model(input_values_dim, x1)
    # mlp.summary()

    # CNN branch over the image
    filters = (16, 32, 64)
    input_images_dim = Input(shape=(opt.img_size * 2, opt.img_size * 2, opt.channels))
    for (i, f) in enumerate(filters):
        if i == 0:
            x2 = input_images_dim
        x2 = Conv2D(f, (3, 3), padding="same")(x2)
        x2 = Activation("relu")(x2)
        x2 = BatchNormalization()(x2)
        x2 = MaxPooling2D(pool_size=(2, 2))(x2)

    # flatten the volume, then FC => RELU => BN => DROPOUT
    x2 = Flatten()(x2)
    x2 = Dense(16)(x2)
    x2 = Activation("relu")(x2)
    x2 = BatchNormalization()(x2)
    x2 = Dropout(0.5)(x2)
    x2 = Dense(4)(x2)
    x2 = Activation("relu")(x2)
    cnn = Model(input_images_dim, x2)
    # cnn.summary()

    # Combine both branches into a single linear score
    combinedInput = concatenate([mlp.output, cnn.output])
    x3 = Dense(4, activation="relu")(combinedInput)
    x3 = Dense(1, activation="linear")(x3)
    discriminator = Model(inputs=[mlp.input, cnn.input], outputs=x3)
    return discriminator

Discriminator = discriminator()
Discriminator.summary()
Discriminator.trainable = False
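To make sure the two-input discriminator is wired correctly, I call it on dummy tensors. The shapes are my assumption (opt.img_size = 160 and opt.channels = 3, so the image input is 320x320x3):

# Quick wiring check for the two-branch discriminator (shapes assumed).
dummy_latent = tf.random.normal(shape=(2, 10))
dummy_images = tf.random.normal(shape=(2, 320, 320, 3))
score = Discriminator([dummy_latent, dummy_images], training=False)
print(score.shape)  # (2, 1), one linear score per sample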
My loss functions are as follows:
def discriminator_loss(real_output, fake_output):
    real_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.ones_like(real_output), real_output)
    fake_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

def generator_loss(fake_output):
    gen_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.ones_like(fake_output), fake_output)
    return gen_loss

optimizer = tf.keras.optimizers.Adam(learning_rate=0.00002, beta_1=0.6, beta_2=0.9)
Generator.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])
Discriminator.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])
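The loss functions themselves seem fine when I try them on dummy logits (the values below are just arbitrary examples):

# Dummy logits, only to check that the losses return scalar tensors.
dummy_real = tf.constant([[2.0], [1.5]])
dummy_fake = tf.constant([[-1.0], [0.3]])
print(discriminator_loss(dummy_real, dummy_fake))
print(generator_loss(dummy_fake))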
But when I print the gradients with the code below, something is clearly wrong with them.
a = tf.random.normal(shape=(2, 10))
fake_images = Generator(a, training=True)
with tf.GradientTape() as gen_tape:
    gen_loss = generator_loss(fake_output)
gradients_of_generator = gen_tape.gradient(gen_loss, Generator.trainable_variables)
print(gradients_of_generator)

b = tf.random.normal(shape=(2, 320, 320, 3))
real_output = Discriminator([a, b], training=True)
fake_output = Discriminator([a, fake_images], training=True)
with tf.GradientTape() as disc_tape:
    disc_loss = discriminator_loss(real_output, fake_output)
gradients_of_discriminator = disc_tape.gradient(disc_loss, Discriminator.trainable_variables)
print(gradients_of_discriminator)
The printed result:
[None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
[]
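For comparison, I believe the usual DCGAN training step (for example in the TF2 DCGAN tutorial) keeps the forward passes inside the GradientTape blocks, roughly like the sketch below. I am not sure whether that difference is what matters here:

# Sketch of the train-step structure I was trying to adapt (not my actual code).
def train_step(real_images, noise):
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        fake_images = Generator(noise, training=True)
        real_output = Discriminator([noise, real_images], training=True)
        fake_output = Discriminator([noise, fake_images], training=True)
        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)
    gradients_of_generator = gen_tape.gradient(gen_loss, Generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, Discriminator.trainable_variables)
    return gradients_of_generator, gradients_of_discriminator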