I am trying to build a variational autoencoder in Keras, with an input shape of X = (1, 50) and Y = (1, 20).
I made 2 outputs: one for the prediction of Y, and a second for reconstruction, which I will use to generate new samples. The reconstruction trains very well; however, the model can’t learn the prediction. The val_predictor_loss is stuck at 0.77. What is the problem?
This is my code:
# %% [code]
from keras.layers import Lambda, Input, Dense, Reshape, RepeatVector, Dropout
from keras.models import Model
from keras.datasets import mnist
from keras.losses import mse, binary_crossentropy
from keras.utils import plot_model
from keras import backend as K
from keras.constraints import unit_norm, max_norm
import tensorflow as tf
from scipy import stats
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import argparse
import os
from sklearn.manifold import MDS
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error, r2_score
from keras.layers import Input, Dense, Flatten, Lambda,Conv1D, BatchNormalization, MaxPooling1D, Activation
from keras.models import Model
import keras.backend as K
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
# reparameterization trick
# instead of sampling from Q(z|X), sample eps = N(0,I)
# z = z_mean + sqrt(var)*eps
def sampling(args):
    """Reparameterization trick by sampling from an isotropic unit Gaussian.

    # Arguments:
        args (tensor): mean and log of variance of Q(z|X)
    # Returns:
        z (tensor): sampled latent vector
    """
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    # exp(0.5 * log_var) converts the log-variance into a standard deviation
    return z_mean + K.exp(0.5 * z_log_var) * epsilon
# Load my Data
# NOTE(review): X, Y (training) and XX, YY (validation) are not defined in this
# snippet; they are assumed to be loaded elsewhere as 2-D arrays
# (samples, features) -- confirm before running.
training_feature = X
ground_truth_r = Y
np.random.seed(seed=0)
original_dim = 32

# Define VAE model components
# NOTE(review): the dimensions below are hard-coded; they must match
# training_feature.shape[1] or fit() will reject the data. input_shape_r is
# not used below (the predictor's width comes from ground_truth_r.shape[1]).
input_shape_x = (32, )
input_shape_r = (16, )
intermediate_dim = 32
latent_dim = 32

# Encoder network: x -> (z_mean, z_log_var, sampled z)
inputs_x = Input(shape=input_shape_x, name='encoder_input')
inputs_x_dropout = Dropout(0.25)(inputs_x)
inter_x1 = Dense(128, activation='tanh')(inputs_x_dropout)
inter_x2 = Dense(intermediate_dim, activation='tanh')(inter_x1)
z_mean = Dense(latent_dim, name='z_mean')(inter_x2)
z_log_var = Dense(latent_dim, name='z_log_var')(inter_x2)
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
encoder = Model(inputs_x, [z_mean, z_log_var, z], name='encoder')

# Decoder network for reconstruction: z -> x_hat
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
inter_y1 = Dense(intermediate_dim, activation='tanh')(latent_inputs)
inter_y2 = Dense(128, activation='tanh')(inter_y1)
outputs_reconstruction = Dense(original_dim)(inter_y2)
decoder = Model(latent_inputs, outputs_reconstruction, name='decoder')

# Separate network for prediction from latent space: z -> y_hat.
# It gets its own hidden layers instead of branching off the decoder's
# inter_y2, so the two heads only share the encoder.
inter_p1 = Dense(intermediate_dim, activation='tanh')(latent_inputs)
inter_p2 = Dense(128, activation='tanh')(inter_p1)
outputs_prediction = Dense(ground_truth_r.shape[1])(inter_p2)  # one unit per target column
predictor = Model(latent_inputs, outputs_prediction, name='predictor')

# Instantiate VAE model with two outputs.
# The encoder is called exactly once: calling it separately for each head
# would create two independent sampling nodes, so the decoder and the
# predictor would receive different random draws of z for the same input.
z_sampled = encoder(inputs_x)[2]
outputs_vae = [decoder(z_sampled), predictor(z_sampled)]
vae = Model(inputs_x, outputs_vae, name='vae_mlp')
# NOTE(review): only the two MSE terms are optimised; no KL-divergence term is
# added, so nothing pulls the latent distribution towards N(0, I).
vae.compile(optimizer='adam', loss=['mean_squared_error', 'mean_squared_error'])

# Train the model: targets are [reconstruction target, prediction target]
history = vae.fit(
    training_feature,
    [training_feature, ground_truth_r],
    epochs=200,
    batch_size=64,
    shuffle=True,
    validation_data=(XX, [XX, YY]),
)