I am uncertain how this is getting bigger over time, can anyone with a good eye for keras point it out?
os.environ["KERAS_BACKEND"] = "jax"
import numpy as np
from tensorflow.keras.layers import Input, Flatten, Dense, Reshape
from tensorflow.keras import Model
import tensorflow as tf
def create_critic(n, m, d, population_dummy, scores_dummy, output_dummy):
# Create an additional input for the scores and actor's output
score_input = tf.keras.layers.Input(shape=(n,))
actor_output_input = tf.keras.layers.Input(shape=output_dummy.shape[1:])
# Define the critic model
critic_input = tf.keras.layers.Input(shape=(n, m, d))
x1 = Flatten()(critic_input)
x2 = Dense(64, activation='relu')(score_input) # Dense layer for the scores
x3 = Flatten()(actor_output_input) # Flatten the actor's output
x = tf.keras.layers.Concatenate()([x1, x2, x3]) # Concatenate the flattened critic input, score input, and actor's output
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(1, activation='linear')(x) # Output layer with single linear unit
critic_model = tf.keras.models.Model([critic_input, score_input, actor_output_input], x)
critic_model.compile(optimizer='adam', loss='mean_squared_error') # Use MSE loss for value prediction
return critic_model
def create_actor(n, m, d, total_parents, population_dummy, scores_dummy):
# Create an additional input for the scores
score_input = tf.keras.layers.Input(shape=(n,))
# Define the actor model
actor_input = tf.keras.layers.Input(shape=(n, m, d))
x1 = Flatten()(actor_input)
x2 = Dense(64, activation='relu')(score_input) # Dense layer for the scores
x = tf.keras.layers.Concatenate()([x1, x2]) # Concatenate the flattened actor input and the score input
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(n, activation='relu')(x) # Output layer with n linear units
actor_model = tf.keras.models.Model([actor_input, score_input], x)
actor_model.compile(optimizer='adam', loss='mean_squared_error') # Use MSE loss for value prediction
return actor_model
import numpy as np
#dummy functions to generate fake data to develop the training pipeline
def pop_gen(b, n, m, d):
return np.random.randint(2, size=(b, n, m, d))
def reward_gen():
return np.random.rand
def scores_gen(n):
return np.random.rand(1, n)
n = int(nInd[0])
m = int(nMarkers)
d = 2
total_parents = n*2
population_dummy = pop_gen(1, n, m, d) # Extra dimension for batch size
scores_dummy = scores_gen(n) # Extra dimension for batch size
#init actor and critic models
actor_model = create_actor(n,m,d,total_parents, population_dummy, scores_dummy)
# Feed the dummy data through the network to get an example for creating critic model
actor_output = actor_model([population_dummy, scores_dummy])
critic_model = create_critic(n,m,d, population_dummy, scores_dummy,actor_output)
print_memory_usage()
for _ in range(1000):
actor_output = actor_model([population_dummy, scores_dummy])
#HOW TO TAKE ACTION
policy = actor_output # find the policy
parent_values, parent_indices = select_parents(policy) #select parents
selected_parents = population_dummy[0][parent_indices.numpy()] #grab parents from our current population
#HOW TO CALCULATE REWARD
past_fitness = scores_gen(n)
new_fitness = scores_gen(n)
reward = new_fitness-past_fitness
# update the actor
with tf.GradientTape() as tape:
new_policy = actor_model([population_dummy, scores_dummy], training=True) # compute new policy with actor
actor_loss = -tf.reduce_mean(critic_model([population_dummy, scores_dummy, new_policy])) # compute actor loss
# Get the gradients
actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
# Update the weights
actor_model.optimizer.apply_gradients(zip(actor_grad, actor_model.trainable_variables))
# update the critic
with tf.GradientTape() as tape:
critic_value = critic_model([population_dummy, scores_dummy, actor_output], training=True) # compute critic value
critic_loss = tf.keras.losses.MSE(reward, critic_value) # compute critic loss
# Get the gradients
critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
# Update the weights
critic_model.optimizer.apply_gradients(zip(critic_grad, critic_model.trainable_variables))
population_dummy = pop_gen(1, n, m, d)
scores_dummy = scores_gen(n)
tf.keras.backend.clear_session()
print_memory_usage()