Hi all, I'm a beginner in deep learning.
I need your help with the following code.
(I'm using TensorFlow 2.2.0 and Keras version 1.1.2.)
The code below is the original code that I found on YouTube:
class DuelingDeepQNetwork(keras.Model):
    """Dueling DQN (Wang et al., 2016): a shared convolutional/dense trunk
    followed by separate state-value (V) and advantage (A) heads, combined
    as Q = V + (A - mean(A)).

    Args:
        n_actions: size of the discrete action space (width of the A head).
        fc1_dims, fc2_dims, fc3_dims: widths of the three fully-connected
            trunk layers.
    """

    def __init__(self, n_actions, fc1_dims, fc2_dims, fc3_dims):
        super(DuelingDeepQNetwork, self).__init__()
        # Conv stack; input_shape assumes single-channel 84x84 frames
        # (the classic Atari preprocessing) — TODO confirm with the caller.
        self.conv1 = keras.layers.Conv2D(filters=32, kernel_size=(8, 8), strides=(4, 4), activation='relu', input_shape=(84, 84, 1), kernel_initializer=tf.initializers.GlorotNormal())
        self.maxpool1 = keras.layers.MaxPooling2D((2, 2))
        self.conv2 = keras.layers.Conv2D(filters=64, kernel_size=(4, 4), strides=(2, 2), activation='relu', kernel_initializer=tf.initializers.GlorotNormal())
        self.conv3 = keras.layers.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), activation='relu', kernel_initializer=tf.initializers.GlorotNormal())
        self.flat = keras.layers.Flatten()
        self.dense1 = keras.layers.Dense(fc1_dims, activation='relu', kernel_initializer=tf.initializers.GlorotNormal())
        self.dense2 = keras.layers.Dense(fc2_dims, activation='relu', kernel_initializer=tf.initializers.GlorotNormal())
        self.dense3 = keras.layers.Dense(fc3_dims, activation='relu', kernel_initializer=tf.initializers.GlorotNormal())
        # Heads: V estimates the state value (scalar), A one score per action.
        self.V = keras.layers.Dense(1, activation=None)
        self.A = keras.layers.Dense(n_actions, activation=None)

    def _features(self, state):
        """Shared feature-extraction trunk used by both call() and advantage().

        Factored out so the two public methods cannot drift apart (the
        original duplicated this 8-layer pipeline in both places).
        """
        x = self.conv1(state)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.flat(x)
        x = self.dense1(x)
        x = self.dense2(x)
        return self.dense3(x)

    def call(self, state):
        """Return Q-values for a batch of states."""
        x = self._features(state)
        V = self.V(x)
        A = self.A(x)
        # Subtract the mean advantage so V and A are identifiable
        # (standard dueling-architecture aggregation).
        Q = V + (A - tf.math.reduce_mean(A, axis=1, keepdims=True))
        return Q

    def advantage(self, state):
        """Return only the advantage head's output (used for action selection)."""
        return self.A(self._features(state))
and I modified the original code as shown below:
class DuelingDeepQNetwork(keras.Model):
    """Dueling DQN with the conv/dense trunk wrapped in one keras.Sequential.

    NOTE: the Sequential assigned to ``self.model`` is a tracked sub-layer of
    this Model, so its weights appear in ``self.trainable_variables`` and are
    updated by compiling/fitting the OUTER model — no separate
    ``self.model.compile(...)`` / ``self.model.fit(...)`` is needed.

    Args:
        n_actions: size of the discrete action space (width of the A head).
        fc1_dims, fc2_dims, fc3_dims: widths of the three fully-connected
            trunk layers.
    """

    def __init__(self, n_actions, fc1_dims, fc2_dims, fc3_dims):
        super(DuelingDeepQNetwork, self).__init__()
        self.model = keras.Sequential()
        self.model.add(layers.Conv2D(filters=32, kernel_size=(8, 8), strides=(4, 4), activation='relu', input_shape=(84, 84, 1), kernel_initializer=tf.initializers.GlorotNormal()))
        self.model.add(layers.MaxPooling2D((2, 2)))
        self.model.add(layers.Conv2D(filters=64, kernel_size=(4, 4), strides=(2, 2), activation='relu', kernel_initializer=tf.initializers.GlorotNormal()))
        self.model.add(layers.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), activation='relu', kernel_initializer=tf.initializers.GlorotNormal()))
        self.model.add(layers.Flatten())
        self.model.add(layers.Dense(fc1_dims, activation='relu', kernel_initializer=tf.initializers.GlorotNormal()))
        self.model.add(layers.Dense(fc2_dims, activation='relu', kernel_initializer=tf.initializers.GlorotNormal()))
        # BUG FIX: the original network applied ReLU on this layer too; the
        # rewrite dropped activation='relu' here, silently changing behavior.
        self.model.add(layers.Dense(fc3_dims, activation='relu', kernel_initializer=tf.initializers.GlorotNormal()))
        # Heads: V estimates the state value (scalar), A one score per action.
        self.V = keras.layers.Dense(1, activation=None)
        self.A = keras.layers.Dense(n_actions, activation=None)

    def call(self, state):
        """Return Q-values for a batch of states."""
        x = self.model(state)
        V = self.V(x)
        A = self.A(x)
        # Subtract the mean advantage so V and A are identifiable
        # (standard dueling-architecture aggregation).
        Q = V + (A - tf.math.reduce_mean(A, axis=1, keepdims=True))
        return Q

    def advantage(self, state):
        """Return only the advantage head's output (used for action selection)."""
        return self.A(self.model(state))
When I ran 'train_on_batch' or 'fit' on the original code, like below,
(...)
self.q_eval.compile(optimizer=Adam(learning_rate=lr),
loss='mean_squared_error')
(...)
self.q_eval.fit(states, q_target)
the following error occurred:
NotImplementedError: Cannot convert a symbolic Tensor (IteratorGetNext:0) to a numpy array.
So I fixed the code (and it no longer raises any errors).
My question is this:
when I run 'fit' or 'train_on_batch' on my fixed code, like below,
(...)
self.q_eval.compile(optimizer=Adam(learning_rate=lr),
loss='mean_squared_error')
(...)
self.q_eval.fit(states, q_target)
should I also add a call such as self.q_eval.model.fit(states, q_target)?
I don't know whether the inner Sequential model's weights are updated by the call above, since I never compiled the Sequential model itself (e.g. self.q_eval.model.compile(...)).