I am getting the messages below when running a TensorFlow reinforcement learning model (a double DQN agent):
2023-06-13 19:48:02.939444: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [4096,10]
[[{{node Placeholder/_0}}]]
2023-06-13 19:48:03.009181: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [4096,3]
[[{{node Placeholder/_1}}]]
(these two messages, for 'Placeholder/_0' with shape [4096,10] and 'Placeholder/_1' with shape [4096,3], repeat many times per run)
Below is my code (with the imports I am using):
from collections import deque
from random import sample

import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2


class DDQNAgent:
    def __init__(self, state_dim,
                 num_actions,
                 learning_rate,
                 gamma,
                 epsilon_start,
                 epsilon_end,
                 epsilon_decay_steps,
                 epsilon_exponential_decay,
                 replay_capacity,
                 architecture,
                 l2_reg,
                 tau,
                 batch_size):
        self.state_dim = state_dim
        self.num_actions = num_actions
        self.experience = deque([], maxlen=replay_capacity)
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.architecture = architecture
        self.l2_reg = l2_reg

        self.online_network = self.build_model()
        self.target_network = self.build_model(trainable=False)
        self.update_target()

        self.epsilon = epsilon_start
        self.epsilon_decay_steps = epsilon_decay_steps
        self.epsilon_decay = (epsilon_start - epsilon_end) / epsilon_decay_steps
        self.epsilon_exponential_decay = epsilon_exponential_decay
        self.epsilon_history = []

        self.total_steps = self.train_steps = 0
        self.episodes = self.episode_length = self.train_episodes = 0
        self.steps_per_episode = []
        self.episode_reward = 0
        self.rewards_history = []

        self.batch_size = batch_size
        self.tau = tau
        self.losses = []
        self.idx = tf.range(batch_size)
        self.train = True
    def build_model(self, trainable=True):
        layers = []
        for i, units in enumerate(self.architecture, 1):
            layers.append(Dense(units=units,
                                input_dim=self.state_dim if i == 1 else None,
                                activation='relu',
                                kernel_regularizer=l2(self.l2_reg),
                                name=f'Dense_{i}',
                                trainable=trainable))
        layers.append(Dropout(.1))
        layers.append(Dense(units=self.num_actions,
                            trainable=trainable,
                            name='Output'))
        model = Sequential(layers)
        model.compile(loss='mean_squared_error',
                      optimizer=Adam(learning_rate=self.learning_rate))
        return model
    def update_target(self):
        self.target_network.set_weights(self.online_network.get_weights())

    def epsilon_greedy_policy(self, state):
        self.total_steps += 1
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.num_actions)
        q = self.online_network.predict(state)
        return np.argmax(q, axis=1).squeeze()

    def memorize_transition(self, s, a, r, s_prime, not_done):
        if not_done:
            self.episode_reward += r
            self.episode_length += 1
        else:
            # End of an episode: decay epsilon and record episode statistics.
            if self.train:
                if self.episodes < self.epsilon_decay_steps:
                    self.epsilon -= self.epsilon_decay
                else:
                    self.epsilon *= self.epsilon_exponential_decay
            self.episodes += 1
            self.rewards_history.append(self.episode_reward)
            self.steps_per_episode.append(self.episode_length)
            self.episode_reward, self.episode_length = 0, 0

        self.experience.append((s, a, r, s_prime, not_done))
    def experience_replay(self):
        if self.batch_size > len(self.experience):
            return
        minibatch = map(np.array, zip(*sample(self.experience, self.batch_size)))
        states, actions, rewards, next_states, not_done = minibatch

        # Double DQN: the online network selects the best next action,
        # the target network evaluates it.
        next_q_values = self.online_network.predict_on_batch(next_states)
        best_actions = tf.argmax(tf.convert_to_tensor(next_q_values), axis=1)

        next_q_values_target = self.target_network.predict_on_batch(next_states)
        target_q_values = tf.gather_nd(next_q_values_target,
                                       tf.stack((self.idx, tf.cast(best_actions, tf.int32)), axis=1))

        targets = tf.cast(rewards + not_done * self.gamma * target_q_values, tf.float32)

        # Overwrite the predicted Q-values of the actions actually taken
        # with the TD targets; all other actions keep their current predictions.
        q_values = tf.convert_to_tensor(self.online_network.predict_on_batch(states))
        indices = tf.stack((self.idx, tf.cast(actions, tf.int32)), axis=1)
        q_values = tf.tensor_scatter_nd_update(q_values, indices, targets)

        loss = self.online_network.train_on_batch(x=states, y=q_values)
        self.losses.append(loss)

        if self.total_steps % self.tau == 0:
            self.update_target()
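For context, this is roughly how I construct and drive the agent. Everything below is a placeholder for illustration, not my exact setup: the transitions are random dummy data instead of my real environment, and I picked state_dim=10, num_actions=3 and batch_size=4096 only because those are the shapes that appear in the log messages.

# Illustrative driver only -- random dummy transitions and placeholder hyperparameters.
state_dim, num_actions = 10, 3

agent = DDQNAgent(state_dim=state_dim,
                  num_actions=num_actions,
                  learning_rate=1e-4,
                  gamma=.99,
                  epsilon_start=1.0,
                  epsilon_end=.01,
                  epsilon_decay_steps=250,
                  epsilon_exponential_decay=.99,
                  replay_capacity=int(1e6),
                  architecture=(256, 256),
                  l2_reg=1e-6,
                  tau=100,
                  batch_size=4096)

state = np.random.rand(state_dim)
for step in range(agent.batch_size + 5):
    action = agent.epsilon_greedy_policy(state.reshape(1, -1))
    next_state = np.random.rand(state_dim)
    reward = np.random.rand()
    agent.memorize_transition(state, action, reward, next_state, not_done=1.0)
    agent.experience_replay()   # starts training once the replay buffer holds a full batch
    state = next_state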
I have tried downgrading TensorFlow from the latest version to TF 2.11 and 2.12, but then a different error pops up, like the one below:
File ~/anaconda3/envs/test-env/lib/python3.8/site-packages/tensorflow/python/feature_column/feature_column_lib.py:18
15 """FeatureColumns: tools for ingesting and representing features."""
...
318 return Tester
--> 320 raise AttributeError("module {!r} has no attribute "
321 "{!r}".format(__name__, attr))
AttributeError: module 'numpy' has no attribute 'typeDict'
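For reference, this is how I check which versions are actually loaded in the environment after the downgrade. From what I have read, np.typeDict was removed in newer NumPy releases, so I suspect this AttributeError comes from a NumPy/TensorFlow version mismatch rather than from the model itself, but I am not sure:

import numpy as np
print('numpy:', np.__version__)
print('has typeDict:', hasattr(np, 'typeDict'))   # False on newer NumPy releases

import tensorflow as tf   # I believe this import is where the typeDict error surfaces
print('tensorflow:', tf.__version__)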
I am wondering what I am missing in the model above.