INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [4096,10]

I am getting the errors below when running a TensorFlow model for reinforcement learning:

2023-06-13 19:48:02.939444: I tensorflow/core/common_runtime/] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [4096,10]
	 [[{{node Placeholder/_0}}]]
2023-06-13 19:48:03.009181: I tensorflow/core/common_runtime/] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [4096,3]
	 [[{{node Placeholder/_1}}]]
Below is my code

# Q-learning agent that keeps an "online" network, a "target" network built
# with trainable=False, and a bounded replay buffer of transitions.
# NOTE(review): this listing is incomplete -- the rest of the __init__
# signature, most of build_model, the body of update_target and two `if`
# bodies in experience_replay are missing, so it does not parse as posted.
class DDQNAgent:
    # NOTE(review): the signature below is cut off after `state_dim`. The body
    # reads num_actions, replay_capacity, learning_rate, gamma, architecture,
    # l2_reg, epsilon_start, epsilon_end, epsilon_decay_steps,
    # epsilon_exponential_decay, batch_size and tau, which presumably were the
    # remaining parameters -- confirm names and order against the real source.
    def __init__(self, state_dim,

        self.state_dim = state_dim
        self.num_actions = num_actions
        # Replay buffer; with maxlen set, the deque discards its oldest entry
        # once replay_capacity transitions are stored.
        self.experience = deque([], maxlen=replay_capacity)
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.architecture = architecture
        self.l2_reg = l2_reg

        # Two networks from the same builder; the second is requested with
        # trainable=False.
        self.online_network = self.build_model()
        self.target_network = self.build_model(trainable=False)

        self.epsilon = epsilon_start
        self.epsilon_decay_steps = epsilon_decay_steps
        # Size of one linear decrement: epsilon_decay_steps of these take
        # epsilon from epsilon_start down to epsilon_end.
        self.epsilon_decay = (epsilon_start - epsilon_end) / epsilon_decay_steps
        self.epsilon_exponential_decay = epsilon_exponential_decay
        self.epsilon_history = []

        # Counters and per-episode bookkeeping.
        self.total_steps = self.train_steps = 0
        self.episodes = self.episode_length = self.train_episodes = 0
        self.steps_per_episode = []
        self.episode_reward = 0
        self.rewards_history = []

        self.batch_size = batch_size
        # experience_replay tests `total_steps % tau == 0`, so tau acts as a
        # period measured in steps.
        self.tau = tau
        self.losses = []
        # Row indices 0..batch_size-1, paired with action indices in
        # experience_replay to address one Q-value per sampled transition.
        self.idx = tf.range(batch_size)
        # When true, memorize_transition applies the epsilon decay.
        self.train = True

    # Builds a Sequential model from self.architecture.
    # NOTE(review): the loop body is missing -- only a stray `input_dim=...`
    # keyword line survives, `layers` is never appended to, and neither `n`
    # nor `trainable` is used in the visible lines. Any output layer or
    # compile step is not shown either.
    def build_model(self, trainable=True):
        layers = []
        n = len(self.architecture)
        for i, units in enumerate(self.architecture, 1):
                                input_dim=self.state_dim if i == 1 else None,
        model = Sequential(layers)
        return model

    # NOTE(review): the body is missing. Presumably this copies the online
    # network's weights into the target network -- confirm.
    def update_target(self):

    # Epsilon-greedy action selection; also counts the step in total_steps.
    # With probability epsilon a uniformly random action index is returned,
    # otherwise the argmax over axis 1 of the online network's prediction.
    # Assumes `state` carries a leading batch axis, since the argmax is taken
    # over axis=1 -- TODO confirm.
    def epsilon_greedy_policy(self, state):
        self.total_steps += 1
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.num_actions)
        q = self.online_network.predict(state)
        return np.argmax(q, axis=1).squeeze()

    # Updates the episode statistics and stores one transition in the replay
    # buffer.
    # NOTE(review): as posted, everything down to the reset of
    # episode_reward/episode_length runs only when not_done is truthy, and the
    # linear and multiplicative epsilon updates both sit under the same
    # `episodes < epsilon_decay_steps` test. It looks like `else:` lines were
    # lost (end-of-episode handling; multiplicative decay after the linear
    # phase) -- confirm against the real source.
    def memorize_transition(self, s, a, r, s_prime, not_done):
        if not_done:
            self.episode_reward += r
            self.episode_length += 1
            if self.train:
                if self.episodes < self.epsilon_decay_steps:
                    self.epsilon -= self.epsilon_decay
                    self.epsilon *= self.epsilon_exponential_decay

            self.episodes += 1
            self.episode_reward, self.episode_length = 0, 0

        # The transition is stored on every call, whatever not_done is.
        self.experience.append((s, a, r, s_prime, not_done))

    # Samples a minibatch from the replay buffer, builds targets from the
    # online network's action choice and the target network's value for that
    # action, and runs one train_on_batch step on the online network.
    def experience_replay(self):
        # NOTE(review): this `if` has no body; presumably an early `return`
        # while the buffer holds fewer than batch_size transitions.
        if self.batch_size > len(self.experience):
        # Draw batch_size transitions and transpose them into five arrays.
        minibatch = map(np.array, zip(*sample(self.experience, self.batch_size)))
        states, actions, rewards, next_states, not_done = minibatch
        # NOTE(review): next_states is unpacked but never used; both
        # predictions below are made on `states`. For a bootstrapped target
        # they presumably should use next_states -- confirm.
        next_q_values = self.online_network.predict_on_batch(states)
        best_actions = tf.argmax(tf.convert_to_tensor(next_q_values), axis=1)

        next_q_values_target = self.target_network.predict_on_batch(states)
        # For each row, pick the target network's value at the action the
        # online network ranked best.
        target_q_values = tf.gather_nd(next_q_values_target,
                                       tf.stack((self.idx, tf.cast(best_actions, tf.int32)), axis=1))

        # Multiplying by not_done removes the bootstrap term wherever
        # not_done is false/0.
        targets = rewards + not_done * self.gamma * target_q_values
        # NOTE(review): the q_values tensor built in the next four lines is
        # overwritten by the second predict_on_batch call below, so this
        # scatter update has no effect on training.
        q_values = self.online_network.predict_on_batch(states)
        indices = tf.stack((self.idx, tf.cast(actions, tf.int32)), axis=1)
        q_values = tf.convert_to_tensor(q_values)
        q_values = tf.tensor_scatter_nd_update(q_values, indices, targets)

        # Overwrite only the taken action's Q-value with its target; the other
        # entries keep the network's own predictions.
        q_values = self.online_network.predict_on_batch(states)
        q_values[self.idx, actions] = targets

        # NOTE(review): `loss` is not stored in the visible code, although
        # self.losses exists.
        loss = self.online_network.train_on_batch(x=states, y=q_values)

        # NOTE(review): this `if` has no body; presumably it calls
        # self.update_target() every tau steps -- confirm.
        if self.total_steps % self.tau == 0:

I have tried downgrading TensorFlow from the latest version to TF 2.11 and 2.12, but then a different error appears, shown below:

File ~/anaconda3/envs/test-env/lib/python3.8/site-packages/tensorflow/python/feature_column/
     15 """FeatureColumns: tools for ingesting and representing features."""
    318     return Tester
--> 320 raise AttributeError("module {!r} has no attribute "
    321                      "{!r}".format(__name__, attr))

AttributeError: module 'numpy' has no attribute 'typeDict'

I am wondering what I am missing in the model above.


This log message is safe to ignore, as the log itself states:

Executor start aborting (this does not indicate an error and you can ignore this message)

Could you please add the following lines at the top of your script, run it with TensorFlow 2.12, and let us know the result?

import os
# Raise TensorFlow's C++ log threshold so informational lines such as the
# "Executor start aborting" message are hidden; level '2' filters INFO and
# WARNING output. Set this before TensorFlow is imported so it takes effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

Thank you!