Below is my simplified tf_agents environment and my attempt to tie it to TFUniformReplayBuffer and DynamicEpisodeDriver. It does nothing fancy and is only used in my tests. Each episode is supposed to run for 10 steps with a gradually increasing observation value (the actual values are irrelevant), with actions generated by a random policy.
import numpy as np
import tensorflow as tf

from tf_agents.drivers.dynamic_episode_driver import DynamicEpisodeDriver
from tf_agents.environments import tf_py_environment
from tf_agents.environments.py_environment import PyEnvironment
from tf_agents.policies.random_tf_policy import RandomTFPolicy
from tf_agents.replay_buffers.tf_uniform_replay_buffer import TFUniformReplayBuffer
from tf_agents.specs import tensor_spec
from tf_agents.specs.array_spec import ArraySpec, BoundedArraySpec
from tf_agents.trajectories import policy_step, time_step, trajectory


class TestEnv(PyEnvironment):

    def __init__(self):
        n_actions = 2
        self._n_observations = 3
        self._action_spec = BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=n_actions - 1, name="act")
        self._observation_spec = ArraySpec(
            shape=(self._n_observations,), dtype=np.float64, name="obs")
        self._idx = 0

    def action_spec(self):
        return self._action_spec

    def observation_spec(self):
        return self._observation_spec

    def _reset(self):
        self._idx = 0
        observation = np.asarray([self._idx] * self._n_observations, dtype=np.float64)
        return time_step.restart(observation)

    def _step(self, action):
        self._idx += 1
        observation = np.asarray([self._idx] * self._n_observations, dtype=np.float64)
        # Terminate on the tenth step; otherwise emit a regular transition.
        if self._idx >= 10:
            return time_step.termination(observation, reward=0)
        return time_step.transition(observation, reward=1, discount=1)


class TestExperienceReply(object):

    def __init__(self, policy, environment):
        self._policy = policy
        self._environment = environment
        self._replay_buffer = self._make_replay_buffer(self._environment)
        observers = [self._replay_buffer.add_batch]
        self._driver = DynamicEpisodeDriver(
            self._environment, policy, observers, num_episodes=1)

    def _make_replay_buffer(self, tf_env):
        # Build the trajectory spec by hand from the environment's specs
        # and use it as the buffer's data_spec.
        time_step_spec = tf_env.time_step_spec()
        action_spec = tf_env.action_spec()
        action_step_spec = policy_step.PolicyStep(
            action_spec, (), tensor_spec.TensorSpec((), tf.int32))
        trajectory_spec = trajectory.from_transition(
            time_step_spec, action_step_spec, time_step_spec)
        return TFUniformReplayBuffer(
            data_spec=trajectory_spec, batch_size=tf_env.batch_size)

    def collect(self):
        _, _ = self._driver.run()


env = TestEnv()
tf_env = tf_py_environment.TFPyEnvironment(env)
policy = RandomTFPolicy(time_step_spec=tf_env.time_step_spec(), action_spec=tf_env.action_spec())
experience_replay = TestExperienceReply(policy=policy, environment=tf_env)
experience_replay.collect()
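In case it helps rule things out, the environment on its own seems consistent with its specs; this is the standalone check I would run against it (assuming the validate_py_environment helper from tf_agents.environments.utils):

from tf_agents.environments import utils

# Step the raw Python environment through a couple of episodes and check
# every TimeStep it produces against the declared specs.
utils.validate_py_environment(env, episodes=2)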
When I run the collection code, I get the following error, triggered inside the collect method:
E ValueError: The two structures do not match:
E (...)
E Values:
E Trajectory(
E {'action': <tf.Tensor: shape=(1,), dtype=int32, numpy=array([0], dtype=int32)>,
E 'discount': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([1.], dtype=float32)>,
E 'next_step_type': <tf.Tensor: shape=(1,), dtype=int32, numpy=array([1], dtype=int32)>,
E 'observation': <tf.Tensor: shape=(1, 3), dtype=float64, numpy=array([[0., 0., 0.]])>,
E 'policy_info': (),
E 'reward': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([1.], dtype=float32)>,
E 'step_type': <tf.Tensor: shape=(1,), dtype=int32, numpy=array([0], dtype=int32)>})
E vs.
E Trajectory(
E {'action': BoundedTensorSpec(shape=(), dtype=tf.int32, name='act', minimum=array(0, dtype=int32), maximum=array(1, dtype=int32)),
E 'discount': BoundedTensorSpec(shape=(), dtype=tf.float32, name='discount', minimum=array(0., dtype=float32), maximum=array(1., dtype=float32)),
E 'next_step_type': TensorSpec(shape=(), dtype=tf.int32, name='step_type'),
E 'observation': TensorSpec(shape=(3,), dtype=tf.float64, name='obs'),
E 'policy_info': TensorSpec(shape=(), dtype=tf.int32, name=None),
E 'reward': TensorSpec(shape=(), dtype=tf.float32, name='reward'),
E 'step_type': TensorSpec(shape=(), dtype=tf.int32, name='step_type')}).
It looks like I am somehow missing an extra tensor dimension somewhere, but I cannot figure out where or why. Does anyone see a problem with the code above?
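For completeness, here is how I could print my hand-built spec next to the spec the policy itself reports, in case that points at the mismatch (I am assuming RandomTFPolicy inherits the trajectory_spec property from TFPolicy):

# Hand-built spec, as in _make_replay_buffer above.
built_spec = trajectory.from_transition(
    tf_env.time_step_spec(),
    policy_step.PolicyStep(tf_env.action_spec(), (), tensor_spec.TensorSpec((), tf.int32)),
    tf_env.time_step_spec())
print(built_spec)

# Spec the policy emits trajectories against (assuming the trajectory_spec
# property is available on RandomTFPolicy).
print(policy.trajectory_spec)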