I’m trying to apply the TensorFlow PPO agent to the card game environment from the “Environments” TensorFlow guide page.
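For reference, the environment’s specs in that guide look roughly like this (paraphrased from the TF-Agents tutorial, so details may differ slightly):

```python
import numpy as np
from tf_agents.specs import array_spec

# The guide's CardGameEnv declares its observation spec as int32;
# this is the dtype that eventually reaches the LSTM cell.
action_spec = array_spec.BoundedArraySpec(
    shape=(), dtype=np.int32, minimum=0, maximum=1, name='action')
observation_spec = array_spec.BoundedArraySpec(
    shape=(1,), dtype=np.int32, minimum=0, name='observation')
```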
I defined the actor network, value network, and PPO agent as follows:
    import tensorflow as tf

    from tf_agents.agents.ppo import ppo_agent
    from tf_agents.environments import tf_py_environment
    from tf_agents.networks.actor_distribution_rnn_network import ActorDistributionRnnNetwork
    from tf_agents.networks.value_rnn_network import ValueRnnNetwork

    def create_networks(tf_env):
        actor_net = ActorDistributionRnnNetwork(
            tf_env.observation_spec(),
            tf_env.action_spec(),
            input_fc_layer_params=None,
            lstm_size=(128, 128),
            output_fc_layer_params=None,
            activation_fn=None)
        value_net = ValueRnnNetwork(
            tf_env.observation_spec(),
            input_fc_layer_params=None,
            lstm_size=(128, 128),
            output_fc_layer_params=None,
            activation_fn=None)
        return actor_net, value_net
    learning_rate = 1e-3
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    tf_env = tf_py_environment.TFPyEnvironment(CardGameEnv())
    actor_net, value_net = create_networks(tf_env)

    agent = ppo_agent.PPOAgent(
        tf_env.time_step_spec(),
        tf_env.action_spec(),
        optimizer=optimizer,
        actor_net=actor_net,
        value_net=value_net,
        num_epochs=10,
        gradient_clipping=0.5,
        entropy_regularization=1e-2,
        importance_ratio_clipping=0.2,
        use_gae=True,
        use_td_lambda_return=True)
But when I run it, I get the following error:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-12-14a7e79a4d9b> in <module>
15 importance_ratio_clipping=0.2,
16 use_gae=True,
---> 17 use_td_lambda_return=True
18 )
(11 intermediate frames omitted)
/usr/local/lib/python3.7/dist-packages/tf_agents/keras_layers/dynamic_unroll_layer.py in _static_unroll_single_step(cell, inputs, reset_mask, state, zero_state, training)
375 zero_state)
376
--> 377 outputs, final_state = cell(inputs, state, training=training)
378 outputs = tf.nest.map_structure(lambda t: tf.expand_dims(t, 1), outputs)
379
InvalidArgumentError: Exception encountered when calling layer "lstm_cell" (type LSTMCell).
cannot compute MatMul as input #1(zero-based) was expected to be a int32 tensor but is a float tensor [Op:MatMul]
Call arguments received by layer "lstm_cell" (type LSTMCell):
• inputs=tf.Tensor(shape=(1, 1), dtype=int32)
• states=ListWrapper(['tf.Tensor(shape=(1, 128), dtype=float32)', 'tf.Tensor(shape=(1, 128), dtype=float32)'])
• training=False
In call to configurable 'PPOAgent' (<class 'tf_agents.agents.ppo.ppo_agent.PPOAgent'>)
Is it because input #1 (which I assume is the state) is first defined as `self._state = 0` in the `CardGameEnv` class, so its dtype needs to be explicitly defined as int32? But how can I do that if the initial state is just the plain integer 0?
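In case it is useful, here is the fix I am considering (a minimal sketch, assuming the `_reset`/`_step` structure from the tutorial): since `self._state = 0` is a plain Python int and the spec currently pins the observation to int32, declare the observation spec as float32 and cast the state whenever a time step is emitted:

```python
import numpy as np
from tf_agents.environments import py_environment
from tf_agents.specs import array_spec
from tf_agents.trajectories import time_step as ts

class CardGameEnv(py_environment.PyEnvironment):

    def __init__(self):
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=1, name='action')
        # Changed from np.int32: the LSTM kernel is float32, so the
        # observation must also be float32 for the MatMul to work.
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=(1,), dtype=np.float32, minimum=0, name='observation')
        self._state = 0
        self._episode_ended = False

    def _reset(self):
        self._state = 0
        self._episode_ended = False
        # Cast the plain-int state to the spec's dtype when emitting it;
        # _step would need the same cast in its ts.transition/ts.termination.
        return ts.restart(np.array([self._state], dtype=np.float32))
```

Alternatively, if the environment should stay untouched, I believe the RNN networks accept a `preprocessing_layers` argument, so the cast could happen inside the networks instead (again just a sketch, not tested):

```python
cast_layer = tf.keras.layers.Lambda(lambda x: tf.cast(x, tf.float32))

actor_net = ActorDistributionRnnNetwork(
    tf_env.observation_spec(),
    tf_env.action_spec(),
    preprocessing_layers=cast_layer,  # cast int32 observations to float32
    input_fc_layer_params=None,
    lstm_size=(128, 128),
    output_fc_layer_params=None,
    activation_fn=None)
```

Would either of these be the right way to handle it?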