Hi everybody,
at the moment I am learning Reinforcement Learning with tf-agents.
I implemented a custom environment with three actions and a state that consists of three values:
class CardGameEnv(py_environment.PyEnvironment):
    """Toy environment with a three-component action and a three-component state.

    Every step adds the action vector element-wise onto the state. The
    episode terminates once the last state component reaches 5; the terminal
    reward is the first state component minus 21. All other steps yield a
    reward of 0.0 with a discount of 1.0.
    """

    def __init__(self):
        # Let the base class initialise whatever bookkeeping it keeps.
        super().__init__()
        # One binary value (0 or 1) per action component.
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(3,), dtype=np.int32, minimum=[0, 0, 0], maximum=[1, 1, 1],
            name='action')
        # The state is emitted as a single row of three non-negative floats.
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=(1, 3), dtype=np.float32, minimum=0, name='observation')
        self._state = [0, 0, 0]
        self._episode_ended = False

    def action_spec(self):
        """Return the spec describing valid actions."""
        return self._action_spec

    def observation_spec(self):
        """Return the spec describing emitted observations."""
        return self._observation_spec

    def _observation(self):
        """Return the current state as a float32 array of shape (1, 3)."""
        return np.array([self._state], dtype=np.float32)

    def _reset(self):
        """Zero the state and return the first time step of a new episode."""
        self._state = [0, 0, 0]
        self._episode_ended = False
        return ts.restart(self._observation())

    def _step(self, action):
        """Apply `action` to the state and return the resulting time step.

        Args:
            action: indexable with at least three entries; entry i is added
                to state component i.

        Returns:
            A restart step if the previous step ended the episode, a
            termination step once the end condition is met, otherwise a
            transition step.
        """
        if self._episode_ended:
            # The last action ended the episode. Ignore the current action
            # and start a new episode.
            return self.reset()

        # Debug output: inspect the incoming action.
        print('Action: ', action)
        print('Action data type: ', type(action))

        # Accumulate the action into the state, component by component.
        print('State before: ', self._state)
        for i, value in enumerate(self._state):
            self._state[i] = value + action[i]
        print('State after: ', self._state)

        # NOTE(review): the original tested self._state[3], which is out of
        # range for a three-element state and raised IndexError on every
        # step. The last component (index 2) is assumed to be the intended
        # end-of-episode counter -- confirm.
        if self._state[2] >= 5.0:
            self._episode_ended = True
            reward = self._state[0] - 21
            return ts.termination(self._observation(), reward)

        return ts.transition(self._observation(), reward=0.0, discount=1.0)
Now I want to couple the env with a DQN agent.
But all tutorials that I found work with envs with only one action.
For example: Train a Deep Q Network with TF-Agents | TensorFlow Agents
Is there a tutorial describing a more advanced example?
Thanks in advance!