I am doing TensorFlow’s text generation tutorial and it says that a way to improve the model is to add another RNN layer.
The model in the tutorial is this:
class MyModel(tf.keras.Model):
def __init__(self, vocab_size, embedding_dim, rnn_units):
super().__init__(self)
self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
self.gru = tf.keras.layers.GRU(rnn_units,
return_sequences=True,
return_state=True)
self.dense = tf.keras.layers.Dense(vocab_size)
def call(self, inputs, states=None, return_state=False, training=False):
x = inputs
x = self.embedding(x, training=training)
if states is None:
states = self.gru.get_initial_state(x)
x, states = self.gru(x, initial_state=states, training=training)
x = self.dense(x, training=training)
if return_state:
return x, states
else:
return x
And I tried adding a layer doing this:
class MyModel(tf.keras.Model):
def init(self, vocab_size, embedding_dim, rnn_units):
super().init(self)
self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
self.gru = tf.keras.layers.GRU(rnn_units, return_sequences=True, return_state=True)
self.gru2 = tf.keras.layers.GRU(rnn_units, return_sequences=True, return_state=True)
self.dense = tf.keras.layers.Dense(vocab_size)
def call(self, inputs, states=None, return_state=False, training=False):
x = inputs
x = self.embedding(x, training=training)
if states is None:
states = self.gru.get_initial_state(x)
x, states = self.gru(x, initial_state=states, training=training)
x, states = self.gru2(x, initial_state=states, training=training)
x = self.dense(x, training=training)
if return_state:
return x, states
else:
return x
The accuracy during training is above 90% but the text that it generates is nonsensical.
What am I doing wrong?
How should I add that new layer?
Edit:
This is an example of the text generated:
Y gué el chme th ¡G : i uit: R dud d RR dududut ded,d!D! ties, is: y ui: iu,: ¡RRAShad wy…Ze…Zlegh Fither k.#É…WIkk.DR… t: W: R: IXII?IllawfGh…ZEWThedWe td y: W,Y,!:Z
Edit 2:
this is the tutorial I am following:
my code is essentially the same except for the new GRU layer