Tensorflow keras InvalidArgumentError

I tried to develop a simple chatbot with a seq2seq architecture, and it produced the following error, which I have not been able to solve.
Code :

import tensorflow as tf
import numpy as np

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding,LSTM,GRU
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

CuDNNGRU = GRU  # alias: standalone CuDNNGRU was removed in TF2; plain GRU picks the cuDNN kernel when possible
mark_start = "ssss "  # start-of-sequence marker prepended to every answer
mark_end = " eeee"    # end-of-sequence marker appended to every answer
sorular = []  # questions
cevaplar = []  # answers, wrapped in the start/end markers

# TXT dataset: one "question<TAB>answer" pair per line.
# Use a context manager so the file handle is closed deterministically
# (the original bare open() was never closed).
with open("dialogs.txt", encoding="UTF-8") as dataset_file:
    for line in dataset_file:
        soru, cevap = line.rstrip().split("\t")
        cevap = mark_start + cevap + mark_end
        sorular.append(soru)
        cevaplar.append(cevap)
# Clear the loop variables so the last pair isn't accidentally reused below.
soru = ''
cevap = ''

ques = sorular
ans = cevaplar

class TokenizerWrap(Tokenizer):
    """Keras Tokenizer that also builds padded, fixed-length token sequences.

    Fits on *texts*, keeps an integer->word inverse vocabulary, and stores
    the padded/truncated sequences in ``self.tokens_padded``.
    """

    def __init__(self, texts, padding, reverse=False, num_words=None):
        Tokenizer.__init__(self, num_words=num_words)
        self.fit_on_texts(texts)
        # Inverse vocabulary: token id -> word.
        self.index_to_word = {idx: word for word, idx in self.word_index.items()}
        self.tokens = self.texts_to_sequences(texts)
        if reverse:
            # Reverse each sequence and truncate from the front so the
            # end of the reversed sequence is preserved.
            self.tokens = [seq[::-1] for seq in self.tokens]
            truncating = "pre"
        else:
            truncating = "post"

        self.num_tokens = [len(seq) for seq in self.tokens]
        # Cap the padded length at mean + 2 std-devs instead of the longest
        # outlier sequence.
        self.max_tokens = int(np.mean(self.num_tokens) + 2 * np.std(self.num_tokens))
        self.tokens_padded = pad_sequences(self.tokens,
                                           maxlen=self.max_tokens,
                                           padding=padding,
                                           truncating=truncating)

    def token_to_word(self, token):
        """Map a single integer token back to its word (padding -> ' ')."""
        if token == 0:
            return ' '
        return self.index_to_word[token]

    def tokens_to_string(self, tokens):
        """Join the words of all non-padding tokens into one string."""
        return ' '.join(self.index_to_word[t] for t in tokens if t != 0)

    def text_to_tokens(self, text, padding, reverse=False):
        """Tokenize one text and pad/truncate it like the training data."""
        seqs = np.array(self.texts_to_sequences([text]))
        if reverse:
            seqs = seqs[:, ::-1]
            truncating = 'pre'
        else:
            truncating = 'post'
        return pad_sequences(seqs, maxlen=self.max_tokens,
                             padding=padding, truncating=truncating)

tokenizer_ques = TokenizerWrap(ques, padding='pre', reverse=True)

tokenizer_ans = TokenizerWrap(ans, padding='post', reverse=False)

tokens_ques = tokenizer_ques.tokens_padded
tokens_ans = tokenizer_ans.tokens_padded

# Integer tokens of the start/end markers in the decoder vocabulary.
token_start = tokenizer_ans.word_index[mark_start.strip()]
token_end = tokenizer_ans.word_index[mark_end.strip()]

encoder_input_data = tokens_ques

# Teacher forcing: decoder input is the answer shifted one step behind
# the target output.
decoder_input_data = tokens_ans[:, :-1]
decoder_output_data = tokens_ans[:, 1:]

# BUG FIX (cause of the InvalidArgumentError): Keras' word_index starts
# at 1 and index 0 is reserved for padding, so the vocabulary size must
# be (max index + 1). Using len(word_index) made the highest token fall
# outside the Embedding/Dense tables.
num_encoder_words = len(tokenizer_ques.word_index) + 1
num_decoder_words = len(tokenizer_ans.word_index) + 1

embedding_size = 50

# Pre-trained GloVe vectors (50-d): word -> float32 vector.
word2vec = {}
with open('glove.6B.50d.txt', encoding='UTF-8') as f:
    for line in f:
        values = line.split()
        word2vec[values[0]] = np.asarray(values[1:], dtype='float32')

# Random init, then overwrite rows for words that have a GloVe vector.
# Row 0 (padding) keeps its random init since word_index has no index 0.
embedding_matrix = np.random.uniform(-1, 1, (num_encoder_words, embedding_size))
for word, i in tokenizer_ques.word_index.items():
    if i < num_encoder_words:
        embedding_vector = word2vec.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

# Encoder: token ids -> GloVe-initialised embeddings -> stacked GRUs.
encoder_input = Input(shape=(None,), name='encoder_input')

encoder_embedding = Embedding(
    input_dim=num_encoder_words,
    output_dim=embedding_size,
    weights=[embedding_matrix],
    trainable=True,
    name='encoder_embedding',
)

# Hidden-state width shared by encoder and decoder GRUs.
state_size = 256

# Three stacked GRUs; only the last one collapses the sequence into a
# single state vector for seeding the decoder.
encoder_gru1 = CuDNNGRU(state_size, return_sequences=True, name="encoder_gru1")
encoder_gru2 = CuDNNGRU(state_size, return_sequences=True, name="encoder_gru2")
encoder_gru3 = CuDNNGRU(state_size, return_sequences=False, name="encoder_gru3")

def connect_encoder():
    """Wire the encoder layers together and return the final state tensor."""
    net = encoder_embedding(encoder_input)
    net = encoder_gru1(net)
    net = encoder_gru2(net)
    return encoder_gru3(net)

encoder_output = connect_encoder()

# Decoder inputs: the initial state comes either from the encoder
# (training) or from this free-standing tensor (inference).
decoder_initial_state = Input(shape=(state_size,), name='decoder_initial_state')

decoder_input = Input(shape=(None,), name='decoder_input')

decoder_embedding = Embedding(
    input_dim=num_decoder_words,
    output_dim=embedding_size,
    name='decoder_embedding',
)

decoder_gru1 = CuDNNGRU(state_size, return_sequences=True, name="decoder_gru1")
decoder_gru2 = CuDNNGRU(state_size, return_sequences=True, name="decoder_gru2")
decoder_gru3 = CuDNNGRU(state_size, return_sequences=True, name="decoder_gru3")

# Linear logits over the decoder vocabulary (softmax is applied inside
# the loss function instead).
decoder_dense = Dense(num_decoder_words, activation='linear', name='decoder_output')

def connect_decoder(initial_state):
    """Wire the decoder layers, seeding every GRU with *initial_state*."""
    net = decoder_embedding(decoder_input)
    net = decoder_gru1(net, initial_state=initial_state)
    net = decoder_gru2(net, initial_state=initial_state)
    net = decoder_gru3(net, initial_state=initial_state)
    return decoder_dense(net)

decoder_output = connect_decoder(initial_state=encoder_output)

# Training model: (question tokens, shifted answer tokens) -> logits.
model_train = Model(inputs=[encoder_input,decoder_input],outputs=[decoder_output])

# Inference encoder: question tokens -> final encoder state vector.
model_encoder = Model(inputs=[encoder_input],outputs=[encoder_output])

# Re-wire the decoder on the free-standing initial-state input so it can
# be stepped during inference independently of the encoder graph.
decoder_output = connect_decoder(initial_state=decoder_initial_state)
model_decoder = Model(inputs=[decoder_input, decoder_initial_state], outputs=[decoder_output])

def sparse_cross_entropy(y_true, y_pred):
    """Mean sparse softmax cross-entropy between integer targets and logits."""
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true,
                                                            logits=y_pred)
    return tf.reduce_mean(losses)

optimizer = RMSprop(learning_rate=1e-4)

# Graph-mode placeholder used as the target tensor so the int32 token
# labels reach the custom sparse loss unchanged.
tf.compat.v1.disable_eager_execution()
decoder_target = tf.compat.v1.placeholder(dtype="int32", shape=(None, None))

# BUG FIX: pass the configured optimizer object — the string "rmsprop"
# created a fresh default optimizer and silently discarded the 1e-4
# learning rate set above.
model_train.compile(optimizer=optimizer,
                    loss=sparse_cross_entropy,
                    run_eagerly=False,
                    target_tensors=[decoder_target])

x_data = {'encoder_input': encoder_input_data,
          'decoder_input': decoder_input_data}
y_data = {'decoder_output': decoder_output_data}

model_train.fit(x_data, y_data, batch_size=256, epochs=53)

def chat(input_text):
    """Greedy-decode a reply to *input_text* and print both sides."""
    # Encode the (reversed, pre-padded) question into the decoder's
    # initial state.
    input_tokens = tokenizer_ques.text_to_tokens(input_text,
                                                 reverse=True,
                                                 padding='pre')
    initial_state = model_encoder.predict(input_tokens)

    max_tokens = tokenizer_ans.max_tokens
    decoder_input_data = np.zeros(shape=(1, max_tokens), dtype=np.int_)

    token_int = token_start
    output_text = ""
    count_tokens = 0
    # BUG FIX: the loop ran while count_tokens < max_tokens + 1, so on the
    # last iteration decoder_input_data[0, max_tokens] indexed past the end
    # of the (1, max_tokens) buffer.
    while token_int != token_end and count_tokens < max_tokens:
        decoder_input_data[0, count_tokens] = token_int
        x_data = {'decoder_initial_state': initial_state,
                  'decoder_input': decoder_input_data}
        decoder_output = model_decoder.predict(x_data)
        # Greedy sampling: take the arg-max token at the current step.
        token_onehot = decoder_output[0, count_tokens, :]
        token_int = np.argmax(token_onehot)
        sampled_word = tokenizer_ans.token_to_word(token_int)
        output_text += ' ' + sampled_word
        count_tokens += 1

    print("Input metni:")
    print('-', input_text.capitalize())
    print('-' * 44)
    print("Model çevirisi:")
    print('-' + output_text.replace(" eeee", "").capitalize())
    print('-' * 44)

chat('hello how are you')

Error :


Just as I had solved the NaN loss problem after two months of dealing with it, this new error appeared and is driving me crazy.
Thanks in advance. :blush:

Hi @William_Afton, the InvalidArgumentError occurs because, while creating embeddings for the index values, an index value is not present in the model vocabulary. When defining the vocab size, make sure that it is the maximum integer index + 1. Thank you.