I am trying to build a simple chatbot with a seq2seq architecture, but it raises an error (shown below) that I have not been able to solve.
Code:
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, LSTM, GRU
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
CuDNNGRU = GRU  # TF 2.x merged CuDNNGRU into GRU; keep the old name as an alias
mark_start = "ssss "
mark_end = " eeee"
sorular = []   # "sorular" = questions
cevaplar = []  # "cevaplar" = answers
# TXT dataset: each line is "question<TAB>answer"
for line in open("dialogs.txt", encoding="UTF-8"):
    soru, cevap = line.rstrip().split("\t")
    cevap = mark_start + cevap + mark_end
    sorular.append(soru)
    cevaplar.append(cevap)
ques = sorular
ans = cevaplar
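# Optional sanity check (not in the original code): confirm the file parsed
# into two aligned lists before tokenizing.
assert len(ques) == len(ans), "questions and answers must stay aligned"
print("Loaded %d question/answer pairs" % len(ques))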
class TokenizerWrap(Tokenizer):
    def __init__(self, texts, padding, reverse=False, num_words=None):
        Tokenizer.__init__(self, num_words=num_words)
        self.fit_on_texts(texts)
        self.index_to_word = dict(zip(self.word_index.values(),
                                      self.word_index.keys()))
        self.tokens = self.texts_to_sequences(texts)
        if reverse:
            self.tokens = [list(reversed(x)) for x in self.tokens]
            truncating = "pre"
        else:
            truncating = "post"
        self.num_tokens = [len(x) for x in self.tokens]
        self.max_tokens = int(np.mean(self.num_tokens) + 2 * np.std(self.num_tokens))
        self.tokens_padded = pad_sequences(self.tokens,
                                           maxlen=self.max_tokens,
                                           padding=padding,
                                           truncating=truncating)

    def token_to_word(self, token):
        word = ' ' if token == 0 else self.index_to_word[token]
        return word

    def tokens_to_string(self, tokens):
        words = [self.index_to_word[token] for token in tokens if token != 0]
        text = ' '.join(words)
        return text

    def text_to_tokens(self, text, padding, reverse=False):
        tokens = self.texts_to_sequences([text])
        tokens = np.array(tokens)
        if reverse:
            tokens = np.flip(tokens, axis=1)
            truncating = 'pre'
        else:
            truncating = 'post'
        tokens = pad_sequences(tokens,
                               maxlen=self.max_tokens,
                               padding=padding,
                               truncating=truncating)
        return tokens
tokenizer_ques = TokenizerWrap(ques, padding='pre', reverse=True)
tokenizer_ans = TokenizerWrap(ans, padding='post', reverse=False)
tokens_ques = tokenizer_ques.tokens_padded
tokens_ans = tokenizer_ans.tokens_padded
token_start = tokenizer_ans.word_index[mark_start.strip()]
token_end = tokenizer_ans.word_index[mark_end.strip()]
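# Optional sanity check (my addition): the padded matrices should be 2-D
# (num_pairs, max_tokens), and both markers must be in the answer vocabulary,
# otherwise the decoding loop in chat() below can never terminate.
print("tokens_ques shape:", tokens_ques.shape)
print("tokens_ans shape:", tokens_ans.shape)
print("token_start:", token_start, "token_end:", token_end)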
encoder_input_data = tokens_ques
decoder_input_data = tokens_ans[:, :-1]
decoder_output_data = tokens_ans[:, 1:]
# +1 because Keras token indices start at 1; without it the largest index
# is out of range for the Embedding layers below.
num_encoder_words = len(tokenizer_ques.word_index) + 1
num_decoder_words = len(tokenizer_ans.word_index) + 1
embedding_size = 50
word2vec = {}
with open('glove.6B.50d.txt', encoding='UTF-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        vec = np.asarray(values[1:], dtype='float32')
        word2vec[word] = vec
embedding_matrix = np.random.uniform(-1, 1, (num_encoder_words, embedding_size))
for word, i in tokenizer_ques.word_index.items():
    if i < num_encoder_words:
        embedding_vector = word2vec.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
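# Optional check (my addition): report how many vocabulary words actually
# received a GloVe vector; the rest keep their random initialization.
# A very low hit rate usually points to a tokenization mismatch.
hits = sum(1 for w in tokenizer_ques.word_index
           if w in word2vec and tokenizer_ques.word_index[w] < num_encoder_words)
print("GloVe coverage: %d / %d words" % (hits, num_encoder_words - 1))
print("embedding_matrix shape:", embedding_matrix.shape)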
encoder_input = Input(shape=(None,),name='encoder_input')
encoder_embedding = Embedding(input_dim=num_encoder_words,
output_dim=embedding_size,
weights=[embedding_matrix],
trainable=True,
name='encoder_embedding')
state_size = 256
encoder_gru1 = CuDNNGRU(state_size, name="encoder_gru1", return_sequences=True)
encoder_gru2 = CuDNNGRU(state_size, name="encoder_gru2", return_sequences=True)
encoder_gru3 = CuDNNGRU(state_size, name="encoder_gru3", return_sequences=False)
def connect_encoder():
    net = encoder_input
    net = encoder_embedding(net)
    net = encoder_gru1(net)
    net = encoder_gru2(net)
    net = encoder_gru3(net)
    encoder_output = net
    return encoder_output
encoder_output = connect_encoder()
decoder_initial_state = Input(shape=(state_size,),name='decoder_initial_state')
decoder_input = Input(shape=(None,),name='decoder_input')
decoder_embedding = Embedding(input_dim=num_decoder_words,
                              output_dim=embedding_size,
                              name='decoder_embedding')
decoder_gru1 = CuDNNGRU(state_size, name="decoder_gru1", return_sequences=True)
decoder_gru2 = CuDNNGRU(state_size, name="decoder_gru2", return_sequences=True)
decoder_gru3 = CuDNNGRU(state_size, name="decoder_gru3", return_sequences=True)
decoder_dense = Dense(num_decoder_words, activation='linear', name='decoder_output')
def connect_decoder(initial_state):
    net = decoder_input
    net = decoder_embedding(net)
    net = decoder_gru1(net, initial_state=initial_state)
    net = decoder_gru2(net, initial_state=initial_state)
    net = decoder_gru3(net, initial_state=initial_state)
    decoder_output = decoder_dense(net)
    return decoder_output
decoder_output = connect_decoder(initial_state=encoder_output)
model_train = Model(inputs=[encoder_input,decoder_input],outputs=[decoder_output])
model_encoder = Model(inputs=[encoder_input],outputs=[encoder_output])
decoder_output = connect_decoder(initial_state=decoder_initial_state)
model_decoder = Model(inputs=[decoder_input, decoder_initial_state], outputs=[decoder_output])
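# Optional (my addition): print the training graph once to verify the
# encoder, decoder and output layers are wired as intended before compiling.
model_train.summary()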
def sparse_cross_entropy(y_true, y_pred):
    # y_true: integer token targets (batch, seq);
    # y_pred: raw logits from the linear Dense layer (batch, seq, vocab)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true,
                                                          logits=y_pred)
    loss_mean = tf.reduce_mean(loss)
    return loss_mean
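# Note (my addition): on recent TF 2.x releases the placeholder /
# target_tensors route below was removed from compile(). A purely eager
# sketch of an equivalent setup, if you can drop the placeholder, is:
#   loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
#   model_train.compile(optimizer=optimizer, loss=loss)
#   model_train.fit([encoder_input_data, decoder_input_data],
#                   decoder_output_data, batch_size=256, epochs=53)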
optimizer = RMSprop(learning_rate=1e-4)
# Placeholders only exist in graph mode; ideally call this right after
# importing TensorFlow, before any layers or models are built.
tf.compat.v1.disable_eager_execution()
decoder_target = tf.compat.v1.placeholder(dtype="int32", shape=(None, None))
model_train.compile(optimizer=optimizer,
                    loss=sparse_cross_entropy,
                    target_tensors=[decoder_target])
x_data = {'encoder_input': encoder_input_data,
          'decoder_input': decoder_input_data}
y_data = {'decoder_output': decoder_output_data}
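# Pre-flight check (my addition): every token index must be a valid row of
# the embedding/output layers, otherwise the loss can silently go to NaN.
assert encoder_input_data.max() < num_encoder_words
assert decoder_input_data.max() < num_decoder_words
assert decoder_output_data.max() < num_decoder_words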
model_train.fit(x_data, y_data, batch_size=256, epochs=53)
def chat(input_text):
    input_tokens = tokenizer_ques.text_to_tokens(input_text,
                                                 reverse=True,
                                                 padding='pre')
    initial_state = model_encoder.predict(input_tokens)
    max_tokens = tokenizer_ans.max_tokens
    decoder_input_data = np.zeros(shape=(1, max_tokens), dtype=np.int_)
    token_int = token_start
    output_text = ""
    count_tokens = 0
    # stop at max_tokens, not max_tokens+1, or the write below runs past
    # the end of decoder_input_data
    while token_int != token_end and count_tokens < max_tokens:
        decoder_input_data[0, count_tokens] = token_int
        x_data = {'decoder_initial_state': initial_state,
                  'decoder_input': decoder_input_data}
        decoder_output = model_decoder.predict(x_data)
        token_onehot = decoder_output[0, count_tokens, :]
        token_int = np.argmax(token_onehot)
        sampled_word = tokenizer_ans.token_to_word(token_int)
        output_text += ' ' + sampled_word
        count_tokens += 1
    print("Input text:")
    print('-', input_text.capitalize())
    print('-' * 44)
    print("Model reply:")
    print('-' + output_text.replace(" eeee", "").capitalize())
    print('-' * 44)
chat('hello how are you')
Error:
Just as I had finally solved the NaN loss problem after two months of struggling with it, this new error started driving me crazy.
Thanks in advance.