I am trying to follow tensorflow documentation and applying same technique to one of toy dataset.
During training I am getting all loss as Nan. I have tried to debug the same using Debugger V2 and I could see that tf.keras.layers.GlobalAveragePooling1D
is giving Nan due to division by 0, which is causing all values to be Nan during backpropagation. But what is not clear from the debugger V2 GUI why the sum is becoming 0. I did try to reduce the number of features and the size of the dataset, but each of this activity is giving me new error (probably I shall start a separate question thread for each issues at a later point ).
Below is the code for reference. I am providing the dataset as well here. I had tried below code on Google Colab.
import os
import pprint
import tempfile
from typing import Dict, Text
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
tf.debugging.experimental.enable_dump_debug_info(
"./tfdbg2_logdir",
tensor_debug_mode="FULL_HEALTH",
circular_buffer_size=-1)
!pip install -q tensorflow-recommenders
import tensorflow_recommenders as tfrs
Preparing Data
ds=pd.read_csv('train_recom.csv')
ds['year'].replace(0,1,inplace=True)
ds_song=ds.groupby(['song_id','title','release','artist_name','year']).size().reset_index().rename(columns={0:'count'})
ds_song.to_csv('songs_details.csv')
ds.to_csv('train_recom_transformed.csv')
Reading data to tensorflow dataset
ratings = tf.data.experimental.make_csv_dataset(
"./train_recom_transformed.csv",
batch_size=5,
select_columns=['user_id', 'song_id', 'listen_count', 'title', 'release', 'artist_name',
'year'],
header=True,
num_epochs=1,
ignore_errors=False,)
songs = tf.data.experimental.make_csv_dataset(
"./songs_details.csv",
batch_size=128,
select_columns=['song_id','title','release','artist_name','year'],
num_epochs=1,
ignore_errors=True,)
ratings = ratings.unbatch().map(lambda x: {
"song_id": x["song_id"],
"user_id": x["user_id"],
"release" : x["release"],
"artist_name" : x["artist_name"],
"title" : x["title"],
"year" : x["year"],
"listen_count": x["listen_count"]
})
songs = songs.unbatch().map(lambda x: x["song_id"])
Preparing train and test dataset
tf.random.set_seed(42)
shuffled = ratings.shuffle(16000, seed=42, reshuffle_each_iteration=False)
train = shuffled.take(12000)
test = shuffled.skip(12000).take(4000)
cached_train = train.shuffle(100_000).batch(1200).cache()
cached_test = test.batch(400).cache()
title = songs.batch(1000)
user_ids = ratings.batch(1_000_000).map(lambda x: x["user_id"])
unique_song_titles = np.unique(np.concatenate(list(title)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))
year_data=np.concatenate(list(ratings.map(lambda x: x['year']).batch(4000)))
User model class
class UserModel(tf.keras.Model):
def __init__(self):
super().__init__()
max_tokens = 1_000_000
embedding_dimension = 32
self.user_embedding = tf.keras.Sequential([
tf.keras.layers.StringLookup(
vocabulary=unique_user_ids, mask_token=None),
tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
])
self.release_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
max_tokens=max_tokens)
self.release_text_embedding = tf.keras.Sequential([
self.release_vectorizer,
tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True,input_length=144),
tf.keras.layers.GlobalAveragePooling1D(),
])
self.release_vectorizer.adapt(np.concatenate(list(ratings.map(lambda x: x['release']).batch(4000))))
self.artist_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
max_tokens=max_tokens)
self.artist_text_embedding = tf.keras.Sequential([
self.artist_vectorizer,
tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
tf.keras.layers.GlobalAveragePooling1D(),
])
self.artist_vectorizer.adapt(np.concatenate(list(ratings.map(lambda x: x['artist_name']).batch(4000))))
self.title_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
max_tokens=max_tokens)
self.title_text_embedding = tf.keras.Sequential([
self.title_vectorizer,
tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
tf.keras.layers.GlobalAveragePooling1D(),
])
self.title_vectorizer.adapt(np.concatenate(list(ratings.map(lambda x: x['title']).batch(4000))))
self.year_embedding = tf.keras.Sequential([
tf.keras.layers.Embedding(len(year_data) + 1, 32),
])
def call(self, inputs):
return tf.concat([
self.user_embedding(inputs['user_id']),
self.release_text_embedding(inputs['release'])
,
self.year_embedding(inputs['year']),
self.artist_text_embedding(inputs['artist_name']),
self.title_text_embedding(inputs['title']),
], axis=1)
Item model
class ItemModel(tf.keras.Model):
def __init__(self):
super().__init__()
max_tokens = 10_000
embedding_dimension = 32
## embed title from unique_song_titles
self.title_embedding = tf.keras.Sequential([
tf.keras.layers.StringLookup(
vocabulary=unique_song_titles, mask_token=None),
tf.keras.layers.Embedding(len(unique_song_titles) + 1, embedding_dimension)
])
def call(self, inputs):
return self.title_embedding(inputs)
Query model . Creating Deep model
class QueryModel(tf.keras.Model):
"""Model for encoding user queries."""
def __init__(self, layer_sizes):
"""Model for encoding user queries.
Args:
layer_sizes:
A list of integers where the i-th entry represents the number of units
the i-th layer contains.
"""
super().__init__()
# We first use the user model for generating embeddings.
self.embedding_model = UserModel()
# Then construct the layers.
self.dense_layers = tf.keras.Sequential()
# Use the ReLU activation for all but the last layer.
for layer_size in layer_sizes[:-1]:
self.dense_layers.add(tf.keras.layers.Dense(layer_size, activation="relu"))
# No activation for the last layer.
for layer_size in layer_sizes[-1:]:
self.dense_layers.add(tf.keras.layers.Dense(layer_size))
def call(self, inputs):
feature_embedding = self.embedding_model(inputs)
return self.dense_layers(feature_embedding)
Creating deep model for the Item model
class CandidateModel(tf.keras.Model):
"""Model for encoding movies."""
def __init__(self, layer_sizes):
"""Model for encoding movies.
Args:
layer_sizes:
A list of integers where the i-th entry represents the number of units
the i-th layer contains.
"""
super().__init__()
self.embedding_model = ItemModel()
# Then construct the layers.
self.dense_layers = tf.keras.Sequential()
# Use the ReLU activation for all but the last layer.
for layer_size in layer_sizes[:-1]:
self.dense_layers.add(tf.keras.layers.Dense(layer_size, activation="relu"))
# No activation for the last layer.
for layer_size in layer_sizes[-1:]:
self.dense_layers.add(tf.keras.layers.Dense(layer_size))
def call(self, inputs):
feature_embedding = self.embedding_model(inputs)
return self.dense_layers(feature_embedding)
Combining both query and candidate model
class SongModel(tfrs.models.Model):
def __init__(self, layer_sizes):
super().__init__()
self.query_model = QueryModel(layer_sizes)
self.candidate_model = CandidateModel(layer_sizes)
self.task = tfrs.tasks.Retrieval(
metrics=tfrs.metrics.FactorizedTopK(
candidates=songs.batch(128).map(self.candidate_model),
),
)
def compute_loss(self, features, training=False):
print('type of feature ----',type(features))
query_embeddings = self.query_model({
"user_id": features["user_id"]
,
"release" : features["release"]
,
"artist_name" : features["artist_name"],
"title": features["title"],
"year" : features["year"],
})
item_embeddings = self.candidate_model(features["song_id"])
return self.task(query_embeddings, item_embeddings)
training the model
model = SongModel([32])
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
model_hist = model.fit(cached_train, epochs=9)
Below id the output that I got
WARNING:tensorflow:Failed to read source code from path: /content/<ipython-input-26-fdc864fc30cf>. Reason: Source path neither exists nor can be loaded as a .par file: /content/<ipython-input-26-fdc864fc30cf>
WARNING:tensorflow:Failed to read source code from path: /content/<ipython-input-25-e3009db55439>. Reason: Source path neither exists nor can be loaded as a .par file: /content/<ipython-input-25-e3009db55439>
Epoch 1/9
type of feature ---- <class 'dict'>
WARNING:tensorflow:Model was constructed with shape (None, None) for input KerasTensor(type_spec=TensorSpec(shape=(None, None), dtype=tf.float32, name='embedding_10_input'), name='embedding_10_input', description="created by layer 'embedding_10_input'"), but it was called on an input with incompatible shape (None,).
type of feature ---- <class 'dict'>
WARNING:tensorflow:Model was constructed with shape (None, None) for input KerasTensor(type_spec=TensorSpec(shape=(None, None), dtype=tf.float32, name='embedding_10_input'), name='embedding_10_input', description="created by layer 'embedding_10_input'"), but it was called on an input with incompatible shape (None,).
10/10 [==============================] - 63s 1s/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0022 - factorized_top_k/top_10_categorical_accuracy: 0.0033 - factorized_top_k/top_50_categorical_accuracy: 0.0073 - factorized_top_k/top_100_categorical_accuracy: 0.0103 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 2/9
10/10 [==============================] - 9s 945ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 3/9
10/10 [==============================] - 10s 953ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 4/9
10/10 [==============================] - 9s 948ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 5/9
10/10 [==============================] - 10s 966ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 6/9
10/10 [==============================] - 10s 955ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 7/9
10/10 [==============================] - 10s 955ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 8/9
10/10 [==============================] - 10s 958ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 9/9
10/10 [==============================] - 10s 971ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan