I am trying to implement a deep ranking model using a listwise loss. The main reference document I used is here.
I have created the model successfully, but when I try to make a prediction on sample data it fails with IndexError: tuple index out of range.
Below is the full code for creating the model.
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets
!pip install -q tensorflow-ranking
import pprint
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_ranking as tfr
import tensorflow_recommenders as tfrs
from typing import Dict, Text
import os
import tempfile
import datetime
%load_ext tensorboard
ratings = tfds.load("movielens/100k-ratings", split="train")
movies = tfds.load("movielens/100k-movies", split="train")
ratings = ratings.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"],
    "user_rating": x["user_rating"],
    # "timestamp": x["timestamp"],
})
movies = movies.map(lambda x: x["movie_title"])
unique_movie_titles = np.unique(np.concatenate(list(movies.batch(1000))))
unique_user_ids = np.unique(np.concatenate(list(ratings.batch(1_000).map(
    lambda x: x["user_id"]))))
tf.random.set_seed(42)
shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)
train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)
train = tfrs.examples.movielens.sample_listwise(
    train,
    num_list_per_user=50,
    num_examples_per_list=5,
    seed=42,
)
test = tfrs.examples.movielens.sample_listwise(
    test,
    num_list_per_user=1,
    num_examples_per_list=5,
    seed=42,
)
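Each element produced by sample_listwise pairs one user with a list of 5 candidate movies, which is easy to verify:
# One sampled element: user_id is a scalar string, while movie_title and
# user_rating are length-5 vectors (num_examples_per_list above).
for example in train.take(1):
    print({k: v.shape for k, v in example.items()})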
class UserModel(tf.keras.Model):

    def __init__(self):
        super().__init__()
        self.user_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids, mask_token=None),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, 32),
        ])

    def call(self, inputs):
        return self.user_embedding(inputs["user_id"])
class QueryModel(tf.keras.Model):
    """Model for encoding user queries."""

    def __init__(self, layer_sizes):
        """Model for encoding user queries.

        Args:
          layer_sizes:
            A list of integers where the i-th entry represents the number
            of units the i-th layer contains.
        """
        super().__init__()
        # We first use the user model for generating embeddings.
        self.embedding_model = UserModel()
        # Then construct the layers.
        self.dense_layers = tf.keras.Sequential()
        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.dense_layers.add(
                tf.keras.layers.Dense(layer_size, activation="relu"))
        # No activation for the last layer.
        for layer_size in layer_sizes[-1:]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size))

    def call(self, inputs):
        feature_embedding = self.embedding_model(inputs)
        return self.dense_layers(feature_embedding)
class MovieModel(tf.keras.Model):

    def __init__(self):
        super().__init__()
        max_tokens = 1_000_000
        self.title_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_movie_titles, mask_token=None),
            tf.keras.layers.Embedding(len(unique_movie_titles) + 1, 32),
        ])
        self.title_vectorizer = tf.keras.layers.TextVectorization(
            max_tokens=max_tokens, output_sequence_length=4)
        self.title_text_embedding = tf.keras.Sequential([
            self.title_vectorizer,
            tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
        ])
        self.title_vectorizer.adapt(movies)

    def call(self, titles, pool_size):
        # Average the 4 per-token text embeddings down to a single vector.
        avg_layer = tf.keras.layers.AveragePooling2D(
            pool_size=pool_size, strides=1, padding="valid")
        len_titles = tf.shape(titles)[0]
        # return avg_layer(self.title_text_embedding(titles))
        # Concatenate the whole-title id embedding with the pooled text
        # embedding; both paths hard-code a list length of 5.
        return tf.concat([
            self.title_embedding(tf.reshape(titles, [len_titles, 5, 1])),
            avg_layer(self.title_text_embedding(
                tf.reshape(titles, [len_titles, 5, 1]))),
        ], axis=3)
class CandidateModel(tf.keras.Model):
    """Model for encoding movies."""

    def __init__(self, layer_sizes):
        """Model for encoding movies.

        Args:
          layer_sizes:
            A list of integers where the i-th entry represents the number
            of units the i-th layer contains.
        """
        super().__init__()
        self.embedding_model = MovieModel()
        # Then construct the layers.
        self.dense_layers = tf.keras.Sequential()
        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.dense_layers.add(
                tf.keras.layers.Dense(layer_size, activation="relu"))
        # No activation for the last layer.
        for layer_size in layer_sizes[-1:]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size))

    def call(self, inputs, pool_size):
        feature_embedding = self.embedding_model(inputs, pool_size)
        return self.dense_layers(feature_embedding)
class MovielensModel(tfrs.models.Model):

    def __init__(self, layer_sizes):
        super().__init__()
        self.query_model = QueryModel(layer_sizes)
        self.candidate_model = CandidateModel(layer_sizes)
        self.rating_model = tf.keras.Sequential([
            tf.keras.layers.Dense(256, activation="relu"),
            tf.keras.layers.Dense(128, activation="relu"),
            tf.keras.layers.Dense(1),
        ])
        self.rating_task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
            loss=tfr.keras.losses.ListMLELoss(),
            metrics=[
                tfr.keras.metrics.NDCGMetric(name="ndcg_metric"),
                tf.keras.metrics.RootMeanSquaredError(),
            ],
        )

    def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
        self.query_embeddings = self.query_model({
            "user_id": features["user_id"],
        })
        self.movie_embeddings = self.candidate_model(
            features["movie_title"], pool_size=(1, 4))
        list_length = features["movie_title"].shape[1]
        self.query_embeddings_repeated = tf.repeat(
            tf.expand_dims(tf.expand_dims(self.query_embeddings, 1), 1),
            [list_length], axis=1)
        self.embd_concat = tf.concat(
            [self.query_embeddings_repeated, self.movie_embeddings], 3)
        return (
            self.query_embeddings,
            self.movie_embeddings,
            self.rating_model(self.embd_concat),
        )

    def compute_loss(self, features: Dict[Text, tf.Tensor],
                     training=False) -> tf.Tensor:
        ratings = features.pop("user_rating")
        user_embeddings, movie_embeddings, rating_predictions = self(features)
        rating_loss = self.rating_task(
            labels=ratings,
            predictions=tf.squeeze(
                tf.squeeze(rating_predictions, axis=-1), axis=-1),
        )
        return rating_loss
model = MovielensModel(layer_sizes=[32])
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
cached_train = train.shuffle(100_000).batch(1000).cache()
cached_test = test.batch(4096).cache()
model_hist = model.fit(cached_train, epochs=10)
metrics = model.evaluate(cached_test, return_dict=True)
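Calling the trained model directly on an already-batched example seems to work, which makes me think the problem is the shapes I pass to predict:
# Sanity check: a direct forward pass on a cached training batch runs and
# returns the three outputs from call().
for batch in cached_train.take(1):
    q_emb, m_emb, preds = model(batch)
    print(q_emb.shape, m_emb.shape, preds.shape)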
Now that the model is created and trained, trying to make a prediction as below fails:
for x in train.take(1):
    # for x in ratings.take(1):
    print(x)
    predicted_rating = model.predict({
        "user_id": np.array(x["user_id"].numpy().decode("utf-8")),
        "movie_title": list(map(lambda t: np.array(t.decode("utf-8")),
                                list(x["movie_title"].numpy()))),
        "user_rating": list(map(lambda t: np.array(t),
                                x["user_rating"].numpy())),
    })
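My current guess is that predict expects the same batched shapes the model was trained on, i.e. user_id of shape (1,) and movie_title of shape (1, 5), since call indexes features["movie_title"].shape[1], which would raise exactly this IndexError on a rank-1 input. A minimal sketch of the batched variant I have in mind (my assumption, not verified):
for x in train.take(1):
    predicted_rating = model.predict({
        # Wrap each feature in a leading batch dimension of 1.
        "user_id": np.array([x["user_id"].numpy()]),          # shape (1,)
        "movie_title": np.array([x["movie_title"].numpy()]),  # shape (1, 5)
    })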
Any suggestions as to what could be going wrong?