@rcauvin Were you able to use SHAP with TensorFlow Recommenders?
# Load the MovieLens 100K ratings and movie metadata.
ratings = tfds.load("movielens/100k-ratings", split="train")
movies = tfds.load("movielens/100k-movies", split="train")

# Select the basic features.
ratings = ratings.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"],
    "user_rating": x["user_rating"],
})
movies = movies.map(lambda x: x["movie_title"])

# Randomly shuffle data and split between train and test.
# reshuffle_each_iteration=False keeps the shuffled order fixed across
# iterations, so the take/skip split below stays disjoint.
tf.random.set_seed(42)
shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)

# Collect the distinct movie titles and user ids; they are used as the
# vocabularies of the StringLookup layers in MovielensModel.
movie_titles = movies.batch(1_000)
user_ids = ratings.batch(1_000_000).map(lambda x: x["user_id"])

unique_movie_titles = np.unique(np.concatenate(list(movie_titles)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))
class MovielensModel(tfrs.models.Model):
    """Multi-task MovieLens model with a rating task and a retrieval task.

    The model embeds user ids and movie titles, feeds the concatenated
    embeddings through a small dense network to predict a rating, and
    combines the rating and retrieval losses using the weights given to
    the constructor.
    """

    def __init__(self, rating_weight: float, retrieval_weight: float) -> None:
        """Build the embedding towers, the rating network and both tasks.

        Args:
            rating_weight: Multiplier applied to the rating (ranking) loss.
            retrieval_weight: Multiplier applied to the retrieval loss.
        """
        # We take the loss weights in the constructor: this allows us to
        # instantiate several model objects with different loss weights.
        super().__init__()

        embedding_dimension = 32

        # User and movie models.
        # The embedding tables get one extra row on top of the vocabulary
        # size, for the lookup layer's out-of-vocabulary index.
        self.movie_model: tf.keras.layers.Layer = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_movie_titles, mask_token=None),
            tf.keras.layers.Embedding(
                len(unique_movie_titles) + 1, embedding_dimension),
        ])
        self.user_model: tf.keras.layers.Layer = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids, mask_token=None),
            tf.keras.layers.Embedding(
                len(unique_user_ids) + 1, embedding_dimension),
        ])

        # A small model to take in user and movie embeddings and predict
        # ratings. We can make this as complicated as we want as long as we
        # output a scalar as our prediction.
        self.rating_model = tf.keras.Sequential([
            tf.keras.layers.Dense(256, activation="relu"),
            tf.keras.layers.Dense(128, activation="relu"),
            tf.keras.layers.Dense(1),
        ])

        # The tasks.
        self.rating_task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.RootMeanSquaredError()],
        )
        self.retrieval_task: tf.keras.layers.Layer = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=movies.batch(128).map(self.movie_model)
            )
        )

        # The loss weights.
        self.rating_weight = rating_weight
        self.retrieval_weight = retrieval_weight

    def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
        """Embed the user and movie and predict a rating.

        Args:
            features: Mapping that contains at least the keys "user_id"
                and "movie_title".

        Returns:
            A tuple ``(user_embeddings, movie_embeddings, rating_predictions)``,
            in that order.
        """
        # We pick out the user features and pass them into the user model.
        user_embeddings = self.user_model(features["user_id"])
        # And pick out the movie features and pass them into the movie model.
        movie_embeddings = self.movie_model(features["movie_title"])

        return (
            user_embeddings,
            movie_embeddings,
            # We apply the multi-layered rating model to a concatenation of
            # user and movie embeddings.
            self.rating_model(
                tf.concat([user_embeddings, movie_embeddings], axis=1)
            ),
        )

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        """Return the weighted sum of the rating and retrieval losses.

        Args:
            features: Mapping with "user_id", "movie_title" and the label
                under "user_rating".
            training: Accepted for interface compatibility; not used here.

        Returns:
            ``rating_weight * rating_loss + retrieval_weight * retrieval_loss``.
        """
        ratings = features["user_rating"]
        # Pass everything except the label to the model without mutating the
        # caller's dict (a pop() here would make a second call on the same
        # batch fail with a KeyError).
        model_inputs = {
            key: value for key, value in features.items()
            if key != "user_rating"
        }

        user_embeddings, movie_embeddings, rating_predictions = self(model_inputs)

        # We compute the loss for each task.
        rating_loss = self.rating_task(
            labels=ratings,
            predictions=rating_predictions,
        )
        retrieval_loss = self.retrieval_task(user_embeddings, movie_embeddings)

        # And combine them using the loss weights. The terms must be added:
        # subtracting the retrieval term would make the optimizer maximise
        # the retrieval loss whenever retrieval_weight is positive.
        return (self.rating_weight * rating_loss
                + self.retrieval_weight * retrieval_loss)
# Rating-specialised model: all of the loss weight is on the rating task.
model = MovielensModel(rating_weight=1.0, retrieval_weight=0.0)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))

cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()

model.fit(cached_train, epochs=3)
metrics = model.evaluate(cached_test, return_dict=True)

# Retrieval-specialised model: all of the loss weight is on the retrieval task.
model = MovielensModel(rating_weight=0.0, retrieval_weight=1.0)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))

model.fit(cached_train, epochs=3)
metrics = model.evaluate(cached_test, return_dict=True)

# Materialise the training examples once as a NumPy array; the SHAP
# background sample further down is drawn from it. (The original computed
# this identical expression twice.)
train_np = np.stack(list(train))

# MovielensModel.call returns (user_embeddings, movie_embeddings, rating),
# so the names must be unpacked in that order.
trained_user_embeddings, trained_movie_embeddings, predicted_rating = model({
    "user_id": np.array(["42"]),
    "movie_title": np.array(["Dances with Wolves (1990)"]),
})
print("Predicted rating:")
print(predicted_rating)
# First SHAP attempt: build a rating-only model and hand it to DeepExplainer.
# NOTE(review): this fresh instance is compiled but not fitted anywhere in
# the visible code before it is explained — confirm that is intended.
model = MovielensModel(rating_weight=1.0, retrieval_weight=0.0)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
import shap
# Draw 100 distinct entries of train_np (sampling without replacement) to
# serve as the explainer's background data.
background=train_np[np.random.choice(train_np.shape[0],100,replace=False)]
# Pass the subclassed model object itself to the explainer.
explainer=shap.DeepExplainer(model,background)
This fails with: TypeError: object of type 'NoneType' has no len()
# Second SHAP attempt: instead of the model object, pass an (input, output)
# pair taken from the first and last entries of model.layers.
# NOTE(review): the traceback that follows reports that the last layer has
# no inbound nodes, so its `.output` attribute cannot be read.
explainer=shap.DeepExplainer((model.layers[0].input,model.layers[-1].output),background)
Output:

tf.Tensor([[3.402324]], shape=(1, 1), dtype=float32)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
in ()
      8 background=train_np[np.random.choice(train_np.shape[0],100,replace=False)]
      9 #explainer=shap.DeepExplainer(model,background)
---> 10 explainer=shap.DeepExplainer((model.layers[0].input,model.layers[-1].output),background)

/usr/local/lib/python3.7/dist-packages/keras/engine/base_layer.py in output(self)
   2167     """
   2168     if not self._inbound_nodes:
-> 2169       raise AttributeError('Layer ' + self.name + ' has no inbound nodes.')
   2170     return self._get_node_attribute_at_index(0, 'output_tensors', 'output')
   2171

AttributeError: Layer retrieval_2 has no inbound nodes.