The code for the subclassing approach is given below. I created a class `GTN` that inherits from `layers.Layer` and defined two parameters to be trained: (1) `self.layers` and (2) `self.weight`.

Looking further, you will see that `self.layers` has two parameters of its own, `self.weight` and `self.bias` (defined in class `GTConv`).

That makes the following parameters in total:

- `self.weight` (defined in `GTN`)
- 2x `self.weight` and 2x `self.bias` for each `GTLayer` (defined in `GTLayer` and `GTConv`), and there are 4 layers

Therefore, the total is 1 + 4x4 = 17 parameters. However, I am not able to find the parameters associated with `self.layers` in the `model.trainable_weights` list while calculating the gradients and optimising the model. Kindly help me with this.

Code:

```
class GTN(layers.Layer):
    """Top-level layer owning a stack of ``GTLayer`` blocks and one weight matrix.

    Only the constructor appears in this excerpt; no ``call`` method is
    defined in the code shown here.

    Args:
        num_edge: Forwarded to every ``GTLayer`` as its first argument.
        num_channels: Forwarded to every ``GTLayer`` as its second argument;
            also stored on the instance.
        w_in: Number of rows of ``self.weight``.
        w_out: Number of columns of ``self.weight``.
        num_class: Stored on the instance; not otherwise used here.
        num_layers: Number of ``GTLayer`` instances to create.
        norm: Accepted but not used by the visible code. It defaults to
            ``None`` so that callers may omit it.
    """

    def __init__(self, num_edge, num_channels, w_in, w_out, num_class,
                 num_layers, norm=None):
        super(GTN, self).__init__()
        self.num_channels = num_channels
        self.w_in = w_in
        self.w_out = w_out
        self.num_class = num_class
        self.num_layers = num_layers

        # Build the stack with a plain Python range: this runs once at
        # construction time, so there is no reason to iterate a tensor and
        # compare tensors inside Python control flow.
        # Only the first block is flagged ``first=True``; ``GTLayer`` must
        # accept that keyword.
        self.layers = [
            GTLayer(num_edge, num_channels, first=(index == 0))
            for index in range(int(num_layers))
        ]

        # Trainable (w_in, w_out) matrix drawn from a random-normal initializer.
        w_init = tf.random_normal_initializer()
        self.weight = tf.Variable(
            initial_value=w_init(shape=(w_in, w_out)),
            trainable=True,
        )
class GTLayer(keras.layers.Layer):
    """One block built from ``GTConv`` sub-layers.

    Args:
        in_channels: Forwarded to each ``GTConv``; also stored on the instance.
        out_channels: Forwarded to each ``GTConv``; also stored on the instance.
        first: Whether this is the first block of a stack. A first block
            multiplies the outputs of two convolutions of ``A``; a later block
            multiplies the previous result ``H_`` by one convolution of ``A``.
            Defaults to ``True``, which keeps the two-argument constructor
            working.
    """

    def __init__(self, in_channels, out_channels, first=True):
        super(GTLayer, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        # ``call`` branches on this flag, so it has to be stored here.
        self.first = first
        self.conv1 = GTConv(in_channels, out_channels)
        # The second convolution is only consumed by the first block; skipping
        # it elsewhere avoids trainable variables that never get a gradient.
        self.conv2 = GTConv(in_channels, out_channels) if first else None

    def call(self, A, H_=None):
        """Return ``(H, W)`` for this block.

        Args:
            A: Input passed to the ``GTConv`` sub-layers.
            H_: Output of the previous block; required when ``first`` is false.

        Returns:
            A tuple ``(H, W)`` where ``H`` is the matrix product for this block
            and ``W`` is a one-element list holding the softmax (axis 1) of
            ``conv1.weight`` with gradients stopped.

        Raises:
            ValueError: If this is not a first block and ``H_`` is ``None``.
        """
        a = self.conv1(A)
        if self.first:
            b = self.conv2(A)
            H = tf.matmul(a, b)
        else:
            # NOTE(review): this branch follows the published GTN reference
            # implementation (previous product times a single convolution);
            # confirm this is the intended behaviour for non-first blocks.
            if H_ is None:
                raise ValueError("H_ is required when first is False")
            H = tf.matmul(H_, a)
        # Detached copy of the normalised weights, returned for inspection.
        W = [tf.stop_gradient(tf.nn.softmax(self.conv1.weight, axis=1))]
        return H, W
class GTConv(keras.layers.Layer):
    """Softmax-weighted sum of the input over axis 1.

    The layer holds one trainable variable ``weight`` of shape
    ``(out_channels, in_channels, 1, 1)``, a non-trainable ``scale`` variable
    initialised to ``[0.1]``, and a ``bias`` attribute that is always ``None``.

    Args:
        in_channels: Size of axis 1 of ``weight`` (the axis that is normalised).
        out_channels: Size of axis 0 of ``weight``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        kernel_shape = (out_channels, in_channels, 1, 1)
        initializer = tf.random_normal_initializer()
        self.weight = tf.Variable(
            initial_value=initializer(shape=kernel_shape),
            trainable=True,
        )
        self.bias = None
        self.scale = tf.Variable([0.1], trainable=False)

    def call(self, A):
        """Weight ``A`` by the softmax of ``weight`` and sum over axis 1.

        Assumes ``A`` broadcasts against the 4-D ``weight`` tensor.
        TODO confirm the expected shape of ``A`` with the caller.
        """
        mixing = tf.nn.softmax(self.weight, 1)
        weighted = A * mixing
        return tf.reduce_sum(weighted, 1)
```

**Training:**

```
final_f1 = 0
# Single outer repetition. A plain Python range is used because nothing here
# needs a tensor-valued loop index.
for _ in range(1):
    model = GTN(
        num_edge=A.shape[-1],
        num_channels=num_channels,
        w_in=node_features.shape[1],
        w_out=node_dim,
        num_class=num_classes,
        num_layers=num_layers,
    )
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.005)

    for epoch in range(epochs):
        # Record only the forward pass; the metric and logging below do not
        # contribute to the gradient, so they stay outside the tape.
        with tf.GradientTape() as tape:
            loss, y_train, Ws = model(A, node_features, train_node, train_target)

        # No explicit ``.cpu()`` copy: ``.numpy()`` already returns host
        # memory, and printing an eager tensor shows its value directly.
        train_f1 = tf.reduce_mean(
            f1_score(tf.math.argmax(y_train, 1), train_target, num_classes=num_classes)
        )
        print('Train - Loss: {}, Macro_F1: {}'.format(loss.numpy(), train_f1))

        # Differentiate with respect to every trainable variable the model
        # tracks, then apply one Adam step.
        params = model.trainable_weights
        grads = tape.gradient(loss, params)
        optimizer.apply_gradients(zip(grads, params))
```