I followed the Keras distillation tutorial. During training I get the following warning. The listed layers belong to the residual blocks that I don't use in my output:
WARNING:tensorflow:Gradients do not exist for variables ['ResBlock/conv2d_137/kernel:0', 'ResBlock/conv2d_137/bias:0', 'ResBlock/batch_normalization_137/gamma:0', 'ResBlock/batch_normalization_137/beta:0', 'ResBlock/conv2d_138/kernel:0', 'ResBlock/conv2d_138/bias:0', 'ResBlock/batch_normalization_138/gamma:0', 'ResBlock/batch_normalization_138/beta:0', 'ResBlock/batch_normalization_139/gamma:0', 'ResBlock/batch_normalization_139/beta:0', 'ResBlock/conv2d_139/kernel:0', 'ResBlock/conv2d_139/bias:0', 'ResBlock/conv2d_140/kernel:0', 'ResBlock/conv2d_140/bias:0', 'ResBlock/batch_normalization_140/gamma:0', 'ResBlock/batch_normalization_140/beta:0', 'ResBlock/conv2d_141/kernel:0', 'ResBlock/conv2d_141/bias:0', 'ResBlock/batch_normalization_141/gamma:0', 'ResBlock/batch_normalization_141/beta:0', 'ResBlock/conv2d_142/kernel:0', 'ResBlock/conv2d_142/bias:0', 'ResBlock/batch_normalization_142/gamma:0', 'ResBlock/batch_normalization_142/beta:0', 'ResBlock/conv2d_143/kernel:0', 'ResBlock/conv2d_143/bias:0', 'ResBlock/batch_normalization_143/gamma:0', 'ResBlock/batch_normalization_143/beta:0', 'dense_9/kernel:0', 'dense_9/bias:0', 'dense_10/kernel:0', 'dense_10/bias:0', 'dense_11/kernel:0', 'dense_11/bias:0'] when minimizing the loss.
import glob
import tensorflow as tf
from tensorflow.keras import Model, layers
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (Conv2D, BatchNormalization, ReLU, MaxPooling2D,
                                     GlobalAveragePooling2D, Dense, Dropout)
from tensorflow.keras.optimizers import SGD

class ResBlock(Model):
    def __init__(self, channels, stride = 1):
        super(ResBlock, self).__init__(name='ResBlock')
        self.flag = (stride != 1)
        self.conv1 = Conv2D(channels, 3, stride, padding='same')
        self.bn1 = BatchNormalization()
        self.conv2 = Conv2D(channels, 3, padding='same')
        self.bn2 = BatchNormalization()
        self.relu = ReLU()
        if self.flag:
            # 1x1 projection shortcut when the stride changes the spatial size
            self.bn3 = BatchNormalization()
            self.conv3 = Conv2D(channels, 1, stride)

    def call(self, x):
        x1 = self.conv1(x)
        x1 = self.bn1(x1)
        x1 = self.relu(x1)
        x1 = self.conv2(x1)
        x1 = self.bn2(x1)
        if self.flag:
            x = self.conv3(x)
            x = self.bn3(x)
        x1 = layers.add([x, x1])
        x1 = self.relu(x1)
        return x1
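For reference, a strided block behaves as I expect on a dummy tensor (quick shape check, not part of the training script):

block = ResBlock(64, stride = 2)
print(block(tf.zeros((1, 32, 32, 3))).shape)   # the stride-2 block halves the spatial size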
class ResNet34(Model):
    def __init__(self):
        super(ResNet34, self).__init__(name = 'ResNet34')
        self.conv1 = Conv2D(64, 7, 2, padding = 'same')
        self.bn = BatchNormalization()
        self.relu = ReLU()
        self.mp1 = MaxPooling2D(3, 2)
        self.conv2_1 = ResBlock(64)
        self.conv2_2 = ResBlock(64)
        self.conv2_3 = ResBlock(64)
        self.conv3_1 = ResBlock(128, 2)
        self.conv3_2 = ResBlock(128)
        self.conv3_3 = ResBlock(128)
        self.conv3_4 = ResBlock(128)
        self.conv4_1 = ResBlock(256, 2)
        self.conv4_2 = ResBlock(256)
        self.conv4_3 = ResBlock(256)
        self.conv4_4 = ResBlock(256)
        self.conv4_5 = ResBlock(256)
        self.conv4_6 = ResBlock(256)
        self.conv5_1 = ResBlock(512, 2)
        self.conv5_2 = ResBlock(512)
        self.conv5_3 = ResBlock(512)
        self.pool = GlobalAveragePooling2D()
        self.fc1 = Dense(512, activation = 'relu')
        self.dp1 = Dropout(0.5)
        self.fc2 = Dense(512, activation = 'relu')
        self.dp2 = Dropout(0.5)
        self.fc3 = Dense(64)

    def call(self, x):
        x = self.conv1(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.mp1(x)
        x = self.conv2_1(x)
        x = self.conv2_2(x)
        output_1 = self.conv2_3(x)
        x = self.conv3_1(output_1)
        x = self.conv3_2(x)
        x = self.conv3_3(x)
        output_2 = self.conv3_4(x)
        x = self.conv4_1(output_2)
        x = self.conv4_2(x)
        x = self.conv4_3(x)
        x = self.conv4_4(x)
        x = self.conv4_5(x)
        output_3 = self.conv4_6(x)
        x = self.conv5_1(output_3)
        x = self.conv5_2(x)
        x = self.conv5_3(x)
        x = self.pool(x)
        x = self.fc1(x)
        x = self.dp1(x)
        x = self.fc2(x)
        x = self.dp2(x)
        x = self.fc3(x)
        # Only the three intermediate feature maps are returned
        return output_1, output_2, output_3
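Note that call returns only the three intermediate feature maps (the output of fc3 is discarded). A quick shape check on a dummy 128x128x3 batch, the input size used below:

net = ResNet34()
o1, o2, o3 = net(tf.zeros((1, 128, 128, 3)))   # three intermediate feature maps
print(o1.shape, o2.shape, o3.shape)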
class Distiller(Model):
    def __init__(self, student, teacher):
        super(Distiller, self).__init__()
        self.teacher = teacher
        self.student = student

    def compile(self, optimizer):
        super(Distiller, self).compile(optimizer = optimizer)

    def Feature_Loss(self, ft_list, fs_list):
        # Normalized L2 distance between teacher and student feature maps
        tot_loss = 0
        for i in range(len(ft_list)):
            fs = fs_list[i]
            ft = ft_list[i]
            _, _, h, w = fs.shape
            fs_norm = K.l2_normalize(fs, axis = 1)
            ft_norm = K.l2_normalize(ft, axis = 1)
            f_loss = (0.5/(w*h))*K.sum(K.square(fs_norm - ft_norm))
            tot_loss += f_loss
        return tot_loss

    def train_step(self, x):
        # Forward pass of teacher
        Feature_t = self.teacher(x, training = False)
        with tf.GradientTape() as tape:
            # Forward pass of student
            Feature_s = self.student(x, training = True)
            # Compute losses
            loss = self.Feature_Loss(Feature_t, Feature_s)
        # Compute gradients
        trainable_vars = self.student.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update({"student_loss": loss})
        return results
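As a quick sanity check, Feature_Loss can be called directly on dummy feature lists (channels-last tensors, the same layout the Conv2D layers above produce):

d = Distiller(student = None, teacher = None)   # only used here to call Feature_Loss
ft = [tf.random.normal((2, 16, 16, 64)), tf.random.normal((2, 8, 8, 128))]
fs = [tf.random.normal((2, 16, 16, 64)), tf.random.normal((2, 8, 8, 128))]
print(d.Feature_Loss(ft, fs).numpy())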
#--------------------------------- Data ---------------------------------------
img_shape = 128
batch_size = 8
num_channel = 3
path = r'C:\Users\accdan\Desktop\Dataset_Wafer\10x\L1'
momentum = 0.9
l_rate = 0.4
decay = 0.0001
epochs = 1
SEED = 3222
AUTO = tf.data.experimental.AUTOTUNE
#------------------------------------------------------------------------------
#---------------------------- Dataset from folder ------------------------------
file_list = glob.glob(path + '\\*.jpg')
dataset = tf.data.Dataset.from_tensor_slices(file_list)
dataset = dataset.shuffle(10000, seed = SEED)
dataset = dataset.map(lambda x: parse_image(x, img_shape)).batch(batch_size).prefetch(10)
#dataset = dataset.map(lambda x : (x, x))
#------------------------------------------------------------------------------
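The parse_image helper used above is not shown here; roughly, it just reads, decodes and resizes each JPEG, something along these lines (the exact preprocessing may differ):

# Sketch of parse_image: load, decode, resize and scale to [0, 1]
def parse_image(filename, img_shape):
    img = tf.io.read_file(filename)
    img = tf.image.decode_jpeg(img, channels = 3)
    img = tf.image.resize(img, (img_shape, img_shape))
    return img / 255.0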
#------------------------------ Model definition ------------------------------
model_t = ResNet34()
model_t.build(input_shape = (batch_size, img_shape, img_shape, num_channel))
model_s = ResNet34()
model_s.build(input_shape = (batch_size, img_shape, img_shape, num_channel))
#------------------------------------------------------------------------------
#------------------------------ Optimizer -------------------------------------
optimizer = SGD(learning_rate = l_rate, momentum = momentum, decay = decay, nesterov = True)
#------------------------------------------------------------------------------
#------------------------------ Distiller -------------------------------------
distiller = Distiller(model_s, model_t)
distiller.compile(optimizer = optimizer)
#------------------------------------------------------------------------------
#------------------------------ Fit Model -------------------------------------
distiller.fit(dataset,
epochs = epochs)
#------------------------------------------------------------------------------
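To see exactly which student variables end up without a gradient, the same step can be run by hand outside of fit (sketch; x is just one batch taken from the dataset above):

# Sketch: manual training step that lists student variables receiving no gradient
x = next(iter(dataset))
feature_t = model_t(x, training = False)
with tf.GradientTape() as tape:
    feature_s = model_s(x, training = True)
    loss = distiller.Feature_Loss(feature_t, feature_s)
grads = tape.gradient(loss, model_s.trainable_variables)
for var, grad in zip(model_s.trainable_variables, grads):
    if grad is None:
        print('no gradient for', var.name)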