I'm using tf.data together with a custom layer to work around a data-augmentation bottleneck, but I found that using tf.data alone is faster than the combination, and I don't understand what is going on inside the custom layer. Can someone please explain?
Thanks in advance!
This is my data-augmentation code; it mainly does standardization and resizing.
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, LSTM
from tensorflow.keras.models import Model

def random_normalization(data, mean, std):
    # Standardize with a randomly scaled mean/std (scale factor in [0.5, 0.9)).
    mean = tf.multiply(mean, tf.random.uniform(shape=(), minval=0.5, maxval=0.9, dtype=tf.float64))
    std = tf.multiply(std, tf.random.uniform(shape=(), minval=0.5, maxval=0.9, dtype=tf.float64))
    return tf.divide(tf.subtract(data, mean), std)

def random_resize(data):
    def resizing(index, data, choice, enable, new_data, number, overlap):
        # Window [FrontEnd, BackEnd) around the current index, widened by a random overlap.
        FrontEnd = tf.cond(tf.math.greater_equal(tf.subtract(index, overlap), tf.constant(0)),
                           lambda: tf.subtract(index, overlap),
                           lambda: index)
        BackEnd = tf.cond(tf.math.less(tf.add(tf.add(index, 10), overlap), tf.constant(2000)),
                          lambda: tf.add(tf.add(index, 10), overlap),
                          lambda: index)
        # Mean of each of the six channels over the window.
        z1 = tf.gather(data, indices=[0], axis=1)
        z1 = tf.gather(z1, indices=tf.range(FrontEnd, BackEnd), axis=0)
        z2 = tf.gather(data, indices=[1], axis=1)
        z2 = tf.gather(z2, indices=tf.range(FrontEnd, BackEnd), axis=0)
        z3 = tf.gather(data, indices=[2], axis=1)
        z3 = tf.gather(z3, indices=tf.range(FrontEnd, BackEnd), axis=0)
        z4 = tf.gather(data, indices=[3], axis=1)
        z4 = tf.gather(z4, indices=tf.range(FrontEnd, BackEnd), axis=0)
        z5 = tf.gather(data, indices=[4], axis=1)
        z5 = tf.gather(z5, indices=tf.range(FrontEnd, BackEnd), axis=0)
        z6 = tf.gather(data, indices=[5], axis=1)
        z6 = tf.gather(z6, indices=tf.range(FrontEnd, BackEnd), axis=0)
        new_data = tf.tensor_scatter_nd_update(new_data, [[number, 0], [number, 1], [number, 2],
                                                          [number, 3], [number, 4], [number, 5]],
                                               [tf.math.reduce_mean(z1), tf.math.reduce_mean(z2),
                                                tf.math.reduce_mean(z3), tf.math.reduce_mean(z4),
                                                tf.math.reduce_mean(z5), tf.math.reduce_mean(z6)])
        return tf.add(index, 10), data, choice, enable, new_data, tf.add(number, 1), overlap

    choice = tf.random.uniform(shape=(), minval=0, maxval=4, dtype=tf.int32)
    enable = tf.random.uniform(shape=(), minval=0, maxval=1, dtype=tf.float64)
    overlap = tf.random.uniform(shape=(), minval=5, maxval=21, dtype=tf.int32)
    new_data = tf.zeros((200, 6), dtype=tf.float64)
    index = tf.constant(0)
    number = tf.constant(0)
    condition = lambda index, data, choice, enable, new_data, number, overlap: tf.less(index, 2000)
    r = tf.while_loop(condition, resizing, loop_vars=(index, data, choice, enable, new_data, number, overlap))
    return r[4]

def normal_resize(data):
    data = tf.reshape(data, (2000, 6, 1))
    data = tf.image.resize(data, size=[200, 6])
    return tf.cast(tf.reshape(data, (200, 6)), dtype=tf.float64)

def augmentation(data, labels):
    mean = tf.math.reduce_mean(data, axis=0)
    std = tf.math.reduce_std(data, axis=0)
    # Standardize; 80% of the time with randomly scaled mean/std.
    data = tf.cond(tf.random.uniform(shape=(), minval=0, maxval=1, dtype=tf.float64) < tf.constant(0.8, dtype=tf.float64),
                   lambda: random_normalization(data, mean, std),
                   lambda: tf.divide(tf.subtract(data, mean), std))
    # Resize 2000 -> 200; 80% of the time with the random windowed mean.
    data = tf.cond(tf.random.uniform(shape=(), minval=0, maxval=1, dtype=tf.float64) < tf.constant(0.8, dtype=tf.float64),
                   lambda: random_resize(data),
                   lambda: normal_resize(data))
    return data, labels
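For reference, the function can be sanity-checked on its own; a minimal sketch, assuming the same (2000, 6) float64 input as the dataset below:

sample = tf.random.uniform((2000, 6), dtype=tf.float64)
aug_data, aug_label = augmentation(sample, tf.constant(0))
print(aug_data.shape)  # (200, 6) after either resize path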
Here is the main code, including the tf.data pipeline and the model:
if __name__ == '__main__':
    trainDS = tf.data.Dataset.from_tensor_slices((np.random.rand(3000, 2000, 6),
                                                  np.concatenate((np.zeros((1500)), np.ones((1500))))))
    trainDS = (
        trainDS
        .cache()
        .shuffle(1000, reshuffle_each_iteration=False)
        .map(augmentation, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(128, drop_remainder=True)
        .prefetch(tf.data.AUTOTUNE))

    input = Input((200, 6))
    x = LSTM(64, return_sequences=True)(input)
    output = Dense(1, activation='sigmoid')(x)
    model = Model(input, output)
    model.compile(optimizer='adam', loss='BinaryCrossentropy')
    model.fit(trainDS, epochs=3)
This is the code for my custom layer. It is a bit cumbersome, but it achieves the result I want:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Layer, LSTM
from tensorflow.keras.models import Model

class CustomLayer(Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def execute(self, data, batch_size, new_data, _type):
        def _fun(index, data, _type, new_data):
            # augmentation here is the labels-free version shown further down.
            resized = tf.cond(_type,
                              lambda: augmentation(tf.reshape(tf.gather(data, [index]), (2000, 6))),
                              lambda: normal_resize(tf.reshape(tf.gather(data, [index]), (2000, 6))))
            values = tf.reshape(resized, (1, -1))[0]
            _Indices = self.createIndices(index)
            new_data = tf.tensor_scatter_nd_update(new_data, _Indices, values)
            return tf.add(index, 1), data, _type, new_data

        index = tf.constant(0)
        condition = lambda index, data, _type, new_data: tf.less(index, batch_size)
        r = tf.while_loop(condition, _fun, loop_vars=(index, data, _type, new_data))
        return r[-1]

    def createIndices(self, BatchSizeIndex):
        # Build the 200 * 6 = 1200 (sample, row, col) scatter indices for one sample.
        def loop1(_i, BatchSizeIndex, col_num, _Indices):
            def loop2(_i, _j, BatchSizeIndex, col_num, _Indices):
                _Indices = tf.tensor_scatter_nd_update(_Indices, [[col_num, 0], [col_num, 1], [col_num, 2]],
                                                       [BatchSizeIndex, _i, _j])
                return _i, tf.add(_j, 1), BatchSizeIndex, tf.add(col_num, 1), _Indices

            _j = tf.constant(0)
            condition_loop2 = lambda _i, _j, BatchSizeIndex, col_num, _Indices: tf.less(_j, 6)
            r_loop2 = tf.while_loop(condition_loop2, loop2, loop_vars=(_i, _j, BatchSizeIndex, col_num, _Indices))
            return tf.add(_i, 1), BatchSizeIndex, r_loop2[3], r_loop2[4]

        _Indices = tf.zeros((1200, 3), dtype=tf.int32)
        col_num = tf.constant(0)
        _i = tf.constant(0)
        condition_loop1 = lambda _i, BatchSizeIndex, col_num, _Indices: tf.less(_i, 200)
        r_loop1 = tf.while_loop(condition_loop1, loop1, loop_vars=(_i, BatchSizeIndex, col_num, _Indices))
        return r_loop1[-1]

    def call(self, images, training):
        batch_size = tf.shape(images)[0]
        new_data = tf.zeros((batch_size, 200, 6), dtype=tf.float64)
        images = tf.cast(images, dtype=tf.float64)
        if training:
            data = self.execute(images, batch_size, new_data, tf.constant(True))
        else:
            data = self.execute(images, batch_size, new_data, tf.constant(False))
        return data
The main code is then modified to run like this (augmentation now takes only the data, with no labels):
def augmentation(data):
    ...
    return data

if __name__ == '__main__':
    trainDS = tf.data.Dataset.from_tensor_slices((np.random.rand(3000, 2000, 6),
                                                  np.concatenate((np.zeros((1500)), np.ones((1500))))))
    trainDS = (
        trainDS
        .cache()
        .shuffle(1000, reshuffle_each_iteration=False)
        .batch(128, drop_remainder=True)
        .prefetch(tf.data.AUTOTUNE))

    input = Input((2000, 6))
    x = CustomLayer()(input)
    x = LSTM(64, return_sequences=True)(x)
    output = Dense(1, activation='sigmoid')(x)
    model = Model(input, output)
    model.compile(optimizer='adam', loss='BinaryCrossentropy')
    model.fit(trainDS, epochs=3)
Results: tf.data alone takes about 18 s, while tf.data + CustomLayer takes about 38 s.
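These are rough wall-clock numbers; a simple measurement around model.fit is enough to reproduce the comparison, for example:

import time

start = time.perf_counter()
model.fit(trainDS, epochs=3)
print(f'fit took {time.perf_counter() - start:.1f}s')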
The thing I want to clarify is that running the augmentation through map in tf.data happens on the CPU, whereas writing the augmentation inside a Layer should, in theory, run it on the GPU. Why is there such a big gap between the two?
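One way to check where the ops actually run is to enable device-placement logging before building the pipeline and the model; a minimal sketch:

tf.debugging.set_log_device_placement(True)    # logs which device each op is placed on
print(tf.config.list_physical_devices('GPU'))  # confirms whether a GPU is visible at all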
Environment: Python 3.6, TensorFlow 2.4.0