I have a dataset that is too big to fit into memory, so I use a generator class to build each batch on the fly. My model is a seq-to-seq model that takes only 50 time points of a series at once, but my signals are 5000 points long, so I would like to use tf.keras.utils.timeseries_dataset_from_array
to slide a window over the signals drawn into a batch. How do I combine timeseries_dataset_from_array
with the following batch generator class? (A sketch of the windowing I have in mind follows the class.)
import numpy as np
import tensorflow as tf

class DataGenerator(tf.keras.utils.Sequence):
def __init__(self, list_IDs, batch_size=32,
use_last = True, shuffle = True,
lead1='LI', lead2='LII'):
# L,D = df.shape
self.list_IDs = list_IDs
self.indices = np.arange(len(self.list_IDs))
        self.signals_length = 5000  # each raw signal is 5000 samples long
self.batch_size = batch_size
self.use_last = use_last
self.shuffle = shuffle
self.lead1 = lead1
self.lead2 = lead2
def on_epoch_end(self):
'Updates indexes after each epoch'
        if self.shuffle:
np.random.shuffle(self.indices)
def __data_generation(self, list_IDs_temp):
"""
Generates data containing batch_size samples.
Here we have a seq-to-seq model so instead of X and y we
have X1 and X2.
"""
lead1 = self.lead1
lead2 = self.lead2
        # Initialization: one row per sample, one column per time step
        X1 = np.empty((self.batch_size, self.signals_length))
        X2 = np.empty((self.batch_size, self.signals_length))
# Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Load the stored signal once; it serves as both the input and
            # the target sequence for the seq-to-seq model
            signal = np.load('data/' + ID + '.npy')
            X1[i,] = signal
            X2[i,] = signal
return X1, X2
def __len__(self):
'Denotes the number of batches per epoch'
return int(np.floor(len(self.list_IDs) / self.batch_size))
def __getitem__(self, index):
'Generate one batch of data'
# Generate indexes of the batch
indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
# Find list of IDs
list_IDs_temp = [self.list_IDs[k] for k in indices]
# Generate data
X1, X2 = self.__data_generation(list_IDs_temp)
return X1, X2
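For reference, here is roughly how I picture the windowing step on a single loaded signal. This is only a sketch: the .npy layout (one 5000-point signal per file), the helper name windows_for_id, and the stride choice are my assumptions.

import numpy as np
import tensorflow as tf

WINDOW = 50  # sequence length the model consumes

def windows_for_id(ID):
    # Assumption: each .npy file holds a single 5000-point signal
    signal = np.load('data/' + ID + '.npy')
    # Cut the signal into 50-point windows; targets=None because the
    # seq-to-seq inputs and targets are built separately
    return tf.keras.utils.timeseries_dataset_from_array(
        data=signal,
        targets=None,
        sequence_length=WINDOW,
        sequence_stride=WINDOW,  # non-overlapping; use 1 for a fully sliding window
        batch_size=32,
    )

With sequence_stride=WINDOW this yields 100 non-overlapping windows per signal, but timeseries_dataset_from_array returns a tf.data.Dataset while __getitem__ returns plain NumPy arrays, and that mismatch is exactly what I can't figure out how to bridge.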