Use `tf.keras.utils.timeseries_dataset_from_array` for a given batch inside of a generator

I have a dataset which is too big to fit into memory, so I use a class to generate new examples each time. The model I am using is a seq-to-seq model that takes only 50 time points of a series, but my signals are 5000 points long, so I would like to use tf.keras.utils.timeseries_dataset_from_array in order to slide a window over the signals that were drawn into the batch. How do I combine timeseries_dataset_from_array with the following batch function/generator class?

class DataGenerator:
    """Serve batches of signals loaded lazily from ``data/<ID>.npy`` files.

    Only the files belonging to the requested batch are read, so the full
    dataset never has to be held in memory at once.  The model is
    seq-to-seq, so each batch is returned as a pair ``(X1, X2)`` instead of
    the usual ``(X, y)``; both arrays are built from the same files.

    Args:
        list_IDs: Sequence of sample identifiers; each one names a file
            ``data/<ID>.npy``.
        batch_size: Number of samples per batch.
        use_last: Stored on the instance; not read by this class.
        shuffle: When truthy, ``on_epoch_end`` reshuffles the sample order.
        lead1: Stored on the instance; not read by this class.
        lead2: Stored on the instance; not read by this class.
    """

    def __init__(self, list_IDs, batch_size=32,
                 use_last=True, shuffle=True,
                 lead1='LI', lead2='LII'):
        self.list_IDs = list_IDs
        # Positions into list_IDs; this is what gets shuffled between epochs.
        self.indices = np.arange(len(self.list_IDs))
        # Kept for backward compatibility: nothing in this class reads it.
        # NOTE(review): the signals are described as 5000 points long, so
        # 500 may be a typo -- confirm before relying on this attribute.
        self.signals_length = np.arange(500)
        self.batch_size = batch_size
        self.use_last = use_last
        self.shuffle = shuffle
        self.lead1 = lead1
        self.lead2 = lead2

    def on_epoch_end(self):
        """Reshuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __data_generation(self, list_IDs_temp):
        """Load the signals named by ``list_IDs_temp`` and stack them.

        Returns:
            ``(X1, X2)``: two independent arrays with one row per ID.  The
            trailing dimensions and dtype are taken from the loaded files
            rather than from a pre-allocated buffer, so signals of any
            (common) length work and no row is left uninitialised.

        Raises:
            ValueError: from ``np.stack`` if ``list_IDs_temp`` is empty or
                the loaded arrays do not share one shape.
        """
        # Each file is read once; the previous code loaded it twice to fill
        # two identical buffers.
        X1 = np.stack([np.load('data/' + ID + '.npy') for ID in list_IDs_temp])
        # Copy so that in-place edits to one output never leak into the other.
        X2 = X1.copy()
        return X1, X2

    def __len__(self):
        """Return the number of full batches per epoch.

        A trailing partial batch is dropped.
        """
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X1, X2)``.

        Negative indices count from the last batch.

        Raises:
            IndexError: if ``index`` is outside ``range(len(self))``.  This
                also lets a plain ``for batch in generator`` loop stop at
                the end of the epoch instead of running forever.
        """
        n_batches = len(self)
        if index < 0:
            index += n_batches
        if not 0 <= index < n_batches:
            raise IndexError('batch index out of range')

        # Positions of the samples that make up this batch.
        start = index * self.batch_size
        batch_indices = self.indices[start:start + self.batch_size]

        # Translate positions into sample IDs and load them.
        batch_ids = [self.list_IDs[k] for k in batch_indices]
        return self.__data_generation(batch_ids)

Hi @David_Harar,

Sorry for the delay in response.
Yes, I would recommend using tf.keras.utils.timeseries_dataset_from_array if you are working with a large timeseries dataset which doesn’t fit into memory. You could add it as shown below:

def __data_generation(self, list_IDs_temp):
    """Load a batch of signals and cut every one into sliding windows.

    Args:
        list_IDs_temp: IDs of the signals to load; each one names a file
            ``data/<ID>.npy``.
            # assumes every file holds a 1-D signal and all signals share
            # one length -- TODO confirm

    Returns:
        ``(X1, X2)``: two independent arrays of shape
        ``(n_signals * n_windows, seq_length)``.  Rows are grouped by
        signal, and within a signal ordered by window start.

    Raises:
        ValueError: if the signals are shorter than the window length.
    """
    # The generator class in the question defines no `seq_length`
    # attribute; fall back to the 50-sample window the model expects.
    seq_length = getattr(self, 'seq_length', 50)

    # Shape (n_signals, n_timesteps); the length comes from the files
    # instead of a hard-coded 5000.
    signals = np.stack(
        [np.load('data/' + ID + '.npy') for ID in list_IDs_temp]
    )
    if signals.shape[1] < seq_length:
        raise ValueError('signals are shorter than the window length')

    # timeseries_dataset_from_array slides its window along axis 0, so the
    # time axis has to come first: pass (n_timesteps, n_signals).  With
    # targets=None the dataset yields only the batches of windows.
    dataset = tf.keras.utils.timeseries_dataset_from_array(
        signals.T,
        targets=None,
        sequence_length=seq_length,
        batch_size=self.batch_size,
    )

    # Collect every batch of windows, not just the first one.
    # Result shape: (n_windows, seq_length, n_signals).
    windows = np.concatenate(list(dataset.as_numpy_iterator()), axis=0)

    # Regroup so that each row is one window taken from one signal.
    X1 = windows.transpose(2, 0, 1).reshape(-1, seq_length)
    # Seq-to-seq input/target pair built from the same windows; copy so
    # the two outputs do not alias each other.
    X2 = X1.copy()
    return X1, X2

In addition, kindly refer to this timeseries implementation for more information.

Thank You.