How to remove the label data from the feature data in a windowed time series (TensorFlow)

I am training time series forecasting models using TensorFlow. The models in this tutorial make a set of predictions based on a window of consecutive samples from the data.

I want to remove the label data from the feature data in this function:

def make_dataset(self, data):
  """Build a batched dataset of `(inputs, labels)` windows from `data`.

  `data` is converted to a float32 NumPy array and cut into sliding windows
  of `self.total_window_size` consecutive rows, one window starting at every
  row (stride 1). The windows are shuffled, grouped into batches of 32, and
  each batch is passed through `self.split_window`.

  Args:
    data: Array-like of samples; anything `np.array` can convert to float32.

  Returns:
    The mapped dataset, whose elements are whatever `self.split_window`
    returns for a batch of windows.
  """
  samples = np.array(data, dtype=np.float32)

  # No targets are passed here: the labels are carved out of each window
  # afterwards by `split_window`.
  window_options = dict(
      targets=None,
      sequence_length=self.total_window_size,
      sequence_stride=1,
      shuffle=True,
      batch_size=32,
  )
  windows = tf.keras.utils.timeseries_dataset_from_array(
      data=samples, **window_options)

  return windows.map(self.split_window)


# Attach the function to the class so instances can call it as a method.
WindowGenerator.make_dataset = make_dataset

This is the whole windowing code:

class WindowGenerator():
  """Bookkeeping for slicing a time series into input/label windows.

  Each window covers `input_width + shift` consecutive time steps: the first
  `input_width` steps are the inputs and the last `label_width` steps are
  the labels.
  """

  def __init__(self, input_width, label_width, shift,
               train_df=train_df, val_df=val_df, test_df=test_df,
               label_columns=None):
    """Record the data splits and derive the window layout.

    Args:
      input_width: Number of time steps at the start of a window used as
        inputs.
      label_width: Number of time steps at the end of a window used as
        labels.
      shift: Number of time steps added after the inputs to complete the
        window.
      train_df: Training data; its `.columns` defines `self.column_indices`.
        Defaults to the module-level `train_df` as bound when the class was
        defined.
      val_df: Validation data, stored unchanged. Defaults to the
        module-level `val_df`.
      test_df: Test data, stored unchanged. Defaults to the module-level
        `test_df`.
      label_columns: Optional column names to use as labels. When given,
        their positions within `label_columns` are also stored in
        `self.label_columns_indices`.
    """
    # Window geometry.
    self.input_width = input_width
    self.label_width = label_width
    self.shift = shift
    self.total_window_size = input_width + shift

    # Inputs occupy the leading `input_width` positions of the window.
    self.input_slice = slice(0, input_width)
    self.input_indices = np.arange(self.total_window_size)[self.input_slice]

    # Labels occupy the trailing `label_width` positions of the window.
    self.label_start = self.total_window_size - self.label_width
    self.labels_slice = slice(self.label_start, None)
    self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    # Raw data splits.
    self.train_df, self.val_df, self.test_df = train_df, val_df, test_df

    # Name -> position lookups for the label columns and for all columns.
    self.label_columns = label_columns
    if label_columns is not None:
      self.label_columns_indices = dict(
          (name, position) for position, name in enumerate(label_columns))
    self.column_indices = dict(
        (name, position) for position, name in enumerate(train_df.columns))

  def __repr__(self):
    """Return a multi-line summary of the window layout."""
    summary = (
        f'Total window size: {self.total_window_size}',
        f'Input indices: {self.input_indices}',
        f'Label indices: {self.label_indices}',
        f'Label column name(s): {self.label_columns}',
    )
    return '\n'.join(summary)

def split_window(self, features):
  """Split a batch of windows into an `(inputs, labels)` pair.

  Args:
    features: Rank-3 batch of windows indexed as `[batch, time, column]`.

  Returns:
    A tuple `(inputs, labels)`. `inputs` holds the time steps selected by
    `self.input_slice` with every column. `labels` holds the time steps
    selected by `self.labels_slice`, restricted to `self.label_columns`
    (in that order) when it is not None.
  """
  window_inputs = features[:, self.input_slice, :]
  window_labels = features[:, self.labels_slice, :]

  wanted_columns = self.label_columns
  if wanted_columns is not None:
    # Pick each requested column by position and re-assemble them along
    # the last axis.
    per_column = []
    for column_name in wanted_columns:
      column_position = self.column_indices[column_name]
      per_column.append(window_labels[:, :, column_position])
    window_labels = tf.stack(per_column, axis=-1)

  # The slices above lose the static shape, so restore the known time
  # dimension by hand; this keeps the resulting `tf.data.Datasets` easy to
  # inspect.
  window_inputs.set_shape([None, self.input_width, None])
  window_labels.set_shape([None, self.label_width, None])

  return window_inputs, window_labels


# Attach the function to the class so instances can call it as a method.
WindowGenerator.split_window = split_window


def make_dataset(self, data):
  """Turn `data` into a dataset of split `(inputs, labels)` window batches.

  The samples are cast to a float32 NumPy array, windowed into runs of
  `self.total_window_size` consecutive rows (a new window at every row),
  shuffled, batched 32 at a time, and finally mapped through
  `self.split_window`.

  Args:
    data: Array-like of samples; anything `np.array` can convert to float32.

  Returns:
    The dataset produced by mapping `self.split_window` over the batches.
  """
  raw_samples = np.array(data, dtype=np.float32)

  # `targets=None`: labels are not supplied separately, they are sliced out
  # of every window by `split_window`.
  batched_windows = tf.keras.utils.timeseries_dataset_from_array(
      data=raw_samples,
      targets=None,
      sequence_length=self.total_window_size,
      sequence_stride=1,
      shuffle=True,
      batch_size=32,
  )
  return batched_windows.map(self.split_window)


# Attach the function to the class so instances can call it as a method.
WindowGenerator.make_dataset = make_dataset

@property
def train(self):
  """Dataset built by `self.make_dataset` from `self.train_df`."""
  build = self.make_dataset
  return build(self.train_df)


@property
def val(self):
  """Dataset built by `self.make_dataset` from `self.val_df`."""
  build = self.make_dataset
  return build(self.val_df)


@property
def test(self):
  """Dataset built by `self.make_dataset` from `self.test_df`."""
  build = self.make_dataset
  return build(self.test_df)


@property
def example(self):
  """Return one `inputs, labels` batch for plotting, caching it on first use."""
  cached_batch = getattr(self, '_example', None)
  if cached_batch is not None:
    return cached_batch

  # Nothing cached yet: take the first batch of the `.train` dataset and
  # remember it so later calls return the same batch.
  first_batch = next(iter(self.train))
  self._example = first_batch
  return first_batch


# Install the four properties defined above as attributes of the class.
WindowGenerator.train = train
WindowGenerator.val = val
WindowGenerator.test = test
WindowGenerator.example = example

This code is from the TensorFlow Core tutorial "Zaman serisi tahmini" (Time series forecasting).

I want to remove the label data from the feature data.

If the data is [col1, col2, col3, col4], I want the label data to be [col4] and the feature data to be [col1, col2, col3].

Hi @Abdelrahman_Hamdy, you can remove the target data from the input data by using tf.keras.utils.timeseries_dataset_from_array, as shown below:

# Toy series: the first three values form the input window and the last
# value is the target for that window.
data = np.array([0, 1, 2, 3])
input_data = data
targets = data[3:]

dataset = tf.keras.utils.timeseries_dataset_from_array(
    data=input_data,
    targets=targets,
    sequence_length=3,
)

# Each element of the dataset is an (inputs, targets) batch.
for inputs, targets in dataset:
  print(inputs)
  print(targets)

output

tf.Tensor([[0 1 2]], shape=(1, 3), dtype=int64)
tf.Tensor([3], shape=(1,), dtype=int64)

Thank You.

1 Like

Hello,
I think this will not work, because `targets` is None in make_dataset and the labels are split off in self.split_window;
split_window returns both the features and the labels.
So I found this solution:

def split_window(self, features):
  """Separate a batch of windows into input steps and label steps.

  Args:
    features: Rank-3 batch of windows indexed as `[batch, time, column]`.

  Returns:
    A tuple `(inputs, labels)`: the time steps selected by
    `self.input_slice` (all columns), and the time steps selected by
    `self.labels_slice`, limited to `self.label_columns` when that is set.
  """
  inputs = features[:, self.input_slice, :]
  all_label_columns = features[:, self.labels_slice, :]

  # To keep only chosen feature columns in `inputs` (for example, to leave
  # the label column out), list their names in `inputs_col` and uncomment
  # both lines:
  # inputs_col = [#col_names]
  # inputs = tf.stack([inputs[:, :, self.column_indices[name]] for name in inputs_col],axis=-1)

  if self.label_columns is None:
    labels = all_label_columns
  else:
    # Gather the requested label columns, in the order they were named.
    labels = tf.stack(
        [all_label_columns[:, :, self.column_indices[name]]
         for name in self.label_columns],
        axis=-1)

  # Slicing drops the static shape; put the known time dimension back so
  # the `tf.data.Datasets` are easier to inspect.
  inputs.set_shape([None, self.input_width, None])
  labels.set_shape([None, self.label_width, None])

  return inputs, labels


# Attach the function to the class so instances can call it as a method.
WindowGenerator.split_window = split_window

The solution is the commented-out code above.
Note that removing the target from the time series inputs gives worse training results.

Thanks brother