Unable to apply image preprocessing to tf.data.Dataset

Hi,

I’m trying to apply preprocessing to a tf.data.Dataset (option 2 in the Keras preprocessing guide, “Working with preprocessing layers”, on TensorFlow Core). An exception is thrown when I try to read from a mapped Dataset whose map function includes the RandomFlip preprocessing layer.

Any idea what I am doing wrong here? Code and output are below.

Thanks!

import functools
import numpy as np
import tensorflow as tf


def data_gen():
    """Yield ten random (image, label) array pairs.

    Each image has shape (80, 80, 3) and each label has shape (40, 40, 1);
    values are uniform random floats scaled into the range [0, 255).
    """
    image_shape = (80, 80, 3)
    label_shape = (40, 40, 1)
    for _ in range(10):
        image = 255 * np.random.random(size=image_shape)
        label = 255 * np.random.random(size=label_shape)
        yield image, label


def preprocess(image, label, cropped_image_size, cropped_label_size, skip_augmentations=False):
    """Optionally flip/rotate an image/label pair at random, then center-crop both.

    Args:
        image: Image tensor to preprocess.
        label: Label tensor to preprocess.
        cropped_image_size: Height and width of the square center crop of `image`.
        cropped_label_size: Height and width of the square center crop of `label`.
        skip_augmentations: When True, only the center crop is applied.

    Returns:
        A tuple `(x, y)` with the processed image and label.

    NOTE: This is the version that reproduces the reported problem. The
    RandomFlip / RandomRotation layers are instantiated inside this function,
    i.e. inside the function handed to `Dataset.map`. Reading from a dataset
    mapped with `skip_augmentations=False` raises the NotFoundError shown in
    the output of this thread ("Resource ... does not exist", raised from a
    `stateful_uniform/RngReadAndSkip` node).
    """

    x = image
    y = label

    x_size = cropped_image_size
    y_size = cropped_label_size

    if not skip_augmentations:
        # Image and label each get their own, unseeded layer instance.
        # NOTE(review): presumably this means the two are flipped/rotated
        # independently of each other -- confirm if they must stay aligned.
        x = tf.keras.layers.RandomFlip(mode="horizontal")(x)
        y = tf.keras.layers.RandomFlip(mode="horizontal")(y)

        x = tf.keras.layers.RandomRotation(factor=1.0, fill_mode='constant')(x)
        y = tf.keras.layers.RandomRotation(factor=1.0, fill_mode='constant')(y)

    # The crop is applied in both modes; this path alone works (see "this works").
    x = tf.keras.layers.CenterCrop(x_size, x_size)(x)
    y = tf.keras.layers.CenterCrop(y_size, y_size)(y)

    return x, y


print(tf.__version__)
# Wrap the generator in a Dataset; elements are declared as fixed-shape
# float32 tensors (image 80x80x3, label 40x40x1).
dataset = tf.data.Dataset.from_generator(data_gen, output_signature=(
    tf.TensorSpec(shape=(80, 80, 3), dtype='float32'),
    tf.TensorSpec(shape=(40, 40, 1), dtype='float32')
))

# Two variants of the same preprocessing function: crop only, and
# random augmentation followed by the crop.
crop_only_fn = functools.partial(preprocess, cropped_image_size=50, cropped_label_size=25, skip_augmentations=True)
train_preprocess_fn = functools.partial(preprocess, cropped_image_size=50, cropped_label_size=25, skip_augmentations=False)

crop_dataset = dataset.map(crop_only_fn)
train_dataset = dataset.map(train_preprocess_fn)

# Pull one element from each pipeline to compare their behavior.
it_crop = iter(crop_dataset)
ex_crop = next(it_crop)         # this works

it_train = iter(train_dataset)
ex_train = next(it_train)       # this fails (NotFoundError, see output below)

Output:

2021-12-13 16:10:15.106947: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-12-13 16:10:15.876142: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22302 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:17:00.0, compute capability: 8.6
2021-12-13 16:10:15.877134: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22302 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:65:00.0, compute capability: 8.6
2021-12-13 16:10:16.720083: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at stateful_random_ops.cc:278 : NOT_FOUND: Resource localhost/_AnonymousVar3/N10tensorflow3VarE does not exist.
2021-12-13 16:10:16.720119: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at stateful_random_ops.cc:278 : NOT_FOUND: Resource localhost/_AnonymousVar1/N10tensorflow3VarE does not exist.
2021-12-13 16:10:16.720143: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at stateful_random_ops.cc:278 : NOT_FOUND: Resource localhost/_AnonymousVar0/N10tensorflow3VarE does not exist.
2021-12-13 16:10:16.720199: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at stateful_random_ops.cc:278 : NOT_FOUND: Resource localhost/_AnonymousVar2/N10tensorflow3VarE does not exist.
2.7.0
Traceback (most recent call last):
  File "./issue_dataaug.py", line 51, in <module>
    ex_train = next(it_train)       # this fails
  File "/.../mickey/venvs/cvi/lib/python3.8/site-packages/tensorflow/python/data/ops/iterator_ops.py", line 800, in __next__
    return self._next_internal()
  File "/.../mickey/venvs/cvi/lib/python3.8/site-packages/tensorflow/python/data/ops/iterator_ops.py", line 783, in _next_internal
    ret = gen_dataset_ops.iterator_get_next(
  File "/.../mickey/venvs/cvi/lib/python3.8/site-packages/tensorflow/python/ops/gen_dataset_ops.py", line 2845, in iterator_get_next
    _ops.raise_from_not_ok_status(e, name)
  File "/.../mickey/venvs/cvi/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 7107, in raise_from_not_ok_status
    raise core._status_to_exception(e) from None  # pylint: disable=protected-access
tensorflow.python.framework.errors_impl.NotFoundError: 2 root error(s) found.
  (0) NOT_FOUND: {{function_node __inference_Dataset_map_classfunctools.partial_972}} Resource localhost/_AnonymousVar3/N10tensorflow3VarE does not exist.
	 [[{{node random_rotation_1/stateful_uniform/RngReadAndSkip}}]]
	 [[random_rotation_1/stateful_uniform/RngReadAndSkip/_14]]
  (1) NOT_FOUND: {{function_node __inference_Dataset_map_classfunctools.partial_972}} Resource localhost/_AnonymousVar3/N10tensorflow3VarE does not exist.
	 [[{{node random_rotation_1/stateful_uniform/RngReadAndSkip}}]]
0 successful operations.
0 derived errors ignored. [Op:IteratorGetNext]
2021-12-13 16:10:16.949380: W tensorflow/core/kernels/data/generator_dataset_op.cc:107] Error occurred when finalizing GeneratorDataset iterator: FAILED_PRECONDITION: Python interpreter state is not initialized. The process may be terminated.
	 [[{{node PyFunc}}]]

How did you install TensorFlow? It looks like an operation your code requires isn’t available. I think this is usually due to a bad install or a version mismatch.

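The issue was that the RandomFlip and RandomRotation layers needed to be initialized before being used in the mapped function; the code below creates them once, in the `__init__` of a custom layer, and then maps that layer instance over the dataset.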

This is the code that solves the issue (credit to M.Innat from StackOverflow):

def data_gen():
    """Yield ten random (image, label) pairs as uint8 arrays.

    Each image has shape (80, 80, 3) and each label has shape (40, 40, 1).
    Uniform random floats are scaled by 255 and then cast to uint8.
    """
    for _ in range(10):
        # rgb image
        image = (np.random.random(size=(80, 80, 3)) * 255).astype('uint8')
        # downsized mono image
        label = (np.random.random(size=(40, 40, 1)) * 255).astype('uint8')
        yield image, label


class Augment(tf.keras.layers.Layer):
    """Layer that applies a random flip and a random rotation to an image/label pair.

    The random layers are created once, in `__init__`, rather than on every
    call. The image branch (`flip_a`, `rot_a`) and the label branch
    (`flip_b`, `rot_b`) use separate layer instances that are all built
    with the same `seed`.
    NOTE(review): presumably the shared seed is meant to keep the image and
    label transforms aligned -- confirm for your TensorFlow version.
    """

    def __init__(self, seed=42):
        """Create the flip and rotation layers for both branches.

        Args:
            seed: Seed passed to every random layer created here.
        """
        super().__init__()
        flip_options = {"mode": "horizontal", "seed": seed}
        rotation_options = {"factor": 1.0, "fill_mode": 'constant', "seed": seed}

        self.flip_a = tf.keras.layers.RandomFlip(**flip_options)
        self.flip_b = tf.keras.layers.RandomFlip(**flip_options)

        self.rot_a = tf.keras.layers.RandomRotation(**rotation_options)
        self.rot_b = tf.keras.layers.RandomRotation(**rotation_options)

    def call(self, inputs, labels):
        """Flip then rotate `inputs` and `labels`, each with its own layers.

        Returns:
            A tuple `(x, y)` of the augmented inputs and labels.
        """
        augmented_inputs = self.rot_a(self.flip_a(inputs))
        augmented_labels = self.rot_b(self.flip_b(labels))
        return augmented_inputs, augmented_labels


def preprocess(image, label, cropped_image_size, cropped_label_size):
    """Center-crop an image/label pair and return both as uint8 tensors.

    Args:
        image: Image tensor to crop.
        label: Label tensor to crop.
        cropped_image_size: Height and width of the square crop of `image`.
        cropped_label_size: Height and width of the square crop of `label`.

    Returns:
        A tuple `(x, y)` with the cropped image and label, both cast to uint8.
    """
    def _center_crop(tensor, size):
        # Cast to float32 for the crop layer, then back to uint8 afterwards.
        as_float = tf.cast(tensor, dtype=tf.float32)
        cropped = tf.keras.layers.CenterCrop(size, size)(as_float)
        return tf.cast(cropped, dtype=tf.uint8)

    cropped_image = _center_crop(image, cropped_image_size)
    cropped_label = _center_crop(label, cropped_label_size)
    return cropped_image, cropped_label


# Wrap the generator in a Dataset; elements are declared as fixed-shape
# uint8 tensors (image 80x80x3, label 40x40x1), matching what data_gen yields.
dataset = tf.data.Dataset.from_generator(data_gen, output_signature=(
    tf.TensorSpec(shape=(80, 80, 3), dtype='uint8'),
    tf.TensorSpec(shape=(40, 40, 1), dtype='uint8')
))

# Bind the crop sizes so the function takes only (image, label) from map().
crop_only_fn = functools.partial(preprocess, 
                                 cropped_image_size=50,  
                                 cropped_label_size=25)

# This works: crop-only pipeline; fetch one element and inspect its shapes.
crop_dataset = dataset.map(crop_only_fn)
x, y = next(iter(crop_dataset))
x.shape, y.shape
(TensorShape([50, 50, 3]), TensorShape([25, 25, 1]))

# Training pipeline: the same crop step, followed by a separate augmentation step.
train_preprocess_fn = functools.partial(preprocess, 
                                        cropped_image_size=50,
                                        cropped_label_size=25)
train_dataset = dataset.map(train_preprocess_fn)
# Augmentation is applied as a second map step, using a single Augment
# instance that is constructed here, before the dataset is iterated.
train_dataset = train_dataset.map(Augment())
# Fetch one element and inspect its shapes.
x, y = next(iter(train_dataset))
x.shape, y.shape
(TensorShape([50, 50, 3]), TensorShape([25, 25, 1]))