Hi, I’m trying to use tf.data.Dataset.list_files
to load .tiff
images and infer their labels from their names.
I use the following code but stumbled upon a strange issue, as described bellow:
import os
import datetime as dt
import numpy as np
import pathlib
from pathlib import Path
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import cv2
DATA_PATH = Path('PATH TO DATA')
BATCH_SIZE = 32
INPUT_IMAGE_SHAPE = (128, 128, 1)
CROP_SHAPE = INPUT_IMAGE_SHAPE
CENTRAL_CROP_PROP = .7
BRIGHTNESS_DELTA = 0.1
CONTRAST = (0.4, 0.6)
class ConvModel(keras.Model):
def __init__(self, input_shape):
super().__init__()
self.input_image_shape = input_shape
self.model = keras.Sequential([
layers.Input(shape=input_shape),
layers.Conv2D(32, 3),
layers.BatchNormalization(),
layers.ReLU(),
layers.MaxPool2D(),
layers.Conv2D(64, 5),
layers.BatchNormalization(),
layers.ReLU(),
layers.MaxPool2D(),
layers.Conv2D(128, 3, kernel_regularizer=keras.regularizers.l2(0.01)),
layers.BatchNormalization(),
layers.ReLU(),
layers.Flatten(),
layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
layers.Dropout(0.5),
layers.Dense(10)
])
def call(self, inputs):
return self.model(inputs)
def preprocessing_func(image):
img = tf.image.central_crop(image, CENTRAL_CROP_PROP)
if img.shape[2] == 3:
img = tf.image.rgb_to_grayscale(img)
return img
def augment(image):
img = tf.image.random_crop(image, size=CROP_SHAPE) # Slices a shape size portion out of value at a uniformly chosen offset. Requires value.shape >= size.
img = tf.image.random_brightness(img, max_delta=BRIGHTNESS_DELTA) # Equivalent to adjust_brightness() using a delta randomly picked in the interval [-max_delta, max_delta)
img = tf.image.random_contrast(img, lower=CONTRAST[0], upper=CONTRAST[1]) # Equivalent to adjust_contrast() but uses a contrast_factor randomly picked in the interval [lower, upper).
img = tf.image.random_flip_left_right(img)
img = tf.image.random_flip_up_down(img)
return img
def load_image(image_file):
# 1) Decode the path
image_file = image_file.decode('utf-8')
# 2) Read the image
img = cv2.imread(image_file)
if len(img.shape) < 3:
img = np.expand_dims(img, axis=-1)
img = preprocessing_func(image=img)
img = augment(img)
img = tf.cast(img, tf.float32)
img.set_shape(INPUT_IMAGE_SHAPE)
# 3) Get the label
label = tf.strings.split(image_file, "\\")[-1]
label = tf.strings.substr(label, pos=0, len=1)
label = tf.strings.to_number(label, out_type=tf.float32)
label = tf.cast(label, tf.float32)
label.set_shape([])
return img, label
def _fixup_shape(images, labels):
images.set_shape(INPUT_IMAGE_SHAPE)
labels.set_shape([])
return images, labels
if __name__=='__main__':
train_ds = tf.data.Dataset.list_files(str(DATA_PATH / '*.tiff'))
train_ds = train_ds.map(lambda x: tf.numpy_function(load_image, [x], (tf.float32, tf.float32)))
# train_ds = train_ds.map(_fixup_shape)
train_ds = train_ds.batch(BATCH_SIZE)
train_ds = train_ds.shuffle(buffer_size=1000)
train_ds = train_ds.prefetch(tf.data.AUTOTUNE)
train_ds = train_ds.repeat()
model = ConvModel(input_shape=INPUT_IMAGE_SHAPE)
model.compile(
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
optimizer=keras.optimizers.Adam(learning_rate=3e-4),
metrics=['accuracy']
)
train_log_dir = f'./logs/{dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}/train_data'
callbacks = [
keras.callbacks.TensorBoard(
log_dir=train_log_dir,
write_images=True
)
]
model.fit(
train_ds,
batch_size=32,
steps_per_epoch=10,
epochs=10,
callbacks=callbacks
)
While I try to run it it throws up an error :
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-213-b1f3d317135b> in <module>
----> 1 model.fit(
2 train_ds,
3 batch_size=32,
4 steps_per_epoch=10,
5 epochs=10,
~\anaconda3\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
~\anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py in autograph_handler(*args, **kwargs)
1127 except Exception as e: # pylint:disable=broad-except
1128 if hasattr(e, "ag_error_metadata"):
-> 1129 raise e.ag_error_metadata.to_exception(e)
1130 else:
1131 raise
ValueError: in user code:
File "C:\Users\mchls\anaconda3\lib\site-packages\keras\engine\training.py", line 878, in train_function *
return step_function(self, iterator)
File "C:\Users\mchls\anaconda3\lib\site-packages\keras\engine\training.py", line 867, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "C:\Users\mchls\anaconda3\lib\site-packages\keras\engine\training.py", line 860, in run_step **
outputs = model.train_step(data)
File "C:\Users\mchls\anaconda3\lib\site-packages\keras\engine\training.py", line 817, in train_step
self.compiled_metrics.update_state(y, y_pred, sample_weight)
File "C:\Users\mchls\anaconda3\lib\site-packages\keras\engine\compile_utils.py", line 439, in update_state
self.build(y_pred, y_true)
File "C:\Users\mchls\anaconda3\lib\site-packages\keras\engine\compile_utils.py", line 359, in build
self._metrics = tf.__internal__.nest.map_structure_up_to(y_pred, self._get_metric_objects,
File "C:\Users\mchls\anaconda3\lib\site-packages\keras\engine\compile_utils.py", line 485, in _get_metric_objects
return [self._get_metric_object(m, y_t, y_p) for m in metrics]
File "C:\Users\mchls\anaconda3\lib\site-packages\keras\engine\compile_utils.py", line 485, in <listcomp>
return [self._get_metric_object(m, y_t, y_p) for m in metrics]
File "C:\Users\mchls\anaconda3\lib\site-packages\keras\engine\compile_utils.py", line 506, in _get_metric_object
y_t_rank = len(y_t.shape.as_list())
ValueError: as_list() is not defined on an unknown TensorShape.
though manually running X.shape.as_list()
and y.shape.as_list()
works, as shown below:
X, y = next(iter(train_ds))
X.shape.as_list(), y.shape.as_list()
[OUT] ([16, 128, 128, 1], [16])
This issue is fixed by manually mapping the following function on the dataset
by train_ds = train_ds.map(_fixup_shape).batch(BATCH_SIZE)
:
def _fixup_shape(images, labels):
images.set_shape([128, 128, 1])
labels.set_shape([]) # I have 19 classes
# weights.set_shape([None])
return images, labels
if __name__=='__main__':
train_ds = tf.data.Dataset.list_files(str(DATA_PATH / '*.tiff'))
train_ds = train_ds.map(lambda x: tf.numpy_function(load_image, [x], (tf.float32, tf.float32)))
train_ds = train_ds.map(_fixup_shape)
train_ds = train_ds.batch(BATCH_SIZE)
train_ds = train_ds.shuffle(buffer_size=1000)
train_ds = train_ds.prefetch(tf.data.AUTOTUNE)
train_ds = train_ds.repeat()
as described in this GitHub thread there is a problem while using the tf.data.Dataset.list_files
, which can be solved by mapping a fix function on the dataset.
Is it a bug in TF 2.6.1 or is it an expected behavior?
Thanks