[
tfds.dataset_builders.TfDataBuilder
](tfds.dataset_builders.TfDataBuilder | TensorFlow Datasets)
After creating a dataset several ways from image data, tfds.dataset_builders.TfDataBuilder.download_and_prepare() throws an error that stems from being unable to hash a numpy array. The example at the website above works fine on simple data types, but the following throws the same error no matter how I make the dataset with images.
TfDataBuilder is new and only available in the nightly build. Is it just not fully developed to handle all data types or is this my error?
data_dir = r’C:\Users\bengh\SeniorProject\rawDominoImages’
img_height = 240
img_width = 240
train_ds = tf.keras.utils.image_dataset_from_directory(
data_dir2,
validation_split=0.2,
subset=“training”,
seed=123,
image_size=(img_height, img_width),
batch_size=None)
print(type(train_ds))
print('train_ds.class_names = ',train_ds.class_names)
print(train_ds)
Produces the following output.
Found 29 files belonging to 28 classes.
Using 24 files for training.
<class ‘tensorflow.python.data.ops.dataset_ops.ShuffleDataset’>
train_ds.class_names = [‘0_0’, ‘0_1’, ‘0_2’, ‘0_3’, ‘0_4’, ‘0_5’, ‘0_6’, ‘1_1’, ‘1_2’, ‘1_3’, ‘1_4’, ‘1_5’, ‘1_6’, ‘2_2’, ‘2_3’, ‘2_4’, ‘2_5’, ‘2_6’, ‘3_3’, ‘3_4’, ‘3_5’, ‘3_6’, ‘4_4’, ‘4_5’, ‘4_6’, ‘5_5’, ‘5_6’, ‘6_6’]
<ShuffleDataset element_spec=(TensorSpec(shape=(240, 240, 3), dtype=tf.float32, name=None), TensorSpec(shape=(), dtype=tf.int32, name=None))>
Now with the above dataset, I would like to use tfds.dataset_builders.TfDataBuilder .
my_dataset_builder = tfds.core.dataset_builders.TfDataBuilder(
name=“my_dataset”,
config=None,
version=“1.0.0”,
data_dir=None,
split_datasets={
“train”: train_ds,
},
features=tfds.features.FeaturesDict({
‘image’: tfds.features.Image(shape=(240, 240, 3)),
‘label’: tfds.features.ClassLabel(
names=[‘0_0’, ‘0_1’, ‘0_2’, ‘0_3’, ‘0_4’, ‘0_5’, ‘0_6’, ‘1_1’, ‘1_2’, ‘1_3’, ‘1_4’, ‘1_5’, ‘1_6’, ‘2_2’, ‘2_3’, ‘2_4’, ‘2_5’, ‘2_6’, ‘3_3’, ‘3_4’, ‘3_5’, ‘3_6’, ‘4_4’, ‘4_5’, ‘4_6’, ‘5_5’, ‘5_6’, ‘6_6’],
doc=‘Class of picture of a domino’),
}),
description=“My dataset with test images.”,
release_notes={
“1.0.0”: “Initial release with dominos up to 6_6”,
}
)
print(my_dataset_builder.info)
Make the builder store the data as a TFDS dataset.
my_dataset_builder.download_and_prepare()
which produces:
tfds.core.DatasetInfo(
name=‘my_dataset’,
full_name=‘my_dataset/1.0.0’,
description=“”"
My dataset with test images.
“”“,
homepage=‘https://www.tensorflow.org/datasets/catalog/my_dataset’,
data_path=‘C:\Users\bengh\tensorflow_datasets\my_dataset\1.0.0’,
file_format=tfrecord,
download_size=Unknown size,
dataset_size=Unknown size,
features=FeaturesDict({
‘image’: Image(shape=(240, 240, 3), dtype=tf.uint8),
‘label’: ClassLabel(shape=(), dtype=tf.int64, num_classes=28),
}),
supervised_keys=None,
disable_shuffling=False,
splits={
},
citation=”“”“”",
)
Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to C:\Users\bengh\tensorflow_datasets\my_dataset\1.0.0…
Generating splits…: 0%
0/1 [00:00<?, ? splits/s]
Generating train examples…:
0/? [00:00<?, ? examples/s]
TypeError Traceback (most recent call last)
Input In [29], in <cell line: 25>()
22 print(my_dataset_builder.info)
24 # Make the builder store the data as a TFDS dataset.
—> 25 my_dataset_builder.download_and_prepare()
File ~.conda\envs\tf2_9\lib\site-packages\tensorflow_datasets\core\dataset_builder.py:521, in DatasetBuilder.download_and_prepare(self, download_dir, download_config, file_format)
519 self.info.read_from_directory(self._data_dir)
520 else:
→ 521 self._download_and_prepare(
522 dl_manager=dl_manager,
523 download_config=download_config,
524 )
526 # NOTE: If modifying the lines below to put additional information in
527 # DatasetInfo, you’ll likely also want to update
528 # DatasetInfo.read_from_directory to possibly restore these attributes
529 # when reading from package data.
530 self.info.download_size = dl_manager.downloaded_size
File ~.conda\envs\tf2_9\lib\site-packages\tensorflow_datasets\core\dataset_builder.py:1259, in GeneratorBasedBuilder._download_and_prepare(self, dl_manager, download_config)
1248 for split_name, generator in utils.tqdm(
1249 split_generators.items(),
1250 desc=“Generating splits…”,
1251 unit=" splits",
1252 leave=False,
1253 ):
1254 filename_template = naming.ShardedFileTemplate(
1255 split=split_name,
1256 dataset_name=self.name,
1257 data_dir=self.data_path,
1258 filetype_suffix=path_suffix)
→ 1259 future = split_builder.submit_split_generation(
1260 split_name=split_name,
1261 generator=generator,
1262 filename_template=filename_template,
1263 disable_shuffling=self.info.disable_shuffling,
1264 )
1265 split_info_futures.append(future)
1267 # Process the result of the beam pipeline.
File ~.conda\envs\tf2_9\lib\site-packages\tensorflow_datasets\core\split_builder.py:311, in SplitBuilder.submit_split_generation(self, split_name, generator, filename_template, disable_shuffling)
308 # Depending on the type of generator, we use the corresponding
309 # _build_from_xyz
method.
310 if isinstance(generator, collections.abc.Iterable):
→ 311 return self._build_from_generator(**build_kwargs)
312 else: # Otherwise, beam required
313 unknown_generator_type = TypeError(
314 f’Invalid split generator value for split {split_name}
. ’
315 ‘Expected generator or apache_beam object. Got: ’
316 f’{type(generator)}')
File ~.conda\envs\tf2_9\lib\site-packages\tensorflow_datasets\core\split_builder.py:383, in SplitBuilder._build_from_generator(self, split_name, generator, filename_template, disable_shuffling)
381 example = self._features.encode_example(example)
382 except Exception as e: # pylint: disable=broad-except
→ 383 utils.reraise(e, prefix=f’Failed to encode example:\n{example}\n’)
384 writer.write(key, example)
385 shard_lengths, total_size = writer.finalize()
File ~.conda\envs\tf2_9\lib\site-packages\tensorflow_datasets\core\split_builder.py:381, in SplitBuilder._build_from_generator(self, split_name, generator, filename_template, disable_shuffling)
373 for key, example in utils.tqdm(
374 generator,
375 desc=f’Generating {split_name} examples…‘,
(…)
378 leave=False,
379 ):
380 try:
→ 381 example = self._features.encode_example(example)
382 except Exception as e: # pylint: disable=broad-except
383 utils.reraise(e, prefix=f’Failed to encode example:\n{example}\n’)
File ~.conda\envs\tf2_9\lib\site-packages\tensorflow_datasets\core\features\features_dict.py:235, in FeaturesDict.encode_example(self, example_dict)
233 “”“See base class for details.”“”
234 example = {}
→ 235 for k, (feature, example_value) in utils.zip_dict(self._feature_dict,
236 example_dict):
237 try:
238 example[k] = feature.encode_example(example_value)
File ~.conda\envs\tf2_9\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py:105, in zip_dict(*dicts)
103 def zip_dict(*dicts):
104 “”“Iterate over items of dictionaries grouped by their keys.”“”
→ 105 for key in set(itertools.chain(*dicts)): # set merge all keys
106 # Will raise KeyError if the dict don’t have the same keys
107 yield key, tuple(d[key] for d in dicts)
TypeError: Failed to encode example:
(array([[[160.66667 , 140.66667 , 107.666664],
[164. , 142.5 , 110. ],
[156.5 , 133.5 , 101.5 ],
…,
[158.83344 , 140.83344 , 116.833435],
[160. , 143. , 118.5 ],
[158.66669 , 142.66669 , 117.66669 ]],
[[158. , 138. , 105. ],
[159.5 , 138. , 105.5 ],
[159.33333 , 136.33333 , 104.333336],
...,
[156.3335 , 138.3335 , 114.333496],
[162.5 , 145.5 , 121. ],
[161.16666 , 145.16666 , 120.16666 ]],
[[155.16667 , 132.16667 , 100.166664],
[158.5 , 135.5 , 103.5 ],
[163.5 , 140.5 , 108.5 ],
...,
[156.50006 , 138.50006 , 116.50006 ],
[159.5 , 141.5 , 119.5 ],
[158.33331 , 140.33331 , 118.33331 ]],
...,
[[145.83333 , 112.833336, 77.833336],
[145. , 112. , 77. ],
[144.16667 , 111.166664, 76.166664],
...,
[118.83331 , 91.83331 , 64.83331 ],
[116.5 , 89.5 , 61.5 ],
[115.83334 , 88.83334 , 59.833344]],
[[142.83333 , 109.833336, 74.833336],
[141. , 108. , 73. ],
[140. , 107. , 72. ],
...,
[119. , 92. , 65. ],
[118. , 91. , 64. ],
[116.16666 , 89.16666 , 62.166656]],
[[140.33333 , 107.333336, 72.333336],
[141. , 108. , 73. ],
[140.83333 , 107.833336, 72.833336],
...,
[120. , 93. , 66. ],
[119.5 , 92.5 , 65.5 ],
[116.49997 , 89.49997 , 62.49997 ]]], dtype=float32), 8)
unhashable type: ‘numpy.ndarray’