Hi, I encountered a weird error when I pass a list of byte sequences as the vocabulary of the Keras StringLookup layer and then try to save the model. Is there a way to persist a list of bytes?
# Minimal reproduction: a model containing StringLookup layers whose
# vocabulary is a list of bytes objects fails when the model is saved.
import tensorflow as tf
# Vocabulary entries are bytes literals (b"..."), not str.
a_mapping = [b"aaab", b"bbbc",b"cccd"]
# First lookup layer, built from the bytes vocabulary with default options.
label1_layer = tf.keras.layers.StringLookup(vocabulary=a_mapping)
# Second lookup layer over the same vocabulary, with invert=True.
# NOTE(review): per the StringLookup docs, invert=True maps integer indices
# back to vocabulary terms, so this should undo label1_layer -- confirm.
label2_layer = tf.keras.layers.StringLookup(vocabulary=a_mapping, output_mode="int", invert=True)
# Symbolic model input, declared with dtype=object.
# NOTE(review): tf.string is presumably the intended dtype for string
# input -- confirm.
input_type = tf.keras.Input(dtype=object, shape=(None,1))
# Wire the model: input -> label1_layer -> label2_layer.
a_model = tf.keras.Model(
inputs=input_type,
outputs=label2_layer(label1_layer(input_type))
)
a_model.compile()
# Call the model on a scalar bytes constant; the reported traceback points
# at the save() call, so this call itself is not what raises.
a_model(tf.constant(b"aaab"))
# This is the failing statement. It raises:
#   TypeError: Unable to serialize b'aaab' to JSON. Unrecognized type <class 'bytes'>.
# NOTE(review): presumably the layer vocabulary is written into the JSON
# model config and bytes entries cannot be JSON-encoded; supplying str
# entries (e.g. "aaab") likely avoids the error -- confirm.
a_model.save("a_string_lookup")
This raises the following error:
TypeError: Unable to serialize b'aaab' to JSON. Unrecognized type <class 'bytes'>.
TypeError Traceback (most recent call last)
Cell In [18], line 11
9 a_model.compile()
10 a_model(tf.constant(b"aaab"))
---> 11 a_model.save("a_string_lookup")
File ~/miniconda3/envs/hmic-playground-py39/lib/python3.9/site-packages/keras/utils/traceback_utils.py:67, in filter_traceback.<locals>.error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
File ~/miniconda3/envs/hmic-playground-py39/lib/python3.9/json/encoder.py:199, in JSONEncoder.encode(self, o)
195 return encode_basestring(o)
196 # This doesn't pass the iterator directly to ''.join() because the
197 # exceptions aren't as detailed. The list call should be roughly
198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
200 if not isinstance(chunks, (list, tuple)):
201 chunks = list(chunks)
File ~/miniconda3/envs/hmic-playground-py39/lib/python3.9/json/encoder.py:257, in JSONEncoder.iterencode(self, o, _one_shot)
252 else:
253 _iterencode = _make_iterencode(
254 markers, self.default, _encoder, self.indent, floatstr,
255 self.key_separator, self.item_separator, self.sort_keys,
256 self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)
TypeError: Unable to serialize b'aaab' to JSON. Unrecognized type <class 'bytes'>.
Is this a bug in tf.keras? Should I report it?