The following code is intended to set up a vectorizer for pipe-delimited text and then clone the vectorizer.
# Minimal reproduction: adapt a TextVectorization layer that splits on "|",
# then try to clone it.
text_dataset = tf.data.Dataset.from_tensor_slices(
    [
        "this|is|some|pipe-delimited|text",
        "some|more|pipe-delimited|text",
        "yet|more|pipe-delimited|text",
    ]
)

# Standardization is disabled; tokenization is delegated to an inline lambda
# that splits each string on the pipe character.
vectorizer = tf.keras.layers.TextVectorization(
    max_tokens=10,
    standardize=None,
    split=lambda text: tf.strings.split(text, sep="|"),
)
vectorizer.adapt(text_dataset)

# NOTE: this call raises the TypeError shown in the traceback below.
# clone_model rebuilds the layer via from_config(get_config()), and
# TextVectorization.from_config hands the raw `split` callable to
# deserialize_keras_object, which rejects anything that is not a dict.
tf.keras.models.clone_model(vectorizer)
Output:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[232], line 13
10 vectorizer.adapt(text_dataset)
12 # tf.keras.saving.deserialize_keras_object(vectorizer)
---> 13 tf.keras.models.clone_model(vectorizer)
File ~/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/keras/src/models/cloning.py:555, in clone_model(model, input_tensors, clone_function)
546 raise ValueError(
547 "Arguments clone_function and input_tensors "
548 "are only supported for Sequential models "
(...)
552 f"input_tensors={input_tensors}"
553 )
554 # Note that a custom object scope may be required in this case.
--> 555 return model.__class__.from_config(model.get_config())
File ~/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/keras/src/layers/preprocessing/text_vectorization.py:538, in TextVectorization.from_config(cls, config)
534 config["standardize"] = deserialize_keras_object(
535 config["standardize"]
536 )
537 if config["split"] not in (WHITESPACE, CHARACTER):
--> 538 config["split"] = deserialize_keras_object(config["split"])
539 return cls(**config)
File ~/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/keras/src/saving/serialization_lib.py:619, in deserialize_keras_object(config, custom_objects, safe_mode, **kwargs)
617 return config
618 if not isinstance(config, dict):
--> 619 raise TypeError(f"Could not parse config: {config}")
621 if "class_name" not in config or "config" not in config:
622 return {
623 key: deserialize_keras_object(
624 value, custom_objects=custom_objects, safe_mode=safe_mode
625 )
626 for key, value in config.items()
627 }
TypeError: Could not parse config: <function <lambda> at 0x7fa0024f51b0>
If I omit the split parameter, it runs with no errors.
(I also tried defining a full-blown Callable class with get_config and from_config, decorated with @tf.keras.saving.register_keras_serializable(), passing an instance of it as the split parameter, and cloning failed with a similar error.)