import time

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

@tf.keras.utils.register_keras_serializable()
class PositionalEmbedding(keras.layers.Layer):
    def __init__(self, seq_len, input_dim, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_embed_layer = keras.layers.Embedding(input_dim=input_dim, output_dim=output_dim)
        self.position_embed_layer = keras.layers.Embedding(input_dim=seq_len, output_dim=output_dim)
        self.seq_len = seq_len
        self.input_dim = input_dim   # no trailing comma; otherwise this becomes a tuple and breaks get_config()
        self.output_dim = output_dim

    def call(self, inputs):
        # inputs.shape = (None, seq_len)
        # output.shape = (None, seq_len, embed_size)
        token_embed = self.token_embed_layer(inputs)
        # positions = [0, 1, 2, ..., seq_len - 1]
        positions = tf.range(start=0, limit=self.seq_len, delta=1)
        # positions.shape      = (seq_len,)
        # position_embed.shape = (seq_len, embed_size)
        position_embed = self.position_embed_layer(positions)
        return token_embed + position_embed

    def compute_mask(self, inputs, mask=None):
        # Padding tokens (id 0) are masked out downstream.
        return tf.math.not_equal(inputs, 0)

    def get_config(self):
        config = super().get_config()
        config.update({
            "seq_len": self.seq_len,
            "input_dim": self.input_dim,
            "output_dim": self.output_dim
        })
        return config
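A quick sanity check of this layer (dummy values chosen only to illustrate the output shape and the padding mask; they are not my real settings):

# Illustration only: tiny vocabulary and sequence length.
demo = PositionalEmbedding(seq_len=5, input_dim=100, output_dim=8)
sample = tf.constant([[4, 7, 9, 0, 0]], dtype='int64')   # 0 = padding token
out = demo(sample)
print(out.shape)                      # (1, 5, 8)
print(demo.compute_mask(sample))      # [[ True  True  True False False]]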
@tf.keras.utils.register_keras_serializable()
class TransformerEncoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation="relu"),
             layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        # Let the padding mask produced by PositionalEmbedding be passed into this layer.
        self.supports_masking = True

    def call(self, inputs, mask=None):
        if mask is not None:
            # (batch, seq_len) -> (batch, 1, seq_len) so it broadcasts over query positions.
            mask = mask[:, tf.newaxis, :]
        attention_output = self.attention(inputs, inputs, attention_mask=mask)
        proj_input = self.layernorm_1(inputs + attention_output)
        proj_output = self.dense_proj(proj_input)
        return self.layernorm_2(proj_input + proj_output)

    def get_config(self):
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        })
        return config
@tf.keras.utils.register_keras_serializable()
class TransformerDecoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.attention_2 = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation="relu"),
             layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()
        self.supports_masking = True

    def get_config(self):
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        })
        return config

    def call(self, inputs, encoder_outputs, mask=None):
        # Causal self-attention over the target sequence.
        attention_output_1 = self.attention_1(query=inputs, value=inputs, key=inputs, use_causal_mask=True)
        attention_output_1 = self.layernorm_1(inputs + attention_output_1)
        # Cross-attention over the encoder outputs.
        attention_output_2 = self.attention_2(query=attention_output_1, value=encoder_outputs, key=encoder_outputs)
        attention_output_2 = self.layernorm_2(attention_output_1 + attention_output_2)
        proj_output = self.dense_proj(attention_output_2)
        return self.layernorm_3(attention_output_2 + proj_output)
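The model below also relies on a few constants and on the train_ds/val_ds datasets, which are defined earlier. For context, the shape comments below correspond roughly to these settings (MAX_VOCAB here is only an example value, not necessarily the one I trained with):

# Assumed hyperparameters; SEQ_LEN and EMBED_DIM follow the shape comments below,
# MAX_VOCAB is only an example vocabulary size.
SEQ_LEN = 80
EMBED_DIM = 256
MAX_VOCAB = 15000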
encoder_inputs = keras.layers.Input(shape=(None,), dtype='int64', name='english')
encoder_embed_outs = PositionalEmbedding(SEQ_LEN, MAX_VOCAB, EMBED_DIM)(encoder_inputs)
encoder_transformer_outs = TransformerEncoder(num_heads=8, embed_dim=EMBED_DIM, dense_dim=2048)(encoder_embed_outs)
# encoder_transformer_outs.shape == (None, 80, 256)

decoder_inputs = keras.layers.Input(shape=(None,), dtype='int64', name='spanish')
decoder_embed_outs = PositionalEmbedding(SEQ_LEN, MAX_VOCAB, EMBED_DIM)(decoder_inputs)
# decoder_embed_outs.shape == (None, 80, 256)
decoder_transformer_outs = TransformerDecoder(num_heads=8, embed_dim=EMBED_DIM, dense_dim=2048)(decoder_embed_outs, encoder_transformer_outs)
decoder_dropout_outs = keras.layers.Dropout(0.5)(decoder_transformer_outs)
decoder_outputs = keras.layers.Dense(MAX_VOCAB, activation='softmax')(decoder_dropout_outs)

transformer_model = keras.Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)
transformer_model.summary()
transformer_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
start_time = time.time()
cb_checkpoint = keras.callbacks.ModelCheckpoint('model/eng_spa_transformer', save_best_only=True)
history = transformer_model.fit(train_ds, epochs=3, validation_data=val_ds, callbacks=[cb_checkpoint])
print('elapsed_time=', time.time() - start_time)
After model.fit finishes, I save the model and then try to load it back:
transformer_model.save('model/eng_spa_transformer')
new_model = keras.models.load_model('model/eng_spa_transformer.tf',
                                    custom_objects={'TransformerDecoder': TransformerDecoder,
                                                    'TransformerEncoder': TransformerEncoder,
                                                    'PositionalEmbedding': PositionalEmbedding})
I loaded the model into new_model, but it is not working well…
eng=Come home with me., Spanish=
How can I save the model and then load it back correctly?
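For reference, this is the save/load flow I was expecting to work, written as a minimal sketch rather than a confirmed fix: the path given to keras.models.load_model matches the path given to save() (no extra '.tf' suffix), and since the custom layers are decorated with @tf.keras.utils.register_keras_serializable(), passing custom_objects should not even be strictly required. The dummy-batch comparison at the end is just my assumption about how to verify the reload.

# Sketch, not a confirmed fix: save and reload from the *same* path.
transformer_model.save('model/eng_spa_transformer')        # TF SavedModel directory

reloaded_model = keras.models.load_model(
    'model/eng_spa_transformer',                           # same path as in save()
    custom_objects={                                       # likely optional, layers are registered
        'PositionalEmbedding': PositionalEmbedding,
        'TransformerEncoder': TransformerEncoder,
        'TransformerDecoder': TransformerDecoder,
    },
)

# Compare the reloaded model with the original on one dummy batch
# (all-ones token ids, shape (1, SEQ_LEN), to avoid an all-padding mask).
dummy_eng = tf.ones((1, SEQ_LEN), dtype='int64')
dummy_spa = tf.ones((1, SEQ_LEN), dtype='int64')
orig_out = transformer_model([dummy_eng, dummy_spa])
new_out = reloaded_model([dummy_eng, dummy_spa])
print('max abs diff:', float(tf.reduce_max(tf.abs(orig_out - new_out))))  # expect ~0.0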