TensorFlow inference on CPU after training a model on GPU with AMP and XLA

I am facing a challenge running inference on CPU. The model was trained on GPU using AMP and XLA, and inference throws the following error:

tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: No OpKernel was registered to support Op 'AvgPool3D' used by {{node model_1/average_pooling3d_1/AvgPool3D}} with these attrs: [strides=[1, 1, 1, 7, 1], data_format="NDHWC", ksize=[1, 1, 1, 7, 1], padding="SAME", T=DT_HALF]
Registered devices: [CPU]
Registered kernels:
  device='GPU'; T in [DT_HALF]
  device='GPU'; T in [DT_FLOAT]
  device='CPU'; T in [DT_FLOAT]

After I disable AMP for the AveragePooling3D layer by setting its dtype to 'float32', the code runs, but it takes a very long time.
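For clarity, this is the kind of per-layer override I mean (a minimal sketch; passing dtype='float32' to the layer constructor opts just that one layer out of AMP while the rest of the model stays in mixed precision):

# Sketch: keep the global mixed_float16 policy, but pin the pooling
# layer to float32 so it never requests the DT_HALF AvgPool3D kernel.
import tensorflow as tf

tf.keras.mixed_precision.set_global_policy('mixed_float16')

avg_pool_3d = tf.keras.layers.AveragePooling3D(
    pool_size=(1, 1, 7), padding='same', dtype='float32')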

The model is stored in HDF5 (.h5) format.

Hi Yogesh_Nakhate, welcome to the TensorFlow Forum!

What version of TensorFlow are you currently using? Please share standalone code and supporting files to replicate the issue reported above. Thank you!

Hi Chunduriv,

Thanks for your message. Please find sample code for training and inference below.

TensorFlow version:

print(tf.__version__)
2.5.0

Training code (saves the model in .h5 format):

# Import necessary modules and libraries
import tensorflow as tf
# Use the public tf.keras API rather than tensorflow.python.keras,
# so that all layers pick up the global mixed-precision policy.
from tensorflow.keras.layers import Input, Conv3D, AveragePooling3D
from tensorflow.keras.models import Model


policy = tf.keras.mixed_precision.Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy)

g1 = tf.random.Generator.from_seed(1)
x_train = g1.normal(shape=[4, 32, 32, 4, 1])
y_train = g1.normal(shape=[4, 32, 32, 4, 1])

class Resnet_base_3D():
    def __init__(self, config=None, hparams_in={}):
        self.x = 5
    def model(self):
        inputs = Input(shape=(32, 32, 4, 1))
        #inputs = Input(shape=(1000, 32, 7, 1))
        x = Conv3D(4, 3, padding='same', use_bias=False)(inputs)
        last_layer = Conv3D(1, (1, 1, 1), activation='linear', use_bias=False)(x)
        # Average pooling; under mixed_float16 this computes in float16.
        avg_pool_3d = AveragePooling3D(pool_size=(1, 1, 7), padding='same')
        last_layer = avg_pool_3d(last_layer)
        print('Outputs dtype:********** %s' % last_layer.dtype.name)
        return Model(inputs=[inputs], outputs=[last_layer])
    
# Build model.
app=Resnet_base_3D()
model = app.model()
model.compile(
    loss="binary_crossentropy",
    optimizer='sgd')

model.fit(x_train, y_train, epochs=30)
model.summary()
model.save("my_model_test_h2.h5",save_format='h5')
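To confirm which layers the policy actually puts in half precision (the pooling layer is one of them), the per-layer dtype policies can be inspected after building the model:

# Under mixed_float16, AveragePooling3D computes in float16, which is
# why the saved model requests the DT_HALF AvgPool3D kernel later.
for layer in model.layers:
    print(layer.name, layer.dtype_policy.name,
          'compute:', layer.compute_dtype,
          'variables:', layer.variable_dtype)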

Inference code:

from tensorflow import keras
import tensorflow as tf

model1 = keras.models.load_model(<path to h5 model>)
g1 = tf.random.Generator.from_seed(1234)
# Input needs the trailing channel axis the model was built with.
x_test = g1.normal(shape=[4, 32, 32, 4, 1])
y1 = model1.predict(x_test)


Running the inference code on CPU throws the same error:

tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: No OpKernel was registered to support Op 'AvgPool3D' used by {{node model_1/average_pooling3d_1/AvgPool3D}} with these attrs: [strides=[1, 1, 1, 7, 1], data_format="NDHWC", ksize=[1, 1, 1, 7, 1], padding="SAME", T=DT_HALF]
Registered devices: [CPU]
Registered kernels:
  device='CPU'; T in [DT_FLOAT]
  device='GPU'; T in [DT_HALF]
  device='GPU'; T in [DT_FLOAT]
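Since the CPU registers only a float32 kernel for AvgPool3D, one possible workaround (a sketch, reusing the Resnet_base_3D class from the training script above; not a verified fix) is to rebuild the model under a float32 policy and load the trained weights, so inference never touches a half-precision kernel:

import tensorflow as tf

# Rebuild the architecture in full float32 for CPU inference.
tf.keras.mixed_precision.set_global_policy('float32')

app = Resnet_base_3D()        # same architecture class as in training
model_fp32 = app.model()
# Variables stay float32 even under mixed_float16, so the trained
# weights load directly from the saved .h5 file.
model_fp32.load_weights("my_model_test_h2.h5")

g1 = tf.random.Generator.from_seed(1234)
x_test = g1.normal(shape=[4, 32, 32, 4, 1])
y1 = model_fp32.predict(x_test)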