autodifferentiation tensorflow not converging as the manual differentiation

In this code, I try to optimize the phase of a hologram. I used Gradient Descent Algorithm through Tensorflow. I used Adam optimizer. the problem I am facing is the following:
In the Gradient descent algorithm, we must compute the partial derivatives with respect to a certain variables. In here, the variable is a tensor (tf.Variable) that have the same shape of the input image (i.e. 256x256). I analytically computed the partial derivative of the loss function. it is given in the code. I also tried to implement an autodifferentiation method base on taensorflow GradientTap. the problem is that it doesn’t converge as the manually computed one.
The loss function I used is :

              MSE (I_predicted - I_target).

I_predicted = Abs( FFT( np.exp(1j*phase) ),

the autodifferentiation should handle the derivation with the presence of the FFT(Complex function). I have seen some people succeded carrying the autodifferentiation. I cannot achieve that. Please Help. These people used Pytorch instead of Tensorflow. Also, they didn’t exlicitly give a relation between the I_predicted and the variable (i.e. phase).

Thank you for your help.

import cv2
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from scipy.fftpack import fft2, fftshift, ifft2



imgBGR = cv2.imread('CirCTria.png')
imgGray = cv2.cvtColor(imgBGR, cv2.COLOR_BGR2GRAY)



def FFT(inputArray):
    shift = tf.cast( tf.signal.ifftshift( inputArray ), tf.complex64  )
    FourierTransform = tf.signal.fft2d( shift )
    unshift =  tf.signal.fftshift( FourierTransform )
    return unshift

def iFFT(inputArray):
    shift = tf.cast( tf.signal.ifftshift( inputArray ), tf.complex64  )
    FourierTransform = tf.signal.ifft2d( shift )
    unshift =  tf.signal.fftshift( FourierTransform )
    return unshift

def SGDHoloCompute(inputImg, nEpochs, initialPhase='FFT', lr=0.5):
    optimizer = tf.keras.optimizers.Adam(lr) ### Optimizer based on ADAM
    train_loss_results = []
    train_psnr_results = []
    h,w = inputImg.shape
    inputImg = np.fliplr(inputImg)/np.amax(inputImg)    ### Normalize image Array
    
    phase = tf.Variable( np.random.rand(h,w) *2*np.pi ,  name = 'Phase', dtype = tf.float32)
    for epoch in range(nEpochs):
        epoch_loss_avg = tf.keras.metrics.Mean()
        epoch_psnr_avg = tf.keras.metrics.Mean()
        
        holo = tf.math.exp(1.j * tf.dtypes.cast(phase, tf.complex64))
        reconstruction = iFFT(holo)
        phaseImagePlane = tf.math.angle(reconstruction)    # Phase image plane
        reconIntensity =  tf.math.abs(reconstruction) **2
        I_t = inputImg
        I_r = reconIntensity / tf.math.reduce_mean(reconIntensity, axis = [0, 1], keepdims = True) * np.mean(I_t, axis = (0, 1))
        I_r = tf.clip_by_value(I_r, 0.0, 1.0)
        I_rPSNR = I_r[:,:, np.newaxis]
        I_tPSNR = I_t[:,:, np.newaxis]
        phase_mod =  tf.experimental.numpy.mod( phase  , 2*np.pi)
        
        ## Define Loss
        mse = tf.keras.losses.MeanSquaredError()
        loss_mse = mse(I_t, I_r)
        loss_psnr = tf.reduce_mean(tf.image.psnr(I_tPSNR, I_rPSNR, max_val = 1))
        
        
        ################ Please uncomment this pat for MANUAL DIFFERENTIATION GRADIENT DESCENT
        # delta_f =  2 * tf.dtypes.cast((I_r - I_t), dtype = tf.complex64)  * 2 *reconstruction/ h / w
        # delta = FFT(delta_f)
        # recon_grad = - 1.j * tf.math.exp(-1.j * tf.dtypes.cast(phase, dtype = tf.complex64)) * delta 
        # recon_const = tf.math.real(recon_grad)        
        # total_grad = [recon_const]
        
        ############### Please Un comment this part for AUTOMATIC DIFFERENTIATION FROM TENSORFLOW
        with tf.GradientTape() as tape:
            tape.watch(phase)
            Lossfunction = tf.math.l2_normalize( tf.abs(iFFT( tf.exp(tf.convert_to_tensor(1j, dtype=tf.complex64)* tf.cast(phase, dtype=tf.complex64 ) )))**2 - I_t  ) 
        grads = tape.gradient(Lossfunction, [phase])
        total_grad = grads

        # Update Gradient Descent
        optimizer.apply_gradients(zip(total_grad, [phase]))
        
        # Track the loss and PSNR
        epoch_loss_avg(loss_mse)
        epoch_psnr_avg(loss_psnr)
        train_loss_results.append(epoch_loss_avg.result())
        train_psnr_results.append(epoch_psnr_avg.result())
        print("Epoch: {:03d} MSE Loss: {:.6f} PSNR: {:.2f}".format(epoch, epoch_loss_avg.result(), epoch_psnr_avg.result()))
        
        plt.imshow(np.flipud(np.abs(fftshift(  fft2(  fftshift( 1*np.exp(1j*phase.numpy() )) )  )))**2)
        plt.pause(0.05)
    return phase_mod


if __name__ == "__main__":
    phase = SGDHoloCompute(imgGray, 100, lr=0.4)

Hi @Omer_landau,

Sorry for the delay in response.
To optimize the phase of a hologram using TensorFlow, I suggest to correct complex numbers by using tf.complex64 and tf.math.exp(1j * phase) for complex exponentiation and use tf.GradientTape() for automatic differentiation, compute the MSE, PSNR losses and update the phase with the Adam optimizer.Normalize it and clip the intensity to keep values within a valid range. Adjust the learning rate and experiment with a weighted loss function if needed for better convergence.

Hope this helps.Thank You.