Incorrect Input Shape for pyRadiomics Patch Features

Hello,

I am loading a 2D lung cancer image dataset each with its 2D nodule target array. I broke the image in patches and extracted the features of each patch with pyRadiomics along with the patch center. Then I tryied to use the patch features of each image to predict the nodule polsition. I using numpy to break the image in patches and then send the numpy matrices to tensorflow but I get the wrong input shape dimensions error

Thank you in advance,

Rafael Scatena

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[2], line 505
    498 plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
    501 #model.load_weights('best_weights.weights.h5')  # Adjust filepath as necessary
    502 
    503 
    504 #Train the model
--> 505 model.fit(X_train, y_train,
    506           epochs=num_epochs,
    507           batch_size=batch_size,
    508           validation_data=(X_val, y_val),
    509            callbacks=[checkpoint_callback])
    511 #callbacks=[LossPlotCallback(X_val, y_val),
    513 model.load_weights('best_weights.weights.h5')  # Adjust filepath as necessary

File ~/anaconda3/envs/tf/lib/python3.9/site-packages/keras/src/utils/traceback_utils.py:122, in filter_traceback.<locals>.error_handler(*args, **kwargs)
    119     filtered_tb = _process_traceback_frames(e.__traceback__)
    120     # To get the full stack trace, call:
    121     # `keras.config.disable_traceback_filtering()`
--> 122     raise e.with_traceback(filtered_tb) from None
    123 finally:
    124     del filtered_tb

File ~/anaconda3/envs/tf/lib/python3.9/site-packages/keras/src/models/functional.py:288, in Functional._adjust_input_rank(self, flat_inputs)
    286             adjusted.append(ops.expand_dims(x, axis=-1))
    287             continue
--> 288     raise ValueError(
    289         f"Invalid input shape for input {x}. Expected shape "
    290         f"{ref_shape}, but input has incompatible shape {x.shape}"
    291     )
    292 # Add back metadata.
    293 for i in range(len(flat_inputs)):

ValueError: Exception encountered when calling Sequential.call().

Invalid input shape for input [[[3.0000000e+01 3.0000000e+01 1.0000000e+00 ... 1.3395919e-02
   1.3395919e-02 1.3395919e-02]]]. Expected shape (None, 2, 1, 21984), but input has incompatible shape (1, 1, 21984)

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(1, 1, 21984), dtype=float32)
  • training=True
  • mask=None



import tensorflow as tf
import tensorflow_io as tfio
#import os
import numpy as np
#import matplotlib
from matplotlib import pyplot as plt
from PIL import Image
import pydot

import six

from radiomics import firstorder, getTestCase, glcm, glrlm, glszm, imageoperations, shape


from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.utils import plot_model
import SimpleITK as sitk
import csv


from radiomics import featureextractor
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()
tf.compat.v1.enable_eager_execution()

#os.environ["QT_QPA_PLATFORM"] = "wayland"

#matplotlib.use('Qt5Agg')


plt.switch_backend('qt5agg')  # Switch to qt backend


extractor = featureextractor.RadiomicsFeatureExtractor()






# Define the checkpoint callback
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='best_weights.weights.h5',  # Filepath to save the weights
    monitor='val_loss',  # Metric to monitor for saving the weights (e.g., validation loss)
    save_best_only=True,  # Save only the best weights
    save_weights_only=True,  # Save only the weights, not the entire model
    verbose=1  # Verbosity mode (0 or 1)
)


class LossPlotCallback(tf.keras.callbacks.Callback):
    def __init__(self, X_val, y_val):
        super(LossPlotCallback, self).__init__()
        self.X_val = X_val
        self.y_val = y_val
        self.losses = []
        self.val_losses = []
        self.fig, self.ax = plt.subplots()
        plt.ion()  # Turn on interactive mode for real-time plotting
        plt.savefig('training.png')
        self.fig.show()

   
    def on_train_begin(self, logs={}):
        pass

    def on_epoch_end(self, epoch, logs={}):
        self.losses.append(logs.get('mae'))
        val_loss = self.model.evaluate(self.X_val, self.y_val, verbose=0)
        val_loss_mae = val_loss[1]  # Assuming Mae is the second metric in the list
        self.val_losses.append(val_loss_mae)
        self.ax.clear()
        self.ax.plot(range(1, len(self.losses) + 1), self.losses, label='Training Loss')
        self.ax.plot(range(1, len(self.val_losses) + 1), self.val_losses, label='Validation Loss')
        self.ax.set_xlabel('Epoch')
        self.ax.set_ylabel('Loss (Mae)')
        self.ax.set_title('Training and Validation Loss')
        self.ax.legend()
#       self.ax.set_yscale('log')  # Set logarithmic scale for the y-axis
        self.fig.canvas.draw()
        self.fig.canvas.flush_events()  # Flush the GUI events for the figure
        plt.savefig('training.png')
        plt.pause(0.5)  # Pause for a short time to update the plot
        
############################

def extract_patches_np(image, patch_size, stride):
    patches = []
    centers = []
    height, width = image.shape[:2]
    for y in range(0, height - patch_size + 1, stride):
        for x in range(0, width - patch_size + 1, stride):
            patch = image[y:y+patch_size, x:x+patch_size]
            patches.append(patch)
            center = (y + patch_size // 2, x + patch_size // 2)
            centers.append(center)
    return np.array(patches), np.array(centers)



def load_tiff_image_np(file_path):
    image = sitk.ReadImage(file_path)
    image_array = sitk.GetArrayFromImage(image).astype(np.float32)  # Convert to float32
    image_array = normalize_images_np(image_array)  # Normalize image
    return image_array

def normalize_images_np(image):
    min_value = np.min(image)
    max_value = np.max(image)
    normalized_image = (image - min_value) / (max_value - min_value)
    return normalized_image




def extract_features_pyradiomics(patches, settings_file, centers):
    # Initialize PyRadiomics feature extractor
    extractor = featureextractor.RadiomicsFeatureExtractor(settings_file)
    
    centers_and_features_list=[]
    
    # Loop through patches and extract features
    for idx, (patch, center) in enumerate(zip(patches, centers), start=1):

        print("1 patch shape:", patch.shape)

        patch_image = sitk.GetImageFromArray(patch)
        
        # Create a dummy mask as a numpy array with the same height and width as the patch
        mask = np.ones(patch.shape, dtype=np.uint8)
        
        # Convert the numpy mask to SimpleITK image
        mask_image = sitk.GetImageFromArray(mask)

        features = extractor.execute(patch_image,mask_image)

#       print("features shape:", features.shape)

        
 #       feature_labels = list(features.keys())

        # Print the feature labels
#        print("Extracted feature labels:")
#        for label in features:
#           print(label)


        collapsed_center = center.reshape(-1)
        print("collapsed_center shape:", collapsed_center)
        print("collapsed_center shape:", collapsed_center.shape)

    
        
        # Concatenate feature values into a single list
        features_values = []

 #       for featureName in features.keys():
 #         print("Computed %s: %s" % (featureName, features[featureName]))        


        
        for (key, val) in six.iteritems(features):
            #print('  ', key, ':', val)
            #print('    Class:', type(val))

            if isinstance(val, tuple):
                # If val is a tuple, extract its elements and add them to the list
                filtered_values = [v for v in val if isinstance(v, (int, float, np.integer, np.floating,np.float64,np.float32,np.ndarray))]
                # Ensure feature length consistency
                features_values.extend(filtered_values)
            else:
                # If val is not a tuple, check if it's a scalar value and add it directly
                if isinstance(val, (int, float, np.integer, np.floating,np.float64,np.float32,np.ndarray)):
                    features_values.append(val)

           
        # Ensure feature length consistency
        print(f"filtered_values{idx}: {filtered_values}")
            
        features_array = np.array(features_values)
       
        # Flatten the array to create a vector
        features_vector = features_array.flatten()
       
        print("features_vector shape:", features_vector.shape)
        print("i:", idx)
        
        # Concatenate collapsed center coordinates with extracted features into a single vector
        center_and_feature_vector = np.hstack((collapsed_center,features_vector))
        print("center_and_feature_vector shape:", center_and_feature_vector.shape)

        centers_and_features_list.append(center_and_feature_vector)


    features_go = np.array(centers_and_features_list)

       
    # Flatten the array to create a vector
    features_go = features_go.flatten()
    print("features_go shape:", features_go.shape)

    return features_go



def pad_image_np(image, padding):
    original_height = 240
    original_width = 358

    target_height = original_height
    target_width = original_width + padding

    # Calculate the padding amounts for height and width
    padding_height = (target_height - image.shape[0]) // 2
    padding_width = padding

    # Ensure padding is non-negative
    if padding_height < 0 or padding_width < 0:
        raise ValueError("Padding results in negative dimensions, please check the input dimensions and padding values.")

    # Pad the image with zeros
    padded_image = np.pad(image, ((padding_height, padding_height), (padding_width, padding_width)), mode='constant', constant_values=0)

#    print("Padded image shape:", padded_image.shape)
    return padded_image

settings_file = "exampleCT.yaml"

def parse_csv_line_np(line):
    parts = line.strip().split(",")
    targets = np.array(parts[:2], dtype=np.float32)
    filename = parts[2]

    image = load_tiff_image_np(filename)
    padded_image = pad_image_np(image, padding=1)
      
    patches, centers = extract_patches_np(padded_image, patch_size=60, stride=60)

    print("patches shape:", patches.shape)
    print("centers shape:", centers.shape)

    
    centers_and_features_list  = extract_features_pyradiomics(patches, settings_file,centers)
    
    centers_and_features = np.array(centers_and_features_list)

    print("features_array shape:", centers_and_features.shape)

    # Collapse dimensions of centers to match the features_array
    
    return centers_and_features, targets



max_size=4

def count_rows_in_csv(csv_file_path):
    with open(csv_file_path, newline='') as csvfile:
        row_count = sum(1 for row in csvfile)
    return row_count


def create_matrix_with_csv_rows(csv_file_path):
    row_count = count_rows_in_csv(csv_file_path)
    # Define the number of columns in your matrix
    num_columns = 916  # Replace 10 with the actual number of columns
    # Initialize a matrix with the row_count
    matrix = [[None] * num_columns for _ in range(row_count)]
    return matrix





def process_csv_file(csv_file_path,max_size):
    centers_and_features_list = []
    target_lists = []

    counter =0

    matrix = create_matrix_with_csv_rows(csv_file_path)

    
    
    with open(csv_file_path, newline='') as csvfile:
        csv_reader = csv.reader(csvfile, delimiter=',')

        for row in csv_reader:
            if counter >= max_size:
                print(f"Reached max size: {counter}")
                break
            
            else:
                line = ",".join(row)
                centers_and_features, targets = parse_csv_line_np(line)
                if centers_and_features is not None:
     
                    centers_and_features_list.append(centers_and_features)
                    target_lists.append(targets)
    
      
    #               centers_and_features_list.append(centers_and_features)
    #               target_lists.append(targets)
                    
                    counter += 1
                    print(f"Processed {counter} items")

    # Convert lists to numpy arrays
    features_matrix = np.array(centers_and_features_list)
    targets_matrix = np.array(target_lists)
    
 
    print("features_matrix shape:", features_matrix.shape)
    print("targets_matrix shape:", targets_matrix.shape)


    return features_matrix, targets_matrix



    



cvs = "Coord (fixed).csv"

image_path = "1.tiff"
img = Image.open(image_path)

# Get the size of the image
width, height = img.size
print(f"Image size: {width}x{height}")



def create_mlp(input_shape):
    model = tf.keras.Sequential([
       tf.keras.layers.Dense(2, activation='relu', input_shape=input_shape),
#       tf.keras.layers.Dense(100, activation='relu', input_shape=(343680,)),
       tf.keras.layers.Dense(2, input_shape=input_shape)  # Output layer with 2 neurons for the real-valued targets
    ])    
    return model




# Assuming flat_patches_dataset contains (patch, label)
features_list = []

features_array, targets_array = process_csv_file(cvs,max_size)

print("targest array shape:", targets_array.shape)
print("targest features array shape:", features_array.shape)


buffer_size=20

# Convert to TensorFlow dataset
def create_dataset(features_array, targets_array):
    # Create TensorFlow dataset
    dataset = tf.data.Dataset.from_tensor_slices((features_array, targets_array))
    
    # Shuffle and batch the dataset
    dataset = dataset.shuffle(buffer_size=buffer_size)
    
    return dataset




############

dataset = create_dataset(features_array, targets_array)


for data in dataset.take(10):
    tf.print(data)




dataset = dataset.shuffle(buffer_size=buffer_size)

train_size = 2
test_size = 1
val_size = 1

batch_size=1

# Split the dataset
train_dataset = dataset.take(train_size)
remaining_dataset = dataset.skip(train_size)
test_dataset = remaining_dataset.take(test_size)
remaining_dataset = dataset.skip(train_size+test_size)
val_dataset = remaining_dataset.take(val_size)

# Optionally, you may want to batch the datasets
train_dataset = train_dataset.batch(batch_size)
test_dataset = test_dataset.batch(batch_size)
val_dataset = val_dataset.batch(batch_size)

# Optionally, you may want to prefetch the datasets for better performance
train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.prefetch(tf.data.experimental.AUTOTUNE)
val_dataset = val_dataset.prefetch(tf.data.experimental.AUTOTUNE)




# Create lists to store preprocessed images and target values
features_list_train = []
target_values_train = []

features_list_test = []
target_values_test = []

features_list_val = []
target_values_val = []


for a, b in val_dataset:
  print('shapes: {a.shape}, {b.shape}'.format(a=a, b=b))


for combined_features, targets in train_dataset:
    features_list_train.append(combined_features)
    target_values_train.append(targets)

# Iterate over the testing dataset and populate the lists
for combined_features, targets in test_dataset:
    features_list_test.append(combined_features)
    target_values_test.append(targets)

# Iterate over the validation dataset and populate the lists
for combined_features, targets in val_dataset:
    features_list_val.append(combined_features)
    target_values_val.append(targets)




# Convert lists to NumPy arrays or TensorFlow tensors
X_train = tf.stack(features_list_train)
y_train = tf.stack(target_values_train)


# Convert lists to NumPy arrays or TensorFlow tensors
X_test = tf.stack(features_list_test)
y_test = tf.stack(target_values_test)


# Convert lists to NumPy arrays or TensorFlow tensors
X_val = tf.stack(features_list_val)
y_val = tf.stack(target_values_val)


print("X_val shape:", X_val.shape)
print("y_val shape:", y_val.shape)

"""
X_train = tf.shape(tf.squeeze(X_train, [1])) 
y_train = tf.shape(tf.squeeze(y_train, [1]))

X_val = tf.shape(tf.squeeze(X_val, [1]))
y_val = tf.shape(tf.squeeze(y_val, [1]))

X_test = tf.shape(tf.squeeze(X_test, [1]))
y_test = tf.shape(tf.squeeze(y_test, [1]))

"""
X_train = tf.reshape(X_train, ( 2,  1, 21984))
y_train = tf.reshape(y_train, ( 2,  1, 2))

X_val = tf.reshape(X_val, ( 1, 1, 21984))
y_val = tf.reshape(y_val, ( 1, 1, 2))

X_test = tf.reshape(X_test, (  1, 1, 21984))
y_test = tf.reshape(y_test, (  1, 1, 2))


# Check the shapes of X_train and y_train
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)


num_epochs = 1000;

input_shape = (2,1, 21984)  # Replace with the correct dimensions as per your requirement
input_shape = (1, 1, 21984)

input_shape = X_train.shape

model = create_mlp(input_shape=input_shape)  # Replace height, width, and channels with actual values
model.compile(optimizer='adam', loss='mse', metrics=['mae'])


plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)


#model.load_weights('best_weights.weights.h5')  # Adjust filepath as necessary


#Train the model
model.fit(X_train, y_train,
          epochs=num_epochs,
          batch_size=batch_size,
          validation_data=(X_val, y_val),
           callbacks=[checkpoint_callback])

#callbacks=[LossPlotCallback(X_val, y_val),

model.load_weights('best_weights.weights.h5')  # Adjust filepath as necessary

test_mae = model.evaluate(test_dataset)[1]  # Get the second element (MAE) from the evaluation result


print("Test MAE:", test_mae)


visualize_multiple_images(test_dataset, image_shape, patch_size=30, stride=30, padding=2, num_images=5)
center_and_feature_vector shape: (916,)
features_go shape: (21984,)
features_array shape: (21984,)
Processed 4 items
Reached max size: 4
features_matrix shape: (4, 21984)
targets_matrix shape: (4, 2)
targest array shape: (4, 2)
targest features array shape: (4, 21984)
([30 30 1 ... 0.013395919067215364 0.013395919067215364 0.013395919067215364], [256.826508 145.49292])
([30 30 1 ... 0.013395919067215364 0.013395919067215364 0.013395919067215364], [41.1423721 106.030334])
([30 30 1 ... 0.013395919067215364 0.013395919067215364 0.013395919067215364], [120.418457 137.227463])
([30 30 1 ... 0.013395919067215364 0.013395919067215364 0.013395919067215364], [270.512177 63.3346138])
shapes: (1, 21984), (1, 2)
X_val shape: (1, 1, 21984)
y_val shape: (1, 1, 2)
X_train shape: (2, 1, 21984)
y_train shape: (2, 1, 2)