Hello,
I am trying to set up a dataset of flattened lung images paired with two real values giving the lung nodule location, but I am running into some problems:
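For reference, each line of Coord.csv has the two target coordinates first and the image filename as the last field (the filename below is just an illustrative placeholder):

41.14,106.03,lung_0001.tif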
import tensorflow as tf
import tensorflow_io as tfio
import matplotlib.pyplot as plt
import os
import numpy as np
class LossPlotCallback(tf.keras.callbacks.Callback):
    def __init__(self, X_val, y_val):
        super(LossPlotCallback, self).__init__()
        self.X_val = X_val
        self.y_val = y_val
        self.losses = []
        self.val_losses = []
        self.fig, self.ax = plt.subplots()
        plt.ion()  # Turn on interactive mode for real-time plotting

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        # evaluate() returns [loss, mae] because of the compiled metrics; keep only the loss
        self.val_losses.append(self.model.evaluate(self.X_val, self.y_val, verbose=0)[0])
        self.ax.clear()
        self.ax.plot(range(1, len(self.losses) + 1), self.losses, label='Training Loss')
        self.ax.plot(range(1, len(self.val_losses) + 1), self.val_losses, label='Validation Loss')
        self.ax.set_xlabel('Epoch')
        self.ax.set_ylabel('Loss')
        self.ax.set_title('Training and Validation Loss')
        self.ax.legend()
        self.fig.canvas.draw()
        plt.pause(0.01)  # Pause briefly to update the plot
def load_tiff_image(file_path):
    image = tf.io.read_file(file_path)
    image = tfio.experimental.image.decode_tiff(image)
    image = tf.image.convert_image_dtype(image, tf.float32)  # Normalize to [0, 1]
    return image
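To check the decoding in isolation, a one-off test on a single known file can help (the filename here is a made-up example, and PATH is defined further down); note that decode_tiff returns a 4-channel RGBA image:

sample = load_tiff_image(os.path.join(PATH, "lung_0001.tif"))  # hypothetical filename
print(sample.shape, sample.dtype)  # expect (height, width, 4), float32: decode_tiff yields RGBA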
def normalize_images(input_image):
    # Min-max normalize the input image to [0, 1]
    # (assumes max > min; a constant image would divide by zero)
    min_value_input = tf.reduce_min(input_image)
    max_value_input = tf.reduce_max(input_image)
    normalized_input_image = (input_image - min_value_input) / (max_value_input - min_value_input)
    return normalized_input_image
def preprocess_image(image):
    # Flatten the image into a vector
    flattened_image = tf.reshape(image, [-1])
    return flattened_image
PATH = "/home/rafael/Área de Trabalho/BioInspirada/Trabalho 2/All in one"
csv_path = os.path.join(PATH, "Coord.csv")
def parse_csv_line(line):
    parts = tf.strings.split(line, sep=',')
    # Assuming the first two columns are the targets
    # (note: a Python try/except here would not catch per-element errors inside the tf.data graph,
    # since this function is only traced once)
    target1 = tf.strings.to_number(parts[0], out_type=tf.float32)
    target2 = tf.strings.to_number(parts[1], out_type=tf.float32)
    targets = tf.stack([target1, target2])
    # Load and preprocess the image data
    filename = parts[-1]  # Assuming the image filename is the last element
    file_path = tf.strings.join([PATH, filename], separator='/')  # join with a path separator
    image = load_tiff_image(file_path)  # read from the full path, not the bare filename
    image = normalize_images(image)
    flattened_image = preprocess_image(image)
    return flattened_image, targets
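Because a bad path only surfaces inside the tf.data graph as a generic NOT_FOUND (as in the traceback below), a plain-Python pass over the CSV is a quick way to confirm that every referenced file actually exists; a minimal sketch, assuming the filename is the last comma-separated field:

with open(csv_path) as f:
    for line in f:
        name = line.strip().split(',')[-1]
        full_path = os.path.join(PATH, name)
        if not os.path.exists(full_path):
            print("Missing file:", repr(full_path))  # repr() exposes stray whitespace in the name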
dataset = tf.data.TextLineDataset(csv_path)

# Parse each line of the CSV file
dataset = dataset.map(parse_csv_line)
for flattened_image, targets in dataset.take(5):
    print("Flattened Image:", flattened_image)
    print("Targets:", targets)
def create_mlp(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=input_shape),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(2)  # Output layer with 2 neurons for the real-valued targets
    ])
    return model
def count_tuples(count, _):
    return count + 1

# Reduce the dataset to count the number of tuples
num_tuples = dataset.reduce(0, count_tuples)

# Print the number of tuples
print("Dataset size:", num_tuples.numpy())
# Shuffle once; without reshuffle_each_iteration=False the take/skip splits below would overlap across iterations
dataset = dataset.shuffle(buffer_size=1000, reshuffle_each_iteration=False)
train_size = 1000
test_size = 100
val_size = 66
# Split the dataset
train_dataset = dataset.take(train_size)
remaining_dataset = dataset.skip(train_size)
test_dataset = remaining_dataset.take(test_size)
remaining_dataset = dataset.skip(train_size + test_size)
val_dataset = remaining_dataset.take(val_size)
# Optionally, you may want to batch the datasets
batch_size = 32
train_dataset = train_dataset.batch(batch_size)
test_dataset = test_dataset.batch(batch_size)
val_dataset = val_dataset.batch(batch_size)
# Optionally, you may want to prefetch the datasets for better performance
train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.prefetch(tf.data.experimental.AUTOTUNE)
val_dataset = val_dataset.prefetch(tf.data.experimental.AUTOTUNE)
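To double-check what the model will actually receive, printing the element spec of the batched pipeline shows the (possibly partially unknown) shapes and dtypes of one batch:

print(train_dataset.element_spec)
# a tuple of TensorSpecs for (flattened_image_batch, targets_batch)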
model = create_mlp(input_shape=(240 * 358 * 4,))  # decode_tiff yields 4 channels (RGBA): 240 * 358 * 4 = 343680, matching the flattened size printed below
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
# Create lists to store preprocessed images and target values
flattened_image_list_train = []
target_values_list_train = []
flattened_image_list_test = []
target_values_list_test = []
flattened_image_list_val = []
target_values_list_val = []
# Recreate iterators for the datasets
train_iter = iter(train_dataset)
test_iter = iter(test_dataset)
val_iter = iter(val_dataset)

# Clear existing lists
flattened_image_list_train.clear()
target_values_list_train.clear()
flattened_image_list_test.clear()
target_values_list_test.clear()
flattened_image_list_val.clear()
target_values_list_val.clear()

# Populate the lists
for flattened_image, targets in train_iter:
    flattened_image_list_train.append(flattened_image)
    target_values_list_train.append(targets)

for flattened_image, targets in test_iter:
    flattened_image_list_test.append(flattened_image)
    target_values_list_test.append(targets)

for flattened_image, targets in val_iter:
    flattened_image_list_val.append(flattened_image)
    target_values_list_val.append(targets)
# Convert the lists of batches to single tensors
# The iterators above yield batches (the last one smaller), so concatenate along the batch axis instead of stacking
X_train = tf.concat(flattened_image_list_train, axis=0)
y_train = tf.concat(target_values_list_train, axis=0)

X_test = tf.concat(flattened_image_list_test, axis=0)
y_test = tf.concat(target_values_list_test, axis=0)

X_val = tf.concat(flattened_image_list_val, axis=0)
y_val = tf.concat(target_values_list_val, axis=0)

# Check the shapes of X_train and y_train
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
num_epochs = 500

# Train the model
model.fit(X_train, y_train, epochs=num_epochs, batch_size=batch_size,
          validation_data=(X_val, y_val), callbacks=[LossPlotCallback(X_val, y_val)])
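As a side note, the already-batched tf.data pipelines can also be passed to fit directly, which avoids materializing X_train in memory (Keras accepts datasets of (features, targets) pairs); a minimal sketch, leaving out the plotting callback since it needs in-memory validation tensors:

model.fit(train_dataset, epochs=num_epochs, validation_data=val_dataset)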
When I run the code above, I get:
Flattened Image: tf.Tensor([0. 0. 0. … 0. 0. 1.], shape=(343680,), dtype=float32)
Targets: tf.Tensor([ 41.142372 106.030334], shape=(2,), dtype=float32)
Flattened Image: tf.Tensor([0. 0. 0. … 0. 0. 1.], shape=(343680,), dtype=float32)
Targets: tf.Tensor([270.51218 63.334614], shape=(2,), dtype=float32)
Flattened Image: tf.Tensor([0. 0. 0. … 0. 0. 1.], shape=(343680,), dtype=float32)
Targets: tf.Tensor([256.8265 145.49292], shape=(2,), dtype=float32)
Flattened Image: tf.Tensor([0. 0. 0. … 0. 0. 1.], shape=(343680,), dtype=float32)
Targets: tf.Tensor([120.41846 137.22746], shape=(2,), dtype=float32)
Flattened Image: tf.Tensor([0. 0. 0. … 0. 0. 1.], shape=(343680,), dtype=float32)
Targets: tf.Tensor([148.83672 120.33603], shape=(2,), dtype=float32)
2024-05-30 18:58:56.257933: W tensorflow/core/framework/op_kernel.cc:1828] OP_REQUIRES failed at whole_file_read_ops.cc:116 : NOT_FOUND: ; No such file or directory
2024-05-30 18:58:56.262418: W tensorflow/core/framework/op_kernel.cc:1828] OP_REQUIRES failed at whole_file_read_ops.cc:116 : NOT_FOUND: ; No such file or directory
NotFoundError Traceback (most recent call last)
Cell In[47], line 126
123 return count + 1
125 # Reduce the dataset to count the number of tuples
--> 126 num_tuples = dataset.reduce(0, count_tuples)
128 # Print the number of tuples
129 print("Dataset size:", num_tuples.numpy())
File ~/miniconda3/envs/tensorenv/lib/python3.9/site-packages/tensorflow/python/data/ops/dataset_ops.py:2787, in DatasetV2.reduce(self, initial_state, reduce_func, name)
2783 if name:
2784 metadata.name = _validate_and_encode(name)
2785 return structure.from_compatible_tensor_list(
2786 state_structure,
--> 2787 gen_dataset_ops.reduce_dataset(
2788 dataset._variant_tensor,
2789 structure.to_tensor_list(state_structure, initial_state),
2790 reduce_func.captured_inputs,
2791 f=reduce_func,
2792 output_shapes=structure.get_flat_tensor_shapes(state_structure),
2793 output_types=structure.get_flat_tensor_types(state_structure),
2794 metadata=metadata.SerializeToString()))
File ~/miniconda3/envs/tensorenv/lib/python3.9/site-packages/tensorflow/python/ops/gen_dataset_ops.py:6178, in reduce_dataset(input_dataset, initial_state, other_arguments, f, output_types, output_shapes, use_inter_op_parallelism, metadata, name)
6176 return _result
6177 except _core._NotOkStatusException as e:
--> 6178 _ops.raise_from_not_ok_status(e, name)
6179 except _core._FallbackException:
6180 pass
File ~/miniconda3/envs/tensorenv/lib/python3.9/site-packages/tensorflow/python/framework/ops.py:6656, in raise_from_not_ok_status(e, name)
6654 def raise_from_not_ok_status(e, name):
6655 e.message += (" name: " + str(name if name is not None else ""))
--> 6656 raise core._status_to_exception(e) from None
NotFoundError: {{function_node _wrapped__ReduceDataset_Targuments_0_Tstate_1_output_types_1_device/job:localhost/replica:0/task:0/device:CPU:0}} ; No such file or directory
[[{{node ReadFile}}]] [Op:ReduceDataset] name: