Hello, I am new to KerasCV and computer vision as a whole. I am trying to load an image dataset from Kaggle (Underwater Object Detection Dataset | Kaggle) and perform object detection with the YOLOV8 model on this custom dataset, using the tutorial from the Keras website as a guide (Efficient Object Detection with YOLOV8 and KerasCV).
Code:
```python
import keras_cv
import keras
import tensorflow as tf
import os
import numpy as np
import tensorflow_datasets as tfds
import PIL

classes = ['fish', 'jellyfish', 'penguin', 'puffin', 'shark', 'starfish', 'stingray']
class_mapping = dict(zip(range(len(classes)), classes))

train_dir = '/home/arch_dan/Projects/Tensorflow-Keras/KerasCV_KerasNLP/CV_datasets/Fish_dataset/aquarium_pretrain/train/'
img_path = train_dir + 'images/'
label_path = train_dir + 'labels/'

def get_bboxes_classes(label_dir):
    classes_ = list()
    bboxes = list()
    for labels in os.listdir(label_dir):
        temp_c = []
        temp_b = []
        num_boxes = 0
        with open(os.path.join(label_dir, labels)) as file:
            temp = file.readlines()
            for cords in temp:
                temp_c.append(int(cords.split(' ')[0]))
                num_boxes += 1
                temp_b.append(num_boxes)
        classes_.append(temp_c)
        bboxes.append(len(temp_b))
    bbox = tf.ragged.constant(bboxes)
    classes = tf.ragged.constant(classes_)
    return bbox, classes

def create_dataset(image_paths, bboxes, classes):
    return tf.data.Dataset.from_tensor_slices((image_paths, bboxes, classes))

def load_image_paths(img_dir):
    image_paths = list()
    for images in os.listdir(img_dir):
        image_paths.append(os.path.join(img_dir, images))
    return tf.ragged.constant(image_paths)

image_paths = load_image_paths(img_path)
bboxes, classes = get_bboxes_classes(label_path)
loaded_dataset = create_dataset(image_paths, bboxes, classes)

train_data = loaded_dataset.take(int(len(os.listdir(label_path)) * 0.8))
val_data = loaded_dataset.skip(int(len(os.listdir(label_path)) * 0.8))

augmentations = keras.Sequential(layers=[
    keras_cv.layers.RandomFlip(mode="horizontal", bounding_box_format="xyxy"),
    keras_cv.layers.RandomShear(x_factor=0.2, y_factor=0.2, bounding_box_format="xyxy"),
    keras_cv.layers.JitteredResize(target_size=(640, 640), scale_factor=(0.75, 1.3), bounding_box_format="xyxy"),
])

resizing = keras_cv.layers.JitteredResize(target_size=(640, 640), scale_factor=(0.75, 1.3), bounding_box_format="xyxy")

def load_image(image_path):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    return image

def load_dataset(image_path, classes, bbox):
    image = load_image(image_path)
    boxes = keras_cv.bounding_box.convert_format(bbox, images=image, source="xyxy", target="xywh")
    bounding_boxes = {
        "classes": tf.cast(classes, dtype=tf.float64),
        "boxes": boxes,
    }
    return {"images": tf.cast(image, tf.float64), "bounding_boxes": bounding_boxes}

train_set = train_data.map(load_dataset, num_parallel_calls=tf.data.AUTOTUNE)
train_set = train_set.shuffle(4 * 4)
train_set = train_set.ragged_batch(4, drop_remainder=True)

val_set = val_data.map(load_dataset, num_parallel_calls=tf.data.AUTOTUNE)
val_set = val_set.shuffle(4 * 4)
val_set = val_set.ragged_batch(4, drop_remainder=True)

val_set = val_set.map(resizing, num_parallel_calls=tf.data.AUTOTUNE)
train_set = train_set.map(augmentations, num_parallel_calls=tf.data.AUTOTUNE)

def dict_to_tuple(inputs):
    return inputs["images"], inputs["bounding_boxes"]

train_set = train_set.map(dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE)
train_set = train_set.prefetch(tf.data.AUTOTUNE)
val_set = val_set.map(dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE)
val_set = val_set.prefetch(tf.data.AUTOTUNE)

backbone = keras_cv.models.YOLOV8Backbone.from_preset("yolo_v8_s_backbone_coco")
yolo = keras_cv.models.YOLOV8Detector(
    num_classes=len(class_mapping),
    bounding_box_format="xywh",
    backbone=backbone,
    fpn_depth=1,
)
yolo.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001, global_clipnorm=10.0),
    classification_loss="binary_crossentropy",
    box_loss="ciou",
)
yolo.fit(train_set, validation_data=val_set, epochs=3)
```
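For context, the dataset's `labels/` directory contains one `.txt` file per image. As far as I can tell these are YOLO-format annotations, i.e. each line is a class index followed by four normalized `x_center y_center width height` values (the numbers below are made up for illustration):

```
3 0.5120 0.4418 0.2281 0.3147
0 0.1033 0.7215 0.0562 0.0890
```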
I keep getting the following errors when I run this code.
- When I try to perform the data augmentation:
```
convert_format() expects both boxes and images to be batched, or both boxes and
images to be unbatched. Received len(boxes.shape)=2, len(images.shape)=4.
Expected either len(boxes.shape)=2 AND len(images.shape)=3, or
len(boxes.shape)=3 AND len(images.shape)=4.

Arguments received by JitteredResize.call():
  • inputs={'images': 'tf.Tensor(shape=(4, None, None, 3), dtype=float64)',
            'bounding_boxes': {'classes': 'tf.Tensor(shape=(4,), dtype=float32)',
                               'boxes': 'tf.Tensor(shape=(4, None), dtype=float32)'}}
```
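For comparison, here is a minimal dummy batch that, as far as I understand from the tutorial and the error message, has the structure the augmentation layers expect (rank-3 boxes alongside rank-4 images; the coordinates are made up):

```python
import tensorflow as tf
import keras_cv

# Two same-size dummy images, each with a different number of boxes.
images = tf.zeros((2, 640, 640, 3))
# Boxes should be (batch, num_boxes, 4): four "xyxy" coordinates per box,
# ragged over num_boxes. Mine come out as rank 2, shape (4, None), instead.
boxes = tf.ragged.constant(
    [[[10.0, 10.0, 100.0, 100.0], [50.0, 50.0, 200.0, 200.0]],  # 2 boxes
     [[30.0, 40.0, 300.0, 400.0]]],                              # 1 box
    ragged_rank=1,
)
classes = tf.ragged.constant([[0.0, 3.0], [5.0]])

layer = keras_cv.layers.JitteredResize(
    target_size=(640, 640), scale_factor=(0.75, 1.3), bounding_box_format="xyxy"
)
out = layer({"images": images, "bounding_boxes": {"boxes": boxes, "classes": classes}})
```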
- When I skip the data augmentation and try to train the model:
```
Failed to convert elements of tf.RaggedTensor(values=tf.RaggedTensor(
values=Tensor("yolov8_detector_1/functional_1_1/rescaling_1/Add:0",
shape=(None, 3), dtype=float32), row_splits=Tensor("data_2:0", shape=(None,),
dtype=int64)), row_splits=Tensor("data_1:0", shape=(5,), dtype=int64)) to
Tensor. Consider casting elements to a supported type. See
https://www.tensorflow.org/api_docs/python/tf/dtypes for supported TF dtypes.

Arguments received by Pad.call():
  • x=tf.Tensor(shape=(4, None, None, 3), dtype=float32)
  • constant_values=None
```
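I wondered whether this second error happens because the images reaching the model are still ragged (`shape=(4, None, None, 3)`) once I skip the resize/augmentation step. If I am reading the tutorial correctly, it pads every image to a fixed size before the model with something like the following (untested on my side):

```python
# From the tutorial's inference section: pad/resize so every image in the
# batch has the same static shape. This would have to run on the dict-format
# dataset, i.e. before the dict_to_tuple map.
inference_resizing = keras_cv.layers.Resizing(
    640, 640, pad_to_aspect_ratio=True, bounding_box_format="xyxy"
)
train_set = train_set.map(inference_resizing, num_parallel_calls=tf.data.AUTOTUNE)
```

Is that the right way to replace the augmentation step, or is the problem somewhere in how I load the bounding boxes?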
I would really appreciate some assistance with this. I have tried numerous things to fix it, but nothing has helped. Thank you.