I have six sub-folders, one per class of my classification task. They contain images with the extensions jpg, png, and jpeg, all of which are accepted by TensorFlow.
I load them with the function image_dataset_from_directory and then print the shape of each batch to check that they are correct.
# Walk the validation dataset once and show the shape of every
# (images, labels) batch it yields.
for images, labels in val_ds:
    print(images.shape, labels.shape)
This loop fails with the unknown image file format error.
I also ran two scripts, taken from similar questions, that remove corrupted images.
from pathlib import Path
import imghdr
import shutil
# Move every image-like file under data_dir whose real (content-sniffed)
# format is missing or not decodable by TensorFlow into a quarantine folder.
destination_folder_path = '../bekar/flood_bekar'
data_dir = "./flood"
# Path.suffix always includes the leading dot (".jpeg"), so every entry here
# must include it too; an entry without the dot can never match.
image_extensions = [".png", ".jpg", ".bmp", ".jpeg"]  # add all your image file extensions here
img_type_accepted_by_tf = ["bmp", "gif", "jpeg", "png"]

destination_dir = Path(destination_folder_path)

# Snapshot the listing before moving anything so the walk is not affected
# by files disappearing underneath it.
for filepath in list(Path(data_dir).rglob("*")):
    # Skip directories (imghdr.what() would fail to open them) and files
    # whose extension is not one we treat as an image.
    if not filepath.is_file() or filepath.suffix.lower() not in image_extensions:
        continue

    img_type = imghdr.what(filepath)
    if img_type is None:
        print(f"{filepath} is not an image")
    elif img_type not in img_type_accepted_by_tf:
        print(f"{filepath} is a {img_type}, not accepted by TensorFlow")
    else:
        continue  # genuine, supported image: leave it where it is

    # Create the quarantine folder lazily, only once something needs moving.
    destination_dir.mkdir(parents=True, exist_ok=True)
    shutil.move(str(filepath), str(destination_dir / filepath.name))
This removed at least 500 images from each of the sub-folders (classes).
import os
import cv2
import imghdr
def check_images(s_dir, ext_list):
    """Collect files in the class folders of *s_dir* that fail image validation.

    Every immediate sub-directory of ``s_dir`` is treated as one class.
    Each regular file inside a class folder is checked two ways:
    ``imghdr.what`` sniffs the format from the file contents, and
    ``cv2.imread`` attempts to decode it.

    Args:
        s_dir: Directory whose sub-directories are the class folders.
        ext_list: Accepted format names as reported by ``imghdr.what``
            (e.g. ``'jpeg'``, ``'png'``). imghdr reports JPEG files as
            ``'jpeg'``, so a ``'jpg'`` entry never matches anything.

    Returns:
        A tuple ``(bad_images, bad_ext)``. ``bad_images`` lists every file
        that failed either check, each path once. ``bad_ext`` lists the
        files whose sniffed format is not in ``ext_list``.
    """
    bad_images = []
    bad_ext = []
    for klass in os.listdir(s_dir):
        klass_path = os.path.join(s_dir, klass)
        print('processing class directory ', klass)
        if not os.path.isdir(klass_path):
            print('*** WARNING*** you have files in ', s_dir, ' it should only contain sub directories')
            continue

        for f in os.listdir(klass_path):
            f_path = os.path.join(klass_path, f)
            # Test this before sniffing: imghdr.what() opens the path and
            # raises on a directory instead of returning None.
            if not os.path.isfile(f_path):
                print('*** fatal error, you a sub directory ', f, ' in class directory ', klass)
                continue

            try:
                detected = imghdr.what(f_path)
            except OSError:
                # Unreadable file: treat it like an unrecognised format.
                detected = None
            type_ok = detected in ext_list
            if not type_ok:
                bad_ext.append(f_path)

            # cv2.imread reports an undecodable file by returning None;
            # only OpenCV's own errors are caught, so a missing cv2 import
            # surfaces instead of marking every file as bad.
            try:
                readable = cv2.imread(f_path) is not None
            except cv2.error:
                readable = False
            if not readable:
                print('file ', f_path, ' is not a valid image file')

            # Record the path once even when both checks fail.
            if not (type_ok and readable):
                bad_images.append(f_path)
    return bad_images, bad_ext
# Validate the class folders under the current directory and print the
# path of every file that was flagged.
source_dir = './'
good_exts = ['jpg', 'png', 'jpeg', 'gif', 'bmp']
bad_file_list, bad_ext_list = check_images(source_dir, good_exts)
if not bad_file_list:
    print('no improper image files were found')
else:
    print('improper image files are listed below')
    for bad_path in bad_file_list:
        print(bad_path)
I used the two scripts above to remove corrupted images, but I am still getting the unknown file format error.