Thank you. Yes, I was able to understand it, and the author has also updated the code. Now I have another question about this part:
# Collect (label, xmin, ymin, xmax, ymax) for every detection that reaches the
# threshold. Boxes arrive as fractions in (ymin, xmin, ymax, xmax) order and
# are scaled by the image width/height and truncated to ints.
output = []
for idx, confidence in enumerate(output_dict['detection_scores']):
    if confidence < threshold:
        continue
    name = category_index[output_dict['detection_classes'][idx]]['name']
    top, left, bottom, right = output_dict['detection_boxes'][idx]
    output.append((
        name,
        int(left * image_width),
        int(top * image_height),
        int(right * image_width),
        int(bottom * image_height),
    ))

# Save incident: crop each box carrying the wanted label, write it as a JPEG
# and log it in the results table (a natural place to hook in e.g. an email).
for found_label, left, top, right, bottom in output:
    if found_label != label_to_look_for:
        continue
    # Swap the red and blue channels before handing the pixels to PIL.
    swapped = cv2.cvtColor(np.array(image_show), cv2.COLOR_RGB2BGR)
    crop = Image.fromarray(swapped).crop((left, top, right, bottom))
    # The current row count doubles as the next free file number.
    file_path = output_directory+'/images/'+str(len(df))+'.jpg'
    crop.save(file_path, "JPEG", icc_profile=crop.info.get('icc_profile'))
    df.loc[len(df)] = [datetime.datetime.now(), file_path]
    df.to_csv(output_directory+'/results.csv', index=None)
What does the score mean? I guessed it is one of the values returned in output_dict['detection_scores']. When I apply a threshold of 0.5, no images are saved, but when I use 0.0 as my threshold, nearly 2000 images are cropped from the single image and saved. So I checked output_dict['detection_scores'], and it contains many values.
Here are my detection scores
Detection scores
0.9999884
0.9875551
0.9298591
0.18066546
0.06862515
0.060081333
0.05767244
0.043635964
0.040076256
0.037350416
0.033092856
0.03055805
0.030125767
0.029847085
0.029215574
0.028655708
0.027012408
0.025616944
0.02515155
0.023829997
0.023615092
0.02239129
0.021808654
0.021342427
0.020629108
0.01946026
0.01930508
0.019111484
0.018848777
0.017635822
0.017435431
0.016988814
0.016978234
0.01697129
0.01664561
0.016387343
0.016295582
0.016104639
0.016039342
0.015885413
0.01586929
0.015589744
0.015241742
0.015219361
0.015110254
0.015015632
0.014730513
0.014715463
0.01455313
0.0144896805
0.014403313
0.014309466
0.01429531
0.01426512
0.014217079
0.014211059
0.014092535
0.013988614
0.013938546
0.013933927
0.01387459
0.013772488
0.013516575
0.0134027
0.013376057
0.013336897
0.01318419
0.013004512
0.0129831135
0.01276961
0.012724757
0.012371838
0.012347668
0.012268215
0.0122665465
0.012233138
0.01222229
0.012182564
0.012130201
0.0121108
0.012091279
0.012085319
0.0120278895
0.011973709
0.0119514465
0.011933267
0.011857897
0.011782587
0.011546642
0.011545628
0.011477649
0.011402994
0.011328131
0.011262983
0.011066496
0.010975838
0.010870099
0.010821551
0.010576516
0.01054436
So I am wondering: is there a problem with my model, or are the scores correct and my code is wrong?
Here is my code
def run_inference_and_extract(model,category_index,threshold,label_to_look_for,
                              output_dir):
    """Run detection on ``image_np`` and save a crop of every matching box.

    The image is NOT a parameter: ``image_np`` is read from the enclosing
    scope and must be defined before this function is called.
    NOTE(review): consider passing the image in explicitly.

    Steps:
      1. Ensure ``output_dir`` and ``output_dir/images`` exist and load (or
         start) the ``results.csv`` log with columns timestamp/image_path.
      2. Run ``run_inference_for_single_image`` and draw all returned boxes
         onto ``image_np`` (crops are taken from a copy made beforehand, so
         they do not contain the drawn boxes).
      3. Keep detections whose score is not below ``threshold`` and convert
         their (ymin, xmin, ymax, xmax) fractions to pixel coordinates.
      4. For every kept detection named ``label_to_look_for``, save the crop
         as ``images/<row number>.jpg`` and append a row to ``results.csv``.

    The score list usually holds many low-confidence candidates in addition
    to the few real hits, so a threshold of 0.0 lets every candidate through.

    Args:
        model: Passed unchanged to ``run_inference_for_single_image``.
        category_index: Mapping from class id to a dict with a ``'name'`` key.
        threshold: Detections scoring below this value are skipped.
        label_to_look_for: Only detections with exactly this name are saved.
        output_dir: Directory receiving ``images/`` and ``results.csv``.

    Returns:
        None. Results are written to disk.
    """
    # Create output_dir AND its images/ subfolder. Previously only output_dir
    # was created, so the first crop could not be written on a fresh directory.
    os.makedirs(output_dir+'/images', exist_ok=True)

    results_csv = output_dir+'/results.csv'
    if os.path.exists(results_csv):
        df = pd.read_csv(results_csv)
    else:
        df = pd.DataFrame(columns=['timestamp','image_path'])

    # Copy taken before visualization draws on image_np, so crops stay clean.
    image_show = np.copy(image_np)
    image_height, image_width, _ = image_np.shape

    # Actual detection
    output_dict = run_inference_for_single_image(model,image_np)
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks_reframed', None),
        use_normalized_coordinates=True,
        line_thickness=8)

    # Collect (label, xmin, ymin, xmax, ymax) in pixels for confident hits.
    output = []
    for index, score in enumerate(output_dict['detection_scores']):
        if score < threshold:
            continue
        label = category_index[output_dict['detection_classes'][index]]['name']
        ymin, xmin, ymax, xmax = output_dict['detection_boxes'][index]
        output.append((label, int(xmin * image_width), int(ymin * image_height),
                       int(xmax * image_width), int(ymax * image_height)))

    matches = [entry for entry in output if entry[0] == label_to_look_for]
    if not matches:
        return

    # The channel swap and PIL conversion do not depend on the box, so they
    # are done once instead of once per crop.
    array = cv2.cvtColor(np.array(image_show), cv2.COLOR_RGB2BGR)
    image = Image.fromarray(array)

    # Save incident
    for _, x_min, y_min, x_max, y_max in matches:
        cropped_img = image.crop((x_min, y_min, x_max, y_max))
        # The current row count doubles as the next free file number.
        file_path = output_dir+'/images/'+str(len(df))+'.jpg'
        cropped_img.save(file_path, 'JPEG', icc_profile=cropped_img.info.get('icc_profile'))
        df.loc[len(df)] = [datetime.datetime.now(), file_path]
        # Written after every crop so the log matches the files on disk even
        # if a later save fails.
        df.to_csv(results_csv, index=None)
And I call the function like this:
# Where the crops (images/) and the results.csv log are written.
output_dir = '/content/sample_data'
# Only detections carrying exactly this class name are cropped and saved.
label_to_look_for = "INVOICE NO"
# Detections scoring below this value are skipped.
threshold=0.5

run_inference_and_extract(
    model,
    category_index,
    threshold=threshold,
    label_to_look_for=label_to_look_for,
    output_dir=output_dir,
)
It would be helpful if you could shed some light on this. Thanks in advance!