I am a newcomer to TensorFlow and I have a Faster R-CNN SavedModel.
Recently I have been trying to load this model in Python for object detection.
Because a large number of images has to be detected every day, I am using multithreading to call the model, but I am not sure whether I have written the code correctly, because my server's GPU utilization is high. My graphics card is an NVIDIA A10 (16 vCPUs, 24 GB of GPU memory).
The following is the code that loads the model:
import os
import time

import tensorflow as tf
from tensorflow import saved_model as sm

# Config and logger are defined elsewhere in the module.
def init_model_for_target(od_target):
    rtn_val = False
    try:
        global g_model, g_model_fn
        time_init_start = time.time()
        model_with_signature_path = Config["OD_MODEL_DIR"][od_target]
        assert os.path.exists(model_with_signature_path), "od model does not exist"
        # Load the SavedModel and grab its default serving signature once,
        # so every worker thread reuses the same callable.
        g_model = sm.load(model_with_signature_path)
        g_model_fn = g_model.signatures[sm.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
        logger.info(f"MAIN: init od model for [{od_target}], costs: {time.time() - time_init_start}")
        rtn_val = True
    except BaseException as e:
        logger.error(f"Exception: init od model for [{od_target}] failed, {e}")
    return rtn_val
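The init function is called once in the main thread before any detection tasks are submitted; a minimal sketch of that call ("person" is just a placeholder target name, not my real configuration key):

import sys

# Sketch: load the model once in the main thread before any worker threads
# start ("person" is a placeholder target name).
if not init_model_for_target("person"):
    sys.exit("MAIN: could not load the od model")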
And this is the code that calls the model
import numpy as np

def run_inference_for_single_image(image):
    # Add a batch dimension: the signature expects shape (1, H, W, 3).
    input_image = tf.convert_to_tensor(image)
    input_image = input_image[tf.newaxis, ...]
    input_true_image_shape = tf.reshape(tf.constant(image.shape, dtype=tf.int32, name="true_image_shape:0"), (1, 3))
    output_dict = g_model_fn(image=input_image, true_image_shape=input_true_image_shape)
    # Keep only the valid detections and convert them to NumPy arrays.
    num_detections = int(output_dict.pop("num_detections"))
    output_dict = {key: value[0, :num_detections].numpy() for key, value in output_dict.items()}
    output_dict["num_detections"] = num_detections
    output_dict["detection_classes"] = output_dict["detection_classes"].astype(np.int64)
    return output_dict
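The keyword names image and true_image_shape come from the serving signature; a small sketch of how the signature can be inspected to double-check them (assuming g_model_fn is the ConcreteFunction loaded above):

# Sketch: print the signature's expected inputs and outputs to verify the
# keyword names (image, true_image_shape) and the output keys.
print(g_model_fn.structured_input_signature)
print(g_model_fn.structured_outputs)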
def subthread02_load_image_and_detect(image_path, image_folder, image_name):
    from PIL import Image
    rtn_val = False
    time_od_costs = 0
    res_dict = {"image_folder": image_folder, "image_name": image_name, "od_results": []}
    try:
        time_od_start = time.time()
        image_data = np.array(Image.open(image_path))
        od_res_dict = run_inference_for_single_image(image_data)
        time_od_costs = time.time() - time_od_start
        res_dict.update(od_results=od_res_dict)
        rtn_val = True
    except BaseException as e:
        logger.error(f"DETECT: Exception: {e}")
    return rtn_val, time_od_costs, res_dict
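For reference, a single-image call to this helper (outside the pool) looks roughly like this; the path and names below are placeholders:

# Sketch: call the helper directly for one image (placeholder path/names).
ok, costs, res = subthread02_load_image_and_detect("/data/images/demo/0001.jpg", "demo", "0001.jpg")
print(ok, costs, res["od_results"]["num_detections"] if ok else 0)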
Finally, I submit the multithreaded tasks like this (MAX_THREADS_DETECT = 3):
from concurrent.futures import ThreadPoolExecutor, as_completed

pool_detect = ThreadPoolExecutor(max_workers=MAX_THREADS_DETECT, thread_name_prefix=od_target + "_od")
ls_future_detect = []
...
ls_future_detect.append(pool_detect.submit(subthread02_load_image_and_detect, pic_save_path, image_folder, image_name))
for index, future in enumerate(as_completed(ls_future_detect)):
    is_success, time_od_costs, od_res_dict = future.result()
    ...
pool_detect.shutdown()
What puzzles me is that both my GPU utilization and my GPU memory usage are high. Is it because I didn't write the multithreaded code correctly? Or does Faster R-CNN simply require this much computational power?
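If it helps, here is a minimal sketch of how TensorFlow's own GPU memory counters can be read from inside the process (tf.config.experimental.get_memory_info needs TF 2.5 or newer, and "GPU:0" assumes a single-GPU machine):

# Sketch: query TensorFlow's allocator stats for the first GPU
# (requires TF >= 2.5; "GPU:0" assumes a single-GPU machine).
mem = tf.config.experimental.get_memory_info("GPU:0")
print(f"current: {mem['current'] / 2**20:.1f} MiB, peak: {mem['peak'] / 2**20:.1f} MiB")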