I am trying to run the ssd_mobilenet_v3_float TFLite model using TFLM (TensorFlow Lite for Microcontrollers). However, the output tensor shapes reported by my C++ code do not match the ones reported by the Python TFLite interpreter.
Below are the Python and C++ codes I used to verify the input and output shapes, along with their respective outputs. Is there an issue with my model conversion, or am I using the TFLM library incorrectly?
Any insights or suggestions would be greatly appreciated.
ssd_mobilenet_v3_float.tflite model file (Google Drive link):
Python
import tensorflow as tf

# Load the TFLite model and allocate its tensors so that the
# input/output metadata (shapes, dtypes) is populated.
interpreter = tf.lite.Interpreter(model_path='ssd_mobilenet_v3_float.tflite')
interpreter.allocate_tensors()

# Query input and output tensor metadata.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print("Input Details:")
for detail in input_details:
    print(f"Name: {detail['name']}")
    print(f"Shape: {detail['shape']}")
    print(f"Type: {detail['dtype']}")

print("\nOutput Details:")
for detail in output_details:
    print(f"Name: {detail['name']}")
    print(f"Shape: {detail['shape']}")
    print(f"Type: {detail['dtype']}")
output
Input Details:
Name: normalized_input_image_tensor
Shape: [ 1 320 320 3]
Type: <class ‘numpy.float32’>
Output Details:
Name: TFLite_Detection_PostProcess
Shape: [ 1 10 4]
Type: <class ‘numpy.float32’>
Name: TFLite_Detection_PostProcess:1
Shape: [ 1 10]
Type: <class ‘numpy.float32’>
Name: TFLite_Detection_PostProcess:2
Shape: [ 1 10]
Type: <class ‘numpy.float32’>
Name: TFLite_Detection_PostProcess:3
Shape: [1]
Type: <class ‘numpy.float32’>
TFLM C++
#include <cstdint>
#include <cstdio>
#include <ctime>

#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"

#include "ssd_mobilenet_v3_float.h"
// Model input geometry: 320x320 RGB float image.
constexpr int kNumCols = 320;
constexpr int kNumRows = 320;
constexpr int kNumChannels = 3;
constexpr int kMaxImageSize = kNumCols * kNumRows * kNumChannels;

// Class indices (person-detection example convention).
constexpr int kPersonIndex = 1;
constexpr int kNotAPersonIndex = 0;

// Scratch arena handed to the MicroInterpreter for all tensor storage.
// TFLM expects this buffer to be 16-byte aligned; an unaligned arena can
// fail allocation or misbehave on some targets, so force the alignment.
constexpr int tensor_arena_size = 16420 * 1024;
alignas(16) uint8_t tensor_arena[tensor_arena_size];
// Produces a dummy all-zero image of the requested dimensions in
// `image_data`; stands in for a real camera/file image source.
// Always succeeds.
TfLiteStatus GetImage(int image_width, int image_height, int channels, float* image_data) {
  const int value_count = image_width * image_height * channels;
  for (int idx = 0; idx != value_count; ++idx) {
    image_data[idx] = 0.0f;
  }
  return kTfLiteOk;
}
int main() {
  clock_t start, end;
  double cpu_time_used;
  start = clock();

  // Map the serialized flatbuffer and sanity-check the schema version.
  const tflite::Model* model = ::tflite::GetModel(ssd_mobilenet_v3_float_tflite);
  if (model->version() != TFLITE_SCHEMA_VERSION) {
    // model->version() returns uint32_t; cast so "%lu" is portable
    // (passing uint32_t to %lu is undefined behavior on LP64 targets).
    printf("Model provided is schema version %lu not equal to supported version %d.\n",
           static_cast<unsigned long>(model->version()), TFLITE_SCHEMA_VERSION);
    return -1;
  }

  // Register only the ops this graph uses (11 of the 20 slots).
  tflite::MicroMutableOpResolver<20> micro_op_resolver;
  micro_op_resolver.AddConv2D();
  micro_op_resolver.AddAdd();
  micro_op_resolver.AddDepthwiseConv2D();
  micro_op_resolver.AddReshape();
  micro_op_resolver.AddConcatenation();
  micro_op_resolver.AddLogistic();
  micro_op_resolver.AddDetectionPostprocess();
  micro_op_resolver.AddPack();
  micro_op_resolver.AddMul();
  micro_op_resolver.AddMean();
  micro_op_resolver.AddHardSwish();

  tflite::MicroInterpreter interpreter(model, micro_op_resolver, tensor_arena, tensor_arena_size);
  TfLiteStatus allocate_status = interpreter.AllocateTensors();
  if (allocate_status != kTfLiteOk) {
    printf("Tensor allocation failed\n");
    return -1;
  }

  // Feed a dummy all-zero image and run inference once.
  TfLiteTensor* input = interpreter.input(0);
  GetImage(kNumCols, kNumRows, kNumChannels, input->data.f);
  TfLiteStatus invoke_status = interpreter.Invoke();
  if (invoke_status != kTfLiteOk) {
    printf("Model invocation failed\n");
    return -1;
  }

  // outputs_size() returns size_t; use a matching index type.
  for (size_t i = 0; i < interpreter.outputs_size(); ++i) {
    TfLiteTensor* output = interpreter.output(i);
    printf("Output %zu:\n", static_cast<size_t>(i));
    printf(" dims->size: %d\n", output->dims->size);
    // BUG FIX: only read the dims->data[] entries that exist. The original
    // code printed data[0]..data[3] unconditionally even though dims->size
    // was 0, reading past the end of the TfLiteIntArray — the huge bogus
    // values in the pasted output are adjacent memory (the bytes of the
    // "TFLite_Detection..." tensor name) reinterpreted as ints.
    // NOTE(review): dims->size == 0 here suggests TFLM does not populate
    // output shapes for the custom DetectionPostprocess op the way the
    // full TFLite runtime does — verify against the TFLM kernel source.
    for (int d = 0; d < output->dims->size; ++d) {
      printf(" dims->data[%d]: %d\n", d, output->dims->data[d]);
    }
    printf(" type: %d\n", output->type);
  }

  end = clock();
  cpu_time_used = ((double)(end - start)) / CLOCKS_PER_SEC;
  printf("Function took %f seconds to execute\n", cpu_time_used);
  return 0;
}
output
Output 0:
dims->size: 0
dims->data[0]: 28
dims->data[1]: 1766606420
dims->data[2]: 1147102580
dims->data[3]: 1667593317
type: 1
Output 1:
dims->size: 0
dims->data[0]: 30
dims->data[1]: 1766606420
dims->data[2]: 1147102580
dims->data[3]: 1667593317
type: 1
Output 2:
dims->size: 0
dims->data[0]: 30
dims->data[1]: 1766606420
dims->data[2]: 1147102580
dims->data[3]: 1667593317
type: 1
Output 3:
dims->size: 0
dims->data[0]: 30
dims->data[1]: 1766606420
dims->data[2]: 1147102580
dims->data[3]: 1667593317
type: 1
Function took 0.204229 seconds to execute