I am trying to use a TFLite model via the C++ API and deploy it on mobile.
Here is the code:
void Model::Inference(const std::vector<int> &inputs,
                      std::vector<int> &outputs) {
  const int size = static_cast<int>(inputs.size());
  const std::vector<int> input_size = {size};
  std::cout << "model memo 1: " << physical_memory_used_by_process_temp() << std::endl;

  // Resize the input tensor to the length of this request and reallocate.
  interpreter_->ResizeInputTensorStrict(0, input_size);
  interpreter_->AllocateTensors();
  std::cout << "model memo 2: " << physical_memory_used_by_process_temp() << std::endl;

  // Copy the input ids into the input tensor.
  int* input_tensor = interpreter_->typed_input_tensor<int>(0);
  for (int i = 0; i < input_size[0]; i++) {
    input_tensor[i] = inputs[i];
  }
  std::cout << "model memo 3: " << physical_memory_used_by_process_temp() << std::endl;

  interpreter_->Invoke();
  std::cout << "model memo 4: " << physical_memory_used_by_process_temp() << std::endl;

  // Read the output ids until the end token is reached.
  int* output_tensor = interpreter_->typed_output_tensor<int>(0);
  std::cout << "model memo 5: " << physical_memory_used_by_process_temp() << std::endl;
  auto output_size = interpreter_->output_tensor(0)->dims->data[0];
  for (int i = 0; i < output_size; i++) {
    if (output_tensor[i] == end_token_id_)
      break;
    outputs.push_back(output_tensor[i]);
  }
}
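physical_memory_used_by_process_temp() is just a helper I use to log the resident memory of the process. A minimal sketch of such a helper (assuming it reads VmRSS from /proc/self/status, which works on Linux and Android) would be:

#include <fstream>
#include <string>

// Returns the resident set size (VmRSS) of the current process in kB,
// or -1 if it cannot be read. Sketch only; the real helper may differ.
long physical_memory_used_by_process_temp() {
  std::ifstream status("/proc/self/status");
  std::string line;
  while (std::getline(status, line)) {
    // The line looks like: "VmRSS:    140000 kB"
    if (line.rfind("VmRSS:", 0) == 0) {
      return std::stol(line.substr(6));
    }
  }
  return -1;
}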
The model file is 85 MB, and memory usage is only about 140 MB when the model is loaded. After interpreter_->Invoke(), memory usage increases to 200 or even 250 MB and never goes down. Is this normal? Is there any way to release the increased memory?
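For context, the interpreter_ is built in the usual way before Inference() is called. This is only a sketch (the BuildInterpreter helper and the out-parameter for the model are illustrations, not my exact code), but it uses the standard TFLite calls:

#include <memory>
#include <string>
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"

// Builds an interpreter from a .tflite file. The FlatBufferModel must
// outlive the interpreter, so it is returned through `model`.
std::unique_ptr<tflite::Interpreter> BuildInterpreter(
    const std::string &path,
    std::unique_ptr<tflite::FlatBufferModel> &model) {
  // BuildFromFile memory-maps the .tflite file where mmap is available,
  // so the 85 MB of weights are not necessarily copied onto the heap.
  model = tflite::FlatBufferModel::BuildFromFile(path.c_str());
  if (!model) return nullptr;

  tflite::ops::builtin::BuiltinOpResolver resolver;
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::InterpreterBuilder builder(*model, resolver);
  if (builder(&interpreter) != kTfLiteOk) return nullptr;

  // Allocates the tensor arena; Inference() above resizes and
  // reallocates it per request.
  if (interpreter->AllocateTensors() != kTfLiteOk) return nullptr;
  return interpreter;
}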