I am using the TFLite C library to generate model inference C code for an edge device.
In my use case, I require certain operations, such as tf.unpack with shape (0, 2), which are supported by TensorFlow but not currently available in TFLite.
How I built the TFLite C Library
I found a solution article on the Google AI Edge website (here), which provides instructions on building the TFLite C library using CMake. I followed these steps and successfully built the library without any errors.
As a result, I obtained libtensorflowlite_c.so
and placed it in /usr/local/lib
, which is the system library directory on Ubuntu.
How I built the TensorFlow Flex Delegate library
Similarly, there is an article here that explains how to build libtensorflowlite_flex.so.
I followed the instructions provided and successfully built the library without encountering any issues.
To verify the build, I ran the command:
strings /usr/local/lib/libtensorflowlite_flex.so | grep Unpack
This command displayed several symbols containing Unpack, which suggests that the TensorFlow Unpack kernels were compiled into the library.
Click to show output
UnpackOp<CPUDevice, ::tensorflow::uint64>
UnpackOp<CPUDevice, ::int64_t>
UnpackOp<CPUDevice, ::tensorflow::uint32>
UnpackOp<CPUDevice, ::tensorflow::uint16>
UnpackOp<CPUDevice, ::tensorflow::int16>
UnpackOp<CPUDevice, ::tensorflow::uint8>
UnpackOp<CPUDevice, ::tensorflow::int8>
UnpackOp<CPUDevice, ::tensorflow::int32>
UnpackOp<CPUDevice, Eigen::half>
UnpackOp<CPUDevice, ::tensorflow::bfloat16>
UnpackOp<CPUDevice, ::tensorflow::complex64>
UnpackOp<CPUDevice, ::tensorflow::complex128>
UnpackOp<CPUDevice, ::tensorflow::tstring>
UnpackOp<CPUDevice, ::tensorflow::ResourceHandle>
UnpackOp<CPUDevice, ::tensorflow::Variant>
UnpackOp<CPUDevice, ::tensorflow::float8_e5m2>
UnpackOp<CPUDevice, ::tensorflow::float8_e4m3fn>
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::uint64, true >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::uint64, false >
TensorArrayUnpackOrScatterOp<CPUDevice, ::int64_t, true >
TensorArrayUnpackOrScatterOp<CPUDevice, ::int64_t, false >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::uint32, true >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::uint32, false >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::uint16, true >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::uint16, false >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::int16, true >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::int16, false >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::uint8, true >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::uint8, false >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::int8, true >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::int8, false >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::int32, true >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::int32, false >
TensorArrayUnpackOrScatterOp<CPUDevice, Eigen::half, true >
TensorArrayUnpackOrScatterOp<CPUDevice, Eigen::half, false >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::bfloat16, true >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::bfloat16, false >
TensorArrayUnpackOrScatterOp<CPUDevice, float, true >
TensorArrayUnpackOrScatterOp<CPUDevice, float, false >
TensorArrayUnpackOrScatterOp<CPUDevice, double, true >
TensorArrayUnpackOrScatterOp<CPUDevice, double, false >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::complex64, true >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::complex64, false >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::complex128, true >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::complex128, false >
TensorArrayUnpackOrScatterOp<CPUDevice, bool, true >
TensorArrayUnpackOrScatterOp<CPUDevice, bool, false >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::tstring, true >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::tstring, false >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::ResourceHandle, true >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::ResourceHandle, false >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::Variant, true >
TensorArrayUnpackOrScatterOp<CPUDevice, ::tensorflow::Variant, false >
'(new gtl::FlatSet<string>{"ArgMax", "ArgMin", "AudioSpectrogram", "AvgPool", "BatchMatMul", "BatchMatMulV2", "BatchNormWithGlobalNormalization", "BatchToSpace", "BatchToSpaceND", "Bincount", "BroadcastArgs", "BroadcastGradientArgs", "Bucketize", "CTCBeamSearchDecoder", "CTCGreedyDecoder", "CTCLoss", "CompareAndBitpack", "ComplexAbs", "Concat", "ConcatOffset", "ConcatV2", "Conv2D", "Copy", "CopyHost", "Cross", "CudnnRNN", "CudnnRNNBackprop", "CudnnRNNBackpropV2", "CudnnRNNBackpropV3", "CudnnRNNCanonicalToParams", "CudnnRNNCanonicalToParamsV2", "CudnnRNNParamsSize", "CudnnRNNParamsToCanonical", "CudnnRNNParamsToCanonicalV2", "CudnnRNNV2", "CudnnRNNV3", "CumProd", "CumSum", "DebugNanCount", "DebugNumericSummary", "DecodeProtoV2", "DecodeWav", "DeepCopy", "DepthToSpace", "Dequantize", "Diag", "DiagPart", "EditDistance", "Empty", "EncodeProtoV2", "EncodeWav", "ExtractImagePatches", "ExtractVolumePatches", "Fill", "Gather", "GatherNd", "GatherV2", "HistogramFixedWidth", "InvertPermutation", "IsInf", "IsNan", "Isfinite", "LinSpace", "LowerBound", "MatMul", "MatrixDiag", "MatrixDiagPart", "MatrixDiagPartV2", "MatrixDiagV2", "Mfcc", "Multinomial", "OneHot", "Pack", "ParameterizedTruncatedNormal", "PopulationCount", "RandomGamma", "RandomPoisson", "RandomPoissonV2", "RandomStandardNormal", "RandomUniform", "RandomUniformInt", "Range", "Rank", "RequantizationRange", "Requantize", "ReverseSequence", "Shape", "ShapeN", "Size", "SpaceToBatch", "SpaceToBatchND", "SpaceToDepth", "SparseMatMul", "Split", "SplitV", "TruncatedNormal", "Unique", "UniqueV2", "UniqueWithCounts", "UniqueWithCountsV2", "Unpack", "UnravelIndex", "UpperBound", "Where"})' Must be non NULL
TensorArrayUnpack
Unpack
UnpackOp<CPUDevice, float>
UnpackOp<CPUDevice, double>
UnpackOp<CPUDevice, bool>
UnpackOp<CPUDevice, int32>
UnpackOp<CPUDevice, int64>
UnpackGrad
tfg.Unpack
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceEmEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceElEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceEjEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceEtEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceEsEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceEhEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceEaEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceEiEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceENS1_4halfEEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceENS1_8bfloat16EEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceEfEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceEdEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceESt7complexIfEEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceESt7complexIdEEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceEbEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceEN3tsl7tstringEEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceENS_14ResourceHandleEEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceENS_7VariantEEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceEN9ml_dtypes15float8_internal11float8_e5m2EEE
N10tensorflow8UnpackOpIN5Eigen16ThreadPoolDeviceEN9ml_dtypes15float8_internal13float8_e4m3fnEEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEmLb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEmLb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceElLb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceElLb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEjLb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEjLb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEtLb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEtLb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEsLb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEsLb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEhLb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEhLb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEaLb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEaLb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEiLb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEiLb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceENS1_4halfELb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceENS1_4halfELb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceENS1_8bfloat16ELb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceENS1_8bfloat16ELb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEfLb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEfLb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEdLb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEdLb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceESt7complexIfELb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceESt7complexIfELb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceESt7complexIdELb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceESt7complexIdELb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEbLb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEbLb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEN3tsl7tstringELb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceEN3tsl7tstringELb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceENS_14ResourceHandleELb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceENS_14ResourceHandleELb0EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceENS_7VariantELb1EEE
N10tensorflow28TensorArrayUnpackOrScatterOpIN5Eigen16ThreadPoolDeviceENS_7VariantELb0EEE
Unpack
How I use C Library Function to load model and delegate library
I have checked the converted model and confirmed that some ops do have the Flex
prefix in their op names.
It was converted using the following settings:
# TFLiteConverter settings used to produce the .tflite model from a SavedModel.
converter = tf.lite.TFLiteConverter.from_saved_model(model_folder)
# Apply the converter's default optimization set.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Allow ops with no TFLite builtin kernel to fall back to TensorFlow (Select
# TF / Flex) kernels; such ops appear in the model with a "Flex" name prefix.
converter.target_spec.supported_ops = [
tf.lite.OpsSet.SELECT_TF_OPS, tf.lite.OpsSet.TFLITE_BUILTINS,
]
# Let unresolved ops pass through conversion as custom ops.
converter.allow_custom_ops = True
# Legalize TF tensor-list ops into custom ops instead of rejecting them.
converter.legalize_custom_tensor_list_ops = True
# Private/experimental flag: keep tensor-list ops unlowered so they are
# handled by the Flex delegate at runtime — TODO confirm still needed.
converter._experimental_lower_tensor_list_ops = False
# Enable support for stateful TF resource variables in the converted model.
converter.experimental_enable_resource_variables = True
tflite_model = converter.convert()
Then, I use the code below to load the model and delegate operations that are not supported by TFLite to the Flex library.
void tflite_load(const string model_path, map<string, TfLiteSignatureRunner*>& sig_map) {
// Load model
TfLiteModel* model = TfLiteModelCreateFromFile(model_path.c_str());
if (model == nullptr) {
cerr << "Failed to load model." << endl;
return;
}
// Create Interpreter
TfLiteInterpreterOptions* options = TfLiteInterpreterOptionsCreate();
TfLiteInterpreter* interpreter = TfLiteInterpreterCreate(model, options);
TfLiteInterpreterOptionsDelete(options);
auto hdll = SharedLibrary::LoadLibrary("libtensorflowlite_flex.so");
if (hdll == nullptr) {
cerr << "Load failed:" << dlerror() << endl;
return;
}
auto TF_AcquireFlexDelegate = reinterpret_cast<Interpreter::TfLiteDelegatePtr(*)()>(SharedLibrary::GetLibrarySymbol(hdll, "TF_AcquireFlexDelegate"));
if (TF_AcquireFlexDelegate == NULL) {
cerr << "TF_AcquireFlexDelegate couldn't be run: " << dlerror() << endl;
return;
}
std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)> delegate = TF_AcquireFlexDelegate();
auto TfLiteStatus = TfLiteInterpreterModifyGraphWithDelegate(interpreter, delegate.get());
if(TfLiteStatus==0)
cout << "ModifyGraphWithDelegate Ok" << endl;
cout << "----------Load model successfully!----------" << endl;
}
However, it always displays a message indicating that no nodes have been delegated by the Flex library.
INFO: Created TensorFlow Lite delegate for select TF ops.
2025-03-14 17:43:31.412379: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
INFO: TfLiteFlexDelegate delegate: 0 nodes delegated out of 623 nodes with 0 partitions.
ModifyGraphWithDelegate Ok
----------Load model successfully!----------
How should I resolve this issue?
I would really appreciate any insights or suggestions. Thanks in advance for your help!