| /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h" |
| |
| #include <algorithm> |
| #include <cstdarg> |
| #include <cstddef> |
| #include <cstdint> |
| #include <cstdio> |
| #include <cstring> |
| #include <functional> |
| #include <initializer_list> |
| #include <iostream> |
| #include <iterator> |
| #include <map> |
| #include <memory> |
| #include <string> |
| #include <tuple> |
| #include <utility> |
| #include <vector> |
| |
| #include "tensorflow/lite/nnapi/NeuralNetworksTypes.h" |
| |
| #ifdef __ANDROID__ |
| #include <sys/system_properties.h> |
| #endif |
| |
| #if defined __ANDROID__ || defined __unix__ |
| #define TFLITE_NNAPI_ALLOW_MMAP_SHARING |
| #include <sys/mman.h> |
| #include <unistd.h> |
| #endif |
| |
| #include "tensorflow/lite/allocation.h" |
| #include "tensorflow/lite/builtin_op_data.h" |
| #include "tensorflow/lite/builtin_ops.h" |
| #include "tensorflow/lite/c/builtin_op_data.h" |
| #include "tensorflow/lite/c/common.h" |
| #include "tensorflow/lite/context_util.h" |
| #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h" |
| #include "tensorflow/lite/delegates/nnapi/quant_lstm_sup.h" |
| #include "tensorflow/lite/delegates/utils.h" |
| #include "tensorflow/lite/kernels/kernel_util.h" |
| #include "tensorflow/lite/minimal_logging.h" |
| #include "tensorflow/lite/nnapi/nnapi_implementation.h" |
| #include "tensorflow/lite/nnapi/nnapi_util.h" |
| #include "tensorflow/lite/util.h" |
| #ifdef NNAPI_VERBOSE_VALIDATION |
| #include "tensorflow/lite/schema/schema_generated.h" |
| #endif |
| #include <farmhash.h> |
| |
| namespace tflite { |
| namespace { |
| |
| // Returns the enum name corresponding to the given error code if the given |
| // value corresponds to one of the error codes in the enumeration above, or |
| // a message with the unknown code otherwise. |
| // LINT.IfChange(NnApiErrorDescription) |
| std::string NnApiErrorDescription(int error_code) { |
| switch (error_code) { |
| case ANEURALNETWORKS_NO_ERROR: |
| return "ANEURALNETWORKS_NO_ERROR"; |
| case ANEURALNETWORKS_OUT_OF_MEMORY: |
| return "ANEURALNETWORKS_OUT_OF_MEMORY"; |
| case ANEURALNETWORKS_INCOMPLETE: |
| return "ANEURALNETWORKS_INCOMPLETE"; |
| case ANEURALNETWORKS_UNEXPECTED_NULL: |
| return "ANEURALNETWORKS_UNEXPECTED_NULL"; |
| case ANEURALNETWORKS_BAD_DATA: |
| return "ANEURALNETWORKS_BAD_DATA"; |
| case ANEURALNETWORKS_OP_FAILED: |
| return "ANEURALNETWORKS_OP_FAILED"; |
| case ANEURALNETWORKS_BAD_STATE: |
| return "ANEURALNETWORKS_BAD_STATE"; |
| case ANEURALNETWORKS_UNMAPPABLE: |
| return "ANEURALNETWORKS_UNMAPPABLE"; |
| case ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE: |
| return "ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE"; |
| case ANEURALNETWORKS_UNAVAILABLE_DEVICE: |
| return "ANEURALNETWORKS_UNAVAILABLE_DEVICE"; |
| case ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT: |
| return "ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT"; |
| case ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT: |
| return "ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT"; |
| case ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT: |
| return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT"; |
| case ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT: |
| return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT"; |
| case ANEURALNETWORKS_DEAD_OBJECT: |
| return "ANEURALNETWORKS_DEAD_OBJECT"; |
| default: |
| return "Unknown NNAPI error code: " + std::to_string(error_code); |
| } |
| } |
| // LINT.ThenChange() |
| |
| #define RETURN_TFLITE_ERROR_IF_NN_ERROR(context, code, call_desc, p_errno) \ |
| do { \ |
| const auto _code = (code); \ |
| const auto _call_desc = (call_desc); \ |
| if (_code != ANEURALNETWORKS_NO_ERROR) { \ |
| const auto error_desc = NnApiErrorDescription(_code); \ |
| TF_LITE_KERNEL_LOG(context, \ |
| "NN API returned error %s at line %d while %s.\n", \ |
| error_desc.c_str(), __LINE__, _call_desc); \ |
| *p_errno = _code; \ |
| return kTfLiteError; \ |
| } \ |
| } while (0) |
| |
| #define RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(context, code, call_desc, \ |
| p_tensor, p_errno) \ |
| do { \ |
| const auto _code = (code); \ |
| const auto _call_desc = (call_desc); \ |
| if (_code != ANEURALNETWORKS_NO_ERROR) { \ |
| const auto error_desc = NnApiErrorDescription(_code); \ |
| TF_LITE_KERNEL_LOG(context, \ |
| "NN API returned error %s at line %d while %s " \ |
| "for tensor '%s'.\n", \ |
| error_desc.c_str(), __LINE__, _call_desc, \ |
| (p_tensor)->name ? (p_tensor)->name : "no-name"); \ |
| *p_errno = _code; \ |
| return kTfLiteError; \ |
| } \ |
| } while (0) |
| |
| bool IsFloat(TfLiteType type) { |
| switch (type) { |
| case kTfLiteFloat32: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool IsFloatOrUInt8(TfLiteType type) { |
| switch (type) { |
| case kTfLiteFloat32: |
| case kTfLiteUInt8: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool IsQuantized(TfLiteType type) { |
| switch (type) { |
| case kTfLiteUInt8: |
| case kTfLiteInt8: |
| return true; |
| default: |
| // kTfLiteInt16 isn't supported as a quantized type yet. |
| return false; |
| } |
| } |
| |
| bool IsInt32(TfLiteType type) { |
| switch (type) { |
| case kTfLiteInt32: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool IsFloatOrQuantized(TfLiteType type) { |
| switch (type) { |
| case kTfLiteFloat32: |
| case kTfLiteUInt8: |
| case kTfLiteInt8: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool IsFloatOrInt32(TfLiteType type) { |
| switch (type) { |
| case kTfLiteFloat32: |
| case kTfLiteInt32: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool IsFloatQuantizedOrInt32(TfLiteType type) { |
| switch (type) { |
| case kTfLiteFloat32: |
| case kTfLiteUInt8: |
| case kTfLiteInt8: |
| case kTfLiteInt32: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
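| // Returns true if the given builtin op can accept a scalar (rank-0) TFLite |
| // tensor as one of its inputs when delegated to NNAPI. |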
| bool IsScalarInputSupported(int builtin_code) { |
| switch (builtin_code) { |
| case kTfLiteBuiltinAdd: |
| case kTfLiteBuiltinMul: |
| case kTfLiteBuiltinSub: |
| case kTfLiteBuiltinDiv: |
| case kTfLiteBuiltinEqual: |
| case kTfLiteBuiltinNotEqual: |
| case kTfLiteBuiltinGreater: |
| case kTfLiteBuiltinGreaterEqual: |
| case kTfLiteBuiltinLess: |
| case kTfLiteBuiltinLessEqual: |
| case kTfLiteBuiltinPow: |
| case kTfLiteBuiltinMaximum: |
| case kTfLiteBuiltinMinimum: |
| case kTfLiteBuiltinPrelu: |
| case kTfLiteBuiltinLeakyRelu: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| // Check if the operation requires explicit conversion from int8 to uint8 |
| // values. |
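| // When conversion is needed, the delegate re-encodes each int8 value v as the |
| // uint8 value (v + 128) and shifts the zero point by +128; the scale is |
| // unchanged (see AddTensor below). |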
| bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code, |
| const TfLiteNode* node) { |
| const int input_id = node->inputs->data[0]; |
| const TfLiteType input_type = context->tensors[input_id].type; |
| switch (builtin_code) { |
| case kTfLiteBuiltinConv2d: |
| case kTfLiteBuiltinDepthwiseConv2d: |
| case kTfLiteBuiltinFullyConnected: { |
| if (input_type == kTfLiteInt8) { |
| const int weights_id = node->inputs->data[1]; |
| const auto& weights_tensor = context->tensors[weights_id]; |
| if ((weights_tensor.type == kTfLiteInt8 || |
| weights_tensor.type == kTfLiteUInt8) && |
| weights_tensor.quantization.type == kTfLiteAffineQuantization) { |
| return true; |
| } |
| } |
| return false; |
| } |
| case kTfLiteBuiltinTransposeConv: { |
| // Transpose convolution has a different order of inputs: |
| // 0: output_shape, 1: filter, 2: input, 3: bias. |
| const int input_id = 2; |
| const TfLiteType input_type = context->tensors[input_id].type; |
| if (input_type == kTfLiteInt8) { |
| return true; |
| } |
| return false; |
| } |
| case kTfLiteBuiltinSelect: { |
| const auto value_type = context->tensors[node->inputs->data[1]].type; |
| return value_type == kTfLiteInt8; |
| } |
| case kTfLiteBuiltinAdd: |
| case kTfLiteBuiltinArgMax: |
| case kTfLiteBuiltinArgMin: |
| case kTfLiteBuiltinAveragePool2d: |
| case kTfLiteBuiltinBatchToSpaceNd: |
| case kTfLiteBuiltinConcatenation: |
| case kTfLiteBuiltinEqual: |
| case kTfLiteBuiltinExpandDims: |
| case kTfLiteBuiltinGather: |
| case kTfLiteBuiltinGreater: |
| case kTfLiteBuiltinGreaterEqual: |
| case kTfLiteBuiltinHardSwish: |
| case kTfLiteBuiltinL2Normalization: |
| case kTfLiteBuiltinLeakyRelu: |
| case kTfLiteBuiltinLess: |
| case kTfLiteBuiltinLessEqual: |
| case kTfLiteBuiltinLogistic: |
| case kTfLiteBuiltinMaximum: |
| case kTfLiteBuiltinMaxPool2d: |
| case kTfLiteBuiltinMean: |
| case kTfLiteBuiltinMinimum: |
| case kTfLiteBuiltinMul: |
| case kTfLiteBuiltinNotEqual: |
| case kTfLiteBuiltinPad: |
| case kTfLiteBuiltinPadv2: |
| case kTfLiteBuiltinPrelu: |
| case kTfLiteBuiltinReduceMax: |
| case kTfLiteBuiltinReduceMin: |
| case kTfLiteBuiltinRelu: |
| case kTfLiteBuiltinReluN1To1: |
| case kTfLiteBuiltinRelu6: |
| case kTfLiteBuiltinResizeBilinear: |
| case kTfLiteBuiltinResizeNearestNeighbor: |
| case kTfLiteBuiltinReshape: |
| case kTfLiteBuiltinSlice: |
| case kTfLiteBuiltinSoftmax: |
| case kTfLiteBuiltinSpaceToBatchNd: |
| case kTfLiteBuiltinSpaceToDepth: |
| case kTfLiteBuiltinDepthToSpace: |
| case kTfLiteBuiltinStridedSlice: |
| case kTfLiteBuiltinSub: |
| case kTfLiteBuiltinTanh: |
| case kTfLiteBuiltinTile: |
| case kTfLiteBuiltinTopkV2: |
| case kTfLiteBuiltinTranspose: { |
| return input_type == kTfLiteInt8; |
| } |
| default: |
| return false; |
| } |
| } |
| |
| constexpr int kLstmFullKernelInputSize = 24; |
| // The 20-input version is deprecated and kept only to |
| // support old models. The latest version of the LSTM full kernel |
| // takes 24 inputs. |
| constexpr int kLstmFullKernelNoOptionalParamsInputSize = 20; |
| constexpr int kLstmBasicKernelInputSize = 5; |
| |
| inline bool isLstmBasicKernel(const TfLiteNode* node) { |
| return node->inputs->size == kLstmBasicKernelInputSize; |
| } |
| |
| inline bool isLstmFullKernel(const TfLiteNode* node) { |
| return node->inputs->size == kLstmFullKernelInputSize || |
| node->inputs->size == kLstmFullKernelNoOptionalParamsInputSize; |
| } |
| |
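| // Returns true if the node is a "hybrid" operator, i.e. one with |
| // floating-point inputs/activations but quantized weights. |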
| bool IsHybridOperator(const TfLiteContext* context, int builtin_code, |
| const TfLiteNode* node) { |
| switch (builtin_code) { |
| case kTfLiteBuiltinConv2d: |
| case kTfLiteBuiltinFullyConnected: { |
| const int input_id = node->inputs->data[0]; |
| const int filter_id = node->inputs->data[1]; |
| const TfLiteType input_type = context->tensors[input_id].type; |
| const TfLiteType filter_type = context->tensors[filter_id].type; |
| return IsFloat(input_type) && IsQuantized(filter_type); |
| } |
| case kTfLiteBuiltinLstm: { |
| const int input_id = node->inputs->data[0]; |
| // Input #1 is optional so use #2 to determine if hybrid. |
| const int weights_id = node->inputs->data[2]; |
| const TfLiteType input_type = context->tensors[input_id].type; |
| const TfLiteType weights_type = context->tensors[weights_id].type; |
| return isLstmFullKernel(node) && IsFloat(input_type) && |
| IsQuantized(weights_type); |
| } |
| case kTfLiteBuiltinUnidirectionalSequenceLstm: { |
| const int input_id = node->inputs->data[0]; |
| // Input #1 is optional so use #2 to determine if hybrid. |
| const int weights_id = node->inputs->data[2]; |
| const TfLiteType input_type = context->tensors[input_id].type; |
| const TfLiteType weights_type = context->tensors[weights_id].type; |
| return IsFloat(input_type) && IsQuantized(weights_type); |
| } |
| case kTfLiteBuiltinBidirectionalSequenceLstm: { |
| const int input_id = node->inputs->data[0]; |
| // Input #1 is optional so use #2 to determine if hybrid. |
| const int weights_id = node->inputs->data[2]; |
| const TfLiteType input_type = context->tensors[input_id].type; |
| const TfLiteType weights_type = context->tensors[weights_id].type; |
| return IsFloat(input_type) && IsQuantized(weights_type); |
| } |
| case kTfLiteBuiltinUnidirectionalSequenceRnn: { |
| const int input_id = node->inputs->data[0]; |
| const int weights_id = node->inputs->data[1]; |
| const TfLiteType input_type = context->tensors[input_id].type; |
| const TfLiteType weights_type = context->tensors[weights_id].type; |
| return IsFloat(input_type) && IsQuantized(weights_type); |
| } |
| default: |
| return false; |
| } |
| } |
| |
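| // Returns true if any entry of the tensor's dims_signature is dynamic |
| // (encoded as -1), e.g. a signature of {1, -1, 256}. |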
| bool HasUnspecifiedDimension(const TfLiteTensor* tensor) { |
| if (tensor->dims_signature) { |
| for (int i : TfLiteIntArrayView(tensor->dims_signature)) { |
| if (i == -1) return true; |
| } |
| } |
| return false; |
| } |
| |
| ANeuralNetworksOperandType ConvertTensorTypeToNNType( |
| const TfLiteTensor* tensor, TfLiteType ann_type_equivalent) { |
| int32_t nn_type = 0; |
| float scale = 0.0f; |
| int32_t zero_point = 0; |
| switch (tensor->type) { |
| case kTfLiteFloat32: |
| nn_type = ANEURALNETWORKS_TENSOR_FLOAT32; |
| break; |
| case kTfLiteUInt8: |
| nn_type = ann_type_equivalent == kTfLiteInt32 |
| ? ANEURALNETWORKS_TENSOR_INT32 |
| : ANEURALNETWORKS_TENSOR_QUANT8_ASYMM; |
| scale = tensor->params.scale; |
| zero_point = tensor->params.zero_point; |
| if (scale == 0) { |
| // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM tensors with zero scale are not |
| // valid in NNAPI. |
| scale = 1; |
| } |
| break; |
| case kTfLiteInt8: |
| nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM; |
| scale = tensor->params.scale; |
| zero_point = tensor->params.zero_point; |
| if (ann_type_equivalent == kTfLiteUInt8) { |
| nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM; |
| zero_point += 128; |
| } else if (ann_type_equivalent == kTfLiteInt32) { |
| nn_type = ANEURALNETWORKS_TENSOR_INT32; |
| zero_point += 128; |
| } |
| if (scale == 0) { |
| // Quantized tensors with zero scale are not valid in NNAPI. |
| scale = 1; |
| } |
| break; |
| case kTfLiteInt32: |
| nn_type = ANEURALNETWORKS_TENSOR_INT32; |
| scale = tensor->params.scale; |
| zero_point = tensor->params.zero_point; |
| break; |
| case kTfLiteBool: |
| nn_type = ANEURALNETWORKS_TENSOR_BOOL8; |
| break; |
| case kTfLiteInt16: |
| nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM; |
| scale = tensor->params.scale; |
| zero_point = tensor->params.zero_point; |
| break; |
| default: |
| break; |
| } |
| uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size); |
| uint32_t* tensor_dims = reinterpret_cast<uint32_t*>(tensor->dims->data); |
| static uint32_t scalar_rank = 1; |
| // Treat a scalar input as a single-element tensor in NNAPI. |
| if (tensor_rank == 0) { |
| tensor_rank = scalar_rank; |
| tensor_dims = &scalar_rank; |
| } |
| ANeuralNetworksOperandType nn_operand_type{ |
| .type = nn_type, |
| .dimensionCount = tensor_rank, |
| .dimensions = tensor_dims, |
| .scale = scale, |
| .zeroPoint = zero_point, |
| }; |
| return nn_operand_type; |
| } |
| |
| // NNAPI in API 31 hard-codes the preferred alignment/padding to 64 bytes. |
| constexpr size_t kDefaultByteAlignmentForNNAPI = 64; |
| |
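| // Returns the number of padding bytes required to round `byte_size` up to a |
| // multiple of kDefaultByteAlignmentForNNAPI. For example, a 100-byte tensor |
| // needs 28 padding bytes so that the padded size becomes 128. |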
| static size_t GetNumPaddingBytes(size_t byte_size) { |
| size_t num_padding_bytes = 0; |
| if (byte_size % kDefaultByteAlignmentForNNAPI) { |
| num_padding_bytes = kDefaultByteAlignmentForNNAPI - |
| (byte_size % kDefaultByteAlignmentForNNAPI); |
| } |
| return num_padding_bytes; |
| } |
| |
| static size_t GetNNTensorSize(size_t tensor_size, bool allow_padding) { |
| size_t padding_bytes = GetNumPaddingBytes(tensor_size); |
| size_t nn_tensor_size = tensor_size; |
| if (allow_padding) { |
| nn_tensor_size += padding_bytes; |
| } |
| return nn_tensor_size; |
| } |
| |
| // Returns the NNAPI device handle for the provided null-terminated device |
| // name. Returns kTfLiteError in case of any NNAPI error or if no device with |
| // the given name can be found. |
| TfLiteStatus GetDeviceHandle(const NnApi* nnapi, TfLiteContext* context, |
| const char* device_name_ptr, |
| ANeuralNetworksDevice** result, int* nnapi_errno) { |
| if (!device_name_ptr) return kTfLiteError; |
| *result = nullptr; |
| std::string device_name(device_name_ptr); |
| uint32_t num_devices = 0; |
| nnapi->ANeuralNetworks_getDeviceCount(&num_devices); |
| |
| for (uint32_t i = 0; i < num_devices; i++) { |
| ANeuralNetworksDevice* device = nullptr; |
| const char* buffer = nullptr; |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, nnapi->ANeuralNetworks_getDevice(i, &device), |
| "Searching for target device", nnapi_errno); |
| |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, nnapi->ANeuralNetworksDevice_getName(device, &buffer), |
| "Searching for target device", nnapi_errno); |
| |
| if (device_name == buffer) { |
| *result = device; |
| return kTfLiteOk; |
| } |
| } |
| |
| context->ReportError(context, |
| "Could not find the specified NNAPI accelerator: %s. " |
| "Must be one of: {%s}.", |
| device_name_ptr, |
| nnapi::GetStringDeviceNamesList().c_str()); |
| return kTfLiteError; |
| } |
| |
| // Compute the hash of a TfLiteIntArray. |
| uint64_t GetHash(const TfLiteIntArray* int_array, uint64_t combine_with = 0) { |
| constexpr auto kHashConst = 0x9e3779b97f4a7800ULL; |
| uint64_t result = combine_with; |
| for (auto i : TfLiteIntArrayView(int_array)) { |
| result = result ^ (i + kHashConst + (result << 10) + (result >> 4)); |
| } |
| return result; |
| } |
| |
| bool HasZeroes(TfLiteIntArrayView array) { |
| for (auto value : array) { |
| if (value == 0) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| // Bit masks for tensor flags. |
| enum { |
| NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0, |
| NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1, |
| NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2, |
| NN_TENSOR_FLAG_FORCE_PER_CHANNEL = 1U << 3, |
| }; |
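| // The flags are combined with bitwise OR; for example, a rank-0 weights |
| // tensor that also needs int8 conversion is added with both |
| // NN_TENSOR_FLAG_SCALAR_AS_TENSOR and NN_TENSOR_FLAG_INT8_CONVERSION set. |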
| |
| // Returns the SDK level to target when delegating to the given devices. |
| // The SDK level is the max of the ones supported by the devices or |
| // the current Android SDK level if no device is present. |
| TfLiteStatus GetTargetSdkVersion( |
| TfLiteContext* context, const NnApi* nnapi, |
| const std::vector<ANeuralNetworksDevice*>& device_handles, |
| int* target_sdk_version, int* nnapi_errno) { |
| *target_sdk_version = nnapi->android_sdk_version; |
| int64_t devices_sdk_version = -1; |
| for (const auto* device_handle : device_handles) { |
| int64_t curr_device_sdk_version; |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi->ANeuralNetworksDevice_getFeatureLevel(device_handle, |
| &curr_device_sdk_version), |
| "Searching for target device", nnapi_errno); |
| |
| devices_sdk_version = |
| std::max(curr_device_sdk_version, devices_sdk_version); |
| } |
| |
| if ((devices_sdk_version > 0) && |
| // This second check is necessary since if the nnapi-reference device is |
| // in the list of target devices the devices_sdk_version value will be |
| // 1000. |
| (devices_sdk_version < nnapi->android_sdk_version)) { |
| TFLITE_LOG(TFLITE_LOG_INFO, |
| "Changing Android NN SDK version %d to version " |
| "supported by target devices: %lld", |
| nnapi->android_sdk_version, devices_sdk_version); |
| |
| *target_sdk_version = devices_sdk_version; |
| } |
| |
| return kTfLiteOk; |
| } |
| |
| // Returns true if this delegate is configured to use a specific set of |
| // devices. This happens if either: |
| // - the accelerator_name option has been specified, or |
| // - the NNAPI CPU implementation has been explicitly disabled. |
| // If exclude_nnapi_reference is true, this method returns false when the |
| // accelerator_name in the delegate options is equal to "nnapi-reference". |
| bool ShouldUseTargetDevices(StatefulNnApiDelegate::Options delegate_options, |
| const NnApi* nnapi, |
| bool exclude_nnapi_reference = false) { |
| const char* device_name_ptr = delegate_options.accelerator_name; |
| std::string nnapi_cpu("nnapi-reference"); |
| bool has_selected_accelerator = device_name_ptr != nullptr; |
| if (exclude_nnapi_reference && has_selected_accelerator) { |
| if (nnapi_cpu == device_name_ptr) return false; |
| } |
| return (delegate_options.disallow_nnapi_cpu && |
| nnapi->android_sdk_version >= |
| delegate::nnapi::kMinSdkVersionForNNAPI12) || |
| has_selected_accelerator; |
| } |
| |
| // Fills the given result vector with the list of devices the given delegate |
| // is referring to. |
| // There are three possible results: |
| // - An empty array (not the full list of available accelerators, |
| // for efficiency reasons) if no accelerator is chosen and the |
| // disallow_nnapi_cpu delegate option is false. |
| // - A single-element array with the target accelerator, if an accelerator |
| // name is specified in the delegate options. |
| // - The full list of available devices minus the NNAPI reference |
| // implementation, if the delegate option disallow_nnapi_cpu has been |
| // specified. |
| TfLiteStatus GetTargetDevices(TfLiteContext* context, TfLiteDelegate* delegate, |
| const NnApi* nnapi, int* nnapi_errno, |
| std::vector<ANeuralNetworksDevice*>* result) { |
| if (nnapi->android_sdk_version < delegate::nnapi::kMinSdkVersionForNNAPI12) { |
| return kTfLiteError; |
| } |
| |
| const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate); |
| const char* device_name_ptr = delegate_options.accelerator_name; |
| |
| if (device_name_ptr != nullptr) { |
| // User specified an accelerator to use. |
| ANeuralNetworksDevice* nnapi_device = nullptr; |
| TF_LITE_ENSURE_STATUS(GetDeviceHandle(nnapi, context, device_name_ptr, |
| &nnapi_device, nnapi_errno)); |
| result->push_back(nnapi_device); |
| } else if (delegate_options.disallow_nnapi_cpu) { |
| std::string nnapi_cpu("nnapi-reference"); |
| uint32_t num_devices = 0; |
| nnapi->ANeuralNetworks_getDeviceCount(&num_devices); |
| |
| for (uint32_t i = 0; i < num_devices; i++) { |
| ANeuralNetworksDevice* device = nullptr; |
| const char* buffer = nullptr; |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, nnapi->ANeuralNetworks_getDevice(i, &device), |
| "Getting list of available devices", nnapi_errno); |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, nnapi->ANeuralNetworksDevice_getName(device, &buffer), |
| "Getting list of available devices", nnapi_errno); |
| if (nnapi_cpu != buffer) { |
| result->push_back(device); |
| } |
| } |
| } |
| |
| return kTfLiteOk; |
| } |
| |
| } // namespace |
| |
| namespace delegate { |
| namespace nnapi { |
| |
| #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING |
| NNMemory::NNMemory(const NnApi* nnapi, const char* name, size_t size) { |
| if (name && size > 0) { |
| nnapi_ = nnapi; |
| byte_size_ = size; |
| fd_ = nnapi_->ASharedMemory_create(name, size); |
| data_ptr_ = reinterpret_cast<uint8_t*>( |
| mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0)); |
| nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE, |
| fd_, 0, &nn_memory_handle_); |
| } |
| } |
| #else |
| NNMemory::NNMemory(const NnApi* /*nnapi*/, const char* /*name*/, |
| size_t /*size*/) |
| : nnapi_(nullptr) {} |
| #endif |
| |
| NNMemory::~NNMemory() { |
| #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING |
| if (data_ptr_) { |
| munmap(data_ptr_, byte_size_); |
| } |
| if (nn_memory_handle_) { |
| nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_); |
| } |
| if (fd_ >= 0) close(fd_); |
| #endif |
| } |
| |
| class DequantizeMapping { |
| public: |
| int DequantizedAnnIndex(int ann_index, TfLiteType type) const { |
| for (const auto& element : mapping_) { |
| if (ann_index == std::get<0>(element) && type == std::get<1>(element)) { |
| return std::get<2>(element); |
| } |
| } |
| return -1; |
| } |
| |
| void Add(int ann_index, TfLiteType type, int dequantized_ann_index) { |
| // This assumes it is not already mapped. |
| mapping_.emplace_back(ann_index, type, dequantized_ann_index); |
| } |
| |
| private: |
| // Each tuple specifies the ANN (quantized) tensor index, the desired |
| // floating-point type and the matching ANN (dequantized) tensor index. This |
| // could use a map but instead std::vector is used to keep code size lower. |
| std::vector<std::tuple<int, TfLiteType, int>> mapping_; |
| }; |
| |
| // Abstract builder for building an op in the NN API graph. This handles |
| // the disparity between TFLite and NN API operand types. NN API has singular |
| // operands for both tensors and parameters, and TFLite separates the two. |
| class NNAPIOpBuilder { |
| public: |
| NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context, |
| OperandMapping* tensor_mapping, |
| DequantizeMapping* dequantize_mapping, |
| std::map<const MMAPAllocation*, ANeuralNetworksMemory*>* |
| allocation_mapping, |
| std::vector<int>* nnapi_to_tflite_op_mapping, |
| ANeuralNetworksModel* nn_model, int* nnapi_errno, |
| bool allow_dynamic_dimensions) |
| : nnapi_(nnapi), |
| context_(context), |
| operand_mapping_(tensor_mapping), |
| dequantize_mapping_(dequantize_mapping), |
| allocation_memory_mapping_(allocation_mapping), |
| nnapi_to_tflite_op_mapping_(nnapi_to_tflite_op_mapping), |
| nn_model_(nn_model), |
| nnapi_errno_(nnapi_errno), |
| allow_dynamic_dimensions_(allow_dynamic_dimensions) {} |
| |
| TfLiteStatus AddScalarBoolOperand(bool value) { |
| return AddScalarOperand<bool>(value, ANEURALNETWORKS_BOOL); |
| } |
| |
| TfLiteStatus AddScalarInt32Operand(int32_t value) { |
| return AddScalarOperand<int32_t>(value, ANEURALNETWORKS_INT32); |
| } |
| |
| TfLiteStatus AddScalarFloat32Operand(float value) { |
| return AddScalarOperand<float>(value, ANEURALNETWORKS_FLOAT32); |
| } |
| |
| TfLiteStatus AddVectorInt32Operand(const int32_t* values, |
| uint32_t num_values) { |
| return AddVectorOperand<int32_t>(values, num_values, |
| ANEURALNETWORKS_TENSOR_INT32, |
| /*scale=*/0.f, /*zero_point=*/0); |
| } |
| |
| TfLiteStatus AddVectorInt32Operand(const int32_t* values, uint32_t num_values, |
| float scale, int32_t zero_point) { |
| return AddVectorOperand<int32_t>( |
| values, num_values, ANEURALNETWORKS_TENSOR_INT32, scale, zero_point); |
| } |
| |
| TfLiteStatus AddVectorInt16Operand(const int16_t* values, |
| uint32_t num_values) { |
| return AddVectorOperand<int16_t>(values, num_values, |
| ANEURALNETWORKS_TENSOR_QUANT16_SYMM, |
| /*scale=*/1.f, /*zero_point=*/0); |
| } |
| |
| TfLiteStatus AddVectorInt8Operand(const int8_t* values, uint32_t num_values) { |
| return AddVectorOperand<int8_t>(values, num_values, |
| ANEURALNETWORKS_TENSOR_QUANT8_SYMM, |
| /*scale=*/1.f, /*zero_point=*/0); |
| } |
| |
| TfLiteStatus AddVectorFloat32Operand(const float* values, |
| uint32_t num_values) { |
| return AddVectorOperand<float>(values, num_values, |
| ANEURALNETWORKS_TENSOR_FLOAT32); |
| } |
| |
| TfLiteStatus AddPoolingParams(void* data) { |
| auto builtin = reinterpret_cast<TfLitePoolParams*>(data); |
| AddScalarInt32Operand(builtin->padding); |
| AddScalarInt32Operand(builtin->stride_width); |
| AddScalarInt32Operand(builtin->stride_height); |
| AddScalarInt32Operand(builtin->filter_width); |
| AddScalarInt32Operand(builtin->filter_height); |
| AddScalarInt32Operand(builtin->activation); |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus AddTensorInput(int tensor_index, bool hybrid_op, |
| int tensor_flags = 0) { |
| return AddTensor(tensor_index, hybrid_op, &augmented_inputs_, tensor_flags); |
| } |
| |
| TfLiteStatus AddTensorOutput(int tensor_index, int tensor_flags = 0) { |
| return AddTensor(tensor_index, /*hybrid_op=*/false, &augmented_outputs_, |
| tensor_flags); |
| } |
| |
| TfLiteStatus AddAdditionalFloat32OutputTensor(uint32_t dimension_count) { |
| std::vector<uint32_t> dims(dimension_count, 0); |
| return AddFloat32OutputTensor(dimension_count, dims.data(), nullptr); |
| } |
| |
| TfLiteStatus AddStateFloat32Tensor(int tensor_index, |
| int* ann_tensor_index_out) { |
| TfLiteTensor* tensor = &context_->tensors[tensor_index]; |
| return AddFloat32OutputTensor( |
| tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data), |
| ann_tensor_index_out); |
| } |
| |
| TfLiteStatus AddStateInt16Tensor(int tensor_index, |
| int* ann_tensor_index_out) { |
| TfLiteTensor* tensor = &context_->tensors[tensor_index]; |
| return AddAdditionalOutputTensor( |
| tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data), |
| ANEURALNETWORKS_TENSOR_QUANT16_SYMM, tensor->params.scale, |
| tensor->params.zero_point, ann_tensor_index_out); |
| } |
| |
| TfLiteStatus AddStateInt8AsymTensor(int tensor_index, |
| int* ann_tensor_index_out) { |
| TfLiteTensor* tensor = &context_->tensors[tensor_index]; |
| return AddAdditionalOutputTensor( |
| tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data), |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, tensor->params.scale, |
| tensor->params.zero_point, ann_tensor_index_out); |
| } |
| |
| // Adds a constant tensor with a single element, intended for broadcast-capable |
| // ops. |
| TfLiteStatus AddSingleValueConstantTensor(float value, bool is_quantized) { |
| if (!is_quantized) { |
| return AddVectorFloat32Operand(&value, 1); |
| } else { |
| // When a quantized tensor is needed, set the quantized value to 64 and the |
| // zero_point to 0, and adjust the scale so that dequantization yields `value`. |
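| // For example, value = 1.5f is stored as the quantized value 64 with |
| // scale 1.5f / 64, so dequantization reproduces 1.5f exactly. |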
| const uint8_t quant8_value = 64; |
| return AddVectorOperand<uint8_t>(&quant8_value, 1, |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, |
| value / quant8_value, 0); |
| } |
| } |
| |
| // Calculates the scale and zero_point for an 8-bit unsigned tensor, given |
| // float min and max. The zero_point is clamped to [0, 255]. |
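| // For example, min = -3.f and max = 3.f give scale = 6/255 (~0.0235) and |
| // zero_point = 127 (integer truncation of 3.f / scale = 127.5). |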
| TfLiteStatus CalculateQuantizationParams(float min, float max, float* scale, |
| int* zero_point) { |
| if (max < min) return kTfLiteError; |
| *scale = (max - min) / 255.f; |
| if (min > 0.f) { |
| *zero_point = 0; |
| } else if (max < 0.f) { |
| *zero_point = 255; |
| } else { |
| *zero_point = (0.f - min) / (*scale); |
| } |
| return kTfLiteOk; |
| } |
| |
| // Lowers hardswish according to the following equation: |
| // hard_swish[x] = x * ReLU6(x + 3) / 6 == x * (Relu_N1_to_1(x/3) * 3 + 3) / 6 |
| // = 0.5x * Relu_N1_to_1(x/3) + 0.5x |
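| // Derivation: ReLU6(x + 3) clamps x + 3 to [0, 6], and so does |
| // Relu_N1_to_1(x/3) * 3 + 3; dividing by 6 and multiplying by x yields the |
| // two terms produced by stages 1-4 below. |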
| TfLiteStatus TransformHardSwishIntoSupportedOps(int lite_input_index, |
| int lite_output_index, |
| bool need_int8_conversion, |
| int lite_node_index) { |
| const TfLiteTensor& tensor = context_->tensors[lite_input_index]; |
| float input_scale = tensor.params.scale; |
| int input_zero_point = tensor.params.zero_point; |
| float input_min = 0.f; |
| float input_max = 0.f; |
| int tensor_flags = 0; |
| if (need_int8_conversion) { |
| tensor_flags = tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION; |
| input_zero_point += 128; |
| } |
| bool is_quantized = false; |
| int nn_type = ANEURALNETWORKS_TENSOR_FLOAT32; |
| if (tensor.type == kTfLiteInt8 || tensor.type == kTfLiteUInt8) { |
| is_quantized = true; |
| nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM; |
| input_min = (0 - input_zero_point) * input_scale; |
| input_max = (255 - input_zero_point) * input_scale; |
| } |
| |
| // Stage1 : s1 = Relu1(x * 1/3) |
| float s1_output_min = 0.f; |
| float s1_output_max = 0.f; |
| int s1_out_ann_index = 0; |
| { |
| float s1_output_scale = 0.f; |
| int s1_output_zero_point = 0; |
| if (is_quantized) { |
| // clamp the output range to [-1, 1] if needed. |
| s1_output_min = input_min / 3.f < -1.f ? -1.f : input_min / 3.f; |
| s1_output_max = input_max / 3.f > 1.f ? 1.f : input_max / 3.f; |
| CalculateQuantizationParams(s1_output_min, s1_output_max, |
| &s1_output_scale, &s1_output_zero_point); |
| } |
| TF_LITE_ENSURE_OK(context_, |
| AddTensorInput(lite_input_index, false, tensor_flags)); |
| const float value3f = 1.f / 3.f; |
| TF_LITE_ENSURE_OK(context_, |
| AddSingleValueConstantTensor(value3f, is_quantized)); |
| TF_LITE_ENSURE_OK(context_, |
| AddScalarInt32Operand(ANEURALNETWORKS_FUSED_RELU1)); |
| TF_LITE_ENSURE_OK( |
| context_, |
| AddAdditionalOutputTensor( |
| tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data), |
| nn_type, s1_output_scale, s1_output_zero_point, |
| &s1_out_ann_index)); |
| TF_LITE_ENSURE_OK( |
| context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index)); |
| } |
| |
| // Stage2 : s2 = x / 2 |
| float s2_output_min = input_min / 2.f; |
| float s2_output_max = input_max / 2.f; |
| int s2_out_ann_index = 0; |
| { |
| float s2_output_scale = input_scale / 2.0f; |
| int s2_output_zero_point = input_zero_point; |
| TF_LITE_ENSURE_OK(context_, |
| AddTensorInput(lite_input_index, false, tensor_flags)); |
| const float value2f = 0.5f; |
| TF_LITE_ENSURE_OK(context_, |
| AddSingleValueConstantTensor(value2f, is_quantized)); |
| TF_LITE_ENSURE_OK(context_, |
| AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE)); |
| TF_LITE_ENSURE_OK( |
| context_, |
| AddAdditionalOutputTensor( |
| tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data), |
| nn_type, s2_output_scale, s2_output_zero_point, |
| &s2_out_ann_index)); |
| TF_LITE_ENSURE_OK( |
| context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index)); |
| } |
| |
| // Stage 3 : s3 = s1 * s2 |
| int s3_out_ann_index = 0; |
| { |
| augmented_inputs_.push_back(s1_out_ann_index); |
| augmented_inputs_.push_back(s2_out_ann_index); |
| TF_LITE_ENSURE_OK(context_, |
| AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE)); |
| float s3_output_scale = 0.f; |
| int s3_output_zero_point = 0; |
| if (is_quantized) { |
| // The min for stage 3 is always 0.0f. |
| float s3_output_min = 0.f; |
| // The max for stage 3 is max(s1_min * s2_min, s1_max * s2_max). |
| float s3_output_max = |
| s1_output_max * s2_output_max > s1_output_min * s2_output_min |
| ? s1_output_max * s2_output_max |
| : s1_output_min * s2_output_min; |
| CalculateQuantizationParams(s3_output_min, s3_output_max, |
| &s3_output_scale, &s3_output_zero_point); |
| } |
| TF_LITE_ENSURE_OK( |
| context_, |
| AddAdditionalOutputTensor( |
| tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data), |
| nn_type, s3_output_scale, s3_output_zero_point, |
| &s3_out_ann_index)); |
| TF_LITE_ENSURE_OK( |
| context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index)); |
| } |
| |
| // Stage 4: y = s3 + s2 |
| { |
| augmented_inputs_.push_back(s2_out_ann_index); |
| augmented_inputs_.push_back(s3_out_ann_index); |
| TF_LITE_ENSURE_OK(context_, |
| AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE)); |
| TF_LITE_ENSURE_OK(context_, |
| AddTensorOutput(lite_output_index, tensor_flags)); |
| TF_LITE_ENSURE_OK( |
| context_, FinalizeAddOperation(ANEURALNETWORKS_ADD, lite_node_index)); |
| } |
| |
| return kTfLiteOk; |
| } |
| |
| // Adds the operation to the model and maps the operation to the originating |
| // TFLite one. |
| TfLiteStatus AddOperationToModel(ANeuralNetworksOperationType type, |
| uint32_t input_count, const uint32_t* inputs, |
| uint32_t output_count, |
| const uint32_t* outputs, |
| int lite_node_index) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context_, |
| nnapi_->ANeuralNetworksModel_addOperation( |
| nn_model_, type, input_count, inputs, output_count, outputs), |
| "adding operation", nnapi_errno_); |
| nnapi_to_tflite_op_mapping_->push_back(lite_node_index); |
| return kTfLiteOk; |
| } |
| |
| // Adds a Dequantize operator and replaces the input tensor index with the |
| // dequantized version. If the dequantized tensor already exists, it is not |
| // added again. |
| TfLiteStatus AddDequantize(int nn_input_index, int lite_tensor_index, |
| TfLiteType dequantized_type, int lite_node_index) { |
| const int ann_index = |
| operand_mapping_->lite_index_to_ann(lite_tensor_index); |
| int dequantized_ann_index = |
| dequantize_mapping_->DequantizedAnnIndex(ann_index, dequantized_type); |
| |
| if (dequantized_ann_index == -1) { |
| // The dequantized version does not exist yet, it has to be added: a new |
| // Dequantize operation is added, yielding a new tensor. |
| const TfLiteTensor& tensor = context_->tensors[lite_tensor_index]; |
| ANeuralNetworksOperandType operand_type{ |
| ANEURALNETWORKS_TENSOR_FLOAT32, |
| static_cast<uint32_t>(tensor.dims->size), |
| reinterpret_cast<uint32_t*>(tensor.dims->data), 0.f, 0}; |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context_, |
| nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type), |
| "adding operand", nnapi_errno_); |
| dequantized_ann_index = operand_mapping_->add_new_non_tensor_operand(); |
| |
| // Add Dequantize operation. |
| const uint32_t dequantize_input[1] = {static_cast<uint32_t>(ann_index)}; |
| const uint32_t dequantize_output[1] = { |
| static_cast<uint32_t>(dequantized_ann_index)}; |
| TF_LITE_ENSURE_OK( |
| context_, AddOperationToModel(ANEURALNETWORKS_DEQUANTIZE, |
| /*input_count=*/1, dequantize_input, |
| /*output_count=*/1, dequantize_output, |
| lite_node_index)); |
| dequantize_mapping_->Add(ann_index, dequantized_type, |
| dequantized_ann_index); |
| } |
| |
| // The input for the original operation is modified so that the operation |
| // now uses the dequantized tensor as input. |
| augmented_inputs_[nn_input_index] = dequantized_ann_index; |
| |
| return kTfLiteOk; |
| } |
| |
| // Finish emitting the op (of type `type`) into the NN API. |
| TfLiteStatus FinalizeAddOperation(ANeuralNetworksOperationType type, |
| int lite_node_index) { |
| // Actually add a NN API operation |
| TF_LITE_ENSURE_OK(context_, |
| AddOperationToModel( |
| type, static_cast<uint32_t>(augmented_inputs_.size()), |
| augmented_inputs_.data(), |
| static_cast<uint32_t>(augmented_outputs_.size()), |
| augmented_outputs_.data(), lite_node_index)); |
| augmented_inputs_.clear(); |
| augmented_outputs_.clear(); |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus AddSingleValueTensorAsScalarOperand(int tensor_index, |
| int nn_type) { |
| const TfLiteTensor* tensor = &context_->tensors[tensor_index]; |
| TF_LITE_ENSURE_EQ(context_, NumElements(tensor), 1); |
| |
| ANeuralNetworksOperandType operand_type{.type = nn_type}; |
| RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR( |
| context_, |
| nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type), |
| "adding operand", tensor, nnapi_errno_); |
| int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index); |
| if (ann_tensor_index != -1) { |
| augmented_inputs_.push_back(ann_tensor_index); |
| return kTfLiteOk; |
| } |
| // Allocate a new tensor index |
| ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index); |
| augmented_inputs_.push_back(ann_tensor_index); |
| |
| const TfLiteType tensor_type = tensor->type; |
| TfLiteType nn_type_equivalent; |
| TF_LITE_ENSURE_OK(context_, GetEquivalentToANNType(context_, nn_type, |
| &nn_type_equivalent)); |
| if (tensor_type != nn_type_equivalent) { |
| operand_mapping_->add_type_conversion(tensor_index, nn_type_equivalent); |
| } |
| return kTfLiteOk; |
| } |
| |
| template <typename T> |
| TfLiteStatus AddNewInputConstantTensor( |
| int32_t nn_type, TfLiteType type, const TfLiteIntArray* dims, |
| const std::vector<T>& tensor_value, |
| const TfLiteQuantizationParams& quant_params, int* tensor_index) { |
| TF_LITE_ENSURE_OK(context_, |
| context_->AddTensors(context_, 1, tensor_index)); |
| |
| TfLiteTensor* new_tensor = &context_->tensors[*tensor_index]; |
| new_tensor->type = type; |
| new_tensor->allocation_type = kTfLiteDynamic; |
| new_tensor->params = quant_params; |
| |
| // Not removing the new tensor in case of resizing errors since it will |
| // be cleared by the context |
| TF_LITE_ENSURE_OK( |
| context_, |
| context_->ResizeTensor( |
| context_, new_tensor, |
| // Resize Tensor takes ownership of the dims array passed as param |
| TfLiteIntArrayCopy(dims))); |
| |
| memcpy(new_tensor->data.raw, |
| reinterpret_cast<const char*>(tensor_value.data()), |
| tensor_value.size() * sizeof(T)); |
| |
| const uint32_t tensor_rank = static_cast<uint32_t>(dims->size); |
| const uint32_t* tensor_dims = reinterpret_cast<const uint32_t*>(dims->data); |
| ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims, |
| quant_params.scale, |
| quant_params.zero_point}; |
| |
| const int ann_tensor_index = |
| operand_mapping_->add_delegate_generated_input_ann_tensors_operand(); |
| |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context_, |
| nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type), |
| "adding operand", nnapi_errno_); |
| |
| augmented_inputs_.push_back(ann_tensor_index); |
| |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context_, |
| nnapi_->ANeuralNetworksModel_setOperandValue( |
| nn_model_, ann_tensor_index, new_tensor->data.raw, |
| new_tensor->bytes), |
| "setting new operand value", nnapi_errno_); |
| |
| return kTfLiteOk; |
| } |
| |
| template <typename T> |
| TfLiteStatus AddNewInputConstantTensor( |
| int32_t nn_type, TfLiteType type, std::initializer_list<int> dims, |
| const std::vector<T>& tensor_value, |
| const TfLiteQuantizationParams& quant_params, int* tensor_index) { |
| TfLiteIntArray* dim_array = TfLiteIntArrayCreate(dims.size()); |
| dim_array->size = dims.size(); |
| std::copy(dims.begin(), dims.end(), dim_array->data); |
| |
| const auto result = AddNewInputConstantTensor( |
| nn_type, type, dim_array, tensor_value, quant_params, tensor_index); |
| TfLiteIntArrayFree(dim_array); |
| return result; |
| } |
| |
| private: |
| // Returns a TF Lite type which has the same memory representation as a |
| // provided NN API type. |
| TfLiteStatus GetEquivalentToANNType(TfLiteContext* context, int nn_type, |
| TfLiteType* type) { |
| switch (nn_type) { |
| case ANEURALNETWORKS_INT32: |
| *type = kTfLiteInt32; |
| return kTfLiteOk; |
| case ANEURALNETWORKS_FLOAT32: |
| *type = kTfLiteFloat32; |
| return kTfLiteOk; |
| default: |
| context->ReportError(context, |
| "NN API Delegate: Can't get an equivalent TF Lite " |
| "type for provided NN API type: %d.\n", |
| nn_type); |
| return kTfLiteError; |
| } |
| } |
| |
| template <typename T> |
| TfLiteStatus AddScalarOperand(T value, int32_t nn_type) { |
| ANeuralNetworksOperandType operand_type{.type = nn_type}; |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context_, |
| nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type), |
| "adding operand", nnapi_errno_); |
| const int ann_index = operand_mapping_->add_new_non_tensor_operand(); |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context_, |
| nnapi_->ANeuralNetworksModel_setOperandValue(nn_model_, ann_index, |
| &value, sizeof(T)), |
| "setting new operand value", nnapi_errno_); |
| augmented_inputs_.push_back(ann_index); |
| return kTfLiteOk; |
| } |
| |
| template <typename T> |
| TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values, |
| int32_t nn_type, float scale, |
| int32_t zero_point) { |
| ANeuralNetworksOperandType operand_type{.type = nn_type, |
| .dimensionCount = 1, |
| .dimensions = &num_values, |
| .scale = scale, |
| .zeroPoint = zero_point}; |
| |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context_, |
| nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type), |
| "adding operand", nnapi_errno_); |
| |
| const int ann_index = operand_mapping_->add_new_non_tensor_operand(); |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context_, |
| nnapi_->ANeuralNetworksModel_setOperandValue( |
| nn_model_, ann_index, values, sizeof(T) * num_values), |
| "settings new operand value", nnapi_errno_); |
| augmented_inputs_.push_back(ann_index); |
| return kTfLiteOk; |
| } |
| |
| template <typename T> |
| TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values, |
| int32_t nn_type) { |
| return AddVectorOperand(values, num_values, nn_type, /*scale=*/0.f, |
| /*zero_point=*/0); |
| } |
| |
| TfLiteStatus AddFloat32OutputTensor(uint32_t dimension_count, |
| const uint32_t* dimension_data, |
| int* ann_index_out) { |
| return AddAdditionalOutputTensor( |
| dimension_count, dimension_data, ANEURALNETWORKS_TENSOR_FLOAT32, |
| /*scale=*/0.f, /*zero_point=*/0, ann_index_out); |
| } |
| |
| TfLiteStatus AddAdditionalOutputTensor(uint32_t dimension_count, |
| const uint32_t* dimension_data, |
| int32_t nn_type, float scale, |
| int32_t zero_point, |
| int* ann_index_out) { |
| ANeuralNetworksOperandType operand_type{ |
| .type = nn_type, |
| .dimensionCount = dimension_count, |
| .dimensions = dimension_data, |
| .scale = scale, |
| .zeroPoint = zero_point, |
| }; |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context_, |
| nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type), |
| "adding operand", nnapi_errno_); |
| const int ann_index = operand_mapping_->add_new_non_tensor_operand(); |
| augmented_outputs_.push_back(ann_index); |
| if (ann_index_out) *ann_index_out = ann_index; |
| return kTfLiteOk; |
| } |
| |
| // Adds a new NN API tensor that shadows the TF Lite tensor `tensor_index`. |
| // This returns the NN API tensor index corresponding to the created tensor. |
| // If another caller previously created a NN API tensor for `tensor_index` |
| // then the existing one is returned. |
| TfLiteStatus AddTensor(int tensor_index, bool hybrid_op, |
| std::vector<uint32_t>* indices, int tensor_flags = 0) { |
| const bool scalar_as_tensor = |
| tensor_flags & NN_TENSOR_FLAG_SCALAR_AS_TENSOR; |
| const bool need_int8_conversion = |
| tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION; |
| const bool use_int8_asymm_signed = |
| tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED; |
| const bool force_per_channel = |
| tensor_flags & NN_TENSOR_FLAG_FORCE_PER_CHANNEL; |
| int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index); |
| if (ann_tensor_index != -1) { |
| indices->push_back(ann_tensor_index); |
| return kTfLiteOk; |
| } |
| // Allocate a new tensor index |
| ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index); |
| |
| // Parameters needed for new type. |
| int32_t nn_type = 0; |
| float scale = 0.0f; |
| int32_t zeroPoint = 0; |
| ANeuralNetworksSymmPerChannelQuantParams ann_perchannel_params; |
| TfLiteTensor* tensor = &context_->tensors[tensor_index]; |
| TfLiteType tensor_type = tensor->type; |
| if (hybrid_op && (tensor_type == kTfLiteUInt8)) { |
| // For legacy reasons, UINT8 weights in hybrid operators are actually INT8 |
| // values and should be interpreted as such. |
| tensor_type = kTfLiteInt8; |
| } |
| switch (tensor_type) { |
| case kTfLiteNoType: |
| // Tensors added during initialization of Ops don't have a type yet and |
| // should not be registered with the NNAPI. |
| indices->push_back(-1); |
| return kTfLiteOk; |
| case kTfLiteFloat32: |
| nn_type = ANEURALNETWORKS_TENSOR_FLOAT32; |
| break; |
| case kTfLiteUInt8: |
| nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM; |
| scale = tensor->params.scale; |
| zeroPoint = tensor->params.zero_point; |
| if (scale == 0) { |
| // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with zero scale is not valid in |
| // NNAPI. |
| scale = 1; |
| } |
| break; |
| case kTfLiteInt8: |
| // If explicit int8 conversion is needed, we still need |
| // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type. |
| if (use_int8_asymm_signed) { |
| nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED; |
| } else if (need_int8_conversion) { |
| nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM; |
| } else { |
| nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM; |
| } |
| scale = tensor->params.scale; |
| zeroPoint = tensor->params.zero_point; |
| if (tensor->quantization.type == kTfLiteAffineQuantization) { |
| TfLiteAffineQuantization* quantization_params = |
| static_cast<TfLiteAffineQuantization*>( |
| tensor->quantization.params); |
| if (quantization_params->scale->size > 1 || force_per_channel) { |
| // Set up per-channel quantization. |
| ann_perchannel_params = { |
| .channelDim = static_cast<uint32_t>( |
| quantization_params->quantized_dimension), |
| .scaleCount = |
| static_cast<uint32_t>(quantization_params->scale->size), |
| .scales = quantization_params->scale->data, |
| }; |
| nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL; |
| scale = 0.0f; |
| zeroPoint = 0; |
| } else if (quantization_params->scale->size == 1) { |
| scale = quantization_params->scale->data[0]; |
| zeroPoint = quantization_params->zero_point->data[0]; |
| } |
| } |
| if (nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) { |
| if (need_int8_conversion) { |
| zeroPoint += 128; |
| operand_mapping_->add_type_conversion(tensor_index, kTfLiteUInt8); |
| } |
| if (scale == 0) { |
| // QUANT8 tensors with zero scale are not valid in NNAPI. |
| scale = 1; |
| } |
| } |
| break; |
| case kTfLiteInt32: |
| nn_type = ANEURALNETWORKS_TENSOR_INT32; |
| scale = tensor->params.scale; |
| zeroPoint = tensor->params.zero_point; |
| break; |
| case kTfLiteBool: |
| nn_type = ANEURALNETWORKS_TENSOR_BOOL8; |
| break; |
| case kTfLiteInt16: |
| nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM; |
| scale = tensor->params.scale; |
| zeroPoint = tensor->params.zero_point; |
| break; |
| default: |
| context_->ReportError( |
| context_, "Failed to add NN API tensor: type %s is not supported.", |
| TfLiteTypeGetName(tensor_type)); |
| return kTfLiteError; |
| } |
| bool has_unspecified_dimensions = HasUnspecifiedDimension(tensor); |
| uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size); |
| std::vector<uint32_t> dims_unspecified(tensor_rank, 0); |
| if (has_unspecified_dimensions) { |
| for (int i = 0; i < tensor->dims_signature->size; i++) { |
| dims_unspecified[i] = tensor->dims_signature->data[i] == -1 |
| ? 0 |
| : tensor->dims_signature->data[i]; |
| } |
| } |
| uint32_t* tensor_dims = |
| has_unspecified_dimensions && allow_dynamic_dimensions_ |
| ? dims_unspecified.data() |
| : reinterpret_cast<uint32_t*>(tensor->dims->data); |
| if (scalar_as_tensor && tensor_rank == 0) { |
| // Use rank 1, shape {1} operand for TFLite scalar tensors. |
| tensor_rank = 1; |
| tensor_dims = &tensor_rank; |
| } |
| if (tensor_rank == 0) { |
| // If the tensor_rank is 0, the dimensions pointer must be nullptr. |
| tensor_dims = nullptr; |
| } |
| |
| ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims, |
| scale, zeroPoint}; |
| RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR( |
| context_, |
| nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type), |
| "adding operand", tensor, nnapi_errno_); |
| |
| if (nn_type == ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR( |
| context_, |
| nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams( |
| nn_model_, ann_tensor_index, &ann_perchannel_params), |
| "setting new operand per channel quantization params", tensor, |
| nnapi_errno_); |
| } |
| if (tensor->allocation_type == kTfLiteMmapRo) { |
| if (IsQuantized(tensor_type) && need_int8_conversion && |
| nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) { |
| // We need to add a tensor and convert the weights into uint8. |
| // Currently this is only needed for fully_connected. The new_tensor is |
| // needed for lifetime management for the converted weights. |
| int new_tensor_index = -1; |
| TF_LITE_ENSURE_OK(context_, |
| context_->AddTensors(context_, 1, &new_tensor_index)); |
| TfLiteTensor* new_tensor = &context_->tensors[new_tensor_index]; |
| new_tensor->type = kTfLiteUInt8; |
| new_tensor->allocation_type = kTfLiteDynamic; |
| new_tensor->params.scale = scale; |
| new_tensor->params.zero_point = zeroPoint; |
| // Not removing the new tensor in case of resizing errors since it will |
| // be cleared by the context |
| TF_LITE_ENSURE_OK( |
| context_, context_->ResizeTensor(context_, new_tensor, |
| // Resize Tensor takes ownership of |
| // the dims array passed as param |
| TfLiteIntArrayCopy(tensor->dims))); |
| // Convert the int8 values into the corresponding uint8 values. |
| const auto num_elements = NumElements(tensor); |
| for (int i = 0; i < num_elements; ++i) { |
| new_tensor->data.uint8[i] = static_cast<uint8_t>( |
| static_cast<int32_t>(tensor->data.int8[i]) + 128); |
| } |
| RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR( |
| context_, |
| nnapi_->ANeuralNetworksModel_setOperandValue( |
| nn_model_, ann_tensor_index, new_tensor->data.raw, |
| new_tensor->bytes), |
| "setting new operand value", tensor, nnapi_errno_); |
| #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING |
| } else if (tensor->allocation && |
| static_cast<const Allocation*>(tensor->allocation)->type() == |
| Allocation::Type::kMMap) { |
| const MMAPAllocation* mmap_alloc = |
| static_cast<const MMAPAllocation*>(tensor->allocation); |
| if (allocation_memory_mapping_->count(mmap_alloc) == 0) { |
| ANeuralNetworksMemory* ann_memory_handle = nullptr; |
| nnapi_->ANeuralNetworksMemory_createFromFd( |
| mmap_alloc->bytes(), PROT_READ, mmap_alloc->fd(), 0, |
| &ann_memory_handle); |
| allocation_memory_mapping_->insert( |
| std::make_pair(mmap_alloc, ann_memory_handle)); |
| } |
| ANeuralNetworksMemory* ann_memory_handle = |
| allocation_memory_mapping_->at(mmap_alloc); |
| // Compute the offset to the base pointer of the MMAPAllocation. |
| auto offset = reinterpret_cast<const uint8_t*>(tensor->data.raw) - |
| reinterpret_cast<const uint8_t*>(mmap_alloc->base()); |
| RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR( |
| context_, |
| nnapi_->ANeuralNetworksModel_setOperandValueFromMemory( |
| nn_model_, ann_tensor_index, ann_memory_handle, offset, |
| tensor->bytes), |
| "setting new operand value from memory", tensor, nnapi_errno_); |
| #endif |
| } else { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR( |
| context_, |
| nnapi_->ANeuralNetworksModel_setOperandValue( |
| nn_model_, ann_tensor_index, tensor->data.raw, tensor->bytes), |
| "setting new operand value", tensor, nnapi_errno_); |
| } |
| } |
| indices->push_back(ann_tensor_index); |
| return kTfLiteOk; |
| } |
| |
| // Access to NNAPI. |
| const NnApi* const nnapi_; |
| |
| // TfLiteContext for error handling. |
| TfLiteContext* const context_; |
| |
| // Tracks relationship between indices. |
| OperandMapping* const operand_mapping_; |
| |
| // Keeps mapping of ANN quantized tensor and float data type to equivalent |
| // dequantized ANN tensor. For example, tensor #4 (UINT8) + FLOAT32 could map |
| // to tensor #10 (FLOAT32) because a DEQUANTIZE operator was added to convert |
| // tensor #4 to a FLOAT32 tensor. |
| DequantizeMapping* const dequantize_mapping_; |
| |
| std::map<const MMAPAllocation*, ANeuralNetworksMemory*>* const |
| allocation_memory_mapping_; |
| |
| // Tracks for every operation in the NNAPI model the source TfLite model |
| // node index. |
| std::vector<int>* const nnapi_to_tflite_op_mapping_; |
| |
| // The NNAPI model. |
| ANeuralNetworksModel* const nn_model_; |
| |
| // Inputs and outputs for the current op. These are augmented in the sense |
| // that NN API uses operands for all arguments, not just tensors, unlike |
| // TensorFlow Lite. |
| std::vector<uint32_t> augmented_inputs_; |
| std::vector<uint32_t> augmented_outputs_; |
| |
| // Pointer used to store the return status code of the latest NNAPI call. |
| int* nnapi_errno_; |
| |
| // Whether to allow dynamic dimensions without re-compilation. |
| bool allow_dynamic_dimensions_; |
| }; // class NNAPIOpBuilder |
| |
| namespace { |
| struct OpValidationContext { |
| bool is_valid; |
| std::vector<NNAPIValidationFailure>* validation_failures; |
| }; |
| |
| #define EXPECT_INPUT_TYPE_IN(actual_type, ...) \ |
| ExpectTypeIn(actual_type, {__VA_ARGS__}, \ |
| NNAPIValidationFailureType::kUnsupportedInputType, \ |
| "Input type not in expected list " #__VA_ARGS__, &val_ctx) |
| |
| inline void AddValidationFailure(NNAPIValidationFailureType failure_type, |
| const char* message, |
| OpValidationContext* val_ctx) { |
| val_ctx->is_valid = false; |
| |
| #ifdef NNAPI_VERBOSE_VALIDATION |
| if (val_ctx->validation_failures) { |
| val_ctx->validation_failures->push_back({failure_type, message}); |
| } |
| #endif |
| } |
| |
| template <typename... Args> |
| inline void AddValidationFailureFmt(OpValidationContext* val_ctx, |
| NNAPIValidationFailureType failure_type, |
| const char* message_fmt, Args... args) { |
| val_ctx->is_valid = false; |
| #ifdef NNAPI_VERBOSE_VALIDATION |
| if (val_ctx->validation_failures) { |
| size_t req_buf_size = snprintf(nullptr, 0, message_fmt, args...) + 1; |
| std::unique_ptr<char[]> tmp_buf(new char[req_buf_size]); |
| snprintf(tmp_buf.get(), req_buf_size, message_fmt, args...); |
| |
| val_ctx->validation_failures->push_back({failure_type, tmp_buf.get()}); |
| } |
| #endif |
| } |
| |
| inline bool Expect(bool condition, NNAPIValidationFailureType failure_type, |
| const char* message, OpValidationContext* val_ctx) { |
| if (!condition) { |
| AddValidationFailure(failure_type, message, val_ctx); |
| return false; |
| } |
| return true; |
| } |
| |
| template <typename... Args> |
| inline bool ExpectFmt(bool condition, OpValidationContext* val_ctx, |
| NNAPIValidationFailureType failure_type, |
| const char* message_fmt, Args... args) { |
| if (!condition) { |
| AddValidationFailureFmt(val_ctx, failure_type, message_fmt, args...); |
| return false; |
| } |
| return true; |
| } |
| |
| inline bool ExpectTypeIn(TfLiteType actual_type, |
| std::initializer_list<TfLiteType> allowed_types, |
| NNAPIValidationFailureType failure_type, |
| const char* msg, OpValidationContext* val_ctx) { |
| return Expect(std::find(allowed_types.begin(), allowed_types.end(), |
| actual_type) != allowed_types.end(), |
| failure_type, msg, val_ctx); |
| } |
| |
| inline bool ExpectMinAndroidSdkVersion(int curr_version, int min_version, |
| OpValidationContext* val_ctx) { |
| return ExpectFmt(curr_version >= min_version, val_ctx, |
| NNAPIValidationFailureType::kUnsupportedAndroidVersion, |
| "Android sdk version less than %d", min_version); |
| } |
| |
| inline bool ExpectMaxOpVersion(int curr_version, int max_version, |
| OpValidationContext* val_ctx) { |
| return ExpectFmt(curr_version <= max_version, val_ctx, |
| NNAPIValidationFailureType::kUnsupportedOperatorVersion, |
| "OP Version higher than %d", max_version); |
| } |
| |
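| // Note: despite the name, this accepts any op version up to `max_version`
| // (the same check as ExpectMaxOpVersion) but reports a different failure
| // message.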
| inline bool ExpectOpVersion(int curr_version, int max_version, |
| OpValidationContext* val_ctx) { |
| return ExpectFmt(curr_version <= max_version, val_ctx, |
| NNAPIValidationFailureType::kUnsupportedOperatorVersion, |
| "OP Version different from %d", max_version); |
| } |
| |
| inline bool ExpectIsFloatOperator(const TfLiteContext* context, |
| const TfLiteNode* node, |
| OpValidationContext* val_ctx) { |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| return Expect(IsFloat(input_type), |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Input should be Float", val_ctx); |
| } |
| |
| bool ExpectIsFloatOrUint8Operator(const TfLiteContext* context, |
| const TfLiteNode* node, |
| OpValidationContext* val_ctx) { |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| return Expect(IsFloatOrUInt8(input_type), |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Input should be Float or UINT8", val_ctx); |
| } |
| |
| bool ExpectIsFloatOrQuant8Operator(const TfLiteContext* context, |
| const TfLiteNode* node, |
| OpValidationContext* val_ctx) { |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| return Expect(IsFloatOrQuantized(input_type), |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Input should be Float or Quant8", val_ctx); |
| } |
| |
| bool ExpectIsFloatOrInt32Operator(const TfLiteContext* context, |
| const TfLiteNode* node, |
| OpValidationContext* val_ctx) { |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| return Expect(IsFloatOrInt32(input_type), |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Input should be Float or Int32", val_ctx); |
| } |
| |
| bool ExpectIsFloatQuant8OrInt32Operator(const TfLiteContext* context, |
| const TfLiteNode* node, |
| OpValidationContext* val_ctx) { |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| return Expect(IsFloatQuantizedOrInt32(input_type), |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Input should be Float, Quant8, or Int32", val_ctx); |
| } |
| |
| // When using NN API version 1.0 or 1.1, the condition below must be true for |
| // quantized versions of the following ops: |
| // * CONV_2D |
| // * DEPTHWISE_CONV_2D |
| // * FULLY_CONNECTED (where filter actually stands for weights) |
| // The condition is relaxed and no longer required since version 1.2. |
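| // For example, input_scale = 0.5, filter_scale = 0.25, output_scale = 0.2
| // satisfies the check (0.5 * 0.25 = 0.125 < 0.2), while output_scale = 0.1
| // would not.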
| bool ExpectIsRestrictedScalesCompliant(const TfLiteContext* context, |
| const TfLiteNode* node, |
| OpValidationContext* val_ctx) { |
| const int input_id = node->inputs->data[0]; |
| const int filter_id = node->inputs->data[1]; |
| const int output_id = node->outputs->data[0]; |
| const float input_scale = context->tensors[input_id].params.scale; |
| const float filter_scale = context->tensors[filter_id].params.scale; |
| const float output_scale = context->tensors[output_id].params.scale; |
| return Expect(input_scale * filter_scale < output_scale, |
| NNAPIValidationFailureType::kNotRestrictedScaleCompliant, |
| "When using NN API version 1.0 or 1.1, input_scale * " |
| "filter_scale < output_scale.", |
| val_ctx); |
| } |
| |
| } // namespace |
| |
| // Returns true if the node identified by `builtin_code`/`version` can be
| // mapped to an NNAPI operation for the given `android_sdk_version`. When a
| // check fails, the reason is recorded in `map_failures` (only populated when
| // NNAPI_VERBOSE_VALIDATION is defined).
| bool NNAPIDelegateKernel::Validate( |
| const TfLiteContext* context, int builtin_code, int version, |
| int android_sdk_version, const TfLiteNode* node, |
| bool is_accelerator_specified, |
| std::vector<NNAPIValidationFailure>* map_failures) { |
| OpValidationContext val_ctx{true, map_failures}; |
| switch (builtin_code) { |
| case kTfLiteBuiltinAdd: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| if (android_sdk_version >= kMinSdkVersionForNNAPI13) { |
| ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx); |
| if (IsInt32(context->tensors[node->inputs->data[0]].type)) { |
| Expect(reinterpret_cast<TfLiteAddParams*>(node->builtin_data) |
| ->activation == kTfLiteActNone, |
| NNAPIValidationFailureType::kNoActivationExpected, |
| "No activation function supported", &val_ctx); |
| } |
| } else { |
| ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinArgMax: |
| case kTfLiteBuiltinArgMin: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| // Those operators were introduced in NNAPI 1.2. |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| const TfLiteType input_type = |
| context->tensors[node->inputs->data[0]].type;
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32, |
| kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8); |
| |
| const auto& axis_tensor = context->tensors[node->inputs->data[1]]; |
| if (axis_tensor.type == kTfLiteInt64) { |
| Expect( |
| axis_tensor.allocation_type == kTfLiteMmapRo && |
| *axis_tensor.data.i64 <= std::numeric_limits<int32_t>::max() && |
| *axis_tensor.data.i64 >= std::numeric_limits<int32_t>::min(), |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NNAPI only supports axis as int32. If the axis type is int64 and " |
| "constant we can convert it to int32 if the value isn't too " |
| "large.", |
| &val_ctx); |
| } else { |
| Expect(axis_tensor.type == kTfLiteInt32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Axis should be Int32", &val_ctx); |
| } |
| if (builtin_code == kTfLiteBuiltinArgMax) { |
| auto builtin = |
| reinterpret_cast<TfLiteArgMaxParams*>(node->builtin_data); |
| Expect(builtin->output_type == kTfLiteInt32, |
| NNAPIValidationFailureType::kUnsupportedOutputType, |
| "NNAPI only supports int32 output.", &val_ctx); |
| } else { |
| auto builtin = |
| reinterpret_cast<TfLiteArgMinParams*>(node->builtin_data); |
| Expect(builtin->output_type == kTfLiteInt32, |
| NNAPIValidationFailureType::kUnsupportedOutputType, |
| "NNAPI only supports int32 output.", &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinMul: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| if (android_sdk_version >= kMinSdkVersionForNNAPI13) { |
| ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx); |
| if (IsInt32(context->tensors[node->inputs->data[0]].type)) { |
| Expect(reinterpret_cast<TfLiteMulParams*>(node->builtin_data) |
| ->activation == kTfLiteActNone, |
| NNAPIValidationFailureType::kNoActivationExpected, |
| "No activation function supported", &val_ctx); |
| } |
| } else { |
| ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinAveragePool2d: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); |
| auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data); |
| // TODO(b/138756912): Large filter window would overflow on the |
| // quantized reference CPU path. |
| if (IsQuantized(context->tensors[node->inputs->data[0]].type)) { |
| Expect(is_accelerator_specified || |
| (builtin->filter_width * builtin->filter_height <= 256), |
| NNAPIValidationFailureType::kUnsupportedOperandSize, |
| "Large filter window would overflow on the reference CPU path", |
| &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinMaxPool2d: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); |
| } break; |
| case kTfLiteBuiltinL2Pool2d: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectIsFloatOperator(context, node, &val_ctx); |
| |
| if (android_sdk_version < kMinSdkVersionForNNAPI12) { |
| auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data); |
| Expect(builtin->activation == kTfLiteActNone, |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "Before NNAPI 1.2 fused activation for l2_pool may not be " |
| "supported.", |
| &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinConv2d: { |
| ExpectMaxOpVersion(version, 3, &val_ctx); |
| if (android_sdk_version < kMinSdkVersionForNNAPI12) { |
| Expect(!IsHybridOperator(context, builtin_code, node), |
| NNAPIValidationFailureType::kUnsupportedHybridOperator, |
| "Hybrid operators not supported before NNAPI 1.2", &val_ctx); |
| ExpectIsFloatOrUint8Operator(context, node, &val_ctx); |
| |
| const auto& filter_tensor = context->tensors[node->inputs->data[1]]; |
| if (filter_tensor.quantization.type == kTfLiteAffineQuantization) { |
| TfLiteAffineQuantization* quantization_params = |
| static_cast<TfLiteAffineQuantization*>( |
| filter_tensor.quantization.params); |
| Expect(quantization_params->scale->size <= 1, |
| NNAPIValidationFailureType::kUnsupportedQuantizationType, |
| "Per-channel quantized convolution not supported before NNAPI " |
| "1.2.", |
| &val_ctx); |
| } |
| } |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| if (android_sdk_version < kMinSdkVersionForNNAPI12 && |
| input_type == kTfLiteUInt8) { |
| ExpectIsRestrictedScalesCompliant(context, node, &val_ctx); |
| } |
| auto builtin = reinterpret_cast<TfLiteConvParams*>(node->builtin_data); |
| // TODO(b/132950584): Add support for Conv2D with omitted bias. |
| Expect(node->inputs->size == 3, |
| NNAPIValidationFailureType::kMissingRequiredOperand, |
| "Conv2D with omitted bias not supported", &val_ctx); |
| if (builtin->dilation_width_factor != 1 || |
| builtin->dilation_height_factor != 1) { |
| Expect(android_sdk_version >= kMinSdkVersionForNNAPI12, |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "NNAPI supports dilated Conv2D since NNAPI 1.2.", &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinDepthwiseConv2d: { |
| ExpectMaxOpVersion(version, 3, &val_ctx); |
| |
| if (android_sdk_version < kMinSdkVersionForNNAPI12) { |
| ExpectIsFloatOrUint8Operator(context, node, &val_ctx); |
| |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| if (input_type == kTfLiteUInt8) { |
| ExpectIsRestrictedScalesCompliant(context, node, &val_ctx); |
| } |
| |
| auto builtin = |
| reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data); |
| Expect(builtin->dilation_width_factor == 1 && |
| builtin->dilation_height_factor == 1, |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "dilation_width_factor and dilation_height_factor expected to " |
| "be equal to 1", |
| &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinFullyConnected: { |
| ExpectMaxOpVersion(version, 5, &val_ctx); |
| // TODO(b/132950584): Add support for FullyConnected with no bias. |
| Expect(node->inputs->size == 3 && |
| node->inputs->data[2] != kTfLiteOptionalTensor, |
| NNAPIValidationFailureType::kMissingRequiredOperand, |
| "FullyConnected with no bias not supported", &val_ctx); |
| const auto output_type = context->tensors[node->outputs->data[0]].type; |
| Expect(output_type != kTfLiteInt16, |
| NNAPIValidationFailureType::kUnsupportedOutputType, |
| "Unsupported output of type kTfLiteInt16", &val_ctx); |
| if (android_sdk_version < kMinSdkVersionForNNAPI12) { |
| Expect(!IsHybridOperator(context, builtin_code, node), |
| NNAPIValidationFailureType::kUnsupportedHybridOperator, |
| "Hybrid operators not supported before NNAPI 1.2", &val_ctx); |
| ExpectIsFloatOrUint8Operator(context, node, &val_ctx); |
| } |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| if (android_sdk_version < kMinSdkVersionForNNAPI12 && |
| input_type == kTfLiteUInt8) { |
| ExpectIsRestrictedScalesCompliant(context, node, &val_ctx); |
| } |
| auto builtin = |
| reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data); |
| Expect(!builtin->keep_num_dims, |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "keep_num_dims == true not supported", &val_ctx); |
| } break; |
| case kTfLiteBuiltinHardSwish: { |
| // Hardswish is supported; on pre-Q devices it is decomposed into basic
| // ops. Note that for some NNAPI accelerators the optimized TFLite kernels
| // may still be faster.
| ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); |
| } break; |
| case kTfLiteBuiltinSoftmax: { |
| ExpectOpVersion(version, 2, &val_ctx); |
| ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); |
| const auto& output = context->tensors[node->outputs->data[0]]; |
| ExpectTypeIn(output.type, {kTfLiteFloat32, kTfLiteUInt8, kTfLiteInt8}, |
| NNAPIValidationFailureType::kUnsupportedOutputType, |
| "Output type should be one of kTfLiteFloat32, kTfLiteUInt8, " |
| "kTfLiteInt8.", |
| &val_ctx); |
| const auto& input = context->tensors[node->inputs->data[0]]; |
| const int input_rank = input.dims->size; |
| Expect(input_rank <= 4, |
| NNAPIValidationFailureType::kUnsupportedOperandRank, |
| "Input rank should be <= 4", &val_ctx); |
| if (android_sdk_version < kMinSdkVersionForNNAPI12) { |
| Expect( |
| input_rank == 2 || input_rank == 4, |
| NNAPIValidationFailureType::kUnsupportedOperandRank, |
| "Before API level 29 only 2D and 4D input tensors were supported.", |
| &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinReshape: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); |
| if (node->inputs->size >= 2) { |
| Expect(context->tensors[node->inputs->data[1]].allocation_type == |
| kTfLiteMmapRo, |
| NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape, |
| "The shape input tensor must be constant.", &val_ctx); |
| } |
| if (node->inputs->size == 1) { |
| // reject scalar reshaping |
| auto* params = |
| reinterpret_cast<TfLiteReshapeParams*>(node->builtin_data); |
| int num_dimensions = params->num_dimensions; |
| if (num_dimensions == 1 && params->shape[0] == 0) { |
| // Legacy tflite models use a shape parameter of [0] to indicate |
| // scalars. |
| num_dimensions = 0; |
| } |
| Expect(num_dimensions > 0, |
| NNAPIValidationFailureType::kUnsupportedOperandRank, |
| "New shape rank should be > 0", &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinResizeBilinear: { |
| ExpectMaxOpVersion(version, 3, &val_ctx); |
| const auto& input = context->tensors[node->inputs->data[0]]; |
| const auto output_dims = context->tensors[node->outputs->data[0]].dims; |
| Expect(input.dims->size == 4, |
| NNAPIValidationFailureType::kUnsupportedOperandRank, |
| "Input should have rank 4", &val_ctx); |
| ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); |
| Expect(node->inputs->size >= 2, |
| NNAPIValidationFailureType::kUnsupportedOperatorVariant, |
| "Expected at least 2 inputs", &val_ctx); |
| if (node->inputs->size >= 2) { |
| Expect(context->tensors[node->inputs->data[1]].allocation_type == |
| kTfLiteMmapRo, |
| NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape, |
| "The size input tensor must be constant.", &val_ctx); |
| } |
| if (android_sdk_version < kMinSdkVersionForNNAPI12) { |
| Expect(output_dims->data[1] == output_dims->data[2], |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "Require width == height due to driver differences in NNAPI " |
| "< 1.2", |
| &val_ctx); |
| } |
| auto builtin = |
| reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data); |
| if (android_sdk_version <= kMinSdkVersionForNNAPI12) { |
| Expect(!builtin->align_corners, |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "NNAPI does not support align_corners == true.", &val_ctx); |
| Expect(!builtin->half_pixel_centers, |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "NNAPI does not support half_pixel_centers == true.", &val_ctx); |
| } |
| if (android_sdk_version < kMinSdkVersionForNNAPI12) { |
| Expect(input.type == kTfLiteFloat32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NNAPI 1.0 & 1.1 only supports float input.", &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinResizeNearestNeighbor: { |
| ExpectMaxOpVersion(version, 3, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); |
| Expect(node->inputs->size >= 2, |
| NNAPIValidationFailureType::kUnsupportedOperatorVariant, |
| "Expected at least 2 inputs", &val_ctx); |
| if (node->inputs->size >= 2) { |
| Expect(context->tensors[node->inputs->data[1]].allocation_type == |
| kTfLiteMmapRo, |
| NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape, |
| "The size input tensor must be constant.", &val_ctx); |
| } |
| auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>( |
| node->builtin_data); |
| if (android_sdk_version <= kMinSdkVersionForNNAPI12) { |
| Expect(!builtin->align_corners, |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "NNAPI does not support align_corners == true.", &val_ctx); |
| Expect(!builtin->half_pixel_centers, |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "NNAPI does not support half_pixel_centers == true.", &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinSqueeze: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11, |
| &val_ctx); |
| auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data); |
| if (android_sdk_version == kMinSdkVersionForNNAPI11) { |
| Expect(builtin->num_squeeze_dims != 0, |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "NNAPI 1.1 does not support null squeeze_dims properly.", |
| &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinUnidirectionalSequenceLstm: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| |
| Expect(!IsHybridOperator(context, builtin_code, node), |
| NNAPIValidationFailureType::kUnsupportedHybridOperator, |
| "Hybrid version of this op is not supported by NN API.", &val_ctx); |
| |
| Expect(node->inputs->size == 20 || node->inputs->size == 24, |
| NNAPIValidationFailureType::kUnsupportedOperatorVariant, |
| "Supporting only operation with 20 or 24 inputs", &val_ctx); |
| } break; |
| case kTfLiteBuiltinL2Normalization: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| |
| if (android_sdk_version < kMinSdkVersionForNNAPI12) { |
| ExpectIsFloatOperator(context, node, &val_ctx); |
| |
| const auto& input = context->tensors[node->inputs->data[0]]; |
| Expect(input.dims->size == 4, |
| NNAPIValidationFailureType::kUnsupportedOperatorVariant, |
| "Expected 4 inputs", &val_ctx); |
| } |
| auto builtin = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data); |
| Expect(builtin->activation == kTfLiteActNone, |
| NNAPIValidationFailureType::kNoActivationExpected, |
| "Expected no activation", &val_ctx); |
| } break; |
| case kTfLiteBuiltinLocalResponseNormalization: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| } break; |
| case kTfLiteBuiltinLshProjection: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| |
| if (reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data) |
| ->type == kTfLiteLshProjectionSparse) { |
| // NNAPI does not support sparse projection correctly pre-Q |
| // (b/111751836). |
| Expect(android_sdk_version >= kMinSdkVersionForNNAPI12, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NNAPI does not support sparse projection correctly pre-Q", |
| &val_ctx); |
| Expect(node->inputs->size == 2, |
| NNAPIValidationFailureType::kUnsupportedOperatorVariant, |
| " NNAPI does not support weights for sparse projects.", |
| &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinConcatenation: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| Expect(reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data) |
| ->activation == kTfLiteActNone, |
| NNAPIValidationFailureType::kNoActivationExpected, |
| "No activation function supported", &val_ctx); |
| Expect(context->tensors[node->inputs->data[0]].dims->size <= 4, |
| NNAPIValidationFailureType::kUnsupportedOperandRank, |
| "Input rank should be less than 4", &val_ctx); |
| |
| const auto& input_type = context->tensors[node->inputs->data[0]].type; |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32, |
| kTfLiteUInt8, kTfLiteInt8); |
| |
| if (input_type == kTfLiteUInt8 && |
| android_sdk_version < kMinSdkVersionForNNAPI12) { |
| auto first_param = context->tensors[node->inputs->data[0]].params; |
| for (int i = 1; i < node->inputs->size; i++) { |
| auto curr_param = context->tensors[node->inputs->data[i]].params; |
| if (!Expect(curr_param.scale == first_param.scale && |
| curr_param.zero_point == first_param.zero_point, |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "NNAPI 1.0-1 only supported concatenating quantized " |
| "tensor of the same scale and offset.", |
| &val_ctx)) { |
| break; |
| } |
| } |
| } |
| } break; |
| case kTfLiteBuiltinDequantize: { |
| Expect(version == 1 || version == 2, |
| NNAPIValidationFailureType::kUnsupportedOperatorVersion, |
| "Supported op versions are 1 and 2 only", &val_ctx); |
| |
| const auto& input = context->tensors[node->inputs->data[0]]; |
| if (android_sdk_version < kMinSdkVersionForNNAPI12) { |
| EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8); |
| } else { |
| EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8, kTfLiteInt8); |
| |
| if (android_sdk_version == kMinSdkVersionForNNAPI12 && |
| input.type == kTfLiteInt8) { |
| const auto zero_point = input.params.zero_point; |
| Expect(zero_point == 0, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NN API supports int8 type since version 1.2 but only for " |
| "symmetric quantization.", |
| &val_ctx); |
| } |
| } |
| } break; |
| case kTfLiteBuiltinFloor: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| } break; |
| case kTfLiteBuiltinRelu: |
| case kTfLiteBuiltinReluN1To1: |
| case kTfLiteBuiltinRelu6: |
| case kTfLiteBuiltinLogistic: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); |
| } break; |
| case kTfLiteBuiltinTanh: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| const TfLiteType input_type = |
| context->tensors[node->inputs->data[0]].type; |
| Expect(IsFloat(input_type) || |
| (IsQuantized(input_type) && |
| android_sdk_version >= kMinSdkVersionForNNAPI12), |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| " NNAPI only support float tanh.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinSub: { |
| ExpectMaxOpVersion(version, 3, &val_ctx); |
| const TfLiteType input_type = |
| context->tensors[node->inputs->data[0]].type; |
| Expect((android_sdk_version >= kMinSdkVersionForNNAPI11 && |
| IsFloat(input_type)) || |
| (android_sdk_version >= kMinSdkVersionForNNAPI12 && |
| IsQuantized(input_type)) || |
| (android_sdk_version >= kMinSdkVersionForNNAPI13 && |
| IsInt32(input_type)), |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NNAPI only support float sub.", &val_ctx); |
| if (IsInt32(input_type)) { |
| Expect(reinterpret_cast<TfLiteSubParams*>(node->builtin_data) |
| ->activation == kTfLiteActNone, |
| NNAPIValidationFailureType::kNoActivationExpected, |
| "No activation function supported", &val_ctx); |
| } |
| const int input0_rank = |
| context->tensors[node->inputs->data[0]].dims->size; |
| const int input1_rank = |
| context->tensors[node->inputs->data[1]].dims->size; |
| Expect(input0_rank <= 4 && input1_rank <= 4, |
| NNAPIValidationFailureType::kUnsupportedOperandRank, |
| "Input rank must be <= 4", &val_ctx); |
| } break; |
| case kTfLiteBuiltinDiv: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11, |
| &val_ctx); |
| Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NNAPI only support float div.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinPad: |
| case kTfLiteBuiltinPadv2: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11, |
| &val_ctx); |
| |
| const TfLiteIntArrayView input_shape( |
| context->tensors[node->inputs->data[0]].dims); |
| Expect(!HasZeroes(input_shape), |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "NN API pad ops do not support input tensors with no elements", |
| &val_ctx); |
| |
| Expect(node->inputs->size >= 2, |
| NNAPIValidationFailureType::kUnsupportedOperatorVariant, |
| "Expecting at least 2 inputs", &val_ctx); |
| |
| if (node->inputs->size == 3) { |
| // This is going to be mapped with a PadV2 |
| Expect( |
| android_sdk_version >= kMinSdkVersionForNNAPI12, |
| NNAPIValidationFailureType::kUnsupportedOperatorVariant, |
| "Specification of the padding value is supported from NNAPI 1.2.", |
| &val_ctx); |
| } else { // this is going to be mapped as Pad |
| if (android_sdk_version < kMinSdkVersionForNNAPI12) { |
| Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Only Float32 inputs are supported before NNAPI 1.2", |
| &val_ctx); |
| } |
| } |
| } break; |
| case kTfLiteBuiltinUnidirectionalSequenceRnn: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| Expect(!IsHybridOperator(context, builtin_code, node), |
| NNAPIValidationFailureType::kUnsupportedHybridOperator, |
| "Hybrid version of this op is not supported by NN API.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinSpaceToBatchNd: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11, |
| &val_ctx); |
| } break; |
| case kTfLiteBuiltinBatchToSpaceNd: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11, |
| &val_ctx); |
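| // NNAPI's BATCH_TO_SPACE_ND has no crops input, so only nodes whose crops
| // are all zero (a 2x2 int32 tensor, hence 16 bytes) can be delegated.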
| auto crops = context->tensors[node->inputs->data[2]]; |
| auto crops_data = crops.data.i32; |
| Expect(crops_data && crops.bytes == 16 && crops_data[0] == 0 && |
| crops_data[1] == 0 && crops_data[2] == 0 && crops_data[3] == 0, |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "All crops should be 0.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinStridedSlice: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11, |
| &val_ctx); |
| } break; |
| case kTfLiteBuiltinTranspose: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11, |
| &val_ctx); |
| // Note that the permutation input tensor value dictates the output |
| // dimensions. |
| // TODO(b/110888333): Support dynamically-sized tensors in delegates. |
| Expect((node->inputs->size > 1) && |
| (context->tensors[node->inputs->data[1]].allocation_type == |
| kTfLiteMmapRo), |
| NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape, |
| "Dynamically-sized tensors not supported.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinAbs: |
| case kTfLiteBuiltinExp: |
| case kTfLiteBuiltinLog: |
| case kTfLiteBuiltinRsqrt: |
| case kTfLiteBuiltinPow: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| ExpectIsFloatOperator(context, node, &val_ctx); |
| } break; |
| case kTfLiteBuiltinSlice: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| const auto begin_type = context->tensors[node->inputs->data[1]].type; |
| const auto size_type = context->tensors[node->inputs->data[2]].type; |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32, |
| kTfLiteUInt8, kTfLiteInt8); |
| Expect(begin_type == kTfLiteInt32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Begin type should be Int32", &val_ctx); |
| Expect(size_type == kTfLiteInt32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Size type should be Int32", &val_ctx); |
| } break; |
| case kTfLiteBuiltinSin: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| ExpectIsFloatOperator(context, node, &val_ctx); |
| } break; |
| case kTfLiteBuiltinTransposeConv: { |
| ExpectMaxOpVersion(version, 3, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| Expect((node->inputs->size > 1) && |
| (context->tensors[node->inputs->data[0]].allocation_type == |
| kTfLiteMmapRo) && |
| (context->tensors[node->inputs->data[1]].allocation_type == |
| kTfLiteMmapRo), |
| NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape, |
| "Dynamically-sized tensors not supported.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinSqrt: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| ExpectIsFloatOperator(context, node, &val_ctx); |
| } break; |
| case kTfLiteBuiltinRnn: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| Expect(node->inputs->size == 5, |
| NNAPIValidationFailureType::kUnsupportedOperatorVariant, |
| "Expected 5 input", &val_ctx); |
| if (node->inputs->size >= 2) { |
| Expect( |
| context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type == |
| kTfLiteFloat32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NNAPI only support float32 weights.", &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinSpaceToDepth: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| const TfLiteType input_type = |
| context->tensors[node->inputs->data[0]].type; |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8, |
| kTfLiteInt8); |
| } break; |
| case kTfLiteBuiltinSvdf: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| Expect(node->inputs->size == 5, |
| NNAPIValidationFailureType::kUnsupportedOperandRank, |
| "Expected input of rank 5", &val_ctx); |
| if (node->inputs->size >= 2) { |
| Expect( |
| context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type == |
| kTfLiteFloat32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NNAPI only support float32 weights.", &val_ctx); |
| } |
| Expect(android_sdk_version >= kMinSdkVersionForNNAPI11, |
| NNAPIValidationFailureType::kUnsupportedOperandRank, |
| "SVDF does not support rank > 1 on NNAPI 1.0.", &val_ctx); |
| Expect(context->tensors[node->inputs->data[/*kWeightsFeatureTensor*/ 1]] |
| .type == kTfLiteFloat32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Weights should be Float32", &val_ctx); |
| } break; |
| case kTfLiteBuiltinLstm: { |
| ExpectMaxOpVersion(version, 3, &val_ctx); |
| Expect( |
| android_sdk_version >= kMinSdkVersionForNNAPI11, |
| NNAPIValidationFailureType::kUnsupportedAndroidVersion, |
| "NNAPI 1.0 has a bug for optional tensors which would affect LSTM.", |
| &val_ctx); |
| Expect(android_sdk_version >= kMinSdkVersionForNNAPI12 || |
| !IsHybridOperator(context, builtin_code, node), |
| NNAPIValidationFailureType::kUnsupportedHybridOperator, |
| "Hybrid operators not supported before NNAPI 1.2.", &val_ctx); |
| |
| const auto weight_input_index = |
| isLstmBasicKernel(node) ? 2 /* basic::kInputWeights */ |
| : 4 /* full::kInputToOutputWeightsTensor */; |
| |
| const TfLiteType weight_type = |
| context->tensors[node->inputs->data[weight_input_index]].type; |
| |
| if (isLstmBasicKernel(node)) { |
| Expect(weight_type == kTfLiteUInt8, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Basic LSTM Kernels support only UINT8 weights", &val_ctx); |
| |
| const auto input_quantization_params = |
| context->tensors[node->inputs->data[0]].params; |
| Expect(input_quantization_params.scale == 1. / 128. && |
| input_quantization_params.zero_point == 128, |
| NNAPIValidationFailureType::kUnsupportedQuantizationParameters, |
| "Invalid input quantization", &val_ctx); |
| |
| const auto output_quantization_params = |
| context->tensors[node->outputs->data[0]].params; |
| Expect(output_quantization_params.scale == 1. / 128. && |
| output_quantization_params.zero_point == 128, |
| NNAPIValidationFailureType::kUnsupportedQuantizationParameters, |
| "Invalid output quantization", &val_ctx); |
| |
| const auto cell_state_quantization_params = |
| context->tensors[node->outputs->data[1]].params; |
| Expect(cell_state_quantization_params.scale == 16. / 32768. || |
| cell_state_quantization_params.zero_point == 0, |
| NNAPIValidationFailureType::kUnsupportedQuantizationParameters, |
| "Invalid cell state quantization", &val_ctx); |
| |
| auto is_const_tensor = [&node, &context](int tensor_idx) { |
| return context->tensors[node->inputs->data[tensor_idx]] |
| .allocation_type == kTfLiteMmapRo; |
| }; |
| |
| Expect(is_const_tensor(2 /* kInputWeights */), |
| NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape, |
| "Weights tensor should be constant", &val_ctx); |
| Expect(is_const_tensor(3 /* kInputBiases */), |
| NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape, |
| "Biases tensor should be constant", &val_ctx); |
| |
| return val_ctx.is_valid; |
| } else { |
| if (node->inputs->size == 24) { |
| ExpectMinAndroidSdkVersion(android_sdk_version, |
| kMinSdkVersionForNNAPI12, &val_ctx); |
| } |
| |
| if (android_sdk_version >= kMinSdkVersionForNNAPI13) { |
| Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8 || |
| weight_type == kTfLiteInt8, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Weight has to be Float32 or UINT8 or INT8", &val_ctx); |
| } else { |
| Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Weight has to be Float32 or UINT8", &val_ctx); |
| } |
| } |
| } break; |
| case kTfLiteBuiltinMean: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11, |
| &val_ctx); |
| if (android_sdk_version >= kMinSdkVersionForNNAPI12) { |
| Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 || |
| IsQuantized(context->tensors[node->inputs->data[0]].type), |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Expected Float32 or Quantized input", &val_ctx); |
| } else { |
| Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Expected Float32 input", &val_ctx); |
| } |
| Expect(context->tensors[node->outputs->data[0]].dims->size > 0, |
| NNAPIValidationFailureType::kUnsupportedOutputType, |
| "NNAPI does not support generating a scalar as output for MEAN.", |
| &val_ctx); |
| |
| auto input_param = context->tensors[node->inputs->data[0]].params; |
| auto output_param = context->tensors[node->outputs->data[0]].params; |
| Expect(input_param.scale == output_param.scale && |
| input_param.zero_point == output_param.zero_point, |
| NNAPIValidationFailureType::kUnsupportedOutputType, |
| "NNAPI requires that the input and output have the same " |
| "quantization parameters.", |
| &val_ctx); |
| } break; |
| case kTfLiteBuiltinEmbeddingLookup: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| Expect(context->tensors[node->inputs->data[1]].type == kTfLiteFloat32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NNAPI only support float32 values.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinHashtableLookup: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| Expect(context->tensors[node->outputs->data[0]].type == kTfLiteFloat32, |
| NNAPIValidationFailureType::kUnsupportedOutputType, |
| "NNAPI only support float32 output.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinMaximum: |
| case kTfLiteBuiltinMinimum: { |
| ExpectMaxOpVersion(version, 3, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8, |
| kTfLiteInt8, kTfLiteInt32); |
| const TfLiteTensor& operand0 = context->tensors[node->inputs->data[0]]; |
| if (operand0.dims->size == 0) { |
| Expect(operand0.allocation_type == kTfLiteMmapRo, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Scalar operand should be constant", &val_ctx); |
| } |
| const TfLiteTensor& operand1 = context->tensors[node->inputs->data[1]]; |
| if (operand1.dims->size == 0) { |
| Expect(operand1.allocation_type == kTfLiteMmapRo, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Scalar operand should be constant", &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinCast: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| const TfLiteType input_type = |
| context->tensors[node->inputs->data[0]].type; |
| const TfLiteType output_type = |
| context->tensors[node->outputs->data[0]].type; |
| if (android_sdk_version >= kMinSdkVersionForNNAPI13) { |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32, |
| kTfLiteUInt8, kTfLiteInt8); |
| |
| ExpectTypeIn( |
| output_type, |
| {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8}, |
| NNAPIValidationFailureType::kUnsupportedOutputType, |
| "Output type should be one of kTfLiteFloat32, kTfLiteInt32, " |
| "kTfLiteUInt8, kTfLiteInt8.", |
| &val_ctx); |
| } else { |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32, |
| kTfLiteUInt8); |
| |
| ExpectTypeIn( |
| output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8}, |
| NNAPIValidationFailureType::kUnsupportedOutputType, |
| "Output type should be one of kTfLiteFloat32, kTfLiteInt32, " |
| "kTfLiteUInt8.", |
| &val_ctx); |
| } |
| } break; |
| case kTfLiteBuiltinLeakyRelu: |
| case kTfLiteBuiltinPrelu: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8, |
| kTfLiteInt8); |
| } break; |
| case kTfLiteBuiltinTile: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt8, |
| kTfLiteUInt8, kTfLiteInt32); |
| const auto multipliers_type = |
| context->tensors[node->inputs->data[1]].type; |
| Expect(multipliers_type == kTfLiteInt32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Multipliers should be Int32", &val_ctx); |
| } break; |
| case kTfLiteBuiltinLogicalOr: |
| case kTfLiteBuiltinLogicalAnd: |
| case kTfLiteBuiltinLogicalNot: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| Expect(input_type == kTfLiteBool, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Input should be bool", &val_ctx); |
| } break; |
| case kTfLiteBuiltinLess: |
| case kTfLiteBuiltinLessEqual: |
| case kTfLiteBuiltinGreater: |
| case kTfLiteBuiltinGreaterEqual: |
| case kTfLiteBuiltinEqual: |
| case kTfLiteBuiltinNotEqual: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11, |
| &val_ctx); |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8, |
| kTfLiteInt8, kTfLiteBool, kTfLiteInt32); |
| } break; |
| case kTfLiteBuiltinNeg: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11, |
| &val_ctx); |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32); |
| } break; |
| case kTfLiteBuiltinTopkV2: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| const auto& input_type = context->tensors[node->inputs->data[0]].type; |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32, |
| kTfLiteUInt8, kTfLiteInt8); |
| const auto& k_param = context->tensors[node->inputs->data[1]]; |
| Expect(k_param.type == kTfLiteInt32 && |
| k_param.allocation_type == kTfLiteMmapRo, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "K param should be a constant of type Int32", &val_ctx); |
| } break; |
| case kTfLiteBuiltinSelect: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11, |
| &val_ctx); |
| const auto value_type = context->tensors[node->inputs->data[1]].type; |
| EXPECT_INPUT_TYPE_IN(value_type, kTfLiteFloat32, kTfLiteInt32, |
| kTfLiteUInt8, kTfLiteInt8); |
| TfLiteIntArray* condition_shape = |
| context->tensors[node->inputs->data[0]].dims; |
| TfLiteIntArray* input_shape = |
| context->tensors[node->inputs->data[1]].dims; |
| Expect(TfLiteIntArrayEqual(condition_shape, input_shape), |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "Condition and inputs tensors should have the same shape", |
| &val_ctx); |
| } break; |
| case kTfLiteBuiltinGather: { |
| ExpectOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| const auto& positions = context->tensors[node->inputs->data[1]]; |
| |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16, |
| kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8); |
| |
| Expect(positions.type == kTfLiteInt32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Positions type should be one of kTfLiteInt32", &val_ctx); |
| Expect(positions.dims->size != 0, |
| NNAPIValidationFailureType::kUnsupportedOperandRank, |
| "0-dimension args are not supported by NNAPI.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinBidirectionalSequenceLstm: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| Expect(!IsHybridOperator(context, builtin_code, node), |
| NNAPIValidationFailureType::kUnsupportedHybridOperator, |
| "Hybrid version of this op is not supported by NN API.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinExpandDims: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16, |
| kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8); |
| const auto axis = context->tensors[node->inputs->data[1]]; |
| Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NNAPI only supports constant int32 axis tensor.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinSplit: { |
| ExpectOpVersion(version, 3, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| // Tensor indices: split_dim: 0, value: 1 |
| const TfLiteTensor& input = context->tensors[node->inputs->data[1]]; |
| if (android_sdk_version >= kMinSdkVersionForNNAPI13) { |
| EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8, |
| kTfLiteInt8, kTfLiteInt32); |
| } else { |
| EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8, |
| kTfLiteInt32); |
| } |
| const TfLiteTensor& axis = context->tensors[node->inputs->data[0]]; |
| Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NNAPI only supports constant int32 axis tensor.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinLogSoftmax: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| Expect(input_type == kTfLiteFloat32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Input should be Float32.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinQuantize: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| const auto value_type = context->tensors[node->inputs->data[0]].type; |
| Expect(value_type == kTfLiteFloat32 || IsQuantized(value_type), |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "Value should be quantized or Float32.", &val_ctx); |
| if (IsQuantized(value_type)) { |
| const auto quantization_params = |
| context->tensors[node->inputs->data[0]].params; |
| Expect(quantization_params.scale > 0.f, |
| NNAPIValidationFailureType::kUnsupportedQuantizationParameters, |
| "Quantization scale should be > 0.", &val_ctx); |
| } |
| const auto output_type = context->tensors[node->outputs->data[0]].type; |
| if (android_sdk_version < kMinSdkVersionForNNAPI13) { |
| Expect(output_type == kTfLiteUInt8, |
| NNAPIValidationFailureType::kUnsupportedOutputType, |
| "Output should be kTfLiteUInt8.", &val_ctx); |
| } else { |
| ExpectTypeIn(output_type, {kTfLiteUInt8, kTfLiteInt8}, |
| NNAPIValidationFailureType::kUnsupportedOutputType, |
| "Output should be kTfLiteUInt8.", &val_ctx); |
| } |
| const auto quantization_params = |
| context->tensors[node->outputs->data[0]].params; |
| Expect(quantization_params.scale > 0.f, |
| NNAPIValidationFailureType::kUnsupportedQuantizationParameters, |
| "Quantization scale should be > 0.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinReduceAny: { |
| ExpectOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| Expect(context->tensors[node->outputs->data[0]].dims->size != 0, |
| NNAPIValidationFailureType::kUnsupportedOutputType, |
| "NNAPI does not support generating a scalar as output.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinReduceMin: |
| case kTfLiteBuiltinReduceMax: { |
| ExpectMaxOpVersion(version, 2, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| const auto input_tensor = context->tensors[node->inputs->data[0]]; |
| const auto input_type = input_tensor.type; |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8, |
| kTfLiteInt8); |
| Expect(input_tensor.dims->size != 0, |
| NNAPIValidationFailureType::kUnsupportedOutputType, |
| "NNAPI does not support generating a scalar as output.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinDepthToSpace: { |
| const TfLiteType input_type = |
| context->tensors[node->inputs->data[0]].type; |
| EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8, |
| kTfLiteInt8); |
| } break; |
| case kTfLiteBuiltinReduceProd: |
| case kTfLiteBuiltinSum: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, |
| &val_ctx); |
| Expect(context->tensors[node->outputs->data[0]].dims->size != 0, |
| NNAPIValidationFailureType::kUnsupportedOutputType, |
| "NNAPI does not support generating a scalar as output", &val_ctx); |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| Expect(input_type == kTfLiteFloat32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NNAPI only supports floating point input.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinElu: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13, |
| &val_ctx); |
| const auto input_type = context->tensors[node->inputs->data[0]].type; |
| Expect(input_type == kTfLiteFloat32, |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NNAPI only supports floating point input.", &val_ctx); |
| } break; |
| case kTfLiteBuiltinFill: { |
| ExpectOpVersion(version, 1, &val_ctx); |
| ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13, |
| &val_ctx); |
| const auto& dims_tensor = context->tensors[node->inputs->data[0]]; |
| Expect(IsConstantTensor(&dims_tensor), |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NNAPI doesn't support dynamic dimensions tensor.", &val_ctx); |
| EXPECT_INPUT_TYPE_IN(dims_tensor.type, kTfLiteInt32, kTfLiteInt64); |
| if (IsConstantTensor(&dims_tensor)) { |
| Expect(dims_tensor.dims->data[0] != 0, |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "NNAPI doesn't support generating scalars from FILL", &val_ctx); |
| if (dims_tensor.type == kTfLiteInt64) { |
| bool fit_in_int32 = |
| std::all_of(dims_tensor.data.i64, |
| dims_tensor.data.i64 + dims_tensor.dims->data[0], |
| [](int64_t dim) { |
| return std::numeric_limits<int32_t>::min() <= dim && |
| dim <= std::numeric_limits<int32_t>::max(); |
| }); |
| Expect(fit_in_int32, |
| NNAPIValidationFailureType::kUnsupportedOperandValue, |
| "NNAPI only supports int32 dimensions tensor. If the " |
| "dimensions type is int64 and they are constant we can " |
| "convert them to int32 if the value isn't too large.", |
| &val_ctx); |
| } |
| } |
| const auto& value_tensor = context->tensors[node->inputs->data[1]]; |
| EXPECT_INPUT_TYPE_IN(value_tensor.type, kTfLiteFloat32, kTfLiteInt32, |
| kTfLiteInt64); |
| if (value_tensor.type == kTfLiteInt64) { |
| Expect( |
| IsConstantTensor(&value_tensor) && |
| *value_tensor.data.i64 <= std::numeric_limits<int32_t>::max() && |
| *value_tensor.data.i64 >= std::numeric_limits<int32_t>::min(), |
| NNAPIValidationFailureType::kUnsupportedInputType, |
| "NNAPI only supports int32 input. If the input type is int64 and " |
| "constant we can convert it to int32 if the value isn't too " |
| "large.", |
| &val_ctx); |
| } |
| } break; |
| default: |
| // All other operators are not mapped. |
| AddValidationFailure(NNAPIValidationFailureType::kUnsupportedOperator, |
| "Unsupported operation type.", &val_ctx); |
| } |
| return val_ctx.is_valid; |
| } // NOLINT(readability/fn_size) |
| |
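| // Translates a TFLite builtin op (already accepted by Validate) into the
| // corresponding ANeuralNetworksOperationType, adding to the op builder any
| // extra scalar/vector operands that NNAPI expects in addition to the TFLite
| // tensor inputs.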
| TfLiteStatus NNAPIDelegateKernel::Map( |
| TfLiteContext* context, int builtin_code, int version, |
| int android_sdk_version, const NNAPIOpMappingArgs& mapping_args, |
| ANeuralNetworksOperationType* nn_op_type) { |
| switch (builtin_code) { |
| case kTfLiteBuiltinAdd: { |
| auto builtin = |
| reinterpret_cast<TfLiteAddParams*>(mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->activation); |
| *nn_op_type = ANEURALNETWORKS_ADD; |
| } break; |
| case kTfLiteBuiltinArgMax: { |
| *nn_op_type = ANEURALNETWORKS_ARGMAX; |
| } break; |
| case kTfLiteBuiltinArgMin: { |
| *nn_op_type = ANEURALNETWORKS_ARGMIN; |
| } break; |
| case kTfLiteBuiltinMul: { |
| auto builtin = |
| reinterpret_cast<TfLiteMulParams*>(mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->activation); |
| *nn_op_type = ANEURALNETWORKS_MUL; |
| } break; |
| case kTfLiteBuiltinAveragePool2d: { |
| mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data); |
| *nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D; |
| } break; |
| case kTfLiteBuiltinMaxPool2d: { |
| mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data); |
| *nn_op_type = ANEURALNETWORKS_MAX_POOL_2D; |
| } break; |
| case kTfLiteBuiltinL2Pool2d: { |
| mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data); |
| *nn_op_type = ANEURALNETWORKS_L2_POOL_2D; |
| } break; |
| case kTfLiteBuiltinConv2d: { |
| auto builtin = |
| reinterpret_cast<TfLiteConvParams*>(mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->padding); |
| mapping_args.builder->AddScalarInt32Operand(builtin->stride_width); |
| mapping_args.builder->AddScalarInt32Operand(builtin->stride_height); |
| mapping_args.builder->AddScalarInt32Operand(builtin->activation); |
| // NNAPI supports dilated Conv2D since NNAPI 1.2. |
| if (builtin->dilation_width_factor != 1 || |
| builtin->dilation_height_factor != 1) { |
| mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format |
| mapping_args.builder->AddScalarInt32Operand( |
| builtin->dilation_width_factor); |
| mapping_args.builder->AddScalarInt32Operand( |
| builtin->dilation_height_factor); |
| } |
| *nn_op_type = ANEURALNETWORKS_CONV_2D; |
| } break; |
| case kTfLiteBuiltinDepthwiseConv2d: { |
| auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->padding); |
| mapping_args.builder->AddScalarInt32Operand(builtin->stride_width); |
| mapping_args.builder->AddScalarInt32Operand(builtin->stride_height); |
| mapping_args.builder->AddScalarInt32Operand(builtin->depth_multiplier); |
| mapping_args.builder->AddScalarInt32Operand(builtin->activation); |
| if (builtin->dilation_width_factor != 1 || |
| builtin->dilation_height_factor != 1) { |
| mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format. |
| mapping_args.builder->AddScalarInt32Operand( |
| builtin->dilation_width_factor); |
| mapping_args.builder->AddScalarInt32Operand( |
| builtin->dilation_height_factor); |
| } |
| *nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D; |
| } break; |
| case kTfLiteBuiltinFullyConnected: { |
| auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->activation); |
| *nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED; |
| } break; |
| case kTfLiteBuiltinHardSwish: { |
| *nn_op_type = ANEURALNETWORKS_HARD_SWISH; |
| } break; |
| case kTfLiteBuiltinSoftmax: { |
| auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarFloat32Operand(builtin->beta); |
| // The optional axis scalar (the dimension softmax is computed over) is not
| // added here; NNAPI defaults it to -1, i.e. the last dimension.
| *nn_op_type = ANEURALNETWORKS_SOFTMAX; |
| } break; |
| case kTfLiteBuiltinReshape: { |
| if (mapping_args.node->inputs->size == 1) { |
| // if no new_shape tensor, construct the new shape from params. |
| auto* params = reinterpret_cast<TfLiteReshapeParams*>( |
| mapping_args.node->builtin_data); |
| int num_dimensions = params->num_dimensions; |
| std::vector<int32_t> output_shape(num_dimensions); |
| for (int i = 0; i < num_dimensions; ++i) { |
| output_shape[i] = params->shape[i]; |
| } |
| mapping_args.builder->AddVectorInt32Operand( |
| output_shape.data(), static_cast<uint32_t>(num_dimensions)); |
| } |
| *nn_op_type = ANEURALNETWORKS_RESHAPE; |
| } break; |
| case kTfLiteBuiltinResizeBilinear: { |
| const int output_id = mapping_args.node->outputs->data[0]; |
| auto& output = mapping_args.context->tensors[output_id]; |
| const int output_height = output.dims->data[1]; |
| const int output_width = output.dims->data[2]; |
| mapping_args.builder->AddScalarInt32Operand(output_width); |
| mapping_args.builder->AddScalarInt32Operand(output_height); |
| auto builtin = reinterpret_cast<TfLiteResizeBilinearParams*>( |
| mapping_args.node->builtin_data); |
| if (builtin->align_corners || builtin->half_pixel_centers) {
| mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format |
| mapping_args.builder->AddScalarBoolOperand(builtin->align_corners); |
| mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers); |
| } |
| *nn_op_type = ANEURALNETWORKS_RESIZE_BILINEAR; |
| } break; |
| case kTfLiteBuiltinResizeNearestNeighbor: { |
| const TfLiteTensor& new_shape = |
| mapping_args.context->tensors[mapping_args.node->inputs->data[1]]; |
| // NNAPI uses scalar inputs for height and width. |
| mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[1]); |
| mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[0]); |
| mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format |
| auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>( |
| mapping_args.node->builtin_data); |
| if (builtin->align_corners == true || |
| builtin->half_pixel_centers == true) { |
| mapping_args.builder->AddScalarBoolOperand(builtin->align_corners); |
| mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers); |
| } |
| *nn_op_type = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR; |
| } break; |
| case kTfLiteBuiltinSqueeze: { |
| auto builtin = reinterpret_cast<TfLiteSqueezeParams*>( |
| mapping_args.node->builtin_data); |
| // Note that we add the squeeze dimensions even if the dimensions |
| // were unspecified (empty), as NNAPI requires the operand. |
| mapping_args.builder->AddVectorInt32Operand( |
| builtin->num_squeeze_dims ? builtin->squeeze_dims : nullptr, |
| static_cast<uint32_t>(builtin->num_squeeze_dims)); |
| *nn_op_type = ANEURALNETWORKS_SQUEEZE; |
| } break; |
| case kTfLiteBuiltinUnidirectionalSequenceLstm: { |
| auto builtin = reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->activation); |
| mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip); |
| mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip); |
| mapping_args.builder->AddScalarBoolOperand(builtin->time_major); |
| const bool hybrid_op = IsHybridOperator( |
| mapping_args.context, kTfLiteBuiltinUnidirectionalSequenceLstm, |
| mapping_args.node); |
| if (mapping_args.node->inputs->size == 24) { |
| // Add layer normalization tensors if they are provided. |
| for (int i = 20; i < 24; ++i) { |
| const int input_index = mapping_args.node->inputs->data[i]; |
| if (input_index != kTfLiteOptionalTensor) { |
| mapping_args.builder->AddTensorInput(input_index, hybrid_op); |
| } else { |
| mapping_args.builder->AddVectorFloat32Operand(nullptr, 0); |
| } |
| } |
| } else { |
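| // The four optional layer-normalization inputs are absent, so add four |
| // empty operands, which NNAPI still expects. |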
| for (int i = 0; i < 4; ++i) { |
| mapping_args.builder->AddVectorFloat32Operand(nullptr, 0); |
| } |
| } |
| |
| *nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM; |
| } break; |
| case kTfLiteBuiltinL2Normalization: { |
| *nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION; |
| } break; |
| case kTfLiteBuiltinLocalResponseNormalization: { |
| auto builtin = reinterpret_cast<TfLiteLocalResponseNormParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->radius); |
| mapping_args.builder->AddScalarFloat32Operand(builtin->bias); |
| mapping_args.builder->AddScalarFloat32Operand(builtin->alpha); |
| mapping_args.builder->AddScalarFloat32Operand(builtin->beta); |
| *nn_op_type = ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION; |
| } break; |
| case kTfLiteBuiltinLshProjection: { |
| auto builtin = reinterpret_cast<TfLiteLSHProjectionParams*>( |
| mapping_args.node->builtin_data); |
| int type = builtin->type; |
| // In Android Q+, NNAPI uses 3 to denote |
| // kTfLiteLshProjectionSparse. |
| const int kNNAPILshProjectionSparse = 3; |
| if (builtin->type == kTfLiteLshProjectionSparse) { |
| type = kNNAPILshProjectionSparse; |
| // Add NNAPI null weight operand. |
| mapping_args.builder->AddVectorFloat32Operand(nullptr, 0); |
| } |
| mapping_args.builder->AddScalarInt32Operand(type); |
| *nn_op_type = ANEURALNETWORKS_LSH_PROJECTION; |
| } break; |
| case kTfLiteBuiltinConcatenation: { |
| auto builtin = reinterpret_cast<TfLiteConcatenationParams*>( |
| mapping_args.node->builtin_data); |
| int axis = builtin->axis < 0 |
| ? mapping_args.context |
| ->tensors[mapping_args.node->inputs->data[0]] |
| .dims->size + |
| builtin->axis |
| : builtin->axis; |
| mapping_args.builder->AddScalarInt32Operand(axis); |
| *nn_op_type = ANEURALNETWORKS_CONCATENATION; |
| } break; |
| case kTfLiteBuiltinDequantize: { |
| *nn_op_type = ANEURALNETWORKS_DEQUANTIZE; |
| } break; |
| case kTfLiteBuiltinFloor: { |
| *nn_op_type = ANEURALNETWORKS_FLOOR; |
| } break; |
| case kTfLiteBuiltinRelu: { |
| *nn_op_type = ANEURALNETWORKS_RELU; |
| } break; |
| case kTfLiteBuiltinReluN1To1: { |
| *nn_op_type = ANEURALNETWORKS_RELU1; |
| } break; |
| case kTfLiteBuiltinRelu6: { |
| *nn_op_type = ANEURALNETWORKS_RELU6; |
| } break; |
| case kTfLiteBuiltinLogistic: { |
| *nn_op_type = ANEURALNETWORKS_LOGISTIC; |
| } break; |
| case kTfLiteBuiltinTanh: { |
| *nn_op_type = ANEURALNETWORKS_TANH; |
| } break; |
| case kTfLiteBuiltinSub: { |
| auto builtin = |
| reinterpret_cast<TfLiteSubParams*>(mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->activation); |
| *nn_op_type = ANEURALNETWORKS_SUB; |
| } break; |
| case kTfLiteBuiltinDiv: { |
| auto builtin = |
| reinterpret_cast<TfLiteDivParams*>(mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->activation); |
| *nn_op_type = ANEURALNETWORKS_DIV; |
| } break; |
| case kTfLiteBuiltinPad: |
| case kTfLiteBuiltinPadv2: { |
| // We prefer to map to PAD, since it is more widely supported. We map to |
| // PAD_V2 only when we need to specify the padding value. |
| if (mapping_args.node->inputs->size == 2) { |
| *nn_op_type = ANEURALNETWORKS_PAD; |
| } else { |
| const int constant_value_id = mapping_args.node->inputs->data[2]; |
| if (constant_value_id == kTfLiteOptionalTensor) { |
| *nn_op_type = ANEURALNETWORKS_PAD; |
| } else { |
| *nn_op_type = ANEURALNETWORKS_PAD_V2; |
| } |
| } |
| } break; |
| case kTfLiteBuiltinUnidirectionalSequenceRnn: { |
| auto builtin = reinterpret_cast<TfLiteSequenceRNNParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->activation); |
| mapping_args.builder->AddScalarInt32Operand(builtin->time_major); |
| *nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN; |
| } break; |
| case kTfLiteBuiltinSpaceToBatchNd: { |
| *nn_op_type = ANEURALNETWORKS_SPACE_TO_BATCH_ND; |
| } break; |
| case kTfLiteBuiltinBatchToSpaceNd: { |
| *nn_op_type = ANEURALNETWORKS_BATCH_TO_SPACE_ND; |
| } break; |
| case kTfLiteBuiltinStridedSlice: { |
| auto builtin = reinterpret_cast<TfLiteStridedSliceParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->begin_mask); |
| mapping_args.builder->AddScalarInt32Operand(builtin->end_mask); |
| mapping_args.builder->AddScalarInt32Operand(builtin->shrink_axis_mask); |
| *nn_op_type = ANEURALNETWORKS_STRIDED_SLICE; |
| } break; |
| case kTfLiteBuiltinTranspose: { |
| *nn_op_type = ANEURALNETWORKS_TRANSPOSE; |
| } break; |
| case kTfLiteBuiltinAbs: { |
| *nn_op_type = ANEURALNETWORKS_ABS; |
| } break; |
| case kTfLiteBuiltinExp: { |
| *nn_op_type = ANEURALNETWORKS_EXP; |
| } break; |
| case kTfLiteBuiltinLog: { |
| *nn_op_type = ANEURALNETWORKS_LOG; |
| } break; |
| case kTfLiteBuiltinRsqrt: { |
| *nn_op_type = ANEURALNETWORKS_RSQRT; |
| } break; |
| case kTfLiteBuiltinPow: { |
| *nn_op_type = ANEURALNETWORKS_POW; |
| } break; |
| case kTfLiteBuiltinSlice: { |
| *nn_op_type = ANEURALNETWORKS_SLICE; |
| } break; |
| case kTfLiteBuiltinSin: { |
| *nn_op_type = ANEURALNETWORKS_SIN; |
| } break; |
| case kTfLiteBuiltinTransposeConv: { |
| int input_tensor_flags = 0; |
| const int input_tensor_id = |
| mapping_args.node->inputs->data[/*kDataInputTensor*/ 2]; |
| const int weight_tensor_id = |
| mapping_args.node->inputs->data[/*kWeightsTensor*/ 1]; |
| |
| // Transpose convolution doesn't have a hybrid variant. |
| const bool hybrid_op = false; |
| |
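| // NNAPI 1.3+ supports the signed asymmetric int8 type directly; on older |
| // versions the input is converted to uint8 instead. |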
| if (android_sdk_version >= kMinSdkVersionForNNAPI13) { |
| mapping_args.builder->AddTensorInput( |
| input_tensor_id, hybrid_op, |
| input_tensor_flags | NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED); |
| |
| } else { |
| mapping_args.builder->AddTensorInput( |
| input_tensor_id, hybrid_op, |
| input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION); |
| } |
| // Transpose convolution uses per-channel quantization with int8 inputs |
| // even if the number of channels in the quantization parameters is 1 |
| // (as opposed to conv2d, which uses per-tensor quantization in this |
| // case). |
| mapping_args.builder->AddTensorInput( |
| weight_tensor_id, hybrid_op, |
| input_tensor_flags | NN_TENSOR_FLAG_FORCE_PER_CHANNEL); |
| |
| const bool is_bias_present = |
| mapping_args.node->inputs->size == 4 && |
| mapping_args.node->inputs->data[/*kBiasTensor*/ 3] != |
| kTfLiteOptionalTensor; |
| |
| if (is_bias_present) { |
| mapping_args.builder->AddTensorInput( |
| mapping_args.node->inputs->data[/*kBiasTensor*/ 3], hybrid_op); |
| } else { |
| // NNAPI requires a bias tensor, so we allocate a new tensor and fill |
| // it with zeroes. It is deleted along with the other tensors in the |
| // context when the subgraph destructor runs. |
| int bias_index = -1; |
| mapping_args.context->AddTensors(mapping_args.context, 1, &bias_index); |
| TfLiteTensor* bias_tensor = &mapping_args.context->tensors[bias_index]; |
| const auto input_type = |
| mapping_args.context |
| ->tensors[mapping_args.node->inputs |
| ->data[/*kDataInputTensor*/ 2]] |
| .type; |
| if (input_type == kTfLiteFloat32) { |
| bias_tensor->type = kTfLiteFloat32; |
| } else { |
| bias_tensor->type = kTfLiteInt32; |
| } |
| |
| // Create an array with a required bias shape and resize the bias |
| // tensor. |
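| // The bias holds one element per output channel (the last dimension of |
| // the NHWC output shape). |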
| TfLiteIntArray* bias_shape = TfLiteIntArrayCreate(1); |
| const TfLiteTensor& output_shape = |
| mapping_args.context->tensors[mapping_args.node->inputs |
| ->data[/*kOutputShapeTensor*/ 0]]; |
| const int output_depth = output_shape.data.i32[3]; |
| bias_shape->data[0] = output_depth; |
| bias_tensor->allocation_type = kTfLiteDynamic; |
| mapping_args.context->ResizeTensor(mapping_args.context, bias_tensor, |
| bias_shape); |
| |
| // Set tensor's values to zeroes and add it using AddVector*, so |
| // that the values are copied to NNAPI. We don't use the AddTensor |
| // function because it doesn't copy values and the tensor we just |
| // created is not in the node->inputs. |
| if (input_type == kTfLiteFloat32) { |
| memset(bias_tensor->data.f, 0, output_depth * sizeof(float)); |
| mapping_args.builder->AddVectorFloat32Operand(bias_tensor->data.f, |
| output_depth); |
| } else { |
| memset(bias_tensor->data.i32, 0, output_depth * sizeof(int)); |
| const TfLiteTensor& input_tensor = |
| mapping_args.context->tensors[mapping_args.node->inputs |
| ->data[/*kDataInputTensor*/ 2]]; |
| const TfLiteTensor& filter_tensor = |
| mapping_args.context->tensors[mapping_args.node->inputs |
| ->data[/*kWeightsTensor*/ 1]]; |
| // NNAPI requires bias scale to be a product of an input scale and |
| // a filter scale. |
| bias_tensor->params.scale = |
| input_tensor.params.scale * filter_tensor.params.scale; |
| mapping_args.builder->AddVectorInt32Operand( |
| bias_tensor->data.i32, output_depth, |
| input_tensor.params.scale * filter_tensor.params.scale, |
| /*zero_point=*/0); |
| } |
| } |
| mapping_args.builder->AddTensorInput( |
| mapping_args.node->inputs->data[/*kOutputShapeTensor*/ 0], hybrid_op); |
| |
| auto builtin = reinterpret_cast<TfLiteTransposeConvParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->padding); |
| mapping_args.builder->AddScalarInt32Operand(builtin->stride_width); |
| mapping_args.builder->AddScalarInt32Operand(builtin->stride_height); |
| mapping_args.builder->AddScalarInt32Operand( |
| /*ANEURALNETWORKS_FUSED_NONE*/ 0); |
| // Use NHWC layout for input and output. |
| mapping_args.builder->AddScalarBoolOperand(false); |
| *nn_op_type = ANEURALNETWORKS_TRANSPOSE_CONV; |
| } break; |
| case kTfLiteBuiltinSqrt: { |
| *nn_op_type = ANEURALNETWORKS_SQRT; |
| } break; |
| case kTfLiteBuiltinRnn: { |
| // NNAPI needs both state_in and state_out. |
| int ann_index; |
| mapping_args.builder->AddStateFloat32Tensor( |
| mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4], |
| &ann_index); |
| mapping_args.model_state_outputs->push_back(ann_index); |
| mapping_args.model_state_tfl_inputs->push_back( |
| mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4]); |
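| // The hidden state is registered as an extra model output; after each |
| // invocation its value is copied back into this TfLite input tensor. |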
| auto builtin = |
| reinterpret_cast<TfLiteRNNParams*>(mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->activation); |
| *nn_op_type = ANEURALNETWORKS_RNN; |
| } break; |
| case kTfLiteBuiltinSpaceToDepth: { |
| auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->block_size); |
| *nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH; |
| } break; |
| case kTfLiteBuiltinSvdf: { |
| // NNAPI needs both state_in and state_out. |
| int ann_index; |
| mapping_args.builder->AddStateFloat32Tensor( |
| mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4], |
| &ann_index); |
| mapping_args.model_state_outputs->push_back(ann_index); |
| mapping_args.model_state_tfl_inputs->push_back( |
| mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4]); |
| |
| auto builtin = |
| reinterpret_cast<TfLiteSVDFParams*>(mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->rank); |
| mapping_args.builder->AddScalarInt32Operand(builtin->activation); |
| *nn_op_type = ANEURALNETWORKS_SVDF; |
| } break; |
| case kTfLiteBuiltinLstm: { |
| if (isLstmBasicKernel(mapping_args.node)) { |
| const auto output_dims = |
| mapping_args.context->tensors[mapping_args.node->outputs->data[1]] |
| .dims; |
| |
| // Inputs kInputData |
| mapping_args.builder->AddTensorInput( |
| mapping_args.node->inputs->data[0 /* kInputData */], |
| /* hybrid_op */ false, |
| /* scalar_as_tensor */ false); |
| |
| // The 8 weight tensors are set by decomposing the |
| // kInputWeights param. |
| const auto weight_tensor = |
| mapping_args.context->tensors[mapping_args.node->inputs |
| ->data[2 /* kInputWeights */]]; |
| |
| std::vector<uint8_t> recurrent_to_input; |
| std::vector<uint8_t> input_to_input; |
| std::vector<uint8_t> recurrent_to_cell; |
| std::vector<uint8_t> input_to_cell; |
| std::vector<uint8_t> recurrent_to_forget; |
| std::vector<uint8_t> input_to_forget; |
| std::vector<uint8_t> recurrent_to_output; |
| std::vector<uint8_t> input_to_output; |
| tflite::delegate::nnapi::DecomposeQuantLstmWeightsTensor( |
| weight_tensor.data.uint8, weight_tensor.dims, &recurrent_to_input, |
| &input_to_input, &recurrent_to_cell, &input_to_cell, |
| &recurrent_to_forget, &input_to_forget, &recurrent_to_output, |
| &input_to_output); |
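| // The submatrices are added below in the order the quantized LSTM op |
| // expects: the four input-to-gate weights, then the four |
| // recurrent-to-gate weights. |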
| |
| TfLiteIntArray* recurrent_weight_dims = TfLiteIntArrayCreate(2); |
| TfLiteIntArray* input_weight_dims = TfLiteIntArrayCreate(2); |
| tflite::delegate::nnapi::SetWeightSubmatrixDims( |
| weight_tensor.dims, recurrent_weight_dims, input_weight_dims); |
| |
| int new_tensor_index = -1; |
| |
| mapping_args.builder->AddNewInputConstantTensor<uint8_t>( |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, |
| input_weight_dims, input_to_input, weight_tensor.params, |
| &new_tensor_index); |
| |
| mapping_args.builder->AddNewInputConstantTensor<uint8_t>( |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, |
| input_weight_dims, input_to_forget, weight_tensor.params, |
| &new_tensor_index); |
| |
| mapping_args.builder->AddNewInputConstantTensor<uint8_t>( |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, |
| input_weight_dims, input_to_cell, weight_tensor.params, |
| &new_tensor_index); |
| |
| mapping_args.builder->AddNewInputConstantTensor<uint8_t>( |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, |
| input_weight_dims, input_to_output, weight_tensor.params, |
| &new_tensor_index); |
| |
| mapping_args.builder->AddNewInputConstantTensor<uint8_t>( |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, |
| recurrent_weight_dims, recurrent_to_input, weight_tensor.params, |
| &new_tensor_index); |
| |
| mapping_args.builder->AddNewInputConstantTensor<uint8_t>( |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, |
| recurrent_weight_dims, recurrent_to_forget, weight_tensor.params, |
| &new_tensor_index); |
| |
| mapping_args.builder->AddNewInputConstantTensor<uint8_t>( |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, |
| recurrent_weight_dims, recurrent_to_cell, weight_tensor.params, |
| &new_tensor_index); |
| |
| mapping_args.builder->AddNewInputConstantTensor<uint8_t>( |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, |
| recurrent_weight_dims, recurrent_to_output, weight_tensor.params, |
| &new_tensor_index); |
| |
| TfLiteIntArrayFree(input_weight_dims); |
| TfLiteIntArrayFree(recurrent_weight_dims); |
| |
| // Biases have to be split into four. |
| const auto bias_size = output_dims->data[1]; |
| const TfLiteTensor& biases_tensor = |
| mapping_args.context->tensors[mapping_args.node->inputs |
| ->data[3 /* kInputBiases */]]; |
| |
| std::vector<int32_t> input_bias; |
| std::vector<int32_t> cell_bias; |
| std::vector<int32_t> forget_bias; |
| std::vector<int32_t> output_bias; |
| delegate::nnapi::DecomposeBiasTensor(biases_tensor.data.i32, bias_size, |
| &input_bias, &cell_bias, |
| &forget_bias, &output_bias); |
| |
| int input_bias_tensor = -1; |
| mapping_args.builder->AddNewInputConstantTensor<int32_t>( |
| ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, input_bias, |
| biases_tensor.params, &input_bias_tensor); |
| int forget_bias_tensor = -1; |
| mapping_args.builder->AddNewInputConstantTensor( |
| ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, |
| forget_bias, biases_tensor.params, &forget_bias_tensor); |
| int cell_gate_bias_tensor = -1; |
| mapping_args.builder->AddNewInputConstantTensor( |
| ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, cell_bias, |
| biases_tensor.params, &cell_gate_bias_tensor); |
| int output_gate_bias_tensor = -1; |
| mapping_args.builder->AddNewInputConstantTensor( |
| ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, |
| output_bias, biases_tensor.params, &output_gate_bias_tensor); |
| |
| mapping_args.builder->AddTensorInput( |
| mapping_args.node->inputs->data[4 /* kInputPrevState */], |
| /* hybrid_op */ false, |
| /* scalar_as_tensor */ false); |
| |
| // kInputPrevActivation |
| mapping_args.builder->AddTensorInput( |
| mapping_args.node->inputs->data[1 /* kInputPrevActivation */], |
| /* hybrid_op */ false, |
| /* scalar_as_tensor */ false); |
| |
| // Configure the copy from the activation and state outputs |
| // to their associated inputs. |
| mapping_args.feedback_loops->push_back(std::make_tuple( |
| mapping_args.node->outputs->data[0 /*kOutputActivation*/], |
| mapping_args.node->inputs->data[1 /*kInputPrevActivation*/])); |
| |
| mapping_args.feedback_loops->push_back(std::make_tuple( |
| mapping_args.node->outputs->data[1 /*kOutputState*/], |
| mapping_args.node->inputs->data[4 /*kInputPrevState*/])); |
| |
| // OUTPUTS |
| // Set only the first two outputs, since the remaining ones are |
| // ignored by NNAPI. |
| mapping_args.builder->AddTensorOutput( |
| mapping_args.node->outputs->data[1 /* kOutputState */], 0); |
| |
| mapping_args.builder->AddTensorOutput( |
| mapping_args.node->outputs->data[0 /* kOutputActivation */], 0); |
| |
| *nn_op_type = ANEURALNETWORKS_QUANTIZED_16BIT_LSTM; |
| } else { |
| auto builtin = reinterpret_cast<TfLiteLSTMParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->activation); |
| mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip); |
| mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip); |
| |
| // The current NNAPI implementation requires the scratch_buffer as |
| // an output. |
| mapping_args.builder->AddAdditionalFloat32OutputTensor(2); |
| |
| // NNAPI needs both state_in and state_out for cell_state and |
| // output_state. |
| int ann_index; |
| mapping_args.builder->AddStateFloat32Tensor( |
| mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 18], |
| &ann_index); |
| mapping_args.model_state_outputs->push_back(ann_index); |
| mapping_args.model_state_tfl_inputs->push_back( |
| mapping_args.node->inputs |
| ->data[/*kInputActivationStateTensor*/ 18]); |
| mapping_args.builder->AddStateFloat32Tensor( |
| mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19], |
| &ann_index); |
| mapping_args.model_state_outputs->push_back(ann_index); |
| mapping_args.model_state_tfl_inputs->push_back( |
| mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19]); |
| |
| const bool hybrid_op = IsHybridOperator( |
| mapping_args.context, kTfLiteBuiltinLstm, mapping_args.node); |
| |
| if (mapping_args.node->inputs->size == 24) { |
| for (int i = 20; i < 24; ++i) { |
| const auto input_index = mapping_args.node->inputs->data[i]; |
| if (input_index != kTfLiteOptionalTensor) { |
| mapping_args.builder->AddTensorInput(input_index, hybrid_op); |
| } else { |
| mapping_args.builder->AddVectorFloat32Operand(nullptr, 0); |
| } |
| } |
| } |
| |
| *nn_op_type = ANEURALNETWORKS_LSTM; |
| } |
| } break; |
| case kTfLiteBuiltinMean: { |
| auto builtin = reinterpret_cast<TfLiteReducerParams*>( |
| mapping_args.node->builtin_data); |
| int32_t keep_dims = 0; |
| if (builtin->keep_dims) keep_dims = 1; |
| mapping_args.builder->AddScalarInt32Operand(keep_dims); |
| *nn_op_type = ANEURALNETWORKS_MEAN; |
| } break; |
| case kTfLiteBuiltinEmbeddingLookup: { |
| *nn_op_type = ANEURALNETWORKS_EMBEDDING_LOOKUP; |
| } break; |
| case kTfLiteBuiltinHashtableLookup: { |
| *nn_op_type = ANEURALNETWORKS_HASHTABLE_LOOKUP; |
| } break; |
| case kTfLiteBuiltinMaximum: { |
| *nn_op_type = ANEURALNETWORKS_MAXIMUM; |
| } break; |
| case kTfLiteBuiltinMinimum: { |
| *nn_op_type = ANEURALNETWORKS_MINIMUM; |
| } break; |
| case kTfLiteBuiltinCast: { |
| *nn_op_type = ANEURALNETWORKS_CAST; |
| } break; |
| case kTfLiteBuiltinLeakyRelu: { |
| const auto input_type = |
| mapping_args.context->tensors[mapping_args.node->inputs->data[0]] |
| .type; |
| auto builtin = reinterpret_cast<TfLiteLeakyReluParams*>( |
| mapping_args.node->builtin_data); |
| |
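| // LeakyRelu is lowered to PRELU with a one-element alpha tensor; for |
| // quantized inputs the alpha value is encoded in the tensor's scale |
| // (a stored value of 1 with scale == alpha). |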
| TfLiteTensor alpha_tensor; |
| alpha_tensor.type = input_type; |
| alpha_tensor.allocation_type = kTfLiteDynamic; |
| alpha_tensor.dims = TfLiteIntArrayCreate(1); |
| alpha_tensor.dims->data[0] = 1; |
| alpha_tensor.params.zero_point = 0; |
| |
| int new_tensor_index = -1; |
| if (input_type == kTfLiteFloat32) { |
| alpha_tensor.params.scale = 0; |
| std::vector<float> alpha_value = {builtin->alpha}; |
| mapping_args.builder->AddNewInputConstantTensor( |
| ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, alpha_tensor.dims, |
| alpha_value, alpha_tensor.params, &new_tensor_index); |
| } else if (input_type == kTfLiteInt8 && |
| android_sdk_version >= kMinSdkVersionForNNAPI13) { |
| alpha_tensor.params.scale = builtin->alpha; |
| std::vector<int8_t> alpha_value = {1}; |
| mapping_args.builder->AddNewInputConstantTensor( |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, kTfLiteInt8, |
| alpha_tensor.dims, alpha_value, alpha_tensor.params, |
| &new_tensor_index); |
| } else { |
| alpha_tensor.params.scale = builtin->alpha; |
| std::vector<uint8_t> alpha_value = {1}; |
| mapping_args.builder->AddNewInputConstantTensor( |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, |
| alpha_tensor.dims, alpha_value, alpha_tensor.params, |
| &new_tensor_index); |
| } |
| |
| *nn_op_type = ANEURALNETWORKS_PRELU; |
| } break; |
| case kTfLiteBuiltinPrelu: { |
| *nn_op_type = ANEURALNETWORKS_PRELU; |
| } break; |
| case kTfLiteBuiltinTile: { |
| *nn_op_type = ANEURALNETWORKS_TILE; |
| } break; |
| case kTfLiteBuiltinLogicalOr: { |
| *nn_op_type = ANEURALNETWORKS_LOGICAL_OR; |
| } break; |
| case kTfLiteBuiltinLogicalAnd: { |
| *nn_op_type = ANEURALNETWORKS_LOGICAL_AND; |
| } break; |
| case kTfLiteBuiltinLogicalNot: { |
| *nn_op_type = ANEURALNETWORKS_LOGICAL_NOT; |
| } break; |
| case kTfLiteBuiltinLess: { |
| *nn_op_type = ANEURALNETWORKS_LESS; |
| } break; |
| case kTfLiteBuiltinLessEqual: { |
| *nn_op_type = ANEURALNETWORKS_LESS_EQUAL; |
| } break; |
| case kTfLiteBuiltinGreater: { |
| *nn_op_type = ANEURALNETWORKS_GREATER; |
| } break; |
| case kTfLiteBuiltinGreaterEqual: { |
| *nn_op_type = ANEURALNETWORKS_GREATER_EQUAL; |
| } break; |
| case kTfLiteBuiltinEqual: { |
| *nn_op_type = ANEURALNETWORKS_EQUAL; |
| } break; |
| case kTfLiteBuiltinNotEqual: { |
| *nn_op_type = ANEURALNETWORKS_NOT_EQUAL; |
| } break; |
| case kTfLiteBuiltinNeg: { |
| *nn_op_type = ANEURALNETWORKS_NEG; |
| } break; |
| case kTfLiteBuiltinTopkV2: { |
| const TfLiteTensor& k_param = |
| mapping_args.context->tensors[mapping_args.node->inputs->data[1]]; |
| mapping_args.builder->AddScalarInt32Operand(*k_param.data.i32); |
| *nn_op_type = ANEURALNETWORKS_TOPK_V2; |
| } break; |
| case kTfLiteBuiltinSelect: { |
| *nn_op_type = ANEURALNETWORKS_SELECT; |
| } break; |
| case kTfLiteBuiltinGather: { |
| auto builtin = reinterpret_cast<TfLiteGatherParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->axis); |
| mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[1], |
| /* hybrid_op */ false, |
| /* tensor_flags */ 0); |
| *nn_op_type = ANEURALNETWORKS_GATHER; |
| } break; |
| case kTfLiteBuiltinBidirectionalSequenceLstm: { |
| auto builtin = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->activation); |
| mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip); |
| mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip); |
| mapping_args.builder->AddScalarBoolOperand(builtin->merge_outputs); |
| mapping_args.builder->AddScalarBoolOperand(builtin->time_major); |
| // TF Lite doesn't support layer normalization in bidirectional |
| // sequence LSTM, so we insert optional tensors for NNAPI. |
| for (int i = 0; i < 8; ++i) { |
| mapping_args.builder->AddVectorFloat32Operand(nullptr, 0); |
| } |
| *nn_op_type = ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM; |
| } break; |
| case kTfLiteBuiltinExpandDims: { |
| const TfLiteTensor& axis_param = |
| mapping_args.context->tensors[mapping_args.node->inputs->data[1]]; |
| mapping_args.builder->AddScalarInt32Operand(*axis_param.data.i32); |
| *nn_op_type = ANEURALNETWORKS_EXPAND_DIMS; |
| } break; |
| case kTfLiteBuiltinSplit: { |
| const TfLiteTensor& axis = |
| mapping_args.context->tensors[mapping_args.node->inputs->data[0]]; |
| auto builtin = |
| reinterpret_cast<TfLiteSplitParams*>(mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(*axis.data.i32); |
| mapping_args.builder->AddScalarInt32Operand(builtin->num_splits); |
| *nn_op_type = ANEURALNETWORKS_SPLIT; |
| } break; |
| case kTfLiteBuiltinLogSoftmax: { |
| // Scaling and axis are hardcoded to 1 and -1, respectively, |
| // in TFLite. |
| mapping_args.builder->AddScalarFloat32Operand(1); |
| mapping_args.builder->AddScalarInt32Operand(-1); |
| *nn_op_type = ANEURALNETWORKS_LOG_SOFTMAX; |
| } break; |
| case kTfLiteBuiltinQuantize: { |
| auto input_index = mapping_args.node->inputs->data[0]; |
| // NNAPI only supports quantization from float and does not support |
| // requantization, so if our input is already quantized we dequantize |
| // it by adding a Dequantize node before this one. |
| if (IsQuantized(mapping_args.context->tensors[input_index].type)) { |
| mapping_args.builder->AddDequantize(0, input_index, kTfLiteFloat32, |
| mapping_args.node_index); |
| } |
| |
| *nn_op_type = ANEURALNETWORKS_QUANTIZE; |
| } break; |
| case kTfLiteBuiltinReduceAny: { |
| auto builtin = reinterpret_cast<TfLiteReducerParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims); |
| *nn_op_type = ANEURALNETWORKS_REDUCE_ANY; |
| } break; |
| case kTfLiteBuiltinReduceMin: { |
| auto builtin = reinterpret_cast<TfLiteReducerParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims); |
| *nn_op_type = ANEURALNETWORKS_REDUCE_MIN; |
| } break; |
| case kTfLiteBuiltinReduceMax: { |
| auto builtin = reinterpret_cast<TfLiteReducerParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims); |
| *nn_op_type = ANEURALNETWORKS_REDUCE_MAX; |
| } break; |
| case kTfLiteBuiltinDepthToSpace: { |
| auto builtin = reinterpret_cast<TfLiteDepthToSpaceParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarInt32Operand(builtin->block_size); |
| *nn_op_type = ANEURALNETWORKS_DEPTH_TO_SPACE; |
| } break; |
| case kTfLiteBuiltinReduceProd: { |
| auto builtin = reinterpret_cast<TfLiteReducerParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims); |
| *nn_op_type = ANEURALNETWORKS_REDUCE_PROD; |
| } break; |
| case kTfLiteBuiltinSum: { |
| auto builtin = reinterpret_cast<TfLiteReducerParams*>( |
| mapping_args.node->builtin_data); |
| mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims); |
| *nn_op_type = ANEURALNETWORKS_REDUCE_SUM; |
| } break; |
| case kTfLiteBuiltinElu: { |
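| // NNAPI ELU takes an alpha scalar; TFLite's ELU uses a fixed alpha of 1. |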
| mapping_args.builder->AddScalarFloat32Operand(1.0); |
| *nn_op_type = ANEURALNETWORKS_ELU; |
| } break; |
| case kTfLiteBuiltinFill: { |
| *nn_op_type = ANEURALNETWORKS_FILL; |
| } break; |
| default: |
| // All other operators are not mapped. |
| return kTfLiteError; |
| } |
| return kTfLiteOk; |
| } |
| |
| // Initialize the kernel (a NN model). |
| TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context, |
| const TfLiteDelegateParams* params, |
| int* nnapi_errno) { |
| for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) { |
| nodes_.push_back(node_index); |
| } |
| |
| const auto delegate_options = |
| StatefulNnApiDelegate::GetOptions(params->delegate); |
| if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 && |
| ShouldUseTargetDevices(delegate_options, nnapi_)) { |
| TF_LITE_ENSURE_STATUS(GetTargetDevices(context, params->delegate, nnapi_, |
| nnapi_errno, &nnapi_devices_)); |
| |
| if (nnapi_devices_.empty()) { |
| context->ReportError( |
| context, "NNAPI delegate requested but no accelerators available."); |
| return kTfLiteError; |
| } |
| } |
| |
| // Mark the handle-backed tensors. |
| tensor_memory_map_ = |
| &StatefulNnApiDelegate::GetTensorMemoryMap(params->delegate); |
| |
| if (!nn_model_) { |
| ANeuralNetworksModel* model = nullptr; |
| RETURN_TFLITE_ERROR_IF_NN_ERROR(context, |
| nnapi_->ANeuralNetworksModel_create(&model), |
| "creating NNAPI model", nnapi_errno); |
| nn_model_.reset(model); |
| |
| TF_LITE_ENSURE_STATUS(BuildGraph(context, delegate_options, |
| params->input_tensors, |
| params->output_tensors, nnapi_errno)); |
| } |
| |
| // Calculate the model compilation cache token here, since its value |
| // depends on some of the TfLiteDelegateParams. |
| nn_compilation_cache_token_.clear(); |
| const char* cache_dir = delegate_options.cache_dir; |
| const char* model_token = delegate_options.model_token; |
| if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 && cache_dir && |
| model_token) { |
| // Compilation caching could be enabled, so try to construct the uint8 |
| // token. |
| // TODO(b/133342794): use a generic token generator class. |
| uint64_t token_parts[4]; |
| // Create bits from model_token. |
| // Using farmhash fingerprint instead of std::hash, as the latter is not |
| // guaranteed to be stable across program invocations. |
| token_parts[0] = |
| ::util::Fingerprint64(model_token, std::strlen(model_token)); |
| // Create bits from params->nodes_to_replace. |
| token_parts[1] = GetHash(params->nodes_to_replace); |
| // Create bits from params->input_tensors. These include the input tensor |
| // sizes, as the cached compilations are size-dependent. |
| token_parts[2] = GetHash(params->input_tensors); |
| for (int i : TfLiteIntArrayView(params->input_tensors)) { |
| if (i != kTfLiteOptionalTensor) { |
| TfLiteTensor* t = &context->tensors[i]; |
| TF_LITE_ENSURE(context, t->dims); |
| token_parts[2] = GetHash(t->dims, token_parts[2]); |
| } |
| } |
| // Create bits from params->output_tensors. |
| token_parts[3] = GetHash(params->output_tensors); |
| // NNAPI requires the token to be 256 bits long. |
| // TODO(b/172238515): get token size from header instead of |
| // hardcoding. |
| std::vector<uint8_t> nnapi_cache_token(32, 0); |
| // Copy the token bits. |
| uint8_t* p = reinterpret_cast<uint8_t*>(token_parts); |
| for (int i = 0; i < 4 * sizeof(uint64_t); i++) { |
| nnapi_cache_token[i] = p[i]; |
| } |
| |
| nn_compilation_cache_token_ = nnapi_cache_token; |
| } |
| |
| initialised_ = true; |
| |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context, |
| TfLiteNode* node, int* nnapi_errno) { |
| if (!initialised_) { |
| return kTfLiteError; |
| } |
| |
| const auto delegate_options = |
| StatefulNnApiDelegate::GetOptions(node->delegate); |
| if (nn_compilation_) { |
| return kTfLiteOk; |
| } |
| |
| ANeuralNetworksCompilation* compilation = nullptr; |
| if (!nnapi_devices_.empty()) { |
| // Compile for the selected accelerator. |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi_->ANeuralNetworksCompilation_createForDevices( |
| nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(), |
| &compilation), |
| "creating NNAPI model for given devices", nnapi_errno); |
| } else { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR(context, |
| nnapi_->ANeuralNetworksCompilation_create( |
| nn_model_.get(), &compilation), |
| "creating NNAPI compilation", nnapi_errno); |
| } |
| |
| auto preference = delegate_options.execution_preference; |
| if (preference != |
| StatefulNnApiDelegate::Options::ExecutionPreference::kUndefined) { |
| const int preference_result = |
| nnapi_->ANeuralNetworksCompilation_setPreference(compilation, |
| preference); |
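| // On failure, free the compilation before the error macro returns so it |
| // is not leaked. |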
| if (preference_result != ANEURALNETWORKS_NO_ERROR) { |
| nnapi_->ANeuralNetworksCompilation_free(compilation); |
| compilation = nullptr; |
| } |
| RETURN_TFLITE_ERROR_IF_NN_ERROR(context, preference_result, |
| "setting compilation preferences", |
| nnapi_errno); |
| } |
| |
| if (!nn_compilation_cache_token_.empty()) { |
| const char* cache_dir = delegate_options.cache_dir; |
| const int set_caching_result = |
| nnapi_->ANeuralNetworksCompilation_setCaching( |
| compilation, cache_dir, nn_compilation_cache_token_.data()); |
| if (set_caching_result != ANEURALNETWORKS_NO_ERROR) { |
| nnapi_->ANeuralNetworksCompilation_free(compilation); |
| compilation = nullptr; |
| } |
| RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result, |
| "configuring NNAPI caching", nnapi_errno); |
| } |
| // Set compilation timeout if applicable. |
| if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) { |
| if (delegate_options.max_compilation_timeout_duration_ns > 0) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi_->ANeuralNetworksCompilation_setTimeout( |
| compilation, |
| delegate_options.max_compilation_timeout_duration_ns), |
| "setting compilation timeout", nnapi_errno); |
| } |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi_->ANeuralNetworksCompilation_setPriority( |
| compilation, delegate_options.execution_priority), |
| "setting compilation priority", nnapi_errno); |
| } |
| const int finish_result = |
| nnapi_->ANeuralNetworksCompilation_finish(compilation); |
| if (finish_result != ANEURALNETWORKS_NO_ERROR) { |
| nnapi_->ANeuralNetworksCompilation_free(compilation); |
| compilation = nullptr; |
| } |
| RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result, |
| "completing NNAPI compilation", nnapi_errno); |
| nn_compilation_.reset(compilation); |
| |
| bool should_use_burst_mode = delegate_options.use_burst_computation; |
| // Override should_use_burst_mode to true if the selected NNAPI devices |
| // have NNAPI feature level 5 or higher. |
| if (!nnapi_devices_.empty() && |
| target_sdk_version_ >= kNNAPIRuntimeFeatureLevel5) { |
| should_use_burst_mode = true; |
| } |
| // Create a burst object to be reused across a sequence of executions. |
| if (should_use_burst_mode && |
| nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 && |
| nnapi_->ANeuralNetworksBurst_create) { |
| ANeuralNetworksBurst* burst = nullptr; |
| const int create_burst_result = |
| nnapi_->ANeuralNetworksBurst_create(nn_compilation_.get(), &burst); |
| if (create_burst_result != ANEURALNETWORKS_NO_ERROR) { |
| nnapi_->ANeuralNetworksBurst_free(burst); |
| burst = nullptr; |
| } |
| RETURN_TFLITE_ERROR_IF_NN_ERROR(context, create_burst_result, |
| "creating NNAPI burst", nnapi_errno); |
| nn_burst_.reset(burst); |
| } |
| |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus NNAPIDelegateKernel::GetOperationsSupportedByTargetNnApiDevices( |
| TfLiteContext* context, std::vector<int>* supported_nodes, |
| int* nnapi_errno) { |
| if (!nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices) { |
| return kTfLiteError; |
| } |
| |
| const auto nnapi_model_size = nnapi_to_tflite_op_mapping_.size(); |
| |
| // Determine the list of operations the target devices actually support. |
| std::unique_ptr<bool[]> nnapi_ops_support_flags(new bool[nnapi_model_size]); |
| |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices( |
| nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(), |
| nnapi_ops_support_flags.get()), |
| "Checking supported operations for devices", nnapi_errno); |
| |
| // A TfLite op is supported only if all the associated NNAPI ones are. |
| auto tflite_ops_support_status = std::map<int, bool>(); |
| std::for_each(nodes_.begin(), nodes_.end(), |
| [&tflite_ops_support_status](int tflite_node_index) { |
| tflite_ops_support_status[tflite_node_index] = true; |
| }); |
| for (int nnapi_op_index = 0; nnapi_op_index < nnapi_model_size; |
| nnapi_op_index++) { |
| const auto tflite_op_index = nnapi_to_tflite_op_mapping_[nnapi_op_index]; |
| tflite_ops_support_status[tflite_op_index] &= |
| nnapi_ops_support_flags[nnapi_op_index]; |
| } |
| |
| supported_nodes->clear(); |
| std::for_each(nodes_.begin(), nodes_.end(), |
| [&supported_nodes, &tflite_ops_support_status](int node_index) { |
| if (tflite_ops_support_status[node_index]) { |
| supported_nodes->push_back(node_index); |
| } |
| }); |
| |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context, |
| TfLiteNode* node, int* nnapi_errno) { |
| const bool allow_padding = |
| nnapi_->nnapi_runtime_feature_level > kMinSdkVersionForNNAPI13 && |
| nnapi_->ANeuralNetworksExecution_enableInputAndOutputPadding != nullptr; |
| const auto delegate_options = |
| StatefulNnApiDelegate::GetOptions(node->delegate); |
| |
| // Check for conditions where we need to re-create NN Execution object and |
| // re-configure the settings and inputs / outputs. |
| bool should_reset_execution = false; |
| if (nnapi_->nnapi_runtime_feature_level <= kMinSdkVersionForNNAPI13 || |
| delegate_options.allow_dynamic_dimensions) { |
| // Must reset execution before Android API 31, or when using dynamic dimensions. |
| should_reset_execution = true; |
| } else { |
| // For Android API 31+, check for BufferHandle changes and reset the |
| // execution if any of them changed. |
| std::vector<int> curr_in_tensor_handle_map(context->tensors_size); |
| for (int i = 0; i < curr_in_tensor_handle_map.size(); i++) { |
| curr_in_tensor_handle_map[i] = context->tensors[i].buffer_handle; |
| } |
| if (!(tensor_handle_map_ == curr_in_tensor_handle_map)) { |
| should_reset_execution = true; |
| tensor_handle_map_ = curr_in_tensor_handle_map; |
| } |
| } |
| if (should_reset_execution) { |
| ANeuralNetworksExecution* execution = nullptr; |
| RETURN_TFLITE_ERROR_IF_NN_ERROR(context, |
| nnapi_->ANeuralNetworksExecution_create( |
| nn_compilation_.get(), &execution), |
| "creating NNAPI execution", nnapi_errno); |
| if (nnapi_->nnapi_runtime_feature_level > kMinSdkVersionForNNAPI13) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi_->ANeuralNetworksExecution_setReusable(execution, |
| /*reusable=*/true), |
| "making execution reusable", nnapi_errno); |
| } |
| nn_execution_.reset(execution); |
| |
| // Allow padding bytes for execution inputs & outputs if applicable. |
| if (allow_padding) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi_->ANeuralNetworksExecution_enableInputAndOutputPadding( |
| nn_execution_.get(), |
| /*enable=*/true), |
| "setting allow padding for execution intputs and outputs", |
| nnapi_errno); |
| } |
| // Set execution timeout if applicable. |
| if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) { |
| if (delegate_options.max_execution_timeout_duration_ns > 0) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi_->ANeuralNetworksExecution_setTimeout( |
| nn_execution_.get(), |
| delegate_options.max_execution_timeout_duration_ns), |
| "setting execution timeout", nnapi_errno); |
| } |
| if (delegate_options.max_execution_loop_timeout_duration_ns > 0) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi_->ANeuralNetworksExecution_setLoopTimeout( |
| nn_execution_.get(), |
| delegate_options.max_execution_loop_timeout_duration_ns), |
| "setting execution loop timeout", nnapi_errno); |
| } |
| } |
| // Check whether the input and output memory pools need to be resized. |
| if (delegate_options.allow_dynamic_dimensions) { |
| size_t total_input_byte_size = 0; |
| // Compute the total byte size of the NNAPI input tensors. |
| for (int i : TfLiteIntArrayView(node->inputs)) { |
| // Constant tensors are not NNAPI inputs. |
| if (i != kTfLiteOptionalTensor && |
| context->tensors[i].allocation_type != kTfLiteMmapRo && |
| // The delegate might not have mapped this input (this can |
| // happen if one tensor is split into several ones). |
| operand_mapping_.lite_index_to_ann(i) != -1) { |
| if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) { |
| continue; |
| } |
| const TfLiteType nn_type_conversion = |
| operand_mapping_.lite_index_to_ann_type_conversion(i); |
| int tensor_size = 0; |
| if (nn_type_conversion == kTfLiteNoType) { |
| tensor_size = context->tensors[i].bytes; |
| } else { |
| size_t type_size; |
| TF_LITE_ENSURE_OK( |
| context, |
| GetSizeOfType(context, nn_type_conversion, &type_size)); |
| tensor_size = NumElements(&context->tensors[i]) * type_size; |
| } |
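| // Each tensor in the shared memory pool is followed by alignment padding. |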
| total_input_byte_size += tensor_size; |
| total_input_byte_size += GetNumPaddingBytes(tensor_size); |
| } |
| } |
| if (total_input_byte_size > nn_input_memory_->get_byte_size()) { |
| nn_input_memory_.reset( |
| new NNMemory(nnapi_, "input_pool", total_input_byte_size)); |
| } |
| |
| size_t total_output_byte_size = 0; |
| for (int i : TfLiteIntArrayView(node->outputs)) { |
| if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) { |
| continue; |
| } |
| total_output_byte_size += context->tensors[i].bytes; |
| total_output_byte_size += GetNumPaddingBytes(context->tensors[i].bytes); |
| } |
| if (total_output_byte_size > nn_output_memory_->get_byte_size()) { |
| nn_output_memory_.reset( |
| new NNMemory(nnapi_, "output_pool", total_output_byte_size)); |
| } |
| } |
| } |
| // Set the input tensor buffers. Note: we access TfLite tensors using |
| // absolute indices, but NNAPI indexes inputs by relative indices. |
| int relative_input_index = 0; |
| |
| const bool use_int8_asymm_signed = |
| target_sdk_version_ >= kMinSdkVersionForNNAPI13; |
| |
| size_t input_offset = 0; |
| for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) { |
| if (absolute_input_index == kTfLiteOptionalTensor) { |
| continue; |
| } |
| ANeuralNetworksOperandType input_nn_operand_type; |
| ANeuralNetworksOperandType* input_nn_operand_type_ptr = nullptr; |
| TfLiteTensor* tensor = &context->tensors[absolute_input_index]; |
| TfLiteType ann_type_equivalent = |
| operand_mapping_.lite_index_to_ann_type_conversion( |
| absolute_input_index); |
| if (delegate_options.allow_dynamic_dimensions && |
| HasUnspecifiedDimension(tensor)) { |
| input_nn_operand_type = |
| ConvertTensorTypeToNNType(tensor, ann_type_equivalent); |
| input_nn_operand_type_ptr = &input_nn_operand_type; |
| } |
| if (tensor->allocation_type != kTfLiteMmapRo) { |
| if (tensor->buffer_handle != kTfLiteNullBufferHandle && |
| tensor->buffer_handle < tensor_memory_map_->size()) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR( |
| context, |
| nnapi_->ANeuralNetworksExecution_setInputFromMemory( |
| nn_execution_.get(), relative_input_index, |
| input_nn_operand_type_ptr, |
| tensor_memory_map_->at(tensor->buffer_handle).memory, 0, |
| tensor->bytes), |
| "associating NNAPI execution input with a memory object", tensor, |
| nnapi_errno); |
| relative_input_index++; |
| continue; |
| } |
| int tensor_size = 0; |
| int padding_bytes = 0; |
| if (ann_type_equivalent != kTfLiteNoType) { |
| const auto num_elements = NumElements(tensor); |
| uint8_t* input_ptr = nn_input_memory_->get_data_ptr() + input_offset; |
| if (tensor->type == kTfLiteUInt8 && |
| ann_type_equivalent == kTfLiteInt32) { |
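| // Widen the uint8 values to the int32 operand type NNAPI expects |
| // for this input. |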
| for (int i = 0; i < num_elements; ++i) { |
| reinterpret_cast<int32_t*>(input_ptr)[i] = |
| static_cast<const int32_t>(tensor->data.uint8[i]); |
| } |
| } else if (tensor->type == kTfLiteInt8 && |
| ann_type_equivalent == kTfLiteUInt8) { |
| // Explicitly convert int8 values to uint8 values. |
| for (int i = 0; i < num_elements; ++i) { |
| input_ptr[i] = static_cast<const uint8_t>( |
| static_cast<int32_t>(tensor->data.int8[i]) + 128); |
| } |
| } else if (tensor->type == kTfLiteInt8 && |
| ann_type_equivalent == kTfLiteInt32) { |
| if (use_int8_asymm_signed) { |
| for (int i = 0; i < num_elements; ++i) { |
| reinterpret_cast<int32_t*>(input_ptr)[i] = |
| static_cast<const int32_t>(tensor->data.int8[i]); |
| } |
| } else { |
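| // Without signed asymmetric support, the int8 data was remapped into |
| // the uint8 range (zero point shifted by 128), so apply the same +128 |
| // shift before widening. |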
| for (int i = 0; i < num_elements; ++i) { |
| reinterpret_cast<int32_t*>(input_ptr)[i] = |
| static_cast<const int32_t>(tensor->data.int8[i]) + 128; |
| } |
| } |
| } else { |
| context->ReportError( |
| context, |
| "NN API Delegate: unsupported tensor types conversion: " |
| "from type code %d to type code %d.\n", |
| tensor->type, ann_type_equivalent); |
| return kTfLiteError; |
| } |
| size_t type_size; |
| TF_LITE_ENSURE_OK( |
| context, GetSizeOfType(context, ann_type_equivalent, &type_size)); |
| tensor_size = NumElements(tensor) * type_size; |
| padding_bytes = GetNumPaddingBytes(tensor_size); |
| if (should_reset_execution) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR( |
| context, |
| nnapi_->ANeuralNetworksExecution_setInputFromMemory( |
| nn_execution_.get(), relative_input_index, |
| input_nn_operand_type_ptr, nn_input_memory_->get_handle(), |
| input_offset, GetNNTensorSize(tensor_size, allow_padding)), |
| "associating NNAPI execution input with a memory object", tensor, |
| nnapi_errno); |
| } |
| } else { |
| // Copy the data to the pre-allocated shared memory. |
| memcpy(nn_input_memory_->get_data_ptr() + input_offset, |
| tensor->data.raw, tensor->bytes); |
| tensor_size = tensor->bytes; |
| padding_bytes = GetNumPaddingBytes(tensor_size); |
| if (should_reset_execution) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR( |
| context, |
| nnapi_->ANeuralNetworksExecution_setInputFromMemory( |
| nn_execution_.get(), relative_input_index, |
| input_nn_operand_type_ptr, nn_input_memory_->get_handle(), |
| input_offset, GetNNTensorSize(tensor_size, allow_padding)), |
| "associating NNAPI execution input with a memory object", tensor, |
| nnapi_errno); |
| } |
| } |
| input_offset += tensor_size + padding_bytes; |
| relative_input_index++; |
| } |
| } |
| |
| // Set the output tensor buffers. |
| int relative_output_index = 0; |
| size_t output_offset = 0; |
| for (auto output_index : TfLiteIntArrayView(node->outputs)) { |
| // If the NNAPI implementation doesn't produce some of the outputs, |
| // they are left unmapped and we should not try to read their values here. |
| if (operand_mapping_.lite_index_to_ann(output_index) == -1) { |
| continue; |
| } |
| ANeuralNetworksOperandType output_nn_operand_type; |
| ANeuralNetworksOperandType* output_nn_operand_type_ptr = nullptr; |
| TfLiteTensor* tensor = &context->tensors[output_index]; |
| if (delegate_options.allow_dynamic_dimensions && |
| HasUnspecifiedDimension(tensor)) { |
| TfLiteType ann_type_equivalent = |
| operand_mapping_.lite_index_to_ann_type_conversion(output_index); |
| output_nn_operand_type = |
| ConvertTensorTypeToNNType(tensor, ann_type_equivalent); |
| output_nn_operand_type_ptr = &output_nn_operand_type; |
| } |
| if (tensor->buffer_handle != kTfLiteNullBufferHandle && |
| tensor->buffer_handle < tensor_memory_map_->size() && |
| should_reset_execution) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR( |
| context, |
| nnapi_->ANeuralNetworksExecution_setOutputFromMemory( |
| nn_execution_.get(), relative_output_index, |
| output_nn_operand_type_ptr, |
| tensor_memory_map_->at(tensor->buffer_handle).memory, 0, |
| tensor->bytes), |
| "associating NNAPI execution output to a memory object", tensor, |
| nnapi_errno); |
| |
| } else { |
| int padding_bytes = GetNumPaddingBytes(tensor->bytes); |
| if (should_reset_execution) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR( |
| context, |
| nnapi_->ANeuralNetworksExecution_setOutputFromMemory( |
| nn_execution_.get(), relative_output_index, |
| output_nn_operand_type_ptr, nn_output_memory_->get_handle(), |
| output_offset, GetNNTensorSize(tensor->bytes, allow_padding)), |
| "associating NNAPI execution output to a memory object", tensor, |
| nnapi_errno); |
| } |
| output_offset += tensor->bytes + padding_bytes; |
| } |
| relative_output_index++; |
| } |
| |
| // Set memory for NNAPI state_outputs. |
| for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) { |
| int state_tensor_idx = model_state_tfl_inputs_[i]; |
| TfLiteTensor* tensor = &context->tensors[state_tensor_idx]; |
| int padding_bytes = GetNumPaddingBytes(tensor->bytes); |
| if (should_reset_execution) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi_->ANeuralNetworksExecution_setOutputFromMemory( |
| nn_execution_.get(), relative_output_index, nullptr, |
| nn_output_memory_->get_handle(), output_offset, |
| GetNNTensorSize(tensor->bytes, allow_padding)), |
| "associating NNAPI execution state output to a memory object", |
| nnapi_errno); |
| } |
| output_offset += tensor->bytes + padding_bytes; |
| relative_output_index++; |
| } |
| |
| // Invoke ANN in blocking fashion. |
| if (nnapi_->android_sdk_version < kMinSdkVersionForNNAPI12) { |
| ANeuralNetworksEvent* event = nullptr; |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi_->ANeuralNetworksExecution_startCompute(nn_execution_.get(), |
| &event), |
| "starting async computation", nnapi_errno); |
| const int wait_result = nnapi_->ANeuralNetworksEvent_wait(event); |
| nnapi_->ANeuralNetworksEvent_free(event); |
| RETURN_TFLITE_ERROR_IF_NN_ERROR(context, wait_result, |
| "waiting for async computation completion", |
| nnapi_errno); |
| } else { |
| // Use Burst mode by default for NNAPI 1.2+. |
| if (nn_burst_) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi_->ANeuralNetworksExecution_burstCompute(nn_execution_.get(), |
| nn_burst_.get()), |
| "running burst computation", nnapi_errno); |
| } else { |
| // Use synchronous execution for NNAPI 1.2+ as a fallback. |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi_->ANeuralNetworksExecution_compute(nn_execution_.get()), |
| "running computation", nnapi_errno); |
| } |
| } |
| |
| // Copy the results from shared memory to the destination. |
| output_offset = 0; |
| for (auto output_index : TfLiteIntArrayView(node->outputs)) { |
| TfLiteTensor* tensor = &context->tensors[output_index]; |
| if (tensor->buffer_handle != kTfLiteNullBufferHandle) { |
| continue; |
| } |
| TfLiteType ann_type_equivalent = |
| operand_mapping_.lite_index_to_ann_type_conversion(output_index); |
| if (tensor->type == kTfLiteInt8 && ann_type_equivalent == kTfLiteUInt8) { |
| // Explicitly convert uint8 values to int8 values. |
| uint8_t* output_ptr = reinterpret_cast<uint8_t*>( |
| nn_output_memory_->get_data_ptr() + output_offset); |
| const auto num_elements = NumElements(tensor); |
| for (int i = 0; i < num_elements; ++i) { |
| output_ptr[i] = |
| static_cast<uint8_t>(static_cast<int32_t>(output_ptr[i]) - 128); |
| } |
| } |
| memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset, |
| tensor->bytes); |
| output_offset += tensor->bytes; |
| output_offset += GetNumPaddingBytes(tensor->bytes); |
| } |
| // The state_out of the previous invocation needs to be copied to the |
| // state_in of the current invocation. |
| for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) { |
| int state_tensor_idx = model_state_tfl_inputs_[i]; |
| TfLiteTensor* tensor = &context->tensors[state_tensor_idx]; |
| memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset, |
| tensor->bytes); |
| output_offset += tensor->bytes; |
| output_offset += GetNumPaddingBytes(tensor->bytes); |
| } |
| |
| // Copy the output of all output tensors in feedback_loops_ into the |
| // associated input. |
| for (auto feedback_loop : feedback_loops_) { |
| int output_tensor_idx; |
| int input_tensor_idx; |
| std::tie(output_tensor_idx, input_tensor_idx) = feedback_loop; |
| TfLiteTensor& src = context->tensors[output_tensor_idx]; |
| TfLiteTensor& dest = context->tensors[input_tensor_idx]; |
| |
| memcpy(dest.data.raw, src.data.raw, src.bytes); |
| } |
| |
| return kTfLiteOk; |
| } |
| |
| void NNAPIDelegateKernel::AddDequantizeOperatorsWhereNeeded( |
| const TfLiteContext* context, int builtin_code, const TfLiteNode* node, |
| int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno) { |
| // Depending on the operator and the input data format, Dequantize |
| // operators may need to be added. For example, when the input is |
| // floating-point but the weights are quantized, the weights will first |
| // be dequantized to the same format as the input before being passed |
| // to the operator. |
| |
| // The tensor determining whether the inputs should be floating-point. |
| int input_tensor_index = -1; |
| std::vector<int> inputs_to_potentially_dequantize; |
| |
| switch (builtin_code) { |
| case kTfLiteBuiltinConv2d: |
| case kTfLiteBuiltinFullyConnected: { |
| input_tensor_index = 0; |
| // Weights and bias are inputs #1 and #2 respectively and may require |
| // dequantization. |
| inputs_to_potentially_dequantize = {1, 2}; |
| break; |
| } |
| case kTfLiteBuiltinLstm: { |
| input_tensor_index = 0; |
| inputs_to_potentially_dequantize = {1, 2, 3, 4, 5, 6, 7, |
| 8, 9, 10, 11, 12, 13, 14, |
| 15, 16, 17, 20, 21, 22, 23}; |
| break; |
| } |
| default: |
| return; |
| } |
| |
| int tensor_id = node->inputs->data[input_tensor_index]; |
| if (tensor_id < 0) return; |
| |
| // Nothing to do if the input is not floating-point. |
| if (!IsFloat(context->tensors[tensor_id].type)) return; |
| |
| for (int i : inputs_to_potentially_dequantize) { |
| if (i < 0 || i >= node->inputs->size) continue; // Ignore invalid index. |
| tensor_id = node->inputs->data[i]; |
| if (tensor_id < 0) continue; // Ignore optional input. |
| |
| const TfLiteType type = context->tensors[tensor_id].type; |
| // Nothing to do for this tensor if it's not quantized. |
| if (!IsQuantized(type)) continue; |
| |
| // Insert Dequantize operator if it hasn't been done already and change |
| // the node's input accordingly. |
| builder->AddDequantize(i, node->inputs->data[i], type, tflite_node_index); |
| } |
| } |
| |
| TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors( |
| TfLiteContext* context, int* nnapi_errno, bool allow_dynamic_dimensions) { |
| DequantizeMapping dequantize_mapping; |
| // The operand builder allows creating a single op. It is created outside |
| // the for loop to avoid reallocating the vectors. |
| NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_, |
| &dequantize_mapping, &allocation_memory_mapping_, |
| &nnapi_to_tflite_op_mapping_, nn_model_.get(), |
| nnapi_errno, allow_dynamic_dimensions); |
| // If we have target accelerators, the target SDK version might be |
| // different from the current Android version. |
| target_sdk_version_ = nnapi_->android_sdk_version; |
| if (!nnapi_devices_.empty()) { |
| TF_LITE_ENSURE_STATUS(GetTargetSdkVersion( |
| context, nnapi_, nnapi_devices_, &target_sdk_version_, nnapi_errno)); |
| } |
| // Add Tensors. |
| for (auto node_index : nodes_) { |
| // Obtain the op and registration. |
| TfLiteNode* node; |
| TfLiteRegistration* reg; |
| TF_LITE_ENSURE_STATUS( |
| context->GetNodeAndRegistration(context, node_index, &node, ®)); |
| |
| // Fully quantized full LSTM. |
| if (target_sdk_version_ >= kMinSdkVersionForNNAPI13 && |
| reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) && |
| context->tensors[node->inputs->data[0]].type == kTfLiteInt8) { |
| const auto quant8_full_lstm_op_code = ANEURALNETWORKS_QUANTIZED_LSTM; |
| |
| constexpr int kInputTensor = 0; |
| constexpr int kInputToInputWeightsTensor = 1; |
| constexpr int kRecurrentToInputWeightsTensor = 5; |
| constexpr int kInputGateBiasTensor = 12; |
| constexpr int kForgetGateBiasTensor = 13; |
| constexpr int kCellGateBiasTensor = 14; |
| constexpr int kOutputGateBiasTensor = 15; |
| constexpr int kProjectionWeightsTensor = 16; |
| constexpr int kProjectionBiasTensor = 17; |
| constexpr int kPrevOutputTensor = 18; |
| |
| // Add input tensors. |
| for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) { |
| const auto input_index = node->inputs->data[input_pos]; |
| if (input_index == kTfLiteOptionalTensor) { |
| if (input_pos == kInputToInputWeightsTensor || |
| input_pos == kRecurrentToInputWeightsTensor || |
| input_pos == kProjectionWeightsTensor) { |
| TF_LITE_ENSURE_STATUS(builder.AddVectorInt8Operand(nullptr, 0)); |
| } else if (input_pos == kInputGateBiasTensor || |
| input_pos == kForgetGateBiasTensor || |
| input_pos == kCellGateBiasTensor || |
| input_pos == kOutputGateBiasTensor || |
| input_pos == kProjectionBiasTensor) { |
| TF_LITE_ENSURE_STATUS(builder.AddVectorInt32Operand(nullptr, 0)); |
| } else { // cell-to-* and layer norm weights. |
| TF_LITE_ENSURE_STATUS(builder.AddVectorInt16Operand(nullptr, 0)); |
| } |
| } else { |
| // Only input and previous output use INT8_ASYM_SIGNED. |
| int flags = |
| (input_pos == kInputTensor || input_pos == kPrevOutputTensor) |
| ? NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED |
| : 0; |
| TF_LITE_ENSURE_STATUS( |
| builder.AddTensorInput(input_index, /*hybrid_op=*/false, flags)); |
| } |
| } |
| |
| // Add clip parameters. |
| auto builtin = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data); |
| TF_LITE_ENSURE_STATUS( |
| builder.AddScalarFloat32Operand(builtin->cell_clip)); |
| TF_LITE_ENSURE_STATUS( |
| builder.AddScalarFloat32Operand(builtin->proj_clip)); |
| |
| // Add quantization parameters for intermediate tensors. |
| TF_LITE_ENSURE_EQ(context, node->intermediates->size, 5); |
| for (int intermediate_pos = 0; |
| intermediate_pos < node->intermediates->size; ++intermediate_pos) { |
| const auto intermediate_index = |
| node->intermediates->data[intermediate_pos]; |
| const TfLiteTensor& tensor = context->tensors[intermediate_index]; |
| TfLiteAffineQuantization* quantization_params = |
| static_cast<TfLiteAffineQuantization*>(tensor.quantization.params); |
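| // The last intermediate tensor (the hidden state) also needs its zero |
| // point; the other intermediates only contribute their scale. |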
| if (intermediate_pos == 4) { |
| TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand( |
| quantization_params->zero_point->data[0])); |
| } |
| TF_LITE_ENSURE_STATUS(builder.AddScalarFloat32Operand( |
| quantization_params->scale->data[0])); |
| } |
| |
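| // The updated LSTM states are produced as extra model outputs. Remember |
| // the mapping so they can be copied back into the corresponding TFLite |
| // input tensors after each invocation. |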
| // Activation state output. |
| int ann_index; |
| builder.AddStateInt8AsymTensor( |
| node->inputs->data[/*kInputActivationStateTensor*/ 18], &ann_index); |
| model_state_outputs_.push_back(ann_index); |
| model_state_tfl_inputs_.push_back( |
| node->inputs->data[/*kInputActivationStateTensor*/ 18]); |
| |
| // Cell state output. |
| builder.AddStateInt16Tensor( |
| node->inputs->data[/*kInputCellStateTensor*/ 19], &ann_index); |
| model_state_outputs_.push_back(ann_index); |
| model_state_tfl_inputs_.push_back( |
| node->inputs->data[/*kInputCellStateTensor*/ 19]); |
| |
| // Add output tensors. |
| for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) { |
| const auto output_index = node->outputs->data[output_pos]; |
| TF_LITE_ENSURE_STATUS(builder.AddTensorOutput( |
| output_index, NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED)); |
| } |
| |
| builder.FinalizeAddOperation(quant8_full_lstm_op_code, node_index); |
| continue; |
| } |
| |
| const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node); |
| const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code); |
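| // Before NNAPI 1.3 there is no signed asymmetric quantized type, so int8 |
| // tensors may have to be converted to unsigned (zero point shifted by 128). |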
| const bool need_int8_conversion = |
| target_sdk_version_ < kMinSdkVersionForNNAPI13 && |
| NeedInt8Conversion(context, reg->builtin_code, node); |
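| // From NNAPI 1.3 onwards, int8 tensors can be passed directly as |
| // TENSOR_QUANT8_ASYMM_SIGNED; hybrid ops keep their original representation. |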
| const bool use_int8_asymm_signed = |
| target_sdk_version_ >= kMinSdkVersionForNNAPI13 && !hybrid_op; |
| |
| int input_tensor_flags = 0; |
| if (scalar_as_tensor) { |
| input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR; |
| } |
| if (use_int8_asymm_signed) { |
| input_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED; |
| } |
| |
| // On SDK level less than 30, h_swish will be lowered into supported NNAPI |
| // operations. Since SDK level 30, h_swish is supported as a single |
| // operation. |
| if (reg->builtin_code == kTfLiteBuiltinHardSwish && |
| nnapi_->android_sdk_version < kMinSdkVersionForNNAPI13) { |
| builder.TransformHardSwishIntoSupportedOps( |
| node->inputs->data[0], node->outputs->data[0], need_int8_conversion, |
| node_index); |
| continue; |
| } |
| // Map inputs to NN API tensor indices. |
| for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) { |
| if (reg->builtin_code == kTfLiteBuiltinTransposeConv) { |
| // Everything is added during Map since the input tensors |
| // are in a different order. |
| continue; |
| } |
| const auto input_index = node->inputs->data[input_pos]; |
| if (need_int8_conversion && |
| (input_pos == 0 || |
| reg->builtin_code == kTfLiteBuiltinFullyConnected || |
| reg->builtin_code == kTfLiteBuiltinConv2d || |
| reg->builtin_code == kTfLiteBuiltinDepthwiseConv2d || |
| reg->builtin_code == kTfLiteBuiltinAdd || |
| reg->builtin_code == kTfLiteBuiltinMul || |
| reg->builtin_code == kTfLiteBuiltinSub || |
| reg->builtin_code == kTfLiteBuiltinConcatenation || |
| reg->builtin_code == kTfLiteBuiltinMaximum || |
| reg->builtin_code == kTfLiteBuiltinMinimum || |
| reg->builtin_code == kTfLiteBuiltinLeakyRelu || |
| reg->builtin_code == kTfLiteBuiltinLess || |
| reg->builtin_code == kTfLiteBuiltinLessEqual || |
| reg->builtin_code == kTfLiteBuiltinPrelu || |
| reg->builtin_code == kTfLiteBuiltinGreater || |
| reg->builtin_code == kTfLiteBuiltinGreaterEqual || |
| reg->builtin_code == kTfLiteBuiltinEqual || |
| reg->builtin_code == kTfLiteBuiltinNotEqual || |
| reg->builtin_code == kTfLiteBuiltinSelect)) { |
| // Only selected inputs require int8 conversion. |
| TF_LITE_ENSURE_STATUS(builder.AddTensorInput( |
| input_index, hybrid_op, |
| input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION)); |
| continue; |
| } |
| if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) && |
| input_pos >= 20) { |
| // Skip layer normalization weights. They are added in the Map |
| // function (after all the other inputs added there) since layer |
| // normalization weights are the last four inputs of the LSTM op in |
| // NNAPI. |
| continue; |
| } |
| if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) { |
| // All inputs are configured in the Map function. |
| continue; |
| } |
| if (reg->builtin_code == kTfLiteBuiltinUnidirectionalSequenceLstm) { |
| if (input_pos >= 20) { |
| // Skip layer normalization weights. They are added in the Map |
| // function (after all the other inputs added there) since layer |
| // normalization weights are the last four inputs of the |
| // unidirectional sequence LSTM op in NNAPI. |
| continue; |
| } |
| if (input_index == kTfLiteOptionalTensor) { |
| TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0)); |
| continue; |
| } |
| } |
| if ((reg->builtin_code == kTfLiteBuiltinSplit) && |
| (input_index == node->inputs->data[0])) { |
| // Skip the axis input tensor; it will be added as a scalar operand |
| // by the Map() mapping. |
| continue; |
| } |
| |
| // Pad and Padv2 have an optional parameter for a pad value which has |
| // to be converted to a scalar type in NN API. |
| if ((reg->builtin_code == kTfLiteBuiltinPadv2 || |
| reg->builtin_code == kTfLiteBuiltinPad) && |
| node->inputs->size == 3 && input_pos == 2) { |
| const int constant_value_id = node->inputs->data[2]; |
| if (constant_value_id == kTfLiteOptionalTensor) { |
| continue; |
| } |
| const TfLiteTensor& constant_value = context->tensors[constant_value_id]; |
| |
| switch (constant_value.type) { |
| case kTfLiteFloat32: |
| if (constant_value.allocation_type == kTfLiteMmapRo) { |
| builder.AddScalarFloat32Operand(*constant_value.data.f); |
| } else { |
| builder.AddSingleValueTensorAsScalarOperand( |
| constant_value_id, ANEURALNETWORKS_FLOAT32); |
| } |
| break; |
| case kTfLiteUInt8: |
| if (constant_value.allocation_type == kTfLiteMmapRo) { |
| builder.AddScalarInt32Operand( |
| static_cast<int32_t>(*constant_value.data.uint8)); |
| } else { |
| builder.AddSingleValueTensorAsScalarOperand( |
| constant_value_id, ANEURALNETWORKS_INT32); |
| } |
| break; |
| case kTfLiteInt8: |
| if (constant_value.allocation_type == kTfLiteMmapRo) { |
| if (need_int8_conversion) { |
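| // Shift the int8 value by 128 to match the unsigned representation |
| // used for the converted tensor. |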
| builder.AddScalarInt32Operand( |
| static_cast<int32_t>(*constant_value.data.int8) + 128); |
| } else { |
| builder.AddScalarInt32Operand(*constant_value.data.int8); |
| } |
| } else { |
| builder.AddSingleValueTensorAsScalarOperand( |
| constant_value_id, ANEURALNETWORKS_INT32); |
| } |
| break; |
| default: |
| context->ReportError(context, |
| "Unsupported type of pad value for pad_v2\n"); |
| return kTfLiteError; |
| } |
| continue; |
| } |
| |
| if (input_index == kTfLiteOptionalTensor && |
| (reg->builtin_code == kTfLiteBuiltinLstm || |
| reg->builtin_code == kTfLiteBuiltinSvdf || |
| reg->builtin_code == kTfLiteBuiltinBidirectionalSequenceLstm)) { |
| // Properly handle the optional tensor for LSTM and SVDF; |
| // currently only float32 is supported. |
| TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0)); |
| } else if (reg->builtin_code == kTfLiteBuiltinResizeBilinear || |
| reg->builtin_code == kTfLiteBuiltinResizeNearestNeighbor) { |
| if (input_pos == 0) { |
| // Only the first input tensor is added. The second one, |
| // specifying the output height and width, is not added and |
| // instead the height and width will be added individually as |
| // scalars by the mapping function returned by Map(). |
| TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op, |
| input_tensor_flags)); |
| } |
| } else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) { |
| // The K parameter tensor is not handled here but by the functor |
| // returned by Map; the input tensor is instead added in |
| // the else clause below. |
| continue; |
| } else if (reg->builtin_code == kTfLiteBuiltinGather) { |
| // Everything else is added during Map since the input tensors |
| // are in a different order. |
| if (input_pos == 0) { |
| TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op, |
| input_tensor_flags)); |
| } |
| continue; |
| } else if (reg->builtin_code == kTfLiteBuiltinExpandDims && |
| input_pos == 1) { |
| // The axis param is added during Map |
| continue; |
| } else if (reg->builtin_code == kTfLiteBuiltinBatchToSpaceNd && |
| input_pos == 2) { |
| // NNAPI does not support crops. |
| // The Map function will check if all crops are zero. |
| continue; |
| } else if (reg->builtin_code == kTfLiteBuiltinArgMin || |
| reg->builtin_code == kTfLiteBuiltinArgMax) { |
| // The first input tensor is added as is. The second one, specifying |
| // the axis, needs to be converted to a scalar since TFLite uses a |
| // tensor but NNAPI uses a scalar as the axis. |
| if (input_pos == 0) { |
| TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op, |
| input_tensor_flags)); |
| } else { |
| const int axis_id = node->inputs->data[1]; |
| const TfLiteTensor& axis_tensor = context->tensors[axis_id]; |
| switch (axis_tensor.type) { |
| case kTfLiteInt32: |
| if (axis_tensor.allocation_type == kTfLiteMmapRo) { |
| TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand( |
| static_cast<int32_t>(*axis_tensor.data.i32))); |
| } else { |
| TF_LITE_ENSURE_STATUS( |
| builder.AddSingleValueTensorAsScalarOperand( |
| axis_id, ANEURALNETWORKS_INT32)); |
| } |
| break; |
| case kTfLiteInt64: |
| // Map() function already makes sure int64 input is constant. |
| TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand( |
| static_cast<int32_t>(*axis_tensor.data.i64))); |
| break; |
| default: |
| return kTfLiteError; |
| } |
| } |
| } else if (reg->builtin_code == kTfLiteBuiltinMaximum || |
| reg->builtin_code == kTfLiteBuiltinMinimum) { |
| const TfLiteTensor& operand_tensor = |
| context->tensors[node->inputs->data[input_pos]]; |
| if (operand_tensor.dims->size == 0) { |
| int tensor_index; |
| |
| TF_LITE_ENSURE_EQ(context, operand_tensor.allocation_type, |
| kTfLiteMmapRo); |
| switch (operand_tensor.type) { |
| case kTfLiteFloat32: |
| TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor( |
| ANEURALNETWORKS_TENSOR_FLOAT32, operand_tensor.type, {1}, |
| std::vector<float>(1, operand_tensor.data.f[0]), |
| operand_tensor.params, &tensor_index)); |
| break; |
| case kTfLiteUInt8: |
| TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor( |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type, {1}, |
| std::vector<uint8_t>(1, operand_tensor.data.uint8[0]), |
| operand_tensor.params, &tensor_index)); |
| break; |
| case kTfLiteInt8: { |
| auto params = operand_tensor.params; |
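| // A scale of 0 would be rejected by NNAPI for quantized tensors, so |
| // fall back to 1.0 for this constant scalar. |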
| if (params.scale == 0.0) { |
| params.scale = 1.0; |
| } |
| |
| if (use_int8_asymm_signed) { |
| TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor( |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, |
| operand_tensor.type, {1}, |
| std::vector<int8_t>(1, operand_tensor.data.int8[0]), params, |
| &tensor_index)); |
| } else { |
| TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor( |
| ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type, |
| {1}, |
| std::vector<int8_t>(1, operand_tensor.data.int8[0] + 128), |
| params, &tensor_index)); |
| } |
| } break; |
| case kTfLiteInt32: |
| TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor( |
| ANEURALNETWORKS_TENSOR_INT32, operand_tensor.type, {1}, |
| std::vector<int32_t>(1, operand_tensor.data.i32[0]), |
| operand_tensor.params, &tensor_index)); |
| break; |
| default: |
| return kTfLiteError; |
| } |
| } else { |
| TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op, |
| input_tensor_flags)); |
| } |
| } else if ((reg->builtin_code == kTfLiteBuiltinReduceAny || |
| reg->builtin_code == kTfLiteBuiltinReduceMax || |
| reg->builtin_code == kTfLiteBuiltinReduceMin || |
| reg->builtin_code == kTfLiteBuiltinReduceProd || |
| reg->builtin_code == kTfLiteBuiltinSum) && |
| (input_pos == 1)) { |
| // The axis needs to be converted to a tensor if it is specified as a scalar. |
| const TfLiteTensor& axis_tensor = |
| context->tensors[node->inputs->data[input_pos]]; |
| if (axis_tensor.dims->size == 0) { |
| TF_LITE_ENSURE_STATUS( |
| builder.AddVectorInt32Operand(axis_tensor.data.i32, 1)); |
| } else { |
| TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op, |
| input_tensor_flags)); |
| } |
| } else if (reg->builtin_code == kTfLiteBuiltinFill) { |
| if (input_pos == 0) { |
| const int dims_id = node->inputs->data[0]; |
| const TfLiteTensor& dims_tensor = context->tensors[dims_id]; |
| switch (dims_tensor.type) { |
| case kTfLiteInt32: |
| TF_LITE_ENSURE_STATUS( |
| builder.AddTensorInput(input_index, hybrid_op)); |
| break; |
| case kTfLiteInt64: { |
| // We made sure that dimensions are constant and fit into int32 |
| // in Map(), so we can safely create a new tensor with casted |
| // values. |
| const int dims_size = dims_tensor.dims->data[0]; |
| std::vector<int32_t> dims_int32(dims_size); |
| std::copy(dims_tensor.data.i64, dims_tensor.data.i64 + dims_size, |
| dims_int32.begin()); |
| int new_tensor_index = -1; |
| builder.AddNewInputConstantTensor( |
| ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, dims_tensor.dims, |
| dims_int32, dims_tensor.params, &new_tensor_index); |
| } break; |
| default: |
| return kTfLiteError; |
| } |
| } else { |
| const int value_id = node->inputs->data[1]; |
| const TfLiteTensor& value_tensor = context->tensors[value_id]; |
| switch (value_tensor.type) { |
| case kTfLiteFloat32: |
| TF_LITE_ENSURE_STATUS( |
| builder.AddScalarFloat32Operand(*value_tensor.data.f)); |
| break; |
| case kTfLiteInt32: |
| TF_LITE_ENSURE_STATUS( |
| builder.AddScalarInt32Operand(*value_tensor.data.i32)); |
| break; |
| case kTfLiteInt64: |
| // Map() function already makes sure int64 input is constant and |
| // fits into int32. |
| TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand( |
| static_cast<int32_t>(*value_tensor.data.i64))); |
| break; |
| default: |
| return kTfLiteError; |
| } |
| } |
| } else { |
| TF_LITE_ENSURE_STATUS( |
| builder.AddTensorInput(input_index, hybrid_op, input_tensor_flags)); |
| } |
| } |
| |
| // Get the op type and operands. |
| // Fails if the Validate function failed. |
| int nn_op_type; |
| TF_LITE_ENSURE_STATUS( |
| Map(context, reg->builtin_code, reg->version, target_sdk_version_, |
| {context, &builder, node, node_index, &model_state_outputs_, |
| &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno}, |
| &nn_op_type)); |
| |
| // Map outputs to NN API tensor indices. |
| int output_tensor_flags = 0; |
| if (need_int8_conversion) { |
| output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION; |
| } |
| if (use_int8_asymm_signed) { |
| output_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED; |
| } |
| for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) { |
| const auto output_index = node->outputs->data[output_pos]; |
| |
| // Outputs for the basic LSTM cell are set in the Map function, so skip them here. |
| if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) { |
| continue; |
| } |
| |
| TF_LITE_ENSURE_STATUS( |
| builder.AddTensorOutput(output_index, output_tensor_flags)); |
| } |
| |
| // Dequantize operators may have to be added if the inputs are |
| // floating-point. |
| AddDequantizeOperatorsWhereNeeded(context, reg->builtin_code, node, |
| node_index, &builder, nnapi_errno); |
| |
| TF_LITE_ENSURE_OK(context_, |
| builder.FinalizeAddOperation(nn_op_type, node_index)); |
| } |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus NNAPIDelegateKernel::BuildGraph( |
| TfLiteContext* context, |
| const StatefulNnApiDelegate::Options& delegate_options, |
| const TfLiteIntArray* input_tensors, const TfLiteIntArray* output_tensors, |
| int* nnapi_errno) { |
| // Build the ops and tensors. |
| TF_LITE_ENSURE_STATUS(AddOpsAndTensors( |
| context, nnapi_errno, delegate_options.allow_dynamic_dimensions)); |
| // Map input and output tensor indices to ANN indices. |
| std::vector<uint32_t> inputs; |
| inputs.reserve(input_tensors->size); |
| std::vector<uint32_t> outputs; |
| outputs.reserve(output_tensors->size); |
| |
| size_t total_input_byte_size = 0; |
| // Map the TensorFlow Lite input and output tensors to ANN indices. |
| for (int i : TfLiteIntArrayView(input_tensors)) { |
| // Constant tensors are not NNAPI inputs. |
| if (i != kTfLiteOptionalTensor && |
| context->tensors[i].allocation_type != kTfLiteMmapRo && |
| // The delegate might not have mapped this input (this can |
| // happen if one tensor is split into several ones). |
| operand_mapping_.lite_index_to_ann(i) != -1) { |
| inputs.push_back(operand_mapping_.lite_index_to_ann(i)); |
| if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) { |
| continue; |
| } |
| const TfLiteType nn_type_conversion = |
| operand_mapping_.lite_index_to_ann_type_conversion(i); |
| int tensor_size = 0; |
| if (nn_type_conversion == kTfLiteNoType) { |
| tensor_size = context->tensors[i].bytes; |
| } else { |
| size_t type_size; |
| TF_LITE_ENSURE_OK( |
| context, GetSizeOfType(context, nn_type_conversion, &type_size)); |
| tensor_size = NumElements(&context->tensors[i]) * type_size; |
| } |
| total_input_byte_size += tensor_size; |
| total_input_byte_size += GetNumPaddingBytes(tensor_size); |
| } |
| } |
| |
| size_t total_output_byte_size = 0; |
| for (int i : TfLiteIntArrayView(output_tensors)) { |
| const int output_tensor_ann_index = operand_mapping_.lite_index_to_ann(i); |
| // Unmapped outputs are not added |
| if (output_tensor_ann_index != -1) { |
| outputs.push_back(output_tensor_ann_index); |
| } |
| if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) { |
| continue; |
| } |
| total_output_byte_size += context->tensors[i].bytes; |
| total_output_byte_size += GetNumPaddingBytes(context->tensors[i].bytes); |
| } |
| |
| // Add state output tensors as model outputs. |
| for (int i = 0; i < model_state_outputs_.size(); i++) { |
| outputs.push_back(model_state_outputs_[i]); |
| auto tfl_state_idx = model_state_tfl_inputs_[i]; |
| total_output_byte_size += context->tensors[tfl_state_idx].bytes; |
| total_output_byte_size += |
| GetNumPaddingBytes(context->tensors[tfl_state_idx].bytes); |
| } |
| |
| // Tell ANN to declare inputs/outputs |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs( |
| nn_model_.get(), inputs.size(), inputs.data(), outputs.size(), |
| outputs.data()), |
| "identifying model inputs and outputs", nnapi_errno); |
| |
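| // Relaxed float32 -> float16 computation can be requested either |
| // per-interpreter (allow_fp32_relax_to_fp16) or via the delegate options. |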
| auto allow_fp16 = |
| context->allow_fp32_relax_to_fp16 || delegate_options.allow_fp16; |
| if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI11) { |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, |
| nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16( |
| nn_model_.get(), allow_fp16), |
| "set relaxed computation mode for fp32 if possible", nnapi_errno); |
| } |
| |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, nnapi_->ANeuralNetworksModel_finish(nn_model_.get()), |
| "finalizing the model", nnapi_errno); |
| |
| // Create shared memory pool for inputs and outputs. |
| nn_input_memory_.reset( |
| new NNMemory(nnapi_, "input_pool", total_input_byte_size)); |
| nn_output_memory_.reset( |
| new NNMemory(nnapi_, "output_pool", total_output_byte_size)); |
| |
| return kTfLiteOk; |
| } |
| |
| } // namespace nnapi |
| } // namespace delegate |
| |
| using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI; |
| using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI11; |
| using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12; |
| using ::tflite::delegate::nnapi::NNAPIDelegateKernel; |
| |
| StatefulNnApiDelegate::Data::Data(const NnApi* nnapi) : nnapi(nnapi) {} |
| |
| StatefulNnApiDelegate::Data::~Data() { |
| std::for_each(std::begin(delegate_state_cache), |
| std::end(delegate_state_cache), |
| [](const std::pair<int, NNAPIDelegateKernel*>& entry) { |
| delete entry.second; |
| }); |
| } |
| |
| void StatefulNnApiDelegate::Data::CacheDelegateKernel( |
| const TfLiteDelegateParams* delegate_params, |
| NNAPIDelegateKernel* delegate_state) { |
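| // A partition is identified by the index of its first node. |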
| const int cache_key = delegate_params->nodes_to_replace->data[0]; |
| delegate_state_cache.emplace(cache_key, delegate_state); |
| } |
| |
| NNAPIDelegateKernel* StatefulNnApiDelegate::Data::MaybeGetCachedDelegateKernel( |
| const TfLiteDelegateParams* delegate_params) { |
| const int cache_key = delegate_params->nodes_to_replace->data[0]; |
| const auto cached_state = delegate_state_cache.find(cache_key); |
| if (cached_state != std::end(delegate_state_cache)) { |
| auto result = cached_state->second; |
| delegate_state_cache.erase(cached_state); |
| return result; |
| } else { |
| return nullptr; |
| } |
| } |
| |
| StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi) |
| : StatefulNnApiDelegate(nnapi, Options()) {} |
| |
| StatefulNnApiDelegate::StatefulNnApiDelegate(Options options) |
| : StatefulNnApiDelegate(NnApiImplementation(), options) {} |
| |
| StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi, |
| Options options) |
| : TfLiteDelegate(TfLiteDelegateCreate()), delegate_data_(nnapi) { |
| if (options.accelerator_name) { |
| delegate_data_.accelerator_name = options.accelerator_name; |
| } |
| if (options.cache_dir) { |
| delegate_data_.cache_dir = options.cache_dir; |
| } |
| if (options.model_token) { |
| delegate_data_.model_token = options.model_token; |
| } |
| delegate_data_.execution_preference = options.execution_preference; |
| delegate_data_.disallow_nnapi_cpu = options.disallow_nnapi_cpu; |
| delegate_data_.max_number_delegated_partitions = |
| options.max_number_delegated_partitions; |
| delegate_data_.allow_fp16 = options.allow_fp16; |
| delegate_data_.execution_priority = options.execution_priority; |
| delegate_data_.max_compilation_timeout_duration_ns = |
| options.max_compilation_timeout_duration_ns; |
| delegate_data_.max_execution_timeout_duration_ns = |
| options.max_execution_timeout_duration_ns; |
| delegate_data_.max_execution_loop_timeout_duration_ns = |
| options.max_execution_loop_timeout_duration_ns; |
| if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI11) { |
| delegate_data_.allow_dynamic_dimensions = options.allow_dynamic_dimensions; |
| } |
| delegate_data_.use_burst_computation = options.use_burst_computation; |
| TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, |
| "Created TensorFlow Lite delegate for NNAPI."); |
| Prepare = DoPrepare; |
| CopyFromBufferHandle = DoCopyFromBufferHandle; |
| CopyToBufferHandle = DoCopyToBufferHandle; |
| FreeBufferHandle = DoFreeBufferHandle; |
| data_ = &delegate_data_; |
| if (delegate_data_.allow_dynamic_dimensions) { |
| flags |= kTfLiteDelegateFlagsAllowDynamicTensors; |
| flags |= kTfLiteDelegateFlagsRequirePropagatedShapes; |
| } |
| } |
| |
| StatefulNnApiDelegate::StatefulNnApiDelegate() |
| : StatefulNnApiDelegate(Options()) {} |
| |
| const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions( |
| TfLiteDelegate* delegate) { |
| auto delegate_data = reinterpret_cast<Data*>(delegate->data_); |
| StatefulNnApiDelegate::Options options; |
| options.execution_preference = delegate_data->execution_preference; |
| options.accelerator_name = delegate_data->accelerator_name.empty() |
| ? nullptr |
| : delegate_data->accelerator_name.c_str(); |
| options.cache_dir = delegate_data->cache_dir.empty() |
| ? nullptr |
| : delegate_data->cache_dir.c_str(); |
| options.model_token = delegate_data->model_token.empty() |
| ? nullptr |
| : delegate_data->model_token.c_str(); |
| options.disallow_nnapi_cpu = delegate_data->disallow_nnapi_cpu; |
| options.max_number_delegated_partitions = |
| delegate_data->max_number_delegated_partitions; |
| options.allow_fp16 = delegate_data->allow_fp16; |
| options.execution_priority = delegate_data->execution_priority; |
| options.max_compilation_timeout_duration_ns = |
| delegate_data->max_compilation_timeout_duration_ns; |
| options.max_execution_timeout_duration_ns = |
| delegate_data->max_execution_timeout_duration_ns; |
| options.max_execution_loop_timeout_duration_ns = |
| delegate_data->max_execution_loop_timeout_duration_ns; |
| options.allow_dynamic_dimensions = delegate_data->allow_dynamic_dimensions; |
| options.use_burst_computation = delegate_data->use_burst_computation; |
| return options; |
| } |
| |
| const std::vector<StatefulNnApiDelegate::MemoryRegistration>& |
| StatefulNnApiDelegate::GetTensorMemoryMap(TfLiteDelegate* delegate) { |
| auto delegate_data = reinterpret_cast<Data*>(delegate->data_); |
| return delegate_data->tensor_memory_map; |
| } |
| |
| TfLiteBufferHandle StatefulNnApiDelegate::RegisterNnapiMemory( |
| ANeuralNetworksMemory* memory, CopyToHostTensorFnPtr callback, |
| void* callback_context) { |
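| // Reuse a slot cleared by DoFreeBufferHandle if one is available; |
| // otherwise append a new entry. |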
| int map_size = delegate_data_.tensor_memory_map.size(); |
| for (int i = 0; i < map_size; i++) { |
| if (delegate_data_.tensor_memory_map[i].memory == nullptr) { |
| delegate_data_.tensor_memory_map[i] = {memory, callback, |
| callback_context}; |
| return i; |
| } |
| } |
| delegate_data_.tensor_memory_map.push_back( |
| {memory, callback, callback_context}); |
| return map_size; |
| } |
| |
| TfLiteStatus StatefulNnApiDelegate::DoCopyFromBufferHandle( |
| TfLiteContext* context, TfLiteDelegate* delegate, |
| TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) { |
| auto delegate_data = reinterpret_cast<Data*>(delegate->data_); |
| if (buffer_handle < 0 || |
| buffer_handle >= delegate_data->tensor_memory_map.size()) { |
| return kTfLiteError; |
| } |
| auto memory = delegate_data->tensor_memory_map[buffer_handle].memory; |
| auto callback = delegate_data->tensor_memory_map[buffer_handle].callback; |
| auto callback_context = |
| delegate_data->tensor_memory_map[buffer_handle].callback_context; |
| if (!memory || !callback) { |
| return kTfLiteError; |
| } |
| return callback(tensor, memory, 0, tensor->bytes, callback_context); |
| } |
| |
| TfLiteStatus StatefulNnApiDelegate::DoCopyToBufferHandle( |
| TfLiteContext* context, TfLiteDelegate* delegate, |
| TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) { |
| return kTfLiteError; |
| } |
| |
| void StatefulNnApiDelegate::DoFreeBufferHandle(TfLiteContext* context, |
| TfLiteDelegate* delegate, |
| TfLiteBufferHandle* handle) { |
| auto delegate_data = reinterpret_cast<Data*>(delegate->data_); |
| if (*handle >= 0 && *handle < delegate_data->tensor_memory_map.size()) { |
| delegate_data->tensor_memory_map[*handle] = {nullptr, nullptr, nullptr}; |
| *handle = kTfLiteNullBufferHandle; |
| } |
| } |
| |
| int StatefulNnApiDelegate::GetNnApiErrno() const { |
| return delegate_data_.nnapi_errno; |
| } |
| |
| // static |
| TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator( |
| TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi, |
| const std::vector<int>& supported_nodes, |
| std::vector<int>* device_supported_nodes, int* num_partitions, |
| TfLiteDelegateParams** params_array, int* nnapi_errno) { |
| auto* delegate_data = static_cast<Data*>(delegate->data_); |
| // The first entry in a TfLiteIntArray is the element count. |
| auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes); |
| TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning( |
| context, supported_nodes_int_array.get(), params_array, num_partitions)); |
| // For each partition, check which nodes are actually supported by the |
| // target accelerators. |
| delegate_data->delegate_state_cache.clear(); |
| for (int idx = 0; idx < *num_partitions; idx++) { |
| const auto& partition_params = (*params_array)[idx]; |
| std::unique_ptr<NNAPIDelegateKernel> kernel_state( |
| new NNAPIDelegateKernel(nnapi)); |
| TfLiteDelegateParams params_with_delegate = partition_params; |
| params_with_delegate.delegate = delegate; |
| TF_LITE_ENSURE_STATUS( |
| kernel_state->Init(context, ¶ms_with_delegate, nnapi_errno)); |
| std::vector<int> supported_partition_nodes; |
| TF_LITE_ENSURE_STATUS( |
| kernel_state->GetOperationsSupportedByTargetNnApiDevices( |
| context, &supported_partition_nodes, nnapi_errno)); |
| device_supported_nodes->insert(device_supported_nodes->end(), |
| supported_partition_nodes.begin(), |
| supported_partition_nodes.end()); |
| |
| bool model_fully_supported = (supported_partition_nodes.size() == |
| partition_params.nodes_to_replace->size); |
| if (model_fully_supported) { |
| delegate_data->CacheDelegateKernel(&partition_params, |
| kernel_state.release()); |
| } |
| } |
| |
| if (device_supported_nodes->size() != supported_nodes.size()) { |
| // We changed the set of nodes to delegate; this will create a different |
| // partitioning layout. |
| auto device_sup_nodes_int_array = |
| BuildTfLiteIntArray(*device_supported_nodes); |
| TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning( |
| context, device_sup_nodes_int_array.get(), params_array, |
| num_partitions)); |
| } |
| |
| return kTfLiteOk; |
| } |
| |
| // static |
| TfLiteStatus StatefulNnApiDelegate::LimitDelegatedPartitions( |
| int max_partitions, |
| std::vector<TfLiteDelegateParams> partition_params_array, |
| std::vector<int>* nodes_to_delegate) { |
| int num_partitions = partition_params_array.size(); |
| if (max_partitions <= 0 || num_partitions <= max_partitions) { |
| return kTfLiteOk; |
| } |
| |
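| // Count how many partitions are currently selected for delegation; a |
| // partition is identified by its first node. |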
| int number_delegated_partitions = std::count_if( |
| partition_params_array.begin(), partition_params_array.end(), |
| [nodes_to_delegate](const TfLiteDelegateParams& partition_params) { |
| return std::find(nodes_to_delegate->begin(), nodes_to_delegate->end(), |
| partition_params.nodes_to_replace->data[0]) != |
| nodes_to_delegate->end(); |
| }); |
| |
| if (number_delegated_partitions > max_partitions) { |
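| // Keep only the max_partitions largest partitions: sort them by node |
| // count (descending) and rebuild nodes_to_delegate from the first |
| // max_partitions entries. |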
| std::sort(partition_params_array.begin(), partition_params_array.end(), |
| [](const TfLiteDelegateParams& left, |
| const TfLiteDelegateParams& right) -> bool { |
| // Reverse sort |
| return left.nodes_to_replace->size > |
| right.nodes_to_replace->size; |
| }); |
| |
| nodes_to_delegate->clear(); |
| |
| for (int i = 0; i < max_partitions; i++) { |
| const TfLiteDelegateParams& partition_params = partition_params_array[i]; |
| |
| nodes_to_delegate->insert(nodes_to_delegate->end(), |
| partition_params.nodes_to_replace->data, |
| partition_params.nodes_to_replace->data + |
| partition_params.nodes_to_replace->size); |
| } |
| } |
| |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context, |
| TfLiteDelegate* delegate) { |
| auto* delegate_data = static_cast<Data*>(delegate->data_); |
| int* nnapi_errno = &(delegate_data->nnapi_errno); |
| const NnApi* nnapi = delegate_data->nnapi; |
| |
| // Reset the error code when the delegate is initialized by TFLite so |
| // that a previous failure does not persist when the same |
| // StatefulNnApiDelegate is reused. |
| *nnapi_errno = 0; |
| |
| // Do not check nodes_ if NN API is unavailable. |
| if (nnapi->android_sdk_version < kMinSdkVersionForNNAPI || |
| !nnapi->nnapi_exists) { |
| return kTfLiteOk; |
| } |
| |
| int target_sdk_version = nnapi->android_sdk_version; |
| const StatefulNnApiDelegate::Options delegate_options = |
| StatefulNnApiDelegate::GetOptions(delegate); |
| // For NNAPI 1.2+, check if there is any accelerator available. |
| // If not, don't delegate to NNAPI's CPU reference implementation unless |
| // it has been specified as target accelerator. |
| if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) { |
| if (ShouldUseTargetDevices(delegate_options, nnapi)) { |
| std::vector<ANeuralNetworksDevice*> devices; |
| TF_LITE_ENSURE_STATUS( |
| GetTargetDevices(context, delegate, nnapi, nnapi_errno, &devices)); |
| |
| if (devices.empty()) { |
| if (delegate_options.accelerator_name) { |
| // There was a selected device and it is not available. |
| return kTfLiteError; |
| } else { |
| // Only nnapi-reference is available, but it was disabled by the |
| // delegate options. |
| return kTfLiteOk; |
| } |
| } |
| |
| TF_LITE_ENSURE_STATUS(GetTargetSdkVersion( |
| context, nnapi, devices, &target_sdk_version, nnapi_errno)); |
| } else { |
| // If no accelerator is specified, only use NNAPI if an accelerator is |
| // available. Any available accelerator will make the device_count larger |
| // than 1. More sophisticated check and allowlisting can be added later. |
| uint32_t device_count = 0; |
| RETURN_TFLITE_ERROR_IF_NN_ERROR( |
| context, nnapi->ANeuralNetworks_getDeviceCount(&device_count), |
| "getting number of NNAPI devices", nnapi_errno); |
| if (device_count <= 1) { |
| return kTfLiteOk; |
| } |
| } |
| } |
| |
| std::vector<int> supported_nodes; |
| // We don't care about all nodes_; we only care about the ones in the |
| // current plan. |
| TfLiteIntArray* plan; |
| TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan)); |
| |
| // Check for every node if it is supported |
| const bool is_accelerator_specified = ShouldUseTargetDevices( |
| delegate_options, nnapi, /*exclude_nnapi_reference=*/true); |
| std::vector<delegate::nnapi::NNAPIValidationFailure> map_failures; |
| for (int node_index : TfLiteIntArrayView(plan)) { |
| TfLiteNode* node; |
| TfLiteRegistration* registration; |
| TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration( |
| context, node_index, &node, ®istration)); |
| if (NNAPIDelegateKernel::Validate(context, registration->builtin_code, |
| registration->version, target_sdk_version, |
| node, is_accelerator_specified, |
| &map_failures)) { |
| supported_nodes.push_back(node_index); |
| } |
| #ifdef NNAPI_VERBOSE_VALIDATION |
| for (auto& failure : map_failures) { |
| TFLITE_LOG_PROD( |
| TFLITE_LOG_WARNING, "Operator %s (v%d) refused by NNAPI delegate: %s", |
| tflite::EnumNameBuiltinOperator( |
| static_cast<BuiltinOperator>(registration->builtin_code)), |
| registration->version, failure.message.c_str()); |
| } |
| map_failures.clear(); |
| #endif |
| } |
| |
| // If there are no delegated nodes, short-circuit node replacement. |
| if (supported_nodes.empty()) { |
| return kTfLiteOk; |
| } |
| |
| // NN API Delegate Registration (the pseudo kernel that will invoke NN |
| // API node subsets). |
| static const TfLiteRegistration nnapi_delegate_kernel = { |
| .init = [](TfLiteContext* context, const char* buffer, |
| size_t length) -> void* { |
| const TfLiteDelegateParams* params = |
| reinterpret_cast<const TfLiteDelegateParams*>(buffer); |
| |
| auto* delegate_data = static_cast<Data*>(params->delegate->data_); |
| int* nnapi_errno = &(delegate_data->nnapi_errno); |
| |
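| // Reuse a kernel cached by GetNodesSupportedByAccelerator when the |
| // partition was fully supported; otherwise create and initialize a |
| // new one. |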
| NNAPIDelegateKernel* kernel_state = |
| delegate_data->MaybeGetCachedDelegateKernel(params); |
| if (!kernel_state) { |
| kernel_state = new NNAPIDelegateKernel(delegate_data->nnapi); |
| kernel_state->Init(context, params, nnapi_errno); |
| } |
| |
| return kernel_state; |
| }, |
| |
| .free = [](TfLiteContext* context, void* buffer) -> void { |
| delete reinterpret_cast<NNAPIDelegateKernel*>(buffer); |
| }, |
| |
| .prepare = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus { |
| NNAPIDelegateKernel* state = |
| reinterpret_cast<NNAPIDelegateKernel*>(node->user_data); |
| int* nnapi_errno = |
| &(static_cast<Data*>(node->delegate->data_)->nnapi_errno); |
| return state->Prepare(context, node, nnapi_errno); |
| }, |
| |
| .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus { |
| NNAPIDelegateKernel* state = |
| reinterpret_cast<NNAPIDelegateKernel*>(node->user_data); |
| int* nnapi_errno = |
| &(static_cast<Data*>(node->delegate->data_)->nnapi_errno); |
| return state->Invoke(context, node, nnapi_errno); |
| }, |
| |
| .profiling_string = nullptr, |
| .builtin_code = kTfLiteBuiltinDelegate, |
| .custom_name = "TfLiteNnapiDelegate", |
| .version = 1, |
| }; |
| |
| std::vector<int> nodes_to_delegate; |
| |
| int num_partitions; |
| TfLiteDelegateParams* params_array; |
| if (is_accelerator_specified && |
| nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) { |
| // Filter out nodes not supported by the target accelerators. |
| // Supported operations cannot be queried before NNAPI 1.2. |
| TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator( |
| context, delegate, nnapi, supported_nodes, &nodes_to_delegate, |
| &num_partitions, ¶ms_array, nnapi_errno)); |
| } else { |
| nodes_to_delegate = supported_nodes; |
| auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes); |
| TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning( |
| context, supported_nodes_int_array.get(), ¶ms_array, |
| &num_partitions)); |
| } |
| |
| TF_LITE_ENSURE_STATUS( |
| LimitDelegatedPartitions(delegate_options.max_number_delegated_partitions, |
| std::vector<TfLiteDelegateParams>( |
| params_array, params_array + num_partitions), |
| &nodes_to_delegate)); |
| |
| if (nodes_to_delegate.empty()) { |
| return kTfLiteOk; |
| } else { |
| // Request TFLite to partition the graph and to create an |
| // nnapi_delegate_kernel for each independent node subset. |
| auto nodes_to_delegate_int_array = BuildTfLiteIntArray(nodes_to_delegate); |
| return context->ReplaceNodeSubsetsWithDelegateKernels( |
| context, nnapi_delegate_kernel, nodes_to_delegate_int_array.get(), |
| delegate); |
| } |
| } |
| |
| // Returns a singleton NNAPI Delegate that can check for support of ops. |
| TfLiteDelegate* NnApiDelegate() { |
| static StatefulNnApiDelegate* delegate = new StatefulNnApiDelegate(); |
| return delegate; |
| } |
| |
| } // namespace tflite |