blob: 2fbfda36286cf2e4e23a766d5f9e808082e77bcb [file] [log] [blame]
//
// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <armnn_delegate.hpp>
#include <OpaqueDelegateUtils.hpp>
#include <Version.hpp>
#include "Activation.hpp"
#include "ArgMinMax.hpp"
#include "BatchMatMul.hpp"
#include "BatchSpace.hpp"
#include "Comparison.hpp"
#include "Convolution.hpp"
#include "Control.hpp"
#include "ElementwiseBinary.hpp"
#include "ElementwiseUnary.hpp"
#include "Fill.hpp"
#include "FullyConnected.hpp"
#include "Gather.hpp"
#include "GatherNd.hpp"
#include "LogicalBinary.hpp"
#include "Lstm.hpp"
#include "Normalization.hpp"
#include "Pack.hpp"
#include "Pad.hpp"
#include "Pooling.hpp"
#include "Prelu.hpp"
#include "Quantization.hpp"
#include "Redefine.hpp"
#include "Reduce.hpp"
#include "Resize.hpp"
#include "Round.hpp"
#include "Shape.hpp"
#include "Slice.hpp"
#include "StridedSlice.hpp"
#include "Softmax.hpp"
#include "SpaceDepth.hpp"
#include "Split.hpp"
#include "Transpose.hpp"
#include "UnidirectionalSequenceLstm.hpp"
#include "Unpack.hpp"
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnnUtils/Filesystem.hpp>
#include <armnn/utility/Timer.hpp>
#include <flatbuffers/flatbuffers.h>
#include <tensorflow/lite/context_util.h>
#include <tensorflow/lite/schema/schema_generated.h>
#include <tensorflow/lite/minimal_logging.h>
#include <tensorflow/lite/logger.h>
#include <algorithm>
#include <iostream>
#include <sstream>
namespace armnnOpaqueDelegate
{
/// Builds the opaque delegate from the supplied options.
///
/// Enables Arm NN logging if requested, obtains the runtime via GetRuntime() and narrows the
/// requested backends down to those the runtime reports as supported.
///
/// @param options Delegate options; moved into the delegate.
/// @throws armnn::InvalidArgumentException when none of the requested backends is supported
///         (this includes the case where no runtime could be obtained).
ArmnnOpaqueDelegate::ArmnnOpaqueDelegate(armnnDelegate::DelegateOptions options)
    : m_Options(std::move(options))
{
    // Switch on ARMNN logging when the options ask for it.
    if (m_Options.IsLoggingEnabled())
    {
        armnn::ConfigureLogging(true, true, m_Options.GetLoggingSeverity());
    }

    // Create/Get the static ArmNN Runtime. The runtime is shared by all armnn_delegate instances,
    // so the RuntimeOptions cannot be altered for different armnn_delegate instances.
    m_Runtime = GetRuntime(m_Options.GetRuntimeOptions());

    // Keep only the requested backends that the runtime actually knows about.
    std::vector<armnn::BackendId> knownBackends;
    if (m_Runtime)
    {
        const armnn::BackendIdSet available = m_Runtime->GetDeviceSpec().GetSupportedBackends();
        for (auto& requested : m_Options.GetBackends())
        {
            const bool isAvailable =
                std::find(available.cbegin(), available.cend(), requested) != available.cend();
            if (isAvailable)
            {
                knownBackends.push_back(requested);
                continue;
            }
            TFLITE_LOG_PROD(tflite::TFLITE_LOG_INFO,
                            "TfLiteArmnnDelegate: Requested unknown backend %s", requested.Get().c_str());
        }
    }

    if (knownBackends.empty())
    {
        // Nothing usable was requested.
        throw armnn::InvalidArgumentException("TfLiteArmnnOpaqueDelegate: No known backend specified.");
    }
    m_Options.SetBackends(knownBackends);

    TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, "TfLiteArmnnOpaqueDelegate: Created TfLite ArmNN delegate.");
}
/// Delegate "prepare" entry point: identifies the nodes Arm NN can run and asks TfLite to replace
/// them with Arm NN delegate kernels.
///
/// @param tfLiteContext  Opaque TfLite context of the graph being prepared.
/// @param tfLiteDelegate Opaque delegate whose data_ is the ArmnnOpaqueDelegate instance.
/// @return kTfLiteError if the operators could not be identified or the kernel registration could
///         not be created; otherwise the status of the node replacement call.
TfLiteStatus DoPrepare(TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueDelegate* tfLiteDelegate)
{
    TfLiteIntArray* supportedOperators =
        static_cast<::armnnOpaqueDelegate::ArmnnOpaqueDelegate*>
            (tfLiteDelegate->data_)->IdentifyOperatorsToDelegate(tfLiteContext);
    if (supportedOperators == nullptr)
    {
        return kTfLiteError;
    }

    // ArmNN Opaque Delegate Registration
    TfLiteRegistrationExternal* kernelRegistration =
        TfLiteRegistrationExternalCreate(kTfLiteBuiltinDelegate, "TfLiteArmNNOpaqueDelegate", /*version=*/1);
    if (kernelRegistration == nullptr)
    {
        // The operator list is released on the normal path below; do not leak it on this early exit.
        TfLiteIntArrayFree(supportedOperators);
        return kTfLiteError;
    }

    // init: build the ArmnnSubgraph for the replaced node set. The buffer handed in is
    // reinterpreted as the TfLiteOpaqueDelegateParams.
    TfLiteRegistrationExternalSetInit(
        kernelRegistration,
        [](TfLiteOpaqueContext* tfLiteContext, const char* buffer, size_t length) -> void*
        {
            armnn::IgnoreUnused(length);
            const TfLiteOpaqueDelegateParams* parameters =
                reinterpret_cast<const TfLiteOpaqueDelegateParams*>(buffer);
            if (parameters == nullptr)
            {
                TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext,
                                          "TfLiteArmnnOpaqueDelegate: Unable to get parameters.");
                return nullptr;
            }

            return static_cast<void*>(
                ArmnnSubgraph::Create(tfLiteContext,
                                      parameters,
                                      static_cast<::armnnOpaqueDelegate::ArmnnOpaqueDelegate*>(
                                          parameters->delegate->data_)));
        }
    );

    // free: destroy the ArmnnSubgraph created by the init callback.
    TfLiteRegistrationExternalSetFree(
        kernelRegistration,
        [](TfLiteOpaqueContext* tfLiteContext, void* buffer) -> void
        {
            armnn::IgnoreUnused(tfLiteContext);
            if (buffer != nullptr)
            {
                delete static_cast<ArmnnSubgraph*>(buffer);
            }
        }
    );

    // prepare: forward to the subgraph stored as the node's user data.
    TfLiteRegistrationExternalSetPrepare(
        kernelRegistration,
        [](TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueNode* tfLiteNode) -> TfLiteStatus
        {
            void* userData = TfLiteOpaqueNodeGetUserData(tfLiteNode);
            if (userData == nullptr)
            {
                return kTfLiteError;
            }
            return static_cast<ArmnnSubgraph*>(userData)->Prepare(tfLiteContext);
        }
    );

    // invoke: forward to the subgraph stored as the node's user data.
    TfLiteRegistrationExternalSetInvoke(
        kernelRegistration,
        [](TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueNode* tfLiteNode) -> TfLiteStatus
        {
            void* userData = TfLiteOpaqueNodeGetUserData(tfLiteNode);
            if (userData == nullptr)
            {
                return kTfLiteError;
            }
            return static_cast<ArmnnSubgraph*>(userData)->Invoke(tfLiteContext, tfLiteNode);
        }
    );

    const TfLiteStatus status =
        TfLiteOpaqueContextReplaceNodeSubsetsWithDelegateKernels(
            tfLiteContext, kernelRegistration, supportedOperators, tfLiteDelegate);

    TfLiteIntArrayFree(supportedOperators);
    return status;
}
/// Creates an opaque TfLite delegate backed by a new ArmnnOpaqueDelegate.
///
/// @param settings Currently ignored: the delegate is always created with the default options
///                 until a DelegateOptions constructor exists that can parse the void* settings.
/// @return The delegate produced by TfLiteOpaqueDelegateCreate from the Arm NN delegate builder.
TfLiteOpaqueDelegate* TfLiteArmnnOpaqueDelegateCreate(const void* settings)
{
    armnn::IgnoreUnused(settings);

    // Heap-allocated on purpose; TfLiteArmnnOpaqueDelegateDelete deletes the delegate data.
    auto* delegateInstance =
        new ::armnnOpaqueDelegate::ArmnnOpaqueDelegate(TfLiteArmnnDelegateOptionsDefault());

    return TfLiteOpaqueDelegateCreate(delegateInstance->GetDelegateBuilder());
}
/// Returns the default delegate options: a DelegateOptions built for the CpuRef compute device.
::armnnDelegate::DelegateOptions TfLiteArmnnDelegateOptionsDefault()
{
    return ::armnnDelegate::DelegateOptions(armnn::Compute::CpuRef);
}
/// Destroys an opaque delegate together with the ArmnnOpaqueDelegate stored as its data.
///
/// @param tfLiteDelegate Delegate to destroy; a null pointer is ignored.
void TfLiteArmnnOpaqueDelegateDelete(TfLiteOpaqueDelegate* tfLiteDelegate)
{
    if (tfLiteDelegate == nullptr)
    {
        return;
    }

    // Release the Arm NN side first, then the TfLite wrapper that referenced it.
    auto* armnnDelegate =
        static_cast<::armnnOpaqueDelegate::ArmnnOpaqueDelegate*>(TfLiteOpaqueDelegateGetData(tfLiteDelegate));
    delete armnnDelegate;

    TfLiteOpaqueDelegateDelete(tfLiteDelegate);
}
/// Returns the plugin table (create, delete and errno functions) for the Arm NN opaque delegate.
/// The table is a function-local static, so the returned pointer stays valid after the call.
const TfLiteOpaqueDelegatePlugin* GetArmnnDelegatePluginApi()
{
    static constexpr TfLiteOpaqueDelegatePlugin pluginApi{
        TfLiteArmnnOpaqueDelegateCreate,
        TfLiteArmnnOpaqueDelegateDelete,
        TfLiteArmnnOpaqueDelegateErrno
    };
    return &pluginApi;
}
/// Returns the opaque delegate version, as given by OPAQUE_DELEGATE_VERSION.
const std::string ArmnnOpaqueDelegate::GetVersion()
{
    return OPAQUE_DELEGATE_VERSION;
}
/// Walks the graph execution plan and collects the indices of the nodes Arm NN can execute.
///
/// Each node is visited with a DelegateData that has no network; a node is considered supported
/// when ArmnnSubgraph::VisitNode returns kTfLiteOk without throwing.
///
/// @param tfLiteContext Opaque TfLite context of the graph.
/// @return A newly created TfLiteIntArray holding the sorted indices of the supported nodes (the
///         caller releases it with TfLiteIntArrayFree), or nullptr if the execution plan could not
///         be read or the array could not be created.
/// @throws armnn::Exception if unsupported operators exist and TfLite runtime fallback is disabled.
TfLiteIntArray* ArmnnOpaqueDelegate::IdentifyOperatorsToDelegate(TfLiteOpaqueContext* tfLiteContext)
{
    TfLiteIntArray* executionPlan = nullptr;
    if (TfLiteOpaqueContextGetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk)
    {
        TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext, "TfLiteArmnnOpaqueDelegate: Unable to get graph execution plan.");
        return nullptr;
    }

    // Delegate data with null network
    DelegateData delegateData(m_Options.GetBackends());

    // Allocate room for every node in the plan; size is reset and then grown as nodes are accepted.
    TfLiteIntArray* nodesToDelegate = TfLiteIntArrayCreate(executionPlan->size);
    if (nodesToDelegate == nullptr)
    {
        TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext,
                                  "TfLiteArmnnOpaqueDelegate: Unable to create int array from execution plan.");
        return nullptr;
    }
    nodesToDelegate->size = 0;

    std::set<int32_t> unsupportedOperators;

    for (int i = 0; i < executionPlan->size; ++i)
    {
        const int nodeIndex = executionPlan->data[i];

        // If TfLiteOpaqueNodes can be delegated to ArmNN
        TfLiteOpaqueNode* tfLiteNode = nullptr;
        TfLiteRegistrationExternal* tfLiteRegistration = nullptr;

        if (TfLiteOpaqueContextGetNodeAndRegistration(
                tfLiteContext, nodeIndex, &tfLiteNode, &tfLiteRegistration) != kTfLiteOk)
        {
            TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext,
                                      "TfLiteArmnnOpaqueDelegate: Unable to get node and registration for node %d.",
                                      nodeIndex);
            continue;
        }

        TfLiteStatus visitStatus;
        try
        {
            visitStatus = ArmnnSubgraph::VisitNode(
                delegateData, tfLiteContext, tfLiteRegistration, tfLiteNode, nodeIndex);
        }
        catch (const std::exception& ex)
        {
            // A throwing visit is treated the same as an unsupported node.
            ARMNN_LOG(error) << "ArmNN Failed to visit node with error: " << ex.what();
            visitStatus = kTfLiteError;
        }

        if (visitStatus != kTfLiteOk)
        {
            // node is not supported by ArmNN
            unsupportedOperators.insert(TfLiteRegistrationExternalGetBuiltInCode(tfLiteRegistration));
            continue;
        }

        nodesToDelegate->data[nodesToDelegate->size++] = nodeIndex;
    }

    // Report each unsupported builtin operator once (the set removes duplicates).
    for (const int32_t builtinCode : unsupportedOperators)
    {
        TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext,
                                  "Operator %s [%d] is not supported by armnn_opaque_delegate.",
                                  tflite::EnumNameBuiltinOperator(tflite::BuiltinOperator(builtinCode)),
                                  builtinCode);
    }

    if (!unsupportedOperators.empty() && m_Options.TfLiteRuntimeFallbackDisabled())
    {
        // The array is never handed to the caller on this path, so release it before throwing.
        TfLiteIntArrayFree(nodesToDelegate);

        std::stringstream exMessage;
        exMessage << "TfLiteArmnnOpaqueDelegate: There are unsupported operators in the model. ";
        exMessage << "Not falling back to TfLite Runtime as fallback is disabled. ";
        exMessage << "This should only be disabled under test conditions.";
        throw armnn::Exception(exMessage.str());
    }
    if (nodesToDelegate->size == 0)
    {
        ARMNN_LOG(info) << "No operators in this model are supported by the Arm NN TfLite delegate." <<
                           " The model will be executed entirely by TfLite runtime.";
    }

    std::sort(nodesToDelegate->data, nodesToDelegate->data + nodesToDelegate->size);
    return nodesToDelegate;
}
/// Adds one Arm NN input layer per non-constant input tensor of the delegated subgraph.
///
/// @param delegateData  Holds the network being built and the per-tensor output slot table.
/// @param tfLiteContext Opaque TfLite context used to look up the tensors.
/// @param inputs        Ids of the subgraph's input tensors.
/// @param inputBindings Receives one (binding id, tensor info) pair per input layer created.
/// @return kTfLiteError if a tensor cannot be retrieved from the context, otherwise kTfLiteOk.
TfLiteStatus ArmnnSubgraph::AddInputLayer(DelegateData& delegateData,
                                          TfLiteOpaqueContext* tfLiteContext,
                                          const TfLiteIntArray* inputs,
                                          std::vector<armnn::BindingPointInfo>& inputBindings)
{
    const size_t inputCount = static_cast<size_t>(inputs->size);
    for (unsigned int idx = 0; idx < inputCount; ++idx)
    {
        const int32_t tensorId = inputs->data[idx];

        const TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId);
        if (tensor == nullptr)
        {
            return kTfLiteError;
        }

        // Only non-constant inputs get a binding; constant (kTfLiteMmapRo) tensors are skipped.
        if (TfLiteOpaqueTensorGetAllocationType(tensor) != kTfLiteMmapRo)
        {
            // The TfLite tensor id doubles as the Arm NN binding id.
            auto bindingId = static_cast<armnn::LayerBindingId>((tensorId));
            armnn::IConnectableLayer* inputLayer = delegateData.m_Network->AddInputLayer(bindingId);

            auto tensorInfo = GetTensorInfoForTfLiteOpaqueTensor(tensor);
            armnn::IOutputSlot& slot = inputLayer->GetOutputSlot(0);
            slot.SetTensorInfo(tensorInfo);

            // Remember the producing slot so later layers can connect to this tensor.
            delegateData.m_OutputSlotForNode[static_cast<unsigned long>(tensorId)] = &slot;

            inputBindings.push_back(std::make_pair(bindingId, tensorInfo));
        }
    }

    return kTfLiteOk;
}
/// Adds one Arm NN output layer per output tensor of the delegated subgraph and connects it to the
/// output slot recorded for that tensor.
///
/// @param delegateData   Holds the network being built and the per-tensor output slot table.
/// @param tfLiteContext  Opaque TfLite context used to look up the tensors.
/// @param outputs        Ids of the subgraph's output tensors.
/// @param outputBindings Receives one (binding id, tensor info) pair per output layer created.
/// @return kTfLiteError if a tensor cannot be retrieved from the context, otherwise kTfLiteOk.
TfLiteStatus ArmnnSubgraph::AddOutputLayer(DelegateData& delegateData,
                                           TfLiteOpaqueContext* tfLiteContext,
                                           const TfLiteIntArray* outputs,
                                           std::vector<armnn::BindingPointInfo>& outputBindings)
{
    const size_t outputCount = static_cast<size_t>(outputs->size);
    for (unsigned int idx = 0; idx < outputCount; ++idx)
    {
        const int32_t tensorId = outputs->data[idx];

        const TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId);
        if (tensor == nullptr)
        {
            return kTfLiteError;
        }

        // The TfLite tensor id doubles as the Arm NN binding id.
        auto bindingId = static_cast<armnn::LayerBindingId>((tensorId));
        armnn::IConnectableLayer* outputLayer = delegateData.m_Network->AddOutputLayer(bindingId);

        auto tensorInfo = GetTensorInfoForTfLiteOpaqueTensor(tensor);

        // A slot must already have been recorded for this tensor before it can feed an output layer.
        ARMNN_ASSERT(delegateData.m_OutputSlotForNode[static_cast<unsigned long>(tensorId)] != nullptr);
        delegateData.m_OutputSlotForNode[static_cast<unsigned long>(tensorId)]->Connect(
            outputLayer->GetInputSlot(0));

        outputBindings.push_back(std::make_pair(bindingId, tensorInfo));
    }

    return kTfLiteOk;
}
/// Builds, optimizes and loads an Arm NN network for the set of nodes TfLite handed to the delegate.
///
/// Steps: add input layers, visit every node in parameters->nodes_to_replace, add output layers,
/// optimize the network, optionally serialize it to a dot file, load it into the delegate's
/// runtime and optionally register the debug callback.
///
/// @param tfLiteContext Opaque TfLite context of the graph.
/// @param parameters    Delegate parameters describing the nodes to replace and their input/output tensors.
/// @param delegate      Delegate providing the options and the runtime.
/// @return A heap-allocated ArmnnSubgraph (deleted by the kernel's free callback), or nullptr if
///         the execution plan could not be obtained.
/// @throws armnn::Exception on any failure while building, optimizing or loading the network.
ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteOpaqueContext* tfLiteContext,
                                     const TfLiteOpaqueDelegateParams* parameters,
                                     const ArmnnOpaqueDelegate* delegate)
{
    const auto startTime = armnn::GetTimeNow();
    ARMNN_LOG(info) << "ArmnnSubgraph creation";

    // The plan itself is not used below; only the success of the call is checked.
    TfLiteIntArray* executionPlan = nullptr;
    if (TfLiteOpaqueContextGetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk)
    {
        return nullptr;
    }

    // Initialize DelegateData holds network and output slots information
    DelegateData delegateData(delegate->m_Options.GetBackends());

    // Build ArmNN Network
    armnn::NetworkOptions networkOptions = delegate->m_Options.GetOptimizerOptions().m_ModelOptions;
    armnn::NetworkId networkId;
    delegateData.m_Network = armnn::INetwork::Create(networkOptions);

    // One slot entry per tensor in the context, filled in as producing layers are added.
    delegateData.m_OutputSlotForNode = std::vector<armnn::IOutputSlot*>(
        TfLiteOpaqueContextGetNumTensors(tfLiteContext), nullptr);

    std::vector<armnn::BindingPointInfo> inputBindings;
    std::vector<armnn::BindingPointInfo> outputBindings;

    // Add input layer
    auto status = AddInputLayer(delegateData, tfLiteContext, parameters->input_tensors, inputBindings);
    if (status != kTfLiteOk)
    {
        throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to add Inputs to the network!");
    }

    // Parse TfLite delegate nodes to ArmNN
    const auto parseStartTime = armnn::GetTimeNow();
    for (int i = 0; i < parameters->nodes_to_replace->size; ++i)
    {
        const int nodeIndex = parameters->nodes_to_replace->data[i];

        TfLiteOpaqueNode* tfLiteNode = nullptr;
        TfLiteRegistrationExternal* tfLiteRegistration = nullptr;
        if (TfLiteOpaqueContextGetNodeAndRegistration(
                tfLiteContext, nodeIndex, &tfLiteNode, &tfLiteRegistration) != kTfLiteOk)
        {
            // Append the node index to the message. Indexing into the string literal with
            // nodeIndex would instead drop the first nodeIndex characters (or read out of bounds).
            std::stringstream exMessage;
            exMessage << "TfLiteArmnnOpaqueDelegate: Unable to get node registration: " << nodeIndex;
            throw armnn::Exception(exMessage.str());
        }

        if (VisitNode(delegateData, tfLiteContext, tfLiteRegistration, tfLiteNode, nodeIndex) != kTfLiteOk)
        {
            std::stringstream exMessage;
            exMessage << "TfLiteArmnnOpaqueDelegate: Unable to parse node: " << nodeIndex;
            throw armnn::Exception(exMessage.str());
        }
    }
    ARMNN_LOG(info) << "Parse nodes to ArmNN time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(parseStartTime).count() << " ms";

    // Add Output layer
    status = AddOutputLayer(delegateData, tfLiteContext, parameters->output_tensors, outputBindings);
    if (status != kTfLiteOk)
    {
        throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to add Outputs to the network!");
    }

    // Optimize ArmNN network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    try
    {
        const auto optimizeStartTime = armnn::GetTimeNow();
        optNet = armnn::Optimize(*(delegateData.m_Network.get()),
                                 delegate->m_Options.GetBackends(),
                                 delegate->m_Runtime->GetDeviceSpec(),
                                 delegate->m_Options.GetOptimizerOptions());
        ARMNN_LOG(info) << "Optimize ArmnnSubgraph time: " << std::setprecision(2)
                        << std::fixed << armnn::GetTimeDuration(optimizeStartTime).count() << " ms";
    }
    catch (const std::exception& ex)
    {
        std::stringstream exMessage;
        exMessage << "TfLiteArmnnOpaqueDelegate: Exception (" << ex.what() << ") caught from optimize.";
        throw armnn::Exception(exMessage.str());
    }
    if (!optNet)
    {
        // Optimize failed
        throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to optimize the network!");
    }

    // If set, we will serialize the optimized model into a dot file.
    const std::string serializeToDotFile = delegate->m_Options.GetSerializeToDot();
    if (!serializeToDotFile.empty())
    {
        ARMNN_LOG(info) << "Writing graph to dot file: " << serializeToDotFile;
        fs::path filename = serializeToDotFile;
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    try
    {
        const auto loadStartTime = armnn::GetTimeNow();

        // Load graph into runtime
        std::string errorMessage;
        armnn::Status loadingStatus;
        armnn::MemorySource inputSource = armnn::MemorySource::Undefined;
        armnn::MemorySource outputSource = armnn::MemorySource::Undefined;
        // There's a bit of an assumption here that the delegate will only support Malloc memory source.
        if (delegate->m_Options.GetOptimizerOptions().m_ImportEnabled)
        {
            inputSource = armnn::MemorySource::Malloc;
        }
        if (delegate->m_Options.GetOptimizerOptions().m_ExportEnabled)
        {
            outputSource = armnn::MemorySource::Malloc;
        }
        armnn::INetworkProperties networkProperties(false,
                                                    inputSource,
                                                    outputSource,
                                                    delegate->m_Options.GetInternalProfilingState(),
                                                    delegate->m_Options.GetInternalProfilingDetail());
        loadingStatus = delegate->m_Runtime->LoadNetwork(networkId,
                                                         std::move(optNet),
                                                         errorMessage,
                                                         networkProperties);
        if (loadingStatus != armnn::Status::Success)
        {
            // Network load failed. This exception is caught and re-wrapped by the handler below.
            throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Network could not be loaded: " + errorMessage);
        }

        ARMNN_LOG(info) << "Load ArmnnSubgraph time: " << std::setprecision(2)
                        << std::fixed << armnn::GetTimeDuration(loadStartTime).count() << " ms";
    }
    catch (const std::exception& ex)
    {
        std::stringstream exMessage;
        exMessage << "TfLiteArmnnOpaqueDelegate: Exception (" << ex.what() << ") caught from LoadNetwork.";
        throw armnn::Exception(exMessage.str());
    }

    // Register debug callback function
    if (delegate->m_Options.GetDebugCallbackFunction().has_value())
    {
        delegate->m_Runtime->RegisterDebugCallback(networkId, delegate->m_Options.GetDebugCallbackFunction().value());
    }

    ARMNN_LOG(info) << "Overall ArmnnSubgraph creation time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms\n";

    // Create a new SubGraph with networkId and runtime
    return new ArmnnSubgraph(networkId, delegate->m_Runtime, inputBindings, outputBindings);
}
/// Prepare step for the delegated subgraph. No work is done here.
///
/// @param tfLiteContext Unused.
/// @return Always kTfLiteOk.
TfLiteStatus ArmnnSubgraph::Prepare(TfLiteOpaqueContext* tfLiteContext)
{
    armnn::IgnoreUnused(tfLiteContext);

    return kTfLiteOk;
}
/// Executes the loaded Arm NN network for this subgraph.
///
/// Wraps the TfLite buffers of every non-constant input and every output of the node in Arm NN
/// tensors, enqueues the workload and, when profiling is enabled, prints the profiler output to
/// std::cout.
///
/// Tensors are bound by their TfLite tensor id, because AddInputLayer/AddOutputLayer register each
/// layer with the tensor id as its binding id.
///
/// @param tfLiteContext Opaque TfLite context used to look up the tensors.
/// @param tfLiteNode    The delegate kernel node whose inputs and outputs are bound.
/// @return kTfLiteOk on success; kTfLiteError if a tensor cannot be retrieved, if the node has more
///         bindable tensors than bindings were recorded, or if the workload fails.
/// @throws armnn::Exception if the node's input or output lists cannot be obtained.
TfLiteStatus ArmnnSubgraph::Invoke(TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueNode* tfLiteNode)
{
    // Prepare inputs
    armnn::InputTensors inputTensors;
    size_t inputIndex = 0;
    const int* inputs = nullptr;
    int numInputs = 0;
    if (TfLiteOpaqueNodeInputs(tfLiteNode, &inputs, &numInputs) != kTfLiteOk)
    {
        throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to load subgraph inputs!");
    }
    for (int inputIdx = 0; inputIdx < numInputs; inputIdx++)
    {
        const int tensorId = inputs[inputIdx];
        TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId);
        if (!tensor)
        {
            return kTfLiteError;
        }

        // Constant inputs have no input layer and therefore no binding.
        if (TfLiteOpaqueTensorGetAllocationType(tensor) != kTfLiteMmapRo)
        {
            // Guard against indexing past the bindings recorded at creation time.
            if (inputIndex >= m_InputBindings.size())
            {
                return kTfLiteError;
            }

            const armnn::BindingPointInfo& inputBinding = m_InputBindings[inputIndex];
            armnn::TensorInfo inputTensorInfo = inputBinding.second;
            inputTensorInfo.SetConstant(true);
            const armnn::ConstTensor inputTensor(inputTensorInfo, TfLiteOpaqueTensorData(tensor));

            // Bind by tensor id (the layer's binding id), not by position in the input list.
            inputTensors.emplace_back(tensorId, inputTensor);

            ++inputIndex;
        }
    }

    // Prepare outputs
    armnn::OutputTensors outputTensors;
    size_t outputIndex = 0;
    const int* outputs = nullptr;
    int numOutputs = 0;
    if (TfLiteOpaqueNodeOutputs(tfLiteNode, &outputs, &numOutputs) != kTfLiteOk)
    {
        throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to load subgraph outputs!");
    }
    for (int outputIdx = 0; outputIdx < numOutputs; outputIdx++)
    {
        // Guard against indexing past the bindings recorded at creation time.
        if (outputIndex >= m_OutputBindings.size())
        {
            return kTfLiteError;
        }
        const armnn::BindingPointInfo& outputBinding = m_OutputBindings[outputIndex];

        const int tensorId = outputs[outputIdx];
        TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId);
        if (!tensor)
        {
            return kTfLiteError;
        }

        const armnn::Tensor outputTensor(outputBinding.second, TfLiteOpaqueTensorData(tensor));

        // Bind by tensor id (the layer's binding id), not by position in the output list.
        outputTensors.emplace_back(tensorId, outputTensor);

        ++outputIndex;
    }

    // Run graph
    auto status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);

    // The delegate holds its own Arm NN runtime so this is our last chance to print internal profiling data.
    std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
    if (profiler && profiler->IsProfilingEnabled())
    {
        profiler->Print(std::cout);
    }
    return (status == armnn::Status::Success) ? kTfLiteOk : kTfLiteError;
}
/// Dispatches a TfLite node to the matching Arm NN operator visitor, keyed on the builtin operator
/// code of its registration.
///
/// No operator cases are present in this switch, so every node falls into the default branch.
///
/// @param delegateData       Network and backend data for the visit (not read here).
/// @param tfLiteContext      Opaque TfLite context (not read here).
/// @param tfLiteRegistration Registration whose builtin code selects the operator.
/// @param tfLiteNode         Node being visited (not read here).
/// @param nodeIndex          Index of the node in the graph (not read here).
/// @return kTfLiteError for every operator code.
TfLiteStatus ArmnnSubgraph::VisitNode(DelegateData& delegateData,
                                      TfLiteOpaqueContext* tfLiteContext,
                                      TfLiteRegistrationExternal* tfLiteRegistration,
                                      TfLiteOpaqueNode* tfLiteNode,
                                      int nodeIndex)
{
    const auto builtinCode = TfLiteRegistrationExternalGetBuiltInCode(tfLiteRegistration);
    switch (builtinCode)
    {
        default:
            return kTfLiteError;
    }
}
} // armnnOpaqueDelegate namespace