delegate/opaque/src/armnn_delegate.cpp - platform/external/armnn - Git at Google

 //
 // Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //

 #include <armnn_delegate.hpp>
 #include <OpaqueDelegateUtils.hpp>

 #include <Version.hpp>

 #include "Activation.hpp"
 #include "ArgMinMax.hpp"
 #include "BatchMatMul.hpp"
 #include "BatchSpace.hpp"
 #include "Comparison.hpp"
 #include "Convolution.hpp"
 #include "Control.hpp"
 #include "ElementwiseBinary.hpp"
 #include "ElementwiseUnary.hpp"
 #include "Fill.hpp"
 #include "FullyConnected.hpp"
 #include "Gather.hpp"
 #include "GatherNd.hpp"
 #include "LogicalBinary.hpp"
 #include "Lstm.hpp"
 #include "Normalization.hpp"
 #include "Pack.hpp"
 #include "Pad.hpp"
 #include "Pooling.hpp"
 #include "Prelu.hpp"
 #include "Quantization.hpp"
 #include "Redefine.hpp"
 #include "Reduce.hpp"
 #include "Resize.hpp"
 #include "Round.hpp"
 #include "Shape.hpp"
 #include "Slice.hpp"
 #include "StridedSlice.hpp"
 #include "Softmax.hpp"
 #include "SpaceDepth.hpp"
 #include "Split.hpp"
 #include "Transpose.hpp"
 #include "UnidirectionalSequenceLstm.hpp"
 #include "Unpack.hpp"

 #include <armnn/utility/IgnoreUnused.hpp>
 #include <armnnUtils/Filesystem.hpp>
 #include <armnn/utility/Timer.hpp>
 #include <flatbuffers/flatbuffers.h>
 #include <tensorflow/lite/context_util.h>
 #include <tensorflow/lite/schema/schema_generated.h>
 #include <tensorflow/lite/minimal_logging.h>
 #include <tensorflow/lite/logger.h>

 #include <algorithm>
 #include <iostream>
 #include <sstream>

 namespace armnnOpaqueDelegate
 {

 // Builds the delegate from the supplied options: optionally enables Arm NN logging, obtains the
 // Arm NN runtime and narrows the requested backends down to those the runtime reports as supported.
 //
 // options - delegate options; moved into m_Options.
 //
 // Throws armnn::InvalidArgumentException when none of the requested backends is supported
 // (which is also the outcome when no runtime could be obtained).
 ArmnnOpaqueDelegate::ArmnnOpaqueDelegate(armnnDelegate::DelegateOptions options)
     : m_Options(std::move(options))
 {
     // Set up Arm NN logging when the options ask for it.
     if (m_Options.IsLoggingEnabled())
     {
         armnn::ConfigureLogging(true, true, m_Options.GetLoggingSeverity());
     }

     // The runtime is shared by all armnn_delegate instances, so the RuntimeOptions cannot be
     // altered from one delegate instance to the next.
     m_Runtime = GetRuntime(m_Options.GetRuntimeOptions());

     std::vector<armnn::BackendId> usableBackends;
     if (m_Runtime)
     {
         const armnn::BackendIdSet runtimeBackends = m_Runtime->GetDeviceSpec().GetSupportedBackends();
         for (auto& requested : m_Options.GetBackends())
         {
             const bool isKnown =
                 std::find(runtimeBackends.cbegin(), runtimeBackends.cend(), requested) != runtimeBackends.cend();
             if (isKnown)
             {
                 usableBackends.push_back(requested);
                 continue;
             }

             // Unknown backends are reported and dropped rather than treated as fatal.
             TFLITE_LOG_PROD(tflite::TFLITE_LOG_INFO,
                             "TfLiteArmnnDelegate: Requested unknown backend %s", requested.Get().c_str());
         }
     }

     // Without at least one usable backend the delegate cannot do anything.
     if (usableBackends.empty())
     {
         throw armnn::InvalidArgumentException("TfLiteArmnnOpaqueDelegate: No known backend specified.");
     }
     m_Options.SetBackends(usableBackends);

     TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, "TfLiteArmnnOpaqueDelegate: Created TfLite ArmNN delegate.");
 }

 // Delegate "prepare" step. Asks the ArmnnOpaqueDelegate which nodes it can take over, builds the
 // kernel registration (init / free / prepare / invoke callbacks) and passes both to TfLite so the
 // selected node subsets are replaced with delegate kernels.
 //
 // tfLiteContext  - TfLite context describing the graph.
 // tfLiteDelegate - opaque delegate whose data_ points at the ArmnnOpaqueDelegate instance.
 //
 // Returns kTfLiteError when the list of nodes or the kernel registration cannot be created,
 // otherwise the status of TfLiteOpaqueContextReplaceNodeSubsetsWithDelegateKernels.
 TfLiteStatus DoPrepare(TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueDelegate* tfLiteDelegate)
 {
     auto* armnnDelegate = static_cast<::armnnOpaqueDelegate::ArmnnOpaqueDelegate*>(tfLiteDelegate->data_);
     TfLiteIntArray* supportedOperators = armnnDelegate->IdentifyOperatorsToDelegate(tfLiteContext);
     if (supportedOperators == nullptr)
     {
         return kTfLiteError;
     }

     // ArmNN Opaque Delegate Registration
     TfLiteRegistrationExternal* kernelRegistration =
             TfLiteRegistrationExternalCreate(kTfLiteBuiltinDelegate, "TfLiteArmNNOpaqueDelegate", /*version=*/1);
     if (kernelRegistration == nullptr)
     {
         // The node list is owned by this function; release it on this error path as well.
         TfLiteIntArrayFree(supportedOperators);
         return kTfLiteError;
     }

     // Init: the buffer carries the TfLiteOpaqueDelegateParams of the node subset being replaced;
     // the returned ArmnnSubgraph is what the free / prepare / invoke callbacks below operate on.
     TfLiteRegistrationExternalSetInit(
             kernelRegistration,
             [](TfLiteOpaqueContext* tfLiteContext, const char* buffer, size_t length) -> void*
             {
                 armnn::IgnoreUnused(length);
                 const TfLiteOpaqueDelegateParams* parameters =
                         reinterpret_cast<const TfLiteOpaqueDelegateParams*>(buffer);
                 if (parameters == nullptr)
                 {
                     TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext,
                                               "TfLiteArmnnOpaqueDelegate: Unable to get parameters.");
                     return nullptr;
                 }

                 return static_cast<void*>(
                         ArmnnSubgraph::Create(tfLiteContext,
                                               parameters,
                                               static_cast<::armnnOpaqueDelegate::ArmnnOpaqueDelegate*>(
                                                       parameters->delegate->data_)));
             }
     );

     // Free: destroys the ArmnnSubgraph created by the init callback.
     TfLiteRegistrationExternalSetFree(
             kernelRegistration,
             [](TfLiteOpaqueContext* tfLiteContext, void* buffer) -> void
             {
                 armnn::IgnoreUnused(tfLiteContext);
                 if (buffer != nullptr)
                 {
                     delete static_cast<ArmnnSubgraph*>(buffer);
                 }
             }
     );

     // Prepare: forwards to ArmnnSubgraph::Prepare; a node without user data is an error.
     TfLiteRegistrationExternalSetPrepare(
             kernelRegistration,
             [](TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueNode* tfLiteNode) -> TfLiteStatus
             {
                 void* userData = TfLiteOpaqueNodeGetUserData(tfLiteNode);
                 if (userData == nullptr)
                 {
                     return kTfLiteError;
                 }
                 return static_cast<ArmnnSubgraph*>(userData)->Prepare(tfLiteContext);
             }
     );

     // Invoke: forwards to ArmnnSubgraph::Invoke; a node without user data is an error.
     TfLiteRegistrationExternalSetInvoke(
             kernelRegistration,
             [](TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueNode* tfLiteNode) -> TfLiteStatus
             {
                 void* userData = TfLiteOpaqueNodeGetUserData(tfLiteNode);
                 if (userData == nullptr)
                 {
                     return kTfLiteError;
                 }

                 return static_cast<ArmnnSubgraph*>(userData)->Invoke(tfLiteContext, tfLiteNode);
             }
     );

     const TfLiteStatus status =
             TfLiteOpaqueContextReplaceNodeSubsetsWithDelegateKernels(
                     tfLiteContext, kernelRegistration, supportedOperators, tfLiteDelegate);

     TfLiteIntArrayFree(supportedOperators);
     return status;
 }

 // Creates a TfLite opaque delegate backed by a new ArmnnOpaqueDelegate.
 //
 // settings - currently ignored: the delegate is always built from
 //            TfLiteArmnnDelegateOptionsDefault() until DelegateOptions can parse the settings.
 //
 // Returns the result of TfLiteOpaqueDelegateCreate for the new delegate's builder.
 TfLiteOpaqueDelegate* TfLiteArmnnOpaqueDelegateCreate(const void* settings)
 {
     armnn::IgnoreUnused(settings);

     ::armnnDelegate::DelegateOptions defaultOptions = TfLiteArmnnDelegateOptionsDefault();
     ::armnnOpaqueDelegate::ArmnnOpaqueDelegate* delegateImpl =
             new ::armnnOpaqueDelegate::ArmnnOpaqueDelegate(defaultOptions);

     return TfLiteOpaqueDelegateCreate(delegateImpl->GetDelegateBuilder());
 }

 // Returns the default delegate options: a DelegateOptions built for the CpuRef compute device.
 ::armnnDelegate::DelegateOptions TfLiteArmnnDelegateOptionsDefault()
 {
     return ::armnnDelegate::DelegateOptions(armnn::Compute::CpuRef);
 }

 // Destroys a delegate: first the ArmnnOpaqueDelegate stored as its data, then the
 // TfLite opaque delegate itself. A null pointer is accepted and ignored.
 void TfLiteArmnnOpaqueDelegateDelete(TfLiteOpaqueDelegate* tfLiteDelegate)
 {
     if (tfLiteDelegate == nullptr)
     {
         return;
     }

     auto* delegateImpl =
             static_cast<::armnnOpaqueDelegate::ArmnnOpaqueDelegate*>(TfLiteOpaqueDelegateGetData(tfLiteDelegate));
     delete delegateImpl;
     TfLiteOpaqueDelegateDelete(tfLiteDelegate);
 }

 // Returns the plugin table for this delegate: its create, delete and errno entry points.
 // The table is a function-local static, so the returned pointer stays valid after the call.
 const TfLiteOpaqueDelegatePlugin* GetArmnnDelegatePluginApi()
 {
     static constexpr TfLiteOpaqueDelegatePlugin pluginTable{
             TfLiteArmnnOpaqueDelegateCreate,
             TfLiteArmnnOpaqueDelegateDelete,
             TfLiteArmnnOpaqueDelegateErrno};
     return &pluginTable;
 }

 // Returns the opaque delegate version, as given by OPAQUE_DELEGATE_VERSION.
 const std::string ArmnnOpaqueDelegate::GetVersion()
 {
     std::string version = OPAQUE_DELEGATE_VERSION;
     return version;
 }

 // Walks the execution plan and collects the indices of the nodes that VisitNode accepts.
 //
 // tfLiteContext - TfLite context describing the graph.
 //
 // Returns a newly created TfLiteIntArray holding the sorted node indices to delegate (possibly
 // empty); the caller releases it with TfLiteIntArrayFree. Returns nullptr when the execution
 // plan cannot be read or the array cannot be created.
 //
 // Throws armnn::Exception when unsupported operators were found and the options report that
 // fallback to the TfLite runtime is disabled.
 TfLiteIntArray* ArmnnOpaqueDelegate::IdentifyOperatorsToDelegate(TfLiteOpaqueContext* tfLiteContext)
 {
     TfLiteIntArray* executionPlan = nullptr;
     if (TfLiteOpaqueContextGetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk)
     {
         TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext, "TfLiteArmnnOpaqueDelegate: Unable to get graph execution plan.");
         return nullptr;
     }

     // Delegate data with null network
     DelegateData delegateData(m_Options.GetBackends());

     // Allocate room for every node in the plan, then use size as the fill counter.
     TfLiteIntArray* nodesToDelegate = TfLiteIntArrayCreate(executionPlan->size);
     if (nodesToDelegate == nullptr)
     {
         TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext,
                                   "TfLiteArmnnOpaqueDelegate: Unable to create int array from execution plan.");
         return nullptr;
     }
     nodesToDelegate->size = 0;

     // Builtin codes of rejected nodes; a set so each operator is reported once.
     std::set<int32_t> unsupportedOperators;

     for (int i = 0; i < executionPlan->size; ++i)
     {
         const int nodeIndex = executionPlan->data[i];

         // If TfLiteOpaqueNodes can be delegated to ArmNN
         TfLiteOpaqueNode* tfLiteNode = nullptr;
         TfLiteRegistrationExternal* tfLiteRegistration = nullptr;

         if (TfLiteOpaqueContextGetNodeAndRegistration(
                 tfLiteContext, nodeIndex, &tfLiteNode, &tfLiteRegistration) != kTfLiteOk)
         {
             TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext,
                                       "TfLiteArmnnOpaqueDelegate: Unable to get node and registration for node %d.",
                                       nodeIndex);
             continue;
         }

         // An exception while visiting a node is treated the same as "not supported".
         TfLiteStatus visitStatus;
         try
         {
             visitStatus = ArmnnSubgraph::VisitNode(
                     delegateData, tfLiteContext, tfLiteRegistration, tfLiteNode, nodeIndex);
         }
         catch (std::exception& ex)
         {
             ARMNN_LOG(error) << "ArmNN Failed to visit node with error: " << ex.what();
             visitStatus = kTfLiteError;
         }

         if (visitStatus != kTfLiteOk)
         {
             // node is not supported by ArmNN
             unsupportedOperators.insert(TfLiteRegistrationExternalGetBuiltInCode(tfLiteRegistration));
             continue;
         }

         nodesToDelegate->data[nodesToDelegate->size++] = nodeIndex;
     }

     for (const int32_t builtinCode : unsupportedOperators)
     {
         TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext,
                                   "Operator %s [%d] is not supported by armnn_opaque_delegate.",
                                   tflite::EnumNameBuiltinOperator(tflite::BuiltinOperator(builtinCode)),
                                   builtinCode);
     }

     if (!unsupportedOperators.empty() && m_Options.TfLiteRuntimeFallbackDisabled())
     {
         // The array is not returned on this path, so release it before throwing.
         TfLiteIntArrayFree(nodesToDelegate);

         std::stringstream exMessage;
         exMessage << "TfLiteArmnnOpaqueDelegate: There are unsupported operators in the model. ";
         exMessage << "Not falling back to TfLite Runtime as fallback is disabled. ";
         exMessage << "This should only be disabled under test conditions.";
         throw armnn::Exception(exMessage.str());
     }
     if (nodesToDelegate->size == 0)
     {
         ARMNN_LOG(info) << "No operators in this model are supported by the Arm NN TfLite delegate." <<
                         " The model will be executed entirely by TfLite runtime.";
     }

     std::sort(nodesToDelegate->data, nodesToDelegate->data + nodesToDelegate->size);
     return nodesToDelegate;
 }

 // Adds one Arm NN input layer for every non-constant tensor listed in inputs.
 //
 // delegateData  - receives the new layers (m_Network) and their output slots (m_OutputSlotForNode).
 // tfLiteContext - context the tensors are looked up in.
 // inputs        - tensor ids of the subgraph inputs.
 // inputBindings - gets one (binding id, tensor info) pair appended per layer added; the binding id
 //                 is the TfLite tensor id.
 //
 // Returns kTfLiteError as soon as a tensor cannot be found, kTfLiteOk otherwise.
 TfLiteStatus ArmnnSubgraph::AddInputLayer(DelegateData& delegateData,
                                           TfLiteOpaqueContext* tfLiteContext,
                                           const TfLiteIntArray* inputs,
                                           std::vector<armnn::BindingPointInfo>& inputBindings)
 {
     const size_t inputCount = static_cast<size_t>(inputs->size);
     for (unsigned int idx = 0; idx < inputCount; ++idx)
     {
         const int32_t tensorId = inputs->data[idx];
         const TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId);
         if (tensor == nullptr)
         {
             return kTfLiteError;
         }

         // Constant (kTfLiteMmapRo) inputs get no binding.
         const bool isConstant = (TfLiteOpaqueTensorGetAllocationType(tensor) == kTfLiteMmapRo);
         if (isConstant)
         {
             continue;
         }

         const auto bindingId = static_cast<armnn::LayerBindingId>(tensorId);
         armnn::IConnectableLayer* inputLayer = delegateData.m_Network->AddInputLayer(bindingId);

         auto tensorInfo = GetTensorInfoForTfLiteOpaqueTensor(tensor);
         armnn::IOutputSlot& producerSlot = inputLayer->GetOutputSlot(0);
         producerSlot.SetTensorInfo(tensorInfo);

         // Remember the slot so later layers can connect to this tensor.
         delegateData.m_OutputSlotForNode[static_cast<unsigned long>(tensorId)] = &producerSlot;

         inputBindings.push_back(std::make_pair(bindingId, tensorInfo));
     }

     return kTfLiteOk;
 }

 // Adds one Arm NN output layer per tensor listed in outputs and connects it to the output slot
 // already recorded for that tensor in delegateData.m_OutputSlotForNode.
 //
 // delegateData   - network being built plus the per-tensor output slots.
 // tfLiteContext  - context the tensors are looked up in.
 // outputs        - tensor ids of the subgraph outputs.
 // outputBindings - gets one (binding id, tensor info) pair appended per output; the binding id
 //                  is the TfLite tensor id.
 //
 // Returns kTfLiteError as soon as a tensor cannot be found, kTfLiteOk otherwise.
 TfLiteStatus ArmnnSubgraph::AddOutputLayer(DelegateData& delegateData,
                                            TfLiteOpaqueContext* tfLiteContext,
                                            const TfLiteIntArray* outputs,
                                            std::vector<armnn::BindingPointInfo>& outputBindings)
 {
     const size_t outputCount = static_cast<size_t>(outputs->size);
     for (unsigned int idx = 0; idx < outputCount; ++idx)
     {
         const int32_t tensorId = outputs->data[idx];
         const TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId);
         if (tensor == nullptr)
         {
             return kTfLiteError;
         }

         const auto bindingId = static_cast<armnn::LayerBindingId>(tensorId);
         armnn::IConnectableLayer* outputLayer = delegateData.m_Network->AddOutputLayer(bindingId);

         auto tensorInfo = GetTensorInfoForTfLiteOpaqueTensor(tensor);

         // Some layer must already have produced this tensor before it can feed an output layer.
         const auto slotIndex = static_cast<unsigned long>(tensorId);
         ARMNN_ASSERT(delegateData.m_OutputSlotForNode[slotIndex] != nullptr);
         delegateData.m_OutputSlotForNode[slotIndex]->Connect(outputLayer->GetInputSlot(0));

         outputBindings.push_back(std::make_pair(bindingId, tensorInfo));
     }

     return kTfLiteOk;
 }

 // Builds, optimizes and loads the Arm NN network that replaces one delegated TfLite node subset.
 //
 // tfLiteContext - TfLite context the nodes and tensors are read from.
 // parameters    - delegate params; input_tensors, output_tensors and nodes_to_replace are used.
 // delegate      - owning delegate; supplies the options and the runtime.
 //
 // Returns a new ArmnnSubgraph allocated with new (the caller owns it), or nullptr when the
 // execution plan cannot be obtained.
 //
 // Throws armnn::Exception when inputs or outputs cannot be added, a node cannot be fetched or
 // parsed, optimization fails, or the network cannot be loaded into the runtime.
 ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteOpaqueContext* tfLiteContext,
                                      const TfLiteOpaqueDelegateParams* parameters,
                                      const ArmnnOpaqueDelegate* delegate)
 {
     const auto startTime = armnn::GetTimeNow();
     ARMNN_LOG(info) << "ArmnnSubgraph creation";

     // Only the status is used here; the plan itself is not read.
     TfLiteIntArray* executionPlan;
     if (TfLiteOpaqueContextGetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk)
     {
         return nullptr;
     }

     // Initialize DelegateData holds network and output slots information
     DelegateData delegateData(delegate->m_Options.GetBackends());

     // Build ArmNN Network
     armnn::NetworkOptions networkOptions = delegate->m_Options.GetOptimizerOptions().m_ModelOptions;
     armnn::NetworkId networkId;
     delegateData.m_Network = armnn::INetwork::Create(networkOptions);

     // One output-slot entry per tensor in the context, filled in as layers are added.
     delegateData.m_OutputSlotForNode = std::vector<armnn::IOutputSlot*>(
                                                             TfLiteOpaqueContextGetNumTensors(tfLiteContext), nullptr);

     std::vector<armnn::BindingPointInfo> inputBindings;
     std::vector<armnn::BindingPointInfo> outputBindings;

     // Add input layer
     auto status = AddInputLayer(delegateData, tfLiteContext, parameters->input_tensors, inputBindings);
     if (status != kTfLiteOk)
     {
         throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to add Inputs to the network!");
     }

     // Parse TfLite delegate nodes to ArmNN
     const auto parseStartTime = armnn::GetTimeNow();
     for (int i = 0; i < parameters->nodes_to_replace->size; ++i)
     {
         const int nodeIndex = parameters->nodes_to_replace->data[i];

         TfLiteOpaqueNode* tfLiteNode = nullptr;
         TfLiteRegistrationExternal* tfLiteRegistration = nullptr;
         if (TfLiteOpaqueContextGetNodeAndRegistration(
             tfLiteContext, nodeIndex, &tfLiteNode, &tfLiteRegistration) != kTfLiteOk)
         {
             // Format the index into the message; subscripting the literal with nodeIndex would
             // instead skip that many characters of the text (and read out of bounds for large indices).
             std::stringstream exMessage;
             exMessage << "TfLiteArmnnOpaqueDelegate: Unable to get node registration: " << nodeIndex;
             throw armnn::Exception(exMessage.str());
         }

         if (VisitNode(delegateData, tfLiteContext, tfLiteRegistration, tfLiteNode, nodeIndex) != kTfLiteOk)
         {
             std::stringstream exMessage;
             exMessage << "TfLiteArmnnOpaqueDelegate: Unable to parse node: " << nodeIndex;
             throw armnn::Exception(exMessage.str());
         }
     }
     ARMNN_LOG(info) << "Parse nodes to ArmNN time: " << std::setprecision(2)
                     << std::fixed << armnn::GetTimeDuration(parseStartTime).count() << " ms";

     // Add Output layer
     status = AddOutputLayer(delegateData, tfLiteContext, parameters->output_tensors, outputBindings);
     if (status != kTfLiteOk)
     {
         throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to add Outputs to the network!");
     }

     // Optimize ArmNN network; any exception is rethrown as armnn::Exception with added context.
     armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
     try
     {
         const auto optimizeStartTime = armnn::GetTimeNow();
         optNet = armnn::Optimize(*(delegateData.m_Network.get()),
                                  delegate->m_Options.GetBackends(),
                                  delegate->m_Runtime->GetDeviceSpec(),
                                  delegate->m_Options.GetOptimizerOptions());
         ARMNN_LOG(info) << "Optimize ArmnnSubgraph time: " << std::setprecision(2)
                         << std::fixed << armnn::GetTimeDuration(optimizeStartTime).count() << " ms";
     }
     catch (std::exception& ex)
     {
         std::stringstream exMessage;
         exMessage << "TfLiteArmnnOpaqueDelegate: Exception (" << ex.what() << ") caught from optimize.";
         throw armnn::Exception(exMessage.str());
     }
     if (!optNet)
     {
         // Optimize failed
         throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to optimize the network!");
     }

     // If set, we will serialize the optimized model into a dot file.
     const std::string serializeToDotFile = delegate->m_Options.GetSerializeToDot();
     if (!serializeToDotFile.empty())
     {
         ARMNN_LOG(info) << "Writing graph to dot file: " << serializeToDotFile;
         fs::path filename = serializeToDotFile;
         std::fstream file(filename.c_str(), std::ios_base::out);
         optNet->SerializeToDot(file);
     }

     try
     {
         const auto loadStartTime = armnn::GetTimeNow();

         // Load graph into runtime
         std::string errorMessage;
         armnn::Status loadingStatus;
         armnn::MemorySource inputSource = armnn::MemorySource::Undefined;
         armnn::MemorySource outputSource = armnn::MemorySource::Undefined;
         // There's a bit of an assumption here that the delegate will only support Malloc memory source.
         if (delegate->m_Options.GetOptimizerOptions().m_ImportEnabled)
         {
             inputSource = armnn::MemorySource::Malloc;
         }
         if (delegate->m_Options.GetOptimizerOptions().m_ExportEnabled)
         {
             outputSource = armnn::MemorySource::Malloc;
         }
         armnn::INetworkProperties networkProperties(false,
                                                     inputSource,
                                                     outputSource,
                                                     delegate->m_Options.GetInternalProfilingState(),
                                                     delegate->m_Options.GetInternalProfilingDetail());
         loadingStatus = delegate->m_Runtime->LoadNetwork(networkId,
                                                          std::move(optNet),
                                                          errorMessage,
                                                          networkProperties);
         if (loadingStatus != armnn::Status::Success)
         {
             // Network load failed.
             throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Network could not be loaded: " + errorMessage);
         }

         ARMNN_LOG(info) << "Load ArmnnSubgraph time: " << std::setprecision(2)
                         << std::fixed << armnn::GetTimeDuration(loadStartTime).count() << " ms";
     }
     catch (std::exception& ex)
     {
         // Also catches the load-failure exception thrown just above and wraps it once more.
         std::stringstream exMessage;
         exMessage << "TfLiteArmnnOpaqueDelegate: Exception (" << ex.what() << ") caught from LoadNetwork.";
         throw armnn::Exception(exMessage.str());
     }

     // Register debug callback function
     if (delegate->m_Options.GetDebugCallbackFunction().has_value())
     {
         delegate->m_Runtime->RegisterDebugCallback(networkId, delegate->m_Options.GetDebugCallbackFunction().value());
     }

     ARMNN_LOG(info) << "Overall ArmnnSubgraph creation time: " << std::setprecision(2)
                     << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms\n";

     // Create a new SubGraph with networkId and runtime
     return new ArmnnSubgraph(networkId, delegate->m_Runtime, inputBindings, outputBindings);
 }

 // Prepare callback for the delegated subgraph. Does no work here: the context is unused and the
 // call always reports kTfLiteOk.
 TfLiteStatus ArmnnSubgraph::Prepare(TfLiteOpaqueContext* tfLiteContext)
 {
     armnn::IgnoreUnused(tfLiteContext);

     const TfLiteStatus result = kTfLiteOk;
     return result;
 }

 // Runs the loaded Arm NN network for one delegated node.
 //
 // Wraps the node's non-constant input tensors and all of its output tensors in Arm NN tensors,
 // enqueues the workload and, when profiling is enabled, prints the profiler output to std::cout.
 //
 // tfLiteContext - context the tensors are looked up in.
 // tfLiteNode    - delegate node whose inputs and outputs are bound.
 //
 // Returns kTfLiteOk when EnqueueWorkload succeeds; kTfLiteError when a tensor cannot be found,
 // when there are more tensors than recorded bindings, or when the workload fails.
 //
 // Throws armnn::Exception when the node's input or output list cannot be read.
 TfLiteStatus ArmnnSubgraph::Invoke(TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueNode* tfLiteNode)
 {
     // Prepare inputs
     armnn::InputTensors inputTensors;
     size_t inputIndex = 0;
     const int* inputs = nullptr;
     int numInputs = 0;
     if (TfLiteOpaqueNodeInputs(tfLiteNode, &inputs, &numInputs) != kTfLiteOk)
     {
         throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to load subgraph inputs!");
     }
     for (int inputIdx = 0; inputIdx < numInputs; inputIdx++)
     {
         const int tensorId = inputs[inputIdx];
         TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId);

         if (!tensor)
         {
             return kTfLiteError;
         }

         // Constant inputs have no binding (AddInputLayer skips them as well).
         if (TfLiteOpaqueTensorGetAllocationType(tensor) == kTfLiteMmapRo)
         {
             continue;
         }

         // Guard against a node that has more non-constant inputs than recorded bindings.
         if (inputIndex >= m_InputBindings.size())
         {
             return kTfLiteError;
         }

         const armnn::BindingPointInfo& inputBinding = m_InputBindings[inputIndex];
         armnn::TensorInfo inputTensorInfo = inputBinding.second;
         inputTensorInfo.SetConstant(true);
         const armnn::ConstTensor inputTensor(inputTensorInfo, TfLiteOpaqueTensorData(tensor));

         // The input layer was registered under the TfLite tensor id, so bind with that id
         // rather than with the position in the node's input list.
         inputTensors.emplace_back(tensorId, inputTensor);

         ++inputIndex;
     }

     // Prepare outputs
     armnn::OutputTensors outputTensors;
     size_t outputIndex = 0;
     const int* outputs = nullptr;
     int numOutputs = 0;
     if (TfLiteOpaqueNodeOutputs(tfLiteNode, &outputs, &numOutputs) != kTfLiteOk)
     {
         throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to load subgraph outputs!");
     }
     for (int outputIdx = 0; outputIdx < numOutputs; outputIdx++)
     {
         const int tensorId = outputs[outputIdx];
         TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId);

         if (!tensor)
         {
             return kTfLiteError;
         }

         // Guard against a node that has more outputs than recorded bindings.
         if (outputIndex >= m_OutputBindings.size())
         {
             return kTfLiteError;
         }

         const armnn::BindingPointInfo& outputBinding = m_OutputBindings[outputIndex];
         const armnn::Tensor outputTensor(outputBinding.second, TfLiteOpaqueTensorData(tensor));

         // Same as for inputs: the output layer's binding id is the TfLite tensor id.
         outputTensors.emplace_back(tensorId, outputTensor);

         ++outputIndex;
     }

     // Run graph
     auto status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
     // The delegate holds its own Arm NN runtime so this is our last chance to print internal profiling data.
     std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
     if (profiler && profiler->IsProfilingEnabled())
     {
         profiler->Print(std::cout);
     }
     return (status == armnn::Status::Success) ? kTfLiteOk : kTfLiteError;
 }

 // Dispatches on the node's builtin operator code. No operator has a case in this switch, so
 // every code reaches the default branch and the node is reported as unsupported (kTfLiteError).
 // Only tfLiteRegistration is read; the remaining parameters are unused here.
 TfLiteStatus ArmnnSubgraph::VisitNode(DelegateData& delegateData,
                                       TfLiteOpaqueContext* tfLiteContext,
                                       TfLiteRegistrationExternal* tfLiteRegistration,
                                       TfLiteOpaqueNode* tfLiteNode,
                                       int nodeIndex)
 {
     const auto builtinCode = TfLiteRegistrationExternalGetBuiltInCode(tfLiteRegistration);
     switch (builtinCode)
     {
         default:
             return kTfLiteError;
     }
 }

 } // armnnOpaqueDelegate namespace
	//
	// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
	// SPDX-License-Identifier: MIT
	//

	#include <armnn_delegate.hpp>
	#include <OpaqueDelegateUtils.hpp>

	#include <Version.hpp>

	#include "Activation.hpp"
	#include "ArgMinMax.hpp"
	#include "BatchMatMul.hpp"
	#include "BatchSpace.hpp"
	#include "Comparison.hpp"
	#include "Convolution.hpp"
	#include "Control.hpp"
	#include "ElementwiseBinary.hpp"
	#include "ElementwiseUnary.hpp"
	#include "Fill.hpp"
	#include "FullyConnected.hpp"
	#include "Gather.hpp"
	#include "GatherNd.hpp"
	#include "LogicalBinary.hpp"
	#include "Lstm.hpp"
	#include "Normalization.hpp"
	#include "Pack.hpp"
	#include "Pad.hpp"
	#include "Pooling.hpp"
	#include "Prelu.hpp"
	#include "Quantization.hpp"
	#include "Redefine.hpp"
	#include "Reduce.hpp"
	#include "Resize.hpp"
	#include "Round.hpp"
	#include "Shape.hpp"
	#include "Slice.hpp"
	#include "StridedSlice.hpp"
	#include "Softmax.hpp"
	#include "SpaceDepth.hpp"
	#include "Split.hpp"
	#include "Transpose.hpp"
	#include "UnidirectionalSequenceLstm.hpp"
	#include "Unpack.hpp"

	#include <armnn/utility/IgnoreUnused.hpp>
	#include <armnnUtils/Filesystem.hpp>
	#include <armnn/utility/Timer.hpp>
	#include <flatbuffers/flatbuffers.h>
	#include <tensorflow/lite/context_util.h>
	#include <tensorflow/lite/schema/schema_generated.h>
	#include <tensorflow/lite/minimal_logging.h>
	#include <tensorflow/lite/logger.h>

	#include <algorithm>
	#include <iostream>
	#include <sstream>

	namespace armnnOpaqueDelegate
	{

	// Builds the delegate from the supplied options: optionally enables Arm NN logging, obtains the
	// Arm NN runtime and keeps only the requested backends the runtime reports as supported.
	// Throws armnn::InvalidArgumentException when no requested backend is supported.
	ArmnnOpaqueDelegate::ArmnnOpaqueDelegate(armnnDelegate::DelegateOptions options)
	    : m_Options(std::move(options))
	{
	    // Set up Arm NN logging when the options ask for it.
	    if (m_Options.IsLoggingEnabled())
	    {
	        armnn::ConfigureLogging(true, true, m_Options.GetLoggingSeverity());
	    }

	    // The runtime is shared by all armnn_delegate instances, so the RuntimeOptions cannot be
	    // altered from one delegate instance to the next.
	    m_Runtime = GetRuntime(m_Options.GetRuntimeOptions());

	    std::vector<armnn::BackendId> usableBackends;
	    if (m_Runtime)
	    {
	        const armnn::BackendIdSet runtimeBackends = m_Runtime->GetDeviceSpec().GetSupportedBackends();
	        for (auto& requested : m_Options.GetBackends())
	        {
	            const bool isKnown =
	                std::find(runtimeBackends.cbegin(), runtimeBackends.cend(), requested) != runtimeBackends.cend();
	            if (isKnown)
	            {
	                usableBackends.push_back(requested);
	                continue;
	            }

	            // Unknown backends are reported and dropped rather than treated as fatal.
	            TFLITE_LOG_PROD(tflite::TFLITE_LOG_INFO,
	                            "TfLiteArmnnDelegate: Requested unknown backend %s", requested.Get().c_str());
	        }
	    }

	    // Without at least one usable backend the delegate cannot do anything.
	    if (usableBackends.empty())
	    {
	        throw armnn::InvalidArgumentException("TfLiteArmnnOpaqueDelegate: No known backend specified.");
	    }
	    m_Options.SetBackends(usableBackends);

	    TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, "TfLiteArmnnOpaqueDelegate: Created TfLite ArmNN delegate.");
	}

	// Delegate "prepare" step. Asks the ArmnnOpaqueDelegate which nodes it can take over, builds the
	// kernel registration (init / free / prepare / invoke callbacks) and passes both to TfLite so the
	// selected node subsets are replaced with delegate kernels.
	//
	// Returns kTfLiteError when the list of nodes or the kernel registration cannot be created,
	// otherwise the status of TfLiteOpaqueContextReplaceNodeSubsetsWithDelegateKernels.
	TfLiteStatus DoPrepare(TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueDelegate* tfLiteDelegate)
	{
	    auto* armnnDelegate = static_cast<::armnnOpaqueDelegate::ArmnnOpaqueDelegate*>(tfLiteDelegate->data_);
	    TfLiteIntArray* supportedOperators = armnnDelegate->IdentifyOperatorsToDelegate(tfLiteContext);
	    if (supportedOperators == nullptr)
	    {
	        return kTfLiteError;
	    }

	    // ArmNN Opaque Delegate Registration
	    TfLiteRegistrationExternal* kernelRegistration =
	            TfLiteRegistrationExternalCreate(kTfLiteBuiltinDelegate, "TfLiteArmNNOpaqueDelegate", /*version=*/1);
	    if (kernelRegistration == nullptr)
	    {
	        // The node list is owned by this function; release it on this error path as well.
	        TfLiteIntArrayFree(supportedOperators);
	        return kTfLiteError;
	    }

	    // Init: the buffer carries the TfLiteOpaqueDelegateParams of the node subset being replaced;
	    // the returned ArmnnSubgraph is what the free / prepare / invoke callbacks below operate on.
	    TfLiteRegistrationExternalSetInit(
	            kernelRegistration,
	            [](TfLiteOpaqueContext* tfLiteContext, const char* buffer, size_t length) -> void*
	            {
	                armnn::IgnoreUnused(length);
	                const TfLiteOpaqueDelegateParams* parameters =
	                        reinterpret_cast<const TfLiteOpaqueDelegateParams*>(buffer);
	                if (parameters == nullptr)
	                {
	                    TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext,
	                                              "TfLiteArmnnOpaqueDelegate: Unable to get parameters.");
	                    return nullptr;
	                }

	                return static_cast<void*>(
	                        ArmnnSubgraph::Create(tfLiteContext,
	                                              parameters,
	                                              static_cast<::armnnOpaqueDelegate::ArmnnOpaqueDelegate*>(
	                                                      parameters->delegate->data_)));
	            }
	    );

	    // Free: destroys the ArmnnSubgraph created by the init callback.
	    TfLiteRegistrationExternalSetFree(
	            kernelRegistration,
	            [](TfLiteOpaqueContext* tfLiteContext, void* buffer) -> void
	            {
	                armnn::IgnoreUnused(tfLiteContext);
	                if (buffer != nullptr)
	                {
	                    delete static_cast<ArmnnSubgraph*>(buffer);
	                }
	            }
	    );

	    // Prepare: forwards to ArmnnSubgraph::Prepare; a node without user data is an error.
	    TfLiteRegistrationExternalSetPrepare(
	            kernelRegistration,
	            [](TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueNode* tfLiteNode) -> TfLiteStatus
	            {
	                void* userData = TfLiteOpaqueNodeGetUserData(tfLiteNode);
	                if (userData == nullptr)
	                {
	                    return kTfLiteError;
	                }
	                return static_cast<ArmnnSubgraph*>(userData)->Prepare(tfLiteContext);
	            }
	    );

	    // Invoke: forwards to ArmnnSubgraph::Invoke; a node without user data is an error.
	    TfLiteRegistrationExternalSetInvoke(
	            kernelRegistration,
	            [](TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueNode* tfLiteNode) -> TfLiteStatus
	            {
	                void* userData = TfLiteOpaqueNodeGetUserData(tfLiteNode);
	                if (userData == nullptr)
	                {
	                    return kTfLiteError;
	                }

	                return static_cast<ArmnnSubgraph*>(userData)->Invoke(tfLiteContext, tfLiteNode);
	            }
	    );

	    const TfLiteStatus status =
	            TfLiteOpaqueContextReplaceNodeSubsetsWithDelegateKernels(
	                    tfLiteContext, kernelRegistration, supportedOperators, tfLiteDelegate);

	    TfLiteIntArrayFree(supportedOperators);
	    return status;
	}

	// Creates a TfLite opaque delegate backed by a new ArmnnOpaqueDelegate.
	// settings is currently ignored: the delegate is always built from
	// TfLiteArmnnDelegateOptionsDefault() until DelegateOptions can parse the settings.
	TfLiteOpaqueDelegate* TfLiteArmnnOpaqueDelegateCreate(const void* settings)
	{
	    armnn::IgnoreUnused(settings);

	    ::armnnDelegate::DelegateOptions defaultOptions = TfLiteArmnnDelegateOptionsDefault();
	    ::armnnOpaqueDelegate::ArmnnOpaqueDelegate* delegateImpl =
	            new ::armnnOpaqueDelegate::ArmnnOpaqueDelegate(defaultOptions);

	    return TfLiteOpaqueDelegateCreate(delegateImpl->GetDelegateBuilder());
	}

	// Returns the default delegate options: a DelegateOptions built for the CpuRef compute device.
	::armnnDelegate::DelegateOptions TfLiteArmnnDelegateOptionsDefault()
	{
	    return ::armnnDelegate::DelegateOptions(armnn::Compute::CpuRef);
	}

	// Destroys a delegate: first the ArmnnOpaqueDelegate stored as its data, then the
	// TfLite opaque delegate itself. A null pointer is accepted and ignored.
	void TfLiteArmnnOpaqueDelegateDelete(TfLiteOpaqueDelegate* tfLiteDelegate)
	{
	    if (tfLiteDelegate == nullptr)
	    {
	        return;
	    }

	    auto* delegateImpl =
	            static_cast<::armnnOpaqueDelegate::ArmnnOpaqueDelegate*>(TfLiteOpaqueDelegateGetData(tfLiteDelegate));
	    delete delegateImpl;
	    TfLiteOpaqueDelegateDelete(tfLiteDelegate);
	}

	// Returns the plugin table for this delegate: its create, delete and errno entry points.
	// The table is a function-local static, so the returned pointer stays valid after the call.
	const TfLiteOpaqueDelegatePlugin* GetArmnnDelegatePluginApi()
	{
	    static constexpr TfLiteOpaqueDelegatePlugin pluginTable{
	            TfLiteArmnnOpaqueDelegateCreate,
	            TfLiteArmnnOpaqueDelegateDelete,
	            TfLiteArmnnOpaqueDelegateErrno};
	    return &pluginTable;
	}

	// Returns the opaque delegate version, as given by OPAQUE_DELEGATE_VERSION.
	const std::string ArmnnOpaqueDelegate::GetVersion()
	{
	    std::string version = OPAQUE_DELEGATE_VERSION;
	    return version;
	}

	// Walks the execution plan and collects the indices of the nodes that VisitNode accepts.
	//
	// Returns a newly created TfLiteIntArray holding the sorted node indices to delegate (possibly
	// empty); the caller releases it with TfLiteIntArrayFree. Returns nullptr when the execution
	// plan cannot be read or the array cannot be created.
	//
	// Throws armnn::Exception when unsupported operators were found and the options report that
	// fallback to the TfLite runtime is disabled.
	TfLiteIntArray* ArmnnOpaqueDelegate::IdentifyOperatorsToDelegate(TfLiteOpaqueContext* tfLiteContext)
	{
	    TfLiteIntArray* executionPlan = nullptr;
	    if (TfLiteOpaqueContextGetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk)
	    {
	        TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext, "TfLiteArmnnOpaqueDelegate: Unable to get graph execution plan.");
	        return nullptr;
	    }

	    // Delegate data with null network
	    DelegateData delegateData(m_Options.GetBackends());

	    // Allocate room for every node in the plan, then use size as the fill counter.
	    TfLiteIntArray* nodesToDelegate = TfLiteIntArrayCreate(executionPlan->size);
	    if (nodesToDelegate == nullptr)
	    {
	        TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext,
	                                  "TfLiteArmnnOpaqueDelegate: Unable to create int array from execution plan.");
	        return nullptr;
	    }
	    nodesToDelegate->size = 0;

	    // Builtin codes of rejected nodes; a set so each operator is reported once.
	    std::set<int32_t> unsupportedOperators;

	    for (int i = 0; i < executionPlan->size; ++i)
	    {
	        const int nodeIndex = executionPlan->data[i];

	        // If TfLiteOpaqueNodes can be delegated to ArmNN
	        TfLiteOpaqueNode* tfLiteNode = nullptr;
	        TfLiteRegistrationExternal* tfLiteRegistration = nullptr;

	        if (TfLiteOpaqueContextGetNodeAndRegistration(
	                tfLiteContext, nodeIndex, &tfLiteNode, &tfLiteRegistration) != kTfLiteOk)
	        {
	            TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext,
	                                      "TfLiteArmnnOpaqueDelegate: Unable to get node and registration for node %d.",
	                                      nodeIndex);
	            continue;
	        }

	        // An exception while visiting a node is treated the same as "not supported".
	        TfLiteStatus visitStatus;
	        try
	        {
	            visitStatus = ArmnnSubgraph::VisitNode(
	                    delegateData, tfLiteContext, tfLiteRegistration, tfLiteNode, nodeIndex);
	        }
	        catch (std::exception& ex)
	        {
	            ARMNN_LOG(error) << "ArmNN Failed to visit node with error: " << ex.what();
	            visitStatus = kTfLiteError;
	        }

	        if (visitStatus != kTfLiteOk)
	        {
	            // node is not supported by ArmNN
	            unsupportedOperators.insert(TfLiteRegistrationExternalGetBuiltInCode(tfLiteRegistration));
	            continue;
	        }

	        nodesToDelegate->data[nodesToDelegate->size++] = nodeIndex;
	    }

	    for (const int32_t builtinCode : unsupportedOperators)
	    {
	        TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext,
	                                  "Operator %s [%d] is not supported by armnn_opaque_delegate.",
	                                  tflite::EnumNameBuiltinOperator(tflite::BuiltinOperator(builtinCode)),
	                                  builtinCode);
	    }

	    if (!unsupportedOperators.empty() && m_Options.TfLiteRuntimeFallbackDisabled())
	    {
	        // The array is not returned on this path, so release it before throwing.
	        TfLiteIntArrayFree(nodesToDelegate);

	        std::stringstream exMessage;
	        exMessage << "TfLiteArmnnOpaqueDelegate: There are unsupported operators in the model. ";
	        exMessage << "Not falling back to TfLite Runtime as fallback is disabled. ";
	        exMessage << "This should only be disabled under test conditions.";
	        throw armnn::Exception(exMessage.str());
	    }
	    if (nodesToDelegate->size == 0)
	    {
	        ARMNN_LOG(info) << "No operators in this model are supported by the Arm NN TfLite delegate." <<
	                        " The model will be executed entirely by TfLite runtime.";
	    }

	    std::sort(nodesToDelegate->data, nodesToDelegate->data + nodesToDelegate->size);
	    return nodesToDelegate;
	}

	// Adds one Arm NN input layer per non-constant input tensor of the delegated subgraph.
	//
	// For every tensor id in `inputs` whose allocation type is not kTfLiteMmapRo, an input layer is
	// created with the tensor id as its binding id, the layer's output slot is recorded in
	// delegateData.m_OutputSlotForNode under that id, and (binding id, tensor info) is appended to
	// `inputBindings`.
	//
	// Returns kTfLiteError as soon as a tensor cannot be obtained from the context, otherwise kTfLiteOk.
	TfLiteStatus ArmnnSubgraph::AddInputLayer(DelegateData& delegateData,
	                                          TfLiteOpaqueContext* tfLiteContext,
	                                          const TfLiteIntArray* inputs,
	                                          std::vector<armnn::BindingPointInfo>& inputBindings)
	{
	    const size_t inputCount = static_cast<size_t>(inputs->size);
	    for (size_t position = 0; position < inputCount; ++position)
	    {
	        const int32_t tensorId = inputs->data[position];

	        const TfLiteOpaqueTensor* inputTensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId);
	        if (inputTensor == nullptr)
	        {
	            return kTfLiteError;
	        }

	        // Constant inputs get no binding; only the remaining tensors become input layers.
	        const bool isConstant = (TfLiteOpaqueTensorGetAllocationType(inputTensor) == kTfLiteMmapRo);
	        if (!isConstant)
	        {
	            const auto bindingId = static_cast<armnn::LayerBindingId>(tensorId);
	            armnn::IConnectableLayer* inputLayer = delegateData.m_Network->AddInputLayer(bindingId);

	            auto tensorInfo = GetTensorInfoForTfLiteOpaqueTensor(inputTensor);
	            armnn::IOutputSlot& slot = inputLayer->GetOutputSlot(0);
	            slot.SetTensorInfo(tensorInfo);

	            // Remember the producing slot so that later layers can connect to it.
	            delegateData.m_OutputSlotForNode[static_cast<unsigned long>(tensorId)] = &slot;

	            inputBindings.emplace_back(bindingId, tensorInfo);
	        }
	    }

	    return kTfLiteOk;
	}

	// Adds one Arm NN output layer per output tensor of the delegated subgraph.
	//
	// For every tensor id in `outputs`, an output layer is created with the tensor id as its binding id,
	// the slot recorded for that id in delegateData.m_OutputSlotForNode is connected to the layer's
	// input slot, and (binding id, tensor info) is appended to `outputBindings`.
	//
	// Returns kTfLiteError as soon as a tensor cannot be obtained from the context, otherwise kTfLiteOk.
	TfLiteStatus ArmnnSubgraph::AddOutputLayer(DelegateData& delegateData,
	                                           TfLiteOpaqueContext* tfLiteContext,
	                                           const TfLiteIntArray* outputs,
	                                           std::vector<armnn::BindingPointInfo>& outputBindings)
	{
	    const size_t outputCount = static_cast<size_t>(outputs->size);
	    for (size_t position = 0; position < outputCount; ++position)
	    {
	        const int32_t tensorId = outputs->data[position];

	        const TfLiteOpaqueTensor* outputTensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId);
	        if (outputTensor == nullptr)
	        {
	            return kTfLiteError;
	        }

	        const auto bindingId = static_cast<armnn::LayerBindingId>(tensorId);
	        armnn::IConnectableLayer* outputLayer = delegateData.m_Network->AddOutputLayer(bindingId);

	        auto tensorInfo = GetTensorInfoForTfLiteOpaqueTensor(outputTensor);

	        // The slot producing this tensor must already have been registered.
	        const auto slotIndex = static_cast<unsigned long>(tensorId);
	        ARMNN_ASSERT(delegateData.m_OutputSlotForNode[slotIndex] != nullptr);
	        delegateData.m_OutputSlotForNode[slotIndex]->Connect(outputLayer->GetInputSlot(0));

	        outputBindings.emplace_back(bindingId, tensorInfo);
	    }

	    return kTfLiteOk;
	}

	// Builds an Arm NN network from the TfLite nodes handed to the delegate, optimizes it, loads it into
	// the delegate's runtime and wraps the result in a new ArmnnSubgraph.
	//
	// tfLiteContext: opaque TfLite context used to look up tensors, nodes and registrations.
	// parameters:    delegate parameters; input_tensors, nodes_to_replace and output_tensors are read.
	// delegate:      provides the delegate options (m_Options) and the Arm NN runtime (m_Runtime).
	//
	// Returns an ArmnnSubgraph allocated with new, or nullptr if the execution plan cannot be obtained.
	// Throws armnn::Exception if inputs/outputs cannot be added, a node cannot be fetched or parsed,
	// or optimizing / loading the network fails.
	ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteOpaqueContext* tfLiteContext,
	                                     const TfLiteOpaqueDelegateParams* parameters,
	                                     const ArmnnOpaqueDelegate* delegate)
	{
	    const auto startTime = armnn::GetTimeNow();
	    ARMNN_LOG(info) << "ArmnnSubgraph creation";

	    // Only the status of this call is used; the plan itself is not read below.
	    TfLiteIntArray* executionPlan = nullptr;
	    if (TfLiteOpaqueContextGetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk)
	    {
	        return nullptr;
	    }

	    // Initialize DelegateData holds network and output slots information
	    DelegateData delegateData(delegate->m_Options.GetBackends());

	    // Build ArmNN Network
	    armnn::NetworkOptions networkOptions = delegate->m_Options.GetOptimizerOptions().m_ModelOptions;
	    armnn::NetworkId networkId;
	    delegateData.m_Network = armnn::INetwork::Create(networkOptions);

	    // One (initially empty) output slot entry per tensor known to the context.
	    delegateData.m_OutputSlotForNode = std::vector<armnn::IOutputSlot*>(
	        TfLiteOpaqueContextGetNumTensors(tfLiteContext), nullptr);

	    std::vector<armnn::BindingPointInfo> inputBindings;
	    std::vector<armnn::BindingPointInfo> outputBindings;

	    // Add input layer
	    auto status = AddInputLayer(delegateData, tfLiteContext, parameters->input_tensors, inputBindings);
	    if (status != kTfLiteOk)
	    {
	        throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to add Inputs to the network!");
	    }

	    // Parse TfLite delegate nodes to ArmNN
	    const auto parseStartTime = armnn::GetTimeNow();
	    for (int i = 0; i < parameters->nodes_to_replace->size; ++i)
	    {
	        const int nodeIndex = parameters->nodes_to_replace->data[i];

	        TfLiteOpaqueNode* tfLiteNode = nullptr;
	        TfLiteRegistrationExternal* tfLiteRegistration = nullptr;
	        if (TfLiteOpaqueContextGetNodeAndRegistration(
	                tfLiteContext, nodeIndex, &tfLiteNode, &tfLiteRegistration) != kTfLiteOk)
	        {
	            // Stream the index into the message; subscripting the literal with it would instead
	            // skip characters of the text (or read past its end).
	            std::stringstream exMessage;
	            exMessage << "TfLiteArmnnOpaqueDelegate: Unable to get node registration: " << nodeIndex;
	            throw armnn::Exception(exMessage.str());
	        }

	        if (VisitNode(delegateData, tfLiteContext, tfLiteRegistration, tfLiteNode, nodeIndex) != kTfLiteOk)
	        {
	            std::stringstream exMessage;
	            exMessage << "TfLiteArmnnOpaqueDelegate: Unable to parse node: " << nodeIndex;
	            throw armnn::Exception(exMessage.str());
	        }
	    }
	    ARMNN_LOG(info) << "Parse nodes to ArmNN time: " << std::setprecision(2)
	                    << std::fixed << armnn::GetTimeDuration(parseStartTime).count() << " ms";

	    // Add Output layer
	    status = AddOutputLayer(delegateData, tfLiteContext, parameters->output_tensors, outputBindings);
	    if (status != kTfLiteOk)
	    {
	        throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to add Outputs to the network!");
	    }

	    // Optimize ArmNN network
	    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
	    try
	    {
	        const auto optimizeStartTime = armnn::GetTimeNow();
	        optNet = armnn::Optimize(*(delegateData.m_Network.get()),
	                                 delegate->m_Options.GetBackends(),
	                                 delegate->m_Runtime->GetDeviceSpec(),
	                                 delegate->m_Options.GetOptimizerOptions());
	        ARMNN_LOG(info) << "Optimize ArmnnSubgraph time: " << std::setprecision(2)
	                        << std::fixed << armnn::GetTimeDuration(optimizeStartTime).count() << " ms";
	    }
	    catch (const std::exception& ex)
	    {
	        std::stringstream exMessage;
	        exMessage << "TfLiteArmnnOpaqueDelegate: Exception (" << ex.what() << ") caught from optimize.";
	        throw armnn::Exception(exMessage.str());
	    }
	    if (!optNet)
	    {
	        // Optimize failed
	        throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to optimize the network!");
	    }

	    // If set, we will serialize the optimized model into a dot file.
	    const std::string serializeToDotFile = delegate->m_Options.GetSerializeToDot();
	    if (!serializeToDotFile.empty())
	    {
	        ARMNN_LOG(info) << "Writing graph to dot file: " << serializeToDotFile;
	        fs::path filename = serializeToDotFile;
	        std::fstream file(filename.c_str(), std::ios_base::out);
	        optNet->SerializeToDot(file);
	    }

	    try
	    {
	        const auto loadStartTime = armnn::GetTimeNow();

	        // Load graph into runtime
	        std::string errorMessage;
	        armnn::Status loadingStatus;
	        armnn::MemorySource inputSource = armnn::MemorySource::Undefined;
	        armnn::MemorySource outputSource = armnn::MemorySource::Undefined;
	        // There's a bit of an assumption here that the delegate will only support Malloc memory source.
	        if (delegate->m_Options.GetOptimizerOptions().m_ImportEnabled)
	        {
	            inputSource = armnn::MemorySource::Malloc;
	        }
	        if (delegate->m_Options.GetOptimizerOptions().m_ExportEnabled)
	        {
	            outputSource = armnn::MemorySource::Malloc;
	        }
	        armnn::INetworkProperties networkProperties(false,
	                                                    inputSource,
	                                                    outputSource,
	                                                    delegate->m_Options.GetInternalProfilingState(),
	                                                    delegate->m_Options.GetInternalProfilingDetail());
	        loadingStatus = delegate->m_Runtime->LoadNetwork(networkId,
	                                                         std::move(optNet),
	                                                         errorMessage,
	                                                         networkProperties);
	        if (loadingStatus != armnn::Status::Success)
	        {
	            // Network load failed. This exception is caught by the handler below and re-wrapped.
	            throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Network could not be loaded: " + errorMessage);
	        }

	        ARMNN_LOG(info) << "Load ArmnnSubgraph time: " << std::setprecision(2)
	                        << std::fixed << armnn::GetTimeDuration(loadStartTime).count() << " ms";
	    }
	    catch (const std::exception& ex)
	    {
	        std::stringstream exMessage;
	        exMessage << "TfLiteArmnnOpaqueDelegate: Exception (" << ex.what() << ") caught from LoadNetwork.";
	        throw armnn::Exception(exMessage.str());
	    }

	    // Register debug callback function
	    if (delegate->m_Options.GetDebugCallbackFunction().has_value())
	    {
	        delegate->m_Runtime->RegisterDebugCallback(networkId,
	                                                   delegate->m_Options.GetDebugCallbackFunction().value());
	    }

	    ARMNN_LOG(info) << "Overall ArmnnSubgraph creation time: " << std::setprecision(2)
	                    << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms\n";

	    // Create a new SubGraph with networkId and runtime
	    return new ArmnnSubgraph(networkId, delegate->m_Runtime, inputBindings, outputBindings);
	}

	// Prepare step of the delegated subgraph.
	// Nothing is done with the context here; the call always reports kTfLiteOk.
	TfLiteStatus ArmnnSubgraph::Prepare(TfLiteOpaqueContext* tfLiteContext)
	{
	    // The context is deliberately unused.
	    armnn::IgnoreUnused(tfLiteContext);

	    constexpr TfLiteStatus prepareStatus = kTfLiteOk;
	    return prepareStatus;
	}

	// Runs the loaded Arm NN network for the delegated node.
	//
	// Input and output tensors of `tfLiteNode` are wrapped (no copy is made here) and passed to
	// EnqueueWorkload. Inputs whose allocation type is kTfLiteMmapRo are skipped, matching
	// AddInputLayer, which creates no binding for them. Each tensor is registered under its TfLite
	// tensor id, because that is the binding id AddInputLayer/AddOutputLayer gave to the layers.
	//
	// tfLiteContext: opaque TfLite context used to look up the tensors.
	// tfLiteNode:    the delegate node whose inputs and outputs are bound.
	//
	// Returns kTfLiteOk if EnqueueWorkload succeeds; kTfLiteError if it fails, a tensor cannot be
	// obtained, or there are more tensors to bind than stored bindings.
	// Throws armnn::Exception if the node's input or output lists cannot be read.
	TfLiteStatus ArmnnSubgraph::Invoke(TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueNode* tfLiteNode)
	{
	    // Prepare inputs
	    armnn::InputTensors inputTensors;
	    size_t inputIndex = 0;
	    const int* inputs = nullptr;
	    int numInputs = 0;
	    if (TfLiteOpaqueNodeInputs(tfLiteNode, &inputs, &numInputs) != kTfLiteOk)
	    {
	        throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to load subgraph inputs!");
	    }
	    for (int inputIdx = 0; inputIdx < numInputs; inputIdx++)
	    {
	        const int tensorId = inputs[inputIdx];
	        TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId);

	        if (!tensor)
	        {
	            return kTfLiteError;
	        }

	        if (TfLiteOpaqueTensorGetAllocationType(tensor) != kTfLiteMmapRo)
	        {
	            // Guard the positional lookup: more non-constant inputs than bindings cannot be served.
	            if (inputIndex >= m_InputBindings.size())
	            {
	                return kTfLiteError;
	            }

	            const armnn::BindingPointInfo& inputBinding = m_InputBindings[inputIndex];
	            armnn::TensorInfo inputTensorInfo = inputBinding.second;
	            inputTensorInfo.SetConstant(true);
	            const armnn::ConstTensor inputTensor(inputTensorInfo, TfLiteOpaqueTensorData(tensor));
	            // Bind by tensor id, not by loop position: the input layer was created with the tensor id.
	            inputTensors.emplace_back(tensorId, inputTensor);

	            ++inputIndex;
	        }
	    }

	    // Prepare outputs
	    armnn::OutputTensors outputTensors;
	    size_t outputIndex = 0;
	    const int* outputs = nullptr;
	    int numOutputs = 0;
	    if (TfLiteOpaqueNodeOutputs(tfLiteNode, &outputs, &numOutputs) != kTfLiteOk)
	    {
	        throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to load subgraph outputs!");
	    }
	    for (int outputIdx = 0; outputIdx < numOutputs; outputIdx++)
	    {
	        const int tensorId = outputs[outputIdx];
	        TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId);

	        if (!tensor)
	        {
	            return kTfLiteError;
	        }

	        // Guard the positional lookup before touching the stored bindings.
	        if (outputIndex >= m_OutputBindings.size())
	        {
	            return kTfLiteError;
	        }

	        const armnn::BindingPointInfo& outputBinding = m_OutputBindings[outputIndex];
	        const armnn::Tensor outputTensor(outputBinding.second, TfLiteOpaqueTensorData(tensor));
	        // Bind by tensor id, not by loop position: the output layer was created with the tensor id.
	        outputTensors.emplace_back(tensorId, outputTensor);

	        ++outputIndex;
	    }

	    // Run graph
	    auto status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
	    // The delegate holds its own Arm NN runtime so this is our last chance to print internal profiling data.
	    std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
	    if (profiler && profiler->IsProfilingEnabled())
	    {
	        profiler->Print(std::cout);
	    }
	    return (status == armnn::Status::Success) ? kTfLiteOk : kTfLiteError;
	}

	// Dispatches a TfLite node to its operator handler based on the registration's builtin code.
	//
	// The switch below contains no operator cases, so every builtin code takes the default branch
	// and the node is reported as kTfLiteError.
	TfLiteStatus ArmnnSubgraph::VisitNode(DelegateData& delegateData,
	                                      TfLiteOpaqueContext* tfLiteContext,
	                                      TfLiteRegistrationExternal* tfLiteRegistration,
	                                      TfLiteOpaqueNode* tfLiteNode,
	                                      int nodeIndex)
	{
	    const auto builtinCode = TfLiteRegistrationExternalGetBuiltInCode(tfLiteRegistration);
	    switch (builtinCode)
	    {
	        default:
	        {
	            return kTfLiteError;
	        }
	    }
	}

	} // armnnOpaqueDelegate namespace