blob: 49c5a72a908bc71fe16e78ec1ad3b4ac1ca93f57 [file] [log] [blame]
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "NeonWorkloadFactoryHelper.hpp"
#include <backendsCommon/MemCopyWorkload.hpp>
#include <aclCommon/test/CreateWorkloadClNeon.hpp>
#include <neon/NeonWorkloadFactory.hpp>
#include <neon/NeonTensorHandle.hpp>
#include <neon/workloads/NeonWorkloadUtils.hpp>
#include <neon/workloads/NeonWorkloads.hpp>
BOOST_AUTO_TEST_SUITE(CreateWorkloadNeon)
namespace
{

// Compares the ACL tensor info held by 'handle' against the ACL tensor info built from 'expectedInfo'.
// Returns true only if data type, number of dimensions, quantization info and every dimension size
// (up to the expected number of dimensions) match; returns false on the first mismatch.
// 'handle' is dereferenced unconditionally, so callers must pass a non-null pointer.
bool TestNeonTensorHandleInfo(armnn::IAclTensorHandle* handle, const armnn::TensorInfo& expectedInfo)
{
    using namespace armnn::armcomputetensorutils;

    const arm_compute::ITensorInfo* actual = handle->GetTensor().info();
    const arm_compute::TensorInfo wanted   = BuildArmComputeTensorInfo(expectedInfo);

    // Cheap scalar properties first; evaluated left to right and short-circuited on the first difference.
    if (actual->data_type() != wanted.data_type() ||
        actual->num_dimensions() != wanted.num_dimensions() ||
        actual->quantization_info() != wanted.quantization_info())
    {
        return false;
    }

    // The dimension counts are equal at this point, so walking the expected rank covers both tensors.
    const std::size_t rank = wanted.num_dimensions();
    std::size_t axis = 0;
    while (axis < rank)
    {
        if (actual->dimension(axis) != wanted.dimension(axis))
        {
            return false;
        }
        ++axis;
    }

    return true;
}

} // namespace
template <typename armnn::DataType DataType>
static void NeonCreateActivationWorkloadTest()
{
Graph graph;
NeonWorkloadFactory factory =
NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
auto workload = CreateActivationWorkloadTest<NeonActivationWorkload, DataType>(factory, graph);
// Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
ActivationQueueDescriptor queueDescriptor = workload->GetData();
auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType)));
BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType)));
}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 activation; compiled only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
{
    NeonCreateActivationWorkloadTest<armnn::DataType::Float16>();
}
#endif

// Float32 activation.
BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
{
    NeonCreateActivationWorkloadTest<armnn::DataType::Float32>();
}
// Creates a two-input, one-output elementwise workload of type WorkloadType via the shared
// CreateElementwiseWorkloadTest helper and verifies that both inputs and the output are
// {2, 3} tensors of the requested data type.
template <typename WorkloadType,
          typename DescriptorType,
          typename LayerType,
          armnn::DataType DataType>
static void NeonCreateElementwiseWorkloadTest()
{
    Graph network;
    NeonWorkloadFactory neonFactory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto elementwise =
        CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(neonFactory, network);

    const DescriptorType& descriptor = elementwise->GetData();
    auto inputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Inputs[0]);
    auto inputHandle2 = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Inputs[1]);
    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Outputs[0]);

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType)));
}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 addition; compiled only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
{
    NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload, AdditionQueueDescriptor, AdditionLayer,
                                      DataType::Float16>();
}
#endif

// Float32 addition.
BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
{
    NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload, AdditionQueueDescriptor, AdditionLayer,
                                      DataType::Float32>();
}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 subtraction; compiled only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
{
    NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload, SubtractionQueueDescriptor, SubtractionLayer,
                                      DataType::Float16>();
}
#endif

// Float32 subtraction.
BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
{
    NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload, SubtractionQueueDescriptor, SubtractionLayer,
                                      DataType::Float32>();
}

// QuantisedAsymm8 subtraction.
BOOST_AUTO_TEST_CASE(CreateSubtractionUint8Workload)
{
    NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload, SubtractionQueueDescriptor, SubtractionLayer,
                                      DataType::QuantisedAsymm8>();
}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 multiplication; compiled only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16Workload)
{
    NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload, MultiplicationQueueDescriptor,
                                      MultiplicationLayer, DataType::Float16>();
}
#endif

// Float32 multiplication.
BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload)
{
    NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload, MultiplicationQueueDescriptor,
                                      MultiplicationLayer, DataType::Float32>();
}

// QuantisedAsymm8 multiplication.
BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8Workload)
{
    NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload, MultiplicationQueueDescriptor,
                                      MultiplicationLayer, DataType::QuantisedAsymm8>();
}
// Creates a batch normalization workload for the given data layout via the shared
// CreateBatchNormalizationWorkloadTest helper and verifies the input/output tensor infos.
// Expected shape is {2, 3, 4, 4} for NCHW and {2, 4, 4, 3} otherwise; input and output share it.
template <typename BatchNormalizationWorkloadType, typename armnn::DataType DataType>
static void NeonCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph network;
    NeonWorkloadFactory neonFactory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto batchNorm = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
                     (neonFactory, network, dataLayout);

    // Expected tensor infos: see the definition of CreateBatchNormalizationWorkloadTest.
    const BatchNormalizationQueueDescriptor& descriptor = batchNorm->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Outputs[0]);

    const bool isNchw = (dataLayout == DataLayout::NCHW);
    const TensorShape inputShape  = isNchw ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
    const TensorShape outputShape = inputShape;

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 batch normalization (both layouts); compiled only when
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload,
                                             DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NhwcWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload,
                                             DataType::Float16>(DataLayout::NHWC);
}
#endif

// Float32 batch normalization, NCHW layout.
BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload,
                                             DataType::Float32>(DataLayout::NCHW);
}

// Float32 batch normalization, NHWC layout.
BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload,
                                             DataType::Float32>(DataLayout::NHWC);
}
// Creates a NeonConvolution2dWorkload for the given data layout (NCHW when omitted) via the shared
// CreateConvolution2dWorkloadTest helper and verifies the input/output tensor infos.
template <typename armnn::DataType DataType>
static void NeonCreateConvolution2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
{
    Graph network;
    NeonWorkloadFactory neonFactory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto convolution =
        CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload, DataType>(neonFactory, network, dataLayout);

    const bool isNchw = (dataLayout == DataLayout::NCHW);
    const TensorShape inputShape  = isNchw ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
    const TensorShape outputShape = isNchw ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};

    // Expected tensor infos: see the definition of CreateConvolution2dWorkloadTest.
    const Convolution2dQueueDescriptor& descriptor = convolution->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Outputs[0]);

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 convolution (both layouts); compiled only when
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float16>(DataLayout::NHWC);
}
#endif

// Float32 convolution, NCHW layout.
BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NCHW);
}

// Float32 convolution, NHWC layout.
BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
}
// Creates a NeonDepthwiseConvolutionWorkload for the given data layout via the shared
// CreateDepthwiseConvolution2dWorkloadTest helper and verifies the input/output tensor infos.
// Expected shape is {2, 2, 5, 5} for NCHW and {2, 5, 5, 2} otherwise; input and output share it.
template <typename armnn::DataType DataType>
static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
{
    Graph network;
    NeonWorkloadFactory neonFactory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto depthwise = CreateDepthwiseConvolution2dWorkloadTest<NeonDepthwiseConvolutionWorkload,
                                                              DataType>(neonFactory, network, dataLayout);

    // Expected tensor infos: see the definition of CreateDepthwiseConvolution2dWorkloadTest.
    const DepthwiseConvolution2dQueueDescriptor& descriptor = depthwise->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Outputs[0]);

    const bool isNchw = (dataLayout == DataLayout::NCHW);
    const TensorShape inputShape  = isNchw ? TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };
    const TensorShape outputShape = inputShape;

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}
// Float32 depthwise convolution, NHWC layout.
BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat32NhwcWorkload)
{
    NeonCreateDepthWiseConvolutionWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 depthwise convolution, NHWC layout; compiled only when
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat16NhwcWorkload)
{
    NeonCreateDepthWiseConvolutionWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC);
}
#endif
template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
static void NeonCreateFullyConnectedWorkloadTest()
{
Graph graph;
NeonWorkloadFactory factory =
NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
// Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType)));
BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType)));
}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 fully connected; compiled only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16Workload)
{
    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, armnn::DataType::Float16>();
}
#endif

// Float32 fully connected.
BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkload)
{
    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, armnn::DataType::Float32>();
}
// Creates a normalization workload for the given data layout via the shared
// CreateNormalizationWorkloadTest helper and verifies the input/output tensor infos.
// Expected shape is {3, 5, 5, 1} for NCHW and {3, 1, 5, 5} otherwise; input and output share it.
template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
static void NeonCreateNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph network;
    NeonWorkloadFactory neonFactory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto normalization =
        CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(neonFactory, network, dataLayout);

    // Expected tensor infos: see the definition of CreateNormalizationWorkloadTest.
    const NormalizationQueueDescriptor& descriptor = normalization->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Outputs[0]);

    const bool isNchw = (dataLayout == DataLayout::NCHW);
    const TensorShape inputShape  = isNchw ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
    const TensorShape outputShape = inputShape;

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 normalization (both layouts); compiled only when
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload,
                                        DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload,
                                        DataType::Float16>(DataLayout::NHWC);
}
#endif

// Float32 normalization, NCHW layout.
BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNchwWorkload)
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload,
                                        DataType::Float32>(DataLayout::NCHW);
}

// Float32 normalization, NHWC layout.
BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNhwcWorkload)
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload,
                                        DataType::Float32>(DataLayout::NHWC);
}
// Creates a NeonPooling2dWorkload for the given data layout (NCHW when omitted) via the shared
// CreatePooling2dWorkloadTest helper and verifies the input/output tensor infos.
template <typename armnn::DataType DataType>
static void NeonCreatePooling2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
{
    Graph network;
    NeonWorkloadFactory neonFactory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto pooling = CreatePooling2dWorkloadTest<NeonPooling2dWorkload, DataType>(neonFactory, network, dataLayout);

    const bool isNchw = (dataLayout == DataLayout::NCHW);
    const TensorShape inputShape  = isNchw ? TensorShape{3, 2, 5, 5} : TensorShape{3, 5, 5, 2};
    const TensorShape outputShape = isNchw ? TensorShape{3, 2, 2, 4} : TensorShape{3, 2, 4, 2};

    // Expected tensor infos: see the definition of CreatePooling2dWorkloadTest.
    const Pooling2dQueueDescriptor& descriptor = pooling->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Outputs[0]);

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 pooling, NCHW layout; compiled only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16Workload)
{
    NeonCreatePooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW);
}
#endif

// Float32 pooling, NCHW layout.
BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload)
{
    NeonCreatePooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW);
}

// Float32 pooling, NHWC layout.
BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload)
{
    NeonCreatePooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
}

// QuantisedAsymm8 pooling, NCHW layout.
BOOST_AUTO_TEST_CASE(CreatePooling2dUint8NchwWorkload)
{
    NeonCreatePooling2dWorkloadTest<armnn::DataType::QuantisedAsymm8>(DataLayout::NCHW);
}

// QuantisedAsymm8 pooling, NHWC layout.
BOOST_AUTO_TEST_CASE(CreatePooling2dUint8NhwcWorkload)
{
    NeonCreatePooling2dWorkloadTest<armnn::DataType::QuantisedAsymm8>(DataLayout::NHWC);
}
// Creates a NeonPreluWorkload via the shared CreatePreluWorkloadTest helper using the supplied
// input, alpha and output shapes, then verifies that the workload's two inputs (input, alpha)
// and its output carry exactly those shapes with the given data type.
static void NeonCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
                                        const armnn::TensorShape& alphaShape,
                                        const armnn::TensorShape& outputShape,
                                        armnn::DataType dataType)
{
    Graph network;
    NeonWorkloadFactory neonFactory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto prelu = CreatePreluWorkloadTest<NeonPreluWorkload>(
        neonFactory, network, inputShape, alphaShape, outputShape, dataType);

    // Expected tensor infos are the shapes passed in by the caller.
    const PreluQueueDescriptor& descriptor = prelu->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Inputs[0]);
    auto alphaHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Inputs[1]);
    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Outputs[0]);

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, dataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(alphaHandle, TensorInfo(alphaShape, dataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, dataType)));
}
// All PReLU cases use input { 1, 4, 1, 2 }, alpha { 5, 4, 3, 1 } and output { 5, 4, 3, 2 }.
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 PReLU; compiled only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
BOOST_AUTO_TEST_CASE(CreatePreluFloat16Workload)
{
    NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, armnn::DataType::Float16);
}
#endif

// Float32 PReLU.
BOOST_AUTO_TEST_CASE(CreatePreluFloatWorkload)
{
    NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, armnn::DataType::Float32);
}

// QuantisedAsymm8 PReLU.
BOOST_AUTO_TEST_CASE(CreatePreluUint8Workload)
{
    NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, armnn::DataType::QuantisedAsymm8);
}
template <typename armnn::DataType DataType>
static void NeonCreateReshapeWorkloadTest()
{
Graph graph;
NeonWorkloadFactory factory =
NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
auto workload = CreateReshapeWorkloadTest<NeonReshapeWorkload, DataType>(factory, graph);
// Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
ReshapeQueueDescriptor queueDescriptor = workload->GetData();
auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType)));
}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 reshape; compiled only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
{
    NeonCreateReshapeWorkloadTest<armnn::DataType::Float16>();
}
#endif

// Float32 reshape.
BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload)
{
    NeonCreateReshapeWorkloadTest<armnn::DataType::Float32>();
}

// QuantisedAsymm8 reshape.
BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
{
    NeonCreateReshapeWorkloadTest<armnn::DataType::QuantisedAsymm8>();
}
template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
static void NeonCreateSoftmaxWorkloadTest()
{
Graph graph;
NeonWorkloadFactory factory =
NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
// Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({4, 1}, DataType)));
}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 softmax; compiled only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16Workload)
{
    NeonCreateSoftmaxWorkloadTest<NeonSoftmaxFloatWorkload, armnn::DataType::Float16>();
}
#endif

// Float32 softmax.
BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkload)
{
    NeonCreateSoftmaxWorkloadTest<NeonSoftmaxFloatWorkload, armnn::DataType::Float32>();
}
template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
static void NeonSpaceToDepthWorkloadTest()
{
Graph graph;
NeonWorkloadFactory factory =
NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 1, 2, 2, 1 }, DataType)));
BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 1, 1, 1, 4 }, DataType)));
}
// Float32 space-to-depth.
BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat32Workload)
{
    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, DataType::Float32>();
}

// Float16 space-to-depth.
// NOTE(review): unlike the other Float16 cases in this file, this one is not wrapped in
// #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - confirm that this is intentional.
BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat16Workload)
{
    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, DataType::Float16>();
}

// QuantisedAsymm8 space-to-depth.
BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQAsymm8Workload)
{
    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, DataType::QuantisedAsymm8>();
}

// QuantisedSymm16 space-to-depth.
BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQSymm16Workload)
{
    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, DataType::QuantisedSymm16>();
}
// Creates a Float32 NeonSplitterWorkload via the shared CreateSplitterWorkloadTest helper and
// verifies the {5, 7, 7} input together with its three outputs: {1, 7, 7}, {2, 7, 7}, {2, 7, 7}.
BOOST_AUTO_TEST_CASE(CreateSplitterWorkload)
{
    Graph network;
    NeonWorkloadFactory neonFactory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto splitter = CreateSplitterWorkloadTest<NeonSplitterWorkload, DataType::Float32>(neonFactory, network);

    // Expected tensor infos: see the definition of CreateSplitterWorkloadTest.
    const SplitterQueueDescriptor& descriptor = splitter->GetData();

    auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Inputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));

    auto outputHandle0 = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));

    auto outputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Outputs[1]);
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));

    auto outputHandle2 = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Outputs[2]);
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
}
BOOST_AUTO_TEST_CASE(CreateSplitterConcat)
{
    // Tests that it is possible to decide which output of the splitter layer
    // should be linked to which input of the concat layer.
    // Here the 0th output of the splitter is expected to be the 1st input of the concat,
    // and the 1st output of the splitter the 0th input of the concat.
    Graph network;
    NeonWorkloadFactory neonFactory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workloadPair =
        CreateSplitterConcatWorkloadTest<NeonSplitterWorkload, NeonConcatWorkload,
                                         DataType::Float32>(neonFactory, network);

    auto wlSplitter = std::move(workloadPair.first);
    auto wlConcat   = std::move(workloadPair.second);

    // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    // dynamic_cast is used so that a handle of the wrong type is reported by the BOOST_TEST null checks.
    auto* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    auto* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    auto* mIn0  = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
    auto* mIn1  = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[1]);

    BOOST_TEST(sOut0);
    BOOST_TEST(sOut1);
    BOOST_TEST(mIn0);
    BOOST_TEST(mIn1);

    // The connections are crossed: splitter output 0 -> concat input 1, splitter output 1 -> concat input 0.
    const bool firstOutputCrossed  = (sOut0 == mIn1);
    const bool secondOutputCrossed = (sOut1 == mIn0);
    bool validDataPointers = firstOutputCrossed && secondOutputCrossed;

    BOOST_TEST(validDataPointers);
}
BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
{
    // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a
    // splitter layer. A splitter with two outputs is created, and each of those outputs is used by two
    // different activation layers.
    Graph network;
    NeonWorkloadFactory neonFactory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    // Filled in by CreateSplitterMultipleInputsOneOutputWorkloadTest below.
    std::unique_ptr<NeonSplitterWorkload> wlSplitter;
    std::unique_ptr<NeonActivationWorkload> wlActiv0_0;
    std::unique_ptr<NeonActivationWorkload> wlActiv0_1;
    std::unique_ptr<NeonActivationWorkload> wlActiv1_0;
    std::unique_ptr<NeonActivationWorkload> wlActiv1_1;

    CreateSplitterMultipleInputsOneOutputWorkloadTest<NeonSplitterWorkload,
        NeonActivationWorkload, DataType::Float32>(neonFactory, network, wlSplitter,
                                                   wlActiv0_0, wlActiv0_1, wlActiv1_0, wlActiv1_1);

    auto* sOut0      = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    auto* sOut1      = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    auto* activ0_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
    auto* activ0_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
    auto* activ1_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
    auto* activ1_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);

    BOOST_TEST(sOut0);
    BOOST_TEST(sOut1);
    BOOST_TEST(activ0_0Im);
    BOOST_TEST(activ0_1Im);
    BOOST_TEST(activ1_0Im);
    BOOST_TEST(activ1_1Im);

    // Both activations of each pair must read from the same splitter output handle.
    const bool firstOutputShared  = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im);
    const bool secondOutputShared = (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
    bool validDataPointers = firstOutputShared && secondOutputShared;

    BOOST_TEST(validDataPointers);
}
// Runs the shared CreateMemCopyWorkloads check against a Neon workload factory,
// using IAclTensorHandle as the tensor handle type.
BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsNeon)
{
    NeonWorkloadFactory neonFactory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    CreateMemCopyWorkloads<IAclTensorHandle>(neonFactory);
}
// Creates an L2 normalization workload for the given data layout via the shared
// CreateL2NormalizationWorkloadTest helper and verifies the input/output tensor infos.
// Expected shape is { 5, 20, 50, 67 } for NCHW and { 5, 50, 67, 20 } otherwise; input and output share it.
template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
static void NeonCreateL2NormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph network;
    NeonWorkloadFactory neonFactory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto l2Normalization =
        CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(neonFactory, network, dataLayout);

    // Expected tensor infos: see the definition of CreateL2NormalizationWorkloadTest.
    const L2NormalizationQueueDescriptor& descriptor = l2Normalization->GetData();
    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(descriptor.m_Outputs[0]);

    const bool isNchw = (dataLayout == DataLayout::NCHW);
    const TensorShape inputShape  = isNchw ? TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
    const TensorShape outputShape = inputShape;

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Float16 L2 normalization (both layouts); compiled only when
// __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload,
                                          DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload,
                                          DataType::Float16>(DataLayout::NHWC);
}
#endif

// Float32 L2 normalization, NCHW layout.
BOOST_AUTO_TEST_CASE(CreateL2NormalizationNchwWorkload)
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload,
                                          DataType::Float32>(DataLayout::NCHW);
}

// Float32 L2 normalization, NHWC layout.
BOOST_AUTO_TEST_CASE(CreateL2NormalizationNhwcWorkload)
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload,
                                          DataType::Float32>(DataLayout::NHWC);
}
template <typename LstmWorkloadType>
static void NeonCreateLstmWorkloadTest()
{
Graph graph;
NeonWorkloadFactory factory =
NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
LstmQueueDescriptor queueDescriptor = workload->GetData();
auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 2, 2 }, DataType::Float32)));
BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 2, 4 }, DataType::Float32)));
}
// LSTM workload creation using NeonLstmFloatWorkload.
BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload)
{
    NeonCreateLstmWorkloadTest<NeonLstmFloatWorkload>();
}
template <typename ConcatWorkloadType, armnn::DataType DataType>
static void NeonCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
unsigned int concatAxis)
{
Graph graph;
NeonWorkloadFactory factory =
NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
ConcatQueueDescriptor queueDescriptor = workload->GetData();
auto inputHandle0 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
auto inputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
BOOST_TEST(TestNeonTensorHandleInfo(inputHandle0, TensorInfo({ 2, 3, 2, 5 }, DataType)));
BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({ 2, 3, 2, 5 }, DataType)));
BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}
// Float32 concat along axis 0: expected output { 4, 3, 2, 5 }.
BOOST_AUTO_TEST_CASE(CreateConcatDim0Float32Workload)
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, DataType::Float32>({ 4, 3, 2, 5 }, 0);
}

// Float32 concat along axis 1: expected output { 2, 6, 2, 5 }.
BOOST_AUTO_TEST_CASE(CreateConcatDim1Float32Workload)
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, DataType::Float32>({ 2, 6, 2, 5 }, 1);
}

// Float32 concat along axis 3: expected output { 2, 3, 2, 10 }.
BOOST_AUTO_TEST_CASE(CreateConcatDim3Float32Workload)
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, DataType::Float32>({ 2, 3, 2, 10 }, 3);
}

// QuantisedAsymm8 concat along axis 0: expected output { 4, 3, 2, 5 }.
BOOST_AUTO_TEST_CASE(CreateConcatDim0Uint8Workload)
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, DataType::QuantisedAsymm8>({ 4, 3, 2, 5 }, 0);
}

// QuantisedAsymm8 concat along axis 1: expected output { 2, 6, 2, 5 }.
BOOST_AUTO_TEST_CASE(CreateConcatDim1Uint8Workload)
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, DataType::QuantisedAsymm8>({ 2, 6, 2, 5 }, 1);
}

// QuantisedAsymm8 concat along axis 3: expected output { 2, 3, 2, 10 }.
BOOST_AUTO_TEST_CASE(CreateConcatDim3Uint8Workload)
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, DataType::QuantisedAsymm8>({ 2, 3, 2, 10 }, 3);
}
BOOST_AUTO_TEST_SUITE_END()