//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "LayersFwd.hpp"
#include <Network.hpp>
#include <ResolveType.hpp>
#include <armnn/INetwork.hpp>
#include "test/GraphUtils.hpp"
#include <test/TestUtils.hpp>
#include <doctest/doctest.h>
#include <QuantizeHelper.hpp>
#include <string>
using namespace armnn;
namespace
{
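// Builds a vector of linearly spaced values: element i is T(initial + i * increment).
// For example, GetVector<float>(4, 1.0f, 0.1f) yields {1.0f, 1.1f, 1.2f, 1.3f}.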
template<typename T>
std::vector<T> GetVector(unsigned int size, float initial, float increment)
{
std::vector<T> vector(size);
for (unsigned int i = 0; i < size; ++i)
{
vector[i] = T(initial + (increment * static_cast<float>(i)));
}
return vector;
}
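// Each *Test struct below is a small fixture describing one "receiver" layer type that
// the optimizer can fuse a following Activation layer into. It exposes the tensor
// shapes and element counts used by the tests, a helper that adds the receiver layer
// to a network and, where weights are passed as inputs, a helper that adds the
// required constant layers.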
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct Convolution2dTest
{
using LayerType = Convolution2dLayer;
static const bool isElementWise = false;
static const bool isConstTensorAsInputSupported = false;
static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 4}); } // NHWCout
static TensorShape GetWeightsShape() { return TensorShape( {4, 2, 2, 3}); } // CoutHWCin
constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
constexpr static const unsigned int outputSize = 36; // batchOut * heightOut * widthOut * channelOut
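// Adds a 2x2, stride-1 NHWC convolution with fixed weights and no bias.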
static IConnectableLayer* AddReceiverLayer(INetwork* network,
const char* name,
float scale = 1.f,
int32_t offset = 0)
{
Convolution2dDescriptor descriptor;
descriptor.m_DataLayout = DataLayout::NHWC;
descriptor.m_StrideX = 1;
descriptor.m_StrideY = 1;
std::vector<float> weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
ConstTensor weights(weightsInfo, weightsVector);
Optional<ConstTensor> optionalBias;
return network->AddConvolution2dLayer(descriptor, weights, optionalBias, name);
}
static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
float scale = 1.f,
int32_t offset = 0)
{
IgnoreUnused(network);
IgnoreUnused(scale);
IgnoreUnused(offset);
return {};
}
};
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct DWConvolution2dTest
{
public:
using LayerType = DepthwiseConvolution2dLayer;
static const bool isElementWise = false;
static const bool isConstTensorAsInputSupported = false;
static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // [N,H,W,Cin]
static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 12}); } // [N,H,W,Cout]
static TensorShape GetWeightsShape() { return TensorShape( {1, 2, 2, 12}); } // [1,H,W,Cout]
constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
constexpr static const unsigned int outputSize = 108; // batchOut * heightOut * widthOut * channelOut
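// Adds a 2x2, stride-1 NHWC depthwise convolution (3 input channels, depth multiplier 4)
// with fixed weights and no bias.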
static IConnectableLayer* AddReceiverLayer(INetwork* network,
const char* name,
float scale = 1.f,
int32_t offset = 0)
{
DepthwiseConvolution2dDescriptor descriptor;
descriptor.m_BiasEnabled = false;
descriptor.m_DataLayout = DataLayout::NHWC;
descriptor.m_StrideX = 1;
descriptor.m_StrideY = 1;
std::vector<float> weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
ConstTensor weights(weightsInfo, weightsVector);
Optional<ConstTensor> optionalBias;
return network->AddDepthwiseConvolution2dLayer(descriptor, weights, optionalBias, name);
}
static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
float scale = 1.f,
int32_t offset = 0)
{
IgnoreUnused(network);
IgnoreUnused(scale);
IgnoreUnused(offset);
return {};
}
};
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct FullyConnectedTest
{
public:
using LayerType = FullyConnectedLayer;
static const bool isElementWise = false;
static const bool isConstTensorAsInputSupported = true;
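// FullyConnected receives its weights through an extra input slot fed by a ConstantLayer
// (added in AddConstantLayers below) rather than having them embedded in the layer.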
static TensorShape GetInputShape() { return TensorShape( {2, 5, 1, 1}); } // NCinHW
static TensorShape GetOutputShape() { return TensorShape( {2, 3}); } // NCout
static TensorShape GetWeightsShape() { return TensorShape( {5, 3}); } // CinCout
constexpr static const unsigned int inputSize = 10; // batchIn * heightIn * widthIn * channelIn
constexpr static const unsigned int outputSize = 6; // batchOut * heightOut * widthOut * channelOut
static IConnectableLayer* AddReceiverLayer(INetwork* network,
const char* name,
float scale = 1.f,
int32_t offset = 0)
{
IgnoreUnused(scale);
IgnoreUnused(offset);
FullyConnectedDescriptor descriptor;
descriptor.m_BiasEnabled = false;
return network->AddFullyConnectedLayer(descriptor, name);
}
static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
float scale = 1.f,
int32_t offset = 0)
{
std::vector<float> weightsData = { 1, 2, 3, 4, 5,
6, 7, 8, 9, 10,
11, 12, 13, 14, 15};
std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
ConstTensor weights(weightsInfo, weightsVector);
IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
std::vector<IConnectableLayer*> layers = { weightsLayer };
return layers;
}
};
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct BatchNormTest
{
public:
using LayerType = BatchNormalizationLayer;
static const bool isElementWise = false;
static const bool isConstTensorAsInputSupported = false;
static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
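// Adds an NHWC batch-norm layer whose per-channel beta/gamma/mean/variance vectors are
// generated with GetVector.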
static IConnectableLayer* AddReceiverLayer(INetwork* network,
const char* name,
float scale = 1.f,
int32_t offset = 0)
{
IgnoreUnused(scale);
IgnoreUnused(offset);
BatchNormalizationDescriptor descriptor;
descriptor.m_DataLayout = DataLayout::NHWC;
std::vector<T> betaVector = GetVector<T>(GetOutputShape()[3], 0.0f, 0.2f);
std::vector<T> gammaVector = GetVector<T>(GetOutputShape()[3], 0.5f, 0.1f);
std::vector<T> meanVector = GetVector<T>(GetOutputShape()[3], 0.1f, 0.1f);
std::vector<T> varianceVector = GetVector<T>(GetOutputShape()[3], 1.0f, 0.1f);
const unsigned int outputChannelSize[] = { GetOutputShape()[3] };
ConstTensor beta(TensorInfo(1, outputChannelSize, ArmnnType), betaVector);
ConstTensor gamma(TensorInfo(1, outputChannelSize, ArmnnType), gammaVector);
ConstTensor mean(TensorInfo(1, outputChannelSize, ArmnnType), meanVector);
ConstTensor variance(TensorInfo(1, outputChannelSize, ArmnnType), varianceVector);
return network->AddBatchNormalizationLayer(descriptor, mean, variance, beta, gamma, name);
}
static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
float scale = 1.f,
int32_t offset = 0)
{
IgnoreUnused(network);
IgnoreUnused(scale);
IgnoreUnused(offset);
return {};
}
};
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct MultiplicationTest
{
using LayerType = MultiplicationLayer;
static const bool isElementWise = true;
static const bool isConstTensorAsInputSupported = false;
static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
static IConnectableLayer* AddReceiverLayer(INetwork* network,
const char* name,
float scale = 1.f,
int32_t offset = 0)
{
IgnoreUnused(scale);
IgnoreUnused(offset);
return network->AddMultiplicationLayer(name);
}
static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
float scale = 1.f,
int32_t offset = 0)
{
IgnoreUnused(network);
IgnoreUnused(scale);
IgnoreUnused(offset);
return {};
}
};
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct AdditionTest
{
using LayerType = AdditionLayer;
static const bool isElementWise = true;
static const bool isConstTensorAsInputSupported = false;
static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
static IConnectableLayer* AddReceiverLayer(INetwork* network,
const char* name,
float scale = 1.f,
int32_t offset = 0)
{
IgnoreUnused(scale);
IgnoreUnused(offset);
return network->AddAdditionLayer(name);
}
static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
float scale = 1.f,
int32_t offset = 0)
{
IgnoreUnused(network);
IgnoreUnused(scale);
IgnoreUnused(offset);
return {};
}
};
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct SubtractionTest
{
using LayerType = SubtractionLayer;
static const bool isElementWise = true;
static const bool isConstTensorAsInputSupported = false;
static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
static IConnectableLayer* AddReceiverLayer(INetwork* network,
const char* name,
float scale = 1.f,
int32_t offset = 0)
{
IgnoreUnused(scale);
IgnoreUnused(offset);
return network->AddSubtractionLayer(name);
}
static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
float scale = 1.f,
int32_t offset = 0)
{
IgnoreUnused(network);
IgnoreUnused(scale);
IgnoreUnused(offset);
return {};
}
};
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct DivisionTest
{
using LayerType = DivisionLayer;
static const bool isElementWise = true;
static const bool isConstTensorAsInputSupported = false;
static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
static IConnectableLayer* AddReceiverLayer(INetwork* network,
const char* name,
float scale = 1.f,
int32_t offset = 0)
{
IgnoreUnused(scale);
IgnoreUnused(offset);
return network->AddDivisionLayer(name);
}
static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
float scale = 1.f,
int32_t offset = 0)
{
IgnoreUnused(network);
IgnoreUnused(scale);
IgnoreUnused(offset);
return {};
}
};
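// Builds the network under test:
//
//     Input -> receiver layer -> Activation -> Output
//
// Element-wise receivers take the same input on both of their input slots. When
// preventFusing is true, the receiver's output is also connected to a second Output
// layer, which stops the optimizer from folding the Activation into the receiver.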
template<typename LayerTest,
DataType ArmnnType>
INetworkPtr CreateNetwork(ActivationDescriptor activationDescriptor, bool preventFusing,
float scale, int32_t offset)
{
// Create a network
INetworkPtr network = INetwork::Create();
IConnectableLayer* inputLayer = network->AddInputLayer(0);
IConnectableLayer* receiverLayer = LayerTest::AddReceiverLayer(network.get(),
"receiverLayer",
scale,
offset);
IConnectableLayer* activationLayer = network->AddActivationLayer(activationDescriptor,
"activation");
IConnectableLayer* outputLayer = network->AddOutputLayer(0);
IConnectableLayer* output2Layer = preventFusing ? network->AddOutputLayer(1) : nullptr;
// If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
if(LayerTest::isConstTensorAsInputSupported)
{
std::vector<IConnectableLayer*> constantLayers = LayerTest::AddConstantLayers(network.get(),
scale,
offset);
// Connect constant layers to receiverLayer.
for (unsigned int i = 0; i < constantLayers.size(); ++i)
{
constantLayers[i]->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(i + 1));
}
}
// Define layer information
TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, scale, offset);
TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, scale, offset);
// Set layer information
inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
receiverLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
activationLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
// Connect layers
inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(0));
receiverLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
activationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
if (LayerTest::isElementWise)
{
inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(1));
}
if (preventFusing)
{
receiverLayer->GetOutputSlot(0).Connect(output2Layer->GetInputSlot(0));
}
return network;
}
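// Optimizes and runs the same network twice: once built so the Activation can be fused
// into the receiver layer, and once with fusing prevented. Verifies the expected graph
// structure in both cases, then checks that the two runs produce the same outputs
// element by element, within the given tolerance.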
template<typename LayerTest,
DataType ArmnnType,
typename LayerType = typename LayerTest::LayerType,
typename T = ResolveType<ArmnnType>>
void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescriptor, float tolerance,
Compute backendId, float scale = 1.f, int32_t offset = 0)
{
// FIRST NETWORK: Fused
// Construct ArmNN network
INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);
// Create ArmNN runtime
IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
// Optimise ArmNN network
IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());
Graph& graphFused = GetGraphForTesting(optNetFused.get());
auto checkFusedLayer = [](const Layer* const layer) -> bool {
return IsLayerOfType<LayerType>(layer) &&
(layer->GetNameStr() == "fused-activation-into-receiverLayer");
};
// If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
if(LayerTest::isConstTensorAsInputSupported)
{
CHECK(4 == graphFused.GetNumLayers());
CHECK(CheckSequence(graphFused.cbegin(),
graphFused.cend(),
&IsLayerOfType<InputLayer>,
&IsLayerOfType<ConstantLayer>,
checkFusedLayer,
&IsLayerOfType<OutputLayer>));
// Check that the new constant layer is connected to the fused receiver layer.
Layer* fusedReceiverLayer = GetFirstLayerWithName(graphFused, "fused-activation-into-receiverLayer");
CHECK(fusedReceiverLayer);
CHECK(fusedReceiverLayer->GetInputSlot(1).GetConnection() != nullptr);
}
else
{
CHECK(3 == graphFused.GetNumLayers());
CHECK(CheckSequence(graphFused.cbegin(),
graphFused.cend(),
&IsLayerOfType<InputLayer>,
checkFusedLayer,
&IsLayerOfType<OutputLayer>));
}
// Load network into runtime
NetworkId networkIdentifier;
CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);
// Create structures for inputs and outputs.
std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
std::vector<T> outputDataFused(LayerTest::outputSize);
InputTensors inputTensorsFused{
{0, ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputDataFused.data())}};
OutputTensors outputTensorsFused{
{0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
// Execute network
CHECK(run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused) == Status::Success);
// SECOND NETWORK: NotFused
// Construct ArmNN network
INetworkPtr networkNotFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, true, scale, offset);
// Create ArmNN runtime
IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options
// Optimise ArmNN network
IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {backendId}, runNotFused->GetDeviceSpec());
Graph& graphNotFused = GetGraphForTesting(optNetNotFused.get());
// If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
if(LayerTest::isConstTensorAsInputSupported)
{
CHECK(6 == graphNotFused.GetNumLayers());
CHECK(CheckSequence(graphNotFused.cbegin(),
graphNotFused.cend(),
&IsLayerOfType<InputLayer>,
&IsLayerOfType<ConstantLayer>,
&IsLayerOfType<LayerType>,
&IsLayerOfType<ActivationLayer>,
&IsLayerOfType<OutputLayer>,
&IsLayerOfType<OutputLayer>));
}
else
{
CHECK(5 == graphNotFused.GetNumLayers());
CHECK(CheckSequence(graphNotFused.cbegin(),
graphNotFused.cend(),
&IsLayerOfType<InputLayer>,
&IsLayerOfType<LayerType>,
&IsLayerOfType<ActivationLayer>,
&IsLayerOfType<OutputLayer>,
&IsLayerOfType<OutputLayer>));
}
// Load network into runtime
NetworkId networkIdentifierNotFused;
CHECK(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) == Status::Success);
// Create structures for inputs and outputs.
std::vector<T> inputDataNotFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
std::vector<T> outputDataNotFused(LayerTest::outputSize);
std::vector<T> outputData2NotFused(LayerTest::outputSize);
InputTensors inputTensorsNotFused{
{0, ConstTensor(runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0), inputDataNotFused.data())}};
OutputTensors outputTensorsNotFused{
{0, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())},
{1, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}};
// Execute network
CHECK(runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused)
== Status::Success);
// Check that the output of the fused network matches the output of the activation in the "NotFused" network
for (unsigned int n = 0; n < outputDataFused.size(); ++n)
{
auto outputNotFused = static_cast<float>(outputDataNotFused[n]);
CHECK(static_cast<float>(outputDataFused[n]) == doctest::Approx(outputNotFused).epsilon(tolerance));
}
}
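// Smoke test: builds, optimizes, loads and runs the fusible network for the given
// activation and backend. Returns false if any step throws or the workload fails, so
// callers can assert that every receiver/activation combination at least executes,
// whether or not the fusing actually happened.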
template<typename LayerTest,
DataType ArmnnType,
typename LayerType = typename LayerTest::LayerType,
typename T = ResolveType<ArmnnType>>
bool FuseActivationSimpleTest(ActivationDescriptor activationDescriptor, Compute backendId,
float scale = 1.f, int32_t offset = 0)
{
bool success;
try
{
// Construct ArmNN network
INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);
// Create ArmNN runtime
IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
// Optimise ArmNN network
IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());
// Load network into runtime
NetworkId networkIdentifier;
CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);
// Create structures for inputs and outputs.
std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
std::vector<T> outputDataFused(LayerTest::outputSize);
InputTensors inputTensorsFused{
{0, ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputDataFused.data())}};
OutputTensors outputTensorsFused{
{0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
// Execute network
success = (run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused)
== Status::Success);
}
catch (const std::exception& e)
{
std::cerr << e.what() << std::endl;
success = false;
}
return success;
}
}
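// The suites below exercise the same fusing scenarios per backend: Neon (CpuAcc) first,
// then CL (GpuAcc).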
#if defined(ARMCOMPUTENEON_ENABLED)
TEST_SUITE("Optimizer")
{
// ReLu fused into Receiver Layers Float32
TEST_CASE("FuseReLUIntoConvFloat32CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoDWConvFloat32CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoFullyConnectedFloat32CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoBatchNormFloat32CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
// BoundedReLu fused into Receiver Layers Float32
TEST_CASE("FuseBoundedReLUIntoConvFloat32CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoDWConvFloat32CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoFullyConnectedFloat32CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoBatchNormFloat32CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
// ReLU fused into Receiver Layers QAsymmU8
TEST_CASE("FuseReLUIntoConvQAsymmU8CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoDWConvQAsymmU8CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoFullyConnectedQAsymmU8CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
// BoundedReLu fused into Receiver Layers QAsymmS8
TEST_CASE("FuseBoundedReLUIntoConvQASymmS8CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 6.0f;
activationDescriptor.m_B = 0.0f;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoDWConvQASymmS8CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 6.0f;
activationDescriptor.m_B = 0.0f;
FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoFullyConnectedQASymmS8CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 6.0f;
activationDescriptor.m_B = 0.0f;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>, DataType::QAsymmS8>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
// TanH fused into Receiver Layers Float32
TEST_CASE("FuseTanHIntoConvFloat32CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::TanH;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
// HardSwish fused into Receiver Layers Float32
TEST_CASE("FuseHardSwishIntoConvFloat32CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::HardSwish;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::CpuAcc);
}
// Test that all receiver layers followed by all activation layers work, either fused or not fused
TEST_CASE("LayerFollowedByActivationFloat32CpuAccTest")
{
ActivationDescriptor activationDescriptor;
for (int i = 0; i != 12; ++i)
{
activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
}
}
TEST_CASE("LayerFollowedByActivationFloat16CpuAccTest")
{
ActivationDescriptor activationDescriptor;
for (int i = 0; i != 12; ++i)
{
activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
}
}
TEST_CASE("LayerFollowedByActivationQAsymmU8CpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::Sigmoid;
CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
activationDescriptor.m_Function = ActivationFunction::TanH;
CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
activationDescriptor.m_Function = ActivationFunction::ReLu;
CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
activationDescriptor.m_Function = ActivationFunction::HardSwish;
CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
}
}
#endif
#if defined(ARMCOMPUTECL_ENABLED)
TEST_SUITE("Optimizer")
{
// ReLu fused into Receiver Layers Float32
TEST_CASE("FuseReLUIntoConvFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUIntoDWConvFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUIntoFullyConnectedFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUIntoBatchNormFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUIntoMulFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUIntoAddFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUIntoSubFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUIntoDivFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
// BoundedReLu fused into Receiver Layers Float32
TEST_CASE("FuseBoundedReLUIntoConvFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoDWConvFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoFullyConnectedFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoBatchNormFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoMulFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoAddFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoSubFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoDivFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
// ReLu fused into Receiver Layers Float16
TEST_CASE("FuseReLUIntoConvFloat16GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUIntoDWConvFloat16GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUIntoFullyConnectedFloat16GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUIntoBatchNormFloat16GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUIntoMulFloat16GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUIntoAddFloat16GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUIntoSubFloat16GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUIntoDivFloat16GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
// ReLU fused into Receiver Layers QAsymmU8
TEST_CASE("FuseReLUQIntoConvAsymmU8GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUQIntoDWConvAsymmU8GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseReLUQIntoFullyConnectedAsymmU8GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
// BoundedReLu fused into Receiver Layers QAsymmS8
TEST_CASE("FuseBoundedReLUIntoConvQASymmS8GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 6.0f;
activationDescriptor.m_B = 0.0f;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoDWConvQASymmS8GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 6.0f;
activationDescriptor.m_B = 0.0f;
FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoFullyConnectedQASymmS8GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 6.0f;
activationDescriptor.m_B = 0.0f;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>, DataType::QAsymmS8>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
// TanH fused into Receiver Layers Float32
TEST_CASE("FuseTanHIntoConvFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::TanH;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseTanHIntoMulFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::TanH;
FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseTanHIntoAddFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::TanH;
FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseTanHIntoSubFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::TanH;
FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseTanHIntoDivFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::TanH;
FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
// HardSwish fused into Receiver Layers Float32
TEST_CASE("FuseHardSwishIntoConvFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::HardSwish;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseHardSwishIntoMulFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::HardSwish;
FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseHardSwishIntoAddFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::HardSwish;
FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseHardSwishIntoSubFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::HardSwish;
FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
TEST_CASE("FuseHardSwishIntoDivFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::HardSwish;
FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, 0.0001f, Compute::GpuAcc);
}
// Test that all receiver layers followed by all activation layers work, either fused or not fused
TEST_CASE("LayerFollowedByActivationFloat32GpuAccTest")
{
ActivationDescriptor activationDescriptor;
for (int i = 0; i != 12; ++i)
{
activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
if (activationDescriptor.m_Function != ActivationFunction::Elu)
{
CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, Compute::GpuAcc)), "DepthwiseConvolution + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float32>, DataType::Float32>
(activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
}
}
}
TEST_CASE("LayerFollowedByActivationFloat16GpuAccTest")
{
ActivationDescriptor activationDescriptor;
for (int i = 0; i != 12; ++i)
{
activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
if (activationDescriptor.m_Function != ActivationFunction::Elu)
{
CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, Compute::GpuAcc)), "Depthwise + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float16>, DataType::Float16>
(activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
}
}
}
TEST_CASE("LayerFollowedByActivationQAsymmU8GpuAccTest")
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::Sigmoid;
CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
activationDescriptor.m_Function = ActivationFunction::TanH;
CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
activationDescriptor.m_Function = ActivationFunction::ReLu;
CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
activationDescriptor.m_Function = ActivationFunction::HardSwish;
CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
(activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
static_cast<int>(activationDescriptor.m_Function));
}
}
#endif