IVGCVSW-2074: Updated the DepthwiseConvolution2dDepthMul1 tests to support NHWC

Change-Id: Ibaa0b909680a6f2a155e4ca6fa24e1144d6e6e73
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index f2a28ec..4e7f785 100755
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -69,11 +69,26 @@
 ARMNN_AUTO_TEST_CASE(SimpleConvolution2dSquareNhwc, SimpleConvolution2d3x3NhwcTest, false)
 
 // Depthwise Convolution
-ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true)
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1,
+                     DepthwiseConvolution2dDepthMul1Test, true, armnn::DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1,
+                     DepthwiseConvolution2dDepthMul1Test, false, armnn::DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8,
+                     DepthwiseConvolution2dDepthMul1Uint8Test, true, armnn::DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8,
+                     DepthwiseConvolution2dDepthMul1Uint8Test, false, armnn::DataLayout::NCHW)
+
+// NHWC Depthwise Convolution
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Nhwc,
+                     DepthwiseConvolution2dDepthMul1Test, true, armnn::DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Nhwc,
+                     DepthwiseConvolution2dDepthMul1Test, false, armnn::DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8Nhwc,
+                     DepthwiseConvolution2dDepthMul1Uint8Test, true, armnn::DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8Nhwc,
+                     DepthwiseConvolution2dDepthMul1Uint8Test, false, armnn::DataLayout::NHWC)
+
 ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthNhwc, DepthwiseConvolution2dDepthNhwcTest, false)
-ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false)
-ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true)
-ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false)
 
 ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric,
                      DepthwiseConvolution2dAsymmetricTest, true, armnn::DataLayout::NCHW)
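
Note on the registrations above: ARMNN_AUTO_TEST_CASE supplies the backend's workload factory and forwards its trailing arguments to the named test function, so the added armnn::DataLayout value arrives as the new layout parameter introduced in Conv2dTestImpl.hpp further down. As a rough illustration only (not the macro's literal expansion; the helper name is hypothetical and the required headers are assumed to be those already included by ClLayerTests.cpp), the NHWC float registration behaves like:

    void DepthwiseConvolution2dDepthMul1NhwcSketch(armnn::IWorkloadFactory& workloadFactory)
    {
        // biasEnabled = true; armnn::DataLayout::NHWC converts implicitly to the
        // armnn::DataLayoutIndexed parameter added to the test function.
        LayerTestResult<float, 4> result =
            DepthwiseConvolution2dDepthMul1Test(workloadFactory, true, armnn::DataLayout::NHWC);

        // The test framework then checks the computed output against result.outputExpected.
        (void)result;
    }

The Neon and reference registrations below exercise the same test functions through their own workload factories.
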
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index 8fda825..53dad94 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -92,11 +92,26 @@
 }
 
 // Depthwise Convolution
-ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true)
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1,
+                     DepthwiseConvolution2dDepthMul1Test, true, armnn::DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1,
+                     DepthwiseConvolution2dDepthMul1Test, false, armnn::DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8,
+                     DepthwiseConvolution2dDepthMul1Uint8Test, true, armnn::DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8,
+                     DepthwiseConvolution2dDepthMul1Uint8Test, false, armnn::DataLayout::NCHW)
+
+// NHWC Depthwise Convolution
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Nhwc,
+                     DepthwiseConvolution2dDepthMul1Test, true, armnn::DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Nhwc,
+                     DepthwiseConvolution2dDepthMul1Test, false, armnn::DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8Nhwc,
+                     DepthwiseConvolution2dDepthMul1Uint8Test, true, armnn::DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8Nhwc,
+                     DepthwiseConvolution2dDepthMul1Uint8Test, false, armnn::DataLayout::NHWC)
+
 ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthNhwc, DepthwiseConvolution2dDepthNhwcTest, false)
-ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false)
-ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true)
-ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false)
 
 ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric,
                      DepthwiseConvolution2dAsymmetricTest, true, armnn::DataLayout::NCHW)
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index f5884ae..00fba20 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -74,11 +74,26 @@
                      false,
                      armnn::DataLayout::NHWC)
 
-ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true)
-ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true)
 
-ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false)
-ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false)
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1,
+                     DepthwiseConvolution2dDepthMul1Test, true, armnn::DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8,
+                     DepthwiseConvolution2dDepthMul1Uint8Test, true, armnn::DataLayout::NCHW)
+
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1,
+                     DepthwiseConvolution2dDepthMul1Test, false, armnn::DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8,
+                     DepthwiseConvolution2dDepthMul1Uint8Test, false, armnn::DataLayout::NCHW)
+
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Nhwc,
+                     DepthwiseConvolution2dDepthMul1Test, true, armnn::DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8Nhwc,
+                     DepthwiseConvolution2dDepthMul1Uint8Test, true, armnn::DataLayout::NHWC)
+
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Nhwc,
+                     DepthwiseConvolution2dDepthMul1Test, false, armnn::DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8Nhwc,
+                     DepthwiseConvolution2dDepthMul1Uint8Test, false, armnn::DataLayout::NHWC)
 
 ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric,
                      DepthwiseConvolution2dAsymmetricTest, true, armnn::DataLayout::NCHW)
diff --git a/src/backends/test/Conv2dTestImpl.hpp b/src/backends/test/Conv2dTestImpl.hpp
index 9bb36fb..3791fb0 100755
--- a/src/backends/test/Conv2dTestImpl.hpp
+++ b/src/backends/test/Conv2dTestImpl.hpp
@@ -457,7 +457,8 @@
 LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(armnn::IWorkloadFactory& workloadFactory,
                                                               float qScale,
                                                               int32_t qOffset,
-                                                              bool biasEnabled)
+                                                              bool biasEnabled,
+                                                              const armnn::DataLayoutIndexed& layout)
 {
     unsigned int inputHeight = 3;
     unsigned int inputWidth = 3;
@@ -473,10 +474,9 @@
     unsigned int outputChannels = kernelChannels;
     unsigned int outputNum = inputNum;
 
-    armnn::TensorInfo inputTensorInfo({ inputNum, inputChannels, inputHeight, inputWidth }, armnn::GetDataType<T>());
-    armnn::TensorInfo outputTensorInfo({ outputNum, outputChannels, outputHeight, outputWidth },
-        armnn::GetDataType<T>());
-    armnn::TensorInfo kernelDesc({ 1, outputChannels, kernelHeight, kernelWidth }, armnn::GetDataType<T>());
+    armnn::TensorInfo inputTensorInfo = GetTensorInfo<T>(inputNum, inputChannels, inputHeight, inputWidth, layout);
+    armnn::TensorInfo outputTensorInfo = GetTensorInfo<T>(outputNum, outputChannels, outputHeight, outputWidth, layout);
+    armnn::TensorInfo kernelDesc = GetTensorInfo<T>(1, outputChannels, kernelHeight, kernelWidth, layout);
     armnn::TensorInfo biasDesc({ outputChannels }, armnn::GetDataType<B>());
 
     // Set quantization parameters if the requested type is a quantized type.
@@ -491,32 +491,47 @@
         biasDesc.SetQuantizationScale(qScale*qScale);
         biasDesc.SetQuantizationOffset(0);
     }
+    std::vector<T> inputData = std::vector<T>(
+            QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
+                    1.f, 2.f, 1.f,
+                    2.f, 1.f, 2.f,
+                    1.f, 2.f, 1.f,
 
-    auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
-        QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
-            1.f, 2.f, 1.f,
-            2.f, 1.f, 2.f,
-            1.f, 2.f, 1.f,
-
-            1.f, 2.f, 1.f,
-            2.f, 1.f, 2.f,
-            1.f, 2.f, 1.f,
-        })));
+                    1.f, 2.f, 1.f,
+                    2.f, 1.f, 2.f,
+                    1.f, 2.f, 1.f,
+            }));
+    // Permute the input data to NHWC, if that is the requested layout.
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
+    {
+        std::vector<T> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        inputData = tmp;
+    }
+    auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
 
     std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
                                             {0, 2}));
     auto bias = MakeTensor<B, 1>(biasDesc, biasV);
 
-    auto kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
-        QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), {
-            1.f, 0.f,  1.f,
-            0.f, 0.f,  0.f,
-           -1.f, 0.f, -1.f,
+    std::vector<T> kernelData = std::vector<T>(
+            QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), {
+                    1.f, 0.f,  1.f,
+                    0.f, 0.f,  0.f,
+                   -1.f, 0.f, -1.f,
 
-            1.f, 0.f,  1.f,
-            0.f, 0.f,  0.f,
-           -1.f, 0.f, -1.f,
-        })));
+                    1.f, 0.f,  1.f,
+                    0.f, 0.f,  0.f,
+                   -1.f, 0.f, -1.f,
+            }));
+    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
+    {
+        std::vector<T> tmp(kernelData.size());
+        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, kernelData.data(), tmp.data());
+        kernelData = tmp;
+    }
+    auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData);
 
     // Manually calculated.
     std::vector<T> outputImage(
@@ -534,6 +549,13 @@
     }
 
     LayerTestResult<T, 4> ret(outputTensorInfo);
+    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
+    {
+        std::vector<T> tmp(outputImage.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data());
+        outputImage = tmp;
+    }
+
     ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
 
     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
@@ -559,6 +581,7 @@
     data.m_Parameters.m_PadTop = 0;
     data.m_Parameters.m_PadBottom = 0;
     data.m_Parameters.m_BiasEnabled = biasEnabled;
+    data.m_Parameters.m_DataLayout = layout.GetDataLayout();
 
     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
     inputHandle->Allocate();
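
For reference on the Conv2dTestImpl.hpp changes above: GetTensorInfo<T> builds each tensor shape in the order implied by the requested layout, and the armnnUtils::Permute calls rearrange the NCHW-ordered reference data (input, kernel and expected output) to match. The mapping NCHWToNHWC = { 0, 3, 1, 2 } uses the convention that mappings[i] is the destination dimension of source dimension i, i.e. N->0, C->3, H->1, W->2. A small standalone sketch (plain C++, no ArmNN helpers) of the same rearrangement:

    #include <cstddef>
    #include <vector>

    // Illustrates what armnnUtils::Permute does for the { 0, 3, 1, 2 }
    // (NCHW -> NHWC) mapping used by the test implementation above.
    std::vector<float> PermuteNchwToNhwc(const std::vector<float>& src,
                                         std::size_t n, std::size_t c,
                                         std::size_t h, std::size_t w)
    {
        std::vector<float> dst(src.size());
        for (std::size_t in = 0; in < n; ++in)
        for (std::size_t ic = 0; ic < c; ++ic)
        for (std::size_t ih = 0; ih < h; ++ih)
        for (std::size_t iw = 0; iw < w; ++iw)
        {
            const std::size_t srcIndex = ((in * c + ic) * h + ih) * w + iw; // NCHW offset
            const std::size_t dstIndex = ((in * h + ih) * w + iw) * c + ic; // NHWC offset
            dst[dstIndex] = src[srcIndex];
        }
        return dst;
    }

Because the expected output is permuted with the same mapping, the NCHW reference values in the test remain the single source of truth for both layouts.
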
diff --git a/src/backends/test/LayerTests.cpp b/src/backends/test/LayerTests.cpp
index 49c6d30..3a4e95c 100755
--- a/src/backends/test/LayerTests.cpp
+++ b/src/backends/test/LayerTests.cpp
@@ -640,9 +640,10 @@
 }
 
 LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory,
-                                                              bool biasEnabled)
+                                                              bool biasEnabled,
+                                                              const armnn::DataLayoutIndexed& layout)
 {
-    return DepthwiseConvolution2dDepthMul1TestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled);
+    return DepthwiseConvolution2dDepthMul1TestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled, layout);
 }
 
 LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(armnn::IWorkloadFactory& workloadFactory,
@@ -660,9 +661,10 @@
 }
 
 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(armnn::IWorkloadFactory& workloadFactory,
-                                                                     bool biasEnabled)
+                                                                     bool biasEnabled,
+                                                                     const armnn::DataLayoutIndexed& layout)
 {
-    return DepthwiseConvolution2dDepthMul1TestImpl<uint8_t, int32_t>(workloadFactory, 0.5f, 50, biasEnabled);
+    return DepthwiseConvolution2dDepthMul1TestImpl<uint8_t, int32_t>(workloadFactory, 0.5f, 50, biasEnabled, layout);
 }
 
 LayerTestResult<float, 4> Convolution1dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled)
diff --git a/src/backends/test/LayerTests.hpp b/src/backends/test/LayerTests.hpp
index 006425a..39cd4c4 100644
--- a/src/backends/test/LayerTests.hpp
+++ b/src/backends/test/LayerTests.hpp
@@ -80,7 +80,8 @@
                                                               bool biasEnabled);
 
 LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory,
-                                                              bool biasEnabled);
+                                                              bool biasEnabled,
+                                                              const armnn::DataLayoutIndexed& layout);
 
 LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(armnn::IWorkloadFactory& workloadFactory,
                                                                bool biasEnabled,
@@ -342,7 +343,8 @@
                                                             const armnn::DataLayoutIndexed& layout);
 
 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(armnn::IWorkloadFactory& workloadFactory,
-                                                                     bool biasEnabled);
+                                                                     bool biasEnabled,
+                                                                     const armnn::DataLayoutIndexed& layout);
 
 LayerTestResult<uint8_t, 4> ConstantLinearActivationUint8Test(armnn::IWorkloadFactory& workloadFactory);
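
The updated prototypes make the data layout an explicit, mandatory argument; every caller now states NCHW or NHWC. A minimal sketch of how backend test code could exercise both variants through these declarations (the helper name is hypothetical; workloadFactory is whichever backend factory the framework supplies):

    void ExerciseDepthwiseConvolution2dDepthMul1(armnn::IWorkloadFactory& workloadFactory)
    {
        // Float32, NCHW, with bias.
        auto floatNchw = DepthwiseConvolution2dDepthMul1Test(
            workloadFactory, true, armnn::DataLayout::NCHW);

        // Uint8, NHWC, without bias; the quantisation scale and offset are set
        // inside the test implementation.
        auto uint8Nhwc = DepthwiseConvolution2dDepthMul1Uint8Test(
            workloadFactory, false, armnn::DataLayout::NHWC);
    }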