IVGCVSW-3639 Add 5d tensor support

* Increased MaxNumOfTensorDimensions from 4 to 5 and fixed issues arising from its use
* Fixed code paths that assumed 5d tensors are invalid
* Updated ArmComputeTensorUtils to handle 5d tensors (the 5d offset calculation is sketched below)
* Added 5d tensor unit tests for add, mul, stack and reshape (needed by IVGCVSW-3527)
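
A minimal standalone sketch of the 5d linear offset used by the updated
GetLinearBufferOffset (assuming a dense buffer with no strides and the
arm_compute shape ordering used in the diff, shape[0]=width up to
shape[4]=depth; the helper name and the example values are illustrative
only):

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    // Indices are ordered (depth, batch, channel, y, x); shape is given
    // innermost-first as in arm_compute::TensorShape. The depth extent
    // (shape[4]) is only needed for loop bounds, not for the offset itself.
    std::size_t LinearOffset5d(const uint32_t shape[5],
                               uint32_t depthIndex, uint32_t batchIndex,
                               uint32_t channelIndex, uint32_t y, uint32_t x)
    {
        const uint32_t width       = shape[0];
        const uint32_t height      = shape[1];
        const uint32_t numChannels = shape[2];
        const uint32_t numBatches  = shape[3];
        return (((static_cast<std::size_t>(depthIndex) * numBatches + batchIndex)
                 * numChannels + channelIndex) * height + y) * width + x;
    }

    int main()
    {
        // Shape of the Addition5dTest tensors, innermost-first:
        // width=3, height=2, channels=2, batches=2, depth=2.
        const uint32_t shape[5] = { 3, 2, 2, 2, 2 };
        // Element (d=1, b=0, c=1, y=1, x=2): 24 + 0 + 6 + 3 + 2 = 35.
        std::cout << LinearOffset5d(shape, 1, 0, 1, 1, 2) << std::endl;
    }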

Signed-off-by: Matthew Jackson <matthew.jackson@arm.com>
Change-Id: I5bcd64942d0d04efcc6c5acb240ad4b88e010743
diff --git a/include/armnn/Types.hpp b/include/armnn/Types.hpp
index 12897e2..6d8ea6f 100644
--- a/include/armnn/Types.hpp
+++ b/include/armnn/Types.hpp
@@ -13,7 +13,7 @@
 namespace armnn
 {
 
-constexpr unsigned int MaxNumOfTensorDimensions = 4U;
+constexpr unsigned int MaxNumOfTensorDimensions = 5U;
 
 /// @enum Status enumeration
 /// @var Status::Successful
@@ -167,7 +167,12 @@
 
     bool IsEqual(const PermutationVector& other) const
     {
-        return std::equal(begin(), end(), other.begin(), other.end());
+        if (m_NumDimMappings != other.m_NumDimMappings) return false;
+        for (unsigned int i = 0; i < m_NumDimMappings; ++i)
+        {
+            if (m_DimMappings[i] != other.m_DimMappings[i]) return false;
+        }
+        return true;
     }
 
     bool IsInverse(const PermutationVector& other) const
diff --git a/src/armnn/layers/MeanLayer.cpp b/src/armnn/layers/MeanLayer.cpp
index c72d79b..c925a3e 100644
--- a/src/armnn/layers/MeanLayer.cpp
+++ b/src/armnn/layers/MeanLayer.cpp
@@ -45,7 +45,7 @@
 
     const TensorInfo& input = GetInputSlot(0).GetConnection()->GetTensorInfo();
 
-    BOOST_ASSERT_MSG(input.GetNumDimensions() > 0 && input.GetNumDimensions() <= MaxNumOfTensorDimensions,
+    BOOST_ASSERT_MSG(input.GetNumDimensions() > 0 && input.GetNumDimensions() <= 4,
                      "MeanLayer: Mean supports up to 4D input.");
 
     unsigned int rank = input.GetNumDimensions();
diff --git a/src/armnn/test/UtilsTests.cpp b/src/armnn/test/UtilsTests.cpp
index 0fa67e5..897a35f 100644
--- a/src/armnn/test/UtilsTests.cpp
+++ b/src/armnn/test/UtilsTests.cpp
@@ -25,7 +25,7 @@
 
 BOOST_AUTO_TEST_CASE(PermuteDescriptorWithTooManyMappings)
 {
-    BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 0u, 1u, 2u, 3u, 4u }), armnn::InvalidArgumentException);
+    BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 0u, 1u, 2u, 3u, 4u, 5u }), armnn::InvalidArgumentException);
 }
 
 BOOST_AUTO_TEST_CASE(PermuteDescriptorWithInvalidMappings1d)
@@ -48,6 +48,11 @@
     BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 0u, 1u, 2u, 4u }), armnn::InvalidArgumentException);
 }
 
+BOOST_AUTO_TEST_CASE(PermuteDescriptorWithInvalidMappings5d)
+{
+    BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 0u, 1u, 2u, 3u, 5u }), armnn::InvalidArgumentException);
+}
+
 BOOST_AUTO_TEST_CASE(PermuteDescriptorWithDuplicatedMappings)
 {
     BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 1u, 1u, 0u }), armnn::InvalidArgumentException);
diff --git a/src/armnnSerializer/Serializer.cpp b/src/armnnSerializer/Serializer.cpp
index d35be6f..faf3d82 100644
--- a/src/armnnSerializer/Serializer.cpp
+++ b/src/armnnSerializer/Serializer.cpp
@@ -615,9 +615,9 @@
     auto flatBufferPermuteBaseLayer = CreateLayerBase(layer, serializer::LayerType::LayerType_Permute);
 
     std::vector<unsigned int> dimMappings;
-    for (auto& v: permuteDescriptor.m_DimMappings)
+    for (unsigned int i = 0; i < permuteDescriptor.m_DimMappings.GetSize(); ++i)
     {
-        dimMappings.push_back(v);
+        dimMappings.push_back(permuteDescriptor.m_DimMappings[i]);
     }
 
     auto flatBufferPermuteDesc = serializer::CreatePermuteDescriptor(m_flatBufferBuilder,
diff --git a/src/armnnTfLiteParser/test/InputOutputTensorNames.cpp b/src/armnnTfLiteParser/test/InputOutputTensorNames.cpp
index d42ae2e..d7a4371 100644
--- a/src/armnnTfLiteParser/test/InputOutputTensorNames.cpp
+++ b/src/armnnTfLiteParser/test/InputOutputTensorNames.cpp
@@ -61,12 +61,12 @@
                 "operator_codes": [ ],
                 "subgraphs": [{
                     "tensors": [ {
-                        "shape": [ 1, 1, 1, 1, 1 ],
+                        "shape": [ 1, 1, 1, 1, 1, 1 ],
                         "type": "FLOAT32",
                         "name": "In",
                         "buffer": 0
                     }, {
-                        "shape": [ 1, 1, 1, 1, 1 ],
+                        "shape": [ 1, 1, 1, 1, 1, 1 ],
                         "type": "FLOAT32",
                         "name": "Out",
                         "buffer": 1
@@ -81,6 +81,7 @@
 BOOST_FIXTURE_TEST_CASE(InvalidTensorsThrowException, InvalidTensorsFixture)
 {
     // Tensor numDimensions must be less than or equal to MaxNumOfTensorDimensions
+    static_assert(armnn::MaxNumOfTensorDimensions == 5, "Please update InvalidTensorsFixture");
     BOOST_CHECK_THROW(Setup(), armnn::InvalidArgumentException);
 }
 
diff --git a/src/armnnTfLiteParser/test/Squeeze.cpp b/src/armnnTfLiteParser/test/Squeeze.cpp
index 7f6fb27..13261fa 100644
--- a/src/armnnTfLiteParser/test/Squeeze.cpp
+++ b/src/armnnTfLiteParser/test/Squeeze.cpp
@@ -106,11 +106,12 @@
 
 struct SqueezeFixtureWithInvalidInput : SqueezeFixture
 {
-    SqueezeFixtureWithInvalidInput() : SqueezeFixture("[ 1, 2, 2, 1, 2 ]", "[ 1, 2, 2, 1 ]", "[ ]") {}
+    SqueezeFixtureWithInvalidInput() : SqueezeFixture("[ 1, 2, 2, 1, 2, 2 ]", "[ 1, 2, 2, 1, 2 ]", "[ ]") {}
 };
 
 BOOST_FIXTURE_TEST_CASE(ParseSqueezeInvalidInput, SqueezeFixtureWithInvalidInput)
 {
+    static_assert(armnn::MaxNumOfTensorDimensions == 5, "Please update SqueezeFixtureWithInvalidInput");
     BOOST_CHECK_THROW((SetupSingleInputSingleOutput("inputTensor", "outputTensor")),
                       armnn::InvalidArgumentException);
 }
diff --git a/src/armnnTfParser/TfParser.cpp b/src/armnnTfParser/TfParser.cpp
index 39e6971..76d25d1 100755
--- a/src/armnnTfParser/TfParser.cpp
+++ b/src/armnnTfParser/TfParser.cpp
@@ -2090,10 +2090,11 @@
                      % CHECK_LOCATION().AsString()));
     }
 
+    const unsigned int supportedNumDims = 4;
     unsigned int numConcatViews = numInputs - 1;
-    OriginsDescriptor concatDescriptor(static_cast<uint32_t>(numConcatViews), MaxNumOfTensorDimensions);
+    OriginsDescriptor concatDescriptor(static_cast<uint32_t>(numConcatViews), supportedNumDims);
     concatDescriptor.SetConcatAxis(concatDim);
-    TensorShape mergeDims(MaxNumOfTensorDimensions);
+    TensorShape mergeDims(supportedNumDims);
     unsigned int mergeDim = 0;
     for (unsigned int viewIndex = 0; viewIndex < numConcatViews; ++viewIndex)
     {
@@ -2102,7 +2103,7 @@
         TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();
 
         // Double check dimensions of the tensors
-        if (inputTensorInfo.GetNumDimensions() != MaxNumOfTensorDimensions)
+        if (inputTensorInfo.GetNumDimensions() != supportedNumDims)
         {
             throw armnn::ParseException(
                     boost::str(
@@ -2110,14 +2111,14 @@
                         "The number of dimensions: %1% for input tensors of the "
                         "concatenation op should be %2% %3%")
                         % inputTensorInfo.GetNumDimensions()
-                        % MaxNumOfTensorDimensions
+                        % supportedNumDims
                         % CHECK_LOCATION().AsString()));
         }
 
         // Copy the input tensor shape to mergeDimSizes and initialize the view origin coordinates for the current input
         mergeDims = inputTensorInfo.GetShape();
         unsigned int* viewOrigin = const_cast<unsigned int*>(concatDescriptor.GetViewOrigin(viewIndex));
-        std::fill(viewOrigin, viewOrigin + MaxNumOfTensorDimensions, 0);
+        std::fill(viewOrigin, viewOrigin + supportedNumDims, 0);
 
         // Update the view origin coordinates and the merge dimension value
         concatDescriptor.SetViewOriginCoord(viewIndex, concatDim, mergeDim);
@@ -2652,9 +2653,10 @@
     IOutputSlot& inputSlot = inputs[1 - index].m_IndexedValue->ResolveArmnnOutputSlot(inputs[1 - index].m_Index);
     TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();
 
+    const unsigned int supportedNumDims = 4;
     auto inputDimSize = inputTensorInfo.GetNumDimensions();
 
-    if (inputDimSize != MaxNumOfTensorDimensions)
+    if (inputDimSize != supportedNumDims)
     {
         throw armnn::ParseException(
                 boost::str(
@@ -2662,7 +2664,7 @@
                     "The number of dimensions: %1% for input tensors of the "
                     "split op should be %2% %3%")
                     % inputTensorInfo.GetNumDimensions()
-                    % MaxNumOfTensorDimensions
+                    % supportedNumDims
                     % CHECK_LOCATION().AsString()));
     }
 
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.hpp b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
index c9587a7..34565fe 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
@@ -108,12 +108,14 @@
 
 // Helper function to obtain byte offset into tensor data
 inline size_t GetTensorOffset(const arm_compute::ITensorInfo& info,
+                              uint32_t depthIndex,
                               uint32_t batchIndex,
                               uint32_t channelIndex,
                               uint32_t y,
                               uint32_t x)
 {
     arm_compute::Coordinates coords;
+    coords.set(4, static_cast<int>(depthIndex));
     coords.set(3, static_cast<int>(batchIndex));
     coords.set(2, static_cast<int>(channelIndex));
     coords.set(1, static_cast<int>(y));
@@ -123,6 +125,7 @@
 
 // Helper function to obtain element offset into data buffer representing tensor data (assuming no strides).
 inline size_t GetLinearBufferOffset(const arm_compute::ITensorInfo& info,
+                                    uint32_t depthIndex,
                                     uint32_t batchIndex,
                                     uint32_t channelIndex,
                                     uint32_t y,
@@ -132,14 +135,15 @@
     uint32_t width = static_cast<uint32_t>(shape[0]);
     uint32_t height = static_cast<uint32_t>(shape[1]);
     uint32_t numChannels = static_cast<uint32_t>(shape[2]);
-    return ((batchIndex * numChannels + channelIndex) * height + y) * width + x;
+    uint32_t numBatches = static_cast<uint32_t>(shape[3]);
+    return (((depthIndex * numBatches + batchIndex) * numChannels + channelIndex) * height + y) * width + x;
 }
 
 template <typename T>
 void CopyArmComputeITensorData(const arm_compute::ITensor& srcTensor, T* dstData)
 {
     // If MaxNumOfTensorDimensions is increased, this loop will need fixing.
-    static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData");
+    static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyArmComputeITensorData");
     {
         const arm_compute::ITensorInfo& info = *srcTensor.info();
         const arm_compute::TensorShape& shape = info.tensor_shape();
@@ -148,18 +152,23 @@
         uint32_t height = static_cast<uint32_t>(shape[1]);
         uint32_t numChannels = static_cast<uint32_t>(shape[2]);
         uint32_t numBatches = static_cast<uint32_t>(shape[3]);
+        uint32_t depth = static_cast<uint32_t>(shape[4]);
 
-        for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
+        for (unsigned int depthIndex = 0; depthIndex < depth; ++depthIndex)
         {
-            for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
+            for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
             {
-                for (unsigned int y = 0; y < height; ++y)
+                for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
                 {
-                    // Copies one row from arm_compute tensor buffer to linear memory buffer.
-                    // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
-                    memcpy(dstData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0),
-                           bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0),
-                           width * sizeof(T));
+                    for (unsigned int y = 0; y < height; ++y)
+                    {
+                        // Copies one row from arm_compute tensor buffer to linear memory buffer.
+                        // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
+                        memcpy(
+                         dstData + GetLinearBufferOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
+                         bufferPtr + GetTensorOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
+                         width * sizeof(T));
+                    }
                 }
             }
         }
@@ -170,7 +179,7 @@
 void CopyArmComputeITensorData(const T* srcData, arm_compute::ITensor& dstTensor)
 {
     // If MaxNumOfTensorDimensions is increased, this loop will need fixing.
-    static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData");
+    static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyArmComputeITensorData");
     {
         const arm_compute::ITensorInfo& info = *dstTensor.info();
         const arm_compute::TensorShape& shape = info.tensor_shape();
@@ -179,18 +188,23 @@
         uint32_t height = static_cast<uint32_t>(shape[1]);
         uint32_t numChannels = static_cast<uint32_t>(shape[2]);
         uint32_t numBatches = static_cast<uint32_t>(shape[3]);
+        uint32_t depth = static_cast<uint32_t>(shape[4]);
 
-        for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
+        for (unsigned int depthIndex = 0; depthIndex < depth; ++depthIndex)
         {
-            for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
+            for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
             {
-                for (unsigned int y = 0; y < height; ++y)
+                for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
                 {
-                    // Copies one row from linear memory buffer to arm_compute tensor buffer.
-                    // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
-                    memcpy(bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0),
-                           srcData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0),
-                           width * sizeof(T));
+                    for (unsigned int y = 0; y < height; ++y)
+                    {
+                        // Copies one row from linear memory buffer to arm_compute tensor buffer.
+                        // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
+                        memcpy(
+                         bufferPtr + GetTensorOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
+                         srcData + GetLinearBufferOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
+                         width * sizeof(T));
+                    }
                 }
             }
         }
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 4b0b84a..109aeb9 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -669,6 +669,11 @@
         }
     }
 
+    if (inputShape.GetNumDimensions() > 4)
+    {
+        throw InvalidArgumentException(descriptorName + ": Input tensor may have up to 4 dimensions.");
+    }
+
     // m_Axis is 0-based and may take values from 0 to the number of input dimensions (inclusive),
     // since the output tensor has an additional dimension.
     if (m_Parameters.m_Axis > inputShape.GetNumDimensions())
@@ -703,6 +708,11 @@
         }
     }
 
+    if (outputShape.GetNumDimensions() > 5)
+    {
+        throw InvalidArgumentException(descriptorName + ": Output tensor may have up to 5 dimensions.");
+    }
+
     // Check the supported data types
     std::vector<DataType> supportedTypes =
     {
diff --git a/src/backends/backendsCommon/WorkloadUtils.hpp b/src/backends/backendsCommon/WorkloadUtils.hpp
index a1a8d2a..7e3ac39 100644
--- a/src/backends/backendsCommon/WorkloadUtils.hpp
+++ b/src/backends/backendsCommon/WorkloadUtils.hpp
@@ -46,13 +46,14 @@
 template<typename CopyFunc>
 void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy)
 {
-    static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyTensorContents");
+    static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");
 
     TensorShape srcStrides = srcTensor->GetStrides();
     const TensorShape& srcShape = srcTensor->GetShape();
     TensorShape dstStrides = dstTensor->GetStrides();
     const TensorShape& dstShape = dstTensor->GetShape();
 
+    size_t srcDepth = 1;
     size_t srcBatches = 1;
     size_t srcChannels = 1;
     size_t srcHeight = 1;
@@ -61,8 +62,10 @@
                  srcWidth,
                  srcHeight,
                  srcChannels,
-                 srcBatches);
+                 srcBatches,
+                 srcDepth);
 
+    size_t srcDepthStride = 0;
     size_t srcBatchStride = 0;
     size_t srcChannelStride = 0;
     size_t srcHeightStride = 0;
@@ -71,8 +74,10 @@
                  srcWidthStride,
                  srcHeightStride,
                  srcChannelStride,
-                 srcBatchStride);
+                 srcBatchStride,
+                 srcDepthStride);
 
+    size_t dstDepth = 1;
     size_t dstBatches = 1;
     size_t dstChannels = 1;
     size_t dstHeight = 1;
@@ -81,8 +86,10 @@
                  dstWidth,
                  dstHeight,
                  dstChannels,
-                 dstBatches);
+                 dstBatches,
+                 dstDepth);
 
+    size_t dstDepthStride = 0;
     size_t dstBatchStride = 0;
     size_t dstChannelStride = 0;
     size_t dstHeightStride = 0;
@@ -91,7 +98,8 @@
                  dstWidthStride,
                  dstHeightStride,
                  dstChannelStride,
-                 dstBatchStride);
+                 dstBatchStride,
+                 dstDepthStride);
 
     const unsigned char* srcData;
     unsigned char* dstData;
@@ -105,26 +113,34 @@
     size_t copyHeight = std::min(srcHeight, dstHeight);
     size_t copyChannels = std::min(srcChannels, dstChannels);
     size_t copyBatches = std::min(srcBatches, dstBatches);
+    size_t copyDepth = std::min(srcDepth, dstDepth);
 
-    for(unsigned int b=0; b < copyBatches; ++b)
+    for (unsigned int d=0; d < copyDepth; ++d)
     {
-        auto srcPtrBatch = srcData;
-        auto dstPtrBatch = dstData;
-        for (unsigned int c=0; c< copyChannels; ++c)
+        auto srcPtrDepth = srcData;
+        auto dstPtrDepth = dstData;
+        for (unsigned int b=0; b < copyBatches; ++b)
         {
-            auto srcPtrChannel = srcData;
-            auto dstPtrChannel = dstData;
-            for (unsigned int h=0; h < copyHeight; ++h)
+            auto srcPtrBatch = srcData;
+            auto dstPtrBatch = dstData;
+            for (unsigned int c=0; c< copyChannels; ++c)
             {
-                copy(dstData, srcData, copyLength);
-                dstData += dstHeightStride;
-                srcData += srcHeightStride;
+                auto srcPtrChannel = srcData;
+                auto dstPtrChannel = dstData;
+                for (unsigned int h=0; h < copyHeight; ++h)
+                {
+                    copy(dstData, srcData, copyLength);
+                    dstData += dstHeightStride;
+                    srcData += srcHeightStride;
+                }
+                dstData += (static_cast<long>(dstChannelStride) - (dstData - dstPtrChannel));
+                srcData += (static_cast<long>(srcChannelStride) - (srcData - srcPtrChannel));
             }
-            dstData += (static_cast<long>(dstChannelStride) - (dstData - dstPtrChannel));
-            srcData += (static_cast<long>(srcChannelStride) - (srcData - srcPtrChannel));
+            dstData += (static_cast<long>(dstBatchStride)-(dstData - dstPtrBatch));
+            srcData += (static_cast<long>(srcBatchStride)-(srcData - srcPtrBatch));
         }
-        dstData += (static_cast<long>(dstBatchStride)-(dstData - dstPtrBatch));
-        srcData += (static_cast<long>(srcBatchStride)-(srcData - srcPtrBatch));
+        dstData += (static_cast<long>(dstDepthStride)-(dstData - dstPtrDepth));
+        srcData += (static_cast<long>(srcDepthStride)-(srcData - srcPtrDepth));
     }
 
     srcTensor->Unmap();
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
index 6754106..2201499 100644
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -2641,6 +2641,103 @@
     return ret;
 }
 
+LayerTestResult<float, 5> Addition5dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    unsigned int depth     = 2;
+    unsigned int batchSize = 2;
+    unsigned int channels  = 2;
+    unsigned int height    = 2;
+    unsigned int width     = 3;
+
+    armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int shape[] = {depth, batchSize, channels, height, width};
+
+    inputTensorInfo1 = armnn::TensorInfo(5, shape, armnn::DataType::Float32);
+    inputTensorInfo2 = armnn::TensorInfo(5, shape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(5, shape, armnn::DataType::Float32);
+
+
+    auto input1 = MakeTensor<float, 5>(inputTensorInfo1, std::vector<float>(
+        {
+            2.6f, 4.0f, 4.4f,  2.7f, 4.6f, 2.8f,
+            2.3f, 1.9f, 3.4f,  2.9f, 2.2f, 4.5f,
+
+            2.8f, 1.9f, 2.3f,  2.6f, 4.7f, 3.5f,
+            0.4f, 1.5f, 2.1f,  0.7f, 5.0f, 1.1f,
+
+
+            1.0f, 2.7f, 0.0f,  0.6f, 0.8f, 0.9f,
+            1.0f, 2.6f, 0.4f,  3.8f, 0.4f, 0.8f,
+
+            0.5f, 4.3f, 3.1f,  4.4f, 0.7f, 1.4f,
+            0.4f, 4.4f, 0.7f,  0.6f, 4.7f, 1.2f,
+
+        }));
+
+    auto input2 = MakeTensor<float, 5>(inputTensorInfo2, std::vector<float>(
+        {
+            4.4f, 3.0f, 1.0f,  0.0f, 3.9f, 3.1f,
+            1.7f, 2.9f, 1.3f,  0.4f, 0.4f, 4.3f,
+
+            4.5f, 0.2f, 2.2f,  4.1f, 3.9f, 3.0f,
+            0.1f, 2.5f, 4.1f,  4.6f, 1.5f, 0.0f,
+
+
+            0.5f, 4.9f, 2.5f,  1.5f, 3.4f, 4.5f,
+            2.0f, 3.0f, 4.9f,  1.6f, 2.4f, 3.4f,
+
+            3.6f, 1.8f, 1.3f,  2.6f, 2.1f, 4.8f,
+            2.0f, 4.3f, 4.0f,  0.2f, 0.6f, 4.4f,
+        }));
+
+    LayerTestResult<float, 5> ret(outputTensorInfo);
+    ret.outputExpected = MakeTensor<float, 5>(outputTensorInfo, std::vector<float>(
+        {
+            7.0f, 7.0f, 5.4f,  2.7f, 8.5f, 5.9f,
+            4.0f, 4.8f, 4.7f,  3.3f, 2.6f, 8.8f,
+
+            7.3f, 2.1f, 4.5f,  6.7f, 8.6f, 6.5f,
+            0.5f, 4.0f, 6.2f,  5.3f, 6.5f, 1.1f,
+
+
+            1.5f, 7.6f, 2.5f,  2.1f, 4.2f, 5.4f,
+            3.0f, 5.6f, 5.3f,  5.4f, 2.8f, 4.2f,
+
+            4.1f, 6.1f, 4.4f,  7.0f, 2.8f, 6.2f,
+            2.4f, 8.7f, 4.7f,  0.8f, 5.3f, 5.6f,
+        }));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
+    std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::AdditionQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
+    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
+
+    inputHandle1->Allocate();
+    inputHandle2->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0][0]);
+    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0][0]);
+
+    workload->PostAllocationConfigure();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0][0], outputHandle.get());
+
+    return ret;
+}
+
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 LayerTestResult<T, 4> AdditionBroadcastTestImpl(
     armnn::IWorkloadFactory& workloadFactory,
@@ -4103,25 +4200,25 @@
 }
 
 namespace {
-LayerTestResult<float,4> MultiplicationTestHelper(
+template<std::size_t NumDims>
+LayerTestResult<float,NumDims> MultiplicationTestHelper(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    const unsigned int shape0[4],
+    const unsigned int shape0[NumDims],
     const std::vector<float> & values0,
-    const unsigned int shape1[4],
+    const unsigned int shape1[NumDims],
     const std::vector<float> & values1,
-    const unsigned int outShape[4],
+    const unsigned int outShape[NumDims],
     const std::vector<float> & outValues)
 {
-    const uint32_t dimensionCount = 4;
-    armnn::TensorInfo inputTensorInfo0{dimensionCount, shape0, armnn::DataType::Float32};
-    armnn::TensorInfo inputTensorInfo1{dimensionCount, shape1, armnn::DataType::Float32};
-    armnn::TensorInfo outputTensorInfo{dimensionCount, outShape, armnn::DataType::Float32};
+    armnn::TensorInfo inputTensorInfo0{NumDims, shape0, armnn::DataType::Float32};
+    armnn::TensorInfo inputTensorInfo1{NumDims, shape1, armnn::DataType::Float32};
+    armnn::TensorInfo outputTensorInfo{NumDims, outShape, armnn::DataType::Float32};
 
-    auto input0 = MakeTensor<float, 4>(inputTensorInfo0, values0);
-    auto input1 = MakeTensor<float, 4>(inputTensorInfo1, values1);
+    auto input0 = MakeTensor<float, NumDims>(inputTensorInfo0, values0);
+    auto input1 = MakeTensor<float, NumDims>(inputTensorInfo1, values1);
 
-    LayerTestResult<float,4> ret(outputTensorInfo);
+    LayerTestResult<float,NumDims> ret(outputTensorInfo);
 
     std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
@@ -4139,15 +4236,15 @@
     inputHandle1->Allocate();
     outputHandle->Allocate();
 
-    CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
-    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
+    CopyDataToITensorHandle(inputHandle0.get(), input0.origin());
+    CopyDataToITensorHandle(inputHandle1.get(), input1.origin());
 
     workload->PostAllocationConfigure();
     workload->Execute();
 
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
 
-    ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outValues);
+    ret.outputExpected = MakeTensor<float, NumDims>(outputTensorInfo, outValues);
     return ret;
 }
 } // anonymous namespace
@@ -4176,14 +4273,81 @@
         2,  2,  2,  2,    6,  6,  6,  6,
         12, 12, 12, 12,  20, 20, 20, 20 });
 
-    return MultiplicationTestHelper(workloadFactory,
-                                    memoryManager,
-                                    shape,
-                                    input0,
-                                    shape,
-                                    input1,
-                                    shape,
-                                    output);
+    return MultiplicationTestHelper<4>(workloadFactory,
+                                       memoryManager,
+                                       shape,
+                                       input0,
+                                       shape,
+                                       input1,
+                                       shape,
+                                       output);
+}
+
+LayerTestResult<float,5> Multiplication5dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const unsigned int width = 3;
+    const unsigned int height = 2;
+    const unsigned int channelCount = 2;
+    const unsigned int batchSize = 2;
+    const unsigned int depth = 2;
+
+    unsigned int shape[] = { depth, batchSize, channelCount, height, width };
+
+    std::vector<float> input0({
+        1.80f, 0.20f, 2.30f,  1.30f, 2.10f, 1.00f,
+        2.60f, 0.60f, 2.10f,  2.30f, 2.30f, 2.00f,
+
+        2.50f, 1.00f, 2.90f,  3.10f, 1.50f, 2.40f,
+        2.80f, 1.10f, 1.00f,  3.20f, 1.00f, 2.30f,
+
+
+        0.30f, 2.20f, 1.00f,  0.20f, 1.60f, 1.40f,
+        0.80f, 3.20f, 0.10f,  0.10f, 3.10f, 2.10f,
+
+        1.50f, 2.40f, 1.40f,  0.70f, 2.40f, 1.40f,
+        1.60f, 1.20f, 1.90f,  0.80f, 0.00f, 0.10f,
+    });
+
+    std::vector<float> input1({
+        0.70f, 1.00f, 2.90f,  2.20f, 3.10f, 2.80f,
+        1.80f, 2.00f, 0.50f,  2.30f, 1.20f, 2.70f,
+
+        2.40f, 0.20f, 3.20f,  1.60f, 0.20f, 2.50f,
+        2.30f, 0.70f, 2.70f,  1.80f, 2.90f, 2.70f,
+
+
+        3.20f, 3.20f, 0.70f,  1.90f, 2.70f, 2.50f,
+        2.40f, 0.90f, 2.30f,  1.80f, 2.50f, 2.00f,
+
+        1.60f, 2.20f, 1.60f,  2.00f, 0.30f, 3.20f,
+        0.40f, 3.00f, 2.60f,  0.30f, 0.00f, 2.50f,
+    });
+
+    std::vector<float> output({
+        1.26f, 0.20f, 6.67f,  2.86f, 6.51f, 2.80f,
+        4.68f, 1.20f, 1.05f,  5.29f, 2.76f, 5.40f,
+
+        6.00f, 0.20f, 9.28f,  4.96f, 0.30f, 6.00f,
+        6.44f, 0.77f, 2.70f,  5.76f, 2.90f, 6.21f,
+
+
+        0.96f, 7.04f, 0.70f,  0.38f, 4.32f, 3.50f,
+        1.92f, 2.88f, 0.23f,  0.18f, 7.75f, 4.20f,
+
+        2.40f, 5.28f, 2.24f,  1.40f, 0.72f, 4.48f,
+        0.64f, 3.60f, 4.94f,  0.24f, 0.00f, 0.25f,
+    });
+
+    return MultiplicationTestHelper<5>(workloadFactory,
+                                       memoryManager,
+                                       shape,
+                                       input0,
+                                       shape,
+                                       input1,
+                                       shape,
+                                       output);
 }
 
 LayerTestResult<float, 4> MultiplicationBroadcast1ElementTest(
@@ -4198,14 +4362,14 @@
 
     std::vector<float> output({ 2, 4, 6, 8, 10, 12, 14, 16});
 
-    return MultiplicationTestHelper(workloadFactory,
-                                    memoryManager,
-                                    shape0,
-                                    input0,
-                                    shape1,
-                                    input1,
-                                    shape0,
-                                    output);
+    return MultiplicationTestHelper<4>(workloadFactory,
+                                       memoryManager,
+                                       shape0,
+                                       input0,
+                                       shape1,
+                                       input1,
+                                       shape0,
+                                       output);
 }
 
 LayerTestResult<float, 4> MultiplicationBroadcast1DVectorTest(
@@ -4226,14 +4390,14 @@
         7,   16,      9, 20,     11, 24,
         13,  28,     15, 32,     17, 36});
 
-    return MultiplicationTestHelper(workloadFactory,
-                                    memoryManager,
-                                    shape0,
-                                    input0,
-                                    shape1,
-                                    input1,
-                                    shape0,
-                                    output);
+    return MultiplicationTestHelper<4>(workloadFactory,
+                                       memoryManager,
+                                       shape0,
+                                       input0,
+                                       shape1,
+                                       input1,
+                                       shape0,
+                                       output);
 }
 
 LayerTestResult<float,4> CompareMultiplicationTest(
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index 0fe5d09..df33aa1 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -539,6 +539,11 @@
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
 
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 5> Reshape5dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
 LayerTestResult<float, 4> SimpleFloorTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
@@ -712,6 +717,10 @@
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
 
+LayerTestResult<float, 5> Addition5dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
 LayerTestResult<float, 4> AdditionBroadcast1ElementTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
@@ -764,6 +773,10 @@
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
 
+LayerTestResult<float, 5> Multiplication5dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
 LayerTestResult<float, 4> MultiplicationBroadcast1ElementTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
@@ -2084,6 +2097,37 @@
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
 
+template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+std::vector<T> ConvertToDataType(const std::vector<float>& input,
+                                 const armnn::TensorInfo& inputTensorInfo)
+{
+    std::vector<T> output(input.size());
+    auto outputTensorInfo = inputTensorInfo;
+    outputTensorInfo.SetDataType(ArmnnType);
+
+    std::unique_ptr<armnn::Encoder<float>> pOutputEncoder = armnn::MakeEncoder<float>(outputTensorInfo, output.data());
+    armnn::Encoder<float>& rOutputEncoder = *pOutputEncoder;
+
+    for (auto it = input.begin(); it != input.end(); ++it)
+    {
+        rOutputEncoder.Set(*it);
+        ++rOutputEncoder;
+    }
+    return output;
+}
+
+// Utility method to convert a single value to the correct type
+template <typename T>
+T ConvertToDataType(const float& value,
+                    const armnn::TensorInfo& tensorInfo)
+{
+    std::vector<T> output(1);
+    std::unique_ptr<armnn::Encoder<float>> pEncoder = armnn::MakeEncoder<float>(tensorInfo, output.data());
+    armnn::Encoder<float>& rEncoder = *pEncoder;
+    rEncoder.Set(value);
+    return output[0];
+}
+
 template<typename T, typename B>
 LayerTestResult<T, 2> SimpleFullyConnectedTestImpl(
         armnn::IWorkloadFactory& workloadFactory,
@@ -2130,35 +2174,75 @@
     return result;
 }
 
-template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-std::vector<T> ConvertToDataType(const std::vector<float>& input,
-                                 const armnn::TensorInfo& inputTensorInfo)
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 2> FullyConnectedTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        bool biasEnabled)
 {
-    std::vector<T> output(input.size());
-    auto outputTensorInfo = inputTensorInfo;
-    outputTensorInfo.SetDataType(ArmnnType);
+    constexpr static unsigned int inputWidth = 3u;
+    constexpr static unsigned int inputHeight = 2u;
+    constexpr static unsigned int inputChannels = 1u;
 
-    std::unique_ptr<armnn::Encoder<float>> pOutputEncoder = armnn::MakeEncoder<float>(outputTensorInfo, output.data());
-    armnn::Encoder<float>& rOutputEncoder = *pOutputEncoder;
+    constexpr static unsigned int inputSize = inputWidth * inputHeight * inputChannels;
 
-    for (auto it = input.begin(); it != input.end(); ++it)
+    constexpr static unsigned int outputChannels = 2u;
+
+    armnn::TensorInfo inputTensorInfo({ 1, inputChannels, inputHeight, inputWidth }, ArmnnType);
+    inputTensorInfo.SetQuantizationScale(0.1f);
+    inputTensorInfo.SetQuantizationOffset(63);
+
+    armnn::TensorInfo outputTensorInfo({ 1, outputChannels }, ArmnnType);
+    outputTensorInfo.SetQuantizationScale(5.f);
+    outputTensorInfo.SetQuantizationOffset(biasEnabled ? -50 : 10);
+
+    armnn::TensorInfo weightsDesc({ outputChannels, inputSize }, ArmnnType);
+    weightsDesc.SetQuantizationScale(0.2f);
+    weightsDesc.SetQuantizationOffset(93);
+
+    armnn::TensorInfo biasesDesc({ outputChannels }, GetBiasTypeFromWeightsType(weightsDesc.GetDataType()).value());
+    biasesDesc.SetQuantizationScale(inputTensorInfo.GetQuantizationScale() * weightsDesc.GetQuantizationScale());
+    biasesDesc.SetQuantizationOffset(0);
+
+    LayerTestResult<T, 2> result(outputTensorInfo);
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(
+        {
+            -1.2f, 6.1f, -3.5f,
+            18.8f, -5.5f, 2.9f
+        },
+        inputTensorInfo));
+
+    auto weights = MakeTensor<T, 2>(weightsDesc, ConvertToDataType<ArmnnType>(
+        {
+            -8.4f, 20.0f, -10.4f, -8, 16.4f, -11.8f,
+            23.4f, 10.4f, -14.0f, -3.8f, -11.8f, 11.4f
+        },
+        weightsDesc));
+
+    auto bias = MakeTensor<int32_t, 1>(biasesDesc, std::vector<int32_t>{9250, 67500});
+
+    result = SimpleFullyConnectedTestImpl<T>(
+            workloadFactory,
+            memoryManager,
+            inputTensorInfo, outputTensorInfo,
+            weightsDesc, biasesDesc,
+            weights, bias, input,
+            biasEnabled, true
+    );
+
+    if (biasEnabled)
     {
-        rOutputEncoder.Set(*it);
-        ++rOutputEncoder;
+        result.outputExpected = MakeTensor<T, 2>(outputTensorInfo,
+                                                 ConvertToDataType<ArmnnType>({80.f, 1460.f}, outputTensorInfo));
     }
-    return output;
-}
+    else
+    {
+        result.outputExpected = MakeTensor<T, 2>(outputTensorInfo,
+                                                 ConvertToDataType<ArmnnType>({-107.04f, 110.f}, outputTensorInfo));
+    }
 
-// Utility method to convert a single value to the correct type
-template <typename T>
-T ConvertToDataType(const float& value,
-                    const armnn::TensorInfo& tensorInfo)
-{
-    std::vector<T> output(1);
-    std::unique_ptr<armnn::Encoder<float>> pEncoder = armnn::MakeEncoder<float>(tensorInfo, output.data());
-    armnn::Encoder<float>& rEncoder = *pEncoder;
-    rEncoder.Set(value);
-    return output[0];
+    return result;
 }
 
 template<armnn::DataType ArmnnType, typename T>
@@ -2354,8 +2438,8 @@
                                 inputValues, expectedOutputValues);
 }
 
-template<typename T>
-LayerTestResult<T, 4> SimpleReshapeTestImpl(
+template<typename T, size_t NumDims>
+LayerTestResult<T, NumDims> SimpleReshapeTestImpl(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
     armnn::TensorInfo inputTensorInfo,
@@ -2363,10 +2447,10 @@
     const std::vector<T>& inputData,
     const std::vector<T>& outputExpectedData)
 {
-    auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
+    auto input = MakeTensor<T, NumDims>(inputTensorInfo, inputData);
 
-    LayerTestResult<T, 4> ret(outputTensorInfo);
-    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputExpectedData);
+    LayerTestResult<T, NumDims> ret(outputTensorInfo);
+    ret.outputExpected = MakeTensor<T, NumDims>(outputTensorInfo, outputExpectedData);
 
     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
@@ -2381,87 +2465,16 @@
     inputHandle->Allocate();
     outputHandle->Allocate();
 
-    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+    CopyDataToITensorHandle(inputHandle.get(), input.origin());
 
     workload->Execute();
 
-    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
 
     return ret;
 }
 
 template<armnn::DataType ArmnnType, typename T>
-LayerTestResult<T, 2> FullyConnectedTest(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        bool biasEnabled)
-{
-    constexpr static unsigned int inputWidth = 3u;
-    constexpr static unsigned int inputHeight = 2u;
-    constexpr static unsigned int inputChannels = 1u;
-
-    constexpr static unsigned int inputSize = inputWidth * inputHeight * inputChannels;
-
-    constexpr static unsigned int outputChannels = 2u;
-
-    armnn::TensorInfo inputTensorInfo({ 1, inputChannels, inputHeight, inputWidth }, ArmnnType);
-    inputTensorInfo.SetQuantizationScale(0.1f);
-    inputTensorInfo.SetQuantizationOffset(63);
-
-    armnn::TensorInfo outputTensorInfo({ 1, outputChannels }, ArmnnType);
-    outputTensorInfo.SetQuantizationScale(5.f);
-    outputTensorInfo.SetQuantizationOffset(biasEnabled ? -50 : 10);
-
-    armnn::TensorInfo weightsDesc({ outputChannels, inputSize }, ArmnnType);
-    weightsDesc.SetQuantizationScale(0.2f);
-    weightsDesc.SetQuantizationOffset(93);
-
-    armnn::TensorInfo biasesDesc({ outputChannels }, GetBiasTypeFromWeightsType(weightsDesc.GetDataType()).value());
-    biasesDesc.SetQuantizationScale(inputTensorInfo.GetQuantizationScale() * weightsDesc.GetQuantizationScale());
-    biasesDesc.SetQuantizationOffset(0);
-
-    LayerTestResult<T, 2> result(outputTensorInfo);
-
-    auto input = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(
-        {
-            -1.2f, 6.1f, -3.5f,
-            18.8f, -5.5f, 2.9f
-        },
-        inputTensorInfo));
-
-    auto weights = MakeTensor<T, 2>(weightsDesc, ConvertToDataType<ArmnnType>(
-        {
-            -8.4f, 20.0f, -10.4f, -8, 16.4f, -11.8f,
-            23.4f, 10.4f, -14.0f, -3.8f, -11.8f, 11.4f
-        },
-        weightsDesc));
-
-    auto bias = MakeTensor<int32_t, 1>(biasesDesc, std::vector<int32_t>{9250, 67500});
-
-    result = SimpleFullyConnectedTestImpl<T>(
-            workloadFactory,
-            memoryManager,
-            inputTensorInfo, outputTensorInfo,
-            weightsDesc, biasesDesc,
-            weights, bias, input,
-            biasEnabled, true
-    );
-
-    if (biasEnabled)
-    {
-        result.outputExpected = MakeTensor<T, 2>(outputTensorInfo,
-                                                 ConvertToDataType<ArmnnType>({80.f, 1460.f}, outputTensorInfo));
-    }
-    else
-    {
-        result.outputExpected = MakeTensor<T, 2>(outputTensorInfo,
-                                                 ConvertToDataType<ArmnnType>({-107.04f, 110.f}, outputTensorInfo));
-    }
-
-    return result;
-}
-
-template<armnn::DataType ArmnnType, typename T>
 LayerTestResult<T, 4> SimpleReshapeTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
@@ -2509,7 +2522,69 @@
         },
         outputTensorInfo);
 
-    return SimpleReshapeTestImpl<T>(
+    return SimpleReshapeTestImpl<T, 4>(
+        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 5> Reshape5dTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[] = { 2, 2, 8, 1, 1 };
+    unsigned int outputShape[] = { 2, 2, 2, 2, 2 };
+
+    inputTensorInfo = armnn::TensorInfo(5, inputShape, ArmnnType);
+    inputTensorInfo.SetQuantizationScale(1.0f);
+    outputTensorInfo = armnn::TensorInfo(5, outputShape, ArmnnType);
+    outputTensorInfo.SetQuantizationScale(1.0f);
+
+    auto input = ConvertToDataType<ArmnnType>(
+        {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f,
+            8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
+
+            16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f,
+            24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f,
+        },
+        inputTensorInfo);
+
+    auto outputExpected = ConvertToDataType<ArmnnType>(
+        {
+            0.0f, 1.0f,
+            2.0f, 3.0f,
+
+            4.0f, 5.0f,
+            6.0f, 7.0f,
+
+
+            8.0f, 9.0f,
+            10.0f, 11.0f,
+
+            12.0f, 13.0f,
+            14.0f, 15.0f,
+
+
+
+            16.0f, 17.0f,
+            18.0f, 19.0f,
+
+            20.0f, 21.0f,
+            22.0f, 23.0f,
+
+
+            24.0f, 25.0f,
+            26.0f, 27.0f,
+
+            28.0f, 29.0f,
+            30.0f, 31.0f,
+        },
+        outputTensorInfo);
+
+    return SimpleReshapeTestImpl<T, 5>(
         workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected);
 }
 
@@ -4746,3 +4821,88 @@
         outputExpectedData
     );
 }
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 5> Stack5dOutputTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo ({ 2, 2, 2, 3 }, ArmnnType);
+    armnn::TensorInfo outputTensorInfo({ 2, 2, 2, 2, 3 }, ArmnnType);
+
+    std::vector<std::vector<T>> inputData;
+
+    inputData.push_back(
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        7, 8, 9,
+        10, 11, 12,
+
+
+        13, 14, 15,
+        16, 17, 18,
+
+        19, 20, 21,
+        22, 23, 24
+    });
+
+    inputData.push_back(
+    {
+        25, 26, 27,
+        28, 29, 30,
+
+        31, 32, 33,
+        34, 35, 36,
+
+
+        37, 38, 39,
+        40, 41, 42,
+
+        43, 44, 45,
+        46, 47, 48
+    });
+
+    std::vector<T> outputExpectedData =
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        7, 8, 9,
+        10, 11, 12,
+
+
+        25, 26, 27,
+        28, 29, 30,
+
+        31, 32, 33,
+        34, 35, 36,
+
+
+
+        13, 14, 15,
+        16, 17, 18,
+
+        19, 20, 21,
+        22, 23, 24,
+
+
+        37, 38, 39,
+        40, 41, 42,
+
+        43, 44, 45,
+        46, 47, 48
+
+    };
+
+    return StackTestHelper<ArmnnType, T, 5>(
+        workloadFactory,
+        memoryManager,
+        inputTensorInfo,
+        outputTensorInfo,
+        1U,
+        inputData,
+        outputExpectedData
+    );
+}
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index c2ccd21..d3f3921 100644
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -255,6 +255,7 @@
 
 // Add
 ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest)
+ARMNN_AUTO_TEST_CASE(Add5d, Addition5dTest)
 ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest)
 ARMNN_AUTO_TEST_CASE(AddBroadcast, AdditionBroadcastTest)
 
@@ -286,6 +287,7 @@
 ARMNN_AUTO_TEST_CASE(MultiplicationUint8, MultiplicationUint8Test)
 ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1ElementUint8, MultiplicationBroadcast1ElementUint8Test)
 ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVectorUint8, MultiplicationBroadcast1DVectorUint8Test)
+ARMNN_AUTO_TEST_CASE(Multiplication5d, Multiplication5dTest)
 
 // Batch Norm
 ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
@@ -359,6 +361,7 @@
 // Reshape
 ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeTest<DataType::Float32>)
 ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeTest<DataType::QuantisedAsymm8>)
+ARMNN_AUTO_TEST_CASE(Reshape5d, Reshape5dTest<DataType::Float32>)
 
 // Pad
 ARMNN_AUTO_TEST_CASE(PadFloat322d, PadFloat322dTest)
@@ -504,6 +507,7 @@
 ARMNN_AUTO_TEST_CASE(Stack4dOutput2Axis,       Stack4dOutput2AxisTest<DataType::Float32>)
 ARMNN_AUTO_TEST_CASE(Stack4dOutput3Axis,       Stack4dOutput3AxisTest<DataType::Float32>)
 ARMNN_AUTO_TEST_CASE(Stack3dOutput1Axis3Input, Stack3dOutput1Axis3InputTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE(Stack5dOutput,            Stack5dOutputTest<DataType::Float32>)
 
 // Strided Slice
 ARMNN_AUTO_TEST_CASE(StridedSlice4DFloat32, StridedSlice4DFloat32Test)
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index 8ef1462..e8f69d2 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -394,6 +394,7 @@
 
 // Add
 ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest)
+ARMNN_AUTO_TEST_CASE(Add5d, Addition5dTest)
 ARMNN_AUTO_TEST_CASE(AddBroadcast, AdditionBroadcastTest)
 ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest)
 
@@ -412,6 +413,7 @@
 ARMNN_AUTO_TEST_CASE(MultiplicationUint8, MultiplicationUint8Test)
 ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1ElementUint8, MultiplicationBroadcast1ElementUint8Test)
 ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVectorUint8, MultiplicationBroadcast1DVectorUint8Test)
+ARMNN_AUTO_TEST_CASE(Multiplication5d, Multiplication5dTest)
 
 // Batch Norm
 ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
@@ -492,6 +494,7 @@
 // Reshape
 ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeTest<armnn::DataType::Float32>)
 ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeTest<armnn::DataType::QuantisedAsymm8>)
+ARMNN_AUTO_TEST_CASE(Reshape5d, Reshape5dTest<armnn::DataType::Float32>)
 
 // Pad
 ARMNN_AUTO_TEST_CASE(PadFloat322d, PadFloat322dTest)
@@ -723,6 +726,7 @@
 ARMNN_AUTO_TEST_CASE(Stack4dOutput2Axis,       Stack4dOutput2AxisTest<armnn::DataType::Float32>)
 ARMNN_AUTO_TEST_CASE(Stack4dOutput3Axis,       Stack4dOutput3AxisTest<armnn::DataType::Float32>)
 ARMNN_AUTO_TEST_CASE(Stack3dOutput1Axis3Input, Stack3dOutput1Axis3InputTest<armnn::DataType::Float32>)
+ARMNN_AUTO_TEST_CASE(Stack5dOutput,            Stack5dOutputTest<armnn::DataType::Float32>)
 
 // ============================================================================
 // COMPARE tests
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index f7a35f6..d0032f6 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -468,6 +468,7 @@
 
 // Add
 ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest)
+ARMNN_AUTO_TEST_CASE(Add5d, Addition5dTest)
 ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest)
 ARMNN_AUTO_TEST_CASE(AddBroadcast, AdditionBroadcastTest)
 
@@ -552,6 +553,7 @@
 ARMNN_AUTO_TEST_CASE(MultiplicationInt16, MultiplicationInt16Test)
 ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1ElementInt16, MultiplicationBroadcast1ElementInt16Test)
 ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVectorInt16, MultiplicationBroadcast1DVectorInt16Test)
+ARMNN_AUTO_TEST_CASE(Multiplication5d, Multiplication5dTest)
 
 // Batch Norm
 ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
@@ -863,6 +865,7 @@
 ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeTest<DataType::Float32>)
 ARMNN_AUTO_TEST_CASE(SimpleReshapeQuantisedAsymm8, SimpleReshapeTest<DataType::QuantisedAsymm8>)
 ARMNN_AUTO_TEST_CASE(SimpleReshapeQuantisedSymm16, SimpleReshapeTest<DataType::QuantisedSymm16>)
+ARMNN_AUTO_TEST_CASE(Reshape5d, Reshape5dTest<DataType::Float32>)
 
 // Rsqrt
 ARMNN_AUTO_TEST_CASE(Rsqrt2d, Rsqrt2dTest<DataType::Float32>)
@@ -1324,5 +1327,6 @@
 ARMNN_AUTO_TEST_CASE(Stack4dOutput2Axis,       Stack4dOutput2AxisTest<DataType::Float32>)
 ARMNN_AUTO_TEST_CASE(Stack4dOutput3Axis,       Stack4dOutput3AxisTest<DataType::Float32>)
 ARMNN_AUTO_TEST_CASE(Stack3dOutput1Axis3Input, Stack3dOutput1Axis3InputTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE(Stack5dOutput,            Stack5dOutputTest<DataType::Float32>)
 
 BOOST_AUTO_TEST_SUITE_END()