IVGCVSW-3452 Support dynamic output shape in hal_1_2::HalPolicy::ConvertConv2d

Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Change-Id: I8694e1f1c62da6f74eb356558b17a63758ccfdad
diff --git a/1.2/HalPolicy.cpp b/1.2/HalPolicy.cpp
index a82db80..69cc471 100644
--- a/1.2/HalPolicy.cpp
+++ b/1.2/HalPolicy.cpp
@@ -172,13 +172,8 @@
         return Fail("%s: Could not read output 0", __func__);
     }
 
-    const armnn::TensorInfo& inputInfo  = input.GetTensorInfo();
-    const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
-
-    if (IsDynamicOutput(outputInfo))
-    {
-        return Fail("%s: Dynamic output not supported", __func__);
-    }
+    const armnn::TensorInfo& inputInfo = input.GetTensorInfo();
+    armnn::TensorInfo outputInfo = GetTensorInfoForOperand(*output);
 
     armnn::Convolution2dDescriptor desc;
     desc.m_DataLayout = armnn::DataLayout::NHWC;
@@ -272,6 +267,21 @@
     desc.m_BiasEnabled = true;
     armnn::Optional<armnn::TensorInfo> biases(bias.GetInfo());
 
+    if (IsDynamicOutput(outputInfo))
+    {
+        try
+        {
+            ALOGD("Output shape not set, will infer from inputs");
+            outputInfo.SetShape(InferConvolution2dOutputShape(inputInfo.GetShape(),
+                                                              weights.GetInfo().GetShape(),
+                                                              desc));
+        }
+        catch (armnn::Exception& e)
+        {
+            return Fail("%s: Could not infer dynamic output shape: %s", __func__, e.what());
+        }
+    }
+
     bool isSupported = false;
     FORWARD_LAYER_SUPPORT_FUNC(__func__,
                                IsConvolution2dSupported,
@@ -282,6 +292,7 @@
                                desc,
                                weights.GetInfo(),
                                biases);
+
     if (!isSupported)
     {
         return false;
@@ -304,7 +315,12 @@
 
     input.Connect(startLayer->GetInputSlot(0));
 
-    return SetupAndTrackLayerOutputSlot<hal_1_2::HalPolicy>(operation, 0, *endLayer, model, data);
+    return SetupAndTrackLayerOutputSlot<hal_1_2::HalPolicy>(operation,
+                                                            0,
+                                                            *endLayer,
+                                                            model,
+                                                            data,
+                                                            armnn::Optional<armnn::TensorInfo>(outputInfo));
 }
 
 bool HalPolicy::ConvertDepthwiseConv2d(const Operation& operation, const Model& model, ConversionData& data)
diff --git a/OutputShapeUtils.cpp b/OutputShapeUtils.cpp
index b6cdb31..6a9bf90 100644
--- a/OutputShapeUtils.cpp
+++ b/OutputShapeUtils.cpp
@@ -5,6 +5,8 @@
 
 #include "OutputShapeUtils.hpp"
 
+#include <DataLayoutIndexed.hpp>
+
 #include <algorithm>
 #include <vector>
 
@@ -54,6 +56,66 @@
     return outputInfo.GetNumElements() == 0u;
 }
 
+// Infers the output shape of a Convolution2d from the input shape, kernel shape and descriptor.
+//
+// The data layout in the descriptor selects which dimensions of inputShape are width, height and channels;
+// the same indices are used to read the kernel's width and height, and kernelShape[0] is taken as the
+// number of output channels. Each spatial extent is 1 + (input + padding - dilatedKernel) / stride.
+//
+// Throws InvalidArgumentException if either shape is not 4D, if a stride is zero, or if the dilated
+// kernel does not fit into the padded input (the unsigned arithmetic would otherwise wrap around).
+TensorShape InferConvolution2dOutputShape(const TensorShape& inputShape,
+                                          const TensorShape& kernelShape,
+                                          const Convolution2dDescriptor& descriptor)
+{
+    if (inputShape.GetNumDimensions() != 4)
+    {
+        throw InvalidArgumentException("Input shape for Convolution2d must be 4D");
+    }
+
+    if (kernelShape.GetNumDimensions() != 4)
+    {
+        throw InvalidArgumentException("Kernel shape for Convolution2d must be 4D");
+    }
+
+    if (descriptor.m_StrideX == 0 || descriptor.m_StrideY == 0)
+    {
+        throw InvalidArgumentException("Stride for Convolution2d must be non-zero");
+    }
+
+    armnnUtils::DataLayoutIndexed dataLayoutIndex(descriptor.m_DataLayout);
+
+    const unsigned int cIndex = dataLayoutIndex.GetChannelsIndex();
+    const unsigned int wIndex = dataLayoutIndex.GetWidthIndex();
+    const unsigned int hIndex = dataLayoutIndex.GetHeightIndex();
+
+    const unsigned int wPadded = inputShape[wIndex] + descriptor.m_PadLeft + descriptor.m_PadRight;
+    const unsigned int hPadded = inputShape[hIndex] + descriptor.m_PadTop + descriptor.m_PadBottom;
+
+    const unsigned int wKernel  = kernelShape[wIndex];
+    const unsigned int wDilated = wKernel + (descriptor.m_DilationX - 1) * (wKernel - 1);
+
+    const unsigned int hKernel  = kernelShape[hIndex];
+    const unsigned int hDilated = hKernel + (descriptor.m_DilationY - 1) * (hKernel - 1);
+
+    // Without this check the subtractions below would wrap around and yield a huge bogus extent
+    if (wDilated > wPadded || hDilated > hPadded)
+    {
+        throw InvalidArgumentException("Dilated kernel for Convolution2d must fit into the padded input");
+    }
+
+    const unsigned int wOutput = 1 + ((wPadded - wDilated) / descriptor.m_StrideX);
+    const unsigned int hOutput = 1 + ((hPadded - hDilated) / descriptor.m_StrideY);
+
+    TensorShape outputShape(4);
+    outputShape[0]      = inputShape[0];  // batches
+    outputShape[cIndex] = kernelShape[0]; // output channels
+    outputShape[wIndex] = wOutput;
+    outputShape[hIndex] = hOutput;
+
+    return outputShape;
+}
+
 TensorShape InferMaximumOutputShape(const armnn::TensorShape& input0Shape,
                                     const armnn::TensorShape& input1Shape)
 {
diff --git a/OutputShapeUtils.hpp b/OutputShapeUtils.hpp
index dac4a19..5868695 100644
--- a/OutputShapeUtils.hpp
+++ b/OutputShapeUtils.hpp
@@ -12,6 +12,10 @@
 
 bool IsDynamicOutput(const armnn::TensorInfo& outputInfo);
 
+armnn::TensorShape InferConvolution2dOutputShape(const armnn::TensorShape& inputShape,
+                                                 const armnn::TensorShape& kernelShape,
+                                                 const armnn::Convolution2dDescriptor& descriptor);
+
 armnn::TensorShape InferMaximumOutputShape(const armnn::TensorShape& input0Shape,
                                            const armnn::TensorShape& input1Shape);