IVGCVSW-3455 Support dynamic output shape in hal_1_2::HalPolicy::ConvertDepthwiseConv2d

Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Change-Id: Iba64a674d772a76ca071553cb423ed870fae9bfd
diff --git a/1.2/HalPolicy.cpp b/1.2/HalPolicy.cpp
index 69cc471..0c57636 100644
--- a/1.2/HalPolicy.cpp
+++ b/1.2/HalPolicy.cpp
@@ -340,7 +340,6 @@
     }
 
     const armnn::TensorInfo& inputInfo  = input.GetTensorInfo();
-    const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
 
     // ArmNN does not currently support non-fixed weights or bias
     // Find the shape of the weights tensor. In AndroidNN this will be [ 1, H, W, I * M ]
@@ -447,6 +446,22 @@
     desc.m_BiasEnabled = true;
     armnn::Optional<armnn::TensorInfo> biases(bias.GetInfo());
 
+    armnn::TensorInfo outputInfo = GetTensorInfoForOperand(*output);
+    if (IsDynamicOutput(outputInfo))
+    {
+        try
+        {
+            ALOGD("Output shape not set, will infer from inputs");
+            outputInfo.SetShape(InferDepthwiseConvolution2dOutputShape(inputInfo.GetShape(),
+                                                                       weights.GetInfo().GetShape(),
+                                                                       desc));
+        }
+        catch (armnn::Exception& e)
+        {
+            return Fail("%s: Could not infer dynamic output shape: %s", __func__, e.what());
+        }
+    }
+
     bool isSupported = false;
     FORWARD_LAYER_SUPPORT_FUNC(__func__,
                                IsDepthwiseConvolutionSupported,
@@ -457,6 +472,7 @@
                                desc,
                                weights.GetInfo(),
                                biases);
+
     if (!isSupported)
     {
         return false;
@@ -464,6 +480,7 @@
 
     armnn::IConnectableLayer* startLayer =
         data.m_Network->AddDepthwiseConvolution2dLayer(desc, weights, armnn::Optional<armnn::ConstTensor>(bias));
+
     if (!startLayer)
     {
         return Fail("%s: AddDepthwiseConvolution2dLayer failed", __func__);
@@ -477,7 +494,12 @@
 
     input.Connect(startLayer->GetInputSlot(0));
 
-    return SetupAndTrackLayerOutputSlot<hal_1_2::HalPolicy>(operation, 0, *endLayer, model, data);
+    return SetupAndTrackLayerOutputSlot<hal_1_2::HalPolicy>(operation,
+                                                            0,
+                                                            *endLayer,
+                                                            model,
+                                                            data,
+                                                            armnn::Optional<armnn::TensorInfo>(outputInfo));
 }
 
 bool HalPolicy::ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data)
diff --git a/OutputShapeUtils.cpp b/OutputShapeUtils.cpp
index 6a9bf90..285e25f 100644
--- a/OutputShapeUtils.cpp
+++ b/OutputShapeUtils.cpp
@@ -43,8 +43,53 @@
     return outputShape;
 }
 
-} // namespace annonymous
+// Infers the 4D output shape of a (depthwise) 2D convolution from its input shape, kernel shape and descriptor.
+// Depthwise kernels are read as [ M, I, H, W ] independently of the data layout (NOTE(review): confirm at caller).
+// Throws InvalidArgumentException on non-4D input, zero stride, or a dilated kernel larger than the padded input.
+template<typename ConvolutionDescriptor>
+TensorShape InferConvolution2dOutputShapeImpl(const TensorShape& inputShape,
+                                              const TensorShape& kernelShape,
+                                              const ConvolutionDescriptor& descriptor,
+                                              bool isDepthwiseConvolution)
+{
+    if (inputShape.GetNumDimensions() != 4)
+    {
+        throw InvalidArgumentException("Input shape must be 4D");
+    }
 
+    // Strides are used as divisors below, so reject zero instead of dividing by it
+    if (descriptor.m_StrideX == 0 || descriptor.m_StrideY == 0)
+    {
+        throw InvalidArgumentException("Stride must be non-zero");
+    }
+
+    armnnUtils::DataLayoutIndexed dataLayoutIndex(descriptor.m_DataLayout);
+    const unsigned int cIndex = dataLayoutIndex.GetChannelsIndex();
+    const unsigned int wIndex = dataLayoutIndex.GetWidthIndex();
+    const unsigned int hIndex = dataLayoutIndex.GetHeightIndex();
+    // Depthwise kernel: height is dimension 2 and width is dimension 3
+    const unsigned int wKernel  = isDepthwiseConvolution ? kernelShape[3] : kernelShape[wIndex];
+    const unsigned int hKernel  = isDepthwiseConvolution ? kernelShape[2] : kernelShape[hIndex];
+    const unsigned int wDilated = wKernel + (descriptor.m_DilationX - 1) * (wKernel - 1);
+    const unsigned int hDilated = hKernel + (descriptor.m_DilationY - 1) * (hKernel - 1);
+    const unsigned int wPadded  = inputShape[wIndex] + descriptor.m_PadLeft + descriptor.m_PadRight;
+    const unsigned int hPadded  = inputShape[hIndex] + descriptor.m_PadTop + descriptor.m_PadBottom;
+
+    // The subtractions below are unsigned and would wrap around if the kernel did not fit
+    if (wDilated > wPadded || hDilated > hPadded)
+    {
+        throw InvalidArgumentException("Dilated kernel must not be larger than the padded input");
+    }
+
+    TensorShape outputShape(4);
+    outputShape[0]      = inputShape[0];
+    outputShape[cIndex] = isDepthwiseConvolution ? kernelShape[0] * inputShape[cIndex] : kernelShape[0];
+    outputShape[wIndex] = 1 + ((wPadded - wDilated) / descriptor.m_StrideX);
+    outputShape[hIndex] = 1 + ((hPadded - hDilated) / descriptor.m_StrideY);
+    return outputShape;
+}
+
+} // anonymous namespace
 
 namespace armnn_driver
 {
@@ -60,42 +105,14 @@
                                           const TensorShape& kernelShape,
                                           const Convolution2dDescriptor& descriptor)
 {
-    if (inputShape.GetNumDimensions() != 4)
-    {
-        throw InvalidArgumentException("Input shape for Convolution2d must be 4D");
-    }
+    return InferConvolution2dOutputShapeImpl(inputShape, kernelShape, descriptor, false);
+}
 
-    armnnUtils::DataLayoutIndexed dataLayoutIndex(descriptor.m_DataLayout);
-
-    const unsigned int cIndex = dataLayoutIndex.GetChannelsIndex();
-    const unsigned int wIndex = dataLayoutIndex.GetWidthIndex();
-    const unsigned int hIndex = dataLayoutIndex.GetHeightIndex();
-
-    const unsigned int wInput = inputShape[wIndex];
-    const unsigned int hInput = inputShape[hIndex];
-
-    const unsigned int wKernel  = kernelShape[wIndex];
-    const unsigned int wDilated = wKernel + (descriptor.m_DilationX - 1) * (wKernel - 1);
-
-    const unsigned int wRead   = (wInput + descriptor.m_PadLeft + descriptor.m_PadRight) - wDilated;
-    const unsigned int wOutput = 1 + (wRead / descriptor.m_StrideX);
-
-    const unsigned int hKernel  = kernelShape[hIndex];
-    const unsigned int hDilated = hKernel + (descriptor.m_DilationY - 1) * (hKernel - 1);
-
-    const unsigned int hRead   = (hInput + descriptor.m_PadTop + descriptor.m_PadBottom) - hDilated;
-    const unsigned int hOutput = 1 + (hRead / descriptor.m_StrideY);
-
-    const unsigned int batches  = inputShape[0];
-    const unsigned int channels = kernelShape[0];
-
-    TensorShape outputShape(4);
-    outputShape[0]      = batches;
-    outputShape[cIndex] = channels;
-    outputShape[wIndex] = wOutput;
-    outputShape[hIndex] = hOutput;
-
-    return outputShape;
+TensorShape InferDepthwiseConvolution2dOutputShape(const TensorShape& inputShape,
+                                                   const TensorShape& kernelShape,
+                                                   const DepthwiseConvolution2dDescriptor& descriptor)
+{ // Thin wrapper: all of the shape arithmetic lives in the shared implementation
+    return InferConvolution2dOutputShapeImpl(inputShape, kernelShape, descriptor, true); // true: depthwise mode
 }
 
 TensorShape InferMaximumOutputShape(const armnn::TensorShape& input0Shape,
diff --git a/OutputShapeUtils.hpp b/OutputShapeUtils.hpp
index 5868695..bcb4347 100644
--- a/OutputShapeUtils.hpp
+++ b/OutputShapeUtils.hpp
@@ -16,6 +16,10 @@
                                                  const armnn::TensorShape& kernelShape,
                                                  const armnn::Convolution2dDescriptor& descriptor);
 
+armnn::TensorShape InferDepthwiseConvolution2dOutputShape(const armnn::TensorShape& inputShape, // must be 4D
+                                                          const armnn::TensorShape& kernelShape,
+                                                          const armnn::DepthwiseConvolution2dDescriptor& descriptor);
+
 armnn::TensorShape InferMaximumOutputShape(const armnn::TensorShape& input0Shape,
                                            const armnn::TensorShape& input1Shape);