IVGCVSW-4483 Remove boost::polymorphic_downcast

 * replace boost::polymorphic_downcast with armnn::PolymorphicDowncast
   (see the usage sketch below)
 * remove the now-unnecessary boost/polymorphic_cast.hpp includes
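
Usage sketch (illustrative only: Base and Derived are made-up types,
not part of this change; the behaviour follows the PolymorphicDowncast.hpp
hunk below):

    #include <armnn/utility/PolymorphicDowncast.hpp>

    struct Base    { virtual ~Base() = default; };
    struct Derived : Base {};

    int main()
    {
        Derived derived;
        Base*   base = &derived;

        // before: boost::polymorphic_downcast<Derived*>(base)
        // after:  a static_cast that ARMNN_POLYMORPHIC_CAST_CHECK verifies
        //         against dynamic_cast; a mismatched cast throws
        //         std::bad_cast (exercised in UtilityTests.cpp)
        Derived* derivedPtr = armnn::PolymorphicDowncast<Derived*>(base);

        return derivedPtr == &derived ? 0 : 1;
    }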

Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: Ie603fb82860fe05fee547dc78073230cc62b2e1f
diff --git a/include/armnn/utility/PolymorphicDowncast.hpp b/include/armnn/utility/PolymorphicDowncast.hpp
index d529867..b4a5cad 100644
--- a/include/armnn/utility/PolymorphicDowncast.hpp
+++ b/include/armnn/utility/PolymorphicDowncast.hpp
@@ -30,11 +30,11 @@
 
 
 template<typename DestType, typename SourceType>
-DestType polymorphic_downcast(SourceType value)
+DestType PolymorphicDowncast(SourceType value)
 {
     static_assert(std::is_pointer<SourceType>::value &&
                   std::is_pointer<DestType>::value,
-                  "polymorphic_downcast only works with pointer types.");
+                  "PolymorphicDowncast only works with pointer types.");
 
     ARMNN_POLYMORPHIC_CAST_CHECK(dynamic_cast<DestType>(value) == static_cast<DestType>(value));
     return static_cast<DestType>(value);
diff --git a/src/armnn/BackendSettings.hpp b/src/armnn/BackendSettings.hpp
index 211af8b..08a8921 100644
--- a/src/armnn/BackendSettings.hpp
+++ b/src/armnn/BackendSettings.hpp
@@ -5,10 +5,11 @@
 
 #pragma once
 
-#include <armnn/BackendId.hpp>
-
 #include "DeviceSpec.hpp"
 
+#include <armnn/BackendId.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <vector>
 
 namespace armnn
@@ -84,7 +85,7 @@
         m_PreferredBackends = preferredBackends;
 
         // Obtain list of supported backends
-        const DeviceSpec& spec = *boost::polymorphic_downcast<const DeviceSpec*>(&deviceSpec);
+        const DeviceSpec& spec = *PolymorphicDowncast<const DeviceSpec*>(&deviceSpec);
         m_SupportedBackends = spec.GetSupportedBackends();
     }
 
diff --git a/src/armnn/DynamicQuantizationVisitor.cpp b/src/armnn/DynamicQuantizationVisitor.cpp
index 0b03a38..2202910 100644
--- a/src/armnn/DynamicQuantizationVisitor.cpp
+++ b/src/armnn/DynamicQuantizationVisitor.cpp
@@ -6,8 +6,9 @@
 #include "DynamicQuantizationVisitor.hpp"
 #include "NetworkUtils.hpp"
 
-#include <armnn/utility/IgnoreUnused.hpp>
 #include <armnn/Descriptors.hpp>
+#include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <armnn/Types.hpp>
 
 #include <limits>
@@ -52,7 +53,7 @@
     for (const IConnectableLayer* layer : m_LayersToCalibrate)
     {
         std::vector<DebugLayer*> newDebugLayers = InsertDebugLayerAfter(
-            m_Graph, *boost::polymorphic_downcast<Layer*>(const_cast<IConnectableLayer*>(layer)));
+            m_Graph, *PolymorphicDowncast<Layer*>(const_cast<IConnectableLayer*>(layer)));
         // record them so we can take them out again efficiently afterward
         m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers));
     }
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index 78b08ec..ea9930b 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -15,7 +15,6 @@
 #include <armnn/Utils.hpp>
 #include <armnn/utility/Assert.hpp>
 
-#include <boost/polymorphic_cast.hpp>
 #include <boost/format.hpp>
 
 #include <unordered_map>
diff --git a/src/armnn/Graph.hpp b/src/armnn/Graph.hpp
index 00ab8de..0946188 100644
--- a/src/armnn/Graph.hpp
+++ b/src/armnn/Graph.hpp
@@ -12,6 +12,7 @@
 #include <armnn/NetworkFwd.hpp>
 #include <armnn/Exceptions.hpp>
 #include <armnn/utility/Assert.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <list>
 #include <map>
@@ -32,7 +33,7 @@
     template <typename LayerType>
     static LayerType* PtrCast(Layer* const layer)
     {
-        return boost::polymorphic_downcast<LayerType*>(layer);
+        return PolymorphicDowncast<LayerType*>(layer);
     }
 
     template <typename Func>
diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp
index ec35d71..5947523 100644
--- a/src/armnn/Layer.hpp
+++ b/src/armnn/Layer.hpp
@@ -18,16 +18,16 @@
 #include <armnn/Tensor.hpp>
 #include <armnn/INetwork.hpp>
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <algorithm>
+#include <functional>
+#include <iostream>
+#include <list>
 #include <memory>
 #include <string>
 #include <vector>
-#include <iostream>
-#include <functional>
-#include <list>
 
-#include <boost/numeric/conversion/cast.hpp>
 #include <boost/cast.hpp>
 
 namespace armnn
@@ -145,12 +145,12 @@
 
     int Connect(IInputSlot& destination) override
     {
-        return Connect(*boost::polymorphic_downcast<InputSlot*>(&destination));
+        return Connect(*PolymorphicDowncast<InputSlot*>(&destination));
     }
 
     void Disconnect(IInputSlot& slot) override
     {
-        return Disconnect(*boost::polymorphic_downcast<InputSlot*>(&slot));
+        return Disconnect(*PolymorphicDowncast<InputSlot*>(&slot));
     }
 
     unsigned int CalculateIndexOnOwner() const override;
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 9da988b..4b31fa3 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -22,7 +22,6 @@
 
 #include <LabelsAndEventClasses.hpp>
 
-#include <boost/polymorphic_cast.hpp>
 #include <boost/format.hpp>
 
 namespace armnn
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index ac5159a..c2bf27a 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -24,6 +24,7 @@
 #include <armnn/Logging.hpp>
 #include <armnn/utility/Assert.hpp>
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <ProfilingService.hpp>
 
@@ -53,12 +54,12 @@
 
 void INetwork::Destroy(INetwork* network)
 {
-    delete boost::polymorphic_downcast<Network*>(network);
+    delete PolymorphicDowncast<Network*>(network);
 }
 
 void IOptimizedNetwork::Destroy(IOptimizedNetwork* network)
 {
-    delete boost::polymorphic_downcast<OptimizedNetwork*>(network);
+    delete PolymorphicDowncast<OptimizedNetwork*>(network);
 }
 
 Status OptimizedNetwork::PrintGraph()
@@ -149,7 +150,7 @@
 template <typename LayerT>
 LayerT* ConvertBf16ToFp32Weight(Layer* l)
 {
-    LayerT* layer = boost::polymorphic_downcast<LayerT*>(l);
+    LayerT* layer = PolymorphicDowncast<LayerT*>(l);
     if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
          && layer->m_Weight)
     {
@@ -1015,12 +1016,12 @@
         throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
     }
 
-    const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork);
+    const Network& network = *PolymorphicDowncast<const Network*>(&inNetwork);
     std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph());
 
     auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy);
 
-    OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast<OptimizedNetwork*>(optNet.get());
+    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
 
     // Get the optimized graph
     Graph& optGraph = optNetObjPtr->GetGraph();
diff --git a/src/armnn/NetworkQuantizer.cpp b/src/armnn/NetworkQuantizer.cpp
index d55fca6..3712c7b 100644
--- a/src/armnn/NetworkQuantizer.cpp
+++ b/src/armnn/NetworkQuantizer.cpp
@@ -21,6 +21,7 @@
 #include <armnn/Types.hpp>
 
 #include <armnnUtils/TensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <boost/variant.hpp>
 
@@ -44,12 +45,12 @@
 
 void INetworkQuantizer::Destroy(INetworkQuantizer *quantizer)
 {
-    delete boost::polymorphic_downcast<NetworkQuantizer*>(quantizer);
+    delete PolymorphicDowncast<NetworkQuantizer*>(quantizer);
 }
 
 void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, float max)
 {
-    const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph();
+    const Graph& graph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph();
     auto inputLayers = graph.GetInputLayers();
 
     // Walk the input layers of the graph and override the quantization parameters of the one with the given id
@@ -68,7 +69,7 @@
     {
         m_RefineCount = 0;
         m_Ranges.SetDynamicMode(true);
-        const Graph& cGraph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
+        const Graph& cGraph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
 
         // need to insert Debug layers in the DynamicQuantizationVisitor
         Graph& graph = const_cast<Graph&>(cGraph);
@@ -135,7 +136,7 @@
 
 INetworkPtr NetworkQuantizer::ExportNetwork()
 {
-    const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
+    const Graph& graph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
 
     // Step 1) Walk the graph and populate default min/max values for
     // intermediate tensors, only if Runtime does not exist (created
diff --git a/src/armnn/QuantizerVisitor.cpp b/src/armnn/QuantizerVisitor.cpp
index 16e8a60..9c1ac17 100644
--- a/src/armnn/QuantizerVisitor.cpp
+++ b/src/armnn/QuantizerVisitor.cpp
@@ -4,9 +4,11 @@
 //
 
 #include "Network.hpp"
+#include "NetworkQuantizerUtils.hpp"
 #include "QuantizerVisitor.hpp"
 #include "StaticRangeVisitor.hpp"
-#include "NetworkQuantizerUtils.hpp"
+
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 namespace armnn
 {
@@ -28,7 +30,7 @@
     for (unsigned int i = 0; i < srcLayer->GetNumInputSlots(); i++)
     {
         const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(i);
-        const InputSlot* inputSlot = boost::polymorphic_downcast<const InputSlot*>(&srcInputSlot);
+        const InputSlot* inputSlot = PolymorphicDowncast<const InputSlot*>(&srcInputSlot);
         ARMNN_ASSERT(inputSlot);
         const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
 
@@ -70,7 +72,7 @@
 {
     ARMNN_ASSERT(srcLayer);
     const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(0);
-    auto inputSlot = boost::polymorphic_downcast<const InputSlot*>(&srcInputSlot);
+    auto inputSlot = PolymorphicDowncast<const InputSlot*>(&srcInputSlot);
     ARMNN_ASSERT(inputSlot);
     const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
 
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index f44606c..32c7c39 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -10,10 +10,10 @@
 
 #include <armnn/backends/IBackendContext.hpp>
 #include <backendsCommon/DynamicBackendUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <iostream>
 
-#include <boost/polymorphic_cast.hpp>
 #include <backends/BackendProfiling.hpp>
 
 using namespace armnn;
@@ -34,7 +34,7 @@
 
 void IRuntime::Destroy(IRuntime* runtime)
 {
-    delete boost::polymorphic_downcast<Runtime*>(runtime);
+    delete PolymorphicDowncast<Runtime*>(runtime);
 }
 
 int Runtime::GenerateNetworkId()
@@ -71,7 +71,7 @@
     }
 
     unique_ptr<LoadedNetwork> loadedNetwork = LoadedNetwork::MakeLoadedNetwork(
-        std::unique_ptr<OptimizedNetwork>(boost::polymorphic_downcast<OptimizedNetwork*>(rawNetwork)),
+        std::unique_ptr<OptimizedNetwork>(PolymorphicDowncast<OptimizedNetwork*>(rawNetwork)),
         errorMessage,
         networkProperties,
         m_ProfilingService);
diff --git a/src/armnn/SubgraphView.cpp b/src/armnn/SubgraphView.cpp
index 446485f..d65c677 100644
--- a/src/armnn/SubgraphView.cpp
+++ b/src/armnn/SubgraphView.cpp
@@ -7,6 +7,7 @@
 #include "Graph.hpp"
 
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <boost/numeric/conversion/cast.hpp>
 #include <utility>
@@ -74,20 +75,20 @@
 SubgraphView::SubgraphView(IConnectableLayer* layer)
     : m_InputSlots{}
     , m_OutputSlots{}
-    , m_Layers{boost::polymorphic_downcast<Layer*>(layer)}
+    , m_Layers{PolymorphicDowncast<Layer*>(layer)}
 {
     unsigned int numInputSlots = layer->GetNumInputSlots();
     m_InputSlots.resize(numInputSlots);
     for (unsigned int i = 0; i < numInputSlots; i++)
     {
-        m_InputSlots.at(i) = boost::polymorphic_downcast<InputSlot*>(&(layer->GetInputSlot(i)));
+        m_InputSlots.at(i) = PolymorphicDowncast<InputSlot*>(&(layer->GetInputSlot(i)));
     }
 
     unsigned int numOutputSlots = layer->GetNumOutputSlots();
     m_OutputSlots.resize(numOutputSlots);
     for (unsigned int i = 0; i < numOutputSlots; i++)
     {
-        m_OutputSlots.at(i) = boost::polymorphic_downcast<OutputSlot*>(&(layer->GetOutputSlot(i)));
+        m_OutputSlots.at(i) = PolymorphicDowncast<OutputSlot*>(&(layer->GetOutputSlot(i)));
     }
 
     CheckSubgraph();
diff --git a/src/armnn/SubgraphViewSelector.cpp b/src/armnn/SubgraphViewSelector.cpp
index fa2fad9..96e75ab 100644
--- a/src/armnn/SubgraphViewSelector.cpp
+++ b/src/armnn/SubgraphViewSelector.cpp
@@ -8,6 +8,7 @@
 
 #include <armnn/utility/Assert.hpp>
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <algorithm>
 #include <map>
@@ -267,7 +268,7 @@
 
     for (auto inputSlot : layer.GetInputSlots())
     {
-        auto connectedInput = boost::polymorphic_downcast<OutputSlot*>(inputSlot.GetConnection());
+        auto connectedInput = PolymorphicDowncast<OutputSlot*>(inputSlot.GetConnection());
         ARMNN_ASSERT_MSG(connectedInput, "Dangling input slot detected.");
         Layer& inputLayer = connectedInput->GetOwningLayer();
 
diff --git a/src/armnn/layers/ConcatLayer.cpp b/src/armnn/layers/ConcatLayer.cpp
index 5df5ec8..b51303b 100644
--- a/src/armnn/layers/ConcatLayer.cpp
+++ b/src/armnn/layers/ConcatLayer.cpp
@@ -6,6 +6,7 @@
 #include "LayerCloneBase.hpp"
 
 #include <armnn/TypesUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/WorkloadData.hpp>
 #include <backendsCommon/WorkloadFactory.hpp>
 
@@ -118,7 +119,7 @@
                 if (inputLayer.GetType() == LayerType::Concat)
                 {
                     // Continue with the substitution if the connected inputs are also concat layers
-                    m_ConcatLayers.push(boost::polymorphic_downcast<ConcatLayer*>(&inputLayer));
+                    m_ConcatLayers.push(PolymorphicDowncast<ConcatLayer*>(&inputLayer));
                 }
                 ++i;
             }
diff --git a/src/armnn/layers/PermuteLayer.hpp b/src/armnn/layers/PermuteLayer.hpp
index 4984cf2..f2057d4 100644
--- a/src/armnn/layers/PermuteLayer.hpp
+++ b/src/armnn/layers/PermuteLayer.hpp
@@ -6,6 +6,8 @@
 
 #include "LayerWithParameters.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 namespace armnn
 {
 
@@ -45,7 +47,7 @@
     bool IsInverse(const Layer& other) const
     {
         return (other.GetType() == LayerType::Permute) &&
-            GetPermutation().IsInverse(boost::polymorphic_downcast<const PermuteLayer*>(&other)->GetPermutation());
+            GetPermutation().IsInverse(PolymorphicDowncast<const PermuteLayer*>(&other)->GetPermutation());
     }
 
     /// Indicates if the other layer received is equal to this one.
@@ -54,7 +56,7 @@
     bool IsEqual(const Layer& other) const
     {
         return (other.GetType() == LayerType::Permute) &&
-               GetPermutation().IsEqual(boost::polymorphic_downcast<const PermuteLayer*>(&other)->GetPermutation());
+               GetPermutation().IsEqual(PolymorphicDowncast<const PermuteLayer*>(&other)->GetPermutation());
     }
 
     void Accept(ILayerVisitor& visitor) const override;
diff --git a/src/armnn/layers/ReshapeLayer.hpp b/src/armnn/layers/ReshapeLayer.hpp
index 4fd5c3e..5e0e883 100644
--- a/src/armnn/layers/ReshapeLayer.hpp
+++ b/src/armnn/layers/ReshapeLayer.hpp
@@ -6,6 +6,8 @@
 
 #include "LayerWithParameters.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 namespace armnn
 {
 
@@ -39,7 +41,7 @@
     bool IsEqual(const Layer& other) const
     {
         return (other.GetType() == LayerType::Reshape) &&
-               m_Param.m_TargetShape == boost::polymorphic_downcast<const ReshapeLayer*>(&other)->m_Param.m_TargetShape;
+               m_Param.m_TargetShape == PolymorphicDowncast<const ReshapeLayer*>(&other)->m_Param.m_TargetShape;
     }
 
     void Accept(ILayerVisitor& visitor) const override;
diff --git a/src/armnn/layers/TransposeLayer.hpp b/src/armnn/layers/TransposeLayer.hpp
index 4906bc9..a668ce8 100644
--- a/src/armnn/layers/TransposeLayer.hpp
+++ b/src/armnn/layers/TransposeLayer.hpp
@@ -6,6 +6,8 @@
 
 #include "LayerWithParameters.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 namespace armnn
 {
 
@@ -43,7 +45,7 @@
     bool IsInverse(const Layer& other) const
     {
         return (other.GetType() == LayerType::Transpose) &&
-            GetPermutation().IsInverse(boost::polymorphic_downcast<const TransposeLayer*>(&other)->GetPermutation());
+            GetPermutation().IsInverse(PolymorphicDowncast<const TransposeLayer*>(&other)->GetPermutation());
     }
 
     /// Indicates if the other layer received is equal to this one.
@@ -52,7 +54,7 @@
     bool IsEqual(const Layer& other) const
     {
         return (other.GetType() == LayerType::Transpose) &&
-               GetPermutation().IsEqual(boost::polymorphic_downcast<const TransposeLayer*>(&other)->GetPermutation());
+               GetPermutation().IsEqual(PolymorphicDowncast<const TransposeLayer*>(&other)->GetPermutation());
     }
 
     void Accept(ILayerVisitor& visitor) const override;
diff --git a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp
index 222414c..ca42cac 100644
--- a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp
+++ b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp
@@ -7,6 +7,8 @@
 #include "NetworkUtils.hpp"
 #include "Optimization.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 namespace armnn
 {
 namespace optimizations
@@ -15,7 +17,7 @@
 template <typename LayerT>
 inline LayerT* ConvertWeight(Layer* l)
 {
-    LayerT* layer = boost::polymorphic_downcast<LayerT*>(l);
+    LayerT* layer = PolymorphicDowncast<LayerT*>(l);
     if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
          && layer->m_Weight)
     {
diff --git a/src/armnn/optimizations/FoldPadIntoConvolution2d.hpp b/src/armnn/optimizations/FoldPadIntoConvolution2d.hpp
index e598deb..66fffbb 100644
--- a/src/armnn/optimizations/FoldPadIntoConvolution2d.hpp
+++ b/src/armnn/optimizations/FoldPadIntoConvolution2d.hpp
@@ -7,6 +7,8 @@
 
 #include "Optimization.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 namespace armnn
 {
 namespace optimizations
@@ -24,8 +26,8 @@
         ARMNN_ASSERT(base.GetType() == LayerType::Pad);
         ARMNN_ASSERT(child.GetType() == LayerType::Convolution2d);
 
-        PadLayer* padLayer = boost::polymorphic_downcast<PadLayer*>(&base);
-        Convolution2dLayer* convolution2dLayer = boost::polymorphic_downcast<Convolution2dLayer*>(&child);
+        PadLayer* padLayer = PolymorphicDowncast<PadLayer*>(&base);
+        Convolution2dLayer* convolution2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&child);
 
         OutputSlot* parentOut = base.GetInputSlot(0).GetConnectedOutputSlot();
         const TensorInfo& outInfo = child.GetOutputHandler().GetTensorInfo();
diff --git a/src/armnn/optimizations/MovePermuteUp.hpp b/src/armnn/optimizations/MovePermuteUp.hpp
index a7a477b..15c6f61 100644
--- a/src/armnn/optimizations/MovePermuteUp.hpp
+++ b/src/armnn/optimizations/MovePermuteUp.hpp
@@ -6,6 +6,7 @@
 
 #include "Optimization.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <armnnUtils/Permute.hpp>
 
 namespace armnn
@@ -29,7 +30,7 @@
 
             if (CanMovePermuteToInputs(base))
             {
-                auto permute = boost::polymorphic_downcast<PermuteLayer*>(&connection.GetOwningLayer());
+                auto permute = PolymorphicDowncast<PermuteLayer*>(&connection.GetOwningLayer());
                 const PermutationVector& perm = permute->GetPermutation();
 
                 // Inserts an equivalent permute before every input of the base layer.
diff --git a/src/armnn/optimizations/MoveTransposeUp.hpp b/src/armnn/optimizations/MoveTransposeUp.hpp
index 6654306..86c0188 100644
--- a/src/armnn/optimizations/MoveTransposeUp.hpp
+++ b/src/armnn/optimizations/MoveTransposeUp.hpp
@@ -6,6 +6,7 @@
 
 #include "Optimization.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <armnnUtils/Transpose.hpp>
 
 namespace armnn
@@ -29,7 +30,7 @@
 
             if (CanMoveTransposeToInputs(base))
             {
-                auto transpose = boost::polymorphic_downcast<TransposeLayer*>(&connection.GetOwningLayer());
+                auto transpose = PolymorphicDowncast<TransposeLayer*>(&connection.GetOwningLayer());
                 const PermutationVector& perm = transpose->GetPermutation();
 
                 // Inserts an equivalent transpose before every input of the base layer.
diff --git a/src/armnn/optimizations/Optimization.hpp b/src/armnn/optimizations/Optimization.hpp
index efe3930..565f543 100644
--- a/src/armnn/optimizations/Optimization.hpp
+++ b/src/armnn/optimizations/Optimization.hpp
@@ -7,6 +7,8 @@
 #include "Graph.hpp"
 #include "LayersFwd.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 namespace armnn
 {
 
@@ -37,7 +39,7 @@
     {
         if (base.GetType() == LayerEnumOf<BaseType>())
         {
-            Wrapped::Run(graph, *boost::polymorphic_downcast<BaseType*>(&base));
+            Wrapped::Run(graph, *PolymorphicDowncast<BaseType*>(&base));
         }
     }
 
diff --git a/src/armnn/optimizations/OptimizeInversePermutes.hpp b/src/armnn/optimizations/OptimizeInversePermutes.hpp
index 98e87c3..fe0b312 100644
--- a/src/armnn/optimizations/OptimizeInversePermutes.hpp
+++ b/src/armnn/optimizations/OptimizeInversePermutes.hpp
@@ -7,6 +7,7 @@
 #include "Optimization.hpp"
 
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 namespace armnn
 {
@@ -23,9 +24,9 @@
     {
         IgnoreUnused(graph);
         Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
-        auto child = boost::polymorphic_downcast<PermuteType*>(&connection.GetOwningLayer());
+        auto child = PolymorphicDowncast<PermuteType*>(&connection.GetOwningLayer());
 
-        if (child->IsInverse(*boost::polymorphic_downcast<PermuteType*>(&base)))
+        if (child->IsInverse(*PolymorphicDowncast<PermuteType*>(&base)))
         {
             // Bypass both layers. Child will be removed as it's left unconnected.
             // Base layer will be removed if left unconnected.
diff --git a/src/armnn/optimizations/SquashEqualSiblings.hpp b/src/armnn/optimizations/SquashEqualSiblings.hpp
index bac27c0..d836a9c 100644
--- a/src/armnn/optimizations/SquashEqualSiblings.hpp
+++ b/src/armnn/optimizations/SquashEqualSiblings.hpp
@@ -7,6 +7,7 @@
 #include "Optimization.hpp"
 
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 namespace armnn
 {
@@ -32,7 +33,7 @@
 
             if (baseOutput.GetNumConnections() > 1)
             {
-                auto& comparableChild = *boost::polymorphic_downcast<Comparable*>(&child);
+                auto& comparableChild = *PolymorphicDowncast<Comparable*>(&child);
 
                 Layer* lowestPriorityChild = &child;
                 for (auto&& it : baseOutput.GetConnections())
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index 72ad9d4..b6ffd21 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -12,6 +12,7 @@
 
 #include <armnnUtils/DataLayoutIndexed.hpp>
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <backendsCommon/WorkloadData.hpp>
 #include <backendsCommon/WorkloadFactory.hpp>
@@ -34,7 +35,7 @@
 std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer, const IWorkloadFactory& factory)
 {
     std::unique_ptr<IWorkload> workload = layer.CreateWorkload(factory);
-    BOOST_TEST(workload.get() == boost::polymorphic_downcast<Workload*>(workload.get()),
+    BOOST_TEST(workload.get() == PolymorphicDowncast<Workload*>(workload.get()),
                "Cannot convert to derived class");
     std::string reasonIfUnsupported;
     layer.SetBackendId(factory.GetBackendId());
diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp
index 30e5c87..8e6af31 100644
--- a/src/armnn/test/GraphTests.cpp
+++ b/src/armnn/test/GraphTests.cpp
@@ -9,6 +9,7 @@
 
 #include <armnn/TypesUtils.hpp>
 #include <armnn/Exceptions.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <armnn/backends/IBackendInternal.hpp>
 
@@ -274,7 +275,7 @@
             const unsigned int numConnections = outputSlot.GetNumConnections();
             for (unsigned int c = 0; c < numConnections; ++c)
             {
-                auto inputSlot = boost::polymorphic_downcast<const armnn::InputSlot*>(outputSlot.GetConnection(c));
+                auto inputSlot = armnn::PolymorphicDowncast<const armnn::InputSlot*>(outputSlot.GetConnection(c));
                 edges.emplace_back(srcLayer, &inputSlot->GetOwningLayer());
             }
         }
diff --git a/src/armnn/test/GraphUtils.cpp b/src/armnn/test/GraphUtils.cpp
index 1f9bb44..36db900 100644
--- a/src/armnn/test/GraphUtils.cpp
+++ b/src/armnn/test/GraphUtils.cpp
@@ -5,6 +5,8 @@
 
 #include "GraphUtils.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 bool GraphHasNamedLayer(const armnn::Graph& graph, const std::string& name)
 {
     for (auto&& layer : graph)
@@ -52,7 +54,7 @@
     const unsigned int numConnections = outputSlot.GetNumConnections();
     for (unsigned int c = 0; c < numConnections; ++c)
     {
-        auto inputSlot = boost::polymorphic_downcast<const armnn::InputSlot*>(outputSlot.GetConnection(c));
+        auto inputSlot = armnn::PolymorphicDowncast<const armnn::InputSlot*>(outputSlot.GetConnection(c));
         if (inputSlot->GetOwningLayer().GetNameStr() == destLayer->GetNameStr() &&
             inputSlot->GetSlotIndex() == destSlot)
         {
diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp
index c7883ff..ca85e11 100644
--- a/src/armnn/test/OptimizerTests.cpp
+++ b/src/armnn/test/OptimizerTests.cpp
@@ -15,6 +15,7 @@
 #include <armnn/LayerVisitorBase.hpp>
 
 #include <armnnUtils/FloatingPointConverter.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <backendsCommon/IBackendInternal.hpp>
@@ -695,7 +696,7 @@
                              const char* name = nullptr) override
         {
             IgnoreUnused(id, name);
-            auto inputLayer = boost::polymorphic_downcast<const InputLayer*>(layer);
+            auto inputLayer = PolymorphicDowncast<const InputLayer*>(layer);
             BOOST_TEST((inputLayer->GetBackendId() == "MockBackend"));
         }
 
@@ -704,7 +705,7 @@
                               const char* name = nullptr) override
         {
             IgnoreUnused(id, name);
-            auto outputLayer = boost::polymorphic_downcast<const OutputLayer*>(layer);
+            auto outputLayer = PolymorphicDowncast<const OutputLayer*>(layer);
             BOOST_TEST((outputLayer->GetBackendId() == "MockBackend"));
         }
 
@@ -713,7 +714,7 @@
                                   const char* name = nullptr) override
         {
             IgnoreUnused(activationDescriptor, name);
-            auto activation = boost::polymorphic_downcast<const ActivationLayer*>(layer);
+            auto activation = PolymorphicDowncast<const ActivationLayer*>(layer);
             BOOST_TEST((activation->GetBackendId() == "CustomBackend"));
         }
     };
@@ -765,7 +766,7 @@
 
     auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy);
 
-    OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast<OptimizedNetwork*>(optNet.get());
+    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
 
     // Get the optimized graph
     Graph& optGraph = optNetObjPtr->GetGraph();
diff --git a/src/armnn/test/QuantizerTest.cpp b/src/armnn/test/QuantizerTest.cpp
index ebdfbc5..669703c 100644
--- a/src/armnn/test/QuantizerTest.cpp
+++ b/src/armnn/test/QuantizerTest.cpp
@@ -15,6 +15,7 @@
 #include <armnn/Tensor.hpp>
 #include <armnn/Types.hpp>
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <armnnQuantizer/INetworkQuantizer.hpp>
 #include <QuantizeHelper.hpp>
 
@@ -190,7 +191,7 @@
 
 void VisitLayersTopologically(const INetwork* inputNetwork, ILayerVisitor& visitor)
 {
-    auto network = boost::polymorphic_downcast<const Network*>(inputNetwork);
+    auto network = PolymorphicDowncast<const Network*>(inputNetwork);
     auto graph = network->GetGraph().TopologicalSort();
 
     VisitLayers(graph, visitor);
@@ -346,7 +347,7 @@
 {
     INetworkPtr network = CreateNetworkWithInputOutputLayers();
 
-    armnn::TensorInfo tensorInfo = GetInputTensorInfo(boost::polymorphic_downcast<const Network*>(network.get()));
+    armnn::TensorInfo tensorInfo = GetInputTensorInfo(PolymorphicDowncast<const Network*>(network.get()));
 
     // Outliers -56 and 98
     std::vector<float> inputData({0, 0, 0, -56, 98, 0, 0, 0});
@@ -3033,12 +3034,12 @@
     reLULayer2->GetOutputSlot(0).SetTensorInfo(TensorInfo(TensorShape({1, 2, 2, 1}), DataType::Float32));
     addLayer1->GetOutputSlot(0).SetTensorInfo(TensorInfo(TensorShape({1, 2, 2, 1}), DataType::Float32));
 
-    TestConnectionPreservation visitor1(boost::polymorphic_downcast<const Network*>(network.get())->GetGraph());
+    TestConnectionPreservation visitor1(PolymorphicDowncast<const Network*>(network.get())->GetGraph());
     VisitLayersTopologically(network.get(), visitor1);
 
     armnn::INetworkQuantizerPtr quantizer = armnn::INetworkQuantizer::Create(network.get());
 
-    armnn::TensorInfo tensorInfo = GetInputTensorInfo(boost::polymorphic_downcast<const Network*>(network.get()));
+    armnn::TensorInfo tensorInfo = GetInputTensorInfo(PolymorphicDowncast<const Network*>(network.get()));
 
     std::vector<float> inputData({0, 2, 0, 4});
     armnn::ConstTensor inputTensor(tensorInfo, inputData.data());
@@ -3049,7 +3050,7 @@
 
     INetworkPtr quantNetwork = quantizer->ExportNetwork();
 
-    TestConnectionPreservation visitor2(boost::polymorphic_downcast<const Network*>(quantNetwork.get())->GetGraph());
+    TestConnectionPreservation visitor2(PolymorphicDowncast<const Network*>(quantNetwork.get())->GetGraph());
     VisitLayersTopologically(quantNetwork.get(), visitor2);
 }
 
diff --git a/src/armnn/test/UtilityTests.cpp b/src/armnn/test/UtilityTests.cpp
index 7be5c95..d5779c1 100644
--- a/src/armnn/test/UtilityTests.cpp
+++ b/src/armnn/test/UtilityTests.cpp
@@ -45,12 +45,12 @@
     Base* base1 = &child1;
     auto ptr1 = dynamic_cast<Child1*>(base1);
     BOOST_CHECK(ptr1 != nullptr);
-    BOOST_CHECK_NO_THROW(polymorphic_downcast<Child1*>(base1));
-    BOOST_CHECK(polymorphic_downcast<Child1*>(base1) == ptr1);
+    BOOST_CHECK_NO_THROW(armnn::PolymorphicDowncast<Child1*>(base1));
+    BOOST_CHECK(armnn::PolymorphicDowncast<Child1*>(base1) == ptr1);
 
     auto ptr2 = dynamic_cast<Child2*>(base1);
     BOOST_CHECK(ptr2 == nullptr);
-    BOOST_CHECK_THROW(polymorphic_downcast<Child2*>(base1), std::bad_cast);
+    BOOST_CHECK_THROW(armnn::PolymorphicDowncast<Child2*>(base1), std::bad_cast);
 
     armnn::IgnoreUnused(ptr1, ptr2);
 }
diff --git a/src/armnnDeserializer/Deserializer.cpp b/src/armnnDeserializer/Deserializer.cpp
index 2975675..42b0052 100644
--- a/src/armnnDeserializer/Deserializer.cpp
+++ b/src/armnnDeserializer/Deserializer.cpp
@@ -21,10 +21,7 @@
 
 #include <boost/filesystem.hpp>
 #include <boost/format.hpp>
-#include <boost/format.hpp>
-#include <boost/format.hpp>
 #include <boost/numeric/conversion/cast.hpp>
-#include <boost/polymorphic_cast.hpp>
 
 #include <fstream>
 #include <algorithm>
diff --git a/src/armnnTfLiteParser/test/DetectionPostProcess.cpp b/src/armnnTfLiteParser/test/DetectionPostProcess.cpp
index f12b2b9..cb4173a 100644
--- a/src/armnnTfLiteParser/test/DetectionPostProcess.cpp
+++ b/src/armnnTfLiteParser/test/DetectionPostProcess.cpp
@@ -4,16 +4,16 @@
 //
 
 #include "../TfLiteParser.hpp"
-
-#include <boost/test/unit_test.hpp>
-#include "test/GraphUtils.hpp"
-
 #include "ParserFlatbuffersFixture.hpp"
 #include "ParserPrototxtFixture.hpp"
 #include "ParserHelper.hpp"
+#include "test/GraphUtils.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <QuantizeHelper.hpp>
 
+#include <boost/test/unit_test.hpp>
+
 BOOST_AUTO_TEST_SUITE(TensorflowLiteParser)
 
 struct DetectionPostProcessFixture : ParserFlatbuffersFixture
@@ -241,7 +241,7 @@
 
     auto optimized = Optimize(*network, { armnn::Compute::CpuRef }, m_Runtime->GetDeviceSpec());
 
-    auto optimizedNetwork = boost::polymorphic_downcast<armnn::OptimizedNetwork*>(optimized.get());
+    auto optimizedNetwork = armnn::PolymorphicDowncast<armnn::OptimizedNetwork*>(optimized.get());
     auto graph = optimizedNetwork->GetGraph();
 
     // Check the number of layers in the graph
diff --git a/src/armnnTfLiteParser/test/Unsupported.cpp b/src/armnnTfLiteParser/test/Unsupported.cpp
index 21392ac..dd77bca 100644
--- a/src/armnnTfLiteParser/test/Unsupported.cpp
+++ b/src/armnnTfLiteParser/test/Unsupported.cpp
@@ -8,10 +8,10 @@
 
 #include <armnn/LayerVisitorBase.hpp>
 #include <armnn/utility/Assert.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <layers/StandInLayer.hpp>
 
-#include <boost/polymorphic_cast.hpp>
 #include <boost/test/unit_test.hpp>
 
 #include <sstream>
@@ -47,7 +47,7 @@
         BOOST_CHECK(descriptor.m_NumOutputs    == numOutputs);
         BOOST_CHECK(layer->GetNumOutputSlots() == numOutputs);
 
-        const StandInLayer* standInLayer = boost::polymorphic_downcast<const StandInLayer*>(layer);
+        const StandInLayer* standInLayer = PolymorphicDowncast<const StandInLayer*>(layer);
         for (unsigned int i = 0u; i < numInputs; ++i)
         {
             const OutputSlot* connectedSlot = standInLayer->GetInputSlot(i).GetConnectedOutputSlot();
diff --git a/src/armnnTfParser/TfParser.cpp b/src/armnnTfParser/TfParser.cpp
index 491a964..7a7c5a4 100755
--- a/src/armnnTfParser/TfParser.cpp
+++ b/src/armnnTfParser/TfParser.cpp
@@ -12,6 +12,7 @@
 #include <armnnUtils/DataLayoutIndexed.hpp>
 #include <armnnUtils/Transpose.hpp>
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <GraphTopologicalSort.hpp>
 #include <ParserHelper.hpp>
@@ -22,10 +23,8 @@
 #include <tensorflow/core/framework/graph.pb.h>
 
 #include <boost/format.hpp>
-#include <boost/format.hpp>
 #include <boost/numeric/conversion/cast.hpp>
-#include <boost/polymorphic_cast.hpp>
-
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <numeric>
 
 using namespace armnnUtils;
@@ -1220,7 +1219,7 @@
                     % CHECK_LOCATION().AsString()));
     }
     ParsedConstTfOperation<float>* weightNode =
-        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);
+        PolymorphicDowncast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);
 
     std::string paddingString = ReadMandatoryNodeStringAttribute(nodeDef, "padding");
     std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
@@ -1364,7 +1363,7 @@
     }
 
     ParsedConstTfOperation<float>* weightNode =
-        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);
+        PolymorphicDowncast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);
 
     std::string paddingString = ReadMandatoryNodeStringAttribute(nodeDef, "padding");
     std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
@@ -1578,7 +1577,7 @@
                     % CHECK_LOCATION().AsString()));
     }
     ParsedConstTfOperation<float>* scaleNode =
-        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);
+        PolymorphicDowncast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);
 
     if (!HasParsedConstTensor<float>(inputs[2].m_IndexedValue->GetNode().name()))
     {
@@ -1592,7 +1591,7 @@
                     % CHECK_LOCATION().AsString()));
     }
     ParsedConstTfOperation<float>* offsetNode =
-        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[2].m_IndexedValue);
+        PolymorphicDowncast<ParsedConstTfOperation<float> *>(inputs[2].m_IndexedValue);
 
     if (!HasParsedConstTensor<float>(inputs[3].m_IndexedValue->GetNode().name()))
     {
@@ -1606,7 +1605,7 @@
                     % CHECK_LOCATION().AsString()));
     }
     ParsedConstTfOperation<float>* meanNode =
-        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[3].m_IndexedValue);
+        PolymorphicDowncast<ParsedConstTfOperation<float> *>(inputs[3].m_IndexedValue);
 
     if (!HasParsedConstTensor<float>(inputs[4].m_IndexedValue->GetNode().name()))
     {
@@ -1620,7 +1619,7 @@
                     % CHECK_LOCATION().AsString()));
     }
     ParsedConstTfOperation<float>* varianceNode =
-        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[4].m_IndexedValue);
+        PolymorphicDowncast<ParsedConstTfOperation<float> *>(inputs[4].m_IndexedValue);
 
     const std::string dataFormat = ReadOptionalNodeStringAttribute(nodeDef, "data_format", "NHWC");
     CHECK_DATA_FORMAT(nodeDef, dataFormat, "FusedBatchNorm");
@@ -1689,7 +1688,7 @@
             if (HasParsedConstTensor<float>(inputs[alphaLayerIndex].m_IndexedValue->GetNode().name()))
             {
                 ParsedConstTfOperation<float>* alpha =
-                    boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(
+                    PolymorphicDowncast<ParsedConstTfOperation<float> *>(
                         inputs[alphaLayerIndex].m_IndexedValue);
 
                 std::vector<float> const_data;
@@ -2079,7 +2078,7 @@
 
     const auto  constInput         = inputs[GetConstInputIndex(inputs)];
     auto*       permuteVectorInput =
-        boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(constInput.m_IndexedValue);
+        PolymorphicDowncast<ParsedConstTfOperation<int32_t>*>(constInput.m_IndexedValue);
     const auto& permuteVectorInfo  = permuteVectorInput->GetTensorInfo();
 
     std::vector<int32_t> permuteVectorData;
@@ -2177,7 +2176,7 @@
 
     }
     ParsedConstTfOperation<int32_t>* paddingTensorOp =
-            boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[1].m_IndexedValue);
+            PolymorphicDowncast<ParsedConstTfOperation<int32_t>*>(inputs[1].m_IndexedValue);
 
     std::vector<int32_t> paddingTensorData;
     ConstTensor paddingTensor = paddingTensorOp->GetConstTensor(paddingTensorData);
@@ -2244,7 +2243,7 @@
     unsigned int index = GetConstInputIndex(inputs);
     // Get the axis tensor data
     ParsedConstTfOperation<int32_t>* shapeNode =
-            boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[index].m_IndexedValue);
+            PolymorphicDowncast<ParsedConstTfOperation<int32_t>*>(inputs[index].m_IndexedValue);
 
     std::vector<int32_t> axisTensorData;
     shapeNode->GetConstTensor(axisTensorData);
@@ -2377,7 +2376,7 @@
                     % CHECK_LOCATION().AsString()));
     }
     ParsedConstTfOperation<int32_t>* shapeNode =
-        boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[1].m_IndexedValue);
+        PolymorphicDowncast<ParsedConstTfOperation<int32_t>*>(inputs[1].m_IndexedValue);
 
     armnn::IOutputSlot& prevLayerOutputSlot = inputNode->ResolveArmnnOutputSlot(inputs[0].m_Index);
     TensorInfo inputTensorInfo = prevLayerOutputSlot.GetTensorInfo();
@@ -2415,7 +2414,7 @@
                     % CHECK_LOCATION().AsString()));
     }
     ParsedConstTfOperation<int32_t>* sizeNode =
-        boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[1].m_IndexedValue);
+        PolymorphicDowncast<ParsedConstTfOperation<int32_t>*>(inputs[1].m_IndexedValue);
 
     // Checks the align_corners attribute is not set.
     if (ReadOptionalNodeBoolAttribute(nodeDef, "align_corners", false))
@@ -2630,7 +2629,7 @@
     bool keepDims = ReadMandatoryNodeBoolAttribute(nodeDef, "keep_dims");
 
     ParsedConstTfOperation<int32_t>* axisNode =
-             boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[1].m_IndexedValue);
+             PolymorphicDowncast<ParsedConstTfOperation<int32_t>*>(inputs[1].m_IndexedValue);
 
     const TensorInfo& axisTensorInfo = axisNode->GetTensorInfo();
 
@@ -2810,7 +2809,7 @@
     unsigned int index = GetConstInputIndex(inputs);
     // Get the axis tensor data
     ParsedConstTfOperation<int32_t>* shapeNode =
-                boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[index].m_IndexedValue);
+                PolymorphicDowncast<ParsedConstTfOperation<int32_t>*>(inputs[index].m_IndexedValue);
 
     std::vector<int32_t> axisTensorData;
     shapeNode->GetConstTensor(axisTensorData);
@@ -2913,17 +2912,17 @@
     std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, numInputs);
 
     ParsedConstTfOperation<int32_t>* beginNode =
-            boost::polymorphic_downcast<ParsedConstTfOperation<int32_t> *>(inputs[1].m_IndexedValue);
+            PolymorphicDowncast<ParsedConstTfOperation<int32_t> *>(inputs[1].m_IndexedValue);
     std::vector<int32_t> beginTensorData;
     beginNode->GetConstTensor(beginTensorData);
 
     ParsedConstTfOperation<int32_t>* endNode =
-            boost::polymorphic_downcast<ParsedConstTfOperation<int32_t> *>(inputs[2].m_IndexedValue);
+            PolymorphicDowncast<ParsedConstTfOperation<int32_t> *>(inputs[2].m_IndexedValue);
     std::vector<int32_t> endTensorData;
     endNode->GetConstTensor(endTensorData);
 
     ParsedConstTfOperation<int32_t>* stridesNode =
-            boost::polymorphic_downcast<ParsedConstTfOperation<int32_t> *>(inputs[3].m_IndexedValue);
+            PolymorphicDowncast<ParsedConstTfOperation<int32_t> *>(inputs[3].m_IndexedValue);
     std::vector<int32_t> stridesTensorData;
     stridesNode->GetConstTensor(stridesTensorData);
 
@@ -3311,11 +3310,11 @@
         // Finds our inputs.
         if (HasParsedConstTensor<float>(addInputs[0].m_IndexedValue->GetNode().name()))
         {
-            biasNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(addInputs[0].m_IndexedValue);
+            biasNode = PolymorphicDowncast<ParsedConstTfOperation<float>*>(addInputs[0].m_IndexedValue);
         }
         else if (HasParsedConstTensor<float>(addInputs[1].m_IndexedValue->GetNode().name()))
         {
-            biasNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(addInputs[1].m_IndexedValue);
+            biasNode = PolymorphicDowncast<ParsedConstTfOperation<float>*>(addInputs[1].m_IndexedValue);
         }
         else
         {
@@ -3339,13 +3338,13 @@
     std::vector<OutputOfParsedTfOperation> mulInputs = GetInputParsedTfOperationsChecked(matMulNodeDef, 2);
     if (HasParsedConstTensor<float>(mulInputs[0].m_IndexedValue->GetNode().name()))
     {
-        weightNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(mulInputs[0].m_IndexedValue);
+        weightNode = PolymorphicDowncast<ParsedConstTfOperation<float>*>(mulInputs[0].m_IndexedValue);
         inputNode = mulInputs[1].m_IndexedValue;
         inputIdx = mulInputs[1].m_Index;
     }
     else if (HasParsedConstTensor<float>(mulInputs[1].m_IndexedValue->GetNode().name()))
     {
-        weightNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(mulInputs[1].m_IndexedValue);
+        weightNode = PolymorphicDowncast<ParsedConstTfOperation<float>*>(mulInputs[1].m_IndexedValue);
         inputNode = mulInputs[0].m_IndexedValue;
         inputIdx = mulInputs[0].m_Index;
     }
diff --git a/src/armnnTfParser/test/Assert.cpp b/src/armnnTfParser/test/Assert.cpp
index 111f158..b978f02 100644
--- a/src/armnnTfParser/test/Assert.cpp
+++ b/src/armnnTfParser/test/Assert.cpp
@@ -7,6 +7,8 @@
 #include "ParserPrototxtFixture.hpp"
 #include "test/GraphUtils.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <boost/test/unit_test.hpp>
 
 BOOST_AUTO_TEST_SUITE(TensorflowParser)
@@ -100,7 +102,7 @@
 {
     auto optimized = SetupOptimizedNetwork({ { "Placeholder", { 1, 1, 1, 4 } } }, { "Add" });
 
-    auto optimizedNetwork = boost::polymorphic_downcast<armnn::OptimizedNetwork*>(optimized.get());
+    auto optimizedNetwork = armnn::PolymorphicDowncast<armnn::OptimizedNetwork*>(optimized.get());
     auto graph = optimizedNetwork->GetGraph();
 
     BOOST_TEST((graph.GetNumInputs() == 1));
@@ -256,7 +258,7 @@
                                              { "Input1", { 1, 1, 2, 2 } } },
                                            { "Output" });
 
-    auto optimizedNetwork = boost::polymorphic_downcast<armnn::OptimizedNetwork*>(optimized.get());
+    auto optimizedNetwork = armnn::PolymorphicDowncast<armnn::OptimizedNetwork*>(optimized.get());
     auto graph = optimizedNetwork->GetGraph();
 
     BOOST_TEST((graph.GetNumInputs() == 2));
diff --git a/src/backends/aclCommon/BaseMemoryManager.cpp b/src/backends/aclCommon/BaseMemoryManager.cpp
index b43eaf8..aaadc94 100644
--- a/src/backends/aclCommon/BaseMemoryManager.cpp
+++ b/src/backends/aclCommon/BaseMemoryManager.cpp
@@ -10,7 +10,6 @@
 #include "arm_compute/runtime/OffsetLifetimeManager.h"
 #endif
 
-#include <boost/polymorphic_cast.hpp>
 
 namespace armnn
 {
diff --git a/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp
index 83cec2a..b14e148 100644
--- a/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp
+++ b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp
@@ -6,6 +6,7 @@
 
 #include <test/CreateWorkload.hpp>
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/MemCopyWorkload.hpp>
 #include <reference/RefWorkloadFactory.hpp>
 #include <reference/RefTensorHandle.hpp>
@@ -93,8 +94,8 @@
     MemCopyQueueDescriptor queueDescriptor1 = workload1->GetData();
     BOOST_TEST(queueDescriptor1.m_Inputs.size() == 1);
     BOOST_TEST(queueDescriptor1.m_Outputs.size() == 1);
-    auto inputHandle1  = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor1.m_Inputs[0]);
-    auto outputHandle1 = boost::polymorphic_downcast<IComputeTensorHandle*>(queueDescriptor1.m_Outputs[0]);
+    auto inputHandle1  = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor1.m_Inputs[0]);
+    auto outputHandle1 = PolymorphicDowncast<IComputeTensorHandle*>(queueDescriptor1.m_Outputs[0]);
     BOOST_TEST((inputHandle1->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32)));
     BOOST_TEST(CompareTensorHandleShape<IComputeTensorHandle>(outputHandle1, {2, 3}));
 
@@ -102,8 +103,8 @@
     MemCopyQueueDescriptor queueDescriptor2 = workload2->GetData();
     BOOST_TEST(queueDescriptor2.m_Inputs.size() == 1);
     BOOST_TEST(queueDescriptor2.m_Outputs.size() == 1);
-    auto inputHandle2  = boost::polymorphic_downcast<IComputeTensorHandle*>(queueDescriptor2.m_Inputs[0]);
-    auto outputHandle2 = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor2.m_Outputs[0]);
+    auto inputHandle2  = PolymorphicDowncast<IComputeTensorHandle*>(queueDescriptor2.m_Inputs[0]);
+    auto outputHandle2 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor2.m_Outputs[0]);
     BOOST_TEST(CompareTensorHandleShape<IComputeTensorHandle>(inputHandle2, {2, 3}));
     BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32)));
 }
diff --git a/src/backends/backendsCommon/MemCopyWorkload.cpp b/src/backends/backendsCommon/MemCopyWorkload.cpp
index 572c0fc..c1aa79c 100644
--- a/src/backends/backendsCommon/MemCopyWorkload.cpp
+++ b/src/backends/backendsCommon/MemCopyWorkload.cpp
@@ -8,7 +8,7 @@
 #include <backendsCommon/MemCopyWorkload.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
-#include <boost/cast.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <cstring>
 
@@ -27,9 +27,9 @@
 
     for (unsigned int i = 0; i < numInputs; ++i)
     {
-        SrcTensorHandleType* const srcTensorHandle = boost::polymorphic_downcast<SrcTensorHandleType*>(
+        SrcTensorHandleType* const srcTensorHandle = PolymorphicDowncast<SrcTensorHandleType*>(
             descriptor.m_Inputs[i]);
-        DstTensorHandleType* const dstTensorHandle = boost::polymorphic_downcast<DstTensorHandleType*>(
+        DstTensorHandleType* const dstTensorHandle = PolymorphicDowncast<DstTensorHandleType*>(
             descriptor.m_Outputs[i]);
 
         tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index a7e8576..c55c70a 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -10,6 +10,7 @@
 #include <armnn/LayerSupport.hpp>
 #include <armnn/ILayerSupport.hpp>
 #include <armnn/BackendRegistry.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <backendsCommon/WorkloadFactory.hpp>
 #include <armnn/backends/IBackendInternal.hpp>
@@ -18,7 +19,6 @@
 
 #include <backendsCommon/test/WorkloadTestUtils.hpp>
 
-#include <boost/cast.hpp>
 #include <boost/iterator/transform_iterator.hpp>
 
 #include <cstring>
@@ -49,7 +49,7 @@
 {
     Optional<std::string&> reason = outReasonIfUnsupported;
     bool result;
-    const Layer& layer = *(boost::polymorphic_downcast<const Layer*>(&connectableLayer));
+    const Layer& layer = *(PolymorphicDowncast<const Layer*>(&connectableLayer));
 
     auto const& backendRegistry = BackendRegistryInstance();
     if (!backendRegistry.IsBackendRegistered(backendId))
@@ -70,7 +70,7 @@
     {
         case LayerType::Activation:
         {
-            auto cLayer = boost::polymorphic_downcast<const ActivationLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const ActivationLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsActivationSupported(
@@ -94,7 +94,7 @@
         }
         case LayerType::ArgMinMax:
         {
-            auto cLayer = boost::polymorphic_downcast<const ArgMinMaxLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const ArgMinMaxLayer*>(&layer);
             const ArgMinMaxDescriptor& descriptor = cLayer->GetParameters();
 
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
@@ -108,7 +108,7 @@
         }
         case LayerType::BatchNormalization:
         {
-            auto cLayer = boost::polymorphic_downcast<const BatchNormalizationLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const BatchNormalizationLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             const TensorInfo& mean = cLayer->m_Mean->GetTensorInfo();
@@ -130,7 +130,7 @@
         {
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
-            auto cLayer = boost::polymorphic_downcast<const BatchToSpaceNdLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const BatchToSpaceNdLayer*>(&layer);
 
             result = layerSupportObject->IsBatchToSpaceNdSupported(OverrideDataType(input, dataType),
                                                                    OverrideDataType(output, dataType),
@@ -140,7 +140,7 @@
         }
         case LayerType::Comparison:
         {
-            auto cLayer = boost::polymorphic_downcast<const ComparisonLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const ComparisonLayer*>(&layer);
 
             const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
@@ -189,7 +189,7 @@
         }
         case LayerType::Convolution2d:
         {
-            auto cLayer = boost::polymorphic_downcast<const Convolution2dLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const Convolution2dLayer*>(&layer);
 
             const TensorInfo input  = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(),
                                                        dataType);
@@ -227,7 +227,7 @@
         }
         case LayerType::DepthToSpace:
         {
-            auto cLayer = boost::polymorphic_downcast<const DepthToSpaceLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const DepthToSpaceLayer*>(&layer);
 
             const TensorInfo& input  = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -240,7 +240,7 @@
         }
         case LayerType::DepthwiseConvolution2d:
         {
-            auto cLayer = boost::polymorphic_downcast<const DepthwiseConvolution2dLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const DepthwiseConvolution2dLayer*>(&layer);
             const TensorInfo& input = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(),
                                                        dataType);
             const TensorInfo& output = OverrideDataType(layer.GetOutputSlot(0).GetTensorInfo(), dataType);
@@ -277,7 +277,7 @@
         }
         case LayerType::DetectionPostProcess:
         {
-            auto cLayer = boost::polymorphic_downcast<const DetectionPostProcessLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const DetectionPostProcessLayer*>(&layer);
             const TensorInfo& boxEncodings = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& scores = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
             const TensorInfo& anchors = cLayer->m_Anchors->GetTensorInfo();
@@ -301,7 +301,7 @@
         }
         case LayerType::ElementwiseUnary:
         {
-            auto cLayer = boost::polymorphic_downcast<const ElementwiseUnaryLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const ElementwiseUnaryLayer*>(&layer);
 
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -314,7 +314,7 @@
         }
         case LayerType::FakeQuantization:
         {
-            auto cLayer = boost::polymorphic_downcast<const FakeQuantizationLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const FakeQuantizationLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             result = layerSupportObject->IsFakeQuantizationSupported(OverrideDataType(input, dataType),
                                                                      cLayer->GetParameters(),
@@ -332,7 +332,7 @@
         }
         case LayerType::FullyConnected:
         {
-            auto cLayer = boost::polymorphic_downcast<const FullyConnectedLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const FullyConnectedLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             ARMNN_ASSERT(cLayer->m_Weight.get() != nullptr);
@@ -414,7 +414,7 @@
         }
         case LayerType::InstanceNormalization:
         {
-            auto cLayer = boost::polymorphic_downcast<const InstanceNormalizationLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const InstanceNormalizationLayer*>(&layer);
             const InstanceNormalizationDescriptor& descriptor = cLayer->GetParameters();
 
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
@@ -429,7 +429,7 @@
         }
         case LayerType::L2Normalization:
         {
-            auto cLayer = boost::polymorphic_downcast<const L2NormalizationLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const L2NormalizationLayer*>(&layer);
             const L2NormalizationDescriptor& descriptor = cLayer->GetParameters();
 
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
@@ -444,7 +444,7 @@
         }
         case LayerType::LogSoftmax:
         {
-            auto cLayer = boost::polymorphic_downcast<const LogSoftmaxLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const LogSoftmaxLayer*>(&layer);
 
             const TensorInfo& input  = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -457,7 +457,7 @@
         }
         case LayerType::Lstm:
         {
-            auto cLayer = boost::polymorphic_downcast<const LstmLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const LstmLayer*>(&layer);
             const LstmDescriptor& descriptor = cLayer->GetParameters();
 
             // All inputs.
@@ -645,7 +645,7 @@
         }
         case LayerType::Concat:
         {
-            auto cLayer = boost::polymorphic_downcast<const ConcatLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const ConcatLayer*>(&layer);
 
             // Get vector of all inputs.
             auto getTensorInfo = [&dataType](const InputSlot& slot)
@@ -685,7 +685,7 @@
         }
         case LayerType::Normalization:
         {
-            auto cLayer = boost::polymorphic_downcast<const NormalizationLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const NormalizationLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsNormalizationSupported(OverrideDataType(input, dataType),
@@ -702,7 +702,7 @@
         }
         case LayerType::Permute:
         {
-            auto cLayer = boost::polymorphic_downcast<const PermuteLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const PermuteLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsPermuteSupported(OverrideDataType(input, dataType),
@@ -713,7 +713,7 @@
         }
         case LayerType::Pad:
         {
-            auto cLayer = boost::polymorphic_downcast<const PadLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const PadLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsPadSupported(
@@ -725,7 +725,7 @@
         }
         case LayerType::Pooling2d:
         {
-            auto cLayer = boost::polymorphic_downcast<const Pooling2dLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const Pooling2dLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsPooling2dSupported(OverrideDataType(input, dataType),
@@ -736,7 +736,7 @@
         }
         case LayerType::PreCompiled:
         {
-            auto cLayer = boost::polymorphic_downcast<const PreCompiledLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const PreCompiledLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             result = layerSupportObject->IsPreCompiledSupported(OverrideDataType(input, dataType),
                                                                 cLayer->GetParameters(),
@@ -752,7 +752,7 @@
         }
         case LayerType::QLstm:
         {
-            auto cLayer = boost::polymorphic_downcast<const QLstmLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const QLstmLayer*>(&layer);
             const QLstmDescriptor& descriptor = cLayer->GetParameters();
 
             // Inputs
@@ -840,7 +840,7 @@
         }
         case LayerType::QuantizedLstm:
         {
-            auto cLayer = boost::polymorphic_downcast<const QuantizedLstmLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const QuantizedLstmLayer*>(&layer);
 
             // Inputs
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
@@ -904,7 +904,7 @@
         }
         case LayerType::Reshape:
         {
-            auto cLayer = boost::polymorphic_downcast<const ReshapeLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const ReshapeLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsReshapeSupported(OverrideDataType(input, dataType),
@@ -915,7 +915,7 @@
         }
         case LayerType::Resize:
         {
-            auto cLayer = boost::polymorphic_downcast<const ResizeLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const ResizeLayer*>(&layer);
             const TensorInfo& input  = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsResizeSupported(OverrideDataType(input, dataType),
@@ -926,7 +926,7 @@
         }
         case LayerType::Slice:
         {
-            auto cLayer = boost::polymorphic_downcast<const SliceLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const SliceLayer*>(&layer);
 
             const TensorInfo& input  = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -939,7 +939,7 @@
         }
         case LayerType::Softmax:
         {
-            auto cLayer = boost::polymorphic_downcast<const SoftmaxLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const SoftmaxLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsSoftmaxSupported(OverrideDataType(input, dataType),
@@ -950,7 +950,7 @@
         }
         case LayerType::SpaceToBatchNd:
         {
-            auto cLayer = boost::polymorphic_downcast<const SpaceToBatchNdLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const SpaceToBatchNdLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsSpaceToBatchNdSupported(OverrideDataType(input, dataType),
@@ -961,7 +961,7 @@
         }
         case LayerType::SpaceToDepth:
         {
-            auto cLayer = boost::polymorphic_downcast<const SpaceToDepthLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const SpaceToDepthLayer*>(&layer);
 
             const TensorInfo& input  = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -974,7 +974,7 @@
         }
         case LayerType::Splitter:
         {
-            auto cLayer = boost::polymorphic_downcast<const SplitterLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const SplitterLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
 
             // Get vector of all outputs.
@@ -996,7 +996,7 @@
         }
         case LayerType::Stack:
         {
-            auto cLayer = boost::polymorphic_downcast<const StackLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const StackLayer*>(&layer);
 
             // Get vector of all inputs.
             auto getTensorInfo = [&dataType](const InputSlot& slot)
@@ -1023,7 +1023,7 @@
         }
         case LayerType::StandIn:
         {
-            auto cLayer = boost::polymorphic_downcast<const StandInLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const StandInLayer*>(&layer);
 
             // Get vector of all inputs.
             auto getTensorInfoIn = [&dataType](const InputSlot& slot)
@@ -1064,7 +1064,7 @@
         }
         case LayerType::StridedSlice:
         {
-            auto cLayer = boost::polymorphic_downcast<const StridedSliceLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const StridedSliceLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsStridedSliceSupported(OverrideDataType(input, dataType),
@@ -1100,7 +1100,7 @@
         }
         case LayerType::Mean:
         {
-            auto cLayer = boost::polymorphic_downcast<const MeanLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const MeanLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsMeanSupported(
@@ -1134,7 +1134,7 @@
         }
         case LayerType::Transpose:
         {
-            auto cLayer = boost::polymorphic_downcast<const TransposeLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const TransposeLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsTransposeSupported(OverrideDataType(input, dataType),
@@ -1145,7 +1145,7 @@
         }
         case LayerType::TransposeConvolution2d:
         {
-            auto cLayer = boost::polymorphic_downcast<const TransposeConvolution2dLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const TransposeConvolution2dLayer*>(&layer);
 
             const TensorInfo input  = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(),
                                                        dataType);
@@ -1188,7 +1188,7 @@
                                         Optional<DataType> dataType,
                                         std::string& outReasonIfUnsupported)
 {
-    auto layer = boost::polymorphic_downcast<const Layer*>(&connectableLayer);
+    auto layer = PolymorphicDowncast<const Layer*>(&connectableLayer);
     return IsLayerSupported(layer->GetBackendId(), connectableLayer, dataType, outReasonIfUnsupported);
 }
 
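Every hunk in the IsLayerSupported switch above follows one call-site pattern: dispatch on the layer's runtime type tag, then narrow the const Layer& to the concrete layer class to reach its descriptor. The following minimal sketch (not part of the patch) shows that pattern with toy types, assuming a hypothetical CheckedDowncast helper with checked-downcast semantics (a static_cast whose result is verified against dynamic_cast in debug builds); it is not armnn's real hierarchy, where call sites include <armnn/utility/PolymorphicDowncast.hpp> instead.

#include <cassert>
#include <iostream>
#include <type_traits>

// Hypothetical stand-in for a checked downcast, for illustration only.
template <typename DestType, typename SourceType>
DestType CheckedDowncast(SourceType value)
{
    static_assert(std::is_pointer<SourceType>::value && std::is_pointer<DestType>::value,
                  "CheckedDowncast only works with pointer types.");
    assert(dynamic_cast<DestType>(value) == static_cast<DestType>(value)); // debug-only type check
    return static_cast<DestType>(value);
}

enum class LayerType { Softmax, Other };

struct SoftmaxDescriptor { int m_Axis = -1; };

struct Layer
{
    virtual ~Layer() = default;
    virtual LayerType GetType() const = 0;
};

struct SoftmaxLayer : Layer
{
    LayerType GetType() const override { return LayerType::Softmax; }
    const SoftmaxDescriptor& GetParameters() const { return m_Param; }
    SoftmaxDescriptor m_Param;
};

// Mirrors the shape of the switch above: dispatch on the type tag, then
// downcast to the concrete layer to read its parameters.
bool IsLayerSupported(const Layer& layer)
{
    switch (layer.GetType())
    {
        case LayerType::Softmax:
        {
            auto cLayer = CheckedDowncast<const SoftmaxLayer*>(&layer);
            return cLayer->GetParameters().m_Axis == -1;
        }
        default:
            return false;
    }
}

int main()
{
    SoftmaxLayer softmax;
    std::cout << std::boolalpha << IsLayerSupported(softmax) << '\n'; // prints "true"
}
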
diff --git a/src/backends/backendsCommon/WorkloadUtils.cpp b/src/backends/backendsCommon/WorkloadUtils.cpp
index bd5e81e..37915cf 100644
--- a/src/backends/backendsCommon/WorkloadUtils.cpp
+++ b/src/backends/backendsCommon/WorkloadUtils.cpp
@@ -7,6 +7,8 @@
 
 #include <armnn/Utils.hpp>
 
+#include <boost/numeric/conversion/cast.hpp>
+
 namespace armnn
 {
 
diff --git a/src/backends/backendsCommon/WorkloadUtils.hpp b/src/backends/backendsCommon/WorkloadUtils.hpp
index a4da924..354362e 100644
--- a/src/backends/backendsCommon/WorkloadUtils.hpp
+++ b/src/backends/backendsCommon/WorkloadUtils.hpp
@@ -8,15 +8,12 @@
 #include "CpuTensorHandle.hpp"
 
 #include <armnn/backends/ITensorHandle.hpp>
-
 #include <armnn/Tensor.hpp>
-
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <armnnUtils/Permute.hpp>
 
 #include <Half.hpp>
 #include <Profiling.hpp>
 
-#include <boost/cast.hpp>
-
 namespace armnn
 {
@@ -198,9 +195,9 @@
     for (unsigned int i = 0; i < numInputs; ++i)
     {
         SrcTensorHandleType* const srcTensorHandle =
-            boost::polymorphic_downcast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
+            PolymorphicDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
         DstTensorHandleType* const dstTensorHandle =
-            boost::polymorphic_downcast<DstTensorHandleType*>(descriptor.m_Outputs[i]);
+            PolymorphicDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);
 
         tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
     }
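The templated loop above is the other recurring shape in this patch: a queue descriptor carries base-class ITensorHandle pointers, and backend code narrows each input/output pair to its own handle types before pairing them up. Below is a hedged, self-contained sketch of that idea with toy types; the function name GatherHandlePairs and the CheckedDowncast helper are hypothetical, and only the loop body mirrors the hunk above.

#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

struct ITensorHandle { virtual ~ITensorHandle() = default; };
struct SrcHandle : ITensorHandle {};
struct DstHandle : ITensorHandle {};

struct QueueDescriptor
{
    std::vector<ITensorHandle*> m_Inputs;
    std::vector<ITensorHandle*> m_Outputs;
};

// Hypothetical stand-in for a checked downcast (same idea as the sketch
// further up: static_cast, verified against dynamic_cast in debug builds).
template <typename DestType, typename SourceType>
DestType CheckedDowncast(SourceType value)
{
    assert(dynamic_cast<DestType>(value) == static_cast<DestType>(value));
    return static_cast<DestType>(value);
}

// Hypothetical name; the loop body follows the pattern in the hunk above.
template <typename SrcTensorHandleType, typename DstTensorHandleType, typename DescriptorType>
std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>>
GatherHandlePairs(const DescriptorType& descriptor)
{
    std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>> tensorHandlePairs;
    const std::size_t numInputs = descriptor.m_Inputs.size();
    tensorHandlePairs.reserve(numInputs);

    for (std::size_t i = 0; i < numInputs; ++i)
    {
        SrcTensorHandleType* const srcTensorHandle =
            CheckedDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
        DstTensorHandleType* const dstTensorHandle =
            CheckedDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);

        tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
    }
    return tensorHandlePairs;
}

int main()
{
    SrcHandle src;
    DstHandle dst;
    QueueDescriptor descriptor;
    descriptor.m_Inputs.push_back(&src);
    descriptor.m_Outputs.push_back(&dst);

    auto pairs = GatherHandlePairs<SrcHandle, DstHandle>(descriptor);
    assert(pairs.size() == 1 && pairs[0].first == &src && pairs[0].second == &dst);
}
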
diff --git a/src/backends/backendsCommon/test/DynamicBackendTests.hpp b/src/backends/backendsCommon/test/DynamicBackendTests.hpp
index 1276776..6371e53 100644
--- a/src/backends/backendsCommon/test/DynamicBackendTests.hpp
+++ b/src/backends/backendsCommon/test/DynamicBackendTests.hpp
@@ -6,15 +6,12 @@
 #pragma once
 
 #include <armnn/BackendRegistry.hpp>
-#include <armnn/ILayerSupport.hpp>
-
 #include <armnn/backends/DynamicBackend.hpp>
-
-#include <backendsCommon/DynamicBackendUtils.hpp>
+#include <armnn/ILayerSupport.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
-
+#include <backendsCommon/DynamicBackendUtils.hpp>
 #include <reference/workloads/RefConvolution2dWorkload.hpp>
-
 #include <Runtime.hpp>
 
 #include <string>
@@ -1212,7 +1209,7 @@
     IRuntime::CreationOptions creationOptions;
     IRuntimePtr runtime = IRuntime::Create(creationOptions);
 
-    const DeviceSpec& deviceSpec = *boost::polymorphic_downcast<const DeviceSpec*>(&runtime->GetDeviceSpec());
+    const DeviceSpec& deviceSpec = *PolymorphicDowncast<const DeviceSpec*>(&runtime->GetDeviceSpec());
     BackendIdSet supportedBackendIds = deviceSpec.GetSupportedBackends();
     BOOST_TEST(supportedBackendIds.empty());
 
@@ -1253,7 +1250,7 @@
         BOOST_TEST((backendIds.find(expectedRegisteredbackendId) != backendIds.end()));
     }
 
-    const DeviceSpec& deviceSpec = *boost::polymorphic_downcast<const DeviceSpec*>(&runtime->GetDeviceSpec());
+    const DeviceSpec& deviceSpec = *PolymorphicDowncast<const DeviceSpec*>(&runtime->GetDeviceSpec());
     BackendIdSet supportedBackendIds = deviceSpec.GetSupportedBackends();
     BOOST_TEST(supportedBackendIds.size() == expectedRegisteredbackendIds.size());
     for (const BackendId& expectedRegisteredbackendId : expectedRegisteredbackendIds)
@@ -1294,7 +1291,7 @@
         BOOST_TEST((backendIds.find(expectedRegisteredbackendId) != backendIds.end()));
     }
 
-    const DeviceSpec& deviceSpec = *boost::polymorphic_downcast<const DeviceSpec*>(&runtime->GetDeviceSpec());
+    const DeviceSpec& deviceSpec = *PolymorphicDowncast<const DeviceSpec*>(&runtime->GetDeviceSpec());
     BackendIdSet supportedBackendIds = deviceSpec.GetSupportedBackends();
     BOOST_TEST(supportedBackendIds.size() == expectedRegisteredbackendIds.size());
     for (const BackendId& expectedRegisteredbackendId : expectedRegisteredbackendIds)
@@ -1323,7 +1320,7 @@
     const BackendRegistry& backendRegistry = BackendRegistryInstance();
     BOOST_TEST(backendRegistry.Size() == 0);
 
-    const DeviceSpec& deviceSpec = *boost::polymorphic_downcast<const DeviceSpec*>(&runtime->GetDeviceSpec());
+    const DeviceSpec& deviceSpec = *PolymorphicDowncast<const DeviceSpec*>(&runtime->GetDeviceSpec());
     BackendIdSet supportedBackendIds = deviceSpec.GetSupportedBackends();
     BOOST_TEST(supportedBackendIds.empty());
 }
@@ -1343,7 +1340,7 @@
     const BackendRegistry& backendRegistry = BackendRegistryInstance();
     BOOST_TEST(backendRegistry.Size() == 0);
 
-    const DeviceSpec& deviceSpec = *boost::polymorphic_downcast<const DeviceSpec*>(&runtime->GetDeviceSpec());
+    const DeviceSpec& deviceSpec = *PolymorphicDowncast<const DeviceSpec*>(&runtime->GetDeviceSpec());
     BackendIdSet supportedBackendIds = deviceSpec.GetSupportedBackends();
     BOOST_TEST(supportedBackendIds.empty());
 }
@@ -1382,7 +1379,7 @@
     BackendIdSet backendIds = backendRegistry.GetBackendIds();
     BOOST_TEST((backendIds.find("CpuRef") != backendIds.end()));
 
-    const DeviceSpec& deviceSpec = *boost::polymorphic_downcast<const DeviceSpec*>(&runtime->GetDeviceSpec());
+    const DeviceSpec& deviceSpec = *PolymorphicDowncast<const DeviceSpec*>(&runtime->GetDeviceSpec());
     BackendIdSet supportedBackendIds = deviceSpec.GetSupportedBackends();
     BOOST_TEST(supportedBackendIds.size() == 1);
     BOOST_TEST((supportedBackendIds.find("CpuRef") != supportedBackendIds.end()));
@@ -1433,7 +1430,7 @@
     // Create a convolution workload with the dummy settings
     auto workload = referenceWorkloadFactory->CreateConvolution2d(convolution2dQueueDescriptor, workloadInfo);
     BOOST_TEST((workload != nullptr));
-    BOOST_TEST(workload.get() == boost::polymorphic_downcast<RefConvolution2dWorkload*>(workload.get()));
+    BOOST_TEST(workload.get() == PolymorphicDowncast<RefConvolution2dWorkload*>(workload.get()));
 }
 
 #endif
@@ -1453,7 +1450,7 @@
     BackendIdSet backendIds = backendRegistry.GetBackendIds();
     BOOST_TEST((backendIds.find("SampleDynamic") != backendIds.end()));
 
-    const DeviceSpec& deviceSpec = *boost::polymorphic_downcast<const DeviceSpec*>(&runtime->GetDeviceSpec());
+    const DeviceSpec& deviceSpec = *PolymorphicDowncast<const DeviceSpec*>(&runtime->GetDeviceSpec());
     BackendIdSet supportedBackendIds = deviceSpec.GetSupportedBackends();
     BOOST_TEST(supportedBackendIds.size()>= 1);
     BOOST_TEST((supportedBackendIds.find("SampleDynamic") != supportedBackendIds.end()));
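The RefConvolution2dWorkload check earlier in this file leans on a property of the checked downcast worth spelling out: when the dynamic type matches, the cast result compares equal to the pointer that went in, and in debug builds the embedded dynamic_cast comparison is what actually validates the type. A toy sketch of that round-trip, again with a hypothetical CheckedDowncast stand-in rather than armnn's helper:

#include <cassert>
#include <memory>

struct IWorkload { virtual ~IWorkload() = default; };
struct RefConvolution2dWorkload : IWorkload {};

// Hypothetical stand-in for a checked downcast.
template <typename DestType, typename SourceType>
DestType CheckedDowncast(SourceType value)
{
    // In debug builds this comparison is the real type check.
    assert(dynamic_cast<DestType>(value) == static_cast<DestType>(value));
    return static_cast<DestType>(value);
}

int main()
{
    std::unique_ptr<IWorkload> workload = std::make_unique<RefConvolution2dWorkload>();

    // Mirrors the BOOST_TEST assertion above: the downcast result compares
    // equal to the original pointer because the object's dynamic type really
    // is RefConvolution2dWorkload.
    assert(workload.get() == CheckedDowncast<RefConvolution2dWorkload*>(workload.get()));
}
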
diff --git a/src/backends/backendsCommon/test/OptimizationViewsTests.cpp b/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
index 3aebe3e..c972b4b 100644
--- a/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
+++ b/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
@@ -3,16 +3,19 @@
 // SPDX-License-Identifier: MIT
 //
 
-#include <boost/test/unit_test.hpp>
-#include <Graph.hpp>
-#include <SubgraphView.hpp>
-#include <SubgraphViewSelector.hpp>
-#include <armnn/backends/OptimizationViews.hpp>
-#include <Network.hpp>
 
 #include "CommonTestUtils.hpp"
 #include "MockBackend.hpp"
 
+#include <armnn/backends/OptimizationViews.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
+#include <Graph.hpp>
+#include <Network.hpp>
+#include <SubgraphView.hpp>
+#include <SubgraphViewSelector.hpp>
+
+#include <boost/test/unit_test.hpp>
+
 using namespace armnn;
 
 void CheckLayers(Graph& graph)
@@ -208,7 +211,7 @@
     BOOST_CHECK(optNet);
 
     // Check the optimised graph
-    OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast<OptimizedNetwork*>(optNet.get());
+    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
     CheckLayers(optNetObjPtr->GetGraph());
 }
 
diff --git a/src/backends/cl/ClBackendContext.cpp b/src/backends/cl/ClBackendContext.cpp
index f612c37..bfe93bd 100644
--- a/src/backends/cl/ClBackendContext.cpp
+++ b/src/backends/cl/ClBackendContext.cpp
@@ -8,14 +8,13 @@
 
 #include <armnn/Logging.hpp>
 #include <armnn/utility/Assert.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/core/CL/OpenCL.h>
 #include <arm_compute/core/CL/CLKernelLibrary.h>
 #include <arm_compute/runtime/CL/CLScheduler.h>
 #include <arm_compute/runtime/CL/CLTunerTypes.h>
 
-#include <boost/polymorphic_cast.hpp>
-
 namespace armnn
 {
 
@@ -161,7 +160,7 @@
     bool useLegacyTunerAPI = options.m_GpuAccTunedParameters.get() != nullptr;
     if (useLegacyTunerAPI)
     {
-        auto clTunerParams = boost::polymorphic_downcast<ClTunedParameters*>(
+        auto clTunerParams = PolymorphicDowncast<ClTunedParameters*>(
                                 options.m_GpuAccTunedParameters.get());
         tuner = &clTunerParams->m_Tuner;
 
diff --git a/src/backends/cl/ClTensorHandleFactory.cpp b/src/backends/cl/ClTensorHandleFactory.cpp
index 9df3f1a..8af97f4 100644
--- a/src/backends/cl/ClTensorHandleFactory.cpp
+++ b/src/backends/cl/ClTensorHandleFactory.cpp
@@ -7,12 +7,12 @@
 #include "ClTensorHandleFactory.hpp"
 #include "ClTensorHandle.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <arm_compute/runtime/CL/CLTensor.h>
 #include <arm_compute/core/Coordinates.h>
 #include <arm_compute/runtime/CL/CLSubTensor.h>
 
-#include <boost/polymorphic_cast.hpp>
-
 
 namespace armnn
 {
@@ -42,7 +42,7 @@
     }
 
     return std::make_unique<ClSubTensorHandle>(
-            boost::polymorphic_downcast<IClTensorHandle *>(&parent), shape, coords);
+            PolymorphicDowncast<IClTensorHandle *>(&parent), shape, coords);
 }
 
 std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index b1bd46c..b0d2fdf 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -10,6 +10,7 @@
 #include <armnn/Exceptions.hpp>
 #include <armnn/Utils.hpp>
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <backendsCommon/MakeWorkloadHelper.hpp>
@@ -24,7 +25,6 @@
 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
 #include <arm_compute/runtime/CL/CLScheduler.h>
 
-#include <boost/polymorphic_cast.hpp>
 #include <boost/format.hpp>
 
 namespace armnn
@@ -125,7 +125,7 @@
     }
 
     return std::make_unique<ClSubTensorHandle>(
-        boost::polymorphic_downcast<IClTensorHandle*>(&parent), shape, coords);
+        PolymorphicDowncast<IClTensorHandle*>(&parent), shape, coords);
 }
 
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAbs(const AbsQueueDescriptor& descriptor,
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index 92e7717..b09b26f 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -6,6 +6,7 @@
 #include "ClContextControlFixture.hpp"
 #include "ClWorkloadFactoryHelper.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/MemCopyWorkload.hpp>
 
 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
@@ -35,8 +36,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
     ActivationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 1}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 1}));
@@ -66,9 +67,9 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest).
     DescriptorType queueDescriptor = workload->GetData();
-    auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto inputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3}));
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3}));
@@ -159,8 +160,8 @@
 
     DescriptorType queueDescriptor = workload->GetData();
 
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3}));
@@ -184,8 +185,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
     BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
      switch (dataLayout)
     {
@@ -232,8 +233,8 @@
     auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);
 
     ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3}));
@@ -250,8 +251,8 @@
     auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);
 
     ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3}));
@@ -277,8 +278,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((inputHandle->GetShape() == inputShape));
     BOOST_TEST((outputHandle->GetShape() == outputShape));
 }
@@ -315,8 +316,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
     DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                                : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
@@ -343,8 +344,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6}));
 }
@@ -376,8 +377,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
     FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7}));
 }
@@ -404,8 +405,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
     NormalizationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
                                                                : std::initializer_list<unsigned int>({3, 1, 5, 5});
@@ -452,8 +453,8 @@
 
     // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
     Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST((inputHandle->GetShape() == inputShape));
     BOOST_TEST((outputHandle->GetShape() == outputShape));
@@ -497,9 +498,9 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
     PreluQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto alphaHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto alphaHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST((inputHandle->GetShape() == inputShape));
     BOOST_TEST((alphaHandle->GetShape() == alphaShape));
@@ -532,8 +533,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
     ReshapeQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 4}));
@@ -565,8 +566,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of ClSoftmaxFloatWorkload).
     SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1}));
@@ -594,16 +595,16 @@
 
     // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
     SplitterQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7}));
 
-    auto outputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    auto outputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {2, 7, 7}));
 
-    auto outputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
+    auto outputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2, 7, 7}));
 
-    auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {1, 7, 7}));
 }
 
@@ -738,8 +739,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
     L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
                                                                : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
@@ -780,8 +781,8 @@
     auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
 
     LstmQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 2 }));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4 }));
 }
@@ -802,8 +803,8 @@
 
     auto queueDescriptor = workload->GetData();
 
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     switch (dataLayout)
     {
@@ -859,8 +860,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
     MeanQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     // The first dimension (batch size) in both input and output is singular thus it has been reduced by ACL.
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {  1, 3, 7, 4 }));
@@ -893,9 +894,9 @@
     auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
 
     ConcatQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle0  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto inputHandle1  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle0  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle1  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle0, { 2, 3, 2, 5 }));
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, { 2, 3, 2, 5 }));
@@ -942,8 +943,8 @@
     auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
 
     SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 1, 2, 2, 1 }));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 1, 1, 1, 4 }));
@@ -990,10 +991,10 @@
     StackQueueDescriptor queueDescriptor = workload->GetData();
     for (unsigned int i = 0; i < numInputs; ++i)
     {
-        auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]);
+        auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]);
         BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
     }
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
 }
 
@@ -1016,7 +1017,6 @@
 static void ClCreateQuantizedLstmWorkloadTest()
 {
     using namespace armnn::armcomputetensorutils;
-    using boost::polymorphic_downcast;
 
     Graph graph;
     ClWorkloadFactory factory =
@@ -1026,23 +1026,23 @@
 
     QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();
 
-    IAclTensorHandle* inputHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
     BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 2})));
     BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));
 
-    IAclTensorHandle* cellStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
     BOOST_TEST((cellStateInHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));
 
-    IAclTensorHandle* outputStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
+    IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
     BOOST_TEST((outputStateInHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));
 
-    IAclTensorHandle* cellStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
 
-    IAclTensorHandle* outputStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
     BOOST_TEST((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
 }
diff --git a/src/backends/cl/workloads/ClAbsWorkload.cpp b/src/backends/cl/workloads/ClAbsWorkload.cpp
index 058c453..d020eeb 100644
--- a/src/backends/cl/workloads/ClAbsWorkload.cpp
+++ b/src/backends/cl/workloads/ClAbsWorkload.cpp
@@ -7,6 +7,8 @@
 
 #include "ClWorkloadUtils.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 
 #include <cl/ClTensorHandle.hpp>
@@ -29,8 +31,8 @@
 {
     m_Data.ValidateInputsOutputs("ClAbsWorkload", 1, 1);
 
-    arm_compute::ICLTensor& input  = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ICLTensor& output = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ICLTensor& input  = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_AbsLayer.configure(&input, &output);
 }
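ClAbsWorkload above shows the constructor-side pattern repeated in the Cl*Workload files that follow: validate the descriptor's input/output counts, then narrow each base ITensorHandle to the backend handle type to reach the backing tensor. A hedged sketch with toy types only: Tensor stands in for arm_compute::ICLTensor, ValidateInputsOutputs is a simplified hypothetical, and CheckedDowncast is the same illustrative stand-in used in the earlier sketches.

#include <cassert>
#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

struct Tensor {}; // stands in for arm_compute::ICLTensor

struct ITensorHandle { virtual ~ITensorHandle() = default; };

struct ClTensorHandle : ITensorHandle
{
    Tensor& GetTensor() { return m_Tensor; }
    Tensor m_Tensor;
};

struct QueueDescriptor
{
    std::vector<ITensorHandle*> m_Inputs;
    std::vector<ITensorHandle*> m_Outputs;

    // Simplified hypothetical version of the validation step above.
    void ValidateInputsOutputs(const std::string& name, std::size_t in, std::size_t out) const
    {
        if (m_Inputs.size() != in || m_Outputs.size() != out)
        {
            throw std::runtime_error(name + ": unexpected number of inputs/outputs");
        }
    }
};

// Hypothetical stand-in for a checked downcast.
template <typename DestType, typename SourceType>
DestType CheckedDowncast(SourceType value)
{
    assert(dynamic_cast<DestType>(value) == static_cast<DestType>(value));
    return static_cast<DestType>(value);
}

struct ClAbsWorkload
{
    explicit ClAbsWorkload(const QueueDescriptor& data)
    {
        data.ValidateInputsOutputs("ClAbsWorkload", 1, 1);

        // Narrow the base handles to the backend type to reach the tensors;
        // the real workload would then call configure(&input, &output).
        Tensor& input  = CheckedDowncast<ClTensorHandle*>(data.m_Inputs[0])->GetTensor();
        Tensor& output = CheckedDowncast<ClTensorHandle*>(data.m_Outputs[0])->GetTensor();
        (void)input;
        (void)output;
    }
};

int main()
{
    ClTensorHandle in, out;
    QueueDescriptor descriptor;
    descriptor.m_Inputs.push_back(&in);
    descriptor.m_Outputs.push_back(&out);
    ClAbsWorkload workload(descriptor); // constructs without throwing
}
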
diff --git a/src/backends/cl/workloads/ClNegWorkload.cpp b/src/backends/cl/workloads/ClNegWorkload.cpp
index cc6333f..9f83cd3 100644
--- a/src/backends/cl/workloads/ClNegWorkload.cpp
+++ b/src/backends/cl/workloads/ClNegWorkload.cpp
@@ -8,6 +8,7 @@
 #include "ClWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <cl/ClTensorHandle.hpp>
 
@@ -29,8 +30,8 @@
 {
     m_Data.ValidateInputsOutputs("ClNegWorkload", 1, 1);
 
-    arm_compute::ICLTensor& input  = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ICLTensor& output = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ICLTensor& input  = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_NegLayer.configure(&input, &output);
 }
diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.cpp b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
index be68759..a305a4a 100644
--- a/src/backends/cl/workloads/ClRsqrtWorkload.cpp
+++ b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
@@ -8,6 +8,7 @@
 #include "ClWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <cl/ClTensorHandle.hpp>
 
@@ -29,8 +30,8 @@
 {
     m_Data.ValidateInputsOutputs("ClRsqrtWorkload", 1, 1);
 
-    arm_compute::ICLTensor& input  = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ICLTensor& output = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ICLTensor& input  = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_RsqrtLayer.configure(&input, &output);
 }
diff --git a/src/backends/cl/workloads/ClSliceWorkload.cpp b/src/backends/cl/workloads/ClSliceWorkload.cpp
index fa99e7f..5ea4c4c 100644
--- a/src/backends/cl/workloads/ClSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClSliceWorkload.cpp
@@ -8,6 +8,7 @@
 #include "ClWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <cl/ClTensorHandle.hpp>
 
@@ -36,8 +37,8 @@
 {
     m_Data.ValidateInputsOutputs("ClSliceWorkload", 1, 1);
 
-    arm_compute::ICLTensor& input  = boost::polymorphic_downcast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ICLTensor& output = boost::polymorphic_downcast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ICLTensor& input  = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::Coordinates starts;
     arm_compute::Coordinates ends;
diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
index d541e4e..1acb5c6 100644
--- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
@@ -10,7 +10,6 @@
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <cl/ClTensorHandle.hpp>
-#include <boost/polymorphic_pointer_cast.hpp>
 
 namespace armnn
 {
diff --git a/src/backends/neon/NeonTensorHandleFactory.cpp b/src/backends/neon/NeonTensorHandleFactory.cpp
index 26b14af..a8b5b81 100644
--- a/src/backends/neon/NeonTensorHandleFactory.cpp
+++ b/src/backends/neon/NeonTensorHandleFactory.cpp
@@ -7,6 +7,7 @@
 #include "NeonTensorHandle.hpp"
 
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 namespace armnn
 {
@@ -36,7 +37,7 @@
     }
 
     return std::make_unique<NeonSubTensorHandle>(
-            boost::polymorphic_downcast<IAclTensorHandle*>(&parent), shape, coords);
+            PolymorphicDowncast<IAclTensorHandle*>(&parent), shape, coords);
 }
 
 std::unique_ptr<ITensorHandle> NeonTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 47f7205..b3104b9 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -11,6 +11,7 @@
 
 #include <armnn/Utils.hpp>
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <backendsCommon/MakeWorkloadHelper.hpp>
@@ -69,7 +70,7 @@
     }
 
     return std::make_unique<NeonSubTensorHandle>(
-        boost::polymorphic_downcast<IAclTensorHandle*>(&parent), shape, coords);
+        PolymorphicDowncast<IAclTensorHandle*>(&parent), shape, coords);
 }
 
 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
index 3e1888c..447bad1 100644
--- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp
+++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
@@ -6,6 +6,7 @@
 #include "NeonWorkloadFactoryHelper.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/MemCopyWorkload.hpp>
 
 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
@@ -72,8 +73,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
     ActivationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType)));
 }
@@ -103,9 +104,9 @@
     auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
 
     DescriptorType queueDescriptor = workload->GetData();
-    auto inputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto inputHandle2 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle2 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType)));
@@ -201,8 +202,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
     BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
@@ -247,8 +248,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle,  TensorInfo(outputShape, DataType)));
 }
@@ -287,8 +288,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
     DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                                : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
@@ -322,8 +323,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
     FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType)));
 }
@@ -351,8 +352,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
     NormalizationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
@@ -398,8 +399,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
     Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
 }
@@ -449,9 +450,9 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
     PreluQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto alphaHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto alphaHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, dataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(alphaHandle, TensorInfo(alphaShape, dataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, dataType)));
@@ -485,8 +486,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
     ReshapeQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType)));
 }
@@ -518,8 +519,8 @@
 
     auto queueDescriptor = workload->GetData();
 
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     switch (dataLayout)
     {
@@ -565,8 +566,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
     SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({4, 1}, DataType)));
 }
@@ -593,8 +594,8 @@
     auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
 
     SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 1, 2, 2, 1 }, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 1, 1, 1, 4 }, DataType)));
@@ -630,16 +631,16 @@
 
     // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
     SplitterQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));
 
-    auto outputHandle0 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle0 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));
 
-    auto outputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    auto outputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));
 
-    auto outputHandle2 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
+    auto outputHandle2 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
 }
 
@@ -743,8 +744,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest).
     L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ?
                 TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
@@ -788,8 +789,8 @@
 
     LstmQueueDescriptor queueDescriptor = workload->GetData();
 
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
 
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 2, 2 }, DataType::Float32)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 2, 4 }, DataType::Float32)));
@@ -811,9 +812,9 @@
     auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
 
     ConcatQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle0 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto inputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle0 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle0, TensorInfo({ 2, 3, 2, 5 }, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({ 2, 3, 2, 5 }, DataType)));
@@ -871,10 +872,10 @@
     StackQueueDescriptor queueDescriptor = workload->GetData();
     for (unsigned int i = 0; i < numInputs; ++i)
     {
-        auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[i]);
+        auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[i]);
         BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
     }
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
 }
 
@@ -898,8 +899,6 @@
 template <typename QuantizedLstmWorkloadType>
 static void NeonCreateQuantizedLstmWorkloadTest()
 {
-    using boost::polymorphic_downcast;
-
     Graph graph;
     NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
 
@@ -907,23 +906,23 @@
 
     QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();
 
-    IAclTensorHandle* inputHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
     BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 2})));
     BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));
 
-    IAclTensorHandle* cellStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
     BOOST_TEST((cellStateInHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));
 
-    IAclTensorHandle* outputStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
+    IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
     BOOST_TEST((outputStateInHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));
 
-    IAclTensorHandle* cellStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
 
-    IAclTensorHandle* outputStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
     BOOST_TEST((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
 }
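
The tests above and the workload sources below all make the same mechanical substitution: the destination type and the argument are untouched, and only the spelling and namespace of the cast change. A minimal sketch of that substitution, where `Base` and `Derived` are hypothetical stand-ins for the real handle hierarchy and only `armnn::PolymorphicDowncast` is taken from the patch itself:

```cpp
// Call-site migration sketch. Base/Derived are hypothetical stand-ins;
// the only real name used here is armnn::PolymorphicDowncast.
#include <armnn/utility/PolymorphicDowncast.hpp>

struct Base    { virtual ~Base() = default; };
struct Derived : Base { int Value() const { return 42; } };

int main()
{
    Derived derived;
    Base* base = &derived;

    // Before: auto down = boost::polymorphic_downcast<Derived*>(base);
    // After:  identical pointer-in/pointer-out semantics.
    auto down = armnn::PolymorphicDowncast<Derived*>(base);
    return down->Value() == 42 ? 0 : 1;
}
```
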
diff --git a/src/backends/neon/workloads/NeonAbsWorkload.cpp b/src/backends/neon/workloads/NeonAbsWorkload.cpp
index 7f8ed5a..ea14ac3 100644
--- a/src/backends/neon/workloads/NeonAbsWorkload.cpp
+++ b/src/backends/neon/workloads/NeonAbsWorkload.cpp
@@ -9,8 +9,7 @@
 
 #include <aclCommon/ArmComputeTensorHandle.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
-
-#include <boost/cast.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 namespace armnn
 {
@@ -28,8 +27,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonAbsWorkload", 1, 1);
 
-    arm_compute::ITensor& input  = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input  = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_AbsLayer.configure(&input, &output);
 }
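
NeonAbsWorkload is representative of almost every workload constructor touched below: validate the descriptor, downcast each input and output handle to the ACL handle type, then pass the unwrapped tensors to the layer's configure(). A hedged sketch of that recurring shape, where `Tensor`, `TensorHandle`, `AclTensorHandle` and `UnaryLayer` are simplified stand-ins rather than the real armnn/ACL types:

```cpp
// Sketch of the recurring workload-constructor pattern; every type here is
// a hypothetical stand-in except armnn::PolymorphicDowncast itself.
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <vector>

struct Tensor {};
struct TensorHandle { virtual ~TensorHandle() = default; };
struct AclTensorHandle : TensorHandle
{
    Tensor& GetTensor() { return m_Tensor; }
private:
    Tensor m_Tensor;
};
struct UnaryLayer { void configure(Tensor* /*input*/, Tensor* /*output*/) {} };

void ConfigureUnary(UnaryLayer& layer,
                    const std::vector<TensorHandle*>& inputs,
                    const std::vector<TensorHandle*>& outputs)
{
    // Downcast to the backend-specific handle, then unwrap the tensor,
    // mirroring the one-input/one-output constructors in this patch.
    Tensor& input  = armnn::PolymorphicDowncast<AclTensorHandle*>(inputs[0])->GetTensor();
    Tensor& output = armnn::PolymorphicDowncast<AclTensorHandle*>(outputs[0])->GetTensor();
    layer.configure(&input, &output);
}
```
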
diff --git a/src/backends/neon/workloads/NeonActivationWorkload.cpp b/src/backends/neon/workloads/NeonActivationWorkload.cpp
index 916d674..4b2169a 100644
--- a/src/backends/neon/workloads/NeonActivationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonActivationWorkload.cpp
@@ -5,7 +5,9 @@
 
 #include "NeonActivationWorkload.hpp"
 #include "NeonWorkloadUtils.hpp"
+
 #include <aclCommon/ArmComputeUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEActivationLayer.h>
 
@@ -36,8 +38,8 @@
     const arm_compute::ActivationLayerInfo activationLayerInfo =
         ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto layer = std::make_unique<arm_compute::NEActivationLayer>();
     layer->configure(&input, &output, activationLayerInfo);
diff --git a/src/backends/neon/workloads/NeonAdditionWorkload.cpp b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
index a025c0b..cb0c8a4 100644
--- a/src/backends/neon/workloads/NeonAdditionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
@@ -7,6 +7,7 @@
 #include "NeonWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEArithmeticAddition.h>
@@ -35,9 +36,9 @@
 {
     m_Data.ValidateInputsOutputs("NeonAdditionWorkload", 2, 1);
 
-    arm_compute::ITensor& input1 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& input2 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input2 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto layer = std::make_unique<arm_compute::NEArithmeticAddition>();
     layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
diff --git a/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp b/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp
index 0fa9d43..0fb819d 100644
--- a/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp
+++ b/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp
@@ -10,6 +10,7 @@
 
 #include <backendsCommon/CpuTensorHandle.hpp>
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <armnnUtils/TensorUtils.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h>
@@ -54,8 +55,8 @@
                                              const WorkloadInfo& info)
         : BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info)
 {
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto numDims = info.m_InputTensorInfos[0].GetNumDimensions();
     auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, m_Data.m_Parameters.m_Axis);
diff --git a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
index cd931e3..ff777db 100644
--- a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
@@ -7,8 +7,9 @@
 
 #include "NeonWorkloadUtils.hpp"
 
-#include <backendsCommon/CpuTensorHandle.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
+#include <backendsCommon/CpuTensorHandle.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h>
 
@@ -53,8 +54,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonBatchNormalizationWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
diff --git a/src/backends/neon/workloads/NeonConstantWorkload.cpp b/src/backends/neon/workloads/NeonConstantWorkload.cpp
index b9cb807..1cffbe1 100644
--- a/src/backends/neon/workloads/NeonConstantWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConstantWorkload.cpp
@@ -9,6 +9,7 @@
 #include <BFloat16.hpp>
 #include <Half.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <neon/NeonTensorHandle.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <backendsCommon/Workload.hpp>
@@ -41,9 +42,9 @@
 
         ARMNN_ASSERT(data.m_LayerOutput != nullptr);
         arm_compute::ITensor& output =
-            boost::polymorphic_downcast<NeonTensorHandle*>(data.m_Outputs[0])->GetTensor();
+            PolymorphicDowncast<NeonTensorHandle*>(data.m_Outputs[0])->GetTensor();
         arm_compute::DataType computeDataType =
-            boost::polymorphic_downcast<NeonTensorHandle*>(data.m_Outputs[0])->GetDataType();
+            PolymorphicDowncast<NeonTensorHandle*>(data.m_Outputs[0])->GetDataType();
 
         switch (computeDataType)
         {
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index 5d45642..144baec 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -5,8 +5,9 @@
 
 #include "NeonConvolution2dWorkload.hpp"
 
-#include <backendsCommon/CpuTensorHandle.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
+#include <backendsCommon/CpuTensorHandle.hpp>
 #include <neon/workloads/NeonWorkloadUtils.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
@@ -65,8 +66,8 @@
 
     // todo: check tensor shapes match.
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
diff --git a/src/backends/neon/workloads/NeonDequantizeWorkload.cpp b/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
index 8b229a1..9ae82ff 100644
--- a/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
@@ -10,6 +10,7 @@
 #include <arm_compute/runtime/NEON/functions/NEDequantizationLayer.h>
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <neon/NeonTensorHandle.hpp>
 
@@ -32,8 +33,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonDequantizeWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     std::unique_ptr<arm_compute::NEDequantizationLayer> layer(new arm_compute::NEDequantizationLayer());
     layer->configure(&input, &output);
diff --git a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp
index 2ed47e4..36f1cd9 100644
--- a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp
@@ -9,8 +9,7 @@
 
 #include <aclCommon/ArmComputeTensorHandle.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
-
-#include <boost/cast.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 namespace armnn
 {
@@ -85,7 +84,7 @@
 
     auto AclTensorRef = [](ITensorHandle* tensor) -> arm_compute::ITensor&
         {
-            return boost::polymorphic_downcast<IAclTensorHandle*>(tensor)->GetTensor();
+            return PolymorphicDowncast<IAclTensorHandle*>(tensor)->GetTensor();
         };
 
     arm_compute::ITensor& boxEncodings  = AclTensorRef(m_Data.m_Inputs[0]);
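
NeonDetectionPostProcessWorkload is the one file in the patch that routes the cast through a local lambda rather than repeating it per tensor, which pays off once a workload touches half a dozen handles. The same idea in a self-contained sketch, again with hypothetical stand-in types:

```cpp
// Lambda-helper variant; Tensor/TensorHandle/AclTensorHandle are stand-ins.
#include <armnn/utility/PolymorphicDowncast.hpp>

struct Tensor {};
struct TensorHandle { virtual ~TensorHandle() = default; };
struct AclTensorHandle : TensorHandle
{
    Tensor& GetTensor() { return m_Tensor; }
private:
    Tensor m_Tensor;
};

void ConfigureDetection(TensorHandle* boxesIn, TensorHandle* scoresIn, TensorHandle* boxesOut)
{
    // One lambda owns the downcast, so each extra tensor costs one call,
    // not one more spelled-out cast.
    auto AclTensorRef = [](TensorHandle* handle) -> Tensor&
    {
        return armnn::PolymorphicDowncast<AclTensorHandle*>(handle)->GetTensor();
    };

    Tensor& boxEncodings = AclTensorRef(boxesIn);
    Tensor& scores       = AclTensorRef(scoresIn);
    Tensor& detections   = AclTensorRef(boxesOut);
    (void)boxEncodings; (void)scores; (void)detections; // configure(...) would follow
}
```
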
diff --git a/src/backends/neon/workloads/NeonDivisionWorkload.cpp b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
index 6fdb455..fc353f1 100644
--- a/src/backends/neon/workloads/NeonDivisionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
@@ -4,7 +4,9 @@
 //
 
 #include "NeonDivisionWorkload.hpp"
+
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 namespace armnn
@@ -29,9 +31,9 @@
 {
     m_Data.ValidateInputsOutputs("NeonDivisionWorkload", 2, 1);
 
-    arm_compute::ITensor& input0 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& input1 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input0 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_DivLayer.configure(&input0, &input1, &output);
 }
diff --git a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp
index 5b4e909..c49df33 100644
--- a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp
@@ -7,9 +7,9 @@
 
 #include "NeonWorkloadUtils.hpp"
 
-#include <arm_compute/runtime/NEON/functions/NEFloor.h>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
-#include <boost/polymorphic_cast.hpp>
+#include <arm_compute/runtime/NEON/functions/NEFloor.h>
 
 namespace armnn
 {
@@ -19,8 +19,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonFloorFloatWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto layer = std::make_unique<arm_compute::NEFloor>();
     layer->configure(&input, &output);
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
index 338c7eb..e808c60 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
@@ -8,6 +8,7 @@
 #include "NeonWorkloadUtils.hpp"
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <aclCommon/ArmComputeUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
@@ -51,8 +52,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonFullyConnectedWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_WeightsTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
index 9de6c82..d54607d 100644
--- a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
@@ -8,6 +8,7 @@
 #include "NeonWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h>
 
@@ -33,8 +34,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonL2NormalizationFloatWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
diff --git a/src/backends/neon/workloads/NeonMaximumWorkload.cpp b/src/backends/neon/workloads/NeonMaximumWorkload.cpp
index c433d81..46d500b 100644
--- a/src/backends/neon/workloads/NeonMaximumWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMaximumWorkload.cpp
@@ -5,6 +5,7 @@
 
 #include "NeonMaximumWorkload.hpp"
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 namespace armnn
@@ -29,9 +30,9 @@
 {
     m_Data.ValidateInputsOutputs("NeonMaximumWorkload", 2, 1);
 
-    arm_compute::ITensor& input0 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& input1 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input0 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_MaxLayer.configure(&input0, &input1, &output);
 }
diff --git a/src/backends/neon/workloads/NeonMinimumWorkload.cpp b/src/backends/neon/workloads/NeonMinimumWorkload.cpp
index 2867a80..53e483a 100644
--- a/src/backends/neon/workloads/NeonMinimumWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMinimumWorkload.cpp
@@ -4,7 +4,9 @@
 //
 
 #include "NeonMinimumWorkload.hpp"
+
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 namespace armnn
@@ -29,9 +31,9 @@
 {
     m_Data.ValidateInputsOutputs("NeonMinimumWorkload", 2, 1);
 
-    arm_compute::ITensor& input0 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& input1 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input0 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_MinLayer.configure(&input0, &input1, &output);
 }
diff --git a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
index 66fbedf..d813970 100644
--- a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
@@ -7,6 +7,8 @@
 
 #include "NeonWorkloadUtils.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h>
 
 namespace armnn
@@ -37,9 +39,9 @@
 {
     m_Data.ValidateInputsOutputs("NeonMultiplicationWorkload", 2, 1);
 
-    arm_compute::ITensor& input1 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& input2 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input2 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
     // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
diff --git a/src/backends/neon/workloads/NeonNegWorkload.cpp b/src/backends/neon/workloads/NeonNegWorkload.cpp
index afe0558..06c1467 100644
--- a/src/backends/neon/workloads/NeonNegWorkload.cpp
+++ b/src/backends/neon/workloads/NeonNegWorkload.cpp
@@ -9,8 +9,7 @@
 
 #include <aclCommon/ArmComputeTensorHandle.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
-
-#include <boost/cast.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 namespace armnn
 {
@@ -28,8 +27,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonNegWorkload", 1, 1);
 
-    arm_compute::ITensor& input  = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input  = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_NegLayer.configure(&input, &output);
 }
diff --git a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
index 8cb4ec9..77fc429 100644
--- a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
@@ -8,6 +8,7 @@
 #include "NeonWorkloadUtils.hpp"
 #include <aclCommon/ArmComputeUtils.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NENormalizationLayer.h>
 
@@ -77,8 +78,8 @@
         throw InvalidArgumentException("Normalization requires input and output tensors to have equal dimensionality.");
     }
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
     output.info()->set_data_layout(aclDataLayout);
diff --git a/src/backends/neon/workloads/NeonPooling2dWorkload.cpp b/src/backends/neon/workloads/NeonPooling2dWorkload.cpp
index 9934c29..968d5ce 100644
--- a/src/backends/neon/workloads/NeonPooling2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonPooling2dWorkload.cpp
@@ -7,6 +7,8 @@
 
 #include "NeonWorkloadUtils.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <neon/NeonTensorHandle.hpp>
 #include <aclCommon/ArmComputeUtils.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
@@ -37,8 +39,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonPooling2dWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
diff --git a/src/backends/neon/workloads/NeonPreluWorkload.cpp b/src/backends/neon/workloads/NeonPreluWorkload.cpp
index 107090e..8e6ea30 100644
--- a/src/backends/neon/workloads/NeonPreluWorkload.cpp
+++ b/src/backends/neon/workloads/NeonPreluWorkload.cpp
@@ -5,7 +5,9 @@
 
 #include "NeonPreluWorkload.hpp"
 #include "NeonWorkloadUtils.hpp"
+
 #include <aclCommon/ArmComputeUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEPReluLayer.h>
 
@@ -31,9 +33,9 @@
 {
     m_Data.ValidateInputsOutputs("NeonPreluWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& alpha = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& alpha = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto layer = std::make_unique<arm_compute::NEPReluLayer>();
     layer->configure(&input, &alpha, &output);
diff --git a/src/backends/neon/workloads/NeonReshapeWorkload.cpp b/src/backends/neon/workloads/NeonReshapeWorkload.cpp
index 659bb94..8b11da7 100644
--- a/src/backends/neon/workloads/NeonReshapeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonReshapeWorkload.cpp
@@ -7,9 +7,9 @@
 
 #include "NeonWorkloadUtils.hpp"
 
-#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
-#include <boost/polymorphic_cast.hpp>
+#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
 
 namespace armnn
 {
@@ -29,8 +29,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonReshapeWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto layer = std::make_unique<arm_compute::NEReshapeLayer>();
     layer->configure(&input, &output);
diff --git a/src/backends/neon/workloads/NeonResizeWorkload.cpp b/src/backends/neon/workloads/NeonResizeWorkload.cpp
index e936ab7..9e3be26 100644
--- a/src/backends/neon/workloads/NeonResizeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonResizeWorkload.cpp
@@ -9,7 +9,9 @@
 
 #include <aclCommon/ArmComputeUtils.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
+
 #include <neon/NeonTensorHandle.hpp>
 
 using namespace armnn::armcomputetensorutils;
@@ -45,8 +47,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonResizeWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
diff --git a/src/backends/neon/workloads/NeonRsqrtWorkload.cpp b/src/backends/neon/workloads/NeonRsqrtWorkload.cpp
index b629283..44980df 100644
--- a/src/backends/neon/workloads/NeonRsqrtWorkload.cpp
+++ b/src/backends/neon/workloads/NeonRsqrtWorkload.cpp
@@ -9,8 +9,7 @@
 
 #include <aclCommon/ArmComputeTensorHandle.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
-
-#include <boost/cast.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 namespace armnn
 {
@@ -28,8 +28,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonRsqrtWorkload", 1, 1);
 
-    arm_compute::ITensor& input  = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input  = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_RsqrtLayer.configure(&input, &output);
 }
diff --git a/src/backends/neon/workloads/NeonSliceWorkload.cpp b/src/backends/neon/workloads/NeonSliceWorkload.cpp
index 171edc6..32cc042 100644
--- a/src/backends/neon/workloads/NeonSliceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSliceWorkload.cpp
@@ -7,6 +7,8 @@
 
 #include "NeonWorkloadUtils.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 
 #include <neon/NeonTensorHandle.hpp>
@@ -37,8 +39,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonSliceWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::Coordinates starts;
     arm_compute::Coordinates ends;
diff --git a/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp
index 152d19c..a4690a7 100644
--- a/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp
@@ -8,6 +8,8 @@
 #include "NeonWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h>
 
 namespace armnn
@@ -20,8 +22,8 @@
     m_Data.ValidateInputsOutputs("NeonSoftmaxFloatWorkload", 1, 1);
 
     // The ArmCompute softmax layer uses 2D input/output tensors, so flatten the first three dimensions.
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto layer = std::make_unique<arm_compute::NESoftmaxLayer>(memoryManager);
     unsigned int aclAxis = ComputeSoftmaxAclAxis(m_Data.m_Parameters, info.m_InputTensorInfos[0]);
diff --git a/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp
index 15a7066..05d93b9 100644
--- a/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp
+++ b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp
@@ -7,6 +7,7 @@
 #include "NeonWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h>
 
@@ -20,8 +21,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonSoftmaxUint8Workload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     const auto outputQuantization = output.info()->quantization_info();
 
diff --git a/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp b/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp
index a4204b2..2982cd1 100644
--- a/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp
@@ -5,6 +5,8 @@
 
 #include "NeonSpaceToDepthWorkload.hpp"
 #include "NeonWorkloadUtils.hpp"
+
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <ResolveType.hpp>
 
 namespace armnn
@@ -33,12 +35,12 @@
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     input.info()->set_data_layout(aclDataLayout);
 
     int32_t blockSize = boost::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize);
 
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
     output.info()->set_data_layout(aclDataLayout);
 
     m_Layer.reset(new arm_compute::NESpaceToDepthLayer());
diff --git a/src/backends/neon/workloads/NeonSplitterWorkload.cpp b/src/backends/neon/workloads/NeonSplitterWorkload.cpp
index 224e97a..19fa7c6 100644
--- a/src/backends/neon/workloads/NeonSplitterWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSplitterWorkload.cpp
@@ -9,6 +9,7 @@
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <aclCommon/ArmComputeUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <neon/NeonTensorHandle.hpp>
 
@@ -74,7 +75,7 @@
         return;
     }
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
 
     std::vector<arm_compute::ITensor *> aclOutputs;
     for (auto output : m_Data.m_Outputs)
diff --git a/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp b/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp
index 356c0ae..282005c 100644
--- a/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp
@@ -9,6 +9,7 @@
 #include <neon/NeonTensorHandle.hpp>
 #include <aclCommon/ArmComputeUtils.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/WorkloadUtils.hpp>
 
 namespace armnn
@@ -50,8 +51,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonStridedSliceWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::Coordinates starts;
     arm_compute::Coordinates ends;
diff --git a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
index f4b4707..ccc2bfe 100644
--- a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
@@ -7,6 +7,7 @@
 
 #include "NeonWorkloadUtils.hpp"
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h>
@@ -34,9 +35,9 @@
 {
     m_Data.ValidateInputsOutputs("NeonSubtractionWorkload", 2, 1);
 
-    arm_compute::ITensor& input1 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& input2 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input2 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto layer = std::make_unique<arm_compute::NEArithmeticSubtraction>();
     layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
diff --git a/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp
index ffca207..985f540 100644
--- a/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp
@@ -9,6 +9,7 @@
 #include <Profiling.hpp>
 
 #include <armnn/Types.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 
@@ -60,8 +61,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonTransposeConvolution2dWorkload", 1, 1);
 
-    arm_compute::ITensor& input  = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input  = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
diff --git a/src/backends/reference/test/RefCreateWorkloadTests.cpp b/src/backends/reference/test/RefCreateWorkloadTests.cpp
index b83d205..29bfbc0 100644
--- a/src/backends/reference/test/RefCreateWorkloadTests.cpp
+++ b/src/backends/reference/test/RefCreateWorkloadTests.cpp
@@ -5,6 +5,7 @@
 
 #include <test/CreateWorkload.hpp>
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <reference/RefTensorHandle.hpp>
 #include <reference/RefWorkloadFactory.hpp>
 #include <reference/workloads/RefWorkloads.hpp>
@@ -16,8 +17,8 @@
 void CheckInputOutput(std::unique_ptr<Workload> workload, const TensorInfo& inputInfo, const TensorInfo& outputInfo)
 {
     auto queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((inputHandle->GetTensorInfo() == inputInfo));
     BOOST_TEST((outputHandle->GetTensorInfo() == outputInfo));
 }
@@ -29,9 +30,9 @@
                        const TensorInfo&         outputInfo)
 {
     auto queueDescriptor = workload->GetData();
-    auto inputHandle0     = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto inputHandle1     = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Inputs[1]);
-    auto outputHandle    = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle0 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle1 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((inputHandle0->GetTensorInfo() == inputInfo0));
     BOOST_TEST((inputHandle1->GetTensorInfo() == inputInfo1));
     BOOST_TEST((outputHandle->GetTensorInfo() == outputInfo));
@@ -538,16 +539,16 @@
 
     // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
     SplitterQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Inputs[0]);
     BOOST_TEST((inputHandle->GetTensorInfo() == TensorInfo({ 5, 7, 7 }, DataType)));
 
-    auto outputHandle0 = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle0 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((outputHandle0->GetTensorInfo() == TensorInfo({ 1, 7, 7 }, DataType)));
 
-    auto outputHandle1 = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    auto outputHandle1 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[1]);
     BOOST_TEST((outputHandle1->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, DataType)));
 
-    auto outputHandle2 = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[2]);
+    auto outputHandle2 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[2]);
     BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, DataType)));
 }
 
@@ -910,7 +911,7 @@
 
     // Check output is as expected
     auto queueDescriptor = workload->GetData();
-    auto outputHandle = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((outputHandle->GetTensorInfo() == TensorInfo(outputShape, DataType)));
 }
 
@@ -950,7 +951,7 @@
 
     // Check output is as expected
     auto queueDescriptor = workload->GetData();
-    auto outputHandle = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((outputHandle->GetTensorInfo() == TensorInfo(outputShape, dataType)));
 }
 
@@ -1054,10 +1055,10 @@
     StackQueueDescriptor queueDescriptor = workload->GetData();
     for (unsigned int i = 0; i < numInputs; ++i)
     {
-        auto inputHandle = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Inputs[i]);
+        auto inputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Inputs[i]);
         BOOST_TEST((inputHandle->GetTensorInfo() == TensorInfo(inputShape, DataType)));
     }
-    auto outputHandle = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((outputHandle->GetTensorInfo() == TensorInfo(outputShape, DataType)));
 }
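
The reference-backend tests repeat the same per-handle step: downcast to RefTensorHandle, then compare the reported TensorInfo. Where several handles share one expected shape, that step folds naturally into a loop, as the Stack test above shows; a hedged sketch of such a helper with hypothetical stand-ins (`Info`, `Handle`, `RefHandle`) in place of the real test types:

```cpp
// Hypothetical per-handle check helper; only PolymorphicDowncast is real.
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <vector>

struct Info
{
    unsigned int m_Elements = 0;
    bool operator==(const Info& other) const { return m_Elements == other.m_Elements; }
};
struct Handle { virtual ~Handle() = default; };
struct RefHandle : Handle
{
    explicit RefHandle(Info info) : m_Info(info) {}
    const Info& GetTensorInfo() const { return m_Info; }
private:
    Info m_Info;
};

bool AllHandlesMatch(const std::vector<Handle*>& handles, const Info& expected)
{
    for (Handle* handle : handles)
    {
        // Same downcast-then-inspect step the tests perform with BOOST_TEST.
        if (!(armnn::PolymorphicDowncast<RefHandle*>(handle)->GetTensorInfo() == expected))
        {
            return false;
        }
    }
    return true;
}
```
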
 
diff --git a/src/backends/reference/workloads/RefWorkloadUtils.hpp b/src/backends/reference/workloads/RefWorkloadUtils.hpp
index f1b3157..a36ed45 100644
--- a/src/backends/reference/workloads/RefWorkloadUtils.hpp
+++ b/src/backends/reference/workloads/RefWorkloadUtils.hpp
@@ -9,12 +9,12 @@
 
 #include <armnn/Tensor.hpp>
 #include <armnn/Types.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <reference/RefTensorHandle.hpp>
 
 #include <BFloat16.hpp>
 #include <Half.hpp>
-#include <boost/polymorphic_cast.hpp>
 
 namespace armnn
 {
@@ -27,7 +27,7 @@
 {
     // We know that reference workloads use RefTensorHandles for inputs and outputs
     const RefTensorHandle* refTensorHandle =
-        boost::polymorphic_downcast<const RefTensorHandle*>(tensorHandle);
+        PolymorphicDowncast<const RefTensorHandle*>(tensorHandle);
     return refTensorHandle->GetTensorInfo();
 }
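
RefWorkloadUtils is the only call site in the patch where the source pointer is const-qualified; both the destination and deduced source types are still plain pointers, so constness simply carries through the cast. A small sketch under the same stand-in convention:

```cpp
// Const-qualified downcast sketch; Info/Handle/RefHandle are hypothetical.
#include <armnn/utility/PolymorphicDowncast.hpp>

struct Info {};
struct Handle { virtual ~Handle() = default; };
struct RefHandle : Handle
{
    const Info& GetTensorInfo() const { return m_Info; }
private:
    Info m_Info;
};

const Info& GetTensorInfo(const Handle* tensorHandle)
{
    // Destination type const RefHandle* is supplied explicitly; the source
    // type const Handle* is deduced, and const is preserved end to end.
    return armnn::PolymorphicDowncast<const RefHandle*>(tensorHandle)->GetTensorInfo();
}
```
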
 
diff --git a/src/profiling/test/ProfilingTests.hpp b/src/profiling/test/ProfilingTests.hpp
index d1052ce..b41f2dd 100644
--- a/src/profiling/test/ProfilingTests.hpp
+++ b/src/profiling/test/ProfilingTests.hpp
@@ -8,12 +8,12 @@
 #include "ProfilingMocks.hpp"
 
 #include <armnn/Logging.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <CommandHandlerFunctor.hpp>
 #include <IProfilingConnection.hpp>
 #include <ProfilingService.hpp>
 
-#include <boost/polymorphic_cast.hpp>
 #include <boost/test/unit_test.hpp>
 
 #include <chrono>
@@ -229,7 +229,7 @@
     MockProfilingConnection* GetMockProfilingConnection()
     {
         IProfilingConnection* profilingConnection = GetProfilingConnection(m_ProfilingService);
-        return boost::polymorphic_downcast<MockProfilingConnection*>(profilingConnection);
+        return PolymorphicDowncast<MockProfilingConnection*>(profilingConnection);
     }
 
     void ForceTransitionToState(ProfilingState newState)