IVGCVSW-2833 Add Dynamic Quantization
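
The quantizer now supports a dynamic (calibration-based) mode: Refine() runs an
inference over a calibration dataset with Debug layers inserted after each
calibrated layer, records per-tensor min/max values in the RangeTracker via a
debug callback, and ExportNetwork() then quantizes using those refined ranges.
The intended flow, as exercised by the new InputOutputLayerDynamicQuant test,
looks roughly like this (network and calibrationDataSets are placeholders):

    armnn::INetworkQuantizerPtr quantizer = armnn::INetworkQuantizer::Create(network.get());
    for (const armnn::InputTensors& calibrationData : calibrationDataSets)
    {
        quantizer->Refine(calibrationData); // one inference per calibration set, refining min/max
    }
    armnn::INetworkPtr quantizedNetwork = quantizer->ExportNetwork();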

Change-Id: Iba91e3f3625639f01d66f81a9f3e419e0e285d66
Signed-off-by: Jim Flynn <jim.flynn@arm.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c54c395..b6c9776 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -61,6 +61,7 @@
     )
 
 add_library_ex(armnnUtils STATIC ${armnnUtils_sources})
+target_include_directories(armnnUtils PRIVATE src/backends)
 
 if(BUILD_CAFFE_PARSER)
     # ArmNN Parser source files required for all build options
@@ -203,7 +204,6 @@
     include/armnn/ILayerSupport.hpp
     include/armnn/ILayerVisitor.hpp
     include/armnn/INetwork.hpp
-    include/armnn/INetworkQuantizer.hpp
     include/armnn/IProfiler.hpp
     include/armnn/IRuntime.hpp
     include/armnn/LayerSupport.hpp
@@ -315,6 +315,8 @@
     src/armnn/CompatibleTypes.hpp
     src/armnn/Descriptors.cpp
     src/armnn/DeviceSpec.hpp
+    src/armnn/DynamicQuantizationVisitor.cpp
+    src/armnn/DynamicQuantizationVisitor.hpp
     src/armnn/Exceptions.cpp
     src/armnn/ExecutionFrame.cpp
     src/armnn/ExecutionFrame.hpp
diff --git a/include/armnn/ILayerVisitor.hpp b/include/armnn/ILayerVisitor.hpp
index eabad58..ab793bc 100644
--- a/include/armnn/ILayerVisitor.hpp
+++ b/include/armnn/ILayerVisitor.hpp
@@ -347,6 +347,8 @@
     virtual void VisitSwitchLayer(const IConnectableLayer* layer,
                                   const char* name = nullptr) = 0;
 
+    /// Called by VisitLayers before the first layer in a graph is visited.
+    virtual void StartVisit() {}
+
+    /// Called by VisitLayers after the last layer in a graph has been visited.
+    virtual void FinishVisit() {}
 
 };
 } // namespace armnn
diff --git a/include/armnn/TypesUtils.hpp b/include/armnn/TypesUtils.hpp
index 837490d..cb52471 100644
--- a/include/armnn/TypesUtils.hpp
+++ b/include/armnn/TypesUtils.hpp
@@ -4,8 +4,8 @@
 //
 #pragma once
 
-#include "Tensor.hpp"
-#include "Types.hpp"
+#include <armnn/Tensor.hpp>
+#include <armnn/Types.hpp>
 
 #include <cmath>
 #include <ostream>
diff --git a/include/armnn/INetworkQuantizer.hpp b/include/armnnQuantizer/INetworkQuantizer.hpp
similarity index 100%
rename from include/armnn/INetworkQuantizer.hpp
rename to include/armnnQuantizer/INetworkQuantizer.hpp
diff --git a/src/armnn/DynamicQuantizationVisitor.cpp b/src/armnn/DynamicQuantizationVisitor.cpp
new file mode 100644
index 0000000..9b33fb7
--- /dev/null
+++ b/src/armnn/DynamicQuantizationVisitor.cpp
@@ -0,0 +1,330 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "DynamicQuantizationVisitor.hpp"
+#include "NetworkUtils.hpp"
+
+#include <boost/core/ignore_unused.hpp>
+#include <armnn/Descriptors.hpp>
+#include <armnn/Types.hpp>
+
+#include <limits>
+
+namespace armnn
+{
+
+DynamicQuantizationVisitor::DynamicQuantizationVisitor(RangeTracker& rangeTracker, Graph& graph)
+        : m_RangeTracker(rangeTracker),
+          m_Graph(graph)
+{}
+
+void DynamicQuantizationVisitor::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max)
+{
+    m_RangeTracker.SetRange(layer, outputIdx, min, max);
+}
+
+void DynamicQuantizationVisitor::ForwardParentParameters(const IConnectableLayer* layer)
+{
+    for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
+    {
+        const IOutputSlot *outputSlot = layer->GetInputSlot(i).GetConnection();
+        LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid();
+        unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner();
+        const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex);
+        SetRange(layer, i, parentRange.first, parentRange.second);
+    }
+}
+
+void DynamicQuantizationVisitor::AddToCalibratedLayers(const IConnectableLayer* layer)
+{
+    m_LayersToCalibrate.push_back(layer);
+}
+
+void DynamicQuantizationVisitor::AddToNonCalibratedLayers(const IConnectableLayer* layer)
+{
+    m_LayersNotToCalibrate.push_back(layer);
+}
+
+void DynamicQuantizationVisitor::FinishVisit()
+{
+    for (const IConnectableLayer* layer : m_LayersToCalibrate)
+    {
+        std::vector<DebugLayer*> newDebugLayers = InsertDebugLayerAfter(
+            m_Graph, *boost::polymorphic_downcast<Layer*>(const_cast<IConnectableLayer*>(layer)));
+        // record them so we can take them out again efficiently afterward
+        m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers));
+    }
+}
+
+void DynamicQuantizationVisitor::RemoveDebugLayers()
+{
+    for (DebugLayer* debugLayer : m_DebugLayers)
+    {
+        OutputSlot& precedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot();
+        InputSlot& succeedingInputSlot = *debugLayer->GetOutputSlot(0).GetConnection(0);
+        precedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0));
+        debugLayer->GetOutputSlot(0).Disconnect(succeedingInputSlot);
+
+        m_Graph.EraseLayer(debugLayer);
+
+        precedingOutputSlot.Connect(succeedingInputSlot);
+    }
+    m_DebugLayers.clear();
+}
+
+void DynamicQuantizationVisitor::VisitNonCalibratedLayers()
+{
+    RemoveDebugLayers();
+    for (const IConnectableLayer* layer : m_LayersNotToCalibrate)
+    {
+        ForwardParentParameters(layer);
+    }
+}
+
+void DynamicQuantizationVisitor::VisitAdditionLayer(const IConnectableLayer* layer, const char* name)
+{
+    boost::ignore_unused(name);
+    SetRange(layer, 0, -20.f, 20.f);
+    AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitBatchNormalizationLayer(const IConnectableLayer* layer,
+                                                              const BatchNormalizationDescriptor& desc,
+                                                              const ConstTensor& mean,
+                                                              const ConstTensor& variance,
+                                                              const ConstTensor& beta,
+                                                              const ConstTensor& gamma,
+                                                              const char* name)
+{
+    boost::ignore_unused(desc);
+    boost::ignore_unused(mean);
+    boost::ignore_unused(variance);
+    boost::ignore_unused(beta);
+    boost::ignore_unused(gamma);
+    boost::ignore_unused(name);
+    SetRange(layer, 0, -15.0f, 15.0f);
+    AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitConvolution2dLayer(const IConnectableLayer* layer,
+                                                         const Convolution2dDescriptor& convolution2dDescriptor,
+                                                         const ConstTensor& weights,
+                                                         const Optional<ConstTensor>& biases,
+                                                         const char* name)
+{
+    boost::ignore_unused(convolution2dDescriptor);
+    boost::ignore_unused(weights);
+    boost::ignore_unused(biases);
+    boost::ignore_unused(name);
+    SetRange(layer, 0, -15.0f, 15.0f);
+    AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
+                                                                  const DepthwiseConvolution2dDescriptor& desc,
+                                                                  const ConstTensor& weights,
+                                                                  const Optional<ConstTensor>& biases,
+                                                                  const char* name)
+{
+    boost::ignore_unused(desc);
+    boost::ignore_unused(weights);
+    boost::ignore_unused(biases);
+    boost::ignore_unused(name);
+    SetRange(layer, 0, -15.0f, 15.0f);
+    AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitActivationLayer(const IConnectableLayer* layer,
+                                                      const ActivationDescriptor& activationDescriptor,
+                                                      const char* name)
+{
+    switch (activationDescriptor.m_Function)
+    {
+        // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu
+        case ActivationFunction::Abs:
+        case ActivationFunction::Linear:
+        case ActivationFunction::ReLu:
+        case ActivationFunction::SoftReLu:
+            SetRange(layer, 0, 0.f, 15.f);
+            break;
+        case ActivationFunction::BoundedReLu:
+            SetRange(layer, 0, 0.f, activationDescriptor.m_A);
+            break;
+        case ActivationFunction::TanH:
+            SetRange(layer, 0, -1.f, 1.f);
+            break;
+        case ActivationFunction::LeakyReLu:
+            SetRange(layer, 0, -5.f, 15.f);
+            break;
+        default:
+            SetRange(layer, 0, -15.f, 15.f);
+            break;
+    }
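+    // These are only seed ranges; calibration through the Debug callback refines them in Refine().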
+    AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitFullyConnectedLayer(const IConnectableLayer *layer,
+                                                          const FullyConnectedDescriptor& desc,
+                                                          const ConstTensor& weights,
+                                                          const Optional<ConstTensor>& biases,
+                                                          const char *name)
+{
+    boost::ignore_unused(desc);
+    boost::ignore_unused(weights);
+    boost::ignore_unused(biases);
+    boost::ignore_unused(name);
+    SetRange(layer, 0, -15.0f, 15.0f);
+    AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitPermuteLayer(const IConnectableLayer* layer,
+                                                   const PermuteDescriptor& permuteDescriptor,
+                                                   const char* name)
+{
+    boost::ignore_unused(permuteDescriptor);
+    boost::ignore_unused(name);
+    AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitSpaceToBatchNdLayer(const IConnectableLayer* layer,
+                                                          const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
+                                                          const char* name)
+{
+    boost::ignore_unused(spaceToBatchNdDescriptor);
+    boost::ignore_unused(name);
+    AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitPooling2dLayer(const IConnectableLayer* layer,
+                                                     const Pooling2dDescriptor& pooling2dDescriptor,
+                                                     const char* name)
+{
+    boost::ignore_unused(pooling2dDescriptor);
+    boost::ignore_unused(name);
+    AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitSoftmaxLayer(const IConnectableLayer* layer,
+                                                   const SoftmaxDescriptor& softmaxDescriptor,
+                                                   const char* name)
+{
+    boost::ignore_unused(softmaxDescriptor);
+    boost::ignore_unused(name);
+    SetRange(layer, 0, 0.f, 1.f);
+    AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitConstantLayer(const IConnectableLayer* layer,
+                                                    const ConstTensor& input,
+                                                    const char* name)
+{
+    boost::ignore_unused(name);
+
+    if (input.GetDataType() != DataType::Float32)
+    {
+        throw InvalidArgumentException("Quantization is supported only for FP32 tensors");
+    }
+
+    // Work out the range based on the input constants
+    unsigned int inputNumElements = input.GetNumElements();
+    const float* inputData = reinterpret_cast<const float*>(input.GetMemoryArea());
+
+    float min = std::numeric_limits<float>::max();
+    float max = std::numeric_limits<float>::lowest();
+
+    for (unsigned int i = 0; i < inputNumElements; i++)
+    {
+        const float inputValue = inputData[i];
+
+        min = std::min(min, inputValue);
+        max = std::max(max, inputValue);
+    }
+    SetRange(layer, 0, min, max);
+}
+
+void DynamicQuantizationVisitor::VisitMergerLayer(const IConnectableLayer* layer,
+                                                  const OriginsDescriptor& mergerDescriptor,
+                                                  const char* name)
+{
+    boost::ignore_unused(mergerDescriptor);
+    boost::ignore_unused(name);
+    float min = std::numeric_limits<float>::max();
+    float max = std::numeric_limits<float>::lowest();
+    for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
+    {
+        const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection();
+        LayerGuid layerId = outputSlot->GetOwningLayerGuid();
+        unsigned int slotIndex = outputSlot->CalculateIndexOnOwner();
+        RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex);
+        min = std::min(min, range.first);
+        max = std::max(max, range.second);
+    }
+    SetRange(layer, 0, min, max);
+    AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitReshapeLayer(const IConnectableLayer* layer,
+                                                   const ReshapeDescriptor& reshapeDescriptor,
+                                                   const char* name)
+{
+    boost::ignore_unused(reshapeDescriptor);
+    boost::ignore_unused(name);
+    AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitSplitterLayer(const IConnectableLayer* layer,
+                                                    const SplitterDescriptor& splitterDescriptor,
+                                                    const char* name)
+{
+    boost::ignore_unused(splitterDescriptor);
+    boost::ignore_unused(name);
+    AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitResizeBilinearLayer(const IConnectableLayer* layer,
+                                                          const ResizeBilinearDescriptor& resizeDesc,
+                                                          const char* name)
+{
+    boost::ignore_unused(resizeDesc);
+    boost::ignore_unused(name);
+    AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitStridedSliceLayer(const IConnectableLayer* layer,
+                                                        const StridedSliceDescriptor& stridedSliceDescriptor,
+                                                        const char* name)
+{
+    boost::ignore_unused(stridedSliceDescriptor);
+    boost::ignore_unused(name);
+    AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitBatchToSpaceNdLayer(const IConnectableLayer* layer,
+                                                          const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
+                                                          const char* name)
+{
+    boost::ignore_unused(batchToSpaceNdDescriptor);
+    boost::ignore_unused(name);
+    AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitInputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name)
+{
+    boost::ignore_unused(id);
+    boost::ignore_unused(name);
+    SetRange(layer, 0, -0.0f, 0.0f);
+    AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitOutputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name)
+{
+    boost::ignore_unused(id);
+    boost::ignore_unused(name);
+    AddToNonCalibratedLayers(layer);
+    m_OutputLayers.push_back(id);
+}
+
+const std::vector<LayerBindingId>& DynamicQuantizationVisitor::GetOutputLayers()
+{
+    return m_OutputLayers;
+}
+
+} //namespace armnn
diff --git a/src/armnn/DynamicQuantizationVisitor.hpp b/src/armnn/DynamicQuantizationVisitor.hpp
new file mode 100644
index 0000000..6d430f1
--- /dev/null
+++ b/src/armnn/DynamicQuantizationVisitor.hpp
@@ -0,0 +1,137 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "armnn/LayerVisitorBase.hpp"
+#include "RangeTracker.hpp"
+#include "layers/DebugLayer.hpp"
+
+#include <armnn/INetwork.hpp>
+#include <armnnQuantizer/INetworkQuantizer.hpp>
+
+namespace armnn
+{
+
+/// Visitor class to establish min/max ranges based on the type of the layer
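+/// The visitor seeds each output with a heuristic range per layer type. FinishVisit()
+/// (called at the end of VisitLayers) inserts Debug layers after the calibrated layers so
+/// that a subsequent inference can refine those ranges, and VisitNonCalibratedLayers()
+/// removes the Debug layers again and copies parent ranges onto pass-through layers.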
+class DynamicQuantizationVisitor : public LayerVisitorBase<VisitorNoThrowPolicy>
+{
+public:
+    DynamicQuantizationVisitor(RangeTracker& rangeTracker, Graph& graph);
+    ~DynamicQuantizationVisitor() = default;
+
+    /// Functions to set the Range on a per-layer-type basis
+    void VisitAdditionLayer(const IConnectableLayer* layer, const char* name = nullptr) override;
+
+    void VisitBatchNormalizationLayer(const IConnectableLayer* layer,
+                                      const BatchNormalizationDescriptor& desc,
+                                      const ConstTensor& mean,
+                                      const ConstTensor& variance,
+                                      const ConstTensor& beta,
+                                      const ConstTensor& gamma,
+                                      const char* name = nullptr) override;
+
+    void VisitConvolution2dLayer(const IConnectableLayer* layer,
+                                 const Convolution2dDescriptor& convolution2dDescriptor,
+                                 const ConstTensor& weights,
+                                 const Optional<ConstTensor>& biases,
+                                 const char* name = nullptr) override;
+
+    void VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
+                                          const DepthwiseConvolution2dDescriptor& desc,
+                                          const ConstTensor& weights,
+                                          const Optional<ConstTensor>& biases,
+                                          const char* name = nullptr) override;
+
+    void VisitActivationLayer(const IConnectableLayer* layer,
+                              const ActivationDescriptor& activationDescriptor,
+                              const char* name = nullptr) override;
+
+    void VisitFullyConnectedLayer(const IConnectableLayer *layer,
+                                  const FullyConnectedDescriptor& desc,
+                                  const ConstTensor& weights,
+                                  const Optional<ConstTensor>& biases,
+                                  const char *name) override;
+
+    void VisitPermuteLayer(const IConnectableLayer* layer,
+                           const PermuteDescriptor& permuteDescriptor,
+                           const char* name) override;
+
+    void VisitSpaceToBatchNdLayer(const IConnectableLayer* layer,
+                                  const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
+                                  const char* name = nullptr) override;
+
+    void VisitPooling2dLayer(const IConnectableLayer* layer,
+                             const Pooling2dDescriptor& pooling2dDescriptor,
+                             const char* name) override;
+
+    void VisitSoftmaxLayer(const IConnectableLayer* layer,
+                           const SoftmaxDescriptor& softmaxDescriptor,
+                           const char* name = nullptr) override;
+
+    void VisitConstantLayer(const IConnectableLayer* layer,
+                            const ConstTensor& input,
+                            const char* name = nullptr) override;
+
+    void VisitMergerLayer(const IConnectableLayer* layer,
+                          const OriginsDescriptor& mergerDescriptor,
+                          const char* name = nullptr) override;
+
+    void VisitReshapeLayer(const IConnectableLayer* layer,
+                           const ReshapeDescriptor& reshapeDescriptor,
+                           const char* name = nullptr) override;
+
+    void VisitSplitterLayer(const IConnectableLayer* layer,
+                            const SplitterDescriptor& splitterDescriptor,
+                            const char* name = nullptr) override;
+
+    void VisitResizeBilinearLayer(const IConnectableLayer* layer,
+                                  const ResizeBilinearDescriptor& resizeDesc,
+                                  const char* name = nullptr) override;
+
+    void VisitStridedSliceLayer(const IConnectableLayer* layer,
+                                const StridedSliceDescriptor& stridedSliceDescriptor,
+                                const char* name = nullptr) override;
+
+    void VisitBatchToSpaceNdLayer(const IConnectableLayer* layer,
+                                  const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
+                                  const char* name = nullptr) override;
+
+    void VisitInputLayer(const IConnectableLayer* layer,
+                         LayerBindingId id,
+                         const char* name = nullptr) override;
+
+    void VisitOutputLayer(const IConnectableLayer* layer,
+                          LayerBindingId id,
+                          const char* name = nullptr) override;
+
+    void FinishVisit() override;
+    void VisitNonCalibratedLayers();
+
+    const std::vector<armnn::LayerBindingId>& GetOutputLayers();
+
+private:
+    /// Set the range for an output slot on a layer
+    void SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max);
+
+    void ForwardParentParameters(const IConnectableLayer* layer);
+
+    /// Mapping from a layer Guid to an array of ranges for outputs
+    RangeTracker& m_RangeTracker;
+
+    Graph& m_Graph;
+
+    std::vector<const IConnectableLayer*> m_LayersToCalibrate;
+    std::vector<const IConnectableLayer*> m_LayersNotToCalibrate;
+    std::vector<DebugLayer*> m_DebugLayers;
+
+    std::vector<armnn::LayerBindingId> m_OutputLayers;
+
+    void AddToCalibratedLayers(const IConnectableLayer* layer);
+    void AddToNonCalibratedLayers(const IConnectableLayer* layer);
+    void RemoveDebugLayers();
+};
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/NetworkQuantizer.cpp b/src/armnn/NetworkQuantizer.cpp
index 4692a68..12e459d 100644
--- a/src/armnn/NetworkQuantizer.cpp
+++ b/src/armnn/NetworkQuantizer.cpp
@@ -7,6 +7,8 @@
 #include <armnn/INetwork.hpp>
 #include <armnn/Tensor.hpp>
 #include <armnn/Types.hpp>
+#include <TensorUtils.hpp>
+#include <TensorIOUtils.hpp>
 
 #include "Graph.hpp"
 #include "Layer.hpp"
@@ -14,6 +16,7 @@
 #include "NetworkQuantizer.hpp"
 #include "NetworkQuantizerUtils.hpp"
 
+#include "DynamicQuantizationVisitor.hpp"
 #include "StaticRangeVisitor.hpp"
 #include "QuantizerVisitor.hpp"
 #include "OverrideInputRangeVisitor.hpp"
@@ -21,9 +24,15 @@
 #include <vector>
 #include <cmath>
 
+#include <boost/variant.hpp>
+
+
 namespace armnn
 {
 
+using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
+
+
 INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options)
 {
     return new NetworkQuantizer(inputNetwork, options);
@@ -51,16 +60,102 @@
 
 void NetworkQuantizer::Refine(const InputTensors& inputTensors)
 {
-    //Implementation in a following commit
+    // The first time Refine is called, m_Runtime and the DynamicQuantizationVisitor
+    // will not yet exist. Set up the environment: load the Runtime, create the
+    // DynamicQuantizationVisitor and run it over the network to initialise itself
+    // and the RangeTracker, register the Debug callback, and run an initial
+    // inference to establish the first min/max values.
+    if (!m_Runtime)
+    {
+        m_RefineCount = 0;
+        m_Ranges.SetDynamicMode(true);
+        const Graph& cGraph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
+
+        // A non-const Graph is needed because the DynamicQuantizationVisitor inserts Debug layers
+        Graph& graph = const_cast<Graph&>(cGraph);
+
+        // Initialise the RangeTracker with default values for each layer.
+        // These defaults are overwritten by the min/max values recorded
+        // during the first calibration dataset. This initialisation is
+        // only required for the first call of Refine().
+        m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph);
+        VisitLayers(cGraph, m_DynamicQuantizationVisitor.value());
+
+        IRuntime::CreationOptions options;
+        m_Runtime = IRuntime::Create(options);
+
+        // Optimize the network. Debug mode is not requested here because the
+        // DynamicQuantizationVisitor has already inserted Debug layers where calibration is needed.
+        OptimizerOptions optimizerOptions(false, false);
+        std::vector<BackendId> backends = {"CpuRef"};
+        IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork,
+                                                     backends,
+                                                     m_Runtime->GetDeviceSpec(),
+                                                     optimizerOptions);
+
+        m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));
+
+        // Debug callback function to refine min/max in RangeTracker
+        auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) {
+            // Get min/max pair from tensor data
+            std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle);
+
+            // For first calibration dataset, set min/max range in RangeTracker to
+            // min/max ranges gathered during inference
+            if (m_RefineCount == 0)
+            {
+                m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
+            }
+            else
+            {
+                // For every other calibration dataset, only set min/max range if the
+                // values gathered are less than / greater than originally recorded.
+                m_Ranges.RefineMin(guid, slotIndex, minMax.first);
+                m_Ranges.RefineMax(guid, slotIndex, minMax.second);
+            }
+        };
+
+        m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
+    }
+
+    // Create output tensors for EnqueueWorkload
+    std::vector<armnn::BindingPointInfo> outputBindings;
+    auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers();
+    std::vector<TContainer> outputVectors;
+    for (auto outputLayerBindingId : outputLayers)
+    {
+        auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
+        outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
+        outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
+    }
+    OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
+
+    // Run the workload with the calibration data to trigger the Debug callback
+    m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
+    ++m_RefineCount;
 }
 
 INetworkPtr NetworkQuantizer::ExportNetwork()
 {
     const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
 
-    // Step 1) Walk the graph and register min/max values for intermediate tensors
-    StaticRangeVisitor rangeVisitor(m_Ranges);
-    VisitLayers(graph, rangeVisitor);
+    // Step 1) Walk the graph and populate default min/max values for
+    // intermediate tensors. This is only needed if the Runtime does not
+    // exist, i.e. Refine() has never been called.
+    if (!m_Runtime)
+    {
+        m_Ranges.SetDynamicMode(false);
+        StaticRangeVisitor rangeVisitor(m_Ranges);
+        VisitLayers(graph, rangeVisitor);
+    }
+    else
+    {
+        // Set min/max range of non-calibrated layers to parent layer's range
+        m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers();
+        // Now tear down the runtime and the dynamic quantization visitor.
+        m_Runtime.reset(nullptr);
+        m_DynamicQuantizationVisitor = EmptyOptional();
+        m_RefineCount = 0;
+    }
 
     // Step 2) Convert input InputNetwork to Quantized InputNetwork
     std::unique_ptr<IQuantizationScheme> quantizationScheme;
@@ -79,6 +174,9 @@
     QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get());
     VisitLayers(graph, quantizerVisitor);
 
+    // clear the ranges
+    m_Ranges.Reset();
+
     return quantizerVisitor.RetrieveFinalNetwork();
 }
 
diff --git a/src/armnn/NetworkQuantizer.hpp b/src/armnn/NetworkQuantizer.hpp
index 4f6359f..d384bdc 100644
--- a/src/armnn/NetworkQuantizer.hpp
+++ b/src/armnn/NetworkQuantizer.hpp
@@ -6,9 +6,12 @@
 #pragma once
 
 #include <armnn/INetwork.hpp>
-#include <armnn/INetworkQuantizer.hpp>
+#include <armnnQuantizer/INetworkQuantizer.hpp>
+#include <armnn/IRuntime.hpp>
 #include <armnn/Types.hpp>
+#include <armnn/Optional.hpp>
 
+#include "DynamicQuantizationVisitor.hpp"
 #include "RangeTracker.hpp"
 
 namespace armnn
@@ -18,21 +21,41 @@
 {
 public:
     NetworkQuantizer(INetwork* inputNetwork, const QuantizerOptions& options)
-    : m_InputNetwork(inputNetwork), m_Options(options) {}
+    : m_InputNetwork(inputNetwork),
+      m_NetworkId(0),
+      m_Runtime(nullptr, &IRuntime::Destroy),
+      m_RefineCount(0),
+      m_Options(options) {}
 
     void OverrideInputRange(LayerBindingId layerId, float min, float max) override;
     void Refine(const InputTensors& inputTensors) override;
+
+    /// Exposes the min/max range recorded in the RangeTracker (m_Ranges); needed primarily for testing
+    std::pair<float, float> GetMinMaxRange(LayerGuid guid, unsigned int idx) { return m_Ranges.GetRange(guid, idx); }
     INetworkPtr ExportNetwork() override;
 
 private:
     /// Original input network to quantize
     INetwork* m_InputNetwork;
 
+    NetworkId m_NetworkId;
+
+    // When running in dynamic mode this unique pointer holds the
+    // runtime between invocations of the Refine method.
+    IRuntimePtr m_Runtime;
+
+    Optional<DynamicQuantizationVisitor> m_DynamicQuantizationVisitor;
+
+    // Counts the number of times Refine has been called
+    unsigned int m_RefineCount;
+
     /// Mapping from Guid to an array of ranges for outputs
     RangeTracker m_Ranges;
 
     /// Options for the NetworkQuantizer
     QuantizerOptions m_Options;
+
+    std::pair<float, float> FindMinMax(ITensorHandle* tensorHandle);
 };
 
 } //namespace armnn
diff --git a/src/armnn/NetworkQuantizerUtils.hpp b/src/armnn/NetworkQuantizerUtils.hpp
index 26f67f9..6176a9c 100644
--- a/src/armnn/NetworkQuantizerUtils.hpp
+++ b/src/armnn/NetworkQuantizerUtils.hpp
@@ -49,10 +49,12 @@
 template <typename LayerContainer>
 void VisitLayers(const LayerContainer& layerContainer, ILayerVisitor& visitor)
 {
+    visitor.StartVisit();
     for (auto layer : layerContainer)
     {
         layer->Accept(visitor);
     }
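+    // FinishVisit gives the visitor a chance to do whole-graph work once traversal is complete,
+    // e.g. the DynamicQuantizationVisitor inserts Debug layers for calibration here.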
+    visitor.FinishVisit();
 }
 
 } // namespace armnn
diff --git a/src/armnn/QuantizerVisitor.hpp b/src/armnn/QuantizerVisitor.hpp
index 1751229..eb9ebac 100644
--- a/src/armnn/QuantizerVisitor.hpp
+++ b/src/armnn/QuantizerVisitor.hpp
@@ -11,7 +11,7 @@
 
 #include <armnn/INetwork.hpp>
 #include <armnn/Types.hpp>
-#include <armnn/INetworkQuantizer.hpp>
+#include <armnnQuantizer/INetworkQuantizer.hpp>
 
 #include <unordered_map>
 
diff --git a/src/armnn/RangeTracker.cpp b/src/armnn/RangeTracker.cpp
index 2025103..ae756fb 100644
--- a/src/armnn/RangeTracker.cpp
+++ b/src/armnn/RangeTracker.cpp
@@ -4,17 +4,24 @@
 //
 
 #include "RangeTracker.hpp"
+#include "InternalTypes.hpp"
 
 namespace armnn
 {
 
-void RangeTracker::SetRange(const armnn::IConnectableLayer *layer, unsigned int outputIdx, float min, float max)
+void RangeTracker::SetRange(const armnn::IConnectableLayer* layer, unsigned int outputIdx, float min, float max)
 {
     auto& ranges = m_GuidToRangesMap[layer->GetGuid()];
 
-    if (ranges.size() < layer->GetNumOutputSlots())
+    unsigned int numOfOutputSlots = layer->GetNumOutputSlots();
+    // Output layers are a special case: they have no output slots but still need one range entry
+    if (numOfOutputSlots == 0)
     {
-        ranges.resize(layer->GetNumOutputSlots());
+        ++numOfOutputSlots;
+    }
+    if (ranges.size() < numOfOutputSlots)
+    {
+        ranges.resize(numOfOutputSlots);
     }
     ranges[outputIdx] = std::make_pair(min, max);
 }
@@ -24,9 +31,49 @@
     auto search = m_GuidToRangesMap.find(guid);
     if (search == m_GuidToRangesMap.end())
     {
-        return DefaultRange();
+        if (IsInDynamicMode())
+        {
+            throw armnn::Exception("Have no entry for layer GUID [" + std::to_string(guid) + "]");
+        }
+        else
+        {
+            return DefaultRange();
+        }
     }
     return search->second.at(idx);
 }
 
+void RangeTracker::RefineMin(LayerGuid guid, unsigned int idx, float newMin)
+{
+    auto& currentMin = m_GuidToRangesMap.find(guid)->second.at(idx).first;
+    if (newMin < currentMin)
+    {
+        currentMin = newMin;
+    }
+}
+
+void RangeTracker::RefineMax(LayerGuid guid, unsigned int idx, float newMax)
+{
+    auto& currentMax = m_GuidToRangesMap.find(guid)->second.at(idx).second;
+    if (newMax > currentMax)
+    {
+        currentMax = newMax;
+    }
+}
+
+void RangeTracker::ResetMinMax(LayerGuid guid, unsigned int idx, float newMin, float newMax)
+{
+    auto minMaxPair = m_GuidToRangesMap.find(guid);
+    auto& currentMin = minMaxPair->second.at(idx).first;
+    auto& currentMax = minMaxPair->second.at(idx).second;
+
+    currentMin = newMin;
+    currentMax = newMax;
+}
+
+void RangeTracker::Reset()
+{
+    m_GuidToRangesMap.clear();
+}
+
 } //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/RangeTracker.hpp b/src/armnn/RangeTracker.hpp
index 2e8b33a..6662775 100644
--- a/src/armnn/RangeTracker.hpp
+++ b/src/armnn/RangeTracker.hpp
@@ -31,6 +31,21 @@
     /// Query that there is an entry for a layer
     bool HasRanges(LayerGuid guid) const { return m_GuidToRangesMap.find(guid) != m_GuidToRangesMap.end(); }
 
+    /// Update min in RangeTracker with newMin if it is lower than the current value
+    void RefineMin(LayerGuid guid, unsigned int slotIndex, float newMin);
+
+    /// Update max in RangeTracker with newMax if it is greater than the current value
+    void RefineMax(LayerGuid guid, unsigned int slotIndex, float newMax);
+
+    /// Overwrite min and max in RangeTracker with newMin and newMax
+    void ResetMinMax(LayerGuid guid, unsigned int idx, float newMin, float newMax);
+
+    /// Remove all recorded ranges
+    void Reset();
+
+    /// Enable or disable dynamic (calibration-based) mode
+    void SetDynamicMode(bool flag) { m_DynamicMode = flag; }
+
+    /// Query whether the tracker is in dynamic mode
+    bool IsInDynamicMode() const { return m_DynamicMode; }
+
 private:
     using MinMaxRanges = std::vector<MinMaxRange>;
 
@@ -39,6 +54,8 @@
 
     /// Mapping from a layer Guid to an array of ranges for outputs
     std::unordered_map<LayerGuid, MinMaxRanges> m_GuidToRangesMap;
+
+    bool m_DynamicMode = false;
 };
 
 } //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/StaticRangeVisitor.hpp b/src/armnn/StaticRangeVisitor.hpp
index 5a16e18..8f2e698 100644
--- a/src/armnn/StaticRangeVisitor.hpp
+++ b/src/armnn/StaticRangeVisitor.hpp
@@ -9,7 +9,7 @@
 #include "RangeTracker.hpp"
 
 #include <armnn/INetwork.hpp>
-#include <armnn/INetworkQuantizer.hpp>
+#include <armnnQuantizer/INetworkQuantizer.hpp>
 
 
 namespace armnn
diff --git a/src/armnn/test/QuantizerTest.cpp b/src/armnn/test/QuantizerTest.cpp
index 372d0ca..259e90f 100644
--- a/src/armnn/test/QuantizerTest.cpp
+++ b/src/armnn/test/QuantizerTest.cpp
@@ -5,7 +5,7 @@
 
 #include <armnn/INetwork.hpp>
 #include <armnn/Tensor.hpp>
-#include <armnn/INetworkQuantizer.hpp>
+#include <armnnQuantizer/INetworkQuantizer.hpp>
 #include <armnn/Types.hpp>
 
 #include "armnn/LayerVisitorBase.hpp"
@@ -15,6 +15,7 @@
 #include "../OverrideInputRangeVisitor.hpp"
 #include "../RangeTracker.hpp"
 #include "../backends/backendsCommon/test/QuantizeHelper.hpp"
+#include "../../armnnQuantizer/CommandLineProcessor.hpp"
 
 #include <boost/test/unit_test.hpp>
 
@@ -207,6 +208,95 @@
     return network;
 }
 
+INetworkPtr CreateNetworkWithInputOutputLayers()
+{
+    INetworkPtr network = INetwork::Create();
+
+    // Add input/output layers
+    IConnectableLayer* inputLayer = network->AddInputLayer(0);
+    IConnectableLayer* output = network->AddOutputLayer(1);
+
+    // Establish connections
+    inputLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    // Set TensorInfo
+    TensorShape shape{8U};
+    TensorInfo info(shape, DataType::Float32);
+    inputLayer->GetOutputSlot(0).SetTensorInfo(info);
+
+    return network;
+}
+
+TensorInfo GetInputTensorInfo(const Network* network)
+{
+    for (auto&& inputLayer : network->GetGraph().GetInputLayers())
+    {
+        BOOST_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
+        return inputLayer->GetOutputSlot(0).GetTensorInfo();
+    }
+    throw InvalidArgumentException("Network has no input layers");
+}
+
+BOOST_AUTO_TEST_CASE(InputOutputLayerDynamicQuant)
+{
+    INetworkPtr network = CreateNetworkWithInputOutputLayers();
+
+    armnn::TensorInfo tensorInfo = GetInputTensorInfo(boost::polymorphic_downcast<const Network*>(network.get()));
+
+    // Outliers -56 and 98
+    std::vector<float> inputData({0, 0, 0, -56, 98, 0, 0, 0});
+    armnn::ConstTensor inputTensor(tensorInfo, inputData.data());
+
+    InputTensors inputTensors;
+    inputTensors.push_back(std::make_pair(0, inputTensor));
+
+    armnn::INetworkQuantizerPtr quantizer = armnn::INetworkQuantizer::Create(network.get());
+
+    quantizer->Refine(inputTensors);
+
+    // Outliers -77 and 65
+    std::vector<float> inputData2({0, -77, 0, -56, 65, 0, 0, 0});
+    armnn::ConstTensor inputTensor2(tensorInfo, inputData2.data());
+    InputTensors inputTensors2;
+    inputTensors2.push_back(std::make_pair(0, inputTensor2));
+
+    quantizer->Refine(inputTensors2);
+
+    INetworkPtr quantizedNetwork = quantizer->ExportNetwork();
+    // The output layer should be quantized for a min/max of -77 and 98
+    // according to the QAsymm8 quantization scheme
+    std::unique_ptr<IQuantizationScheme> quantizationScheme = std::make_unique<QAsymm8QuantizationScheme>();
+    OffsetScalePair qParams = quantizationScheme->ComputeScheme(-77.0, 98.0);
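+    // Sanity check (assuming the usual asymmetric 8-bit mapping): scale ≈ (98 - (-77)) / 255 ≈ 0.686
+    // and the zero-point is around round(77 / 0.686) ≈ 112; the exact values come from ComputeScheme above.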
+
+    class TestOutputLayerVisitor : public LayerVisitorBase<VisitorNoThrowPolicy>
+    {
+    public:
+        TestOutputLayerVisitor(const OffsetScalePair& offsetScalePair, const DataType& dataType) :
+            m_OffsetScalePair(offsetScalePair), m_DataType(dataType) {}
+
+        void VisitOutputLayer(const IConnectableLayer* layer,
+                              LayerBindingId id,
+                              const char* name = nullptr) override
+        {
+            const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
+            BOOST_CHECK_MESSAGE(info.GetDataType() == m_DataType,
+                                std::string(armnn::GetDataTypeName(info.GetDataType()))
+                                        .append(" == ").append(armnn::GetDataTypeName(m_DataType)));
+            // int32_t offset
+            BOOST_CHECK(info.GetQuantizationOffset() == m_OffsetScalePair.second);
+            // float
+            BOOST_TEST(info.GetQuantizationScale() == m_OffsetScalePair.first, boost::test_tools::tolerance(0.001));
+        }
+
+    private:
+        const OffsetScalePair m_OffsetScalePair;
+        const DataType m_DataType;
+    };
+
+    TestOutputLayerVisitor visitor(qParams, quantizationScheme->GetDataType());
+    quantizedNetwork->Accept(visitor);
+}
+
 BOOST_AUTO_TEST_CASE(QuantizeAbsActivation)
 {
     ActivationDescriptor descriptor;
diff --git a/src/armnnQuantizer/ArmNNQuantizerMain.cpp b/src/armnnQuantizer/ArmNNQuantizerMain.cpp
index 103597a..96d6515 100644
--- a/src/armnnQuantizer/ArmNNQuantizerMain.cpp
+++ b/src/armnnQuantizer/ArmNNQuantizerMain.cpp
@@ -5,7 +5,7 @@
 
 #include "CommandLineProcessor.hpp"
 #include <armnnDeserializer/IDeserializer.hpp>
-#include <armnn/INetworkQuantizer.hpp>
+#include <armnnQuantizer/INetworkQuantizer.hpp>
 #include <armnnSerializer/ISerializer.hpp>
 #include "QuantizationDataSet.hpp"
 #include "QuantizationInput.hpp"
@@ -52,7 +52,7 @@
             armnnQuantizer::InputLayerVisitor inputLayerVisitor;
             network->Accept(inputLayerVisitor);
 
-            for(armnnQuantizer::QuantizationInput quantizationInput : dataSet)
+            for (armnnQuantizer::QuantizationInput quantizationInput : dataSet)
             {
                 armnn::InputTensors inputTensors;
                 std::vector<std::vector<float>> inputData(quantizationInput.GetNumberOfInputs());
diff --git a/src/armnnQuantizer/QuantizationInput.hpp b/src/armnnQuantizer/QuantizationInput.hpp
index ebabdd7..1bfe84b 100644
--- a/src/armnnQuantizer/QuantizationInput.hpp
+++ b/src/armnnQuantizer/QuantizationInput.hpp
@@ -7,7 +7,7 @@
 
 #include <map>
 #include <armnn/Types.hpp>
-#include <armnn/INetworkQuantizer.hpp>
+#include <armnnQuantizer/INetworkQuantizer.hpp>
 
 namespace armnnQuantizer
 {
diff --git a/src/armnnUtils/TensorUtils.cpp b/src/armnnUtils/TensorUtils.cpp
index 57f823f..c2fbbe0 100644
--- a/src/armnnUtils/TensorUtils.cpp
+++ b/src/armnnUtils/TensorUtils.cpp
@@ -4,6 +4,7 @@
 //
 
 #include "TensorUtils.hpp"
+#include <backendsCommon/ITensorHandle.hpp>
 
 namespace armnnUtils
 {
@@ -47,4 +48,31 @@
     }
 }
 
+std::pair<float, float> FindMinMax(armnn::ITensorHandle* tensorHandle)
+{
+    auto tensorData = static_cast<const float*>(tensorHandle->Map(true));
+    auto tensorSize = tensorHandle->GetShape().GetNumElements();
+
+    // Set min/max initially to first value in tensor
+    float min = tensorData[0];
+    float max = tensorData[0];
+
+    // Loop over rest of tensor and update min/max if necessary
+    for (unsigned int i = 1; i < tensorSize; ++i)
+    {
+        if (tensorData[i] < min)
+        {
+            min = tensorData[i];
+        }
+        else if (tensorData[i] > max)
+        {
+            max = tensorData[i];
+        }
+    }
+
+    tensorHandle->Unmap();
+
+    return std::make_pair(min, max);
+}
+
 }
diff --git a/src/armnnUtils/TensorUtils.hpp b/src/armnnUtils/TensorUtils.hpp
index fb5e6eb..c273b49 100644
--- a/src/armnnUtils/TensorUtils.hpp
+++ b/src/armnnUtils/TensorUtils.hpp
@@ -22,4 +22,6 @@
                                 const armnn::DataLayout dataLayout,
                                 const armnn::DataType dataType);
 
+/// Map the tensor and return the minimum and maximum values found in its data
+std::pair<float, float> FindMinMax(armnn::ITensorHandle* tensorHandle);
+
 } // namespace armnnUtils